Skip to content

Instantly share code, notes, and snippets.

@aiguofer
Last active April 27, 2023 19:12
Show Gist options
  • Save aiguofer/5b0a5532de84402e4ced0548f4650f2d to your computer and use it in GitHub Desktop.
Save aiguofer/5b0a5532de84402e4ced0548f4650f2d to your computer and use it in GitHub Desktop.

Revisions

  1. aiguofer revised this gist Jul 12, 2019. 1 changed file with 10 additions and 0 deletions.
    10 changes: 10 additions & 0 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,10 @@
    Find out how much memory each of the Jupyter notebooks running on a server is using. This is helpful for deciding which ones to shut down.

    Original code from http://stackoverflow.com/questions/34685825/jupyter-notebook-memory-usage-for-each-notebook

    You'll need to install the dependencies:

    ```
    pip install tabulate psutil pandas requests
    ```

  2. aiguofer revised this gist May 17, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion notebook_mem_usage.py
    Original file line number Diff line number Diff line change
    @@ -107,7 +107,7 @@ def parse_args():
    return parser.parse_args()


    def main(hostname=None, password=None, token=None, print_ascii=False):
    def main(password=None, print_ascii=False):
    df_mem = get_proc_info()
    df_nb = get_session_info(password)

  3. aiguofer revised this gist May 4, 2019. 1 changed file with 84 additions and 84 deletions.
    168 changes: 84 additions & 84 deletions notebook_mem_usage.py
    Original file line number Diff line number Diff line change
    @@ -1,124 +1,124 @@
    import os
    import pwd
    import psutil
    import argparse
    import re
    import string
    import subprocess

    import pandas as pd
    import psutil
    import requests
    import socket
    import argparse
    import tabulate
    import pandas as pd

    UID = 1
    kernel_regex = re.compile(r".+kernel-(.+)\.json")
    notebook_regex = re.compile(r"(https?://([^:/]+):?(\d+)?)/?(\?token=([a-z0-9]+))?")

    regex = re.compile(r'.+kernel-(.+)\.json')
    port_regex = re.compile(r'port=(\d+)')

    def get_proc_info():
    pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]
    pids = psutil.pids()

    # memory info from psutil.Process
    df_mem = []
    # running ports
    ports = []
    default_port = 8888

    for pid in pids:
    try:
    ret = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
    except IOError: # proc has already terminated
    proc = psutil.Process(pid)
    cmd = " ".join(proc.cmdline())
    except psutil.NoSuchProcess:
    continue

    # jupyter notebook processes
    if len(ret) > 0 and ('jupyter-notebook' in ret or 'ipython notebook' in ret):
    port_match = re.search(port_regex, ret)
    if port_match:
    port = port_match.group(1)
    ports.append(int(port))
    else:
    ports.append(default_port)
    default_port += 1
    if len(ret) > 0 and ('jupyter' in ret or 'ipython' in ret) and 'kernel' in ret:
    if len(cmd) > 0 and ("jupyter" in cmd or "ipython" in cmd) and "kernel" in cmd:
    # kernel
    kernel_ID = re.sub(regex, r'\1', ret)
    kernel_ID = filter(lambda x: x in string.printable, kernel_ID)
    kernel_ID = re.sub(kernel_regex, r"\1", cmd)

    # memory
    process = psutil.Process(int(pid))
    mem = process.memory_info()[0] / float(1e9)
    mem = proc.memory_info()[0] / float(1e9)

    # user name for pid
    for ln in open('/proc/{0}/status'.format(int(pid))):
    if ln.startswith('Uid:'):
    uid = int(ln.split()[UID])
    uname = pwd.getpwuid(uid).pw_name
    uname = proc.username()

    # user, pid, memory, kernel_ID
    df_mem.append([uname, pid, mem, kernel_ID])

    df_mem = pd.DataFrame(df_mem)
    df_mem.columns = ['user', 'pid', 'memory_GB', 'kernel_ID']
    return df_mem, ports

    def get_session_info(ports, opts):
    # notebook info from assessing ports
    if opts.get('hostname'):
    hostnames = [opts['hostname']]
    else:
    hostnames = [socket.gethostname(), '127.0.0.1', 'localhost', '0.0.0.0']
    df_mem.columns = ["user", "pid", "memory_GB", "kernel_ID"]
    return df_mem


    def get_running_notebooks():
        """Return the notebook servers reported by ``jupyter notebook list``.

        Runs ``jupyter notebook list`` as a subprocess, skips the first line of
        its output and matches every remaining non-blank line against
        ``notebook_regex``. Lines that do not match are reported on stdout and
        otherwise ignored.

        Returns:
            list of dict: one entry per matched line, with the keys
            ``"base_url"`` (first regex group) and ``"token"`` (fifth regex
            group, ``None`` when the line carries no ``?token=`` part).
        """
        proc = subprocess.Popen(
            ["jupyter", "notebook", "list"], stdout=subprocess.PIPE
        )
        # communicate() drains stdout, waits for the child and closes the pipe,
        # so no zombie process or open file descriptor is left behind.
        output, _ = proc.communicate()

        notebooks = []
        # The first output line is skipped (presumably a banner line preceding
        # the server URLs -- confirm against the installed jupyter version).
        for raw_line in output.splitlines()[1:]:
            line = raw_line.decode().strip()
            if not line:
                # Blank lines carry no server entry; do not report them.
                continue
            match = re.match(notebook_regex, line)
            if match is None:
                print("Unknown format: {}".format(line))
                continue
            notebooks.append(
                {"base_url": match.group(1), "token": match.group(5)}
            )

        return notebooks


    def get_session_info(password=None):
    df_nb = []
    kernels = []

    for port in set(ports):
    for hostname in set(hostnames):
    sessions = None
    try:
    base_url = 'http://{0}:{1}/'.format(hostname, port)
    s = requests.Session()
    if opts.get('password'):
    # Seems jupyter auth process has changed, need to first get a cookie,
    # then add that cookie to the data being sent over with the password
    data = {
    'password': opts['password']
    }
    s.post(base_url + 'login', data=data)
    data.update(s.cookies)
    s.post(base_url + 'login', data=data)

    sessions = s.get(base_url + 'api/sessions').json()
    except:
    sessions = None

    if sessions:
    for sess in sessions:
    kernel_ID = sess['kernel']['id']
    if kernel_ID not in kernels:
    notebook_path = sess['notebook']['path']
    df_nb.append([port, kernel_ID, notebook_path])
    kernels.append(kernel_ID)
    for notebook in get_running_notebooks():
    s = requests.Session()
    if notebook["token"] is not None:
    s.get(notebook["base_url"] + "/?token=" + notebook["token"])
    else:
    # do a get to the base url to get the session cookies
    s.get(notebook["base_url"])
    if password is not None:
    # Seems jupyter auth process has changed, need to first get a cookie,
    # then add that cookie to the data being sent over with the password
    data = {"password": password}
    data.update(s.cookies)
    s.post(notebook["base_url"] + "/login", data=data)

    res = s.get(notebook["base_url"] + "/api/sessions")

    if res.status_code != 200:
    raise Exception(res.json())

    for sess in res.json():
    kernel_ID = sess["kernel"]["id"]
    if kernel_ID not in kernels:
    kernel = {
    "kernel_ID": kernel_ID,
    "kernel_name": sess["kernel"]["name"],
    "kernel_state": sess["kernel"]["execution_state"],
    "kernel_connections": sess["kernel"]["connections"],
    # "notebook_url": notebook["base_url"] + "/notebook/" + sess["id"],
    "notebook_path": sess["path"],
    }
    kernel.update(notebook)
    df_nb.append(kernel)
    kernels.append(kernel_ID)

    df_nb = pd.DataFrame(df_nb)
    df_nb.columns = ['port', 'kernel_ID', 'notebook_path']
    del df_nb["token"]
    return df_nb


    def parse_args():
    parser = argparse.ArgumentParser(description='Find memory usage.')
    parser.add_argument('--hostname', help='hostname (default: try to find it)')
    parser.add_argument('--password', help='password (only needed if pass-protected)')
    parser = argparse.ArgumentParser(description="Find memory usage.")
    parser.add_argument("--password", help="password (only needed if pass-protected)")

    return parser.parse_args()

    def main(opts):
    df_mem, ports = get_proc_info()
    df_nb = get_session_info(ports, opts)

    # joining tables
    df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
    df = df.sort_values('memory_GB', ascending=False).reset_index(drop=True)
    def main(hostname=None, password=None, token=None, print_ascii=False):
    df_mem = get_proc_info()
    df_nb = get_session_info(password)

    print tabulate.tabulate(df, headers=(df.columns.tolist()))
    # joining tables
    df = pd.merge(df_nb, df_mem, on=["kernel_ID"], how="inner")
    df = df.sort_values("memory_GB", ascending=False).reset_index(drop=True)
    if print_ascii:
    print(tabulate.tabulate(df, headers=(df.columns.tolist())))
    return df

    if __name__ == '__main__':

    if __name__ == "__main__":
    args = vars(parse_args())
    main(args)
    main(args["password"], print_ascii=True)
  4. aiguofer revised this gist Aug 9, 2017. 1 changed file with 10 additions and 5 deletions.
    15 changes: 10 additions & 5 deletions notebook_mem_usage.py
    Original file line number Diff line number Diff line change
    @@ -74,13 +74,18 @@ def get_session_info(ports, opts):
    sessions = None
    try:
    base_url = 'http://{0}:{1}/'.format(hostname, port)
    h = {}
    s = requests.Session()
    if opts.get('password'):
    r = requests.post(base_url + 'login', params={
    # Seems jupyter auth process has changed, need to first get a cookie,
    # then add that cookie to the data being sent over with the password
    data = {
    'password': opts['password']
    })
    h = r.request.headers
    sessions = requests.get(base_url + 'api/sessions', headers=h).json()
    }
    s.post(base_url + 'login', data=data)
    data.update(s.cookies)
    s.post(base_url + 'login', data=data)

    sessions = s.get(base_url + 'api/sessions').json()
    except:
    sessions = None

  5. aiguofer revised this gist Oct 19, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion notebook_mem_usage.py
    Original file line number Diff line number Diff line change
    @@ -109,7 +109,7 @@ def main(opts):

    # joining tables
    df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
    df = df.sort_values('memory_GB', ascending=False)
    df = df.sort_values('memory_GB', ascending=False).reset_index(drop=True)

    print tabulate.tabulate(df, headers=(df.columns.tolist()))
    return df
  6. aiguofer revised this gist Oct 13, 2016. 1 changed file with 104 additions and 74 deletions.
    178 changes: 104 additions & 74 deletions notebook_mem_usage.py
    Original file line number Diff line number Diff line change
    @@ -3,87 +3,117 @@
    import psutil
    import re
    import string
    import json
    import urllib2
    import requests
    import socket
    import argparse
    import tabulate
    import pandas as pd

    UID = 1

    regex = re.compile(r'.+kernel-(.+)\.json')
    port_regex = re.compile(r'port=(\d+)')

    pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

    # memory info from psutil.Process
    df_mem = []
    ports = []
    default_port = 8888

    for pid in pids:
    try:
    ret = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
    except IOError: # proc has already terminated
    continue

    # jupyter notebook processes
    if len(ret) > 0 and ('jupyter-notebook' in ret or 'ipython notebook' in ret):
    port_match = re.search(port_regex, ret)
    if port_match:
    port = port_match.group(1)
    ports.append(int(port))
    else:
    ports.append(default_port)
    default_port += 1
    if len(ret) > 0 and ('jupyter' in ret or 'ipython' in ret) and 'kernel' in ret:
    # kernel
    kernel_ID = re.sub(regex, r'\1', ret)
    kernel_ID = filter(lambda x: x in string.printable, kernel_ID)

    # memory
    process = psutil.Process(int(pid))
    mem = process.memory_info()[0] / float(1e9)

    # user name for pid
    for ln in open('/proc/{0}/status'.format(int(pid))):
    if ln.startswith('Uid:'):
    uid = int(ln.split()[UID])
    uname = pwd.getpwuid(uid).pw_name

    # user, pid, memory, kernel_ID
    df_mem.append([uname, pid, mem, kernel_ID])

    df_mem = pd.DataFrame(df_mem)
    df_mem.columns = ['user', 'pid', 'memory_GB', 'kernel_ID']

    # notebook info from assessing ports
    hostnames = [socket.gethostname(), '127.0.0.1', 'localhost', '0.0.0.0']
    df_nb = []
    kernels = []

    for port in set(ports):
    for hostname in set(hostnames):
    sessions = None
    try:
    url = 'http://{0}:{1}/api/sessions'.format(hostname, port)
    print url
    sessions = json.load(urllib2.urlopen(url))
    except urllib2.URLError:
    sessions = None

    if sessions:
    for sess in sessions:
    kernel_ID = str(sess['kernel']['id'])
    if kernel_ID not in kernels:
    notebook_path = sess['notebook']['path']
    df_nb.append([port, kernel_ID, notebook_path])
    kernels.append(kernel_ID)
    def get_proc_info():
    pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

    df_nb = pd.DataFrame(df_nb)
    df_nb.columns = ['port', 'kernel_ID', 'notebook_path']
    # memory info from psutil.Process
    df_mem = []
    # running ports
    ports = []
    default_port = 8888

    # joining tables
    df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
    df = df.sort_values('memory_GB', ascending=False)

    df.to_csv('notebook_mem_usage.csv', index=False)
    for pid in pids:
    try:
    ret = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
    except IOError: # proc has already terminated
    continue

    # jupyter notebook processes
    if len(ret) > 0 and ('jupyter-notebook' in ret or 'ipython notebook' in ret):
    port_match = re.search(port_regex, ret)
    if port_match:
    port = port_match.group(1)
    ports.append(int(port))
    else:
    ports.append(default_port)
    default_port += 1
    if len(ret) > 0 and ('jupyter' in ret or 'ipython' in ret) and 'kernel' in ret:
    # kernel
    kernel_ID = re.sub(regex, r'\1', ret)
    kernel_ID = filter(lambda x: x in string.printable, kernel_ID)

    # memory
    process = psutil.Process(int(pid))
    mem = process.memory_info()[0] / float(1e9)

    # user name for pid
    for ln in open('/proc/{0}/status'.format(int(pid))):
    if ln.startswith('Uid:'):
    uid = int(ln.split()[UID])
    uname = pwd.getpwuid(uid).pw_name

    # user, pid, memory, kernel_ID
    df_mem.append([uname, pid, mem, kernel_ID])

    df_mem = pd.DataFrame(df_mem)
    df_mem.columns = ['user', 'pid', 'memory_GB', 'kernel_ID']
    return df_mem, ports

    def get_session_info(ports, opts):
        """Collect the notebook sessions served on the given ports.

        Every (port, hostname) combination is queried at ``api/sessions``; a
        kernel is recorded only the first time its id is seen.

        Args:
            ports: iterable of port numbers to query.
            opts: option mapping. A truthy ``opts['hostname']`` restricts the
                query to that host; a truthy ``opts['password']`` triggers a
                login request before the sessions are fetched.

        Returns:
            pandas.DataFrame with the columns ``port``, ``kernel_ID`` and
            ``notebook_path``. The frame is empty (but still has these
            columns) when no session could be retrieved.
        """
        # notebook info from accessing ports
        if opts.get('hostname'):
            hostnames = [opts['hostname']]
        else:
            hostnames = [socket.gethostname(), '127.0.0.1', 'localhost', '0.0.0.0']
        rows = []
        seen_kernels = set()

        for port in set(ports):
            for hostname in set(hostnames):
                base_url = 'http://{0}:{1}/'.format(hostname, port)
                try:
                    h = {}
                    if opts.get('password'):
                        # NOTE(review): the password travels as URL query
                        # parameters and the headers of the login *request* are
                        # reused below; whether this authenticates the follow-up
                        # call is not evident from here -- confirm against the
                        # Jupyter login flow.
                        r = requests.post(base_url + 'login', params={
                            'password': opts['password']
                        })
                        h = r.request.headers
                    sessions = requests.get(base_url + 'api/sessions', headers=h).json()
                except (requests.RequestException, ValueError):
                    # Unreachable host/port or a reply that is not JSON: this
                    # candidate has nothing to report. Unlike a bare except,
                    # KeyboardInterrupt and programming errors still propagate.
                    continue

                if not isinstance(sessions, list):
                    # e.g. a JSON error object instead of a session list
                    continue

                for sess in sessions:
                    kernel_ID = sess['kernel']['id']
                    if kernel_ID in seen_kernels:
                        continue
                    notebook_path = sess['notebook']['path']
                    rows.append([port, kernel_ID, notebook_path])
                    seen_kernels.add(kernel_ID)

        # Passing the column names to the constructor keeps the result valid
        # when no row was collected (assigning .columns on an empty frame
        # raises a length-mismatch ValueError).
        return pd.DataFrame(rows, columns=['port', 'kernel_ID', 'notebook_path'])

    def parse_args():
        """Parse the command-line options of the script.

        Returns:
            argparse.Namespace with the attributes ``hostname`` and
            ``password`` (both ``None`` when the option is not given).
        """
        option_help = (
            ('--hostname', 'hostname (default: try to find it)'),
            ('--password', 'password (only needed if pass-protected)'),
        )

        arg_parser = argparse.ArgumentParser(description='Find memory usage.')
        for flag, help_text in option_help:
            arg_parser.add_argument(flag, help=help_text)

        return arg_parser.parse_args()

    def main(opts):
        """Print and return the memory usage of each notebook kernel.

        Args:
            opts: option mapping, passed through to ``get_session_info``.

        Returns:
            pandas.DataFrame: the session table inner-joined with the process
            table on ``kernel_ID``, ordered by ``memory_GB`` descending.
        """
        proc_frame, ports = get_proc_info()
        session_frame = get_session_info(ports, opts)

        # joining tables
        merged = pd.merge(session_frame, proc_frame, on=['kernel_ID'], how='inner')
        usage = merged.sort_values('memory_GB', ascending=False)

        header = usage.columns.tolist()
        print(tabulate.tabulate(usage, headers=header))
        return usage

    if __name__ == '__main__':
        # Hand the parsed command-line options to main() as a plain dict.
        main(vars(parse_args()))
  7. aiguofer revised this gist Oct 11, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion notebook_mem_usage.py
    Original file line number Diff line number Diff line change
    @@ -27,7 +27,7 @@
    continue

    # jupyter notebook processes
    if len(ret) > 0 and ('jupyter-notebook' in ret or 'ipython-notebook' in ret):
    if len(ret) > 0 and ('jupyter-notebook' in ret or 'ipython notebook' in ret):
    port_match = re.search(port_regex, ret)
    if port_match:
    port = port_match.group(1)
  8. aiguofer revised this gist Oct 11, 2016. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions notebook_mem_usage.py
    Original file line number Diff line number Diff line change
    @@ -27,15 +27,15 @@
    continue

    # jupyter notebook processes
    if len(ret) > 0 and 'jupyter-notebook' in ret:
    if len(ret) > 0 and ('jupyter-notebook' in ret or 'ipython-notebook' in ret):
    port_match = re.search(port_regex, ret)
    if port_match:
    port = port_match.group(1)
    ports.append(int(port))
    else:
    ports.append(default_port)
    default_port += 1
    if len(ret) > 0 and 'jupyter' in ret and 'kernel' in ret:
    if len(ret) > 0 and ('jupyter' in ret or 'ipython' in ret) and 'kernel' in ret:
    # kernel
    kernel_ID = re.sub(regex, r'\1', ret)
    kernel_ID = filter(lambda x: x in string.printable, kernel_ID)
  9. aiguofer revised this gist Oct 11, 2016. 1 changed file with 0 additions and 2 deletions.
    2 changes: 0 additions & 2 deletions notebook_mem_usage.py
    Original file line number Diff line number Diff line change
    @@ -28,7 +28,6 @@

    # jupyter notebook processes
    if len(ret) > 0 and 'jupyter-notebook' in ret:
    print ret
    port_match = re.search(port_regex, ret)
    if port_match:
    port = port_match.group(1)
    @@ -38,7 +37,6 @@
    default_port += 1
    if len(ret) > 0 and 'jupyter' in ret and 'kernel' in ret:
    # kernel
    print ret
    kernel_ID = re.sub(regex, r'\1', ret)
    kernel_ID = filter(lambda x: x in string.printable, kernel_ID)

  10. aiguofer created this gist Oct 11, 2016.
    91 changes: 91 additions & 0 deletions notebook_mem_usage.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,91 @@
    import os
    import pwd
    import psutil
    import re
    import string
    import json
    import urllib2
    import socket
    import pandas as pd

    # Script body: scan /proc for Jupyter notebook servers and kernels, ask each
    # server for its sessions, join both tables on the kernel id and write the
    # result to notebook_mem_usage.csv.
    # NOTE: relies on the urllib2 module imported at the top of the file.

    # Index of the field taken from the "Uid:" line of /proc/<pid>/status.
    UID = 1

    regex = re.compile(r'.+kernel-(.+)\.json')
    port_regex = re.compile(r'port=(\d+)')

    pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

    # memory info from psutil.Process
    df_mem = []
    ports = []
    default_port = 8888

    for pid in pids:
        try:
            with open(os.path.join('/proc', pid, 'cmdline'), 'rb') as cmdline_file:
                ret = cmdline_file.read()
        except IOError:  # proc has already terminated
            continue

        if len(ret) == 0:
            continue

        # jupyter notebook processes
        if 'jupyter-notebook' in ret:
            print(ret)
            port_match = re.search(port_regex, ret)
            if port_match:
                ports.append(int(port_match.group(1)))
            else:
                # no port= argument on the command line: assume the next
                # default port
                ports.append(default_port)
                default_port += 1

        if 'jupyter' in ret and 'kernel' in ret:
            # kernel
            print(ret)
            kernel_ID = re.sub(regex, r'\1', ret)
            # drop non-printable characters left over from the raw cmdline
            kernel_ID = ''.join(ch for ch in kernel_ID if ch in string.printable)

            try:
                # memory: first memory_info() field, scaled by 1e9
                process = psutil.Process(int(pid))
                mem = process.memory_info()[0] / float(1e9)

                # user name for pid; reset per process so that a name found
                # for an earlier pid is never reused
                uname = None
                with open('/proc/{0}/status'.format(int(pid))) as status_file:
                    for ln in status_file:
                        if ln.startswith('Uid:'):
                            uid = int(ln.split()[UID])
                            try:
                                uname = pwd.getpwuid(uid).pw_name
                            except KeyError:
                                # uid without a passwd entry: show the number
                                uname = str(uid)
                            break
            except (IOError, psutil.NoSuchProcess):
                # the process exited between the /proc listing and this lookup
                continue

            # user, pid, memory, kernel_ID
            df_mem.append([uname, pid, mem, kernel_ID])

    # Column names go to the constructor so that an empty result still yields
    # a valid frame (assigning .columns on an empty frame raises ValueError).
    df_mem = pd.DataFrame(df_mem, columns=['user', 'pid', 'memory_GB', 'kernel_ID'])

    # notebook info from accessing ports
    hostnames = [socket.gethostname(), '127.0.0.1', 'localhost', '0.0.0.0']
    df_nb = []
    kernels = []

    for port in set(ports):
        for hostname in set(hostnames):
            sessions = None
            try:
                url = 'http://{0}:{1}/api/sessions'.format(hostname, port)
                print(url)
                response = urllib2.urlopen(url)
                try:
                    sessions = json.load(response)
                finally:
                    response.close()
            except (urllib2.URLError, ValueError):
                # unreachable host/port, or a reply that is not JSON
                sessions = None

            if sessions:
                for sess in sessions:
                    kernel_ID = str(sess['kernel']['id'])
                    if kernel_ID not in kernels:
                        notebook_path = sess['notebook']['path']
                        df_nb.append([port, kernel_ID, notebook_path])
                        kernels.append(kernel_ID)

    df_nb = pd.DataFrame(df_nb, columns=['port', 'kernel_ID', 'notebook_path'])

    # joining tables
    df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
    df = df.sort_values('memory_GB', ascending=False)

    df.to_csv('notebook_mem_usage.csv', index=False)