Last active
April 27, 2023 19:12
-
-
Save aiguofer/5b0a5532de84402e4ced0548f4650f2d to your computer and use it in GitHub Desktop.
Revisions
-
aiguofer revised this gist
Jul 12, 2019 . 1 changed file with 10 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters. Original file line number Diff line number Diff line change @@ -0,0 +1,10 @@ Find out how much memory each of the Jupyter notebooks running on a server is using. Helpful for knowing which ones to shut down. Original code from: http://stackoverflow.com/questions/34685825/jupyter-notebook-memory-usage-for-each-notebook . You'll need to install the dependencies first: ``` pip install tabulate psutil pandas requests ``` -
aiguofer revised this gist
May 17, 2019 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -107,7 +107,7 @@ def parse_args(): return parser.parse_args() def main(password=None, print_ascii=False): df_mem = get_proc_info() df_nb = get_session_info(password) -
aiguofer revised this gist
May 4, 2019 . 1 changed file with 84 additions and 84 deletions.There are no files selected for viewing
"""Report the memory used by each running Jupyter notebook kernel.

Kernel processes are located with ``psutil``, the running notebook servers
with ``jupyter notebook list``, and the two are joined on the kernel ID that
each server reports through its ``/api/sessions`` endpoint.
"""
# NOTE(review): reconstructed from a whitespace-collapsed diff view
# (hunk @@ -1,124 +1,124 @@); indentation was inferred from the syntax.
import argparse
import re
import subprocess

import pandas as pd
import psutil
import requests
import tabulate

# Captures the kernel ID from a connection-file path such as
# ".../kernel-<id>.json" on a kernel's command line.
kernel_regex = re.compile(r".+kernel-(.+)\.json")
# Captures (base_url, host, port, "?token=...", token) from one line of
# `jupyter notebook list` output.
notebook_regex = re.compile(r"(https?://([^:/]+):?(\d+)?)/?(\?token=([a-z0-9]+))?")

# Explicit column lists so that empty results still produce well-formed
# frames that can be merged and sorted.
_PROC_COLUMNS = ["user", "pid", "memory_GB", "kernel_ID"]
_SESSION_COLUMNS = [
    "kernel_ID",
    "kernel_name",
    "kernel_state",
    "kernel_connections",
    "notebook_path",
    "base_url",
]


def get_proc_info():
    """Collect memory usage for every Jupyter/IPython kernel process.

    Returns:
        pandas.DataFrame with columns ``user``, ``pid``, ``memory_GB`` and
        ``kernel_ID``; empty (but with those columns) when no kernel
        process is found.
    """
    rows = []

    for pid in psutil.pids():
        try:
            proc = psutil.Process(pid)
            cmd = " ".join(proc.cmdline())

            if not (("jupyter" in cmd or "ipython" in cmd) and "kernel" in cmd):
                continue

            # Use the captured group rather than re.sub so that arguments
            # following the connection file do not end up in the kernel ID.
            match = kernel_regex.search(cmd)
            if match is None:
                continue

            # First field of memory_info(), scaled by 1e9 to gigabytes.
            mem = proc.memory_info()[0] / float(1e9)
            uname = proc.username()
        except psutil.Error:
            # The process vanished, is a zombie, or belongs to another user
            # we are not allowed to inspect; skip it instead of crashing.
            continue

        rows.append([uname, pid, mem, match.group(1)])

    return pd.DataFrame(rows, columns=_PROC_COLUMNS)


def get_running_notebooks():
    """List the notebook servers reported by ``jupyter notebook list``.

    Returns:
        list of dicts with keys ``base_url`` and ``token`` (``token`` is
        ``None`` when the listed URL carries no ``?token=`` query).
    """
    # subprocess.run waits for the child, so no process handle or pipe is
    # left dangling as with a bare Popen(...).stdout.readlines().
    result = subprocess.run(
        ["jupyter", "notebook", "list"], stdout=subprocess.PIPE
    )

    notebooks = []
    # The first output line is skipped, as in the original code (presumably
    # it is a header line).
    for raw_line in result.stdout.decode().splitlines()[1:]:
        line = raw_line.strip()
        if not line:
            continue
        match = notebook_regex.match(line)
        if match:
            base_url, _host, _port, _, token = match.groups()
            notebooks.append({"base_url": base_url, "token": token})
        else:
            print("Unknown format: {}".format(line))

    return notebooks


def get_session_info(password=None):
    """Query every running notebook server for its active sessions.

    Args:
        password: notebook password, only needed for password-protected
            servers; ``None`` skips the login request.

    Returns:
        pandas.DataFrame with one row per distinct kernel and the columns
        ``kernel_ID``, ``kernel_name``, ``kernel_state``,
        ``kernel_connections``, ``notebook_path`` and ``base_url``; empty
        (but with those columns) when there are no sessions.

    Raises:
        RuntimeError: if a server answers ``/api/sessions`` with a status
            other than 200.
    """
    rows = []
    seen_kernels = set()

    for notebook in get_running_notebooks():
        base_url = notebook["base_url"]
        token = notebook["token"]

        # The context manager closes the session's pooled connections.
        with requests.Session() as s:
            if token is not None:
                s.get(base_url + "/?token=" + token)
            else:
                # do a get to the base url to get the session cookies
                s.get(base_url)

            if password is not None:
                # Seems jupyter auth process has changed, need to first get
                # a cookie, then add that cookie to the data being sent over
                # with the password
                data = {"password": password}
                data.update(s.cookies)
                s.post(base_url + "/login", data=data)

            res = s.get(base_url + "/api/sessions")

            if res.status_code != 200:
                # Report the raw body: an error page is not guaranteed to be
                # JSON, and res.json() would then mask the real failure.
                raise RuntimeError(
                    "GET {}/api/sessions returned HTTP {}: {}".format(
                        base_url, res.status_code, res.text
                    )
                )

            for sess in res.json():
                kernel_ID = sess["kernel"]["id"]
                if kernel_ID in seen_kernels:
                    continue
                seen_kernels.add(kernel_ID)
                rows.append(
                    {
                        "kernel_ID": kernel_ID,
                        "kernel_name": sess["kernel"]["name"],
                        "kernel_state": sess["kernel"]["execution_state"],
                        "kernel_connections": sess["kernel"]["connections"],
                        "notebook_path": sess["path"],
                        # The token is deliberately left out of the report.
                        "base_url": base_url,
                    }
                )

    return pd.DataFrame(rows, columns=_SESSION_COLUMNS)


def parse_args():
    """Parse the command-line options (currently only ``--password``)."""
    parser = argparse.ArgumentParser(description="Find memory usage.")
    parser.add_argument("--password", help="password (only needed if pass-protected)")
    return parser.parse_args()


def main(hostname=None, password=None, token=None, print_ascii=False):
    """Join kernel memory usage with notebook session information.

    Args:
        hostname: unused; kept so existing callers keep working.
        password: notebook password forwarded to ``get_session_info``.
        token: unused; kept so existing callers keep working.
        print_ascii: when true, also print the result as a text table.

    Returns:
        pandas.DataFrame sorted by ``memory_GB`` in descending order.
    """
    df_mem = get_proc_info()
    df_nb = get_session_info(password)

    # joining tables
    df = pd.merge(df_nb, df_mem, on=["kernel_ID"], how="inner")
    df = df.sort_values("memory_GB", ascending=False).reset_index(drop=True)
    if print_ascii:
        print(tabulate.tabulate(df, headers=(df.columns.tolist())))
    return df


if __name__ == "__main__":
    args = vars(parse_args())
    # Pass the password by keyword: main()'s first positional parameter is
    # `hostname`, so a positional call would silently drop the password.
    main(password=args["password"], print_ascii=True)
aiguofer revised this gist
Aug 9, 2017 . 1 changed file with 10 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -74,13 +74,18 @@ def get_session_info(ports, opts): sessions = None try: base_url = 'http://{0}:{1}/'.format(hostname, port) s = requests.Session() if opts.get('password'): # Seems jupyter auth process has changed, need to first get a cookie, # then add that cookie to the data being sent over with the password data = { 'password': opts['password'] } s.post(base_url + 'login', data=data) data.update(s.cookies) s.post(base_url + 'login', data=data) sessions = s.get(base_url + 'api/sessions').json() except: sessions = None -
aiguofer revised this gist
Oct 19, 2016 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -109,7 +109,7 @@ def main(opts): # joining tables df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner') df = df.sort_values('memory_GB', ascending=False).reset_index(drop=True) print tabulate.tabulate(df, headers=(df.columns.tolist())) return df -
aiguofer revised this gist
Oct 13, 2016 . 1 changed file with 104 additions and 74 deletions.There are no files selected for viewing
# NOTE(review): reconstructed from a whitespace-collapsed diff view
# (hunk @@ -3,87 +3,117 @@); indentation was inferred from the syntax.
# NOTE(review): `os` and `pwd` are used below but their imports are not
# visible here -- the hunk starts at file line 3, so they are presumably
# imported on lines 1-2. This revision is Python 2 code (print statement).
import psutil
import re
import string
import requests
import socket
import argparse
import tabulate
import pandas as pd

# Index into the whitespace-split 'Uid:' line of /proc/<pid>/status
# (presumably the real UID -- confirm against proc(5)).
UID = 1

# Captures the kernel ID from a ".../kernel-<id>.json" path.
regex = re.compile(r'.+kernel-(.+)\.json')
# Captures the port number from a "port=<n>" command-line argument.
port_regex = re.compile(r'port=(\d+)')


def get_proc_info():
    """Scan /proc for notebook servers and kernel processes.

    Returns a tuple ``(df_mem, ports)``: a DataFrame with columns
    ``user``, ``pid``, ``memory_GB`` and ``kernel_ID`` for the kernel
    processes, and the list of ports collected for notebook servers.
    """
    # Numeric entries of /proc are process IDs (kept as strings here).
    pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

    # memory info from psutil.Process
    df_mem = []
    # running ports
    ports = []
    default_port = 8888

    for pid in pids:
        try:
            # NOTE(review): the file handle is never closed explicitly.
            ret = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
        except IOError:  # proc has already terminated
            continue

        # jupyter notebook processes
        # NOTE(review): /proc cmdline arguments are presumably NUL-separated,
        # in which case 'ipython notebook' (with a space) never matches --
        # confirm.
        if len(ret) > 0 and ('jupyter-notebook' in ret or 'ipython notebook' in ret):
            port_match = re.search(port_regex, ret)
            if port_match:
                port = port_match.group(1)
                ports.append(int(port))
            else:
                # No explicit port on the command line: assume the next
                # default port, counting up from 8888.
                ports.append(default_port)
                default_port += 1

        if len(ret) > 0 and ('jupyter' in ret or 'ipython' in ret) and 'kernel' in ret:
            # kernel
            kernel_ID = re.sub(regex, r'\1', ret)
            # Keep only printable characters (on Python 2, filter() over a
            # str returns a str).
            kernel_ID = filter(lambda x: x in string.printable, kernel_ID)

            # memory
            process = psutil.Process(int(pid))
            # First field of memory_info(), scaled by 1e9 to gigabytes.
            mem = process.memory_info()[0] / float(1e9)

            # user name for pid
            # NOTE(review): if no line starts with 'Uid:', `uname` keeps the
            # value from an earlier process (or is undefined on the first).
            for ln in open('/proc/{0}/status'.format(int(pid))):
                if ln.startswith('Uid:'):
                    uid = int(ln.split()[UID])
                    uname = pwd.getpwuid(uid).pw_name

            # user, pid, memory, kernel_ID
            df_mem.append([uname, pid, mem, kernel_ID])

    df_mem = pd.DataFrame(df_mem)
    # NOTE(review): presumably raises when no kernel process was found,
    # because an empty frame has no columns to rename -- confirm.
    df_mem.columns = ['user', 'pid', 'memory_GB', 'kernel_ID']
    return df_mem, ports


def get_session_info(ports, opts):
    """Ask each candidate host/port pair for its notebook sessions.

    ``ports`` is an iterable of port numbers; ``opts`` is a dict that may
    hold ``'hostname'`` and ``'password'``. Returns a DataFrame with
    columns ``port``, ``kernel_ID`` and ``notebook_path``, one row per
    distinct kernel.
    """
    # notebook info from assessing ports
    if opts.get('hostname'):
        hostnames = [opts['hostname']]
    else:
        # No hostname given: try this machine's name and the usual local
        # addresses.
        hostnames = [socket.gethostname(), '127.0.0.1', 'localhost', '0.0.0.0']
    df_nb = []
    kernels = []
    for port in set(ports):
        for hostname in set(hostnames):
            sessions = None
            try:
                base_url = 'http://{0}:{1}/'.format(hostname, port)
                h = {}
                if opts.get('password'):
                    # NOTE(review): `params=` presumably puts the password
                    # in the URL query string; a form body (`data=`) is
                    # probably what was intended -- confirm.
                    r = requests.post(base_url + 'login', params={
                        'password': opts['password']
                    })
                    # Reuse the headers of the login request for the
                    # sessions request below.
                    h = r.request.headers
                sessions = requests.get(base_url + 'api/sessions', headers=h).json()
            # NOTE(review): the bare except hides every failure (connection
            # errors, bad JSON, typos) behind "no sessions".
            except:
                sessions = None

            if sessions:
                for sess in sessions:
                    kernel_ID = sess['kernel']['id']
                    # The same kernel can be reached through several
                    # hostnames; record it only once.
                    if kernel_ID not in kernels:
                        notebook_path = sess['notebook']['path']
                        df_nb.append([port, kernel_ID, notebook_path])
                        kernels.append(kernel_ID)

    df_nb = pd.DataFrame(df_nb)
    # NOTE(review): presumably raises when no session was found (empty
    # frame, three column names) -- confirm.
    df_nb.columns = ['port', 'kernel_ID', 'notebook_path']
    return df_nb


def parse_args():
    """Parse the ``--hostname`` and ``--password`` command-line options."""
    parser = argparse.ArgumentParser(description='Find memory usage.')
    parser.add_argument('--hostname', help='hostname (default: try to find it)')
    parser.add_argument('--password', help='password (only needed if pass-protected)')
    return parser.parse_args()


def main(opts):
    """Print and return kernel memory usage joined with notebook paths.

    ``opts`` is the dict of parsed command-line options. The returned
    DataFrame is sorted by ``memory_GB`` in descending order.
    """
    df_mem, ports = get_proc_info()
    df_nb = get_session_info(ports, opts)

    # joining tables
    df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
    df = df.sort_values('memory_GB', ascending=False)
    print tabulate.tabulate(df, headers=(df.columns.tolist()))
    return df


if __name__ == '__main__':
    args = vars(parse_args())
    main(args)
aiguofer revised this gist
Oct 11, 2016 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -27,7 +27,7 @@ continue # jupyter notebook processes if len(ret) > 0 and ('jupyter-notebook' in ret or 'ipython notebook' in ret): port_match = re.search(port_regex, ret) if port_match: port = port_match.group(1) -
aiguofer revised this gist
Oct 11, 2016 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -27,15 +27,15 @@ continue # jupyter notebook processes if len(ret) > 0 and ('jupyter-notebook' in ret or 'ipython-notebook' in ret): port_match = re.search(port_regex, ret) if port_match: port = port_match.group(1) ports.append(int(port)) else: ports.append(default_port) default_port += 1 if len(ret) > 0 and ('jupyter' in ret or 'ipython' in ret) and 'kernel' in ret: # kernel kernel_ID = re.sub(regex, r'\1', ret) kernel_ID = filter(lambda x: x in string.printable, kernel_ID) -
aiguofer revised this gist
Oct 11, 2016 . 1 changed file with 0 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -28,7 +28,6 @@ # jupyter notebook processes if len(ret) > 0 and 'jupyter-notebook' in ret: port_match = re.search(port_regex, ret) if port_match: port = port_match.group(1) @@ -38,7 +37,6 @@ default_port += 1 if len(ret) > 0 and 'jupyter' in ret and 'kernel' in ret: # kernel kernel_ID = re.sub(regex, r'\1', ret) kernel_ID = filter(lambda x: x in string.printable, kernel_ID) -
aiguofer created this gist
Oct 11, 2016 .There are no files selected for viewing
"""Write the memory usage of running Jupyter notebook kernels to a CSV file.

Flat script: scans /proc for notebook servers and kernel processes, asks
each candidate server for its sessions, joins both tables on the kernel ID
and saves the result as ``notebook_mem_usage.csv``.
"""
# NOTE(review): reconstructed from a whitespace-collapsed diff view
# (hunk @@ -0,0 +1,91 @@); indentation was inferred from the syntax.
# This revision is Python 2 code (print statements, urllib2).
import os
import pwd
import psutil
import re
import string
import json
import urllib2
import socket
import pandas as pd

# Index into the whitespace-split 'Uid:' line of /proc/<pid>/status
# (presumably the real UID -- confirm against proc(5)).
UID = 1

# Captures the kernel ID from a ".../kernel-<id>.json" path.
regex = re.compile(r'.+kernel-(.+)\.json')
# Captures the port number from a "port=<n>" command-line argument.
port_regex = re.compile(r'port=(\d+)')

# Numeric entries of /proc are process IDs (kept as strings here).
pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

# memory info from psutil.Process
df_mem = []
ports = []
default_port = 8888

for pid in pids:
    try:
        # NOTE(review): the file handle is never closed explicitly.
        ret = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
    except IOError:  # proc has already terminated
        continue

    # jupyter notebook processes
    if len(ret) > 0 and 'jupyter-notebook' in ret:
        # Debug output of the matched command line.
        print ret
        port_match = re.search(port_regex, ret)
        if port_match:
            port = port_match.group(1)
            ports.append(int(port))
        else:
            # No explicit port on the command line: assume the next default
            # port, counting up from 8888.
            ports.append(default_port)
            default_port += 1

    if len(ret) > 0 and 'jupyter' in ret and 'kernel' in ret:
        # kernel
        print ret
        kernel_ID = re.sub(regex, r'\1', ret)
        # Keep only printable characters (on Python 2, filter() over a str
        # returns a str).
        kernel_ID = filter(lambda x: x in string.printable, kernel_ID)

        # memory
        process = psutil.Process(int(pid))
        # First field of memory_info(), scaled by 1e9 to gigabytes.
        mem = process.memory_info()[0] / float(1e9)

        # user name for pid
        # NOTE(review): if no line starts with 'Uid:', `uname` keeps the
        # value from an earlier process (or is undefined on the first).
        for ln in open('/proc/{0}/status'.format(int(pid))):
            if ln.startswith('Uid:'):
                uid = int(ln.split()[UID])
                uname = pwd.getpwuid(uid).pw_name

        # user, pid, memory, kernel_ID
        df_mem.append([uname, pid, mem, kernel_ID])

df_mem = pd.DataFrame(df_mem)
# NOTE(review): presumably raises when no kernel process was found, because
# an empty frame has no columns to rename -- confirm.
df_mem.columns = ['user', 'pid', 'memory_GB', 'kernel_ID']

# notebook info from assessing ports
hostnames = [socket.gethostname(), '127.0.0.1', 'localhost', '0.0.0.0']
df_nb = []
kernels = []
for port in set(ports):
    for hostname in set(hostnames):
        sessions = None
        try:
            url = 'http://{0}:{1}/api/sessions'.format(hostname, port)
            # Debug output of the URL being queried.
            print url
            sessions = json.load(urllib2.urlopen(url))
        except urllib2.URLError:
            # Nothing reachable on this host/port pair; try the next one.
            sessions = None

        if sessions:
            for sess in sessions:
                kernel_ID = str(sess['kernel']['id'])
                # The same kernel can be reached through several hostnames;
                # record it only once.
                if kernel_ID not in kernels:
                    notebook_path = sess['notebook']['path']
                    df_nb.append([port, kernel_ID, notebook_path])
                    kernels.append(kernel_ID)

df_nb = pd.DataFrame(df_nb)
# NOTE(review): presumably raises when no session was found (empty frame,
# three column names) -- confirm.
df_nb.columns = ['port', 'kernel_ID', 'notebook_path']

# joining tables
df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
df = df.sort_values('memory_GB', ascending=False)
# Written to the current working directory.
df.to_csv('notebook_mem_usage.csv', index=False)