Skip to content

Instantly share code, notes, and snippets.

@aiguofer
Last active April 27, 2023 19:12
Show Gist options
  • Save aiguofer/5b0a5532de84402e4ced0548f4650f2d to your computer and use it in GitHub Desktop.
Save aiguofer/5b0a5532de84402e4ced0548f4650f2d to your computer and use it in GitHub Desktop.
Get info about running jupyter notebooks including memory consumption, how long they've been running, etc.
import os
import pwd
import psutil
import re
import string
import requests
import socket
import argparse
import tabulate
import pandas as pd
# Index of the numeric real-UID field in the whitespace-split 'Uid:' line
# of /proc/<pid>/status (format: 'Uid: real effective saved fs').
UID = 1
# Pulls the kernel id out of a kernel cmdline such as '... kernel-<id>.json'.
regex = re.compile(r'.+kernel-(.+)\.json')
# Pulls an explicit 'port=<n>' argument out of a notebook-server cmdline.
port_regex = re.compile(r'port=(\d+)')
def get_proc_info():
    """Scan /proc for running Jupyter server and kernel processes.

    Returns:
        (df_mem, ports):
            df_mem -- DataFrame with columns
                ['user', 'pid', 'memory_GB', 'kernel_ID'],
                one row per kernel process found;
            ports  -- list of int ports that notebook servers listen on.
    """
    pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

    # rows of [user, pid, memory_GB, kernel_ID]
    df_mem = []
    # notebook-server ports discovered
    ports = []
    default_port = 8888

    for pid in pids:
        try:
            # Use a context manager so the handle is closed promptly;
            # decode so the 'in' checks below work on Python 3 as well.
            with open(os.path.join('/proc', pid, 'cmdline'), 'rb') as f:
                ret = f.read().decode('utf-8', 'replace')
        except IOError:  # proc has already terminated
            continue

        # cmdline arguments are NUL-separated; turn them into spaces so
        # multi-word matches like 'ipython notebook' can actually match
        ret = ret.replace('\x00', ' ')

        # notebook server processes: record the port they listen on
        if ret and ('jupyter-notebook' in ret or 'ipython notebook' in ret):
            port_match = port_regex.search(ret)
            if port_match:
                ports.append(int(port_match.group(1)))
            else:
                # no explicit --port: assume Jupyter's sequential defaults
                ports.append(default_port)
                default_port += 1

        # kernel processes: record user, memory use and kernel id
        if ret and ('jupyter' in ret or 'ipython' in ret) and 'kernel' in ret:
            kernel_ID = re.sub(regex, r'\1', ret)
            # keep only printable characters (drops leftover junk); a list is
            # materialized via join so it round-trips through the DataFrame
            kernel_ID = ''.join(
                c for c in kernel_ID if c in string.printable
            ).strip()

            # resident set size in GB
            process = psutil.Process(int(pid))
            mem = process.memory_info()[0] / float(1e9)

            # user name for pid, from the 'Uid:' line of the status file
            uname = None
            with open('/proc/{0}/status'.format(int(pid))) as status_file:
                for ln in status_file:
                    if ln.startswith('Uid:'):
                        uid = int(ln.split()[UID])
                        uname = pwd.getpwuid(uid).pw_name

            df_mem.append([uname, pid, mem, kernel_ID])

    # columns= in the constructor keeps this valid even when no rows matched
    df_mem = pd.DataFrame(df_mem,
                          columns=['user', 'pid', 'memory_GB', 'kernel_ID'])
    return df_mem, ports
def get_session_info(ports, opts):
    """Query each candidate notebook server's REST API for active sessions.

    Args:
        ports: iterable of candidate server ports (ints).
        opts: dict with optional keys 'hostname' and 'password'.

    Returns:
        DataFrame with columns ['port', 'kernel_ID', 'notebook_path'],
        one row per unique kernel session found (empty if none reachable).
    """
    if opts.get('hostname'):
        hostnames = [opts['hostname']]
    else:
        # no hostname given: try the machine name plus common local aliases
        hostnames = [socket.gethostname(), '127.0.0.1', 'localhost', '0.0.0.0']

    df_nb = []
    # kernel ids already recorded — several hostnames alias the same server
    kernels = []

    for port in set(ports):
        for hostname in set(hostnames):
            sessions = None
            try:
                base_url = 'http://{0}:{1}/'.format(hostname, port)
                s = requests.Session()
                if opts.get('password'):
                    # Seems jupyter auth process has changed, need to first get a cookie,
                    # then add that cookie to the data being sent over with the password
                    data = {
                        'password': opts['password']
                    }
                    s.post(base_url + 'login', data=data)
                    data.update(s.cookies)
                    s.post(base_url + 'login', data=data)
                sessions = s.get(base_url + 'api/sessions').json()
            except Exception:
                # connection refused, timeout, bad JSON, auth failure, ... —
                # just means this host:port is not a reachable server.
                # (Exception, not bare except: keep Ctrl-C working.)
                sessions = None

            if sessions:
                for sess in sessions:
                    kernel_ID = sess['kernel']['id']
                    if kernel_ID not in kernels:
                        df_nb.append([port, kernel_ID,
                                      sess['notebook']['path']])
                        kernels.append(kernel_ID)

    # columns= in the constructor keeps this valid even with zero rows
    return pd.DataFrame(df_nb,
                        columns=['port', 'kernel_ID', 'notebook_path'])
def parse_args():
    """Build the command-line parser and parse sys.argv.

    Returns:
        argparse.Namespace with 'hostname' and 'password' attributes
        (each None when not supplied).
    """
    cli = argparse.ArgumentParser(description='Find memory usage.')
    for flag, msg in (
        ('--hostname', 'hostname (default: try to find it)'),
        ('--password', 'password (only needed if pass-protected)'),
    ):
        cli.add_argument(flag, help=msg)
    return cli.parse_args()
def main(opts):
    """Gather kernel/server info, join it, print the table and return it.

    Args:
        opts: dict with optional 'hostname' and 'password' keys (see
              get_session_info).

    Returns:
        DataFrame of sessions joined with per-kernel memory usage,
        sorted by memory_GB descending.
    """
    df_mem, ports = get_proc_info()
    df_nb = get_session_info(ports, opts)

    # inner join on kernel id: keep only kernels that belong to a live session
    df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
    df = df.sort_values('memory_GB', ascending=False).reset_index(drop=True)

    # print(...) with a single argument is valid on Python 2 and 3 alike;
    # the old 'print x' statement form was Python-2-only syntax.
    print(tabulate.tabulate(df, headers=(df.columns.tolist())))
    return df
# Script entry point: turn the parsed CLI namespace into a plain dict
# and hand it to main().
if __name__ == '__main__':
    main(vars(parse_args()))
@ajay2611
Copy link

Could you update it for Python 3?

@aiguofer
Copy link
Author

aiguofer commented May 4, 2019

@ajay2611 I've updated it to Python 3 and to a variety of changes in Jupyter itself; mainly, handling token authentication and leveraging jupyter notebook list to get currently available servers. It also uses psutil for everything now and doesn't rely on /proc so it should also work on Windows.

@Kumudaya
Copy link

I have a question regarding clearing up the GPU memory after finishing a deep learning model with Jupyter notebook. The problem is, no matter which framework I am sticking to (TensorFlow, PyTorch), the memory allocated on the GPU does not get released unless I kill the process manually from nvidia-smi, or kill the kernel and restart Jupyter. Do you have any idea how we could possibly get rid of this problem by automating these steps?

@RafaelWO
Copy link

RafaelWO commented May 10, 2022

I had to comment out line 99 (del df_nb["token"]) to get it working. Maybe it's cleaner to use

df_nb.drop(columns="token", inplace=True, errors="ignore")

P.S. If someone wants to use this for JupyterLab instead, simply replace notebook with lab in line 48:

for n in subprocess.Popen(
    ["jupyter", "lab", "list"], stdout=subprocess.PIPE
).stdout.readlines()[1:]:

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment