"""Report the memory usage of running Jupyter notebook kernels.

The script scans ``/proc`` for Jupyter notebook servers and kernel processes,
asks every discovered server for its sessions (``/api/sessions``) to map
kernel ids to notebook paths, joins that with each kernel's resident memory
and owner, and writes the result to ``notebook_mem_usage.csv`` sorted by
memory, largest first.
"""
import json
import os
import pwd
import re
import socket
import string

try:
    import urllib2
except ImportError:
    # Python 3: the same API lives in urllib.request / urllib.error.
    import urllib.error
    import urllib.request
    urlopen = urllib.request.urlopen
    URLError = urllib.error.URLError
else:
    urlopen = urllib2.urlopen
    URLError = urllib2.URLError

import pandas as pd
import psutil

# Position of the first UID value in the whitespace-split "Uid:" line of
# /proc/<pid>/status (index 0 is the "Uid:" label itself).
UID = 1
# Captures the kernel id from a connection file name such as kernel-<id>.json.
regex = re.compile(r'.+kernel-(.+)\.json')
# Captures an explicit port argument such as --port=8889.
port_regex = re.compile(r'port=(\d+)')


def _read_cmdline(pid):
    """Return the raw (NUL-separated) command line of *pid* as a native str.

    Raises IOError when the process has already terminated.
    """
    with open(os.path.join('/proc', pid, 'cmdline'), 'rb') as handle:
        raw = handle.read()
    if not isinstance(raw, str):
        # Python 3 returns bytes; Python 2 already gives a str.
        raw = raw.decode('utf-8', 'replace')
    return raw


def _owner_name(pid):
    """Return the user name owning *pid*, read from /proc/<pid>/status.

    Falls back to the numeric uid (as a string) when the uid has no passwd
    entry, and to None when the status file has no "Uid:" line.
    """
    with open('/proc/{0}/status'.format(int(pid))) as handle:
        for line in handle:
            if line.startswith('Uid:'):
                uid = int(line.split()[UID])
                try:
                    return pwd.getpwuid(uid).pw_name
                except KeyError:
                    return str(uid)
    return None


def _fetch_sessions(hostname, port):
    """Return the session list served at hostname:port, or [] on failure.

    A refused connection, an HTTP error or a non-JSON reply all mean "no
    usable Jupyter server here" and yield an empty list.
    """
    url = 'http://{0}:{1}/api/sessions'.format(hostname, port)
    print(url)
    try:
        response = urlopen(url)
        try:
            return json.load(response) or []
        finally:
            response.close()
    except (URLError, socket.error, ValueError):
        return []


pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

# memory info from psutil.Process
mem_rows = []
ports = []
default_port = 8888
for pid in pids:
    try:
        ret = _read_cmdline(pid)
    except IOError:
        # proc has already terminated
        continue
    if not ret:
        continue

    # jupyter notebook processes
    if 'jupyter-notebook' in ret:
        print(ret)
        port_match = port_regex.search(ret)
        if port_match:
            ports.append(int(port_match.group(1)))
        else:
            # No explicit port on the command line: guess the next one in
            # sequence starting from 8888.
            ports.append(default_port)
            default_port += 1

    # kernel processes
    if 'jupyter' in ret and 'kernel' in ret:
        print(ret)
        id_match = regex.search(ret)
        if id_match is None:
            # No kernel-<id>.json connection file on the command line, so
            # there is nothing to join against the session list.
            continue
        # Take only the captured id; drop NULs and other unprintable bytes.
        kernel_ID = ''.join(
            ch for ch in id_match.group(1) if ch in string.printable)
        try:
            # Resident set size in bytes, reported in units of 1e9 bytes.
            mem = psutil.Process(int(pid)).memory_info().rss / float(1e9)
            uname = _owner_name(pid)
        except (psutil.NoSuchProcess, psutil.AccessDenied, IOError):
            # The process exited (or became unreadable) after its cmdline
            # was read; skip it like any other vanished process.
            continue
        # user, pid, memory, kernel_ID
        mem_rows.append([uname, pid, mem, kernel_ID])

# Passing columns to the constructor keeps this working when no kernel was
# found (assigning .columns on an empty frame raises).
df_mem = pd.DataFrame(
    mem_rows, columns=['user', 'pid', 'memory_GB', 'kernel_ID'])

# notebook info from assessing ports
hostnames = [socket.gethostname(), '127.0.0.1', 'localhost', '0.0.0.0']
nb_rows = []
kernels = set()
for port in set(ports):
    for hostname in set(hostnames):
        for sess in _fetch_sessions(hostname, port):
            kernel_ID = str(sess['kernel']['id'])
            if kernel_ID in kernels:
                # Same server reached through another hostname.
                continue
            notebook_path = sess['notebook']['path']
            nb_rows.append([port, kernel_ID, notebook_path])
            kernels.add(kernel_ID)

df_nb = pd.DataFrame(nb_rows, columns=['port', 'kernel_ID', 'notebook_path'])

# joining tables
df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
df = df.sort_values('memory_GB', ascending=False)
df.to_csv('notebook_mem_usage.csv', index=False)