Last active
February 27, 2024 14:05
-
-
Save aadityabhatia/50574836c727a1add565c7908e22cb98 to your computer and use it in GitHub Desktop.
Revisions
-
aadityabhatia revised this gist
Feb 27, 2024 . 1 changed file with 8 additions and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -18,7 +18,14 @@ def get_slurm_status_status(username): # parse output and return a set of status lines output = output.strip('" \n').split('\n') outputSet = set() for line in output: line = line.strip('" ') if line: outputSet.add(line) return outputSet def send_notification(post_url, message): -
aadityabhatia revised this gist
Feb 25, 2024 . 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -18,7 +18,7 @@ def get_slurm_status_status(username): # parse output and return a set of status lines output = output.strip('" \n').split('\n') return set([node.strip('" ') for node in output]) def send_notification(post_url, message): -
aadityabhatia revised this gist
Feb 24, 2024 . 1 changed file with 3 additions and 4 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -30,7 +30,7 @@ def send_notification(post_url, message): }) if response.status_code != 200: raise Exception( f"Failed HTTP POST with {response.status_code}") def monitor_slurm_status(username, post_url): @@ -48,13 +48,12 @@ def monitor_slurm_status(username, post_url): # Send a notification if there are any changes if added_status: print(f"{datetime.now()} Added: {added_status}") message = "; ".join(added_status) # send a notification only if any of the added lines contain "RUNNING" if any("RUNNING" in line for line in added_status): send_notification(post_url, message) print(f"{datetime.now()} Notification sent: {message}") if removed_status: print(f"{datetime.now()} Removed: {removed_status}") -
aadityabhatia revised this gist
Feb 24, 2024 . 1 changed file with 26 additions and 7 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,9 +4,15 @@ from datetime import datetime import time import random import signal # arbitrarily chosen prime numbers SLEEP_MIN = 127 SLEEP_MAX = 157 def get_slurm_status_status(username): """Get the status of the slurm nodes for a given user.""" output = subprocess.check_output( ['squeue', '-u', username, '-h', '-o', '"%i %N %T"']).decode('utf-8') @@ -15,9 +21,9 @@ def get_slurm_status_status(username): return set([node.strip() for node in output]) def send_notification(post_url, message): """Send notification via HTTP POST.""" response = requests.post(post_url, data=message, headers={ "Title": "Slurm Status Update", "Priority": "max", "Tags": "computer" @@ -26,10 +32,10 @@ def send_notification(post_url, data): raise Exception( f"Failed to send notification: HTTP {response.status_code}") def monitor_slurm_status(username, post_url): """Monitor the slurm status and send notifications for changes.""" previous_status = set() while True: @@ -42,10 +48,13 @@ def monitor_slurm_status(username, post_url): # Send a notification if there are any changes if added_status: print(f"{datetime.now()} Added: {added_status}") message = "\n".join(added_status) # send a notification only if any of the added lines contain "RUNNING" if any("RUNNING" in line for line in added_status): send_notification(post_url, message) print(datetime.now(), "Notification sent:", message.replace("\n", "; ")) if removed_status: print(f"{datetime.now()} Removed: {removed_status}") @@ -54,11 +63,21 @@ def monitor_slurm_status(username, post_url): previous_status = current_status # sleep for a random interval betwen 127 and 157 seconds time.sleep(random.randint(SLEEP_MIN, SLEEP_MAX)) if 
__name__ == '__main__': # first argument is the username to monitor username = sys.argv[1] # second argument is the URL to send the notification post_url = sys.argv[2] # trap SIGINT def signal_handler(sig, frame): print("Exiting...") sys.exit(0) signal.signal(signal.SIGINT, signal_handler) monitor_slurm_status(username, post_url) -
aadityabhatia created this gist
Feb 24, 2024 . There are no files selected for viewing
"""Poll ``squeue`` for one user and send an HTTP notification when jobs start running."""

import random
import subprocess
import sys
import time
from datetime import datetime

# Bounds, in seconds, of the randomized pause between two squeue polls.
SLEEP_MIN = 127
SLEEP_MAX = 157

# Seconds to wait on the notification endpoint before giving up, so that a
# stalled server cannot block the monitor loop forever.
HTTP_TIMEOUT = 30


def parse_squeue_output(output):
    """Convert raw ``squeue`` text into a set of cleaned status lines.

    The format string is handed to squeue with literal double quotes (no shell
    is involved to remove them), so every line is stripped of surrounding
    quotes and whitespace individually. Blank lines are dropped, which means an
    empty queue yields an empty set rather than ``{''}``.

    Args:
        output: Decoded text printed by squeue.

    Returns:
        A set with one entry per distinct non-empty status line.
    """
    cleaned = (raw.strip('" \t\r') for raw in output.splitlines())
    return {line for line in cleaned if line}


def get_slurm_status_status(username):
    """Return the current squeue status lines for *username* as a set.

    Raises:
        subprocess.CalledProcessError: if squeue exits with a non-zero status.
    """
    output = subprocess.check_output(
        ['squeue', '-u', username, '-h', '-o', '"%i %N %T"']).decode('utf-8')
    return parse_squeue_output(output)


def send_notification(post_url, data):
    """Send notification via HTTP POST.

    Args:
        post_url: URL that receives the POST request.
        data: Request body (the notification text).

    Raises:
        Exception: if the server answers with a status other than 200.
    """
    # Imported here so the module (and its parsing helper) can be imported
    # without the third-party HTTP dependency being installed.
    import requests

    response = requests.post(
        post_url,
        data=data,
        headers={
            "Title": "Slurm Status Update",
            "Priority": "max",
            "Tags": "computer",
        },
        timeout=HTTP_TIMEOUT,
    )

    if response.status_code != 200:
        raise Exception(
            f"Failed to send notification: HTTP {response.status_code}")

    print(datetime.now(), "Notification sent:", data)


def monitor_slurm_status(username, post_url):
    """Poll squeue forever, logging changes and notifying on new RUNNING lines.

    The first poll is compared against an empty set, so every job that already
    exists at start-up is reported as added.

    Args:
        username: User whose queue is monitored.
        post_url: URL passed to :func:`send_notification`.
    """
    previous_status = set()

    while True:
        current_status = get_slurm_status_status(username)

        # Set differences give the lines that appeared / disappeared since
        # the previous poll.
        added_status = current_status - previous_status
        removed_status = previous_status - current_status

        if added_status:
            print(f"{datetime.now()} Added: {added_status}")

            # Notify only if any of the added lines contain "RUNNING".
            if any("RUNNING" in line for line in added_status):
                # Sorted so the message body has a stable line order.
                message = "squeue update:\n" + "\n".join(sorted(added_status))
                send_notification(post_url, message)

        if removed_status:
            print(f"{datetime.now()} Removed: {removed_status}")

        previous_status = current_status

        # Sleep for a random interval between SLEEP_MIN and SLEEP_MAX seconds.
        time.sleep(random.randint(SLEEP_MIN, SLEEP_MAX))


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} USERNAME POST_URL")

    # First argument is the username to monitor, second is the URL that
    # receives the notifications.
    username = sys.argv[1]
    post_url = sys.argv[2]

    try:
        monitor_slurm_status(username, post_url)
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of dumping a traceback.
        print("Exiting...")
        sys.exit(0)