"""Plot the predicted age and gender probabilities found in Discord activity files.

The script scans every ``events-*-*-of-*.json`` file in the current directory,
collects the records that carry a ``predicted_age`` or ``predicted_gender``
key, and plots the associated probability values over time, one figure for
age and one for gender.
"""
import glob
import json
from datetime import datetime

# Probability fields read from each record, in plotting/legend order.
age_keys = ["prob_13_17", "prob_18_24", "prob_25_34", "prob_35_over"]
gen_keys = ["prob_male", "prob_female", "prob_non_binary_gender_expansive"]


def add_record(record, marker, keys, ts, series):
    """Append one record's timestamp and probabilities to the accumulators.

    Args:
        record: Decoded JSON object for one line of an activity file.
        marker: Key that must be present for the record to count
            (``"predicted_age"`` or ``"predicted_gender"``).
        keys: Probability field names to copy out of ``record``.
        ts: List of datetimes, extended in place.
        series: Mapping of field name -> list of values, extended in place.

    Returns:
        True if the record was appended, False if it was skipped.

    Raises:
        ValueError: If ``day_pt`` is present but is not an ISO-format date.
    """
    if marker not in record:
        return False
    day = record.get("day_pt")
    if not isinstance(day, str):
        # Without a timestamp the point cannot be placed on the time axis;
        # skipping it beats aborting the whole run on one incomplete record.
        return False
    ts.append(datetime.fromisoformat(day.replace(" UTC", "")))
    for key in keys:
        # A missing probability is stored as None so every series stays the
        # same length as ``ts``.
        series[key].append(record.get(key))
    return True


def collect_predictions(lines, age_ts, age_lists, gen_ts, gen_lists):
    """Scan JSON lines and accumulate age and gender prediction records.

    Args:
        lines: Iterable of strings, each holding one JSON document.
        age_ts: List receiving the timestamps of age predictions.
        age_lists: Mapping of ``age_keys`` entry -> list of values.
        gen_ts: List receiving the timestamps of gender predictions.
        gen_lists: Mapping of ``gen_keys`` entry -> list of values.

    Raises:
        json.JSONDecodeError: If a line that passes the pre-filter is not
            valid JSON.
    """
    for line in lines:
        # Cheap substring pre-filter so only candidate lines get parsed.
        if ',"predicted_' not in line:
            continue
        record = json.loads(line)
        add_record(record, "predicted_age", age_keys, age_ts, age_lists)
        add_record(record, "predicted_gender", gen_keys, gen_ts, gen_lists)


def sort_data(ts, data):
    """Sort timestamps and their parallel value lists chronologically.

    Args:
        ts: List of timestamps.
        data: Mapping of name -> list of values, each parallel to ``ts``.

    Returns:
        A ``(sorted_ts, sorted_data)`` tuple holding new lists; the inputs
        are not modified. Rows with equal timestamps keep their input order.
    """
    # Sort on the timestamp only, so the values are never compared.
    rows = sorted(zip(ts, *data.values()), key=lambda row: row[0])
    sorted_ts = [row[0] for row in rows]
    sorted_data = {
        name: [row[column + 1] for row in rows]
        for column, name in enumerate(data)
    }
    return sorted_ts, sorted_data


def plot_series(title, ts, series, keys, labels):
    """Show one figure with a line per entry of ``keys``.

    Args:
        title: Figure title.
        ts: X-axis values.
        series: Mapping of key -> Y-axis values parallel to ``ts``.
        keys: Keys of ``series`` to draw, in legend order.
        labels: Legend labels, parallel to ``keys``.
    """
    # Imported here rather than at module level so that the parsing helpers
    # and the "nothing found" prompts work without matplotlib installed.
    from matplotlib import pyplot

    pyplot.title(title)
    for key in keys:
        pyplot.plot(ts, series[key], marker="o")
    pyplot.legend(labels)
    pyplot.show()


def main():
    """Read the activity files, plot what was found, and report empty results."""
    age_ts = []
    age_lists = {key: [] for key in age_keys}
    gen_ts = []
    gen_lists = {key: [] for key in gen_keys}

    activity_files = glob.glob("./events-*-*-of-*.json")
    for activity_file in activity_files:
        print("Processing", activity_file)
        # Explicit encoding: the platform default is locale-dependent and can
        # fail on non-ASCII content. Assumes the files are UTF-8.
        with open(activity_file, "r", encoding="utf-8") as f:
            collect_predictions(f, age_ts, age_lists, gen_ts, gen_lists)

    if age_ts:
        age_ts, age_lists = sort_data(age_ts, age_lists)
        plot_series(
            "Discord predicted age",
            age_ts,
            age_lists,
            age_keys,
            ["13-17", "18-24", "25-34", "35+"],
        )

    if gen_ts:
        gen_ts, gen_lists = sort_data(gen_ts, gen_lists)
        plot_series(
            "Discord predicted gender",
            gen_ts,
            gen_lists,
            gen_keys,
            ["male", "female", "non-binary"],
        )

    # input() is used instead of print() so the message waits for Enter
    # before the script exits.
    if not activity_files:
        input(
            "Couldn't find your activity file! Make sure you have a file named "
            "events-YEAR-XXXXX-of-XXXXX.json in the current directory. It should "
            "be in activity/analytics/ in your Discord data dump."
        )
    elif not age_ts and not gen_ts:
        input("No predicted data found for you :(")


if __name__ == "__main__":
    main()