Last active
May 22, 2018 06:13
-
-
Save nibrahim/e32e99c904317b326b2460048a3ea16b to your computer and use it in GitHub Desktop.
Revisions
-
nibrahim revised this gist
May 22, 2018 . 1 changed file with 25 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,12 +4,16 @@ #import pprint ## You should probably close the file before returning d here. Use a ## with statement def parse_file(filename): f = open(filename, "rt") d = json.load(f) return d ## You don't necessarily need to do this upfront. You can just add ## events as you go through the original data structure def set_events(data): distinct_events = set() for i in data: @@ -21,6 +25,13 @@ def set_events(data): def calc_metrics(data, distinct_events): #distinct_events = set_events(data) ## You shouldn't do this. The code should run based on the ## data. Adding something like this means that you have to change ## the code when the data changes. This is an anti-pattern. ## ## Also, you shouldn't name variables dict, list etc. since these ## are builtins. dict = { 'weekend':{}, 'reading':{}, @@ -49,16 +60,23 @@ def calc_metrics(data, distinct_events): 'potatoes':{}, 'pudding':{} } ## While this loop works, I think a more idiomatic (though perhaps ## not as efficient) solution is to just count and use numbers ## rather than the strings which you're using. ## ## I also recommend giving the functions proper names; ## "calc_metrics" could mean anything. for i in data: events = i["events"] squirrel = int(i["squirrel"]) for j in distinct_events: ## You've commented this out above. I'm assuming that was an error if j in events: x = "{}{}".format(1,squirrel) else: x = "{}{}".format(0, squirrel) try: dict[j][x] = dict[x]+1 ## Shouldn't the right side be dict[j][x] + 1 ? except KeyError: #print(j) #print(x) @@ -67,6 +85,7 @@ def calc_metrics(data, distinct_events): def calc_phi(dict): ## Same comment about repeating the data as above. 
dict1 = { 'weekend': {}, 'reading': {}, @@ -96,14 +115,15 @@ def calc_phi(dict): 'pudding':{} } for i in dict1: if not ("11" in dict[i].keys()): ## You don't need the .keys(). You can use the `in` operator directly on dictionaries dict[i]["11"] = 0 if not ("10" in dict[i].keys()): dict[i]["10"] = 0 if not ("01" in dict[i].keys()): dict[i]["01"] = 0 if not ("00" in dict[i].keys()): dict[i]["00"] = 0 ## This whole calculation here becomes rather dense. I recommend you clean it up a little with temporary variables. x = (dict[i]["11"]*dict[i]["00"] - dict[i]["10"]*dict[i]["01"]) y = math.sqrt((dict[i]["11"]+dict[i]["10"])*(dict[i]["00"]+dict[i]["01"])*(dict[i]["11"]+dict[i]["01"])*(dict[i]["00"]+dict[i]["10"])) dict1[i] = x/y @@ -114,7 +134,7 @@ def main(filename): d = parse_file(filename) distinct_events = set_events(d) dict = calc_metrics(d, distinct_events) dict1 = calc_phi(dict) ## Your calc_phi mutates the dict parameter which is a bad idea. print(json.dumps(dict1, indent=10)) #pprint.pprint(dict1, indent=10) -
nibrahim created this gist
May 22, 2018 .There are no files selected for viewing
"""Correlate journal events with the "squirrel" flag via the phi coefficient.

The input is a JSON file holding a list of entries; each entry is read for
an "events" list and a "squirrel" value. The script prints, as JSON, one
phi coefficient per distinct event.
"""

import json
import math
import sys


def parse_file(filename):
    """Load and return the JSON document stored in *filename*.

    The file is opened in a ``with`` block so the handle is closed before
    the parsed data is returned.
    """
    with open(filename, "rt", encoding="utf-8") as f:
        return json.load(f)


def set_events(data):
    """Return a list of the distinct events found across all entries.

    Events are listed in the order they are first seen (dict insertion
    order), which keeps the result reproducible between runs.
    """
    seen = {}
    for entry in data:
        for event in entry["events"]:
            seen[event] = None
    return list(seen)


def calc_metrics(data, distinct_events):
    """Build the contingency counts of every event against "squirrel".

    Args:
        data: iterable of entries, each with an "events" collection and a
            "squirrel" value convertible with ``int``.
        distinct_events: the events to tabulate.

    Returns:
        A dict mapping each event to a dict of counts keyed by a two
        character string: the first character is 1 when the event occurs in
        the entry (0 otherwise), the second is ``int(entry["squirrel"])``.
        Only combinations that were actually observed are present.
    """
    # The table is derived from the data rather than hard-coded, so a new
    # event in the input needs no code change.
    metrics = {event: {} for event in distinct_events}
    for entry in data:
        events = entry["events"]
        squirrel = int(entry["squirrel"])
        for event, cells in metrics.items():
            cell = "{}{}".format(int(event in events), squirrel)
            # Increment this event's own cell (the per-event table, not the
            # outer mapping).
            cells[cell] = cells.get(cell, 0) + 1
    return metrics


def calc_phi(dict):
    """Compute the phi coefficient for every event's contingency counts.

    Args:
        dict: mapping of event -> counts keyed "11", "10", "01", "00", as
            produced by ``calc_metrics``. Missing cells count as 0. The
            argument is not modified. (The parameter name is kept for
            backward compatibility even though it shadows the builtin.)

    Returns:
        A new dict mapping each event to its phi coefficient. When any
        marginal total is zero the coefficient is undefined; 0.0 is
        returned for that event instead of raising ZeroDivisionError.
    """
    phi_by_event = {}
    for event, cells in dict.items():
        n11 = cells.get("11", 0)
        n10 = cells.get("10", 0)
        n01 = cells.get("01", 0)
        n00 = cells.get("00", 0)

        numerator = n11 * n00 - n10 * n01
        denominator = math.sqrt(
            (n11 + n10) * (n00 + n01) * (n11 + n01) * (n00 + n10)
        )
        phi_by_event[event] = numerator / denominator if denominator else 0.0
    return phi_by_event


def main(filename):
    """Print the phi coefficient of every event in *filename* as JSON."""
    data = parse_file(filename)
    distinct_events = set_events(data)
    metrics = calc_metrics(data, distinct_events)
    print(json.dumps(calc_phi(metrics), indent=10))


if __name__ == "__main__":  # Import guard
    main(sys.argv[1])