nibrahim · May 22, 2018 06:13 · May 22, 2018 · May 22, 2018
diff --git a/correlation.py b/correlation.py
@@ -4,12 +4,16 @@
 #import pprint
 
 
-def parse_file(filename):
+## You should probably close the file before returning `d` here. Use a
+## `with` statement so that it is closed automatically.
+def parse_file(filename): 
     f = open(filename, "rt")
     d = json.load(f)
     return d
 
 
+## You don't necessarily need to do this up front. You can just add
+## events as you go through the original data structure.
 def set_events(data):
     distinct_events = set()
     for i in data:
@@ -21,6 +25,13 @@ def set_events(data):
 
 def calc_metrics(data, distinct_events):
     #distinct_events = set_events(data)
+
+    ## You shouldn't hard-code the event names like this. The code should
+    ## run based on the data. Hard-coding them means that you have to change
+    ## the code when the data changes. This is an anti-pattern.
+    ##
+    ## Also, you shouldn't name variables `dict`, `list`, etc., since these
+    ## shadow builtins.
     dict = {
         'weekend':{},
         'reading':{},
@@ -49,16 +60,23 @@ def calc_metrics(data, distinct_events):
         'potatoes':{},
         'pudding':{}
     }
+    ## While this loop works, I think a more idiomatic (though perhaps
+    ## not as efficient) solution is to just count and use numbers
+    ## rather than the strings which you're using.
+    ##
+    ## I also recommend giving the functions descriptive names;
+    ## "calc_metrics" could mean anything.
     for i in data:
         events = i["events"]
         squirrel = int(i["squirrel"])
-        for j in distinct_events:
+
+        for j in distinct_events: ## You've commented this out above. I'm assuming that was an error
             if j in events:
                 x = "{}{}".format(1,squirrel)
             else:
                 x = "{}{}".format(0, squirrel)
             try:
-                dict[j][x] = dict[x]+1
+                dict[j][x] = dict[x]+1 ## Shouldn't the right side be dict[j][x] + 1 ?
             except KeyError:
                 #print(j)
                 #print(x)
@@ -67,6 +85,7 @@ def calc_metrics(data, distinct_events):
 
 
 def calc_phi(dict):
+    ## Same comment as above about hard-coding the data in the code.
     dict1 = {
         'weekend': {},
         'reading': {},
@@ -96,14 +115,15 @@ def calc_phi(dict):
         'pudding':{}
     }
     for i in dict1:
-        if not ("11" in dict[i].keys()):
+        if not ("11" in dict[i].keys()): ## You don't need the .keys(). You can use the `in` operator directly on dictionaries
             dict[i]["11"] = 0
         if not ("10" in dict[i].keys()):
             dict[i]["10"] = 0
         if not ("01" in dict[i].keys()):
             dict[i]["01"] = 0
         if not ("00" in dict[i].keys()):
             dict[i]["00"] = 0
+        ## This whole calculation here becomes rather dense. I recommend you clean it up a little with temporary variables.
         x  = (dict[i]["11"]*dict[i]["00"] - dict[i]["10"]*dict[i]["01"])
         y = math.sqrt((dict[i]["11"]+dict[i]["10"])*(dict[i]["00"]+dict[i]["01"])*(dict[i]["11"]+dict[i]["01"])*(dict[i]["00"]+dict[i]["10"]))
         dict1[i] = x/y
@@ -114,7 +134,7 @@ def main(filename):
     d = parse_file(filename)
     distinct_events = set_events(d)
     dict = calc_metrics(d, distinct_events)
-    dict1 = calc_phi(dict)
+    dict1 = calc_phi(dict) ## Your calc_phi mutates the dict parameter which is a bad idea.
     print(json.dumps(dict1, indent=10))
     #pprint.pprint(dict1, indent=10)
 

diff --git a/correlation.py b/correlation.py
@@ -0,0 +1,123 @@
+import json
+import math
+import sys
+#import pprint
+
+
+def parse_file(filename):
+    f = open(filename, "rt")
+    d = json.load(f)
+    return d
+
+
+def set_events(data):
+    distinct_events = set()
+    for i in data:
+        events = i["events"]
+        for j in events:
+            distinct_events.add(j)
+    return list(distinct_events)
+
+
+def calc_metrics(data, distinct_events):
+    #distinct_events = set_events(data)
+    dict = {
+        'weekend':{},
+        'reading':{},
+        'cycling':{},
+        'lettuce':{},
+        'dentist':{},
+        'running':{},
+        'television':{},
+        'exercise':{},
+        'brussel sprouts':{},
+        'candy':{},
+        'beer':{},
+        'spaghetti':{},
+        'brushed teeth':{},
+        'work':{},
+        'peanuts':{},
+        'lasagna':{},
+        'carrot':{},
+        'bread':{},
+        'touched tree':{},
+        'computer':{},
+        'pizza':{},
+        'nachos':{},
+        'cauliflower':{},
+        'ice cream':{},
+        'potatoes':{},
+        'pudding':{}
+    }
+    for i in data:
+        events = i["events"]
+        squirrel = int(i["squirrel"])
+        for j in distinct_events:
+            if j in events:
+                x = "{}{}".format(1,squirrel)
+            else:
+                x = "{}{}".format(0, squirrel)
+            try:
+                dict[j][x] = dict[x]+1
+            except KeyError:
+                #print(j)
+                #print(x)
+                dict[j][x] = 1
+    return dict
+
+
+def calc_phi(dict):
+    dict1 = {
+        'weekend': {},
+        'reading': {},
+        'cycling': {},
+        'lettuce': {},
+        'dentist': {},
+        'running': {},
+        'television': {},
+        'exercise': {},
+        'brussel sprouts': {},
+        'candy': {},
+        'beer': {},
+        'spaghetti': {},
+        'brushed teeth': {},
+        'work': {},
+        'peanuts': {},
+        'lasagna': {},
+        'carrot': {},
+        'bread': {},
+        'touched tree': {},
+        'computer': {},
+        'pizza': {},
+        'nachos': {},
+        'cauliflower': {},
+        'ice cream': {},
+        'potatoes': {},
+        'pudding':{}
+    }
+    for i in dict1:
+        if not ("11" in dict[i].keys()):
+            dict[i]["11"] = 0
+        if not ("10" in dict[i].keys()):
+            dict[i]["10"] = 0
+        if not ("01" in dict[i].keys()):
+            dict[i]["01"] = 0
+        if not ("00" in dict[i].keys()):
+            dict[i]["00"] = 0
+        x  = (dict[i]["11"]*dict[i]["00"] - dict[i]["10"]*dict[i]["01"])
+        y = math.sqrt((dict[i]["11"]+dict[i]["10"])*(dict[i]["00"]+dict[i]["01"])*(dict[i]["11"]+dict[i]["01"])*(dict[i]["00"]+dict[i]["10"]))
+        dict1[i] = x/y
+    return dict1
+
+
+def main(filename):
+    d = parse_file(filename)
+    distinct_events = set_events(d)
+    dict = calc_metrics(d, distinct_events)
+    dict1 = calc_phi(dict)
+    print(json.dumps(dict1, indent=10))
+    #pprint.pprint(dict1, indent=10)
+
+
+if __name__ == "__main__":  # Import guard
+    main(sys.argv[1])