Skip to content

Instantly share code, notes, and snippets.

@nibrahim
Last active May 22, 2018 06:13
Show Gist options
  • Save nibrahim/e32e99c904317b326b2460048a3ea16b to your computer and use it in GitHub Desktop.
Save nibrahim/e32e99c904317b326b2460048a3ea16b to your computer and use it in GitHub Desktop.

Revisions

  1. nibrahim revised this gist May 22, 2018. 1 changed file with 25 additions and 5 deletions.
    30 changes: 25 additions & 5 deletions correlation.py
    Original file line number Diff line number Diff line change
    @@ -4,12 +4,16 @@
    #import pprint


    def parse_file(filename):
    ## You should probably close the file before returning d here. Use a
    ## with statement
    def parse_file(filename):
    f = open(filename, "rt")
    d = json.load(f)
    return d


    ## You don't necessarily need to do this upfront. You can just add
    ## events as you go through the original data structure
    def set_events(data):
    distinct_events = set()
    for i in data:
    @@ -21,6 +25,13 @@ def set_events(data):

    def calc_metrics(data, distinct_events):
    #distinct_events = set_events(data)

    ## You shouldn't do this. The code should run based on the
    ## data. Adding something like this means that you have to change
    ## the code when the data changes. This is an anti-pattern.
    ##
    ## Also, you shouldn't name variables dict, list etc. since these
    ## are builtins.
    dict = {
    'weekend':{},
    'reading':{},
    @@ -49,16 +60,23 @@ def calc_metrics(data, distinct_events):
    'potatoes':{},
    'pudding':{}
    }
    ## While this loop works, I think a more idiomatic (though perhaps
    ## not as efficient) solution is to just count and use numbers
    ## rather than the strings which you're using.
    ##
    ## I also recommend building the functions with proper names
    ## "calc_metrics" could mean anything.
    for i in data:
    events = i["events"]
    squirrel = int(i["squirrel"])
    for j in distinct_events:

    for j in distinct_events: ## You've commented this out above. I'm assuming that was an error
    if j in events:
    x = "{}{}".format(1,squirrel)
    else:
    x = "{}{}".format(0, squirrel)
    try:
    dict[j][x] = dict[x]+1
    dict[j][x] = dict[x]+1 ## Shouldn't the right side be dict[j][x] + 1 ?
    except KeyError:
    #print(j)
    #print(x)
    @@ -67,6 +85,7 @@ def calc_metrics(data, distinct_events):


    def calc_phi(dict):
    ## Same comment about repeating the data as above.
    dict1 = {
    'weekend': {},
    'reading': {},
    @@ -96,14 +115,15 @@ def calc_phi(dict):
    'pudding':{}
    }
    for i in dict1:
    if not ("11" in dict[i].keys()):
    if not ("11" in dict[i].keys()): ## You don't need the .keys(). You can use the `in` operator directly on dictionaries
    dict[i]["11"] = 0
    if not ("10" in dict[i].keys()):
    dict[i]["10"] = 0
    if not ("01" in dict[i].keys()):
    dict[i]["01"] = 0
    if not ("00" in dict[i].keys()):
    dict[i]["00"] = 0
    ## This whole calculation here becomes rather dense. I recommend you clean it up a little with temporary variables.
    x = (dict[i]["11"]*dict[i]["00"] - dict[i]["10"]*dict[i]["01"])
    y = math.sqrt((dict[i]["11"]+dict[i]["10"])*(dict[i]["00"]+dict[i]["01"])*(dict[i]["11"]+dict[i]["01"])*(dict[i]["00"]+dict[i]["10"]))
    dict1[i] = x/y
    @@ -114,7 +134,7 @@ def main(filename):
    d = parse_file(filename)
    distinct_events = set_events(d)
    dict = calc_metrics(d, distinct_events)
    dict1 = calc_phi(dict)
    dict1 = calc_phi(dict) ## Your calc_phi mutates the dict parameter which is a bad idea.
    print(json.dumps(dict1, indent=10))
    #pprint.pprint(dict1, indent=10)

  2. nibrahim created this gist May 22, 2018.
    123 changes: 123 additions & 0 deletions correlation.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,123 @@
    import json
    import math
    import sys
    #import pprint


    def parse_file(filename):
        """Load and return the JSON document stored in *filename*.

        The file is opened in text mode inside a ``with`` block, so the
        handle is closed before returning even when ``json.load`` raises.
        """
        with open(filename, "rt") as f:
            return json.load(f)


    def set_events(data):
        """Return a list of every distinct event named in *data*.

        Each entry of *data* is indexed with ``"events"`` and the items of
        that collection are gathered into a set, so the order of the
        returned list is whatever the set yields.
        """
        unique_names = {name for entry in data for name in entry["events"]}
        return list(unique_names)


    def calc_metrics(data, distinct_events):
        """Tally, per event, how often it coincides with the squirrel flag.

        Args:
            data: iterable of entries; each entry is indexed with
                ``"events"`` (a collection of event names) and
                ``"squirrel"`` (a value accepted by ``int()``).
            distinct_events: the event names to tally.

        Returns:
            A dict mapping each event name to a dict of counts keyed by a
            two-character string. The first character is ``1`` when the
            event is in the entry's ``"events"`` and ``0`` otherwise; the
            second is ``int(entry["squirrel"])``. Combinations that never
            occur have no key.
        """
        # Build the table from the requested events rather than a
        # hard-coded list, so new events in the data need no code change.
        counts_by_event = {event: {} for event in distinct_events}
        for entry in data:
            events = entry["events"]
            squirrel = int(entry["squirrel"])
            for event, counts in counts_by_event.items():
                key = "{}{}".format(1 if event in events else 0, squirrel)
                # Increment this event's own counter. Looking the key up in
                # the outer table instead would always raise KeyError and
                # pin every count at 1.
                counts[key] = counts.get(key, 0) + 1
        return counts_by_event


    def calc_phi(dict):
        """Compute the phi coefficient for every event in a tally table.

        Args:
            dict: mapping of event name to a dict of counts keyed by
                ``"11"``, ``"10"``, ``"01"`` and ``"00"`` (the shape
                returned by ``calc_metrics``). Missing keys count as 0.
                The mapping is only read, never modified. The parameter
                name shadows the builtin and is kept solely so existing
                callers keep working.

        Returns:
            A new dict mapping each event name to its phi coefficient as a
            float. When any marginal total is zero the coefficient is
            undefined and 0.0 is returned for that event.
        """
        tables = dict  # alias so the body does not read like the builtin
        phi_by_event = {}
        for event, counts in tables.items():
            # .get() supplies the zero defaults without writing them back
            # into the caller's table.
            n11 = counts.get("11", 0)
            n10 = counts.get("10", 0)
            n01 = counts.get("01", 0)
            n00 = counts.get("00", 0)
            numerator = n11 * n00 - n10 * n01
            denominator = math.sqrt(
                (n11 + n10) * (n00 + n01) * (n11 + n01) * (n00 + n10)
            )
            # A zero denominator means one row or column of the 2x2 table
            # is empty; report 0.0 rather than raising ZeroDivisionError.
            phi_by_event[event] = numerator / denominator if denominator else 0.0
        return phi_by_event


    def main(filename):
        """Print the phi coefficient of every event in *filename* as JSON.

        Parses the file, collects its distinct events, tallies them, and
        dumps the resulting coefficients to stdout with an indent of 10.
        """
        entries = parse_file(filename)
        event_names = set_events(entries)
        tallies = calc_metrics(entries, event_names)
        coefficients = calc_phi(tallies)
        report = json.dumps(coefficients, indent=10)
        print(report)


    if __name__ == "__main__":  # Import guard
        # Exit with a usage message (written to stderr, status 1) instead of
        # an IndexError traceback when the input path is missing.
        if len(sys.argv) < 2:
            sys.exit("usage: {} FILENAME".format(sys.argv[0]))
        main(sys.argv[1])