import json
import math
import sys
#import pprint


def parse_file(filename):
    """Read `filename` and return its contents decoded as JSON.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # The context manager closes the handle even when decoding fails.
    with open(filename, "rt") as f:
        return json.load(f)


## You don't necessarily need to do this upfront. You can just add
## events as you go through the original data structure
def set_events(data):
    """Return every distinct event found in the entries of `data`.

    Each entry must provide an "events" iterable. The result is a list
    with one element per distinct event, in no particular order.
    """
    seen = set()
    for entry in data:
        # update() adds each event of the entry; duplicates collapse.
        seen.update(entry["events"])
    return list(seen)


def calc_metrics(data, distinct_events):
    """Count, per event, how often it coincides with the squirrel flag.

    Args:
        data: Iterable of entries; each entry is a mapping with an
            "events" collection and a "squirrel" value accepted by
            ``int()``.
        distinct_events: Iterable of the event names to tabulate.

    Returns:
        A dict mapping each event name to a dict of counts. Count keys are
        two-character strings: the first character is "1" when the event
        is listed in the entry and "0" otherwise; the second is the
        entry's squirrel value as an integer. The keys "00", "01", "10"
        and "11" are always present.

    Raises:
        KeyError: If an entry lacks "events" or "squirrel".
    """
    # One table per requested event, derived from the argument rather than
    # from a hard-coded list of names, so new events need no code change.
    tables = {
        event: {"00": 0, "01": 0, "10": 0, "11": 0}
        for event in distinct_events
    }

    for entry in data:
        events = entry["events"]
        squirrel = int(entry["squirrel"])
        for event, counts in tables.items():
            key = "{}{}".format(1 if event in events else 0, squirrel)
            # .get() keeps counting correct even if squirrel is not 0 or 1,
            # in which case the key is not one of the pre-seeded four.
            counts[key] = counts.get(key, 0) + 1
    return tables


def calc_phi(dict):
    """Compute the phi coefficient for each event's table of counts.

    Args:
        dict: Mapping of event name to a mapping of counts keyed by "11",
            "10", "01" and "00". A missing key counts as zero. The
            argument is not modified. (The parameter name shadows the
            builtin; it is kept so existing keyword callers still work.)

    Returns:
        A new dict mapping each event name to its phi value as a float.
        When the denominator is zero (some row or column of the table is
        empty) phi is undefined and 0.0 is reported.
    """
    phi_by_event = {}
    for event, counts in dict.items():
        # Read with defaults instead of writing zeros back into the
        # caller's tables.
        n11 = counts.get("11", 0)
        n10 = counts.get("10", 0)
        n01 = counts.get("01", 0)
        n00 = counts.get("00", 0)

        numerator = n11 * n00 - n10 * n01
        denominator = math.sqrt(
            (n11 + n10) * (n00 + n01) * (n11 + n01) * (n00 + n10)
        )
        # Guard the division: a degenerate table would otherwise raise
        # ZeroDivisionError.
        phi_by_event[event] = numerator / denominator if denominator else 0.0
    return phi_by_event


def main(filename):
    """Load the JSON data in `filename` and print the per-event phi values.

    The result of ``calc_phi`` is written to stdout as JSON.
    """
    entries = parse_file(filename)
    event_names = set_events(entries)
    tables = calc_metrics(entries, event_names)
    phi_values = calc_phi(tables)
    report = json.dumps(phi_values, indent=10)
    print(report)


if __name__ == "__main__":  # Import guard
    # Exit with a usage hint rather than an IndexError traceback when the
    # input path is missing from the command line.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <data.json>".format(sys.argv[0]))
    main(sys.argv[1])