"""
categories: pro, premier, open.
cly --> strong men
ath --> strong women

Timings recorded per athlete, in order: swim, t1, bike, t2, run, total,
where t1 and t2 are the times needed for the transitions between the modes of exercise.
"""

import matplotlib.pyplot as plt
import numpy as np
import datetime

# Input data set (CSV), opened for reading; iterated line by line by the
# parsing loop further down.
f = open('HiMCM_TriDataSet.csv', 'r')

# Accumulates one `person` record per parsed data row, in file order.
people = []

class person():
    """Plain record for one entrant of the data set.

    All fields start out empty and are meant to be assigned by the caller.
    After ``timings`` has been filled with "H:MM:SS" strings, call
    ``processTimings()`` to convert them to seconds and derive ``speeds``.
    """

    def __init__(self):
        self.id = 0
        self.age = 0
        self.gender = ""
        self.cat = ""
        # "H:MM:SS" strings until processTimings() runs, integer seconds after.
        self.timings = []
        # Reciprocal of each entry of `timings` (1/seconds), same order.
        self.speeds = []

    def processTimings(self):
        """Convert ``self.timings`` to integer seconds and rebuild ``self.speeds``.

        Each entry of ``self.timings`` must be a string of the form
        "hours:minutes:seconds" (surrounding whitespace on the seconds field
        is tolerated). On success ``self.timings`` holds the durations in
        seconds and ``self.speeds`` holds ``1.0 / seconds`` for each of them.

        Both lists are built locally and assigned together at the end, so
        ``speeds`` always matches ``timings`` one-to-one and a failure part
        way through leaves the object unchanged.

        Raises:
            IndexError: an entry has fewer than three ':'-separated fields.
            ValueError: a field is not an integer.
            ZeroDivisionError: a duration is zero seconds.
        """
        seconds = []
        speeds = []
        for stamp in self.timings:
            parts = stamp.split(':')
            t = int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2].strip())
            seconds.append(t)
            speeds.append(1.0 / t)
        self.timings = seconds
        self.speeds = speeds

# Parse the data file into `people`, one `person` per data row.
# Row layout used here: id, age, gender, category, then the timing columns.
try:
    for line in f:
        row = line.strip()
        # Lines read from a file keep their trailing newline, so a blank line
        # can only be detected after stripping. Skip blanks and '#' comments.
        if not row or row.startswith("#"):
            continue
        data = row.split(",")
        a = person()
        a.id = int(data[0])
        a.age = int(data[1])
        a.gender = data[2]
        a.cat = data[3]
        a.timings = data[4:]
        a.processTimings()
        people.append(a)
finally:
    # The file has been read to the end (or parsing failed): release the handle.
    f.close()

# NOTE(review): not referenced anywhere in the visible part of this file;
# kept in case code elsewhere relies on these names.
male_open_totaltiming = []
female_open_totaltiming = []

def genHistPlot(cats, timingindex, filename, fn=None, xlabel="Total time [s]"):
    """Save overlaid, normalised histograms of one timing column as a PDF.

    For every category in ``cats`` the values ``timings[timingindex]`` of all
    entries of the module-level ``people`` list with that ``cat`` are
    collected, and one semi-transparent histogram per category is drawn on
    the same axes. The figure is written to ``filename + '.pdf'`` and closed.

    Args:
        cats: iterable of category strings, compared with ``==`` against
            each person's ``cat``.
        timingindex: index into each person's ``timings`` list.
        filename: output path without extension; ``'.pdf'`` is appended.
        fn: optional one-argument callable applied to every value before
            plotting (e.g. ``lambda x: 1.0/x``). ``None`` plots raw values.
        xlabel: label for the x axis. Defaults to the label that was
            previously fixed, so existing calls are unaffected.
    """
    transform = fn if fn is not None else (lambda value: value)

    series = []
    for cat in cats:
        series.append(
            [transform(p.timings[timingindex]) for p in people if p.cat == cat]
        )

    with plt.style.context('fivethirtyeight'):
        plt.gcf().subplots_adjust(bottom=0.15, left=0.17)
        # Pass a real boolean: the string 'off' is truthy and is not
        # interpreted as "disable" by current Matplotlib releases.
        plt.grid(False)
        for values in series:
            # `density` replaces the `normed` keyword, which was removed
            # from hist() in Matplotlib 3.1.
            plt.hist(values, density=True, alpha=0.5)
        plt.xlabel(xlabel, fontsize=16)
        plt.ylabel("Normalised frequency [#]", fontsize=16)
        plt.savefig(filename + '.pdf')
        # Reset pyplot state so the next call starts from a fresh figure.
        plt.clf()
        plt.cla()
        plt.close()

# NOTE(review): genHistPlot is called without an x-axis label here, so all
# three PDFs get the label "Total time [s]", including the speed and swim plots.

# Open category, men vs. women: histogram of timings[5]
# (presumably the total time, per the column order in the module docstring).
genHistPlot(["M OPEN", "F OPEN"], 5, "open_gender_timing_distribution")
# Same two groups and column, plotted as the reciprocal of the time.
genHistPlot(["M OPEN", "F OPEN"], 5, "open_gender_speed_distribution", fn = lambda x: 1.0/x)
# NOTE(review): the output name says "swimtime", but if timings follow the
# documented order (swim, t1, bike, t2, run, total) index 1 is t1, not swim.
# Confirm against the CSV columns.
genHistPlot(["M PREMIER", "M PRO", "CLY"], 1, "male_non-open_swimtime_distribution")