"""Plot timing and speed distributions for the HiMCM triathlon data set.

Categories: pro, premier, open.
    cly --> strong men
    ath --> strong women

Timing columns, in order: swim, t1, bike, t2, run, total, where t1 and t2
refer to the times needed for the transition between the modes of exercise.

Running this module reads ``HiMCM_TriDataSet.csv`` from the current working
directory and writes one PDF histogram per ``genHistPlot`` call at the bottom
of the file.
"""

import datetime

import matplotlib.pyplot as plt
import numpy as np


class person():
    """One competitor row of the data set."""

    def __init__(self):
        self.id = 0
        self.age = 0
        self.gender = ""
        self.cat = ""
        # Raw "H:MM:SS" strings until processTimings() runs, afterwards
        # integer durations in seconds.
        self.timings = []
        # Reciprocal (1 / seconds) of every entry of self.timings.
        self.speeds = []

    def processTimings(self):
        """Convert ``self.timings`` from "H:MM:SS" strings to seconds.

        ``self.timings`` is replaced by a list of integer second counts and
        ``self.speeds`` by the matching list of reciprocals (1.0 / seconds).
        Both attributes are only updated once every entry has been parsed.

        Raises:
            IndexError: an entry has fewer than three ':'-separated fields.
            ValueError: a field is not an integer.
            ZeroDivisionError: an entry amounts to zero seconds.
        """
        seconds = []
        for stamp in self.timings:
            # Split once instead of once per field; int() tolerates the
            # surrounding whitespace/newline on the last field.
            fields = stamp.split(':')
            seconds.append(
                int(fields[0]) * 3600 + int(fields[1]) * 60 + int(fields[2].strip())
            )
        # Rebuild rather than append so the list always mirrors self.timings.
        speeds = [1.0 / t for t in seconds]
        self.timings = seconds
        self.speeds = speeds


# Every competitor parsed from the CSV, in file order.
people = []

# The context manager guarantees the handle is closed even if a row fails to
# parse; the name ``f`` stays bound at module level as before.
with open('HiMCM_TriDataSet.csv', 'r') as f:
    for line in f:
        # Lines yielded by the file keep their trailing newline, so emptiness
        # has to be tested on the stripped text (len(line) == 0 never fires).
        row = line.strip()
        if not row or row.startswith("#"):
            continue
        data = row.split(",")
        a = person()
        a.id = int(data[0])
        a.age = int(data[1])
        a.gender = data[2]
        a.cat = data[3]
        a.timings = data[4:]
        a.processTimings()
        people.append(a)

# Not used by the code in this file; kept so the module's names are unchanged.
male_open_totaltiming = []
female_open_totaltiming = []


def genHistPlot(cats, timingindex, filename, fn=None, xlabel="Total time [s]"):
    """Save overlaid, normalised histograms of one timing column as a PDF.

    One semi-transparent histogram is drawn per category, all on the same
    axes, using the 'fivethirtyeight' style.

    Args:
        cats: category labels (compared against ``person.cat``); one
            histogram is drawn for each, in the given order.
        timingindex: index into ``person.timings`` selecting the column.
        filename: output path without extension; '.pdf' is appended.
        fn: optional callable applied to every value before plotting
            (e.g. ``lambda x: 1.0 / x`` to plot reciprocals).
        xlabel: x-axis label. Defaults to the label that was previously
            hard-coded, so existing callers are unaffected.
    """
    _data = []
    for cat in cats:
        values = [p.timings[timingindex] for p in people if p.cat == cat]
        if fn is not None:
            values = [fn(v) for v in values]
        _data.append(values)

    with plt.style.context('fivethirtyeight'):
        # Leave room for the enlarged axis labels.
        plt.gcf().subplots_adjust(bottom=0.15, left=0.17)
        # A boolean is required here: the string 'off' is truthy and turns
        # the grid ON in current Matplotlib releases.
        plt.grid(False)
        for series in _data:
            # ``density`` replaces the ``normed`` keyword, which was removed
            # in Matplotlib 3.1.
            plt.hist(series, density=True, alpha=0.5)
        plt.xlabel(xlabel, fontsize=16)
        plt.ylabel("Normalised frequency [#]", fontsize=16)
        plt.savefig(filename + '.pdf')

    # Reset pyplot's global state so the next call starts from a clean figure.
    plt.clf()
    plt.cla()
    plt.close()


genHistPlot(["M OPEN", "F OPEN"], 5, "open_gender_timing_distribution")
genHistPlot(["M OPEN", "F OPEN"], 5, "open_gender_speed_distribution",
            fn=lambda x: 1.0/x, xlabel="Inverse total time [1/s]")
# NOTE(review): the header lists the timing columns as swim, t1, bike, t2,
# run, total, which would make index 1 the first transition rather than the
# swim, yet the output is named "swimtime". Confirm the intended column
# against the CSV and pass a matching ``xlabel``; the index and the default
# label are left exactly as they were.
genHistPlot(["M PREMIER", "M PRO", "CLY"], 1, "male_non-open_swimtime_distribution")