"""Bin pickled scores, export them to CSV and plot their distribution.

Reads ``y_scores_20.pkl``, computes per-bin means over 156 equal-width bins
on [0, 1], then writes every score above the first Sturges histogram bin to
``bins.csv`` and plots the same subset to ``output.png``.
"""
import pickle as pkl

import numpy as np
import seaborn as sns

# load data
# NOTE: unpickling can execute arbitrary code; only load a trusted file.
with open('y_scores_20.pkl', 'rb') as score_file:
    scores = pkl.load(score_file)

# create bins
bins = np.linspace(0, 1, 157)  # 157 is determined arbitrarily
digitized = np.digitize(scores, bins)

# Mean score per bin. An empty bin yields NaN explicitly, which is the same
# value ``mean()`` of an empty slice would give, but without the
# "Mean of empty slice" RuntimeWarning.
# NOTE(review): np.digitize assigns scores equal to 1.0 the index len(bins),
# which lies outside this range, so such scores are in no bin mean -- confirm
# whether that is intended.
bin_means = [
    members.mean() if members.size else np.nan
    for members in (scores[digitized == i] for i in range(1, len(bins)))
]

# get the histogram, https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
hist = np.histogram(scores, bins='sturges')

# hist[1] holds the bin edges, so hist[1][1] is the upper edge of the first
# bin; keeping only scores above it drops the first bin.
first_bin_upper_edge = hist[1][1]
scores_without_first_bin = scores[scores > first_bin_upper_edge]

# export without the first bin
np.savetxt('bins.csv', scores_without_first_bin, delimiter=';')

# if visualized in tableau, bins are computed as follows: Number of Bins = 3 + log2(n) * log(n)
# ref: http://onlinehelp.tableau.com/current/pro/desktop/en-us/calculations_bins.html

# plot with seaborn, visualize without the first bin
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; when the
# seaborn version is pinned, consider migrating to histplot/displot.
dist = sns.distplot(scores_without_first_bin)
dist.figure.savefig('output.png')