from datetime import datetime, timedelta,date import pandas as pd %matplotlib inline from sklearn.metrics import classification_report,confusion_matrix import matplotlib.pyplot as plt import numpy as np import seaborn as sns from __future__ import division #must if you use python 2 from sklearn.cluster import KMeans import plotly.plotly as py import plotly.offline as pyoff import plotly.graph_objs as go import sklearn import xgboost as xgb from sklearn.model_selection import KFold, cross_val_score, train_test_split #initate plotly pyoff.init_notebook_mode() #function for ordering cluster numbers for given criteria def order_cluster(cluster_field_name, target_field_name,df,ascending): new_cluster_field_name = 'new_' + cluster_field_name df_new = df.groupby(cluster_field_name)[target_field_name].mean().reset_index() df_new = df_new.sort_values(by=target_field_name,ascending=ascending).reset_index(drop=True) df_new['index'] = df_new.index df_final = pd.merge(df,df_new[[cluster_field_name,'index']], on=cluster_field_name) df_final = df_final.drop([cluster_field_name],axis=1) df_final = df_final.rename(columns={"index":cluster_field_name}) return df_final #import the data df_data = pd.read_csv('response_data.csv') #print first 10 rows df_data.head(10)