Created
July 27, 2019 13:22
-
-
Save karamanbk/0bc29fb112cbe3cad5efa878719539d8 to your computer and use it in GitHub Desktop.
Revisions
-
karamanbk created this gist
Jul 27, 2019 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,38 @@ from datetime import datetime, timedelta,date import pandas as pd %matplotlib inline from sklearn.metrics import classification_report,confusion_matrix import matplotlib.pyplot as plt import numpy as np import seaborn as sns from __future__ import division #must if you use python 2 from sklearn.cluster import KMeans import plotly.plotly as py import plotly.offline as pyoff import plotly.graph_objs as go import sklearn import xgboost as xgb from sklearn.model_selection import KFold, cross_val_score, train_test_split #initate plotly pyoff.init_notebook_mode() #function for ordering cluster numbers for given criteria def order_cluster(cluster_field_name, target_field_name,df,ascending): new_cluster_field_name = 'new_' + cluster_field_name df_new = df.groupby(cluster_field_name)[target_field_name].mean().reset_index() df_new = df_new.sort_values(by=target_field_name,ascending=ascending).reset_index(drop=True) df_new['index'] = df_new.index df_final = pd.merge(df,df_new[[cluster_field_name,'index']], on=cluster_field_name) df_final = df_final.drop([cluster_field_name],axis=1) df_final = df_final.rename(columns={"index":cluster_field_name}) return df_final #import the data df_data = pd.read_csv('response_data.csv') #print first 10 rows df_data.head(10)