karamanbk · July 27, 2019 13:22 · Jul 27, 2019
diff --git a/g_series_7_intro.py b/g_series_7_intro.py
@@ -0,0 +1,38 @@
+from datetime import datetime, timedelta,date
+import pandas as pd
+%matplotlib inline
+from sklearn.metrics import classification_report,confusion_matrix
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+from __future__ import division #must if you use python 2
+from sklearn.cluster import KMeans
+
+import plotly.plotly as py
+import plotly.offline as pyoff
+import plotly.graph_objs as go
+
+import sklearn
+import xgboost as xgb
+from sklearn.model_selection import KFold, cross_val_score, train_test_split
+
+# Initiate plotly's offline notebook mode (pyoff is plotly.offline) so that
+# offline figures can be displayed inline in the notebook.
+pyoff.init_notebook_mode()
+
+#function for ordering cluster numbers for given criteria
+def order_cluster(cluster_field_name, target_field_name, df, ascending):
+    """Relabel clusters so that their ids follow the order of a target mean.
+
+    The mean of ``target_field_name`` is computed for every distinct value of
+    ``cluster_field_name``; the clusters are sorted by that mean and
+    renumbered 0, 1, 2, ... in sorted order.
+
+    Args:
+        cluster_field_name: Column of ``df`` holding the current cluster
+            labels.
+        target_field_name: Column whose per-cluster mean defines the order.
+        df: Input DataFrame; it is not modified.
+        ascending: If true, the cluster with the smallest mean becomes 0;
+            otherwise the cluster with the largest mean does.
+
+    Returns:
+        A new DataFrame with the columns of ``df``, except that
+        ``cluster_field_name`` now holds the ordinal label and is placed
+        last. The merge produces a fresh integer row index, and rows whose
+        cluster label has no group mean (e.g. NaN labels, which groupby
+        skips) are dropped by the inner join.
+    """
+    # Name of the temporary rank column. It must not collide with a column
+    # already in df (e.g. an 'index' column left by reset_index()); otherwise
+    # merge() would suffix both columns and the final rename would silently
+    # fail to restore the cluster column.
+    rank_field_name = 'index'
+    while rank_field_name in df.columns:
+        rank_field_name = '_' + rank_field_name
+
+    cluster_means = (
+        df.groupby(cluster_field_name)[target_field_name].mean().reset_index()
+    )
+    cluster_means = cluster_means.sort_values(
+        by=target_field_name, ascending=ascending
+    ).reset_index(drop=True)
+    # After the reset, the positional index is the rank of each cluster.
+    cluster_means[rank_field_name] = cluster_means.index
+
+    df_final = pd.merge(
+        df,
+        cluster_means[[cluster_field_name, rank_field_name]],
+        on=cluster_field_name,
+    )
+    # Swap the old labels for the ordered ones under the original name.
+    df_final = df_final.drop([cluster_field_name], axis=1)
+    df_final = df_final.rename(columns={rank_field_name: cluster_field_name})
+    return df_final
+
+
+# Load the data set from response_data.csv (the path is relative to the
+# current working directory).
+df_data = pd.read_csv('response_data.csv')
+
+# Preview the first 10 rows. As a bare expression this is only displayed when
+# it is the last expression of a notebook cell; nothing is printed when the
+# file is run as a plain script.
+df_data.head(10)