Skip to content

Instantly share code, notes, and snippets.

@karamanbk
Created July 27, 2019 13:22
Show Gist options
  • Save karamanbk/0bc29fb112cbe3cad5efa878719539d8 to your computer and use it in GitHub Desktop.
Save karamanbk/0bc29fb112cbe3cad5efa878719539d8 to your computer and use it in GitHub Desktop.

Revisions

  1. karamanbk created this gist Jul 27, 2019.
    38 changes: 38 additions & 0 deletions g_series_7_intro.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,38 @@
    from datetime import datetime, timedelta,date
    import pandas as pd
    %matplotlib inline
    from sklearn.metrics import classification_report,confusion_matrix
    import matplotlib.pyplot as plt
    import numpy as np
    import seaborn as sns
    from __future__ import division #must if you use python 2
    from sklearn.cluster import KMeans

    import plotly.plotly as py
    import plotly.offline as pyoff
    import plotly.graph_objs as go

    import sklearn
    import xgboost as xgb
    from sklearn.model_selection import KFold, cross_val_score, train_test_split

    # Initiate plotly's offline notebook mode (pyoff is the plotly.offline module imported above)
    pyoff.init_notebook_mode()

    #function for ordering cluster numbers for given criteria
    def order_cluster(cluster_field_name, target_field_name,df,ascending):
        """Renumber cluster labels so they follow the ordering of a target mean.

        The mean of ``target_field_name`` is computed per distinct value of
        ``cluster_field_name``; the clusters are sorted by that mean and each
        one is relabelled with its position (0, 1, 2, ...) in the sorted order.

        Args:
            cluster_field_name: Name of the column in ``df`` holding the
                current cluster labels.
            target_field_name: Name of the column whose per-cluster mean
                decides the new ordering.
            df: Input DataFrame. It is not modified.
            ascending: Passed to ``sort_values``; ``True`` gives label 0 to
                the cluster with the smallest mean, ``False`` to the largest.

        Returns:
            A new DataFrame produced by ``pd.merge``: the original cluster
            column is removed and the re-ordered labels are appended as the
            last column under the same name ``cluster_field_name``.
        """
        # Per-cluster mean of the target, sorted so that the positional index
        # of each row is exactly the new label of that cluster.
        cluster_means = df.groupby(cluster_field_name)[target_field_name].mean().reset_index()
        cluster_means = cluster_means.sort_values(by=target_field_name, ascending=ascending).reset_index(drop=True)

        # Temporary column carrying the new label through the merge. It must not
        # collide with an existing column of ``df`` (e.g. an 'index' column left
        # over from reset_index()); otherwise merge would suffix both columns
        # and the rename below would silently match nothing.
        rank_field_name = 'index'
        while rank_field_name in df.columns:
            rank_field_name = '_' + rank_field_name
        cluster_means[rank_field_name] = cluster_means.index

        # Attach the new label to every row, then swap it in for the old one.
        df_final = pd.merge(df, cluster_means[[cluster_field_name, rank_field_name]], on=cluster_field_name)
        df_final = df_final.drop([cluster_field_name], axis=1)
        df_final = df_final.rename(columns={rank_field_name: cluster_field_name})
        return df_final


    # Load the data set from 'response_data.csv' (path is relative to the working directory) into a DataFrame
    df_data = pd.read_csv('response_data.csv')

    # Preview the first 10 rows; presumably meant to be the last expression of a notebook cell so the result is displayed
    df_data.head(10)