- 
      
- 
        Save manojpandey/f0de8df3a0707cf44c5c887c07ee357c to your computer and use it in GitHub Desktop. 
Revisions
- 
        bsweger revised this gist Oct 12, 2015 . 1 changed file with 9 additions and 4 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -37,9 +37,18 @@ for index, row in df.iterrows(): print index, row['some column'] #Next few examples show how to work with text data in Pandas. #Full list of .str functions: http://pandas.pydata.org/pandas-docs/stable/text.html #Slice values in a DataFrame column (aka Series) df.column.str[0:2] #Lower-case everything in a DataFrame column df.column_name = df.column_name.str.lower() #Get length of data in a DataFrame column df.column_name.str.len() #Sort dataframe by multiple columns df = df.sort(['col1','col2','col3'],ascending=[1,1,0]) @@ -49,7 +58,6 @@ #Grab DataFrame rows where specific column is null/notnull newdf = df[df['column'].isnull()] #select from DataFrame using multiple keys of a hierarchical index df.xs(('index level 1 value','index level 2 value'), level=('level 1','level 2')) @@ -58,9 +66,6 @@ #loading to a db) df = df.where((pd.notnull(df)), None) #Get quick count of rows in a DataFrame len(df.index) 
- 
        bsweger revised this gist Sep 24, 2015 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -34,7 +34,7 @@ #Loop through rows in a DataFrame #(if you must) for index, row in df.iterrows(): print index, row['some column'] #Lower-case everything in a DataFrame column 
- 
        bsweger revised this gist Aug 28, 2015 . 1 changed file with 4 additions and 2 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -107,7 +107,9 @@ df.columns = df.columns.get_level_values(0) #Convert Django queryset to DataFrame qs = DjangoModelName.objects.all() q = qs.values() df = pd.DataFrame.from_records(q) #Create a DataFrame from a Python dictionary df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2']) 
- 
        bsweger revised this gist Jul 6, 2015 . 1 changed file with 4 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -92,6 +92,10 @@ 'col6': '99' }) #Concatenate two DataFrame columns into a new, single column #(useful when dealing with composite keys, for example) df['newcol'] = df['col1'].map(str) + df['col2'].map(str) #Doing calculations with DataFrame columns that have missing values #In example below, swap in 0 for df['col1'] cells that contain null df['new_col'] = np.where(pd.isnull(df['col1']),0,df['col1']) + df['col2'] 
- 
        bsweger revised this gist Jul 3, 2015 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -99,6 +99,9 @@ # Split delimited values in a DataFrame column into two new columns df['new_col1'], df['new_col2'] = zip(*df['original_col'].apply(lambda x: x.split(': ', 1))) # Collapse hierarchical column indexes df.columns = df.columns.get_level_values(0) #Convert Django queryset to DataFrame #convert queryset to dataframe qs = DjangoModelName.objects.all() 
- 
        bsweger revised this gist Dec 31, 2014 . 1 changed file with 0 additions and 4 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -32,10 +32,6 @@ #lower-case all DataFrame column names (for example) df.rename(columns=lambda x: x.split('.')[-1], inplace=True) #Loop through rows in a DataFrame #(if you must) for index, row in df: 
- 
        bsweger revised this gist Dec 19, 2014 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -34,7 +34,7 @@ #set entire column values in a way that avoids the #SettingWithCopy Warning df.loc[:, 'column_name'] = 'new value' #Loop through rows in a DataFrame #(if you must) 
- 
        bsweger revised this gist Dec 19, 2014 . 1 changed file with 4 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -32,6 +32,10 @@ #lower-case all DataFrame column names (for example) df.rename(columns=lambda x: x.split('.')[-1], inplace=True) #set entire column values in a way that avoids the #SettingWithCopy Warning df.loc[df.index, 'column_name'] = 'new value' #Loop through rows in a DataFrame #(if you must) for index, row in df: 
- 
        bsweger revised this gist Dec 5, 2014 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -12,6 +12,9 @@ valuelist = ['value1', 'value2', 'value3'] df = df[~df.column.isin(value_list)] #Delete column from DataFrame del df['column'] #Select from DataFrame using criteria from multiple columns newdf = df[(df['column_one']>2004) & (df['column_two']==9)] 
- 
        bsweger revised this gist Dec 4, 2014 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -55,6 +55,9 @@ #loading to a db) df = df.where((pd.notnull(df)), None) #Slice values in a DataFrame column (aka Series) df.column.str[0:2] #Get quick count of rows in a DataFrame len(df.index) 
- 
        bsweger revised this gist Aug 20, 2014 . 1 changed file with 7 additions and 3 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -22,14 +22,18 @@ 'col3 old name':'col3 new name', }) #lower-case all DataFrame column names df.columns = map(str.lower, df.columns) #even more fancy DataFrame column re-naming #lower-case all DataFrame column names (for example) df.rename(columns=lambda x: x.split('.')[-1], inplace=True) #Loop through rows in a DataFrame #(if you must) for index, row in df: print index, row['some column'] #Lower-case everything in a DataFrame column df.column_name = df.column_name.str.lower() 
- 
        bsweger revised this gist Aug 19, 2014 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -69,6 +69,9 @@ for col in refunds.columns.values: refunds[col] = refunds[col].replace('[^0-9]+.-', '', regex=True) #Set DataFrame column values based on other column values df['column_to_change'][(df['column1'] == some_value) & (df['column2'] == some_other_value)] = new_value #Clean up missing values in multiple DataFrame columns df = df.fillna({ 'col1': 'missing', 
- 
        bsweger revised this gist Aug 19, 2014 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -44,6 +44,9 @@ newdf = df[df['column'].isnull()] len(newdf) #select from DataFrame using multiple keys of a hierarchical index df.xs(('index level 1 value','index level 2 value'), level=('level 1','level 2')) #Change all NaNs to None (useful before #loading to a db) df = df.where((pd.notnull(df)), None) 
- 
        bsweger revised this gist Aug 13, 2014 . 1 changed file with 5 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -34,7 +34,11 @@ df.column_name = df.column_name.str.lower() #Sort dataframe by multiple columns df = df.sort(['col1','col2','col3'],ascending=[1,1,0]) #get top n for each group of columns in a sorted dataframe #(make sure dataframe is sorted first) top5 = df.groupby(['groupingcol1', 'groupingcol2']).head(5) #Grab DataFrame rows where specific column is null/notnull newdf = df[df['column'].isnull()] 
- 
        bsweger revised this gist Aug 13, 2014 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -33,8 +33,8 @@ #Lower-case everything in a DataFrame column df.column_name = df.column_name.str.lower() #Sort dataframe by multiple columns df = df.sort_indx(by=['col1', 'col2']) #Grab DataFrame rows where specific column is null/notnull newdf = df[df['column'].isnull()] 
- 
        bsweger revised this gist Aug 13, 2014 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -33,6 +33,9 @@ #Lower-case everything in a DataFrame column df.column_name = df.column_name.str.lower() #Sort multi-index dataframe df = df.sort_indx(by=['indexpart1', 'indexpart2']) #Grab DataFrame rows where specific column is null/notnull newdf = df[df['column'].isnull()] len(newdf) 
- 
        bsweger revised this gist Aug 7, 2014 . 1 changed file with 8 additions and 9 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,20 +1,20 @@ #List unique values in a DataFrame column pd.unique(df.column_name.ravel()) #Convert Series datatype to numeric, getting rid of any non-numeric values df['col'] = df['col'].astype(str).convert_objects(convert_numeric=True) #Grab DataFrame rows where column has certain values valuelist = ['value1', 'value2', 'value3'] df = df[df.column.isin(value_list)] #Grab DataFrame rows where column doesn't have certain values valuelist = ['value1', 'value2', 'value3'] df = df[~df.column.isin(value_list)] #Select from DataFrame using criteria from multiple columns newdf = df[(df['column_one']>2004) & (df['column_two']==9)] #Rename several DataFrame columns df = df.rename(columns = { 'col1 old name':'col1 new name', @@ -42,7 +42,6 @@ df = df.where((pd.notnull(df)), None) #Get quick count of rows in a DataFrame len(df.index) #Pivot data (with flexibility about what what 
- 
        bsweger revised this gist Aug 7, 2014 . 1 changed file with 32 additions and 31 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,27 +1,16 @@ #Convert Series datatype to numeric, getting rid of any values that are non-numeric df['col'] = df['col'].astype(str).convert_objects(convert_numeric=True) #Grab DataFrame rows where specified column has certain values valuelist = ['value1', 'value2', 'value3'] df = df[df.column.isin(value_list)] #List unique values in a DataFrame column pd.unique(df.column_name.ravel()) #Select from DataFrame using criteria from multiple columns newdf = df[(df['column_one']>2004) & (df['column_two']==9)] #Grab DataFrame rows where specified column doesn't have certain values valuelist = ['value1', 'value2', 'value3'] df = df[~df.column.isin(value_list)] @@ -33,13 +22,8 @@ 'col3 old name':'col3 new name', }) #Loop through rows in a DataFrame #(if you must) for index, row in df: print index, row['some column'] @@ -57,12 +41,6 @@ #loading to a db) df = df.where((pd.notnull(df)), None) #Get quick count of rows in a DataFrame #(note that using df[0].count will exclude NaNs) len(df.index) @@ -75,9 +53,32 @@ index=['col1', 'col2', 'col3'], #these stay as columns columns=['col4']) #data values in this column become their own column #change data type of DataFrame column df.column_name = df.column_name.astype(np.int64) # Get rid of non-numeric values throughout a DataFrame: for col in refunds.columns.values: refunds[col] = refunds[col].replace('[^0-9]+.-', '', regex=True) #Clean up missing values in multiple DataFrame columns df = df.fillna({ 'col1': 'missing', 'col2': '99.999', 'col3': '999', 'col4': 'missing', 'col5': 'missing', 'col6': '99' }) #Doing calculations with DataFrame columns that have missing values #In example below, swap in 0 for df['col1'] cells that contain null df['new_col'] = np.where(pd.isnull(df['col1']),0,df['col1']) + df['col2'] # Split delimited values in a DataFrame column into two new columns df['new_col1'], df['new_col2'] = zip(*df['original_col'].apply(lambda x: x.split(': ', 1))) #Convert Django queryset to DataFrame #convert queryset to dataframe qs = DjangoModelName.objects.all() q = qs.values() df = pd.DataFrame.from_records(q) 
- 
        bsweger revised this gist Aug 6, 2014 . 1 changed file with 4 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -77,4 +77,7 @@ #Doing calculations with DataFrame columns that have missing values #In example below, swap in 0 for df['col1'] cells that contain null df['new_col'] = np.where(pd.isnull(df['col1']),0,df['col1']) + df['col2'] #change data type of DataFrame column df.column_name = df.column_name.astype(np.int64) 
- 
        bsweger revised this gist Aug 6, 2014 . 1 changed file with 5 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -73,4 +73,8 @@ pd.pivot_table( df,values='cell_value', index=['col1', 'col2', 'col3'], #these stay as columns columns=['col4']) #data values in this column become their own column #Doing calculations with DataFrame columns that have missing values #In example below, swap in 0 for df['col1'] cells that contain null df['new_col'] = np.where(pd.isnull(df['col1']),0,df['col1']) + df['col2'] 
- 
        bsweger revised this gist Aug 6, 2014 . 1 changed file with 9 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -65,4 +65,12 @@ #Get quick count of rows in a DataFrame #(note that using df[0].count will exclude NaNs) len(df.index) #Pivot data (with flexibility about what what #becomes a column and what stays a row). #Syntax works on Pandas >= .14 pd.pivot_table( df,values='cell_value', index=['col1', 'col2', 'col3'], #these stay as columns columns=['col4']) #data values in this column become their own column 
- 
        bsweger revised this gist Aug 6, 2014 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,6 @@ # Get rid of non-numeric values throughout a DataFrame: for col in refunds.columns.values: refunds[col] = refunds[col].replace('[^0-9]+.-', '', regex=True) # Split delimited values in a DataFrame column into two new columns df['new_col1'], df['new_col2'] = zip(*df['original_col'].apply(lambda x: x.split(': ', 1))) 
- 
        bsweger revised this gist Aug 6, 2014 . 1 changed file with 5 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -61,4 +61,8 @@ pd.unique(df.column_name.ravel()) #Select from DataFrame using criteria from multiple columns newdf = df[(df['column_one']>2004) & (df['column_two']==9)] #Get quick count of rows in a DataFrame #(note that using df[0].count will exclude NaNs) len(df.index) 
- 
        bsweger revised this gist Aug 5, 2014 . 1 changed file with 4 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -58,4 +58,7 @@ df = df.where((pd.notnull(df)), None) #List unique values in a DataFrame column pd.unique(df.column_name.ravel()) #Select from DataFrame using criteria from multiple columns newdf = df[(df['column_one']>2004) & (df['column_two']==9)] 
- 
        bsweger revised this gist Aug 5, 2014 . 1 changed file with 4 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -55,4 +55,7 @@ #Change all NaNs to None (useful before #loading to a db) df = df.where((pd.notnull(df)), None) #List unique values in a DataFrame column pd.unique(df.column_name.ravel()) 
- 
        bsweger revised this gist Aug 1, 2014 . 1 changed file with 5 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -51,4 +51,8 @@ #Grab DataFrame rows where specific column is null/notnull newdf = df[df['column'].isnull()] len(newdf) #Change all NaNs to None (useful before #loading to a db) df = df.where((pd.notnull(df)), None) 
- 
        bsweger revised this gist Aug 1, 2014 . 1 changed file with 5 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -47,4 +47,8 @@ df.columns = map(str.lower, df.columns) #Lower-case everything in a DataFrame column df.column_name = df.column_name.str.lower() #Grab DataFrame rows where specific column is null/notnull newdf = df[df['column'].isnull()] len(newdf) 
- 
        bsweger revised this gist Aug 1, 2014 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -47,4 +47,4 @@ df.columns = map(str.lower, df.columns) #Lower-case everything in a DataFrame column df.column_name = df.column_name.str.lower() 
- 
        bsweger revised this gist Aug 1, 2014 . 1 changed file with 6 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -42,3 +42,9 @@ #Loop through rows in a DataFrame for index, row in df: print index, row['some column'] #lower-case all DataFrame column names df.columns = map(str.lower, df.columns) #Lower-case everything in a DataFrame column df = df.column_name.str.lower() 
- 
        bsweger revised this gist Aug 1, 2014 . 1 changed file with 4 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -38,3 +38,7 @@ qs = DjangoModelName.objects.all() q = qs.values() df = pd.DataFrame.from_records(q) #Loop through rows in a DataFrame for index, row in df: print index, row['some column'] 
NewerOlder