-
-
Save manojpandey/f0de8df3a0707cf44c5c887c07ee357c to your computer and use it in GitHub Desktop.
Useful Pandas Snippets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Get rid of non-numeric values throughout a DataFrame: | |
| for col in refunds.columns.values: | |
| refunds[col] = refunds[col].replace('[^0-9]+.-', '', regex=True) | |
| # Split delimited values in a DataFrame column into two new columns | |
| df['new_col1'], df['new_col2'] = zip(*df['original_col'].apply(lambda x: x.split(': ', 1))) | |
| #Convert Series datatype to numeric, getting rid of any values that are non-numeric | |
| df['col'] = df['col'].astype(str).convert_objects(convert_numeric=True) | |
| #Clean up missing values in multiple DataFrame columns | |
| df = df.fillna({ | |
| 'fyq': 'missing', | |
| 'cfda_program_num': '99.999', | |
| 'recipient_county_code': '999', | |
| 'recipient_country_code': 'missing', | |
| 'uri': 'missing', | |
| 'recipient_state_code': '99' | |
| }) | |
| #Grab DataFrame rows where specified column has certain values | |
| valuelist = ['value1', 'value2', 'value3'] | |
| df = df[df.column.isin(value_list)] | |
| #Grab DataFrame rows where specified column doesn't have certain values | |
| valuelist = ['value1', 'value2', 'value3'] | |
| df = df[~df.column.isin(value_list)] | |
| #Rename several DataFrame columns | |
| df = df.rename(columns = { | |
| 'col1 old name':'col1 new name', | |
| 'col2 old name':'col2 new name', | |
| 'col3 old name':'col3 new name', | |
| }) | |
| #Convert Django queryset to DataFrame | |
| #convert queryset to dataframe | |
| qs = DjangoModelName.objects.all() | |
| q = qs.values() | |
| df = pd.DataFrame.from_records(q) | |
| #Loop through rows in a DataFrame | |
| for index, row in df: | |
| print index, row['some column'] | |
| #lower-case all DataFrame column names | |
| df.columns = map(str.lower, df.columns) | |
| #Lower-case everything in a DataFrame column | |
| df.column_name = df.column_name.str.lower() | |
| #Grab DataFrame rows where specific column is null/notnull | |
| newdf = df[df['column'].isnull()] | |
| len(newdf) | |
| #Change all NaNs to None (useful before | |
| #loading to a db) | |
| df = df.where((pd.notnull(df)), None) | |
| #List unique values in a DataFrame column | |
| pd.unique(df.column_name.ravel()) | |
| #Select from DataFrame using criteria from multiple columns | |
| newdf = df[(df['column_one']>2004) & (df['column_two']==9)] | |
| #Get quick count of rows in a DataFrame | |
| #(note that using df[0].count will exclude NaNs) | |
| len(df.index) | |
| #Pivot data (with flexibility about what what | |
| #becomes a column and what stays a row). | |
| #Syntax works on Pandas >= .14 | |
| pd.pivot_table( | |
| df,values='cell_value', | |
| index=['col1', 'col2', 'col3'], #these stay as columns | |
| columns=['col4']) #data values in this column become their own column |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment