"""Pandas snippets: short, independent recipes for common DataFrame tasks.

This file is a cheat sheet, not a script to run top to bottom: the first
recipes use `df` before the sample frame is created, and different recipes
reference different columns. Copy the recipe you need into a session where
`df` (and, where used, the Series `s`) already exists.
"""
import re

import numpy as np
import pandas as pd

"""Given a dataframe df to filter by a series s:"""
df[df['col_name'].isin(s)]

"""to do the same filter on the index instead of arbitrary column"""
# `.ix` was removed from pandas; `.loc` is the label-based replacement.
# `.loc` raises KeyError when a label in `s` is absent from the index; use
# `df[df.index.isin(s)]` instead if missing labels should simply be skipped.
df.loc[s]

""" display only certain columns, note it is a list inside the parens """
df[['A', 'B']]

"""drop rows with at least one null value, pass `how` / `thresh` to change the criterion (e.g. only rows where every value is null)"""
df.dropna()

"""deleting a column"""
del df['column-name']  # note that df.column-name won't work.

"""filter by multiple conditions in a dataframe df, each condition needs its own parentheses!"""
df[(df['gender'] == 'M') & (df['cc_iso'] == 'US')]

"""filter by conditions and the condition on row labels(index)"""
df[(df.a > 0) & (df.index.isin([0, 2, 4]))]

"""quick way to create a data frame to try things out"""
# randn(5, 4) yields four columns, so four column names are required.
df = pd.DataFrame(np.random.randn(5, 4), columns=['a', 'b', 'c', 'd'])

"""creating complex filters using functions on rows: http://goo.gl/r57b1"""
df[df.apply(lambda x: x['b'] > x['c'], axis=1)]

"""Pandas replace operation http://goo.gl/DJphs"""
# Assign the result back / use a single .loc call: chained forms such as
# `df[1][mask] = value` may operate on a temporary copy and leave `df` untouched.
df[2] = df[2].replace(4, 17)
df.loc[df[1] == 4, 1] = 19

"""apply and map examples"""

"""add 1 to every element"""
# NOTE: pandas 2.1 deprecated `applymap` in favour of `DataFrame.map`;
# `applymap` is kept here so the recipe also works on older versions.
df.applymap(lambda x: x + 1)

"""add 2 to row 3 and return the series"""
df.apply(lambda x: x[3] + 2, axis=0)

"""add 3 to col a and return the series"""
df.apply(lambda x: x['a'] + 3, axis=1)

"""example of applying a complex external function to each row of a data frame"""
# Digits followed by three ".digits" groups; octet ranges are not validated.
_IP_PATTERN = re.compile(r'[0-9]+(?:\.[0-9]+){3}')


def stripper(x):
    """Return the first IP-like token found in the row's text column.

    Args:
        x: A row (anything indexable by column name) holding the text under
            the key 'Text with IP adress embedded'.

    Returns:
        The first substring matching the dotted-quad pattern, or '' when the
        cell is not a string (e.g. a missing value) or contains no match.
    """
    text = x['Text with IP adress embedded']
    # A row-wise function is the place to handle special cases: missing
    # values, no match, or more matches than expected.
    if not isinstance(text, str):
        return ''
    match = _IP_PATTERN.search(text)
    return match.group(0) if match else ''


df.apply(stripper, axis=1)

"""groupby used like a histogram to obtain counts on sub-ranges of a variable, pretty handy"""
df.groupby(pd.cut(df.age, range(0, 130, 10))).size()

"""finding the distribution based on quantiles"""
# Returns the GroupBy object; chain an aggregation such as `.size()` to get counts.
df.groupby(pd.qcut(df.age, [0, 0.99, 1]))

"""if you don't need specific bins like above, and just want to count number of each values"""
df.age.value_counts()

"""one liner to normalize a data frame"""
(df - df.mean()) / (df.max() - df.min())

"""grouping and applying a group specific function to each group element"""
quantile = [0, 0.50, 0.75, 0.90, 0.95, 0.99, 1]
# Label each bin with its upper quantile so the group key itself carries the
# value to store; this stays correct even when a bin ends up empty, unlike
# pairing groups with `quantile` by enumeration index.
grouped = df.groupby(
    pd.qcut(df.age, quantile, labels=quantile[1:]),
    observed=True,
)
# `assign` returns a new frame per group instead of mutating the group's data.
frame_list = [frame.assign(age_quantile=label) for label, frame in grouped]
df = pd.concat(frame_list)