why-not · October 2, 2025 03:23 · jose-roberto-abreu · Jul 23, 2015 · patrickongwong · Feb 27, 2016
diff --git a/gistfile1.py b/gistfile1.py
 """Given a dataframe df to filter by a series s:""" 
 df[df['col_name'].isin(s)]

 """to do the same filter on the index instead of an arbitrary column.
 .ix was removed in pandas 1.0; .loc is its label-based replacement.
 .loc raises KeyError for labels in s that are missing from the index --
 use df[df.index.isin(s)] to skip such labels instead."""
 df.loc[s]

 """display only certain columns, note it is a list inside the brackets"""
 df[['A', 'B']]

 """drop rows with at least one null value; pass how='all' or thresh=n
 to change how many nulls it takes for a row to be dropped."""
 df.dropna()

 """deleting a column"""
 del df['column-name']  # note that df.column-name won't work.

 """filter by multiple conditions in a dataframe df
 (each condition needs its own parentheses!)"""
 df[(df['gender'] == 'M') & (df['cc_iso'] == 'US')]

 """filter by conditions and the condition on row labels(index)"""
 df[(df.a > 0) & (df.index.isin([0, 2, 4]))]

 """quick way to create a data frame to try things out""" 
 # randn(5, 4) produces four columns, so four column labels are required
 # (three labels raise a ValueError about mismatched shapes).
 df = pd.DataFrame(np.random.randn(5, 4), columns=['a', 'b', 'c', 'd'])

 """creating complex filters using functions on rows: http://goo.gl/r57b1"""
 df[df.apply(lambda x: x['b'] > x['c'], axis=1)]

 """Pandas replace operation http://goo.gl/DJphs
 Assign through a single indexing step: chained forms such as
 df[2].replace(..., inplace=True) or df[1][mask] = value may operate on a
 temporary copy and leave df unchanged."""
 df[2] = df[2].replace(4, 17)
 df.loc[df[1] == 4, 1] = 19

 """apply and map examples"""
 """add 1 to every element
 (applymap was renamed DataFrame.map in pandas 2.1)"""
 df.applymap(lambda x: x+1)

 """add 2 to row 3 and return the series"""
 df.apply(lambda x: x[3]+2,axis=0)

 """add 3 to col a and return the series"""
 df.apply(lambda x: x['a']+3,axis=1)

 """example of applying a complex external function 
 to each row of a data frame""" 
 def stripper(x):
     """Return the first run of four dot-separated numbers in a row's text.

     Args:
         x: A row (anything indexable by column name) whose text is stored
             under the 'Text with IP adress embedded' key.

     Returns:
         The first substring made of four dot-separated digit runs, or ''
         when the value is not a string (e.g. NaN/None for missing data)
         or contains no such substring.
     """
     text = x['Text with IP adress embedded']
     # Missing values arrive as NaN/None; re would raise TypeError on them.
     if not isinstance(text, str):
         return ''
     # you can take care of other special cases (more than the expected
     # number of matches, etc.) here as well.
     match = re.search(r'[0-9]+(?:\.[0-9]+){3}', text)
     return match.group(0) if match else ''

 df.apply(stripper, axis=1)

 """groupby used like a histogram to obtain counts on sub-ranges of a variable, pretty handy"""
 df.groupby(pd.cut(df.age, range(0, 130, 10))).size()

 """finding the distribution based on quantiles
 (chain an aggregation such as .size() onto the groupby to see the result)"""
 df.groupby(pd.qcut(df.age, [0, 0.99, 1]))

 """if you don't need specific bins like above, and just want to count the number of each value"""
 df.age.value_counts()

 """one liner to normalize a data frame"""
 (df - df.mean()) / (df.max() - df.min())

 """grouping and applying a group specific function to each group element, 
 I think this could be simpler, but here is my current version""" 
 quantile = [0, 0.50, 0.75, 0.90, 0.95, 0.99, 1]
 # Label every age bucket with its upper quantile bound directly, rather than
 # relying on the iteration order of the groups lining up with `quantile`
 # and writing into the per-group frames (which triggers SettingWithCopy).
 age_quantile = pd.qcut(df.age, quantile, labels=quantile[1:]).astype(float)
 # Same result as concatenating the groups: rows without an age are dropped
 # and rows are ordered by bucket, keeping their original order inside each
 # bucket (mergesort is stable). `self` is not defined at this scope, so the
 # result is bound to df.
 df = (
     df.assign(age_quantile=age_quantile)
     .dropna(subset=['age_quantile'])
     .sort_values('age_quantile', kind='mergesort')
 )
	"""Given a dataframe df to filter by a series s:"""
	df[df['col_name'].isin(s)]

	"""to do the same filter on the index instead of an arbitrary column.
	.ix was removed in pandas 1.0; .loc is its label-based replacement.
	.loc raises KeyError for labels in s that are missing from the index --
	use df[df.index.isin(s)] to skip such labels instead."""
	df.loc[s]

	"""display only certain columns, note it is a list inside the brackets"""
	df[['A', 'B']]

	"""drop rows with at least one null value; pass how='all' or thresh=n
	to change how many nulls it takes for a row to be dropped."""
	df.dropna()

	"""deleting a column"""
	del df['column-name']  # note that df.column-name won't work.

	"""filter by multiple conditions in a dataframe df
	(each condition needs its own parentheses!)"""
	df[(df['gender'] == 'M') & (df['cc_iso'] == 'US')]

	"""filter by conditions and the condition on row labels(index)"""
	df[(df.a > 0) & (df.index.isin([0, 2, 4]))]

	"""quick way to create a data frame to try things out"""
	# randn(5, 4) produces four columns, so four column labels are required
	# (three labels raise a ValueError about mismatched shapes).
	df = pd.DataFrame(np.random.randn(5, 4), columns=['a', 'b', 'c', 'd'])

	"""creating complex filters using functions on rows: http://goo.gl/r57b1"""
	df[df.apply(lambda x: x['b'] > x['c'], axis=1)]

	"""Pandas replace operation http://goo.gl/DJphs
	Assign through a single indexing step: chained forms such as
	df[2].replace(..., inplace=True) or df[1][mask] = value may operate on a
	temporary copy and leave df unchanged."""
	df[2] = df[2].replace(4, 17)
	df.loc[df[1] == 4, 1] = 19

	"""apply and map examples"""
	"""add 1 to every element
	(applymap was renamed DataFrame.map in pandas 2.1)"""
	df.applymap(lambda x: x+1)

	"""add 2 to row 3 and return the series"""
	df.apply(lambda x: x[3]+2,axis=0)

	"""add 3 to col a and return the series"""
	df.apply(lambda x: x['a']+3,axis=1)

	"""example of applying a complex external function
	to each row of a data frame"""
	def stripper(x):
		"""Return the first run of four dot-separated numbers in a row's text.

		Args:
			x: A row (anything indexable by column name) whose text is
				stored under the 'Text with IP adress embedded' key.

		Returns:
			The first substring made of four dot-separated digit runs, or ''
			when the value is not a string (e.g. NaN/None for missing data)
			or contains no such substring.
		"""
		text = x['Text with IP adress embedded']
		# Missing values arrive as NaN/None; re would raise TypeError on them.
		if not isinstance(text, str):
			return ''
		# you can take care of other special cases (more than the expected
		# number of matches, etc.) here as well.
		match = re.search(r'[0-9]+(?:\.[0-9]+){3}', text)
		return match.group(0) if match else ''

	df.apply(stripper, axis=1)

	"""groupby used like a histogram to obtain counts on sub-ranges of a variable, pretty handy"""
	df.groupby(pd.cut(df.age, range(0, 130, 10))).size()

	"""finding the distribution based on quantiles
	(chain an aggregation such as .size() onto the groupby to see the result)"""
	df.groupby(pd.qcut(df.age, [0, 0.99, 1]))

	"""if you don't need specific bins like above, and just want to count the number of each value"""
	df.age.value_counts()

	"""one liner to normalize a data frame"""
	(df - df.mean()) / (df.max() - df.min())

	"""grouping and applying a group specific function to each group element,
	I think this could be simpler, but here is my current version"""
	quantile = [0, 0.50, 0.75, 0.90, 0.95, 0.99, 1]
	# Label every age bucket with its upper quantile bound directly, rather than
	# relying on the iteration order of the groups lining up with `quantile`
	# and writing into the per-group frames (which triggers SettingWithCopy).
	age_quantile = pd.qcut(df.age, quantile, labels=quantile[1:]).astype(float)
	# Same result as concatenating the groups: rows without an age are dropped
	# and rows are ordered by bucket, keeping their original order inside each
	# bucket (mergesort is stable). `self` is not defined at this scope, so the
	# result is bound to df.
	df = (
		df.assign(age_quantile=age_quantile)
		.dropna(subset=['age_quantile'])
		.sort_values('age_quantile', kind='mergesort')
	)
No results found