Skip to content

Instantly share code, notes, and snippets.

@paiva
Last active March 8, 2017 04:37
Show Gist options
  • Save paiva/083387046ebfec858316 to your computer and use it in GitHub Desktop.
Save paiva/083387046ebfec858316 to your computer and use it in GitHub Desktop.
This is a description test
import pandas as pd
# Module-level page report, read once at import time from 'pages.csv' in the
# current working directory. The functions below read its 'Page',
# 'Impressions', 'Clicks' and 'Avg. position' columns.
raw_df = pd.read_csv('pages.csv')
def get_global_data(self):
    """Summarise the Webmaster Tools export into a single row of global metrics.

    Reads the module-level ``raw_df`` (page report) and ``raw_df_q`` (defined
    outside this block; presumably the query report -- TODO confirm). The
    columns are aligned on their row index and every row with a missing value
    in any column is dropped before aggregating.

    Returns:
        A DataFrame with at most one row (empty when no complete rows remain)
        containing:
            'Global #Impressions':   sum of impressions
            'Global #Clicks':        sum of clicks
            'Global Avg. Position':  mean of the average position
            'Global #Landing Pages': number of non-null landing-page values
            'Global #Queries':       number of non-null queries
            'Global CTR(%)':         clicks / impressions * 100, truncated to
                                     an integer; 0 when there are no impressions
    """
    def _to_int(value):
        # Counts may be numbers or strings containing commas (presumably
        # thousands separators); strip the commas before converting.
        if ',' in str(value):
            return int(str(value).replace(',', ''))
        return int(value)

    df = pd.DataFrame({
        'Global #Impressions': raw_df['Impressions'].map(_to_int),
        'Global #Clicks': raw_df['Clicks'].map(_to_int),
        'Global Avg. Position': raw_df['Avg. position'].astype(float),
        'Global #Landing Pages': raw_df['Page'].map(self.extract_url_form),
        'Global #Queries': raw_df_q['Query'],
    }).dropna()

    total_impressions = df['Global #Impressions'].sum()
    total_clicks = df['Global #Clicks'].sum()

    # Every row receives the same aggregate value; only the first row is
    # returned, so the result is a one-row summary.
    df['Global #Impressions'] = total_impressions
    df['Global #Clicks'] = total_clicks
    df['Global Avg. Position'] = df['Global Avg. Position'].mean()
    df['Global #Landing Pages'] = df['Global #Landing Pages'].count()
    df['Global #Queries'] = df['Global #Queries'].count()

    # CTR is #Clicks / #Impressions as a whole-number percentage. Guard the
    # zero-impressions case, which would otherwise produce NaN/inf and fail
    # on the integer conversion.
    if total_impressions:
        ctr = int(float(total_clicks) / float(total_impressions) * 100)
    else:
        ctr = 0
    df['Global CTR(%)'] = ctr

    return df.head(n=1)
""" HEAT MAP of Impressions or Clicks"""
def heat_map(self, key):
    """Build a URL-form x language table of summed impressions or clicks.

    For each language code in ('en', 'es', 'fr'), the rows of the module-level
    ``raw_df`` whose 'Page' contains '/<code>/' are selected, reduced to a
    url_form / Language / Impressions / Clicks frame, and summed per
    (url_form, Language). The per-language results are then concatenated and
    pivoted.

    Args:
        key: Column to aggregate, either 'Impressions' or 'Clicks'.

    Returns:
        A DataFrame indexed by 'url_form' with one column per language,
        holding the sum of ``key``; combinations with no data are filled
        with 0.
    """
    def _to_int(value):
        # Counts may be numbers or strings containing commas (presumably
        # thousands separators); strip the commas before converting.
        if ',' in str(value):
            return int(str(value).replace(',', ''))
        return int(value)

    languages = ['en', 'es', 'fr']

    per_language = []
    for lang in languages:
        # na=False keeps rows with a missing 'Page' out of the mask instead
        # of failing on a non-boolean (NaN) mask.
        in_lang = raw_df['Page'].str.contains('/' + lang + '/', na=False)
        raw_df_lang = raw_df[in_lang]

        # `extractor` is defined outside this block; it post-processes the
        # value returned by self.extract_url_form.
        data = pd.DataFrame({
            'url_form': raw_df_lang['Page'].map(
                lambda url: extractor(self.extract_url_form(url))),
            'Language': raw_df_lang['Page'].map(self.extract_lang),
            'Impressions': raw_df_lang['Impressions'].map(_to_int),
            'Clicks': raw_df_lang['Clicks'].map(_to_int),
        }).dropna()

        # Total impressions and clicks per (url_form, Language) pair.
        per_language.append(data.groupby(['url_form', 'Language']).sum())

    # Flatten the (url_form, Language) index back into ordinary columns so
    # they can serve as the pivot axes.
    df = pd.concat(per_language).reset_index()

    # pandas replaced the old `rows=` / `cols=` keywords with `index=` /
    # `columns=`; the old names raise TypeError on current versions.
    return pd.pivot_table(
        df,
        index=['url_form'],
        columns=['Language'],
        values=key,
        fill_value=0,
        aggfunc='sum',
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment