Last active
March 8, 2017 04:37
-
-
Save paiva/083387046ebfec858316 to your computer and use it in GitHub Desktop.
This is a description test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
# Module-level frame loaded from 'pages.csv' when this file runs; get_global_data and heat_map below read it as a global.
| raw_df = pd.read_csv('pages.csv') | |
def get_global_data(self):
    """Collapse the loaded report into a single row of global metrics.

    Reads the module-level ``raw_df`` frame (columns 'Impressions', 'Clicks',
    'Avg. position', 'Page') and ``raw_df_q`` (column 'Query').
    NOTE(review): ``raw_df_q`` and ``self.extract_url_form`` are not defined
    in the visible part of this file -- confirm where they come from.

    Returns:
        One-row DataFrame with the columns 'Global #Impressions',
        'Global #Clicks', 'Global Avg. Position', 'Global #Landing Pages',
        'Global #Queries' and 'Global CTR(%)'.
    """
    def to_int(value):
        # Counts may arrive as strings with thousands separators ("1,234").
        if ',' in str(value):
            return int(value.replace(',', ''))
        return int(value)

    # Per-row frame; rows with a missing value in any column are dropped
    # before aggregating.
    df = pd.DataFrame({
        'Global #Impressions': raw_df['Impressions'].map(to_int),
        'Global #Clicks': raw_df['Clicks'].map(to_int),
        'Global Avg. Position': raw_df['Avg. position'].astype(float),
        'Global #Landing Pages': raw_df['Page'].map(self.extract_url_form),
        'Global #Queries': raw_df_q['Query'],
    }).dropna()

    # Overwrite every column with its aggregate: each row then carries the
    # same totals, which is why only the first row is returned.
    df['Global #Impressions'] = df['Global #Impressions'].sum()
    df['Global #Clicks'] = df['Global #Clicks'].sum()
    df['Global Avg. Position'] = df['Global Avg. Position'].mean()
    # NOTE: count() counts non-null rows, not distinct values.
    df['Global #Landing Pages'] = df['Global #Landing Pages'].count()
    df['Global #Queries'] = df['Global #Queries'].count()

    # Global CTR is #Clicks / #Impressions as a truncated integer percentage.
    clicks = df['Global #Clicks'].astype(float)
    impressions = df['Global #Impressions'].astype(float)
    # Zero impressions would give inf/NaN, which cannot be cast to int;
    # mask the zero denominator and report a CTR of 0 instead.
    ctr = (clicks / impressions.where(impressions != 0)) * 100
    df['Global CTR(%)'] = ctr.fillna(0).astype(int)

    return df.head(n=1)
| """ HEAT MAP of Impressions or Clicks""" | |
def heat_map(self, key):
    """Pivot one metric into a url_form x Language table.

    Reads the module-level ``raw_df`` frame (columns 'Page', 'Impressions',
    'Clicks'). Only pages whose URL contains '/en/', '/es/' or '/fr/' are
    included.
    NOTE(review): ``extractor``, ``self.extract_url_form`` and
    ``self.extract_lang`` are not defined in the visible part of this file --
    confirm their contracts.

    Args:
        key: Column to aggregate, either 'Impressions' or 'Clicks'.

    Returns:
        DataFrame indexed by 'url_form' with one column per 'Language',
        holding the sum of ``key``; missing combinations are filled with 0.
    """
    languages = ['en', 'es', 'fr']

    def to_int(value):
        # Counts may arrive as strings with thousands separators ("1,234").
        if ',' in str(value):
            return int(value.replace(',', ''))
        return int(value)

    # One grouped frame per language, concatenated afterwards.
    frames = []
    for lang in languages:
        # Literal substring match; rows with a missing page are excluded
        # instead of putting NaN into the boolean mask.
        in_lang = raw_df['Page'].str.contains('/' + lang + '/', regex=False, na=False)
        raw_df_lang = raw_df[in_lang]

        data = pd.DataFrame({
            'url_form': raw_df_lang['Page'].map(lambda url: extractor(self.extract_url_form(url))),
            'Language': raw_df_lang['Page'].map(self.extract_lang),
            'Impressions': raw_df_lang['Impressions'].map(to_int),
            'Clicks': raw_df_lang['Clicks'].map(to_int),
        }).dropna()

        # Sum the metrics per (url_form, Language) pair.
        frames.append(data.groupby(['url_form', 'Language']).sum())

    # Flatten the group keys back into columns so they can be pivoted.
    df = pd.concat(frames).reset_index()

    # pivot_table takes index=/columns=; the old rows=/cols= keywords were
    # removed from pandas and now raise TypeError.
    return pd.pivot_table(
        df,
        index=['url_form'],
        columns=['Language'],
        values=key,
        fill_value=0,
        aggfunc='sum',
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment