paiva · March 8, 2017 04:37
diff --git a/df.py b/df.py
 import pandas as pd

 # Load pages.csv once at import time; the functions below read this
 # module-level frame (columns used: Page, Impressions, Clicks, Avg. position).
 raw_df = pd.read_csv('pages.csv')

 def get_global_data(self):
     """Aggregate the site-wide search metrics into a single-row dataframe.

     Reads the module-level ``raw_df`` (page export) and ``raw_df_q``
     (query export, defined outside this function). Takes no arguments
     besides ``self``.

     NOTE(review): the page and query columns are combined in one frame and
     therefore aligned by row index; ``dropna()`` then keeps only rows where
     every column has a value. Confirm that trimming is intended.

     Returns:
         One-row DataFrame with the columns 'Global #Impressions',
         'Global #Clicks', 'Global Avg. Position', 'Global #Landing Pages',
         'Global #Queries' and 'Global CTR(%)' (whole percent, truncated).
     """

     def _parse_count(value):
         # Counts may be plain numbers or strings with thousands
         # separators such as "1,234".
         if ',' in str(value):
             return int(value.replace(',', ''))
         return int(value)

     # Per-row frame; every column is collapsed to one aggregate below.
     df = pd.DataFrame({
         'Global #Impressions': raw_df['Impressions'].map(_parse_count),
         'Global #Clicks': raw_df['Clicks'].map(_parse_count),
         'Global Avg. Position': raw_df['Avg. position'].astype(float),
         'Global #Landing Pages': raw_df['Page'].map(self.extract_url_form),
         'Global #Queries': raw_df_q['Query'],
     }).dropna()

     total_impressions = df['Global #Impressions'].sum()
     total_clicks = df['Global #Clicks'].sum()

     # Broadcast each aggregate over its column so the first row carries
     # all global values.
     df['Global #Impressions'] = total_impressions
     df['Global #Clicks'] = total_clicks
     df['Global Avg. Position'] = df['Global Avg. Position'].mean()
     df['Global #Landing Pages'] = df['Global #Landing Pages'].count()
     df['Global #Queries'] = df['Global #Queries'].count()

     # CTR = clicks / impressions as a truncated whole percent. Guard the
     # zero-impressions case: 0/0 yields NaN, which cannot be cast to int.
     if total_impressions:
         ctr = int(float(total_clicks) / float(total_impressions) * 100)
     else:
         ctr = 0
     df['Global CTR(%)'] = ctr

     return df.head(n=1)


 """ HEAT MAP of Impressions or Clicks""" 
 def heat_map(self, key):
     """Pivot summed Impressions or Clicks by page (rows) and language (columns).

     Reads the module-level ``raw_df`` and, for each of the hard-coded
     language codes, selects the pages whose URL contains ``/<lang>/``.

     Args:
         key: Column to aggregate, either 'Impressions' or 'Clicks'.

     Returns:
         DataFrame indexed by 'url_form' with one column per distinct
         'Language' value, holding the summed ``key``; missing
         combinations are filled with 0.
     """

     def _parse_count(value):
         # Counts may be plain numbers or strings with thousands
         # separators such as "1,234".
         if ',' in str(value):
             return int(value.replace(',', ''))
         return int(value)

     languages = ['en', 'es', 'fr']

     # One grouped frame per language, concatenated after the loop.
     per_language = []

     for lang in languages:
         # na=False: a missing Page counts as "no match" instead of putting
         # NaN into the mask, which would make boolean indexing fail.
         lang_mask = raw_df['Page'].str.contains('/' + lang + '/', na=False)
         raw_df_lang = raw_df[lang_mask]

         data = pd.DataFrame({
             'url_form': raw_df_lang['Page'].map(lambda url: extractor(self.extract_url_form(url))),
             'Language': raw_df_lang['Page'].map(self.extract_lang),
             'Impressions': raw_df_lang['Impressions'].map(_parse_count),
             'Clicks': raw_df_lang['Clicks'].map(_parse_count),
         }).dropna()

         # Sum the metrics for each (url_form, Language) pair.
         per_language.append(data.groupby(['url_form', 'Language']).sum())

     # Flatten the group index back into columns for pivoting.
     df = pd.concat(per_language).reset_index()

     # pivot_table takes index/columns; the older rows/cols keywords were
     # removed from pandas and raise TypeError.
     return pd.pivot_table(df, index=['url_form'], columns=['Language'], values=key, fill_value=0, aggfunc='sum')
	import pandas as pd

	# Load pages.csv once at import time; the functions below read this
	# module-level frame (columns used: Page, Impressions, Clicks, Avg. position).
	raw_df = pd.read_csv('pages.csv')

	def get_global_data(self):
		"""Aggregate the site-wide search metrics into a single-row dataframe.

		Reads the module-level ``raw_df`` (page export) and ``raw_df_q``
		(query export, defined outside this function). Takes no arguments
		besides ``self``.

		NOTE(review): the page and query columns are combined in one frame and
		therefore aligned by row index; ``dropna()`` then keeps only rows where
		every column has a value. Confirm that trimming is intended.

		Returns:
			One-row DataFrame with the columns 'Global #Impressions',
			'Global #Clicks', 'Global Avg. Position', 'Global #Landing Pages',
			'Global #Queries' and 'Global CTR(%)' (whole percent, truncated).
		"""

		def _parse_count(value):
			# Counts may be plain numbers or strings with thousands
			# separators such as "1,234".
			if ',' in str(value):
				return int(value.replace(',', ''))
			return int(value)

		# Per-row frame; every column is collapsed to one aggregate below.
		df = pd.DataFrame({
			'Global #Impressions': raw_df['Impressions'].map(_parse_count),
			'Global #Clicks': raw_df['Clicks'].map(_parse_count),
			'Global Avg. Position': raw_df['Avg. position'].astype(float),
			'Global #Landing Pages': raw_df['Page'].map(self.extract_url_form),
			'Global #Queries': raw_df_q['Query'],
		}).dropna()

		total_impressions = df['Global #Impressions'].sum()
		total_clicks = df['Global #Clicks'].sum()

		# Broadcast each aggregate over its column so the first row carries
		# all global values.
		df['Global #Impressions'] = total_impressions
		df['Global #Clicks'] = total_clicks
		df['Global Avg. Position'] = df['Global Avg. Position'].mean()
		df['Global #Landing Pages'] = df['Global #Landing Pages'].count()
		df['Global #Queries'] = df['Global #Queries'].count()

		# CTR = clicks / impressions as a truncated whole percent. Guard the
		# zero-impressions case: 0/0 yields NaN, which cannot be cast to int.
		if total_impressions:
			ctr = int(float(total_clicks) / float(total_impressions) * 100)
		else:
			ctr = 0
		df['Global CTR(%)'] = ctr

		return df.head(n=1)


	""" HEAT MAP of Impressions or Clicks"""
	def heat_map(self, key):
		"""Pivot summed Impressions or Clicks by page (rows) and language (columns).

		Reads the module-level ``raw_df`` and, for each of the hard-coded
		language codes, selects the pages whose URL contains ``/<lang>/``.

		Args:
			key: Column to aggregate, either 'Impressions' or 'Clicks'.

		Returns:
			DataFrame indexed by 'url_form' with one column per distinct
			'Language' value, holding the summed ``key``; missing
			combinations are filled with 0.
		"""

		def _parse_count(value):
			# Counts may be plain numbers or strings with thousands
			# separators such as "1,234".
			if ',' in str(value):
				return int(value.replace(',', ''))
			return int(value)

		languages = ['en', 'es', 'fr']

		# One grouped frame per language, concatenated after the loop.
		per_language = []

		for lang in languages:
			# na=False: a missing Page counts as "no match" instead of putting
			# NaN into the mask, which would make boolean indexing fail.
			lang_mask = raw_df['Page'].str.contains('/' + lang + '/', na=False)
			raw_df_lang = raw_df[lang_mask]

			data = pd.DataFrame({
				'url_form': raw_df_lang['Page'].map(lambda url: extractor(self.extract_url_form(url))),
				'Language': raw_df_lang['Page'].map(self.extract_lang),
				'Impressions': raw_df_lang['Impressions'].map(_parse_count),
				'Clicks': raw_df_lang['Clicks'].map(_parse_count),
			}).dropna()

			# Sum the metrics for each (url_form, Language) pair.
			per_language.append(data.groupby(['url_form', 'Language']).sum())

		# Flatten the group index back into columns for pivoting.
		df = pd.concat(per_language).reset_index()

		# pivot_table takes index/columns; the older rows/cols keywords were
		# removed from pandas and raise TypeError.
		return pd.pivot_table(df, index=['url_form'], columns=['Language'], values=key, fill_value=0, aggfunc='sum')