require 'date'
require 'digest'
require 'json'
require 'net/http'

# = simple_google_analytics.rb
#
# Chris Le
#
# This module is a wrapper to export data from Google Analytics as a flattened
# hash suitable for database storage. Besides the 'oauth' gem, the code calls
# String#underscore and Hash#to_query, which are ActiveSupport core
# extensions, so ActiveSupport must be loaded as well. I used this simply to
# get metrics and directly store them in a database.
#
# This may be a better choice over Garb or Gattica if you are already familiar
# with Google Analytics' API parameters because there's no DSL or sugar
# involved. It does have a few pivoting methods that can come in handy.
#
# The returned hash also includes a SHA1 signature made up of the date
# and the profile ID number. I use it as a unique primary key so I can call
# the Google API for lots of different stuff and not worry about finding the
# right row when storing it.
#
# Dimensions and Metrics reference
# https://developers.google.com/analytics/devguides/reporting/core/dimsmets
#
# == Example
#
#   API_CONFIG[:google_oauth_app_key] =
#   API_CONFIG[:google_oauth_app_secret] =
#   API_CONFIG[:google_oauth_user_token] =
#   API_CONFIG[:google_oauth_user_secret] =
#
#   # New instance of Google Analytics
#   ga = ExternalApi::GoogleAnalytics.new
#
#   # Return the accounts that are available to you
#   accounts = ga.accounts
#
#   # Return the web_properties that are available to you
#   web_properties = ga.web_properties
#
#   # Return the profiles that are available to you
#   profiles = ga.profiles
#
#   #######################
#
#   report = ExternalApi::GoogleAnalytics.new({
#     :profile_id => 12345678,
#     :start_date => '2012-12-01',
#     :end_date   => '2012-12-30'
#   })
#
#   # Get organic traffic for the month of December
#   report.organic_traffic
#
module ExternalApi
  class GoogleAnalytics
    BASE = 'https://www.googleapis.com/analytics/v3'.freeze

    # Used to filter everything by organic traffic only
    ORGANIC_FILTER = 'ga:medium==organic'.freeze

    # When getting traffic metric, pull in these metrics
    DEFAULT_METRICS = [
      'ga:visits',
      'ga:bounces',
      'ga:visitBounceRate',
      'ga:timeOnSite',
      'ga:transactions',
      'ga:transactionRevenue'
    ].join(',').freeze

    # Initialize the class. If you specify no parameters, you can use the
    # management API calls like .accounts, .web_properties. If you specify
    # parameters, you can use the metrics methods.
    #
    # Yes, it's kind of sloppy but it was for backwards compatibility :)
    #
    # Raises a RuntimeError when either date is not a String or when the
    # profile ID is not an Integer.
    #
    # === Examples
    #
    #   ga = ExternalApi::GoogleAnalytics.new
    #
    #   ## or...
    #
    #   ga = ExternalApi::GoogleAnalytics.new({
    #     :profile_id => 23987717,
    #     :start_date => '2012-12-01',
    #     :end_date   => '2012-12-30'
    #   })
    #
    def initialize(params = nil)
      return if params.nil? || params.empty?

      # BOTH dates must be strings; a single bad date is rejected too.
      unless params[:start_date].is_a?(String) && params[:end_date].is_a?(String)
        raise 'Start and ending date needs to be a string like YYYY-MM-DD'
      end

      # Integer (not Fixnum): Fixnum no longer exists on current Rubies.
      raise 'Profile ID must be a fixnum' unless params[:profile_id].is_a?(Integer)

      @profile_id = "ga:#{params[:profile_id]}"
      @start_date = params[:start_date]
      @end_date   = params[:end_date]
    end

    # Return all the accounts from Google Analytics
    #
    # === Example
    #
    #   my_accounts = ga.accounts
    #
    def accounts
      fetch_items("#{BASE}/management/accounts/")
    end

    # Return all the web properties from Google Analytics
    #
    # === Example
    #
    #   web_properties = ga.web_properties
    #
    def web_properties
      fetch_items("#{BASE}/management/accounts/~all/webproperties/")
    end

    # Return all the profiles from Google Analytics
    #
    # === Example
    #
    #   profiles = ga.profiles
    #
    def profiles
      fetch_items("#{BASE}/management/accounts/~all/webproperties/~all/profiles")
    end

    # Return all the goals from Google Analytics
    #
    # === Example
    #
    #   goals = ga.goals
    #
    def goals
      fetch_items("#{BASE}/management/accounts/~all/webproperties/~all/profiles/~all/goals")
    end

    # Returns organic traffic between two dates
    #
    # === Example
    #
    #   report.organic_traffic
    #
    def organic_traffic
      fetch_query(report_query(
        'metrics'    => DEFAULT_METRICS,
        'dimensions' => 'ga:date',
        'filters'    => ORGANIC_FILTER
      )).to_h
    end

    # Returns organic traffic coming from mobile operating systems, with one
    # pivoted "visits_operating_system_<os>" column per operating system.
    #
    # === Example
    #
    #   report.organic_mobile_traffic
    #
    def organic_mobile_traffic
      data = fetch_query(report_query(
        'metrics'    => 'ga:visits',
        'dimensions' => 'ga:date,ga:isMobile,ga:operatingSystem',
        'filters'    => "ga:isMobile==Yes;#{ORGANIC_FILTER}"
      ))
      data.pivot_by_keymap(:key => 'operatingSystem', :value => 'visits').to_h
    end

    # Returns organic goal metrics for a single goal
    #
    # === Example
    #
    #   # Get metrics for goal #1
    #   report.organic_goal_metrics(1)
    #
    def organic_goal_metrics(number)
      metrics = %w[Starts Completions Value ConversionRate Abandons AbandonRate]
                .map { |suffix| "ga:goal#{number}#{suffix}" }
                .join(',')

      fetch_query(report_query(
        'metrics'    => metrics,
        'dimensions' => 'ga:date',
        'filters'    => ORGANIC_FILTER
      )).to_h
    end

    # Returns the aggregated ("All") goal metrics for one traffic medium.
    # Passing 'none' is translated to Google's '(none)' medium.
    #
    # The filter is the requested medium only. It used to be AND-ed (";")
    # with ORGANIC_FILTER, which can never match for any medium other than
    # 'organic'.
    #
    # === Example
    #
    #   # Get goal metrics from referral traffic
    #   report.goal_all_by_medium('referral')
    #
    def goal_all_by_medium(medium)
      medium = '(none)' if medium == 'none'

      metrics = %w[
        ga:goalStartsAll
        ga:goalCompletionsAll
        ga:goalValueAll
        ga:goalConversionRateAll
        ga:goalAbandonsAll
        ga:goalAbandonRateAll
      ].join(',')

      fetch_query(report_query(
        'metrics'    => metrics,
        'dimensions' => 'ga:date',
        'filters'    => "ga:medium==#{medium}"
      )).to_h
    end

    # Returns organic visits grouped by search engine (google, yahoo, bing),
    # with one pivoted "visits_source_<engine>" column per engine.
    #
    # === Example
    #
    #   report.organic_visits_from_search_engines
    #
    def organic_visits_from_search_engines
      data = fetch_query(report_query(
        'metrics'    => 'ga:visits',
        'dimensions' => 'ga:date,ga:source',
        'filters'    => "ga:source==google,ga:source==yahoo,ga:source==bing;#{ORGANIC_FILTER}"
      ))
      data.pivot_by_keymap(:key => 'source', :value => 'visits').to_h
    end

    # Returns average site speeds
    #
    # === Example
    #
    #   report.site_speed
    #
    def site_speed
      metrics = %w[
        ga:avgPageLoadTime
        ga:avgDomainLookupTime
        ga:avgPageDownloadTime
        ga:avgRedirectionTime
        ga:avgServerConnectionTime
        ga:avgServerResponseTime
      ].join(',')

      fetch_query(report_query(
        'metrics'    => metrics,
        'dimensions' => 'ga:date'
      )).to_h
    end

    # Returns the number of keywords that are driving traffic, per date
    #
    # === Example
    #
    #   report.keywords_driving_organic_traffic
    #
    def keywords_driving_organic_traffic
      data = fetch_query(report_query(
        'metrics'     => 'ga:visits',
        'dimensions'  => 'ga:date,ga:keyword',
        'filters'     => ORGANIC_FILTER,
        'max-results' => '10000'
      ))
      data.count_of_field(:keyword, :keywords_driving_traffic)
    end

    # Returns the number of organic landing pages that drove traffic, per date
    #
    # === Example
    #
    #   report.organic_landing_pages
    #
    def organic_landing_pages
      data = fetch_query(report_query(
        'metrics'     => 'ga:visits',
        'dimensions'  => 'ga:date,ga:landingPagePath',
        'filters'     => ORGANIC_FILTER,
        'max-results' => '10000'
      ))
      data.count_of_field(:landingPagePath, :organic_landing_pages)
    end

    # Organic landing pages sorted by a metric. If the metric has a "-" at
    # the beginning, then we will sort in descending order.
    #
    # NOTE: despite the method name, no top-10 cut is applied here; up to
    # 10,000 rows are requested and all returned rows are serialized.
    #
    # === Example
    #
    #   # Visits in ascending order
    #   report.top_10_organic_landing_pages_by('visits')
    #
    #   # Visits in descending order
    #   report.top_10_organic_landing_pages_by('-visits')
    #
    def top_10_organic_landing_pages_by(metric)
      order = ''
      if metric.start_with?('-')
        order  = '-'
        metric = metric[1..-1]
      end

      data = fetch_query(report_query(
        'metrics'     => "ga:#{metric}",
        'dimensions'  => 'ga:date,ga:landingPagePath',
        'filters'     => ORGANIC_FILTER,
        'max-results' => '10000',
        'sort'        => "#{order}ga:#{metric}"
      ))
      data.collect_serial_by_field('landingPagePath', metric)
    end

    ##########################################################################

    # Fetch URL. Returns the raw body. Raises GaAccessForbidden if we get a 403.
    #
    # NOTE(review): GaAccessForbidden is not defined in this file; confirm it
    # is defined elsewhere in the project.
    def fetch(url)
      puts "Fetching #{url}"
      response = token.get(url)
      raise GaAccessForbidden, '403 Forbidden' if response.code_type == Net::HTTPForbidden

      response.body
    end

    # Fetch from a query hash and return a GaResponseParser instance
    def fetch_query(query)
      url = "#{BASE}/data/ga?#{query.to_query}"
      GaResponseParser.new(fetch(url))
    end

    # Fetch items from a URL, following 'nextLink' while one exists.
    #
    # Returns a new array: the given +items+ followed by the items of every
    # page, each exactly once. The +items+ argument itself is not mutated.
    def fetch_items(url, items = [])
      collected = items.dup

      while url
        page = JSON.parse(fetch(url))
        collected.concat(page['items'] || []) # a page may carry no items
        url = page['nextLink']
      end

      collected
    end

    # Returns an oauth access token built from the API_CONFIG credentials
    def token
      consumer = OAuth::Consumer.new(
        API_CONFIG[:google_oauth_app_key],
        API_CONFIG[:google_oauth_app_secret],
        {
          site: 'https://www.googleapis.com',
          request_token_path: '/accounts/OAuthGetRequestToken',
          access_token_path: '/accounts/OAuthGetAccessToken',
          authorize_path: '/accounts/OAuthAuthorizeToken'
        }
      )
      OAuth::AccessToken.new(
        consumer,
        API_CONFIG[:google_oauth_user_token],
        API_CONFIG[:google_oauth_user_secret]
      )
    end

    # Builds a reporting query: the profile id and date range shared by every
    # report, merged with the report-specific parameters in +extra+.
    def report_query(extra)
      {
        'ids'        => @profile_id,
        'start-date' => @start_date,
        'end-date'   => @end_date
      }.merge(extra)
    end
    private :report_query

    # Class for parsing the Google Analytics response
    #
    # === Example
    #
    #   url = "https://www.googleapis.com/analytics/v3/ ...."
    #   response = token.get(url)
    #
    #   data = GaResponseParser.new(response.body)
    #   data.to_h
    #
    class GaResponseParser
      # +response+ is the raw JSON body. +pivot+ is accepted for backwards
      # compatibility and is not used.
      def initialize(response, pivot = nil)
        @json = JSON.parse(response)
        @hash = transform(response_to_hash(@json))
      end

      # Returns the GA response rows (an array of hashes, one per row)
      def to_h
        @hash
      end

      # Number of rows per date, stored under +hash_field_name+.
      #
      # +field+ is currently not used by the computation: the count is the
      # number of response rows sharing each date.
      #
      # === Example
      #
      #   data.count_of_field(:landingPagePath, :organic_landing_pages)
      #
      #   # => [{:date => "2012-12-15",
      #          :organic_landing_pages => 56,
      #          :signature => "a07a481a11a9397939362af003966c9abf6eba03",
      #          :profile_id => "123456789"},
      #         ....]
      #
      def count_of_field(field, hash_field_name)
        # group_by keeps dates in order of first appearance.
        @hash.group_by { |row| row['date'] }.map do |date, rows|
          {
            :date                  => date,
            hash_field_name.to_sym => rows.count,
            :signature             => rows.first[:signature],
            :profile_id            => rows.first[:profile_id]
          }
        end
      end

      # Collects, per date, the +field+ and +metric+ values of every row and
      # serializes them as a JSON encoded string stored under the
      # underscored metric name.
      #
      # === Example
      #
      #   data.collect_serial_by_field('landingPagePath', 'visits')
      #
      #   # => [{:date=>"2012-12-17",
      #          :visits=>
      #           "[{\"landing_page_path\":\"/\",\"visits\":\"79\"}, ....
      #          :signature=>"0f2ba619c9db80a382313c09c5951a9bf8132b08",
      #          :profile_id=>"123456789"},
      #         ....]
      #
      def collect_serial_by_field(field, metric)
        field_key  = field.underscore
        metric_key = metric.underscore

        @hash.group_by { |row| row['date'] }.map do |date, rows|
          serialized = rows.map do |row|
            { field_key.to_sym => row[field_key], metric_key.to_sym => row[metric_key] }
          end.to_json

          {
            :date             => date,
            metric_key.to_sym => serialized,
            :signature        => rows.first[:signature],
            :profile_id       => rows.first[:profile_id]
          }
        end
      end

      # Pivots based on a keymap: every row's key/value pair becomes a
      # "<value>_<key>_<key value>" column, then rows sharing a date are
      # merged into one. Rows are modified in place. Returns self.
      #
      # === Example
      #
      #   data = GaResponseParser.new(response.body)
      #   data.pivot_by_keymap({
      #     :key   => 'operatingSystem',
      #     :value => 'visits'
      #   }).to_h
      #
      def pivot_by_keymap(keymap)
        source_key   = keymap[:key].underscore
        source_value = keymap[:value]

        pivoted = @hash.map do |row|
          column = "#{source_value}_#{keymap[:key]}_#{row[source_key].downcase}".underscore
          row[column] = row[source_value]
          row.delete(source_key)
          row.delete(source_value)
          row
        end

        @hash = pivoted.group_by { |row| row['date'] }.map { |_date, rows| rows.inject(:merge) }
        self
      end

      # Returns an array of hashes from the response, keyed by the
      # underscored column names without their "ga:" prefix. Returns an
      # empty array when nothing gets returned.
      def response_to_hash(response)
        rows = response['rows']
        return [] if rows.nil? || rows.empty?

        keys = response['columnHeaders'].map { |header| header['name'].gsub('ga:', '').underscore }
        rows.map { |row| keys.zip(row).to_h }
      end

      # Transforms fields in the hash (in place):
      #   - Changes the date from YYYYMMDD to YYYY-MM-DD
      #   - Adds a SHA1 hash of the date and profile id
      #   - Adds the profile id
      def transform(hashes)
        hashes.each do |row|
          date = Date.strptime(row['date'], '%Y%m%d').strftime('%Y-%m-%d')
          row['date'] = date

          row[:signature]  = Digest::SHA1.hexdigest("#{date};#{profile_id}")
          row[:profile_id] = profile_id
        end
      end

      # Returns the profile ID in the response
      def profile_id
        @json['profileInfo']['profileId']
      end
    end
  end
end