get_crux_data.py
# --- Imports and API key -----------------------------------------------------------------------------------------------

from datetime import date
from urllib.parse import urlparse
import time

import numpy as np
import pandas as pd
import requests

API_KEY = 'YOUR_CRUX_API_KEY'  # insert your CrUX API key here (required for the requests below)

# --- Open the file containing the list of URLs ------------------------------------------------------------------------

with open('urls.txt', 'r') as urls_file:
    url_list_file = urls_file.readlines()

# url_list_file = pd.read_csv('urls.csv', header=None).drop_duplicates()[0].to_list()
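# A hedged alternative for the plain-text path above (not part of the original script): drop blank
# lines and duplicates from the list while preserving order.
# url_list_file = list(dict.fromkeys(line.strip() for line in url_list_file if line.strip()))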

# --- Set parameters and variables --------------------------------------------------------------------------------------

# Get today's date (used in the output file names)
date_today = date.today().strftime("%d.%m.%Y")

# Create an empty dataframe for the results
df_crux = pd.DataFrame()

# Set request parameters
lst_formFactor = ['PHONE', 'DESKTOP'] # 'PHONE' or 'TABLET' or 'DESKTOP'
level = 'url' # 'url' (page-level data) or 'origin' (aggregated over the whole origin)
counter = 0

# Create dict for the results (reused as a row buffer for every request)
crux_data_dict = {}
crux_data_dict['date'] = date.today()
crux_data_dict['level'] = level

# --- Loop over the URLs ------------------------------------------------------------------------------------------------

for url in url_list_file:

    # Strip whitespace and the trailing newline from the URL
    url = url.strip()
    # Get domain name
    url_netloc = urlparse(url).netloc

    for formFactor in lst_formFactor:

        # Reset the row buffer for this request so values from a previous URL cannot leak into error rows
        crux_data_dict = {'date': date.today(), 'level': level}

        # Set request parameters
        api_url = f'https://chromeuxreport.googleapis.com/v1/records:queryRecord?key={API_KEY}'
        data_dic = { 'formFactor': formFactor, level: url }
        header_dic = { 'Content-Type': 'application/json' }

        result = requests.post(api_url, json = data_dic, headers = header_dic)
        result = result.json()

        # print(result)
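        # For reference, the nested lookups below assume a queryRecord response shaped roughly like this
        # (abridged sketch with illustrative values only; see the CrUX API docs for the exact schema):
        #
        #   { "record": { "key":     { "formFactor": "PHONE", "url": "https://www.example.com/" },
        #                 "metrics": { "largest_contentful_paint": {
        #                                  "histogram":   [ { "start": 0,    "end": 2500, "density": ... },   # good
        #                                                   { "start": 2500, "end": 4000, "density": ... },   # needs improvement
        #                                                   { "start": 4000,              "density": ... } ], # poor
        #                                  "percentiles": { "p75": ... } },
        #                              ... } } }
        #
        # If no data is available, the API responds with { "error": { "status": "NOT_FOUND", ... } } instead,
        # which the KeyError fallbacks below turn into NaN values.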

        try:

            # Get url
            try: crux_data_dict['url'] = result['record']['key']['url']
            except KeyError: crux_data_dict['url'] = url

            # Set domain name
            crux_data_dict['url_netloc'] = url_netloc

            # Get form factor
            try: crux_data_dict['device'] = result['record']['key']['formFactor']
            except KeyError: crux_data_dict['device'] = formFactor

            # Get status of the request
            try: crux_data_dict['status'] = result['error']['status']
            except KeyError: crux_data_dict['status'] = 'Success'


            # Get CLS data
            try: crux_data_dict['cumulative_layout_shift'] = result['record']['metrics']['cumulative_layout_shift']['percentiles']['p75']
            except KeyError: crux_data_dict['cumulative_layout_shift'] = np.nan
            try: crux_data_dict['cumulative_layout_shift_good'] = result['record']['metrics']['cumulative_layout_shift']['histogram'][0]['density']
            except KeyError: crux_data_dict['cumulative_layout_shift_good'] = np.nan
            try: crux_data_dict['cumulative_layout_shift_ni'] = result['record']['metrics']['cumulative_layout_shift']['histogram'][1]['density']
            except KeyError: crux_data_dict['cumulative_layout_shift_ni'] = np.nan
            try: crux_data_dict['cumulative_layout_shift_bad'] = result['record']['metrics']['cumulative_layout_shift']['histogram'][2]['density']
            except KeyError: crux_data_dict['cumulative_layout_shift_bad'] = np.nan

            # Get FCP data
            try: crux_data_dict['first_contentful_paint'] = result['record']['metrics']['first_contentful_paint']['percentiles']['p75']
            except KeyError: crux_data_dict['first_contentful_paint'] = np.nan
            try: crux_data_dict['first_contentful_paint_good'] = result['record']['metrics']['first_contentful_paint']['histogram'][0]['density']
            except KeyError: crux_data_dict['first_contentful_paint_good'] = np.nan
            try: crux_data_dict['first_contentful_paint_ni'] = result['record']['metrics']['first_contentful_paint']['histogram'][1]['density']
            except KeyError: crux_data_dict['first_contentful_paint_ni'] = np.nan
            try: crux_data_dict['first_contentful_paint_bad'] = result['record']['metrics']['first_contentful_paint']['histogram'][2]['density']
            except KeyError: crux_data_dict['first_contentful_paint_bad'] = np.nan

            # Get FID data
            try: crux_data_dict['first_input_delay'] = result['record']['metrics']['first_input_delay']['percentiles']['p75']
            except KeyError: crux_data_dict['first_input_delay'] = np.nan
            try: crux_data_dict['first_input_delay_good'] = result['record']['metrics']['first_input_delay']['histogram'][0]['density']
            except KeyError: crux_data_dict['first_input_delay_good'] = np.nan
            try: crux_data_dict['first_input_delay_ni'] = result['record']['metrics']['first_input_delay']['histogram'][1]['density']
            except KeyError: crux_data_dict['first_input_delay_ni'] = np.nan
            try: crux_data_dict['first_input_delay_bad'] = result['record']['metrics']['first_input_delay']['histogram'][2]['density']
            except KeyError: crux_data_dict['first_input_delay_bad'] = np.nan

            # Get LCP data
            try: crux_data_dict['largest_contentful_paint'] = result['record']['metrics']['largest_contentful_paint']['percentiles']['p75']
            except KeyError: crux_data_dict['largest_contentful_paint'] = np.nan
            try: crux_data_dict['largest_contentful_paint_good'] = result['record']['metrics']['largest_contentful_paint']['histogram'][0]['density']
            except KeyError: crux_data_dict['largest_contentful_paint_good'] = np.nan
            try: crux_data_dict['largest_contentful_paint_ni'] = result['record']['metrics']['largest_contentful_paint']['histogram'][1]['density']
            except KeyError: crux_data_dict['largest_contentful_paint_ni'] = np.nan
            try: crux_data_dict['largest_contentful_paint_bad'] = result['record']['metrics']['largest_contentful_paint']['histogram'][2]['density']
            except KeyError: crux_data_dict['largest_contentful_paint_bad'] = np.nan

        # If the request fails entirely
        except Exception as e:
            # print(e)
            crux_data_dict['url'] = url
            crux_data_dict['url_netloc'] = url_netloc
            crux_data_dict['device'] = formFactor
            crux_data_dict['status'] = 'ERROR - ' + str(e)

        # After the request - append the collected row to the results
        # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
        df_crux = pd.concat([df_crux, pd.DataFrame(crux_data_dict, index=[0])], ignore_index=True)

        # If a backup is needed, save every 10th step during the loop
        counter += 1
        if counter % 10 == 0:
            df_crux.to_csv(f'crux_results_{date_today}_wip.csv', sep=';', index=True)

        # Sleep between requests to stay below the API rate limit
        time.sleep(0.3)


# --- Save final results --------------------------------------------------------------------------------------------------

df_crux.to_csv(f'crux_results_{date_today}_final.csv', sep=';', index=True)

# Show the collected results (only has an effect when run in a notebook)
df_crux
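
# --- Optional refactor sketch (not part of the original script) -----------------------------------------------------------
# The four per-metric try/except blocks above follow the same pattern, so they could be collapsed into a
# helper like the hypothetical 'extract_metric' below plus a loop over the metric names.

def extract_metric(result, metric, row):
    """Copy the p75 value and the good/needs-improvement/poor densities of one CrUX metric into 'row'."""
    data = result.get('record', {}).get('metrics', {}).get(metric, {})
    histogram = data.get('histogram', [])
    row[metric] = data.get('percentiles', {}).get('p75', np.nan)
    for i, suffix in enumerate(['good', 'ni', 'bad']):
        row[f'{metric}_{suffix}'] = histogram[i].get('density', np.nan) if len(histogram) > i else np.nan

# Possible usage inside the loop, replacing the per-metric blocks:
# for metric in ['cumulative_layout_shift', 'first_contentful_paint',
#                'first_input_delay', 'largest_contentful_paint']:
#     extract_metric(result, metric, crux_data_dict)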