Created
May 19, 2022 12:55
-
-
Save jpigla/eb08b51ed0b2e8dcf2ea9457f1dee876 to your computer and use it in GitHub Desktop.
Revisions
-
jpigla created this gist
May 19, 2022. There are no files selected for viewing.
# --- Open the file containing the list of URLs ------------------------------------------------------------------------
# Batch-query the Chrome UX Report (CrUX) API for every URL in urls.txt and
# write the p75 / histogram-density metrics to CSV.
# 'with' guarantees the handle is closed (the original left it open).
with open('urls.txt', 'r') as urls_file:
    url_list_file = urls_file.readlines()
# url_list_file = pd.read_csv('urls.csv', header=None).drop_duplicates()[0].to_list()

# --- Set parameters and variables --------------------------------------------------------------------------------------
# Get date of today (used in the output file names)
date_today = date.today().strftime("%d.%m.%Y")

# Set request parameters
lst_formFactor = ['PHONE', 'DESKTOP']  # 'PHONE' or 'TABLET' or 'DESKTOP'
level = 'url'                          # 'url' or 'origin'
counter = 0

# One dict per (url, form factor) request, turned into a DataFrame at the end.
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and was
# quadratic when called inside a loop.)
records = []

# CrUX metric keys extracted from every API response.
METRICS = (
    'cumulative_layout_shift',
    'first_contentful_paint',
    'first_input_delay',
    'largest_contentful_paint',
)


def _dig(obj, *path):
    """Follow *path* through nested dicts/lists; return np.nan if any step is missing."""
    try:
        for step in path:
            obj = obj[step]
        return obj
    except (KeyError, IndexError, TypeError):
        return np.nan


# --- Loop over the URLs ------------------------------------------------------------------------------------------------
for url in url_list_file:
    url = url.strip()
    if not url:
        continue  # skip blank lines in urls.txt
    # Get domain name
    url_netloc = urlparse(url).netloc

    for formFactor in lst_formFactor:
        # Set request parameters
        api_url = f'https://chromeuxreport.googleapis.com/v1/records:queryRecord?key={API_KEY}'
        data_dic = {
            'formFactor': formFactor,
            level: url
        }
        header_dic = {
            'Content-Type': 'application/json'
        }

        # Fresh dict per request: the original reused a single dict, so metric
        # values from the previous URL leaked into error rows.
        row = {
            'date': date.today(),
            'level': level,
            'url': url,
            'url_netloc': url_netloc,
            'device': formFactor,
        }

        try:
            result = requests.post(api_url, json=data_dic, headers=header_dic)
            result = result.json()
            # print(result)

            # Prefer the canonical key/formFactor echoed back by the API;
            # fall back to what we sent when the response lacks them.
            row['url'] = result.get('record', {}).get('key', {}).get('url', url)
            row['device'] = result.get('record', {}).get('key', {}).get('formFactor', formFactor)

            # Status of the request ('Success' unless the API reports an error)
            row['status'] = result.get('error', {}).get('status', 'Success')

            # p75 percentile plus good / needs-improvement / bad histogram
            # densities for each metric; np.nan when the response omits one.
            for metric in METRICS:
                row[metric] = _dig(result, 'record', 'metrics', metric, 'percentiles', 'p75')
                for idx, bucket in enumerate(('good', 'ni', 'bad')):
                    row[f'{metric}_{bucket}'] = _dig(result, 'record', 'metrics', metric, 'histogram', idx, 'density')

        # If the request fails (network error, invalid JSON, ...)
        except Exception as e:
            # print(e)
            row['status'] = 'ERROR - ' + str(e)

        # After the request - append data to remaining data
        records.append(row)

        # If backup is needed, save every 10th step during the loop
        counter += 1
        if counter % 10 == 0:
            pd.DataFrame(records).to_csv(f'crux_results_{date_today}_wip.csv', sep=';', index=True)

        # Set time to sleep between requests (stay under the API rate limit)
        time.sleep(0.3)

df_crux = pd.DataFrame(records)
df_crux.to_csv(f'crux_results_{date_today}_final.csv', sep=';', index=True)
df_crux