"""Batch-check URLs against the AMPBench API and save the results as CSV.

The script prompts for a text file containing one URL per line and for an
output filename (without extension).  Both paths are resolved relative to
the directory containing this script.  Every non-blank URL is submitted to
the AMPBench API and the selected fields of the JSON report are written as
one CSV row.

Any request failure, non-JSON response, or report that lacks an expected
key raises and stops the run; rows written up to that point are kept.
"""
import csv
import json
import os

import requests

# For details about AMPBench and the API:
# https://github.com/ampproject/ampbench

ampbench_url = "https://ampbench.appspot.com"  # Replace URL if running locally
amp_api = ampbench_url + "/api2"  # Replace with desired API version

user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
headers = {'User-Agent': user_agent}
# NOTE(review): `headers` has never been passed to the request below.  It is
# left unsent here so the request stays unchanged; add `headers=headers`
# to the call if the browser User-Agent was meant to be sent.

# Upper bound, in seconds, on how long a single API call may stall.  Without
# it one unresponsive connection would hang the whole batch indefinitely.
REQUEST_TIMEOUT = 120

# One entry per CSV column after "URL": (column header, key path into the
# JSON report).  Headers and values are both generated from this table, so
# they cannot fall out of step with each other.
REPORT_FIELDS = (
    ("AMP Status", ("status",)),
    ("Canonical URL", ("amp_links", "canonical_url")),
    ("AMP URL", ("amp_links", "amphtml_url")),
    ("AMP Required Markup - Status", ("amp_required_markup", "status")),
    ("AMP Required Markup - Warning Count", ("amp_required_markup", "warning_count")),
    ("AMP Required Markup - Warning Status", ("amp_required_markup", "warning_status")),
    ("AMP Validation - Status", ("amp_validation", "status")),
    ("Google AMP Cache - Status", ("google_amp_cache", "status")),
    ("Google AMP Cache - Result", ("google_amp_cache", "result")),
    ("Google AMP Cache - URL", ("google_amp_cache", "google_amp_cache_url")),
    ("Google AMP Cache - Viewer URL", ("google_amp_cache", "google_amp_viewer_url")),
    ("Robots.txt - Status", ("robots", "robots_txt_status")),
    ("Robots.txt Googlebot - Status", ("robots", "robots_txt_googlebot_status")),
    ("Robots.txt Googlebot-Smartphone - Status", ("robots", "robots_txt_googlebot_smartphone_status")),
    ("Robots Meta - Status", ("robots", "robots_meta_status")),
    ("X-Robots Tag Header - Status", ("robots", "x_robots_tag_header_status")),
    ("Structured Data - Status", ("sd_validation", "status")),
    ("Structured Data - Result", ("sd_validation", "result")),
    ("Structured Data - Kind", ("sd_validation", "sd_kind")),
    ("Structured Data - Type", ("sd_validation", "sd_type")),
    ("Structured Data Is AMP?", ("sd_validation", "sd_type_is_amp")),
    ("Structured Data Logo - Status", ("sd_validation", "sd_logo_image", "status")),
    ("Structured Data Logo - Result", ("sd_validation", "sd_logo_image", "result")),
    ("Structured Data Article - Status", ("sd_validation", "sd_article", "status")),
    ("Structured Data Article - Result", ("sd_validation", "sd_article", "result")),
)


def _lookup(report, path):
    """Return the value reached by indexing *report* with each key in *path*.

    Raises KeyError if any key along the path is missing.
    """
    value = report
    for key in path:
        value = value[key]
    return value


def _report_row(url, report):
    """Build one CSV row: *url* followed by every REPORT_FIELDS value as str."""
    return [url] + [str(_lookup(report, path)) for _, path in REPORT_FIELDS]


urlinput = os.path.join(os.path.dirname(__file__), input('Enter input text file: '))
outputcsv = os.path.join(os.path.dirname(__file__), input('Enter a filename (minus file extension): ') + '.csv')

# The input file is opened first so a missing input does not leave an empty
# output file behind.  newline="" is what the csv module documents for files
# handed to csv.writer.
with open(urlinput, "r") as urls, \
        open(outputcsv, "w", newline="", encoding="utf-8") as out_file:
    f = csv.writer(out_file)
    f.writerow(["URL"] + [label for label, _ in REPORT_FIELDS])

    for line in urls:
        url = line.strip()
        if not url:
            # Blank lines (e.g. a trailing newline) are not URLs; skip them
            # rather than querying the API with an empty parameter.
            continue
        data = requests.get(amp_api, params={"url": url}, timeout=REQUEST_TIMEOUT)
        j_obj = json.loads(data.text)
        # Write the stripped URL so the cell does not carry the line break
        # read from the input file.
        f.writerow(_report_row(url, j_obj))

print("Writing to " + outputcsv + " complete.")