Created
February 21, 2017 21:39
-
-
Save pshapiro/bca29598a38b09a332b1af2f979a6cf2 to your computer and use it in GitHub Desktop.
Revisions
-
pshapiro created this gist
Feb 21, 2017. There are no files selected for viewing
"""Batch-check URLs against the AMPBench API and save the results as CSV.

Reads one URL per line from a text file, queries AMPBench for each URL and
writes one CSV row per URL. Both file names are prompted for interactively
and are resolved relative to the directory containing this script.

For details about AMPBench and the API:
https://github.com/ampproject/ampbench
"""
import csv
import json
import os

ampbench_url = "https://ampbench.appspot.com"  # Replace URL if running locally
amp_api = ampbench_url + "/api2"  # Replace with desired API version

user_agent = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
)
headers = {'User-Agent': user_agent}

# Seconds to wait for AMPBench before giving up on a URL. Without a timeout a
# single stalled request would hang the whole batch.
# NOTE(review): 120 is a guess at a generous upper bound -- tune as needed.
REQUEST_TIMEOUT = 120

# One entry per CSV column after the leading "URL" column: the column title
# and the key path of the value inside the AMPBench JSON response. Keeping
# title and path side by side guarantees the header row and the data rows
# always have the same width and order.
COLUMNS = (
    ("AMP Status", ("status",)),
    ("Canonical URL", ("amp_links", "canonical_url")),
    ("AMP URL", ("amp_links", "amphtml_url")),
    ("AMP Required Markup - Status", ("amp_required_markup", "status")),
    (
        "AMP Required Markup - Warning Count",
        ("amp_required_markup", "warning_count"),
    ),
    (
        "AMP Required Markup - Warning Status",
        ("amp_required_markup", "warning_status"),
    ),
    ("AMP Validation - Status", ("amp_validation", "status")),
    ("Google AMP Cache - Status", ("google_amp_cache", "status")),
    ("Google AMP Cache - Result", ("google_amp_cache", "result")),
    (
        "Google AMP Cache - URL",
        ("google_amp_cache", "google_amp_cache_url"),
    ),
    (
        "Google AMP Cache - Viewer URL",
        ("google_amp_cache", "google_amp_viewer_url"),
    ),
    ("Robots.txt - Status", ("robots", "robots_txt_status")),
    (
        "Robots.txt Googlebot - Status",
        ("robots", "robots_txt_googlebot_status"),
    ),
    (
        "Robots.txt Googlebot-Smartphone - Status",
        ("robots", "robots_txt_googlebot_smartphone_status"),
    ),
    ("Robots Meta - Status", ("robots", "robots_meta_status")),
    (
        "X-Robots Tag Header - Status",
        ("robots", "x_robots_tag_header_status"),
    ),
    ("Structured Data - Status", ("sd_validation", "status")),
    ("Structured Data - Result", ("sd_validation", "result")),
    ("Structured Data - Kind", ("sd_validation", "sd_kind")),
    ("Structured Data - Type", ("sd_validation", "sd_type")),
    ("Structured Data Is AMP?", ("sd_validation", "sd_type_is_amp")),
    (
        "Structured Data Logo - Status",
        ("sd_validation", "sd_logo_image", "status"),
    ),
    (
        "Structured Data Logo - Result",
        ("sd_validation", "sd_logo_image", "result"),
    ),
    (
        "Structured Data Article - Status",
        ("sd_validation", "sd_article", "status"),
    ),
    (
        "Structured Data Article - Result",
        ("sd_validation", "sd_article", "result"),
    ),
)

CSV_HEADER = ["URL"] + [title for title, _ in COLUMNS]


def lookup(report, path, default=""):
    """Return the value found by following *path* through nested dicts.

    Args:
        report: Parsed JSON value (normally a dict) to search.
        path: Sequence of keys to follow, outermost first.
        default: Value returned when a key is absent or an intermediate
            value is not a dict.

    Returns:
        The value stored at *path*, or *default* if the path cannot be
        followed to the end.
    """
    node = report
    for key in path:
        if not isinstance(node, dict) or key not in node:
            return default
        node = node[key]
    return node


def build_row(url, report):
    """Return the CSV row (a list of strings) for one AMPBench response.

    Values that are present are converted with ``str``; sections missing
    from the response produce empty cells rather than aborting the run.

    Args:
        url: The URL that was checked; becomes the first cell.
        report: Parsed AMPBench JSON response for *url*.
    """
    return [url] + [str(lookup(report, path)) for _, path in COLUMNS]


def error_row(url, error):
    """Return a full-width CSV row recording that *url* could not be checked.

    The error text is placed in the "AMP Status" column and every remaining
    column is left empty.
    """
    padding = [""] * (len(CSV_HEADER) - 2)
    return [url, "ERROR: " + str(error)] + padding


def fetch_report(url):
    """Query AMPBench for *url* and return the decoded JSON response.

    Raises:
        requests.RequestException: The HTTP request failed or timed out.
        ValueError: The response body was not valid JSON.
    """
    # Imported here so the pure helpers above can be imported and tested
    # without the third-party dependency being installed.
    import requests

    response = requests.request(
        "GET",
        amp_api,
        params={"url": url},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    return json.loads(response.text)


def main():
    """Prompt for the input/output file names and run the batch check."""
    base_dir = os.path.dirname(__file__)
    urlinput = os.path.join(base_dir, input('Enter input text file: '))
    outputcsv = os.path.join(
        base_dir,
        input('Enter a filename (minus file extension): ') + '.csv',
    )

    # newline="" lets the csv module control line endings itself.
    with open(urlinput, "r") as urls, \
            open(outputcsv, "w", newline="", encoding="utf-8") as out:
        writer = csv.writer(out)
        writer.writerow(CSV_HEADER)
        for line in urls:
            url = line.strip()
            if not url:
                # Blank lines would otherwise trigger a pointless API call.
                continue
            try:
                report = fetch_report(url)
            except (OSError, ValueError) as exc:
                # requests' exceptions derive from OSError and JSON decode
                # errors from ValueError; record the failure and carry on
                # with the remaining URLs.
                writer.writerow(error_row(url, exc))
                continue
            writer.writerow(build_row(url, report))

    print("Writing to " + outputcsv + " complete.")


if __name__ == "__main__":
    main()