Skip to content

Instantly share code, notes, and snippets.

@pshapiro
Created February 21, 2017 21:39
Show Gist options
  • Save pshapiro/bca29598a38b09a332b1af2f979a6cf2 to your computer and use it in GitHub Desktop.
Save pshapiro/bca29598a38b09a332b1af2f979a6cf2 to your computer and use it in GitHub Desktop.

Revisions

  1. pshapiro created this gist Feb 21, 2017.
    82 changes: 82 additions & 0 deletions BulkAMPValidator.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,82 @@
    import requests
    import json
    import csv
    import os

    # For details about AMPBench and the API:
    # https://github.com/ampproject/ampbench

    # Both file names typed by the user are resolved relative to the directory
    # that contains this script, not the current working directory.
    script_dir = os.path.dirname(__file__)

    # Input: a text file that is read line by line further down the script.
    # The handle is opened here and stays open for that later loop.
    urlinput = os.path.join(script_dir, input('Enter input text file: '))
    urls = open(urlinput, mode="r")

    # Output: the ".csv" extension is appended to whatever name is entered.
    csv_basename = input('Enter a filename (minus file extension): ')
    outputcsv = os.path.join(script_dir, csv_basename + '.csv')

    # Base address of the AMPBench service.
    ampbench_url = "https://ampbench.appspot.com"  # Replace URL if running locally

    # NOTE(review): `headers` is built here but is not passed to the request
    # made in the loop visible later in this file -- confirm whether it should be.
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
    headers = {'User-Agent': user_agent}

    # CSV writer for the report. newline="" is the setting the csv module
    # documents for file objects handed to csv.writer (the writer emits its own
    # line terminators). The underlying file object is not kept in a variable,
    # so it is only closed when the interpreter releases it.
    f = csv.writer(open(outputcsv, "w+", newline="", encoding="utf-8"))

    # Header row: the URL column followed by one column per reported field.
    # Every entry needs its own trailing comma -- two adjacent string literals
    # are silently concatenated into a single cell, which would shift every
    # later column label one position to the left of its data.
    f.writerow([
        "URL",
        "AMP Status",
        "Canonical URL",
        "AMP URL",
        "AMP Required Markup - Status",
        "AMP Required Markup - Warning Count",
        "AMP Required Markup - Warning Status",
        "AMP Validation - Status",
        "Google AMP Cache - Status",
        "Google AMP Cache - Result",
        "Google AMP Cache - URL",
        "Google AMP Cache - Viewer URL",
        "Robots.txt - Status",
        "Robots.txt Googlebot - Status",
        "Robots.txt Googlebot-Smartphone - Status",
        "Robots Meta - Status",
        "X-Robots Tag Header - Status",
        "Structured Data - Status",
        "Structured Data - Result",
        "Structured Data - Kind",
        "Structured Data - Type",
        "Structured Data Is AMP?",
        "Structured Data Logo - Status",
        "Structured Data Logo - Result",
        "Structured Data Article - Status",
        "Structured Data Article - Result",
    ])

    amp_api = ampbench_url + "/api2"  # Replace with desired API version

    # Key paths into the decoded JSON response, one per CSV column that follows
    # the URL column, in output order. Each path is walked with plain indexing,
    # so a response that lacks any of these keys raises KeyError.
    report_fields = (
        ('status',),
        ('amp_links', 'canonical_url'),
        ('amp_links', 'amphtml_url'),
        ('amp_required_markup', 'status'),
        ('amp_required_markup', 'warning_count'),
        ('amp_required_markup', 'warning_status'),
        ('amp_validation', 'status'),
        ('google_amp_cache', 'status'),
        ('google_amp_cache', 'result'),
        ('google_amp_cache', 'google_amp_cache_url'),
        ('google_amp_cache', 'google_amp_viewer_url'),
        ('robots', 'robots_txt_status'),
        ('robots', 'robots_txt_googlebot_status'),
        ('robots', 'robots_txt_googlebot_smartphone_status'),
        ('robots', 'robots_meta_status'),
        ('robots', 'x_robots_tag_header_status'),
        ('sd_validation', 'status'),
        ('sd_validation', 'result'),
        ('sd_validation', 'sd_kind'),
        ('sd_validation', 'sd_type'),
        ('sd_validation', 'sd_type_is_amp'),
        ('sd_validation', 'sd_logo_image', 'status'),
        ('sd_validation', 'sd_logo_image', 'result'),
        ('sd_validation', 'sd_article', 'status'),
        ('sd_validation', 'sd_article', 'result'),
    )

    try:
        for line in urls:
            # Strip the line ending so the URL cell does not contain an
            # embedded newline, and skip blank lines instead of querying
            # the API with an empty URL.
            page_url = line.strip()
            if not page_url:
                continue

            data = requests.request("GET", amp_api, params={"url": page_url})
            j_obj = json.loads(data.text)

            row = [page_url]
            for path in report_fields:
                value = j_obj
                for key in path:
                    value = value[key]
                row.append(str(value))
            f.writerow(row)
    finally:
        # Release the input file even when a request or lookup fails.
        urls.close()

    print("Writing to " + outputcsv + " complete.")