Skip to content

Instantly share code, notes, and snippets.

@srt0
Forked from mhmdiaa/waybackurls.py
Created February 17, 2020 11:21
Show Gist options
  • Save srt0/498a260949747d96fb0a00ccfebde063 to your computer and use it in GitHub Desktop.
Save srt0/498a260949747d96fb0a00ccfebde063 to your computer and use it in GitHub Desktop.

Revisions

  1. Mohammed Diaa revised this gist May 1, 2017. 1 changed file with 7 additions and 8 deletions.
    15 changes: 7 additions & 8 deletions waybackurls.py
    Original file line number Diff line number Diff line change
    @@ -1,18 +1,16 @@
    import requests
    import sys
    import json


    def waybackurls(host, with_subs):
    if with_subs:
    url = 'http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original' % host
    url = 'http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey' % host
    else:
    url = 'http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original' % host
    url = 'http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey' % host
    r = requests.get(url)
    results = r.json()
    urls = set()
    for i in results:
    urls.add(i[0])
    return urls
    return results[1:]


    if __name__ == '__main__':
    @@ -27,10 +25,11 @@ def waybackurls(host, with_subs):
    with_subs = True

    urls = waybackurls(host, with_subs)
    json_urls = json.dumps(urls)
    if urls:
    filename = '%s-waybackurls.txt' % host
    filename = '%s-waybackurls.json' % host
    with open(filename, 'w') as f:
    f.write('\n'.join(urls))
    f.write(json_urls)
    print('[*] Saved results to %s' % filename)
    else:
    print('[-] Found nothing')
  2. Mohammed Diaa created this gist May 1, 2017.
    36 changes: 36 additions & 0 deletions waybackurls.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,36 @@
    import requests
    import sys


    def waybackurls(host, with_subs):
        """Return the set of archived URLs the Wayback Machine CDX API lists for *host*.

        Args:
            host: Host name to query, interpolated into the CDX ``url=`` filter.
            with_subs: When true, query ``*.<host>/*`` so subdomains are included;
                otherwise query ``<host>/*`` only.

        Returns:
            A set of the distinct ``original`` URL strings from the response.
        """
        if with_subs:
            url = 'http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original' % host
        else:
            url = 'http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original' % host
        r = requests.get(url)
        results = r.json()
        # With output=json the first row is the field-name header (["original"]),
        # not a capture; skip it so the literal string "original" is not reported
        # as a URL. Empty rows are ignored rather than raising IndexError.
        return {row[0] for row in results[1:] if row}


    if __name__ == '__main__':
        # Command-line entry point: waybackurls.py <url> [include_subdomains]
        argc = len(sys.argv)
        if argc < 2:
            print('Usage:\n\tpython3 waybackurls.py <url> <include_subdomains:optional>')
            sys.exit()

        host = sys.argv[1]
        # Any second positional argument turns on subdomain search. sys.argv[0]
        # is the script name, so "<url> <flag>" gives argc == 3; the previous
        # check (argc > 3) could never be satisfied by the documented usage.
        with_subs = argc > 2

        urls = waybackurls(host, with_subs)
        if urls:
            filename = '%s-waybackurls.txt' % host
            # Sort for reproducible output (set order is arbitrary) and write
            # UTF-8 explicitly so non-ASCII URLs do not depend on the platform's
            # default encoding.
            with open(filename, 'w', encoding='utf-8') as f:
                f.write('\n'.join(sorted(urls)))
            print('[*] Saved results to %s' % filename)
        else:
            print('[-] Found nothing')