Skip to content

Instantly share code, notes, and snippets.

@PatrikHudak
Created December 12, 2018 15:35
Show Gist options
  • Save PatrikHudak/2006c50a694cc76ead705c91805df78b to your computer and use it in GitHub Desktop.

Revisions

  1. PatrikHudak revised this gist Dec 12, 2018. No changes.
  2. PatrikHudak renamed this gist Dec 12, 2018. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  3. PatrikHudak created this gist Dec 12, 2018.
    120 changes: 120 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,120 @@
    # coding=utf-8
    # python3

    from urllib.parse import urlparse

    import requests
    import urllib3

    from bs4 import BeautifulSoup

    # Disable SSL insecure warnings
    # (requests is called with verify=False below; without this, every request
    # would print an InsecureRequestWarning to stderr)
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # Timeout for all HTTP requests
    # (seconds; applied to every requests.get call in __main__)
    GLOBAL_HTTP_TIMEOUT = 7

    # Set User-Agent for "OPSEC"
    # (mimics a desktop Chrome browser so scans blend in with normal traffic)
    UA = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"
    }

    def normalize_url(domain, src):
        '''
        Best-effort normalization of *src* into an absolute URL.

        Relative references are resolved against *domain* using plain
        http://; protocol-relative references (//host/path) also default
        to http. Already-absolute http(s) URLs pass through unchanged
        (apart from whitespace/trailing-slash cleanup).
        '''
        cleaned = src.strip().rstrip('/')

        # Already absolute -- nothing to resolve
        if cleaned.startswith(('http://', 'https://')):
            return cleaned

        # Protocol-relative URL (checked before the single-slash case,
        # since '//...' also starts with '/')
        if cleaned.startswith('//'):
            return 'http:' + cleaned

        # Root-relative URL
        if cleaned.startswith('/'):
            return 'http://' + domain + cleaned

        # Query-only reference
        if cleaned.startswith('?'):
            return 'http://' + domain + '/' + cleaned

        # Explicitly current-directory relative: drop the leading '.'
        if cleaned.startswith('./'):
            return 'http://' + domain + cleaned[1:]

        # Otherwise assume a bare relative path
        return 'http://' + domain + '/' + cleaned

    def extract_javascript(domain, source_code):
        '''
        Return unique, normalized URLs of external <script src=...> tags in HTML
        '''
        soup = BeautifulSoup(source_code, 'html.parser')
        found = set()
        for tag in soup.find_all('script'):
            src = tag.get('src')
            if src:
                found.add(normalize_url(domain, src))
        return list(found)

    def extract_links(domain, source_code):
        '''
        Return unique, normalized URLs of <a href=...> links in HTML
        '''
        soup = BeautifulSoup(source_code, 'html.parser')
        found = set()
        for anchor in soup.find_all('a'):
            href = anchor.get('href')
            if href:
                found.add(normalize_url(domain, href))
        return list(found)

    def extract_styles(domain, source_code):
        '''
        Return unique, normalized URLs referenced by <link href=...> tags in HTML.

        NOTE(review): this matches every <link> element (favicons, preloads,
        canonical links), not only rel="stylesheet" -- intentional breadth
        for takeover hunting, presumably.
        '''
        soup = BeautifulSoup(source_code, 'html.parser')
        found = set()
        for link in soup.find_all('link'):
            href = link.get('href')
            if href:
                found.add(normalize_url(domain, href))
        return list(found)

    def extract_cors(headers):
        '''
        Extract the allowed origins from the Access-Control-Allow-Origin header.

        Returns an empty list when the header is missing (the original code
        raised KeyError here, crashing the scan loop for any response without
        CORS headers) or when the policy is the wildcard '*' (any origin is
        allowed, so there is no specific domain worth checking).
        '''
        acao = headers.get('Access-Control-Allow-Origin')
        if not acao:
            # Header absent or empty -- nothing to extract
            return []
        # Strip per-origin whitespace so 'a.com, b.com' yields clean values
        cors = [origin.strip() for origin in acao.split(',')]
        if '*' in cors:
            # Use your imagination here
            return []
        return cors

    def extract_domain(url):
        '''
        Return the network-location (host[:port]) component of *url*.

        Yields an empty string when *url* has no scheme/netloc part.
        '''
        parsed = urlparse(url)
        return parsed.netloc

    if __name__ == '__main__':
        # This is sample of intended functionality
        # ----
        # Note that there is a missing functionality for showing
        # origin domain name where takeover was discovered (if any)
        # ----

        domains = []  # Database retrieval
        results = {}
        for d in domains:
            for prefix in ['http://', 'https://']:
                # Trying both HTTP and HTTPS where HTTPS has higher priority
                # (Thus second in the list)

                # Reset per attempt. The original never assigned None, so the
                # first failed request raised NameError at the `r is None`
                # check, and later failures silently reused the previous
                # iteration's stale response.
                r = None
                try:
                    r = requests.get('{}{}'.format(prefix, d),
                                     timeout=GLOBAL_HTTP_TIMEOUT,
                                     verify=False, headers=UA)
                except requests.RequestException:
                    # Narrowed from bare `except:` so Ctrl-C and real bugs
                    # are not swallowed; network errors just skip this prefix.
                    pass

                if r is None:
                    # Connection refused / NXDOMAIN / timeout / ...
                    continue

                urls = extract_javascript(d, r.text)
                urls += extract_links(d, r.text)
                urls += extract_styles(d, r.text)
                urls += extract_cors(r.headers)

                # takeoverable = subdomain_takeover.check([extract_domain(u) for u in urls])
                # ...