Gist by PatrikHudak, created December 12, 2018 15:35.
# coding=utf-8
# python3
from urllib.parse import urlparse

import requests
import urllib3
from bs4 import BeautifulSoup

# Disable SSL insecure warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Timeout for all HTTP requests
GLOBAL_HTTP_TIMEOUT = 7

# Set User-Agent for "OPSEC"
UA = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"
}


def normalize_url(domain, src):
    '''
    (Try to) normalize a URL to its absolute form
    '''
    src = src.strip()
    src = src.rstrip('/')

    # Protocol-relative URL
    if src.startswith('//'):
        return 'http:{}'.format(src)

    # Relative URL starting with /
    if src.startswith('/'):
        return 'http://{}{}'.format(domain, src)

    # Relative URL starting with ?
    if src.startswith('?'):
        return 'http://{}/{}'.format(domain, src)

    # Relative URL starting with ./
    if src.startswith('./'):
        return 'http://{}{}'.format(domain, src[1:])

    # Absolute URL
    if src.startswith('https://') or src.startswith('http://'):
        return src

    # Else let's hope it is a relative URL
    return 'http://{}/{}'.format(domain, src)


def extract_javascript(domain, source_code):
    '''
    Extract and normalize external JavaScript files from HTML
    '''
    tree = BeautifulSoup(source_code, 'html.parser')
    scripts = [normalize_url(domain, s.get('src')) for s in tree.find_all('script') if s.get('src')]
    return list(set(scripts))


def extract_links(domain, source_code):
    '''
    Extract and normalize links in an HTML file
    '''
    tree = BeautifulSoup(source_code, 'html.parser')
    hrefs = [normalize_url(domain, s.get('href')) for s in tree.find_all('a') if s.get('href')]
    return list(set(hrefs))


def extract_styles(domain, source_code):
    '''
    Extract and normalize <link> targets (stylesheets, icons, ...) in an HTML file
    '''
    tree = BeautifulSoup(source_code, 'html.parser')
    hrefs = [normalize_url(domain, s.get('href')) for s in tree.find_all('link') if s.get('href')]
    return list(set(hrefs))


def extract_cors(headers):
    '''
    Extract allowed origins from the Access-Control-Allow-Origin header (if any)
    '''
    # .get() avoids a KeyError when the header is absent
    cors = headers.get('Access-Control-Allow-Origin', '')
    if not cors:
        return []
    origins = [o.strip() for o in cors.split(',')]
    if '*' in origins:
        # Use your imagination here
        return []
    return origins


def extract_domain(url):
    '''Extract the domain name from a given URL'''
    return urlparse(url).netloc


if __name__ == '__main__':
    # This is a sample of the intended functionality
    # ----
    # Note that functionality for showing the origin domain name
    # where a takeover was discovered (if any) is missing
    # ----
    domains = []  # Database retrieval
    results = {}

    for d in domains:
        r = None
        for prefix in ['http://', 'https://']:
            # Try both HTTP and HTTPS, where HTTPS has higher priority
            # (thus second in the list)
            try:
                r = requests.get('{}{}'.format(prefix, d), timeout=GLOBAL_HTTP_TIMEOUT, verify=False, headers=UA)
            except requests.exceptions.RequestException:
                # Connection refused / NXDOMAIN / timeout / ...
                pass

        if r is None:
            # Neither scheme produced a response
            continue

        urls = extract_javascript(d, r.text)
        urls += extract_links(d, r.text)
        urls += extract_styles(d, r.text)
        urls += extract_cors(r.headers)

        # takeoverable = subdomain_takeover.check([extract_domain(u) for u in urls])
        # ...
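
# ----------------------------------------------------------------------
# NOTE: `subdomain_takeover.check` referenced above is not included in
# this gist. Below is a minimal, hypothetical sketch of such a helper,
# assuming dnspython (>= 2.0) is available. The fingerprint list is an
# illustrative subset, not the author's actual implementation.
import dns.exception
import dns.resolver

# CNAME suffixes of a few services known to be prone to takeover
# (illustrative examples only)
TAKEOVER_SUFFIXES = (
    '.s3.amazonaws.com.',
    '.github.io.',
    '.herokuapp.com.',
)


def check(candidates):
    '''
    Return candidate domains whose CNAME points at a takeover-prone
    service, or whose record is dangling (NXDOMAIN)
    '''
    flagged = []
    for domain in set(candidates):
        try:
            answers = dns.resolver.resolve(domain, 'CNAME')
            targets = [a.target.to_text() for a in answers]
        except dns.resolver.NXDOMAIN:
            # The referenced domain no longer resolves at all
            flagged.append(domain)
            continue
        except dns.exception.DNSException:
            # No CNAME, timeout, ...: nothing to conclude here
            continue
        if any(t.endswith(s) for t in targets for s in TAKEOVER_SUFFIXES):
            flagged.append(domain)
    return flagged

# With a helper like this, the commented-out line above would become:
# takeoverable = check([extract_domain(u) for u in urls])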