Skip to content

Instantly share code, notes, and snippets.

@PatrikHudak
Created December 12, 2018 15:35
Show Gist options
  • Save PatrikHudak/2006c50a694cc76ead705c91805df78b to your computer and use it in GitHub Desktop.

Revisions

  1. PatrikHudak revised this gist Dec 12, 2018. No changes.
  2. PatrikHudak renamed this gist Dec 12, 2018. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  3. PatrikHudak created this gist Dec 12, 2018.
    120 changes: 120 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,120 @@
    # coding=utf-8
    # python3

    from urllib.parse import urlparse

    import requests
    import urllib3

    from bs4 import BeautifulSoup

    # Disable SSL insecure warnings
    # (requests is called with verify=False below; without this, every request
    # would print an InsecureRequestWarning to stderr)
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # Timeout for all HTTP requests
    # (seconds; applied to every requests.get call in __main__)
    GLOBAL_HTTP_TIMEOUT = 7

    # Set User-Agent for "OPSEC"
    # (mimics a desktop Chrome browser so scans blend in with normal traffic)
    UA = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"
    }

    def normalize_url(domain, src):
        '''
        Best-effort normalization of *src* into an absolute URL.

        Relative references are resolved against *domain* using plain
        http://; protocol-relative references (//host/path) also default
        to http. Already-absolute http(s) URLs pass through unchanged
        (apart from whitespace/trailing-slash cleanup).
        '''
        cleaned = src.strip().rstrip('/')

        # Already absolute -- nothing to resolve
        if cleaned.startswith(('http://', 'https://')):
            return cleaned

        # Protocol-relative URL (checked before the single-slash case,
        # since '//...' also starts with '/')
        if cleaned.startswith('//'):
            return 'http:' + cleaned

        # Root-relative URL
        if cleaned.startswith('/'):
            return 'http://' + domain + cleaned

        # Query-only reference
        if cleaned.startswith('?'):
            return 'http://' + domain + '/' + cleaned

        # Explicitly current-directory relative: drop the leading '.'
        if cleaned.startswith('./'):
            return 'http://' + domain + cleaned[1:]

        # Otherwise assume a bare relative path
        return 'http://' + domain + '/' + cleaned

    def extract_javascript(domain, source_code):
        '''
        Return unique, normalized URLs of external <script src=...> tags in HTML
        '''
        soup = BeautifulSoup(source_code, 'html.parser')
        found = set()
        for tag in soup.find_all('script'):
            src = tag.get('src')
            if src:
                found.add(normalize_url(domain, src))
        return list(found)

    def extract_links(domain, source_code):
        '''
        Return unique, normalized URLs of <a href=...> links in HTML
        '''
        soup = BeautifulSoup(source_code, 'html.parser')
        found = set()
        for anchor in soup.find_all('a'):
            href = anchor.get('href')
            if href:
                found.add(normalize_url(domain, href))
        return list(found)

    def extract_styles(domain, source_code):
        '''
        Return unique, normalized URLs referenced by <link href=...> tags in HTML.

        NOTE(review): this matches every <link> element (favicons, preloads,
        canonical links), not only rel="stylesheet" -- intentional breadth
        for takeover hunting, presumably.
        '''
        soup = BeautifulSoup(source_code, 'html.parser')
        found = set()
        for link in soup.find_all('link'):
            href = link.get('href')
            if href:
                found.add(normalize_url(domain, href))
        return list(found)

    def extract_cors(headers):
        '''
        Extract the allowed origins from the Access-Control-Allow-Origin header.

        Returns an empty list when the header is missing (the original code
        raised KeyError here, crashing the scan loop for any response without
        CORS headers) or when the policy is the wildcard '*' (any origin is
        allowed, so there is no specific domain worth checking).
        '''
        acao = headers.get('Access-Control-Allow-Origin')
        if not acao:
            # Header absent or empty -- nothing to extract
            return []
        # Strip per-origin whitespace so 'a.com, b.com' yields clean values
        cors = [origin.strip() for origin in acao.split(',')]
        if '*' in cors:
            # Use your imagination here
            return []
        return cors

    def extract_domain(url):
        '''
        Return the network-location (host[:port]) component of *url*.

        Yields an empty string when *url* has no scheme/netloc part.
        '''
        parsed = urlparse(url)
        return parsed.netloc

    if __name__ == '__main__':
        # This is sample of intended functionality
        # ----
        # Note that there is a missing functionality for showing
        # origin domain name where takeover was discovered (if any)
        # ----

        domains = []  # Database retrieval
        results = {}
        for d in domains:
            for prefix in ['http://', 'https://']:
                # Trying both HTTP and HTTPS where HTTPS has higher priority
                # (Thus second in the list)

                # Reset per attempt. The original never assigned None, so the
                # first failed request raised NameError at the `r is None`
                # check, and later failures silently reused the previous
                # iteration's stale response.
                r = None
                try:
                    r = requests.get('{}{}'.format(prefix, d),
                                     timeout=GLOBAL_HTTP_TIMEOUT,
                                     verify=False, headers=UA)
                except requests.RequestException:
                    # Narrowed from bare `except:` so Ctrl-C and real bugs
                    # are not swallowed; network errors just skip this prefix.
                    pass

                if r is None:
                    # Connection refused / NXDOMAIN / timeout / ...
                    continue

                urls = extract_javascript(d, r.text)
                urls += extract_links(d, r.text)
                urls += extract_styles(d, r.text)
                urls += extract_cors(r.headers)

                # takeoverable = subdomain_takeover.check([extract_domain(u) for u in urls])
                # ...