#!/usr/bin/env python3
'''
Get the excellent GoLinkFinder tool via github.com/0xsha/GoLinkFinder ...
based on my boy here: https://github.com/GerbenJavado/LinkFinder

Anyway, this gives an excellent clean and parsed output after running
GoLinkFinder on a gang of URLs.

Use it like:
    python3 golinkfinderx.py urls.txt

Each line of urls.txt is a full URL or a bare domain; bare domains get
https:// prepended automatically.
'''

import re
import subprocess
import sys
from urllib.parse import urlparse


def load_urls(urls_file):
    """Read URLs/domains from the input file and collect the target domains."""
    urls = []
    domains = set()
    try:
        with open(urls_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                urls.append(line)
                # Storing the domain for later - just trust me
                if line.startswith(('http://', 'https://')):
                    parsed = urlparse(line)
                    domains.add(parsed.netloc.lower())
                else:
                    # Bare domains count as domains too; prepend a scheme so
                    # GoLinkFinder gets a proper URL.
                    domains.add(line.lower())
                    urls[-1] = 'https://' + line
    except FileNotFoundError:
        print(f"Error: {urls_file} not found")
        sys.exit(1)
    return urls, domains


def run_golinkfinder(urls):
    """Run GoLinkFinder against each URL and collect every non-empty output line."""
    all_output = []
    print(f"Running GoLinkFinder on {len(urls)} URLs...")
    for i, url in enumerate(urls, 1):
        print(f"[{i}/{len(urls)}] Processing {url}")
        try:
            result = subprocess.run(
                ['GoLinkFinder', '-d', url],
                capture_output=True,
                text=True,
                timeout=15
            )
            if result.returncode == 0:
                lines = [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
                all_output.extend(lines)
                print(f"  ✓ Found {len(lines)} links")
            else:
                print(f"  ✗ R.I.P. to {url}: {result.stderr.strip()}")
        except subprocess.TimeoutExpired:
            print(f"  ✗ Timeout processing {url}")
        except FileNotFoundError:
            print("Error: GoLinkFinder isn't even here, bro, what are you even doing?")
            print("go install github.com/0xsha/GoLinkFinder@latest")
            sys.exit(1)
        except Exception as e:
            print(f"  ✗ Critical Death-Error while processing {url}: {e}")
    print(f"\nTotal links collected: {len(all_output)}")
    return all_output


def clean_path(path):
    """Clean up paths for the wordlist the way that I like them."""
    if not path:
        return ""
    # Strip leading ./, ../, \ and / so everything becomes a relative path.
    cleaned = re.sub(r'^[./\\]+', '', path)
    return cleaned.lstrip('/')


def extract_path_from_url(url):
    """Pull the path (plus query string and fragment) out of a full URL."""
    try:
        parsed = urlparse(url)
        path = parsed.path
        if parsed.query:
            path += '?' + parsed.query
        if parsed.fragment:
            path += '#' + parsed.fragment
        return clean_path(path)
    except ValueError:
        return ""


def is_valid_url(line):
    return line.startswith(('http://', 'https://'))


def is_path(line):
    return (line.startswith(('/', './', '../')) or
            (not line.startswith(('http://', 'https://')) and
             ('.' in line or '/' in line)))


def parse_golinkfinder_output(output_lines, target_domains):
    """Split raw GoLinkFinder output into a wordlist, external URLs and in-scope URLs."""
    wordlist = set()
    external_urls = set()
    domain_urls = set()
    for line_num, line in enumerate(output_lines, 1):
        line = line.strip()
        if not line:
            continue
        # Remove content that is 99.9% likely to suck, i.e. dates, JS elements
        if re.match(r'^\d{1,2}/\d{1,2}/\d{4}$', line):
            continue
        if is_valid_url(line):
            try:
                parsed = urlparse(line)
                domain = parsed.netloc.lower()
                if domain in target_domains:
                    domain_urls.add(line)
                    path = extract_path_from_url(line)
                    if path:
                        wordlist.add(path)
                else:
                    external_urls.add(line)
            except Exception as e:
                print(f"Error parsing URL on line {line_num}: {line} - {e}", file=sys.stderr)
        elif is_path(line):
            cleaned_path = clean_path(line)
            if cleaned_path:
                wordlist.add(cleaned_path)
        else:
            # Anything else that still looks path-ish (separators, dots, dashes,
            # underscores) goes into the wordlist; pure numbers get dropped.
            if any(char in line for char in ['/', '.', '-', '_']) and not line.isdigit():
                cleaned_path = clean_path(line)
                if cleaned_path:
                    wordlist.add(cleaned_path)
    return wordlist, external_urls, domain_urls


def write_output_files(wordlist, external_urls, domain_urls, raw_output=None):
    """Write the raw output, wordlist, external URLs and in-scope URLs to disk."""
    if raw_output:
        with open('golinkfinder-raw-output.txt', 'w') as f:
            for line in raw_output:
                f.write(line + '\n')
        print(f"Wrote {len(raw_output)} raw lines to golinkfinder-raw-output.txt")

    with open('golinkfinder-wordlist.txt', 'w') as f:
        for path in sorted(wordlist):
            f.write(path + '\n')
    print(f"Wrote {len(wordlist)} paths to golinkfinder-wordlist.txt")

    with open('golinkfinder-external-urls.txt', 'w') as f:
        for url in sorted(external_urls):
            f.write(url + '\n')
    print(f"Wrote {len(external_urls)} external URLs to golinkfinder-external-urls.txt")

    with open('golinkfinder-full-urls.txt', 'w') as f:
        for url in sorted(domain_urls):
            f.write(url + '\n')
    print(f"Wrote {len(domain_urls)} domain-matching URLs to golinkfinder-full-urls.txt")


def main():
    if len(sys.argv) < 2:
        print("Usage: python3 golinkfinderx.py <urls_file>")
        print("\nThis script will:")
        print("  1. Run GoLinkFinder on each URL in the file")
        print("  2. Collect and parse all output")
        print("  3. Create organized output files:")
        print("     - golinkfinder-wordlist.txt (cleaned paths)")
        print("     - golinkfinder-external-urls.txt (external domain URLs)")
        print("     - golinkfinder-full-urls.txt (target domain URLs)")
        print("     - golinkfinder-raw-output.txt (raw GoLinkFinder output)")
        print("\nRequires: GoLinkFinder (go install github.com/0xsha/GoLinkFinder@latest)")
        sys.exit(1)

    urls_file = sys.argv[1]
    urls, target_domains = load_urls(urls_file)
    print(f"Loaded {len(urls)} URLs with {len(target_domains)} target domains")
    print(f"Target domains: {sorted(target_domains)}")

    raw_output = run_golinkfinder(urls)
    if not raw_output:
        print("No output collected from GoLinkFinder")
        sys.exit(1)

    print("\nParsing collected output...")
    wordlist, external_urls, domain_urls = parse_golinkfinder_output(raw_output, target_domains)

    print("\nWriting output files...")
    write_output_files(wordlist, external_urls, domain_urls, raw_output)

    print("\n=== SUMMARY ===")
    print(f"URLs processed: {len(urls)}")
    print(f"Raw lines collected: {len(raw_output)}")
    print(f"Wordlist length: {len(wordlist)}")
    print(f"External URLs: {len(external_urls)}")
    print(f"In-scope URLs: {len(domain_urls)}")
    print("\nFiles created:")
    print("  - golinkfinder-raw-output.txt")
    print("  - golinkfinder-wordlist.txt")
    print("  - golinkfinder-external-urls.txt")
    print("  - golinkfinder-full-urls.txt")


if __name__ == "__main__":
    main()