golinkfinderx.py - gist by @scumdestroy, created Sep 8, 2025
    #!/usr/bin/env python3

'''
Get the excellent GoLinkFinder tool via github.com/0xsha/GoLinkFinder
... based on my boy here: https://github.com/GerbenJavado/LinkFinder

Anyway, this gives excellent clean, parsed output after running GoLinkFinder
on a gang of URLs.

Use it like:
    python3 golinkfinderx.py urls.txt
'''
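
# For reference, a minimal urls.txt might look like this (hosts are made up;
# bare domains get https:// prepended by load_urls below):
#
#   https://example.com
#   https://app.example.com/login
#   api.example.com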

import sys
import re
import subprocess
from urllib.parse import urlparse

def load_urls(urls_file):
    urls = []
    domains = set()

    try:
        with open(urls_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # Storing each domain for later - just trust me
                if line.startswith(('http://', 'https://')):
                    parsed = urlparse(line)
                    domains.add(parsed.netloc.lower())
                    urls.append(line)
                else:
                    # Bare entries count as domains even when they aren't quite,
                    # and get https:// bolted on so GoLinkFinder can fetch them
                    domains.add(line.lower())
                    urls.append('https://' + line)
    except FileNotFoundError:
        print(f"Error: {urls_file} not found")
        sys.exit(1)

    return urls, domains
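
# Heads up: the domains set built above is what parse_golinkfinder_output()
# later uses to split in-scope URLs from external ones.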

def run_golinkfinder(urls):
    all_output = []
    print(f"Running GoLinkFinder on {len(urls)} URLs...")

    for i, url in enumerate(urls, 1):
        print(f"[{i}/{len(urls)}] Processing {url}")

        try:
            result = subprocess.run(
                ['GoLinkFinder', '-d', url],
                capture_output=True,
                text=True,
                timeout=15
            )

            if result.returncode == 0:
                lines = [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
                all_output.extend(lines)
                print(f"  ✓ Found {len(lines)} links")
            else:
                print(f"  ✗ R.I.P. to {url}: {result.stderr.strip()}")

        except subprocess.TimeoutExpired:
            print(f"  ✗ Timeout processing {url}")
        except FileNotFoundError:
            print("Error: GoLinkFinder isn't even here, bro, what are you even doing?")
            print("go install github.com/0xsha/GoLinkFinder@latest")
            sys.exit(1)
        except Exception as e:
            print(f"  ✗ Critical death-error while processing {url}: {e}")

    print(f"\nTotal links collected: {len(all_output)}")
    return all_output
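
# The 15-second timeout above is per URL; slow targets may need a larger
# value (an assumption about your targets, tune to taste).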

def clean_path(path):
    # Clean up wordlist entries the way I like them:
    # strip any leading dots, slashes, and backslashes
    if not path:
        return ""

    return re.sub(r'^[./\\]+', '', path)
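
# A couple of illustrative, made-up examples:
#   clean_path('./assets/js/app.js') -> 'assets/js/app.js'
#   clean_path('//cdn/img/logo.png') -> 'cdn/img/logo.png'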

def extract_path_from_url(url):
    try:
        parsed = urlparse(url)
        path = parsed.path
        if parsed.query:
            path += '?' + parsed.query
        if parsed.fragment:
            path += '#' + parsed.fragment
        return clean_path(path)
    except ValueError:
        return ""

def is_valid_url(line):
    return line.startswith(('http://', 'https://'))

def is_path(line):
    # Relative/absolute paths, or anything path-ish that isn't a full URL
    return (line.startswith(('/', './', '../')) or
            (not line.startswith(('http://', 'https://')) and
             ('.' in line or '/' in line)))
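
# Rough classification, with made-up inputs:
#   is_valid_url('https://example.com/x') -> True
#   is_path('/admin')      -> True  (leading slash)
#   is_path('backup.zip')  -> True  (has a dot, not a URL)
#   is_path('admin-panel') -> False (but the '-' fallback in
#                                    parse_golinkfinder_output still keeps it)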

def parse_golinkfinder_output(output_lines, target_domains):
    wordlist = set()
    external_urls = set()
    domain_urls = set()

    for line_num, line in enumerate(output_lines, 1):
        line = line.strip()
        if not line:
            continue

        # Remove content that is 99.9% likely to suck, e.g. dates, stray JS tokens
        if re.match(r'^\d{1,2}/\d{1,2}/\d{4}$', line):
            continue

        if is_valid_url(line):
            try:
                parsed = urlparse(line)
                domain = parsed.netloc.lower()

                if domain in target_domains:
                    domain_urls.add(line)
                    path = extract_path_from_url(line)
                    if path:
                        wordlist.add(path)
                else:
                    external_urls.add(line)

            except Exception as e:
                print(f"Error parsing URL on line {line_num}: {line} - {e}", file=sys.stderr)

        elif is_path(line):
            cleaned_path = clean_path(line)
            if cleaned_path:
                wordlist.add(cleaned_path)
        else:
            # Last resort: keep anything path-ish that isn't just a number
            if any(char in line for char in ['/', '.', '-', '_']) and not line.isdigit():
                cleaned_path = clean_path(line)
                if cleaned_path:
                    wordlist.add(cleaned_path)

    return wordlist, external_urls, domain_urls

def write_output_files(wordlist, external_urls, domain_urls, raw_output=None):
    if raw_output:
        with open('golinkfinder-raw-output.txt', 'w') as f:
            for line in raw_output:
                f.write(line + '\n')
        print(f"Wrote {len(raw_output)} raw lines to golinkfinder-raw-output.txt")

    with open('golinkfinder-wordlist.txt', 'w') as f:
        for path in sorted(wordlist):
            f.write(path + '\n')
    print(f"Wrote {len(wordlist)} paths to golinkfinder-wordlist.txt")

    with open('golinkfinder-external-urls.txt', 'w') as f:
        for url in sorted(external_urls):
            f.write(url + '\n')
    print(f"Wrote {len(external_urls)} external URLs to golinkfinder-external-urls.txt")

    with open('golinkfinder-full-urls.txt', 'w') as f:
        for url in sorted(domain_urls):
            f.write(url + '\n')
    print(f"Wrote {len(domain_urls)} domain-matching URLs to golinkfinder-full-urls.txt")

def main():
    if len(sys.argv) < 2:
        print("Usage: python3 golinkfinderx.py <urls.txt>")
        print("\nThis script will:")
        print("  1. Run GoLinkFinder on each URL in the file")
        print("  2. Collect and parse all output")
        print("  3. Create organized output files:")
        print("     - golinkfinder-wordlist.txt (cleaned paths)")
        print("     - golinkfinder-external-urls.txt (external domain URLs)")
        print("     - golinkfinder-full-urls.txt (target domain URLs)")
        print("     - golinkfinder-raw-output.txt (raw GoLinkFinder output)")
        print("\nRequires: GoLinkFinder (go install github.com/0xsha/GoLinkFinder@latest)")
        sys.exit(1)

    urls_file = sys.argv[1]

    urls, target_domains = load_urls(urls_file)
    print(f"Loaded {len(urls)} URLs with {len(target_domains)} target domains")
    print(f"Target domains: {sorted(target_domains)}")

    raw_output = run_golinkfinder(urls)

    if not raw_output:
        print("No output collected from GoLinkFinder")
        sys.exit(1)

    print("\nParsing collected output...")
    wordlist, external_urls, domain_urls = parse_golinkfinder_output(raw_output, target_domains)

    print("\nWriting output files...")
    write_output_files(wordlist, external_urls, domain_urls, raw_output)

    print("\n=== SUMMARY ===")
    print(f"URLs processed: {len(urls)}")
    print(f"Raw boys found: {len(raw_output)}")
    print(f"Wordlist length: {len(wordlist)}")
    print(f"External boys: {len(external_urls)}")
    print(f"In-scope URLs: {len(domain_urls)}")
    print("\nFiles created:")
    print("  - golinkfinder-raw-output.txt")
    print("  - golinkfinder-wordlist.txt")
    print("  - golinkfinder-external-urls.txt")
    print("  - golinkfinder-full-urls.txt")

if __name__ == "__main__":
    main()