golinkfinderx.py - gist by @scumdestroy, created Sep 8, 2025
    #!/usr/bin/env python3

'''
Get the excellent GoLinkFinder tool via github.com/0xsha/GoLinkFinder
... based on my boy here: https://github.com/GerbenJavado/LinkFinder

Anyway, this gives excellent clean, parsed output after running GoLinkFinder
on a gang of URLs.

Use it like:
    python3 golinkfinderx.py urls.txt
'''
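
# For reference, a minimal urls.txt might look like this (hosts are made up;
# bare domains get https:// prepended by load_urls below):
#
#   https://example.com
#   https://app.example.com/login
#   api.example.com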

import sys
import re
import subprocess
from urllib.parse import urlparse

def load_urls(urls_file):
    urls = []
    domains = set()

    try:
        with open(urls_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # Storing each domain for later - just trust me
                if line.startswith(('http://', 'https://')):
                    parsed = urlparse(line)
                    domains.add(parsed.netloc.lower())
                    urls.append(line)
                else:
                    # Bare entries count as domains even when they aren't quite,
                    # and get https:// bolted on so GoLinkFinder can fetch them
                    domains.add(line.lower())
                    urls.append('https://' + line)
    except FileNotFoundError:
        print(f"Error: {urls_file} not found")
        sys.exit(1)

    return urls, domains
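
# Heads up: the domains set built above is what parse_golinkfinder_output()
# later uses to split in-scope URLs from external ones.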

def run_golinkfinder(urls):
    all_output = []
    print(f"Running GoLinkFinder on {len(urls)} URLs...")

    for i, url in enumerate(urls, 1):
        print(f"[{i}/{len(urls)}] Processing {url}")

        try:
            result = subprocess.run(
                ['GoLinkFinder', '-d', url],
                capture_output=True,
                text=True,
                timeout=15
            )

            if result.returncode == 0:
                lines = [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
                all_output.extend(lines)
                print(f"  ✓ Found {len(lines)} links")
            else:
                print(f"  ✗ R.I.P. to {url}: {result.stderr.strip()}")

        except subprocess.TimeoutExpired:
            print(f"  ✗ Timeout processing {url}")
        except FileNotFoundError:
            print("Error: GoLinkFinder isn't even here, bro, what are you even doing?")
            print("go install github.com/0xsha/GoLinkFinder@latest")
            sys.exit(1)
        except Exception as e:
            print(f"  ✗ Critical death-error while processing {url}: {e}")

    print(f"\nTotal links collected: {len(all_output)}")
    return all_output
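
# The 15-second timeout above is per URL; slow targets may need a larger
# value (an assumption about your targets, tune to taste).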

def clean_path(path):
    # Clean up wordlist entries the way I like them:
    # strip any leading dots, slashes, and backslashes
    if not path:
        return ""

    return re.sub(r'^[./\\]+', '', path)
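
# A couple of illustrative, made-up examples:
#   clean_path('./assets/js/app.js') -> 'assets/js/app.js'
#   clean_path('//cdn/img/logo.png') -> 'cdn/img/logo.png'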

def extract_path_from_url(url):
    try:
        parsed = urlparse(url)
        path = parsed.path
        if parsed.query:
            path += '?' + parsed.query
        if parsed.fragment:
            path += '#' + parsed.fragment
        return clean_path(path)
    except ValueError:
        return ""

def is_valid_url(line):
    return line.startswith(('http://', 'https://'))

def is_path(line):
    # Relative/absolute paths, or anything path-ish that isn't a full URL
    return (line.startswith(('/', './', '../')) or
            (not line.startswith(('http://', 'https://')) and
             ('.' in line or '/' in line)))
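
# Rough classification, with made-up inputs:
#   is_valid_url('https://example.com/x') -> True
#   is_path('/admin')      -> True  (leading slash)
#   is_path('backup.zip')  -> True  (has a dot, not a URL)
#   is_path('admin-panel') -> False (but the '-' fallback in
#                                    parse_golinkfinder_output still keeps it)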

def parse_golinkfinder_output(output_lines, target_domains):
    wordlist = set()
    external_urls = set()
    domain_urls = set()

    for line_num, line in enumerate(output_lines, 1):
        line = line.strip()
        if not line:
            continue

        # Remove content that is 99.9% likely to suck, e.g. dates, stray JS tokens
        if re.match(r'^\d{1,2}/\d{1,2}/\d{4}$', line):
            continue

        if is_valid_url(line):
            try:
                parsed = urlparse(line)
                domain = parsed.netloc.lower()

                if domain in target_domains:
                    domain_urls.add(line)
                    path = extract_path_from_url(line)
                    if path:
                        wordlist.add(path)
                else:
                    external_urls.add(line)

            except Exception as e:
                print(f"Error parsing URL on line {line_num}: {line} - {e}", file=sys.stderr)

        elif is_path(line):
            cleaned_path = clean_path(line)
            if cleaned_path:
                wordlist.add(cleaned_path)
        else:
            # Last resort: keep anything path-ish that isn't just a number
            if any(char in line for char in ['/', '.', '-', '_']) and not line.isdigit():
                cleaned_path = clean_path(line)
                if cleaned_path:
                    wordlist.add(cleaned_path)

    return wordlist, external_urls, domain_urls

def write_output_files(wordlist, external_urls, domain_urls, raw_output=None):
    if raw_output:
        with open('golinkfinder-raw-output.txt', 'w') as f:
            for line in raw_output:
                f.write(line + '\n')
        print(f"Wrote {len(raw_output)} raw lines to golinkfinder-raw-output.txt")

    with open('golinkfinder-wordlist.txt', 'w') as f:
        for path in sorted(wordlist):
            f.write(path + '\n')
    print(f"Wrote {len(wordlist)} paths to golinkfinder-wordlist.txt")

    with open('golinkfinder-external-urls.txt', 'w') as f:
        for url in sorted(external_urls):
            f.write(url + '\n')
    print(f"Wrote {len(external_urls)} external URLs to golinkfinder-external-urls.txt")

    with open('golinkfinder-full-urls.txt', 'w') as f:
        for url in sorted(domain_urls):
            f.write(url + '\n')
    print(f"Wrote {len(domain_urls)} domain-matching URLs to golinkfinder-full-urls.txt")

def main():
    if len(sys.argv) < 2:
        print("Usage: python3 golinkfinderx.py <urls.txt>")
        print("\nThis script will:")
        print("  1. Run GoLinkFinder on each URL in the file")
        print("  2. Collect and parse all output")
        print("  3. Create organized output files:")
        print("     - golinkfinder-wordlist.txt (cleaned paths)")
        print("     - golinkfinder-external-urls.txt (external domain URLs)")
        print("     - golinkfinder-full-urls.txt (target domain URLs)")
        print("     - golinkfinder-raw-output.txt (raw GoLinkFinder output)")
        print("\nRequires: GoLinkFinder (go install github.com/0xsha/GoLinkFinder@latest)")
        sys.exit(1)

    urls_file = sys.argv[1]

    urls, target_domains = load_urls(urls_file)
    print(f"Loaded {len(urls)} URLs with {len(target_domains)} target domains")
    print(f"Target domains: {sorted(target_domains)}")

    raw_output = run_golinkfinder(urls)

    if not raw_output:
        print("No output collected from GoLinkFinder")
        sys.exit(1)

    print("\nParsing collected output...")
    wordlist, external_urls, domain_urls = parse_golinkfinder_output(raw_output, target_domains)

    print("\nWriting output files...")
    write_output_files(wordlist, external_urls, domain_urls, raw_output)

    print("\n=== SUMMARY ===")
    print(f"URLs processed: {len(urls)}")
    print(f"Raw boys found: {len(raw_output)}")
    print(f"Wordlist length: {len(wordlist)}")
    print(f"External boys: {len(external_urls)}")
    print(f"In-scope URLs: {len(domain_urls)}")
    print("\nFiles created:")
    print("  - golinkfinder-raw-output.txt")
    print("  - golinkfinder-wordlist.txt")
    print("  - golinkfinder-external-urls.txt")
    print("  - golinkfinder-full-urls.txt")

if __name__ == "__main__":
    main()