#!/usr/bin/env python3
'''
Greetings bug-slaying brothers of the pythonian blood.

This script takes my httpx output after slamming in a bunch of subdomains
and organizes it so its a bit easier to read and work with.

The HTTPX payload I use first is:
httpx -sc -cl -title -bp -server -td -ip -cname -asn -cdn -vhost -fhr | anew httpx-quicc

This script will organize the data by status code and then from smallest to
largest for each status code

output looks like:

#200s
https://goodurl.com/ [18]
https://based.goodurl.com [1049]

#301s
https://redir.based.goodurl.com [223]

and so on...
'''
import sys
import re
from collections import defaultdict

# ANSI color codes made parsing this total hell until I removed them.
# Compiled once at module level instead of on every call.
ANSI_RE = re.compile(r'\x1b\[[0-9;]*m')


def strip_ansi(text):
    """Remove ANSI SGR color escape sequences (e.g. "\\x1b[32m") from text."""
    return ANSI_RE.sub('', text)


def parse_httpx_line(line):
    """Parse one line of httpx output.

    Returns a dict with keys 'url', 'status' (primary status code as a
    string), 'content_length' (int) and 'raw_status' (full first-bracket
    contents, e.g. "301, 302" for redirect chains), or None when the line
    cannot be parsed.
    """
    # Strip colors BEFORE the scheme check: a leading ANSI escape would
    # otherwise make startswith() reject a perfectly valid URL line.
    clean_line = strip_ansi(line).strip()
    if not clean_line or not clean_line.startswith(('http://', 'https://')):
        return None
    try:
        parts = clean_line.split(' ', 1)
        if len(parts) < 2:
            return None
        url, rest = parts

        # bracket extraction ceremony: httpx wraps each field in [...];
        # first is status code, second content length (-sc -cl flag order)
        brackets = re.findall(r'\[([^\]]*)\]', rest)
        if len(brackets) < 2:
            return None

        status_raw = brackets[0]
        try:
            content_length = int(brackets[1])
        except ValueError:
            return None

        # Redirect chains show up as "[301, 302]"; keep only the first hop.
        if ',' in status_raw:
            primary_status = status_raw.split(',')[0].strip()
        else:
            primary_status = status_raw

        # some of httpx's returned status codes are very creative and abstract
        if not primary_status.isdigit():
            return None

        return {
            'url': url,
            'status': primary_status,
            'content_length': content_length,
            'raw_status': status_raw,
        }
    except Exception as e:
        # clean_line is always bound here (assigned before the try block),
        # so this handler can never raise a masking NameError.
        print(f"We in trouble: {clean_line[:50]}... - {e}", file=sys.stderr)
        return None


def main():
    """Read httpx output (file argument or stdin), group by status code,
    and print each group sorted by ascending content length."""
    try:
        # you can pipe your httpx output or pass a filename as an arg
        if len(sys.argv) > 1:
            with open(sys.argv[1], 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()
        else:
            lines = sys.stdin.readlines()

        status_groups = defaultdict(list)
        for line in lines:
            parsed = parse_httpx_line(line)
            if parsed:
                status_groups[parsed['status']].append(parsed)

        if not status_groups:
            print("Your data be looking sus and fried. try again. please.", file=sys.stderr)
            return

        # Numeric sort of status codes, then ascending content length inside
        # each group.
        for status in sorted(status_groups, key=int):
            entries = sorted(status_groups[status], key=lambda x: x['content_length'])
            print(f"#{status}s")
            for entry in entries:
                print(f"{entry['url']} [{entry['content_length']}]")
            print()
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()