Last active
March 25, 2025 01:40
-
-
Save and0x00/dd5fb8ede37325c80d84f40ba7f94707 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Script to split files into chunks, process each line with "subfy",
# and consolidate results while avoiding duplicate processing via hashes.
# --- Usage ---------------------------------------------------------------
#######################################
# Print the command-line help text and terminate the script.
# Globals:   HOME (read, only to show the default paths)
# Arguments: none
# Outputs:   help text on stderr, so that it never mixes with result data
#            written to stdout (e.g. in --terminal mode)
# Returns:   does not return; exits with status 1
#######################################
usage() {
  cat >&2 <<EOF
Usage: $0 [options] <file1> [file2 ... fileN]

Options:
  --no-hash          Skip hash file creation/verification.
  --terminal         Display output on terminal; skip hash file operations.
  --debug            Enable debug output.
  -o <output>        Output file (default: ${HOME}/.subdomains_db).
  --hash-file <log>  Hash log file (default: ${HOME}/.processed_hashes.log).
  --base-dir <dir>   Base directory for temporary files (default: /tmp).
EOF
  exit 1
}
# --- Global Variables ----------------------------------------------------
# Defaults; most of these can be overridden from the command line.

# Consolidated results file (-o). An empty value means "print to stdout".
OUTPUT="${HOME}/.subdomains_db"
# true when hash verification/logging is disabled (--no-hash, --terminal).
NO_HASH=false
# true when debug messages should be printed (--debug).
DEBUG=false
# Log of "<sha256> <path>" lines for inputs that were already processed.
HASH_LOG="${HOME}/.processed_hashes.log"
# Parent directory for the per-run temporary directory (--base-dir).
BASE_DIR="/tmp"
# Size in bytes at which a temporary result file is rotated. Never
# reassigned, so it is marked read-only.
readonly MAX_FILE_SIZE=1048576 # 1MB
# Input files collected from the positional arguments.
INPUT_FILES=()
# --- Parse Arguments -----------------------------------------------------
#######################################
# Parse the command line into the global configuration variables.
# Globals:   OUTPUT, HASH_LOG, BASE_DIR, NO_HASH, DEBUG, INPUT_FILES (written)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   diagnostics on stderr for unknown options or missing values
# Returns:   0 on success; calls usage (which exits the script) when an
#            option is unknown, an option value is missing, or no input
#            file was given
#######################################
parse_args() {
  while (($# > 0)); do
    case "$1" in
      -o | --hash-file | --base-dir)
        # These options take a value. Without this check a trailing
        # "-o" would silently set OUTPUT to the empty string.
        if (($# < 2)); then
          echo "Option $1 requires an argument." >&2
          usage
        fi
        case "$1" in
          -o) OUTPUT="$2" ;;
          --hash-file) HASH_LOG="$2" ;;
          --base-dir) BASE_DIR="$2" ;;
        esac
        shift
        ;;
      --no-hash)
        NO_HASH=true
        ;;
      --terminal)
        # Terminal mode: results go to stdout and no hashes are used.
        NO_HASH=true
        OUTPUT=""
        ;;
      --debug)
        DEBUG=true
        ;;
      -h | --help)
        usage
        ;;
      --)
        # End of options: everything that follows is an input file,
        # even if it starts with a dash.
        shift
        INPUT_FILES+=("$@")
        break
        ;;
      -*)
        echo "Unknown option: $1" >&2
        usage
        ;;
      *)
        INPUT_FILES+=("$1")
        ;;
    esac
    shift
  done

  ((${#INPUT_FILES[@]} >= 1)) || usage
}
# --- Setup Temporary Directory -------------------------------------------
#######################################
# Create the per-run temporary directory below BASE_DIR.
# Globals:   BASE_DIR, DEBUG (read); TEMP_DIR, TEMP_PREFIX (written)
# Arguments: none
# Outputs:   debug line on stdout when DEBUG is true; error on stderr
# Returns:   0 on success; exits the script with status 1 when the
#            directory cannot be created
#######################################
setup_temp_dir() {
  # Abort on failure: with an empty TEMP_DIR every later path would
  # resolve against the filesystem root ("/chunk_*", "/tmp_*").
  if ! TEMP_DIR=$(mktemp -d -p "$BASE_DIR" tmp.XXXXXX) || [[ -z "$TEMP_DIR" ]]; then
    echo "Error: cannot create temporary directory in '$BASE_DIR'." >&2
    exit 1
  fi
  if [[ "$DEBUG" == true ]]; then
    echo "Temporary directory: $TEMP_DIR"
  fi
  # Common prefix of all temporary result files of this run.
  TEMP_PREFIX="${TEMP_DIR}/tmp_"
}
# --- Cleanup -------------------------------------------------------------
#######################################
# Remove the per-run temporary directory. Installed as the EXIT trap.
# Globals:   TEMP_DIR, DEBUG (read)
# Arguments: none
# Outputs:   debug line on stdout when DEBUG is true
# Returns:   0
#######################################
cleanup() {
  if [[ "$DEBUG" == true ]]; then
    echo "Cleaning up temporary files..."
  fi
  # TEMP_DIR is empty when the script exits before setup_temp_dir ran.
  # "--" keeps a path that starts with "-" (relative --base-dir) from
  # being parsed as rm options.
  if [[ -n "${TEMP_DIR:-}" ]]; then
    rm -rf -- "$TEMP_DIR"
  fi
  return 0
}
trap cleanup EXIT
# --- Hash Verification ---------------------------------------------------
#######################################
# Tell whether a file hash is already recorded in the hash log.
# Globals:   HASH_LOG (read)
# Arguments: $1 - hash to look up
# Returns:   0 if a log line starts with "<hash> ", non-zero otherwise
#            (including when the hash is empty or the log does not exist)
#######################################
file_processed() {
  local hash="$1"

  # An empty hash would turn the pattern into "^ " and match unrelated
  # lines; a missing log (first run) would make grep print an error.
  [[ -n "$hash" && -f "$HASH_LOG" ]] || return 1

  grep -q -- "^${hash} " "$HASH_LOG"
}
# --- Generate Random String ----------------------------------------------
#######################################
# Print a random token of 16 lowercase hexadecimal characters (8 bytes).
# Arguments: none
# Outputs:   the token followed by a newline on stdout
# Returns:   0 on success, 1 if no random source produced a token
#######################################
gen_rand() {
  local token

  if token=$(openssl rand -hex 8 2>/dev/null) && [[ -n "$token" ]]; then
    printf '%s\n' "$token"
    return 0
  fi

  # Fallback for systems where openssl is missing or fails: read 8 bytes
  # from the kernel random device and hex-encode them.
  token=$(od -An -N8 -tx1 /dev/urandom 2>/dev/null | tr -d ' \n')
  [[ -n "$token" ]] || return 1
  printf '%s\n' "$token"
}
# --- Process File --------------------------------------------------------
#######################################
# Split one input file into 1000-line chunks and pipe every line through
# "subfy -f /dev/stdin", appending the sorted, de-duplicated output of each
# invocation to size-limited temporary result files.
# Globals:   TEMP_DIR, TEMP_PREFIX, MAX_FILE_SIZE, DEBUG (read)
#            SUBFY_BIN (read, optional) - command to run instead of the
#            default /usr/local/bin/subfy
# Arguments: $1 - path of the input file
# Outputs:   debug lines on stdout when DEBUG is true; errors on stderr
# Returns:   0 on success, 1 if the file could not be split
#######################################
process_file() {
  local file="$1"
  local subfy_bin="${SUBFY_BIN:-/usr/local/bin/subfy}"
  local chunk line size rand_str tmp_file
  local counter=1
  local -a chunks=()

  [[ "$DEBUG" == true ]] && echo "Splitting '$file' into chunks..."

  # Split file into 1000-line chunks
  if ! split -d -a 5 -l 1000 -- "$file" "${TEMP_DIR}/chunk_"; then
    echo "Error: failed to split '$file'." >&2
    # Drop partial chunks so they are not picked up by a later call.
    rm -f -- "${TEMP_DIR}/chunk_"*
    return 1
  fi

  shopt -s nullglob
  chunks=("${TEMP_DIR}/chunk_"*)
  shopt -u nullglob
  [[ "$DEBUG" == true ]] && echo "File '$file' split into ${#chunks[@]} chunks."

  for chunk in "${chunks[@]}"; do
    rand_str=$(gen_rand)
    tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
    [[ "$DEBUG" == true ]] && echo "Creating temp file: $tmp_file"

    # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Process line with subfy; sort and remove duplicates per line.
      # Sort scratch space lives in TEMP_DIR so that --base-dir is honored.
      printf '%s\n' "$line" \
        | "$subfy_bin" -f /dev/stdin \
        | sort -T "$TEMP_DIR" -S 50% -u >> "$tmp_file"

      # Check file size and start a new file if needed.
      if [[ -f "$tmp_file" ]]; then
        size=$(stat -c%s "$tmp_file")
        if ((size >= MAX_FILE_SIZE)); then
          counter=$((counter + 1))
          rand_str=$(gen_rand)
          tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
          [[ "$DEBUG" == true ]] && echo "Max size reached. New temp file: $tmp_file"
        fi
      fi
    done < "$chunk"

    rm -f -- "$chunk"
    [[ "$DEBUG" == true ]] && echo "Removed chunk: $chunk"
  done

  # Explicit status: the trailing debug test above is false when DEBUG is
  # off and must not become the function's return value.
  return 0
}
# --- Merge Output --------------------------------------------------------
#######################################
# Merge all temporary result files of this run into one sorted file
# without duplicate lines.
# Globals:   TEMP_PREFIX, TEMP_DIR, DEBUG (read)
# Arguments: $1 - path of the merged file to create
# Outputs:   debug line on stdout when DEBUG is true
# Returns:   0 on success, 1 if the merged file could not be written
#######################################
merge_temp_files() {
  local output_file="$1"
  local -a parts=()

  # Expand the glob into an array so that "no temp files" is an empty
  # list instead of the literal pattern being passed to sort.
  shopt -s nullglob
  parts=("${TEMP_PREFIX}"*.tmp)
  shopt -u nullglob

  if ((${#parts[@]} == 0)); then
    # Nothing was produced: create an empty result file.
    : > "$output_file" || return 1
  else
    sort -T "${TEMP_DIR:-/tmp}" -S 50% -u -- "${parts[@]}" > "$output_file" \
      || return 1
  fi

  if [[ "$DEBUG" == true ]]; then
    echo "Merged temporary files into: $output_file"
  fi
  return 0
}
# --- Merge With Existing Output ------------------------------------------
#######################################
# Fold this run's results into the persistent output file.
# If OUTPUT already exists, both files are merged (sorted, duplicates
# removed) into "${OUTPUT}.tmp", which then replaces OUTPUT. Otherwise the
# new results file is simply moved to OUTPUT.
# Globals:   OUTPUT, TEMP_DIR, DEBUG (read)
# Arguments: $1 - path of this run's merged results file
# Outputs:   debug line on stdout when DEBUG is true; errors on stderr
# Returns:   0 on success, 1 if merging or moving failed (OUTPUT is left
#            untouched in that case)
#######################################
merge_with_existing() {
  local new_output="$1"
  local before_count after_count count

  if [[ -f "$OUTPUT" ]]; then
    before_count=$(du -b "$OUTPUT" | cut -f1)

    # Only replace the existing file when sort succeeded; a failed sort
    # (e.g. disk full) would otherwise overwrite it with partial data.
    if ! sort -T "${TEMP_DIR:-/tmp}" -S 50% -u -- "$OUTPUT" "$new_output" \
      > "${OUTPUT}.tmp"; then
      rm -f -- "${OUTPUT}.tmp"
      echo "Error: failed to merge results into '$OUTPUT'." >&2
      return 1
    fi
    mv -- "${OUTPUT}.tmp" "$OUTPUT" || return 1

    after_count=$(du -b "$OUTPUT" | cut -f1)
    if [[ "$DEBUG" == true ]]; then
      echo "Output updated: $before_count -> $after_count bytes."
    fi
  else
    mv -- "$new_output" "$OUTPUT" || return 1
    # wc -l counts lines, so the message reports lines (not bytes).
    count=$(wc -l < "$OUTPUT")
    if [[ "$DEBUG" == true ]]; then
      echo "Created output file '$OUTPUT' with $count lines."
    fi
  fi
  return 0
}
# --- Main ----------------------------------------------------------------
#######################################
# Script entry point: parse options, process every new input file and
# consolidate the results.
# Globals:   INPUT_FILES, NO_HASH, HASH_LOG, OUTPUT, DEBUG, TEMP_DIR,
#            TEMP_PREFIX (read); HASH_LOG file is appended to
# Arguments: the script's command-line arguments ("$@")
# Outputs:   merged results on stdout when OUTPUT is empty; debug lines on
#            stdout when DEBUG is true; warnings on stderr
# Returns:   0 when the run completed
#######################################
main() {
  local file file_hash session_output
  local processed=0
  local tmp_count=0 total_size=0
  local -a tmp_files=()

  parse_args "$@"
  setup_temp_dir

  for file in "${INPUT_FILES[@]}"; do
    # Skip inputs that cannot be read; otherwise an empty hash would be
    # written to the hash log for them.
    if [[ ! -r "$file" || -d "$file" ]]; then
      echo "Warning: cannot read '$file'; skipping." >&2
      continue
    fi

    file_hash=""
    if [[ "$NO_HASH" == false ]]; then
      file_hash=$(sha256sum -- "$file" | awk '{print $1}')
      if [[ -n "$file_hash" ]] && file_processed "$file_hash"; then
        [[ "$DEBUG" == true ]] && echo "Skipping already processed file: $file"
        continue
      fi
    fi

    [[ "$DEBUG" == true ]] && echo "Processing file: $file"
    process_file "$file"
    processed=$((processed + 1))

    # Record the hash computed above so the file is skipped next time.
    if [[ "$NO_HASH" == false && -n "$file_hash" ]]; then
      echo "$file_hash $file" >> "$HASH_LOG"
    fi
  done

  # Nothing new was processed: there are no temporary results to merge.
  if ((processed == 0)); then
    [[ "$DEBUG" == true ]] && echo "No new input files to process."
    return 0
  fi

  # Merge all temporary results into a final output file.
  session_output="${TEMP_DIR}/session_output.txt"
  merge_temp_files "$session_output"
  if [[ -n "$OUTPUT" ]]; then
    merge_with_existing "$session_output"
    [[ "$DEBUG" == true ]] && echo "Processing complete. Output saved to '$OUTPUT'."
  else
    cat -- "$session_output"
  fi

  # Optional debug report.
  if [[ "$DEBUG" == true ]]; then
    shopt -s nullglob
    tmp_files=("${TEMP_PREFIX}"*.tmp)
    shopt -u nullglob
    tmp_count=${#tmp_files[@]}
    if ((tmp_count > 0)); then
      # The last line of "du -c" is the grand total: "<size><TAB>total".
      total_size=$(du -ch -- "${tmp_files[@]}" 2>/dev/null | tail -n 1 | cut -f1)
    fi
    echo "Temporary files removed: $tmp_count, totaling ~$total_size."
  fi

  # Explicit success status; it becomes the script's exit code.
  return 0
}
# --- Execute -------------------------------------------------------------
# Run the entry point, forwarding all command-line arguments unchanged.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment