Skip to content

Instantly share code, notes, and snippets.

@and0x00
Last active March 25, 2025 01:40
Show Gist options
  • Select an option

  • Save and0x00/dd5fb8ede37325c80d84f40ba7f94707 to your computer and use it in GitHub Desktop.

Select an option

Save and0x00/dd5fb8ede37325c80d84f40ba7f94707 to your computer and use it in GitHub Desktop.

Revisions

  1. and0x00 revised this gist Mar 25, 2025. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions subfyx.sh
    Original file line number Diff line number Diff line change
    @@ -133,11 +133,11 @@ merge_with_existing() {
    local new_output="$1"
    if [ -f "$OUTPUT" ]; then
    local before_count
    before_count=$(du -m "$OUTPUT" | cut -f1)
    before_count=$(du -b "$OUTPUT" | cut -f1)
    sort -T /tmp -S 50% -u "$OUTPUT" "$new_output" > "${OUTPUT}.tmp"
    mv "${OUTPUT}.tmp" "$OUTPUT"
    local after_count
    after_count=$(du -m "$OUTPUT" | cut -f1)
    after_count=$(du -b "$OUTPUT" | cut -f1)
    [[ "$DEBUG" == true ]] && echo "Output updated: $before_count -> $after_count bytes."
    else
    mv "$new_output" "$OUTPUT"
  2. and0x00 revised this gist Mar 25, 2025. 1 changed file with 4 additions and 4 deletions.
    8 changes: 4 additions & 4 deletions subfyx.sh
    Original file line number Diff line number Diff line change
    @@ -133,17 +133,17 @@ merge_with_existing() {
    local new_output="$1"
    if [ -f "$OUTPUT" ]; then
    local before_count
    before_count=$(wc -l < "$OUTPUT")
    before_count=$(du -m "$OUTPUT" | cut -f1)
    sort -T /tmp -S 50% -u "$OUTPUT" "$new_output" > "${OUTPUT}.tmp"
    mv "${OUTPUT}.tmp" "$OUTPUT"
    local after_count
    after_count=$(wc -l < "$OUTPUT")
    [[ "$DEBUG" == true ]] && echo "Output updated: $before_count -> $after_count lines."
    after_count=$(du -m "$OUTPUT" | cut -f1)
    [[ "$DEBUG" == true ]] && echo "Output updated: $before_count -> $after_count bytes."
    else
    mv "$new_output" "$OUTPUT"
    local count
    count=$(wc -l < "$OUTPUT")
    [[ "$DEBUG" == true ]] && echo "Created output file '$OUTPUT' with $count lines."
    [[ "$DEBUG" == true ]] && echo "Created output file '$OUTPUT' with $count bytes."
    fi
    }

  3. and0x00 revised this gist Mar 25, 2025. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion subfyx.sh
    Original file line number Diff line number Diff line change
    @@ -167,7 +167,7 @@ main() {

    if [ "$NO_HASH" = false ]; then
    file_hash=$(sha256sum "$file" | awk '{print $1}')
    echo "$file_hash - $file" >> "$HASH_LOG"
    echo "$file_hash $file" >> "$HASH_LOG"
    fi
    done

  4. and0x00 created this gist Mar 23, 2025.
    193 changes: 193 additions & 0 deletions subfyx.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,193 @@
    #!/bin/bash
    # Script to split files into chunks, process each line with "subfy",
    # and consolidate results while avoiding duplicate processing via hashes.

    # --- Usage ---------------------------------------------------------------
    # Print the command-line help text on stdout, then terminate with status 1.
    usage() {
      local help_lines=(
        "Usage: $0 [options] <file1> [file2 ... fileN]"
        "Options:"
        "--no-hash Skip hash file creation/verification."
        "--terminal Display output on terminal; skip hash file operations."
        "--debug Enable debug output."
        "-o <output> Output file (default: ${HOME}/.subdomains_db)."
        "--hash-file <log> Hash log file (default: ${HOME}/.processed_hashes.log)."
        "--base-dir <dir> Base directory for temporary files (default: /tmp)."
      )
      printf '%s\n' "${help_lines[@]}"
      exit 1
    }

    # --- Global Variables ----------------------------------------------------
    # Defaults; several of these are overridden by parse_args.
    OUTPUT="${HOME}/.subdomains_db"           # consolidated results file ("" => print to terminal)
    NO_HASH=false                             # true => skip hash log creation/verification
    DEBUG=false                               # true => verbose progress messages
    HASH_LOG="${HOME}/.processed_hashes.log"  # records sha256 of already-processed inputs
    BASE_DIR="/tmp"                           # parent directory for the per-session temp dir
    MAX_FILE_SIZE=1048576 # 1MB               # per-temp-file size cap, in bytes (see process_file)
    INPUT_FILES=()                            # positional file arguments collected by parse_args

    # --- Parse Arguments -----------------------------------------------------
    # Parse command-line options into the global configuration variables.
    # Globals written: OUTPUT, HASH_LOG, BASE_DIR, NO_HASH, DEBUG, INPUT_FILES.
    # Exits via usage() on an unknown option, a missing option argument, or
    # when no input files remain after option parsing.
    parse_args() {
      while [[ "$#" -gt 0 ]]; do
        case $1 in
          -o)
            # Fail loudly instead of silently assigning an empty value.
            [[ "$#" -ge 2 ]] || { echo "Option $1 requires an argument" >&2; usage; }
            OUTPUT="$2"
            shift ;;
          --hash-file)
            [[ "$#" -ge 2 ]] || { echo "Option $1 requires an argument" >&2; usage; }
            HASH_LOG="$2"
            shift ;;
          --base-dir)
            [[ "$#" -ge 2 ]] || { echo "Option $1 requires an argument" >&2; usage; }
            BASE_DIR="$2"
            shift ;;
          --no-hash)
            NO_HASH=true ;;
          --terminal)
            # Terminal mode implies no hash log and no output file.
            NO_HASH=true
            OUTPUT="" ;;
          --debug)
            DEBUG=true ;;
          -*)
            echo "Unknown option: $1"
            usage ;;
          *)
            INPUT_FILES+=("$1") ;;
        esac
        shift
      done

      # At least one input file is required.
      [[ ${#INPUT_FILES[@]} -ge 1 ]] || usage
    }

    # --- Setup Temporary Directory -------------------------------------------
    # Create a private scratch directory under $BASE_DIR and derive the prefix
    # used for all per-session temporary files.
    # Globals read: BASE_DIR, DEBUG. Globals written: TEMP_DIR, TEMP_PREFIX.
    # Exits 1 if the directory cannot be created (previously the failure was
    # ignored, leaving TEMP_DIR empty for later rm -rf / glob operations).
    setup_temp_dir() {
      TEMP_DIR=$(mktemp -d -p "$BASE_DIR" tmp.XXXXXX) || {
        echo "Error: failed to create temporary directory under '$BASE_DIR'." >&2
        exit 1
      }
      [[ "$DEBUG" == true ]] && echo "Temporary directory: $TEMP_DIR"
      TEMP_PREFIX="${TEMP_DIR}/tmp_"
    }

    # --- Cleanup -------------------------------------------------------------
    # Remove the session's temporary directory. Registered on EXIT below, so it
    # can fire before setup_temp_dir has run (e.g. usage() exiting early);
    # guard against an unset/empty TEMP_DIR so we never run rm -rf on "".
    cleanup() {
      [[ "$DEBUG" == true ]] && echo "Cleaning up temporary files..."
      if [[ -n "${TEMP_DIR:-}" ]]; then
        rm -rf -- "$TEMP_DIR"
      fi
      return 0
    }
    trap cleanup EXIT

    # --- Hash Verification ---------------------------------------------------
    # Return 0 if the given sha256 hash is already recorded in $HASH_LOG.
    # Arguments: $1 - hex hash to look up.
    # Log lines start with "<hash> " (see main), hence the anchored pattern.
    file_processed() {
      local hash="$1"
      # No log file yet => nothing has been processed (also silences the
      # grep error the original emitted on a missing file).
      [[ -f "$HASH_LOG" ]] || return 1
      grep -q "^${hash} " "$HASH_LOG"
    }

    # --- Generate Random String ----------------------------------------------
    # Emit 16 random hexadecimal characters (8 random bytes) on stdout.
    gen_rand() {
      local hex
      hex=$(openssl rand -hex 8)
      printf '%s\n' "$hex"
    }

    # --- Process File --------------------------------------------------------
    # Split one input file into 1000-line chunks, feed each line to the
    # external "subfy" tool, and collect its output into size-capped temporary
    # files under $TEMP_PREFIX for later merging by merge_temp_files.
    # Globals read: DEBUG, TEMP_DIR, TEMP_PREFIX, MAX_FILE_SIZE.
    # Arguments: $1 - path of the file to process.
    process_file() {
      local file="$1"
      [[ "$DEBUG" == true ]] && echo "Splitting '$file' into chunks..."
      # Split file into 1000-line chunks
      # (-d numeric suffixes, -a 5 => up to 100000 chunks per file).
      split -d -a 5 -l 1000 "$file" "${TEMP_DIR}/chunk_"

      # nullglob: expand to an empty array (not the literal pattern) when the
      # split produced no chunks, e.g. for an empty input file.
      shopt -s nullglob
      local chunks=("${TEMP_DIR}/chunk_"*)
      shopt -u nullglob
      [[ "$DEBUG" == true ]] && echo "File '$file' split into ${#chunks[@]} chunks."

      local counter=1
      for chunk in "${chunks[@]}"; do
        # NOTE(review): 'local var=$(cmd)' masks the command's exit status.
        local rand_str=$(gen_rand)
        local tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
        [[ "$DEBUG" == true ]] && echo "Creating temp file: $tmp_file"

        while IFS= read -r line; do
          # Process line with subfy; sort and remove duplicates per line.
          # (subfy's semantics are not visible here — presumably it expands a
          # domain into subdomains, one per output line; confirm against its
          # own documentation.)
          echo "$line" | /usr/local/bin/subfy -f /dev/stdin | sort -T /tmp -S 50% | uniq >> "$tmp_file"
          # Check file size and start a new file if needed.
          if [ -f "$tmp_file" ]; then
            local size
            size=$(stat -c%s "$tmp_file")
            if [ "$size" -ge "$MAX_FILE_SIZE" ]; then
              # Roll over to a fresh temp file once the size cap is reached;
              # the new random prefix avoids name collisions.
              counter=$((counter + 1))
              rand_str=$(gen_rand)
              tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
              [[ "$DEBUG" == true ]] && echo "Max size reached. New temp file: $tmp_file"
            fi
          fi
        done < "$chunk"
        # Chunk is fully consumed; delete it so a later process_file call can
        # reuse the same chunk names without mixing files.
        rm -f "$chunk"
        [[ "$DEBUG" == true ]] && echo "Removed chunk: $chunk"
      done
    }

    # --- Merge Output --------------------------------------------------------
    # Merge all per-session temp files into one sorted, de-duplicated file.
    # Arguments: $1 - destination path for the merged output.
    # Previously an unmatched "${TEMP_PREFIX}"*.tmp glob was passed literally
    # to sort, which then errored; now an empty session yields an empty file.
    merge_temp_files() {
      local output_file="$1"
      local tmp_files
      shopt -s nullglob
      tmp_files=("${TEMP_PREFIX}"*.tmp)
      shopt -u nullglob
      if [ "${#tmp_files[@]}" -eq 0 ]; then
        # No results this session; still produce an (empty) output file.
        : > "$output_file"
      else
        sort -T /tmp -S 50% -u "${tmp_files[@]}" > "$output_file"
      fi
      [[ "$DEBUG" == true ]] && echo "Merged temporary files into: $output_file"
      return 0
    }

    # --- Merge With Existing Output ------------------------------------------
    # Fold the session results file ($1) into the persistent $OUTPUT database,
    # keeping the database sorted and duplicate-free.
    # Globals read: OUTPUT, DEBUG.
    merge_with_existing() {
      local session_file="$1"
      if [ ! -f "$OUTPUT" ]; then
        # First run: the session results become the database as-is.
        mv "$session_file" "$OUTPUT"
        local line_total
        line_total=$(wc -l < "$OUTPUT")
        [[ "$DEBUG" == true ]] && echo "Created output file '$OUTPUT' with $line_total lines."
        return
      fi
      local lines_before lines_after
      lines_before=$(wc -l < "$OUTPUT")
      # Union of old database and new results, written via a temp file so a
      # failed sort never clobbers the existing database.
      sort -T /tmp -S 50% -u "$OUTPUT" "$session_file" > "${OUTPUT}.tmp"
      mv "${OUTPUT}.tmp" "$OUTPUT"
      lines_after=$(wc -l < "$OUTPUT")
      [[ "$DEBUG" == true ]] && echo "Output updated: $lines_before -> $lines_after lines."
    }

    # --- Main ----------------------------------------------------------------
    # Orchestrate the run: parse options, process each (not yet seen) input
    # file through subfy, merge results, and optionally update the hash log.
    main() {
      parse_args "$@"
      setup_temp_dir

      local file file_hash
      for file in "${INPUT_FILES[@]}"; do
        # Skip unreadable inputs up front; sha256sum/split would fail anyway.
        if [ ! -r "$file" ]; then
          echo "Warning: cannot read input file '$file'; skipping." >&2
          continue
        fi

        if [ "$NO_HASH" = false ]; then
          file_hash=$(sha256sum "$file" | awk '{print $1}')
          if file_processed "$file_hash"; then
            [[ "$DEBUG" == true ]] && echo "Skipping already processed file: $file"
            continue
          fi
        fi

        [[ "$DEBUG" == true ]] && echo "Processing file: $file"
        process_file "$file"

        if [ "$NO_HASH" = false ]; then
          # Record the hash computed before processing (the original re-ran
          # sha256sum here for no benefit).
          echo "$file_hash - $file" >> "$HASH_LOG"
        fi
      done

      # Merge all temporary results into a final output file.
      local session_output="${TEMP_DIR}/session_output.txt"
      merge_temp_files "$session_output"

      if [ -n "$OUTPUT" ]; then
        merge_with_existing "$session_output"
        [[ "$DEBUG" == true ]] && echo "Processing complete. Output saved to '$OUTPUT'."
      else
        cat "$session_output"
      fi

      # Optional debug report. The temp files are deleted later by the EXIT
      # trap, so report them as pending (the original said "removed" and
      # counted them by parsing ls output).
      if [[ "$DEBUG" == true ]]; then
        local tmp_files total_size
        shopt -s nullglob
        tmp_files=("${TEMP_PREFIX}"*.tmp)
        shopt -u nullglob
        total_size=0
        if [ "${#tmp_files[@]}" -gt 0 ]; then
          total_size=$(du -ch "${tmp_files[@]}" | grep 'total$' | awk '{print $1}')
        fi
        echo "Temporary files pending cleanup: ${#tmp_files[@]}, totaling ~$total_size."
      fi
    }

    # --- Execute -------------------------------------------------------------
    # Entry point: forward all command-line arguments to main.
    main "$@"