and0x00 · March 25, 2025 01:40
diff --git a/subfyx.sh b/subfyx.sh
 #!/bin/bash
 # Script to split files into chunks, process each line with "subfy",
 # and consolidate results while avoiding duplicate processing via hashes.

 # --- Usage ---------------------------------------------------------------
 # Print the command-line help to stderr and terminate the script with status 1.
 # Globals:   HOME (read, to show the default paths)
 # Arguments: none
 # Outputs:   help text on stderr
 # Returns:   never returns; exits with status 1
 usage() {
    # printf is used instead of a here-document: a here-document terminator
    # must sit at column 0 (or use <<- with tabs), which breaks as soon as the
    # function is re-indented.
    printf '%s\n' \
        "Usage: $0 [options] <file1> [file2 ... fileN]" \
        "Options:" \
        "  --no-hash             Skip hash file creation/verification." \
        "  --terminal            Display output on terminal; skip hash file operations." \
        "  --debug               Enable debug output." \
        "  -o <output>           Output file (default: ${HOME}/.subdomains_db)." \
        "  --hash-file <log>     Hash log file (default: ${HOME}/.processed_hashes.log)." \
        "  --base-dir <dir>      Base directory for temporary files (default: /tmp)." >&2
    exit 1
 }

 # --- Global Variables ----------------------------------------------------
 # Shared script state. parse_args overwrites most of these from the command line.
 INPUT_FILES=()                              # input files collected from positional arguments
 MAX_FILE_SIZE=1048576  # 1MB
 DEBUG=false                                 # set to true by --debug
 NO_HASH=false                               # set to true by --no-hash and --terminal
 BASE_DIR="/tmp"                             # parent directory for the temporary work dir
 HASH_LOG="${HOME}/.processed_hashes.log"    # log of hashes of already-processed files
 OUTPUT="${HOME}/.subdomains_db"             # result database; emptied by --terminal

 # --- Parse Arguments -----------------------------------------------------
 # Parse the command line into the global settings.
 # Globals:   OUTPUT, HASH_LOG, BASE_DIR, NO_HASH, DEBUG, INPUT_FILES (written)
 # Arguments: the script's command-line words
 # Outputs:   error messages on stderr
 # Returns:   0 on success; calls usage (which exits) on a bad or incomplete
 #            command line or when no input file was given
 parse_args() {
    while [[ "$#" -gt 0 ]]; do
        case "$1" in
            -o | --hash-file | --base-dir)
                # These options consume the following word as their value, so
                # refuse to continue when that word is missing.
                if [[ "$#" -lt 2 ]]; then
                    echo "Option $1 requires an argument" >&2
                    usage
                fi
                case "$1" in
                    -o) OUTPUT="$2" ;;
                    --hash-file) HASH_LOG="$2" ;;
                    --base-dir) BASE_DIR="$2" ;;
                esac
                shift
                ;;
            --no-hash)
                NO_HASH=true ;;
            --terminal)
                # Terminal mode: an empty OUTPUT makes main print the results.
                NO_HASH=true
                OUTPUT="" ;;
            --debug)
                DEBUG=true ;;
            --)
                # Everything after "--" is a file name, even if it starts with "-".
                shift
                INPUT_FILES+=("$@")
                break ;;
            -*)
                echo "Unknown option: $1" >&2
                usage ;;
            *)
                INPUT_FILES+=("$1") ;;
        esac
        shift
    done

    [[ ${#INPUT_FILES[@]} -ge 1 ]] || usage
 }

 # --- Setup Temporary Directory -------------------------------------------
 # Create the private working directory for this run.
 # Globals:   BASE_DIR, DEBUG (read); TEMP_DIR, TEMP_PREFIX (written)
 # Arguments: none
 # Outputs:   debug line on stdout; error message on stderr
 # Returns:   0 on success; exits with status 1 if the directory cannot be created
 setup_temp_dir() {
    # Abort on failure: continuing with an empty TEMP_DIR would turn
    # TEMP_PREFIX into "/tmp_" and scatter result files in the filesystem root.
    if ! TEMP_DIR=$(mktemp -d -p "$BASE_DIR" tmp.XXXXXX) || [[ ! -d "$TEMP_DIR" ]]; then
        echo "Error: cannot create temporary directory under '$BASE_DIR'" >&2
        exit 1
    fi
    if [[ "$DEBUG" == true ]]; then
        echo "Temporary directory: $TEMP_DIR"
    fi
    TEMP_PREFIX="${TEMP_DIR}/tmp_"
 }

 # --- Cleanup -------------------------------------------------------------
 # Remove the temporary working directory; installed as the EXIT handler.
 # Globals:   TEMP_DIR, DEBUG (read)
 # Arguments: none
 # Outputs:   debug line on stdout
 # Returns:   0 when there is nothing to clean, otherwise the status of rm
 cleanup() {
    # The trap is installed before setup_temp_dir runs, so an early exit
    # (for example from usage) reaches this point with TEMP_DIR still unset.
    if [[ -z "${TEMP_DIR:-}" ]]; then
        return 0
    fi
    if [[ "$DEBUG" == true ]]; then
        echo "Cleaning up temporary files..."
    fi
    rm -rf -- "$TEMP_DIR"
 }
 trap cleanup EXIT

 # --- Hash Verification ---------------------------------------------------
 # Tell whether a file hash is already recorded in the hash log.
 # Globals:   HASH_LOG (read)
 # Arguments: $1 - hash to look up
 # Returns:   0 if a log line starts with "<hash> ", non-zero otherwise
 file_processed() {
    local hash="$1"
    # On a first run the log does not exist yet; treat that (and an empty
    # hash) as "not processed" instead of letting grep print an error.
    [[ -n "$hash" && -f "$HASH_LOG" ]] || return 1
    grep -q -- "^${hash} " "$HASH_LOG"
 }

 # --- Generate Random String ----------------------------------------------
 # Print a random token of 16 lowercase hex digits (8 random bytes).
 # Arguments: none
 # Outputs:   the token followed by a newline on stdout
 gen_rand() {
    local token
    token=$(openssl rand -hex 8 2>/dev/null)
    # Fall back to the kernel random source when openssl is missing or fails,
    # so that temp file names never end up with an empty random part.
    if [[ ! "$token" =~ ^[0-9a-f]{16}$ ]]; then
        token=$(od -An -N8 -tx1 /dev/urandom | tr -d ' \n')
    fi
    printf '%s\n' "$token"
 }

 # --- Process File --------------------------------------------------------
 # Split one input file into chunks and feed every line to subfy, appending the
 # de-duplicated output of each line to size-limited temp files.
 # Globals:   TEMP_DIR, TEMP_PREFIX, MAX_FILE_SIZE, BASE_DIR, DEBUG (read);
 #            SUBFY_BIN (read, optional override of the subfy executable)
 # Arguments: $1 - path of the file to process
 # Outputs:   debug lines on stdout; error message on stderr
 # Returns:   0 on success, 1 if the file could not be split
 process_file() {
    local file="$1"
    # The default is the path this script has always called.
    local subfy_bin="${SUBFY_BIN:-/usr/local/bin/subfy}"
    # Keep sort's spill files under the directory chosen with --base-dir.
    local sort_tmp="${BASE_DIR:-/tmp}"
    local chunk line rand_str tmp_file size
    local counter=1
    local -a chunks=()

    [[ "$DEBUG" == true ]] && echo "Splitting '$file' into chunks..."
    # Split file into 1000-line chunks
    if ! split -d -a 5 -l 1000 -- "$file" "${TEMP_DIR}/chunk_"; then
        echo "Error: failed to split '$file'" >&2
        # Do not leave partial chunks behind for the next input file.
        rm -f -- "${TEMP_DIR}/chunk_"*
        return 1
    fi

    shopt -s nullglob
    chunks=("${TEMP_DIR}/chunk_"*)
    shopt -u nullglob
    [[ "$DEBUG" == true ]] && echo "File '$file' split into ${#chunks[@]} chunks."

    for chunk in "${chunks[@]}"; do
        rand_str=$(gen_rand)
        tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
        [[ "$DEBUG" == true ]] && echo "Creating temp file: $tmp_file"

        # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
        while IFS= read -r line || [[ -n "$line" ]]; do
            # Process line with subfy; sort and remove duplicates per line.
            printf '%s\n' "$line" \
                | "$subfy_bin" -f /dev/stdin \
                | sort -T "$sort_tmp" -S 50% -u >> "$tmp_file"
            # Check file size and start a new file if needed.
            if [[ -f "$tmp_file" ]]; then
                size=$(stat -c%s "$tmp_file")
                if [[ "$size" -ge "$MAX_FILE_SIZE" ]]; then
                    counter=$((counter + 1))
                    rand_str=$(gen_rand)
                    tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
                    [[ "$DEBUG" == true ]] && echo "Max size reached. New temp file: $tmp_file"
                fi
            fi
        done < "$chunk"
        rm -f -- "$chunk"
        [[ "$DEBUG" == true ]] && echo "Removed chunk: $chunk"
    done
    return 0
 }

 # --- Merge Output --------------------------------------------------------
 # Merge every per-run temp file into one sorted, duplicate-free file.
 # Globals:   TEMP_PREFIX, BASE_DIR, DEBUG (read)
 # Arguments: $1 - path of the merged file to create
 # Outputs:   debug line on stdout
 # Returns:   0 on success, 1 if sort fails
 merge_temp_files() {
    local output_file="$1"
    local -a parts=()

    shopt -s nullglob
    parts=("${TEMP_PREFIX}"*.tmp)
    shopt -u nullglob

    if [[ ${#parts[@]} -eq 0 ]]; then
        # Nothing was produced (e.g. every input was skipped): still create the
        # file so later steps can read it, but do not hand sort a literal glob.
        : > "$output_file"
    else
        sort -T "${BASE_DIR:-/tmp}" -S 50% -u -- "${parts[@]}" > "$output_file" || return 1
    fi
    if [[ "$DEBUG" == true ]]; then
        echo "Merged temporary files into: $output_file"
    fi
    return 0
 }

 # --- Merge With Existing Output ------------------------------------------
 # Fold this run's results into the persistent output file.
 # Globals:   OUTPUT, BASE_DIR, DEBUG (read)
 # Arguments: $1 - sorted result file of the current run
 # Outputs:   debug line on stdout; error message on stderr
 # Returns:   0 on success, 1 if the merge or the move fails
 merge_with_existing() {
    local new_output="$1"
    local before_bytes after_bytes

    if [[ -f "$OUTPUT" ]]; then
        before_bytes=$(( $(wc -c < "$OUTPUT") ))
        # Only replace the existing file when sort succeeded; otherwise a
        # truncated temp file would overwrite the database.
        if ! sort -T "${BASE_DIR:-/tmp}" -S 50% -u -- "$OUTPUT" "$new_output" > "${OUTPUT}.tmp"; then
            rm -f -- "${OUTPUT}.tmp"
            echo "Error: failed to merge results into '$OUTPUT'" >&2
            return 1
        fi
        mv -- "${OUTPUT}.tmp" "$OUTPUT" || return 1
        after_bytes=$(( $(wc -c < "$OUTPUT") ))
        if [[ "$DEBUG" == true ]]; then
            echo "Output updated: $before_bytes -> $after_bytes bytes."
        fi
    else
        mv -- "$new_output" "$OUTPUT" || return 1
        # Report a byte size here too, so the number matches the "bytes" label.
        after_bytes=$(( $(wc -c < "$OUTPUT") ))
        if [[ "$DEBUG" == true ]]; then
            echo "Created output file '$OUTPUT' with $after_bytes bytes."
        fi
    fi
    return 0
 }

 # --- Main ----------------------------------------------------------------
 # Drive a full run: parse options, process every new input file, merge the
 # results and either store them in OUTPUT or print them.
 # Globals:   INPUT_FILES, NO_HASH, DEBUG, OUTPUT, HASH_LOG, TEMP_DIR,
 #            TEMP_PREFIX (read)
 # Arguments: the script's command-line words
 # Outputs:   results on stdout in terminal mode; debug lines on stdout;
 #            warnings on stderr
 # Returns:   0 once the run has completed
 main() {
    local file file_hash session_output total_size
    local -a leftover=()

    parse_args "$@"
    setup_temp_dir

    for file in "${INPUT_FILES[@]}"; do
        # Skip inputs that cannot be read, so no empty hash gets logged for them.
        if [[ ! -r "$file" || -d "$file" ]]; then
            echo "Warning: skipping unreadable file: $file" >&2
            continue
        fi

        # Hash once and reuse the value for both the lookup and the log entry.
        file_hash=""
        if [[ "$NO_HASH" == false ]]; then
            file_hash=$(sha256sum -- "$file" | awk '{print $1}')
            if file_processed "$file_hash"; then
                [[ "$DEBUG" == true ]] && echo "Skipping already processed file: $file"
                continue
            fi
        fi

        [[ "$DEBUG" == true ]] && echo "Processing file: $file"
        process_file "$file"

        if [[ -n "$file_hash" ]]; then
            echo "$file_hash  $file" >> "$HASH_LOG"
        fi
    done

    # Merge all temporary results into a final output file.
    session_output="${TEMP_DIR}/session_output.txt"
    merge_temp_files "$session_output"

    if [[ -n "$OUTPUT" ]]; then
        merge_with_existing "$session_output"
        [[ "$DEBUG" == true ]] && echo "Processing complete. Output saved to '$OUTPUT'."
    else
        cat -- "$session_output"
    fi

    # Optional debug report.
    if [[ "$DEBUG" == true ]]; then
        shopt -s nullglob
        leftover=("${TEMP_PREFIX}"*.tmp)
        shopt -u nullglob
        total_size=""
        if [[ ${#leftover[@]} -gt 0 ]]; then
            total_size=$(du -ch -- "${leftover[@]}" 2>/dev/null | tail -n 1 | cut -f1)
        fi
        echo "Temporary files removed: ${#leftover[@]}, totaling ~$total_size."
    fi

    # Explicit success status: a trailing "[[ ... ]] && echo" would make the
    # whole script exit with 1 whenever --debug is off.
    return 0
 }

 # --- Execute -------------------------------------------------------------
 # Entry point: hand the script's command-line arguments to main unchanged.
 main "$@"
	#!/bin/bash
	# Script to split files into chunks, process each line with "subfy",
	# and consolidate results while avoiding duplicate processing via hashes.

	# --- Usage ---------------------------------------------------------------
	# Print the command-line help to stderr and terminate the script with status 1.
	# Globals:   HOME (read, to show the default paths)
	# Arguments: none
	# Outputs:   help text on stderr
	# Returns:   never returns; exits with status 1
	usage() {
		# printf is used instead of a here-document: with a plain <<EOF the
		# terminator must sit at column 0, so an indented EOF never ends it.
		printf '%s\n' \
			"Usage: $0 [options] <file1> [file2 ... fileN]" \
			"Options:" \
			"  --no-hash             Skip hash file creation/verification." \
			"  --terminal            Display output on terminal; skip hash file operations." \
			"  --debug               Enable debug output." \
			"  -o <output>           Output file (default: ${HOME}/.subdomains_db)." \
			"  --hash-file <log>     Hash log file (default: ${HOME}/.processed_hashes.log)." \
			"  --base-dir <dir>      Base directory for temporary files (default: /tmp)." >&2
		exit 1
	}

	# --- Global Variables ----------------------------------------------------
	# Shared script state. parse_args overwrites most of these from the command line.
	INPUT_FILES=()                              # input files collected from positional arguments
	MAX_FILE_SIZE=1048576 # 1MB
	DEBUG=false                                 # set to true by --debug
	NO_HASH=false                               # set to true by --no-hash and --terminal
	BASE_DIR="/tmp"                             # parent directory for the temporary work dir
	HASH_LOG="${HOME}/.processed_hashes.log"    # log of hashes of already-processed files
	OUTPUT="${HOME}/.subdomains_db"             # result database; emptied by --terminal

	# --- Parse Arguments -----------------------------------------------------
	# Parse the command line into the global settings.
	# Globals:   OUTPUT, HASH_LOG, BASE_DIR, NO_HASH, DEBUG, INPUT_FILES (written)
	# Arguments: the script's command-line words
	# Outputs:   error messages on stderr
	# Returns:   0 on success; calls usage (which exits) on a bad or incomplete
	#            command line or when no input file was given
	parse_args() {
		while [[ "$#" -gt 0 ]]; do
			case "$1" in
				-o | --hash-file | --base-dir)
					# These options consume the following word as their
					# value, so refuse to continue when it is missing.
					if [[ "$#" -lt 2 ]]; then
						echo "Option $1 requires an argument" >&2
						usage
					fi
					case "$1" in
						-o) OUTPUT="$2" ;;
						--hash-file) HASH_LOG="$2" ;;
						--base-dir) BASE_DIR="$2" ;;
					esac
					shift
					;;
				--no-hash)
					NO_HASH=true ;;
				--terminal)
					# Terminal mode: an empty OUTPUT makes main print the results.
					NO_HASH=true
					OUTPUT="" ;;
				--debug)
					DEBUG=true ;;
				--)
					# Everything after "--" is a file name, even if it starts with "-".
					shift
					INPUT_FILES+=("$@")
					break ;;
				-*)
					echo "Unknown option: $1" >&2
					usage ;;
				*)
					INPUT_FILES+=("$1") ;;
			esac
			shift
		done

		# A real "||" operator; a backslash-escaped one is a syntax error here.
		[[ ${#INPUT_FILES[@]} -ge 1 ]] || usage
	}

	# --- Setup Temporary Directory -------------------------------------------
	# Create the private working directory for this run.
	# Globals:   BASE_DIR, DEBUG (read); TEMP_DIR, TEMP_PREFIX (written)
	# Arguments: none
	# Outputs:   debug line on stdout; error message on stderr
	# Returns:   0 on success; exits with status 1 if the directory cannot be created
	setup_temp_dir() {
		# Abort on failure: continuing with an empty TEMP_DIR would turn
		# TEMP_PREFIX into "/tmp_" and scatter result files in the filesystem root.
		if ! TEMP_DIR=$(mktemp -d -p "$BASE_DIR" tmp.XXXXXX) || [[ ! -d "$TEMP_DIR" ]]; then
			echo "Error: cannot create temporary directory under '$BASE_DIR'" >&2
			exit 1
		fi
		if [[ "$DEBUG" == true ]]; then
			echo "Temporary directory: $TEMP_DIR"
		fi
		TEMP_PREFIX="${TEMP_DIR}/tmp_"
	}

	# --- Cleanup -------------------------------------------------------------
	# Remove the temporary working directory; installed as the EXIT handler.
	# Globals:   TEMP_DIR, DEBUG (read)
	# Arguments: none
	# Outputs:   debug line on stdout
	# Returns:   0 when there is nothing to clean, otherwise the status of rm
	cleanup() {
		# The trap is installed before setup_temp_dir runs, so an early exit
		# (for example from usage) reaches this point with TEMP_DIR still unset.
		if [[ -z "${TEMP_DIR:-}" ]]; then
			return 0
		fi
		if [[ "$DEBUG" == true ]]; then
			echo "Cleaning up temporary files..."
		fi
		rm -rf -- "$TEMP_DIR"
	}
	trap cleanup EXIT

	# --- Hash Verification ---------------------------------------------------
	# Tell whether a file hash is already recorded in the hash log.
	# Globals:   HASH_LOG (read)
	# Arguments: $1 - hash to look up
	# Returns:   0 if a log line starts with "<hash> ", non-zero otherwise
	file_processed() {
		local hash="$1"
		# On a first run the log does not exist yet; treat that (and an empty
		# hash) as "not processed" instead of letting grep print an error.
		[[ -n "$hash" && -f "$HASH_LOG" ]] || return 1
		grep -q -- "^${hash} " "$HASH_LOG"
	}

	# --- Generate Random String ----------------------------------------------
	# Print a random token of 16 lowercase hex digits (8 random bytes).
	# Arguments: none
	# Outputs:   the token followed by a newline on stdout
	gen_rand() {
		local token
		token=$(openssl rand -hex 8 2>/dev/null)
		# Fall back to the kernel random source when openssl is missing or
		# fails, so that temp file names never end up with an empty random part.
		if [[ ! "$token" =~ ^[0-9a-f]{16}$ ]]; then
			token=$(od -An -N8 -tx1 /dev/urandom | tr -d ' \n')
		fi
		printf '%s\n' "$token"
	}

	# --- Process File --------------------------------------------------------
	# Split one input file into chunks and feed every line to subfy, appending the
	# de-duplicated output of each line to size-limited temp files.
	# Globals:   TEMP_DIR, TEMP_PREFIX, MAX_FILE_SIZE, BASE_DIR, DEBUG (read);
	#            SUBFY_BIN (read, optional override of the subfy executable)
	# Arguments: $1 - path of the file to process
	# Outputs:   debug lines on stdout; error message on stderr
	# Returns:   0 on success, 1 if the file could not be split
	process_file() {
		local file="$1"
		# The default is the path this script has always called.
		local subfy_bin="${SUBFY_BIN:-/usr/local/bin/subfy}"
		# Keep sort's spill files under the directory chosen with --base-dir.
		local sort_tmp="${BASE_DIR:-/tmp}"
		local chunk line rand_str tmp_file size
		local counter=1
		local -a chunks=()

		[[ "$DEBUG" == true ]] && echo "Splitting '$file' into chunks..."
		# Split file into 1000-line chunks
		if ! split -d -a 5 -l 1000 -- "$file" "${TEMP_DIR}/chunk_"; then
			echo "Error: failed to split '$file'" >&2
			# Do not leave partial chunks behind for the next input file.
			rm -f -- "${TEMP_DIR}/chunk_"*
			return 1
		fi

		shopt -s nullglob
		chunks=("${TEMP_DIR}/chunk_"*)
		shopt -u nullglob
		[[ "$DEBUG" == true ]] && echo "File '$file' split into ${#chunks[@]} chunks."

		for chunk in "${chunks[@]}"; do
			rand_str=$(gen_rand)
			tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
			[[ "$DEBUG" == true ]] && echo "Creating temp file: $tmp_file"

			# "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
			while IFS= read -r line || [[ -n "$line" ]]; do
				# Process line with subfy; sort and remove duplicates per line.
				# These must be real pipes: an escaped "\|" is passed to echo
				# as a literal word and nothing is ever piped.
				printf '%s\n' "$line" \
					| "$subfy_bin" -f /dev/stdin \
					| sort -T "$sort_tmp" -S 50% -u >> "$tmp_file"
				# Check file size and start a new file if needed.
				if [[ -f "$tmp_file" ]]; then
					size=$(stat -c%s "$tmp_file")
					if [[ "$size" -ge "$MAX_FILE_SIZE" ]]; then
						counter=$((counter + 1))
						rand_str=$(gen_rand)
						tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
						[[ "$DEBUG" == true ]] && echo "Max size reached. New temp file: $tmp_file"
					fi
				fi
			done < "$chunk"
			rm -f -- "$chunk"
			[[ "$DEBUG" == true ]] && echo "Removed chunk: $chunk"
		done
		return 0
	}

	# --- Merge Output --------------------------------------------------------
	# Merge every per-run temp file into one sorted, duplicate-free file.
	# Globals:   TEMP_PREFIX, BASE_DIR, DEBUG (read)
	# Arguments: $1 - path of the merged file to create
	# Outputs:   debug line on stdout
	# Returns:   0 on success, 1 if sort fails
	merge_temp_files() {
		local output_file="$1"
		local -a parts=()

		shopt -s nullglob
		parts=("${TEMP_PREFIX}"*.tmp)
		shopt -u nullglob

		if [[ ${#parts[@]} -eq 0 ]]; then
			# Nothing was produced (e.g. every input was skipped): still create
			# the file so later steps can read it, but do not hand sort a
			# literal glob pattern.
			: > "$output_file"
		else
			sort -T "${BASE_DIR:-/tmp}" -S 50% -u -- "${parts[@]}" > "$output_file" || return 1
		fi
		if [[ "$DEBUG" == true ]]; then
			echo "Merged temporary files into: $output_file"
		fi
		return 0
	}

	# --- Merge With Existing Output ------------------------------------------
	# Fold this run's results into the persistent output file.
	# Globals:   OUTPUT, BASE_DIR, DEBUG (read)
	# Arguments: $1 - sorted result file of the current run
	# Outputs:   debug line on stdout; error message on stderr
	# Returns:   0 on success, 1 if the merge or the move fails
	merge_with_existing() {
		local new_output="$1"
		local before_bytes after_bytes

		if [[ -f "$OUTPUT" ]]; then
			before_bytes=$(( $(wc -c < "$OUTPUT") ))
			# Only replace the existing file when sort succeeded; otherwise a
			# truncated temp file would overwrite the database.
			if ! sort -T "${BASE_DIR:-/tmp}" -S 50% -u -- "$OUTPUT" "$new_output" > "${OUTPUT}.tmp"; then
				rm -f -- "${OUTPUT}.tmp"
				echo "Error: failed to merge results into '$OUTPUT'" >&2
				return 1
			fi
			mv -- "${OUTPUT}.tmp" "$OUTPUT" || return 1
			after_bytes=$(( $(wc -c < "$OUTPUT") ))
			if [[ "$DEBUG" == true ]]; then
				echo "Output updated: $before_bytes -> $after_bytes bytes."
			fi
		else
			mv -- "$new_output" "$OUTPUT" || return 1
			# Report a byte size here too, so the number matches the "bytes" label.
			after_bytes=$(( $(wc -c < "$OUTPUT") ))
			if [[ "$DEBUG" == true ]]; then
				echo "Created output file '$OUTPUT' with $after_bytes bytes."
			fi
		fi
		return 0
	}

	# --- Main ----------------------------------------------------------------
	# Drive a full run: parse options, process every new input file, merge the
	# results and either store them in OUTPUT or print them.
	# Globals:   INPUT_FILES, NO_HASH, DEBUG, OUTPUT, HASH_LOG, TEMP_DIR,
	#            TEMP_PREFIX (read)
	# Arguments: the script's command-line words
	# Outputs:   results on stdout in terminal mode; debug lines on stdout;
	#            warnings on stderr
	# Returns:   0 once the run has completed
	main() {
		local file file_hash session_output total_size
		local -a leftover=()

		parse_args "$@"
		setup_temp_dir

		for file in "${INPUT_FILES[@]}"; do
			# Skip inputs that cannot be read, so no empty hash gets logged for them.
			if [[ ! -r "$file" || -d "$file" ]]; then
				echo "Warning: skipping unreadable file: $file" >&2
				continue
			fi

			# Hash once and reuse the value for both the lookup and the log
			# entry. The pipe must be unescaped for awk to receive the output.
			file_hash=""
			if [[ "$NO_HASH" == false ]]; then
				file_hash=$(sha256sum -- "$file" | awk '{print $1}')
				if file_processed "$file_hash"; then
					[[ "$DEBUG" == true ]] && echo "Skipping already processed file: $file"
					continue
				fi
			fi

			[[ "$DEBUG" == true ]] && echo "Processing file: $file"
			process_file "$file"

			if [[ -n "$file_hash" ]]; then
				# Two spaces between hash and name, the layout sha256sum prints.
				echo "$file_hash  $file" >> "$HASH_LOG"
			fi
		done

		# Merge all temporary results into a final output file.
		session_output="${TEMP_DIR}/session_output.txt"
		merge_temp_files "$session_output"

		if [[ -n "$OUTPUT" ]]; then
			merge_with_existing "$session_output"
			[[ "$DEBUG" == true ]] && echo "Processing complete. Output saved to '$OUTPUT'."
		else
			cat -- "$session_output"
		fi

		# Optional debug report.
		if [[ "$DEBUG" == true ]]; then
			shopt -s nullglob
			leftover=("${TEMP_PREFIX}"*.tmp)
			shopt -u nullglob
			total_size=""
			if [[ ${#leftover[@]} -gt 0 ]]; then
				total_size=$(du -ch -- "${leftover[@]}" 2>/dev/null | tail -n 1 | cut -f1)
			fi
			echo "Temporary files removed: ${#leftover[@]}, totaling ~$total_size."
		fi

		# Explicit success status: a trailing "[[ ... ]] && echo" would make the
		# whole script exit with 1 whenever --debug is off.
		return 0
	}

	# --- Execute -------------------------------------------------------------
	# Entry point: hand the script's command-line arguments to main unchanged.
	main "$@"
No results found