Skip to content

Instantly share code, notes, and snippets.

@and0x00
Last active March 25, 2025 01:40
Show Gist options
  • Select an option

  • Save and0x00/dd5fb8ede37325c80d84f40ba7f94707 to your computer and use it in GitHub Desktop.

Select an option

Save and0x00/dd5fb8ede37325c80d84f40ba7f94707 to your computer and use it in GitHub Desktop.

Revisions

  1. and0x00 revised this gist Mar 25, 2025. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions subfyx.sh
    Original file line number Diff line number Diff line change
    @@ -133,11 +133,11 @@ merge_with_existing() {
    local new_output="$1"
    if [ -f "$OUTPUT" ]; then
    local before_count
    before_count=$(du -m "$OUTPUT" | cut -f1)
    before_count=$(du -b "$OUTPUT" | cut -f1)
    sort -T /tmp -S 50% -u "$OUTPUT" "$new_output" > "${OUTPUT}.tmp"
    mv "${OUTPUT}.tmp" "$OUTPUT"
    local after_count
    after_count=$(du -m "$OUTPUT" | cut -f1)
    after_count=$(du -b "$OUTPUT" | cut -f1)
    [[ "$DEBUG" == true ]] && echo "Output updated: $before_count -> $after_count bytes."
    else
    mv "$new_output" "$OUTPUT"
  2. and0x00 revised this gist Mar 25, 2025. 1 changed file with 4 additions and 4 deletions.
    8 changes: 4 additions & 4 deletions subfyx.sh
    Original file line number Diff line number Diff line change
    @@ -133,17 +133,17 @@ merge_with_existing() {
    local new_output="$1"
    if [ -f "$OUTPUT" ]; then
    local before_count
    before_count=$(wc -l < "$OUTPUT")
    before_count=$(du -m "$OUTPUT" | cut -f1)
    sort -T /tmp -S 50% -u "$OUTPUT" "$new_output" > "${OUTPUT}.tmp"
    mv "${OUTPUT}.tmp" "$OUTPUT"
    local after_count
    after_count=$(wc -l < "$OUTPUT")
    [[ "$DEBUG" == true ]] && echo "Output updated: $before_count -> $after_count lines."
    after_count=$(du -m "$OUTPUT" | cut -f1)
    [[ "$DEBUG" == true ]] && echo "Output updated: $before_count -> $after_count bytes."
    else
    mv "$new_output" "$OUTPUT"
    local count
    count=$(wc -l < "$OUTPUT")
    [[ "$DEBUG" == true ]] && echo "Created output file '$OUTPUT' with $count lines."
    [[ "$DEBUG" == true ]] && echo "Created output file '$OUTPUT' with $count bytes."
    fi
    }

  3. and0x00 revised this gist Mar 25, 2025. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion subfyx.sh
    Original file line number Diff line number Diff line change
    @@ -167,7 +167,7 @@ main() {

    if [ "$NO_HASH" = false ]; then
    file_hash=$(sha256sum "$file" | awk '{print $1}')
    echo "$file_hash - $file" >> "$HASH_LOG"
    echo "$file_hash $file" >> "$HASH_LOG"
    fi
    done

  4. and0x00 created this gist Mar 23, 2025.
    193 changes: 193 additions & 0 deletions subfyx.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,193 @@
    #!/bin/bash
    # Script to split files into chunks, process each line with "subfy",
    # and consolidate results while avoiding duplicate processing via hashes.

    # --- Usage ---------------------------------------------------------------
    # Print the command-line help text on stdout, then terminate with status 1.
    usage() {
      local help_lines=(
        "Usage: $0 [options] <file1> [file2 ... fileN]"
        "Options:"
        "--no-hash Skip hash file creation/verification."
        "--terminal Display output on terminal; skip hash file operations."
        "--debug Enable debug output."
        "-o <output> Output file (default: ${HOME}/.subdomains_db)."
        "--hash-file <log> Hash log file (default: ${HOME}/.processed_hashes.log)."
        "--base-dir <dir> Base directory for temporary files (default: /tmp)."
      )
      printf '%s\n' "${help_lines[@]}"
      exit 1
    }

    # --- Global Variables ----------------------------------------------------
    # Defaults; several of these are overridden by parse_args.
    OUTPUT="${HOME}/.subdomains_db"           # consolidated results file ("" => print to terminal)
    NO_HASH=false                             # true => skip hash log creation/verification
    DEBUG=false                               # true => verbose progress messages
    HASH_LOG="${HOME}/.processed_hashes.log"  # records sha256 of already-processed inputs
    BASE_DIR="/tmp"                           # parent directory for the per-session temp dir
    MAX_FILE_SIZE=1048576 # 1MB               # per-temp-file size cap, in bytes (see process_file)
    INPUT_FILES=()                            # positional file arguments collected by parse_args

    # --- Parse Arguments -----------------------------------------------------
    # Parse command-line options into the global configuration variables.
    # Globals written: OUTPUT, HASH_LOG, BASE_DIR, NO_HASH, DEBUG, INPUT_FILES.
    # Exits via usage() on an unknown option, a missing option argument, or
    # when no input files remain after option parsing.
    parse_args() {
      while [[ "$#" -gt 0 ]]; do
        case $1 in
          -o)
            # Fail loudly instead of silently assigning an empty value.
            [[ "$#" -ge 2 ]] || { echo "Option $1 requires an argument" >&2; usage; }
            OUTPUT="$2"
            shift ;;
          --hash-file)
            [[ "$#" -ge 2 ]] || { echo "Option $1 requires an argument" >&2; usage; }
            HASH_LOG="$2"
            shift ;;
          --base-dir)
            [[ "$#" -ge 2 ]] || { echo "Option $1 requires an argument" >&2; usage; }
            BASE_DIR="$2"
            shift ;;
          --no-hash)
            NO_HASH=true ;;
          --terminal)
            # Terminal mode implies no hash log and no output file.
            NO_HASH=true
            OUTPUT="" ;;
          --debug)
            DEBUG=true ;;
          -*)
            echo "Unknown option: $1"
            usage ;;
          *)
            INPUT_FILES+=("$1") ;;
        esac
        shift
      done

      # At least one input file is required.
      [[ ${#INPUT_FILES[@]} -ge 1 ]] || usage
    }

    # --- Setup Temporary Directory -------------------------------------------
    # Create a private scratch directory under $BASE_DIR and derive the prefix
    # used for all per-session temporary files.
    # Globals read: BASE_DIR, DEBUG. Globals written: TEMP_DIR, TEMP_PREFIX.
    # Exits 1 if the directory cannot be created (previously the failure was
    # ignored, leaving TEMP_DIR empty for later rm -rf / glob operations).
    setup_temp_dir() {
      TEMP_DIR=$(mktemp -d -p "$BASE_DIR" tmp.XXXXXX) || {
        echo "Error: failed to create temporary directory under '$BASE_DIR'." >&2
        exit 1
      }
      [[ "$DEBUG" == true ]] && echo "Temporary directory: $TEMP_DIR"
      TEMP_PREFIX="${TEMP_DIR}/tmp_"
    }

    # --- Cleanup -------------------------------------------------------------
    # Remove the session's temporary directory. Registered on EXIT below, so it
    # can fire before setup_temp_dir has run (e.g. usage() exiting early);
    # guard against an unset/empty TEMP_DIR so we never run rm -rf on "".
    cleanup() {
      [[ "$DEBUG" == true ]] && echo "Cleaning up temporary files..."
      if [[ -n "${TEMP_DIR:-}" ]]; then
        rm -rf -- "$TEMP_DIR"
      fi
      return 0
    }
    trap cleanup EXIT

    # --- Hash Verification ---------------------------------------------------
    # Return 0 if the given sha256 hash is already recorded in $HASH_LOG.
    # Arguments: $1 - hex hash to look up.
    # Log lines start with "<hash> " (see main), hence the anchored pattern.
    file_processed() {
      local hash="$1"
      # No log file yet => nothing has been processed (also silences the
      # grep error the original emitted on a missing file).
      [[ -f "$HASH_LOG" ]] || return 1
      grep -q "^${hash} " "$HASH_LOG"
    }

    # --- Generate Random String ----------------------------------------------
    # Emit 16 random hexadecimal characters (8 random bytes) on stdout.
    gen_rand() {
      local hex
      hex=$(openssl rand -hex 8)
      printf '%s\n' "$hex"
    }

    # --- Process File --------------------------------------------------------
    # Split one input file into 1000-line chunks, feed each line to the
    # external "subfy" tool, and collect its output into size-capped temporary
    # files under $TEMP_PREFIX for later merging by merge_temp_files.
    # Globals read: DEBUG, TEMP_DIR, TEMP_PREFIX, MAX_FILE_SIZE.
    # Arguments: $1 - path of the file to process.
    process_file() {
      local file="$1"
      [[ "$DEBUG" == true ]] && echo "Splitting '$file' into chunks..."
      # Split file into 1000-line chunks
      # (-d numeric suffixes, -a 5 => up to 100000 chunks per file).
      split -d -a 5 -l 1000 "$file" "${TEMP_DIR}/chunk_"

      # nullglob: expand to an empty array (not the literal pattern) when the
      # split produced no chunks, e.g. for an empty input file.
      shopt -s nullglob
      local chunks=("${TEMP_DIR}/chunk_"*)
      shopt -u nullglob
      [[ "$DEBUG" == true ]] && echo "File '$file' split into ${#chunks[@]} chunks."

      local counter=1
      for chunk in "${chunks[@]}"; do
        # NOTE(review): 'local var=$(cmd)' masks the command's exit status.
        local rand_str=$(gen_rand)
        local tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
        [[ "$DEBUG" == true ]] && echo "Creating temp file: $tmp_file"

        while IFS= read -r line; do
          # Process line with subfy; sort and remove duplicates per line.
          # (subfy's semantics are not visible here — presumably it expands a
          # domain into subdomains, one per output line; confirm against its
          # own documentation.)
          echo "$line" | /usr/local/bin/subfy -f /dev/stdin | sort -T /tmp -S 50% | uniq >> "$tmp_file"
          # Check file size and start a new file if needed.
          if [ -f "$tmp_file" ]; then
            local size
            size=$(stat -c%s "$tmp_file")
            if [ "$size" -ge "$MAX_FILE_SIZE" ]; then
              # Roll over to a fresh temp file once the size cap is reached;
              # the new random prefix avoids name collisions.
              counter=$((counter + 1))
              rand_str=$(gen_rand)
              tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
              [[ "$DEBUG" == true ]] && echo "Max size reached. New temp file: $tmp_file"
            fi
          fi
        done < "$chunk"
        # Chunk is fully consumed; delete it so a later process_file call can
        # reuse the same chunk names without mixing files.
        rm -f "$chunk"
        [[ "$DEBUG" == true ]] && echo "Removed chunk: $chunk"
      done
    }

    # --- Merge Output --------------------------------------------------------
    # Merge all per-session temp files into one sorted, de-duplicated file.
    # Arguments: $1 - destination path for the merged output.
    # Previously an unmatched "${TEMP_PREFIX}"*.tmp glob was passed literally
    # to sort, which then errored; now an empty session yields an empty file.
    merge_temp_files() {
      local output_file="$1"
      local tmp_files
      shopt -s nullglob
      tmp_files=("${TEMP_PREFIX}"*.tmp)
      shopt -u nullglob
      if [ "${#tmp_files[@]}" -eq 0 ]; then
        # No results this session; still produce an (empty) output file.
        : > "$output_file"
      else
        sort -T /tmp -S 50% -u "${tmp_files[@]}" > "$output_file"
      fi
      [[ "$DEBUG" == true ]] && echo "Merged temporary files into: $output_file"
      return 0
    }

    # --- Merge With Existing Output ------------------------------------------
    # Fold the session results file ($1) into the persistent $OUTPUT database,
    # keeping the database sorted and duplicate-free.
    # Globals read: OUTPUT, DEBUG.
    merge_with_existing() {
      local session_file="$1"
      if [ ! -f "$OUTPUT" ]; then
        # First run: the session results become the database as-is.
        mv "$session_file" "$OUTPUT"
        local line_total
        line_total=$(wc -l < "$OUTPUT")
        [[ "$DEBUG" == true ]] && echo "Created output file '$OUTPUT' with $line_total lines."
        return
      fi
      local lines_before lines_after
      lines_before=$(wc -l < "$OUTPUT")
      # Union of old database and new results, written via a temp file so a
      # failed sort never clobbers the existing database.
      sort -T /tmp -S 50% -u "$OUTPUT" "$session_file" > "${OUTPUT}.tmp"
      mv "${OUTPUT}.tmp" "$OUTPUT"
      lines_after=$(wc -l < "$OUTPUT")
      [[ "$DEBUG" == true ]] && echo "Output updated: $lines_before -> $lines_after lines."
    }

    # --- Main ----------------------------------------------------------------
    # Orchestrate the run: parse options, process each (not yet seen) input
    # file through subfy, merge results, and optionally update the hash log.
    main() {
      parse_args "$@"
      setup_temp_dir

      local file file_hash
      for file in "${INPUT_FILES[@]}"; do
        # Skip unreadable inputs up front; sha256sum/split would fail anyway.
        if [ ! -r "$file" ]; then
          echo "Warning: cannot read input file '$file'; skipping." >&2
          continue
        fi

        if [ "$NO_HASH" = false ]; then
          file_hash=$(sha256sum "$file" | awk '{print $1}')
          if file_processed "$file_hash"; then
            [[ "$DEBUG" == true ]] && echo "Skipping already processed file: $file"
            continue
          fi
        fi

        [[ "$DEBUG" == true ]] && echo "Processing file: $file"
        process_file "$file"

        if [ "$NO_HASH" = false ]; then
          # Record the hash computed before processing (the original re-ran
          # sha256sum here for no benefit).
          echo "$file_hash - $file" >> "$HASH_LOG"
        fi
      done

      # Merge all temporary results into a final output file.
      local session_output="${TEMP_DIR}/session_output.txt"
      merge_temp_files "$session_output"

      if [ -n "$OUTPUT" ]; then
        merge_with_existing "$session_output"
        [[ "$DEBUG" == true ]] && echo "Processing complete. Output saved to '$OUTPUT'."
      else
        cat "$session_output"
      fi

      # Optional debug report. The temp files are deleted later by the EXIT
      # trap, so report them as pending (the original said "removed" and
      # counted them by parsing ls output).
      if [[ "$DEBUG" == true ]]; then
        local tmp_files total_size
        shopt -s nullglob
        tmp_files=("${TEMP_PREFIX}"*.tmp)
        shopt -u nullglob
        total_size=0
        if [ "${#tmp_files[@]}" -gt 0 ]; then
          total_size=$(du -ch "${tmp_files[@]}" | grep 'total$' | awk '{print $1}')
        fi
        echo "Temporary files pending cleanup: ${#tmp_files[@]}, totaling ~$total_size."
      fi
    }

    # --- Execute -------------------------------------------------------------
    # Entry point: forward all command-line arguments to main.
    main "$@"