Last active
March 25, 2025 01:40
-
-
Save and0x00/dd5fb8ede37325c80d84f40ba7f94707 to your computer and use it in GitHub Desktop.
Revisions
-
and0x00 revised this gist
Mar 25, 2025 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -133,11 +133,11 @@ merge_with_existing() { local new_output="$1" if [ -f "$OUTPUT" ]; then local before_count before_count=$(du -b "$OUTPUT" | cut -f1) sort -T /tmp -S 50% -u "$OUTPUT" "$new_output" > "${OUTPUT}.tmp" mv "${OUTPUT}.tmp" "$OUTPUT" local after_count after_count=$(du -b "$OUTPUT" | cut -f1) [[ "$DEBUG" == true ]] && echo "Output updated: $before_count -> $after_count bytes." else mv "$new_output" "$OUTPUT" -
and0x00 revised this gist
Mar 25, 2025 . 1 changed file with 4 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -133,17 +133,17 @@ merge_with_existing() { local new_output="$1" if [ -f "$OUTPUT" ]; then local before_count before_count=$(du -m "$OUTPUT" | cut -f1) sort -T /tmp -S 50% -u "$OUTPUT" "$new_output" > "${OUTPUT}.tmp" mv "${OUTPUT}.tmp" "$OUTPUT" local after_count after_count=$(du -m "$OUTPUT" | cut -f1) [[ "$DEBUG" == true ]] && echo "Output updated: $before_count -> $after_count bytes." else mv "$new_output" "$OUTPUT" local count count=$(wc -l < "$OUTPUT") [[ "$DEBUG" == true ]] && echo "Created output file '$OUTPUT' with $count bytes." fi } -
and0x00 revised this gist
Mar 25, 2025 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -167,7 +167,7 @@ main() { if [ "$NO_HASH" = false ]; then file_hash=$(sha256sum "$file" | awk '{print $1}') echo "$file_hash $file" >> "$HASH_LOG" fi done -
and0x00 created this gist
Mar 23, 2025 .There are no files selected for viewing
#!/bin/bash
#
# Split input files into 1000-line chunks, run every line through "subfy",
# and merge the de-duplicated results into a persistent output file.
# A SHA-256 of each input file is logged so the same file is not processed
# twice (unless --no-hash / --terminal is given).
#
# Environment:
#   SUBFY_BIN  Path to the subfy executable (default: /usr/local/bin/subfy).

# --- Usage ---------------------------------------------------------------
# Prints the help text to stderr and terminates the script with status 1.
usage() {
  cat >&2 <<EOF
Usage: $0 [options] <file1> [file2 ... fileN]
Options:
  --no-hash           Skip hash file creation/verification.
  --terminal          Display output on terminal; skip hash file operations.
  --debug             Enable debug output.
  -o <output>         Output file (default: ${HOME}/.subdomains_db).
  --hash-file <log>   Hash log file (default: ${HOME}/.processed_hashes.log).
  --base-dir <dir>    Base directory for temporary files (default: /tmp).
EOF
  exit 1
}

# --- Global Variables ----------------------------------------------------
OUTPUT="${HOME}/.subdomains_db"
NO_HASH=false
DEBUG=false
HASH_LOG="${HOME}/.processed_hashes.log"
BASE_DIR="/tmp"
MAX_FILE_SIZE=1048576 # 1 MiB: a temp file is rotated once it reaches this size
INPUT_FILES=()
TEMP_DIR=""    # set by setup_temp_dir; empty means "nothing to clean up"
TEMP_PREFIX="" # set by setup_temp_dir
SUBFY_BIN="${SUBFY_BIN:-/usr/local/bin/subfy}"

# --- Debug Logging -------------------------------------------------------
# Prints its arguments to stderr when --debug is active.
# Always returns 0 so that a disabled debug message never becomes the exit
# status of the calling function (or of the whole script).
debug() {
  if [[ "$DEBUG" == true ]]; then
    printf '%s\n' "$*" >&2
  fi
  return 0
}

# --- Parse Arguments -----------------------------------------------------
# Aborts via usage() when the option in $1 is not followed by a value.
# Arguments: the remaining command line ("$@" of the caller).
require_value() {
  if [[ "$#" -lt 2 ]]; then
    echo "Option $1 requires an argument." >&2
    usage
  fi
}

# Fills OUTPUT, HASH_LOG, BASE_DIR, NO_HASH, DEBUG and INPUT_FILES from the
# command line. Exits via usage() on unknown options, missing option values,
# or when no input file is given.
parse_args() {
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      -o)
        require_value "$@"
        OUTPUT="$2"
        shift
        ;;
      --hash-file)
        require_value "$@"
        HASH_LOG="$2"
        shift
        ;;
      --base-dir)
        require_value "$@"
        BASE_DIR="$2"
        shift
        ;;
      --no-hash)
        NO_HASH=true
        ;;
      --terminal)
        # An empty OUTPUT makes main() print the results instead of saving them.
        NO_HASH=true
        OUTPUT=""
        ;;
      --debug)
        DEBUG=true
        ;;
      -*)
        echo "Unknown option: $1" >&2
        usage
        ;;
      *)
        INPUT_FILES+=("$1")
        ;;
    esac
    shift
  done
  [[ ${#INPUT_FILES[@]} -ge 1 ]] || usage
}

# --- Setup Temporary Directory -------------------------------------------
# Creates a private working directory below BASE_DIR and derives TEMP_PREFIX
# from it. Exits with status 1 when the directory cannot be created, so later
# steps never write to paths built from an empty TEMP_DIR.
setup_temp_dir() {
  if ! TEMP_DIR=$(mktemp -d -p "$BASE_DIR" tmp.XXXXXX); then
    TEMP_DIR=""
    echo "Failed to create a temporary directory under '$BASE_DIR'." >&2
    exit 1
  fi
  debug "Temporary directory: $TEMP_DIR"
  TEMP_PREFIX="${TEMP_DIR}/tmp_"
}

# --- Cleanup -------------------------------------------------------------
# EXIT handler: removes the working directory, if one was created.
cleanup() {
  if [[ -n "$TEMP_DIR" && -d "$TEMP_DIR" ]]; then
    debug "Cleaning up temporary files..."
    rm -rf -- "$TEMP_DIR"
  fi
}
trap cleanup EXIT

# --- Hash Verification ---------------------------------------------------
# Returns 0 when the hash in $1 starts a line of HASH_LOG, non-zero otherwise
# (including when the log does not exist yet or the hash is empty).
file_processed() {
  local hash="$1"
  [[ -n "$hash" && -f "$HASH_LOG" ]] || return 1
  grep -q -- "^${hash} " "$HASH_LOG"
}

# --- Generate Random String ----------------------------------------------
# Prints 16 hex characters used to make temp file names unique.
# Falls back to bash's $RANDOM when openssl is unavailable.
gen_rand() {
  openssl rand -hex 8 2>/dev/null \
    || printf '%04x%04x%04x%04x\n' "$RANDOM" "$RANDOM" "$RANDOM" "$RANDOM"
}

# --- Process File --------------------------------------------------------
# Splits the file in $1 into 1000-line chunks and feeds every line to subfy,
# appending the sorted, de-duplicated output to "${TEMP_PREFIX}*.tmp" files.
# A new temp file is started whenever the current one reaches MAX_FILE_SIZE.
# Returns 0 on success, 1 when the file could not be split.
process_file() {
  local file="$1"
  debug "Splitting '$file' into chunks..."

  # Split file into 1000-line chunks
  if ! split -d -a 5 -l 1000 -- "$file" "${TEMP_DIR}/chunk_"; then
    echo "Failed to split '$file'." >&2
    return 1
  fi

  shopt -s nullglob
  local chunks=("${TEMP_DIR}/chunk_"*)
  shopt -u nullglob
  debug "File '$file' split into ${#chunks[@]} chunks."

  local counter=1
  local chunk line size rand_str tmp_file
  for chunk in "${chunks[@]}"; do
    rand_str=$(gen_rand)
    tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
    debug "Creating temp file: $tmp_file"

    # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Process line with subfy; sort and remove duplicates per line.
      # sort spills into TEMP_DIR so --base-dir is honoured and the spill
      # files are removed by cleanup().
      printf '%s\n' "$line" \
        | "$SUBFY_BIN" -f /dev/stdin \
        | sort -T "$TEMP_DIR" -S 50% -u >> "$tmp_file"

      # Check file size and start a new file if needed.
      if [[ -f "$tmp_file" ]]; then
        size=$(stat -c%s -- "$tmp_file")
        if [[ "$size" -ge "$MAX_FILE_SIZE" ]]; then
          counter=$((counter + 1))
          rand_str=$(gen_rand)
          tmp_file="${TEMP_PREFIX}${rand_str}_${counter}.tmp"
          debug "Max size reached. New temp file: $tmp_file"
        fi
      fi
    done < "$chunk"

    rm -f -- "$chunk"
    debug "Removed chunk: $chunk"
  done
  return 0
}

# --- Merge Output --------------------------------------------------------
# Merges every "${TEMP_PREFIX}*.tmp" file into the file named by $1, sorted
# and de-duplicated. Produces an empty file when there is nothing to merge.
# Returns non-zero when sort fails.
merge_temp_files() {
  local output_file="$1"

  shopt -s nullglob
  local parts=("${TEMP_PREFIX}"*.tmp)
  shopt -u nullglob

  if [[ ${#parts[@]} -eq 0 ]]; then
    # Without this guard sort would be handed the literal, unmatched pattern.
    : > "$output_file" || return 1
    debug "No temporary files to merge."
    return 0
  fi

  sort -T "$TEMP_DIR" -S 50% -u -- "${parts[@]}" > "$output_file" || return 1
  debug "Merged temporary files into: $output_file"
}

# --- Merge With Existing Output ------------------------------------------
# Folds the session results in $1 into OUTPUT. An existing OUTPUT is only
# replaced after the merged copy was written successfully.
# Returns non-zero on failure, leaving an existing OUTPUT untouched.
merge_with_existing() {
  local new_output="$1"
  if [[ -f "$OUTPUT" ]]; then
    local before_count after_count
    before_count=$(wc -l < "$OUTPUT")
    if ! sort -T "$TEMP_DIR" -S 50% -u -- "$OUTPUT" "$new_output" \
      > "${OUTPUT}.tmp"; then
      rm -f -- "${OUTPUT}.tmp"
      echo "Failed to merge results into '$OUTPUT'." >&2
      return 1
    fi
    mv -- "${OUTPUT}.tmp" "$OUTPUT" || return 1
    after_count=$(wc -l < "$OUTPUT")
    debug "Output updated: $before_count -> $after_count lines."
  else
    mv -- "$new_output" "$OUTPUT" || return 1
    local count
    count=$(wc -l < "$OUTPUT")
    debug "Created output file '$OUTPUT' with $count lines."
  fi
}

# --- Main ----------------------------------------------------------------
# Orchestrates the run: parse options, process each new input file, merge the
# results, then record the hashes of the files that were processed.
# Returns 0 on success, 1 when at least one input file could not be processed.
# Exits with status 1 when subfy is missing or a merge step fails.
main() {
  parse_args "$@"

  if ! command -v "$SUBFY_BIN" > /dev/null 2>&1; then
    echo "subfy executable not found: $SUBFY_BIN" >&2
    exit 1
  fi

  setup_temp_dir

  local status=0
  local file file_hash
  local -a pending_hashes=()
  for file in "${INPUT_FILES[@]}"; do
    if [[ ! -r "$file" || -d "$file" ]]; then
      echo "Skipping unreadable input: $file" >&2
      status=1
      continue
    fi

    file_hash=""
    if [[ "$NO_HASH" == false ]]; then
      file_hash=$(sha256sum -- "$file" | awk '{print $1}')
      if file_processed "$file_hash"; then
        debug "Skipping already processed file: $file"
        continue
      fi
    fi

    debug "Processing file: $file"
    if ! process_file "$file"; then
      echo "Failed to process file: $file" >&2
      status=1
      continue
    fi

    if [[ "$NO_HASH" == false && -n "$file_hash" ]]; then
      pending_hashes+=("$file_hash - $file")
    fi
  done

  # Merge all temporary results into a final output file.
  local session_output="${TEMP_DIR}/session_output.txt"
  if ! merge_temp_files "$session_output"; then
    echo "Failed to merge temporary results." >&2
    exit 1
  fi

  if [[ -n "$OUTPUT" ]]; then
    merge_with_existing "$session_output" || exit 1
    debug "Processing complete. Output saved to '$OUTPUT'."
  else
    cat -- "$session_output"
  fi

  # Record hashes only now: a file must not be marked as processed unless its
  # results actually reached the output.
  if [[ ${#pending_hashes[@]} -gt 0 ]]; then
    printf '%s\n' "${pending_hashes[@]}" >> "$HASH_LOG"
  fi

  # Optional debug report.
  if [[ "$DEBUG" == true ]]; then
    shopt -s nullglob
    local tmp_files=("${TEMP_PREFIX}"*.tmp)
    shopt -u nullglob
    local total_size="0"
    if [[ ${#tmp_files[@]} -gt 0 ]]; then
      # The last line printed by "du -c" is the grand total.
      total_size=$(du -ch -- "${tmp_files[@]}" 2> /dev/null \
        | awk 'END {print $1}')
    fi
    debug "Temporary files removed: ${#tmp_files[@]}, totaling ~$total_size."
  fi

  return "$status"
}

# --- Execute -------------------------------------------------------------
main "$@"