Paradiddle131 · May 12, 2025 21:11
diff --git a/gitingest.sh b/gitingest.sh
 #!/bin/bash

 # --- Runtime settings (may be overridden while parsing the command line) ---
 OUTPUT_FILE=""       # Destination file; stays empty until -o or the default name is applied
 VERBOSE=false        # "true" enables log_verbose output
 DRY_RUN=false        # "true" lists candidate files instead of writing output
 ESTIMATE_TOKENS=true # "true" prints the rough token estimate in the summary

 # --- Selections collected from the command line ---
 INPUT_PATHS=()            # Files and directories to ingest
 EXPLICIT_EXCLUDE_PATHS=() # --exclude: specific paths
 INCLUDE_PATTERNS=()       # --include-pattern: shell globs
 EXCLUDE_SHELL_PATTERNS=() # --exclude-pattern: shell globs
 INCLUDE_REGEX_PATTERNS=() # --include-regex
 EXCLUDE_REGEX_PATTERNS=() # --exclude-regex

 # Built-in exclusions, applied before any glob/regex filtering.
 # Directory names that are never searched:
 EXCLUDE_DIRS_PATTERN=(
     ".git" "node_modules" "dist" "build"
     ".venv" "venv" "env"
     "bin" "obj"
     "__pycache__" ".pytest_cache" ".mypy_cache" ".ruff_cache"
     ".idea" ".vscode"
     "target" "out" "logs"
 )
 # File-name globs that are never ingested:
 EXCLUDE_FILES_PATTERN=(
     "*.pyc" "*.pyo" "*.log" "*.swp" "*.swo"
     "*.lock" "poetry.lock" "package-lock.json" "yarn.lock" "Pipfile.lock" "uv.lock"
     "*.DS_Store" "Thumbs.db"
     "*.class" "*.jar" "*.war" "*.ear"
 )

 # Print a diagnostic message to stderr when verbose mode is enabled.
 # Globals:   VERBOSE (read) - output is produced only when it equals "true"
 # Arguments: message words; they are joined with single spaces
 # Outputs:   "[VERBOSE] <message>" on stderr
 # Returns:   0 when verbose mode is off; otherwise the status of printf
 log_verbose() {
     [ "$VERBOSE" = true ] || return 0
     # "$*" keeps prefix and message as ONE word ("$@" inside a string splits
     # it into several). IFS is pinned locally so the join is always a space.
     local IFS=' '
     printf '%s\n' "[VERBOSE] $*" >&2
 }

 # Map a file name to the language tag used after the opening code fence.
 # Arguments: $1 - file name or path
 # Outputs:   the language tag on stdout, or an empty line when unknown
 # Returns:   0
 get_lang_hint() {
     local filename="$1"
     # Only the last path component may contribute an extension; otherwise a
     # dotted directory name (e.g. "v1.2/README") would be misread as one.
     local base="${filename##*/}"
     # A name without any dot has no extension. Without this guard a file
     # literally called "json" or "sh" would be tagged as that language.
     if [[ "$base" != *.* ]]; then
         echo ""
         return 0
     fi
     local ext="${base##*.}"
     # Lower-case so "README.MD" or "Main.JAVA" are recognised too.
     case "${ext,,}" in
         py) echo "python" ;;
         js) echo "javascript" ;;
         ts) echo "typescript" ;;
         java) echo "java" ;;
         c) echo "c" ;;
         cpp | cxx | h | hpp) echo "cpp" ;;
         cs) echo "csharp" ;;
         go) echo "go" ;;
         rb) echo "ruby" ;;
         php) echo "php" ;;
         html | htm) echo "html" ;;
         css) echo "css" ;;
         scss | sass) echo "scss" ;;
         sh | bash) echo "bash" ;;
         zsh) echo "zsh" ;;
         sql) echo "sql" ;;
         md | markdown) echo "markdown" ;;
         json) echo "json" ;;
         yaml | yml) echo "yaml" ;;
         xml) echo "xml" ;;
         *) echo "" ;;
     esac
 }

 # Append one file to the digest as a headed, fenced code block.
 # Arguments: $1 - path of the file to read
 #            $2 - path shown in the "FILE:" header
 #            $3 - output file to append to
 # Outputs:   header, opening fence (with language hint), file content,
 #            closing fence and a blank line appended to $3; warnings on stderr
 # Returns:   0 on success; 1 if the file is not a readable regular file or
 #            cat failed (the closing fence is still written in that case)
 process_file() {
     local filepath="$1" relative_path="$2" output_target="$3"
     if [ ! -f "$filepath" ] || [ ! -r "$filepath" ]; then
         echo "Warning: Cannot read file: $filepath. Skipping." >&2
         return 1
     fi
     log_verbose "Processing file: $filepath (as $relative_path)"

     local lang_hint
     lang_hint=$(get_lang_hint "$filepath")

     # Hex value of the file's final byte ("" for an empty file). Needed so
     # the closing fence never ends up glued to an unterminated last line.
     local last_byte
     last_byte=$(tail -c 1 -- "$filepath" 2>/dev/null | od -An -tx1 | tr -d '[:space:]')

     local exit_code=0
     {
         echo "================================================"
         echo "FILE: $relative_path"
         echo "================================================"
         echo '```'"$lang_hint"
         cat -- "$filepath" || exit_code=$?
         if [ -n "$last_byte" ] && [ "$last_byte" != "0a" ]; then
             echo ""
         fi
         echo '```'
         echo ""
     } >> "$output_target"

     if [ "$exit_code" -ne 0 ]; then
         echo "Warning: Error reading file content: $filepath" >&2
         return 1
     fi
     return 0
 }

 # Test a name against a list of shell glob patterns.
 # Arguments: $1 - string to test; $2... - glob patterns
 # Returns:   0 if any pattern matches, 1 otherwise (also when no patterns given)
 is_match_by_pattern() {
     local name_to_check="$1"
     shift
     # Declared local so callers' own "pattern" variable is not clobbered.
     local pattern
     for pattern in "$@"; do
         # The right-hand side is intentionally unquoted: it must act as a glob.
         if [[ "$name_to_check" == $pattern ]]; then
             return 0
         fi
     done
     return 1
 }

 # Test a string (typically a path) against a list of extended regexes.
 # Matching uses [[ =~ ]], i.e. an unanchored search unless the regex
 # itself contains ^ or $.
 # Arguments: $1 - string to test; $2... - regex patterns
 # Returns:   0 if any regex matches, 1 otherwise (also when no regexes given)
 is_match_by_regex() {
     local string_to_check="$1"
     shift
     # Declared local so callers' own "regex" variable is not clobbered.
     local regex
     for regex in "$@"; do
         # Unquoted on purpose: a quoted right-hand side would match literally.
         if [[ "$string_to_check" =~ $regex ]]; then
             return 0
         fi
     done
     return 1
 }


 # Decide whether a path was excluded via --exclude.
 # Globals:   EXPLICIT_EXCLUDE_PATHS (read) - entries are compared as stored
 # Arguments: $1 - candidate path; normalised with `realpath -sm` (no symlink
 #                 resolution, need not exist), falling back to the raw value
 # Returns:   0 if the candidate equals an entry or lies inside an entry that
 #            is an existing directory; 1 otherwise
 is_explicitly_excluded() {
     local candidate_path="$1"
     local abs_candidate_path
     # Declared local so the loop does not leak a global variable.
     local excluded_path_entry
     abs_candidate_path=$(realpath -sm -- "$candidate_path" 2>/dev/null || echo "$candidate_path")
     for excluded_path_entry in "${EXPLICIT_EXCLUDE_PATHS[@]}"; do
         if [[ "$abs_candidate_path" == "$excluded_path_entry" ]]; then
             log_verbose "Explicitly excluding '$candidate_path' (abs: '$abs_candidate_path') due to --exclude exact match with '$excluded_path_entry'"
             return 0
         fi
         # The trailing "/" in the prefix stops "/a/skip" from matching "/a/skipper".
         if [[ -d "$excluded_path_entry" && "$abs_candidate_path" == "$excluded_path_entry/"* ]]; then
             log_verbose "Explicitly excluding '$candidate_path' (abs: '$abs_candidate_path') because it's inside --exclude directory '$excluded_path_entry'"
             return 0
         fi
     done
     return 1
 }

 # List the files below a directory that survive all filters.
 # Globals:   EXCLUDE_DIRS_PATTERN, EXCLUDE_FILES_PATTERN (read) - always pruned
 #            INCLUDE_REGEX_PATTERNS, EXCLUDE_REGEX_PATTERNS (read) - if either
 #              is non-empty, regex filtering is used and globs are ignored
 #            INCLUDE_PATTERNS, EXCLUDE_SHELL_PATTERNS (read) - glob filtering
 # Arguments: $1 - directory to search
 # Outputs:   NUL-terminated paths relative to $1 on stdout (find's %P)
 # Returns:   exit status of find
 # Requires a find that supports -regextype and -printf (GNU findutils).
 run_find_in_dir() {
     local search_path="$1"
     local p regex # loop variables, kept out of the global scope
     local find_cmd_args=("$search_path")

     # --- Initial pruning (always applied) ---
     local prune_conditions=()
     if [ ${#EXCLUDE_DIRS_PATTERN[@]} -gt 0 ]; then
         prune_conditions+=("(")
         for p in "${EXCLUDE_DIRS_PATTERN[@]}"; do
             prune_conditions+=("-name" "$p" "-o")
         done
         # Overwrite the dangling trailing "-o" with the closing parenthesis.
         prune_conditions[${#prune_conditions[@]}-1]=")"
         prune_conditions+=("-type" "d")
     fi
     if [ ${#EXCLUDE_FILES_PATTERN[@]} -gt 0 ]; then
         if [ ${#prune_conditions[@]} -gt 0 ]; then
             prune_conditions+=("-o")
         fi
         prune_conditions+=("(")
         for p in "${EXCLUDE_FILES_PATTERN[@]}"; do
             prune_conditions+=("-name" "$p" "-o")
         done
         prune_conditions[${#prune_conditions[@]}-1]=")"
         prune_conditions+=("-type" "f")
     fi
     if [ ${#prune_conditions[@]} -gt 0 ]; then
         # The outer parentheses are essential: AND binds tighter than -o, so
         # without them -prune would attach to the file branch only and the
         # excluded directories would still be descended into.
         find_cmd_args+=("(" "${prune_conditions[@]}" ")" "-prune" "-o")
     fi

     # --- Main filtering (regex OR glob, never both) ---
     # Filter groups are chained by find's implicit AND. An explicit leading
     # "-and" right after "-o" (or right after the start path) is rejected by
     # find as a binary operator with nothing before it.
     if [ ${#INCLUDE_REGEX_PATTERNS[@]} -gt 0 ] || [ ${#EXCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
         log_verbose "Using REGEX filtering for find."
         find_cmd_args+=("-regextype" "posix-extended")
         if [ ${#INCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
             find_cmd_args+=("(")
             for regex in "${INCLUDE_REGEX_PATTERNS[@]}"; do
                 find_cmd_args+=("-regex" "$regex" "-o")
             done
             find_cmd_args[${#find_cmd_args[@]}-1]=")"
         fi
         if [ ${#EXCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
             find_cmd_args+=("-not" "(")
             for regex in "${EXCLUDE_REGEX_PATTERNS[@]}"; do
                 find_cmd_args+=("-regex" "$regex" "-o")
             done
             find_cmd_args[${#find_cmd_args[@]}-1]=")"
         fi
     else
         log_verbose "Using GLOB filtering for find."
         # Patterns containing "/" are matched against the whole path (-path),
         # all others against the file name only (-name).
         if [ ${#INCLUDE_PATTERNS[@]} -gt 0 ]; then
             find_cmd_args+=("(")
             for p in "${INCLUDE_PATTERNS[@]}"; do
                 if [[ "$p" == *"/"* ]]; then
                     find_cmd_args+=("-path" "$p" "-o")
                 else
                     find_cmd_args+=("-name" "$p" "-o")
                 fi
             done
             find_cmd_args[${#find_cmd_args[@]}-1]=")"
         fi
         if [ ${#EXCLUDE_SHELL_PATTERNS[@]} -gt 0 ]; then
             find_cmd_args+=("-not" "(")
             for p in "${EXCLUDE_SHELL_PATTERNS[@]}"; do
                 if [[ "$p" == *"/"* ]]; then
                     find_cmd_args+=("-path" "$p" "-o")
                 else
                     find_cmd_args+=("-name" "$p" "-o")
                 fi
             done
             find_cmd_args[${#find_cmd_args[@]}-1]=")"
         fi
     fi

     find_cmd_args+=("-type" "f" "-printf" "%P\0")
     log_verbose "Executing find: find ${find_cmd_args[*]}"
     # stderr is discarded, which also hides expression errors; re-run the
     # logged command by hand when results look wrong.
     find "${find_cmd_args[@]}" 2>/dev/null
 }

 # Print a rough token estimate (bytes / 4) for a byte count.
 # Arguments: $1 - byte count; $2 - label for what was measured
 #                 (falls back to "processed files" when empty)
 # Outputs:   one summary line on stdout; nothing when $1 is not a
 #            non-negative integer or is zero (only a verbose note is logged)
 # Formatting: >= 1,000,000 tokens -> "X.YM", >= 1,000 -> "X.YK", otherwise
 #             the plain number; the decimal digit is truncated, not rounded.
 calculate_and_print_token_estimate() {
     local total_bytes="$1"
     local output_target="$2"

     if ! [[ "$total_bytes" =~ ^[0-9]+$ ]] || [ "$total_bytes" -eq 0 ]; then
         log_verbose "No bytes processed, cannot estimate tokens."
         return
     fi

     local estimated_tokens=$((total_bytes / 4))
     local formatted_tokens whole tenth
     if ((estimated_tokens >= 1000000)); then
         whole=$((estimated_tokens / 1000000))
         tenth=$((estimated_tokens % 1000000 / 100000))
         formatted_tokens="${whole}.${tenth}M"
     elif ((estimated_tokens >= 1000)); then
         whole=$((estimated_tokens / 1000))
         tenth=$((estimated_tokens % 1000 / 100))
         formatted_tokens="${whole}.${tenth}K"
     else
         formatted_tokens="$estimated_tokens"
     fi

     echo "Rough Token Estimate (~chars/4) for ${output_target:-processed files}: ${formatted_tokens} tokens (${total_bytes} bytes)"
 }

 # Print the help text to stderr and terminate the script with status 1.
 # Outputs: usage summary on stderr; the default output name shown is built
 #          from the current directory's base name plus "-ingest.txt"
 usage() {
     local default_name_example
     default_name_example="$(basename "$(pwd)")-ingest.txt"
     printf '%s\n' \
         "Usage: $0 [options] [--] [file_or_dir ...]" \
         "Options:" \
         "  -o <filename.txt>: Output file (default: '$default_name_example')." \
         "  -v, --verbose: Enable verbose logging." \
         "  -d, --dry-run: List files to include, don't create output." \
         "  --estimate-tokens: Show rough token estimate (~chars/4) at the end." \
         "  --exclude <path>: Specific file/dir to exclude (shell wildcards expanded by shell)." \
         "  --include-pattern <glob>: Glob pattern ('*.py') to include in search. Repeatable." \
         "  --exclude-pattern <glob>: Glob pattern ('*.log') to exclude from search. Repeatable." \
         "  --include-regex <regex>: POSIX ERE regex to include files (matches full relative path). Repeatable." \
         "  --exclude-regex <regex>: POSIX ERE regex to exclude files (matches full relative path). Repeatable." \
         "  --: Use if a filename starts with '-'." \
         "Input: Files or directories. Default '.' if using --include-pattern/regex and no paths given." \
         >&2
     exit 1
 }

 # With no arguments at all there is nothing to do: show the help and exit.
 if (($# == 0)); then
     usage
 fi

 # --- Command-line parsing ---
 # Multi-value options (--exclude, --include-*, --exclude-*) consume every
 # following argument up to the next one that starts with '-'.
 # Anything that is not an option is collected as an input path.
 while (($# > 0)); do
     case "$1" in
         -o)
             if [[ -z "$2" || "$2" == -* ]]; then
                 echo "Error: -o requires filename." >&2
                 usage
             fi
             OUTPUT_FILE="$2"
             shift 2
             ;;
         -v | --verbose)
             VERBOSE=true
             shift
             ;;
         -d | --dry-run)
             DRY_RUN=true
             shift
             ;;
         --estimate-tokens)
             ESTIMATE_TOKENS=true
             shift
             ;;
         --exclude)
             shift
             # Stored in the same normalised form that is_explicitly_excluded
             # compares against (raw value if realpath is unavailable).
             while (($# > 0)) && [[ "$1" != -* ]]; do
                 EXPLICIT_EXCLUDE_PATHS+=("$(realpath -sm -- "$1" 2>/dev/null || echo "$1")")
                 shift
             done
             ;;
         --include-pattern)
             shift
             while (($# > 0)) && [[ "$1" != -* ]]; do
                 INCLUDE_PATTERNS+=("$1")
                 shift
             done
             ;;
         --exclude-pattern)
             shift
             while (($# > 0)) && [[ "$1" != -* ]]; do
                 EXCLUDE_SHELL_PATTERNS+=("$1")
                 shift
             done
             ;;
         --include-regex)
             shift
             while (($# > 0)) && [[ "$1" != -* ]]; do
                 INCLUDE_REGEX_PATTERNS+=("$1")
                 shift
             done
             ;;
         --exclude-regex)
             shift
             while (($# > 0)) && [[ "$1" != -* ]]; do
                 EXCLUDE_REGEX_PATTERNS+=("$1")
                 shift
             done
             ;;
         --)
             # Everything after "--" is an input path, even if it starts with '-'.
             shift
             INPUT_PATHS+=("$@")
             shift $#
             break
             ;;
         -*)
             echo "Error: Unknown option '$1'." >&2
             usage
             ;;
         *)
             INPUT_PATHS+=("$1")
             shift
             ;;
     esac
 done

 # --- Set Default Input/Output ---
 # No input paths: search "." when include patterns/regexes were given,
 # otherwise there is nothing to ingest and usage is shown.
 if [ ${#INPUT_PATHS[@]} -eq 0 ]; then
     if [ ${#INCLUDE_PATTERNS[@]} -gt 0 ] || [ ${#INCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
         INPUT_PATHS+=(".")
         log_verbose "Defaulting input path to '.' with include patterns/regex."
     else
         echo "Error: No input or include patterns/regex specified." >&2
         usage
     fi
 fi

 # Without -o the output is named after the current directory.
 if [[ -z "$OUTPUT_FILE" ]]; then
     current_dir_name=$(basename "$(pwd)")
     OUTPUT_FILE="${current_dir_name}-ingest.txt"
     log_verbose "Output file not specified, defaulting to: $OUTPUT_FILE"
 fi

 # --- Initialization ---
 total_bytes_processed=0 # Bytes summed over listed files (dry run only)
 processed_count=0       # Files listed (dry run) or successfully written
 files_to_process=()     # Paths selected by the discovery phase

 # --- Prepare Output / Dry Run Start ---
 if [ "$DRY_RUN" != true ]; then
     log_verbose "Starting code ingestion..."
     log_verbose "Output file: $OUTPUT_FILE"
     # Create or truncate the output file up front so an unwritable target
     # aborts the run before any file is processed.
     if ! : > "$OUTPUT_FILE"; then
         echo "Error: Could not create/clear output file: $OUTPUT_FILE" >&2
         exit 1
     fi
 else
     log_verbose "Starting dry run..."
     echo "--- Files that would be included ---"
 fi

 # Record the effective configuration in the verbose log.
 log_verbose "Input paths: ${INPUT_PATHS[*]}"
 log_verbose "Default Exclude Dirs Pattern: ${EXCLUDE_DIRS_PATTERN[*]}"
 log_verbose "Default Exclude Files Pattern: ${EXCLUDE_FILES_PATTERN[*]}"
 if ((${#EXPLICIT_EXCLUDE_PATHS[@]} > 0)); then
     log_verbose "Explicit Exclude Paths (--exclude): ${EXPLICIT_EXCLUDE_PATHS[*]}"
 fi
 if ((${#INCLUDE_PATTERNS[@]} > 0)); then
     log_verbose "Include Patterns (Glob): ${INCLUDE_PATTERNS[*]}"
 fi
 if ((${#EXCLUDE_SHELL_PATTERNS[@]} > 0)); then
     log_verbose "Exclude Patterns (Glob): ${EXCLUDE_SHELL_PATTERNS[*]}"
 fi
 if ((${#INCLUDE_REGEX_PATTERNS[@]} > 0)); then
     log_verbose "Include Regex: ${INCLUDE_REGEX_PATTERNS[*]}"
 fi
 if ((${#EXCLUDE_REGEX_PATTERNS[@]} > 0)); then
     log_verbose "Exclude Regex: ${EXCLUDE_REGEX_PATTERNS[*]}"
 fi

 # Any regex option switches single-file checks from glob mode to regex mode.
 USING_REGEX_FOR_SINGLE_CHECKS=false
 if ((${#INCLUDE_REGEX_PATTERNS[@]} > 0 || ${#EXCLUDE_REGEX_PATTERNS[@]} > 0)); then
     USING_REGEX_FOR_SINGLE_CHECKS=true
 fi

 # --- File Discovery and Filtering ---
 # Fills files_to_process from INPUT_PATHS.
 #   * A file given directly is checked against the default file excludes and
 #     then against either the regex filters or the glob filters.
 #   * A directory is checked against the default/glob directory excludes and
 #     then searched with run_find_in_dir; each hit is re-checked against
 #     --exclude paths.
 for item in "${INPUT_PATHS[@]}"; do
     # Single-file regex checks run against a "./"-prefixed form of relative
     # inputs; absolute or already "./"-prefixed inputs are used unchanged.
     current_item_relative_path="$item"
     if [[ "$item" != /* && "$item" != "./"* ]]; then
         current_item_relative_path="./$item"
     fi

     if [ ! -e "$item" ]; then
         echo "Warning: Input path does not exist: '$item'. Skipping." >&2
         continue
     fi
     if is_explicitly_excluded "$item"; then
         continue
     fi
     item_basename=$(basename "$item")

     if [ -f "$item" ]; then
         # Default file pattern excludes (e.g. *.pyc)
         if is_match_by_pattern "$item_basename" "${EXCLUDE_FILES_PATTERN[@]}"; then
             log_verbose "Excluding single file (default pattern): $item"
             continue
         fi

         if [ "$USING_REGEX_FOR_SINGLE_CHECKS" = true ]; then
             # With no include regexes every file passes the include step.
             passes_include_regex=true
             if [ ${#INCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
                 passes_include_regex=false
                 if is_match_by_regex "$current_item_relative_path" "${INCLUDE_REGEX_PATTERNS[@]}"; then
                     passes_include_regex=true
                 fi
             fi
             if ! $passes_include_regex; then
                 log_verbose "Excluding single file '$item' (no include regex match)"
                 continue
             fi

             if [ ${#EXCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
                 if is_match_by_regex "$current_item_relative_path" "${EXCLUDE_REGEX_PATTERNS[@]}"; then
                     log_verbose "Excluding single file '$item' (exclude regex match)"
                     continue
                 fi
             fi
         else # Using Glob filtering
             # Globs are tried against the base name first, then the path as given.
             if [ ${#EXCLUDE_SHELL_PATTERNS[@]} -gt 0 ]; then
                 if is_match_by_pattern "$item_basename" "${EXCLUDE_SHELL_PATTERNS[@]}"; then
                     log_verbose "Excluding single file '$item' (glob --exclude-pattern)"
                     continue
                 fi
                 if is_match_by_pattern "$item" "${EXCLUDE_SHELL_PATTERNS[@]}"; then
                     log_verbose "Excluding single file '$item' (path glob --exclude-pattern)"
                     continue
                 fi
             fi
             passes_include_glob=true
             if [ ${#INCLUDE_PATTERNS[@]} -gt 0 ]; then
                 passes_include_glob=false
                 if is_match_by_pattern "$item_basename" "${INCLUDE_PATTERNS[@]}"; then
                     passes_include_glob=true
                 fi
                 if ! $passes_include_glob && is_match_by_pattern "$item" "${INCLUDE_PATTERNS[@]}"; then
                     passes_include_glob=true
                 fi
             fi
             if ! $passes_include_glob; then
                 log_verbose "Excluding single file '$item' (no include glob match)"
                 continue
             fi
         fi
         files_to_process+=("$item")

     elif [ -d "$item" ]; then
         if is_match_by_pattern "$item_basename" "${EXCLUDE_DIRS_PATTERN[@]}"; then
             log_verbose "Excluding directory (default pattern): $item"
             continue
         fi
         if ! $USING_REGEX_FOR_SINGLE_CHECKS && [ ${#EXCLUDE_SHELL_PATTERNS[@]} -gt 0 ]; then
             if is_match_by_pattern "$item_basename" "${EXCLUDE_SHELL_PATTERNS[@]}"; then
                 log_verbose "Excluding directory '$item' (glob --exclude-pattern)"
                 continue
             fi
         fi
         log_verbose "Searching directory: $item"
         # run_find_in_dir emits NUL-terminated paths relative to $item.
         mapfile -d $'\0' -t found_files < <(run_find_in_dir "$item")
         for relative_path_from_find in "${found_files[@]}"; do
             if [ -z "$relative_path_from_find" ]; then
                 continue
             fi
             full_path="$item/$relative_path_from_find"
             # Collapse "//" (e.g. from an input given with a trailing slash)
             # into "/". Done with parameter expansion rather than echo|sed so
             # no process is forked per discovered file and trailing newlines
             # in a name are not lost to command substitution.
             full_path="${full_path//\/\///}"
             if is_explicitly_excluded "$full_path"; then # Check --exclude specific path again
                 continue
             fi
             files_to_process+=("$full_path")
         done
     else
         echo "Warning: Input path '$item' is not a file or directory. Skipping." >&2
     fi
 done

 # --- Processing Phase ---
 # Dry run: print each selected path and, if requested, add up file sizes.
 # Real run: append each file to OUTPUT_FILE and count the successes.
 for file_path in "${files_to_process[@]}"; do
     # Never ingest the output file into itself. It is picked up by discovery
     # whenever it lives under an input directory (the default output name
     # with input "."). -ef is false if either path does not exist.
     if [ -n "$OUTPUT_FILE" ] && [ "$file_path" -ef "$OUTPUT_FILE" ]; then
         log_verbose "Skipping output file itself: $file_path"
         continue
     fi
     relative_display_path="$file_path" # Simple approach for now
     if [ "$DRY_RUN" = true ]; then
         echo "$file_path"
         if [ "$ESTIMATE_TOKENS" = true ]; then
             # Falls back to 0 when the file cannot be read.
             file_bytes=$(wc -c < "$file_path" 2>/dev/null || echo 0)
             total_bytes_processed=$((total_bytes_processed + file_bytes))
         fi
         processed_count=$((processed_count + 1))
     else
         if process_file "$file_path" "$relative_display_path" "$OUTPUT_FILE"; then
             processed_count=$((processed_count + 1))
         fi
     fi
 done

 # --- Final Output / Summary ---
 # Real run: report the output file and estimate tokens from its size.
 # Dry run: report the file count and estimate tokens from the summed sizes.
 if [[ "$DRY_RUN" != true ]]; then
     log_verbose "Finished processing $processed_count files."
     echo "Code ingested into: $OUTPUT_FILE"
     if [[ "$ESTIMATE_TOKENS" == true ]]; then
         final_bytes=$(wc -c < "$OUTPUT_FILE" 2>/dev/null || echo 0)
         calculate_and_print_token_estimate "$final_bytes" "$OUTPUT_FILE"
     fi
 else
     echo "--- End of dry run list ---"
     echo "Dry run complete. Would include $processed_count file(s)."
     if [[ "$ESTIMATE_TOKENS" == true ]]; then
         calculate_and_print_token_estimate "$total_bytes_processed" "files listed above"
     fi
 fi
 exit 0
	#!/bin/bash

	# --- Runtime settings (may be overridden while parsing the command line) ---
	OUTPUT_FILE=""       # Destination file; stays empty until -o or the default name is applied
	VERBOSE=false        # "true" enables log_verbose output
	DRY_RUN=false        # "true" lists candidate files instead of writing output
	ESTIMATE_TOKENS=true # "true" prints the rough token estimate in the summary

	# --- Selections collected from the command line ---
	INPUT_PATHS=()            # Files and directories to ingest
	EXPLICIT_EXCLUDE_PATHS=() # --exclude: specific paths
	INCLUDE_PATTERNS=()       # --include-pattern: shell globs
	EXCLUDE_SHELL_PATTERNS=() # --exclude-pattern: shell globs
	INCLUDE_REGEX_PATTERNS=() # --include-regex
	EXCLUDE_REGEX_PATTERNS=() # --exclude-regex

	# Built-in exclusions, applied before any glob/regex filtering.
	# Directory names that are never searched:
	EXCLUDE_DIRS_PATTERN=(
	    ".git" "node_modules" "dist" "build"
	    ".venv" "venv" "env"
	    "bin" "obj"
	    "__pycache__" ".pytest_cache" ".mypy_cache" ".ruff_cache"
	    ".idea" ".vscode"
	    "target" "out" "logs"
	)
	# File-name globs that are never ingested. The leading "*" matters: a bare
	# ".pyc" would only match a file literally named ".pyc".
	EXCLUDE_FILES_PATTERN=(
	    "*.pyc" "*.pyo" "*.log" "*.swp" "*.swo"
	    "*.lock" "poetry.lock" "package-lock.json" "yarn.lock" "Pipfile.lock" "uv.lock"
	    "*.DS_Store" "Thumbs.db"
	    "*.class" "*.jar" "*.war" "*.ear"
	)

	# Print a diagnostic message to stderr when verbose mode is enabled.
	# Globals:   VERBOSE (read) - output is produced only when it equals "true"
	# Arguments: message words; they are joined with single spaces
	# Outputs:   "[VERBOSE] <message>" on stderr
	# Returns:   0 when verbose mode is off; otherwise the status of printf
	log_verbose() {
	    [ "$VERBOSE" = true ] || return 0
	    # "$*" keeps prefix and message as ONE word ("$@" inside a string splits
	    # it into several). IFS is pinned locally so the join is always a space.
	    local IFS=' '
	    printf '%s\n' "[VERBOSE] $*" >&2
	}

	# Map a file name to the language tag used after the opening code fence.
	# Arguments: $1 - file name or path
	# Outputs:   the language tag on stdout, or an empty line when unknown
	# Returns:   0
	get_lang_hint() {
	    local filename="$1"
	    # Only the last path component may contribute an extension; otherwise a
	    # dotted directory name (e.g. "v1.2/README") would be misread as one.
	    local base="${filename##*/}"
	    # A name without any dot has no extension. Without this guard a file
	    # literally called "json" or "sh" would be tagged as that language.
	    if [[ "$base" != *.* ]]; then
	        echo ""
	        return 0
	    fi
	    local ext="${base##*.}"
	    # Lower-case so "README.MD" or "Main.JAVA" are recognised too.
	    # Alternatives are separated by a plain "|"; an escaped "\|" would be
	    # matched as a literal pipe character.
	    case "${ext,,}" in
	        py) echo "python" ;;
	        js) echo "javascript" ;;
	        ts) echo "typescript" ;;
	        java) echo "java" ;;
	        c) echo "c" ;;
	        cpp | cxx | h | hpp) echo "cpp" ;;
	        cs) echo "csharp" ;;
	        go) echo "go" ;;
	        rb) echo "ruby" ;;
	        php) echo "php" ;;
	        html | htm) echo "html" ;;
	        css) echo "css" ;;
	        scss | sass) echo "scss" ;;
	        sh | bash) echo "bash" ;;
	        zsh) echo "zsh" ;;
	        sql) echo "sql" ;;
	        md | markdown) echo "markdown" ;;
	        json) echo "json" ;;
	        yaml | yml) echo "yaml" ;;
	        xml) echo "xml" ;;
	        *) echo "" ;;
	    esac
	}

	# Append one file to the digest as a headed, fenced code block.
	# Arguments: $1 - path of the file to read
	#            $2 - path shown in the "FILE:" header
	#            $3 - output file to append to
	# Outputs:   header, opening fence (with language hint), file content,
	#            closing fence and a blank line appended to $3; warnings on stderr
	# Returns:   0 on success; 1 if the file is not a readable regular file or
	#            cat failed (the closing fence is still written in that case)
	process_file() {
	    local filepath="$1" relative_path="$2" output_target="$3"
	    if [ ! -f "$filepath" ] || [ ! -r "$filepath" ]; then
	        echo "Warning: Cannot read file: $filepath. Skipping." >&2
	        return 1
	    fi
	    log_verbose "Processing file: $filepath (as $relative_path)"

	    local lang_hint
	    lang_hint=$(get_lang_hint "$filepath")

	    # Hex value of the file's final byte ("" for an empty file). Needed so
	    # the closing fence never ends up glued to an unterminated last line.
	    local last_byte
	    last_byte=$(tail -c 1 -- "$filepath" 2>/dev/null | od -An -tx1 | tr -d '[:space:]')

	    local exit_code=0
	    {
	        echo "================================================"
	        echo "FILE: $relative_path"
	        echo "================================================"
	        echo '```'"$lang_hint"
	        cat -- "$filepath" || exit_code=$?
	        if [ -n "$last_byte" ] && [ "$last_byte" != "0a" ]; then
	            echo ""
	        fi
	        echo '```'
	        echo ""
	    } >> "$output_target"

	    if [ "$exit_code" -ne 0 ]; then
	        echo "Warning: Error reading file content: $filepath" >&2
	        return 1
	    fi
	    return 0
	}

	# Test a name against a list of shell glob patterns.
	# Arguments: $1 - string to test; $2... - glob patterns
	# Returns:   0 if any pattern matches, 1 otherwise (also when no patterns given)
	is_match_by_pattern() {
	    local name_to_check="$1"
	    shift
	    # Declared local so callers' own "pattern" variable is not clobbered.
	    local pattern
	    for pattern in "$@"; do
	        # The right-hand side is intentionally unquoted: it must act as a glob.
	        if [[ "$name_to_check" == $pattern ]]; then
	            return 0
	        fi
	    done
	    return 1
	}

	# Test a string (typically a path) against a list of extended regexes.
	# Matching uses [[ =~ ]], i.e. an unanchored search unless the regex
	# itself contains ^ or $.
	# Arguments: $1 - string to test; $2... - regex patterns
	# Returns:   0 if any regex matches, 1 otherwise (also when no regexes given)
	is_match_by_regex() {
	    local string_to_check="$1"
	    shift
	    # Declared local so callers' own "regex" variable is not clobbered.
	    local regex
	    for regex in "$@"; do
	        # Unquoted on purpose: a quoted right-hand side would match literally.
	        if [[ "$string_to_check" =~ $regex ]]; then
	            return 0
	        fi
	    done
	    return 1
	}


	# Decide whether a path was excluded via --exclude.
	# Globals:   EXPLICIT_EXCLUDE_PATHS (read) - entries are compared as stored
	# Arguments: $1 - candidate path; normalised with `realpath -sm` (no symlink
	#                 resolution, need not exist), falling back to the raw value
	# Returns:   0 if the candidate equals an entry or lies inside an entry that
	#            is an existing directory; 1 otherwise
	is_explicitly_excluded() {
	    local candidate_path="$1"
	    local abs_candidate_path
	    # Declared local so the loop does not leak a global variable.
	    local excluded_path_entry
	    abs_candidate_path=$(realpath -sm -- "$candidate_path" 2>/dev/null || echo "$candidate_path")
	    for excluded_path_entry in "${EXPLICIT_EXCLUDE_PATHS[@]}"; do
	        if [[ "$abs_candidate_path" == "$excluded_path_entry" ]]; then
	            log_verbose "Explicitly excluding '$candidate_path' (abs: '$abs_candidate_path') due to --exclude exact match with '$excluded_path_entry'"
	            return 0
	        fi
	        # The trailing "/" in the prefix stops "/a/skip" from matching "/a/skipper".
	        if [[ -d "$excluded_path_entry" && "$abs_candidate_path" == "$excluded_path_entry/"* ]]; then
	            log_verbose "Explicitly excluding '$candidate_path' (abs: '$abs_candidate_path') because it's inside --exclude directory '$excluded_path_entry'"
	            return 0
	        fi
	    done
	    return 1
	}

	# List the files below a directory that survive all filters.
	# Globals:   EXCLUDE_DIRS_PATTERN, EXCLUDE_FILES_PATTERN (read) - always pruned
	#            INCLUDE_REGEX_PATTERNS, EXCLUDE_REGEX_PATTERNS (read) - if either
	#              is non-empty, regex filtering is used and globs are ignored
	#            INCLUDE_PATTERNS, EXCLUDE_SHELL_PATTERNS (read) - glob filtering
	# Arguments: $1 - directory to search
	# Outputs:   NUL-terminated paths relative to $1 on stdout (find's %P)
	# Returns:   exit status of find
	# Requires a find that supports -regextype and -printf (GNU findutils).
	run_find_in_dir() {
	    local search_path="$1"
	    local p regex # loop variables, kept out of the global scope
	    local find_cmd_args=("$search_path")

	    # --- Initial pruning (always applied) ---
	    local prune_conditions=()
	    if [ ${#EXCLUDE_DIRS_PATTERN[@]} -gt 0 ]; then
	        prune_conditions+=("(")
	        for p in "${EXCLUDE_DIRS_PATTERN[@]}"; do
	            prune_conditions+=("-name" "$p" "-o")
	        done
	        # Overwrite the dangling trailing "-o" with the closing parenthesis.
	        prune_conditions[${#prune_conditions[@]}-1]=")"
	        prune_conditions+=("-type" "d")
	    fi
	    if [ ${#EXCLUDE_FILES_PATTERN[@]} -gt 0 ]; then
	        if [ ${#prune_conditions[@]} -gt 0 ]; then
	            prune_conditions+=("-o")
	        fi
	        prune_conditions+=("(")
	        for p in "${EXCLUDE_FILES_PATTERN[@]}"; do
	            prune_conditions+=("-name" "$p" "-o")
	        done
	        prune_conditions[${#prune_conditions[@]}-1]=")"
	        prune_conditions+=("-type" "f")
	    fi
	    if [ ${#prune_conditions[@]} -gt 0 ]; then
	        # The outer parentheses are essential: AND binds tighter than -o, so
	        # without them -prune would attach to the file branch only and the
	        # excluded directories would still be descended into.
	        find_cmd_args+=("(" "${prune_conditions[@]}" ")" "-prune" "-o")
	    fi

	    # --- Main filtering (regex OR glob, never both) ---
	    # Filter groups are chained by find's implicit AND. An explicit leading
	    # "-and" right after "-o" (or right after the start path) is rejected by
	    # find as a binary operator with nothing before it.
	    if [ ${#INCLUDE_REGEX_PATTERNS[@]} -gt 0 ] || [ ${#EXCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
	        log_verbose "Using REGEX filtering for find."
	        find_cmd_args+=("-regextype" "posix-extended")
	        if [ ${#INCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
	            find_cmd_args+=("(")
	            for regex in "${INCLUDE_REGEX_PATTERNS[@]}"; do
	                find_cmd_args+=("-regex" "$regex" "-o")
	            done
	            find_cmd_args[${#find_cmd_args[@]}-1]=")"
	        fi
	        if [ ${#EXCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
	            find_cmd_args+=("-not" "(")
	            for regex in "${EXCLUDE_REGEX_PATTERNS[@]}"; do
	                find_cmd_args+=("-regex" "$regex" "-o")
	            done
	            find_cmd_args[${#find_cmd_args[@]}-1]=")"
	        fi
	    else
	        log_verbose "Using GLOB filtering for find."
	        # Patterns containing "/" anywhere are matched against the whole path
	        # (-path), all others against the file name only (-name).
	        if [ ${#INCLUDE_PATTERNS[@]} -gt 0 ]; then
	            find_cmd_args+=("(")
	            for p in "${INCLUDE_PATTERNS[@]}"; do
	                if [[ "$p" == *"/"* ]]; then
	                    find_cmd_args+=("-path" "$p" "-o")
	                else
	                    find_cmd_args+=("-name" "$p" "-o")
	                fi
	            done
	            find_cmd_args[${#find_cmd_args[@]}-1]=")"
	        fi
	        if [ ${#EXCLUDE_SHELL_PATTERNS[@]} -gt 0 ]; then
	            find_cmd_args+=("-not" "(")
	            for p in "${EXCLUDE_SHELL_PATTERNS[@]}"; do
	                if [[ "$p" == *"/"* ]]; then
	                    find_cmd_args+=("-path" "$p" "-o")
	                else
	                    find_cmd_args+=("-name" "$p" "-o")
	                fi
	            done
	            find_cmd_args[${#find_cmd_args[@]}-1]=")"
	        fi
	    fi

	    find_cmd_args+=("-type" "f" "-printf" "%P\0")
	    log_verbose "Executing find: find ${find_cmd_args[*]}"
	    # stderr is discarded, which also hides expression errors; re-run the
	    # logged command by hand when results look wrong.
	    find "${find_cmd_args[@]}" 2>/dev/null
	}

	# Print a rough token estimate (bytes / 4) for a byte count.
	# Arguments: $1 - byte count; $2 - label for what was measured
	#                 (falls back to "processed files" when empty)
	# Outputs:   one summary line on stdout; nothing when $1 is not a
	#            non-negative integer or is zero (only a verbose note is logged)
	# Formatting: >= 1,000,000 tokens -> "X.YM", >= 1,000 -> "X.YK", otherwise
	#             the plain number; the decimal digit is truncated, not rounded.
	calculate_and_print_token_estimate() {
	    local total_bytes="$1"
	    local output_target="$2"

	    # A real "||" operator is required here; an escaped "\|\|" after "]]"
	    # is a syntax error.
	    if ! [[ "$total_bytes" =~ ^[0-9]+$ ]] || [ "$total_bytes" -eq 0 ]; then
	        log_verbose "No bytes processed, cannot estimate tokens."
	        return
	    fi

	    local estimated_tokens=$((total_bytes / 4))
	    local formatted_tokens whole tenth
	    if ((estimated_tokens >= 1000000)); then
	        whole=$((estimated_tokens / 1000000))
	        tenth=$((estimated_tokens % 1000000 / 100000))
	        formatted_tokens="${whole}.${tenth}M"
	    elif ((estimated_tokens >= 1000)); then
	        whole=$((estimated_tokens / 1000))
	        tenth=$((estimated_tokens % 1000 / 100))
	        formatted_tokens="${whole}.${tenth}K"
	    else
	        formatted_tokens="$estimated_tokens"
	    fi

	    echo "Rough Token Estimate (~chars/4) for ${output_target:-processed files}: ${formatted_tokens} tokens (${total_bytes} bytes)"
	}

	# Print the help text to stderr and terminate the script with status 1.
	# Outputs: usage summary on stderr; the default output name shown is built
	#          from the current directory's base name plus "-ingest.txt"
	usage() {
	    local default_name_example
	    default_name_example="$(basename "$(pwd)")-ingest.txt"
	    printf '%s\n' \
	        "Usage: $0 [options] [--] [file_or_dir ...]" \
	        "Options:" \
	        " -o <filename.txt>: Output file (default: '$default_name_example')." \
	        " -v, --verbose: Enable verbose logging." \
	        " -d, --dry-run: List files to include, don't create output." \
	        " --estimate-tokens: Show rough token estimate (~chars/4) at the end." \
	        " --exclude <path>: Specific file/dir to exclude (shell wildcards expanded by shell)." \
	        " --include-pattern <glob>: Glob pattern ('*.py') to include in search. Repeatable." \
	        " --exclude-pattern <glob>: Glob pattern ('*.log') to exclude from search. Repeatable." \
	        " --include-regex <regex>: POSIX ERE regex to include files (matches full relative path). Repeatable." \
	        " --exclude-regex <regex>: POSIX ERE regex to exclude files (matches full relative path). Repeatable." \
	        " --: Use if a filename starts with '-'." \
	        "Input: Files or directories. Default '.' if using --include-pattern/regex and no paths given." \
	        >&2
	    exit 1
	}

	# With no arguments at all there is nothing to do: show the help and exit.
	if (($# == 0)); then
	    usage
	fi

	# --- Command-line parsing ---
	# Multi-value options (--exclude, --include-*, --exclude-*) consume every
	# following argument up to the next one that starts with '-'.
	# Anything that is not an option is collected as an input path.
	# Short/long aliases are separated by a plain "|"; an escaped "\|" would
	# only match an argument containing a literal pipe.
	while (($# > 0)); do
	    case "$1" in
	        -o)
	            if [[ -z "$2" || "$2" == -* ]]; then
	                echo "Error: -o requires filename." >&2
	                usage
	            fi
	            OUTPUT_FILE="$2"
	            shift 2
	            ;;
	        -v | --verbose)
	            VERBOSE=true
	            shift
	            ;;
	        -d | --dry-run)
	            DRY_RUN=true
	            shift
	            ;;
	        --estimate-tokens)
	            ESTIMATE_TOKENS=true
	            shift
	            ;;
	        --exclude)
	            shift
	            # Stored in the same normalised form that is_explicitly_excluded
	            # compares against (raw value if realpath is unavailable).
	            while (($# > 0)) && [[ "$1" != -* ]]; do
	                EXPLICIT_EXCLUDE_PATHS+=("$(realpath -sm -- "$1" 2>/dev/null || echo "$1")")
	                shift
	            done
	            ;;
	        --include-pattern)
	            shift
	            while (($# > 0)) && [[ "$1" != -* ]]; do
	                INCLUDE_PATTERNS+=("$1")
	                shift
	            done
	            ;;
	        --exclude-pattern)
	            shift
	            while (($# > 0)) && [[ "$1" != -* ]]; do
	                EXCLUDE_SHELL_PATTERNS+=("$1")
	                shift
	            done
	            ;;
	        --include-regex)
	            shift
	            while (($# > 0)) && [[ "$1" != -* ]]; do
	                INCLUDE_REGEX_PATTERNS+=("$1")
	                shift
	            done
	            ;;
	        --exclude-regex)
	            shift
	            while (($# > 0)) && [[ "$1" != -* ]]; do
	                EXCLUDE_REGEX_PATTERNS+=("$1")
	                shift
	            done
	            ;;
	        --)
	            # Everything after "--" is an input path, even if it starts with '-'.
	            shift
	            INPUT_PATHS+=("$@")
	            shift $#
	            break
	            ;;
	        -*)
	            echo "Error: Unknown option '$1'." >&2
	            usage
	            ;;
	        *)
	            INPUT_PATHS+=("$1")
	            shift
	            ;;
	    esac
	done

	# --- Set Default Input/Output ---
	# No input paths: search "." when include patterns/regexes were given,
	# otherwise there is nothing to ingest and usage is shown.
	# (Uses a real "||"; an escaped "\|\|" would be handed to "[" as arguments.)
	if [ ${#INPUT_PATHS[@]} -eq 0 ]; then
	    if [ ${#INCLUDE_PATTERNS[@]} -gt 0 ] || [ ${#INCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
	        INPUT_PATHS+=(".")
	        log_verbose "Defaulting input path to '.' with include patterns/regex."
	    else
	        echo "Error: No input or include patterns/regex specified." >&2
	        usage
	    fi
	fi

	# Without -o the output is named after the current directory.
	if [[ -z "$OUTPUT_FILE" ]]; then
	    current_dir_name=$(basename "$(pwd)")
	    OUTPUT_FILE="${current_dir_name}-ingest.txt"
	    log_verbose "Output file not specified, defaulting to: $OUTPUT_FILE"
	fi

	# --- Initialization ---
	total_bytes_processed=0 # Bytes summed over listed files (dry run only)
	processed_count=0       # Files listed (dry run) or successfully written
	files_to_process=()     # Paths selected by the discovery phase

	# --- Prepare Output / Dry Run Start ---
	if [ "$DRY_RUN" != true ]; then
	    log_verbose "Starting code ingestion..."
	    log_verbose "Output file: $OUTPUT_FILE"
	    # Create or truncate the output file up front so an unwritable target
	    # aborts the run before any file is processed.
	    if ! : > "$OUTPUT_FILE"; then
	        echo "Error: Could not create/clear output file: $OUTPUT_FILE" >&2
	        exit 1
	    fi
	else
	    log_verbose "Starting dry run..."
	    echo "--- Files that would be included ---"
	fi

	# Record the effective configuration in the verbose log.
	log_verbose "Input paths: ${INPUT_PATHS[*]}"
	log_verbose "Default Exclude Dirs Pattern: ${EXCLUDE_DIRS_PATTERN[*]}"
	log_verbose "Default Exclude Files Pattern: ${EXCLUDE_FILES_PATTERN[*]}"
	if ((${#EXPLICIT_EXCLUDE_PATHS[@]} > 0)); then
	    log_verbose "Explicit Exclude Paths (--exclude): ${EXPLICIT_EXCLUDE_PATHS[*]}"
	fi
	if ((${#INCLUDE_PATTERNS[@]} > 0)); then
	    log_verbose "Include Patterns (Glob): ${INCLUDE_PATTERNS[*]}"
	fi
	if ((${#EXCLUDE_SHELL_PATTERNS[@]} > 0)); then
	    log_verbose "Exclude Patterns (Glob): ${EXCLUDE_SHELL_PATTERNS[*]}"
	fi
	if ((${#INCLUDE_REGEX_PATTERNS[@]} > 0)); then
	    log_verbose "Include Regex: ${INCLUDE_REGEX_PATTERNS[*]}"
	fi
	if ((${#EXCLUDE_REGEX_PATTERNS[@]} > 0)); then
	    log_verbose "Exclude Regex: ${EXCLUDE_REGEX_PATTERNS[*]}"
	fi

	# Any regex option switches single-file checks from glob mode to regex mode.
	# (A real "or" is needed; an escaped "\|\|" is passed to "[" as arguments.)
	USING_REGEX_FOR_SINGLE_CHECKS=false
	if ((${#INCLUDE_REGEX_PATTERNS[@]} > 0 || ${#EXCLUDE_REGEX_PATTERNS[@]} > 0)); then
	    USING_REGEX_FOR_SINGLE_CHECKS=true
	fi

	# --- File Discovery and Filtering ---
	# Fills files_to_process from INPUT_PATHS.
	#   * A file given directly is checked against the default file excludes and
	#     then against either the regex filters or the glob filters.
	#   * A directory is checked against the default/glob directory excludes and
	#     then searched with run_find_in_dir; each hit is re-checked against
	#     --exclude paths.
	for item in "${INPUT_PATHS[@]}"; do
	    # Single-file regex checks run against a "./"-prefixed form of relative
	    # inputs; absolute or already "./"-prefixed inputs are used unchanged.
	    current_item_relative_path="$item"
	    if [[ "$item" != /* && "$item" != "./"* ]]; then
	        current_item_relative_path="./$item"
	    fi

	    if [ ! -e "$item" ]; then
	        echo "Warning: Input path does not exist: '$item'. Skipping." >&2
	        continue
	    fi
	    if is_explicitly_excluded "$item"; then
	        continue
	    fi
	    item_basename=$(basename "$item")

	    if [ -f "$item" ]; then
	        # Default file pattern excludes (e.g. *.pyc)
	        if is_match_by_pattern "$item_basename" "${EXCLUDE_FILES_PATTERN[@]}"; then
	            log_verbose "Excluding single file (default pattern): $item"
	            continue
	        fi

	        if [ "$USING_REGEX_FOR_SINGLE_CHECKS" = true ]; then
	            # With no include regexes every file passes the include step.
	            passes_include_regex=true
	            if [ ${#INCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
	                passes_include_regex=false
	                if is_match_by_regex "$current_item_relative_path" "${INCLUDE_REGEX_PATTERNS[@]}"; then
	                    passes_include_regex=true
	                fi
	            fi
	            if ! $passes_include_regex; then
	                log_verbose "Excluding single file '$item' (no include regex match)"
	                continue
	            fi

	            if [ ${#EXCLUDE_REGEX_PATTERNS[@]} -gt 0 ]; then
	                if is_match_by_regex "$current_item_relative_path" "${EXCLUDE_REGEX_PATTERNS[@]}"; then
	                    log_verbose "Excluding single file '$item' (exclude regex match)"
	                    continue
	                fi
	            fi
	        else # Using Glob filtering
	            # Globs are tried against the base name first, then the path as given.
	            if [ ${#EXCLUDE_SHELL_PATTERNS[@]} -gt 0 ]; then
	                if is_match_by_pattern "$item_basename" "${EXCLUDE_SHELL_PATTERNS[@]}"; then
	                    log_verbose "Excluding single file '$item' (glob --exclude-pattern)"
	                    continue
	                fi
	                if is_match_by_pattern "$item" "${EXCLUDE_SHELL_PATTERNS[@]}"; then
	                    log_verbose "Excluding single file '$item' (path glob --exclude-pattern)"
	                    continue
	                fi
	            fi
	            passes_include_glob=true
	            if [ ${#INCLUDE_PATTERNS[@]} -gt 0 ]; then
	                passes_include_glob=false
	                if is_match_by_pattern "$item_basename" "${INCLUDE_PATTERNS[@]}"; then
	                    passes_include_glob=true
	                fi
	                if ! $passes_include_glob && is_match_by_pattern "$item" "${INCLUDE_PATTERNS[@]}"; then
	                    passes_include_glob=true
	                fi
	            fi
	            if ! $passes_include_glob; then
	                log_verbose "Excluding single file '$item' (no include glob match)"
	                continue
	            fi
	        fi
	        files_to_process+=("$item")

	    elif [ -d "$item" ]; then
	        if is_match_by_pattern "$item_basename" "${EXCLUDE_DIRS_PATTERN[@]}"; then
	            log_verbose "Excluding directory (default pattern): $item"
	            continue
	        fi
	        if ! $USING_REGEX_FOR_SINGLE_CHECKS && [ ${#EXCLUDE_SHELL_PATTERNS[@]} -gt 0 ]; then
	            if is_match_by_pattern "$item_basename" "${EXCLUDE_SHELL_PATTERNS[@]}"; then
	                log_verbose "Excluding directory '$item' (glob --exclude-pattern)"
	                continue
	            fi
	        fi
	        log_verbose "Searching directory: $item"
	        # run_find_in_dir emits NUL-terminated paths relative to $item.
	        mapfile -d $'\0' -t found_files < <(run_find_in_dir "$item")
	        for relative_path_from_find in "${found_files[@]}"; do
	            if [ -z "$relative_path_from_find" ]; then
	                continue
	            fi
	            full_path="$item/$relative_path_from_find"
	            # Collapse "//" (e.g. from an input given with a trailing slash)
	            # into "/". Done with parameter expansion: no pipe to sed is
	            # needed, no process is forked per discovered file, and trailing
	            # newlines in a name are not lost to command substitution.
	            full_path="${full_path//\/\///}"
	            if is_explicitly_excluded "$full_path"; then # Check --exclude specific path again
	                continue
	            fi
	            files_to_process+=("$full_path")
	        done
	    else
	        echo "Warning: Input path '$item' is not a file or directory. Skipping." >&2
	    fi
	done

	# --- Processing Phase ---
	# Dry run: print each selected path and, if requested, add up file sizes.
	# Real run: append each file to OUTPUT_FILE and count the successes.
	for file_path in "${files_to_process[@]}"; do
	    # Never ingest the output file into itself. It is picked up by discovery
	    # whenever it lives under an input directory (the default output name
	    # with input "."). -ef is false if either path does not exist.
	    if [ -n "$OUTPUT_FILE" ] && [ "$file_path" -ef "$OUTPUT_FILE" ]; then
	        log_verbose "Skipping output file itself: $file_path"
	        continue
	    fi
	    relative_display_path="$file_path" # Simple approach for now
	    if [ "$DRY_RUN" = true ]; then
	        echo "$file_path"
	        if [ "$ESTIMATE_TOKENS" = true ]; then
	            # Falls back to 0 when the file cannot be read. The fallback
	            # needs a real "||"; an escaped "\|\|" is passed to wc as a
	            # file-name argument.
	            file_bytes=$(wc -c < "$file_path" 2>/dev/null || echo 0)
	            total_bytes_processed=$((total_bytes_processed + file_bytes))
	        fi
	        processed_count=$((processed_count + 1))
	    else
	        if process_file "$file_path" "$relative_display_path" "$OUTPUT_FILE"; then
	            processed_count=$((processed_count + 1))
	        fi
	    fi
	done

	# --- Final Output / Summary ---
	# Real run: report the output file and estimate tokens from its size.
	# Dry run: report the file count and estimate tokens from the summed sizes.
	if [[ "$DRY_RUN" != true ]]; then
	    log_verbose "Finished processing $processed_count files."
	    echo "Code ingested into: $OUTPUT_FILE"
	    if [[ "$ESTIMATE_TOKENS" == true ]]; then
	        # Falls back to 0 when the output file cannot be read. The fallback
	        # needs a real "||"; an escaped "\|\|" is passed to wc as a
	        # file-name argument.
	        final_bytes=$(wc -c < "$OUTPUT_FILE" 2>/dev/null || echo 0)
	        calculate_and_print_token_estimate "$final_bytes" "$OUTPUT_FILE"
	    fi
	else
	    echo "--- End of dry run list ---"
	    echo "Dry run complete. Would include $processed_count file(s)."
	    if [[ "$ESTIMATE_TOKENS" == true ]]; then
	        calculate_and_print_token_estimate "$total_bytes_processed" "files listed above"
	    fi
	fi
	exit 0
No results found