#!/usr/bin/env python3 """ Codebase Chat CLI - GPU Accelerated A command-line interface for interacting with codebases using local LLMs via Ollama. Supports GPU acceleration for improved performance and ChromaDB for vector indexing. Features: - Vector index creation of source code files with ChromaDB and Ollama embeddings - .codechatignore support for excluding files/folders - Interactive querying of indexed codebases - GPU and Apple Silicon acceleration (CUDA/MPS) for embeddings and chat - Project management capabilities (indexing, analysis, listing) - Multi-language support (Java, Kotlin, Python, JS, TS, Go, Rust, C++, etc.) - Dry-run mode for previewing indexing operations Environment Variables: - OLLAMA_MODEL: Default chat model (e.g., "phi4:14b") - OLLAMA_EMBED_MODEL: Embedding model (e.g., "nomic-embed-text") - OLLAMA_URL: Ollama API endpoint (default: http://localhost:11434) - INDEX_ROOT: Root directory for storing vector indexes """ import os import sys import argparse import shutil import time import re import chromadb import torch from pathlib import Path from typing import Optional, List, Dict, Any from dotenv import load_dotenv from pathspec import PathSpec from packaging import version # Enhanced LlamaIndex imports from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, ServiceContext from llama_index.core.settings import Settings from llama_index.core.node_parser import TokenTextSplitter from llama_index.core.prompts import PromptTemplate from llama_index.embeddings.ollama import OllamaEmbedding from llama_index.vector_stores.chroma import ChromaVectorStore from llama_index.llms.ollama import Ollama # --- Configuration --- load_dotenv() DEFAULT_MODEL = os.getenv("OLLAMA_MODEL", "phi4:14b") #DEFAULT_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text") DEFAULT_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "all-minilm") DEFAULT_OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434") INDEX_ROOT = os.getenv("INDEX_ROOT", os.path.expanduser("~/.codechat/indexes")) ALLOWED_EXTENSIONS = {".java", ".kt", ".py", ".js", ".ts", ".go", ".rs", ".cpp", ".h", ".xml", ".properties", ".yml", ".md"} # Quality Improvement Defaults DEFAULT_CHUNK_SIZE = 512 DEFAULT_CHUNK_OVERLAP = 128 DEFAULT_SIMILARITY_TOP_K = 3 # Enhanced QA Prompt CODE_QA_PROMPT = PromptTemplate(""" You are a senior developer analyzing this codebase. Provide: 1. Concise technical explanation 2. Relevant code snippets with source file references 3. Usage examples when appropriate 4. Any potential issues or caveats Format your response in markdown with proper code blocks. Context: {context_str} Question: {query_str} Answer: """) # Timeout settings (seconds) DEFAULT_TIMEOUT = 60 MAX_RETRIES = 2 # Minimum required versions MIN_CHROMADB_VERSION = "0.4.0" MIN_TORCH_VERSION = "1.10.0" def validate_project_name(name: str) -> bool: """ Validates a project name to ensure it is safe for use as a filesystem directory name. Args: name (str): The project name to validate. Returns: bool: True if the name is valid (contains only letters, numbers, underscores, or hyphens), False otherwise. """ if not name: return False return bool(re.match(r'^[a-zA-Z0-9_-]+$', name)) def check_dependencies() -> None: """ Checks the versions of required dependencies and prints warnings if they do not meet the minimum required versions. """ try: chroma_version = version.parse(chromadb.__version__) if chroma_version < version.parse(MIN_CHROMADB_VERSION): print(f"⚠️ ChromaDB version {chromadb.__version__} is below minimum required {MIN_CHROMADB_VERSION}") torch_version = version.parse(torch.__version__) if torch_version < version.parse(MIN_TORCH_VERSION): print(f"⚠️ PyTorch version {torch.__version__} is below minimum required {MIN_TORCH_VERSION}") except Exception as e: print(f"⚠️ Could not verify dependency versions: {str(e)}") def get_device(force_cpu: bool = False) -> str: """ Determines the most suitable compute device for processing. Args: force_cpu (bool): If True, always return 'cpu' regardless of available hardware. Returns: str: The device to use ('cuda', 'mps', or 'cpu'). """ if not force_cpu and torch.cuda.is_available(): return "cuda" elif not force_cpu and torch.backends.mps.is_available(): return "mps" # Apple Silicon return "cpu" def should_index_file(path: Path) -> bool: """ Checks whether a given file should be indexed based on its file extension. Args: path (Path): The file path to check. Returns: bool: True if the file extension is supported; False otherwise. """ return path.suffix.lower() in ALLOWED_EXTENSIONS def gather_files( codebase_path: Path, verbose: bool = False, ignore_file_path: Optional[Path] = None ) -> List[str]: """ Recursively collects file paths from a codebase directory, applying .codechatignore patterns if present. Args: codebase_path (Path): Root directory of the codebase. verbose (bool, optional): Enables detailed output during file collection. Defaults to False. ignore_file_path (Optional[Path], optional): Custom path to a .codechatignore file. If None, looks for .codechatignore in default locations. Defaults to None. Returns: List[str]: A list of string paths to source files eligible for indexing. """ # Look for ignore files in priority order possible_ignore_files = [] if ignore_file_path: possible_ignore_files.append(ignore_file_path) possible_ignore_files.extend([ Path.cwd() / ".codechatignore", codebase_path / ".codechatignore" ]) spec = None for ignore_file in possible_ignore_files: if ignore_file.exists(): if verbose: print(f"🔍 Found .codechatignore at {ignore_file}") with ignore_file.open("r", encoding="utf-8") as f: patterns = [line.strip() for line in f if line.strip() and not line.startswith("#")] if verbose and patterns: print(f"📜 Ignore patterns: {patterns}") spec = PathSpec.from_lines("gitwildmatch", patterns) break files = [] for p in codebase_path.rglob("*"): if not p.is_file(): continue if not should_index_file(p): if verbose: print(f"➖ Skipping (extension): {p}") continue try: rel_path = p.relative_to(codebase_path).as_posix() if verbose: print(f"🔄 Testing path: {rel_path}") except ValueError: if verbose: print(f"⚠️ Path error: {p}") continue if spec and spec.match_file(rel_path): if verbose: print(f"🚫 Excluded by pattern: {rel_path}") continue files.append(str(p)) if verbose: print(f"✅ Added: {p}") return files def verify_metadata(index: VectorStoreIndex) -> bool: """ Verifies that metadata is present for each node in the index. Args: index (VectorStoreIndex): VectorStoreIndex instance to verify. Returns: bool: True if all nodes contain source metadata; False otherwise. """ for node_id, node in index.docstore.docs.items(): if not node.metadata.get('source_file'): print(f"⚠️ Missing source_file in node {node_id}") return False return True def build_index( project: str, codebase_path: Path, embed_model: str, device: str, clean: bool = False, dry_run: bool = False, verbose: bool = False, ignore_file_path: Optional[Path] = None, chunk_size: int = DEFAULT_CHUNK_SIZE, chunk_overlap: int = DEFAULT_CHUNK_OVERLAP ) -> None: """ Builds a ChromaDB-based vector index for the specified project. Args: project (str): Project name for indexing. codebase_path (Path): Path to the source code directory. embed_model (str): The embedding model name for document vectorization. device (str): Compute device identifier (e.g., 'cuda', 'cpu', 'mps'). clean (bool, optional): If True, deletes and rebuilds the index. Defaults to False. dry_run (bool, optional): If True, only simulates the indexing process. Defaults to False. verbose (bool, optional): Enables debug output. Defaults to False. ignore_file_path (Optional[Path], optional): Custom .codechatignore path. Defaults to None. chunk_size (int, optional): Maximum token chunk size for embedding. Defaults to DEFAULT_CHUNK_SIZE. chunk_overlap (int, optional): Overlap between chunks. Defaults to DEFAULT_CHUNK_OVERLAP. Raises: SystemExit: If no indexable files are found. """ project_index_path = Path(INDEX_ROOT) / project if clean: shutil.rmtree(project_index_path, ignore_errors=True) os.makedirs(project_index_path, exist_ok=True) indexed_files = gather_files(codebase_path, verbose, ignore_file_path) if not indexed_files: print("❌ No indexable files found.") sys.exit(1) if dry_run: print(f"✅ Dry run complete (would index {len(indexed_files)} files)") return # Document processing node_parser = TokenTextSplitter( chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator="\n" ) documents = SimpleDirectoryReader( input_files=indexed_files, file_metadata=lambda x: { 'source_file': str(Path(x).absolute()), 'file_path': str(Path(x).relative_to(codebase_path)), 'file_name': Path(x).name, 'timestamp': time.time() } ).load_data() # Configure Settings instead of ServiceContext Settings.llm = Ollama(model=DEFAULT_MODEL, base_url=DEFAULT_OLLAMA_URL) Settings.embed_model = OllamaEmbedding(model_name=embed_model, device=device) Settings.node_parser = node_parser Settings.chunk_size = chunk_size Settings.chunk_overlap = chunk_overlap # Create index chroma_client = chromadb.PersistentClient(path=str(project_index_path)) vector_store = ChromaVectorStore(chroma_collection=chroma_client.get_or_create_collection(f"{project}_collection")) index = VectorStoreIndex.from_documents( documents, storage_context=StorageContext.from_defaults(vector_store=vector_store), show_progress=verbose ) index.storage_context.persist() if not verify_metadata(index): print("❌ Metadata issues detected - some sources may show as Unknown") print(f"\n✅ Index built with {len(indexed_files)} files (chunk size: {chunk_size}, overlap: {chunk_overlap})") def chat( project: str, model: str, embed_model: str, temperature: float, num_ctx: int, top_p: float, repeat_penalty: float, device: str, verbose: bool = False, similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K, timeout: int = DEFAULT_TIMEOUT, max_retries: int = MAX_RETRIES, chunk_size: int = DEFAULT_CHUNK_SIZE, chunk_overlap: int = DEFAULT_CHUNK_OVERLAP ) -> None: """ Start an interactive chat session with an indexed codebase. Args: project (str): Name of the project to chat with. model (str): Ollama model name to use for chat. embed_model (str): Ollama embedding model name. temperature (float): Controls randomness of responses (0.0-1.0). num_ctx (int): Context window size in tokens. top_p (float): Top-p sampling parameter. repeat_penalty (float): Penalty for repeated tokens. device (str): Compute device to use ('cuda', 'mps', or 'cpu'). verbose (bool, optional): If True, prints detailed progress information. Defaults to False. similarity_top_k (int, optional): Number of similar chunks to retrieve. Defaults to DEFAULT_SIMILARITY_TOP_K. timeout (int, optional): Query timeout in seconds. Defaults to DEFAULT_TIMEOUT. max_retries (int, optional): Number of retry attempts on timeout. Defaults to MAX_RETRIES. chunk_size (int, optional): Text chunk size for processing. Defaults to DEFAULT_CHUNK_SIZE. chunk_overlap (int, optional): Context overlap between chunks. Defaults to DEFAULT_CHUNK_OVERLAP. Raises: SystemExit: If no index is found for the specified project. """ project_index_path = Path(INDEX_ROOT) / project if not project_index_path.exists(): print(f"❌ No index found for '{project}'. Run with --reindex first.") sys.exit(1) # Configure for quality responses Settings.embed_model = OllamaEmbedding( model_name=embed_model, base_url=DEFAULT_OLLAMA_URL, device=device ) Settings.llm = Ollama( model=model, base_url=DEFAULT_OLLAMA_URL, temperature=temperature, num_ctx=num_ctx, top_p=top_p, repeat_penalty=repeat_penalty, device=device, request_timeout=timeout ) # Quality-optimized query engine chroma_client = chromadb.PersistentClient(path=str(project_index_path)) vector_store = ChromaVectorStore(chroma_collection=chroma_client.get_collection(f"{project}_collection")) index = VectorStoreIndex.from_vector_store(vector_store) query_engine = index.as_query_engine( similarity_top_k=similarity_top_k, include_metadata=True, metadata_fields=['source_file', 'file_name', 'file_path'], vector_store_query_mode="hybrid", response_mode="tree_summarize", text_qa_template=CODE_QA_PROMPT, verbose=verbose, timeout=timeout, retry_on_timeout=True, max_retries=max_retries ) sample_embedding = Settings.embed_model.get_text_embedding("sample code class") print(f"Embedding dimension: {len(sample_embedding)}") def debug_index_metadata(index: VectorStoreIndex, verbose: bool = True) -> None: """Debug function to check what metadata exists in the index. Args: index (VectorStoreIndex): The index to debug. verbose (bool, optional): If True, prints detailed information. Defaults to True. """ if not verbose: return print("\n🔍 Debugging index metadata:") try: collection = index._vector_store._collection print(f"Collection name: {collection.name}") print(f"Total vectors: {collection.count()}") # Get sample items with metadata items = collection.get(limit=3, include=["metadatas", "documents"]) if items and "metadatas" in items: print("\nSample metadata found:") for i, (meta, doc) in enumerate(zip(items["metadatas"], items["documents"][:3])): print(f" {i + 1}. Metadata: {meta}") print(f" First 50 chars: {doc[:50]}...\n") else: print("⚠️ No metadata found in collection") except Exception as e: print(f"⚠️ Error checking metadata: {str(e)}") print("Trying alternative access method...") try: # Alternative way to check nodes nodes = index.docstore.docs print(f"\nFound {len(nodes)} nodes in docstore") for node_id, node in list(nodes.items())[:3]: print(f"Node {node_id}:") print(f" Metadata: {node.metadata}") print(f" Text: {node.text[:50]}...\n") except Exception as e2: print(f"⚠️ Couldn't access docstore either: {str(e2)}") print("\n🔎 Verifying index structure...") debug_index_metadata(index, verbose=True) # Additional verification print("\n🔍 Index Verification:") try: print(f"- Vectors: {index._vector_store._collection.count()}") if hasattr(index, 'docstore'): print(f"- Documents: {len(index.docstore.docs)}") else: print("- Docstore: Not available (normal for ChromaDB)") except Exception as e: print(f"⚠️ Verification note: {str(e)}") debug_index_metadata(index, verbose=True) # Response enhancement functions def enhance_query(query: str) -> str: """Add context based on query type to get better responses. Args: query (str): The original user query. Returns: str: The enhanced query with additional context. """ query = query.strip() lower_query = query.lower() # Module/package queries if "module" in query or "package" in query: return ("List all Java modules/packages with their relative paths, " "main classes, and 1-2 sentence descriptions. " "Include the module's purpose and key features.") # Explanation queries elif any(q_word in lower_query for q_word in ["how", "why", "explain"]): return f"{query} (provide detailed explanation with code references)" # Example queries elif "example" in lower_query: return f"{query} (include practical usage examples)" # Default case - return original query return query def format_response(response: Any) -> str: """Formats the response with source references. Args: response (Any): The query response object. Returns: str: The formatted response text with sources. """ text = response.response # Source nodes handling if hasattr(response, 'source_nodes') and response.source_nodes: sources = [] for node in response.source_nodes[:3]: # Show top 3 sources source = node.metadata.get('source_file') or node.metadata.get('file_path', 'Unknown') if source != 'Unknown': try: # First try making it relative to INDEX_ROOT source = str(Path(source).relative_to(INDEX_ROOT)) except ValueError: try: # If that fails, just show the filename source = Path(source).name except: source = "Unknown path" sources.append(f"- {source} (score: {node.score:.2f})") text += "\n\n🔍 Sources:\n" + "\n".join(sources) return text # Interactive chat loop print(f"\n💬 Chatting with {project} (Enhanced Mode)") print("Type 'exit' or press Ctrl+C to quit\n") # Show optimization tips if settings might cause performance issues optimization_params = { 'timeout': timeout, 'chunk_size': chunk_size, 'chunk_overlap': chunk_overlap, 'similarity_top_k': similarity_top_k, 'model': model } tips = get_optimization_tips(optimization_params) if tips: print("\n💡 Performance Tips:") for tip in tips: print(f" - {tip}") print() while True: try: question = input("🤖 > ").strip() if question.lower() in {"exit", "quit"}: break start_time = time.time() try: response = query_engine.query(enhance_query(question)) print(f"\n{format_response(response)}") # DEBUG: Show raw source nodes if hasattr(response, 'source_nodes'): print("\n🔍 DEBUG - Source Nodes:") for i, node in enumerate(response.source_nodes[:3]): print(f"Node {i + 1}:") print(f" Score: {node.score}") try: print(f" Path: {node.metadata.get('file_path')}") print(f" Source: {node.metadata.get('source_file')}") except Exception as e: print(f" Metadata error: {str(e)}") print(f" Text: {node.text[:100]}...") except Exception as e: if "timeout" in str(e).lower(): print("\n⏱️ The query timed out. Try:") print("- Asking a more specific question") print(f"- Increasing timeout (current: {timeout}s)") print(f"- Reducing chunk size (current: {chunk_size})") else: print(f"\n❌ Query Error: {str(e)}") print(f"\n⏱️ Response time: {time.time() - start_time:.2f}s") except KeyboardInterrupt: print("\n👋 Exiting...") break def list_projects(verbose: bool = False) -> None: """ Display all indexed projects with accurate status. Args: verbose (bool, optional): If True, shows additional details about each project. Defaults to False. """ index_root_path = Path(INDEX_ROOT) if not index_root_path.exists(): print("No projects indexed yet.") return print("📂 Indexed Projects:") for project_dir in sorted(index_root_path.iterdir()): if project_dir.is_dir(): status = "❌" size_info = "unknown" try: client = chromadb.PersistentClient(path=str(project_dir)) collections = client.list_collections() if collections: # Find matching collection for col in collections: if col.name == project_dir.name or col.name == f"{project_dir.name}_collection": count = col.count() size_info = f"{count} vectors" status = "✅" break except Exception as e: if verbose: print(f"⚠️ Error checking {project_dir.name}: {str(e)}") print(f" - {project_dir.name} {status} ({size_info})") def show_config(args: argparse.Namespace) -> None: """ Display the current configuration including hardware and model settings. Args: args (argparse.Namespace): Parsed command-line arguments. """ device = get_device(force_cpu=args.cpu) gpu_type = "None" if device == "cuda": gpu_type = torch.cuda.get_device_name(0) elif device == "mps": gpu_type = "Apple Silicon (MPS)" print("⚙️ Current Configuration:") print(f" Project: {args.project if hasattr(args, 'project') else 'N/A'}") print(f" Model: {args.model}") print(f" Embed Model: {args.embed_model}") print(f" Device: {device.upper()} ({gpu_type})") print(f" Temperature: {args.temperature}") print(f" Context Window: {args.num_ctx} tokens") print("\n🛠️ Paths:") print(f" Index Root: {INDEX_ROOT}") print(f" Ollama URL: {DEFAULT_OLLAMA_URL}") # Show ignore file info if available ignore_locations = [ Path(args.ignore_file) if hasattr(args, 'ignore_file') and args.ignore_file else None, Path.cwd() / ".codechatignore", Path(args.reindex) / ".codechatignore" if hasattr(args, 'reindex') and args.reindex else None ] found = False for loc in ignore_locations: if loc and loc.exists(): print(f"\n🔍 Active .codechatignore at: {loc}") with open(loc, 'r') as f: print(" Ignore Patterns:") for line in f: line = line.strip() if line and not line.startswith("#"): print(f" - {line}") found = True break if not found: print("\n⚠️ No .codechatignore file found") def analyze_project(project: str, verbose: bool = False) -> None: """ Display detailed analytics about an indexed project. Args: project (str): Name of the project to analyze. verbose (bool, optional): If True, shows additional storage details. Defaults to False. Raises: None: This function handles errors gracefully and prints messages instead of raising exceptions. """ project_path = Path(INDEX_ROOT) / project if not project_path.exists(): print(f"❌ Project '{project}' not found") return print(f"\n📊 Analysis for '{project}':") print("─" * 50) # 1. Enhanced ChromaDB Stats try: client = chromadb.PersistentClient(path=str(project_path)) collection = client.get_collection(f"{project}_collection") # Count vectors and their distribution count = collection.count() metadata = collection.get(include=["metadatas"]) file_types = {} file_sizes = {} if metadata and "metadatas" in metadata: for item in metadata["metadatas"]: if item and isinstance(item, dict) and "file_path" in item: try: ext = Path(item["file_path"]).suffix.lower() file_types[ext] = file_types.get(ext, 0) + 1 # Get file size if available if "file_size" in item: file_sizes[ext] = file_sizes.get(ext, 0) + int(item["file_size"]) except (TypeError, AttributeError) as e: if verbose: print(f"⚠️ Could not process metadata item: {str(e)}") continue print("\n📈 Embedding Statistics:") print(f" - Total vectors: {count}") if file_types: print(" - File type distribution:") for ext, num in sorted(file_types.items(), key=lambda x: x[1], reverse=True): size_info = "" if ext in file_sizes: size_info = f" ({file_sizes[ext] / 1024:.1f} KB total)" print(f" - {ext if ext else 'no-extension'}: {num} vectors{size_info}") except Exception as e: print(f"⚠️ Couldn't read ChromaDB collection: {str(e)}") if "truth value of an array" in str(e): print("💡 Try upgrading ChromaDB: pip install --upgrade chromadb numpy") # 2. Storage Analysis try: total_size = sum(f.stat().st_size for f in project_path.glob('**/*') if f.is_file()) print("\n💾 Storage Usage:") print(f" - Index size: {total_size / 1024 / 1024:.2f} MB") print(f" - Files: {len(list(project_path.glob('**/*')))}") if verbose: print("\n🔍 Detailed Storage Breakdown:") for item in project_path.iterdir(): if item.is_file(): print(f" - {item.name}: {item.stat().st_size / 1024:.1f} KB") elif item.is_dir(): dir_size = sum(f.stat().st_size for f in item.glob('**/*') if f.is_file()) print(f" - {item.name}/: {dir_size / 1024:.1f} KB") except Exception as e: print(f"⚠️ Couldn't analyze storage: {str(e)}") # 3. Health Check - Updated for ChromaDB v0.4+ format print("\n🩺 Health Check:") healthy = True # Required files for ChromaDB v0.4+ required_files = { "chroma.sqlite3": "SQLite database", } # Optional files optional_files = { "chroma_settings.json": "Settings file", "chroma-embeddings.parquet": "Embeddings data (legacy)" } # Check required files for file, desc in required_files.items(): if (project_path / file).exists(): print(f" - ✅ {desc} present") else: print(f" - ❌ {desc} missing!") healthy = False # Check optional files for file, desc in optional_files.items(): if (project_path / file).exists(): print(f" - ☑️ {desc} present") else: print(f" - ⚠️ {desc} not found (optional)") # Check collection exists and is accessible try: client = chromadb.PersistentClient(path=str(project_path)) collection = client.get_collection(f"{project}_collection") print(f" - ✅ Collection accessible ({collection.count()} vectors)") except Exception as e: print(f" - ❌ Collection error: {str(e)}") healthy = False print(f"\n{'✅ Index is healthy' if healthy else '❌ Index has issues!'}") print("─" * 50) def repair_project(project: str, verbose: bool = False) -> None: """ Attempt to repair a potentially corrupted index. Args: project (str): Name of the project to repair. verbose (bool, optional): If True, shows additional repair details. Defaults to False. """ project_path = Path(INDEX_ROOT) / project if not project_path.exists(): print(f"❌ Project directory '{project}' not found") return print(f"\n🔧 Repairing project '{project}'...") try: client = chromadb.PersistentClient(path=str(project_path)) # ChromaDB uses different collection naming in newer versions collections = client.list_collections() if not collections: raise ValueError("No collections found in project directory") # Try both naming conventions collection_name = None for col in collections: if col.name == project or col.name == f"{project}_collection": collection_name = col.name break if not collection_name: raise ValueError(f"No matching collection found (tried: '{project}', '{project}_collection')") if verbose: print(f"🔄 Found collection: {collection_name}") collection = client.get_collection(collection_name) count = collection.count() print(f"\n✅ Repair successful - project is healthy") print(f" Collection: {collection_name}") print(f" Total vectors: {count}") except Exception as e: print(f"\n❌ Repair failed: {str(e)}") print("\nRecommended solutions:") print(f"1. Clean reindex: --project {project} --reindex /path/to/code --clean") print(f"2. Manual repair steps:") print(f" - Delete directory: {project_path}") print(f" - Check collection name in: {project_path}/chroma.sqlite3") def get_optimization_tips(params: Dict[str, Any]) -> List[str]: """Generate performance optimization suggestions based on current parameters. Args: params (Dict[str, Any]): Dictionary of current configuration parameters. Returns: List[str]: List of optimization tips. """ tips = [] # Timeout-related tips if params['timeout'] < 30: tips.append(f"Increase timeout (current: {params['timeout']}s)") # Chunking-related tips if params['chunk_size'] > 768: tips.append(f"Reduce chunk size (current: {params['chunk_size']})") if params['chunk_overlap'] > 128: tips.append(f"Reduce chunk overlap (current: {params['chunk_overlap']})") # Retrieval-related tips if params['similarity_top_k'] > 3: tips.append(f"Reduce retrieved chunks (current: {params['similarity_top_k']})") # Model-related tips if "34b" in params['model'] or "70b" in params['model']: tips.append(f"Try smaller model (current: {params['model']})") return tips def main(): """Entry point for the Codebase Chat CLI application. Handles command-line arguments and orchestrates the main application flow including: - Dependency checks - Project management (listing, analyzing, repairing) - Indexing operations - Chat functionality Command Line Arguments: --project PROJECT_NAME : Specifies project to operate on (for chat/reindex/repair) --list-projects : Lists all indexed projects --show-config : Displays current configuration --repair PROJECT : Attempts to repair a corrupted index --reindex PATH : Path to codebase to index --analyze : Shows detailed project analysis --model MODEL_NAME : Specifies Ollama model to use (default: DEFAULT_MODEL) --embed-model EMBED_MODEL : Specifies Ollama embedding model (default: DEFAULT_EMBED_MODEL) --cpu : Forces CPU mode --gpu : Forces GPU mode if available --temperature FLOAT : Sets model temperature (default: 0.0) --num-ctx INT : Sets context window size (default: 8192) --top-p FLOAT : Sets top-p sampling value (default: 1.0) --repeat-penalty FLOAT : Sets repetition penalty (default: 1.0) --clean : Deletes and recreates the index --dry-run : Only lists files to be indexed --verbose : Shows detailed debug output --ignore-file PATH : Path to custom .codechatignore file --chunk-size INT : Text chunk size for processing (default: DEFAULT_CHUNK_SIZE) --chunk-overlap INT : Context overlap between chunks (default: DEFAULT_CHUNK_OVERLAP) --similarity-top-k INT : Number of similar chunks to retrieve (default: DEFAULT_SIMILARITY_TOP_K) --timeout INT : Query timeout in seconds (default: DEFAULT_TIMEOUT) --max-retries INT : Number of retry attempts on timeout (default: MAX_RETRIES) """ # Check dependencies first check_dependencies() parser = argparse.ArgumentParser( description="Quality-Enhanced Codebase Chat CLI", formatter_class=argparse.ArgumentDefaultsHelpFormatter ) # Create mutually exclusive group for main actions action_group = parser.add_mutually_exclusive_group(required=True) action_group.add_argument("--project", help="Project name (for chat/reindex/repair)") action_group.add_argument("--list-projects", action="store_true", help="List all indexed projects") action_group.add_argument("--show-config", action="store_true", help="Show current configuration") action_group.add_argument("--repair", metavar="PROJECT", help="Attempt to repair a corrupted index") # Project-specific arguments parser.add_argument("--reindex", metavar="PATH", help="Path to codebase to index") parser.add_argument("--analyze", action="store_true", help="Show detailed project analysis") # Model settings parser.add_argument("--model", default=DEFAULT_MODEL, help="Ollama model name") parser.add_argument("--embed-model", default=DEFAULT_EMBED_MODEL, help=f"Ollama embedding model (default: {DEFAULT_EMBED_MODEL})") # Hardware control parser.add_argument("--cpu", action="store_true", help="Force CPU mode") parser.add_argument("--gpu", action="store_true", help="Force GPU mode if available") # Performance tuning parser.add_argument("--temperature", type=float, default=float(os.getenv("OLLAMA_TEMPERATURE", 0.0)), help="Model temperature") parser.add_argument("--num-ctx", type=int, default=int(os.getenv("OLLAMA_NUM_CTX", 8192)), help="Context window size") parser.add_argument("--top-p", type=float, default=float(os.getenv("OLLAMA_TOP_P", 1.0)), help="Top-p sampling") parser.add_argument("--repeat-penalty", type=float, default=float(os.getenv("OLLAMA_REPEAT_PENALTY", 1.0)), help="Repetition penalty") # Utility flags parser.add_argument("--clean", action="store_true", help="Delete and recreate the index") parser.add_argument("--dry-run", action="store_true", help="Only list files to be indexed") parser.add_argument("--verbose", action="store_true", help="Show detailed debug output") parser.add_argument("--ignore-file", help="Path to custom .codechatignore file") # Add quality parameters parser.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE, help="Text chunk size for processing") parser.add_argument("--chunk-overlap", type=int, default=DEFAULT_CHUNK_OVERLAP, help="Context overlap between chunks") parser.add_argument("--similarity-top-k", type=int, default=DEFAULT_SIMILARITY_TOP_K, help="Number of similar chunks to retrieve") parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT, help="Query timeout in seconds") parser.add_argument("--max-retries", type=int, default=MAX_RETRIES, help="Number of retry attempts on timeout") args = parser.parse_args() # Handle global commands first if args.list_projects: list_projects(verbose=args.verbose) return if args.show_config: show_config(args) return if args.repair: repair_project(args.repair, verbose=args.verbose) return # Validate project-specific commands if not hasattr(args, 'project') or not args.project: print("❌ Project name is required for this action") parser.print_help() sys.exit(1) if not validate_project_name(args.project): print("❌ Invalid project name. Only alphanumeric, underscore and hyphen characters are allowed.") sys.exit(1) # Device selection if args.gpu and args.cpu: print("❌ Cannot force both GPU and CPU modes") sys.exit(1) device = get_device(force_cpu=args.cpu) if args.gpu and device != "cuda": print("⚠️ GPU requested but not available - falling back to CPU") device = "cpu" if args.verbose: print(f"\n⚙️ Configuration:") print(f" Device: {device.upper()} ({'✅ GPU' if device == 'cuda' else '⚠️ CPU'})") print(f" Model: {args.model}") print(f" Embed Model: {args.embed_model}") if hasattr(args, 'project'): print(f" Project: {args.project}") print(f" Index Location: {Path(INDEX_ROOT) / args.project}\n") # Handle project actions if args.analyze: analyze_project(args.project, args.verbose) elif args.reindex: build_index( project=args.project, codebase_path=Path(args.reindex), embed_model=args.embed_model, device=device, clean=args.clean, dry_run=args.dry_run, verbose=args.verbose, ignore_file_path=Path(args.ignore_file) if args.ignore_file else None, chunk_size=args.chunk_size, chunk_overlap=args.chunk_overlap ) else: chat( project=args.project, model=args.model, embed_model=args.embed_model, temperature=args.temperature, num_ctx=args.num_ctx, top_p=args.top_p, repeat_penalty=args.repeat_penalty, device=device, verbose=args.verbose, similarity_top_k=args.similarity_top_k, timeout=args.timeout, max_retries=args.max_retries, chunk_size=args.chunk_size, chunk_overlap=args.chunk_overlap ) if __name__ == "__main__": main()