#!/usr/bin/env python3
"""
Codebase Chat CLI - GPU Accelerated

A command-line interface for interacting with codebases using local LLMs via Ollama.
Supports GPU acceleration for improved performance and ChromaDB for vector indexing.

Features:
- Vector index creation of source code files with ChromaDB and Ollama embeddings
- .codechatignore support for excluding files/folders
- Interactive querying of indexed codebases
- GPU and Apple Silicon acceleration (CUDA/MPS) for embeddings and chat
- Project management capabilities (indexing, analysis, listing)
- Multi-language support (Java, Kotlin, Python, JS, TS, Go, Rust, C++, etc.)
- Dry-run mode for previewing indexing operations

Environment Variables:
- OLLAMA_MODEL: Default chat model (e.g., "phi4:14b")
- OLLAMA_EMBED_MODEL: Embedding model (e.g., "nomic-embed-text")
- OLLAMA_URL: Ollama API endpoint (default: http://localhost:11434)
- INDEX_ROOT: Root directory for storing vector indexes
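
Example usage (illustrative invocations; the script filename "codechat.py" and all paths are assumed):
    python codechat.py --project myproject --reindex /path/to/codebase --clean
    python codechat.py --project myproject            # chat with the indexed codebase
    python codechat.py --list-projects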
"""

import os
import sys
import argparse
import shutil
import time
import re
import chromadb
import torch
from pathlib import Path
from typing import Optional, List, Dict, Any
from dotenv import load_dotenv
from pathspec import PathSpec
from packaging import version

# Enhanced LlamaIndex imports
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.core.settings import Settings
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.prompts import PromptTemplate
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.ollama import Ollama

# --- Configuration ---
load_dotenv()
DEFAULT_MODEL = os.getenv("OLLAMA_MODEL", "phi4:14b")
# DEFAULT_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text")
DEFAULT_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "all-minilm")
DEFAULT_OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
INDEX_ROOT = os.getenv("INDEX_ROOT", os.path.expanduser("~/.codechat/indexes"))
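
# Example .env for the settings above (illustrative values; every variable is optional
# and falls back to the defaults shown in the os.getenv() calls):
#   OLLAMA_MODEL=phi4:14b
#   OLLAMA_EMBED_MODEL=all-minilm
#   OLLAMA_URL=http://localhost:11434
#   INDEX_ROOT=/absolute/path/to/indexes
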
ALLOWED_EXTENSIONS = {".java", ".kt", ".py", ".js", ".ts", ".go", ".rs", ".cpp", ".h", ".xml", ".properties", ".yml",
                      ".md"}

# Quality Improvement Defaults
DEFAULT_CHUNK_SIZE = 512
DEFAULT_CHUNK_OVERLAP = 128
DEFAULT_SIMILARITY_TOP_K = 3

# Enhanced QA Prompt
CODE_QA_PROMPT = PromptTemplate("""
You are a senior developer analyzing this codebase. Provide:
1. Concise technical explanation
2. Relevant code snippets with source file references
3. Usage examples when appropriate
4. Any potential issues or caveats

Format your response in markdown with proper code blocks.

Context: {context_str}
Question: {query_str}

Answer:
""")

# Timeout settings (seconds)
DEFAULT_TIMEOUT = 60
MAX_RETRIES = 2

# Minimum required versions
MIN_CHROMADB_VERSION = "0.4.0"
MIN_TORCH_VERSION = "1.10.0"


def validate_project_name(name: str) -> bool:
    """
    Validates a project name to ensure it is safe for use as a filesystem directory name.

    Args:
        name (str): The project name to validate.

    Returns:
        bool: True if the name is valid (contains only letters, numbers, underscores, or hyphens), False otherwise.
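
    Example (illustrative):
        >>> validate_project_name("my-project_1")
        True
        >>> validate_project_name("my project")
        False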
    """
    if not name:
        return False
    return bool(re.match(r'^[a-zA-Z0-9_-]+$', name))


def check_dependencies() -> None:
    """
    Checks the versions of required dependencies and prints warnings if they
    do not meet the minimum required versions.
    """
    try:
        chroma_version = version.parse(chromadb.__version__)
        if chroma_version < version.parse(MIN_CHROMADB_VERSION):
            print(f"⚠️ ChromaDB version {chromadb.__version__} is below minimum required {MIN_CHROMADB_VERSION}")

        torch_version = version.parse(torch.__version__)
        if torch_version < version.parse(MIN_TORCH_VERSION):
            print(f"⚠️ PyTorch version {torch.__version__} is below minimum required {MIN_TORCH_VERSION}")

    except Exception as e:
        print(f"⚠️ Could not verify dependency versions: {str(e)}")


def get_device(force_cpu: bool = False) -> str:
    """
    Determines the most suitable compute device for processing.

    Args:
        force_cpu (bool): If True, always return 'cpu' regardless of available hardware.

    Returns:
        str: The device to use ('cuda', 'mps', or 'cpu').
    """
    if not force_cpu and torch.cuda.is_available():
        return "cuda"
    elif not force_cpu and torch.backends.mps.is_available():
        return "mps"  # Apple Silicon
    return "cpu"


def should_index_file(path: Path) -> bool:
    """
    Checks whether a given file should be indexed based on its file extension.

    Args:
        path (Path): The file path to check.

    Returns:
        bool: True if the file extension is supported; False otherwise.
    """
    return path.suffix.lower() in ALLOWED_EXTENSIONS


def gather_files(
        codebase_path: Path,
        verbose: bool = False,
        ignore_file_path: Optional[Path] = None
) -> List[str]:
    """
    Recursively collects file paths from a codebase directory, applying .codechatignore patterns if present.

    Args:
        codebase_path (Path): Root directory of the codebase.
        verbose (bool, optional): Enables detailed output during file collection. Defaults to False.
        ignore_file_path (Optional[Path], optional): Custom path to a .codechatignore file.
            If None, looks for .codechatignore in default locations. Defaults to None.

    Returns:
        List[str]: A list of string paths to source files eligible for indexing.
    """
    # Look for ignore files in priority order
    possible_ignore_files = []
    if ignore_file_path:
        possible_ignore_files.append(ignore_file_path)
    possible_ignore_files.extend([
        Path.cwd() / ".codechatignore",
        codebase_path / ".codechatignore"
    ])

    spec = None
    for ignore_file in possible_ignore_files:
        if ignore_file.exists():
            if verbose:
                print(f"🔍 Found .codechatignore at {ignore_file}")
            with ignore_file.open("r", encoding="utf-8") as f:
                patterns = [line.strip() for line in f if line.strip() and not line.startswith("#")]
                if verbose and patterns:
                    print(f"📜 Ignore patterns: {patterns}")
                spec = PathSpec.from_lines("gitwildmatch", patterns)
            break

    files = []
    for p in codebase_path.rglob("*"):
        if not p.is_file():
            continue

        if not should_index_file(p):
            if verbose:
                print(f"➖ Skipping (extension): {p}")
            continue

        try:
            rel_path = p.relative_to(codebase_path).as_posix()
            if verbose:
                print(f"🔄 Testing path: {rel_path}")
        except ValueError:
            if verbose:
                print(f"⚠️ Path error: {p}")
            continue

        if spec and spec.match_file(rel_path):
            if verbose:
                print(f"🚫 Excluded by pattern: {rel_path}")
            continue

        files.append(str(p))
        if verbose:
            print(f"✅ Added: {p}")

    return files


def verify_metadata(index: VectorStoreIndex) -> bool:
    """
    Verifies that metadata is present for each node in the index.

    Args:
        index (VectorStoreIndex): VectorStoreIndex instance to verify.

    Returns:
        bool: True if all nodes contain source metadata; False otherwise.
    """
    for node_id, node in index.docstore.docs.items():
        if not node.metadata.get('source_file'):
            print(f"⚠️ Missing source_file in node {node_id}")
            return False
    return True


def build_index(
        project: str,
        codebase_path: Path,
        embed_model: str,
        device: str,
        clean: bool = False,
        dry_run: bool = False,
        verbose: bool = False,
        ignore_file_path: Optional[Path] = None,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP
) -> None:
    """
    Builds a ChromaDB-based vector index for the specified project.

    Args:
        project (str): Project name for indexing.
        codebase_path (Path): Path to the source code directory.
        embed_model (str): The embedding model name for document vectorization.
        device (str): Compute device identifier (e.g., 'cuda', 'cpu', 'mps').
        clean (bool, optional): If True, deletes and rebuilds the index. Defaults to False.
        dry_run (bool, optional): If True, only simulates the indexing process. Defaults to False.
        verbose (bool, optional): Enables debug output. Defaults to False.
        ignore_file_path (Optional[Path], optional): Custom .codechatignore path. Defaults to None.
        chunk_size (int, optional): Maximum token chunk size for embedding. Defaults to DEFAULT_CHUNK_SIZE.
        chunk_overlap (int, optional): Overlap between chunks. Defaults to DEFAULT_CHUNK_OVERLAP.

    Raises:
        SystemExit: If no indexable files are found.
    """
    project_index_path = Path(INDEX_ROOT) / project
    if clean:
        shutil.rmtree(project_index_path, ignore_errors=True)
    os.makedirs(project_index_path, exist_ok=True)

    indexed_files = gather_files(codebase_path, verbose, ignore_file_path)
    if not indexed_files:
        print("❌ No indexable files found.")
        sys.exit(1)

    if dry_run:
        print(f"✅ Dry run complete (would index {len(indexed_files)} files)")
        return

    # Document processing
    node_parser = TokenTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separator="\n"
    )

    documents = SimpleDirectoryReader(
        input_files=indexed_files,
        file_metadata=lambda x: {
            'source_file': str(Path(x).absolute()),
            'file_path': str(Path(x).relative_to(codebase_path)),
            'file_name': Path(x).name,
            'timestamp': time.time()
        }
    ).load_data()

    # Configure Settings instead of ServiceContext
    Settings.llm = Ollama(model=DEFAULT_MODEL, base_url=DEFAULT_OLLAMA_URL)
    Settings.embed_model = OllamaEmbedding(model_name=embed_model, device=device)
    Settings.node_parser = node_parser
    Settings.chunk_size = chunk_size
    Settings.chunk_overlap = chunk_overlap

    # Create index
    chroma_client = chromadb.PersistentClient(path=str(project_index_path))
    vector_store = ChromaVectorStore(chroma_collection=chroma_client.get_or_create_collection(f"{project}_collection"))

    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=StorageContext.from_defaults(vector_store=vector_store),
        show_progress=verbose
    )
    index.storage_context.persist()

    if not verify_metadata(index):
        print("❌ Metadata issues detected - some sources may show as Unknown")

    print(f"\n✅ Index built with {len(indexed_files)} files (chunk size: {chunk_size}, overlap: {chunk_overlap})")


def chat(
        project: str,
        model: str,
        embed_model: str,
        temperature: float,
        num_ctx: int,
        top_p: float,
        repeat_penalty: float,
        device: str,
        verbose: bool = False,
        similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K,
        timeout: int = DEFAULT_TIMEOUT,
        max_retries: int = MAX_RETRIES,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP
) -> None:
    """
    Start an interactive chat session with an indexed codebase.

    Args:
        project (str): Name of the project to chat with.
        model (str): Ollama model name to use for chat.
        embed_model (str): Ollama embedding model name.
        temperature (float): Controls randomness of responses (0.0-1.0).
        num_ctx (int): Context window size in tokens.
        top_p (float): Top-p sampling parameter.
        repeat_penalty (float): Penalty for repeated tokens.
        device (str): Compute device to use ('cuda', 'mps', or 'cpu').
        verbose (bool, optional): If True, prints detailed progress information. Defaults to False.
        similarity_top_k (int, optional): Number of similar chunks to retrieve. Defaults to DEFAULT_SIMILARITY_TOP_K.
        timeout (int, optional): Query timeout in seconds. Defaults to DEFAULT_TIMEOUT.
        max_retries (int, optional): Number of retry attempts on timeout. Defaults to MAX_RETRIES.
        chunk_size (int, optional): Text chunk size for processing. Defaults to DEFAULT_CHUNK_SIZE.
        chunk_overlap (int, optional): Context overlap between chunks. Defaults to DEFAULT_CHUNK_OVERLAP.

    Raises:
        SystemExit: If no index is found for the specified project.
    """
    project_index_path = Path(INDEX_ROOT) / project
    if not project_index_path.exists():
        print(f"❌ No index found for '{project}'. Run with --reindex first.")
        sys.exit(1)

    # Configure for quality responses
    Settings.embed_model = OllamaEmbedding(
        model_name=embed_model,
        base_url=DEFAULT_OLLAMA_URL,
        device=device
    )
    Settings.llm = Ollama(
        model=model,
        base_url=DEFAULT_OLLAMA_URL,
        temperature=temperature,
        num_ctx=num_ctx,
        top_p=top_p,
        repeat_penalty=repeat_penalty,
        device=device,
        request_timeout=timeout
    )

    # Quality-optimized query engine
    chroma_client = chromadb.PersistentClient(path=str(project_index_path))
    vector_store = ChromaVectorStore(chroma_collection=chroma_client.get_collection(f"{project}_collection"))
    index = VectorStoreIndex.from_vector_store(vector_store)

    query_engine = index.as_query_engine(
        similarity_top_k=similarity_top_k,
        include_metadata=True,
        metadata_fields=['source_file', 'file_name', 'file_path'],
        vector_store_query_mode="hybrid",
        response_mode="tree_summarize",
        text_qa_template=CODE_QA_PROMPT,
        verbose=verbose,
        timeout=timeout,
        retry_on_timeout=True,
        max_retries=max_retries
    )

    sample_embedding = Settings.embed_model.get_text_embedding("sample code class")
    print(f"Embedding dimension: {len(sample_embedding)}")

    def debug_index_metadata(index: VectorStoreIndex, verbose: bool = True) -> None:
        """Debug function to check what metadata exists in the index.

        Args:
            index (VectorStoreIndex): The index to debug.
            verbose (bool, optional): If True, prints detailed information. Defaults to True.
        """
        if not verbose:
            return

        print("\n🔍 Debugging index metadata:")
        try:
            collection = index._vector_store._collection
            print(f"Collection name: {collection.name}")
            print(f"Total vectors: {collection.count()}")

            # Get sample items with metadata
            items = collection.get(limit=3, include=["metadatas", "documents"])

            if items and "metadatas" in items:
                print("\nSample metadata found:")
                for i, (meta, doc) in enumerate(zip(items["metadatas"], items["documents"][:3])):
                    print(f"  {i + 1}. Metadata: {meta}")
                    print(f"     First 50 chars: {doc[:50]}...\n")
            else:
                print("⚠️ No metadata found in collection")

        except Exception as e:
            print(f"⚠️ Error checking metadata: {str(e)}")
            print("Trying alternative access method...")
            try:
                # Alternative way to check nodes
                nodes = index.docstore.docs
                print(f"\nFound {len(nodes)} nodes in docstore")
                for node_id, node in list(nodes.items())[:3]:
                    print(f"Node {node_id}:")
                    print(f"  Metadata: {node.metadata}")
                    print(f"  Text: {node.text[:50]}...\n")
            except Exception as e2:
                print(f"⚠️ Couldn't access docstore either: {str(e2)}")

    print("\n🔎 Verifying index structure...")
    debug_index_metadata(index, verbose=True)

    # Additional verification
    print("\n🔍 Index Verification:")
    try:
        print(f"- Vectors: {index._vector_store._collection.count()}")
        if hasattr(index, 'docstore'):
            print(f"- Documents: {len(index.docstore.docs)}")
        else:
            print("- Docstore: Not available (normal for ChromaDB)")
    except Exception as e:
        print(f"⚠️ Verification note: {str(e)}")

    debug_index_metadata(index, verbose=True)

    # Response enhancement functions
    def enhance_query(query: str) -> str:
        """Add context based on query type to get better responses.

        Args:
            query (str): The original user query.

        Returns:
            str: The enhanced query with additional context.
        """
        query = query.strip()
        lower_query = query.lower()

        # Module/package queries
        if "module" in lower_query or "package" in lower_query:
            return ("List all Java modules/packages with their relative paths, "
                    "main classes, and 1-2 sentence descriptions. "
                    "Include the module's purpose and key features.")

        # Explanation queries
        elif any(q_word in lower_query for q_word in ["how", "why", "explain"]):
            return f"{query} (provide detailed explanation with code references)"

        # Example queries
        elif "example" in lower_query:
            return f"{query} (include practical usage examples)"

        # Default case - return original query
        return query

    def format_response(response: Any) -> str:
        """Formats the response with source references.

        Args:
            response (Any): The query response object.

        Returns:
            str: The formatted response text with sources.
        """
        text = response.response

        # Source nodes handling
        if hasattr(response, 'source_nodes') and response.source_nodes:
            sources = []
            for node in response.source_nodes[:3]:  # Show top 3 sources
                source = node.metadata.get('source_file') or node.metadata.get('file_path', 'Unknown')
                if source != 'Unknown':
                    try:
                        # First try making it relative to INDEX_ROOT
                        source = str(Path(source).relative_to(INDEX_ROOT))
                    except ValueError:
                        try:
                            # If that fails, just show the filename
                            source = Path(source).name
                        except Exception:
                            source = "Unknown path"
                sources.append(f"- {source} (score: {node.score:.2f})")

            text += "\n\n🔍 Sources:\n" + "\n".join(sources)

        return text

    # Interactive chat loop
    print(f"\n💬 Chatting with {project} (Enhanced Mode)")
    print("Type 'exit' or press Ctrl+C to quit\n")

    # Show optimization tips if settings might cause performance issues
    optimization_params = {
        'timeout': timeout,
        'chunk_size': chunk_size,
        'chunk_overlap': chunk_overlap,
        'similarity_top_k': similarity_top_k,
        'model': model
    }
    tips = get_optimization_tips(optimization_params)
    if tips:
        print("\n💡 Performance Tips:")
        for tip in tips:
            print(f"  - {tip}")
        print()

    while True:
        try:
            question = input("🤖 > ").strip()
            if question.lower() in {"exit", "quit"}:
                break

            start_time = time.time()
            try:
                response = query_engine.query(enhance_query(question))
                print(f"\n{format_response(response)}")

                # DEBUG: Show raw source nodes
                if hasattr(response, 'source_nodes'):
                    print("\n🔍 DEBUG - Source Nodes:")
                    for i, node in enumerate(response.source_nodes[:3]):
                        print(f"Node {i + 1}:")
                        print(f"  Score: {node.score}")
                        try:
                            print(f"  Path: {node.metadata.get('file_path')}")
                            print(f"  Source: {node.metadata.get('source_file')}")
                        except Exception as e:
                            print(f"  Metadata error: {str(e)}")
                        print(f"  Text: {node.text[:100]}...")
            except Exception as e:
                if "timeout" in str(e).lower():
                    print("\n⏱️ The query timed out. Try:")
                    print("- Asking a more specific question")
                    print(f"- Increasing timeout (current: {timeout}s)")
                    print(f"- Reducing chunk size (current: {chunk_size})")
                else:
                    print(f"\n❌ Query Error: {str(e)}")

            print(f"\n⏱️  Response time: {time.time() - start_time:.2f}s")

        except KeyboardInterrupt:
            print("\n👋 Exiting...")
            break


def list_projects(verbose: bool = False) -> None:
    """
    Display all indexed projects with accurate status.

    Args:
        verbose (bool, optional): If True, shows additional details about each project. Defaults to False.
    """
    index_root_path = Path(INDEX_ROOT)
    if not index_root_path.exists():
        print("No projects indexed yet.")
        return

    print("📂 Indexed Projects:")
    for project_dir in sorted(index_root_path.iterdir()):
        if project_dir.is_dir():
            status = "❌"
            size_info = "unknown"

            try:
                client = chromadb.PersistentClient(path=str(project_dir))
                collections = client.list_collections()

                if collections:
                    # Find matching collection
                    for col in collections:
                        if col.name == project_dir.name or col.name == f"{project_dir.name}_collection":
                            count = col.count()
                            size_info = f"{count} vectors"
                            status = "✅"
                            break

            except Exception as e:
                if verbose:
                    print(f"⚠️ Error checking {project_dir.name}: {str(e)}")

            print(f"  - {project_dir.name} {status} ({size_info})")


def show_config(args: argparse.Namespace) -> None:
    """
    Display the current configuration including hardware and model settings.

    Args:
        args (argparse.Namespace): Parsed command-line arguments.
    """
    device = get_device(force_cpu=args.cpu)
    gpu_type = "None"
    if device == "cuda":
        gpu_type = torch.cuda.get_device_name(0)
    elif device == "mps":
        gpu_type = "Apple Silicon (MPS)"

    print("⚙️ Current Configuration:")
    print(f"  Project:          {args.project if hasattr(args, 'project') else 'N/A'}")
    print(f"  Model:            {args.model}")
    print(f"  Embed Model:      {args.embed_model}")
    print(f"  Device:           {device.upper()} ({gpu_type})")
    print(f"  Temperature:      {args.temperature}")
    print(f"  Context Window:   {args.num_ctx} tokens")
    print("\n🛠️  Paths:")
    print(f"  Index Root:       {INDEX_ROOT}")
    print(f"  Ollama URL:       {DEFAULT_OLLAMA_URL}")

    # Show ignore file info if available
    ignore_locations = [
        Path(args.ignore_file) if hasattr(args, 'ignore_file') and args.ignore_file else None,
        Path.cwd() / ".codechatignore",
        Path(args.reindex) / ".codechatignore" if hasattr(args, 'reindex') and args.reindex else None
    ]

    found = False
    for loc in ignore_locations:
        if loc and loc.exists():
            print(f"\n🔍 Active .codechatignore at: {loc}")
            with open(loc, 'r') as f:
                print("   Ignore Patterns:")
                for line in f:
                    line = line.strip()
                    if line and not line.startswith("#"):
                        print(f"     - {line}")
            found = True
            break

    if not found:
        print("\n⚠️ No .codechatignore file found")


def analyze_project(project: str, verbose: bool = False) -> None:
    """
    Display detailed analytics about an indexed project.

    Args:
        project (str): Name of the project to analyze.
        verbose (bool, optional): If True, shows additional storage details. Defaults to False.

    Raises:
        None: This function handles errors gracefully and prints messages instead of raising exceptions.
    """
    project_path = Path(INDEX_ROOT) / project
    if not project_path.exists():
        print(f"❌ Project '{project}' not found")
        return

    print(f"\n📊 Analysis for '{project}':")
    print("─" * 50)

    # 1. Enhanced ChromaDB Stats
    try:
        client = chromadb.PersistentClient(path=str(project_path))
        collection = client.get_collection(f"{project}_collection")

        # Count vectors and their distribution
        count = collection.count()
        metadata = collection.get(include=["metadatas"])
        file_types = {}
        file_sizes = {}

        if metadata and "metadatas" in metadata:
            for item in metadata["metadatas"]:
                if item and isinstance(item, dict) and "file_path" in item:
                    try:
                        ext = Path(item["file_path"]).suffix.lower()
                        file_types[ext] = file_types.get(ext, 0) + 1

                        # Get file size if available
                        if "file_size" in item:
                            file_sizes[ext] = file_sizes.get(ext, 0) + int(item["file_size"])
                    except (TypeError, AttributeError) as e:
                        if verbose:
                            print(f"⚠️ Could not process metadata item: {str(e)}")
                        continue

        print("\n📈 Embedding Statistics:")
        print(f"  - Total vectors: {count}")
        if file_types:
            print("  - File type distribution:")
            for ext, num in sorted(file_types.items(), key=lambda x: x[1], reverse=True):
                size_info = ""
                if ext in file_sizes:
                    size_info = f" ({file_sizes[ext] / 1024:.1f} KB total)"
                print(f"    - {ext if ext else 'no-extension'}: {num} vectors{size_info}")

    except Exception as e:
        print(f"⚠️ Couldn't read ChromaDB collection: {str(e)}")
        if "truth value of an array" in str(e):
            print("💡 Try upgrading ChromaDB: pip install --upgrade chromadb numpy")

    # 2. Storage Analysis
    try:
        total_size = sum(f.stat().st_size for f in project_path.glob('**/*') if f.is_file())
        print("\n💾 Storage Usage:")
        print(f"  - Index size: {total_size / 1024 / 1024:.2f} MB")
        print(f"  - Files: {len(list(project_path.glob('**/*')))}")

        if verbose:
            print("\n🔍 Detailed Storage Breakdown:")
            for item in project_path.iterdir():
                if item.is_file():
                    print(f"  - {item.name}: {item.stat().st_size / 1024:.1f} KB")
                elif item.is_dir():
                    dir_size = sum(f.stat().st_size for f in item.glob('**/*') if f.is_file())
                    print(f"  - {item.name}/: {dir_size / 1024:.1f} KB")

    except Exception as e:
        print(f"⚠️ Couldn't analyze storage: {str(e)}")

    # 3. Health Check - Updated for ChromaDB v0.4+ format
    print("\n🩺 Health Check:")
    healthy = True

    # Required files for ChromaDB v0.4+
    required_files = {
        "chroma.sqlite3": "SQLite database",
    }

    # Optional files
    optional_files = {
        "chroma_settings.json": "Settings file",
        "chroma-embeddings.parquet": "Embeddings data (legacy)"
    }

    # Check required files
    for file, desc in required_files.items():
        if (project_path / file).exists():
            print(f"  - ✅ {desc} present")
        else:
            print(f"  - ❌ {desc} missing!")
            healthy = False

    # Check optional files
    for file, desc in optional_files.items():
        if (project_path / file).exists():
            print(f"  - ☑️ {desc} present")
        else:
            print(f"  - ⚠️ {desc} not found (optional)")

    # Check collection exists and is accessible
    try:
        client = chromadb.PersistentClient(path=str(project_path))
        collection = client.get_collection(f"{project}_collection")
        print(f"  - ✅ Collection accessible ({collection.count()} vectors)")
    except Exception as e:
        print(f"  - ❌ Collection error: {str(e)}")
        healthy = False

    print(f"\n{'✅ Index is healthy' if healthy else '❌ Index has issues!'}")
    print("─" * 50)


def repair_project(project: str, verbose: bool = False) -> None:
    """
    Attempt to repair a potentially corrupted index.

    Args:
        project (str): Name of the project to repair.
        verbose (bool, optional): If True, shows additional repair details. Defaults to False.
    """
    project_path = Path(INDEX_ROOT) / project
    if not project_path.exists():
        print(f"❌ Project directory '{project}' not found")
        return

    print(f"\n🔧 Repairing project '{project}'...")

    try:
        client = chromadb.PersistentClient(path=str(project_path))

        # ChromaDB uses different collection naming in newer versions
        collections = client.list_collections()
        if not collections:
            raise ValueError("No collections found in project directory")

        # Try both naming conventions
        collection_name = None
        for col in collections:
            if col.name == project or col.name == f"{project}_collection":
                collection_name = col.name
                break

        if not collection_name:
            raise ValueError(f"No matching collection found (tried: '{project}', '{project}_collection')")

        if verbose:
            print(f"🔄 Found collection: {collection_name}")

        collection = client.get_collection(collection_name)
        count = collection.count()

        print("\n✅ Repair successful - project is healthy")
        print(f"   Collection: {collection_name}")
        print(f"   Total vectors: {count}")

    except Exception as e:
        print(f"\n❌ Repair failed: {str(e)}")
        print("\nRecommended solutions:")
        print(f"1. Clean reindex: --project {project} --reindex /path/to/code --clean")
        print("2. Manual repair steps:")
        print(f"   - Delete directory: {project_path}")
        print(f"   - Check collection name in: {project_path}/chroma.sqlite3")


def get_optimization_tips(params: Dict[str, Any]) -> List[str]:
    """Generate performance optimization suggestions based on current parameters.

    Args:
        params (Dict[str, Any]): Dictionary of current configuration parameters.

    Returns:
        List[str]: List of optimization tips.
 
    
     
     
  
        """  
 
    
     
     
  
        tips  =  []  
 
    
     
     
  
     
 
    
     
     
  
        # Timeout-related tips   
 
    
     
     
  
        if  params ['timeout' ] <  30 :  
 
    
     
     
  
            tips .append (f"Increase timeout (current: { params ['timeout' ]}  s)" )  
 
    
     
     
  
     
 
    
     
     
  
        # Chunking-related tips   
 
    
     
     
  
        if  params ['chunk_size' ] >  768 :  
 
    
     
     
  
            tips .append (f"Reduce chunk size (current: { params ['chunk_size' ]}  )" )  
 
    
     
     
  
     
 
    
     
     
  
        if  params ['chunk_overlap' ] >  128 :  
 
    
     
     
  
            tips .append (f"Reduce chunk overlap (current: { params ['chunk_overlap' ]}  )" )  
 
    
     
     
  
     
 
    
     
     
  
        # Retrieval-related tips   
 
    
     
     
  
        if  params ['similarity_top_k' ] >  3 :  
 
    
     
     
  
            tips .append (f"Reduce retrieved chunks (current: { params ['similarity_top_k' ]}  )" )  
 
    
     
     
  
     
 
    
     
     
  
        # Model-related tips   
 
    
     
     
  
        if  "34b"  in  params ['model' ] or  "70b"  in  params ['model' ]:  
 
    
     
     
  
            tips .append (f"Try smaller model (current: { params ['model' ]}  )" )  
 
    
     
     
  
     
 
    
     
     
  
        return  tips   
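
# Example (illustrative; the parameter values below are made up): a heavyweight
# configuration would trigger every suggestion above, e.g.
#
#     >>> get_optimization_tips({
#     ...     "timeout": 20, "chunk_size": 1024, "chunk_overlap": 256,
#     ...     "similarity_top_k": 5, "model": "codellama:34b",
#     ... })
#     ['Increase timeout (current: 20s)',
#      'Reduce chunk size (current: 1024)',
#      'Reduce chunk overlap (current: 256)',
#      'Reduce retrieved chunks (current: 5)',
#      'Try smaller model (current: codellama:34b)']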


def main():
    """Entry point for the Codebase Chat CLI application.

    Handles command-line arguments and orchestrates the main application flow including:
    - Dependency checks
    - Project management (listing, analyzing, repairing)
    - Indexing operations
    - Chat functionality

    Command Line Arguments:
        --project PROJECT_NAME     : Specifies project to operate on (for chat/reindex/repair)
        --list-projects            : Lists all indexed projects
        --show-config              : Displays current configuration
        --repair PROJECT           : Attempts to repair a corrupted index
        --reindex PATH             : Path to codebase to index
        --analyze                  : Shows detailed project analysis
        --model MODEL_NAME         : Specifies Ollama model to use (default: DEFAULT_MODEL)
        --embed-model EMBED_MODEL  : Specifies Ollama embedding model (default: DEFAULT_EMBED_MODEL)
        --cpu                      : Forces CPU mode
        --gpu                      : Forces GPU mode if available
        --temperature FLOAT        : Sets model temperature (default: 0.0)
        --num-ctx INT              : Sets context window size (default: 8192)
        --top-p FLOAT              : Sets top-p sampling value (default: 1.0)
        --repeat-penalty FLOAT     : Sets repetition penalty (default: 1.0)
        --clean                    : Deletes and recreates the index
        --dry-run                  : Only lists files to be indexed
        --verbose                  : Shows detailed debug output
        --ignore-file PATH         : Path to custom .codechatignore file
        --chunk-size INT           : Text chunk size for processing (default: DEFAULT_CHUNK_SIZE)
        --chunk-overlap INT        : Context overlap between chunks (default: DEFAULT_CHUNK_OVERLAP)
        --similarity-top-k INT     : Number of similar chunks to retrieve (default: DEFAULT_SIMILARITY_TOP_K)
        --timeout INT              : Query timeout in seconds (default: DEFAULT_TIMEOUT)
        --max-retries INT          : Number of retry attempts on timeout (default: MAX_RETRIES)
    """
    # Check dependencies first
    check_dependencies()

    parser = argparse.ArgumentParser(
        description="Quality-Enhanced Codebase Chat CLI",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # Create mutually exclusive group for main actions
    action_group = parser.add_mutually_exclusive_group(required=True)
    action_group.add_argument("--project", help="Project name (for chat/reindex/repair)")
    action_group.add_argument("--list-projects", action="store_true",
                              help="List all indexed projects")
    action_group.add_argument("--show-config", action="store_true",
                              help="Show current configuration")
    action_group.add_argument("--repair", metavar="PROJECT",
                              help="Attempt to repair a corrupted index")

    # Project-specific arguments
    parser.add_argument("--reindex", metavar="PATH", help="Path to codebase to index")
    parser.add_argument("--analyze", action="store_true",
                        help="Show detailed project analysis")

    # Model settings
    parser.add_argument("--model", default=DEFAULT_MODEL, help="Ollama model name")
    parser.add_argument("--embed-model", default=DEFAULT_EMBED_MODEL,
                        help=f"Ollama embedding model (default: {DEFAULT_EMBED_MODEL})")

    # Hardware control
    parser.add_argument("--cpu", action="store_true", help="Force CPU mode")
    parser.add_argument("--gpu", action="store_true", help="Force GPU mode if available")

    # Performance tuning
    parser.add_argument("--temperature", type=float,
                        default=float(os.getenv("OLLAMA_TEMPERATURE", 0.0)),
                        help="Model temperature")
    parser.add_argument("--num-ctx", type=int,
                        default=int(os.getenv("OLLAMA_NUM_CTX", 8192)),
                        help="Context window size")
    parser.add_argument("--top-p", type=float,
                        default=float(os.getenv("OLLAMA_TOP_P", 1.0)),
                        help="Top-p sampling")
    parser.add_argument("--repeat-penalty", type=float,
                        default=float(os.getenv("OLLAMA_REPEAT_PENALTY", 1.0)),
                        help="Repetition penalty")

    # Utility flags
    parser.add_argument("--clean", action="store_true",
                        help="Delete and recreate the index")
    parser.add_argument("--dry-run", action="store_true",
                        help="Only list files to be indexed")
    parser.add_argument("--verbose", action="store_true",
                        help="Show detailed debug output")
    parser.add_argument("--ignore-file",
                        help="Path to custom .codechatignore file")

    # Add quality parameters
    parser.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE,
                        help="Text chunk size for processing")
    parser.add_argument("--chunk-overlap", type=int, default=DEFAULT_CHUNK_OVERLAP,
                        help="Context overlap between chunks")
    parser.add_argument("--similarity-top-k", type=int, default=DEFAULT_SIMILARITY_TOP_K,
                        help="Number of similar chunks to retrieve")
    parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT,
                        help="Query timeout in seconds")
    parser.add_argument("--max-retries", type=int, default=MAX_RETRIES,
                        help="Number of retry attempts on timeout")

    args = parser.parse_args()

    # Handle global commands first
    if args.list_projects:
        list_projects(verbose=args.verbose)
        return

    if args.show_config:
        show_config(args)
        return

    if args.repair:
        repair_project(args.repair, verbose=args.verbose)
        return

    # Validate project-specific commands
    if not hasattr(args, 'project') or not args.project:
        print("❌ Project name is required for this action")
        parser.print_help()
        sys.exit(1)

    if not validate_project_name(args.project):
        print("❌ Invalid project name. Only alphanumeric, underscore and hyphen characters are allowed.")
        sys.exit(1)

    # Device selection
    if args.gpu and args.cpu:
        print("❌ Cannot force both GPU and CPU modes")
        sys.exit(1)

    device = get_device(force_cpu=args.cpu)
    if args.gpu and device != "cuda":
        print("⚠️ GPU requested but not available - falling back to CPU")
        device = "cpu"

    if args.verbose:
        print("\n⚙️  Configuration:")
        print(f"   Device: {device.upper()} ({'✅ GPU' if device == 'cuda' else '⚠️ CPU'})")
        print(f"   Model: {args.model}")
        print(f"   Embed Model: {args.embed_model}")
        if hasattr(args, 'project'):
            print(f"   Project: {args.project}")
            print(f"   Index Location: {Path(INDEX_ROOT) / args.project}\n")

    # Handle project actions
    if args.analyze:
        analyze_project(args.project, args.verbose)
    elif args.reindex:
        build_index(
            project=args.project,
            codebase_path=Path(args.reindex),
            embed_model=args.embed_model,
            device=device,
            clean=args.clean,
            dry_run=args.dry_run,
            verbose=args.verbose,
            ignore_file_path=Path(args.ignore_file) if args.ignore_file else None,
            chunk_size=args.chunk_size,
            chunk_overlap=args.chunk_overlap
        )
    else:
        chat(
            project=args.project,
            model=args.model,
            embed_model=args.embed_model,
            temperature=args.temperature,
            num_ctx=args.num_ctx,
            top_p=args.top_p,
            repeat_penalty=args.repeat_penalty,
            device=device,
            verbose=args.verbose,
            similarity_top_k=args.similarity_top_k,
            timeout=args.timeout,
            max_retries=args.max_retries,
            chunk_size=args.chunk_size,
            chunk_overlap=args.chunk_overlap
        )


if __name__ == "__main__":
    main()
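
# Example invocations (illustrative; assumes this file is saved as codechat.py,
# that an Ollama server is reachable at OLLAMA_URL, and that "myapp" and
# /path/to/code are placeholders for your own project name and codebase):
#
#   # Preview what would be indexed, then build the index from scratch
#   python codechat.py --project myapp --reindex /path/to/code --dry-run
#   python codechat.py --project myapp --reindex /path/to/code --clean
#
#   # Chat against the existing index with a specific model
#   python codechat.py --project myapp --model phi4:14b --verbose
#
#   # Maintenance commands (each is mutually exclusive with --project)
#   python codechat.py --list-projects
#   python codechat.py --show-config
#   python codechat.py --repair myapp --verbose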