# src/vectorstores/chroma_manager.py
"""
ChromaDB connection manager to ensure consistent settings and connection handling
"""

import gc
import os
import shutil
import asyncio
import logging
from typing import Optional, Dict, Any

import chromadb
from chromadb.config import Settings

# Global connection lock and instance
_instance_lock = asyncio.Lock()
_chroma_instance: Optional[Any] = None
# Absolute path the cached client was opened on (None while nothing is cached).
_chroma_instance_path: Optional[str] = None
_collection_lock = asyncio.Lock()
# Collection cache, keyed by collection name only.
_collection_instances: Dict[str, Any] = {}


def _clear_chroma_system_cache() -> None:
    """Best-effort drop of chromadb's in-process per-path system cache.

    The "already exists ... different settings" ValueError handled in
    ``ChromaManager.get_client`` is an in-process condition, so wiping the
    directory on disk is not enough for a retry to succeed. The helper is
    tolerant of chromadb versions that do not expose this API.
    """
    try:
        from chromadb.api.client import SharedSystemClient

        SharedSystemClient.clear_system_cache()
    except (ImportError, AttributeError) as e:
        logging.warning(f"Could not clear ChromaDB system cache: {e}")


class ChromaManager:
    """Singleton manager for ChromaDB connections"""

    @staticmethod
    async def get_client(
        persist_directory: str,
        reset_if_needed: bool = False
    ) -> chromadb.PersistentClient:
        """
        Get a shared ChromaDB client with consistent settings

        The first successful call creates the client; every later call returns
        that same instance. If a later call asks for a different directory the
        cached client is still returned, and a warning is logged so the
        mismatch is visible.

        Args:
            persist_directory (str): Directory to persist ChromaDB
            reset_if_needed (bool): Whether to reset the database if connection fails

        Returns:
            chromadb.PersistentClient: Shared client instance

        Raises:
            ValueError: If the client cannot be created and either the error
                is not an "already exists" conflict or reset_if_needed is False.
        """
        global _chroma_instance, _chroma_instance_path

        requested_path = os.path.abspath(persist_directory)

        async with _instance_lock:
            if _chroma_instance is not None:
                if _chroma_instance_path not in (None, requested_path):
                    logging.warning(
                        f"ChromaDB client already initialised for "
                        f"{_chroma_instance_path}; ignoring requested "
                        f"directory {persist_directory}")
                return _chroma_instance

            # Built outside the try so the retry below can always reuse it.
            settings = Settings(
                allow_reset=True,
                anonymized_telemetry=False,
                is_persistent=True
            )

            # Try to create a client
            try:
                client = chromadb.PersistentClient(
                    path=persist_directory,
                    settings=settings
                )
            except ValueError as e:
                if not ("already exists" in str(e) and reset_if_needed):
                    raise
                logging.warning(
                    f"ChromaDB instance exists with different settings. Attempting reset: {str(e)}")
                await ChromaManager.reset_chroma(persist_directory)
                # Try again after reset; a second failure propagates.
                client = chromadb.PersistentClient(
                    path=persist_directory,
                    settings=settings
                )

            _chroma_instance = client
            _chroma_instance_path = requested_path
            logging.info(
                f"Successfully created ChromaDB client at {persist_directory}")
            return _chroma_instance

    @staticmethod
    async def get_or_create_collection(
        client: chromadb.PersistentClient,
        collection_name: str,
        embedding_dimension: int = 1024
    ):
        """
        Get or create a collection with proper error handling

        Results are cached per collection name, so repeated calls return the
        same collection object until reset_chroma clears the cache.

        Args:
            client (chromadb.PersistentClient): ChromaDB client
            collection_name (str): Name of the collection
            embedding_dimension (int): Dimension of embeddings. Currently
                unused; kept so existing callers keep working.

        Returns:
            Collection: ChromaDB collection

        Raises:
            Exception: Whatever the final create_collection attempt raises.
        """
        # Use just the collection name as key
        collection_key = collection_name

        async with _collection_lock:
            if collection_key in _collection_instances:
                return _collection_instances[collection_key]

            try:
                # Try to get existing collection
                collection = client.get_collection(
                    name=collection_name,
                    embedding_function=None
                )
            except Exception as e:
                # Any lookup failure is treated as "collection is missing".
                logging.info(
                    f"Collection {collection_name} does not exist, creating new one: {str(e)}")
            else:
                logging.info(f"Found existing collection: {collection_name}")
                _collection_instances[collection_key] = collection
                return collection

            # Create new collection with minimal metadata
            # (the "hnsw:dim" parameter is intentionally not passed).
            # NOTE(review): the get path passes embedding_function=None but
            # the create path does not - confirm this difference is intended.
            try:
                collection = client.create_collection(
                    name=collection_name,
                    metadata={"hnsw:space": "cosine"}
                )
            except Exception as create_error:
                # If that fails too, try with no metadata
                logging.warning(
                    f"Error creating collection with metadata: {str(create_error)}")
                collection = client.create_collection(
                    name=collection_name
                )

            _collection_instances[collection_key] = collection
            return collection

    @staticmethod
    async def reset_chroma(persist_directory: str):
        """
        Reset ChromaDB completely by removing the directory

        Drops the cached client and cached collections, clears chromadb's
        in-process system cache, then deletes and recreates the directory.
        All data stored under persist_directory is lost.

        Args:
            persist_directory (str): Directory to remove

        Raises:
            Exception: Any error from removing or recreating the directory is
                logged and re-raised.
        """
        global _chroma_instance, _chroma_instance_path

        # Clear global instances first. The dict is cleared in place so any
        # other reference to it sees the reset as well.
        _chroma_instance = None
        _chroma_instance_path = None
        _collection_instances.clear()

        # Without this a new client on the same path can hit the same
        # "already exists" conflict that triggered the reset.
        _clear_chroma_system_cache()

        try:
            # Best effort: collect garbage so dropped clients release file
            # handles before the directory is removed.
            gc.collect()

            # Remove the entire directory
            # (synchronous call: blocks the event loop while it runs).
            if os.path.exists(persist_directory):
                shutil.rmtree(persist_directory)
                logging.info(
                    f"Removed ChromaDB directory: {persist_directory}")

            # Recreate empty directory
            os.makedirs(persist_directory, exist_ok=True)
            logging.info(
                f"Created fresh ChromaDB directory: {persist_directory}")
        except Exception as e:
            logging.error(f"Error resetting ChromaDB: {str(e)}")
            raise