Spaces:
Running
Running
# src/vectorstores/chroma_manager.py
"""
ChromaDB connection manager to ensure consistent settings and connection handling.
"""
import os | |
import shutil | |
import asyncio | |
import logging | |
import chromadb | |
from chromadb.config import Settings | |
from typing import Optional, Dict, Any | |
# Global connection lock and instance.
# These module-level names are the shared state behind ChromaManager: its
# methods read and rebind them via ``global`` statements.

# Held by ChromaManager.get_client while it checks for / creates the client.
_instance_lock = asyncio.Lock()
# The shared client returned by ChromaManager.get_client; None until the first
# successful creation and again after ChromaManager.reset_chroma.
_chroma_instance: Optional[Any] = None
# Held by ChromaManager.get_or_create_collection while it looks up / creates
# a collection.
_collection_lock = asyncio.Lock()
# Cache of collection objects keyed by collection name; replaced with an empty
# dict by ChromaManager.reset_chroma.
_collection_instances: Dict[str, Any] = {}
class ChromaManager:
    """Singleton manager for ChromaDB connections.

    The class is a namespace of static coroutines that operate on the
    module-level state (``_chroma_instance``, ``_collection_instances`` and
    their locks). Typical use is ``await ChromaManager.get_client(path)``;
    because the methods are static they behave the same when called on an
    instance.
    """

    @staticmethod
    async def get_client(
        persist_directory: str,
        reset_if_needed: bool = False
    ) -> "chromadb.PersistentClient":
        """
        Get a shared ChromaDB client with consistent settings.

        The first client that is created successfully is cached in the
        module-level ``_chroma_instance`` and returned by every later call,
        regardless of the ``persist_directory`` passed to those later calls.

        Args:
            persist_directory (str): Directory to persist ChromaDB
            reset_if_needed (bool): Whether to reset the database (delete and
                recreate ``persist_directory``) and retry once when client
                creation fails with an "already exists" ``ValueError``

        Returns:
            chromadb.PersistentClient: Shared client instance

        Raises:
            ValueError: Propagated from client creation when the message does
                not contain "already exists" or ``reset_if_needed`` is False.
        """
        global _chroma_instance

        async with _instance_lock:
            if _chroma_instance is not None:
                return _chroma_instance

            # Built before the ``try`` so the retry path below can always
            # reuse it, even if the failure happens early.
            settings = Settings(
                allow_reset=True,
                anonymized_telemetry=False,
                is_persistent=True,
            )

            try:
                _chroma_instance = chromadb.PersistentClient(
                    path=persist_directory,
                    settings=settings,
                )
            except ValueError as e:
                if not ("already exists" in str(e) and reset_if_needed):
                    raise
                logging.warning(
                    "ChromaDB instance exists with different settings. "
                    "Attempting reset: %s", e)
                # reset_chroma must not take _instance_lock: it is already
                # held here.
                await ChromaManager.reset_chroma(persist_directory)
                # Try again after reset.
                # NOTE(review): if the conflict comes from an in-process
                # client cache rather than from files on disk, wiping the
                # directory may not be enough -- confirm against the
                # installed chromadb version.
                _chroma_instance = chromadb.PersistentClient(
                    path=persist_directory,
                    settings=settings,
                )
                return _chroma_instance

            logging.info(
                "Successfully created ChromaDB client at %s",
                persist_directory)
            return _chroma_instance

    @staticmethod
    async def get_or_create_collection(
        client: "chromadb.PersistentClient",
        collection_name: str,
        embedding_dimension: int = 1024
    ):
        """
        Get or create a collection with proper error handling.

        Collections are cached in the module-level ``_collection_instances``
        dict keyed by ``collection_name`` only; ``client`` is not part of the
        cache key.

        Args:
            client (chromadb.PersistentClient): ChromaDB client
            collection_name (str): Name of the collection
            embedding_dimension (int): Dimension of embeddings. Currently
                unused; kept so existing callers keep working.

        Returns:
            Collection: ChromaDB collection
        """
        async with _collection_lock:
            if collection_name in _collection_instances:
                return _collection_instances[collection_name]

            try:
                # Try to get existing collection
                collection = client.get_collection(
                    name=collection_name,
                    embedding_function=None,
                )
            except Exception as e:
                # Any failure is treated as "collection missing".
                # NOTE(review): kept broad on purpose; narrow it once the
                # exception raised by the installed chromadb version for a
                # missing collection is confirmed.
                logging.info(
                    "Collection %s does not exist, creating new one: %s",
                    collection_name, e)
                collection = ChromaManager._create_collection(
                    client, collection_name)
            else:
                logging.info("Found existing collection: %s", collection_name)

            _collection_instances[collection_name] = collection
            return collection

    @staticmethod
    def _create_collection(client, collection_name: str):
        """Create ``collection_name``, falling back to no metadata on error."""
        # Create new collection with minimal metadata
        # (the problematic "hnsw:dim" parameter is intentionally not passed).
        try:
            return client.create_collection(
                name=collection_name,
                metadata={"hnsw:space": "cosine"},
            )
        except Exception as create_error:
            # If that fails too, try with no metadata
            logging.warning(
                "Error creating collection with metadata: %s", create_error)
            return client.create_collection(name=collection_name)

    @staticmethod
    async def reset_chroma(persist_directory: str):
        """
        Reset ChromaDB completely by removing the directory.

        Clears the cached client and collections, deletes
        ``persist_directory`` if it exists, and recreates it empty.

        NOTE: ``get_client`` calls this while holding ``_instance_lock``, so
        this method must not try to acquire that lock itself.

        Args:
            persist_directory (str): Directory to remove

        Raises:
            Exception: Any error from removing or recreating the directory is
                logged and re-raised.
        """
        global _chroma_instance, _collection_instances

        # Clear global instances first
        _chroma_instance = None
        _collection_instances = {}

        try:
            # Force garbage collection to release file handles
            import gc
            gc.collect()

            # Remove the entire directory
            if os.path.exists(persist_directory):
                shutil.rmtree(persist_directory)
                logging.info(
                    "Removed ChromaDB directory: %s", persist_directory)

            # Recreate empty directory
            os.makedirs(persist_directory, exist_ok=True)
            logging.info(
                "Created fresh ChromaDB directory: %s", persist_directory)
        except Exception as e:
            logging.error("Error resetting ChromaDB: %s", e)
            raise