chatbot-backend / src /vectorstores /chroma_manager.py
TalatMasood's picture
Updating chroma db to be singleton class
6082154
raw
history blame
5.58 kB
# src/vectorstores/chroma_manager.py
"""
ChromaDB connection manager to ensure consistent settings and connection handling
"""
import os
import shutil
import asyncio
import logging
import chromadb
from chromadb.config import Settings
from typing import Optional, Dict, Any
# Global connection lock and instance.
# These module-level names hold the process-wide singleton state used by
# ChromaManager: one shared client plus a cache of collection handles keyed by
# collection name. Each piece of state has its own asyncio lock so that
# concurrent coroutines do not create the client or a collection twice.
# NOTE(review): the locks are constructed at import time. On Python < 3.10 an
# asyncio.Lock is tied to the event loop that is current when it is created —
# confirm the supported Python version, or that the application runs on that
# same loop.
_instance_lock = asyncio.Lock()
# The shared ChromaDB client; None until ChromaManager.get_client() succeeds.
_chroma_instance: Optional[Any] = None
_collection_lock = asyncio.Lock()
# Cache of collection handles, keyed by collection name only.
_collection_instances: Dict[str, Any] = {}
class ChromaManager:
    """Singleton manager for ChromaDB connections.

    State is kept in the module-level globals ``_chroma_instance`` and
    ``_collection_instances`` (guarded by ``_instance_lock`` and
    ``_collection_lock``), so every caller in the process shares one client
    and one handle per collection name.
    """

    # Absolute path the cached client was opened on. Used only to warn when a
    # caller later requests a different directory than the one in use.
    _client_path: Optional[str] = None

    @staticmethod
    def _create_client(persist_directory: str):
        """Build a persistent client using the manager's fixed settings.

        Args:
            persist_directory (str): Directory to persist ChromaDB

        Returns:
            The newly created ChromaDB client.
        """
        settings = Settings(
            allow_reset=True,
            anonymized_telemetry=False,
            is_persistent=True
        )
        client = chromadb.PersistentClient(
            path=persist_directory,
            settings=settings
        )
        logging.info(
            f"Successfully created ChromaDB client at {persist_directory}")
        return client

    @staticmethod
    def _clear_chroma_system_cache() -> None:
        """Drop chromadb's process-wide cache of backing systems, if available.

        chromadb caches one backing system per path inside the process and
        refuses to open the same path with different settings. Deleting the
        directory on disk does not clear that cache, so a reset has to clear
        it explicitly or the next client would reuse the stale system.

        The lookup is defensive so that chromadb versions without this helper
        keep the previous behaviour instead of failing.
        """
        try:
            from chromadb.api.client import SharedSystemClient
        except ImportError:
            logging.debug(
                "chromadb SharedSystemClient is not importable; "
                "skipping system cache clear")
            return

        clear_cache = getattr(SharedSystemClient, "clear_system_cache", None)
        if callable(clear_cache):
            clear_cache()

    @staticmethod
    async def get_client(
        persist_directory: str,
        reset_if_needed: bool = False
    ) -> chromadb.PersistentClient:
        """
        Get a shared ChromaDB client with consistent settings

        The first successful call creates the client; later calls return the
        same instance regardless of ``persist_directory`` (a warning is logged
        when the requested directory differs from the one in use).

        Args:
            persist_directory (str): Directory to persist ChromaDB
            reset_if_needed (bool): Whether to reset the database if connection fails

        Returns:
            chromadb.PersistentClient: Shared client instance

        Raises:
            ValueError: If the client cannot be created and either the error
                is not an "already exists" conflict or ``reset_if_needed`` is
                False. Errors raised by the reset itself also propagate.
        """
        global _chroma_instance

        async with _instance_lock:
            if _chroma_instance is not None:
                requested_path = os.path.abspath(persist_directory)
                known_path = ChromaManager._client_path
                if known_path is not None and requested_path != known_path:
                    # The singleton cannot serve two directories; make the
                    # mismatch visible instead of silently ignoring it.
                    logging.warning(
                        "ChromaDB client already initialised at %s; "
                        "ignoring requested directory %s",
                        known_path, requested_path)
                return _chroma_instance

            # Try to create a client
            try:
                client = ChromaManager._create_client(persist_directory)
            except ValueError as e:
                if not ("already exists" in str(e) and reset_if_needed):
                    raise
                logging.warning(
                    f"ChromaDB instance exists with different settings. Attempting reset: {str(e)}")
                # reset_chroma() does not take _instance_lock, so calling it
                # while holding the lock here cannot deadlock.
                await ChromaManager.reset_chroma(persist_directory)
                # Try again after reset
                client = ChromaManager._create_client(persist_directory)

            _chroma_instance = client
            ChromaManager._client_path = os.path.abspath(persist_directory)
            return _chroma_instance

    @staticmethod
    async def get_or_create_collection(
        client: chromadb.PersistentClient,
        collection_name: str,
        embedding_dimension: int = 1024
    ):
        """
        Get or create a collection with proper error handling

        Handles are cached per collection name, so repeated calls return the
        same object until ``reset_chroma`` clears the cache.

        Args:
            client (chromadb.PersistentClient): ChromaDB client
            collection_name (str): Name of the collection
            embedding_dimension (int): Dimension of embeddings. Currently
                unused; kept for interface compatibility.

        Returns:
            Collection: ChromaDB collection
        """
        # Use just the collection name as key
        collection_key = collection_name

        async with _collection_lock:
            cached = _collection_instances.get(collection_key)
            if cached is not None:
                return cached

            try:
                # Try to get existing collection
                collection = client.get_collection(
                    name=collection_name,
                    embedding_function=None
                )
            except Exception as e:
                # Any lookup failure is treated as "collection is missing";
                # a genuine backend error will resurface from the create call.
                logging.info(
                    f"Collection {collection_name} does not exist, creating new one: {str(e)}")
            else:
                logging.info(f"Found existing collection: {collection_name}")
                _collection_instances[collection_key] = collection
                return collection

            # Create new collection with minimal metadata
            # Removed the problematic "hnsw:dim" parameter
            # NOTE(review): the lookup above passes embedding_function=None
            # but creation relies on chromadb's default — confirm whether the
            # two paths are meant to differ.
            try:
                collection = client.create_collection(
                    name=collection_name,
                    metadata={"hnsw:space": "cosine"}
                )
            except Exception as create_error:
                # If that fails too, try with no metadata
                logging.warning(
                    f"Error creating collection with metadata: {str(create_error)}")
                collection = client.create_collection(
                    name=collection_name
                )

            _collection_instances[collection_key] = collection
            return collection

    @staticmethod
    async def reset_chroma(persist_directory: str):
        """
        Reset ChromaDB completely by removing the directory

        Clears the cached client and collection handles, clears chromadb's
        in-process system cache, deletes ``persist_directory`` and recreates
        it empty. All persisted data in that directory is lost.

        The filesystem work runs synchronously without yielding to the event
        loop, so no other coroutine can repopulate the caches mid-reset.

        Args:
            persist_directory (str): Directory to remove

        Raises:
            Exception: Any error raised while clearing or recreating the
                directory is logged and re-raised.
        """
        global _chroma_instance, _collection_instances

        # Clear global instances first
        _chroma_instance = None
        _collection_instances = {}
        ChromaManager._client_path = None

        try:
            # Without this, a client created after the reset would reuse the
            # cached system that still points at the deleted files (or still
            # conflicts on settings).
            ChromaManager._clear_chroma_system_cache()

            # Force garbage collection to release file handles
            import gc
            gc.collect()

            # Remove the entire directory
            if os.path.exists(persist_directory):
                shutil.rmtree(persist_directory)
                logging.info(
                    f"Removed ChromaDB directory: {persist_directory}")

            # Recreate empty directory
            os.makedirs(persist_directory, exist_ok=True)
            logging.info(
                f"Created fresh ChromaDB directory: {persist_directory}")
        except Exception as e:
            logging.error(f"Error resetting ChromaDB: {str(e)}")
            raise