File size: 5,576 Bytes
6082154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# src/vectorstores/chroma_manager.py
"""
ChromaDB connection manager to ensure consistent settings and connection handling
"""

import os
import shutil
import asyncio
import logging
import chromadb
from chromadb.config import Settings
from typing import Optional, Dict, Any

# Process-wide ChromaDB client shared by every caller; it is created lazily
# by ChromaManager.get_client while holding _instance_lock.
_chroma_instance: Optional[Any] = None
_instance_lock = asyncio.Lock()

# Cache of collection handles keyed by collection name; it is read and
# written by ChromaManager.get_or_create_collection under _collection_lock.
_collection_instances: Dict[str, Any] = {}
_collection_lock = asyncio.Lock()


class ChromaManager:
    """Singleton manager for ChromaDB connections.

    State is held in the module-level globals ``_chroma_instance`` and
    ``_collection_instances`` and guarded by the module-level asyncio locks,
    so callers share one client and one cached handle per collection name.
    """

    # Directory the cached client was created for. Only used to warn when a
    # later caller requests a different directory than the shared client uses.
    _client_directory: Optional[str] = None

    @staticmethod
    def _same_directory(first, second) -> bool:
        """Return True when both values resolve to the same absolute path.

        Values that cannot be interpreted as paths compare as "same" so this
        check can never raise on the cached-client fast path.
        """
        try:
            return os.path.abspath(first) == os.path.abspath(second)
        except TypeError:
            return True

    @staticmethod
    async def get_client(
        persist_directory: str,
        reset_if_needed: bool = False
    ) -> chromadb.PersistentClient:
        """
        Get a shared ChromaDB client with consistent settings

        The first successful call creates the client; every later call returns
        that same instance, even if a different ``persist_directory`` is
        passed (a warning is logged in that case).

        Args:
            persist_directory (str): Directory to persist ChromaDB
            reset_if_needed (bool): Whether to reset the database if connection fails.
                WARNING: a reset deletes everything under ``persist_directory``.

        Returns:
            chromadb.PersistentClient: Shared client instance

        Raises:
            ValueError: If the client cannot be created and either the error
                is not an "already exists" conflict or ``reset_if_needed`` is
                False. Errors from the retry after a reset also propagate.
        """
        global _chroma_instance

        async with _instance_lock:
            if _chroma_instance is not None:
                cached_directory = ChromaManager._client_directory
                if cached_directory is not None and not ChromaManager._same_directory(
                        cached_directory, persist_directory):
                    logging.warning(
                        "ChromaDB client already initialised for %s; "
                        "ignoring requested directory %s",
                        cached_directory, persist_directory)
                return _chroma_instance

            # Built outside the try block so the retry below can always reuse it.
            settings = Settings(
                allow_reset=True,
                anonymized_telemetry=False,
                is_persistent=True
            )

            try:
                client = chromadb.PersistentClient(
                    path=persist_directory,
                    settings=settings
                )
            except ValueError as e:
                if not ("already exists" in str(e) and reset_if_needed):
                    raise
                logging.warning(
                    "ChromaDB instance exists with different settings. "
                    "Attempting reset: %s", e)
                await ChromaManager.reset_chroma(persist_directory)

                # Try again after reset; a second failure propagates.
                client = chromadb.PersistentClient(
                    path=persist_directory,
                    settings=settings
                )
            else:
                logging.info(
                    "Successfully created ChromaDB client at %s",
                    persist_directory)

            # Publish the client only once creation has fully succeeded.
            _chroma_instance = client
            ChromaManager._client_directory = persist_directory
            return client

    @staticmethod
    async def get_or_create_collection(
        client: chromadb.PersistentClient,
        collection_name: str,
        embedding_dimension: int = 1024
    ):
        """
        Get or create a collection with proper error handling

        Handles are cached per collection name, so repeated calls return the
        same object until ``reset_chroma`` clears the cache.

        Args:
            client (chromadb.PersistentClient): ChromaDB client
            collection_name (str): Name of the collection
            embedding_dimension (int): Dimension of embeddings. Currently
                unused; kept so existing callers keep working.

        Returns:
            Collection: ChromaDB collection

        Raises:
            Exception: Whatever ``client.create_collection`` raises when both
                creation attempts fail.
        """
        # Use just the collection name as key
        collection_key = collection_name

        async with _collection_lock:
            if collection_key in _collection_instances:
                return _collection_instances[collection_key]

            try:
                # Try to get existing collection
                collection = client.get_collection(
                    name=collection_name,
                    embedding_function=None
                )
            except Exception as e:
                # Deliberately broad: any lookup failure is treated as
                # "collection missing" and falls through to creation.
                logging.info(
                    "Collection %s does not exist, creating new one: %s",
                    collection_name, e)
            else:
                logging.info("Found existing collection: %s", collection_name)
                _collection_instances[collection_key] = collection
                return collection

            # Create new collection with minimal metadata
            # (the problematic "hnsw:dim" parameter is intentionally omitted).
            # NOTE(review): creation does not pass embedding_function=None
            # while the lookup above does - confirm that is intended.
            try:
                collection = client.create_collection(
                    name=collection_name,
                    metadata={"hnsw:space": "cosine"}
                )
            except Exception as create_error:
                # If that fails too, try with no metadata
                logging.warning(
                    "Error creating collection with metadata: %s",
                    create_error)
                collection = client.create_collection(
                    name=collection_name
                )

            _collection_instances[collection_key] = collection
            return collection

    @staticmethod
    async def reset_chroma(persist_directory: str):
        """
        Reset ChromaDB completely by removing the directory

        Drops the cached client and collection handles, clears chromadb's
        in-process system cache, deletes ``persist_directory`` and recreates
        it empty. This does not acquire the module locks itself (``get_client``
        calls it while already holding ``_instance_lock``).

        Args:
            persist_directory (str): Directory to remove

        Raises:
            Exception: Any error from removing or recreating the directory is
                logged and re-raised.
        """
        global _chroma_instance

        # Clear global instances first. The dict is cleared in place so any
        # other reference to it observes the reset as well.
        _chroma_instance = None
        ChromaManager._client_directory = None
        _collection_instances.clear()

        try:
            # chromadb caches one system per path inside the process; the
            # "already exists ... different settings" ValueError comes from
            # that cache, so it has to be cleared or the retry fails again.
            try:
                from chromadb.api.client import SharedSystemClient
                SharedSystemClient.clear_system_cache()
            except (ImportError, AttributeError) as cache_error:
                logging.warning(
                    "Could not clear ChromaDB system cache: %s", cache_error)

            # Force garbage collection to release file handles
            import gc
            gc.collect()

            # Remove the entire directory
            if os.path.exists(persist_directory):
                shutil.rmtree(persist_directory)
                logging.info(
                    "Removed ChromaDB directory: %s", persist_directory)

            # Recreate empty directory
            os.makedirs(persist_directory, exist_ok=True)
            logging.info(
                "Created fresh ChromaDB directory: %s", persist_directory)

        except Exception as e:
            logging.error("Error resetting ChromaDB: %s", e)
            raise