Spaces:
Running
Running
File size: 5,137 Bytes
b953016 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# src/vectorstores/optimized_vectorstore.py
import asyncio
from typing import Tuple, Optional, List, Dict, Any, Callable
import concurrent.futures
from functools import lru_cache
from .base_vectorstore import BaseVectorStore
from .chroma_vectorstore import ChromaVectorStore
from src.embeddings.huggingface_embedding import HuggingFaceEmbedding
from src.utils.logger import logger
from config.config import settings
class OptimizedVectorStore(ChromaVectorStore):
    """
    Singleton vector store that keeps the ChromaVectorStore interface while
    deferring the expensive setup to an explicit asynchronous step.

    Constructing the class only records the configuration. The embedding
    model is created and ``ChromaVectorStore.__init__`` is run later by
    ``_initialize()``, which ``create()`` awaits. Until that has happened,
    accessing a callable defined directly on ``ChromaVectorStore`` through
    the instance raises ``RuntimeError`` (see ``__getattribute__``).
    """

    # The single shared instance handed out by __new__.
    _instance: Optional['OptimizedVectorStore'] = None
    # Serializes concurrent create() calls.
    # NOTE(review): created at import time, outside any running event loop --
    # confirm this is safe on every Python version the project supports.
    _lock = asyncio.Lock()
    # Class-level default only; _initialize() sets the flag on the instance.
    _initialized = False
    _embedding_model: Optional[HuggingFaceEmbedding] = None
    # One worker thread, used to build the embedding model off the event loop.
    _executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, allocating it on first use."""
        # Compare against None explicitly: a truthiness test would consult
        # __bool__/__len__ if a base class defines one, and a store that
        # evaluates as falsy would then be silently replaced.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(
        self,
        embedding_function: Optional[Callable] = None,
        persist_directory: str = settings.CHROMA_PATH,
        collection_name: str = "documents",
        client_settings: Optional[Dict[str, Any]] = None
    ):
        """
        Record the configuration for a later, deferred initialization.

        The parent constructor is intentionally NOT called here; that happens
        in ``_initialize()``. Once the instance is initialized, calling the
        constructor again leaves the stored configuration untouched.

        Args:
            embedding_function: Stored on the instance. Note that
                ``_initialize()`` passes the loaded model's
                ``embed_documents`` to the parent constructor instead.
            persist_directory: Directory handed to the parent constructor.
            collection_name: Collection name handed to the parent constructor.
            client_settings: Client settings handed to the parent constructor.
        """
        if self._initialized:
            return
        self._persist_directory = persist_directory
        self._collection_name = collection_name
        self._client_settings = client_settings
        self._embedding_function = embedding_function

    @classmethod
    async def create(
        cls,
        persist_directory: str = settings.CHROMA_PATH,
        collection_name: str = "documents",
        client_settings: Optional[Dict[str, Any]] = None
    ) -> Tuple['OptimizedVectorStore', HuggingFaceEmbedding]:
        """
        Asynchronously create the shared instance, or return the existing one.

        The arguments only take effect while the store has not been
        initialized yet; afterwards the existing instance is returned as is.

        Returns:
            Tuple[OptimizedVectorStore, HuggingFaceEmbedding]:
            The vector store instance and its embedding model.

        Raises:
            Exception: Whatever ``_initialize()`` raises is propagated.
        """
        async with cls._lock:
            existing = cls._instance
            # Read the flag through the instance: _initialize() sets it
            # there, so the class-level attribute never leaves False and
            # checking it would force a pointless re-entry on every call.
            if existing is None or not existing._initialized:
                instance = cls(
                    persist_directory=persist_directory,
                    collection_name=collection_name,
                    client_settings=client_settings
                )
                await instance._initialize()
                cls._instance = instance
            return cls._instance, cls._instance._embedding_model

    async def _initialize(self) -> None:
        """
        Load the embedding model and run the parent constructor once.

        Does nothing when the instance is already initialized. Errors are
        logged and re-raised; the initialized flag is only set on success.
        """
        if self._initialized:
            return
        try:
            # Load embedding model in background thread
            self._embedding_model = await self._load_embedding_model()
            # Initialize ChromaVectorStore with the loaded model.
            # NOTE(review): the guard in __getattribute__ is still active
            # while the parent constructor runs; if that constructor calls
            # its own methods through self they would be rejected -- confirm
            # against ChromaVectorStore.
            super().__init__(
                embedding_function=self._embedding_model.embed_documents,
                persist_directory=self._persist_directory,
                collection_name=self._collection_name,
                client_settings=self._client_settings
            )
            self._initialized = True
        except Exception as e:
            logger.error(f"Error initializing vector store: {str(e)}")
            raise

    async def _load_embedding_model(self) -> HuggingFaceEmbedding:
        """
        Build the embedding model on the class executor and await the result.

        Errors are logged and re-raised.
        """
        try:
            # We are inside a coroutine, so a loop is guaranteed to be
            # running; get_running_loop() is the recommended accessor here.
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                self._executor,
                self._create_embedding_model
            )
        except Exception as e:
            logger.error(f"Error loading embedding model: {str(e)}")
            raise

    @staticmethod
    @lru_cache(maxsize=1)
    def _create_embedding_model() -> HuggingFaceEmbedding:
        """Create the embedding model; lru_cache keeps the single result."""
        return HuggingFaceEmbedding(model_name=settings.EMBEDDING_MODEL)

    def __getattribute__(self, name):
        """
        Reject ChromaVectorStore callables until the store is initialized.

        Only names present in ``ChromaVectorStore.__dict__`` (i.e. defined
        directly on that class) whose value is callable are guarded; every
        other attribute is returned unchanged.

        Raises:
            RuntimeError: If a guarded callable is accessed before
                ``_initialize()`` has completed.
        """
        # Resolve the attribute normally first
        attr = super().__getattribute__(name)
        # If it's a method from ChromaVectorStore, ensure initialization
        if callable(attr) and name in ChromaVectorStore.__dict__:
            if not self._initialized:
                raise RuntimeError(
                    "Vector store not initialized. Please use 'await OptimizedVectorStore.create()'"
                )
        return attr
# Factory function for getting optimized vector store
async def get_optimized_vector_store() -> Tuple[ChromaVectorStore, HuggingFaceEmbedding]:
    """
    Get or create the shared optimized vector store.

    Thin wrapper that awaits ``OptimizedVectorStore.create()`` with its
    default arguments.

    Returns:
        Tuple[ChromaVectorStore, HuggingFaceEmbedding]:
        The vector store and embedding model instances.
    """
    return await OptimizedVectorStore.create()