medivocate / src /vector_store /vector_store.py
anekameni
Refactor RAG system query methods; update descriptions and improve logging for better clarity
56d99ec
raw
history blame
1.74 kB
import os
from typing import List, Optional

from langchain_chroma import Chroma
from tqdm import tqdm

from ..utilities.llm_models import get_llm_model_embedding
class VectorStoreManager:
    """Create or load a persistent Chroma vector store of embedded documents.

    Documents are embedded and inserted in batches of ``batch_size``. The
    Chroma collection name is derived from the ``OLLAM_EMB`` environment
    variable (the text before the first ``:``, e.g. ``"nomic-embed-text"``
    from ``"nomic-embed-text:latest"``).
    """

    def __init__(self, docs_dir: str, persist_directory_dir: str, batch_size: int = 64):
        """Set up embeddings and configuration; does not touch disk yet.

        Args:
            docs_dir: Directory containing the source documents.
            persist_directory_dir: Directory where Chroma persists the store.
            batch_size: Number of documents embedded per insertion batch.

        Raises:
            KeyError: If the ``OLLAM_EMB`` environment variable is unset.
        """
        self.embeddings = get_llm_model_embedding()
        self.vector_store: Optional[Chroma] = None
        self.docs_dir = docs_dir
        self.persist_directory_dir = persist_directory_dir
        self.batch_size = batch_size
        # os.environ raises a clear KeyError naming the missing variable,
        # instead of the opaque AttributeError that os.getenv(...) .split()
        # produced when OLLAM_EMB was unset.
        self.collection_name = os.environ["OLLAM_EMB"].split(":")[0]

    def _batch_process_documents(self, documents: List) -> None:
        """Embed *documents* into the vector store in batches.

        The first batch creates (and persists) the collection via
        ``Chroma.from_documents``; subsequent batches are appended with
        ``add_documents``.
        """
        for start in tqdm(
            range(0, len(documents), self.batch_size), desc="Processing documents"
        ):
            batch = documents[start : start + self.batch_size]
            # Explicit None check: don't rely on the truthiness of a Chroma
            # instance to decide whether the store has been created yet.
            if self.vector_store is None:
                self.vector_store = Chroma.from_documents(
                    collection_name=self.collection_name,
                    documents=batch,
                    embedding=self.embeddings,
                    persist_directory=self.persist_directory_dir,
                )
            else:
                self.vector_store.add_documents(batch)

    def initialize_vector_store(self, documents: Optional[List] = None) -> None:
        """Build the store from *documents*, or load the persisted one.

        Args:
            documents: Documents to embed. When ``None`` (or empty), the
                existing persisted collection is opened instead.
        """
        if documents:
            self._batch_process_documents(documents)
        else:
            self.vector_store = Chroma(
                collection_name=self.collection_name,
                persist_directory=self.persist_directory_dir,
                embedding_function=self.embeddings,
            )