# Per-user FAISS vector stores: build an index from a report's text, persist
# it under VECTORSTORE_DIR, and reload it later as a retriever.
import os
from pathlib import Path

from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Text splitter: chunks of up to 500 units with a 50-unit overlap between
# neighbouring chunks (units are whatever the splitter's length function
# counts; presumably characters with the library default).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)

# Embedding model, shared by ingestion and loading. A store must be loaded
# with the same embeddings object it was built with, which is why both
# functions below use this single module-level instance.
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

# Root folder holding one "<user_id>_faiss" sub-folder per user.
VECTORSTORE_DIR = "user_vectorstores"
# Number of chunks a retriever returns per query unless the caller overrides it.
DEFAULT_TOP_K = 3
os.makedirs(VECTORSTORE_DIR, exist_ok=True)


def _user_store_path(user_id: str) -> Path:
    """Return the on-disk folder for *user_id*'s vector store.

    Raises:
        ValueError: If *user_id* would make the folder name resolve to
            anything other than a single path component (for example when it
            contains a path separator or a drive prefix). This keeps every
            store directly inside ``VECTORSTORE_DIR``.
    """
    folder_name = f"{user_id}_faiss"
    # A plain folder name is its own final path component; anything else
    # ("../x", "a/b", "C:x") would escape VECTORSTORE_DIR when joined.
    if Path(folder_name).name != folder_name:
        raise ValueError(f"Invalid user id for a vectorstore path: {user_id!r}")
    return Path(VECTORSTORE_DIR) / folder_name


def ingest_report(user_id: str, report_text: str, *, k: int = DEFAULT_TOP_K):
    """Index *report_text* for *user_id* and persist the index to disk.

    The text is split into chunks, embedded into a new FAISS store, and saved
    to the user's folder. Each call builds the store from *report_text* only;
    it does not merge with a previously saved store for the same user.

    Args:
        user_id: Identifier used to derive the store's folder name.
        report_text: Raw text of the report to index.
        k: Number of chunks the returned retriever fetches per query.

    Returns:
        A ``(vectorstore, retriever)`` tuple for the freshly built store.

    Raises:
        ValueError: If *user_id* is not usable as a folder name, or if
            *report_text* yields no chunks (e.g. it is empty).
    """
    # Validate the destination first so a bad id fails before any embedding
    # work is done.
    user_path = _user_store_path(user_id)

    # Split into documents
    documents = text_splitter.create_documents([report_text])
    if not documents:
        # Building a FAISS store from zero documents fails with an unhelpful
        # low-level error; report the real cause instead.
        raise ValueError("report_text produced no chunks to index.")

    # Create FAISS vectorstore
    vectorstore = FAISS.from_documents(documents, embeddings)

    # Save to disk
    vectorstore.save_local(str(user_path))

    retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    return vectorstore, retriever


def get_user_retriever(user_id: str, *, k: int = DEFAULT_TOP_K):
    """Load the saved vector store for *user_id* and return a retriever.

    Args:
        user_id: Identifier used to derive the store's folder name.
        k: Number of chunks the retriever fetches per query.

    Returns:
        A retriever over the user's persisted FAISS store.

    Raises:
        ValueError: If *user_id* is not usable as a folder name.
        FileNotFoundError: If no store has been saved for *user_id*.
    """
    user_path = _user_store_path(user_id)
    if not user_path.exists():
        raise FileNotFoundError(f"Vectorstore for user {user_id} not found.")

    # SECURITY: allow_dangerous_deserialization=True lets the loader unpickle
    # data from user_path. That is only safe while those files are written
    # exclusively by this application (see ingest_report); never point this
    # at files obtained from an untrusted source.
    vectorstore = FAISS.load_local(
        str(user_path),
        embeddings,
        allow_dangerous_deserialization=True,
    )
    return vectorstore.as_retriever(search_kwargs={"k": k})