# originws-app / vectorstore_handler.py
# Author: Maurizio Dipierro
# Commit: "origin working" (cd65ba5)
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
def get_embeddings(model="text-embedding-3-large"):
    """Initialize and return OpenAI embeddings.

    Args:
        model: Name of the OpenAI embedding model to use. Defaults to
            "text-embedding-3-large", the value this function previously
            hard-coded, so existing zero-argument callers are unaffected.

    Returns:
        An ``OpenAIEmbeddings`` instance configured for ``model``.
    """
    # NOTE(review): a vector store persisted with one embedding model should
    # presumably be reloaded with the same model -- confirm before overriding.
    return OpenAIEmbeddings(model=model)
def load_or_create_vectorstore(docs, embeddings, path, *, chunk_size=500, chunk_overlap=0):
    """Load a Chroma vectorstore from ``path``, or build one from ``docs``.

    If ``path`` is a non-empty directory, it is opened as an existing Chroma
    store and ``docs`` is not used. Otherwise ``docs`` is split into chunks,
    embedded, and written to a new store persisted at ``path``.

    Args:
        docs: Documents to index when no store exists yet; passed to
            ``RecursiveCharacterTextSplitter.split_documents``.
        embeddings: Embedding function handed to Chroma for both loading
            and creation.
        path: Directory where the Chroma store is (or will be) persisted.
        chunk_size: Maximum chunk size given to the text splitter.
        chunk_overlap: Overlap between consecutive chunks given to the
            text splitter.

    Returns:
        The loaded or newly created ``Chroma`` vectorstore.
    """
    # A bare existence check would treat an empty directory (or a plain file)
    # as a persisted store and silently skip indexing ``docs``; only reuse the
    # path when it is a directory that actually contains something.
    if os.path.isdir(path) and os.listdir(path):
        print("Loading existing Chroma vector store from disk...")
        return Chroma(persist_directory=path, embedding_function=embeddings)

    # Split documents if vectorstore doesn't exist
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    all_splits = text_splitter.split_documents(docs)
    print(f"Documents are split into {len(all_splits)} chunks from {len(docs)} documents.")

    # Create new vectorstore
    print("Creating new Chroma vector store...")
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embeddings,
        persist_directory=path,
    )
    print(f"Vectorstore created and saved to {path}")
    return vectorstore