import os

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma


def get_embeddings(model: str = "text-embedding-3-large") -> OpenAIEmbeddings:
    """Initialize and return OpenAI embeddings.

    Args:
        model: Name of the OpenAI embedding model to use. Defaults to
            ``"text-embedding-3-large"``.

    Returns:
        An ``OpenAIEmbeddings`` instance configured with ``model``.
    """
    return OpenAIEmbeddings(model=model)


def _has_persisted_store(path) -> bool:
    """Return True when *path* is a directory that contains at least one entry."""
    # A bare existence check would also accept an empty directory (or a plain
    # file), which would make the caller load an empty store and silently skip
    # indexing the documents it was given.
    return os.path.isdir(path) and bool(os.listdir(path))


def load_or_create_vectorstore(
    docs,
    embeddings,
    path,
    *,
    chunk_size: int = 500,
    chunk_overlap: int = 0,
) -> Chroma:
    """Load a Chroma vectorstore from *path*, or build one from *docs*.

    If *path* is a non-empty directory, a ``Chroma`` instance is opened on it
    and *docs* is not used. Otherwise *docs* is split into chunks and a new
    store is created with ``persist_directory=path``.

    Args:
        docs: Documents to index when no store exists yet; passed to
            ``RecursiveCharacterTextSplitter.split_documents``.
        embeddings: Embedding function handed to Chroma.
        path: Directory used as Chroma's ``persist_directory``.
        chunk_size: Chunk size given to the text splitter.
        chunk_overlap: Chunk overlap given to the text splitter.

    Returns:
        The loaded or newly created ``Chroma`` vectorstore.
    """
    if _has_persisted_store(path):
        print("Loading existing Chroma vector store from disk...")
        return Chroma(persist_directory=path, embedding_function=embeddings)

    # Split documents if vectorstore doesn't exist
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    all_splits = text_splitter.split_documents(docs)
    print(f"Documents are split into {len(all_splits)} chunks from {len(docs)} documents.")

    # Create new vectorstore
    print("Creating new Chroma vector store...")
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embeddings,
        persist_directory=path,
    )
    print(f"Vectorstore created and saved to {path}")
    return vectorstore