Spaces:
Sleeping
Sleeping
import os | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain_openai import OpenAIEmbeddings | |
from langchain_chroma import Chroma | |
def get_embeddings(model: str = "text-embedding-3-large") -> "OpenAIEmbeddings":
    """Initialize and return OpenAI embeddings.

    Args:
        model: Name of the OpenAI embedding model to use. Defaults to
            ``"text-embedding-3-large"``, the value that was previously
            hard-coded, so existing zero-argument callers are unaffected.

    Returns:
        An ``OpenAIEmbeddings`` instance configured for ``model``.
    """
    # NOTE(review): a persisted vector store should presumably be queried with
    # the same embedding model that built it -- confirm before overriding the
    # default for a store that already exists on disk.
    return OpenAIEmbeddings(model=model)
def load_or_create_vectorstore(docs, embeddings, path: str) -> "Chroma":
    """Load or create a Chroma vectorstore.

    If ``path`` is an existing, non-empty directory, it is opened as a
    persisted Chroma store and ``docs`` is not used. Otherwise ``docs`` is
    split into chunks, embedded, and written to a new store at ``path``.

    Args:
        docs: Documents to index when no store exists yet. Passed to
            ``RecursiveCharacterTextSplitter.split_documents`` and must
            support ``len()``.
        embeddings: Embedding function handed to Chroma for both loading
            and creation.
        path: Directory used as Chroma's ``persist_directory``.

    Returns:
        The loaded or newly created ``Chroma`` vector store.
    """
    # A bare existence check would treat an empty (e.g. pre-created) directory
    # as a populated store and silently return a store with nothing indexed,
    # so require the directory to actually contain something before loading.
    if os.path.isdir(path) and os.listdir(path):
        print("Loading existing Chroma vector store from disk...")
        return Chroma(persist_directory=path, embedding_function=embeddings)

    # Split documents if vectorstore doesn't exist
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    all_splits = text_splitter.split_documents(docs)
    print(f"Documents are split into {len(all_splits)} chunks from {len(docs)} documents.")

    # Create new vectorstore
    print("Creating new Chroma vector store...")
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embeddings,
        persist_directory=path,
    )
    print(f"Vectorstore created and saved to {path}")
    return vectorstore