Spaces:
Sleeping
Sleeping
File size: 1,124 Bytes
6c94128 cd65ba5 6c94128 cd65ba5 6c94128 cd65ba5 6c94128 cd65ba5 6c94128 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
def get_embeddings(model="text-embedding-3-large"):
    """Initialize and return OpenAI embeddings.

    Args:
        model: Name of the OpenAI embedding model to use. Defaults to
            "text-embedding-3-large", the value this helper always used,
            so existing no-argument calls behave exactly as before.

    Returns:
        An ``OpenAIEmbeddings`` instance configured for ``model``.
    """
    return OpenAIEmbeddings(model=model)
def load_or_create_vectorstore(docs, embeddings, path, *, chunk_size=500, chunk_overlap=0):
    """Load or create a Chroma vectorstore.

    If ``path`` already holds a persisted store it is opened as-is and
    ``docs`` is ignored. Otherwise ``docs`` is split into chunks, embedded,
    and written to a new store persisted at ``path``.

    Args:
        docs: Documents to index when no store exists yet; passed to
            ``RecursiveCharacterTextSplitter.split_documents``.
        embeddings: Embedding function handed to Chroma, both when loading
            and when creating the store.
        path: Directory used as Chroma's ``persist_directory``.
        chunk_size: Maximum chunk size given to the text splitter.
        chunk_overlap: Overlap between consecutive chunks given to the
            text splitter.

    Returns:
        The loaded or newly created ``Chroma`` vectorstore.
    """
    # An existing but empty directory (e.g. one created ahead of time) holds
    # no persisted data. Treating it as "existing" would skip indexing of
    # ``docs`` entirely, so it is handled like a missing store instead.
    # Any other existing path keeps the original behaviour and is loaded.
    is_empty_dir = os.path.isdir(path) and not os.listdir(path)
    if os.path.exists(path) and not is_empty_dir:
        print("Loading existing Chroma vector store from disk...")
        return Chroma(persist_directory=path, embedding_function=embeddings)

    # Split documents if vectorstore doesn't exist
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    all_splits = text_splitter.split_documents(docs)
    print(f"Documents are split into {len(all_splits)} chunks from {len(docs)} documents.")

    # Create new vectorstore
    print("Creating new Chroma vector store...")
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embeddings,
        persist_directory=path,
    )
    print(f"Vectorstore created and saved to {path}")
    return vectorstore
|