from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)
from llama_index.vector_stores import SimpleVectorStore
from llama_index.llms import Ollama, OpenAI
import os
# Directory containing the raw documents to be indexed.
DOCS_DIR = "./raw"
# Directory where the built index is persisted between runs.
PERSIST_DIR = "./persist"
def load_documents():
    """Read every file under DOCS_DIR and return the resulting documents."""
    return SimpleDirectoryReader(DOCS_DIR).load_data()
def build_service_context():
    """Create a ServiceContext pairing an OpenAI chat LLM with a local BGE embedder.

    Returns:
        ServiceContext configured with gpt-3.5-turbo and the
        local BAAI/bge-large-en-v1.5 embedding model.
    """
    # Local alternative previously used here: Ollama(model='mistral')
    chat_llm = OpenAI(model="gpt-3.5-turbo")
    return ServiceContext.from_defaults(
        llm=chat_llm,
        embed_model="local:BAAI/bge-large-en-v1.5",
    )
def build_index(documents, service_context):
    """Load a persisted vector index from PERSIST_DIR, or build and persist one.

    Args:
        documents: Documents to index (e.g. from ``load_documents()``); only
            used when no persisted index exists yet.
        service_context: ServiceContext supplying the LLM and embedding model.

    Returns:
        A VectorStoreIndex, either loaded from disk or freshly built.
    """
    persist_dir = os.path.abspath(PERSIST_DIR)
    if os.path.exists(os.path.join(persist_dir, 'index_store.json')):  # Load
        print('Loading index...')
        # Some llama_index versions persist the vector store with a
        # "default__" prefix; rename it so from_persist_dir can find it.
        old_path = os.path.join(persist_dir, 'default__vector_store.json')
        new_path = os.path.join(persist_dir, 'vector_store.json')
        if os.path.exists(old_path):
            os.rename(old_path, new_path)
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
            persist_dir=persist_dir,
        )
        index = load_index_from_storage(storage_context, service_context=service_context)
    else:  # Create
        print('Creating index...')
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore(),
        )
        index = VectorStoreIndex.from_documents(
            documents,
            service_context=service_context,
            storage_context=storage_context,
        )
        # Persist only after building — a freshly loaded index is unchanged,
        # so re-persisting it on the load path would be redundant I/O.
        index.storage_context.persist(persist_dir=persist_dir)
    return index