# SAIRA / index.py
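"""Build or load a persisted llama_index VectorStoreIndex over the documents in ./raw.

The index is persisted under ./persist; on later runs it is loaded from disk
instead of being rebuilt from the source documents.
"""
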
from llama_index import (
VectorStoreIndex,
SimpleDirectoryReader,
ServiceContext,
StorageContext,
load_index_from_storage
)
from llama_index.vector_stores import SimpleVectorStore
from llama_index.llms import Ollama, OpenAI
import os

DOCS_DIR = "./raw"
PERSIST_DIR = "./persist"

def load_documents():
    """Load every file in DOCS_DIR into llama_index Document objects."""
    documents = SimpleDirectoryReader(DOCS_DIR).load_data()
    return documents

def build_service_context():
    """Create a ServiceContext with an OpenAI LLM and a local BGE embedding model."""
    # llm = Ollama(model='mistral')
    # Requires the OPENAI_API_KEY environment variable to be set.
    llm = OpenAI(model="gpt-3.5-turbo")
    return ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-large-en-v1.5")

def build_index(documents, service_context):
    """Load the index from PERSIST_DIR if it exists; otherwise build it and persist it."""
    persist_dir = os.path.abspath(PERSIST_DIR)
    if os.path.exists(persist_dir + '/index_store.json'):  # Load
        print('Loading index...')
        # Solving issue with naming: the vector store may have been persisted
        # under the default namespace, so rename it to the expected file name.
        old_name = '/default__vector_store.json'
        new_name = '/vector_store.json'
        if os.path.exists(persist_dir + old_name):
            os.rename(persist_dir + old_name, persist_dir + new_name)
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
            persist_dir=persist_dir,
        )
        index = load_index_from_storage(storage_context, service_context=service_context)
    else:  # Create
        print('Creating index...')
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore(),
        )
        index = VectorStoreIndex.from_documents(
            documents,
            service_context=service_context,
            storage_context=storage_context,
        )
        # storage_context.persist(persist_dir=persist_dir)
        index.storage_context.persist(persist_dir=persist_dir)
    return index
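

# Minimal usage sketch, assuming the three helpers are meant to be composed in this
# order by the caller (e.g., an app entry point): load documents, build a service
# context, then build or load the persisted index. The sample question string is
# purely illustrative.
if __name__ == "__main__":
    docs = load_documents()
    service_context = build_service_context()
    index = build_index(docs, service_context)
    # Query the freshly built (or loaded) index.
    query_engine = index.as_query_engine()
    response = query_engine.query("What do these documents cover?")
    print(response)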