Spaces:
Sleeping
Sleeping
File size: 1,802 Bytes
2810627 b87de8f 2810627 b87de8f 2810627 b87de8f 2810627 b87de8f 2810627 b87de8f 2810627 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
from llama_index import (
VectorStoreIndex,
SimpleDirectoryReader,
ServiceContext,
StorageContext,
load_index_from_storage
)
from llama_index.vector_stores import SimpleVectorStore
from llama_index.llms import Ollama, OpenAI
import os
# Directory containing the raw source documents to be indexed.
DOCS_DIR = "./raw"
# Directory where the built vector index is persisted between runs.
PERSIST_DIR = './persist'
def load_documents():
    """Read every file under DOCS_DIR and return it as a list of documents."""
    reader = SimpleDirectoryReader(DOCS_DIR)
    return reader.load_data()
def build_service_context():
    """Build a ServiceContext using an OpenAI chat LLM and a local BGE embedder."""
    # Local-model alternative, kept for easy switching:
    # llm = Ollama(model='mistral')
    chat_llm = OpenAI(model="gpt-3.5-turbo")
    context = ServiceContext.from_defaults(
        llm=chat_llm,
        embed_model="local:BAAI/bge-large-en-v1.5",
    )
    return context
def build_index(documents, service_context):
    """Load a persisted VectorStoreIndex if one exists, otherwise build and persist one.

    Args:
        documents: Documents to index; used only when no persisted index exists.
        service_context: ServiceContext supplying the LLM and embedding model.

    Returns:
        A ready-to-query VectorStoreIndex.
    """
    persist_dir = os.path.abspath(PERSIST_DIR)
    index_store_path = os.path.join(persist_dir, 'index_store.json')
    if os.path.exists(index_store_path):  # Load previously persisted index
        print('Loading index...')
        # Workaround for a persisted-file naming mismatch: some llama_index
        # versions write the store as 'default__vector_store.json', but
        # SimpleVectorStore.from_persist_dir expects 'vector_store.json'.
        # Rename on load so either layout works.
        old_path = os.path.join(persist_dir, 'default__vector_store.json')
        new_path = os.path.join(persist_dir, 'vector_store.json')
        if os.path.exists(old_path):
            os.rename(old_path, new_path)
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
            persist_dir=persist_dir,
        )
        index = load_index_from_storage(storage_context, service_context=service_context)
    else:  # Create a fresh index and persist it for next run
        print('Creating index...')  # fixed typo: was 'Creaing index...'
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore(),
        )
        index = VectorStoreIndex.from_documents(
            documents,
            service_context=service_context,
            storage_context=storage_context,
        )
        index.storage_context.persist(persist_dir=persist_dir)
    return index
|