File size: 1,802 Bytes
2810627
 
 
 
 
 
 
 
 
 
b87de8f
 
2810627
 
 
 
 
 
 
b87de8f
 
2810627
 
 
b87de8f
2810627
 
b87de8f
2810627
 
 
 
 
 
 
b87de8f
2810627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
    load_index_from_storage
)
from llama_index.vector_stores import SimpleVectorStore
from llama_index.llms import Ollama, OpenAI
import os


# Directory that load_documents() reads the source documents from.
DOCS_DIR = "./raw"
# Directory where build_index() looks for an existing index and persists a new one.
PERSIST_DIR = './persist'


def load_documents():
    """Read the documents stored under ``DOCS_DIR``.

    Returns:
        The result of ``SimpleDirectoryReader(DOCS_DIR).load_data()``.
    """
    reader = SimpleDirectoryReader(DOCS_DIR)
    return reader.load_data()

def build_service_context():
    """Create the service context that pairs the LLM with the embedding model.

    Returns:
        The object produced by ``ServiceContext.from_defaults`` configured with
        an OpenAI ``gpt-3.5-turbo`` LLM and the
        ``local:BAAI/bge-large-en-v1.5`` embedding model.
    """
    # Alternative backend kept for reference: Ollama(model='mistral')
    return ServiceContext.from_defaults(
        llm=OpenAI(model="gpt-3.5-turbo"),
        embed_model="local:BAAI/bge-large-en-v1.5",
    )

def build_index(documents, service_context):
    """Load the persisted vector index, or build and persist a new one.

    An index is considered to exist when ``index_store.json`` is present in
    the absolute form of ``PERSIST_DIR``. In that case the index is loaded
    from storage and ``documents`` is not used. Otherwise a new index is
    built from ``documents`` and written to the persist directory.

    Args:
        documents: Documents to index; only used when no persisted index
            is found.
        service_context: Service context passed to both the load and the
            create path.

    Returns:
        The loaded or newly created index.
    """
    persist_dir = os.path.abspath(PERSIST_DIR)

    if os.path.exists(os.path.join(persist_dir, 'index_store.json')):  # Load
        print('Loading index...')
        # Work around a file-naming mismatch: the vector store may be on disk
        # as 'default__vector_store.json', while the loader below is given
        # 'vector_store.json' instead.
        # NOTE(review): presumably persist() writes the namespaced name and
        # from_persist_dir() reads the plain one -- confirm against the
        # installed llama_index version.
        namespaced_path = os.path.join(persist_dir, 'default__vector_store.json')
        plain_path = os.path.join(persist_dir, 'vector_store.json')
        if os.path.exists(namespaced_path):
            # os.replace overwrites an existing destination on every platform,
            # whereas os.rename raises on Windows if the target already exists.
            os.replace(namespaced_path, plain_path)

        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
            persist_dir=persist_dir,
        )
        index = load_index_from_storage(storage_context, service_context=service_context)
    else:  # Create
        print('Creating index...')
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore(),
        )
        index = VectorStoreIndex.from_documents(
            documents,
            service_context=service_context,
            storage_context=storage_context,
        )
        # Persist through the index's own storage context so what is written
        # is exactly what the index holds.
        index.storage_context.persist(persist_dir=persist_dir)
    return index