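"""Gradio chatbot that answers questions over local documents using a llama_index
GPTSimpleVectorIndex persisted to index.json."""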
from my_functions.save_response import save_response
from llama_index import (
    SimpleDirectoryReader,
    GPTListIndex,
    GPTSimpleVectorIndex,
    LLMPredictor,
    PromptHelper,
    ServiceContext,
)
from llama_index.node_parser import SimpleNodeParser
from langchain import OpenAI
import gradio as gr
import sys
import os
import os.path
import shutil

from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()
# Get the value of OPENAI_API_KEY from the environment and fail early if it is missing
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; add it to your .env file.")
# Use the API key in the llama_index / langchain calls below
os.environ["OPENAI_API_KEY"] = api_key

sys.path.append("/my_functions")

# Defining the parameters for the index
max_input_size = 4096
num_outputs = 1024
max_chunk_overlap = 20

prompt_helper = PromptHelper(
    max_input_size,
    num_outputs,
    max_chunk_overlap,
)

llm_predictor = LLMPredictor(
    llm=OpenAI(temperature=0.7, model_name="gpt-3.5-turbo", max_tokens=num_outputs)
)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor, prompt_helper=prompt_helper
)


def construct_index(directory_path):
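    """Build or update the on-disk vector index from the files in directory_path.

    If index.json already exists, new documents are inserted into it; otherwise a
    fresh index is built. Indexed files are then moved out of docs/ into
    indexed_documents/ so they are not re-ingested on the next run.
    """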
    if os.path.isfile("index.json"):
        # Index file exists, so we'll load it and add new documents to it
        index = GPTSimpleVectorIndex.load_from_disk(
            "index.json", service_context=service_context
        )
        documents = SimpleDirectoryReader(directory_path).load_data()
        for doc in documents:
            index.insert(doc, service_context=service_context)
        index.save_to_disk("index.json")
    else:
        # Index file doesn't exist, so we'll create a new index from scratch
        documents = SimpleDirectoryReader(directory_path).load_data()
        index = GPTSimpleVectorIndex.from_documents(
            documents, service_context=service_context
        )
        index.save_to_disk("index.json")

    # Define the paths to the source and destination folders
    absolute_path = os.path.dirname(__file__)
    src_folder = os.path.join(absolute_path, "docs/")
    dest_folder = os.path.join(absolute_path, "indexed_documents/")

    # Make sure the destination folder exists, then list the files to move
    os.makedirs(dest_folder, exist_ok=True)
    files = os.listdir(src_folder)

    # Move each file from the source folder to the destination folder,
    # except for the "do_not_delete.txt" file
    for file in files:
        if file != "do_not_delete.txt":
            src_path = os.path.join(src_folder, file)
            dest_path = os.path.join(dest_folder, file)
            shutil.move(src_path, dest_path)

    return index


def chatbot(input_text):
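    """Query the saved index with input_text and return (answer, formatted sources)."""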
    index = GPTSimpleVectorIndex.load_from_disk(
        "index.json", service_context=service_context
    )
    response = index.query(input_text, response_mode="default")
    try:
        save_response(input_text, response)
    except Exception as e:
        print("Error saving response:", e)
    return response.response, response.get_formatted_sources()


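# Gradio UI: one input textbox, two output textboxes (the answer and its sources)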
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=2, label="Enter your text"),
    outputs=[gr.Textbox(lines=30, label="Output"), gr.Textbox(lines=4, label="Source")],
    title="Custom-trained AI Chatbot",
)

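# Index any new documents in docs/ at startup, then launch the web UI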
index = construct_index("docs")
iface.launch()