Spaces:
Sleeping
Sleeping
File size: 3,830 Bytes
a4018ba 33f8cf1 a4018ba 6435195 a4018ba 33f8cf1 a4018ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, set_global_service_context, load_index_from_storage, StorageContext, PromptHelper
from llama_index.llms import OpenAI
from llama_index.evaluation import ResponseEvaluator
from langchain.chat_models import ChatOpenAI
from PyPDF2 import PdfReader
import gradio as gr
import sys
import os
# Optional local configuration: when a `config` module exposing OPEN_AI_KEY is
# importable, export the key through the OPENAI_API_KEY environment variable.
# Otherwise the environment is left untouched.
try:
    from config import OPEN_AI_KEY
except ImportError:
    # No local config module (or no OPEN_AI_KEY in it). This is the one
    # failure that is expected here; anything else (e.g. a syntax error in
    # config.py) should surface instead of being silently swallowed.
    pass
else:
    # os.environ only accepts strings; skip anything else rather than crash.
    if isinstance(OPEN_AI_KEY, str):
        os.environ["OPENAI_API_KEY"] = OPEN_AI_KEY
# ===============================
# Settings
# ===============================
# Passed to PromptHelper as context_window.
MAX_INPUT_SIZE = 4096
# Used both as the LLM's max_tokens and as PromptHelper's num_output.
NUM_OUTPUT = 2048
# Passed to PromptHelper as chunk_overlap_ratio.
CHUNK_OVERLAP_RATIO = 0.15
# Passed to PromptHelper as chunk_size_limit.
CHUNK_SIZE_LIMIT = 1000
# Passed to the OpenAI LLM as temperature.
TEMPERATURE = 0.5
# Source directory handed to construct_index (read when the index must be built).
DIRECTORY = 'dww_rev_cleaned'
# Directory handed to construct_index for persisting / reloading the index.
DIRECTORY_PERSIST = 'dww_rev_cleaned_persist'
# Define LLM: gpt-3.5-turbo, temperature taken from TEMPERATURE (currently 0.5)
llm = OpenAI(model="gpt-3.5-turbo", temperature=TEMPERATURE, max_tokens=NUM_OUTPUT)
# Define prompt helper
# NOTE(review): NUM_OUTPUT is half of MAX_INPUT_SIZE, which presumably leaves
# only the other half for prompt + retrieved context -- confirm this is intended.
prompt_helper = PromptHelper(context_window=MAX_INPUT_SIZE, num_output=NUM_OUTPUT, chunk_overlap_ratio=CHUNK_OVERLAP_RATIO, chunk_size_limit=CHUNK_SIZE_LIMIT)
# Set service context (also installed as the global default)
service_context = ServiceContext.from_defaults(llm=llm, prompt_helper=prompt_helper)
set_global_service_context(service_context)
# ===============================
# Functions
# ===============================
def construct_index(directory_path, index_path):
    """Return the vector index, reusing a persisted copy when one exists.

    Args:
        directory_path: Directory handed to SimpleDirectoryReader when the
            index has to be built from the documents.
        index_path: Directory the index is persisted to and reloaded from.

    Returns:
        The index loaded through load_index_from_storage when ``index_path``
        already contains files; otherwise a newly built GPTVectorStoreIndex
        that has just been persisted to ``index_path``.
    """
    # os.listdir raises FileNotFoundError for a missing directory, so on a
    # first run (no persist directory yet) treat "missing" like "empty".
    if os.path.isdir(index_path) and os.listdir(index_path):
        storage_context = StorageContext.from_defaults(persist_dir=index_path)
        return load_index_from_storage(storage_context)

    # Load in documents
    documents = SimpleDirectoryReader(directory_path).load_data()
    # Index documents
    index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
    # Save index; make sure the target directory exists before persisting.
    os.makedirs(index_path, exist_ok=True)
    index.storage_context.persist(persist_dir=index_path)
    return index
# Build (or reload) the index once at import time and derive the query engine
# that chatfunc sends its prompts to.
INDEX = construct_index(DIRECTORY, DIRECTORY_PERSIST)
QE = INDEX.as_query_engine()
# Holder for the text of the uploaded PDF: upload_file writes its .value and
# chatfunc reads it.
# NOTE(review): this State object is created at module level and is never
# passed through an event's inputs/outputs, so it looks like one value shared
# by every session rather than per-user state -- confirm.
PDF_CONTENT = gr.State("")
def upload_file(file):
    """Extract the text of an uploaded PDF and remember it in PDF_CONTENT.

    Args:
        file: Uploaded file object; its ``.name`` is handed to PdfReader.

    Returns:
        The text of all pages joined by blank lines, or a string of the form
        ``"Error: <message>"`` when anything in the extraction raises.
    """
    try:
        reader = PdfReader(file.name)
        page_texts = []
        for page in reader.pages:
            page_texts.append(page.extract_text())
        extracted = "\n\n".join(page_texts)
        # Stored so that chatfunc can include the document in its prompt.
        PDF_CONTENT.value = extracted
    except Exception as err:
        # Failures are reported as text instead of being raised.
        return "Error: " + str(err)
    return extracted
def chatfunc(input_text, chat_history, max_chat_length=4):
    """Answer a chat message through the query engine, replaying recent history.

    The prompt is assembled from fixed instructions, the uploaded document
    text (when PDF_CONTENT holds any), the most recent exchanges and the new
    user message, and is then sent to ``QE.query``.

    Args:
        input_text: The user's new message.
        chat_history: List of ``[user_message, bot_message]`` pairs. The new
            exchange is appended to it; ``None`` is treated as an empty list.
        max_chat_length: Maximum number of previous exchanges replayed in
            the prompt. Values <= 0 replay none.

    Returns:
        A tuple ``("", chat_history)``: an empty string for the message box
        and the updated history.
    """
    if chat_history is None:
        chat_history = []

    prompt = (
        "You are an insight bot that helps users (special educators and school psychologists) build individual education programs based on disability categories using DWW (a library of research-backed interviews and tools) as reference.\n"
        "Refer to the DWW's context as much as you can to provide a detailed answer."
    )

    if PDF_CONTENT.value:
        # Separate the document from the instructions; previously the three
        # strings were concatenated with nothing between them.
        prompt = (
            prompt
            + "\n\nThe following is the relevant document provided by the user:\n"
            + PDF_CONTENT.value
            + "\n\n"
        )

    # `~n` is -(n + 1), so the former `[~max_chat_length:]` slice replayed one
    # exchange too many. `[-0:]` would be the whole list, hence the guard.
    recent_chats = chat_history[-max_chat_length:] if max_chat_length > 0 else []
    for user_chat, bot_chat in recent_chats:
        prompt = f"{prompt}\nUser: {user_chat}\nAssistant: {bot_chat}"
    prompt = f"{prompt}\nUser: {input_text}\nAssistant:"

    response = QE.query(prompt)
    chat_history.append([input_text, response.response])
    return "", chat_history
# Gradio UI: a chat window backed by chatfunc plus a PDF upload whose extracted
# text is shown in a collapsible textbox.
with gr.Blocks() as iface:
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Ask the DWW Bot anything about research-based practices in education")
    # Emoji restored from mojibake (UTF-8 bytes that had been decoded as ISO-8859-7).
    submit = gr.Button("💬 Submit")
    with gr.Row():
        clear = gr.ClearButton(value="🧹 Clear outputs", components=[msg, chatbot])
        # NOTE(review): the leading emoji of the next two labels is garbled and
        # its distinguishing bytes are lost; the labels are kept as found --
        # restore the intended characters.
        upload_button = gr.UploadButton("π Upload a relevant document", file_types=[".pdf"], file_count="single")
    with gr.Accordion("π View your document"):
        syl = gr.Textbox(label="Your documents' content will show here")

    # Pressing Enter in the textbox and clicking Submit both send the message;
    # the button previously had no handler attached.
    msg.submit(chatfunc, [msg, chatbot], [msg, chatbot])
    submit.click(chatfunc, [msg, chatbot], [msg, chatbot])
    upload_button.upload(upload_file, upload_button, syl)

iface.launch(share=False)