import gradio as gr
import pandas as pd
from pathlib import Path
import os

css_style = """
.gradio-container {
    font-family: "IBM Plex Mono";
}
"""
def request_pathname(files, data, openai_api_key, index):
    if files is None:
        # nothing uploaded (or the upload was cleared): just refresh the status,
        # keeping the stored data and index, so all five outputs get a value
        validation, index = validate_dataset(
            pd.DataFrame(data, columns=["filepath", "citation string", "key"]),
            openai_api_key,
            index,
        )
        return [[len(data), 0]], data, data, validation, index
    for file in files:
        # make sure we're not duplicating things in the dataset
        if file.name in [x[0] for x in data]:
            continue
        data.append([file.name, None, None])
    mydataset = pd.DataFrame(data, columns=["filepath", "citation string", "key"])
    validation, index = validate_dataset(mydataset, openai_api_key, index)
    return (
        [[len(data), 0]],
        data,
        data,
        validation,
        index,
    )
def validate_dataset(dataset, openapi, index):
    # guard against an empty dataframe before looking at the last row
    docs_ready = len(dataset) > 0 and dataset.iloc[-1, 0] != ""
    if docs_ready and type(openapi) is str and len(openapi) > 0:
        os.environ["OPENAI_API_KEY"] = openapi.strip()
        index = get_index(dataset, openapi, index)
        return "✨Ready✨", index
    elif docs_ready:
        return "⚠️Waiting for key⚠️", index
    elif type(openapi) is str and len(openapi) > 0:
        return "⚠️Waiting for documents⚠️", index
    else:
        return "⚠️Waiting for documents and key⚠️", index
def get_index(dataset, openapi, index):
    docs_ready = len(dataset) > 0 and dataset.iloc[-1, 0] != ""
    if docs_ready and type(openapi) is str and len(openapi) > 0:
        from langchain.document_loaders import PyPDFLoader
        from langchain.vectorstores import DocArrayInMemorySearch
        from langchain.indexes import VectorstoreIndexCreator

        # myfile = "Angela Merkel - Wikipedia.pdf"
        # loader = PyPDFLoader(file_path=myfile)
        loader = PyPDFLoader(file_path=dataset["filepath"][0])
        index = VectorstoreIndexCreator(
            vectorstore_cls=DocArrayInMemorySearch
        ).from_loaders([loader])
    return index
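
# get_index() above only indexes the first uploaded file (dataset["filepath"][0]).
# The helper below is a hedged sketch, not wired into the UI, of how several
# uploads could be combined into one index with the same pre-0.1 langchain API
# used above: one loader per file passed to from_loaders(). The TextLoader
# fallback for non-PDF files is an assumption about how txt uploads are handled.
def build_index_from_files(filepaths):
    from langchain.document_loaders import PyPDFLoader, TextLoader
    from langchain.vectorstores import DocArrayInMemorySearch
    from langchain.indexes import VectorstoreIndexCreator

    loaders = []
    for path in filepaths:
        if str(path).lower().endswith(".pdf"):
            loaders.append(PyPDFLoader(file_path=path))
        else:
            loaders.append(TextLoader(path))
    # one index over every document, same vector store as get_index() above
    return VectorstoreIndexCreator(
        vectorstore_cls=DocArrayInMemorySearch
    ).from_loaders(loaders)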
# Note: this helper is not called anywhere below; it expects an object with a
# paper-qa style doc_previews attribute, not the langchain index used here.
def make_stats(docs):
    return [[len(docs.doc_previews), sum([x[0] for x in docs.doc_previews])]]
def do_ask(question, button, openapi, dataset, index):
    docs_ready = len(dataset) > 0 and dataset.iloc[-1, 0] != ""
    out = ""
    if button == "✨Ready✨" and type(openapi) is str and len(openapi) > 0 and docs_ready:
        # Example prompt: "Please provide a summary of significant personal life
        # events of Angela Merkel. From that summary extract all events with dates
        # and put these into a markdown table."
        # limit = f" Limit your answer to a maximum of {length} words."
        query = question  # + limit
        response = index.query(query)
        out = response
    yield out, index
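
# A hedged variant of do_ask, not wired into the UI: the langchain index wrapper
# used above also exposes query_with_sources() (an assumption about the pre-0.1
# langchain version), which returns the answer together with the documents it
# was drawn from. Sketch only; the "answer"/"sources" keys are assumed.
def do_ask_with_sources(question, index):
    result = index.query_with_sources(question)
    return f'{result["answer"]}\n\nSources: {result["sources"]}'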
with gr.Blocks(css=css_style) as demo:
    docs = gr.State()
    data = gr.State([])
    openai_api_key = gr.State("")

    gr.Markdown(
        """
# Document Question and Answer

*By D8a.ai*

Based on https://huggingface.co/spaces/whitead/paper-qa

Significant advances in langchain have made it possible to simplify the code.

This tool allows you to ask questions about your uploaded text or PDF documents.
It uses OpenAI's GPT models, so you need to enter your API key below. This
tool is under active development and currently uses a lot of tokens - up to 10,000
for a single query. That is roughly $0.10-0.20 per query, so please be careful!

* [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.

1. Enter your API key ([What is that?](https://platform.openai.com/account/api-keys))
2. Upload your documents
3. Ask a question
"""
    )
    openai_api_key = gr.Textbox(
        label="OpenAI API Key", placeholder="sk-...", type="password"
    )
    with gr.Tab("File Upload"):
        uploaded_files = gr.File(
            label="Your Documents Upload (PDF or txt)",
            file_count="multiple",
        )
        with gr.Accordion("See Docs:", open=False):
            dataset = gr.Dataframe(
                headers=["filepath", "citation string", "key"],
                datatype=["str", "str", "str"],
                col_count=(3, "fixed"),
                interactive=False,
                label="Documents and Citations",
                overflow_row_behaviour="paginate",
                max_rows=5,
            )

    buildb = gr.Textbox(
        "⚠️Waiting for documents and key...",
        label="Status",
        interactive=False,
        show_label=True,
        max_lines=1,
    )
    index = gr.State()
    stats = gr.Dataframe(
        headers=["Docs", "Chunks"],
        datatype=["number", "number"],
        col_count=(2, "fixed"),
        interactive=False,
        label="Doc Stats",
    )
    openai_api_key.change(
        validate_dataset,
        inputs=[dataset, openai_api_key, index],
        outputs=[buildb, index],
    )
    dataset.change(
        validate_dataset,
        inputs=[dataset, openai_api_key, index],
        outputs=[buildb, index],
    )
    uploaded_files.change(
        request_pathname,
        inputs=[uploaded_files, data, openai_api_key, index],
        outputs=[stats, data, dataset, buildb, index],
    )
    query = gr.Textbox(placeholder="Enter your question here...", label="Question")
    # with gr.Row():
    #     length = gr.Slider(25, 200, value=100, step=5, label="Words in answer")
    ask = gr.Button("Ask Question")
    answer = gr.Markdown(label="Answer")

    ask.click(
        do_ask,
        inputs=[query, buildb, openai_api_key, dataset, index],
        outputs=[answer, index],
    )

demo.queue(concurrency_count=20)
demo.launch(show_error=True)
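
# The dependency list below is an assumption based on the imports and API calls
# above (gradio 3.x with queue(concurrency_count=...), pre-0.1 langchain with
# VectorstoreIndexCreator, pypdf for PyPDFLoader, docarray for
# DocArrayInMemorySearch); pin whatever versions match your Space.
#
# requirements.txt (sketch):
#   gradio<4
#   langchain<0.1
#   openai
#   pypdf
#   docarray
#   pandas
#   tiktoken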