hfwittmann committed on
Commit
708e7b3
·
1 Parent(s): 806ab1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -100
app.py CHANGED
@@ -1,7 +1,15 @@
 
 
 
1
  import gradio as gr
 
2
  import pandas as pd
3
- from pathlib import Path
4
- import os
 
 
 
 
5
 
6
  css_style = """
7
  .gradio-container {
@@ -10,91 +18,101 @@ css_style = """
10
  """
11
 
12
 
13
- def request_pathname(files, data, openai_api_key, index):
14
- if files is None:
15
- return [[]]
16
- for file in files:
17
- # make sure we're not duplicating things in the dataset
18
- if file.name in [x[0] for x in data]:
19
- continue
20
- data.append([file.name, None, None])
21
-
22
- mydataset = pd.DataFrame(data, columns=["filepath", "citation string", "key"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- validation, index = validate_dataset(mydataset, openai_api_key, index)
25
-
26
- return (
27
- [[len(data), 0]],
28
- data,
29
- data,
30
- validation,
31
- index
32
- )
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- def validate_dataset(dataset, openapi, index):
36
- docs_ready = dataset.iloc[-1, 0] != ""
37
-
38
- if docs_ready and type(openapi) is str and len(openapi) > 0:
39
- os.environ["OPENAI_API_KEY"] = openapi.strip()
40
- index = get_index(dataset, openapi, index)
41
- return "✨Ready✨", index
42
- elif docs_ready:
43
- return "⚠️Waiting for key⚠️", index
44
- elif type(openapi) is str and len(openapi) > 0:
45
- return "⚠️Waiting for documents⚠️", index
46
- else:
47
- return "⚠️Waiting for documents and key⚠️", index
48
-
49
-
50
- def get_index(dataset, openapi, index):
51
-
52
- docs_ready = dataset.iloc[-1, 0] != ""
53
 
54
- if docs_ready and type(openapi) is str and len(openapi) > 0:
55
- from langchain.document_loaders import PyPDFLoader
56
- from langchain.vectorstores import DocArrayInMemorySearch
57
- from IPython.display import display, Markdown
58
- from langchain.indexes import VectorstoreIndexCreator
59
 
60
- # myfile = "Angela Merkel - Wikipedia.pdf"
61
- # loader = PyPDFLoader(file_path=myfile)
 
62
 
63
- loader = PyPDFLoader(file_path=dataset["filepath"][0])
64
 
65
- index = VectorstoreIndexCreator(
66
- vectorstore_cls=DocArrayInMemorySearch
67
- ).from_loaders([loader])
68
 
69
- return index
70
 
 
 
 
 
71
 
72
- def make_stats(docs):
73
- return [[len(docs.doc_previews), sum([x[0] for x in docs.doc_previews])]]
74
 
 
 
 
75
 
76
- def do_ask(question, button, openapi, dataset, index):
77
- passages = ""
78
- docs_ready = dataset.iloc[-1, 0] != ""
79
- out = ''
80
- if button == "✨Ready✨" and type(openapi) is str and len(openapi) > 0 and docs_ready:
81
 
 
82
 
83
- # "Please provide a summary of signifcant personal life events of Angela Merkel. Of that summary extract all events with dates and put these into a markdown table."
84
- # limit = f' Limit your answer to a maxmium of {length} words.'
85
-
86
- query = question # + limit
87
-
88
- response = index.query(query)
89
- out = response
90
-
91
- yield out, index
 
 
92
 
93
 
94
  with gr.Blocks(css=css_style) as demo:
95
  docs = gr.State()
96
  data = gr.State([])
97
  openai_api_key = gr.State("")
 
 
 
98
 
99
  gr.Markdown(
100
  """
@@ -102,7 +120,7 @@ with gr.Blocks(css=css_style) as demo:
102
 
103
  *By D8a.ai*
104
 
105
- Based on https://huggingface.co/spaces/whitead/paper-qa
106
 
107
  Significant advances in langchain have made it possible to simplify the code.
108
 
@@ -115,17 +133,16 @@ with gr.Blocks(css=css_style) as demo:
115
  * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
116
  1. Enter API Key ([What is that?](https://platform.openai.com/account/api-keys))
117
  2. Upload your documents
118
- 3. Ask a questions
119
  """
120
  )
121
 
122
  openai_api_key = gr.Textbox(
123
  label="OpenAI API Key", placeholder="sk-...", type="password"
124
  )
125
- with gr.Tab("File Upload"):
126
  uploaded_files = gr.File(
127
- label="Your Documents Upload (PDF or txt)",
128
- file_count="multiple",
129
  )
130
 
131
  with gr.Accordion("See Docs:", open=False):
@@ -139,7 +156,6 @@ with gr.Blocks(css=css_style) as demo:
139
  max_rows=5,
140
  )
141
 
142
-
143
  buildb = gr.Textbox(
144
  "⚠️Waiting for documents and key...",
145
  label="Status",
@@ -147,40 +163,27 @@ with gr.Blocks(css=css_style) as demo:
147
  show_label=True,
148
  max_lines=1,
149
  )
150
-
151
- index = gr.State()
152
-
153
- stats = gr.Dataframe(
154
- headers=["Docs", "Chunks"],
155
- datatype=["number", "number"],
156
- col_count=(2, "fixed"),
157
- interactive=False,
158
- label="Doc Stats",
159
- )
160
- openai_api_key.change(
161
- validate_dataset, inputs=[dataset, openai_api_key], outputs=[buildb, index]
162
- )
163
- dataset.change(validate_dataset, inputs=[dataset, openai_api_key, index], outputs=[buildb, index])
164
-
165
-
166
- uploaded_files.change(
167
- request_pathname,
168
- inputs=[uploaded_files, data, openai_api_key, index],
169
- outputs=[stats, data, dataset, buildb, index],
170
- )
171
 
172
  query = gr.Textbox(placeholder="Enter your question here...", label="Question")
173
-
174
- # with gr.Row():
175
- # length = gr.Slider(25, 200, value=100, step=5, label="Words in answer")
176
  ask = gr.Button("Ask Question")
177
  answer = gr.Markdown(label="Answer")
178
 
 
 
 
 
 
 
 
 
 
 
179
  ask.click(
180
- do_ask,
181
- inputs=[query, buildb, openai_api_key, dataset, index],
182
- outputs=[answer, index],
183
  )
184
 
 
185
  demo.queue(concurrency_count=20)
186
  demo.launch(show_error=True)
 
1
+ import os
2
+ from typing import Any
3
+
4
  import gradio as gr
5
+ import openai
6
  import pandas as pd
7
+ from IPython.display import Markdown, display
8
+ from langchain.document_loaders import PyPDFLoader
9
+ from langchain.indexes import VectorstoreIndexCreator
10
+ from langchain.vectorstores import DocArrayInMemorySearch
11
+ from langchain.embeddings import OpenAIEmbeddings
12
+
13
 
14
  css_style = """
15
  .gradio-container {
 
18
  """
19
 
20
 
21
+ class myClass:
22
+ def __init__(self) -> None:
23
+ self.openapi = ""
24
+ self.valid_key = False
25
+ self.docs_ready = False
26
+ self.status = "⚠️Waiting for documents and key⚠️"
27
+ pass
28
+
29
+ def check_status(self):
30
+ if self.docs_ready and self.valid_key:
31
+ out = "✨Ready✨"
32
+ elif self.docs_ready:
33
+ out = "⚠️Waiting for key⚠️"
34
+ elif self.valid_key:
35
+ out = "⚠️Waiting for documents⚠️"
36
+ else:
37
+ out = "⚠️Waiting for documents and key⚠️"
38
+
39
+ self.status = out
40
+
41
+ def validate_key(self, myin):
42
+ assert isinstance(myin, str)
43
+ self.valid_key = True
44
+ self.openai_api_key = myin.strip()
45
 
46
+ self.check_status()
47
+ return self.status
 
 
 
 
 
 
 
48
 
49
+ def request_pathname(self, files, data):
50
+ if files is None:
51
+ self.docs_ready = False
52
+ self.check_status()
53
+ return (
54
+ pd.DataFrame(data, columns=["filepath", "citation string", "key"]),
55
+ self.status,
56
+ )
57
+ for file in files:
58
+ # make sure we're not duplicating things in the dataset
59
+ if file.name in [x[0] for x in data]:
60
+ continue
61
+ data.append([file.name, None, None])
62
 
63
+ mydataset = pd.DataFrame(data, columns=["filepath", "citation string", "key"])
64
+ validation_button = self.validate_dataset(mydataset)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
+ return mydataset, validation_button
 
 
 
 
67
 
68
+ def validate_dataset(self, dataset):
69
+ self.docs_ready = dataset.iloc[-1, 0] != ""
70
+ self.dataset = dataset
71
 
72
+ self.check_status()
73
 
74
+ if self.status == "✨Ready✨":
75
+ self.get_index()
 
76
 
77
+ return self.status
78
 
79
+ def get_index(self):
80
+ if self.docs_ready and self.valid_key:
81
+ # openai = OpenAIEmbeddings(openai_api_key=self.openai_api_key)
82
+ os.environ["OPENAI_API_KEY"] = self.openai_api_key
83
 
 
 
84
 
85
+ # myfile = "Angela Merkel - Wikipedia.pdf"
86
+ # loader = PyPDFLoader(file_path=myfile)
87
+ loader = PyPDFLoader(file_path=self.dataset["filepath"][0])
88
 
89
+ self.index = VectorstoreIndexCreator(
90
+ vectorstore_cls=DocArrayInMemorySearch
91
+ ).from_loaders([loader])
92
+ del os.environ["OPENAI_API_KEY"]
 
93
 
94
+ pass
95
 
96
+ def do_ask(self, question):
97
+ # os.environ["OPENAI_API_KEY"] = self.openai_api_key
98
+ # openai.api_key = self.openai_api_key
99
+ if self.status == "✨Ready✨":
100
+ # openai = OpenAIEmbeddings(openai_api_key=self.openai_api_key)
101
+ os.environ["OPENAI_API_KEY"] = self.openai_api_key
102
+
103
+ response = self.index.query(question=question)
104
+ del os.environ["OPENAI_API_KEY"]
105
+ yield response
106
+ pass
107
 
108
 
109
  with gr.Blocks(css=css_style) as demo:
110
  docs = gr.State()
111
  data = gr.State([])
112
  openai_api_key = gr.State("")
113
+ index = gr.State()
114
+ myInstance = gr.State()
115
+ myInstance = myClass()
116
 
117
  gr.Markdown(
118
  """
 
120
 
121
  *By D8a.ai*
122
 
123
+ Idea based on https://huggingface.co/spaces/whitead/paper-qa
124
 
125
  Significant advances in langchain have made it possible to simplify the code.
126
 
 
133
  * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
134
  1. Enter API Key ([What is that?](https://platform.openai.com/account/api-keys))
135
  2. Upload your documents
136
+ 3. Ask questions
137
  """
138
  )
139
 
140
  openai_api_key = gr.Textbox(
141
  label="OpenAI API Key", placeholder="sk-...", type="password"
142
  )
143
+ with gr.Tab("File upload"):
144
  uploaded_files = gr.File(
145
+ label="Upload your pdf Dokument", file_count="multiple"
 
146
  )
147
 
148
  with gr.Accordion("See Docs:", open=False):
 
156
  max_rows=5,
157
  )
158
 
 
159
  buildb = gr.Textbox(
160
  "⚠️Waiting for documents and key...",
161
  label="Status",
 
163
  show_label=True,
164
  max_lines=1,
165
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  query = gr.Textbox(placeholder="Enter your question here...", label="Question")
 
 
 
168
  ask = gr.Button("Ask Question")
169
  answer = gr.Markdown(label="Answer")
170
 
171
+ openai_api_key.change(
172
+ myInstance.validate_key, inputs=openai_api_key, outputs=buildb
173
+ )
174
+
175
+ uploaded_files.change(
176
+ myInstance.request_pathname,
177
+ inputs=[uploaded_files, data],
178
+ outputs=[dataset, buildb],
179
+ )
180
+
181
  ask.click(
182
+ myInstance.do_ask,
183
+ inputs=[query],
184
+ outputs=answer,
185
  )
186
 
187
+
188
  demo.queue(concurrency_count=20)
189
  demo.launch(show_error=True)