Spaces:
Runtime error
Runtime error
Commit
·
6b50a9a
1
Parent(s):
708e7b3
Update app.py
Browse files
app.py
CHANGED
@@ -6,10 +6,11 @@ import openai
|
|
6 |
import pandas as pd
|
7 |
from IPython.display import Markdown, display
|
8 |
from langchain.document_loaders import PyPDFLoader
|
|
|
9 |
from langchain.indexes import VectorstoreIndexCreator
|
|
|
10 |
from langchain.vectorstores import DocArrayInMemorySearch
|
11 |
-
from
|
12 |
-
|
13 |
|
14 |
css_style = """
|
15 |
.gradio-container {
|
@@ -24,6 +25,7 @@ class myClass:
|
|
24 |
self.valid_key = False
|
25 |
self.docs_ready = False
|
26 |
self.status = "⚠️Waiting for documents and key⚠️"
|
|
|
27 |
pass
|
28 |
|
29 |
def check_status(self):
|
@@ -42,9 +44,11 @@ class myClass:
|
|
42 |
assert isinstance(myin, str)
|
43 |
self.valid_key = True
|
44 |
self.openai_api_key = myin.strip()
|
45 |
-
|
|
|
|
|
46 |
self.check_status()
|
47 |
-
return self.status
|
48 |
|
49 |
def request_pathname(self, files, data):
|
50 |
if files is None:
|
@@ -78,58 +82,70 @@ class myClass:
|
|
78 |
|
79 |
def get_index(self):
|
80 |
if self.docs_ready and self.valid_key:
|
81 |
-
#
|
82 |
-
os.environ["OPENAI_API_KEY"] = self.openai_api_key
|
83 |
-
|
84 |
|
85 |
# myfile = "Angela Merkel - Wikipedia.pdf"
|
86 |
# loader = PyPDFLoader(file_path=myfile)
|
87 |
-
|
88 |
|
89 |
self.index = VectorstoreIndexCreator(
|
90 |
-
vectorstore_cls=DocArrayInMemorySearch
|
91 |
-
).from_loaders(
|
92 |
-
|
|
|
93 |
|
94 |
pass
|
95 |
|
96 |
def do_ask(self, question):
|
97 |
# os.environ["OPENAI_API_KEY"] = self.openai_api_key
|
98 |
-
# openai.api_key = self.openai_api_key
|
|
|
99 |
if self.status == "✨Ready✨":
|
100 |
-
#
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
del os.environ["OPENAI_API_KEY"]
|
105 |
yield response
|
106 |
pass
|
107 |
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
with gr.Blocks(css=css_style) as demo:
|
|
|
|
|
110 |
docs = gr.State()
|
111 |
data = gr.State([])
|
112 |
-
openai_api_key = gr.State("")
|
113 |
index = gr.State()
|
114 |
-
myInstance = gr.State()
|
115 |
-
myInstance = myClass()
|
116 |
|
117 |
gr.Markdown(
|
118 |
"""
|
119 |
# Document Question and Answer
|
120 |
-
|
121 |
*By D8a.ai*
|
122 |
-
|
123 |
Idea based on https://huggingface.co/spaces/whitead/paper-qa
|
124 |
-
|
125 |
Significant advances in langchain have made it possible to simplify the code.
|
126 |
-
|
127 |
This tool allows you to ask questions of your uploaded text, PDF documents.
|
128 |
-
|
129 |
It uses OpenAI's GPT models, so you need to enter your API key below. This
|
130 |
tool is under active development and currently uses a lot of tokens - up to 10,000
|
131 |
for a single query. This is $0.10-0.20 per query, so please be careful!
|
132 |
-
|
133 |
* [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
|
134 |
1. Enter API Key ([What is that?](https://platform.openai.com/account/api-keys))
|
135 |
2. Upload your documents
|
@@ -169,21 +185,21 @@ with gr.Blocks(css=css_style) as demo:
|
|
169 |
answer = gr.Markdown(label="Answer")
|
170 |
|
171 |
openai_api_key.change(
|
172 |
-
|
173 |
)
|
174 |
|
175 |
uploaded_files.change(
|
176 |
-
|
177 |
-
inputs=[uploaded_files, data],
|
178 |
-
outputs=[dataset, buildb],
|
179 |
)
|
180 |
|
181 |
ask.click(
|
182 |
-
|
183 |
-
inputs=[query],
|
184 |
-
outputs=answer,
|
185 |
)
|
186 |
|
187 |
|
188 |
demo.queue(concurrency_count=20)
|
189 |
-
demo.launch(show_error=True)
|
|
|
6 |
import pandas as pd
|
7 |
from IPython.display import Markdown, display
|
8 |
from langchain.document_loaders import PyPDFLoader
|
9 |
+
from langchain.embeddings import OpenAIEmbeddings
|
10 |
from langchain.indexes import VectorstoreIndexCreator
|
11 |
+
from langchain.llms import OpenAI
|
12 |
from langchain.vectorstores import DocArrayInMemorySearch
|
13 |
+
from uuid import uuid4
|
|
|
14 |
|
15 |
css_style = """
|
16 |
.gradio-container {
|
|
|
25 |
self.valid_key = False
|
26 |
self.docs_ready = False
|
27 |
self.status = "⚠️Waiting for documents and key⚠️"
|
28 |
+
self.uuid = uuid4()
|
29 |
pass
|
30 |
|
31 |
def check_status(self):
|
|
|
44 |
assert isinstance(myin, str)
|
45 |
self.valid_key = True
|
46 |
self.openai_api_key = myin.strip()
|
47 |
+
self.embedding = OpenAIEmbeddings(openai_api_key=self.openai_api_key)
|
48 |
+
self.llm = OpenAI(openai_api_key=self.openai_api_key)
|
49 |
+
|
50 |
self.check_status()
|
51 |
+
return [self.status]
|
52 |
|
53 |
def request_pathname(self, files, data):
|
54 |
if files is None:
|
|
|
82 |
|
83 |
def get_index(self):
|
84 |
if self.docs_ready and self.valid_key:
|
85 |
+
# os.environ["OPENAI_API_KEY"] = self.openai_api_key
|
|
|
|
|
86 |
|
87 |
# myfile = "Angela Merkel - Wikipedia.pdf"
|
88 |
# loader = PyPDFLoader(file_path=myfile)
|
89 |
+
loaders = [PyPDFLoader(f) for f in self.dataset["filepath"]]
|
90 |
|
91 |
self.index = VectorstoreIndexCreator(
|
92 |
+
vectorstore_cls=DocArrayInMemorySearch, embedding=self.embedding
|
93 |
+
).from_loaders(loaders=loaders)
|
94 |
+
|
95 |
+
# del os.environ["OPENAI_API_KEY"]
|
96 |
|
97 |
pass
|
98 |
|
99 |
def do_ask(self, question):
|
100 |
# os.environ["OPENAI_API_KEY"] = self.openai_api_key
|
101 |
+
# openai.api_key = self.openai_api_key
|
102 |
+
|
103 |
if self.status == "✨Ready✨":
|
104 |
+
# os.environ["OPENAI_API_KEY"] = self.openai_api_key
|
105 |
+
|
106 |
+
response = self.index.query(question=question, llm=self.llm)
|
107 |
+
# del os.environ["OPENAI_API_KEY"]
|
|
|
108 |
yield response
|
109 |
pass
|
110 |
|
111 |
|
112 |
+
def validate_key(myInstance: myClass, openai_api_key):
    """Gradio handler that forwards an API key to the session's myClass object.

    Args:
        myInstance: The object held in the ``myInstance`` Gradio state, or
            ``None`` when no object has been stored yet.
        openai_api_key: The value passed on to ``myClass.validate_key``.

    Returns:
        A tuple whose first item is the (possibly newly created) instance,
        followed by every item returned by ``myClass.validate_key``.
    """
    # The state starts out empty, so build the object lazily on first use.
    instance = myClass() if myInstance is None else myInstance
    results = instance.validate_key(openai_api_key)
    return (instance, *results)
|
118 |
+
|
119 |
+
|
120 |
+
def request_pathname(myInstance: myClass, files, data):
    """Gradio handler that forwards uploaded files to the session's myClass object.

    Args:
        myInstance: The object held in the ``myInstance`` Gradio state, or
            ``None`` when no object has been stored yet.
        files: Passed through unchanged to ``myClass.request_pathname``.
        data: Passed through unchanged to ``myClass.request_pathname``.

    Returns:
        A tuple whose first item is the (possibly newly created) instance,
        followed by every item returned by ``myClass.request_pathname``.
    """
    # The state starts out empty, so build the object lazily on first use.
    instance = myClass() if myInstance is None else myInstance
    results = instance.request_pathname(files, data)
    return (instance, *results)
|
125 |
+
|
126 |
+
|
127 |
+
def do_ask(myInstance: myClass, question):
    """Gradio handler that asks a question through the session's myClass object.

    Args:
        myInstance: The object held in the ``myInstance`` Gradio state, or
            ``None`` when no object has been stored yet.
        question: The query text passed on to ``myClass.do_ask``.

    Returns:
        A tuple whose first item is the (possibly newly created) instance,
        followed by the item(s) produced by ``myClass.do_ask``. When that
        generator produces nothing, the instance's ``status`` text is returned
        in its place so both declared outputs always receive a value.
    """
    # Same lazy creation as the sibling handlers: without it, asking before
    # any other event has populated the state fails on ``None.do_ask``.
    if myInstance is None:
        myInstance = myClass()

    # ``myClass.do_ask`` is a generator; materialise it once so we can tell
    # whether it actually produced an answer.
    out = list(myInstance.do_ask(question))
    if not out:
        # Nothing was yielded: show the current status instead of returning
        # too few values for the ``[myInstance, answer]`` outputs.
        out = [myInstance.status]
    return myInstance, *out
|
130 |
+
|
131 |
+
|
132 |
with gr.Blocks(css=css_style) as demo:
|
133 |
+
myInstance = gr.State()
|
134 |
+
openai_api_key = gr.State("")
|
135 |
docs = gr.State()
|
136 |
data = gr.State([])
|
|
|
137 |
index = gr.State()
|
|
|
|
|
138 |
|
139 |
gr.Markdown(
|
140 |
"""
|
141 |
# Document Question and Answer
|
|
|
142 |
*By D8a.ai*
|
|
|
143 |
Idea based on https://huggingface.co/spaces/whitead/paper-qa
|
|
|
144 |
Significant advances in langchain have made it possible to simplify the code.
|
|
|
145 |
This tool allows you to ask questions of your uploaded text, PDF documents.
|
|
|
146 |
It uses OpenAI's GPT models, so you need to enter your API key below. This
|
147 |
tool is under active development and currently uses a lot of tokens - up to 10,000
|
148 |
for a single query. This is $0.10-0.20 per query, so please be careful!
|
|
|
149 |
* [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
|
150 |
1. Enter API Key ([What is that?](https://platform.openai.com/account/api-keys))
|
151 |
2. Upload your documents
|
|
|
185 |
answer = gr.Markdown(label="Answer")
|
186 |
|
187 |
openai_api_key.change(
|
188 |
+
validate_key, inputs=[myInstance, openai_api_key], outputs=[myInstance, buildb]
|
189 |
)
|
190 |
|
191 |
uploaded_files.change(
|
192 |
+
request_pathname,
|
193 |
+
inputs=[myInstance, uploaded_files, data],
|
194 |
+
outputs=[myInstance, dataset, buildb],
|
195 |
)
|
196 |
|
197 |
ask.click(
|
198 |
+
do_ask,
|
199 |
+
inputs=[myInstance, query],
|
200 |
+
outputs=[myInstance, answer],
|
201 |
)
|
202 |
|
203 |
|
204 |
demo.queue(concurrency_count=20)
|
205 |
+
demo.launch(show_error=True)
|