hfwittmann committed on
Commit
6b50a9a
·
1 Parent(s): 708e7b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -34
app.py CHANGED
@@ -6,10 +6,11 @@ import openai
6
  import pandas as pd
7
  from IPython.display import Markdown, display
8
  from langchain.document_loaders import PyPDFLoader
 
9
  from langchain.indexes import VectorstoreIndexCreator
 
10
  from langchain.vectorstores import DocArrayInMemorySearch
11
- from langchain.embeddings import OpenAIEmbeddings
12
-
13
 
14
  css_style = """
15
  .gradio-container {
@@ -24,6 +25,7 @@ class myClass:
24
  self.valid_key = False
25
  self.docs_ready = False
26
  self.status = "⚠️Waiting for documents and key⚠️"
 
27
  pass
28
 
29
  def check_status(self):
@@ -42,9 +44,11 @@ class myClass:
42
  assert isinstance(myin, str)
43
  self.valid_key = True
44
  self.openai_api_key = myin.strip()
45
-
 
 
46
  self.check_status()
47
- return self.status
48
 
49
  def request_pathname(self, files, data):
50
  if files is None:
@@ -78,58 +82,70 @@ class myClass:
78
 
79
  def get_index(self):
80
  if self.docs_ready and self.valid_key:
81
- # openai = OpenAIEmbeddings(openai_api_key=self.openai_api_key)
82
- os.environ["OPENAI_API_KEY"] = self.openai_api_key
83
-
84
 
85
  # myfile = "Angela Merkel - Wikipedia.pdf"
86
  # loader = PyPDFLoader(file_path=myfile)
87
- loader = PyPDFLoader(file_path=self.dataset["filepath"][0])
88
 
89
  self.index = VectorstoreIndexCreator(
90
- vectorstore_cls=DocArrayInMemorySearch
91
- ).from_loaders([loader])
92
- del os.environ["OPENAI_API_KEY"]
 
93
 
94
  pass
95
 
96
  def do_ask(self, question):
97
  # os.environ["OPENAI_API_KEY"] = self.openai_api_key
98
- # openai.api_key = self.openai_api_key
 
99
  if self.status == "✨Ready✨":
100
- # openai = OpenAIEmbeddings(openai_api_key=self.openai_api_key)
101
- os.environ["OPENAI_API_KEY"] = self.openai_api_key
102
-
103
- response = self.index.query(question=question)
104
- del os.environ["OPENAI_API_KEY"]
105
  yield response
106
  pass
107
 
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  with gr.Blocks(css=css_style) as demo:
 
 
110
  docs = gr.State()
111
  data = gr.State([])
112
- openai_api_key = gr.State("")
113
  index = gr.State()
114
- myInstance = gr.State()
115
- myInstance = myClass()
116
 
117
  gr.Markdown(
118
  """
119
  # Document Question and Answer
120
-
121
  *By D8a.ai*
122
-
123
  Idea based on https://huggingface.co/spaces/whitead/paper-qa
124
-
125
  Significant advances in langchain have made it possible to simplify the code.
126
-
127
  This tool allows you to ask questions of your uploaded text, PDF documents.
128
-
129
  It uses OpenAI's GPT models, so you need to enter your API key below. This
130
  tool is under active development and currently uses a lot of tokens - up to 10,000
131
  for a single query. This is $0.10-0.20 per query, so please be careful!
132
-
133
  * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
134
  1. Enter API Key ([What is that?](https://platform.openai.com/account/api-keys))
135
  2. Upload your documents
@@ -169,21 +185,21 @@ with gr.Blocks(css=css_style) as demo:
169
  answer = gr.Markdown(label="Answer")
170
 
171
  openai_api_key.change(
172
- myInstance.validate_key, inputs=openai_api_key, outputs=buildb
173
  )
174
 
175
  uploaded_files.change(
176
- myInstance.request_pathname,
177
- inputs=[uploaded_files, data],
178
- outputs=[dataset, buildb],
179
  )
180
 
181
  ask.click(
182
- myInstance.do_ask,
183
- inputs=[query],
184
- outputs=answer,
185
  )
186
 
187
 
188
  demo.queue(concurrency_count=20)
189
- demo.launch(show_error=True)
 
6
  import pandas as pd
7
  from IPython.display import Markdown, display
8
  from langchain.document_loaders import PyPDFLoader
9
+ from langchain.embeddings import OpenAIEmbeddings
10
  from langchain.indexes import VectorstoreIndexCreator
11
+ from langchain.llms import OpenAI
12
  from langchain.vectorstores import DocArrayInMemorySearch
13
+ from uuid import uuid4
 
14
 
15
  css_style = """
16
  .gradio-container {
 
25
  self.valid_key = False
26
  self.docs_ready = False
27
  self.status = "⚠️Waiting for documents and key⚠️"
28
+ self.uuid = uuid4()
29
  pass
30
 
31
  def check_status(self):
 
44
  assert isinstance(myin, str)
45
  self.valid_key = True
46
  self.openai_api_key = myin.strip()
47
+ self.embedding = OpenAIEmbeddings(openai_api_key=self.openai_api_key)
48
+ self.llm = OpenAI(openai_api_key=self.openai_api_key)
49
+
50
  self.check_status()
51
+ return [self.status]
52
 
53
  def request_pathname(self, files, data):
54
  if files is None:
 
82
 
83
  def get_index(self):
84
  if self.docs_ready and self.valid_key:
85
+ # os.environ["OPENAI_API_KEY"] = self.openai_api_key
 
 
86
 
87
  # myfile = "Angela Merkel - Wikipedia.pdf"
88
  # loader = PyPDFLoader(file_path=myfile)
89
+ loaders = [PyPDFLoader(f) for f in self.dataset["filepath"]]
90
 
91
  self.index = VectorstoreIndexCreator(
92
+ vectorstore_cls=DocArrayInMemorySearch, embedding=self.embedding
93
+ ).from_loaders(loaders=loaders)
94
+
95
+ # del os.environ["OPENAI_API_KEY"]
96
 
97
  pass
98
 
99
  def do_ask(self, question):
100
  # os.environ["OPENAI_API_KEY"] = self.openai_api_key
101
+ # openai.api_key = self.openai_api_key
102
+
103
  if self.status == "✨Ready✨":
104
+ # os.environ["OPENAI_API_KEY"] = self.openai_api_key
105
+
106
+ response = self.index.query(question=question, llm=self.llm)
107
+ # del os.environ["OPENAI_API_KEY"]
 
108
  yield response
109
  pass
110
 
111
 
112
+ def validate_key(myInstance: myClass, openai_api_key):
113
+ if myInstance is None:
114
+ myInstance = myClass()
115
+
116
+ out = myInstance.validate_key(openai_api_key)
117
+ return myInstance, *out
118
+
119
+
120
+ def request_pathname(myInstance: myClass, files, data):
121
+ if myInstance is None:
122
+ myInstance = myClass()
123
+ out = myInstance.request_pathname(files, data)
124
+ return myInstance, *out
125
+
126
+
127
+ def do_ask(myInstance: myClass, question):
128
+ out = myInstance.do_ask(question)
129
+ return myInstance, *out
130
+
131
+
132
  with gr.Blocks(css=css_style) as demo:
133
+ myInstance = gr.State()
134
+ openai_api_key = gr.State("")
135
  docs = gr.State()
136
  data = gr.State([])
 
137
  index = gr.State()
 
 
138
 
139
  gr.Markdown(
140
  """
141
  # Document Question and Answer
 
142
  *By D8a.ai*
 
143
  Idea based on https://huggingface.co/spaces/whitead/paper-qa
 
144
  Significant advances in langchain have made it possible to simplify the code.
 
145
  This tool allows you to ask questions of your uploaded text, PDF documents.
 
146
  It uses OpenAI's GPT models, so you need to enter your API key below. This
147
  tool is under active development and currently uses a lot of tokens - up to 10,000
148
  for a single query. This is $0.10-0.20 per query, so please be careful!
 
149
  * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
150
  1. Enter API Key ([What is that?](https://platform.openai.com/account/api-keys))
151
  2. Upload your documents
 
185
  answer = gr.Markdown(label="Answer")
186
 
187
  openai_api_key.change(
188
+ validate_key, inputs=[myInstance, openai_api_key], outputs=[myInstance, buildb]
189
  )
190
 
191
  uploaded_files.change(
192
+ request_pathname,
193
+ inputs=[myInstance, uploaded_files, data],
194
+ outputs=[myInstance, dataset, buildb],
195
  )
196
 
197
  ask.click(
198
+ do_ask,
199
+ inputs=[myInstance, query],
200
+ outputs=[myInstance, answer],
201
  )
202
 
203
 
204
  demo.queue(concurrency_count=20)
205
+ demo.launch(show_error=True)