hfwittmann committed on
Commit
23c2d7a
·
1 Parent(s): ae72375

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -213
app.py CHANGED
@@ -1,216 +1,7 @@
1
- import os
2
- from typing import Any
3
-
4
  import gradio as gr
5
- import openai
6
- import pandas as pd
7
- from IPython.display import Markdown, display
8
- from langchain.document_loaders import PyPDFLoader
9
- from langchain.embeddings import OpenAIEmbeddings
10
- from langchain.indexes import VectorstoreIndexCreator
11
- from langchain.text_splitter import CharacterTextSplitter
12
- from langchain.text_splitter import RecursiveCharacterTextSplitter
13
- from langchain.llms import OpenAI
14
- from langchain.vectorstores import DocArrayInMemorySearch
15
- from uuid import uuid4
16
-
17
- css_style = """
18
- .gradio-container {
19
- font-family: "IBM Plex Mono";
20
- }
21
- """
22
-
23
-
24
- class myClass:
25
- def __init__(self) -> None:
26
- self.openapi = ""
27
- self.valid_key = False
28
- self.docs_ready = False
29
- self.status = "⚠️Waiting for documents and key⚠️"
30
- self.uuid = uuid4()
31
- pass
32
-
33
- def check_status(self):
34
- if self.docs_ready and self.valid_key:
35
- out = "✨Ready✨"
36
- elif self.docs_ready:
37
- out = "⚠️Waiting for key⚠️"
38
- elif self.valid_key:
39
- out = "⚠️Waiting for documents⚠️"
40
- else:
41
- out = "⚠️Waiting for documents and key⚠️"
42
-
43
- self.status = out
44
-
45
- def validate_key(self, myin):
46
- assert isinstance(myin, str)
47
- self.valid_key = True
48
- self.openai_api_key = myin.strip()
49
- self.embedding = OpenAIEmbeddings(openai_api_key=self.openai_api_key)
50
- self.llm = OpenAI(openai_api_key=self.openai_api_key)
51
-
52
- self.check_status()
53
- return [self.status]
54
-
55
- def request_pathname(self, files, data):
56
- if files is None:
57
- self.docs_ready = False
58
- self.check_status()
59
- return (
60
- pd.DataFrame(data, columns=["filepath", "citation string", "key"]),
61
- self.status,
62
- )
63
- for file in files:
64
- # make sure we're not duplicating things in the dataset
65
- if file.name in [x[0] for x in data]:
66
- continue
67
- data.append([file.name, None, None])
68
-
69
- mydataset = pd.DataFrame(data, columns=["filepath", "citation string", "key"])
70
- validation_button = self.validate_dataset(mydataset)
71
-
72
- return mydataset, validation_button
73
-
74
- def validate_dataset(self, dataset):
75
- self.docs_ready = dataset.iloc[-1, 0] != ""
76
- self.dataset = dataset
77
-
78
- self.check_status()
79
-
80
- if self.status == "✨Ready✨":
81
- self.get_index()
82
-
83
- return self.status
84
-
85
- def get_index(self):
86
- if self.docs_ready and self.valid_key:
87
- # os.environ["OPENAI_API_KEY"] = self.openai_api_key
88
-
89
- # myfile = "Angela Merkel - Wikipedia.pdf"
90
- # loader = PyPDFLoader(file_path=myfile)
91
- loaders = [PyPDFLoader(f) for f in self.dataset["filepath"]]
92
-
93
- self.index = VectorstoreIndexCreator(
94
- vectorstore_cls=DocArrayInMemorySearch,
95
- embedding=self.embedding,
96
- text_splitter = RecursiveCharacterTextSplitter(
97
- # Set a really small chunk size, just to show.
98
- chunk_size = 1000,
99
- chunk_overlap = 20,
100
- length_function = len,
101
- separators="."
102
- )
103
-
104
- ).from_loaders(loaders=loaders)
105
-
106
- # del os.environ["OPENAI_API_KEY"]
107
-
108
- pass
109
-
110
- def do_ask(self, question):
111
- # os.environ["OPENAI_API_KEY"] = self.openai_api_key
112
- # openai.api_key = self.openai_api_key
113
-
114
- if self.status == "✨Ready✨":
115
- # os.environ["OPENAI_API_KEY"] = self.openai_api_key
116
-
117
- response = self.index.query(question=question, llm=self.llm)
118
- # del os.environ["OPENAI_API_KEY"]
119
- yield response
120
- pass
121
-
122
-
123
- def validate_key(myInstance: myClass, openai_api_key):
124
- if myInstance is None:
125
- myInstance = myClass()
126
-
127
- out = myInstance.validate_key(openai_api_key)
128
- return myInstance, *out
129
-
130
-
131
- def request_pathname(myInstance: myClass, files, data):
132
- if myInstance is None:
133
- myInstance = myClass()
134
- out = myInstance.request_pathname(files, data)
135
- return myInstance, *out
136
-
137
-
138
- def do_ask(myInstance: myClass, question):
139
- out = myInstance.do_ask(question)
140
- return myInstance, *out
141
-
142
-
143
- with gr.Blocks(css=css_style) as demo:
144
- myInstance = gr.State()
145
- openai_api_key = gr.State("")
146
- docs = gr.State()
147
- data = gr.State([])
148
- index = gr.State()
149
-
150
- gr.Markdown(
151
- """
152
- # Document Question and Answer
153
- *By D8a.ai*
154
- Idea based on https://huggingface.co/spaces/whitead/paper-qa
155
- Significant advances in langchain have made it possible to simplify the code.
156
- This tool allows you to ask questions of your uploaded text, PDF documents.
157
- It uses OpenAI's GPT models, so you need to enter your API key below. This
158
- tool is under active development and currently uses a lot of tokens - up to 10,000
159
- for a single query. This is $0.10-0.20 per query, so please be careful!
160
- * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
161
- 1. Enter API Key ([What is that?](https://platform.openai.com/account/api-keys))
162
- 2. Upload your documents
163
- 3. Ask questions
164
- """
165
- )
166
-
167
- openai_api_key = gr.Textbox(
168
- label="OpenAI API Key", placeholder="sk-...", type="password"
169
- )
170
- with gr.Tab("File upload"):
171
- uploaded_files = gr.File(
172
- label="Upload your pdf Dokument", file_count="multiple"
173
- )
174
-
175
- with gr.Accordion("See Docs:", open=False):
176
- dataset = gr.Dataframe(
177
- headers=["filepath", "citation string", "key"],
178
- datatype=["str", "str", "str"],
179
- col_count=(3, "fixed"),
180
- interactive=False,
181
- label="Documents and Citations",
182
- overflow_row_behaviour="paginate",
183
- max_rows=5,
184
- )
185
-
186
- buildb = gr.Textbox(
187
- "⚠️Waiting for documents and key...",
188
- label="Status",
189
- interactive=False,
190
- show_label=True,
191
- max_lines=1,
192
- )
193
-
194
- query = gr.Textbox(placeholder="Enter your question here...", label="Question")
195
- ask = gr.Button("Ask Question")
196
- answer = gr.Markdown(label="Answer")
197
-
198
- openai_api_key.change(
199
- validate_key, inputs=[myInstance, openai_api_key], outputs=[myInstance, buildb]
200
- )
201
-
202
- uploaded_files.change(
203
- request_pathname,
204
- inputs=[myInstance, uploaded_files, data],
205
- outputs=[myInstance, dataset, buildb],
206
- )
207
-
208
- ask.click(
209
- do_ask,
210
- inputs=[myInstance, query],
211
- outputs=[myInstance, answer],
212
- )
213
 
 
 
214
 
215
- demo.queue(concurrency_count=20)
216
- demo.launch(show_error=True)
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
def greet(name):
    """Return a playful greeting for *name*.

    Parameters
    ----------
    name : str
        The name to include in the greeting.

    Returns
    -------
    str
        The greeting ``"Hello <name>!!"``.
    """
    return f"Hello {name}!!"
5
 
6
# Wire the greeter into a minimal text-in / text-out Gradio UI and serve it.
# NOTE: launch happens at module level — required so the app starts when the
# script is executed directly (e.g. as a Hugging Face Space entry point).
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
iface.launch()