Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,15 +6,18 @@ import tensorflow_hub as hub
|
|
6 |
import openai
|
7 |
import gradio as gr
|
8 |
import os
|
|
|
9 |
from sklearn.neighbors import NearestNeighbors
|
10 |
|
11 |
openai.api_key = os.getenv('OpenAPI')
|
12 |
|
13 |
-
|
14 |
-
|
15 |
def download_pdf(url, output_path):
    """Fetch the resource at ``url`` and store it at ``output_path``.

    Args:
        url: Address of the PDF to download.
        output_path: Local file path the downloaded bytes are written to.
    """
    urllib.request.urlretrieve(url, filename=output_path)
|
17 |
|
|
|
|
|
|
|
|
|
18 |
def preprocess(text):
|
19 |
text = text.replace('\n', ' ')
|
20 |
text = re.sub('\s+', ' ', text)
|
@@ -53,7 +56,6 @@ def text_to_chunks(texts, word_length=150, start_page=1):
|
|
53 |
chunks.append(chunk)
|
54 |
return chunks
|
55 |
|
56 |
-
|
57 |
class SemanticSearch:
|
58 |
|
59 |
def __init__(self):
|
@@ -88,21 +90,26 @@ class SemanticSearch:
|
|
88 |
|
89 |
recommender = SemanticSearch()
|
90 |
|
91 |
-
def load_recommender(
|
92 |
global recommender
|
93 |
-
|
94 |
-
|
|
|
|
|
95 |
recommender.fit(chunks)
|
96 |
return 'Corpus Loaded.'
|
97 |
-
|
98 |
def generate_text(prompt, engine="davinci"):
|
|
|
|
|
|
|
99 |
completions = openai.Completion.create(
|
100 |
engine=engine,
|
101 |
prompt=prompt,
|
102 |
max_tokens=512,
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
)
|
107 |
message = completions.choices[0].text
|
108 |
return message
|
@@ -124,23 +131,23 @@ def generate_answer(question):
|
|
124 |
answer = generate_text(prompt, "davinci")
|
125 |
return answer
|
126 |
|
127 |
-
def question_answer(
|
128 |
-
if
|
129 |
-
return '[ERROR]: Both
|
130 |
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
133 |
|
134 |
-
if
|
135 |
-
|
136 |
-
|
|
|
137 |
|
138 |
-
|
139 |
-
old_file_name = file.name
|
140 |
-
file_name = file.name
|
141 |
-
file_name = file_name[:-12] + file_name[-4:]
|
142 |
-
os.rename(old_file_name, file_name)
|
143 |
-
load_recommender(file_name)
|
144 |
|
145 |
if question.strip() == '':
|
146 |
return '[ERROR]: Question field is empty'
|
@@ -153,8 +160,8 @@ description = """ PDF GPT allows you to chat with your PDF file using Universal
|
|
153 |
iface = gr.Interface(
|
154 |
fn=question_answer,
|
155 |
inputs=[
|
156 |
-
gr.inputs.Textbox(label="Enter PDF
|
157 |
-
gr.inputs.File(label="Upload PDF
|
158 |
gr.inputs.Textbox(label="Enter your question here"),
|
159 |
],
|
160 |
outputs=gr.outputs.Textbox(label="Generated Answer"),
|
|
|
6 |
import openai
|
7 |
import gradio as gr
|
8 |
import os
|
9 |
+
import zipfile
|
10 |
from sklearn.neighbors import NearestNeighbors
|
11 |
|
12 |
openai.api_key = os.getenv('OpenAPI')
|
13 |
|
|
|
|
|
14 |
def download_pdf(url, output_path):
    """Fetch the resource at ``url`` and store it at ``output_path``.

    Args:
        url: Address of the PDF to download.
        output_path: Local file path the downloaded bytes are written to.
    """
    urllib.request.urlretrieve(url, filename=output_path)
|
16 |
|
17 |
+
def extract_zip(file, output_dir='pdfs'):
    """Extract every member of a zip archive into ``output_dir``.

    Args:
        file: Path of the zip archive (anything ``zipfile.ZipFile`` accepts).
        output_dir: Directory the members are extracted into. Defaults to
            ``'pdfs'``, the directory ``question_answer`` lists afterwards.

    Returns:
        The archive's member names, in archive order.

    Raises:
        zipfile.BadZipFile: If ``file`` is not a valid zip archive.
    """
    with zipfile.ZipFile(file, 'r') as zip_ref:
        zip_ref.extractall(output_dir)
        return zip_ref.namelist()
|
20 |
+
|
21 |
def preprocess(text):
|
22 |
text = text.replace('\n', ' ')
|
23 |
text = re.sub('\s+', ' ', text)
|
|
|
56 |
chunks.append(chunk)
|
57 |
return chunks
|
58 |
|
|
|
59 |
class SemanticSearch:
|
60 |
|
61 |
def __init__(self):
|
|
|
90 |
|
91 |
recommender = SemanticSearch()
|
92 |
|
93 |
+
def load_recommender(paths, start_page=1):
    """Build the corpus from one or more PDFs and fit the recommender on it.

    Args:
        paths: Iterable of PDF file paths. A single path given as ``str`` or
            ``os.PathLike`` is also accepted and treated as a one-item list.
        start_page: Forwarded unchanged to ``pdf_to_text`` and
            ``text_to_chunks`` for every file.

    Returns:
        The status string ``'Corpus Loaded.'``.
    """
    # A bare string would otherwise be iterated character by character,
    # handing one-letter "paths" to pdf_to_text.
    if isinstance(paths, (str, os.PathLike)):
        paths = [paths]

    chunks = []
    for path in paths:
        texts = pdf_to_text(path, start_page=start_page)
        chunks.extend(text_to_chunks(texts, start_page=start_page))

    # The module-level recommender is only mutated, never rebound, so no
    # ``global`` declaration is required.
    recommender.fit(chunks)
    return 'Corpus Loaded.'
|
101 |
+
|
102 |
def generate_text(prompt, engine="davinci"):
    """Request one completion for ``prompt`` and return its text.

    Args:
        prompt: Prompt string sent to the completion endpoint.
        engine: Name of the OpenAI engine to query.

    Returns:
        The ``text`` attribute of the first choice in the response.
    """
    completions = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        max_tokens=512,
        n=1,
        stop=None,
        temperature=0.7,
    )
    message = completions.choices[0].text
    return message
|
|
|
131 |
answer = generate_text(prompt, "davinci")
|
132 |
return answer
|
133 |
|
134 |
+
def question_answer(urls, file, question):
|
135 |
+
if urls.strip() == '' and file is None:
|
136 |
+
return '[ERROR]: Both URLs and PDFs are empty. Provide at least one.'
|
137 |
|
138 |
+
paths = []
|
139 |
+
if urls.strip() != '':
|
140 |
+
urls = urls.split(',') # split the URLs string into a list of URLs
|
141 |
+
for url in urls:
|
142 |
+
download_pdf(url.strip(), 'corpus.pdf')
|
143 |
+
paths.append('corpus.pdf')
|
144 |
|
145 |
+
if file is not None:
|
146 |
+
extract_zip(file.name) # extract the PDFs from the zip file
|
147 |
+
for pdf_file in os.listdir('pdfs'):
|
148 |
+
paths.append(os.path.join('pdfs', pdf_file))
|
149 |
|
150 |
+
load_recommender(paths)
|
|
|
|
|
|
|
|
|
|
|
151 |
|
152 |
if question.strip() == '':
|
153 |
return '[ERROR]: Question field is empty'
|
|
|
160 |
iface = gr.Interface(
|
161 |
fn=question_answer,
|
162 |
inputs=[
|
163 |
+
gr.inputs.Textbox(label="Enter PDF URLs here, separated by commas"),
|
164 |
+
gr.inputs.File(label="Upload a zip file containing PDF files"),
|
165 |
gr.inputs.Textbox(label="Enter your question here"),
|
166 |
],
|
167 |
outputs=gr.outputs.Textbox(label="Generated Answer"),
|