Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,16 +8,16 @@ import gradio as gr
|
|
8 |
import os
|
9 |
from sklearn.neighbors import NearestNeighbors
|
10 |
|
|
|
|
|
11 |
def download_pdf(url, output_path):
    """Fetch the resource at ``url`` and store it at ``output_path``.

    Args:
        url: Location handed to ``urllib.request.urlretrieve``.
        output_path: Local file path that receives the downloaded content.
    """
    urllib.request.urlretrieve(url, filename=output_path)
|
13 |
|
14 |
-
|
15 |
def preprocess(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Args:
        text: String to normalise.

    Returns:
        ``text`` with each run of spaces, tabs, and newlines replaced by one
        space. Leading and trailing whitespace is collapsed, not stripped.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # and triggers a warning on modern Python.
    # `\s` already matches '\n', so no separate newline replacement is needed.
    return re.sub(r'\s+', ' ', text)
|
19 |
|
20 |
-
|
21 |
def pdf_to_text(path, start_page=1, end_page=None):
|
22 |
doc = fitz.open(path)
|
23 |
total_pages = doc.page_count
|
@@ -35,10 +35,8 @@ def pdf_to_text(path, start_page=1, end_page=None):
|
|
35 |
doc.close()
|
36 |
return text_list
|
37 |
|
38 |
-
|
39 |
def text_to_chunks(texts, word_length=150, start_page=1):
|
40 |
text_toks = [t.split(' ') for t in texts]
|
41 |
-
page_nums = []
|
42 |
chunks = []
|
43 |
|
44 |
for idx, words in enumerate(text_toks):
|
@@ -93,23 +91,21 @@ def load_recommender(path, start_page=1):
|
|
93 |
chunks = text_to_chunks(texts, start_page=start_page)
|
94 |
recommender.fit(chunks)
|
95 |
return 'Corpus Loaded.'
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
openai.api_key = openAI_key
|
100 |
completions = openai.Completion.create(
|
101 |
engine=engine,
|
102 |
prompt=prompt,
|
103 |
max_tokens=512,
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
)
|
108 |
message = completions.choices[0].text
|
109 |
return message
|
110 |
|
111 |
-
|
112 |
-
def generate_answer(question, openAI_key):
|
113 |
topn_chunks = recommender(question)
|
114 |
prompt = ""
|
115 |
prompt += 'search results:\n\n'
|
@@ -126,13 +122,10 @@ def generate_answer(question, openAI_key):
|
|
126 |
"answer should be short and concise. Answer step-by-step. \n\nQuery: {question}\nAnswer: "
|
127 |
|
128 |
prompt += f"Query: {question}\nAnswer:"
|
129 |
-
answer = generate_text(
|
130 |
return answer
|
131 |
|
132 |
-
|
133 |
-
def question_answer(url, file, question, openAI_key):
|
134 |
-
if openAI_key.strip() == '':
|
135 |
-
return '[ERROR]: Please enter your Open AI Key. Get your key here: https://platform.openai.com/account/api-keys'
|
136 |
if url.strip() == '' and file is None:
|
137 |
return '[ERROR]: Both URL and PDF are empty. Provide at least one.'
|
138 |
|
@@ -153,14 +146,22 @@ def question_answer(url, file, question, openAI_key):
|
|
153 |
if question.strip() == '':
|
154 |
return '[ERROR]: Question field is empty'
|
155 |
|
156 |
-
return generate_answer(question
|
157 |
-
|
158 |
|
159 |
# Heading text for the app; presumably passed to the gr.Interface(...) call
# that follows -- TODO confirm, the call's arguments are not visible here.
title = 'PDF GPT'
|
160 |
# Blurb describing the app; presumably shown in the UI alongside `title`.
# NOTE(review): the sentence "It gives hallucination free response than other
# tools ..." is ungrammatical; it is left untouched because it is runtime text.
description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
|
161 |
|
162 |
-
iface = gr.Interface(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
iface.launch()
|
164 |
|
165 |
|
166 |
-
|
|
|
8 |
import os
|
9 |
from sklearn.neighbors import NearestNeighbors
|
10 |
|
11 |
+
# Set the OpenAI client's API key once, at import time.
# NOTE(review): `openAI` is not defined in the visible part of this file --
# confirm it is assigned or imported above, otherwise this line raises
# NameError as soon as the module is loaded.
openai.api_key = openAI
|
12 |
+
|
13 |
def download_pdf(url, output_path):
    """Fetch the resource at ``url`` and store it at ``output_path``.

    Args:
        url: Location handed to ``urllib.request.urlretrieve``.
        output_path: Local file path that receives the downloaded content.
    """
    urllib.request.urlretrieve(url, filename=output_path)
|
15 |
|
|
|
16 |
def preprocess(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Args:
        text: String to normalise.

    Returns:
        ``text`` with each run of spaces, tabs, and newlines replaced by one
        space. Leading and trailing whitespace is collapsed, not stripped.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # and triggers a warning on modern Python.
    # `\s` already matches '\n', so no separate newline replacement is needed.
    return re.sub(r'\s+', ' ', text)
|
20 |
|
|
|
21 |
def pdf_to_text(path, start_page=1, end_page=None):
|
22 |
doc = fitz.open(path)
|
23 |
total_pages = doc.page_count
|
|
|
35 |
doc.close()
|
36 |
return text_list
|
37 |
|
|
|
38 |
def text_to_chunks(texts, word_length=150, start_page=1):
|
39 |
text_toks = [t.split(' ') for t in texts]
|
|
|
40 |
chunks = []
|
41 |
|
42 |
for idx, words in enumerate(text_toks):
|
|
|
91 |
chunks = text_to_chunks(texts, start_page=start_page)
|
92 |
recommender.fit(chunks)
|
93 |
return 'Corpus Loaded.'
|
94 |
+
|
95 |
+
def generate_text(prompt, engine="davinci"):
    """Request one completion for ``prompt`` and return its text.

    Args:
        prompt: Prompt string sent to ``openai.Completion.create``.
        engine: Engine name forwarded to the API call; defaults to "davinci".

    Returns:
        The ``text`` attribute of the first choice in the response.
    """
    # NOTE(review): `openAI` is not defined in the visible part of this file --
    # confirm it is assigned before this function is called.
    openai.api_key = openAI
    response = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        max_tokens=512,
        n=1,  # a single completion; only choices[0] is read below
        stop=None,
        temperature=0.7,
    )
    return response.choices[0].text
|
107 |
|
108 |
+
def generate_answer(question):
|
|
|
109 |
topn_chunks = recommender(question)
|
110 |
prompt = ""
|
111 |
prompt += 'search results:\n\n'
|
|
|
122 |
"answer should be short and concise. Answer step-by-step. \n\nQuery: {question}\nAnswer: "
|
123 |
|
124 |
prompt += f"Query: {question}\nAnswer:"
|
125 |
+
answer = generate_text(prompt, "davinci")
|
126 |
return answer
|
127 |
|
128 |
+
def question_answer(url, file, question):
|
|
|
|
|
|
|
129 |
if url.strip() == '' and file is None:
|
130 |
return '[ERROR]: Both URL and PDF are empty. Provide at least one.'
|
131 |
|
|
|
146 |
if question.strip() == '':
|
147 |
return '[ERROR]: Question field is empty'
|
148 |
|
149 |
+
return generate_answer(question)
|
|
|
150 |
|
151 |
# Static text handed to the Gradio interface below.
# NOTE(review): the sentence "It gives hallucination free response than other
# tools ..." in `description` is ungrammatical; kept as-is because it is
# user-facing runtime text.
title = 'PDF GPT'
description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""

# The three inputs are listed in the same order as the
# question_answer(url, file, question) parameters.
iface = gr.Interface(
    fn=question_answer,
    inputs=[
        gr.inputs.Textbox(label="Enter PDF URL here"),
        gr.inputs.File(label="Upload PDF file"),
        gr.inputs.Textbox(label="Enter your question here"),
    ],
    outputs=gr.outputs.Textbox(label="Generated Answer"),
    title=title,
    description=description,
)
iface.launch()
|
166 |
|
167 |
|
|