cogcorp committed on
Commit
ba952d1
·
1 Parent(s): 75ce576

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -22
app.py CHANGED
@@ -8,16 +8,16 @@ import gradio as gr
8
  import os
9
  from sklearn.neighbors import NearestNeighbors
10
 
 
 
11
  def download_pdf(url, output_path):
12
  urllib.request.urlretrieve(url, output_path)
13
 
14
-
15
  def preprocess(text):
16
  text = text.replace('\n', ' ')
17
  text = re.sub('\s+', ' ', text)
18
  return text
19
 
20
-
21
  def pdf_to_text(path, start_page=1, end_page=None):
22
  doc = fitz.open(path)
23
  total_pages = doc.page_count
@@ -35,10 +35,8 @@ def pdf_to_text(path, start_page=1, end_page=None):
35
  doc.close()
36
  return text_list
37
 
38
-
39
  def text_to_chunks(texts, word_length=150, start_page=1):
40
  text_toks = [t.split(' ') for t in texts]
41
- page_nums = []
42
  chunks = []
43
 
44
  for idx, words in enumerate(text_toks):
@@ -93,23 +91,21 @@ def load_recommender(path, start_page=1):
93
  chunks = text_to_chunks(texts, start_page=start_page)
94
  recommender.fit(chunks)
95
  return 'Corpus Loaded.'
96
-
97
-
98
- def generate_text(openAI_key, prompt, engine="davinci"):
99
- openai.api_key = openAI_key
100
  completions = openai.Completion.create(
101
  engine=engine,
102
  prompt=prompt,
103
  max_tokens=512,
104
- n=1,
105
- stop=None,
106
- temperature=0.7,
107
  )
108
  message = completions.choices[0].text
109
  return message
110
 
111
-
112
- def generate_answer(question, openAI_key):
113
  topn_chunks = recommender(question)
114
  prompt = ""
115
  prompt += 'search results:\n\n'
@@ -126,13 +122,10 @@ def generate_answer(question, openAI_key):
126
  "answer should be short and concise. Answer step-by-step. \n\nQuery: {question}\nAnswer: "
127
 
128
  prompt += f"Query: {question}\nAnswer:"
129
- answer = generate_text(openAI_key, prompt, "davinci")
130
  return answer
131
 
132
-
133
- def question_answer(url, file, question, openAI_key):
134
- if openAI_key.strip() == '':
135
- return '[ERROR]: Please enter your Open AI Key. Get your key here: https://platform.openai.com/account/api-keys'
136
  if url.strip() == '' and file is None:
137
  return '[ERROR]: Both URL and PDF are empty. Provide at least one.'
138
 
@@ -153,14 +146,22 @@ def question_answer(url, file, question, openAI_key):
153
  if question.strip() == '':
154
  return '[ERROR]: Question field is empty'
155
 
156
- return generate_answer(question, openAI_key)
157
-
158
 
159
  title = 'PDF GPT'
160
  description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
161
 
162
- iface = gr.Interface(fn=question_answer, inputs=[gr.inputs.Textbox(label="Enter PDF URL here"), file, question, openAI_key], outputs=[answer], title=title, description=description)
 
 
 
 
 
 
 
 
 
 
163
  iface.launch()
164
 
165
 
166
-
 
8
  import os
9
  from sklearn.neighbors import NearestNeighbors
10
 
11
+ openai.api_key = openAI
12
+
13
  def download_pdf(url, output_path):
14
  urllib.request.urlretrieve(url, output_path)
15
 
 
16
  def preprocess(text):
17
  text = text.replace('\n', ' ')
18
  text = re.sub('\s+', ' ', text)
19
  return text
20
 
 
21
  def pdf_to_text(path, start_page=1, end_page=None):
22
  doc = fitz.open(path)
23
  total_pages = doc.page_count
 
35
  doc.close()
36
  return text_list
37
 
 
38
  def text_to_chunks(texts, word_length=150, start_page=1):
39
  text_toks = [t.split(' ') for t in texts]
 
40
  chunks = []
41
 
42
  for idx, words in enumerate(text_toks):
 
91
  chunks = text_to_chunks(texts, start_page=start_page)
92
  recommender.fit(chunks)
93
  return 'Corpus Loaded.'
94
+
95
+ def generate_text(prompt, engine="davinci"):
96
+ openai.api_key = openAI
 
97
  completions = openai.Completion.create(
98
  engine=engine,
99
  prompt=prompt,
100
  max_tokens=512,
101
+ n=1,
102
+ stop=None,
103
+ temperature=0.7,
104
  )
105
  message = completions.choices[0].text
106
  return message
107
 
108
+ def generate_answer(question):
 
109
  topn_chunks = recommender(question)
110
  prompt = ""
111
  prompt += 'search results:\n\n'
 
122
  "answer should be short and concise. Answer step-by-step. \n\nQuery: {question}\nAnswer: "
123
 
124
  prompt += f"Query: {question}\nAnswer:"
125
+ answer = generate_text(prompt, "davinci")
126
  return answer
127
 
128
+ def question_answer(url, file, question):
 
 
 
129
  if url.strip() == '' and file is None:
130
  return '[ERROR]: Both URL and PDF are empty. Provide at least one.'
131
 
 
146
  if question.strip() == '':
147
  return '[ERROR]: Question field is empty'
148
 
149
+ return generate_answer(question)
 
150
 
151
  title = 'PDF GPT'
152
  description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
153
 
154
+ iface = gr.Interface(
155
+ fn=question_answer,
156
+ inputs=[
157
+ gr.inputs.Textbox(label="Enter PDF URL here"),
158
+ gr.inputs.File(label="Upload PDF file"),
159
+ gr.inputs.Textbox(label="Enter your question here"),
160
+ ],
161
+ outputs=gr.outputs.Textbox(label="Generated Answer"),
162
+ title=title,
163
+ description=description
164
+ )
165
  iface.launch()
166
 
167