cogcorp committed on
Commit
48f21f7
·
1 Parent(s): 17d4b41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -26
app.py CHANGED
@@ -6,15 +6,18 @@ import tensorflow_hub as hub
6
  import openai
7
  import gradio as gr
8
  import os
 
9
  from sklearn.neighbors import NearestNeighbors
10
 
11
  openai.api_key = os.getenv('OpenAPI')
12
 
13
-
14
-
15
def download_pdf(url, output_path):
    """Download the document at ``url`` and save it to ``output_path``.

    Args:
        url: Address of the PDF. Only ``http://`` and ``https://`` URLs are
            accepted.
        output_path: Local file path the downloaded bytes are written to.

    Raises:
        ValueError: If ``url`` does not use the http or https scheme.
    """
    # The URL comes straight from the app's text box. urlretrieve also honors
    # schemes such as file://, which would copy files off the host machine.
    if not url.lower().startswith(('http://', 'https://')):
        raise ValueError('Only http:// and https:// URLs can be downloaded.')
    urllib.request.urlretrieve(url, output_path)
17
 
 
 
 
 
18
  def preprocess(text):
19
  text = text.replace('\n', ' ')
20
  text = re.sub('\s+', ' ', text)
@@ -53,7 +56,6 @@ def text_to_chunks(texts, word_length=150, start_page=1):
53
  chunks.append(chunk)
54
  return chunks
55
 
56
-
57
  class SemanticSearch:
58
 
59
  def __init__(self):
@@ -88,21 +90,26 @@ class SemanticSearch:
88
 
89
  recommender = SemanticSearch()
90
 
91
def load_recommender(path, start_page=1):
    """Fit the module-level ``recommender`` on the chunks of one PDF.

    Runs ``pdf_to_text`` on ``path``, splits the result with
    ``text_to_chunks`` (both receive ``start_page``) and passes the chunks
    to ``recommender.fit``.

    Returns:
        str: The status message ``'Corpus Loaded.'``.
    """
    global recommender
    page_texts = pdf_to_text(path, start_page=start_page)
    recommender.fit(text_to_chunks(page_texts, start_page=start_page))
    return 'Corpus Loaded.'
97
-
98
def generate_text(prompt, engine="davinci"):
    """Return the text of a single completion for ``prompt``.

    Args:
        prompt: Text sent as the completion prompt.
        engine: Engine name handed to ``openai.Completion.create``.

    Returns:
        The ``text`` attribute of the first choice in the response.
    """
    response = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        max_tokens=512,
        n=1,
        stop=None,
        temperature=0.7,
    )
    first_choice = response.choices[0]
    return first_choice.text
@@ -124,23 +131,23 @@ def generate_answer(question):
124
  answer = generate_text(prompt, "davinci")
125
  return answer
126
 
127
- def question_answer(url, file, question):
128
- if url.strip() == '' and file is None:
129
- return '[ERROR]: Both URL and PDF are empty. Provide at least one.'
130
 
131
- if url.strip() != '' and file is not None:
132
- return '[ERROR]: Both URL and PDF are provided. Please provide only one (either URL or PDF).'
 
 
 
 
133
 
134
- if url.strip() != '':
135
- download_pdf(url, 'corpus.pdf')
136
- load_recommender('corpus.pdf')
 
137
 
138
- else:
139
- old_file_name = file.name
140
- file_name = file.name
141
- file_name = file_name[:-12] + file_name[-4:]
142
- os.rename(old_file_name, file_name)
143
- load_recommender(file_name)
144
 
145
  if question.strip() == '':
146
  return '[ERROR]: Question field is empty'
@@ -153,8 +160,8 @@ description = """ PDF GPT allows you to chat with your PDF file using Universal
153
  iface = gr.Interface(
154
  fn=question_answer,
155
  inputs=[
156
- gr.inputs.Textbox(label="Enter PDF URL here"),
157
- gr.inputs.File(label="Upload PDF file"),
158
  gr.inputs.Textbox(label="Enter your question here"),
159
  ],
160
  outputs=gr.outputs.Textbox(label="Generated Answer"),
 
6
  import openai
7
  import gradio as gr
8
  import os
9
+ import zipfile
10
  from sklearn.neighbors import NearestNeighbors
11
 
12
  openai.api_key = os.getenv('OpenAPI')
13
 
 
 
14
def download_pdf(url, output_path):
    """Download the document at ``url`` and save it to ``output_path``.

    Args:
        url: Address of the PDF. Only ``http://`` and ``https://`` URLs are
            accepted.
        output_path: Local file path the downloaded bytes are written to.

    Raises:
        ValueError: If ``url`` does not use the http or https scheme.
    """
    # The URL comes straight from the app's text box. urlretrieve also honors
    # schemes such as file://, which would copy files off the host machine.
    if not url.lower().startswith(('http://', 'https://')):
        raise ValueError('Only http:// and https:// URLs can be downloaded.')
    urllib.request.urlretrieve(url, output_path)
16
 
17
def extract_zip(file, target_dir='pdfs'):
    """Unpack the PDF members of a zip archive into ``target_dir``.

    Args:
        file: Path to (or file object of) the zip archive.
        target_dir: Directory that receives the files. Defaults to
            ``'pdfs'``; it is created if it does not exist yet.

    Returns:
        list[str]: Paths of the extracted PDF files, in archive order.
    """
    # Create the directory up front so callers can list it even when the
    # archive holds no PDF at all.
    os.makedirs(target_dir, exist_ok=True)
    extracted = []
    with zipfile.ZipFile(file, 'r') as zip_ref:
        for member in zip_ref.infolist():
            # Everything found in the target directory is later fed to the
            # PDF loader, so folder entries and non-PDF files are left out.
            if member.is_dir():
                continue
            if not member.filename.lower().endswith('.pdf'):
                continue
            extracted.append(zip_ref.extract(member, target_dir))
    return extracted
20
+
21
  def preprocess(text):
22
  text = text.replace('\n', ' ')
23
  text = re.sub('\s+', ' ', text)
 
56
  chunks.append(chunk)
57
  return chunks
58
 
 
59
  class SemanticSearch:
60
 
61
  def __init__(self):
 
90
 
91
  recommender = SemanticSearch()
92
 
93
def load_recommender(paths, start_page=1):
    """Fit the module-level ``recommender`` on the chunks of several PDFs.

    Each path is run through ``pdf_to_text`` and ``text_to_chunks`` (both
    receive ``start_page``); the chunks of all files are combined and passed
    to ``recommender.fit`` in one call.

    Args:
        paths: Iterable of PDF file paths. A single path (``str`` or
            ``os.PathLike``) is accepted as well.
        start_page: Forwarded unchanged to ``pdf_to_text`` and
            ``text_to_chunks``.

    Returns:
        str: The status message ``'Corpus Loaded.'``.
    """
    # A bare string would otherwise be iterated character by character,
    # which breaks callers that still pass one path as before.
    if isinstance(paths, (str, os.PathLike)):
        paths = [paths]

    chunks = []
    for path in paths:
        texts = pdf_to_text(path, start_page=start_page)
        chunks.extend(text_to_chunks(texts, start_page=start_page))
    recommender.fit(chunks)
    return 'Corpus Loaded.'
101
+
102
def generate_text(prompt, engine="davinci"):
    """Request one completion for ``prompt`` and return its text.

    Args:
        prompt: Text sent as the completion prompt.
        engine: Engine name handed to ``openai.Completion.create``.
            Defaults to ``"davinci"``.

    Returns:
        The ``text`` attribute of the first choice in the response.
    """
    # Stray chat-assistant text and a Markdown code fence had been pasted
    # into this body; they were not Python and have been removed.
    completions = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        max_tokens=512,
        n=1,
        stop=None,
        temperature=0.7,
    )
    message = completions.choices[0].text
    return message
 
131
  answer = generate_text(prompt, "davinci")
132
  return answer
133
 
134
+ def question_answer(urls, file, question):
135
+ if urls.strip() == '' and file is None:
136
+ return '[ERROR]: Both URLs and PDFs are empty. Provide at least one.'
137
 
138
+ paths = []
139
+ if urls.strip() != '':
140
+ urls = urls.split(',') # split the URLs string into a list of URLs
141
+ for url in urls:
142
+ download_pdf(url.strip(), 'corpus.pdf')
143
+ paths.append('corpus.pdf')
144
 
145
+ if file is not None:
146
+ extract_zip(file.name) # extract the PDFs from the zip file
147
+ for pdf_file in os.listdir('pdfs'):
148
+ paths.append(os.path.join('pdfs', pdf_file))
149
 
150
+ load_recommender(paths)
 
 
 
 
 
151
 
152
  if question.strip() == '':
153
  return '[ERROR]: Question field is empty'
 
160
  iface = gr.Interface(
161
  fn=question_answer,
162
  inputs=[
163
+ gr.inputs.Textbox(label="Enter PDF URLs here, separated by commas"),
164
+ gr.inputs.File(label="Upload a zip file containing PDF files"),
165
  gr.inputs.Textbox(label="Enter your question here"),
166
  ],
167
  outputs=gr.outputs.Textbox(label="Generated Answer"),