Spaces:

cogcorp
/

assignment1

Sleeping

App Files Files Community

cogcorp committed on May 24, 2023

Commit

48f21f7

1 Parent(s): 17d4b41

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -26

app.py CHANGED Viewed

@@ -6,15 +6,18 @@ import tensorflow_hub as hub
 import openai
 import gradio as gr
 import os
 from sklearn.neighbors import NearestNeighbors
 openai.api_key = os.getenv('OpenAPI')
 def download_pdf(url, output_path):
     """Download the resource at *url* and save it to *output_path*.

     Thin wrapper around ``urllib.request.urlretrieve``; no exceptions are
     caught here, so any error it raises reaches the caller. Returns ``None``.
     """
     fetch = urllib.request.urlretrieve
     fetch(url, filename=output_path)
 def preprocess(text):
     text = text.replace('\n', ' ')
     text = re.sub('\s+', ' ', text)
@@ -53,7 +56,6 @@ def text_to_chunks(texts, word_length=150, start_page=1):
             chunks.append(chunk)
     return chunks
 class SemanticSearch:
     def __init__(self):
@@ -88,21 +90,26 @@ class SemanticSearch:
 recommender = SemanticSearch()
def load_recommender(path, start_page=1):
    """Fit the module-level ``recommender`` on the chunks of a single PDF.

    The file at *path* is passed to ``pdf_to_text``, the result is split by
    ``text_to_chunks`` (both receive *start_page*), and the chunks are handed
    to ``recommender.fit``. Always returns the string ``'Corpus Loaded.'``.
    """
    global recommender
    page_texts = pdf_to_text(path, start_page=start_page)
    recommender.fit(text_to_chunks(page_texts, start_page=start_page))
    return 'Corpus Loaded.'
 def generate_text(prompt, engine="davinci"):
     """Return the text of one completion generated for *prompt*.

     Calls ``openai.Completion.create`` with the given *engine*, a 512-token
     limit, a single choice (``n=1``), no stop sequence and temperature 0.7,
     then returns the ``text`` of the first choice.
     """
     request = {
         "engine": engine,
         "prompt": prompt,
         "max_tokens": 512,
         "n": 1,
         "stop": None,
         "temperature": 0.7,
     }
     response = openai.Completion.create(**request)
     return response.choices[0].text
@@ -124,23 +131,23 @@ def generate_answer(question):
     answer = generate_text(prompt, "davinci")
     return answer
-def question_answer(url, file, question):
-    if url.strip() == '' and file is None:
-        return '[ERROR]: Both URL and PDF are empty. Provide at least one.'
-    if url.strip() != '' and file is not None:
-        return '[ERROR]: Both URL and PDF are provided. Please provide only one (either URL or PDF).'
-    if url.strip() != '':
-        download_pdf(url, 'corpus.pdf')
-        load_recommender('corpus.pdf')
-    else:
-        old_file_name = file.name
-        file_name = file.name
-        file_name = file_name[:-12] + file_name[-4:]
-        os.rename(old_file_name, file_name)
-        load_recommender(file_name)
     if question.strip() == '':
         return '[ERROR]: Question field is empty'
@@ -153,8 +160,8 @@ description = """ PDF GPT allows you to chat with your PDF file using Universal
 iface = gr.Interface(
     fn=question_answer,
     inputs=[
-        gr.inputs.Textbox(label="Enter PDF URL here"),
-        gr.inputs.File(label="Upload PDF file"),
         gr.inputs.Textbox(label="Enter your question here"),
     ],
     outputs=gr.outputs.Textbox(label="Generated Answer"),

 import openai
 import gradio as gr
 import os
+import zipfile
 from sklearn.neighbors import NearestNeighbors
 openai.api_key = os.getenv('OpenAPI')
 def download_pdf(url, output_path):
     """Download the resource at *url* and save it to *output_path*.

     Thin wrapper around ``urllib.request.urlretrieve``; no exceptions are
     caught here, so any error it raises reaches the caller. Returns ``None``.
     """
     fetch = urllib.request.urlretrieve
     fetch(url, filename=output_path)
def extract_zip(file, output_dir='pdfs'):
    """Extract every member of a zip archive into *output_dir*.

    Args:
        file: Path to the archive (or a file-like object), passed straight to
            ``zipfile.ZipFile``.
        output_dir: Destination directory. Defaults to ``'pdfs'`` relative to
            the current working directory, which is the location existing
            callers expect.

    Returns:
        None. Errors raised by ``zipfile`` (for example ``BadZipFile``) are
        not caught here.
    """
    # The context manager closes the archive handle even if extraction fails.
    with zipfile.ZipFile(file, 'r') as archive:
        archive.extractall(output_dir)
 def preprocess(text):
     text = text.replace('\n', ' ')
     text = re.sub('\s+', ' ', text)
             chunks.append(chunk)
     return chunks
 class SemanticSearch:
     def __init__(self):
 recommender = SemanticSearch()
def load_recommender(paths, start_page=1):
    """Build a corpus from one or more PDFs and fit the module-level recommender.

    Args:
        paths: Iterable of PDF file paths. A single path (``str``, ``bytes``
            or ``os.PathLike``) is also accepted and treated as a one-item
            list, so callers that pass one path keep working.
        start_page: Forwarded unchanged to ``pdf_to_text`` and
            ``text_to_chunks`` for every file.

    Returns:
        The string ``'Corpus Loaded.'``.
    """
    # A bare string is itself iterable; without this guard it would be walked
    # character by character and each character handed to pdf_to_text.
    if isinstance(paths, (str, bytes, os.PathLike)):
        paths = [paths]

    chunks = []
    for path in paths:
        texts = pdf_to_text(path, start_page=start_page)
        chunks.extend(text_to_chunks(texts, start_page=start_page))

    # Only a method is called on the global object (no rebinding), so no
    # ``global`` declaration is required.
    recommender.fit(chunks)
    return 'Corpus Loaded.'
 def generate_text(prompt, engine="davinci"):
     """Return the text of one completion generated for *prompt*.

     Args:
         prompt: Prompt string sent to the completion endpoint.
         engine: Engine name passed to ``openai.Completion.create``.

     Returns:
         The ``text`` field of the first (and only, since ``n=1``) choice.
     """
     # The stray chat-assistant sentence and markdown fence that had been
     # pasted in ahead of this call were not Python and have been removed.
     completions = openai.Completion.create(
         engine=engine,
         prompt=prompt,
         max_tokens=512,
         n=1,
         stop=None,
         temperature=0.7,
     )
     return completions.choices[0].text
     answer = generate_text(prompt, "davinci")
     return answer
+def question_answer(urls, file, question):
+    if urls.strip() == '' and file is None:
+        return '[ERROR]: Both URLs and PDFs are empty. Provide at least one.'
+    paths = []
+    if urls.strip() != '':
+        urls = urls.split(',')  # split the URLs string into a list of URLs
+        for url in urls:
+            download_pdf(url.strip(), 'corpus.pdf')
+            paths.append('corpus.pdf')
+    if file is not None:
+        extract_zip(file.name)  # extract the PDFs from the zip file
+        for pdf_file in os.listdir('pdfs'):
+            paths.append(os.path.join('pdfs', pdf_file))
+    load_recommender(paths)
     if question.strip() == '':
         return '[ERROR]: Question field is empty'
 iface = gr.Interface(
     fn=question_answer,
     inputs=[
+        gr.inputs.Textbox(label="Enter PDF URLs here, separated by commas"),
+        gr.inputs.File(label="Upload a zip file containing PDF files"),
         gr.inputs.Textbox(label="Enter your question here"),
     ],
     outputs=gr.outputs.Textbox(label="Generated Answer"),