Spaces:

ivyblossom
/

question-answering

Running

App Files Community

ivyblossom committed on Aug 4, 2023

Commit

12978ef

1 Parent(s): 795f458

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -18

app.py CHANGED Viewed

@@ -4,41 +4,43 @@ from transformers import pipeline
 from PyPDF2 import PdfReader
 # Function to perform question-answering
-def question_answering(question, pdf_path):
-    pdf_reader = PdfReader(pdf_path)
-    pdf_text_with_pages = []
-    for page_num, pdf_page in enumerate(pdf_reader.pages, start=1):
-        pdf_text = pdf_page.extract_text()
-        pdf_text_with_pages.append((page_num, pdf_text))
-    pdf_text = "\n".join([text for _, text in pdf_text_with_pages])
     # Perform question-answering using Hugging Face's Transformers
     question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
-    answer = question_answerer(question=question, context=pdf_text)
-    return answer
 def main():
-    st.title("Question Answering on a PDF File")
     uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
     question = st.text_input("Ask your question:")
     if st.button("Answer") and uploaded_file is not None:
         pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
         with open(pdf_path, "wb") as f:
             f.write(uploaded_file.read())
-        answer = question_answering(question, pdf_path)
         # Delete the uploaded file after processing
         os.remove(pdf_path)
-        st.write(f"Question: '{question}'")
-        st.write("Answer:", answer['answer'])
-        st.write("Score:", answer['score'])
 if __name__ == "__main__":
     main()

 from PyPDF2 import PdfReader
 # Function to perform question-answering
+@st.cache(show_spinner=False)
+def question_answering(questions, pdf_text):
     # Perform question-answering using Hugging Face's Transformers
     question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
+    answers = question_answerer(question=questions, context=pdf_text)
+    return answers
 def main():
+    st.title("Question Answering on PDF Files")
     uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
     question = st.text_input("Ask your question:")
     if st.button("Answer") and uploaded_file is not None:
         pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
         with open(pdf_path, "wb") as f:
             f.write(uploaded_file.read())
+        # Read PDF text once and cache it for batch processing
+        pdf_reader = PdfReader(pdf_path)
+        pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])
+        # Get a list of questions (assuming the user enters multiple questions separated by newlines)
+        questions = question.split('\n')
+        # Perform question-answering in batches
+        answers = question_answering(questions, pdf_text)
         # Delete the uploaded file after processing
         os.remove(pdf_path)
+        st.write("Questions and Answers:")
+        for i, (question, answer) in enumerate(zip(questions, answers)):
+            st.write(f"Question {i + 1}: '{question}'")
+            st.write("Answer:", answer['answer'])
+            st.write("Score:", answer['score'])
 if __name__ == "__main__":
     main()