ivyblossom committed on
Commit
12978ef
·
1 Parent(s): 795f458

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -18
app.py CHANGED
@@ -4,41 +4,43 @@ from transformers import pipeline
4
  from PyPDF2 import PdfReader
5
 
6
  # Function to perform question-answering
def question_answering(question, pdf_path):
    """Answer ``question`` using the text of the PDF stored at ``pdf_path``.

    The text of every page is extracted and joined with newlines into a
    single context string, which is handed to a "question-answering"
    pipeline together with the question. The pipeline's result is returned
    unchanged.
    """
    model_name = "distilbert-base-cased-distilled-squad"

    # Collect the text of each page, in document order, as one context string.
    reader = PdfReader(pdf_path)
    page_texts = [page.extract_text() for page in reader.pages]
    context = "\n".join(page_texts)

    # Perform question-answering using Hugging Face's Transformers
    qa_pipeline = pipeline("question-answering", model=model_name, tokenizer=model_name)
    return qa_pipeline(question=question, context=context)
22
 
def main():
    """Render the Streamlit page: upload a PDF, ask a question, show the answer.

    The uploaded PDF is written to a temporary file in the current working
    directory so it can be passed to ``question_answering`` by path; the file
    is removed again afterwards, even when processing fails.
    """
    st.title("Question Answering on a PDF File")

    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
    question = st.text_input("Ask your question:")

    if st.button("Answer") and uploaded_file is not None:
        if not question.strip():
            # Nothing to ask: skip the expensive model call.
            st.warning("Please enter a question.")
            return

        # Keep only the file name so the upload cannot be written outside
        # the working directory.
        pdf_path = os.path.join(os.getcwd(), os.path.basename(uploaded_file.name))
        try:
            with open(pdf_path, "wb") as f:
                # getvalue() returns the whole buffer regardless of the
                # current read position of the upload.
                f.write(uploaded_file.getvalue())

            answer = question_answering(question, pdf_path)
        finally:
            # Delete the uploaded file after processing, including when
            # writing or question-answering raised.
            if os.path.exists(pdf_path):
                os.remove(pdf_path)

        st.write(f"Question: '{question}'")
        st.write("Answer:", answer['answer'])
        st.write("Score:", answer['score'])


if __name__ == "__main__":
    main()
 
4
  from PyPDF2 import PdfReader
5
 
6
  # Function to perform question-answering
# NOTE(review): newer Streamlit releases deprecate st.cache in favour of
# st.cache_data / st.cache_resource — confirm the deployed version before
# switching decorators.
@st.cache(show_spinner=False)
def question_answering(questions, pdf_text):
    """Answer one or more questions against the extracted PDF text.

    Args:
        questions: A list of question strings. A single string is also
            accepted and is passed to the pipeline as-is.
        pdf_text: The text used as context for every question.

    Returns:
        For a list of questions, a list with one pipeline result per
        question, in the same order as ``questions`` (empty when no
        questions are given). For a single string, the pipeline's result
        unchanged.
    """
    single_question = isinstance(questions, str)
    if not single_question:
        questions = list(questions)
        if not questions:
            # No questions: avoid loading the model for nothing.
            return []

    # Perform question-answering using Hugging Face's Transformers
    model_name = "distilbert-base-cased-distilled-squad"
    question_answerer = pipeline("question-answering", model=model_name, tokenizer=model_name)
    answers = question_answerer(question=questions, context=pdf_text)

    # When only one question is supplied the pipeline may hand back a single
    # result dict instead of a list. Wrap it so callers can always pair
    # questions with answers via zip().
    if not single_question and isinstance(answers, dict):
        answers = [answers]

    return answers
14
 
def main():
    """Render the Streamlit page: upload a PDF, ask questions, show answers.

    Questions are entered one per line. The uploaded PDF is written to a
    temporary file in the current working directory, its text is extracted
    once, and the file is removed again even if extraction fails. Every
    non-blank question is then answered against that text.
    """
    st.title("Question Answering on PDF Files")

    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
    # st.text_input is a single-line widget, so newline-separated questions
    # need the multi-line st.text_area.
    question = st.text_area("Ask your question:")

    if not st.button("Answer"):
        return
    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        return

    # Get a list of questions (one per line), ignoring blank lines.
    questions = [line.strip() for line in question.splitlines() if line.strip()]
    if not questions:
        st.warning("Please enter at least one question.")
        return

    # Keep only the file name so the upload cannot be written outside the
    # working directory.
    pdf_path = os.path.join(os.getcwd(), os.path.basename(uploaded_file.name))
    try:
        with open(pdf_path, "wb") as f:
            # getvalue() returns the whole buffer regardless of the current
            # read position of the upload.
            f.write(uploaded_file.getvalue())

        # Read PDF text once so it can be shared by all questions.
        pdf_reader = PdfReader(pdf_path)
        pdf_text = "\n".join(
            pdf_page.extract_text() or "" for pdf_page in pdf_reader.pages
        )
    finally:
        # Delete the uploaded file after processing, including on failure.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)

    if not pdf_text.strip():
        st.warning("No text could be extracted from this PDF.")
        return

    # Perform question-answering in batches
    answers = question_answering(questions, pdf_text)
    # A single question can come back as one result dict rather than a list;
    # normalise so zip() pairs each question with its own answer.
    if isinstance(answers, dict):
        answers = [answers]

    st.write("Questions and Answers:")
    for number, (asked, answer) in enumerate(zip(questions, answers), start=1):
        st.write(f"Question {number}: '{asked}'")
        st.write("Answer:", answer['answer'])
        st.write("Score:", answer['score'])


if __name__ == "__main__":
    main()