ivyblossom committed on
Commit
e776a42
·
1 Parent(s): 6d20f5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -20
app.py CHANGED
@@ -1,28 +1,15 @@
1
  import os
2
  import streamlit as st
3
- from transformers import BertTokenizer, BertForQuestionAnswering, pipeline
4
  from PyPDF2 import PdfReader
5
  import tempfile
6
 
7
  # Function to perform question-answering
8
  @st.cache_data(show_spinner=False)
9
- def question_answering_bert(questions, pdf_text):
10
- tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
11
- model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
12
-
13
- answers = []
14
-
15
- for question in questions:
16
- inputs = tokenizer(question, pdf_text, padding=True, return_tensors='pt')
17
- outputs = model(**inputs)
18
- start_scores = outputs.start_logits
19
- end_scores = outputs.end_logits
20
-
21
- start_index = start_scores.argmax()
22
- end_index = end_scores.argmax() + 1
23
-
24
- answer = tokenizer.decode(inputs['input_ids'][0][start_index:end_index])
25
- answers.append({"answer": answer, "score": start_scores.max().item() + end_scores.max().item()})
26
 
27
  return answers
28
 
@@ -43,8 +30,8 @@ def main():
43
  pdf_reader = PdfReader(pdf_path)
44
  pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])
45
 
46
- # Perform question-answering using BERT model
47
- answers = question_answering_bert(questions, pdf_text)
48
 
49
  st.write("Questions and Answers:")
50
  for i, (question, answer) in enumerate(zip(questions, answers)):
 
1
  import os
2
  import streamlit as st
3
+ from transformers import pipeline
4
  from PyPDF2 import PdfReader
5
  import tempfile
6
 
7
  # Function to perform question-answering
8
  @st.cache_data(show_spinner=False)
9
+ def question_answering(questions, pdf_text):
10
+ # Perform question-answering using Hugging Face's Transformers
11
+ question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
12
+ answers = question_answerer(question=questions, context=pdf_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  return answers
15
 
 
30
  pdf_reader = PdfReader(pdf_path)
31
  pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])
32
 
33
+ # Perform question-answering in batches
34
+ answers = question_answering(questions, pdf_text)
35
 
36
  st.write("Questions and Answers:")
37
  for i, (question, answer) in enumerate(zip(questions, answers)):