ivyblossom committed
Commit 6d20f5a · 1 Parent(s): ed5b1fa

Update app.py

Files changed (1)
  1. app.py +21 -8
app.py CHANGED
@@ -1,15 +1,28 @@
 import os
 import streamlit as st
-from transformers import pipeline
+from transformers import BertTokenizer, BertForQuestionAnswering, pipeline
 from PyPDF2 import PdfReader
 import tempfile

 # Function to perform question-answering
 @st.cache_data(show_spinner=False)
-def question_answering(questions, pdf_text):
-    # Perform question-answering using Hugging Face's Transformers
-    question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
-    answers = question_answerer(question=questions, context=pdf_text)
+def question_answering_bert(questions, pdf_text):
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
+
+    answers = []
+
+    for question in questions:
+        inputs = tokenizer(question, pdf_text, padding=True, return_tensors='pt')
+        outputs = model(**inputs)
+        start_scores = outputs.start_logits
+        end_scores = outputs.end_logits
+
+        start_index = start_scores.argmax()
+        end_index = end_scores.argmax() + 1
+
+        answer = tokenizer.decode(inputs['input_ids'][0][start_index:end_index])
+        answers.append({"answer": answer, "score": start_scores.max().item() + end_scores.max().item()})

     return answers

@@ -30,8 +43,8 @@ def main():
     pdf_reader = PdfReader(pdf_path)
     pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])

-    # Perform question-answering in batches
-    answers = question_answering(questions, pdf_text)
+    # Perform question-answering using BERT model
+    answers = question_answering_bert(questions, pdf_text)

     st.write("Questions and Answers:")
     for i, (question, answer) in enumerate(zip(questions, answers)):
@@ -40,4 +53,4 @@ def main():
        st.write("Score:", answer['score'])

 if __name__ == "__main__":
-    main()
+    main()
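
Below is a minimal standalone sketch (not part of the commit) of the manual start/end-logit decoding that the new question_answering_bert helper performs. The context string, question, and the use of truncation=True are illustrative assumptions; note also that bert-base-uncased ships without a fine-tuned question-answering head, so a SQuAD-tuned checkpoint would be needed for meaningful answers on real PDF text.

# Illustrative only: same decoding logic as the committed helper, on a short
# made-up context. Assumes transformers and torch are installed.
import torch
from transformers import BertTokenizer, BertForQuestionAnswering

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")

context = "Streamlit is an open-source Python framework first released in 2019."
question = "When was Streamlit first released?"

# truncation=True keeps the input within BERT's 512-token limit, which the full
# text of a PDF extracted by PyPDF2 would normally exceed.
inputs = tokenizer(question, context, truncation=True, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Pick the most likely answer span from the start/end logits, as in the commit.
start_index = outputs.start_logits.argmax()
end_index = outputs.end_logits.argmax() + 1
answer = tokenizer.decode(inputs["input_ids"][0][start_index:end_index])
print(answer)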