ivyblossom committed
Commit 6d20f5a · 1 Parent(s): ed5b1fa

Update app.py

Files changed (1)
  1. app.py +21 -8
app.py CHANGED
@@ -1,15 +1,28 @@
 import os
 import streamlit as st
-from transformers import pipeline
+from transformers import BertTokenizer, BertForQuestionAnswering, pipeline
 from PyPDF2 import PdfReader
 import tempfile

 # Function to perform question-answering
 @st.cache_data(show_spinner=False)
-def question_answering(questions, pdf_text):
-    # Perform question-answering using Hugging Face's Transformers
-    question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
-    answers = question_answerer(question=questions, context=pdf_text)
+def question_answering_bert(questions, pdf_text):
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
+
+    answers = []
+
+    for question in questions:
+        inputs = tokenizer(question, pdf_text, padding=True, return_tensors='pt')
+        outputs = model(**inputs)
+        start_scores = outputs.start_logits
+        end_scores = outputs.end_logits
+
+        start_index = start_scores.argmax()
+        end_index = end_scores.argmax() + 1
+
+        answer = tokenizer.decode(inputs['input_ids'][0][start_index:end_index])
+        answers.append({"answer": answer, "score": start_scores.max().item() + end_scores.max().item()})

     return answers

@@ -30,8 +43,8 @@ def main():
     pdf_reader = PdfReader(pdf_path)
     pdf_text = "\n".join([pdf_page.extract_text() for pdf_page in pdf_reader.pages])

-    # Perform question-answering in batches
-    answers = question_answering(questions, pdf_text)
+    # Perform question-answering using BERT model
+    answers = question_answering_bert(questions, pdf_text)

     st.write("Questions and Answers:")
     for i, (question, answer) in enumerate(zip(questions, answers)):
@@ -40,4 +53,4 @@ def main():
        st.write("Score:", answer['score'])

 if __name__ == "__main__":
-    main()
+    main()
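
Below is a minimal standalone sketch (not part of the commit) of the manual start/end-logit decoding that the new question_answering_bert helper performs. The context string, question, and the use of truncation=True are illustrative assumptions; note also that bert-base-uncased ships without a fine-tuned question-answering head, so a SQuAD-tuned checkpoint would be needed for meaningful answers on real PDF text.

# Illustrative only: same decoding logic as the committed helper, on a short
# made-up context. Assumes transformers and torch are installed.
import torch
from transformers import BertTokenizer, BertForQuestionAnswering

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")

context = "Streamlit is an open-source Python framework first released in 2019."
question = "When was Streamlit first released?"

# truncation=True keeps the input within BERT's 512-token limit, which the full
# text of a PDF extracted by PyPDF2 would normally exceed.
inputs = tokenizer(question, context, truncation=True, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Pick the most likely answer span from the start/end logits, as in the commit.
start_index = outputs.start_logits.argmax()
end_index = outputs.end_logits.argmax() + 1
answer = tokenizer.decode(inputs["input_ids"][0][start_index:end_index])
print(answer)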