Spaces:

kxx-kkk
/

FYP_Essay_QASystem

Sleeping

App Files Files Community

kxx-kkk committed on Apr 10, 2024

Commit

505522b

verified ·

1 Parent(s): 2b5f59d

Upload app.py

Browse files

Files changed (1) hide show

app.py +55 -55

app.py CHANGED Viewed

@@ -30,63 +30,63 @@ def question_model():
     question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer, handle_impossible_answer=True)
     return question_answerer
-# get the answer by passing the context & question to the model
-def question_answering(context, question):
-    with st.spinner(text="Loading question model..."):
-        question_answerer = question_model()
-    with st.spinner(text="Getting answer..."):
-        answer = question_answerer(context=context, question=question)
-        print(answer)
-        answer_score = str(answer["score"])
-        answer = answer["answer"]
-        if (answer==""):
-            answer = "CANNOT ANSWER"
-        # display the result in container
-        container = st.container(border=True)
-        container.write("<h5><b>Answer:</b></h5>"+answer+"<p><small>(F1 score: "+answer_score+")</small></p><br>", unsafe_allow_html=True)
 # def question_answering(context, question):
 #     with st.spinner(text="Loading question model..."):
 #         question_answerer = question_model()
 #     with st.spinner(text="Getting answer..."):
-#         segment_size = 45000
-#         overlap_size = 50
-#         text_length = len(context)
-#         segments = []
-#         # Split context into segments
-#         for i in range(0, text_length, segment_size - overlap_size):
-#             segment_start = i
-#             segment_end = i + segment_size
-#             segment = context[segment_start:segment_end]
-#             segments.append(segment)
-#         answers = {}  # Dictionary to store answers for each segment
-#         # Get answers for each segment
-#         for i, segment in enumerate(segments):
-#             answer = question_answerer(context=segment, question=question)
-#             answers[i] = answer
-#         # Find the answer with the highest score
-#         highest_score = -1
-#         highest_answer = None
-#         for segment_index, answer in answers.items():
-#             print(answer)
-#             score = answer["score"]
-#             if score > highest_score:
-#                 highest_score = score
-#                 highest_answer = answer
-#         if highest_answer is not None:
-#             answer = highest_answer["answer"]
-#             answer_score = str(highest_answer["score"])
-#             # Display the result in container
-#             container = st.container(border=True)
-#             container.write("<h5><b>Answer:</b></h5>" + answer + "<p><small>(F1 score: " + answer_score + ")</small></p><br>",
-#                             unsafe_allow_html=True)
 @st.cache_data(show_spinner=False)
 def extract_text(file_path):
@@ -106,8 +106,8 @@ def extract_text(file_path):
         for i, image in enumerate(images):
             image_text += pytesseract.image_to_string(image)
-        # text = text + image_text
-        text = image_text
         # remove more than one new line
         text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
     return text

     question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer, handle_impossible_answer=True)
     return question_answerer
+# # get the answer by passing the context & question to the model
 # def question_answering(context, question):
 #     with st.spinner(text="Loading question model..."):
 #         question_answerer = question_model()
 #     with st.spinner(text="Getting answer..."):
+#         answer = question_answerer(context=context, question=question)
+#         print(answer)
+#         answer_score = str(answer["score"])
+#         answer = answer["answer"]
+#         if (answer==""):
+#             answer = "CANNOT ANSWER"
+#         # display the result in container
+#         container = st.container(border=True)
+#         container.write("<h5><b>Answer:</b></h5>"+answer+"<p><small>(F1 score: "+answer_score+")</small></p><br>", unsafe_allow_html=True)
+def question_answering(context, question):
+    with st.spinner(text="Loading question model..."):
+        question_answerer = question_model()
+    with st.spinner(text="Getting answer..."):
+        segment_size = 45000
+        overlap_size = 50
+        text_length = len(context)
+        segments = []
+        # Split context into segments
+        for i in range(0, text_length, segment_size - overlap_size):
+            segment_start = i
+            segment_end = i + segment_size
+            segment = context[segment_start:segment_end]
+            segments.append(segment)
+        answers = {}  # Dictionary to store answers for each segment
+        # Get answers for each segment
+        for i, segment in enumerate(segments):
+            answer = question_answerer(context=segment, question=question)
+            answers[i] = answer
+        # Find the answer with the highest score
+        highest_score = -1
+        highest_answer = None
+        for answer in answers.items():
+            print(answer)
+            score = answer["score"]
+            if score > highest_score:
+                highest_score = score
+                highest_answer = answer
+        if highest_answer is not None:
+            answer = highest_answer["answer"]
+            answer_score = str(highest_answer["score"])
+            # Display the result in container
+            container = st.container(border=True)
+            container.write("<h5><b>Answer:</b></h5>" + answer + "<p><small>(F1 score: " + answer_score + ")</small></p><br>",
+                            unsafe_allow_html=True)
 @st.cache_data(show_spinner=False)
 def extract_text(file_path):
         for i, image in enumerate(images):
             image_text += pytesseract.image_to_string(image)
+        text = text + image_text
+        # text = image_text
         # remove more than one new line
         text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
     return text