Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -30,63 +30,63 @@ def question_model():
|
|
30 |
question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer, handle_impossible_answer=True)
|
31 |
return question_answerer
|
32 |
|
33 |
-
# get the answer by passing the context & question to the model
|
34 |
-
def question_answering(context, question):
|
35 |
-
with st.spinner(text="Loading question model..."):
|
36 |
-
question_answerer = question_model()
|
37 |
-
with st.spinner(text="Getting answer..."):
|
38 |
-
answer = question_answerer(context=context, question=question)
|
39 |
-
print(answer)
|
40 |
-
answer_score = str(answer["score"])
|
41 |
-
answer = answer["answer"]
|
42 |
-
if (answer==""):
|
43 |
-
answer = "CANNOT ANSWER"
|
44 |
-
|
45 |
-
# display the result in container
|
46 |
-
container = st.container(border=True)
|
47 |
-
container.write("<h5><b>Answer:</b></h5>"+answer+"<p><small>(F1 score: "+answer_score+")</small></p><br>", unsafe_allow_html=True)
|
48 |
-
|
49 |
# def question_answering(context, question):
|
50 |
# with st.spinner(text="Loading question model..."):
|
51 |
# question_answerer = question_model()
|
52 |
# with st.spinner(text="Getting answer..."):
|
53 |
-
#
|
54 |
-
#
|
55 |
-
#
|
56 |
-
#
|
57 |
-
|
58 |
-
#
|
59 |
-
|
60 |
-
#
|
61 |
-
#
|
62 |
-
#
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
#
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
#
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
#
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
@st.cache_data(show_spinner=False)
|
92 |
def extract_text(file_path):
|
@@ -106,8 +106,8 @@ def extract_text(file_path):
|
|
106 |
for i, image in enumerate(images):
|
107 |
image_text += pytesseract.image_to_string(image)
|
108 |
|
109 |
-
|
110 |
-
text = image_text
|
111 |
# remove more than one new line
|
112 |
text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
|
113 |
return text
|
|
|
30 |
question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer, handle_impossible_answer=True)
|
31 |
return question_answerer
|
32 |
|
33 |
+
# # get the answer by passing the context & question to the model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
# def question_answering(context, question):
|
35 |
# with st.spinner(text="Loading question model..."):
|
36 |
# question_answerer = question_model()
|
37 |
# with st.spinner(text="Getting answer..."):
|
38 |
+
# answer = question_answerer(context=context, question=question)
|
39 |
+
# print(answer)
|
40 |
+
# answer_score = str(answer["score"])
|
41 |
+
# answer = answer["answer"]
|
42 |
+
# if (answer==""):
|
43 |
+
# answer = "CANNOT ANSWER"
|
44 |
+
|
45 |
+
# # display the result in container
|
46 |
+
# container = st.container(border=True)
|
47 |
+
# container.write("<h5><b>Answer:</b></h5>"+answer+"<p><small>(F1 score: "+answer_score+")</small></p><br>", unsafe_allow_html=True)
|
48 |
+
|
49 |
+
# get the answer by passing the context & question to the model,
# splitting long contexts into overlapping segments so the model's
# input limit is not exceeded, and keeping the highest-scoring answer
def question_answering(context, question):
    with st.spinner(text="Loading question model..."):
        question_answerer = question_model()
    with st.spinner(text="Getting answer..."):
        segment_size = 45000  # max characters fed to the model per call
        overlap_size = 50     # overlap so an answer spanning a boundary isn't lost

        # Split context into overlapping segments
        segments = [
            context[start:start + segment_size]
            for start in range(0, len(context), segment_size - overlap_size)
        ]

        # Query the model on each segment and track the best-scoring result.
        # BUG FIX: the original iterated answers.items(), which yields
        # (index, dict) tuples, so answer["score"] raised TypeError and the
        # comparison could never run. Iterate the result dicts directly.
        highest_score = -1
        highest_answer = None
        for segment in segments:
            result = question_answerer(context=segment, question=question)
            print(result)
            if result["score"] > highest_score:
                highest_score = result["score"]
                highest_answer = result

        # BUG FIX: with an empty context there are no segments, leaving
        # answer/answer_score undefined at the write() below (NameError).
        if highest_answer is not None:
            answer = highest_answer["answer"]
            answer_score = str(highest_answer["score"])
        else:
            answer = ""
            answer_score = "0"

        # handle_impossible_answer=True lets the pipeline return "" when the
        # model thinks the question is unanswerable — restore the explicit
        # message the pre-segmentation version showed in that case.
        if answer == "":
            answer = "CANNOT ANSWER"

        # Display the result in container
        container = st.container(border=True)
        container.write(
            "<h5><b>Answer:</b></h5>" + answer
            + "<p><small>(F1 score: " + answer_score + ")</small></p><br>",
            unsafe_allow_html=True,
        )
|
90 |
|
91 |
@st.cache_data(show_spinner=False)
|
92 |
def extract_text(file_path):
|
|
|
106 |
for i, image in enumerate(images):
|
107 |
image_text += pytesseract.image_to_string(image)
|
108 |
|
109 |
+
text = text + image_text
|
110 |
+
# text = image_text
|
111 |
# remove more than one new line
|
112 |
text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
|
113 |
return text
|