Spaces:
Sleeping
Sleeping
Update app.py
Browse files
debugging file uploader
app.py
CHANGED
@@ -19,7 +19,7 @@ st.markdown("<h3 style='text-align: left; color:#F63366; font-size:18px;'><b>Wha
|
|
19 |
st.write("Extractive question answering is a Natural Language Processing task where text is provided for a model so that the model can refer to it and make predictions about where the answer to a question is.")
|
20 |
|
21 |
# store the model in cache resources to enhance efficiency (ref: https://docs.streamlit.io/library/advanced-features/caching)
|
22 |
-
@st.cache_resource(show_spinner=
|
23 |
def question_model():
|
24 |
# call my model for question answering
|
25 |
model_name = "kxx-kkk/FYP_ms_squad"
|
@@ -40,7 +40,7 @@ def question_answering(context, question):
|
|
40 |
container = st.container(border=True)
|
41 |
container.write("<h5><b>Answer:</b></h5>"+answer+"<p><small>(F1 score: "+answer_score+")</small></p><br>", unsafe_allow_html=True)
|
42 |
|
43 |
-
@st.cache_data(show_spinner=
|
44 |
def extract_text(file_path):
|
45 |
text = ""
|
46 |
image_text = ""
|
@@ -56,17 +56,10 @@ def extract_text(file_path):
|
|
56 |
|
57 |
images = convert_from_path(file_path) # Convert PDF pages to images
|
58 |
for i, image in enumerate(images):
|
59 |
-
# st.write(f"Page {i + 1}")
|
60 |
image_text += pytesseract.image_to_string(image)
|
61 |
|
62 |
-
#
|
63 |
-
|
64 |
-
# st.write("image_text")
|
65 |
-
# st.write(image_text)
|
66 |
-
|
67 |
-
text = text + image_text
|
68 |
-
# st.write("plus")
|
69 |
-
# st.write(text) # Display the extracted text from the image
|
70 |
return text
|
71 |
|
72 |
|
@@ -118,19 +111,13 @@ with tab2:
|
|
118 |
|
119 |
# transfer file to context and allow ask question, then perform question answering
|
120 |
if uploaded_file is not None:
|
121 |
-
# if uploaded_file.type is "txt":
|
122 |
-
# st.write("success txt")
|
123 |
-
# raw_text = str(uploaded_file.read(),"utf-8")
|
124 |
-
# elif uploaded_file.type is "pdf":
|
125 |
-
st.write("success pdf")
|
126 |
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
127 |
-
st.write("success pdf 2")
|
128 |
temp_file.write(uploaded_file.read()) # Save uploaded file to a temporary path
|
129 |
raw_text = extract_text(temp_file.name)
|
130 |
-
st.session_state.contextInput2 =
|
131 |
|
132 |
context2 = st.session_state.contextInput2
|
133 |
-
|
134 |
question2 = st.text_input(label="Enter your question",value=question2, key="questionInput2")
|
135 |
context2 = st.text_area("Your essay context: ", value=context2, height=330, key="contextInput2")
|
136 |
|
|
|
19 |
st.write("Extractive question answering is a Natural Language Processing task where text is provided for a model so that the model can refer to it and make predictions about where the answer to a question is.")
|
20 |
|
21 |
# store the model in cache resources to enhance efficiency (ref: https://docs.streamlit.io/library/advanced-features/caching)
|
22 |
+
@st.cache_resource(show_spinner=False)
|
23 |
def question_model():
|
24 |
# call my model for question answering
|
25 |
model_name = "kxx-kkk/FYP_ms_squad"
|
|
|
40 |
container = st.container(border=True)
|
41 |
container.write("<h5><b>Answer:</b></h5>"+answer+"<p><small>(F1 score: "+answer_score+")</small></p><br>", unsafe_allow_html=True)
|
42 |
|
43 |
+
@st.cache_data(show_spinner=False)
|
44 |
def extract_text(file_path):
|
45 |
text = ""
|
46 |
image_text = ""
|
|
|
56 |
|
57 |
images = convert_from_path(file_path) # Convert PDF pages to images
|
58 |
for i, image in enumerate(images):
|
|
|
59 |
image_text += pytesseract.image_to_string(image)
|
60 |
|
61 |
+
# text = text + image_text
|
62 |
+
text = image_text
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
return text
|
64 |
|
65 |
|
|
|
111 |
|
112 |
# transfer file to context and allow ask question, then perform question answering
|
113 |
if uploaded_file is not None:
|
|
|
|
|
|
|
|
|
|
|
114 |
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
|
|
115 |
temp_file.write(uploaded_file.read()) # Save uploaded file to a temporary path
|
116 |
raw_text = extract_text(temp_file.name)
|
117 |
+
st.session_state.contextInput2 = raw_text
|
118 |
|
119 |
context2 = st.session_state.contextInput2
|
120 |
+
|
121 |
question2 = st.text_input(label="Enter your question",value=question2, key="questionInput2")
|
122 |
context2 = st.text_area("Your essay context: ", value=context2, height=330, key="contextInput2")
|
123 |
|