kxx-kkk committed on
Commit
dc9a4f7
·
verified ·
1 Parent(s): 411c19a

Update app.py

Browse files

debugging file uploader

Files changed (1) hide show
  1. app.py +6 -19
app.py CHANGED
@@ -19,7 +19,7 @@ st.markdown("<h3 style='text-align: left; color:#F63366; font-size:18px;'><b>Wha
19
  st.write("Extractive question answering is a Natural Language Processing task where text is provided for a model so that the model can refer to it and make predictions about where the answer to a question is.")
20
 
21
  # store the model in cache resources to enhance efficiency (ref: https://docs.streamlit.io/library/advanced-features/caching)
22
- @st.cache_resource(show_spinner=True)
23
  def question_model():
24
  # call my model for question answering
25
  model_name = "kxx-kkk/FYP_ms_squad"
@@ -40,7 +40,7 @@ def question_answering(context, question):
40
  container = st.container(border=True)
41
  container.write("<h5><b>Answer:</b></h5>"+answer+"<p><small>(F1 score: "+answer_score+")</small></p><br>", unsafe_allow_html=True)
42
 
43
- @st.cache_data(show_spinner=True)
44
  def extract_text(file_path):
45
  text = ""
46
  image_text = ""
@@ -56,17 +56,10 @@ def extract_text(file_path):
56
 
57
  images = convert_from_path(file_path) # Convert PDF pages to images
58
  for i, image in enumerate(images):
59
- # st.write(f"Page {i + 1}")
60
  image_text += pytesseract.image_to_string(image)
61
 
62
- # st.write("text")
63
- # st.write(text)
64
- # st.write("image_text")
65
- # st.write(image_text)
66
-
67
- text = text + image_text
68
- # st.write("plus")
69
- # st.write(text) # Display the extracted text from the image
70
  return text
71
 
72
 
@@ -118,19 +111,13 @@ with tab2:
118
 
119
  # transfer file to context and allow ask question, then perform question answering
120
  if uploaded_file is not None:
121
- # if uploaded_file.type is "txt":
122
- # st.write("success txt")
123
- # raw_text = str(uploaded_file.read(),"utf-8")
124
- # elif uploaded_file.type is "pdf":
125
- st.write("success pdf")
126
  with tempfile.NamedTemporaryFile(delete=False) as temp_file:
127
- st.write("success pdf 2")
128
  temp_file.write(uploaded_file.read()) # Save uploaded file to a temporary path
129
  raw_text = extract_text(temp_file.name)
130
- st.session_state.contextInput2 = str(raw_text)
131
 
132
  context2 = st.session_state.contextInput2
133
-
134
  question2 = st.text_input(label="Enter your question",value=question2, key="questionInput2")
135
  context2 = st.text_area("Your essay context: ", value=context2, height=330, key="contextInput2")
136
 
 
19
  st.write("Extractive question answering is a Natural Language Processing task where text is provided for a model so that the model can refer to it and make predictions about where the answer to a question is.")
20
 
21
  # store the model in cache resources to enhance efficiency (ref: https://docs.streamlit.io/library/advanced-features/caching)
22
+ @st.cache_resource(show_spinner=False)
23
  def question_model():
24
  # call my model for question answering
25
  model_name = "kxx-kkk/FYP_ms_squad"
 
40
  container = st.container(border=True)
41
  container.write("<h5><b>Answer:</b></h5>"+answer+"<p><small>(F1 score: "+answer_score+")</small></p><br>", unsafe_allow_html=True)
42
 
43
+ @st.cache_data(show_spinner=False)
44
  def extract_text(file_path):
45
  text = ""
46
  image_text = ""
 
56
 
57
  images = convert_from_path(file_path) # Convert PDF pages to images
58
  for i, image in enumerate(images):
 
59
  image_text += pytesseract.image_to_string(image)
60
 
61
+ # text = text + image_text
62
+ text = image_text
 
 
 
 
 
 
63
  return text
64
 
65
 
 
111
 
112
  # transfer file to context and allow ask question, then perform question answering
113
  if uploaded_file is not None:
 
 
 
 
 
114
  with tempfile.NamedTemporaryFile(delete=False) as temp_file:
 
115
  temp_file.write(uploaded_file.read()) # Save uploaded file to a temporary path
116
  raw_text = extract_text(temp_file.name)
117
+ st.session_state.contextInput2 = raw_text
118
 
119
  context2 = st.session_state.contextInput2
120
+
121
  question2 = st.text_input(label="Enter your question",value=question2, key="questionInput2")
122
  context2 = st.text_area("Your essay context: ", value=context2, height=330, key="contextInput2")
123