Rename app_2.py to app_3.py
app_2.py → app_3.py (RENAMED, +4 -80)
@@ -131,38 +131,10 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
 
     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
     token_ids = inputs["input_ids"][0]
-    offset_mapping = inputs["offset_mapping"][0].tolist()
-
-    for i, (token, word_id) in enumerate(zip(tokens, word_ids)):
-        st.write(f"Token {i}: {token}, Word ID: {word_id}")
-
-    st.write("Token & offset:")
-    for i, (token, offset) in enumerate(zip(tokens, offset_mapping)):
-        st.write(f"Token {i}: {token}, Offset: {offset}")
+    #offset_mapping = inputs["offset_mapping"][0].tolist()
 
 
-    st.write("Token Positions, IDs, and Corresponding Tokens:")
-    for position, (token_id, token) in enumerate(zip(token_ids, tokens)):
-        st.write(f"Position: {position}, ID: {token_id}, Token: {token}")
 
-    st.write(f"Start Cause 1: {start_cause1}, End Cause: {end_cause1}")
-    st.write(f"Start Effect 1: {start_effect1}, End Cause: {end_effect1}")
-    st.write(f"Start Signal: {start_signal}, End Signal: {end_signal}")
-
-    def extract_span(start, end):
-        return tokenizer.convert_tokens_to_string(tokens[start:end+1]) if start is not None and end is not None else ""
-
-    cause1 = extract_span(start_cause1, end_cause1)
-    cause2 = extract_span(start_cause2, end_cause2)
-    effect1 = extract_span(start_effect1, end_effect1)
-    effect2 = extract_span(start_effect2, end_effect2)
-    if has_signal:
-        signal = extract_span(start_signal, end_signal)
-    if not has_signal:
-        signal = 'NA'
-    list1 = [start_cause1, end_cause1, start_effect1, end_effect1, start_signal, end_signal]
-    list2 = [start_cause2, end_cause2, start_effect2, end_effect2, start_signal, end_signal]
-    #return cause1, cause2, effect1, effect2, signal, list1, list2
     return start_cause1, end_cause1, start_cause2, end_cause2, start_effect1, end_effect1, start_effect2, end_effect2, start_signal, end_signal
 
 
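Note: after this hunk, extract_arguments returns only the ten span indices; the st.write debug loops and the extract_span helper are gone. A minimal caller-side sketch of consuming that tuple (input_text, tokenizer, and model are assumed to be defined elsewhere in the app, as in the Extract handler further down):

# Hypothetical caller; variable names mirror the diff above.
(start_cause1, end_cause1, start_cause2, end_cause2,
 start_effect1, end_effect1, start_effect2, end_effect2,
 start_signal, end_signal) = extract_arguments(input_text, tokenizer, model, beam_search=True)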
@@ -170,7 +142,9 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
 def mark_text_by_position(original_text, start_token, end_token, color):
     """Marks text in the original string based on character positions."""
     # Inserts tags into the original text based on token offsets.
-
+
+    offset_mapping = inputs["offset_mapping"][0].tolist()
+
     start_idx, end_idx = offset_mapping[start_token][0], offset_mapping[end_token][1]
 
     if start_idx is not None and end_idx is not None and start_idx <= end_idx:
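The offset_mapping lookup now happens inside mark_text_by_position, where inputs is read as a free variable; this appears to rely on inputs being global when the function runs. A minimal sketch of how such a mapping is produced, assuming a Hugging Face fast tokenizer (the model name is illustrative):

from transformers import AutoTokenizer

# Offset mappings require a "fast" (Rust-backed) tokenizer.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
inputs = tokenizer("Heavy rain caused flooding.",
                   return_offsets_mapping=True, return_tensors="pt")
offset_mapping = inputs["offset_mapping"][0].tolist()
# Each entry is a [start_char, end_char] pair into the original string;
# special tokens such as [CLS] and [SEP] map to (0, 0).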
@@ -212,55 +186,5 @@ if st.button("Extract"):
             st.markdown(f"**Effect:** {effect_text2}", unsafe_allow_html=True)
             st.markdown(f"**Signal:** {signal_text}", unsafe_allow_html=True)
 
-    else:
-        st.warning("Please enter some text before extracting.")
-
-
-
-
-
-if st.button("Extract1"):
-    if input_text:
-        start_cause_id, end_cause_id, start_effect_id, end_effect_id, start_signal_id, end_signal_id = extract_arguments(input_text, tokenizer, model, beam_search=beam_search)
-
-        cause_text = mark_text_by_word_ids(input_text, inputs["input_ids"][0], start_cause_id, end_cause_id, "#FFD700") # Gold for cause
-        effect_text = mark_text_by_word_ids(input_text, inputs["input_ids"][0], start_effect_id, end_effect_id, "#90EE90") # Light green for effect
-        signal_text = mark_text_by_word_ids(input_text, inputs["input_ids"][0], start_signal_id, end_signal_id, "#FF6347") # Tomato red for signal
-
-        st.markdown(f"**Cause:**<br>{cause_text}", unsafe_allow_html=True)
-        st.markdown(f"**Effect:**<br>{effect_text}", unsafe_allow_html=True)
-        st.markdown(f"**Signal:**<br>{signal_text}", unsafe_allow_html=True)
-    else:
-        st.warning("Please enter some text before extracting.")
-
-
-
-
-if st.button("Extract1"):
-    if input_text:
-        start_cause1, end_cause1, start_cause2, end_cause2, start_effect1, end_effect1, start_effect2, end_effect2, start_signal, end_signal = extract_arguments(input_text, tokenizer, model, beam_search=beam_search)
-
-        # Convert text to tokenized format
-        tokenized_input = tokenizer.tokenize(input_text)
-
-        cause_text1 = mark_text_by_tokens(tokenizer, tokenized_input, start_cause1, end_cause1, "#FFD700") # Gold for cause
-        effect_text1 = mark_text_by_tokens(tokenizer, tokenized_input, start_effect1, end_effect1, "#90EE90") # Light green for effect
-        signal_text = mark_text_by_tokens(tokenizer, tokenized_input, start_signal, end_signal, "#FF6347") # Tomato red for signal
-
-        # Display first relation
-        st.markdown(f"<strong>Relation 1:</strong>", unsafe_allow_html=True)
-        st.markdown(f"**Cause:** {cause_text1}", unsafe_allow_html=True)
-        st.markdown(f"**Effect:** {effect_text1}", unsafe_allow_html=True)
-        st.markdown(f"**Signal:** {signal_text}", unsafe_allow_html=True)
-
-        # Display second relation if beam search is enabled
-        if beam_search:
-            cause_text2 = mark_text_by_tokens(tokenizer, tokenized_input, start_cause2, end_cause2, "#FFD700")
-            effect_text2 = mark_text_by_tokens(tokenizer, tokenized_input, start_effect2, end_effect2, "#90EE90")
-
-            st.markdown(f"<strong>Relation 2:</strong>", unsafe_allow_html=True)
-            st.markdown(f"**Cause:** {cause_text2}", unsafe_allow_html=True)
-            st.markdown(f"**Effect:** {effect_text2}", unsafe_allow_html=True)
-            st.markdown(f"**Signal:** {signal_text}", unsafe_allow_html=True)
     else:
         st.warning("Please enter some text before extracting.")
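For reference, the character-offset highlighting that mark_text_by_position performs reduces to inserting an HTML tag around a character span. This is a simplified, hypothetical sketch reconstructed from the diff context (highlight_span is not a name from the file):

def highlight_span(original_text, start_idx, end_idx, color):
    # Wrap the [start_idx, end_idx) character span in a colored <span>,
    # which Streamlit renders via st.markdown(..., unsafe_allow_html=True).
    return (original_text[:start_idx]
            + f'<span style="background-color:{color}">'
            + original_text[start_idx:end_idx]
            + "</span>"
            + original_text[end_idx:])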