Update app_22.py
app_22.py CHANGED
@@ -162,7 +162,17 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
     #return cause1, cause2, effect1, effect2, signal, list1, list2
     #return start_cause1, end_cause1, start_cause2, end_cause2, start_effect1, end_effect1, start_effect2, end_effect2, start_signal, end_signal
 
+    # Find the first valid token in a multi-token word
+    def find_valid_start(position):
+        while position > 0 and word_ids[position] == word_ids[position - 1]:
+            position -= 1
+        return position
 
+    def find_valid_end(position):
+        while position < len(word_ids) - 1 and word_ids[position] == word_ids[position + 1]:
+            position += 1
+        return position
+
 
     # Add the argument tags in the sentence directly
     def add_tags(original_text, word_ids, start_cause, end_cause, start_effect, end_effect, start_signal, end_signal):
@@ -195,10 +205,56 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
         # Join tokens back into a string
         return ' '.join(this_space_splitted_tokens)
 
+    def add_tags_find(original_text, word_ids, start_cause, end_cause, start_effect, end_effect, start_signal, end_signal):
+        space_splitted_tokens = original_text.split(" ")
+        this_space_splitted_tokens = copy.deepcopy(space_splitted_tokens)
+
+        def safe_insert(tag, position, start=True):
+            """Safely insert a tag, checking for None values and index validity."""
+            if position is not None and word_ids[position] is not None:
+                word_index = word_ids[position]
+
+                # Ensure word_index is within range
+                if 0 <= word_index < len(this_space_splitted_tokens):
+                    if start:
+                        this_space_splitted_tokens[word_index] = tag + this_space_splitted_tokens[word_index]
+                    else:
+                        this_space_splitted_tokens[word_index] += tag
+
+        # Find valid start and end positions for words
+        start_cause = find_valid_start(start_cause)
+        end_cause = find_valid_end(end_cause)
+        start_effect = find_valid_start(start_effect)
+        end_effect = find_valid_end(end_effect)
+        if start_signal is not None:
+            start_signal = find_valid_start(start_signal)
+            end_signal = find_valid_end(end_signal)
+
+        # Adjust for punctuation shifts
+        if tokens[end_cause] in [".", ",", "-", ":", ";"]:
+            end_cause -= 1
+        if tokens[end_effect] in [".", ",", "-", ":", ";"]:
+            end_effect -= 1
+
+        # Add argument tags safely
+        safe_insert('<ARG0>', start_cause, start=True)
+        safe_insert('</ARG0>', end_cause, start=False)
+        safe_insert('<ARG1>', start_effect, start=True)
+        safe_insert('</ARG1>', end_effect, start=False)
+
+        # Add signal tags safely (if signal exists)
+        if start_signal is not None and end_signal is not None:
+            safe_insert('<SIG0>', start_signal, start=True)
+            safe_insert('</SIG0>', end_signal, start=False)
+
+        # Join tokens back into a string
+        return ' '.join(this_space_splitted_tokens)
+
+
 
     # Apply the tags to the sentence tokens
-    tagged_sentence1 =
-    tagged_sentence2 =
+    tagged_sentence1 = add_tags_find(input_text, word_ids, start_cause1, end_cause1, start_effect1, end_effect1, start_signal, end_signal)
+    tagged_sentence2 = add_tags_find(input_text, word_ids, start_cause2, end_cause2, start_effect2, end_effect2, start_signal, end_signal)
     return tagged_sentence1, tagged_sentence2
 
 
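Reviewer note: the two new helpers snap a predicted subword position to the boundaries of its word, so tags never open or close mid-word. Below is a standalone copy of that boundary-snapping logic with a hand-written word_ids list (toy values, not taken from the app):

    # word_ids maps each subword token to its word index (None = special token).
    word_ids = [None, 0, 1, 1, 1, 2, 3, None]

    def find_valid_start(position):
        # Walk left while the previous token belongs to the same word.
        while position > 0 and word_ids[position] == word_ids[position - 1]:
            position -= 1
        return position

    def find_valid_end(position):
        # Walk right while the next token belongs to the same word.
        while position < len(word_ids) - 1 and word_ids[position] == word_ids[position + 1]:
            position += 1
        return position

    print(find_valid_start(4))  # -> 2, first subword of word 1
    print(find_valid_end(2))    # -> 4, last subword of word 1

A span predicted to start or end in the middle of a multi-subword word is therefore widened to the whole word before safe_insert places the tags.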
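One more wiring note: add_tags_find assumes word_ids[i] indexes into original_text.split(" "). With a Hugging Face fast tokenizer that alignment holds when the text is passed in pre-split; here is a minimal sketch of how the inputs could be wired up (the checkpoint name and example sentence are assumptions, not taken from this Space):

    from transformers import AutoTokenizer

    # Assumed checkpoint; any fast tokenizer exposes .word_ids().
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    words = "heavy rain caused the flooding".split(" ")
    enc = tokenizer(words, is_split_into_words=True)
    word_ids = enc.word_ids()  # e.g. [None, 0, 1, 2, 3, 4, None]; None = special token

    # A predicted token span is then mapped back onto the space-split words,
    # yielding e.g. '<ARG0>heavy rain</ARG0> caused the <ARG1>flooding</ARG1>'.

Note that the punctuation adjustment reads tokens from the enclosing extract_arguments scope, which is why add_tags_find has to stay a nested function rather than a top-level helper.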