anamargarida committed
Commit 9a65bb3 · verified · 1 Parent(s): 32e4b5c

Update app_22.py

Files changed (1):
  1. app_22.py +58 -2
app_22.py CHANGED
@@ -162,7 +162,17 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
     #return cause1, cause2, effect1, effect2, signal, list1, list2
     #return start_cause1, end_cause1, start_cause2, end_cause2, start_effect1, end_effect1, start_effect2, end_effect2, start_signal, end_signal
 
+    # Find the first valid token in a multi-token word
+    def find_valid_start(position):
+        while position > 0 and word_ids[position] == word_ids[position - 1]:
+            position -= 1
+        return position
 
+    def find_valid_end(position):
+        while position < len(word_ids) - 1 and word_ids[position] == word_ids[position + 1]:
+            position += 1
+        return position
+
 
     # Add the argument tags in the sentence directly
     def add_tags(original_text, word_ids, start_cause, end_cause, start_effect, end_effect, start_signal, end_signal):
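The two helpers added in this hunk snap a predicted token position out to the boundaries of its word: span predictions land on subword tokens, so find_valid_start and find_valid_end walk left and right while neighbouring tokens share the same word id. Below is a minimal standalone sketch of that behaviour, assuming a word_ids list shaped like the output of a Hugging Face fast tokenizer's encoding.word_ids() (one word index per token, None for special tokens); the toy values and span below are invented, and in the committed code the helpers close over the real word_ids inside extract_arguments.

# Toy stand-in for encoding.word_ids(); the values here are made up.
word_ids = [None, 0, 1, 1, 1, 2, 3, 3, None]

def find_valid_start(position):
    # Walk left while the previous token belongs to the same word.
    while position > 0 and word_ids[position] == word_ids[position - 1]:
        position -= 1
    return position

def find_valid_end(position):
    # Walk right while the next token belongs to the same word.
    while position < len(word_ids) - 1 and word_ids[position] == word_ids[position + 1]:
        position += 1
    return position

# Token 3 is the middle piece of word 1, so a span ending there snaps
# out to the word's first (2) and last (4) subword tokens.
assert find_valid_start(3) == 2
assert find_valid_end(3) == 4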
@@ -195,10 +205,56 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
         # Join tokens back into a string
         return ' '.join(this_space_splitted_tokens)
 
+    def add_tags_find(original_text, word_ids, start_cause, end_cause, start_effect, end_effect, start_signal, end_signal):
+        space_splitted_tokens = original_text.split(" ")
+        this_space_splitted_tokens = copy.deepcopy(space_splitted_tokens)
+
+        def safe_insert(tag, position, start=True):
+            """Safely insert a tag, checking for None values and index validity."""
+            if position is not None and word_ids[position] is not None:
+                word_index = word_ids[position]
+
+                # Ensure word_index is within range
+                if 0 <= word_index < len(this_space_splitted_tokens):
+                    if start:
+                        this_space_splitted_tokens[word_index] = tag + this_space_splitted_tokens[word_index]
+                    else:
+                        this_space_splitted_tokens[word_index] += tag
+
+        # Find valid start and end positions for words
+        start_cause = find_valid_start(start_cause)
+        end_cause = find_valid_end(end_cause)
+        start_effect = find_valid_start(start_effect)
+        end_effect = find_valid_end(end_effect)
+        if start_signal is not None:
+            start_signal = find_valid_start(start_signal)
+            end_signal = find_valid_end(end_signal)
+
+        # Adjust for punctuation shifts
+        if tokens[end_cause] in [".", ",", "-", ":", ";"]:
+            end_cause -= 1
+        if tokens[end_effect] in [".", ",", "-", ":", ";"]:
+            end_effect -= 1
+
+        # Add argument tags safely
+        safe_insert('<ARG0>', start_cause, start=True)
+        safe_insert('</ARG0>', end_cause, start=False)
+        safe_insert('<ARG1>', start_effect, start=True)
+        safe_insert('</ARG1>', end_effect, start=False)
+
+        # Add signal tags safely (if signal exists)
+        if start_signal is not None and end_signal is not None:
+            safe_insert('<SIG0>', start_signal, start=True)
+            safe_insert('</SIG0>', end_signal, start=False)
+
+        # Join tokens back into a string
+        return ' '.join(this_space_splitted_tokens)
+
+
 
     # Apply the tags to the sentence tokens
-    tagged_sentence1 = add_tags(input_text, word_ids, start_cause1, end_cause1, start_effect1, end_effect1, start_signal, end_signal)
-    tagged_sentence2 = add_tags(input_text, word_ids, start_cause2, end_cause2, start_effect2, end_effect2, start_signal, end_signal)
+    tagged_sentence1 = add_tags_find(input_text, word_ids, start_cause1, end_cause1, start_effect1, end_effect1, start_signal, end_signal)
+    tagged_sentence2 = add_tags_find(input_text, word_ids, start_cause2, end_cause2, start_effect2, end_effect2, start_signal, end_signal)
     return tagged_sentence1, tagged_sentence2
 
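The new add_tags_find wraps the predicted cause, effect, and signal words in <ARG0>/<ARG1>/<SIG0> tags, routing every insertion through safe_insert so that None positions and out-of-range word indices are skipped instead of raising an exception. Note that the committed function relies on names from the enclosing extract_arguments scope (word_ids, tokens, copy); the sketch below is a self-contained approximation that works on word-level index pairs directly, with an invented sentence, spans, and helper name (tag_sentence).

import copy

def tag_sentence(original_text, cause, effect, signal=None):
    # cause/effect/signal are inclusive (start_word, end_word) pairs.
    tokens = copy.deepcopy(original_text.split(" "))

    def safe_insert(tag, word_index, start=True):
        # Skip missing spans and out-of-range indices instead of raising.
        if word_index is not None and 0 <= word_index < len(tokens):
            if start:
                tokens[word_index] = tag + tokens[word_index]
            else:
                tokens[word_index] += tag

    safe_insert('<ARG0>', cause[0], start=True)
    safe_insert('</ARG0>', cause[1], start=False)
    safe_insert('<ARG1>', effect[0], start=True)
    safe_insert('</ARG1>', effect[1], start=False)
    if signal is not None:
        safe_insert('<SIG0>', signal[0], start=True)
        safe_insert('</SIG0>', signal[1], start=False)
    return ' '.join(tokens)

print(tag_sentence("The rain fell so the match was cancelled",
                   cause=(0, 2), effect=(4, 7), signal=(3, 3)))
# <ARG0>The rain fell</ARG0> <SIG0>so</SIG0> <ARG1>the match was cancelled</ARG1>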