anamargarida committed
Commit 9a65bb3 · verified · 1 Parent(s): 32e4b5c

Update app_22.py

Files changed (1):
  1. app_22.py +58 -2
app_22.py CHANGED
@@ -162,7 +162,17 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
     #return cause1, cause2, effect1, effect2, signal, list1, list2
     #return start_cause1, end_cause1, start_cause2, end_cause2, start_effect1, end_effect1, start_effect2, end_effect2, start_signal, end_signal
 
+    # Find the first valid token in a multi-token word
+    def find_valid_start(position):
+        while position > 0 and word_ids[position] == word_ids[position - 1]:
+            position -= 1
+        return position
 
+    def find_valid_end(position):
+        while position < len(word_ids) - 1 and word_ids[position] == word_ids[position + 1]:
+            position += 1
+        return position
+
 
     # Add the argument tags in the sentence directly
     def add_tags(original_text, word_ids, start_cause, end_cause, start_effect, end_effect, start_signal, end_signal):
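The two helpers added in this hunk snap a predicted token position out to the boundaries of its word: span predictions land on subword tokens, so find_valid_start and find_valid_end walk left and right while neighbouring tokens share the same word id. Below is a minimal standalone sketch of that behaviour, assuming a word_ids list shaped like the output of a Hugging Face fast tokenizer's encoding.word_ids() (one word index per token, None for special tokens); the toy values and span below are invented, and in the committed code the helpers close over the real word_ids inside extract_arguments.

# Toy stand-in for encoding.word_ids(); the values here are made up.
word_ids = [None, 0, 1, 1, 1, 2, 3, 3, None]

def find_valid_start(position):
    # Walk left while the previous token belongs to the same word.
    while position > 0 and word_ids[position] == word_ids[position - 1]:
        position -= 1
    return position

def find_valid_end(position):
    # Walk right while the next token belongs to the same word.
    while position < len(word_ids) - 1 and word_ids[position] == word_ids[position + 1]:
        position += 1
    return position

# Token 3 is the middle piece of word 1, so a span ending there snaps
# out to the word's first (2) and last (4) subword tokens.
assert find_valid_start(3) == 2
assert find_valid_end(3) == 4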
@@ -195,10 +205,56 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
         # Join tokens back into a string
         return ' '.join(this_space_splitted_tokens)
 
+    def add_tags_find(original_text, word_ids, start_cause, end_cause, start_effect, end_effect, start_signal, end_signal):
+        space_splitted_tokens = original_text.split(" ")
+        this_space_splitted_tokens = copy.deepcopy(space_splitted_tokens)
+
+        def safe_insert(tag, position, start=True):
+            """Safely insert a tag, checking for None values and index validity."""
+            if position is not None and word_ids[position] is not None:
+                word_index = word_ids[position]
+
+                # Ensure word_index is within range
+                if 0 <= word_index < len(this_space_splitted_tokens):
+                    if start:
+                        this_space_splitted_tokens[word_index] = tag + this_space_splitted_tokens[word_index]
+                    else:
+                        this_space_splitted_tokens[word_index] += tag
+
+        # Find valid start and end positions for words
+        start_cause = find_valid_start(start_cause)
+        end_cause = find_valid_end(end_cause)
+        start_effect = find_valid_start(start_effect)
+        end_effect = find_valid_end(end_effect)
+        if start_signal is not None:
+            start_signal = find_valid_start(start_signal)
+            end_signal = find_valid_end(end_signal)
+
+        # Adjust for punctuation shifts
+        if tokens[end_cause] in [".", ",", "-", ":", ";"]:
+            end_cause -= 1
+        if tokens[end_effect] in [".", ",", "-", ":", ";"]:
+            end_effect -= 1
+
+        # Add argument tags safely
+        safe_insert('<ARG0>', start_cause, start=True)
+        safe_insert('</ARG0>', end_cause, start=False)
+        safe_insert('<ARG1>', start_effect, start=True)
+        safe_insert('</ARG1>', end_effect, start=False)
+
+        # Add signal tags safely (if signal exists)
+        if start_signal is not None and end_signal is not None:
+            safe_insert('<SIG0>', start_signal, start=True)
+            safe_insert('</SIG0>', end_signal, start=False)
+
+        # Join tokens back into a string
+        return ' '.join(this_space_splitted_tokens)
+
+
 
     # Apply the tags to the sentence tokens
-    tagged_sentence1 = add_tags(input_text, word_ids, start_cause1, end_cause1, start_effect1, end_effect1, start_signal, end_signal)
-    tagged_sentence2 = add_tags(input_text, word_ids, start_cause2, end_cause2, start_effect2, end_effect2, start_signal, end_signal)
+    tagged_sentence1 = add_tags_find(input_text, word_ids, start_cause1, end_cause1, start_effect1, end_effect1, start_signal, end_signal)
+    tagged_sentence2 = add_tags_find(input_text, word_ids, start_cause2, end_cause2, start_effect2, end_effect2, start_signal, end_signal)
     return tagged_sentence1, tagged_sentence2
 
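The new add_tags_find wraps the predicted cause, effect, and signal words in <ARG0>/<ARG1>/<SIG0> tags, routing every insertion through safe_insert so that None positions and out-of-range word indices are skipped instead of raising an exception. Note that the committed function relies on names from the enclosing extract_arguments scope (word_ids, tokens, copy); the sketch below is a self-contained approximation that works on word-level index pairs directly, with an invented sentence, spans, and helper name (tag_sentence).

import copy

def tag_sentence(original_text, cause, effect, signal=None):
    # cause/effect/signal are inclusive (start_word, end_word) pairs.
    tokens = copy.deepcopy(original_text.split(" "))

    def safe_insert(tag, word_index, start=True):
        # Skip missing spans and out-of-range indices instead of raising.
        if word_index is not None and 0 <= word_index < len(tokens):
            if start:
                tokens[word_index] = tag + tokens[word_index]
            else:
                tokens[word_index] += tag

    safe_insert('<ARG0>', cause[0], start=True)
    safe_insert('</ARG0>', cause[1], start=False)
    safe_insert('<ARG1>', effect[0], start=True)
    safe_insert('</ARG1>', effect[1], start=False)
    if signal is not None:
        safe_insert('<SIG0>', signal[0], start=True)
        safe_insert('</SIG0>', signal[1], start=False)
    return ' '.join(tokens)

print(tag_sentence("The rain fell so the match was cancelled",
                   cause=(0, 2), effect=(4, 7), signal=(3, 3)))
# <ARG0>The rain fell</ARG0> <SIG0>so</SIG0> <ARG1>the match was cancelled</ARG1>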