Spaces:

cpllab
/

syntaxgym

Sleeping

jgauthier committed on Nov 1, 2022

Commit

7eae31f

1 Parent(s): 8a3618a

Don't mix up condition indexing in suites where items list their per-condition content in a different order (e.g. number_prep in syntaxgym2020)

Files changed (1) hide show

syntaxgym.py CHANGED Viewed

@@ -265,12 +265,13 @@ class SyntaxGym(evaluate.EvaluationModule):
         }
         return results
-    def get_region_edges(self, item, condition_idx):
         """
         Get left edge of each region as a character index.
         """
         # NB this is coupled with `condition_to_string` logic of course
         regions = item["conditions"]["regions"][condition_idx]
         idx = 0
@@ -298,8 +299,8 @@ class SyntaxGym(evaluate.EvaluationModule):
         max_long = torch.iinfo(torch.int64).max
-        for i_cond, i_offsets in enumerate(offset_mapping):
-            region_edges = self.get_region_edges(item, i_cond)
             t_cursor, r_cursor = 0, 0
             while t_cursor < i_offsets.shape[0]:
@@ -321,7 +322,7 @@ class SyntaxGym(evaluate.EvaluationModule):
                     r_cursor += 1
                     continue
-                region2tokens[condition_order[i_cond]][r_cursor + 1].append(t_cursor)
                 t_cursor += 1
         return region2tokens

         }
         return results
+    def get_region_edges(self, item, condition_name):
         """
         Get left edge of each region as a character index.
         """
         # NB this is coupled with `condition_to_string` logic of course
+        condition_idx = item["conditions"]["condition_name"].index(condition_name)
         regions = item["conditions"]["regions"][condition_idx]
         idx = 0
         max_long = torch.iinfo(torch.int64).max
+        for cond_name, i_offsets in zip(condition_order, offset_mapping):
+            region_edges = self.get_region_edges(item, cond_name)
             t_cursor, r_cursor = 0, 0
             while t_cursor < i_offsets.shape[0]:
                     r_cursor += 1
                     continue
+                region2tokens[cond_name][r_cursor + 1].append(t_cursor)
                 t_cursor += 1
         return region2tokens