jinaai
/

xlm-roberta-flash-implementation

🇪🇺 Region: EU

Model card · Files and versions · Community

jupyterjazz committed on May 15, 2024

Commit

c55e591

·

1 Parent(s): b27fa55

refactor: truncation fn

Signed-off-by: jupyterjazz <[email protected]>

Files changed (1) hide show

modeling_xlm_roberta.py +14 -9

modeling_xlm_roberta.py CHANGED Viewed

@@ -579,15 +579,7 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
         all_embeddings = [all_embeddings[idx] for idx in inverse_permutation]
         if truncate_dim:
-            if not self.config.matryoshka_dimensions:
-                logger.warning(
-                    'Matryoshka embeddings are not supported, so dimension truncation will not be performed.'
-                )
-            elif truncate_dim in self.config.matryoshka_dimensions:
-                all_embeddings = [tensor[:truncate_dim] for tensor in all_embeddings]
-            else:
-                raise ValueError(f'The provided `truncate_dim` value of {truncate_dim} is not supported. '
-                                 f'Supported dimensions are {self.config.matryoshka_dimensions}.')
         if convert_to_tensor:
             all_embeddings = torch.stack(all_embeddings)
@@ -600,6 +592,19 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
         self.train(is_training)
         return all_embeddings
     def mean_pooling(
         self, token_embeddings: torch.Tensor, attention_mask: torch.Tensor
     ):

         all_embeddings = [all_embeddings[idx] for idx in inverse_permutation]
         if truncate_dim:
+            all_embeddings = self.truncate_embeddings(all_embeddings, truncate_dim)
         if convert_to_tensor:
             all_embeddings = torch.stack(all_embeddings)
         self.train(is_training)
         return all_embeddings
+    def truncate_embeddings(self, embeddings, truncate_dim):
+        if not self.config.matryoshka_dimensions:
+            logger.warning(
+                'Matryoshka embeddings are not supported, so dimension truncation will not be performed.'
+            )
+            return embeddings
+        elif truncate_dim in self.config.matryoshka_dimensions:
+            return [tensor[:truncate_dim] for tensor in embeddings]
+        else:
+            raise ValueError(f'The provided `truncate_dim` value of {truncate_dim} is not supported. '
+                             f'Supported dimensions are {self.config.matryoshka_dimensions}.')
     def mean_pooling(
         self, token_embeddings: torch.Tensor, attention_mask: torch.Tensor
     ):