Upload 3 files
modeling_minicpm_reranker.py
CHANGED
@@ -51,7 +51,7 @@ from transformers.utils import (
     replace_return_docstrings,
 )
 from transformers.utils.import_utils import is_torch_fx_available
-from configuration_minicpm_reranker import MiniCPMConfig
+from configuration_minicpm_reranker import LayerWiseMiniCPMConfig
 import re
 
 try:
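The import now pulls the reranker's own config class from the sibling configuration_minicpm_reranker.py shipped alongside this file in the repository, instead of the stock MiniCPM config. That file is not shown in this diff; the following is a rough, hypothetical sketch of the pattern such a companion file follows, with field names and defaults that are illustrative assumptions rather than the repository's actual contents.

# Hypothetical sketch of configuration_minicpm_reranker.py -- not part of this diff.
# Field names and defaults are illustrative assumptions.
from transformers import PretrainedConfig


class LayerWiseMiniCPMConfig(PretrainedConfig):
    model_type = "minicpm"

    def __init__(
        self,
        vocab_size=32000,       # assumed default, mirroring the base MiniCPM config
        hidden_size=2304,       # assumed
        num_hidden_layers=40,   # assumed
        start_layer=8,          # assumed: first layer whose hidden state is scored layer-wise
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.start_layer = start_layer
        super().__init__(**kwargs)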
@@ -70,7 +70,7 @@ if is_torch_fx_available():
 
 logger = logging.get_logger(__name__)
 
-_CONFIG_FOR_DOC = "MiniCPMConfig"
+_CONFIG_FOR_DOC = "LayerWiseMiniCPMConfig"
 
 
 def _get_unpad_data(attention_mask):
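_CONFIG_FOR_DOC only feeds the docstring helpers imported at the top of the file (replace_return_docstrings appears in the first hunk's context), so generated "Returns:" sections now reference the new config class. The stand-in below shows roughly how transformers modeling files consume this constant; the decorated method is illustrative, not code from this file.

# Illustrative stand-in showing how _CONFIG_FOR_DOC is typically consumed.
from transformers.modeling_outputs import BaseModelOutputWithPast
from transformers.utils import replace_return_docstrings

_CONFIG_FOR_DOC = "LayerWiseMiniCPMConfig"


class DocstringExample:
    @replace_return_docstrings(output_type=BaseModelOutputWithPast, config_class=_CONFIG_FOR_DOC)
    def forward(self, input_ids=None):
        r"""
        Returns:
        """
        return None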
@@ -301,7 +301,7 @@ def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
 class MiniCPMAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""
 
-    def __init__(self, config: MiniCPMConfig, layer_idx: Optional[int] = None):
+    def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional[int] = None):
         super().__init__()
         self.config = config
         self.layer_idx = layer_idx
@@ -763,7 +763,7 @@ MINICPM_ATTENTION_CLASSES = {
 
 
 class MiniCPMDecoderLayer(nn.Module):
-    def __init__(self, config: MiniCPMConfig, layer_idx: int):
+    def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int):
         super().__init__()
         self.hidden_size = config.hidden_size
         self.self_attn = MINICPM_ATTENTION_CLASSES[config._attn_implementation](config=config, layer_idx=layer_idx)
@@ -847,7 +847,7 @@ MINICPM_START_DOCSTRING = r"""
     and behavior.
 
     Parameters:
-        config ([`MiniCPMConfig`]):
+        config ([`LayerWiseMiniCPMConfig`]):
             Model configuration class with all the parameters of the model. Initializing with a config file does not
             load the weights associated with the model, only the configuration. Check out the
             [`~PreTrainedModel.from_pretrained`] method to load the model weights.
@@ -859,7 +859,7 @@ MINICPM_START_DOCSTRING = r"""
     MINICPM_START_DOCSTRING,
 )
 class MiniCPMPreTrainedModel(PreTrainedModel):
-    config_class = MiniCPMConfig
+    config_class = LayerWiseMiniCPMConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
     _no_split_modules = ["MiniCPMDecoderLayer"]
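Setting config_class on the PreTrainedModel base is what makes from_pretrained build and validate this model against LayerWiseMiniCPMConfig. Assuming the repository's config.json maps its auto classes to these custom files (the usual auto_map mechanism for remote code), loading would look roughly like the sketch below; the checkpoint id is a placeholder.

# Sketch of loading a repository that ships these custom configuration/modeling files.
from transformers import AutoConfig, AutoModel

repo_id = "your-org/minicpm-layerwise-reranker"  # placeholder, not a real checkpoint id

# trust_remote_code=True tells transformers to import configuration_minicpm_reranker.py
# and modeling_minicpm_reranker.py from the repository instead of its built-in MiniCPM classes.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)

print(type(config).__name__)  # expected: LayerWiseMiniCPMConfig
print(type(model).__name__)   # expected: LayerWiseMiniCPMModel, assuming auto_map points AutoModel here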
@@ -959,10 +959,10 @@ class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel):
     Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MiniCPMDecoderLayer`]
 
     Args:
-        config: MiniCPMConfig
+        config: LayerWiseMiniCPMConfig
     """
 
-    def __init__(self, config: MiniCPMConfig):
+    def __init__(self, config: LayerWiseMiniCPMConfig):
         super().__init__(config)
         self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size