cfli committed · Commit 11d667e · verified · 1 Parent(s): a451b62

Upload 3 files

Files changed (1):
  1. modeling_minicpm_reranker.py  +8 -8
modeling_minicpm_reranker.py CHANGED
@@ -51,7 +51,7 @@ from transformers.utils import (
     replace_return_docstrings,
 )
 from transformers.utils.import_utils import is_torch_fx_available
-from configuration_minicpm_reranker import MiniCPMConfig
+from configuration_minicpm_reranker import LayerWiseMiniCPMConfig
 import re
 
 try:
@@ -70,7 +70,7 @@ if is_torch_fx_available():
 
 logger = logging.get_logger(__name__)
 
-_CONFIG_FOR_DOC = "MiniCPMConfig"
+_CONFIG_FOR_DOC = "LayerWiseMiniCPMConfig"
 
 
 def _get_unpad_data(attention_mask):
@@ -301,7 +301,7 @@ def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
 class MiniCPMAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""
 
-    def __init__(self, config: MiniCPMConfig, layer_idx: Optional[int] = None):
+    def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional[int] = None):
         super().__init__()
         self.config = config
         self.layer_idx = layer_idx
@@ -763,7 +763,7 @@ MINICPM_ATTENTION_CLASSES = {
 
 
 class MiniCPMDecoderLayer(nn.Module):
-    def __init__(self, config: MiniCPMConfig, layer_idx: int):
+    def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int):
         super().__init__()
         self.hidden_size = config.hidden_size
         self.self_attn = MINICPM_ATTENTION_CLASSES[config._attn_implementation](config=config, layer_idx=layer_idx)
@@ -847,7 +847,7 @@ MINICPM_START_DOCSTRING = r"""
     and behavior.
 
     Parameters:
-        config ([`MiniCPMConfig`]):
+        config ([`LayerWiseMiniCPMConfig`]):
             Model configuration class with all the parameters of the model. Initializing with a config file does not
             load the weights associated with the model, only the configuration. Check out the
             [`~PreTrainedModel.from_pretrained`] method to load the model weights.
@@ -859,7 +859,7 @@ MINICPM_START_DOCSTRING = r"""
     MINICPM_START_DOCSTRING,
 )
 class MiniCPMPreTrainedModel(PreTrainedModel):
-    config_class = MiniCPMConfig
+    config_class = LayerWiseMiniCPMConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
     _no_split_modules = ["MiniCPMDecoderLayer"]
@@ -959,10 +959,10 @@ class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel):
     Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MiniCPMDecoderLayer`]
 
     Args:
-        config: MiniCPMConfig
+        config: LayerWiseMiniCPMConfig
     """
 
-    def __init__(self, config: MiniCPMConfig):
+    def __init__(self, config: LayerWiseMiniCPMConfig):
         super().__init__(config)
         self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
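
For context, a minimal loading sketch (not part of the commit) showing where the renamed class surfaces: config_class is the class that from_pretrained uses to build the configuration from config.json, so it must name the class that configuration_minicpm_reranker.py actually defines, which is LayerWiseMiniCPMConfig after this change. The repo id below and the assumption that config.json's auto_map points AutoConfig/AutoModel at these two files are illustrative, not taken from the diff.

# Hypothetical usage sketch, assuming the checkpoint ships both custom files
# and wires them up via auto_map in config.json; the repo id is a placeholder.
from transformers import AutoConfig, AutoModel

repo_id = "org/minicpm-reranker"  # placeholder, not a real repo id

# trust_remote_code=True imports LayerWiseMiniCPMConfig and the LayerWise* modeling
# classes from the uploaded files instead of the built-in transformers classes.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, config=config, trust_remote_code=True)

print(type(config).__name__)  # expected: LayerWiseMiniCPMConfig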