Upload 3 files
modeling_minicpm_reranker.py
CHANGED
@@ -51,7 +51,7 @@ from transformers.utils import (
     replace_return_docstrings,
 )
 from transformers.utils.import_utils import is_torch_fx_available
-from configuration_minicpm_reranker import MiniCPMConfig
+from configuration_minicpm_reranker import LayerWiseMiniCPMConfig
 import re
 
 try:
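The import now pulls the reranker's own config class from the sibling configuration_minicpm_reranker.py shipped alongside this file in the repository, instead of the stock MiniCPM config. That file is not shown in this diff; the following is a rough, hypothetical sketch of the pattern such a companion file follows, with field names and defaults that are illustrative assumptions rather than the repository's actual contents.

# Hypothetical sketch of configuration_minicpm_reranker.py -- not part of this diff.
# Field names and defaults are illustrative assumptions.
from transformers import PretrainedConfig


class LayerWiseMiniCPMConfig(PretrainedConfig):
    model_type = "minicpm"

    def __init__(
        self,
        vocab_size=32000,       # assumed default, mirroring the base MiniCPM config
        hidden_size=2304,       # assumed
        num_hidden_layers=40,   # assumed
        start_layer=8,          # assumed: first layer whose hidden state is scored layer-wise
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.start_layer = start_layer
        super().__init__(**kwargs)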
@@ -70,7 +70,7 @@ if is_torch_fx_available():
 
 logger = logging.get_logger(__name__)
 
-_CONFIG_FOR_DOC = "MiniCPMConfig"
+_CONFIG_FOR_DOC = "LayerWiseMiniCPMConfig"
 
 
 def _get_unpad_data(attention_mask):
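_CONFIG_FOR_DOC only feeds the docstring helpers imported at the top of the file (replace_return_docstrings appears in the first hunk's context), so generated "Returns:" sections now reference the new config class. The stand-in below shows roughly how transformers modeling files consume this constant; the decorated method is illustrative, not code from this file.

# Illustrative stand-in showing how _CONFIG_FOR_DOC is typically consumed.
from transformers.modeling_outputs import BaseModelOutputWithPast
from transformers.utils import replace_return_docstrings

_CONFIG_FOR_DOC = "LayerWiseMiniCPMConfig"


class DocstringExample:
    @replace_return_docstrings(output_type=BaseModelOutputWithPast, config_class=_CONFIG_FOR_DOC)
    def forward(self, input_ids=None):
        r"""
        Returns:
        """
        return None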
@@ -301,7 +301,7 @@ def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
 class MiniCPMAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""
 
-    def __init__(self, config: MiniCPMConfig, layer_idx: Optional[int] = None):
+    def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional[int] = None):
         super().__init__()
         self.config = config
         self.layer_idx = layer_idx
@@ -763,7 +763,7 @@ MINICPM_ATTENTION_CLASSES = {
 
 
 class MiniCPMDecoderLayer(nn.Module):
-    def __init__(self, config: MiniCPMConfig, layer_idx: int):
+    def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int):
         super().__init__()
         self.hidden_size = config.hidden_size
         self.self_attn = MINICPM_ATTENTION_CLASSES[config._attn_implementation](config=config, layer_idx=layer_idx)
@@ -847,7 +847,7 @@ MINICPM_START_DOCSTRING = r"""
     and behavior.
 
     Parameters:
-        config ([`MiniCPMConfig`]):
+        config ([`LayerWiseMiniCPMConfig`]):
             Model configuration class with all the parameters of the model. Initializing with a config file does not
             load the weights associated with the model, only the configuration. Check out the
             [`~PreTrainedModel.from_pretrained`] method to load the model weights.
@@ -859,7 +859,7 @@ MINICPM_START_DOCSTRING = r"""
     MINICPM_START_DOCSTRING,
 )
 class MiniCPMPreTrainedModel(PreTrainedModel):
-    config_class = MiniCPMConfig
+    config_class = LayerWiseMiniCPMConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
     _no_split_modules = ["MiniCPMDecoderLayer"]
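Setting config_class on the PreTrainedModel base is what makes from_pretrained build and validate this model against LayerWiseMiniCPMConfig. Assuming the repository's config.json maps its auto classes to these custom files (the usual auto_map mechanism for remote code), loading would look roughly like the sketch below; the checkpoint id is a placeholder.

# Sketch of loading a repository that ships these custom configuration/modeling files.
from transformers import AutoConfig, AutoModel

repo_id = "your-org/minicpm-layerwise-reranker"  # placeholder, not a real checkpoint id

# trust_remote_code=True tells transformers to import configuration_minicpm_reranker.py
# and modeling_minicpm_reranker.py from the repository instead of its built-in MiniCPM classes.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)

print(type(config).__name__)  # expected: LayerWiseMiniCPMConfig
print(type(model).__name__)   # expected: LayerWiseMiniCPMModel, assuming auto_map points AutoModel here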
@@ -959,10 +959,10 @@ class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel):
     Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MiniCPMDecoderLayer`]
 
     Args:
-        config: MiniCPMConfig
+        config: LayerWiseMiniCPMConfig
     """
 
-    def __init__(self, config: MiniCPMConfig):
+    def __init__(self, config: LayerWiseMiniCPMConfig):
         super().__init__(config)
         self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size