ZhenYang21 committed
Commit c8e7585 · 1 Parent(s): f25f57e

Delete configuration_glm.py

Files changed (1)
  1. configuration_glm.py +0 -136
configuration_glm.py DELETED
@@ -1,136 +0,0 @@
- # coding=utf-8
- # Copyright 2022 shunxing1234 and The HuggingFace Inc. team. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """ GLM model configuration """
-
- from transformers.configuration_utils import PretrainedConfig
- from transformers.utils import logging
-
- logger = logging.get_logger(__name__)
-
- GLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-     "shunxing1234/GLM": "https://huggingface.co/shunxing1234/GLM/resolve/main/config.json",
-     # See all GLM models at https://huggingface.co/models?filter=glm
- }
-
-
- class GLMConfig(PretrainedConfig):
-     r"""
-     This is the configuration class to store the configuration of a [`~GLMModel`].
-     It is used to instantiate a GLM model according to the specified arguments, defining the model
-     architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
-     the GLM [shunxing1234/GLM-base-cased](https://huggingface.co/shunxing1234/GLM-base-cased) architecture.
-
-     Configuration objects inherit from [`PretrainedConfig`] and can be used
-     to control the model outputs. Read the documentation from [`PretrainedConfig`]
-     for more information.
-
-
-     Args:
-         vocab_size (`int`, *optional*, defaults to 30522):
-             Vocabulary size of the GLM model. Defines the number of different tokens that can be represented by the
-             `inputs_ids` passed when calling [`~GLMModel`] or
-             [`~TFGLMModel`].
-         hidden_size (`int`, *optional*, defaults to 768):
-             Dimension of the encoder layers and the pooler layer.
-         num_hidden_layers (`int`, *optional*, defaults to 12):
-             Number of hidden layers in the Transformer encoder.
-         num_attention_heads (`int`, *optional*, defaults to 12):
-             Number of attention heads for each attention layer in the Transformer encoder.
-         intermediate_size (`int`, *optional*, defaults to 3072):
-             Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
-         hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
-             The non-linear activation function (function or string) in the encoder and pooler.
-             If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
-         hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
-             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
-         attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
-             The dropout ratio for the attention probabilities.
-         max_position_embeddings (`int`, *optional*, defaults to 512):
-             The maximum sequence length that this model might ever be used with.
-             Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
-         type_vocab_size (`int`, *optional*, defaults to 2):
-             The vocabulary size of the `token_type_ids` passed when calling [`~GLMModel`] or
-             [`~TFGLMModel`].
-         initializer_range (`float`, *optional*, defaults to 0.02):
-             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-         layer_norm_eps (`float`, *optional*, defaults to 1e-12):
-             The epsilon used by the layer normalization layers.
-         use_cache (`bool`, *optional*, defaults to `True`):
-             Whether or not the model should return the last key/values attentions (not used by all models). Only
-             relevant if `config.is_decoder=True`.
-     Example:
-
-     ```python
-     >>> from transformers import GLMModel, GLMConfig
-
-     >>> # Initializing a GLM shunxing1234/GLM-base-cased style configuration
-     >>> configuration = GLMConfig()
-
-     >>> # Initializing a model from the shunxing1234/GLM-base-cased style configuration
-     >>> model = GLMModel(configuration)
-
-     >>> # Accessing the model configuration
-     >>> configuration = model.config
-     ```
-     """
-     model_type = "glm"
-     attribute_map = {
-         "num_hidden_layers": "num_layers"
-     }
-
-     def __init__(
-         self,
-         num_layers=24,
-         vocab_size=30592,
-         hidden_size=1024,
-         num_attention_heads=16,
-         embedding_dropout_prob=0.1,
-         attention_dropout_prob=0.1,
-         output_dropout_prob=0.1,
-         max_sequence_length=512,
-         checkpoint_activations=False,
-         checkpoint_num_layers=1,
-         parallel_output=True,
-         relative_encoding=False,
-         block_position_encoding=True,
-         output_predict=False,
-         spell_length=None,
-         spell_func="lstm",
-         attention_scale=1.0,
-         initializer_range=0.02,
-         pool_token="cls",
-         **kwargs
-     ):
-         self.num_layers = num_layers
-         self.vocab_size = vocab_size
-         self.hidden_size = hidden_size
-         self.num_attention_heads = num_attention_heads
-         self.embedding_dropout_prob = embedding_dropout_prob
-         self.attention_dropout_prob = attention_dropout_prob
-         self.output_dropout_prob = output_dropout_prob
-         self.max_sequence_length = max_sequence_length
-         self.checkpoint_activations = checkpoint_activations
-         self.checkpoint_num_layers = checkpoint_num_layers
-         self.parallel_output = parallel_output
-         self.relative_encoding = relative_encoding
-         self.block_position_encoding = block_position_encoding
-         self.output_predict = output_predict
-         self.spell_length = spell_length
-         self.spell_func = spell_func
-         self.attention_scale = attention_scale
-         self.initializer_range = initializer_range
-         self.pool_token = pool_token
-
-         super().__init__(**kwargs)
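
For reference, the constructor of the deleted class takes GLM-specific arguments (`num_layers`, `max_sequence_length`, `spell_length`, ...) rather than the BERT-style names listed in its docstring. Below is a minimal usage sketch, assuming a copy of the file shown above is still available locally as `configuration_glm.py`; the output directory `./glm-config` is only an illustrative placeholder.

```python
# Minimal sketch: instantiate and round-trip the deleted GLMConfig.
# Assumes configuration_glm.py (as shown above) is on the local Python path.
from configuration_glm import GLMConfig

# Argument names follow the deleted __init__ signature, not the docstring.
config = GLMConfig(
    num_layers=24,
    hidden_size=1024,
    num_attention_heads=16,
    max_sequence_length=512,
    block_position_encoding=True,
)

# attribute_map exposes `num_layers` under the standard `num_hidden_layers` name.
assert config.num_hidden_layers == config.num_layers

# PretrainedConfig supplies JSON (de)serialization; model_type "glm" is written out.
config.save_pretrained("./glm-config")  # writes ./glm-config/config.json
reloaded = GLMConfig.from_pretrained("./glm-config")
print(reloaded.to_json_string())
```

The round trip relies only on behaviour inherited from `PretrainedConfig`; no other file from this repository is needed to load or save the configuration itself.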