MikkelWK committed on
Commit
12305cb
·
verified ·
1 Parent(s): abd9808

End of training

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ step_*
2
+ epoch_*
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "huawei-noah/TinyBERT_General_4L_312D",
3
+ "architectures": [
4
+ "BertForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "cell": {},
8
+ "classifier_dropout": null,
9
+ "emb_size": 312,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 312,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 1200,
15
+ "layer_norm_eps": 1e-12,
16
+ "max_position_embeddings": 512,
17
+ "model_type": "bert",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 4,
20
+ "pad_token_id": 0,
21
+ "position_embedding_type": "absolute",
22
+ "pre_trained": "",
23
+ "structure": [],
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.44.2",
26
+ "type_vocab_size": 2,
27
+ "use_cache": true,
28
+ "vocab_size": 30522
29
+ }
generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "pad_token_id": 0,
4
+ "transformers_version": "4.44.2"
5
+ }
log.json ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_perpl": [
3
+ 31449.7657081341,
4
+ 6135.761638226119,
5
+ 2275.301693637439,
6
+ 2100.5636547496974,
7
+ 1763.9246096238794,
8
+ 912.4593282895613,
9
+ 673.2478721800974,
10
+ 426.95282488572326,
11
+ 292.066480449549,
12
+ 229.60749931792827,
13
+ 178.48175607976276,
14
+ 151.7738098243359,
15
+ 132.68325221447097,
16
+ 131.60733192012995,
17
+ 121.35699482687662,
18
+ 113.03692537619067,
19
+ 314.2693244669915,
20
+ 319.62471471397265,
21
+ 357.5297510309972,
22
+ 373.37600350886794,
23
+ 427.6761706922814,
24
+ 418.7241591849811,
25
+ 88.4342864632641,
26
+ 81.68638521632647,
27
+ 71.71099212003867,
28
+ 69.46567916489799,
29
+ 84.99562405056126,
30
+ 97.03116680777474,
31
+ 99.61508321269098,
32
+ 99.53868474540631,
33
+ 96.69570308133885,
34
+ 97.3618052463106,
35
+ 97.37972722605315,
36
+ 95.8043254339647,
37
+ 94.59195856098238,
38
+ 100.33980437996713,
39
+ 95.41348395061875,
40
+ 94.98106528056366,
41
+ 91.7840253560043,
42
+ 92.34501897421714,
43
+ 90.46576611879469,
44
+ 91.17134170708903,
45
+ 89.93299763326101,
46
+ 87.14281899601743,
47
+ 88.47460898187907,
48
+ 88.24948549868428,
49
+ 84.98678917210486,
50
+ 87.07220763140273,
51
+ 63.93756172611777,
52
+ 60.228661637513234,
53
+ 59.19890548462696,
54
+ 57.310445090319284,
55
+ 55.892054997349064,
56
+ 58.32000354396741,
57
+ 56.82671999573329,
58
+ 59.14614220744896,
59
+ 59.17415449564529,
60
+ 59.723865239121785,
61
+ 56.183552716071574,
62
+ 42.941688373342394,
63
+ 43.17332328067362,
64
+ 42.02991489794291,
65
+ 41.175966066255576,
66
+ 42.14810471722296,
67
+ 41.02890386392398,
68
+ 40.96517442480964,
69
+ 39.17880381005861,
70
+ 39.25041154205274
71
+ ],
72
+ "eval_loss": [
73
+ 10.356146812438965,
74
+ 8.72188949584961,
75
+ 7.729867935180664,
76
+ 7.649960994720459,
77
+ 7.475296497344971,
78
+ 6.81614351272583,
79
+ 6.512113571166992,
80
+ 6.056673526763916,
81
+ 5.676981449127197,
82
+ 5.436371326446533,
83
+ 5.184486389160156,
84
+ 5.022391319274902,
85
+ 4.887964725494385,
86
+ 4.879822731018066,
87
+ 4.798736572265625,
88
+ 4.727714538574219,
89
+ 5.750250339508057,
90
+ 5.767147541046143,
91
+ 5.879218578338623,
92
+ 5.922585964202881,
93
+ 6.058366298675537,
94
+ 6.037212371826172,
95
+ 4.482259750366211,
96
+ 4.402887344360352,
97
+ 4.27264404296875,
98
+ 4.240832805633545,
99
+ 4.442599773406982,
100
+ 4.5750322341918945,
101
+ 4.601313591003418,
102
+ 4.600546360015869,
103
+ 4.571568965911865,
104
+ 4.578433990478516,
105
+ 4.578618049621582,
106
+ 4.562307834625244,
107
+ 4.549572467803955,
108
+ 4.608562469482422,
109
+ 4.558219909667969,
110
+ 4.553677558898926,
111
+ 4.51943826675415,
112
+ 4.525531768798828,
113
+ 4.504971504211426,
114
+ 4.512740612030029,
115
+ 4.499064922332764,
116
+ 4.467548370361328,
117
+ 4.482715606689453,
118
+ 4.480167865753174,
119
+ 4.442495822906494,
120
+ 4.466737747192383,
121
+ 4.157907009124756,
122
+ 4.098148345947266,
123
+ 4.080903053283691,
124
+ 4.048482894897461,
125
+ 4.0234222412109375,
126
+ 4.065945148468018,
127
+ 4.040006637573242,
128
+ 4.080011367797852,
129
+ 4.080484867095947,
130
+ 4.089731693267822,
131
+ 4.028624057769775,
132
+ 3.759843111038208,
133
+ 3.7652227878570557,
134
+ 3.7383816242218018,
135
+ 3.7178547382354736,
136
+ 3.74118971824646,
137
+ 3.7142767906188965,
138
+ 3.7127223014831543,
139
+ 3.66813588142395,
140
+ 3.669961929321289
141
+ ],
142
+ "train_loss": [
143
+ null,
144
+ null,
145
+ null,
146
+ null,
147
+ null,
148
+ null,
149
+ null,
150
+ null,
151
+ null,
152
+ null,
153
+ null,
154
+ null,
155
+ null,
156
+ null,
157
+ null,
158
+ null,
159
+ null,
160
+ null,
161
+ null,
162
+ null,
163
+ null,
164
+ null,
165
+ null,
166
+ null,
167
+ null,
168
+ null,
169
+ null,
170
+ null,
171
+ null,
172
+ null,
173
+ null,
174
+ null,
175
+ null,
176
+ null,
177
+ null,
178
+ null,
179
+ null,
180
+ null,
181
+ null,
182
+ null,
183
+ null,
184
+ null,
185
+ null,
186
+ null,
187
+ null,
188
+ null,
189
+ null,
190
+ null,
191
+ null,
192
+ null,
193
+ null,
194
+ null,
195
+ null,
196
+ null,
197
+ null,
198
+ null,
199
+ null,
200
+ null,
201
+ null,
202
+ null,
203
+ null,
204
+ null,
205
+ null,
206
+ null,
207
+ null,
208
+ null,
209
+ null,
210
+ null
211
+ ],
212
+ "completed_steps": [
213
+ 0,
214
+ 100,
215
+ 200,
216
+ 300,
217
+ 400,
218
+ 500,
219
+ 600,
220
+ 700,
221
+ 800,
222
+ 900,
223
+ 1000,
224
+ 1100,
225
+ 1200,
226
+ 1300,
227
+ 1400,
228
+ 1500,
229
+ 1600,
230
+ 1700,
231
+ 1800,
232
+ 1900,
233
+ 2000,
234
+ 2100,
235
+ 2200,
236
+ 2300,
237
+ 2400,
238
+ 2500,
239
+ 2600,
240
+ 2700,
241
+ 2800,
242
+ 2900,
243
+ 3000,
244
+ 3100,
245
+ 3200,
246
+ 3300,
247
+ 3400,
248
+ 3500,
249
+ 3600,
250
+ 3700,
251
+ 3800,
252
+ 3900,
253
+ 4000,
254
+ 4100,
255
+ 4200,
256
+ 4300,
257
+ 4400,
258
+ 4500,
259
+ 4600,
260
+ 4700,
261
+ 4800,
262
+ 4900,
263
+ 5000,
264
+ 5100,
265
+ 5200,
266
+ 5300,
267
+ 5400,
268
+ 5500,
269
+ 5600,
270
+ 5700,
271
+ 5800,
272
+ 5900,
273
+ 6000,
274
+ 6100,
275
+ 6200,
276
+ 6300,
277
+ 6400,
278
+ 6500,
279
+ 6600,
280
+ 6700
281
+ ]
282
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4647aea8665a92bfe22555c96b68c47c0b94aba8dcb84ddabc2ccb81709ee3fa
3
+ size 57534056
special_tokens_map.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "mask_token": "[MASK]",
3
+ "pad_token": "[PAD]"
4
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "mask_token": "[MASK]",
46
+ "model_max_length": 1000000000000000019884624838656,
47
+ "pad_token": "[PAD]",
48
+ "tokenizer_class": "PreTrainedTokenizerFast"
49
+ }