ZhangYunchenY commited on
Commit
569f189
·
1 Parent(s): 46b9440

[Model] bert-base-uncased-stsb

Browse files
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/mnt/lustre/weixiuying/transformer/pretrain_models/bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "finetuning_task": "stsb",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "LABEL_0"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "LABEL_0": 0
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 12,
25
+ "num_hidden_layers": 12,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "regression",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.13.0.dev0",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0ae11f0df38f5795865ba1d3997efb24b8729f301afe94832cdd5f36870272f
3
+ size 875991075
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99a426d8487002df8623f33011771fe7fb759da0271cd42673303c4c6ae047fb
3
+ size 438021779
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a7c8a4c63ab300071a77268a406ee50547f5793c538dcde70593def84c9ed8
3
+ size 14659
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b6298878d15035f3cba5611cfa3c9c3ddf0a2576fe0e93990d6695ababd1f34
3
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "/mnt/lustre/weixiuying/transformer/pretrain_models/bert-base-uncased", "tokenizer_class": "BertTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4471864700317383,
3
+ "best_model_checkpoint": "output_dir/finetuned_best_stsb/checkpoint-1900",
4
+ "epoch": 6.111111111111111,
5
+ "global_step": 2200,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.14,
12
+ "eval_combined_score": 0.8343080271135384,
13
+ "eval_loss": 0.7862432599067688,
14
+ "eval_pearson": 0.829615903016159,
15
+ "eval_runtime": 7.9747,
16
+ "eval_samples_per_second": 188.095,
17
+ "eval_spearmanr": 0.8390001512109179,
18
+ "eval_steps_per_second": 11.787,
19
+ "step": 50
20
+ },
21
+ {
22
+ "epoch": 0.28,
23
+ "eval_combined_score": 0.8549898629109538,
24
+ "eval_loss": 0.6401779055595398,
25
+ "eval_pearson": 0.8531715498817952,
26
+ "eval_runtime": 5.6147,
27
+ "eval_samples_per_second": 267.156,
28
+ "eval_spearmanr": 0.8568081759401125,
29
+ "eval_steps_per_second": 16.742,
30
+ "step": 100
31
+ },
32
+ {
33
+ "epoch": 0.42,
34
+ "eval_combined_score": 0.8443191275483415,
35
+ "eval_loss": 0.6554257273674011,
36
+ "eval_pearson": 0.8474110080447523,
37
+ "eval_runtime": 5.6204,
38
+ "eval_samples_per_second": 266.887,
39
+ "eval_spearmanr": 0.8412272470519305,
40
+ "eval_steps_per_second": 16.725,
41
+ "step": 150
42
+ },
43
+ {
44
+ "epoch": 0.56,
45
+ "eval_combined_score": 0.8673615699410524,
46
+ "eval_loss": 0.5701442956924438,
47
+ "eval_pearson": 0.8692556554827053,
48
+ "eval_runtime": 5.9834,
49
+ "eval_samples_per_second": 250.694,
50
+ "eval_spearmanr": 0.8654674843993996,
51
+ "eval_steps_per_second": 15.71,
52
+ "step": 200
53
+ },
54
+ {
55
+ "epoch": 0.69,
56
+ "eval_combined_score": 0.8721712658068579,
57
+ "eval_loss": 0.5915741920471191,
58
+ "eval_pearson": 0.8722910960572966,
59
+ "eval_runtime": 5.5689,
60
+ "eval_samples_per_second": 269.355,
61
+ "eval_spearmanr": 0.8720514355564192,
62
+ "eval_steps_per_second": 16.88,
63
+ "step": 250
64
+ },
65
+ {
66
+ "epoch": 0.83,
67
+ "eval_combined_score": 0.8812625681870452,
68
+ "eval_loss": 0.5046865344047546,
69
+ "eval_pearson": 0.881670075532939,
70
+ "eval_runtime": 6.0146,
71
+ "eval_samples_per_second": 249.392,
72
+ "eval_spearmanr": 0.8808550608411514,
73
+ "eval_steps_per_second": 15.629,
74
+ "step": 300
75
+ },
76
+ {
77
+ "epoch": 0.97,
78
+ "eval_combined_score": 0.878795558425897,
79
+ "eval_loss": 0.5713155269622803,
80
+ "eval_pearson": 0.8736557247061164,
81
+ "eval_runtime": 6.0781,
82
+ "eval_samples_per_second": 246.786,
83
+ "eval_spearmanr": 0.8839353921456775,
84
+ "eval_steps_per_second": 15.465,
85
+ "step": 350
86
+ },
87
+ {
88
+ "epoch": 1.11,
89
+ "eval_combined_score": 0.8778669265351486,
90
+ "eval_loss": 0.5400073528289795,
91
+ "eval_pearson": 0.8804620985683852,
92
+ "eval_runtime": 5.5918,
93
+ "eval_samples_per_second": 268.248,
94
+ "eval_spearmanr": 0.875271754501912,
95
+ "eval_steps_per_second": 16.81,
96
+ "step": 400
97
+ },
98
+ {
99
+ "epoch": 1.25,
100
+ "eval_combined_score": 0.8833550106755617,
101
+ "eval_loss": 0.49523210525512695,
102
+ "eval_pearson": 0.8858630637184036,
103
+ "eval_runtime": 5.7638,
104
+ "eval_samples_per_second": 260.246,
105
+ "eval_spearmanr": 0.8808469576327196,
106
+ "eval_steps_per_second": 16.309,
107
+ "step": 450
108
+ },
109
+ {
110
+ "epoch": 1.39,
111
+ "learning_rate": 4.1319444444444445e-05,
112
+ "loss": 0.7828,
113
+ "step": 500
114
+ },
115
+ {
116
+ "epoch": 1.39,
117
+ "eval_combined_score": 0.8827148836128291,
118
+ "eval_loss": 0.5279519557952881,
119
+ "eval_pearson": 0.8845811601256096,
120
+ "eval_runtime": 5.9061,
121
+ "eval_samples_per_second": 253.975,
122
+ "eval_spearmanr": 0.8808486071000486,
123
+ "eval_steps_per_second": 15.916,
124
+ "step": 500
125
+ },
126
+ {
127
+ "epoch": 1.53,
128
+ "eval_combined_score": 0.8801981392571121,
129
+ "eval_loss": 0.5072206258773804,
130
+ "eval_pearson": 0.8810815176670656,
131
+ "eval_runtime": 5.5676,
132
+ "eval_samples_per_second": 269.418,
133
+ "eval_spearmanr": 0.8793147608471588,
134
+ "eval_steps_per_second": 16.884,
135
+ "step": 550
136
+ },
137
+ {
138
+ "epoch": 1.67,
139
+ "eval_combined_score": 0.8851362735229233,
140
+ "eval_loss": 0.49581071734428406,
141
+ "eval_pearson": 0.8875092909393476,
142
+ "eval_runtime": 5.6319,
143
+ "eval_samples_per_second": 266.34,
144
+ "eval_spearmanr": 0.8827632561064991,
145
+ "eval_steps_per_second": 16.691,
146
+ "step": 600
147
+ },
148
+ {
149
+ "epoch": 1.81,
150
+ "eval_combined_score": 0.8852476609333064,
151
+ "eval_loss": 0.4841392934322357,
152
+ "eval_pearson": 0.8870465421559632,
153
+ "eval_runtime": 5.8286,
154
+ "eval_samples_per_second": 257.351,
155
+ "eval_spearmanr": 0.8834487797106495,
156
+ "eval_steps_per_second": 16.127,
157
+ "step": 650
158
+ },
159
+ {
160
+ "epoch": 1.94,
161
+ "eval_combined_score": 0.8837498972174722,
162
+ "eval_loss": 0.5464913845062256,
163
+ "eval_pearson": 0.8849881470427847,
164
+ "eval_runtime": 5.5884,
165
+ "eval_samples_per_second": 268.412,
166
+ "eval_spearmanr": 0.8825116473921596,
167
+ "eval_steps_per_second": 16.82,
168
+ "step": 700
169
+ },
170
+ {
171
+ "epoch": 2.08,
172
+ "eval_combined_score": 0.8858322086355523,
173
+ "eval_loss": 0.48856478929519653,
174
+ "eval_pearson": 0.8881076481621831,
175
+ "eval_runtime": 5.6259,
176
+ "eval_samples_per_second": 266.624,
177
+ "eval_spearmanr": 0.8835567691089214,
178
+ "eval_steps_per_second": 16.708,
179
+ "step": 750
180
+ },
181
+ {
182
+ "epoch": 2.22,
183
+ "eval_combined_score": 0.8865111243309827,
184
+ "eval_loss": 0.4807361662387848,
185
+ "eval_pearson": 0.8892892267625273,
186
+ "eval_runtime": 5.825,
187
+ "eval_samples_per_second": 257.512,
188
+ "eval_spearmanr": 0.8837330218994379,
189
+ "eval_steps_per_second": 16.137,
190
+ "step": 800
191
+ },
192
+ {
193
+ "epoch": 2.36,
194
+ "eval_combined_score": 0.8881887854009817,
195
+ "eval_loss": 0.48988330364227295,
196
+ "eval_pearson": 0.8898428387864722,
197
+ "eval_runtime": 6.1177,
198
+ "eval_samples_per_second": 245.192,
199
+ "eval_spearmanr": 0.8865347320154912,
200
+ "eval_steps_per_second": 15.365,
201
+ "step": 850
202
+ },
203
+ {
204
+ "epoch": 2.5,
205
+ "eval_combined_score": 0.8868479524643782,
206
+ "eval_loss": 0.4934450387954712,
207
+ "eval_pearson": 0.8891167810226788,
208
+ "eval_runtime": 5.6331,
209
+ "eval_samples_per_second": 266.284,
210
+ "eval_spearmanr": 0.8845791239060775,
211
+ "eval_steps_per_second": 16.687,
212
+ "step": 900
213
+ },
214
+ {
215
+ "epoch": 2.64,
216
+ "eval_combined_score": 0.8889778694122226,
217
+ "eval_loss": 0.4866905212402344,
218
+ "eval_pearson": 0.8890192489258524,
219
+ "eval_runtime": 5.7954,
220
+ "eval_samples_per_second": 258.824,
221
+ "eval_spearmanr": 0.8889364898985929,
222
+ "eval_steps_per_second": 16.22,
223
+ "step": 950
224
+ },
225
+ {
226
+ "epoch": 2.78,
227
+ "learning_rate": 3.263888888888889e-05,
228
+ "loss": 0.2778,
229
+ "step": 1000
230
+ },
231
+ {
232
+ "epoch": 2.78,
233
+ "eval_combined_score": 0.88330796900741,
234
+ "eval_loss": 0.49821072816848755,
235
+ "eval_pearson": 0.8849717556333476,
236
+ "eval_runtime": 6.4097,
237
+ "eval_samples_per_second": 234.021,
238
+ "eval_spearmanr": 0.8816441823814725,
239
+ "eval_steps_per_second": 14.665,
240
+ "step": 1000
241
+ },
242
+ {
243
+ "epoch": 2.92,
244
+ "eval_combined_score": 0.8847183157645039,
245
+ "eval_loss": 0.5095486044883728,
246
+ "eval_pearson": 0.8871448241460179,
247
+ "eval_runtime": 5.596,
248
+ "eval_samples_per_second": 268.047,
249
+ "eval_spearmanr": 0.8822918073829898,
250
+ "eval_steps_per_second": 16.798,
251
+ "step": 1050
252
+ },
253
+ {
254
+ "epoch": 3.06,
255
+ "eval_combined_score": 0.8874272464175201,
256
+ "eval_loss": 0.4730277955532074,
257
+ "eval_pearson": 0.8895831976333978,
258
+ "eval_runtime": 5.6819,
259
+ "eval_samples_per_second": 263.994,
260
+ "eval_spearmanr": 0.8852712952016424,
261
+ "eval_steps_per_second": 16.544,
262
+ "step": 1100
263
+ },
264
+ {
265
+ "epoch": 3.19,
266
+ "eval_combined_score": 0.886644981813258,
267
+ "eval_loss": 0.4820166826248169,
268
+ "eval_pearson": 0.8884315334781125,
269
+ "eval_runtime": 6.0521,
270
+ "eval_samples_per_second": 247.847,
271
+ "eval_spearmanr": 0.8848584301484034,
272
+ "eval_steps_per_second": 15.532,
273
+ "step": 1150
274
+ },
275
+ {
276
+ "epoch": 3.33,
277
+ "eval_combined_score": 0.8894357915147134,
278
+ "eval_loss": 0.47540178894996643,
279
+ "eval_pearson": 0.8913766457258069,
280
+ "eval_runtime": 5.6258,
281
+ "eval_samples_per_second": 266.631,
282
+ "eval_spearmanr": 0.88749493730362,
283
+ "eval_steps_per_second": 16.709,
284
+ "step": 1200
285
+ },
286
+ {
287
+ "epoch": 3.47,
288
+ "eval_combined_score": 0.8882337266236713,
289
+ "eval_loss": 0.47845378518104553,
290
+ "eval_pearson": 0.8898157282686552,
291
+ "eval_runtime": 5.9023,
292
+ "eval_samples_per_second": 254.137,
293
+ "eval_spearmanr": 0.8866517249786874,
294
+ "eval_steps_per_second": 15.926,
295
+ "step": 1250
296
+ },
297
+ {
298
+ "epoch": 3.61,
299
+ "eval_combined_score": 0.88903651674165,
300
+ "eval_loss": 0.4937942624092102,
301
+ "eval_pearson": 0.8910047139114365,
302
+ "eval_runtime": 6.1737,
303
+ "eval_samples_per_second": 242.964,
304
+ "eval_spearmanr": 0.8870683195718634,
305
+ "eval_steps_per_second": 15.226,
306
+ "step": 1300
307
+ },
308
+ {
309
+ "epoch": 3.75,
310
+ "eval_combined_score": 0.8901558681204902,
311
+ "eval_loss": 0.46190693974494934,
312
+ "eval_pearson": 0.8927358825114844,
313
+ "eval_runtime": 5.7562,
314
+ "eval_samples_per_second": 260.588,
315
+ "eval_spearmanr": 0.887575853729496,
316
+ "eval_steps_per_second": 16.33,
317
+ "step": 1350
318
+ },
319
+ {
320
+ "epoch": 3.89,
321
+ "eval_combined_score": 0.8889376492175258,
322
+ "eval_loss": 0.49563419818878174,
323
+ "eval_pearson": 0.8915857107166335,
324
+ "eval_runtime": 5.5793,
325
+ "eval_samples_per_second": 268.853,
326
+ "eval_spearmanr": 0.8862895877184183,
327
+ "eval_steps_per_second": 16.848,
328
+ "step": 1400
329
+ },
330
+ {
331
+ "epoch": 4.03,
332
+ "eval_combined_score": 0.8911112435797486,
333
+ "eval_loss": 0.4565975069999695,
334
+ "eval_pearson": 0.8938483143744439,
335
+ "eval_runtime": 6.0592,
336
+ "eval_samples_per_second": 247.559,
337
+ "eval_spearmanr": 0.8883741727850532,
338
+ "eval_steps_per_second": 15.514,
339
+ "step": 1450
340
+ },
341
+ {
342
+ "epoch": 4.17,
343
+ "learning_rate": 2.3958333333333334e-05,
344
+ "loss": 0.141,
345
+ "step": 1500
346
+ },
347
+ {
348
+ "epoch": 4.17,
349
+ "eval_combined_score": 0.890231579373417,
350
+ "eval_loss": 0.4669683873653412,
351
+ "eval_pearson": 0.8928537444959865,
352
+ "eval_runtime": 6.214,
353
+ "eval_samples_per_second": 241.389,
354
+ "eval_spearmanr": 0.8876094142508474,
355
+ "eval_steps_per_second": 15.127,
356
+ "step": 1500
357
+ },
358
+ {
359
+ "epoch": 4.31,
360
+ "eval_combined_score": 0.8917620927758609,
361
+ "eval_loss": 0.4545550048351288,
362
+ "eval_pearson": 0.8941284924896612,
363
+ "eval_runtime": 5.6167,
364
+ "eval_samples_per_second": 267.062,
365
+ "eval_spearmanr": 0.8893956930620608,
366
+ "eval_steps_per_second": 16.736,
367
+ "step": 1550
368
+ },
369
+ {
370
+ "epoch": 4.44,
371
+ "eval_combined_score": 0.8919076890360074,
372
+ "eval_loss": 0.46239447593688965,
373
+ "eval_pearson": 0.8939787505423513,
374
+ "eval_runtime": 6.1392,
375
+ "eval_samples_per_second": 244.33,
376
+ "eval_spearmanr": 0.8898366275296635,
377
+ "eval_steps_per_second": 15.311,
378
+ "step": 1600
379
+ },
380
+ {
381
+ "epoch": 4.58,
382
+ "eval_combined_score": 0.8905705624659631,
383
+ "eval_loss": 0.4560569226741791,
384
+ "eval_pearson": 0.8930309291971393,
385
+ "eval_runtime": 5.7715,
386
+ "eval_samples_per_second": 259.896,
387
+ "eval_spearmanr": 0.8881101957347869,
388
+ "eval_steps_per_second": 16.287,
389
+ "step": 1650
390
+ },
391
+ {
392
+ "epoch": 4.72,
393
+ "eval_combined_score": 0.89164078789547,
394
+ "eval_loss": 0.47078728675842285,
395
+ "eval_pearson": 0.8931105725859777,
396
+ "eval_runtime": 5.5689,
397
+ "eval_samples_per_second": 269.352,
398
+ "eval_spearmanr": 0.8901710032049623,
399
+ "eval_steps_per_second": 16.879,
400
+ "step": 1700
401
+ },
402
+ {
403
+ "epoch": 4.86,
404
+ "eval_combined_score": 0.8922518284784362,
405
+ "eval_loss": 0.5126345157623291,
406
+ "eval_pearson": 0.8948084725851928,
407
+ "eval_runtime": 5.5913,
408
+ "eval_samples_per_second": 268.275,
409
+ "eval_spearmanr": 0.8896951843716796,
410
+ "eval_steps_per_second": 16.812,
411
+ "step": 1750
412
+ },
413
+ {
414
+ "epoch": 5.0,
415
+ "eval_combined_score": 0.8936765208721354,
416
+ "eval_loss": 0.4546430706977844,
417
+ "eval_pearson": 0.8960807975667862,
418
+ "eval_runtime": 5.6821,
419
+ "eval_samples_per_second": 263.988,
420
+ "eval_spearmanr": 0.8912722441774847,
421
+ "eval_steps_per_second": 16.543,
422
+ "step": 1800
423
+ },
424
+ {
425
+ "epoch": 5.14,
426
+ "eval_combined_score": 0.8930431248747293,
427
+ "eval_loss": 0.44920727610588074,
428
+ "eval_pearson": 0.8947890019960217,
429
+ "eval_runtime": 5.9173,
430
+ "eval_samples_per_second": 253.493,
431
+ "eval_spearmanr": 0.8912972477534367,
432
+ "eval_steps_per_second": 15.886,
433
+ "step": 1850
434
+ },
435
+ {
436
+ "epoch": 5.28,
437
+ "eval_combined_score": 0.894053370656966,
438
+ "eval_loss": 0.4471864700317383,
439
+ "eval_pearson": 0.8965503528124947,
440
+ "eval_runtime": 5.6396,
441
+ "eval_samples_per_second": 265.978,
442
+ "eval_spearmanr": 0.8915563885014371,
443
+ "eval_steps_per_second": 16.668,
444
+ "step": 1900
445
+ },
446
+ {
447
+ "epoch": 5.42,
448
+ "eval_combined_score": 0.8948116388641144,
449
+ "eval_loss": 0.470526784658432,
450
+ "eval_pearson": 0.8971476181305765,
451
+ "eval_runtime": 5.7724,
452
+ "eval_samples_per_second": 259.855,
453
+ "eval_spearmanr": 0.8924756595976523,
454
+ "eval_steps_per_second": 16.284,
455
+ "step": 1950
456
+ },
457
+ {
458
+ "epoch": 5.56,
459
+ "learning_rate": 1.527777777777778e-05,
460
+ "loss": 0.0844,
461
+ "step": 2000
462
+ },
463
+ {
464
+ "epoch": 5.56,
465
+ "eval_combined_score": 0.8922658461057144,
466
+ "eval_loss": 0.46638524532318115,
467
+ "eval_pearson": 0.8939377420957451,
468
+ "eval_runtime": 5.7907,
469
+ "eval_samples_per_second": 259.036,
470
+ "eval_spearmanr": 0.8905939501156837,
471
+ "eval_steps_per_second": 16.233,
472
+ "step": 2000
473
+ },
474
+ {
475
+ "epoch": 5.69,
476
+ "eval_combined_score": 0.8915829503407375,
477
+ "eval_loss": 0.49491751194000244,
478
+ "eval_pearson": 0.8941690530900368,
479
+ "eval_runtime": 5.6201,
480
+ "eval_samples_per_second": 266.9,
481
+ "eval_spearmanr": 0.8889968475914382,
482
+ "eval_steps_per_second": 16.726,
483
+ "step": 2050
484
+ },
485
+ {
486
+ "epoch": 5.83,
487
+ "eval_combined_score": 0.8940547207791253,
488
+ "eval_loss": 0.451761394739151,
489
+ "eval_pearson": 0.8965362985968673,
490
+ "eval_runtime": 6.0011,
491
+ "eval_samples_per_second": 249.956,
492
+ "eval_spearmanr": 0.8915731429613832,
493
+ "eval_steps_per_second": 15.664,
494
+ "step": 2100
495
+ },
496
+ {
497
+ "epoch": 5.97,
498
+ "eval_combined_score": 0.8923553049941013,
499
+ "eval_loss": 0.45326724648475647,
500
+ "eval_pearson": 0.8945634893619782,
501
+ "eval_runtime": 5.6075,
502
+ "eval_samples_per_second": 267.501,
503
+ "eval_spearmanr": 0.8901471206262244,
504
+ "eval_steps_per_second": 16.763,
505
+ "step": 2150
506
+ },
507
+ {
508
+ "epoch": 6.11,
509
+ "eval_combined_score": 0.8949367985553012,
510
+ "eval_loss": 0.44921526312828064,
511
+ "eval_pearson": 0.8970393518565526,
512
+ "eval_runtime": 5.5418,
513
+ "eval_samples_per_second": 270.668,
514
+ "eval_spearmanr": 0.8928342452540498,
515
+ "eval_steps_per_second": 16.962,
516
+ "step": 2200
517
+ }
518
+ ],
519
+ "max_steps": 2880,
520
+ "num_train_epochs": 8,
521
+ "total_flos": 2311015204895232.0,
522
+ "trial_name": null,
523
+ "trial_params": null
524
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ef45588b98ee4ba395f9b88a031be7a5cdda64e25e3b6bf85dd70623d9274f
3
+ size 2927
vocab.txt ADDED
The diff for this file is too large to render. See raw diff