joelniklaus commited on
Commit
1a1a837
1 Parent(s): d5dc244

Model save

Browse files
last-checkpoint/config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "_name_or_path": "data/plms/legal-maltese-roberta-base",
3
- "architectures": [
4
- "RobertaForMaskedLM"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 1,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 3072,
15
- "layer_norm_eps": 1e-05,
16
- "max_position_embeddings": 514,
17
- "model_type": "roberta",
18
- "num_attention_heads": 12,
19
- "num_hidden_layers": 12,
20
- "pad_token_id": 0,
21
- "position_embedding_type": "absolute",
22
- "torch_dtype": "float32",
23
- "transformers_version": "4.20.1",
24
- "type_vocab_size": 1,
25
- "use_cache": true,
26
- "vocab_size": 32000
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7943214636961d5475251044b7197b0b45ed9bccd5d4ec66749d6527d3fd9c49
3
- size 885325017
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cfb7e109c350a5e049ae110b4544e7bc349cf3e8e56c5ef38281f837a7164ae
3
- size 442675755
 
 
 
 
last-checkpoint/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a330f61e4578349a46401e4fdccb97c82369ec86c2b5a06002a5d69389ac5937
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a330f61e4578349a46401e4fdccb97c82369ec86c2b5a06002a5d69389ac5937
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a330f61e4578349a46401e4fdccb97c82369ec86c2b5a06002a5d69389ac5937
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a330f61e4578349a46401e4fdccb97c82369ec86c2b5a06002a5d69389ac5937
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_4.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a330f61e4578349a46401e4fdccb97c82369ec86c2b5a06002a5d69389ac5937
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_5.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a330f61e4578349a46401e4fdccb97c82369ec86c2b5a06002a5d69389ac5937
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_6.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a330f61e4578349a46401e4fdccb97c82369ec86c2b5a06002a5d69389ac5937
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_7.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a330f61e4578349a46401e4fdccb97c82369ec86c2b5a06002a5d69389ac5937
3
- size 13611
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:34c7791bbb10178054552ff1c1aa9bf08a101a8199906b7bf72dd42f5c977109
3
- size 623
 
 
 
 
last-checkpoint/special_tokens_map.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": "<mask>",
6
- "pad_token": "<pad>",
7
- "sep_token": "</s>",
8
- "unk_token": "<unk>"
9
- }
 
 
 
 
 
 
 
 
 
 
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": "<mask>",
6
- "model_max_length": 512,
7
- "name_or_path": "data/plms/legal-maltese-roberta-base",
8
- "pad_token": "<pad>",
9
- "sep_token": "</s>",
10
- "special_tokens_map_file": "data/plms/legal-maltese-roberta-base_32k/special_tokens_map.json",
11
- "tokenizer_class": "PreTrainedTokenizerFast",
12
- "unk_token": "<unk>"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,1248 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 94.0036,
5
- "global_step": 200000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.01,
12
- "learning_rate": 1e-05,
13
- "loss": 6.5534,
14
- "step": 1000
15
- },
16
- {
17
- "epoch": 0.01,
18
- "learning_rate": 2e-05,
19
- "loss": 4.1516,
20
- "step": 2000
21
- },
22
- {
23
- "epoch": 1.0,
24
- "learning_rate": 3e-05,
25
- "loss": 3.4614,
26
- "step": 3000
27
- },
28
- {
29
- "epoch": 1.01,
30
- "learning_rate": 4e-05,
31
- "loss": 3.1907,
32
- "step": 4000
33
- },
34
- {
35
- "epoch": 2.0,
36
- "learning_rate": 5e-05,
37
- "loss": 2.6515,
38
- "step": 5000
39
- },
40
- {
41
- "epoch": 2.01,
42
- "learning_rate": 6e-05,
43
- "loss": 1.7738,
44
- "step": 6000
45
- },
46
- {
47
- "epoch": 3.0,
48
- "learning_rate": 7e-05,
49
- "loss": 1.391,
50
- "step": 7000
51
- },
52
- {
53
- "epoch": 3.01,
54
- "learning_rate": 8e-05,
55
- "loss": 1.2048,
56
- "step": 8000
57
- },
58
- {
59
- "epoch": 4.0,
60
- "learning_rate": 9e-05,
61
- "loss": 1.1721,
62
- "step": 9000
63
- },
64
- {
65
- "epoch": 4.01,
66
- "learning_rate": 0.0001,
67
- "loss": 1.0419,
68
- "step": 10000
69
- },
70
- {
71
- "epoch": 5.0,
72
- "learning_rate": 9.999316524962345e-05,
73
- "loss": 1.0721,
74
- "step": 11000
75
- },
76
- {
77
- "epoch": 5.01,
78
- "learning_rate": 9.997266286704631e-05,
79
- "loss": 0.9575,
80
- "step": 12000
81
- },
82
- {
83
- "epoch": 6.0,
84
- "learning_rate": 9.993849845741524e-05,
85
- "loss": 1.0098,
86
- "step": 13000
87
- },
88
- {
89
- "epoch": 6.01,
90
- "learning_rate": 9.989068136093873e-05,
91
- "loss": 0.9054,
92
- "step": 14000
93
- },
94
- {
95
- "epoch": 7.0,
96
- "learning_rate": 9.98292246503335e-05,
97
- "loss": 0.9631,
98
- "step": 15000
99
- },
100
- {
101
- "epoch": 7.01,
102
- "learning_rate": 9.975414512725057e-05,
103
- "loss": 0.8696,
104
- "step": 16000
105
- },
106
- {
107
- "epoch": 8.0,
108
- "learning_rate": 9.966546331768191e-05,
109
- "loss": 0.9371,
110
- "step": 17000
111
- },
112
- {
113
- "epoch": 8.01,
114
- "learning_rate": 9.956320346634876e-05,
115
- "loss": 0.8348,
116
- "step": 18000
117
- },
118
- {
119
- "epoch": 8.01,
120
- "learning_rate": 9.944739353007344e-05,
121
- "loss": 0.9098,
122
- "step": 19000
123
- },
124
- {
125
- "epoch": 9.0,
126
- "learning_rate": 9.931806517013612e-05,
127
- "loss": 0.8175,
128
- "step": 20000
129
- },
130
- {
131
- "epoch": 9.01,
132
- "learning_rate": 9.917525374361912e-05,
133
- "loss": 0.8767,
134
- "step": 21000
135
- },
136
- {
137
- "epoch": 10.0,
138
- "learning_rate": 9.901899829374047e-05,
139
- "loss": 0.8122,
140
- "step": 22000
141
- },
142
- {
143
- "epoch": 10.01,
144
- "learning_rate": 9.884934153917997e-05,
145
- "loss": 0.8499,
146
- "step": 23000
147
- },
148
- {
149
- "epoch": 11.0,
150
- "learning_rate": 9.86663298624003e-05,
151
- "loss": 0.8202,
152
- "step": 24000
153
- },
154
- {
155
- "epoch": 11.01,
156
- "learning_rate": 9.847001329696653e-05,
157
- "loss": 0.8144,
158
- "step": 25000
159
- },
160
- {
161
- "epoch": 12.0,
162
- "learning_rate": 9.826044551386744e-05,
163
- "loss": 0.8235,
164
- "step": 26000
165
- },
166
- {
167
- "epoch": 12.01,
168
- "learning_rate": 9.803768380684242e-05,
169
- "loss": 0.7798,
170
- "step": 27000
171
- },
172
- {
173
- "epoch": 13.0,
174
- "learning_rate": 9.780178907671789e-05,
175
- "loss": 0.8221,
176
- "step": 28000
177
- },
178
- {
179
- "epoch": 13.01,
180
- "learning_rate": 9.755282581475769e-05,
181
- "loss": 0.7611,
182
- "step": 29000
183
- },
184
- {
185
- "epoch": 14.0,
186
- "learning_rate": 9.729086208503174e-05,
187
- "loss": 0.8189,
188
- "step": 30000
189
- },
190
- {
191
- "epoch": 14.01,
192
- "learning_rate": 9.701596950580806e-05,
193
- "loss": 0.7435,
194
- "step": 31000
195
- },
196
- {
197
- "epoch": 15.0,
198
- "learning_rate": 9.672822322997305e-05,
199
- "loss": 0.8112,
200
- "step": 32000
201
- },
202
- {
203
- "epoch": 15.01,
204
- "learning_rate": 9.642770192448536e-05,
205
- "loss": 0.736,
206
- "step": 33000
207
- },
208
- {
209
- "epoch": 16.0,
210
- "learning_rate": 9.611448774886924e-05,
211
- "loss": 0.8037,
212
- "step": 34000
213
- },
214
- {
215
- "epoch": 16.01,
216
- "learning_rate": 9.578866633275288e-05,
217
- "loss": 0.7246,
218
- "step": 35000
219
- },
220
- {
221
- "epoch": 16.01,
222
- "learning_rate": 9.545032675245813e-05,
223
- "loss": 0.8023,
224
- "step": 36000
225
- },
226
- {
227
- "epoch": 17.0,
228
- "learning_rate": 9.509956150664796e-05,
229
- "loss": 0.7149,
230
- "step": 37000
231
- },
232
- {
233
- "epoch": 17.01,
234
- "learning_rate": 9.473646649103818e-05,
235
- "loss": 0.7778,
236
- "step": 38000
237
- },
238
- {
239
- "epoch": 18.0,
240
- "learning_rate": 9.43611409721806e-05,
241
- "loss": 0.7235,
242
- "step": 39000
243
- },
244
- {
245
- "epoch": 18.01,
246
- "learning_rate": 9.397368756032445e-05,
247
- "loss": 0.7667,
248
- "step": 40000
249
- },
250
- {
251
- "epoch": 19.0,
252
- "learning_rate": 9.357421218136386e-05,
253
- "loss": 0.7283,
254
- "step": 41000
255
- },
256
- {
257
- "epoch": 19.01,
258
- "learning_rate": 9.316282404787871e-05,
259
- "loss": 0.7437,
260
- "step": 42000
261
- },
262
- {
263
- "epoch": 20.0,
264
- "learning_rate": 9.273963562927695e-05,
265
- "loss": 0.7436,
266
- "step": 43000
267
- },
268
- {
269
- "epoch": 20.01,
270
- "learning_rate": 9.230476262104677e-05,
271
- "loss": 0.719,
272
- "step": 44000
273
- },
274
- {
275
- "epoch": 21.0,
276
- "learning_rate": 9.185832391312644e-05,
277
- "loss": 0.7477,
278
- "step": 45000
279
- },
280
- {
281
- "epoch": 21.01,
282
- "learning_rate": 9.140044155740101e-05,
283
- "loss": 0.7012,
284
- "step": 46000
285
- },
286
- {
287
- "epoch": 22.0,
288
- "learning_rate": 9.093124073433463e-05,
289
- "loss": 0.7536,
290
- "step": 47000
291
- },
292
- {
293
- "epoch": 22.01,
294
- "learning_rate": 9.045084971874738e-05,
295
- "loss": 0.6864,
296
- "step": 48000
297
- },
298
- {
299
- "epoch": 23.0,
300
- "learning_rate": 8.995939984474624e-05,
301
- "loss": 0.7518,
302
- "step": 49000
303
- },
304
- {
305
- "epoch": 23.01,
306
- "learning_rate": 8.945702546981969e-05,
307
- "loss": 0.6815,
308
- "step": 50000
309
- },
310
- {
311
- "epoch": 23.01,
312
- "eval_loss": 0.526369035243988,
313
- "eval_runtime": 17.4444,
314
- "eval_samples_per_second": 286.625,
315
- "eval_steps_per_second": 2.293,
316
- "step": 50000
317
- },
318
- {
319
- "epoch": 24.0,
320
- "learning_rate": 8.894386393810563e-05,
321
- "loss": 0.7469,
322
- "step": 51000
323
- },
324
- {
325
- "epoch": 24.01,
326
- "learning_rate": 8.842005554284296e-05,
327
- "loss": 0.6762,
328
- "step": 52000
329
- },
330
- {
331
- "epoch": 24.01,
332
- "learning_rate": 8.788574348801675e-05,
333
- "loss": 0.7545,
334
- "step": 53000
335
- },
336
- {
337
- "epoch": 25.0,
338
- "learning_rate": 8.73410738492077e-05,
339
- "loss": 0.6625,
340
- "step": 54000
341
- },
342
- {
343
- "epoch": 25.01,
344
- "learning_rate": 8.678619553365659e-05,
345
- "loss": 0.738,
346
- "step": 55000
347
- },
348
- {
349
- "epoch": 26.0,
350
- "learning_rate": 8.622126023955446e-05,
351
- "loss": 0.6738,
352
- "step": 56000
353
- },
354
- {
355
- "epoch": 26.01,
356
- "learning_rate": 8.564642241456986e-05,
357
- "loss": 0.7235,
358
- "step": 57000
359
- },
360
- {
361
- "epoch": 27.0,
362
- "learning_rate": 8.506183921362443e-05,
363
- "loss": 0.6707,
364
- "step": 58000
365
- },
366
- {
367
- "epoch": 27.01,
368
- "learning_rate": 8.44676704559283e-05,
369
- "loss": 0.7156,
370
- "step": 59000
371
- },
372
- {
373
- "epoch": 28.0,
374
- "learning_rate": 8.386407858128706e-05,
375
- "loss": 0.698,
376
- "step": 60000
377
- },
378
- {
379
- "epoch": 28.01,
380
- "learning_rate": 8.32512286056924e-05,
381
- "loss": 0.6826,
382
- "step": 61000
383
- },
384
- {
385
- "epoch": 29.0,
386
- "learning_rate": 8.262928807620843e-05,
387
- "loss": 0.7076,
388
- "step": 62000
389
- },
390
- {
391
- "epoch": 29.01,
392
- "learning_rate": 8.199842702516583e-05,
393
- "loss": 0.6635,
394
- "step": 63000
395
- },
396
- {
397
- "epoch": 30.0,
398
- "learning_rate": 8.135881792367686e-05,
399
- "loss": 0.7139,
400
- "step": 64000
401
- },
402
- {
403
- "epoch": 30.01,
404
- "learning_rate": 8.07106356344834e-05,
405
- "loss": 0.6529,
406
- "step": 65000
407
- },
408
- {
409
- "epoch": 31.0,
410
- "learning_rate": 8.005405736415126e-05,
411
- "loss": 0.7152,
412
- "step": 66000
413
- },
414
- {
415
- "epoch": 31.01,
416
- "learning_rate": 7.938926261462366e-05,
417
- "loss": 0.648,
418
- "step": 67000
419
- },
420
- {
421
- "epoch": 32.0,
422
- "learning_rate": 7.871643313414718e-05,
423
- "loss": 0.7112,
424
- "step": 68000
425
- },
426
- {
427
- "epoch": 32.01,
428
- "learning_rate": 7.803575286758364e-05,
429
- "loss": 0.6436,
430
- "step": 69000
431
- },
432
- {
433
- "epoch": 33.0,
434
- "learning_rate": 7.734740790612136e-05,
435
- "loss": 0.7159,
436
- "step": 70000
437
- },
438
- {
439
- "epoch": 33.01,
440
- "learning_rate": 7.66515864363997e-05,
441
- "loss": 0.6343,
442
- "step": 71000
443
- },
444
- {
445
- "epoch": 33.01,
446
- "learning_rate": 7.594847868906076e-05,
447
- "loss": 0.7121,
448
- "step": 72000
449
- },
450
- {
451
- "epoch": 34.0,
452
- "learning_rate": 7.52382768867422e-05,
453
- "loss": 0.6346,
454
- "step": 73000
455
- },
456
- {
457
- "epoch": 34.01,
458
- "learning_rate": 7.452117519152542e-05,
459
- "loss": 0.6972,
460
- "step": 74000
461
- },
462
- {
463
- "epoch": 35.0,
464
- "learning_rate": 7.379736965185368e-05,
465
- "loss": 0.6407,
466
- "step": 75000
467
- },
468
- {
469
- "epoch": 35.01,
470
- "learning_rate": 7.30670581489344e-05,
471
- "loss": 0.6868,
472
- "step": 76000
473
- },
474
- {
475
- "epoch": 36.0,
476
- "learning_rate": 7.233044034264034e-05,
477
- "loss": 0.6596,
478
- "step": 77000
479
- },
480
- {
481
- "epoch": 36.01,
482
- "learning_rate": 7.158771761692464e-05,
483
- "loss": 0.6654,
484
- "step": 78000
485
- },
486
- {
487
- "epoch": 37.0,
488
- "learning_rate": 7.083909302476453e-05,
489
- "loss": 0.6721,
490
- "step": 79000
491
- },
492
- {
493
- "epoch": 37.01,
494
- "learning_rate": 7.008477123264848e-05,
495
- "loss": 0.6429,
496
- "step": 80000
497
- },
498
- {
499
- "epoch": 38.0,
500
- "learning_rate": 6.932495846462261e-05,
501
- "loss": 0.6817,
502
- "step": 81000
503
- },
504
- {
505
- "epoch": 38.01,
506
- "learning_rate": 6.855986244591104e-05,
507
- "loss": 0.6304,
508
- "step": 82000
509
- },
510
- {
511
- "epoch": 39.0,
512
- "learning_rate": 6.778969234612584e-05,
513
- "loss": 0.6869,
514
- "step": 83000
515
- },
516
- {
517
- "epoch": 39.01,
518
- "learning_rate": 6.701465872208216e-05,
519
- "loss": 0.621,
520
- "step": 84000
521
- },
522
- {
523
- "epoch": 40.0,
524
- "learning_rate": 6.623497346023418e-05,
525
- "loss": 0.6863,
526
- "step": 85000
527
- },
528
- {
529
- "epoch": 40.01,
530
- "learning_rate": 6.545084971874738e-05,
531
- "loss": 0.6192,
532
- "step": 86000
533
- },
534
- {
535
- "epoch": 41.0,
536
- "learning_rate": 6.466250186922325e-05,
537
- "loss": 0.6862,
538
- "step": 87000
539
- },
540
- {
541
- "epoch": 41.01,
542
- "learning_rate": 6.387014543809223e-05,
543
- "loss": 0.6125,
544
- "step": 88000
545
- },
546
- {
547
- "epoch": 41.01,
548
- "learning_rate": 6.307399704769099e-05,
549
- "loss": 0.6915,
550
- "step": 89000
551
- },
552
- {
553
- "epoch": 42.0,
554
- "learning_rate": 6.227427435703997e-05,
555
- "loss": 0.6083,
556
- "step": 90000
557
- },
558
- {
559
- "epoch": 42.01,
560
- "learning_rate": 6.147119600233758e-05,
561
- "loss": 0.6731,
562
- "step": 91000
563
- },
564
- {
565
- "epoch": 43.0,
566
- "learning_rate": 6.066498153718735e-05,
567
- "loss": 0.6196,
568
- "step": 92000
569
- },
570
- {
571
- "epoch": 43.01,
572
- "learning_rate": 5.985585137257401e-05,
573
- "loss": 0.6667,
574
- "step": 93000
575
- },
576
- {
577
- "epoch": 44.0,
578
- "learning_rate": 5.90440267166055e-05,
579
- "loss": 0.628,
580
- "step": 94000
581
- },
582
- {
583
- "epoch": 44.01,
584
- "learning_rate": 5.8229729514036705e-05,
585
- "loss": 0.6492,
586
- "step": 95000
587
- },
588
- {
589
- "epoch": 45.0,
590
- "learning_rate": 5.74131823855921e-05,
591
- "loss": 0.6463,
592
- "step": 96000
593
- },
594
- {
595
- "epoch": 45.01,
596
- "learning_rate": 5.6594608567103456e-05,
597
- "loss": 0.6283,
598
- "step": 97000
599
- },
600
- {
601
- "epoch": 46.0,
602
- "learning_rate": 5.577423184847932e-05,
603
- "loss": 0.6538,
604
- "step": 98000
605
- },
606
- {
607
- "epoch": 46.01,
608
- "learning_rate": 5.495227651252315e-05,
609
- "loss": 0.6136,
610
- "step": 99000
611
- },
612
- {
613
- "epoch": 47.0,
614
- "learning_rate": 5.4128967273616625e-05,
615
- "loss": 0.6655,
616
- "step": 100000
617
- },
618
- {
619
- "epoch": 47.0,
620
- "eval_loss": 0.46227478981018066,
621
- "eval_runtime": 11.4578,
622
- "eval_samples_per_second": 436.385,
623
- "eval_steps_per_second": 3.491,
624
- "step": 100000
625
- },
626
- {
627
- "epoch": 47.01,
628
- "learning_rate": 5.330452921628497e-05,
629
- "loss": 0.6019,
630
- "step": 101000
631
- },
632
- {
633
- "epoch": 48.0,
634
- "learning_rate": 5.247918773366112e-05,
635
- "loss": 0.6658,
636
- "step": 102000
637
- },
638
- {
639
- "epoch": 48.01,
640
- "learning_rate": 5.165316846586541e-05,
641
- "loss": 0.5996,
642
- "step": 103000
643
- },
644
- {
645
- "epoch": 49.0,
646
- "learning_rate": 5.0826697238317935e-05,
647
- "loss": 0.6633,
648
- "step": 104000
649
- },
650
- {
651
- "epoch": 49.01,
652
- "learning_rate": 5e-05,
653
- "loss": 0.5962,
654
- "step": 105000
655
- },
656
- {
657
- "epoch": 49.01,
658
- "learning_rate": 4.917330276168208e-05,
659
- "loss": 0.6737,
660
- "step": 106000
661
- },
662
- {
663
- "epoch": 50.01,
664
- "learning_rate": 4.834683153413459e-05,
665
- "loss": 0.5852,
666
- "step": 107000
667
- },
668
- {
669
- "epoch": 50.01,
670
- "learning_rate": 4.7520812266338885e-05,
671
- "loss": 0.6609,
672
- "step": 108000
673
- },
674
- {
675
- "epoch": 51.0,
676
- "learning_rate": 4.669547078371504e-05,
677
- "loss": 0.5984,
678
- "step": 109000
679
- },
680
- {
681
- "epoch": 51.01,
682
- "learning_rate": 4.5871032726383386e-05,
683
- "loss": 0.6499,
684
- "step": 110000
685
- },
686
- {
687
- "epoch": 52.0,
688
- "learning_rate": 4.504772348747687e-05,
689
- "loss": 0.5973,
690
- "step": 111000
691
- },
692
- {
693
- "epoch": 52.01,
694
- "learning_rate": 4.4225768151520694e-05,
695
- "loss": 0.6432,
696
- "step": 112000
697
- },
698
- {
699
- "epoch": 53.0,
700
- "learning_rate": 4.3405391432896555e-05,
701
- "loss": 0.6252,
702
- "step": 113000
703
- },
704
- {
705
- "epoch": 53.01,
706
- "learning_rate": 4.2586817614407895e-05,
707
- "loss": 0.6144,
708
- "step": 114000
709
- },
710
- {
711
- "epoch": 54.0,
712
- "learning_rate": 4.17702704859633e-05,
713
- "loss": 0.6367,
714
- "step": 115000
715
- },
716
- {
717
- "epoch": 54.01,
718
- "learning_rate": 4.095597328339452e-05,
719
- "loss": 0.5973,
720
- "step": 116000
721
- },
722
- {
723
- "epoch": 55.0,
724
- "learning_rate": 4.0144148627425993e-05,
725
- "loss": 0.6451,
726
- "step": 117000
727
- },
728
- {
729
- "epoch": 55.01,
730
- "learning_rate": 3.933501846281267e-05,
731
- "loss": 0.5877,
732
- "step": 118000
733
- },
734
- {
735
- "epoch": 56.0,
736
- "learning_rate": 3.852880399766243e-05,
737
- "loss": 0.6487,
738
- "step": 119000
739
- },
740
- {
741
- "epoch": 56.01,
742
- "learning_rate": 3.772572564296005e-05,
743
- "loss": 0.5837,
744
- "step": 120000
745
- },
746
- {
747
- "epoch": 57.0,
748
- "learning_rate": 3.6926002952309016e-05,
749
- "loss": 0.6466,
750
- "step": 121000
751
- },
752
- {
753
- "epoch": 57.01,
754
- "learning_rate": 3.612985456190778e-05,
755
- "loss": 0.5821,
756
- "step": 122000
757
- },
758
- {
759
- "epoch": 58.0,
760
- "learning_rate": 3.533749813077677e-05,
761
- "loss": 0.6532,
762
- "step": 123000
763
- },
764
- {
765
- "epoch": 58.01,
766
- "learning_rate": 3.4549150281252636e-05,
767
- "loss": 0.5736,
768
- "step": 124000
769
- },
770
- {
771
- "epoch": 58.01,
772
- "learning_rate": 3.3765026539765834e-05,
773
- "loss": 0.6513,
774
- "step": 125000
775
- },
776
- {
777
- "epoch": 59.0,
778
- "learning_rate": 3.298534127791785e-05,
779
- "loss": 0.5755,
780
- "step": 126000
781
- },
782
- {
783
- "epoch": 59.01,
784
- "learning_rate": 3.221030765387417e-05,
785
- "loss": 0.6387,
786
- "step": 127000
787
- },
788
- {
789
- "epoch": 60.0,
790
- "learning_rate": 3.144013755408895e-05,
791
- "loss": 0.5829,
792
- "step": 128000
793
- },
794
- {
795
- "epoch": 60.01,
796
- "learning_rate": 3.0675041535377405e-05,
797
- "loss": 0.6301,
798
- "step": 129000
799
- },
800
- {
801
- "epoch": 61.0,
802
- "learning_rate": 2.991522876735154e-05,
803
- "loss": 0.6023,
804
- "step": 130000
805
- },
806
- {
807
- "epoch": 61.01,
808
- "learning_rate": 2.916090697523549e-05,
809
- "loss": 0.6108,
810
- "step": 131000
811
- },
812
- {
813
- "epoch": 62.0,
814
- "learning_rate": 2.8412282383075363e-05,
815
- "loss": 0.6165,
816
- "step": 132000
817
- },
818
- {
819
- "epoch": 62.01,
820
- "learning_rate": 2.766955965735968e-05,
821
- "loss": 0.5903,
822
- "step": 133000
823
- },
824
- {
825
- "epoch": 63.0,
826
- "learning_rate": 2.693294185106562e-05,
827
- "loss": 0.6279,
828
- "step": 134000
829
- },
830
- {
831
- "epoch": 63.01,
832
- "learning_rate": 2.6202630348146324e-05,
833
- "loss": 0.5786,
834
- "step": 135000
835
- },
836
- {
837
- "epoch": 64.0,
838
- "learning_rate": 2.547882480847461e-05,
839
- "loss": 0.6337,
840
- "step": 136000
841
- },
842
- {
843
- "epoch": 64.01,
844
- "learning_rate": 2.476172311325783e-05,
845
- "loss": 0.5708,
846
- "step": 137000
847
- },
848
- {
849
- "epoch": 65.0,
850
- "learning_rate": 2.405152131093926e-05,
851
- "loss": 0.6348,
852
- "step": 138000
853
- },
854
- {
855
- "epoch": 65.01,
856
- "learning_rate": 2.3348413563600325e-05,
857
- "loss": 0.5705,
858
- "step": 139000
859
- },
860
- {
861
- "epoch": 66.0,
862
- "learning_rate": 2.2652592093878666e-05,
863
- "loss": 0.6366,
864
- "step": 140000
865
- },
866
- {
867
- "epoch": 66.01,
868
- "learning_rate": 2.196424713241637e-05,
869
- "loss": 0.565,
870
- "step": 141000
871
- },
872
- {
873
- "epoch": 66.01,
874
- "learning_rate": 2.128356686585282e-05,
875
- "loss": 0.6425,
876
- "step": 142000
877
- },
878
- {
879
- "epoch": 67.0,
880
- "learning_rate": 2.061073738537635e-05,
881
- "loss": 0.562,
882
- "step": 143000
883
- },
884
- {
885
- "epoch": 67.01,
886
- "learning_rate": 1.9945942635848748e-05,
887
- "loss": 0.6265,
888
- "step": 144000
889
- },
890
- {
891
- "epoch": 68.0,
892
- "learning_rate": 1.928936436551661e-05,
893
- "loss": 0.5741,
894
- "step": 145000
895
- },
896
- {
897
- "epoch": 68.01,
898
- "learning_rate": 1.8641182076323148e-05,
899
- "loss": 0.6221,
900
- "step": 146000
901
- },
902
- {
903
- "epoch": 69.0,
904
- "learning_rate": 1.800157297483417e-05,
905
- "loss": 0.5826,
906
- "step": 147000
907
- },
908
- {
909
- "epoch": 69.01,
910
- "learning_rate": 1.7370711923791567e-05,
911
- "loss": 0.6061,
912
- "step": 148000
913
- },
914
- {
915
- "epoch": 70.0,
916
- "learning_rate": 1.6748771394307585e-05,
917
- "loss": 0.6024,
918
- "step": 149000
919
- },
920
- {
921
- "epoch": 70.01,
922
- "learning_rate": 1.6135921418712956e-05,
923
- "loss": 0.5867,
924
- "step": 150000
925
- },
926
- {
927
- "epoch": 70.01,
928
- "eval_loss": 0.4325275421142578,
929
- "eval_runtime": 11.266,
930
- "eval_samples_per_second": 443.813,
931
- "eval_steps_per_second": 3.551,
932
- "step": 150000
933
- },
934
- {
935
- "epoch": 71.0,
936
- "learning_rate": 1.553232954407171e-05,
937
- "loss": 0.6123,
938
- "step": 151000
939
- },
940
- {
941
- "epoch": 71.01,
942
- "learning_rate": 1.4938160786375572e-05,
943
- "loss": 0.573,
944
- "step": 152000
945
- },
946
- {
947
- "epoch": 72.0,
948
- "learning_rate": 1.435357758543015e-05,
949
- "loss": 0.6236,
950
- "step": 153000
951
- },
952
- {
953
- "epoch": 72.01,
954
- "learning_rate": 1.3778739760445552e-05,
955
- "loss": 0.5627,
956
- "step": 154000
957
- },
958
- {
959
- "epoch": 73.0,
960
- "learning_rate": 1.3213804466343421e-05,
961
- "loss": 0.6252,
962
- "step": 155000
963
- },
964
- {
965
- "epoch": 73.01,
966
- "learning_rate": 1.2658926150792322e-05,
967
- "loss": 0.5616,
968
- "step": 156000
969
- },
970
- {
971
- "epoch": 74.0,
972
- "learning_rate": 1.2114256511983274e-05,
973
- "loss": 0.6251,
974
- "step": 157000
975
- },
976
- {
977
- "epoch": 74.01,
978
- "learning_rate": 1.157994445715706e-05,
979
- "loss": 0.5591,
980
- "step": 158000
981
- },
982
- {
983
- "epoch": 74.01,
984
- "learning_rate": 1.1056136061894384e-05,
985
- "loss": 0.6356,
986
- "step": 159000
987
- },
988
- {
989
- "epoch": 75.0,
990
- "learning_rate": 1.0542974530180327e-05,
991
- "loss": 0.5502,
992
- "step": 160000
993
- },
994
- {
995
- "epoch": 75.01,
996
- "learning_rate": 1.0040600155253765e-05,
997
- "loss": 0.6248,
998
- "step": 161000
999
- },
1000
- {
1001
- "epoch": 76.0,
1002
- "learning_rate": 9.549150281252633e-06,
1003
- "loss": 0.5635,
1004
- "step": 162000
1005
- },
1006
- {
1007
- "epoch": 76.01,
1008
- "learning_rate": 9.068759265665384e-06,
1009
- "loss": 0.6153,
1010
- "step": 163000
1011
- },
1012
- {
1013
- "epoch": 77.0,
1014
- "learning_rate": 8.599558442598998e-06,
1015
- "loss": 0.5637,
1016
- "step": 164000
1017
- },
1018
- {
1019
- "epoch": 77.01,
1020
- "learning_rate": 8.141676086873572e-06,
1021
- "loss": 0.6101,
1022
- "step": 165000
1023
- },
1024
- {
1025
- "epoch": 78.0,
1026
- "learning_rate": 7.695237378953223e-06,
1027
- "loss": 0.592,
1028
- "step": 166000
1029
- },
1030
- {
1031
- "epoch": 78.01,
1032
- "learning_rate": 7.260364370723044e-06,
1033
- "loss": 0.583,
1034
- "step": 167000
1035
- },
1036
- {
1037
- "epoch": 79.0,
1038
- "learning_rate": 6.837175952121306e-06,
1039
- "loss": 0.6055,
1040
- "step": 168000
1041
- },
1042
- {
1043
- "epoch": 79.01,
1044
- "learning_rate": 6.425787818636131e-06,
1045
- "loss": 0.567,
1046
- "step": 169000
1047
- },
1048
- {
1049
- "epoch": 80.0,
1050
- "learning_rate": 6.026312439675552e-06,
1051
- "loss": 0.6148,
1052
- "step": 170000
1053
- },
1054
- {
1055
- "epoch": 80.01,
1056
- "learning_rate": 5.6388590278194096e-06,
1057
- "loss": 0.5592,
1058
- "step": 171000
1059
- },
1060
- {
1061
- "epoch": 81.0,
1062
- "learning_rate": 5.263533508961827e-06,
1063
- "loss": 0.6192,
1064
- "step": 172000
1065
- },
1066
- {
1067
- "epoch": 81.01,
1068
- "learning_rate": 4.900438493352055e-06,
1069
- "loss": 0.5564,
1070
- "step": 173000
1071
- },
1072
- {
1073
- "epoch": 82.0,
1074
- "learning_rate": 4.549673247541875e-06,
1075
- "loss": 0.619,
1076
- "step": 174000
1077
- },
1078
- {
1079
- "epoch": 82.01,
1080
- "learning_rate": 4.2113336672471245e-06,
1081
- "loss": 0.5554,
1082
- "step": 175000
1083
- },
1084
- {
1085
- "epoch": 83.0,
1086
- "learning_rate": 3.885512251130763e-06,
1087
- "loss": 0.6263,
1088
- "step": 176000
1089
- },
1090
- {
1091
- "epoch": 83.01,
1092
- "learning_rate": 3.5722980755146517e-06,
1093
- "loss": 0.5488,
1094
- "step": 177000
1095
- },
1096
- {
1097
- "epoch": 83.01,
1098
- "learning_rate": 3.271776770026963e-06,
1099
- "loss": 0.6256,
1100
- "step": 178000
1101
- },
1102
- {
1103
- "epoch": 84.0,
1104
- "learning_rate": 2.9840304941919415e-06,
1105
- "loss": 0.5518,
1106
- "step": 179000
1107
- },
1108
- {
1109
- "epoch": 84.01,
1110
- "learning_rate": 2.7091379149682685e-06,
1111
- "loss": 0.6152,
1112
- "step": 180000
1113
- },
1114
- {
1115
- "epoch": 85.0,
1116
- "learning_rate": 2.4471741852423237e-06,
1117
- "loss": 0.5599,
1118
- "step": 181000
1119
- },
1120
- {
1121
- "epoch": 85.01,
1122
- "learning_rate": 2.1982109232821178e-06,
1123
- "loss": 0.6081,
1124
- "step": 182000
1125
- },
1126
- {
1127
- "epoch": 86.0,
1128
- "learning_rate": 1.962316193157593e-06,
1129
- "loss": 0.5797,
1130
- "step": 183000
1131
- },
1132
- {
1133
- "epoch": 86.01,
1134
- "learning_rate": 1.7395544861325718e-06,
1135
- "loss": 0.5887,
1136
- "step": 184000
1137
- },
1138
- {
1139
- "epoch": 87.0,
1140
- "learning_rate": 1.5299867030334814e-06,
1141
- "loss": 0.5954,
1142
- "step": 185000
1143
- },
1144
- {
1145
- "epoch": 87.01,
1146
- "learning_rate": 1.333670137599713e-06,
1147
- "loss": 0.5704,
1148
- "step": 186000
1149
- },
1150
- {
1151
- "epoch": 88.0,
1152
- "learning_rate": 1.1506584608200367e-06,
1153
- "loss": 0.6077,
1154
- "step": 187000
1155
- },
1156
- {
1157
- "epoch": 88.01,
1158
- "learning_rate": 9.810017062595322e-07,
1159
- "loss": 0.5601,
1160
- "step": 188000
1161
- },
1162
- {
1163
- "epoch": 89.0,
1164
- "learning_rate": 8.247462563808817e-07,
1165
- "loss": 0.6152,
1166
- "step": 189000
1167
- },
1168
- {
1169
- "epoch": 89.01,
1170
- "learning_rate": 6.819348298638839e-07,
1171
- "loss": 0.5529,
1172
- "step": 190000
1173
- },
1174
- {
1175
- "epoch": 90.0,
1176
- "learning_rate": 5.526064699265753e-07,
1177
- "loss": 0.6175,
1178
- "step": 191000
1179
- },
1180
- {
1181
- "epoch": 90.01,
1182
- "learning_rate": 4.367965336512403e-07,
1183
- "loss": 0.5541,
1184
- "step": 192000
1185
- },
1186
- {
1187
- "epoch": 91.0,
1188
- "learning_rate": 3.3453668231809286e-07,
1189
- "loss": 0.6199,
1190
- "step": 193000
1191
- },
1192
- {
1193
- "epoch": 91.01,
1194
- "learning_rate": 2.458548727494292e-07,
1195
- "loss": 0.5506,
1196
- "step": 194000
1197
- },
1198
- {
1199
- "epoch": 91.01,
1200
- "learning_rate": 1.7077534966650766e-07,
1201
- "loss": 0.6275,
1202
- "step": 195000
1203
- },
1204
- {
1205
- "epoch": 92.0,
1206
- "learning_rate": 1.0931863906127327e-07,
1207
- "loss": 0.548,
1208
- "step": 196000
1209
- },
1210
- {
1211
- "epoch": 92.01,
1212
- "learning_rate": 6.150154258476315e-08,
1213
- "loss": 0.6124,
1214
- "step": 197000
1215
- },
1216
- {
1217
- "epoch": 93.0,
1218
- "learning_rate": 2.7337132953697554e-08,
1219
- "loss": 0.5607,
1220
- "step": 198000
1221
- },
1222
- {
1223
- "epoch": 93.01,
1224
- "learning_rate": 6.834750376549792e-09,
1225
- "loss": 0.6086,
1226
- "step": 199000
1227
- },
1228
- {
1229
- "epoch": 94.0,
1230
- "learning_rate": 0.0,
1231
- "loss": 0.5706,
1232
- "step": 200000
1233
- },
1234
- {
1235
- "epoch": 94.0,
1236
- "eval_loss": 0.41860753297805786,
1237
- "eval_runtime": 11.1936,
1238
- "eval_samples_per_second": 446.686,
1239
- "eval_steps_per_second": 3.573,
1240
- "step": 200000
1241
- }
1242
- ],
1243
- "max_steps": 200000,
1244
- "num_train_epochs": 9223372036854775807,
1245
- "total_flos": 3.3690797211648e+18,
1246
- "trial_name": null,
1247
- "trial_params": null
1248
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a5c67a3affd35498d080669546e52ddd212d178aa66955eddea909f6058919b
3
- size 3439
 
 
 
 
runs/Feb09_18-28-25_t1v-n-d1a73e20-w-0/events.out.tfevents.1675967330.t1v-n-d1a73e20-w-0.966527.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26eefbd5aec1824d42ec4db674b4ae0ff25f39b381e9de57b53a269acc022fda
3
- size 36836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75283ab9566881987dfb722a6193b8e259b632ee56d6054d712dba782eb7cc55
3
+ size 37196