younggi commited on
Commit
1953bdb
·
1 Parent(s): 0f95c0b

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 49.02,
3
- "eval_accuracy": 0.9210526315789473,
4
- "eval_loss": 0.326847106218338,
5
- "eval_runtime": 4.6548,
6
- "eval_samples_per_second": 16.327,
7
- "eval_steps_per_second": 4.082
8
  }
 
1
  {
2
  "epoch": 49.02,
3
+ "eval_accuracy": 0.9431818181818182,
4
+ "eval_loss": 0.2483096718788147,
5
+ "eval_runtime": 5.3467,
6
+ "eval_samples_per_second": 16.459,
7
+ "eval_steps_per_second": 4.115
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a576aa47c7d89056eeb95702b41b38a24376ddc83df54618b7f32d5af2b22640
3
- size 345004539
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00ad1a6a98c838df0b38ec1a227af8dac858d11eeff0cc9fec722042b8cfa0a5
3
+ size 345004687
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 49.02,
3
- "eval_accuracy": 0.9210526315789473,
4
- "eval_loss": 0.326847106218338,
5
- "eval_runtime": 4.6548,
6
- "eval_samples_per_second": 16.327,
7
- "eval_steps_per_second": 4.082
8
  }
 
1
  {
2
  "epoch": 49.02,
3
+ "eval_accuracy": 0.9431818181818182,
4
+ "eval_loss": 0.2483096718788147,
5
+ "eval_runtime": 5.3467,
6
+ "eval_samples_per_second": 16.459,
7
+ "eval_steps_per_second": 4.115
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_metric": 1.0,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-825",
4
  "epoch": 49.02,
5
  "global_step": 3750,
6
  "is_hyper_param_search": false,
@@ -10,730 +10,730 @@
10
  {
11
  "epoch": 0.0,
12
  "learning_rate": 1.3333333333333334e-06,
13
- "loss": 2.3925,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.01,
18
  "learning_rate": 2.666666666666667e-06,
19
- "loss": 2.3073,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.01,
24
  "learning_rate": 4.000000000000001e-06,
25
- "loss": 2.345,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.01,
30
  "learning_rate": 5.333333333333334e-06,
31
- "loss": 2.2641,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.01,
36
  "learning_rate": 6.666666666666667e-06,
37
- "loss": 2.2744,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.02,
42
  "learning_rate": 8.000000000000001e-06,
43
- "loss": 2.2117,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.02,
48
  "learning_rate": 9.333333333333334e-06,
49
- "loss": 2.2547,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.02,
54
- "eval_accuracy": 0.22580645161290322,
55
- "eval_loss": 2.2531020641326904,
56
- "eval_runtime": 2.0467,
57
- "eval_samples_per_second": 15.146,
58
- "eval_steps_per_second": 3.909,
59
  "step": 75
60
  },
61
  {
62
  "epoch": 1.0,
63
  "learning_rate": 1.0666666666666667e-05,
64
- "loss": 2.248,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 1.0,
69
  "learning_rate": 1.2e-05,
70
- "loss": 2.1841,
71
  "step": 90
72
  },
73
  {
74
  "epoch": 1.01,
75
  "learning_rate": 1.3333333333333333e-05,
76
- "loss": 2.1868,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 1.01,
81
  "learning_rate": 1.4666666666666668e-05,
82
- "loss": 2.1535,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 1.01,
87
  "learning_rate": 1.6000000000000003e-05,
88
- "loss": 2.0819,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 1.01,
93
  "learning_rate": 1.7333333333333336e-05,
94
- "loss": 2.0471,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 1.02,
99
  "learning_rate": 1.866666666666667e-05,
100
- "loss": 2.0011,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.02,
105
  "learning_rate": 2e-05,
106
- "loss": 1.7995,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.02,
111
- "eval_accuracy": 0.3548387096774194,
112
- "eval_loss": 1.8327221870422363,
113
- "eval_runtime": 1.9884,
114
- "eval_samples_per_second": 15.591,
115
- "eval_steps_per_second": 4.023,
116
  "step": 150
117
  },
118
  {
119
  "epoch": 2.0,
120
  "learning_rate": 2.1333333333333335e-05,
121
- "loss": 1.6304,
122
  "step": 160
123
  },
124
  {
125
  "epoch": 2.01,
126
  "learning_rate": 2.2666666666666668e-05,
127
- "loss": 1.4125,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 2.01,
132
  "learning_rate": 2.4e-05,
133
- "loss": 1.3991,
134
  "step": 180
135
  },
136
  {
137
  "epoch": 2.01,
138
  "learning_rate": 2.5333333333333337e-05,
139
- "loss": 1.135,
140
  "step": 190
141
  },
142
  {
143
  "epoch": 2.01,
144
  "learning_rate": 2.6666666666666667e-05,
145
- "loss": 1.0372,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 2.02,
150
  "learning_rate": 2.8000000000000003e-05,
151
- "loss": 0.928,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 2.02,
156
  "learning_rate": 2.9333333333333336e-05,
157
- "loss": 1.0062,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 2.02,
162
- "eval_accuracy": 0.6451612903225806,
163
- "eval_loss": 0.953277587890625,
164
- "eval_runtime": 1.9414,
165
- "eval_samples_per_second": 15.968,
166
- "eval_steps_per_second": 4.121,
167
  "step": 225
168
  },
169
  {
170
  "epoch": 3.0,
171
  "learning_rate": 3.066666666666667e-05,
172
- "loss": 0.7021,
173
  "step": 230
174
  },
175
  {
176
  "epoch": 3.0,
177
  "learning_rate": 3.2000000000000005e-05,
178
- "loss": 0.7046,
179
  "step": 240
180
  },
181
  {
182
  "epoch": 3.01,
183
  "learning_rate": 3.3333333333333335e-05,
184
- "loss": 0.537,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 3.01,
189
  "learning_rate": 3.466666666666667e-05,
190
- "loss": 0.4732,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 3.01,
195
  "learning_rate": 3.6e-05,
196
- "loss": 0.6748,
197
  "step": 270
198
  },
199
  {
200
  "epoch": 3.01,
201
  "learning_rate": 3.733333333333334e-05,
202
- "loss": 0.3289,
203
  "step": 280
204
  },
205
  {
206
  "epoch": 3.02,
207
  "learning_rate": 3.866666666666667e-05,
208
- "loss": 0.469,
209
  "step": 290
210
  },
211
  {
212
  "epoch": 3.02,
213
  "learning_rate": 4e-05,
214
- "loss": 0.6316,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 3.02,
219
- "eval_accuracy": 0.8064516129032258,
220
- "eval_loss": 0.42770135402679443,
221
- "eval_runtime": 1.9683,
222
- "eval_samples_per_second": 15.749,
223
- "eval_steps_per_second": 4.064,
224
  "step": 300
225
  },
226
  {
227
  "epoch": 4.0,
228
  "learning_rate": 4.133333333333333e-05,
229
- "loss": 0.1767,
230
  "step": 310
231
  },
232
  {
233
  "epoch": 4.01,
234
  "learning_rate": 4.266666666666667e-05,
235
- "loss": 0.4263,
236
  "step": 320
237
  },
238
  {
239
  "epoch": 4.01,
240
  "learning_rate": 4.4000000000000006e-05,
241
- "loss": 0.2588,
242
  "step": 330
243
  },
244
  {
245
  "epoch": 4.01,
246
  "learning_rate": 4.5333333333333335e-05,
247
- "loss": 0.1301,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 4.01,
252
  "learning_rate": 4.666666666666667e-05,
253
- "loss": 0.4264,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 4.02,
258
  "learning_rate": 4.8e-05,
259
- "loss": 0.2462,
260
  "step": 360
261
  },
262
  {
263
  "epoch": 4.02,
264
  "learning_rate": 4.933333333333334e-05,
265
- "loss": 0.2867,
266
  "step": 370
267
  },
268
  {
269
  "epoch": 4.02,
270
- "eval_accuracy": 0.7741935483870968,
271
- "eval_loss": 0.44487717747688293,
272
- "eval_runtime": 1.9664,
273
- "eval_samples_per_second": 15.765,
274
- "eval_steps_per_second": 4.068,
275
  "step": 375
276
  },
277
  {
278
  "epoch": 5.0,
279
  "learning_rate": 4.9925925925925926e-05,
280
- "loss": 0.5059,
281
  "step": 380
282
  },
283
  {
284
  "epoch": 5.0,
285
  "learning_rate": 4.977777777777778e-05,
286
- "loss": 0.5456,
287
  "step": 390
288
  },
289
  {
290
  "epoch": 5.01,
291
  "learning_rate": 4.962962962962963e-05,
292
- "loss": 0.451,
293
  "step": 400
294
  },
295
  {
296
  "epoch": 5.01,
297
  "learning_rate": 4.9481481481481485e-05,
298
- "loss": 0.6779,
299
  "step": 410
300
  },
301
  {
302
  "epoch": 5.01,
303
  "learning_rate": 4.933333333333334e-05,
304
- "loss": 0.0615,
305
  "step": 420
306
  },
307
  {
308
  "epoch": 5.01,
309
  "learning_rate": 4.918518518518519e-05,
310
- "loss": 0.1825,
311
  "step": 430
312
  },
313
  {
314
  "epoch": 5.02,
315
  "learning_rate": 4.903703703703704e-05,
316
- "loss": 0.4993,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 5.02,
321
  "learning_rate": 4.888888888888889e-05,
322
- "loss": 0.2703,
323
  "step": 450
324
  },
325
  {
326
  "epoch": 5.02,
327
- "eval_accuracy": 0.8387096774193549,
328
- "eval_loss": 0.3418065011501312,
329
- "eval_runtime": 1.9502,
330
- "eval_samples_per_second": 15.896,
331
- "eval_steps_per_second": 4.102,
332
  "step": 450
333
  },
334
  {
335
  "epoch": 6.0,
336
  "learning_rate": 4.874074074074074e-05,
337
- "loss": 0.0368,
338
  "step": 460
339
  },
340
  {
341
  "epoch": 6.01,
342
  "learning_rate": 4.8592592592592596e-05,
343
- "loss": 0.3592,
344
  "step": 470
345
  },
346
  {
347
  "epoch": 6.01,
348
  "learning_rate": 4.844444444444445e-05,
349
- "loss": 0.1413,
350
  "step": 480
351
  },
352
  {
353
  "epoch": 6.01,
354
  "learning_rate": 4.82962962962963e-05,
355
- "loss": 0.2684,
356
  "step": 490
357
  },
358
  {
359
  "epoch": 6.01,
360
  "learning_rate": 4.814814814814815e-05,
361
- "loss": 0.0553,
362
  "step": 500
363
  },
364
  {
365
  "epoch": 6.02,
366
  "learning_rate": 4.8e-05,
367
- "loss": 0.2376,
368
  "step": 510
369
  },
370
  {
371
  "epoch": 6.02,
372
  "learning_rate": 4.7851851851851854e-05,
373
- "loss": 0.1765,
374
  "step": 520
375
  },
376
  {
377
  "epoch": 6.02,
378
- "eval_accuracy": 0.8387096774193549,
379
- "eval_loss": 0.6093774437904358,
380
- "eval_runtime": 1.9718,
381
- "eval_samples_per_second": 15.722,
382
- "eval_steps_per_second": 4.057,
383
  "step": 525
384
  },
385
  {
386
  "epoch": 7.0,
387
  "learning_rate": 4.770370370370371e-05,
388
- "loss": 0.2709,
389
  "step": 530
390
  },
391
  {
392
  "epoch": 7.0,
393
  "learning_rate": 4.755555555555556e-05,
394
- "loss": 0.1705,
395
  "step": 540
396
  },
397
  {
398
  "epoch": 7.01,
399
  "learning_rate": 4.740740740740741e-05,
400
- "loss": 0.2239,
401
  "step": 550
402
  },
403
  {
404
  "epoch": 7.01,
405
  "learning_rate": 4.7259259259259266e-05,
406
- "loss": 0.0064,
407
  "step": 560
408
  },
409
  {
410
  "epoch": 7.01,
411
  "learning_rate": 4.711111111111111e-05,
412
- "loss": 0.0094,
413
  "step": 570
414
  },
415
  {
416
  "epoch": 7.01,
417
  "learning_rate": 4.6962962962962966e-05,
418
- "loss": 0.1632,
419
  "step": 580
420
  },
421
  {
422
  "epoch": 7.02,
423
  "learning_rate": 4.681481481481482e-05,
424
- "loss": 0.049,
425
  "step": 590
426
  },
427
  {
428
  "epoch": 7.02,
429
  "learning_rate": 4.666666666666667e-05,
430
- "loss": 0.0048,
431
  "step": 600
432
  },
433
  {
434
  "epoch": 7.02,
435
- "eval_accuracy": 0.9032258064516129,
436
- "eval_loss": 0.3613308370113373,
437
- "eval_runtime": 2.0165,
438
- "eval_samples_per_second": 15.373,
439
- "eval_steps_per_second": 3.967,
440
  "step": 600
441
  },
442
  {
443
  "epoch": 8.0,
444
  "learning_rate": 4.6518518518518525e-05,
445
- "loss": 0.0081,
446
  "step": 610
447
  },
448
  {
449
  "epoch": 8.01,
450
  "learning_rate": 4.637037037037038e-05,
451
- "loss": 0.2853,
452
  "step": 620
453
  },
454
  {
455
  "epoch": 8.01,
456
  "learning_rate": 4.6222222222222224e-05,
457
- "loss": 0.1142,
458
  "step": 630
459
  },
460
  {
461
  "epoch": 8.01,
462
  "learning_rate": 4.607407407407408e-05,
463
- "loss": 0.011,
464
  "step": 640
465
  },
466
  {
467
  "epoch": 8.01,
468
  "learning_rate": 4.592592592592593e-05,
469
- "loss": 0.0467,
470
  "step": 650
471
  },
472
  {
473
  "epoch": 8.02,
474
  "learning_rate": 4.577777777777778e-05,
475
- "loss": 0.0359,
476
  "step": 660
477
  },
478
  {
479
  "epoch": 8.02,
480
  "learning_rate": 4.5629629629629636e-05,
481
- "loss": 0.1896,
482
  "step": 670
483
  },
484
  {
485
  "epoch": 8.02,
486
- "eval_accuracy": 0.9354838709677419,
487
- "eval_loss": 0.280781626701355,
488
- "eval_runtime": 2.0115,
489
- "eval_samples_per_second": 15.411,
490
- "eval_steps_per_second": 3.977,
491
  "step": 675
492
  },
493
  {
494
  "epoch": 9.0,
495
  "learning_rate": 4.548148148148149e-05,
496
- "loss": 0.0479,
497
  "step": 680
498
  },
499
  {
500
  "epoch": 9.0,
501
  "learning_rate": 4.5333333333333335e-05,
502
- "loss": 0.0805,
503
  "step": 690
504
  },
505
  {
506
  "epoch": 9.01,
507
  "learning_rate": 4.518518518518519e-05,
508
- "loss": 0.1153,
509
  "step": 700
510
  },
511
  {
512
  "epoch": 9.01,
513
  "learning_rate": 4.503703703703704e-05,
514
- "loss": 0.0098,
515
  "step": 710
516
  },
517
  {
518
  "epoch": 9.01,
519
  "learning_rate": 4.4888888888888894e-05,
520
- "loss": 0.0031,
521
  "step": 720
522
  },
523
  {
524
  "epoch": 9.01,
525
  "learning_rate": 4.474074074074075e-05,
526
- "loss": 0.0107,
527
  "step": 730
528
  },
529
  {
530
  "epoch": 9.02,
531
  "learning_rate": 4.4592592592592594e-05,
532
- "loss": 0.109,
533
  "step": 740
534
  },
535
  {
536
  "epoch": 9.02,
537
  "learning_rate": 4.4444444444444447e-05,
538
- "loss": 0.0654,
539
  "step": 750
540
  },
541
  {
542
  "epoch": 9.02,
543
- "eval_accuracy": 0.967741935483871,
544
- "eval_loss": 0.037398889660835266,
545
- "eval_runtime": 2.0488,
546
- "eval_samples_per_second": 15.131,
547
- "eval_steps_per_second": 3.905,
548
  "step": 750
549
  },
550
  {
551
  "epoch": 10.0,
552
  "learning_rate": 4.42962962962963e-05,
553
- "loss": 0.0034,
554
  "step": 760
555
  },
556
  {
557
  "epoch": 10.01,
558
  "learning_rate": 4.414814814814815e-05,
559
- "loss": 0.0028,
560
  "step": 770
561
  },
562
  {
563
  "epoch": 10.01,
564
  "learning_rate": 4.4000000000000006e-05,
565
- "loss": 0.0024,
566
  "step": 780
567
  },
568
  {
569
  "epoch": 10.01,
570
  "learning_rate": 4.385185185185185e-05,
571
- "loss": 0.0052,
572
  "step": 790
573
  },
574
  {
575
  "epoch": 10.01,
576
  "learning_rate": 4.3703703703703705e-05,
577
- "loss": 0.0053,
578
  "step": 800
579
  },
580
  {
581
  "epoch": 10.02,
582
  "learning_rate": 4.355555555555556e-05,
583
- "loss": 0.0024,
584
  "step": 810
585
  },
586
  {
587
  "epoch": 10.02,
588
  "learning_rate": 4.340740740740741e-05,
589
- "loss": 0.0027,
590
  "step": 820
591
  },
592
  {
593
  "epoch": 10.02,
594
- "eval_accuracy": 1.0,
595
- "eval_loss": 0.007468333002179861,
596
- "eval_runtime": 2.0434,
597
- "eval_samples_per_second": 15.171,
598
- "eval_steps_per_second": 3.915,
599
  "step": 825
600
  },
601
  {
602
  "epoch": 11.0,
603
  "learning_rate": 4.325925925925926e-05,
604
- "loss": 0.0026,
605
  "step": 830
606
  },
607
  {
608
  "epoch": 11.0,
609
  "learning_rate": 4.311111111111111e-05,
610
- "loss": 0.002,
611
  "step": 840
612
  },
613
  {
614
  "epoch": 11.01,
615
  "learning_rate": 4.296296296296296e-05,
616
- "loss": 0.0021,
617
  "step": 850
618
  },
619
  {
620
  "epoch": 11.01,
621
  "learning_rate": 4.2814814814814816e-05,
622
- "loss": 0.0022,
623
  "step": 860
624
  },
625
  {
626
  "epoch": 11.01,
627
  "learning_rate": 4.266666666666667e-05,
628
- "loss": 0.0025,
629
  "step": 870
630
  },
631
  {
632
  "epoch": 11.01,
633
  "learning_rate": 4.2518518518518515e-05,
634
- "loss": 0.0511,
635
  "step": 880
636
  },
637
  {
638
  "epoch": 11.02,
639
  "learning_rate": 4.237037037037037e-05,
640
- "loss": 0.1272,
641
  "step": 890
642
  },
643
  {
644
  "epoch": 11.02,
645
  "learning_rate": 4.222222222222222e-05,
646
- "loss": 0.0017,
647
  "step": 900
648
  },
649
  {
650
  "epoch": 11.02,
651
- "eval_accuracy": 0.967741935483871,
652
- "eval_loss": 0.1445997655391693,
653
- "eval_runtime": 2.062,
654
- "eval_samples_per_second": 15.034,
655
- "eval_steps_per_second": 3.88,
656
  "step": 900
657
  },
658
  {
659
  "epoch": 12.0,
660
  "learning_rate": 4.2074074074074075e-05,
661
- "loss": 0.0017,
662
  "step": 910
663
  },
664
  {
665
  "epoch": 12.01,
666
  "learning_rate": 4.192592592592593e-05,
667
- "loss": 0.0017,
668
  "step": 920
669
  },
670
  {
671
  "epoch": 12.01,
672
  "learning_rate": 4.177777777777778e-05,
673
- "loss": 0.1351,
674
  "step": 930
675
  },
676
  {
677
  "epoch": 12.01,
678
  "learning_rate": 4.162962962962963e-05,
679
- "loss": 0.0042,
680
  "step": 940
681
  },
682
  {
683
  "epoch": 12.01,
684
  "learning_rate": 4.148148148148148e-05,
685
- "loss": 0.1209,
686
  "step": 950
687
  },
688
  {
689
  "epoch": 12.02,
690
  "learning_rate": 4.133333333333333e-05,
691
- "loss": 0.0017,
692
  "step": 960
693
  },
694
  {
695
  "epoch": 12.02,
696
  "learning_rate": 4.1185185185185186e-05,
697
- "loss": 0.0015,
698
  "step": 970
699
  },
700
  {
701
  "epoch": 12.02,
702
- "eval_accuracy": 1.0,
703
- "eval_loss": 0.021370282396674156,
704
- "eval_runtime": 2.0352,
705
- "eval_samples_per_second": 15.232,
706
- "eval_steps_per_second": 3.931,
707
  "step": 975
708
  },
709
  {
710
  "epoch": 13.0,
711
  "learning_rate": 4.103703703703704e-05,
712
- "loss": 0.0016,
713
  "step": 980
714
  },
715
  {
716
  "epoch": 13.0,
717
  "learning_rate": 4.088888888888889e-05,
718
- "loss": 0.0015,
719
  "step": 990
720
  },
721
  {
722
  "epoch": 13.01,
723
  "learning_rate": 4.074074074074074e-05,
724
- "loss": 0.1613,
725
  "step": 1000
726
  },
727
  {
728
  "epoch": 13.01,
729
  "learning_rate": 4.059259259259259e-05,
730
- "loss": 0.1646,
731
  "step": 1010
732
  },
733
  {
734
  "epoch": 13.01,
735
  "learning_rate": 4.0444444444444444e-05,
736
- "loss": 0.0016,
737
  "step": 1020
738
  },
739
  {
@@ -745,22 +745,22 @@
745
  {
746
  "epoch": 13.02,
747
  "learning_rate": 4.014814814814815e-05,
748
- "loss": 0.0205,
749
  "step": 1040
750
  },
751
  {
752
  "epoch": 13.02,
753
  "learning_rate": 4e-05,
754
- "loss": 0.0117,
755
  "step": 1050
756
  },
757
  {
758
  "epoch": 13.02,
759
- "eval_accuracy": 0.9032258064516129,
760
- "eval_loss": 0.3851369619369507,
761
- "eval_runtime": 2.024,
762
- "eval_samples_per_second": 15.316,
763
- "eval_steps_per_second": 3.953,
764
  "step": 1050
765
  },
766
  {
@@ -772,25 +772,25 @@
772
  {
773
  "epoch": 14.01,
774
  "learning_rate": 3.97037037037037e-05,
775
- "loss": 0.0014,
776
  "step": 1070
777
  },
778
  {
779
  "epoch": 14.01,
780
  "learning_rate": 3.9555555555555556e-05,
781
- "loss": 0.0013,
782
  "step": 1080
783
  },
784
  {
785
  "epoch": 14.01,
786
  "learning_rate": 3.940740740740741e-05,
787
- "loss": 0.0013,
788
  "step": 1090
789
  },
790
  {
791
  "epoch": 14.01,
792
  "learning_rate": 3.925925925925926e-05,
793
- "loss": 0.0014,
794
  "step": 1100
795
  },
796
  {
@@ -802,40 +802,40 @@
802
  {
803
  "epoch": 14.02,
804
  "learning_rate": 3.896296296296296e-05,
805
- "loss": 0.0017,
806
  "step": 1120
807
  },
808
  {
809
  "epoch": 14.02,
810
- "eval_accuracy": 0.8709677419354839,
811
- "eval_loss": 0.4688844382762909,
812
- "eval_runtime": 1.9804,
813
- "eval_samples_per_second": 15.653,
814
- "eval_steps_per_second": 4.04,
815
  "step": 1125
816
  },
817
  {
818
  "epoch": 15.0,
819
  "learning_rate": 3.8814814814814814e-05,
820
- "loss": 0.0012,
821
  "step": 1130
822
  },
823
  {
824
  "epoch": 15.0,
825
  "learning_rate": 3.866666666666667e-05,
826
- "loss": 0.0906,
827
  "step": 1140
828
  },
829
  {
830
  "epoch": 15.01,
831
  "learning_rate": 3.851851851851852e-05,
832
- "loss": 0.0152,
833
  "step": 1150
834
  },
835
  {
836
  "epoch": 15.01,
837
  "learning_rate": 3.837037037037037e-05,
838
- "loss": 0.0368,
839
  "step": 1160
840
  },
841
  {
@@ -847,97 +847,97 @@
847
  {
848
  "epoch": 15.01,
849
  "learning_rate": 3.807407407407408e-05,
850
- "loss": 0.0019,
851
  "step": 1180
852
  },
853
  {
854
  "epoch": 15.02,
855
  "learning_rate": 3.7925925925925925e-05,
856
- "loss": 0.0011,
857
  "step": 1190
858
  },
859
  {
860
  "epoch": 15.02,
861
  "learning_rate": 3.777777777777778e-05,
862
- "loss": 0.0013,
863
  "step": 1200
864
  },
865
  {
866
  "epoch": 15.02,
867
- "eval_accuracy": 1.0,
868
- "eval_loss": 0.015822507441043854,
869
- "eval_runtime": 2.0695,
870
- "eval_samples_per_second": 14.979,
871
- "eval_steps_per_second": 3.866,
872
  "step": 1200
873
  },
874
  {
875
  "epoch": 16.0,
876
  "learning_rate": 3.762962962962963e-05,
877
- "loss": 0.0012,
878
  "step": 1210
879
  },
880
  {
881
  "epoch": 16.01,
882
  "learning_rate": 3.7481481481481484e-05,
883
- "loss": 0.0012,
884
  "step": 1220
885
  },
886
  {
887
  "epoch": 16.01,
888
  "learning_rate": 3.733333333333334e-05,
889
- "loss": 0.0011,
890
  "step": 1230
891
  },
892
  {
893
  "epoch": 16.01,
894
  "learning_rate": 3.718518518518519e-05,
895
- "loss": 0.001,
896
  "step": 1240
897
  },
898
  {
899
  "epoch": 16.01,
900
  "learning_rate": 3.7037037037037037e-05,
901
- "loss": 0.0011,
902
  "step": 1250
903
  },
904
  {
905
  "epoch": 16.02,
906
  "learning_rate": 3.688888888888889e-05,
907
- "loss": 0.001,
908
  "step": 1260
909
  },
910
  {
911
  "epoch": 16.02,
912
  "learning_rate": 3.674074074074074e-05,
913
- "loss": 0.0011,
914
  "step": 1270
915
  },
916
  {
917
  "epoch": 16.02,
918
  "eval_accuracy": 1.0,
919
- "eval_loss": 0.008321798406541348,
920
- "eval_runtime": 2.0776,
921
- "eval_samples_per_second": 14.921,
922
- "eval_steps_per_second": 3.851,
923
  "step": 1275
924
  },
925
  {
926
  "epoch": 17.0,
927
  "learning_rate": 3.6592592592592596e-05,
928
- "loss": 0.0033,
929
  "step": 1280
930
  },
931
  {
932
  "epoch": 17.0,
933
  "learning_rate": 3.644444444444445e-05,
934
- "loss": 0.0098,
935
  "step": 1290
936
  },
937
  {
938
  "epoch": 17.01,
939
  "learning_rate": 3.62962962962963e-05,
940
- "loss": 0.0012,
941
  "step": 1300
942
  },
943
  {
@@ -949,223 +949,223 @@
949
  {
950
  "epoch": 17.01,
951
  "learning_rate": 3.6e-05,
952
- "loss": 0.0024,
953
  "step": 1320
954
  },
955
  {
956
  "epoch": 17.01,
957
  "learning_rate": 3.5851851851851854e-05,
958
- "loss": 0.126,
959
  "step": 1330
960
  },
961
  {
962
  "epoch": 17.02,
963
  "learning_rate": 3.570370370370371e-05,
964
- "loss": 0.0012,
965
  "step": 1340
966
  },
967
  {
968
  "epoch": 17.02,
969
  "learning_rate": 3.555555555555556e-05,
970
- "loss": 0.0011,
971
  "step": 1350
972
  },
973
  {
974
  "epoch": 17.02,
975
- "eval_accuracy": 0.9032258064516129,
976
- "eval_loss": 0.3690930902957916,
977
- "eval_runtime": 2.115,
978
- "eval_samples_per_second": 14.657,
979
- "eval_steps_per_second": 3.783,
980
  "step": 1350
981
  },
982
  {
983
  "epoch": 18.0,
984
  "learning_rate": 3.540740740740741e-05,
985
- "loss": 0.0009,
986
  "step": 1360
987
  },
988
  {
989
  "epoch": 18.01,
990
  "learning_rate": 3.525925925925926e-05,
991
- "loss": 0.001,
992
  "step": 1370
993
  },
994
  {
995
  "epoch": 18.01,
996
  "learning_rate": 3.511111111111111e-05,
997
- "loss": 0.001,
998
  "step": 1380
999
  },
1000
  {
1001
  "epoch": 18.01,
1002
  "learning_rate": 3.4962962962962965e-05,
1003
- "loss": 0.0068,
1004
  "step": 1390
1005
  },
1006
  {
1007
  "epoch": 18.01,
1008
  "learning_rate": 3.481481481481482e-05,
1009
- "loss": 0.001,
1010
  "step": 1400
1011
  },
1012
  {
1013
  "epoch": 18.02,
1014
  "learning_rate": 3.466666666666667e-05,
1015
- "loss": 0.107,
1016
  "step": 1410
1017
  },
1018
  {
1019
  "epoch": 18.02,
1020
  "learning_rate": 3.4518518518518524e-05,
1021
- "loss": 0.001,
1022
  "step": 1420
1023
  },
1024
  {
1025
  "epoch": 18.02,
1026
- "eval_accuracy": 0.967741935483871,
1027
- "eval_loss": 0.20155225694179535,
1028
- "eval_runtime": 2.1042,
1029
- "eval_samples_per_second": 14.732,
1030
- "eval_steps_per_second": 3.802,
1031
  "step": 1425
1032
  },
1033
  {
1034
  "epoch": 19.0,
1035
  "learning_rate": 3.437037037037037e-05,
1036
- "loss": 0.001,
1037
  "step": 1430
1038
  },
1039
  {
1040
  "epoch": 19.0,
1041
  "learning_rate": 3.4222222222222224e-05,
1042
- "loss": 0.0011,
1043
  "step": 1440
1044
  },
1045
  {
1046
  "epoch": 19.01,
1047
  "learning_rate": 3.4074074074074077e-05,
1048
- "loss": 0.061,
1049
  "step": 1450
1050
  },
1051
  {
1052
  "epoch": 19.01,
1053
  "learning_rate": 3.392592592592593e-05,
1054
- "loss": 0.0009,
1055
  "step": 1460
1056
  },
1057
  {
1058
  "epoch": 19.01,
1059
  "learning_rate": 3.377777777777778e-05,
1060
- "loss": 0.001,
1061
  "step": 1470
1062
  },
1063
  {
1064
  "epoch": 19.01,
1065
  "learning_rate": 3.3629629629629636e-05,
1066
- "loss": 0.001,
1067
  "step": 1480
1068
  },
1069
  {
1070
  "epoch": 19.02,
1071
  "learning_rate": 3.348148148148148e-05,
1072
- "loss": 0.0009,
1073
  "step": 1490
1074
  },
1075
  {
1076
  "epoch": 19.02,
1077
  "learning_rate": 3.3333333333333335e-05,
1078
- "loss": 0.0009,
1079
  "step": 1500
1080
  },
1081
  {
1082
  "epoch": 19.02,
1083
- "eval_accuracy": 0.967741935483871,
1084
- "eval_loss": 0.08350614458322525,
1085
- "eval_runtime": 2.1324,
1086
- "eval_samples_per_second": 14.537,
1087
- "eval_steps_per_second": 3.752,
1088
  "step": 1500
1089
  },
1090
  {
1091
  "epoch": 20.0,
1092
  "learning_rate": 3.318518518518519e-05,
1093
- "loss": 0.1209,
1094
  "step": 1510
1095
  },
1096
  {
1097
  "epoch": 20.01,
1098
  "learning_rate": 3.303703703703704e-05,
1099
- "loss": 0.0009,
1100
  "step": 1520
1101
  },
1102
  {
1103
  "epoch": 20.01,
1104
  "learning_rate": 3.2888888888888894e-05,
1105
- "loss": 0.0009,
1106
  "step": 1530
1107
  },
1108
  {
1109
  "epoch": 20.01,
1110
  "learning_rate": 3.274074074074075e-05,
1111
- "loss": 0.0008,
1112
  "step": 1540
1113
  },
1114
  {
1115
  "epoch": 20.01,
1116
  "learning_rate": 3.25925925925926e-05,
1117
- "loss": 0.0012,
1118
  "step": 1550
1119
  },
1120
  {
1121
  "epoch": 20.02,
1122
  "learning_rate": 3.2444444444444446e-05,
1123
- "loss": 0.0008,
1124
  "step": 1560
1125
  },
1126
  {
1127
  "epoch": 20.02,
1128
  "learning_rate": 3.22962962962963e-05,
1129
- "loss": 0.0009,
1130
  "step": 1570
1131
  },
1132
  {
1133
  "epoch": 20.02,
1134
- "eval_accuracy": 0.9354838709677419,
1135
- "eval_loss": 0.31769511103630066,
1136
- "eval_runtime": 2.1181,
1137
- "eval_samples_per_second": 14.636,
1138
- "eval_steps_per_second": 3.777,
1139
  "step": 1575
1140
  },
1141
  {
1142
  "epoch": 21.0,
1143
  "learning_rate": 3.214814814814815e-05,
1144
- "loss": 0.0009,
1145
  "step": 1580
1146
  },
1147
  {
1148
  "epoch": 21.0,
1149
  "learning_rate": 3.2000000000000005e-05,
1150
- "loss": 0.0008,
1151
  "step": 1590
1152
  },
1153
  {
1154
  "epoch": 21.01,
1155
  "learning_rate": 3.185185185185185e-05,
1156
- "loss": 0.0676,
1157
  "step": 1600
1158
  },
1159
  {
1160
  "epoch": 21.01,
1161
  "learning_rate": 3.1703703703703705e-05,
1162
- "loss": 0.0008,
1163
  "step": 1610
1164
  },
1165
  {
1166
  "epoch": 21.01,
1167
  "learning_rate": 3.155555555555556e-05,
1168
- "loss": 0.0011,
1169
  "step": 1620
1170
  },
1171
  {
@@ -1177,34 +1177,34 @@
1177
  {
1178
  "epoch": 21.02,
1179
  "learning_rate": 3.1259259259259264e-05,
1180
- "loss": 0.0011,
1181
  "step": 1640
1182
  },
1183
  {
1184
  "epoch": 21.02,
1185
  "learning_rate": 3.111111111111111e-05,
1186
- "loss": 0.0016,
1187
  "step": 1650
1188
  },
1189
  {
1190
  "epoch": 21.02,
1191
- "eval_accuracy": 0.967741935483871,
1192
- "eval_loss": 0.21165208518505096,
1193
- "eval_runtime": 2.16,
1194
- "eval_samples_per_second": 14.352,
1195
- "eval_steps_per_second": 3.704,
1196
  "step": 1650
1197
  },
1198
  {
1199
  "epoch": 22.0,
1200
  "learning_rate": 3.096296296296296e-05,
1201
- "loss": 0.0012,
1202
  "step": 1660
1203
  },
1204
  {
1205
  "epoch": 22.01,
1206
  "learning_rate": 3.0814814814814816e-05,
1207
- "loss": 0.0008,
1208
  "step": 1670
1209
  },
1210
  {
@@ -1216,7 +1216,7 @@
1216
  {
1217
  "epoch": 22.01,
1218
  "learning_rate": 3.0518518518518515e-05,
1219
- "loss": 0.0011,
1220
  "step": 1690
1221
  },
1222
  {
@@ -1234,28 +1234,28 @@
1234
  {
1235
  "epoch": 22.02,
1236
  "learning_rate": 3.0074074074074078e-05,
1237
- "loss": 0.0008,
1238
  "step": 1720
1239
  },
1240
  {
1241
  "epoch": 22.02,
1242
- "eval_accuracy": 0.9354838709677419,
1243
- "eval_loss": 0.15717540681362152,
1244
- "eval_runtime": 2.1202,
1245
- "eval_samples_per_second": 14.621,
1246
- "eval_steps_per_second": 3.773,
1247
  "step": 1725
1248
  },
1249
  {
1250
  "epoch": 23.0,
1251
  "learning_rate": 2.992592592592593e-05,
1252
- "loss": 0.0007,
1253
  "step": 1730
1254
  },
1255
  {
1256
  "epoch": 23.0,
1257
  "learning_rate": 2.9777777777777777e-05,
1258
- "loss": 0.0007,
1259
  "step": 1740
1260
  },
1261
  {
@@ -1267,7 +1267,7 @@
1267
  {
1268
  "epoch": 23.01,
1269
  "learning_rate": 2.9481481481481483e-05,
1270
- "loss": 0.0007,
1271
  "step": 1760
1272
  },
1273
  {
@@ -1285,7 +1285,7 @@
1285
  {
1286
  "epoch": 23.02,
1287
  "learning_rate": 2.9037037037037042e-05,
1288
- "loss": 0.0008,
1289
  "step": 1790
1290
  },
1291
  {
@@ -1296,23 +1296,23 @@
1296
  },
1297
  {
1298
  "epoch": 23.02,
1299
- "eval_accuracy": 0.967741935483871,
1300
- "eval_loss": 0.09172616899013519,
1301
- "eval_runtime": 2.0539,
1302
- "eval_samples_per_second": 15.093,
1303
- "eval_steps_per_second": 3.895,
1304
  "step": 1800
1305
  },
1306
  {
1307
  "epoch": 24.0,
1308
  "learning_rate": 2.874074074074074e-05,
1309
- "loss": 0.0007,
1310
  "step": 1810
1311
  },
1312
  {
1313
  "epoch": 24.01,
1314
  "learning_rate": 2.8592592592592594e-05,
1315
- "loss": 0.0007,
1316
  "step": 1820
1317
  },
1318
  {
@@ -1324,46 +1324,46 @@
1324
  {
1325
  "epoch": 24.01,
1326
  "learning_rate": 2.8296296296296297e-05,
1327
- "loss": 0.001,
1328
  "step": 1840
1329
  },
1330
  {
1331
  "epoch": 24.01,
1332
  "learning_rate": 2.814814814814815e-05,
1333
- "loss": 0.0022,
1334
  "step": 1850
1335
  },
1336
  {
1337
  "epoch": 24.02,
1338
  "learning_rate": 2.8000000000000003e-05,
1339
- "loss": 0.0006,
1340
  "step": 1860
1341
  },
1342
  {
1343
  "epoch": 24.02,
1344
  "learning_rate": 2.7851851851851853e-05,
1345
- "loss": 0.0007,
1346
  "step": 1870
1347
  },
1348
  {
1349
  "epoch": 24.02,
1350
- "eval_accuracy": 0.967741935483871,
1351
- "eval_loss": 0.10195574164390564,
1352
- "eval_runtime": 1.9997,
1353
- "eval_samples_per_second": 15.503,
1354
- "eval_steps_per_second": 4.001,
1355
  "step": 1875
1356
  },
1357
  {
1358
  "epoch": 25.0,
1359
  "learning_rate": 2.7703703703703706e-05,
1360
- "loss": 0.001,
1361
  "step": 1880
1362
  },
1363
  {
1364
  "epoch": 25.0,
1365
  "learning_rate": 2.7555555555555555e-05,
1366
- "loss": 0.0008,
1367
  "step": 1890
1368
  },
1369
  {
@@ -1393,7 +1393,7 @@
1393
  {
1394
  "epoch": 25.02,
1395
  "learning_rate": 2.6814814814814814e-05,
1396
- "loss": 0.0007,
1397
  "step": 1940
1398
  },
1399
  {
@@ -1404,11 +1404,11 @@
1404
  },
1405
  {
1406
  "epoch": 25.02,
1407
- "eval_accuracy": 0.967741935483871,
1408
- "eval_loss": 0.04205064848065376,
1409
- "eval_runtime": 1.9929,
1410
- "eval_samples_per_second": 15.555,
1411
- "eval_steps_per_second": 4.014,
1412
  "step": 1950
1413
  },
1414
  {
@@ -1438,40 +1438,40 @@
1438
  {
1439
  "epoch": 26.01,
1440
  "learning_rate": 2.5925925925925925e-05,
1441
- "loss": 0.0006,
1442
  "step": 2000
1443
  },
1444
  {
1445
  "epoch": 26.02,
1446
  "learning_rate": 2.5777777777777778e-05,
1447
- "loss": 0.0006,
1448
  "step": 2010
1449
  },
1450
  {
1451
  "epoch": 26.02,
1452
  "learning_rate": 2.562962962962963e-05,
1453
- "loss": 0.0006,
1454
  "step": 2020
1455
  },
1456
  {
1457
  "epoch": 26.02,
1458
- "eval_accuracy": 1.0,
1459
- "eval_loss": 0.00902103167027235,
1460
- "eval_runtime": 1.9903,
1461
- "eval_samples_per_second": 15.575,
1462
- "eval_steps_per_second": 4.019,
1463
  "step": 2025
1464
  },
1465
  {
1466
  "epoch": 27.0,
1467
  "learning_rate": 2.5481481481481484e-05,
1468
- "loss": 0.0006,
1469
  "step": 2030
1470
  },
1471
  {
1472
  "epoch": 27.0,
1473
  "learning_rate": 2.5333333333333337e-05,
1474
- "loss": 0.0005,
1475
  "step": 2040
1476
  },
1477
  {
@@ -1495,13 +1495,13 @@
1495
  {
1496
  "epoch": 27.01,
1497
  "learning_rate": 2.4740740740740742e-05,
1498
- "loss": 0.1483,
1499
  "step": 2080
1500
  },
1501
  {
1502
  "epoch": 27.02,
1503
  "learning_rate": 2.4592592592592595e-05,
1504
- "loss": 0.0039,
1505
  "step": 2090
1506
  },
1507
  {
@@ -1512,17 +1512,17 @@
1512
  },
1513
  {
1514
  "epoch": 27.02,
1515
- "eval_accuracy": 1.0,
1516
- "eval_loss": 0.009729193523526192,
1517
- "eval_runtime": 2.0816,
1518
- "eval_samples_per_second": 14.892,
1519
- "eval_steps_per_second": 3.843,
1520
  "step": 2100
1521
  },
1522
  {
1523
  "epoch": 28.0,
1524
  "learning_rate": 2.4296296296296298e-05,
1525
- "loss": 0.0006,
1526
  "step": 2110
1527
  },
1528
  {
@@ -1546,28 +1546,28 @@
1546
  {
1547
  "epoch": 28.01,
1548
  "learning_rate": 2.3703703703703707e-05,
1549
- "loss": 0.0006,
1550
  "step": 2150
1551
  },
1552
  {
1553
  "epoch": 28.02,
1554
  "learning_rate": 2.3555555555555556e-05,
1555
- "loss": 0.0026,
1556
  "step": 2160
1557
  },
1558
  {
1559
  "epoch": 28.02,
1560
  "learning_rate": 2.340740740740741e-05,
1561
- "loss": 0.0007,
1562
  "step": 2170
1563
  },
1564
  {
1565
  "epoch": 28.02,
1566
- "eval_accuracy": 0.9354838709677419,
1567
- "eval_loss": 0.26273736357688904,
1568
- "eval_runtime": 2.0036,
1569
- "eval_samples_per_second": 15.472,
1570
- "eval_steps_per_second": 3.993,
1571
  "step": 2175
1572
  },
1573
  {
@@ -1579,13 +1579,13 @@
1579
  {
1580
  "epoch": 29.0,
1581
  "learning_rate": 2.3111111111111112e-05,
1582
- "loss": 0.0006,
1583
  "step": 2190
1584
  },
1585
  {
1586
  "epoch": 29.01,
1587
  "learning_rate": 2.2962962962962965e-05,
1588
- "loss": 0.0006,
1589
  "step": 2200
1590
  },
1591
  {
@@ -1609,22 +1609,22 @@
1609
  {
1610
  "epoch": 29.02,
1611
  "learning_rate": 2.2370370370370374e-05,
1612
- "loss": 0.0005,
1613
  "step": 2240
1614
  },
1615
  {
1616
  "epoch": 29.02,
1617
  "learning_rate": 2.2222222222222223e-05,
1618
- "loss": 0.0748,
1619
  "step": 2250
1620
  },
1621
  {
1622
  "epoch": 29.02,
1623
- "eval_accuracy": 0.967741935483871,
1624
- "eval_loss": 0.09200659394264221,
1625
- "eval_runtime": 2.031,
1626
- "eval_samples_per_second": 15.263,
1627
- "eval_steps_per_second": 3.939,
1628
  "step": 2250
1629
  },
1630
  {
@@ -1642,19 +1642,19 @@
1642
  {
1643
  "epoch": 30.01,
1644
  "learning_rate": 2.177777777777778e-05,
1645
- "loss": 0.002,
1646
  "step": 2280
1647
  },
1648
  {
1649
  "epoch": 30.01,
1650
  "learning_rate": 2.162962962962963e-05,
1651
- "loss": 0.0005,
1652
  "step": 2290
1653
  },
1654
  {
1655
  "epoch": 30.01,
1656
  "learning_rate": 2.148148148148148e-05,
1657
- "loss": 0.2038,
1658
  "step": 2300
1659
  },
1660
  {
@@ -1666,52 +1666,52 @@
1666
  {
1667
  "epoch": 30.02,
1668
  "learning_rate": 2.1185185185185184e-05,
1669
- "loss": 0.0006,
1670
  "step": 2320
1671
  },
1672
  {
1673
  "epoch": 30.02,
1674
- "eval_accuracy": 0.9032258064516129,
1675
- "eval_loss": 0.4840385317802429,
1676
- "eval_runtime": 2.0178,
1677
- "eval_samples_per_second": 15.363,
1678
- "eval_steps_per_second": 3.965,
1679
  "step": 2325
1680
  },
1681
  {
1682
  "epoch": 31.0,
1683
  "learning_rate": 2.1037037037037037e-05,
1684
- "loss": 0.0006,
1685
  "step": 2330
1686
  },
1687
  {
1688
  "epoch": 31.0,
1689
  "learning_rate": 2.088888888888889e-05,
1690
- "loss": 0.0006,
1691
  "step": 2340
1692
  },
1693
  {
1694
  "epoch": 31.01,
1695
  "learning_rate": 2.074074074074074e-05,
1696
- "loss": 0.0006,
1697
  "step": 2350
1698
  },
1699
  {
1700
  "epoch": 31.01,
1701
  "learning_rate": 2.0592592592592593e-05,
1702
- "loss": 0.0006,
1703
  "step": 2360
1704
  },
1705
  {
1706
  "epoch": 31.01,
1707
  "learning_rate": 2.0444444444444446e-05,
1708
- "loss": 0.0008,
1709
  "step": 2370
1710
  },
1711
  {
1712
  "epoch": 31.01,
1713
  "learning_rate": 2.0296296296296296e-05,
1714
- "loss": 0.0005,
1715
  "step": 2380
1716
  },
1717
  {
@@ -1723,16 +1723,16 @@
1723
  {
1724
  "epoch": 31.02,
1725
  "learning_rate": 2e-05,
1726
- "loss": 0.0006,
1727
  "step": 2400
1728
  },
1729
  {
1730
  "epoch": 31.02,
1731
- "eval_accuracy": 0.9354838709677419,
1732
- "eval_loss": 0.2736887037754059,
1733
- "eval_runtime": 2.0235,
1734
- "eval_samples_per_second": 15.32,
1735
- "eval_steps_per_second": 3.954,
1736
  "step": 2400
1737
  },
1738
  {
@@ -1750,7 +1750,7 @@
1750
  {
1751
  "epoch": 32.01,
1752
  "learning_rate": 1.9555555555555557e-05,
1753
- "loss": 0.0006,
1754
  "step": 2430
1755
  },
1756
  {
@@ -1779,11 +1779,11 @@
1779
  },
1780
  {
1781
  "epoch": 32.02,
1782
- "eval_accuracy": 0.9354838709677419,
1783
- "eval_loss": 0.3084464371204376,
1784
- "eval_runtime": 2.0657,
1785
- "eval_samples_per_second": 15.007,
1786
- "eval_steps_per_second": 3.873,
1787
  "step": 2475
1788
  },
1789
  {
@@ -1807,7 +1807,7 @@
1807
  {
1808
  "epoch": 33.01,
1809
  "learning_rate": 1.837037037037037e-05,
1810
- "loss": 0.0004,
1811
  "step": 2510
1812
  },
1813
  {
@@ -1836,17 +1836,17 @@
1836
  },
1837
  {
1838
  "epoch": 33.02,
1839
- "eval_accuracy": 0.9354838709677419,
1840
- "eval_loss": 0.29349207878112793,
1841
- "eval_runtime": 2.0411,
1842
- "eval_samples_per_second": 15.188,
1843
- "eval_steps_per_second": 3.919,
1844
  "step": 2550
1845
  },
1846
  {
1847
  "epoch": 34.0,
1848
  "learning_rate": 1.762962962962963e-05,
1849
- "loss": 0.0005,
1850
  "step": 2560
1851
  },
1852
  {
@@ -1864,7 +1864,7 @@
1864
  {
1865
  "epoch": 34.01,
1866
  "learning_rate": 1.7185185185185185e-05,
1867
- "loss": 0.0004,
1868
  "step": 2590
1869
  },
1870
  {
@@ -1887,23 +1887,23 @@
1887
  },
1888
  {
1889
  "epoch": 34.02,
1890
- "eval_accuracy": 0.9354838709677419,
1891
- "eval_loss": 0.2671518921852112,
1892
- "eval_runtime": 2.0489,
1893
- "eval_samples_per_second": 15.13,
1894
- "eval_steps_per_second": 3.905,
1895
  "step": 2625
1896
  },
1897
  {
1898
  "epoch": 35.0,
1899
  "learning_rate": 1.6592592592592594e-05,
1900
- "loss": 0.0005,
1901
  "step": 2630
1902
  },
1903
  {
1904
  "epoch": 35.0,
1905
  "learning_rate": 1.6444444444444447e-05,
1906
- "loss": 0.0004,
1907
  "step": 2640
1908
  },
1909
  {
@@ -1921,7 +1921,7 @@
1921
  {
1922
  "epoch": 35.01,
1923
  "learning_rate": 1.6000000000000003e-05,
1924
- "loss": 0.0005,
1925
  "step": 2670
1926
  },
1927
  {
@@ -1933,7 +1933,7 @@
1933
  {
1934
  "epoch": 35.02,
1935
  "learning_rate": 1.5703703703703705e-05,
1936
- "loss": 0.0004,
1937
  "step": 2690
1938
  },
1939
  {
@@ -1944,35 +1944,35 @@
1944
  },
1945
  {
1946
  "epoch": 35.02,
1947
- "eval_accuracy": 0.9354838709677419,
1948
- "eval_loss": 0.2431970089673996,
1949
- "eval_runtime": 2.032,
1950
- "eval_samples_per_second": 15.256,
1951
- "eval_steps_per_second": 3.937,
1952
  "step": 2700
1953
  },
1954
  {
1955
  "epoch": 36.0,
1956
  "learning_rate": 1.5407407407407408e-05,
1957
- "loss": 0.0004,
1958
  "step": 2710
1959
  },
1960
  {
1961
  "epoch": 36.01,
1962
  "learning_rate": 1.5259259259259258e-05,
1963
- "loss": 0.0004,
1964
  "step": 2720
1965
  },
1966
  {
1967
  "epoch": 36.01,
1968
  "learning_rate": 1.5111111111111112e-05,
1969
- "loss": 0.0004,
1970
  "step": 2730
1971
  },
1972
  {
1973
  "epoch": 36.01,
1974
  "learning_rate": 1.4962962962962965e-05,
1975
- "loss": 0.0004,
1976
  "step": 2740
1977
  },
1978
  {
@@ -1995,11 +1995,11 @@
1995
  },
1996
  {
1997
  "epoch": 36.02,
1998
- "eval_accuracy": 0.9354838709677419,
1999
- "eval_loss": 0.23820169270038605,
2000
- "eval_runtime": 1.8723,
2001
- "eval_samples_per_second": 16.557,
2002
- "eval_steps_per_second": 4.273,
2003
  "step": 2775
2004
  },
2005
  {
@@ -2011,7 +2011,7 @@
2011
  {
2012
  "epoch": 37.0,
2013
  "learning_rate": 1.4222222222222224e-05,
2014
- "loss": 0.0004,
2015
  "step": 2790
2016
  },
2017
  {
@@ -2029,19 +2029,19 @@
2029
  {
2030
  "epoch": 37.01,
2031
  "learning_rate": 1.3777777777777778e-05,
2032
- "loss": 0.0004,
2033
  "step": 2820
2034
  },
2035
  {
2036
  "epoch": 37.01,
2037
  "learning_rate": 1.362962962962963e-05,
2038
- "loss": 0.0004,
2039
  "step": 2830
2040
  },
2041
  {
2042
  "epoch": 37.02,
2043
  "learning_rate": 1.348148148148148e-05,
2044
- "loss": 0.0004,
2045
  "step": 2840
2046
  },
2047
  {
@@ -2052,11 +2052,11 @@
2052
  },
2053
  {
2054
  "epoch": 37.02,
2055
- "eval_accuracy": 0.9354838709677419,
2056
- "eval_loss": 0.22140412032604218,
2057
- "eval_runtime": 1.9289,
2058
- "eval_samples_per_second": 16.072,
2059
- "eval_steps_per_second": 4.148,
2060
  "step": 2850
2061
  },
2062
  {
@@ -2068,7 +2068,7 @@
2068
  {
2069
  "epoch": 38.01,
2070
  "learning_rate": 1.3037037037037036e-05,
2071
- "loss": 0.0004,
2072
  "step": 2870
2073
  },
2074
  {
@@ -2086,7 +2086,7 @@
2086
  {
2087
  "epoch": 38.01,
2088
  "learning_rate": 1.2592592592592592e-05,
2089
- "loss": 0.0004,
2090
  "step": 2900
2091
  },
2092
  {
@@ -2098,16 +2098,16 @@
2098
  {
2099
  "epoch": 38.02,
2100
  "learning_rate": 1.2296296296296298e-05,
2101
- "loss": 0.0004,
2102
  "step": 2920
2103
  },
2104
  {
2105
  "epoch": 38.02,
2106
- "eval_accuracy": 0.9354838709677419,
2107
- "eval_loss": 0.21227142214775085,
2108
- "eval_runtime": 1.9476,
2109
- "eval_samples_per_second": 15.917,
2110
- "eval_steps_per_second": 4.108,
2111
  "step": 2925
2112
  },
2113
  {
@@ -2131,7 +2131,7 @@
2131
  {
2132
  "epoch": 39.01,
2133
  "learning_rate": 1.1703703703703705e-05,
2134
- "loss": 0.0004,
2135
  "step": 2960
2136
  },
2137
  {
@@ -2149,7 +2149,7 @@
2149
  {
2150
  "epoch": 39.02,
2151
  "learning_rate": 1.125925925925926e-05,
2152
- "loss": 0.0004,
2153
  "step": 2990
2154
  },
2155
  {
@@ -2160,11 +2160,11 @@
2160
  },
2161
  {
2162
  "epoch": 39.02,
2163
- "eval_accuracy": 0.9354838709677419,
2164
- "eval_loss": 0.20368844270706177,
2165
- "eval_runtime": 1.8819,
2166
- "eval_samples_per_second": 16.472,
2167
- "eval_steps_per_second": 4.251,
2168
  "step": 3000
2169
  },
2170
  {
@@ -2211,11 +2211,11 @@
2211
  },
2212
  {
2213
  "epoch": 40.02,
2214
- "eval_accuracy": 0.9354838709677419,
2215
- "eval_loss": 0.19793939590454102,
2216
- "eval_runtime": 1.8907,
2217
- "eval_samples_per_second": 16.396,
2218
- "eval_steps_per_second": 4.231,
2219
  "step": 3075
2220
  },
2221
  {
@@ -2268,11 +2268,11 @@
2268
  },
2269
  {
2270
  "epoch": 41.02,
2271
- "eval_accuracy": 0.9354838709677419,
2272
- "eval_loss": 0.1887228637933731,
2273
- "eval_runtime": 1.8802,
2274
- "eval_samples_per_second": 16.488,
2275
- "eval_steps_per_second": 4.255,
2276
  "step": 3150
2277
  },
2278
  {
@@ -2319,11 +2319,11 @@
2319
  },
2320
  {
2321
  "epoch": 42.02,
2322
- "eval_accuracy": 0.9354838709677419,
2323
- "eval_loss": 0.17983676493167877,
2324
- "eval_runtime": 1.9763,
2325
- "eval_samples_per_second": 15.686,
2326
- "eval_steps_per_second": 4.048,
2327
  "step": 3225
2328
  },
2329
  {
@@ -2376,11 +2376,11 @@
2376
  },
2377
  {
2378
  "epoch": 43.02,
2379
- "eval_accuracy": 0.9354838709677419,
2380
- "eval_loss": 0.17034880816936493,
2381
- "eval_runtime": 1.9064,
2382
- "eval_samples_per_second": 16.261,
2383
- "eval_steps_per_second": 4.196,
2384
  "step": 3300
2385
  },
2386
  {
@@ -2427,11 +2427,11 @@
2427
  },
2428
  {
2429
  "epoch": 44.02,
2430
- "eval_accuracy": 0.9354838709677419,
2431
- "eval_loss": 0.15491537749767303,
2432
- "eval_runtime": 1.9495,
2433
- "eval_samples_per_second": 15.902,
2434
- "eval_steps_per_second": 4.104,
2435
  "step": 3375
2436
  },
2437
  {
@@ -2467,13 +2467,13 @@
2467
  {
2468
  "epoch": 45.01,
2469
  "learning_rate": 4.740740740740741e-06,
2470
- "loss": 0.0003,
2471
  "step": 3430
2472
  },
2473
  {
2474
  "epoch": 45.02,
2475
  "learning_rate": 4.592592592592593e-06,
2476
- "loss": 0.0003,
2477
  "step": 3440
2478
  },
2479
  {
@@ -2484,11 +2484,11 @@
2484
  },
2485
  {
2486
  "epoch": 45.02,
2487
- "eval_accuracy": 0.9354838709677419,
2488
- "eval_loss": 0.15048637986183167,
2489
- "eval_runtime": 1.9493,
2490
- "eval_samples_per_second": 15.903,
2491
- "eval_steps_per_second": 4.104,
2492
  "step": 3450
2493
  },
2494
  {
@@ -2535,11 +2535,11 @@
2535
  },
2536
  {
2537
  "epoch": 46.02,
2538
- "eval_accuracy": 0.9354838709677419,
2539
- "eval_loss": 0.1432822197675705,
2540
- "eval_runtime": 1.9341,
2541
- "eval_samples_per_second": 16.028,
2542
- "eval_steps_per_second": 4.136,
2543
  "step": 3525
2544
  },
2545
  {
@@ -2592,17 +2592,17 @@
2592
  },
2593
  {
2594
  "epoch": 47.02,
2595
- "eval_accuracy": 0.9354838709677419,
2596
- "eval_loss": 0.14155419170856476,
2597
- "eval_runtime": 1.9544,
2598
- "eval_samples_per_second": 15.861,
2599
- "eval_steps_per_second": 4.093,
2600
  "step": 3600
2601
  },
2602
  {
2603
  "epoch": 48.0,
2604
  "learning_rate": 2.0740740740740742e-06,
2605
- "loss": 0.0003,
2606
  "step": 3610
2607
  },
2608
  {
@@ -2620,7 +2620,7 @@
2620
  {
2621
  "epoch": 48.01,
2622
  "learning_rate": 1.6296296296296295e-06,
2623
- "loss": 0.0004,
2624
  "step": 3640
2625
  },
2626
  {
@@ -2643,11 +2643,11 @@
2643
  },
2644
  {
2645
  "epoch": 48.02,
2646
- "eval_accuracy": 0.9354838709677419,
2647
- "eval_loss": 0.14026829600334167,
2648
- "eval_runtime": 1.9357,
2649
- "eval_samples_per_second": 16.015,
2650
- "eval_steps_per_second": 4.133,
2651
  "step": 3675
2652
  },
2653
  {
@@ -2683,13 +2683,13 @@
2683
  {
2684
  "epoch": 49.01,
2685
  "learning_rate": 2.962962962962963e-07,
2686
- "loss": 0.0003,
2687
  "step": 3730
2688
  },
2689
  {
2690
  "epoch": 49.02,
2691
  "learning_rate": 1.4814814814814815e-07,
2692
- "loss": 0.0003,
2693
  "step": 3740
2694
  },
2695
  {
@@ -2700,38 +2700,38 @@
2700
  },
2701
  {
2702
  "epoch": 49.02,
2703
- "eval_accuracy": 0.9354838709677419,
2704
- "eval_loss": 0.14033780992031097,
2705
- "eval_runtime": 1.912,
2706
- "eval_samples_per_second": 16.213,
2707
- "eval_steps_per_second": 4.184,
2708
  "step": 3750
2709
  },
2710
  {
2711
  "epoch": 49.02,
2712
  "step": 3750,
2713
  "total_flos": 1.86923023515648e+19,
2714
- "train_loss": 0.15028690623094637,
2715
- "train_runtime": 3633.1287,
2716
- "train_samples_per_second": 4.129,
2717
- "train_steps_per_second": 1.032
2718
  },
2719
  {
2720
  "epoch": 49.02,
2721
- "eval_accuracy": 0.9210526315789473,
2722
- "eval_loss": 0.326847106218338,
2723
- "eval_runtime": 7.8789,
2724
- "eval_samples_per_second": 9.646,
2725
- "eval_steps_per_second": 2.412,
2726
  "step": 3750
2727
  },
2728
  {
2729
  "epoch": 49.02,
2730
- "eval_accuracy": 0.9210526315789473,
2731
- "eval_loss": 0.326847106218338,
2732
- "eval_runtime": 4.6548,
2733
- "eval_samples_per_second": 16.327,
2734
- "eval_steps_per_second": 4.082,
2735
  "step": 3750
2736
  }
2737
  ],
 
1
  {
2
  "best_metric": 1.0,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-1275",
4
  "epoch": 49.02,
5
  "global_step": 3750,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 0.0,
12
  "learning_rate": 1.3333333333333334e-06,
13
+ "loss": 2.3098,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.01,
18
  "learning_rate": 2.666666666666667e-06,
19
+ "loss": 2.404,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.01,
24
  "learning_rate": 4.000000000000001e-06,
25
+ "loss": 2.3519,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.01,
30
  "learning_rate": 5.333333333333334e-06,
31
+ "loss": 2.3632,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.01,
36
  "learning_rate": 6.666666666666667e-06,
37
+ "loss": 2.2731,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.02,
42
  "learning_rate": 8.000000000000001e-06,
43
+ "loss": 2.3121,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.02,
48
  "learning_rate": 9.333333333333334e-06,
49
+ "loss": 2.3832,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.02,
54
+ "eval_accuracy": 0.3783783783783784,
55
+ "eval_loss": 2.170041561126709,
56
+ "eval_runtime": 2.1587,
57
+ "eval_samples_per_second": 17.14,
58
+ "eval_steps_per_second": 4.632,
59
  "step": 75
60
  },
61
  {
62
  "epoch": 1.0,
63
  "learning_rate": 1.0666666666666667e-05,
64
+ "loss": 2.2792,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 1.0,
69
  "learning_rate": 1.2e-05,
70
+ "loss": 2.2242,
71
  "step": 90
72
  },
73
  {
74
  "epoch": 1.01,
75
  "learning_rate": 1.3333333333333333e-05,
76
+ "loss": 2.1522,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 1.01,
81
  "learning_rate": 1.4666666666666668e-05,
82
+ "loss": 2.1471,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 1.01,
87
  "learning_rate": 1.6000000000000003e-05,
88
+ "loss": 2.1708,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 1.01,
93
  "learning_rate": 1.7333333333333336e-05,
94
+ "loss": 2.064,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 1.02,
99
  "learning_rate": 1.866666666666667e-05,
100
+ "loss": 2.0836,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.02,
105
  "learning_rate": 2e-05,
106
+ "loss": 1.8551,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.02,
111
+ "eval_accuracy": 0.3783783783783784,
112
+ "eval_loss": 1.8235533237457275,
113
+ "eval_runtime": 2.2292,
114
+ "eval_samples_per_second": 16.598,
115
+ "eval_steps_per_second": 4.486,
116
  "step": 150
117
  },
118
  {
119
  "epoch": 2.0,
120
  "learning_rate": 2.1333333333333335e-05,
121
+ "loss": 1.5745,
122
  "step": 160
123
  },
124
  {
125
  "epoch": 2.01,
126
  "learning_rate": 2.2666666666666668e-05,
127
+ "loss": 1.4592,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 2.01,
132
  "learning_rate": 2.4e-05,
133
+ "loss": 1.4084,
134
  "step": 180
135
  },
136
  {
137
  "epoch": 2.01,
138
  "learning_rate": 2.5333333333333337e-05,
139
+ "loss": 1.1281,
140
  "step": 190
141
  },
142
  {
143
  "epoch": 2.01,
144
  "learning_rate": 2.6666666666666667e-05,
145
+ "loss": 1.0162,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 2.02,
150
  "learning_rate": 2.8000000000000003e-05,
151
+ "loss": 0.8929,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 2.02,
156
  "learning_rate": 2.9333333333333336e-05,
157
+ "loss": 1.0117,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 2.02,
162
+ "eval_accuracy": 0.5135135135135135,
163
+ "eval_loss": 1.1747188568115234,
164
+ "eval_runtime": 2.3005,
165
+ "eval_samples_per_second": 16.083,
166
+ "eval_steps_per_second": 4.347,
167
  "step": 225
168
  },
169
  {
170
  "epoch": 3.0,
171
  "learning_rate": 3.066666666666667e-05,
172
+ "loss": 0.9181,
173
  "step": 230
174
  },
175
  {
176
  "epoch": 3.0,
177
  "learning_rate": 3.2000000000000005e-05,
178
+ "loss": 0.7052,
179
  "step": 240
180
  },
181
  {
182
  "epoch": 3.01,
183
  "learning_rate": 3.3333333333333335e-05,
184
+ "loss": 0.5638,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 3.01,
189
  "learning_rate": 3.466666666666667e-05,
190
+ "loss": 0.5728,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 3.01,
195
  "learning_rate": 3.6e-05,
196
+ "loss": 0.731,
197
  "step": 270
198
  },
199
  {
200
  "epoch": 3.01,
201
  "learning_rate": 3.733333333333334e-05,
202
+ "loss": 0.3545,
203
  "step": 280
204
  },
205
  {
206
  "epoch": 3.02,
207
  "learning_rate": 3.866666666666667e-05,
208
+ "loss": 0.5066,
209
  "step": 290
210
  },
211
  {
212
  "epoch": 3.02,
213
  "learning_rate": 4e-05,
214
+ "loss": 0.6169,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 3.02,
219
+ "eval_accuracy": 0.8108108108108109,
220
+ "eval_loss": 0.44089388847351074,
221
+ "eval_runtime": 2.3874,
222
+ "eval_samples_per_second": 15.498,
223
+ "eval_steps_per_second": 4.189,
224
  "step": 300
225
  },
226
  {
227
  "epoch": 4.0,
228
  "learning_rate": 4.133333333333333e-05,
229
+ "loss": 0.4126,
230
  "step": 310
231
  },
232
  {
233
  "epoch": 4.01,
234
  "learning_rate": 4.266666666666667e-05,
235
+ "loss": 0.4563,
236
  "step": 320
237
  },
238
  {
239
  "epoch": 4.01,
240
  "learning_rate": 4.4000000000000006e-05,
241
+ "loss": 0.2671,
242
  "step": 330
243
  },
244
  {
245
  "epoch": 4.01,
246
  "learning_rate": 4.5333333333333335e-05,
247
+ "loss": 0.5918,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 4.01,
252
  "learning_rate": 4.666666666666667e-05,
253
+ "loss": 0.5229,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 4.02,
258
  "learning_rate": 4.8e-05,
259
+ "loss": 0.9112,
260
  "step": 360
261
  },
262
  {
263
  "epoch": 4.02,
264
  "learning_rate": 4.933333333333334e-05,
265
+ "loss": 0.3897,
266
  "step": 370
267
  },
268
  {
269
  "epoch": 4.02,
270
+ "eval_accuracy": 0.8108108108108109,
271
+ "eval_loss": 0.6102820038795471,
272
+ "eval_runtime": 2.4331,
273
+ "eval_samples_per_second": 15.207,
274
+ "eval_steps_per_second": 4.11,
275
  "step": 375
276
  },
277
  {
278
  "epoch": 5.0,
279
  "learning_rate": 4.9925925925925926e-05,
280
+ "loss": 0.2776,
281
  "step": 380
282
  },
283
  {
284
  "epoch": 5.0,
285
  "learning_rate": 4.977777777777778e-05,
286
+ "loss": 0.494,
287
  "step": 390
288
  },
289
  {
290
  "epoch": 5.01,
291
  "learning_rate": 4.962962962962963e-05,
292
+ "loss": 0.5899,
293
  "step": 400
294
  },
295
  {
296
  "epoch": 5.01,
297
  "learning_rate": 4.9481481481481485e-05,
298
+ "loss": 0.5555,
299
  "step": 410
300
  },
301
  {
302
  "epoch": 5.01,
303
  "learning_rate": 4.933333333333334e-05,
304
+ "loss": 0.4075,
305
  "step": 420
306
  },
307
  {
308
  "epoch": 5.01,
309
  "learning_rate": 4.918518518518519e-05,
310
+ "loss": 0.4451,
311
  "step": 430
312
  },
313
  {
314
  "epoch": 5.02,
315
  "learning_rate": 4.903703703703704e-05,
316
+ "loss": 0.7433,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 5.02,
321
  "learning_rate": 4.888888888888889e-05,
322
+ "loss": 0.3564,
323
  "step": 450
324
  },
325
  {
326
  "epoch": 5.02,
327
+ "eval_accuracy": 0.7837837837837838,
328
+ "eval_loss": 0.9210058450698853,
329
+ "eval_runtime": 2.3655,
330
+ "eval_samples_per_second": 15.642,
331
+ "eval_steps_per_second": 4.227,
332
  "step": 450
333
  },
334
  {
335
  "epoch": 6.0,
336
  "learning_rate": 4.874074074074074e-05,
337
+ "loss": 0.3144,
338
  "step": 460
339
  },
340
  {
341
  "epoch": 6.01,
342
  "learning_rate": 4.8592592592592596e-05,
343
+ "loss": 0.1399,
344
  "step": 470
345
  },
346
  {
347
  "epoch": 6.01,
348
  "learning_rate": 4.844444444444445e-05,
349
+ "loss": 0.1546,
350
  "step": 480
351
  },
352
  {
353
  "epoch": 6.01,
354
  "learning_rate": 4.82962962962963e-05,
355
+ "loss": 0.3734,
356
  "step": 490
357
  },
358
  {
359
  "epoch": 6.01,
360
  "learning_rate": 4.814814814814815e-05,
361
+ "loss": 0.255,
362
  "step": 500
363
  },
364
  {
365
  "epoch": 6.02,
366
  "learning_rate": 4.8e-05,
367
+ "loss": 0.3182,
368
  "step": 510
369
  },
370
  {
371
  "epoch": 6.02,
372
  "learning_rate": 4.7851851851851854e-05,
373
+ "loss": 0.4998,
374
  "step": 520
375
  },
376
  {
377
  "epoch": 6.02,
378
+ "eval_accuracy": 0.8378378378378378,
379
+ "eval_loss": 0.6993206143379211,
380
+ "eval_runtime": 2.2397,
381
+ "eval_samples_per_second": 16.52,
382
+ "eval_steps_per_second": 4.465,
383
  "step": 525
384
  },
385
  {
386
  "epoch": 7.0,
387
  "learning_rate": 4.770370370370371e-05,
388
+ "loss": 0.8842,
389
  "step": 530
390
  },
391
  {
392
  "epoch": 7.0,
393
  "learning_rate": 4.755555555555556e-05,
394
+ "loss": 0.5218,
395
  "step": 540
396
  },
397
  {
398
  "epoch": 7.01,
399
  "learning_rate": 4.740740740740741e-05,
400
+ "loss": 0.1773,
401
  "step": 550
402
  },
403
  {
404
  "epoch": 7.01,
405
  "learning_rate": 4.7259259259259266e-05,
406
+ "loss": 0.1435,
407
  "step": 560
408
  },
409
  {
410
  "epoch": 7.01,
411
  "learning_rate": 4.711111111111111e-05,
412
+ "loss": 0.1301,
413
  "step": 570
414
  },
415
  {
416
  "epoch": 7.01,
417
  "learning_rate": 4.6962962962962966e-05,
418
+ "loss": 0.1264,
419
  "step": 580
420
  },
421
  {
422
  "epoch": 7.02,
423
  "learning_rate": 4.681481481481482e-05,
424
+ "loss": 0.0623,
425
  "step": 590
426
  },
427
  {
428
  "epoch": 7.02,
429
  "learning_rate": 4.666666666666667e-05,
430
+ "loss": 0.0605,
431
  "step": 600
432
  },
433
  {
434
  "epoch": 7.02,
435
+ "eval_accuracy": 0.918918918918919,
436
+ "eval_loss": 0.16171327233314514,
437
+ "eval_runtime": 2.3543,
438
+ "eval_samples_per_second": 15.716,
439
+ "eval_steps_per_second": 4.248,
440
  "step": 600
441
  },
442
  {
443
  "epoch": 8.0,
444
  "learning_rate": 4.6518518518518525e-05,
445
+ "loss": 0.0462,
446
  "step": 610
447
  },
448
  {
449
  "epoch": 8.01,
450
  "learning_rate": 4.637037037037038e-05,
451
+ "loss": 0.3026,
452
  "step": 620
453
  },
454
  {
455
  "epoch": 8.01,
456
  "learning_rate": 4.6222222222222224e-05,
457
+ "loss": 0.0066,
458
  "step": 630
459
  },
460
  {
461
  "epoch": 8.01,
462
  "learning_rate": 4.607407407407408e-05,
463
+ "loss": 0.0487,
464
  "step": 640
465
  },
466
  {
467
  "epoch": 8.01,
468
  "learning_rate": 4.592592592592593e-05,
469
+ "loss": 0.0711,
470
  "step": 650
471
  },
472
  {
473
  "epoch": 8.02,
474
  "learning_rate": 4.577777777777778e-05,
475
+ "loss": 0.1624,
476
  "step": 660
477
  },
478
  {
479
  "epoch": 8.02,
480
  "learning_rate": 4.5629629629629636e-05,
481
+ "loss": 0.0814,
482
  "step": 670
483
  },
484
  {
485
  "epoch": 8.02,
486
+ "eval_accuracy": 0.8378378378378378,
487
+ "eval_loss": 0.6547620892524719,
488
+ "eval_runtime": 2.2934,
489
+ "eval_samples_per_second": 16.133,
490
+ "eval_steps_per_second": 4.36,
491
  "step": 675
492
  },
493
  {
494
  "epoch": 9.0,
495
  "learning_rate": 4.548148148148149e-05,
496
+ "loss": 0.014,
497
  "step": 680
498
  },
499
  {
500
  "epoch": 9.0,
501
  "learning_rate": 4.5333333333333335e-05,
502
+ "loss": 0.112,
503
  "step": 690
504
  },
505
  {
506
  "epoch": 9.01,
507
  "learning_rate": 4.518518518518519e-05,
508
+ "loss": 0.06,
509
  "step": 700
510
  },
511
  {
512
  "epoch": 9.01,
513
  "learning_rate": 4.503703703703704e-05,
514
+ "loss": 0.1564,
515
  "step": 710
516
  },
517
  {
518
  "epoch": 9.01,
519
  "learning_rate": 4.4888888888888894e-05,
520
+ "loss": 0.142,
521
  "step": 720
522
  },
523
  {
524
  "epoch": 9.01,
525
  "learning_rate": 4.474074074074075e-05,
526
+ "loss": 0.0574,
527
  "step": 730
528
  },
529
  {
530
  "epoch": 9.02,
531
  "learning_rate": 4.4592592592592594e-05,
532
+ "loss": 0.2043,
533
  "step": 740
534
  },
535
  {
536
  "epoch": 9.02,
537
  "learning_rate": 4.4444444444444447e-05,
538
+ "loss": 0.0312,
539
  "step": 750
540
  },
541
  {
542
  "epoch": 9.02,
543
+ "eval_accuracy": 0.8648648648648649,
544
+ "eval_loss": 0.5517419576644897,
545
+ "eval_runtime": 2.2202,
546
+ "eval_samples_per_second": 16.665,
547
+ "eval_steps_per_second": 4.504,
548
  "step": 750
549
  },
550
  {
551
  "epoch": 10.0,
552
  "learning_rate": 4.42962962962963e-05,
553
+ "loss": 0.0041,
554
  "step": 760
555
  },
556
  {
557
  "epoch": 10.01,
558
  "learning_rate": 4.414814814814815e-05,
559
+ "loss": 0.1084,
560
  "step": 770
561
  },
562
  {
563
  "epoch": 10.01,
564
  "learning_rate": 4.4000000000000006e-05,
565
+ "loss": 0.2527,
566
  "step": 780
567
  },
568
  {
569
  "epoch": 10.01,
570
  "learning_rate": 4.385185185185185e-05,
571
+ "loss": 0.0034,
572
  "step": 790
573
  },
574
  {
575
  "epoch": 10.01,
576
  "learning_rate": 4.3703703703703705e-05,
577
+ "loss": 0.0029,
578
  "step": 800
579
  },
580
  {
581
  "epoch": 10.02,
582
  "learning_rate": 4.355555555555556e-05,
583
+ "loss": 0.0052,
584
  "step": 810
585
  },
586
  {
587
  "epoch": 10.02,
588
  "learning_rate": 4.340740740740741e-05,
589
+ "loss": 0.023,
590
  "step": 820
591
  },
592
  {
593
  "epoch": 10.02,
594
+ "eval_accuracy": 0.9459459459459459,
595
+ "eval_loss": 0.3978298008441925,
596
+ "eval_runtime": 2.2307,
597
+ "eval_samples_per_second": 16.587,
598
+ "eval_steps_per_second": 4.483,
599
  "step": 825
600
  },
601
  {
602
  "epoch": 11.0,
603
  "learning_rate": 4.325925925925926e-05,
604
+ "loss": 0.0064,
605
  "step": 830
606
  },
607
  {
608
  "epoch": 11.0,
609
  "learning_rate": 4.311111111111111e-05,
610
+ "loss": 0.0926,
611
  "step": 840
612
  },
613
  {
614
  "epoch": 11.01,
615
  "learning_rate": 4.296296296296296e-05,
616
+ "loss": 0.0824,
617
  "step": 850
618
  },
619
  {
620
  "epoch": 11.01,
621
  "learning_rate": 4.2814814814814816e-05,
622
+ "loss": 0.0054,
623
  "step": 860
624
  },
625
  {
626
  "epoch": 11.01,
627
  "learning_rate": 4.266666666666667e-05,
628
+ "loss": 0.0038,
629
  "step": 870
630
  },
631
  {
632
  "epoch": 11.01,
633
  "learning_rate": 4.2518518518518515e-05,
634
+ "loss": 0.0046,
635
  "step": 880
636
  },
637
  {
638
  "epoch": 11.02,
639
  "learning_rate": 4.237037037037037e-05,
640
+ "loss": 0.0029,
641
  "step": 890
642
  },
643
  {
644
  "epoch": 11.02,
645
  "learning_rate": 4.222222222222222e-05,
646
+ "loss": 0.0021,
647
  "step": 900
648
  },
649
  {
650
  "epoch": 11.02,
651
+ "eval_accuracy": 0.918918918918919,
652
+ "eval_loss": 0.3967694342136383,
653
+ "eval_runtime": 2.1802,
654
+ "eval_samples_per_second": 16.971,
655
+ "eval_steps_per_second": 4.587,
656
  "step": 900
657
  },
658
  {
659
  "epoch": 12.0,
660
  "learning_rate": 4.2074074074074075e-05,
661
+ "loss": 0.0097,
662
  "step": 910
663
  },
664
  {
665
  "epoch": 12.01,
666
  "learning_rate": 4.192592592592593e-05,
667
+ "loss": 0.0019,
668
  "step": 920
669
  },
670
  {
671
  "epoch": 12.01,
672
  "learning_rate": 4.177777777777778e-05,
673
+ "loss": 0.0137,
674
  "step": 930
675
  },
676
  {
677
  "epoch": 12.01,
678
  "learning_rate": 4.162962962962963e-05,
679
+ "loss": 0.0022,
680
  "step": 940
681
  },
682
  {
683
  "epoch": 12.01,
684
  "learning_rate": 4.148148148148148e-05,
685
+ "loss": 0.0614,
686
  "step": 950
687
  },
688
  {
689
  "epoch": 12.02,
690
  "learning_rate": 4.133333333333333e-05,
691
+ "loss": 0.0019,
692
  "step": 960
693
  },
694
  {
695
  "epoch": 12.02,
696
  "learning_rate": 4.1185185185185186e-05,
697
+ "loss": 0.1367,
698
  "step": 970
699
  },
700
  {
701
  "epoch": 12.02,
702
+ "eval_accuracy": 0.972972972972973,
703
+ "eval_loss": 0.04322541132569313,
704
+ "eval_runtime": 2.1825,
705
+ "eval_samples_per_second": 16.953,
706
+ "eval_steps_per_second": 4.582,
707
  "step": 975
708
  },
709
  {
710
  "epoch": 13.0,
711
  "learning_rate": 4.103703703703704e-05,
712
+ "loss": 0.0021,
713
  "step": 980
714
  },
715
  {
716
  "epoch": 13.0,
717
  "learning_rate": 4.088888888888889e-05,
718
+ "loss": 0.0023,
719
  "step": 990
720
  },
721
  {
722
  "epoch": 13.01,
723
  "learning_rate": 4.074074074074074e-05,
724
+ "loss": 0.0019,
725
  "step": 1000
726
  },
727
  {
728
  "epoch": 13.01,
729
  "learning_rate": 4.059259259259259e-05,
730
+ "loss": 0.0029,
731
  "step": 1010
732
  },
733
  {
734
  "epoch": 13.01,
735
  "learning_rate": 4.0444444444444444e-05,
736
+ "loss": 0.002,
737
  "step": 1020
738
  },
739
  {
 
745
  {
746
  "epoch": 13.02,
747
  "learning_rate": 4.014814814814815e-05,
748
+ "loss": 0.0015,
749
  "step": 1040
750
  },
751
  {
752
  "epoch": 13.02,
753
  "learning_rate": 4e-05,
754
+ "loss": 0.0021,
755
  "step": 1050
756
  },
757
  {
758
  "epoch": 13.02,
759
+ "eval_accuracy": 0.972972972972973,
760
+ "eval_loss": 0.18388445675373077,
761
+ "eval_runtime": 2.1858,
762
+ "eval_samples_per_second": 16.927,
763
+ "eval_steps_per_second": 4.575,
764
  "step": 1050
765
  },
766
  {
 
772
  {
773
  "epoch": 14.01,
774
  "learning_rate": 3.97037037037037e-05,
775
+ "loss": 0.1972,
776
  "step": 1070
777
  },
778
  {
779
  "epoch": 14.01,
780
  "learning_rate": 3.9555555555555556e-05,
781
+ "loss": 0.0014,
782
  "step": 1080
783
  },
784
  {
785
  "epoch": 14.01,
786
  "learning_rate": 3.940740740740741e-05,
787
+ "loss": 0.0015,
788
  "step": 1090
789
  },
790
  {
791
  "epoch": 14.01,
792
  "learning_rate": 3.925925925925926e-05,
793
+ "loss": 0.0015,
794
  "step": 1100
795
  },
796
  {
 
802
  {
803
  "epoch": 14.02,
804
  "learning_rate": 3.896296296296296e-05,
805
+ "loss": 0.2373,
806
  "step": 1120
807
  },
808
  {
809
  "epoch": 14.02,
810
+ "eval_accuracy": 0.972972972972973,
811
+ "eval_loss": 0.07551968842744827,
812
+ "eval_runtime": 2.247,
813
+ "eval_samples_per_second": 16.466,
814
+ "eval_steps_per_second": 4.45,
815
  "step": 1125
816
  },
817
  {
818
  "epoch": 15.0,
819
  "learning_rate": 3.8814814814814814e-05,
820
+ "loss": 0.0013,
821
  "step": 1130
822
  },
823
  {
824
  "epoch": 15.0,
825
  "learning_rate": 3.866666666666667e-05,
826
+ "loss": 0.039,
827
  "step": 1140
828
  },
829
  {
830
  "epoch": 15.01,
831
  "learning_rate": 3.851851851851852e-05,
832
+ "loss": 0.1708,
833
  "step": 1150
834
  },
835
  {
836
  "epoch": 15.01,
837
  "learning_rate": 3.837037037037037e-05,
838
+ "loss": 0.076,
839
  "step": 1160
840
  },
841
  {
 
847
  {
848
  "epoch": 15.01,
849
  "learning_rate": 3.807407407407408e-05,
850
+ "loss": 0.002,
851
  "step": 1180
852
  },
853
  {
854
  "epoch": 15.02,
855
  "learning_rate": 3.7925925925925925e-05,
856
+ "loss": 0.0012,
857
  "step": 1190
858
  },
859
  {
860
  "epoch": 15.02,
861
  "learning_rate": 3.777777777777778e-05,
862
+ "loss": 0.0015,
863
  "step": 1200
864
  },
865
  {
866
  "epoch": 15.02,
867
+ "eval_accuracy": 0.9459459459459459,
868
+ "eval_loss": 0.14861811697483063,
869
+ "eval_runtime": 2.2534,
870
+ "eval_samples_per_second": 16.42,
871
+ "eval_steps_per_second": 4.438,
872
  "step": 1200
873
  },
874
  {
875
  "epoch": 16.0,
876
  "learning_rate": 3.762962962962963e-05,
877
+ "loss": 0.0659,
878
  "step": 1210
879
  },
880
  {
881
  "epoch": 16.01,
882
  "learning_rate": 3.7481481481481484e-05,
883
+ "loss": 0.0036,
884
  "step": 1220
885
  },
886
  {
887
  "epoch": 16.01,
888
  "learning_rate": 3.733333333333334e-05,
889
+ "loss": 0.1704,
890
  "step": 1230
891
  },
892
  {
893
  "epoch": 16.01,
894
  "learning_rate": 3.718518518518519e-05,
895
+ "loss": 0.0014,
896
  "step": 1240
897
  },
898
  {
899
  "epoch": 16.01,
900
  "learning_rate": 3.7037037037037037e-05,
901
+ "loss": 0.0014,
902
  "step": 1250
903
  },
904
  {
905
  "epoch": 16.02,
906
  "learning_rate": 3.688888888888889e-05,
907
+ "loss": 0.0012,
908
  "step": 1260
909
  },
910
  {
911
  "epoch": 16.02,
912
  "learning_rate": 3.674074074074074e-05,
913
+ "loss": 0.0013,
914
  "step": 1270
915
  },
916
  {
917
  "epoch": 16.02,
918
  "eval_accuracy": 1.0,
919
+ "eval_loss": 0.017381420359015465,
920
+ "eval_runtime": 2.2311,
921
+ "eval_samples_per_second": 16.584,
922
+ "eval_steps_per_second": 4.482,
923
  "step": 1275
924
  },
925
  {
926
  "epoch": 17.0,
927
  "learning_rate": 3.6592592592592596e-05,
928
+ "loss": 0.0061,
929
  "step": 1280
930
  },
931
  {
932
  "epoch": 17.0,
933
  "learning_rate": 3.644444444444445e-05,
934
+ "loss": 0.0012,
935
  "step": 1290
936
  },
937
  {
938
  "epoch": 17.01,
939
  "learning_rate": 3.62962962962963e-05,
940
+ "loss": 0.0011,
941
  "step": 1300
942
  },
943
  {
 
949
  {
950
  "epoch": 17.01,
951
  "learning_rate": 3.6e-05,
952
+ "loss": 0.0011,
953
  "step": 1320
954
  },
955
  {
956
  "epoch": 17.01,
957
  "learning_rate": 3.5851851851851854e-05,
958
+ "loss": 0.0018,
959
  "step": 1330
960
  },
961
  {
962
  "epoch": 17.02,
963
  "learning_rate": 3.570370370370371e-05,
964
+ "loss": 0.0028,
965
  "step": 1340
966
  },
967
  {
968
  "epoch": 17.02,
969
  "learning_rate": 3.555555555555556e-05,
970
+ "loss": 0.1707,
971
  "step": 1350
972
  },
973
  {
974
  "epoch": 17.02,
975
+ "eval_accuracy": 0.8918918918918919,
976
+ "eval_loss": 0.5295668840408325,
977
+ "eval_runtime": 2.2321,
978
+ "eval_samples_per_second": 16.576,
979
+ "eval_steps_per_second": 4.48,
980
  "step": 1350
981
  },
982
  {
983
  "epoch": 18.0,
984
  "learning_rate": 3.540740740740741e-05,
985
+ "loss": 0.0011,
986
  "step": 1360
987
  },
988
  {
989
  "epoch": 18.01,
990
  "learning_rate": 3.525925925925926e-05,
991
+ "loss": 0.4316,
992
  "step": 1370
993
  },
994
  {
995
  "epoch": 18.01,
996
  "learning_rate": 3.511111111111111e-05,
997
+ "loss": 0.197,
998
  "step": 1380
999
  },
1000
  {
1001
  "epoch": 18.01,
1002
  "learning_rate": 3.4962962962962965e-05,
1003
+ "loss": 0.0015,
1004
  "step": 1390
1005
  },
1006
  {
1007
  "epoch": 18.01,
1008
  "learning_rate": 3.481481481481482e-05,
1009
+ "loss": 0.0013,
1010
  "step": 1400
1011
  },
1012
  {
1013
  "epoch": 18.02,
1014
  "learning_rate": 3.466666666666667e-05,
1015
+ "loss": 0.0025,
1016
  "step": 1410
1017
  },
1018
  {
1019
  "epoch": 18.02,
1020
  "learning_rate": 3.4518518518518524e-05,
1021
+ "loss": 0.0014,
1022
  "step": 1420
1023
  },
1024
  {
1025
  "epoch": 18.02,
1026
+ "eval_accuracy": 1.0,
1027
+ "eval_loss": 0.02301825024187565,
1028
+ "eval_runtime": 2.2472,
1029
+ "eval_samples_per_second": 16.465,
1030
+ "eval_steps_per_second": 4.45,
1031
  "step": 1425
1032
  },
1033
  {
1034
  "epoch": 19.0,
1035
  "learning_rate": 3.437037037037037e-05,
1036
+ "loss": 0.1095,
1037
  "step": 1430
1038
  },
1039
  {
1040
  "epoch": 19.0,
1041
  "learning_rate": 3.4222222222222224e-05,
1042
+ "loss": 0.0083,
1043
  "step": 1440
1044
  },
1045
  {
1046
  "epoch": 19.01,
1047
  "learning_rate": 3.4074074074074077e-05,
1048
+ "loss": 0.0023,
1049
  "step": 1450
1050
  },
1051
  {
1052
  "epoch": 19.01,
1053
  "learning_rate": 3.392592592592593e-05,
1054
+ "loss": 0.048,
1055
  "step": 1460
1056
  },
1057
  {
1058
  "epoch": 19.01,
1059
  "learning_rate": 3.377777777777778e-05,
1060
+ "loss": 0.1036,
1061
  "step": 1470
1062
  },
1063
  {
1064
  "epoch": 19.01,
1065
  "learning_rate": 3.3629629629629636e-05,
1066
+ "loss": 0.0012,
1067
  "step": 1480
1068
  },
1069
  {
1070
  "epoch": 19.02,
1071
  "learning_rate": 3.348148148148148e-05,
1072
+ "loss": 0.0936,
1073
  "step": 1490
1074
  },
1075
  {
1076
  "epoch": 19.02,
1077
  "learning_rate": 3.3333333333333335e-05,
1078
+ "loss": 0.0011,
1079
  "step": 1500
1080
  },
1081
  {
1082
  "epoch": 19.02,
1083
+ "eval_accuracy": 0.8918918918918919,
1084
+ "eval_loss": 0.5438269972801208,
1085
+ "eval_runtime": 2.2529,
1086
+ "eval_samples_per_second": 16.423,
1087
+ "eval_steps_per_second": 4.439,
1088
  "step": 1500
1089
  },
1090
  {
1091
  "epoch": 20.0,
1092
  "learning_rate": 3.318518518518519e-05,
1093
+ "loss": 0.0011,
1094
  "step": 1510
1095
  },
1096
  {
1097
  "epoch": 20.01,
1098
  "learning_rate": 3.303703703703704e-05,
1099
+ "loss": 0.03,
1100
  "step": 1520
1101
  },
1102
  {
1103
  "epoch": 20.01,
1104
  "learning_rate": 3.2888888888888894e-05,
1105
+ "loss": 0.0014,
1106
  "step": 1530
1107
  },
1108
  {
1109
  "epoch": 20.01,
1110
  "learning_rate": 3.274074074074075e-05,
1111
+ "loss": 0.0013,
1112
  "step": 1540
1113
  },
1114
  {
1115
  "epoch": 20.01,
1116
  "learning_rate": 3.25925925925926e-05,
1117
+ "loss": 0.0015,
1118
  "step": 1550
1119
  },
1120
  {
1121
  "epoch": 20.02,
1122
  "learning_rate": 3.2444444444444446e-05,
1123
+ "loss": 0.0017,
1124
  "step": 1560
1125
  },
1126
  {
1127
  "epoch": 20.02,
1128
  "learning_rate": 3.22962962962963e-05,
1129
+ "loss": 0.0011,
1130
  "step": 1570
1131
  },
1132
  {
1133
  "epoch": 20.02,
1134
+ "eval_accuracy": 0.8378378378378378,
1135
+ "eval_loss": 0.6956642866134644,
1136
+ "eval_runtime": 2.332,
1137
+ "eval_samples_per_second": 15.866,
1138
+ "eval_steps_per_second": 4.288,
1139
  "step": 1575
1140
  },
1141
  {
1142
  "epoch": 21.0,
1143
  "learning_rate": 3.214814814814815e-05,
1144
+ "loss": 0.1479,
1145
  "step": 1580
1146
  },
1147
  {
1148
  "epoch": 21.0,
1149
  "learning_rate": 3.2000000000000005e-05,
1150
+ "loss": 0.001,
1151
  "step": 1590
1152
  },
1153
  {
1154
  "epoch": 21.01,
1155
  "learning_rate": 3.185185185185185e-05,
1156
+ "loss": 0.0009,
1157
  "step": 1600
1158
  },
1159
  {
1160
  "epoch": 21.01,
1161
  "learning_rate": 3.1703703703703705e-05,
1162
+ "loss": 0.001,
1163
  "step": 1610
1164
  },
1165
  {
1166
  "epoch": 21.01,
1167
  "learning_rate": 3.155555555555556e-05,
1168
+ "loss": 0.1083,
1169
  "step": 1620
1170
  },
1171
  {
 
1177
  {
1178
  "epoch": 21.02,
1179
  "learning_rate": 3.1259259259259264e-05,
1180
+ "loss": 0.001,
1181
  "step": 1640
1182
  },
1183
  {
1184
  "epoch": 21.02,
1185
  "learning_rate": 3.111111111111111e-05,
1186
+ "loss": 0.0008,
1187
  "step": 1650
1188
  },
1189
  {
1190
  "epoch": 21.02,
1191
+ "eval_accuracy": 0.918918918918919,
1192
+ "eval_loss": 0.27051687240600586,
1193
+ "eval_runtime": 2.2804,
1194
+ "eval_samples_per_second": 16.225,
1195
+ "eval_steps_per_second": 4.385,
1196
  "step": 1650
1197
  },
1198
  {
1199
  "epoch": 22.0,
1200
  "learning_rate": 3.096296296296296e-05,
1201
+ "loss": 0.0011,
1202
  "step": 1660
1203
  },
1204
  {
1205
  "epoch": 22.01,
1206
  "learning_rate": 3.0814814814814816e-05,
1207
+ "loss": 0.0009,
1208
  "step": 1670
1209
  },
1210
  {
 
1216
  {
1217
  "epoch": 22.01,
1218
  "learning_rate": 3.0518518518518515e-05,
1219
+ "loss": 0.0008,
1220
  "step": 1690
1221
  },
1222
  {
 
1234
  {
1235
  "epoch": 22.02,
1236
  "learning_rate": 3.0074074074074078e-05,
1237
+ "loss": 0.0028,
1238
  "step": 1720
1239
  },
1240
  {
1241
  "epoch": 22.02,
1242
+ "eval_accuracy": 0.972972972972973,
1243
+ "eval_loss": 0.19646592438220978,
1244
+ "eval_runtime": 2.3582,
1245
+ "eval_samples_per_second": 15.69,
1246
+ "eval_steps_per_second": 4.24,
1247
  "step": 1725
1248
  },
1249
  {
1250
  "epoch": 23.0,
1251
  "learning_rate": 2.992592592592593e-05,
1252
+ "loss": 0.0008,
1253
  "step": 1730
1254
  },
1255
  {
1256
  "epoch": 23.0,
1257
  "learning_rate": 2.9777777777777777e-05,
1258
+ "loss": 0.0274,
1259
  "step": 1740
1260
  },
1261
  {
 
1267
  {
1268
  "epoch": 23.01,
1269
  "learning_rate": 2.9481481481481483e-05,
1270
+ "loss": 0.0008,
1271
  "step": 1760
1272
  },
1273
  {
 
1285
  {
1286
  "epoch": 23.02,
1287
  "learning_rate": 2.9037037037037042e-05,
1288
+ "loss": 0.0007,
1289
  "step": 1790
1290
  },
1291
  {
 
1296
  },
1297
  {
1298
  "epoch": 23.02,
1299
+ "eval_accuracy": 0.972972972972973,
1300
+ "eval_loss": 0.1782662570476532,
1301
+ "eval_runtime": 2.3245,
1302
+ "eval_samples_per_second": 15.918,
1303
+ "eval_steps_per_second": 4.302,
1304
  "step": 1800
1305
  },
1306
  {
1307
  "epoch": 24.0,
1308
  "learning_rate": 2.874074074074074e-05,
1309
+ "loss": 0.0008,
1310
  "step": 1810
1311
  },
1312
  {
1313
  "epoch": 24.01,
1314
  "learning_rate": 2.8592592592592594e-05,
1315
+ "loss": 0.0008,
1316
  "step": 1820
1317
  },
1318
  {
 
1324
  {
1325
  "epoch": 24.01,
1326
  "learning_rate": 2.8296296296296297e-05,
1327
+ "loss": 0.0007,
1328
  "step": 1840
1329
  },
1330
  {
1331
  "epoch": 24.01,
1332
  "learning_rate": 2.814814814814815e-05,
1333
+ "loss": 0.0007,
1334
  "step": 1850
1335
  },
1336
  {
1337
  "epoch": 24.02,
1338
  "learning_rate": 2.8000000000000003e-05,
1339
+ "loss": 0.0007,
1340
  "step": 1860
1341
  },
1342
  {
1343
  "epoch": 24.02,
1344
  "learning_rate": 2.7851851851851853e-05,
1345
+ "loss": 0.0008,
1346
  "step": 1870
1347
  },
1348
  {
1349
  "epoch": 24.02,
1350
+ "eval_accuracy": 0.972972972972973,
1351
+ "eval_loss": 0.18091563880443573,
1352
+ "eval_runtime": 2.31,
1353
+ "eval_samples_per_second": 16.017,
1354
+ "eval_steps_per_second": 4.329,
1355
  "step": 1875
1356
  },
1357
  {
1358
  "epoch": 25.0,
1359
  "learning_rate": 2.7703703703703706e-05,
1360
+ "loss": 0.0007,
1361
  "step": 1880
1362
  },
1363
  {
1364
  "epoch": 25.0,
1365
  "learning_rate": 2.7555555555555555e-05,
1366
+ "loss": 0.0007,
1367
  "step": 1890
1368
  },
1369
  {
 
1393
  {
1394
  "epoch": 25.02,
1395
  "learning_rate": 2.6814814814814814e-05,
1396
+ "loss": 0.0006,
1397
  "step": 1940
1398
  },
1399
  {
 
1404
  },
1405
  {
1406
  "epoch": 25.02,
1407
+ "eval_accuracy": 0.972972972972973,
1408
+ "eval_loss": 0.17932352423667908,
1409
+ "eval_runtime": 2.377,
1410
+ "eval_samples_per_second": 15.566,
1411
+ "eval_steps_per_second": 4.207,
1412
  "step": 1950
1413
  },
1414
  {
 
1438
  {
1439
  "epoch": 26.01,
1440
  "learning_rate": 2.5925925925925925e-05,
1441
+ "loss": 0.0051,
1442
  "step": 2000
1443
  },
1444
  {
1445
  "epoch": 26.02,
1446
  "learning_rate": 2.5777777777777778e-05,
1447
+ "loss": 0.0007,
1448
  "step": 2010
1449
  },
1450
  {
1451
  "epoch": 26.02,
1452
  "learning_rate": 2.562962962962963e-05,
1453
+ "loss": 0.0009,
1454
  "step": 2020
1455
  },
1456
  {
1457
  "epoch": 26.02,
1458
+ "eval_accuracy": 0.972972972972973,
1459
+ "eval_loss": 0.09698139131069183,
1460
+ "eval_runtime": 2.2968,
1461
+ "eval_samples_per_second": 16.109,
1462
+ "eval_steps_per_second": 4.354,
1463
  "step": 2025
1464
  },
1465
  {
1466
  "epoch": 27.0,
1467
  "learning_rate": 2.5481481481481484e-05,
1468
+ "loss": 0.0008,
1469
  "step": 2030
1470
  },
1471
  {
1472
  "epoch": 27.0,
1473
  "learning_rate": 2.5333333333333337e-05,
1474
+ "loss": 0.0006,
1475
  "step": 2040
1476
  },
1477
  {
 
1495
  {
1496
  "epoch": 27.01,
1497
  "learning_rate": 2.4740740740740742e-05,
1498
+ "loss": 0.0006,
1499
  "step": 2080
1500
  },
1501
  {
1502
  "epoch": 27.02,
1503
  "learning_rate": 2.4592592592592595e-05,
1504
+ "loss": 0.0006,
1505
  "step": 2090
1506
  },
1507
  {
 
1512
  },
1513
  {
1514
  "epoch": 27.02,
1515
+ "eval_accuracy": 0.9459459459459459,
1516
+ "eval_loss": 0.24828802049160004,
1517
+ "eval_runtime": 2.2708,
1518
+ "eval_samples_per_second": 16.294,
1519
+ "eval_steps_per_second": 4.404,
1520
  "step": 2100
1521
  },
1522
  {
1523
  "epoch": 28.0,
1524
  "learning_rate": 2.4296296296296298e-05,
1525
+ "loss": 0.0007,
1526
  "step": 2110
1527
  },
1528
  {
 
1546
  {
1547
  "epoch": 28.01,
1548
  "learning_rate": 2.3703703703703707e-05,
1549
+ "loss": 0.0008,
1550
  "step": 2150
1551
  },
1552
  {
1553
  "epoch": 28.02,
1554
  "learning_rate": 2.3555555555555556e-05,
1555
+ "loss": 0.0006,
1556
  "step": 2160
1557
  },
1558
  {
1559
  "epoch": 28.02,
1560
  "learning_rate": 2.340740740740741e-05,
1561
+ "loss": 0.0006,
1562
  "step": 2170
1563
  },
1564
  {
1565
  "epoch": 28.02,
1566
+ "eval_accuracy": 0.9459459459459459,
1567
+ "eval_loss": 0.3035326898097992,
1568
+ "eval_runtime": 2.2477,
1569
+ "eval_samples_per_second": 16.461,
1570
+ "eval_steps_per_second": 4.449,
1571
  "step": 2175
1572
  },
1573
  {
 
1579
  {
1580
  "epoch": 29.0,
1581
  "learning_rate": 2.3111111111111112e-05,
1582
+ "loss": 0.0005,
1583
  "step": 2190
1584
  },
1585
  {
1586
  "epoch": 29.01,
1587
  "learning_rate": 2.2962962962962965e-05,
1588
+ "loss": 0.0008,
1589
  "step": 2200
1590
  },
1591
  {
 
1609
  {
1610
  "epoch": 29.02,
1611
  "learning_rate": 2.2370370370370374e-05,
1612
+ "loss": 0.0006,
1613
  "step": 2240
1614
  },
1615
  {
1616
  "epoch": 29.02,
1617
  "learning_rate": 2.2222222222222223e-05,
1618
+ "loss": 0.0006,
1619
  "step": 2250
1620
  },
1621
  {
1622
  "epoch": 29.02,
1623
+ "eval_accuracy": 0.9459459459459459,
1624
+ "eval_loss": 0.2314397692680359,
1625
+ "eval_runtime": 2.3203,
1626
+ "eval_samples_per_second": 15.946,
1627
+ "eval_steps_per_second": 4.31,
1628
  "step": 2250
1629
  },
1630
  {
 
1642
  {
1643
  "epoch": 30.01,
1644
  "learning_rate": 2.177777777777778e-05,
1645
+ "loss": 0.0005,
1646
  "step": 2280
1647
  },
1648
  {
1649
  "epoch": 30.01,
1650
  "learning_rate": 2.162962962962963e-05,
1651
+ "loss": 0.0006,
1652
  "step": 2290
1653
  },
1654
  {
1655
  "epoch": 30.01,
1656
  "learning_rate": 2.148148148148148e-05,
1657
+ "loss": 0.0005,
1658
  "step": 2300
1659
  },
1660
  {
 
1666
  {
1667
  "epoch": 30.02,
1668
  "learning_rate": 2.1185185185185184e-05,
1669
+ "loss": 0.0005,
1670
  "step": 2320
1671
  },
1672
  {
1673
  "epoch": 30.02,
1674
+ "eval_accuracy": 0.9459459459459459,
1675
+ "eval_loss": 0.19059520959854126,
1676
+ "eval_runtime": 2.2511,
1677
+ "eval_samples_per_second": 16.436,
1678
+ "eval_steps_per_second": 4.442,
1679
  "step": 2325
1680
  },
1681
  {
1682
  "epoch": 31.0,
1683
  "learning_rate": 2.1037037037037037e-05,
1684
+ "loss": 0.0005,
1685
  "step": 2330
1686
  },
1687
  {
1688
  "epoch": 31.0,
1689
  "learning_rate": 2.088888888888889e-05,
1690
+ "loss": 0.0005,
1691
  "step": 2340
1692
  },
1693
  {
1694
  "epoch": 31.01,
1695
  "learning_rate": 2.074074074074074e-05,
1696
+ "loss": 0.0005,
1697
  "step": 2350
1698
  },
1699
  {
1700
  "epoch": 31.01,
1701
  "learning_rate": 2.0592592592592593e-05,
1702
+ "loss": 0.0005,
1703
  "step": 2360
1704
  },
1705
  {
1706
  "epoch": 31.01,
1707
  "learning_rate": 2.0444444444444446e-05,
1708
+ "loss": 0.0011,
1709
  "step": 2370
1710
  },
1711
  {
1712
  "epoch": 31.01,
1713
  "learning_rate": 2.0296296296296296e-05,
1714
+ "loss": 0.1101,
1715
  "step": 2380
1716
  },
1717
  {
 
1723
  {
1724
  "epoch": 31.02,
1725
  "learning_rate": 2e-05,
1726
+ "loss": 0.0005,
1727
  "step": 2400
1728
  },
1729
  {
1730
  "epoch": 31.02,
1731
+ "eval_accuracy": 0.972972972972973,
1732
+ "eval_loss": 0.08136174082756042,
1733
+ "eval_runtime": 2.2842,
1734
+ "eval_samples_per_second": 16.198,
1735
+ "eval_steps_per_second": 4.378,
1736
  "step": 2400
1737
  },
1738
  {
 
1750
  {
1751
  "epoch": 32.01,
1752
  "learning_rate": 1.9555555555555557e-05,
1753
+ "loss": 0.0005,
1754
  "step": 2430
1755
  },
1756
  {
 
1779
  },
1780
  {
1781
  "epoch": 32.02,
1782
+ "eval_accuracy": 0.9459459459459459,
1783
+ "eval_loss": 0.08805122971534729,
1784
+ "eval_runtime": 2.3973,
1785
+ "eval_samples_per_second": 15.434,
1786
+ "eval_steps_per_second": 4.171,
1787
  "step": 2475
1788
  },
1789
  {
 
1807
  {
1808
  "epoch": 33.01,
1809
  "learning_rate": 1.837037037037037e-05,
1810
+ "loss": 0.0005,
1811
  "step": 2510
1812
  },
1813
  {
 
1836
  },
1837
  {
1838
  "epoch": 33.02,
1839
+ "eval_accuracy": 0.9459459459459459,
1840
+ "eval_loss": 0.07978475093841553,
1841
+ "eval_runtime": 2.302,
1842
+ "eval_samples_per_second": 16.073,
1843
+ "eval_steps_per_second": 4.344,
1844
  "step": 2550
1845
  },
1846
  {
1847
  "epoch": 34.0,
1848
  "learning_rate": 1.762962962962963e-05,
1849
+ "loss": 0.0004,
1850
  "step": 2560
1851
  },
1852
  {
 
1864
  {
1865
  "epoch": 34.01,
1866
  "learning_rate": 1.7185185185185185e-05,
1867
+ "loss": 0.0005,
1868
  "step": 2590
1869
  },
1870
  {
 
1887
  },
1888
  {
1889
  "epoch": 34.02,
1890
+ "eval_accuracy": 0.9459459459459459,
1891
+ "eval_loss": 0.07055646181106567,
1892
+ "eval_runtime": 2.2924,
1893
+ "eval_samples_per_second": 16.14,
1894
+ "eval_steps_per_second": 4.362,
1895
  "step": 2625
1896
  },
1897
  {
1898
  "epoch": 35.0,
1899
  "learning_rate": 1.6592592592592594e-05,
1900
+ "loss": 0.0004,
1901
  "step": 2630
1902
  },
1903
  {
1904
  "epoch": 35.0,
1905
  "learning_rate": 1.6444444444444447e-05,
1906
+ "loss": 0.0005,
1907
  "step": 2640
1908
  },
1909
  {
 
1921
  {
1922
  "epoch": 35.01,
1923
  "learning_rate": 1.6000000000000003e-05,
1924
+ "loss": 0.0167,
1925
  "step": 2670
1926
  },
1927
  {
 
1933
  {
1934
  "epoch": 35.02,
1935
  "learning_rate": 1.5703703703703705e-05,
1936
+ "loss": 0.0858,
1937
  "step": 2690
1938
  },
1939
  {
 
1944
  },
1945
  {
1946
  "epoch": 35.02,
1947
+ "eval_accuracy": 0.9459459459459459,
1948
+ "eval_loss": 0.09486611932516098,
1949
+ "eval_runtime": 2.2709,
1950
+ "eval_samples_per_second": 16.293,
1951
+ "eval_steps_per_second": 4.404,
1952
  "step": 2700
1953
  },
1954
  {
1955
  "epoch": 36.0,
1956
  "learning_rate": 1.5407407407407408e-05,
1957
+ "loss": 0.0005,
1958
  "step": 2710
1959
  },
1960
  {
1961
  "epoch": 36.01,
1962
  "learning_rate": 1.5259259259259258e-05,
1963
+ "loss": 0.0005,
1964
  "step": 2720
1965
  },
1966
  {
1967
  "epoch": 36.01,
1968
  "learning_rate": 1.5111111111111112e-05,
1969
+ "loss": 0.0005,
1970
  "step": 2730
1971
  },
1972
  {
1973
  "epoch": 36.01,
1974
  "learning_rate": 1.4962962962962965e-05,
1975
+ "loss": 0.0005,
1976
  "step": 2740
1977
  },
1978
  {
 
1995
  },
1996
  {
1997
  "epoch": 36.02,
1998
+ "eval_accuracy": 0.9459459459459459,
1999
+ "eval_loss": 0.08677444607019424,
2000
+ "eval_runtime": 2.2061,
2001
+ "eval_samples_per_second": 16.772,
2002
+ "eval_steps_per_second": 4.533,
2003
  "step": 2775
2004
  },
2005
  {
 
2011
  {
2012
  "epoch": 37.0,
2013
  "learning_rate": 1.4222222222222224e-05,
2014
+ "loss": 0.0005,
2015
  "step": 2790
2016
  },
2017
  {
 
2029
  {
2030
  "epoch": 37.01,
2031
  "learning_rate": 1.3777777777777778e-05,
2032
+ "loss": 0.0005,
2033
  "step": 2820
2034
  },
2035
  {
2036
  "epoch": 37.01,
2037
  "learning_rate": 1.362962962962963e-05,
2038
+ "loss": 0.0006,
2039
  "step": 2830
2040
  },
2041
  {
2042
  "epoch": 37.02,
2043
  "learning_rate": 1.348148148148148e-05,
2044
+ "loss": 0.0005,
2045
  "step": 2840
2046
  },
2047
  {
 
2052
  },
2053
  {
2054
  "epoch": 37.02,
2055
+ "eval_accuracy": 0.972972972972973,
2056
+ "eval_loss": 0.059545956552028656,
2057
+ "eval_runtime": 2.2376,
2058
+ "eval_samples_per_second": 16.536,
2059
+ "eval_steps_per_second": 4.469,
2060
  "step": 2850
2061
  },
2062
  {
 
2068
  {
2069
  "epoch": 38.01,
2070
  "learning_rate": 1.3037037037037036e-05,
2071
+ "loss": 0.1455,
2072
  "step": 2870
2073
  },
2074
  {
 
2086
  {
2087
  "epoch": 38.01,
2088
  "learning_rate": 1.2592592592592592e-05,
2089
+ "loss": 0.0005,
2090
  "step": 2900
2091
  },
2092
  {
 
2098
  {
2099
  "epoch": 38.02,
2100
  "learning_rate": 1.2296296296296298e-05,
2101
+ "loss": 0.0005,
2102
  "step": 2920
2103
  },
2104
  {
2105
  "epoch": 38.02,
2106
+ "eval_accuracy": 0.972972972972973,
2107
+ "eval_loss": 0.13422098755836487,
2108
+ "eval_runtime": 2.2338,
2109
+ "eval_samples_per_second": 16.563,
2110
+ "eval_steps_per_second": 4.477,
2111
  "step": 2925
2112
  },
2113
  {
 
2131
  {
2132
  "epoch": 39.01,
2133
  "learning_rate": 1.1703703703703705e-05,
2134
+ "loss": 0.0005,
2135
  "step": 2960
2136
  },
2137
  {
 
2149
  {
2150
  "epoch": 39.02,
2151
  "learning_rate": 1.125925925925926e-05,
2152
+ "loss": 0.0006,
2153
  "step": 2990
2154
  },
2155
  {
 
2160
  },
2161
  {
2162
  "epoch": 39.02,
2163
+ "eval_accuracy": 0.972972972972973,
2164
+ "eval_loss": 0.15938998758792877,
2165
+ "eval_runtime": 2.243,
2166
+ "eval_samples_per_second": 16.496,
2167
+ "eval_steps_per_second": 4.458,
2168
  "step": 3000
2169
  },
2170
  {
 
2211
  },
2212
  {
2213
  "epoch": 40.02,
2214
+ "eval_accuracy": 0.972972972972973,
2215
+ "eval_loss": 0.14878199994564056,
2216
+ "eval_runtime": 2.2635,
2217
+ "eval_samples_per_second": 16.346,
2218
+ "eval_steps_per_second": 4.418,
2219
  "step": 3075
2220
  },
2221
  {
 
2268
  },
2269
  {
2270
  "epoch": 41.02,
2271
+ "eval_accuracy": 0.972972972972973,
2272
+ "eval_loss": 0.1433899849653244,
2273
+ "eval_runtime": 2.4808,
2274
+ "eval_samples_per_second": 14.914,
2275
+ "eval_steps_per_second": 4.031,
2276
  "step": 3150
2277
  },
2278
  {
 
2319
  },
2320
  {
2321
  "epoch": 42.02,
2322
+ "eval_accuracy": 0.972972972972973,
2323
+ "eval_loss": 0.11490071564912796,
2324
+ "eval_runtime": 2.2713,
2325
+ "eval_samples_per_second": 16.29,
2326
+ "eval_steps_per_second": 4.403,
2327
  "step": 3225
2328
  },
2329
  {
 
2376
  },
2377
  {
2378
  "epoch": 43.02,
2379
+ "eval_accuracy": 0.972972972972973,
2380
+ "eval_loss": 0.11192985624074936,
2381
+ "eval_runtime": 2.2722,
2382
+ "eval_samples_per_second": 16.284,
2383
+ "eval_steps_per_second": 4.401,
2384
  "step": 3300
2385
  },
2386
  {
 
2427
  },
2428
  {
2429
  "epoch": 44.02,
2430
+ "eval_accuracy": 0.972972972972973,
2431
+ "eval_loss": 0.11190944164991379,
2432
+ "eval_runtime": 2.2053,
2433
+ "eval_samples_per_second": 16.778,
2434
+ "eval_steps_per_second": 4.535,
2435
  "step": 3375
2436
  },
2437
  {
 
2467
  {
2468
  "epoch": 45.01,
2469
  "learning_rate": 4.740740740740741e-06,
2470
+ "loss": 0.0004,
2471
  "step": 3430
2472
  },
2473
  {
2474
  "epoch": 45.02,
2475
  "learning_rate": 4.592592592592593e-06,
2476
+ "loss": 0.0004,
2477
  "step": 3440
2478
  },
2479
  {
 
2484
  },
2485
  {
2486
  "epoch": 45.02,
2487
+ "eval_accuracy": 0.972972972972973,
2488
+ "eval_loss": 0.10959651321172714,
2489
+ "eval_runtime": 2.3608,
2490
+ "eval_samples_per_second": 15.672,
2491
+ "eval_steps_per_second": 4.236,
2492
  "step": 3450
2493
  },
2494
  {
 
2535
  },
2536
  {
2537
  "epoch": 46.02,
2538
+ "eval_accuracy": 0.972972972972973,
2539
+ "eval_loss": 0.1096063181757927,
2540
+ "eval_runtime": 2.2302,
2541
+ "eval_samples_per_second": 16.591,
2542
+ "eval_steps_per_second": 4.484,
2543
  "step": 3525
2544
  },
2545
  {
 
2592
  },
2593
  {
2594
  "epoch": 47.02,
2595
+ "eval_accuracy": 0.972972972972973,
2596
+ "eval_loss": 0.10854890942573547,
2597
+ "eval_runtime": 2.2666,
2598
+ "eval_samples_per_second": 16.324,
2599
+ "eval_steps_per_second": 4.412,
2600
  "step": 3600
2601
  },
2602
  {
2603
  "epoch": 48.0,
2604
  "learning_rate": 2.0740740740740742e-06,
2605
+ "loss": 0.0004,
2606
  "step": 3610
2607
  },
2608
  {
 
2620
  {
2621
  "epoch": 48.01,
2622
  "learning_rate": 1.6296296296296295e-06,
2623
+ "loss": 0.0005,
2624
  "step": 3640
2625
  },
2626
  {
 
2643
  },
2644
  {
2645
  "epoch": 48.02,
2646
+ "eval_accuracy": 0.972972972972973,
2647
+ "eval_loss": 0.10318177938461304,
2648
+ "eval_runtime": 2.2948,
2649
+ "eval_samples_per_second": 16.124,
2650
+ "eval_steps_per_second": 4.358,
2651
  "step": 3675
2652
  },
2653
  {
 
2683
  {
2684
  "epoch": 49.01,
2685
  "learning_rate": 2.962962962962963e-07,
2686
+ "loss": 0.0004,
2687
  "step": 3730
2688
  },
2689
  {
2690
  "epoch": 49.02,
2691
  "learning_rate": 1.4814814814814815e-07,
2692
+ "loss": 0.0004,
2693
  "step": 3740
2694
  },
2695
  {
 
2700
  },
2701
  {
2702
  "epoch": 49.02,
2703
+ "eval_accuracy": 0.972972972972973,
2704
+ "eval_loss": 0.10637960582971573,
2705
+ "eval_runtime": 2.2939,
2706
+ "eval_samples_per_second": 16.13,
2707
+ "eval_steps_per_second": 4.359,
2708
  "step": 3750
2709
  },
2710
  {
2711
  "epoch": 49.02,
2712
  "step": 3750,
2713
  "total_flos": 1.86923023515648e+19,
2714
+ "train_loss": 0.1699262268283715,
2715
+ "train_runtime": 3668.7053,
2716
+ "train_samples_per_second": 4.089,
2717
+ "train_steps_per_second": 1.022
2718
  },
2719
  {
2720
  "epoch": 49.02,
2721
+ "eval_accuracy": 0.9431818181818182,
2722
+ "eval_loss": 0.2483096718788147,
2723
+ "eval_runtime": 8.1578,
2724
+ "eval_samples_per_second": 10.787,
2725
+ "eval_steps_per_second": 2.697,
2726
  "step": 3750
2727
  },
2728
  {
2729
  "epoch": 49.02,
2730
+ "eval_accuracy": 0.9431818181818182,
2731
+ "eval_loss": 0.2483096718788147,
2732
+ "eval_runtime": 5.3467,
2733
+ "eval_samples_per_second": 16.459,
2734
+ "eval_steps_per_second": 4.115,
2735
  "step": 3750
2736
  }
2737
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da42640f7bfa8b57722d5b238ddd6b901e4561a7a6e5544a6da12e1bbc949957
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36ff6269f1807faf7ac2be9aadf648f57f17c5fdf647c535d3f6e8d478b99fe7
3
  size 3439