younggi commited on
Commit
503a110
1 Parent(s): eb67f04

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 49.02,
3
- "eval_accuracy": 0.8850574712643678,
4
- "eval_loss": 0.38414719700813293,
5
- "eval_runtime": 6.1302,
6
- "eval_samples_per_second": 14.192,
7
- "eval_steps_per_second": 3.589
8
  }
 
1
  {
2
  "epoch": 49.02,
3
+ "eval_accuracy": 0.9078947368421053,
4
+ "eval_loss": 0.41711878776550293,
5
+ "eval_runtime": 5.3059,
6
+ "eval_samples_per_second": 14.324,
7
+ "eval_steps_per_second": 3.581
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7766ef368aacab55697adbd46e013c9f7c6750ab26d73d882c5ed619162c21f
3
- size 345004552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b806e98fc7300981b861a34391efa5a6fb36c3df4d06b4bc2682ff6735535ef0
3
+ size 345004517
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 49.02,
3
- "eval_accuracy": 0.8850574712643678,
4
- "eval_loss": 0.38414719700813293,
5
- "eval_runtime": 6.1302,
6
- "eval_samples_per_second": 14.192,
7
- "eval_steps_per_second": 3.589
8
  }
 
1
  {
2
  "epoch": 49.02,
3
+ "eval_accuracy": 0.9078947368421053,
4
+ "eval_loss": 0.41711878776550293,
5
+ "eval_runtime": 5.3059,
6
+ "eval_samples_per_second": 14.324,
7
+ "eval_steps_per_second": 3.581
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.972972972972973,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-375",
4
  "epoch": 49.02,
5
  "global_step": 3750,
6
  "is_hyper_param_search": false,
@@ -10,559 +10,559 @@
10
  {
11
  "epoch": 0.0,
12
  "learning_rate": 1.3333333333333334e-06,
13
- "loss": 2.4049,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.01,
18
  "learning_rate": 2.666666666666667e-06,
19
- "loss": 2.3782,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.01,
24
  "learning_rate": 4.000000000000001e-06,
25
- "loss": 2.3345,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.01,
30
  "learning_rate": 5.333333333333334e-06,
31
- "loss": 2.3762,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.01,
36
  "learning_rate": 6.666666666666667e-06,
37
- "loss": 2.3277,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.02,
42
  "learning_rate": 8.000000000000001e-06,
43
- "loss": 2.2711,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.02,
48
  "learning_rate": 9.333333333333334e-06,
49
- "loss": 2.369,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.02,
54
- "eval_accuracy": 0.2972972972972973,
55
- "eval_loss": 2.2216224670410156,
56
- "eval_runtime": 2.6845,
57
- "eval_samples_per_second": 13.783,
58
- "eval_steps_per_second": 3.725,
59
  "step": 75
60
  },
61
  {
62
  "epoch": 1.0,
63
  "learning_rate": 1.0666666666666667e-05,
64
- "loss": 2.2595,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 1.0,
69
  "learning_rate": 1.2e-05,
70
- "loss": 2.2602,
71
  "step": 90
72
  },
73
  {
74
  "epoch": 1.01,
75
  "learning_rate": 1.3333333333333333e-05,
76
- "loss": 2.1993,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 1.01,
81
  "learning_rate": 1.4666666666666668e-05,
82
- "loss": 2.2198,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 1.01,
87
  "learning_rate": 1.6000000000000003e-05,
88
- "loss": 2.1327,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 1.01,
93
  "learning_rate": 1.7333333333333336e-05,
94
- "loss": 2.0154,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 1.02,
99
  "learning_rate": 1.866666666666667e-05,
100
- "loss": 2.0248,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.02,
105
  "learning_rate": 2e-05,
106
- "loss": 1.8283,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.02,
111
- "eval_accuracy": 0.4864864864864865,
112
- "eval_loss": 1.7583507299423218,
113
- "eval_runtime": 2.7247,
114
- "eval_samples_per_second": 13.58,
115
- "eval_steps_per_second": 3.67,
116
  "step": 150
117
  },
118
  {
119
  "epoch": 2.0,
120
  "learning_rate": 2.1333333333333335e-05,
121
- "loss": 1.5075,
122
  "step": 160
123
  },
124
  {
125
  "epoch": 2.01,
126
  "learning_rate": 2.2666666666666668e-05,
127
- "loss": 1.4104,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 2.01,
132
  "learning_rate": 2.4e-05,
133
- "loss": 1.1892,
134
  "step": 180
135
  },
136
  {
137
  "epoch": 2.01,
138
  "learning_rate": 2.5333333333333337e-05,
139
- "loss": 0.9929,
140
  "step": 190
141
  },
142
  {
143
  "epoch": 2.01,
144
  "learning_rate": 2.6666666666666667e-05,
145
- "loss": 0.8859,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 2.02,
150
  "learning_rate": 2.8000000000000003e-05,
151
- "loss": 0.7763,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 2.02,
156
  "learning_rate": 2.9333333333333336e-05,
157
- "loss": 0.8729,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 2.02,
162
- "eval_accuracy": 0.7027027027027027,
163
- "eval_loss": 1.0192049741744995,
164
- "eval_runtime": 2.6893,
165
- "eval_samples_per_second": 13.758,
166
- "eval_steps_per_second": 3.718,
167
  "step": 225
168
  },
169
  {
170
  "epoch": 3.0,
171
  "learning_rate": 3.066666666666667e-05,
172
- "loss": 0.7605,
173
  "step": 230
174
  },
175
  {
176
  "epoch": 3.0,
177
  "learning_rate": 3.2000000000000005e-05,
178
- "loss": 0.709,
179
  "step": 240
180
  },
181
  {
182
  "epoch": 3.01,
183
  "learning_rate": 3.3333333333333335e-05,
184
- "loss": 0.4213,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 3.01,
189
  "learning_rate": 3.466666666666667e-05,
190
- "loss": 0.6716,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 3.01,
195
  "learning_rate": 3.6e-05,
196
- "loss": 0.7687,
197
  "step": 270
198
  },
199
  {
200
  "epoch": 3.01,
201
  "learning_rate": 3.733333333333334e-05,
202
- "loss": 0.4992,
203
  "step": 280
204
  },
205
  {
206
  "epoch": 3.02,
207
  "learning_rate": 3.866666666666667e-05,
208
- "loss": 0.5896,
209
  "step": 290
210
  },
211
  {
212
  "epoch": 3.02,
213
  "learning_rate": 4e-05,
214
- "loss": 0.4077,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 3.02,
219
- "eval_accuracy": 0.8378378378378378,
220
- "eval_loss": 0.48492956161499023,
221
- "eval_runtime": 2.6873,
222
- "eval_samples_per_second": 13.768,
223
- "eval_steps_per_second": 3.721,
224
  "step": 300
225
  },
226
  {
227
  "epoch": 4.0,
228
  "learning_rate": 4.133333333333333e-05,
229
- "loss": 0.2703,
230
  "step": 310
231
  },
232
  {
233
  "epoch": 4.01,
234
  "learning_rate": 4.266666666666667e-05,
235
- "loss": 0.3431,
236
  "step": 320
237
  },
238
  {
239
  "epoch": 4.01,
240
  "learning_rate": 4.4000000000000006e-05,
241
- "loss": 0.4284,
242
  "step": 330
243
  },
244
  {
245
  "epoch": 4.01,
246
  "learning_rate": 4.5333333333333335e-05,
247
- "loss": 0.5786,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 4.01,
252
  "learning_rate": 4.666666666666667e-05,
253
- "loss": 0.5347,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 4.02,
258
  "learning_rate": 4.8e-05,
259
- "loss": 0.2638,
260
  "step": 360
261
  },
262
  {
263
  "epoch": 4.02,
264
  "learning_rate": 4.933333333333334e-05,
265
- "loss": 0.3742,
266
  "step": 370
267
  },
268
  {
269
  "epoch": 4.02,
270
- "eval_accuracy": 0.972972972972973,
271
- "eval_loss": 0.1343977451324463,
272
- "eval_runtime": 2.7118,
273
- "eval_samples_per_second": 13.644,
274
- "eval_steps_per_second": 3.688,
275
  "step": 375
276
  },
277
  {
278
  "epoch": 5.0,
279
  "learning_rate": 4.9925925925925926e-05,
280
- "loss": 0.2036,
281
  "step": 380
282
  },
283
  {
284
  "epoch": 5.0,
285
  "learning_rate": 4.977777777777778e-05,
286
- "loss": 0.1899,
287
  "step": 390
288
  },
289
  {
290
  "epoch": 5.01,
291
  "learning_rate": 4.962962962962963e-05,
292
- "loss": 0.3761,
293
  "step": 400
294
  },
295
  {
296
  "epoch": 5.01,
297
  "learning_rate": 4.9481481481481485e-05,
298
- "loss": 0.3704,
299
  "step": 410
300
  },
301
  {
302
  "epoch": 5.01,
303
  "learning_rate": 4.933333333333334e-05,
304
- "loss": 0.2038,
305
  "step": 420
306
  },
307
  {
308
  "epoch": 5.01,
309
  "learning_rate": 4.918518518518519e-05,
310
- "loss": 0.4751,
311
  "step": 430
312
  },
313
  {
314
  "epoch": 5.02,
315
  "learning_rate": 4.903703703703704e-05,
316
- "loss": 0.1951,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 5.02,
321
  "learning_rate": 4.888888888888889e-05,
322
- "loss": 0.094,
323
  "step": 450
324
  },
325
  {
326
  "epoch": 5.02,
327
- "eval_accuracy": 0.8918918918918919,
328
- "eval_loss": 0.24485082924365997,
329
- "eval_runtime": 2.7174,
330
- "eval_samples_per_second": 13.616,
331
- "eval_steps_per_second": 3.68,
332
  "step": 450
333
  },
334
  {
335
  "epoch": 6.0,
336
  "learning_rate": 4.874074074074074e-05,
337
- "loss": 0.1336,
338
  "step": 460
339
  },
340
  {
341
  "epoch": 6.01,
342
  "learning_rate": 4.8592592592592596e-05,
343
- "loss": 0.0813,
344
  "step": 470
345
  },
346
  {
347
  "epoch": 6.01,
348
  "learning_rate": 4.844444444444445e-05,
349
- "loss": 0.1722,
350
  "step": 480
351
  },
352
  {
353
  "epoch": 6.01,
354
  "learning_rate": 4.82962962962963e-05,
355
- "loss": 0.5579,
356
  "step": 490
357
  },
358
  {
359
  "epoch": 6.01,
360
  "learning_rate": 4.814814814814815e-05,
361
- "loss": 0.3787,
362
  "step": 500
363
  },
364
  {
365
  "epoch": 6.02,
366
  "learning_rate": 4.8e-05,
367
- "loss": 0.5287,
368
  "step": 510
369
  },
370
  {
371
  "epoch": 6.02,
372
  "learning_rate": 4.7851851851851854e-05,
373
- "loss": 0.1005,
374
  "step": 520
375
  },
376
  {
377
  "epoch": 6.02,
378
- "eval_accuracy": 0.7837837837837838,
379
- "eval_loss": 1.0794074535369873,
380
- "eval_runtime": 2.7101,
381
- "eval_samples_per_second": 13.653,
382
- "eval_steps_per_second": 3.69,
383
  "step": 525
384
  },
385
  {
386
  "epoch": 7.0,
387
  "learning_rate": 4.770370370370371e-05,
388
- "loss": 0.5154,
389
  "step": 530
390
  },
391
  {
392
  "epoch": 7.0,
393
  "learning_rate": 4.755555555555556e-05,
394
- "loss": 0.1985,
395
  "step": 540
396
  },
397
  {
398
  "epoch": 7.01,
399
  "learning_rate": 4.740740740740741e-05,
400
- "loss": 0.2665,
401
  "step": 550
402
  },
403
  {
404
  "epoch": 7.01,
405
  "learning_rate": 4.7259259259259266e-05,
406
- "loss": 0.0321,
407
  "step": 560
408
  },
409
  {
410
  "epoch": 7.01,
411
  "learning_rate": 4.711111111111111e-05,
412
- "loss": 0.159,
413
  "step": 570
414
  },
415
  {
416
  "epoch": 7.01,
417
  "learning_rate": 4.6962962962962966e-05,
418
- "loss": 0.3711,
419
  "step": 580
420
  },
421
  {
422
  "epoch": 7.02,
423
  "learning_rate": 4.681481481481482e-05,
424
- "loss": 0.0089,
425
  "step": 590
426
  },
427
  {
428
  "epoch": 7.02,
429
  "learning_rate": 4.666666666666667e-05,
430
- "loss": 0.0053,
431
  "step": 600
432
  },
433
  {
434
  "epoch": 7.02,
435
- "eval_accuracy": 0.9459459459459459,
436
- "eval_loss": 0.23636393249034882,
437
- "eval_runtime": 2.6811,
438
- "eval_samples_per_second": 13.8,
439
- "eval_steps_per_second": 3.73,
440
  "step": 600
441
  },
442
  {
443
  "epoch": 8.0,
444
  "learning_rate": 4.6518518518518525e-05,
445
- "loss": 0.0056,
446
  "step": 610
447
  },
448
  {
449
  "epoch": 8.01,
450
  "learning_rate": 4.637037037037038e-05,
451
- "loss": 0.3093,
452
  "step": 620
453
  },
454
  {
455
  "epoch": 8.01,
456
  "learning_rate": 4.6222222222222224e-05,
457
- "loss": 0.3216,
458
  "step": 630
459
  },
460
  {
461
  "epoch": 8.01,
462
  "learning_rate": 4.607407407407408e-05,
463
- "loss": 0.1304,
464
  "step": 640
465
  },
466
  {
467
  "epoch": 8.01,
468
  "learning_rate": 4.592592592592593e-05,
469
- "loss": 0.148,
470
  "step": 650
471
  },
472
  {
473
  "epoch": 8.02,
474
  "learning_rate": 4.577777777777778e-05,
475
- "loss": 0.132,
476
  "step": 660
477
  },
478
  {
479
  "epoch": 8.02,
480
  "learning_rate": 4.5629629629629636e-05,
481
- "loss": 0.0807,
482
  "step": 670
483
  },
484
  {
485
  "epoch": 8.02,
486
- "eval_accuracy": 0.8378378378378378,
487
- "eval_loss": 0.6658951640129089,
488
- "eval_runtime": 2.6902,
489
- "eval_samples_per_second": 13.754,
490
- "eval_steps_per_second": 3.717,
491
  "step": 675
492
  },
493
  {
494
  "epoch": 9.0,
495
  "learning_rate": 4.548148148148149e-05,
496
- "loss": 0.0775,
497
  "step": 680
498
  },
499
  {
500
  "epoch": 9.0,
501
  "learning_rate": 4.5333333333333335e-05,
502
- "loss": 0.1351,
503
  "step": 690
504
  },
505
  {
506
  "epoch": 9.01,
507
  "learning_rate": 4.518518518518519e-05,
508
- "loss": 0.1521,
509
  "step": 700
510
  },
511
  {
512
  "epoch": 9.01,
513
  "learning_rate": 4.503703703703704e-05,
514
- "loss": 0.4525,
515
  "step": 710
516
  },
517
  {
518
  "epoch": 9.01,
519
  "learning_rate": 4.4888888888888894e-05,
520
- "loss": 0.4013,
521
  "step": 720
522
  },
523
  {
524
  "epoch": 9.01,
525
  "learning_rate": 4.474074074074075e-05,
526
- "loss": 0.0261,
527
  "step": 730
528
  },
529
  {
530
  "epoch": 9.02,
531
  "learning_rate": 4.4592592592592594e-05,
532
- "loss": 0.0091,
533
  "step": 740
534
  },
535
  {
536
  "epoch": 9.02,
537
  "learning_rate": 4.4444444444444447e-05,
538
- "loss": 0.0031,
539
  "step": 750
540
  },
541
  {
542
  "epoch": 9.02,
543
- "eval_accuracy": 0.918918918918919,
544
- "eval_loss": 0.4496133029460907,
545
- "eval_runtime": 2.7338,
546
- "eval_samples_per_second": 13.534,
547
- "eval_steps_per_second": 3.658,
548
  "step": 750
549
  },
550
  {
551
  "epoch": 10.0,
552
  "learning_rate": 4.42962962962963e-05,
553
- "loss": 0.0035,
554
  "step": 760
555
  },
556
  {
557
  "epoch": 10.01,
558
  "learning_rate": 4.414814814814815e-05,
559
- "loss": 0.0199,
560
  "step": 770
561
  },
562
  {
563
  "epoch": 10.01,
564
  "learning_rate": 4.4000000000000006e-05,
565
- "loss": 0.0028,
566
  "step": 780
567
  },
568
  {
@@ -574,40 +574,40 @@
574
  {
575
  "epoch": 10.01,
576
  "learning_rate": 4.3703703703703705e-05,
577
- "loss": 0.0035,
578
  "step": 800
579
  },
580
  {
581
  "epoch": 10.02,
582
  "learning_rate": 4.355555555555556e-05,
583
- "loss": 0.0025,
584
  "step": 810
585
  },
586
  {
587
  "epoch": 10.02,
588
  "learning_rate": 4.340740740740741e-05,
589
- "loss": 0.0203,
590
  "step": 820
591
  },
592
  {
593
  "epoch": 10.02,
594
- "eval_accuracy": 0.918918918918919,
595
- "eval_loss": 0.3398858308792114,
596
- "eval_runtime": 2.8361,
597
- "eval_samples_per_second": 13.046,
598
- "eval_steps_per_second": 3.526,
599
  "step": 825
600
  },
601
  {
602
  "epoch": 11.0,
603
  "learning_rate": 4.325925925925926e-05,
604
- "loss": 0.0042,
605
  "step": 830
606
  },
607
  {
608
  "epoch": 11.0,
609
  "learning_rate": 4.311111111111111e-05,
610
- "loss": 0.0024,
611
  "step": 840
612
  },
613
  {
@@ -619,103 +619,103 @@
619
  {
620
  "epoch": 11.01,
621
  "learning_rate": 4.2814814814814816e-05,
622
- "loss": 0.0266,
623
  "step": 860
624
  },
625
  {
626
  "epoch": 11.01,
627
  "learning_rate": 4.266666666666667e-05,
628
- "loss": 0.018,
629
  "step": 870
630
  },
631
  {
632
  "epoch": 11.01,
633
  "learning_rate": 4.2518518518518515e-05,
634
- "loss": 0.0028,
635
  "step": 880
636
  },
637
  {
638
  "epoch": 11.02,
639
  "learning_rate": 4.237037037037037e-05,
640
- "loss": 0.2435,
641
  "step": 890
642
  },
643
  {
644
  "epoch": 11.02,
645
  "learning_rate": 4.222222222222222e-05,
646
- "loss": 0.0093,
647
  "step": 900
648
  },
649
  {
650
  "epoch": 11.02,
651
- "eval_accuracy": 0.9459459459459459,
652
- "eval_loss": 0.3724738359451294,
653
- "eval_runtime": 2.729,
654
- "eval_samples_per_second": 13.558,
655
- "eval_steps_per_second": 3.664,
656
  "step": 900
657
  },
658
  {
659
  "epoch": 12.0,
660
  "learning_rate": 4.2074074074074075e-05,
661
- "loss": 0.1554,
662
  "step": 910
663
  },
664
  {
665
  "epoch": 12.01,
666
  "learning_rate": 4.192592592592593e-05,
667
- "loss": 0.0049,
668
  "step": 920
669
  },
670
  {
671
  "epoch": 12.01,
672
  "learning_rate": 4.177777777777778e-05,
673
- "loss": 0.1019,
674
  "step": 930
675
  },
676
  {
677
  "epoch": 12.01,
678
  "learning_rate": 4.162962962962963e-05,
679
- "loss": 0.2167,
680
  "step": 940
681
  },
682
  {
683
  "epoch": 12.01,
684
  "learning_rate": 4.148148148148148e-05,
685
- "loss": 0.1541,
686
  "step": 950
687
  },
688
  {
689
  "epoch": 12.02,
690
  "learning_rate": 4.133333333333333e-05,
691
- "loss": 0.0032,
692
  "step": 960
693
  },
694
  {
695
  "epoch": 12.02,
696
  "learning_rate": 4.1185185185185186e-05,
697
- "loss": 0.0022,
698
  "step": 970
699
  },
700
  {
701
  "epoch": 12.02,
702
- "eval_accuracy": 0.918918918918919,
703
- "eval_loss": 0.5498412847518921,
704
- "eval_runtime": 2.7424,
705
- "eval_samples_per_second": 13.492,
706
- "eval_steps_per_second": 3.647,
707
  "step": 975
708
  },
709
  {
710
  "epoch": 13.0,
711
  "learning_rate": 4.103703703703704e-05,
712
- "loss": 0.1514,
713
  "step": 980
714
  },
715
  {
716
  "epoch": 13.0,
717
  "learning_rate": 4.088888888888889e-05,
718
- "loss": 0.0019,
719
  "step": 990
720
  },
721
  {
@@ -727,40 +727,40 @@
727
  {
728
  "epoch": 13.01,
729
  "learning_rate": 4.059259259259259e-05,
730
- "loss": 0.0017,
731
  "step": 1010
732
  },
733
  {
734
  "epoch": 13.01,
735
  "learning_rate": 4.0444444444444444e-05,
736
- "loss": 0.0017,
737
  "step": 1020
738
  },
739
  {
740
  "epoch": 13.01,
741
  "learning_rate": 4.02962962962963e-05,
742
- "loss": 0.0015,
743
  "step": 1030
744
  },
745
  {
746
  "epoch": 13.02,
747
  "learning_rate": 4.014814814814815e-05,
748
- "loss": 0.0015,
749
  "step": 1040
750
  },
751
  {
752
  "epoch": 13.02,
753
  "learning_rate": 4e-05,
754
- "loss": 0.0017,
755
  "step": 1050
756
  },
757
  {
758
  "epoch": 13.02,
759
- "eval_accuracy": 0.972972972972973,
760
- "eval_loss": 0.16978278756141663,
761
- "eval_runtime": 2.7539,
762
- "eval_samples_per_second": 13.436,
763
- "eval_steps_per_second": 3.631,
764
  "step": 1050
765
  },
766
  {
@@ -772,25 +772,25 @@
772
  {
773
  "epoch": 14.01,
774
  "learning_rate": 3.97037037037037e-05,
775
- "loss": 0.0014,
776
  "step": 1070
777
  },
778
  {
779
  "epoch": 14.01,
780
  "learning_rate": 3.9555555555555556e-05,
781
- "loss": 0.0014,
782
  "step": 1080
783
  },
784
  {
785
  "epoch": 14.01,
786
  "learning_rate": 3.940740740740741e-05,
787
- "loss": 0.0013,
788
  "step": 1090
789
  },
790
  {
791
  "epoch": 14.01,
792
  "learning_rate": 3.925925925925926e-05,
793
- "loss": 0.0012,
794
  "step": 1100
795
  },
796
  {
@@ -802,73 +802,73 @@
802
  {
803
  "epoch": 14.02,
804
  "learning_rate": 3.896296296296296e-05,
805
- "loss": 0.0014,
806
  "step": 1120
807
  },
808
  {
809
  "epoch": 14.02,
810
- "eval_accuracy": 0.9459459459459459,
811
- "eval_loss": 0.19233068823814392,
812
- "eval_runtime": 2.8803,
813
- "eval_samples_per_second": 12.846,
814
- "eval_steps_per_second": 3.472,
815
  "step": 1125
816
  },
817
  {
818
  "epoch": 15.0,
819
  "learning_rate": 3.8814814814814814e-05,
820
- "loss": 0.0012,
821
  "step": 1130
822
  },
823
  {
824
  "epoch": 15.0,
825
  "learning_rate": 3.866666666666667e-05,
826
- "loss": 0.0013,
827
  "step": 1140
828
  },
829
  {
830
  "epoch": 15.01,
831
  "learning_rate": 3.851851851851852e-05,
832
- "loss": 0.0206,
833
  "step": 1150
834
  },
835
  {
836
  "epoch": 15.01,
837
  "learning_rate": 3.837037037037037e-05,
838
- "loss": 0.0014,
839
  "step": 1160
840
  },
841
  {
842
  "epoch": 15.01,
843
  "learning_rate": 3.8222222222222226e-05,
844
- "loss": 0.2069,
845
  "step": 1170
846
  },
847
  {
848
  "epoch": 15.01,
849
  "learning_rate": 3.807407407407408e-05,
850
- "loss": 0.0014,
851
  "step": 1180
852
  },
853
  {
854
  "epoch": 15.02,
855
  "learning_rate": 3.7925925925925925e-05,
856
- "loss": 0.0012,
857
  "step": 1190
858
  },
859
  {
860
  "epoch": 15.02,
861
  "learning_rate": 3.777777777777778e-05,
862
- "loss": 0.0014,
863
  "step": 1200
864
  },
865
  {
866
  "epoch": 15.02,
867
- "eval_accuracy": 0.972972972972973,
868
- "eval_loss": 0.15707702934741974,
869
- "eval_runtime": 2.8394,
870
- "eval_samples_per_second": 13.031,
871
- "eval_steps_per_second": 3.522,
872
  "step": 1200
873
  },
874
  {
@@ -880,139 +880,139 @@
880
  {
881
  "epoch": 16.01,
882
  "learning_rate": 3.7481481481481484e-05,
883
- "loss": 0.0018,
884
  "step": 1220
885
  },
886
  {
887
  "epoch": 16.01,
888
  "learning_rate": 3.733333333333334e-05,
889
- "loss": 0.1871,
890
  "step": 1230
891
  },
892
  {
893
  "epoch": 16.01,
894
  "learning_rate": 3.718518518518519e-05,
895
- "loss": 0.0012,
896
  "step": 1240
897
  },
898
  {
899
  "epoch": 16.01,
900
  "learning_rate": 3.7037037037037037e-05,
901
- "loss": 0.0014,
902
  "step": 1250
903
  },
904
  {
905
  "epoch": 16.02,
906
  "learning_rate": 3.688888888888889e-05,
907
- "loss": 0.132,
908
  "step": 1260
909
  },
910
  {
911
  "epoch": 16.02,
912
  "learning_rate": 3.674074074074074e-05,
913
- "loss": 0.0474,
914
  "step": 1270
915
  },
916
  {
917
  "epoch": 16.02,
918
- "eval_accuracy": 0.8918918918918919,
919
- "eval_loss": 0.5192855596542358,
920
- "eval_runtime": 2.8857,
921
- "eval_samples_per_second": 12.822,
922
- "eval_steps_per_second": 3.465,
923
  "step": 1275
924
  },
925
  {
926
  "epoch": 17.0,
927
  "learning_rate": 3.6592592592592596e-05,
928
- "loss": 0.0019,
929
  "step": 1280
930
  },
931
  {
932
  "epoch": 17.0,
933
  "learning_rate": 3.644444444444445e-05,
934
- "loss": 0.0013,
935
  "step": 1290
936
  },
937
  {
938
  "epoch": 17.01,
939
  "learning_rate": 3.62962962962963e-05,
940
- "loss": 0.1087,
941
  "step": 1300
942
  },
943
  {
944
  "epoch": 17.01,
945
  "learning_rate": 3.614814814814815e-05,
946
- "loss": 0.081,
947
  "step": 1310
948
  },
949
  {
950
  "epoch": 17.01,
951
  "learning_rate": 3.6e-05,
952
- "loss": 0.0019,
953
  "step": 1320
954
  },
955
  {
956
  "epoch": 17.01,
957
  "learning_rate": 3.5851851851851854e-05,
958
- "loss": 0.0012,
959
  "step": 1330
960
  },
961
  {
962
  "epoch": 17.02,
963
  "learning_rate": 3.570370370370371e-05,
964
- "loss": 0.0012,
965
  "step": 1340
966
  },
967
  {
968
  "epoch": 17.02,
969
  "learning_rate": 3.555555555555556e-05,
970
- "loss": 0.0011,
971
  "step": 1350
972
  },
973
  {
974
  "epoch": 17.02,
975
- "eval_accuracy": 0.972972972972973,
976
- "eval_loss": 0.14079852402210236,
977
- "eval_runtime": 3.0095,
978
- "eval_samples_per_second": 12.294,
979
- "eval_steps_per_second": 3.323,
980
  "step": 1350
981
  },
982
  {
983
  "epoch": 18.0,
984
  "learning_rate": 3.540740740740741e-05,
985
- "loss": 0.0009,
986
  "step": 1360
987
  },
988
  {
989
  "epoch": 18.01,
990
  "learning_rate": 3.525925925925926e-05,
991
- "loss": 0.0011,
992
  "step": 1370
993
  },
994
  {
995
  "epoch": 18.01,
996
  "learning_rate": 3.511111111111111e-05,
997
- "loss": 0.0277,
998
  "step": 1380
999
  },
1000
  {
1001
  "epoch": 18.01,
1002
  "learning_rate": 3.4962962962962965e-05,
1003
- "loss": 0.0011,
1004
  "step": 1390
1005
  },
1006
  {
1007
  "epoch": 18.01,
1008
  "learning_rate": 3.481481481481482e-05,
1009
- "loss": 0.0011,
1010
  "step": 1400
1011
  },
1012
  {
1013
  "epoch": 18.02,
1014
  "learning_rate": 3.466666666666667e-05,
1015
- "loss": 0.1377,
1016
  "step": 1410
1017
  },
1018
  {
@@ -1023,137 +1023,137 @@
1023
  },
1024
  {
1025
  "epoch": 18.02,
1026
- "eval_accuracy": 0.9459459459459459,
1027
- "eval_loss": 0.34057652950286865,
1028
- "eval_runtime": 2.8674,
1029
- "eval_samples_per_second": 12.904,
1030
- "eval_steps_per_second": 3.488,
1031
  "step": 1425
1032
  },
1033
  {
1034
  "epoch": 19.0,
1035
  "learning_rate": 3.437037037037037e-05,
1036
- "loss": 0.002,
1037
  "step": 1430
1038
  },
1039
  {
1040
  "epoch": 19.0,
1041
  "learning_rate": 3.4222222222222224e-05,
1042
- "loss": 0.0018,
1043
  "step": 1440
1044
  },
1045
  {
1046
  "epoch": 19.01,
1047
  "learning_rate": 3.4074074074074077e-05,
1048
- "loss": 0.2287,
1049
  "step": 1450
1050
  },
1051
  {
1052
  "epoch": 19.01,
1053
  "learning_rate": 3.392592592592593e-05,
1054
- "loss": 0.102,
1055
  "step": 1460
1056
  },
1057
  {
1058
  "epoch": 19.01,
1059
  "learning_rate": 3.377777777777778e-05,
1060
- "loss": 0.0785,
1061
  "step": 1470
1062
  },
1063
  {
1064
  "epoch": 19.01,
1065
  "learning_rate": 3.3629629629629636e-05,
1066
- "loss": 0.002,
1067
  "step": 1480
1068
  },
1069
  {
1070
  "epoch": 19.02,
1071
  "learning_rate": 3.348148148148148e-05,
1072
- "loss": 0.0357,
1073
  "step": 1490
1074
  },
1075
  {
1076
  "epoch": 19.02,
1077
  "learning_rate": 3.3333333333333335e-05,
1078
- "loss": 0.0034,
1079
  "step": 1500
1080
  },
1081
  {
1082
  "epoch": 19.02,
1083
- "eval_accuracy": 0.9459459459459459,
1084
- "eval_loss": 0.25160789489746094,
1085
- "eval_runtime": 2.8519,
1086
- "eval_samples_per_second": 12.974,
1087
- "eval_steps_per_second": 3.506,
1088
  "step": 1500
1089
  },
1090
  {
1091
  "epoch": 20.0,
1092
  "learning_rate": 3.318518518518519e-05,
1093
- "loss": 0.0609,
1094
  "step": 1510
1095
  },
1096
  {
1097
  "epoch": 20.01,
1098
  "learning_rate": 3.303703703703704e-05,
1099
- "loss": 0.0013,
1100
  "step": 1520
1101
  },
1102
  {
1103
  "epoch": 20.01,
1104
  "learning_rate": 3.2888888888888894e-05,
1105
- "loss": 0.0012,
1106
  "step": 1530
1107
  },
1108
  {
1109
  "epoch": 20.01,
1110
  "learning_rate": 3.274074074074075e-05,
1111
- "loss": 0.0014,
1112
  "step": 1540
1113
  },
1114
  {
1115
  "epoch": 20.01,
1116
  "learning_rate": 3.25925925925926e-05,
1117
- "loss": 0.1104,
1118
  "step": 1550
1119
  },
1120
  {
1121
  "epoch": 20.02,
1122
  "learning_rate": 3.2444444444444446e-05,
1123
- "loss": 0.0011,
1124
  "step": 1560
1125
  },
1126
  {
1127
  "epoch": 20.02,
1128
  "learning_rate": 3.22962962962963e-05,
1129
- "loss": 0.0029,
1130
  "step": 1570
1131
  },
1132
  {
1133
  "epoch": 20.02,
1134
- "eval_accuracy": 0.918918918918919,
1135
- "eval_loss": 0.29616308212280273,
1136
- "eval_runtime": 2.9222,
1137
- "eval_samples_per_second": 12.662,
1138
- "eval_steps_per_second": 3.422,
1139
  "step": 1575
1140
  },
1141
  {
1142
  "epoch": 21.0,
1143
  "learning_rate": 3.214814814814815e-05,
1144
- "loss": 0.0014,
1145
  "step": 1580
1146
  },
1147
  {
1148
  "epoch": 21.0,
1149
  "learning_rate": 3.2000000000000005e-05,
1150
- "loss": 0.0769,
1151
  "step": 1590
1152
  },
1153
  {
1154
  "epoch": 21.01,
1155
  "learning_rate": 3.185185185185185e-05,
1156
- "loss": 0.001,
1157
  "step": 1600
1158
  },
1159
  {
@@ -1165,64 +1165,64 @@
1165
  {
1166
  "epoch": 21.01,
1167
  "learning_rate": 3.155555555555556e-05,
1168
- "loss": 0.1219,
1169
  "step": 1620
1170
  },
1171
  {
1172
  "epoch": 21.01,
1173
  "learning_rate": 3.140740740740741e-05,
1174
- "loss": 0.001,
1175
  "step": 1630
1176
  },
1177
  {
1178
  "epoch": 21.02,
1179
  "learning_rate": 3.1259259259259264e-05,
1180
- "loss": 0.0013,
1181
  "step": 1640
1182
  },
1183
  {
1184
  "epoch": 21.02,
1185
  "learning_rate": 3.111111111111111e-05,
1186
- "loss": 0.0008,
1187
  "step": 1650
1188
  },
1189
  {
1190
  "epoch": 21.02,
1191
- "eval_accuracy": 0.918918918918919,
1192
- "eval_loss": 0.4023502469062805,
1193
- "eval_runtime": 2.9031,
1194
- "eval_samples_per_second": 12.745,
1195
- "eval_steps_per_second": 3.445,
1196
  "step": 1650
1197
  },
1198
  {
1199
  "epoch": 22.0,
1200
  "learning_rate": 3.096296296296296e-05,
1201
- "loss": 0.001,
1202
  "step": 1660
1203
  },
1204
  {
1205
  "epoch": 22.01,
1206
  "learning_rate": 3.0814814814814816e-05,
1207
- "loss": 0.0009,
1208
  "step": 1670
1209
  },
1210
  {
1211
  "epoch": 22.01,
1212
  "learning_rate": 3.066666666666667e-05,
1213
- "loss": 0.0022,
1214
  "step": 1680
1215
  },
1216
  {
1217
  "epoch": 22.01,
1218
  "learning_rate": 3.0518518518518515e-05,
1219
- "loss": 0.0009,
1220
  "step": 1690
1221
  },
1222
  {
1223
  "epoch": 22.01,
1224
  "learning_rate": 3.037037037037037e-05,
1225
- "loss": 0.0009,
1226
  "step": 1700
1227
  },
1228
  {
@@ -1234,22 +1234,22 @@
1234
  {
1235
  "epoch": 22.02,
1236
  "learning_rate": 3.0074074074074078e-05,
1237
- "loss": 0.0008,
1238
  "step": 1720
1239
  },
1240
  {
1241
  "epoch": 22.02,
1242
- "eval_accuracy": 0.918918918918919,
1243
- "eval_loss": 0.4643724262714386,
1244
- "eval_runtime": 2.9019,
1245
- "eval_samples_per_second": 12.75,
1246
- "eval_steps_per_second": 3.446,
1247
  "step": 1725
1248
  },
1249
  {
1250
  "epoch": 23.0,
1251
  "learning_rate": 2.992592592592593e-05,
1252
- "loss": 0.0009,
1253
  "step": 1730
1254
  },
1255
  {
@@ -1261,70 +1261,70 @@
1261
  {
1262
  "epoch": 23.01,
1263
  "learning_rate": 2.962962962962963e-05,
1264
- "loss": 0.0336,
1265
  "step": 1750
1266
  },
1267
  {
1268
  "epoch": 23.01,
1269
  "learning_rate": 2.9481481481481483e-05,
1270
- "loss": 0.0044,
1271
  "step": 1760
1272
  },
1273
  {
1274
  "epoch": 23.01,
1275
  "learning_rate": 2.9333333333333336e-05,
1276
- "loss": 0.0008,
1277
  "step": 1770
1278
  },
1279
  {
1280
  "epoch": 23.01,
1281
  "learning_rate": 2.918518518518519e-05,
1282
- "loss": 0.001,
1283
  "step": 1780
1284
  },
1285
  {
1286
  "epoch": 23.02,
1287
  "learning_rate": 2.9037037037037042e-05,
1288
- "loss": 0.0008,
1289
  "step": 1790
1290
  },
1291
  {
1292
  "epoch": 23.02,
1293
  "learning_rate": 2.8888888888888888e-05,
1294
- "loss": 0.1521,
1295
  "step": 1800
1296
  },
1297
  {
1298
  "epoch": 23.02,
1299
- "eval_accuracy": 0.918918918918919,
1300
- "eval_loss": 0.48252156376838684,
1301
- "eval_runtime": 2.9421,
1302
- "eval_samples_per_second": 12.576,
1303
- "eval_steps_per_second": 3.399,
1304
  "step": 1800
1305
  },
1306
  {
1307
  "epoch": 24.0,
1308
  "learning_rate": 2.874074074074074e-05,
1309
- "loss": 0.0023,
1310
  "step": 1810
1311
  },
1312
  {
1313
  "epoch": 24.01,
1314
  "learning_rate": 2.8592592592592594e-05,
1315
- "loss": 0.0008,
1316
  "step": 1820
1317
  },
1318
  {
1319
  "epoch": 24.01,
1320
  "learning_rate": 2.8444444444444447e-05,
1321
- "loss": 0.0074,
1322
  "step": 1830
1323
  },
1324
  {
1325
  "epoch": 24.01,
1326
  "learning_rate": 2.8296296296296297e-05,
1327
- "loss": 0.0009,
1328
  "step": 1840
1329
  },
1330
  {
@@ -1336,22 +1336,22 @@
1336
  {
1337
  "epoch": 24.02,
1338
  "learning_rate": 2.8000000000000003e-05,
1339
- "loss": 0.0007,
1340
  "step": 1860
1341
  },
1342
  {
1343
  "epoch": 24.02,
1344
  "learning_rate": 2.7851851851851853e-05,
1345
- "loss": 0.001,
1346
  "step": 1870
1347
  },
1348
  {
1349
  "epoch": 24.02,
1350
- "eval_accuracy": 0.918918918918919,
1351
- "eval_loss": 0.6339796185493469,
1352
- "eval_runtime": 2.8461,
1353
- "eval_samples_per_second": 13.0,
1354
- "eval_steps_per_second": 3.514,
1355
  "step": 1875
1356
  },
1357
  {
@@ -1369,97 +1369,97 @@
1369
  {
1370
  "epoch": 25.01,
1371
  "learning_rate": 2.7407407407407408e-05,
1372
- "loss": 0.0008,
1373
  "step": 1900
1374
  },
1375
  {
1376
  "epoch": 25.01,
1377
  "learning_rate": 2.725925925925926e-05,
1378
- "loss": 0.001,
1379
  "step": 1910
1380
  },
1381
  {
1382
  "epoch": 25.01,
1383
  "learning_rate": 2.7111111111111114e-05,
1384
- "loss": 0.0007,
1385
  "step": 1920
1386
  },
1387
  {
1388
  "epoch": 25.01,
1389
  "learning_rate": 2.696296296296296e-05,
1390
- "loss": 0.0007,
1391
  "step": 1930
1392
  },
1393
  {
1394
  "epoch": 25.02,
1395
  "learning_rate": 2.6814814814814814e-05,
1396
- "loss": 0.0007,
1397
  "step": 1940
1398
  },
1399
  {
1400
  "epoch": 25.02,
1401
  "learning_rate": 2.6666666666666667e-05,
1402
- "loss": 0.0245,
1403
  "step": 1950
1404
  },
1405
  {
1406
  "epoch": 25.02,
1407
- "eval_accuracy": 0.9459459459459459,
1408
- "eval_loss": 0.3778836727142334,
1409
- "eval_runtime": 2.8022,
1410
- "eval_samples_per_second": 13.204,
1411
- "eval_steps_per_second": 3.569,
1412
  "step": 1950
1413
  },
1414
  {
1415
  "epoch": 26.0,
1416
  "learning_rate": 2.651851851851852e-05,
1417
- "loss": 0.0007,
1418
  "step": 1960
1419
  },
1420
  {
1421
  "epoch": 26.01,
1422
  "learning_rate": 2.6370370370370373e-05,
1423
- "loss": 0.0007,
1424
  "step": 1970
1425
  },
1426
  {
1427
  "epoch": 26.01,
1428
  "learning_rate": 2.6222222222222226e-05,
1429
- "loss": 0.0007,
1430
  "step": 1980
1431
  },
1432
  {
1433
  "epoch": 26.01,
1434
  "learning_rate": 2.6074074074074072e-05,
1435
- "loss": 0.0007,
1436
  "step": 1990
1437
  },
1438
  {
1439
  "epoch": 26.01,
1440
  "learning_rate": 2.5925925925925925e-05,
1441
- "loss": 0.0947,
1442
  "step": 2000
1443
  },
1444
  {
1445
  "epoch": 26.02,
1446
  "learning_rate": 2.5777777777777778e-05,
1447
- "loss": 0.001,
1448
  "step": 2010
1449
  },
1450
  {
1451
  "epoch": 26.02,
1452
  "learning_rate": 2.562962962962963e-05,
1453
- "loss": 0.0007,
1454
  "step": 2020
1455
  },
1456
  {
1457
  "epoch": 26.02,
1458
- "eval_accuracy": 0.9459459459459459,
1459
- "eval_loss": 0.3375699818134308,
1460
- "eval_runtime": 2.7928,
1461
- "eval_samples_per_second": 13.248,
1462
- "eval_steps_per_second": 3.581,
1463
  "step": 2025
1464
  },
1465
  {
@@ -1471,13 +1471,13 @@
1471
  {
1472
  "epoch": 27.0,
1473
  "learning_rate": 2.5333333333333337e-05,
1474
- "loss": 0.0012,
1475
  "step": 2040
1476
  },
1477
  {
1478
  "epoch": 27.01,
1479
  "learning_rate": 2.5185185185185183e-05,
1480
- "loss": 0.0007,
1481
  "step": 2050
1482
  },
1483
  {
@@ -1489,64 +1489,64 @@
1489
  {
1490
  "epoch": 27.01,
1491
  "learning_rate": 2.488888888888889e-05,
1492
- "loss": 0.0927,
1493
  "step": 2070
1494
  },
1495
  {
1496
  "epoch": 27.01,
1497
  "learning_rate": 2.4740740740740742e-05,
1498
- "loss": 0.0817,
1499
  "step": 2080
1500
  },
1501
  {
1502
  "epoch": 27.02,
1503
  "learning_rate": 2.4592592592592595e-05,
1504
- "loss": 0.2909,
1505
  "step": 2090
1506
  },
1507
  {
1508
  "epoch": 27.02,
1509
  "learning_rate": 2.4444444444444445e-05,
1510
- "loss": 0.0011,
1511
  "step": 2100
1512
  },
1513
  {
1514
  "epoch": 27.02,
1515
- "eval_accuracy": 0.9459459459459459,
1516
- "eval_loss": 0.28334298729896545,
1517
- "eval_runtime": 2.7397,
1518
- "eval_samples_per_second": 13.505,
1519
- "eval_steps_per_second": 3.65,
1520
  "step": 2100
1521
  },
1522
  {
1523
  "epoch": 28.0,
1524
  "learning_rate": 2.4296296296296298e-05,
1525
- "loss": 0.0007,
1526
  "step": 2110
1527
  },
1528
  {
1529
  "epoch": 28.01,
1530
  "learning_rate": 2.414814814814815e-05,
1531
- "loss": 0.0007,
1532
  "step": 2120
1533
  },
1534
  {
1535
  "epoch": 28.01,
1536
  "learning_rate": 2.4e-05,
1537
- "loss": 0.0007,
1538
  "step": 2130
1539
  },
1540
  {
1541
  "epoch": 28.01,
1542
  "learning_rate": 2.3851851851851854e-05,
1543
- "loss": 0.0007,
1544
  "step": 2140
1545
  },
1546
  {
1547
  "epoch": 28.01,
1548
  "learning_rate": 2.3703703703703707e-05,
1549
- "loss": 0.0007,
1550
  "step": 2150
1551
  },
1552
  {
@@ -1558,16 +1558,16 @@
1558
  {
1559
  "epoch": 28.02,
1560
  "learning_rate": 2.340740740740741e-05,
1561
- "loss": 0.0008,
1562
  "step": 2170
1563
  },
1564
  {
1565
  "epoch": 28.02,
1566
- "eval_accuracy": 0.972972972972973,
1567
- "eval_loss": 0.15925714373588562,
1568
- "eval_runtime": 2.6329,
1569
- "eval_samples_per_second": 14.053,
1570
- "eval_steps_per_second": 3.798,
1571
  "step": 2175
1572
  },
1573
  {
@@ -1579,25 +1579,25 @@
1579
  {
1580
  "epoch": 29.0,
1581
  "learning_rate": 2.3111111111111112e-05,
1582
- "loss": 0.0015,
1583
  "step": 2190
1584
  },
1585
  {
1586
  "epoch": 29.01,
1587
  "learning_rate": 2.2962962962962965e-05,
1588
- "loss": 0.0008,
1589
  "step": 2200
1590
  },
1591
  {
1592
  "epoch": 29.01,
1593
  "learning_rate": 2.2814814814814818e-05,
1594
- "loss": 0.0007,
1595
  "step": 2210
1596
  },
1597
  {
1598
  "epoch": 29.01,
1599
  "learning_rate": 2.2666666666666668e-05,
1600
- "loss": 0.0006,
1601
  "step": 2220
1602
  },
1603
  {
@@ -1609,28 +1609,28 @@
1609
  {
1610
  "epoch": 29.02,
1611
  "learning_rate": 2.2370370370370374e-05,
1612
- "loss": 0.0953,
1613
  "step": 2240
1614
  },
1615
  {
1616
  "epoch": 29.02,
1617
  "learning_rate": 2.2222222222222223e-05,
1618
- "loss": 0.0008,
1619
  "step": 2250
1620
  },
1621
  {
1622
  "epoch": 29.02,
1623
- "eval_accuracy": 0.972972972972973,
1624
- "eval_loss": 0.08563826233148575,
1625
- "eval_runtime": 2.7114,
1626
- "eval_samples_per_second": 13.646,
1627
- "eval_steps_per_second": 3.688,
1628
  "step": 2250
1629
  },
1630
  {
1631
  "epoch": 30.0,
1632
  "learning_rate": 2.2074074074074076e-05,
1633
- "loss": 0.0006,
1634
  "step": 2260
1635
  },
1636
  {
@@ -1642,19 +1642,19 @@
1642
  {
1643
  "epoch": 30.01,
1644
  "learning_rate": 2.177777777777778e-05,
1645
- "loss": 0.0006,
1646
  "step": 2280
1647
  },
1648
  {
1649
  "epoch": 30.01,
1650
  "learning_rate": 2.162962962962963e-05,
1651
- "loss": 0.0006,
1652
  "step": 2290
1653
  },
1654
  {
1655
  "epoch": 30.01,
1656
  "learning_rate": 2.148148148148148e-05,
1657
- "loss": 0.0007,
1658
  "step": 2300
1659
  },
1660
  {
@@ -1666,28 +1666,28 @@
1666
  {
1667
  "epoch": 30.02,
1668
  "learning_rate": 2.1185185185185184e-05,
1669
- "loss": 0.0005,
1670
  "step": 2320
1671
  },
1672
  {
1673
  "epoch": 30.02,
1674
- "eval_accuracy": 0.972972972972973,
1675
- "eval_loss": 0.1049196645617485,
1676
- "eval_runtime": 2.6802,
1677
- "eval_samples_per_second": 13.805,
1678
- "eval_steps_per_second": 3.731,
1679
  "step": 2325
1680
  },
1681
  {
1682
  "epoch": 31.0,
1683
  "learning_rate": 2.1037037037037037e-05,
1684
- "loss": 0.0006,
1685
  "step": 2330
1686
  },
1687
  {
1688
  "epoch": 31.0,
1689
  "learning_rate": 2.088888888888889e-05,
1690
- "loss": 0.0005,
1691
  "step": 2340
1692
  },
1693
  {
@@ -1728,11 +1728,11 @@
1728
  },
1729
  {
1730
  "epoch": 31.02,
1731
- "eval_accuracy": 0.972972972972973,
1732
- "eval_loss": 0.11323297023773193,
1733
- "eval_runtime": 3.2205,
1734
- "eval_samples_per_second": 11.489,
1735
- "eval_steps_per_second": 3.105,
1736
  "step": 2400
1737
  },
1738
  {
@@ -1750,7 +1750,7 @@
1750
  {
1751
  "epoch": 32.01,
1752
  "learning_rate": 1.9555555555555557e-05,
1753
- "loss": 0.0005,
1754
  "step": 2430
1755
  },
1756
  {
@@ -1779,11 +1779,11 @@
1779
  },
1780
  {
1781
  "epoch": 32.02,
1782
- "eval_accuracy": 0.972972972972973,
1783
- "eval_loss": 0.11638977378606796,
1784
- "eval_runtime": 2.5685,
1785
- "eval_samples_per_second": 14.405,
1786
- "eval_steps_per_second": 3.893,
1787
  "step": 2475
1788
  },
1789
  {
@@ -1795,7 +1795,7 @@
1795
  {
1796
  "epoch": 33.0,
1797
  "learning_rate": 1.866666666666667e-05,
1798
- "loss": 0.0005,
1799
  "step": 2490
1800
  },
1801
  {
@@ -1813,7 +1813,7 @@
1813
  {
1814
  "epoch": 33.01,
1815
  "learning_rate": 1.8222222222222224e-05,
1816
- "loss": 0.0005,
1817
  "step": 2520
1818
  },
1819
  {
@@ -1831,16 +1831,16 @@
1831
  {
1832
  "epoch": 33.02,
1833
  "learning_rate": 1.777777777777778e-05,
1834
- "loss": 0.0005,
1835
  "step": 2550
1836
  },
1837
  {
1838
  "epoch": 33.02,
1839
- "eval_accuracy": 0.972972972972973,
1840
- "eval_loss": 0.12427014112472534,
1841
- "eval_runtime": 2.5792,
1842
- "eval_samples_per_second": 14.345,
1843
- "eval_steps_per_second": 3.877,
1844
  "step": 2550
1845
  },
1846
  {
@@ -1887,11 +1887,11 @@
1887
  },
1888
  {
1889
  "epoch": 34.02,
1890
- "eval_accuracy": 0.972972972972973,
1891
- "eval_loss": 0.1306069791316986,
1892
- "eval_runtime": 2.6024,
1893
- "eval_samples_per_second": 14.217,
1894
- "eval_steps_per_second": 3.843,
1895
  "step": 2625
1896
  },
1897
  {
@@ -1915,7 +1915,7 @@
1915
  {
1916
  "epoch": 35.01,
1917
  "learning_rate": 1.614814814814815e-05,
1918
- "loss": 0.0007,
1919
  "step": 2660
1920
  },
1921
  {
@@ -1933,7 +1933,7 @@
1933
  {
1934
  "epoch": 35.02,
1935
  "learning_rate": 1.5703703703703705e-05,
1936
- "loss": 0.0005,
1937
  "step": 2690
1938
  },
1939
  {
@@ -1944,17 +1944,17 @@
1944
  },
1945
  {
1946
  "epoch": 35.02,
1947
- "eval_accuracy": 0.9459459459459459,
1948
- "eval_loss": 0.39192795753479004,
1949
- "eval_runtime": 2.5502,
1950
- "eval_samples_per_second": 14.509,
1951
- "eval_steps_per_second": 3.921,
1952
  "step": 2700
1953
  },
1954
  {
1955
  "epoch": 36.0,
1956
  "learning_rate": 1.5407407407407408e-05,
1957
- "loss": 0.0005,
1958
  "step": 2710
1959
  },
1960
  {
@@ -1984,7 +1984,7 @@
1984
  {
1985
  "epoch": 36.02,
1986
  "learning_rate": 1.4666666666666668e-05,
1987
- "loss": 0.0005,
1988
  "step": 2760
1989
  },
1990
  {
@@ -1995,17 +1995,17 @@
1995
  },
1996
  {
1997
  "epoch": 36.02,
1998
- "eval_accuracy": 0.9459459459459459,
1999
- "eval_loss": 0.36302804946899414,
2000
- "eval_runtime": 2.4982,
2001
- "eval_samples_per_second": 14.811,
2002
- "eval_steps_per_second": 4.003,
2003
  "step": 2775
2004
  },
2005
  {
2006
  "epoch": 37.0,
2007
  "learning_rate": 1.437037037037037e-05,
2008
- "loss": 0.0005,
2009
  "step": 2780
2010
  },
2011
  {
@@ -2023,7 +2023,7 @@
2023
  {
2024
  "epoch": 37.01,
2025
  "learning_rate": 1.3925925925925926e-05,
2026
- "loss": 0.0005,
2027
  "step": 2810
2028
  },
2029
  {
@@ -2035,13 +2035,13 @@
2035
  {
2036
  "epoch": 37.01,
2037
  "learning_rate": 1.362962962962963e-05,
2038
- "loss": 0.0005,
2039
  "step": 2830
2040
  },
2041
  {
2042
  "epoch": 37.02,
2043
  "learning_rate": 1.348148148148148e-05,
2044
- "loss": 0.0005,
2045
  "step": 2840
2046
  },
2047
  {
@@ -2052,11 +2052,11 @@
2052
  },
2053
  {
2054
  "epoch": 37.02,
2055
- "eval_accuracy": 0.9459459459459459,
2056
- "eval_loss": 0.2762458324432373,
2057
- "eval_runtime": 2.5203,
2058
- "eval_samples_per_second": 14.681,
2059
- "eval_steps_per_second": 3.968,
2060
  "step": 2850
2061
  },
2062
  {
@@ -2086,7 +2086,7 @@
2086
  {
2087
  "epoch": 38.01,
2088
  "learning_rate": 1.2592592592592592e-05,
2089
- "loss": 0.0004,
2090
  "step": 2900
2091
  },
2092
  {
@@ -2098,16 +2098,16 @@
2098
  {
2099
  "epoch": 38.02,
2100
  "learning_rate": 1.2296296296296298e-05,
2101
- "loss": 0.0005,
2102
  "step": 2920
2103
  },
2104
  {
2105
  "epoch": 38.02,
2106
- "eval_accuracy": 0.9459459459459459,
2107
- "eval_loss": 0.23680266737937927,
2108
- "eval_runtime": 2.5362,
2109
- "eval_samples_per_second": 14.589,
2110
- "eval_steps_per_second": 3.943,
2111
  "step": 2925
2112
  },
2113
  {
@@ -2125,13 +2125,13 @@
2125
  {
2126
  "epoch": 39.01,
2127
  "learning_rate": 1.1851851851851853e-05,
2128
- "loss": 0.0005,
2129
  "step": 2950
2130
  },
2131
  {
2132
  "epoch": 39.01,
2133
  "learning_rate": 1.1703703703703705e-05,
2134
- "loss": 0.0005,
2135
  "step": 2960
2136
  },
2137
  {
@@ -2160,11 +2160,11 @@
2160
  },
2161
  {
2162
  "epoch": 39.02,
2163
- "eval_accuracy": 0.972972972972973,
2164
- "eval_loss": 0.1935373693704605,
2165
- "eval_runtime": 2.5511,
2166
- "eval_samples_per_second": 14.504,
2167
- "eval_steps_per_second": 3.92,
2168
  "step": 3000
2169
  },
2170
  {
@@ -2211,11 +2211,11 @@
2211
  },
2212
  {
2213
  "epoch": 40.02,
2214
- "eval_accuracy": 0.972972972972973,
2215
- "eval_loss": 0.19308657944202423,
2216
- "eval_runtime": 2.5348,
2217
- "eval_samples_per_second": 14.597,
2218
- "eval_steps_per_second": 3.945,
2219
  "step": 3075
2220
  },
2221
  {
@@ -2227,7 +2227,7 @@
2227
  {
2228
  "epoch": 41.0,
2229
  "learning_rate": 9.777777777777779e-06,
2230
- "loss": 0.0085,
2231
  "step": 3090
2232
  },
2233
  {
@@ -2257,7 +2257,7 @@
2257
  {
2258
  "epoch": 41.02,
2259
  "learning_rate": 9.037037037037037e-06,
2260
- "loss": 0.0005,
2261
  "step": 3140
2262
  },
2263
  {
@@ -2268,11 +2268,11 @@
2268
  },
2269
  {
2270
  "epoch": 41.02,
2271
- "eval_accuracy": 0.9459459459459459,
2272
- "eval_loss": 0.21387839317321777,
2273
- "eval_runtime": 2.5103,
2274
- "eval_samples_per_second": 14.739,
2275
- "eval_steps_per_second": 3.984,
2276
  "step": 3150
2277
  },
2278
  {
@@ -2284,7 +2284,7 @@
2284
  {
2285
  "epoch": 42.01,
2286
  "learning_rate": 8.592592592592593e-06,
2287
- "loss": 0.0005,
2288
  "step": 3170
2289
  },
2290
  {
@@ -2319,17 +2319,17 @@
2319
  },
2320
  {
2321
  "epoch": 42.02,
2322
- "eval_accuracy": 0.972972972972973,
2323
- "eval_loss": 0.1899683177471161,
2324
- "eval_runtime": 2.6252,
2325
- "eval_samples_per_second": 14.094,
2326
- "eval_steps_per_second": 3.809,
2327
  "step": 3225
2328
  },
2329
  {
2330
  "epoch": 43.0,
2331
  "learning_rate": 7.703703703703704e-06,
2332
- "loss": 0.0015,
2333
  "step": 3230
2334
  },
2335
  {
@@ -2371,16 +2371,16 @@
2371
  {
2372
  "epoch": 43.02,
2373
  "learning_rate": 6.666666666666667e-06,
2374
- "loss": 0.0006,
2375
  "step": 3300
2376
  },
2377
  {
2378
  "epoch": 43.02,
2379
- "eval_accuracy": 0.972972972972973,
2380
- "eval_loss": 0.1750660091638565,
2381
- "eval_runtime": 2.5683,
2382
- "eval_samples_per_second": 14.407,
2383
- "eval_steps_per_second": 3.894,
2384
  "step": 3300
2385
  },
2386
  {
@@ -2427,11 +2427,11 @@
2427
  },
2428
  {
2429
  "epoch": 44.02,
2430
- "eval_accuracy": 0.9459459459459459,
2431
- "eval_loss": 0.29775792360305786,
2432
- "eval_runtime": 2.6767,
2433
- "eval_samples_per_second": 13.823,
2434
- "eval_steps_per_second": 3.736,
2435
  "step": 3375
2436
  },
2437
  {
@@ -2484,11 +2484,11 @@
2484
  },
2485
  {
2486
  "epoch": 45.02,
2487
- "eval_accuracy": 0.9459459459459459,
2488
- "eval_loss": 0.2776608467102051,
2489
- "eval_runtime": 2.5843,
2490
- "eval_samples_per_second": 14.317,
2491
- "eval_steps_per_second": 3.87,
2492
  "step": 3450
2493
  },
2494
  {
@@ -2535,11 +2535,11 @@
2535
  },
2536
  {
2537
  "epoch": 46.02,
2538
- "eval_accuracy": 0.9459459459459459,
2539
- "eval_loss": 0.2706151008605957,
2540
- "eval_runtime": 2.6031,
2541
- "eval_samples_per_second": 14.214,
2542
- "eval_steps_per_second": 3.842,
2543
  "step": 3525
2544
  },
2545
  {
@@ -2592,11 +2592,11 @@
2592
  },
2593
  {
2594
  "epoch": 47.02,
2595
- "eval_accuracy": 0.9459459459459459,
2596
- "eval_loss": 0.26377302408218384,
2597
- "eval_runtime": 2.5538,
2598
- "eval_samples_per_second": 14.488,
2599
- "eval_steps_per_second": 3.916,
2600
  "step": 3600
2601
  },
2602
  {
@@ -2608,7 +2608,7 @@
2608
  {
2609
  "epoch": 48.01,
2610
  "learning_rate": 1.925925925925926e-06,
2611
- "loss": 0.0003,
2612
  "step": 3620
2613
  },
2614
  {
@@ -2620,7 +2620,7 @@
2620
  {
2621
  "epoch": 48.01,
2622
  "learning_rate": 1.6296296296296295e-06,
2623
- "loss": 0.0005,
2624
  "step": 3640
2625
  },
2626
  {
@@ -2643,11 +2643,11 @@
2643
  },
2644
  {
2645
  "epoch": 48.02,
2646
- "eval_accuracy": 0.9459459459459459,
2647
- "eval_loss": 0.21225357055664062,
2648
- "eval_runtime": 2.6586,
2649
- "eval_samples_per_second": 13.917,
2650
- "eval_steps_per_second": 3.761,
2651
  "step": 3675
2652
  },
2653
  {
@@ -2700,38 +2700,38 @@
2700
  },
2701
  {
2702
  "epoch": 49.02,
2703
- "eval_accuracy": 0.9459459459459459,
2704
- "eval_loss": 0.21062754094600677,
2705
- "eval_runtime": 2.7099,
2706
- "eval_samples_per_second": 13.653,
2707
- "eval_steps_per_second": 3.69,
2708
  "step": 3750
2709
  },
2710
  {
2711
  "epoch": 49.02,
2712
  "step": 3750,
2713
  "total_flos": 1.86923023515648e+19,
2714
- "train_loss": 0.16172246270999313,
2715
- "train_runtime": 3621.2802,
2716
- "train_samples_per_second": 4.142,
2717
- "train_steps_per_second": 1.036
2718
  },
2719
  {
2720
  "epoch": 49.02,
2721
- "eval_accuracy": 0.8850574712643678,
2722
- "eval_loss": 0.38414710760116577,
2723
- "eval_runtime": 8.6129,
2724
- "eval_samples_per_second": 10.101,
2725
- "eval_steps_per_second": 2.554,
2726
  "step": 3750
2727
  },
2728
  {
2729
  "epoch": 49.02,
2730
- "eval_accuracy": 0.8850574712643678,
2731
- "eval_loss": 0.38414719700813293,
2732
- "eval_runtime": 6.1302,
2733
- "eval_samples_per_second": 14.192,
2734
- "eval_steps_per_second": 3.589,
2735
  "step": 3750
2736
  }
2737
  ],
 
1
  {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-1950",
4
  "epoch": 49.02,
5
  "global_step": 3750,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 0.0,
12
  "learning_rate": 1.3333333333333334e-06,
13
+ "loss": 2.3504,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.01,
18
  "learning_rate": 2.666666666666667e-06,
19
+ "loss": 2.409,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.01,
24
  "learning_rate": 4.000000000000001e-06,
25
+ "loss": 2.3102,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.01,
30
  "learning_rate": 5.333333333333334e-06,
31
+ "loss": 2.3537,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.01,
36
  "learning_rate": 6.666666666666667e-06,
37
+ "loss": 2.2566,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.02,
42
  "learning_rate": 8.000000000000001e-06,
43
+ "loss": 2.2599,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.02,
48
  "learning_rate": 9.333333333333334e-06,
49
+ "loss": 2.3212,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.02,
54
+ "eval_accuracy": 0.25806451612903225,
55
+ "eval_loss": 2.2295100688934326,
56
+ "eval_runtime": 2.0436,
57
+ "eval_samples_per_second": 15.169,
58
+ "eval_steps_per_second": 3.915,
59
  "step": 75
60
  },
61
  {
62
  "epoch": 1.0,
63
  "learning_rate": 1.0666666666666667e-05,
64
+ "loss": 2.235,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 1.0,
69
  "learning_rate": 1.2e-05,
70
+ "loss": 2.1721,
71
  "step": 90
72
  },
73
  {
74
  "epoch": 1.01,
75
  "learning_rate": 1.3333333333333333e-05,
76
+ "loss": 2.1171,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 1.01,
81
  "learning_rate": 1.4666666666666668e-05,
82
+ "loss": 2.1702,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 1.01,
87
  "learning_rate": 1.6000000000000003e-05,
88
+ "loss": 2.0646,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 1.01,
93
  "learning_rate": 1.7333333333333336e-05,
94
+ "loss": 1.9602,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 1.02,
99
  "learning_rate": 1.866666666666667e-05,
100
+ "loss": 1.9113,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.02,
105
  "learning_rate": 2e-05,
106
+ "loss": 1.7775,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.02,
111
+ "eval_accuracy": 0.3870967741935484,
112
+ "eval_loss": 1.780012845993042,
113
+ "eval_runtime": 2.1066,
114
+ "eval_samples_per_second": 14.716,
115
+ "eval_steps_per_second": 3.798,
116
  "step": 150
117
  },
118
  {
119
  "epoch": 2.0,
120
  "learning_rate": 2.1333333333333335e-05,
121
+ "loss": 1.5096,
122
  "step": 160
123
  },
124
  {
125
  "epoch": 2.01,
126
  "learning_rate": 2.2666666666666668e-05,
127
+ "loss": 1.4483,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 2.01,
132
  "learning_rate": 2.4e-05,
133
+ "loss": 1.4243,
134
  "step": 180
135
  },
136
  {
137
  "epoch": 2.01,
138
  "learning_rate": 2.5333333333333337e-05,
139
+ "loss": 1.1883,
140
  "step": 190
141
  },
142
  {
143
  "epoch": 2.01,
144
  "learning_rate": 2.6666666666666667e-05,
145
+ "loss": 0.9991,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 2.02,
150
  "learning_rate": 2.8000000000000003e-05,
151
+ "loss": 1.0043,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 2.02,
156
  "learning_rate": 2.9333333333333336e-05,
157
+ "loss": 1.0633,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 2.02,
162
+ "eval_accuracy": 0.5483870967741935,
163
+ "eval_loss": 0.9655246734619141,
164
+ "eval_runtime": 2.0203,
165
+ "eval_samples_per_second": 15.344,
166
+ "eval_steps_per_second": 3.96,
167
  "step": 225
168
  },
169
  {
170
  "epoch": 3.0,
171
  "learning_rate": 3.066666666666667e-05,
172
+ "loss": 0.7053,
173
  "step": 230
174
  },
175
  {
176
  "epoch": 3.0,
177
  "learning_rate": 3.2000000000000005e-05,
178
+ "loss": 0.7653,
179
  "step": 240
180
  },
181
  {
182
  "epoch": 3.01,
183
  "learning_rate": 3.3333333333333335e-05,
184
+ "loss": 0.5797,
185
  "step": 250
186
  },
187
  {
188
  "epoch": 3.01,
189
  "learning_rate": 3.466666666666667e-05,
190
+ "loss": 0.5341,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 3.01,
195
  "learning_rate": 3.6e-05,
196
+ "loss": 0.7323,
197
  "step": 270
198
  },
199
  {
200
  "epoch": 3.01,
201
  "learning_rate": 3.733333333333334e-05,
202
+ "loss": 0.4958,
203
  "step": 280
204
  },
205
  {
206
  "epoch": 3.02,
207
  "learning_rate": 3.866666666666667e-05,
208
+ "loss": 0.6096,
209
  "step": 290
210
  },
211
  {
212
  "epoch": 3.02,
213
  "learning_rate": 4e-05,
214
+ "loss": 0.3783,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 3.02,
219
+ "eval_accuracy": 0.7419354838709677,
220
+ "eval_loss": 0.5900937914848328,
221
+ "eval_runtime": 2.0722,
222
+ "eval_samples_per_second": 14.96,
223
+ "eval_steps_per_second": 3.861,
224
  "step": 300
225
  },
226
  {
227
  "epoch": 4.0,
228
  "learning_rate": 4.133333333333333e-05,
229
+ "loss": 0.2128,
230
  "step": 310
231
  },
232
  {
233
  "epoch": 4.01,
234
  "learning_rate": 4.266666666666667e-05,
235
+ "loss": 0.5407,
236
  "step": 320
237
  },
238
  {
239
  "epoch": 4.01,
240
  "learning_rate": 4.4000000000000006e-05,
241
+ "loss": 0.3279,
242
  "step": 330
243
  },
244
  {
245
  "epoch": 4.01,
246
  "learning_rate": 4.5333333333333335e-05,
247
+ "loss": 0.4606,
248
  "step": 340
249
  },
250
  {
251
  "epoch": 4.01,
252
  "learning_rate": 4.666666666666667e-05,
253
+ "loss": 0.4715,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 4.02,
258
  "learning_rate": 4.8e-05,
259
+ "loss": 0.3578,
260
  "step": 360
261
  },
262
  {
263
  "epoch": 4.02,
264
  "learning_rate": 4.933333333333334e-05,
265
+ "loss": 0.756,
266
  "step": 370
267
  },
268
  {
269
  "epoch": 4.02,
270
+ "eval_accuracy": 0.6774193548387096,
271
+ "eval_loss": 0.9142343401908875,
272
+ "eval_runtime": 2.1263,
273
+ "eval_samples_per_second": 14.579,
274
+ "eval_steps_per_second": 3.762,
275
  "step": 375
276
  },
277
  {
278
  "epoch": 5.0,
279
  "learning_rate": 4.9925925925925926e-05,
280
+ "loss": 0.2887,
281
  "step": 380
282
  },
283
  {
284
  "epoch": 5.0,
285
  "learning_rate": 4.977777777777778e-05,
286
+ "loss": 1.0616,
287
  "step": 390
288
  },
289
  {
290
  "epoch": 5.01,
291
  "learning_rate": 4.962962962962963e-05,
292
+ "loss": 0.2231,
293
  "step": 400
294
  },
295
  {
296
  "epoch": 5.01,
297
  "learning_rate": 4.9481481481481485e-05,
298
+ "loss": 0.4401,
299
  "step": 410
300
  },
301
  {
302
  "epoch": 5.01,
303
  "learning_rate": 4.933333333333334e-05,
304
+ "loss": 0.1261,
305
  "step": 420
306
  },
307
  {
308
  "epoch": 5.01,
309
  "learning_rate": 4.918518518518519e-05,
310
+ "loss": 0.655,
311
  "step": 430
312
  },
313
  {
314
  "epoch": 5.02,
315
  "learning_rate": 4.903703703703704e-05,
316
+ "loss": 0.4523,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 5.02,
321
  "learning_rate": 4.888888888888889e-05,
322
+ "loss": 0.5186,
323
  "step": 450
324
  },
325
  {
326
  "epoch": 5.02,
327
+ "eval_accuracy": 0.7741935483870968,
328
+ "eval_loss": 0.7383989095687866,
329
+ "eval_runtime": 2.1403,
330
+ "eval_samples_per_second": 14.484,
331
+ "eval_steps_per_second": 3.738,
332
  "step": 450
333
  },
334
  {
335
  "epoch": 6.0,
336
  "learning_rate": 4.874074074074074e-05,
337
+ "loss": 0.1326,
338
  "step": 460
339
  },
340
  {
341
  "epoch": 6.01,
342
  "learning_rate": 4.8592592592592596e-05,
343
+ "loss": 0.0233,
344
  "step": 470
345
  },
346
  {
347
  "epoch": 6.01,
348
  "learning_rate": 4.844444444444445e-05,
349
+ "loss": 0.3254,
350
  "step": 480
351
  },
352
  {
353
  "epoch": 6.01,
354
  "learning_rate": 4.82962962962963e-05,
355
+ "loss": 0.2502,
356
  "step": 490
357
  },
358
  {
359
  "epoch": 6.01,
360
  "learning_rate": 4.814814814814815e-05,
361
+ "loss": 0.1076,
362
  "step": 500
363
  },
364
  {
365
  "epoch": 6.02,
366
  "learning_rate": 4.8e-05,
367
+ "loss": 0.5042,
368
  "step": 510
369
  },
370
  {
371
  "epoch": 6.02,
372
  "learning_rate": 4.7851851851851854e-05,
373
+ "loss": 0.3714,
374
  "step": 520
375
  },
376
  {
377
  "epoch": 6.02,
378
+ "eval_accuracy": 0.7741935483870968,
379
+ "eval_loss": 1.1661574840545654,
380
+ "eval_runtime": 2.1688,
381
+ "eval_samples_per_second": 14.293,
382
+ "eval_steps_per_second": 3.689,
383
  "step": 525
384
  },
385
  {
386
  "epoch": 7.0,
387
  "learning_rate": 4.770370370370371e-05,
388
+ "loss": 0.8849,
389
  "step": 530
390
  },
391
  {
392
  "epoch": 7.0,
393
  "learning_rate": 4.755555555555556e-05,
394
+ "loss": 0.3047,
395
  "step": 540
396
  },
397
  {
398
  "epoch": 7.01,
399
  "learning_rate": 4.740740740740741e-05,
400
+ "loss": 0.4302,
401
  "step": 550
402
  },
403
  {
404
  "epoch": 7.01,
405
  "learning_rate": 4.7259259259259266e-05,
406
+ "loss": 0.454,
407
  "step": 560
408
  },
409
  {
410
  "epoch": 7.01,
411
  "learning_rate": 4.711111111111111e-05,
412
+ "loss": 0.1688,
413
  "step": 570
414
  },
415
  {
416
  "epoch": 7.01,
417
  "learning_rate": 4.6962962962962966e-05,
418
+ "loss": 0.1852,
419
  "step": 580
420
  },
421
  {
422
  "epoch": 7.02,
423
  "learning_rate": 4.681481481481482e-05,
424
+ "loss": 0.0478,
425
  "step": 590
426
  },
427
  {
428
  "epoch": 7.02,
429
  "learning_rate": 4.666666666666667e-05,
430
+ "loss": 0.0263,
431
  "step": 600
432
  },
433
  {
434
  "epoch": 7.02,
435
+ "eval_accuracy": 0.8064516129032258,
436
+ "eval_loss": 0.910193681716919,
437
+ "eval_runtime": 2.227,
438
+ "eval_samples_per_second": 13.92,
439
+ "eval_steps_per_second": 3.592,
440
  "step": 600
441
  },
442
  {
443
  "epoch": 8.0,
444
  "learning_rate": 4.6518518518518525e-05,
445
+ "loss": 0.0189,
446
  "step": 610
447
  },
448
  {
449
  "epoch": 8.01,
450
  "learning_rate": 4.637037037037038e-05,
451
+ "loss": 0.0965,
452
  "step": 620
453
  },
454
  {
455
  "epoch": 8.01,
456
  "learning_rate": 4.6222222222222224e-05,
457
+ "loss": 0.0689,
458
  "step": 630
459
  },
460
  {
461
  "epoch": 8.01,
462
  "learning_rate": 4.607407407407408e-05,
463
+ "loss": 0.0171,
464
  "step": 640
465
  },
466
  {
467
  "epoch": 8.01,
468
  "learning_rate": 4.592592592592593e-05,
469
+ "loss": 0.281,
470
  "step": 650
471
  },
472
  {
473
  "epoch": 8.02,
474
  "learning_rate": 4.577777777777778e-05,
475
+ "loss": 0.2834,
476
  "step": 660
477
  },
478
  {
479
  "epoch": 8.02,
480
  "learning_rate": 4.5629629629629636e-05,
481
+ "loss": 0.0848,
482
  "step": 670
483
  },
484
  {
485
  "epoch": 8.02,
486
+ "eval_accuracy": 0.9354838709677419,
487
+ "eval_loss": 0.12099169194698334,
488
+ "eval_runtime": 2.1891,
489
+ "eval_samples_per_second": 14.161,
490
+ "eval_steps_per_second": 3.654,
491
  "step": 675
492
  },
493
  {
494
  "epoch": 9.0,
495
  "learning_rate": 4.548148148148149e-05,
496
+ "loss": 0.009,
497
  "step": 680
498
  },
499
  {
500
  "epoch": 9.0,
501
  "learning_rate": 4.5333333333333335e-05,
502
+ "loss": 0.1009,
503
  "step": 690
504
  },
505
  {
506
  "epoch": 9.01,
507
  "learning_rate": 4.518518518518519e-05,
508
+ "loss": 0.1219,
509
  "step": 700
510
  },
511
  {
512
  "epoch": 9.01,
513
  "learning_rate": 4.503703703703704e-05,
514
+ "loss": 0.0463,
515
  "step": 710
516
  },
517
  {
518
  "epoch": 9.01,
519
  "learning_rate": 4.4888888888888894e-05,
520
+ "loss": 0.0083,
521
  "step": 720
522
  },
523
  {
524
  "epoch": 9.01,
525
  "learning_rate": 4.474074074074075e-05,
526
+ "loss": 0.0036,
527
  "step": 730
528
  },
529
  {
530
  "epoch": 9.02,
531
  "learning_rate": 4.4592592592592594e-05,
532
+ "loss": 0.0295,
533
  "step": 740
534
  },
535
  {
536
  "epoch": 9.02,
537
  "learning_rate": 4.4444444444444447e-05,
538
+ "loss": 0.0028,
539
  "step": 750
540
  },
541
  {
542
  "epoch": 9.02,
543
+ "eval_accuracy": 0.9354838709677419,
544
+ "eval_loss": 0.2373807281255722,
545
+ "eval_runtime": 2.1663,
546
+ "eval_samples_per_second": 14.31,
547
+ "eval_steps_per_second": 3.693,
548
  "step": 750
549
  },
550
  {
551
  "epoch": 10.0,
552
  "learning_rate": 4.42962962962963e-05,
553
+ "loss": 0.005,
554
  "step": 760
555
  },
556
  {
557
  "epoch": 10.01,
558
  "learning_rate": 4.414814814814815e-05,
559
+ "loss": 0.0043,
560
  "step": 770
561
  },
562
  {
563
  "epoch": 10.01,
564
  "learning_rate": 4.4000000000000006e-05,
565
+ "loss": 0.0094,
566
  "step": 780
567
  },
568
  {
 
574
  {
575
  "epoch": 10.01,
576
  "learning_rate": 4.3703703703703705e-05,
577
+ "loss": 0.0051,
578
  "step": 800
579
  },
580
  {
581
  "epoch": 10.02,
582
  "learning_rate": 4.355555555555556e-05,
583
+ "loss": 0.0024,
584
  "step": 810
585
  },
586
  {
587
  "epoch": 10.02,
588
  "learning_rate": 4.340740740740741e-05,
589
+ "loss": 0.0863,
590
  "step": 820
591
  },
592
  {
593
  "epoch": 10.02,
594
+ "eval_accuracy": 0.967741935483871,
595
+ "eval_loss": 0.17276477813720703,
596
+ "eval_runtime": 2.132,
597
+ "eval_samples_per_second": 14.54,
598
+ "eval_steps_per_second": 3.752,
599
  "step": 825
600
  },
601
  {
602
  "epoch": 11.0,
603
  "learning_rate": 4.325925925925926e-05,
604
+ "loss": 0.0026,
605
  "step": 830
606
  },
607
  {
608
  "epoch": 11.0,
609
  "learning_rate": 4.311111111111111e-05,
610
+ "loss": 0.0022,
611
  "step": 840
612
  },
613
  {
 
619
  {
620
  "epoch": 11.01,
621
  "learning_rate": 4.2814814814814816e-05,
622
+ "loss": 0.002,
623
  "step": 860
624
  },
625
  {
626
  "epoch": 11.01,
627
  "learning_rate": 4.266666666666667e-05,
628
+ "loss": 0.002,
629
  "step": 870
630
  },
631
  {
632
  "epoch": 11.01,
633
  "learning_rate": 4.2518518518518515e-05,
634
+ "loss": 0.0033,
635
  "step": 880
636
  },
637
  {
638
  "epoch": 11.02,
639
  "learning_rate": 4.237037037037037e-05,
640
+ "loss": 0.1644,
641
  "step": 890
642
  },
643
  {
644
  "epoch": 11.02,
645
  "learning_rate": 4.222222222222222e-05,
646
+ "loss": 0.0018,
647
  "step": 900
648
  },
649
  {
650
  "epoch": 11.02,
651
+ "eval_accuracy": 0.9354838709677419,
652
+ "eval_loss": 0.3042505085468292,
653
+ "eval_runtime": 2.1005,
654
+ "eval_samples_per_second": 14.759,
655
+ "eval_steps_per_second": 3.809,
656
  "step": 900
657
  },
658
  {
659
  "epoch": 12.0,
660
  "learning_rate": 4.2074074074074075e-05,
661
+ "loss": 0.002,
662
  "step": 910
663
  },
664
  {
665
  "epoch": 12.01,
666
  "learning_rate": 4.192592592592593e-05,
667
+ "loss": 0.0018,
668
  "step": 920
669
  },
670
  {
671
  "epoch": 12.01,
672
  "learning_rate": 4.177777777777778e-05,
673
+ "loss": 0.0043,
674
  "step": 930
675
  },
676
  {
677
  "epoch": 12.01,
678
  "learning_rate": 4.162962962962963e-05,
679
+ "loss": 0.0028,
680
  "step": 940
681
  },
682
  {
683
  "epoch": 12.01,
684
  "learning_rate": 4.148148148148148e-05,
685
+ "loss": 0.002,
686
  "step": 950
687
  },
688
  {
689
  "epoch": 12.02,
690
  "learning_rate": 4.133333333333333e-05,
691
+ "loss": 0.0027,
692
  "step": 960
693
  },
694
  {
695
  "epoch": 12.02,
696
  "learning_rate": 4.1185185185185186e-05,
697
+ "loss": 0.1662,
698
  "step": 970
699
  },
700
  {
701
  "epoch": 12.02,
702
+ "eval_accuracy": 0.9032258064516129,
703
+ "eval_loss": 0.42439064383506775,
704
+ "eval_runtime": 2.1089,
705
+ "eval_samples_per_second": 14.699,
706
+ "eval_steps_per_second": 3.793,
707
  "step": 975
708
  },
709
  {
710
  "epoch": 13.0,
711
  "learning_rate": 4.103703703703704e-05,
712
+ "loss": 0.0018,
713
  "step": 980
714
  },
715
  {
716
  "epoch": 13.0,
717
  "learning_rate": 4.088888888888889e-05,
718
+ "loss": 0.041,
719
  "step": 990
720
  },
721
  {
 
727
  {
728
  "epoch": 13.01,
729
  "learning_rate": 4.059259259259259e-05,
730
+ "loss": 0.0107,
731
  "step": 1010
732
  },
733
  {
734
  "epoch": 13.01,
735
  "learning_rate": 4.0444444444444444e-05,
736
+ "loss": 0.0019,
737
  "step": 1020
738
  },
739
  {
740
  "epoch": 13.01,
741
  "learning_rate": 4.02962962962963e-05,
742
+ "loss": 0.0016,
743
  "step": 1030
744
  },
745
  {
746
  "epoch": 13.02,
747
  "learning_rate": 4.014814814814815e-05,
748
+ "loss": 0.0016,
749
  "step": 1040
750
  },
751
  {
752
  "epoch": 13.02,
753
  "learning_rate": 4e-05,
754
+ "loss": 0.0132,
755
  "step": 1050
756
  },
757
  {
758
  "epoch": 13.02,
759
+ "eval_accuracy": 0.8709677419354839,
760
+ "eval_loss": 0.6965717673301697,
761
+ "eval_runtime": 2.0555,
762
+ "eval_samples_per_second": 15.082,
763
+ "eval_steps_per_second": 3.892,
764
  "step": 1050
765
  },
766
  {
 
772
  {
773
  "epoch": 14.01,
774
  "learning_rate": 3.97037037037037e-05,
775
+ "loss": 0.0444,
776
  "step": 1070
777
  },
778
  {
779
  "epoch": 14.01,
780
  "learning_rate": 3.9555555555555556e-05,
781
+ "loss": 0.0015,
782
  "step": 1080
783
  },
784
  {
785
  "epoch": 14.01,
786
  "learning_rate": 3.940740740740741e-05,
787
+ "loss": 0.0015,
788
  "step": 1090
789
  },
790
  {
791
  "epoch": 14.01,
792
  "learning_rate": 3.925925925925926e-05,
793
+ "loss": 0.0014,
794
  "step": 1100
795
  },
796
  {
 
802
  {
803
  "epoch": 14.02,
804
  "learning_rate": 3.896296296296296e-05,
805
+ "loss": 0.0019,
806
  "step": 1120
807
  },
808
  {
809
  "epoch": 14.02,
810
+ "eval_accuracy": 0.8709677419354839,
811
+ "eval_loss": 0.5602208971977234,
812
+ "eval_runtime": 2.0688,
813
+ "eval_samples_per_second": 14.985,
814
+ "eval_steps_per_second": 3.867,
815
  "step": 1125
816
  },
817
  {
818
  "epoch": 15.0,
819
  "learning_rate": 3.8814814814814814e-05,
820
+ "loss": 0.0013,
821
  "step": 1130
822
  },
823
  {
824
  "epoch": 15.0,
825
  "learning_rate": 3.866666666666667e-05,
826
+ "loss": 0.2104,
827
  "step": 1140
828
  },
829
  {
830
  "epoch": 15.01,
831
  "learning_rate": 3.851851851851852e-05,
832
+ "loss": 0.0014,
833
  "step": 1150
834
  },
835
  {
836
  "epoch": 15.01,
837
  "learning_rate": 3.837037037037037e-05,
838
+ "loss": 0.0015,
839
  "step": 1160
840
  },
841
  {
842
  "epoch": 15.01,
843
  "learning_rate": 3.8222222222222226e-05,
844
+ "loss": 0.0011,
845
  "step": 1170
846
  },
847
  {
848
  "epoch": 15.01,
849
  "learning_rate": 3.807407407407408e-05,
850
+ "loss": 0.0013,
851
  "step": 1180
852
  },
853
  {
854
  "epoch": 15.02,
855
  "learning_rate": 3.7925925925925925e-05,
856
+ "loss": 0.0011,
857
  "step": 1190
858
  },
859
  {
860
  "epoch": 15.02,
861
  "learning_rate": 3.777777777777778e-05,
862
+ "loss": 0.0012,
863
  "step": 1200
864
  },
865
  {
866
  "epoch": 15.02,
867
+ "eval_accuracy": 0.8709677419354839,
868
+ "eval_loss": 0.26489391922950745,
869
+ "eval_runtime": 2.1064,
870
+ "eval_samples_per_second": 14.717,
871
+ "eval_steps_per_second": 3.798,
872
  "step": 1200
873
  },
874
  {
 
880
  {
881
  "epoch": 16.01,
882
  "learning_rate": 3.7481481481481484e-05,
883
+ "loss": 0.0014,
884
  "step": 1220
885
  },
886
  {
887
  "epoch": 16.01,
888
  "learning_rate": 3.733333333333334e-05,
889
+ "loss": 0.0012,
890
  "step": 1230
891
  },
892
  {
893
  "epoch": 16.01,
894
  "learning_rate": 3.718518518518519e-05,
895
+ "loss": 0.0011,
896
  "step": 1240
897
  },
898
  {
899
  "epoch": 16.01,
900
  "learning_rate": 3.7037037037037037e-05,
901
+ "loss": 0.0013,
902
  "step": 1250
903
  },
904
  {
905
  "epoch": 16.02,
906
  "learning_rate": 3.688888888888889e-05,
907
+ "loss": 0.003,
908
  "step": 1260
909
  },
910
  {
911
  "epoch": 16.02,
912
  "learning_rate": 3.674074074074074e-05,
913
+ "loss": 0.0061,
914
  "step": 1270
915
  },
916
  {
917
  "epoch": 16.02,
918
+ "eval_accuracy": 0.8709677419354839,
919
+ "eval_loss": 0.7361043095588684,
920
+ "eval_runtime": 2.0949,
921
+ "eval_samples_per_second": 14.798,
922
+ "eval_steps_per_second": 3.819,
923
  "step": 1275
924
  },
925
  {
926
  "epoch": 17.0,
927
  "learning_rate": 3.6592592592592596e-05,
928
+ "loss": 0.1429,
929
  "step": 1280
930
  },
931
  {
932
  "epoch": 17.0,
933
  "learning_rate": 3.644444444444445e-05,
934
+ "loss": 0.0012,
935
  "step": 1290
936
  },
937
  {
938
  "epoch": 17.01,
939
  "learning_rate": 3.62962962962963e-05,
940
+ "loss": 0.0011,
941
  "step": 1300
942
  },
943
  {
944
  "epoch": 17.01,
945
  "learning_rate": 3.614814814814815e-05,
946
+ "loss": 0.2073,
947
  "step": 1310
948
  },
949
  {
950
  "epoch": 17.01,
951
  "learning_rate": 3.6e-05,
952
+ "loss": 0.0425,
953
  "step": 1320
954
  },
955
  {
956
  "epoch": 17.01,
957
  "learning_rate": 3.5851851851851854e-05,
958
+ "loss": 0.0014,
959
  "step": 1330
960
  },
961
  {
962
  "epoch": 17.02,
963
  "learning_rate": 3.570370370370371e-05,
964
+ "loss": 0.0016,
965
  "step": 1340
966
  },
967
  {
968
  "epoch": 17.02,
969
  "learning_rate": 3.555555555555556e-05,
970
+ "loss": 0.0012,
971
  "step": 1350
972
  },
973
  {
974
  "epoch": 17.02,
975
+ "eval_accuracy": 0.9354838709677419,
976
+ "eval_loss": 0.2821272313594818,
977
+ "eval_runtime": 2.1139,
978
+ "eval_samples_per_second": 14.665,
979
+ "eval_steps_per_second": 3.784,
980
  "step": 1350
981
  },
982
  {
983
  "epoch": 18.0,
984
  "learning_rate": 3.540740740740741e-05,
985
+ "loss": 0.0011,
986
  "step": 1360
987
  },
988
  {
989
  "epoch": 18.01,
990
  "learning_rate": 3.525925925925926e-05,
991
+ "loss": 0.0951,
992
  "step": 1370
993
  },
994
  {
995
  "epoch": 18.01,
996
  "learning_rate": 3.511111111111111e-05,
997
+ "loss": 0.0011,
998
  "step": 1380
999
  },
1000
  {
1001
  "epoch": 18.01,
1002
  "learning_rate": 3.4962962962962965e-05,
1003
+ "loss": 0.0013,
1004
  "step": 1390
1005
  },
1006
  {
1007
  "epoch": 18.01,
1008
  "learning_rate": 3.481481481481482e-05,
1009
+ "loss": 0.001,
1010
  "step": 1400
1011
  },
1012
  {
1013
  "epoch": 18.02,
1014
  "learning_rate": 3.466666666666667e-05,
1015
+ "loss": 0.0094,
1016
  "step": 1410
1017
  },
1018
  {
 
1023
  },
1024
  {
1025
  "epoch": 18.02,
1026
+ "eval_accuracy": 0.9354838709677419,
1027
+ "eval_loss": 0.3117120563983917,
1028
+ "eval_runtime": 2.0573,
1029
+ "eval_samples_per_second": 15.068,
1030
+ "eval_steps_per_second": 3.889,
1031
  "step": 1425
1032
  },
1033
  {
1034
  "epoch": 19.0,
1035
  "learning_rate": 3.437037037037037e-05,
1036
+ "loss": 0.001,
1037
  "step": 1430
1038
  },
1039
  {
1040
  "epoch": 19.0,
1041
  "learning_rate": 3.4222222222222224e-05,
1042
+ "loss": 0.001,
1043
  "step": 1440
1044
  },
1045
  {
1046
  "epoch": 19.01,
1047
  "learning_rate": 3.4074074074074077e-05,
1048
+ "loss": 0.0147,
1049
  "step": 1450
1050
  },
1051
  {
1052
  "epoch": 19.01,
1053
  "learning_rate": 3.392592592592593e-05,
1054
+ "loss": 0.0013,
1055
  "step": 1460
1056
  },
1057
  {
1058
  "epoch": 19.01,
1059
  "learning_rate": 3.377777777777778e-05,
1060
+ "loss": 0.063,
1061
  "step": 1470
1062
  },
1063
  {
1064
  "epoch": 19.01,
1065
  "learning_rate": 3.3629629629629636e-05,
1066
+ "loss": 0.1805,
1067
  "step": 1480
1068
  },
1069
  {
1070
  "epoch": 19.02,
1071
  "learning_rate": 3.348148148148148e-05,
1072
+ "loss": 0.1541,
1073
  "step": 1490
1074
  },
1075
  {
1076
  "epoch": 19.02,
1077
  "learning_rate": 3.3333333333333335e-05,
1078
+ "loss": 0.001,
1079
  "step": 1500
1080
  },
1081
  {
1082
  "epoch": 19.02,
1083
+ "eval_accuracy": 0.967741935483871,
1084
+ "eval_loss": 0.11934669315814972,
1085
+ "eval_runtime": 2.125,
1086
+ "eval_samples_per_second": 14.588,
1087
+ "eval_steps_per_second": 3.765,
1088
  "step": 1500
1089
  },
1090
  {
1091
  "epoch": 20.0,
1092
  "learning_rate": 3.318518518518519e-05,
1093
+ "loss": 0.0022,
1094
  "step": 1510
1095
  },
1096
  {
1097
  "epoch": 20.01,
1098
  "learning_rate": 3.303703703703704e-05,
1099
+ "loss": 0.0009,
1100
  "step": 1520
1101
  },
1102
  {
1103
  "epoch": 20.01,
1104
  "learning_rate": 3.2888888888888894e-05,
1105
+ "loss": 0.0016,
1106
  "step": 1530
1107
  },
1108
  {
1109
  "epoch": 20.01,
1110
  "learning_rate": 3.274074074074075e-05,
1111
+ "loss": 0.1235,
1112
  "step": 1540
1113
  },
1114
  {
1115
  "epoch": 20.01,
1116
  "learning_rate": 3.25925925925926e-05,
1117
+ "loss": 0.0055,
1118
  "step": 1550
1119
  },
1120
  {
1121
  "epoch": 20.02,
1122
  "learning_rate": 3.2444444444444446e-05,
1123
+ "loss": 0.001,
1124
  "step": 1560
1125
  },
1126
  {
1127
  "epoch": 20.02,
1128
  "learning_rate": 3.22962962962963e-05,
1129
+ "loss": 0.0021,
1130
  "step": 1570
1131
  },
1132
  {
1133
  "epoch": 20.02,
1134
+ "eval_accuracy": 0.9354838709677419,
1135
+ "eval_loss": 0.24128369987010956,
1136
+ "eval_runtime": 2.0867,
1137
+ "eval_samples_per_second": 14.856,
1138
+ "eval_steps_per_second": 3.834,
1139
  "step": 1575
1140
  },
1141
  {
1142
  "epoch": 21.0,
1143
  "learning_rate": 3.214814814814815e-05,
1144
+ "loss": 0.001,
1145
  "step": 1580
1146
  },
1147
  {
1148
  "epoch": 21.0,
1149
  "learning_rate": 3.2000000000000005e-05,
1150
+ "loss": 0.0009,
1151
  "step": 1590
1152
  },
1153
  {
1154
  "epoch": 21.01,
1155
  "learning_rate": 3.185185185185185e-05,
1156
+ "loss": 0.0012,
1157
  "step": 1600
1158
  },
1159
  {
 
1165
  {
1166
  "epoch": 21.01,
1167
  "learning_rate": 3.155555555555556e-05,
1168
+ "loss": 0.0009,
1169
  "step": 1620
1170
  },
1171
  {
1172
  "epoch": 21.01,
1173
  "learning_rate": 3.140740740740741e-05,
1174
+ "loss": 0.0008,
1175
  "step": 1630
1176
  },
1177
  {
1178
  "epoch": 21.02,
1179
  "learning_rate": 3.1259259259259264e-05,
1180
+ "loss": 0.0009,
1181
  "step": 1640
1182
  },
1183
  {
1184
  "epoch": 21.02,
1185
  "learning_rate": 3.111111111111111e-05,
1186
+ "loss": 0.0009,
1187
  "step": 1650
1188
  },
1189
  {
1190
  "epoch": 21.02,
1191
+ "eval_accuracy": 0.967741935483871,
1192
+ "eval_loss": 0.1641165167093277,
1193
+ "eval_runtime": 2.0769,
1194
+ "eval_samples_per_second": 14.926,
1195
+ "eval_steps_per_second": 3.852,
1196
  "step": 1650
1197
  },
1198
  {
1199
  "epoch": 22.0,
1200
  "learning_rate": 3.096296296296296e-05,
1201
+ "loss": 0.0009,
1202
  "step": 1660
1203
  },
1204
  {
1205
  "epoch": 22.01,
1206
  "learning_rate": 3.0814814814814816e-05,
1207
+ "loss": 0.0289,
1208
  "step": 1670
1209
  },
1210
  {
1211
  "epoch": 22.01,
1212
  "learning_rate": 3.066666666666667e-05,
1213
+ "loss": 0.0008,
1214
  "step": 1680
1215
  },
1216
  {
1217
  "epoch": 22.01,
1218
  "learning_rate": 3.0518518518518515e-05,
1219
+ "loss": 0.0008,
1220
  "step": 1690
1221
  },
1222
  {
1223
  "epoch": 22.01,
1224
  "learning_rate": 3.037037037037037e-05,
1225
+ "loss": 0.0008,
1226
  "step": 1700
1227
  },
1228
  {
 
1234
  {
1235
  "epoch": 22.02,
1236
  "learning_rate": 3.0074074074074078e-05,
1237
+ "loss": 0.0016,
1238
  "step": 1720
1239
  },
1240
  {
1241
  "epoch": 22.02,
1242
+ "eval_accuracy": 0.967741935483871,
1243
+ "eval_loss": 0.1332567036151886,
1244
+ "eval_runtime": 2.0589,
1245
+ "eval_samples_per_second": 15.057,
1246
+ "eval_steps_per_second": 3.886,
1247
  "step": 1725
1248
  },
1249
  {
1250
  "epoch": 23.0,
1251
  "learning_rate": 2.992592592592593e-05,
1252
+ "loss": 0.0008,
1253
  "step": 1730
1254
  },
1255
  {
 
1261
  {
1262
  "epoch": 23.01,
1263
  "learning_rate": 2.962962962962963e-05,
1264
+ "loss": 0.0009,
1265
  "step": 1750
1266
  },
1267
  {
1268
  "epoch": 23.01,
1269
  "learning_rate": 2.9481481481481483e-05,
1270
+ "loss": 0.0008,
1271
  "step": 1760
1272
  },
1273
  {
1274
  "epoch": 23.01,
1275
  "learning_rate": 2.9333333333333336e-05,
1276
+ "loss": 0.0009,
1277
  "step": 1770
1278
  },
1279
  {
1280
  "epoch": 23.01,
1281
  "learning_rate": 2.918518518518519e-05,
1282
+ "loss": 0.0007,
1283
  "step": 1780
1284
  },
1285
  {
1286
  "epoch": 23.02,
1287
  "learning_rate": 2.9037037037037042e-05,
1288
+ "loss": 0.0007,
1289
  "step": 1790
1290
  },
1291
  {
1292
  "epoch": 23.02,
1293
  "learning_rate": 2.8888888888888888e-05,
1294
+ "loss": 0.0007,
1295
  "step": 1800
1296
  },
1297
  {
1298
  "epoch": 23.02,
1299
+ "eval_accuracy": 0.967741935483871,
1300
+ "eval_loss": 0.10598693042993546,
1301
+ "eval_runtime": 2.0414,
1302
+ "eval_samples_per_second": 15.186,
1303
+ "eval_steps_per_second": 3.919,
1304
  "step": 1800
1305
  },
1306
  {
1307
  "epoch": 24.0,
1308
  "learning_rate": 2.874074074074074e-05,
1309
+ "loss": 0.0007,
1310
  "step": 1810
1311
  },
1312
  {
1313
  "epoch": 24.01,
1314
  "learning_rate": 2.8592592592592594e-05,
1315
+ "loss": 0.0007,
1316
  "step": 1820
1317
  },
1318
  {
1319
  "epoch": 24.01,
1320
  "learning_rate": 2.8444444444444447e-05,
1321
+ "loss": 0.0007,
1322
  "step": 1830
1323
  },
1324
  {
1325
  "epoch": 24.01,
1326
  "learning_rate": 2.8296296296296297e-05,
1327
+ "loss": 0.0007,
1328
  "step": 1840
1329
  },
1330
  {
 
1336
  {
1337
  "epoch": 24.02,
1338
  "learning_rate": 2.8000000000000003e-05,
1339
+ "loss": 0.0006,
1340
  "step": 1860
1341
  },
1342
  {
1343
  "epoch": 24.02,
1344
  "learning_rate": 2.7851851851851853e-05,
1345
+ "loss": 0.0008,
1346
  "step": 1870
1347
  },
1348
  {
1349
  "epoch": 24.02,
1350
+ "eval_accuracy": 0.967741935483871,
1351
+ "eval_loss": 0.1111554503440857,
1352
+ "eval_runtime": 2.0546,
1353
+ "eval_samples_per_second": 15.088,
1354
+ "eval_steps_per_second": 3.894,
1355
  "step": 1875
1356
  },
1357
  {
 
1369
  {
1370
  "epoch": 25.01,
1371
  "learning_rate": 2.7407407407407408e-05,
1372
+ "loss": 0.0007,
1373
  "step": 1900
1374
  },
1375
  {
1376
  "epoch": 25.01,
1377
  "learning_rate": 2.725925925925926e-05,
1378
+ "loss": 0.0007,
1379
  "step": 1910
1380
  },
1381
  {
1382
  "epoch": 25.01,
1383
  "learning_rate": 2.7111111111111114e-05,
1384
+ "loss": 0.0006,
1385
  "step": 1920
1386
  },
1387
  {
1388
  "epoch": 25.01,
1389
  "learning_rate": 2.696296296296296e-05,
1390
+ "loss": 0.0006,
1391
  "step": 1930
1392
  },
1393
  {
1394
  "epoch": 25.02,
1395
  "learning_rate": 2.6814814814814814e-05,
1396
+ "loss": 0.0006,
1397
  "step": 1940
1398
  },
1399
  {
1400
  "epoch": 25.02,
1401
  "learning_rate": 2.6666666666666667e-05,
1402
+ "loss": 0.0006,
1403
  "step": 1950
1404
  },
1405
  {
1406
  "epoch": 25.02,
1407
+ "eval_accuracy": 1.0,
1408
+ "eval_loss": 0.01604604721069336,
1409
+ "eval_runtime": 2.0655,
1410
+ "eval_samples_per_second": 15.008,
1411
+ "eval_steps_per_second": 3.873,
1412
  "step": 1950
1413
  },
1414
  {
1415
  "epoch": 26.0,
1416
  "learning_rate": 2.651851851851852e-05,
1417
+ "loss": 0.0006,
1418
  "step": 1960
1419
  },
1420
  {
1421
  "epoch": 26.01,
1422
  "learning_rate": 2.6370370370370373e-05,
1423
+ "loss": 0.0006,
1424
  "step": 1970
1425
  },
1426
  {
1427
  "epoch": 26.01,
1428
  "learning_rate": 2.6222222222222226e-05,
1429
+ "loss": 0.0006,
1430
  "step": 1980
1431
  },
1432
  {
1433
  "epoch": 26.01,
1434
  "learning_rate": 2.6074074074074072e-05,
1435
+ "loss": 0.0006,
1436
  "step": 1990
1437
  },
1438
  {
1439
  "epoch": 26.01,
1440
  "learning_rate": 2.5925925925925925e-05,
1441
+ "loss": 0.0006,
1442
  "step": 2000
1443
  },
1444
  {
1445
  "epoch": 26.02,
1446
  "learning_rate": 2.5777777777777778e-05,
1447
+ "loss": 0.0006,
1448
  "step": 2010
1449
  },
1450
  {
1451
  "epoch": 26.02,
1452
  "learning_rate": 2.562962962962963e-05,
1453
+ "loss": 0.0006,
1454
  "step": 2020
1455
  },
1456
  {
1457
  "epoch": 26.02,
1458
+ "eval_accuracy": 0.967741935483871,
1459
+ "eval_loss": 0.029128754511475563,
1460
+ "eval_runtime": 2.1074,
1461
+ "eval_samples_per_second": 14.71,
1462
+ "eval_steps_per_second": 3.796,
1463
  "step": 2025
1464
  },
1465
  {
 
1471
  {
1472
  "epoch": 27.0,
1473
  "learning_rate": 2.5333333333333337e-05,
1474
+ "loss": 0.0006,
1475
  "step": 2040
1476
  },
1477
  {
1478
  "epoch": 27.01,
1479
  "learning_rate": 2.5185185185185183e-05,
1480
+ "loss": 0.0006,
1481
  "step": 2050
1482
  },
1483
  {
 
1489
  {
1490
  "epoch": 27.01,
1491
  "learning_rate": 2.488888888888889e-05,
1492
+ "loss": 0.0006,
1493
  "step": 2070
1494
  },
1495
  {
1496
  "epoch": 27.01,
1497
  "learning_rate": 2.4740740740740742e-05,
1498
+ "loss": 0.0006,
1499
  "step": 2080
1500
  },
1501
  {
1502
  "epoch": 27.02,
1503
  "learning_rate": 2.4592592592592595e-05,
1504
+ "loss": 0.0006,
1505
  "step": 2090
1506
  },
1507
  {
1508
  "epoch": 27.02,
1509
  "learning_rate": 2.4444444444444445e-05,
1510
+ "loss": 0.0006,
1511
  "step": 2100
1512
  },
1513
  {
1514
  "epoch": 27.02,
1515
+ "eval_accuracy": 0.967741935483871,
1516
+ "eval_loss": 0.08601882308721542,
1517
+ "eval_runtime": 2.1685,
1518
+ "eval_samples_per_second": 14.296,
1519
+ "eval_steps_per_second": 3.689,
1520
  "step": 2100
1521
  },
1522
  {
1523
  "epoch": 28.0,
1524
  "learning_rate": 2.4296296296296298e-05,
1525
+ "loss": 0.0006,
1526
  "step": 2110
1527
  },
1528
  {
1529
  "epoch": 28.01,
1530
  "learning_rate": 2.414814814814815e-05,
1531
+ "loss": 0.0006,
1532
  "step": 2120
1533
  },
1534
  {
1535
  "epoch": 28.01,
1536
  "learning_rate": 2.4e-05,
1537
+ "loss": 0.0006,
1538
  "step": 2130
1539
  },
1540
  {
1541
  "epoch": 28.01,
1542
  "learning_rate": 2.3851851851851854e-05,
1543
+ "loss": 0.0006,
1544
  "step": 2140
1545
  },
1546
  {
1547
  "epoch": 28.01,
1548
  "learning_rate": 2.3703703703703707e-05,
1549
+ "loss": 0.0006,
1550
  "step": 2150
1551
  },
1552
  {
 
1558
  {
1559
  "epoch": 28.02,
1560
  "learning_rate": 2.340740740740741e-05,
1561
+ "loss": 0.0005,
1562
  "step": 2170
1563
  },
1564
  {
1565
  "epoch": 28.02,
1566
+ "eval_accuracy": 0.967741935483871,
1567
+ "eval_loss": 0.10801166296005249,
1568
+ "eval_runtime": 2.1529,
1569
+ "eval_samples_per_second": 14.399,
1570
+ "eval_steps_per_second": 3.716,
1571
  "step": 2175
1572
  },
1573
  {
 
1579
  {
1580
  "epoch": 29.0,
1581
  "learning_rate": 2.3111111111111112e-05,
1582
+ "loss": 0.0005,
1583
  "step": 2190
1584
  },
1585
  {
1586
  "epoch": 29.01,
1587
  "learning_rate": 2.2962962962962965e-05,
1588
+ "loss": 0.0006,
1589
  "step": 2200
1590
  },
1591
  {
1592
  "epoch": 29.01,
1593
  "learning_rate": 2.2814814814814818e-05,
1594
+ "loss": 0.0006,
1595
  "step": 2210
1596
  },
1597
  {
1598
  "epoch": 29.01,
1599
  "learning_rate": 2.2666666666666668e-05,
1600
+ "loss": 0.0005,
1601
  "step": 2220
1602
  },
1603
  {
 
1609
  {
1610
  "epoch": 29.02,
1611
  "learning_rate": 2.2370370370370374e-05,
1612
+ "loss": 0.0005,
1613
  "step": 2240
1614
  },
1615
  {
1616
  "epoch": 29.02,
1617
  "learning_rate": 2.2222222222222223e-05,
1618
+ "loss": 0.0006,
1619
  "step": 2250
1620
  },
1621
  {
1622
  "epoch": 29.02,
1623
+ "eval_accuracy": 0.967741935483871,
1624
+ "eval_loss": 0.11195220053195953,
1625
+ "eval_runtime": 2.1574,
1626
+ "eval_samples_per_second": 14.369,
1627
+ "eval_steps_per_second": 3.708,
1628
  "step": 2250
1629
  },
1630
  {
1631
  "epoch": 30.0,
1632
  "learning_rate": 2.2074074074074076e-05,
1633
+ "loss": 0.0005,
1634
  "step": 2260
1635
  },
1636
  {
 
1642
  {
1643
  "epoch": 30.01,
1644
  "learning_rate": 2.177777777777778e-05,
1645
+ "loss": 0.1392,
1646
  "step": 2280
1647
  },
1648
  {
1649
  "epoch": 30.01,
1650
  "learning_rate": 2.162962962962963e-05,
1651
+ "loss": 0.0005,
1652
  "step": 2290
1653
  },
1654
  {
1655
  "epoch": 30.01,
1656
  "learning_rate": 2.148148148148148e-05,
1657
+ "loss": 0.0564,
1658
  "step": 2300
1659
  },
1660
  {
 
1666
  {
1667
  "epoch": 30.02,
1668
  "learning_rate": 2.1185185185185184e-05,
1669
+ "loss": 0.0006,
1670
  "step": 2320
1671
  },
1672
  {
1673
  "epoch": 30.02,
1674
+ "eval_accuracy": 0.967741935483871,
1675
+ "eval_loss": 0.05927049741148949,
1676
+ "eval_runtime": 2.1955,
1677
+ "eval_samples_per_second": 14.12,
1678
+ "eval_steps_per_second": 3.644,
1679
  "step": 2325
1680
  },
1681
  {
1682
  "epoch": 31.0,
1683
  "learning_rate": 2.1037037037037037e-05,
1684
+ "loss": 0.0007,
1685
  "step": 2330
1686
  },
1687
  {
1688
  "epoch": 31.0,
1689
  "learning_rate": 2.088888888888889e-05,
1690
+ "loss": 0.0379,
1691
  "step": 2340
1692
  },
1693
  {
 
1728
  },
1729
  {
1730
  "epoch": 31.02,
1731
+ "eval_accuracy": 0.967741935483871,
1732
+ "eval_loss": 0.16602446138858795,
1733
+ "eval_runtime": 2.1541,
1734
+ "eval_samples_per_second": 14.391,
1735
+ "eval_steps_per_second": 3.714,
1736
  "step": 2400
1737
  },
1738
  {
 
1750
  {
1751
  "epoch": 32.01,
1752
  "learning_rate": 1.9555555555555557e-05,
1753
+ "loss": 0.0007,
1754
  "step": 2430
1755
  },
1756
  {
 
1779
  },
1780
  {
1781
  "epoch": 32.02,
1782
+ "eval_accuracy": 0.967741935483871,
1783
+ "eval_loss": 0.04543553665280342,
1784
+ "eval_runtime": 2.5566,
1785
+ "eval_samples_per_second": 12.125,
1786
+ "eval_steps_per_second": 3.129,
1787
  "step": 2475
1788
  },
1789
  {
 
1795
  {
1796
  "epoch": 33.0,
1797
  "learning_rate": 1.866666666666667e-05,
1798
+ "loss": 0.0006,
1799
  "step": 2490
1800
  },
1801
  {
 
1813
  {
1814
  "epoch": 33.01,
1815
  "learning_rate": 1.8222222222222224e-05,
1816
+ "loss": 0.0009,
1817
  "step": 2520
1818
  },
1819
  {
 
1831
  {
1832
  "epoch": 33.02,
1833
  "learning_rate": 1.777777777777778e-05,
1834
+ "loss": 0.0007,
1835
  "step": 2550
1836
  },
1837
  {
1838
  "epoch": 33.02,
1839
+ "eval_accuracy": 0.967741935483871,
1840
+ "eval_loss": 0.1014844998717308,
1841
+ "eval_runtime": 2.1217,
1842
+ "eval_samples_per_second": 14.611,
1843
+ "eval_steps_per_second": 3.771,
1844
  "step": 2550
1845
  },
1846
  {
 
1887
  },
1888
  {
1889
  "epoch": 34.02,
1890
+ "eval_accuracy": 0.967741935483871,
1891
+ "eval_loss": 0.17122069001197815,
1892
+ "eval_runtime": 2.1289,
1893
+ "eval_samples_per_second": 14.561,
1894
+ "eval_steps_per_second": 3.758,
1895
  "step": 2625
1896
  },
1897
  {
 
1915
  {
1916
  "epoch": 35.01,
1917
  "learning_rate": 1.614814814814815e-05,
1918
+ "loss": 0.0005,
1919
  "step": 2660
1920
  },
1921
  {
 
1933
  {
1934
  "epoch": 35.02,
1935
  "learning_rate": 1.5703703703703705e-05,
1936
+ "loss": 0.0004,
1937
  "step": 2690
1938
  },
1939
  {
 
1944
  },
1945
  {
1946
  "epoch": 35.02,
1947
+ "eval_accuracy": 0.967741935483871,
1948
+ "eval_loss": 0.1599966138601303,
1949
+ "eval_runtime": 2.0848,
1950
+ "eval_samples_per_second": 14.87,
1951
+ "eval_steps_per_second": 3.837,
1952
  "step": 2700
1953
  },
1954
  {
1955
  "epoch": 36.0,
1956
  "learning_rate": 1.5407407407407408e-05,
1957
+ "loss": 0.0004,
1958
  "step": 2710
1959
  },
1960
  {
 
1984
  {
1985
  "epoch": 36.02,
1986
  "learning_rate": 1.4666666666666668e-05,
1987
+ "loss": 0.0004,
1988
  "step": 2760
1989
  },
1990
  {
 
1995
  },
1996
  {
1997
  "epoch": 36.02,
1998
+ "eval_accuracy": 0.967741935483871,
1999
+ "eval_loss": 0.16178081929683685,
2000
+ "eval_runtime": 2.0795,
2001
+ "eval_samples_per_second": 14.908,
2002
+ "eval_steps_per_second": 3.847,
2003
  "step": 2775
2004
  },
2005
  {
2006
  "epoch": 37.0,
2007
  "learning_rate": 1.437037037037037e-05,
2008
+ "loss": 0.0004,
2009
  "step": 2780
2010
  },
2011
  {
 
2023
  {
2024
  "epoch": 37.01,
2025
  "learning_rate": 1.3925925925925926e-05,
2026
+ "loss": 0.0004,
2027
  "step": 2810
2028
  },
2029
  {
 
2035
  {
2036
  "epoch": 37.01,
2037
  "learning_rate": 1.362962962962963e-05,
2038
+ "loss": 0.0006,
2039
  "step": 2830
2040
  },
2041
  {
2042
  "epoch": 37.02,
2043
  "learning_rate": 1.348148148148148e-05,
2044
+ "loss": 0.0004,
2045
  "step": 2840
2046
  },
2047
  {
 
2052
  },
2053
  {
2054
  "epoch": 37.02,
2055
+ "eval_accuracy": 0.967741935483871,
2056
+ "eval_loss": 0.14675143361091614,
2057
+ "eval_runtime": 2.0875,
2058
+ "eval_samples_per_second": 14.851,
2059
+ "eval_steps_per_second": 3.832,
2060
  "step": 2850
2061
  },
2062
  {
 
2086
  {
2087
  "epoch": 38.01,
2088
  "learning_rate": 1.2592592592592592e-05,
2089
+ "loss": 0.0005,
2090
  "step": 2900
2091
  },
2092
  {
 
2098
  {
2099
  "epoch": 38.02,
2100
  "learning_rate": 1.2296296296296298e-05,
2101
+ "loss": 0.0004,
2102
  "step": 2920
2103
  },
2104
  {
2105
  "epoch": 38.02,
2106
+ "eval_accuracy": 0.967741935483871,
2107
+ "eval_loss": 0.11672930419445038,
2108
+ "eval_runtime": 2.0961,
2109
+ "eval_samples_per_second": 14.79,
2110
+ "eval_steps_per_second": 3.817,
2111
  "step": 2925
2112
  },
2113
  {
 
2125
  {
2126
  "epoch": 39.01,
2127
  "learning_rate": 1.1851851851851853e-05,
2128
+ "loss": 0.0004,
2129
  "step": 2950
2130
  },
2131
  {
2132
  "epoch": 39.01,
2133
  "learning_rate": 1.1703703703703705e-05,
2134
+ "loss": 0.0004,
2135
  "step": 2960
2136
  },
2137
  {
 
2160
  },
2161
  {
2162
  "epoch": 39.02,
2163
+ "eval_accuracy": 0.967741935483871,
2164
+ "eval_loss": 0.12776704132556915,
2165
+ "eval_runtime": 2.1378,
2166
+ "eval_samples_per_second": 14.501,
2167
+ "eval_steps_per_second": 3.742,
2168
  "step": 3000
2169
  },
2170
  {
 
2211
  },
2212
  {
2213
  "epoch": 40.02,
2214
+ "eval_accuracy": 0.967741935483871,
2215
+ "eval_loss": 0.12002281844615936,
2216
+ "eval_runtime": 2.1121,
2217
+ "eval_samples_per_second": 14.677,
2218
+ "eval_steps_per_second": 3.788,
2219
  "step": 3075
2220
  },
2221
  {
 
2227
  {
2228
  "epoch": 41.0,
2229
  "learning_rate": 9.777777777777779e-06,
2230
+ "loss": 0.0004,
2231
  "step": 3090
2232
  },
2233
  {
 
2257
  {
2258
  "epoch": 41.02,
2259
  "learning_rate": 9.037037037037037e-06,
2260
+ "loss": 0.0004,
2261
  "step": 3140
2262
  },
2263
  {
 
2268
  },
2269
  {
2270
  "epoch": 41.02,
2271
+ "eval_accuracy": 0.967741935483871,
2272
+ "eval_loss": 0.12003983557224274,
2273
+ "eval_runtime": 2.0588,
2274
+ "eval_samples_per_second": 15.057,
2275
+ "eval_steps_per_second": 3.886,
2276
  "step": 3150
2277
  },
2278
  {
 
2284
  {
2285
  "epoch": 42.01,
2286
  "learning_rate": 8.592592592592593e-06,
2287
+ "loss": 0.0004,
2288
  "step": 3170
2289
  },
2290
  {
 
2319
  },
2320
  {
2321
  "epoch": 42.02,
2322
+ "eval_accuracy": 0.967741935483871,
2323
+ "eval_loss": 0.12297246605157852,
2324
+ "eval_runtime": 2.0736,
2325
+ "eval_samples_per_second": 14.95,
2326
+ "eval_steps_per_second": 3.858,
2327
  "step": 3225
2328
  },
2329
  {
2330
  "epoch": 43.0,
2331
  "learning_rate": 7.703703703703704e-06,
2332
+ "loss": 0.0004,
2333
  "step": 3230
2334
  },
2335
  {
 
2371
  {
2372
  "epoch": 43.02,
2373
  "learning_rate": 6.666666666666667e-06,
2374
+ "loss": 0.0004,
2375
  "step": 3300
2376
  },
2377
  {
2378
  "epoch": 43.02,
2379
+ "eval_accuracy": 0.967741935483871,
2380
+ "eval_loss": 0.13225598633289337,
2381
+ "eval_runtime": 2.0961,
2382
+ "eval_samples_per_second": 14.789,
2383
+ "eval_steps_per_second": 3.817,
2384
  "step": 3300
2385
  },
2386
  {
 
2427
  },
2428
  {
2429
  "epoch": 44.02,
2430
+ "eval_accuracy": 0.967741935483871,
2431
+ "eval_loss": 0.1282513439655304,
2432
+ "eval_runtime": 2.1105,
2433
+ "eval_samples_per_second": 14.688,
2434
+ "eval_steps_per_second": 3.791,
2435
  "step": 3375
2436
  },
2437
  {
 
2484
  },
2485
  {
2486
  "epoch": 45.02,
2487
+ "eval_accuracy": 0.967741935483871,
2488
+ "eval_loss": 0.1329645812511444,
2489
+ "eval_runtime": 2.079,
2490
+ "eval_samples_per_second": 14.911,
2491
+ "eval_steps_per_second": 3.848,
2492
  "step": 3450
2493
  },
2494
  {
 
2535
  },
2536
  {
2537
  "epoch": 46.02,
2538
+ "eval_accuracy": 0.967741935483871,
2539
+ "eval_loss": 0.13405713438987732,
2540
+ "eval_runtime": 2.0851,
2541
+ "eval_samples_per_second": 14.867,
2542
+ "eval_steps_per_second": 3.837,
2543
  "step": 3525
2544
  },
2545
  {
 
2592
  },
2593
  {
2594
  "epoch": 47.02,
2595
+ "eval_accuracy": 0.967741935483871,
2596
+ "eval_loss": 0.13054759800434113,
2597
+ "eval_runtime": 2.2343,
2598
+ "eval_samples_per_second": 13.875,
2599
+ "eval_steps_per_second": 3.581,
2600
  "step": 3600
2601
  },
2602
  {
 
2608
  {
2609
  "epoch": 48.01,
2610
  "learning_rate": 1.925925925925926e-06,
2611
+ "loss": 0.0004,
2612
  "step": 3620
2613
  },
2614
  {
 
2620
  {
2621
  "epoch": 48.01,
2622
  "learning_rate": 1.6296296296296295e-06,
2623
+ "loss": 0.0003,
2624
  "step": 3640
2625
  },
2626
  {
 
2643
  },
2644
  {
2645
  "epoch": 48.02,
2646
+ "eval_accuracy": 0.967741935483871,
2647
+ "eval_loss": 0.1309322565793991,
2648
+ "eval_runtime": 2.2018,
2649
+ "eval_samples_per_second": 14.079,
2650
+ "eval_steps_per_second": 3.633,
2651
  "step": 3675
2652
  },
2653
  {
 
2700
  },
2701
  {
2702
  "epoch": 49.02,
2703
+ "eval_accuracy": 0.967741935483871,
2704
+ "eval_loss": 0.1303798407316208,
2705
+ "eval_runtime": 2.1527,
2706
+ "eval_samples_per_second": 14.4,
2707
+ "eval_steps_per_second": 3.716,
2708
  "step": 3750
2709
  },
2710
  {
2711
  "epoch": 49.02,
2712
  "step": 3750,
2713
  "total_flos": 1.86923023515648e+19,
2714
+ "train_loss": 0.16177092336689433,
2715
+ "train_runtime": 3695.0497,
2716
+ "train_samples_per_second": 4.059,
2717
+ "train_steps_per_second": 1.015
2718
  },
2719
  {
2720
  "epoch": 49.02,
2721
+ "eval_accuracy": 0.9078947368421053,
2722
+ "eval_loss": 0.4171188473701477,
2723
+ "eval_runtime": 7.7151,
2724
+ "eval_samples_per_second": 9.851,
2725
+ "eval_steps_per_second": 2.463,
2726
  "step": 3750
2727
  },
2728
  {
2729
  "epoch": 49.02,
2730
+ "eval_accuracy": 0.9078947368421053,
2731
+ "eval_loss": 0.41711878776550293,
2732
+ "eval_runtime": 5.3059,
2733
+ "eval_samples_per_second": 14.324,
2734
+ "eval_steps_per_second": 3.581,
2735
  "step": 3750
2736
  }
2737
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54be8404f3a06187402196bf7251b8491aa663c5d9c26aa1c6f1e5be6bcb9d33
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03b467334c18ac42b9abe41982148898c6d99bf9fffb9c66a198c9df721fe5c8
3
  size 3439