SanketJadhav commited on
Commit
99518a4
1 Parent(s): 1f6eb98

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
  "epoch": 5.0,
3
- "total_flos": 7.483912529986142e+18,
4
- "train_loss": 1.0602939221811207,
5
- "train_runtime": 5306.8618,
6
- "train_samples_per_second": 66.23,
7
- "train_steps_per_second": 0.517
 
 
 
 
 
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.9641475073981334,
4
+ "eval_loss": 0.1382417380809784,
5
+ "eval_runtime": 116.8179,
6
+ "eval_samples_per_second": 150.422,
7
+ "eval_steps_per_second": 4.708,
8
+ "total_flos": 7.485957524358513e+18,
9
+ "train_loss": 0.31472766021561754,
10
+ "train_runtime": 5227.0581,
11
+ "train_samples_per_second": 67.241,
12
+ "train_steps_per_second": 0.525
13
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9641475073981334,
4
+ "eval_loss": 0.1382417380809784,
5
+ "eval_runtime": 116.8179,
6
+ "eval_samples_per_second": 150.422,
7
+ "eval_steps_per_second": 4.708
8
+ }
runs/May01_11-37-02_f493e3a4fb8c/events.out.tfevents.1682952065.f493e3a4fb8c.8992.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a7c12ac19a8f9062a190764e2b623963f718b629993aa7a1f71ecfc544af009
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "total_flos": 7.483912529986142e+18,
4
- "train_loss": 1.0602939221811207,
5
- "train_runtime": 5306.8618,
6
- "train_samples_per_second": 66.23,
7
- "train_steps_per_second": 0.517
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "total_flos": 7.485957524358513e+18,
4
+ "train_loss": 0.31472766021561754,
5
+ "train_runtime": 5227.0581,
6
+ "train_samples_per_second": 67.241,
7
+ "train_steps_per_second": 0.525
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9624402458456636,
3
- "best_model_checkpoint": "resnet-50-finetuned-eurosat/checkpoint-2745",
4
  "epoch": 4.997724169321803,
5
  "global_step": 2745,
6
  "is_hyper_param_search": false,
@@ -9,1706 +9,1706 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.02,
12
- "learning_rate": 1.818181818181818e-06,
13
- "loss": 3.6408,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.04,
18
- "learning_rate": 3.636363636363636e-06,
19
- "loss": 3.6412,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.05,
24
- "learning_rate": 5.4545454545454545e-06,
25
- "loss": 3.641,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.07,
30
- "learning_rate": 7.272727272727272e-06,
31
- "loss": 3.6404,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.09,
36
- "learning_rate": 9.090909090909091e-06,
37
- "loss": 3.6347,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.11,
42
- "learning_rate": 1.0909090909090909e-05,
43
- "loss": 3.6377,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.13,
48
- "learning_rate": 1.2727272727272727e-05,
49
- "loss": 3.6316,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.15,
54
- "learning_rate": 1.4545454545454545e-05,
55
- "loss": 3.6339,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.16,
60
- "learning_rate": 1.6363636363636366e-05,
61
- "loss": 3.6301,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.18,
66
- "learning_rate": 1.8181818181818182e-05,
67
- "loss": 3.626,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.2,
72
- "learning_rate": 2e-05,
73
- "loss": 3.621,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.22,
78
- "learning_rate": 2.1818181818181818e-05,
79
- "loss": 3.6206,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.24,
84
- "learning_rate": 2.3636363636363637e-05,
85
- "loss": 3.6118,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.25,
90
- "learning_rate": 2.5454545454545454e-05,
91
- "loss": 3.6068,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.27,
96
- "learning_rate": 2.7272727272727273e-05,
97
- "loss": 3.598,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.29,
102
- "learning_rate": 2.909090909090909e-05,
103
- "loss": 3.5991,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.31,
108
- "learning_rate": 3.090909090909091e-05,
109
- "loss": 3.5924,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.33,
114
- "learning_rate": 3.272727272727273e-05,
115
- "loss": 3.5819,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.35,
120
- "learning_rate": 3.454545454545455e-05,
121
- "loss": 3.5726,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.36,
126
- "learning_rate": 3.6363636363636364e-05,
127
- "loss": 3.5638,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.38,
132
- "learning_rate": 3.818181818181819e-05,
133
- "loss": 3.5568,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.4,
138
- "learning_rate": 4e-05,
139
- "loss": 3.5451,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.42,
144
- "learning_rate": 4.181818181818182e-05,
145
- "loss": 3.5333,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.44,
150
- "learning_rate": 4.3636363636363636e-05,
151
- "loss": 3.5256,
152
  "step": 240
153
  },
154
  {
155
  "epoch": 0.46,
156
- "learning_rate": 4.545454545454546e-05,
157
- "loss": 3.5079,
158
  "step": 250
159
  },
160
  {
161
  "epoch": 0.47,
162
- "learning_rate": 4.7272727272727275e-05,
163
- "loss": 3.4865,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.49,
168
- "learning_rate": 4.909090909090909e-05,
169
- "loss": 3.4667,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.51,
174
- "learning_rate": 4.989878542510122e-05,
175
- "loss": 3.4516,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.53,
180
- "learning_rate": 4.9696356275303645e-05,
181
- "loss": 3.427,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.55,
186
- "learning_rate": 4.9493927125506076e-05,
187
- "loss": 3.3999,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.56,
192
- "learning_rate": 4.9291497975708506e-05,
193
- "loss": 3.3732,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.58,
198
- "learning_rate": 4.908906882591093e-05,
199
- "loss": 3.3437,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.6,
204
- "learning_rate": 4.888663967611336e-05,
205
- "loss": 3.2873,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.62,
210
- "learning_rate": 4.868421052631579e-05,
211
- "loss": 3.2593,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 0.64,
216
- "learning_rate": 4.848178137651822e-05,
217
- "loss": 3.2006,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 0.66,
222
- "learning_rate": 4.8279352226720646e-05,
223
- "loss": 3.1593,
224
  "step": 360
225
  },
226
  {
227
  "epoch": 0.67,
228
- "learning_rate": 4.8076923076923084e-05,
229
- "loss": 3.1129,
230
  "step": 370
231
  },
232
  {
233
  "epoch": 0.69,
234
- "learning_rate": 4.787449392712551e-05,
235
- "loss": 3.0435,
236
  "step": 380
237
  },
238
  {
239
  "epoch": 0.71,
240
- "learning_rate": 4.767206477732794e-05,
241
- "loss": 2.9742,
242
  "step": 390
243
  },
244
  {
245
  "epoch": 0.73,
246
- "learning_rate": 4.746963562753037e-05,
247
- "loss": 2.9306,
248
  "step": 400
249
  },
250
  {
251
  "epoch": 0.75,
252
- "learning_rate": 4.726720647773279e-05,
253
- "loss": 2.8627,
254
  "step": 410
255
  },
256
  {
257
  "epoch": 0.76,
258
- "learning_rate": 4.7064777327935223e-05,
259
- "loss": 2.8301,
260
  "step": 420
261
  },
262
  {
263
  "epoch": 0.78,
264
- "learning_rate": 4.6862348178137654e-05,
265
- "loss": 2.7201,
266
  "step": 430
267
  },
268
  {
269
  "epoch": 0.8,
270
- "learning_rate": 4.6659919028340085e-05,
271
- "loss": 2.6538,
272
  "step": 440
273
  },
274
  {
275
  "epoch": 0.82,
276
- "learning_rate": 4.6457489878542516e-05,
277
- "loss": 2.5858,
278
  "step": 450
279
  },
280
  {
281
  "epoch": 0.84,
282
- "learning_rate": 4.625506072874494e-05,
283
- "loss": 2.4747,
284
  "step": 460
285
  },
286
  {
287
  "epoch": 0.86,
288
- "learning_rate": 4.605263157894737e-05,
289
- "loss": 2.41,
290
  "step": 470
291
  },
292
  {
293
  "epoch": 0.87,
294
- "learning_rate": 4.58502024291498e-05,
295
- "loss": 2.366,
296
  "step": 480
297
  },
298
  {
299
  "epoch": 0.89,
300
- "learning_rate": 4.564777327935223e-05,
301
- "loss": 2.3017,
302
  "step": 490
303
  },
304
  {
305
  "epoch": 0.91,
306
- "learning_rate": 4.5445344129554655e-05,
307
- "loss": 2.2318,
308
  "step": 500
309
  },
310
  {
311
  "epoch": 0.93,
312
- "learning_rate": 4.524291497975709e-05,
313
- "loss": 2.1774,
314
  "step": 510
315
  },
316
  {
317
  "epoch": 0.95,
318
- "learning_rate": 4.504048582995952e-05,
319
- "loss": 2.089,
320
  "step": 520
321
  },
322
  {
323
  "epoch": 0.96,
324
- "learning_rate": 4.483805668016194e-05,
325
- "loss": 2.0213,
326
  "step": 530
327
  },
328
  {
329
  "epoch": 0.98,
330
- "learning_rate": 4.463562753036438e-05,
331
- "loss": 1.9386,
332
  "step": 540
333
  },
334
  {
335
  "epoch": 1.0,
336
- "eval_accuracy": 0.7653084452538129,
337
- "eval_loss": 1.521803379058838,
338
- "eval_runtime": 116.2062,
339
- "eval_samples_per_second": 151.214,
340
- "eval_steps_per_second": 4.733,
341
  "step": 549
342
  },
343
  {
344
  "epoch": 1.0,
345
- "learning_rate": 4.44331983805668e-05,
346
- "loss": 1.9215,
347
  "step": 550
348
  },
349
  {
350
  "epoch": 1.02,
351
- "learning_rate": 4.423076923076923e-05,
352
- "loss": 1.8288,
353
  "step": 560
354
  },
355
  {
356
  "epoch": 1.04,
357
- "learning_rate": 4.402834008097166e-05,
358
- "loss": 1.761,
359
  "step": 570
360
  },
361
  {
362
  "epoch": 1.06,
363
- "learning_rate": 4.3825910931174094e-05,
364
- "loss": 1.7306,
365
  "step": 580
366
  },
367
  {
368
  "epoch": 1.07,
369
- "learning_rate": 4.362348178137652e-05,
370
- "loss": 1.6337,
371
  "step": 590
372
  },
373
  {
374
  "epoch": 1.09,
375
- "learning_rate": 4.342105263157895e-05,
376
- "loss": 1.5869,
377
  "step": 600
378
  },
379
  {
380
  "epoch": 1.11,
381
- "learning_rate": 4.321862348178138e-05,
382
- "loss": 1.5347,
383
  "step": 610
384
  },
385
  {
386
  "epoch": 1.13,
387
- "learning_rate": 4.30161943319838e-05,
388
- "loss": 1.4658,
389
  "step": 620
390
  },
391
  {
392
  "epoch": 1.15,
393
- "learning_rate": 4.281376518218624e-05,
394
- "loss": 1.444,
395
  "step": 630
396
  },
397
  {
398
  "epoch": 1.17,
399
- "learning_rate": 4.2611336032388664e-05,
400
- "loss": 1.3742,
401
  "step": 640
402
  },
403
  {
404
  "epoch": 1.18,
405
- "learning_rate": 4.2408906882591095e-05,
406
- "loss": 1.3474,
407
  "step": 650
408
  },
409
  {
410
  "epoch": 1.2,
411
- "learning_rate": 4.2206477732793526e-05,
412
- "loss": 1.3503,
413
  "step": 660
414
  },
415
  {
416
  "epoch": 1.22,
417
- "learning_rate": 4.200404858299595e-05,
418
- "loss": 1.2557,
419
  "step": 670
420
  },
421
  {
422
  "epoch": 1.24,
423
- "learning_rate": 4.180161943319838e-05,
424
- "loss": 1.2054,
425
  "step": 680
426
  },
427
  {
428
  "epoch": 1.26,
429
- "learning_rate": 4.159919028340081e-05,
430
- "loss": 1.253,
431
  "step": 690
432
  },
433
  {
434
  "epoch": 1.27,
435
- "learning_rate": 4.139676113360324e-05,
436
- "loss": 1.1814,
437
  "step": 700
438
  },
439
  {
440
  "epoch": 1.29,
441
- "learning_rate": 4.1194331983805666e-05,
442
- "loss": 1.1699,
443
  "step": 710
444
  },
445
  {
446
  "epoch": 1.31,
447
- "learning_rate": 4.09919028340081e-05,
448
- "loss": 1.1464,
449
  "step": 720
450
  },
451
  {
452
  "epoch": 1.33,
453
- "learning_rate": 4.078947368421053e-05,
454
- "loss": 1.1289,
455
  "step": 730
456
  },
457
  {
458
  "epoch": 1.35,
459
- "learning_rate": 4.058704453441296e-05,
460
- "loss": 1.0347,
461
  "step": 740
462
  },
463
  {
464
  "epoch": 1.37,
465
- "learning_rate": 4.038461538461539e-05,
466
- "loss": 1.0617,
467
  "step": 750
468
  },
469
  {
470
  "epoch": 1.38,
471
- "learning_rate": 4.018218623481781e-05,
472
- "loss": 1.0069,
473
  "step": 760
474
  },
475
  {
476
  "epoch": 1.4,
477
- "learning_rate": 3.997975708502025e-05,
478
- "loss": 0.9519,
479
  "step": 770
480
  },
481
  {
482
  "epoch": 1.42,
483
- "learning_rate": 3.9777327935222674e-05,
484
- "loss": 0.9229,
485
  "step": 780
486
  },
487
  {
488
  "epoch": 1.44,
489
- "learning_rate": 3.9574898785425104e-05,
490
- "loss": 0.9309,
491
  "step": 790
492
  },
493
  {
494
  "epoch": 1.46,
495
- "learning_rate": 3.9372469635627535e-05,
496
- "loss": 0.8647,
497
  "step": 800
498
  },
499
  {
500
  "epoch": 1.47,
501
- "learning_rate": 3.917004048582996e-05,
502
- "loss": 0.8625,
503
  "step": 810
504
  },
505
  {
506
  "epoch": 1.49,
507
- "learning_rate": 3.896761133603239e-05,
508
- "loss": 0.9097,
509
  "step": 820
510
  },
511
  {
512
  "epoch": 1.51,
513
- "learning_rate": 3.876518218623482e-05,
514
- "loss": 0.8751,
515
  "step": 830
516
  },
517
  {
518
  "epoch": 1.53,
519
- "learning_rate": 3.856275303643725e-05,
520
- "loss": 0.8613,
521
  "step": 840
522
  },
523
  {
524
  "epoch": 1.55,
525
- "learning_rate": 3.8360323886639675e-05,
526
- "loss": 0.8289,
527
  "step": 850
528
  },
529
  {
530
  "epoch": 1.57,
531
- "learning_rate": 3.815789473684211e-05,
532
- "loss": 0.8145,
533
  "step": 860
534
  },
535
  {
536
  "epoch": 1.58,
537
- "learning_rate": 3.7955465587044536e-05,
538
- "loss": 0.8031,
539
  "step": 870
540
  },
541
  {
542
  "epoch": 1.6,
543
- "learning_rate": 3.775303643724697e-05,
544
- "loss": 0.844,
545
  "step": 880
546
  },
547
  {
548
  "epoch": 1.62,
549
- "learning_rate": 3.75506072874494e-05,
550
- "loss": 0.8124,
551
  "step": 890
552
  },
553
  {
554
  "epoch": 1.64,
555
- "learning_rate": 3.734817813765182e-05,
556
- "loss": 0.7609,
557
  "step": 900
558
  },
559
  {
560
  "epoch": 1.66,
561
- "learning_rate": 3.714574898785425e-05,
562
- "loss": 0.7603,
563
  "step": 910
564
  },
565
  {
566
  "epoch": 1.68,
567
- "learning_rate": 3.694331983805668e-05,
568
- "loss": 0.7959,
569
  "step": 920
570
  },
571
  {
572
  "epoch": 1.69,
573
- "learning_rate": 3.6740890688259113e-05,
574
- "loss": 0.719,
575
  "step": 930
576
  },
577
  {
578
  "epoch": 1.71,
579
- "learning_rate": 3.653846153846154e-05,
580
- "loss": 0.7447,
581
  "step": 940
582
  },
583
  {
584
  "epoch": 1.73,
585
- "learning_rate": 3.633603238866397e-05,
586
- "loss": 0.7588,
587
  "step": 950
588
  },
589
  {
590
  "epoch": 1.75,
591
- "learning_rate": 3.61336032388664e-05,
592
- "loss": 0.6941,
593
  "step": 960
594
  },
595
  {
596
  "epoch": 1.77,
597
- "learning_rate": 3.593117408906882e-05,
598
- "loss": 0.709,
599
  "step": 970
600
  },
601
  {
602
  "epoch": 1.78,
603
- "learning_rate": 3.572874493927126e-05,
604
- "loss": 0.7446,
605
  "step": 980
606
  },
607
  {
608
  "epoch": 1.8,
609
- "learning_rate": 3.5526315789473684e-05,
610
- "loss": 0.6729,
611
  "step": 990
612
  },
613
  {
614
  "epoch": 1.82,
615
- "learning_rate": 3.5323886639676115e-05,
616
- "loss": 0.6262,
617
  "step": 1000
618
  },
619
  {
620
  "epoch": 1.84,
621
- "learning_rate": 3.5121457489878545e-05,
622
- "loss": 0.6725,
623
  "step": 1010
624
  },
625
  {
626
  "epoch": 1.86,
627
- "learning_rate": 3.491902834008097e-05,
628
- "loss": 0.6287,
629
  "step": 1020
630
  },
631
  {
632
  "epoch": 1.88,
633
- "learning_rate": 3.47165991902834e-05,
634
- "loss": 0.6443,
635
  "step": 1030
636
  },
637
  {
638
  "epoch": 1.89,
639
- "learning_rate": 3.451417004048583e-05,
640
- "loss": 0.6444,
641
  "step": 1040
642
  },
643
  {
644
  "epoch": 1.91,
645
- "learning_rate": 3.431174089068826e-05,
646
- "loss": 0.6219,
647
  "step": 1050
648
  },
649
  {
650
  "epoch": 1.93,
651
- "learning_rate": 3.410931174089069e-05,
652
- "loss": 0.6271,
653
  "step": 1060
654
  },
655
  {
656
  "epoch": 1.95,
657
- "learning_rate": 3.390688259109312e-05,
658
- "loss": 0.6186,
659
  "step": 1070
660
  },
661
  {
662
  "epoch": 1.97,
663
- "learning_rate": 3.3704453441295546e-05,
664
- "loss": 0.5921,
665
  "step": 1080
666
  },
667
  {
668
  "epoch": 1.98,
669
- "learning_rate": 3.350202429149798e-05,
670
- "loss": 0.5662,
671
  "step": 1090
672
  },
673
  {
674
  "epoch": 2.0,
675
- "eval_accuracy": 0.9288071932620078,
676
- "eval_loss": 0.33246490359306335,
677
- "eval_runtime": 116.3356,
678
- "eval_samples_per_second": 151.046,
679
- "eval_steps_per_second": 4.728,
680
  "step": 1098
681
  },
682
  {
683
  "epoch": 2.0,
684
- "learning_rate": 3.329959514170041e-05,
685
- "loss": 0.6063,
686
  "step": 1100
687
  },
688
  {
689
  "epoch": 2.02,
690
- "learning_rate": 3.309716599190283e-05,
691
- "loss": 0.5733,
692
  "step": 1110
693
  },
694
  {
695
  "epoch": 2.04,
696
- "learning_rate": 3.289473684210527e-05,
697
- "loss": 0.5662,
698
  "step": 1120
699
  },
700
  {
701
  "epoch": 2.06,
702
- "learning_rate": 3.269230769230769e-05,
703
- "loss": 0.5893,
704
  "step": 1130
705
  },
706
  {
707
  "epoch": 2.08,
708
- "learning_rate": 3.2489878542510124e-05,
709
- "loss": 0.5498,
710
  "step": 1140
711
  },
712
  {
713
  "epoch": 2.09,
714
- "learning_rate": 3.2287449392712554e-05,
715
- "loss": 0.5416,
716
  "step": 1150
717
  },
718
  {
719
  "epoch": 2.11,
720
- "learning_rate": 3.208502024291498e-05,
721
- "loss": 0.5271,
722
  "step": 1160
723
  },
724
  {
725
  "epoch": 2.13,
726
- "learning_rate": 3.188259109311741e-05,
727
- "loss": 0.5506,
728
  "step": 1170
729
  },
730
  {
731
  "epoch": 2.15,
732
- "learning_rate": 3.168016194331984e-05,
733
- "loss": 0.5227,
734
  "step": 1180
735
  },
736
  {
737
  "epoch": 2.17,
738
- "learning_rate": 3.147773279352227e-05,
739
- "loss": 0.5379,
740
  "step": 1190
741
  },
742
  {
743
  "epoch": 2.18,
744
- "learning_rate": 3.1275303643724694e-05,
745
- "loss": 0.4872,
746
  "step": 1200
747
  },
748
  {
749
  "epoch": 2.2,
750
- "learning_rate": 3.107287449392713e-05,
751
- "loss": 0.5166,
752
  "step": 1210
753
  },
754
  {
755
  "epoch": 2.22,
756
- "learning_rate": 3.0870445344129556e-05,
757
- "loss": 0.5416,
758
  "step": 1220
759
  },
760
  {
761
  "epoch": 2.24,
762
- "learning_rate": 3.0668016194331986e-05,
763
- "loss": 0.5171,
764
  "step": 1230
765
  },
766
  {
767
  "epoch": 2.26,
768
- "learning_rate": 3.0465587044534417e-05,
769
- "loss": 0.4827,
770
  "step": 1240
771
  },
772
  {
773
  "epoch": 2.28,
774
- "learning_rate": 3.0263157894736844e-05,
775
- "loss": 0.5535,
776
  "step": 1250
777
  },
778
  {
779
  "epoch": 2.29,
780
- "learning_rate": 3.006072874493927e-05,
781
- "loss": 0.5095,
782
  "step": 1260
783
  },
784
  {
785
  "epoch": 2.31,
786
- "learning_rate": 2.9858299595141702e-05,
787
- "loss": 0.4874,
788
  "step": 1270
789
  },
790
  {
791
  "epoch": 2.33,
792
- "learning_rate": 2.965587044534413e-05,
793
- "loss": 0.4798,
794
  "step": 1280
795
  },
796
  {
797
  "epoch": 2.35,
798
- "learning_rate": 2.9453441295546557e-05,
799
- "loss": 0.487,
800
  "step": 1290
801
  },
802
  {
803
  "epoch": 2.37,
804
- "learning_rate": 2.925101214574899e-05,
805
- "loss": 0.4855,
806
  "step": 1300
807
  },
808
  {
809
  "epoch": 2.39,
810
- "learning_rate": 2.9048582995951418e-05,
811
- "loss": 0.4818,
812
  "step": 1310
813
  },
814
  {
815
  "epoch": 2.4,
816
- "learning_rate": 2.8846153846153845e-05,
817
- "loss": 0.4359,
818
  "step": 1320
819
  },
820
  {
821
  "epoch": 2.42,
822
- "learning_rate": 2.8643724696356276e-05,
823
- "loss": 0.4553,
824
  "step": 1330
825
  },
826
  {
827
  "epoch": 2.44,
828
- "learning_rate": 2.8441295546558703e-05,
829
- "loss": 0.4566,
830
  "step": 1340
831
  },
832
  {
833
  "epoch": 2.46,
834
- "learning_rate": 2.823886639676113e-05,
835
- "loss": 0.468,
836
  "step": 1350
837
  },
838
  {
839
  "epoch": 2.48,
840
- "learning_rate": 2.8036437246963565e-05,
841
- "loss": 0.4448,
842
  "step": 1360
843
  },
844
  {
845
  "epoch": 2.49,
846
- "learning_rate": 2.7834008097165992e-05,
847
- "loss": 0.4629,
848
  "step": 1370
849
  },
850
  {
851
  "epoch": 2.51,
852
- "learning_rate": 2.7631578947368426e-05,
853
- "loss": 0.4876,
854
  "step": 1380
855
  },
856
  {
857
  "epoch": 2.53,
858
- "learning_rate": 2.7429149797570853e-05,
859
- "loss": 0.4499,
860
  "step": 1390
861
  },
862
  {
863
  "epoch": 2.55,
864
- "learning_rate": 2.722672064777328e-05,
865
- "loss": 0.4449,
866
  "step": 1400
867
  },
868
  {
869
  "epoch": 2.57,
870
- "learning_rate": 2.702429149797571e-05,
871
- "loss": 0.404,
872
  "step": 1410
873
  },
874
  {
875
  "epoch": 2.59,
876
- "learning_rate": 2.682186234817814e-05,
877
- "loss": 0.4521,
878
  "step": 1420
879
  },
880
  {
881
  "epoch": 2.6,
882
- "learning_rate": 2.6619433198380566e-05,
883
- "loss": 0.4075,
884
  "step": 1430
885
  },
886
  {
887
  "epoch": 2.62,
888
- "learning_rate": 2.6417004048583e-05,
889
- "loss": 0.4528,
890
  "step": 1440
891
  },
892
  {
893
  "epoch": 2.64,
894
- "learning_rate": 2.6214574898785427e-05,
895
- "loss": 0.4615,
896
  "step": 1450
897
  },
898
  {
899
  "epoch": 2.66,
900
- "learning_rate": 2.6012145748987855e-05,
901
- "loss": 0.4654,
902
  "step": 1460
903
  },
904
  {
905
  "epoch": 2.68,
906
- "learning_rate": 2.5809716599190285e-05,
907
- "loss": 0.425,
908
  "step": 1470
909
  },
910
  {
911
  "epoch": 2.69,
912
- "learning_rate": 2.5607287449392713e-05,
913
- "loss": 0.409,
914
  "step": 1480
915
  },
916
  {
917
  "epoch": 2.71,
918
- "learning_rate": 2.540485829959514e-05,
919
- "loss": 0.4284,
920
  "step": 1490
921
  },
922
  {
923
  "epoch": 2.73,
924
- "learning_rate": 2.5202429149797574e-05,
925
- "loss": 0.4122,
926
  "step": 1500
927
  },
928
  {
929
  "epoch": 2.75,
930
- "learning_rate": 2.5e-05,
931
- "loss": 0.4273,
932
  "step": 1510
933
  },
934
  {
935
  "epoch": 2.77,
936
- "learning_rate": 2.4797570850202432e-05,
937
- "loss": 0.3865,
938
  "step": 1520
939
  },
940
  {
941
  "epoch": 2.79,
942
- "learning_rate": 2.459514170040486e-05,
943
- "loss": 0.378,
944
  "step": 1530
945
  },
946
  {
947
  "epoch": 2.8,
948
- "learning_rate": 2.439271255060729e-05,
949
- "loss": 0.4051,
950
  "step": 1540
951
  },
952
  {
953
  "epoch": 2.82,
954
- "learning_rate": 2.4190283400809717e-05,
955
- "loss": 0.4127,
956
  "step": 1550
957
  },
958
  {
959
  "epoch": 2.84,
960
- "learning_rate": 2.3987854251012144e-05,
961
- "loss": 0.4141,
962
  "step": 1560
963
  },
964
  {
965
  "epoch": 2.86,
966
- "learning_rate": 2.3785425101214575e-05,
967
- "loss": 0.3842,
968
  "step": 1570
969
  },
970
  {
971
  "epoch": 2.88,
972
- "learning_rate": 2.3582995951417006e-05,
973
- "loss": 0.423,
974
  "step": 1580
975
  },
976
  {
977
  "epoch": 2.89,
978
- "learning_rate": 2.3380566801619436e-05,
979
- "loss": 0.3714,
980
  "step": 1590
981
  },
982
  {
983
  "epoch": 2.91,
984
- "learning_rate": 2.3178137651821864e-05,
985
- "loss": 0.3949,
986
  "step": 1600
987
  },
988
  {
989
  "epoch": 2.93,
990
- "learning_rate": 2.2975708502024294e-05,
991
- "loss": 0.398,
992
  "step": 1610
993
  },
994
  {
995
  "epoch": 2.95,
996
- "learning_rate": 2.2773279352226722e-05,
997
- "loss": 0.3838,
998
  "step": 1620
999
  },
1000
  {
1001
  "epoch": 2.97,
1002
- "learning_rate": 2.257085020242915e-05,
1003
- "loss": 0.365,
1004
  "step": 1630
1005
  },
1006
  {
1007
  "epoch": 2.99,
1008
- "learning_rate": 2.236842105263158e-05,
1009
- "loss": 0.4104,
1010
  "step": 1640
1011
  },
1012
  {
1013
  "epoch": 3.0,
1014
- "eval_accuracy": 0.9561233781015251,
1015
- "eval_loss": 0.18532179296016693,
1016
- "eval_runtime": 113.1925,
1017
- "eval_samples_per_second": 155.24,
1018
- "eval_steps_per_second": 4.859,
1019
  "step": 1647
1020
  },
1021
  {
1022
  "epoch": 3.0,
1023
- "learning_rate": 2.216599190283401e-05,
1024
- "loss": 0.4087,
1025
  "step": 1650
1026
  },
1027
  {
1028
  "epoch": 3.02,
1029
- "learning_rate": 2.1963562753036438e-05,
1030
- "loss": 0.3953,
1031
  "step": 1660
1032
  },
1033
  {
1034
  "epoch": 3.04,
1035
- "learning_rate": 2.1761133603238868e-05,
1036
- "loss": 0.3972,
1037
  "step": 1670
1038
  },
1039
  {
1040
  "epoch": 3.06,
1041
- "learning_rate": 2.15587044534413e-05,
1042
- "loss": 0.4105,
1043
  "step": 1680
1044
  },
1045
  {
1046
  "epoch": 3.08,
1047
- "learning_rate": 2.1356275303643726e-05,
1048
- "loss": 0.3761,
1049
  "step": 1690
1050
  },
1051
  {
1052
  "epoch": 3.1,
1053
- "learning_rate": 2.1153846153846154e-05,
1054
- "loss": 0.3555,
1055
  "step": 1700
1056
  },
1057
  {
1058
  "epoch": 3.11,
1059
- "learning_rate": 2.0951417004048584e-05,
1060
- "loss": 0.3801,
1061
  "step": 1710
1062
  },
1063
  {
1064
  "epoch": 3.13,
1065
- "learning_rate": 2.074898785425101e-05,
1066
- "loss": 0.402,
1067
  "step": 1720
1068
  },
1069
  {
1070
  "epoch": 3.15,
1071
- "learning_rate": 2.0546558704453442e-05,
1072
- "loss": 0.3929,
1073
  "step": 1730
1074
  },
1075
  {
1076
  "epoch": 3.17,
1077
- "learning_rate": 2.0344129554655873e-05,
1078
- "loss": 0.3884,
1079
  "step": 1740
1080
  },
1081
  {
1082
  "epoch": 3.19,
1083
- "learning_rate": 2.0141700404858304e-05,
1084
- "loss": 0.3853,
1085
  "step": 1750
1086
  },
1087
  {
1088
  "epoch": 3.2,
1089
- "learning_rate": 1.9939271255060727e-05,
1090
- "loss": 0.3869,
1091
  "step": 1760
1092
  },
1093
  {
1094
  "epoch": 3.22,
1095
- "learning_rate": 1.9736842105263158e-05,
1096
- "loss": 0.3758,
1097
  "step": 1770
1098
  },
1099
  {
1100
  "epoch": 3.24,
1101
- "learning_rate": 1.953441295546559e-05,
1102
- "loss": 0.3961,
1103
  "step": 1780
1104
  },
1105
  {
1106
  "epoch": 3.26,
1107
- "learning_rate": 1.9331983805668016e-05,
1108
- "loss": 0.3389,
1109
  "step": 1790
1110
  },
1111
  {
1112
  "epoch": 3.28,
1113
- "learning_rate": 1.9129554655870447e-05,
1114
- "loss": 0.3624,
1115
  "step": 1800
1116
  },
1117
  {
1118
  "epoch": 3.3,
1119
- "learning_rate": 1.8927125506072877e-05,
1120
- "loss": 0.3905,
1121
  "step": 1810
1122
  },
1123
  {
1124
  "epoch": 3.31,
1125
- "learning_rate": 1.8724696356275305e-05,
1126
- "loss": 0.3685,
1127
  "step": 1820
1128
  },
1129
  {
1130
  "epoch": 3.33,
1131
- "learning_rate": 1.8522267206477732e-05,
1132
- "loss": 0.3582,
1133
  "step": 1830
1134
  },
1135
  {
1136
  "epoch": 3.35,
1137
- "learning_rate": 1.8319838056680163e-05,
1138
- "loss": 0.3785,
1139
  "step": 1840
1140
  },
1141
  {
1142
  "epoch": 3.37,
1143
- "learning_rate": 1.811740890688259e-05,
1144
- "loss": 0.3758,
1145
  "step": 1850
1146
  },
1147
  {
1148
  "epoch": 3.39,
1149
- "learning_rate": 1.791497975708502e-05,
1150
- "loss": 0.386,
1151
  "step": 1860
1152
  },
1153
  {
1154
  "epoch": 3.4,
1155
- "learning_rate": 1.771255060728745e-05,
1156
- "loss": 0.3565,
1157
  "step": 1870
1158
  },
1159
  {
1160
  "epoch": 3.42,
1161
- "learning_rate": 1.751012145748988e-05,
1162
- "loss": 0.3423,
1163
  "step": 1880
1164
  },
1165
  {
1166
  "epoch": 3.44,
1167
- "learning_rate": 1.730769230769231e-05,
1168
- "loss": 0.3859,
1169
  "step": 1890
1170
  },
1171
  {
1172
  "epoch": 3.46,
1173
- "learning_rate": 1.7105263157894737e-05,
1174
- "loss": 0.3606,
1175
  "step": 1900
1176
  },
1177
  {
1178
  "epoch": 3.48,
1179
- "learning_rate": 1.6902834008097167e-05,
1180
- "loss": 0.3829,
1181
  "step": 1910
1182
  },
1183
  {
1184
  "epoch": 3.5,
1185
- "learning_rate": 1.6700404858299595e-05,
1186
- "loss": 0.3409,
1187
  "step": 1920
1188
  },
1189
  {
1190
  "epoch": 3.51,
1191
- "learning_rate": 1.6497975708502025e-05,
1192
- "loss": 0.358,
1193
  "step": 1930
1194
  },
1195
  {
1196
  "epoch": 3.53,
1197
- "learning_rate": 1.6295546558704456e-05,
1198
- "loss": 0.3182,
1199
  "step": 1940
1200
  },
1201
  {
1202
  "epoch": 3.55,
1203
- "learning_rate": 1.6093117408906883e-05,
1204
- "loss": 0.3595,
1205
  "step": 1950
1206
  },
1207
  {
1208
  "epoch": 3.57,
1209
- "learning_rate": 1.5890688259109314e-05,
1210
- "loss": 0.3536,
1211
  "step": 1960
1212
  },
1213
  {
1214
  "epoch": 3.59,
1215
- "learning_rate": 1.568825910931174e-05,
1216
- "loss": 0.3818,
1217
  "step": 1970
1218
  },
1219
  {
1220
  "epoch": 3.6,
1221
- "learning_rate": 1.548582995951417e-05,
1222
- "loss": 0.3401,
1223
  "step": 1980
1224
  },
1225
  {
1226
  "epoch": 3.62,
1227
- "learning_rate": 1.52834008097166e-05,
1228
- "loss": 0.3554,
1229
  "step": 1990
1230
  },
1231
  {
1232
  "epoch": 3.64,
1233
- "learning_rate": 1.508097165991903e-05,
1234
- "loss": 0.3465,
1235
  "step": 2000
1236
  },
1237
  {
1238
  "epoch": 3.66,
1239
- "learning_rate": 1.4878542510121457e-05,
1240
- "loss": 0.3482,
1241
  "step": 2010
1242
  },
1243
  {
1244
  "epoch": 3.68,
1245
- "learning_rate": 1.4676113360323888e-05,
1246
- "loss": 0.3492,
1247
  "step": 2020
1248
  },
1249
  {
1250
  "epoch": 3.7,
1251
- "learning_rate": 1.4473684210526317e-05,
1252
- "loss": 0.3366,
1253
  "step": 2030
1254
  },
1255
  {
1256
  "epoch": 3.71,
1257
- "learning_rate": 1.4271255060728744e-05,
1258
- "loss": 0.3392,
1259
  "step": 2040
1260
  },
1261
  {
1262
  "epoch": 3.73,
1263
- "learning_rate": 1.4068825910931175e-05,
1264
- "loss": 0.3593,
1265
  "step": 2050
1266
  },
1267
  {
1268
  "epoch": 3.75,
1269
- "learning_rate": 1.3866396761133604e-05,
1270
- "loss": 0.3961,
1271
  "step": 2060
1272
  },
1273
  {
1274
  "epoch": 3.77,
1275
- "learning_rate": 1.3663967611336034e-05,
1276
- "loss": 0.3996,
1277
  "step": 2070
1278
  },
1279
  {
1280
  "epoch": 3.79,
1281
- "learning_rate": 1.3461538461538462e-05,
1282
- "loss": 0.3178,
1283
  "step": 2080
1284
  },
1285
  {
1286
  "epoch": 3.81,
1287
- "learning_rate": 1.3259109311740892e-05,
1288
- "loss": 0.3641,
1289
  "step": 2090
1290
  },
1291
  {
1292
  "epoch": 3.82,
1293
- "learning_rate": 1.3056680161943321e-05,
1294
- "loss": 0.3176,
1295
  "step": 2100
1296
  },
1297
  {
1298
  "epoch": 3.84,
1299
- "learning_rate": 1.2854251012145749e-05,
1300
- "loss": 0.3189,
1301
  "step": 2110
1302
  },
1303
  {
1304
  "epoch": 3.86,
1305
- "learning_rate": 1.265182186234818e-05,
1306
- "loss": 0.3663,
1307
  "step": 2120
1308
  },
1309
  {
1310
  "epoch": 3.88,
1311
- "learning_rate": 1.2449392712550607e-05,
1312
- "loss": 0.3105,
1313
  "step": 2130
1314
  },
1315
  {
1316
  "epoch": 3.9,
1317
- "learning_rate": 1.2246963562753037e-05,
1318
- "loss": 0.3479,
1319
  "step": 2140
1320
  },
1321
  {
1322
  "epoch": 3.91,
1323
- "learning_rate": 1.2044534412955466e-05,
1324
- "loss": 0.3301,
1325
  "step": 2150
1326
  },
1327
  {
1328
  "epoch": 3.93,
1329
- "learning_rate": 1.1842105263157895e-05,
1330
- "loss": 0.3344,
1331
  "step": 2160
1332
  },
1333
  {
1334
  "epoch": 3.95,
1335
- "learning_rate": 1.1639676113360324e-05,
1336
- "loss": 0.3169,
1337
  "step": 2170
1338
  },
1339
  {
1340
  "epoch": 3.97,
1341
- "learning_rate": 1.1437246963562753e-05,
1342
- "loss": 0.3486,
1343
  "step": 2180
1344
  },
1345
  {
1346
  "epoch": 3.99,
1347
- "learning_rate": 1.1234817813765184e-05,
1348
- "loss": 0.3551,
1349
  "step": 2190
1350
  },
1351
  {
1352
  "epoch": 4.0,
1353
- "eval_accuracy": 0.9622695196904165,
1354
- "eval_loss": 0.14936527609825134,
1355
- "eval_runtime": 112.1506,
1356
- "eval_samples_per_second": 156.682,
1357
- "eval_steps_per_second": 4.904,
1358
  "step": 2197
1359
  },
1360
  {
1361
  "epoch": 4.01,
1362
- "learning_rate": 1.1032388663967611e-05,
1363
- "loss": 0.3307,
1364
  "step": 2200
1365
  },
1366
  {
1367
  "epoch": 4.02,
1368
- "learning_rate": 1.082995951417004e-05,
1369
- "loss": 0.3035,
1370
  "step": 2210
1371
  },
1372
  {
1373
  "epoch": 4.04,
1374
- "learning_rate": 1.062753036437247e-05,
1375
- "loss": 0.3192,
1376
  "step": 2220
1377
  },
1378
  {
1379
  "epoch": 4.06,
1380
- "learning_rate": 1.04251012145749e-05,
1381
- "loss": 0.3357,
1382
  "step": 2230
1383
  },
1384
  {
1385
  "epoch": 4.08,
1386
- "learning_rate": 1.0222672064777327e-05,
1387
- "loss": 0.3081,
1388
  "step": 2240
1389
  },
1390
  {
1391
  "epoch": 4.1,
1392
- "learning_rate": 1.0020242914979758e-05,
1393
- "loss": 0.3343,
1394
  "step": 2250
1395
  },
1396
  {
1397
  "epoch": 4.11,
1398
- "learning_rate": 9.817813765182187e-06,
1399
- "loss": 0.3438,
1400
  "step": 2260
1401
  },
1402
  {
1403
  "epoch": 4.13,
1404
- "learning_rate": 9.615384615384616e-06,
1405
- "loss": 0.2939,
1406
  "step": 2270
1407
  },
1408
  {
1409
  "epoch": 4.15,
1410
- "learning_rate": 9.412955465587045e-06,
1411
- "loss": 0.2954,
1412
  "step": 2280
1413
  },
1414
  {
1415
  "epoch": 4.17,
1416
- "learning_rate": 9.210526315789474e-06,
1417
- "loss": 0.3206,
1418
  "step": 2290
1419
  },
1420
  {
1421
  "epoch": 4.19,
1422
- "learning_rate": 9.008097165991904e-06,
1423
- "loss": 0.3116,
1424
  "step": 2300
1425
  },
1426
  {
1427
  "epoch": 4.21,
1428
- "learning_rate": 8.805668016194332e-06,
1429
- "loss": 0.3201,
1430
  "step": 2310
1431
  },
1432
  {
1433
  "epoch": 4.22,
1434
- "learning_rate": 8.60323886639676e-06,
1435
- "loss": 0.3439,
1436
  "step": 2320
1437
  },
1438
  {
1439
  "epoch": 4.24,
1440
- "learning_rate": 8.400809716599191e-06,
1441
- "loss": 0.3503,
1442
  "step": 2330
1443
  },
1444
  {
1445
  "epoch": 4.26,
1446
- "learning_rate": 8.19838056680162e-06,
1447
- "loss": 0.317,
1448
  "step": 2340
1449
  },
1450
  {
1451
  "epoch": 4.28,
1452
- "learning_rate": 7.99595141700405e-06,
1453
- "loss": 0.3113,
1454
  "step": 2350
1455
  },
1456
  {
1457
  "epoch": 4.3,
1458
- "learning_rate": 7.793522267206478e-06,
1459
- "loss": 0.311,
1460
  "step": 2360
1461
  },
1462
  {
1463
  "epoch": 4.31,
1464
- "learning_rate": 7.591093117408906e-06,
1465
- "loss": 0.3083,
1466
  "step": 2370
1467
  },
1468
  {
1469
  "epoch": 4.33,
1470
- "learning_rate": 7.388663967611337e-06,
1471
- "loss": 0.3165,
1472
  "step": 2380
1473
  },
1474
  {
1475
  "epoch": 4.35,
1476
- "learning_rate": 7.186234817813765e-06,
1477
- "loss": 0.3547,
1478
  "step": 2390
1479
  },
1480
  {
1481
  "epoch": 4.37,
1482
- "learning_rate": 6.983805668016195e-06,
1483
- "loss": 0.3071,
1484
  "step": 2400
1485
  },
1486
  {
1487
  "epoch": 4.39,
1488
- "learning_rate": 6.781376518218624e-06,
1489
- "loss": 0.3028,
1490
  "step": 2410
1491
  },
1492
  {
1493
  "epoch": 4.41,
1494
- "learning_rate": 6.578947368421053e-06,
1495
- "loss": 0.3215,
1496
  "step": 2420
1497
  },
1498
  {
1499
  "epoch": 4.42,
1500
- "learning_rate": 6.376518218623483e-06,
1501
- "loss": 0.3259,
1502
  "step": 2430
1503
  },
1504
  {
1505
  "epoch": 4.44,
1506
- "learning_rate": 6.174089068825911e-06,
1507
- "loss": 0.3515,
1508
  "step": 2440
1509
  },
1510
  {
1511
  "epoch": 4.46,
1512
- "learning_rate": 5.971659919028341e-06,
1513
- "loss": 0.3158,
1514
  "step": 2450
1515
  },
1516
  {
1517
  "epoch": 4.48,
1518
- "learning_rate": 5.76923076923077e-06,
1519
- "loss": 0.3067,
1520
  "step": 2460
1521
  },
1522
  {
1523
  "epoch": 4.5,
1524
- "learning_rate": 5.566801619433199e-06,
1525
- "loss": 0.324,
1526
  "step": 2470
1527
  },
1528
  {
1529
  "epoch": 4.52,
1530
- "learning_rate": 5.364372469635628e-06,
1531
- "loss": 0.3271,
1532
  "step": 2480
1533
  },
1534
  {
1535
  "epoch": 4.53,
1536
- "learning_rate": 5.161943319838057e-06,
1537
- "loss": 0.3367,
1538
  "step": 2490
1539
  },
1540
  {
1541
  "epoch": 4.55,
1542
- "learning_rate": 4.9595141700404865e-06,
1543
- "loss": 0.3166,
1544
  "step": 2500
1545
  },
1546
  {
1547
  "epoch": 4.57,
1548
- "learning_rate": 4.757085020242915e-06,
1549
- "loss": 0.3264,
1550
  "step": 2510
1551
  },
1552
  {
1553
  "epoch": 4.59,
1554
- "learning_rate": 4.5546558704453445e-06,
1555
- "loss": 0.3129,
1556
  "step": 2520
1557
  },
1558
  {
1559
  "epoch": 4.61,
1560
- "learning_rate": 4.3522267206477735e-06,
1561
- "loss": 0.3114,
1562
  "step": 2530
1563
  },
1564
  {
1565
  "epoch": 4.62,
1566
- "learning_rate": 4.149797570850203e-06,
1567
- "loss": 0.2751,
1568
  "step": 2540
1569
  },
1570
  {
1571
  "epoch": 4.64,
1572
- "learning_rate": 3.9473684210526315e-06,
1573
- "loss": 0.3191,
1574
  "step": 2550
1575
  },
1576
  {
1577
  "epoch": 4.66,
1578
- "learning_rate": 3.744939271255061e-06,
1579
- "loss": 0.3171,
1580
  "step": 2560
1581
  },
1582
  {
1583
  "epoch": 4.68,
1584
- "learning_rate": 3.5425101214574903e-06,
1585
- "loss": 0.2981,
1586
  "step": 2570
1587
  },
1588
  {
1589
  "epoch": 4.7,
1590
- "learning_rate": 3.3400809716599197e-06,
1591
- "loss": 0.3169,
1592
  "step": 2580
1593
  },
1594
  {
1595
  "epoch": 4.72,
1596
- "learning_rate": 3.1376518218623482e-06,
1597
- "loss": 0.304,
1598
  "step": 2590
1599
  },
1600
  {
1601
  "epoch": 4.73,
1602
- "learning_rate": 2.9352226720647772e-06,
1603
- "loss": 0.3021,
1604
  "step": 2600
1605
  },
1606
  {
1607
  "epoch": 4.75,
1608
- "learning_rate": 2.7327935222672066e-06,
1609
- "loss": 0.3106,
1610
  "step": 2610
1611
  },
1612
  {
1613
  "epoch": 4.77,
1614
- "learning_rate": 2.5303643724696356e-06,
1615
- "loss": 0.3066,
1616
  "step": 2620
1617
  },
1618
  {
1619
  "epoch": 4.79,
1620
- "learning_rate": 2.327935222672065e-06,
1621
- "loss": 0.3117,
1622
  "step": 2630
1623
  },
1624
  {
1625
  "epoch": 4.81,
1626
- "learning_rate": 2.125506072874494e-06,
1627
- "loss": 0.3079,
1628
  "step": 2640
1629
  },
1630
  {
1631
  "epoch": 4.82,
1632
- "learning_rate": 1.9230769230769234e-06,
1633
- "loss": 0.3316,
1634
  "step": 2650
1635
  },
1636
  {
1637
  "epoch": 4.84,
1638
- "learning_rate": 1.7206477732793522e-06,
1639
- "loss": 0.3103,
1640
  "step": 2660
1641
  },
1642
  {
1643
  "epoch": 4.86,
1644
- "learning_rate": 1.5182186234817814e-06,
1645
- "loss": 0.3034,
1646
  "step": 2670
1647
  },
1648
  {
1649
  "epoch": 4.88,
1650
- "learning_rate": 1.3157894736842106e-06,
1651
- "loss": 0.294,
1652
  "step": 2680
1653
  },
1654
  {
1655
  "epoch": 4.9,
1656
- "learning_rate": 1.1133603238866398e-06,
1657
- "loss": 0.3151,
1658
  "step": 2690
1659
  },
1660
  {
1661
  "epoch": 4.92,
1662
- "learning_rate": 9.109311740890688e-07,
1663
- "loss": 0.3544,
1664
  "step": 2700
1665
  },
1666
  {
1667
  "epoch": 4.93,
1668
- "learning_rate": 7.08502024291498e-07,
1669
- "loss": 0.3365,
1670
  "step": 2710
1671
  },
1672
  {
1673
  "epoch": 4.95,
1674
- "learning_rate": 5.060728744939271e-07,
1675
- "loss": 0.3181,
1676
  "step": 2720
1677
  },
1678
  {
1679
  "epoch": 4.97,
1680
- "learning_rate": 3.036437246963563e-07,
1681
- "loss": 0.303,
1682
  "step": 2730
1683
  },
1684
  {
1685
  "epoch": 4.99,
1686
- "learning_rate": 1.0121457489878543e-07,
1687
- "loss": 0.3174,
1688
  "step": 2740
1689
  },
1690
  {
1691
  "epoch": 5.0,
1692
- "eval_accuracy": 0.9624402458456636,
1693
- "eval_loss": 0.1446385681629181,
1694
- "eval_runtime": 113.1571,
1695
- "eval_samples_per_second": 155.288,
1696
- "eval_steps_per_second": 4.86,
1697
  "step": 2745
1698
  },
1699
  {
1700
  "epoch": 5.0,
1701
  "step": 2745,
1702
- "total_flos": 7.483912529986142e+18,
1703
- "train_loss": 1.0602939221811207,
1704
- "train_runtime": 5306.8618,
1705
- "train_samples_per_second": 66.23,
1706
- "train_steps_per_second": 0.517
1707
  }
1708
  ],
1709
  "max_steps": 2745,
1710
  "num_train_epochs": 5,
1711
- "total_flos": 7.483912529986142e+18,
1712
  "trial_name": null,
1713
  "trial_params": null
1714
  }
 
1
  {
2
+ "best_metric": 0.9641475073981334,
3
+ "best_model_checkpoint": "resnet-50-finetuned-eurosat/checkpoint-1098",
4
  "epoch": 4.997724169321803,
5
  "global_step": 2745,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.02,
12
+ "learning_rate": 0.0,
13
+ "loss": 0.3223,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.04,
18
+ "learning_rate": 0.0,
19
+ "loss": 0.3467,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.05,
24
+ "learning_rate": 0.0,
25
+ "loss": 0.2952,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.07,
30
+ "learning_rate": 0.0,
31
+ "loss": 0.3184,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.09,
36
+ "learning_rate": 0.0,
37
+ "loss": 0.3607,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.11,
42
+ "learning_rate": 0.0,
43
+ "loss": 0.2938,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.13,
48
+ "learning_rate": 0.0,
49
+ "loss": 0.3201,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.15,
54
+ "learning_rate": 0.0,
55
+ "loss": 0.307,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.16,
60
+ "learning_rate": 0.0,
61
+ "loss": 0.3133,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.18,
66
+ "learning_rate": 0.0,
67
+ "loss": 0.295,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.2,
72
+ "learning_rate": 0.0,
73
+ "loss": 0.3321,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.22,
78
+ "learning_rate": 0.0,
79
+ "loss": 0.3218,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.24,
84
+ "learning_rate": 0.0,
85
+ "loss": 0.3313,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.25,
90
+ "learning_rate": 0.0,
91
+ "loss": 0.3518,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.27,
96
+ "learning_rate": 0.0,
97
+ "loss": 0.319,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.29,
102
+ "learning_rate": 0.0,
103
+ "loss": 0.318,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.31,
108
+ "learning_rate": 0.0,
109
+ "loss": 0.3051,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.33,
114
+ "learning_rate": 0.0,
115
+ "loss": 0.304,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.35,
120
+ "learning_rate": 0.0,
121
+ "loss": 0.3102,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.36,
126
+ "learning_rate": 0.0,
127
+ "loss": 0.2951,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.38,
132
+ "learning_rate": 0.0,
133
+ "loss": 0.3074,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.4,
138
+ "learning_rate": 0.0,
139
+ "loss": 0.3245,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.42,
144
+ "learning_rate": 0.0,
145
+ "loss": 0.3138,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.44,
150
+ "learning_rate": 0.0,
151
+ "loss": 0.3479,
152
  "step": 240
153
  },
154
  {
155
  "epoch": 0.46,
156
+ "learning_rate": 0.0,
157
+ "loss": 0.344,
158
  "step": 250
159
  },
160
  {
161
  "epoch": 0.47,
162
+ "learning_rate": 0.0,
163
+ "loss": 0.3239,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.49,
168
+ "learning_rate": 0.0,
169
+ "loss": 0.3223,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.51,
174
+ "learning_rate": 0.0,
175
+ "loss": 0.3292,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.53,
180
+ "learning_rate": 0.0,
181
+ "loss": 0.2866,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.55,
186
+ "learning_rate": 0.0,
187
+ "loss": 0.3019,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.56,
192
+ "learning_rate": 0.0,
193
+ "loss": 0.3334,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.58,
198
+ "learning_rate": 0.0,
199
+ "loss": 0.2834,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.6,
204
+ "learning_rate": 0.0,
205
+ "loss": 0.3198,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.62,
210
+ "learning_rate": 0.0,
211
+ "loss": 0.3173,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 0.64,
216
+ "learning_rate": 0.0,
217
+ "loss": 0.3495,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 0.66,
222
+ "learning_rate": 0.0,
223
+ "loss": 0.3267,
224
  "step": 360
225
  },
226
  {
227
  "epoch": 0.67,
228
+ "learning_rate": 0.0,
229
+ "loss": 0.3229,
230
  "step": 370
231
  },
232
  {
233
  "epoch": 0.69,
234
+ "learning_rate": 0.0,
235
+ "loss": 0.3154,
236
  "step": 380
237
  },
238
  {
239
  "epoch": 0.71,
240
+ "learning_rate": 0.0,
241
+ "loss": 0.2958,
242
  "step": 390
243
  },
244
  {
245
  "epoch": 0.73,
246
+ "learning_rate": 0.0,
247
+ "loss": 0.3068,
248
  "step": 400
249
  },
250
  {
251
  "epoch": 0.75,
252
+ "learning_rate": 0.0,
253
+ "loss": 0.3443,
254
  "step": 410
255
  },
256
  {
257
  "epoch": 0.76,
258
+ "learning_rate": 0.0,
259
+ "loss": 0.3371,
260
  "step": 420
261
  },
262
  {
263
  "epoch": 0.78,
264
+ "learning_rate": 0.0,
265
+ "loss": 0.3311,
266
  "step": 430
267
  },
268
  {
269
  "epoch": 0.8,
270
+ "learning_rate": 0.0,
271
+ "loss": 0.3345,
272
  "step": 440
273
  },
274
  {
275
  "epoch": 0.82,
276
+ "learning_rate": 0.0,
277
+ "loss": 0.2798,
278
  "step": 450
279
  },
280
  {
281
  "epoch": 0.84,
282
+ "learning_rate": 0.0,
283
+ "loss": 0.2834,
284
  "step": 460
285
  },
286
  {
287
  "epoch": 0.86,
288
+ "learning_rate": 0.0,
289
+ "loss": 0.3295,
290
  "step": 470
291
  },
292
  {
293
  "epoch": 0.87,
294
+ "learning_rate": 0.0,
295
+ "loss": 0.3176,
296
  "step": 480
297
  },
298
  {
299
  "epoch": 0.89,
300
+ "learning_rate": 0.0,
301
+ "loss": 0.281,
302
  "step": 490
303
  },
304
  {
305
  "epoch": 0.91,
306
+ "learning_rate": 0.0,
307
+ "loss": 0.3066,
308
  "step": 500
309
  },
310
  {
311
  "epoch": 0.93,
312
+ "learning_rate": 0.0,
313
+ "loss": 0.2898,
314
  "step": 510
315
  },
316
  {
317
  "epoch": 0.95,
318
+ "learning_rate": 0.0,
319
+ "loss": 0.307,
320
  "step": 520
321
  },
322
  {
323
  "epoch": 0.96,
324
+ "learning_rate": 0.0,
325
+ "loss": 0.3126,
326
  "step": 530
327
  },
328
  {
329
  "epoch": 0.98,
330
+ "learning_rate": 0.0,
331
+ "loss": 0.2976,
332
  "step": 540
333
  },
334
  {
335
  "epoch": 1.0,
336
+ "eval_accuracy": 0.9635784202139768,
337
+ "eval_loss": 0.1450216919183731,
338
+ "eval_runtime": 111.4659,
339
+ "eval_samples_per_second": 157.645,
340
+ "eval_steps_per_second": 4.934,
341
  "step": 549
342
  },
343
  {
344
  "epoch": 1.0,
345
+ "learning_rate": 0.0,
346
+ "loss": 0.3563,
347
  "step": 550
348
  },
349
  {
350
  "epoch": 1.02,
351
+ "learning_rate": 0.0,
352
+ "loss": 0.3112,
353
  "step": 560
354
  },
355
  {
356
  "epoch": 1.04,
357
+ "learning_rate": 0.0,
358
+ "loss": 0.3078,
359
  "step": 570
360
  },
361
  {
362
  "epoch": 1.06,
363
+ "learning_rate": 0.0,
364
+ "loss": 0.2967,
365
  "step": 580
366
  },
367
  {
368
  "epoch": 1.07,
369
+ "learning_rate": 0.0,
370
+ "loss": 0.2955,
371
  "step": 590
372
  },
373
  {
374
  "epoch": 1.09,
375
+ "learning_rate": 0.0,
376
+ "loss": 0.3216,
377
  "step": 600
378
  },
379
  {
380
  "epoch": 1.11,
381
+ "learning_rate": 0.0,
382
+ "loss": 0.3227,
383
  "step": 610
384
  },
385
  {
386
  "epoch": 1.13,
387
+ "learning_rate": 0.0,
388
+ "loss": 0.3164,
389
  "step": 620
390
  },
391
  {
392
  "epoch": 1.15,
393
+ "learning_rate": 0.0,
394
+ "loss": 0.3181,
395
  "step": 630
396
  },
397
  {
398
  "epoch": 1.17,
399
+ "learning_rate": 0.0,
400
+ "loss": 0.2877,
401
  "step": 640
402
  },
403
  {
404
  "epoch": 1.18,
405
+ "learning_rate": 0.0,
406
+ "loss": 0.2977,
407
  "step": 650
408
  },
409
  {
410
  "epoch": 1.2,
411
+ "learning_rate": 0.0,
412
+ "loss": 0.3257,
413
  "step": 660
414
  },
415
  {
416
  "epoch": 1.22,
417
+ "learning_rate": 0.0,
418
+ "loss": 0.3415,
419
  "step": 670
420
  },
421
  {
422
  "epoch": 1.24,
423
+ "learning_rate": 0.0,
424
+ "loss": 0.3257,
425
  "step": 680
426
  },
427
  {
428
  "epoch": 1.26,
429
+ "learning_rate": 0.0,
430
+ "loss": 0.3145,
431
  "step": 690
432
  },
433
  {
434
  "epoch": 1.27,
435
+ "learning_rate": 0.0,
436
+ "loss": 0.3626,
437
  "step": 700
438
  },
439
  {
440
  "epoch": 1.29,
441
+ "learning_rate": 0.0,
442
+ "loss": 0.3251,
443
  "step": 710
444
  },
445
  {
446
  "epoch": 1.31,
447
+ "learning_rate": 0.0,
448
+ "loss": 0.3272,
449
  "step": 720
450
  },
451
  {
452
  "epoch": 1.33,
453
+ "learning_rate": 0.0,
454
+ "loss": 0.315,
455
  "step": 730
456
  },
457
  {
458
  "epoch": 1.35,
459
+ "learning_rate": 0.0,
460
+ "loss": 0.318,
461
  "step": 740
462
  },
463
  {
464
  "epoch": 1.37,
465
+ "learning_rate": 0.0,
466
+ "loss": 0.3067,
467
  "step": 750
468
  },
469
  {
470
  "epoch": 1.38,
471
+ "learning_rate": 0.0,
472
+ "loss": 0.2829,
473
  "step": 760
474
  },
475
  {
476
  "epoch": 1.4,
477
+ "learning_rate": 0.0,
478
+ "loss": 0.2894,
479
  "step": 770
480
  },
481
  {
482
  "epoch": 1.42,
483
+ "learning_rate": 0.0,
484
+ "loss": 0.3045,
485
  "step": 780
486
  },
487
  {
488
  "epoch": 1.44,
489
+ "learning_rate": 0.0,
490
+ "loss": 0.3003,
491
  "step": 790
492
  },
493
  {
494
  "epoch": 1.46,
495
+ "learning_rate": 0.0,
496
+ "loss": 0.2724,
497
  "step": 800
498
  },
499
  {
500
  "epoch": 1.47,
501
+ "learning_rate": 0.0,
502
+ "loss": 0.2818,
503
  "step": 810
504
  },
505
  {
506
  "epoch": 1.49,
507
+ "learning_rate": 0.0,
508
+ "loss": 0.2925,
509
  "step": 820
510
  },
511
  {
512
  "epoch": 1.51,
513
+ "learning_rate": 0.0,
514
+ "loss": 0.3095,
515
  "step": 830
516
  },
517
  {
518
  "epoch": 1.53,
519
+ "learning_rate": 0.0,
520
+ "loss": 0.3079,
521
  "step": 840
522
  },
523
  {
524
  "epoch": 1.55,
525
+ "learning_rate": 0.0,
526
+ "loss": 0.3226,
527
  "step": 850
528
  },
529
  {
530
  "epoch": 1.57,
531
+ "learning_rate": 0.0,
532
+ "loss": 0.3346,
533
  "step": 860
534
  },
535
  {
536
  "epoch": 1.58,
537
+ "learning_rate": 0.0,
538
+ "loss": 0.3343,
539
  "step": 870
540
  },
541
  {
542
  "epoch": 1.6,
543
+ "learning_rate": 0.0,
544
+ "loss": 0.2956,
545
  "step": 880
546
  },
547
  {
548
  "epoch": 1.62,
549
+ "learning_rate": 0.0,
550
+ "loss": 0.3006,
551
  "step": 890
552
  },
553
  {
554
  "epoch": 1.64,
555
+ "learning_rate": 0.0,
556
+ "loss": 0.3165,
557
  "step": 900
558
  },
559
  {
560
  "epoch": 1.66,
561
+ "learning_rate": 0.0,
562
+ "loss": 0.3185,
563
  "step": 910
564
  },
565
  {
566
  "epoch": 1.68,
567
+ "learning_rate": 0.0,
568
+ "loss": 0.2932,
569
  "step": 920
570
  },
571
  {
572
  "epoch": 1.69,
573
+ "learning_rate": 0.0,
574
+ "loss": 0.3233,
575
  "step": 930
576
  },
577
  {
578
  "epoch": 1.71,
579
+ "learning_rate": 0.0,
580
+ "loss": 0.2804,
581
  "step": 940
582
  },
583
  {
584
  "epoch": 1.73,
585
+ "learning_rate": 0.0,
586
+ "loss": 0.3244,
587
  "step": 950
588
  },
589
  {
590
  "epoch": 1.75,
591
+ "learning_rate": 0.0,
592
+ "loss": 0.3166,
593
  "step": 960
594
  },
595
  {
596
  "epoch": 1.77,
597
+ "learning_rate": 0.0,
598
+ "loss": 0.2889,
599
  "step": 970
600
  },
601
  {
602
  "epoch": 1.78,
603
+ "learning_rate": 0.0,
604
+ "loss": 0.3084,
605
  "step": 980
606
  },
607
  {
608
  "epoch": 1.8,
609
+ "learning_rate": 0.0,
610
+ "loss": 0.3083,
611
  "step": 990
612
  },
613
  {
614
  "epoch": 1.82,
615
+ "learning_rate": 0.0,
616
+ "loss": 0.3597,
617
  "step": 1000
618
  },
619
  {
620
  "epoch": 1.84,
621
+ "learning_rate": 0.0,
622
+ "loss": 0.3192,
623
  "step": 1010
624
  },
625
  {
626
  "epoch": 1.86,
627
+ "learning_rate": 0.0,
628
+ "loss": 0.3044,
629
  "step": 1020
630
  },
631
  {
632
  "epoch": 1.88,
633
+ "learning_rate": 0.0,
634
+ "loss": 0.3149,
635
  "step": 1030
636
  },
637
  {
638
  "epoch": 1.89,
639
+ "learning_rate": 0.0,
640
+ "loss": 0.3192,
641
  "step": 1040
642
  },
643
  {
644
  "epoch": 1.91,
645
+ "learning_rate": 0.0,
646
+ "loss": 0.2853,
647
  "step": 1050
648
  },
649
  {
650
  "epoch": 1.93,
651
+ "learning_rate": 0.0,
652
+ "loss": 0.3255,
653
  "step": 1060
654
  },
655
  {
656
  "epoch": 1.95,
657
+ "learning_rate": 0.0,
658
+ "loss": 0.3079,
659
  "step": 1070
660
  },
661
  {
662
  "epoch": 1.97,
663
+ "learning_rate": 0.0,
664
+ "loss": 0.2867,
665
  "step": 1080
666
  },
667
  {
668
  "epoch": 1.98,
669
+ "learning_rate": 0.0,
670
+ "loss": 0.3388,
671
  "step": 1090
672
  },
673
  {
674
  "epoch": 2.0,
675
+ "eval_accuracy": 0.9641475073981334,
676
+ "eval_loss": 0.1382417380809784,
677
+ "eval_runtime": 112.0412,
678
+ "eval_samples_per_second": 156.835,
679
+ "eval_steps_per_second": 4.909,
680
  "step": 1098
681
  },
682
  {
683
  "epoch": 2.0,
684
+ "learning_rate": 0.0,
685
+ "loss": 0.3561,
686
  "step": 1100
687
  },
688
  {
689
  "epoch": 2.02,
690
+ "learning_rate": 0.0,
691
+ "loss": 0.296,
692
  "step": 1110
693
  },
694
  {
695
  "epoch": 2.04,
696
+ "learning_rate": 0.0,
697
+ "loss": 0.2846,
698
  "step": 1120
699
  },
700
  {
701
  "epoch": 2.06,
702
+ "learning_rate": 0.0,
703
+ "loss": 0.3364,
704
  "step": 1130
705
  },
706
  {
707
  "epoch": 2.08,
708
+ "learning_rate": 0.0,
709
+ "loss": 0.3638,
710
  "step": 1140
711
  },
712
  {
713
  "epoch": 2.09,
714
+ "learning_rate": 0.0,
715
+ "loss": 0.3259,
716
  "step": 1150
717
  },
718
  {
719
  "epoch": 2.11,
720
+ "learning_rate": 0.0,
721
+ "loss": 0.316,
722
  "step": 1160
723
  },
724
  {
725
  "epoch": 2.13,
726
+ "learning_rate": 0.0,
727
+ "loss": 0.3298,
728
  "step": 1170
729
  },
730
  {
731
  "epoch": 2.15,
732
+ "learning_rate": 0.0,
733
+ "loss": 0.3135,
734
  "step": 1180
735
  },
736
  {
737
  "epoch": 2.17,
738
+ "learning_rate": 0.0,
739
+ "loss": 0.3031,
740
  "step": 1190
741
  },
742
  {
743
  "epoch": 2.18,
744
+ "learning_rate": 0.0,
745
+ "loss": 0.2982,
746
  "step": 1200
747
  },
748
  {
749
  "epoch": 2.2,
750
+ "learning_rate": 0.0,
751
+ "loss": 0.3074,
752
  "step": 1210
753
  },
754
  {
755
  "epoch": 2.22,
756
+ "learning_rate": 0.0,
757
+ "loss": 0.3465,
758
  "step": 1220
759
  },
760
  {
761
  "epoch": 2.24,
762
+ "learning_rate": 0.0,
763
+ "loss": 0.3148,
764
  "step": 1230
765
  },
766
  {
767
  "epoch": 2.26,
768
+ "learning_rate": 0.0,
769
+ "loss": 0.3406,
770
  "step": 1240
771
  },
772
  {
773
  "epoch": 2.28,
774
+ "learning_rate": 0.0,
775
+ "loss": 0.2878,
776
  "step": 1250
777
  },
778
  {
779
  "epoch": 2.29,
780
+ "learning_rate": 0.0,
781
+ "loss": 0.3026,
782
  "step": 1260
783
  },
784
  {
785
  "epoch": 2.31,
786
+ "learning_rate": 0.0,
787
+ "loss": 0.3189,
788
  "step": 1270
789
  },
790
  {
791
  "epoch": 2.33,
792
+ "learning_rate": 0.0,
793
+ "loss": 0.312,
794
  "step": 1280
795
  },
796
  {
797
  "epoch": 2.35,
798
+ "learning_rate": 0.0,
799
+ "loss": 0.3393,
800
  "step": 1290
801
  },
802
  {
803
  "epoch": 2.37,
804
+ "learning_rate": 0.0,
805
+ "loss": 0.3389,
806
  "step": 1300
807
  },
808
  {
809
  "epoch": 2.39,
810
+ "learning_rate": 0.0,
811
+ "loss": 0.3073,
812
  "step": 1310
813
  },
814
  {
815
  "epoch": 2.4,
816
+ "learning_rate": 0.0,
817
+ "loss": 0.3525,
818
  "step": 1320
819
  },
820
  {
821
  "epoch": 2.42,
822
+ "learning_rate": 0.0,
823
+ "loss": 0.2958,
824
  "step": 1330
825
  },
826
  {
827
  "epoch": 2.44,
828
+ "learning_rate": 0.0,
829
+ "loss": 0.2998,
830
  "step": 1340
831
  },
832
  {
833
  "epoch": 2.46,
834
+ "learning_rate": 0.0,
835
+ "loss": 0.2992,
836
  "step": 1350
837
  },
838
  {
839
  "epoch": 2.48,
840
+ "learning_rate": 0.0,
841
+ "loss": 0.314,
842
  "step": 1360
843
  },
844
  {
845
  "epoch": 2.49,
846
+ "learning_rate": 0.0,
847
+ "loss": 0.3128,
848
  "step": 1370
849
  },
850
  {
851
  "epoch": 2.51,
852
+ "learning_rate": 0.0,
853
+ "loss": 0.3074,
854
  "step": 1380
855
  },
856
  {
857
  "epoch": 2.53,
858
+ "learning_rate": 0.0,
859
+ "loss": 0.3189,
860
  "step": 1390
861
  },
862
  {
863
  "epoch": 2.55,
864
+ "learning_rate": 0.0,
865
+ "loss": 0.3284,
866
  "step": 1400
867
  },
868
  {
869
  "epoch": 2.57,
870
+ "learning_rate": 0.0,
871
+ "loss": 0.3278,
872
  "step": 1410
873
  },
874
  {
875
  "epoch": 2.59,
876
+ "learning_rate": 0.0,
877
+ "loss": 0.2897,
878
  "step": 1420
879
  },
880
  {
881
  "epoch": 2.6,
882
+ "learning_rate": 0.0,
883
+ "loss": 0.3472,
884
  "step": 1430
885
  },
886
  {
887
  "epoch": 2.62,
888
+ "learning_rate": 0.0,
889
+ "loss": 0.3571,
890
  "step": 1440
891
  },
892
  {
893
  "epoch": 2.64,
894
+ "learning_rate": 0.0,
895
+ "loss": 0.3078,
896
  "step": 1450
897
  },
898
  {
899
  "epoch": 2.66,
900
+ "learning_rate": 0.0,
901
+ "loss": 0.3512,
902
  "step": 1460
903
  },
904
  {
905
  "epoch": 2.68,
906
+ "learning_rate": 0.0,
907
+ "loss": 0.3017,
908
  "step": 1470
909
  },
910
  {
911
  "epoch": 2.69,
912
+ "learning_rate": 0.0,
913
+ "loss": 0.302,
914
  "step": 1480
915
  },
916
  {
917
  "epoch": 2.71,
918
+ "learning_rate": 0.0,
919
+ "loss": 0.3281,
920
  "step": 1490
921
  },
922
  {
923
  "epoch": 2.73,
924
+ "learning_rate": 0.0,
925
+ "loss": 0.3511,
926
  "step": 1500
927
  },
928
  {
929
  "epoch": 2.75,
930
+ "learning_rate": 0.0,
931
+ "loss": 0.3088,
932
  "step": 1510
933
  },
934
  {
935
  "epoch": 2.77,
936
+ "learning_rate": 0.0,
937
+ "loss": 0.2802,
938
  "step": 1520
939
  },
940
  {
941
  "epoch": 2.79,
942
+ "learning_rate": 0.0,
943
+ "loss": 0.304,
944
  "step": 1530
945
  },
946
  {
947
  "epoch": 2.8,
948
+ "learning_rate": 0.0,
949
+ "loss": 0.3119,
950
  "step": 1540
951
  },
952
  {
953
  "epoch": 2.82,
954
+ "learning_rate": 0.0,
955
+ "loss": 0.3194,
956
  "step": 1550
957
  },
958
  {
959
  "epoch": 2.84,
960
+ "learning_rate": 0.0,
961
+ "loss": 0.3028,
962
  "step": 1560
963
  },
964
  {
965
  "epoch": 2.86,
966
+ "learning_rate": 0.0,
967
+ "loss": 0.2849,
968
  "step": 1570
969
  },
970
  {
971
  "epoch": 2.88,
972
+ "learning_rate": 0.0,
973
+ "loss": 0.2897,
974
  "step": 1580
975
  },
976
  {
977
  "epoch": 2.89,
978
+ "learning_rate": 0.0,
979
+ "loss": 0.309,
980
  "step": 1590
981
  },
982
  {
983
  "epoch": 2.91,
984
+ "learning_rate": 0.0,
985
+ "loss": 0.3223,
986
  "step": 1600
987
  },
988
  {
989
  "epoch": 2.93,
990
+ "learning_rate": 0.0,
991
+ "loss": 0.3157,
992
  "step": 1610
993
  },
994
  {
995
  "epoch": 2.95,
996
+ "learning_rate": 0.0,
997
+ "loss": 0.3112,
998
  "step": 1620
999
  },
1000
  {
1001
  "epoch": 2.97,
1002
+ "learning_rate": 0.0,
1003
+ "loss": 0.2911,
1004
  "step": 1630
1005
  },
1006
  {
1007
  "epoch": 2.99,
1008
+ "learning_rate": 0.0,
1009
+ "loss": 0.361,
1010
  "step": 1640
1011
  },
1012
  {
1013
  "epoch": 3.0,
1014
+ "eval_accuracy": 0.9631800591850671,
1015
+ "eval_loss": 0.14322593808174133,
1016
+ "eval_runtime": 112.3273,
1017
+ "eval_samples_per_second": 156.436,
1018
+ "eval_steps_per_second": 4.896,
1019
  "step": 1647
1020
  },
1021
  {
1022
  "epoch": 3.0,
1023
+ "learning_rate": 0.0,
1024
+ "loss": 0.3124,
1025
  "step": 1650
1026
  },
1027
  {
1028
  "epoch": 3.02,
1029
+ "learning_rate": 0.0,
1030
+ "loss": 0.3007,
1031
  "step": 1660
1032
  },
1033
  {
1034
  "epoch": 3.04,
1035
+ "learning_rate": 0.0,
1036
+ "loss": 0.3119,
1037
  "step": 1670
1038
  },
1039
  {
1040
  "epoch": 3.06,
1041
+ "learning_rate": 0.0,
1042
+ "loss": 0.2744,
1043
  "step": 1680
1044
  },
1045
  {
1046
  "epoch": 3.08,
1047
+ "learning_rate": 0.0,
1048
+ "loss": 0.3164,
1049
  "step": 1690
1050
  },
1051
  {
1052
  "epoch": 3.1,
1053
+ "learning_rate": 0.0,
1054
+ "loss": 0.336,
1055
  "step": 1700
1056
  },
1057
  {
1058
  "epoch": 3.11,
1059
+ "learning_rate": 0.0,
1060
+ "loss": 0.3423,
1061
  "step": 1710
1062
  },
1063
  {
1064
  "epoch": 3.13,
1065
+ "learning_rate": 0.0,
1066
+ "loss": 0.347,
1067
  "step": 1720
1068
  },
1069
  {
1070
  "epoch": 3.15,
1071
+ "learning_rate": 0.0,
1072
+ "loss": 0.3064,
1073
  "step": 1730
1074
  },
1075
  {
1076
  "epoch": 3.17,
1077
+ "learning_rate": 0.0,
1078
+ "loss": 0.3241,
1079
  "step": 1740
1080
  },
1081
  {
1082
  "epoch": 3.19,
1083
+ "learning_rate": 0.0,
1084
+ "loss": 0.3381,
1085
  "step": 1750
1086
  },
1087
  {
1088
  "epoch": 3.2,
1089
+ "learning_rate": 0.0,
1090
+ "loss": 0.2719,
1091
  "step": 1760
1092
  },
1093
  {
1094
  "epoch": 3.22,
1095
+ "learning_rate": 0.0,
1096
+ "loss": 0.3141,
1097
  "step": 1770
1098
  },
1099
  {
1100
  "epoch": 3.24,
1101
+ "learning_rate": 0.0,
1102
+ "loss": 0.3369,
1103
  "step": 1780
1104
  },
1105
  {
1106
  "epoch": 3.26,
1107
+ "learning_rate": 0.0,
1108
+ "loss": 0.2892,
1109
  "step": 1790
1110
  },
1111
  {
1112
  "epoch": 3.28,
1113
+ "learning_rate": 0.0,
1114
+ "loss": 0.3228,
1115
  "step": 1800
1116
  },
1117
  {
1118
  "epoch": 3.3,
1119
+ "learning_rate": 0.0,
1120
+ "loss": 0.3339,
1121
  "step": 1810
1122
  },
1123
  {
1124
  "epoch": 3.31,
1125
+ "learning_rate": 0.0,
1126
+ "loss": 0.2774,
1127
  "step": 1820
1128
  },
1129
  {
1130
  "epoch": 3.33,
1131
+ "learning_rate": 0.0,
1132
+ "loss": 0.3103,
1133
  "step": 1830
1134
  },
1135
  {
1136
  "epoch": 3.35,
1137
+ "learning_rate": 0.0,
1138
+ "loss": 0.2864,
1139
  "step": 1840
1140
  },
1141
  {
1142
  "epoch": 3.37,
1143
+ "learning_rate": 0.0,
1144
+ "loss": 0.3264,
1145
  "step": 1850
1146
  },
1147
  {
1148
  "epoch": 3.39,
1149
+ "learning_rate": 0.0,
1150
+ "loss": 0.3145,
1151
  "step": 1860
1152
  },
1153
  {
1154
  "epoch": 3.4,
1155
+ "learning_rate": 0.0,
1156
+ "loss": 0.3559,
1157
  "step": 1870
1158
  },
1159
  {
1160
  "epoch": 3.42,
1161
+ "learning_rate": 0.0,
1162
+ "loss": 0.3365,
1163
  "step": 1880
1164
  },
1165
  {
1166
  "epoch": 3.44,
1167
+ "learning_rate": 0.0,
1168
+ "loss": 0.2965,
1169
  "step": 1890
1170
  },
1171
  {
1172
  "epoch": 3.46,
1173
+ "learning_rate": 0.0,
1174
+ "loss": 0.3412,
1175
  "step": 1900
1176
  },
1177
  {
1178
  "epoch": 3.48,
1179
+ "learning_rate": 0.0,
1180
+ "loss": 0.3237,
1181
  "step": 1910
1182
  },
1183
  {
1184
  "epoch": 3.5,
1185
+ "learning_rate": 0.0,
1186
+ "loss": 0.3052,
1187
  "step": 1920
1188
  },
1189
  {
1190
  "epoch": 3.51,
1191
+ "learning_rate": 0.0,
1192
+ "loss": 0.296,
1193
  "step": 1930
1194
  },
1195
  {
1196
  "epoch": 3.53,
1197
+ "learning_rate": 0.0,
1198
+ "loss": 0.3165,
1199
  "step": 1940
1200
  },
1201
  {
1202
  "epoch": 3.55,
1203
+ "learning_rate": 0.0,
1204
+ "loss": 0.3097,
1205
  "step": 1950
1206
  },
1207
  {
1208
  "epoch": 3.57,
1209
+ "learning_rate": 0.0,
1210
+ "loss": 0.3141,
1211
  "step": 1960
1212
  },
1213
  {
1214
  "epoch": 3.59,
1215
+ "learning_rate": 0.0,
1216
+ "loss": 0.3165,
1217
  "step": 1970
1218
  },
1219
  {
1220
  "epoch": 3.6,
1221
+ "learning_rate": 0.0,
1222
+ "loss": 0.3504,
1223
  "step": 1980
1224
  },
1225
  {
1226
  "epoch": 3.62,
1227
+ "learning_rate": 0.0,
1228
+ "loss": 0.3353,
1229
  "step": 1990
1230
  },
1231
  {
1232
  "epoch": 3.64,
1233
+ "learning_rate": 0.0,
1234
+ "loss": 0.3096,
1235
  "step": 2000
1236
  },
1237
  {
1238
  "epoch": 3.66,
1239
+ "learning_rate": 0.0,
1240
+ "loss": 0.3167,
1241
  "step": 2010
1242
  },
1243
  {
1244
  "epoch": 3.68,
1245
+ "learning_rate": 0.0,
1246
+ "loss": 0.2839,
1247
  "step": 2020
1248
  },
1249
  {
1250
  "epoch": 3.7,
1251
+ "learning_rate": 0.0,
1252
+ "loss": 0.2972,
1253
  "step": 2030
1254
  },
1255
  {
1256
  "epoch": 3.71,
1257
+ "learning_rate": 0.0,
1258
+ "loss": 0.3193,
1259
  "step": 2040
1260
  },
1261
  {
1262
  "epoch": 3.73,
1263
+ "learning_rate": 0.0,
1264
+ "loss": 0.2793,
1265
  "step": 2050
1266
  },
1267
  {
1268
  "epoch": 3.75,
1269
+ "learning_rate": 0.0,
1270
+ "loss": 0.3599,
1271
  "step": 2060
1272
  },
1273
  {
1274
  "epoch": 3.77,
1275
+ "learning_rate": 0.0,
1276
+ "loss": 0.3134,
1277
  "step": 2070
1278
  },
1279
  {
1280
  "epoch": 3.79,
1281
+ "learning_rate": 0.0,
1282
+ "loss": 0.3017,
1283
  "step": 2080
1284
  },
1285
  {
1286
  "epoch": 3.81,
1287
+ "learning_rate": 0.0,
1288
+ "loss": 0.3107,
1289
  "step": 2090
1290
  },
1291
  {
1292
  "epoch": 3.82,
1293
+ "learning_rate": 0.0,
1294
+ "loss": 0.3402,
1295
  "step": 2100
1296
  },
1297
  {
1298
  "epoch": 3.84,
1299
+ "learning_rate": 0.0,
1300
+ "loss": 0.3046,
1301
  "step": 2110
1302
  },
1303
  {
1304
  "epoch": 3.86,
1305
+ "learning_rate": 0.0,
1306
+ "loss": 0.3675,
1307
  "step": 2120
1308
  },
1309
  {
1310
  "epoch": 3.88,
1311
+ "learning_rate": 0.0,
1312
+ "loss": 0.3236,
1313
  "step": 2130
1314
  },
1315
  {
1316
  "epoch": 3.9,
1317
+ "learning_rate": 0.0,
1318
+ "loss": 0.2909,
1319
  "step": 2140
1320
  },
1321
  {
1322
  "epoch": 3.91,
1323
+ "learning_rate": 0.0,
1324
+ "loss": 0.3472,
1325
  "step": 2150
1326
  },
1327
  {
1328
  "epoch": 3.93,
1329
+ "learning_rate": 0.0,
1330
+ "loss": 0.2666,
1331
  "step": 2160
1332
  },
1333
  {
1334
  "epoch": 3.95,
1335
+ "learning_rate": 0.0,
1336
+ "loss": 0.3138,
1337
  "step": 2170
1338
  },
1339
  {
1340
  "epoch": 3.97,
1341
+ "learning_rate": 0.0,
1342
+ "loss": 0.3314,
1343
  "step": 2180
1344
  },
1345
  {
1346
  "epoch": 3.99,
1347
+ "learning_rate": 0.0,
1348
+ "loss": 0.3163,
1349
  "step": 2190
1350
  },
1351
  {
1352
  "epoch": 4.0,
1353
+ "eval_accuracy": 0.9639767812428864,
1354
+ "eval_loss": 0.14118124544620514,
1355
+ "eval_runtime": 112.337,
1356
+ "eval_samples_per_second": 156.422,
1357
+ "eval_steps_per_second": 4.896,
1358
  "step": 2197
1359
  },
1360
  {
1361
  "epoch": 4.01,
1362
+ "learning_rate": 0.0,
1363
+ "loss": 0.2996,
1364
  "step": 2200
1365
  },
1366
  {
1367
  "epoch": 4.02,
1368
+ "learning_rate": 0.0,
1369
+ "loss": 0.3324,
1370
  "step": 2210
1371
  },
1372
  {
1373
  "epoch": 4.04,
1374
+ "learning_rate": 0.0,
1375
+ "loss": 0.2756,
1376
  "step": 2220
1377
  },
1378
  {
1379
  "epoch": 4.06,
1380
+ "learning_rate": 0.0,
1381
+ "loss": 0.2944,
1382
  "step": 2230
1383
  },
1384
  {
1385
  "epoch": 4.08,
1386
+ "learning_rate": 0.0,
1387
+ "loss": 0.3082,
1388
  "step": 2240
1389
  },
1390
  {
1391
  "epoch": 4.1,
1392
+ "learning_rate": 0.0,
1393
+ "loss": 0.3241,
1394
  "step": 2250
1395
  },
1396
  {
1397
  "epoch": 4.11,
1398
+ "learning_rate": 0.0,
1399
+ "loss": 0.3301,
1400
  "step": 2260
1401
  },
1402
  {
1403
  "epoch": 4.13,
1404
+ "learning_rate": 0.0,
1405
+ "loss": 0.283,
1406
  "step": 2270
1407
  },
1408
  {
1409
  "epoch": 4.15,
1410
+ "learning_rate": 0.0,
1411
+ "loss": 0.3029,
1412
  "step": 2280
1413
  },
1414
  {
1415
  "epoch": 4.17,
1416
+ "learning_rate": 0.0,
1417
+ "loss": 0.3299,
1418
  "step": 2290
1419
  },
1420
  {
1421
  "epoch": 4.19,
1422
+ "learning_rate": 0.0,
1423
+ "loss": 0.3163,
1424
  "step": 2300
1425
  },
1426
  {
1427
  "epoch": 4.21,
1428
+ "learning_rate": 0.0,
1429
+ "loss": 0.2886,
1430
  "step": 2310
1431
  },
1432
  {
1433
  "epoch": 4.22,
1434
+ "learning_rate": 0.0,
1435
+ "loss": 0.2912,
1436
  "step": 2320
1437
  },
1438
  {
1439
  "epoch": 4.24,
1440
+ "learning_rate": 0.0,
1441
+ "loss": 0.3443,
1442
  "step": 2330
1443
  },
1444
  {
1445
  "epoch": 4.26,
1446
+ "learning_rate": 0.0,
1447
+ "loss": 0.3066,
1448
  "step": 2340
1449
  },
1450
  {
1451
  "epoch": 4.28,
1452
+ "learning_rate": 0.0,
1453
+ "loss": 0.3289,
1454
  "step": 2350
1455
  },
1456
  {
1457
  "epoch": 4.3,
1458
+ "learning_rate": 0.0,
1459
+ "loss": 0.3178,
1460
  "step": 2360
1461
  },
1462
  {
1463
  "epoch": 4.31,
1464
+ "learning_rate": 0.0,
1465
+ "loss": 0.3248,
1466
  "step": 2370
1467
  },
1468
  {
1469
  "epoch": 4.33,
1470
+ "learning_rate": 0.0,
1471
+ "loss": 0.3366,
1472
  "step": 2380
1473
  },
1474
  {
1475
  "epoch": 4.35,
1476
+ "learning_rate": 0.0,
1477
+ "loss": 0.3215,
1478
  "step": 2390
1479
  },
1480
  {
1481
  "epoch": 4.37,
1482
+ "learning_rate": 0.0,
1483
+ "loss": 0.2989,
1484
  "step": 2400
1485
  },
1486
  {
1487
  "epoch": 4.39,
1488
+ "learning_rate": 0.0,
1489
+ "loss": 0.3011,
1490
  "step": 2410
1491
  },
1492
  {
1493
  "epoch": 4.41,
1494
+ "learning_rate": 0.0,
1495
+ "loss": 0.3113,
1496
  "step": 2420
1497
  },
1498
  {
1499
  "epoch": 4.42,
1500
+ "learning_rate": 0.0,
1501
+ "loss": 0.3419,
1502
  "step": 2430
1503
  },
1504
  {
1505
  "epoch": 4.44,
1506
+ "learning_rate": 0.0,
1507
+ "loss": 0.3369,
1508
  "step": 2440
1509
  },
1510
  {
1511
  "epoch": 4.46,
1512
+ "learning_rate": 0.0,
1513
+ "loss": 0.3148,
1514
  "step": 2450
1515
  },
1516
  {
1517
  "epoch": 4.48,
1518
+ "learning_rate": 0.0,
1519
+ "loss": 0.3224,
1520
  "step": 2460
1521
  },
1522
  {
1523
  "epoch": 4.5,
1524
+ "learning_rate": 0.0,
1525
+ "loss": 0.2957,
1526
  "step": 2470
1527
  },
1528
  {
1529
  "epoch": 4.52,
1530
+ "learning_rate": 0.0,
1531
+ "loss": 0.2852,
1532
  "step": 2480
1533
  },
1534
  {
1535
  "epoch": 4.53,
1536
+ "learning_rate": 0.0,
1537
+ "loss": 0.3099,
1538
  "step": 2490
1539
  },
1540
  {
1541
  "epoch": 4.55,
1542
+ "learning_rate": 0.0,
1543
+ "loss": 0.3152,
1544
  "step": 2500
1545
  },
1546
  {
1547
  "epoch": 4.57,
1548
+ "learning_rate": 0.0,
1549
+ "loss": 0.328,
1550
  "step": 2510
1551
  },
1552
  {
1553
  "epoch": 4.59,
1554
+ "learning_rate": 0.0,
1555
+ "loss": 0.3273,
1556
  "step": 2520
1557
  },
1558
  {
1559
  "epoch": 4.61,
1560
+ "learning_rate": 0.0,
1561
+ "loss": 0.3105,
1562
  "step": 2530
1563
  },
1564
  {
1565
  "epoch": 4.62,
1566
+ "learning_rate": 0.0,
1567
+ "loss": 0.2704,
1568
  "step": 2540
1569
  },
1570
  {
1571
  "epoch": 4.64,
1572
+ "learning_rate": 0.0,
1573
+ "loss": 0.3113,
1574
  "step": 2550
1575
  },
1576
  {
1577
  "epoch": 4.66,
1578
+ "learning_rate": 0.0,
1579
+ "loss": 0.291,
1580
  "step": 2560
1581
  },
1582
  {
1583
  "epoch": 4.68,
1584
+ "learning_rate": 0.0,
1585
+ "loss": 0.3214,
1586
  "step": 2570
1587
  },
1588
  {
1589
  "epoch": 4.7,
1590
+ "learning_rate": 0.0,
1591
+ "loss": 0.3307,
1592
  "step": 2580
1593
  },
1594
  {
1595
  "epoch": 4.72,
1596
+ "learning_rate": 0.0,
1597
+ "loss": 0.2952,
1598
  "step": 2590
1599
  },
1600
  {
1601
  "epoch": 4.73,
1602
+ "learning_rate": 0.0,
1603
+ "loss": 0.3218,
1604
  "step": 2600
1605
  },
1606
  {
1607
  "epoch": 4.75,
1608
+ "learning_rate": 0.0,
1609
+ "loss": 0.2881,
1610
  "step": 2610
1611
  },
1612
  {
1613
  "epoch": 4.77,
1614
+ "learning_rate": 0.0,
1615
+ "loss": 0.2984,
1616
  "step": 2620
1617
  },
1618
  {
1619
  "epoch": 4.79,
1620
+ "learning_rate": 0.0,
1621
+ "loss": 0.3318,
1622
  "step": 2630
1623
  },
1624
  {
1625
  "epoch": 4.81,
1626
+ "learning_rate": 0.0,
1627
+ "loss": 0.3192,
1628
  "step": 2640
1629
  },
1630
  {
1631
  "epoch": 4.82,
1632
+ "learning_rate": 0.0,
1633
+ "loss": 0.2928,
1634
  "step": 2650
1635
  },
1636
  {
1637
  "epoch": 4.84,
1638
+ "learning_rate": 0.0,
1639
+ "loss": 0.3178,
1640
  "step": 2660
1641
  },
1642
  {
1643
  "epoch": 4.86,
1644
+ "learning_rate": 0.0,
1645
+ "loss": 0.3158,
1646
  "step": 2670
1647
  },
1648
  {
1649
  "epoch": 4.88,
1650
+ "learning_rate": 0.0,
1651
+ "loss": 0.3225,
1652
  "step": 2680
1653
  },
1654
  {
1655
  "epoch": 4.9,
1656
+ "learning_rate": 0.0,
1657
+ "loss": 0.3287,
1658
  "step": 2690
1659
  },
1660
  {
1661
  "epoch": 4.92,
1662
+ "learning_rate": 0.0,
1663
+ "loss": 0.2937,
1664
  "step": 2700
1665
  },
1666
  {
1667
  "epoch": 4.93,
1668
+ "learning_rate": 0.0,
1669
+ "loss": 0.2998,
1670
  "step": 2710
1671
  },
1672
  {
1673
  "epoch": 4.95,
1674
+ "learning_rate": 0.0,
1675
+ "loss": 0.3084,
1676
  "step": 2720
1677
  },
1678
  {
1679
  "epoch": 4.97,
1680
+ "learning_rate": 0.0,
1681
+ "loss": 0.3264,
1682
  "step": 2730
1683
  },
1684
  {
1685
  "epoch": 4.99,
1686
+ "learning_rate": 0.0,
1687
+ "loss": 0.3103,
1688
  "step": 2740
1689
  },
1690
  {
1691
  "epoch": 5.0,
1692
+ "eval_accuracy": 0.9639198725244708,
1693
+ "eval_loss": 0.13910169899463654,
1694
+ "eval_runtime": 112.3738,
1695
+ "eval_samples_per_second": 156.371,
1696
+ "eval_steps_per_second": 4.894,
1697
  "step": 2745
1698
  },
1699
  {
1700
  "epoch": 5.0,
1701
  "step": 2745,
1702
+ "total_flos": 7.485957524358513e+18,
1703
+ "train_loss": 0.31472766021561754,
1704
+ "train_runtime": 5227.0581,
1705
+ "train_samples_per_second": 67.241,
1706
+ "train_steps_per_second": 0.525
1707
  }
1708
  ],
1709
  "max_steps": 2745,
1710
  "num_train_epochs": 5,
1711
+ "total_flos": 7.485957524358513e+18,
1712
  "trial_name": null,
1713
  "trial_params": null
1714
  }