khleeloo commited on
Commit
1da7626
1 Parent(s): d9fe99b

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 6.0,
3
  "eval_accuracy": 0.794973544973545,
4
  "eval_f1": 0.7911822260194087,
5
  "eval_loss": 0.8158386945724487,
@@ -9,8 +9,8 @@
9
  "eval_samples_per_second": 170.604,
10
  "eval_steps_per_second": 10.719,
11
  "total_flos": 4.65670232933972e+18,
12
- "train_loss": 0.07091493732059752,
13
- "train_runtime": 1262.5493,
14
- "train_samples_per_second": 47.594,
15
- "train_steps_per_second": 2.975
16
  }
 
1
  {
2
+ "epoch": 4.0,
3
  "eval_accuracy": 0.794973544973545,
4
  "eval_f1": 0.7911822260194087,
5
  "eval_loss": 0.8158386945724487,
 
9
  "eval_samples_per_second": 170.604,
10
  "eval_steps_per_second": 10.719,
11
  "total_flos": 4.65670232933972e+18,
12
+ "train_loss": 0.2716429328141264,
13
+ "train_runtime": 832.7663,
14
+ "train_samples_per_second": 48.105,
15
+ "train_steps_per_second": 3.007
16
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f25abad16ada4e1ce195d58d5ae17d40c19d826e5cdf48814c1b92c7c2859e8
3
  size 343284077
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d79839abeb277b7780036a9e3dea1bbe1251ea27e5404b8a7c432b58f121393
3
  size 343284077
runs/Oct27_21-22-14_EE4E077/1698412937.0288403/events.out.tfevents.1698412937.EE4E077.103115.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e5db06d2ba6108ae9786a2439529560fe99b41e23a1665e443f7b4fbc5dbcd
3
+ size 5919
runs/Oct27_21-22-14_EE4E077/events.out.tfevents.1698412937.EE4E077.103115.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:702ebd3f670ed964a41a1eb061ce5d0424bf6cb5fe615e5c414664aad4ca6a10
3
+ size 5660
train_results.json CHANGED
@@ -1,8 +1,7 @@
1
  {
2
- "epoch": 6.0,
3
- "total_flos": 4.65670232933972e+18,
4
- "train_loss": 0.07091493732059752,
5
- "train_runtime": 1262.5493,
6
- "train_samples_per_second": 47.594,
7
- "train_steps_per_second": 2.975
8
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "train_loss": 0.2716429328141264,
4
+ "train_runtime": 832.7663,
5
+ "train_samples_per_second": 48.105,
6
+ "train_steps_per_second": 3.007
 
7
  }
trainer_state.json CHANGED
@@ -1,2347 +1,223 @@
1
  {
2
- "best_metric": 0.8652849740932642,
3
- "best_model_checkpoint": "./vit-focal-skin/checkpoint-3756",
4
- "epoch": 6.0,
5
- "global_step": 3756,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.02,
12
- "learning_rate": 0.0001994675186368477,
13
- "loss": 0.2668,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.03,
18
- "learning_rate": 0.00019893503727369543,
19
- "loss": 0.3261,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.05,
24
- "learning_rate": 0.00019840255591054313,
25
- "loss": 0.1579,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.06,
30
- "learning_rate": 0.00019787007454739086,
31
- "loss": 0.256,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.08,
36
- "learning_rate": 0.00019733759318423856,
37
- "loss": 0.2145,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.1,
42
- "learning_rate": 0.00019680511182108628,
43
- "loss": 0.2802,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.11,
48
- "learning_rate": 0.00019627263045793398,
49
- "loss": 0.3481,
50
- "step": 70
51
- },
52
- {
53
- "epoch": 0.13,
54
- "learning_rate": 0.00019574014909478168,
55
- "loss": 0.2614,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 0.14,
60
- "learning_rate": 0.0001952076677316294,
61
- "loss": 0.3515,
62
- "step": 90
63
- },
64
  {
65
  "epoch": 0.16,
66
- "learning_rate": 0.0001946751863684771,
67
- "loss": 0.2308,
68
- "step": 100
69
- },
70
- {
71
- "epoch": 0.18,
72
- "learning_rate": 0.00019414270500532483,
73
- "loss": 0.3011,
74
- "step": 110
75
- },
76
- {
77
- "epoch": 0.19,
78
- "learning_rate": 0.00019361022364217253,
79
- "loss": 0.21,
80
- "step": 120
81
- },
82
- {
83
- "epoch": 0.21,
84
- "learning_rate": 0.00019307774227902025,
85
- "loss": 0.1883,
86
- "step": 130
87
- },
88
- {
89
- "epoch": 0.22,
90
- "learning_rate": 0.00019254526091586795,
91
- "loss": 0.1768,
92
- "step": 140
93
- },
94
- {
95
- "epoch": 0.24,
96
  "learning_rate": 0.00019201277955271565,
97
- "loss": 0.2197,
98
- "step": 150
99
- },
100
- {
101
- "epoch": 0.26,
102
- "learning_rate": 0.00019148029818956338,
103
- "loss": 0.2239,
104
- "step": 160
105
- },
106
- {
107
- "epoch": 0.27,
108
- "learning_rate": 0.00019094781682641108,
109
- "loss": 0.1363,
110
- "step": 170
111
- },
112
- {
113
- "epoch": 0.29,
114
- "learning_rate": 0.0001904153354632588,
115
- "loss": 0.2123,
116
- "step": 180
117
- },
118
- {
119
- "epoch": 0.3,
120
- "learning_rate": 0.0001898828541001065,
121
- "loss": 0.1508,
122
- "step": 190
123
  },
124
  {
125
  "epoch": 0.32,
126
- "learning_rate": 0.00018935037273695422,
127
- "loss": 0.3008,
128
  "step": 200
129
  },
130
- {
131
- "epoch": 0.34,
132
- "learning_rate": 0.00018881789137380192,
133
- "loss": 0.244,
134
- "step": 210
135
- },
136
- {
137
- "epoch": 0.35,
138
- "learning_rate": 0.00018828541001064962,
139
- "loss": 0.2445,
140
- "step": 220
141
- },
142
- {
143
- "epoch": 0.37,
144
- "learning_rate": 0.00018775292864749735,
145
- "loss": 0.211,
146
- "step": 230
147
- },
148
- {
149
- "epoch": 0.38,
150
- "learning_rate": 0.00018722044728434505,
151
- "loss": 0.1967,
152
- "step": 240
153
- },
154
- {
155
- "epoch": 0.4,
156
- "learning_rate": 0.00018668796592119277,
157
- "loss": 0.1522,
158
- "step": 250
159
- },
160
- {
161
- "epoch": 0.42,
162
- "learning_rate": 0.00018615548455804047,
163
- "loss": 0.155,
164
- "step": 260
165
- },
166
- {
167
- "epoch": 0.43,
168
- "learning_rate": 0.0001856230031948882,
169
- "loss": 0.3733,
170
- "step": 270
171
- },
172
- {
173
- "epoch": 0.45,
174
- "learning_rate": 0.0001850905218317359,
175
- "loss": 0.201,
176
- "step": 280
177
- },
178
- {
179
- "epoch": 0.46,
180
- "learning_rate": 0.0001845580404685836,
181
- "loss": 0.2162,
182
- "step": 290
183
- },
184
  {
185
  "epoch": 0.48,
186
- "learning_rate": 0.00018402555910543132,
187
- "loss": 0.1231,
188
  "step": 300
189
  },
190
- {
191
- "epoch": 0.5,
192
- "learning_rate": 0.00018349307774227902,
193
- "loss": 0.1749,
194
- "step": 310
195
- },
196
- {
197
- "epoch": 0.51,
198
- "learning_rate": 0.00018296059637912674,
199
- "loss": 0.1627,
200
- "step": 320
201
- },
202
- {
203
- "epoch": 0.53,
204
- "learning_rate": 0.00018242811501597444,
205
- "loss": 0.3643,
206
- "step": 330
207
- },
208
- {
209
- "epoch": 0.54,
210
- "learning_rate": 0.00018189563365282217,
211
- "loss": 0.3076,
212
- "step": 340
213
- },
214
- {
215
- "epoch": 0.56,
216
- "learning_rate": 0.00018136315228966987,
217
- "loss": 0.1679,
218
- "step": 350
219
- },
220
- {
221
- "epoch": 0.58,
222
- "learning_rate": 0.00018083067092651756,
223
- "loss": 0.2569,
224
- "step": 360
225
- },
226
- {
227
- "epoch": 0.59,
228
- "learning_rate": 0.0001802981895633653,
229
- "loss": 0.2105,
230
- "step": 370
231
- },
232
- {
233
- "epoch": 0.61,
234
- "learning_rate": 0.00017976570820021302,
235
- "loss": 0.2794,
236
- "step": 380
237
- },
238
- {
239
- "epoch": 0.62,
240
- "learning_rate": 0.00017923322683706071,
241
- "loss": 0.1752,
242
- "step": 390
243
- },
244
  {
245
  "epoch": 0.64,
246
- "learning_rate": 0.0001787007454739084,
247
- "loss": 0.3763,
248
  "step": 400
249
  },
250
- {
251
- "epoch": 0.65,
252
- "learning_rate": 0.00017816826411075614,
253
- "loss": 0.2519,
254
- "step": 410
255
- },
256
- {
257
- "epoch": 0.67,
258
- "learning_rate": 0.00017763578274760384,
259
- "loss": 0.2182,
260
- "step": 420
261
- },
262
- {
263
- "epoch": 0.69,
264
- "learning_rate": 0.00017710330138445154,
265
- "loss": 0.2159,
266
- "step": 430
267
- },
268
- {
269
- "epoch": 0.7,
270
- "learning_rate": 0.00017657082002129926,
271
- "loss": 0.1561,
272
- "step": 440
273
- },
274
- {
275
- "epoch": 0.72,
276
- "learning_rate": 0.000176038338658147,
277
- "loss": 0.1129,
278
- "step": 450
279
- },
280
- {
281
- "epoch": 0.73,
282
- "learning_rate": 0.00017550585729499469,
283
- "loss": 0.1696,
284
- "step": 460
285
- },
286
- {
287
- "epoch": 0.75,
288
- "learning_rate": 0.0001749733759318424,
289
- "loss": 0.2304,
290
- "step": 470
291
- },
292
- {
293
- "epoch": 0.77,
294
- "learning_rate": 0.0001744408945686901,
295
- "loss": 0.2481,
296
- "step": 480
297
- },
298
- {
299
- "epoch": 0.78,
300
- "learning_rate": 0.0001739084132055378,
301
- "loss": 0.2593,
302
- "step": 490
303
- },
304
  {
305
  "epoch": 0.8,
306
- "learning_rate": 0.0001733759318423855,
307
- "loss": 0.2524,
308
  "step": 500
309
  },
310
- {
311
- "epoch": 0.81,
312
- "learning_rate": 0.00017284345047923323,
313
- "loss": 0.2631,
314
- "step": 510
315
- },
316
- {
317
- "epoch": 0.83,
318
- "learning_rate": 0.00017231096911608096,
319
- "loss": 0.1654,
320
- "step": 520
321
- },
322
- {
323
- "epoch": 0.85,
324
- "learning_rate": 0.00017177848775292866,
325
- "loss": 0.147,
326
- "step": 530
327
- },
328
- {
329
- "epoch": 0.86,
330
- "learning_rate": 0.00017124600638977638,
331
- "loss": 0.2521,
332
- "step": 540
333
- },
334
- {
335
- "epoch": 0.88,
336
- "learning_rate": 0.00017071352502662408,
337
- "loss": 0.2311,
338
- "step": 550
339
- },
340
- {
341
- "epoch": 0.89,
342
- "learning_rate": 0.00017018104366347178,
343
- "loss": 0.1705,
344
- "step": 560
345
- },
346
- {
347
- "epoch": 0.91,
348
- "learning_rate": 0.00016964856230031948,
349
- "loss": 0.1595,
350
- "step": 570
351
- },
352
- {
353
- "epoch": 0.93,
354
- "learning_rate": 0.0001691160809371672,
355
- "loss": 0.1939,
356
- "step": 580
357
- },
358
- {
359
- "epoch": 0.94,
360
- "learning_rate": 0.00016858359957401493,
361
- "loss": 0.1967,
362
- "step": 590
363
- },
364
  {
365
  "epoch": 0.96,
366
- "learning_rate": 0.00016805111821086263,
367
- "loss": 0.2204,
368
  "step": 600
369
  },
370
- {
371
- "epoch": 0.97,
372
- "learning_rate": 0.00016751863684771035,
373
- "loss": 0.302,
374
- "step": 610
375
- },
376
- {
377
- "epoch": 0.99,
378
- "learning_rate": 0.00016698615548455805,
379
- "loss": 0.1024,
380
- "step": 620
381
- },
382
  {
383
  "epoch": 1.0,
384
- "eval_accuracy": 0.8186528497409327,
385
- "eval_f1": 0.8246903692192651,
386
- "eval_loss": 0.48745197057724,
387
- "eval_precision": 0.8725192667740672,
388
- "eval_recall": 0.8186528497409327,
389
- "eval_runtime": 1.2025,
390
- "eval_samples_per_second": 160.499,
391
- "eval_steps_per_second": 10.811,
392
  "step": 626
393
  },
394
- {
395
- "epoch": 1.01,
396
- "learning_rate": 0.00016645367412140575,
397
- "loss": 0.1753,
398
- "step": 630
399
- },
400
- {
401
- "epoch": 1.02,
402
- "learning_rate": 0.00016592119275825345,
403
- "loss": 0.1077,
404
- "step": 640
405
- },
406
- {
407
- "epoch": 1.04,
408
- "learning_rate": 0.00016538871139510118,
409
- "loss": 0.1503,
410
- "step": 650
411
- },
412
- {
413
- "epoch": 1.05,
414
- "learning_rate": 0.0001648562300319489,
415
- "loss": 0.1275,
416
- "step": 660
417
- },
418
- {
419
- "epoch": 1.07,
420
- "learning_rate": 0.0001643237486687966,
421
- "loss": 0.1194,
422
- "step": 670
423
- },
424
- {
425
- "epoch": 1.09,
426
- "learning_rate": 0.00016379126730564433,
427
- "loss": 0.1587,
428
- "step": 680
429
- },
430
- {
431
- "epoch": 1.1,
432
- "learning_rate": 0.00016325878594249202,
433
- "loss": 0.2023,
434
- "step": 690
435
- },
436
  {
437
  "epoch": 1.12,
438
- "learning_rate": 0.00016272630457933972,
439
- "loss": 0.0542,
440
  "step": 700
441
  },
442
- {
443
- "epoch": 1.13,
444
- "learning_rate": 0.00016219382321618745,
445
- "loss": 0.1739,
446
- "step": 710
447
- },
448
- {
449
- "epoch": 1.15,
450
- "learning_rate": 0.00016166134185303515,
451
- "loss": 0.0996,
452
- "step": 720
453
- },
454
- {
455
- "epoch": 1.17,
456
- "learning_rate": 0.00016112886048988287,
457
- "loss": 0.1538,
458
- "step": 730
459
- },
460
- {
461
- "epoch": 1.18,
462
- "learning_rate": 0.00016059637912673057,
463
- "loss": 0.215,
464
- "step": 740
465
- },
466
- {
467
- "epoch": 1.2,
468
- "learning_rate": 0.0001600638977635783,
469
- "loss": 0.0868,
470
- "step": 750
471
- },
472
- {
473
- "epoch": 1.21,
474
- "learning_rate": 0.000159531416400426,
475
- "loss": 0.1199,
476
- "step": 760
477
- },
478
- {
479
- "epoch": 1.23,
480
- "learning_rate": 0.0001589989350372737,
481
- "loss": 0.0801,
482
- "step": 770
483
- },
484
- {
485
- "epoch": 1.25,
486
- "learning_rate": 0.00015846645367412142,
487
- "loss": 0.1025,
488
- "step": 780
489
- },
490
- {
491
- "epoch": 1.26,
492
- "learning_rate": 0.00015793397231096912,
493
- "loss": 0.1524,
494
- "step": 790
495
- },
496
  {
497
  "epoch": 1.28,
498
- "learning_rate": 0.00015740149094781684,
499
- "loss": 0.1171,
500
  "step": 800
501
  },
502
- {
503
- "epoch": 1.29,
504
- "learning_rate": 0.00015686900958466454,
505
- "loss": 0.1643,
506
- "step": 810
507
- },
508
- {
509
- "epoch": 1.31,
510
- "learning_rate": 0.00015633652822151227,
511
- "loss": 0.114,
512
- "step": 820
513
- },
514
- {
515
- "epoch": 1.33,
516
- "learning_rate": 0.00015580404685835997,
517
- "loss": 0.1284,
518
- "step": 830
519
- },
520
- {
521
- "epoch": 1.34,
522
- "learning_rate": 0.00015527156549520767,
523
- "loss": 0.1069,
524
- "step": 840
525
- },
526
- {
527
- "epoch": 1.36,
528
- "learning_rate": 0.0001547390841320554,
529
- "loss": 0.0643,
530
- "step": 850
531
- },
532
- {
533
- "epoch": 1.37,
534
- "learning_rate": 0.0001542066027689031,
535
- "loss": 0.1653,
536
- "step": 860
537
- },
538
- {
539
- "epoch": 1.39,
540
- "learning_rate": 0.00015367412140575082,
541
- "loss": 0.1917,
542
- "step": 870
543
- },
544
- {
545
- "epoch": 1.41,
546
- "learning_rate": 0.00015314164004259851,
547
- "loss": 0.1041,
548
- "step": 880
549
- },
550
- {
551
- "epoch": 1.42,
552
- "learning_rate": 0.00015260915867944624,
553
- "loss": 0.1186,
554
- "step": 890
555
- },
556
  {
557
  "epoch": 1.44,
558
- "learning_rate": 0.00015207667731629394,
559
- "loss": 0.0616,
560
  "step": 900
561
  },
562
- {
563
- "epoch": 1.45,
564
- "learning_rate": 0.00015154419595314164,
565
- "loss": 0.0971,
566
- "step": 910
567
- },
568
- {
569
- "epoch": 1.47,
570
- "learning_rate": 0.00015101171458998936,
571
- "loss": 0.11,
572
- "step": 920
573
- },
574
- {
575
- "epoch": 1.49,
576
- "learning_rate": 0.00015047923322683706,
577
- "loss": 0.1126,
578
- "step": 930
579
- },
580
- {
581
- "epoch": 1.5,
582
- "learning_rate": 0.0001499467518636848,
583
- "loss": 0.1095,
584
- "step": 940
585
- },
586
- {
587
- "epoch": 1.52,
588
- "learning_rate": 0.00014941427050053249,
589
- "loss": 0.145,
590
- "step": 950
591
- },
592
- {
593
- "epoch": 1.53,
594
- "learning_rate": 0.0001488817891373802,
595
- "loss": 0.163,
596
- "step": 960
597
- },
598
- {
599
- "epoch": 1.55,
600
- "learning_rate": 0.0001483493077742279,
601
- "loss": 0.0869,
602
- "step": 970
603
- },
604
- {
605
- "epoch": 1.57,
606
- "learning_rate": 0.0001478168264110756,
607
- "loss": 0.0842,
608
- "step": 980
609
- },
610
- {
611
- "epoch": 1.58,
612
- "learning_rate": 0.00014728434504792333,
613
- "loss": 0.15,
614
- "step": 990
615
- },
616
  {
617
  "epoch": 1.6,
618
- "learning_rate": 0.00014675186368477103,
619
- "loss": 0.1368,
620
  "step": 1000
621
  },
622
  {
623
- "epoch": 1.61,
624
- "learning_rate": 0.00014621938232161876,
625
- "loss": 0.1311,
626
- "step": 1010
627
- },
628
- {
629
- "epoch": 1.63,
630
- "learning_rate": 0.00014568690095846646,
631
- "loss": 0.0986,
632
- "step": 1020
633
- },
634
- {
635
- "epoch": 1.65,
636
- "learning_rate": 0.00014515441959531418,
637
- "loss": 0.1126,
638
- "step": 1030
639
- },
640
- {
641
- "epoch": 1.66,
642
- "learning_rate": 0.00014462193823216188,
643
- "loss": 0.1521,
644
- "step": 1040
645
- },
646
- {
647
- "epoch": 1.68,
648
- "learning_rate": 0.00014408945686900958,
649
- "loss": 0.1989,
650
- "step": 1050
651
- },
652
- {
653
- "epoch": 1.69,
654
- "learning_rate": 0.0001435569755058573,
655
- "loss": 0.1445,
656
- "step": 1060
657
  },
658
  {
659
- "epoch": 1.71,
660
- "learning_rate": 0.000143024494142705,
661
- "loss": 0.0964,
662
- "step": 1070
663
  },
664
  {
665
- "epoch": 1.73,
666
- "learning_rate": 0.00014249201277955273,
667
- "loss": 0.0858,
668
- "step": 1080
 
 
 
 
 
 
669
  },
670
  {
671
- "epoch": 1.74,
672
- "learning_rate": 0.00014195953141640043,
673
- "loss": 0.0859,
674
- "step": 1090
675
  },
676
  {
677
- "epoch": 1.76,
678
- "learning_rate": 0.00014142705005324815,
679
- "loss": 0.0889,
680
- "step": 1100
681
  },
682
  {
683
- "epoch": 1.77,
684
- "learning_rate": 0.00014089456869009585,
685
- "loss": 0.1515,
686
- "step": 1110
687
  },
688
  {
689
- "epoch": 1.79,
690
- "learning_rate": 0.00014036208732694355,
691
- "loss": 0.1352,
692
- "step": 1120
693
  },
694
  {
695
- "epoch": 1.81,
696
- "learning_rate": 0.00013982960596379128,
697
- "loss": 0.0479,
698
- "step": 1130
699
  },
700
  {
701
- "epoch": 1.82,
702
- "learning_rate": 0.000139297124600639,
703
- "loss": 0.0925,
704
- "step": 1140
705
  },
706
  {
707
- "epoch": 1.84,
708
- "learning_rate": 0.0001387646432374867,
709
- "loss": 0.2283,
710
- "step": 1150
 
 
 
 
 
 
711
  },
712
  {
713
- "epoch": 1.85,
714
- "learning_rate": 0.0001382321618743344,
715
- "loss": 0.1144,
716
- "step": 1160
717
  },
718
  {
719
- "epoch": 1.87,
720
- "learning_rate": 0.00013769968051118212,
721
- "loss": 0.1118,
722
- "step": 1170
723
  },
724
  {
725
- "epoch": 1.88,
726
- "learning_rate": 0.00013716719914802982,
727
- "loss": 0.1807,
728
- "step": 1180
729
  },
730
  {
731
- "epoch": 1.9,
732
- "learning_rate": 0.00013663471778487752,
733
- "loss": 0.0692,
734
- "step": 1190
735
  },
736
  {
737
- "epoch": 1.92,
738
- "learning_rate": 0.00013610223642172525,
739
- "loss": 0.0565,
740
- "step": 1200
741
  },
742
  {
743
- "epoch": 1.93,
744
- "learning_rate": 0.00013556975505857297,
745
- "loss": 0.1044,
746
- "step": 1210
747
  },
748
  {
749
- "epoch": 1.95,
750
- "learning_rate": 0.00013503727369542067,
751
- "loss": 0.0696,
752
- "step": 1220
753
  },
754
  {
755
- "epoch": 1.96,
756
- "learning_rate": 0.00013450479233226837,
757
- "loss": 0.0669,
758
- "step": 1230
 
 
 
 
 
 
759
  },
760
  {
761
- "epoch": 1.98,
762
- "learning_rate": 0.0001339723109691161,
763
- "loss": 0.0845,
764
- "step": 1240
765
- },
766
- {
767
- "epoch": 2.0,
768
- "learning_rate": 0.0001334398296059638,
769
- "loss": 0.1279,
770
- "step": 1250
771
- },
772
- {
773
- "epoch": 2.0,
774
- "eval_accuracy": 0.8186528497409327,
775
- "eval_f1": 0.8223400297767379,
776
- "eval_loss": 0.4645093083381653,
777
- "eval_precision": 0.8294534912410559,
778
- "eval_recall": 0.8186528497409327,
779
- "eval_runtime": 1.2384,
780
- "eval_samples_per_second": 155.847,
781
- "eval_steps_per_second": 10.497,
782
- "step": 1252
783
- },
784
- {
785
- "epoch": 2.01,
786
- "learning_rate": 0.0001329073482428115,
787
- "loss": 0.1003,
788
- "step": 1260
789
- },
790
- {
791
- "epoch": 2.03,
792
- "learning_rate": 0.00013237486687965922,
793
- "loss": 0.0551,
794
- "step": 1270
795
- },
796
- {
797
- "epoch": 2.04,
798
- "learning_rate": 0.00013184238551650694,
799
- "loss": 0.1244,
800
- "step": 1280
801
- },
802
- {
803
- "epoch": 2.06,
804
- "learning_rate": 0.00013130990415335464,
805
- "loss": 0.0669,
806
- "step": 1290
807
- },
808
- {
809
- "epoch": 2.08,
810
- "learning_rate": 0.00013077742279020234,
811
- "loss": 0.0603,
812
- "step": 1300
813
- },
814
- {
815
- "epoch": 2.09,
816
- "learning_rate": 0.00013024494142705007,
817
- "loss": 0.0074,
818
- "step": 1310
819
- },
820
- {
821
- "epoch": 2.11,
822
- "learning_rate": 0.00012971246006389777,
823
- "loss": 0.0144,
824
- "step": 1320
825
- },
826
- {
827
- "epoch": 2.12,
828
- "learning_rate": 0.00012917997870074546,
829
- "loss": 0.0741,
830
- "step": 1330
831
- },
832
- {
833
- "epoch": 2.14,
834
- "learning_rate": 0.0001286474973375932,
835
- "loss": 0.053,
836
- "step": 1340
837
- },
838
- {
839
- "epoch": 2.16,
840
- "learning_rate": 0.00012811501597444092,
841
- "loss": 0.0377,
842
- "step": 1350
843
- },
844
- {
845
- "epoch": 2.17,
846
- "learning_rate": 0.00012758253461128861,
847
- "loss": 0.0137,
848
- "step": 1360
849
- },
850
- {
851
- "epoch": 2.19,
852
- "learning_rate": 0.0001270500532481363,
853
- "loss": 0.0116,
854
- "step": 1370
855
- },
856
- {
857
- "epoch": 2.2,
858
- "learning_rate": 0.00012651757188498404,
859
- "loss": 0.0615,
860
- "step": 1380
861
- },
862
- {
863
- "epoch": 2.22,
864
- "learning_rate": 0.00012598509052183174,
865
- "loss": 0.057,
866
- "step": 1390
867
- },
868
- {
869
- "epoch": 2.24,
870
- "learning_rate": 0.00012545260915867944,
871
- "loss": 0.013,
872
- "step": 1400
873
- },
874
- {
875
- "epoch": 2.25,
876
- "learning_rate": 0.00012492012779552716,
877
- "loss": 0.0113,
878
- "step": 1410
879
- },
880
- {
881
- "epoch": 2.27,
882
- "learning_rate": 0.0001243876464323749,
883
- "loss": 0.0665,
884
- "step": 1420
885
- },
886
- {
887
- "epoch": 2.28,
888
- "learning_rate": 0.00012385516506922259,
889
- "loss": 0.0143,
890
- "step": 1430
891
- },
892
- {
893
- "epoch": 2.3,
894
- "learning_rate": 0.00012332268370607028,
895
- "loss": 0.016,
896
- "step": 1440
897
- },
898
- {
899
- "epoch": 2.32,
900
- "learning_rate": 0.000122790202342918,
901
- "loss": 0.0449,
902
- "step": 1450
903
- },
904
- {
905
- "epoch": 2.33,
906
- "learning_rate": 0.0001222577209797657,
907
- "loss": 0.0312,
908
- "step": 1460
909
- },
910
- {
911
- "epoch": 2.35,
912
- "learning_rate": 0.00012172523961661342,
913
- "loss": 0.0664,
914
- "step": 1470
915
- },
916
- {
917
- "epoch": 2.36,
918
- "learning_rate": 0.00012119275825346112,
919
- "loss": 0.0901,
920
- "step": 1480
921
- },
922
- {
923
- "epoch": 2.38,
924
- "learning_rate": 0.00012066027689030886,
925
- "loss": 0.0564,
926
- "step": 1490
927
- },
928
- {
929
- "epoch": 2.4,
930
- "learning_rate": 0.00012012779552715656,
931
- "loss": 0.0304,
932
- "step": 1500
933
- },
934
- {
935
- "epoch": 2.41,
936
- "learning_rate": 0.00011959531416400427,
937
- "loss": 0.0852,
938
- "step": 1510
939
- },
940
- {
941
- "epoch": 2.43,
942
- "learning_rate": 0.00011906283280085198,
943
- "loss": 0.0405,
944
- "step": 1520
945
- },
946
- {
947
- "epoch": 2.44,
948
- "learning_rate": 0.00011853035143769968,
949
- "loss": 0.0812,
950
- "step": 1530
951
- },
952
- {
953
- "epoch": 2.46,
954
- "learning_rate": 0.00011799787007454739,
955
- "loss": 0.0343,
956
- "step": 1540
957
- },
958
- {
959
- "epoch": 2.48,
960
- "learning_rate": 0.00011746538871139509,
961
- "loss": 0.0313,
962
- "step": 1550
963
- },
964
- {
965
- "epoch": 2.49,
966
- "learning_rate": 0.00011693290734824283,
967
- "loss": 0.0683,
968
- "step": 1560
969
- },
970
- {
971
- "epoch": 2.51,
972
- "learning_rate": 0.00011640042598509053,
973
- "loss": 0.0378,
974
- "step": 1570
975
- },
976
- {
977
- "epoch": 2.52,
978
- "learning_rate": 0.00011586794462193824,
979
- "loss": 0.0404,
980
- "step": 1580
981
- },
982
- {
983
- "epoch": 2.54,
984
- "learning_rate": 0.00011533546325878595,
985
- "loss": 0.0786,
986
- "step": 1590
987
- },
988
- {
989
- "epoch": 2.56,
990
- "learning_rate": 0.00011480298189563365,
991
- "loss": 0.0353,
992
- "step": 1600
993
- },
994
- {
995
- "epoch": 2.57,
996
- "learning_rate": 0.00011427050053248136,
997
- "loss": 0.0458,
998
- "step": 1610
999
- },
1000
- {
1001
- "epoch": 2.59,
1002
- "learning_rate": 0.00011373801916932908,
1003
- "loss": 0.0624,
1004
- "step": 1620
1005
- },
1006
- {
1007
- "epoch": 2.6,
1008
- "learning_rate": 0.0001132055378061768,
1009
- "loss": 0.0878,
1010
- "step": 1630
1011
- },
1012
- {
1013
- "epoch": 2.62,
1014
- "learning_rate": 0.0001126730564430245,
1015
- "loss": 0.0245,
1016
- "step": 1640
1017
- },
1018
- {
1019
- "epoch": 2.64,
1020
- "learning_rate": 0.00011214057507987221,
1021
- "loss": 0.079,
1022
- "step": 1650
1023
- },
1024
- {
1025
- "epoch": 2.65,
1026
- "learning_rate": 0.00011160809371671992,
1027
- "loss": 0.0374,
1028
- "step": 1660
1029
- },
1030
- {
1031
- "epoch": 2.67,
1032
- "learning_rate": 0.00011107561235356762,
1033
- "loss": 0.0572,
1034
- "step": 1670
1035
- },
1036
- {
1037
- "epoch": 2.68,
1038
- "learning_rate": 0.00011054313099041533,
1039
- "loss": 0.1096,
1040
- "step": 1680
1041
- },
1042
- {
1043
- "epoch": 2.7,
1044
- "learning_rate": 0.00011001064962726305,
1045
- "loss": 0.1099,
1046
- "step": 1690
1047
- },
1048
- {
1049
- "epoch": 2.72,
1050
- "learning_rate": 0.00010947816826411077,
1051
- "loss": 0.074,
1052
- "step": 1700
1053
- },
1054
- {
1055
- "epoch": 2.73,
1056
- "learning_rate": 0.00010894568690095847,
1057
- "loss": 0.0348,
1058
- "step": 1710
1059
- },
1060
- {
1061
- "epoch": 2.75,
1062
- "learning_rate": 0.00010841320553780618,
1063
- "loss": 0.1437,
1064
- "step": 1720
1065
- },
1066
- {
1067
- "epoch": 2.76,
1068
- "learning_rate": 0.0001078807241746539,
1069
- "loss": 0.0798,
1070
- "step": 1730
1071
- },
1072
- {
1073
- "epoch": 2.78,
1074
- "learning_rate": 0.0001073482428115016,
1075
- "loss": 0.0662,
1076
- "step": 1740
1077
- },
1078
- {
1079
- "epoch": 2.8,
1080
- "learning_rate": 0.0001068157614483493,
1081
- "loss": 0.0207,
1082
- "step": 1750
1083
- },
1084
- {
1085
- "epoch": 2.81,
1086
- "learning_rate": 0.00010628328008519702,
1087
- "loss": 0.1088,
1088
- "step": 1760
1089
- },
1090
- {
1091
- "epoch": 2.83,
1092
- "learning_rate": 0.00010575079872204474,
1093
- "loss": 0.0871,
1094
- "step": 1770
1095
- },
1096
- {
1097
- "epoch": 2.84,
1098
- "learning_rate": 0.00010521831735889244,
1099
- "loss": 0.0846,
1100
- "step": 1780
1101
- },
1102
- {
1103
- "epoch": 2.86,
1104
- "learning_rate": 0.00010468583599574015,
1105
- "loss": 0.0644,
1106
- "step": 1790
1107
- },
1108
- {
1109
- "epoch": 2.88,
1110
- "learning_rate": 0.00010415335463258787,
1111
- "loss": 0.056,
1112
- "step": 1800
1113
- },
1114
- {
1115
- "epoch": 2.89,
1116
- "learning_rate": 0.00010362087326943557,
1117
- "loss": 0.1148,
1118
- "step": 1810
1119
- },
1120
- {
1121
- "epoch": 2.91,
1122
- "learning_rate": 0.00010308839190628328,
1123
- "loss": 0.0919,
1124
- "step": 1820
1125
- },
1126
- {
1127
- "epoch": 2.92,
1128
- "learning_rate": 0.000102555910543131,
1129
- "loss": 0.0343,
1130
- "step": 1830
1131
- },
1132
- {
1133
- "epoch": 2.94,
1134
- "learning_rate": 0.00010202342917997871,
1135
- "loss": 0.0103,
1136
- "step": 1840
1137
- },
1138
- {
1139
- "epoch": 2.96,
1140
- "learning_rate": 0.00010149094781682643,
1141
- "loss": 0.0287,
1142
- "step": 1850
1143
- },
1144
- {
1145
- "epoch": 2.97,
1146
- "learning_rate": 0.00010095846645367413,
1147
- "loss": 0.0382,
1148
- "step": 1860
1149
- },
1150
- {
1151
- "epoch": 2.99,
1152
- "learning_rate": 0.00010042598509052184,
1153
- "loss": 0.0912,
1154
- "step": 1870
1155
- },
1156
- {
1157
- "epoch": 3.0,
1158
- "eval_accuracy": 0.8497409326424871,
1159
- "eval_f1": 0.8454290011413006,
1160
- "eval_loss": 0.4882831573486328,
1161
- "eval_precision": 0.8461923629555449,
1162
- "eval_recall": 0.8497409326424871,
1163
- "eval_runtime": 1.2092,
1164
- "eval_samples_per_second": 159.604,
1165
- "eval_steps_per_second": 10.751,
1166
- "step": 1878
1167
- },
1168
- {
1169
- "epoch": 3.0,
1170
- "learning_rate": 9.989350372736954e-05,
1171
- "loss": 0.0122,
1172
- "step": 1880
1173
- },
1174
- {
1175
- "epoch": 3.02,
1176
- "learning_rate": 9.936102236421726e-05,
1177
- "loss": 0.0255,
1178
- "step": 1890
1179
- },
1180
- {
1181
- "epoch": 3.04,
1182
- "learning_rate": 9.882854100106496e-05,
1183
- "loss": 0.0246,
1184
- "step": 1900
1185
- },
1186
- {
1187
- "epoch": 3.05,
1188
- "learning_rate": 9.829605963791267e-05,
1189
- "loss": 0.0134,
1190
- "step": 1910
1191
- },
1192
- {
1193
- "epoch": 3.07,
1194
- "learning_rate": 9.77635782747604e-05,
1195
- "loss": 0.013,
1196
- "step": 1920
1197
- },
1198
- {
1199
- "epoch": 3.08,
1200
- "learning_rate": 9.72310969116081e-05,
1201
- "loss": 0.0057,
1202
- "step": 1930
1203
- },
1204
- {
1205
- "epoch": 3.1,
1206
- "learning_rate": 9.669861554845581e-05,
1207
- "loss": 0.0149,
1208
- "step": 1940
1209
- },
1210
- {
1211
- "epoch": 3.12,
1212
- "learning_rate": 9.616613418530351e-05,
1213
- "loss": 0.0025,
1214
- "step": 1950
1215
- },
1216
- {
1217
- "epoch": 3.13,
1218
- "learning_rate": 9.563365282215123e-05,
1219
- "loss": 0.0288,
1220
- "step": 1960
1221
- },
1222
- {
1223
- "epoch": 3.15,
1224
- "learning_rate": 9.510117145899895e-05,
1225
- "loss": 0.0014,
1226
- "step": 1970
1227
- },
1228
- {
1229
- "epoch": 3.16,
1230
- "learning_rate": 9.456869009584664e-05,
1231
- "loss": 0.0104,
1232
- "step": 1980
1233
- },
1234
- {
1235
- "epoch": 3.18,
1236
- "learning_rate": 9.403620873269437e-05,
1237
- "loss": 0.0309,
1238
- "step": 1990
1239
- },
1240
- {
1241
- "epoch": 3.19,
1242
- "learning_rate": 9.350372736954207e-05,
1243
- "loss": 0.0175,
1244
- "step": 2000
1245
- },
1246
- {
1247
- "epoch": 3.21,
1248
- "learning_rate": 9.297124600638978e-05,
1249
- "loss": 0.0238,
1250
- "step": 2010
1251
- },
1252
- {
1253
- "epoch": 3.23,
1254
- "learning_rate": 9.243876464323749e-05,
1255
- "loss": 0.0439,
1256
- "step": 2020
1257
- },
1258
- {
1259
- "epoch": 3.24,
1260
- "learning_rate": 9.19062832800852e-05,
1261
- "loss": 0.0286,
1262
- "step": 2030
1263
- },
1264
- {
1265
- "epoch": 3.26,
1266
- "learning_rate": 9.137380191693292e-05,
1267
- "loss": 0.0165,
1268
- "step": 2040
1269
- },
1270
- {
1271
- "epoch": 3.27,
1272
- "learning_rate": 9.084132055378062e-05,
1273
- "loss": 0.0108,
1274
- "step": 2050
1275
- },
1276
- {
1277
- "epoch": 3.29,
1278
- "learning_rate": 9.030883919062834e-05,
1279
- "loss": 0.0024,
1280
- "step": 2060
1281
- },
1282
- {
1283
- "epoch": 3.31,
1284
- "learning_rate": 8.977635782747604e-05,
1285
- "loss": 0.003,
1286
- "step": 2070
1287
- },
1288
- {
1289
- "epoch": 3.32,
1290
- "learning_rate": 8.924387646432375e-05,
1291
- "loss": 0.0028,
1292
- "step": 2080
1293
- },
1294
- {
1295
- "epoch": 3.34,
1296
- "learning_rate": 8.871139510117146e-05,
1297
- "loss": 0.078,
1298
- "step": 2090
1299
- },
1300
- {
1301
- "epoch": 3.35,
1302
- "learning_rate": 8.817891373801918e-05,
1303
- "loss": 0.0013,
1304
- "step": 2100
1305
- },
1306
- {
1307
- "epoch": 3.37,
1308
- "learning_rate": 8.764643237486689e-05,
1309
- "loss": 0.0027,
1310
- "step": 2110
1311
- },
1312
- {
1313
- "epoch": 3.39,
1314
- "learning_rate": 8.711395101171459e-05,
1315
- "loss": 0.0169,
1316
- "step": 2120
1317
- },
1318
- {
1319
- "epoch": 3.4,
1320
- "learning_rate": 8.658146964856231e-05,
1321
- "loss": 0.0078,
1322
- "step": 2130
1323
- },
1324
- {
1325
- "epoch": 3.42,
1326
- "learning_rate": 8.604898828541001e-05,
1327
- "loss": 0.0016,
1328
- "step": 2140
1329
- },
1330
- {
1331
- "epoch": 3.43,
1332
- "learning_rate": 8.551650692225772e-05,
1333
- "loss": 0.0444,
1334
- "step": 2150
1335
- },
1336
- {
1337
- "epoch": 3.45,
1338
- "learning_rate": 8.498402555910544e-05,
1339
- "loss": 0.003,
1340
- "step": 2160
1341
- },
1342
- {
1343
- "epoch": 3.47,
1344
- "learning_rate": 8.445154419595315e-05,
1345
- "loss": 0.0045,
1346
- "step": 2170
1347
- },
1348
- {
1349
- "epoch": 3.48,
1350
- "learning_rate": 8.391906283280086e-05,
1351
- "loss": 0.0014,
1352
- "step": 2180
1353
- },
1354
- {
1355
- "epoch": 3.5,
1356
- "learning_rate": 8.338658146964856e-05,
1357
- "loss": 0.023,
1358
- "step": 2190
1359
- },
1360
- {
1361
- "epoch": 3.51,
1362
- "learning_rate": 8.285410010649628e-05,
1363
- "loss": 0.0064,
1364
- "step": 2200
1365
- },
1366
- {
1367
- "epoch": 3.53,
1368
- "learning_rate": 8.232161874334398e-05,
1369
- "loss": 0.0225,
1370
- "step": 2210
1371
- },
1372
- {
1373
- "epoch": 3.55,
1374
- "learning_rate": 8.17891373801917e-05,
1375
- "loss": 0.06,
1376
- "step": 2220
1377
- },
1378
- {
1379
- "epoch": 3.56,
1380
- "learning_rate": 8.12566560170394e-05,
1381
- "loss": 0.0516,
1382
- "step": 2230
1383
- },
1384
- {
1385
- "epoch": 3.58,
1386
- "learning_rate": 8.072417465388712e-05,
1387
- "loss": 0.012,
1388
- "step": 2240
1389
- },
1390
- {
1391
- "epoch": 3.59,
1392
- "learning_rate": 8.019169329073483e-05,
1393
- "loss": 0.0067,
1394
- "step": 2250
1395
- },
1396
- {
1397
- "epoch": 3.61,
1398
- "learning_rate": 7.965921192758253e-05,
1399
- "loss": 0.0253,
1400
- "step": 2260
1401
- },
1402
- {
1403
- "epoch": 3.63,
1404
- "learning_rate": 7.912673056443025e-05,
1405
- "loss": 0.0124,
1406
- "step": 2270
1407
- },
1408
- {
1409
- "epoch": 3.64,
1410
- "learning_rate": 7.859424920127795e-05,
1411
- "loss": 0.0084,
1412
- "step": 2280
1413
- },
1414
- {
1415
- "epoch": 3.66,
1416
- "learning_rate": 7.806176783812567e-05,
1417
- "loss": 0.0027,
1418
- "step": 2290
1419
- },
1420
- {
1421
- "epoch": 3.67,
1422
- "learning_rate": 7.752928647497338e-05,
1423
- "loss": 0.0182,
1424
- "step": 2300
1425
- },
1426
- {
1427
- "epoch": 3.69,
1428
- "learning_rate": 7.699680511182109e-05,
1429
- "loss": 0.056,
1430
- "step": 2310
1431
- },
1432
- {
1433
- "epoch": 3.71,
1434
- "learning_rate": 7.64643237486688e-05,
1435
- "loss": 0.0153,
1436
- "step": 2320
1437
- },
1438
- {
1439
- "epoch": 3.72,
1440
- "learning_rate": 7.59318423855165e-05,
1441
- "loss": 0.0509,
1442
- "step": 2330
1443
- },
1444
- {
1445
- "epoch": 3.74,
1446
- "learning_rate": 7.539936102236423e-05,
1447
- "loss": 0.0045,
1448
- "step": 2340
1449
- },
1450
- {
1451
- "epoch": 3.75,
1452
- "learning_rate": 7.486687965921192e-05,
1453
- "loss": 0.0007,
1454
- "step": 2350
1455
- },
1456
- {
1457
- "epoch": 3.77,
1458
- "learning_rate": 7.438764643237486e-05,
1459
- "loss": 0.0583,
1460
- "step": 2360
1461
- },
1462
- {
1463
- "epoch": 3.79,
1464
- "learning_rate": 7.385516506922258e-05,
1465
- "loss": 0.0014,
1466
- "step": 2370
1467
- },
1468
- {
1469
- "epoch": 3.8,
1470
- "learning_rate": 7.332268370607029e-05,
1471
- "loss": 0.0021,
1472
- "step": 2380
1473
- },
1474
- {
1475
- "epoch": 3.82,
1476
- "learning_rate": 7.2790202342918e-05,
1477
- "loss": 0.0408,
1478
- "step": 2390
1479
- },
1480
- {
1481
- "epoch": 3.83,
1482
- "learning_rate": 7.225772097976571e-05,
1483
- "loss": 0.0138,
1484
- "step": 2400
1485
- },
1486
- {
1487
- "epoch": 3.85,
1488
- "learning_rate": 7.172523961661343e-05,
1489
- "loss": 0.0075,
1490
- "step": 2410
1491
- },
1492
- {
1493
- "epoch": 3.87,
1494
- "learning_rate": 7.119275825346114e-05,
1495
- "loss": 0.0769,
1496
- "step": 2420
1497
- },
1498
- {
1499
- "epoch": 3.88,
1500
- "learning_rate": 7.066027689030884e-05,
1501
- "loss": 0.0016,
1502
- "step": 2430
1503
- },
1504
- {
1505
- "epoch": 3.9,
1506
- "learning_rate": 7.012779552715655e-05,
1507
- "loss": 0.0213,
1508
- "step": 2440
1509
- },
1510
- {
1511
- "epoch": 3.91,
1512
- "learning_rate": 6.959531416400426e-05,
1513
- "loss": 0.0101,
1514
- "step": 2450
1515
- },
1516
- {
1517
- "epoch": 3.93,
1518
- "learning_rate": 6.906283280085197e-05,
1519
- "loss": 0.0087,
1520
- "step": 2460
1521
- },
1522
- {
1523
- "epoch": 3.95,
1524
- "learning_rate": 6.853035143769968e-05,
1525
- "loss": 0.0131,
1526
- "step": 2470
1527
- },
1528
- {
1529
- "epoch": 3.96,
1530
- "learning_rate": 6.79978700745474e-05,
1531
- "loss": 0.0012,
1532
- "step": 2480
1533
- },
1534
- {
1535
- "epoch": 3.98,
1536
- "learning_rate": 6.746538871139511e-05,
1537
- "loss": 0.0254,
1538
- "step": 2490
1539
- },
1540
- {
1541
- "epoch": 3.99,
1542
- "learning_rate": 6.693290734824281e-05,
1543
- "loss": 0.0397,
1544
- "step": 2500
1545
- },
1546
- {
1547
- "epoch": 4.0,
1548
- "eval_accuracy": 0.8238341968911918,
1549
- "eval_f1": 0.8273994981457394,
1550
- "eval_loss": 0.5439336895942688,
1551
- "eval_precision": 0.8342306448710682,
1552
- "eval_recall": 0.8238341968911918,
1553
- "eval_runtime": 1.3389,
1554
- "eval_samples_per_second": 144.149,
1555
- "eval_steps_per_second": 9.709,
1556
- "step": 2504
1557
- },
1558
- {
1559
- "epoch": 4.01,
1560
- "learning_rate": 6.640042598509052e-05,
1561
- "loss": 0.0261,
1562
- "step": 2510
1563
- },
1564
- {
1565
- "epoch": 4.03,
1566
- "learning_rate": 6.586794462193823e-05,
1567
- "loss": 0.0007,
1568
- "step": 2520
1569
- },
1570
- {
1571
- "epoch": 4.04,
1572
- "learning_rate": 6.533546325878594e-05,
1573
- "loss": 0.0232,
1574
- "step": 2530
1575
- },
1576
- {
1577
- "epoch": 4.06,
1578
- "learning_rate": 6.480298189563366e-05,
1579
- "loss": 0.0017,
1580
- "step": 2540
1581
- },
1582
- {
1583
- "epoch": 4.07,
1584
- "learning_rate": 6.427050053248137e-05,
1585
- "loss": 0.0188,
1586
- "step": 2550
1587
- },
1588
- {
1589
- "epoch": 4.09,
1590
- "learning_rate": 6.373801916932908e-05,
1591
- "loss": 0.0076,
1592
- "step": 2560
1593
- },
1594
- {
1595
- "epoch": 4.11,
1596
- "learning_rate": 6.320553780617678e-05,
1597
- "loss": 0.002,
1598
- "step": 2570
1599
- },
1600
- {
1601
- "epoch": 4.12,
1602
- "learning_rate": 6.26730564430245e-05,
1603
- "loss": 0.0158,
1604
- "step": 2580
1605
- },
1606
- {
1607
- "epoch": 4.14,
1608
- "learning_rate": 6.214057507987222e-05,
1609
- "loss": 0.0025,
1610
- "step": 2590
1611
- },
1612
- {
1613
- "epoch": 4.15,
1614
- "learning_rate": 6.160809371671991e-05,
1615
- "loss": 0.0189,
1616
- "step": 2600
1617
- },
1618
- {
1619
- "epoch": 4.17,
1620
- "learning_rate": 6.107561235356763e-05,
1621
- "loss": 0.0007,
1622
- "step": 2610
1623
- },
1624
- {
1625
- "epoch": 4.19,
1626
- "learning_rate": 6.054313099041534e-05,
1627
- "loss": 0.0008,
1628
- "step": 2620
1629
- },
1630
- {
1631
- "epoch": 4.2,
1632
- "learning_rate": 6.001064962726305e-05,
1633
- "loss": 0.0007,
1634
- "step": 2630
1635
- },
1636
- {
1637
- "epoch": 4.22,
1638
- "learning_rate": 5.9478168264110756e-05,
1639
- "loss": 0.0155,
1640
- "step": 2640
1641
- },
1642
- {
1643
- "epoch": 4.23,
1644
- "learning_rate": 5.8945686900958475e-05,
1645
- "loss": 0.0005,
1646
- "step": 2650
1647
- },
1648
- {
1649
- "epoch": 4.25,
1650
- "learning_rate": 5.841320553780618e-05,
1651
- "loss": 0.0061,
1652
- "step": 2660
1653
- },
1654
- {
1655
- "epoch": 4.27,
1656
- "learning_rate": 5.7880724174653886e-05,
1657
- "loss": 0.0009,
1658
- "step": 2670
1659
- },
1660
- {
1661
- "epoch": 4.28,
1662
- "learning_rate": 5.73482428115016e-05,
1663
- "loss": 0.0024,
1664
- "step": 2680
1665
- },
1666
- {
1667
- "epoch": 4.3,
1668
- "learning_rate": 5.681576144834932e-05,
1669
- "loss": 0.0009,
1670
- "step": 2690
1671
- },
1672
- {
1673
- "epoch": 4.31,
1674
- "learning_rate": 5.628328008519702e-05,
1675
- "loss": 0.0007,
1676
- "step": 2700
1677
- },
1678
- {
1679
- "epoch": 4.33,
1680
- "learning_rate": 5.575079872204473e-05,
1681
- "loss": 0.0021,
1682
- "step": 2710
1683
- },
1684
- {
1685
- "epoch": 4.35,
1686
- "learning_rate": 5.521831735889245e-05,
1687
- "loss": 0.017,
1688
- "step": 2720
1689
- },
1690
- {
1691
- "epoch": 4.36,
1692
- "learning_rate": 5.468583599574015e-05,
1693
- "loss": 0.0009,
1694
- "step": 2730
1695
- },
1696
- {
1697
- "epoch": 4.38,
1698
- "learning_rate": 5.415335463258786e-05,
1699
- "loss": 0.0099,
1700
- "step": 2740
1701
- },
1702
- {
1703
- "epoch": 4.39,
1704
- "learning_rate": 5.362087326943557e-05,
1705
- "loss": 0.0004,
1706
- "step": 2750
1707
- },
1708
- {
1709
- "epoch": 4.41,
1710
- "learning_rate": 5.308839190628329e-05,
1711
- "loss": 0.0016,
1712
- "step": 2760
1713
- },
1714
- {
1715
- "epoch": 4.42,
1716
- "learning_rate": 5.2555910543130994e-05,
1717
- "loss": 0.0012,
1718
- "step": 2770
1719
- },
1720
- {
1721
- "epoch": 4.44,
1722
- "learning_rate": 5.20234291799787e-05,
1723
- "loss": 0.003,
1724
- "step": 2780
1725
- },
1726
- {
1727
- "epoch": 4.46,
1728
- "learning_rate": 5.149094781682642e-05,
1729
- "loss": 0.0004,
1730
- "step": 2790
1731
- },
1732
- {
1733
- "epoch": 4.47,
1734
- "learning_rate": 5.095846645367412e-05,
1735
- "loss": 0.0004,
1736
- "step": 2800
1737
- },
1738
- {
1739
- "epoch": 4.49,
1740
- "learning_rate": 5.0425985090521835e-05,
1741
- "loss": 0.0198,
1742
- "step": 2810
1743
- },
1744
- {
1745
- "epoch": 4.5,
1746
- "learning_rate": 4.989350372736955e-05,
1747
- "loss": 0.0004,
1748
- "step": 2820
1749
- },
1750
- {
1751
- "epoch": 4.52,
1752
- "learning_rate": 4.936102236421725e-05,
1753
- "loss": 0.0003,
1754
- "step": 2830
1755
- },
1756
- {
1757
- "epoch": 4.54,
1758
- "learning_rate": 4.8828541001064965e-05,
1759
- "loss": 0.0006,
1760
- "step": 2840
1761
- },
1762
- {
1763
- "epoch": 4.55,
1764
- "learning_rate": 4.829605963791268e-05,
1765
- "loss": 0.0004,
1766
- "step": 2850
1767
- },
1768
- {
1769
- "epoch": 4.57,
1770
- "learning_rate": 4.776357827476038e-05,
1771
- "loss": 0.0223,
1772
- "step": 2860
1773
- },
1774
- {
1775
- "epoch": 4.58,
1776
- "learning_rate": 4.7231096911608095e-05,
1777
- "loss": 0.0005,
1778
- "step": 2870
1779
- },
1780
- {
1781
- "epoch": 4.6,
1782
- "learning_rate": 4.669861554845581e-05,
1783
- "loss": 0.0006,
1784
- "step": 2880
1785
- },
1786
- {
1787
- "epoch": 4.62,
1788
- "learning_rate": 4.616613418530352e-05,
1789
- "loss": 0.0006,
1790
- "step": 2890
1791
- },
1792
- {
1793
- "epoch": 4.63,
1794
- "learning_rate": 4.563365282215123e-05,
1795
- "loss": 0.0004,
1796
- "step": 2900
1797
- },
1798
- {
1799
- "epoch": 4.65,
1800
- "learning_rate": 4.5101171458998936e-05,
1801
- "loss": 0.0004,
1802
- "step": 2910
1803
- },
1804
- {
1805
- "epoch": 4.66,
1806
- "learning_rate": 4.456869009584665e-05,
1807
- "loss": 0.0012,
1808
- "step": 2920
1809
- },
1810
- {
1811
- "epoch": 4.68,
1812
- "learning_rate": 4.4036208732694354e-05,
1813
- "loss": 0.0033,
1814
- "step": 2930
1815
- },
1816
- {
1817
- "epoch": 4.7,
1818
- "learning_rate": 4.3503727369542066e-05,
1819
- "loss": 0.0007,
1820
- "step": 2940
1821
- },
1822
- {
1823
- "epoch": 4.71,
1824
- "learning_rate": 4.297124600638978e-05,
1825
- "loss": 0.0134,
1826
- "step": 2950
1827
- },
1828
- {
1829
- "epoch": 4.73,
1830
- "learning_rate": 4.243876464323749e-05,
1831
- "loss": 0.0005,
1832
- "step": 2960
1833
- },
1834
- {
1835
- "epoch": 4.74,
1836
- "learning_rate": 4.19062832800852e-05,
1837
- "loss": 0.0005,
1838
- "step": 2970
1839
- },
1840
- {
1841
- "epoch": 4.76,
1842
- "learning_rate": 4.137380191693291e-05,
1843
- "loss": 0.0122,
1844
- "step": 2980
1845
- },
1846
- {
1847
- "epoch": 4.78,
1848
- "learning_rate": 4.084132055378062e-05,
1849
- "loss": 0.0052,
1850
- "step": 2990
1851
- },
1852
- {
1853
- "epoch": 4.79,
1854
- "learning_rate": 4.0308839190628325e-05,
1855
- "loss": 0.0004,
1856
- "step": 3000
1857
- },
1858
- {
1859
- "epoch": 4.81,
1860
- "learning_rate": 3.9776357827476044e-05,
1861
- "loss": 0.0112,
1862
- "step": 3010
1863
- },
1864
- {
1865
- "epoch": 4.82,
1866
- "learning_rate": 3.9243876464323756e-05,
1867
- "loss": 0.0019,
1868
- "step": 3020
1869
- },
1870
- {
1871
- "epoch": 4.84,
1872
- "learning_rate": 3.871139510117146e-05,
1873
- "loss": 0.0016,
1874
- "step": 3030
1875
- },
1876
- {
1877
- "epoch": 4.86,
1878
- "learning_rate": 3.8178913738019174e-05,
1879
- "loss": 0.0076,
1880
- "step": 3040
1881
- },
1882
- {
1883
- "epoch": 4.87,
1884
- "learning_rate": 3.764643237486688e-05,
1885
- "loss": 0.0007,
1886
- "step": 3050
1887
- },
1888
- {
1889
- "epoch": 4.89,
1890
- "learning_rate": 3.711395101171459e-05,
1891
- "loss": 0.0006,
1892
- "step": 3060
1893
- },
1894
- {
1895
- "epoch": 4.9,
1896
- "learning_rate": 3.65814696485623e-05,
1897
- "loss": 0.0025,
1898
- "step": 3070
1899
- },
1900
- {
1901
- "epoch": 4.92,
1902
- "learning_rate": 3.6048988285410015e-05,
1903
- "loss": 0.0008,
1904
- "step": 3080
1905
- },
1906
- {
1907
- "epoch": 4.94,
1908
- "learning_rate": 3.551650692225773e-05,
1909
- "loss": 0.0004,
1910
- "step": 3090
1911
- },
1912
- {
1913
- "epoch": 4.95,
1914
- "learning_rate": 3.498402555910543e-05,
1915
- "loss": 0.0003,
1916
- "step": 3100
1917
- },
1918
- {
1919
- "epoch": 4.97,
1920
- "learning_rate": 3.4451544195953145e-05,
1921
- "loss": 0.0003,
1922
- "step": 3110
1923
- },
1924
- {
1925
- "epoch": 4.98,
1926
- "learning_rate": 3.391906283280085e-05,
1927
- "loss": 0.0004,
1928
- "step": 3120
1929
- },
1930
- {
1931
- "epoch": 5.0,
1932
- "learning_rate": 3.338658146964856e-05,
1933
- "loss": 0.0004,
1934
- "step": 3130
1935
- },
1936
- {
1937
- "epoch": 5.0,
1938
- "eval_accuracy": 0.8601036269430051,
1939
- "eval_f1": 0.8667958211367178,
1940
- "eval_loss": 0.5794870853424072,
1941
- "eval_precision": 0.878678756476684,
1942
- "eval_recall": 0.8601036269430051,
1943
- "eval_runtime": 1.3078,
1944
- "eval_samples_per_second": 147.574,
1945
- "eval_steps_per_second": 9.94,
1946
- "step": 3130
1947
- },
1948
- {
1949
- "epoch": 5.02,
1950
- "learning_rate": 3.2854100106496274e-05,
1951
- "loss": 0.0007,
1952
- "step": 3140
1953
- },
1954
- {
1955
- "epoch": 5.03,
1956
- "learning_rate": 3.2321618743343987e-05,
1957
- "loss": 0.0003,
1958
- "step": 3150
1959
- },
1960
- {
1961
- "epoch": 5.05,
1962
- "learning_rate": 3.17891373801917e-05,
1963
- "loss": 0.0005,
1964
- "step": 3160
1965
- },
1966
- {
1967
- "epoch": 5.06,
1968
- "learning_rate": 3.1256656017039404e-05,
1969
- "loss": 0.0005,
1970
- "step": 3170
1971
- },
1972
- {
1973
- "epoch": 5.08,
1974
- "learning_rate": 3.0724174653887116e-05,
1975
- "loss": 0.0004,
1976
- "step": 3180
1977
- },
1978
- {
1979
- "epoch": 5.1,
1980
- "learning_rate": 3.0191693290734825e-05,
1981
- "loss": 0.0002,
1982
- "step": 3190
1983
- },
1984
- {
1985
- "epoch": 5.11,
1986
- "learning_rate": 2.9659211927582537e-05,
1987
- "loss": 0.0003,
1988
- "step": 3200
1989
- },
1990
- {
1991
- "epoch": 5.13,
1992
- "learning_rate": 2.912673056443025e-05,
1993
- "loss": 0.0003,
1994
- "step": 3210
1995
- },
1996
- {
1997
- "epoch": 5.14,
1998
- "learning_rate": 2.8594249201277955e-05,
1999
- "loss": 0.0002,
2000
- "step": 3220
2001
- },
2002
- {
2003
- "epoch": 5.16,
2004
- "learning_rate": 2.806176783812567e-05,
2005
- "loss": 0.0003,
2006
- "step": 3230
2007
- },
2008
- {
2009
- "epoch": 5.18,
2010
- "learning_rate": 2.7529286474973375e-05,
2011
- "loss": 0.0004,
2012
- "step": 3240
2013
- },
2014
- {
2015
- "epoch": 5.19,
2016
- "learning_rate": 2.6996805111821088e-05,
2017
- "loss": 0.0003,
2018
- "step": 3250
2019
- },
2020
- {
2021
- "epoch": 5.21,
2022
- "learning_rate": 2.6464323748668796e-05,
2023
- "loss": 0.0006,
2024
- "step": 3260
2025
- },
2026
- {
2027
- "epoch": 5.22,
2028
- "learning_rate": 2.593184238551651e-05,
2029
- "loss": 0.0003,
2030
- "step": 3270
2031
- },
2032
- {
2033
- "epoch": 5.24,
2034
- "learning_rate": 2.539936102236422e-05,
2035
- "loss": 0.0003,
2036
- "step": 3280
2037
- },
2038
- {
2039
- "epoch": 5.26,
2040
- "learning_rate": 2.486687965921193e-05,
2041
- "loss": 0.0003,
2042
- "step": 3290
2043
- },
2044
- {
2045
- "epoch": 5.27,
2046
- "learning_rate": 2.4334398296059638e-05,
2047
- "loss": 0.0002,
2048
- "step": 3300
2049
- },
2050
- {
2051
- "epoch": 5.29,
2052
- "learning_rate": 2.380191693290735e-05,
2053
- "loss": 0.0004,
2054
- "step": 3310
2055
- },
2056
- {
2057
- "epoch": 5.3,
2058
- "learning_rate": 2.326943556975506e-05,
2059
- "loss": 0.0003,
2060
- "step": 3320
2061
- },
2062
- {
2063
- "epoch": 5.32,
2064
- "learning_rate": 2.273695420660277e-05,
2065
- "loss": 0.0002,
2066
- "step": 3330
2067
- },
2068
- {
2069
- "epoch": 5.34,
2070
- "learning_rate": 2.220447284345048e-05,
2071
- "loss": 0.0002,
2072
- "step": 3340
2073
- },
2074
- {
2075
- "epoch": 5.35,
2076
- "learning_rate": 2.167199148029819e-05,
2077
- "loss": 0.0003,
2078
- "step": 3350
2079
- },
2080
- {
2081
- "epoch": 5.37,
2082
- "learning_rate": 2.11395101171459e-05,
2083
- "loss": 0.0002,
2084
- "step": 3360
2085
- },
2086
- {
2087
- "epoch": 5.38,
2088
- "learning_rate": 2.0607028753993613e-05,
2089
- "loss": 0.0002,
2090
- "step": 3370
2091
- },
2092
- {
2093
- "epoch": 5.4,
2094
- "learning_rate": 2.007454739084132e-05,
2095
- "loss": 0.0002,
2096
- "step": 3380
2097
- },
2098
- {
2099
- "epoch": 5.42,
2100
- "learning_rate": 1.9542066027689034e-05,
2101
- "loss": 0.0002,
2102
- "step": 3390
2103
- },
2104
- {
2105
- "epoch": 5.43,
2106
- "learning_rate": 1.9009584664536742e-05,
2107
- "loss": 0.0003,
2108
- "step": 3400
2109
- },
2110
- {
2111
- "epoch": 5.45,
2112
- "learning_rate": 1.847710330138445e-05,
2113
- "loss": 0.0004,
2114
- "step": 3410
2115
- },
2116
- {
2117
- "epoch": 5.46,
2118
- "learning_rate": 1.7944621938232163e-05,
2119
- "loss": 0.0003,
2120
- "step": 3420
2121
- },
2122
- {
2123
- "epoch": 5.48,
2124
- "learning_rate": 1.7412140575079875e-05,
2125
- "loss": 0.0003,
2126
- "step": 3430
2127
- },
2128
- {
2129
- "epoch": 5.5,
2130
- "learning_rate": 1.6879659211927584e-05,
2131
- "loss": 0.0003,
2132
- "step": 3440
2133
- },
2134
- {
2135
- "epoch": 5.51,
2136
- "learning_rate": 1.6347177848775293e-05,
2137
- "loss": 0.0002,
2138
- "step": 3450
2139
- },
2140
- {
2141
- "epoch": 5.53,
2142
- "learning_rate": 1.5814696485623005e-05,
2143
- "loss": 0.0036,
2144
- "step": 3460
2145
- },
2146
- {
2147
- "epoch": 5.54,
2148
- "learning_rate": 1.5282215122470714e-05,
2149
- "loss": 0.0003,
2150
- "step": 3470
2151
- },
2152
- {
2153
- "epoch": 5.56,
2154
- "learning_rate": 1.4749733759318424e-05,
2155
- "loss": 0.0003,
2156
- "step": 3480
2157
- },
2158
- {
2159
- "epoch": 5.58,
2160
- "learning_rate": 1.4217252396166134e-05,
2161
- "loss": 0.0003,
2162
- "step": 3490
2163
- },
2164
- {
2165
- "epoch": 5.59,
2166
- "learning_rate": 1.3684771033013847e-05,
2167
- "loss": 0.0003,
2168
- "step": 3500
2169
- },
2170
- {
2171
- "epoch": 5.61,
2172
- "learning_rate": 1.3152289669861555e-05,
2173
- "loss": 0.0002,
2174
- "step": 3510
2175
- },
2176
- {
2177
- "epoch": 5.62,
2178
- "learning_rate": 1.2619808306709266e-05,
2179
- "loss": 0.0003,
2180
- "step": 3520
2181
- },
2182
- {
2183
- "epoch": 5.64,
2184
- "learning_rate": 1.2087326943556976e-05,
2185
- "loss": 0.0002,
2186
- "step": 3530
2187
- },
2188
- {
2189
- "epoch": 5.65,
2190
- "learning_rate": 1.1554845580404687e-05,
2191
- "loss": 0.0002,
2192
- "step": 3540
2193
- },
2194
- {
2195
- "epoch": 5.67,
2196
- "learning_rate": 1.1022364217252397e-05,
2197
- "loss": 0.0006,
2198
- "step": 3550
2199
- },
2200
- {
2201
- "epoch": 5.69,
2202
- "learning_rate": 1.0489882854100107e-05,
2203
- "loss": 0.0002,
2204
- "step": 3560
2205
- },
2206
- {
2207
- "epoch": 5.7,
2208
- "learning_rate": 9.957401490947816e-06,
2209
- "loss": 0.0002,
2210
- "step": 3570
2211
- },
2212
- {
2213
- "epoch": 5.72,
2214
- "learning_rate": 9.424920127795528e-06,
2215
- "loss": 0.0002,
2216
- "step": 3580
2217
- },
2218
- {
2219
- "epoch": 5.73,
2220
- "learning_rate": 8.892438764643237e-06,
2221
- "loss": 0.0002,
2222
- "step": 3590
2223
- },
2224
- {
2225
- "epoch": 5.75,
2226
- "learning_rate": 8.359957401490947e-06,
2227
- "loss": 0.0002,
2228
- "step": 3600
2229
- },
2230
- {
2231
- "epoch": 5.77,
2232
- "learning_rate": 7.82747603833866e-06,
2233
- "loss": 0.0002,
2234
- "step": 3610
2235
- },
2236
- {
2237
- "epoch": 5.78,
2238
- "learning_rate": 7.294994675186369e-06,
2239
- "loss": 0.0002,
2240
- "step": 3620
2241
- },
2242
- {
2243
- "epoch": 5.8,
2244
- "learning_rate": 6.762513312034079e-06,
2245
- "loss": 0.0002,
2246
- "step": 3630
2247
- },
2248
- {
2249
- "epoch": 5.81,
2250
- "learning_rate": 6.230031948881789e-06,
2251
- "loss": 0.0002,
2252
- "step": 3640
2253
- },
2254
- {
2255
- "epoch": 5.83,
2256
- "learning_rate": 5.6975505857295e-06,
2257
- "loss": 0.0002,
2258
- "step": 3650
2259
- },
2260
- {
2261
- "epoch": 5.85,
2262
- "learning_rate": 5.16506922257721e-06,
2263
- "loss": 0.0002,
2264
- "step": 3660
2265
- },
2266
- {
2267
- "epoch": 5.86,
2268
- "learning_rate": 4.6325878594249205e-06,
2269
- "loss": 0.0002,
2270
- "step": 3670
2271
- },
2272
- {
2273
- "epoch": 5.88,
2274
- "learning_rate": 4.100106496272631e-06,
2275
- "loss": 0.0003,
2276
- "step": 3680
2277
- },
2278
- {
2279
- "epoch": 5.89,
2280
- "learning_rate": 3.5676251331203413e-06,
2281
- "loss": 0.0002,
2282
- "step": 3690
2283
- },
2284
- {
2285
- "epoch": 5.91,
2286
- "learning_rate": 3.0351437699680513e-06,
2287
- "loss": 0.0002,
2288
- "step": 3700
2289
- },
2290
- {
2291
- "epoch": 5.93,
2292
- "learning_rate": 2.5026624068157617e-06,
2293
- "loss": 0.0002,
2294
- "step": 3710
2295
- },
2296
- {
2297
- "epoch": 5.94,
2298
- "learning_rate": 1.9701810436634718e-06,
2299
- "loss": 0.0003,
2300
- "step": 3720
2301
- },
2302
- {
2303
- "epoch": 5.96,
2304
- "learning_rate": 1.4376996805111822e-06,
2305
- "loss": 0.0002,
2306
- "step": 3730
2307
- },
2308
- {
2309
- "epoch": 5.97,
2310
- "learning_rate": 9.052183173588925e-07,
2311
- "loss": 0.0002,
2312
- "step": 3740
2313
- },
2314
- {
2315
- "epoch": 5.99,
2316
- "learning_rate": 3.727369542066028e-07,
2317
- "loss": 0.0002,
2318
- "step": 3750
2319
- },
2320
- {
2321
- "epoch": 6.0,
2322
- "eval_accuracy": 0.8652849740932642,
2323
- "eval_f1": 0.872574161558089,
2324
- "eval_loss": 0.5782872438430786,
2325
- "eval_precision": 0.8851064570906083,
2326
- "eval_recall": 0.8652849740932642,
2327
- "eval_runtime": 1.304,
2328
- "eval_samples_per_second": 148.006,
2329
- "eval_steps_per_second": 9.969,
2330
- "step": 3756
2331
- },
2332
- {
2333
- "epoch": 6.0,
2334
- "step": 3756,
2335
- "total_flos": 4.65670232933972e+18,
2336
- "train_loss": 0.07091493732059752,
2337
- "train_runtime": 1262.5493,
2338
- "train_samples_per_second": 47.594,
2339
- "train_steps_per_second": 2.975
2340
  }
2341
  ],
2342
- "max_steps": 3756,
2343
- "num_train_epochs": 6,
2344
- "total_flos": 4.65670232933972e+18,
2345
  "trial_name": null,
2346
  "trial_params": null
2347
  }
 
1
  {
2
+ "best_metric": 0.8860103626943006,
3
+ "best_model_checkpoint": "./vit-focal-skin/checkpoint-1252",
4
+ "epoch": 4.0,
5
+ "global_step": 2504,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 0.16,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "learning_rate": 0.00019201277955271565,
13
+ "loss": 0.7905,
14
+ "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 0.32,
18
+ "learning_rate": 0.00018402555910543132,
19
+ "loss": 0.6162,
20
  "step": 200
21
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  {
23
  "epoch": 0.48,
24
+ "learning_rate": 0.000176038338658147,
25
+ "loss": 0.5709,
26
  "step": 300
27
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  {
29
  "epoch": 0.64,
30
+ "learning_rate": 0.00016805111821086263,
31
+ "loss": 0.5101,
32
  "step": 400
33
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  {
35
  "epoch": 0.8,
36
+ "learning_rate": 0.0001600638977635783,
37
+ "loss": 0.4966,
38
  "step": 500
39
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  {
41
  "epoch": 0.96,
42
+ "learning_rate": 0.00015207667731629394,
43
+ "loss": 0.4563,
44
  "step": 600
45
  },
 
 
 
 
 
 
 
 
 
 
 
 
46
  {
47
  "epoch": 1.0,
48
+ "eval_accuracy": 0.8341968911917098,
49
+ "eval_f1": 0.8331282144797781,
50
+ "eval_loss": 0.40268635749816895,
51
+ "eval_precision": 0.8791963514680484,
52
+ "eval_recall": 0.8341968911917098,
53
+ "eval_runtime": 1.5386,
54
+ "eval_samples_per_second": 125.44,
55
+ "eval_steps_per_second": 16.249,
56
  "step": 626
57
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  {
59
  "epoch": 1.12,
60
+ "learning_rate": 0.00014408945686900958,
61
+ "loss": 0.385,
62
  "step": 700
63
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  {
65
  "epoch": 1.28,
66
+ "learning_rate": 0.00013610223642172525,
67
+ "loss": 0.3621,
68
  "step": 800
69
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  {
71
  "epoch": 1.44,
72
+ "learning_rate": 0.00012811501597444092,
73
+ "loss": 0.3624,
74
  "step": 900
75
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  {
77
  "epoch": 1.6,
78
+ "learning_rate": 0.00012012779552715656,
79
+ "loss": 0.3403,
80
  "step": 1000
81
  },
82
  {
83
+ "epoch": 1.76,
84
+ "learning_rate": 0.00011214057507987221,
85
+ "loss": 0.3293,
86
+ "step": 1100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  },
88
  {
89
+ "epoch": 1.92,
90
+ "learning_rate": 0.00010415335463258787,
91
+ "loss": 0.2954,
92
+ "step": 1200
93
  },
94
  {
95
+ "epoch": 2.0,
96
+ "eval_accuracy": 0.8860103626943006,
97
+ "eval_f1": 0.8891954516071332,
98
+ "eval_loss": 0.3063604235649109,
99
+ "eval_precision": 0.8988400162775354,
100
+ "eval_recall": 0.8860103626943006,
101
+ "eval_runtime": 1.5206,
102
+ "eval_samples_per_second": 126.921,
103
+ "eval_steps_per_second": 16.441,
104
+ "step": 1252
105
  },
106
  {
107
+ "epoch": 2.08,
108
+ "learning_rate": 9.616613418530351e-05,
109
+ "loss": 0.2258,
110
+ "step": 1300
111
  },
112
  {
113
+ "epoch": 2.24,
114
+ "learning_rate": 8.817891373801918e-05,
115
+ "loss": 0.1583,
116
+ "step": 1400
117
  },
118
  {
119
+ "epoch": 2.4,
120
+ "learning_rate": 8.019169329073483e-05,
121
+ "loss": 0.1589,
122
+ "step": 1500
123
  },
124
  {
125
+ "epoch": 2.56,
126
+ "learning_rate": 7.220447284345049e-05,
127
+ "loss": 0.1271,
128
+ "step": 1600
129
  },
130
  {
131
+ "epoch": 2.72,
132
+ "learning_rate": 6.421725239616614e-05,
133
+ "loss": 0.1746,
134
+ "step": 1700
135
  },
136
  {
137
+ "epoch": 2.88,
138
+ "learning_rate": 5.623003194888179e-05,
139
+ "loss": 0.1118,
140
+ "step": 1800
141
  },
142
  {
143
+ "epoch": 3.0,
144
+ "eval_accuracy": 0.8704663212435233,
145
+ "eval_f1": 0.8663258953141536,
146
+ "eval_loss": 0.30049628019332886,
147
+ "eval_precision": 0.8698602729520133,
148
+ "eval_recall": 0.8704663212435233,
149
+ "eval_runtime": 1.4368,
150
+ "eval_samples_per_second": 134.33,
151
+ "eval_steps_per_second": 17.4,
152
+ "step": 1878
153
  },
154
  {
155
+ "epoch": 3.04,
156
+ "learning_rate": 4.824281150159744e-05,
157
+ "loss": 0.1193,
158
+ "step": 1900
159
  },
160
  {
161
+ "epoch": 3.19,
162
+ "learning_rate": 4.0255591054313104e-05,
163
+ "loss": 0.0409,
164
+ "step": 2000
165
  },
166
  {
167
+ "epoch": 3.35,
168
+ "learning_rate": 3.226837060702875e-05,
169
+ "loss": 0.0327,
170
+ "step": 2100
171
  },
172
  {
173
+ "epoch": 3.51,
174
+ "learning_rate": 2.428115015974441e-05,
175
+ "loss": 0.0357,
176
+ "step": 2200
177
  },
178
  {
179
+ "epoch": 3.67,
180
+ "learning_rate": 1.6293929712460065e-05,
181
+ "loss": 0.0386,
182
+ "step": 2300
183
  },
184
  {
185
+ "epoch": 3.83,
186
+ "learning_rate": 8.306709265175718e-06,
187
+ "loss": 0.0303,
188
+ "step": 2400
189
  },
190
  {
191
+ "epoch": 3.99,
192
+ "learning_rate": 3.194888178913738e-07,
193
+ "loss": 0.0317,
194
+ "step": 2500
195
  },
196
  {
197
+ "epoch": 4.0,
198
+ "eval_accuracy": 0.8549222797927462,
199
+ "eval_f1": 0.8560478324319817,
200
+ "eval_loss": 0.3552953004837036,
201
+ "eval_precision": 0.8595484172497833,
202
+ "eval_recall": 0.8549222797927462,
203
+ "eval_runtime": 1.5113,
204
+ "eval_samples_per_second": 127.703,
205
+ "eval_steps_per_second": 16.542,
206
+ "step": 2504
207
  },
208
  {
209
+ "epoch": 4.0,
210
+ "step": 2504,
211
+ "total_flos": 3.104468219559813e+18,
212
+ "train_loss": 0.2716429328141264,
213
+ "train_runtime": 832.7663,
214
+ "train_samples_per_second": 48.105,
215
+ "train_steps_per_second": 3.007
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  }
217
  ],
218
+ "max_steps": 2504,
219
+ "num_train_epochs": 4,
220
+ "total_flos": 3.104468219559813e+18,
221
  "trial_name": null,
222
  "trial_params": null
223
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5060ae30078b76a968f70fd0620f572d85a996b86feeb5260ba977bcbdf91db8
3
  size 3899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae95784d5a6901f55f2c792e11ade68b7f5871a7f402bd5113f06400a3ea472
3
  size 3899