khleeloo commited on
Commit
38bc06f
1 Parent(s): 0b988b3

Training in progress, epoch 1

Browse files
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ 2023-10-21 filter=lfs diff=lfs merge=lfs -text
37
+ at filter=lfs diff=lfs merge=lfs -text
38
+ 21-56-46 filter=lfs diff=lfs merge=lfs -text
39
+ TensorBoard.png filter=lfs diff=lfs merge=lfs -text
40
+ Screenshot filter=lfs diff=lfs merge=lfs -text
41
+ 21-58-51 filter=lfs diff=lfs merge=lfs -text
42
+ 21-59-25 filter=lfs diff=lfs merge=lfs -text
Screenshot 2023-10-21 at 21-56-46 TensorBoard.png ADDED
Screenshot 2023-10-21 at 21-58-51 TensorBoard.png ADDED
Screenshot 2023-10-21 at 21-59-25 TensorBoard.png ADDED
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.7473544973544973,
4
- "eval_f1": 0.7453663822696885,
5
- "eval_loss": 0.811197817325592,
6
- "eval_precision": 0.77172935934205,
7
- "eval_recall": 0.7473544973544973,
8
- "eval_runtime": 8.6997,
9
- "eval_samples_per_second": 173.8,
10
- "eval_steps_per_second": 10.92,
11
  "total_flos": 3.104468219559813e+18,
12
- "train_loss": 0.22616184618065174,
13
- "train_runtime": 849.1634,
14
- "train_samples_per_second": 47.176,
15
- "train_steps_per_second": 2.949
16
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7718253968253969,
4
+ "eval_f1": 0.7520737401500063,
5
+ "eval_loss": 0.8010103702545166,
6
+ "eval_precision": 0.7604704688179398,
7
+ "eval_recall": 0.7718253968253969,
8
+ "eval_runtime": 8.8001,
9
+ "eval_samples_per_second": 171.815,
10
+ "eval_steps_per_second": 10.795,
11
  "total_flos": 3.104468219559813e+18,
12
+ "train_loss": 0.06904298590738683,
13
+ "train_runtime": 834.8541,
14
+ "train_samples_per_second": 47.984,
15
+ "train_steps_per_second": 2.999
16
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.7473544973544973,
4
- "eval_f1": 0.7453663822696885,
5
- "eval_loss": 0.811197817325592,
6
- "eval_precision": 0.77172935934205,
7
- "eval_recall": 0.7473544973544973,
8
- "eval_runtime": 8.6997,
9
- "eval_samples_per_second": 173.8,
10
- "eval_steps_per_second": 10.92
11
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7718253968253969,
4
+ "eval_f1": 0.7520737401500063,
5
+ "eval_loss": 0.8010103702545166,
6
+ "eval_precision": 0.7604704688179398,
7
+ "eval_recall": 0.7718253968253969,
8
+ "eval_runtime": 8.8001,
9
+ "eval_samples_per_second": 171.815,
10
+ "eval_steps_per_second": 10.795
11
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56880c5554cc5050f00515e3117129c16ec102a053313893c21b72ddda79daaf
3
  size 343284077
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13d2d5477ba9db242ca7d11efd60356bfad7daea63b6ac969254a9e5fcee5289
3
  size 343284077
runs/Oct21_13-32-53_EE4E077/events.out.tfevents.1697868948.EE4E077.986268.11 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc53c8d917aea24374564c3e501385d59dbfde1f2a39cc168c946ce1dc72748
3
+ size 560
runs/Oct26_14-36-46_EE4E077/1698302209.970762/events.out.tfevents.1698302209.EE4E077.357099.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b1d85442bd4444bc32162177b80145b06a655ac0a53929b8218a5e8a9347cef
3
+ size 5913
runs/Oct26_14-36-46_EE4E077/events.out.tfevents.1698302209.EE4E077.357099.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:842c37af403c95c822da3cd6449678150989b988e524297d0ca55238eac0ff62
3
+ size 14323
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 3.104468219559813e+18,
4
- "train_loss": 0.22616184618065174,
5
- "train_runtime": 849.1634,
6
- "train_samples_per_second": 47.176,
7
- "train_steps_per_second": 2.949
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 3.104468219559813e+18,
4
+ "train_loss": 0.06904298590738683,
5
+ "train_runtime": 834.8541,
6
+ "train_samples_per_second": 47.984,
7
+ "train_steps_per_second": 2.999
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8549222797927462,
3
  "best_model_checkpoint": "./vit-base-skin/checkpoint-1252",
4
  "epoch": 4.0,
5
  "global_step": 2504,
@@ -10,1559 +10,1559 @@
10
  {
11
  "epoch": 0.02,
12
  "learning_rate": 0.00019920127795527157,
13
- "loss": 0.5383,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.03,
18
  "learning_rate": 0.00019840255591054313,
19
- "loss": 0.5698,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.05,
24
  "learning_rate": 0.00019760383386581472,
25
- "loss": 0.3583,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.06,
30
  "learning_rate": 0.00019680511182108628,
31
- "loss": 0.6847,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.08,
36
  "learning_rate": 0.00019600638977635784,
37
- "loss": 0.4862,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.1,
42
  "learning_rate": 0.0001952076677316294,
43
- "loss": 0.5762,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.11,
48
  "learning_rate": 0.00019440894568690097,
49
- "loss": 0.5177,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.13,
54
  "learning_rate": 0.00019361022364217253,
55
- "loss": 0.5162,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.14,
60
  "learning_rate": 0.0001928115015974441,
61
- "loss": 0.5828,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.16,
66
  "learning_rate": 0.00019201277955271565,
67
- "loss": 0.4072,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.18,
72
  "learning_rate": 0.0001912140575079872,
73
- "loss": 0.6595,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.19,
78
  "learning_rate": 0.0001904153354632588,
79
- "loss": 0.4277,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.21,
84
  "learning_rate": 0.00018961661341853036,
85
- "loss": 0.4526,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.22,
90
  "learning_rate": 0.00018881789137380192,
91
- "loss": 0.4281,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.24,
96
  "learning_rate": 0.00018801916932907348,
97
- "loss": 0.5989,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.26,
102
  "learning_rate": 0.00018722044728434505,
103
- "loss": 0.6671,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.27,
108
  "learning_rate": 0.00018642172523961663,
109
- "loss": 0.4508,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.29,
114
  "learning_rate": 0.0001856230031948882,
115
- "loss": 0.5537,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.3,
120
  "learning_rate": 0.00018482428115015976,
121
- "loss": 0.3831,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.32,
126
  "learning_rate": 0.00018402555910543132,
127
- "loss": 0.636,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.34,
132
  "learning_rate": 0.00018322683706070288,
133
- "loss": 0.4598,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.35,
138
  "learning_rate": 0.00018242811501597444,
139
- "loss": 0.4891,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.37,
144
  "learning_rate": 0.000181629392971246,
145
- "loss": 0.4803,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.38,
150
  "learning_rate": 0.00018083067092651756,
151
- "loss": 0.4423,
152
  "step": 240
153
  },
154
  {
155
  "epoch": 0.4,
156
  "learning_rate": 0.00018003194888178913,
157
- "loss": 0.3538,
158
  "step": 250
159
  },
160
  {
161
  "epoch": 0.42,
162
  "learning_rate": 0.00017923322683706071,
163
- "loss": 0.4499,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.43,
168
  "learning_rate": 0.00017843450479233228,
169
- "loss": 0.6426,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.45,
174
  "learning_rate": 0.00017763578274760384,
175
- "loss": 0.4676,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.46,
180
  "learning_rate": 0.0001768370607028754,
181
- "loss": 0.4819,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.48,
186
  "learning_rate": 0.000176038338658147,
187
- "loss": 0.3673,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.5,
192
  "learning_rate": 0.00017523961661341855,
193
- "loss": 0.4349,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.51,
198
  "learning_rate": 0.0001744408945686901,
199
- "loss": 0.34,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.53,
204
  "learning_rate": 0.00017364217252396167,
205
- "loss": 0.4458,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.54,
210
  "learning_rate": 0.00017284345047923323,
211
- "loss": 0.5578,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 0.56,
216
  "learning_rate": 0.00017204472843450482,
217
- "loss": 0.4578,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 0.58,
222
  "learning_rate": 0.00017124600638977638,
223
- "loss": 0.3579,
224
  "step": 360
225
  },
226
  {
227
  "epoch": 0.59,
228
  "learning_rate": 0.00017044728434504792,
229
- "loss": 0.372,
230
  "step": 370
231
  },
232
  {
233
  "epoch": 0.61,
234
  "learning_rate": 0.00016964856230031948,
235
- "loss": 0.5132,
236
  "step": 380
237
  },
238
  {
239
  "epoch": 0.62,
240
  "learning_rate": 0.00016884984025559104,
241
- "loss": 0.4106,
242
  "step": 390
243
  },
244
  {
245
  "epoch": 0.64,
246
  "learning_rate": 0.00016805111821086263,
247
- "loss": 0.589,
248
  "step": 400
249
  },
250
  {
251
  "epoch": 0.65,
252
  "learning_rate": 0.0001672523961661342,
253
- "loss": 0.3826,
254
  "step": 410
255
  },
256
  {
257
  "epoch": 0.67,
258
  "learning_rate": 0.00016645367412140575,
259
- "loss": 0.4216,
260
  "step": 420
261
  },
262
  {
263
  "epoch": 0.69,
264
  "learning_rate": 0.0001656549520766773,
265
- "loss": 0.5338,
266
  "step": 430
267
  },
268
  {
269
  "epoch": 0.7,
270
  "learning_rate": 0.0001648562300319489,
271
- "loss": 0.4471,
272
  "step": 440
273
  },
274
  {
275
  "epoch": 0.72,
276
  "learning_rate": 0.00016405750798722046,
277
- "loss": 0.3129,
278
  "step": 450
279
  },
280
  {
281
  "epoch": 0.73,
282
  "learning_rate": 0.00016325878594249202,
283
- "loss": 0.2749,
284
  "step": 460
285
  },
286
  {
287
  "epoch": 0.75,
288
  "learning_rate": 0.00016246006389776359,
289
- "loss": 0.4442,
290
  "step": 470
291
  },
292
  {
293
  "epoch": 0.77,
294
  "learning_rate": 0.00016166134185303515,
295
- "loss": 0.386,
296
  "step": 480
297
  },
298
  {
299
  "epoch": 0.78,
300
  "learning_rate": 0.00016086261980830674,
301
- "loss": 0.51,
302
  "step": 490
303
  },
304
  {
305
  "epoch": 0.8,
306
  "learning_rate": 0.0001600638977635783,
307
- "loss": 0.4706,
308
  "step": 500
309
  },
310
  {
311
  "epoch": 0.81,
312
  "learning_rate": 0.00015926517571884986,
313
- "loss": 0.4583,
314
  "step": 510
315
  },
316
  {
317
  "epoch": 0.83,
318
  "learning_rate": 0.00015846645367412142,
319
- "loss": 0.4591,
320
  "step": 520
321
  },
322
  {
323
  "epoch": 0.85,
324
  "learning_rate": 0.00015766773162939298,
325
- "loss": 0.3957,
326
  "step": 530
327
  },
328
  {
329
  "epoch": 0.86,
330
  "learning_rate": 0.00015686900958466454,
331
- "loss": 0.6497,
332
  "step": 540
333
  },
334
  {
335
  "epoch": 0.88,
336
  "learning_rate": 0.0001560702875399361,
337
- "loss": 0.534,
338
  "step": 550
339
  },
340
  {
341
  "epoch": 0.89,
342
  "learning_rate": 0.00015527156549520767,
343
- "loss": 0.4774,
344
  "step": 560
345
  },
346
  {
347
  "epoch": 0.91,
348
  "learning_rate": 0.00015447284345047923,
349
- "loss": 0.447,
350
  "step": 570
351
  },
352
  {
353
  "epoch": 0.93,
354
  "learning_rate": 0.00015367412140575082,
355
- "loss": 0.4498,
356
  "step": 580
357
  },
358
  {
359
  "epoch": 0.94,
360
  "learning_rate": 0.00015287539936102238,
361
- "loss": 0.4832,
362
  "step": 590
363
  },
364
  {
365
  "epoch": 0.96,
366
  "learning_rate": 0.00015207667731629394,
367
- "loss": 0.4865,
368
  "step": 600
369
  },
370
  {
371
  "epoch": 0.97,
372
  "learning_rate": 0.0001512779552715655,
373
- "loss": 0.6716,
374
  "step": 610
375
  },
376
  {
377
  "epoch": 0.99,
378
  "learning_rate": 0.00015047923322683706,
379
- "loss": 0.4385,
380
  "step": 620
381
  },
382
  {
383
  "epoch": 1.0,
384
- "eval_accuracy": 0.8290155440414507,
385
- "eval_f1": 0.8273409319195236,
386
- "eval_loss": 0.4796123206615448,
387
- "eval_precision": 0.8726260532112408,
388
- "eval_recall": 0.8290155440414507,
389
- "eval_runtime": 1.3121,
390
- "eval_samples_per_second": 147.095,
391
- "eval_steps_per_second": 9.908,
392
  "step": 626
393
  },
394
  {
395
  "epoch": 1.01,
396
  "learning_rate": 0.00014968051118210865,
397
- "loss": 0.4837,
398
  "step": 630
399
  },
400
  {
401
  "epoch": 1.02,
402
  "learning_rate": 0.0001488817891373802,
403
- "loss": 0.2649,
404
  "step": 640
405
  },
406
  {
407
  "epoch": 1.04,
408
  "learning_rate": 0.00014808306709265177,
409
- "loss": 0.3303,
410
  "step": 650
411
  },
412
  {
413
  "epoch": 1.05,
414
  "learning_rate": 0.00014728434504792333,
415
- "loss": 0.3935,
416
  "step": 660
417
  },
418
  {
419
  "epoch": 1.07,
420
  "learning_rate": 0.0001464856230031949,
421
- "loss": 0.2925,
422
  "step": 670
423
  },
424
  {
425
  "epoch": 1.09,
426
  "learning_rate": 0.00014568690095846646,
427
- "loss": 0.3297,
428
  "step": 680
429
  },
430
  {
431
  "epoch": 1.1,
432
  "learning_rate": 0.00014488817891373802,
433
- "loss": 0.3201,
434
  "step": 690
435
  },
436
  {
437
  "epoch": 1.12,
438
  "learning_rate": 0.00014408945686900958,
439
- "loss": 0.1702,
440
  "step": 700
441
  },
442
  {
443
  "epoch": 1.13,
444
  "learning_rate": 0.00014329073482428114,
445
- "loss": 0.3169,
446
  "step": 710
447
  },
448
  {
449
  "epoch": 1.15,
450
  "learning_rate": 0.00014249201277955273,
451
- "loss": 0.2407,
452
  "step": 720
453
  },
454
  {
455
  "epoch": 1.17,
456
  "learning_rate": 0.0001416932907348243,
457
- "loss": 0.2436,
458
  "step": 730
459
  },
460
  {
461
  "epoch": 1.18,
462
  "learning_rate": 0.00014089456869009585,
463
- "loss": 0.2152,
464
  "step": 740
465
  },
466
  {
467
  "epoch": 1.2,
468
  "learning_rate": 0.0001400958466453674,
469
- "loss": 0.249,
470
  "step": 750
471
  },
472
  {
473
  "epoch": 1.21,
474
  "learning_rate": 0.000139297124600639,
475
- "loss": 0.2679,
476
  "step": 760
477
  },
478
  {
479
  "epoch": 1.23,
480
  "learning_rate": 0.00013849840255591056,
481
- "loss": 0.1624,
482
  "step": 770
483
  },
484
  {
485
  "epoch": 1.25,
486
  "learning_rate": 0.00013769968051118212,
487
- "loss": 0.2803,
488
  "step": 780
489
  },
490
  {
491
  "epoch": 1.26,
492
  "learning_rate": 0.00013690095846645369,
493
- "loss": 0.416,
494
  "step": 790
495
  },
496
  {
497
  "epoch": 1.28,
498
  "learning_rate": 0.00013610223642172525,
499
- "loss": 0.28,
500
  "step": 800
501
  },
502
  {
503
  "epoch": 1.29,
504
  "learning_rate": 0.0001353035143769968,
505
- "loss": 0.3834,
506
  "step": 810
507
  },
508
  {
509
  "epoch": 1.31,
510
  "learning_rate": 0.00013450479233226837,
511
- "loss": 0.3292,
512
  "step": 820
513
  },
514
  {
515
  "epoch": 1.33,
516
  "learning_rate": 0.00013370607028753993,
517
- "loss": 0.2428,
518
  "step": 830
519
  },
520
  {
521
  "epoch": 1.34,
522
  "learning_rate": 0.0001329073482428115,
523
- "loss": 0.268,
524
  "step": 840
525
  },
526
  {
527
  "epoch": 1.36,
528
  "learning_rate": 0.00013210862619808305,
529
- "loss": 0.215,
530
  "step": 850
531
  },
532
  {
533
  "epoch": 1.37,
534
  "learning_rate": 0.00013130990415335464,
535
- "loss": 0.4138,
536
  "step": 860
537
  },
538
  {
539
  "epoch": 1.39,
540
  "learning_rate": 0.0001305111821086262,
541
- "loss": 0.3971,
542
  "step": 870
543
  },
544
  {
545
  "epoch": 1.41,
546
  "learning_rate": 0.00012971246006389777,
547
- "loss": 0.227,
548
  "step": 880
549
  },
550
  {
551
  "epoch": 1.42,
552
  "learning_rate": 0.00012891373801916933,
553
- "loss": 0.3238,
554
  "step": 890
555
  },
556
  {
557
  "epoch": 1.44,
558
  "learning_rate": 0.00012811501597444092,
559
- "loss": 0.2183,
560
  "step": 900
561
  },
562
  {
563
  "epoch": 1.45,
564
  "learning_rate": 0.00012731629392971248,
565
- "loss": 0.2531,
566
  "step": 910
567
  },
568
  {
569
  "epoch": 1.47,
570
  "learning_rate": 0.00012651757188498404,
571
- "loss": 0.3341,
572
  "step": 920
573
  },
574
  {
575
  "epoch": 1.49,
576
  "learning_rate": 0.0001257188498402556,
577
- "loss": 0.2685,
578
  "step": 930
579
  },
580
  {
581
  "epoch": 1.5,
582
  "learning_rate": 0.00012492012779552716,
583
- "loss": 0.3369,
584
  "step": 940
585
  },
586
  {
587
  "epoch": 1.52,
588
  "learning_rate": 0.00012412140575079872,
589
- "loss": 0.3655,
590
  "step": 950
591
  },
592
  {
593
  "epoch": 1.53,
594
  "learning_rate": 0.00012332268370607028,
595
- "loss": 0.2675,
596
  "step": 960
597
  },
598
  {
599
  "epoch": 1.55,
600
  "learning_rate": 0.00012252396166134185,
601
- "loss": 0.3366,
602
  "step": 970
603
  },
604
  {
605
  "epoch": 1.57,
606
  "learning_rate": 0.00012172523961661342,
607
- "loss": 0.2758,
608
  "step": 980
609
  },
610
  {
611
  "epoch": 1.58,
612
  "learning_rate": 0.000120926517571885,
613
- "loss": 0.3437,
614
  "step": 990
615
  },
616
  {
617
  "epoch": 1.6,
618
  "learning_rate": 0.00012012779552715656,
619
- "loss": 0.3037,
620
  "step": 1000
621
  },
622
  {
623
  "epoch": 1.61,
624
  "learning_rate": 0.00011932907348242812,
625
- "loss": 0.2772,
626
  "step": 1010
627
  },
628
  {
629
  "epoch": 1.63,
630
  "learning_rate": 0.00011853035143769968,
631
- "loss": 0.2468,
632
  "step": 1020
633
  },
634
  {
635
  "epoch": 1.65,
636
  "learning_rate": 0.00011773162939297124,
637
- "loss": 0.2655,
638
  "step": 1030
639
  },
640
  {
641
  "epoch": 1.66,
642
  "learning_rate": 0.00011693290734824283,
643
- "loss": 0.2848,
644
  "step": 1040
645
  },
646
  {
647
  "epoch": 1.68,
648
  "learning_rate": 0.00011613418530351439,
649
- "loss": 0.3987,
650
  "step": 1050
651
  },
652
  {
653
  "epoch": 1.69,
654
  "learning_rate": 0.00011533546325878595,
655
- "loss": 0.3305,
656
  "step": 1060
657
  },
658
  {
659
  "epoch": 1.71,
660
  "learning_rate": 0.00011453674121405751,
661
- "loss": 0.3217,
662
  "step": 1070
663
  },
664
  {
665
  "epoch": 1.73,
666
  "learning_rate": 0.00011373801916932908,
667
- "loss": 0.2892,
668
  "step": 1080
669
  },
670
  {
671
  "epoch": 1.74,
672
  "learning_rate": 0.00011293929712460065,
673
- "loss": 0.1542,
674
  "step": 1090
675
  },
676
  {
677
  "epoch": 1.76,
678
  "learning_rate": 0.00011214057507987221,
679
- "loss": 0.2328,
680
  "step": 1100
681
  },
682
  {
683
  "epoch": 1.77,
684
  "learning_rate": 0.00011134185303514377,
685
- "loss": 0.3839,
686
  "step": 1110
687
  },
688
  {
689
  "epoch": 1.79,
690
  "learning_rate": 0.00011054313099041533,
691
- "loss": 0.2766,
692
  "step": 1120
693
  },
694
  {
695
  "epoch": 1.81,
696
  "learning_rate": 0.00010974440894568691,
697
- "loss": 0.1802,
698
  "step": 1130
699
  },
700
  {
701
  "epoch": 1.82,
702
  "learning_rate": 0.00010894568690095847,
703
- "loss": 0.2725,
704
  "step": 1140
705
  },
706
  {
707
  "epoch": 1.84,
708
  "learning_rate": 0.00010814696485623003,
709
- "loss": 0.3496,
710
  "step": 1150
711
  },
712
  {
713
  "epoch": 1.85,
714
  "learning_rate": 0.0001073482428115016,
715
- "loss": 0.2154,
716
  "step": 1160
717
  },
718
  {
719
  "epoch": 1.87,
720
  "learning_rate": 0.00010654952076677316,
721
- "loss": 0.2662,
722
  "step": 1170
723
  },
724
  {
725
  "epoch": 1.88,
726
  "learning_rate": 0.00010575079872204474,
727
- "loss": 0.3407,
728
  "step": 1180
729
  },
730
  {
731
  "epoch": 1.9,
732
  "learning_rate": 0.0001049520766773163,
733
- "loss": 0.1935,
734
  "step": 1190
735
  },
736
  {
737
  "epoch": 1.92,
738
  "learning_rate": 0.00010415335463258787,
739
- "loss": 0.2008,
740
  "step": 1200
741
  },
742
  {
743
  "epoch": 1.93,
744
  "learning_rate": 0.00010335463258785943,
745
- "loss": 0.2427,
746
  "step": 1210
747
  },
748
  {
749
  "epoch": 1.95,
750
  "learning_rate": 0.000102555910543131,
751
- "loss": 0.1932,
752
  "step": 1220
753
  },
754
  {
755
  "epoch": 1.96,
756
  "learning_rate": 0.00010175718849840256,
757
- "loss": 0.1283,
758
  "step": 1230
759
  },
760
  {
761
  "epoch": 1.98,
762
  "learning_rate": 0.00010095846645367413,
763
- "loss": 0.2633,
764
  "step": 1240
765
  },
766
  {
767
  "epoch": 2.0,
768
  "learning_rate": 0.00010015974440894569,
769
- "loss": 0.3573,
770
  "step": 1250
771
  },
772
  {
773
  "epoch": 2.0,
774
- "eval_accuracy": 0.8549222797927462,
775
- "eval_f1": 0.8581374309775321,
776
- "eval_loss": 0.47454866766929626,
777
- "eval_precision": 0.865116673519467,
778
- "eval_recall": 0.8549222797927462,
779
- "eval_runtime": 1.2953,
780
- "eval_samples_per_second": 149.002,
781
- "eval_steps_per_second": 10.036,
782
  "step": 1252
783
  },
784
  {
785
  "epoch": 2.01,
786
  "learning_rate": 9.936102236421726e-05,
787
- "loss": 0.1711,
788
  "step": 1260
789
  },
790
  {
791
  "epoch": 2.03,
792
  "learning_rate": 9.856230031948882e-05,
793
- "loss": 0.0696,
794
  "step": 1270
795
  },
796
  {
797
  "epoch": 2.04,
798
  "learning_rate": 9.77635782747604e-05,
799
- "loss": 0.1372,
800
  "step": 1280
801
  },
802
  {
803
  "epoch": 2.06,
804
  "learning_rate": 9.696485623003195e-05,
805
- "loss": 0.119,
806
  "step": 1290
807
  },
808
  {
809
  "epoch": 2.08,
810
  "learning_rate": 9.616613418530351e-05,
811
- "loss": 0.0787,
812
  "step": 1300
813
  },
814
  {
815
  "epoch": 2.09,
816
  "learning_rate": 9.536741214057508e-05,
817
- "loss": 0.0853,
818
  "step": 1310
819
  },
820
  {
821
  "epoch": 2.11,
822
  "learning_rate": 9.456869009584664e-05,
823
- "loss": 0.1297,
824
  "step": 1320
825
  },
826
  {
827
  "epoch": 2.12,
828
  "learning_rate": 9.376996805111822e-05,
829
- "loss": 0.1096,
830
  "step": 1330
831
  },
832
  {
833
  "epoch": 2.14,
834
  "learning_rate": 9.297124600638978e-05,
835
- "loss": 0.0911,
836
  "step": 1340
837
  },
838
  {
839
  "epoch": 2.16,
840
  "learning_rate": 9.217252396166136e-05,
841
- "loss": 0.0269,
842
  "step": 1350
843
  },
844
  {
845
  "epoch": 2.17,
846
  "learning_rate": 9.137380191693292e-05,
847
- "loss": 0.1667,
848
  "step": 1360
849
  },
850
  {
851
  "epoch": 2.19,
852
  "learning_rate": 9.057507987220448e-05,
853
- "loss": 0.0504,
854
  "step": 1370
855
  },
856
  {
857
  "epoch": 2.2,
858
  "learning_rate": 8.977635782747604e-05,
859
- "loss": 0.0808,
860
  "step": 1380
861
  },
862
  {
863
  "epoch": 2.22,
864
  "learning_rate": 8.89776357827476e-05,
865
- "loss": 0.1603,
866
  "step": 1390
867
  },
868
  {
869
  "epoch": 2.24,
870
  "learning_rate": 8.817891373801918e-05,
871
- "loss": 0.0823,
872
  "step": 1400
873
  },
874
  {
875
  "epoch": 2.25,
876
  "learning_rate": 8.738019169329074e-05,
877
- "loss": 0.1164,
878
  "step": 1410
879
  },
880
  {
881
  "epoch": 2.27,
882
  "learning_rate": 8.658146964856231e-05,
883
- "loss": 0.1506,
884
  "step": 1420
885
  },
886
  {
887
  "epoch": 2.28,
888
  "learning_rate": 8.578274760383387e-05,
889
- "loss": 0.0774,
890
  "step": 1430
891
  },
892
  {
893
  "epoch": 2.3,
894
  "learning_rate": 8.498402555910544e-05,
895
- "loss": 0.0767,
896
  "step": 1440
897
  },
898
  {
899
  "epoch": 2.32,
900
  "learning_rate": 8.4185303514377e-05,
901
- "loss": 0.1991,
902
  "step": 1450
903
  },
904
  {
905
  "epoch": 2.33,
906
  "learning_rate": 8.338658146964856e-05,
907
- "loss": 0.1302,
908
  "step": 1460
909
  },
910
  {
911
  "epoch": 2.35,
912
  "learning_rate": 8.258785942492013e-05,
913
- "loss": 0.0911,
914
  "step": 1470
915
  },
916
  {
917
  "epoch": 2.36,
918
  "learning_rate": 8.17891373801917e-05,
919
- "loss": 0.317,
920
  "step": 1480
921
  },
922
  {
923
  "epoch": 2.38,
924
  "learning_rate": 8.099041533546327e-05,
925
- "loss": 0.07,
926
  "step": 1490
927
  },
928
  {
929
  "epoch": 2.4,
930
  "learning_rate": 8.019169329073483e-05,
931
- "loss": 0.0713,
932
  "step": 1500
933
  },
934
  {
935
  "epoch": 2.41,
936
  "learning_rate": 7.939297124600639e-05,
937
- "loss": 0.1588,
938
  "step": 1510
939
  },
940
  {
941
  "epoch": 2.43,
942
  "learning_rate": 7.859424920127795e-05,
943
- "loss": 0.1898,
944
  "step": 1520
945
  },
946
  {
947
  "epoch": 2.44,
948
  "learning_rate": 7.779552715654951e-05,
949
- "loss": 0.0744,
950
  "step": 1530
951
  },
952
  {
953
  "epoch": 2.46,
954
  "learning_rate": 7.699680511182109e-05,
955
- "loss": 0.1182,
956
  "step": 1540
957
  },
958
  {
959
  "epoch": 2.48,
960
  "learning_rate": 7.619808306709265e-05,
961
- "loss": 0.0842,
962
  "step": 1550
963
  },
964
  {
965
  "epoch": 2.49,
966
  "learning_rate": 7.539936102236423e-05,
967
- "loss": 0.1308,
968
  "step": 1560
969
  },
970
  {
971
  "epoch": 2.51,
972
  "learning_rate": 7.460063897763579e-05,
973
- "loss": 0.1424,
974
  "step": 1570
975
  },
976
  {
977
  "epoch": 2.52,
978
  "learning_rate": 7.380191693290735e-05,
979
- "loss": 0.0852,
980
  "step": 1580
981
  },
982
  {
983
  "epoch": 2.54,
984
  "learning_rate": 7.300319488817891e-05,
985
- "loss": 0.1689,
986
  "step": 1590
987
  },
988
  {
989
  "epoch": 2.56,
990
  "learning_rate": 7.220447284345049e-05,
991
- "loss": 0.1009,
992
  "step": 1600
993
  },
994
  {
995
  "epoch": 2.57,
996
  "learning_rate": 7.140575079872205e-05,
997
- "loss": 0.0491,
998
  "step": 1610
999
  },
1000
  {
1001
  "epoch": 2.59,
1002
  "learning_rate": 7.060702875399361e-05,
1003
- "loss": 0.0986,
1004
  "step": 1620
1005
  },
1006
  {
1007
  "epoch": 2.6,
1008
  "learning_rate": 6.980830670926518e-05,
1009
- "loss": 0.182,
1010
  "step": 1630
1011
  },
1012
  {
1013
  "epoch": 2.62,
1014
  "learning_rate": 6.900958466453674e-05,
1015
- "loss": 0.1219,
1016
  "step": 1640
1017
  },
1018
  {
1019
  "epoch": 2.64,
1020
  "learning_rate": 6.821086261980832e-05,
1021
- "loss": 0.147,
1022
  "step": 1650
1023
  },
1024
  {
1025
  "epoch": 2.65,
1026
  "learning_rate": 6.741214057507987e-05,
1027
- "loss": 0.1033,
1028
  "step": 1660
1029
  },
1030
  {
1031
  "epoch": 2.67,
1032
  "learning_rate": 6.661341853035144e-05,
1033
- "loss": 0.2159,
1034
  "step": 1670
1035
  },
1036
  {
1037
  "epoch": 2.68,
1038
  "learning_rate": 6.5814696485623e-05,
1039
- "loss": 0.0785,
1040
  "step": 1680
1041
  },
1042
  {
1043
  "epoch": 2.7,
1044
  "learning_rate": 6.501597444089457e-05,
1045
- "loss": 0.216,
1046
  "step": 1690
1047
  },
1048
  {
1049
  "epoch": 2.72,
1050
  "learning_rate": 6.421725239616614e-05,
1051
- "loss": 0.0612,
1052
  "step": 1700
1053
  },
1054
  {
1055
  "epoch": 2.73,
1056
  "learning_rate": 6.34185303514377e-05,
1057
- "loss": 0.1087,
1058
  "step": 1710
1059
  },
1060
  {
1061
  "epoch": 2.75,
1062
  "learning_rate": 6.261980830670928e-05,
1063
- "loss": 0.1114,
1064
  "step": 1720
1065
  },
1066
  {
1067
  "epoch": 2.76,
1068
  "learning_rate": 6.182108626198084e-05,
1069
- "loss": 0.1242,
1070
  "step": 1730
1071
  },
1072
  {
1073
  "epoch": 2.78,
1074
  "learning_rate": 6.1022364217252406e-05,
1075
- "loss": 0.1241,
1076
  "step": 1740
1077
  },
1078
  {
1079
  "epoch": 2.8,
1080
  "learning_rate": 6.022364217252396e-05,
1081
- "loss": 0.1452,
1082
  "step": 1750
1083
  },
1084
  {
1085
  "epoch": 2.81,
1086
  "learning_rate": 5.942492012779552e-05,
1087
- "loss": 0.105,
1088
  "step": 1760
1089
  },
1090
  {
1091
  "epoch": 2.83,
1092
  "learning_rate": 5.86261980830671e-05,
1093
- "loss": 0.1193,
1094
  "step": 1770
1095
  },
1096
  {
1097
  "epoch": 2.84,
1098
  "learning_rate": 5.782747603833866e-05,
1099
- "loss": 0.0811,
1100
  "step": 1780
1101
  },
1102
  {
1103
  "epoch": 2.86,
1104
  "learning_rate": 5.702875399361023e-05,
1105
- "loss": 0.1103,
1106
  "step": 1790
1107
  },
1108
  {
1109
  "epoch": 2.88,
1110
  "learning_rate": 5.623003194888179e-05,
1111
- "loss": 0.0628,
1112
  "step": 1800
1113
  },
1114
  {
1115
  "epoch": 2.89,
1116
  "learning_rate": 5.543130990415336e-05,
1117
- "loss": 0.1192,
1118
  "step": 1810
1119
  },
1120
  {
1121
  "epoch": 2.91,
1122
  "learning_rate": 5.4632587859424925e-05,
1123
- "loss": 0.1218,
1124
  "step": 1820
1125
  },
1126
  {
1127
  "epoch": 2.92,
1128
  "learning_rate": 5.383386581469649e-05,
1129
- "loss": 0.0354,
1130
  "step": 1830
1131
  },
1132
  {
1133
  "epoch": 2.94,
1134
  "learning_rate": 5.3035143769968054e-05,
1135
- "loss": 0.0451,
1136
  "step": 1840
1137
  },
1138
  {
1139
  "epoch": 2.96,
1140
  "learning_rate": 5.2236421725239616e-05,
1141
- "loss": 0.0309,
1142
  "step": 1850
1143
  },
1144
  {
1145
  "epoch": 2.97,
1146
  "learning_rate": 5.1437699680511184e-05,
1147
- "loss": 0.125,
1148
  "step": 1860
1149
  },
1150
  {
1151
  "epoch": 2.99,
1152
  "learning_rate": 5.0638977635782745e-05,
1153
- "loss": 0.1444,
1154
  "step": 1870
1155
  },
1156
  {
1157
  "epoch": 3.0,
1158
- "eval_accuracy": 0.8393782383419689,
1159
- "eval_f1": 0.8405556056199831,
1160
- "eval_loss": 0.6085613965988159,
1161
- "eval_precision": 0.8439944536071767,
1162
- "eval_recall": 0.8393782383419689,
1163
- "eval_runtime": 1.2428,
1164
- "eval_samples_per_second": 155.294,
1165
- "eval_steps_per_second": 10.46,
1166
  "step": 1878
1167
  },
1168
  {
1169
  "epoch": 3.0,
1170
  "learning_rate": 4.984025559105431e-05,
1171
- "loss": 0.0369,
1172
  "step": 1880
1173
  },
1174
  {
1175
  "epoch": 3.02,
1176
  "learning_rate": 4.904153354632588e-05,
1177
- "loss": 0.0399,
1178
  "step": 1890
1179
  },
1180
  {
1181
  "epoch": 3.04,
1182
  "learning_rate": 4.824281150159744e-05,
1183
- "loss": 0.0528,
1184
  "step": 1900
1185
  },
1186
  {
1187
  "epoch": 3.05,
1188
  "learning_rate": 4.744408945686901e-05,
1189
- "loss": 0.0718,
1190
  "step": 1910
1191
  },
1192
  {
1193
  "epoch": 3.07,
1194
  "learning_rate": 4.664536741214058e-05,
1195
- "loss": 0.054,
1196
  "step": 1920
1197
  },
1198
  {
1199
  "epoch": 3.08,
1200
  "learning_rate": 4.584664536741215e-05,
1201
- "loss": 0.0249,
1202
  "step": 1930
1203
  },
1204
  {
1205
  "epoch": 3.1,
1206
  "learning_rate": 4.504792332268371e-05,
1207
- "loss": 0.0067,
1208
  "step": 1940
1209
  },
1210
  {
1211
  "epoch": 3.12,
1212
  "learning_rate": 4.424920127795527e-05,
1213
- "loss": 0.0265,
1214
  "step": 1950
1215
  },
1216
  {
1217
  "epoch": 3.13,
1218
  "learning_rate": 4.345047923322684e-05,
1219
- "loss": 0.069,
1220
  "step": 1960
1221
  },
1222
  {
1223
  "epoch": 3.15,
1224
  "learning_rate": 4.265175718849841e-05,
1225
- "loss": 0.0069,
1226
  "step": 1970
1227
  },
1228
  {
1229
  "epoch": 3.16,
1230
  "learning_rate": 4.185303514376997e-05,
1231
- "loss": 0.0232,
1232
  "step": 1980
1233
  },
1234
  {
1235
  "epoch": 3.18,
1236
  "learning_rate": 4.1054313099041536e-05,
1237
- "loss": 0.0166,
1238
  "step": 1990
1239
  },
1240
  {
1241
  "epoch": 3.19,
1242
  "learning_rate": 4.0255591054313104e-05,
1243
- "loss": 0.0297,
1244
  "step": 2000
1245
  },
1246
  {
1247
  "epoch": 3.21,
1248
  "learning_rate": 3.9456869009584666e-05,
1249
- "loss": 0.0146,
1250
  "step": 2010
1251
  },
1252
  {
1253
  "epoch": 3.23,
1254
  "learning_rate": 3.8658146964856234e-05,
1255
- "loss": 0.009,
1256
  "step": 2020
1257
  },
1258
  {
1259
  "epoch": 3.24,
1260
  "learning_rate": 3.7859424920127795e-05,
1261
- "loss": 0.0734,
1262
  "step": 2030
1263
  },
1264
  {
1265
  "epoch": 3.26,
1266
  "learning_rate": 3.7060702875399364e-05,
1267
- "loss": 0.0522,
1268
  "step": 2040
1269
  },
1270
  {
1271
  "epoch": 3.27,
1272
  "learning_rate": 3.6261980830670925e-05,
1273
- "loss": 0.0148,
1274
  "step": 2050
1275
  },
1276
  {
1277
  "epoch": 3.29,
1278
  "learning_rate": 3.546325878594249e-05,
1279
- "loss": 0.0148,
1280
  "step": 2060
1281
  },
1282
  {
1283
  "epoch": 3.31,
1284
  "learning_rate": 3.466453674121406e-05,
1285
- "loss": 0.007,
1286
  "step": 2070
1287
  },
1288
  {
1289
  "epoch": 3.32,
1290
  "learning_rate": 3.386581469648562e-05,
1291
- "loss": 0.0156,
1292
  "step": 2080
1293
  },
1294
  {
1295
  "epoch": 3.34,
1296
  "learning_rate": 3.306709265175719e-05,
1297
- "loss": 0.0415,
1298
  "step": 2090
1299
  },
1300
  {
1301
  "epoch": 3.35,
1302
  "learning_rate": 3.226837060702875e-05,
1303
- "loss": 0.0316,
1304
  "step": 2100
1305
  },
1306
  {
1307
  "epoch": 3.37,
1308
  "learning_rate": 3.146964856230032e-05,
1309
- "loss": 0.0583,
1310
  "step": 2110
1311
  },
1312
  {
1313
  "epoch": 3.39,
1314
  "learning_rate": 3.067092651757188e-05,
1315
- "loss": 0.0201,
1316
  "step": 2120
1317
  },
1318
  {
1319
  "epoch": 3.4,
1320
  "learning_rate": 2.987220447284345e-05,
1321
- "loss": 0.0251,
1322
  "step": 2130
1323
  },
1324
  {
1325
  "epoch": 3.42,
1326
  "learning_rate": 2.907348242811502e-05,
1327
- "loss": 0.006,
1328
  "step": 2140
1329
  },
1330
  {
1331
  "epoch": 3.43,
1332
  "learning_rate": 2.8274760383386583e-05,
1333
- "loss": 0.0359,
1334
  "step": 2150
1335
  },
1336
  {
1337
  "epoch": 3.45,
1338
  "learning_rate": 2.747603833865815e-05,
1339
- "loss": 0.0064,
1340
  "step": 2160
1341
  },
1342
  {
1343
  "epoch": 3.47,
1344
- "learning_rate": 2.6757188498402557e-05,
1345
- "loss": 0.0314,
1346
  "step": 2170
1347
  },
1348
  {
1349
  "epoch": 3.48,
1350
- "learning_rate": 2.5958466453674125e-05,
1351
- "loss": 0.0323,
1352
  "step": 2180
1353
  },
1354
  {
1355
  "epoch": 3.5,
1356
- "learning_rate": 2.515974440894569e-05,
1357
- "loss": 0.0374,
1358
  "step": 2190
1359
  },
1360
  {
1361
  "epoch": 3.51,
1362
- "learning_rate": 2.4361022364217255e-05,
1363
- "loss": 0.0063,
1364
  "step": 2200
1365
  },
1366
  {
1367
  "epoch": 3.53,
1368
- "learning_rate": 2.356230031948882e-05,
1369
- "loss": 0.0232,
1370
  "step": 2210
1371
  },
1372
  {
1373
  "epoch": 3.55,
1374
- "learning_rate": 2.2763578274760385e-05,
1375
- "loss": 0.033,
1376
  "step": 2220
1377
  },
1378
  {
1379
  "epoch": 3.56,
1380
- "learning_rate": 2.196485623003195e-05,
1381
- "loss": 0.0326,
1382
  "step": 2230
1383
  },
1384
  {
1385
  "epoch": 3.58,
1386
- "learning_rate": 2.1166134185303514e-05,
1387
- "loss": 0.0158,
1388
  "step": 2240
1389
  },
1390
  {
1391
  "epoch": 3.59,
1392
- "learning_rate": 2.0367412140575082e-05,
1393
- "loss": 0.0426,
1394
  "step": 2250
1395
  },
1396
  {
1397
  "epoch": 3.61,
1398
- "learning_rate": 1.9568690095846644e-05,
1399
- "loss": 0.0686,
1400
  "step": 2260
1401
  },
1402
  {
1403
  "epoch": 3.63,
1404
- "learning_rate": 1.8769968051118212e-05,
1405
- "loss": 0.0376,
1406
  "step": 2270
1407
  },
1408
  {
1409
  "epoch": 3.64,
1410
- "learning_rate": 1.7971246006389777e-05,
1411
- "loss": 0.0085,
1412
  "step": 2280
1413
  },
1414
  {
1415
  "epoch": 3.66,
1416
- "learning_rate": 1.7172523961661345e-05,
1417
- "loss": 0.0102,
1418
  "step": 2290
1419
  },
1420
  {
1421
  "epoch": 3.67,
1422
- "learning_rate": 1.6373801916932906e-05,
1423
- "loss": 0.0055,
1424
  "step": 2300
1425
  },
1426
  {
1427
  "epoch": 3.69,
1428
- "learning_rate": 1.5575079872204475e-05,
1429
- "loss": 0.0407,
1430
  "step": 2310
1431
  },
1432
  {
1433
  "epoch": 3.71,
1434
- "learning_rate": 1.477635782747604e-05,
1435
- "loss": 0.0779,
1436
  "step": 2320
1437
  },
1438
  {
1439
  "epoch": 3.72,
1440
- "learning_rate": 1.3977635782747606e-05,
1441
- "loss": 0.0507,
1442
  "step": 2330
1443
  },
1444
  {
1445
  "epoch": 3.74,
1446
- "learning_rate": 1.3178913738019169e-05,
1447
- "loss": 0.0124,
1448
  "step": 2340
1449
  },
1450
  {
1451
  "epoch": 3.75,
1452
- "learning_rate": 1.2380191693290735e-05,
1453
- "loss": 0.0082,
1454
  "step": 2350
1455
  },
1456
  {
1457
  "epoch": 3.77,
1458
- "learning_rate": 1.1581469648562302e-05,
1459
- "loss": 0.0523,
1460
  "step": 2360
1461
  },
1462
  {
1463
  "epoch": 3.79,
1464
- "learning_rate": 1.0782747603833867e-05,
1465
- "loss": 0.0058,
1466
  "step": 2370
1467
  },
1468
  {
1469
  "epoch": 3.8,
1470
- "learning_rate": 9.984025559105432e-06,
1471
- "loss": 0.014,
1472
  "step": 2380
1473
  },
1474
  {
1475
  "epoch": 3.82,
1476
- "learning_rate": 9.185303514376996e-06,
1477
- "loss": 0.0075,
1478
  "step": 2390
1479
  },
1480
  {
1481
  "epoch": 3.83,
1482
- "learning_rate": 8.386581469648563e-06,
1483
- "loss": 0.0077,
1484
  "step": 2400
1485
  },
1486
  {
1487
  "epoch": 3.85,
1488
- "learning_rate": 7.5878594249201285e-06,
1489
- "loss": 0.016,
1490
  "step": 2410
1491
  },
1492
  {
1493
  "epoch": 3.87,
1494
- "learning_rate": 6.789137380191693e-06,
1495
- "loss": 0.0492,
1496
  "step": 2420
1497
  },
1498
  {
1499
  "epoch": 3.88,
1500
- "learning_rate": 5.990415335463259e-06,
1501
- "loss": 0.0475,
1502
  "step": 2430
1503
  },
1504
  {
1505
  "epoch": 3.9,
1506
- "learning_rate": 5.191693290734825e-06,
1507
- "loss": 0.0269,
1508
  "step": 2440
1509
  },
1510
  {
1511
  "epoch": 3.91,
1512
- "learning_rate": 4.39297124600639e-06,
1513
- "loss": 0.0119,
1514
  "step": 2450
1515
  },
1516
  {
1517
  "epoch": 3.93,
1518
- "learning_rate": 3.5942492012779555e-06,
1519
- "loss": 0.0156,
1520
  "step": 2460
1521
  },
1522
  {
1523
  "epoch": 3.95,
1524
- "learning_rate": 2.7955271565495207e-06,
1525
- "loss": 0.0264,
1526
  "step": 2470
1527
  },
1528
  {
1529
  "epoch": 3.96,
1530
- "learning_rate": 1.9968051118210863e-06,
1531
- "loss": 0.0069,
1532
  "step": 2480
1533
  },
1534
  {
1535
  "epoch": 3.98,
1536
- "learning_rate": 1.1980830670926517e-06,
1537
- "loss": 0.0066,
1538
  "step": 2490
1539
  },
1540
  {
1541
  "epoch": 3.99,
1542
- "learning_rate": 3.9936102236421723e-07,
1543
- "loss": 0.0055,
1544
  "step": 2500
1545
  },
1546
  {
1547
  "epoch": 4.0,
1548
- "eval_accuracy": 0.8549222797927462,
1549
- "eval_f1": 0.8557756855858307,
1550
- "eval_loss": 0.627236545085907,
1551
- "eval_precision": 0.8589504093802257,
1552
- "eval_recall": 0.8549222797927462,
1553
- "eval_runtime": 1.3015,
1554
- "eval_samples_per_second": 148.291,
1555
- "eval_steps_per_second": 9.989,
1556
  "step": 2504
1557
  },
1558
  {
1559
  "epoch": 4.0,
1560
  "step": 2504,
1561
  "total_flos": 3.104468219559813e+18,
1562
- "train_loss": 0.22616184618065174,
1563
- "train_runtime": 849.1634,
1564
- "train_samples_per_second": 47.176,
1565
- "train_steps_per_second": 2.949
1566
  }
1567
  ],
1568
  "max_steps": 2504,
 
1
  {
2
+ "best_metric": 0.8756476683937824,
3
  "best_model_checkpoint": "./vit-base-skin/checkpoint-1252",
4
  "epoch": 4.0,
5
  "global_step": 2504,
 
10
  {
11
  "epoch": 0.02,
12
  "learning_rate": 0.00019920127795527157,
13
+ "loss": 0.078,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.03,
18
  "learning_rate": 0.00019840255591054313,
19
+ "loss": 0.2128,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.05,
24
  "learning_rate": 0.00019760383386581472,
25
+ "loss": 0.0939,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.06,
30
  "learning_rate": 0.00019680511182108628,
31
+ "loss": 0.1508,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.08,
36
  "learning_rate": 0.00019600638977635784,
37
+ "loss": 0.1135,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.1,
42
  "learning_rate": 0.0001952076677316294,
43
+ "loss": 0.242,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.11,
48
  "learning_rate": 0.00019440894568690097,
49
+ "loss": 0.2251,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.13,
54
  "learning_rate": 0.00019361022364217253,
55
+ "loss": 0.1199,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.14,
60
  "learning_rate": 0.0001928115015974441,
61
+ "loss": 0.1629,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.16,
66
  "learning_rate": 0.00019201277955271565,
67
+ "loss": 0.1078,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.18,
72
  "learning_rate": 0.0001912140575079872,
73
+ "loss": 0.2215,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.19,
78
  "learning_rate": 0.0001904153354632588,
79
+ "loss": 0.1648,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.21,
84
  "learning_rate": 0.00018961661341853036,
85
+ "loss": 0.2631,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.22,
90
  "learning_rate": 0.00018881789137380192,
91
+ "loss": 0.1141,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.24,
96
  "learning_rate": 0.00018801916932907348,
97
+ "loss": 0.138,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.26,
102
  "learning_rate": 0.00018722044728434505,
103
+ "loss": 0.1407,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.27,
108
  "learning_rate": 0.00018642172523961663,
109
+ "loss": 0.1169,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.29,
114
  "learning_rate": 0.0001856230031948882,
115
+ "loss": 0.1297,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.3,
120
  "learning_rate": 0.00018482428115015976,
121
+ "loss": 0.1772,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.32,
126
  "learning_rate": 0.00018402555910543132,
127
+ "loss": 0.3667,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.34,
132
  "learning_rate": 0.00018322683706070288,
133
+ "loss": 0.1505,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.35,
138
  "learning_rate": 0.00018242811501597444,
139
+ "loss": 0.134,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.37,
144
  "learning_rate": 0.000181629392971246,
145
+ "loss": 0.1385,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.38,
150
  "learning_rate": 0.00018083067092651756,
151
+ "loss": 0.0883,
152
  "step": 240
153
  },
154
  {
155
  "epoch": 0.4,
156
  "learning_rate": 0.00018003194888178913,
157
+ "loss": 0.1612,
158
  "step": 250
159
  },
160
  {
161
  "epoch": 0.42,
162
  "learning_rate": 0.00017923322683706071,
163
+ "loss": 0.1265,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.43,
168
  "learning_rate": 0.00017843450479233228,
169
+ "loss": 0.2464,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.45,
174
  "learning_rate": 0.00017763578274760384,
175
+ "loss": 0.1802,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.46,
180
  "learning_rate": 0.0001768370607028754,
181
+ "loss": 0.1718,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.48,
186
  "learning_rate": 0.000176038338658147,
187
+ "loss": 0.1424,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.5,
192
  "learning_rate": 0.00017523961661341855,
193
+ "loss": 0.1109,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.51,
198
  "learning_rate": 0.0001744408945686901,
199
+ "loss": 0.1093,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.53,
204
  "learning_rate": 0.00017364217252396167,
205
+ "loss": 0.2388,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.54,
210
  "learning_rate": 0.00017284345047923323,
211
+ "loss": 0.165,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 0.56,
216
  "learning_rate": 0.00017204472843450482,
217
+ "loss": 0.1271,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 0.58,
222
  "learning_rate": 0.00017124600638977638,
223
+ "loss": 0.1753,
224
  "step": 360
225
  },
226
  {
227
  "epoch": 0.59,
228
  "learning_rate": 0.00017044728434504792,
229
+ "loss": 0.078,
230
  "step": 370
231
  },
232
  {
233
  "epoch": 0.61,
234
  "learning_rate": 0.00016964856230031948,
235
+ "loss": 0.2373,
236
  "step": 380
237
  },
238
  {
239
  "epoch": 0.62,
240
  "learning_rate": 0.00016884984025559104,
241
+ "loss": 0.1719,
242
  "step": 390
243
  },
244
  {
245
  "epoch": 0.64,
246
  "learning_rate": 0.00016805111821086263,
247
+ "loss": 0.1963,
248
  "step": 400
249
  },
250
  {
251
  "epoch": 0.65,
252
  "learning_rate": 0.0001672523961661342,
253
+ "loss": 0.1671,
254
  "step": 410
255
  },
256
  {
257
  "epoch": 0.67,
258
  "learning_rate": 0.00016645367412140575,
259
+ "loss": 0.1733,
260
  "step": 420
261
  },
262
  {
263
  "epoch": 0.69,
264
  "learning_rate": 0.0001656549520766773,
265
+ "loss": 0.1268,
266
  "step": 430
267
  },
268
  {
269
  "epoch": 0.7,
270
  "learning_rate": 0.0001648562300319489,
271
+ "loss": 0.185,
272
  "step": 440
273
  },
274
  {
275
  "epoch": 0.72,
276
  "learning_rate": 0.00016405750798722046,
277
+ "loss": 0.0595,
278
  "step": 450
279
  },
280
  {
281
  "epoch": 0.73,
282
  "learning_rate": 0.00016325878594249202,
283
+ "loss": 0.0833,
284
  "step": 460
285
  },
286
  {
287
  "epoch": 0.75,
288
  "learning_rate": 0.00016246006389776359,
289
+ "loss": 0.2265,
290
  "step": 470
291
  },
292
  {
293
  "epoch": 0.77,
294
  "learning_rate": 0.00016166134185303515,
295
+ "loss": 0.1407,
296
  "step": 480
297
  },
298
  {
299
  "epoch": 0.78,
300
  "learning_rate": 0.00016086261980830674,
301
+ "loss": 0.1786,
302
  "step": 490
303
  },
304
  {
305
  "epoch": 0.8,
306
  "learning_rate": 0.0001600638977635783,
307
+ "loss": 0.1276,
308
  "step": 500
309
  },
310
  {
311
  "epoch": 0.81,
312
  "learning_rate": 0.00015926517571884986,
313
+ "loss": 0.1227,
314
  "step": 510
315
  },
316
  {
317
  "epoch": 0.83,
318
  "learning_rate": 0.00015846645367412142,
319
+ "loss": 0.1551,
320
  "step": 520
321
  },
322
  {
323
  "epoch": 0.85,
324
  "learning_rate": 0.00015766773162939298,
325
+ "loss": 0.1649,
326
  "step": 530
327
  },
328
  {
329
  "epoch": 0.86,
330
  "learning_rate": 0.00015686900958466454,
331
+ "loss": 0.3069,
332
  "step": 540
333
  },
334
  {
335
  "epoch": 0.88,
336
  "learning_rate": 0.0001560702875399361,
337
+ "loss": 0.1824,
338
  "step": 550
339
  },
340
  {
341
  "epoch": 0.89,
342
  "learning_rate": 0.00015527156549520767,
343
+ "loss": 0.1662,
344
  "step": 560
345
  },
346
  {
347
  "epoch": 0.91,
348
  "learning_rate": 0.00015447284345047923,
349
+ "loss": 0.2483,
350
  "step": 570
351
  },
352
  {
353
  "epoch": 0.93,
354
  "learning_rate": 0.00015367412140575082,
355
+ "loss": 0.1293,
356
  "step": 580
357
  },
358
  {
359
  "epoch": 0.94,
360
  "learning_rate": 0.00015287539936102238,
361
+ "loss": 0.171,
362
  "step": 590
363
  },
364
  {
365
  "epoch": 0.96,
366
  "learning_rate": 0.00015207667731629394,
367
+ "loss": 0.1812,
368
  "step": 600
369
  },
370
  {
371
  "epoch": 0.97,
372
  "learning_rate": 0.0001512779552715655,
373
+ "loss": 0.1628,
374
  "step": 610
375
  },
376
  {
377
  "epoch": 0.99,
378
  "learning_rate": 0.00015047923322683706,
379
+ "loss": 0.1259,
380
  "step": 620
381
  },
382
  {
383
  "epoch": 1.0,
384
+ "eval_accuracy": 0.8341968911917098,
385
+ "eval_f1": 0.8404495414024173,
386
+ "eval_loss": 0.38346073031425476,
387
+ "eval_precision": 0.8537925291610302,
388
+ "eval_recall": 0.8341968911917098,
389
+ "eval_runtime": 1.2354,
390
+ "eval_samples_per_second": 156.22,
391
+ "eval_steps_per_second": 10.523,
392
  "step": 626
393
  },
394
  {
395
  "epoch": 1.01,
396
  "learning_rate": 0.00014968051118210865,
397
+ "loss": 0.1407,
398
  "step": 630
399
  },
400
  {
401
  "epoch": 1.02,
402
  "learning_rate": 0.0001488817891373802,
403
+ "loss": 0.0463,
404
  "step": 640
405
  },
406
  {
407
  "epoch": 1.04,
408
  "learning_rate": 0.00014808306709265177,
409
+ "loss": 0.0718,
410
  "step": 650
411
  },
412
  {
413
  "epoch": 1.05,
414
  "learning_rate": 0.00014728434504792333,
415
+ "loss": 0.097,
416
  "step": 660
417
  },
418
  {
419
  "epoch": 1.07,
420
  "learning_rate": 0.0001464856230031949,
421
+ "loss": 0.0659,
422
  "step": 670
423
  },
424
  {
425
  "epoch": 1.09,
426
  "learning_rate": 0.00014568690095846646,
427
+ "loss": 0.0862,
428
  "step": 680
429
  },
430
  {
431
  "epoch": 1.1,
432
  "learning_rate": 0.00014488817891373802,
433
+ "loss": 0.0643,
434
  "step": 690
435
  },
436
  {
437
  "epoch": 1.12,
438
  "learning_rate": 0.00014408945686900958,
439
+ "loss": 0.0632,
440
  "step": 700
441
  },
442
  {
443
  "epoch": 1.13,
444
  "learning_rate": 0.00014329073482428114,
445
+ "loss": 0.132,
446
  "step": 710
447
  },
448
  {
449
  "epoch": 1.15,
450
  "learning_rate": 0.00014249201277955273,
451
+ "loss": 0.0619,
452
  "step": 720
453
  },
454
  {
455
  "epoch": 1.17,
456
  "learning_rate": 0.0001416932907348243,
457
+ "loss": 0.0665,
458
  "step": 730
459
  },
460
  {
461
  "epoch": 1.18,
462
  "learning_rate": 0.00014089456869009585,
463
+ "loss": 0.0407,
464
  "step": 740
465
  },
466
  {
467
  "epoch": 1.2,
468
  "learning_rate": 0.0001400958466453674,
469
+ "loss": 0.0492,
470
  "step": 750
471
  },
472
  {
473
  "epoch": 1.21,
474
  "learning_rate": 0.000139297124600639,
475
+ "loss": 0.0754,
476
  "step": 760
477
  },
478
  {
479
  "epoch": 1.23,
480
  "learning_rate": 0.00013849840255591056,
481
+ "loss": 0.0884,
482
  "step": 770
483
  },
484
  {
485
  "epoch": 1.25,
486
  "learning_rate": 0.00013769968051118212,
487
+ "loss": 0.1073,
488
  "step": 780
489
  },
490
  {
491
  "epoch": 1.26,
492
  "learning_rate": 0.00013690095846645369,
493
+ "loss": 0.0956,
494
  "step": 790
495
  },
496
  {
497
  "epoch": 1.28,
498
  "learning_rate": 0.00013610223642172525,
499
+ "loss": 0.064,
500
  "step": 800
501
  },
502
  {
503
  "epoch": 1.29,
504
  "learning_rate": 0.0001353035143769968,
505
+ "loss": 0.0879,
506
  "step": 810
507
  },
508
  {
509
  "epoch": 1.31,
510
  "learning_rate": 0.00013450479233226837,
511
+ "loss": 0.0781,
512
  "step": 820
513
  },
514
  {
515
  "epoch": 1.33,
516
  "learning_rate": 0.00013370607028753993,
517
+ "loss": 0.023,
518
  "step": 830
519
  },
520
  {
521
  "epoch": 1.34,
522
  "learning_rate": 0.0001329073482428115,
523
+ "loss": 0.0874,
524
  "step": 840
525
  },
526
  {
527
  "epoch": 1.36,
528
  "learning_rate": 0.00013210862619808305,
529
+ "loss": 0.0265,
530
  "step": 850
531
  },
532
  {
533
  "epoch": 1.37,
534
  "learning_rate": 0.00013130990415335464,
535
+ "loss": 0.1008,
536
  "step": 860
537
  },
538
  {
539
  "epoch": 1.39,
540
  "learning_rate": 0.0001305111821086262,
541
+ "loss": 0.106,
542
  "step": 870
543
  },
544
  {
545
  "epoch": 1.41,
546
  "learning_rate": 0.00012971246006389777,
547
+ "loss": 0.1314,
548
  "step": 880
549
  },
550
  {
551
  "epoch": 1.42,
552
  "learning_rate": 0.00012891373801916933,
553
+ "loss": 0.1214,
554
  "step": 890
555
  },
556
  {
557
  "epoch": 1.44,
558
  "learning_rate": 0.00012811501597444092,
559
+ "loss": 0.0988,
560
  "step": 900
561
  },
562
  {
563
  "epoch": 1.45,
564
  "learning_rate": 0.00012731629392971248,
565
+ "loss": 0.1319,
566
  "step": 910
567
  },
568
  {
569
  "epoch": 1.47,
570
  "learning_rate": 0.00012651757188498404,
571
+ "loss": 0.0993,
572
  "step": 920
573
  },
574
  {
575
  "epoch": 1.49,
576
  "learning_rate": 0.0001257188498402556,
577
+ "loss": 0.0993,
578
  "step": 930
579
  },
580
  {
581
  "epoch": 1.5,
582
  "learning_rate": 0.00012492012779552716,
583
+ "loss": 0.0323,
584
  "step": 940
585
  },
586
  {
587
  "epoch": 1.52,
588
  "learning_rate": 0.00012412140575079872,
589
+ "loss": 0.0544,
590
  "step": 950
591
  },
592
  {
593
  "epoch": 1.53,
594
  "learning_rate": 0.00012332268370607028,
595
+ "loss": 0.0388,
596
  "step": 960
597
  },
598
  {
599
  "epoch": 1.55,
600
  "learning_rate": 0.00012252396166134185,
601
+ "loss": 0.0738,
602
  "step": 970
603
  },
604
  {
605
  "epoch": 1.57,
606
  "learning_rate": 0.00012172523961661342,
607
+ "loss": 0.0705,
608
  "step": 980
609
  },
610
  {
611
  "epoch": 1.58,
612
  "learning_rate": 0.000120926517571885,
613
+ "loss": 0.1676,
614
  "step": 990
615
  },
616
  {
617
  "epoch": 1.6,
618
  "learning_rate": 0.00012012779552715656,
619
+ "loss": 0.0677,
620
  "step": 1000
621
  },
622
  {
623
  "epoch": 1.61,
624
  "learning_rate": 0.00011932907348242812,
625
+ "loss": 0.0742,
626
  "step": 1010
627
  },
628
  {
629
  "epoch": 1.63,
630
  "learning_rate": 0.00011853035143769968,
631
+ "loss": 0.0339,
632
  "step": 1020
633
  },
634
  {
635
  "epoch": 1.65,
636
  "learning_rate": 0.00011773162939297124,
637
+ "loss": 0.1323,
638
  "step": 1030
639
  },
640
  {
641
  "epoch": 1.66,
642
  "learning_rate": 0.00011693290734824283,
643
+ "loss": 0.0707,
644
  "step": 1040
645
  },
646
  {
647
  "epoch": 1.68,
648
  "learning_rate": 0.00011613418530351439,
649
+ "loss": 0.1936,
650
  "step": 1050
651
  },
652
  {
653
  "epoch": 1.69,
654
  "learning_rate": 0.00011533546325878595,
655
+ "loss": 0.1448,
656
  "step": 1060
657
  },
658
  {
659
  "epoch": 1.71,
660
  "learning_rate": 0.00011453674121405751,
661
+ "loss": 0.1723,
662
  "step": 1070
663
  },
664
  {
665
  "epoch": 1.73,
666
  "learning_rate": 0.00011373801916932908,
667
+ "loss": 0.0485,
668
  "step": 1080
669
  },
670
  {
671
  "epoch": 1.74,
672
  "learning_rate": 0.00011293929712460065,
673
+ "loss": 0.0797,
674
  "step": 1090
675
  },
676
  {
677
  "epoch": 1.76,
678
  "learning_rate": 0.00011214057507987221,
679
+ "loss": 0.1008,
680
  "step": 1100
681
  },
682
  {
683
  "epoch": 1.77,
684
  "learning_rate": 0.00011134185303514377,
685
+ "loss": 0.0858,
686
  "step": 1110
687
  },
688
  {
689
  "epoch": 1.79,
690
  "learning_rate": 0.00011054313099041533,
691
+ "loss": 0.0537,
692
  "step": 1120
693
  },
694
  {
695
  "epoch": 1.81,
696
  "learning_rate": 0.00010974440894568691,
697
+ "loss": 0.1089,
698
  "step": 1130
699
  },
700
  {
701
  "epoch": 1.82,
702
  "learning_rate": 0.00010894568690095847,
703
+ "loss": 0.1016,
704
  "step": 1140
705
  },
706
  {
707
  "epoch": 1.84,
708
  "learning_rate": 0.00010814696485623003,
709
+ "loss": 0.0771,
710
  "step": 1150
711
  },
712
  {
713
  "epoch": 1.85,
714
  "learning_rate": 0.0001073482428115016,
715
+ "loss": 0.0549,
716
  "step": 1160
717
  },
718
  {
719
  "epoch": 1.87,
720
  "learning_rate": 0.00010654952076677316,
721
+ "loss": 0.0593,
722
  "step": 1170
723
  },
724
  {
725
  "epoch": 1.88,
726
  "learning_rate": 0.00010575079872204474,
727
+ "loss": 0.1017,
728
  "step": 1180
729
  },
730
  {
731
  "epoch": 1.9,
732
  "learning_rate": 0.0001049520766773163,
733
+ "loss": 0.0265,
734
  "step": 1190
735
  },
736
  {
737
  "epoch": 1.92,
738
  "learning_rate": 0.00010415335463258787,
739
+ "loss": 0.05,
740
  "step": 1200
741
  },
742
  {
743
  "epoch": 1.93,
744
  "learning_rate": 0.00010335463258785943,
745
+ "loss": 0.0442,
746
  "step": 1210
747
  },
748
  {
749
  "epoch": 1.95,
750
  "learning_rate": 0.000102555910543131,
751
+ "loss": 0.0524,
752
  "step": 1220
753
  },
754
  {
755
  "epoch": 1.96,
756
  "learning_rate": 0.00010175718849840256,
757
+ "loss": 0.0772,
758
  "step": 1230
759
  },
760
  {
761
  "epoch": 1.98,
762
  "learning_rate": 0.00010095846645367413,
763
+ "loss": 0.182,
764
  "step": 1240
765
  },
766
  {
767
  "epoch": 2.0,
768
  "learning_rate": 0.00010015974440894569,
769
+ "loss": 0.156,
770
  "step": 1250
771
  },
772
  {
773
  "epoch": 2.0,
774
+ "eval_accuracy": 0.8756476683937824,
775
+ "eval_f1": 0.8658135325711,
776
+ "eval_loss": 0.37016573548316956,
777
+ "eval_precision": 0.8651736102970483,
778
+ "eval_recall": 0.8756476683937824,
779
+ "eval_runtime": 1.3092,
780
+ "eval_samples_per_second": 147.419,
781
+ "eval_steps_per_second": 9.93,
782
  "step": 1252
783
  },
784
  {
785
  "epoch": 2.01,
786
  "learning_rate": 9.936102236421726e-05,
787
+ "loss": 0.0584,
788
  "step": 1260
789
  },
790
  {
791
  "epoch": 2.03,
792
  "learning_rate": 9.856230031948882e-05,
793
+ "loss": 0.0477,
794
  "step": 1270
795
  },
796
  {
797
  "epoch": 2.04,
798
  "learning_rate": 9.77635782747604e-05,
799
+ "loss": 0.1256,
800
  "step": 1280
801
  },
802
  {
803
  "epoch": 2.06,
804
  "learning_rate": 9.696485623003195e-05,
805
+ "loss": 0.0431,
806
  "step": 1290
807
  },
808
  {
809
  "epoch": 2.08,
810
  "learning_rate": 9.616613418530351e-05,
811
+ "loss": 0.0543,
812
  "step": 1300
813
  },
814
  {
815
  "epoch": 2.09,
816
  "learning_rate": 9.536741214057508e-05,
817
+ "loss": 0.0245,
818
  "step": 1310
819
  },
820
  {
821
  "epoch": 2.11,
822
  "learning_rate": 9.456869009584664e-05,
823
+ "loss": 0.0213,
824
  "step": 1320
825
  },
826
  {
827
  "epoch": 2.12,
828
  "learning_rate": 9.376996805111822e-05,
829
+ "loss": 0.0107,
830
  "step": 1330
831
  },
832
  {
833
  "epoch": 2.14,
834
  "learning_rate": 9.297124600638978e-05,
835
+ "loss": 0.0272,
836
  "step": 1340
837
  },
838
  {
839
  "epoch": 2.16,
840
  "learning_rate": 9.217252396166136e-05,
841
+ "loss": 0.0135,
842
  "step": 1350
843
  },
844
  {
845
  "epoch": 2.17,
846
  "learning_rate": 9.137380191693292e-05,
847
+ "loss": 0.0221,
848
  "step": 1360
849
  },
850
  {
851
  "epoch": 2.19,
852
  "learning_rate": 9.057507987220448e-05,
853
+ "loss": 0.05,
854
  "step": 1370
855
  },
856
  {
857
  "epoch": 2.2,
858
  "learning_rate": 8.977635782747604e-05,
859
+ "loss": 0.0058,
860
  "step": 1380
861
  },
862
  {
863
  "epoch": 2.22,
864
  "learning_rate": 8.89776357827476e-05,
865
+ "loss": 0.0384,
866
  "step": 1390
867
  },
868
  {
869
  "epoch": 2.24,
870
  "learning_rate": 8.817891373801918e-05,
871
+ "loss": 0.0097,
872
  "step": 1400
873
  },
874
  {
875
  "epoch": 2.25,
876
  "learning_rate": 8.738019169329074e-05,
877
+ "loss": 0.0331,
878
  "step": 1410
879
  },
880
  {
881
  "epoch": 2.27,
882
  "learning_rate": 8.658146964856231e-05,
883
+ "loss": 0.0187,
884
  "step": 1420
885
  },
886
  {
887
  "epoch": 2.28,
888
  "learning_rate": 8.578274760383387e-05,
889
+ "loss": 0.0021,
890
  "step": 1430
891
  },
892
  {
893
  "epoch": 2.3,
894
  "learning_rate": 8.498402555910544e-05,
895
+ "loss": 0.0151,
896
  "step": 1440
897
  },
898
  {
899
  "epoch": 2.32,
900
  "learning_rate": 8.4185303514377e-05,
901
+ "loss": 0.0192,
902
  "step": 1450
903
  },
904
  {
905
  "epoch": 2.33,
906
  "learning_rate": 8.338658146964856e-05,
907
+ "loss": 0.0235,
908
  "step": 1460
909
  },
910
  {
911
  "epoch": 2.35,
912
  "learning_rate": 8.258785942492013e-05,
913
+ "loss": 0.0052,
914
  "step": 1470
915
  },
916
  {
917
  "epoch": 2.36,
918
  "learning_rate": 8.17891373801917e-05,
919
+ "loss": 0.0443,
920
  "step": 1480
921
  },
922
  {
923
  "epoch": 2.38,
924
  "learning_rate": 8.099041533546327e-05,
925
+ "loss": 0.0428,
926
  "step": 1490
927
  },
928
  {
929
  "epoch": 2.4,
930
  "learning_rate": 8.019169329073483e-05,
931
+ "loss": 0.0123,
932
  "step": 1500
933
  },
934
  {
935
  "epoch": 2.41,
936
  "learning_rate": 7.939297124600639e-05,
937
+ "loss": 0.0225,
938
  "step": 1510
939
  },
940
  {
941
  "epoch": 2.43,
942
  "learning_rate": 7.859424920127795e-05,
943
+ "loss": 0.0617,
944
  "step": 1520
945
  },
946
  {
947
  "epoch": 2.44,
948
  "learning_rate": 7.779552715654951e-05,
949
+ "loss": 0.0085,
950
  "step": 1530
951
  },
952
  {
953
  "epoch": 2.46,
954
  "learning_rate": 7.699680511182109e-05,
955
+ "loss": 0.0071,
956
  "step": 1540
957
  },
958
  {
959
  "epoch": 2.48,
960
  "learning_rate": 7.619808306709265e-05,
961
+ "loss": 0.0059,
962
  "step": 1550
963
  },
964
  {
965
  "epoch": 2.49,
966
  "learning_rate": 7.539936102236423e-05,
967
+ "loss": 0.0071,
968
  "step": 1560
969
  },
970
  {
971
  "epoch": 2.51,
972
  "learning_rate": 7.460063897763579e-05,
973
+ "loss": 0.0042,
974
  "step": 1570
975
  },
976
  {
977
  "epoch": 2.52,
978
  "learning_rate": 7.380191693290735e-05,
979
+ "loss": 0.0075,
980
  "step": 1580
981
  },
982
  {
983
  "epoch": 2.54,
984
  "learning_rate": 7.300319488817891e-05,
985
+ "loss": 0.032,
986
  "step": 1590
987
  },
988
  {
989
  "epoch": 2.56,
990
  "learning_rate": 7.220447284345049e-05,
991
+ "loss": 0.0176,
992
  "step": 1600
993
  },
994
  {
995
  "epoch": 2.57,
996
  "learning_rate": 7.140575079872205e-05,
997
+ "loss": 0.0102,
998
  "step": 1610
999
  },
1000
  {
1001
  "epoch": 2.59,
1002
  "learning_rate": 7.060702875399361e-05,
1003
+ "loss": 0.0246,
1004
  "step": 1620
1005
  },
1006
  {
1007
  "epoch": 2.6,
1008
  "learning_rate": 6.980830670926518e-05,
1009
+ "loss": 0.0548,
1010
  "step": 1630
1011
  },
1012
  {
1013
  "epoch": 2.62,
1014
  "learning_rate": 6.900958466453674e-05,
1015
+ "loss": 0.0279,
1016
  "step": 1640
1017
  },
1018
  {
1019
  "epoch": 2.64,
1020
  "learning_rate": 6.821086261980832e-05,
1021
+ "loss": 0.0278,
1022
  "step": 1650
1023
  },
1024
  {
1025
  "epoch": 2.65,
1026
  "learning_rate": 6.741214057507987e-05,
1027
+ "loss": 0.0189,
1028
  "step": 1660
1029
  },
1030
  {
1031
  "epoch": 2.67,
1032
  "learning_rate": 6.661341853035144e-05,
1033
+ "loss": 0.0516,
1034
  "step": 1670
1035
  },
1036
  {
1037
  "epoch": 2.68,
1038
  "learning_rate": 6.5814696485623e-05,
1039
+ "loss": 0.0119,
1040
  "step": 1680
1041
  },
1042
  {
1043
  "epoch": 2.7,
1044
  "learning_rate": 6.501597444089457e-05,
1045
+ "loss": 0.0247,
1046
  "step": 1690
1047
  },
1048
  {
1049
  "epoch": 2.72,
1050
  "learning_rate": 6.421725239616614e-05,
1051
+ "loss": 0.0216,
1052
  "step": 1700
1053
  },
1054
  {
1055
  "epoch": 2.73,
1056
  "learning_rate": 6.34185303514377e-05,
1057
+ "loss": 0.0402,
1058
  "step": 1710
1059
  },
1060
  {
1061
  "epoch": 2.75,
1062
  "learning_rate": 6.261980830670928e-05,
1063
+ "loss": 0.0335,
1064
  "step": 1720
1065
  },
1066
  {
1067
  "epoch": 2.76,
1068
  "learning_rate": 6.182108626198084e-05,
1069
+ "loss": 0.0109,
1070
  "step": 1730
1071
  },
1072
  {
1073
  "epoch": 2.78,
1074
  "learning_rate": 6.1022364217252406e-05,
1075
+ "loss": 0.0031,
1076
  "step": 1740
1077
  },
1078
  {
1079
  "epoch": 2.8,
1080
  "learning_rate": 6.022364217252396e-05,
1081
+ "loss": 0.0062,
1082
  "step": 1750
1083
  },
1084
  {
1085
  "epoch": 2.81,
1086
  "learning_rate": 5.942492012779552e-05,
1087
+ "loss": 0.0046,
1088
  "step": 1760
1089
  },
1090
  {
1091
  "epoch": 2.83,
1092
  "learning_rate": 5.86261980830671e-05,
1093
+ "loss": 0.0268,
1094
  "step": 1770
1095
  },
1096
  {
1097
  "epoch": 2.84,
1098
  "learning_rate": 5.782747603833866e-05,
1099
+ "loss": 0.0225,
1100
  "step": 1780
1101
  },
1102
  {
1103
  "epoch": 2.86,
1104
  "learning_rate": 5.702875399361023e-05,
1105
+ "loss": 0.0159,
1106
  "step": 1790
1107
  },
1108
  {
1109
  "epoch": 2.88,
1110
  "learning_rate": 5.623003194888179e-05,
1111
+ "loss": 0.0152,
1112
  "step": 1800
1113
  },
1114
  {
1115
  "epoch": 2.89,
1116
  "learning_rate": 5.543130990415336e-05,
1117
+ "loss": 0.0338,
1118
  "step": 1810
1119
  },
1120
  {
1121
  "epoch": 2.91,
1122
  "learning_rate": 5.4632587859424925e-05,
1123
+ "loss": 0.0382,
1124
  "step": 1820
1125
  },
1126
  {
1127
  "epoch": 2.92,
1128
  "learning_rate": 5.383386581469649e-05,
1129
+ "loss": 0.008,
1130
  "step": 1830
1131
  },
1132
  {
1133
  "epoch": 2.94,
1134
  "learning_rate": 5.3035143769968054e-05,
1135
+ "loss": 0.004,
1136
  "step": 1840
1137
  },
1138
  {
1139
  "epoch": 2.96,
1140
  "learning_rate": 5.2236421725239616e-05,
1141
+ "loss": 0.0023,
1142
  "step": 1850
1143
  },
1144
  {
1145
  "epoch": 2.97,
1146
  "learning_rate": 5.1437699680511184e-05,
1147
+ "loss": 0.0166,
1148
  "step": 1860
1149
  },
1150
  {
1151
  "epoch": 2.99,
1152
  "learning_rate": 5.0638977635782745e-05,
1153
+ "loss": 0.0349,
1154
  "step": 1870
1155
  },
1156
  {
1157
  "epoch": 3.0,
1158
+ "eval_accuracy": 0.8704663212435233,
1159
+ "eval_f1": 0.8714524424651604,
1160
+ "eval_loss": 0.46994760632514954,
1161
+ "eval_precision": 0.8770991838952296,
1162
+ "eval_recall": 0.8704663212435233,
1163
+ "eval_runtime": 1.3201,
1164
+ "eval_samples_per_second": 146.2,
1165
+ "eval_steps_per_second": 9.848,
1166
  "step": 1878
1167
  },
1168
  {
1169
  "epoch": 3.0,
1170
  "learning_rate": 4.984025559105431e-05,
1171
+ "loss": 0.0296,
1172
  "step": 1880
1173
  },
1174
  {
1175
  "epoch": 3.02,
1176
  "learning_rate": 4.904153354632588e-05,
1177
+ "loss": 0.0026,
1178
  "step": 1890
1179
  },
1180
  {
1181
  "epoch": 3.04,
1182
  "learning_rate": 4.824281150159744e-05,
1183
+ "loss": 0.0011,
1184
  "step": 1900
1185
  },
1186
  {
1187
  "epoch": 3.05,
1188
  "learning_rate": 4.744408945686901e-05,
1189
+ "loss": 0.0177,
1190
  "step": 1910
1191
  },
1192
  {
1193
  "epoch": 3.07,
1194
  "learning_rate": 4.664536741214058e-05,
1195
+ "loss": 0.0018,
1196
  "step": 1920
1197
  },
1198
  {
1199
  "epoch": 3.08,
1200
  "learning_rate": 4.584664536741215e-05,
1201
+ "loss": 0.0026,
1202
  "step": 1930
1203
  },
1204
  {
1205
  "epoch": 3.1,
1206
  "learning_rate": 4.504792332268371e-05,
1207
+ "loss": 0.0006,
1208
  "step": 1940
1209
  },
1210
  {
1211
  "epoch": 3.12,
1212
  "learning_rate": 4.424920127795527e-05,
1213
+ "loss": 0.001,
1214
  "step": 1950
1215
  },
1216
  {
1217
  "epoch": 3.13,
1218
  "learning_rate": 4.345047923322684e-05,
1219
+ "loss": 0.0037,
1220
  "step": 1960
1221
  },
1222
  {
1223
  "epoch": 3.15,
1224
  "learning_rate": 4.265175718849841e-05,
1225
+ "loss": 0.0019,
1226
  "step": 1970
1227
  },
1228
  {
1229
  "epoch": 3.16,
1230
  "learning_rate": 4.185303514376997e-05,
1231
+ "loss": 0.0017,
1232
  "step": 1980
1233
  },
1234
  {
1235
  "epoch": 3.18,
1236
  "learning_rate": 4.1054313099041536e-05,
1237
+ "loss": 0.0008,
1238
  "step": 1990
1239
  },
1240
  {
1241
  "epoch": 3.19,
1242
  "learning_rate": 4.0255591054313104e-05,
1243
+ "loss": 0.0219,
1244
  "step": 2000
1245
  },
1246
  {
1247
  "epoch": 3.21,
1248
  "learning_rate": 3.9456869009584666e-05,
1249
+ "loss": 0.0006,
1250
  "step": 2010
1251
  },
1252
  {
1253
  "epoch": 3.23,
1254
  "learning_rate": 3.8658146964856234e-05,
1255
+ "loss": 0.0096,
1256
  "step": 2020
1257
  },
1258
  {
1259
  "epoch": 3.24,
1260
  "learning_rate": 3.7859424920127795e-05,
1261
+ "loss": 0.0014,
1262
  "step": 2030
1263
  },
1264
  {
1265
  "epoch": 3.26,
1266
  "learning_rate": 3.7060702875399364e-05,
1267
+ "loss": 0.0242,
1268
  "step": 2040
1269
  },
1270
  {
1271
  "epoch": 3.27,
1272
  "learning_rate": 3.6261980830670925e-05,
1273
+ "loss": 0.0006,
1274
  "step": 2050
1275
  },
1276
  {
1277
  "epoch": 3.29,
1278
  "learning_rate": 3.546325878594249e-05,
1279
+ "loss": 0.0012,
1280
  "step": 2060
1281
  },
1282
  {
1283
  "epoch": 3.31,
1284
  "learning_rate": 3.466453674121406e-05,
1285
+ "loss": 0.0008,
1286
  "step": 2070
1287
  },
1288
  {
1289
  "epoch": 3.32,
1290
  "learning_rate": 3.386581469648562e-05,
1291
+ "loss": 0.0009,
1292
  "step": 2080
1293
  },
1294
  {
1295
  "epoch": 3.34,
1296
  "learning_rate": 3.306709265175719e-05,
1297
+ "loss": 0.0013,
1298
  "step": 2090
1299
  },
1300
  {
1301
  "epoch": 3.35,
1302
  "learning_rate": 3.226837060702875e-05,
1303
+ "loss": 0.0026,
1304
  "step": 2100
1305
  },
1306
  {
1307
  "epoch": 3.37,
1308
  "learning_rate": 3.146964856230032e-05,
1309
+ "loss": 0.0069,
1310
  "step": 2110
1311
  },
1312
  {
1313
  "epoch": 3.39,
1314
  "learning_rate": 3.067092651757188e-05,
1315
+ "loss": 0.0004,
1316
  "step": 2120
1317
  },
1318
  {
1319
  "epoch": 3.4,
1320
  "learning_rate": 2.987220447284345e-05,
1321
+ "loss": 0.0006,
1322
  "step": 2130
1323
  },
1324
  {
1325
  "epoch": 3.42,
1326
  "learning_rate": 2.907348242811502e-05,
1327
+ "loss": 0.0007,
1328
  "step": 2140
1329
  },
1330
  {
1331
  "epoch": 3.43,
1332
  "learning_rate": 2.8274760383386583e-05,
1333
+ "loss": 0.0011,
1334
  "step": 2150
1335
  },
1336
  {
1337
  "epoch": 3.45,
1338
  "learning_rate": 2.747603833865815e-05,
1339
+ "loss": 0.0024,
1340
  "step": 2160
1341
  },
1342
  {
1343
  "epoch": 3.47,
1344
+ "learning_rate": 2.6677316293929716e-05,
1345
+ "loss": 0.0005,
1346
  "step": 2170
1347
  },
1348
  {
1349
  "epoch": 3.48,
1350
+ "learning_rate": 2.5878594249201278e-05,
1351
+ "loss": 0.0011,
1352
  "step": 2180
1353
  },
1354
  {
1355
  "epoch": 3.5,
1356
+ "learning_rate": 2.5079872204472842e-05,
1357
+ "loss": 0.0007,
1358
  "step": 2190
1359
  },
1360
  {
1361
  "epoch": 3.51,
1362
+ "learning_rate": 2.428115015974441e-05,
1363
+ "loss": 0.0006,
1364
  "step": 2200
1365
  },
1366
  {
1367
  "epoch": 3.53,
1368
+ "learning_rate": 2.3482428115015975e-05,
1369
+ "loss": 0.0009,
1370
  "step": 2210
1371
  },
1372
  {
1373
  "epoch": 3.55,
1374
+ "learning_rate": 2.268370607028754e-05,
1375
+ "loss": 0.001,
1376
  "step": 2220
1377
  },
1378
  {
1379
  "epoch": 3.56,
1380
+ "learning_rate": 2.188498402555911e-05,
1381
+ "loss": 0.0004,
1382
  "step": 2230
1383
  },
1384
  {
1385
  "epoch": 3.58,
1386
+ "learning_rate": 2.108626198083067e-05,
1387
+ "loss": 0.0009,
1388
  "step": 2240
1389
  },
1390
  {
1391
  "epoch": 3.59,
1392
+ "learning_rate": 2.0287539936102238e-05,
1393
+ "loss": 0.0006,
1394
  "step": 2250
1395
  },
1396
  {
1397
  "epoch": 3.61,
1398
+ "learning_rate": 1.9488817891373803e-05,
1399
+ "loss": 0.0304,
1400
  "step": 2260
1401
  },
1402
  {
1403
  "epoch": 3.63,
1404
+ "learning_rate": 1.869009584664537e-05,
1405
+ "loss": 0.009,
1406
  "step": 2270
1407
  },
1408
  {
1409
  "epoch": 3.64,
1410
+ "learning_rate": 1.7891373801916932e-05,
1411
+ "loss": 0.0146,
1412
  "step": 2280
1413
  },
1414
  {
1415
  "epoch": 3.66,
1416
+ "learning_rate": 1.70926517571885e-05,
1417
+ "loss": 0.0007,
1418
  "step": 2290
1419
  },
1420
  {
1421
  "epoch": 3.67,
1422
+ "learning_rate": 1.6293929712460065e-05,
1423
+ "loss": 0.0008,
1424
  "step": 2300
1425
  },
1426
  {
1427
  "epoch": 3.69,
1428
+ "learning_rate": 1.549520766773163e-05,
1429
+ "loss": 0.0004,
1430
  "step": 2310
1431
  },
1432
  {
1433
  "epoch": 3.71,
1434
+ "learning_rate": 1.4696485623003195e-05,
1435
+ "loss": 0.0005,
1436
  "step": 2320
1437
  },
1438
  {
1439
  "epoch": 3.72,
1440
+ "learning_rate": 1.3897763578274761e-05,
1441
+ "loss": 0.001,
1442
  "step": 2330
1443
  },
1444
  {
1445
  "epoch": 3.74,
1446
+ "learning_rate": 1.3099041533546328e-05,
1447
+ "loss": 0.0004,
1448
  "step": 2340
1449
  },
1450
  {
1451
  "epoch": 3.75,
1452
+ "learning_rate": 1.2300319488817893e-05,
1453
+ "loss": 0.0004,
1454
  "step": 2350
1455
  },
1456
  {
1457
  "epoch": 3.77,
1458
+ "learning_rate": 1.1501597444089457e-05,
1459
+ "loss": 0.0274,
1460
  "step": 2360
1461
  },
1462
  {
1463
  "epoch": 3.79,
1464
+ "learning_rate": 1.0702875399361024e-05,
1465
+ "loss": 0.0006,
1466
  "step": 2370
1467
  },
1468
  {
1469
  "epoch": 3.8,
1470
+ "learning_rate": 9.904153354632589e-06,
1471
+ "loss": 0.0004,
1472
  "step": 2380
1473
  },
1474
  {
1475
  "epoch": 3.82,
1476
+ "learning_rate": 9.105431309904154e-06,
1477
+ "loss": 0.0004,
1478
  "step": 2390
1479
  },
1480
  {
1481
  "epoch": 3.83,
1482
+ "learning_rate": 8.306709265175718e-06,
1483
+ "loss": 0.0005,
1484
  "step": 2400
1485
  },
1486
  {
1487
  "epoch": 3.85,
1488
+ "learning_rate": 7.507987220447285e-06,
1489
+ "loss": 0.0086,
1490
  "step": 2410
1491
  },
1492
  {
1493
  "epoch": 3.87,
1494
+ "learning_rate": 6.70926517571885e-06,
1495
+ "loss": 0.035,
1496
  "step": 2420
1497
  },
1498
  {
1499
  "epoch": 3.88,
1500
+ "learning_rate": 5.910543130990415e-06,
1501
+ "loss": 0.0005,
1502
  "step": 2430
1503
  },
1504
  {
1505
  "epoch": 3.9,
1506
+ "learning_rate": 5.111821086261981e-06,
1507
+ "loss": 0.0004,
1508
  "step": 2440
1509
  },
1510
  {
1511
  "epoch": 3.91,
1512
+ "learning_rate": 4.3130990415335465e-06,
1513
+ "loss": 0.0005,
1514
  "step": 2450
1515
  },
1516
  {
1517
  "epoch": 3.93,
1518
+ "learning_rate": 3.5143769968051118e-06,
1519
+ "loss": 0.0008,
1520
  "step": 2460
1521
  },
1522
  {
1523
  "epoch": 3.95,
1524
+ "learning_rate": 2.7156549520766774e-06,
1525
+ "loss": 0.0014,
1526
  "step": 2470
1527
  },
1528
  {
1529
  "epoch": 3.96,
1530
+ "learning_rate": 1.9169329073482426e-06,
1531
+ "loss": 0.0006,
1532
  "step": 2480
1533
  },
1534
  {
1535
  "epoch": 3.98,
1536
+ "learning_rate": 1.1182108626198083e-06,
1537
+ "loss": 0.0004,
1538
  "step": 2490
1539
  },
1540
  {
1541
  "epoch": 3.99,
1542
+ "learning_rate": 3.194888178913738e-07,
1543
+ "loss": 0.0005,
1544
  "step": 2500
1545
  },
1546
  {
1547
  "epoch": 4.0,
1548
+ "eval_accuracy": 0.8756476683937824,
1549
+ "eval_f1": 0.8760275990745605,
1550
+ "eval_loss": 0.4526107907295227,
1551
+ "eval_precision": 0.8781157606627441,
1552
+ "eval_recall": 0.8756476683937824,
1553
+ "eval_runtime": 1.3211,
1554
+ "eval_samples_per_second": 146.09,
1555
+ "eval_steps_per_second": 9.84,
1556
  "step": 2504
1557
  },
1558
  {
1559
  "epoch": 4.0,
1560
  "step": 2504,
1561
  "total_flos": 3.104468219559813e+18,
1562
+ "train_loss": 0.06904298590738683,
1563
+ "train_runtime": 834.8541,
1564
+ "train_samples_per_second": 47.984,
1565
+ "train_steps_per_second": 2.999
1566
  }
1567
  ],
1568
  "max_steps": 2504,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0a7691bbbf2921d421ad8726bfa07ae8d56cb8324877a4a66e5aeff0b88ae6c
3
  size 3899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abe4d4eb25af72f410b3adbbfaeb3fc73806e42f23d9f0d9a2eedb528d41c1d6
3
  size 3899