matthieulel commited on
Commit
1d35750
·
verified ·
1 Parent(s): e51725e

End of training

Browse files
Files changed (5) hide show
  1. README.md +8 -6
  2. all_results.json +13 -13
  3. eval_results.json +9 -9
  4. train_results.json +5 -5
  5. trainer_state.json +647 -2321
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: google/vit-large-patch32-224-in21k
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
@@ -18,13 +20,13 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  # vit-large-patch32-224-in21k-finetuned-galaxy10-decals
20
 
21
- This model is a fine-tuned version of [google/vit-large-patch32-224-in21k](https://huggingface.co/google/vit-large-patch32-224-in21k) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.5907
24
- - Accuracy: 0.8360
25
- - Precision: 0.8348
26
- - Recall: 0.8360
27
- - F1: 0.8345
28
 
29
  ## Model description
30
 
 
2
  license: apache-2.0
3
  base_model: google/vit-large-patch32-224-in21k
4
  tags:
5
+ - image-classification
6
+ - vision
7
  - generated_from_trainer
8
  metrics:
9
  - accuracy
 
20
 
21
  # vit-large-patch32-224-in21k-finetuned-galaxy10-decals
22
 
23
+ This model is a fine-tuned version of [google/vit-large-patch32-224-in21k](https://huggingface.co/google/vit-large-patch32-224-in21k) on the matthieulel/galaxy10_decals dataset.
24
  It achieves the following results on the evaluation set:
25
+ - Loss: 0.5281
26
+ - Accuracy: 0.8382
27
+ - Precision: 0.8372
28
+ - Recall: 0.8382
29
+ - F1: 0.8356
30
 
31
  ## Model description
32
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 29.82,
3
- "eval_accuracy": 0.8235625704622322,
4
- "eval_f1": 0.8211884981742252,
5
- "eval_loss": 0.5849118232727051,
6
- "eval_precision": 0.8257093320559268,
7
- "eval_recall": 0.8235625704622322,
8
- "eval_runtime": 13.9158,
9
- "eval_samples_per_second": 127.481,
10
- "eval_steps_per_second": 4.024,
11
- "train_loss": 0.4506285851360649,
12
- "train_runtime": 7123.617,
13
- "train_samples_per_second": 67.221,
14
- "train_steps_per_second": 0.522
15
  }
 
1
  {
2
+ "epoch": 29.76,
3
+ "eval_accuracy": 0.8382187147688839,
4
+ "eval_f1": 0.8355644052274377,
5
+ "eval_loss": 0.5280851721763611,
6
+ "eval_precision": 0.8371941971933433,
7
+ "eval_recall": 0.8382187147688839,
8
+ "eval_runtime": 14.0221,
9
+ "eval_samples_per_second": 126.515,
10
+ "eval_steps_per_second": 0.998,
11
+ "train_loss": 0.5398157842697636,
12
+ "train_runtime": 7078.6752,
13
+ "train_samples_per_second": 67.648,
14
+ "train_steps_per_second": 0.131
15
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 29.82,
3
- "eval_accuracy": 0.8235625704622322,
4
- "eval_f1": 0.8211884981742252,
5
- "eval_loss": 0.5849118232727051,
6
- "eval_precision": 0.8257093320559268,
7
- "eval_recall": 0.8235625704622322,
8
- "eval_runtime": 13.9158,
9
- "eval_samples_per_second": 127.481,
10
- "eval_steps_per_second": 4.024
11
  }
 
1
  {
2
+ "epoch": 29.76,
3
+ "eval_accuracy": 0.8382187147688839,
4
+ "eval_f1": 0.8355644052274377,
5
+ "eval_loss": 0.5280851721763611,
6
+ "eval_precision": 0.8371941971933433,
7
+ "eval_recall": 0.8382187147688839,
8
+ "eval_runtime": 14.0221,
9
+ "eval_samples_per_second": 126.515,
10
+ "eval_steps_per_second": 0.998
11
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 29.82,
3
- "train_loss": 0.4506285851360649,
4
- "train_runtime": 7123.617,
5
- "train_samples_per_second": 67.221,
6
- "train_steps_per_second": 0.522
7
  }
 
1
  {
2
+ "epoch": 29.76,
3
+ "train_loss": 0.5398157842697636,
4
+ "train_runtime": 7078.6752,
5
+ "train_samples_per_second": 67.648,
6
+ "train_steps_per_second": 0.131
7
  }
trainer_state.json CHANGED
@@ -1,2622 +1,948 @@
1
  {
2
- "best_metric": 0.8235625704622322,
3
- "best_model_checkpoint": "vit-large-patch32-224-in21k-finetuned-galaxy10-decals/checkpoint-1746",
4
- "epoch": 29.819639278557116,
5
  "eval_steps": 500,
6
- "global_step": 3720,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.08,
13
- "learning_rate": 2.688172043010753e-06,
14
- "loss": 2.2922,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.16,
19
- "learning_rate": 5.376344086021506e-06,
20
- "loss": 2.2641,
21
  "step": 20
22
  },
23
  {
24
- "epoch": 0.24,
25
- "learning_rate": 8.064516129032258e-06,
26
- "loss": 2.1892,
27
  "step": 30
28
  },
29
  {
30
- "epoch": 0.32,
31
- "learning_rate": 1.0752688172043012e-05,
32
- "loss": 2.0773,
 
 
 
 
 
 
 
 
 
 
 
 
33
  "step": 40
34
  },
35
  {
36
- "epoch": 0.4,
37
- "learning_rate": 1.3440860215053763e-05,
38
- "loss": 1.9892,
39
  "step": 50
40
  },
41
  {
42
- "epoch": 0.48,
43
- "learning_rate": 1.6129032258064517e-05,
44
- "loss": 1.8382,
45
  "step": 60
46
  },
47
  {
48
- "epoch": 0.56,
49
- "learning_rate": 1.881720430107527e-05,
50
- "loss": 1.7049,
 
 
 
 
 
 
 
 
 
 
 
 
51
  "step": 70
52
  },
53
  {
54
- "epoch": 0.64,
55
- "learning_rate": 2.1505376344086024e-05,
56
- "loss": 1.5763,
57
  "step": 80
58
  },
59
  {
60
- "epoch": 0.72,
61
- "learning_rate": 2.4193548387096777e-05,
62
- "loss": 1.4746,
63
  "step": 90
64
  },
65
  {
66
- "epoch": 0.8,
67
- "learning_rate": 2.6881720430107527e-05,
68
- "loss": 1.3415,
 
 
 
 
 
 
 
 
 
 
 
 
69
  "step": 100
70
  },
71
  {
72
- "epoch": 0.88,
73
- "learning_rate": 2.9569892473118284e-05,
74
- "loss": 1.2596,
75
  "step": 110
76
  },
77
  {
78
- "epoch": 0.96,
79
- "learning_rate": 3.2258064516129034e-05,
80
- "loss": 1.1583,
81
  "step": 120
82
  },
83
  {
84
- "epoch": 0.99,
85
- "eval_accuracy": 0.7068771138669673,
86
- "eval_f1": 0.675810673269607,
87
- "eval_loss": 1.0550673007965088,
88
- "eval_precision": 0.6558614630795221,
89
- "eval_recall": 0.7068771138669673,
90
- "eval_runtime": 13.9158,
91
- "eval_samples_per_second": 127.481,
92
- "eval_steps_per_second": 4.024,
93
- "step": 124
94
- },
95
- {
96
- "epoch": 1.04,
97
- "learning_rate": 3.494623655913979e-05,
98
- "loss": 1.1563,
99
  "step": 130
100
  },
101
  {
102
- "epoch": 1.12,
103
- "learning_rate": 3.763440860215054e-05,
104
- "loss": 1.0897,
105
  "step": 140
106
  },
107
  {
108
- "epoch": 1.2,
109
- "learning_rate": 4.032258064516129e-05,
110
- "loss": 1.0546,
111
  "step": 150
112
  },
113
  {
114
- "epoch": 1.28,
115
- "learning_rate": 4.301075268817205e-05,
116
- "loss": 1.0226,
 
 
 
 
 
 
 
 
 
 
 
 
117
  "step": 160
118
  },
119
  {
120
- "epoch": 1.36,
121
- "learning_rate": 4.56989247311828e-05,
122
- "loss": 0.9752,
123
  "step": 170
124
  },
125
  {
126
- "epoch": 1.44,
127
- "learning_rate": 4.8387096774193554e-05,
128
- "loss": 0.9802,
129
  "step": 180
130
  },
131
  {
132
- "epoch": 1.52,
133
- "learning_rate": 5.1075268817204304e-05,
134
- "loss": 0.9326,
 
 
 
 
 
 
 
 
 
 
 
 
135
  "step": 190
136
  },
137
  {
138
- "epoch": 1.6,
139
- "learning_rate": 5.3763440860215054e-05,
140
- "loss": 0.9747,
141
  "step": 200
142
  },
143
  {
144
- "epoch": 1.68,
145
- "learning_rate": 5.645161290322582e-05,
146
- "loss": 0.9057,
147
  "step": 210
148
  },
149
  {
150
- "epoch": 1.76,
151
- "learning_rate": 5.913978494623657e-05,
152
- "loss": 0.9051,
 
 
 
 
 
 
 
 
 
 
 
 
153
  "step": 220
154
  },
155
  {
156
- "epoch": 1.84,
157
- "learning_rate": 6.182795698924732e-05,
158
- "loss": 0.9089,
159
  "step": 230
160
  },
161
  {
162
- "epoch": 1.92,
163
- "learning_rate": 6.451612903225807e-05,
164
- "loss": 0.8599,
165
  "step": 240
166
  },
167
  {
168
- "epoch": 2.0,
169
- "eval_accuracy": 0.762119503945885,
170
- "eval_f1": 0.7556580755827043,
171
- "eval_loss": 0.7913542985916138,
172
- "eval_precision": 0.7717184841610082,
173
- "eval_recall": 0.762119503945885,
174
- "eval_runtime": 13.9163,
175
- "eval_samples_per_second": 127.476,
176
- "eval_steps_per_second": 4.024,
177
- "step": 249
178
  },
179
  {
180
- "epoch": 2.0,
181
- "learning_rate": 6.720430107526882e-05,
182
- "loss": 0.8928,
 
 
 
 
 
 
183
  "step": 250
184
  },
185
  {
186
- "epoch": 2.08,
187
- "learning_rate": 6.989247311827958e-05,
188
- "loss": 0.8706,
189
  "step": 260
190
  },
191
  {
192
- "epoch": 2.16,
193
- "learning_rate": 7.258064516129033e-05,
194
- "loss": 0.8337,
195
  "step": 270
196
  },
197
  {
198
- "epoch": 2.24,
199
- "learning_rate": 7.526881720430108e-05,
200
- "loss": 0.841,
201
  "step": 280
202
  },
203
  {
204
- "epoch": 2.32,
205
- "learning_rate": 7.795698924731183e-05,
206
- "loss": 0.8265,
 
 
 
 
 
 
 
 
 
 
 
 
207
  "step": 290
208
  },
209
  {
210
- "epoch": 2.4,
211
- "learning_rate": 8.064516129032258e-05,
212
- "loss": 0.8324,
213
  "step": 300
214
  },
215
  {
216
- "epoch": 2.48,
217
- "learning_rate": 8.333333333333334e-05,
218
- "loss": 0.843,
219
  "step": 310
220
  },
221
  {
222
- "epoch": 2.57,
223
- "learning_rate": 8.60215053763441e-05,
224
- "loss": 0.8416,
 
 
 
 
 
 
 
 
 
 
 
 
225
  "step": 320
226
  },
227
  {
228
- "epoch": 2.65,
229
- "learning_rate": 8.870967741935484e-05,
230
- "loss": 0.8022,
231
  "step": 330
232
  },
233
  {
234
- "epoch": 2.73,
235
- "learning_rate": 9.13978494623656e-05,
236
- "loss": 0.8954,
237
  "step": 340
238
  },
239
  {
240
- "epoch": 2.81,
241
- "learning_rate": 9.408602150537636e-05,
242
- "loss": 0.8137,
 
 
 
 
 
 
 
 
 
 
 
 
243
  "step": 350
244
  },
245
  {
246
- "epoch": 2.89,
247
- "learning_rate": 9.677419354838711e-05,
248
- "loss": 0.793,
249
  "step": 360
250
  },
251
  {
252
- "epoch": 2.97,
253
- "learning_rate": 9.946236559139786e-05,
254
- "loss": 0.854,
255
  "step": 370
256
  },
257
  {
258
- "epoch": 3.0,
259
- "eval_accuracy": 0.7671927846674182,
260
- "eval_f1": 0.7641877899984297,
261
- "eval_loss": 0.711500346660614,
262
- "eval_precision": 0.7849640754022337,
263
- "eval_recall": 0.7671927846674182,
264
- "eval_runtime": 13.9691,
265
- "eval_samples_per_second": 126.994,
266
- "eval_steps_per_second": 4.009,
267
- "step": 374
268
- },
269
- {
270
- "epoch": 3.05,
271
- "learning_rate": 9.97610513739546e-05,
272
- "loss": 0.8143,
273
  "step": 380
274
  },
275
  {
276
- "epoch": 3.13,
277
- "learning_rate": 9.946236559139786e-05,
278
- "loss": 0.7927,
279
  "step": 390
280
  },
281
  {
282
- "epoch": 3.21,
283
- "learning_rate": 9.916367980884111e-05,
284
- "loss": 0.7554,
285
  "step": 400
286
  },
287
  {
288
- "epoch": 3.29,
289
- "learning_rate": 9.886499402628435e-05,
290
- "loss": 0.7916,
 
 
 
 
 
 
 
 
 
 
 
 
291
  "step": 410
292
  },
293
  {
294
- "epoch": 3.37,
295
- "learning_rate": 9.85663082437276e-05,
296
- "loss": 0.743,
297
  "step": 420
298
  },
299
  {
300
- "epoch": 3.45,
301
- "learning_rate": 9.826762246117085e-05,
302
- "loss": 0.7777,
303
  "step": 430
304
  },
305
  {
306
- "epoch": 3.53,
307
- "learning_rate": 9.79689366786141e-05,
308
- "loss": 0.7625,
 
 
 
 
 
 
 
 
 
 
 
 
309
  "step": 440
310
  },
311
  {
312
- "epoch": 3.61,
313
- "learning_rate": 9.767025089605735e-05,
314
- "loss": 0.7553,
315
  "step": 450
316
  },
317
  {
318
- "epoch": 3.69,
319
- "learning_rate": 9.73715651135006e-05,
320
- "loss": 0.7728,
321
  "step": 460
322
  },
323
  {
324
- "epoch": 3.77,
325
- "learning_rate": 9.707287933094386e-05,
326
- "loss": 0.7744,
 
 
 
 
 
 
 
 
 
 
 
 
327
  "step": 470
328
  },
329
  {
330
- "epoch": 3.85,
331
- "learning_rate": 9.677419354838711e-05,
332
- "loss": 0.746,
333
  "step": 480
334
  },
335
  {
336
- "epoch": 3.93,
337
- "learning_rate": 9.647550776583036e-05,
338
- "loss": 0.7282,
339
  "step": 490
340
  },
341
  {
342
- "epoch": 4.0,
343
- "eval_accuracy": 0.7683201803833145,
344
- "eval_f1": 0.7603757540346687,
345
- "eval_loss": 0.6807467937469482,
346
- "eval_precision": 0.7746268567673005,
347
- "eval_recall": 0.7683201803833145,
348
- "eval_runtime": 13.8857,
349
- "eval_samples_per_second": 127.757,
350
- "eval_steps_per_second": 4.033,
351
- "step": 499
352
- },
353
- {
354
- "epoch": 4.01,
355
- "learning_rate": 9.61768219832736e-05,
356
- "loss": 0.6907,
357
  "step": 500
358
  },
359
  {
360
- "epoch": 4.09,
361
- "learning_rate": 9.587813620071685e-05,
362
- "loss": 0.7377,
363
  "step": 510
364
  },
365
  {
366
- "epoch": 4.17,
367
- "learning_rate": 9.55794504181601e-05,
368
- "loss": 0.7166,
369
  "step": 520
370
  },
371
  {
372
- "epoch": 4.25,
373
- "learning_rate": 9.528076463560335e-05,
374
- "loss": 0.6948,
375
  "step": 530
376
  },
377
  {
378
- "epoch": 4.33,
379
- "learning_rate": 9.49820788530466e-05,
380
- "loss": 0.6907,
 
 
 
 
 
 
 
 
 
 
 
 
381
  "step": 540
382
  },
383
  {
384
- "epoch": 4.41,
385
- "learning_rate": 9.468339307048984e-05,
386
- "loss": 0.7011,
387
  "step": 550
388
  },
389
  {
390
- "epoch": 4.49,
391
- "learning_rate": 9.438470728793309e-05,
392
- "loss": 0.6377,
393
  "step": 560
394
  },
395
  {
396
- "epoch": 4.57,
397
- "learning_rate": 9.408602150537636e-05,
398
- "loss": 0.6651,
 
 
 
 
 
 
 
 
 
 
 
 
399
  "step": 570
400
  },
401
  {
402
- "epoch": 4.65,
403
- "learning_rate": 9.378733572281961e-05,
404
- "loss": 0.6564,
405
  "step": 580
406
  },
407
  {
408
- "epoch": 4.73,
409
- "learning_rate": 9.348864994026285e-05,
410
- "loss": 0.6498,
411
  "step": 590
412
  },
413
  {
414
- "epoch": 4.81,
415
- "learning_rate": 9.31899641577061e-05,
416
- "loss": 0.7252,
 
 
 
 
 
 
 
 
 
 
 
 
417
  "step": 600
418
  },
419
  {
420
- "epoch": 4.89,
421
- "learning_rate": 9.289127837514935e-05,
422
- "loss": 0.6279,
423
  "step": 610
424
  },
425
  {
426
- "epoch": 4.97,
427
- "learning_rate": 9.25925925925926e-05,
428
- "loss": 0.6165,
429
  "step": 620
430
  },
431
  {
432
- "epoch": 4.99,
433
- "eval_accuracy": 0.8015783540022547,
434
- "eval_f1": 0.8014834346227482,
435
- "eval_loss": 0.6207838654518127,
436
- "eval_precision": 0.808772544756013,
437
- "eval_recall": 0.8015783540022547,
438
- "eval_runtime": 13.9594,
439
- "eval_samples_per_second": 127.082,
440
- "eval_steps_per_second": 4.012,
441
- "step": 623
442
- },
443
- {
444
- "epoch": 5.05,
445
- "learning_rate": 9.229390681003584e-05,
446
- "loss": 0.6385,
447
  "step": 630
448
  },
449
  {
450
- "epoch": 5.13,
451
- "learning_rate": 9.199522102747909e-05,
452
- "loss": 0.6732,
453
  "step": 640
454
  },
455
  {
456
- "epoch": 5.21,
457
- "learning_rate": 9.169653524492234e-05,
458
- "loss": 0.6464,
459
  "step": 650
460
  },
461
  {
462
- "epoch": 5.29,
463
- "learning_rate": 9.13978494623656e-05,
464
- "loss": 0.6721,
 
 
 
 
 
 
 
 
 
 
 
 
465
  "step": 660
466
  },
467
  {
468
- "epoch": 5.37,
469
- "learning_rate": 9.109916367980885e-05,
470
- "loss": 0.6044,
471
  "step": 670
472
  },
473
  {
474
- "epoch": 5.45,
475
- "learning_rate": 9.080047789725208e-05,
476
- "loss": 0.6356,
477
  "step": 680
478
  },
479
  {
480
- "epoch": 5.53,
481
- "learning_rate": 9.050179211469535e-05,
482
- "loss": 0.5974,
 
 
 
 
 
 
 
 
 
 
 
 
483
  "step": 690
484
  },
485
  {
486
- "epoch": 5.61,
487
- "learning_rate": 9.02031063321386e-05,
488
- "loss": 0.6674,
489
  "step": 700
490
  },
491
  {
492
- "epoch": 5.69,
493
- "learning_rate": 8.990442054958185e-05,
494
- "loss": 0.6068,
495
  "step": 710
496
  },
497
  {
498
- "epoch": 5.77,
499
- "learning_rate": 8.960573476702509e-05,
500
- "loss": 0.5778,
 
 
 
 
 
 
 
 
 
 
 
 
501
  "step": 720
502
  },
503
  {
504
- "epoch": 5.85,
505
- "learning_rate": 8.930704898446834e-05,
506
- "loss": 0.6623,
507
  "step": 730
508
  },
509
  {
510
- "epoch": 5.93,
511
- "learning_rate": 8.900836320191159e-05,
512
- "loss": 0.5946,
513
  "step": 740
514
  },
515
  {
516
- "epoch": 6.0,
517
- "eval_accuracy": 0.8043968432919955,
518
- "eval_f1": 0.8009427414085736,
519
- "eval_loss": 0.5850139260292053,
520
- "eval_precision": 0.8083816060983575,
521
- "eval_recall": 0.8043968432919955,
522
- "eval_runtime": 13.968,
523
- "eval_samples_per_second": 127.005,
524
- "eval_steps_per_second": 4.009,
525
- "step": 748
526
  },
527
  {
528
- "epoch": 6.01,
529
- "learning_rate": 8.870967741935484e-05,
530
- "loss": 0.637,
 
 
 
 
 
 
531
  "step": 750
532
  },
533
  {
534
- "epoch": 6.09,
535
- "learning_rate": 8.84109916367981e-05,
536
- "loss": 0.5925,
537
  "step": 760
538
  },
539
  {
540
- "epoch": 6.17,
541
- "learning_rate": 8.811230585424133e-05,
542
- "loss": 0.6185,
543
  "step": 770
544
  },
545
  {
546
- "epoch": 6.25,
547
- "learning_rate": 8.781362007168459e-05,
548
- "loss": 0.5654,
549
  "step": 780
550
  },
551
  {
552
- "epoch": 6.33,
553
- "learning_rate": 8.751493428912784e-05,
554
- "loss": 0.5526,
 
 
 
 
 
 
 
 
 
 
 
 
555
  "step": 790
556
  },
557
  {
558
- "epoch": 6.41,
559
- "learning_rate": 8.72162485065711e-05,
560
- "loss": 0.5886,
561
  "step": 800
562
  },
563
  {
564
- "epoch": 6.49,
565
- "learning_rate": 8.691756272401434e-05,
566
- "loss": 0.6155,
567
  "step": 810
568
  },
569
  {
570
- "epoch": 6.57,
571
- "learning_rate": 8.661887694145759e-05,
572
- "loss": 0.5868,
 
 
 
 
 
 
 
 
 
 
 
 
573
  "step": 820
574
  },
575
  {
576
- "epoch": 6.65,
577
- "learning_rate": 8.632019115890084e-05,
578
- "loss": 0.6124,
579
  "step": 830
580
  },
581
  {
582
- "epoch": 6.73,
583
- "learning_rate": 8.60215053763441e-05,
584
- "loss": 0.5738,
585
  "step": 840
586
  },
587
  {
588
- "epoch": 6.81,
589
- "learning_rate": 8.572281959378735e-05,
590
- "loss": 0.6027,
 
 
 
 
 
 
 
 
 
 
 
 
591
  "step": 850
592
  },
593
  {
594
- "epoch": 6.89,
595
- "learning_rate": 8.542413381123058e-05,
596
- "loss": 0.6024,
597
  "step": 860
598
  },
599
  {
600
- "epoch": 6.97,
601
- "learning_rate": 8.512544802867384e-05,
602
- "loss": 0.6243,
603
  "step": 870
604
  },
605
  {
606
- "epoch": 7.0,
607
- "eval_accuracy": 0.7931228861330327,
608
- "eval_f1": 0.7934536670951715,
609
- "eval_loss": 0.6090168952941895,
610
- "eval_precision": 0.8036636158334073,
611
- "eval_recall": 0.7931228861330327,
612
- "eval_runtime": 13.9191,
613
- "eval_samples_per_second": 127.451,
614
- "eval_steps_per_second": 4.023,
615
- "step": 873
616
- },
617
- {
618
- "epoch": 7.05,
619
- "learning_rate": 8.482676224611709e-05,
620
- "loss": 0.5911,
621
  "step": 880
622
  },
623
  {
624
- "epoch": 7.13,
625
- "learning_rate": 8.452807646356034e-05,
626
- "loss": 0.575,
627
  "step": 890
628
  },
629
  {
630
- "epoch": 7.21,
631
- "learning_rate": 8.422939068100359e-05,
632
- "loss": 0.5783,
633
  "step": 900
634
  },
635
  {
636
- "epoch": 7.29,
637
- "learning_rate": 8.393070489844683e-05,
638
- "loss": 0.5047,
 
 
 
 
 
 
 
 
 
 
 
 
639
  "step": 910
640
  },
641
  {
642
- "epoch": 7.37,
643
- "learning_rate": 8.363201911589009e-05,
644
- "loss": 0.5574,
645
  "step": 920
646
  },
647
  {
648
- "epoch": 7.45,
649
- "learning_rate": 8.333333333333334e-05,
650
- "loss": 0.5489,
651
  "step": 930
652
  },
653
  {
654
- "epoch": 7.54,
655
- "learning_rate": 8.30346475507766e-05,
656
- "loss": 0.5754,
657
- "step": 940
658
- },
659
- {
660
- "epoch": 7.62,
661
- "learning_rate": 8.273596176821983e-05,
662
- "loss": 0.562,
663
- "step": 950
664
- },
665
- {
666
- "epoch": 7.7,
667
- "learning_rate": 8.243727598566309e-05,
668
- "loss": 0.5474,
669
- "step": 960
670
- },
671
- {
672
- "epoch": 7.78,
673
- "learning_rate": 8.213859020310634e-05,
674
- "loss": 0.542,
675
- "step": 970
676
- },
677
- {
678
- "epoch": 7.86,
679
- "learning_rate": 8.183990442054959e-05,
680
- "loss": 0.5261,
681
- "step": 980
682
- },
683
- {
684
- "epoch": 7.94,
685
- "learning_rate": 8.154121863799284e-05,
686
- "loss": 0.5429,
687
- "step": 990
688
- },
689
- {
690
- "epoch": 8.0,
691
- "eval_accuracy": 0.8021420518602029,
692
- "eval_f1": 0.800590565745525,
693
- "eval_loss": 0.5829988718032837,
694
- "eval_precision": 0.8086973403659105,
695
- "eval_recall": 0.8021420518602029,
696
- "eval_runtime": 13.8868,
697
- "eval_samples_per_second": 127.747,
698
- "eval_steps_per_second": 4.033,
699
- "step": 998
700
- },
701
- {
702
- "epoch": 8.02,
703
- "learning_rate": 8.124253285543608e-05,
704
- "loss": 0.5494,
705
- "step": 1000
706
- },
707
- {
708
- "epoch": 8.1,
709
- "learning_rate": 8.094384707287933e-05,
710
- "loss": 0.4858,
711
- "step": 1010
712
- },
713
- {
714
- "epoch": 8.18,
715
- "learning_rate": 8.064516129032258e-05,
716
- "loss": 0.4911,
717
- "step": 1020
718
- },
719
- {
720
- "epoch": 8.26,
721
- "learning_rate": 8.034647550776585e-05,
722
- "loss": 0.5308,
723
- "step": 1030
724
- },
725
- {
726
- "epoch": 8.34,
727
- "learning_rate": 8.004778972520908e-05,
728
- "loss": 0.4982,
729
- "step": 1040
730
- },
731
- {
732
- "epoch": 8.42,
733
- "learning_rate": 7.974910394265234e-05,
734
- "loss": 0.5323,
735
- "step": 1050
736
- },
737
- {
738
- "epoch": 8.5,
739
- "learning_rate": 7.945041816009559e-05,
740
- "loss": 0.5513,
741
- "step": 1060
742
- },
743
- {
744
- "epoch": 8.58,
745
- "learning_rate": 7.915173237753884e-05,
746
- "loss": 0.5434,
747
- "step": 1070
748
- },
749
- {
750
- "epoch": 8.66,
751
- "learning_rate": 7.885304659498209e-05,
752
- "loss": 0.5127,
753
- "step": 1080
754
- },
755
- {
756
- "epoch": 8.74,
757
- "learning_rate": 7.855436081242533e-05,
758
- "loss": 0.5551,
759
- "step": 1090
760
- },
761
- {
762
- "epoch": 8.82,
763
- "learning_rate": 7.825567502986858e-05,
764
- "loss": 0.4928,
765
- "step": 1100
766
- },
767
- {
768
- "epoch": 8.9,
769
- "learning_rate": 7.795698924731183e-05,
770
- "loss": 0.5295,
771
- "step": 1110
772
- },
773
- {
774
- "epoch": 8.98,
775
- "learning_rate": 7.765830346475508e-05,
776
- "loss": 0.558,
777
- "step": 1120
778
  },
779
  {
780
- "epoch": 8.99,
781
- "eval_accuracy": 0.8094701240135288,
782
- "eval_f1": 0.8080741738843362,
783
- "eval_loss": 0.5725377202033997,
784
- "eval_precision": 0.8190738508467059,
785
- "eval_recall": 0.8094701240135288,
786
- "eval_runtime": 13.9705,
787
- "eval_samples_per_second": 126.982,
788
- "eval_steps_per_second": 4.008,
789
- "step": 1122
790
- },
791
- {
792
- "epoch": 9.06,
793
- "learning_rate": 7.735961768219832e-05,
794
- "loss": 0.5015,
795
- "step": 1130
796
- },
797
- {
798
- "epoch": 9.14,
799
- "learning_rate": 7.706093189964157e-05,
800
- "loss": 0.5086,
801
- "step": 1140
802
- },
803
- {
804
- "epoch": 9.22,
805
- "learning_rate": 7.676224611708484e-05,
806
- "loss": 0.503,
807
- "step": 1150
808
- },
809
- {
810
- "epoch": 9.3,
811
- "learning_rate": 7.646356033452809e-05,
812
- "loss": 0.454,
813
- "step": 1160
814
- },
815
- {
816
- "epoch": 9.38,
817
- "learning_rate": 7.616487455197133e-05,
818
- "loss": 0.4917,
819
- "step": 1170
820
- },
821
- {
822
- "epoch": 9.46,
823
- "learning_rate": 7.586618876941458e-05,
824
- "loss": 0.4835,
825
- "step": 1180
826
- },
827
- {
828
- "epoch": 9.54,
829
- "learning_rate": 7.556750298685783e-05,
830
- "loss": 0.5217,
831
- "step": 1190
832
- },
833
- {
834
- "epoch": 9.62,
835
- "learning_rate": 7.526881720430108e-05,
836
- "loss": 0.4515,
837
- "step": 1200
838
- },
839
- {
840
- "epoch": 9.7,
841
- "learning_rate": 7.497013142174433e-05,
842
- "loss": 0.458,
843
- "step": 1210
844
- },
845
- {
846
- "epoch": 9.78,
847
- "learning_rate": 7.467144563918757e-05,
848
- "loss": 0.4737,
849
- "step": 1220
850
- },
851
- {
852
- "epoch": 9.86,
853
- "learning_rate": 7.437275985663082e-05,
854
- "loss": 0.4771,
855
- "step": 1230
856
- },
857
- {
858
- "epoch": 9.94,
859
- "learning_rate": 7.407407407407407e-05,
860
- "loss": 0.457,
861
- "step": 1240
862
- },
863
- {
864
- "epoch": 10.0,
865
- "eval_accuracy": 0.8122886133032694,
866
- "eval_f1": 0.8085263361904211,
867
- "eval_loss": 0.570176899433136,
868
- "eval_precision": 0.8144151275813437,
869
- "eval_recall": 0.8122886133032694,
870
- "eval_runtime": 13.9223,
871
- "eval_samples_per_second": 127.421,
872
- "eval_steps_per_second": 4.022,
873
- "step": 1247
874
- },
875
- {
876
- "epoch": 10.02,
877
- "learning_rate": 7.377538829151732e-05,
878
- "loss": 0.5038,
879
- "step": 1250
880
- },
881
- {
882
- "epoch": 10.1,
883
- "learning_rate": 7.347670250896058e-05,
884
- "loss": 0.4696,
885
- "step": 1260
886
- },
887
- {
888
- "epoch": 10.18,
889
- "learning_rate": 7.317801672640383e-05,
890
- "loss": 0.4644,
891
- "step": 1270
892
- },
893
- {
894
- "epoch": 10.26,
895
- "learning_rate": 7.287933094384708e-05,
896
- "loss": 0.4382,
897
- "step": 1280
898
- },
899
- {
900
- "epoch": 10.34,
901
- "learning_rate": 7.258064516129033e-05,
902
- "loss": 0.5107,
903
- "step": 1290
904
- },
905
- {
906
- "epoch": 10.42,
907
- "learning_rate": 7.228195937873358e-05,
908
- "loss": 0.4672,
909
- "step": 1300
910
- },
911
- {
912
- "epoch": 10.5,
913
- "learning_rate": 7.198327359617682e-05,
914
- "loss": 0.4487,
915
- "step": 1310
916
- },
917
- {
918
- "epoch": 10.58,
919
- "learning_rate": 7.168458781362007e-05,
920
- "loss": 0.4769,
921
- "step": 1320
922
- },
923
- {
924
- "epoch": 10.66,
925
- "learning_rate": 7.138590203106332e-05,
926
- "loss": 0.467,
927
- "step": 1330
928
- },
929
- {
930
- "epoch": 10.74,
931
- "learning_rate": 7.108721624850657e-05,
932
- "loss": 0.4357,
933
- "step": 1340
934
- },
935
- {
936
- "epoch": 10.82,
937
- "learning_rate": 7.078853046594983e-05,
938
- "loss": 0.4554,
939
- "step": 1350
940
- },
941
- {
942
- "epoch": 10.9,
943
- "learning_rate": 7.048984468339306e-05,
944
- "loss": 0.4287,
945
- "step": 1360
946
- },
947
- {
948
- "epoch": 10.98,
949
- "learning_rate": 7.019115890083633e-05,
950
- "loss": 0.4399,
951
- "step": 1370
952
- },
953
- {
954
- "epoch": 11.0,
955
- "eval_accuracy": 0.8021420518602029,
956
- "eval_f1": 0.7994709899560392,
957
- "eval_loss": 0.5973069667816162,
958
- "eval_precision": 0.8012595725937439,
959
- "eval_recall": 0.8021420518602029,
960
- "eval_runtime": 13.9088,
961
- "eval_samples_per_second": 127.545,
962
- "eval_steps_per_second": 4.026,
963
- "step": 1372
964
- },
965
- {
966
- "epoch": 11.06,
967
- "learning_rate": 6.989247311827958e-05,
968
- "loss": 0.4462,
969
- "step": 1380
970
- },
971
- {
972
- "epoch": 11.14,
973
- "learning_rate": 6.959378733572283e-05,
974
- "loss": 0.4108,
975
- "step": 1390
976
- },
977
- {
978
- "epoch": 11.22,
979
- "learning_rate": 6.929510155316607e-05,
980
- "loss": 0.4281,
981
- "step": 1400
982
- },
983
- {
984
- "epoch": 11.3,
985
- "learning_rate": 6.899641577060932e-05,
986
- "loss": 0.4398,
987
- "step": 1410
988
- },
989
- {
990
- "epoch": 11.38,
991
- "learning_rate": 6.869772998805257e-05,
992
- "loss": 0.4099,
993
- "step": 1420
994
- },
995
- {
996
- "epoch": 11.46,
997
- "learning_rate": 6.839904420549582e-05,
998
- "loss": 0.416,
999
- "step": 1430
1000
- },
1001
- {
1002
- "epoch": 11.54,
1003
- "learning_rate": 6.810035842293908e-05,
1004
- "loss": 0.4344,
1005
- "step": 1440
1006
- },
1007
- {
1008
- "epoch": 11.62,
1009
- "learning_rate": 6.780167264038231e-05,
1010
- "loss": 0.4743,
1011
- "step": 1450
1012
- },
1013
- {
1014
- "epoch": 11.7,
1015
- "learning_rate": 6.750298685782557e-05,
1016
- "loss": 0.4653,
1017
- "step": 1460
1018
- },
1019
- {
1020
- "epoch": 11.78,
1021
- "learning_rate": 6.720430107526882e-05,
1022
- "loss": 0.432,
1023
- "step": 1470
1024
- },
1025
- {
1026
- "epoch": 11.86,
1027
- "learning_rate": 6.690561529271207e-05,
1028
- "loss": 0.401,
1029
- "step": 1480
1030
- },
1031
- {
1032
- "epoch": 11.94,
1033
- "learning_rate": 6.660692951015532e-05,
1034
- "loss": 0.4055,
1035
- "step": 1490
1036
- },
1037
- {
1038
- "epoch": 12.0,
1039
- "eval_accuracy": 0.8156708004509583,
1040
- "eval_f1": 0.8121920876994667,
1041
- "eval_loss": 0.5799275040626526,
1042
- "eval_precision": 0.8186260895692437,
1043
- "eval_recall": 0.8156708004509583,
1044
- "eval_runtime": 13.8727,
1045
- "eval_samples_per_second": 127.877,
1046
- "eval_steps_per_second": 4.037,
1047
- "step": 1497
1048
- },
1049
- {
1050
- "epoch": 12.02,
1051
- "learning_rate": 6.630824372759857e-05,
1052
- "loss": 0.4159,
1053
- "step": 1500
1054
- },
1055
- {
1056
- "epoch": 12.1,
1057
- "learning_rate": 6.600955794504182e-05,
1058
- "loss": 0.4114,
1059
- "step": 1510
1060
- },
1061
- {
1062
- "epoch": 12.18,
1063
- "learning_rate": 6.571087216248507e-05,
1064
- "loss": 0.3556,
1065
- "step": 1520
1066
- },
1067
- {
1068
- "epoch": 12.26,
1069
- "learning_rate": 6.541218637992833e-05,
1070
- "loss": 0.3985,
1071
- "step": 1530
1072
- },
1073
- {
1074
- "epoch": 12.34,
1075
- "learning_rate": 6.511350059737156e-05,
1076
- "loss": 0.3881,
1077
- "step": 1540
1078
- },
1079
- {
1080
- "epoch": 12.42,
1081
- "learning_rate": 6.481481481481482e-05,
1082
- "loss": 0.4056,
1083
- "step": 1550
1084
- },
1085
- {
1086
- "epoch": 12.51,
1087
- "learning_rate": 6.451612903225807e-05,
1088
- "loss": 0.376,
1089
- "step": 1560
1090
- },
1091
- {
1092
- "epoch": 12.59,
1093
- "learning_rate": 6.421744324970132e-05,
1094
- "loss": 0.4252,
1095
- "step": 1570
1096
- },
1097
- {
1098
- "epoch": 12.67,
1099
- "learning_rate": 6.391875746714456e-05,
1100
- "loss": 0.4305,
1101
- "step": 1580
1102
- },
1103
- {
1104
- "epoch": 12.75,
1105
- "learning_rate": 6.362007168458781e-05,
1106
- "loss": 0.4046,
1107
- "step": 1590
1108
- },
1109
- {
1110
- "epoch": 12.83,
1111
- "learning_rate": 6.332138590203107e-05,
1112
- "loss": 0.4145,
1113
- "step": 1600
1114
- },
1115
- {
1116
- "epoch": 12.91,
1117
- "learning_rate": 6.302270011947432e-05,
1118
- "loss": 0.3957,
1119
- "step": 1610
1120
- },
1121
- {
1122
- "epoch": 12.99,
1123
- "learning_rate": 6.272401433691756e-05,
1124
- "loss": 0.417,
1125
- "step": 1620
1126
- },
1127
- {
1128
- "epoch": 12.99,
1129
- "eval_accuracy": 0.8060879368658399,
1130
- "eval_f1": 0.8065978971439847,
1131
- "eval_loss": 0.6005584597587585,
1132
- "eval_precision": 0.8174852930657591,
1133
- "eval_recall": 0.8060879368658399,
1134
- "eval_runtime": 13.8751,
1135
- "eval_samples_per_second": 127.855,
1136
- "eval_steps_per_second": 4.036,
1137
- "step": 1621
1138
- },
1139
- {
1140
- "epoch": 13.07,
1141
- "learning_rate": 6.242532855436081e-05,
1142
- "loss": 0.3922,
1143
- "step": 1630
1144
- },
1145
- {
1146
- "epoch": 13.15,
1147
- "learning_rate": 6.212664277180407e-05,
1148
- "loss": 0.3669,
1149
- "step": 1640
1150
- },
1151
- {
1152
- "epoch": 13.23,
1153
- "learning_rate": 6.182795698924732e-05,
1154
- "loss": 0.3481,
1155
- "step": 1650
1156
- },
1157
- {
1158
- "epoch": 13.31,
1159
- "learning_rate": 6.152927120669057e-05,
1160
- "loss": 0.4228,
1161
- "step": 1660
1162
- },
1163
- {
1164
- "epoch": 13.39,
1165
- "learning_rate": 6.12305854241338e-05,
1166
- "loss": 0.4005,
1167
- "step": 1670
1168
- },
1169
- {
1170
- "epoch": 13.47,
1171
- "learning_rate": 6.093189964157706e-05,
1172
- "loss": 0.3907,
1173
- "step": 1680
1174
- },
1175
- {
1176
- "epoch": 13.55,
1177
- "learning_rate": 6.063321385902031e-05,
1178
- "loss": 0.3794,
1179
- "step": 1690
1180
- },
1181
- {
1182
- "epoch": 13.63,
1183
- "learning_rate": 6.033452807646356e-05,
1184
- "loss": 0.3674,
1185
- "step": 1700
1186
- },
1187
- {
1188
- "epoch": 13.71,
1189
- "learning_rate": 6.0035842293906806e-05,
1190
- "loss": 0.3755,
1191
- "step": 1710
1192
- },
1193
- {
1194
- "epoch": 13.79,
1195
- "learning_rate": 5.9737156511350064e-05,
1196
- "loss": 0.3894,
1197
- "step": 1720
1198
- },
1199
- {
1200
- "epoch": 13.87,
1201
- "learning_rate": 5.9438470728793316e-05,
1202
- "loss": 0.3896,
1203
- "step": 1730
1204
- },
1205
- {
1206
- "epoch": 13.95,
1207
- "learning_rate": 5.913978494623657e-05,
1208
- "loss": 0.3843,
1209
- "step": 1740
1210
- },
1211
- {
1212
- "epoch": 14.0,
1213
- "eval_accuracy": 0.8235625704622322,
1214
- "eval_f1": 0.8211884981742252,
1215
- "eval_loss": 0.5849118232727051,
1216
- "eval_precision": 0.8257093320559268,
1217
- "eval_recall": 0.8235625704622322,
1218
- "eval_runtime": 13.9088,
1219
- "eval_samples_per_second": 127.545,
1220
- "eval_steps_per_second": 4.026,
1221
- "step": 1746
1222
- },
1223
- {
1224
- "epoch": 14.03,
1225
- "learning_rate": 5.884109916367981e-05,
1226
- "loss": 0.3812,
1227
- "step": 1750
1228
- },
1229
- {
1230
- "epoch": 14.11,
1231
- "learning_rate": 5.8542413381123063e-05,
1232
- "loss": 0.3649,
1233
- "step": 1760
1234
- },
1235
- {
1236
- "epoch": 14.19,
1237
- "learning_rate": 5.824372759856631e-05,
1238
- "loss": 0.3558,
1239
- "step": 1770
1240
- },
1241
- {
1242
- "epoch": 14.27,
1243
- "learning_rate": 5.794504181600956e-05,
1244
- "loss": 0.3546,
1245
- "step": 1780
1246
- },
1247
- {
1248
- "epoch": 14.35,
1249
- "learning_rate": 5.764635603345281e-05,
1250
- "loss": 0.3358,
1251
- "step": 1790
1252
- },
1253
- {
1254
- "epoch": 14.43,
1255
- "learning_rate": 5.7347670250896056e-05,
1256
- "loss": 0.3642,
1257
- "step": 1800
1258
- },
1259
- {
1260
- "epoch": 14.51,
1261
- "learning_rate": 5.704898446833931e-05,
1262
- "loss": 0.3864,
1263
- "step": 1810
1264
- },
1265
- {
1266
- "epoch": 14.59,
1267
- "learning_rate": 5.675029868578255e-05,
1268
- "loss": 0.3888,
1269
- "step": 1820
1270
- },
1271
- {
1272
- "epoch": 14.67,
1273
- "learning_rate": 5.645161290322582e-05,
1274
- "loss": 0.3685,
1275
- "step": 1830
1276
- },
1277
- {
1278
- "epoch": 14.75,
1279
- "learning_rate": 5.615292712066906e-05,
1280
- "loss": 0.3526,
1281
- "step": 1840
1282
- },
1283
- {
1284
- "epoch": 14.83,
1285
- "learning_rate": 5.5854241338112313e-05,
1286
- "loss": 0.3508,
1287
- "step": 1850
1288
- },
1289
- {
1290
- "epoch": 14.91,
1291
- "learning_rate": 5.555555555555556e-05,
1292
- "loss": 0.3899,
1293
- "step": 1860
1294
- },
1295
- {
1296
- "epoch": 14.99,
1297
- "learning_rate": 5.525686977299881e-05,
1298
- "loss": 0.371,
1299
- "step": 1870
1300
- },
1301
- {
1302
- "epoch": 15.0,
1303
- "eval_accuracy": 0.8196166854565953,
1304
- "eval_f1": 0.8160946296928634,
1305
- "eval_loss": 0.5710554718971252,
1306
- "eval_precision": 0.8157171716885654,
1307
- "eval_recall": 0.8196166854565953,
1308
- "eval_runtime": 13.9159,
1309
- "eval_samples_per_second": 127.48,
1310
- "eval_steps_per_second": 4.024,
1311
- "step": 1871
1312
- },
1313
- {
1314
- "epoch": 15.07,
1315
- "learning_rate": 5.495818399044206e-05,
1316
- "loss": 0.3403,
1317
- "step": 1880
1318
- },
1319
- {
1320
- "epoch": 15.15,
1321
- "learning_rate": 5.4659498207885306e-05,
1322
- "loss": 0.3424,
1323
- "step": 1890
1324
- },
1325
- {
1326
- "epoch": 15.23,
1327
- "learning_rate": 5.436081242532856e-05,
1328
- "loss": 0.3215,
1329
- "step": 1900
1330
- },
1331
- {
1332
- "epoch": 15.31,
1333
- "learning_rate": 5.40621266427718e-05,
1334
- "loss": 0.3331,
1335
- "step": 1910
1336
- },
1337
- {
1338
- "epoch": 15.39,
1339
- "learning_rate": 5.3763440860215054e-05,
1340
- "loss": 0.3256,
1341
- "step": 1920
1342
- },
1343
- {
1344
- "epoch": 15.47,
1345
- "learning_rate": 5.34647550776583e-05,
1346
- "loss": 0.3904,
1347
- "step": 1930
1348
- },
1349
- {
1350
- "epoch": 15.55,
1351
- "learning_rate": 5.316606929510155e-05,
1352
- "loss": 0.3341,
1353
- "step": 1940
1354
- },
1355
- {
1356
- "epoch": 15.63,
1357
- "learning_rate": 5.286738351254481e-05,
1358
- "loss": 0.3314,
1359
- "step": 1950
1360
- },
1361
- {
1362
- "epoch": 15.71,
1363
- "learning_rate": 5.256869772998806e-05,
1364
- "loss": 0.3456,
1365
- "step": 1960
1366
- },
1367
- {
1368
- "epoch": 15.79,
1369
- "learning_rate": 5.2270011947431304e-05,
1370
- "loss": 0.3519,
1371
- "step": 1970
1372
- },
1373
- {
1374
- "epoch": 15.87,
1375
- "learning_rate": 5.1971326164874556e-05,
1376
- "loss": 0.3828,
1377
- "step": 1980
1378
- },
1379
- {
1380
- "epoch": 15.95,
1381
- "learning_rate": 5.167264038231781e-05,
1382
- "loss": 0.3546,
1383
- "step": 1990
1384
- },
1385
- {
1386
- "epoch": 16.0,
1387
- "eval_accuracy": 0.8139797068771139,
1388
- "eval_f1": 0.8146871974463645,
1389
- "eval_loss": 0.6050013899803162,
1390
- "eval_precision": 0.8170888548475337,
1391
- "eval_recall": 0.8139797068771139,
1392
- "eval_runtime": 13.9647,
1393
- "eval_samples_per_second": 127.034,
1394
- "eval_steps_per_second": 4.01,
1395
- "step": 1996
1396
- },
1397
- {
1398
- "epoch": 16.03,
1399
- "learning_rate": 5.137395459976105e-05,
1400
- "loss": 0.2979,
1401
- "step": 2000
1402
- },
1403
- {
1404
- "epoch": 16.11,
1405
- "learning_rate": 5.1075268817204304e-05,
1406
- "loss": 0.3127,
1407
- "step": 2010
1408
- },
1409
- {
1410
- "epoch": 16.19,
1411
- "learning_rate": 5.077658303464755e-05,
1412
- "loss": 0.3268,
1413
- "step": 2020
1414
- },
1415
- {
1416
- "epoch": 16.27,
1417
- "learning_rate": 5.04778972520908e-05,
1418
- "loss": 0.3028,
1419
- "step": 2030
1420
- },
1421
- {
1422
- "epoch": 16.35,
1423
- "learning_rate": 5.017921146953405e-05,
1424
- "loss": 0.3038,
1425
- "step": 2040
1426
- },
1427
- {
1428
- "epoch": 16.43,
1429
- "learning_rate": 4.98805256869773e-05,
1430
- "loss": 0.3026,
1431
- "step": 2050
1432
- },
1433
- {
1434
- "epoch": 16.51,
1435
- "learning_rate": 4.9581839904420555e-05,
1436
- "loss": 0.3286,
1437
- "step": 2060
1438
- },
1439
- {
1440
- "epoch": 16.59,
1441
- "learning_rate": 4.92831541218638e-05,
1442
- "loss": 0.3566,
1443
- "step": 2070
1444
- },
1445
- {
1446
- "epoch": 16.67,
1447
- "learning_rate": 4.898446833930705e-05,
1448
- "loss": 0.3359,
1449
- "step": 2080
1450
- },
1451
- {
1452
- "epoch": 16.75,
1453
- "learning_rate": 4.86857825567503e-05,
1454
- "loss": 0.3429,
1455
- "step": 2090
1456
- },
1457
- {
1458
- "epoch": 16.83,
1459
- "learning_rate": 4.8387096774193554e-05,
1460
- "loss": 0.3022,
1461
- "step": 2100
1462
- },
1463
- {
1464
- "epoch": 16.91,
1465
- "learning_rate": 4.80884109916368e-05,
1466
- "loss": 0.2499,
1467
- "step": 2110
1468
- },
1469
- {
1470
- "epoch": 16.99,
1471
- "learning_rate": 4.778972520908005e-05,
1472
- "loss": 0.2935,
1473
- "step": 2120
1474
- },
1475
- {
1476
- "epoch": 16.99,
1477
- "eval_accuracy": 0.8105975197294251,
1478
- "eval_f1": 0.8091270414673493,
1479
- "eval_loss": 0.6425190567970276,
1480
- "eval_precision": 0.8159338106488563,
1481
- "eval_recall": 0.8105975197294251,
1482
- "eval_runtime": 13.9131,
1483
- "eval_samples_per_second": 127.506,
1484
- "eval_steps_per_second": 4.025,
1485
- "step": 2120
1486
- },
1487
- {
1488
- "epoch": 17.07,
1489
- "learning_rate": 4.74910394265233e-05,
1490
- "loss": 0.3291,
1491
- "step": 2130
1492
- },
1493
- {
1494
- "epoch": 17.15,
1495
- "learning_rate": 4.7192353643966546e-05,
1496
- "loss": 0.2689,
1497
- "step": 2140
1498
- },
1499
- {
1500
- "epoch": 17.23,
1501
- "learning_rate": 4.6893667861409805e-05,
1502
- "loss": 0.3085,
1503
- "step": 2150
1504
- },
1505
- {
1506
- "epoch": 17.31,
1507
- "learning_rate": 4.659498207885305e-05,
1508
- "loss": 0.3301,
1509
- "step": 2160
1510
- },
1511
- {
1512
- "epoch": 17.39,
1513
- "learning_rate": 4.62962962962963e-05,
1514
- "loss": 0.3513,
1515
- "step": 2170
1516
- },
1517
- {
1518
- "epoch": 17.47,
1519
- "learning_rate": 4.5997610513739546e-05,
1520
- "loss": 0.3126,
1521
- "step": 2180
1522
- },
1523
- {
1524
- "epoch": 17.56,
1525
- "learning_rate": 4.56989247311828e-05,
1526
- "loss": 0.3097,
1527
- "step": 2190
1528
- },
1529
- {
1530
- "epoch": 17.64,
1531
- "learning_rate": 4.540023894862604e-05,
1532
- "loss": 0.2827,
1533
- "step": 2200
1534
- },
1535
- {
1536
- "epoch": 17.72,
1537
- "learning_rate": 4.51015531660693e-05,
1538
- "loss": 0.3009,
1539
- "step": 2210
1540
- },
1541
- {
1542
- "epoch": 17.8,
1543
- "learning_rate": 4.4802867383512545e-05,
1544
- "loss": 0.2964,
1545
- "step": 2220
1546
- },
1547
- {
1548
- "epoch": 17.88,
1549
- "learning_rate": 4.4504181600955796e-05,
1550
- "loss": 0.3333,
1551
- "step": 2230
1552
- },
1553
- {
1554
- "epoch": 17.96,
1555
- "learning_rate": 4.420549581839905e-05,
1556
- "loss": 0.2505,
1557
- "step": 2240
1558
- },
1559
- {
1560
- "epoch": 18.0,
1561
- "eval_accuracy": 0.8111612175873731,
1562
- "eval_f1": 0.8085505452503554,
1563
- "eval_loss": 0.6569081544876099,
1564
- "eval_precision": 0.8090615845706326,
1565
- "eval_recall": 0.8111612175873731,
1566
- "eval_runtime": 13.9528,
1567
- "eval_samples_per_second": 127.143,
1568
- "eval_steps_per_second": 4.014,
1569
- "step": 2245
1570
- },
1571
- {
1572
- "epoch": 18.04,
1573
- "learning_rate": 4.390681003584229e-05,
1574
- "loss": 0.3185,
1575
- "step": 2250
1576
- },
1577
- {
1578
- "epoch": 18.12,
1579
- "learning_rate": 4.360812425328555e-05,
1580
- "loss": 0.2652,
1581
- "step": 2260
1582
- },
1583
- {
1584
- "epoch": 18.2,
1585
- "learning_rate": 4.3309438470728796e-05,
1586
- "loss": 0.2574,
1587
- "step": 2270
1588
- },
1589
- {
1590
- "epoch": 18.28,
1591
- "learning_rate": 4.301075268817205e-05,
1592
- "loss": 0.2881,
1593
- "step": 2280
1594
- },
1595
- {
1596
- "epoch": 18.36,
1597
- "learning_rate": 4.271206690561529e-05,
1598
- "loss": 0.2817,
1599
- "step": 2290
1600
- },
1601
- {
1602
- "epoch": 18.44,
1603
- "learning_rate": 4.241338112305854e-05,
1604
- "loss": 0.2459,
1605
- "step": 2300
1606
- },
1607
- {
1608
- "epoch": 18.52,
1609
- "learning_rate": 4.2114695340501795e-05,
1610
- "loss": 0.2439,
1611
- "step": 2310
1612
- },
1613
- {
1614
- "epoch": 18.6,
1615
- "learning_rate": 4.1816009557945046e-05,
1616
- "loss": 0.3175,
1617
- "step": 2320
1618
- },
1619
- {
1620
- "epoch": 18.68,
1621
- "learning_rate": 4.15173237753883e-05,
1622
- "loss": 0.2769,
1623
- "step": 2330
1624
- },
1625
- {
1626
- "epoch": 18.76,
1627
- "learning_rate": 4.121863799283154e-05,
1628
- "loss": 0.2501,
1629
- "step": 2340
1630
- },
1631
- {
1632
- "epoch": 18.84,
1633
- "learning_rate": 4.0919952210274794e-05,
1634
- "loss": 0.2753,
1635
- "step": 2350
1636
- },
1637
- {
1638
- "epoch": 18.92,
1639
- "learning_rate": 4.062126642771804e-05,
1640
- "loss": 0.2596,
1641
- "step": 2360
1642
- },
1643
- {
1644
- "epoch": 19.0,
1645
- "learning_rate": 4.032258064516129e-05,
1646
- "loss": 0.3094,
1647
- "step": 2370
1648
- },
1649
- {
1650
- "epoch": 19.0,
1651
- "eval_accuracy": 0.8162344983089064,
1652
- "eval_f1": 0.8137230782611028,
1653
- "eval_loss": 0.6557679176330566,
1654
- "eval_precision": 0.8136560183765512,
1655
- "eval_recall": 0.8162344983089064,
1656
- "eval_runtime": 13.916,
1657
- "eval_samples_per_second": 127.479,
1658
- "eval_steps_per_second": 4.024,
1659
- "step": 2370
1660
- },
1661
- {
1662
- "epoch": 19.08,
1663
- "learning_rate": 4.002389486260454e-05,
1664
- "loss": 0.2808,
1665
- "step": 2380
1666
- },
1667
- {
1668
- "epoch": 19.16,
1669
- "learning_rate": 3.972520908004779e-05,
1670
- "loss": 0.2711,
1671
- "step": 2390
1672
- },
1673
- {
1674
- "epoch": 19.24,
1675
- "learning_rate": 3.9426523297491045e-05,
1676
- "loss": 0.2398,
1677
- "step": 2400
1678
- },
1679
- {
1680
- "epoch": 19.32,
1681
- "learning_rate": 3.912783751493429e-05,
1682
- "loss": 0.2486,
1683
- "step": 2410
1684
- },
1685
- {
1686
- "epoch": 19.4,
1687
- "learning_rate": 3.882915173237754e-05,
1688
- "loss": 0.298,
1689
- "step": 2420
1690
- },
1691
- {
1692
- "epoch": 19.48,
1693
- "learning_rate": 3.8530465949820786e-05,
1694
- "loss": 0.2912,
1695
- "step": 2430
1696
- },
1697
- {
1698
- "epoch": 19.56,
1699
- "learning_rate": 3.8231780167264044e-05,
1700
- "loss": 0.2639,
1701
- "step": 2440
1702
- },
1703
- {
1704
- "epoch": 19.64,
1705
- "learning_rate": 3.793309438470729e-05,
1706
- "loss": 0.2576,
1707
- "step": 2450
1708
- },
1709
- {
1710
- "epoch": 19.72,
1711
- "learning_rate": 3.763440860215054e-05,
1712
- "loss": 0.279,
1713
- "step": 2460
1714
- },
1715
- {
1716
- "epoch": 19.8,
1717
- "learning_rate": 3.7335722819593785e-05,
1718
- "loss": 0.2663,
1719
- "step": 2470
1720
- },
1721
- {
1722
- "epoch": 19.88,
1723
- "learning_rate": 3.7037037037037037e-05,
1724
- "loss": 0.2825,
1725
- "step": 2480
1726
- },
1727
- {
1728
- "epoch": 19.96,
1729
- "learning_rate": 3.673835125448029e-05,
1730
- "loss": 0.2739,
1731
- "step": 2490
1732
- },
1733
- {
1734
- "epoch": 20.0,
1735
- "eval_accuracy": 0.8066516347237881,
1736
- "eval_f1": 0.802546589933035,
1737
- "eval_loss": 0.7201483249664307,
1738
- "eval_precision": 0.8094414374058572,
1739
- "eval_recall": 0.8066516347237881,
1740
- "eval_runtime": 13.9211,
1741
- "eval_samples_per_second": 127.433,
1742
- "eval_steps_per_second": 4.023,
1743
- "step": 2495
1744
- },
1745
- {
1746
- "epoch": 20.04,
1747
- "learning_rate": 3.643966547192354e-05,
1748
- "loss": 0.2703,
1749
- "step": 2500
1750
- },
1751
- {
1752
- "epoch": 20.12,
1753
- "learning_rate": 3.614097968936679e-05,
1754
- "loss": 0.2246,
1755
- "step": 2510
1756
- },
1757
- {
1758
- "epoch": 20.2,
1759
- "learning_rate": 3.5842293906810036e-05,
1760
- "loss": 0.2629,
1761
- "step": 2520
1762
- },
1763
- {
1764
- "epoch": 20.28,
1765
- "learning_rate": 3.554360812425329e-05,
1766
- "loss": 0.262,
1767
- "step": 2530
1768
- },
1769
- {
1770
- "epoch": 20.36,
1771
- "learning_rate": 3.524492234169653e-05,
1772
- "loss": 0.2514,
1773
- "step": 2540
1774
- },
1775
- {
1776
- "epoch": 20.44,
1777
- "learning_rate": 3.494623655913979e-05,
1778
- "loss": 0.2242,
1779
- "step": 2550
1780
- },
1781
- {
1782
- "epoch": 20.52,
1783
- "learning_rate": 3.4647550776583035e-05,
1784
- "loss": 0.2515,
1785
- "step": 2560
1786
- },
1787
- {
1788
- "epoch": 20.6,
1789
- "learning_rate": 3.4348864994026287e-05,
1790
- "loss": 0.2493,
1791
- "step": 2570
1792
- },
1793
- {
1794
- "epoch": 20.68,
1795
- "learning_rate": 3.405017921146954e-05,
1796
- "loss": 0.2214,
1797
- "step": 2580
1798
- },
1799
- {
1800
- "epoch": 20.76,
1801
- "learning_rate": 3.375149342891278e-05,
1802
- "loss": 0.2572,
1803
- "step": 2590
1804
- },
1805
- {
1806
- "epoch": 20.84,
1807
- "learning_rate": 3.3452807646356034e-05,
1808
- "loss": 0.284,
1809
- "step": 2600
1810
- },
1811
- {
1812
- "epoch": 20.92,
1813
- "learning_rate": 3.3154121863799286e-05,
1814
- "loss": 0.2224,
1815
- "step": 2610
1816
- },
1817
- {
1818
- "epoch": 20.99,
1819
- "eval_accuracy": 0.8139797068771139,
1820
- "eval_f1": 0.8113945895758867,
1821
- "eval_loss": 0.7226550579071045,
1822
- "eval_precision": 0.817507486502582,
1823
- "eval_recall": 0.8139797068771139,
1824
- "eval_runtime": 13.9528,
1825
- "eval_samples_per_second": 127.143,
1826
- "eval_steps_per_second": 4.014,
1827
- "step": 2619
1828
- },
1829
- {
1830
- "epoch": 21.0,
1831
- "learning_rate": 3.285543608124254e-05,
1832
- "loss": 0.2487,
1833
- "step": 2620
1834
- },
1835
- {
1836
- "epoch": 21.08,
1837
- "learning_rate": 3.255675029868578e-05,
1838
- "loss": 0.2287,
1839
- "step": 2630
1840
- },
1841
- {
1842
- "epoch": 21.16,
1843
- "learning_rate": 3.2258064516129034e-05,
1844
- "loss": 0.2602,
1845
- "step": 2640
1846
- },
1847
- {
1848
- "epoch": 21.24,
1849
- "learning_rate": 3.195937873357228e-05,
1850
- "loss": 0.2418,
1851
- "step": 2650
1852
- },
1853
- {
1854
- "epoch": 21.32,
1855
- "learning_rate": 3.1660692951015537e-05,
1856
- "loss": 0.2257,
1857
- "step": 2660
1858
- },
1859
- {
1860
- "epoch": 21.4,
1861
- "learning_rate": 3.136200716845878e-05,
1862
- "loss": 0.245,
1863
- "step": 2670
1864
- },
1865
- {
1866
- "epoch": 21.48,
1867
- "learning_rate": 3.106332138590203e-05,
1868
- "loss": 0.2372,
1869
- "step": 2680
1870
- },
1871
- {
1872
- "epoch": 21.56,
1873
- "learning_rate": 3.0764635603345284e-05,
1874
- "loss": 0.2423,
1875
- "step": 2690
1876
- },
1877
- {
1878
- "epoch": 21.64,
1879
- "learning_rate": 3.046594982078853e-05,
1880
- "loss": 0.2382,
1881
- "step": 2700
1882
- },
1883
- {
1884
- "epoch": 21.72,
1885
- "learning_rate": 3.016726403823178e-05,
1886
- "loss": 0.2612,
1887
- "step": 2710
1888
- },
1889
- {
1890
- "epoch": 21.8,
1891
- "learning_rate": 2.9868578255675032e-05,
1892
- "loss": 0.2672,
1893
- "step": 2720
1894
- },
1895
- {
1896
- "epoch": 21.88,
1897
- "learning_rate": 2.9569892473118284e-05,
1898
- "loss": 0.2709,
1899
- "step": 2730
1900
- },
1901
- {
1902
- "epoch": 21.96,
1903
- "learning_rate": 2.9271206690561532e-05,
1904
- "loss": 0.2359,
1905
- "step": 2740
1906
- },
1907
- {
1908
- "epoch": 22.0,
1909
- "eval_accuracy": 0.8156708004509583,
1910
- "eval_f1": 0.8136226295183577,
1911
- "eval_loss": 0.6940749287605286,
1912
- "eval_precision": 0.8141746574510457,
1913
- "eval_recall": 0.8156708004509583,
1914
- "eval_runtime": 13.9138,
1915
- "eval_samples_per_second": 127.499,
1916
- "eval_steps_per_second": 4.025,
1917
- "step": 2744
1918
- },
1919
- {
1920
- "epoch": 22.04,
1921
- "learning_rate": 2.897252090800478e-05,
1922
- "loss": 0.24,
1923
- "step": 2750
1924
- },
1925
- {
1926
- "epoch": 22.12,
1927
- "learning_rate": 2.8673835125448028e-05,
1928
- "loss": 0.2392,
1929
- "step": 2760
1930
- },
1931
- {
1932
- "epoch": 22.2,
1933
- "learning_rate": 2.8375149342891276e-05,
1934
- "loss": 0.2119,
1935
- "step": 2770
1936
- },
1937
- {
1938
- "epoch": 22.28,
1939
- "learning_rate": 2.807646356033453e-05,
1940
- "loss": 0.2302,
1941
- "step": 2780
1942
- },
1943
- {
1944
- "epoch": 22.36,
1945
- "learning_rate": 2.777777777777778e-05,
1946
- "loss": 0.2252,
1947
- "step": 2790
1948
- },
1949
- {
1950
- "epoch": 22.44,
1951
- "learning_rate": 2.747909199522103e-05,
1952
- "loss": 0.2462,
1953
- "step": 2800
1954
- },
1955
- {
1956
- "epoch": 22.53,
1957
- "learning_rate": 2.718040621266428e-05,
1958
- "loss": 0.2013,
1959
- "step": 2810
1960
- },
1961
- {
1962
- "epoch": 22.61,
1963
- "learning_rate": 2.6881720430107527e-05,
1964
- "loss": 0.2161,
1965
- "step": 2820
1966
- },
1967
- {
1968
- "epoch": 22.69,
1969
- "learning_rate": 2.6583034647550775e-05,
1970
- "loss": 0.234,
1971
- "step": 2830
1972
- },
1973
- {
1974
- "epoch": 22.77,
1975
- "learning_rate": 2.628434886499403e-05,
1976
- "loss": 0.2262,
1977
- "step": 2840
1978
- },
1979
- {
1980
- "epoch": 22.85,
1981
- "learning_rate": 2.5985663082437278e-05,
1982
- "loss": 0.2512,
1983
- "step": 2850
1984
- },
1985
- {
1986
- "epoch": 22.93,
1987
- "learning_rate": 2.5686977299880526e-05,
1988
- "loss": 0.2535,
1989
- "step": 2860
1990
- },
1991
- {
1992
- "epoch": 23.0,
1993
- "eval_accuracy": 0.8156708004509583,
1994
- "eval_f1": 0.81227251289756,
1995
- "eval_loss": 0.7086111307144165,
1996
- "eval_precision": 0.8160279268347481,
1997
- "eval_recall": 0.8156708004509583,
1998
- "eval_runtime": 13.9265,
1999
- "eval_samples_per_second": 127.383,
2000
- "eval_steps_per_second": 4.021,
2001
- "step": 2869
2002
- },
2003
- {
2004
- "epoch": 23.01,
2005
- "learning_rate": 2.5388291517323774e-05,
2006
- "loss": 0.2342,
2007
- "step": 2870
2008
- },
2009
- {
2010
- "epoch": 23.09,
2011
- "learning_rate": 2.5089605734767026e-05,
2012
- "loss": 0.2462,
2013
- "step": 2880
2014
- },
2015
- {
2016
- "epoch": 23.17,
2017
- "learning_rate": 2.4790919952210277e-05,
2018
- "loss": 0.2265,
2019
- "step": 2890
2020
- },
2021
- {
2022
- "epoch": 23.25,
2023
- "learning_rate": 2.4492234169653525e-05,
2024
- "loss": 0.2021,
2025
- "step": 2900
2026
- },
2027
- {
2028
- "epoch": 23.33,
2029
- "learning_rate": 2.4193548387096777e-05,
2030
- "loss": 0.2297,
2031
- "step": 2910
2032
- },
2033
- {
2034
- "epoch": 23.41,
2035
- "learning_rate": 2.3894862604540025e-05,
2036
- "loss": 0.2118,
2037
- "step": 2920
2038
- },
2039
- {
2040
- "epoch": 23.49,
2041
- "learning_rate": 2.3596176821983273e-05,
2042
- "loss": 0.2336,
2043
- "step": 2930
2044
- },
2045
- {
2046
- "epoch": 23.57,
2047
- "learning_rate": 2.3297491039426525e-05,
2048
- "loss": 0.2567,
2049
- "step": 2940
2050
- },
2051
- {
2052
- "epoch": 23.65,
2053
- "learning_rate": 2.2998805256869773e-05,
2054
- "loss": 0.2195,
2055
- "step": 2950
2056
- },
2057
- {
2058
- "epoch": 23.73,
2059
- "learning_rate": 2.270011947431302e-05,
2060
- "loss": 0.2118,
2061
- "step": 2960
2062
- },
2063
- {
2064
- "epoch": 23.81,
2065
- "learning_rate": 2.2401433691756272e-05,
2066
- "loss": 0.2059,
2067
- "step": 2970
2068
- },
2069
- {
2070
- "epoch": 23.89,
2071
- "learning_rate": 2.2102747909199524e-05,
2072
- "loss": 0.2296,
2073
- "step": 2980
2074
- },
2075
- {
2076
- "epoch": 23.97,
2077
- "learning_rate": 2.1804062126642775e-05,
2078
- "loss": 0.2047,
2079
- "step": 2990
2080
- },
2081
- {
2082
- "epoch": 24.0,
2083
- "eval_accuracy": 0.8235625704622322,
2084
- "eval_f1": 0.8207213990294976,
2085
- "eval_loss": 0.718461275100708,
2086
- "eval_precision": 0.8235593618387343,
2087
- "eval_recall": 0.8235625704622322,
2088
- "eval_runtime": 13.9231,
2089
- "eval_samples_per_second": 127.414,
2090
- "eval_steps_per_second": 4.022,
2091
- "step": 2994
2092
- },
2093
- {
2094
- "epoch": 24.05,
2095
- "learning_rate": 2.1505376344086024e-05,
2096
- "loss": 0.2186,
2097
- "step": 3000
2098
- },
2099
- {
2100
- "epoch": 24.13,
2101
- "learning_rate": 2.120669056152927e-05,
2102
- "loss": 0.225,
2103
- "step": 3010
2104
- },
2105
- {
2106
- "epoch": 24.21,
2107
- "learning_rate": 2.0908004778972523e-05,
2108
- "loss": 0.2026,
2109
- "step": 3020
2110
- },
2111
- {
2112
- "epoch": 24.29,
2113
- "learning_rate": 2.060931899641577e-05,
2114
- "loss": 0.2395,
2115
- "step": 3030
2116
- },
2117
- {
2118
- "epoch": 24.37,
2119
- "learning_rate": 2.031063321385902e-05,
2120
- "loss": 0.2248,
2121
- "step": 3040
2122
- },
2123
- {
2124
- "epoch": 24.45,
2125
- "learning_rate": 2.001194743130227e-05,
2126
- "loss": 0.2428,
2127
- "step": 3050
2128
- },
2129
- {
2130
- "epoch": 24.53,
2131
- "learning_rate": 1.9713261648745522e-05,
2132
- "loss": 0.205,
2133
- "step": 3060
2134
- },
2135
- {
2136
- "epoch": 24.61,
2137
- "learning_rate": 1.941457586618877e-05,
2138
- "loss": 0.1945,
2139
- "step": 3070
2140
- },
2141
- {
2142
- "epoch": 24.69,
2143
- "learning_rate": 1.9115890083632022e-05,
2144
- "loss": 0.2383,
2145
- "step": 3080
2146
- },
2147
- {
2148
- "epoch": 24.77,
2149
- "learning_rate": 1.881720430107527e-05,
2150
- "loss": 0.2024,
2151
- "step": 3090
2152
- },
2153
- {
2154
- "epoch": 24.85,
2155
- "learning_rate": 1.8518518518518518e-05,
2156
- "loss": 0.2566,
2157
- "step": 3100
2158
- },
2159
- {
2160
- "epoch": 24.93,
2161
- "learning_rate": 1.821983273596177e-05,
2162
- "loss": 0.2162,
2163
- "step": 3110
2164
- },
2165
- {
2166
- "epoch": 24.99,
2167
- "eval_accuracy": 0.8218714768883878,
2168
- "eval_f1": 0.819374560219833,
2169
- "eval_loss": 0.7135424017906189,
2170
- "eval_precision": 0.8200242274112762,
2171
- "eval_recall": 0.8218714768883878,
2172
- "eval_runtime": 13.9436,
2173
- "eval_samples_per_second": 127.227,
2174
- "eval_steps_per_second": 4.016,
2175
- "step": 3118
2176
- },
2177
- {
2178
- "epoch": 25.01,
2179
- "learning_rate": 1.7921146953405018e-05,
2180
- "loss": 0.1963,
2181
- "step": 3120
2182
- },
2183
- {
2184
- "epoch": 25.09,
2185
- "learning_rate": 1.7622461170848266e-05,
2186
- "loss": 0.2148,
2187
- "step": 3130
2188
- },
2189
- {
2190
- "epoch": 25.17,
2191
- "learning_rate": 1.7323775388291518e-05,
2192
- "loss": 0.1822,
2193
- "step": 3140
2194
- },
2195
- {
2196
- "epoch": 25.25,
2197
- "learning_rate": 1.702508960573477e-05,
2198
- "loss": 0.1892,
2199
- "step": 3150
2200
- },
2201
- {
2202
- "epoch": 25.33,
2203
- "learning_rate": 1.6726403823178017e-05,
2204
- "loss": 0.1957,
2205
- "step": 3160
2206
- },
2207
- {
2208
- "epoch": 25.41,
2209
- "learning_rate": 1.642771804062127e-05,
2210
- "loss": 0.1992,
2211
- "step": 3170
2212
- },
2213
- {
2214
- "epoch": 25.49,
2215
- "learning_rate": 1.6129032258064517e-05,
2216
- "loss": 0.2256,
2217
- "step": 3180
2218
- },
2219
- {
2220
- "epoch": 25.57,
2221
- "learning_rate": 1.5830346475507768e-05,
2222
- "loss": 0.2044,
2223
- "step": 3190
2224
- },
2225
- {
2226
- "epoch": 25.65,
2227
- "learning_rate": 1.5531660692951016e-05,
2228
- "loss": 0.1935,
2229
- "step": 3200
2230
- },
2231
- {
2232
- "epoch": 25.73,
2233
- "learning_rate": 1.5232974910394265e-05,
2234
- "loss": 0.204,
2235
- "step": 3210
2236
- },
2237
- {
2238
- "epoch": 25.81,
2239
- "learning_rate": 1.4934289127837516e-05,
2240
- "loss": 0.1704,
2241
- "step": 3220
2242
- },
2243
- {
2244
- "epoch": 25.89,
2245
- "learning_rate": 1.4635603345280766e-05,
2246
- "loss": 0.2058,
2247
- "step": 3230
2248
- },
2249
- {
2250
- "epoch": 25.97,
2251
- "learning_rate": 1.4336917562724014e-05,
2252
- "loss": 0.2297,
2253
- "step": 3240
2254
- },
2255
- {
2256
- "epoch": 26.0,
2257
- "eval_accuracy": 0.8213077790304397,
2258
- "eval_f1": 0.8179171868511029,
2259
- "eval_loss": 0.7269182801246643,
2260
- "eval_precision": 0.8171612109698507,
2261
- "eval_recall": 0.8213077790304397,
2262
- "eval_runtime": 13.9251,
2263
- "eval_samples_per_second": 127.396,
2264
- "eval_steps_per_second": 4.022,
2265
- "step": 3243
2266
- },
2267
- {
2268
- "epoch": 26.05,
2269
- "learning_rate": 1.4038231780167265e-05,
2270
- "loss": 0.204,
2271
- "step": 3250
2272
- },
2273
- {
2274
- "epoch": 26.13,
2275
- "learning_rate": 1.3739545997610515e-05,
2276
- "loss": 0.239,
2277
- "step": 3260
2278
- },
2279
- {
2280
- "epoch": 26.21,
2281
- "learning_rate": 1.3440860215053763e-05,
2282
- "loss": 0.194,
2283
- "step": 3270
2284
- },
2285
- {
2286
- "epoch": 26.29,
2287
- "learning_rate": 1.3142174432497015e-05,
2288
- "loss": 0.1982,
2289
- "step": 3280
2290
- },
2291
- {
2292
- "epoch": 26.37,
2293
- "learning_rate": 1.2843488649940263e-05,
2294
- "loss": 0.2319,
2295
- "step": 3290
2296
- },
2297
- {
2298
- "epoch": 26.45,
2299
- "learning_rate": 1.2544802867383513e-05,
2300
- "loss": 0.2058,
2301
- "step": 3300
2302
- },
2303
- {
2304
- "epoch": 26.53,
2305
- "learning_rate": 1.2246117084826763e-05,
2306
- "loss": 0.2072,
2307
- "step": 3310
2308
- },
2309
- {
2310
- "epoch": 26.61,
2311
- "learning_rate": 1.1947431302270013e-05,
2312
- "loss": 0.1955,
2313
- "step": 3320
2314
- },
2315
- {
2316
- "epoch": 26.69,
2317
- "learning_rate": 1.1648745519713262e-05,
2318
- "loss": 0.196,
2319
- "step": 3330
2320
- },
2321
- {
2322
- "epoch": 26.77,
2323
- "learning_rate": 1.135005973715651e-05,
2324
- "loss": 0.2042,
2325
- "step": 3340
2326
- },
2327
- {
2328
- "epoch": 26.85,
2329
- "learning_rate": 1.1051373954599762e-05,
2330
- "loss": 0.2072,
2331
- "step": 3350
2332
- },
2333
- {
2334
- "epoch": 26.93,
2335
- "learning_rate": 1.0752688172043012e-05,
2336
- "loss": 0.2048,
2337
- "step": 3360
2338
- },
2339
- {
2340
- "epoch": 27.0,
2341
- "eval_accuracy": 0.814543404735062,
2342
- "eval_f1": 0.8142732078615984,
2343
- "eval_loss": 0.7392024993896484,
2344
- "eval_precision": 0.8155644054213834,
2345
- "eval_recall": 0.814543404735062,
2346
- "eval_runtime": 13.9919,
2347
- "eval_samples_per_second": 126.787,
2348
- "eval_steps_per_second": 4.002,
2349
- "step": 3368
2350
- },
2351
- {
2352
- "epoch": 27.01,
2353
- "learning_rate": 1.0454002389486262e-05,
2354
- "loss": 0.2096,
2355
- "step": 3370
2356
- },
2357
- {
2358
- "epoch": 27.09,
2359
- "learning_rate": 1.015531660692951e-05,
2360
- "loss": 0.1959,
2361
- "step": 3380
2362
- },
2363
- {
2364
- "epoch": 27.17,
2365
- "learning_rate": 9.856630824372761e-06,
2366
- "loss": 0.192,
2367
- "step": 3390
2368
- },
2369
- {
2370
- "epoch": 27.25,
2371
- "learning_rate": 9.557945041816011e-06,
2372
- "loss": 0.2406,
2373
- "step": 3400
2374
- },
2375
- {
2376
- "epoch": 27.33,
2377
- "learning_rate": 9.259259259259259e-06,
2378
- "loss": 0.2225,
2379
- "step": 3410
2380
- },
2381
- {
2382
- "epoch": 27.41,
2383
- "learning_rate": 8.960573476702509e-06,
2384
- "loss": 0.206,
2385
- "step": 3420
2386
- },
2387
- {
2388
- "epoch": 27.49,
2389
- "learning_rate": 8.661887694145759e-06,
2390
- "loss": 0.1993,
2391
- "step": 3430
2392
- },
2393
- {
2394
- "epoch": 27.58,
2395
- "learning_rate": 8.363201911589009e-06,
2396
- "loss": 0.1926,
2397
- "step": 3440
2398
- },
2399
- {
2400
- "epoch": 27.66,
2401
- "learning_rate": 8.064516129032258e-06,
2402
- "loss": 0.1757,
2403
- "step": 3450
2404
- },
2405
- {
2406
- "epoch": 27.74,
2407
- "learning_rate": 7.765830346475508e-06,
2408
- "loss": 0.1913,
2409
- "step": 3460
2410
- },
2411
- {
2412
- "epoch": 27.82,
2413
- "learning_rate": 7.467144563918758e-06,
2414
- "loss": 0.196,
2415
- "step": 3470
2416
- },
2417
- {
2418
- "epoch": 27.9,
2419
- "learning_rate": 7.168458781362007e-06,
2420
- "loss": 0.1966,
2421
- "step": 3480
2422
- },
2423
- {
2424
- "epoch": 27.98,
2425
- "learning_rate": 6.869772998805258e-06,
2426
- "loss": 0.2156,
2427
- "step": 3490
2428
- },
2429
- {
2430
- "epoch": 28.0,
2431
- "eval_accuracy": 0.8207440811724915,
2432
- "eval_f1": 0.8173955936562302,
2433
- "eval_loss": 0.7452513575553894,
2434
- "eval_precision": 0.818232638453996,
2435
- "eval_recall": 0.8207440811724915,
2436
- "eval_runtime": 13.9402,
2437
- "eval_samples_per_second": 127.258,
2438
- "eval_steps_per_second": 4.017,
2439
- "step": 3493
2440
- },
2441
- {
2442
- "epoch": 28.06,
2443
- "learning_rate": 6.5710872162485075e-06,
2444
- "loss": 0.218,
2445
- "step": 3500
2446
- },
2447
- {
2448
- "epoch": 28.14,
2449
- "learning_rate": 6.2724014336917564e-06,
2450
- "loss": 0.1878,
2451
- "step": 3510
2452
- },
2453
- {
2454
- "epoch": 28.22,
2455
- "learning_rate": 5.973715651135006e-06,
2456
- "loss": 0.2132,
2457
- "step": 3520
2458
- },
2459
- {
2460
- "epoch": 28.3,
2461
- "learning_rate": 5.675029868578255e-06,
2462
- "loss": 0.1802,
2463
- "step": 3530
2464
- },
2465
- {
2466
- "epoch": 28.38,
2467
- "learning_rate": 5.376344086021506e-06,
2468
- "loss": 0.1857,
2469
- "step": 3540
2470
- },
2471
- {
2472
- "epoch": 28.46,
2473
- "learning_rate": 5.077658303464755e-06,
2474
- "loss": 0.189,
2475
- "step": 3550
2476
- },
2477
- {
2478
- "epoch": 28.54,
2479
- "learning_rate": 4.7789725209080055e-06,
2480
- "loss": 0.1699,
2481
- "step": 3560
2482
- },
2483
- {
2484
- "epoch": 28.62,
2485
- "learning_rate": 4.4802867383512545e-06,
2486
- "loss": 0.2193,
2487
- "step": 3570
2488
- },
2489
- {
2490
- "epoch": 28.7,
2491
- "learning_rate": 4.181600955794504e-06,
2492
- "loss": 0.1855,
2493
- "step": 3580
2494
- },
2495
- {
2496
- "epoch": 28.78,
2497
- "learning_rate": 3.882915173237754e-06,
2498
- "loss": 0.1957,
2499
- "step": 3590
2500
- },
2501
- {
2502
- "epoch": 28.86,
2503
- "learning_rate": 3.5842293906810035e-06,
2504
- "loss": 0.2059,
2505
- "step": 3600
2506
- },
2507
- {
2508
- "epoch": 28.94,
2509
- "learning_rate": 3.2855436081242537e-06,
2510
- "loss": 0.1785,
2511
- "step": 3610
2512
- },
2513
- {
2514
- "epoch": 28.99,
2515
- "eval_accuracy": 0.8167981961668546,
2516
- "eval_f1": 0.8144560702043755,
2517
- "eval_loss": 0.749666690826416,
2518
- "eval_precision": 0.8157480462760235,
2519
- "eval_recall": 0.8167981961668546,
2520
- "eval_runtime": 13.8966,
2521
- "eval_samples_per_second": 127.657,
2522
- "eval_steps_per_second": 4.03,
2523
- "step": 3617
2524
- },
2525
- {
2526
- "epoch": 29.02,
2527
- "learning_rate": 2.986857825567503e-06,
2528
- "loss": 0.2005,
2529
- "step": 3620
2530
- },
2531
- {
2532
- "epoch": 29.1,
2533
- "learning_rate": 2.688172043010753e-06,
2534
- "loss": 0.1767,
2535
- "step": 3630
2536
- },
2537
- {
2538
- "epoch": 29.18,
2539
- "learning_rate": 2.3894862604540028e-06,
2540
- "loss": 0.1836,
2541
- "step": 3640
2542
- },
2543
- {
2544
- "epoch": 29.26,
2545
- "learning_rate": 2.090800477897252e-06,
2546
- "loss": 0.1772,
2547
- "step": 3650
2548
- },
2549
- {
2550
- "epoch": 29.34,
2551
- "learning_rate": 1.7921146953405017e-06,
2552
- "loss": 0.1851,
2553
- "step": 3660
2554
- },
2555
- {
2556
- "epoch": 29.42,
2557
- "learning_rate": 1.4934289127837516e-06,
2558
- "loss": 0.1944,
2559
- "step": 3670
2560
- },
2561
- {
2562
- "epoch": 29.5,
2563
- "learning_rate": 1.1947431302270014e-06,
2564
- "loss": 0.1762,
2565
- "step": 3680
2566
- },
2567
- {
2568
- "epoch": 29.58,
2569
- "learning_rate": 8.960573476702509e-07,
2570
- "loss": 0.2001,
2571
- "step": 3690
2572
- },
2573
- {
2574
- "epoch": 29.66,
2575
- "learning_rate": 5.973715651135007e-07,
2576
- "loss": 0.1681,
2577
- "step": 3700
2578
- },
2579
- {
2580
- "epoch": 29.74,
2581
- "learning_rate": 2.9868578255675034e-07,
2582
- "loss": 0.1826,
2583
- "step": 3710
2584
- },
2585
- {
2586
- "epoch": 29.82,
2587
- "learning_rate": 0.0,
2588
- "loss": 0.1785,
2589
- "step": 3720
2590
- },
2591
- {
2592
- "epoch": 29.82,
2593
- "eval_accuracy": 0.8201803833145435,
2594
- "eval_f1": 0.8172820930459103,
2595
- "eval_loss": 0.7429009079933167,
2596
- "eval_precision": 0.8190214518875245,
2597
- "eval_recall": 0.8201803833145435,
2598
- "eval_runtime": 13.9063,
2599
- "eval_samples_per_second": 127.568,
2600
- "eval_steps_per_second": 4.027,
2601
- "step": 3720
2602
- },
2603
- {
2604
- "epoch": 29.82,
2605
- "step": 3720,
2606
- "total_flos": 1.3134189871289722e+20,
2607
- "train_loss": 0.4506285851360649,
2608
- "train_runtime": 7123.617,
2609
- "train_samples_per_second": 67.221,
2610
- "train_steps_per_second": 0.522
2611
  }
2612
  ],
2613
  "logging_steps": 10,
2614
- "max_steps": 3720,
2615
  "num_input_tokens_seen": 0,
2616
  "num_train_epochs": 30,
2617
  "save_steps": 500,
2618
- "total_flos": 1.3134189871289722e+20,
2619
- "train_batch_size": 32,
2620
  "trial_name": null,
2621
  "trial_params": null
2622
  }
 
1
  {
2
+ "best_metric": 0.8382187147688839,
3
+ "best_model_checkpoint": "vit-large-patch32-224-in21k-finetuned-galaxy10-decals/checkpoint-343",
4
+ "epoch": 29.76,
5
  "eval_steps": 500,
6
+ "global_step": 930,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.32,
13
+ "learning_rate": 1.0752688172043012e-05,
14
+ "loss": 2.3005,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.64,
19
+ "learning_rate": 2.1505376344086024e-05,
20
+ "loss": 2.1598,
21
  "step": 20
22
  },
23
  {
24
+ "epoch": 0.96,
25
+ "learning_rate": 3.2258064516129034e-05,
26
+ "loss": 1.8923,
27
  "step": 30
28
  },
29
  {
30
+ "epoch": 0.99,
31
+ "eval_accuracy": 0.4599774520856821,
32
+ "eval_f1": 0.36818539105134135,
33
+ "eval_loss": 1.6724576950073242,
34
+ "eval_precision": 0.5537412962661489,
35
+ "eval_recall": 0.4599774520856821,
36
+ "eval_runtime": 13.908,
37
+ "eval_samples_per_second": 127.553,
38
+ "eval_steps_per_second": 1.007,
39
+ "step": 31
40
+ },
41
+ {
42
+ "epoch": 1.28,
43
+ "learning_rate": 4.301075268817205e-05,
44
+ "loss": 1.6252,
45
  "step": 40
46
  },
47
  {
48
+ "epoch": 1.6,
49
+ "learning_rate": 5.3763440860215054e-05,
50
+ "loss": 1.374,
51
  "step": 50
52
  },
53
  {
54
+ "epoch": 1.92,
55
+ "learning_rate": 6.451612903225807e-05,
56
+ "loss": 1.1787,
57
  "step": 60
58
  },
59
  {
60
+ "epoch": 1.98,
61
+ "eval_accuracy": 0.733934611048478,
62
+ "eval_f1": 0.7095413298923192,
63
+ "eval_loss": 0.9948903918266296,
64
+ "eval_precision": 0.7513280698764072,
65
+ "eval_recall": 0.733934611048478,
66
+ "eval_runtime": 13.6836,
67
+ "eval_samples_per_second": 129.644,
68
+ "eval_steps_per_second": 1.023,
69
+ "step": 62
70
+ },
71
+ {
72
+ "epoch": 2.24,
73
+ "learning_rate": 7.526881720430108e-05,
74
+ "loss": 1.0564,
75
  "step": 70
76
  },
77
  {
78
+ "epoch": 2.56,
79
+ "learning_rate": 8.60215053763441e-05,
80
+ "loss": 0.9695,
81
  "step": 80
82
  },
83
  {
84
+ "epoch": 2.88,
85
+ "learning_rate": 9.677419354838711e-05,
86
+ "loss": 0.9165,
87
  "step": 90
88
  },
89
  {
90
+ "epoch": 2.98,
91
+ "eval_accuracy": 0.770011273957159,
92
+ "eval_f1": 0.7540108447224593,
93
+ "eval_loss": 0.7946078181266785,
94
+ "eval_precision": 0.7694119599791991,
95
+ "eval_recall": 0.770011273957159,
96
+ "eval_runtime": 13.9239,
97
+ "eval_samples_per_second": 127.407,
98
+ "eval_steps_per_second": 1.005,
99
+ "step": 93
100
+ },
101
+ {
102
+ "epoch": 3.2,
103
+ "learning_rate": 9.916367980884111e-05,
104
+ "loss": 0.8982,
105
  "step": 100
106
  },
107
  {
108
+ "epoch": 3.52,
109
+ "learning_rate": 9.79689366786141e-05,
110
+ "loss": 0.8313,
111
  "step": 110
112
  },
113
  {
114
+ "epoch": 3.84,
115
+ "learning_rate": 9.677419354838711e-05,
116
+ "loss": 0.802,
117
  "step": 120
118
  },
119
  {
120
+ "epoch": 4.0,
121
+ "eval_accuracy": 0.7948139797068771,
122
+ "eval_f1": 0.7843494012602573,
123
+ "eval_loss": 0.6747179627418518,
124
+ "eval_precision": 0.795387126369686,
125
+ "eval_recall": 0.7948139797068771,
126
+ "eval_runtime": 14.1264,
127
+ "eval_samples_per_second": 125.581,
128
+ "eval_steps_per_second": 0.991,
129
+ "step": 125
130
+ },
131
+ {
132
+ "epoch": 4.16,
133
+ "learning_rate": 9.55794504181601e-05,
134
+ "loss": 0.7681,
135
  "step": 130
136
  },
137
  {
138
+ "epoch": 4.48,
139
+ "learning_rate": 9.438470728793309e-05,
140
+ "loss": 0.7362,
141
  "step": 140
142
  },
143
  {
144
+ "epoch": 4.8,
145
+ "learning_rate": 9.31899641577061e-05,
146
+ "loss": 0.7074,
147
  "step": 150
148
  },
149
  {
150
+ "epoch": 4.99,
151
+ "eval_accuracy": 0.8117249154453213,
152
+ "eval_f1": 0.8115345249432343,
153
+ "eval_loss": 0.6196042895317078,
154
+ "eval_precision": 0.8139405296667617,
155
+ "eval_recall": 0.8117249154453213,
156
+ "eval_runtime": 13.922,
157
+ "eval_samples_per_second": 127.424,
158
+ "eval_steps_per_second": 1.006,
159
+ "step": 156
160
+ },
161
+ {
162
+ "epoch": 5.12,
163
+ "learning_rate": 9.199522102747909e-05,
164
+ "loss": 0.6855,
165
  "step": 160
166
  },
167
  {
168
+ "epoch": 5.44,
169
+ "learning_rate": 9.080047789725208e-05,
170
+ "loss": 0.6881,
171
  "step": 170
172
  },
173
  {
174
+ "epoch": 5.76,
175
+ "learning_rate": 8.960573476702509e-05,
176
+ "loss": 0.6424,
177
  "step": 180
178
  },
179
  {
180
+ "epoch": 5.98,
181
+ "eval_accuracy": 0.8021420518602029,
182
+ "eval_f1": 0.7961437261818466,
183
+ "eval_loss": 0.6204531192779541,
184
+ "eval_precision": 0.8074926673896575,
185
+ "eval_recall": 0.8021420518602029,
186
+ "eval_runtime": 13.7635,
187
+ "eval_samples_per_second": 128.891,
188
+ "eval_steps_per_second": 1.017,
189
+ "step": 187
190
+ },
191
+ {
192
+ "epoch": 6.08,
193
+ "learning_rate": 8.84109916367981e-05,
194
+ "loss": 0.6484,
195
  "step": 190
196
  },
197
  {
198
+ "epoch": 6.4,
199
+ "learning_rate": 8.72162485065711e-05,
200
+ "loss": 0.624,
201
  "step": 200
202
  },
203
  {
204
+ "epoch": 6.72,
205
+ "learning_rate": 8.60215053763441e-05,
206
+ "loss": 0.6309,
207
  "step": 210
208
  },
209
  {
210
+ "epoch": 6.98,
211
+ "eval_accuracy": 0.8117249154453213,
212
+ "eval_f1": 0.8127171755353659,
213
+ "eval_loss": 0.5759598016738892,
214
+ "eval_precision": 0.8230864221733758,
215
+ "eval_recall": 0.8117249154453213,
216
+ "eval_runtime": 13.9932,
217
+ "eval_samples_per_second": 126.776,
218
+ "eval_steps_per_second": 1.0,
219
+ "step": 218
220
+ },
221
+ {
222
+ "epoch": 7.04,
223
+ "learning_rate": 8.482676224611709e-05,
224
+ "loss": 0.6485,
225
  "step": 220
226
  },
227
  {
228
+ "epoch": 7.36,
229
+ "learning_rate": 8.363201911589009e-05,
230
+ "loss": 0.622,
231
  "step": 230
232
  },
233
  {
234
+ "epoch": 7.68,
235
+ "learning_rate": 8.243727598566309e-05,
236
+ "loss": 0.6082,
237
  "step": 240
238
  },
239
  {
240
+ "epoch": 8.0,
241
+ "learning_rate": 8.124253285543608e-05,
242
+ "loss": 0.5682,
243
+ "step": 250
 
 
 
 
 
 
244
  },
245
  {
246
+ "epoch": 8.0,
247
+ "eval_accuracy": 0.8151071025930101,
248
+ "eval_f1": 0.815748718679729,
249
+ "eval_loss": 0.5748494863510132,
250
+ "eval_precision": 0.819638540013667,
251
+ "eval_recall": 0.8151071025930101,
252
+ "eval_runtime": 13.7341,
253
+ "eval_samples_per_second": 129.168,
254
+ "eval_steps_per_second": 1.019,
255
  "step": 250
256
  },
257
  {
258
+ "epoch": 8.32,
259
+ "learning_rate": 8.004778972520908e-05,
260
+ "loss": 0.5578,
261
  "step": 260
262
  },
263
  {
264
+ "epoch": 8.64,
265
+ "learning_rate": 7.885304659498209e-05,
266
+ "loss": 0.5808,
267
  "step": 270
268
  },
269
  {
270
+ "epoch": 8.96,
271
+ "learning_rate": 7.765830346475508e-05,
272
+ "loss": 0.5981,
273
  "step": 280
274
  },
275
  {
276
+ "epoch": 8.99,
277
+ "eval_accuracy": 0.8213077790304397,
278
+ "eval_f1": 0.8157691286644828,
279
+ "eval_loss": 0.5704066157341003,
280
+ "eval_precision": 0.8268776889509669,
281
+ "eval_recall": 0.8213077790304397,
282
+ "eval_runtime": 14.3553,
283
+ "eval_samples_per_second": 123.578,
284
+ "eval_steps_per_second": 0.975,
285
+ "step": 281
286
+ },
287
+ {
288
+ "epoch": 9.28,
289
+ "learning_rate": 7.646356033452809e-05,
290
+ "loss": 0.5581,
291
  "step": 290
292
  },
293
  {
294
+ "epoch": 9.6,
295
+ "learning_rate": 7.526881720430108e-05,
296
+ "loss": 0.5585,
297
  "step": 300
298
  },
299
  {
300
+ "epoch": 9.92,
301
+ "learning_rate": 7.407407407407407e-05,
302
+ "loss": 0.547,
303
  "step": 310
304
  },
305
  {
306
+ "epoch": 9.98,
307
+ "eval_accuracy": 0.8376550169109357,
308
+ "eval_f1": 0.8344504939462281,
309
+ "eval_loss": 0.5282090902328491,
310
+ "eval_precision": 0.8352471416146373,
311
+ "eval_recall": 0.8376550169109357,
312
+ "eval_runtime": 13.9665,
313
+ "eval_samples_per_second": 127.018,
314
+ "eval_steps_per_second": 1.002,
315
+ "step": 312
316
+ },
317
+ {
318
+ "epoch": 10.24,
319
+ "learning_rate": 7.287933094384708e-05,
320
+ "loss": 0.5197,
321
  "step": 320
322
  },
323
  {
324
+ "epoch": 10.56,
325
+ "learning_rate": 7.168458781362007e-05,
326
+ "loss": 0.5413,
327
  "step": 330
328
  },
329
  {
330
+ "epoch": 10.88,
331
+ "learning_rate": 7.048984468339306e-05,
332
+ "loss": 0.5067,
333
  "step": 340
334
  },
335
  {
336
+ "epoch": 10.98,
337
+ "eval_accuracy": 0.8382187147688839,
338
+ "eval_f1": 0.8355644052274377,
339
+ "eval_loss": 0.5280851721763611,
340
+ "eval_precision": 0.8371941971933433,
341
+ "eval_recall": 0.8382187147688839,
342
+ "eval_runtime": 13.8989,
343
+ "eval_samples_per_second": 127.636,
344
+ "eval_steps_per_second": 1.007,
345
+ "step": 343
346
+ },
347
+ {
348
+ "epoch": 11.2,
349
+ "learning_rate": 6.929510155316607e-05,
350
+ "loss": 0.4844,
351
  "step": 350
352
  },
353
  {
354
+ "epoch": 11.52,
355
+ "learning_rate": 6.810035842293908e-05,
356
+ "loss": 0.4829,
357
  "step": 360
358
  },
359
  {
360
+ "epoch": 11.84,
361
+ "learning_rate": 6.690561529271207e-05,
362
+ "loss": 0.5066,
363
  "step": 370
364
  },
365
  {
366
+ "epoch": 12.0,
367
+ "eval_accuracy": 0.8246899661781285,
368
+ "eval_f1": 0.82193071428432,
369
+ "eval_loss": 0.5440835952758789,
370
+ "eval_precision": 0.8285928304505648,
371
+ "eval_recall": 0.8246899661781285,
372
+ "eval_runtime": 14.0682,
373
+ "eval_samples_per_second": 126.1,
374
+ "eval_steps_per_second": 0.995,
375
+ "step": 375
376
+ },
377
+ {
378
+ "epoch": 12.16,
379
+ "learning_rate": 6.571087216248507e-05,
380
+ "loss": 0.4778,
381
  "step": 380
382
  },
383
  {
384
+ "epoch": 12.48,
385
+ "learning_rate": 6.451612903225807e-05,
386
+ "loss": 0.4653,
387
  "step": 390
388
  },
389
  {
390
+ "epoch": 12.8,
391
+ "learning_rate": 6.332138590203107e-05,
392
+ "loss": 0.4919,
393
  "step": 400
394
  },
395
  {
396
+ "epoch": 12.99,
397
+ "eval_accuracy": 0.8156708004509583,
398
+ "eval_f1": 0.8155334582595456,
399
+ "eval_loss": 0.5579707026481628,
400
+ "eval_precision": 0.8235518646692552,
401
+ "eval_recall": 0.8156708004509583,
402
+ "eval_runtime": 13.6039,
403
+ "eval_samples_per_second": 130.404,
404
+ "eval_steps_per_second": 1.029,
405
+ "step": 406
406
+ },
407
+ {
408
+ "epoch": 13.12,
409
+ "learning_rate": 6.212664277180407e-05,
410
+ "loss": 0.4659,
411
  "step": 410
412
  },
413
  {
414
+ "epoch": 13.44,
415
+ "learning_rate": 6.093189964157706e-05,
416
+ "loss": 0.4625,
417
  "step": 420
418
  },
419
  {
420
+ "epoch": 13.76,
421
+ "learning_rate": 5.9737156511350064e-05,
422
+ "loss": 0.4508,
423
  "step": 430
424
  },
425
  {
426
+ "epoch": 13.98,
427
+ "eval_accuracy": 0.8303269447576099,
428
+ "eval_f1": 0.8279428714077549,
429
+ "eval_loss": 0.526913046836853,
430
+ "eval_precision": 0.8330911890059709,
431
+ "eval_recall": 0.8303269447576099,
432
+ "eval_runtime": 14.1136,
433
+ "eval_samples_per_second": 125.695,
434
+ "eval_steps_per_second": 0.992,
435
+ "step": 437
436
+ },
437
+ {
438
+ "epoch": 14.08,
439
+ "learning_rate": 5.8542413381123063e-05,
440
+ "loss": 0.4588,
441
  "step": 440
442
  },
443
  {
444
+ "epoch": 14.4,
445
+ "learning_rate": 5.7347670250896056e-05,
446
+ "loss": 0.4312,
447
  "step": 450
448
  },
449
  {
450
+ "epoch": 14.72,
451
+ "learning_rate": 5.615292712066906e-05,
452
+ "loss": 0.4415,
453
  "step": 460
454
  },
455
  {
456
+ "epoch": 14.98,
457
+ "eval_accuracy": 0.818489289740699,
458
+ "eval_f1": 0.8203104704622532,
459
+ "eval_loss": 0.5398772954940796,
460
+ "eval_precision": 0.824911350006833,
461
+ "eval_recall": 0.818489289740699,
462
+ "eval_runtime": 14.3225,
463
+ "eval_samples_per_second": 123.861,
464
+ "eval_steps_per_second": 0.977,
465
+ "step": 468
466
+ },
467
+ {
468
+ "epoch": 15.04,
469
+ "learning_rate": 5.495818399044206e-05,
470
+ "loss": 0.4234,
471
  "step": 470
472
  },
473
  {
474
+ "epoch": 15.36,
475
+ "learning_rate": 5.3763440860215054e-05,
476
+ "loss": 0.4074,
477
  "step": 480
478
  },
479
  {
480
+ "epoch": 15.68,
481
+ "learning_rate": 5.256869772998806e-05,
482
+ "loss": 0.409,
483
  "step": 490
484
  },
485
  {
486
+ "epoch": 16.0,
487
+ "learning_rate": 5.137395459976105e-05,
488
+ "loss": 0.4178,
489
+ "step": 500
490
+ },
491
+ {
492
+ "epoch": 16.0,
493
+ "eval_accuracy": 0.8320180383314544,
494
+ "eval_f1": 0.8300827004836914,
495
+ "eval_loss": 0.5229005813598633,
496
+ "eval_precision": 0.8357823255635188,
497
+ "eval_recall": 0.8320180383314544,
498
+ "eval_runtime": 14.0799,
499
+ "eval_samples_per_second": 125.995,
500
+ "eval_steps_per_second": 0.994,
501
  "step": 500
502
  },
503
  {
504
+ "epoch": 16.32,
505
+ "learning_rate": 5.017921146953405e-05,
506
+ "loss": 0.3928,
507
  "step": 510
508
  },
509
  {
510
+ "epoch": 16.64,
511
+ "learning_rate": 4.898446833930705e-05,
512
+ "loss": 0.4174,
513
  "step": 520
514
  },
515
  {
516
+ "epoch": 16.96,
517
+ "learning_rate": 4.778972520908005e-05,
518
+ "loss": 0.366,
519
  "step": 530
520
  },
521
  {
522
+ "epoch": 16.99,
523
+ "eval_accuracy": 0.8275084554678692,
524
+ "eval_f1": 0.8240548194511355,
525
+ "eval_loss": 0.5427183508872986,
526
+ "eval_precision": 0.828135455879007,
527
+ "eval_recall": 0.8275084554678692,
528
+ "eval_runtime": 13.7519,
529
+ "eval_samples_per_second": 129.0,
530
+ "eval_steps_per_second": 1.018,
531
+ "step": 531
532
+ },
533
+ {
534
+ "epoch": 17.28,
535
+ "learning_rate": 4.659498207885305e-05,
536
+ "loss": 0.3746,
537
  "step": 540
538
  },
539
  {
540
+ "epoch": 17.6,
541
+ "learning_rate": 4.540023894862604e-05,
542
+ "loss": 0.3956,
543
  "step": 550
544
  },
545
  {
546
+ "epoch": 17.92,
547
+ "learning_rate": 4.420549581839905e-05,
548
+ "loss": 0.3706,
549
  "step": 560
550
  },
551
  {
552
+ "epoch": 17.98,
553
+ "eval_accuracy": 0.8241262683201804,
554
+ "eval_f1": 0.8229564605373075,
555
+ "eval_loss": 0.5389071702957153,
556
+ "eval_precision": 0.8242056890621828,
557
+ "eval_recall": 0.8241262683201804,
558
+ "eval_runtime": 13.7189,
559
+ "eval_samples_per_second": 129.31,
560
+ "eval_steps_per_second": 1.02,
561
+ "step": 562
562
+ },
563
+ {
564
+ "epoch": 18.24,
565
+ "learning_rate": 4.301075268817205e-05,
566
+ "loss": 0.3715,
567
  "step": 570
568
  },
569
  {
570
+ "epoch": 18.56,
571
+ "learning_rate": 4.1816009557945046e-05,
572
+ "loss": 0.3419,
573
  "step": 580
574
  },
575
  {
576
+ "epoch": 18.88,
577
+ "learning_rate": 4.062126642771804e-05,
578
+ "loss": 0.3609,
579
  "step": 590
580
  },
581
  {
582
+ "epoch": 18.98,
583
+ "eval_accuracy": 0.8246899661781285,
584
+ "eval_f1": 0.8238722113026624,
585
+ "eval_loss": 0.5573083758354187,
586
+ "eval_precision": 0.8261543816529526,
587
+ "eval_recall": 0.8246899661781285,
588
+ "eval_runtime": 13.8749,
589
+ "eval_samples_per_second": 127.857,
590
+ "eval_steps_per_second": 1.009,
591
+ "step": 593
592
+ },
593
+ {
594
+ "epoch": 19.2,
595
+ "learning_rate": 3.9426523297491045e-05,
596
+ "loss": 0.3694,
597
  "step": 600
598
  },
599
  {
600
+ "epoch": 19.52,
601
+ "learning_rate": 3.8231780167264044e-05,
602
+ "loss": 0.3574,
603
  "step": 610
604
  },
605
  {
606
+ "epoch": 19.84,
607
+ "learning_rate": 3.7037037037037037e-05,
608
+ "loss": 0.3443,
609
  "step": 620
610
  },
611
  {
612
+ "epoch": 20.0,
613
+ "eval_accuracy": 0.8320180383314544,
614
+ "eval_f1": 0.8302429910863938,
615
+ "eval_loss": 0.5605015754699707,
616
+ "eval_precision": 0.8325444592064266,
617
+ "eval_recall": 0.8320180383314544,
618
+ "eval_runtime": 13.8905,
619
+ "eval_samples_per_second": 127.713,
620
+ "eval_steps_per_second": 1.008,
621
+ "step": 625
622
+ },
623
+ {
624
+ "epoch": 20.16,
625
+ "learning_rate": 3.5842293906810036e-05,
626
+ "loss": 0.3302,
627
  "step": 630
628
  },
629
  {
630
+ "epoch": 20.48,
631
+ "learning_rate": 3.4647550776583035e-05,
632
+ "loss": 0.3309,
633
  "step": 640
634
  },
635
  {
636
+ "epoch": 20.8,
637
+ "learning_rate": 3.3452807646356034e-05,
638
+ "loss": 0.3214,
639
  "step": 650
640
  },
641
  {
642
+ "epoch": 20.99,
643
+ "eval_accuracy": 0.8280721533258174,
644
+ "eval_f1": 0.8254240015965836,
645
+ "eval_loss": 0.5667029619216919,
646
+ "eval_precision": 0.8295428318312357,
647
+ "eval_recall": 0.8280721533258174,
648
+ "eval_runtime": 14.1177,
649
+ "eval_samples_per_second": 125.658,
650
+ "eval_steps_per_second": 0.992,
651
+ "step": 656
652
+ },
653
+ {
654
+ "epoch": 21.12,
655
+ "learning_rate": 3.2258064516129034e-05,
656
+ "loss": 0.3121,
657
  "step": 660
658
  },
659
  {
660
+ "epoch": 21.44,
661
+ "learning_rate": 3.106332138590203e-05,
662
+ "loss": 0.3157,
663
  "step": 670
664
  },
665
  {
666
+ "epoch": 21.76,
667
+ "learning_rate": 2.9868578255675032e-05,
668
+ "loss": 0.3262,
669
  "step": 680
670
  },
671
  {
672
+ "epoch": 21.98,
673
+ "eval_accuracy": 0.8235625704622322,
674
+ "eval_f1": 0.8214055556480262,
675
+ "eval_loss": 0.579714298248291,
676
+ "eval_precision": 0.8237105353518115,
677
+ "eval_recall": 0.8235625704622322,
678
+ "eval_runtime": 13.8715,
679
+ "eval_samples_per_second": 127.888,
680
+ "eval_steps_per_second": 1.009,
681
+ "step": 687
682
+ },
683
+ {
684
+ "epoch": 22.08,
685
+ "learning_rate": 2.8673835125448028e-05,
686
+ "loss": 0.3185,
687
  "step": 690
688
  },
689
  {
690
+ "epoch": 22.4,
691
+ "learning_rate": 2.747909199522103e-05,
692
+ "loss": 0.3017,
693
  "step": 700
694
  },
695
  {
696
+ "epoch": 22.72,
697
+ "learning_rate": 2.628434886499403e-05,
698
+ "loss": 0.299,
699
  "step": 710
700
  },
701
  {
702
+ "epoch": 22.98,
703
+ "eval_accuracy": 0.8201803833145435,
704
+ "eval_f1": 0.819536745894237,
705
+ "eval_loss": 0.5938186645507812,
706
+ "eval_precision": 0.8225417256475036,
707
+ "eval_recall": 0.8201803833145435,
708
+ "eval_runtime": 14.0359,
709
+ "eval_samples_per_second": 126.39,
710
+ "eval_steps_per_second": 0.997,
711
+ "step": 718
712
+ },
713
+ {
714
+ "epoch": 23.04,
715
+ "learning_rate": 2.5089605734767026e-05,
716
+ "loss": 0.3023,
717
  "step": 720
718
  },
719
  {
720
+ "epoch": 23.36,
721
+ "learning_rate": 2.3894862604540025e-05,
722
+ "loss": 0.2978,
723
  "step": 730
724
  },
725
  {
726
+ "epoch": 23.68,
727
+ "learning_rate": 2.270011947431302e-05,
728
+ "loss": 0.3062,
729
  "step": 740
730
  },
731
  {
732
+ "epoch": 24.0,
733
+ "learning_rate": 2.1505376344086024e-05,
734
+ "loss": 0.2792,
735
+ "step": 750
 
 
 
 
 
 
736
  },
737
  {
738
+ "epoch": 24.0,
739
+ "eval_accuracy": 0.8275084554678692,
740
+ "eval_f1": 0.8251168354340412,
741
+ "eval_loss": 0.5909499526023865,
742
+ "eval_precision": 0.8258356043850564,
743
+ "eval_recall": 0.8275084554678692,
744
+ "eval_runtime": 13.9981,
745
+ "eval_samples_per_second": 126.731,
746
+ "eval_steps_per_second": 1.0,
747
  "step": 750
748
  },
749
  {
750
+ "epoch": 24.32,
751
+ "learning_rate": 2.031063321385902e-05,
752
+ "loss": 0.3042,
753
  "step": 760
754
  },
755
  {
756
+ "epoch": 24.64,
757
+ "learning_rate": 1.9115890083632022e-05,
758
+ "loss": 0.2904,
759
  "step": 770
760
  },
761
  {
762
+ "epoch": 24.96,
763
+ "learning_rate": 1.7921146953405018e-05,
764
+ "loss": 0.2969,
765
  "step": 780
766
  },
767
  {
768
+ "epoch": 24.99,
769
+ "eval_accuracy": 0.830890642615558,
770
+ "eval_f1": 0.8306159214761004,
771
+ "eval_loss": 0.5657971501350403,
772
+ "eval_precision": 0.8319469060175432,
773
+ "eval_recall": 0.830890642615558,
774
+ "eval_runtime": 14.1611,
775
+ "eval_samples_per_second": 125.273,
776
+ "eval_steps_per_second": 0.989,
777
+ "step": 781
778
+ },
779
+ {
780
+ "epoch": 25.28,
781
+ "learning_rate": 1.6726403823178017e-05,
782
+ "loss": 0.2692,
783
  "step": 790
784
  },
785
  {
786
+ "epoch": 25.6,
787
+ "learning_rate": 1.5531660692951016e-05,
788
+ "loss": 0.2793,
789
  "step": 800
790
  },
791
  {
792
+ "epoch": 25.92,
793
+ "learning_rate": 1.4336917562724014e-05,
794
+ "loss": 0.2559,
795
  "step": 810
796
  },
797
  {
798
+ "epoch": 25.98,
799
+ "eval_accuracy": 0.830890642615558,
800
+ "eval_f1": 0.8293693314290957,
801
+ "eval_loss": 0.5936293601989746,
802
+ "eval_precision": 0.8294137513471009,
803
+ "eval_recall": 0.830890642615558,
804
+ "eval_runtime": 14.2541,
805
+ "eval_samples_per_second": 124.456,
806
+ "eval_steps_per_second": 0.982,
807
+ "step": 812
808
+ },
809
+ {
810
+ "epoch": 26.24,
811
+ "learning_rate": 1.3142174432497015e-05,
812
+ "loss": 0.2791,
813
  "step": 820
814
  },
815
  {
816
+ "epoch": 26.56,
817
+ "learning_rate": 1.1947431302270013e-05,
818
+ "loss": 0.2716,
819
  "step": 830
820
  },
821
  {
822
+ "epoch": 26.88,
823
+ "learning_rate": 1.0752688172043012e-05,
824
+ "loss": 0.2756,
825
  "step": 840
826
  },
827
  {
828
+ "epoch": 26.98,
829
+ "eval_accuracy": 0.8291995490417137,
830
+ "eval_f1": 0.8286998803671861,
831
+ "eval_loss": 0.5898094177246094,
832
+ "eval_precision": 0.8294813897487977,
833
+ "eval_recall": 0.8291995490417137,
834
+ "eval_runtime": 14.0481,
835
+ "eval_samples_per_second": 126.28,
836
+ "eval_steps_per_second": 0.997,
837
+ "step": 843
838
+ },
839
+ {
840
+ "epoch": 27.2,
841
+ "learning_rate": 9.557945041816011e-06,
842
+ "loss": 0.2711,
843
  "step": 850
844
  },
845
  {
846
+ "epoch": 27.52,
847
+ "learning_rate": 8.363201911589009e-06,
848
+ "loss": 0.2746,
849
  "step": 860
850
  },
851
  {
852
+ "epoch": 27.84,
853
+ "learning_rate": 7.168458781362007e-06,
854
+ "loss": 0.254,
855
  "step": 870
856
  },
857
  {
858
+ "epoch": 28.0,
859
+ "eval_accuracy": 0.8303269447576099,
860
+ "eval_f1": 0.8288956275009821,
861
+ "eval_loss": 0.6043339967727661,
862
+ "eval_precision": 0.8319314565453743,
863
+ "eval_recall": 0.8303269447576099,
864
+ "eval_runtime": 14.06,
865
+ "eval_samples_per_second": 126.173,
866
+ "eval_steps_per_second": 0.996,
867
+ "step": 875
868
+ },
869
+ {
870
+ "epoch": 28.16,
871
+ "learning_rate": 5.973715651135006e-06,
872
+ "loss": 0.2682,
873
  "step": 880
874
  },
875
  {
876
+ "epoch": 28.48,
877
+ "learning_rate": 4.7789725209080055e-06,
878
+ "loss": 0.2572,
879
  "step": 890
880
  },
881
  {
882
+ "epoch": 28.8,
883
+ "learning_rate": 3.5842293906810035e-06,
884
+ "loss": 0.2674,
885
  "step": 900
886
  },
887
  {
888
+ "epoch": 28.99,
889
+ "eval_accuracy": 0.8370913190529876,
890
+ "eval_f1": 0.8352849737173643,
891
+ "eval_loss": 0.5950246453285217,
892
+ "eval_precision": 0.836485810805898,
893
+ "eval_recall": 0.8370913190529876,
894
+ "eval_runtime": 14.0119,
895
+ "eval_samples_per_second": 126.607,
896
+ "eval_steps_per_second": 0.999,
897
+ "step": 906
898
+ },
899
+ {
900
+ "epoch": 29.12,
901
+ "learning_rate": 2.3894862604540028e-06,
902
+ "loss": 0.2611,
903
  "step": 910
904
  },
905
  {
906
+ "epoch": 29.44,
907
+ "learning_rate": 1.1947431302270014e-06,
908
+ "loss": 0.2527,
909
  "step": 920
910
  },
911
  {
912
+ "epoch": 29.76,
913
+ "learning_rate": 0.0,
914
+ "loss": 0.2432,
915
  "step": 930
916
  },
917
  {
918
+ "epoch": 29.76,
919
+ "eval_accuracy": 0.8359639233370914,
920
+ "eval_f1": 0.8344836129088197,
921
+ "eval_loss": 0.5906554460525513,
922
+ "eval_precision": 0.8348369465962998,
923
+ "eval_recall": 0.8359639233370914,
924
+ "eval_runtime": 13.9075,
925
+ "eval_samples_per_second": 127.557,
926
+ "eval_steps_per_second": 1.007,
927
+ "step": 930
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
928
  },
929
  {
930
+ "epoch": 29.76,
931
+ "step": 930,
932
+ "total_flos": 1.310858296646362e+20,
933
+ "train_loss": 0.5398157842697636,
934
+ "train_runtime": 7078.6752,
935
+ "train_samples_per_second": 67.648,
936
+ "train_steps_per_second": 0.131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
937
  }
938
  ],
939
  "logging_steps": 10,
940
+ "max_steps": 930,
941
  "num_input_tokens_seen": 0,
942
  "num_train_epochs": 30,
943
  "save_steps": 500,
944
+ "total_flos": 1.310858296646362e+20,
945
+ "train_batch_size": 128,
946
  "trial_name": null,
947
  "trial_params": null
948
  }