andrecastro commited on
Commit
79633c9
1 Parent(s): 5a2c987

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.9931972789115646
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.0272
36
- - Accuracy: 0.9932
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.9959183673469387
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.0224
36
+ - Accuracy: 0.9959
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.9881481481481481,
4
- "eval_loss": 0.04212497919797897,
5
- "eval_runtime": 16.5601,
6
- "eval_samples_per_second": 163.042,
7
- "eval_steps_per_second": 20.41,
8
- "total_flos": 1.8113127425839596e+18,
9
- "train_loss": 0.2791935749937685,
10
- "train_runtime": 1159.1194,
11
- "train_samples_per_second": 62.893,
12
- "train_steps_per_second": 1.964
13
  }
 
1
  {
2
+ "epoch": 2.99,
3
+ "eval_accuracy": 0.9959183673469387,
4
+ "eval_loss": 0.02241336554288864,
5
+ "eval_runtime": 11.9967,
6
+ "eval_samples_per_second": 61.267,
7
+ "eval_steps_per_second": 7.669,
8
+ "total_flos": 4.9150232620631654e+17,
9
+ "train_loss": 0.0970435019064104,
10
+ "train_runtime": 528.4446,
11
+ "train_samples_per_second": 37.508,
12
+ "train_steps_per_second": 1.169
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.9881481481481481,
4
- "eval_loss": 0.04212497919797897,
5
- "eval_runtime": 16.5601,
6
- "eval_samples_per_second": 163.042,
7
- "eval_steps_per_second": 20.41
8
  }
 
1
  {
2
+ "epoch": 2.99,
3
+ "eval_accuracy": 0.9959183673469387,
4
+ "eval_loss": 0.02241336554288864,
5
+ "eval_runtime": 11.9967,
6
+ "eval_samples_per_second": 61.267,
7
+ "eval_steps_per_second": 7.669
8
  }
runs/Nov30_00-41-17_08b01d83a514/events.out.tfevents.1701305433.08b01d83a514.504.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2152807d672c956b11fb2a102fc3ff9688d1fda4d4f3e24390ef567898132d
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 1.8113127425839596e+18,
4
- "train_loss": 0.2791935749937685,
5
- "train_runtime": 1159.1194,
6
- "train_samples_per_second": 62.893,
7
- "train_steps_per_second": 1.964
8
  }
 
1
  {
2
+ "epoch": 2.99,
3
+ "total_flos": 4.9150232620631654e+17,
4
+ "train_loss": 0.0970435019064104,
5
+ "train_runtime": 528.4446,
6
+ "train_samples_per_second": 37.508,
7
+ "train_steps_per_second": 1.169
8
  }
trainer_state.json CHANGED
@@ -1,1417 +1,421 @@
1
  {
2
- "best_metric": 0.9881481481481481,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-2277",
4
- "epoch": 2.9980250164581963,
5
  "eval_steps": 500,
6
- "global_step": 2277,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01,
13
- "learning_rate": 2.1929824561403507e-06,
14
- "loss": 2.3026,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.03,
19
- "learning_rate": 4.3859649122807014e-06,
20
- "loss": 2.263,
21
  "step": 20
22
  },
23
  {
24
- "epoch": 0.04,
25
- "learning_rate": 6.578947368421053e-06,
26
- "loss": 2.1913,
27
  "step": 30
28
  },
29
  {
30
- "epoch": 0.05,
31
- "learning_rate": 8.771929824561403e-06,
32
- "loss": 2.0553,
33
  "step": 40
34
  },
35
  {
36
- "epoch": 0.07,
37
- "learning_rate": 1.0964912280701754e-05,
38
- "loss": 1.8882,
39
  "step": 50
40
  },
41
  {
42
- "epoch": 0.08,
43
- "learning_rate": 1.3157894736842106e-05,
44
- "loss": 1.6925,
45
  "step": 60
46
  },
47
  {
48
- "epoch": 0.09,
49
- "learning_rate": 1.5350877192982457e-05,
50
- "loss": 1.3999,
51
  "step": 70
52
  },
53
  {
54
- "epoch": 0.11,
55
- "learning_rate": 1.7543859649122806e-05,
56
- "loss": 1.1624,
57
  "step": 80
58
  },
59
  {
60
- "epoch": 0.12,
61
- "learning_rate": 1.9736842105263158e-05,
62
- "loss": 0.8496,
63
  "step": 90
64
  },
65
  {
66
- "epoch": 0.13,
67
- "learning_rate": 2.1929824561403507e-05,
68
- "loss": 0.6803,
69
  "step": 100
70
  },
71
  {
72
- "epoch": 0.14,
73
- "learning_rate": 2.412280701754386e-05,
74
- "loss": 0.6882,
75
  "step": 110
76
  },
77
  {
78
- "epoch": 0.16,
79
- "learning_rate": 2.6315789473684212e-05,
80
- "loss": 0.6749,
81
  "step": 120
82
  },
83
  {
84
- "epoch": 0.17,
85
- "learning_rate": 2.850877192982456e-05,
86
- "loss": 0.4925,
87
  "step": 130
88
  },
89
  {
90
- "epoch": 0.18,
91
- "learning_rate": 3.0701754385964913e-05,
92
- "loss": 0.4627,
93
  "step": 140
94
  },
95
  {
96
- "epoch": 0.2,
97
- "learning_rate": 3.289473684210527e-05,
98
- "loss": 0.5142,
99
  "step": 150
100
  },
101
  {
102
- "epoch": 0.21,
103
- "learning_rate": 3.508771929824561e-05,
104
- "loss": 0.4327,
105
  "step": 160
106
  },
107
  {
108
- "epoch": 0.22,
109
- "learning_rate": 3.728070175438597e-05,
110
- "loss": 0.3355,
111
  "step": 170
112
  },
113
  {
114
- "epoch": 0.24,
115
- "learning_rate": 3.9473684210526316e-05,
116
- "loss": 0.3985,
117
  "step": 180
118
  },
119
  {
120
- "epoch": 0.25,
121
- "learning_rate": 4.166666666666667e-05,
122
- "loss": 0.4175,
123
  "step": 190
124
  },
125
  {
126
- "epoch": 0.26,
127
- "learning_rate": 4.3859649122807014e-05,
128
- "loss": 0.4031,
129
  "step": 200
130
  },
131
  {
132
- "epoch": 0.28,
133
- "learning_rate": 4.605263157894737e-05,
134
- "loss": 0.4584,
 
 
 
 
 
 
 
 
 
135
  "step": 210
136
  },
137
  {
138
- "epoch": 0.29,
139
- "learning_rate": 4.824561403508772e-05,
140
- "loss": 0.4003,
141
  "step": 220
142
  },
143
  {
144
- "epoch": 0.3,
145
- "learning_rate": 4.9951195705222064e-05,
146
- "loss": 0.3563,
147
  "step": 230
148
  },
149
  {
150
- "epoch": 0.32,
151
- "learning_rate": 4.970717423133236e-05,
152
- "loss": 0.4016,
153
  "step": 240
154
  },
155
  {
156
- "epoch": 0.33,
157
- "learning_rate": 4.946315275744266e-05,
158
- "loss": 0.3935,
159
  "step": 250
160
  },
161
  {
162
- "epoch": 0.34,
163
- "learning_rate": 4.9219131283552955e-05,
164
- "loss": 0.3803,
165
  "step": 260
166
  },
167
  {
168
- "epoch": 0.36,
169
- "learning_rate": 4.897510980966325e-05,
170
- "loss": 0.3144,
171
  "step": 270
172
  },
173
  {
174
- "epoch": 0.37,
175
- "learning_rate": 4.873108833577355e-05,
176
- "loss": 0.5159,
177
  "step": 280
178
  },
179
  {
180
- "epoch": 0.38,
181
- "learning_rate": 4.848706686188385e-05,
182
- "loss": 0.3619,
183
  "step": 290
184
  },
185
  {
186
- "epoch": 0.39,
187
- "learning_rate": 4.824304538799414e-05,
188
- "loss": 0.3081,
189
  "step": 300
190
  },
191
  {
192
- "epoch": 0.41,
193
- "learning_rate": 4.799902391410445e-05,
194
- "loss": 0.4305,
195
  "step": 310
196
  },
197
  {
198
- "epoch": 0.42,
199
- "learning_rate": 4.7755002440214744e-05,
200
- "loss": 0.3608,
201
  "step": 320
202
  },
203
  {
204
- "epoch": 0.43,
205
- "learning_rate": 4.751098096632504e-05,
206
- "loss": 0.4026,
207
  "step": 330
208
  },
209
  {
210
- "epoch": 0.45,
211
- "learning_rate": 4.726695949243534e-05,
212
- "loss": 0.4076,
213
  "step": 340
214
  },
215
  {
216
- "epoch": 0.46,
217
- "learning_rate": 4.7022938018545635e-05,
218
- "loss": 0.2873,
219
  "step": 350
220
  },
221
  {
222
- "epoch": 0.47,
223
- "learning_rate": 4.677891654465593e-05,
224
- "loss": 0.3595,
225
  "step": 360
226
  },
227
  {
228
- "epoch": 0.49,
229
- "learning_rate": 4.6534895070766235e-05,
230
- "loss": 0.4451,
231
  "step": 370
232
  },
233
  {
234
- "epoch": 0.5,
235
- "learning_rate": 4.6290873596876525e-05,
236
- "loss": 0.3699,
237
  "step": 380
238
  },
239
  {
240
- "epoch": 0.51,
241
- "learning_rate": 4.604685212298683e-05,
242
- "loss": 0.3725,
243
  "step": 390
244
  },
245
  {
246
- "epoch": 0.53,
247
- "learning_rate": 4.580283064909712e-05,
248
- "loss": 0.3517,
249
  "step": 400
250
  },
251
  {
252
- "epoch": 0.54,
253
- "learning_rate": 4.555880917520742e-05,
254
- "loss": 0.31,
255
  "step": 410
256
  },
257
  {
258
- "epoch": 0.55,
259
- "learning_rate": 4.531478770131772e-05,
260
- "loss": 0.3219,
 
 
 
 
 
 
 
 
 
261
  "step": 420
262
  },
263
  {
264
- "epoch": 0.57,
265
- "learning_rate": 4.507076622742802e-05,
266
- "loss": 0.3481,
267
  "step": 430
268
  },
269
  {
270
- "epoch": 0.58,
271
- "learning_rate": 4.4826744753538314e-05,
272
- "loss": 0.2919,
273
  "step": 440
274
  },
275
  {
276
- "epoch": 0.59,
277
- "learning_rate": 4.458272327964861e-05,
278
- "loss": 0.2448,
279
  "step": 450
280
  },
281
  {
282
- "epoch": 0.61,
283
- "learning_rate": 4.433870180575891e-05,
284
- "loss": 0.2834,
285
  "step": 460
286
  },
287
  {
288
- "epoch": 0.62,
289
- "learning_rate": 4.409468033186921e-05,
290
- "loss": 0.2367,
291
  "step": 470
292
  },
293
  {
294
- "epoch": 0.63,
295
- "learning_rate": 4.38506588579795e-05,
296
- "loss": 0.3128,
297
  "step": 480
298
  },
299
  {
300
- "epoch": 0.65,
301
- "learning_rate": 4.3606637384089805e-05,
302
- "loss": 0.3336,
303
  "step": 490
304
  },
305
  {
306
- "epoch": 0.66,
307
- "learning_rate": 4.33626159102001e-05,
308
- "loss": 0.3068,
309
  "step": 500
310
  },
311
  {
312
- "epoch": 0.67,
313
- "learning_rate": 4.31185944363104e-05,
314
- "loss": 0.2589,
315
  "step": 510
316
  },
317
  {
318
- "epoch": 0.68,
319
- "learning_rate": 4.2874572962420696e-05,
320
- "loss": 0.2879,
321
  "step": 520
322
  },
323
  {
324
- "epoch": 0.7,
325
- "learning_rate": 4.263055148853099e-05,
326
- "loss": 0.3202,
327
  "step": 530
328
  },
329
  {
330
- "epoch": 0.71,
331
- "learning_rate": 4.238653001464129e-05,
332
- "loss": 0.2673,
333
  "step": 540
334
  },
335
  {
336
- "epoch": 0.72,
337
- "learning_rate": 4.2142508540751594e-05,
338
- "loss": 0.3512,
339
  "step": 550
340
  },
341
  {
342
- "epoch": 0.74,
343
- "learning_rate": 4.1898487066861884e-05,
344
- "loss": 0.325,
345
  "step": 560
346
  },
347
  {
348
- "epoch": 0.75,
349
- "learning_rate": 4.165446559297219e-05,
350
- "loss": 0.1763,
351
  "step": 570
352
  },
353
  {
354
- "epoch": 0.76,
355
- "learning_rate": 4.141044411908248e-05,
356
- "loss": 0.2554,
357
  "step": 580
358
  },
359
- {
360
- "epoch": 0.78,
361
- "learning_rate": 4.116642264519278e-05,
362
- "loss": 0.2948,
363
- "step": 590
364
- },
365
- {
366
- "epoch": 0.79,
367
- "learning_rate": 4.092240117130308e-05,
368
- "loss": 0.2537,
369
- "step": 600
370
- },
371
- {
372
- "epoch": 0.8,
373
- "learning_rate": 4.0678379697413375e-05,
374
- "loss": 0.3489,
375
- "step": 610
376
- },
377
- {
378
- "epoch": 0.82,
379
- "learning_rate": 4.043435822352367e-05,
380
- "loss": 0.1814,
381
- "step": 620
382
- },
383
- {
384
- "epoch": 0.83,
385
- "learning_rate": 4.019033674963397e-05,
386
- "loss": 0.2324,
387
- "step": 630
388
- },
389
- {
390
- "epoch": 0.84,
391
- "learning_rate": 3.9946315275744266e-05,
392
- "loss": 0.2768,
393
- "step": 640
394
- },
395
- {
396
- "epoch": 0.86,
397
- "learning_rate": 3.970229380185457e-05,
398
- "loss": 0.2329,
399
- "step": 650
400
- },
401
- {
402
- "epoch": 0.87,
403
- "learning_rate": 3.945827232796486e-05,
404
- "loss": 0.291,
405
- "step": 660
406
- },
407
- {
408
- "epoch": 0.88,
409
- "learning_rate": 3.9214250854075164e-05,
410
- "loss": 0.2469,
411
- "step": 670
412
- },
413
- {
414
- "epoch": 0.9,
415
- "learning_rate": 3.897022938018546e-05,
416
- "loss": 0.292,
417
- "step": 680
418
- },
419
- {
420
- "epoch": 0.91,
421
- "learning_rate": 3.872620790629576e-05,
422
- "loss": 0.1927,
423
- "step": 690
424
- },
425
- {
426
- "epoch": 0.92,
427
- "learning_rate": 3.8482186432406055e-05,
428
- "loss": 0.2352,
429
- "step": 700
430
- },
431
- {
432
- "epoch": 0.93,
433
- "learning_rate": 3.823816495851635e-05,
434
- "loss": 0.2666,
435
- "step": 710
436
- },
437
- {
438
- "epoch": 0.95,
439
- "learning_rate": 3.799414348462665e-05,
440
- "loss": 0.2444,
441
- "step": 720
442
- },
443
- {
444
- "epoch": 0.96,
445
- "learning_rate": 3.775012201073695e-05,
446
- "loss": 0.2317,
447
- "step": 730
448
- },
449
- {
450
- "epoch": 0.97,
451
- "learning_rate": 3.750610053684724e-05,
452
- "loss": 0.2368,
453
- "step": 740
454
- },
455
- {
456
- "epoch": 0.99,
457
- "learning_rate": 3.7262079062957546e-05,
458
- "loss": 0.2949,
459
- "step": 750
460
- },
461
- {
462
- "epoch": 1.0,
463
- "eval_accuracy": 0.9622222222222222,
464
- "eval_loss": 0.09624730050563812,
465
- "eval_runtime": 17.2591,
466
- "eval_samples_per_second": 156.439,
467
- "eval_steps_per_second": 19.584,
468
- "step": 759
469
- },
470
- {
471
- "epoch": 1.0,
472
- "learning_rate": 3.7018057589067836e-05,
473
- "loss": 0.1646,
474
- "step": 760
475
- },
476
- {
477
- "epoch": 1.01,
478
- "learning_rate": 3.677403611517814e-05,
479
- "loss": 0.2387,
480
- "step": 770
481
- },
482
- {
483
- "epoch": 1.03,
484
- "learning_rate": 3.653001464128844e-05,
485
- "loss": 0.2492,
486
- "step": 780
487
- },
488
- {
489
- "epoch": 1.04,
490
- "learning_rate": 3.6285993167398734e-05,
491
- "loss": 0.1725,
492
- "step": 790
493
- },
494
- {
495
- "epoch": 1.05,
496
- "learning_rate": 3.604197169350903e-05,
497
- "loss": 0.2442,
498
- "step": 800
499
- },
500
- {
501
- "epoch": 1.07,
502
- "learning_rate": 3.579795021961933e-05,
503
- "loss": 0.2032,
504
- "step": 810
505
- },
506
- {
507
- "epoch": 1.08,
508
- "learning_rate": 3.5553928745729625e-05,
509
- "loss": 0.2126,
510
- "step": 820
511
- },
512
- {
513
- "epoch": 1.09,
514
- "learning_rate": 3.530990727183993e-05,
515
- "loss": 0.1808,
516
- "step": 830
517
- },
518
- {
519
- "epoch": 1.11,
520
- "learning_rate": 3.506588579795022e-05,
521
- "loss": 0.0903,
522
- "step": 840
523
- },
524
- {
525
- "epoch": 1.12,
526
- "learning_rate": 3.482186432406052e-05,
527
- "loss": 0.3547,
528
- "step": 850
529
- },
530
- {
531
- "epoch": 1.13,
532
- "learning_rate": 3.457784285017082e-05,
533
- "loss": 0.3028,
534
- "step": 860
535
- },
536
- {
537
- "epoch": 1.15,
538
- "learning_rate": 3.4333821376281116e-05,
539
- "loss": 0.3452,
540
- "step": 870
541
- },
542
- {
543
- "epoch": 1.16,
544
- "learning_rate": 3.408979990239141e-05,
545
- "loss": 0.2131,
546
- "step": 880
547
- },
548
- {
549
- "epoch": 1.17,
550
- "learning_rate": 3.384577842850171e-05,
551
- "loss": 0.1604,
552
- "step": 890
553
- },
554
- {
555
- "epoch": 1.18,
556
- "learning_rate": 3.360175695461201e-05,
557
- "loss": 0.2881,
558
- "step": 900
559
- },
560
- {
561
- "epoch": 1.2,
562
- "learning_rate": 3.335773548072231e-05,
563
- "loss": 0.2011,
564
- "step": 910
565
- },
566
- {
567
- "epoch": 1.21,
568
- "learning_rate": 3.31137140068326e-05,
569
- "loss": 0.1844,
570
- "step": 920
571
- },
572
- {
573
- "epoch": 1.22,
574
- "learning_rate": 3.2869692532942905e-05,
575
- "loss": 0.2463,
576
- "step": 930
577
- },
578
- {
579
- "epoch": 1.24,
580
- "learning_rate": 3.2625671059053195e-05,
581
- "loss": 0.2523,
582
- "step": 940
583
- },
584
- {
585
- "epoch": 1.25,
586
- "learning_rate": 3.23816495851635e-05,
587
- "loss": 0.2369,
588
- "step": 950
589
- },
590
- {
591
- "epoch": 1.26,
592
- "learning_rate": 3.2137628111273795e-05,
593
- "loss": 0.184,
594
- "step": 960
595
- },
596
- {
597
- "epoch": 1.28,
598
- "learning_rate": 3.189360663738409e-05,
599
- "loss": 0.1624,
600
- "step": 970
601
- },
602
- {
603
- "epoch": 1.29,
604
- "learning_rate": 3.164958516349439e-05,
605
- "loss": 0.2741,
606
- "step": 980
607
- },
608
- {
609
- "epoch": 1.3,
610
- "learning_rate": 3.1405563689604686e-05,
611
- "loss": 0.278,
612
- "step": 990
613
- },
614
- {
615
- "epoch": 1.32,
616
- "learning_rate": 3.116154221571498e-05,
617
- "loss": 0.2127,
618
- "step": 1000
619
- },
620
- {
621
- "epoch": 1.33,
622
- "learning_rate": 3.091752074182529e-05,
623
- "loss": 0.1746,
624
- "step": 1010
625
- },
626
- {
627
- "epoch": 1.34,
628
- "learning_rate": 3.067349926793558e-05,
629
- "loss": 0.1528,
630
- "step": 1020
631
- },
632
- {
633
- "epoch": 1.36,
634
- "learning_rate": 3.042947779404588e-05,
635
- "loss": 0.2522,
636
- "step": 1030
637
- },
638
- {
639
- "epoch": 1.37,
640
- "learning_rate": 3.0185456320156174e-05,
641
- "loss": 0.2084,
642
- "step": 1040
643
- },
644
- {
645
- "epoch": 1.38,
646
- "learning_rate": 2.9941434846266475e-05,
647
- "loss": 0.2176,
648
- "step": 1050
649
- },
650
- {
651
- "epoch": 1.4,
652
- "learning_rate": 2.969741337237677e-05,
653
- "loss": 0.261,
654
- "step": 1060
655
- },
656
- {
657
- "epoch": 1.41,
658
- "learning_rate": 2.9453391898487072e-05,
659
- "loss": 0.1546,
660
- "step": 1070
661
- },
662
- {
663
- "epoch": 1.42,
664
- "learning_rate": 2.9209370424597365e-05,
665
- "loss": 0.2127,
666
- "step": 1080
667
- },
668
- {
669
- "epoch": 1.44,
670
- "learning_rate": 2.8965348950707666e-05,
671
- "loss": 0.1761,
672
- "step": 1090
673
- },
674
- {
675
- "epoch": 1.45,
676
- "learning_rate": 2.872132747681796e-05,
677
- "loss": 0.2691,
678
- "step": 1100
679
- },
680
- {
681
- "epoch": 1.46,
682
- "learning_rate": 2.847730600292826e-05,
683
- "loss": 0.1789,
684
- "step": 1110
685
- },
686
- {
687
- "epoch": 1.47,
688
- "learning_rate": 2.8233284529038557e-05,
689
- "loss": 0.2182,
690
- "step": 1120
691
- },
692
- {
693
- "epoch": 1.49,
694
- "learning_rate": 2.7989263055148857e-05,
695
- "loss": 0.2,
696
- "step": 1130
697
- },
698
- {
699
- "epoch": 1.5,
700
- "learning_rate": 2.774524158125915e-05,
701
- "loss": 0.1607,
702
- "step": 1140
703
- },
704
- {
705
- "epoch": 1.51,
706
- "learning_rate": 2.750122010736945e-05,
707
- "loss": 0.1653,
708
- "step": 1150
709
- },
710
- {
711
- "epoch": 1.53,
712
- "learning_rate": 2.7257198633479748e-05,
713
- "loss": 0.1926,
714
- "step": 1160
715
- },
716
- {
717
- "epoch": 1.54,
718
- "learning_rate": 2.7013177159590048e-05,
719
- "loss": 0.1232,
720
- "step": 1170
721
- },
722
- {
723
- "epoch": 1.55,
724
- "learning_rate": 2.676915568570034e-05,
725
- "loss": 0.2433,
726
- "step": 1180
727
- },
728
- {
729
- "epoch": 1.57,
730
- "learning_rate": 2.6525134211810642e-05,
731
- "loss": 0.1619,
732
- "step": 1190
733
- },
734
- {
735
- "epoch": 1.58,
736
- "learning_rate": 2.628111273792094e-05,
737
- "loss": 0.2793,
738
- "step": 1200
739
- },
740
- {
741
- "epoch": 1.59,
742
- "learning_rate": 2.603709126403124e-05,
743
- "loss": 0.21,
744
- "step": 1210
745
- },
746
- {
747
- "epoch": 1.61,
748
- "learning_rate": 2.5793069790141533e-05,
749
- "loss": 0.1435,
750
- "step": 1220
751
- },
752
- {
753
- "epoch": 1.62,
754
- "learning_rate": 2.5549048316251833e-05,
755
- "loss": 0.1039,
756
- "step": 1230
757
- },
758
- {
759
- "epoch": 1.63,
760
- "learning_rate": 2.530502684236213e-05,
761
- "loss": 0.1709,
762
- "step": 1240
763
- },
764
- {
765
- "epoch": 1.65,
766
- "learning_rate": 2.506100536847243e-05,
767
- "loss": 0.18,
768
- "step": 1250
769
- },
770
- {
771
- "epoch": 1.66,
772
- "learning_rate": 2.4816983894582724e-05,
773
- "loss": 0.2101,
774
- "step": 1260
775
- },
776
- {
777
- "epoch": 1.67,
778
- "learning_rate": 2.457296242069302e-05,
779
- "loss": 0.1362,
780
- "step": 1270
781
- },
782
- {
783
- "epoch": 1.69,
784
- "learning_rate": 2.4328940946803318e-05,
785
- "loss": 0.1793,
786
- "step": 1280
787
- },
788
- {
789
- "epoch": 1.7,
790
- "learning_rate": 2.4084919472913618e-05,
791
- "loss": 0.0959,
792
- "step": 1290
793
- },
794
- {
795
- "epoch": 1.71,
796
- "learning_rate": 2.3840897999023915e-05,
797
- "loss": 0.1372,
798
- "step": 1300
799
- },
800
- {
801
- "epoch": 1.72,
802
- "learning_rate": 2.3596876525134212e-05,
803
- "loss": 0.1696,
804
- "step": 1310
805
- },
806
- {
807
- "epoch": 1.74,
808
- "learning_rate": 2.335285505124451e-05,
809
- "loss": 0.105,
810
- "step": 1320
811
- },
812
- {
813
- "epoch": 1.75,
814
- "learning_rate": 2.310883357735481e-05,
815
- "loss": 0.105,
816
- "step": 1330
817
- },
818
- {
819
- "epoch": 1.76,
820
- "learning_rate": 2.2864812103465106e-05,
821
- "loss": 0.1886,
822
- "step": 1340
823
- },
824
- {
825
- "epoch": 1.78,
826
- "learning_rate": 2.2620790629575403e-05,
827
- "loss": 0.1618,
828
- "step": 1350
829
- },
830
- {
831
- "epoch": 1.79,
832
- "learning_rate": 2.23767691556857e-05,
833
- "loss": 0.1166,
834
- "step": 1360
835
- },
836
- {
837
- "epoch": 1.8,
838
- "learning_rate": 2.2132747681795997e-05,
839
- "loss": 0.1601,
840
- "step": 1370
841
- },
842
- {
843
- "epoch": 1.82,
844
- "learning_rate": 2.1888726207906297e-05,
845
- "loss": 0.1977,
846
- "step": 1380
847
- },
848
- {
849
- "epoch": 1.83,
850
- "learning_rate": 2.1644704734016594e-05,
851
- "loss": 0.0785,
852
- "step": 1390
853
- },
854
- {
855
- "epoch": 1.84,
856
- "learning_rate": 2.140068326012689e-05,
857
- "loss": 0.1584,
858
- "step": 1400
859
- },
860
- {
861
- "epoch": 1.86,
862
- "learning_rate": 2.1156661786237188e-05,
863
- "loss": 0.1763,
864
- "step": 1410
865
- },
866
- {
867
- "epoch": 1.87,
868
- "learning_rate": 2.0912640312347485e-05,
869
- "loss": 0.1505,
870
- "step": 1420
871
- },
872
- {
873
- "epoch": 1.88,
874
- "learning_rate": 2.0668618838457785e-05,
875
- "loss": 0.1259,
876
- "step": 1430
877
- },
878
- {
879
- "epoch": 1.9,
880
- "learning_rate": 2.0424597364568082e-05,
881
- "loss": 0.1144,
882
- "step": 1440
883
- },
884
- {
885
- "epoch": 1.91,
886
- "learning_rate": 2.018057589067838e-05,
887
- "loss": 0.2199,
888
- "step": 1450
889
- },
890
- {
891
- "epoch": 1.92,
892
- "learning_rate": 1.9936554416788676e-05,
893
- "loss": 0.1495,
894
- "step": 1460
895
- },
896
- {
897
- "epoch": 1.94,
898
- "learning_rate": 1.9692532942898977e-05,
899
- "loss": 0.2081,
900
- "step": 1470
901
- },
902
- {
903
- "epoch": 1.95,
904
- "learning_rate": 1.9448511469009274e-05,
905
- "loss": 0.1412,
906
- "step": 1480
907
- },
908
- {
909
- "epoch": 1.96,
910
- "learning_rate": 1.920448999511957e-05,
911
- "loss": 0.1537,
912
- "step": 1490
913
- },
914
- {
915
- "epoch": 1.97,
916
- "learning_rate": 1.8960468521229867e-05,
917
- "loss": 0.1342,
918
- "step": 1500
919
- },
920
- {
921
- "epoch": 1.99,
922
- "learning_rate": 1.8716447047340164e-05,
923
- "loss": 0.1637,
924
- "step": 1510
925
- },
926
- {
927
- "epoch": 2.0,
928
- "eval_accuracy": 0.9818518518518519,
929
- "eval_loss": 0.06411131471395493,
930
- "eval_runtime": 16.3072,
931
- "eval_samples_per_second": 165.571,
932
- "eval_steps_per_second": 20.727,
933
- "step": 1519
934
- },
935
- {
936
- "epoch": 2.0,
937
- "learning_rate": 1.8472425573450465e-05,
938
- "loss": 0.2376,
939
- "step": 1520
940
- },
941
- {
942
- "epoch": 2.01,
943
- "learning_rate": 1.822840409956076e-05,
944
- "loss": 0.1874,
945
- "step": 1530
946
- },
947
- {
948
- "epoch": 2.03,
949
- "learning_rate": 1.798438262567106e-05,
950
- "loss": 0.152,
951
- "step": 1540
952
- },
953
- {
954
- "epoch": 2.04,
955
- "learning_rate": 1.7740361151781356e-05,
956
- "loss": 0.2576,
957
- "step": 1550
958
- },
959
- {
960
- "epoch": 2.05,
961
- "learning_rate": 1.7496339677891656e-05,
962
- "loss": 0.1981,
963
- "step": 1560
964
- },
965
- {
966
- "epoch": 2.07,
967
- "learning_rate": 1.7252318204001953e-05,
968
- "loss": 0.1047,
969
- "step": 1570
970
- },
971
- {
972
- "epoch": 2.08,
973
- "learning_rate": 1.700829673011225e-05,
974
- "loss": 0.1613,
975
- "step": 1580
976
- },
977
- {
978
- "epoch": 2.09,
979
- "learning_rate": 1.6764275256222547e-05,
980
- "loss": 0.1226,
981
- "step": 1590
982
- },
983
- {
984
- "epoch": 2.11,
985
- "learning_rate": 1.6520253782332844e-05,
986
- "loss": 0.1976,
987
- "step": 1600
988
- },
989
- {
990
- "epoch": 2.12,
991
- "learning_rate": 1.6276232308443144e-05,
992
- "loss": 0.1518,
993
- "step": 1610
994
- },
995
- {
996
- "epoch": 2.13,
997
- "learning_rate": 1.603221083455344e-05,
998
- "loss": 0.1462,
999
- "step": 1620
1000
- },
1001
- {
1002
- "epoch": 2.15,
1003
- "learning_rate": 1.5788189360663738e-05,
1004
- "loss": 0.1786,
1005
- "step": 1630
1006
- },
1007
- {
1008
- "epoch": 2.16,
1009
- "learning_rate": 1.5544167886774035e-05,
1010
- "loss": 0.1508,
1011
- "step": 1640
1012
- },
1013
- {
1014
- "epoch": 2.17,
1015
- "learning_rate": 1.5300146412884335e-05,
1016
- "loss": 0.1068,
1017
- "step": 1650
1018
- },
1019
- {
1020
- "epoch": 2.19,
1021
- "learning_rate": 1.5056124938994632e-05,
1022
- "loss": 0.2054,
1023
- "step": 1660
1024
- },
1025
- {
1026
- "epoch": 2.2,
1027
- "learning_rate": 1.4812103465104929e-05,
1028
- "loss": 0.1709,
1029
- "step": 1670
1030
- },
1031
- {
1032
- "epoch": 2.21,
1033
- "learning_rate": 1.4568081991215226e-05,
1034
- "loss": 0.13,
1035
- "step": 1680
1036
- },
1037
- {
1038
- "epoch": 2.23,
1039
- "learning_rate": 1.4324060517325525e-05,
1040
- "loss": 0.145,
1041
- "step": 1690
1042
- },
1043
- {
1044
- "epoch": 2.24,
1045
- "learning_rate": 1.4080039043435821e-05,
1046
- "loss": 0.1542,
1047
- "step": 1700
1048
- },
1049
- {
1050
- "epoch": 2.25,
1051
- "learning_rate": 1.383601756954612e-05,
1052
- "loss": 0.1799,
1053
- "step": 1710
1054
- },
1055
- {
1056
- "epoch": 2.26,
1057
- "learning_rate": 1.3591996095656417e-05,
1058
- "loss": 0.1887,
1059
- "step": 1720
1060
- },
1061
- {
1062
- "epoch": 2.28,
1063
- "learning_rate": 1.3347974621766716e-05,
1064
- "loss": 0.2048,
1065
- "step": 1730
1066
- },
1067
- {
1068
- "epoch": 2.29,
1069
- "learning_rate": 1.3103953147877013e-05,
1070
- "loss": 0.1676,
1071
- "step": 1740
1072
- },
1073
- {
1074
- "epoch": 2.3,
1075
- "learning_rate": 1.2859931673987311e-05,
1076
- "loss": 0.1288,
1077
- "step": 1750
1078
- },
1079
- {
1080
- "epoch": 2.32,
1081
- "learning_rate": 1.2615910200097608e-05,
1082
- "loss": 0.0962,
1083
- "step": 1760
1084
- },
1085
- {
1086
- "epoch": 2.33,
1087
- "learning_rate": 1.2371888726207907e-05,
1088
- "loss": 0.1243,
1089
- "step": 1770
1090
- },
1091
- {
1092
- "epoch": 2.34,
1093
- "learning_rate": 1.2127867252318205e-05,
1094
- "loss": 0.1722,
1095
- "step": 1780
1096
- },
1097
- {
1098
- "epoch": 2.36,
1099
- "learning_rate": 1.1883845778428502e-05,
1100
- "loss": 0.2244,
1101
- "step": 1790
1102
- },
1103
- {
1104
- "epoch": 2.37,
1105
- "learning_rate": 1.1639824304538801e-05,
1106
- "loss": 0.2038,
1107
- "step": 1800
1108
- },
1109
- {
1110
- "epoch": 2.38,
1111
- "learning_rate": 1.1395802830649098e-05,
1112
- "loss": 0.1551,
1113
- "step": 1810
1114
- },
1115
- {
1116
- "epoch": 2.4,
1117
- "learning_rate": 1.1151781356759395e-05,
1118
- "loss": 0.0849,
1119
- "step": 1820
1120
- },
1121
- {
1122
- "epoch": 2.41,
1123
- "learning_rate": 1.0907759882869694e-05,
1124
- "loss": 0.1296,
1125
- "step": 1830
1126
- },
1127
- {
1128
- "epoch": 2.42,
1129
- "learning_rate": 1.066373840897999e-05,
1130
- "loss": 0.0738,
1131
- "step": 1840
1132
- },
1133
- {
1134
- "epoch": 2.44,
1135
- "learning_rate": 1.0419716935090289e-05,
1136
- "loss": 0.1605,
1137
- "step": 1850
1138
- },
1139
- {
1140
- "epoch": 2.45,
1141
- "learning_rate": 1.0175695461200586e-05,
1142
- "loss": 0.2122,
1143
- "step": 1860
1144
- },
1145
- {
1146
- "epoch": 2.46,
1147
- "learning_rate": 9.931673987310885e-06,
1148
- "loss": 0.1233,
1149
- "step": 1870
1150
- },
1151
- {
1152
- "epoch": 2.48,
1153
- "learning_rate": 9.687652513421182e-06,
1154
- "loss": 0.138,
1155
- "step": 1880
1156
- },
1157
- {
1158
- "epoch": 2.49,
1159
- "learning_rate": 9.44363103953148e-06,
1160
- "loss": 0.0963,
1161
- "step": 1890
1162
- },
1163
- {
1164
- "epoch": 2.5,
1165
- "learning_rate": 9.199609565641777e-06,
1166
- "loss": 0.1447,
1167
- "step": 1900
1168
- },
1169
- {
1170
- "epoch": 2.51,
1171
- "learning_rate": 8.955588091752074e-06,
1172
- "loss": 0.15,
1173
- "step": 1910
1174
- },
1175
- {
1176
- "epoch": 2.53,
1177
- "learning_rate": 8.711566617862373e-06,
1178
- "loss": 0.0834,
1179
- "step": 1920
1180
- },
1181
- {
1182
- "epoch": 2.54,
1183
- "learning_rate": 8.46754514397267e-06,
1184
- "loss": 0.1187,
1185
- "step": 1930
1186
- },
1187
- {
1188
- "epoch": 2.55,
1189
- "learning_rate": 8.223523670082968e-06,
1190
- "loss": 0.1151,
1191
- "step": 1940
1192
- },
1193
- {
1194
- "epoch": 2.57,
1195
- "learning_rate": 7.979502196193265e-06,
1196
- "loss": 0.1217,
1197
- "step": 1950
1198
- },
1199
- {
1200
- "epoch": 2.58,
1201
- "learning_rate": 7.735480722303564e-06,
1202
- "loss": 0.0697,
1203
- "step": 1960
1204
- },
1205
- {
1206
- "epoch": 2.59,
1207
- "learning_rate": 7.491459248413861e-06,
1208
- "loss": 0.0829,
1209
- "step": 1970
1210
- },
1211
- {
1212
- "epoch": 2.61,
1213
- "learning_rate": 7.247437774524159e-06,
1214
- "loss": 0.156,
1215
- "step": 1980
1216
- },
1217
- {
1218
- "epoch": 2.62,
1219
- "learning_rate": 7.0034163006344565e-06,
1220
- "loss": 0.0987,
1221
- "step": 1990
1222
- },
1223
- {
1224
- "epoch": 2.63,
1225
- "learning_rate": 6.759394826744754e-06,
1226
- "loss": 0.1936,
1227
- "step": 2000
1228
- },
1229
- {
1230
- "epoch": 2.65,
1231
- "learning_rate": 6.515373352855052e-06,
1232
- "loss": 0.143,
1233
- "step": 2010
1234
- },
1235
- {
1236
- "epoch": 2.66,
1237
- "learning_rate": 6.27135187896535e-06,
1238
- "loss": 0.1274,
1239
- "step": 2020
1240
- },
1241
- {
1242
- "epoch": 2.67,
1243
- "learning_rate": 6.027330405075647e-06,
1244
- "loss": 0.157,
1245
- "step": 2030
1246
- },
1247
- {
1248
- "epoch": 2.69,
1249
- "learning_rate": 5.7833089311859446e-06,
1250
- "loss": 0.1373,
1251
- "step": 2040
1252
- },
1253
- {
1254
- "epoch": 2.7,
1255
- "learning_rate": 5.539287457296242e-06,
1256
- "loss": 0.0956,
1257
- "step": 2050
1258
- },
1259
- {
1260
- "epoch": 2.71,
1261
- "learning_rate": 5.29526598340654e-06,
1262
- "loss": 0.0983,
1263
- "step": 2060
1264
- },
1265
- {
1266
- "epoch": 2.73,
1267
- "learning_rate": 5.051244509516838e-06,
1268
- "loss": 0.1484,
1269
- "step": 2070
1270
- },
1271
- {
1272
- "epoch": 2.74,
1273
- "learning_rate": 4.807223035627136e-06,
1274
- "loss": 0.1515,
1275
- "step": 2080
1276
- },
1277
- {
1278
- "epoch": 2.75,
1279
- "learning_rate": 4.5632015617374335e-06,
1280
- "loss": 0.1195,
1281
- "step": 2090
1282
- },
1283
- {
1284
- "epoch": 2.76,
1285
- "learning_rate": 4.319180087847731e-06,
1286
- "loss": 0.1238,
1287
- "step": 2100
1288
- },
1289
- {
1290
- "epoch": 2.78,
1291
- "learning_rate": 4.075158613958028e-06,
1292
- "loss": 0.1279,
1293
- "step": 2110
1294
- },
1295
- {
1296
- "epoch": 2.79,
1297
- "learning_rate": 3.831137140068326e-06,
1298
- "loss": 0.1599,
1299
- "step": 2120
1300
- },
1301
- {
1302
- "epoch": 2.8,
1303
- "learning_rate": 3.587115666178624e-06,
1304
- "loss": 0.0906,
1305
- "step": 2130
1306
- },
1307
- {
1308
- "epoch": 2.82,
1309
- "learning_rate": 3.3430941922889216e-06,
1310
- "loss": 0.1046,
1311
- "step": 2140
1312
- },
1313
- {
1314
- "epoch": 2.83,
1315
- "learning_rate": 3.0990727183992194e-06,
1316
- "loss": 0.1117,
1317
- "step": 2150
1318
- },
1319
- {
1320
- "epoch": 2.84,
1321
- "learning_rate": 2.855051244509517e-06,
1322
- "loss": 0.1396,
1323
- "step": 2160
1324
- },
1325
  {
1326
  "epoch": 2.86,
1327
- "learning_rate": 2.611029770619815e-06,
1328
- "loss": 0.0778,
1329
- "step": 2170
1330
- },
1331
- {
1332
- "epoch": 2.87,
1333
- "learning_rate": 2.3670082967301123e-06,
1334
- "loss": 0.0856,
1335
- "step": 2180
1336
- },
1337
- {
1338
- "epoch": 2.88,
1339
- "learning_rate": 2.12298682284041e-06,
1340
- "loss": 0.1255,
1341
- "step": 2190
1342
- },
1343
- {
1344
- "epoch": 2.9,
1345
- "learning_rate": 1.8789653489507079e-06,
1346
- "loss": 0.0984,
1347
- "step": 2200
1348
  },
1349
  {
1350
  "epoch": 2.91,
1351
- "learning_rate": 1.6349438750610055e-06,
1352
- "loss": 0.14,
1353
- "step": 2210
1354
- },
1355
- {
1356
- "epoch": 2.92,
1357
- "learning_rate": 1.3909224011713032e-06,
1358
- "loss": 0.0625,
1359
- "step": 2220
1360
- },
1361
- {
1362
- "epoch": 2.94,
1363
- "learning_rate": 1.1469009272816008e-06,
1364
- "loss": 0.1717,
1365
- "step": 2230
1366
  },
1367
  {
1368
  "epoch": 2.95,
1369
- "learning_rate": 9.028794533918985e-07,
1370
- "loss": 0.0946,
1371
- "step": 2240
1372
- },
1373
- {
1374
- "epoch": 2.96,
1375
- "learning_rate": 6.588579795021963e-07,
1376
- "loss": 0.1303,
1377
- "step": 2250
1378
  },
1379
  {
1380
- "epoch": 2.98,
1381
- "learning_rate": 4.148365056124939e-07,
1382
- "loss": 0.0823,
1383
- "step": 2260
 
 
 
1384
  },
1385
  {
1386
  "epoch": 2.99,
1387
- "learning_rate": 1.7081503172279163e-07,
1388
- "loss": 0.0765,
1389
- "step": 2270
1390
- },
1391
- {
1392
- "epoch": 3.0,
1393
- "eval_accuracy": 0.9881481481481481,
1394
- "eval_loss": 0.04212497919797897,
1395
- "eval_runtime": 16.483,
1396
- "eval_samples_per_second": 163.805,
1397
- "eval_steps_per_second": 20.506,
1398
- "step": 2277
1399
- },
1400
- {
1401
- "epoch": 3.0,
1402
- "step": 2277,
1403
- "total_flos": 1.8113127425839596e+18,
1404
- "train_loss": 0.2791935749937685,
1405
- "train_runtime": 1159.1194,
1406
- "train_samples_per_second": 62.893,
1407
- "train_steps_per_second": 1.964
1408
  }
1409
  ],
1410
  "logging_steps": 10,
1411
- "max_steps": 2277,
1412
  "num_train_epochs": 3,
1413
  "save_steps": 500,
1414
- "total_flos": 1.8113127425839596e+18,
1415
  "trial_name": null,
1416
  "trial_params": null
1417
  }
 
1
  {
2
+ "best_metric": 0.9959183673469387,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-413",
4
+ "epoch": 2.9927360774818403,
5
  "eval_steps": 500,
6
+ "global_step": 618,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.05,
13
+ "learning_rate": 8.064516129032258e-06,
14
+ "loss": 0.6891,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.1,
19
+ "learning_rate": 1.6129032258064517e-05,
20
+ "loss": 0.6032,
21
  "step": 20
22
  },
23
  {
24
+ "epoch": 0.15,
25
+ "learning_rate": 2.4193548387096777e-05,
26
+ "loss": 0.452,
27
  "step": 30
28
  },
29
  {
30
+ "epoch": 0.19,
31
+ "learning_rate": 3.2258064516129034e-05,
32
+ "loss": 0.3304,
33
  "step": 40
34
  },
35
  {
36
+ "epoch": 0.24,
37
+ "learning_rate": 4.032258064516129e-05,
38
+ "loss": 0.2486,
39
  "step": 50
40
  },
41
  {
42
+ "epoch": 0.29,
43
+ "learning_rate": 4.8387096774193554e-05,
44
+ "loss": 0.1912,
45
  "step": 60
46
  },
47
  {
48
+ "epoch": 0.34,
49
+ "learning_rate": 4.9280575539568345e-05,
50
+ "loss": 0.1454,
51
  "step": 70
52
  },
53
  {
54
+ "epoch": 0.39,
55
+ "learning_rate": 4.838129496402878e-05,
56
+ "loss": 0.1398,
57
  "step": 80
58
  },
59
  {
60
+ "epoch": 0.44,
61
+ "learning_rate": 4.748201438848921e-05,
62
+ "loss": 0.1292,
63
  "step": 90
64
  },
65
  {
66
+ "epoch": 0.48,
67
+ "learning_rate": 4.658273381294964e-05,
68
+ "loss": 0.1324,
69
  "step": 100
70
  },
71
  {
72
+ "epoch": 0.53,
73
+ "learning_rate": 4.5683453237410076e-05,
74
+ "loss": 0.077,
75
  "step": 110
76
  },
77
  {
78
+ "epoch": 0.58,
79
+ "learning_rate": 4.47841726618705e-05,
80
+ "loss": 0.0785,
81
  "step": 120
82
  },
83
  {
84
+ "epoch": 0.63,
85
+ "learning_rate": 4.3884892086330935e-05,
86
+ "loss": 0.1228,
87
  "step": 130
88
  },
89
  {
90
+ "epoch": 0.68,
91
+ "learning_rate": 4.298561151079137e-05,
92
+ "loss": 0.054,
93
  "step": 140
94
  },
95
  {
96
+ "epoch": 0.73,
97
+ "learning_rate": 4.20863309352518e-05,
98
+ "loss": 0.1124,
99
  "step": 150
100
  },
101
  {
102
+ "epoch": 0.77,
103
+ "learning_rate": 4.1187050359712234e-05,
104
+ "loss": 0.0875,
105
  "step": 160
106
  },
107
  {
108
+ "epoch": 0.82,
109
+ "learning_rate": 4.0287769784172666e-05,
110
+ "loss": 0.1152,
111
  "step": 170
112
  },
113
  {
114
+ "epoch": 0.87,
115
+ "learning_rate": 3.938848920863309e-05,
116
+ "loss": 0.107,
117
  "step": 180
118
  },
119
  {
120
+ "epoch": 0.92,
121
+ "learning_rate": 3.8489208633093525e-05,
122
+ "loss": 0.0887,
123
  "step": 190
124
  },
125
  {
126
+ "epoch": 0.97,
127
+ "learning_rate": 3.758992805755396e-05,
128
+ "loss": 0.0826,
129
  "step": 200
130
  },
131
  {
132
+ "epoch": 1.0,
133
+ "eval_accuracy": 0.9918367346938776,
134
+ "eval_loss": 0.03725149855017662,
135
+ "eval_runtime": 12.9921,
136
+ "eval_samples_per_second": 56.573,
137
+ "eval_steps_per_second": 7.081,
138
+ "step": 206
139
+ },
140
+ {
141
+ "epoch": 1.02,
142
+ "learning_rate": 3.669064748201439e-05,
143
+ "loss": 0.0654,
144
  "step": 210
145
  },
146
  {
147
+ "epoch": 1.07,
148
+ "learning_rate": 3.5791366906474824e-05,
149
+ "loss": 0.0904,
150
  "step": 220
151
  },
152
  {
153
+ "epoch": 1.11,
154
+ "learning_rate": 3.489208633093525e-05,
155
+ "loss": 0.0879,
156
  "step": 230
157
  },
158
  {
159
+ "epoch": 1.16,
160
+ "learning_rate": 3.399280575539568e-05,
161
+ "loss": 0.0731,
162
  "step": 240
163
  },
164
  {
165
+ "epoch": 1.21,
166
+ "learning_rate": 3.3093525179856116e-05,
167
+ "loss": 0.0715,
168
  "step": 250
169
  },
170
  {
171
+ "epoch": 1.26,
172
+ "learning_rate": 3.219424460431655e-05,
173
+ "loss": 0.0513,
174
  "step": 260
175
  },
176
  {
177
+ "epoch": 1.31,
178
+ "learning_rate": 3.129496402877698e-05,
179
+ "loss": 0.0458,
180
  "step": 270
181
  },
182
  {
183
+ "epoch": 1.36,
184
+ "learning_rate": 3.039568345323741e-05,
185
+ "loss": 0.0795,
186
  "step": 280
187
  },
188
  {
189
+ "epoch": 1.4,
190
+ "learning_rate": 2.9496402877697844e-05,
191
+ "loss": 0.0743,
192
  "step": 290
193
  },
194
  {
195
+ "epoch": 1.45,
196
+ "learning_rate": 2.8597122302158273e-05,
197
+ "loss": 0.0943,
198
  "step": 300
199
  },
200
  {
201
+ "epoch": 1.5,
202
+ "learning_rate": 2.7697841726618706e-05,
203
+ "loss": 0.0961,
204
  "step": 310
205
  },
206
  {
207
+ "epoch": 1.55,
208
+ "learning_rate": 2.679856115107914e-05,
209
+ "loss": 0.0387,
210
  "step": 320
211
  },
212
  {
213
+ "epoch": 1.6,
214
+ "learning_rate": 2.589928057553957e-05,
215
+ "loss": 0.0843,
216
  "step": 330
217
  },
218
  {
219
+ "epoch": 1.65,
220
+ "learning_rate": 2.5e-05,
221
+ "loss": 0.0816,
222
  "step": 340
223
  },
224
  {
225
+ "epoch": 1.69,
226
+ "learning_rate": 2.4100719424460434e-05,
227
+ "loss": 0.0263,
228
  "step": 350
229
  },
230
  {
231
+ "epoch": 1.74,
232
+ "learning_rate": 2.3201438848920864e-05,
233
+ "loss": 0.0794,
234
  "step": 360
235
  },
236
  {
237
+ "epoch": 1.79,
238
+ "learning_rate": 2.2302158273381296e-05,
239
+ "loss": 0.0321,
240
  "step": 370
241
  },
242
  {
243
+ "epoch": 1.84,
244
+ "learning_rate": 2.140287769784173e-05,
245
+ "loss": 0.0087,
246
  "step": 380
247
  },
248
  {
249
+ "epoch": 1.89,
250
+ "learning_rate": 2.050359712230216e-05,
251
+ "loss": 0.0621,
252
  "step": 390
253
  },
254
  {
255
+ "epoch": 1.94,
256
+ "learning_rate": 1.960431654676259e-05,
257
+ "loss": 0.0501,
258
  "step": 400
259
  },
260
  {
261
+ "epoch": 1.99,
262
+ "learning_rate": 1.8705035971223024e-05,
263
+ "loss": 0.0658,
264
  "step": 410
265
  },
266
  {
267
+ "epoch": 2.0,
268
+ "eval_accuracy": 0.9959183673469387,
269
+ "eval_loss": 0.02241336554288864,
270
+ "eval_runtime": 14.7711,
271
+ "eval_samples_per_second": 49.759,
272
+ "eval_steps_per_second": 6.228,
273
+ "step": 413
274
+ },
275
+ {
276
+ "epoch": 2.03,
277
+ "learning_rate": 1.7805755395683454e-05,
278
+ "loss": 0.0063,
279
  "step": 420
280
  },
281
  {
282
+ "epoch": 2.08,
283
+ "learning_rate": 1.6906474820143887e-05,
284
+ "loss": 0.0207,
285
  "step": 430
286
  },
287
  {
288
+ "epoch": 2.13,
289
+ "learning_rate": 1.6007194244604316e-05,
290
+ "loss": 0.0129,
291
  "step": 440
292
  },
293
  {
294
+ "epoch": 2.18,
295
+ "learning_rate": 1.5107913669064749e-05,
296
+ "loss": 0.0116,
297
  "step": 450
298
  },
299
  {
300
+ "epoch": 2.23,
301
+ "learning_rate": 1.420863309352518e-05,
302
+ "loss": 0.0049,
303
  "step": 460
304
  },
305
  {
306
+ "epoch": 2.28,
307
+ "learning_rate": 1.3309352517985613e-05,
308
+ "loss": 0.0311,
309
  "step": 470
310
  },
311
  {
312
+ "epoch": 2.32,
313
+ "learning_rate": 1.2410071942446044e-05,
314
+ "loss": 0.037,
315
  "step": 480
316
  },
317
  {
318
+ "epoch": 2.37,
319
+ "learning_rate": 1.1510791366906475e-05,
320
+ "loss": 0.0572,
321
  "step": 490
322
  },
323
  {
324
+ "epoch": 2.42,
325
+ "learning_rate": 1.0611510791366908e-05,
326
+ "loss": 0.0038,
327
  "step": 500
328
  },
329
  {
330
+ "epoch": 2.47,
331
+ "learning_rate": 9.71223021582734e-06,
332
+ "loss": 0.0551,
333
  "step": 510
334
  },
335
  {
336
+ "epoch": 2.52,
337
+ "learning_rate": 8.812949640287769e-06,
338
+ "loss": 0.1015,
339
  "step": 520
340
  },
341
  {
342
+ "epoch": 2.57,
343
+ "learning_rate": 7.913669064748202e-06,
344
+ "loss": 0.0154,
345
  "step": 530
346
  },
347
  {
348
+ "epoch": 2.62,
349
+ "learning_rate": 7.014388489208633e-06,
350
+ "loss": 0.0531,
351
  "step": 540
352
  },
353
  {
354
+ "epoch": 2.66,
355
+ "learning_rate": 6.115107913669065e-06,
356
+ "loss": 0.0558,
357
  "step": 550
358
  },
359
  {
360
+ "epoch": 2.71,
361
+ "learning_rate": 5.215827338129497e-06,
362
+ "loss": 0.035,
363
  "step": 560
364
  },
365
  {
366
+ "epoch": 2.76,
367
+ "learning_rate": 4.316546762589929e-06,
368
+ "loss": 0.0335,
369
  "step": 570
370
  },
371
  {
372
+ "epoch": 2.81,
373
+ "learning_rate": 3.41726618705036e-06,
374
+ "loss": 0.0457,
375
  "step": 580
376
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  {
378
  "epoch": 2.86,
379
+ "learning_rate": 2.5179856115107916e-06,
380
+ "loss": 0.016,
381
+ "step": 590
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  },
383
  {
384
  "epoch": 2.91,
385
+ "learning_rate": 1.618705035971223e-06,
386
+ "loss": 0.0124,
387
+ "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
388
  },
389
  {
390
  "epoch": 2.95,
391
+ "learning_rate": 7.194244604316547e-07,
392
+ "loss": 0.012,
393
+ "step": 610
 
 
 
 
 
 
394
  },
395
  {
396
+ "epoch": 2.99,
397
+ "eval_accuracy": 0.9931972789115646,
398
+ "eval_loss": 0.027184385806322098,
399
+ "eval_runtime": 13.1253,
400
+ "eval_samples_per_second": 55.999,
401
+ "eval_steps_per_second": 7.009,
402
+ "step": 618
403
  },
404
  {
405
  "epoch": 2.99,
406
+ "step": 618,
407
+ "total_flos": 4.9150232620631654e+17,
408
+ "train_loss": 0.0970435019064104,
409
+ "train_runtime": 528.4446,
410
+ "train_samples_per_second": 37.508,
411
+ "train_steps_per_second": 1.169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  }
413
  ],
414
  "logging_steps": 10,
415
+ "max_steps": 618,
416
  "num_train_epochs": 3,
417
  "save_steps": 500,
418
+ "total_flos": 4.9150232620631654e+17,
419
  "trial_name": null,
420
  "trial_params": null
421
  }