andrecastro commited on
Commit
21de218
1 Parent(s): 8a2a6f1

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 2.99,
3
- "eval_accuracy": 0.9959183673469387,
4
- "eval_loss": 0.02241336554288864,
5
- "eval_runtime": 11.9967,
6
- "eval_samples_per_second": 61.267,
7
- "eval_steps_per_second": 7.669,
8
- "total_flos": 4.9150232620631654e+17,
9
- "train_loss": 0.0970435019064104,
10
- "train_runtime": 528.4446,
11
- "train_samples_per_second": 37.508,
12
- "train_steps_per_second": 1.169
13
  }
 
1
  {
2
  "epoch": 2.99,
3
+ "eval_accuracy": 0.7149341806627326,
4
+ "eval_loss": 1.1311845779418945,
5
+ "eval_runtime": 48.1534,
6
+ "eval_samples_per_second": 45.75,
7
+ "eval_steps_per_second": 5.732,
8
+ "total_flos": 3.815394309328896e+17,
9
+ "train_loss": 0.09319638077480098,
10
+ "train_runtime": 491.2433,
11
+ "train_samples_per_second": 31.384,
12
+ "train_steps_per_second": 0.977
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.99,
3
- "eval_accuracy": 0.9959183673469387,
4
- "eval_loss": 0.02241336554288864,
5
- "eval_runtime": 11.9967,
6
- "eval_samples_per_second": 61.267,
7
- "eval_steps_per_second": 7.669
8
  }
 
1
  {
2
  "epoch": 2.99,
3
+ "eval_accuracy": 0.7149341806627326,
4
+ "eval_loss": 1.1311845779418945,
5
+ "eval_runtime": 48.1534,
6
+ "eval_samples_per_second": 45.75,
7
+ "eval_steps_per_second": 5.732
8
  }
runs/Nov30_01-45-27_08b01d83a514/events.out.tfevents.1701309282.08b01d83a514.20755.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4aceb9125343f6442e7fd2c42aa8a2551882702524febc6d8a040dd98ee6997
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.99,
3
- "total_flos": 4.9150232620631654e+17,
4
- "train_loss": 0.0970435019064104,
5
- "train_runtime": 528.4446,
6
- "train_samples_per_second": 37.508,
7
- "train_steps_per_second": 1.169
8
  }
 
1
  {
2
  "epoch": 2.99,
3
+ "total_flos": 3.815394309328896e+17,
4
+ "train_loss": 0.09319638077480098,
5
+ "train_runtime": 491.2433,
6
+ "train_samples_per_second": 31.384,
7
+ "train_steps_per_second": 0.977
8
  }
trainer_state.json CHANGED
@@ -1,421 +1,343 @@
1
  {
2
- "best_metric": 0.9959183673469387,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-413",
4
- "epoch": 2.9927360774818403,
5
  "eval_steps": 500,
6
- "global_step": 618,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.05,
13
- "learning_rate": 8.064516129032258e-06,
14
- "loss": 0.6891,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.1,
19
- "learning_rate": 1.6129032258064517e-05,
20
- "loss": 0.6032,
21
  "step": 20
22
  },
23
  {
24
- "epoch": 0.15,
25
- "learning_rate": 2.4193548387096777e-05,
26
- "loss": 0.452,
27
  "step": 30
28
  },
29
  {
30
- "epoch": 0.19,
31
- "learning_rate": 3.2258064516129034e-05,
32
- "loss": 0.3304,
33
  "step": 40
34
  },
35
  {
36
- "epoch": 0.24,
37
- "learning_rate": 4.032258064516129e-05,
38
- "loss": 0.2486,
39
  "step": 50
40
  },
41
  {
42
- "epoch": 0.29,
43
- "learning_rate": 4.8387096774193554e-05,
44
- "loss": 0.1912,
45
  "step": 60
46
  },
47
  {
48
- "epoch": 0.34,
49
- "learning_rate": 4.9280575539568345e-05,
50
- "loss": 0.1454,
51
  "step": 70
52
  },
53
  {
54
- "epoch": 0.39,
55
- "learning_rate": 4.838129496402878e-05,
56
- "loss": 0.1398,
57
  "step": 80
58
  },
59
  {
60
- "epoch": 0.44,
61
- "learning_rate": 4.748201438848921e-05,
62
- "loss": 0.1292,
63
  "step": 90
64
  },
65
  {
66
- "epoch": 0.48,
67
- "learning_rate": 4.658273381294964e-05,
68
- "loss": 0.1324,
69
  "step": 100
70
  },
71
  {
72
- "epoch": 0.53,
73
- "learning_rate": 4.5683453237410076e-05,
74
- "loss": 0.077,
75
  "step": 110
76
  },
77
  {
78
- "epoch": 0.58,
79
- "learning_rate": 4.47841726618705e-05,
80
- "loss": 0.0785,
81
  "step": 120
82
  },
83
  {
84
- "epoch": 0.63,
85
- "learning_rate": 4.3884892086330935e-05,
86
- "loss": 0.1228,
87
  "step": 130
88
  },
89
  {
90
- "epoch": 0.68,
91
- "learning_rate": 4.298561151079137e-05,
92
- "loss": 0.054,
93
  "step": 140
94
  },
95
  {
96
- "epoch": 0.73,
97
- "learning_rate": 4.20863309352518e-05,
98
- "loss": 0.1124,
99
  "step": 150
100
  },
101
  {
102
- "epoch": 0.77,
103
- "learning_rate": 4.1187050359712234e-05,
104
- "loss": 0.0875,
 
 
 
 
 
 
 
 
 
105
  "step": 160
106
  },
107
  {
108
- "epoch": 0.82,
109
- "learning_rate": 4.0287769784172666e-05,
110
- "loss": 0.1152,
111
  "step": 170
112
  },
113
  {
114
- "epoch": 0.87,
115
- "learning_rate": 3.938848920863309e-05,
116
- "loss": 0.107,
117
  "step": 180
118
  },
119
  {
120
- "epoch": 0.92,
121
- "learning_rate": 3.8489208633093525e-05,
122
- "loss": 0.0887,
123
  "step": 190
124
  },
125
  {
126
- "epoch": 0.97,
127
- "learning_rate": 3.758992805755396e-05,
128
- "loss": 0.0826,
129
  "step": 200
130
  },
131
  {
132
- "epoch": 1.0,
133
- "eval_accuracy": 0.9918367346938776,
134
- "eval_loss": 0.03725149855017662,
135
- "eval_runtime": 12.9921,
136
- "eval_samples_per_second": 56.573,
137
- "eval_steps_per_second": 7.081,
138
- "step": 206
139
- },
140
- {
141
- "epoch": 1.02,
142
- "learning_rate": 3.669064748201439e-05,
143
- "loss": 0.0654,
144
  "step": 210
145
  },
146
  {
147
- "epoch": 1.07,
148
- "learning_rate": 3.5791366906474824e-05,
149
- "loss": 0.0904,
150
  "step": 220
151
  },
152
  {
153
- "epoch": 1.11,
154
- "learning_rate": 3.489208633093525e-05,
155
- "loss": 0.0879,
156
  "step": 230
157
  },
158
  {
159
- "epoch": 1.16,
160
- "learning_rate": 3.399280575539568e-05,
161
- "loss": 0.0731,
162
  "step": 240
163
  },
164
  {
165
- "epoch": 1.21,
166
- "learning_rate": 3.3093525179856116e-05,
167
- "loss": 0.0715,
168
  "step": 250
169
  },
170
  {
171
- "epoch": 1.26,
172
- "learning_rate": 3.219424460431655e-05,
173
- "loss": 0.0513,
174
  "step": 260
175
  },
176
  {
177
- "epoch": 1.31,
178
- "learning_rate": 3.129496402877698e-05,
179
- "loss": 0.0458,
180
  "step": 270
181
  },
182
  {
183
- "epoch": 1.36,
184
- "learning_rate": 3.039568345323741e-05,
185
- "loss": 0.0795,
186
  "step": 280
187
  },
188
  {
189
- "epoch": 1.4,
190
- "learning_rate": 2.9496402877697844e-05,
191
- "loss": 0.0743,
192
  "step": 290
193
  },
194
  {
195
- "epoch": 1.45,
196
- "learning_rate": 2.8597122302158273e-05,
197
- "loss": 0.0943,
198
  "step": 300
199
  },
200
  {
201
- "epoch": 1.5,
202
- "learning_rate": 2.7697841726618706e-05,
203
- "loss": 0.0961,
204
  "step": 310
205
  },
206
  {
207
- "epoch": 1.55,
208
- "learning_rate": 2.679856115107914e-05,
209
- "loss": 0.0387,
210
  "step": 320
211
  },
212
  {
213
- "epoch": 1.6,
214
- "learning_rate": 2.589928057553957e-05,
215
- "loss": 0.0843,
 
 
 
 
 
 
 
 
 
216
  "step": 330
217
  },
218
  {
219
- "epoch": 1.65,
220
- "learning_rate": 2.5e-05,
221
- "loss": 0.0816,
222
  "step": 340
223
  },
224
  {
225
- "epoch": 1.69,
226
- "learning_rate": 2.4100719424460434e-05,
227
- "loss": 0.0263,
228
  "step": 350
229
  },
230
  {
231
- "epoch": 1.74,
232
- "learning_rate": 2.3201438848920864e-05,
233
- "loss": 0.0794,
234
  "step": 360
235
  },
236
  {
237
- "epoch": 1.79,
238
- "learning_rate": 2.2302158273381296e-05,
239
- "loss": 0.0321,
240
  "step": 370
241
  },
242
  {
243
- "epoch": 1.84,
244
- "learning_rate": 2.140287769784173e-05,
245
- "loss": 0.0087,
246
  "step": 380
247
  },
248
  {
249
- "epoch": 1.89,
250
- "learning_rate": 2.050359712230216e-05,
251
- "loss": 0.0621,
252
  "step": 390
253
  },
254
  {
255
- "epoch": 1.94,
256
- "learning_rate": 1.960431654676259e-05,
257
- "loss": 0.0501,
258
  "step": 400
259
  },
260
  {
261
- "epoch": 1.99,
262
- "learning_rate": 1.8705035971223024e-05,
263
- "loss": 0.0658,
264
  "step": 410
265
  },
266
  {
267
- "epoch": 2.0,
268
- "eval_accuracy": 0.9959183673469387,
269
- "eval_loss": 0.02241336554288864,
270
- "eval_runtime": 14.7711,
271
- "eval_samples_per_second": 49.759,
272
- "eval_steps_per_second": 6.228,
273
- "step": 413
274
- },
275
- {
276
- "epoch": 2.03,
277
- "learning_rate": 1.7805755395683454e-05,
278
- "loss": 0.0063,
279
  "step": 420
280
  },
281
  {
282
- "epoch": 2.08,
283
- "learning_rate": 1.6906474820143887e-05,
284
- "loss": 0.0207,
285
  "step": 430
286
  },
287
  {
288
- "epoch": 2.13,
289
- "learning_rate": 1.6007194244604316e-05,
290
- "loss": 0.0129,
291
  "step": 440
292
  },
293
  {
294
- "epoch": 2.18,
295
- "learning_rate": 1.5107913669064749e-05,
296
- "loss": 0.0116,
297
  "step": 450
298
  },
299
  {
300
- "epoch": 2.23,
301
- "learning_rate": 1.420863309352518e-05,
302
- "loss": 0.0049,
303
  "step": 460
304
  },
305
  {
306
- "epoch": 2.28,
307
- "learning_rate": 1.3309352517985613e-05,
308
- "loss": 0.0311,
309
  "step": 470
310
  },
311
  {
312
- "epoch": 2.32,
313
- "learning_rate": 1.2410071942446044e-05,
314
- "loss": 0.037,
315
  "step": 480
316
  },
317
- {
318
- "epoch": 2.37,
319
- "learning_rate": 1.1510791366906475e-05,
320
- "loss": 0.0572,
321
- "step": 490
322
- },
323
- {
324
- "epoch": 2.42,
325
- "learning_rate": 1.0611510791366908e-05,
326
- "loss": 0.0038,
327
- "step": 500
328
- },
329
- {
330
- "epoch": 2.47,
331
- "learning_rate": 9.71223021582734e-06,
332
- "loss": 0.0551,
333
- "step": 510
334
- },
335
- {
336
- "epoch": 2.52,
337
- "learning_rate": 8.812949640287769e-06,
338
- "loss": 0.1015,
339
- "step": 520
340
- },
341
- {
342
- "epoch": 2.57,
343
- "learning_rate": 7.913669064748202e-06,
344
- "loss": 0.0154,
345
- "step": 530
346
- },
347
- {
348
- "epoch": 2.62,
349
- "learning_rate": 7.014388489208633e-06,
350
- "loss": 0.0531,
351
- "step": 540
352
- },
353
- {
354
- "epoch": 2.66,
355
- "learning_rate": 6.115107913669065e-06,
356
- "loss": 0.0558,
357
- "step": 550
358
- },
359
- {
360
- "epoch": 2.71,
361
- "learning_rate": 5.215827338129497e-06,
362
- "loss": 0.035,
363
- "step": 560
364
- },
365
- {
366
- "epoch": 2.76,
367
- "learning_rate": 4.316546762589929e-06,
368
- "loss": 0.0335,
369
- "step": 570
370
- },
371
- {
372
- "epoch": 2.81,
373
- "learning_rate": 3.41726618705036e-06,
374
- "loss": 0.0457,
375
- "step": 580
376
- },
377
- {
378
- "epoch": 2.86,
379
- "learning_rate": 2.5179856115107916e-06,
380
- "loss": 0.016,
381
- "step": 590
382
- },
383
- {
384
- "epoch": 2.91,
385
- "learning_rate": 1.618705035971223e-06,
386
- "loss": 0.0124,
387
- "step": 600
388
- },
389
- {
390
- "epoch": 2.95,
391
- "learning_rate": 7.194244604316547e-07,
392
- "loss": 0.012,
393
- "step": 610
394
- },
395
  {
396
  "epoch": 2.99,
397
- "eval_accuracy": 0.9931972789115646,
398
- "eval_loss": 0.027184385806322098,
399
- "eval_runtime": 13.1253,
400
- "eval_samples_per_second": 55.999,
401
- "eval_steps_per_second": 7.009,
402
- "step": 618
403
  },
404
  {
405
  "epoch": 2.99,
406
- "step": 618,
407
- "total_flos": 4.9150232620631654e+17,
408
- "train_loss": 0.0970435019064104,
409
- "train_runtime": 528.4446,
410
- "train_samples_per_second": 37.508,
411
- "train_steps_per_second": 1.169
412
  }
413
  ],
414
  "logging_steps": 10,
415
- "max_steps": 618,
416
  "num_train_epochs": 3,
417
  "save_steps": 500,
418
- "total_flos": 4.9150232620631654e+17,
419
  "trial_name": null,
420
  "trial_params": null
421
  }
 
1
  {
2
+ "best_metric": 0.7149341806627326,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-480",
4
+ "epoch": 2.9860031104199067,
5
  "eval_steps": 500,
6
+ "global_step": 480,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.06,
13
+ "learning_rate": 1.0416666666666668e-05,
14
+ "loss": 0.6441,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.12,
19
+ "learning_rate": 2.0833333333333336e-05,
20
+ "loss": 0.5084,
21
  "step": 20
22
  },
23
  {
24
+ "epoch": 0.19,
25
+ "learning_rate": 3.125e-05,
26
+ "loss": 0.3659,
27
  "step": 30
28
  },
29
  {
30
+ "epoch": 0.25,
31
+ "learning_rate": 4.166666666666667e-05,
32
+ "loss": 0.2162,
33
  "step": 40
34
  },
35
  {
36
+ "epoch": 0.31,
37
+ "learning_rate": 4.976851851851852e-05,
38
+ "loss": 0.1387,
39
  "step": 50
40
  },
41
  {
42
+ "epoch": 0.37,
43
+ "learning_rate": 4.8611111111111115e-05,
44
+ "loss": 0.1187,
45
  "step": 60
46
  },
47
  {
48
+ "epoch": 0.44,
49
+ "learning_rate": 4.745370370370371e-05,
50
+ "loss": 0.1003,
51
  "step": 70
52
  },
53
  {
54
+ "epoch": 0.5,
55
+ "learning_rate": 4.62962962962963e-05,
56
+ "loss": 0.104,
57
  "step": 80
58
  },
59
  {
60
+ "epoch": 0.56,
61
+ "learning_rate": 4.5138888888888894e-05,
62
+ "loss": 0.1448,
63
  "step": 90
64
  },
65
  {
66
+ "epoch": 0.62,
67
+ "learning_rate": 4.3981481481481486e-05,
68
+ "loss": 0.0851,
69
  "step": 100
70
  },
71
  {
72
+ "epoch": 0.68,
73
+ "learning_rate": 4.282407407407408e-05,
74
+ "loss": 0.0664,
75
  "step": 110
76
  },
77
  {
78
+ "epoch": 0.75,
79
+ "learning_rate": 4.166666666666667e-05,
80
+ "loss": 0.0775,
81
  "step": 120
82
  },
83
  {
84
+ "epoch": 0.81,
85
+ "learning_rate": 4.0509259259259265e-05,
86
+ "loss": 0.0996,
87
  "step": 130
88
  },
89
  {
90
+ "epoch": 0.87,
91
+ "learning_rate": 3.935185185185186e-05,
92
+ "loss": 0.0929,
93
  "step": 140
94
  },
95
  {
96
+ "epoch": 0.93,
97
+ "learning_rate": 3.8194444444444444e-05,
98
+ "loss": 0.0983,
99
  "step": 150
100
  },
101
  {
102
+ "epoch": 1.0,
103
+ "learning_rate": 3.7037037037037037e-05,
104
+ "loss": 0.1091,
105
+ "step": 160
106
+ },
107
+ {
108
+ "epoch": 1.0,
109
+ "eval_accuracy": 0.634135270086246,
110
+ "eval_loss": 1.134523868560791,
111
+ "eval_runtime": 48.9231,
112
+ "eval_samples_per_second": 45.03,
113
+ "eval_steps_per_second": 5.642,
114
  "step": 160
115
  },
116
  {
117
+ "epoch": 1.06,
118
+ "learning_rate": 3.587962962962963e-05,
119
+ "loss": 0.1446,
120
  "step": 170
121
  },
122
  {
123
+ "epoch": 1.12,
124
+ "learning_rate": 3.472222222222222e-05,
125
+ "loss": 0.0648,
126
  "step": 180
127
  },
128
  {
129
+ "epoch": 1.18,
130
+ "learning_rate": 3.3564814814814815e-05,
131
+ "loss": 0.0704,
132
  "step": 190
133
  },
134
  {
135
+ "epoch": 1.24,
136
+ "learning_rate": 3.240740740740741e-05,
137
+ "loss": 0.1206,
138
  "step": 200
139
  },
140
  {
141
+ "epoch": 1.31,
142
+ "learning_rate": 3.125e-05,
143
+ "loss": 0.0428,
 
 
 
 
 
 
 
 
 
144
  "step": 210
145
  },
146
  {
147
+ "epoch": 1.37,
148
+ "learning_rate": 3.0092592592592593e-05,
149
+ "loss": 0.0271,
150
  "step": 220
151
  },
152
  {
153
+ "epoch": 1.43,
154
+ "learning_rate": 2.8935185185185186e-05,
155
+ "loss": 0.0786,
156
  "step": 230
157
  },
158
  {
159
+ "epoch": 1.49,
160
+ "learning_rate": 2.777777777777778e-05,
161
+ "loss": 0.0233,
162
  "step": 240
163
  },
164
  {
165
+ "epoch": 1.56,
166
+ "learning_rate": 2.6620370370370372e-05,
167
+ "loss": 0.01,
168
  "step": 250
169
  },
170
  {
171
+ "epoch": 1.62,
172
+ "learning_rate": 2.5462962962962965e-05,
173
+ "loss": 0.0297,
174
  "step": 260
175
  },
176
  {
177
+ "epoch": 1.68,
178
+ "learning_rate": 2.4305555555555558e-05,
179
+ "loss": 0.0598,
180
  "step": 270
181
  },
182
  {
183
+ "epoch": 1.74,
184
+ "learning_rate": 2.314814814814815e-05,
185
+ "loss": 0.0714,
186
  "step": 280
187
  },
188
  {
189
+ "epoch": 1.8,
190
+ "learning_rate": 2.1990740740740743e-05,
191
+ "loss": 0.1358,
192
  "step": 290
193
  },
194
  {
195
+ "epoch": 1.87,
196
+ "learning_rate": 2.0833333333333336e-05,
197
+ "loss": 0.0164,
198
  "step": 300
199
  },
200
  {
201
+ "epoch": 1.93,
202
+ "learning_rate": 1.967592592592593e-05,
203
+ "loss": 0.0072,
204
  "step": 310
205
  },
206
  {
207
+ "epoch": 1.99,
208
+ "learning_rate": 1.8518518518518518e-05,
209
+ "loss": 0.0426,
210
  "step": 320
211
  },
212
  {
213
+ "epoch": 2.0,
214
+ "eval_accuracy": 0.6627326373127553,
215
+ "eval_loss": 1.377668857574463,
216
+ "eval_runtime": 47.5961,
217
+ "eval_samples_per_second": 46.285,
218
+ "eval_steps_per_second": 5.799,
219
+ "step": 321
220
+ },
221
+ {
222
+ "epoch": 2.05,
223
+ "learning_rate": 1.736111111111111e-05,
224
+ "loss": 0.0439,
225
  "step": 330
226
  },
227
  {
228
+ "epoch": 2.12,
229
+ "learning_rate": 1.6203703703703704e-05,
230
+ "loss": 0.0253,
231
  "step": 340
232
  },
233
  {
234
+ "epoch": 2.18,
235
+ "learning_rate": 1.5046296296296297e-05,
236
+ "loss": 0.0197,
237
  "step": 350
238
  },
239
  {
240
+ "epoch": 2.24,
241
+ "learning_rate": 1.388888888888889e-05,
242
+ "loss": 0.0112,
243
  "step": 360
244
  },
245
  {
246
+ "epoch": 2.3,
247
+ "learning_rate": 1.2731481481481482e-05,
248
+ "loss": 0.0445,
249
  "step": 370
250
  },
251
  {
252
+ "epoch": 2.36,
253
+ "learning_rate": 1.1574074074074075e-05,
254
+ "loss": 0.0338,
255
  "step": 380
256
  },
257
  {
258
+ "epoch": 2.43,
259
+ "learning_rate": 1.0416666666666668e-05,
260
+ "loss": 0.0344,
261
  "step": 390
262
  },
263
  {
264
+ "epoch": 2.49,
265
+ "learning_rate": 9.259259259259259e-06,
266
+ "loss": 0.0226,
267
  "step": 400
268
  },
269
  {
270
+ "epoch": 2.55,
271
+ "learning_rate": 8.101851851851852e-06,
272
+ "loss": 0.0748,
273
  "step": 410
274
  },
275
  {
276
+ "epoch": 2.61,
277
+ "learning_rate": 6.944444444444445e-06,
278
+ "loss": 0.0099,
 
 
 
 
 
 
 
 
 
279
  "step": 420
280
  },
281
  {
282
+ "epoch": 2.67,
283
+ "learning_rate": 5.787037037037038e-06,
284
+ "loss": 0.0558,
285
  "step": 430
286
  },
287
  {
288
+ "epoch": 2.74,
289
+ "learning_rate": 4.6296296296296296e-06,
290
+ "loss": 0.0243,
291
  "step": 440
292
  },
293
  {
294
+ "epoch": 2.8,
295
+ "learning_rate": 3.4722222222222224e-06,
296
+ "loss": 0.0239,
297
  "step": 450
298
  },
299
  {
300
+ "epoch": 2.86,
301
+ "learning_rate": 2.3148148148148148e-06,
302
+ "loss": 0.0645,
303
  "step": 460
304
  },
305
  {
306
+ "epoch": 2.92,
307
+ "learning_rate": 1.1574074074074074e-06,
308
+ "loss": 0.0547,
309
  "step": 470
310
  },
311
  {
312
+ "epoch": 2.99,
313
+ "learning_rate": 0.0,
314
+ "loss": 0.0151,
315
  "step": 480
316
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  {
318
  "epoch": 2.99,
319
+ "eval_accuracy": 0.7149341806627326,
320
+ "eval_loss": 1.1311845779418945,
321
+ "eval_runtime": 51.1784,
322
+ "eval_samples_per_second": 43.045,
323
+ "eval_steps_per_second": 5.393,
324
+ "step": 480
325
  },
326
  {
327
  "epoch": 2.99,
328
+ "step": 480,
329
+ "total_flos": 3.815394309328896e+17,
330
+ "train_loss": 0.09319638077480098,
331
+ "train_runtime": 491.2433,
332
+ "train_samples_per_second": 31.384,
333
+ "train_steps_per_second": 0.977
334
  }
335
  ],
336
  "logging_steps": 10,
337
+ "max_steps": 480,
338
  "num_train_epochs": 3,
339
  "save_steps": 500,
340
+ "total_flos": 3.815394309328896e+17,
341
  "trial_name": null,
342
  "trial_params": null
343
  }