JEdward7777 commited on
Commit
85639ed
1 Parent(s): c4c17b5

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +5 -5
  3. train_results.json +6 -6
  4. trainer_state.json +308 -296
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 39.73,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.0415542908012867,
5
- "eval_runtime": 7.8769,
6
- "eval_samples_per_second": 4.697,
7
- "eval_steps_per_second": 0.254,
8
- "total_flos": 3.262961773565706e+17,
9
- "train_loss": 0.18748833239078522,
10
- "train_runtime": 7500.5041,
11
- "train_samples_per_second": 1.76,
12
- "train_steps_per_second": 0.011
13
  }
 
1
  {
2
+ "epoch": 40.0,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.04307302460074425,
5
+ "eval_runtime": 7.4924,
6
+ "eval_samples_per_second": 5.339,
7
+ "eval_steps_per_second": 0.267,
8
+ "total_flos": 3.579662466809856e+17,
9
+ "train_loss": 0.17003339926401775,
10
+ "train_runtime": 8221.5625,
11
+ "train_samples_per_second": 1.751,
12
+ "train_steps_per_second": 0.015
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.73,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.0415542908012867,
5
- "eval_runtime": 7.8769,
6
- "eval_samples_per_second": 4.697,
7
- "eval_steps_per_second": 0.254
8
  }
 
1
  {
2
+ "epoch": 40.0,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.04307302460074425,
5
+ "eval_runtime": 7.4924,
6
+ "eval_samples_per_second": 5.339,
7
+ "eval_steps_per_second": 0.267
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.73,
3
- "total_flos": 3.262961773565706e+17,
4
- "train_loss": 0.18748833239078522,
5
- "train_runtime": 7500.5041,
6
- "train_samples_per_second": 1.76,
7
- "train_steps_per_second": 0.011
8
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "total_flos": 3.579662466809856e+17,
4
+ "train_loss": 0.17003339926401775,
5
+ "train_runtime": 8221.5625,
6
+ "train_samples_per_second": 1.751,
7
+ "train_steps_per_second": 0.015
8
  }
trainer_state.json CHANGED
@@ -1,409 +1,421 @@
1
  {
2
  "best_metric": 1.0,
3
- "best_model_checkpoint": "delivery_truck_classification\\checkpoint-2",
4
- "epoch": 39.72727272727273,
5
- "global_step": 80,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.73,
12
- "eval_accuracy": 1.0,
13
- "eval_loss": 0.0415542908012867,
14
- "eval_runtime": 7.7969,
15
- "eval_samples_per_second": 4.745,
16
- "eval_steps_per_second": 0.257,
17
- "step": 2
 
 
 
 
 
 
 
 
 
18
  },
19
  {
20
- "epoch": 1.73,
21
- "eval_accuracy": 1.0,
22
- "eval_loss": 0.03460519760847092,
23
- "eval_runtime": 9.6408,
24
- "eval_samples_per_second": 3.838,
25
- "eval_steps_per_second": 0.207,
26
- "step": 4
27
  },
28
  {
29
- "epoch": 2.73,
30
- "eval_accuracy": 1.0,
31
- "eval_loss": 0.029270131140947342,
32
- "eval_runtime": 11.5968,
33
- "eval_samples_per_second": 3.191,
34
- "eval_steps_per_second": 0.172,
35
- "step": 6
36
  },
37
  {
38
- "epoch": 3.73,
39
- "eval_accuracy": 1.0,
40
- "eval_loss": 0.01861225999891758,
41
- "eval_runtime": 8.4881,
42
- "eval_samples_per_second": 4.359,
43
- "eval_steps_per_second": 0.236,
44
- "step": 8
45
  },
46
  {
47
- "epoch": 4.73,
48
  "eval_accuracy": 1.0,
49
- "eval_loss": 0.020498055964708328,
50
- "eval_runtime": 9.9982,
51
- "eval_samples_per_second": 3.701,
52
- "eval_steps_per_second": 0.2,
53
- "step": 10
54
  },
55
  {
56
- "epoch": 5.73,
57
- "eval_accuracy": 0.972972972972973,
58
- "eval_loss": 0.06043216958642006,
59
- "eval_runtime": 8.9914,
60
- "eval_samples_per_second": 4.115,
61
- "eval_steps_per_second": 0.222,
62
- "step": 12
63
  },
64
  {
65
- "epoch": 6.73,
66
  "eval_accuracy": 1.0,
67
- "eval_loss": 0.0332246832549572,
68
- "eval_runtime": 8.8254,
69
- "eval_samples_per_second": 4.192,
70
- "eval_steps_per_second": 0.227,
71
- "step": 14
72
  },
73
  {
74
- "epoch": 7.73,
75
  "eval_accuracy": 1.0,
76
- "eval_loss": 0.025004582479596138,
77
- "eval_runtime": 11.8616,
78
- "eval_samples_per_second": 3.119,
79
- "eval_steps_per_second": 0.169,
80
- "step": 16
81
  },
82
  {
83
- "epoch": 8.73,
84
  "eval_accuracy": 1.0,
85
- "eval_loss": 0.03864024579524994,
86
- "eval_runtime": 8.8843,
87
- "eval_samples_per_second": 4.165,
88
- "eval_steps_per_second": 0.225,
89
- "step": 18
90
  },
91
  {
92
- "epoch": 9.73,
93
- "learning_rate": 4.166666666666667e-05,
94
- "loss": 0.2483,
95
- "step": 20
 
 
 
96
  },
97
  {
98
- "epoch": 9.73,
99
  "eval_accuracy": 1.0,
100
- "eval_loss": 0.04379289597272873,
101
- "eval_runtime": 8.6752,
102
- "eval_samples_per_second": 4.265,
103
- "eval_steps_per_second": 0.231,
104
- "step": 20
105
  },
106
  {
107
- "epoch": 10.73,
108
  "eval_accuracy": 1.0,
109
- "eval_loss": 0.04468226432800293,
110
- "eval_runtime": 8.2704,
111
- "eval_samples_per_second": 4.474,
112
- "eval_steps_per_second": 0.242,
113
- "step": 22
114
  },
115
  {
116
- "epoch": 11.73,
117
- "eval_accuracy": 0.972972972972973,
118
- "eval_loss": 0.06756877154111862,
119
- "eval_runtime": 8.2231,
120
- "eval_samples_per_second": 4.5,
121
- "eval_steps_per_second": 0.243,
122
- "step": 24
123
  },
124
  {
125
- "epoch": 12.73,
126
- "eval_accuracy": 0.972972972972973,
127
- "eval_loss": 0.07861027866601944,
128
- "eval_runtime": 8.9079,
129
- "eval_samples_per_second": 4.154,
130
- "eval_steps_per_second": 0.225,
131
- "step": 26
132
  },
133
  {
134
- "epoch": 13.73,
135
- "eval_accuracy": 1.0,
136
- "eval_loss": 0.03886393457651138,
137
- "eval_runtime": 7.4091,
138
- "eval_samples_per_second": 4.994,
139
- "eval_steps_per_second": 0.27,
140
- "step": 28
141
  },
142
  {
143
- "epoch": 14.73,
144
- "eval_accuracy": 1.0,
145
- "eval_loss": 0.02784094214439392,
146
- "eval_runtime": 9.7376,
147
- "eval_samples_per_second": 3.8,
148
- "eval_steps_per_second": 0.205,
149
- "step": 30
150
  },
151
  {
152
- "epoch": 15.73,
153
- "eval_accuracy": 1.0,
154
- "eval_loss": 0.02497038058936596,
155
- "eval_runtime": 7.3256,
156
- "eval_samples_per_second": 5.051,
157
- "eval_steps_per_second": 0.273,
158
- "step": 32
159
  },
160
  {
161
- "epoch": 16.73,
162
  "eval_accuracy": 1.0,
163
- "eval_loss": 0.02828327752649784,
164
- "eval_runtime": 11.0099,
165
- "eval_samples_per_second": 3.361,
166
- "eval_steps_per_second": 0.182,
167
- "step": 34
168
  },
169
  {
170
- "epoch": 17.73,
171
- "eval_accuracy": 0.972972972972973,
172
- "eval_loss": 0.05021832138299942,
173
- "eval_runtime": 9.5468,
174
- "eval_samples_per_second": 3.876,
175
- "eval_steps_per_second": 0.209,
176
- "step": 36
177
  },
178
  {
179
- "epoch": 18.73,
180
- "eval_accuracy": 0.972972972972973,
181
- "eval_loss": 0.07113233208656311,
182
- "eval_runtime": 7.5069,
183
- "eval_samples_per_second": 4.929,
184
- "eval_steps_per_second": 0.266,
185
- "step": 38
186
  },
187
  {
188
- "epoch": 19.73,
189
  "learning_rate": 2.777777777777778e-05,
190
- "loss": 0.1759,
191
- "step": 40
192
  },
193
  {
194
- "epoch": 19.73,
195
- "eval_accuracy": 0.972972972972973,
196
- "eval_loss": 0.06368651241064072,
197
- "eval_runtime": 10.8688,
198
- "eval_samples_per_second": 3.404,
199
- "eval_steps_per_second": 0.184,
200
- "step": 40
201
  },
202
  {
203
- "epoch": 20.73,
204
  "eval_accuracy": 1.0,
205
- "eval_loss": 0.04590895399451256,
206
- "eval_runtime": 7.36,
207
- "eval_samples_per_second": 5.027,
208
- "eval_steps_per_second": 0.272,
209
- "step": 42
210
  },
211
  {
212
- "epoch": 21.73,
213
  "eval_accuracy": 1.0,
214
- "eval_loss": 0.03937483951449394,
215
- "eval_runtime": 7.6664,
216
- "eval_samples_per_second": 4.826,
217
- "eval_steps_per_second": 0.261,
218
- "step": 44
219
  },
220
  {
221
- "epoch": 22.73,
222
  "eval_accuracy": 1.0,
223
- "eval_loss": 0.04189879819750786,
224
- "eval_runtime": 7.6918,
225
- "eval_samples_per_second": 4.81,
226
- "eval_steps_per_second": 0.26,
227
- "step": 46
228
  },
229
  {
230
- "epoch": 23.73,
231
  "eval_accuracy": 1.0,
232
- "eval_loss": 0.042252812534570694,
233
- "eval_runtime": 9.2536,
234
- "eval_samples_per_second": 3.998,
235
- "eval_steps_per_second": 0.216,
236
- "step": 48
237
  },
238
  {
239
- "epoch": 24.73,
240
- "eval_accuracy": 0.972972972972973,
241
- "eval_loss": 0.046256761997938156,
242
- "eval_runtime": 8.404,
243
- "eval_samples_per_second": 4.403,
244
- "eval_steps_per_second": 0.238,
245
- "step": 50
246
  },
247
  {
248
- "epoch": 25.73,
249
- "eval_accuracy": 0.972972972972973,
250
- "eval_loss": 0.050322916358709335,
251
- "eval_runtime": 7.2832,
252
- "eval_samples_per_second": 5.08,
253
- "eval_steps_per_second": 0.275,
254
- "step": 52
255
  },
256
  {
257
- "epoch": 26.73,
258
- "eval_accuracy": 0.972972972972973,
259
- "eval_loss": 0.061594847589731216,
260
- "eval_runtime": 7.736,
261
- "eval_samples_per_second": 4.783,
262
- "eval_steps_per_second": 0.259,
263
- "step": 54
264
  },
265
  {
266
- "epoch": 27.73,
267
- "eval_accuracy": 0.972972972972973,
268
- "eval_loss": 0.0641237199306488,
269
- "eval_runtime": 8.3429,
270
- "eval_samples_per_second": 4.435,
271
- "eval_steps_per_second": 0.24,
272
- "step": 56
273
  },
274
  {
275
- "epoch": 28.73,
276
- "eval_accuracy": 0.972972972972973,
277
- "eval_loss": 0.05289805307984352,
278
- "eval_runtime": 6.6939,
279
- "eval_samples_per_second": 5.527,
280
- "eval_steps_per_second": 0.299,
281
- "step": 58
282
  },
283
  {
284
- "epoch": 29.73,
285
- "learning_rate": 1.388888888888889e-05,
286
- "loss": 0.1669,
287
- "step": 60
 
 
 
288
  },
289
  {
290
- "epoch": 29.73,
291
- "eval_accuracy": 0.972972972972973,
292
- "eval_loss": 0.048487674444913864,
293
- "eval_runtime": 8.2178,
294
- "eval_samples_per_second": 4.502,
295
- "eval_steps_per_second": 0.243,
296
- "step": 60
297
  },
298
  {
299
- "epoch": 30.73,
300
- "eval_accuracy": 0.972972972972973,
301
- "eval_loss": 0.04654627665877342,
302
- "eval_runtime": 9.0621,
303
- "eval_samples_per_second": 4.083,
304
- "eval_steps_per_second": 0.221,
305
- "step": 62
306
  },
307
  {
308
- "epoch": 31.73,
309
- "eval_accuracy": 0.972972972972973,
310
- "eval_loss": 0.045613404363393784,
311
- "eval_runtime": 7.5057,
312
- "eval_samples_per_second": 4.93,
313
- "eval_steps_per_second": 0.266,
314
- "step": 64
315
  },
316
  {
317
- "epoch": 32.73,
318
- "eval_accuracy": 0.972972972972973,
319
- "eval_loss": 0.047752730548381805,
320
- "eval_runtime": 7.6794,
321
- "eval_samples_per_second": 4.818,
322
- "eval_steps_per_second": 0.26,
323
- "step": 66
324
  },
325
  {
326
- "epoch": 33.73,
327
- "eval_accuracy": 0.972972972972973,
328
- "eval_loss": 0.0467178151011467,
329
- "eval_runtime": 7.7621,
330
- "eval_samples_per_second": 4.767,
331
- "eval_steps_per_second": 0.258,
332
- "step": 68
333
  },
334
  {
335
- "epoch": 34.73,
336
- "eval_accuracy": 0.972972972972973,
337
- "eval_loss": 0.04733948037028313,
338
- "eval_runtime": 8.0246,
339
- "eval_samples_per_second": 4.611,
340
- "eval_steps_per_second": 0.249,
341
- "step": 70
342
  },
343
  {
344
- "epoch": 35.73,
345
- "eval_accuracy": 0.972972972972973,
346
- "eval_loss": 0.048583876341581345,
347
- "eval_runtime": 8.5589,
348
- "eval_samples_per_second": 4.323,
349
- "eval_steps_per_second": 0.234,
350
- "step": 72
351
  },
352
  {
353
- "epoch": 36.73,
354
- "eval_accuracy": 0.972972972972973,
355
- "eval_loss": 0.05004884675145149,
356
- "eval_runtime": 7.4868,
357
- "eval_samples_per_second": 4.942,
358
- "eval_steps_per_second": 0.267,
359
- "step": 74
360
  },
361
  {
362
- "epoch": 37.73,
363
- "eval_accuracy": 0.972972972972973,
364
- "eval_loss": 0.0501551553606987,
365
- "eval_runtime": 8.2774,
366
- "eval_samples_per_second": 4.47,
367
- "eval_steps_per_second": 0.242,
368
- "step": 76
369
  },
370
  {
371
- "epoch": 38.73,
372
- "eval_accuracy": 0.972972972972973,
373
- "eval_loss": 0.05001495033502579,
374
- "eval_runtime": 7.8563,
375
- "eval_samples_per_second": 4.71,
376
- "eval_steps_per_second": 0.255,
377
- "step": 78
378
  },
379
  {
380
- "epoch": 39.73,
381
- "learning_rate": 0.0,
382
- "loss": 0.1589,
383
- "step": 80
 
 
 
384
  },
385
  {
386
- "epoch": 39.73,
387
- "eval_accuracy": 0.972972972972973,
388
- "eval_loss": 0.04929113760590553,
389
- "eval_runtime": 8.4253,
390
- "eval_samples_per_second": 4.392,
391
- "eval_steps_per_second": 0.237,
392
- "step": 80
393
  },
394
  {
395
- "epoch": 39.73,
396
- "step": 80,
397
- "total_flos": 3.262961773565706e+17,
398
- "train_loss": 0.18748833239078522,
399
- "train_runtime": 7500.5041,
400
- "train_samples_per_second": 1.76,
401
- "train_steps_per_second": 0.011
 
 
 
 
 
 
 
 
 
402
  }
403
  ],
404
- "max_steps": 80,
405
  "num_train_epochs": 40,
406
- "total_flos": 3.262961773565706e+17,
407
  "trial_name": null,
408
  "trial_params": null
409
  }
 
1
  {
2
  "best_metric": 1.0,
3
+ "best_model_checkpoint": "delivery_truck_classification\\checkpoint-18",
4
+ "epoch": 40.0,
5
+ "global_step": 120,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.95,
13
+ "eval_loss": 0.16261602938175201,
14
+ "eval_runtime": 8.6031,
15
+ "eval_samples_per_second": 4.649,
16
+ "eval_steps_per_second": 0.232,
17
+ "step": 3
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_accuracy": 0.95,
22
+ "eval_loss": 0.1592799872159958,
23
+ "eval_runtime": 8.1709,
24
+ "eval_samples_per_second": 4.895,
25
+ "eval_steps_per_second": 0.245,
26
+ "step": 6
27
  },
28
  {
29
+ "epoch": 3.0,
30
+ "eval_accuracy": 0.95,
31
+ "eval_loss": 0.13422292470932007,
32
+ "eval_runtime": 8.0249,
33
+ "eval_samples_per_second": 4.984,
34
+ "eval_steps_per_second": 0.249,
35
+ "step": 9
36
  },
37
  {
38
+ "epoch": 4.0,
39
+ "eval_accuracy": 0.975,
40
+ "eval_loss": 0.08705286681652069,
41
+ "eval_runtime": 8.0216,
42
+ "eval_samples_per_second": 4.987,
43
+ "eval_steps_per_second": 0.249,
44
+ "step": 12
45
  },
46
  {
47
+ "epoch": 5.0,
48
+ "eval_accuracy": 0.975,
49
+ "eval_loss": 0.06115349009633064,
50
+ "eval_runtime": 7.8977,
51
+ "eval_samples_per_second": 5.065,
52
+ "eval_steps_per_second": 0.253,
53
+ "step": 15
54
  },
55
  {
56
+ "epoch": 6.0,
57
  "eval_accuracy": 1.0,
58
+ "eval_loss": 0.04307302460074425,
59
+ "eval_runtime": 8.1309,
60
+ "eval_samples_per_second": 4.92,
61
+ "eval_steps_per_second": 0.246,
62
+ "step": 18
63
  },
64
  {
65
+ "epoch": 6.67,
66
+ "learning_rate": 4.62962962962963e-05,
67
+ "loss": 0.2745,
68
+ "step": 20
 
 
 
69
  },
70
  {
71
+ "epoch": 7.0,
72
  "eval_accuracy": 1.0,
73
+ "eval_loss": 0.03329644352197647,
74
+ "eval_runtime": 8.4823,
75
+ "eval_samples_per_second": 4.716,
76
+ "eval_steps_per_second": 0.236,
77
+ "step": 21
78
  },
79
  {
80
+ "epoch": 8.0,
81
  "eval_accuracy": 1.0,
82
+ "eval_loss": 0.048651862889528275,
83
+ "eval_runtime": 7.9701,
84
+ "eval_samples_per_second": 5.019,
85
+ "eval_steps_per_second": 0.251,
86
+ "step": 24
87
  },
88
  {
89
+ "epoch": 9.0,
90
  "eval_accuracy": 1.0,
91
+ "eval_loss": 0.045587461441755295,
92
+ "eval_runtime": 7.8557,
93
+ "eval_samples_per_second": 5.092,
94
+ "eval_steps_per_second": 0.255,
95
+ "step": 27
96
  },
97
  {
98
+ "epoch": 10.0,
99
+ "eval_accuracy": 1.0,
100
+ "eval_loss": 0.02726823464035988,
101
+ "eval_runtime": 8.1787,
102
+ "eval_samples_per_second": 4.891,
103
+ "eval_steps_per_second": 0.245,
104
+ "step": 30
105
  },
106
  {
107
+ "epoch": 11.0,
108
  "eval_accuracy": 1.0,
109
+ "eval_loss": 0.017963390797376633,
110
+ "eval_runtime": 8.3083,
111
+ "eval_samples_per_second": 4.814,
112
+ "eval_steps_per_second": 0.241,
113
+ "step": 33
114
  },
115
  {
116
+ "epoch": 12.0,
117
  "eval_accuracy": 1.0,
118
+ "eval_loss": 0.016819924116134644,
119
+ "eval_runtime": 8.1002,
120
+ "eval_samples_per_second": 4.938,
121
+ "eval_steps_per_second": 0.247,
122
+ "step": 36
123
  },
124
  {
125
+ "epoch": 13.0,
126
+ "eval_accuracy": 1.0,
127
+ "eval_loss": 0.031003978103399277,
128
+ "eval_runtime": 8.1225,
129
+ "eval_samples_per_second": 4.925,
130
+ "eval_steps_per_second": 0.246,
131
+ "step": 39
132
  },
133
  {
134
+ "epoch": 13.33,
135
+ "learning_rate": 3.7037037037037037e-05,
136
+ "loss": 0.1782,
137
+ "step": 40
 
 
 
138
  },
139
  {
140
+ "epoch": 14.0,
141
+ "eval_accuracy": 0.975,
142
+ "eval_loss": 0.043801963329315186,
143
+ "eval_runtime": 8.6445,
144
+ "eval_samples_per_second": 4.627,
145
+ "eval_steps_per_second": 0.231,
146
+ "step": 42
147
  },
148
  {
149
+ "epoch": 15.0,
150
+ "eval_accuracy": 0.975,
151
+ "eval_loss": 0.07503340393304825,
152
+ "eval_runtime": 7.8763,
153
+ "eval_samples_per_second": 5.079,
154
+ "eval_steps_per_second": 0.254,
155
+ "step": 45
156
  },
157
  {
158
+ "epoch": 16.0,
159
+ "eval_accuracy": 0.975,
160
+ "eval_loss": 0.03957166522741318,
161
+ "eval_runtime": 8.3356,
162
+ "eval_samples_per_second": 4.799,
163
+ "eval_steps_per_second": 0.24,
164
+ "step": 48
165
  },
166
  {
167
+ "epoch": 17.0,
168
  "eval_accuracy": 1.0,
169
+ "eval_loss": 0.017652835696935654,
170
+ "eval_runtime": 7.9389,
171
+ "eval_samples_per_second": 5.038,
172
+ "eval_steps_per_second": 0.252,
173
+ "step": 51
174
  },
175
  {
176
+ "epoch": 18.0,
177
+ "eval_accuracy": 1.0,
178
+ "eval_loss": 0.02170238085091114,
179
+ "eval_runtime": 8.0236,
180
+ "eval_samples_per_second": 4.985,
181
+ "eval_steps_per_second": 0.249,
182
+ "step": 54
183
  },
184
  {
185
+ "epoch": 19.0,
186
+ "eval_accuracy": 1.0,
187
+ "eval_loss": 0.011599823832511902,
188
+ "eval_runtime": 8.0424,
189
+ "eval_samples_per_second": 4.974,
190
+ "eval_steps_per_second": 0.249,
191
+ "step": 57
192
  },
193
  {
194
+ "epoch": 20.0,
195
  "learning_rate": 2.777777777777778e-05,
196
+ "loss": 0.1624,
197
+ "step": 60
198
  },
199
  {
200
+ "epoch": 20.0,
201
+ "eval_accuracy": 1.0,
202
+ "eval_loss": 0.008052630349993706,
203
+ "eval_runtime": 8.0111,
204
+ "eval_samples_per_second": 4.993,
205
+ "eval_steps_per_second": 0.25,
206
+ "step": 60
207
  },
208
  {
209
+ "epoch": 21.0,
210
  "eval_accuracy": 1.0,
211
+ "eval_loss": 0.006596912629902363,
212
+ "eval_runtime": 8.4146,
213
+ "eval_samples_per_second": 4.754,
214
+ "eval_steps_per_second": 0.238,
215
+ "step": 63
216
  },
217
  {
218
+ "epoch": 22.0,
219
  "eval_accuracy": 1.0,
220
+ "eval_loss": 0.008253499865531921,
221
+ "eval_runtime": 8.4625,
222
+ "eval_samples_per_second": 4.727,
223
+ "eval_steps_per_second": 0.236,
224
+ "step": 66
225
  },
226
  {
227
+ "epoch": 23.0,
228
  "eval_accuracy": 1.0,
229
+ "eval_loss": 0.012594206258654594,
230
+ "eval_runtime": 8.1933,
231
+ "eval_samples_per_second": 4.882,
232
+ "eval_steps_per_second": 0.244,
233
+ "step": 69
234
  },
235
  {
236
+ "epoch": 24.0,
237
  "eval_accuracy": 1.0,
238
+ "eval_loss": 0.015849163755774498,
239
+ "eval_runtime": 7.9982,
240
+ "eval_samples_per_second": 5.001,
241
+ "eval_steps_per_second": 0.25,
242
+ "step": 72
243
  },
244
  {
245
+ "epoch": 25.0,
246
+ "eval_accuracy": 1.0,
247
+ "eval_loss": 0.01883917860686779,
248
+ "eval_runtime": 7.9429,
249
+ "eval_samples_per_second": 5.036,
250
+ "eval_steps_per_second": 0.252,
251
+ "step": 75
252
  },
253
  {
254
+ "epoch": 26.0,
255
+ "eval_accuracy": 1.0,
256
+ "eval_loss": 0.014945434406399727,
257
+ "eval_runtime": 8.0615,
258
+ "eval_samples_per_second": 4.962,
259
+ "eval_steps_per_second": 0.248,
260
+ "step": 78
261
  },
262
  {
263
+ "epoch": 26.67,
264
+ "learning_rate": 1.8518518518518518e-05,
265
+ "loss": 0.1475,
266
+ "step": 80
 
 
 
267
  },
268
  {
269
+ "epoch": 27.0,
270
+ "eval_accuracy": 1.0,
271
+ "eval_loss": 0.010075708851218224,
272
+ "eval_runtime": 8.0344,
273
+ "eval_samples_per_second": 4.979,
274
+ "eval_steps_per_second": 0.249,
275
+ "step": 81
276
  },
277
  {
278
+ "epoch": 28.0,
279
+ "eval_accuracy": 1.0,
280
+ "eval_loss": 0.006395348347723484,
281
+ "eval_runtime": 7.9204,
282
+ "eval_samples_per_second": 5.05,
283
+ "eval_steps_per_second": 0.253,
284
+ "step": 84
285
  },
286
  {
287
+ "epoch": 29.0,
288
+ "eval_accuracy": 1.0,
289
+ "eval_loss": 0.004969631787389517,
290
+ "eval_runtime": 8.1818,
291
+ "eval_samples_per_second": 4.889,
292
+ "eval_steps_per_second": 0.244,
293
+ "step": 87
294
  },
295
  {
296
+ "epoch": 30.0,
297
+ "eval_accuracy": 1.0,
298
+ "eval_loss": 0.005222103092819452,
299
+ "eval_runtime": 7.9872,
300
+ "eval_samples_per_second": 5.008,
301
+ "eval_steps_per_second": 0.25,
302
+ "step": 90
303
  },
304
  {
305
+ "epoch": 31.0,
306
+ "eval_accuracy": 1.0,
307
+ "eval_loss": 0.006443561054766178,
308
+ "eval_runtime": 7.9514,
309
+ "eval_samples_per_second": 5.031,
310
+ "eval_steps_per_second": 0.252,
311
+ "step": 93
312
  },
313
  {
314
+ "epoch": 32.0,
315
+ "eval_accuracy": 1.0,
316
+ "eval_loss": 0.006998550146818161,
317
+ "eval_runtime": 7.9811,
318
+ "eval_samples_per_second": 5.012,
319
+ "eval_steps_per_second": 0.251,
320
+ "step": 96
321
  },
322
  {
323
+ "epoch": 33.0,
324
+ "eval_accuracy": 1.0,
325
+ "eval_loss": 0.006886245217174292,
326
+ "eval_runtime": 7.9804,
327
+ "eval_samples_per_second": 5.012,
328
+ "eval_steps_per_second": 0.251,
329
+ "step": 99
330
  },
331
  {
332
+ "epoch": 33.33,
333
+ "learning_rate": 9.259259259259259e-06,
334
+ "loss": 0.1345,
335
+ "step": 100
 
 
 
336
  },
337
  {
338
+ "epoch": 34.0,
339
+ "eval_accuracy": 1.0,
340
+ "eval_loss": 0.005939379800111055,
341
+ "eval_runtime": 7.962,
342
+ "eval_samples_per_second": 5.024,
343
+ "eval_steps_per_second": 0.251,
344
+ "step": 102
345
  },
346
  {
347
+ "epoch": 35.0,
348
+ "eval_accuracy": 1.0,
349
+ "eval_loss": 0.004927521105855703,
350
+ "eval_runtime": 7.9624,
351
+ "eval_samples_per_second": 5.024,
352
+ "eval_steps_per_second": 0.251,
353
+ "step": 105
354
  },
355
  {
356
+ "epoch": 36.0,
357
+ "eval_accuracy": 1.0,
358
+ "eval_loss": 0.004319839645177126,
359
+ "eval_runtime": 8.2724,
360
+ "eval_samples_per_second": 4.835,
361
+ "eval_steps_per_second": 0.242,
362
+ "step": 108
363
  },
364
  {
365
+ "epoch": 37.0,
366
+ "eval_accuracy": 1.0,
367
+ "eval_loss": 0.0039680288173258305,
368
+ "eval_runtime": 7.994,
369
+ "eval_samples_per_second": 5.004,
370
+ "eval_steps_per_second": 0.25,
371
+ "step": 111
372
  },
373
  {
374
+ "epoch": 38.0,
375
+ "eval_accuracy": 1.0,
376
+ "eval_loss": 0.003840196877717972,
377
+ "eval_runtime": 7.8877,
378
+ "eval_samples_per_second": 5.071,
379
+ "eval_steps_per_second": 0.254,
380
+ "step": 114
381
  },
382
  {
383
+ "epoch": 39.0,
384
+ "eval_accuracy": 1.0,
385
+ "eval_loss": 0.0038244160823524,
386
+ "eval_runtime": 8.0172,
387
+ "eval_samples_per_second": 4.989,
388
+ "eval_steps_per_second": 0.249,
389
+ "step": 117
390
  },
391
  {
392
+ "epoch": 40.0,
393
+ "learning_rate": 0.0,
394
+ "loss": 0.1232,
395
+ "step": 120
 
 
 
396
  },
397
  {
398
+ "epoch": 40.0,
399
+ "eval_accuracy": 1.0,
400
+ "eval_loss": 0.003828426357358694,
401
+ "eval_runtime": 7.9122,
402
+ "eval_samples_per_second": 5.055,
403
+ "eval_steps_per_second": 0.253,
404
+ "step": 120
405
+ },
406
+ {
407
+ "epoch": 40.0,
408
+ "step": 120,
409
+ "total_flos": 3.579662466809856e+17,
410
+ "train_loss": 0.17003339926401775,
411
+ "train_runtime": 8221.5625,
412
+ "train_samples_per_second": 1.751,
413
+ "train_steps_per_second": 0.015
414
  }
415
  ],
416
+ "max_steps": 120,
417
  "num_train_epochs": 40,
418
+ "total_flos": 3.579662466809856e+17,
419
  "trial_name": null,
420
  "trial_params": null
421
  }