JEdward7777 committed on
Commit
ea59389
1 Parent(s): fa20d62

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +11 -11
  2. eval_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +331 -319
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 39.57,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.21797223389148712,
5
- "eval_runtime": 3.8754,
6
- "eval_samples_per_second": 5.677,
7
- "eval_steps_per_second": 0.258,
8
- "total_flos": 1.9417183005730406e+17,
9
- "train_loss": 1.3339122772216796,
10
- "train_runtime": 965.4815,
11
- "train_samples_per_second": 8.162,
12
- "train_steps_per_second": 0.041
13
  }
 
1
  {
2
+ "epoch": 39.8,
3
+ "eval_accuracy": 0.9714285714285714,
4
+ "eval_loss": 0.1382756382226944,
5
+ "eval_runtime": 6.6641,
6
+ "eval_samples_per_second": 5.252,
7
+ "eval_steps_per_second": 0.3,
8
+ "total_flos": 3.088453228308726e+17,
9
+ "train_loss": 0.15159874260425568,
10
+ "train_runtime": 6474.8036,
11
+ "train_samples_per_second": 1.927,
12
+ "train_steps_per_second": 0.012
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.57,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.21797223389148712,
5
- "eval_runtime": 3.8754,
6
- "eval_samples_per_second": 5.677,
7
- "eval_steps_per_second": 0.258
8
  }
 
1
  {
2
+ "epoch": 39.8,
3
+ "eval_accuracy": 0.9714285714285714,
4
+ "eval_loss": 0.1382756382226944,
5
+ "eval_runtime": 6.6641,
6
+ "eval_samples_per_second": 5.252,
7
+ "eval_steps_per_second": 0.3
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.57,
3
- "total_flos": 1.9417183005730406e+17,
4
- "train_loss": 1.3339122772216796,
5
- "train_runtime": 965.4815,
6
- "train_samples_per_second": 8.162,
7
- "train_steps_per_second": 0.041
8
  }
 
1
  {
2
+ "epoch": 39.8,
3
+ "total_flos": 3.088453228308726e+17,
4
+ "train_loss": 0.15159874260425568,
5
+ "train_runtime": 6474.8036,
6
+ "train_samples_per_second": 1.927,
7
+ "train_steps_per_second": 0.012
8
  }
trainer_state.json CHANGED
@@ -1,397 +1,409 @@
1
  {
2
- "best_metric": 0.9090909090909091,
3
- "best_model_checkpoint": "delivery_truck_classification/checkpoint-21",
4
- "epoch": 39.57142857142857,
5
- "global_step": 40,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.57,
12
- "eval_accuracy": 0.13636363636363635,
13
- "eval_loss": 1.9638466835021973,
14
- "eval_runtime": 1.6283,
15
- "eval_samples_per_second": 13.511,
16
- "eval_steps_per_second": 0.614,
17
- "step": 1
18
- },
19
- {
20
- "epoch": 1.57,
21
- "eval_accuracy": 0.09090909090909091,
22
- "eval_loss": 1.9021785259246826,
23
- "eval_runtime": 1.7599,
24
- "eval_samples_per_second": 12.501,
25
- "eval_steps_per_second": 0.568,
26
  "step": 2
27
  },
28
  {
29
- "epoch": 2.57,
30
- "eval_accuracy": 0.09090909090909091,
31
- "eval_loss": 1.7954376935958862,
32
- "eval_runtime": 1.7675,
33
- "eval_samples_per_second": 12.447,
34
- "eval_steps_per_second": 0.566,
35
- "step": 3
36
- },
37
- {
38
- "epoch": 3.57,
39
- "eval_accuracy": 0.36363636363636365,
40
- "eval_loss": 1.646571397781372,
41
- "eval_runtime": 1.8048,
42
- "eval_samples_per_second": 12.19,
43
- "eval_steps_per_second": 0.554,
44
  "step": 4
45
  },
46
  {
47
- "epoch": 4.57,
48
- "eval_accuracy": 0.5,
49
- "eval_loss": 1.516127347946167,
50
- "eval_runtime": 1.8376,
51
- "eval_samples_per_second": 11.972,
52
- "eval_steps_per_second": 0.544,
53
- "step": 5
54
- },
55
- {
56
- "epoch": 5.57,
57
- "eval_accuracy": 0.5454545454545454,
58
- "eval_loss": 1.4261207580566406,
59
- "eval_runtime": 1.6096,
60
- "eval_samples_per_second": 13.668,
61
- "eval_steps_per_second": 0.621,
62
  "step": 6
63
  },
64
  {
65
- "epoch": 6.57,
66
- "eval_accuracy": 0.5454545454545454,
67
- "eval_loss": 1.3546966314315796,
68
- "eval_runtime": 1.568,
69
- "eval_samples_per_second": 14.03,
70
- "eval_steps_per_second": 0.638,
71
- "step": 7
72
- },
73
- {
74
- "epoch": 7.57,
75
- "eval_accuracy": 0.6363636363636364,
76
- "eval_loss": 1.2797943353652954,
77
- "eval_runtime": 1.8043,
78
- "eval_samples_per_second": 12.193,
79
- "eval_steps_per_second": 0.554,
80
  "step": 8
81
  },
82
  {
83
- "epoch": 8.57,
84
- "eval_accuracy": 0.6363636363636364,
85
- "eval_loss": 1.2200274467468262,
86
- "eval_runtime": 1.811,
87
- "eval_samples_per_second": 12.148,
88
- "eval_steps_per_second": 0.552,
89
- "step": 9
90
- },
91
- {
92
- "epoch": 9.57,
93
- "eval_accuracy": 0.6363636363636364,
94
- "eval_loss": 1.1594452857971191,
95
- "eval_runtime": 1.803,
96
- "eval_samples_per_second": 12.202,
97
- "eval_steps_per_second": 0.555,
98
  "step": 10
99
  },
100
  {
101
- "epoch": 10.57,
102
- "eval_accuracy": 0.6818181818181818,
103
- "eval_loss": 1.1154121160507202,
104
- "eval_runtime": 1.7747,
105
- "eval_samples_per_second": 12.397,
106
- "eval_steps_per_second": 0.563,
107
- "step": 11
108
- },
109
- {
110
- "epoch": 11.57,
111
- "eval_accuracy": 0.6818181818181818,
112
- "eval_loss": 1.07809579372406,
113
- "eval_runtime": 1.6416,
114
- "eval_samples_per_second": 13.401,
115
- "eval_steps_per_second": 0.609,
116
  "step": 12
117
  },
118
  {
119
- "epoch": 12.57,
120
- "eval_accuracy": 0.6818181818181818,
121
- "eval_loss": 1.0285967588424683,
122
- "eval_runtime": 1.7863,
123
- "eval_samples_per_second": 12.316,
124
- "eval_steps_per_second": 0.56,
125
- "step": 13
126
- },
127
- {
128
- "epoch": 13.57,
129
- "eval_accuracy": 0.6818181818181818,
130
- "eval_loss": 0.9623335003852844,
131
- "eval_runtime": 1.8244,
132
- "eval_samples_per_second": 12.059,
133
- "eval_steps_per_second": 0.548,
134
  "step": 14
135
  },
136
  {
137
- "epoch": 14.57,
138
- "eval_accuracy": 0.6818181818181818,
139
- "eval_loss": 0.8952316641807556,
140
- "eval_runtime": 1.8255,
141
- "eval_samples_per_second": 12.052,
142
- "eval_steps_per_second": 0.548,
143
- "step": 15
144
- },
145
- {
146
- "epoch": 15.57,
147
- "eval_accuracy": 0.7272727272727273,
148
- "eval_loss": 0.8217518329620361,
149
- "eval_runtime": 1.8059,
150
- "eval_samples_per_second": 12.182,
151
- "eval_steps_per_second": 0.554,
152
  "step": 16
153
  },
154
  {
155
- "epoch": 16.57,
156
- "eval_accuracy": 0.7727272727272727,
157
- "eval_loss": 0.7331055998802185,
158
- "eval_runtime": 1.6554,
159
- "eval_samples_per_second": 13.29,
160
- "eval_steps_per_second": 0.604,
161
- "step": 17
162
- },
163
- {
164
- "epoch": 17.57,
165
- "eval_accuracy": 0.8181818181818182,
166
- "eval_loss": 0.6525326371192932,
167
- "eval_runtime": 1.8033,
168
- "eval_samples_per_second": 12.2,
169
- "eval_steps_per_second": 0.555,
170
  "step": 18
171
  },
172
  {
173
- "epoch": 18.57,
174
- "eval_accuracy": 0.8636363636363636,
175
- "eval_loss": 0.5678051710128784,
176
- "eval_runtime": 1.7903,
177
- "eval_samples_per_second": 12.288,
178
- "eval_steps_per_second": 0.559,
179
- "step": 19
180
- },
181
- {
182
- "epoch": 19.57,
183
- "learning_rate": 2.777777777777778e-05,
184
- "loss": 1.9399,
185
  "step": 20
186
  },
187
  {
188
- "epoch": 19.57,
189
- "eval_accuracy": 0.8636363636363636,
190
- "eval_loss": 0.497961163520813,
191
- "eval_runtime": 1.8331,
192
- "eval_samples_per_second": 12.002,
193
- "eval_steps_per_second": 0.546,
194
  "step": 20
195
  },
196
  {
197
- "epoch": 20.57,
198
- "eval_accuracy": 0.9090909090909091,
199
- "eval_loss": 0.46136438846588135,
200
- "eval_runtime": 1.8006,
201
- "eval_samples_per_second": 12.218,
202
- "eval_steps_per_second": 0.555,
203
- "step": 21
204
- },
205
- {
206
- "epoch": 21.57,
207
- "eval_accuracy": 0.9090909090909091,
208
- "eval_loss": 0.4493587613105774,
209
- "eval_runtime": 1.6389,
210
- "eval_samples_per_second": 13.424,
211
- "eval_steps_per_second": 0.61,
212
  "step": 22
213
  },
214
  {
215
- "epoch": 22.57,
216
- "eval_accuracy": 0.8181818181818182,
217
- "eval_loss": 0.4404635727405548,
218
- "eval_runtime": 1.8581,
219
- "eval_samples_per_second": 11.84,
220
- "eval_steps_per_second": 0.538,
221
- "step": 23
222
- },
223
- {
224
- "epoch": 23.57,
225
- "eval_accuracy": 0.8636363636363636,
226
- "eval_loss": 0.4357987344264984,
227
- "eval_runtime": 1.7864,
228
- "eval_samples_per_second": 12.315,
229
- "eval_steps_per_second": 0.56,
230
  "step": 24
231
  },
232
  {
233
- "epoch": 24.57,
234
- "eval_accuracy": 0.8636363636363636,
235
- "eval_loss": 0.43069612979888916,
236
- "eval_runtime": 1.804,
237
- "eval_samples_per_second": 12.195,
238
- "eval_steps_per_second": 0.554,
239
- "step": 25
240
- },
241
- {
242
- "epoch": 25.57,
243
- "eval_accuracy": 0.8636363636363636,
244
- "eval_loss": 0.4099968671798706,
245
- "eval_runtime": 1.7489,
246
- "eval_samples_per_second": 12.579,
247
- "eval_steps_per_second": 0.572,
248
  "step": 26
249
  },
250
  {
251
- "epoch": 26.57,
252
- "eval_accuracy": 0.8181818181818182,
253
- "eval_loss": 0.3925555944442749,
254
- "eval_runtime": 1.6257,
255
- "eval_samples_per_second": 13.533,
256
- "eval_steps_per_second": 0.615,
257
- "step": 27
258
- },
259
- {
260
- "epoch": 27.57,
261
- "eval_accuracy": 0.8181818181818182,
262
- "eval_loss": 0.3818438649177551,
263
- "eval_runtime": 1.8073,
264
- "eval_samples_per_second": 12.173,
265
- "eval_steps_per_second": 0.553,
266
  "step": 28
267
  },
268
  {
269
- "epoch": 28.57,
270
- "eval_accuracy": 0.8181818181818182,
271
- "eval_loss": 0.366110235452652,
272
- "eval_runtime": 1.7879,
273
- "eval_samples_per_second": 12.305,
274
- "eval_steps_per_second": 0.559,
275
- "step": 29
276
- },
277
- {
278
- "epoch": 29.57,
279
- "eval_accuracy": 0.8636363636363636,
280
- "eval_loss": 0.35147225856781006,
281
- "eval_runtime": 1.8109,
282
- "eval_samples_per_second": 12.149,
283
- "eval_steps_per_second": 0.552,
284
  "step": 30
285
  },
286
  {
287
- "epoch": 30.57,
288
- "eval_accuracy": 0.8636363636363636,
289
- "eval_loss": 0.3345157206058502,
290
- "eval_runtime": 1.8479,
291
- "eval_samples_per_second": 11.905,
292
- "eval_steps_per_second": 0.541,
293
- "step": 31
294
- },
295
- {
296
- "epoch": 31.57,
297
- "eval_accuracy": 0.8636363636363636,
298
- "eval_loss": 0.32043513655662537,
299
- "eval_runtime": 1.599,
300
- "eval_samples_per_second": 13.759,
301
- "eval_steps_per_second": 0.625,
302
  "step": 32
303
  },
304
  {
305
- "epoch": 32.57,
306
- "eval_accuracy": 0.8636363636363636,
307
- "eval_loss": 0.3077632486820221,
308
- "eval_runtime": 1.8226,
309
- "eval_samples_per_second": 12.071,
310
- "eval_steps_per_second": 0.549,
311
- "step": 33
312
- },
313
- {
314
- "epoch": 33.57,
315
- "eval_accuracy": 0.8636363636363636,
316
- "eval_loss": 0.2948474586009979,
317
- "eval_runtime": 1.7885,
318
- "eval_samples_per_second": 12.301,
319
- "eval_steps_per_second": 0.559,
320
  "step": 34
321
  },
322
  {
323
- "epoch": 34.57,
324
- "eval_accuracy": 0.8636363636363636,
325
- "eval_loss": 0.2848185896873474,
326
- "eval_runtime": 1.8362,
327
- "eval_samples_per_second": 11.981,
328
- "eval_steps_per_second": 0.545,
329
- "step": 35
330
- },
331
- {
332
- "epoch": 35.57,
333
- "eval_accuracy": 0.8636363636363636,
334
- "eval_loss": 0.2748388946056366,
335
- "eval_runtime": 1.8319,
336
- "eval_samples_per_second": 12.009,
337
- "eval_steps_per_second": 0.546,
338
  "step": 36
339
  },
340
  {
341
- "epoch": 36.57,
342
- "eval_accuracy": 0.8636363636363636,
343
- "eval_loss": 0.2679346203804016,
344
- "eval_runtime": 1.6038,
345
- "eval_samples_per_second": 13.718,
346
- "eval_steps_per_second": 0.624,
347
- "step": 37
348
- },
349
- {
350
- "epoch": 37.57,
351
- "eval_accuracy": 0.8636363636363636,
352
- "eval_loss": 0.26424679160118103,
353
- "eval_runtime": 1.8361,
354
- "eval_samples_per_second": 11.982,
355
- "eval_steps_per_second": 0.545,
356
  "step": 38
357
  },
358
  {
359
- "epoch": 38.57,
360
- "eval_accuracy": 0.8636363636363636,
361
- "eval_loss": 0.2638870179653168,
362
- "eval_runtime": 1.7769,
363
- "eval_samples_per_second": 12.381,
364
- "eval_steps_per_second": 0.563,
365
- "step": 39
366
- },
367
- {
368
- "epoch": 39.57,
369
- "learning_rate": 0.0,
370
- "loss": 0.728,
371
  "step": 40
372
  },
373
  {
374
- "epoch": 39.57,
375
- "eval_accuracy": 0.8636363636363636,
376
- "eval_loss": 0.26363667845726013,
377
- "eval_runtime": 1.7373,
378
- "eval_samples_per_second": 12.663,
379
- "eval_steps_per_second": 0.576,
380
  "step": 40
381
  },
382
  {
383
- "epoch": 39.57,
384
- "step": 40,
385
- "total_flos": 1.9417183005730406e+17,
386
- "train_loss": 1.3339122772216796,
387
- "train_runtime": 965.4815,
388
- "train_samples_per_second": 8.162,
389
- "train_steps_per_second": 0.041
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  }
391
  ],
392
- "max_steps": 40,
393
  "num_train_epochs": 40,
394
- "total_flos": 1.9417183005730406e+17,
395
  "trial_name": null,
396
  "trial_params": null
397
  }
 
1
  {
2
+ "best_metric": 0.9714285714285714,
3
+ "best_model_checkpoint": "delivery_truck_classification\\checkpoint-4",
4
+ "epoch": 39.8,
5
+ "global_step": 80,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.8,
12
+ "eval_accuracy": 0.9428571428571428,
13
+ "eval_loss": 0.19194960594177246,
14
+ "eval_runtime": 8.8411,
15
+ "eval_samples_per_second": 3.959,
16
+ "eval_steps_per_second": 0.226,
 
 
 
 
 
 
 
 
 
17
  "step": 2
18
  },
19
  {
20
+ "epoch": 1.8,
21
+ "eval_accuracy": 0.9714285714285714,
22
+ "eval_loss": 0.1382756382226944,
23
+ "eval_runtime": 7.2367,
24
+ "eval_samples_per_second": 4.836,
25
+ "eval_steps_per_second": 0.276,
 
 
 
 
 
 
 
 
 
26
  "step": 4
27
  },
28
  {
29
+ "epoch": 2.8,
30
+ "eval_accuracy": 0.9142857142857143,
31
+ "eval_loss": 0.19303296506404877,
32
+ "eval_runtime": 7.0677,
33
+ "eval_samples_per_second": 4.952,
34
+ "eval_steps_per_second": 0.283,
 
 
 
 
 
 
 
 
 
35
  "step": 6
36
  },
37
  {
38
+ "epoch": 3.8,
39
+ "eval_accuracy": 0.9714285714285714,
40
+ "eval_loss": 0.1462627798318863,
41
+ "eval_runtime": 7.1631,
42
+ "eval_samples_per_second": 4.886,
43
+ "eval_steps_per_second": 0.279,
 
 
 
 
 
 
 
 
 
44
  "step": 8
45
  },
46
  {
47
+ "epoch": 4.8,
48
+ "eval_accuracy": 0.9714285714285714,
49
+ "eval_loss": 0.1735035479068756,
50
+ "eval_runtime": 6.91,
51
+ "eval_samples_per_second": 5.065,
52
+ "eval_steps_per_second": 0.289,
 
 
 
 
 
 
 
 
 
53
  "step": 10
54
  },
55
  {
56
+ "epoch": 5.8,
57
+ "eval_accuracy": 0.9714285714285714,
58
+ "eval_loss": 0.1692284345626831,
59
+ "eval_runtime": 6.8605,
60
+ "eval_samples_per_second": 5.102,
61
+ "eval_steps_per_second": 0.292,
 
 
 
 
 
 
 
 
 
62
  "step": 12
63
  },
64
  {
65
+ "epoch": 6.8,
66
+ "eval_accuracy": 0.9714285714285714,
67
+ "eval_loss": 0.1626075655221939,
68
+ "eval_runtime": 6.8284,
69
+ "eval_samples_per_second": 5.126,
70
+ "eval_steps_per_second": 0.293,
 
 
 
 
 
 
 
 
 
71
  "step": 14
72
  },
73
  {
74
+ "epoch": 7.8,
75
+ "eval_accuracy": 0.9714285714285714,
76
+ "eval_loss": 0.16586509346961975,
77
+ "eval_runtime": 6.3075,
78
+ "eval_samples_per_second": 5.549,
79
+ "eval_steps_per_second": 0.317,
 
 
 
 
 
 
 
 
 
80
  "step": 16
81
  },
82
  {
83
+ "epoch": 8.8,
84
+ "eval_accuracy": 0.9714285714285714,
85
+ "eval_loss": 0.1622493714094162,
86
+ "eval_runtime": 6.8493,
87
+ "eval_samples_per_second": 5.11,
88
+ "eval_steps_per_second": 0.292,
 
 
 
 
 
 
 
 
 
89
  "step": 18
90
  },
91
  {
92
+ "epoch": 9.8,
93
+ "learning_rate": 4.166666666666667e-05,
94
+ "loss": 0.2046,
 
 
 
 
 
 
 
 
 
95
  "step": 20
96
  },
97
  {
98
+ "epoch": 9.8,
99
+ "eval_accuracy": 0.9714285714285714,
100
+ "eval_loss": 0.1598205715417862,
101
+ "eval_runtime": 6.8584,
102
+ "eval_samples_per_second": 5.103,
103
+ "eval_steps_per_second": 0.292,
104
  "step": 20
105
  },
106
  {
107
+ "epoch": 10.8,
108
+ "eval_accuracy": 0.9714285714285714,
109
+ "eval_loss": 0.16681121289730072,
110
+ "eval_runtime": 7.2746,
111
+ "eval_samples_per_second": 4.811,
112
+ "eval_steps_per_second": 0.275,
 
 
 
 
 
 
 
 
 
113
  "step": 22
114
  },
115
  {
116
+ "epoch": 11.8,
117
+ "eval_accuracy": 0.9714285714285714,
118
+ "eval_loss": 0.1746995747089386,
119
+ "eval_runtime": 7.1258,
120
+ "eval_samples_per_second": 4.912,
121
+ "eval_steps_per_second": 0.281,
 
 
 
 
 
 
 
 
 
122
  "step": 24
123
  },
124
  {
125
+ "epoch": 12.8,
126
+ "eval_accuracy": 0.9714285714285714,
127
+ "eval_loss": 0.18037545680999756,
128
+ "eval_runtime": 6.9863,
129
+ "eval_samples_per_second": 5.01,
130
+ "eval_steps_per_second": 0.286,
 
 
 
 
 
 
 
 
 
131
  "step": 26
132
  },
133
  {
134
+ "epoch": 13.8,
135
+ "eval_accuracy": 0.9714285714285714,
136
+ "eval_loss": 0.18370455503463745,
137
+ "eval_runtime": 6.1139,
138
+ "eval_samples_per_second": 5.725,
139
+ "eval_steps_per_second": 0.327,
 
 
 
 
 
 
 
 
 
140
  "step": 28
141
  },
142
  {
143
+ "epoch": 14.8,
144
+ "eval_accuracy": 0.9714285714285714,
145
+ "eval_loss": 0.1837157905101776,
146
+ "eval_runtime": 6.7444,
147
+ "eval_samples_per_second": 5.189,
148
+ "eval_steps_per_second": 0.297,
 
 
 
 
 
 
 
 
 
149
  "step": 30
150
  },
151
  {
152
+ "epoch": 15.8,
153
+ "eval_accuracy": 0.9714285714285714,
154
+ "eval_loss": 0.18111634254455566,
155
+ "eval_runtime": 7.0847,
156
+ "eval_samples_per_second": 4.94,
157
+ "eval_steps_per_second": 0.282,
 
 
 
 
 
 
 
 
 
158
  "step": 32
159
  },
160
  {
161
+ "epoch": 16.8,
162
+ "eval_accuracy": 0.9714285714285714,
163
+ "eval_loss": 0.18009454011917114,
164
+ "eval_runtime": 6.8211,
165
+ "eval_samples_per_second": 5.131,
166
+ "eval_steps_per_second": 0.293,
 
 
 
 
 
 
 
 
 
167
  "step": 34
168
  },
169
  {
170
+ "epoch": 17.8,
171
+ "eval_accuracy": 0.9714285714285714,
172
+ "eval_loss": 0.18408751487731934,
173
+ "eval_runtime": 6.9341,
174
+ "eval_samples_per_second": 5.048,
175
+ "eval_steps_per_second": 0.288,
 
 
 
 
 
 
 
 
 
176
  "step": 36
177
  },
178
  {
179
+ "epoch": 18.8,
180
+ "eval_accuracy": 0.9714285714285714,
181
+ "eval_loss": 0.1899442970752716,
182
+ "eval_runtime": 7.0358,
183
+ "eval_samples_per_second": 4.975,
184
+ "eval_steps_per_second": 0.284,
 
 
 
 
 
 
 
 
 
185
  "step": 38
186
  },
187
  {
188
+ "epoch": 19.8,
189
+ "learning_rate": 2.777777777777778e-05,
190
+ "loss": 0.1657,
 
 
 
 
 
 
 
 
 
191
  "step": 40
192
  },
193
  {
194
+ "epoch": 19.8,
195
+ "eval_accuracy": 0.9714285714285714,
196
+ "eval_loss": 0.19598019123077393,
197
+ "eval_runtime": 7.9601,
198
+ "eval_samples_per_second": 4.397,
199
+ "eval_steps_per_second": 0.251,
200
  "step": 40
201
  },
202
  {
203
+ "epoch": 20.8,
204
+ "eval_accuracy": 0.9714285714285714,
205
+ "eval_loss": 0.19925238192081451,
206
+ "eval_runtime": 7.4131,
207
+ "eval_samples_per_second": 4.721,
208
+ "eval_steps_per_second": 0.27,
209
+ "step": 42
210
+ },
211
+ {
212
+ "epoch": 21.8,
213
+ "eval_accuracy": 0.9714285714285714,
214
+ "eval_loss": 0.20172713696956635,
215
+ "eval_runtime": 6.7891,
216
+ "eval_samples_per_second": 5.155,
217
+ "eval_steps_per_second": 0.295,
218
+ "step": 44
219
+ },
220
+ {
221
+ "epoch": 22.8,
222
+ "eval_accuracy": 0.9714285714285714,
223
+ "eval_loss": 0.2004331350326538,
224
+ "eval_runtime": 6.2296,
225
+ "eval_samples_per_second": 5.618,
226
+ "eval_steps_per_second": 0.321,
227
+ "step": 46
228
+ },
229
+ {
230
+ "epoch": 23.8,
231
+ "eval_accuracy": 0.9714285714285714,
232
+ "eval_loss": 0.19216616451740265,
233
+ "eval_runtime": 7.0857,
234
+ "eval_samples_per_second": 4.94,
235
+ "eval_steps_per_second": 0.282,
236
+ "step": 48
237
+ },
238
+ {
239
+ "epoch": 24.8,
240
+ "eval_accuracy": 0.9714285714285714,
241
+ "eval_loss": 0.18559373915195465,
242
+ "eval_runtime": 7.3612,
243
+ "eval_samples_per_second": 4.755,
244
+ "eval_steps_per_second": 0.272,
245
+ "step": 50
246
+ },
247
+ {
248
+ "epoch": 25.8,
249
+ "eval_accuracy": 0.9714285714285714,
250
+ "eval_loss": 0.18343603610992432,
251
+ "eval_runtime": 6.8088,
252
+ "eval_samples_per_second": 5.14,
253
+ "eval_steps_per_second": 0.294,
254
+ "step": 52
255
+ },
256
+ {
257
+ "epoch": 26.8,
258
+ "eval_accuracy": 0.9714285714285714,
259
+ "eval_loss": 0.18461596965789795,
260
+ "eval_runtime": 6.6873,
261
+ "eval_samples_per_second": 5.234,
262
+ "eval_steps_per_second": 0.299,
263
+ "step": 54
264
+ },
265
+ {
266
+ "epoch": 27.8,
267
+ "eval_accuracy": 0.9714285714285714,
268
+ "eval_loss": 0.18977026641368866,
269
+ "eval_runtime": 6.826,
270
+ "eval_samples_per_second": 5.127,
271
+ "eval_steps_per_second": 0.293,
272
+ "step": 56
273
+ },
274
+ {
275
+ "epoch": 28.8,
276
+ "eval_accuracy": 0.9714285714285714,
277
+ "eval_loss": 0.1951347291469574,
278
+ "eval_runtime": 6.8913,
279
+ "eval_samples_per_second": 5.079,
280
+ "eval_steps_per_second": 0.29,
281
+ "step": 58
282
+ },
283
+ {
284
+ "epoch": 29.8,
285
+ "learning_rate": 1.388888888888889e-05,
286
+ "loss": 0.1308,
287
+ "step": 60
288
+ },
289
+ {
290
+ "epoch": 29.8,
291
+ "eval_accuracy": 0.9714285714285714,
292
+ "eval_loss": 0.20185869932174683,
293
+ "eval_runtime": 6.8601,
294
+ "eval_samples_per_second": 5.102,
295
+ "eval_steps_per_second": 0.292,
296
+ "step": 60
297
+ },
298
+ {
299
+ "epoch": 30.8,
300
+ "eval_accuracy": 0.9714285714285714,
301
+ "eval_loss": 0.2095019370317459,
302
+ "eval_runtime": 7.2055,
303
+ "eval_samples_per_second": 4.857,
304
+ "eval_steps_per_second": 0.278,
305
+ "step": 62
306
+ },
307
+ {
308
+ "epoch": 31.8,
309
+ "eval_accuracy": 0.9714285714285714,
310
+ "eval_loss": 0.21445579826831818,
311
+ "eval_runtime": 7.4924,
312
+ "eval_samples_per_second": 4.671,
313
+ "eval_steps_per_second": 0.267,
314
+ "step": 64
315
+ },
316
+ {
317
+ "epoch": 32.8,
318
+ "eval_accuracy": 0.9714285714285714,
319
+ "eval_loss": 0.21541449427604675,
320
+ "eval_runtime": 6.8812,
321
+ "eval_samples_per_second": 5.086,
322
+ "eval_steps_per_second": 0.291,
323
+ "step": 66
324
+ },
325
+ {
326
+ "epoch": 33.8,
327
+ "eval_accuracy": 0.9714285714285714,
328
+ "eval_loss": 0.21372175216674805,
329
+ "eval_runtime": 6.9147,
330
+ "eval_samples_per_second": 5.062,
331
+ "eval_steps_per_second": 0.289,
332
+ "step": 68
333
+ },
334
+ {
335
+ "epoch": 34.8,
336
+ "eval_accuracy": 0.9714285714285714,
337
+ "eval_loss": 0.2116171419620514,
338
+ "eval_runtime": 7.0628,
339
+ "eval_samples_per_second": 4.956,
340
+ "eval_steps_per_second": 0.283,
341
+ "step": 70
342
+ },
343
+ {
344
+ "epoch": 35.8,
345
+ "eval_accuracy": 0.9714285714285714,
346
+ "eval_loss": 0.20960116386413574,
347
+ "eval_runtime": 7.1202,
348
+ "eval_samples_per_second": 4.916,
349
+ "eval_steps_per_second": 0.281,
350
+ "step": 72
351
+ },
352
+ {
353
+ "epoch": 36.8,
354
+ "eval_accuracy": 0.9714285714285714,
355
+ "eval_loss": 0.20841823518276215,
356
+ "eval_runtime": 7.1749,
357
+ "eval_samples_per_second": 4.878,
358
+ "eval_steps_per_second": 0.279,
359
+ "step": 74
360
+ },
361
+ {
362
+ "epoch": 37.8,
363
+ "eval_accuracy": 0.9714285714285714,
364
+ "eval_loss": 0.20780029892921448,
365
+ "eval_runtime": 6.9083,
366
+ "eval_samples_per_second": 5.066,
367
+ "eval_steps_per_second": 0.29,
368
+ "step": 76
369
+ },
370
+ {
371
+ "epoch": 38.8,
372
+ "eval_accuracy": 0.9714285714285714,
373
+ "eval_loss": 0.2074960172176361,
374
+ "eval_runtime": 6.9495,
375
+ "eval_samples_per_second": 5.036,
376
+ "eval_steps_per_second": 0.288,
377
+ "step": 78
378
+ },
379
+ {
380
+ "epoch": 39.8,
381
+ "learning_rate": 0.0,
382
+ "loss": 0.1053,
383
+ "step": 80
384
+ },
385
+ {
386
+ "epoch": 39.8,
387
+ "eval_accuracy": 0.9714285714285714,
388
+ "eval_loss": 0.20739802718162537,
389
+ "eval_runtime": 6.9547,
390
+ "eval_samples_per_second": 5.033,
391
+ "eval_steps_per_second": 0.288,
392
+ "step": 80
393
+ },
394
+ {
395
+ "epoch": 39.8,
396
+ "step": 80,
397
+ "total_flos": 3.088453228308726e+17,
398
+ "train_loss": 0.15159874260425568,
399
+ "train_runtime": 6474.8036,
400
+ "train_samples_per_second": 1.927,
401
+ "train_steps_per_second": 0.012
402
  }
403
  ],
404
+ "max_steps": 80,
405
  "num_train_epochs": 40,
406
+ "total_flos": 3.088453228308726e+17,
407
  "trial_name": null,
408
  "trial_params": null
409
  }