JEdward7777 committed on
Commit
147e9e5
·
1 Parent(s): eab1611

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 39.67,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.5916178226470947,
5
- "eval_runtime": 1.6168,
6
- "eval_samples_per_second": 13.607,
7
- "eval_steps_per_second": 0.619,
8
- "total_flos": 1.893243704668324e+17,
9
- "train_loss": 1.1570292234420776,
10
- "train_runtime": 934.4965,
11
- "train_samples_per_second": 8.218,
12
- "train_steps_per_second": 0.043
13
  }
 
1
  {
2
+ "epoch": 39.57,
3
+ "eval_accuracy": 0.9090909090909091,
4
+ "eval_loss": 0.46136438846588135,
5
+ "eval_runtime": 1.7026,
6
+ "eval_samples_per_second": 12.921,
7
+ "eval_steps_per_second": 0.587,
8
+ "total_flos": 1.9417183005730406e+17,
9
+ "train_loss": 1.3339122772216796,
10
+ "train_runtime": 965.4815,
11
+ "train_samples_per_second": 8.162,
12
+ "train_steps_per_second": 0.041
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.67,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.5916178226470947,
5
- "eval_runtime": 1.6168,
6
- "eval_samples_per_second": 13.607,
7
- "eval_steps_per_second": 0.619
8
  }
 
1
  {
2
+ "epoch": 39.57,
3
+ "eval_accuracy": 0.9090909090909091,
4
+ "eval_loss": 0.46136438846588135,
5
+ "eval_runtime": 1.7026,
6
+ "eval_samples_per_second": 12.921,
7
+ "eval_steps_per_second": 0.587
8
  }
runs/Sep08_11-09-55_9916cc61b3ca/events.out.tfevents.1662636601.9916cc61b3ca.70.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aedf680300539d244a53244ff8c543ab2aea5a73d309d5630c6f8a7e2ee7874
3
+ size 357
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.67,
3
- "total_flos": 1.893243704668324e+17,
4
- "train_loss": 1.1570292234420776,
5
- "train_runtime": 934.4965,
6
- "train_samples_per_second": 8.218,
7
- "train_steps_per_second": 0.043
8
  }
 
1
  {
2
+ "epoch": 39.57,
3
+ "total_flos": 1.9417183005730406e+17,
4
+ "train_loss": 1.3339122772216796,
5
+ "train_runtime": 965.4815,
6
+ "train_samples_per_second": 8.162,
7
+ "train_steps_per_second": 0.041
8
  }
trainer_state.json CHANGED
@@ -1,397 +1,397 @@
1
  {
2
- "best_metric": 1.0,
3
- "best_model_checkpoint": "delivery_truck_classification/checkpoint-13",
4
- "epoch": 39.666666666666664,
5
  "global_step": 40,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.67,
12
- "eval_accuracy": 0.18181818181818182,
13
- "eval_loss": 1.868780255317688,
14
- "eval_runtime": 1.5131,
15
- "eval_samples_per_second": 14.54,
16
- "eval_steps_per_second": 0.661,
17
  "step": 1
18
  },
19
  {
20
- "epoch": 1.67,
21
- "eval_accuracy": 0.18181818181818182,
22
- "eval_loss": 1.7919577360153198,
23
- "eval_runtime": 1.7289,
24
- "eval_samples_per_second": 12.725,
25
- "eval_steps_per_second": 0.578,
26
  "step": 2
27
  },
28
  {
29
- "epoch": 2.67,
30
- "eval_accuracy": 0.36363636363636365,
31
- "eval_loss": 1.6533408164978027,
32
- "eval_runtime": 1.6458,
33
- "eval_samples_per_second": 13.368,
34
- "eval_steps_per_second": 0.608,
35
  "step": 3
36
  },
37
  {
38
- "epoch": 3.67,
39
- "eval_accuracy": 0.45454545454545453,
40
- "eval_loss": 1.4775406122207642,
41
- "eval_runtime": 1.6663,
42
- "eval_samples_per_second": 13.203,
43
- "eval_steps_per_second": 0.6,
44
  "step": 4
45
  },
46
  {
47
- "epoch": 4.67,
48
- "eval_accuracy": 0.5909090909090909,
49
- "eval_loss": 1.2912439107894897,
50
- "eval_runtime": 1.6991,
51
- "eval_samples_per_second": 12.948,
52
- "eval_steps_per_second": 0.589,
53
  "step": 5
54
  },
55
  {
56
- "epoch": 5.67,
57
- "eval_accuracy": 0.7272727272727273,
58
- "eval_loss": 1.147495150566101,
59
- "eval_runtime": 1.713,
60
- "eval_samples_per_second": 12.843,
61
- "eval_steps_per_second": 0.584,
62
  "step": 6
63
  },
64
  {
65
- "epoch": 6.67,
66
- "eval_accuracy": 0.7727272727272727,
67
- "eval_loss": 1.0265945196151733,
68
- "eval_runtime": 1.5543,
69
- "eval_samples_per_second": 14.154,
70
- "eval_steps_per_second": 0.643,
71
  "step": 7
72
  },
73
  {
74
- "epoch": 7.67,
75
- "eval_accuracy": 0.7727272727272727,
76
- "eval_loss": 0.9196190237998962,
77
- "eval_runtime": 1.677,
78
- "eval_samples_per_second": 13.119,
79
- "eval_steps_per_second": 0.596,
80
  "step": 8
81
  },
82
  {
83
- "epoch": 8.67,
84
- "eval_accuracy": 0.8181818181818182,
85
- "eval_loss": 0.8272687792778015,
86
- "eval_runtime": 1.7098,
87
- "eval_samples_per_second": 12.867,
88
- "eval_steps_per_second": 0.585,
89
  "step": 9
90
  },
91
  {
92
- "epoch": 9.67,
93
- "eval_accuracy": 0.8181818181818182,
94
- "eval_loss": 0.7491626739501953,
95
- "eval_runtime": 1.6687,
96
- "eval_samples_per_second": 13.184,
97
- "eval_steps_per_second": 0.599,
98
  "step": 10
99
  },
100
  {
101
- "epoch": 10.67,
102
- "eval_accuracy": 0.9090909090909091,
103
- "eval_loss": 0.6857182383537292,
104
- "eval_runtime": 1.6621,
105
- "eval_samples_per_second": 13.236,
106
- "eval_steps_per_second": 0.602,
107
  "step": 11
108
  },
109
  {
110
- "epoch": 11.67,
111
- "eval_accuracy": 0.9090909090909091,
112
- "eval_loss": 0.6368551850318909,
113
- "eval_runtime": 1.555,
114
- "eval_samples_per_second": 14.148,
115
- "eval_steps_per_second": 0.643,
116
  "step": 12
117
  },
118
  {
119
- "epoch": 12.67,
120
- "eval_accuracy": 1.0,
121
- "eval_loss": 0.5916178226470947,
122
- "eval_runtime": 1.5463,
123
- "eval_samples_per_second": 14.227,
124
- "eval_steps_per_second": 0.647,
125
  "step": 13
126
  },
127
  {
128
- "epoch": 13.67,
129
- "eval_accuracy": 1.0,
130
- "eval_loss": 0.5461986660957336,
131
- "eval_runtime": 1.751,
132
- "eval_samples_per_second": 12.564,
133
- "eval_steps_per_second": 0.571,
134
  "step": 14
135
  },
136
  {
137
- "epoch": 14.67,
138
- "eval_accuracy": 1.0,
139
- "eval_loss": 0.4926997423171997,
140
- "eval_runtime": 1.6672,
141
- "eval_samples_per_second": 13.196,
142
- "eval_steps_per_second": 0.6,
143
  "step": 15
144
  },
145
  {
146
- "epoch": 15.67,
147
- "eval_accuracy": 1.0,
148
- "eval_loss": 0.4390135705471039,
149
- "eval_runtime": 1.6819,
150
- "eval_samples_per_second": 13.08,
151
- "eval_steps_per_second": 0.595,
152
  "step": 16
153
  },
154
  {
155
- "epoch": 16.67,
156
- "eval_accuracy": 1.0,
157
- "eval_loss": 0.3913687765598297,
158
- "eval_runtime": 1.7338,
159
- "eval_samples_per_second": 12.689,
160
- "eval_steps_per_second": 0.577,
161
  "step": 17
162
  },
163
  {
164
- "epoch": 17.67,
165
- "eval_accuracy": 1.0,
166
- "eval_loss": 0.3446086645126343,
167
- "eval_runtime": 1.6759,
168
- "eval_samples_per_second": 13.127,
169
- "eval_steps_per_second": 0.597,
170
  "step": 18
171
  },
172
  {
173
- "epoch": 18.67,
174
- "eval_accuracy": 1.0,
175
- "eval_loss": 0.30193081498146057,
176
- "eval_runtime": 1.53,
177
- "eval_samples_per_second": 14.379,
178
- "eval_steps_per_second": 0.654,
179
  "step": 19
180
  },
181
  {
182
- "epoch": 19.67,
183
  "learning_rate": 2.777777777777778e-05,
184
- "loss": 1.7058,
185
  "step": 20
186
  },
187
  {
188
- "epoch": 19.67,
189
- "eval_accuracy": 1.0,
190
- "eval_loss": 0.26113784313201904,
191
- "eval_runtime": 1.6899,
192
- "eval_samples_per_second": 13.018,
193
- "eval_steps_per_second": 0.592,
194
  "step": 20
195
  },
196
  {
197
- "epoch": 20.67,
198
- "eval_accuracy": 1.0,
199
- "eval_loss": 0.22893615067005157,
200
- "eval_runtime": 1.6979,
201
- "eval_samples_per_second": 12.957,
202
- "eval_steps_per_second": 0.589,
203
  "step": 21
204
  },
205
  {
206
- "epoch": 21.67,
207
- "eval_accuracy": 1.0,
208
- "eval_loss": 0.19601884484291077,
209
- "eval_runtime": 1.7011,
210
- "eval_samples_per_second": 12.933,
211
- "eval_steps_per_second": 0.588,
212
  "step": 22
213
  },
214
  {
215
- "epoch": 22.67,
216
- "eval_accuracy": 1.0,
217
- "eval_loss": 0.17112015187740326,
218
- "eval_runtime": 1.7223,
219
- "eval_samples_per_second": 12.774,
220
- "eval_steps_per_second": 0.581,
221
  "step": 23
222
  },
223
  {
224
- "epoch": 23.67,
225
- "eval_accuracy": 1.0,
226
- "eval_loss": 0.1567678302526474,
227
- "eval_runtime": 1.7218,
228
- "eval_samples_per_second": 12.778,
229
- "eval_steps_per_second": 0.581,
230
  "step": 24
231
  },
232
  {
233
- "epoch": 24.67,
234
- "eval_accuracy": 1.0,
235
- "eval_loss": 0.14628903567790985,
236
- "eval_runtime": 1.5212,
237
- "eval_samples_per_second": 14.462,
238
- "eval_steps_per_second": 0.657,
239
  "step": 25
240
  },
241
  {
242
- "epoch": 25.67,
243
- "eval_accuracy": 1.0,
244
- "eval_loss": 0.13830214738845825,
245
- "eval_runtime": 1.7359,
246
- "eval_samples_per_second": 12.673,
247
- "eval_steps_per_second": 0.576,
248
  "step": 26
249
  },
250
  {
251
- "epoch": 26.67,
252
- "eval_accuracy": 1.0,
253
- "eval_loss": 0.13232018053531647,
254
- "eval_runtime": 1.6428,
255
- "eval_samples_per_second": 13.392,
256
- "eval_steps_per_second": 0.609,
257
  "step": 27
258
  },
259
  {
260
- "epoch": 27.67,
261
- "eval_accuracy": 1.0,
262
- "eval_loss": 0.12681324779987335,
263
- "eval_runtime": 2.2983,
264
- "eval_samples_per_second": 9.572,
265
- "eval_steps_per_second": 0.435,
266
  "step": 28
267
  },
268
  {
269
- "epoch": 28.67,
270
- "eval_accuracy": 1.0,
271
- "eval_loss": 0.1198858991265297,
272
- "eval_runtime": 1.6718,
273
- "eval_samples_per_second": 13.16,
274
- "eval_steps_per_second": 0.598,
275
  "step": 29
276
  },
277
  {
278
- "epoch": 29.67,
279
- "eval_accuracy": 1.0,
280
- "eval_loss": 0.11446233093738556,
281
- "eval_runtime": 1.6684,
282
- "eval_samples_per_second": 13.186,
283
- "eval_steps_per_second": 0.599,
284
  "step": 30
285
  },
286
  {
287
- "epoch": 30.67,
288
- "eval_accuracy": 1.0,
289
- "eval_loss": 0.11288688331842422,
290
- "eval_runtime": 1.5105,
291
- "eval_samples_per_second": 14.565,
292
- "eval_steps_per_second": 0.662,
293
  "step": 31
294
  },
295
  {
296
- "epoch": 31.67,
297
- "eval_accuracy": 1.0,
298
- "eval_loss": 0.10953269153833389,
299
- "eval_runtime": 1.7039,
300
- "eval_samples_per_second": 12.911,
301
- "eval_steps_per_second": 0.587,
302
  "step": 32
303
  },
304
  {
305
- "epoch": 32.67,
306
- "eval_accuracy": 1.0,
307
- "eval_loss": 0.10787732154130936,
308
- "eval_runtime": 1.7185,
309
- "eval_samples_per_second": 12.802,
310
- "eval_steps_per_second": 0.582,
311
  "step": 33
312
  },
313
  {
314
- "epoch": 33.67,
315
- "eval_accuracy": 1.0,
316
- "eval_loss": 0.10531877726316452,
317
- "eval_runtime": 1.8305,
318
- "eval_samples_per_second": 12.019,
319
- "eval_steps_per_second": 0.546,
320
  "step": 34
321
  },
322
  {
323
- "epoch": 34.67,
324
- "eval_accuracy": 1.0,
325
- "eval_loss": 0.10338964313268661,
326
- "eval_runtime": 1.6703,
327
- "eval_samples_per_second": 13.171,
328
- "eval_steps_per_second": 0.599,
329
  "step": 35
330
  },
331
  {
332
- "epoch": 35.67,
333
- "eval_accuracy": 1.0,
334
- "eval_loss": 0.09904544055461884,
335
- "eval_runtime": 1.5401,
336
- "eval_samples_per_second": 14.285,
337
- "eval_steps_per_second": 0.649,
338
  "step": 36
339
  },
340
  {
341
- "epoch": 36.67,
342
- "eval_accuracy": 1.0,
343
- "eval_loss": 0.09628929942846298,
344
- "eval_runtime": 1.5486,
345
- "eval_samples_per_second": 14.206,
346
- "eval_steps_per_second": 0.646,
347
  "step": 37
348
  },
349
  {
350
- "epoch": 37.67,
351
- "eval_accuracy": 1.0,
352
- "eval_loss": 0.09520366042852402,
353
- "eval_runtime": 1.6769,
354
- "eval_samples_per_second": 13.12,
355
- "eval_steps_per_second": 0.596,
356
  "step": 38
357
  },
358
  {
359
- "epoch": 38.67,
360
- "eval_accuracy": 1.0,
361
- "eval_loss": 0.09436272829771042,
362
- "eval_runtime": 1.7152,
363
- "eval_samples_per_second": 12.826,
364
- "eval_steps_per_second": 0.583,
365
  "step": 39
366
  },
367
  {
368
- "epoch": 39.67,
369
  "learning_rate": 0.0,
370
- "loss": 0.6083,
371
  "step": 40
372
  },
373
  {
374
- "epoch": 39.67,
375
- "eval_accuracy": 1.0,
376
- "eval_loss": 0.0941963642835617,
377
- "eval_runtime": 1.7089,
378
- "eval_samples_per_second": 12.874,
379
- "eval_steps_per_second": 0.585,
380
  "step": 40
381
  },
382
  {
383
- "epoch": 39.67,
384
  "step": 40,
385
- "total_flos": 1.893243704668324e+17,
386
- "train_loss": 1.1570292234420776,
387
- "train_runtime": 934.4965,
388
- "train_samples_per_second": 8.218,
389
- "train_steps_per_second": 0.043
390
  }
391
  ],
392
  "max_steps": 40,
393
  "num_train_epochs": 40,
394
- "total_flos": 1.893243704668324e+17,
395
  "trial_name": null,
396
  "trial_params": null
397
  }
 
1
  {
2
+ "best_metric": 0.9090909090909091,
3
+ "best_model_checkpoint": "delivery_truck_classification/checkpoint-21",
4
+ "epoch": 39.57142857142857,
5
  "global_step": 40,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.57,
12
+ "eval_accuracy": 0.13636363636363635,
13
+ "eval_loss": 1.9638466835021973,
14
+ "eval_runtime": 1.6283,
15
+ "eval_samples_per_second": 13.511,
16
+ "eval_steps_per_second": 0.614,
17
  "step": 1
18
  },
19
  {
20
+ "epoch": 1.57,
21
+ "eval_accuracy": 0.09090909090909091,
22
+ "eval_loss": 1.9021785259246826,
23
+ "eval_runtime": 1.7599,
24
+ "eval_samples_per_second": 12.501,
25
+ "eval_steps_per_second": 0.568,
26
  "step": 2
27
  },
28
  {
29
+ "epoch": 2.57,
30
+ "eval_accuracy": 0.09090909090909091,
31
+ "eval_loss": 1.7954376935958862,
32
+ "eval_runtime": 1.7675,
33
+ "eval_samples_per_second": 12.447,
34
+ "eval_steps_per_second": 0.566,
35
  "step": 3
36
  },
37
  {
38
+ "epoch": 3.57,
39
+ "eval_accuracy": 0.36363636363636365,
40
+ "eval_loss": 1.646571397781372,
41
+ "eval_runtime": 1.8048,
42
+ "eval_samples_per_second": 12.19,
43
+ "eval_steps_per_second": 0.554,
44
  "step": 4
45
  },
46
  {
47
+ "epoch": 4.57,
48
+ "eval_accuracy": 0.5,
49
+ "eval_loss": 1.516127347946167,
50
+ "eval_runtime": 1.8376,
51
+ "eval_samples_per_second": 11.972,
52
+ "eval_steps_per_second": 0.544,
53
  "step": 5
54
  },
55
  {
56
+ "epoch": 5.57,
57
+ "eval_accuracy": 0.5454545454545454,
58
+ "eval_loss": 1.4261207580566406,
59
+ "eval_runtime": 1.6096,
60
+ "eval_samples_per_second": 13.668,
61
+ "eval_steps_per_second": 0.621,
62
  "step": 6
63
  },
64
  {
65
+ "epoch": 6.57,
66
+ "eval_accuracy": 0.5454545454545454,
67
+ "eval_loss": 1.3546966314315796,
68
+ "eval_runtime": 1.568,
69
+ "eval_samples_per_second": 14.03,
70
+ "eval_steps_per_second": 0.638,
71
  "step": 7
72
  },
73
  {
74
+ "epoch": 7.57,
75
+ "eval_accuracy": 0.6363636363636364,
76
+ "eval_loss": 1.2797943353652954,
77
+ "eval_runtime": 1.8043,
78
+ "eval_samples_per_second": 12.193,
79
+ "eval_steps_per_second": 0.554,
80
  "step": 8
81
  },
82
  {
83
+ "epoch": 8.57,
84
+ "eval_accuracy": 0.6363636363636364,
85
+ "eval_loss": 1.2200274467468262,
86
+ "eval_runtime": 1.811,
87
+ "eval_samples_per_second": 12.148,
88
+ "eval_steps_per_second": 0.552,
89
  "step": 9
90
  },
91
  {
92
+ "epoch": 9.57,
93
+ "eval_accuracy": 0.6363636363636364,
94
+ "eval_loss": 1.1594452857971191,
95
+ "eval_runtime": 1.803,
96
+ "eval_samples_per_second": 12.202,
97
+ "eval_steps_per_second": 0.555,
98
  "step": 10
99
  },
100
  {
101
+ "epoch": 10.57,
102
+ "eval_accuracy": 0.6818181818181818,
103
+ "eval_loss": 1.1154121160507202,
104
+ "eval_runtime": 1.7747,
105
+ "eval_samples_per_second": 12.397,
106
+ "eval_steps_per_second": 0.563,
107
  "step": 11
108
  },
109
  {
110
+ "epoch": 11.57,
111
+ "eval_accuracy": 0.6818181818181818,
112
+ "eval_loss": 1.07809579372406,
113
+ "eval_runtime": 1.6416,
114
+ "eval_samples_per_second": 13.401,
115
+ "eval_steps_per_second": 0.609,
116
  "step": 12
117
  },
118
  {
119
+ "epoch": 12.57,
120
+ "eval_accuracy": 0.6818181818181818,
121
+ "eval_loss": 1.0285967588424683,
122
+ "eval_runtime": 1.7863,
123
+ "eval_samples_per_second": 12.316,
124
+ "eval_steps_per_second": 0.56,
125
  "step": 13
126
  },
127
  {
128
+ "epoch": 13.57,
129
+ "eval_accuracy": 0.6818181818181818,
130
+ "eval_loss": 0.9623335003852844,
131
+ "eval_runtime": 1.8244,
132
+ "eval_samples_per_second": 12.059,
133
+ "eval_steps_per_second": 0.548,
134
  "step": 14
135
  },
136
  {
137
+ "epoch": 14.57,
138
+ "eval_accuracy": 0.6818181818181818,
139
+ "eval_loss": 0.8952316641807556,
140
+ "eval_runtime": 1.8255,
141
+ "eval_samples_per_second": 12.052,
142
+ "eval_steps_per_second": 0.548,
143
  "step": 15
144
  },
145
  {
146
+ "epoch": 15.57,
147
+ "eval_accuracy": 0.7272727272727273,
148
+ "eval_loss": 0.8217518329620361,
149
+ "eval_runtime": 1.8059,
150
+ "eval_samples_per_second": 12.182,
151
+ "eval_steps_per_second": 0.554,
152
  "step": 16
153
  },
154
  {
155
+ "epoch": 16.57,
156
+ "eval_accuracy": 0.7727272727272727,
157
+ "eval_loss": 0.7331055998802185,
158
+ "eval_runtime": 1.6554,
159
+ "eval_samples_per_second": 13.29,
160
+ "eval_steps_per_second": 0.604,
161
  "step": 17
162
  },
163
  {
164
+ "epoch": 17.57,
165
+ "eval_accuracy": 0.8181818181818182,
166
+ "eval_loss": 0.6525326371192932,
167
+ "eval_runtime": 1.8033,
168
+ "eval_samples_per_second": 12.2,
169
+ "eval_steps_per_second": 0.555,
170
  "step": 18
171
  },
172
  {
173
+ "epoch": 18.57,
174
+ "eval_accuracy": 0.8636363636363636,
175
+ "eval_loss": 0.5678051710128784,
176
+ "eval_runtime": 1.7903,
177
+ "eval_samples_per_second": 12.288,
178
+ "eval_steps_per_second": 0.559,
179
  "step": 19
180
  },
181
  {
182
+ "epoch": 19.57,
183
  "learning_rate": 2.777777777777778e-05,
184
+ "loss": 1.9399,
185
  "step": 20
186
  },
187
  {
188
+ "epoch": 19.57,
189
+ "eval_accuracy": 0.8636363636363636,
190
+ "eval_loss": 0.497961163520813,
191
+ "eval_runtime": 1.8331,
192
+ "eval_samples_per_second": 12.002,
193
+ "eval_steps_per_second": 0.546,
194
  "step": 20
195
  },
196
  {
197
+ "epoch": 20.57,
198
+ "eval_accuracy": 0.9090909090909091,
199
+ "eval_loss": 0.46136438846588135,
200
+ "eval_runtime": 1.8006,
201
+ "eval_samples_per_second": 12.218,
202
+ "eval_steps_per_second": 0.555,
203
  "step": 21
204
  },
205
  {
206
+ "epoch": 21.57,
207
+ "eval_accuracy": 0.9090909090909091,
208
+ "eval_loss": 0.4493587613105774,
209
+ "eval_runtime": 1.6389,
210
+ "eval_samples_per_second": 13.424,
211
+ "eval_steps_per_second": 0.61,
212
  "step": 22
213
  },
214
  {
215
+ "epoch": 22.57,
216
+ "eval_accuracy": 0.8181818181818182,
217
+ "eval_loss": 0.4404635727405548,
218
+ "eval_runtime": 1.8581,
219
+ "eval_samples_per_second": 11.84,
220
+ "eval_steps_per_second": 0.538,
221
  "step": 23
222
  },
223
  {
224
+ "epoch": 23.57,
225
+ "eval_accuracy": 0.8636363636363636,
226
+ "eval_loss": 0.4357987344264984,
227
+ "eval_runtime": 1.7864,
228
+ "eval_samples_per_second": 12.315,
229
+ "eval_steps_per_second": 0.56,
230
  "step": 24
231
  },
232
  {
233
+ "epoch": 24.57,
234
+ "eval_accuracy": 0.8636363636363636,
235
+ "eval_loss": 0.43069612979888916,
236
+ "eval_runtime": 1.804,
237
+ "eval_samples_per_second": 12.195,
238
+ "eval_steps_per_second": 0.554,
239
  "step": 25
240
  },
241
  {
242
+ "epoch": 25.57,
243
+ "eval_accuracy": 0.8636363636363636,
244
+ "eval_loss": 0.4099968671798706,
245
+ "eval_runtime": 1.7489,
246
+ "eval_samples_per_second": 12.579,
247
+ "eval_steps_per_second": 0.572,
248
  "step": 26
249
  },
250
  {
251
+ "epoch": 26.57,
252
+ "eval_accuracy": 0.8181818181818182,
253
+ "eval_loss": 0.3925555944442749,
254
+ "eval_runtime": 1.6257,
255
+ "eval_samples_per_second": 13.533,
256
+ "eval_steps_per_second": 0.615,
257
  "step": 27
258
  },
259
  {
260
+ "epoch": 27.57,
261
+ "eval_accuracy": 0.8181818181818182,
262
+ "eval_loss": 0.3818438649177551,
263
+ "eval_runtime": 1.8073,
264
+ "eval_samples_per_second": 12.173,
265
+ "eval_steps_per_second": 0.553,
266
  "step": 28
267
  },
268
  {
269
+ "epoch": 28.57,
270
+ "eval_accuracy": 0.8181818181818182,
271
+ "eval_loss": 0.366110235452652,
272
+ "eval_runtime": 1.7879,
273
+ "eval_samples_per_second": 12.305,
274
+ "eval_steps_per_second": 0.559,
275
  "step": 29
276
  },
277
  {
278
+ "epoch": 29.57,
279
+ "eval_accuracy": 0.8636363636363636,
280
+ "eval_loss": 0.35147225856781006,
281
+ "eval_runtime": 1.8109,
282
+ "eval_samples_per_second": 12.149,
283
+ "eval_steps_per_second": 0.552,
284
  "step": 30
285
  },
286
  {
287
+ "epoch": 30.57,
288
+ "eval_accuracy": 0.8636363636363636,
289
+ "eval_loss": 0.3345157206058502,
290
+ "eval_runtime": 1.8479,
291
+ "eval_samples_per_second": 11.905,
292
+ "eval_steps_per_second": 0.541,
293
  "step": 31
294
  },
295
  {
296
+ "epoch": 31.57,
297
+ "eval_accuracy": 0.8636363636363636,
298
+ "eval_loss": 0.32043513655662537,
299
+ "eval_runtime": 1.599,
300
+ "eval_samples_per_second": 13.759,
301
+ "eval_steps_per_second": 0.625,
302
  "step": 32
303
  },
304
  {
305
+ "epoch": 32.57,
306
+ "eval_accuracy": 0.8636363636363636,
307
+ "eval_loss": 0.3077632486820221,
308
+ "eval_runtime": 1.8226,
309
+ "eval_samples_per_second": 12.071,
310
+ "eval_steps_per_second": 0.549,
311
  "step": 33
312
  },
313
  {
314
+ "epoch": 33.57,
315
+ "eval_accuracy": 0.8636363636363636,
316
+ "eval_loss": 0.2948474586009979,
317
+ "eval_runtime": 1.7885,
318
+ "eval_samples_per_second": 12.301,
319
+ "eval_steps_per_second": 0.559,
320
  "step": 34
321
  },
322
  {
323
+ "epoch": 34.57,
324
+ "eval_accuracy": 0.8636363636363636,
325
+ "eval_loss": 0.2848185896873474,
326
+ "eval_runtime": 1.8362,
327
+ "eval_samples_per_second": 11.981,
328
+ "eval_steps_per_second": 0.545,
329
  "step": 35
330
  },
331
  {
332
+ "epoch": 35.57,
333
+ "eval_accuracy": 0.8636363636363636,
334
+ "eval_loss": 0.2748388946056366,
335
+ "eval_runtime": 1.8319,
336
+ "eval_samples_per_second": 12.009,
337
+ "eval_steps_per_second": 0.546,
338
  "step": 36
339
  },
340
  {
341
+ "epoch": 36.57,
342
+ "eval_accuracy": 0.8636363636363636,
343
+ "eval_loss": 0.2679346203804016,
344
+ "eval_runtime": 1.6038,
345
+ "eval_samples_per_second": 13.718,
346
+ "eval_steps_per_second": 0.624,
347
  "step": 37
348
  },
349
  {
350
+ "epoch": 37.57,
351
+ "eval_accuracy": 0.8636363636363636,
352
+ "eval_loss": 0.26424679160118103,
353
+ "eval_runtime": 1.8361,
354
+ "eval_samples_per_second": 11.982,
355
+ "eval_steps_per_second": 0.545,
356
  "step": 38
357
  },
358
  {
359
+ "epoch": 38.57,
360
+ "eval_accuracy": 0.8636363636363636,
361
+ "eval_loss": 0.2638870179653168,
362
+ "eval_runtime": 1.7769,
363
+ "eval_samples_per_second": 12.381,
364
+ "eval_steps_per_second": 0.563,
365
  "step": 39
366
  },
367
  {
368
+ "epoch": 39.57,
369
  "learning_rate": 0.0,
370
+ "loss": 0.728,
371
  "step": 40
372
  },
373
  {
374
+ "epoch": 39.57,
375
+ "eval_accuracy": 0.8636363636363636,
376
+ "eval_loss": 0.26363667845726013,
377
+ "eval_runtime": 1.7373,
378
+ "eval_samples_per_second": 12.663,
379
+ "eval_steps_per_second": 0.576,
380
  "step": 40
381
  },
382
  {
383
+ "epoch": 39.57,
384
  "step": 40,
385
+ "total_flos": 1.9417183005730406e+17,
386
+ "train_loss": 1.3339122772216796,
387
+ "train_runtime": 965.4815,
388
+ "train_samples_per_second": 8.162,
389
+ "train_steps_per_second": 0.041
390
  }
391
  ],
392
  "max_steps": 40,
393
  "num_train_epochs": 40,
394
+ "total_flos": 1.9417183005730406e+17,
395
  "trial_name": null,
396
  "trial_params": null
397
  }