JEdward7777 committed on
Commit
531ee30
·
1 Parent(s): 55de35f

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 39.8,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.17870470881462097,
5
- "eval_runtime": 1.3876,
6
- "eval_samples_per_second": 12.251,
7
- "eval_steps_per_second": 0.721,
8
- "total_flos": 1.4763621798877594e+17,
9
- "train_loss": 0.872883677482605,
10
- "train_runtime": 746.1275,
11
- "train_samples_per_second": 7.988,
12
- "train_steps_per_second": 0.054
13
  }
 
1
  {
2
+ "epoch": 39.67,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.22120876610279083,
5
+ "eval_runtime": 1.4551,
6
+ "eval_samples_per_second": 13.745,
7
+ "eval_steps_per_second": 0.687,
8
+ "total_flos": 1.7187351594113434e+17,
9
+ "train_loss": 0.9913474082946777,
10
+ "train_runtime": 823.5099,
11
+ "train_samples_per_second": 8.452,
12
+ "train_steps_per_second": 0.049
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.8,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.17870470881462097,
5
- "eval_runtime": 1.3876,
6
- "eval_samples_per_second": 12.251,
7
- "eval_steps_per_second": 0.721
8
  }
 
1
  {
2
+ "epoch": 39.67,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.22120876610279083,
5
+ "eval_runtime": 1.4551,
6
+ "eval_samples_per_second": 13.745,
7
+ "eval_steps_per_second": 0.687
8
  }
runs/Sep01_20-10-24_f90d7f0f1d18/events.out.tfevents.1662064096.f90d7f0f1d18.70.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfb1ab8d357b7cb72d4238fac7c1e00f9c9522328aff3f9577791d66bd1803ee
3
+ size 357
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.8,
3
- "total_flos": 1.4763621798877594e+17,
4
- "train_loss": 0.872883677482605,
5
- "train_runtime": 746.1275,
6
- "train_samples_per_second": 7.988,
7
- "train_steps_per_second": 0.054
8
  }
 
1
  {
2
+ "epoch": 39.67,
3
+ "total_flos": 1.7187351594113434e+17,
4
+ "train_loss": 0.9913474082946777,
5
+ "train_runtime": 823.5099,
6
+ "train_samples_per_second": 8.452,
7
+ "train_steps_per_second": 0.049
8
  }
trainer_state.json CHANGED
@@ -1,397 +1,397 @@
1
  {
2
  "best_metric": 1.0,
3
- "best_model_checkpoint": "delivery_truck_classification/checkpoint-30",
4
- "epoch": 39.8,
5
  "global_step": 40,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.8,
12
- "eval_accuracy": 0.058823529411764705,
13
- "eval_loss": 2.079441785812378,
14
- "eval_runtime": 1.4317,
15
- "eval_samples_per_second": 11.874,
16
- "eval_steps_per_second": 0.698,
17
  "step": 1
18
  },
19
  {
20
- "epoch": 1.8,
21
- "eval_accuracy": 0.11764705882352941,
22
- "eval_loss": 2.0046820640563965,
23
- "eval_runtime": 1.4647,
24
- "eval_samples_per_second": 11.607,
25
- "eval_steps_per_second": 0.683,
26
  "step": 2
27
  },
28
  {
29
- "epoch": 2.8,
30
- "eval_accuracy": 0.17647058823529413,
31
- "eval_loss": 1.866559624671936,
32
- "eval_runtime": 1.4101,
33
- "eval_samples_per_second": 12.056,
34
- "eval_steps_per_second": 0.709,
35
  "step": 3
36
  },
37
  {
38
- "epoch": 3.8,
39
- "eval_accuracy": 0.23529411764705882,
40
- "eval_loss": 1.6799691915512085,
41
- "eval_runtime": 1.3922,
42
- "eval_samples_per_second": 12.211,
43
- "eval_steps_per_second": 0.718,
44
  "step": 4
45
  },
46
  {
47
- "epoch": 4.8,
48
- "eval_accuracy": 0.35294117647058826,
49
- "eval_loss": 1.4622201919555664,
50
- "eval_runtime": 1.3944,
51
- "eval_samples_per_second": 12.191,
52
- "eval_steps_per_second": 0.717,
53
  "step": 5
54
  },
55
  {
56
- "epoch": 5.8,
57
- "eval_accuracy": 0.5882352941176471,
58
- "eval_loss": 1.2880315780639648,
59
- "eval_runtime": 1.4241,
60
- "eval_samples_per_second": 11.937,
61
- "eval_steps_per_second": 0.702,
62
  "step": 6
63
  },
64
  {
65
- "epoch": 6.8,
66
- "eval_accuracy": 0.8823529411764706,
67
- "eval_loss": 1.1315902471542358,
68
- "eval_runtime": 1.3664,
69
- "eval_samples_per_second": 12.442,
70
- "eval_steps_per_second": 0.732,
71
  "step": 7
72
  },
73
  {
74
- "epoch": 7.8,
75
- "eval_accuracy": 0.8823529411764706,
76
- "eval_loss": 0.9924841523170471,
77
- "eval_runtime": 1.3155,
78
- "eval_samples_per_second": 12.923,
79
- "eval_steps_per_second": 0.76,
80
  "step": 8
81
  },
82
  {
83
- "epoch": 8.8,
84
- "eval_accuracy": 0.8823529411764706,
85
- "eval_loss": 0.8822251558303833,
86
- "eval_runtime": 1.399,
87
- "eval_samples_per_second": 12.152,
88
- "eval_steps_per_second": 0.715,
89
  "step": 9
90
  },
91
  {
92
- "epoch": 9.8,
93
- "eval_accuracy": 0.8823529411764706,
94
- "eval_loss": 0.7928468585014343,
95
- "eval_runtime": 1.4175,
96
- "eval_samples_per_second": 11.993,
97
- "eval_steps_per_second": 0.705,
98
  "step": 10
99
  },
100
  {
101
- "epoch": 10.8,
102
- "eval_accuracy": 0.8823529411764706,
103
- "eval_loss": 0.7265812754631042,
104
- "eval_runtime": 1.4201,
105
- "eval_samples_per_second": 11.971,
106
- "eval_steps_per_second": 0.704,
107
  "step": 11
108
  },
109
  {
110
- "epoch": 11.8,
111
- "eval_accuracy": 0.8823529411764706,
112
- "eval_loss": 0.6715043783187866,
113
- "eval_runtime": 1.4255,
114
- "eval_samples_per_second": 11.925,
115
- "eval_steps_per_second": 0.701,
116
  "step": 12
117
  },
118
  {
119
- "epoch": 12.8,
120
- "eval_accuracy": 0.8823529411764706,
121
- "eval_loss": 0.6238242387771606,
122
- "eval_runtime": 1.4187,
123
- "eval_samples_per_second": 11.983,
124
- "eval_steps_per_second": 0.705,
125
  "step": 13
126
  },
127
  {
128
- "epoch": 13.8,
129
- "eval_accuracy": 0.8823529411764706,
130
- "eval_loss": 0.5792789459228516,
131
- "eval_runtime": 1.4156,
132
- "eval_samples_per_second": 12.009,
133
- "eval_steps_per_second": 0.706,
134
  "step": 14
135
  },
136
  {
137
- "epoch": 14.8,
138
- "eval_accuracy": 0.8823529411764706,
139
- "eval_loss": 0.542333722114563,
140
- "eval_runtime": 1.5783,
141
- "eval_samples_per_second": 10.771,
142
- "eval_steps_per_second": 0.634,
143
  "step": 15
144
  },
145
  {
146
- "epoch": 15.8,
147
- "eval_accuracy": 0.8823529411764706,
148
- "eval_loss": 0.5102801322937012,
149
- "eval_runtime": 1.4137,
150
- "eval_samples_per_second": 12.025,
151
- "eval_steps_per_second": 0.707,
152
  "step": 16
153
  },
154
  {
155
- "epoch": 16.8,
156
- "eval_accuracy": 0.9411764705882353,
157
- "eval_loss": 0.486479252576828,
158
- "eval_runtime": 1.4145,
159
- "eval_samples_per_second": 12.019,
160
- "eval_steps_per_second": 0.707,
161
  "step": 17
162
  },
163
  {
164
- "epoch": 17.8,
165
- "eval_accuracy": 0.9411764705882353,
166
- "eval_loss": 0.46346449851989746,
167
- "eval_runtime": 1.4531,
168
- "eval_samples_per_second": 11.699,
169
- "eval_steps_per_second": 0.688,
170
  "step": 18
171
  },
172
  {
173
- "epoch": 18.8,
174
- "eval_accuracy": 0.9411764705882353,
175
- "eval_loss": 0.43990573287010193,
176
- "eval_runtime": 1.4361,
177
- "eval_samples_per_second": 11.838,
178
- "eval_steps_per_second": 0.696,
179
  "step": 19
180
  },
181
  {
182
- "epoch": 19.8,
183
  "learning_rate": 2.777777777777778e-05,
184
- "loss": 1.3142,
185
  "step": 20
186
  },
187
  {
188
- "epoch": 19.8,
189
- "eval_accuracy": 0.9411764705882353,
190
- "eval_loss": 0.41191565990448,
191
- "eval_runtime": 1.4086,
192
- "eval_samples_per_second": 12.069,
193
- "eval_steps_per_second": 0.71,
194
  "step": 20
195
  },
196
  {
197
- "epoch": 20.8,
198
- "eval_accuracy": 0.9411764705882353,
199
- "eval_loss": 0.3842789828777313,
200
- "eval_runtime": 1.4599,
201
- "eval_samples_per_second": 11.644,
202
- "eval_steps_per_second": 0.685,
203
  "step": 21
204
  },
205
  {
206
- "epoch": 21.8,
207
- "eval_accuracy": 0.9411764705882353,
208
- "eval_loss": 0.3496580719947815,
209
- "eval_runtime": 1.2811,
210
- "eval_samples_per_second": 13.27,
211
- "eval_steps_per_second": 0.781,
212
  "step": 22
213
  },
214
  {
215
- "epoch": 22.8,
216
- "eval_accuracy": 0.9411764705882353,
217
- "eval_loss": 0.31610190868377686,
218
- "eval_runtime": 1.3996,
219
- "eval_samples_per_second": 12.146,
220
- "eval_steps_per_second": 0.714,
221
  "step": 23
222
  },
223
  {
224
- "epoch": 23.8,
225
- "eval_accuracy": 0.9411764705882353,
226
- "eval_loss": 0.28501880168914795,
227
- "eval_runtime": 1.4408,
228
- "eval_samples_per_second": 11.799,
229
- "eval_steps_per_second": 0.694,
230
  "step": 24
231
  },
232
  {
233
- "epoch": 24.8,
234
- "eval_accuracy": 0.9411764705882353,
235
- "eval_loss": 0.2581336200237274,
236
- "eval_runtime": 1.4791,
237
- "eval_samples_per_second": 11.493,
238
- "eval_steps_per_second": 0.676,
239
  "step": 25
240
  },
241
  {
242
- "epoch": 25.8,
243
- "eval_accuracy": 0.9411764705882353,
244
- "eval_loss": 0.23626860976219177,
245
- "eval_runtime": 1.4224,
246
- "eval_samples_per_second": 11.952,
247
- "eval_steps_per_second": 0.703,
248
  "step": 26
249
  },
250
  {
251
- "epoch": 26.8,
252
- "eval_accuracy": 0.9411764705882353,
253
- "eval_loss": 0.21788160502910614,
254
- "eval_runtime": 1.4535,
255
- "eval_samples_per_second": 11.696,
256
- "eval_steps_per_second": 0.688,
257
  "step": 27
258
  },
259
  {
260
- "epoch": 27.8,
261
- "eval_accuracy": 0.9411764705882353,
262
- "eval_loss": 0.20291975140571594,
263
- "eval_runtime": 1.4574,
264
- "eval_samples_per_second": 11.665,
265
- "eval_steps_per_second": 0.686,
266
  "step": 28
267
  },
268
  {
269
- "epoch": 28.8,
270
- "eval_accuracy": 0.9411764705882353,
271
- "eval_loss": 0.19029618799686432,
272
- "eval_runtime": 1.3,
273
- "eval_samples_per_second": 13.077,
274
- "eval_steps_per_second": 0.769,
275
  "step": 29
276
  },
277
  {
278
- "epoch": 29.8,
279
  "eval_accuracy": 1.0,
280
- "eval_loss": 0.17870470881462097,
281
- "eval_runtime": 1.4384,
282
- "eval_samples_per_second": 11.818,
283
- "eval_steps_per_second": 0.695,
284
  "step": 30
285
  },
286
  {
287
- "epoch": 30.8,
288
  "eval_accuracy": 1.0,
289
- "eval_loss": 0.1676449328660965,
290
- "eval_runtime": 1.4058,
291
- "eval_samples_per_second": 12.093,
292
- "eval_steps_per_second": 0.711,
293
  "step": 31
294
  },
295
  {
296
- "epoch": 31.8,
297
  "eval_accuracy": 1.0,
298
- "eval_loss": 0.15808852016925812,
299
- "eval_runtime": 1.4182,
300
- "eval_samples_per_second": 11.987,
301
- "eval_steps_per_second": 0.705,
302
  "step": 32
303
  },
304
  {
305
- "epoch": 32.8,
306
  "eval_accuracy": 1.0,
307
- "eval_loss": 0.14869734644889832,
308
- "eval_runtime": 1.3868,
309
- "eval_samples_per_second": 12.259,
310
- "eval_steps_per_second": 0.721,
311
  "step": 33
312
  },
313
  {
314
- "epoch": 33.8,
315
  "eval_accuracy": 1.0,
316
- "eval_loss": 0.14104951918125153,
317
- "eval_runtime": 1.3908,
318
- "eval_samples_per_second": 12.223,
319
- "eval_steps_per_second": 0.719,
320
  "step": 34
321
  },
322
  {
323
- "epoch": 34.8,
324
  "eval_accuracy": 1.0,
325
- "eval_loss": 0.13486798107624054,
326
- "eval_runtime": 1.4139,
327
- "eval_samples_per_second": 12.023,
328
- "eval_steps_per_second": 0.707,
329
  "step": 35
330
  },
331
  {
332
- "epoch": 35.8,
333
  "eval_accuracy": 1.0,
334
- "eval_loss": 0.13010667264461517,
335
- "eval_runtime": 1.3204,
336
- "eval_samples_per_second": 12.875,
337
- "eval_steps_per_second": 0.757,
338
  "step": 36
339
  },
340
  {
341
- "epoch": 36.8,
342
  "eval_accuracy": 1.0,
343
- "eval_loss": 0.12663623690605164,
344
- "eval_runtime": 1.4211,
345
- "eval_samples_per_second": 11.963,
346
- "eval_steps_per_second": 0.704,
347
  "step": 37
348
  },
349
  {
350
- "epoch": 37.8,
351
  "eval_accuracy": 1.0,
352
- "eval_loss": 0.12430554628372192,
353
- "eval_runtime": 1.4023,
354
- "eval_samples_per_second": 12.123,
355
- "eval_steps_per_second": 0.713,
356
  "step": 38
357
  },
358
  {
359
- "epoch": 38.8,
360
  "eval_accuracy": 1.0,
361
- "eval_loss": 0.12299124151468277,
362
- "eval_runtime": 1.4925,
363
- "eval_samples_per_second": 11.39,
364
- "eval_steps_per_second": 0.67,
365
  "step": 39
366
  },
367
  {
368
- "epoch": 39.8,
369
  "learning_rate": 0.0,
370
- "loss": 0.4316,
371
  "step": 40
372
  },
373
  {
374
- "epoch": 39.8,
375
  "eval_accuracy": 1.0,
376
- "eval_loss": 0.12227079272270203,
377
- "eval_runtime": 1.5093,
378
- "eval_samples_per_second": 11.264,
379
- "eval_steps_per_second": 0.663,
380
  "step": 40
381
  },
382
  {
383
- "epoch": 39.8,
384
  "step": 40,
385
- "total_flos": 1.4763621798877594e+17,
386
- "train_loss": 0.872883677482605,
387
- "train_runtime": 746.1275,
388
- "train_samples_per_second": 7.988,
389
- "train_steps_per_second": 0.054
390
  }
391
  ],
392
  "max_steps": 40,
393
  "num_train_epochs": 40,
394
- "total_flos": 1.4763621798877594e+17,
395
  "trial_name": null,
396
  "trial_params": null
397
  }
 
1
  {
2
  "best_metric": 1.0,
3
+ "best_model_checkpoint": "delivery_truck_classification/checkpoint-21",
4
+ "epoch": 39.666666666666664,
5
  "global_step": 40,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.67,
12
+ "eval_accuracy": 0.3,
13
+ "eval_loss": 1.7282015085220337,
14
+ "eval_runtime": 1.367,
15
+ "eval_samples_per_second": 14.631,
16
+ "eval_steps_per_second": 0.732,
17
  "step": 1
18
  },
19
  {
20
+ "epoch": 1.67,
21
+ "eval_accuracy": 0.3,
22
+ "eval_loss": 1.678601622581482,
23
+ "eval_runtime": 1.4785,
24
+ "eval_samples_per_second": 13.528,
25
+ "eval_steps_per_second": 0.676,
26
  "step": 2
27
  },
28
  {
29
+ "epoch": 2.67,
30
+ "eval_accuracy": 0.35,
31
+ "eval_loss": 1.58110773563385,
32
+ "eval_runtime": 1.5261,
33
+ "eval_samples_per_second": 13.105,
34
+ "eval_steps_per_second": 0.655,
35
  "step": 3
36
  },
37
  {
38
+ "epoch": 3.67,
39
+ "eval_accuracy": 0.45,
40
+ "eval_loss": 1.4410462379455566,
41
+ "eval_runtime": 1.4941,
42
+ "eval_samples_per_second": 13.386,
43
+ "eval_steps_per_second": 0.669,
44
  "step": 4
45
  },
46
  {
47
+ "epoch": 4.67,
48
+ "eval_accuracy": 0.65,
49
+ "eval_loss": 1.2802143096923828,
50
+ "eval_runtime": 1.5014,
51
+ "eval_samples_per_second": 13.321,
52
+ "eval_steps_per_second": 0.666,
53
  "step": 5
54
  },
55
  {
56
+ "epoch": 5.67,
57
+ "eval_accuracy": 0.75,
58
+ "eval_loss": 1.1453020572662354,
59
+ "eval_runtime": 1.5188,
60
+ "eval_samples_per_second": 13.168,
61
+ "eval_steps_per_second": 0.658,
62
  "step": 6
63
  },
64
  {
65
+ "epoch": 6.67,
66
+ "eval_accuracy": 0.75,
67
+ "eval_loss": 1.0252652168273926,
68
+ "eval_runtime": 1.5438,
69
+ "eval_samples_per_second": 12.955,
70
+ "eval_steps_per_second": 0.648,
71
  "step": 7
72
  },
73
  {
74
+ "epoch": 7.67,
75
+ "eval_accuracy": 0.75,
76
+ "eval_loss": 0.9306014180183411,
77
+ "eval_runtime": 1.3614,
78
+ "eval_samples_per_second": 14.69,
79
+ "eval_steps_per_second": 0.735,
80
  "step": 8
81
  },
82
  {
83
+ "epoch": 8.67,
84
+ "eval_accuracy": 0.8,
85
+ "eval_loss": 0.8565734028816223,
86
+ "eval_runtime": 1.4936,
87
+ "eval_samples_per_second": 13.391,
88
+ "eval_steps_per_second": 0.67,
89
  "step": 9
90
  },
91
  {
92
+ "epoch": 9.67,
93
+ "eval_accuracy": 0.8,
94
+ "eval_loss": 0.8048442602157593,
95
+ "eval_runtime": 1.5039,
96
+ "eval_samples_per_second": 13.299,
97
+ "eval_steps_per_second": 0.665,
98
  "step": 10
99
  },
100
  {
101
+ "epoch": 10.67,
102
+ "eval_accuracy": 0.8,
103
+ "eval_loss": 0.7585190534591675,
104
+ "eval_runtime": 1.483,
105
+ "eval_samples_per_second": 13.486,
106
+ "eval_steps_per_second": 0.674,
107
  "step": 11
108
  },
109
  {
110
+ "epoch": 11.67,
111
+ "eval_accuracy": 0.8,
112
+ "eval_loss": 0.7096863985061646,
113
+ "eval_runtime": 1.4896,
114
+ "eval_samples_per_second": 13.426,
115
+ "eval_steps_per_second": 0.671,
116
  "step": 12
117
  },
118
  {
119
+ "epoch": 12.67,
120
+ "eval_accuracy": 0.8,
121
+ "eval_loss": 0.6442805528640747,
122
+ "eval_runtime": 1.4975,
123
+ "eval_samples_per_second": 13.355,
124
+ "eval_steps_per_second": 0.668,
125
  "step": 13
126
  },
127
  {
128
+ "epoch": 13.67,
129
+ "eval_accuracy": 0.8,
130
+ "eval_loss": 0.5771742463111877,
131
+ "eval_runtime": 1.5019,
132
+ "eval_samples_per_second": 13.317,
133
+ "eval_steps_per_second": 0.666,
134
  "step": 14
135
  },
136
  {
137
+ "epoch": 14.67,
138
+ "eval_accuracy": 0.8,
139
+ "eval_loss": 0.5056056380271912,
140
+ "eval_runtime": 1.3719,
141
+ "eval_samples_per_second": 14.578,
142
+ "eval_steps_per_second": 0.729,
143
  "step": 15
144
  },
145
  {
146
+ "epoch": 15.67,
147
+ "eval_accuracy": 0.8,
148
+ "eval_loss": 0.4443889558315277,
149
+ "eval_runtime": 1.5003,
150
+ "eval_samples_per_second": 13.331,
151
+ "eval_steps_per_second": 0.667,
152
  "step": 16
153
  },
154
  {
155
+ "epoch": 16.67,
156
+ "eval_accuracy": 0.85,
157
+ "eval_loss": 0.385681688785553,
158
+ "eval_runtime": 1.4857,
159
+ "eval_samples_per_second": 13.462,
160
+ "eval_steps_per_second": 0.673,
161
  "step": 17
162
  },
163
  {
164
+ "epoch": 17.67,
165
+ "eval_accuracy": 0.85,
166
+ "eval_loss": 0.3330341875553131,
167
+ "eval_runtime": 1.5414,
168
+ "eval_samples_per_second": 12.975,
169
+ "eval_steps_per_second": 0.649,
170
  "step": 18
171
  },
172
  {
173
+ "epoch": 18.67,
174
+ "eval_accuracy": 0.9,
175
+ "eval_loss": 0.2907267212867737,
176
+ "eval_runtime": 1.5308,
177
+ "eval_samples_per_second": 13.065,
178
+ "eval_steps_per_second": 0.653,
179
  "step": 19
180
  },
181
  {
182
+ "epoch": 19.67,
183
  "learning_rate": 2.777777777777778e-05,
184
+ "loss": 1.4985,
185
  "step": 20
186
  },
187
  {
188
+ "epoch": 19.67,
189
+ "eval_accuracy": 0.95,
190
+ "eval_loss": 0.2552061080932617,
191
+ "eval_runtime": 1.5055,
192
+ "eval_samples_per_second": 13.285,
193
+ "eval_steps_per_second": 0.664,
194
  "step": 20
195
  },
196
  {
197
+ "epoch": 20.67,
198
+ "eval_accuracy": 1.0,
199
+ "eval_loss": 0.22120876610279083,
200
+ "eval_runtime": 1.529,
201
+ "eval_samples_per_second": 13.081,
202
+ "eval_steps_per_second": 0.654,
203
  "step": 21
204
  },
205
  {
206
+ "epoch": 21.67,
207
+ "eval_accuracy": 1.0,
208
+ "eval_loss": 0.19384506344795227,
209
+ "eval_runtime": 1.3951,
210
+ "eval_samples_per_second": 14.336,
211
+ "eval_steps_per_second": 0.717,
212
  "step": 22
213
  },
214
  {
215
+ "epoch": 22.67,
216
+ "eval_accuracy": 1.0,
217
+ "eval_loss": 0.1699182689189911,
218
+ "eval_runtime": 1.551,
219
+ "eval_samples_per_second": 12.895,
220
+ "eval_steps_per_second": 0.645,
221
  "step": 23
222
  },
223
  {
224
+ "epoch": 23.67,
225
+ "eval_accuracy": 1.0,
226
+ "eval_loss": 0.14904645085334778,
227
+ "eval_runtime": 1.5503,
228
+ "eval_samples_per_second": 12.901,
229
+ "eval_steps_per_second": 0.645,
230
  "step": 24
231
  },
232
  {
233
+ "epoch": 24.67,
234
+ "eval_accuracy": 1.0,
235
+ "eval_loss": 0.13287147879600525,
236
+ "eval_runtime": 1.5769,
237
+ "eval_samples_per_second": 12.683,
238
+ "eval_steps_per_second": 0.634,
239
  "step": 25
240
  },
241
  {
242
+ "epoch": 25.67,
243
+ "eval_accuracy": 1.0,
244
+ "eval_loss": 0.12029655277729034,
245
+ "eval_runtime": 1.5354,
246
+ "eval_samples_per_second": 13.026,
247
+ "eval_steps_per_second": 0.651,
248
  "step": 26
249
  },
250
  {
251
+ "epoch": 26.67,
252
+ "eval_accuracy": 1.0,
253
+ "eval_loss": 0.11412191390991211,
254
+ "eval_runtime": 1.6538,
255
+ "eval_samples_per_second": 12.093,
256
+ "eval_steps_per_second": 0.605,
257
  "step": 27
258
  },
259
  {
260
+ "epoch": 27.67,
261
+ "eval_accuracy": 1.0,
262
+ "eval_loss": 0.10837922245264053,
263
+ "eval_runtime": 1.5243,
264
+ "eval_samples_per_second": 13.121,
265
+ "eval_steps_per_second": 0.656,
266
  "step": 28
267
  },
268
  {
269
+ "epoch": 28.67,
270
+ "eval_accuracy": 1.0,
271
+ "eval_loss": 0.10183490812778473,
272
+ "eval_runtime": 1.3977,
273
+ "eval_samples_per_second": 14.31,
274
+ "eval_steps_per_second": 0.715,
275
  "step": 29
276
  },
277
  {
278
+ "epoch": 29.67,
279
  "eval_accuracy": 1.0,
280
+ "eval_loss": 0.09532036632299423,
281
+ "eval_runtime": 1.5184,
282
+ "eval_samples_per_second": 13.171,
283
+ "eval_steps_per_second": 0.659,
284
  "step": 30
285
  },
286
  {
287
+ "epoch": 30.67,
288
  "eval_accuracy": 1.0,
289
+ "eval_loss": 0.0878124088048935,
290
+ "eval_runtime": 1.4999,
291
+ "eval_samples_per_second": 13.334,
292
+ "eval_steps_per_second": 0.667,
293
  "step": 31
294
  },
295
  {
296
+ "epoch": 31.67,
297
  "eval_accuracy": 1.0,
298
+ "eval_loss": 0.07940232753753662,
299
+ "eval_runtime": 2.1996,
300
+ "eval_samples_per_second": 9.092,
301
+ "eval_steps_per_second": 0.455,
302
  "step": 32
303
  },
304
  {
305
+ "epoch": 32.67,
306
  "eval_accuracy": 1.0,
307
+ "eval_loss": 0.0729844942688942,
308
+ "eval_runtime": 1.5148,
309
+ "eval_samples_per_second": 13.203,
310
+ "eval_steps_per_second": 0.66,
311
  "step": 33
312
  },
313
  {
314
+ "epoch": 33.67,
315
  "eval_accuracy": 1.0,
316
+ "eval_loss": 0.06873825937509537,
317
+ "eval_runtime": 1.5014,
318
+ "eval_samples_per_second": 13.321,
319
+ "eval_steps_per_second": 0.666,
320
  "step": 34
321
  },
322
  {
323
+ "epoch": 34.67,
324
  "eval_accuracy": 1.0,
325
+ "eval_loss": 0.06642889976501465,
326
+ "eval_runtime": 1.5257,
327
+ "eval_samples_per_second": 13.109,
328
+ "eval_steps_per_second": 0.655,
329
  "step": 35
330
  },
331
  {
332
+ "epoch": 35.67,
333
  "eval_accuracy": 1.0,
334
+ "eval_loss": 0.06485584378242493,
335
+ "eval_runtime": 1.3992,
336
+ "eval_samples_per_second": 14.294,
337
+ "eval_steps_per_second": 0.715,
338
  "step": 36
339
  },
340
  {
341
+ "epoch": 36.67,
342
  "eval_accuracy": 1.0,
343
+ "eval_loss": 0.06402350962162018,
344
+ "eval_runtime": 1.5962,
345
+ "eval_samples_per_second": 12.53,
346
+ "eval_steps_per_second": 0.627,
347
  "step": 37
348
  },
349
  {
350
+ "epoch": 37.67,
351
  "eval_accuracy": 1.0,
352
+ "eval_loss": 0.06386792659759521,
353
+ "eval_runtime": 1.5114,
354
+ "eval_samples_per_second": 13.233,
355
+ "eval_steps_per_second": 0.662,
356
  "step": 38
357
  },
358
  {
359
+ "epoch": 38.67,
360
  "eval_accuracy": 1.0,
361
+ "eval_loss": 0.06384583562612534,
362
+ "eval_runtime": 1.4982,
363
+ "eval_samples_per_second": 13.349,
364
+ "eval_steps_per_second": 0.667,
365
  "step": 39
366
  },
367
  {
368
+ "epoch": 39.67,
369
  "learning_rate": 0.0,
370
+ "loss": 0.4842,
371
  "step": 40
372
  },
373
  {
374
+ "epoch": 39.67,
375
  "eval_accuracy": 1.0,
376
+ "eval_loss": 0.0637463703751564,
377
+ "eval_runtime": 1.4885,
378
+ "eval_samples_per_second": 13.436,
379
+ "eval_steps_per_second": 0.672,
380
  "step": 40
381
  },
382
  {
383
+ "epoch": 39.67,
384
  "step": 40,
385
+ "total_flos": 1.7187351594113434e+17,
386
+ "train_loss": 0.9913474082946777,
387
+ "train_runtime": 823.5099,
388
+ "train_samples_per_second": 8.452,
389
+ "train_steps_per_second": 0.049
390
  }
391
  ],
392
  "max_steps": 40,
393
  "num_train_epochs": 40,
394
+ "total_flos": 1.7187351594113434e+17,
395
  "trial_name": null,
396
  "trial_params": null
397
  }