glazzova committed on
Commit
e7eff73
1 Parent(s): fa7b38a

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/resnet-50",
3
+ "architectures": [
4
+ "ResNetForImageClassification"
5
+ ],
6
+ "depths": [
7
+ 3,
8
+ 4,
9
+ 6,
10
+ 3
11
+ ],
12
+ "downsample_in_first_stage": false,
13
+ "embedding_size": 64,
14
+ "hidden_act": "relu",
15
+ "hidden_sizes": [
16
+ 256,
17
+ 512,
18
+ 1024,
19
+ 2048
20
+ ],
21
+ "id2label": {
22
+ "0": "skinny",
23
+ "1": "ordinary",
24
+ "2": "overweight",
25
+ "3": "hulk"
26
+ },
27
+ "label2id": {
28
+ "hulk": 3,
29
+ "ordinary": 1,
30
+ "overweight": 2,
31
+ "skinny": 0
32
+ },
33
+ "layer_type": "bottleneck",
34
+ "model_type": "resnet",
35
+ "num_channels": 3,
36
+ "out_features": [
37
+ "stage4"
38
+ ],
39
+ "out_indices": [
40
+ 4
41
+ ],
42
+ "problem_type": "single_label_classification",
43
+ "stage_names": [
44
+ "stem",
45
+ "stage1",
46
+ "stage2",
47
+ "stage3",
48
+ "stage4"
49
+ ],
50
+ "torch_dtype": "float32",
51
+ "transformers_version": "4.32.1"
52
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d8955560524dcdbfbe9b6b9ae699f6ab10aaa28b0a5eaad47bb49bcb3fb84ef
3
+ size 188260794
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_pct": 0.875,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_processor_type": "ConvNextImageProcessor",
12
+ "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
+ ],
17
+ "resample": 3,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "shortest_edge": 224
21
+ }
22
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c25b1650605916840a2e116e856ff8a962ac354248e090e2ca5067659f5111af
3
+ size 94387338
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42686a8973870b47a73c6c2082486f3aa3b0783dc3b23e2a98d72270442aac04
3
+ size 13990
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc8f9fbd55d6b73398f59b651ee1ad3f54fc5f752abc73f7f160adb053254995
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,925 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 100.0,
5
+ "eval_steps": 500,
6
+ "global_step": 900,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.1111111111111111,
14
+ "eval_loss": 1.3930704593658447,
15
+ "eval_runtime": 0.7497,
16
+ "eval_samples_per_second": 24.01,
17
+ "eval_steps_per_second": 4.002,
18
+ "step": 9
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.16666666666666666,
23
+ "eval_loss": 1.376913070678711,
24
+ "eval_runtime": 0.5755,
25
+ "eval_samples_per_second": 31.277,
26
+ "eval_steps_per_second": 5.213,
27
+ "step": 18
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_accuracy": 0.5555555555555556,
32
+ "eval_loss": 1.3498488664627075,
33
+ "eval_runtime": 0.5792,
34
+ "eval_samples_per_second": 31.078,
35
+ "eval_steps_per_second": 5.18,
36
+ "step": 27
37
+ },
38
+ {
39
+ "epoch": 4.0,
40
+ "eval_accuracy": 0.6666666666666666,
41
+ "eval_loss": 1.3331588506698608,
42
+ "eval_runtime": 0.5535,
43
+ "eval_samples_per_second": 32.518,
44
+ "eval_steps_per_second": 5.42,
45
+ "step": 36
46
+ },
47
+ {
48
+ "epoch": 5.0,
49
+ "eval_accuracy": 0.7777777777777778,
50
+ "eval_loss": 1.3144757747650146,
51
+ "eval_runtime": 0.5818,
52
+ "eval_samples_per_second": 30.94,
53
+ "eval_steps_per_second": 5.157,
54
+ "step": 45
55
+ },
56
+ {
57
+ "epoch": 6.0,
58
+ "eval_accuracy": 0.7777777777777778,
59
+ "eval_loss": 1.300213098526001,
60
+ "eval_runtime": 0.5539,
61
+ "eval_samples_per_second": 32.496,
62
+ "eval_steps_per_second": 5.416,
63
+ "step": 54
64
+ },
65
+ {
66
+ "epoch": 7.0,
67
+ "eval_accuracy": 0.7777777777777778,
68
+ "eval_loss": 1.2916626930236816,
69
+ "eval_runtime": 0.5489,
70
+ "eval_samples_per_second": 32.791,
71
+ "eval_steps_per_second": 5.465,
72
+ "step": 63
73
+ },
74
+ {
75
+ "epoch": 8.0,
76
+ "eval_accuracy": 0.7777777777777778,
77
+ "eval_loss": 1.270052194595337,
78
+ "eval_runtime": 0.5746,
79
+ "eval_samples_per_second": 31.328,
80
+ "eval_steps_per_second": 5.221,
81
+ "step": 72
82
+ },
83
+ {
84
+ "epoch": 9.0,
85
+ "eval_accuracy": 0.7777777777777778,
86
+ "eval_loss": 1.2555011510849,
87
+ "eval_runtime": 0.5489,
88
+ "eval_samples_per_second": 32.791,
89
+ "eval_steps_per_second": 5.465,
90
+ "step": 81
91
+ },
92
+ {
93
+ "epoch": 10.0,
94
+ "eval_accuracy": 0.7777777777777778,
95
+ "eval_loss": 1.2477619647979736,
96
+ "eval_runtime": 0.5687,
97
+ "eval_samples_per_second": 31.651,
98
+ "eval_steps_per_second": 5.275,
99
+ "step": 90
100
+ },
101
+ {
102
+ "epoch": 11.0,
103
+ "eval_accuracy": 0.7777777777777778,
104
+ "eval_loss": 1.2382668256759644,
105
+ "eval_runtime": 0.5502,
106
+ "eval_samples_per_second": 32.716,
107
+ "eval_steps_per_second": 5.453,
108
+ "step": 99
109
+ },
110
+ {
111
+ "epoch": 12.0,
112
+ "eval_accuracy": 0.7222222222222222,
113
+ "eval_loss": 1.223071575164795,
114
+ "eval_runtime": 0.5476,
115
+ "eval_samples_per_second": 32.873,
116
+ "eval_steps_per_second": 5.479,
117
+ "step": 108
118
+ },
119
+ {
120
+ "epoch": 13.0,
121
+ "eval_accuracy": 0.7222222222222222,
122
+ "eval_loss": 1.2033451795578003,
123
+ "eval_runtime": 0.5538,
124
+ "eval_samples_per_second": 32.503,
125
+ "eval_steps_per_second": 5.417,
126
+ "step": 117
127
+ },
128
+ {
129
+ "epoch": 14.0,
130
+ "eval_accuracy": 0.7777777777777778,
131
+ "eval_loss": 1.186124563217163,
132
+ "eval_runtime": 0.5442,
133
+ "eval_samples_per_second": 33.075,
134
+ "eval_steps_per_second": 5.512,
135
+ "step": 126
136
+ },
137
+ {
138
+ "epoch": 15.0,
139
+ "eval_accuracy": 0.7777777777777778,
140
+ "eval_loss": 1.182234287261963,
141
+ "eval_runtime": 0.5735,
142
+ "eval_samples_per_second": 31.387,
143
+ "eval_steps_per_second": 5.231,
144
+ "step": 135
145
+ },
146
+ {
147
+ "epoch": 16.0,
148
+ "eval_accuracy": 0.7777777777777778,
149
+ "eval_loss": 1.1588956117630005,
150
+ "eval_runtime": 0.5533,
151
+ "eval_samples_per_second": 32.534,
152
+ "eval_steps_per_second": 5.422,
153
+ "step": 144
154
+ },
155
+ {
156
+ "epoch": 17.0,
157
+ "eval_accuracy": 0.7777777777777778,
158
+ "eval_loss": 1.1478044986724854,
159
+ "eval_runtime": 0.5826,
160
+ "eval_samples_per_second": 30.897,
161
+ "eval_steps_per_second": 5.15,
162
+ "step": 153
163
+ },
164
+ {
165
+ "epoch": 18.0,
166
+ "eval_accuracy": 0.7777777777777778,
167
+ "eval_loss": 1.1322474479675293,
168
+ "eval_runtime": 0.5883,
169
+ "eval_samples_per_second": 30.598,
170
+ "eval_steps_per_second": 5.1,
171
+ "step": 162
172
+ },
173
+ {
174
+ "epoch": 19.0,
175
+ "eval_accuracy": 0.7777777777777778,
176
+ "eval_loss": 1.110813021659851,
177
+ "eval_runtime": 0.5594,
178
+ "eval_samples_per_second": 32.177,
179
+ "eval_steps_per_second": 5.363,
180
+ "step": 171
181
+ },
182
+ {
183
+ "epoch": 20.0,
184
+ "eval_accuracy": 0.7222222222222222,
185
+ "eval_loss": 1.1011286973953247,
186
+ "eval_runtime": 0.5793,
187
+ "eval_samples_per_second": 31.073,
188
+ "eval_steps_per_second": 5.179,
189
+ "step": 180
190
+ },
191
+ {
192
+ "epoch": 21.0,
193
+ "eval_accuracy": 0.7222222222222222,
194
+ "eval_loss": 1.093163251876831,
195
+ "eval_runtime": 0.5654,
196
+ "eval_samples_per_second": 31.836,
197
+ "eval_steps_per_second": 5.306,
198
+ "step": 189
199
+ },
200
+ {
201
+ "epoch": 22.0,
202
+ "eval_accuracy": 0.7222222222222222,
203
+ "eval_loss": 1.0637242794036865,
204
+ "eval_runtime": 0.5679,
205
+ "eval_samples_per_second": 31.695,
206
+ "eval_steps_per_second": 5.283,
207
+ "step": 198
208
+ },
209
+ {
210
+ "epoch": 23.0,
211
+ "eval_accuracy": 0.7222222222222222,
212
+ "eval_loss": 1.0390856266021729,
213
+ "eval_runtime": 0.574,
214
+ "eval_samples_per_second": 31.358,
215
+ "eval_steps_per_second": 5.226,
216
+ "step": 207
217
+ },
218
+ {
219
+ "epoch": 24.0,
220
+ "eval_accuracy": 0.7222222222222222,
221
+ "eval_loss": 1.0522559881210327,
222
+ "eval_runtime": 0.6203,
223
+ "eval_samples_per_second": 29.02,
224
+ "eval_steps_per_second": 4.837,
225
+ "step": 216
226
+ },
227
+ {
228
+ "epoch": 25.0,
229
+ "eval_accuracy": 0.7777777777777778,
230
+ "eval_loss": 1.0473227500915527,
231
+ "eval_runtime": 0.5963,
232
+ "eval_samples_per_second": 30.186,
233
+ "eval_steps_per_second": 5.031,
234
+ "step": 225
235
+ },
236
+ {
237
+ "epoch": 26.0,
238
+ "eval_accuracy": 0.7222222222222222,
239
+ "eval_loss": 0.9998855590820312,
240
+ "eval_runtime": 0.6467,
241
+ "eval_samples_per_second": 27.834,
242
+ "eval_steps_per_second": 4.639,
243
+ "step": 234
244
+ },
245
+ {
246
+ "epoch": 27.0,
247
+ "eval_accuracy": 0.7222222222222222,
248
+ "eval_loss": 1.0170878171920776,
249
+ "eval_runtime": 0.5994,
250
+ "eval_samples_per_second": 30.032,
251
+ "eval_steps_per_second": 5.005,
252
+ "step": 243
253
+ },
254
+ {
255
+ "epoch": 28.0,
256
+ "eval_accuracy": 0.7222222222222222,
257
+ "eval_loss": 1.028573989868164,
258
+ "eval_runtime": 0.6332,
259
+ "eval_samples_per_second": 28.427,
260
+ "eval_steps_per_second": 4.738,
261
+ "step": 252
262
+ },
263
+ {
264
+ "epoch": 29.0,
265
+ "eval_accuracy": 0.7222222222222222,
266
+ "eval_loss": 1.0290330648422241,
267
+ "eval_runtime": 0.5676,
268
+ "eval_samples_per_second": 31.712,
269
+ "eval_steps_per_second": 5.285,
270
+ "step": 261
271
+ },
272
+ {
273
+ "epoch": 30.0,
274
+ "eval_accuracy": 0.7777777777777778,
275
+ "eval_loss": 0.9571393132209778,
276
+ "eval_runtime": 0.5753,
277
+ "eval_samples_per_second": 31.286,
278
+ "eval_steps_per_second": 5.214,
279
+ "step": 270
280
+ },
281
+ {
282
+ "epoch": 31.0,
283
+ "eval_accuracy": 0.7777777777777778,
284
+ "eval_loss": 0.9450912475585938,
285
+ "eval_runtime": 0.5556,
286
+ "eval_samples_per_second": 32.4,
287
+ "eval_steps_per_second": 5.4,
288
+ "step": 279
289
+ },
290
+ {
291
+ "epoch": 32.0,
292
+ "eval_accuracy": 0.7777777777777778,
293
+ "eval_loss": 0.85059654712677,
294
+ "eval_runtime": 0.5543,
295
+ "eval_samples_per_second": 32.473,
296
+ "eval_steps_per_second": 5.412,
297
+ "step": 288
298
+ },
299
+ {
300
+ "epoch": 33.0,
301
+ "eval_accuracy": 0.7777777777777778,
302
+ "eval_loss": 0.8480208516120911,
303
+ "eval_runtime": 0.5762,
304
+ "eval_samples_per_second": 31.241,
305
+ "eval_steps_per_second": 5.207,
306
+ "step": 297
307
+ },
308
+ {
309
+ "epoch": 34.0,
310
+ "eval_accuracy": 0.7777777777777778,
311
+ "eval_loss": 0.8499312400817871,
312
+ "eval_runtime": 0.555,
313
+ "eval_samples_per_second": 32.433,
314
+ "eval_steps_per_second": 5.405,
315
+ "step": 306
316
+ },
317
+ {
318
+ "epoch": 35.0,
319
+ "eval_accuracy": 0.7777777777777778,
320
+ "eval_loss": 0.8403282165527344,
321
+ "eval_runtime": 0.5709,
322
+ "eval_samples_per_second": 31.532,
323
+ "eval_steps_per_second": 5.255,
324
+ "step": 315
325
+ },
326
+ {
327
+ "epoch": 36.0,
328
+ "eval_accuracy": 0.7777777777777778,
329
+ "eval_loss": 0.7771400809288025,
330
+ "eval_runtime": 0.5569,
331
+ "eval_samples_per_second": 32.319,
332
+ "eval_steps_per_second": 5.387,
333
+ "step": 324
334
+ },
335
+ {
336
+ "epoch": 37.0,
337
+ "eval_accuracy": 0.7777777777777778,
338
+ "eval_loss": 0.7591123580932617,
339
+ "eval_runtime": 0.6487,
340
+ "eval_samples_per_second": 27.747,
341
+ "eval_steps_per_second": 4.625,
342
+ "step": 333
343
+ },
344
+ {
345
+ "epoch": 38.0,
346
+ "eval_accuracy": 0.7777777777777778,
347
+ "eval_loss": 0.7476389408111572,
348
+ "eval_runtime": 0.5654,
349
+ "eval_samples_per_second": 31.835,
350
+ "eval_steps_per_second": 5.306,
351
+ "step": 342
352
+ },
353
+ {
354
+ "epoch": 39.0,
355
+ "eval_accuracy": 0.7777777777777778,
356
+ "eval_loss": 0.7831047773361206,
357
+ "eval_runtime": 0.5458,
358
+ "eval_samples_per_second": 32.977,
359
+ "eval_steps_per_second": 5.496,
360
+ "step": 351
361
+ },
362
+ {
363
+ "epoch": 40.0,
364
+ "eval_accuracy": 0.7777777777777778,
365
+ "eval_loss": 0.7049207091331482,
366
+ "eval_runtime": 0.5872,
367
+ "eval_samples_per_second": 30.653,
368
+ "eval_steps_per_second": 5.109,
369
+ "step": 360
370
+ },
371
+ {
372
+ "epoch": 41.0,
373
+ "eval_accuracy": 0.7777777777777778,
374
+ "eval_loss": 0.6811972856521606,
375
+ "eval_runtime": 0.5667,
376
+ "eval_samples_per_second": 31.762,
377
+ "eval_steps_per_second": 5.294,
378
+ "step": 369
379
+ },
380
+ {
381
+ "epoch": 42.0,
382
+ "eval_accuracy": 0.7777777777777778,
383
+ "eval_loss": 0.6736953258514404,
384
+ "eval_runtime": 0.5538,
385
+ "eval_samples_per_second": 32.502,
386
+ "eval_steps_per_second": 5.417,
387
+ "step": 378
388
+ },
389
+ {
390
+ "epoch": 43.0,
391
+ "eval_accuracy": 0.7777777777777778,
392
+ "eval_loss": 0.6515324711799622,
393
+ "eval_runtime": 0.5632,
394
+ "eval_samples_per_second": 31.958,
395
+ "eval_steps_per_second": 5.326,
396
+ "step": 387
397
+ },
398
+ {
399
+ "epoch": 44.0,
400
+ "eval_accuracy": 0.7777777777777778,
401
+ "eval_loss": 0.6634184122085571,
402
+ "eval_runtime": 1.1806,
403
+ "eval_samples_per_second": 15.246,
404
+ "eval_steps_per_second": 2.541,
405
+ "step": 396
406
+ },
407
+ {
408
+ "epoch": 45.0,
409
+ "eval_accuracy": 0.8333333333333334,
410
+ "eval_loss": 0.6234365105628967,
411
+ "eval_runtime": 0.6418,
412
+ "eval_samples_per_second": 28.044,
413
+ "eval_steps_per_second": 4.674,
414
+ "step": 405
415
+ },
416
+ {
417
+ "epoch": 46.0,
418
+ "eval_accuracy": 0.8333333333333334,
419
+ "eval_loss": 0.8482791185379028,
420
+ "eval_runtime": 0.5859,
421
+ "eval_samples_per_second": 30.723,
422
+ "eval_steps_per_second": 5.121,
423
+ "step": 414
424
+ },
425
+ {
426
+ "epoch": 47.0,
427
+ "eval_accuracy": 0.8333333333333334,
428
+ "eval_loss": 0.7264916896820068,
429
+ "eval_runtime": 0.5835,
430
+ "eval_samples_per_second": 30.847,
431
+ "eval_steps_per_second": 5.141,
432
+ "step": 423
433
+ },
434
+ {
435
+ "epoch": 48.0,
436
+ "eval_accuracy": 0.7777777777777778,
437
+ "eval_loss": 0.7383356094360352,
438
+ "eval_runtime": 0.5549,
439
+ "eval_samples_per_second": 32.437,
440
+ "eval_steps_per_second": 5.406,
441
+ "step": 432
442
+ },
443
+ {
444
+ "epoch": 49.0,
445
+ "eval_accuracy": 0.8333333333333334,
446
+ "eval_loss": 0.7005217671394348,
447
+ "eval_runtime": 0.5608,
448
+ "eval_samples_per_second": 32.095,
449
+ "eval_steps_per_second": 5.349,
450
+ "step": 441
451
+ },
452
+ {
453
+ "epoch": 50.0,
454
+ "eval_accuracy": 0.8333333333333334,
455
+ "eval_loss": 0.5740242004394531,
456
+ "eval_runtime": 0.5536,
457
+ "eval_samples_per_second": 32.517,
458
+ "eval_steps_per_second": 5.42,
459
+ "step": 450
460
+ },
461
+ {
462
+ "epoch": 51.0,
463
+ "eval_accuracy": 0.8333333333333334,
464
+ "eval_loss": 0.5622536540031433,
465
+ "eval_runtime": 0.5935,
466
+ "eval_samples_per_second": 30.329,
467
+ "eval_steps_per_second": 5.055,
468
+ "step": 459
469
+ },
470
+ {
471
+ "epoch": 52.0,
472
+ "eval_accuracy": 0.8333333333333334,
473
+ "eval_loss": 0.557184636592865,
474
+ "eval_runtime": 0.5545,
475
+ "eval_samples_per_second": 32.463,
476
+ "eval_steps_per_second": 5.41,
477
+ "step": 468
478
+ },
479
+ {
480
+ "epoch": 53.0,
481
+ "eval_accuracy": 0.8333333333333334,
482
+ "eval_loss": 0.5769361853599548,
483
+ "eval_runtime": 0.5844,
484
+ "eval_samples_per_second": 30.802,
485
+ "eval_steps_per_second": 5.134,
486
+ "step": 477
487
+ },
488
+ {
489
+ "epoch": 54.0,
490
+ "eval_accuracy": 0.8333333333333334,
491
+ "eval_loss": 0.550247311592102,
492
+ "eval_runtime": 0.615,
493
+ "eval_samples_per_second": 29.266,
494
+ "eval_steps_per_second": 4.878,
495
+ "step": 486
496
+ },
497
+ {
498
+ "epoch": 55.0,
499
+ "eval_accuracy": 0.8888888888888888,
500
+ "eval_loss": 0.6281833052635193,
501
+ "eval_runtime": 0.5457,
502
+ "eval_samples_per_second": 32.988,
503
+ "eval_steps_per_second": 5.498,
504
+ "step": 495
505
+ },
506
+ {
507
+ "epoch": 55.56,
508
+ "learning_rate": 2.2222222222222223e-05,
509
+ "loss": 1.1157,
510
+ "step": 500
511
+ },
512
+ {
513
+ "epoch": 56.0,
514
+ "eval_accuracy": 0.8333333333333334,
515
+ "eval_loss": 0.5906974077224731,
516
+ "eval_runtime": 0.5821,
517
+ "eval_samples_per_second": 30.92,
518
+ "eval_steps_per_second": 5.153,
519
+ "step": 504
520
+ },
521
+ {
522
+ "epoch": 57.0,
523
+ "eval_accuracy": 0.9444444444444444,
524
+ "eval_loss": 0.6346855163574219,
525
+ "eval_runtime": 0.5542,
526
+ "eval_samples_per_second": 32.481,
527
+ "eval_steps_per_second": 5.414,
528
+ "step": 513
529
+ },
530
+ {
531
+ "epoch": 58.0,
532
+ "eval_accuracy": 0.8888888888888888,
533
+ "eval_loss": 0.6413730382919312,
534
+ "eval_runtime": 0.5829,
535
+ "eval_samples_per_second": 30.882,
536
+ "eval_steps_per_second": 5.147,
537
+ "step": 522
538
+ },
539
+ {
540
+ "epoch": 59.0,
541
+ "eval_accuracy": 0.8888888888888888,
542
+ "eval_loss": 0.465614378452301,
543
+ "eval_runtime": 0.5965,
544
+ "eval_samples_per_second": 30.178,
545
+ "eval_steps_per_second": 5.03,
546
+ "step": 531
547
+ },
548
+ {
549
+ "epoch": 60.0,
550
+ "eval_accuracy": 0.8888888888888888,
551
+ "eval_loss": 0.48488152027130127,
552
+ "eval_runtime": 0.5621,
553
+ "eval_samples_per_second": 32.02,
554
+ "eval_steps_per_second": 5.337,
555
+ "step": 540
556
+ },
557
+ {
558
+ "epoch": 61.0,
559
+ "eval_accuracy": 0.9444444444444444,
560
+ "eval_loss": 0.8426976799964905,
561
+ "eval_runtime": 0.5798,
562
+ "eval_samples_per_second": 31.045,
563
+ "eval_steps_per_second": 5.174,
564
+ "step": 549
565
+ },
566
+ {
567
+ "epoch": 62.0,
568
+ "eval_accuracy": 0.8888888888888888,
569
+ "eval_loss": 0.5708574652671814,
570
+ "eval_runtime": 0.5531,
571
+ "eval_samples_per_second": 32.544,
572
+ "eval_steps_per_second": 5.424,
573
+ "step": 558
574
+ },
575
+ {
576
+ "epoch": 63.0,
577
+ "eval_accuracy": 0.8888888888888888,
578
+ "eval_loss": 0.5026788115501404,
579
+ "eval_runtime": 0.5575,
580
+ "eval_samples_per_second": 32.289,
581
+ "eval_steps_per_second": 5.381,
582
+ "step": 567
583
+ },
584
+ {
585
+ "epoch": 64.0,
586
+ "eval_accuracy": 0.8888888888888888,
587
+ "eval_loss": 0.5724208354949951,
588
+ "eval_runtime": 0.5481,
589
+ "eval_samples_per_second": 32.844,
590
+ "eval_steps_per_second": 5.474,
591
+ "step": 576
592
+ },
593
+ {
594
+ "epoch": 65.0,
595
+ "eval_accuracy": 0.8888888888888888,
596
+ "eval_loss": 0.5301716327667236,
597
+ "eval_runtime": 0.553,
598
+ "eval_samples_per_second": 32.551,
599
+ "eval_steps_per_second": 5.425,
600
+ "step": 585
601
+ },
602
+ {
603
+ "epoch": 66.0,
604
+ "eval_accuracy": 0.8333333333333334,
605
+ "eval_loss": 0.5272272825241089,
606
+ "eval_runtime": 0.5566,
607
+ "eval_samples_per_second": 32.34,
608
+ "eval_steps_per_second": 5.39,
609
+ "step": 594
610
+ },
611
+ {
612
+ "epoch": 67.0,
613
+ "eval_accuracy": 0.8888888888888888,
614
+ "eval_loss": 0.5444329380989075,
615
+ "eval_runtime": 0.555,
616
+ "eval_samples_per_second": 32.435,
617
+ "eval_steps_per_second": 5.406,
618
+ "step": 603
619
+ },
620
+ {
621
+ "epoch": 68.0,
622
+ "eval_accuracy": 0.8888888888888888,
623
+ "eval_loss": 0.3936518132686615,
624
+ "eval_runtime": 0.6001,
625
+ "eval_samples_per_second": 29.994,
626
+ "eval_steps_per_second": 4.999,
627
+ "step": 612
628
+ },
629
+ {
630
+ "epoch": 69.0,
631
+ "eval_accuracy": 0.9444444444444444,
632
+ "eval_loss": 0.41802236437797546,
633
+ "eval_runtime": 0.5658,
634
+ "eval_samples_per_second": 31.816,
635
+ "eval_steps_per_second": 5.303,
636
+ "step": 621
637
+ },
638
+ {
639
+ "epoch": 70.0,
640
+ "eval_accuracy": 0.8888888888888888,
641
+ "eval_loss": 0.5185115337371826,
642
+ "eval_runtime": 0.5619,
643
+ "eval_samples_per_second": 32.036,
644
+ "eval_steps_per_second": 5.339,
645
+ "step": 630
646
+ },
647
+ {
648
+ "epoch": 71.0,
649
+ "eval_accuracy": 1.0,
650
+ "eval_loss": 0.39606520533561707,
651
+ "eval_runtime": 0.5983,
652
+ "eval_samples_per_second": 30.085,
653
+ "eval_steps_per_second": 5.014,
654
+ "step": 639
655
+ },
656
+ {
657
+ "epoch": 72.0,
658
+ "eval_accuracy": 0.9444444444444444,
659
+ "eval_loss": 0.3859682083129883,
660
+ "eval_runtime": 0.5757,
661
+ "eval_samples_per_second": 31.268,
662
+ "eval_steps_per_second": 5.211,
663
+ "step": 648
664
+ },
665
+ {
666
+ "epoch": 73.0,
667
+ "eval_accuracy": 0.9444444444444444,
668
+ "eval_loss": 0.39656686782836914,
669
+ "eval_runtime": 0.6158,
670
+ "eval_samples_per_second": 29.228,
671
+ "eval_steps_per_second": 4.871,
672
+ "step": 657
673
+ },
674
+ {
675
+ "epoch": 74.0,
676
+ "eval_accuracy": 0.8888888888888888,
677
+ "eval_loss": 0.39676183462142944,
678
+ "eval_runtime": 0.6324,
679
+ "eval_samples_per_second": 28.462,
680
+ "eval_steps_per_second": 4.744,
681
+ "step": 666
682
+ },
683
+ {
684
+ "epoch": 75.0,
685
+ "eval_accuracy": 0.8888888888888888,
686
+ "eval_loss": 0.4546321630477905,
687
+ "eval_runtime": 0.5605,
688
+ "eval_samples_per_second": 32.114,
689
+ "eval_steps_per_second": 5.352,
690
+ "step": 675
691
+ },
692
+ {
693
+ "epoch": 76.0,
694
+ "eval_accuracy": 0.8888888888888888,
695
+ "eval_loss": 0.4021334648132324,
696
+ "eval_runtime": 0.553,
697
+ "eval_samples_per_second": 32.55,
698
+ "eval_steps_per_second": 5.425,
699
+ "step": 684
700
+ },
701
+ {
702
+ "epoch": 77.0,
703
+ "eval_accuracy": 0.9444444444444444,
704
+ "eval_loss": 0.414422869682312,
705
+ "eval_runtime": 0.5759,
706
+ "eval_samples_per_second": 31.256,
707
+ "eval_steps_per_second": 5.209,
708
+ "step": 693
709
+ },
710
+ {
711
+ "epoch": 78.0,
712
+ "eval_accuracy": 1.0,
713
+ "eval_loss": 0.35500773787498474,
714
+ "eval_runtime": 0.5802,
715
+ "eval_samples_per_second": 31.024,
716
+ "eval_steps_per_second": 5.171,
717
+ "step": 702
718
+ },
719
+ {
720
+ "epoch": 79.0,
721
+ "eval_accuracy": 0.9444444444444444,
722
+ "eval_loss": 0.3838707506656647,
723
+ "eval_runtime": 0.5616,
724
+ "eval_samples_per_second": 32.052,
725
+ "eval_steps_per_second": 5.342,
726
+ "step": 711
727
+ },
728
+ {
729
+ "epoch": 80.0,
730
+ "eval_accuracy": 0.8888888888888888,
731
+ "eval_loss": 0.437086820602417,
732
+ "eval_runtime": 0.554,
733
+ "eval_samples_per_second": 32.49,
734
+ "eval_steps_per_second": 5.415,
735
+ "step": 720
736
+ },
737
+ {
738
+ "epoch": 81.0,
739
+ "eval_accuracy": 0.8888888888888888,
740
+ "eval_loss": 0.36943355202674866,
741
+ "eval_runtime": 0.5894,
742
+ "eval_samples_per_second": 30.537,
743
+ "eval_steps_per_second": 5.09,
744
+ "step": 729
745
+ },
746
+ {
747
+ "epoch": 82.0,
748
+ "eval_accuracy": 0.8888888888888888,
749
+ "eval_loss": 0.36648380756378174,
750
+ "eval_runtime": 0.5615,
751
+ "eval_samples_per_second": 32.058,
752
+ "eval_steps_per_second": 5.343,
753
+ "step": 738
754
+ },
755
+ {
756
+ "epoch": 83.0,
757
+ "eval_accuracy": 0.8888888888888888,
758
+ "eval_loss": 0.4732191264629364,
759
+ "eval_runtime": 0.5632,
760
+ "eval_samples_per_second": 31.959,
761
+ "eval_steps_per_second": 5.327,
762
+ "step": 747
763
+ },
764
+ {
765
+ "epoch": 84.0,
766
+ "eval_accuracy": 0.9444444444444444,
767
+ "eval_loss": 0.4652000665664673,
768
+ "eval_runtime": 0.592,
769
+ "eval_samples_per_second": 30.406,
770
+ "eval_steps_per_second": 5.068,
771
+ "step": 756
772
+ },
773
+ {
774
+ "epoch": 85.0,
775
+ "eval_accuracy": 0.8888888888888888,
776
+ "eval_loss": 0.670432448387146,
777
+ "eval_runtime": 0.5801,
778
+ "eval_samples_per_second": 31.028,
779
+ "eval_steps_per_second": 5.171,
780
+ "step": 765
781
+ },
782
+ {
783
+ "epoch": 86.0,
784
+ "eval_accuracy": 0.8333333333333334,
785
+ "eval_loss": 0.7354382872581482,
786
+ "eval_runtime": 0.5599,
787
+ "eval_samples_per_second": 32.151,
788
+ "eval_steps_per_second": 5.359,
789
+ "step": 774
790
+ },
791
+ {
792
+ "epoch": 87.0,
793
+ "eval_accuracy": 0.8333333333333334,
794
+ "eval_loss": 0.5439589023590088,
795
+ "eval_runtime": 0.5486,
796
+ "eval_samples_per_second": 32.811,
797
+ "eval_steps_per_second": 5.468,
798
+ "step": 783
799
+ },
800
+ {
801
+ "epoch": 88.0,
802
+ "eval_accuracy": 0.8888888888888888,
803
+ "eval_loss": 0.4809061586856842,
804
+ "eval_runtime": 0.6101,
805
+ "eval_samples_per_second": 29.505,
806
+ "eval_steps_per_second": 4.917,
807
+ "step": 792
808
+ },
809
+ {
810
+ "epoch": 89.0,
811
+ "eval_accuracy": 0.9444444444444444,
812
+ "eval_loss": 0.6704312562942505,
813
+ "eval_runtime": 0.5954,
814
+ "eval_samples_per_second": 30.229,
815
+ "eval_steps_per_second": 5.038,
816
+ "step": 801
817
+ },
818
+ {
819
+ "epoch": 90.0,
820
+ "eval_accuracy": 0.8888888888888888,
821
+ "eval_loss": 0.41327810287475586,
822
+ "eval_runtime": 0.5783,
823
+ "eval_samples_per_second": 31.124,
824
+ "eval_steps_per_second": 5.187,
825
+ "step": 810
826
+ },
827
+ {
828
+ "epoch": 91.0,
829
+ "eval_accuracy": 0.9444444444444444,
830
+ "eval_loss": 0.350969523191452,
831
+ "eval_runtime": 0.5507,
832
+ "eval_samples_per_second": 32.686,
833
+ "eval_steps_per_second": 5.448,
834
+ "step": 819
835
+ },
836
+ {
837
+ "epoch": 92.0,
838
+ "eval_accuracy": 0.8888888888888888,
839
+ "eval_loss": 0.39819759130477905,
840
+ "eval_runtime": 0.5465,
841
+ "eval_samples_per_second": 32.939,
842
+ "eval_steps_per_second": 5.49,
843
+ "step": 828
844
+ },
845
+ {
846
+ "epoch": 93.0,
847
+ "eval_accuracy": 0.9444444444444444,
848
+ "eval_loss": 0.34932640194892883,
849
+ "eval_runtime": 0.5635,
850
+ "eval_samples_per_second": 31.941,
851
+ "eval_steps_per_second": 5.323,
852
+ "step": 837
853
+ },
854
+ {
855
+ "epoch": 94.0,
856
+ "eval_accuracy": 0.8888888888888888,
857
+ "eval_loss": 0.48363664746284485,
858
+ "eval_runtime": 0.5411,
859
+ "eval_samples_per_second": 33.263,
860
+ "eval_steps_per_second": 5.544,
861
+ "step": 846
862
+ },
863
+ {
864
+ "epoch": 95.0,
865
+ "eval_accuracy": 0.9444444444444444,
866
+ "eval_loss": 0.4434005320072174,
867
+ "eval_runtime": 0.5964,
868
+ "eval_samples_per_second": 30.181,
869
+ "eval_steps_per_second": 5.03,
870
+ "step": 855
871
+ },
872
+ {
873
+ "epoch": 96.0,
874
+ "eval_accuracy": 0.8333333333333334,
875
+ "eval_loss": 0.4290742874145508,
876
+ "eval_runtime": 0.5584,
877
+ "eval_samples_per_second": 32.235,
878
+ "eval_steps_per_second": 5.373,
879
+ "step": 864
880
+ },
881
+ {
882
+ "epoch": 97.0,
883
+ "eval_accuracy": 0.9444444444444444,
884
+ "eval_loss": 0.34131091833114624,
885
+ "eval_runtime": 0.5715,
886
+ "eval_samples_per_second": 31.493,
887
+ "eval_steps_per_second": 5.249,
888
+ "step": 873
889
+ },
890
+ {
891
+ "epoch": 98.0,
892
+ "eval_accuracy": 0.8888888888888888,
893
+ "eval_loss": 0.3645610511302948,
894
+ "eval_runtime": 0.5506,
895
+ "eval_samples_per_second": 32.692,
896
+ "eval_steps_per_second": 5.449,
897
+ "step": 882
898
+ },
899
+ {
900
+ "epoch": 99.0,
901
+ "eval_accuracy": 0.9444444444444444,
902
+ "eval_loss": 0.5591509938240051,
903
+ "eval_runtime": 0.5908,
904
+ "eval_samples_per_second": 30.468,
905
+ "eval_steps_per_second": 5.078,
906
+ "step": 891
907
+ },
908
+ {
909
+ "epoch": 100.0,
910
+ "eval_accuracy": 0.9444444444444444,
911
+ "eval_loss": 0.48491573333740234,
912
+ "eval_runtime": 0.5689,
913
+ "eval_samples_per_second": 31.643,
914
+ "eval_steps_per_second": 5.274,
915
+ "step": 900
916
+ }
917
+ ],
918
+ "logging_steps": 500,
919
+ "max_steps": 900,
920
+ "num_train_epochs": 100,
921
+ "save_steps": 500,
922
+ "total_flos": 1.507976427331584e+17,
923
+ "trial_name": null,
924
+ "trial_params": null
925
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b96faf171c2424278071615eee0427e37d71d76381d743e379faddb5bf4bd032
3
+ size 4472