emre570 commited on
Commit
e4d7864
·
verified ·
1 Parent(s): 2abb154

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-large-patch16-224",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "benign",
13
+ "1": "malignant",
14
+ "2": "normal"
15
+ },
16
+ "image_size": 224,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 4096,
19
+ "label2id": {
20
+ "benign": 0,
21
+ "malignant": 1,
22
+ "normal": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "model_type": "vit",
26
+ "num_attention_heads": 16,
27
+ "num_channels": 3,
28
+ "num_hidden_layers": 24,
29
+ "patch_size": 16,
30
+ "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.38.2"
34
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64c4a95261002257a92f9846cc25647c5bb43a07e019270f7a361648c3ce3b3c
3
+ size 1213265372
optimizer-002.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:823ac7b2a83e6142bbf00c0667d6cf819b1ae0aeb302ada9c6b8430abc3c6591
3
+ size 2426765738
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83b61c28272cbf9d5d9437321b135f15aae0e7d3ae86b73db04bec06ed879451
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f820865d11b90edd2dc3d1d4297827aba983623b9bd59f2cde35bb8f14d7a8b
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9358974358974359,
3
+ "best_model_checkpoint": "output-models/checkpoint-470",
4
+ "epoch": 40.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1880,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "step": 47,
14
+ "train_accuracy": 0.7072649572649573,
15
+ "train_loss": 0.6782774329185486,
16
+ "train_runtime": 13.5956,
17
+ "train_samples_per_second": 34.423,
18
+ "train_steps_per_second": 8.606
19
+ },
20
+ {
21
+ "epoch": 1.0,
22
+ "eval_accuracy": 0.7243589743589743,
23
+ "eval_loss": 0.6180987358093262,
24
+ "eval_runtime": 35.9375,
25
+ "eval_samples_per_second": 4.341,
26
+ "eval_steps_per_second": 1.085,
27
+ "step": 47
28
+ },
29
+ {
30
+ "epoch": 2.0,
31
+ "step": 94,
32
+ "train_accuracy": 0.75,
33
+ "train_loss": 0.5690865516662598,
34
+ "train_runtime": 13.7448,
35
+ "train_samples_per_second": 34.049,
36
+ "train_steps_per_second": 8.512
37
+ },
38
+ {
39
+ "epoch": 2.0,
40
+ "eval_accuracy": 0.8012820512820513,
41
+ "eval_loss": 0.5130882263183594,
42
+ "eval_runtime": 4.4157,
43
+ "eval_samples_per_second": 35.328,
44
+ "eval_steps_per_second": 8.832,
45
+ "step": 94
46
+ },
47
+ {
48
+ "epoch": 3.0,
49
+ "step": 141,
50
+ "train_accuracy": 0.7350427350427351,
51
+ "train_loss": 0.6460429430007935,
52
+ "train_runtime": 14.0717,
53
+ "train_samples_per_second": 33.258,
54
+ "train_steps_per_second": 8.315
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.8205128205128205,
59
+ "eval_loss": 0.45179083943367004,
60
+ "eval_runtime": 4.8055,
61
+ "eval_samples_per_second": 32.463,
62
+ "eval_steps_per_second": 8.116,
63
+ "step": 141
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "step": 188,
68
+ "train_accuracy": 0.8098290598290598,
69
+ "train_loss": 0.39663246273994446,
70
+ "train_runtime": 13.7878,
71
+ "train_samples_per_second": 33.943,
72
+ "train_steps_per_second": 8.486
73
+ },
74
+ {
75
+ "epoch": 4.0,
76
+ "eval_accuracy": 0.8846153846153846,
77
+ "eval_loss": 0.2980358898639679,
78
+ "eval_runtime": 4.4674,
79
+ "eval_samples_per_second": 34.919,
80
+ "eval_steps_per_second": 8.73,
81
+ "step": 188
82
+ },
83
+ {
84
+ "epoch": 5.0,
85
+ "step": 235,
86
+ "train_accuracy": 0.8632478632478633,
87
+ "train_loss": 0.3636291027069092,
88
+ "train_runtime": 13.58,
89
+ "train_samples_per_second": 34.462,
90
+ "train_steps_per_second": 8.616
91
+ },
92
+ {
93
+ "epoch": 5.0,
94
+ "eval_accuracy": 0.8717948717948718,
95
+ "eval_loss": 0.2997760772705078,
96
+ "eval_runtime": 4.9986,
97
+ "eval_samples_per_second": 31.208,
98
+ "eval_steps_per_second": 7.802,
99
+ "step": 235
100
+ },
101
+ {
102
+ "epoch": 6.0,
103
+ "step": 282,
104
+ "train_accuracy": 0.8376068376068376,
105
+ "train_loss": 0.4208720922470093,
106
+ "train_runtime": 13.5735,
107
+ "train_samples_per_second": 34.479,
108
+ "train_steps_per_second": 8.62
109
+ },
110
+ {
111
+ "epoch": 6.0,
112
+ "eval_accuracy": 0.9102564102564102,
113
+ "eval_loss": 0.33183348178863525,
114
+ "eval_runtime": 4.4091,
115
+ "eval_samples_per_second": 35.381,
116
+ "eval_steps_per_second": 8.845,
117
+ "step": 282
118
+ },
119
+ {
120
+ "epoch": 7.0,
121
+ "step": 329,
122
+ "train_accuracy": 0.8568376068376068,
123
+ "train_loss": 0.32207924127578735,
124
+ "train_runtime": 13.5635,
125
+ "train_samples_per_second": 34.504,
126
+ "train_steps_per_second": 8.626
127
+ },
128
+ {
129
+ "epoch": 7.0,
130
+ "eval_accuracy": 0.9166666666666666,
131
+ "eval_loss": 0.24086996912956238,
132
+ "eval_runtime": 4.7402,
133
+ "eval_samples_per_second": 32.91,
134
+ "eval_steps_per_second": 8.228,
135
+ "step": 329
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "step": 376,
140
+ "train_accuracy": 0.8846153846153846,
141
+ "train_loss": 0.3257001042366028,
142
+ "train_runtime": 13.9348,
143
+ "train_samples_per_second": 33.585,
144
+ "train_steps_per_second": 8.396
145
+ },
146
+ {
147
+ "epoch": 8.0,
148
+ "eval_accuracy": 0.8717948717948718,
149
+ "eval_loss": 0.3424080014228821,
150
+ "eval_runtime": 4.9539,
151
+ "eval_samples_per_second": 31.49,
152
+ "eval_steps_per_second": 7.873,
153
+ "step": 376
154
+ },
155
+ {
156
+ "epoch": 9.0,
157
+ "step": 423,
158
+ "train_accuracy": 0.8717948717948718,
159
+ "train_loss": 0.2687961757183075,
160
+ "train_runtime": 13.6829,
161
+ "train_samples_per_second": 34.203,
162
+ "train_steps_per_second": 8.551
163
+ },
164
+ {
165
+ "epoch": 9.0,
166
+ "eval_accuracy": 0.9166666666666666,
167
+ "eval_loss": 0.25928938388824463,
168
+ "eval_runtime": 4.4508,
169
+ "eval_samples_per_second": 35.05,
170
+ "eval_steps_per_second": 8.762,
171
+ "step": 423
172
+ },
173
+ {
174
+ "epoch": 10.0,
175
+ "step": 470,
176
+ "train_accuracy": 0.9102564102564102,
177
+ "train_loss": 0.2113831341266632,
178
+ "train_runtime": 13.44,
179
+ "train_samples_per_second": 34.821,
180
+ "train_steps_per_second": 8.705
181
+ },
182
+ {
183
+ "epoch": 10.0,
184
+ "eval_accuracy": 0.9358974358974359,
185
+ "eval_loss": 0.25084006786346436,
186
+ "eval_runtime": 4.8092,
187
+ "eval_samples_per_second": 32.438,
188
+ "eval_steps_per_second": 8.109,
189
+ "step": 470
190
+ },
191
+ {
192
+ "epoch": 10.64,
193
+ "grad_norm": 7.874776840209961,
194
+ "learning_rate": 1.4680851063829789e-05,
195
+ "loss": 0.4595,
196
+ "step": 500
197
+ },
198
+ {
199
+ "epoch": 11.0,
200
+ "step": 517,
201
+ "train_accuracy": 0.8995726495726496,
202
+ "train_loss": 0.25938984751701355,
203
+ "train_runtime": 13.5841,
204
+ "train_samples_per_second": 34.452,
205
+ "train_steps_per_second": 8.613
206
+ },
207
+ {
208
+ "epoch": 11.0,
209
+ "eval_accuracy": 0.9294871794871795,
210
+ "eval_loss": 0.32409772276878357,
211
+ "eval_runtime": 4.4182,
212
+ "eval_samples_per_second": 35.308,
213
+ "eval_steps_per_second": 8.827,
214
+ "step": 517
215
+ },
216
+ {
217
+ "epoch": 12.0,
218
+ "step": 564,
219
+ "train_accuracy": 0.8952991452991453,
220
+ "train_loss": 0.26763853430747986,
221
+ "train_runtime": 13.8203,
222
+ "train_samples_per_second": 33.863,
223
+ "train_steps_per_second": 8.466
224
+ },
225
+ {
226
+ "epoch": 12.0,
227
+ "eval_accuracy": 0.8910256410256411,
228
+ "eval_loss": 0.3308241069316864,
229
+ "eval_runtime": 4.4447,
230
+ "eval_samples_per_second": 35.098,
231
+ "eval_steps_per_second": 8.774,
232
+ "step": 564
233
+ },
234
+ {
235
+ "epoch": 13.0,
236
+ "step": 611,
237
+ "train_accuracy": 0.9081196581196581,
238
+ "train_loss": 0.23129615187644958,
239
+ "train_runtime": 13.4973,
240
+ "train_samples_per_second": 34.674,
241
+ "train_steps_per_second": 8.668
242
+ },
243
+ {
244
+ "epoch": 13.0,
245
+ "eval_accuracy": 0.9294871794871795,
246
+ "eval_loss": 0.255931556224823,
247
+ "eval_runtime": 4.6436,
248
+ "eval_samples_per_second": 33.595,
249
+ "eval_steps_per_second": 8.399,
250
+ "step": 611
251
+ },
252
+ {
253
+ "epoch": 14.0,
254
+ "step": 658,
255
+ "train_accuracy": 0.8846153846153846,
256
+ "train_loss": 0.31185245513916016,
257
+ "train_runtime": 13.5189,
258
+ "train_samples_per_second": 34.618,
259
+ "train_steps_per_second": 8.655
260
+ },
261
+ {
262
+ "epoch": 14.0,
263
+ "eval_accuracy": 0.9166666666666666,
264
+ "eval_loss": 0.3017214834690094,
265
+ "eval_runtime": 4.6078,
266
+ "eval_samples_per_second": 33.856,
267
+ "eval_steps_per_second": 8.464,
268
+ "step": 658
269
+ },
270
+ {
271
+ "epoch": 15.0,
272
+ "step": 705,
273
+ "train_accuracy": 0.9166666666666666,
274
+ "train_loss": 0.3007480502128601,
275
+ "train_runtime": 13.4373,
276
+ "train_samples_per_second": 34.829,
277
+ "train_steps_per_second": 8.707
278
+ },
279
+ {
280
+ "epoch": 15.0,
281
+ "eval_accuracy": 0.9038461538461539,
282
+ "eval_loss": 0.4220944046974182,
283
+ "eval_runtime": 4.9304,
284
+ "eval_samples_per_second": 31.64,
285
+ "eval_steps_per_second": 7.91,
286
+ "step": 705
287
+ },
288
+ {
289
+ "epoch": 16.0,
290
+ "step": 752,
291
+ "train_accuracy": 0.9188034188034188,
292
+ "train_loss": 0.20939397811889648,
293
+ "train_runtime": 13.6683,
294
+ "train_samples_per_second": 34.24,
295
+ "train_steps_per_second": 8.56
296
+ },
297
+ {
298
+ "epoch": 16.0,
299
+ "eval_accuracy": 0.9166666666666666,
300
+ "eval_loss": 0.36174264550209045,
301
+ "eval_runtime": 4.4608,
302
+ "eval_samples_per_second": 34.971,
303
+ "eval_steps_per_second": 8.743,
304
+ "step": 752
305
+ },
306
+ {
307
+ "epoch": 17.0,
308
+ "step": 799,
309
+ "train_accuracy": 0.9209401709401709,
310
+ "train_loss": 0.18879051506519318,
311
+ "train_runtime": 13.7929,
312
+ "train_samples_per_second": 33.931,
313
+ "train_steps_per_second": 8.483
314
+ },
315
+ {
316
+ "epoch": 17.0,
317
+ "eval_accuracy": 0.9102564102564102,
318
+ "eval_loss": 0.35188791155815125,
319
+ "eval_runtime": 4.7732,
320
+ "eval_samples_per_second": 32.683,
321
+ "eval_steps_per_second": 8.171,
322
+ "step": 799
323
+ },
324
+ {
325
+ "epoch": 18.0,
326
+ "step": 846,
327
+ "train_accuracy": 0.8952991452991453,
328
+ "train_loss": 0.25016605854034424,
329
+ "train_runtime": 13.3521,
330
+ "train_samples_per_second": 35.051,
331
+ "train_steps_per_second": 8.763
332
+ },
333
+ {
334
+ "epoch": 18.0,
335
+ "eval_accuracy": 0.9102564102564102,
336
+ "eval_loss": 0.3965354859828949,
337
+ "eval_runtime": 4.3963,
338
+ "eval_samples_per_second": 35.485,
339
+ "eval_steps_per_second": 8.871,
340
+ "step": 846
341
+ },
342
+ {
343
+ "epoch": 19.0,
344
+ "step": 893,
345
+ "train_accuracy": 0.9209401709401709,
346
+ "train_loss": 0.1891285479068756,
347
+ "train_runtime": 13.4884,
348
+ "train_samples_per_second": 34.696,
349
+ "train_steps_per_second": 8.674
350
+ },
351
+ {
352
+ "epoch": 19.0,
353
+ "eval_accuracy": 0.9038461538461539,
354
+ "eval_loss": 0.31604066491127014,
355
+ "eval_runtime": 5.1415,
356
+ "eval_samples_per_second": 30.341,
357
+ "eval_steps_per_second": 7.585,
358
+ "step": 893
359
+ },
360
+ {
361
+ "epoch": 20.0,
362
+ "step": 940,
363
+ "train_accuracy": 0.9401709401709402,
364
+ "train_loss": 0.1873449832201004,
365
+ "train_runtime": 13.9057,
366
+ "train_samples_per_second": 33.655,
367
+ "train_steps_per_second": 8.414
368
+ },
369
+ {
370
+ "epoch": 20.0,
371
+ "eval_accuracy": 0.9294871794871795,
372
+ "eval_loss": 0.3332672119140625,
373
+ "eval_runtime": 4.9421,
374
+ "eval_samples_per_second": 31.565,
375
+ "eval_steps_per_second": 7.891,
376
+ "step": 940
377
+ },
378
+ {
379
+ "epoch": 21.0,
380
+ "step": 987,
381
+ "train_accuracy": 0.9230769230769231,
382
+ "train_loss": 0.18881197273731232,
383
+ "train_runtime": 13.5338,
384
+ "train_samples_per_second": 34.58,
385
+ "train_steps_per_second": 8.645
386
+ },
387
+ {
388
+ "epoch": 21.0,
389
+ "eval_accuracy": 0.8910256410256411,
390
+ "eval_loss": 0.3720751404762268,
391
+ "eval_runtime": 4.8223,
392
+ "eval_samples_per_second": 32.35,
393
+ "eval_steps_per_second": 8.088,
394
+ "step": 987
395
+ },
396
+ {
397
+ "epoch": 21.28,
398
+ "grad_norm": 5.682499408721924,
399
+ "learning_rate": 9.361702127659576e-06,
400
+ "loss": 0.2485,
401
+ "step": 1000
402
+ },
403
+ {
404
+ "epoch": 22.0,
405
+ "step": 1034,
406
+ "train_accuracy": 0.9444444444444444,
407
+ "train_loss": 0.1338244080543518,
408
+ "train_runtime": 13.6664,
409
+ "train_samples_per_second": 34.245,
410
+ "train_steps_per_second": 8.561
411
+ },
412
+ {
413
+ "epoch": 22.0,
414
+ "eval_accuracy": 0.9166666666666666,
415
+ "eval_loss": 0.3777410686016083,
416
+ "eval_runtime": 4.3337,
417
+ "eval_samples_per_second": 35.997,
418
+ "eval_steps_per_second": 8.999,
419
+ "step": 1034
420
+ },
421
+ {
422
+ "epoch": 23.0,
423
+ "step": 1081,
424
+ "train_accuracy": 0.9252136752136753,
425
+ "train_loss": 0.18711484968662262,
426
+ "train_runtime": 13.702,
427
+ "train_samples_per_second": 34.155,
428
+ "train_steps_per_second": 8.539
429
+ },
430
+ {
431
+ "epoch": 23.0,
432
+ "eval_accuracy": 0.9038461538461539,
433
+ "eval_loss": 0.3984796702861786,
434
+ "eval_runtime": 4.5267,
435
+ "eval_samples_per_second": 34.462,
436
+ "eval_steps_per_second": 8.616,
437
+ "step": 1081
438
+ },
439
+ {
440
+ "epoch": 24.0,
441
+ "step": 1128,
442
+ "train_accuracy": 0.9444444444444444,
443
+ "train_loss": 0.161672905087471,
444
+ "train_runtime": 13.9523,
445
+ "train_samples_per_second": 33.543,
446
+ "train_steps_per_second": 8.386
447
+ },
448
+ {
449
+ "epoch": 24.0,
450
+ "eval_accuracy": 0.9230769230769231,
451
+ "eval_loss": 0.38641923666000366,
452
+ "eval_runtime": 5.1128,
453
+ "eval_samples_per_second": 30.511,
454
+ "eval_steps_per_second": 7.628,
455
+ "step": 1128
456
+ },
457
+ {
458
+ "epoch": 25.0,
459
+ "step": 1175,
460
+ "train_accuracy": 0.9444444444444444,
461
+ "train_loss": 0.17891307175159454,
462
+ "train_runtime": 13.7483,
463
+ "train_samples_per_second": 34.041,
464
+ "train_steps_per_second": 8.51
465
+ },
466
+ {
467
+ "epoch": 25.0,
468
+ "eval_accuracy": 0.9230769230769231,
469
+ "eval_loss": 0.42098188400268555,
470
+ "eval_runtime": 5.0358,
471
+ "eval_samples_per_second": 30.978,
472
+ "eval_steps_per_second": 7.745,
473
+ "step": 1175
474
+ },
475
+ {
476
+ "epoch": 26.0,
477
+ "step": 1222,
478
+ "train_accuracy": 0.9572649572649573,
479
+ "train_loss": 0.10899731516838074,
480
+ "train_runtime": 14.0913,
481
+ "train_samples_per_second": 33.212,
482
+ "train_steps_per_second": 8.303
483
+ },
484
+ {
485
+ "epoch": 26.0,
486
+ "eval_accuracy": 0.9038461538461539,
487
+ "eval_loss": 0.4160342216491699,
488
+ "eval_runtime": 4.7918,
489
+ "eval_samples_per_second": 32.555,
490
+ "eval_steps_per_second": 8.139,
491
+ "step": 1222
492
+ },
493
+ {
494
+ "epoch": 27.0,
495
+ "step": 1269,
496
+ "train_accuracy": 0.938034188034188,
497
+ "train_loss": 0.16018715500831604,
498
+ "train_runtime": 13.888,
499
+ "train_samples_per_second": 33.698,
500
+ "train_steps_per_second": 8.425
501
+ },
502
+ {
503
+ "epoch": 27.0,
504
+ "eval_accuracy": 0.9102564102564102,
505
+ "eval_loss": 0.39854034781455994,
506
+ "eval_runtime": 4.8553,
507
+ "eval_samples_per_second": 32.13,
508
+ "eval_steps_per_second": 8.032,
509
+ "step": 1269
510
+ },
511
+ {
512
+ "epoch": 28.0,
513
+ "step": 1316,
514
+ "train_accuracy": 0.9444444444444444,
515
+ "train_loss": 0.14988763630390167,
516
+ "train_runtime": 13.7687,
517
+ "train_samples_per_second": 33.99,
518
+ "train_steps_per_second": 8.498
519
+ },
520
+ {
521
+ "epoch": 28.0,
522
+ "eval_accuracy": 0.9102564102564102,
523
+ "eval_loss": 0.40767335891723633,
524
+ "eval_runtime": 4.434,
525
+ "eval_samples_per_second": 35.182,
526
+ "eval_steps_per_second": 8.796,
527
+ "step": 1316
528
+ },
529
+ {
530
+ "epoch": 29.0,
531
+ "step": 1363,
532
+ "train_accuracy": 0.9316239316239316,
533
+ "train_loss": 0.17876969277858734,
534
+ "train_runtime": 13.3686,
535
+ "train_samples_per_second": 35.008,
536
+ "train_steps_per_second": 8.752
537
+ },
538
+ {
539
+ "epoch": 29.0,
540
+ "eval_accuracy": 0.8782051282051282,
541
+ "eval_loss": 0.6035234928131104,
542
+ "eval_runtime": 4.8916,
543
+ "eval_samples_per_second": 31.891,
544
+ "eval_steps_per_second": 7.973,
545
+ "step": 1363
546
+ },
547
+ {
548
+ "epoch": 30.0,
549
+ "step": 1410,
550
+ "train_accuracy": 0.9572649572649573,
551
+ "train_loss": 0.12170404940843582,
552
+ "train_runtime": 13.3084,
553
+ "train_samples_per_second": 35.166,
554
+ "train_steps_per_second": 8.791
555
+ },
556
+ {
557
+ "epoch": 30.0,
558
+ "eval_accuracy": 0.9230769230769231,
559
+ "eval_loss": 0.3604837954044342,
560
+ "eval_runtime": 4.5664,
561
+ "eval_samples_per_second": 34.163,
562
+ "eval_steps_per_second": 8.541,
563
+ "step": 1410
564
+ },
565
+ {
566
+ "epoch": 31.0,
567
+ "step": 1457,
568
+ "train_accuracy": 0.9444444444444444,
569
+ "train_loss": 0.17193935811519623,
570
+ "train_runtime": 13.3567,
571
+ "train_samples_per_second": 35.039,
572
+ "train_steps_per_second": 8.76
573
+ },
574
+ {
575
+ "epoch": 31.0,
576
+ "eval_accuracy": 0.9038461538461539,
577
+ "eval_loss": 0.42400404810905457,
578
+ "eval_runtime": 4.3974,
579
+ "eval_samples_per_second": 35.475,
580
+ "eval_steps_per_second": 8.869,
581
+ "step": 1457
582
+ },
583
+ {
584
+ "epoch": 31.91,
585
+ "grad_norm": 0.40821418166160583,
586
+ "learning_rate": 4.042553191489362e-06,
587
+ "loss": 0.1715,
588
+ "step": 1500
589
+ },
590
+ {
591
+ "epoch": 32.0,
592
+ "step": 1504,
593
+ "train_accuracy": 0.9551282051282052,
594
+ "train_loss": 0.15071353316307068,
595
+ "train_runtime": 13.338,
596
+ "train_samples_per_second": 35.088,
597
+ "train_steps_per_second": 8.772
598
+ },
599
+ {
600
+ "epoch": 32.0,
601
+ "eval_accuracy": 0.9166666666666666,
602
+ "eval_loss": 0.3800322711467743,
603
+ "eval_runtime": 4.4234,
604
+ "eval_samples_per_second": 35.267,
605
+ "eval_steps_per_second": 8.817,
606
+ "step": 1504
607
+ },
608
+ {
609
+ "epoch": 33.0,
610
+ "step": 1551,
611
+ "train_accuracy": 0.9423076923076923,
612
+ "train_loss": 0.15298214554786682,
613
+ "train_runtime": 13.6206,
614
+ "train_samples_per_second": 34.36,
615
+ "train_steps_per_second": 8.59
616
+ },
617
+ {
618
+ "epoch": 33.0,
619
+ "eval_accuracy": 0.9038461538461539,
620
+ "eval_loss": 0.42538413405418396,
621
+ "eval_runtime": 4.5076,
622
+ "eval_samples_per_second": 34.608,
623
+ "eval_steps_per_second": 8.652,
624
+ "step": 1551
625
+ },
626
+ {
627
+ "epoch": 34.0,
628
+ "step": 1598,
629
+ "train_accuracy": 0.9615384615384616,
630
+ "train_loss": 0.09072276204824448,
631
+ "train_runtime": 13.7151,
632
+ "train_samples_per_second": 34.123,
633
+ "train_steps_per_second": 8.531
634
+ },
635
+ {
636
+ "epoch": 34.0,
637
+ "eval_accuracy": 0.9230769230769231,
638
+ "eval_loss": 0.41397902369499207,
639
+ "eval_runtime": 4.448,
640
+ "eval_samples_per_second": 35.072,
641
+ "eval_steps_per_second": 8.768,
642
+ "step": 1598
643
+ },
644
+ {
645
+ "epoch": 35.0,
646
+ "step": 1645,
647
+ "train_accuracy": 0.9594017094017094,
648
+ "train_loss": 0.15199129283428192,
649
+ "train_runtime": 13.6214,
650
+ "train_samples_per_second": 34.358,
651
+ "train_steps_per_second": 8.589
652
+ },
653
+ {
654
+ "epoch": 35.0,
655
+ "eval_accuracy": 0.9230769230769231,
656
+ "eval_loss": 0.39104607701301575,
657
+ "eval_runtime": 4.4304,
658
+ "eval_samples_per_second": 35.211,
659
+ "eval_steps_per_second": 8.803,
660
+ "step": 1645
661
+ },
662
+ {
663
+ "epoch": 36.0,
664
+ "step": 1692,
665
+ "train_accuracy": 0.9594017094017094,
666
+ "train_loss": 0.134719118475914,
667
+ "train_runtime": 13.9053,
668
+ "train_samples_per_second": 33.656,
669
+ "train_steps_per_second": 8.414
670
+ },
671
+ {
672
+ "epoch": 36.0,
673
+ "eval_accuracy": 0.9102564102564102,
674
+ "eval_loss": 0.4161369204521179,
675
+ "eval_runtime": 4.6871,
676
+ "eval_samples_per_second": 33.283,
677
+ "eval_steps_per_second": 8.321,
678
+ "step": 1692
679
+ },
680
+ {
681
+ "epoch": 37.0,
682
+ "step": 1739,
683
+ "train_accuracy": 0.9529914529914529,
684
+ "train_loss": 0.16535791754722595,
685
+ "train_runtime": 14.0143,
686
+ "train_samples_per_second": 33.394,
687
+ "train_steps_per_second": 8.349
688
+ },
689
+ {
690
+ "epoch": 37.0,
691
+ "eval_accuracy": 0.9102564102564102,
692
+ "eval_loss": 0.43847039341926575,
693
+ "eval_runtime": 4.8404,
694
+ "eval_samples_per_second": 32.229,
695
+ "eval_steps_per_second": 8.057,
696
+ "step": 1739
697
+ },
698
+ {
699
+ "epoch": 38.0,
700
+ "step": 1786,
701
+ "train_accuracy": 0.9487179487179487,
702
+ "train_loss": 0.11996147781610489,
703
+ "train_runtime": 14.1086,
704
+ "train_samples_per_second": 33.171,
705
+ "train_steps_per_second": 8.293
706
+ },
707
+ {
708
+ "epoch": 38.0,
709
+ "eval_accuracy": 0.9166666666666666,
710
+ "eval_loss": 0.41302695870399475,
711
+ "eval_runtime": 4.7702,
712
+ "eval_samples_per_second": 32.703,
713
+ "eval_steps_per_second": 8.176,
714
+ "step": 1786
715
+ },
716
+ {
717
+ "epoch": 39.0,
718
+ "step": 1833,
719
+ "train_accuracy": 0.9529914529914529,
720
+ "train_loss": 0.17013560235500336,
721
+ "train_runtime": 13.8197,
722
+ "train_samples_per_second": 33.865,
723
+ "train_steps_per_second": 8.466
724
+ },
725
+ {
726
+ "epoch": 39.0,
727
+ "eval_accuracy": 0.9166666666666666,
728
+ "eval_loss": 0.4181523323059082,
729
+ "eval_runtime": 5.0402,
730
+ "eval_samples_per_second": 30.951,
731
+ "eval_steps_per_second": 7.738,
732
+ "step": 1833
733
+ },
734
+ {
735
+ "epoch": 40.0,
736
+ "step": 1880,
737
+ "train_accuracy": 0.9551282051282052,
738
+ "train_loss": 0.11466003954410553,
739
+ "train_runtime": 13.6881,
740
+ "train_samples_per_second": 34.19,
741
+ "train_steps_per_second": 8.548
742
+ },
743
+ {
744
+ "epoch": 40.0,
745
+ "eval_accuracy": 0.9166666666666666,
746
+ "eval_loss": 0.4133930802345276,
747
+ "eval_runtime": 4.7653,
748
+ "eval_samples_per_second": 32.736,
749
+ "eval_steps_per_second": 8.184,
750
+ "step": 1880
751
+ }
752
+ ],
753
+ "logging_steps": 500,
754
+ "max_steps": 1880,
755
+ "num_input_tokens_seen": 0,
756
+ "num_train_epochs": 40,
757
+ "save_steps": 500,
758
+ "total_flos": 5.128065177052447e+18,
759
+ "train_batch_size": 10,
760
+ "trial_name": null,
761
+ "trial_params": null
762
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52687b65f9d6d4c07e09fec56b9cef39541528f990e60ca9175a8537158d8089
3
+ size 4856