awanicka committed on
Commit
2f0c90c
·
verified ·
1 Parent(s): 3217289

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -3
  2. all_results.json +13 -0
  3. eval_results.json +8 -0
  4. train_results.json +8 -0
  5. trainer_state.json +640 -0
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
 
 
6
  - generated_from_trainer
7
  datasets:
8
  - imagefolder
@@ -23,7 +25,7 @@ model-index:
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
- value: 0.9820627802690582
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -33,8 +35,8 @@ should probably proofread and complete it, then remove this comment. -->
33
 
34
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.0517
37
- - Accuracy: 0.9821
38
 
39
  ## Model description
40
 
 
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
6
+ - image-classification
7
+ - vision
8
  - generated_from_trainer
9
  datasets:
10
  - imagefolder
 
25
  metrics:
26
  - name: Accuracy
27
  type: accuracy
28
+ value: 0.9955156950672646
29
  ---
30
 
31
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
35
 
36
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
37
  It achieves the following results on the evaluation set:
38
+ - Loss: 0.0411
39
+ - Accuracy: 0.9955
40
 
41
  ## Model description
42
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9955156950672646,
4
+ "eval_loss": 0.0411064513027668,
5
+ "eval_runtime": 27.0318,
6
+ "eval_samples_per_second": 8.25,
7
+ "eval_steps_per_second": 1.036,
8
+ "total_flos": 4.893619144161485e+17,
9
+ "train_loss": 0.08755870511448836,
10
+ "train_runtime": 2689.1935,
11
+ "train_samples_per_second": 2.348,
12
+ "train_steps_per_second": 0.294
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9955156950672646,
4
+ "eval_loss": 0.0411064513027668,
5
+ "eval_runtime": 27.0318,
6
+ "eval_samples_per_second": 8.25,
7
+ "eval_steps_per_second": 1.036
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "total_flos": 4.893619144161485e+17,
4
+ "train_loss": 0.08755870511448836,
5
+ "train_runtime": 2689.1935,
6
+ "train_samples_per_second": 2.348,
7
+ "train_steps_per_second": 0.294
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,640 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.0411064513027668,
3
+ "best_model_checkpoint": "./TransparentBagClassifier/checkpoint-316",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 790,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06329113924050633,
13
+ "grad_norm": 1.8177610635757446,
14
+ "learning_rate": 1.974683544303798e-05,
15
+ "loss": 0.6502,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.12658227848101267,
20
+ "grad_norm": 1.735235333442688,
21
+ "learning_rate": 1.949367088607595e-05,
22
+ "loss": 0.5886,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.189873417721519,
27
+ "grad_norm": 1.5866175889968872,
28
+ "learning_rate": 1.9240506329113926e-05,
29
+ "loss": 0.5145,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.25316455696202533,
34
+ "grad_norm": 2.1754307746887207,
35
+ "learning_rate": 1.89873417721519e-05,
36
+ "loss": 0.4214,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.31645569620253167,
41
+ "grad_norm": 1.926560401916504,
42
+ "learning_rate": 1.8734177215189874e-05,
43
+ "loss": 0.303,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.379746835443038,
48
+ "grad_norm": 1.284515619277954,
49
+ "learning_rate": 1.848101265822785e-05,
50
+ "loss": 0.2735,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.4430379746835443,
55
+ "grad_norm": 1.0484488010406494,
56
+ "learning_rate": 1.8227848101265824e-05,
57
+ "loss": 0.2382,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.5063291139240507,
62
+ "grad_norm": 0.6332635879516602,
63
+ "learning_rate": 1.7974683544303798e-05,
64
+ "loss": 0.1651,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.569620253164557,
69
+ "grad_norm": 0.7192436456680298,
70
+ "learning_rate": 1.7721518987341772e-05,
71
+ "loss": 0.1691,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.6329113924050633,
76
+ "grad_norm": 0.5396292209625244,
77
+ "learning_rate": 1.746835443037975e-05,
78
+ "loss": 0.1371,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.6962025316455697,
83
+ "grad_norm": 1.0010855197906494,
84
+ "learning_rate": 1.7215189873417723e-05,
85
+ "loss": 0.1054,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.759493670886076,
90
+ "grad_norm": 2.316347360610962,
91
+ "learning_rate": 1.6962025316455696e-05,
92
+ "loss": 0.1485,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.8227848101265823,
97
+ "grad_norm": 4.399435043334961,
98
+ "learning_rate": 1.6708860759493674e-05,
99
+ "loss": 0.0898,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.8860759493670886,
104
+ "grad_norm": 0.6504904627799988,
105
+ "learning_rate": 1.6455696202531647e-05,
106
+ "loss": 0.1151,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.9493670886075949,
111
+ "grad_norm": 0.26793766021728516,
112
+ "learning_rate": 1.620253164556962e-05,
113
+ "loss": 0.0694,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 1.0,
118
+ "eval_accuracy": 0.9820627802690582,
119
+ "eval_loss": 0.0719464123249054,
120
+ "eval_runtime": 29.347,
121
+ "eval_samples_per_second": 7.599,
122
+ "eval_steps_per_second": 0.954,
123
+ "step": 158
124
+ },
125
+ {
126
+ "epoch": 1.0126582278481013,
127
+ "grad_norm": 0.31842896342277527,
128
+ "learning_rate": 1.5949367088607598e-05,
129
+ "loss": 0.0743,
130
+ "step": 160
131
+ },
132
+ {
133
+ "epoch": 1.0759493670886076,
134
+ "grad_norm": 4.0135979652404785,
135
+ "learning_rate": 1.5696202531645572e-05,
136
+ "loss": 0.0614,
137
+ "step": 170
138
+ },
139
+ {
140
+ "epoch": 1.139240506329114,
141
+ "grad_norm": 0.2785925567150116,
142
+ "learning_rate": 1.5443037974683546e-05,
143
+ "loss": 0.0635,
144
+ "step": 180
145
+ },
146
+ {
147
+ "epoch": 1.2025316455696202,
148
+ "grad_norm": 0.21624621748924255,
149
+ "learning_rate": 1.5189873417721521e-05,
150
+ "loss": 0.0524,
151
+ "step": 190
152
+ },
153
+ {
154
+ "epoch": 1.2658227848101267,
155
+ "grad_norm": 0.35363027453422546,
156
+ "learning_rate": 1.4936708860759495e-05,
157
+ "loss": 0.0342,
158
+ "step": 200
159
+ },
160
+ {
161
+ "epoch": 1.3291139240506329,
162
+ "grad_norm": 1.0792415142059326,
163
+ "learning_rate": 1.468354430379747e-05,
164
+ "loss": 0.0497,
165
+ "step": 210
166
+ },
167
+ {
168
+ "epoch": 1.3924050632911391,
169
+ "grad_norm": 0.19490359723567963,
170
+ "learning_rate": 1.4430379746835444e-05,
171
+ "loss": 0.0591,
172
+ "step": 220
173
+ },
174
+ {
175
+ "epoch": 1.4556962025316456,
176
+ "grad_norm": 0.16427624225616455,
177
+ "learning_rate": 1.417721518987342e-05,
178
+ "loss": 0.037,
179
+ "step": 230
180
+ },
181
+ {
182
+ "epoch": 1.518987341772152,
183
+ "grad_norm": 7.667697429656982,
184
+ "learning_rate": 1.3924050632911395e-05,
185
+ "loss": 0.089,
186
+ "step": 240
187
+ },
188
+ {
189
+ "epoch": 1.5822784810126582,
190
+ "grad_norm": 0.15142761170864105,
191
+ "learning_rate": 1.3670886075949368e-05,
192
+ "loss": 0.1013,
193
+ "step": 250
194
+ },
195
+ {
196
+ "epoch": 1.6455696202531644,
197
+ "grad_norm": 3.7054553031921387,
198
+ "learning_rate": 1.3417721518987344e-05,
199
+ "loss": 0.077,
200
+ "step": 260
201
+ },
202
+ {
203
+ "epoch": 1.7088607594936709,
204
+ "grad_norm": 0.13252966105937958,
205
+ "learning_rate": 1.3164556962025317e-05,
206
+ "loss": 0.0528,
207
+ "step": 270
208
+ },
209
+ {
210
+ "epoch": 1.7721518987341773,
211
+ "grad_norm": 0.14607028663158417,
212
+ "learning_rate": 1.2911392405063293e-05,
213
+ "loss": 0.1078,
214
+ "step": 280
215
+ },
216
+ {
217
+ "epoch": 1.8354430379746836,
218
+ "grad_norm": 0.17516593635082245,
219
+ "learning_rate": 1.2658227848101268e-05,
220
+ "loss": 0.0872,
221
+ "step": 290
222
+ },
223
+ {
224
+ "epoch": 1.8987341772151898,
225
+ "grad_norm": 0.4909473955631256,
226
+ "learning_rate": 1.240506329113924e-05,
227
+ "loss": 0.1023,
228
+ "step": 300
229
+ },
230
+ {
231
+ "epoch": 1.9620253164556962,
232
+ "grad_norm": 17.056110382080078,
233
+ "learning_rate": 1.2151898734177216e-05,
234
+ "loss": 0.0871,
235
+ "step": 310
236
+ },
237
+ {
238
+ "epoch": 2.0,
239
+ "eval_accuracy": 0.9955156950672646,
240
+ "eval_loss": 0.0411064513027668,
241
+ "eval_runtime": 30.7571,
242
+ "eval_samples_per_second": 7.25,
243
+ "eval_steps_per_second": 0.91,
244
+ "step": 316
245
+ },
246
+ {
247
+ "epoch": 2.0253164556962027,
248
+ "grad_norm": 0.14253969490528107,
249
+ "learning_rate": 1.189873417721519e-05,
250
+ "loss": 0.0258,
251
+ "step": 320
252
+ },
253
+ {
254
+ "epoch": 2.088607594936709,
255
+ "grad_norm": 0.1595589965581894,
256
+ "learning_rate": 1.1645569620253165e-05,
257
+ "loss": 0.0181,
258
+ "step": 330
259
+ },
260
+ {
261
+ "epoch": 2.151898734177215,
262
+ "grad_norm": 0.11532098799943924,
263
+ "learning_rate": 1.139240506329114e-05,
264
+ "loss": 0.0369,
265
+ "step": 340
266
+ },
267
+ {
268
+ "epoch": 2.2151898734177213,
269
+ "grad_norm": 0.10694638639688492,
270
+ "learning_rate": 1.1139240506329114e-05,
271
+ "loss": 0.055,
272
+ "step": 350
273
+ },
274
+ {
275
+ "epoch": 2.278481012658228,
276
+ "grad_norm": 0.11190392822027206,
277
+ "learning_rate": 1.088607594936709e-05,
278
+ "loss": 0.0981,
279
+ "step": 360
280
+ },
281
+ {
282
+ "epoch": 2.3417721518987342,
283
+ "grad_norm": 0.11986145377159119,
284
+ "learning_rate": 1.0632911392405063e-05,
285
+ "loss": 0.1199,
286
+ "step": 370
287
+ },
288
+ {
289
+ "epoch": 2.4050632911392404,
290
+ "grad_norm": 0.8623329401016235,
291
+ "learning_rate": 1.0379746835443039e-05,
292
+ "loss": 0.076,
293
+ "step": 380
294
+ },
295
+ {
296
+ "epoch": 2.4683544303797467,
297
+ "grad_norm": 0.34745362401008606,
298
+ "learning_rate": 1.0126582278481014e-05,
299
+ "loss": 0.0396,
300
+ "step": 390
301
+ },
302
+ {
303
+ "epoch": 2.5316455696202533,
304
+ "grad_norm": 2.8758838176727295,
305
+ "learning_rate": 9.87341772151899e-06,
306
+ "loss": 0.0168,
307
+ "step": 400
308
+ },
309
+ {
310
+ "epoch": 2.5949367088607596,
311
+ "grad_norm": 0.15997739136219025,
312
+ "learning_rate": 9.620253164556963e-06,
313
+ "loss": 0.0848,
314
+ "step": 410
315
+ },
316
+ {
317
+ "epoch": 2.6582278481012658,
318
+ "grad_norm": 0.1280534863471985,
319
+ "learning_rate": 9.367088607594937e-06,
320
+ "loss": 0.0895,
321
+ "step": 420
322
+ },
323
+ {
324
+ "epoch": 2.721518987341772,
325
+ "grad_norm": 0.09299108386039734,
326
+ "learning_rate": 9.113924050632912e-06,
327
+ "loss": 0.0217,
328
+ "step": 430
329
+ },
330
+ {
331
+ "epoch": 2.7848101265822782,
332
+ "grad_norm": 0.09745831787586212,
333
+ "learning_rate": 8.860759493670886e-06,
334
+ "loss": 0.0136,
335
+ "step": 440
336
+ },
337
+ {
338
+ "epoch": 2.848101265822785,
339
+ "grad_norm": 0.3660314977169037,
340
+ "learning_rate": 8.607594936708861e-06,
341
+ "loss": 0.066,
342
+ "step": 450
343
+ },
344
+ {
345
+ "epoch": 2.911392405063291,
346
+ "grad_norm": 0.10188435018062592,
347
+ "learning_rate": 8.354430379746837e-06,
348
+ "loss": 0.0337,
349
+ "step": 460
350
+ },
351
+ {
352
+ "epoch": 2.9746835443037973,
353
+ "grad_norm": 0.08400288224220276,
354
+ "learning_rate": 8.10126582278481e-06,
355
+ "loss": 0.0561,
356
+ "step": 470
357
+ },
358
+ {
359
+ "epoch": 3.0,
360
+ "eval_accuracy": 0.9910313901345291,
361
+ "eval_loss": 0.041871435940265656,
362
+ "eval_runtime": 28.742,
363
+ "eval_samples_per_second": 7.759,
364
+ "eval_steps_per_second": 0.974,
365
+ "step": 474
366
+ },
367
+ {
368
+ "epoch": 3.037974683544304,
369
+ "grad_norm": 0.08712360262870789,
370
+ "learning_rate": 7.848101265822786e-06,
371
+ "loss": 0.0165,
372
+ "step": 480
373
+ },
374
+ {
375
+ "epoch": 3.1012658227848102,
376
+ "grad_norm": 0.08917823433876038,
377
+ "learning_rate": 7.5949367088607605e-06,
378
+ "loss": 0.0788,
379
+ "step": 490
380
+ },
381
+ {
382
+ "epoch": 3.1645569620253164,
383
+ "grad_norm": 12.753520965576172,
384
+ "learning_rate": 7.341772151898735e-06,
385
+ "loss": 0.0523,
386
+ "step": 500
387
+ },
388
+ {
389
+ "epoch": 3.2278481012658227,
390
+ "grad_norm": 0.1649811565876007,
391
+ "learning_rate": 7.08860759493671e-06,
392
+ "loss": 0.0119,
393
+ "step": 510
394
+ },
395
+ {
396
+ "epoch": 3.291139240506329,
397
+ "grad_norm": 0.08447328209877014,
398
+ "learning_rate": 6.835443037974684e-06,
399
+ "loss": 0.0114,
400
+ "step": 520
401
+ },
402
+ {
403
+ "epoch": 3.3544303797468356,
404
+ "grad_norm": 0.48710471391677856,
405
+ "learning_rate": 6.582278481012659e-06,
406
+ "loss": 0.0506,
407
+ "step": 530
408
+ },
409
+ {
410
+ "epoch": 3.4177215189873418,
411
+ "grad_norm": 0.10614439100027084,
412
+ "learning_rate": 6.329113924050634e-06,
413
+ "loss": 0.0155,
414
+ "step": 540
415
+ },
416
+ {
417
+ "epoch": 3.481012658227848,
418
+ "grad_norm": 0.08778823167085648,
419
+ "learning_rate": 6.075949367088608e-06,
420
+ "loss": 0.0118,
421
+ "step": 550
422
+ },
423
+ {
424
+ "epoch": 3.5443037974683547,
425
+ "grad_norm": 0.08901780843734741,
426
+ "learning_rate": 5.8227848101265824e-06,
427
+ "loss": 0.0144,
428
+ "step": 560
429
+ },
430
+ {
431
+ "epoch": 3.607594936708861,
432
+ "grad_norm": 3.638420581817627,
433
+ "learning_rate": 5.569620253164557e-06,
434
+ "loss": 0.0123,
435
+ "step": 570
436
+ },
437
+ {
438
+ "epoch": 3.670886075949367,
439
+ "grad_norm": 0.07501719892024994,
440
+ "learning_rate": 5.3164556962025316e-06,
441
+ "loss": 0.0112,
442
+ "step": 580
443
+ },
444
+ {
445
+ "epoch": 3.7341772151898733,
446
+ "grad_norm": 0.07867681980133057,
447
+ "learning_rate": 5.063291139240507e-06,
448
+ "loss": 0.0175,
449
+ "step": 590
450
+ },
451
+ {
452
+ "epoch": 3.7974683544303796,
453
+ "grad_norm": 1.1297374963760376,
454
+ "learning_rate": 4.8101265822784815e-06,
455
+ "loss": 0.0109,
456
+ "step": 600
457
+ },
458
+ {
459
+ "epoch": 3.8607594936708862,
460
+ "grad_norm": 0.27534130215644836,
461
+ "learning_rate": 4.556962025316456e-06,
462
+ "loss": 0.0386,
463
+ "step": 610
464
+ },
465
+ {
466
+ "epoch": 3.9240506329113924,
467
+ "grad_norm": 0.07276225835084915,
468
+ "learning_rate": 4.303797468354431e-06,
469
+ "loss": 0.0186,
470
+ "step": 620
471
+ },
472
+ {
473
+ "epoch": 3.9873417721518987,
474
+ "grad_norm": 0.07785341143608093,
475
+ "learning_rate": 4.050632911392405e-06,
476
+ "loss": 0.0673,
477
+ "step": 630
478
+ },
479
+ {
480
+ "epoch": 4.0,
481
+ "eval_accuracy": 0.9865470852017937,
482
+ "eval_loss": 0.04242047667503357,
483
+ "eval_runtime": 41.253,
484
+ "eval_samples_per_second": 5.406,
485
+ "eval_steps_per_second": 0.679,
486
+ "step": 632
487
+ },
488
+ {
489
+ "epoch": 4.050632911392405,
490
+ "grad_norm": 0.07723096013069153,
491
+ "learning_rate": 3.7974683544303802e-06,
492
+ "loss": 0.0101,
493
+ "step": 640
494
+ },
495
+ {
496
+ "epoch": 4.113924050632911,
497
+ "grad_norm": 0.07696326822042465,
498
+ "learning_rate": 3.544303797468355e-06,
499
+ "loss": 0.0289,
500
+ "step": 650
501
+ },
502
+ {
503
+ "epoch": 4.177215189873418,
504
+ "grad_norm": 0.07799258828163147,
505
+ "learning_rate": 3.2911392405063294e-06,
506
+ "loss": 0.0604,
507
+ "step": 660
508
+ },
509
+ {
510
+ "epoch": 4.2405063291139244,
511
+ "grad_norm": 0.08302771300077438,
512
+ "learning_rate": 3.037974683544304e-06,
513
+ "loss": 0.0096,
514
+ "step": 670
515
+ },
516
+ {
517
+ "epoch": 4.30379746835443,
518
+ "grad_norm": 0.0777091532945633,
519
+ "learning_rate": 2.7848101265822785e-06,
520
+ "loss": 0.0096,
521
+ "step": 680
522
+ },
523
+ {
524
+ "epoch": 4.367088607594937,
525
+ "grad_norm": 0.1073741614818573,
526
+ "learning_rate": 2.5316455696202535e-06,
527
+ "loss": 0.0128,
528
+ "step": 690
529
+ },
530
+ {
531
+ "epoch": 4.430379746835443,
532
+ "grad_norm": 0.06873705983161926,
533
+ "learning_rate": 2.278481012658228e-06,
534
+ "loss": 0.1127,
535
+ "step": 700
536
+ },
537
+ {
538
+ "epoch": 4.493670886075949,
539
+ "grad_norm": 0.10447876155376434,
540
+ "learning_rate": 2.0253164556962026e-06,
541
+ "loss": 0.0099,
542
+ "step": 710
543
+ },
544
+ {
545
+ "epoch": 4.556962025316456,
546
+ "grad_norm": 0.07418622821569443,
547
+ "learning_rate": 1.7721518987341774e-06,
548
+ "loss": 0.0138,
549
+ "step": 720
550
+ },
551
+ {
552
+ "epoch": 4.620253164556962,
553
+ "grad_norm": 0.07888732105493546,
554
+ "learning_rate": 1.518987341772152e-06,
555
+ "loss": 0.0539,
556
+ "step": 730
557
+ },
558
+ {
559
+ "epoch": 4.6835443037974684,
560
+ "grad_norm": 0.07641228288412094,
561
+ "learning_rate": 1.2658227848101267e-06,
562
+ "loss": 0.0095,
563
+ "step": 740
564
+ },
565
+ {
566
+ "epoch": 4.746835443037975,
567
+ "grad_norm": 0.07773224264383316,
568
+ "learning_rate": 1.0126582278481013e-06,
569
+ "loss": 0.0501,
570
+ "step": 750
571
+ },
572
+ {
573
+ "epoch": 4.810126582278481,
574
+ "grad_norm": 0.08096041530370712,
575
+ "learning_rate": 7.59493670886076e-07,
576
+ "loss": 0.0093,
577
+ "step": 760
578
+ },
579
+ {
580
+ "epoch": 4.8734177215189876,
581
+ "grad_norm": 0.0873405709862709,
582
+ "learning_rate": 5.063291139240507e-07,
583
+ "loss": 0.0842,
584
+ "step": 770
585
+ },
586
+ {
587
+ "epoch": 4.936708860759493,
588
+ "grad_norm": 0.788959264755249,
589
+ "learning_rate": 2.5316455696202533e-07,
590
+ "loss": 0.0162,
591
+ "step": 780
592
+ },
593
+ {
594
+ "epoch": 5.0,
595
+ "grad_norm": 0.11727602779865265,
596
+ "learning_rate": 0.0,
597
+ "loss": 0.0099,
598
+ "step": 790
599
+ },
600
+ {
601
+ "epoch": 5.0,
602
+ "eval_accuracy": 0.9820627802690582,
603
+ "eval_loss": 0.05168794468045235,
604
+ "eval_runtime": 29.684,
605
+ "eval_samples_per_second": 7.512,
606
+ "eval_steps_per_second": 0.943,
607
+ "step": 790
608
+ },
609
+ {
610
+ "epoch": 5.0,
611
+ "step": 790,
612
+ "total_flos": 4.893619144161485e+17,
613
+ "train_loss": 0.08755870511448836,
614
+ "train_runtime": 2689.1935,
615
+ "train_samples_per_second": 2.348,
616
+ "train_steps_per_second": 0.294
617
+ }
618
+ ],
619
+ "logging_steps": 10,
620
+ "max_steps": 790,
621
+ "num_input_tokens_seen": 0,
622
+ "num_train_epochs": 5,
623
+ "save_steps": 500,
624
+ "stateful_callbacks": {
625
+ "TrainerControl": {
626
+ "args": {
627
+ "should_epoch_stop": false,
628
+ "should_evaluate": false,
629
+ "should_log": false,
630
+ "should_save": true,
631
+ "should_training_stop": true
632
+ },
633
+ "attributes": {}
634
+ }
635
+ },
636
+ "total_flos": 4.893619144161485e+17,
637
+ "train_batch_size": 8,
638
+ "trial_name": null,
639
+ "trial_params": null
640
+ }