dima806 commited on
Commit
72850c8
·
verified ·
1 Parent(s): 9500f1e

Upload folder using huggingface_hub

Browse files
checkpoint-31974/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4434bb1c21783caa0078e23ee6f1962484398526b4d9b38ba912fb4305c8921d
3
  size 343387012
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f264162406cd17fcf0599f6f7c0ad35e95c7d2759c1a4c22c1c88689bb6fea6
3
  size 343387012
checkpoint-31974/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39d09992be6f219f1b52b97f7b474c809b08afdec896bd4e66ef0b1ff2c5b686
3
  size 686894469
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fac87579cac35eebed50d1dde6502cefc1af83f7d4d7e492ea24913c02bac8bb
3
  size 686894469
checkpoint-31974/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1abc14e7ba11e17402625b7b339fefea2dcce1e00bf11864da3caa4b97d7d60a
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9c551fd44ddc8ddfa398002e02443e4ff95bea8a77387c173af1507fc08e78
3
  size 627
checkpoint-31974/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 2.573267698287964,
3
  "best_model_checkpoint": "car_brands_image_detection/checkpoint-31974",
4
  "epoch": 6.0,
5
  "eval_steps": 500,
@@ -10,503 +10,503 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.09382623381497467,
13
- "grad_norm": 1.1010551452636719,
14
- "learning_rate": 1.9718080441047486e-06,
15
- "loss": 4.0,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.18765246762994933,
20
- "grad_norm": 1.328322410583496,
21
- "learning_rate": 1.9404836486655806e-06,
22
- "loss": 3.9571,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.281478701444924,
27
- "grad_norm": 1.4056262969970703,
28
- "learning_rate": 1.9091592532264125e-06,
29
- "loss": 3.9054,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.37530493525989866,
34
- "grad_norm": 1.557868480682373,
35
- "learning_rate": 1.8778348577872447e-06,
36
- "loss": 3.8497,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 0.46913116907487334,
41
- "grad_norm": 1.532055377960205,
42
- "learning_rate": 1.8465104623480764e-06,
43
- "loss": 3.7942,
44
  "step": 2500
45
  },
46
  {
47
  "epoch": 0.562957402889848,
48
- "grad_norm": 1.6487107276916504,
49
- "learning_rate": 1.8151860669089086e-06,
50
- "loss": 3.7438,
51
  "step": 3000
52
  },
53
  {
54
  "epoch": 0.6567836367048226,
55
- "grad_norm": 1.6391078233718872,
56
- "learning_rate": 1.7838616714697405e-06,
57
- "loss": 3.6947,
58
  "step": 3500
59
  },
60
  {
61
  "epoch": 0.7506098705197973,
62
- "grad_norm": 1.8407845497131348,
63
- "learning_rate": 1.7525372760305725e-06,
64
- "loss": 3.6408,
65
  "step": 4000
66
  },
67
  {
68
  "epoch": 0.844436104334772,
69
- "grad_norm": 1.6594038009643555,
70
- "learning_rate": 1.7212128805914046e-06,
71
- "loss": 3.5885,
72
  "step": 4500
73
  },
74
  {
75
  "epoch": 0.9382623381497467,
76
- "grad_norm": 1.889947772026062,
77
- "learning_rate": 1.6898884851522366e-06,
78
- "loss": 3.5404,
79
  "step": 5000
80
  },
81
  {
82
  "epoch": 1.0,
83
- "eval_accuracy": 0.28911624469975544,
84
- "eval_loss": 3.5254716873168945,
85
- "eval_model_preparation_time": 0.0054,
86
- "eval_runtime": 1133.5642,
87
- "eval_samples_per_second": 100.28,
88
- "eval_steps_per_second": 12.536,
89
  "step": 5329
90
  },
91
  {
92
  "epoch": 1.0320885719647213,
93
- "grad_norm": 2.195939540863037,
94
- "learning_rate": 1.6585640897130683e-06,
95
- "loss": 3.4895,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.125914805779696,
100
- "grad_norm": 2.153141736984253,
101
- "learning_rate": 1.6272396942739004e-06,
102
- "loss": 3.4444,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.2197410395946706,
107
- "grad_norm": 1.90287446975708,
108
- "learning_rate": 1.5959152988347324e-06,
109
- "loss": 3.4021,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 1.3135672734096453,
114
- "grad_norm": 2.318300247192383,
115
- "learning_rate": 1.5645909033955643e-06,
116
- "loss": 3.3694,
117
  "step": 7000
118
  },
119
  {
120
  "epoch": 1.40739350722462,
121
- "grad_norm": 2.0277891159057617,
122
- "learning_rate": 1.5332665079563965e-06,
123
- "loss": 3.335,
124
  "step": 7500
125
  },
126
  {
127
  "epoch": 1.5012197410395947,
128
- "grad_norm": 2.453045606613159,
129
- "learning_rate": 1.5019421125172282e-06,
130
- "loss": 3.2886,
131
  "step": 8000
132
  },
133
  {
134
  "epoch": 1.5950459748545693,
135
- "grad_norm": 2.8956668376922607,
136
- "learning_rate": 1.4706177170780602e-06,
137
- "loss": 3.2617,
138
  "step": 8500
139
  },
140
  {
141
  "epoch": 1.688872208669544,
142
- "grad_norm": 2.398240804672241,
143
- "learning_rate": 1.4392933216388923e-06,
144
- "loss": 3.2273,
145
  "step": 9000
146
  },
147
  {
148
  "epoch": 1.7826984424845187,
149
- "grad_norm": 3.2654738426208496,
150
- "learning_rate": 1.4079689261997243e-06,
151
- "loss": 3.1976,
152
  "step": 9500
153
  },
154
  {
155
  "epoch": 1.8765246762994934,
156
- "grad_norm": 2.843698263168335,
157
- "learning_rate": 1.3766445307605562e-06,
158
- "loss": 3.1636,
159
  "step": 10000
160
  },
161
  {
162
  "epoch": 1.970350910114468,
163
- "grad_norm": 3.392847776412964,
164
- "learning_rate": 1.3453201353213884e-06,
165
- "loss": 3.1312,
166
  "step": 10500
167
  },
168
  {
169
  "epoch": 2.0,
170
- "eval_accuracy": 0.3840104157503035,
171
- "eval_loss": 3.157210350036621,
172
- "eval_model_preparation_time": 0.0054,
173
- "eval_runtime": 1120.185,
174
- "eval_samples_per_second": 101.478,
175
- "eval_steps_per_second": 12.685,
176
  "step": 10658
177
  },
178
  {
179
  "epoch": 2.0641771439294425,
180
- "grad_norm": 2.8789336681365967,
181
- "learning_rate": 1.3139957398822201e-06,
182
- "loss": 3.1001,
183
  "step": 11000
184
  },
185
  {
186
  "epoch": 2.1580033777444174,
187
- "grad_norm": 3.523937225341797,
188
- "learning_rate": 1.282671344443052e-06,
189
- "loss": 3.065,
190
  "step": 11500
191
  },
192
  {
193
  "epoch": 2.251829611559392,
194
- "grad_norm": 3.3203866481781006,
195
- "learning_rate": 1.2513469490038842e-06,
196
- "loss": 3.0418,
197
  "step": 12000
198
  },
199
  {
200
  "epoch": 2.3456558453743668,
201
- "grad_norm": 2.9499928951263428,
202
- "learning_rate": 1.2200225535647162e-06,
203
- "loss": 3.0185,
204
  "step": 12500
205
  },
206
  {
207
  "epoch": 2.4394820791893412,
208
- "grad_norm": 3.711545705795288,
209
- "learning_rate": 1.1886981581255481e-06,
210
- "loss": 2.9926,
211
  "step": 13000
212
  },
213
  {
214
  "epoch": 2.533308313004316,
215
- "grad_norm": 3.0691421031951904,
216
- "learning_rate": 1.1573737626863803e-06,
217
- "loss": 2.9589,
218
  "step": 13500
219
  },
220
  {
221
  "epoch": 2.6271345468192906,
222
- "grad_norm": 2.8521406650543213,
223
- "learning_rate": 1.126049367247212e-06,
224
- "loss": 2.9496,
225
  "step": 14000
226
  },
227
  {
228
  "epoch": 2.7209607806342655,
229
- "grad_norm": 3.645907402038574,
230
- "learning_rate": 1.094724971808044e-06,
231
- "loss": 2.9115,
232
  "step": 14500
233
  },
234
  {
235
  "epoch": 2.81478701444924,
236
- "grad_norm": 2.6633188724517822,
237
- "learning_rate": 1.063400576368876e-06,
238
- "loss": 2.8923,
239
  "step": 15000
240
  },
241
  {
242
  "epoch": 2.9086132482642144,
243
- "grad_norm": 3.053062915802002,
244
- "learning_rate": 1.032076180929708e-06,
245
- "loss": 2.8703,
246
  "step": 15500
247
  },
248
  {
249
  "epoch": 3.0,
250
- "eval_accuracy": 0.43637067403276036,
251
- "eval_loss": 2.8984599113464355,
252
- "eval_model_preparation_time": 0.0054,
253
- "eval_runtime": 1123.7967,
254
- "eval_samples_per_second": 101.152,
255
- "eval_steps_per_second": 12.645,
256
  "step": 15987
257
  },
258
  {
259
  "epoch": 3.0024394820791893,
260
- "grad_norm": 2.9147167205810547,
261
- "learning_rate": 1.00075178549054e-06,
262
- "loss": 2.8419,
263
  "step": 16000
264
  },
265
  {
266
  "epoch": 3.096265715894164,
267
- "grad_norm": 3.5354490280151367,
268
- "learning_rate": 9.69427390051372e-07,
269
- "loss": 2.8221,
270
  "step": 16500
271
  },
272
  {
273
  "epoch": 3.1900919497091387,
274
- "grad_norm": 3.0408437252044678,
275
- "learning_rate": 9.38102994612204e-07,
276
- "loss": 2.7963,
277
  "step": 17000
278
  },
279
  {
280
  "epoch": 3.283918183524113,
281
- "grad_norm": 4.841484069824219,
282
- "learning_rate": 9.067785991730359e-07,
283
- "loss": 2.7837,
284
  "step": 17500
285
  },
286
  {
287
  "epoch": 3.377744417339088,
288
- "grad_norm": 3.432560682296753,
289
- "learning_rate": 8.754542037338679e-07,
290
- "loss": 2.7699,
291
  "step": 18000
292
  },
293
  {
294
  "epoch": 3.4715706511540625,
295
- "grad_norm": 4.054901123046875,
296
- "learning_rate": 8.441298082946998e-07,
297
- "loss": 2.7518,
298
  "step": 18500
299
  },
300
  {
301
  "epoch": 3.5653968849690374,
302
- "grad_norm": 4.113468647003174,
303
- "learning_rate": 8.128054128555319e-07,
304
- "loss": 2.73,
305
  "step": 19000
306
  },
307
  {
308
  "epoch": 3.659223118784012,
309
- "grad_norm": 3.7899582386016846,
310
- "learning_rate": 7.814810174163638e-07,
311
- "loss": 2.6995,
312
  "step": 19500
313
  },
314
  {
315
  "epoch": 3.7530493525989868,
316
- "grad_norm": 3.954853057861328,
317
- "learning_rate": 7.501566219771958e-07,
318
- "loss": 2.6955,
319
  "step": 20000
320
  },
321
  {
322
  "epoch": 3.846875586413961,
323
- "grad_norm": 3.6600358486175537,
324
- "learning_rate": 7.188322265380278e-07,
325
- "loss": 2.6632,
326
  "step": 20500
327
  },
328
  {
329
  "epoch": 3.940701820228936,
330
- "grad_norm": 5.025015830993652,
331
- "learning_rate": 6.875078310988598e-07,
332
- "loss": 2.6537,
333
  "step": 21000
334
  },
335
  {
336
  "epoch": 4.0,
337
- "eval_accuracy": 0.47423333391980577,
338
- "eval_loss": 2.717848300933838,
339
- "eval_model_preparation_time": 0.0054,
340
- "eval_runtime": 1116.9833,
341
- "eval_samples_per_second": 101.769,
342
- "eval_steps_per_second": 12.722,
343
  "step": 21316
344
  },
345
  {
346
  "epoch": 4.034528054043911,
347
- "grad_norm": 3.939284324645996,
348
- "learning_rate": 6.561834356596917e-07,
349
- "loss": 2.6525,
350
  "step": 21500
351
  },
352
  {
353
  "epoch": 4.128354287858885,
354
- "grad_norm": 3.3858115673065186,
355
- "learning_rate": 6.248590402205238e-07,
356
- "loss": 2.6308,
357
  "step": 22000
358
  },
359
  {
360
  "epoch": 4.22218052167386,
361
- "grad_norm": 3.881986379623413,
362
- "learning_rate": 5.935346447813557e-07,
363
- "loss": 2.6093,
364
  "step": 22500
365
  },
366
  {
367
  "epoch": 4.316006755488835,
368
- "grad_norm": 3.7069151401519775,
369
- "learning_rate": 5.622102493421877e-07,
370
- "loss": 2.6153,
371
  "step": 23000
372
  },
373
  {
374
  "epoch": 4.409832989303809,
375
- "grad_norm": 4.156313419342041,
376
- "learning_rate": 5.308858539030196e-07,
377
- "loss": 2.5988,
378
  "step": 23500
379
  },
380
  {
381
  "epoch": 4.503659223118784,
382
- "grad_norm": 3.6459434032440186,
383
- "learning_rate": 4.995614584638517e-07,
384
- "loss": 2.5809,
385
  "step": 24000
386
  },
387
  {
388
  "epoch": 4.597485456933759,
389
- "grad_norm": 3.659991502761841,
390
- "learning_rate": 4.682370630246836e-07,
391
- "loss": 2.57,
392
  "step": 24500
393
  },
394
  {
395
  "epoch": 4.6913116907487336,
396
- "grad_norm": 3.7952539920806885,
397
- "learning_rate": 4.369126675855156e-07,
398
- "loss": 2.5518,
399
  "step": 25000
400
  },
401
  {
402
  "epoch": 4.785137924563708,
403
- "grad_norm": 4.7407026290893555,
404
- "learning_rate": 4.0558827214634755e-07,
405
- "loss": 2.5444,
406
  "step": 25500
407
  },
408
  {
409
  "epoch": 4.8789641583786825,
410
- "grad_norm": 2.688025951385498,
411
- "learning_rate": 3.742638767071795e-07,
412
- "loss": 2.5441,
413
  "step": 26000
414
  },
415
  {
416
  "epoch": 4.972790392193657,
417
- "grad_norm": 4.759518623352051,
418
- "learning_rate": 3.4293948126801154e-07,
419
- "loss": 2.5364,
420
  "step": 26500
421
  },
422
  {
423
  "epoch": 5.0,
424
- "eval_accuracy": 0.49621725284585744,
425
- "eval_loss": 2.6088995933532715,
426
- "eval_model_preparation_time": 0.0054,
427
- "eval_runtime": 1104.5624,
428
- "eval_samples_per_second": 102.913,
429
- "eval_steps_per_second": 12.865,
430
  "step": 26645
431
  },
432
  {
433
  "epoch": 5.066616626008632,
434
- "grad_norm": 6.005688190460205,
435
- "learning_rate": 3.116150858288435e-07,
436
- "loss": 2.5174,
437
  "step": 27000
438
  },
439
  {
440
  "epoch": 5.160442859823607,
441
- "grad_norm": 3.0456652641296387,
442
- "learning_rate": 2.8029069038967543e-07,
443
- "loss": 2.5143,
444
  "step": 27500
445
  },
446
  {
447
  "epoch": 5.254269093638581,
448
- "grad_norm": 4.690863132476807,
449
- "learning_rate": 2.4896629495050743e-07,
450
- "loss": 2.5159,
451
  "step": 28000
452
  },
453
  {
454
  "epoch": 5.348095327453556,
455
- "grad_norm": 6.047962665557861,
456
- "learning_rate": 2.1764189951133943e-07,
457
- "loss": 2.5094,
458
  "step": 28500
459
  },
460
  {
461
  "epoch": 5.441921561268531,
462
- "grad_norm": 4.963265895843506,
463
- "learning_rate": 1.863175040721714e-07,
464
- "loss": 2.5113,
465
  "step": 29000
466
  },
467
  {
468
  "epoch": 5.5357477950835055,
469
- "grad_norm": 4.23138952255249,
470
- "learning_rate": 1.5499310863300338e-07,
471
- "loss": 2.4888,
472
  "step": 29500
473
  },
474
  {
475
  "epoch": 5.62957402889848,
476
- "grad_norm": 3.680149793624878,
477
- "learning_rate": 1.2366871319383535e-07,
478
- "loss": 2.5027,
479
  "step": 30000
480
  },
481
  {
482
  "epoch": 5.723400262713454,
483
- "grad_norm": 3.7554168701171875,
484
- "learning_rate": 9.234431775466733e-08,
485
- "loss": 2.4941,
486
  "step": 30500
487
  },
488
  {
489
  "epoch": 5.81722649652843,
490
- "grad_norm": 3.9157700538635254,
491
- "learning_rate": 6.10199223154993e-08,
492
- "loss": 2.4996,
493
  "step": 31000
494
  },
495
  {
496
  "epoch": 5.911052730343404,
497
- "grad_norm": 3.978684902191162,
498
- "learning_rate": 2.9695526876331285e-08,
499
- "loss": 2.4811,
500
  "step": 31500
501
  },
502
  {
503
  "epoch": 6.0,
504
- "eval_accuracy": 0.5020937065643859,
505
- "eval_loss": 2.573267698287964,
506
- "eval_model_preparation_time": 0.0054,
507
- "eval_runtime": 1092.2998,
508
- "eval_samples_per_second": 104.068,
509
- "eval_steps_per_second": 13.009,
510
  "step": 31974
511
  }
512
  ],
 
1
  {
2
+ "best_metric": 1.374941349029541,
3
  "best_model_checkpoint": "car_brands_image_detection/checkpoint-31974",
4
  "epoch": 6.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.09382623381497467,
13
+ "grad_norm": 1.4237160682678223,
14
+ "learning_rate": 4.929520110261872e-06,
15
+ "loss": 3.9691,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.18765246762994933,
20
+ "grad_norm": 1.500626802444458,
21
+ "learning_rate": 4.8512091216639525e-06,
22
+ "loss": 3.8449,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.281478701444924,
27
+ "grad_norm": 1.6687654256820679,
28
+ "learning_rate": 4.7728981330660325e-06,
29
+ "loss": 3.6987,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.37530493525989866,
34
+ "grad_norm": 2.551961898803711,
35
+ "learning_rate": 4.694587144468113e-06,
36
+ "loss": 3.5622,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 0.46913116907487334,
41
+ "grad_norm": 2.852647542953491,
42
+ "learning_rate": 4.616276155870192e-06,
43
+ "loss": 3.4424,
44
  "step": 2500
45
  },
46
  {
47
  "epoch": 0.562957402889848,
48
+ "grad_norm": 2.4419143199920654,
49
+ "learning_rate": 4.537965167272272e-06,
50
+ "loss": 3.3403,
51
  "step": 3000
52
  },
53
  {
54
  "epoch": 0.6567836367048226,
55
+ "grad_norm": 2.429936647415161,
56
+ "learning_rate": 4.459654178674352e-06,
57
+ "loss": 3.2402,
58
  "step": 3500
59
  },
60
  {
61
  "epoch": 0.7506098705197973,
62
+ "grad_norm": 2.774019956588745,
63
+ "learning_rate": 4.381343190076432e-06,
64
+ "loss": 3.1484,
65
  "step": 4000
66
  },
67
  {
68
  "epoch": 0.844436104334772,
69
+ "grad_norm": 3.3507306575775146,
70
+ "learning_rate": 4.303032201478512e-06,
71
+ "loss": 3.0678,
72
  "step": 4500
73
  },
74
  {
75
  "epoch": 0.9382623381497467,
76
+ "grad_norm": 2.8889214992523193,
77
+ "learning_rate": 4.224721212880592e-06,
78
+ "loss": 2.9878,
79
  "step": 5000
80
  },
81
  {
82
  "epoch": 1.0,
83
+ "eval_accuracy": 0.4087302285483048,
84
+ "eval_loss": 2.9374125003814697,
85
+ "eval_model_preparation_time": 0.0053,
86
+ "eval_runtime": 1096.2596,
87
+ "eval_samples_per_second": 103.693,
88
+ "eval_steps_per_second": 12.962,
89
  "step": 5329
90
  },
91
  {
92
  "epoch": 1.0320885719647213,
93
+ "grad_norm": 3.342593193054199,
94
+ "learning_rate": 4.1464102242826715e-06,
95
+ "loss": 2.9069,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.125914805779696,
100
+ "grad_norm": 3.207279920578003,
101
+ "learning_rate": 4.0680992356847515e-06,
102
+ "loss": 2.8357,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.2197410395946706,
107
+ "grad_norm": 3.5192267894744873,
108
+ "learning_rate": 3.989788247086832e-06,
109
+ "loss": 2.7523,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 1.3135672734096453,
114
+ "grad_norm": 4.029764652252197,
115
+ "learning_rate": 3.911477258488912e-06,
116
+ "loss": 2.6941,
117
  "step": 7000
118
  },
119
  {
120
  "epoch": 1.40739350722462,
121
+ "grad_norm": 4.04934024810791,
122
+ "learning_rate": 3.833166269890992e-06,
123
+ "loss": 2.6166,
124
  "step": 7500
125
  },
126
  {
127
  "epoch": 1.5012197410395947,
128
+ "grad_norm": 5.199851989746094,
129
+ "learning_rate": 3.754855281293071e-06,
130
+ "loss": 2.5469,
131
  "step": 8000
132
  },
133
  {
134
  "epoch": 1.5950459748545693,
135
+ "grad_norm": 3.6607906818389893,
136
+ "learning_rate": 3.676544292695151e-06,
137
+ "loss": 2.4769,
138
  "step": 8500
139
  },
140
  {
141
  "epoch": 1.688872208669544,
142
+ "grad_norm": 4.265751361846924,
143
+ "learning_rate": 3.598233304097231e-06,
144
+ "loss": 2.4212,
145
  "step": 9000
146
  },
147
  {
148
  "epoch": 1.7826984424845187,
149
+ "grad_norm": 5.644491672515869,
150
+ "learning_rate": 3.5199223154993112e-06,
151
+ "loss": 2.3652,
152
  "step": 9500
153
  },
154
  {
155
  "epoch": 1.8765246762994934,
156
+ "grad_norm": 5.2569580078125,
157
+ "learning_rate": 3.4416113269013913e-06,
158
+ "loss": 2.2909,
159
  "step": 10000
160
  },
161
  {
162
  "epoch": 1.970350910114468,
163
+ "grad_norm": 6.754888534545898,
164
+ "learning_rate": 3.3633003383034714e-06,
165
+ "loss": 2.2504,
166
  "step": 10500
167
  },
168
  {
169
  "epoch": 2.0,
170
+ "eval_accuracy": 0.5390502665517181,
171
+ "eval_loss": 2.2533023357391357,
172
+ "eval_model_preparation_time": 0.0053,
173
+ "eval_runtime": 1104.9881,
174
+ "eval_samples_per_second": 102.874,
175
+ "eval_steps_per_second": 12.86,
176
  "step": 10658
177
  },
178
  {
179
  "epoch": 2.0641771439294425,
180
+ "grad_norm": 6.7972259521484375,
181
+ "learning_rate": 3.2849893497055506e-06,
182
+ "loss": 2.1715,
183
  "step": 11000
184
  },
185
  {
186
  "epoch": 2.1580033777444174,
187
+ "grad_norm": 3.576768398284912,
188
+ "learning_rate": 3.2066783611076307e-06,
189
+ "loss": 2.1279,
190
  "step": 11500
191
  },
192
  {
193
  "epoch": 2.251829611559392,
194
+ "grad_norm": 6.923471450805664,
195
+ "learning_rate": 3.1283673725097108e-06,
196
+ "loss": 2.0707,
197
  "step": 12000
198
  },
199
  {
200
  "epoch": 2.3456558453743668,
201
+ "grad_norm": 4.643533706665039,
202
+ "learning_rate": 3.050056383911791e-06,
203
+ "loss": 2.0385,
204
  "step": 12500
205
  },
206
  {
207
  "epoch": 2.4394820791893412,
208
+ "grad_norm": 6.029781341552734,
209
+ "learning_rate": 2.971745395313871e-06,
210
+ "loss": 1.9869,
211
  "step": 13000
212
  },
213
  {
214
  "epoch": 2.533308313004316,
215
+ "grad_norm": 5.756152153015137,
216
+ "learning_rate": 2.893434406715951e-06,
217
+ "loss": 1.9542,
218
  "step": 13500
219
  },
220
  {
221
  "epoch": 2.6271345468192906,
222
+ "grad_norm": 7.7480244636535645,
223
+ "learning_rate": 2.8151234181180302e-06,
224
+ "loss": 1.9093,
225
  "step": 14000
226
  },
227
  {
228
  "epoch": 2.7209607806342655,
229
+ "grad_norm": 9.918256759643555,
230
+ "learning_rate": 2.7368124295201103e-06,
231
+ "loss": 1.8805,
232
  "step": 14500
233
  },
234
  {
235
  "epoch": 2.81478701444924,
236
+ "grad_norm": 6.404055118560791,
237
+ "learning_rate": 2.6585014409221904e-06,
238
+ "loss": 1.8297,
239
  "step": 15000
240
  },
241
  {
242
  "epoch": 2.9086132482642144,
243
+ "grad_norm": 4.804295063018799,
244
+ "learning_rate": 2.5801904523242704e-06,
245
+ "loss": 1.794,
246
  "step": 15500
247
  },
248
  {
249
  "epoch": 3.0,
250
+ "eval_accuracy": 0.6154353678061826,
251
+ "eval_loss": 1.8137885332107544,
252
+ "eval_model_preparation_time": 0.0053,
253
+ "eval_runtime": 1101.4513,
254
+ "eval_samples_per_second": 103.204,
255
+ "eval_steps_per_second": 12.901,
256
  "step": 15987
257
  },
258
  {
259
  "epoch": 3.0024394820791893,
260
+ "grad_norm": 7.666245460510254,
261
+ "learning_rate": 2.5018794637263505e-06,
262
+ "loss": 1.7543,
263
  "step": 16000
264
  },
265
  {
266
  "epoch": 3.096265715894164,
267
+ "grad_norm": 8.590559005737305,
268
+ "learning_rate": 2.42356847512843e-06,
269
+ "loss": 1.7155,
270
  "step": 16500
271
  },
272
  {
273
  "epoch": 3.1900919497091387,
274
+ "grad_norm": 6.843356132507324,
275
+ "learning_rate": 2.3452574865305102e-06,
276
+ "loss": 1.6821,
277
  "step": 17000
278
  },
279
  {
280
  "epoch": 3.283918183524113,
281
+ "grad_norm": 10.152677536010742,
282
+ "learning_rate": 2.26694649793259e-06,
283
+ "loss": 1.6734,
284
  "step": 17500
285
  },
286
  {
287
  "epoch": 3.377744417339088,
288
+ "grad_norm": 7.38956356048584,
289
+ "learning_rate": 2.18863550933467e-06,
290
+ "loss": 1.6254,
291
  "step": 18000
292
  },
293
  {
294
  "epoch": 3.4715706511540625,
295
+ "grad_norm": 7.098983287811279,
296
+ "learning_rate": 2.1103245207367496e-06,
297
+ "loss": 1.6125,
298
  "step": 18500
299
  },
300
  {
301
  "epoch": 3.5653968849690374,
302
+ "grad_norm": 8.060213088989258,
303
+ "learning_rate": 2.0320135321388297e-06,
304
+ "loss": 1.5813,
305
  "step": 19000
306
  },
307
  {
308
  "epoch": 3.659223118784012,
309
+ "grad_norm": 7.161475658416748,
310
+ "learning_rate": 1.9537025435409098e-06,
311
+ "loss": 1.5367,
312
  "step": 19500
313
  },
314
  {
315
  "epoch": 3.7530493525989868,
316
+ "grad_norm": 14.963991165161133,
317
+ "learning_rate": 1.8753915549429896e-06,
318
+ "loss": 1.5131,
319
  "step": 20000
320
  },
321
  {
322
  "epoch": 3.846875586413961,
323
+ "grad_norm": 8.202707290649414,
324
+ "learning_rate": 1.7970805663450697e-06,
325
+ "loss": 1.5161,
326
  "step": 20500
327
  },
328
  {
329
  "epoch": 3.940701820228936,
330
+ "grad_norm": 7.647439002990723,
331
+ "learning_rate": 1.7187695777471498e-06,
332
+ "loss": 1.5041,
333
  "step": 21000
334
  },
335
  {
336
  "epoch": 4.0,
337
+ "eval_accuracy": 0.6565089642310467,
338
+ "eval_loss": 1.5613937377929688,
339
+ "eval_model_preparation_time": 0.0053,
340
+ "eval_runtime": 1094.3529,
341
+ "eval_samples_per_second": 103.873,
342
+ "eval_steps_per_second": 12.985,
343
  "step": 21316
344
  },
345
  {
346
  "epoch": 4.034528054043911,
347
+ "grad_norm": 6.454436779022217,
348
+ "learning_rate": 1.6404585891492294e-06,
349
+ "loss": 1.4779,
350
  "step": 21500
351
  },
352
  {
353
  "epoch": 4.128354287858885,
354
+ "grad_norm": 11.403889656066895,
355
+ "learning_rate": 1.5621476005513095e-06,
356
+ "loss": 1.4441,
357
  "step": 22000
358
  },
359
  {
360
  "epoch": 4.22218052167386,
361
+ "grad_norm": 10.452070236206055,
362
+ "learning_rate": 1.4838366119533896e-06,
363
+ "loss": 1.4355,
364
  "step": 22500
365
  },
366
  {
367
  "epoch": 4.316006755488835,
368
+ "grad_norm": 7.726356506347656,
369
+ "learning_rate": 1.4055256233554693e-06,
370
+ "loss": 1.4132,
371
  "step": 23000
372
  },
373
  {
374
  "epoch": 4.409832989303809,
375
+ "grad_norm": 6.455691337585449,
376
+ "learning_rate": 1.3272146347575493e-06,
377
+ "loss": 1.3937,
378
  "step": 23500
379
  },
380
  {
381
  "epoch": 4.503659223118784,
382
+ "grad_norm": 8.14889907836914,
383
+ "learning_rate": 1.2489036461596292e-06,
384
+ "loss": 1.3721,
385
  "step": 24000
386
  },
387
  {
388
  "epoch": 4.597485456933759,
389
+ "grad_norm": 8.627324104309082,
390
+ "learning_rate": 1.170592657561709e-06,
391
+ "loss": 1.3555,
392
  "step": 24500
393
  },
394
  {
395
  "epoch": 4.6913116907487336,
396
+ "grad_norm": 10.486886978149414,
397
+ "learning_rate": 1.0922816689637891e-06,
398
+ "loss": 1.377,
399
  "step": 25000
400
  },
401
  {
402
  "epoch": 4.785137924563708,
403
+ "grad_norm": 8.634580612182617,
404
+ "learning_rate": 1.013970680365869e-06,
405
+ "loss": 1.3634,
406
  "step": 25500
407
  },
408
  {
409
  "epoch": 4.8789641583786825,
410
+ "grad_norm": 9.948036193847656,
411
+ "learning_rate": 9.356596917679489e-07,
412
+ "loss": 1.3451,
413
  "step": 26000
414
  },
415
  {
416
  "epoch": 4.972790392193657,
417
+ "grad_norm": 10.133368492126465,
418
+ "learning_rate": 8.573487031700289e-07,
419
+ "loss": 1.3204,
420
  "step": 26500
421
  },
422
  {
423
  "epoch": 5.0,
424
+ "eval_accuracy": 0.6835072224079385,
425
+ "eval_loss": 1.4212292432785034,
426
+ "eval_model_preparation_time": 0.0053,
427
+ "eval_runtime": 1081.7006,
428
+ "eval_samples_per_second": 105.088,
429
+ "eval_steps_per_second": 13.137,
430
  "step": 26645
431
  },
432
  {
433
  "epoch": 5.066616626008632,
434
+ "grad_norm": 7.147021293640137,
435
+ "learning_rate": 7.790377145721088e-07,
436
+ "loss": 1.3151,
437
  "step": 27000
438
  },
439
  {
440
  "epoch": 5.160442859823607,
441
+ "grad_norm": 7.843013763427734,
442
+ "learning_rate": 7.007267259741888e-07,
443
+ "loss": 1.2924,
444
  "step": 27500
445
  },
446
  {
447
  "epoch": 5.254269093638581,
448
+ "grad_norm": 7.825103282928467,
449
+ "learning_rate": 6.224157373762686e-07,
450
+ "loss": 1.2944,
451
  "step": 28000
452
  },
453
  {
454
  "epoch": 5.348095327453556,
455
+ "grad_norm": 5.51339864730835,
456
+ "learning_rate": 5.441047487783486e-07,
457
+ "loss": 1.2958,
458
  "step": 28500
459
  },
460
  {
461
  "epoch": 5.441921561268531,
462
+ "grad_norm": 9.967358589172363,
463
+ "learning_rate": 4.657937601804285e-07,
464
+ "loss": 1.2925,
465
  "step": 29000
466
  },
467
  {
468
  "epoch": 5.5357477950835055,
469
+ "grad_norm": 7.193011283874512,
470
+ "learning_rate": 3.874827715825085e-07,
471
+ "loss": 1.2973,
472
  "step": 29500
473
  },
474
  {
475
  "epoch": 5.62957402889848,
476
+ "grad_norm": 7.687045097351074,
477
+ "learning_rate": 3.091717829845884e-07,
478
+ "loss": 1.2652,
479
  "step": 30000
480
  },
481
  {
482
  "epoch": 5.723400262713454,
483
+ "grad_norm": 12.939030647277832,
484
+ "learning_rate": 2.3086079438666836e-07,
485
+ "loss": 1.2808,
486
  "step": 30500
487
  },
488
  {
489
  "epoch": 5.81722649652843,
490
+ "grad_norm": 9.08949089050293,
491
+ "learning_rate": 1.525498057887483e-07,
492
+ "loss": 1.2643,
493
  "step": 31000
494
  },
495
  {
496
  "epoch": 5.911052730343404,
497
+ "grad_norm": 10.904788970947266,
498
+ "learning_rate": 7.423881719082822e-08,
499
+ "loss": 1.2828,
500
  "step": 31500
501
  },
502
  {
503
  "epoch": 6.0,
504
+ "eval_accuracy": 0.6924626563682108,
505
+ "eval_loss": 1.374941349029541,
506
+ "eval_model_preparation_time": 0.0053,
507
+ "eval_runtime": 1090.2857,
508
+ "eval_samples_per_second": 104.261,
509
+ "eval_steps_per_second": 13.033,
510
  "step": 31974
511
  }
512
  ],
checkpoint-31974/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5be75e60227ec3925963a94a4bd6597f67e127af381ca33e725a64c3425537be
3
  size 4731
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29015c96f2bea471733807d2f0d90a80549b90424dd54e981ee9e627381c13a5
3
  size 4731
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4434bb1c21783caa0078e23ee6f1962484398526b4d9b38ba912fb4305c8921d
3
  size 343387012
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f264162406cd17fcf0599f6f7c0ad35e95c7d2759c1a4c22c1c88689bb6fea6
3
  size 343387012
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5be75e60227ec3925963a94a4bd6597f67e127af381ca33e725a64c3425537be
3
  size 4731
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29015c96f2bea471733807d2f0d90a80549b90424dd54e981ee9e627381c13a5
3
  size 4731