AkitoP commited on
Commit
a4afe77
·
verified ·
1 Parent(s): 01002ad
Files changed (38) hide show
  1. checkpoint-1000/adapter_model.safetensors +1 -1
  2. checkpoint-1000/adapter_model/adapter_model.safetensors +1 -1
  3. checkpoint-1000/optimizer.pt +1 -1
  4. checkpoint-1000/rng_state.pth +1 -1
  5. checkpoint-1000/trainer_state.json +491 -473
  6. checkpoint-1000/training_args.bin +1 -1
  7. checkpoint-1500/adapter_model.safetensors +1 -1
  8. checkpoint-1500/adapter_model/adapter_model.safetensors +1 -1
  9. checkpoint-1500/optimizer.pt +1 -1
  10. checkpoint-1500/rng_state.pth +1 -1
  11. checkpoint-1500/trainer_state.json +703 -676
  12. checkpoint-1500/training_args.bin +1 -1
  13. checkpoint-1750/README.md +202 -0
  14. checkpoint-1750/adapter_config.json +35 -0
  15. checkpoint-1750/adapter_model.safetensors +3 -0
  16. checkpoint-1750/adapter_model/README.md +202 -0
  17. checkpoint-1750/adapter_model/adapter_config.json +35 -0
  18. checkpoint-1750/adapter_model/adapter_model.safetensors +3 -0
  19. checkpoint-1750/added_tokens.json +1611 -0
  20. checkpoint-1750/merges.txt +0 -0
  21. checkpoint-1750/normalizer.json +1742 -0
  22. checkpoint-1750/optimizer.pt +3 -0
  23. checkpoint-1750/rng_state.pth +3 -0
  24. checkpoint-1750/scheduler.pt +3 -0
  25. checkpoint-1750/special_tokens_map.json +139 -0
  26. checkpoint-1750/tokenizer_config.json +0 -0
  27. checkpoint-1750/trainer_state.json +2546 -0
  28. checkpoint-1750/training_args.bin +3 -0
  29. checkpoint-1750/vocab.json +0 -0
  30. checkpoint-2000/adapter_model.safetensors +1 -1
  31. checkpoint-2000/adapter_model/adapter_model.safetensors +1 -1
  32. checkpoint-2000/optimizer.pt +1 -1
  33. checkpoint-2000/rng_state.pth +1 -1
  34. checkpoint-2000/trainer_state.json +911 -875
  35. checkpoint-2000/training_args.bin +1 -1
  36. runs/Feb11_15-08-30_5d7eb40c28a7/events.out.tfevents.1739286511.5d7eb40c28a7.5353.0 +2 -2
  37. runs/Feb11_17-49-47_5d7eb40c28a7/events.out.tfevents.1739296188.5d7eb40c28a7.6631.0 +3 -0
  38. runs/Feb11_17-52-20_5d7eb40c28a7/events.out.tfevents.1739296341.5d7eb40c28a7.6851.0 +3 -0
checkpoint-1000/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbcc202bf351b24218a18a08272d65e0e85f19f5253eb3cbd5eb92b9234e2fe5
3
  size 26237160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c12ec8fdfd34d68eff539d3e5164cb1fca0d117cd481a0357036db15d8805e
3
  size 26237160
checkpoint-1000/adapter_model/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbcc202bf351b24218a18a08272d65e0e85f19f5253eb3cbd5eb92b9234e2fe5
3
  size 26237160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c12ec8fdfd34d68eff539d3e5164cb1fca0d117cd481a0357036db15d8805e
3
  size 26237160
checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9425d6b0b5355d8b22784b5736e775d774df52af5a6bd420ac8d97ecbf270bb3
3
  size 52563258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45732b1f0dc93eef2a0178f3b6411af22aab1145fad33e0bd3eaad3b25b1d22b
3
  size 52563258
checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ef93395958445aa7e8daf531ff69a9a3a4d8afda84f70e9514eaba1b7e9e514
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e221199d554a09b12b2fc1ce004d16165a5d5f6010e02e78d76b6cbdcdc96aa4
3
  size 14244
checkpoint-1000/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.04981744137059402,
3
  "best_model_checkpoint": "./whisper-large-v3-turbo-finetuned-lora/checkpoint-1000",
4
  "epoch": 4.0,
5
- "eval_steps": 500,
6
  "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
@@ -10,1420 +10,1438 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
- "grad_norm": 11.033143997192383,
14
- "learning_rate": 8.333333333333334e-07,
15
- "loss": 3.7365,
16
  "step": 5
17
  },
18
  {
19
  "epoch": 0.04,
20
- "grad_norm": 10.168129920959473,
21
- "learning_rate": 1.875e-06,
22
- "loss": 3.6756,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.06,
27
- "grad_norm": 10.131425857543945,
28
- "learning_rate": 2.916666666666667e-06,
29
- "loss": 3.6681,
30
  "step": 15
31
  },
32
  {
33
  "epoch": 0.08,
34
- "grad_norm": 9.962166786193848,
35
- "learning_rate": 3.958333333333334e-06,
36
- "loss": 3.6567,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.1,
41
- "grad_norm": 9.637451171875,
42
- "learning_rate": 4.791666666666667e-06,
43
- "loss": 3.5903,
44
  "step": 25
45
  },
46
  {
47
  "epoch": 0.12,
48
- "grad_norm": 9.27942943572998,
49
- "learning_rate": 5.833333333333334e-06,
50
- "loss": 3.4592,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.14,
55
- "grad_norm": 9.690427780151367,
56
- "learning_rate": 6.875e-06,
57
- "loss": 3.299,
58
  "step": 35
59
  },
60
  {
61
  "epoch": 0.16,
62
- "grad_norm": 8.123926162719727,
63
- "learning_rate": 7.916666666666668e-06,
64
- "loss": 3.2058,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.18,
69
- "grad_norm": 6.938026428222656,
70
- "learning_rate": 8.958333333333334e-06,
71
- "loss": 3.0613,
72
  "step": 45
73
  },
74
  {
75
  "epoch": 0.2,
76
- "grad_norm": 6.615925312042236,
77
- "learning_rate": 1e-05,
78
- "loss": 2.8859,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.22,
83
- "grad_norm": 5.712332725524902,
84
- "learning_rate": 1.1041666666666666e-05,
85
- "loss": 2.6746,
86
  "step": 55
87
  },
88
  {
89
  "epoch": 0.24,
90
- "grad_norm": 4.229877471923828,
91
- "learning_rate": 1.2083333333333333e-05,
92
- "loss": 2.4948,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.26,
97
- "grad_norm": 3.9951322078704834,
98
- "learning_rate": 1.3125e-05,
99
- "loss": 2.3496,
100
  "step": 65
101
  },
102
  {
103
  "epoch": 0.28,
104
- "grad_norm": 4.010512351989746,
105
- "learning_rate": 1.4166666666666666e-05,
106
- "loss": 2.2345,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.3,
111
- "grad_norm": 3.2869503498077393,
112
- "learning_rate": 1.5208333333333335e-05,
113
- "loss": 2.0418,
114
  "step": 75
115
  },
116
  {
117
  "epoch": 0.32,
118
- "grad_norm": 3.47694993019104,
119
- "learning_rate": 1.6250000000000002e-05,
120
- "loss": 1.8212,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.34,
125
- "grad_norm": 2.761810779571533,
126
- "learning_rate": 1.7291666666666666e-05,
127
- "loss": 1.7471,
128
  "step": 85
129
  },
130
  {
131
  "epoch": 0.36,
132
- "grad_norm": 2.83661150932312,
133
- "learning_rate": 1.8333333333333333e-05,
134
- "loss": 1.6647,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.38,
139
- "grad_norm": 2.7371621131896973,
140
- "learning_rate": 1.9375e-05,
141
- "loss": 1.5239,
142
  "step": 95
143
  },
144
  {
145
  "epoch": 0.4,
146
- "grad_norm": 2.5980722904205322,
147
- "learning_rate": 2.0416666666666667e-05,
148
- "loss": 1.3501,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.42,
153
- "grad_norm": 2.8566689491271973,
154
- "learning_rate": 2.1458333333333334e-05,
155
- "loss": 1.3153,
156
  "step": 105
157
  },
158
  {
159
  "epoch": 0.44,
160
- "grad_norm": 2.052793264389038,
161
- "learning_rate": 2.2499999999999998e-05,
162
- "loss": 1.175,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.46,
167
- "grad_norm": 2.08168363571167,
168
- "learning_rate": 2.3541666666666665e-05,
169
- "loss": 1.0395,
170
  "step": 115
171
  },
172
  {
173
  "epoch": 0.48,
174
- "grad_norm": 1.830390453338623,
175
- "learning_rate": 2.4583333333333332e-05,
176
- "loss": 0.9517,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.5,
181
- "grad_norm": 2.9608633518218994,
182
- "learning_rate": 2.5625e-05,
183
- "loss": 0.8602,
184
  "step": 125
185
  },
186
  {
187
  "epoch": 0.52,
188
- "grad_norm": 2.2221925258636475,
189
- "learning_rate": 2.666666666666667e-05,
190
- "loss": 0.7687,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.54,
195
- "grad_norm": 2.185246706008911,
196
- "learning_rate": 2.7708333333333334e-05,
197
- "loss": 0.6957,
198
  "step": 135
199
  },
200
  {
201
  "epoch": 0.56,
202
- "grad_norm": 2.3892769813537598,
203
- "learning_rate": 2.875e-05,
204
- "loss": 0.6452,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.58,
209
- "grad_norm": 2.2923471927642822,
210
- "learning_rate": 2.9791666666666668e-05,
211
- "loss": 0.6001,
212
  "step": 145
213
  },
214
  {
215
  "epoch": 0.6,
216
- "grad_norm": 3.059990882873535,
217
- "learning_rate": 3.0833333333333335e-05,
218
- "loss": 0.5505,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.62,
223
- "grad_norm": 2.1722524166107178,
224
- "learning_rate": 3.1875e-05,
225
- "loss": 0.513,
226
  "step": 155
227
  },
228
  {
229
  "epoch": 0.64,
230
- "grad_norm": 2.4212610721588135,
231
- "learning_rate": 3.291666666666666e-05,
232
- "loss": 0.4868,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.66,
237
- "grad_norm": 2.297727584838867,
238
- "learning_rate": 3.3958333333333337e-05,
239
- "loss": 0.4696,
240
  "step": 165
241
  },
242
  {
243
  "epoch": 0.68,
244
- "grad_norm": 1.7377690076828003,
245
- "learning_rate": 3.5000000000000004e-05,
246
- "loss": 0.4174,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.7,
251
- "grad_norm": 1.821341872215271,
252
- "learning_rate": 3.6041666666666664e-05,
253
- "loss": 0.4204,
254
  "step": 175
255
  },
256
  {
257
  "epoch": 0.72,
258
- "grad_norm": 2.0993902683258057,
259
- "learning_rate": 3.708333333333334e-05,
260
- "loss": 0.3846,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.74,
265
- "grad_norm": 2.5224227905273438,
266
- "learning_rate": 3.8125e-05,
267
- "loss": 0.3499,
268
  "step": 185
269
  },
270
  {
271
  "epoch": 0.76,
272
- "grad_norm": 1.8540211915969849,
273
- "learning_rate": 3.916666666666667e-05,
274
- "loss": 0.3414,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.78,
279
- "grad_norm": 1.9813562631607056,
280
- "learning_rate": 4.020833333333333e-05,
281
- "loss": 0.3274,
282
  "step": 195
283
  },
284
  {
285
  "epoch": 0.8,
286
- "grad_norm": 1.385871171951294,
287
- "learning_rate": 4.125e-05,
288
- "loss": 0.2907,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.82,
293
- "grad_norm": 2.0511081218719482,
294
- "learning_rate": 4.229166666666667e-05,
295
- "loss": 0.2735,
296
  "step": 205
297
  },
298
  {
299
  "epoch": 0.84,
300
- "grad_norm": 2.0850329399108887,
301
- "learning_rate": 4.3333333333333334e-05,
302
- "loss": 0.2384,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.86,
307
- "grad_norm": 2.191450595855713,
308
- "learning_rate": 4.4375e-05,
309
- "loss": 0.2244,
310
  "step": 215
311
  },
312
  {
313
  "epoch": 0.88,
314
- "grad_norm": 3.4809000492095947,
315
- "learning_rate": 4.541666666666667e-05,
316
- "loss": 0.223,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.9,
321
- "grad_norm": 1.4290976524353027,
322
- "learning_rate": 4.645833333333333e-05,
323
- "loss": 0.194,
324
  "step": 225
325
  },
326
  {
327
  "epoch": 0.92,
328
- "grad_norm": 1.8528721332550049,
329
- "learning_rate": 4.75e-05,
330
- "loss": 0.1817,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.94,
335
- "grad_norm": 1.4630467891693115,
336
- "learning_rate": 4.854166666666666e-05,
337
- "loss": 0.1728,
338
  "step": 235
339
  },
340
  {
341
  "epoch": 0.96,
342
- "grad_norm": 1.6305458545684814,
343
- "learning_rate": 4.958333333333334e-05,
344
- "loss": 0.1859,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.98,
349
- "grad_norm": 1.455244779586792,
350
- "learning_rate": 5.0625000000000004e-05,
351
- "loss": 0.1531,
352
  "step": 245
353
  },
354
  {
355
  "epoch": 1.0,
356
- "grad_norm": 1.8049136400222778,
357
- "learning_rate": 5.1666666666666664e-05,
358
- "loss": 0.1412,
 
 
 
 
 
 
 
 
 
359
  "step": 250
360
  },
361
  {
362
  "epoch": 1.02,
363
- "grad_norm": 1.577805519104004,
364
- "learning_rate": 5.270833333333334e-05,
365
- "loss": 0.1384,
366
  "step": 255
367
  },
368
  {
369
  "epoch": 1.04,
370
- "grad_norm": 2.020803213119507,
371
- "learning_rate": 5.375e-05,
372
- "loss": 0.1398,
373
  "step": 260
374
  },
375
  {
376
  "epoch": 1.06,
377
- "grad_norm": 1.4404263496398926,
378
- "learning_rate": 5.479166666666667e-05,
379
- "loss": 0.1484,
380
  "step": 265
381
  },
382
  {
383
  "epoch": 1.08,
384
- "grad_norm": 2.497192621231079,
385
- "learning_rate": 5.583333333333333e-05,
386
- "loss": 0.1353,
387
  "step": 270
388
  },
389
  {
390
  "epoch": 1.1,
391
- "grad_norm": 3.2407219409942627,
392
- "learning_rate": 5.6875e-05,
393
- "loss": 0.1433,
394
  "step": 275
395
  },
396
  {
397
  "epoch": 1.12,
398
- "grad_norm": 1.4051156044006348,
399
- "learning_rate": 5.791666666666667e-05,
400
- "loss": 0.1181,
401
  "step": 280
402
  },
403
  {
404
  "epoch": 1.1400000000000001,
405
- "grad_norm": 1.4117530584335327,
406
- "learning_rate": 5.8958333333333334e-05,
407
- "loss": 0.1252,
408
  "step": 285
409
  },
410
  {
411
  "epoch": 1.16,
412
- "grad_norm": 1.6360172033309937,
413
- "learning_rate": 6e-05,
414
- "loss": 0.1233,
415
  "step": 290
416
  },
417
  {
418
  "epoch": 1.18,
419
- "grad_norm": 1.225799560546875,
420
- "learning_rate": 6.104166666666667e-05,
421
- "loss": 0.1084,
422
  "step": 295
423
  },
424
  {
425
  "epoch": 1.2,
426
- "grad_norm": 1.4874345064163208,
427
- "learning_rate": 6.208333333333333e-05,
428
- "loss": 0.125,
429
  "step": 300
430
  },
431
  {
432
  "epoch": 1.22,
433
- "grad_norm": 1.3238331079483032,
434
  "learning_rate": 6.25e-05,
435
- "loss": 0.1132,
436
  "step": 305
437
  },
438
  {
439
  "epoch": 1.24,
440
- "grad_norm": 2.354384183883667,
441
  "learning_rate": 6.25e-05,
442
- "loss": 0.0993,
443
  "step": 310
444
  },
445
  {
446
  "epoch": 1.26,
447
- "grad_norm": 2.2216718196868896,
448
  "learning_rate": 6.25e-05,
449
- "loss": 0.1325,
450
  "step": 315
451
  },
452
  {
453
  "epoch": 1.28,
454
- "grad_norm": 1.026408076286316,
455
  "learning_rate": 6.25e-05,
456
- "loss": 0.1035,
457
  "step": 320
458
  },
459
  {
460
  "epoch": 1.3,
461
- "grad_norm": 2.0583767890930176,
462
  "learning_rate": 6.25e-05,
463
- "loss": 0.1208,
464
  "step": 325
465
  },
466
  {
467
  "epoch": 1.32,
468
- "grad_norm": 1.9303004741668701,
469
  "learning_rate": 6.25e-05,
470
- "loss": 0.1119,
471
  "step": 330
472
  },
473
  {
474
  "epoch": 1.34,
475
- "grad_norm": 1.7043157815933228,
476
  "learning_rate": 6.25e-05,
477
- "loss": 0.1023,
478
  "step": 335
479
  },
480
  {
481
  "epoch": 1.3599999999999999,
482
- "grad_norm": 1.3245861530303955,
483
  "learning_rate": 6.25e-05,
484
- "loss": 0.1008,
485
  "step": 340
486
  },
487
  {
488
  "epoch": 1.38,
489
- "grad_norm": 1.541318655014038,
490
  "learning_rate": 6.25e-05,
491
- "loss": 0.0977,
492
  "step": 345
493
  },
494
  {
495
  "epoch": 1.4,
496
- "grad_norm": 1.8400285243988037,
497
  "learning_rate": 6.25e-05,
498
- "loss": 0.0887,
499
  "step": 350
500
  },
501
  {
502
  "epoch": 1.42,
503
- "grad_norm": 1.0839234590530396,
504
  "learning_rate": 6.25e-05,
505
- "loss": 0.0912,
506
  "step": 355
507
  },
508
  {
509
  "epoch": 1.44,
510
- "grad_norm": 1.269062876701355,
511
  "learning_rate": 6.25e-05,
512
- "loss": 0.0959,
513
  "step": 360
514
  },
515
  {
516
  "epoch": 1.46,
517
- "grad_norm": 1.0546581745147705,
518
  "learning_rate": 6.25e-05,
519
- "loss": 0.0875,
520
  "step": 365
521
  },
522
  {
523
  "epoch": 1.48,
524
- "grad_norm": 1.683465838432312,
525
  "learning_rate": 6.25e-05,
526
- "loss": 0.0933,
527
  "step": 370
528
  },
529
  {
530
  "epoch": 1.5,
531
- "grad_norm": 1.470189094543457,
532
  "learning_rate": 6.25e-05,
533
- "loss": 0.0986,
534
  "step": 375
535
  },
536
  {
537
  "epoch": 1.52,
538
- "grad_norm": 1.183585524559021,
539
  "learning_rate": 6.25e-05,
540
- "loss": 0.0979,
541
  "step": 380
542
  },
543
  {
544
  "epoch": 1.54,
545
- "grad_norm": 1.3886022567749023,
546
  "learning_rate": 6.25e-05,
547
- "loss": 0.1031,
548
  "step": 385
549
  },
550
  {
551
  "epoch": 1.56,
552
- "grad_norm": 1.105749487876892,
553
  "learning_rate": 6.25e-05,
554
- "loss": 0.0995,
555
  "step": 390
556
  },
557
  {
558
  "epoch": 1.58,
559
- "grad_norm": 1.0494953393936157,
560
  "learning_rate": 6.25e-05,
561
- "loss": 0.0755,
562
  "step": 395
563
  },
564
  {
565
  "epoch": 1.6,
566
- "grad_norm": 1.7028089761734009,
567
  "learning_rate": 6.25e-05,
568
- "loss": 0.0981,
569
  "step": 400
570
  },
571
  {
572
  "epoch": 1.62,
573
- "grad_norm": 1.5404858589172363,
574
  "learning_rate": 6.25e-05,
575
- "loss": 0.0917,
576
  "step": 405
577
  },
578
  {
579
  "epoch": 1.6400000000000001,
580
- "grad_norm": 1.6659576892852783,
581
  "learning_rate": 6.25e-05,
582
- "loss": 0.0891,
583
  "step": 410
584
  },
585
  {
586
  "epoch": 1.6600000000000001,
587
- "grad_norm": 1.353579044342041,
588
  "learning_rate": 6.25e-05,
589
- "loss": 0.0889,
590
  "step": 415
591
  },
592
  {
593
  "epoch": 1.6800000000000002,
594
- "grad_norm": 2.1539247035980225,
595
  "learning_rate": 6.25e-05,
596
- "loss": 0.1092,
597
  "step": 420
598
  },
599
  {
600
  "epoch": 1.7,
601
- "grad_norm": 1.4106309413909912,
602
  "learning_rate": 6.25e-05,
603
- "loss": 0.0951,
604
  "step": 425
605
  },
606
  {
607
  "epoch": 1.72,
608
- "grad_norm": 1.1167716979980469,
609
  "learning_rate": 6.25e-05,
610
- "loss": 0.0899,
611
  "step": 430
612
  },
613
  {
614
  "epoch": 1.74,
615
- "grad_norm": 1.2029541730880737,
616
  "learning_rate": 6.25e-05,
617
- "loss": 0.0902,
618
  "step": 435
619
  },
620
  {
621
  "epoch": 1.76,
622
- "grad_norm": 1.0979869365692139,
623
  "learning_rate": 6.25e-05,
624
- "loss": 0.089,
625
  "step": 440
626
  },
627
  {
628
  "epoch": 1.78,
629
- "grad_norm": 1.1568419933319092,
630
  "learning_rate": 6.25e-05,
631
- "loss": 0.0859,
632
  "step": 445
633
  },
634
  {
635
  "epoch": 1.8,
636
- "grad_norm": 1.2472410202026367,
637
  "learning_rate": 6.25e-05,
638
- "loss": 0.0877,
639
  "step": 450
640
  },
641
  {
642
  "epoch": 1.8199999999999998,
643
- "grad_norm": 1.2323497533798218,
644
  "learning_rate": 6.25e-05,
645
- "loss": 0.0865,
646
  "step": 455
647
  },
648
  {
649
  "epoch": 1.8399999999999999,
650
- "grad_norm": 1.2814995050430298,
651
  "learning_rate": 6.25e-05,
652
- "loss": 0.0831,
653
  "step": 460
654
  },
655
  {
656
  "epoch": 1.8599999999999999,
657
- "grad_norm": 0.912714421749115,
658
  "learning_rate": 6.25e-05,
659
- "loss": 0.0813,
660
  "step": 465
661
  },
662
  {
663
  "epoch": 1.88,
664
- "grad_norm": 1.2273714542388916,
665
  "learning_rate": 6.25e-05,
666
- "loss": 0.0884,
667
  "step": 470
668
  },
669
  {
670
  "epoch": 1.9,
671
- "grad_norm": 1.5928541421890259,
672
  "learning_rate": 6.25e-05,
673
- "loss": 0.0852,
674
  "step": 475
675
  },
676
  {
677
  "epoch": 1.92,
678
- "grad_norm": 0.9074931740760803,
679
  "learning_rate": 6.25e-05,
680
- "loss": 0.0792,
681
  "step": 480
682
  },
683
  {
684
  "epoch": 1.94,
685
- "grad_norm": 0.9795681834220886,
686
  "learning_rate": 6.25e-05,
687
- "loss": 0.0781,
688
  "step": 485
689
  },
690
  {
691
  "epoch": 1.96,
692
- "grad_norm": 1.4303114414215088,
693
  "learning_rate": 6.25e-05,
694
- "loss": 0.0757,
695
  "step": 490
696
  },
697
  {
698
  "epoch": 1.98,
699
- "grad_norm": 1.0313260555267334,
700
  "learning_rate": 6.25e-05,
701
- "loss": 0.0881,
702
  "step": 495
703
  },
704
  {
705
  "epoch": 2.0,
706
- "grad_norm": 1.1294418573379517,
707
  "learning_rate": 6.25e-05,
708
- "loss": 0.0785,
709
  "step": 500
710
  },
711
  {
712
  "epoch": 2.0,
713
- "eval_cer": 0.05613677854233956,
714
- "eval_loss": 0.08199143409729004,
715
- "eval_runtime": 495.5562,
716
- "eval_samples_per_second": 2.018,
717
- "eval_steps_per_second": 0.504,
718
  "step": 500
719
  },
720
  {
721
  "epoch": 2.02,
722
- "grad_norm": 1.0397248268127441,
723
  "learning_rate": 6.25e-05,
724
- "loss": 0.0591,
725
  "step": 505
726
  },
727
  {
728
  "epoch": 2.04,
729
- "grad_norm": 0.8539375066757202,
730
  "learning_rate": 6.25e-05,
731
- "loss": 0.0664,
732
  "step": 510
733
  },
734
  {
735
  "epoch": 2.06,
736
- "grad_norm": 1.3555073738098145,
737
  "learning_rate": 6.25e-05,
738
- "loss": 0.0772,
739
  "step": 515
740
  },
741
  {
742
  "epoch": 2.08,
743
- "grad_norm": 1.6025832891464233,
744
  "learning_rate": 6.25e-05,
745
- "loss": 0.0737,
746
  "step": 520
747
  },
748
  {
749
  "epoch": 2.1,
750
- "grad_norm": 1.3090434074401855,
751
  "learning_rate": 6.25e-05,
752
- "loss": 0.0619,
753
  "step": 525
754
  },
755
  {
756
  "epoch": 2.12,
757
- "grad_norm": 0.9269134998321533,
758
  "learning_rate": 6.25e-05,
759
- "loss": 0.0679,
760
  "step": 530
761
  },
762
  {
763
  "epoch": 2.14,
764
- "grad_norm": 0.8540180325508118,
765
  "learning_rate": 6.25e-05,
766
- "loss": 0.0718,
767
  "step": 535
768
  },
769
  {
770
  "epoch": 2.16,
771
- "grad_norm": 0.9320145845413208,
772
  "learning_rate": 6.25e-05,
773
- "loss": 0.0769,
774
  "step": 540
775
  },
776
  {
777
  "epoch": 2.18,
778
- "grad_norm": 1.6879560947418213,
779
  "learning_rate": 6.25e-05,
780
- "loss": 0.0765,
781
  "step": 545
782
  },
783
  {
784
  "epoch": 2.2,
785
- "grad_norm": 1.3687632083892822,
786
  "learning_rate": 6.25e-05,
787
- "loss": 0.0769,
788
  "step": 550
789
  },
790
  {
791
  "epoch": 2.22,
792
- "grad_norm": 1.2760627269744873,
793
  "learning_rate": 6.25e-05,
794
- "loss": 0.073,
795
  "step": 555
796
  },
797
  {
798
  "epoch": 2.24,
799
- "grad_norm": 0.8481590747833252,
800
  "learning_rate": 6.25e-05,
801
- "loss": 0.0669,
802
  "step": 560
803
  },
804
  {
805
  "epoch": 2.26,
806
- "grad_norm": 1.3147085905075073,
807
  "learning_rate": 6.25e-05,
808
- "loss": 0.0626,
809
  "step": 565
810
  },
811
  {
812
  "epoch": 2.2800000000000002,
813
- "grad_norm": 1.1458756923675537,
814
  "learning_rate": 6.25e-05,
815
- "loss": 0.0665,
816
  "step": 570
817
  },
818
  {
819
  "epoch": 2.3,
820
- "grad_norm": 1.0779470205307007,
821
  "learning_rate": 6.25e-05,
822
- "loss": 0.0783,
823
  "step": 575
824
  },
825
  {
826
  "epoch": 2.32,
827
- "grad_norm": 1.0333281755447388,
828
  "learning_rate": 6.25e-05,
829
- "loss": 0.0605,
830
  "step": 580
831
  },
832
  {
833
  "epoch": 2.34,
834
- "grad_norm": 1.1638994216918945,
835
  "learning_rate": 6.25e-05,
836
- "loss": 0.0644,
837
  "step": 585
838
  },
839
  {
840
  "epoch": 2.36,
841
- "grad_norm": 0.9482213258743286,
842
  "learning_rate": 6.25e-05,
843
- "loss": 0.0689,
844
  "step": 590
845
  },
846
  {
847
  "epoch": 2.38,
848
- "grad_norm": 0.9697607755661011,
849
  "learning_rate": 6.25e-05,
850
- "loss": 0.0628,
851
  "step": 595
852
  },
853
  {
854
  "epoch": 2.4,
855
- "grad_norm": 1.0675064325332642,
856
  "learning_rate": 6.25e-05,
857
- "loss": 0.0644,
858
  "step": 600
859
  },
860
  {
861
  "epoch": 2.42,
862
- "grad_norm": 1.0749262571334839,
863
  "learning_rate": 6.25e-05,
864
- "loss": 0.0572,
865
  "step": 605
866
  },
867
  {
868
  "epoch": 2.44,
869
- "grad_norm": 1.2545385360717773,
870
  "learning_rate": 6.25e-05,
871
- "loss": 0.0601,
872
  "step": 610
873
  },
874
  {
875
  "epoch": 2.46,
876
- "grad_norm": 1.1135083436965942,
877
  "learning_rate": 6.25e-05,
878
- "loss": 0.0607,
879
  "step": 615
880
  },
881
  {
882
  "epoch": 2.48,
883
- "grad_norm": 0.821413516998291,
884
  "learning_rate": 6.25e-05,
885
- "loss": 0.0622,
886
  "step": 620
887
  },
888
  {
889
  "epoch": 2.5,
890
- "grad_norm": 0.8959715366363525,
891
  "learning_rate": 6.25e-05,
892
- "loss": 0.073,
893
  "step": 625
894
  },
895
  {
896
  "epoch": 2.52,
897
- "grad_norm": 0.6712917685508728,
898
  "learning_rate": 6.25e-05,
899
- "loss": 0.0649,
900
  "step": 630
901
  },
902
  {
903
  "epoch": 2.54,
904
- "grad_norm": 0.6646750569343567,
905
  "learning_rate": 6.25e-05,
906
- "loss": 0.07,
907
  "step": 635
908
  },
909
  {
910
  "epoch": 2.56,
911
- "grad_norm": 1.0652884244918823,
912
  "learning_rate": 6.25e-05,
913
- "loss": 0.0657,
914
  "step": 640
915
  },
916
  {
917
  "epoch": 2.58,
918
- "grad_norm": 1.035218596458435,
919
  "learning_rate": 6.25e-05,
920
- "loss": 0.0789,
921
  "step": 645
922
  },
923
  {
924
  "epoch": 2.6,
925
- "grad_norm": 0.860249936580658,
926
  "learning_rate": 6.25e-05,
927
- "loss": 0.0589,
928
  "step": 650
929
  },
930
  {
931
  "epoch": 2.62,
932
- "grad_norm": 0.7494838237762451,
933
  "learning_rate": 6.25e-05,
934
- "loss": 0.077,
935
  "step": 655
936
  },
937
  {
938
  "epoch": 2.64,
939
- "grad_norm": 1.524198055267334,
940
  "learning_rate": 6.25e-05,
941
- "loss": 0.0727,
942
  "step": 660
943
  },
944
  {
945
  "epoch": 2.66,
946
- "grad_norm": 0.9438517689704895,
947
  "learning_rate": 6.25e-05,
948
- "loss": 0.0641,
949
  "step": 665
950
  },
951
  {
952
  "epoch": 2.68,
953
- "grad_norm": 1.0982081890106201,
954
  "learning_rate": 6.25e-05,
955
- "loss": 0.0647,
956
  "step": 670
957
  },
958
  {
959
  "epoch": 2.7,
960
- "grad_norm": 0.7919325232505798,
961
  "learning_rate": 6.25e-05,
962
- "loss": 0.0689,
963
  "step": 675
964
  },
965
  {
966
  "epoch": 2.7199999999999998,
967
- "grad_norm": 0.9766571521759033,
968
  "learning_rate": 6.25e-05,
969
- "loss": 0.0671,
970
  "step": 680
971
  },
972
  {
973
  "epoch": 2.74,
974
- "grad_norm": 1.0895709991455078,
975
  "learning_rate": 6.25e-05,
976
- "loss": 0.0681,
977
  "step": 685
978
  },
979
  {
980
  "epoch": 2.76,
981
- "grad_norm": 1.1461646556854248,
982
  "learning_rate": 6.25e-05,
983
- "loss": 0.0729,
984
  "step": 690
985
  },
986
  {
987
  "epoch": 2.7800000000000002,
988
- "grad_norm": 0.7813361883163452,
989
  "learning_rate": 6.25e-05,
990
- "loss": 0.0651,
991
  "step": 695
992
  },
993
  {
994
  "epoch": 2.8,
995
- "grad_norm": 0.8545769453048706,
996
  "learning_rate": 6.25e-05,
997
- "loss": 0.064,
998
  "step": 700
999
  },
1000
  {
1001
  "epoch": 2.82,
1002
- "grad_norm": 0.8444038033485413,
1003
  "learning_rate": 6.25e-05,
1004
- "loss": 0.0626,
1005
  "step": 705
1006
  },
1007
  {
1008
  "epoch": 2.84,
1009
- "grad_norm": 1.022660732269287,
1010
  "learning_rate": 6.25e-05,
1011
- "loss": 0.0644,
1012
  "step": 710
1013
  },
1014
  {
1015
  "epoch": 2.86,
1016
- "grad_norm": 0.7192943096160889,
1017
  "learning_rate": 6.25e-05,
1018
- "loss": 0.0605,
1019
  "step": 715
1020
  },
1021
  {
1022
  "epoch": 2.88,
1023
- "grad_norm": 0.8225955367088318,
1024
  "learning_rate": 6.25e-05,
1025
- "loss": 0.0657,
1026
  "step": 720
1027
  },
1028
  {
1029
  "epoch": 2.9,
1030
- "grad_norm": 0.8145541548728943,
1031
  "learning_rate": 6.25e-05,
1032
- "loss": 0.058,
1033
  "step": 725
1034
  },
1035
  {
1036
  "epoch": 2.92,
1037
- "grad_norm": 0.8709245920181274,
1038
  "learning_rate": 6.25e-05,
1039
- "loss": 0.0568,
1040
  "step": 730
1041
  },
1042
  {
1043
  "epoch": 2.94,
1044
- "grad_norm": 1.0601686239242554,
1045
  "learning_rate": 6.25e-05,
1046
- "loss": 0.0608,
1047
  "step": 735
1048
  },
1049
  {
1050
  "epoch": 2.96,
1051
- "grad_norm": 1.0230211019515991,
1052
  "learning_rate": 6.25e-05,
1053
- "loss": 0.0666,
1054
  "step": 740
1055
  },
1056
  {
1057
  "epoch": 2.98,
1058
- "grad_norm": 0.9883492588996887,
1059
  "learning_rate": 6.25e-05,
1060
- "loss": 0.062,
1061
  "step": 745
1062
  },
1063
  {
1064
  "epoch": 3.0,
1065
- "grad_norm": 1.0670173168182373,
1066
  "learning_rate": 6.25e-05,
1067
- "loss": 0.0724,
 
 
 
 
 
 
 
 
 
1068
  "step": 750
1069
  },
1070
  {
1071
  "epoch": 3.02,
1072
- "grad_norm": 1.0427318811416626,
1073
  "learning_rate": 6.25e-05,
1074
- "loss": 0.0477,
1075
  "step": 755
1076
  },
1077
  {
1078
  "epoch": 3.04,
1079
- "grad_norm": 1.355022668838501,
1080
  "learning_rate": 6.25e-05,
1081
- "loss": 0.0455,
1082
  "step": 760
1083
  },
1084
  {
1085
  "epoch": 3.06,
1086
- "grad_norm": 1.001657247543335,
1087
  "learning_rate": 6.25e-05,
1088
- "loss": 0.0605,
1089
  "step": 765
1090
  },
1091
  {
1092
  "epoch": 3.08,
1093
- "grad_norm": 1.4077788591384888,
1094
  "learning_rate": 6.25e-05,
1095
- "loss": 0.0463,
1096
  "step": 770
1097
  },
1098
  {
1099
  "epoch": 3.1,
1100
- "grad_norm": 1.3163388967514038,
1101
  "learning_rate": 6.25e-05,
1102
- "loss": 0.0581,
1103
  "step": 775
1104
  },
1105
  {
1106
  "epoch": 3.12,
1107
- "grad_norm": 0.6931395530700684,
1108
  "learning_rate": 6.25e-05,
1109
- "loss": 0.0531,
1110
  "step": 780
1111
  },
1112
  {
1113
  "epoch": 3.14,
1114
- "grad_norm": 0.648444652557373,
1115
  "learning_rate": 6.25e-05,
1116
- "loss": 0.0517,
1117
  "step": 785
1118
  },
1119
  {
1120
  "epoch": 3.16,
1121
- "grad_norm": 0.8961315751075745,
1122
  "learning_rate": 6.25e-05,
1123
- "loss": 0.0551,
1124
  "step": 790
1125
  },
1126
  {
1127
  "epoch": 3.18,
1128
- "grad_norm": 0.8918541669845581,
1129
  "learning_rate": 6.25e-05,
1130
- "loss": 0.0573,
1131
  "step": 795
1132
  },
1133
  {
1134
  "epoch": 3.2,
1135
- "grad_norm": 0.638659656047821,
1136
  "learning_rate": 6.25e-05,
1137
- "loss": 0.0544,
1138
  "step": 800
1139
  },
1140
  {
1141
  "epoch": 3.22,
1142
- "grad_norm": 0.6866273880004883,
1143
  "learning_rate": 6.25e-05,
1144
- "loss": 0.052,
1145
  "step": 805
1146
  },
1147
  {
1148
  "epoch": 3.24,
1149
- "grad_norm": 1.7238422632217407,
1150
  "learning_rate": 6.25e-05,
1151
- "loss": 0.0555,
1152
  "step": 810
1153
  },
1154
  {
1155
  "epoch": 3.26,
1156
- "grad_norm": 0.958077073097229,
1157
  "learning_rate": 6.25e-05,
1158
- "loss": 0.0606,
1159
  "step": 815
1160
  },
1161
  {
1162
  "epoch": 3.2800000000000002,
1163
- "grad_norm": 0.8000004887580872,
1164
  "learning_rate": 6.25e-05,
1165
- "loss": 0.05,
1166
  "step": 820
1167
  },
1168
  {
1169
  "epoch": 3.3,
1170
- "grad_norm": 0.7521831393241882,
1171
  "learning_rate": 6.25e-05,
1172
- "loss": 0.0486,
1173
  "step": 825
1174
  },
1175
  {
1176
  "epoch": 3.32,
1177
- "grad_norm": 0.8134447336196899,
1178
  "learning_rate": 6.25e-05,
1179
- "loss": 0.0557,
1180
  "step": 830
1181
  },
1182
  {
1183
  "epoch": 3.34,
1184
- "grad_norm": 1.1599044799804688,
1185
  "learning_rate": 6.25e-05,
1186
- "loss": 0.0596,
1187
  "step": 835
1188
  },
1189
  {
1190
  "epoch": 3.36,
1191
- "grad_norm": 0.5582810044288635,
1192
  "learning_rate": 6.25e-05,
1193
- "loss": 0.05,
1194
  "step": 840
1195
  },
1196
  {
1197
  "epoch": 3.38,
1198
- "grad_norm": 0.6436423659324646,
1199
  "learning_rate": 6.25e-05,
1200
- "loss": 0.048,
1201
  "step": 845
1202
  },
1203
  {
1204
  "epoch": 3.4,
1205
- "grad_norm": 1.0337690114974976,
1206
  "learning_rate": 6.25e-05,
1207
- "loss": 0.0512,
1208
  "step": 850
1209
  },
1210
  {
1211
  "epoch": 3.42,
1212
- "grad_norm": 1.2385281324386597,
1213
  "learning_rate": 6.25e-05,
1214
- "loss": 0.0541,
1215
  "step": 855
1216
  },
1217
  {
1218
  "epoch": 3.44,
1219
- "grad_norm": 1.335816740989685,
1220
  "learning_rate": 6.25e-05,
1221
- "loss": 0.054,
1222
  "step": 860
1223
  },
1224
  {
1225
  "epoch": 3.46,
1226
- "grad_norm": 0.8935145139694214,
1227
  "learning_rate": 6.25e-05,
1228
- "loss": 0.0529,
1229
  "step": 865
1230
  },
1231
  {
1232
  "epoch": 3.48,
1233
- "grad_norm": 0.897282600402832,
1234
  "learning_rate": 6.25e-05,
1235
- "loss": 0.0569,
1236
  "step": 870
1237
  },
1238
  {
1239
  "epoch": 3.5,
1240
- "grad_norm": 0.5967718362808228,
1241
  "learning_rate": 6.25e-05,
1242
- "loss": 0.0554,
1243
  "step": 875
1244
  },
1245
  {
1246
  "epoch": 3.52,
1247
- "grad_norm": 0.6769823431968689,
1248
  "learning_rate": 6.25e-05,
1249
- "loss": 0.0541,
1250
  "step": 880
1251
  },
1252
  {
1253
  "epoch": 3.54,
1254
- "grad_norm": 0.6052355170249939,
1255
  "learning_rate": 6.25e-05,
1256
- "loss": 0.0453,
1257
  "step": 885
1258
  },
1259
  {
1260
  "epoch": 3.56,
1261
- "grad_norm": 0.6003367900848389,
1262
  "learning_rate": 6.25e-05,
1263
- "loss": 0.0488,
1264
  "step": 890
1265
  },
1266
  {
1267
  "epoch": 3.58,
1268
- "grad_norm": 1.1685441732406616,
1269
  "learning_rate": 6.25e-05,
1270
- "loss": 0.0518,
1271
  "step": 895
1272
  },
1273
  {
1274
  "epoch": 3.6,
1275
- "grad_norm": 1.1867949962615967,
1276
  "learning_rate": 6.25e-05,
1277
- "loss": 0.0581,
1278
  "step": 900
1279
  },
1280
  {
1281
  "epoch": 3.62,
1282
- "grad_norm": 0.8192417025566101,
1283
  "learning_rate": 6.25e-05,
1284
- "loss": 0.0564,
1285
  "step": 905
1286
  },
1287
  {
1288
  "epoch": 3.64,
1289
- "grad_norm": 0.9395178556442261,
1290
  "learning_rate": 6.25e-05,
1291
- "loss": 0.057,
1292
  "step": 910
1293
  },
1294
  {
1295
  "epoch": 3.66,
1296
- "grad_norm": 0.8012380003929138,
1297
  "learning_rate": 6.25e-05,
1298
- "loss": 0.0492,
1299
  "step": 915
1300
  },
1301
  {
1302
  "epoch": 3.68,
1303
- "grad_norm": 0.6032869815826416,
1304
  "learning_rate": 6.25e-05,
1305
- "loss": 0.0551,
1306
  "step": 920
1307
  },
1308
  {
1309
  "epoch": 3.7,
1310
- "grad_norm": 0.6057426333427429,
1311
  "learning_rate": 6.25e-05,
1312
- "loss": 0.0648,
1313
  "step": 925
1314
  },
1315
  {
1316
  "epoch": 3.7199999999999998,
1317
- "grad_norm": 0.752521276473999,
1318
  "learning_rate": 6.25e-05,
1319
- "loss": 0.0519,
1320
  "step": 930
1321
  },
1322
  {
1323
  "epoch": 3.74,
1324
- "grad_norm": 0.9516021609306335,
1325
  "learning_rate": 6.25e-05,
1326
- "loss": 0.0594,
1327
  "step": 935
1328
  },
1329
  {
1330
  "epoch": 3.76,
1331
- "grad_norm": 1.0422921180725098,
1332
  "learning_rate": 6.25e-05,
1333
- "loss": 0.0547,
1334
  "step": 940
1335
  },
1336
  {
1337
  "epoch": 3.7800000000000002,
1338
- "grad_norm": 0.7628741264343262,
1339
  "learning_rate": 6.25e-05,
1340
- "loss": 0.0575,
1341
  "step": 945
1342
  },
1343
  {
1344
  "epoch": 3.8,
1345
- "grad_norm": 0.6722723841667175,
1346
  "learning_rate": 6.25e-05,
1347
- "loss": 0.0464,
1348
  "step": 950
1349
  },
1350
  {
1351
  "epoch": 3.82,
1352
- "grad_norm": 1.0617127418518066,
1353
  "learning_rate": 6.25e-05,
1354
- "loss": 0.0477,
1355
  "step": 955
1356
  },
1357
  {
1358
  "epoch": 3.84,
1359
- "grad_norm": 0.689552903175354,
1360
  "learning_rate": 6.25e-05,
1361
- "loss": 0.0462,
1362
  "step": 960
1363
  },
1364
  {
1365
  "epoch": 3.86,
1366
- "grad_norm": 0.7258830666542053,
1367
  "learning_rate": 6.25e-05,
1368
- "loss": 0.0543,
1369
  "step": 965
1370
  },
1371
  {
1372
  "epoch": 3.88,
1373
- "grad_norm": 0.9028825759887695,
1374
  "learning_rate": 6.25e-05,
1375
- "loss": 0.0561,
1376
  "step": 970
1377
  },
1378
  {
1379
  "epoch": 3.9,
1380
- "grad_norm": 1.1875150203704834,
1381
  "learning_rate": 6.25e-05,
1382
- "loss": 0.0477,
1383
  "step": 975
1384
  },
1385
  {
1386
  "epoch": 3.92,
1387
- "grad_norm": 1.2121100425720215,
1388
  "learning_rate": 6.25e-05,
1389
- "loss": 0.0541,
1390
  "step": 980
1391
  },
1392
  {
1393
  "epoch": 3.94,
1394
- "grad_norm": 0.7245278358459473,
1395
  "learning_rate": 6.25e-05,
1396
- "loss": 0.0499,
1397
  "step": 985
1398
  },
1399
  {
1400
  "epoch": 3.96,
1401
- "grad_norm": 0.7678513526916504,
1402
  "learning_rate": 6.25e-05,
1403
- "loss": 0.0548,
1404
  "step": 990
1405
  },
1406
  {
1407
  "epoch": 3.98,
1408
- "grad_norm": 0.621068000793457,
1409
  "learning_rate": 6.25e-05,
1410
- "loss": 0.0413,
1411
  "step": 995
1412
  },
1413
  {
1414
  "epoch": 4.0,
1415
- "grad_norm": 0.6947305798530579,
1416
  "learning_rate": 6.25e-05,
1417
- "loss": 0.0576,
1418
  "step": 1000
1419
  },
1420
  {
1421
  "epoch": 4.0,
1422
- "eval_cer": 0.04981744137059402,
1423
- "eval_loss": 0.06959603726863861,
1424
- "eval_runtime": 490.1374,
1425
- "eval_samples_per_second": 2.04,
1426
- "eval_steps_per_second": 0.51,
1427
  "step": 1000
1428
  }
1429
  ],
@@ -1431,7 +1449,7 @@
1431
  "max_steps": 5000,
1432
  "num_input_tokens_seen": 0,
1433
  "num_train_epochs": 20,
1434
- "save_steps": 500,
1435
  "stateful_callbacks": {
1436
  "TrainerControl": {
1437
  "args": {
 
1
  {
2
+ "best_metric": 0.046749928297655986,
3
  "best_model_checkpoint": "./whisper-large-v3-turbo-finetuned-lora/checkpoint-1000",
4
  "epoch": 4.0,
5
+ "eval_steps": 250,
6
  "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
+ "grad_norm": 11.320270538330078,
14
+ "learning_rate": 1.25e-06,
15
+ "loss": 3.7364,
16
  "step": 5
17
  },
18
  {
19
  "epoch": 0.04,
20
+ "grad_norm": 10.51279354095459,
21
+ "learning_rate": 2.8124999999999998e-06,
22
+ "loss": 3.669,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.06,
27
+ "grad_norm": 10.274462699890137,
28
+ "learning_rate": 4.3750000000000005e-06,
29
+ "loss": 3.6416,
30
  "step": 15
31
  },
32
  {
33
  "epoch": 0.08,
34
+ "grad_norm": 10.032905578613281,
35
+ "learning_rate": 5.9375e-06,
36
+ "loss": 3.5981,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.1,
41
+ "grad_norm": 9.448946952819824,
42
+ "learning_rate": 7.1875e-06,
43
+ "loss": 3.4937,
44
  "step": 25
45
  },
46
  {
47
  "epoch": 0.12,
48
+ "grad_norm": 8.844466209411621,
49
+ "learning_rate": 8.750000000000001e-06,
50
+ "loss": 3.3145,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.14,
55
+ "grad_norm": 8.622856140136719,
56
+ "learning_rate": 1.03125e-05,
57
+ "loss": 3.0978,
58
  "step": 35
59
  },
60
  {
61
  "epoch": 0.16,
62
+ "grad_norm": 6.81293249130249,
63
+ "learning_rate": 1.1875e-05,
64
+ "loss": 2.9583,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.18,
69
+ "grad_norm": 5.21970272064209,
70
+ "learning_rate": 1.34375e-05,
71
+ "loss": 2.7668,
72
  "step": 45
73
  },
74
  {
75
  "epoch": 0.2,
76
+ "grad_norm": 4.440727710723877,
77
+ "learning_rate": 1.5e-05,
78
+ "loss": 2.5467,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.22,
83
+ "grad_norm": 4.219883918762207,
84
+ "learning_rate": 1.6562500000000003e-05,
85
+ "loss": 2.3237,
86
  "step": 55
87
  },
88
  {
89
  "epoch": 0.24,
90
+ "grad_norm": 3.6006925106048584,
91
+ "learning_rate": 1.8125e-05,
92
+ "loss": 2.1307,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.26,
97
+ "grad_norm": 3.07859206199646,
98
+ "learning_rate": 1.96875e-05,
99
+ "loss": 1.9725,
100
  "step": 65
101
  },
102
  {
103
  "epoch": 0.28,
104
+ "grad_norm": 3.0515847206115723,
105
+ "learning_rate": 2.125e-05,
106
+ "loss": 1.8568,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.3,
111
+ "grad_norm": 2.7134475708007812,
112
+ "learning_rate": 2.28125e-05,
113
+ "loss": 1.6583,
114
  "step": 75
115
  },
116
  {
117
  "epoch": 0.32,
118
+ "grad_norm": 2.7643449306488037,
119
+ "learning_rate": 2.4375000000000003e-05,
120
+ "loss": 1.4433,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.34,
125
+ "grad_norm": 2.427220582962036,
126
+ "learning_rate": 2.59375e-05,
127
+ "loss": 1.3435,
128
  "step": 85
129
  },
130
  {
131
  "epoch": 0.36,
132
+ "grad_norm": 2.0288472175598145,
133
+ "learning_rate": 2.75e-05,
134
+ "loss": 1.2446,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.38,
139
+ "grad_norm": 2.7967100143432617,
140
+ "learning_rate": 2.90625e-05,
141
+ "loss": 1.1031,
142
  "step": 95
143
  },
144
  {
145
  "epoch": 0.4,
146
+ "grad_norm": 2.407944440841675,
147
+ "learning_rate": 3.0625e-05,
148
+ "loss": 0.9777,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.42,
153
+ "grad_norm": 3.4412190914154053,
154
+ "learning_rate": 3.21875e-05,
155
+ "loss": 0.9294,
156
  "step": 105
157
  },
158
  {
159
  "epoch": 0.44,
160
+ "grad_norm": 2.0439155101776123,
161
+ "learning_rate": 3.375e-05,
162
+ "loss": 0.8108,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.46,
167
+ "grad_norm": 1.616652011871338,
168
+ "learning_rate": 3.53125e-05,
169
+ "loss": 0.6951,
170
  "step": 115
171
  },
172
  {
173
  "epoch": 0.48,
174
+ "grad_norm": 1.690824031829834,
175
+ "learning_rate": 3.6875e-05,
176
+ "loss": 0.6352,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.5,
181
+ "grad_norm": 2.7902116775512695,
182
+ "learning_rate": 3.84375e-05,
183
+ "loss": 0.5562,
184
  "step": 125
185
  },
186
  {
187
  "epoch": 0.52,
188
+ "grad_norm": 2.4872801303863525,
189
+ "learning_rate": 4e-05,
190
+ "loss": 0.5111,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.54,
195
+ "grad_norm": 2.4177122116088867,
196
+ "learning_rate": 4.15625e-05,
197
+ "loss": 0.4696,
198
  "step": 135
199
  },
200
  {
201
  "epoch": 0.56,
202
+ "grad_norm": 2.0231056213378906,
203
+ "learning_rate": 4.3125e-05,
204
+ "loss": 0.4462,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.58,
209
+ "grad_norm": 2.004688024520874,
210
+ "learning_rate": 4.46875e-05,
211
+ "loss": 0.4224,
212
  "step": 145
213
  },
214
  {
215
  "epoch": 0.6,
216
+ "grad_norm": 3.170652389526367,
217
+ "learning_rate": 4.625e-05,
218
+ "loss": 0.3967,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.62,
223
+ "grad_norm": 1.8620476722717285,
224
+ "learning_rate": 4.7812500000000003e-05,
225
+ "loss": 0.3739,
226
  "step": 155
227
  },
228
  {
229
  "epoch": 0.64,
230
+ "grad_norm": 2.4667856693267822,
231
+ "learning_rate": 4.9375e-05,
232
+ "loss": 0.3542,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.66,
237
+ "grad_norm": 1.7935612201690674,
238
+ "learning_rate": 5.09375e-05,
239
+ "loss": 0.3409,
240
  "step": 165
241
  },
242
  {
243
  "epoch": 0.68,
244
+ "grad_norm": 1.6230987310409546,
245
+ "learning_rate": 5.25e-05,
246
+ "loss": 0.3068,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.7,
251
+ "grad_norm": 2.739957094192505,
252
+ "learning_rate": 5.40625e-05,
253
+ "loss": 0.2963,
254
  "step": 175
255
  },
256
  {
257
  "epoch": 0.72,
258
+ "grad_norm": 1.7342944145202637,
259
+ "learning_rate": 5.5625000000000004e-05,
260
+ "loss": 0.253,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.74,
265
+ "grad_norm": 2.0191333293914795,
266
+ "learning_rate": 5.71875e-05,
267
+ "loss": 0.2175,
268
  "step": 185
269
  },
270
  {
271
  "epoch": 0.76,
272
+ "grad_norm": 1.6039254665374756,
273
+ "learning_rate": 5.875e-05,
274
+ "loss": 0.2009,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.78,
279
+ "grad_norm": 2.2860054969787598,
280
+ "learning_rate": 6.03125e-05,
281
+ "loss": 0.1774,
282
  "step": 195
283
  },
284
  {
285
  "epoch": 0.8,
286
+ "grad_norm": 1.528680443763733,
287
+ "learning_rate": 6.1875e-05,
288
+ "loss": 0.1603,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.82,
293
+ "grad_norm": 1.526693344116211,
294
+ "learning_rate": 6.25e-05,
295
+ "loss": 0.1504,
296
  "step": 205
297
  },
298
  {
299
  "epoch": 0.84,
300
+ "grad_norm": 2.199506998062134,
301
+ "learning_rate": 6.25e-05,
302
+ "loss": 0.1357,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.86,
307
+ "grad_norm": 2.170020341873169,
308
+ "learning_rate": 6.25e-05,
309
+ "loss": 0.1519,
310
  "step": 215
311
  },
312
  {
313
  "epoch": 0.88,
314
+ "grad_norm": 1.5418131351470947,
315
+ "learning_rate": 6.25e-05,
316
+ "loss": 0.1524,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.9,
321
+ "grad_norm": 2.1583192348480225,
322
+ "learning_rate": 6.25e-05,
323
+ "loss": 0.1264,
324
  "step": 225
325
  },
326
  {
327
  "epoch": 0.92,
328
+ "grad_norm": 1.908937692642212,
329
+ "learning_rate": 6.25e-05,
330
+ "loss": 0.1221,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.94,
335
+ "grad_norm": 1.4072145223617554,
336
+ "learning_rate": 6.25e-05,
337
+ "loss": 0.1254,
338
  "step": 235
339
  },
340
  {
341
  "epoch": 0.96,
342
+ "grad_norm": 1.3102571964263916,
343
+ "learning_rate": 6.25e-05,
344
+ "loss": 0.1412,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.98,
349
+ "grad_norm": 1.4941678047180176,
350
+ "learning_rate": 6.25e-05,
351
+ "loss": 0.1203,
352
  "step": 245
353
  },
354
  {
355
  "epoch": 1.0,
356
+ "grad_norm": 1.8279727697372437,
357
+ "learning_rate": 6.25e-05,
358
+ "loss": 0.1107,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "eval_cer": 0.07099835736448257,
364
+ "eval_loss": 0.11958163231611252,
365
+ "eval_runtime": 280.2233,
366
+ "eval_samples_per_second": 1.784,
367
+ "eval_steps_per_second": 0.446,
368
  "step": 250
369
  },
370
  {
371
  "epoch": 1.02,
372
+ "grad_norm": 1.0286716222763062,
373
+ "learning_rate": 6.25e-05,
374
+ "loss": 0.1047,
375
  "step": 255
376
  },
377
  {
378
  "epoch": 1.04,
379
+ "grad_norm": 1.7464964389801025,
380
+ "learning_rate": 6.25e-05,
381
+ "loss": 0.1108,
382
  "step": 260
383
  },
384
  {
385
  "epoch": 1.06,
386
+ "grad_norm": 1.3992992639541626,
387
+ "learning_rate": 6.25e-05,
388
+ "loss": 0.1176,
389
  "step": 265
390
  },
391
  {
392
  "epoch": 1.08,
393
+ "grad_norm": 1.150010347366333,
394
+ "learning_rate": 6.25e-05,
395
+ "loss": 0.1059,
396
  "step": 270
397
  },
398
  {
399
  "epoch": 1.1,
400
+ "grad_norm": 1.983775019645691,
401
+ "learning_rate": 6.25e-05,
402
+ "loss": 0.1223,
403
  "step": 275
404
  },
405
  {
406
  "epoch": 1.12,
407
+ "grad_norm": 1.0895100831985474,
408
+ "learning_rate": 6.25e-05,
409
+ "loss": 0.0929,
410
  "step": 280
411
  },
412
  {
413
  "epoch": 1.1400000000000001,
414
+ "grad_norm": 1.631362795829773,
415
+ "learning_rate": 6.25e-05,
416
+ "loss": 0.1005,
417
  "step": 285
418
  },
419
  {
420
  "epoch": 1.16,
421
+ "grad_norm": 1.39686918258667,
422
+ "learning_rate": 6.25e-05,
423
+ "loss": 0.1022,
424
  "step": 290
425
  },
426
  {
427
  "epoch": 1.18,
428
+ "grad_norm": 1.2856369018554688,
429
+ "learning_rate": 6.25e-05,
430
+ "loss": 0.0908,
431
  "step": 295
432
  },
433
  {
434
  "epoch": 1.2,
435
+ "grad_norm": 1.2866718769073486,
436
+ "learning_rate": 6.25e-05,
437
+ "loss": 0.103,
438
  "step": 300
439
  },
440
  {
441
  "epoch": 1.22,
442
+ "grad_norm": 1.1115745306015015,
443
  "learning_rate": 6.25e-05,
444
+ "loss": 0.0934,
445
  "step": 305
446
  },
447
  {
448
  "epoch": 1.24,
449
+ "grad_norm": 1.2674397230148315,
450
  "learning_rate": 6.25e-05,
451
+ "loss": 0.0807,
452
  "step": 310
453
  },
454
  {
455
  "epoch": 1.26,
456
+ "grad_norm": 3.003493547439575,
457
  "learning_rate": 6.25e-05,
458
+ "loss": 0.1163,
459
  "step": 315
460
  },
461
  {
462
  "epoch": 1.28,
463
+ "grad_norm": 1.3472819328308105,
464
  "learning_rate": 6.25e-05,
465
+ "loss": 0.0939,
466
  "step": 320
467
  },
468
  {
469
  "epoch": 1.3,
470
+ "grad_norm": 1.576393485069275,
471
  "learning_rate": 6.25e-05,
472
+ "loss": 0.1023,
473
  "step": 325
474
  },
475
  {
476
  "epoch": 1.32,
477
+ "grad_norm": 1.2895311117172241,
478
  "learning_rate": 6.25e-05,
479
+ "loss": 0.0908,
480
  "step": 330
481
  },
482
  {
483
  "epoch": 1.34,
484
+ "grad_norm": 1.7044769525527954,
485
  "learning_rate": 6.25e-05,
486
+ "loss": 0.0849,
487
  "step": 335
488
  },
489
  {
490
  "epoch": 1.3599999999999999,
491
+ "grad_norm": 1.316157341003418,
492
  "learning_rate": 6.25e-05,
493
+ "loss": 0.0865,
494
  "step": 340
495
  },
496
  {
497
  "epoch": 1.38,
498
+ "grad_norm": 1.4973046779632568,
499
  "learning_rate": 6.25e-05,
500
+ "loss": 0.0864,
501
  "step": 345
502
  },
503
  {
504
  "epoch": 1.4,
505
+ "grad_norm": 0.7230541706085205,
506
  "learning_rate": 6.25e-05,
507
+ "loss": 0.074,
508
  "step": 350
509
  },
510
  {
511
  "epoch": 1.42,
512
+ "grad_norm": 1.026584267616272,
513
  "learning_rate": 6.25e-05,
514
+ "loss": 0.0788,
515
  "step": 355
516
  },
517
  {
518
  "epoch": 1.44,
519
+ "grad_norm": 1.3976407051086426,
520
  "learning_rate": 6.25e-05,
521
+ "loss": 0.0862,
522
  "step": 360
523
  },
524
  {
525
  "epoch": 1.46,
526
+ "grad_norm": 1.3054964542388916,
527
  "learning_rate": 6.25e-05,
528
+ "loss": 0.0757,
529
  "step": 365
530
  },
531
  {
532
  "epoch": 1.48,
533
+ "grad_norm": 1.8163508176803589,
534
  "learning_rate": 6.25e-05,
535
+ "loss": 0.0822,
536
  "step": 370
537
  },
538
  {
539
  "epoch": 1.5,
540
+ "grad_norm": 1.23239004611969,
541
  "learning_rate": 6.25e-05,
542
+ "loss": 0.0886,
543
  "step": 375
544
  },
545
  {
546
  "epoch": 1.52,
547
+ "grad_norm": 1.1320103406906128,
548
  "learning_rate": 6.25e-05,
549
+ "loss": 0.0834,
550
  "step": 380
551
  },
552
  {
553
  "epoch": 1.54,
554
+ "grad_norm": 1.8913005590438843,
555
  "learning_rate": 6.25e-05,
556
+ "loss": 0.0917,
557
  "step": 385
558
  },
559
  {
560
  "epoch": 1.56,
561
+ "grad_norm": 1.3522365093231201,
562
  "learning_rate": 6.25e-05,
563
+ "loss": 0.0927,
564
  "step": 390
565
  },
566
  {
567
  "epoch": 1.58,
568
+ "grad_norm": 1.3687407970428467,
569
  "learning_rate": 6.25e-05,
570
+ "loss": 0.0701,
571
  "step": 395
572
  },
573
  {
574
  "epoch": 1.6,
575
+ "grad_norm": 1.6905425786972046,
576
  "learning_rate": 6.25e-05,
577
+ "loss": 0.0914,
578
  "step": 400
579
  },
580
  {
581
  "epoch": 1.62,
582
+ "grad_norm": 1.3366554975509644,
583
  "learning_rate": 6.25e-05,
584
+ "loss": 0.0833,
585
  "step": 405
586
  },
587
  {
588
  "epoch": 1.6400000000000001,
589
+ "grad_norm": 1.0540807247161865,
590
  "learning_rate": 6.25e-05,
591
+ "loss": 0.088,
592
  "step": 410
593
  },
594
  {
595
  "epoch": 1.6600000000000001,
596
+ "grad_norm": 1.0234986543655396,
597
  "learning_rate": 6.25e-05,
598
+ "loss": 0.075,
599
  "step": 415
600
  },
601
  {
602
  "epoch": 1.6800000000000002,
603
+ "grad_norm": 1.3205113410949707,
604
  "learning_rate": 6.25e-05,
605
+ "loss": 0.0931,
606
  "step": 420
607
  },
608
  {
609
  "epoch": 1.7,
610
+ "grad_norm": 0.8865799307823181,
611
  "learning_rate": 6.25e-05,
612
+ "loss": 0.0831,
613
  "step": 425
614
  },
615
  {
616
  "epoch": 1.72,
617
+ "grad_norm": 1.2646653652191162,
618
  "learning_rate": 6.25e-05,
619
+ "loss": 0.0825,
620
  "step": 430
621
  },
622
  {
623
  "epoch": 1.74,
624
+ "grad_norm": 0.9991198182106018,
625
  "learning_rate": 6.25e-05,
626
+ "loss": 0.0825,
627
  "step": 435
628
  },
629
  {
630
  "epoch": 1.76,
631
+ "grad_norm": 0.8784312605857849,
632
  "learning_rate": 6.25e-05,
633
+ "loss": 0.082,
634
  "step": 440
635
  },
636
  {
637
  "epoch": 1.78,
638
+ "grad_norm": 1.304877519607544,
639
  "learning_rate": 6.25e-05,
640
+ "loss": 0.0775,
641
  "step": 445
642
  },
643
  {
644
  "epoch": 1.8,
645
+ "grad_norm": 1.2007408142089844,
646
  "learning_rate": 6.25e-05,
647
+ "loss": 0.0796,
648
  "step": 450
649
  },
650
  {
651
  "epoch": 1.8199999999999998,
652
+ "grad_norm": 0.9978143572807312,
653
  "learning_rate": 6.25e-05,
654
+ "loss": 0.0794,
655
  "step": 455
656
  },
657
  {
658
  "epoch": 1.8399999999999999,
659
+ "grad_norm": 1.2883387804031372,
660
  "learning_rate": 6.25e-05,
661
+ "loss": 0.0744,
662
  "step": 460
663
  },
664
  {
665
  "epoch": 1.8599999999999999,
666
+ "grad_norm": 0.8542335629463196,
667
  "learning_rate": 6.25e-05,
668
+ "loss": 0.074,
669
  "step": 465
670
  },
671
  {
672
  "epoch": 1.88,
673
+ "grad_norm": 1.0009572505950928,
674
  "learning_rate": 6.25e-05,
675
+ "loss": 0.0823,
676
  "step": 470
677
  },
678
  {
679
  "epoch": 1.9,
680
+ "grad_norm": 1.126528263092041,
681
  "learning_rate": 6.25e-05,
682
+ "loss": 0.0805,
683
  "step": 475
684
  },
685
  {
686
  "epoch": 1.92,
687
+ "grad_norm": 0.9336584210395813,
688
  "learning_rate": 6.25e-05,
689
+ "loss": 0.0722,
690
  "step": 480
691
  },
692
  {
693
  "epoch": 1.94,
694
+ "grad_norm": 1.0387274026870728,
695
  "learning_rate": 6.25e-05,
696
+ "loss": 0.0722,
697
  "step": 485
698
  },
699
  {
700
  "epoch": 1.96,
701
+ "grad_norm": 1.4692296981811523,
702
  "learning_rate": 6.25e-05,
703
+ "loss": 0.071,
704
  "step": 490
705
  },
706
  {
707
  "epoch": 1.98,
708
+ "grad_norm": 0.9859362244606018,
709
  "learning_rate": 6.25e-05,
710
+ "loss": 0.0795,
711
  "step": 495
712
  },
713
  {
714
  "epoch": 2.0,
715
+ "grad_norm": 1.0557219982147217,
716
  "learning_rate": 6.25e-05,
717
+ "loss": 0.0741,
718
  "step": 500
719
  },
720
  {
721
  "epoch": 2.0,
722
+ "eval_cer": 0.05483273798659818,
723
+ "eval_loss": 0.0840950757265091,
724
+ "eval_runtime": 280.5964,
725
+ "eval_samples_per_second": 1.782,
726
+ "eval_steps_per_second": 0.445,
727
  "step": 500
728
  },
729
  {
730
  "epoch": 2.02,
731
+ "grad_norm": 1.1070911884307861,
732
  "learning_rate": 6.25e-05,
733
+ "loss": 0.0552,
734
  "step": 505
735
  },
736
  {
737
  "epoch": 2.04,
738
+ "grad_norm": 0.7629583477973938,
739
  "learning_rate": 6.25e-05,
740
+ "loss": 0.0613,
741
  "step": 510
742
  },
743
  {
744
  "epoch": 2.06,
745
+ "grad_norm": 1.3060976266860962,
746
  "learning_rate": 6.25e-05,
747
+ "loss": 0.0746,
748
  "step": 515
749
  },
750
  {
751
  "epoch": 2.08,
752
+ "grad_norm": 1.067309021949768,
753
  "learning_rate": 6.25e-05,
754
+ "loss": 0.0717,
755
  "step": 520
756
  },
757
  {
758
  "epoch": 2.1,
759
+ "grad_norm": 1.1334558725357056,
760
  "learning_rate": 6.25e-05,
761
+ "loss": 0.0582,
762
  "step": 525
763
  },
764
  {
765
  "epoch": 2.12,
766
+ "grad_norm": 0.8565890192985535,
767
  "learning_rate": 6.25e-05,
768
+ "loss": 0.066,
769
  "step": 530
770
  },
771
  {
772
  "epoch": 2.14,
773
+ "grad_norm": 1.042038083076477,
774
  "learning_rate": 6.25e-05,
775
+ "loss": 0.0686,
776
  "step": 535
777
  },
778
  {
779
  "epoch": 2.16,
780
+ "grad_norm": 0.8787774443626404,
781
  "learning_rate": 6.25e-05,
782
+ "loss": 0.0714,
783
  "step": 540
784
  },
785
  {
786
  "epoch": 2.18,
787
+ "grad_norm": 1.2969956398010254,
788
  "learning_rate": 6.25e-05,
789
+ "loss": 0.069,
790
  "step": 545
791
  },
792
  {
793
  "epoch": 2.2,
794
+ "grad_norm": 1.0625072717666626,
795
  "learning_rate": 6.25e-05,
796
+ "loss": 0.0701,
797
  "step": 550
798
  },
799
  {
800
  "epoch": 2.22,
801
+ "grad_norm": 1.1786212921142578,
802
  "learning_rate": 6.25e-05,
803
+ "loss": 0.0672,
804
  "step": 555
805
  },
806
  {
807
  "epoch": 2.24,
808
+ "grad_norm": 0.7678006887435913,
809
  "learning_rate": 6.25e-05,
810
+ "loss": 0.0637,
811
  "step": 560
812
  },
813
  {
814
  "epoch": 2.26,
815
+ "grad_norm": 1.087916612625122,
816
  "learning_rate": 6.25e-05,
817
+ "loss": 0.0592,
818
  "step": 565
819
  },
820
  {
821
  "epoch": 2.2800000000000002,
822
+ "grad_norm": 1.0709354877471924,
823
  "learning_rate": 6.25e-05,
824
+ "loss": 0.0641,
825
  "step": 570
826
  },
827
  {
828
  "epoch": 2.3,
829
+ "grad_norm": 0.9933990240097046,
830
  "learning_rate": 6.25e-05,
831
+ "loss": 0.0724,
832
  "step": 575
833
  },
834
  {
835
  "epoch": 2.32,
836
+ "grad_norm": 0.9537047147750854,
837
  "learning_rate": 6.25e-05,
838
+ "loss": 0.0565,
839
  "step": 580
840
  },
841
  {
842
  "epoch": 2.34,
843
+ "grad_norm": 0.8913723230361938,
844
  "learning_rate": 6.25e-05,
845
+ "loss": 0.0601,
846
  "step": 585
847
  },
848
  {
849
  "epoch": 2.36,
850
+ "grad_norm": 1.4037823677062988,
851
  "learning_rate": 6.25e-05,
852
+ "loss": 0.0656,
853
  "step": 590
854
  },
855
  {
856
  "epoch": 2.38,
857
+ "grad_norm": 0.8686001896858215,
858
  "learning_rate": 6.25e-05,
859
+ "loss": 0.0617,
860
  "step": 595
861
  },
862
  {
863
  "epoch": 2.4,
864
+ "grad_norm": 1.1040139198303223,
865
  "learning_rate": 6.25e-05,
866
+ "loss": 0.0612,
867
  "step": 600
868
  },
869
  {
870
  "epoch": 2.42,
871
+ "grad_norm": 0.8995397090911865,
872
  "learning_rate": 6.25e-05,
873
+ "loss": 0.0528,
874
  "step": 605
875
  },
876
  {
877
  "epoch": 2.44,
878
+ "grad_norm": 1.0924474000930786,
879
  "learning_rate": 6.25e-05,
880
+ "loss": 0.0551,
881
  "step": 610
882
  },
883
  {
884
  "epoch": 2.46,
885
+ "grad_norm": 1.0748484134674072,
886
  "learning_rate": 6.25e-05,
887
+ "loss": 0.0573,
888
  "step": 615
889
  },
890
  {
891
  "epoch": 2.48,
892
+ "grad_norm": 0.8827953338623047,
893
  "learning_rate": 6.25e-05,
894
+ "loss": 0.0595,
895
  "step": 620
896
  },
897
  {
898
  "epoch": 2.5,
899
+ "grad_norm": 0.8614113926887512,
900
  "learning_rate": 6.25e-05,
901
+ "loss": 0.0694,
902
  "step": 625
903
  },
904
  {
905
  "epoch": 2.52,
906
+ "grad_norm": 0.6579775810241699,
907
  "learning_rate": 6.25e-05,
908
+ "loss": 0.0608,
909
  "step": 630
910
  },
911
  {
912
  "epoch": 2.54,
913
+ "grad_norm": 0.923587441444397,
914
  "learning_rate": 6.25e-05,
915
+ "loss": 0.0684,
916
  "step": 635
917
  },
918
  {
919
  "epoch": 2.56,
920
+ "grad_norm": 1.119313359260559,
921
  "learning_rate": 6.25e-05,
922
+ "loss": 0.0651,
923
  "step": 640
924
  },
925
  {
926
  "epoch": 2.58,
927
+ "grad_norm": 1.1630853414535522,
928
  "learning_rate": 6.25e-05,
929
+ "loss": 0.0773,
930
  "step": 645
931
  },
932
  {
933
  "epoch": 2.6,
934
+ "grad_norm": 0.9517636299133301,
935
  "learning_rate": 6.25e-05,
936
+ "loss": 0.0574,
937
  "step": 650
938
  },
939
  {
940
  "epoch": 2.62,
941
+ "grad_norm": 0.767271101474762,
942
  "learning_rate": 6.25e-05,
943
+ "loss": 0.071,
944
  "step": 655
945
  },
946
  {
947
  "epoch": 2.64,
948
+ "grad_norm": 1.3324207067489624,
949
  "learning_rate": 6.25e-05,
950
+ "loss": 0.0672,
951
  "step": 660
952
  },
953
  {
954
  "epoch": 2.66,
955
+ "grad_norm": 0.8638308048248291,
956
  "learning_rate": 6.25e-05,
957
+ "loss": 0.0602,
958
  "step": 665
959
  },
960
  {
961
  "epoch": 2.68,
962
+ "grad_norm": 0.9522351622581482,
963
  "learning_rate": 6.25e-05,
964
+ "loss": 0.0626,
965
  "step": 670
966
  },
967
  {
968
  "epoch": 2.7,
969
+ "grad_norm": 0.7264077067375183,
970
  "learning_rate": 6.25e-05,
971
+ "loss": 0.0654,
972
  "step": 675
973
  },
974
  {
975
  "epoch": 2.7199999999999998,
976
+ "grad_norm": 1.185275912284851,
977
  "learning_rate": 6.25e-05,
978
+ "loss": 0.0638,
979
  "step": 680
980
  },
981
  {
982
  "epoch": 2.74,
983
+ "grad_norm": 1.549625277519226,
984
  "learning_rate": 6.25e-05,
985
+ "loss": 0.0661,
986
  "step": 685
987
  },
988
  {
989
  "epoch": 2.76,
990
+ "grad_norm": 1.202415108680725,
991
  "learning_rate": 6.25e-05,
992
+ "loss": 0.0709,
993
  "step": 690
994
  },
995
  {
996
  "epoch": 2.7800000000000002,
997
+ "grad_norm": 0.7902194857597351,
998
  "learning_rate": 6.25e-05,
999
+ "loss": 0.0604,
1000
  "step": 695
1001
  },
1002
  {
1003
  "epoch": 2.8,
1004
+ "grad_norm": 1.0128028392791748,
1005
  "learning_rate": 6.25e-05,
1006
+ "loss": 0.0612,
1007
  "step": 700
1008
  },
1009
  {
1010
  "epoch": 2.82,
1011
+ "grad_norm": 0.8418397903442383,
1012
  "learning_rate": 6.25e-05,
1013
+ "loss": 0.0616,
1014
  "step": 705
1015
  },
1016
  {
1017
  "epoch": 2.84,
1018
+ "grad_norm": 0.9352026581764221,
1019
  "learning_rate": 6.25e-05,
1020
+ "loss": 0.0635,
1021
  "step": 710
1022
  },
1023
  {
1024
  "epoch": 2.86,
1025
+ "grad_norm": 0.679918110370636,
1026
  "learning_rate": 6.25e-05,
1027
+ "loss": 0.0588,
1028
  "step": 715
1029
  },
1030
  {
1031
  "epoch": 2.88,
1032
+ "grad_norm": 0.836438000202179,
1033
  "learning_rate": 6.25e-05,
1034
+ "loss": 0.0635,
1035
  "step": 720
1036
  },
1037
  {
1038
  "epoch": 2.9,
1039
+ "grad_norm": 0.7643904089927673,
1040
  "learning_rate": 6.25e-05,
1041
+ "loss": 0.0554,
1042
  "step": 725
1043
  },
1044
  {
1045
  "epoch": 2.92,
1046
+ "grad_norm": 0.9192042946815491,
1047
  "learning_rate": 6.25e-05,
1048
+ "loss": 0.0541,
1049
  "step": 730
1050
  },
1051
  {
1052
  "epoch": 2.94,
1053
+ "grad_norm": 0.9899188280105591,
1054
  "learning_rate": 6.25e-05,
1055
+ "loss": 0.0591,
1056
  "step": 735
1057
  },
1058
  {
1059
  "epoch": 2.96,
1060
+ "grad_norm": 1.112701654434204,
1061
  "learning_rate": 6.25e-05,
1062
+ "loss": 0.0611,
1063
  "step": 740
1064
  },
1065
  {
1066
  "epoch": 2.98,
1067
+ "grad_norm": 0.9096015095710754,
1068
  "learning_rate": 6.25e-05,
1069
+ "loss": 0.0594,
1070
  "step": 745
1071
  },
1072
  {
1073
  "epoch": 3.0,
1074
+ "grad_norm": 1.158527135848999,
1075
  "learning_rate": 6.25e-05,
1076
+ "loss": 0.0703,
1077
+ "step": 750
1078
+ },
1079
+ {
1080
+ "epoch": 3.0,
1081
+ "eval_cer": 0.05350298542486898,
1082
+ "eval_loss": 0.07945344597101212,
1083
+ "eval_runtime": 281.5513,
1084
+ "eval_samples_per_second": 1.776,
1085
+ "eval_steps_per_second": 0.444,
1086
  "step": 750
1087
  },
1088
  {
1089
  "epoch": 3.02,
1090
+ "grad_norm": 0.8544594049453735,
1091
  "learning_rate": 6.25e-05,
1092
+ "loss": 0.0461,
1093
  "step": 755
1094
  },
1095
  {
1096
  "epoch": 3.04,
1097
+ "grad_norm": 0.8411735892295837,
1098
  "learning_rate": 6.25e-05,
1099
+ "loss": 0.0429,
1100
  "step": 760
1101
  },
1102
  {
1103
  "epoch": 3.06,
1104
+ "grad_norm": 0.7515286207199097,
1105
  "learning_rate": 6.25e-05,
1106
+ "loss": 0.0559,
1107
  "step": 765
1108
  },
1109
  {
1110
  "epoch": 3.08,
1111
+ "grad_norm": 0.8125985264778137,
1112
  "learning_rate": 6.25e-05,
1113
+ "loss": 0.044,
1114
  "step": 770
1115
  },
1116
  {
1117
  "epoch": 3.1,
1118
+ "grad_norm": 0.8093322515487671,
1119
  "learning_rate": 6.25e-05,
1120
+ "loss": 0.0529,
1121
  "step": 775
1122
  },
1123
  {
1124
  "epoch": 3.12,
1125
+ "grad_norm": 0.8852378129959106,
1126
  "learning_rate": 6.25e-05,
1127
+ "loss": 0.0508,
1128
  "step": 780
1129
  },
1130
  {
1131
  "epoch": 3.14,
1132
+ "grad_norm": 0.6388903856277466,
1133
  "learning_rate": 6.25e-05,
1134
+ "loss": 0.0491,
1135
  "step": 785
1136
  },
1137
  {
1138
  "epoch": 3.16,
1139
+ "grad_norm": 0.9803158640861511,
1140
  "learning_rate": 6.25e-05,
1141
+ "loss": 0.051,
1142
  "step": 790
1143
  },
1144
  {
1145
  "epoch": 3.18,
1146
+ "grad_norm": 1.163065791130066,
1147
  "learning_rate": 6.25e-05,
1148
+ "loss": 0.0538,
1149
  "step": 795
1150
  },
1151
  {
1152
  "epoch": 3.2,
1153
+ "grad_norm": 0.942138671875,
1154
  "learning_rate": 6.25e-05,
1155
+ "loss": 0.0548,
1156
  "step": 800
1157
  },
1158
  {
1159
  "epoch": 3.22,
1160
+ "grad_norm": 0.763847827911377,
1161
  "learning_rate": 6.25e-05,
1162
+ "loss": 0.0497,
1163
  "step": 805
1164
  },
1165
  {
1166
  "epoch": 3.24,
1167
+ "grad_norm": 1.1041572093963623,
1168
  "learning_rate": 6.25e-05,
1169
+ "loss": 0.0513,
1170
  "step": 810
1171
  },
1172
  {
1173
  "epoch": 3.26,
1174
+ "grad_norm": 0.8744838237762451,
1175
  "learning_rate": 6.25e-05,
1176
+ "loss": 0.0574,
1177
  "step": 815
1178
  },
1179
  {
1180
  "epoch": 3.2800000000000002,
1181
+ "grad_norm": 0.8737279176712036,
1182
  "learning_rate": 6.25e-05,
1183
+ "loss": 0.0485,
1184
  "step": 820
1185
  },
1186
  {
1187
  "epoch": 3.3,
1188
+ "grad_norm": 0.6367043256759644,
1189
  "learning_rate": 6.25e-05,
1190
+ "loss": 0.0462,
1191
  "step": 825
1192
  },
1193
  {
1194
  "epoch": 3.32,
1195
+ "grad_norm": 0.7195335030555725,
1196
  "learning_rate": 6.25e-05,
1197
+ "loss": 0.0529,
1198
  "step": 830
1199
  },
1200
  {
1201
  "epoch": 3.34,
1202
+ "grad_norm": 0.7411594986915588,
1203
  "learning_rate": 6.25e-05,
1204
+ "loss": 0.0558,
1205
  "step": 835
1206
  },
1207
  {
1208
  "epoch": 3.36,
1209
+ "grad_norm": 0.5583875179290771,
1210
  "learning_rate": 6.25e-05,
1211
+ "loss": 0.0498,
1212
  "step": 840
1213
  },
1214
  {
1215
  "epoch": 3.38,
1216
+ "grad_norm": 0.7013912796974182,
1217
  "learning_rate": 6.25e-05,
1218
+ "loss": 0.0465,
1219
  "step": 845
1220
  },
1221
  {
1222
  "epoch": 3.4,
1223
+ "grad_norm": 1.1267294883728027,
1224
  "learning_rate": 6.25e-05,
1225
+ "loss": 0.0505,
1226
  "step": 850
1227
  },
1228
  {
1229
  "epoch": 3.42,
1230
+ "grad_norm": 1.3056484460830688,
1231
  "learning_rate": 6.25e-05,
1232
+ "loss": 0.0515,
1233
  "step": 855
1234
  },
1235
  {
1236
  "epoch": 3.44,
1237
+ "grad_norm": 1.182433843612671,
1238
  "learning_rate": 6.25e-05,
1239
+ "loss": 0.0525,
1240
  "step": 860
1241
  },
1242
  {
1243
  "epoch": 3.46,
1244
+ "grad_norm": 0.8969308733940125,
1245
  "learning_rate": 6.25e-05,
1246
+ "loss": 0.0517,
1247
  "step": 865
1248
  },
1249
  {
1250
  "epoch": 3.48,
1251
+ "grad_norm": 0.7779067158699036,
1252
  "learning_rate": 6.25e-05,
1253
+ "loss": 0.0539,
1254
  "step": 870
1255
  },
1256
  {
1257
  "epoch": 3.5,
1258
+ "grad_norm": 0.591754674911499,
1259
  "learning_rate": 6.25e-05,
1260
+ "loss": 0.0546,
1261
  "step": 875
1262
  },
1263
  {
1264
  "epoch": 3.52,
1265
+ "grad_norm": 0.8097557425498962,
1266
  "learning_rate": 6.25e-05,
1267
+ "loss": 0.0529,
1268
  "step": 880
1269
  },
1270
  {
1271
  "epoch": 3.54,
1272
+ "grad_norm": 0.7054248452186584,
1273
  "learning_rate": 6.25e-05,
1274
+ "loss": 0.0436,
1275
  "step": 885
1276
  },
1277
  {
1278
  "epoch": 3.56,
1279
+ "grad_norm": 0.5832129716873169,
1280
  "learning_rate": 6.25e-05,
1281
+ "loss": 0.048,
1282
  "step": 890
1283
  },
1284
  {
1285
  "epoch": 3.58,
1286
+ "grad_norm": 0.8104725480079651,
1287
  "learning_rate": 6.25e-05,
1288
+ "loss": 0.0503,
1289
  "step": 895
1290
  },
1291
  {
1292
  "epoch": 3.6,
1293
+ "grad_norm": 0.9961804151535034,
1294
  "learning_rate": 6.25e-05,
1295
+ "loss": 0.0565,
1296
  "step": 900
1297
  },
1298
  {
1299
  "epoch": 3.62,
1300
+ "grad_norm": 0.8466907143592834,
1301
  "learning_rate": 6.25e-05,
1302
+ "loss": 0.054,
1303
  "step": 905
1304
  },
1305
  {
1306
  "epoch": 3.64,
1307
+ "grad_norm": 0.8867480158805847,
1308
  "learning_rate": 6.25e-05,
1309
+ "loss": 0.0547,
1310
  "step": 910
1311
  },
1312
  {
1313
  "epoch": 3.66,
1314
+ "grad_norm": 0.9030736684799194,
1315
  "learning_rate": 6.25e-05,
1316
+ "loss": 0.0481,
1317
  "step": 915
1318
  },
1319
  {
1320
  "epoch": 3.68,
1321
+ "grad_norm": 0.6740151643753052,
1322
  "learning_rate": 6.25e-05,
1323
+ "loss": 0.0529,
1324
  "step": 920
1325
  },
1326
  {
1327
  "epoch": 3.7,
1328
+ "grad_norm": 0.653508722782135,
1329
  "learning_rate": 6.25e-05,
1330
+ "loss": 0.0633,
1331
  "step": 925
1332
  },
1333
  {
1334
  "epoch": 3.7199999999999998,
1335
+ "grad_norm": 0.7304302453994751,
1336
  "learning_rate": 6.25e-05,
1337
+ "loss": 0.0493,
1338
  "step": 930
1339
  },
1340
  {
1341
  "epoch": 3.74,
1342
+ "grad_norm": 0.8343582153320312,
1343
  "learning_rate": 6.25e-05,
1344
+ "loss": 0.059,
1345
  "step": 935
1346
  },
1347
  {
1348
  "epoch": 3.76,
1349
+ "grad_norm": 0.8459467887878418,
1350
  "learning_rate": 6.25e-05,
1351
+ "loss": 0.0531,
1352
  "step": 940
1353
  },
1354
  {
1355
  "epoch": 3.7800000000000002,
1356
+ "grad_norm": 0.7470009326934814,
1357
  "learning_rate": 6.25e-05,
1358
+ "loss": 0.0548,
1359
  "step": 945
1360
  },
1361
  {
1362
  "epoch": 3.8,
1363
+ "grad_norm": 0.8183557987213135,
1364
  "learning_rate": 6.25e-05,
1365
+ "loss": 0.0471,
1366
  "step": 950
1367
  },
1368
  {
1369
  "epoch": 3.82,
1370
+ "grad_norm": 0.9448140263557434,
1371
  "learning_rate": 6.25e-05,
1372
+ "loss": 0.045,
1373
  "step": 955
1374
  },
1375
  {
1376
  "epoch": 3.84,
1377
+ "grad_norm": 0.7056401371955872,
1378
  "learning_rate": 6.25e-05,
1379
+ "loss": 0.045,
1380
  "step": 960
1381
  },
1382
  {
1383
  "epoch": 3.86,
1384
+ "grad_norm": 0.7785059213638306,
1385
  "learning_rate": 6.25e-05,
1386
+ "loss": 0.0554,
1387
  "step": 965
1388
  },
1389
  {
1390
  "epoch": 3.88,
1391
+ "grad_norm": 0.8976256251335144,
1392
  "learning_rate": 6.25e-05,
1393
+ "loss": 0.0529,
1394
  "step": 970
1395
  },
1396
  {
1397
  "epoch": 3.9,
1398
+ "grad_norm": 1.0849542617797852,
1399
  "learning_rate": 6.25e-05,
1400
+ "loss": 0.0457,
1401
  "step": 975
1402
  },
1403
  {
1404
  "epoch": 3.92,
1405
+ "grad_norm": 1.1612681150436401,
1406
  "learning_rate": 6.25e-05,
1407
+ "loss": 0.0513,
1408
  "step": 980
1409
  },
1410
  {
1411
  "epoch": 3.94,
1412
+ "grad_norm": 0.6912779211997986,
1413
  "learning_rate": 6.25e-05,
1414
+ "loss": 0.0469,
1415
  "step": 985
1416
  },
1417
  {
1418
  "epoch": 3.96,
1419
+ "grad_norm": 0.7129920125007629,
1420
  "learning_rate": 6.25e-05,
1421
+ "loss": 0.0509,
1422
  "step": 990
1423
  },
1424
  {
1425
  "epoch": 3.98,
1426
+ "grad_norm": 0.6439591646194458,
1427
  "learning_rate": 6.25e-05,
1428
+ "loss": 0.0412,
1429
  "step": 995
1430
  },
1431
  {
1432
  "epoch": 4.0,
1433
+ "grad_norm": 0.7044887542724609,
1434
  "learning_rate": 6.25e-05,
1435
+ "loss": 0.0558,
1436
  "step": 1000
1437
  },
1438
  {
1439
  "epoch": 4.0,
1440
+ "eval_cer": 0.046749928297655986,
1441
+ "eval_loss": 0.07047422975301743,
1442
+ "eval_runtime": 280.6209,
1443
+ "eval_samples_per_second": 1.782,
1444
+ "eval_steps_per_second": 0.445,
1445
  "step": 1000
1446
  }
1447
  ],
 
1449
  "max_steps": 5000,
1450
  "num_input_tokens_seen": 0,
1451
  "num_train_epochs": 20,
1452
+ "save_steps": 250,
1453
  "stateful_callbacks": {
1454
  "TrainerControl": {
1455
  "args": {
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:380ff1a9921ff96ab779d6709926f10f78099a5595ab698c3919c0c657657de1
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:105d055d6d84eb987fbbb4fc9493aa207f4712b04ab60a83adb7510815397317
3
  size 5432
checkpoint-1500/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1276977fbb71092aac7fa5e4264a022b85a72fd80975009308de3960621b5ff6
3
  size 26237160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d48030884c36ab0b6721f6fb4977391606e88ec0f4921a8d4b3ed9d19178532
3
  size 26237160
checkpoint-1500/adapter_model/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1276977fbb71092aac7fa5e4264a022b85a72fd80975009308de3960621b5ff6
3
  size 26237160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d48030884c36ab0b6721f6fb4977391606e88ec0f4921a8d4b3ed9d19178532
3
  size 26237160
checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82e8db8ed5bf6c092d0360b2faeb4d29d5a394f25dacb85b702fcfeb0f3d9ac0
3
  size 52563258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7caecc3c067ae4c39b58dd3948d8312089bed88fa4726fbc4b6903ae9361cc80
3
  size 52563258
checkpoint-1500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6623d1f8f36d930e4eba1223419c7acbab3b08b5112506a54435d991f1010d6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802b1452daa195e0a99115b4cc7912b70da70165e135510bf8baea06aabc1dfe
3
  size 14244
checkpoint-1500/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.04981744137059402,
3
  "best_model_checkpoint": "./whisper-large-v3-turbo-finetuned-lora/checkpoint-1000",
4
  "epoch": 6.0,
5
- "eval_steps": 500,
6
  "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
@@ -10,2129 +10,2156 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
- "grad_norm": 11.033143997192383,
14
- "learning_rate": 8.333333333333334e-07,
15
- "loss": 3.7365,
16
  "step": 5
17
  },
18
  {
19
  "epoch": 0.04,
20
- "grad_norm": 10.168129920959473,
21
- "learning_rate": 1.875e-06,
22
- "loss": 3.6756,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.06,
27
- "grad_norm": 10.131425857543945,
28
- "learning_rate": 2.916666666666667e-06,
29
- "loss": 3.6681,
30
  "step": 15
31
  },
32
  {
33
  "epoch": 0.08,
34
- "grad_norm": 9.962166786193848,
35
- "learning_rate": 3.958333333333334e-06,
36
- "loss": 3.6567,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.1,
41
- "grad_norm": 9.637451171875,
42
- "learning_rate": 4.791666666666667e-06,
43
- "loss": 3.5903,
44
  "step": 25
45
  },
46
  {
47
  "epoch": 0.12,
48
- "grad_norm": 9.27942943572998,
49
- "learning_rate": 5.833333333333334e-06,
50
- "loss": 3.4592,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.14,
55
- "grad_norm": 9.690427780151367,
56
- "learning_rate": 6.875e-06,
57
- "loss": 3.299,
58
  "step": 35
59
  },
60
  {
61
  "epoch": 0.16,
62
- "grad_norm": 8.123926162719727,
63
- "learning_rate": 7.916666666666668e-06,
64
- "loss": 3.2058,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.18,
69
- "grad_norm": 6.938026428222656,
70
- "learning_rate": 8.958333333333334e-06,
71
- "loss": 3.0613,
72
  "step": 45
73
  },
74
  {
75
  "epoch": 0.2,
76
- "grad_norm": 6.615925312042236,
77
- "learning_rate": 1e-05,
78
- "loss": 2.8859,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.22,
83
- "grad_norm": 5.712332725524902,
84
- "learning_rate": 1.1041666666666666e-05,
85
- "loss": 2.6746,
86
  "step": 55
87
  },
88
  {
89
  "epoch": 0.24,
90
- "grad_norm": 4.229877471923828,
91
- "learning_rate": 1.2083333333333333e-05,
92
- "loss": 2.4948,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.26,
97
- "grad_norm": 3.9951322078704834,
98
- "learning_rate": 1.3125e-05,
99
- "loss": 2.3496,
100
  "step": 65
101
  },
102
  {
103
  "epoch": 0.28,
104
- "grad_norm": 4.010512351989746,
105
- "learning_rate": 1.4166666666666666e-05,
106
- "loss": 2.2345,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.3,
111
- "grad_norm": 3.2869503498077393,
112
- "learning_rate": 1.5208333333333335e-05,
113
- "loss": 2.0418,
114
  "step": 75
115
  },
116
  {
117
  "epoch": 0.32,
118
- "grad_norm": 3.47694993019104,
119
- "learning_rate": 1.6250000000000002e-05,
120
- "loss": 1.8212,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.34,
125
- "grad_norm": 2.761810779571533,
126
- "learning_rate": 1.7291666666666666e-05,
127
- "loss": 1.7471,
128
  "step": 85
129
  },
130
  {
131
  "epoch": 0.36,
132
- "grad_norm": 2.83661150932312,
133
- "learning_rate": 1.8333333333333333e-05,
134
- "loss": 1.6647,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.38,
139
- "grad_norm": 2.7371621131896973,
140
- "learning_rate": 1.9375e-05,
141
- "loss": 1.5239,
142
  "step": 95
143
  },
144
  {
145
  "epoch": 0.4,
146
- "grad_norm": 2.5980722904205322,
147
- "learning_rate": 2.0416666666666667e-05,
148
- "loss": 1.3501,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.42,
153
- "grad_norm": 2.8566689491271973,
154
- "learning_rate": 2.1458333333333334e-05,
155
- "loss": 1.3153,
156
  "step": 105
157
  },
158
  {
159
  "epoch": 0.44,
160
- "grad_norm": 2.052793264389038,
161
- "learning_rate": 2.2499999999999998e-05,
162
- "loss": 1.175,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.46,
167
- "grad_norm": 2.08168363571167,
168
- "learning_rate": 2.3541666666666665e-05,
169
- "loss": 1.0395,
170
  "step": 115
171
  },
172
  {
173
  "epoch": 0.48,
174
- "grad_norm": 1.830390453338623,
175
- "learning_rate": 2.4583333333333332e-05,
176
- "loss": 0.9517,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.5,
181
- "grad_norm": 2.9608633518218994,
182
- "learning_rate": 2.5625e-05,
183
- "loss": 0.8602,
184
  "step": 125
185
  },
186
  {
187
  "epoch": 0.52,
188
- "grad_norm": 2.2221925258636475,
189
- "learning_rate": 2.666666666666667e-05,
190
- "loss": 0.7687,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.54,
195
- "grad_norm": 2.185246706008911,
196
- "learning_rate": 2.7708333333333334e-05,
197
- "loss": 0.6957,
198
  "step": 135
199
  },
200
  {
201
  "epoch": 0.56,
202
- "grad_norm": 2.3892769813537598,
203
- "learning_rate": 2.875e-05,
204
- "loss": 0.6452,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.58,
209
- "grad_norm": 2.2923471927642822,
210
- "learning_rate": 2.9791666666666668e-05,
211
- "loss": 0.6001,
212
  "step": 145
213
  },
214
  {
215
  "epoch": 0.6,
216
- "grad_norm": 3.059990882873535,
217
- "learning_rate": 3.0833333333333335e-05,
218
- "loss": 0.5505,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.62,
223
- "grad_norm": 2.1722524166107178,
224
- "learning_rate": 3.1875e-05,
225
- "loss": 0.513,
226
  "step": 155
227
  },
228
  {
229
  "epoch": 0.64,
230
- "grad_norm": 2.4212610721588135,
231
- "learning_rate": 3.291666666666666e-05,
232
- "loss": 0.4868,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.66,
237
- "grad_norm": 2.297727584838867,
238
- "learning_rate": 3.3958333333333337e-05,
239
- "loss": 0.4696,
240
  "step": 165
241
  },
242
  {
243
  "epoch": 0.68,
244
- "grad_norm": 1.7377690076828003,
245
- "learning_rate": 3.5000000000000004e-05,
246
- "loss": 0.4174,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.7,
251
- "grad_norm": 1.821341872215271,
252
- "learning_rate": 3.6041666666666664e-05,
253
- "loss": 0.4204,
254
  "step": 175
255
  },
256
  {
257
  "epoch": 0.72,
258
- "grad_norm": 2.0993902683258057,
259
- "learning_rate": 3.708333333333334e-05,
260
- "loss": 0.3846,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.74,
265
- "grad_norm": 2.5224227905273438,
266
- "learning_rate": 3.8125e-05,
267
- "loss": 0.3499,
268
  "step": 185
269
  },
270
  {
271
  "epoch": 0.76,
272
- "grad_norm": 1.8540211915969849,
273
- "learning_rate": 3.916666666666667e-05,
274
- "loss": 0.3414,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.78,
279
- "grad_norm": 1.9813562631607056,
280
- "learning_rate": 4.020833333333333e-05,
281
- "loss": 0.3274,
282
  "step": 195
283
  },
284
  {
285
  "epoch": 0.8,
286
- "grad_norm": 1.385871171951294,
287
- "learning_rate": 4.125e-05,
288
- "loss": 0.2907,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.82,
293
- "grad_norm": 2.0511081218719482,
294
- "learning_rate": 4.229166666666667e-05,
295
- "loss": 0.2735,
296
  "step": 205
297
  },
298
  {
299
  "epoch": 0.84,
300
- "grad_norm": 2.0850329399108887,
301
- "learning_rate": 4.3333333333333334e-05,
302
- "loss": 0.2384,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.86,
307
- "grad_norm": 2.191450595855713,
308
- "learning_rate": 4.4375e-05,
309
- "loss": 0.2244,
310
  "step": 215
311
  },
312
  {
313
  "epoch": 0.88,
314
- "grad_norm": 3.4809000492095947,
315
- "learning_rate": 4.541666666666667e-05,
316
- "loss": 0.223,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.9,
321
- "grad_norm": 1.4290976524353027,
322
- "learning_rate": 4.645833333333333e-05,
323
- "loss": 0.194,
324
  "step": 225
325
  },
326
  {
327
  "epoch": 0.92,
328
- "grad_norm": 1.8528721332550049,
329
- "learning_rate": 4.75e-05,
330
- "loss": 0.1817,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.94,
335
- "grad_norm": 1.4630467891693115,
336
- "learning_rate": 4.854166666666666e-05,
337
- "loss": 0.1728,
338
  "step": 235
339
  },
340
  {
341
  "epoch": 0.96,
342
- "grad_norm": 1.6305458545684814,
343
- "learning_rate": 4.958333333333334e-05,
344
- "loss": 0.1859,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.98,
349
- "grad_norm": 1.455244779586792,
350
- "learning_rate": 5.0625000000000004e-05,
351
- "loss": 0.1531,
352
  "step": 245
353
  },
354
  {
355
  "epoch": 1.0,
356
- "grad_norm": 1.8049136400222778,
357
- "learning_rate": 5.1666666666666664e-05,
358
- "loss": 0.1412,
 
 
 
 
 
 
 
 
 
359
  "step": 250
360
  },
361
  {
362
  "epoch": 1.02,
363
- "grad_norm": 1.577805519104004,
364
- "learning_rate": 5.270833333333334e-05,
365
- "loss": 0.1384,
366
  "step": 255
367
  },
368
  {
369
  "epoch": 1.04,
370
- "grad_norm": 2.020803213119507,
371
- "learning_rate": 5.375e-05,
372
- "loss": 0.1398,
373
  "step": 260
374
  },
375
  {
376
  "epoch": 1.06,
377
- "grad_norm": 1.4404263496398926,
378
- "learning_rate": 5.479166666666667e-05,
379
- "loss": 0.1484,
380
  "step": 265
381
  },
382
  {
383
  "epoch": 1.08,
384
- "grad_norm": 2.497192621231079,
385
- "learning_rate": 5.583333333333333e-05,
386
- "loss": 0.1353,
387
  "step": 270
388
  },
389
  {
390
  "epoch": 1.1,
391
- "grad_norm": 3.2407219409942627,
392
- "learning_rate": 5.6875e-05,
393
- "loss": 0.1433,
394
  "step": 275
395
  },
396
  {
397
  "epoch": 1.12,
398
- "grad_norm": 1.4051156044006348,
399
- "learning_rate": 5.791666666666667e-05,
400
- "loss": 0.1181,
401
  "step": 280
402
  },
403
  {
404
  "epoch": 1.1400000000000001,
405
- "grad_norm": 1.4117530584335327,
406
- "learning_rate": 5.8958333333333334e-05,
407
- "loss": 0.1252,
408
  "step": 285
409
  },
410
  {
411
  "epoch": 1.16,
412
- "grad_norm": 1.6360172033309937,
413
- "learning_rate": 6e-05,
414
- "loss": 0.1233,
415
  "step": 290
416
  },
417
  {
418
  "epoch": 1.18,
419
- "grad_norm": 1.225799560546875,
420
- "learning_rate": 6.104166666666667e-05,
421
- "loss": 0.1084,
422
  "step": 295
423
  },
424
  {
425
  "epoch": 1.2,
426
- "grad_norm": 1.4874345064163208,
427
- "learning_rate": 6.208333333333333e-05,
428
- "loss": 0.125,
429
  "step": 300
430
  },
431
  {
432
  "epoch": 1.22,
433
- "grad_norm": 1.3238331079483032,
434
  "learning_rate": 6.25e-05,
435
- "loss": 0.1132,
436
  "step": 305
437
  },
438
  {
439
  "epoch": 1.24,
440
- "grad_norm": 2.354384183883667,
441
  "learning_rate": 6.25e-05,
442
- "loss": 0.0993,
443
  "step": 310
444
  },
445
  {
446
  "epoch": 1.26,
447
- "grad_norm": 2.2216718196868896,
448
  "learning_rate": 6.25e-05,
449
- "loss": 0.1325,
450
  "step": 315
451
  },
452
  {
453
  "epoch": 1.28,
454
- "grad_norm": 1.026408076286316,
455
  "learning_rate": 6.25e-05,
456
- "loss": 0.1035,
457
  "step": 320
458
  },
459
  {
460
  "epoch": 1.3,
461
- "grad_norm": 2.0583767890930176,
462
  "learning_rate": 6.25e-05,
463
- "loss": 0.1208,
464
  "step": 325
465
  },
466
  {
467
  "epoch": 1.32,
468
- "grad_norm": 1.9303004741668701,
469
  "learning_rate": 6.25e-05,
470
- "loss": 0.1119,
471
  "step": 330
472
  },
473
  {
474
  "epoch": 1.34,
475
- "grad_norm": 1.7043157815933228,
476
  "learning_rate": 6.25e-05,
477
- "loss": 0.1023,
478
  "step": 335
479
  },
480
  {
481
  "epoch": 1.3599999999999999,
482
- "grad_norm": 1.3245861530303955,
483
  "learning_rate": 6.25e-05,
484
- "loss": 0.1008,
485
  "step": 340
486
  },
487
  {
488
  "epoch": 1.38,
489
- "grad_norm": 1.541318655014038,
490
  "learning_rate": 6.25e-05,
491
- "loss": 0.0977,
492
  "step": 345
493
  },
494
  {
495
  "epoch": 1.4,
496
- "grad_norm": 1.8400285243988037,
497
  "learning_rate": 6.25e-05,
498
- "loss": 0.0887,
499
  "step": 350
500
  },
501
  {
502
  "epoch": 1.42,
503
- "grad_norm": 1.0839234590530396,
504
  "learning_rate": 6.25e-05,
505
- "loss": 0.0912,
506
  "step": 355
507
  },
508
  {
509
  "epoch": 1.44,
510
- "grad_norm": 1.269062876701355,
511
  "learning_rate": 6.25e-05,
512
- "loss": 0.0959,
513
  "step": 360
514
  },
515
  {
516
  "epoch": 1.46,
517
- "grad_norm": 1.0546581745147705,
518
  "learning_rate": 6.25e-05,
519
- "loss": 0.0875,
520
  "step": 365
521
  },
522
  {
523
  "epoch": 1.48,
524
- "grad_norm": 1.683465838432312,
525
  "learning_rate": 6.25e-05,
526
- "loss": 0.0933,
527
  "step": 370
528
  },
529
  {
530
  "epoch": 1.5,
531
- "grad_norm": 1.470189094543457,
532
  "learning_rate": 6.25e-05,
533
- "loss": 0.0986,
534
  "step": 375
535
  },
536
  {
537
  "epoch": 1.52,
538
- "grad_norm": 1.183585524559021,
539
  "learning_rate": 6.25e-05,
540
- "loss": 0.0979,
541
  "step": 380
542
  },
543
  {
544
  "epoch": 1.54,
545
- "grad_norm": 1.3886022567749023,
546
  "learning_rate": 6.25e-05,
547
- "loss": 0.1031,
548
  "step": 385
549
  },
550
  {
551
  "epoch": 1.56,
552
- "grad_norm": 1.105749487876892,
553
  "learning_rate": 6.25e-05,
554
- "loss": 0.0995,
555
  "step": 390
556
  },
557
  {
558
  "epoch": 1.58,
559
- "grad_norm": 1.0494953393936157,
560
  "learning_rate": 6.25e-05,
561
- "loss": 0.0755,
562
  "step": 395
563
  },
564
  {
565
  "epoch": 1.6,
566
- "grad_norm": 1.7028089761734009,
567
  "learning_rate": 6.25e-05,
568
- "loss": 0.0981,
569
  "step": 400
570
  },
571
  {
572
  "epoch": 1.62,
573
- "grad_norm": 1.5404858589172363,
574
  "learning_rate": 6.25e-05,
575
- "loss": 0.0917,
576
  "step": 405
577
  },
578
  {
579
  "epoch": 1.6400000000000001,
580
- "grad_norm": 1.6659576892852783,
581
  "learning_rate": 6.25e-05,
582
- "loss": 0.0891,
583
  "step": 410
584
  },
585
  {
586
  "epoch": 1.6600000000000001,
587
- "grad_norm": 1.353579044342041,
588
  "learning_rate": 6.25e-05,
589
- "loss": 0.0889,
590
  "step": 415
591
  },
592
  {
593
  "epoch": 1.6800000000000002,
594
- "grad_norm": 2.1539247035980225,
595
  "learning_rate": 6.25e-05,
596
- "loss": 0.1092,
597
  "step": 420
598
  },
599
  {
600
  "epoch": 1.7,
601
- "grad_norm": 1.4106309413909912,
602
  "learning_rate": 6.25e-05,
603
- "loss": 0.0951,
604
  "step": 425
605
  },
606
  {
607
  "epoch": 1.72,
608
- "grad_norm": 1.1167716979980469,
609
  "learning_rate": 6.25e-05,
610
- "loss": 0.0899,
611
  "step": 430
612
  },
613
  {
614
  "epoch": 1.74,
615
- "grad_norm": 1.2029541730880737,
616
  "learning_rate": 6.25e-05,
617
- "loss": 0.0902,
618
  "step": 435
619
  },
620
  {
621
  "epoch": 1.76,
622
- "grad_norm": 1.0979869365692139,
623
  "learning_rate": 6.25e-05,
624
- "loss": 0.089,
625
  "step": 440
626
  },
627
  {
628
  "epoch": 1.78,
629
- "grad_norm": 1.1568419933319092,
630
  "learning_rate": 6.25e-05,
631
- "loss": 0.0859,
632
  "step": 445
633
  },
634
  {
635
  "epoch": 1.8,
636
- "grad_norm": 1.2472410202026367,
637
  "learning_rate": 6.25e-05,
638
- "loss": 0.0877,
639
  "step": 450
640
  },
641
  {
642
  "epoch": 1.8199999999999998,
643
- "grad_norm": 1.2323497533798218,
644
  "learning_rate": 6.25e-05,
645
- "loss": 0.0865,
646
  "step": 455
647
  },
648
  {
649
  "epoch": 1.8399999999999999,
650
- "grad_norm": 1.2814995050430298,
651
  "learning_rate": 6.25e-05,
652
- "loss": 0.0831,
653
  "step": 460
654
  },
655
  {
656
  "epoch": 1.8599999999999999,
657
- "grad_norm": 0.912714421749115,
658
  "learning_rate": 6.25e-05,
659
- "loss": 0.0813,
660
  "step": 465
661
  },
662
  {
663
  "epoch": 1.88,
664
- "grad_norm": 1.2273714542388916,
665
  "learning_rate": 6.25e-05,
666
- "loss": 0.0884,
667
  "step": 470
668
  },
669
  {
670
  "epoch": 1.9,
671
- "grad_norm": 1.5928541421890259,
672
  "learning_rate": 6.25e-05,
673
- "loss": 0.0852,
674
  "step": 475
675
  },
676
  {
677
  "epoch": 1.92,
678
- "grad_norm": 0.9074931740760803,
679
  "learning_rate": 6.25e-05,
680
- "loss": 0.0792,
681
  "step": 480
682
  },
683
  {
684
  "epoch": 1.94,
685
- "grad_norm": 0.9795681834220886,
686
  "learning_rate": 6.25e-05,
687
- "loss": 0.0781,
688
  "step": 485
689
  },
690
  {
691
  "epoch": 1.96,
692
- "grad_norm": 1.4303114414215088,
693
  "learning_rate": 6.25e-05,
694
- "loss": 0.0757,
695
  "step": 490
696
  },
697
  {
698
  "epoch": 1.98,
699
- "grad_norm": 1.0313260555267334,
700
  "learning_rate": 6.25e-05,
701
- "loss": 0.0881,
702
  "step": 495
703
  },
704
  {
705
  "epoch": 2.0,
706
- "grad_norm": 1.1294418573379517,
707
  "learning_rate": 6.25e-05,
708
- "loss": 0.0785,
709
  "step": 500
710
  },
711
  {
712
  "epoch": 2.0,
713
- "eval_cer": 0.05613677854233956,
714
- "eval_loss": 0.08199143409729004,
715
- "eval_runtime": 495.5562,
716
- "eval_samples_per_second": 2.018,
717
- "eval_steps_per_second": 0.504,
718
  "step": 500
719
  },
720
  {
721
  "epoch": 2.02,
722
- "grad_norm": 1.0397248268127441,
723
  "learning_rate": 6.25e-05,
724
- "loss": 0.0591,
725
  "step": 505
726
  },
727
  {
728
  "epoch": 2.04,
729
- "grad_norm": 0.8539375066757202,
730
  "learning_rate": 6.25e-05,
731
- "loss": 0.0664,
732
  "step": 510
733
  },
734
  {
735
  "epoch": 2.06,
736
- "grad_norm": 1.3555073738098145,
737
  "learning_rate": 6.25e-05,
738
- "loss": 0.0772,
739
  "step": 515
740
  },
741
  {
742
  "epoch": 2.08,
743
- "grad_norm": 1.6025832891464233,
744
  "learning_rate": 6.25e-05,
745
- "loss": 0.0737,
746
  "step": 520
747
  },
748
  {
749
  "epoch": 2.1,
750
- "grad_norm": 1.3090434074401855,
751
  "learning_rate": 6.25e-05,
752
- "loss": 0.0619,
753
  "step": 525
754
  },
755
  {
756
  "epoch": 2.12,
757
- "grad_norm": 0.9269134998321533,
758
  "learning_rate": 6.25e-05,
759
- "loss": 0.0679,
760
  "step": 530
761
  },
762
  {
763
  "epoch": 2.14,
764
- "grad_norm": 0.8540180325508118,
765
  "learning_rate": 6.25e-05,
766
- "loss": 0.0718,
767
  "step": 535
768
  },
769
  {
770
  "epoch": 2.16,
771
- "grad_norm": 0.9320145845413208,
772
  "learning_rate": 6.25e-05,
773
- "loss": 0.0769,
774
  "step": 540
775
  },
776
  {
777
  "epoch": 2.18,
778
- "grad_norm": 1.6879560947418213,
779
  "learning_rate": 6.25e-05,
780
- "loss": 0.0765,
781
  "step": 545
782
  },
783
  {
784
  "epoch": 2.2,
785
- "grad_norm": 1.3687632083892822,
786
  "learning_rate": 6.25e-05,
787
- "loss": 0.0769,
788
  "step": 550
789
  },
790
  {
791
  "epoch": 2.22,
792
- "grad_norm": 1.2760627269744873,
793
  "learning_rate": 6.25e-05,
794
- "loss": 0.073,
795
  "step": 555
796
  },
797
  {
798
  "epoch": 2.24,
799
- "grad_norm": 0.8481590747833252,
800
  "learning_rate": 6.25e-05,
801
- "loss": 0.0669,
802
  "step": 560
803
  },
804
  {
805
  "epoch": 2.26,
806
- "grad_norm": 1.3147085905075073,
807
  "learning_rate": 6.25e-05,
808
- "loss": 0.0626,
809
  "step": 565
810
  },
811
  {
812
  "epoch": 2.2800000000000002,
813
- "grad_norm": 1.1458756923675537,
814
  "learning_rate": 6.25e-05,
815
- "loss": 0.0665,
816
  "step": 570
817
  },
818
  {
819
  "epoch": 2.3,
820
- "grad_norm": 1.0779470205307007,
821
  "learning_rate": 6.25e-05,
822
- "loss": 0.0783,
823
  "step": 575
824
  },
825
  {
826
  "epoch": 2.32,
827
- "grad_norm": 1.0333281755447388,
828
  "learning_rate": 6.25e-05,
829
- "loss": 0.0605,
830
  "step": 580
831
  },
832
  {
833
  "epoch": 2.34,
834
- "grad_norm": 1.1638994216918945,
835
  "learning_rate": 6.25e-05,
836
- "loss": 0.0644,
837
  "step": 585
838
  },
839
  {
840
  "epoch": 2.36,
841
- "grad_norm": 0.9482213258743286,
842
  "learning_rate": 6.25e-05,
843
- "loss": 0.0689,
844
  "step": 590
845
  },
846
  {
847
  "epoch": 2.38,
848
- "grad_norm": 0.9697607755661011,
849
  "learning_rate": 6.25e-05,
850
- "loss": 0.0628,
851
  "step": 595
852
  },
853
  {
854
  "epoch": 2.4,
855
- "grad_norm": 1.0675064325332642,
856
  "learning_rate": 6.25e-05,
857
- "loss": 0.0644,
858
  "step": 600
859
  },
860
  {
861
  "epoch": 2.42,
862
- "grad_norm": 1.0749262571334839,
863
  "learning_rate": 6.25e-05,
864
- "loss": 0.0572,
865
  "step": 605
866
  },
867
  {
868
  "epoch": 2.44,
869
- "grad_norm": 1.2545385360717773,
870
  "learning_rate": 6.25e-05,
871
- "loss": 0.0601,
872
  "step": 610
873
  },
874
  {
875
  "epoch": 2.46,
876
- "grad_norm": 1.1135083436965942,
877
  "learning_rate": 6.25e-05,
878
- "loss": 0.0607,
879
  "step": 615
880
  },
881
  {
882
  "epoch": 2.48,
883
- "grad_norm": 0.821413516998291,
884
  "learning_rate": 6.25e-05,
885
- "loss": 0.0622,
886
  "step": 620
887
  },
888
  {
889
  "epoch": 2.5,
890
- "grad_norm": 0.8959715366363525,
891
  "learning_rate": 6.25e-05,
892
- "loss": 0.073,
893
  "step": 625
894
  },
895
  {
896
  "epoch": 2.52,
897
- "grad_norm": 0.6712917685508728,
898
  "learning_rate": 6.25e-05,
899
- "loss": 0.0649,
900
  "step": 630
901
  },
902
  {
903
  "epoch": 2.54,
904
- "grad_norm": 0.6646750569343567,
905
  "learning_rate": 6.25e-05,
906
- "loss": 0.07,
907
  "step": 635
908
  },
909
  {
910
  "epoch": 2.56,
911
- "grad_norm": 1.0652884244918823,
912
  "learning_rate": 6.25e-05,
913
- "loss": 0.0657,
914
  "step": 640
915
  },
916
  {
917
  "epoch": 2.58,
918
- "grad_norm": 1.035218596458435,
919
  "learning_rate": 6.25e-05,
920
- "loss": 0.0789,
921
  "step": 645
922
  },
923
  {
924
  "epoch": 2.6,
925
- "grad_norm": 0.860249936580658,
926
  "learning_rate": 6.25e-05,
927
- "loss": 0.0589,
928
  "step": 650
929
  },
930
  {
931
  "epoch": 2.62,
932
- "grad_norm": 0.7494838237762451,
933
  "learning_rate": 6.25e-05,
934
- "loss": 0.077,
935
  "step": 655
936
  },
937
  {
938
  "epoch": 2.64,
939
- "grad_norm": 1.524198055267334,
940
  "learning_rate": 6.25e-05,
941
- "loss": 0.0727,
942
  "step": 660
943
  },
944
  {
945
  "epoch": 2.66,
946
- "grad_norm": 0.9438517689704895,
947
  "learning_rate": 6.25e-05,
948
- "loss": 0.0641,
949
  "step": 665
950
  },
951
  {
952
  "epoch": 2.68,
953
- "grad_norm": 1.0982081890106201,
954
  "learning_rate": 6.25e-05,
955
- "loss": 0.0647,
956
  "step": 670
957
  },
958
  {
959
  "epoch": 2.7,
960
- "grad_norm": 0.7919325232505798,
961
  "learning_rate": 6.25e-05,
962
- "loss": 0.0689,
963
  "step": 675
964
  },
965
  {
966
  "epoch": 2.7199999999999998,
967
- "grad_norm": 0.9766571521759033,
968
  "learning_rate": 6.25e-05,
969
- "loss": 0.0671,
970
  "step": 680
971
  },
972
  {
973
  "epoch": 2.74,
974
- "grad_norm": 1.0895709991455078,
975
  "learning_rate": 6.25e-05,
976
- "loss": 0.0681,
977
  "step": 685
978
  },
979
  {
980
  "epoch": 2.76,
981
- "grad_norm": 1.1461646556854248,
982
  "learning_rate": 6.25e-05,
983
- "loss": 0.0729,
984
  "step": 690
985
  },
986
  {
987
  "epoch": 2.7800000000000002,
988
- "grad_norm": 0.7813361883163452,
989
  "learning_rate": 6.25e-05,
990
- "loss": 0.0651,
991
  "step": 695
992
  },
993
  {
994
  "epoch": 2.8,
995
- "grad_norm": 0.8545769453048706,
996
  "learning_rate": 6.25e-05,
997
- "loss": 0.064,
998
  "step": 700
999
  },
1000
  {
1001
  "epoch": 2.82,
1002
- "grad_norm": 0.8444038033485413,
1003
  "learning_rate": 6.25e-05,
1004
- "loss": 0.0626,
1005
  "step": 705
1006
  },
1007
  {
1008
  "epoch": 2.84,
1009
- "grad_norm": 1.022660732269287,
1010
  "learning_rate": 6.25e-05,
1011
- "loss": 0.0644,
1012
  "step": 710
1013
  },
1014
  {
1015
  "epoch": 2.86,
1016
- "grad_norm": 0.7192943096160889,
1017
  "learning_rate": 6.25e-05,
1018
- "loss": 0.0605,
1019
  "step": 715
1020
  },
1021
  {
1022
  "epoch": 2.88,
1023
- "grad_norm": 0.8225955367088318,
1024
  "learning_rate": 6.25e-05,
1025
- "loss": 0.0657,
1026
  "step": 720
1027
  },
1028
  {
1029
  "epoch": 2.9,
1030
- "grad_norm": 0.8145541548728943,
1031
  "learning_rate": 6.25e-05,
1032
- "loss": 0.058,
1033
  "step": 725
1034
  },
1035
  {
1036
  "epoch": 2.92,
1037
- "grad_norm": 0.8709245920181274,
1038
  "learning_rate": 6.25e-05,
1039
- "loss": 0.0568,
1040
  "step": 730
1041
  },
1042
  {
1043
  "epoch": 2.94,
1044
- "grad_norm": 1.0601686239242554,
1045
  "learning_rate": 6.25e-05,
1046
- "loss": 0.0608,
1047
  "step": 735
1048
  },
1049
  {
1050
  "epoch": 2.96,
1051
- "grad_norm": 1.0230211019515991,
1052
  "learning_rate": 6.25e-05,
1053
- "loss": 0.0666,
1054
  "step": 740
1055
  },
1056
  {
1057
  "epoch": 2.98,
1058
- "grad_norm": 0.9883492588996887,
1059
  "learning_rate": 6.25e-05,
1060
- "loss": 0.062,
1061
  "step": 745
1062
  },
1063
  {
1064
  "epoch": 3.0,
1065
- "grad_norm": 1.0670173168182373,
1066
  "learning_rate": 6.25e-05,
1067
- "loss": 0.0724,
 
 
 
 
 
 
 
 
 
1068
  "step": 750
1069
  },
1070
  {
1071
  "epoch": 3.02,
1072
- "grad_norm": 1.0427318811416626,
1073
  "learning_rate": 6.25e-05,
1074
- "loss": 0.0477,
1075
  "step": 755
1076
  },
1077
  {
1078
  "epoch": 3.04,
1079
- "grad_norm": 1.355022668838501,
1080
  "learning_rate": 6.25e-05,
1081
- "loss": 0.0455,
1082
  "step": 760
1083
  },
1084
  {
1085
  "epoch": 3.06,
1086
- "grad_norm": 1.001657247543335,
1087
  "learning_rate": 6.25e-05,
1088
- "loss": 0.0605,
1089
  "step": 765
1090
  },
1091
  {
1092
  "epoch": 3.08,
1093
- "grad_norm": 1.4077788591384888,
1094
  "learning_rate": 6.25e-05,
1095
- "loss": 0.0463,
1096
  "step": 770
1097
  },
1098
  {
1099
  "epoch": 3.1,
1100
- "grad_norm": 1.3163388967514038,
1101
  "learning_rate": 6.25e-05,
1102
- "loss": 0.0581,
1103
  "step": 775
1104
  },
1105
  {
1106
  "epoch": 3.12,
1107
- "grad_norm": 0.6931395530700684,
1108
  "learning_rate": 6.25e-05,
1109
- "loss": 0.0531,
1110
  "step": 780
1111
  },
1112
  {
1113
  "epoch": 3.14,
1114
- "grad_norm": 0.648444652557373,
1115
  "learning_rate": 6.25e-05,
1116
- "loss": 0.0517,
1117
  "step": 785
1118
  },
1119
  {
1120
  "epoch": 3.16,
1121
- "grad_norm": 0.8961315751075745,
1122
  "learning_rate": 6.25e-05,
1123
- "loss": 0.0551,
1124
  "step": 790
1125
  },
1126
  {
1127
  "epoch": 3.18,
1128
- "grad_norm": 0.8918541669845581,
1129
  "learning_rate": 6.25e-05,
1130
- "loss": 0.0573,
1131
  "step": 795
1132
  },
1133
  {
1134
  "epoch": 3.2,
1135
- "grad_norm": 0.638659656047821,
1136
  "learning_rate": 6.25e-05,
1137
- "loss": 0.0544,
1138
  "step": 800
1139
  },
1140
  {
1141
  "epoch": 3.22,
1142
- "grad_norm": 0.6866273880004883,
1143
  "learning_rate": 6.25e-05,
1144
- "loss": 0.052,
1145
  "step": 805
1146
  },
1147
  {
1148
  "epoch": 3.24,
1149
- "grad_norm": 1.7238422632217407,
1150
  "learning_rate": 6.25e-05,
1151
- "loss": 0.0555,
1152
  "step": 810
1153
  },
1154
  {
1155
  "epoch": 3.26,
1156
- "grad_norm": 0.958077073097229,
1157
  "learning_rate": 6.25e-05,
1158
- "loss": 0.0606,
1159
  "step": 815
1160
  },
1161
  {
1162
  "epoch": 3.2800000000000002,
1163
- "grad_norm": 0.8000004887580872,
1164
  "learning_rate": 6.25e-05,
1165
- "loss": 0.05,
1166
  "step": 820
1167
  },
1168
  {
1169
  "epoch": 3.3,
1170
- "grad_norm": 0.7521831393241882,
1171
  "learning_rate": 6.25e-05,
1172
- "loss": 0.0486,
1173
  "step": 825
1174
  },
1175
  {
1176
  "epoch": 3.32,
1177
- "grad_norm": 0.8134447336196899,
1178
  "learning_rate": 6.25e-05,
1179
- "loss": 0.0557,
1180
  "step": 830
1181
  },
1182
  {
1183
  "epoch": 3.34,
1184
- "grad_norm": 1.1599044799804688,
1185
  "learning_rate": 6.25e-05,
1186
- "loss": 0.0596,
1187
  "step": 835
1188
  },
1189
  {
1190
  "epoch": 3.36,
1191
- "grad_norm": 0.5582810044288635,
1192
  "learning_rate": 6.25e-05,
1193
- "loss": 0.05,
1194
  "step": 840
1195
  },
1196
  {
1197
  "epoch": 3.38,
1198
- "grad_norm": 0.6436423659324646,
1199
  "learning_rate": 6.25e-05,
1200
- "loss": 0.048,
1201
  "step": 845
1202
  },
1203
  {
1204
  "epoch": 3.4,
1205
- "grad_norm": 1.0337690114974976,
1206
  "learning_rate": 6.25e-05,
1207
- "loss": 0.0512,
1208
  "step": 850
1209
  },
1210
  {
1211
  "epoch": 3.42,
1212
- "grad_norm": 1.2385281324386597,
1213
  "learning_rate": 6.25e-05,
1214
- "loss": 0.0541,
1215
  "step": 855
1216
  },
1217
  {
1218
  "epoch": 3.44,
1219
- "grad_norm": 1.335816740989685,
1220
  "learning_rate": 6.25e-05,
1221
- "loss": 0.054,
1222
  "step": 860
1223
  },
1224
  {
1225
  "epoch": 3.46,
1226
- "grad_norm": 0.8935145139694214,
1227
  "learning_rate": 6.25e-05,
1228
- "loss": 0.0529,
1229
  "step": 865
1230
  },
1231
  {
1232
  "epoch": 3.48,
1233
- "grad_norm": 0.897282600402832,
1234
  "learning_rate": 6.25e-05,
1235
- "loss": 0.0569,
1236
  "step": 870
1237
  },
1238
  {
1239
  "epoch": 3.5,
1240
- "grad_norm": 0.5967718362808228,
1241
  "learning_rate": 6.25e-05,
1242
- "loss": 0.0554,
1243
  "step": 875
1244
  },
1245
  {
1246
  "epoch": 3.52,
1247
- "grad_norm": 0.6769823431968689,
1248
  "learning_rate": 6.25e-05,
1249
- "loss": 0.0541,
1250
  "step": 880
1251
  },
1252
  {
1253
  "epoch": 3.54,
1254
- "grad_norm": 0.6052355170249939,
1255
  "learning_rate": 6.25e-05,
1256
- "loss": 0.0453,
1257
  "step": 885
1258
  },
1259
  {
1260
  "epoch": 3.56,
1261
- "grad_norm": 0.6003367900848389,
1262
  "learning_rate": 6.25e-05,
1263
- "loss": 0.0488,
1264
  "step": 890
1265
  },
1266
  {
1267
  "epoch": 3.58,
1268
- "grad_norm": 1.1685441732406616,
1269
  "learning_rate": 6.25e-05,
1270
- "loss": 0.0518,
1271
  "step": 895
1272
  },
1273
  {
1274
  "epoch": 3.6,
1275
- "grad_norm": 1.1867949962615967,
1276
  "learning_rate": 6.25e-05,
1277
- "loss": 0.0581,
1278
  "step": 900
1279
  },
1280
  {
1281
  "epoch": 3.62,
1282
- "grad_norm": 0.8192417025566101,
1283
  "learning_rate": 6.25e-05,
1284
- "loss": 0.0564,
1285
  "step": 905
1286
  },
1287
  {
1288
  "epoch": 3.64,
1289
- "grad_norm": 0.9395178556442261,
1290
  "learning_rate": 6.25e-05,
1291
- "loss": 0.057,
1292
  "step": 910
1293
  },
1294
  {
1295
  "epoch": 3.66,
1296
- "grad_norm": 0.8012380003929138,
1297
  "learning_rate": 6.25e-05,
1298
- "loss": 0.0492,
1299
  "step": 915
1300
  },
1301
  {
1302
  "epoch": 3.68,
1303
- "grad_norm": 0.6032869815826416,
1304
  "learning_rate": 6.25e-05,
1305
- "loss": 0.0551,
1306
  "step": 920
1307
  },
1308
  {
1309
  "epoch": 3.7,
1310
- "grad_norm": 0.6057426333427429,
1311
  "learning_rate": 6.25e-05,
1312
- "loss": 0.0648,
1313
  "step": 925
1314
  },
1315
  {
1316
  "epoch": 3.7199999999999998,
1317
- "grad_norm": 0.752521276473999,
1318
  "learning_rate": 6.25e-05,
1319
- "loss": 0.0519,
1320
  "step": 930
1321
  },
1322
  {
1323
  "epoch": 3.74,
1324
- "grad_norm": 0.9516021609306335,
1325
  "learning_rate": 6.25e-05,
1326
- "loss": 0.0594,
1327
  "step": 935
1328
  },
1329
  {
1330
  "epoch": 3.76,
1331
- "grad_norm": 1.0422921180725098,
1332
  "learning_rate": 6.25e-05,
1333
- "loss": 0.0547,
1334
  "step": 940
1335
  },
1336
  {
1337
  "epoch": 3.7800000000000002,
1338
- "grad_norm": 0.7628741264343262,
1339
  "learning_rate": 6.25e-05,
1340
- "loss": 0.0575,
1341
  "step": 945
1342
  },
1343
  {
1344
  "epoch": 3.8,
1345
- "grad_norm": 0.6722723841667175,
1346
  "learning_rate": 6.25e-05,
1347
- "loss": 0.0464,
1348
  "step": 950
1349
  },
1350
  {
1351
  "epoch": 3.82,
1352
- "grad_norm": 1.0617127418518066,
1353
  "learning_rate": 6.25e-05,
1354
- "loss": 0.0477,
1355
  "step": 955
1356
  },
1357
  {
1358
  "epoch": 3.84,
1359
- "grad_norm": 0.689552903175354,
1360
  "learning_rate": 6.25e-05,
1361
- "loss": 0.0462,
1362
  "step": 960
1363
  },
1364
  {
1365
  "epoch": 3.86,
1366
- "grad_norm": 0.7258830666542053,
1367
  "learning_rate": 6.25e-05,
1368
- "loss": 0.0543,
1369
  "step": 965
1370
  },
1371
  {
1372
  "epoch": 3.88,
1373
- "grad_norm": 0.9028825759887695,
1374
  "learning_rate": 6.25e-05,
1375
- "loss": 0.0561,
1376
  "step": 970
1377
  },
1378
  {
1379
  "epoch": 3.9,
1380
- "grad_norm": 1.1875150203704834,
1381
  "learning_rate": 6.25e-05,
1382
- "loss": 0.0477,
1383
  "step": 975
1384
  },
1385
  {
1386
  "epoch": 3.92,
1387
- "grad_norm": 1.2121100425720215,
1388
  "learning_rate": 6.25e-05,
1389
- "loss": 0.0541,
1390
  "step": 980
1391
  },
1392
  {
1393
  "epoch": 3.94,
1394
- "grad_norm": 0.7245278358459473,
1395
  "learning_rate": 6.25e-05,
1396
- "loss": 0.0499,
1397
  "step": 985
1398
  },
1399
  {
1400
  "epoch": 3.96,
1401
- "grad_norm": 0.7678513526916504,
1402
  "learning_rate": 6.25e-05,
1403
- "loss": 0.0548,
1404
  "step": 990
1405
  },
1406
  {
1407
  "epoch": 3.98,
1408
- "grad_norm": 0.621068000793457,
1409
  "learning_rate": 6.25e-05,
1410
- "loss": 0.0413,
1411
  "step": 995
1412
  },
1413
  {
1414
  "epoch": 4.0,
1415
- "grad_norm": 0.6947305798530579,
1416
  "learning_rate": 6.25e-05,
1417
- "loss": 0.0576,
1418
  "step": 1000
1419
  },
1420
  {
1421
  "epoch": 4.0,
1422
- "eval_cer": 0.04981744137059402,
1423
- "eval_loss": 0.06959603726863861,
1424
- "eval_runtime": 490.1374,
1425
- "eval_samples_per_second": 2.04,
1426
- "eval_steps_per_second": 0.51,
1427
  "step": 1000
1428
  },
1429
  {
1430
  "epoch": 4.02,
1431
- "grad_norm": 0.480295330286026,
1432
  "learning_rate": 6.25e-05,
1433
  "loss": 0.0432,
1434
  "step": 1005
1435
  },
1436
  {
1437
  "epoch": 4.04,
1438
- "grad_norm": 0.5454816818237305,
1439
  "learning_rate": 6.25e-05,
1440
- "loss": 0.0466,
1441
  "step": 1010
1442
  },
1443
  {
1444
  "epoch": 4.06,
1445
- "grad_norm": 0.5476812124252319,
1446
  "learning_rate": 6.25e-05,
1447
- "loss": 0.0426,
1448
  "step": 1015
1449
  },
1450
  {
1451
  "epoch": 4.08,
1452
- "grad_norm": 0.6141966581344604,
1453
  "learning_rate": 6.25e-05,
1454
- "loss": 0.0362,
1455
  "step": 1020
1456
  },
1457
  {
1458
  "epoch": 4.1,
1459
- "grad_norm": 0.5896350741386414,
1460
  "learning_rate": 6.25e-05,
1461
- "loss": 0.0402,
1462
  "step": 1025
1463
  },
1464
  {
1465
  "epoch": 4.12,
1466
- "grad_norm": 0.6800291538238525,
1467
  "learning_rate": 6.25e-05,
1468
- "loss": 0.0408,
1469
  "step": 1030
1470
  },
1471
  {
1472
  "epoch": 4.14,
1473
- "grad_norm": 1.0093313455581665,
1474
  "learning_rate": 6.25e-05,
1475
- "loss": 0.0412,
1476
  "step": 1035
1477
  },
1478
  {
1479
  "epoch": 4.16,
1480
- "grad_norm": 0.627837061882019,
1481
  "learning_rate": 6.25e-05,
1482
- "loss": 0.0417,
1483
  "step": 1040
1484
  },
1485
  {
1486
  "epoch": 4.18,
1487
- "grad_norm": 0.8183801770210266,
1488
  "learning_rate": 6.25e-05,
1489
- "loss": 0.0438,
1490
  "step": 1045
1491
  },
1492
  {
1493
  "epoch": 4.2,
1494
- "grad_norm": 0.6498594284057617,
1495
  "learning_rate": 6.25e-05,
1496
- "loss": 0.0482,
1497
  "step": 1050
1498
  },
1499
  {
1500
  "epoch": 4.22,
1501
- "grad_norm": 0.7509708404541016,
1502
  "learning_rate": 6.25e-05,
1503
- "loss": 0.0495,
1504
  "step": 1055
1505
  },
1506
  {
1507
  "epoch": 4.24,
1508
- "grad_norm": 0.6019679307937622,
1509
  "learning_rate": 6.25e-05,
1510
- "loss": 0.0389,
1511
  "step": 1060
1512
  },
1513
  {
1514
  "epoch": 4.26,
1515
- "grad_norm": 0.8582165837287903,
1516
  "learning_rate": 6.25e-05,
1517
- "loss": 0.0479,
1518
  "step": 1065
1519
  },
1520
  {
1521
  "epoch": 4.28,
1522
- "grad_norm": 0.5960012674331665,
1523
  "learning_rate": 6.25e-05,
1524
- "loss": 0.0461,
1525
  "step": 1070
1526
  },
1527
  {
1528
  "epoch": 4.3,
1529
- "grad_norm": 0.8688340783119202,
1530
  "learning_rate": 6.25e-05,
1531
- "loss": 0.0452,
1532
  "step": 1075
1533
  },
1534
  {
1535
  "epoch": 4.32,
1536
- "grad_norm": 0.924772322177887,
1537
  "learning_rate": 6.25e-05,
1538
- "loss": 0.0501,
1539
  "step": 1080
1540
  },
1541
  {
1542
  "epoch": 4.34,
1543
- "grad_norm": 0.6622461676597595,
1544
  "learning_rate": 6.25e-05,
1545
- "loss": 0.0409,
1546
  "step": 1085
1547
  },
1548
  {
1549
  "epoch": 4.36,
1550
- "grad_norm": 0.4645944833755493,
1551
  "learning_rate": 6.25e-05,
1552
- "loss": 0.0435,
1553
  "step": 1090
1554
  },
1555
  {
1556
  "epoch": 4.38,
1557
- "grad_norm": 0.6435367465019226,
1558
  "learning_rate": 6.25e-05,
1559
- "loss": 0.0422,
1560
  "step": 1095
1561
  },
1562
  {
1563
  "epoch": 4.4,
1564
- "grad_norm": 0.785290539264679,
1565
  "learning_rate": 6.25e-05,
1566
- "loss": 0.0521,
1567
  "step": 1100
1568
  },
1569
  {
1570
  "epoch": 4.42,
1571
- "grad_norm": 0.5324105024337769,
1572
  "learning_rate": 6.25e-05,
1573
- "loss": 0.038,
1574
  "step": 1105
1575
  },
1576
  {
1577
  "epoch": 4.44,
1578
- "grad_norm": 0.7105346322059631,
1579
  "learning_rate": 6.25e-05,
1580
- "loss": 0.0472,
1581
  "step": 1110
1582
  },
1583
  {
1584
  "epoch": 4.46,
1585
- "grad_norm": 0.5921624898910522,
1586
  "learning_rate": 6.25e-05,
1587
- "loss": 0.0384,
1588
  "step": 1115
1589
  },
1590
  {
1591
  "epoch": 4.48,
1592
- "grad_norm": 0.6056894659996033,
1593
  "learning_rate": 6.25e-05,
1594
- "loss": 0.0473,
1595
  "step": 1120
1596
  },
1597
  {
1598
  "epoch": 4.5,
1599
- "grad_norm": 0.6180445551872253,
1600
  "learning_rate": 6.25e-05,
1601
- "loss": 0.0535,
1602
  "step": 1125
1603
  },
1604
  {
1605
  "epoch": 4.52,
1606
- "grad_norm": 0.7500606775283813,
1607
  "learning_rate": 6.25e-05,
1608
- "loss": 0.0471,
1609
  "step": 1130
1610
  },
1611
  {
1612
  "epoch": 4.54,
1613
- "grad_norm": 1.4782558679580688,
1614
  "learning_rate": 6.25e-05,
1615
- "loss": 0.0498,
1616
  "step": 1135
1617
  },
1618
  {
1619
  "epoch": 4.5600000000000005,
1620
- "grad_norm": 0.7095569372177124,
1621
  "learning_rate": 6.25e-05,
1622
- "loss": 0.0451,
1623
  "step": 1140
1624
  },
1625
  {
1626
  "epoch": 4.58,
1627
- "grad_norm": 0.5338032841682434,
1628
  "learning_rate": 6.25e-05,
1629
- "loss": 0.0428,
1630
  "step": 1145
1631
  },
1632
  {
1633
  "epoch": 4.6,
1634
- "grad_norm": 0.6758018732070923,
1635
  "learning_rate": 6.25e-05,
1636
- "loss": 0.044,
1637
  "step": 1150
1638
  },
1639
  {
1640
  "epoch": 4.62,
1641
- "grad_norm": 0.6550755500793457,
1642
  "learning_rate": 6.25e-05,
1643
- "loss": 0.0455,
1644
  "step": 1155
1645
  },
1646
  {
1647
  "epoch": 4.64,
1648
- "grad_norm": 1.0559263229370117,
1649
  "learning_rate": 6.25e-05,
1650
- "loss": 0.0474,
1651
  "step": 1160
1652
  },
1653
  {
1654
  "epoch": 4.66,
1655
- "grad_norm": 0.6216355562210083,
1656
  "learning_rate": 6.25e-05,
1657
- "loss": 0.0401,
1658
  "step": 1165
1659
  },
1660
  {
1661
  "epoch": 4.68,
1662
- "grad_norm": 0.756000816822052,
1663
  "learning_rate": 6.25e-05,
1664
- "loss": 0.0492,
1665
  "step": 1170
1666
  },
1667
  {
1668
  "epoch": 4.7,
1669
- "grad_norm": 0.6948612928390503,
1670
  "learning_rate": 6.25e-05,
1671
- "loss": 0.0409,
1672
  "step": 1175
1673
  },
1674
  {
1675
  "epoch": 4.72,
1676
- "grad_norm": 0.705374002456665,
1677
  "learning_rate": 6.25e-05,
1678
- "loss": 0.0433,
1679
  "step": 1180
1680
  },
1681
  {
1682
  "epoch": 4.74,
1683
- "grad_norm": 0.683259129524231,
1684
  "learning_rate": 6.25e-05,
1685
- "loss": 0.0475,
1686
  "step": 1185
1687
  },
1688
  {
1689
  "epoch": 4.76,
1690
- "grad_norm": 0.7598377466201782,
1691
  "learning_rate": 6.25e-05,
1692
- "loss": 0.0394,
1693
  "step": 1190
1694
  },
1695
  {
1696
  "epoch": 4.78,
1697
- "grad_norm": 0.6855999231338501,
1698
  "learning_rate": 6.25e-05,
1699
- "loss": 0.0414,
1700
  "step": 1195
1701
  },
1702
  {
1703
  "epoch": 4.8,
1704
- "grad_norm": 0.7347081899642944,
1705
  "learning_rate": 6.25e-05,
1706
- "loss": 0.0429,
1707
  "step": 1200
1708
  },
1709
  {
1710
  "epoch": 4.82,
1711
- "grad_norm": 0.5922674536705017,
1712
  "learning_rate": 6.25e-05,
1713
- "loss": 0.04,
1714
  "step": 1205
1715
  },
1716
  {
1717
  "epoch": 4.84,
1718
- "grad_norm": 0.51841801404953,
1719
  "learning_rate": 6.25e-05,
1720
- "loss": 0.0415,
1721
  "step": 1210
1722
  },
1723
  {
1724
  "epoch": 4.86,
1725
- "grad_norm": 0.6993411183357239,
1726
  "learning_rate": 6.25e-05,
1727
- "loss": 0.0394,
1728
  "step": 1215
1729
  },
1730
  {
1731
  "epoch": 4.88,
1732
- "grad_norm": 0.8511929512023926,
1733
  "learning_rate": 6.25e-05,
1734
- "loss": 0.0484,
1735
  "step": 1220
1736
  },
1737
  {
1738
  "epoch": 4.9,
1739
- "grad_norm": 0.8586690425872803,
1740
  "learning_rate": 6.25e-05,
1741
- "loss": 0.0443,
1742
  "step": 1225
1743
  },
1744
  {
1745
  "epoch": 4.92,
1746
- "grad_norm": 0.49296078085899353,
1747
  "learning_rate": 6.25e-05,
1748
- "loss": 0.0494,
1749
  "step": 1230
1750
  },
1751
  {
1752
  "epoch": 4.9399999999999995,
1753
- "grad_norm": 0.6447588205337524,
1754
  "learning_rate": 6.25e-05,
1755
- "loss": 0.0474,
1756
  "step": 1235
1757
  },
1758
  {
1759
  "epoch": 4.96,
1760
- "grad_norm": 1.132325530052185,
1761
  "learning_rate": 6.25e-05,
1762
- "loss": 0.0489,
1763
  "step": 1240
1764
  },
1765
  {
1766
  "epoch": 4.98,
1767
- "grad_norm": 0.682565450668335,
1768
  "learning_rate": 6.25e-05,
1769
- "loss": 0.0523,
1770
  "step": 1245
1771
  },
1772
  {
1773
  "epoch": 5.0,
1774
- "grad_norm": 0.5855417847633362,
1775
  "learning_rate": 6.25e-05,
1776
- "loss": 0.0456,
 
 
 
 
 
 
 
 
 
1777
  "step": 1250
1778
  },
1779
  {
1780
  "epoch": 5.02,
1781
- "grad_norm": 0.7072991728782654,
1782
  "learning_rate": 6.25e-05,
1783
- "loss": 0.0381,
1784
  "step": 1255
1785
  },
1786
  {
1787
  "epoch": 5.04,
1788
- "grad_norm": 0.8943975567817688,
1789
  "learning_rate": 6.25e-05,
1790
- "loss": 0.0457,
1791
  "step": 1260
1792
  },
1793
  {
1794
  "epoch": 5.06,
1795
- "grad_norm": 0.7641831040382385,
1796
  "learning_rate": 6.25e-05,
1797
- "loss": 0.0408,
1798
  "step": 1265
1799
  },
1800
  {
1801
  "epoch": 5.08,
1802
- "grad_norm": 0.8705000281333923,
1803
  "learning_rate": 6.25e-05,
1804
- "loss": 0.0417,
1805
  "step": 1270
1806
  },
1807
  {
1808
  "epoch": 5.1,
1809
- "grad_norm": 0.6074972748756409,
1810
  "learning_rate": 6.25e-05,
1811
- "loss": 0.0363,
1812
  "step": 1275
1813
  },
1814
  {
1815
  "epoch": 5.12,
1816
- "grad_norm": 0.6037366390228271,
1817
  "learning_rate": 6.25e-05,
1818
- "loss": 0.0328,
1819
  "step": 1280
1820
  },
1821
  {
1822
  "epoch": 5.14,
1823
- "grad_norm": 0.5875853896141052,
1824
  "learning_rate": 6.25e-05,
1825
- "loss": 0.042,
1826
  "step": 1285
1827
  },
1828
  {
1829
  "epoch": 5.16,
1830
- "grad_norm": 0.6076292991638184,
1831
  "learning_rate": 6.25e-05,
1832
- "loss": 0.0394,
1833
  "step": 1290
1834
  },
1835
  {
1836
  "epoch": 5.18,
1837
- "grad_norm": 0.6495656967163086,
1838
  "learning_rate": 6.25e-05,
1839
- "loss": 0.0379,
1840
  "step": 1295
1841
  },
1842
  {
1843
  "epoch": 5.2,
1844
- "grad_norm": 0.7505232095718384,
1845
  "learning_rate": 6.25e-05,
1846
- "loss": 0.0396,
1847
  "step": 1300
1848
  },
1849
  {
1850
  "epoch": 5.22,
1851
- "grad_norm": 0.792845606803894,
1852
  "learning_rate": 6.25e-05,
1853
- "loss": 0.0352,
1854
  "step": 1305
1855
  },
1856
  {
1857
  "epoch": 5.24,
1858
- "grad_norm": 0.7115448117256165,
1859
  "learning_rate": 6.25e-05,
1860
- "loss": 0.0358,
1861
  "step": 1310
1862
  },
1863
  {
1864
  "epoch": 5.26,
1865
- "grad_norm": 0.9783137440681458,
1866
  "learning_rate": 6.25e-05,
1867
- "loss": 0.0375,
1868
  "step": 1315
1869
  },
1870
  {
1871
  "epoch": 5.28,
1872
- "grad_norm": 0.43996962904930115,
1873
  "learning_rate": 6.25e-05,
1874
- "loss": 0.0304,
1875
  "step": 1320
1876
  },
1877
  {
1878
  "epoch": 5.3,
1879
- "grad_norm": 0.5700478553771973,
1880
  "learning_rate": 6.25e-05,
1881
- "loss": 0.0407,
1882
  "step": 1325
1883
  },
1884
  {
1885
  "epoch": 5.32,
1886
- "grad_norm": 0.7441264390945435,
1887
  "learning_rate": 6.25e-05,
1888
- "loss": 0.0428,
1889
  "step": 1330
1890
  },
1891
  {
1892
  "epoch": 5.34,
1893
- "grad_norm": 0.5275558829307556,
1894
  "learning_rate": 6.25e-05,
1895
- "loss": 0.0403,
1896
  "step": 1335
1897
  },
1898
  {
1899
  "epoch": 5.36,
1900
- "grad_norm": 0.7380858063697815,
1901
  "learning_rate": 6.25e-05,
1902
- "loss": 0.0353,
1903
  "step": 1340
1904
  },
1905
  {
1906
  "epoch": 5.38,
1907
- "grad_norm": 0.5178393721580505,
1908
  "learning_rate": 6.25e-05,
1909
- "loss": 0.0384,
1910
  "step": 1345
1911
  },
1912
  {
1913
  "epoch": 5.4,
1914
- "grad_norm": 0.7126160264015198,
1915
  "learning_rate": 6.25e-05,
1916
- "loss": 0.0356,
1917
  "step": 1350
1918
  },
1919
  {
1920
  "epoch": 5.42,
1921
- "grad_norm": 0.48968952894210815,
1922
  "learning_rate": 6.25e-05,
1923
- "loss": 0.0346,
1924
  "step": 1355
1925
  },
1926
  {
1927
  "epoch": 5.44,
1928
- "grad_norm": 0.6505790948867798,
1929
  "learning_rate": 6.25e-05,
1930
- "loss": 0.0362,
1931
  "step": 1360
1932
  },
1933
  {
1934
  "epoch": 5.46,
1935
- "grad_norm": 0.7864866852760315,
1936
  "learning_rate": 6.25e-05,
1937
- "loss": 0.0332,
1938
  "step": 1365
1939
  },
1940
  {
1941
  "epoch": 5.48,
1942
- "grad_norm": 1.0162925720214844,
1943
  "learning_rate": 6.25e-05,
1944
- "loss": 0.0514,
1945
  "step": 1370
1946
  },
1947
  {
1948
  "epoch": 5.5,
1949
- "grad_norm": 0.5575432181358337,
1950
  "learning_rate": 6.25e-05,
1951
- "loss": 0.037,
1952
  "step": 1375
1953
  },
1954
  {
1955
  "epoch": 5.52,
1956
- "grad_norm": 0.5482836365699768,
1957
  "learning_rate": 6.25e-05,
1958
- "loss": 0.0379,
1959
  "step": 1380
1960
  },
1961
  {
1962
  "epoch": 5.54,
1963
- "grad_norm": 0.5818304419517517,
1964
  "learning_rate": 6.25e-05,
1965
- "loss": 0.0367,
1966
  "step": 1385
1967
  },
1968
  {
1969
  "epoch": 5.5600000000000005,
1970
- "grad_norm": 0.7295159697532654,
1971
  "learning_rate": 6.25e-05,
1972
- "loss": 0.0506,
1973
  "step": 1390
1974
  },
1975
  {
1976
  "epoch": 5.58,
1977
- "grad_norm": 0.68539959192276,
1978
  "learning_rate": 6.25e-05,
1979
- "loss": 0.0393,
1980
  "step": 1395
1981
  },
1982
  {
1983
  "epoch": 5.6,
1984
- "grad_norm": 0.736714243888855,
1985
  "learning_rate": 6.25e-05,
1986
- "loss": 0.0301,
1987
  "step": 1400
1988
  },
1989
  {
1990
  "epoch": 5.62,
1991
- "grad_norm": 0.49973201751708984,
1992
  "learning_rate": 6.25e-05,
1993
- "loss": 0.0347,
1994
  "step": 1405
1995
  },
1996
  {
1997
  "epoch": 5.64,
1998
- "grad_norm": 0.35752153396606445,
1999
  "learning_rate": 6.25e-05,
2000
- "loss": 0.036,
2001
  "step": 1410
2002
  },
2003
  {
2004
  "epoch": 5.66,
2005
- "grad_norm": 0.4985082447528839,
2006
  "learning_rate": 6.25e-05,
2007
- "loss": 0.0404,
2008
  "step": 1415
2009
  },
2010
  {
2011
  "epoch": 5.68,
2012
- "grad_norm": 0.5711241364479065,
2013
  "learning_rate": 6.25e-05,
2014
- "loss": 0.041,
2015
  "step": 1420
2016
  },
2017
  {
2018
  "epoch": 5.7,
2019
- "grad_norm": 0.8970484137535095,
2020
  "learning_rate": 6.25e-05,
2021
- "loss": 0.0343,
2022
  "step": 1425
2023
  },
2024
  {
2025
  "epoch": 5.72,
2026
- "grad_norm": 0.46022269129753113,
2027
  "learning_rate": 6.25e-05,
2028
- "loss": 0.035,
2029
  "step": 1430
2030
  },
2031
  {
2032
  "epoch": 5.74,
2033
- "grad_norm": 0.8625376224517822,
2034
  "learning_rate": 6.25e-05,
2035
- "loss": 0.0426,
2036
  "step": 1435
2037
  },
2038
  {
2039
  "epoch": 5.76,
2040
- "grad_norm": 0.7475701570510864,
2041
  "learning_rate": 6.25e-05,
2042
- "loss": 0.042,
2043
  "step": 1440
2044
  },
2045
  {
2046
  "epoch": 5.78,
2047
- "grad_norm": 0.5920872688293457,
2048
  "learning_rate": 6.25e-05,
2049
- "loss": 0.0369,
2050
  "step": 1445
2051
  },
2052
  {
2053
  "epoch": 5.8,
2054
- "grad_norm": 0.41128072142601013,
2055
  "learning_rate": 6.25e-05,
2056
- "loss": 0.0327,
2057
  "step": 1450
2058
  },
2059
  {
2060
  "epoch": 5.82,
2061
- "grad_norm": 0.5794464945793152,
2062
  "learning_rate": 6.25e-05,
2063
  "loss": 0.0414,
2064
  "step": 1455
2065
  },
2066
  {
2067
  "epoch": 5.84,
2068
- "grad_norm": 0.6729599833488464,
2069
  "learning_rate": 6.25e-05,
2070
- "loss": 0.0423,
2071
  "step": 1460
2072
  },
2073
  {
2074
  "epoch": 5.86,
2075
- "grad_norm": 0.9547153115272522,
2076
  "learning_rate": 6.25e-05,
2077
- "loss": 0.0402,
2078
  "step": 1465
2079
  },
2080
  {
2081
  "epoch": 5.88,
2082
- "grad_norm": 0.7813019752502441,
2083
  "learning_rate": 6.25e-05,
2084
- "loss": 0.0388,
2085
  "step": 1470
2086
  },
2087
  {
2088
  "epoch": 5.9,
2089
- "grad_norm": 0.6496958136558533,
2090
  "learning_rate": 6.25e-05,
2091
- "loss": 0.0375,
2092
  "step": 1475
2093
  },
2094
  {
2095
  "epoch": 5.92,
2096
- "grad_norm": 0.5207449793815613,
2097
  "learning_rate": 6.25e-05,
2098
- "loss": 0.0326,
2099
  "step": 1480
2100
  },
2101
  {
2102
  "epoch": 5.9399999999999995,
2103
- "grad_norm": 0.5855777263641357,
2104
  "learning_rate": 6.25e-05,
2105
- "loss": 0.0415,
2106
  "step": 1485
2107
  },
2108
  {
2109
  "epoch": 5.96,
2110
- "grad_norm": 0.505793035030365,
2111
  "learning_rate": 6.25e-05,
2112
- "loss": 0.0365,
2113
  "step": 1490
2114
  },
2115
  {
2116
  "epoch": 5.98,
2117
- "grad_norm": 0.7970417737960815,
2118
  "learning_rate": 6.25e-05,
2119
- "loss": 0.0426,
2120
  "step": 1495
2121
  },
2122
  {
2123
  "epoch": 6.0,
2124
- "grad_norm": 0.6662250757217407,
2125
  "learning_rate": 6.25e-05,
2126
- "loss": 0.0381,
2127
  "step": 1500
2128
  },
2129
  {
2130
  "epoch": 6.0,
2131
- "eval_cer": 0.07167181575621402,
2132
- "eval_loss": 0.0693308636546135,
2133
- "eval_runtime": 496.5135,
2134
- "eval_samples_per_second": 2.014,
2135
- "eval_steps_per_second": 0.504,
2136
  "step": 1500
2137
  }
2138
  ],
@@ -2140,7 +2167,7 @@
2140
  "max_steps": 5000,
2141
  "num_input_tokens_seen": 0,
2142
  "num_train_epochs": 20,
2143
- "save_steps": 500,
2144
  "stateful_callbacks": {
2145
  "TrainerControl": {
2146
  "args": {
 
1
  {
2
+ "best_metric": 0.046749928297655986,
3
  "best_model_checkpoint": "./whisper-large-v3-turbo-finetuned-lora/checkpoint-1000",
4
  "epoch": 6.0,
5
+ "eval_steps": 250,
6
  "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
+ "grad_norm": 11.320270538330078,
14
+ "learning_rate": 1.25e-06,
15
+ "loss": 3.7364,
16
  "step": 5
17
  },
18
  {
19
  "epoch": 0.04,
20
+ "grad_norm": 10.51279354095459,
21
+ "learning_rate": 2.8124999999999998e-06,
22
+ "loss": 3.669,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.06,
27
+ "grad_norm": 10.274462699890137,
28
+ "learning_rate": 4.3750000000000005e-06,
29
+ "loss": 3.6416,
30
  "step": 15
31
  },
32
  {
33
  "epoch": 0.08,
34
+ "grad_norm": 10.032905578613281,
35
+ "learning_rate": 5.9375e-06,
36
+ "loss": 3.5981,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.1,
41
+ "grad_norm": 9.448946952819824,
42
+ "learning_rate": 7.1875e-06,
43
+ "loss": 3.4937,
44
  "step": 25
45
  },
46
  {
47
  "epoch": 0.12,
48
+ "grad_norm": 8.844466209411621,
49
+ "learning_rate": 8.750000000000001e-06,
50
+ "loss": 3.3145,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.14,
55
+ "grad_norm": 8.622856140136719,
56
+ "learning_rate": 1.03125e-05,
57
+ "loss": 3.0978,
58
  "step": 35
59
  },
60
  {
61
  "epoch": 0.16,
62
+ "grad_norm": 6.81293249130249,
63
+ "learning_rate": 1.1875e-05,
64
+ "loss": 2.9583,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.18,
69
+ "grad_norm": 5.21970272064209,
70
+ "learning_rate": 1.34375e-05,
71
+ "loss": 2.7668,
72
  "step": 45
73
  },
74
  {
75
  "epoch": 0.2,
76
+ "grad_norm": 4.440727710723877,
77
+ "learning_rate": 1.5e-05,
78
+ "loss": 2.5467,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.22,
83
+ "grad_norm": 4.219883918762207,
84
+ "learning_rate": 1.6562500000000003e-05,
85
+ "loss": 2.3237,
86
  "step": 55
87
  },
88
  {
89
  "epoch": 0.24,
90
+ "grad_norm": 3.6006925106048584,
91
+ "learning_rate": 1.8125e-05,
92
+ "loss": 2.1307,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.26,
97
+ "grad_norm": 3.07859206199646,
98
+ "learning_rate": 1.96875e-05,
99
+ "loss": 1.9725,
100
  "step": 65
101
  },
102
  {
103
  "epoch": 0.28,
104
+ "grad_norm": 3.0515847206115723,
105
+ "learning_rate": 2.125e-05,
106
+ "loss": 1.8568,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.3,
111
+ "grad_norm": 2.7134475708007812,
112
+ "learning_rate": 2.28125e-05,
113
+ "loss": 1.6583,
114
  "step": 75
115
  },
116
  {
117
  "epoch": 0.32,
118
+ "grad_norm": 2.7643449306488037,
119
+ "learning_rate": 2.4375000000000003e-05,
120
+ "loss": 1.4433,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.34,
125
+ "grad_norm": 2.427220582962036,
126
+ "learning_rate": 2.59375e-05,
127
+ "loss": 1.3435,
128
  "step": 85
129
  },
130
  {
131
  "epoch": 0.36,
132
+ "grad_norm": 2.0288472175598145,
133
+ "learning_rate": 2.75e-05,
134
+ "loss": 1.2446,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.38,
139
+ "grad_norm": 2.7967100143432617,
140
+ "learning_rate": 2.90625e-05,
141
+ "loss": 1.1031,
142
  "step": 95
143
  },
144
  {
145
  "epoch": 0.4,
146
+ "grad_norm": 2.407944440841675,
147
+ "learning_rate": 3.0625e-05,
148
+ "loss": 0.9777,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.42,
153
+ "grad_norm": 3.4412190914154053,
154
+ "learning_rate": 3.21875e-05,
155
+ "loss": 0.9294,
156
  "step": 105
157
  },
158
  {
159
  "epoch": 0.44,
160
+ "grad_norm": 2.0439155101776123,
161
+ "learning_rate": 3.375e-05,
162
+ "loss": 0.8108,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.46,
167
+ "grad_norm": 1.616652011871338,
168
+ "learning_rate": 3.53125e-05,
169
+ "loss": 0.6951,
170
  "step": 115
171
  },
172
  {
173
  "epoch": 0.48,
174
+ "grad_norm": 1.690824031829834,
175
+ "learning_rate": 3.6875e-05,
176
+ "loss": 0.6352,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.5,
181
+ "grad_norm": 2.7902116775512695,
182
+ "learning_rate": 3.84375e-05,
183
+ "loss": 0.5562,
184
  "step": 125
185
  },
186
  {
187
  "epoch": 0.52,
188
+ "grad_norm": 2.4872801303863525,
189
+ "learning_rate": 4e-05,
190
+ "loss": 0.5111,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.54,
195
+ "grad_norm": 2.4177122116088867,
196
+ "learning_rate": 4.15625e-05,
197
+ "loss": 0.4696,
198
  "step": 135
199
  },
200
  {
201
  "epoch": 0.56,
202
+ "grad_norm": 2.0231056213378906,
203
+ "learning_rate": 4.3125e-05,
204
+ "loss": 0.4462,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.58,
209
+ "grad_norm": 2.004688024520874,
210
+ "learning_rate": 4.46875e-05,
211
+ "loss": 0.4224,
212
  "step": 145
213
  },
214
  {
215
  "epoch": 0.6,
216
+ "grad_norm": 3.170652389526367,
217
+ "learning_rate": 4.625e-05,
218
+ "loss": 0.3967,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.62,
223
+ "grad_norm": 1.8620476722717285,
224
+ "learning_rate": 4.7812500000000003e-05,
225
+ "loss": 0.3739,
226
  "step": 155
227
  },
228
  {
229
  "epoch": 0.64,
230
+ "grad_norm": 2.4667856693267822,
231
+ "learning_rate": 4.9375e-05,
232
+ "loss": 0.3542,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.66,
237
+ "grad_norm": 1.7935612201690674,
238
+ "learning_rate": 5.09375e-05,
239
+ "loss": 0.3409,
240
  "step": 165
241
  },
242
  {
243
  "epoch": 0.68,
244
+ "grad_norm": 1.6230987310409546,
245
+ "learning_rate": 5.25e-05,
246
+ "loss": 0.3068,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.7,
251
+ "grad_norm": 2.739957094192505,
252
+ "learning_rate": 5.40625e-05,
253
+ "loss": 0.2963,
254
  "step": 175
255
  },
256
  {
257
  "epoch": 0.72,
258
+ "grad_norm": 1.7342944145202637,
259
+ "learning_rate": 5.5625000000000004e-05,
260
+ "loss": 0.253,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.74,
265
+ "grad_norm": 2.0191333293914795,
266
+ "learning_rate": 5.71875e-05,
267
+ "loss": 0.2175,
268
  "step": 185
269
  },
270
  {
271
  "epoch": 0.76,
272
+ "grad_norm": 1.6039254665374756,
273
+ "learning_rate": 5.875e-05,
274
+ "loss": 0.2009,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.78,
279
+ "grad_norm": 2.2860054969787598,
280
+ "learning_rate": 6.03125e-05,
281
+ "loss": 0.1774,
282
  "step": 195
283
  },
284
  {
285
  "epoch": 0.8,
286
+ "grad_norm": 1.528680443763733,
287
+ "learning_rate": 6.1875e-05,
288
+ "loss": 0.1603,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.82,
293
+ "grad_norm": 1.526693344116211,
294
+ "learning_rate": 6.25e-05,
295
+ "loss": 0.1504,
296
  "step": 205
297
  },
298
  {
299
  "epoch": 0.84,
300
+ "grad_norm": 2.199506998062134,
301
+ "learning_rate": 6.25e-05,
302
+ "loss": 0.1357,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.86,
307
+ "grad_norm": 2.170020341873169,
308
+ "learning_rate": 6.25e-05,
309
+ "loss": 0.1519,
310
  "step": 215
311
  },
312
  {
313
  "epoch": 0.88,
314
+ "grad_norm": 1.5418131351470947,
315
+ "learning_rate": 6.25e-05,
316
+ "loss": 0.1524,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.9,
321
+ "grad_norm": 2.1583192348480225,
322
+ "learning_rate": 6.25e-05,
323
+ "loss": 0.1264,
324
  "step": 225
325
  },
326
  {
327
  "epoch": 0.92,
328
+ "grad_norm": 1.908937692642212,
329
+ "learning_rate": 6.25e-05,
330
+ "loss": 0.1221,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.94,
335
+ "grad_norm": 1.4072145223617554,
336
+ "learning_rate": 6.25e-05,
337
+ "loss": 0.1254,
338
  "step": 235
339
  },
340
  {
341
  "epoch": 0.96,
342
+ "grad_norm": 1.3102571964263916,
343
+ "learning_rate": 6.25e-05,
344
+ "loss": 0.1412,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.98,
349
+ "grad_norm": 1.4941678047180176,
350
+ "learning_rate": 6.25e-05,
351
+ "loss": 0.1203,
352
  "step": 245
353
  },
354
  {
355
  "epoch": 1.0,
356
+ "grad_norm": 1.8279727697372437,
357
+ "learning_rate": 6.25e-05,
358
+ "loss": 0.1107,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "eval_cer": 0.07099835736448257,
364
+ "eval_loss": 0.11958163231611252,
365
+ "eval_runtime": 280.2233,
366
+ "eval_samples_per_second": 1.784,
367
+ "eval_steps_per_second": 0.446,
368
  "step": 250
369
  },
370
  {
371
  "epoch": 1.02,
372
+ "grad_norm": 1.0286716222763062,
373
+ "learning_rate": 6.25e-05,
374
+ "loss": 0.1047,
375
  "step": 255
376
  },
377
  {
378
  "epoch": 1.04,
379
+ "grad_norm": 1.7464964389801025,
380
+ "learning_rate": 6.25e-05,
381
+ "loss": 0.1108,
382
  "step": 260
383
  },
384
  {
385
  "epoch": 1.06,
386
+ "grad_norm": 1.3992992639541626,
387
+ "learning_rate": 6.25e-05,
388
+ "loss": 0.1176,
389
  "step": 265
390
  },
391
  {
392
  "epoch": 1.08,
393
+ "grad_norm": 1.150010347366333,
394
+ "learning_rate": 6.25e-05,
395
+ "loss": 0.1059,
396
  "step": 270
397
  },
398
  {
399
  "epoch": 1.1,
400
+ "grad_norm": 1.983775019645691,
401
+ "learning_rate": 6.25e-05,
402
+ "loss": 0.1223,
403
  "step": 275
404
  },
405
  {
406
  "epoch": 1.12,
407
+ "grad_norm": 1.0895100831985474,
408
+ "learning_rate": 6.25e-05,
409
+ "loss": 0.0929,
410
  "step": 280
411
  },
412
  {
413
  "epoch": 1.1400000000000001,
414
+ "grad_norm": 1.631362795829773,
415
+ "learning_rate": 6.25e-05,
416
+ "loss": 0.1005,
417
  "step": 285
418
  },
419
  {
420
  "epoch": 1.16,
421
+ "grad_norm": 1.39686918258667,
422
+ "learning_rate": 6.25e-05,
423
+ "loss": 0.1022,
424
  "step": 290
425
  },
426
  {
427
  "epoch": 1.18,
428
+ "grad_norm": 1.2856369018554688,
429
+ "learning_rate": 6.25e-05,
430
+ "loss": 0.0908,
431
  "step": 295
432
  },
433
  {
434
  "epoch": 1.2,
435
+ "grad_norm": 1.2866718769073486,
436
+ "learning_rate": 6.25e-05,
437
+ "loss": 0.103,
438
  "step": 300
439
  },
440
  {
441
  "epoch": 1.22,
442
+ "grad_norm": 1.1115745306015015,
443
  "learning_rate": 6.25e-05,
444
+ "loss": 0.0934,
445
  "step": 305
446
  },
447
  {
448
  "epoch": 1.24,
449
+ "grad_norm": 1.2674397230148315,
450
  "learning_rate": 6.25e-05,
451
+ "loss": 0.0807,
452
  "step": 310
453
  },
454
  {
455
  "epoch": 1.26,
456
+ "grad_norm": 3.003493547439575,
457
  "learning_rate": 6.25e-05,
458
+ "loss": 0.1163,
459
  "step": 315
460
  },
461
  {
462
  "epoch": 1.28,
463
+ "grad_norm": 1.3472819328308105,
464
  "learning_rate": 6.25e-05,
465
+ "loss": 0.0939,
466
  "step": 320
467
  },
468
  {
469
  "epoch": 1.3,
470
+ "grad_norm": 1.576393485069275,
471
  "learning_rate": 6.25e-05,
472
+ "loss": 0.1023,
473
  "step": 325
474
  },
475
  {
476
  "epoch": 1.32,
477
+ "grad_norm": 1.2895311117172241,
478
  "learning_rate": 6.25e-05,
479
+ "loss": 0.0908,
480
  "step": 330
481
  },
482
  {
483
  "epoch": 1.34,
484
+ "grad_norm": 1.7044769525527954,
485
  "learning_rate": 6.25e-05,
486
+ "loss": 0.0849,
487
  "step": 335
488
  },
489
  {
490
  "epoch": 1.3599999999999999,
491
+ "grad_norm": 1.316157341003418,
492
  "learning_rate": 6.25e-05,
493
+ "loss": 0.0865,
494
  "step": 340
495
  },
496
  {
497
  "epoch": 1.38,
498
+ "grad_norm": 1.4973046779632568,
499
  "learning_rate": 6.25e-05,
500
+ "loss": 0.0864,
501
  "step": 345
502
  },
503
  {
504
  "epoch": 1.4,
505
+ "grad_norm": 0.7230541706085205,
506
  "learning_rate": 6.25e-05,
507
+ "loss": 0.074,
508
  "step": 350
509
  },
510
  {
511
  "epoch": 1.42,
512
+ "grad_norm": 1.026584267616272,
513
  "learning_rate": 6.25e-05,
514
+ "loss": 0.0788,
515
  "step": 355
516
  },
517
  {
518
  "epoch": 1.44,
519
+ "grad_norm": 1.3976407051086426,
520
  "learning_rate": 6.25e-05,
521
+ "loss": 0.0862,
522
  "step": 360
523
  },
524
  {
525
  "epoch": 1.46,
526
+ "grad_norm": 1.3054964542388916,
527
  "learning_rate": 6.25e-05,
528
+ "loss": 0.0757,
529
  "step": 365
530
  },
531
  {
532
  "epoch": 1.48,
533
+ "grad_norm": 1.8163508176803589,
534
  "learning_rate": 6.25e-05,
535
+ "loss": 0.0822,
536
  "step": 370
537
  },
538
  {
539
  "epoch": 1.5,
540
+ "grad_norm": 1.23239004611969,
541
  "learning_rate": 6.25e-05,
542
+ "loss": 0.0886,
543
  "step": 375
544
  },
545
  {
546
  "epoch": 1.52,
547
+ "grad_norm": 1.1320103406906128,
548
  "learning_rate": 6.25e-05,
549
+ "loss": 0.0834,
550
  "step": 380
551
  },
552
  {
553
  "epoch": 1.54,
554
+ "grad_norm": 1.8913005590438843,
555
  "learning_rate": 6.25e-05,
556
+ "loss": 0.0917,
557
  "step": 385
558
  },
559
  {
560
  "epoch": 1.56,
561
+ "grad_norm": 1.3522365093231201,
562
  "learning_rate": 6.25e-05,
563
+ "loss": 0.0927,
564
  "step": 390
565
  },
566
  {
567
  "epoch": 1.58,
568
+ "grad_norm": 1.3687407970428467,
569
  "learning_rate": 6.25e-05,
570
+ "loss": 0.0701,
571
  "step": 395
572
  },
573
  {
574
  "epoch": 1.6,
575
+ "grad_norm": 1.6905425786972046,
576
  "learning_rate": 6.25e-05,
577
+ "loss": 0.0914,
578
  "step": 400
579
  },
580
  {
581
  "epoch": 1.62,
582
+ "grad_norm": 1.3366554975509644,
583
  "learning_rate": 6.25e-05,
584
+ "loss": 0.0833,
585
  "step": 405
586
  },
587
  {
588
  "epoch": 1.6400000000000001,
589
+ "grad_norm": 1.0540807247161865,
590
  "learning_rate": 6.25e-05,
591
+ "loss": 0.088,
592
  "step": 410
593
  },
594
  {
595
  "epoch": 1.6600000000000001,
596
+ "grad_norm": 1.0234986543655396,
597
  "learning_rate": 6.25e-05,
598
+ "loss": 0.075,
599
  "step": 415
600
  },
601
  {
602
  "epoch": 1.6800000000000002,
603
+ "grad_norm": 1.3205113410949707,
604
  "learning_rate": 6.25e-05,
605
+ "loss": 0.0931,
606
  "step": 420
607
  },
608
  {
609
  "epoch": 1.7,
610
+ "grad_norm": 0.8865799307823181,
611
  "learning_rate": 6.25e-05,
612
+ "loss": 0.0831,
613
  "step": 425
614
  },
615
  {
616
  "epoch": 1.72,
617
+ "grad_norm": 1.2646653652191162,
618
  "learning_rate": 6.25e-05,
619
+ "loss": 0.0825,
620
  "step": 430
621
  },
622
  {
623
  "epoch": 1.74,
624
+ "grad_norm": 0.9991198182106018,
625
  "learning_rate": 6.25e-05,
626
+ "loss": 0.0825,
627
  "step": 435
628
  },
629
  {
630
  "epoch": 1.76,
631
+ "grad_norm": 0.8784312605857849,
632
  "learning_rate": 6.25e-05,
633
+ "loss": 0.082,
634
  "step": 440
635
  },
636
  {
637
  "epoch": 1.78,
638
+ "grad_norm": 1.304877519607544,
639
  "learning_rate": 6.25e-05,
640
+ "loss": 0.0775,
641
  "step": 445
642
  },
643
  {
644
  "epoch": 1.8,
645
+ "grad_norm": 1.2007408142089844,
646
  "learning_rate": 6.25e-05,
647
+ "loss": 0.0796,
648
  "step": 450
649
  },
650
  {
651
  "epoch": 1.8199999999999998,
652
+ "grad_norm": 0.9978143572807312,
653
  "learning_rate": 6.25e-05,
654
+ "loss": 0.0794,
655
  "step": 455
656
  },
657
  {
658
  "epoch": 1.8399999999999999,
659
+ "grad_norm": 1.2883387804031372,
660
  "learning_rate": 6.25e-05,
661
+ "loss": 0.0744,
662
  "step": 460
663
  },
664
  {
665
  "epoch": 1.8599999999999999,
666
+ "grad_norm": 0.8542335629463196,
667
  "learning_rate": 6.25e-05,
668
+ "loss": 0.074,
669
  "step": 465
670
  },
671
  {
672
  "epoch": 1.88,
673
+ "grad_norm": 1.0009572505950928,
674
  "learning_rate": 6.25e-05,
675
+ "loss": 0.0823,
676
  "step": 470
677
  },
678
  {
679
  "epoch": 1.9,
680
+ "grad_norm": 1.126528263092041,
681
  "learning_rate": 6.25e-05,
682
+ "loss": 0.0805,
683
  "step": 475
684
  },
685
  {
686
  "epoch": 1.92,
687
+ "grad_norm": 0.9336584210395813,
688
  "learning_rate": 6.25e-05,
689
+ "loss": 0.0722,
690
  "step": 480
691
  },
692
  {
693
  "epoch": 1.94,
694
+ "grad_norm": 1.0387274026870728,
695
  "learning_rate": 6.25e-05,
696
+ "loss": 0.0722,
697
  "step": 485
698
  },
699
  {
700
  "epoch": 1.96,
701
+ "grad_norm": 1.4692296981811523,
702
  "learning_rate": 6.25e-05,
703
+ "loss": 0.071,
704
  "step": 490
705
  },
706
  {
707
  "epoch": 1.98,
708
+ "grad_norm": 0.9859362244606018,
709
  "learning_rate": 6.25e-05,
710
+ "loss": 0.0795,
711
  "step": 495
712
  },
713
  {
714
  "epoch": 2.0,
715
+ "grad_norm": 1.0557219982147217,
716
  "learning_rate": 6.25e-05,
717
+ "loss": 0.0741,
718
  "step": 500
719
  },
720
  {
721
  "epoch": 2.0,
722
+ "eval_cer": 0.05483273798659818,
723
+ "eval_loss": 0.0840950757265091,
724
+ "eval_runtime": 280.5964,
725
+ "eval_samples_per_second": 1.782,
726
+ "eval_steps_per_second": 0.445,
727
  "step": 500
728
  },
729
  {
730
  "epoch": 2.02,
731
+ "grad_norm": 1.1070911884307861,
732
  "learning_rate": 6.25e-05,
733
+ "loss": 0.0552,
734
  "step": 505
735
  },
736
  {
737
  "epoch": 2.04,
738
+ "grad_norm": 0.7629583477973938,
739
  "learning_rate": 6.25e-05,
740
+ "loss": 0.0613,
741
  "step": 510
742
  },
743
  {
744
  "epoch": 2.06,
745
+ "grad_norm": 1.3060976266860962,
746
  "learning_rate": 6.25e-05,
747
+ "loss": 0.0746,
748
  "step": 515
749
  },
750
  {
751
  "epoch": 2.08,
752
+ "grad_norm": 1.067309021949768,
753
  "learning_rate": 6.25e-05,
754
+ "loss": 0.0717,
755
  "step": 520
756
  },
757
  {
758
  "epoch": 2.1,
759
+ "grad_norm": 1.1334558725357056,
760
  "learning_rate": 6.25e-05,
761
+ "loss": 0.0582,
762
  "step": 525
763
  },
764
  {
765
  "epoch": 2.12,
766
+ "grad_norm": 0.8565890192985535,
767
  "learning_rate": 6.25e-05,
768
+ "loss": 0.066,
769
  "step": 530
770
  },
771
  {
772
  "epoch": 2.14,
773
+ "grad_norm": 1.042038083076477,
774
  "learning_rate": 6.25e-05,
775
+ "loss": 0.0686,
776
  "step": 535
777
  },
778
  {
779
  "epoch": 2.16,
780
+ "grad_norm": 0.8787774443626404,
781
  "learning_rate": 6.25e-05,
782
+ "loss": 0.0714,
783
  "step": 540
784
  },
785
  {
786
  "epoch": 2.18,
787
+ "grad_norm": 1.2969956398010254,
788
  "learning_rate": 6.25e-05,
789
+ "loss": 0.069,
790
  "step": 545
791
  },
792
  {
793
  "epoch": 2.2,
794
+ "grad_norm": 1.0625072717666626,
795
  "learning_rate": 6.25e-05,
796
+ "loss": 0.0701,
797
  "step": 550
798
  },
799
  {
800
  "epoch": 2.22,
801
+ "grad_norm": 1.1786212921142578,
802
  "learning_rate": 6.25e-05,
803
+ "loss": 0.0672,
804
  "step": 555
805
  },
806
  {
807
  "epoch": 2.24,
808
+ "grad_norm": 0.7678006887435913,
809
  "learning_rate": 6.25e-05,
810
+ "loss": 0.0637,
811
  "step": 560
812
  },
813
  {
814
  "epoch": 2.26,
815
+ "grad_norm": 1.087916612625122,
816
  "learning_rate": 6.25e-05,
817
+ "loss": 0.0592,
818
  "step": 565
819
  },
820
  {
821
  "epoch": 2.2800000000000002,
822
+ "grad_norm": 1.0709354877471924,
823
  "learning_rate": 6.25e-05,
824
+ "loss": 0.0641,
825
  "step": 570
826
  },
827
  {
828
  "epoch": 2.3,
829
+ "grad_norm": 0.9933990240097046,
830
  "learning_rate": 6.25e-05,
831
+ "loss": 0.0724,
832
  "step": 575
833
  },
834
  {
835
  "epoch": 2.32,
836
+ "grad_norm": 0.9537047147750854,
837
  "learning_rate": 6.25e-05,
838
+ "loss": 0.0565,
839
  "step": 580
840
  },
841
  {
842
  "epoch": 2.34,
843
+ "grad_norm": 0.8913723230361938,
844
  "learning_rate": 6.25e-05,
845
+ "loss": 0.0601,
846
  "step": 585
847
  },
848
  {
849
  "epoch": 2.36,
850
+ "grad_norm": 1.4037823677062988,
851
  "learning_rate": 6.25e-05,
852
+ "loss": 0.0656,
853
  "step": 590
854
  },
855
  {
856
  "epoch": 2.38,
857
+ "grad_norm": 0.8686001896858215,
858
  "learning_rate": 6.25e-05,
859
+ "loss": 0.0617,
860
  "step": 595
861
  },
862
  {
863
  "epoch": 2.4,
864
+ "grad_norm": 1.1040139198303223,
865
  "learning_rate": 6.25e-05,
866
+ "loss": 0.0612,
867
  "step": 600
868
  },
869
  {
870
  "epoch": 2.42,
871
+ "grad_norm": 0.8995397090911865,
872
  "learning_rate": 6.25e-05,
873
+ "loss": 0.0528,
874
  "step": 605
875
  },
876
  {
877
  "epoch": 2.44,
878
+ "grad_norm": 1.0924474000930786,
879
  "learning_rate": 6.25e-05,
880
+ "loss": 0.0551,
881
  "step": 610
882
  },
883
  {
884
  "epoch": 2.46,
885
+ "grad_norm": 1.0748484134674072,
886
  "learning_rate": 6.25e-05,
887
+ "loss": 0.0573,
888
  "step": 615
889
  },
890
  {
891
  "epoch": 2.48,
892
+ "grad_norm": 0.8827953338623047,
893
  "learning_rate": 6.25e-05,
894
+ "loss": 0.0595,
895
  "step": 620
896
  },
897
  {
898
  "epoch": 2.5,
899
+ "grad_norm": 0.8614113926887512,
900
  "learning_rate": 6.25e-05,
901
+ "loss": 0.0694,
902
  "step": 625
903
  },
904
  {
905
  "epoch": 2.52,
906
+ "grad_norm": 0.6579775810241699,
907
  "learning_rate": 6.25e-05,
908
+ "loss": 0.0608,
909
  "step": 630
910
  },
911
  {
912
  "epoch": 2.54,
913
+ "grad_norm": 0.923587441444397,
914
  "learning_rate": 6.25e-05,
915
+ "loss": 0.0684,
916
  "step": 635
917
  },
918
  {
919
  "epoch": 2.56,
920
+ "grad_norm": 1.119313359260559,
921
  "learning_rate": 6.25e-05,
922
+ "loss": 0.0651,
923
  "step": 640
924
  },
925
  {
926
  "epoch": 2.58,
927
+ "grad_norm": 1.1630853414535522,
928
  "learning_rate": 6.25e-05,
929
+ "loss": 0.0773,
930
  "step": 645
931
  },
932
  {
933
  "epoch": 2.6,
934
+ "grad_norm": 0.9517636299133301,
935
  "learning_rate": 6.25e-05,
936
+ "loss": 0.0574,
937
  "step": 650
938
  },
939
  {
940
  "epoch": 2.62,
941
+ "grad_norm": 0.767271101474762,
942
  "learning_rate": 6.25e-05,
943
+ "loss": 0.071,
944
  "step": 655
945
  },
946
  {
947
  "epoch": 2.64,
948
+ "grad_norm": 1.3324207067489624,
949
  "learning_rate": 6.25e-05,
950
+ "loss": 0.0672,
951
  "step": 660
952
  },
953
  {
954
  "epoch": 2.66,
955
+ "grad_norm": 0.8638308048248291,
956
  "learning_rate": 6.25e-05,
957
+ "loss": 0.0602,
958
  "step": 665
959
  },
960
  {
961
  "epoch": 2.68,
962
+ "grad_norm": 0.9522351622581482,
963
  "learning_rate": 6.25e-05,
964
+ "loss": 0.0626,
965
  "step": 670
966
  },
967
  {
968
  "epoch": 2.7,
969
+ "grad_norm": 0.7264077067375183,
970
  "learning_rate": 6.25e-05,
971
+ "loss": 0.0654,
972
  "step": 675
973
  },
974
  {
975
  "epoch": 2.7199999999999998,
976
+ "grad_norm": 1.185275912284851,
977
  "learning_rate": 6.25e-05,
978
+ "loss": 0.0638,
979
  "step": 680
980
  },
981
  {
982
  "epoch": 2.74,
983
+ "grad_norm": 1.549625277519226,
984
  "learning_rate": 6.25e-05,
985
+ "loss": 0.0661,
986
  "step": 685
987
  },
988
  {
989
  "epoch": 2.76,
990
+ "grad_norm": 1.202415108680725,
991
  "learning_rate": 6.25e-05,
992
+ "loss": 0.0709,
993
  "step": 690
994
  },
995
  {
996
  "epoch": 2.7800000000000002,
997
+ "grad_norm": 0.7902194857597351,
998
  "learning_rate": 6.25e-05,
999
+ "loss": 0.0604,
1000
  "step": 695
1001
  },
1002
  {
1003
  "epoch": 2.8,
1004
+ "grad_norm": 1.0128028392791748,
1005
  "learning_rate": 6.25e-05,
1006
+ "loss": 0.0612,
1007
  "step": 700
1008
  },
1009
  {
1010
  "epoch": 2.82,
1011
+ "grad_norm": 0.8418397903442383,
1012
  "learning_rate": 6.25e-05,
1013
+ "loss": 0.0616,
1014
  "step": 705
1015
  },
1016
  {
1017
  "epoch": 2.84,
1018
+ "grad_norm": 0.9352026581764221,
1019
  "learning_rate": 6.25e-05,
1020
+ "loss": 0.0635,
1021
  "step": 710
1022
  },
1023
  {
1024
  "epoch": 2.86,
1025
+ "grad_norm": 0.679918110370636,
1026
  "learning_rate": 6.25e-05,
1027
+ "loss": 0.0588,
1028
  "step": 715
1029
  },
1030
  {
1031
  "epoch": 2.88,
1032
+ "grad_norm": 0.836438000202179,
1033
  "learning_rate": 6.25e-05,
1034
+ "loss": 0.0635,
1035
  "step": 720
1036
  },
1037
  {
1038
  "epoch": 2.9,
1039
+ "grad_norm": 0.7643904089927673,
1040
  "learning_rate": 6.25e-05,
1041
+ "loss": 0.0554,
1042
  "step": 725
1043
  },
1044
  {
1045
  "epoch": 2.92,
1046
+ "grad_norm": 0.9192042946815491,
1047
  "learning_rate": 6.25e-05,
1048
+ "loss": 0.0541,
1049
  "step": 730
1050
  },
1051
  {
1052
  "epoch": 2.94,
1053
+ "grad_norm": 0.9899188280105591,
1054
  "learning_rate": 6.25e-05,
1055
+ "loss": 0.0591,
1056
  "step": 735
1057
  },
1058
  {
1059
  "epoch": 2.96,
1060
+ "grad_norm": 1.112701654434204,
1061
  "learning_rate": 6.25e-05,
1062
+ "loss": 0.0611,
1063
  "step": 740
1064
  },
1065
  {
1066
  "epoch": 2.98,
1067
+ "grad_norm": 0.9096015095710754,
1068
  "learning_rate": 6.25e-05,
1069
+ "loss": 0.0594,
1070
  "step": 745
1071
  },
1072
  {
1073
  "epoch": 3.0,
1074
+ "grad_norm": 1.158527135848999,
1075
  "learning_rate": 6.25e-05,
1076
+ "loss": 0.0703,
1077
+ "step": 750
1078
+ },
1079
+ {
1080
+ "epoch": 3.0,
1081
+ "eval_cer": 0.05350298542486898,
1082
+ "eval_loss": 0.07945344597101212,
1083
+ "eval_runtime": 281.5513,
1084
+ "eval_samples_per_second": 1.776,
1085
+ "eval_steps_per_second": 0.444,
1086
  "step": 750
1087
  },
1088
  {
1089
  "epoch": 3.02,
1090
+ "grad_norm": 0.8544594049453735,
1091
  "learning_rate": 6.25e-05,
1092
+ "loss": 0.0461,
1093
  "step": 755
1094
  },
1095
  {
1096
  "epoch": 3.04,
1097
+ "grad_norm": 0.8411735892295837,
1098
  "learning_rate": 6.25e-05,
1099
+ "loss": 0.0429,
1100
  "step": 760
1101
  },
1102
  {
1103
  "epoch": 3.06,
1104
+ "grad_norm": 0.7515286207199097,
1105
  "learning_rate": 6.25e-05,
1106
+ "loss": 0.0559,
1107
  "step": 765
1108
  },
1109
  {
1110
  "epoch": 3.08,
1111
+ "grad_norm": 0.8125985264778137,
1112
  "learning_rate": 6.25e-05,
1113
+ "loss": 0.044,
1114
  "step": 770
1115
  },
1116
  {
1117
  "epoch": 3.1,
1118
+ "grad_norm": 0.8093322515487671,
1119
  "learning_rate": 6.25e-05,
1120
+ "loss": 0.0529,
1121
  "step": 775
1122
  },
1123
  {
1124
  "epoch": 3.12,
1125
+ "grad_norm": 0.8852378129959106,
1126
  "learning_rate": 6.25e-05,
1127
+ "loss": 0.0508,
1128
  "step": 780
1129
  },
1130
  {
1131
  "epoch": 3.14,
1132
+ "grad_norm": 0.6388903856277466,
1133
  "learning_rate": 6.25e-05,
1134
+ "loss": 0.0491,
1135
  "step": 785
1136
  },
1137
  {
1138
  "epoch": 3.16,
1139
+ "grad_norm": 0.9803158640861511,
1140
  "learning_rate": 6.25e-05,
1141
+ "loss": 0.051,
1142
  "step": 790
1143
  },
1144
  {
1145
  "epoch": 3.18,
1146
+ "grad_norm": 1.163065791130066,
1147
  "learning_rate": 6.25e-05,
1148
+ "loss": 0.0538,
1149
  "step": 795
1150
  },
1151
  {
1152
  "epoch": 3.2,
1153
+ "grad_norm": 0.942138671875,
1154
  "learning_rate": 6.25e-05,
1155
+ "loss": 0.0548,
1156
  "step": 800
1157
  },
1158
  {
1159
  "epoch": 3.22,
1160
+ "grad_norm": 0.763847827911377,
1161
  "learning_rate": 6.25e-05,
1162
+ "loss": 0.0497,
1163
  "step": 805
1164
  },
1165
  {
1166
  "epoch": 3.24,
1167
+ "grad_norm": 1.1041572093963623,
1168
  "learning_rate": 6.25e-05,
1169
+ "loss": 0.0513,
1170
  "step": 810
1171
  },
1172
  {
1173
  "epoch": 3.26,
1174
+ "grad_norm": 0.8744838237762451,
1175
  "learning_rate": 6.25e-05,
1176
+ "loss": 0.0574,
1177
  "step": 815
1178
  },
1179
  {
1180
  "epoch": 3.2800000000000002,
1181
+ "grad_norm": 0.8737279176712036,
1182
  "learning_rate": 6.25e-05,
1183
+ "loss": 0.0485,
1184
  "step": 820
1185
  },
1186
  {
1187
  "epoch": 3.3,
1188
+ "grad_norm": 0.6367043256759644,
1189
  "learning_rate": 6.25e-05,
1190
+ "loss": 0.0462,
1191
  "step": 825
1192
  },
1193
  {
1194
  "epoch": 3.32,
1195
+ "grad_norm": 0.7195335030555725,
1196
  "learning_rate": 6.25e-05,
1197
+ "loss": 0.0529,
1198
  "step": 830
1199
  },
1200
  {
1201
  "epoch": 3.34,
1202
+ "grad_norm": 0.7411594986915588,
1203
  "learning_rate": 6.25e-05,
1204
+ "loss": 0.0558,
1205
  "step": 835
1206
  },
1207
  {
1208
  "epoch": 3.36,
1209
+ "grad_norm": 0.5583875179290771,
1210
  "learning_rate": 6.25e-05,
1211
+ "loss": 0.0498,
1212
  "step": 840
1213
  },
1214
  {
1215
  "epoch": 3.38,
1216
+ "grad_norm": 0.7013912796974182,
1217
  "learning_rate": 6.25e-05,
1218
+ "loss": 0.0465,
1219
  "step": 845
1220
  },
1221
  {
1222
  "epoch": 3.4,
1223
+ "grad_norm": 1.1267294883728027,
1224
  "learning_rate": 6.25e-05,
1225
+ "loss": 0.0505,
1226
  "step": 850
1227
  },
1228
  {
1229
  "epoch": 3.42,
1230
+ "grad_norm": 1.3056484460830688,
1231
  "learning_rate": 6.25e-05,
1232
+ "loss": 0.0515,
1233
  "step": 855
1234
  },
1235
  {
1236
  "epoch": 3.44,
1237
+ "grad_norm": 1.182433843612671,
1238
  "learning_rate": 6.25e-05,
1239
+ "loss": 0.0525,
1240
  "step": 860
1241
  },
1242
  {
1243
  "epoch": 3.46,
1244
+ "grad_norm": 0.8969308733940125,
1245
  "learning_rate": 6.25e-05,
1246
+ "loss": 0.0517,
1247
  "step": 865
1248
  },
1249
  {
1250
  "epoch": 3.48,
1251
+ "grad_norm": 0.7779067158699036,
1252
  "learning_rate": 6.25e-05,
1253
+ "loss": 0.0539,
1254
  "step": 870
1255
  },
1256
  {
1257
  "epoch": 3.5,
1258
+ "grad_norm": 0.591754674911499,
1259
  "learning_rate": 6.25e-05,
1260
+ "loss": 0.0546,
1261
  "step": 875
1262
  },
1263
  {
1264
  "epoch": 3.52,
1265
+ "grad_norm": 0.8097557425498962,
1266
  "learning_rate": 6.25e-05,
1267
+ "loss": 0.0529,
1268
  "step": 880
1269
  },
1270
  {
1271
  "epoch": 3.54,
1272
+ "grad_norm": 0.7054248452186584,
1273
  "learning_rate": 6.25e-05,
1274
+ "loss": 0.0436,
1275
  "step": 885
1276
  },
1277
  {
1278
  "epoch": 3.56,
1279
+ "grad_norm": 0.5832129716873169,
1280
  "learning_rate": 6.25e-05,
1281
+ "loss": 0.048,
1282
  "step": 890
1283
  },
1284
  {
1285
  "epoch": 3.58,
1286
+ "grad_norm": 0.8104725480079651,
1287
  "learning_rate": 6.25e-05,
1288
+ "loss": 0.0503,
1289
  "step": 895
1290
  },
1291
  {
1292
  "epoch": 3.6,
1293
+ "grad_norm": 0.9961804151535034,
1294
  "learning_rate": 6.25e-05,
1295
+ "loss": 0.0565,
1296
  "step": 900
1297
  },
1298
  {
1299
  "epoch": 3.62,
1300
+ "grad_norm": 0.8466907143592834,
1301
  "learning_rate": 6.25e-05,
1302
+ "loss": 0.054,
1303
  "step": 905
1304
  },
1305
  {
1306
  "epoch": 3.64,
1307
+ "grad_norm": 0.8867480158805847,
1308
  "learning_rate": 6.25e-05,
1309
+ "loss": 0.0547,
1310
  "step": 910
1311
  },
1312
  {
1313
  "epoch": 3.66,
1314
+ "grad_norm": 0.9030736684799194,
1315
  "learning_rate": 6.25e-05,
1316
+ "loss": 0.0481,
1317
  "step": 915
1318
  },
1319
  {
1320
  "epoch": 3.68,
1321
+ "grad_norm": 0.6740151643753052,
1322
  "learning_rate": 6.25e-05,
1323
+ "loss": 0.0529,
1324
  "step": 920
1325
  },
1326
  {
1327
  "epoch": 3.7,
1328
+ "grad_norm": 0.653508722782135,
1329
  "learning_rate": 6.25e-05,
1330
+ "loss": 0.0633,
1331
  "step": 925
1332
  },
1333
  {
1334
  "epoch": 3.7199999999999998,
1335
+ "grad_norm": 0.7304302453994751,
1336
  "learning_rate": 6.25e-05,
1337
+ "loss": 0.0493,
1338
  "step": 930
1339
  },
1340
  {
1341
  "epoch": 3.74,
1342
+ "grad_norm": 0.8343582153320312,
1343
  "learning_rate": 6.25e-05,
1344
+ "loss": 0.059,
1345
  "step": 935
1346
  },
1347
  {
1348
  "epoch": 3.76,
1349
+ "grad_norm": 0.8459467887878418,
1350
  "learning_rate": 6.25e-05,
1351
+ "loss": 0.0531,
1352
  "step": 940
1353
  },
1354
  {
1355
  "epoch": 3.7800000000000002,
1356
+ "grad_norm": 0.7470009326934814,
1357
  "learning_rate": 6.25e-05,
1358
+ "loss": 0.0548,
1359
  "step": 945
1360
  },
1361
  {
1362
  "epoch": 3.8,
1363
+ "grad_norm": 0.8183557987213135,
1364
  "learning_rate": 6.25e-05,
1365
+ "loss": 0.0471,
1366
  "step": 950
1367
  },
1368
  {
1369
  "epoch": 3.82,
1370
+ "grad_norm": 0.9448140263557434,
1371
  "learning_rate": 6.25e-05,
1372
+ "loss": 0.045,
1373
  "step": 955
1374
  },
1375
  {
1376
  "epoch": 3.84,
1377
+ "grad_norm": 0.7056401371955872,
1378
  "learning_rate": 6.25e-05,
1379
+ "loss": 0.045,
1380
  "step": 960
1381
  },
1382
  {
1383
  "epoch": 3.86,
1384
+ "grad_norm": 0.7785059213638306,
1385
  "learning_rate": 6.25e-05,
1386
+ "loss": 0.0554,
1387
  "step": 965
1388
  },
1389
  {
1390
  "epoch": 3.88,
1391
+ "grad_norm": 0.8976256251335144,
1392
  "learning_rate": 6.25e-05,
1393
+ "loss": 0.0529,
1394
  "step": 970
1395
  },
1396
  {
1397
  "epoch": 3.9,
1398
+ "grad_norm": 1.0849542617797852,
1399
  "learning_rate": 6.25e-05,
1400
+ "loss": 0.0457,
1401
  "step": 975
1402
  },
1403
  {
1404
  "epoch": 3.92,
1405
+ "grad_norm": 1.1612681150436401,
1406
  "learning_rate": 6.25e-05,
1407
+ "loss": 0.0513,
1408
  "step": 980
1409
  },
1410
  {
1411
  "epoch": 3.94,
1412
+ "grad_norm": 0.6912779211997986,
1413
  "learning_rate": 6.25e-05,
1414
+ "loss": 0.0469,
1415
  "step": 985
1416
  },
1417
  {
1418
  "epoch": 3.96,
1419
+ "grad_norm": 0.7129920125007629,
1420
  "learning_rate": 6.25e-05,
1421
+ "loss": 0.0509,
1422
  "step": 990
1423
  },
1424
  {
1425
  "epoch": 3.98,
1426
+ "grad_norm": 0.6439591646194458,
1427
  "learning_rate": 6.25e-05,
1428
+ "loss": 0.0412,
1429
  "step": 995
1430
  },
1431
  {
1432
  "epoch": 4.0,
1433
+ "grad_norm": 0.7044887542724609,
1434
  "learning_rate": 6.25e-05,
1435
+ "loss": 0.0558,
1436
  "step": 1000
1437
  },
1438
  {
1439
  "epoch": 4.0,
1440
+ "eval_cer": 0.046749928297655986,
1441
+ "eval_loss": 0.07047422975301743,
1442
+ "eval_runtime": 280.6209,
1443
+ "eval_samples_per_second": 1.782,
1444
+ "eval_steps_per_second": 0.445,
1445
  "step": 1000
1446
  },
1447
  {
1448
  "epoch": 4.02,
1449
+ "grad_norm": 0.6291618943214417,
1450
  "learning_rate": 6.25e-05,
1451
  "loss": 0.0432,
1452
  "step": 1005
1453
  },
1454
  {
1455
  "epoch": 4.04,
1456
+ "grad_norm": 0.5485780239105225,
1457
  "learning_rate": 6.25e-05,
1458
+ "loss": 0.0459,
1459
  "step": 1010
1460
  },
1461
  {
1462
  "epoch": 4.06,
1463
+ "grad_norm": 0.5912005305290222,
1464
  "learning_rate": 6.25e-05,
1465
+ "loss": 0.0416,
1466
  "step": 1015
1467
  },
1468
  {
1469
  "epoch": 4.08,
1470
+ "grad_norm": 0.5929523706436157,
1471
  "learning_rate": 6.25e-05,
1472
+ "loss": 0.0358,
1473
  "step": 1020
1474
  },
1475
  {
1476
  "epoch": 4.1,
1477
+ "grad_norm": 0.4929662346839905,
1478
  "learning_rate": 6.25e-05,
1479
+ "loss": 0.0389,
1480
  "step": 1025
1481
  },
1482
  {
1483
  "epoch": 4.12,
1484
+ "grad_norm": 0.6707394123077393,
1485
  "learning_rate": 6.25e-05,
1486
+ "loss": 0.0388,
1487
  "step": 1030
1488
  },
1489
  {
1490
  "epoch": 4.14,
1491
+ "grad_norm": 0.9774329662322998,
1492
  "learning_rate": 6.25e-05,
1493
+ "loss": 0.0401,
1494
  "step": 1035
1495
  },
1496
  {
1497
  "epoch": 4.16,
1498
+ "grad_norm": 0.6821659803390503,
1499
  "learning_rate": 6.25e-05,
1500
+ "loss": 0.0403,
1501
  "step": 1040
1502
  },
1503
  {
1504
  "epoch": 4.18,
1505
+ "grad_norm": 0.796459436416626,
1506
  "learning_rate": 6.25e-05,
1507
+ "loss": 0.0425,
1508
  "step": 1045
1509
  },
1510
  {
1511
  "epoch": 4.2,
1512
+ "grad_norm": 0.6956031918525696,
1513
  "learning_rate": 6.25e-05,
1514
+ "loss": 0.0475,
1515
  "step": 1050
1516
  },
1517
  {
1518
  "epoch": 4.22,
1519
+ "grad_norm": 0.7577043175697327,
1520
  "learning_rate": 6.25e-05,
1521
+ "loss": 0.0483,
1522
  "step": 1055
1523
  },
1524
  {
1525
  "epoch": 4.24,
1526
+ "grad_norm": 0.5384642481803894,
1527
  "learning_rate": 6.25e-05,
1528
+ "loss": 0.0372,
1529
  "step": 1060
1530
  },
1531
  {
1532
  "epoch": 4.26,
1533
+ "grad_norm": 0.791437566280365,
1534
  "learning_rate": 6.25e-05,
1535
+ "loss": 0.0485,
1536
  "step": 1065
1537
  },
1538
  {
1539
  "epoch": 4.28,
1540
+ "grad_norm": 0.5820832252502441,
1541
  "learning_rate": 6.25e-05,
1542
+ "loss": 0.0466,
1543
  "step": 1070
1544
  },
1545
  {
1546
  "epoch": 4.3,
1547
+ "grad_norm": 0.9597232341766357,
1548
  "learning_rate": 6.25e-05,
1549
+ "loss": 0.0437,
1550
  "step": 1075
1551
  },
1552
  {
1553
  "epoch": 4.32,
1554
+ "grad_norm": 0.9876553416252136,
1555
  "learning_rate": 6.25e-05,
1556
+ "loss": 0.05,
1557
  "step": 1080
1558
  },
1559
  {
1560
  "epoch": 4.34,
1561
+ "grad_norm": 0.6902226805686951,
1562
  "learning_rate": 6.25e-05,
1563
+ "loss": 0.0401,
1564
  "step": 1085
1565
  },
1566
  {
1567
  "epoch": 4.36,
1568
+ "grad_norm": 0.5399324893951416,
1569
  "learning_rate": 6.25e-05,
1570
+ "loss": 0.043,
1571
  "step": 1090
1572
  },
1573
  {
1574
  "epoch": 4.38,
1575
+ "grad_norm": 0.7499954700469971,
1576
  "learning_rate": 6.25e-05,
1577
+ "loss": 0.0426,
1578
  "step": 1095
1579
  },
1580
  {
1581
  "epoch": 4.4,
1582
+ "grad_norm": 0.7145591378211975,
1583
  "learning_rate": 6.25e-05,
1584
+ "loss": 0.0503,
1585
  "step": 1100
1586
  },
1587
  {
1588
  "epoch": 4.42,
1589
+ "grad_norm": 0.5746826529502869,
1590
  "learning_rate": 6.25e-05,
1591
+ "loss": 0.0383,
1592
  "step": 1105
1593
  },
1594
  {
1595
  "epoch": 4.44,
1596
+ "grad_norm": 0.7018007040023804,
1597
  "learning_rate": 6.25e-05,
1598
+ "loss": 0.0466,
1599
  "step": 1110
1600
  },
1601
  {
1602
  "epoch": 4.46,
1603
+ "grad_norm": 0.6607512831687927,
1604
  "learning_rate": 6.25e-05,
1605
+ "loss": 0.038,
1606
  "step": 1115
1607
  },
1608
  {
1609
  "epoch": 4.48,
1610
+ "grad_norm": 0.5863096714019775,
1611
  "learning_rate": 6.25e-05,
1612
+ "loss": 0.0462,
1613
  "step": 1120
1614
  },
1615
  {
1616
  "epoch": 4.5,
1617
+ "grad_norm": 0.674934983253479,
1618
  "learning_rate": 6.25e-05,
1619
+ "loss": 0.0523,
1620
  "step": 1125
1621
  },
1622
  {
1623
  "epoch": 4.52,
1624
+ "grad_norm": 0.7824676036834717,
1625
  "learning_rate": 6.25e-05,
1626
+ "loss": 0.0467,
1627
  "step": 1130
1628
  },
1629
  {
1630
  "epoch": 4.54,
1631
+ "grad_norm": 1.4591455459594727,
1632
  "learning_rate": 6.25e-05,
1633
+ "loss": 0.0485,
1634
  "step": 1135
1635
  },
1636
  {
1637
  "epoch": 4.5600000000000005,
1638
+ "grad_norm": 0.6413418650627136,
1639
  "learning_rate": 6.25e-05,
1640
+ "loss": 0.0435,
1641
  "step": 1140
1642
  },
1643
  {
1644
  "epoch": 4.58,
1645
+ "grad_norm": 0.5044887065887451,
1646
  "learning_rate": 6.25e-05,
1647
+ "loss": 0.0432,
1648
  "step": 1145
1649
  },
1650
  {
1651
  "epoch": 4.6,
1652
+ "grad_norm": 0.4768076539039612,
1653
  "learning_rate": 6.25e-05,
1654
+ "loss": 0.0422,
1655
  "step": 1150
1656
  },
1657
  {
1658
  "epoch": 4.62,
1659
+ "grad_norm": 0.7008136510848999,
1660
  "learning_rate": 6.25e-05,
1661
+ "loss": 0.045,
1662
  "step": 1155
1663
  },
1664
  {
1665
  "epoch": 4.64,
1666
+ "grad_norm": 1.1213037967681885,
1667
  "learning_rate": 6.25e-05,
1668
+ "loss": 0.0469,
1669
  "step": 1160
1670
  },
1671
  {
1672
  "epoch": 4.66,
1673
+ "grad_norm": 0.6898444890975952,
1674
  "learning_rate": 6.25e-05,
1675
+ "loss": 0.0398,
1676
  "step": 1165
1677
  },
1678
  {
1679
  "epoch": 4.68,
1680
+ "grad_norm": 0.6885802149772644,
1681
  "learning_rate": 6.25e-05,
1682
+ "loss": 0.0475,
1683
  "step": 1170
1684
  },
1685
  {
1686
  "epoch": 4.7,
1687
+ "grad_norm": 0.644440770149231,
1688
  "learning_rate": 6.25e-05,
1689
+ "loss": 0.0403,
1690
  "step": 1175
1691
  },
1692
  {
1693
  "epoch": 4.72,
1694
+ "grad_norm": 0.6610418558120728,
1695
  "learning_rate": 6.25e-05,
1696
+ "loss": 0.0415,
1697
  "step": 1180
1698
  },
1699
  {
1700
  "epoch": 4.74,
1701
+ "grad_norm": 0.7127951979637146,
1702
  "learning_rate": 6.25e-05,
1703
+ "loss": 0.0466,
1704
  "step": 1185
1705
  },
1706
  {
1707
  "epoch": 4.76,
1708
+ "grad_norm": 0.7608262300491333,
1709
  "learning_rate": 6.25e-05,
1710
+ "loss": 0.0398,
1711
  "step": 1190
1712
  },
1713
  {
1714
  "epoch": 4.78,
1715
+ "grad_norm": 0.6554054021835327,
1716
  "learning_rate": 6.25e-05,
1717
+ "loss": 0.0395,
1718
  "step": 1195
1719
  },
1720
  {
1721
  "epoch": 4.8,
1722
+ "grad_norm": 0.7710177302360535,
1723
  "learning_rate": 6.25e-05,
1724
+ "loss": 0.0412,
1725
  "step": 1200
1726
  },
1727
  {
1728
  "epoch": 4.82,
1729
+ "grad_norm": 0.5044788718223572,
1730
  "learning_rate": 6.25e-05,
1731
+ "loss": 0.0378,
1732
  "step": 1205
1733
  },
1734
  {
1735
  "epoch": 4.84,
1736
+ "grad_norm": 0.4640452265739441,
1737
  "learning_rate": 6.25e-05,
1738
+ "loss": 0.0394,
1739
  "step": 1210
1740
  },
1741
  {
1742
  "epoch": 4.86,
1743
+ "grad_norm": 0.6121119260787964,
1744
  "learning_rate": 6.25e-05,
1745
+ "loss": 0.0373,
1746
  "step": 1215
1747
  },
1748
  {
1749
  "epoch": 4.88,
1750
+ "grad_norm": 0.7307333946228027,
1751
  "learning_rate": 6.25e-05,
1752
+ "loss": 0.0462,
1753
  "step": 1220
1754
  },
1755
  {
1756
  "epoch": 4.9,
1757
+ "grad_norm": 0.841369092464447,
1758
  "learning_rate": 6.25e-05,
1759
+ "loss": 0.0433,
1760
  "step": 1225
1761
  },
1762
  {
1763
  "epoch": 4.92,
1764
+ "grad_norm": 0.48274680972099304,
1765
  "learning_rate": 6.25e-05,
1766
+ "loss": 0.0481,
1767
  "step": 1230
1768
  },
1769
  {
1770
  "epoch": 4.9399999999999995,
1771
+ "grad_norm": 0.6552777290344238,
1772
  "learning_rate": 6.25e-05,
1773
+ "loss": 0.0449,
1774
  "step": 1235
1775
  },
1776
  {
1777
  "epoch": 4.96,
1778
+ "grad_norm": 1.0837739706039429,
1779
  "learning_rate": 6.25e-05,
1780
+ "loss": 0.0465,
1781
  "step": 1240
1782
  },
1783
  {
1784
  "epoch": 4.98,
1785
+ "grad_norm": 0.7444823384284973,
1786
  "learning_rate": 6.25e-05,
1787
+ "loss": 0.0513,
1788
  "step": 1245
1789
  },
1790
  {
1791
  "epoch": 5.0,
1792
+ "grad_norm": 0.561403214931488,
1793
  "learning_rate": 6.25e-05,
1794
+ "loss": 0.0458,
1795
+ "step": 1250
1796
+ },
1797
+ {
1798
+ "epoch": 5.0,
1799
+ "eval_cer": 0.07253669856334576,
1800
+ "eval_loss": 0.06918226927518845,
1801
+ "eval_runtime": 282.8276,
1802
+ "eval_samples_per_second": 1.768,
1803
+ "eval_steps_per_second": 0.442,
1804
  "step": 1250
1805
  },
1806
  {
1807
  "epoch": 5.02,
1808
+ "grad_norm": 0.67482990026474,
1809
  "learning_rate": 6.25e-05,
1810
+ "loss": 0.037,
1811
  "step": 1255
1812
  },
1813
  {
1814
  "epoch": 5.04,
1815
+ "grad_norm": 0.6839190721511841,
1816
  "learning_rate": 6.25e-05,
1817
+ "loss": 0.0445,
1818
  "step": 1260
1819
  },
1820
  {
1821
  "epoch": 5.06,
1822
+ "grad_norm": 0.8001631498336792,
1823
  "learning_rate": 6.25e-05,
1824
+ "loss": 0.0386,
1825
  "step": 1265
1826
  },
1827
  {
1828
  "epoch": 5.08,
1829
+ "grad_norm": 0.8353962898254395,
1830
  "learning_rate": 6.25e-05,
1831
+ "loss": 0.0407,
1832
  "step": 1270
1833
  },
1834
  {
1835
  "epoch": 5.1,
1836
+ "grad_norm": 0.556709885597229,
1837
  "learning_rate": 6.25e-05,
1838
+ "loss": 0.0355,
1839
  "step": 1275
1840
  },
1841
  {
1842
  "epoch": 5.12,
1843
+ "grad_norm": 0.5634174942970276,
1844
  "learning_rate": 6.25e-05,
1845
+ "loss": 0.0322,
1846
  "step": 1280
1847
  },
1848
  {
1849
  "epoch": 5.14,
1850
+ "grad_norm": 0.6530662775039673,
1851
  "learning_rate": 6.25e-05,
1852
+ "loss": 0.041,
1853
  "step": 1285
1854
  },
1855
  {
1856
  "epoch": 5.16,
1857
+ "grad_norm": 0.5771991610527039,
1858
  "learning_rate": 6.25e-05,
1859
+ "loss": 0.0375,
1860
  "step": 1290
1861
  },
1862
  {
1863
  "epoch": 5.18,
1864
+ "grad_norm": 0.5936269164085388,
1865
  "learning_rate": 6.25e-05,
1866
+ "loss": 0.0362,
1867
  "step": 1295
1868
  },
1869
  {
1870
  "epoch": 5.2,
1871
+ "grad_norm": 0.6964532136917114,
1872
  "learning_rate": 6.25e-05,
1873
+ "loss": 0.0361,
1874
  "step": 1300
1875
  },
1876
  {
1877
  "epoch": 5.22,
1878
+ "grad_norm": 1.0432935953140259,
1879
  "learning_rate": 6.25e-05,
1880
+ "loss": 0.0346,
1881
  "step": 1305
1882
  },
1883
  {
1884
  "epoch": 5.24,
1885
+ "grad_norm": 0.6481297016143799,
1886
  "learning_rate": 6.25e-05,
1887
+ "loss": 0.0351,
1888
  "step": 1310
1889
  },
1890
  {
1891
  "epoch": 5.26,
1892
+ "grad_norm": 0.9188110828399658,
1893
  "learning_rate": 6.25e-05,
1894
+ "loss": 0.0378,
1895
  "step": 1315
1896
  },
1897
  {
1898
  "epoch": 5.28,
1899
+ "grad_norm": 0.4248051345348358,
1900
  "learning_rate": 6.25e-05,
1901
+ "loss": 0.0296,
1902
  "step": 1320
1903
  },
1904
  {
1905
  "epoch": 5.3,
1906
+ "grad_norm": 0.5334679484367371,
1907
  "learning_rate": 6.25e-05,
1908
+ "loss": 0.0397,
1909
  "step": 1325
1910
  },
1911
  {
1912
  "epoch": 5.32,
1913
+ "grad_norm": 0.7321200370788574,
1914
  "learning_rate": 6.25e-05,
1915
+ "loss": 0.0414,
1916
  "step": 1330
1917
  },
1918
  {
1919
  "epoch": 5.34,
1920
+ "grad_norm": 0.5322144627571106,
1921
  "learning_rate": 6.25e-05,
1922
+ "loss": 0.0381,
1923
  "step": 1335
1924
  },
1925
  {
1926
  "epoch": 5.36,
1927
+ "grad_norm": 0.8044850826263428,
1928
  "learning_rate": 6.25e-05,
1929
+ "loss": 0.0348,
1930
  "step": 1340
1931
  },
1932
  {
1933
  "epoch": 5.38,
1934
+ "grad_norm": 0.6011214256286621,
1935
  "learning_rate": 6.25e-05,
1936
+ "loss": 0.0379,
1937
  "step": 1345
1938
  },
1939
  {
1940
  "epoch": 5.4,
1941
+ "grad_norm": 0.7421667575836182,
1942
  "learning_rate": 6.25e-05,
1943
+ "loss": 0.0379,
1944
  "step": 1350
1945
  },
1946
  {
1947
  "epoch": 5.42,
1948
+ "grad_norm": 0.4418427348136902,
1949
  "learning_rate": 6.25e-05,
1950
+ "loss": 0.0328,
1951
  "step": 1355
1952
  },
1953
  {
1954
  "epoch": 5.44,
1955
+ "grad_norm": 0.6037031412124634,
1956
  "learning_rate": 6.25e-05,
1957
+ "loss": 0.0351,
1958
  "step": 1360
1959
  },
1960
  {
1961
  "epoch": 5.46,
1962
+ "grad_norm": 0.7416286468505859,
1963
  "learning_rate": 6.25e-05,
1964
+ "loss": 0.0344,
1965
  "step": 1365
1966
  },
1967
  {
1968
  "epoch": 5.48,
1969
+ "grad_norm": 0.9417647123336792,
1970
  "learning_rate": 6.25e-05,
1971
+ "loss": 0.0504,
1972
  "step": 1370
1973
  },
1974
  {
1975
  "epoch": 5.5,
1976
+ "grad_norm": 0.5485287308692932,
1977
  "learning_rate": 6.25e-05,
1978
+ "loss": 0.0383,
1979
  "step": 1375
1980
  },
1981
  {
1982
  "epoch": 5.52,
1983
+ "grad_norm": 0.647965133190155,
1984
  "learning_rate": 6.25e-05,
1985
+ "loss": 0.0388,
1986
  "step": 1380
1987
  },
1988
  {
1989
  "epoch": 5.54,
1990
+ "grad_norm": 0.7375500202178955,
1991
  "learning_rate": 6.25e-05,
1992
+ "loss": 0.0368,
1993
  "step": 1385
1994
  },
1995
  {
1996
  "epoch": 5.5600000000000005,
1997
+ "grad_norm": 0.7219087481498718,
1998
  "learning_rate": 6.25e-05,
1999
+ "loss": 0.0497,
2000
  "step": 1390
2001
  },
2002
  {
2003
  "epoch": 5.58,
2004
+ "grad_norm": 0.49959471821784973,
2005
  "learning_rate": 6.25e-05,
2006
+ "loss": 0.038,
2007
  "step": 1395
2008
  },
2009
  {
2010
  "epoch": 5.6,
2011
+ "grad_norm": 0.5299109816551208,
2012
  "learning_rate": 6.25e-05,
2013
+ "loss": 0.0303,
2014
  "step": 1400
2015
  },
2016
  {
2017
  "epoch": 5.62,
2018
+ "grad_norm": 0.48730289936065674,
2019
  "learning_rate": 6.25e-05,
2020
+ "loss": 0.0337,
2021
  "step": 1405
2022
  },
2023
  {
2024
  "epoch": 5.64,
2025
+ "grad_norm": 0.3811701834201813,
2026
  "learning_rate": 6.25e-05,
2027
+ "loss": 0.0367,
2028
  "step": 1410
2029
  },
2030
  {
2031
  "epoch": 5.66,
2032
+ "grad_norm": 0.4611757695674896,
2033
  "learning_rate": 6.25e-05,
2034
+ "loss": 0.0396,
2035
  "step": 1415
2036
  },
2037
  {
2038
  "epoch": 5.68,
2039
+ "grad_norm": 0.5509118437767029,
2040
  "learning_rate": 6.25e-05,
2041
+ "loss": 0.0397,
2042
  "step": 1420
2043
  },
2044
  {
2045
  "epoch": 5.7,
2046
+ "grad_norm": 0.8130658268928528,
2047
  "learning_rate": 6.25e-05,
2048
+ "loss": 0.0346,
2049
  "step": 1425
2050
  },
2051
  {
2052
  "epoch": 5.72,
2053
+ "grad_norm": 0.4248274266719818,
2054
  "learning_rate": 6.25e-05,
2055
+ "loss": 0.0334,
2056
  "step": 1430
2057
  },
2058
  {
2059
  "epoch": 5.74,
2060
+ "grad_norm": 1.1918326616287231,
2061
  "learning_rate": 6.25e-05,
2062
+ "loss": 0.041,
2063
  "step": 1435
2064
  },
2065
  {
2066
  "epoch": 5.76,
2067
+ "grad_norm": 0.6501240730285645,
2068
  "learning_rate": 6.25e-05,
2069
+ "loss": 0.0423,
2070
  "step": 1440
2071
  },
2072
  {
2073
  "epoch": 5.78,
2074
+ "grad_norm": 1.216350793838501,
2075
  "learning_rate": 6.25e-05,
2076
+ "loss": 0.0366,
2077
  "step": 1445
2078
  },
2079
  {
2080
  "epoch": 5.8,
2081
+ "grad_norm": 0.48442235589027405,
2082
  "learning_rate": 6.25e-05,
2083
+ "loss": 0.0335,
2084
  "step": 1450
2085
  },
2086
  {
2087
  "epoch": 5.82,
2088
+ "grad_norm": 0.5834723711013794,
2089
  "learning_rate": 6.25e-05,
2090
  "loss": 0.0414,
2091
  "step": 1455
2092
  },
2093
  {
2094
  "epoch": 5.84,
2095
+ "grad_norm": 0.7862647771835327,
2096
  "learning_rate": 6.25e-05,
2097
+ "loss": 0.0438,
2098
  "step": 1460
2099
  },
2100
  {
2101
  "epoch": 5.86,
2102
+ "grad_norm": 0.8282245397567749,
2103
  "learning_rate": 6.25e-05,
2104
+ "loss": 0.0384,
2105
  "step": 1465
2106
  },
2107
  {
2108
  "epoch": 5.88,
2109
+ "grad_norm": 0.8185272812843323,
2110
  "learning_rate": 6.25e-05,
2111
+ "loss": 0.0386,
2112
  "step": 1470
2113
  },
2114
  {
2115
  "epoch": 5.9,
2116
+ "grad_norm": 0.6197579503059387,
2117
  "learning_rate": 6.25e-05,
2118
+ "loss": 0.036,
2119
  "step": 1475
2120
  },
2121
  {
2122
  "epoch": 5.92,
2123
+ "grad_norm": 0.5256204009056091,
2124
  "learning_rate": 6.25e-05,
2125
+ "loss": 0.0331,
2126
  "step": 1480
2127
  },
2128
  {
2129
  "epoch": 5.9399999999999995,
2130
+ "grad_norm": 0.5693526864051819,
2131
  "learning_rate": 6.25e-05,
2132
+ "loss": 0.0404,
2133
  "step": 1485
2134
  },
2135
  {
2136
  "epoch": 5.96,
2137
+ "grad_norm": 0.505524754524231,
2138
  "learning_rate": 6.25e-05,
2139
+ "loss": 0.0345,
2140
  "step": 1490
2141
  },
2142
  {
2143
  "epoch": 5.98,
2144
+ "grad_norm": 0.7480014562606812,
2145
  "learning_rate": 6.25e-05,
2146
+ "loss": 0.0421,
2147
  "step": 1495
2148
  },
2149
  {
2150
  "epoch": 6.0,
2151
+ "grad_norm": 0.6769825220108032,
2152
  "learning_rate": 6.25e-05,
2153
+ "loss": 0.0364,
2154
  "step": 1500
2155
  },
2156
  {
2157
  "epoch": 6.0,
2158
+ "eval_cer": 0.04693244335514823,
2159
+ "eval_loss": 0.07296038419008255,
2160
+ "eval_runtime": 281.2143,
2161
+ "eval_samples_per_second": 1.778,
2162
+ "eval_steps_per_second": 0.445,
2163
  "step": 1500
2164
  }
2165
  ],
 
2167
  "max_steps": 5000,
2168
  "num_input_tokens_seen": 0,
2169
  "num_train_epochs": 20,
2170
+ "save_steps": 250,
2171
  "stateful_callbacks": {
2172
  "TrainerControl": {
2173
  "args": {
checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:380ff1a9921ff96ab779d6709926f10f78099a5595ab698c3919c0c657657de1
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:105d055d6d84eb987fbbb4fc9493aa207f4712b04ab60a83adb7510815397317
3
  size 5432
checkpoint-1750/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: openai/whisper-large-v3-turbo
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
checkpoint-1750/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "WhisperForConditionalGeneration",
5
+ "parent_library": "transformers.models.whisper.modeling_whisper"
6
+ },
7
+ "base_model_name_or_path": "openai/whisper-large-v3-turbo",
8
+ "bias": "none",
9
+ "eva_config": null,
10
+ "exclude_modules": null,
11
+ "fan_in_fan_out": false,
12
+ "inference_mode": true,
13
+ "init_lora_weights": true,
14
+ "layer_replication": null,
15
+ "layers_pattern": null,
16
+ "layers_to_transform": null,
17
+ "loftq_config": {},
18
+ "lora_alpha": 64,
19
+ "lora_bias": false,
20
+ "lora_dropout": 0.05,
21
+ "megatron_config": null,
22
+ "megatron_core": "megatron.core",
23
+ "modules_to_save": null,
24
+ "peft_type": "LORA",
25
+ "r": 32,
26
+ "rank_pattern": {},
27
+ "revision": null,
28
+ "target_modules": [
29
+ "v_proj",
30
+ "q_proj"
31
+ ],
32
+ "task_type": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
checkpoint-1750/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c586208f4e8b273667a5196cb624b6ef6fbfea7e21d347a8b081a2a7413e77
3
+ size 26237160
checkpoint-1750/adapter_model/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: openai/whisper-large-v3-turbo
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
checkpoint-1750/adapter_model/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "WhisperForConditionalGeneration",
5
+ "parent_library": "transformers.models.whisper.modeling_whisper"
6
+ },
7
+ "base_model_name_or_path": "openai/whisper-large-v3-turbo",
8
+ "bias": "none",
9
+ "eva_config": null,
10
+ "exclude_modules": null,
11
+ "fan_in_fan_out": false,
12
+ "inference_mode": true,
13
+ "init_lora_weights": true,
14
+ "layer_replication": null,
15
+ "layers_pattern": null,
16
+ "layers_to_transform": null,
17
+ "loftq_config": {},
18
+ "lora_alpha": 64,
19
+ "lora_bias": false,
20
+ "lora_dropout": 0.05,
21
+ "megatron_config": null,
22
+ "megatron_core": "megatron.core",
23
+ "modules_to_save": null,
24
+ "peft_type": "LORA",
25
+ "r": 32,
26
+ "rank_pattern": {},
27
+ "revision": null,
28
+ "target_modules": [
29
+ "v_proj",
30
+ "q_proj"
31
+ ],
32
+ "task_type": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
checkpoint-1750/adapter_model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c586208f4e8b273667a5196cb624b6ef6fbfea7e21d347a8b081a2a7413e77
3
+ size 26237160
checkpoint-1750/added_tokens.json ADDED
@@ -0,0 +1,1611 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|0.00|>": 50365,
3
+ "<|0.02|>": 50366,
4
+ "<|0.04|>": 50367,
5
+ "<|0.06|>": 50368,
6
+ "<|0.08|>": 50369,
7
+ "<|0.10|>": 50370,
8
+ "<|0.12|>": 50371,
9
+ "<|0.14|>": 50372,
10
+ "<|0.16|>": 50373,
11
+ "<|0.18|>": 50374,
12
+ "<|0.20|>": 50375,
13
+ "<|0.22|>": 50376,
14
+ "<|0.24|>": 50377,
15
+ "<|0.26|>": 50378,
16
+ "<|0.28|>": 50379,
17
+ "<|0.30|>": 50380,
18
+ "<|0.32|>": 50381,
19
+ "<|0.34|>": 50382,
20
+ "<|0.36|>": 50383,
21
+ "<|0.38|>": 50384,
22
+ "<|0.40|>": 50385,
23
+ "<|0.42|>": 50386,
24
+ "<|0.44|>": 50387,
25
+ "<|0.46|>": 50388,
26
+ "<|0.48|>": 50389,
27
+ "<|0.50|>": 50390,
28
+ "<|0.52|>": 50391,
29
+ "<|0.54|>": 50392,
30
+ "<|0.56|>": 50393,
31
+ "<|0.58|>": 50394,
32
+ "<|0.60|>": 50395,
33
+ "<|0.62|>": 50396,
34
+ "<|0.64|>": 50397,
35
+ "<|0.66|>": 50398,
36
+ "<|0.68|>": 50399,
37
+ "<|0.70|>": 50400,
38
+ "<|0.72|>": 50401,
39
+ "<|0.74|>": 50402,
40
+ "<|0.76|>": 50403,
41
+ "<|0.78|>": 50404,
42
+ "<|0.80|>": 50405,
43
+ "<|0.82|>": 50406,
44
+ "<|0.84|>": 50407,
45
+ "<|0.86|>": 50408,
46
+ "<|0.88|>": 50409,
47
+ "<|0.90|>": 50410,
48
+ "<|0.92|>": 50411,
49
+ "<|0.94|>": 50412,
50
+ "<|0.96|>": 50413,
51
+ "<|0.98|>": 50414,
52
+ "<|1.00|>": 50415,
53
+ "<|1.02|>": 50416,
54
+ "<|1.04|>": 50417,
55
+ "<|1.06|>": 50418,
56
+ "<|1.08|>": 50419,
57
+ "<|1.10|>": 50420,
58
+ "<|1.12|>": 50421,
59
+ "<|1.14|>": 50422,
60
+ "<|1.16|>": 50423,
61
+ "<|1.18|>": 50424,
62
+ "<|1.20|>": 50425,
63
+ "<|1.22|>": 50426,
64
+ "<|1.24|>": 50427,
65
+ "<|1.26|>": 50428,
66
+ "<|1.28|>": 50429,
67
+ "<|1.30|>": 50430,
68
+ "<|1.32|>": 50431,
69
+ "<|1.34|>": 50432,
70
+ "<|1.36|>": 50433,
71
+ "<|1.38|>": 50434,
72
+ "<|1.40|>": 50435,
73
+ "<|1.42|>": 50436,
74
+ "<|1.44|>": 50437,
75
+ "<|1.46|>": 50438,
76
+ "<|1.48|>": 50439,
77
+ "<|1.50|>": 50440,
78
+ "<|1.52|>": 50441,
79
+ "<|1.54|>": 50442,
80
+ "<|1.56|>": 50443,
81
+ "<|1.58|>": 50444,
82
+ "<|1.60|>": 50445,
83
+ "<|1.62|>": 50446,
84
+ "<|1.64|>": 50447,
85
+ "<|1.66|>": 50448,
86
+ "<|1.68|>": 50449,
87
+ "<|1.70|>": 50450,
88
+ "<|1.72|>": 50451,
89
+ "<|1.74|>": 50452,
90
+ "<|1.76|>": 50453,
91
+ "<|1.78|>": 50454,
92
+ "<|1.80|>": 50455,
93
+ "<|1.82|>": 50456,
94
+ "<|1.84|>": 50457,
95
+ "<|1.86|>": 50458,
96
+ "<|1.88|>": 50459,
97
+ "<|1.90|>": 50460,
98
+ "<|1.92|>": 50461,
99
+ "<|1.94|>": 50462,
100
+ "<|1.96|>": 50463,
101
+ "<|1.98|>": 50464,
102
+ "<|10.00|>": 50865,
103
+ "<|10.02|>": 50866,
104
+ "<|10.04|>": 50867,
105
+ "<|10.06|>": 50868,
106
+ "<|10.08|>": 50869,
107
+ "<|10.10|>": 50870,
108
+ "<|10.12|>": 50871,
109
+ "<|10.14|>": 50872,
110
+ "<|10.16|>": 50873,
111
+ "<|10.18|>": 50874,
112
+ "<|10.20|>": 50875,
113
+ "<|10.22|>": 50876,
114
+ "<|10.24|>": 50877,
115
+ "<|10.26|>": 50878,
116
+ "<|10.28|>": 50879,
117
+ "<|10.30|>": 50880,
118
+ "<|10.32|>": 50881,
119
+ "<|10.34|>": 50882,
120
+ "<|10.36|>": 50883,
121
+ "<|10.38|>": 50884,
122
+ "<|10.40|>": 50885,
123
+ "<|10.42|>": 50886,
124
+ "<|10.44|>": 50887,
125
+ "<|10.46|>": 50888,
126
+ "<|10.48|>": 50889,
127
+ "<|10.50|>": 50890,
128
+ "<|10.52|>": 50891,
129
+ "<|10.54|>": 50892,
130
+ "<|10.56|>": 50893,
131
+ "<|10.58|>": 50894,
132
+ "<|10.60|>": 50895,
133
+ "<|10.62|>": 50896,
134
+ "<|10.64|>": 50897,
135
+ "<|10.66|>": 50898,
136
+ "<|10.68|>": 50899,
137
+ "<|10.70|>": 50900,
138
+ "<|10.72|>": 50901,
139
+ "<|10.74|>": 50902,
140
+ "<|10.76|>": 50903,
141
+ "<|10.78|>": 50904,
142
+ "<|10.80|>": 50905,
143
+ "<|10.82|>": 50906,
144
+ "<|10.84|>": 50907,
145
+ "<|10.86|>": 50908,
146
+ "<|10.88|>": 50909,
147
+ "<|10.90|>": 50910,
148
+ "<|10.92|>": 50911,
149
+ "<|10.94|>": 50912,
150
+ "<|10.96|>": 50913,
151
+ "<|10.98|>": 50914,
152
+ "<|11.00|>": 50915,
153
+ "<|11.02|>": 50916,
154
+ "<|11.04|>": 50917,
155
+ "<|11.06|>": 50918,
156
+ "<|11.08|>": 50919,
157
+ "<|11.10|>": 50920,
158
+ "<|11.12|>": 50921,
159
+ "<|11.14|>": 50922,
160
+ "<|11.16|>": 50923,
161
+ "<|11.18|>": 50924,
162
+ "<|11.20|>": 50925,
163
+ "<|11.22|>": 50926,
164
+ "<|11.24|>": 50927,
165
+ "<|11.26|>": 50928,
166
+ "<|11.28|>": 50929,
167
+ "<|11.30|>": 50930,
168
+ "<|11.32|>": 50931,
169
+ "<|11.34|>": 50932,
170
+ "<|11.36|>": 50933,
171
+ "<|11.38|>": 50934,
172
+ "<|11.40|>": 50935,
173
+ "<|11.42|>": 50936,
174
+ "<|11.44|>": 50937,
175
+ "<|11.46|>": 50938,
176
+ "<|11.48|>": 50939,
177
+ "<|11.50|>": 50940,
178
+ "<|11.52|>": 50941,
179
+ "<|11.54|>": 50942,
180
+ "<|11.56|>": 50943,
181
+ "<|11.58|>": 50944,
182
+ "<|11.60|>": 50945,
183
+ "<|11.62|>": 50946,
184
+ "<|11.64|>": 50947,
185
+ "<|11.66|>": 50948,
186
+ "<|11.68|>": 50949,
187
+ "<|11.70|>": 50950,
188
+ "<|11.72|>": 50951,
189
+ "<|11.74|>": 50952,
190
+ "<|11.76|>": 50953,
191
+ "<|11.78|>": 50954,
192
+ "<|11.80|>": 50955,
193
+ "<|11.82|>": 50956,
194
+ "<|11.84|>": 50957,
195
+ "<|11.86|>": 50958,
196
+ "<|11.88|>": 50959,
197
+ "<|11.90|>": 50960,
198
+ "<|11.92|>": 50961,
199
+ "<|11.94|>": 50962,
200
+ "<|11.96|>": 50963,
201
+ "<|11.98|>": 50964,
202
+ "<|12.00|>": 50965,
203
+ "<|12.02|>": 50966,
204
+ "<|12.04|>": 50967,
205
+ "<|12.06|>": 50968,
206
+ "<|12.08|>": 50969,
207
+ "<|12.10|>": 50970,
208
+ "<|12.12|>": 50971,
209
+ "<|12.14|>": 50972,
210
+ "<|12.16|>": 50973,
211
+ "<|12.18|>": 50974,
212
+ "<|12.20|>": 50975,
213
+ "<|12.22|>": 50976,
214
+ "<|12.24|>": 50977,
215
+ "<|12.26|>": 50978,
216
+ "<|12.28|>": 50979,
217
+ "<|12.30|>": 50980,
218
+ "<|12.32|>": 50981,
219
+ "<|12.34|>": 50982,
220
+ "<|12.36|>": 50983,
221
+ "<|12.38|>": 50984,
222
+ "<|12.40|>": 50985,
223
+ "<|12.42|>": 50986,
224
+ "<|12.44|>": 50987,
225
+ "<|12.46|>": 50988,
226
+ "<|12.48|>": 50989,
227
+ "<|12.50|>": 50990,
228
+ "<|12.52|>": 50991,
229
+ "<|12.54|>": 50992,
230
+ "<|12.56|>": 50993,
231
+ "<|12.58|>": 50994,
232
+ "<|12.60|>": 50995,
233
+ "<|12.62|>": 50996,
234
+ "<|12.64|>": 50997,
235
+ "<|12.66|>": 50998,
236
+ "<|12.68|>": 50999,
237
+ "<|12.70|>": 51000,
238
+ "<|12.72|>": 51001,
239
+ "<|12.74|>": 51002,
240
+ "<|12.76|>": 51003,
241
+ "<|12.78|>": 51004,
242
+ "<|12.80|>": 51005,
243
+ "<|12.82|>": 51006,
244
+ "<|12.84|>": 51007,
245
+ "<|12.86|>": 51008,
246
+ "<|12.88|>": 51009,
247
+ "<|12.90|>": 51010,
248
+ "<|12.92|>": 51011,
249
+ "<|12.94|>": 51012,
250
+ "<|12.96|>": 51013,
251
+ "<|12.98|>": 51014,
252
+ "<|13.00|>": 51015,
253
+ "<|13.02|>": 51016,
254
+ "<|13.04|>": 51017,
255
+ "<|13.06|>": 51018,
256
+ "<|13.08|>": 51019,
257
+ "<|13.10|>": 51020,
258
+ "<|13.12|>": 51021,
259
+ "<|13.14|>": 51022,
260
+ "<|13.16|>": 51023,
261
+ "<|13.18|>": 51024,
262
+ "<|13.20|>": 51025,
263
+ "<|13.22|>": 51026,
264
+ "<|13.24|>": 51027,
265
+ "<|13.26|>": 51028,
266
+ "<|13.28|>": 51029,
267
+ "<|13.30|>": 51030,
268
+ "<|13.32|>": 51031,
269
+ "<|13.34|>": 51032,
270
+ "<|13.36|>": 51033,
271
+ "<|13.38|>": 51034,
272
+ "<|13.40|>": 51035,
273
+ "<|13.42|>": 51036,
274
+ "<|13.44|>": 51037,
275
+ "<|13.46|>": 51038,
276
+ "<|13.48|>": 51039,
277
+ "<|13.50|>": 51040,
278
+ "<|13.52|>": 51041,
279
+ "<|13.54|>": 51042,
280
+ "<|13.56|>": 51043,
281
+ "<|13.58|>": 51044,
282
+ "<|13.60|>": 51045,
283
+ "<|13.62|>": 51046,
284
+ "<|13.64|>": 51047,
285
+ "<|13.66|>": 51048,
286
+ "<|13.68|>": 51049,
287
+ "<|13.70|>": 51050,
288
+ "<|13.72|>": 51051,
289
+ "<|13.74|>": 51052,
290
+ "<|13.76|>": 51053,
291
+ "<|13.78|>": 51054,
292
+ "<|13.80|>": 51055,
293
+ "<|13.82|>": 51056,
294
+ "<|13.84|>": 51057,
295
+ "<|13.86|>": 51058,
296
+ "<|13.88|>": 51059,
297
+ "<|13.90|>": 51060,
298
+ "<|13.92|>": 51061,
299
+ "<|13.94|>": 51062,
300
+ "<|13.96|>": 51063,
301
+ "<|13.98|>": 51064,
302
+ "<|14.00|>": 51065,
303
+ "<|14.02|>": 51066,
304
+ "<|14.04|>": 51067,
305
+ "<|14.06|>": 51068,
306
+ "<|14.08|>": 51069,
307
+ "<|14.10|>": 51070,
308
+ "<|14.12|>": 51071,
309
+ "<|14.14|>": 51072,
310
+ "<|14.16|>": 51073,
311
+ "<|14.18|>": 51074,
312
+ "<|14.20|>": 51075,
313
+ "<|14.22|>": 51076,
314
+ "<|14.24|>": 51077,
315
+ "<|14.26|>": 51078,
316
+ "<|14.28|>": 51079,
317
+ "<|14.30|>": 51080,
318
+ "<|14.32|>": 51081,
319
+ "<|14.34|>": 51082,
320
+ "<|14.36|>": 51083,
321
+ "<|14.38|>": 51084,
322
+ "<|14.40|>": 51085,
323
+ "<|14.42|>": 51086,
324
+ "<|14.44|>": 51087,
325
+ "<|14.46|>": 51088,
326
+ "<|14.48|>": 51089,
327
+ "<|14.50|>": 51090,
328
+ "<|14.52|>": 51091,
329
+ "<|14.54|>": 51092,
330
+ "<|14.56|>": 51093,
331
+ "<|14.58|>": 51094,
332
+ "<|14.60|>": 51095,
333
+ "<|14.62|>": 51096,
334
+ "<|14.64|>": 51097,
335
+ "<|14.66|>": 51098,
336
+ "<|14.68|>": 51099,
337
+ "<|14.70|>": 51100,
338
+ "<|14.72|>": 51101,
339
+ "<|14.74|>": 51102,
340
+ "<|14.76|>": 51103,
341
+ "<|14.78|>": 51104,
342
+ "<|14.80|>": 51105,
343
+ "<|14.82|>": 51106,
344
+ "<|14.84|>": 51107,
345
+ "<|14.86|>": 51108,
346
+ "<|14.88|>": 51109,
347
+ "<|14.90|>": 51110,
348
+ "<|14.92|>": 51111,
349
+ "<|14.94|>": 51112,
350
+ "<|14.96|>": 51113,
351
+ "<|14.98|>": 51114,
352
+ "<|15.00|>": 51115,
353
+ "<|15.02|>": 51116,
354
+ "<|15.04|>": 51117,
355
+ "<|15.06|>": 51118,
356
+ "<|15.08|>": 51119,
357
+ "<|15.10|>": 51120,
358
+ "<|15.12|>": 51121,
359
+ "<|15.14|>": 51122,
360
+ "<|15.16|>": 51123,
361
+ "<|15.18|>": 51124,
362
+ "<|15.20|>": 51125,
363
+ "<|15.22|>": 51126,
364
+ "<|15.24|>": 51127,
365
+ "<|15.26|>": 51128,
366
+ "<|15.28|>": 51129,
367
+ "<|15.30|>": 51130,
368
+ "<|15.32|>": 51131,
369
+ "<|15.34|>": 51132,
370
+ "<|15.36|>": 51133,
371
+ "<|15.38|>": 51134,
372
+ "<|15.40|>": 51135,
373
+ "<|15.42|>": 51136,
374
+ "<|15.44|>": 51137,
375
+ "<|15.46|>": 51138,
376
+ "<|15.48|>": 51139,
377
+ "<|15.50|>": 51140,
378
+ "<|15.52|>": 51141,
379
+ "<|15.54|>": 51142,
380
+ "<|15.56|>": 51143,
381
+ "<|15.58|>": 51144,
382
+ "<|15.60|>": 51145,
383
+ "<|15.62|>": 51146,
384
+ "<|15.64|>": 51147,
385
+ "<|15.66|>": 51148,
386
+ "<|15.68|>": 51149,
387
+ "<|15.70|>": 51150,
388
+ "<|15.72|>": 51151,
389
+ "<|15.74|>": 51152,
390
+ "<|15.76|>": 51153,
391
+ "<|15.78|>": 51154,
392
+ "<|15.80|>": 51155,
393
+ "<|15.82|>": 51156,
394
+ "<|15.84|>": 51157,
395
+ "<|15.86|>": 51158,
396
+ "<|15.88|>": 51159,
397
+ "<|15.90|>": 51160,
398
+ "<|15.92|>": 51161,
399
+ "<|15.94|>": 51162,
400
+ "<|15.96|>": 51163,
401
+ "<|15.98|>": 51164,
402
+ "<|16.00|>": 51165,
403
+ "<|16.02|>": 51166,
404
+ "<|16.04|>": 51167,
405
+ "<|16.06|>": 51168,
406
+ "<|16.08|>": 51169,
407
+ "<|16.10|>": 51170,
408
+ "<|16.12|>": 51171,
409
+ "<|16.14|>": 51172,
410
+ "<|16.16|>": 51173,
411
+ "<|16.18|>": 51174,
412
+ "<|16.20|>": 51175,
413
+ "<|16.22|>": 51176,
414
+ "<|16.24|>": 51177,
415
+ "<|16.26|>": 51178,
416
+ "<|16.28|>": 51179,
417
+ "<|16.30|>": 51180,
418
+ "<|16.32|>": 51181,
419
+ "<|16.34|>": 51182,
420
+ "<|16.36|>": 51183,
421
+ "<|16.38|>": 51184,
422
+ "<|16.40|>": 51185,
423
+ "<|16.42|>": 51186,
424
+ "<|16.44|>": 51187,
425
+ "<|16.46|>": 51188,
426
+ "<|16.48|>": 51189,
427
+ "<|16.50|>": 51190,
428
+ "<|16.52|>": 51191,
429
+ "<|16.54|>": 51192,
430
+ "<|16.56|>": 51193,
431
+ "<|16.58|>": 51194,
432
+ "<|16.60|>": 51195,
433
+ "<|16.62|>": 51196,
434
+ "<|16.64|>": 51197,
435
+ "<|16.66|>": 51198,
436
+ "<|16.68|>": 51199,
437
+ "<|16.70|>": 51200,
438
+ "<|16.72|>": 51201,
439
+ "<|16.74|>": 51202,
440
+ "<|16.76|>": 51203,
441
+ "<|16.78|>": 51204,
442
+ "<|16.80|>": 51205,
443
+ "<|16.82|>": 51206,
444
+ "<|16.84|>": 51207,
445
+ "<|16.86|>": 51208,
446
+ "<|16.88|>": 51209,
447
+ "<|16.90|>": 51210,
448
+ "<|16.92|>": 51211,
449
+ "<|16.94|>": 51212,
450
+ "<|16.96|>": 51213,
451
+ "<|16.98|>": 51214,
452
+ "<|17.00|>": 51215,
453
+ "<|17.02|>": 51216,
454
+ "<|17.04|>": 51217,
455
+ "<|17.06|>": 51218,
456
+ "<|17.08|>": 51219,
457
+ "<|17.10|>": 51220,
458
+ "<|17.12|>": 51221,
459
+ "<|17.14|>": 51222,
460
+ "<|17.16|>": 51223,
461
+ "<|17.18|>": 51224,
462
+ "<|17.20|>": 51225,
463
+ "<|17.22|>": 51226,
464
+ "<|17.24|>": 51227,
465
+ "<|17.26|>": 51228,
466
+ "<|17.28|>": 51229,
467
+ "<|17.30|>": 51230,
468
+ "<|17.32|>": 51231,
469
+ "<|17.34|>": 51232,
470
+ "<|17.36|>": 51233,
471
+ "<|17.38|>": 51234,
472
+ "<|17.40|>": 51235,
473
+ "<|17.42|>": 51236,
474
+ "<|17.44|>": 51237,
475
+ "<|17.46|>": 51238,
476
+ "<|17.48|>": 51239,
477
+ "<|17.50|>": 51240,
478
+ "<|17.52|>": 51241,
479
+ "<|17.54|>": 51242,
480
+ "<|17.56|>": 51243,
481
+ "<|17.58|>": 51244,
482
+ "<|17.60|>": 51245,
483
+ "<|17.62|>": 51246,
484
+ "<|17.64|>": 51247,
485
+ "<|17.66|>": 51248,
486
+ "<|17.68|>": 51249,
487
+ "<|17.70|>": 51250,
488
+ "<|17.72|>": 51251,
489
+ "<|17.74|>": 51252,
490
+ "<|17.76|>": 51253,
491
+ "<|17.78|>": 51254,
492
+ "<|17.80|>": 51255,
493
+ "<|17.82|>": 51256,
494
+ "<|17.84|>": 51257,
495
+ "<|17.86|>": 51258,
496
+ "<|17.88|>": 51259,
497
+ "<|17.90|>": 51260,
498
+ "<|17.92|>": 51261,
499
+ "<|17.94|>": 51262,
500
+ "<|17.96|>": 51263,
501
+ "<|17.98|>": 51264,
502
+ "<|18.00|>": 51265,
503
+ "<|18.02|>": 51266,
504
+ "<|18.04|>": 51267,
505
+ "<|18.06|>": 51268,
506
+ "<|18.08|>": 51269,
507
+ "<|18.10|>": 51270,
508
+ "<|18.12|>": 51271,
509
+ "<|18.14|>": 51272,
510
+ "<|18.16|>": 51273,
511
+ "<|18.18|>": 51274,
512
+ "<|18.20|>": 51275,
513
+ "<|18.22|>": 51276,
514
+ "<|18.24|>": 51277,
515
+ "<|18.26|>": 51278,
516
+ "<|18.28|>": 51279,
517
+ "<|18.30|>": 51280,
518
+ "<|18.32|>": 51281,
519
+ "<|18.34|>": 51282,
520
+ "<|18.36|>": 51283,
521
+ "<|18.38|>": 51284,
522
+ "<|18.40|>": 51285,
523
+ "<|18.42|>": 51286,
524
+ "<|18.44|>": 51287,
525
+ "<|18.46|>": 51288,
526
+ "<|18.48|>": 51289,
527
+ "<|18.50|>": 51290,
528
+ "<|18.52|>": 51291,
529
+ "<|18.54|>": 51292,
530
+ "<|18.56|>": 51293,
531
+ "<|18.58|>": 51294,
532
+ "<|18.60|>": 51295,
533
+ "<|18.62|>": 51296,
534
+ "<|18.64|>": 51297,
535
+ "<|18.66|>": 51298,
536
+ "<|18.68|>": 51299,
537
+ "<|18.70|>": 51300,
538
+ "<|18.72|>": 51301,
539
+ "<|18.74|>": 51302,
540
+ "<|18.76|>": 51303,
541
+ "<|18.78|>": 51304,
542
+ "<|18.80|>": 51305,
543
+ "<|18.82|>": 51306,
544
+ "<|18.84|>": 51307,
545
+ "<|18.86|>": 51308,
546
+ "<|18.88|>": 51309,
547
+ "<|18.90|>": 51310,
548
+ "<|18.92|>": 51311,
549
+ "<|18.94|>": 51312,
550
+ "<|18.96|>": 51313,
551
+ "<|18.98|>": 51314,
552
+ "<|19.00|>": 51315,
553
+ "<|19.02|>": 51316,
554
+ "<|19.04|>": 51317,
555
+ "<|19.06|>": 51318,
556
+ "<|19.08|>": 51319,
557
+ "<|19.10|>": 51320,
558
+ "<|19.12|>": 51321,
559
+ "<|19.14|>": 51322,
560
+ "<|19.16|>": 51323,
561
+ "<|19.18|>": 51324,
562
+ "<|19.20|>": 51325,
563
+ "<|19.22|>": 51326,
564
+ "<|19.24|>": 51327,
565
+ "<|19.26|>": 51328,
566
+ "<|19.28|>": 51329,
567
+ "<|19.30|>": 51330,
568
+ "<|19.32|>": 51331,
569
+ "<|19.34|>": 51332,
570
+ "<|19.36|>": 51333,
571
+ "<|19.38|>": 51334,
572
+ "<|19.40|>": 51335,
573
+ "<|19.42|>": 51336,
574
+ "<|19.44|>": 51337,
575
+ "<|19.46|>": 51338,
576
+ "<|19.48|>": 51339,
577
+ "<|19.50|>": 51340,
578
+ "<|19.52|>": 51341,
579
+ "<|19.54|>": 51342,
580
+ "<|19.56|>": 51343,
581
+ "<|19.58|>": 51344,
582
+ "<|19.60|>": 51345,
583
+ "<|19.62|>": 51346,
584
+ "<|19.64|>": 51347,
585
+ "<|19.66|>": 51348,
586
+ "<|19.68|>": 51349,
587
+ "<|19.70|>": 51350,
588
+ "<|19.72|>": 51351,
589
+ "<|19.74|>": 51352,
590
+ "<|19.76|>": 51353,
591
+ "<|19.78|>": 51354,
592
+ "<|19.80|>": 51355,
593
+ "<|19.82|>": 51356,
594
+ "<|19.84|>": 51357,
595
+ "<|19.86|>": 51358,
596
+ "<|19.88|>": 51359,
597
+ "<|19.90|>": 51360,
598
+ "<|19.92|>": 51361,
599
+ "<|19.94|>": 51362,
600
+ "<|19.96|>": 51363,
601
+ "<|19.98|>": 51364,
602
+ "<|2.00|>": 50465,
603
+ "<|2.02|>": 50466,
604
+ "<|2.04|>": 50467,
605
+ "<|2.06|>": 50468,
606
+ "<|2.08|>": 50469,
607
+ "<|2.10|>": 50470,
608
+ "<|2.12|>": 50471,
609
+ "<|2.14|>": 50472,
610
+ "<|2.16|>": 50473,
611
+ "<|2.18|>": 50474,
612
+ "<|2.20|>": 50475,
613
+ "<|2.22|>": 50476,
614
+ "<|2.24|>": 50477,
615
+ "<|2.26|>": 50478,
616
+ "<|2.28|>": 50479,
617
+ "<|2.30|>": 50480,
618
+ "<|2.32|>": 50481,
619
+ "<|2.34|>": 50482,
620
+ "<|2.36|>": 50483,
621
+ "<|2.38|>": 50484,
622
+ "<|2.40|>": 50485,
623
+ "<|2.42|>": 50486,
624
+ "<|2.44|>": 50487,
625
+ "<|2.46|>": 50488,
626
+ "<|2.48|>": 50489,
627
+ "<|2.50|>": 50490,
628
+ "<|2.52|>": 50491,
629
+ "<|2.54|>": 50492,
630
+ "<|2.56|>": 50493,
631
+ "<|2.58|>": 50494,
632
+ "<|2.60|>": 50495,
633
+ "<|2.62|>": 50496,
634
+ "<|2.64|>": 50497,
635
+ "<|2.66|>": 50498,
636
+ "<|2.68|>": 50499,
637
+ "<|2.70|>": 50500,
638
+ "<|2.72|>": 50501,
639
+ "<|2.74|>": 50502,
640
+ "<|2.76|>": 50503,
641
+ "<|2.78|>": 50504,
642
+ "<|2.80|>": 50505,
643
+ "<|2.82|>": 50506,
644
+ "<|2.84|>": 50507,
645
+ "<|2.86|>": 50508,
646
+ "<|2.88|>": 50509,
647
+ "<|2.90|>": 50510,
648
+ "<|2.92|>": 50511,
649
+ "<|2.94|>": 50512,
650
+ "<|2.96|>": 50513,
651
+ "<|2.98|>": 50514,
652
+ "<|20.00|>": 51365,
653
+ "<|20.02|>": 51366,
654
+ "<|20.04|>": 51367,
655
+ "<|20.06|>": 51368,
656
+ "<|20.08|>": 51369,
657
+ "<|20.10|>": 51370,
658
+ "<|20.12|>": 51371,
659
+ "<|20.14|>": 51372,
660
+ "<|20.16|>": 51373,
661
+ "<|20.18|>": 51374,
662
+ "<|20.20|>": 51375,
663
+ "<|20.22|>": 51376,
664
+ "<|20.24|>": 51377,
665
+ "<|20.26|>": 51378,
666
+ "<|20.28|>": 51379,
667
+ "<|20.30|>": 51380,
668
+ "<|20.32|>": 51381,
669
+ "<|20.34|>": 51382,
670
+ "<|20.36|>": 51383,
671
+ "<|20.38|>": 51384,
672
+ "<|20.40|>": 51385,
673
+ "<|20.42|>": 51386,
674
+ "<|20.44|>": 51387,
675
+ "<|20.46|>": 51388,
676
+ "<|20.48|>": 51389,
677
+ "<|20.50|>": 51390,
678
+ "<|20.52|>": 51391,
679
+ "<|20.54|>": 51392,
680
+ "<|20.56|>": 51393,
681
+ "<|20.58|>": 51394,
682
+ "<|20.60|>": 51395,
683
+ "<|20.62|>": 51396,
684
+ "<|20.64|>": 51397,
685
+ "<|20.66|>": 51398,
686
+ "<|20.68|>": 51399,
687
+ "<|20.70|>": 51400,
688
+ "<|20.72|>": 51401,
689
+ "<|20.74|>": 51402,
690
+ "<|20.76|>": 51403,
691
+ "<|20.78|>": 51404,
692
+ "<|20.80|>": 51405,
693
+ "<|20.82|>": 51406,
694
+ "<|20.84|>": 51407,
695
+ "<|20.86|>": 51408,
696
+ "<|20.88|>": 51409,
697
+ "<|20.90|>": 51410,
698
+ "<|20.92|>": 51411,
699
+ "<|20.94|>": 51412,
700
+ "<|20.96|>": 51413,
701
+ "<|20.98|>": 51414,
702
+ "<|21.00|>": 51415,
703
+ "<|21.02|>": 51416,
704
+ "<|21.04|>": 51417,
705
+ "<|21.06|>": 51418,
706
+ "<|21.08|>": 51419,
707
+ "<|21.10|>": 51420,
708
+ "<|21.12|>": 51421,
709
+ "<|21.14|>": 51422,
710
+ "<|21.16|>": 51423,
711
+ "<|21.18|>": 51424,
712
+ "<|21.20|>": 51425,
713
+ "<|21.22|>": 51426,
714
+ "<|21.24|>": 51427,
715
+ "<|21.26|>": 51428,
716
+ "<|21.28|>": 51429,
717
+ "<|21.30|>": 51430,
718
+ "<|21.32|>": 51431,
719
+ "<|21.34|>": 51432,
720
+ "<|21.36|>": 51433,
721
+ "<|21.38|>": 51434,
722
+ "<|21.40|>": 51435,
723
+ "<|21.42|>": 51436,
724
+ "<|21.44|>": 51437,
725
+ "<|21.46|>": 51438,
726
+ "<|21.48|>": 51439,
727
+ "<|21.50|>": 51440,
728
+ "<|21.52|>": 51441,
729
+ "<|21.54|>": 51442,
730
+ "<|21.56|>": 51443,
731
+ "<|21.58|>": 51444,
732
+ "<|21.60|>": 51445,
733
+ "<|21.62|>": 51446,
734
+ "<|21.64|>": 51447,
735
+ "<|21.66|>": 51448,
736
+ "<|21.68|>": 51449,
737
+ "<|21.70|>": 51450,
738
+ "<|21.72|>": 51451,
739
+ "<|21.74|>": 51452,
740
+ "<|21.76|>": 51453,
741
+ "<|21.78|>": 51454,
742
+ "<|21.80|>": 51455,
743
+ "<|21.82|>": 51456,
744
+ "<|21.84|>": 51457,
745
+ "<|21.86|>": 51458,
746
+ "<|21.88|>": 51459,
747
+ "<|21.90|>": 51460,
748
+ "<|21.92|>": 51461,
749
+ "<|21.94|>": 51462,
750
+ "<|21.96|>": 51463,
751
+ "<|21.98|>": 51464,
752
+ "<|22.00|>": 51465,
753
+ "<|22.02|>": 51466,
754
+ "<|22.04|>": 51467,
755
+ "<|22.06|>": 51468,
756
+ "<|22.08|>": 51469,
757
+ "<|22.10|>": 51470,
758
+ "<|22.12|>": 51471,
759
+ "<|22.14|>": 51472,
760
+ "<|22.16|>": 51473,
761
+ "<|22.18|>": 51474,
762
+ "<|22.20|>": 51475,
763
+ "<|22.22|>": 51476,
764
+ "<|22.24|>": 51477,
765
+ "<|22.26|>": 51478,
766
+ "<|22.28|>": 51479,
767
+ "<|22.30|>": 51480,
768
+ "<|22.32|>": 51481,
769
+ "<|22.34|>": 51482,
770
+ "<|22.36|>": 51483,
771
+ "<|22.38|>": 51484,
772
+ "<|22.40|>": 51485,
773
+ "<|22.42|>": 51486,
774
+ "<|22.44|>": 51487,
775
+ "<|22.46|>": 51488,
776
+ "<|22.48|>": 51489,
777
+ "<|22.50|>": 51490,
778
+ "<|22.52|>": 51491,
779
+ "<|22.54|>": 51492,
780
+ "<|22.56|>": 51493,
781
+ "<|22.58|>": 51494,
782
+ "<|22.60|>": 51495,
783
+ "<|22.62|>": 51496,
784
+ "<|22.64|>": 51497,
785
+ "<|22.66|>": 51498,
786
+ "<|22.68|>": 51499,
787
+ "<|22.70|>": 51500,
788
+ "<|22.72|>": 51501,
789
+ "<|22.74|>": 51502,
790
+ "<|22.76|>": 51503,
791
+ "<|22.78|>": 51504,
792
+ "<|22.80|>": 51505,
793
+ "<|22.82|>": 51506,
794
+ "<|22.84|>": 51507,
795
+ "<|22.86|>": 51508,
796
+ "<|22.88|>": 51509,
797
+ "<|22.90|>": 51510,
798
+ "<|22.92|>": 51511,
799
+ "<|22.94|>": 51512,
800
+ "<|22.96|>": 51513,
801
+ "<|22.98|>": 51514,
802
+ "<|23.00|>": 51515,
803
+ "<|23.02|>": 51516,
804
+ "<|23.04|>": 51517,
805
+ "<|23.06|>": 51518,
806
+ "<|23.08|>": 51519,
807
+ "<|23.10|>": 51520,
808
+ "<|23.12|>": 51521,
809
+ "<|23.14|>": 51522,
810
+ "<|23.16|>": 51523,
811
+ "<|23.18|>": 51524,
812
+ "<|23.20|>": 51525,
813
+ "<|23.22|>": 51526,
814
+ "<|23.24|>": 51527,
815
+ "<|23.26|>": 51528,
816
+ "<|23.28|>": 51529,
817
+ "<|23.30|>": 51530,
818
+ "<|23.32|>": 51531,
819
+ "<|23.34|>": 51532,
820
+ "<|23.36|>": 51533,
821
+ "<|23.38|>": 51534,
822
+ "<|23.40|>": 51535,
823
+ "<|23.42|>": 51536,
824
+ "<|23.44|>": 51537,
825
+ "<|23.46|>": 51538,
826
+ "<|23.48|>": 51539,
827
+ "<|23.50|>": 51540,
828
+ "<|23.52|>": 51541,
829
+ "<|23.54|>": 51542,
830
+ "<|23.56|>": 51543,
831
+ "<|23.58|>": 51544,
832
+ "<|23.60|>": 51545,
833
+ "<|23.62|>": 51546,
834
+ "<|23.64|>": 51547,
835
+ "<|23.66|>": 51548,
836
+ "<|23.68|>": 51549,
837
+ "<|23.70|>": 51550,
838
+ "<|23.72|>": 51551,
839
+ "<|23.74|>": 51552,
840
+ "<|23.76|>": 51553,
841
+ "<|23.78|>": 51554,
842
+ "<|23.80|>": 51555,
843
+ "<|23.82|>": 51556,
844
+ "<|23.84|>": 51557,
845
+ "<|23.86|>": 51558,
846
+ "<|23.88|>": 51559,
847
+ "<|23.90|>": 51560,
848
+ "<|23.92|>": 51561,
849
+ "<|23.94|>": 51562,
850
+ "<|23.96|>": 51563,
851
+ "<|23.98|>": 51564,
852
+ "<|24.00|>": 51565,
853
+ "<|24.02|>": 51566,
854
+ "<|24.04|>": 51567,
855
+ "<|24.06|>": 51568,
856
+ "<|24.08|>": 51569,
857
+ "<|24.10|>": 51570,
858
+ "<|24.12|>": 51571,
859
+ "<|24.14|>": 51572,
860
+ "<|24.16|>": 51573,
861
+ "<|24.18|>": 51574,
862
+ "<|24.20|>": 51575,
863
+ "<|24.22|>": 51576,
864
+ "<|24.24|>": 51577,
865
+ "<|24.26|>": 51578,
866
+ "<|24.28|>": 51579,
867
+ "<|24.30|>": 51580,
868
+ "<|24.32|>": 51581,
869
+ "<|24.34|>": 51582,
870
+ "<|24.36|>": 51583,
871
+ "<|24.38|>": 51584,
872
+ "<|24.40|>": 51585,
873
+ "<|24.42|>": 51586,
874
+ "<|24.44|>": 51587,
875
+ "<|24.46|>": 51588,
876
+ "<|24.48|>": 51589,
877
+ "<|24.50|>": 51590,
878
+ "<|24.52|>": 51591,
879
+ "<|24.54|>": 51592,
880
+ "<|24.56|>": 51593,
881
+ "<|24.58|>": 51594,
882
+ "<|24.60|>": 51595,
883
+ "<|24.62|>": 51596,
884
+ "<|24.64|>": 51597,
885
+ "<|24.66|>": 51598,
886
+ "<|24.68|>": 51599,
887
+ "<|24.70|>": 51600,
888
+ "<|24.72|>": 51601,
889
+ "<|24.74|>": 51602,
890
+ "<|24.76|>": 51603,
891
+ "<|24.78|>": 51604,
892
+ "<|24.80|>": 51605,
893
+ "<|24.82|>": 51606,
894
+ "<|24.84|>": 51607,
895
+ "<|24.86|>": 51608,
896
+ "<|24.88|>": 51609,
897
+ "<|24.90|>": 51610,
898
+ "<|24.92|>": 51611,
899
+ "<|24.94|>": 51612,
900
+ "<|24.96|>": 51613,
901
+ "<|24.98|>": 51614,
902
+ "<|25.00|>": 51615,
903
+ "<|25.02|>": 51616,
904
+ "<|25.04|>": 51617,
905
+ "<|25.06|>": 51618,
906
+ "<|25.08|>": 51619,
907
+ "<|25.10|>": 51620,
908
+ "<|25.12|>": 51621,
909
+ "<|25.14|>": 51622,
910
+ "<|25.16|>": 51623,
911
+ "<|25.18|>": 51624,
912
+ "<|25.20|>": 51625,
913
+ "<|25.22|>": 51626,
914
+ "<|25.24|>": 51627,
915
+ "<|25.26|>": 51628,
916
+ "<|25.28|>": 51629,
917
+ "<|25.30|>": 51630,
918
+ "<|25.32|>": 51631,
919
+ "<|25.34|>": 51632,
920
+ "<|25.36|>": 51633,
921
+ "<|25.38|>": 51634,
922
+ "<|25.40|>": 51635,
923
+ "<|25.42|>": 51636,
924
+ "<|25.44|>": 51637,
925
+ "<|25.46|>": 51638,
926
+ "<|25.48|>": 51639,
927
+ "<|25.50|>": 51640,
928
+ "<|25.52|>": 51641,
929
+ "<|25.54|>": 51642,
930
+ "<|25.56|>": 51643,
931
+ "<|25.58|>": 51644,
932
+ "<|25.60|>": 51645,
933
+ "<|25.62|>": 51646,
934
+ "<|25.64|>": 51647,
935
+ "<|25.66|>": 51648,
936
+ "<|25.68|>": 51649,
937
+ "<|25.70|>": 51650,
938
+ "<|25.72|>": 51651,
939
+ "<|25.74|>": 51652,
940
+ "<|25.76|>": 51653,
941
+ "<|25.78|>": 51654,
942
+ "<|25.80|>": 51655,
943
+ "<|25.82|>": 51656,
944
+ "<|25.84|>": 51657,
945
+ "<|25.86|>": 51658,
946
+ "<|25.88|>": 51659,
947
+ "<|25.90|>": 51660,
948
+ "<|25.92|>": 51661,
949
+ "<|25.94|>": 51662,
950
+ "<|25.96|>": 51663,
951
+ "<|25.98|>": 51664,
952
+ "<|26.00|>": 51665,
953
+ "<|26.02|>": 51666,
954
+ "<|26.04|>": 51667,
955
+ "<|26.06|>": 51668,
956
+ "<|26.08|>": 51669,
957
+ "<|26.10|>": 51670,
958
+ "<|26.12|>": 51671,
959
+ "<|26.14|>": 51672,
960
+ "<|26.16|>": 51673,
961
+ "<|26.18|>": 51674,
962
+ "<|26.20|>": 51675,
963
+ "<|26.22|>": 51676,
964
+ "<|26.24|>": 51677,
965
+ "<|26.26|>": 51678,
966
+ "<|26.28|>": 51679,
967
+ "<|26.30|>": 51680,
968
+ "<|26.32|>": 51681,
969
+ "<|26.34|>": 51682,
970
+ "<|26.36|>": 51683,
971
+ "<|26.38|>": 51684,
972
+ "<|26.40|>": 51685,
973
+ "<|26.42|>": 51686,
974
+ "<|26.44|>": 51687,
975
+ "<|26.46|>": 51688,
976
+ "<|26.48|>": 51689,
977
+ "<|26.50|>": 51690,
978
+ "<|26.52|>": 51691,
979
+ "<|26.54|>": 51692,
980
+ "<|26.56|>": 51693,
981
+ "<|26.58|>": 51694,
982
+ "<|26.60|>": 51695,
983
+ "<|26.62|>": 51696,
984
+ "<|26.64|>": 51697,
985
+ "<|26.66|>": 51698,
986
+ "<|26.68|>": 51699,
987
+ "<|26.70|>": 51700,
988
+ "<|26.72|>": 51701,
989
+ "<|26.74|>": 51702,
990
+ "<|26.76|>": 51703,
991
+ "<|26.78|>": 51704,
992
+ "<|26.80|>": 51705,
993
+ "<|26.82|>": 51706,
994
+ "<|26.84|>": 51707,
995
+ "<|26.86|>": 51708,
996
+ "<|26.88|>": 51709,
997
+ "<|26.90|>": 51710,
998
+ "<|26.92|>": 51711,
999
+ "<|26.94|>": 51712,
1000
+ "<|26.96|>": 51713,
1001
+ "<|26.98|>": 51714,
1002
+ "<|27.00|>": 51715,
1003
+ "<|27.02|>": 51716,
1004
+ "<|27.04|>": 51717,
1005
+ "<|27.06|>": 51718,
1006
+ "<|27.08|>": 51719,
1007
+ "<|27.10|>": 51720,
1008
+ "<|27.12|>": 51721,
1009
+ "<|27.14|>": 51722,
1010
+ "<|27.16|>": 51723,
1011
+ "<|27.18|>": 51724,
1012
+ "<|27.20|>": 51725,
1013
+ "<|27.22|>": 51726,
1014
+ "<|27.24|>": 51727,
1015
+ "<|27.26|>": 51728,
1016
+ "<|27.28|>": 51729,
1017
+ "<|27.30|>": 51730,
1018
+ "<|27.32|>": 51731,
1019
+ "<|27.34|>": 51732,
1020
+ "<|27.36|>": 51733,
1021
+ "<|27.38|>": 51734,
1022
+ "<|27.40|>": 51735,
1023
+ "<|27.42|>": 51736,
1024
+ "<|27.44|>": 51737,
1025
+ "<|27.46|>": 51738,
1026
+ "<|27.48|>": 51739,
1027
+ "<|27.50|>": 51740,
1028
+ "<|27.52|>": 51741,
1029
+ "<|27.54|>": 51742,
1030
+ "<|27.56|>": 51743,
1031
+ "<|27.58|>": 51744,
1032
+ "<|27.60|>": 51745,
1033
+ "<|27.62|>": 51746,
1034
+ "<|27.64|>": 51747,
1035
+ "<|27.66|>": 51748,
1036
+ "<|27.68|>": 51749,
1037
+ "<|27.70|>": 51750,
1038
+ "<|27.72|>": 51751,
1039
+ "<|27.74|>": 51752,
1040
+ "<|27.76|>": 51753,
1041
+ "<|27.78|>": 51754,
1042
+ "<|27.80|>": 51755,
1043
+ "<|27.82|>": 51756,
1044
+ "<|27.84|>": 51757,
1045
+ "<|27.86|>": 51758,
1046
+ "<|27.88|>": 51759,
1047
+ "<|27.90|>": 51760,
1048
+ "<|27.92|>": 51761,
1049
+ "<|27.94|>": 51762,
1050
+ "<|27.96|>": 51763,
1051
+ "<|27.98|>": 51764,
1052
+ "<|28.00|>": 51765,
1053
+ "<|28.02|>": 51766,
1054
+ "<|28.04|>": 51767,
1055
+ "<|28.06|>": 51768,
1056
+ "<|28.08|>": 51769,
1057
+ "<|28.10|>": 51770,
1058
+ "<|28.12|>": 51771,
1059
+ "<|28.14|>": 51772,
1060
+ "<|28.16|>": 51773,
1061
+ "<|28.18|>": 51774,
1062
+ "<|28.20|>": 51775,
1063
+ "<|28.22|>": 51776,
1064
+ "<|28.24|>": 51777,
1065
+ "<|28.26|>": 51778,
1066
+ "<|28.28|>": 51779,
1067
+ "<|28.30|>": 51780,
1068
+ "<|28.32|>": 51781,
1069
+ "<|28.34|>": 51782,
1070
+ "<|28.36|>": 51783,
1071
+ "<|28.38|>": 51784,
1072
+ "<|28.40|>": 51785,
1073
+ "<|28.42|>": 51786,
1074
+ "<|28.44|>": 51787,
1075
+ "<|28.46|>": 51788,
1076
+ "<|28.48|>": 51789,
1077
+ "<|28.50|>": 51790,
1078
+ "<|28.52|>": 51791,
1079
+ "<|28.54|>": 51792,
1080
+ "<|28.56|>": 51793,
1081
+ "<|28.58|>": 51794,
1082
+ "<|28.60|>": 51795,
1083
+ "<|28.62|>": 51796,
1084
+ "<|28.64|>": 51797,
1085
+ "<|28.66|>": 51798,
1086
+ "<|28.68|>": 51799,
1087
+ "<|28.70|>": 51800,
1088
+ "<|28.72|>": 51801,
1089
+ "<|28.74|>": 51802,
1090
+ "<|28.76|>": 51803,
1091
+ "<|28.78|>": 51804,
1092
+ "<|28.80|>": 51805,
1093
+ "<|28.82|>": 51806,
1094
+ "<|28.84|>": 51807,
1095
+ "<|28.86|>": 51808,
1096
+ "<|28.88|>": 51809,
1097
+ "<|28.90|>": 51810,
1098
+ "<|28.92|>": 51811,
1099
+ "<|28.94|>": 51812,
1100
+ "<|28.96|>": 51813,
1101
+ "<|28.98|>": 51814,
1102
+ "<|29.00|>": 51815,
1103
+ "<|29.02|>": 51816,
1104
+ "<|29.04|>": 51817,
1105
+ "<|29.06|>": 51818,
1106
+ "<|29.08|>": 51819,
1107
+ "<|29.10|>": 51820,
1108
+ "<|29.12|>": 51821,
1109
+ "<|29.14|>": 51822,
1110
+ "<|29.16|>": 51823,
1111
+ "<|29.18|>": 51824,
1112
+ "<|29.20|>": 51825,
1113
+ "<|29.22|>": 51826,
1114
+ "<|29.24|>": 51827,
1115
+ "<|29.26|>": 51828,
1116
+ "<|29.28|>": 51829,
1117
+ "<|29.30|>": 51830,
1118
+ "<|29.32|>": 51831,
1119
+ "<|29.34|>": 51832,
1120
+ "<|29.36|>": 51833,
1121
+ "<|29.38|>": 51834,
1122
+ "<|29.40|>": 51835,
1123
+ "<|29.42|>": 51836,
1124
+ "<|29.44|>": 51837,
1125
+ "<|29.46|>": 51838,
1126
+ "<|29.48|>": 51839,
1127
+ "<|29.50|>": 51840,
1128
+ "<|29.52|>": 51841,
1129
+ "<|29.54|>": 51842,
1130
+ "<|29.56|>": 51843,
1131
+ "<|29.58|>": 51844,
1132
+ "<|29.60|>": 51845,
1133
+ "<|29.62|>": 51846,
1134
+ "<|29.64|>": 51847,
1135
+ "<|29.66|>": 51848,
1136
+ "<|29.68|>": 51849,
1137
+ "<|29.70|>": 51850,
1138
+ "<|29.72|>": 51851,
1139
+ "<|29.74|>": 51852,
1140
+ "<|29.76|>": 51853,
1141
+ "<|29.78|>": 51854,
1142
+ "<|29.80|>": 51855,
1143
+ "<|29.82|>": 51856,
1144
+ "<|29.84|>": 51857,
1145
+ "<|29.86|>": 51858,
1146
+ "<|29.88|>": 51859,
1147
+ "<|29.90|>": 51860,
1148
+ "<|29.92|>": 51861,
1149
+ "<|29.94|>": 51862,
1150
+ "<|29.96|>": 51863,
1151
+ "<|29.98|>": 51864,
1152
+ "<|3.00|>": 50515,
1153
+ "<|3.02|>": 50516,
1154
+ "<|3.04|>": 50517,
1155
+ "<|3.06|>": 50518,
1156
+ "<|3.08|>": 50519,
1157
+ "<|3.10|>": 50520,
1158
+ "<|3.12|>": 50521,
1159
+ "<|3.14|>": 50522,
1160
+ "<|3.16|>": 50523,
1161
+ "<|3.18|>": 50524,
1162
+ "<|3.20|>": 50525,
1163
+ "<|3.22|>": 50526,
1164
+ "<|3.24|>": 50527,
1165
+ "<|3.26|>": 50528,
1166
+ "<|3.28|>": 50529,
1167
+ "<|3.30|>": 50530,
1168
+ "<|3.32|>": 50531,
1169
+ "<|3.34|>": 50532,
1170
+ "<|3.36|>": 50533,
1171
+ "<|3.38|>": 50534,
1172
+ "<|3.40|>": 50535,
1173
+ "<|3.42|>": 50536,
1174
+ "<|3.44|>": 50537,
1175
+ "<|3.46|>": 50538,
1176
+ "<|3.48|>": 50539,
1177
+ "<|3.50|>": 50540,
1178
+ "<|3.52|>": 50541,
1179
+ "<|3.54|>": 50542,
1180
+ "<|3.56|>": 50543,
1181
+ "<|3.58|>": 50544,
1182
+ "<|3.60|>": 50545,
1183
+ "<|3.62|>": 50546,
1184
+ "<|3.64|>": 50547,
1185
+ "<|3.66|>": 50548,
1186
+ "<|3.68|>": 50549,
1187
+ "<|3.70|>": 50550,
1188
+ "<|3.72|>": 50551,
1189
+ "<|3.74|>": 50552,
1190
+ "<|3.76|>": 50553,
1191
+ "<|3.78|>": 50554,
1192
+ "<|3.80|>": 50555,
1193
+ "<|3.82|>": 50556,
1194
+ "<|3.84|>": 50557,
1195
+ "<|3.86|>": 50558,
1196
+ "<|3.88|>": 50559,
1197
+ "<|3.90|>": 50560,
1198
+ "<|3.92|>": 50561,
1199
+ "<|3.94|>": 50562,
1200
+ "<|3.96|>": 50563,
1201
+ "<|3.98|>": 50564,
1202
+ "<|30.00|>": 51865,
1203
+ "<|4.00|>": 50565,
1204
+ "<|4.02|>": 50566,
1205
+ "<|4.04|>": 50567,
1206
+ "<|4.06|>": 50568,
1207
+ "<|4.08|>": 50569,
1208
+ "<|4.10|>": 50570,
1209
+ "<|4.12|>": 50571,
1210
+ "<|4.14|>": 50572,
1211
+ "<|4.16|>": 50573,
1212
+ "<|4.18|>": 50574,
1213
+ "<|4.20|>": 50575,
1214
+ "<|4.22|>": 50576,
1215
+ "<|4.24|>": 50577,
1216
+ "<|4.26|>": 50578,
1217
+ "<|4.28|>": 50579,
1218
+ "<|4.30|>": 50580,
1219
+ "<|4.32|>": 50581,
1220
+ "<|4.34|>": 50582,
1221
+ "<|4.36|>": 50583,
1222
+ "<|4.38|>": 50584,
1223
+ "<|4.40|>": 50585,
1224
+ "<|4.42|>": 50586,
1225
+ "<|4.44|>": 50587,
1226
+ "<|4.46|>": 50588,
1227
+ "<|4.48|>": 50589,
1228
+ "<|4.50|>": 50590,
1229
+ "<|4.52|>": 50591,
1230
+ "<|4.54|>": 50592,
1231
+ "<|4.56|>": 50593,
1232
+ "<|4.58|>": 50594,
1233
+ "<|4.60|>": 50595,
1234
+ "<|4.62|>": 50596,
1235
+ "<|4.64|>": 50597,
1236
+ "<|4.66|>": 50598,
1237
+ "<|4.68|>": 50599,
1238
+ "<|4.70|>": 50600,
1239
+ "<|4.72|>": 50601,
1240
+ "<|4.74|>": 50602,
1241
+ "<|4.76|>": 50603,
1242
+ "<|4.78|>": 50604,
1243
+ "<|4.80|>": 50605,
1244
+ "<|4.82|>": 50606,
1245
+ "<|4.84|>": 50607,
1246
+ "<|4.86|>": 50608,
1247
+ "<|4.88|>": 50609,
1248
+ "<|4.90|>": 50610,
1249
+ "<|4.92|>": 50611,
1250
+ "<|4.94|>": 50612,
1251
+ "<|4.96|>": 50613,
1252
+ "<|4.98|>": 50614,
1253
+ "<|5.00|>": 50615,
1254
+ "<|5.02|>": 50616,
1255
+ "<|5.04|>": 50617,
1256
+ "<|5.06|>": 50618,
1257
+ "<|5.08|>": 50619,
1258
+ "<|5.10|>": 50620,
1259
+ "<|5.12|>": 50621,
1260
+ "<|5.14|>": 50622,
1261
+ "<|5.16|>": 50623,
1262
+ "<|5.18|>": 50624,
1263
+ "<|5.20|>": 50625,
1264
+ "<|5.22|>": 50626,
1265
+ "<|5.24|>": 50627,
1266
+ "<|5.26|>": 50628,
1267
+ "<|5.28|>": 50629,
1268
+ "<|5.30|>": 50630,
1269
+ "<|5.32|>": 50631,
1270
+ "<|5.34|>": 50632,
1271
+ "<|5.36|>": 50633,
1272
+ "<|5.38|>": 50634,
1273
+ "<|5.40|>": 50635,
1274
+ "<|5.42|>": 50636,
1275
+ "<|5.44|>": 50637,
1276
+ "<|5.46|>": 50638,
1277
+ "<|5.48|>": 50639,
1278
+ "<|5.50|>": 50640,
1279
+ "<|5.52|>": 50641,
1280
+ "<|5.54|>": 50642,
1281
+ "<|5.56|>": 50643,
1282
+ "<|5.58|>": 50644,
1283
+ "<|5.60|>": 50645,
1284
+ "<|5.62|>": 50646,
1285
+ "<|5.64|>": 50647,
1286
+ "<|5.66|>": 50648,
1287
+ "<|5.68|>": 50649,
1288
+ "<|5.70|>": 50650,
1289
+ "<|5.72|>": 50651,
1290
+ "<|5.74|>": 50652,
1291
+ "<|5.76|>": 50653,
1292
+ "<|5.78|>": 50654,
1293
+ "<|5.80|>": 50655,
1294
+ "<|5.82|>": 50656,
1295
+ "<|5.84|>": 50657,
1296
+ "<|5.86|>": 50658,
1297
+ "<|5.88|>": 50659,
1298
+ "<|5.90|>": 50660,
1299
+ "<|5.92|>": 50661,
1300
+ "<|5.94|>": 50662,
1301
+ "<|5.96|>": 50663,
1302
+ "<|5.98|>": 50664,
1303
+ "<|6.00|>": 50665,
1304
+ "<|6.02|>": 50666,
1305
+ "<|6.04|>": 50667,
1306
+ "<|6.06|>": 50668,
1307
+ "<|6.08|>": 50669,
1308
+ "<|6.10|>": 50670,
1309
+ "<|6.12|>": 50671,
1310
+ "<|6.14|>": 50672,
1311
+ "<|6.16|>": 50673,
1312
+ "<|6.18|>": 50674,
1313
+ "<|6.20|>": 50675,
1314
+ "<|6.22|>": 50676,
1315
+ "<|6.24|>": 50677,
1316
+ "<|6.26|>": 50678,
1317
+ "<|6.28|>": 50679,
1318
+ "<|6.30|>": 50680,
1319
+ "<|6.32|>": 50681,
1320
+ "<|6.34|>": 50682,
1321
+ "<|6.36|>": 50683,
1322
+ "<|6.38|>": 50684,
1323
+ "<|6.40|>": 50685,
1324
+ "<|6.42|>": 50686,
1325
+ "<|6.44|>": 50687,
1326
+ "<|6.46|>": 50688,
1327
+ "<|6.48|>": 50689,
1328
+ "<|6.50|>": 50690,
1329
+ "<|6.52|>": 50691,
1330
+ "<|6.54|>": 50692,
1331
+ "<|6.56|>": 50693,
1332
+ "<|6.58|>": 50694,
1333
+ "<|6.60|>": 50695,
1334
+ "<|6.62|>": 50696,
1335
+ "<|6.64|>": 50697,
1336
+ "<|6.66|>": 50698,
1337
+ "<|6.68|>": 50699,
1338
+ "<|6.70|>": 50700,
1339
+ "<|6.72|>": 50701,
1340
+ "<|6.74|>": 50702,
1341
+ "<|6.76|>": 50703,
1342
+ "<|6.78|>": 50704,
1343
+ "<|6.80|>": 50705,
1344
+ "<|6.82|>": 50706,
1345
+ "<|6.84|>": 50707,
1346
+ "<|6.86|>": 50708,
1347
+ "<|6.88|>": 50709,
1348
+ "<|6.90|>": 50710,
1349
+ "<|6.92|>": 50711,
1350
+ "<|6.94|>": 50712,
1351
+ "<|6.96|>": 50713,
1352
+ "<|6.98|>": 50714,
1353
+ "<|7.00|>": 50715,
1354
+ "<|7.02|>": 50716,
1355
+ "<|7.04|>": 50717,
1356
+ "<|7.06|>": 50718,
1357
+ "<|7.08|>": 50719,
1358
+ "<|7.10|>": 50720,
1359
+ "<|7.12|>": 50721,
1360
+ "<|7.14|>": 50722,
1361
+ "<|7.16|>": 50723,
1362
+ "<|7.18|>": 50724,
1363
+ "<|7.20|>": 50725,
1364
+ "<|7.22|>": 50726,
1365
+ "<|7.24|>": 50727,
1366
+ "<|7.26|>": 50728,
1367
+ "<|7.28|>": 50729,
1368
+ "<|7.30|>": 50730,
1369
+ "<|7.32|>": 50731,
1370
+ "<|7.34|>": 50732,
1371
+ "<|7.36|>": 50733,
1372
+ "<|7.38|>": 50734,
1373
+ "<|7.40|>": 50735,
1374
+ "<|7.42|>": 50736,
1375
+ "<|7.44|>": 50737,
1376
+ "<|7.46|>": 50738,
1377
+ "<|7.48|>": 50739,
1378
+ "<|7.50|>": 50740,
1379
+ "<|7.52|>": 50741,
1380
+ "<|7.54|>": 50742,
1381
+ "<|7.56|>": 50743,
1382
+ "<|7.58|>": 50744,
1383
+ "<|7.60|>": 50745,
1384
+ "<|7.62|>": 50746,
1385
+ "<|7.64|>": 50747,
1386
+ "<|7.66|>": 50748,
1387
+ "<|7.68|>": 50749,
1388
+ "<|7.70|>": 50750,
1389
+ "<|7.72|>": 50751,
1390
+ "<|7.74|>": 50752,
1391
+ "<|7.76|>": 50753,
1392
+ "<|7.78|>": 50754,
1393
+ "<|7.80|>": 50755,
1394
+ "<|7.82|>": 50756,
1395
+ "<|7.84|>": 50757,
1396
+ "<|7.86|>": 50758,
1397
+ "<|7.88|>": 50759,
1398
+ "<|7.90|>": 50760,
1399
+ "<|7.92|>": 50761,
1400
+ "<|7.94|>": 50762,
1401
+ "<|7.96|>": 50763,
1402
+ "<|7.98|>": 50764,
1403
+ "<|8.00|>": 50765,
1404
+ "<|8.02|>": 50766,
1405
+ "<|8.04|>": 50767,
1406
+ "<|8.06|>": 50768,
1407
+ "<|8.08|>": 50769,
1408
+ "<|8.10|>": 50770,
1409
+ "<|8.12|>": 50771,
1410
+ "<|8.14|>": 50772,
1411
+ "<|8.16|>": 50773,
1412
+ "<|8.18|>": 50774,
1413
+ "<|8.20|>": 50775,
1414
+ "<|8.22|>": 50776,
1415
+ "<|8.24|>": 50777,
1416
+ "<|8.26|>": 50778,
1417
+ "<|8.28|>": 50779,
1418
+ "<|8.30|>": 50780,
1419
+ "<|8.32|>": 50781,
1420
+ "<|8.34|>": 50782,
1421
+ "<|8.36|>": 50783,
1422
+ "<|8.38|>": 50784,
1423
+ "<|8.40|>": 50785,
1424
+ "<|8.42|>": 50786,
1425
+ "<|8.44|>": 50787,
1426
+ "<|8.46|>": 50788,
1427
+ "<|8.48|>": 50789,
1428
+ "<|8.50|>": 50790,
1429
+ "<|8.52|>": 50791,
1430
+ "<|8.54|>": 50792,
1431
+ "<|8.56|>": 50793,
1432
+ "<|8.58|>": 50794,
1433
+ "<|8.60|>": 50795,
1434
+ "<|8.62|>": 50796,
1435
+ "<|8.64|>": 50797,
1436
+ "<|8.66|>": 50798,
1437
+ "<|8.68|>": 50799,
1438
+ "<|8.70|>": 50800,
1439
+ "<|8.72|>": 50801,
1440
+ "<|8.74|>": 50802,
1441
+ "<|8.76|>": 50803,
1442
+ "<|8.78|>": 50804,
1443
+ "<|8.80|>": 50805,
1444
+ "<|8.82|>": 50806,
1445
+ "<|8.84|>": 50807,
1446
+ "<|8.86|>": 50808,
1447
+ "<|8.88|>": 50809,
1448
+ "<|8.90|>": 50810,
1449
+ "<|8.92|>": 50811,
1450
+ "<|8.94|>": 50812,
1451
+ "<|8.96|>": 50813,
1452
+ "<|8.98|>": 50814,
1453
+ "<|9.00|>": 50815,
1454
+ "<|9.02|>": 50816,
1455
+ "<|9.04|>": 50817,
1456
+ "<|9.06|>": 50818,
1457
+ "<|9.08|>": 50819,
1458
+ "<|9.10|>": 50820,
1459
+ "<|9.12|>": 50821,
1460
+ "<|9.14|>": 50822,
1461
+ "<|9.16|>": 50823,
1462
+ "<|9.18|>": 50824,
1463
+ "<|9.20|>": 50825,
1464
+ "<|9.22|>": 50826,
1465
+ "<|9.24|>": 50827,
1466
+ "<|9.26|>": 50828,
1467
+ "<|9.28|>": 50829,
1468
+ "<|9.30|>": 50830,
1469
+ "<|9.32|>": 50831,
1470
+ "<|9.34|>": 50832,
1471
+ "<|9.36|>": 50833,
1472
+ "<|9.38|>": 50834,
1473
+ "<|9.40|>": 50835,
1474
+ "<|9.42|>": 50836,
1475
+ "<|9.44|>": 50837,
1476
+ "<|9.46|>": 50838,
1477
+ "<|9.48|>": 50839,
1478
+ "<|9.50|>": 50840,
1479
+ "<|9.52|>": 50841,
1480
+ "<|9.54|>": 50842,
1481
+ "<|9.56|>": 50843,
1482
+ "<|9.58|>": 50844,
1483
+ "<|9.60|>": 50845,
1484
+ "<|9.62|>": 50846,
1485
+ "<|9.64|>": 50847,
1486
+ "<|9.66|>": 50848,
1487
+ "<|9.68|>": 50849,
1488
+ "<|9.70|>": 50850,
1489
+ "<|9.72|>": 50851,
1490
+ "<|9.74|>": 50852,
1491
+ "<|9.76|>": 50853,
1492
+ "<|9.78|>": 50854,
1493
+ "<|9.80|>": 50855,
1494
+ "<|9.82|>": 50856,
1495
+ "<|9.84|>": 50857,
1496
+ "<|9.86|>": 50858,
1497
+ "<|9.88|>": 50859,
1498
+ "<|9.90|>": 50860,
1499
+ "<|9.92|>": 50861,
1500
+ "<|9.94|>": 50862,
1501
+ "<|9.96|>": 50863,
1502
+ "<|9.98|>": 50864,
1503
+ "<|af|>": 50327,
1504
+ "<|am|>": 50334,
1505
+ "<|ar|>": 50272,
1506
+ "<|as|>": 50350,
1507
+ "<|az|>": 50304,
1508
+ "<|ba|>": 50355,
1509
+ "<|be|>": 50330,
1510
+ "<|bg|>": 50292,
1511
+ "<|bn|>": 50302,
1512
+ "<|bo|>": 50347,
1513
+ "<|br|>": 50309,
1514
+ "<|bs|>": 50315,
1515
+ "<|ca|>": 50270,
1516
+ "<|cs|>": 50283,
1517
+ "<|cy|>": 50297,
1518
+ "<|da|>": 50285,
1519
+ "<|de|>": 50261,
1520
+ "<|el|>": 50281,
1521
+ "<|endoftext|>": 50257,
1522
+ "<|en|>": 50259,
1523
+ "<|es|>": 50262,
1524
+ "<|et|>": 50307,
1525
+ "<|eu|>": 50310,
1526
+ "<|fa|>": 50300,
1527
+ "<|fi|>": 50277,
1528
+ "<|fo|>": 50338,
1529
+ "<|fr|>": 50265,
1530
+ "<|gl|>": 50319,
1531
+ "<|gu|>": 50333,
1532
+ "<|haw|>": 50352,
1533
+ "<|ha|>": 50354,
1534
+ "<|he|>": 50279,
1535
+ "<|hi|>": 50276,
1536
+ "<|hr|>": 50291,
1537
+ "<|ht|>": 50339,
1538
+ "<|hu|>": 50286,
1539
+ "<|hy|>": 50312,
1540
+ "<|id|>": 50275,
1541
+ "<|is|>": 50311,
1542
+ "<|it|>": 50274,
1543
+ "<|ja|>": 50266,
1544
+ "<|jw|>": 50356,
1545
+ "<|ka|>": 50329,
1546
+ "<|kk|>": 50316,
1547
+ "<|km|>": 50323,
1548
+ "<|kn|>": 50306,
1549
+ "<|ko|>": 50264,
1550
+ "<|la|>": 50294,
1551
+ "<|lb|>": 50345,
1552
+ "<|ln|>": 50353,
1553
+ "<|lo|>": 50336,
1554
+ "<|lt|>": 50293,
1555
+ "<|lv|>": 50301,
1556
+ "<|mg|>": 50349,
1557
+ "<|mi|>": 50295,
1558
+ "<|mk|>": 50308,
1559
+ "<|ml|>": 50296,
1560
+ "<|mn|>": 50314,
1561
+ "<|mr|>": 50320,
1562
+ "<|ms|>": 50282,
1563
+ "<|mt|>": 50343,
1564
+ "<|my|>": 50346,
1565
+ "<|ne|>": 50313,
1566
+ "<|nl|>": 50271,
1567
+ "<|nn|>": 50342,
1568
+ "<|nospeech|>": 50363,
1569
+ "<|notimestamps|>": 50364,
1570
+ "<|no|>": 50288,
1571
+ "<|oc|>": 50328,
1572
+ "<|pa|>": 50321,
1573
+ "<|pl|>": 50269,
1574
+ "<|ps|>": 50340,
1575
+ "<|pt|>": 50267,
1576
+ "<|ro|>": 50284,
1577
+ "<|ru|>": 50263,
1578
+ "<|sa|>": 50344,
1579
+ "<|sd|>": 50332,
1580
+ "<|si|>": 50322,
1581
+ "<|sk|>": 50298,
1582
+ "<|sl|>": 50305,
1583
+ "<|sn|>": 50324,
1584
+ "<|so|>": 50326,
1585
+ "<|sq|>": 50317,
1586
+ "<|sr|>": 50303,
1587
+ "<|startoflm|>": 50361,
1588
+ "<|startofprev|>": 50362,
1589
+ "<|startoftranscript|>": 50258,
1590
+ "<|su|>": 50357,
1591
+ "<|sv|>": 50273,
1592
+ "<|sw|>": 50318,
1593
+ "<|ta|>": 50287,
1594
+ "<|te|>": 50299,
1595
+ "<|tg|>": 50331,
1596
+ "<|th|>": 50289,
1597
+ "<|tk|>": 50341,
1598
+ "<|tl|>": 50348,
1599
+ "<|transcribe|>": 50360,
1600
+ "<|translate|>": 50359,
1601
+ "<|tr|>": 50268,
1602
+ "<|tt|>": 50351,
1603
+ "<|uk|>": 50280,
1604
+ "<|ur|>": 50290,
1605
+ "<|uz|>": 50337,
1606
+ "<|vi|>": 50278,
1607
+ "<|yi|>": 50335,
1608
+ "<|yo|>": 50325,
1609
+ "<|yue|>": 50358,
1610
+ "<|zh|>": 50260
1611
+ }
checkpoint-1750/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1750/normalizer.json ADDED
@@ -0,0 +1,1742 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "accessorise": "accessorize",
3
+ "accessorised": "accessorized",
4
+ "accessorises": "accessorizes",
5
+ "accessorising": "accessorizing",
6
+ "acclimatisation": "acclimatization",
7
+ "acclimatise": "acclimatize",
8
+ "acclimatised": "acclimatized",
9
+ "acclimatises": "acclimatizes",
10
+ "acclimatising": "acclimatizing",
11
+ "accoutrements": "accouterments",
12
+ "aeon": "eon",
13
+ "aeons": "eons",
14
+ "aerogramme": "aerogram",
15
+ "aerogrammes": "aerograms",
16
+ "aeroplane": "airplane",
17
+ "aeroplanes": "airplanes",
18
+ "aesthete": "esthete",
19
+ "aesthetes": "esthetes",
20
+ "aesthetic": "esthetic",
21
+ "aesthetically": "esthetically",
22
+ "aesthetics": "esthetics",
23
+ "aetiology": "etiology",
24
+ "ageing": "aging",
25
+ "aggrandisement": "aggrandizement",
26
+ "agonise": "agonize",
27
+ "agonised": "agonized",
28
+ "agonises": "agonizes",
29
+ "agonising": "agonizing",
30
+ "agonisingly": "agonizingly",
31
+ "almanack": "almanac",
32
+ "almanacks": "almanacs",
33
+ "aluminium": "aluminum",
34
+ "amortisable": "amortizable",
35
+ "amortisation": "amortization",
36
+ "amortisations": "amortizations",
37
+ "amortise": "amortize",
38
+ "amortised": "amortized",
39
+ "amortises": "amortizes",
40
+ "amortising": "amortizing",
41
+ "amphitheatre": "amphitheater",
42
+ "amphitheatres": "amphitheaters",
43
+ "anaemia": "anemia",
44
+ "anaemic": "anemic",
45
+ "anaesthesia": "anesthesia",
46
+ "anaesthetic": "anesthetic",
47
+ "anaesthetics": "anesthetics",
48
+ "anaesthetise": "anesthetize",
49
+ "anaesthetised": "anesthetized",
50
+ "anaesthetises": "anesthetizes",
51
+ "anaesthetising": "anesthetizing",
52
+ "anaesthetist": "anesthetist",
53
+ "anaesthetists": "anesthetists",
54
+ "anaesthetize": "anesthetize",
55
+ "anaesthetized": "anesthetized",
56
+ "anaesthetizes": "anesthetizes",
57
+ "anaesthetizing": "anesthetizing",
58
+ "analogue": "analog",
59
+ "analogues": "analogs",
60
+ "analyse": "analyze",
61
+ "analysed": "analyzed",
62
+ "analyses": "analyzes",
63
+ "analysing": "analyzing",
64
+ "anglicise": "anglicize",
65
+ "anglicised": "anglicized",
66
+ "anglicises": "anglicizes",
67
+ "anglicising": "anglicizing",
68
+ "annualised": "annualized",
69
+ "antagonise": "antagonize",
70
+ "antagonised": "antagonized",
71
+ "antagonises": "antagonizes",
72
+ "antagonising": "antagonizing",
73
+ "apologise": "apologize",
74
+ "apologised": "apologized",
75
+ "apologises": "apologizes",
76
+ "apologising": "apologizing",
77
+ "appal": "appall",
78
+ "appals": "appalls",
79
+ "appetiser": "appetizer",
80
+ "appetisers": "appetizers",
81
+ "appetising": "appetizing",
82
+ "appetisingly": "appetizingly",
83
+ "arbour": "arbor",
84
+ "arbours": "arbors",
85
+ "archaeologically": "archeologically",
86
+ "archaeologist": "archeologist",
87
+ "archaeologists": "archeologists",
88
+ "archaeology": "archeology",
89
+ "archeological": "archaeological",
90
+ "ardour": "ardor",
91
+ "armour": "armor",
92
+ "armoured": "armored",
93
+ "armourer": "armorer",
94
+ "armourers": "armorers",
95
+ "armouries": "armories",
96
+ "armoury": "armory",
97
+ "artefact": "artifact",
98
+ "artefacts": "artifacts",
99
+ "authorise": "authorize",
100
+ "authorised": "authorized",
101
+ "authorises": "authorizes",
102
+ "authorising": "authorizing",
103
+ "axe": "ax",
104
+ "backpedalled": "backpedaled",
105
+ "backpedalling": "backpedaling",
106
+ "bannister": "banister",
107
+ "bannisters": "banisters",
108
+ "baptise": "baptize",
109
+ "baptised": "baptized",
110
+ "baptises": "baptizes",
111
+ "baptising": "baptizing",
112
+ "bastardise": "bastardize",
113
+ "bastardised": "bastardized",
114
+ "bastardises": "bastardizes",
115
+ "bastardising": "bastardizing",
116
+ "battleax": "battleaxe",
117
+ "baulk": "balk",
118
+ "baulked": "balked",
119
+ "baulking": "balking",
120
+ "baulks": "balks",
121
+ "bedevilled": "bedeviled",
122
+ "bedevilling": "bedeviling",
123
+ "behaviour": "behavior",
124
+ "behavioural": "behavioral",
125
+ "behaviourism": "behaviorism",
126
+ "behaviourist": "behaviorist",
127
+ "behaviourists": "behaviorists",
128
+ "behaviours": "behaviors",
129
+ "behove": "behoove",
130
+ "behoved": "behooved",
131
+ "behoves": "behooves",
132
+ "bejewelled": "bejeweled",
133
+ "belabour": "belabor",
134
+ "belaboured": "belabored",
135
+ "belabouring": "belaboring",
136
+ "belabours": "belabors",
137
+ "bevelled": "beveled",
138
+ "bevvies": "bevies",
139
+ "bevvy": "bevy",
140
+ "biassed": "biased",
141
+ "biassing": "biasing",
142
+ "bingeing": "binging",
143
+ "bougainvillaea": "bougainvillea",
144
+ "bougainvillaeas": "bougainvilleas",
145
+ "bowdlerise": "bowdlerize",
146
+ "bowdlerised": "bowdlerized",
147
+ "bowdlerises": "bowdlerizes",
148
+ "bowdlerising": "bowdlerizing",
149
+ "breathalyse": "breathalyze",
150
+ "breathalysed": "breathalyzed",
151
+ "breathalyser": "breathalyzer",
152
+ "breathalysers": "breathalyzers",
153
+ "breathalyses": "breathalyzes",
154
+ "breathalysing": "breathalyzing",
155
+ "brutalise": "brutalize",
156
+ "brutalised": "brutalized",
157
+ "brutalises": "brutalizes",
158
+ "brutalising": "brutalizing",
159
+ "busses": "buses",
160
+ "bussing": "busing",
161
+ "caesarean": "cesarean",
162
+ "caesareans": "cesareans",
163
+ "calibre": "caliber",
164
+ "calibres": "calibers",
165
+ "calliper": "caliper",
166
+ "callipers": "calipers",
167
+ "callisthenics": "calisthenics",
168
+ "canalise": "canalize",
169
+ "canalised": "canalized",
170
+ "canalises": "canalizes",
171
+ "canalising": "canalizing",
172
+ "cancelation": "cancellation",
173
+ "cancelations": "cancellations",
174
+ "cancelled": "canceled",
175
+ "cancelling": "canceling",
176
+ "candour": "candor",
177
+ "cannibalise": "cannibalize",
178
+ "cannibalised": "cannibalized",
179
+ "cannibalises": "cannibalizes",
180
+ "cannibalising": "cannibalizing",
181
+ "canonise": "canonize",
182
+ "canonised": "canonized",
183
+ "canonises": "canonizes",
184
+ "canonising": "canonizing",
185
+ "capitalise": "capitalize",
186
+ "capitalised": "capitalized",
187
+ "capitalises": "capitalizes",
188
+ "capitalising": "capitalizing",
189
+ "caramelise": "caramelize",
190
+ "caramelised": "caramelized",
191
+ "caramelises": "caramelizes",
192
+ "caramelising": "caramelizing",
193
+ "carbonise": "carbonize",
194
+ "carbonised": "carbonized",
195
+ "carbonises": "carbonizes",
196
+ "carbonising": "carbonizing",
197
+ "carolled": "caroled",
198
+ "carolling": "caroling",
199
+ "catalogue": "catalog",
200
+ "catalogued": "cataloged",
201
+ "catalogues": "catalogs",
202
+ "cataloguing": "cataloging",
203
+ "catalyse": "catalyze",
204
+ "catalysed": "catalyzed",
205
+ "catalyses": "catalyzes",
206
+ "catalysing": "catalyzing",
207
+ "categorise": "categorize",
208
+ "categorised": "categorized",
209
+ "categorises": "categorizes",
210
+ "categorising": "categorizing",
211
+ "cauterise": "cauterize",
212
+ "cauterised": "cauterized",
213
+ "cauterises": "cauterizes",
214
+ "cauterising": "cauterizing",
215
+ "cavilled": "caviled",
216
+ "cavilling": "caviling",
217
+ "centigramme": "centigram",
218
+ "centigrammes": "centigrams",
219
+ "centilitre": "centiliter",
220
+ "centilitres": "centiliters",
221
+ "centimetre": "centimeter",
222
+ "centimetres": "centimeters",
223
+ "centralise": "centralize",
224
+ "centralised": "centralized",
225
+ "centralises": "centralizes",
226
+ "centralising": "centralizing",
227
+ "centre": "center",
228
+ "centred": "centered",
229
+ "centrefold": "centerfold",
230
+ "centrefolds": "centerfolds",
231
+ "centrepiece": "centerpiece",
232
+ "centrepieces": "centerpieces",
233
+ "centres": "centers",
234
+ "channelled": "channeled",
235
+ "channelling": "channeling",
236
+ "characterise": "characterize",
237
+ "characterised": "characterized",
238
+ "characterises": "characterizes",
239
+ "characterising": "characterizing",
240
+ "cheque": "check",
241
+ "chequebook": "checkbook",
242
+ "chequebooks": "checkbooks",
243
+ "chequered": "checkered",
244
+ "cheques": "checks",
245
+ "chilli": "chili",
246
+ "chimaera": "chimera",
247
+ "chimaeras": "chimeras",
248
+ "chiselled": "chiseled",
249
+ "chiselling": "chiseling",
250
+ "circularise": "circularize",
251
+ "circularised": "circularized",
252
+ "circularises": "circularizes",
253
+ "circularising": "circularizing",
254
+ "civilise": "civilize",
255
+ "civilised": "civilized",
256
+ "civilises": "civilizes",
257
+ "civilising": "civilizing",
258
+ "clamour": "clamor",
259
+ "clamoured": "clamored",
260
+ "clamouring": "clamoring",
261
+ "clamours": "clamors",
262
+ "clangour": "clangor",
263
+ "clarinettist": "clarinetist",
264
+ "clarinettists": "clarinetists",
265
+ "collectivise": "collectivize",
266
+ "collectivised": "collectivized",
267
+ "collectivises": "collectivizes",
268
+ "collectivising": "collectivizing",
269
+ "colonisation": "colonization",
270
+ "colonise": "colonize",
271
+ "colonised": "colonized",
272
+ "coloniser": "colonizer",
273
+ "colonisers": "colonizers",
274
+ "colonises": "colonizes",
275
+ "colonising": "colonizing",
276
+ "colour": "color",
277
+ "colourant": "colorant",
278
+ "colourants": "colorants",
279
+ "coloured": "colored",
280
+ "coloureds": "coloreds",
281
+ "colourful": "colorful",
282
+ "colourfully": "colorfully",
283
+ "colouring": "coloring",
284
+ "colourize": "colorize",
285
+ "colourized": "colorized",
286
+ "colourizes": "colorizes",
287
+ "colourizing": "colorizing",
288
+ "colourless": "colorless",
289
+ "colours": "colors",
290
+ "commercialise": "commercialize",
291
+ "commercialised": "commercialized",
292
+ "commercialises": "commercializes",
293
+ "commercialising": "commercializing",
294
+ "compartmentalise": "compartmentalize",
295
+ "compartmentalised": "compartmentalized",
296
+ "compartmentalises": "compartmentalizes",
297
+ "compartmentalising": "compartmentalizing",
298
+ "computerise": "computerize",
299
+ "computerised": "computerized",
300
+ "computerises": "computerizes",
301
+ "computerising": "computerizing",
302
+ "conceptualise": "conceptualize",
303
+ "conceptualised": "conceptualized",
304
+ "conceptualises": "conceptualizes",
305
+ "conceptualising": "conceptualizing",
306
+ "connexion": "connection",
307
+ "connexions": "connections",
308
+ "contextualise": "contextualize",
309
+ "contextualised": "contextualized",
310
+ "contextualises": "contextualizes",
311
+ "contextualising": "contextualizing",
312
+ "cosier": "cozier",
313
+ "cosies": "cozies",
314
+ "cosiest": "coziest",
315
+ "cosily": "cozily",
316
+ "cosiness": "coziness",
317
+ "cosy": "cozy",
318
+ "councillor": "councilor",
319
+ "councillors": "councilors",
320
+ "counselled": "counseled",
321
+ "counselling": "counseling",
322
+ "counsellor": "counselor",
323
+ "counsellors": "counselors",
324
+ "crenelated": "crenellated",
325
+ "criminalise": "criminalize",
326
+ "criminalised": "criminalized",
327
+ "criminalises": "criminalizes",
328
+ "criminalising": "criminalizing",
329
+ "criticise": "criticize",
330
+ "criticised": "criticized",
331
+ "criticises": "criticizes",
332
+ "criticising": "criticizing",
333
+ "crueller": "crueler",
334
+ "cruellest": "cruelest",
335
+ "crystallisation": "crystallization",
336
+ "crystallise": "crystallize",
337
+ "crystallised": "crystallized",
338
+ "crystallises": "crystallizes",
339
+ "crystallising": "crystallizing",
340
+ "cudgelled": "cudgeled",
341
+ "cudgelling": "cudgeling",
342
+ "customise": "customize",
343
+ "customised": "customized",
344
+ "customises": "customizes",
345
+ "customising": "customizing",
346
+ "cypher": "cipher",
347
+ "cyphers": "ciphers",
348
+ "decentralisation": "decentralization",
349
+ "decentralise": "decentralize",
350
+ "decentralised": "decentralized",
351
+ "decentralises": "decentralizes",
352
+ "decentralising": "decentralizing",
353
+ "decriminalisation": "decriminalization",
354
+ "decriminalise": "decriminalize",
355
+ "decriminalised": "decriminalized",
356
+ "decriminalises": "decriminalizes",
357
+ "decriminalising": "decriminalizing",
358
+ "defence": "defense",
359
+ "defenceless": "defenseless",
360
+ "defences": "defenses",
361
+ "dehumanisation": "dehumanization",
362
+ "dehumanise": "dehumanize",
363
+ "dehumanised": "dehumanized",
364
+ "dehumanises": "dehumanizes",
365
+ "dehumanising": "dehumanizing",
366
+ "demeanour": "demeanor",
367
+ "demilitarisation": "demilitarization",
368
+ "demilitarise": "demilitarize",
369
+ "demilitarised": "demilitarized",
370
+ "demilitarises": "demilitarizes",
371
+ "demilitarising": "demilitarizing",
372
+ "demobilisation": "demobilization",
373
+ "demobilise": "demobilize",
374
+ "demobilised": "demobilized",
375
+ "demobilises": "demobilizes",
376
+ "demobilising": "demobilizing",
377
+ "democratisation": "democratization",
378
+ "democratise": "democratize",
379
+ "democratised": "democratized",
380
+ "democratises": "democratizes",
381
+ "democratising": "democratizing",
382
+ "demonise": "demonize",
383
+ "demonised": "demonized",
384
+ "demonises": "demonizes",
385
+ "demonising": "demonizing",
386
+ "demoralisation": "demoralization",
387
+ "demoralise": "demoralize",
388
+ "demoralised": "demoralized",
389
+ "demoralises": "demoralizes",
390
+ "demoralising": "demoralizing",
391
+ "denationalisation": "denationalization",
392
+ "denationalise": "denationalize",
393
+ "denationalised": "denationalized",
394
+ "denationalises": "denationalizes",
395
+ "denationalising": "denationalizing",
396
+ "deodorise": "deodorize",
397
+ "deodorised": "deodorized",
398
+ "deodorises": "deodorizes",
399
+ "deodorising": "deodorizing",
400
+ "depersonalise": "depersonalize",
401
+ "depersonalised": "depersonalized",
402
+ "depersonalises": "depersonalizes",
403
+ "depersonalising": "depersonalizing",
404
+ "deputise": "deputize",
405
+ "deputised": "deputized",
406
+ "deputises": "deputizes",
407
+ "deputising": "deputizing",
408
+ "desensitisation": "desensitization",
409
+ "desensitise": "desensitize",
410
+ "desensitised": "desensitized",
411
+ "desensitises": "desensitizes",
412
+ "desensitising": "desensitizing",
413
+ "destabilisation": "destabilization",
414
+ "destabilise": "destabilize",
415
+ "destabilised": "destabilized",
416
+ "destabilises": "destabilizes",
417
+ "destabilising": "destabilizing",
418
+ "dialled": "dialed",
419
+ "dialling": "dialing",
420
+ "dialogue": "dialog",
421
+ "dialogues": "dialogs",
422
+ "diarrhoea": "diarrhea",
423
+ "digitise": "digitize",
424
+ "digitised": "digitized",
425
+ "digitises": "digitizes",
426
+ "digitising": "digitizing",
427
+ "disc": "disk",
428
+ "discolour": "discolor",
429
+ "discoloured": "discolored",
430
+ "discolouring": "discoloring",
431
+ "discolours": "discolors",
432
+ "discs": "disks",
433
+ "disembowelled": "disemboweled",
434
+ "disembowelling": "disemboweling",
435
+ "disfavour": "disfavor",
436
+ "dishevelled": "disheveled",
437
+ "dishonour": "dishonor",
438
+ "dishonourable": "dishonorable",
439
+ "dishonourably": "dishonorably",
440
+ "dishonoured": "dishonored",
441
+ "dishonouring": "dishonoring",
442
+ "dishonours": "dishonors",
443
+ "disorganisation": "disorganization",
444
+ "disorganised": "disorganized",
445
+ "distil": "distill",
446
+ "distils": "distills",
447
+ "dramatisation": "dramatization",
448
+ "dramatisations": "dramatizations",
449
+ "dramatise": "dramatize",
450
+ "dramatised": "dramatized",
451
+ "dramatises": "dramatizes",
452
+ "dramatising": "dramatizing",
453
+ "draught": "draft",
454
+ "draughtboard": "draftboard",
455
+ "draughtboards": "draftboards",
456
+ "draughtier": "draftier",
457
+ "draughtiest": "draftiest",
458
+ "draughts": "drafts",
459
+ "draughtsman": "draftsman",
460
+ "draughtsmanship": "draftsmanship",
461
+ "draughtsmen": "draftsmen",
462
+ "draughtswoman": "draftswoman",
463
+ "draughtswomen": "draftswomen",
464
+ "draughty": "drafty",
465
+ "drivelled": "driveled",
466
+ "drivelling": "driveling",
467
+ "duelled": "dueled",
468
+ "duelling": "dueling",
469
+ "economise": "economize",
470
+ "economised": "economized",
471
+ "economises": "economizes",
472
+ "economising": "economizing",
473
+ "editorialise": "editorialize",
474
+ "editorialised": "editorialized",
475
+ "editorialises": "editorializes",
476
+ "editorialising": "editorializing",
477
+ "edoema": "edema",
478
+ "empathise": "empathize",
479
+ "empathised": "empathized",
480
+ "empathises": "empathizes",
481
+ "empathising": "empathizing",
482
+ "emphasise": "emphasize",
483
+ "emphasised": "emphasized",
484
+ "emphasises": "emphasizes",
485
+ "emphasising": "emphasizing",
486
+ "enamelled": "enameled",
487
+ "enamelling": "enameling",
488
+ "enamoured": "enamored",
489
+ "encyclopaedia": "encyclopedia",
490
+ "encyclopaedias": "encyclopedias",
491
+ "encyclopaedic": "encyclopedic",
492
+ "endeavour": "endeavor",
493
+ "endeavoured": "endeavored",
494
+ "endeavouring": "endeavoring",
495
+ "endeavours": "endeavors",
496
+ "energise": "energize",
497
+ "energised": "energized",
498
+ "energises": "energizes",
499
+ "energising": "energizing",
500
+ "enrol": "enroll",
501
+ "enrols": "enrolls",
502
+ "enthral": "enthrall",
503
+ "enthrals": "enthralls",
504
+ "epaulette": "epaulet",
505
+ "epaulettes": "epaulets",
506
+ "epicentre": "epicenter",
507
+ "epicentres": "epicenters",
508
+ "epilogue": "epilog",
509
+ "epilogues": "epilogs",
510
+ "epitomise": "epitomize",
511
+ "epitomised": "epitomized",
512
+ "epitomises": "epitomizes",
513
+ "epitomising": "epitomizing",
514
+ "equalisation": "equalization",
515
+ "equalise": "equalize",
516
+ "equalised": "equalized",
517
+ "equaliser": "equalizer",
518
+ "equalisers": "equalizers",
519
+ "equalises": "equalizes",
520
+ "equalising": "equalizing",
521
+ "eulogise": "eulogize",
522
+ "eulogised": "eulogized",
523
+ "eulogises": "eulogizes",
524
+ "eulogising": "eulogizing",
525
+ "evangelise": "evangelize",
526
+ "evangelised": "evangelized",
527
+ "evangelises": "evangelizes",
528
+ "evangelising": "evangelizing",
529
+ "exorcise": "exorcize",
530
+ "exorcised": "exorcized",
531
+ "exorcises": "exorcizes",
532
+ "exorcising": "exorcizing",
533
+ "extemporisation": "extemporization",
534
+ "extemporise": "extemporize",
535
+ "extemporised": "extemporized",
536
+ "extemporises": "extemporizes",
537
+ "extemporising": "extemporizing",
538
+ "externalisation": "externalization",
539
+ "externalisations": "externalizations",
540
+ "externalise": "externalize",
541
+ "externalised": "externalized",
542
+ "externalises": "externalizes",
543
+ "externalising": "externalizing",
544
+ "factorise": "factorize",
545
+ "factorised": "factorized",
546
+ "factorises": "factorizes",
547
+ "factorising": "factorizing",
548
+ "faecal": "fecal",
549
+ "faeces": "feces",
550
+ "familiarisation": "familiarization",
551
+ "familiarise": "familiarize",
552
+ "familiarised": "familiarized",
553
+ "familiarises": "familiarizes",
554
+ "familiarising": "familiarizing",
555
+ "fantasise": "fantasize",
556
+ "fantasised": "fantasized",
557
+ "fantasises": "fantasizes",
558
+ "fantasising": "fantasizing",
559
+ "favour": "favor",
560
+ "favourable": "favorable",
561
+ "favourably": "favorably",
562
+ "favoured": "favored",
563
+ "favouring": "favoring",
564
+ "favourite": "favorite",
565
+ "favourites": "favorites",
566
+ "favouritism": "favoritism",
567
+ "favours": "favors",
568
+ "feminise": "feminize",
569
+ "feminised": "feminized",
570
+ "feminises": "feminizes",
571
+ "feminising": "feminizing",
572
+ "fertilisation": "fertilization",
573
+ "fertilise": "fertilize",
574
+ "fertilised": "fertilized",
575
+ "fertiliser": "fertilizer",
576
+ "fertilisers": "fertilizers",
577
+ "fertilises": "fertilizes",
578
+ "fertilising": "fertilizing",
579
+ "fervour": "fervor",
580
+ "fibre": "fiber",
581
+ "fibreglass": "fiberglass",
582
+ "fibres": "fibers",
583
+ "fictionalisation": "fictionalization",
584
+ "fictionalisations": "fictionalizations",
585
+ "fictionalise": "fictionalize",
586
+ "fictionalised": "fictionalized",
587
+ "fictionalises": "fictionalizes",
588
+ "fictionalising": "fictionalizing",
589
+ "fillet": "filet",
590
+ "filleted": "fileted",
591
+ "filleting": "fileting",
592
+ "fillets": "filets",
593
+ "finalisation": "finalization",
594
+ "finalise": "finalize",
595
+ "finalised": "finalized",
596
+ "finalises": "finalizes",
597
+ "finalising": "finalizing",
598
+ "flautist": "flutist",
599
+ "flautists": "flutists",
600
+ "flavour": "flavor",
601
+ "flavoured": "flavored",
602
+ "flavouring": "flavoring",
603
+ "flavourings": "flavorings",
604
+ "flavourless": "flavorless",
605
+ "flavours": "flavors",
606
+ "flavoursome": "flavorsome",
607
+ "flyer / flier": "flier / flyer",
608
+ "foetal": "fetal",
609
+ "foetid": "fetid",
610
+ "foetus": "fetus",
611
+ "foetuses": "fetuses",
612
+ "formalisation": "formalization",
613
+ "formalise": "formalize",
614
+ "formalised": "formalized",
615
+ "formalises": "formalizes",
616
+ "formalising": "formalizing",
617
+ "fossilisation": "fossilization",
618
+ "fossilise": "fossilize",
619
+ "fossilised": "fossilized",
620
+ "fossilises": "fossilizes",
621
+ "fossilising": "fossilizing",
622
+ "fraternisation": "fraternization",
623
+ "fraternise": "fraternize",
624
+ "fraternised": "fraternized",
625
+ "fraternises": "fraternizes",
626
+ "fraternising": "fraternizing",
627
+ "fulfil": "fulfill",
628
+ "fulfilment": "fulfillment",
629
+ "fulfils": "fulfills",
630
+ "funnelled": "funneled",
631
+ "funnelling": "funneling",
632
+ "gage": "gauge",
633
+ "gaged": "gauged",
634
+ "gages": "gauges",
635
+ "gaging": "gauging",
636
+ "galvanise": "galvanize",
637
+ "galvanised": "galvanized",
638
+ "galvanises": "galvanizes",
639
+ "galvanising": "galvanizing",
640
+ "gambolled": "gamboled",
641
+ "gambolling": "gamboling",
642
+ "gaol": "jail",
643
+ "gaolbird": "jailbird",
644
+ "gaolbirds": "jailbirds",
645
+ "gaolbreak": "jailbreak",
646
+ "gaolbreaks": "jailbreaks",
647
+ "gaoled": "jailed",
648
+ "gaoler": "jailer",
649
+ "gaolers": "jailers",
650
+ "gaoling": "jailing",
651
+ "gaols": "jails",
652
+ "gasses": "gases",
653
+ "generalisation": "generalization",
654
+ "generalisations": "generalizations",
655
+ "generalise": "generalize",
656
+ "generalised": "generalized",
657
+ "generalises": "generalizes",
658
+ "generalising": "generalizing",
659
+ "ghettoise": "ghettoize",
660
+ "ghettoised": "ghettoized",
661
+ "ghettoises": "ghettoizes",
662
+ "ghettoising": "ghettoizing",
663
+ "gipsies": "gypsies",
664
+ "glamor": "glamour",
665
+ "glamorise": "glamorize",
666
+ "glamorised": "glamorized",
667
+ "glamorises": "glamorizes",
668
+ "glamorising": "glamorizing",
669
+ "globalisation": "globalization",
670
+ "globalise": "globalize",
671
+ "globalised": "globalized",
672
+ "globalises": "globalizes",
673
+ "globalising": "globalizing",
674
+ "glueing": "gluing",
675
+ "goitre": "goiter",
676
+ "goitres": "goiters",
677
+ "gonorrhoea": "gonorrhea",
678
+ "gramme": "gram",
679
+ "grammes": "grams",
680
+ "gravelled": "graveled",
681
+ "grey": "gray",
682
+ "greyed": "grayed",
683
+ "greying": "graying",
684
+ "greyish": "grayish",
685
+ "greyness": "grayness",
686
+ "greys": "grays",
687
+ "grovelled": "groveled",
688
+ "grovelling": "groveling",
689
+ "groyne": "groin",
690
+ "groynes": "groins",
691
+ "gruelling": "grueling",
692
+ "gruellingly": "gruelingly",
693
+ "gryphon": "griffin",
694
+ "gryphons": "griffins",
695
+ "gynaecological": "gynecological",
696
+ "gynaecologist": "gynecologist",
697
+ "gynaecologists": "gynecologists",
698
+ "gynaecology": "gynecology",
699
+ "haematological": "hematological",
700
+ "haematologist": "hematologist",
701
+ "haematologists": "hematologists",
702
+ "haematology": "hematology",
703
+ "haemoglobin": "hemoglobin",
704
+ "haemophilia": "hemophilia",
705
+ "haemophiliac": "hemophiliac",
706
+ "haemophiliacs": "hemophiliacs",
707
+ "haemorrhage": "hemorrhage",
708
+ "haemorrhaged": "hemorrhaged",
709
+ "haemorrhages": "hemorrhages",
710
+ "haemorrhaging": "hemorrhaging",
711
+ "haemorrhoids": "hemorrhoids",
712
+ "harbour": "harbor",
713
+ "harboured": "harbored",
714
+ "harbouring": "harboring",
715
+ "harbours": "harbors",
716
+ "harmonisation": "harmonization",
717
+ "harmonise": "harmonize",
718
+ "harmonised": "harmonized",
719
+ "harmonises": "harmonizes",
720
+ "harmonising": "harmonizing",
721
+ "homoeopath": "homeopath",
722
+ "homoeopathic": "homeopathic",
723
+ "homoeopaths": "homeopaths",
724
+ "homoeopathy": "homeopathy",
725
+ "homogenise": "homogenize",
726
+ "homogenised": "homogenized",
727
+ "homogenises": "homogenizes",
728
+ "homogenising": "homogenizing",
729
+ "honour": "honor",
730
+ "honourable": "honorable",
731
+ "honourably": "honorably",
732
+ "honoured": "honored",
733
+ "honouring": "honoring",
734
+ "honours": "honors",
735
+ "hospitalisation": "hospitalization",
736
+ "hospitalise": "hospitalize",
737
+ "hospitalised": "hospitalized",
738
+ "hospitalises": "hospitalizes",
739
+ "hospitalising": "hospitalizing",
740
+ "humanise": "humanize",
741
+ "humanised": "humanized",
742
+ "humanises": "humanizes",
743
+ "humanising": "humanizing",
744
+ "humour": "humor",
745
+ "humoured": "humored",
746
+ "humouring": "humoring",
747
+ "humourless": "humorless",
748
+ "humours": "humors",
749
+ "hybridise": "hybridize",
750
+ "hybridised": "hybridized",
751
+ "hybridises": "hybridizes",
752
+ "hybridising": "hybridizing",
753
+ "hypnotise": "hypnotize",
754
+ "hypnotised": "hypnotized",
755
+ "hypnotises": "hypnotizes",
756
+ "hypnotising": "hypnotizing",
757
+ "hypothesise": "hypothesize",
758
+ "hypothesised": "hypothesized",
759
+ "hypothesises": "hypothesizes",
760
+ "hypothesising": "hypothesizing",
761
+ "idealisation": "idealization",
762
+ "idealise": "idealize",
763
+ "idealised": "idealized",
764
+ "idealises": "idealizes",
765
+ "idealising": "idealizing",
766
+ "idolise": "idolize",
767
+ "idolised": "idolized",
768
+ "idolises": "idolizes",
769
+ "idolising": "idolizing",
770
+ "immobilisation": "immobilization",
771
+ "immobilise": "immobilize",
772
+ "immobilised": "immobilized",
773
+ "immobiliser": "immobilizer",
774
+ "immobilisers": "immobilizers",
775
+ "immobilises": "immobilizes",
776
+ "immobilising": "immobilizing",
777
+ "immortalise": "immortalize",
778
+ "immortalised": "immortalized",
779
+ "immortalises": "immortalizes",
780
+ "immortalising": "immortalizing",
781
+ "immunisation": "immunization",
782
+ "immunise": "immunize",
783
+ "immunised": "immunized",
784
+ "immunises": "immunizes",
785
+ "immunising": "immunizing",
786
+ "impanelled": "impaneled",
787
+ "impanelling": "impaneling",
788
+ "imperilled": "imperiled",
789
+ "imperilling": "imperiling",
790
+ "individualise": "individualize",
791
+ "individualised": "individualized",
792
+ "individualises": "individualizes",
793
+ "individualising": "individualizing",
794
+ "industrialise": "industrialize",
795
+ "industrialised": "industrialized",
796
+ "industrialises": "industrializes",
797
+ "industrialising": "industrializing",
798
+ "inflexion": "inflection",
799
+ "inflexions": "inflections",
800
+ "initialise": "initialize",
801
+ "initialised": "initialized",
802
+ "initialises": "initializes",
803
+ "initialising": "initializing",
804
+ "initialled": "initialed",
805
+ "initialling": "initialing",
806
+ "instal": "install",
807
+ "instalment": "installment",
808
+ "instalments": "installments",
809
+ "instals": "installs",
810
+ "instil": "instill",
811
+ "instils": "instills",
812
+ "institutionalisation": "institutionalization",
813
+ "institutionalise": "institutionalize",
814
+ "institutionalised": "institutionalized",
815
+ "institutionalises": "institutionalizes",
816
+ "institutionalising": "institutionalizing",
817
+ "intellectualise": "intellectualize",
818
+ "intellectualised": "intellectualized",
819
+ "intellectualises": "intellectualizes",
820
+ "intellectualising": "intellectualizing",
821
+ "internalisation": "internalization",
822
+ "internalise": "internalize",
823
+ "internalised": "internalized",
824
+ "internalises": "internalizes",
825
+ "internalising": "internalizing",
826
+ "internationalisation": "internationalization",
827
+ "internationalise": "internationalize",
828
+ "internationalised": "internationalized",
829
+ "internationalises": "internationalizes",
830
+ "internationalising": "internationalizing",
831
+ "ionisation": "ionization",
832
+ "ionise": "ionize",
833
+ "ionised": "ionized",
834
+ "ioniser": "ionizer",
835
+ "ionisers": "ionizers",
836
+ "ionises": "ionizes",
837
+ "ionising": "ionizing",
838
+ "italicise": "italicize",
839
+ "italicised": "italicized",
840
+ "italicises": "italicizes",
841
+ "italicising": "italicizing",
842
+ "itemise": "itemize",
843
+ "itemised": "itemized",
844
+ "itemises": "itemizes",
845
+ "itemising": "itemizing",
846
+ "jeopardise": "jeopardize",
847
+ "jeopardised": "jeopardized",
848
+ "jeopardises": "jeopardizes",
849
+ "jeopardising": "jeopardizing",
850
+ "jewelled": "jeweled",
851
+ "jeweller": "jeweler",
852
+ "jewellers": "jewelers",
853
+ "jewellery": "jewelry",
854
+ "judgement": "judgment",
855
+ "kilogramme": "kilogram",
856
+ "kilogrammes": "kilograms",
857
+ "kilometre": "kilometer",
858
+ "kilometres": "kilometers",
859
+ "labelled": "labeled",
860
+ "labelling": "labeling",
861
+ "labour": "labor",
862
+ "laboured": "labored",
863
+ "labourer": "laborer",
864
+ "labourers": "laborers",
865
+ "labouring": "laboring",
866
+ "labours": "labors",
867
+ "lacklustre": "lackluster",
868
+ "legalisation": "legalization",
869
+ "legalise": "legalize",
870
+ "legalised": "legalized",
871
+ "legalises": "legalizes",
872
+ "legalising": "legalizing",
873
+ "legitimise": "legitimize",
874
+ "legitimised": "legitimized",
875
+ "legitimises": "legitimizes",
876
+ "legitimising": "legitimizing",
877
+ "leukaemia": "leukemia",
878
+ "levelled": "leveled",
879
+ "leveller": "leveler",
880
+ "levellers": "levelers",
881
+ "levelling": "leveling",
882
+ "libelled": "libeled",
883
+ "libelling": "libeling",
884
+ "libellous": "libelous",
885
+ "liberalisation": "liberalization",
886
+ "liberalise": "liberalize",
887
+ "liberalised": "liberalized",
888
+ "liberalises": "liberalizes",
889
+ "liberalising": "liberalizing",
890
+ "licence": "license",
891
+ "licenced": "licensed",
892
+ "licences": "licenses",
893
+ "licencing": "licensing",
894
+ "likeable": "likable",
895
+ "lionisation": "lionization",
896
+ "lionise": "lionize",
897
+ "lionised": "lionized",
898
+ "lionises": "lionizes",
899
+ "lionising": "lionizing",
900
+ "liquidise": "liquidize",
901
+ "liquidised": "liquidized",
902
+ "liquidiser": "liquidizer",
903
+ "liquidisers": "liquidizers",
904
+ "liquidises": "liquidizes",
905
+ "liquidising": "liquidizing",
906
+ "litre": "liter",
907
+ "litres": "liters",
908
+ "localise": "localize",
909
+ "localised": "localized",
910
+ "localises": "localizes",
911
+ "localising": "localizing",
912
+ "louvre": "louver",
913
+ "louvred": "louvered",
914
+ "louvres": "louvers",
915
+ "lustre": "luster",
916
+ "magnetise": "magnetize",
917
+ "magnetised": "magnetized",
918
+ "magnetises": "magnetizes",
919
+ "magnetising": "magnetizing",
920
+ "manoeuvrability": "maneuverability",
921
+ "manoeuvrable": "maneuverable",
922
+ "manoeuvre": "maneuver",
923
+ "manoeuvred": "maneuvered",
924
+ "manoeuvres": "maneuvers",
925
+ "manoeuvring": "maneuvering",
926
+ "manoeuvrings": "maneuverings",
927
+ "marginalisation": "marginalization",
928
+ "marginalise": "marginalize",
929
+ "marginalised": "marginalized",
930
+ "marginalises": "marginalizes",
931
+ "marginalising": "marginalizing",
932
+ "marshalled": "marshaled",
933
+ "marshalling": "marshaling",
934
+ "marvelled": "marveled",
935
+ "marvelling": "marveling",
936
+ "marvellous": "marvelous",
937
+ "marvellously": "marvelously",
938
+ "materialisation": "materialization",
939
+ "materialise": "materialize",
940
+ "materialised": "materialized",
941
+ "materialises": "materializes",
942
+ "materialising": "materializing",
943
+ "maximisation": "maximization",
944
+ "maximise": "maximize",
945
+ "maximised": "maximized",
946
+ "maximises": "maximizes",
947
+ "maximising": "maximizing",
948
+ "meagre": "meager",
949
+ "mechanisation": "mechanization",
950
+ "mechanise": "mechanize",
951
+ "mechanised": "mechanized",
952
+ "mechanises": "mechanizes",
953
+ "mechanising": "mechanizing",
954
+ "mediaeval": "medieval",
955
+ "memorialise": "memorialize",
956
+ "memorialised": "memorialized",
957
+ "memorialises": "memorializes",
958
+ "memorialising": "memorializing",
959
+ "memorise": "memorize",
960
+ "memorised": "memorized",
961
+ "memorises": "memorizes",
962
+ "memorising": "memorizing",
963
+ "mesmerise": "mesmerize",
964
+ "mesmerised": "mesmerized",
965
+ "mesmerises": "mesmerizes",
966
+ "mesmerising": "mesmerizing",
967
+ "metabolise": "metabolize",
968
+ "metabolised": "metabolized",
969
+ "metabolises": "metabolizes",
970
+ "metabolising": "metabolizing",
971
+ "metre": "meter",
972
+ "metres": "meters",
973
+ "mhm": "hmm",
974
+ "micrometre": "micrometer",
975
+ "micrometres": "micrometers",
976
+ "militarise": "militarize",
977
+ "militarised": "militarized",
978
+ "militarises": "militarizes",
979
+ "militarising": "militarizing",
980
+ "milligramme": "milligram",
981
+ "milligrammes": "milligrams",
982
+ "millilitre": "milliliter",
983
+ "millilitres": "milliliters",
984
+ "millimetre": "millimeter",
985
+ "millimetres": "millimeters",
986
+ "miniaturisation": "miniaturization",
987
+ "miniaturise": "miniaturize",
988
+ "miniaturised": "miniaturized",
989
+ "miniaturises": "miniaturizes",
990
+ "miniaturising": "miniaturizing",
991
+ "minibusses": "minibuses",
992
+ "minimise": "minimize",
993
+ "minimised": "minimized",
994
+ "minimises": "minimizes",
995
+ "minimising": "minimizing",
996
+ "misbehaviour": "misbehavior",
997
+ "misdemeanour": "misdemeanor",
998
+ "misdemeanours": "misdemeanors",
999
+ "misspelt": "misspelled",
1000
+ "mitre": "miter",
1001
+ "mitres": "miters",
1002
+ "mm": "hmm",
1003
+ "mmm": "hmm",
1004
+ "mobilisation": "mobilization",
1005
+ "mobilise": "mobilize",
1006
+ "mobilised": "mobilized",
1007
+ "mobilises": "mobilizes",
1008
+ "mobilising": "mobilizing",
1009
+ "modelled": "modeled",
1010
+ "modeller": "modeler",
1011
+ "modellers": "modelers",
1012
+ "modelling": "modeling",
1013
+ "modernise": "modernize",
1014
+ "modernised": "modernized",
1015
+ "modernises": "modernizes",
1016
+ "modernising": "modernizing",
1017
+ "moisturise": "moisturize",
1018
+ "moisturised": "moisturized",
1019
+ "moisturiser": "moisturizer",
1020
+ "moisturisers": "moisturizers",
1021
+ "moisturises": "moisturizes",
1022
+ "moisturising": "moisturizing",
1023
+ "monologue": "monolog",
1024
+ "monologues": "monologs",
1025
+ "monopolisation": "monopolization",
1026
+ "monopolise": "monopolize",
1027
+ "monopolised": "monopolized",
1028
+ "monopolises": "monopolizes",
1029
+ "monopolising": "monopolizing",
1030
+ "moralise": "moralize",
1031
+ "moralised": "moralized",
1032
+ "moralises": "moralizes",
1033
+ "moralising": "moralizing",
1034
+ "motorised": "motorized",
1035
+ "mould": "mold",
1036
+ "moulded": "molded",
1037
+ "moulder": "molder",
1038
+ "mouldered": "moldered",
1039
+ "mouldering": "moldering",
1040
+ "moulders": "molders",
1041
+ "mouldier": "moldier",
1042
+ "mouldiest": "moldiest",
1043
+ "moulding": "molding",
1044
+ "mouldings": "moldings",
1045
+ "moulds": "molds",
1046
+ "mouldy": "moldy",
1047
+ "moult": "molt",
1048
+ "moulted": "molted",
1049
+ "moulting": "molting",
1050
+ "moults": "molts",
1051
+ "moustache": "mustache",
1052
+ "moustached": "mustached",
1053
+ "moustaches": "mustaches",
1054
+ "moustachioed": "mustachioed",
1055
+ "multicoloured": "multicolored",
1056
+ "nationalisation": "nationalization",
1057
+ "nationalisations": "nationalizations",
1058
+ "nationalise": "nationalize",
1059
+ "nationalised": "nationalized",
1060
+ "nationalises": "nationalizes",
1061
+ "nationalising": "nationalizing",
1062
+ "naturalisation": "naturalization",
1063
+ "naturalise": "naturalize",
1064
+ "naturalised": "naturalized",
1065
+ "naturalises": "naturalizes",
1066
+ "naturalising": "naturalizing",
1067
+ "neighbour": "neighbor",
1068
+ "neighbourhood": "neighborhood",
1069
+ "neighbourhoods": "neighborhoods",
1070
+ "neighbouring": "neighboring",
1071
+ "neighbourliness": "neighborliness",
1072
+ "neighbourly": "neighborly",
1073
+ "neighbours": "neighbors",
1074
+ "neutralisation": "neutralization",
1075
+ "neutralise": "neutralize",
1076
+ "neutralised": "neutralized",
1077
+ "neutralises": "neutralizes",
1078
+ "neutralising": "neutralizing",
1079
+ "normalisation": "normalization",
1080
+ "normalise": "normalize",
1081
+ "normalised": "normalized",
1082
+ "normalises": "normalizes",
1083
+ "normalising": "normalizing",
1084
+ "odour": "odor",
1085
+ "odourless": "odorless",
1086
+ "odours": "odors",
1087
+ "oesophagus": "esophagus",
1088
+ "oesophaguses": "esophaguses",
1089
+ "oestrogen": "estrogen",
1090
+ "offence": "offense",
1091
+ "offences": "offenses",
1092
+ "omelette": "omelet",
1093
+ "omelettes": "omelets",
1094
+ "optimise": "optimize",
1095
+ "optimised": "optimized",
1096
+ "optimises": "optimizes",
1097
+ "optimising": "optimizing",
1098
+ "organisation": "organization",
1099
+ "organisational": "organizational",
1100
+ "organisations": "organizations",
1101
+ "organise": "organize",
1102
+ "organised": "organized",
1103
+ "organiser": "organizer",
1104
+ "organisers": "organizers",
1105
+ "organises": "organizes",
1106
+ "organising": "organizing",
1107
+ "orthopaedic": "orthopedic",
1108
+ "orthopaedics": "orthopedics",
1109
+ "ostracise": "ostracize",
1110
+ "ostracised": "ostracized",
1111
+ "ostracises": "ostracizes",
1112
+ "ostracising": "ostracizing",
1113
+ "outmanoeuvre": "outmaneuver",
1114
+ "outmanoeuvred": "outmaneuvered",
1115
+ "outmanoeuvres": "outmaneuvers",
1116
+ "outmanoeuvring": "outmaneuvering",
1117
+ "overemphasise": "overemphasize",
1118
+ "overemphasised": "overemphasized",
1119
+ "overemphasises": "overemphasizes",
1120
+ "overemphasising": "overemphasizing",
1121
+ "oxidisation": "oxidization",
1122
+ "oxidise": "oxidize",
1123
+ "oxidised": "oxidized",
1124
+ "oxidises": "oxidizes",
1125
+ "oxidising": "oxidizing",
1126
+ "paederast": "pederast",
1127
+ "paederasts": "pederasts",
1128
+ "paediatric": "pediatric",
1129
+ "paediatrician": "pediatrician",
1130
+ "paediatricians": "pediatricians",
1131
+ "paediatrics": "pediatrics",
1132
+ "paedophile": "pedophile",
1133
+ "paedophiles": "pedophiles",
1134
+ "paedophilia": "pedophilia",
1135
+ "palaeolithic": "paleolithic",
1136
+ "palaeontologist": "paleontologist",
1137
+ "palaeontologists": "paleontologists",
1138
+ "palaeontology": "paleontology",
1139
+ "panelled": "paneled",
1140
+ "panelling": "paneling",
1141
+ "panellist": "panelist",
1142
+ "panellists": "panelists",
1143
+ "paralyse": "paralyze",
1144
+ "paralysed": "paralyzed",
1145
+ "paralyses": "paralyzes",
1146
+ "paralysing": "paralyzing",
1147
+ "parcelled": "parceled",
1148
+ "parcelling": "parceling",
1149
+ "parlour": "parlor",
1150
+ "parlours": "parlors",
1151
+ "particularise": "particularize",
1152
+ "particularised": "particularized",
1153
+ "particularises": "particularizes",
1154
+ "particularising": "particularizing",
1155
+ "passivisation": "passivization",
1156
+ "passivise": "passivize",
1157
+ "passivised": "passivized",
1158
+ "passivises": "passivizes",
1159
+ "passivising": "passivizing",
1160
+ "pasteurisation": "pasteurization",
1161
+ "pasteurise": "pasteurize",
1162
+ "pasteurised": "pasteurized",
1163
+ "pasteurises": "pasteurizes",
1164
+ "pasteurising": "pasteurizing",
1165
+ "patronise": "patronize",
1166
+ "patronised": "patronized",
1167
+ "patronises": "patronizes",
1168
+ "patronising": "patronizing",
1169
+ "patronisingly": "patronizingly",
1170
+ "pedalled": "pedaled",
1171
+ "pedalling": "pedaling",
1172
+ "pedestrianisation": "pedestrianization",
1173
+ "pedestrianise": "pedestrianize",
1174
+ "pedestrianised": "pedestrianized",
1175
+ "pedestrianises": "pedestrianizes",
1176
+ "pedestrianising": "pedestrianizing",
1177
+ "penalise": "penalize",
1178
+ "penalised": "penalized",
1179
+ "penalises": "penalizes",
1180
+ "penalising": "penalizing",
1181
+ "pencilled": "penciled",
1182
+ "pencilling": "penciling",
1183
+ "personalise": "personalize",
1184
+ "personalised": "personalized",
1185
+ "personalises": "personalizes",
1186
+ "personalising": "personalizing",
1187
+ "pharmacopoeia": "pharmacopeia",
1188
+ "pharmacopoeias": "pharmacopeias",
1189
+ "philosophise": "philosophize",
1190
+ "philosophised": "philosophized",
1191
+ "philosophises": "philosophizes",
1192
+ "philosophising": "philosophizing",
1193
+ "philtre": "filter",
1194
+ "philtres": "filters",
1195
+ "phoney": "phony",
1196
+ "plagiarise": "plagiarize",
1197
+ "plagiarised": "plagiarized",
1198
+ "plagiarises": "plagiarizes",
1199
+ "plagiarising": "plagiarizing",
1200
+ "plough": "plow",
1201
+ "ploughed": "plowed",
1202
+ "ploughing": "plowing",
1203
+ "ploughman": "plowman",
1204
+ "ploughmen": "plowmen",
1205
+ "ploughs": "plows",
1206
+ "ploughshare": "plowshare",
1207
+ "ploughshares": "plowshares",
1208
+ "polarisation": "polarization",
1209
+ "polarise": "polarize",
1210
+ "polarised": "polarized",
1211
+ "polarises": "polarizes",
1212
+ "polarising": "polarizing",
1213
+ "politicisation": "politicization",
1214
+ "politicise": "politicize",
1215
+ "politicised": "politicized",
1216
+ "politicises": "politicizes",
1217
+ "politicising": "politicizing",
1218
+ "popularisation": "popularization",
1219
+ "popularise": "popularize",
1220
+ "popularised": "popularized",
1221
+ "popularises": "popularizes",
1222
+ "popularising": "popularizing",
1223
+ "pouffe": "pouf",
1224
+ "pouffes": "poufs",
1225
+ "practise": "practice",
1226
+ "practised": "practiced",
1227
+ "practises": "practices",
1228
+ "practising": "practicing",
1229
+ "praesidium": "presidium",
1230
+ "praesidiums": "presidiums",
1231
+ "pressurisation": "pressurization",
1232
+ "pressurise": "pressurize",
1233
+ "pressurised": "pressurized",
1234
+ "pressurises": "pressurizes",
1235
+ "pressurising": "pressurizing",
1236
+ "pretence": "pretense",
1237
+ "pretences": "pretenses",
1238
+ "primaeval": "primeval",
1239
+ "prioritisation": "prioritization",
1240
+ "prioritise": "prioritize",
1241
+ "prioritised": "prioritized",
1242
+ "prioritises": "prioritizes",
1243
+ "prioritising": "prioritizing",
1244
+ "privatisation": "privatization",
1245
+ "privatisations": "privatizations",
1246
+ "privatise": "privatize",
1247
+ "privatised": "privatized",
1248
+ "privatises": "privatizes",
1249
+ "privatising": "privatizing",
1250
+ "professionalisation": "professionalization",
1251
+ "professionalise": "professionalize",
1252
+ "professionalised": "professionalized",
1253
+ "professionalises": "professionalizes",
1254
+ "professionalising": "professionalizing",
1255
+ "programme": "program",
1256
+ "programmes": "programs",
1257
+ "prologue": "prolog",
1258
+ "prologues": "prologs",
1259
+ "propagandise": "propagandize",
1260
+ "propagandised": "propagandized",
1261
+ "propagandises": "propagandizes",
1262
+ "propagandising": "propagandizing",
1263
+ "proselytise": "proselytize",
1264
+ "proselytised": "proselytized",
1265
+ "proselytiser": "proselytizer",
1266
+ "proselytisers": "proselytizers",
1267
+ "proselytises": "proselytizes",
1268
+ "proselytising": "proselytizing",
1269
+ "psychoanalyse": "psychoanalyze",
1270
+ "psychoanalysed": "psychoanalyzed",
1271
+ "psychoanalyses": "psychoanalyzes",
1272
+ "psychoanalysing": "psychoanalyzing",
1273
+ "publicise": "publicize",
1274
+ "publicised": "publicized",
1275
+ "publicises": "publicizes",
1276
+ "publicising": "publicizing",
1277
+ "pulverisation": "pulverization",
1278
+ "pulverise": "pulverize",
1279
+ "pulverised": "pulverized",
1280
+ "pulverises": "pulverizes",
1281
+ "pulverising": "pulverizing",
1282
+ "pummelled": "pummel",
1283
+ "pummelling": "pummeled",
1284
+ "pyjama": "pajama",
1285
+ "pyjamas": "pajamas",
1286
+ "pzazz": "pizzazz",
1287
+ "quarrelled": "quarreled",
1288
+ "quarrelling": "quarreling",
1289
+ "radicalise": "radicalize",
1290
+ "radicalised": "radicalized",
1291
+ "radicalises": "radicalizes",
1292
+ "radicalising": "radicalizing",
1293
+ "rancour": "rancor",
1294
+ "randomise": "randomize",
1295
+ "randomised": "randomized",
1296
+ "randomises": "randomizes",
1297
+ "randomising": "randomizing",
1298
+ "rationalisation": "rationalization",
1299
+ "rationalisations": "rationalizations",
1300
+ "rationalise": "rationalize",
1301
+ "rationalised": "rationalized",
1302
+ "rationalises": "rationalizes",
1303
+ "rationalising": "rationalizing",
1304
+ "ravelled": "raveled",
1305
+ "ravelling": "raveling",
1306
+ "realisable": "realizable",
1307
+ "realisation": "realization",
1308
+ "realisations": "realizations",
1309
+ "realise": "realize",
1310
+ "realised": "realized",
1311
+ "realises": "realizes",
1312
+ "realising": "realizing",
1313
+ "recognisable": "recognizable",
1314
+ "recognisably": "recognizably",
1315
+ "recognisance": "recognizance",
1316
+ "recognise": "recognize",
1317
+ "recognised": "recognized",
1318
+ "recognises": "recognizes",
1319
+ "recognising": "recognizing",
1320
+ "reconnoitre": "reconnoiter",
1321
+ "reconnoitred": "reconnoitered",
1322
+ "reconnoitres": "reconnoiters",
1323
+ "reconnoitring": "reconnoitering",
1324
+ "refuelled": "refueled",
1325
+ "refuelling": "refueling",
1326
+ "regularisation": "regularization",
1327
+ "regularise": "regularize",
1328
+ "regularised": "regularized",
1329
+ "regularises": "regularizes",
1330
+ "regularising": "regularizing",
1331
+ "remodelled": "remodeled",
1332
+ "remodelling": "remodeling",
1333
+ "remould": "remold",
1334
+ "remoulded": "remolded",
1335
+ "remoulding": "remolding",
1336
+ "remoulds": "remolds",
1337
+ "reorganisation": "reorganization",
1338
+ "reorganisations": "reorganizations",
1339
+ "reorganise": "reorganize",
1340
+ "reorganised": "reorganized",
1341
+ "reorganises": "reorganizes",
1342
+ "reorganising": "reorganizing",
1343
+ "revelled": "reveled",
1344
+ "reveller": "reveler",
1345
+ "revellers": "revelers",
1346
+ "revelling": "reveling",
1347
+ "revitalise": "revitalize",
1348
+ "revitalised": "revitalized",
1349
+ "revitalises": "revitalizes",
1350
+ "revitalising": "revitalizing",
1351
+ "revolutionise": "revolutionize",
1352
+ "revolutionised": "revolutionized",
1353
+ "revolutionises": "revolutionizes",
1354
+ "revolutionising": "revolutionizing",
1355
+ "rhapsodise": "rhapsodize",
1356
+ "rhapsodised": "rhapsodized",
1357
+ "rhapsodises": "rhapsodizes",
1358
+ "rhapsodising": "rhapsodizing",
1359
+ "rigour": "rigor",
1360
+ "rigours": "rigors",
1361
+ "ritualised": "ritualized",
1362
+ "rivalled": "rivaled",
1363
+ "rivalling": "rivaling",
1364
+ "romanticise": "romanticize",
1365
+ "romanticised": "romanticized",
1366
+ "romanticises": "romanticizes",
1367
+ "romanticising": "romanticizing",
1368
+ "rumour": "rumor",
1369
+ "rumoured": "rumored",
1370
+ "rumours": "rumors",
1371
+ "sabre": "saber",
1372
+ "sabres": "sabers",
1373
+ "saltpetre": "saltpeter",
1374
+ "sanitise": "sanitize",
1375
+ "sanitised": "sanitized",
1376
+ "sanitises": "sanitizes",
1377
+ "sanitising": "sanitizing",
1378
+ "satirise": "satirize",
1379
+ "satirised": "satirized",
1380
+ "satirises": "satirizes",
1381
+ "satirising": "satirizing",
1382
+ "saviour": "savior",
1383
+ "saviours": "saviors",
1384
+ "savour": "savor",
1385
+ "savoured": "savored",
1386
+ "savouries": "savories",
1387
+ "savouring": "savoring",
1388
+ "savours": "savors",
1389
+ "savoury": "savory",
1390
+ "scandalise": "scandalize",
1391
+ "scandalised": "scandalized",
1392
+ "scandalises": "scandalizes",
1393
+ "scandalising": "scandalizing",
1394
+ "sceptic": "skeptic",
1395
+ "sceptical": "skeptical",
1396
+ "sceptically": "skeptically",
1397
+ "scepticism": "skepticism",
1398
+ "sceptics": "skeptics",
1399
+ "sceptre": "scepter",
1400
+ "sceptres": "scepters",
1401
+ "scrutinise": "scrutinize",
1402
+ "scrutinised": "scrutinized",
1403
+ "scrutinises": "scrutinizes",
1404
+ "scrutinising": "scrutinizing",
1405
+ "secularisation": "secularization",
1406
+ "secularise": "secularize",
1407
+ "secularised": "secularized",
1408
+ "secularises": "secularizes",
1409
+ "secularising": "secularizing",
1410
+ "sensationalise": "sensationalize",
1411
+ "sensationalised": "sensationalized",
1412
+ "sensationalises": "sensationalizes",
1413
+ "sensationalising": "sensationalizing",
1414
+ "sensitise": "sensitize",
1415
+ "sensitised": "sensitized",
1416
+ "sensitises": "sensitizes",
1417
+ "sensitising": "sensitizing",
1418
+ "sentimentalise": "sentimentalize",
1419
+ "sentimentalised": "sentimentalized",
1420
+ "sentimentalises": "sentimentalizes",
1421
+ "sentimentalising": "sentimentalizing",
1422
+ "sepulchre": "sepulcher",
1423
+ "sepulchres": "sepulchers",
1424
+ "serialisation": "serialization",
1425
+ "serialisations": "serializations",
1426
+ "serialise": "serialize",
1427
+ "serialised": "serialized",
1428
+ "serialises": "serializes",
1429
+ "serialising": "serializing",
1430
+ "sermonise": "sermonize",
1431
+ "sermonised": "sermonized",
1432
+ "sermonises": "sermonizes",
1433
+ "sermonising": "sermonizing",
1434
+ "sheikh": "sheik",
1435
+ "shovelled": "shoveled",
1436
+ "shovelling": "shoveling",
1437
+ "shrivelled": "shriveled",
1438
+ "shrivelling": "shriveling",
1439
+ "signalise": "signalize",
1440
+ "signalised": "signalized",
1441
+ "signalises": "signalizes",
1442
+ "signalising": "signalizing",
1443
+ "signalled": "signaled",
1444
+ "signalling": "signaling",
1445
+ "smoulder": "smolder",
1446
+ "smouldered": "smoldered",
1447
+ "smouldering": "smoldering",
1448
+ "smoulders": "smolders",
1449
+ "snivelled": "sniveled",
1450
+ "snivelling": "sniveling",
1451
+ "snorkelled": "snorkeled",
1452
+ "snorkelling": "snorkeling",
1453
+ "snowplough": "snowplow",
1454
+ "snowploughs": "snowplow",
1455
+ "socialisation": "socialization",
1456
+ "socialise": "socialize",
1457
+ "socialised": "socialized",
1458
+ "socialises": "socializes",
1459
+ "socialising": "socializing",
1460
+ "sodomise": "sodomize",
1461
+ "sodomised": "sodomized",
1462
+ "sodomises": "sodomizes",
1463
+ "sodomising": "sodomizing",
1464
+ "solemnise": "solemnize",
1465
+ "solemnised": "solemnized",
1466
+ "solemnises": "solemnizes",
1467
+ "solemnising": "solemnizing",
1468
+ "sombre": "somber",
1469
+ "specialisation": "specialization",
1470
+ "specialisations": "specializations",
1471
+ "specialise": "specialize",
1472
+ "specialised": "specialized",
1473
+ "specialises": "specializes",
1474
+ "specialising": "specializing",
1475
+ "spectre": "specter",
1476
+ "spectres": "specters",
1477
+ "spiralled": "spiraled",
1478
+ "spiralling": "spiraling",
1479
+ "splendour": "splendor",
1480
+ "splendours": "splendors",
1481
+ "squirrelled": "squirreled",
1482
+ "squirrelling": "squirreling",
1483
+ "stabilisation": "stabilization",
1484
+ "stabilise": "stabilize",
1485
+ "stabilised": "stabilized",
1486
+ "stabiliser": "stabilizer",
1487
+ "stabilisers": "stabilizers",
1488
+ "stabilises": "stabilizes",
1489
+ "stabilising": "stabilizing",
1490
+ "standardisation": "standardization",
1491
+ "standardise": "standardize",
1492
+ "standardised": "standardized",
1493
+ "standardises": "standardizes",
1494
+ "standardising": "standardizing",
1495
+ "stencilled": "stenciled",
1496
+ "stencilling": "stenciling",
1497
+ "sterilisation": "sterilization",
1498
+ "sterilisations": "sterilizations",
1499
+ "sterilise": "sterilize",
1500
+ "sterilised": "sterilized",
1501
+ "steriliser": "sterilizer",
1502
+ "sterilisers": "sterilizers",
1503
+ "sterilises": "sterilizes",
1504
+ "sterilising": "sterilizing",
1505
+ "stigmatisation": "stigmatization",
1506
+ "stigmatise": "stigmatize",
1507
+ "stigmatised": "stigmatized",
1508
+ "stigmatises": "stigmatizes",
1509
+ "stigmatising": "stigmatizing",
1510
+ "storey": "story",
1511
+ "storeys": "stories",
1512
+ "subsidisation": "subsidization",
1513
+ "subsidise": "subsidize",
1514
+ "subsidised": "subsidized",
1515
+ "subsidiser": "subsidizer",
1516
+ "subsidisers": "subsidizers",
1517
+ "subsidises": "subsidizes",
1518
+ "subsidising": "subsidizing",
1519
+ "succour": "succor",
1520
+ "succoured": "succored",
1521
+ "succouring": "succoring",
1522
+ "succours": "succors",
1523
+ "sulphate": "sulfate",
1524
+ "sulphates": "sulfates",
1525
+ "sulphide": "sulfide",
1526
+ "sulphides": "sulfides",
1527
+ "sulphur": "sulfur",
1528
+ "sulphurous": "sulfurous",
1529
+ "summarise": "summarize",
1530
+ "summarised": "summarized",
1531
+ "summarises": "summarizes",
1532
+ "summarising": "summarizing",
1533
+ "swivelled": "swiveled",
1534
+ "swivelling": "swiveling",
1535
+ "symbolise": "symbolize",
1536
+ "symbolised": "symbolized",
1537
+ "symbolises": "symbolizes",
1538
+ "symbolising": "symbolizing",
1539
+ "sympathise": "sympathize",
1540
+ "sympathised": "sympathized",
1541
+ "sympathiser": "sympathizer",
1542
+ "sympathisers": "sympathizers",
1543
+ "sympathises": "sympathizes",
1544
+ "sympathising": "sympathizing",
1545
+ "synchronisation": "synchronization",
1546
+ "synchronise": "synchronize",
1547
+ "synchronised": "synchronized",
1548
+ "synchronises": "synchronizes",
1549
+ "synchronising": "synchronizing",
1550
+ "synthesise": "synthesize",
1551
+ "synthesised": "synthesized",
1552
+ "synthesiser": "synthesizer",
1553
+ "synthesisers": "synthesizers",
1554
+ "synthesises": "synthesizes",
1555
+ "synthesising": "synthesizing",
1556
+ "syphon": "siphon",
1557
+ "syphoned": "siphoned",
1558
+ "syphoning": "siphoning",
1559
+ "syphons": "siphons",
1560
+ "systematisation": "systematization",
1561
+ "systematise": "systematize",
1562
+ "systematised": "systematized",
1563
+ "systematises": "systematizes",
1564
+ "systematising": "systematizing",
1565
+ "tantalise": "tantalize",
1566
+ "tantalised": "tantalized",
1567
+ "tantalises": "tantalizes",
1568
+ "tantalising": "tantalizing",
1569
+ "tantalisingly": "tantalizingly",
1570
+ "tasselled": "tasseled",
1571
+ "technicolour": "technicolor",
1572
+ "temporise": "temporize",
1573
+ "temporised": "temporized",
1574
+ "temporises": "temporizes",
1575
+ "temporising": "temporizing",
1576
+ "tenderise": "tenderize",
1577
+ "tenderised": "tenderized",
1578
+ "tenderises": "tenderizes",
1579
+ "tenderising": "tenderizing",
1580
+ "terrorise": "terrorize",
1581
+ "terrorised": "terrorized",
1582
+ "terrorises": "terrorizes",
1583
+ "terrorising": "terrorizing",
1584
+ "theatre": "theater",
1585
+ "theatregoer": "theatergoer",
1586
+ "theatregoers": "theatergoers",
1587
+ "theatres": "theaters",
1588
+ "theorise": "theorize",
1589
+ "theorised": "theorized",
1590
+ "theorises": "theorizes",
1591
+ "theorising": "theorizing",
1592
+ "tonne": "ton",
1593
+ "tonnes": "tons",
1594
+ "towelled": "toweled",
1595
+ "towelling": "toweling",
1596
+ "toxaemia": "toxemia",
1597
+ "tranquillise": "tranquilize",
1598
+ "tranquillised": "tranquilized",
1599
+ "tranquilliser": "tranquilizer",
1600
+ "tranquillisers": "tranquilizers",
1601
+ "tranquillises": "tranquilizes",
1602
+ "tranquillising": "tranquilizing",
1603
+ "tranquillity": "tranquility",
1604
+ "tranquillize": "tranquilize",
1605
+ "tranquillized": "tranquilized",
1606
+ "tranquillizer": "tranquilizer",
1607
+ "tranquillizers": "tranquilizers",
1608
+ "tranquillizes": "tranquilizes",
1609
+ "tranquillizing": "tranquilizing",
1610
+ "tranquilly": "tranquility",
1611
+ "transistorised": "transistorized",
1612
+ "traumatise": "traumatize",
1613
+ "traumatised": "traumatized",
1614
+ "traumatises": "traumatizes",
1615
+ "traumatising": "traumatizing",
1616
+ "travelled": "traveled",
1617
+ "traveller": "traveler",
1618
+ "travellers": "travelers",
1619
+ "travelling": "traveling",
1620
+ "travelog": "travelogue",
1621
+ "travelogs": "travelogues",
1622
+ "trialled": "trialed",
1623
+ "trialling": "trialing",
1624
+ "tricolour": "tricolor",
1625
+ "tricolours": "tricolors",
1626
+ "trivialise": "trivialize",
1627
+ "trivialised": "trivialized",
1628
+ "trivialises": "trivializes",
1629
+ "trivialising": "trivializing",
1630
+ "tumour": "tumor",
1631
+ "tumours": "tumors",
1632
+ "tunnelled": "tunneled",
1633
+ "tunnelling": "tunneling",
1634
+ "tyrannise": "tyrannize",
1635
+ "tyrannised": "tyrannized",
1636
+ "tyrannises": "tyrannizes",
1637
+ "tyrannising": "tyrannizing",
1638
+ "tyre": "tire",
1639
+ "tyres": "tires",
1640
+ "unauthorised": "unauthorized",
1641
+ "uncivilised": "uncivilized",
1642
+ "underutilised": "underutilized",
1643
+ "unequalled": "unequaled",
1644
+ "unfavourable": "unfavorable",
1645
+ "unfavourably": "unfavorably",
1646
+ "unionisation": "unionization",
1647
+ "unionise": "unionize",
1648
+ "unionised": "unionized",
1649
+ "unionises": "unionizes",
1650
+ "unionising": "unionizing",
1651
+ "unorganised": "unorganized",
1652
+ "unravelled": "unraveled",
1653
+ "unravelling": "unraveling",
1654
+ "unrecognisable": "unrecognizable",
1655
+ "unrecognised": "unrecognized",
1656
+ "unrivalled": "unrivaled",
1657
+ "unsavoury": "unsavory",
1658
+ "untrammelled": "untrammeled",
1659
+ "urbanisation": "urbanization",
1660
+ "urbanise": "urbanize",
1661
+ "urbanised": "urbanized",
1662
+ "urbanises": "urbanizes",
1663
+ "urbanising": "urbanizing",
1664
+ "utilisable": "utilizable",
1665
+ "utilisation": "utilization",
1666
+ "utilise": "utilize",
1667
+ "utilised": "utilized",
1668
+ "utilises": "utilizes",
1669
+ "utilising": "utilizing",
1670
+ "valour": "valor",
1671
+ "vandalise": "vandalize",
1672
+ "vandalised": "vandalized",
1673
+ "vandalises": "vandalizes",
1674
+ "vandalising": "vandalizing",
1675
+ "vaporisation": "vaporization",
1676
+ "vaporise": "vaporize",
1677
+ "vaporised": "vaporized",
1678
+ "vaporises": "vaporizes",
1679
+ "vaporising": "vaporizing",
1680
+ "vapour": "vapor",
1681
+ "vapours": "vapors",
1682
+ "verbalise": "verbalize",
1683
+ "verbalised": "verbalized",
1684
+ "verbalises": "verbalizes",
1685
+ "verbalising": "verbalizing",
1686
+ "victimisation": "victimization",
1687
+ "victimise": "victimize",
1688
+ "victimised": "victimized",
1689
+ "victimises": "victimizes",
1690
+ "victimising": "victimizing",
1691
+ "videodisc": "videodisk",
1692
+ "videodiscs": "videodisks",
1693
+ "vigour": "vigor",
1694
+ "visualisation": "visualization",
1695
+ "visualisations": "visualizations",
1696
+ "visualise": "visualize",
1697
+ "visualised": "visualized",
1698
+ "visualises": "visualizes",
1699
+ "visualising": "visualizing",
1700
+ "vocalisation": "vocalization",
1701
+ "vocalisations": "vocalizations",
1702
+ "vocalise": "vocalize",
1703
+ "vocalised": "vocalized",
1704
+ "vocalises": "vocalizes",
1705
+ "vocalising": "vocalizing",
1706
+ "vulcanised": "vulcanized",
1707
+ "vulgarisation": "vulgarization",
1708
+ "vulgarise": "vulgarize",
1709
+ "vulgarised": "vulgarized",
1710
+ "vulgarises": "vulgarizes",
1711
+ "vulgarising": "vulgarizing",
1712
+ "waggon": "wagon",
1713
+ "waggons": "wagons",
1714
+ "watercolour": "watercolor",
1715
+ "watercolours": "watercolors",
1716
+ "weaselled": "weaseled",
1717
+ "weaselling": "weaseling",
1718
+ "westernisation": "westernization",
1719
+ "westernise": "westernize",
1720
+ "westernised": "westernized",
1721
+ "westernises": "westernizes",
1722
+ "westernising": "westernizing",
1723
+ "womanise": "womanize",
1724
+ "womanised": "womanized",
1725
+ "womaniser": "womanizer",
1726
+ "womanisers": "womanizers",
1727
+ "womanises": "womanizes",
1728
+ "womanising": "womanizing",
1729
+ "woollen": "woolen",
1730
+ "woollens": "woolens",
1731
+ "woollies": "woolies",
1732
+ "woolly": "wooly",
1733
+ "worshipped": "worshiped",
1734
+ "worshipper": "worshiper",
1735
+ "worshipping": "worshiping",
1736
+ "yodelled": "yodeled",
1737
+ "yodelling": "yodeling",
1738
+ "yoghourt": "yogurt",
1739
+ "yoghourts": "yogurts",
1740
+ "yoghurt": "yogurt",
1741
+ "yoghurts": "yogurts"
1742
+ }
checkpoint-1750/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb1909ab21fc35a92565ba4bdba1da5e45c06d60b6c6b8fea4e295532a8d6c80
3
+ size 52563258
checkpoint-1750/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2968c6706b66983ec7fc1928adffcd46b8bcc2770c7bf26b672f7651983d558
3
+ size 14244
checkpoint-1750/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45476886758c4cbb4409f5aa7b32bc0b88c0c00b67f10a05476e7d0a5c4f490a
3
+ size 1064
checkpoint-1750/special_tokens_map.json ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|startoftranscript|>",
4
+ "<|en|>",
5
+ "<|zh|>",
6
+ "<|de|>",
7
+ "<|es|>",
8
+ "<|ru|>",
9
+ "<|ko|>",
10
+ "<|fr|>",
11
+ "<|ja|>",
12
+ "<|pt|>",
13
+ "<|tr|>",
14
+ "<|pl|>",
15
+ "<|ca|>",
16
+ "<|nl|>",
17
+ "<|ar|>",
18
+ "<|sv|>",
19
+ "<|it|>",
20
+ "<|id|>",
21
+ "<|hi|>",
22
+ "<|fi|>",
23
+ "<|vi|>",
24
+ "<|he|>",
25
+ "<|uk|>",
26
+ "<|el|>",
27
+ "<|ms|>",
28
+ "<|cs|>",
29
+ "<|ro|>",
30
+ "<|da|>",
31
+ "<|hu|>",
32
+ "<|ta|>",
33
+ "<|no|>",
34
+ "<|th|>",
35
+ "<|ur|>",
36
+ "<|hr|>",
37
+ "<|bg|>",
38
+ "<|lt|>",
39
+ "<|la|>",
40
+ "<|mi|>",
41
+ "<|ml|>",
42
+ "<|cy|>",
43
+ "<|sk|>",
44
+ "<|te|>",
45
+ "<|fa|>",
46
+ "<|lv|>",
47
+ "<|bn|>",
48
+ "<|sr|>",
49
+ "<|az|>",
50
+ "<|sl|>",
51
+ "<|kn|>",
52
+ "<|et|>",
53
+ "<|mk|>",
54
+ "<|br|>",
55
+ "<|eu|>",
56
+ "<|is|>",
57
+ "<|hy|>",
58
+ "<|ne|>",
59
+ "<|mn|>",
60
+ "<|bs|>",
61
+ "<|kk|>",
62
+ "<|sq|>",
63
+ "<|sw|>",
64
+ "<|gl|>",
65
+ "<|mr|>",
66
+ "<|pa|>",
67
+ "<|si|>",
68
+ "<|km|>",
69
+ "<|sn|>",
70
+ "<|yo|>",
71
+ "<|so|>",
72
+ "<|af|>",
73
+ "<|oc|>",
74
+ "<|ka|>",
75
+ "<|be|>",
76
+ "<|tg|>",
77
+ "<|sd|>",
78
+ "<|gu|>",
79
+ "<|am|>",
80
+ "<|yi|>",
81
+ "<|lo|>",
82
+ "<|uz|>",
83
+ "<|fo|>",
84
+ "<|ht|>",
85
+ "<|ps|>",
86
+ "<|tk|>",
87
+ "<|nn|>",
88
+ "<|mt|>",
89
+ "<|sa|>",
90
+ "<|lb|>",
91
+ "<|my|>",
92
+ "<|bo|>",
93
+ "<|tl|>",
94
+ "<|mg|>",
95
+ "<|as|>",
96
+ "<|tt|>",
97
+ "<|haw|>",
98
+ "<|ln|>",
99
+ "<|ha|>",
100
+ "<|ba|>",
101
+ "<|jw|>",
102
+ "<|su|>",
103
+ "<|yue|>",
104
+ "<|translate|>",
105
+ "<|transcribe|>",
106
+ "<|startoflm|>",
107
+ "<|startofprev|>",
108
+ "<|nospeech|>",
109
+ "<|notimestamps|>"
110
+ ],
111
+ "bos_token": {
112
+ "content": "<|endoftext|>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "eos_token": {
119
+ "content": "<|endoftext|>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ },
125
+ "pad_token": {
126
+ "content": "<|endoftext|>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false
131
+ },
132
+ "unk_token": {
133
+ "content": "<|endoftext|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false
138
+ }
139
+ }
checkpoint-1750/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1750/trainer_state.json ADDED
@@ -0,0 +1,2546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.046749928297655986,
3
+ "best_model_checkpoint": "./whisper-large-v3-turbo-finetuned-lora/checkpoint-1000",
4
+ "epoch": 7.0,
5
+ "eval_steps": 250,
6
+ "global_step": 1750,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 11.320270538330078,
14
+ "learning_rate": 1.25e-06,
15
+ "loss": 3.7364,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 10.51279354095459,
21
+ "learning_rate": 2.8124999999999998e-06,
22
+ "loss": 3.669,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 10.274462699890137,
28
+ "learning_rate": 4.3750000000000005e-06,
29
+ "loss": 3.6416,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 10.032905578613281,
35
+ "learning_rate": 5.9375e-06,
36
+ "loss": 3.5981,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 9.448946952819824,
42
+ "learning_rate": 7.1875e-06,
43
+ "loss": 3.4937,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 8.844466209411621,
49
+ "learning_rate": 8.750000000000001e-06,
50
+ "loss": 3.3145,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 8.622856140136719,
56
+ "learning_rate": 1.03125e-05,
57
+ "loss": 3.0978,
58
+ "step": 35
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 6.81293249130249,
63
+ "learning_rate": 1.1875e-05,
64
+ "loss": 2.9583,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 5.21970272064209,
70
+ "learning_rate": 1.34375e-05,
71
+ "loss": 2.7668,
72
+ "step": 45
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 4.440727710723877,
77
+ "learning_rate": 1.5e-05,
78
+ "loss": 2.5467,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 4.219883918762207,
84
+ "learning_rate": 1.6562500000000003e-05,
85
+ "loss": 2.3237,
86
+ "step": 55
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 3.6006925106048584,
91
+ "learning_rate": 1.8125e-05,
92
+ "loss": 2.1307,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 3.07859206199646,
98
+ "learning_rate": 1.96875e-05,
99
+ "loss": 1.9725,
100
+ "step": 65
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 3.0515847206115723,
105
+ "learning_rate": 2.125e-05,
106
+ "loss": 1.8568,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.7134475708007812,
112
+ "learning_rate": 2.28125e-05,
113
+ "loss": 1.6583,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 2.7643449306488037,
119
+ "learning_rate": 2.4375000000000003e-05,
120
+ "loss": 1.4433,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 2.427220582962036,
126
+ "learning_rate": 2.59375e-05,
127
+ "loss": 1.3435,
128
+ "step": 85
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 2.0288472175598145,
133
+ "learning_rate": 2.75e-05,
134
+ "loss": 1.2446,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.7967100143432617,
140
+ "learning_rate": 2.90625e-05,
141
+ "loss": 1.1031,
142
+ "step": 95
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 2.407944440841675,
147
+ "learning_rate": 3.0625e-05,
148
+ "loss": 0.9777,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 3.4412190914154053,
154
+ "learning_rate": 3.21875e-05,
155
+ "loss": 0.9294,
156
+ "step": 105
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.0439155101776123,
161
+ "learning_rate": 3.375e-05,
162
+ "loss": 0.8108,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.616652011871338,
168
+ "learning_rate": 3.53125e-05,
169
+ "loss": 0.6951,
170
+ "step": 115
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 1.690824031829834,
175
+ "learning_rate": 3.6875e-05,
176
+ "loss": 0.6352,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 2.7902116775512695,
182
+ "learning_rate": 3.84375e-05,
183
+ "loss": 0.5562,
184
+ "step": 125
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 2.4872801303863525,
189
+ "learning_rate": 4e-05,
190
+ "loss": 0.5111,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 2.4177122116088867,
196
+ "learning_rate": 4.15625e-05,
197
+ "loss": 0.4696,
198
+ "step": 135
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 2.0231056213378906,
203
+ "learning_rate": 4.3125e-05,
204
+ "loss": 0.4462,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 2.004688024520874,
210
+ "learning_rate": 4.46875e-05,
211
+ "loss": 0.4224,
212
+ "step": 145
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 3.170652389526367,
217
+ "learning_rate": 4.625e-05,
218
+ "loss": 0.3967,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.8620476722717285,
224
+ "learning_rate": 4.7812500000000003e-05,
225
+ "loss": 0.3739,
226
+ "step": 155
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.4667856693267822,
231
+ "learning_rate": 4.9375e-05,
232
+ "loss": 0.3542,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 1.7935612201690674,
238
+ "learning_rate": 5.09375e-05,
239
+ "loss": 0.3409,
240
+ "step": 165
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 1.6230987310409546,
245
+ "learning_rate": 5.25e-05,
246
+ "loss": 0.3068,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 2.739957094192505,
252
+ "learning_rate": 5.40625e-05,
253
+ "loss": 0.2963,
254
+ "step": 175
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.7342944145202637,
259
+ "learning_rate": 5.5625000000000004e-05,
260
+ "loss": 0.253,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 2.0191333293914795,
266
+ "learning_rate": 5.71875e-05,
267
+ "loss": 0.2175,
268
+ "step": 185
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 1.6039254665374756,
273
+ "learning_rate": 5.875e-05,
274
+ "loss": 0.2009,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 2.2860054969787598,
280
+ "learning_rate": 6.03125e-05,
281
+ "loss": 0.1774,
282
+ "step": 195
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 1.528680443763733,
287
+ "learning_rate": 6.1875e-05,
288
+ "loss": 0.1603,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 1.526693344116211,
294
+ "learning_rate": 6.25e-05,
295
+ "loss": 0.1504,
296
+ "step": 205
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 2.199506998062134,
301
+ "learning_rate": 6.25e-05,
302
+ "loss": 0.1357,
303
+ "step": 210
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 2.170020341873169,
308
+ "learning_rate": 6.25e-05,
309
+ "loss": 0.1519,
310
+ "step": 215
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 1.5418131351470947,
315
+ "learning_rate": 6.25e-05,
316
+ "loss": 0.1524,
317
+ "step": 220
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 2.1583192348480225,
322
+ "learning_rate": 6.25e-05,
323
+ "loss": 0.1264,
324
+ "step": 225
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.908937692642212,
329
+ "learning_rate": 6.25e-05,
330
+ "loss": 0.1221,
331
+ "step": 230
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 1.4072145223617554,
336
+ "learning_rate": 6.25e-05,
337
+ "loss": 0.1254,
338
+ "step": 235
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 1.3102571964263916,
343
+ "learning_rate": 6.25e-05,
344
+ "loss": 0.1412,
345
+ "step": 240
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 1.4941678047180176,
350
+ "learning_rate": 6.25e-05,
351
+ "loss": 0.1203,
352
+ "step": 245
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 1.8279727697372437,
357
+ "learning_rate": 6.25e-05,
358
+ "loss": 0.1107,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "eval_cer": 0.07099835736448257,
364
+ "eval_loss": 0.11958163231611252,
365
+ "eval_runtime": 280.2233,
366
+ "eval_samples_per_second": 1.784,
367
+ "eval_steps_per_second": 0.446,
368
+ "step": 250
369
+ },
370
+ {
371
+ "epoch": 1.02,
372
+ "grad_norm": 1.0286716222763062,
373
+ "learning_rate": 6.25e-05,
374
+ "loss": 0.1047,
375
+ "step": 255
376
+ },
377
+ {
378
+ "epoch": 1.04,
379
+ "grad_norm": 1.7464964389801025,
380
+ "learning_rate": 6.25e-05,
381
+ "loss": 0.1108,
382
+ "step": 260
383
+ },
384
+ {
385
+ "epoch": 1.06,
386
+ "grad_norm": 1.3992992639541626,
387
+ "learning_rate": 6.25e-05,
388
+ "loss": 0.1176,
389
+ "step": 265
390
+ },
391
+ {
392
+ "epoch": 1.08,
393
+ "grad_norm": 1.150010347366333,
394
+ "learning_rate": 6.25e-05,
395
+ "loss": 0.1059,
396
+ "step": 270
397
+ },
398
+ {
399
+ "epoch": 1.1,
400
+ "grad_norm": 1.983775019645691,
401
+ "learning_rate": 6.25e-05,
402
+ "loss": 0.1223,
403
+ "step": 275
404
+ },
405
+ {
406
+ "epoch": 1.12,
407
+ "grad_norm": 1.0895100831985474,
408
+ "learning_rate": 6.25e-05,
409
+ "loss": 0.0929,
410
+ "step": 280
411
+ },
412
+ {
413
+ "epoch": 1.1400000000000001,
414
+ "grad_norm": 1.631362795829773,
415
+ "learning_rate": 6.25e-05,
416
+ "loss": 0.1005,
417
+ "step": 285
418
+ },
419
+ {
420
+ "epoch": 1.16,
421
+ "grad_norm": 1.39686918258667,
422
+ "learning_rate": 6.25e-05,
423
+ "loss": 0.1022,
424
+ "step": 290
425
+ },
426
+ {
427
+ "epoch": 1.18,
428
+ "grad_norm": 1.2856369018554688,
429
+ "learning_rate": 6.25e-05,
430
+ "loss": 0.0908,
431
+ "step": 295
432
+ },
433
+ {
434
+ "epoch": 1.2,
435
+ "grad_norm": 1.2866718769073486,
436
+ "learning_rate": 6.25e-05,
437
+ "loss": 0.103,
438
+ "step": 300
439
+ },
440
+ {
441
+ "epoch": 1.22,
442
+ "grad_norm": 1.1115745306015015,
443
+ "learning_rate": 6.25e-05,
444
+ "loss": 0.0934,
445
+ "step": 305
446
+ },
447
+ {
448
+ "epoch": 1.24,
449
+ "grad_norm": 1.2674397230148315,
450
+ "learning_rate": 6.25e-05,
451
+ "loss": 0.0807,
452
+ "step": 310
453
+ },
454
+ {
455
+ "epoch": 1.26,
456
+ "grad_norm": 3.003493547439575,
457
+ "learning_rate": 6.25e-05,
458
+ "loss": 0.1163,
459
+ "step": 315
460
+ },
461
+ {
462
+ "epoch": 1.28,
463
+ "grad_norm": 1.3472819328308105,
464
+ "learning_rate": 6.25e-05,
465
+ "loss": 0.0939,
466
+ "step": 320
467
+ },
468
+ {
469
+ "epoch": 1.3,
470
+ "grad_norm": 1.576393485069275,
471
+ "learning_rate": 6.25e-05,
472
+ "loss": 0.1023,
473
+ "step": 325
474
+ },
475
+ {
476
+ "epoch": 1.32,
477
+ "grad_norm": 1.2895311117172241,
478
+ "learning_rate": 6.25e-05,
479
+ "loss": 0.0908,
480
+ "step": 330
481
+ },
482
+ {
483
+ "epoch": 1.34,
484
+ "grad_norm": 1.7044769525527954,
485
+ "learning_rate": 6.25e-05,
486
+ "loss": 0.0849,
487
+ "step": 335
488
+ },
489
+ {
490
+ "epoch": 1.3599999999999999,
491
+ "grad_norm": 1.316157341003418,
492
+ "learning_rate": 6.25e-05,
493
+ "loss": 0.0865,
494
+ "step": 340
495
+ },
496
+ {
497
+ "epoch": 1.38,
498
+ "grad_norm": 1.4973046779632568,
499
+ "learning_rate": 6.25e-05,
500
+ "loss": 0.0864,
501
+ "step": 345
502
+ },
503
+ {
504
+ "epoch": 1.4,
505
+ "grad_norm": 0.7230541706085205,
506
+ "learning_rate": 6.25e-05,
507
+ "loss": 0.074,
508
+ "step": 350
509
+ },
510
+ {
511
+ "epoch": 1.42,
512
+ "grad_norm": 1.026584267616272,
513
+ "learning_rate": 6.25e-05,
514
+ "loss": 0.0788,
515
+ "step": 355
516
+ },
517
+ {
518
+ "epoch": 1.44,
519
+ "grad_norm": 1.3976407051086426,
520
+ "learning_rate": 6.25e-05,
521
+ "loss": 0.0862,
522
+ "step": 360
523
+ },
524
+ {
525
+ "epoch": 1.46,
526
+ "grad_norm": 1.3054964542388916,
527
+ "learning_rate": 6.25e-05,
528
+ "loss": 0.0757,
529
+ "step": 365
530
+ },
531
+ {
532
+ "epoch": 1.48,
533
+ "grad_norm": 1.8163508176803589,
534
+ "learning_rate": 6.25e-05,
535
+ "loss": 0.0822,
536
+ "step": 370
537
+ },
538
+ {
539
+ "epoch": 1.5,
540
+ "grad_norm": 1.23239004611969,
541
+ "learning_rate": 6.25e-05,
542
+ "loss": 0.0886,
543
+ "step": 375
544
+ },
545
+ {
546
+ "epoch": 1.52,
547
+ "grad_norm": 1.1320103406906128,
548
+ "learning_rate": 6.25e-05,
549
+ "loss": 0.0834,
550
+ "step": 380
551
+ },
552
+ {
553
+ "epoch": 1.54,
554
+ "grad_norm": 1.8913005590438843,
555
+ "learning_rate": 6.25e-05,
556
+ "loss": 0.0917,
557
+ "step": 385
558
+ },
559
+ {
560
+ "epoch": 1.56,
561
+ "grad_norm": 1.3522365093231201,
562
+ "learning_rate": 6.25e-05,
563
+ "loss": 0.0927,
564
+ "step": 390
565
+ },
566
+ {
567
+ "epoch": 1.58,
568
+ "grad_norm": 1.3687407970428467,
569
+ "learning_rate": 6.25e-05,
570
+ "loss": 0.0701,
571
+ "step": 395
572
+ },
573
+ {
574
+ "epoch": 1.6,
575
+ "grad_norm": 1.6905425786972046,
576
+ "learning_rate": 6.25e-05,
577
+ "loss": 0.0914,
578
+ "step": 400
579
+ },
580
+ {
581
+ "epoch": 1.62,
582
+ "grad_norm": 1.3366554975509644,
583
+ "learning_rate": 6.25e-05,
584
+ "loss": 0.0833,
585
+ "step": 405
586
+ },
587
+ {
588
+ "epoch": 1.6400000000000001,
589
+ "grad_norm": 1.0540807247161865,
590
+ "learning_rate": 6.25e-05,
591
+ "loss": 0.088,
592
+ "step": 410
593
+ },
594
+ {
595
+ "epoch": 1.6600000000000001,
596
+ "grad_norm": 1.0234986543655396,
597
+ "learning_rate": 6.25e-05,
598
+ "loss": 0.075,
599
+ "step": 415
600
+ },
601
+ {
602
+ "epoch": 1.6800000000000002,
603
+ "grad_norm": 1.3205113410949707,
604
+ "learning_rate": 6.25e-05,
605
+ "loss": 0.0931,
606
+ "step": 420
607
+ },
608
+ {
609
+ "epoch": 1.7,
610
+ "grad_norm": 0.8865799307823181,
611
+ "learning_rate": 6.25e-05,
612
+ "loss": 0.0831,
613
+ "step": 425
614
+ },
615
+ {
616
+ "epoch": 1.72,
617
+ "grad_norm": 1.2646653652191162,
618
+ "learning_rate": 6.25e-05,
619
+ "loss": 0.0825,
620
+ "step": 430
621
+ },
622
+ {
623
+ "epoch": 1.74,
624
+ "grad_norm": 0.9991198182106018,
625
+ "learning_rate": 6.25e-05,
626
+ "loss": 0.0825,
627
+ "step": 435
628
+ },
629
+ {
630
+ "epoch": 1.76,
631
+ "grad_norm": 0.8784312605857849,
632
+ "learning_rate": 6.25e-05,
633
+ "loss": 0.082,
634
+ "step": 440
635
+ },
636
+ {
637
+ "epoch": 1.78,
638
+ "grad_norm": 1.304877519607544,
639
+ "learning_rate": 6.25e-05,
640
+ "loss": 0.0775,
641
+ "step": 445
642
+ },
643
+ {
644
+ "epoch": 1.8,
645
+ "grad_norm": 1.2007408142089844,
646
+ "learning_rate": 6.25e-05,
647
+ "loss": 0.0796,
648
+ "step": 450
649
+ },
650
+ {
651
+ "epoch": 1.8199999999999998,
652
+ "grad_norm": 0.9978143572807312,
653
+ "learning_rate": 6.25e-05,
654
+ "loss": 0.0794,
655
+ "step": 455
656
+ },
657
+ {
658
+ "epoch": 1.8399999999999999,
659
+ "grad_norm": 1.2883387804031372,
660
+ "learning_rate": 6.25e-05,
661
+ "loss": 0.0744,
662
+ "step": 460
663
+ },
664
+ {
665
+ "epoch": 1.8599999999999999,
666
+ "grad_norm": 0.8542335629463196,
667
+ "learning_rate": 6.25e-05,
668
+ "loss": 0.074,
669
+ "step": 465
670
+ },
671
+ {
672
+ "epoch": 1.88,
673
+ "grad_norm": 1.0009572505950928,
674
+ "learning_rate": 6.25e-05,
675
+ "loss": 0.0823,
676
+ "step": 470
677
+ },
678
+ {
679
+ "epoch": 1.9,
680
+ "grad_norm": 1.126528263092041,
681
+ "learning_rate": 6.25e-05,
682
+ "loss": 0.0805,
683
+ "step": 475
684
+ },
685
+ {
686
+ "epoch": 1.92,
687
+ "grad_norm": 0.9336584210395813,
688
+ "learning_rate": 6.25e-05,
689
+ "loss": 0.0722,
690
+ "step": 480
691
+ },
692
+ {
693
+ "epoch": 1.94,
694
+ "grad_norm": 1.0387274026870728,
695
+ "learning_rate": 6.25e-05,
696
+ "loss": 0.0722,
697
+ "step": 485
698
+ },
699
+ {
700
+ "epoch": 1.96,
701
+ "grad_norm": 1.4692296981811523,
702
+ "learning_rate": 6.25e-05,
703
+ "loss": 0.071,
704
+ "step": 490
705
+ },
706
+ {
707
+ "epoch": 1.98,
708
+ "grad_norm": 0.9859362244606018,
709
+ "learning_rate": 6.25e-05,
710
+ "loss": 0.0795,
711
+ "step": 495
712
+ },
713
+ {
714
+ "epoch": 2.0,
715
+ "grad_norm": 1.0557219982147217,
716
+ "learning_rate": 6.25e-05,
717
+ "loss": 0.0741,
718
+ "step": 500
719
+ },
720
+ {
721
+ "epoch": 2.0,
722
+ "eval_cer": 0.05483273798659818,
723
+ "eval_loss": 0.0840950757265091,
724
+ "eval_runtime": 280.5964,
725
+ "eval_samples_per_second": 1.782,
726
+ "eval_steps_per_second": 0.445,
727
+ "step": 500
728
+ },
729
+ {
730
+ "epoch": 2.02,
731
+ "grad_norm": 1.1070911884307861,
732
+ "learning_rate": 6.25e-05,
733
+ "loss": 0.0552,
734
+ "step": 505
735
+ },
736
+ {
737
+ "epoch": 2.04,
738
+ "grad_norm": 0.7629583477973938,
739
+ "learning_rate": 6.25e-05,
740
+ "loss": 0.0613,
741
+ "step": 510
742
+ },
743
+ {
744
+ "epoch": 2.06,
745
+ "grad_norm": 1.3060976266860962,
746
+ "learning_rate": 6.25e-05,
747
+ "loss": 0.0746,
748
+ "step": 515
749
+ },
750
+ {
751
+ "epoch": 2.08,
752
+ "grad_norm": 1.067309021949768,
753
+ "learning_rate": 6.25e-05,
754
+ "loss": 0.0717,
755
+ "step": 520
756
+ },
757
+ {
758
+ "epoch": 2.1,
759
+ "grad_norm": 1.1334558725357056,
760
+ "learning_rate": 6.25e-05,
761
+ "loss": 0.0582,
762
+ "step": 525
763
+ },
764
+ {
765
+ "epoch": 2.12,
766
+ "grad_norm": 0.8565890192985535,
767
+ "learning_rate": 6.25e-05,
768
+ "loss": 0.066,
769
+ "step": 530
770
+ },
771
+ {
772
+ "epoch": 2.14,
773
+ "grad_norm": 1.042038083076477,
774
+ "learning_rate": 6.25e-05,
775
+ "loss": 0.0686,
776
+ "step": 535
777
+ },
778
+ {
779
+ "epoch": 2.16,
780
+ "grad_norm": 0.8787774443626404,
781
+ "learning_rate": 6.25e-05,
782
+ "loss": 0.0714,
783
+ "step": 540
784
+ },
785
+ {
786
+ "epoch": 2.18,
787
+ "grad_norm": 1.2969956398010254,
788
+ "learning_rate": 6.25e-05,
789
+ "loss": 0.069,
790
+ "step": 545
791
+ },
792
+ {
793
+ "epoch": 2.2,
794
+ "grad_norm": 1.0625072717666626,
795
+ "learning_rate": 6.25e-05,
796
+ "loss": 0.0701,
797
+ "step": 550
798
+ },
799
+ {
800
+ "epoch": 2.22,
801
+ "grad_norm": 1.1786212921142578,
802
+ "learning_rate": 6.25e-05,
803
+ "loss": 0.0672,
804
+ "step": 555
805
+ },
806
+ {
807
+ "epoch": 2.24,
808
+ "grad_norm": 0.7678006887435913,
809
+ "learning_rate": 6.25e-05,
810
+ "loss": 0.0637,
811
+ "step": 560
812
+ },
813
+ {
814
+ "epoch": 2.26,
815
+ "grad_norm": 1.087916612625122,
816
+ "learning_rate": 6.25e-05,
817
+ "loss": 0.0592,
818
+ "step": 565
819
+ },
820
+ {
821
+ "epoch": 2.2800000000000002,
822
+ "grad_norm": 1.0709354877471924,
823
+ "learning_rate": 6.25e-05,
824
+ "loss": 0.0641,
825
+ "step": 570
826
+ },
827
+ {
828
+ "epoch": 2.3,
829
+ "grad_norm": 0.9933990240097046,
830
+ "learning_rate": 6.25e-05,
831
+ "loss": 0.0724,
832
+ "step": 575
833
+ },
834
+ {
835
+ "epoch": 2.32,
836
+ "grad_norm": 0.9537047147750854,
837
+ "learning_rate": 6.25e-05,
838
+ "loss": 0.0565,
839
+ "step": 580
840
+ },
841
+ {
842
+ "epoch": 2.34,
843
+ "grad_norm": 0.8913723230361938,
844
+ "learning_rate": 6.25e-05,
845
+ "loss": 0.0601,
846
+ "step": 585
847
+ },
848
+ {
849
+ "epoch": 2.36,
850
+ "grad_norm": 1.4037823677062988,
851
+ "learning_rate": 6.25e-05,
852
+ "loss": 0.0656,
853
+ "step": 590
854
+ },
855
+ {
856
+ "epoch": 2.38,
857
+ "grad_norm": 0.8686001896858215,
858
+ "learning_rate": 6.25e-05,
859
+ "loss": 0.0617,
860
+ "step": 595
861
+ },
862
+ {
863
+ "epoch": 2.4,
864
+ "grad_norm": 1.1040139198303223,
865
+ "learning_rate": 6.25e-05,
866
+ "loss": 0.0612,
867
+ "step": 600
868
+ },
869
+ {
870
+ "epoch": 2.42,
871
+ "grad_norm": 0.8995397090911865,
872
+ "learning_rate": 6.25e-05,
873
+ "loss": 0.0528,
874
+ "step": 605
875
+ },
876
+ {
877
+ "epoch": 2.44,
878
+ "grad_norm": 1.0924474000930786,
879
+ "learning_rate": 6.25e-05,
880
+ "loss": 0.0551,
881
+ "step": 610
882
+ },
883
+ {
884
+ "epoch": 2.46,
885
+ "grad_norm": 1.0748484134674072,
886
+ "learning_rate": 6.25e-05,
887
+ "loss": 0.0573,
888
+ "step": 615
889
+ },
890
+ {
891
+ "epoch": 2.48,
892
+ "grad_norm": 0.8827953338623047,
893
+ "learning_rate": 6.25e-05,
894
+ "loss": 0.0595,
895
+ "step": 620
896
+ },
897
+ {
898
+ "epoch": 2.5,
899
+ "grad_norm": 0.8614113926887512,
900
+ "learning_rate": 6.25e-05,
901
+ "loss": 0.0694,
902
+ "step": 625
903
+ },
904
+ {
905
+ "epoch": 2.52,
906
+ "grad_norm": 0.6579775810241699,
907
+ "learning_rate": 6.25e-05,
908
+ "loss": 0.0608,
909
+ "step": 630
910
+ },
911
+ {
912
+ "epoch": 2.54,
913
+ "grad_norm": 0.923587441444397,
914
+ "learning_rate": 6.25e-05,
915
+ "loss": 0.0684,
916
+ "step": 635
917
+ },
918
+ {
919
+ "epoch": 2.56,
920
+ "grad_norm": 1.119313359260559,
921
+ "learning_rate": 6.25e-05,
922
+ "loss": 0.0651,
923
+ "step": 640
924
+ },
925
+ {
926
+ "epoch": 2.58,
927
+ "grad_norm": 1.1630853414535522,
928
+ "learning_rate": 6.25e-05,
929
+ "loss": 0.0773,
930
+ "step": 645
931
+ },
932
+ {
933
+ "epoch": 2.6,
934
+ "grad_norm": 0.9517636299133301,
935
+ "learning_rate": 6.25e-05,
936
+ "loss": 0.0574,
937
+ "step": 650
938
+ },
939
+ {
940
+ "epoch": 2.62,
941
+ "grad_norm": 0.767271101474762,
942
+ "learning_rate": 6.25e-05,
943
+ "loss": 0.071,
944
+ "step": 655
945
+ },
946
+ {
947
+ "epoch": 2.64,
948
+ "grad_norm": 1.3324207067489624,
949
+ "learning_rate": 6.25e-05,
950
+ "loss": 0.0672,
951
+ "step": 660
952
+ },
953
+ {
954
+ "epoch": 2.66,
955
+ "grad_norm": 0.8638308048248291,
956
+ "learning_rate": 6.25e-05,
957
+ "loss": 0.0602,
958
+ "step": 665
959
+ },
960
+ {
961
+ "epoch": 2.68,
962
+ "grad_norm": 0.9522351622581482,
963
+ "learning_rate": 6.25e-05,
964
+ "loss": 0.0626,
965
+ "step": 670
966
+ },
967
+ {
968
+ "epoch": 2.7,
969
+ "grad_norm": 0.7264077067375183,
970
+ "learning_rate": 6.25e-05,
971
+ "loss": 0.0654,
972
+ "step": 675
973
+ },
974
+ {
975
+ "epoch": 2.7199999999999998,
976
+ "grad_norm": 1.185275912284851,
977
+ "learning_rate": 6.25e-05,
978
+ "loss": 0.0638,
979
+ "step": 680
980
+ },
981
+ {
982
+ "epoch": 2.74,
983
+ "grad_norm": 1.549625277519226,
984
+ "learning_rate": 6.25e-05,
985
+ "loss": 0.0661,
986
+ "step": 685
987
+ },
988
+ {
989
+ "epoch": 2.76,
990
+ "grad_norm": 1.202415108680725,
991
+ "learning_rate": 6.25e-05,
992
+ "loss": 0.0709,
993
+ "step": 690
994
+ },
995
+ {
996
+ "epoch": 2.7800000000000002,
997
+ "grad_norm": 0.7902194857597351,
998
+ "learning_rate": 6.25e-05,
999
+ "loss": 0.0604,
1000
+ "step": 695
1001
+ },
1002
+ {
1003
+ "epoch": 2.8,
1004
+ "grad_norm": 1.0128028392791748,
1005
+ "learning_rate": 6.25e-05,
1006
+ "loss": 0.0612,
1007
+ "step": 700
1008
+ },
1009
+ {
1010
+ "epoch": 2.82,
1011
+ "grad_norm": 0.8418397903442383,
1012
+ "learning_rate": 6.25e-05,
1013
+ "loss": 0.0616,
1014
+ "step": 705
1015
+ },
1016
+ {
1017
+ "epoch": 2.84,
1018
+ "grad_norm": 0.9352026581764221,
1019
+ "learning_rate": 6.25e-05,
1020
+ "loss": 0.0635,
1021
+ "step": 710
1022
+ },
1023
+ {
1024
+ "epoch": 2.86,
1025
+ "grad_norm": 0.679918110370636,
1026
+ "learning_rate": 6.25e-05,
1027
+ "loss": 0.0588,
1028
+ "step": 715
1029
+ },
1030
+ {
1031
+ "epoch": 2.88,
1032
+ "grad_norm": 0.836438000202179,
1033
+ "learning_rate": 6.25e-05,
1034
+ "loss": 0.0635,
1035
+ "step": 720
1036
+ },
1037
+ {
1038
+ "epoch": 2.9,
1039
+ "grad_norm": 0.7643904089927673,
1040
+ "learning_rate": 6.25e-05,
1041
+ "loss": 0.0554,
1042
+ "step": 725
1043
+ },
1044
+ {
1045
+ "epoch": 2.92,
1046
+ "grad_norm": 0.9192042946815491,
1047
+ "learning_rate": 6.25e-05,
1048
+ "loss": 0.0541,
1049
+ "step": 730
1050
+ },
1051
+ {
1052
+ "epoch": 2.94,
1053
+ "grad_norm": 0.9899188280105591,
1054
+ "learning_rate": 6.25e-05,
1055
+ "loss": 0.0591,
1056
+ "step": 735
1057
+ },
1058
+ {
1059
+ "epoch": 2.96,
1060
+ "grad_norm": 1.112701654434204,
1061
+ "learning_rate": 6.25e-05,
1062
+ "loss": 0.0611,
1063
+ "step": 740
1064
+ },
1065
+ {
1066
+ "epoch": 2.98,
1067
+ "grad_norm": 0.9096015095710754,
1068
+ "learning_rate": 6.25e-05,
1069
+ "loss": 0.0594,
1070
+ "step": 745
1071
+ },
1072
+ {
1073
+ "epoch": 3.0,
1074
+ "grad_norm": 1.158527135848999,
1075
+ "learning_rate": 6.25e-05,
1076
+ "loss": 0.0703,
1077
+ "step": 750
1078
+ },
1079
+ {
1080
+ "epoch": 3.0,
1081
+ "eval_cer": 0.05350298542486898,
1082
+ "eval_loss": 0.07945344597101212,
1083
+ "eval_runtime": 281.5513,
1084
+ "eval_samples_per_second": 1.776,
1085
+ "eval_steps_per_second": 0.444,
1086
+ "step": 750
1087
+ },
1088
+ {
1089
+ "epoch": 3.02,
1090
+ "grad_norm": 0.8544594049453735,
1091
+ "learning_rate": 6.25e-05,
1092
+ "loss": 0.0461,
1093
+ "step": 755
1094
+ },
1095
+ {
1096
+ "epoch": 3.04,
1097
+ "grad_norm": 0.8411735892295837,
1098
+ "learning_rate": 6.25e-05,
1099
+ "loss": 0.0429,
1100
+ "step": 760
1101
+ },
1102
+ {
1103
+ "epoch": 3.06,
1104
+ "grad_norm": 0.7515286207199097,
1105
+ "learning_rate": 6.25e-05,
1106
+ "loss": 0.0559,
1107
+ "step": 765
1108
+ },
1109
+ {
1110
+ "epoch": 3.08,
1111
+ "grad_norm": 0.8125985264778137,
1112
+ "learning_rate": 6.25e-05,
1113
+ "loss": 0.044,
1114
+ "step": 770
1115
+ },
1116
+ {
1117
+ "epoch": 3.1,
1118
+ "grad_norm": 0.8093322515487671,
1119
+ "learning_rate": 6.25e-05,
1120
+ "loss": 0.0529,
1121
+ "step": 775
1122
+ },
1123
+ {
1124
+ "epoch": 3.12,
1125
+ "grad_norm": 0.8852378129959106,
1126
+ "learning_rate": 6.25e-05,
1127
+ "loss": 0.0508,
1128
+ "step": 780
1129
+ },
1130
+ {
1131
+ "epoch": 3.14,
1132
+ "grad_norm": 0.6388903856277466,
1133
+ "learning_rate": 6.25e-05,
1134
+ "loss": 0.0491,
1135
+ "step": 785
1136
+ },
1137
+ {
1138
+ "epoch": 3.16,
1139
+ "grad_norm": 0.9803158640861511,
1140
+ "learning_rate": 6.25e-05,
1141
+ "loss": 0.051,
1142
+ "step": 790
1143
+ },
1144
+ {
1145
+ "epoch": 3.18,
1146
+ "grad_norm": 1.163065791130066,
1147
+ "learning_rate": 6.25e-05,
1148
+ "loss": 0.0538,
1149
+ "step": 795
1150
+ },
1151
+ {
1152
+ "epoch": 3.2,
1153
+ "grad_norm": 0.942138671875,
1154
+ "learning_rate": 6.25e-05,
1155
+ "loss": 0.0548,
1156
+ "step": 800
1157
+ },
1158
+ {
1159
+ "epoch": 3.22,
1160
+ "grad_norm": 0.763847827911377,
1161
+ "learning_rate": 6.25e-05,
1162
+ "loss": 0.0497,
1163
+ "step": 805
1164
+ },
1165
+ {
1166
+ "epoch": 3.24,
1167
+ "grad_norm": 1.1041572093963623,
1168
+ "learning_rate": 6.25e-05,
1169
+ "loss": 0.0513,
1170
+ "step": 810
1171
+ },
1172
+ {
1173
+ "epoch": 3.26,
1174
+ "grad_norm": 0.8744838237762451,
1175
+ "learning_rate": 6.25e-05,
1176
+ "loss": 0.0574,
1177
+ "step": 815
1178
+ },
1179
+ {
1180
+ "epoch": 3.2800000000000002,
1181
+ "grad_norm": 0.8737279176712036,
1182
+ "learning_rate": 6.25e-05,
1183
+ "loss": 0.0485,
1184
+ "step": 820
1185
+ },
1186
+ {
1187
+ "epoch": 3.3,
1188
+ "grad_norm": 0.6367043256759644,
1189
+ "learning_rate": 6.25e-05,
1190
+ "loss": 0.0462,
1191
+ "step": 825
1192
+ },
1193
+ {
1194
+ "epoch": 3.32,
1195
+ "grad_norm": 0.7195335030555725,
1196
+ "learning_rate": 6.25e-05,
1197
+ "loss": 0.0529,
1198
+ "step": 830
1199
+ },
1200
+ {
1201
+ "epoch": 3.34,
1202
+ "grad_norm": 0.7411594986915588,
1203
+ "learning_rate": 6.25e-05,
1204
+ "loss": 0.0558,
1205
+ "step": 835
1206
+ },
1207
+ {
1208
+ "epoch": 3.36,
1209
+ "grad_norm": 0.5583875179290771,
1210
+ "learning_rate": 6.25e-05,
1211
+ "loss": 0.0498,
1212
+ "step": 840
1213
+ },
1214
+ {
1215
+ "epoch": 3.38,
1216
+ "grad_norm": 0.7013912796974182,
1217
+ "learning_rate": 6.25e-05,
1218
+ "loss": 0.0465,
1219
+ "step": 845
1220
+ },
1221
+ {
1222
+ "epoch": 3.4,
1223
+ "grad_norm": 1.1267294883728027,
1224
+ "learning_rate": 6.25e-05,
1225
+ "loss": 0.0505,
1226
+ "step": 850
1227
+ },
1228
+ {
1229
+ "epoch": 3.42,
1230
+ "grad_norm": 1.3056484460830688,
1231
+ "learning_rate": 6.25e-05,
1232
+ "loss": 0.0515,
1233
+ "step": 855
1234
+ },
1235
+ {
1236
+ "epoch": 3.44,
1237
+ "grad_norm": 1.182433843612671,
1238
+ "learning_rate": 6.25e-05,
1239
+ "loss": 0.0525,
1240
+ "step": 860
1241
+ },
1242
+ {
1243
+ "epoch": 3.46,
1244
+ "grad_norm": 0.8969308733940125,
1245
+ "learning_rate": 6.25e-05,
1246
+ "loss": 0.0517,
1247
+ "step": 865
1248
+ },
1249
+ {
1250
+ "epoch": 3.48,
1251
+ "grad_norm": 0.7779067158699036,
1252
+ "learning_rate": 6.25e-05,
1253
+ "loss": 0.0539,
1254
+ "step": 870
1255
+ },
1256
+ {
1257
+ "epoch": 3.5,
1258
+ "grad_norm": 0.591754674911499,
1259
+ "learning_rate": 6.25e-05,
1260
+ "loss": 0.0546,
1261
+ "step": 875
1262
+ },
1263
+ {
1264
+ "epoch": 3.52,
1265
+ "grad_norm": 0.8097557425498962,
1266
+ "learning_rate": 6.25e-05,
1267
+ "loss": 0.0529,
1268
+ "step": 880
1269
+ },
1270
+ {
1271
+ "epoch": 3.54,
1272
+ "grad_norm": 0.7054248452186584,
1273
+ "learning_rate": 6.25e-05,
1274
+ "loss": 0.0436,
1275
+ "step": 885
1276
+ },
1277
+ {
1278
+ "epoch": 3.56,
1279
+ "grad_norm": 0.5832129716873169,
1280
+ "learning_rate": 6.25e-05,
1281
+ "loss": 0.048,
1282
+ "step": 890
1283
+ },
1284
+ {
1285
+ "epoch": 3.58,
1286
+ "grad_norm": 0.8104725480079651,
1287
+ "learning_rate": 6.25e-05,
1288
+ "loss": 0.0503,
1289
+ "step": 895
1290
+ },
1291
+ {
1292
+ "epoch": 3.6,
1293
+ "grad_norm": 0.9961804151535034,
1294
+ "learning_rate": 6.25e-05,
1295
+ "loss": 0.0565,
1296
+ "step": 900
1297
+ },
1298
+ {
1299
+ "epoch": 3.62,
1300
+ "grad_norm": 0.8466907143592834,
1301
+ "learning_rate": 6.25e-05,
1302
+ "loss": 0.054,
1303
+ "step": 905
1304
+ },
1305
+ {
1306
+ "epoch": 3.64,
1307
+ "grad_norm": 0.8867480158805847,
1308
+ "learning_rate": 6.25e-05,
1309
+ "loss": 0.0547,
1310
+ "step": 910
1311
+ },
1312
+ {
1313
+ "epoch": 3.66,
1314
+ "grad_norm": 0.9030736684799194,
1315
+ "learning_rate": 6.25e-05,
1316
+ "loss": 0.0481,
1317
+ "step": 915
1318
+ },
1319
+ {
1320
+ "epoch": 3.68,
1321
+ "grad_norm": 0.6740151643753052,
1322
+ "learning_rate": 6.25e-05,
1323
+ "loss": 0.0529,
1324
+ "step": 920
1325
+ },
1326
+ {
1327
+ "epoch": 3.7,
1328
+ "grad_norm": 0.653508722782135,
1329
+ "learning_rate": 6.25e-05,
1330
+ "loss": 0.0633,
1331
+ "step": 925
1332
+ },
1333
+ {
1334
+ "epoch": 3.7199999999999998,
1335
+ "grad_norm": 0.7304302453994751,
1336
+ "learning_rate": 6.25e-05,
1337
+ "loss": 0.0493,
1338
+ "step": 930
1339
+ },
1340
+ {
1341
+ "epoch": 3.74,
1342
+ "grad_norm": 0.8343582153320312,
1343
+ "learning_rate": 6.25e-05,
1344
+ "loss": 0.059,
1345
+ "step": 935
1346
+ },
1347
+ {
1348
+ "epoch": 3.76,
1349
+ "grad_norm": 0.8459467887878418,
1350
+ "learning_rate": 6.25e-05,
1351
+ "loss": 0.0531,
1352
+ "step": 940
1353
+ },
1354
+ {
1355
+ "epoch": 3.7800000000000002,
1356
+ "grad_norm": 0.7470009326934814,
1357
+ "learning_rate": 6.25e-05,
1358
+ "loss": 0.0548,
1359
+ "step": 945
1360
+ },
1361
+ {
1362
+ "epoch": 3.8,
1363
+ "grad_norm": 0.8183557987213135,
1364
+ "learning_rate": 6.25e-05,
1365
+ "loss": 0.0471,
1366
+ "step": 950
1367
+ },
1368
+ {
1369
+ "epoch": 3.82,
1370
+ "grad_norm": 0.9448140263557434,
1371
+ "learning_rate": 6.25e-05,
1372
+ "loss": 0.045,
1373
+ "step": 955
1374
+ },
1375
+ {
1376
+ "epoch": 3.84,
1377
+ "grad_norm": 0.7056401371955872,
1378
+ "learning_rate": 6.25e-05,
1379
+ "loss": 0.045,
1380
+ "step": 960
1381
+ },
1382
+ {
1383
+ "epoch": 3.86,
1384
+ "grad_norm": 0.7785059213638306,
1385
+ "learning_rate": 6.25e-05,
1386
+ "loss": 0.0554,
1387
+ "step": 965
1388
+ },
1389
+ {
1390
+ "epoch": 3.88,
1391
+ "grad_norm": 0.8976256251335144,
1392
+ "learning_rate": 6.25e-05,
1393
+ "loss": 0.0529,
1394
+ "step": 970
1395
+ },
1396
+ {
1397
+ "epoch": 3.9,
1398
+ "grad_norm": 1.0849542617797852,
1399
+ "learning_rate": 6.25e-05,
1400
+ "loss": 0.0457,
1401
+ "step": 975
1402
+ },
1403
+ {
1404
+ "epoch": 3.92,
1405
+ "grad_norm": 1.1612681150436401,
1406
+ "learning_rate": 6.25e-05,
1407
+ "loss": 0.0513,
1408
+ "step": 980
1409
+ },
1410
+ {
1411
+ "epoch": 3.94,
1412
+ "grad_norm": 0.6912779211997986,
1413
+ "learning_rate": 6.25e-05,
1414
+ "loss": 0.0469,
1415
+ "step": 985
1416
+ },
1417
+ {
1418
+ "epoch": 3.96,
1419
+ "grad_norm": 0.7129920125007629,
1420
+ "learning_rate": 6.25e-05,
1421
+ "loss": 0.0509,
1422
+ "step": 990
1423
+ },
1424
+ {
1425
+ "epoch": 3.98,
1426
+ "grad_norm": 0.6439591646194458,
1427
+ "learning_rate": 6.25e-05,
1428
+ "loss": 0.0412,
1429
+ "step": 995
1430
+ },
1431
+ {
1432
+ "epoch": 4.0,
1433
+ "grad_norm": 0.7044887542724609,
1434
+ "learning_rate": 6.25e-05,
1435
+ "loss": 0.0558,
1436
+ "step": 1000
1437
+ },
1438
+ {
1439
+ "epoch": 4.0,
1440
+ "eval_cer": 0.046749928297655986,
1441
+ "eval_loss": 0.07047422975301743,
1442
+ "eval_runtime": 280.6209,
1443
+ "eval_samples_per_second": 1.782,
1444
+ "eval_steps_per_second": 0.445,
1445
+ "step": 1000
1446
+ },
1447
+ {
1448
+ "epoch": 4.02,
1449
+ "grad_norm": 0.6291618943214417,
1450
+ "learning_rate": 6.25e-05,
1451
+ "loss": 0.0432,
1452
+ "step": 1005
1453
+ },
1454
+ {
1455
+ "epoch": 4.04,
1456
+ "grad_norm": 0.5485780239105225,
1457
+ "learning_rate": 6.25e-05,
1458
+ "loss": 0.0459,
1459
+ "step": 1010
1460
+ },
1461
+ {
1462
+ "epoch": 4.06,
1463
+ "grad_norm": 0.5912005305290222,
1464
+ "learning_rate": 6.25e-05,
1465
+ "loss": 0.0416,
1466
+ "step": 1015
1467
+ },
1468
+ {
1469
+ "epoch": 4.08,
1470
+ "grad_norm": 0.5929523706436157,
1471
+ "learning_rate": 6.25e-05,
1472
+ "loss": 0.0358,
1473
+ "step": 1020
1474
+ },
1475
+ {
1476
+ "epoch": 4.1,
1477
+ "grad_norm": 0.4929662346839905,
1478
+ "learning_rate": 6.25e-05,
1479
+ "loss": 0.0389,
1480
+ "step": 1025
1481
+ },
1482
+ {
1483
+ "epoch": 4.12,
1484
+ "grad_norm": 0.6707394123077393,
1485
+ "learning_rate": 6.25e-05,
1486
+ "loss": 0.0388,
1487
+ "step": 1030
1488
+ },
1489
+ {
1490
+ "epoch": 4.14,
1491
+ "grad_norm": 0.9774329662322998,
1492
+ "learning_rate": 6.25e-05,
1493
+ "loss": 0.0401,
1494
+ "step": 1035
1495
+ },
1496
+ {
1497
+ "epoch": 4.16,
1498
+ "grad_norm": 0.6821659803390503,
1499
+ "learning_rate": 6.25e-05,
1500
+ "loss": 0.0403,
1501
+ "step": 1040
1502
+ },
1503
+ {
1504
+ "epoch": 4.18,
1505
+ "grad_norm": 0.796459436416626,
1506
+ "learning_rate": 6.25e-05,
1507
+ "loss": 0.0425,
1508
+ "step": 1045
1509
+ },
1510
+ {
1511
+ "epoch": 4.2,
1512
+ "grad_norm": 0.6956031918525696,
1513
+ "learning_rate": 6.25e-05,
1514
+ "loss": 0.0475,
1515
+ "step": 1050
1516
+ },
1517
+ {
1518
+ "epoch": 4.22,
1519
+ "grad_norm": 0.7577043175697327,
1520
+ "learning_rate": 6.25e-05,
1521
+ "loss": 0.0483,
1522
+ "step": 1055
1523
+ },
1524
+ {
1525
+ "epoch": 4.24,
1526
+ "grad_norm": 0.5384642481803894,
1527
+ "learning_rate": 6.25e-05,
1528
+ "loss": 0.0372,
1529
+ "step": 1060
1530
+ },
1531
+ {
1532
+ "epoch": 4.26,
1533
+ "grad_norm": 0.791437566280365,
1534
+ "learning_rate": 6.25e-05,
1535
+ "loss": 0.0485,
1536
+ "step": 1065
1537
+ },
1538
+ {
1539
+ "epoch": 4.28,
1540
+ "grad_norm": 0.5820832252502441,
1541
+ "learning_rate": 6.25e-05,
1542
+ "loss": 0.0466,
1543
+ "step": 1070
1544
+ },
1545
+ {
1546
+ "epoch": 4.3,
1547
+ "grad_norm": 0.9597232341766357,
1548
+ "learning_rate": 6.25e-05,
1549
+ "loss": 0.0437,
1550
+ "step": 1075
1551
+ },
1552
+ {
1553
+ "epoch": 4.32,
1554
+ "grad_norm": 0.9876553416252136,
1555
+ "learning_rate": 6.25e-05,
1556
+ "loss": 0.05,
1557
+ "step": 1080
1558
+ },
1559
+ {
1560
+ "epoch": 4.34,
1561
+ "grad_norm": 0.6902226805686951,
1562
+ "learning_rate": 6.25e-05,
1563
+ "loss": 0.0401,
1564
+ "step": 1085
1565
+ },
1566
+ {
1567
+ "epoch": 4.36,
1568
+ "grad_norm": 0.5399324893951416,
1569
+ "learning_rate": 6.25e-05,
1570
+ "loss": 0.043,
1571
+ "step": 1090
1572
+ },
1573
+ {
1574
+ "epoch": 4.38,
1575
+ "grad_norm": 0.7499954700469971,
1576
+ "learning_rate": 6.25e-05,
1577
+ "loss": 0.0426,
1578
+ "step": 1095
1579
+ },
1580
+ {
1581
+ "epoch": 4.4,
1582
+ "grad_norm": 0.7145591378211975,
1583
+ "learning_rate": 6.25e-05,
1584
+ "loss": 0.0503,
1585
+ "step": 1100
1586
+ },
1587
+ {
1588
+ "epoch": 4.42,
1589
+ "grad_norm": 0.5746826529502869,
1590
+ "learning_rate": 6.25e-05,
1591
+ "loss": 0.0383,
1592
+ "step": 1105
1593
+ },
1594
+ {
1595
+ "epoch": 4.44,
1596
+ "grad_norm": 0.7018007040023804,
1597
+ "learning_rate": 6.25e-05,
1598
+ "loss": 0.0466,
1599
+ "step": 1110
1600
+ },
1601
+ {
1602
+ "epoch": 4.46,
1603
+ "grad_norm": 0.6607512831687927,
1604
+ "learning_rate": 6.25e-05,
1605
+ "loss": 0.038,
1606
+ "step": 1115
1607
+ },
1608
+ {
1609
+ "epoch": 4.48,
1610
+ "grad_norm": 0.5863096714019775,
1611
+ "learning_rate": 6.25e-05,
1612
+ "loss": 0.0462,
1613
+ "step": 1120
1614
+ },
1615
+ {
1616
+ "epoch": 4.5,
1617
+ "grad_norm": 0.674934983253479,
1618
+ "learning_rate": 6.25e-05,
1619
+ "loss": 0.0523,
1620
+ "step": 1125
1621
+ },
1622
+ {
1623
+ "epoch": 4.52,
1624
+ "grad_norm": 0.7824676036834717,
1625
+ "learning_rate": 6.25e-05,
1626
+ "loss": 0.0467,
1627
+ "step": 1130
1628
+ },
1629
+ {
1630
+ "epoch": 4.54,
1631
+ "grad_norm": 1.4591455459594727,
1632
+ "learning_rate": 6.25e-05,
1633
+ "loss": 0.0485,
1634
+ "step": 1135
1635
+ },
1636
+ {
1637
+ "epoch": 4.5600000000000005,
1638
+ "grad_norm": 0.6413418650627136,
1639
+ "learning_rate": 6.25e-05,
1640
+ "loss": 0.0435,
1641
+ "step": 1140
1642
+ },
1643
+ {
1644
+ "epoch": 4.58,
1645
+ "grad_norm": 0.5044887065887451,
1646
+ "learning_rate": 6.25e-05,
1647
+ "loss": 0.0432,
1648
+ "step": 1145
1649
+ },
1650
+ {
1651
+ "epoch": 4.6,
1652
+ "grad_norm": 0.4768076539039612,
1653
+ "learning_rate": 6.25e-05,
1654
+ "loss": 0.0422,
1655
+ "step": 1150
1656
+ },
1657
+ {
1658
+ "epoch": 4.62,
1659
+ "grad_norm": 0.7008136510848999,
1660
+ "learning_rate": 6.25e-05,
1661
+ "loss": 0.045,
1662
+ "step": 1155
1663
+ },
1664
+ {
1665
+ "epoch": 4.64,
1666
+ "grad_norm": 1.1213037967681885,
1667
+ "learning_rate": 6.25e-05,
1668
+ "loss": 0.0469,
1669
+ "step": 1160
1670
+ },
1671
+ {
1672
+ "epoch": 4.66,
1673
+ "grad_norm": 0.6898444890975952,
1674
+ "learning_rate": 6.25e-05,
1675
+ "loss": 0.0398,
1676
+ "step": 1165
1677
+ },
1678
+ {
1679
+ "epoch": 4.68,
1680
+ "grad_norm": 0.6885802149772644,
1681
+ "learning_rate": 6.25e-05,
1682
+ "loss": 0.0475,
1683
+ "step": 1170
1684
+ },
1685
+ {
1686
+ "epoch": 4.7,
1687
+ "grad_norm": 0.644440770149231,
1688
+ "learning_rate": 6.25e-05,
1689
+ "loss": 0.0403,
1690
+ "step": 1175
1691
+ },
1692
+ {
1693
+ "epoch": 4.72,
1694
+ "grad_norm": 0.6610418558120728,
1695
+ "learning_rate": 6.25e-05,
1696
+ "loss": 0.0415,
1697
+ "step": 1180
1698
+ },
1699
+ {
1700
+ "epoch": 4.74,
1701
+ "grad_norm": 0.7127951979637146,
1702
+ "learning_rate": 6.25e-05,
1703
+ "loss": 0.0466,
1704
+ "step": 1185
1705
+ },
1706
+ {
1707
+ "epoch": 4.76,
1708
+ "grad_norm": 0.7608262300491333,
1709
+ "learning_rate": 6.25e-05,
1710
+ "loss": 0.0398,
1711
+ "step": 1190
1712
+ },
1713
+ {
1714
+ "epoch": 4.78,
1715
+ "grad_norm": 0.6554054021835327,
1716
+ "learning_rate": 6.25e-05,
1717
+ "loss": 0.0395,
1718
+ "step": 1195
1719
+ },
1720
+ {
1721
+ "epoch": 4.8,
1722
+ "grad_norm": 0.7710177302360535,
1723
+ "learning_rate": 6.25e-05,
1724
+ "loss": 0.0412,
1725
+ "step": 1200
1726
+ },
1727
+ {
1728
+ "epoch": 4.82,
1729
+ "grad_norm": 0.5044788718223572,
1730
+ "learning_rate": 6.25e-05,
1731
+ "loss": 0.0378,
1732
+ "step": 1205
1733
+ },
1734
+ {
1735
+ "epoch": 4.84,
1736
+ "grad_norm": 0.4640452265739441,
1737
+ "learning_rate": 6.25e-05,
1738
+ "loss": 0.0394,
1739
+ "step": 1210
1740
+ },
1741
+ {
1742
+ "epoch": 4.86,
1743
+ "grad_norm": 0.6121119260787964,
1744
+ "learning_rate": 6.25e-05,
1745
+ "loss": 0.0373,
1746
+ "step": 1215
1747
+ },
1748
+ {
1749
+ "epoch": 4.88,
1750
+ "grad_norm": 0.7307333946228027,
1751
+ "learning_rate": 6.25e-05,
1752
+ "loss": 0.0462,
1753
+ "step": 1220
1754
+ },
1755
+ {
1756
+ "epoch": 4.9,
1757
+ "grad_norm": 0.841369092464447,
1758
+ "learning_rate": 6.25e-05,
1759
+ "loss": 0.0433,
1760
+ "step": 1225
1761
+ },
1762
+ {
1763
+ "epoch": 4.92,
1764
+ "grad_norm": 0.48274680972099304,
1765
+ "learning_rate": 6.25e-05,
1766
+ "loss": 0.0481,
1767
+ "step": 1230
1768
+ },
1769
+ {
1770
+ "epoch": 4.9399999999999995,
1771
+ "grad_norm": 0.6552777290344238,
1772
+ "learning_rate": 6.25e-05,
1773
+ "loss": 0.0449,
1774
+ "step": 1235
1775
+ },
1776
+ {
1777
+ "epoch": 4.96,
1778
+ "grad_norm": 1.0837739706039429,
1779
+ "learning_rate": 6.25e-05,
1780
+ "loss": 0.0465,
1781
+ "step": 1240
1782
+ },
1783
+ {
1784
+ "epoch": 4.98,
1785
+ "grad_norm": 0.7444823384284973,
1786
+ "learning_rate": 6.25e-05,
1787
+ "loss": 0.0513,
1788
+ "step": 1245
1789
+ },
1790
+ {
1791
+ "epoch": 5.0,
1792
+ "grad_norm": 0.561403214931488,
1793
+ "learning_rate": 6.25e-05,
1794
+ "loss": 0.0458,
1795
+ "step": 1250
1796
+ },
1797
+ {
1798
+ "epoch": 5.0,
1799
+ "eval_cer": 0.07253669856334576,
1800
+ "eval_loss": 0.06918226927518845,
1801
+ "eval_runtime": 282.8276,
1802
+ "eval_samples_per_second": 1.768,
1803
+ "eval_steps_per_second": 0.442,
1804
+ "step": 1250
1805
+ },
1806
+ {
1807
+ "epoch": 5.02,
1808
+ "grad_norm": 0.67482990026474,
1809
+ "learning_rate": 6.25e-05,
1810
+ "loss": 0.037,
1811
+ "step": 1255
1812
+ },
1813
+ {
1814
+ "epoch": 5.04,
1815
+ "grad_norm": 0.6839190721511841,
1816
+ "learning_rate": 6.25e-05,
1817
+ "loss": 0.0445,
1818
+ "step": 1260
1819
+ },
1820
+ {
1821
+ "epoch": 5.06,
1822
+ "grad_norm": 0.8001631498336792,
1823
+ "learning_rate": 6.25e-05,
1824
+ "loss": 0.0386,
1825
+ "step": 1265
1826
+ },
1827
+ {
1828
+ "epoch": 5.08,
1829
+ "grad_norm": 0.8353962898254395,
1830
+ "learning_rate": 6.25e-05,
1831
+ "loss": 0.0407,
1832
+ "step": 1270
1833
+ },
1834
+ {
1835
+ "epoch": 5.1,
1836
+ "grad_norm": 0.556709885597229,
1837
+ "learning_rate": 6.25e-05,
1838
+ "loss": 0.0355,
1839
+ "step": 1275
1840
+ },
1841
+ {
1842
+ "epoch": 5.12,
1843
+ "grad_norm": 0.5634174942970276,
1844
+ "learning_rate": 6.25e-05,
1845
+ "loss": 0.0322,
1846
+ "step": 1280
1847
+ },
1848
+ {
1849
+ "epoch": 5.14,
1850
+ "grad_norm": 0.6530662775039673,
1851
+ "learning_rate": 6.25e-05,
1852
+ "loss": 0.041,
1853
+ "step": 1285
1854
+ },
1855
+ {
1856
+ "epoch": 5.16,
1857
+ "grad_norm": 0.5771991610527039,
1858
+ "learning_rate": 6.25e-05,
1859
+ "loss": 0.0375,
1860
+ "step": 1290
1861
+ },
1862
+ {
1863
+ "epoch": 5.18,
1864
+ "grad_norm": 0.5936269164085388,
1865
+ "learning_rate": 6.25e-05,
1866
+ "loss": 0.0362,
1867
+ "step": 1295
1868
+ },
1869
+ {
1870
+ "epoch": 5.2,
1871
+ "grad_norm": 0.6964532136917114,
1872
+ "learning_rate": 6.25e-05,
1873
+ "loss": 0.0361,
1874
+ "step": 1300
1875
+ },
1876
+ {
1877
+ "epoch": 5.22,
1878
+ "grad_norm": 1.0432935953140259,
1879
+ "learning_rate": 6.25e-05,
1880
+ "loss": 0.0346,
1881
+ "step": 1305
1882
+ },
1883
+ {
1884
+ "epoch": 5.24,
1885
+ "grad_norm": 0.6481297016143799,
1886
+ "learning_rate": 6.25e-05,
1887
+ "loss": 0.0351,
1888
+ "step": 1310
1889
+ },
1890
+ {
1891
+ "epoch": 5.26,
1892
+ "grad_norm": 0.9188110828399658,
1893
+ "learning_rate": 6.25e-05,
1894
+ "loss": 0.0378,
1895
+ "step": 1315
1896
+ },
1897
+ {
1898
+ "epoch": 5.28,
1899
+ "grad_norm": 0.4248051345348358,
1900
+ "learning_rate": 6.25e-05,
1901
+ "loss": 0.0296,
1902
+ "step": 1320
1903
+ },
1904
+ {
1905
+ "epoch": 5.3,
1906
+ "grad_norm": 0.5334679484367371,
1907
+ "learning_rate": 6.25e-05,
1908
+ "loss": 0.0397,
1909
+ "step": 1325
1910
+ },
1911
+ {
1912
+ "epoch": 5.32,
1913
+ "grad_norm": 0.7321200370788574,
1914
+ "learning_rate": 6.25e-05,
1915
+ "loss": 0.0414,
1916
+ "step": 1330
1917
+ },
1918
+ {
1919
+ "epoch": 5.34,
1920
+ "grad_norm": 0.5322144627571106,
1921
+ "learning_rate": 6.25e-05,
1922
+ "loss": 0.0381,
1923
+ "step": 1335
1924
+ },
1925
+ {
1926
+ "epoch": 5.36,
1927
+ "grad_norm": 0.8044850826263428,
1928
+ "learning_rate": 6.25e-05,
1929
+ "loss": 0.0348,
1930
+ "step": 1340
1931
+ },
1932
+ {
1933
+ "epoch": 5.38,
1934
+ "grad_norm": 0.6011214256286621,
1935
+ "learning_rate": 6.25e-05,
1936
+ "loss": 0.0379,
1937
+ "step": 1345
1938
+ },
1939
+ {
1940
+ "epoch": 5.4,
1941
+ "grad_norm": 0.7421667575836182,
1942
+ "learning_rate": 6.25e-05,
1943
+ "loss": 0.0379,
1944
+ "step": 1350
1945
+ },
1946
+ {
1947
+ "epoch": 5.42,
1948
+ "grad_norm": 0.4418427348136902,
1949
+ "learning_rate": 6.25e-05,
1950
+ "loss": 0.0328,
1951
+ "step": 1355
1952
+ },
1953
+ {
1954
+ "epoch": 5.44,
1955
+ "grad_norm": 0.6037031412124634,
1956
+ "learning_rate": 6.25e-05,
1957
+ "loss": 0.0351,
1958
+ "step": 1360
1959
+ },
1960
+ {
1961
+ "epoch": 5.46,
1962
+ "grad_norm": 0.7416286468505859,
1963
+ "learning_rate": 6.25e-05,
1964
+ "loss": 0.0344,
1965
+ "step": 1365
1966
+ },
1967
+ {
1968
+ "epoch": 5.48,
1969
+ "grad_norm": 0.9417647123336792,
1970
+ "learning_rate": 6.25e-05,
1971
+ "loss": 0.0504,
1972
+ "step": 1370
1973
+ },
1974
+ {
1975
+ "epoch": 5.5,
1976
+ "grad_norm": 0.5485287308692932,
1977
+ "learning_rate": 6.25e-05,
1978
+ "loss": 0.0383,
1979
+ "step": 1375
1980
+ },
1981
+ {
1982
+ "epoch": 5.52,
1983
+ "grad_norm": 0.647965133190155,
1984
+ "learning_rate": 6.25e-05,
1985
+ "loss": 0.0388,
1986
+ "step": 1380
1987
+ },
1988
+ {
1989
+ "epoch": 5.54,
1990
+ "grad_norm": 0.7375500202178955,
1991
+ "learning_rate": 6.25e-05,
1992
+ "loss": 0.0368,
1993
+ "step": 1385
1994
+ },
1995
+ {
1996
+ "epoch": 5.5600000000000005,
1997
+ "grad_norm": 0.7219087481498718,
1998
+ "learning_rate": 6.25e-05,
1999
+ "loss": 0.0497,
2000
+ "step": 1390
2001
+ },
2002
+ {
2003
+ "epoch": 5.58,
2004
+ "grad_norm": 0.49959471821784973,
2005
+ "learning_rate": 6.25e-05,
2006
+ "loss": 0.038,
2007
+ "step": 1395
2008
+ },
2009
+ {
2010
+ "epoch": 5.6,
2011
+ "grad_norm": 0.5299109816551208,
2012
+ "learning_rate": 6.25e-05,
2013
+ "loss": 0.0303,
2014
+ "step": 1400
2015
+ },
2016
+ {
2017
+ "epoch": 5.62,
2018
+ "grad_norm": 0.48730289936065674,
2019
+ "learning_rate": 6.25e-05,
2020
+ "loss": 0.0337,
2021
+ "step": 1405
2022
+ },
2023
+ {
2024
+ "epoch": 5.64,
2025
+ "grad_norm": 0.3811701834201813,
2026
+ "learning_rate": 6.25e-05,
2027
+ "loss": 0.0367,
2028
+ "step": 1410
2029
+ },
2030
+ {
2031
+ "epoch": 5.66,
2032
+ "grad_norm": 0.4611757695674896,
2033
+ "learning_rate": 6.25e-05,
2034
+ "loss": 0.0396,
2035
+ "step": 1415
2036
+ },
2037
+ {
2038
+ "epoch": 5.68,
2039
+ "grad_norm": 0.5509118437767029,
2040
+ "learning_rate": 6.25e-05,
2041
+ "loss": 0.0397,
2042
+ "step": 1420
2043
+ },
2044
+ {
2045
+ "epoch": 5.7,
2046
+ "grad_norm": 0.8130658268928528,
2047
+ "learning_rate": 6.25e-05,
2048
+ "loss": 0.0346,
2049
+ "step": 1425
2050
+ },
2051
+ {
2052
+ "epoch": 5.72,
2053
+ "grad_norm": 0.4248274266719818,
2054
+ "learning_rate": 6.25e-05,
2055
+ "loss": 0.0334,
2056
+ "step": 1430
2057
+ },
2058
+ {
2059
+ "epoch": 5.74,
2060
+ "grad_norm": 1.1918326616287231,
2061
+ "learning_rate": 6.25e-05,
2062
+ "loss": 0.041,
2063
+ "step": 1435
2064
+ },
2065
+ {
2066
+ "epoch": 5.76,
2067
+ "grad_norm": 0.6501240730285645,
2068
+ "learning_rate": 6.25e-05,
2069
+ "loss": 0.0423,
2070
+ "step": 1440
2071
+ },
2072
+ {
2073
+ "epoch": 5.78,
2074
+ "grad_norm": 1.216350793838501,
2075
+ "learning_rate": 6.25e-05,
2076
+ "loss": 0.0366,
2077
+ "step": 1445
2078
+ },
2079
+ {
2080
+ "epoch": 5.8,
2081
+ "grad_norm": 0.48442235589027405,
2082
+ "learning_rate": 6.25e-05,
2083
+ "loss": 0.0335,
2084
+ "step": 1450
2085
+ },
2086
+ {
2087
+ "epoch": 5.82,
2088
+ "grad_norm": 0.5834723711013794,
2089
+ "learning_rate": 6.25e-05,
2090
+ "loss": 0.0414,
2091
+ "step": 1455
2092
+ },
2093
+ {
2094
+ "epoch": 5.84,
2095
+ "grad_norm": 0.7862647771835327,
2096
+ "learning_rate": 6.25e-05,
2097
+ "loss": 0.0438,
2098
+ "step": 1460
2099
+ },
2100
+ {
2101
+ "epoch": 5.86,
2102
+ "grad_norm": 0.8282245397567749,
2103
+ "learning_rate": 6.25e-05,
2104
+ "loss": 0.0384,
2105
+ "step": 1465
2106
+ },
2107
+ {
2108
+ "epoch": 5.88,
2109
+ "grad_norm": 0.8185272812843323,
2110
+ "learning_rate": 6.25e-05,
2111
+ "loss": 0.0386,
2112
+ "step": 1470
2113
+ },
2114
+ {
2115
+ "epoch": 5.9,
2116
+ "grad_norm": 0.6197579503059387,
2117
+ "learning_rate": 6.25e-05,
2118
+ "loss": 0.036,
2119
+ "step": 1475
2120
+ },
2121
+ {
2122
+ "epoch": 5.92,
2123
+ "grad_norm": 0.5256204009056091,
2124
+ "learning_rate": 6.25e-05,
2125
+ "loss": 0.0331,
2126
+ "step": 1480
2127
+ },
2128
+ {
2129
+ "epoch": 5.9399999999999995,
2130
+ "grad_norm": 0.5693526864051819,
2131
+ "learning_rate": 6.25e-05,
2132
+ "loss": 0.0404,
2133
+ "step": 1485
2134
+ },
2135
+ {
2136
+ "epoch": 5.96,
2137
+ "grad_norm": 0.505524754524231,
2138
+ "learning_rate": 6.25e-05,
2139
+ "loss": 0.0345,
2140
+ "step": 1490
2141
+ },
2142
+ {
2143
+ "epoch": 5.98,
2144
+ "grad_norm": 0.7480014562606812,
2145
+ "learning_rate": 6.25e-05,
2146
+ "loss": 0.0421,
2147
+ "step": 1495
2148
+ },
2149
+ {
2150
+ "epoch": 6.0,
2151
+ "grad_norm": 0.6769825220108032,
2152
+ "learning_rate": 6.25e-05,
2153
+ "loss": 0.0364,
2154
+ "step": 1500
2155
+ },
2156
+ {
2157
+ "epoch": 6.0,
2158
+ "eval_cer": 0.04693244335514823,
2159
+ "eval_loss": 0.07296038419008255,
2160
+ "eval_runtime": 281.2143,
2161
+ "eval_samples_per_second": 1.778,
2162
+ "eval_steps_per_second": 0.445,
2163
+ "step": 1500
2164
+ },
2165
+ {
2166
+ "epoch": 6.02,
2167
+ "grad_norm": 0.4684133231639862,
2168
+ "learning_rate": 6.25e-05,
2169
+ "loss": 0.0311,
2170
+ "step": 1505
2171
+ },
2172
+ {
2173
+ "epoch": 6.04,
2174
+ "grad_norm": 0.43542611598968506,
2175
+ "learning_rate": 6.25e-05,
2176
+ "loss": 0.0276,
2177
+ "step": 1510
2178
+ },
2179
+ {
2180
+ "epoch": 6.06,
2181
+ "grad_norm": 0.5249391198158264,
2182
+ "learning_rate": 6.25e-05,
2183
+ "loss": 0.0314,
2184
+ "step": 1515
2185
+ },
2186
+ {
2187
+ "epoch": 6.08,
2188
+ "grad_norm": 0.7982779741287231,
2189
+ "learning_rate": 6.25e-05,
2190
+ "loss": 0.03,
2191
+ "step": 1520
2192
+ },
2193
+ {
2194
+ "epoch": 6.1,
2195
+ "grad_norm": 0.5430174469947815,
2196
+ "learning_rate": 6.25e-05,
2197
+ "loss": 0.0289,
2198
+ "step": 1525
2199
+ },
2200
+ {
2201
+ "epoch": 6.12,
2202
+ "grad_norm": 0.6563279628753662,
2203
+ "learning_rate": 6.25e-05,
2204
+ "loss": 0.0299,
2205
+ "step": 1530
2206
+ },
2207
+ {
2208
+ "epoch": 6.14,
2209
+ "grad_norm": 0.45975133776664734,
2210
+ "learning_rate": 6.25e-05,
2211
+ "loss": 0.026,
2212
+ "step": 1535
2213
+ },
2214
+ {
2215
+ "epoch": 6.16,
2216
+ "grad_norm": 0.6540797352790833,
2217
+ "learning_rate": 6.25e-05,
2218
+ "loss": 0.0313,
2219
+ "step": 1540
2220
+ },
2221
+ {
2222
+ "epoch": 6.18,
2223
+ "grad_norm": 0.6721683740615845,
2224
+ "learning_rate": 6.25e-05,
2225
+ "loss": 0.0335,
2226
+ "step": 1545
2227
+ },
2228
+ {
2229
+ "epoch": 6.2,
2230
+ "grad_norm": 0.5357054471969604,
2231
+ "learning_rate": 6.25e-05,
2232
+ "loss": 0.0357,
2233
+ "step": 1550
2234
+ },
2235
+ {
2236
+ "epoch": 6.22,
2237
+ "grad_norm": 1.2677907943725586,
2238
+ "learning_rate": 6.25e-05,
2239
+ "loss": 0.0362,
2240
+ "step": 1555
2241
+ },
2242
+ {
2243
+ "epoch": 6.24,
2244
+ "grad_norm": 0.8369943499565125,
2245
+ "learning_rate": 6.25e-05,
2246
+ "loss": 0.0331,
2247
+ "step": 1560
2248
+ },
2249
+ {
2250
+ "epoch": 6.26,
2251
+ "grad_norm": 0.8115782141685486,
2252
+ "learning_rate": 6.25e-05,
2253
+ "loss": 0.0381,
2254
+ "step": 1565
2255
+ },
2256
+ {
2257
+ "epoch": 6.28,
2258
+ "grad_norm": 0.598883330821991,
2259
+ "learning_rate": 6.25e-05,
2260
+ "loss": 0.0343,
2261
+ "step": 1570
2262
+ },
2263
+ {
2264
+ "epoch": 6.3,
2265
+ "grad_norm": 0.9512626528739929,
2266
+ "learning_rate": 6.25e-05,
2267
+ "loss": 0.0362,
2268
+ "step": 1575
2269
+ },
2270
+ {
2271
+ "epoch": 6.32,
2272
+ "grad_norm": 0.8760331273078918,
2273
+ "learning_rate": 6.25e-05,
2274
+ "loss": 0.0357,
2275
+ "step": 1580
2276
+ },
2277
+ {
2278
+ "epoch": 6.34,
2279
+ "grad_norm": 0.502618134021759,
2280
+ "learning_rate": 6.25e-05,
2281
+ "loss": 0.0273,
2282
+ "step": 1585
2283
+ },
2284
+ {
2285
+ "epoch": 6.36,
2286
+ "grad_norm": 0.5483182072639465,
2287
+ "learning_rate": 6.25e-05,
2288
+ "loss": 0.0353,
2289
+ "step": 1590
2290
+ },
2291
+ {
2292
+ "epoch": 6.38,
2293
+ "grad_norm": 0.7582818865776062,
2294
+ "learning_rate": 6.25e-05,
2295
+ "loss": 0.0325,
2296
+ "step": 1595
2297
+ },
2298
+ {
2299
+ "epoch": 6.4,
2300
+ "grad_norm": 0.7242081761360168,
2301
+ "learning_rate": 6.25e-05,
2302
+ "loss": 0.0294,
2303
+ "step": 1600
2304
+ },
2305
+ {
2306
+ "epoch": 6.42,
2307
+ "grad_norm": 0.6686793565750122,
2308
+ "learning_rate": 6.25e-05,
2309
+ "loss": 0.0376,
2310
+ "step": 1605
2311
+ },
2312
+ {
2313
+ "epoch": 6.44,
2314
+ "grad_norm": 0.6351500153541565,
2315
+ "learning_rate": 6.25e-05,
2316
+ "loss": 0.0322,
2317
+ "step": 1610
2318
+ },
2319
+ {
2320
+ "epoch": 6.46,
2321
+ "grad_norm": 0.7319616675376892,
2322
+ "learning_rate": 6.25e-05,
2323
+ "loss": 0.0306,
2324
+ "step": 1615
2325
+ },
2326
+ {
2327
+ "epoch": 6.48,
2328
+ "grad_norm": 0.6641121506690979,
2329
+ "learning_rate": 6.25e-05,
2330
+ "loss": 0.035,
2331
+ "step": 1620
2332
+ },
2333
+ {
2334
+ "epoch": 6.5,
2335
+ "grad_norm": 0.6666487455368042,
2336
+ "learning_rate": 6.25e-05,
2337
+ "loss": 0.0304,
2338
+ "step": 1625
2339
+ },
2340
+ {
2341
+ "epoch": 6.52,
2342
+ "grad_norm": 0.540726363658905,
2343
+ "learning_rate": 6.25e-05,
2344
+ "loss": 0.036,
2345
+ "step": 1630
2346
+ },
2347
+ {
2348
+ "epoch": 6.54,
2349
+ "grad_norm": 0.5046465992927551,
2350
+ "learning_rate": 6.25e-05,
2351
+ "loss": 0.0384,
2352
+ "step": 1635
2353
+ },
2354
+ {
2355
+ "epoch": 6.5600000000000005,
2356
+ "grad_norm": 0.5858854651451111,
2357
+ "learning_rate": 6.25e-05,
2358
+ "loss": 0.0349,
2359
+ "step": 1640
2360
+ },
2361
+ {
2362
+ "epoch": 6.58,
2363
+ "grad_norm": 0.6154960989952087,
2364
+ "learning_rate": 6.25e-05,
2365
+ "loss": 0.0381,
2366
+ "step": 1645
2367
+ },
2368
+ {
2369
+ "epoch": 6.6,
2370
+ "grad_norm": 0.9321079254150391,
2371
+ "learning_rate": 6.25e-05,
2372
+ "loss": 0.0298,
2373
+ "step": 1650
2374
+ },
2375
+ {
2376
+ "epoch": 6.62,
2377
+ "grad_norm": 0.4276799261569977,
2378
+ "learning_rate": 6.25e-05,
2379
+ "loss": 0.0295,
2380
+ "step": 1655
2381
+ },
2382
+ {
2383
+ "epoch": 6.64,
2384
+ "grad_norm": 0.545616090297699,
2385
+ "learning_rate": 6.25e-05,
2386
+ "loss": 0.0279,
2387
+ "step": 1660
2388
+ },
2389
+ {
2390
+ "epoch": 6.66,
2391
+ "grad_norm": 0.5112252235412598,
2392
+ "learning_rate": 6.25e-05,
2393
+ "loss": 0.0371,
2394
+ "step": 1665
2395
+ },
2396
+ {
2397
+ "epoch": 6.68,
2398
+ "grad_norm": 0.6641426086425781,
2399
+ "learning_rate": 6.25e-05,
2400
+ "loss": 0.0288,
2401
+ "step": 1670
2402
+ },
2403
+ {
2404
+ "epoch": 6.7,
2405
+ "grad_norm": 0.4481450021266937,
2406
+ "learning_rate": 6.25e-05,
2407
+ "loss": 0.0342,
2408
+ "step": 1675
2409
+ },
2410
+ {
2411
+ "epoch": 6.72,
2412
+ "grad_norm": 0.6158471703529358,
2413
+ "learning_rate": 6.25e-05,
2414
+ "loss": 0.0378,
2415
+ "step": 1680
2416
+ },
2417
+ {
2418
+ "epoch": 6.74,
2419
+ "grad_norm": 0.47170689702033997,
2420
+ "learning_rate": 6.25e-05,
2421
+ "loss": 0.0314,
2422
+ "step": 1685
2423
+ },
2424
+ {
2425
+ "epoch": 6.76,
2426
+ "grad_norm": 0.33950161933898926,
2427
+ "learning_rate": 6.25e-05,
2428
+ "loss": 0.0301,
2429
+ "step": 1690
2430
+ },
2431
+ {
2432
+ "epoch": 6.78,
2433
+ "grad_norm": 0.572180449962616,
2434
+ "learning_rate": 6.25e-05,
2435
+ "loss": 0.0336,
2436
+ "step": 1695
2437
+ },
2438
+ {
2439
+ "epoch": 6.8,
2440
+ "grad_norm": 0.7031643986701965,
2441
+ "learning_rate": 6.25e-05,
2442
+ "loss": 0.0329,
2443
+ "step": 1700
2444
+ },
2445
+ {
2446
+ "epoch": 6.82,
2447
+ "grad_norm": 0.9186747074127197,
2448
+ "learning_rate": 6.25e-05,
2449
+ "loss": 0.0332,
2450
+ "step": 1705
2451
+ },
2452
+ {
2453
+ "epoch": 6.84,
2454
+ "grad_norm": 0.6245182156562805,
2455
+ "learning_rate": 6.25e-05,
2456
+ "loss": 0.0279,
2457
+ "step": 1710
2458
+ },
2459
+ {
2460
+ "epoch": 6.86,
2461
+ "grad_norm": 0.6732586026191711,
2462
+ "learning_rate": 6.25e-05,
2463
+ "loss": 0.0373,
2464
+ "step": 1715
2465
+ },
2466
+ {
2467
+ "epoch": 6.88,
2468
+ "grad_norm": 0.4248816967010498,
2469
+ "learning_rate": 6.25e-05,
2470
+ "loss": 0.029,
2471
+ "step": 1720
2472
+ },
2473
+ {
2474
+ "epoch": 6.9,
2475
+ "grad_norm": 0.6019976735115051,
2476
+ "learning_rate": 6.25e-05,
2477
+ "loss": 0.0361,
2478
+ "step": 1725
2479
+ },
2480
+ {
2481
+ "epoch": 6.92,
2482
+ "grad_norm": 0.7918051481246948,
2483
+ "learning_rate": 6.25e-05,
2484
+ "loss": 0.0374,
2485
+ "step": 1730
2486
+ },
2487
+ {
2488
+ "epoch": 6.9399999999999995,
2489
+ "grad_norm": 0.6711762547492981,
2490
+ "learning_rate": 6.25e-05,
2491
+ "loss": 0.0364,
2492
+ "step": 1735
2493
+ },
2494
+ {
2495
+ "epoch": 6.96,
2496
+ "grad_norm": 0.41297319531440735,
2497
+ "learning_rate": 6.25e-05,
2498
+ "loss": 0.0309,
2499
+ "step": 1740
2500
+ },
2501
+ {
2502
+ "epoch": 6.98,
2503
+ "grad_norm": 0.5152994990348816,
2504
+ "learning_rate": 6.25e-05,
2505
+ "loss": 0.0405,
2506
+ "step": 1745
2507
+ },
2508
+ {
2509
+ "epoch": 7.0,
2510
+ "grad_norm": 0.6014075875282288,
2511
+ "learning_rate": 6.25e-05,
2512
+ "loss": 0.0331,
2513
+ "step": 1750
2514
+ },
2515
+ {
2516
+ "epoch": 7.0,
2517
+ "eval_cer": 0.06257659114019763,
2518
+ "eval_loss": 0.07013064622879028,
2519
+ "eval_runtime": 282.3729,
2520
+ "eval_samples_per_second": 1.771,
2521
+ "eval_steps_per_second": 0.443,
2522
+ "step": 1750
2523
+ }
2524
+ ],
2525
+ "logging_steps": 5,
2526
+ "max_steps": 5000,
2527
+ "num_input_tokens_seen": 0,
2528
+ "num_train_epochs": 20,
2529
+ "save_steps": 250,
2530
+ "stateful_callbacks": {
2531
+ "TrainerControl": {
2532
+ "args": {
2533
+ "should_epoch_stop": false,
2534
+ "should_evaluate": false,
2535
+ "should_log": false,
2536
+ "should_save": true,
2537
+ "should_training_stop": false
2538
+ },
2539
+ "attributes": {}
2540
+ }
2541
+ },
2542
+ "total_flos": 4.816141811712e+19,
2543
+ "train_batch_size": 16,
2544
+ "trial_name": null,
2545
+ "trial_params": null
2546
+ }
checkpoint-1750/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:105d055d6d84eb987fbbb4fc9493aa207f4712b04ab60a83adb7510815397317
3
+ size 5432
checkpoint-1750/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd868d1cb19c6c0f97db2d977410c0e7a5423b9fd66356dfe0889762ea83d2a1
3
  size 26237160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:497179bc28e24394b9b97e7109e90bad892bb849264101296c6b326682e270bb
3
  size 26237160
checkpoint-2000/adapter_model/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd868d1cb19c6c0f97db2d977410c0e7a5423b9fd66356dfe0889762ea83d2a1
3
  size 26237160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:497179bc28e24394b9b97e7109e90bad892bb849264101296c6b326682e270bb
3
  size 26237160
checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdc0f349f7efdf2bc44fa1430a19fdeac2d7a1c30030e2a8641134a7c6dfb5c7
3
  size 52563258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beb687e9431d7bb0a22ff90e9f4a6dccb35e5296df2f45cc9a058aa5ac22e05a
3
  size 52563258
checkpoint-2000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec53fab41f30b0ea744426bc8021cbf90beb3d1d2df23f6cb49d7983395d1de3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371680cae369887d6595eceb7c3c2903d1cd663ffb570870fbccc2107e43c12f
3
  size 14244
checkpoint-2000/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.04981744137059402,
3
  "best_model_checkpoint": "./whisper-large-v3-turbo-finetuned-lora/checkpoint-1000",
4
  "epoch": 8.0,
5
- "eval_steps": 500,
6
  "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
@@ -10,2838 +10,2874 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
- "grad_norm": 11.033143997192383,
14
- "learning_rate": 8.333333333333334e-07,
15
- "loss": 3.7365,
16
  "step": 5
17
  },
18
  {
19
  "epoch": 0.04,
20
- "grad_norm": 10.168129920959473,
21
- "learning_rate": 1.875e-06,
22
- "loss": 3.6756,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.06,
27
- "grad_norm": 10.131425857543945,
28
- "learning_rate": 2.916666666666667e-06,
29
- "loss": 3.6681,
30
  "step": 15
31
  },
32
  {
33
  "epoch": 0.08,
34
- "grad_norm": 9.962166786193848,
35
- "learning_rate": 3.958333333333334e-06,
36
- "loss": 3.6567,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.1,
41
- "grad_norm": 9.637451171875,
42
- "learning_rate": 4.791666666666667e-06,
43
- "loss": 3.5903,
44
  "step": 25
45
  },
46
  {
47
  "epoch": 0.12,
48
- "grad_norm": 9.27942943572998,
49
- "learning_rate": 5.833333333333334e-06,
50
- "loss": 3.4592,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.14,
55
- "grad_norm": 9.690427780151367,
56
- "learning_rate": 6.875e-06,
57
- "loss": 3.299,
58
  "step": 35
59
  },
60
  {
61
  "epoch": 0.16,
62
- "grad_norm": 8.123926162719727,
63
- "learning_rate": 7.916666666666668e-06,
64
- "loss": 3.2058,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.18,
69
- "grad_norm": 6.938026428222656,
70
- "learning_rate": 8.958333333333334e-06,
71
- "loss": 3.0613,
72
  "step": 45
73
  },
74
  {
75
  "epoch": 0.2,
76
- "grad_norm": 6.615925312042236,
77
- "learning_rate": 1e-05,
78
- "loss": 2.8859,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.22,
83
- "grad_norm": 5.712332725524902,
84
- "learning_rate": 1.1041666666666666e-05,
85
- "loss": 2.6746,
86
  "step": 55
87
  },
88
  {
89
  "epoch": 0.24,
90
- "grad_norm": 4.229877471923828,
91
- "learning_rate": 1.2083333333333333e-05,
92
- "loss": 2.4948,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.26,
97
- "grad_norm": 3.9951322078704834,
98
- "learning_rate": 1.3125e-05,
99
- "loss": 2.3496,
100
  "step": 65
101
  },
102
  {
103
  "epoch": 0.28,
104
- "grad_norm": 4.010512351989746,
105
- "learning_rate": 1.4166666666666666e-05,
106
- "loss": 2.2345,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.3,
111
- "grad_norm": 3.2869503498077393,
112
- "learning_rate": 1.5208333333333335e-05,
113
- "loss": 2.0418,
114
  "step": 75
115
  },
116
  {
117
  "epoch": 0.32,
118
- "grad_norm": 3.47694993019104,
119
- "learning_rate": 1.6250000000000002e-05,
120
- "loss": 1.8212,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.34,
125
- "grad_norm": 2.761810779571533,
126
- "learning_rate": 1.7291666666666666e-05,
127
- "loss": 1.7471,
128
  "step": 85
129
  },
130
  {
131
  "epoch": 0.36,
132
- "grad_norm": 2.83661150932312,
133
- "learning_rate": 1.8333333333333333e-05,
134
- "loss": 1.6647,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.38,
139
- "grad_norm": 2.7371621131896973,
140
- "learning_rate": 1.9375e-05,
141
- "loss": 1.5239,
142
  "step": 95
143
  },
144
  {
145
  "epoch": 0.4,
146
- "grad_norm": 2.5980722904205322,
147
- "learning_rate": 2.0416666666666667e-05,
148
- "loss": 1.3501,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.42,
153
- "grad_norm": 2.8566689491271973,
154
- "learning_rate": 2.1458333333333334e-05,
155
- "loss": 1.3153,
156
  "step": 105
157
  },
158
  {
159
  "epoch": 0.44,
160
- "grad_norm": 2.052793264389038,
161
- "learning_rate": 2.2499999999999998e-05,
162
- "loss": 1.175,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.46,
167
- "grad_norm": 2.08168363571167,
168
- "learning_rate": 2.3541666666666665e-05,
169
- "loss": 1.0395,
170
  "step": 115
171
  },
172
  {
173
  "epoch": 0.48,
174
- "grad_norm": 1.830390453338623,
175
- "learning_rate": 2.4583333333333332e-05,
176
- "loss": 0.9517,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.5,
181
- "grad_norm": 2.9608633518218994,
182
- "learning_rate": 2.5625e-05,
183
- "loss": 0.8602,
184
  "step": 125
185
  },
186
  {
187
  "epoch": 0.52,
188
- "grad_norm": 2.2221925258636475,
189
- "learning_rate": 2.666666666666667e-05,
190
- "loss": 0.7687,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.54,
195
- "grad_norm": 2.185246706008911,
196
- "learning_rate": 2.7708333333333334e-05,
197
- "loss": 0.6957,
198
  "step": 135
199
  },
200
  {
201
  "epoch": 0.56,
202
- "grad_norm": 2.3892769813537598,
203
- "learning_rate": 2.875e-05,
204
- "loss": 0.6452,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.58,
209
- "grad_norm": 2.2923471927642822,
210
- "learning_rate": 2.9791666666666668e-05,
211
- "loss": 0.6001,
212
  "step": 145
213
  },
214
  {
215
  "epoch": 0.6,
216
- "grad_norm": 3.059990882873535,
217
- "learning_rate": 3.0833333333333335e-05,
218
- "loss": 0.5505,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.62,
223
- "grad_norm": 2.1722524166107178,
224
- "learning_rate": 3.1875e-05,
225
- "loss": 0.513,
226
  "step": 155
227
  },
228
  {
229
  "epoch": 0.64,
230
- "grad_norm": 2.4212610721588135,
231
- "learning_rate": 3.291666666666666e-05,
232
- "loss": 0.4868,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.66,
237
- "grad_norm": 2.297727584838867,
238
- "learning_rate": 3.3958333333333337e-05,
239
- "loss": 0.4696,
240
  "step": 165
241
  },
242
  {
243
  "epoch": 0.68,
244
- "grad_norm": 1.7377690076828003,
245
- "learning_rate": 3.5000000000000004e-05,
246
- "loss": 0.4174,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.7,
251
- "grad_norm": 1.821341872215271,
252
- "learning_rate": 3.6041666666666664e-05,
253
- "loss": 0.4204,
254
  "step": 175
255
  },
256
  {
257
  "epoch": 0.72,
258
- "grad_norm": 2.0993902683258057,
259
- "learning_rate": 3.708333333333334e-05,
260
- "loss": 0.3846,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.74,
265
- "grad_norm": 2.5224227905273438,
266
- "learning_rate": 3.8125e-05,
267
- "loss": 0.3499,
268
  "step": 185
269
  },
270
  {
271
  "epoch": 0.76,
272
- "grad_norm": 1.8540211915969849,
273
- "learning_rate": 3.916666666666667e-05,
274
- "loss": 0.3414,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.78,
279
- "grad_norm": 1.9813562631607056,
280
- "learning_rate": 4.020833333333333e-05,
281
- "loss": 0.3274,
282
  "step": 195
283
  },
284
  {
285
  "epoch": 0.8,
286
- "grad_norm": 1.385871171951294,
287
- "learning_rate": 4.125e-05,
288
- "loss": 0.2907,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.82,
293
- "grad_norm": 2.0511081218719482,
294
- "learning_rate": 4.229166666666667e-05,
295
- "loss": 0.2735,
296
  "step": 205
297
  },
298
  {
299
  "epoch": 0.84,
300
- "grad_norm": 2.0850329399108887,
301
- "learning_rate": 4.3333333333333334e-05,
302
- "loss": 0.2384,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.86,
307
- "grad_norm": 2.191450595855713,
308
- "learning_rate": 4.4375e-05,
309
- "loss": 0.2244,
310
  "step": 215
311
  },
312
  {
313
  "epoch": 0.88,
314
- "grad_norm": 3.4809000492095947,
315
- "learning_rate": 4.541666666666667e-05,
316
- "loss": 0.223,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.9,
321
- "grad_norm": 1.4290976524353027,
322
- "learning_rate": 4.645833333333333e-05,
323
- "loss": 0.194,
324
  "step": 225
325
  },
326
  {
327
  "epoch": 0.92,
328
- "grad_norm": 1.8528721332550049,
329
- "learning_rate": 4.75e-05,
330
- "loss": 0.1817,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.94,
335
- "grad_norm": 1.4630467891693115,
336
- "learning_rate": 4.854166666666666e-05,
337
- "loss": 0.1728,
338
  "step": 235
339
  },
340
  {
341
  "epoch": 0.96,
342
- "grad_norm": 1.6305458545684814,
343
- "learning_rate": 4.958333333333334e-05,
344
- "loss": 0.1859,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.98,
349
- "grad_norm": 1.455244779586792,
350
- "learning_rate": 5.0625000000000004e-05,
351
- "loss": 0.1531,
352
  "step": 245
353
  },
354
  {
355
  "epoch": 1.0,
356
- "grad_norm": 1.8049136400222778,
357
- "learning_rate": 5.1666666666666664e-05,
358
- "loss": 0.1412,
 
 
 
 
 
 
 
 
 
359
  "step": 250
360
  },
361
  {
362
  "epoch": 1.02,
363
- "grad_norm": 1.577805519104004,
364
- "learning_rate": 5.270833333333334e-05,
365
- "loss": 0.1384,
366
  "step": 255
367
  },
368
  {
369
  "epoch": 1.04,
370
- "grad_norm": 2.020803213119507,
371
- "learning_rate": 5.375e-05,
372
- "loss": 0.1398,
373
  "step": 260
374
  },
375
  {
376
  "epoch": 1.06,
377
- "grad_norm": 1.4404263496398926,
378
- "learning_rate": 5.479166666666667e-05,
379
- "loss": 0.1484,
380
  "step": 265
381
  },
382
  {
383
  "epoch": 1.08,
384
- "grad_norm": 2.497192621231079,
385
- "learning_rate": 5.583333333333333e-05,
386
- "loss": 0.1353,
387
  "step": 270
388
  },
389
  {
390
  "epoch": 1.1,
391
- "grad_norm": 3.2407219409942627,
392
- "learning_rate": 5.6875e-05,
393
- "loss": 0.1433,
394
  "step": 275
395
  },
396
  {
397
  "epoch": 1.12,
398
- "grad_norm": 1.4051156044006348,
399
- "learning_rate": 5.791666666666667e-05,
400
- "loss": 0.1181,
401
  "step": 280
402
  },
403
  {
404
  "epoch": 1.1400000000000001,
405
- "grad_norm": 1.4117530584335327,
406
- "learning_rate": 5.8958333333333334e-05,
407
- "loss": 0.1252,
408
  "step": 285
409
  },
410
  {
411
  "epoch": 1.16,
412
- "grad_norm": 1.6360172033309937,
413
- "learning_rate": 6e-05,
414
- "loss": 0.1233,
415
  "step": 290
416
  },
417
  {
418
  "epoch": 1.18,
419
- "grad_norm": 1.225799560546875,
420
- "learning_rate": 6.104166666666667e-05,
421
- "loss": 0.1084,
422
  "step": 295
423
  },
424
  {
425
  "epoch": 1.2,
426
- "grad_norm": 1.4874345064163208,
427
- "learning_rate": 6.208333333333333e-05,
428
- "loss": 0.125,
429
  "step": 300
430
  },
431
  {
432
  "epoch": 1.22,
433
- "grad_norm": 1.3238331079483032,
434
  "learning_rate": 6.25e-05,
435
- "loss": 0.1132,
436
  "step": 305
437
  },
438
  {
439
  "epoch": 1.24,
440
- "grad_norm": 2.354384183883667,
441
  "learning_rate": 6.25e-05,
442
- "loss": 0.0993,
443
  "step": 310
444
  },
445
  {
446
  "epoch": 1.26,
447
- "grad_norm": 2.2216718196868896,
448
  "learning_rate": 6.25e-05,
449
- "loss": 0.1325,
450
  "step": 315
451
  },
452
  {
453
  "epoch": 1.28,
454
- "grad_norm": 1.026408076286316,
455
  "learning_rate": 6.25e-05,
456
- "loss": 0.1035,
457
  "step": 320
458
  },
459
  {
460
  "epoch": 1.3,
461
- "grad_norm": 2.0583767890930176,
462
  "learning_rate": 6.25e-05,
463
- "loss": 0.1208,
464
  "step": 325
465
  },
466
  {
467
  "epoch": 1.32,
468
- "grad_norm": 1.9303004741668701,
469
  "learning_rate": 6.25e-05,
470
- "loss": 0.1119,
471
  "step": 330
472
  },
473
  {
474
  "epoch": 1.34,
475
- "grad_norm": 1.7043157815933228,
476
  "learning_rate": 6.25e-05,
477
- "loss": 0.1023,
478
  "step": 335
479
  },
480
  {
481
  "epoch": 1.3599999999999999,
482
- "grad_norm": 1.3245861530303955,
483
  "learning_rate": 6.25e-05,
484
- "loss": 0.1008,
485
  "step": 340
486
  },
487
  {
488
  "epoch": 1.38,
489
- "grad_norm": 1.541318655014038,
490
  "learning_rate": 6.25e-05,
491
- "loss": 0.0977,
492
  "step": 345
493
  },
494
  {
495
  "epoch": 1.4,
496
- "grad_norm": 1.8400285243988037,
497
  "learning_rate": 6.25e-05,
498
- "loss": 0.0887,
499
  "step": 350
500
  },
501
  {
502
  "epoch": 1.42,
503
- "grad_norm": 1.0839234590530396,
504
  "learning_rate": 6.25e-05,
505
- "loss": 0.0912,
506
  "step": 355
507
  },
508
  {
509
  "epoch": 1.44,
510
- "grad_norm": 1.269062876701355,
511
  "learning_rate": 6.25e-05,
512
- "loss": 0.0959,
513
  "step": 360
514
  },
515
  {
516
  "epoch": 1.46,
517
- "grad_norm": 1.0546581745147705,
518
  "learning_rate": 6.25e-05,
519
- "loss": 0.0875,
520
  "step": 365
521
  },
522
  {
523
  "epoch": 1.48,
524
- "grad_norm": 1.683465838432312,
525
  "learning_rate": 6.25e-05,
526
- "loss": 0.0933,
527
  "step": 370
528
  },
529
  {
530
  "epoch": 1.5,
531
- "grad_norm": 1.470189094543457,
532
  "learning_rate": 6.25e-05,
533
- "loss": 0.0986,
534
  "step": 375
535
  },
536
  {
537
  "epoch": 1.52,
538
- "grad_norm": 1.183585524559021,
539
  "learning_rate": 6.25e-05,
540
- "loss": 0.0979,
541
  "step": 380
542
  },
543
  {
544
  "epoch": 1.54,
545
- "grad_norm": 1.3886022567749023,
546
  "learning_rate": 6.25e-05,
547
- "loss": 0.1031,
548
  "step": 385
549
  },
550
  {
551
  "epoch": 1.56,
552
- "grad_norm": 1.105749487876892,
553
  "learning_rate": 6.25e-05,
554
- "loss": 0.0995,
555
  "step": 390
556
  },
557
  {
558
  "epoch": 1.58,
559
- "grad_norm": 1.0494953393936157,
560
  "learning_rate": 6.25e-05,
561
- "loss": 0.0755,
562
  "step": 395
563
  },
564
  {
565
  "epoch": 1.6,
566
- "grad_norm": 1.7028089761734009,
567
  "learning_rate": 6.25e-05,
568
- "loss": 0.0981,
569
  "step": 400
570
  },
571
  {
572
  "epoch": 1.62,
573
- "grad_norm": 1.5404858589172363,
574
  "learning_rate": 6.25e-05,
575
- "loss": 0.0917,
576
  "step": 405
577
  },
578
  {
579
  "epoch": 1.6400000000000001,
580
- "grad_norm": 1.6659576892852783,
581
  "learning_rate": 6.25e-05,
582
- "loss": 0.0891,
583
  "step": 410
584
  },
585
  {
586
  "epoch": 1.6600000000000001,
587
- "grad_norm": 1.353579044342041,
588
  "learning_rate": 6.25e-05,
589
- "loss": 0.0889,
590
  "step": 415
591
  },
592
  {
593
  "epoch": 1.6800000000000002,
594
- "grad_norm": 2.1539247035980225,
595
  "learning_rate": 6.25e-05,
596
- "loss": 0.1092,
597
  "step": 420
598
  },
599
  {
600
  "epoch": 1.7,
601
- "grad_norm": 1.4106309413909912,
602
  "learning_rate": 6.25e-05,
603
- "loss": 0.0951,
604
  "step": 425
605
  },
606
  {
607
  "epoch": 1.72,
608
- "grad_norm": 1.1167716979980469,
609
  "learning_rate": 6.25e-05,
610
- "loss": 0.0899,
611
  "step": 430
612
  },
613
  {
614
  "epoch": 1.74,
615
- "grad_norm": 1.2029541730880737,
616
  "learning_rate": 6.25e-05,
617
- "loss": 0.0902,
618
  "step": 435
619
  },
620
  {
621
  "epoch": 1.76,
622
- "grad_norm": 1.0979869365692139,
623
  "learning_rate": 6.25e-05,
624
- "loss": 0.089,
625
  "step": 440
626
  },
627
  {
628
  "epoch": 1.78,
629
- "grad_norm": 1.1568419933319092,
630
  "learning_rate": 6.25e-05,
631
- "loss": 0.0859,
632
  "step": 445
633
  },
634
  {
635
  "epoch": 1.8,
636
- "grad_norm": 1.2472410202026367,
637
  "learning_rate": 6.25e-05,
638
- "loss": 0.0877,
639
  "step": 450
640
  },
641
  {
642
  "epoch": 1.8199999999999998,
643
- "grad_norm": 1.2323497533798218,
644
  "learning_rate": 6.25e-05,
645
- "loss": 0.0865,
646
  "step": 455
647
  },
648
  {
649
  "epoch": 1.8399999999999999,
650
- "grad_norm": 1.2814995050430298,
651
  "learning_rate": 6.25e-05,
652
- "loss": 0.0831,
653
  "step": 460
654
  },
655
  {
656
  "epoch": 1.8599999999999999,
657
- "grad_norm": 0.912714421749115,
658
  "learning_rate": 6.25e-05,
659
- "loss": 0.0813,
660
  "step": 465
661
  },
662
  {
663
  "epoch": 1.88,
664
- "grad_norm": 1.2273714542388916,
665
  "learning_rate": 6.25e-05,
666
- "loss": 0.0884,
667
  "step": 470
668
  },
669
  {
670
  "epoch": 1.9,
671
- "grad_norm": 1.5928541421890259,
672
  "learning_rate": 6.25e-05,
673
- "loss": 0.0852,
674
  "step": 475
675
  },
676
  {
677
  "epoch": 1.92,
678
- "grad_norm": 0.9074931740760803,
679
  "learning_rate": 6.25e-05,
680
- "loss": 0.0792,
681
  "step": 480
682
  },
683
  {
684
  "epoch": 1.94,
685
- "grad_norm": 0.9795681834220886,
686
  "learning_rate": 6.25e-05,
687
- "loss": 0.0781,
688
  "step": 485
689
  },
690
  {
691
  "epoch": 1.96,
692
- "grad_norm": 1.4303114414215088,
693
  "learning_rate": 6.25e-05,
694
- "loss": 0.0757,
695
  "step": 490
696
  },
697
  {
698
  "epoch": 1.98,
699
- "grad_norm": 1.0313260555267334,
700
  "learning_rate": 6.25e-05,
701
- "loss": 0.0881,
702
  "step": 495
703
  },
704
  {
705
  "epoch": 2.0,
706
- "grad_norm": 1.1294418573379517,
707
  "learning_rate": 6.25e-05,
708
- "loss": 0.0785,
709
  "step": 500
710
  },
711
  {
712
  "epoch": 2.0,
713
- "eval_cer": 0.05613677854233956,
714
- "eval_loss": 0.08199143409729004,
715
- "eval_runtime": 495.5562,
716
- "eval_samples_per_second": 2.018,
717
- "eval_steps_per_second": 0.504,
718
  "step": 500
719
  },
720
  {
721
  "epoch": 2.02,
722
- "grad_norm": 1.0397248268127441,
723
  "learning_rate": 6.25e-05,
724
- "loss": 0.0591,
725
  "step": 505
726
  },
727
  {
728
  "epoch": 2.04,
729
- "grad_norm": 0.8539375066757202,
730
  "learning_rate": 6.25e-05,
731
- "loss": 0.0664,
732
  "step": 510
733
  },
734
  {
735
  "epoch": 2.06,
736
- "grad_norm": 1.3555073738098145,
737
  "learning_rate": 6.25e-05,
738
- "loss": 0.0772,
739
  "step": 515
740
  },
741
  {
742
  "epoch": 2.08,
743
- "grad_norm": 1.6025832891464233,
744
  "learning_rate": 6.25e-05,
745
- "loss": 0.0737,
746
  "step": 520
747
  },
748
  {
749
  "epoch": 2.1,
750
- "grad_norm": 1.3090434074401855,
751
  "learning_rate": 6.25e-05,
752
- "loss": 0.0619,
753
  "step": 525
754
  },
755
  {
756
  "epoch": 2.12,
757
- "grad_norm": 0.9269134998321533,
758
  "learning_rate": 6.25e-05,
759
- "loss": 0.0679,
760
  "step": 530
761
  },
762
  {
763
  "epoch": 2.14,
764
- "grad_norm": 0.8540180325508118,
765
  "learning_rate": 6.25e-05,
766
- "loss": 0.0718,
767
  "step": 535
768
  },
769
  {
770
  "epoch": 2.16,
771
- "grad_norm": 0.9320145845413208,
772
  "learning_rate": 6.25e-05,
773
- "loss": 0.0769,
774
  "step": 540
775
  },
776
  {
777
  "epoch": 2.18,
778
- "grad_norm": 1.6879560947418213,
779
  "learning_rate": 6.25e-05,
780
- "loss": 0.0765,
781
  "step": 545
782
  },
783
  {
784
  "epoch": 2.2,
785
- "grad_norm": 1.3687632083892822,
786
  "learning_rate": 6.25e-05,
787
- "loss": 0.0769,
788
  "step": 550
789
  },
790
  {
791
  "epoch": 2.22,
792
- "grad_norm": 1.2760627269744873,
793
  "learning_rate": 6.25e-05,
794
- "loss": 0.073,
795
  "step": 555
796
  },
797
  {
798
  "epoch": 2.24,
799
- "grad_norm": 0.8481590747833252,
800
  "learning_rate": 6.25e-05,
801
- "loss": 0.0669,
802
  "step": 560
803
  },
804
  {
805
  "epoch": 2.26,
806
- "grad_norm": 1.3147085905075073,
807
  "learning_rate": 6.25e-05,
808
- "loss": 0.0626,
809
  "step": 565
810
  },
811
  {
812
  "epoch": 2.2800000000000002,
813
- "grad_norm": 1.1458756923675537,
814
  "learning_rate": 6.25e-05,
815
- "loss": 0.0665,
816
  "step": 570
817
  },
818
  {
819
  "epoch": 2.3,
820
- "grad_norm": 1.0779470205307007,
821
  "learning_rate": 6.25e-05,
822
- "loss": 0.0783,
823
  "step": 575
824
  },
825
  {
826
  "epoch": 2.32,
827
- "grad_norm": 1.0333281755447388,
828
  "learning_rate": 6.25e-05,
829
- "loss": 0.0605,
830
  "step": 580
831
  },
832
  {
833
  "epoch": 2.34,
834
- "grad_norm": 1.1638994216918945,
835
  "learning_rate": 6.25e-05,
836
- "loss": 0.0644,
837
  "step": 585
838
  },
839
  {
840
  "epoch": 2.36,
841
- "grad_norm": 0.9482213258743286,
842
  "learning_rate": 6.25e-05,
843
- "loss": 0.0689,
844
  "step": 590
845
  },
846
  {
847
  "epoch": 2.38,
848
- "grad_norm": 0.9697607755661011,
849
  "learning_rate": 6.25e-05,
850
- "loss": 0.0628,
851
  "step": 595
852
  },
853
  {
854
  "epoch": 2.4,
855
- "grad_norm": 1.0675064325332642,
856
  "learning_rate": 6.25e-05,
857
- "loss": 0.0644,
858
  "step": 600
859
  },
860
  {
861
  "epoch": 2.42,
862
- "grad_norm": 1.0749262571334839,
863
  "learning_rate": 6.25e-05,
864
- "loss": 0.0572,
865
  "step": 605
866
  },
867
  {
868
  "epoch": 2.44,
869
- "grad_norm": 1.2545385360717773,
870
  "learning_rate": 6.25e-05,
871
- "loss": 0.0601,
872
  "step": 610
873
  },
874
  {
875
  "epoch": 2.46,
876
- "grad_norm": 1.1135083436965942,
877
  "learning_rate": 6.25e-05,
878
- "loss": 0.0607,
879
  "step": 615
880
  },
881
  {
882
  "epoch": 2.48,
883
- "grad_norm": 0.821413516998291,
884
  "learning_rate": 6.25e-05,
885
- "loss": 0.0622,
886
  "step": 620
887
  },
888
  {
889
  "epoch": 2.5,
890
- "grad_norm": 0.8959715366363525,
891
  "learning_rate": 6.25e-05,
892
- "loss": 0.073,
893
  "step": 625
894
  },
895
  {
896
  "epoch": 2.52,
897
- "grad_norm": 0.6712917685508728,
898
  "learning_rate": 6.25e-05,
899
- "loss": 0.0649,
900
  "step": 630
901
  },
902
  {
903
  "epoch": 2.54,
904
- "grad_norm": 0.6646750569343567,
905
  "learning_rate": 6.25e-05,
906
- "loss": 0.07,
907
  "step": 635
908
  },
909
  {
910
  "epoch": 2.56,
911
- "grad_norm": 1.0652884244918823,
912
  "learning_rate": 6.25e-05,
913
- "loss": 0.0657,
914
  "step": 640
915
  },
916
  {
917
  "epoch": 2.58,
918
- "grad_norm": 1.035218596458435,
919
  "learning_rate": 6.25e-05,
920
- "loss": 0.0789,
921
  "step": 645
922
  },
923
  {
924
  "epoch": 2.6,
925
- "grad_norm": 0.860249936580658,
926
  "learning_rate": 6.25e-05,
927
- "loss": 0.0589,
928
  "step": 650
929
  },
930
  {
931
  "epoch": 2.62,
932
- "grad_norm": 0.7494838237762451,
933
  "learning_rate": 6.25e-05,
934
- "loss": 0.077,
935
  "step": 655
936
  },
937
  {
938
  "epoch": 2.64,
939
- "grad_norm": 1.524198055267334,
940
  "learning_rate": 6.25e-05,
941
- "loss": 0.0727,
942
  "step": 660
943
  },
944
  {
945
  "epoch": 2.66,
946
- "grad_norm": 0.9438517689704895,
947
  "learning_rate": 6.25e-05,
948
- "loss": 0.0641,
949
  "step": 665
950
  },
951
  {
952
  "epoch": 2.68,
953
- "grad_norm": 1.0982081890106201,
954
  "learning_rate": 6.25e-05,
955
- "loss": 0.0647,
956
  "step": 670
957
  },
958
  {
959
  "epoch": 2.7,
960
- "grad_norm": 0.7919325232505798,
961
  "learning_rate": 6.25e-05,
962
- "loss": 0.0689,
963
  "step": 675
964
  },
965
  {
966
  "epoch": 2.7199999999999998,
967
- "grad_norm": 0.9766571521759033,
968
  "learning_rate": 6.25e-05,
969
- "loss": 0.0671,
970
  "step": 680
971
  },
972
  {
973
  "epoch": 2.74,
974
- "grad_norm": 1.0895709991455078,
975
  "learning_rate": 6.25e-05,
976
- "loss": 0.0681,
977
  "step": 685
978
  },
979
  {
980
  "epoch": 2.76,
981
- "grad_norm": 1.1461646556854248,
982
  "learning_rate": 6.25e-05,
983
- "loss": 0.0729,
984
  "step": 690
985
  },
986
  {
987
  "epoch": 2.7800000000000002,
988
- "grad_norm": 0.7813361883163452,
989
  "learning_rate": 6.25e-05,
990
- "loss": 0.0651,
991
  "step": 695
992
  },
993
  {
994
  "epoch": 2.8,
995
- "grad_norm": 0.8545769453048706,
996
  "learning_rate": 6.25e-05,
997
- "loss": 0.064,
998
  "step": 700
999
  },
1000
  {
1001
  "epoch": 2.82,
1002
- "grad_norm": 0.8444038033485413,
1003
  "learning_rate": 6.25e-05,
1004
- "loss": 0.0626,
1005
  "step": 705
1006
  },
1007
  {
1008
  "epoch": 2.84,
1009
- "grad_norm": 1.022660732269287,
1010
  "learning_rate": 6.25e-05,
1011
- "loss": 0.0644,
1012
  "step": 710
1013
  },
1014
  {
1015
  "epoch": 2.86,
1016
- "grad_norm": 0.7192943096160889,
1017
  "learning_rate": 6.25e-05,
1018
- "loss": 0.0605,
1019
  "step": 715
1020
  },
1021
  {
1022
  "epoch": 2.88,
1023
- "grad_norm": 0.8225955367088318,
1024
  "learning_rate": 6.25e-05,
1025
- "loss": 0.0657,
1026
  "step": 720
1027
  },
1028
  {
1029
  "epoch": 2.9,
1030
- "grad_norm": 0.8145541548728943,
1031
  "learning_rate": 6.25e-05,
1032
- "loss": 0.058,
1033
  "step": 725
1034
  },
1035
  {
1036
  "epoch": 2.92,
1037
- "grad_norm": 0.8709245920181274,
1038
  "learning_rate": 6.25e-05,
1039
- "loss": 0.0568,
1040
  "step": 730
1041
  },
1042
  {
1043
  "epoch": 2.94,
1044
- "grad_norm": 1.0601686239242554,
1045
  "learning_rate": 6.25e-05,
1046
- "loss": 0.0608,
1047
  "step": 735
1048
  },
1049
  {
1050
  "epoch": 2.96,
1051
- "grad_norm": 1.0230211019515991,
1052
  "learning_rate": 6.25e-05,
1053
- "loss": 0.0666,
1054
  "step": 740
1055
  },
1056
  {
1057
  "epoch": 2.98,
1058
- "grad_norm": 0.9883492588996887,
1059
  "learning_rate": 6.25e-05,
1060
- "loss": 0.062,
1061
  "step": 745
1062
  },
1063
  {
1064
  "epoch": 3.0,
1065
- "grad_norm": 1.0670173168182373,
1066
  "learning_rate": 6.25e-05,
1067
- "loss": 0.0724,
 
 
 
 
 
 
 
 
 
1068
  "step": 750
1069
  },
1070
  {
1071
  "epoch": 3.02,
1072
- "grad_norm": 1.0427318811416626,
1073
  "learning_rate": 6.25e-05,
1074
- "loss": 0.0477,
1075
  "step": 755
1076
  },
1077
  {
1078
  "epoch": 3.04,
1079
- "grad_norm": 1.355022668838501,
1080
  "learning_rate": 6.25e-05,
1081
- "loss": 0.0455,
1082
  "step": 760
1083
  },
1084
  {
1085
  "epoch": 3.06,
1086
- "grad_norm": 1.001657247543335,
1087
  "learning_rate": 6.25e-05,
1088
- "loss": 0.0605,
1089
  "step": 765
1090
  },
1091
  {
1092
  "epoch": 3.08,
1093
- "grad_norm": 1.4077788591384888,
1094
  "learning_rate": 6.25e-05,
1095
- "loss": 0.0463,
1096
  "step": 770
1097
  },
1098
  {
1099
  "epoch": 3.1,
1100
- "grad_norm": 1.3163388967514038,
1101
  "learning_rate": 6.25e-05,
1102
- "loss": 0.0581,
1103
  "step": 775
1104
  },
1105
  {
1106
  "epoch": 3.12,
1107
- "grad_norm": 0.6931395530700684,
1108
  "learning_rate": 6.25e-05,
1109
- "loss": 0.0531,
1110
  "step": 780
1111
  },
1112
  {
1113
  "epoch": 3.14,
1114
- "grad_norm": 0.648444652557373,
1115
  "learning_rate": 6.25e-05,
1116
- "loss": 0.0517,
1117
  "step": 785
1118
  },
1119
  {
1120
  "epoch": 3.16,
1121
- "grad_norm": 0.8961315751075745,
1122
  "learning_rate": 6.25e-05,
1123
- "loss": 0.0551,
1124
  "step": 790
1125
  },
1126
  {
1127
  "epoch": 3.18,
1128
- "grad_norm": 0.8918541669845581,
1129
  "learning_rate": 6.25e-05,
1130
- "loss": 0.0573,
1131
  "step": 795
1132
  },
1133
  {
1134
  "epoch": 3.2,
1135
- "grad_norm": 0.638659656047821,
1136
  "learning_rate": 6.25e-05,
1137
- "loss": 0.0544,
1138
  "step": 800
1139
  },
1140
  {
1141
  "epoch": 3.22,
1142
- "grad_norm": 0.6866273880004883,
1143
  "learning_rate": 6.25e-05,
1144
- "loss": 0.052,
1145
  "step": 805
1146
  },
1147
  {
1148
  "epoch": 3.24,
1149
- "grad_norm": 1.7238422632217407,
1150
  "learning_rate": 6.25e-05,
1151
- "loss": 0.0555,
1152
  "step": 810
1153
  },
1154
  {
1155
  "epoch": 3.26,
1156
- "grad_norm": 0.958077073097229,
1157
  "learning_rate": 6.25e-05,
1158
- "loss": 0.0606,
1159
  "step": 815
1160
  },
1161
  {
1162
  "epoch": 3.2800000000000002,
1163
- "grad_norm": 0.8000004887580872,
1164
  "learning_rate": 6.25e-05,
1165
- "loss": 0.05,
1166
  "step": 820
1167
  },
1168
  {
1169
  "epoch": 3.3,
1170
- "grad_norm": 0.7521831393241882,
1171
  "learning_rate": 6.25e-05,
1172
- "loss": 0.0486,
1173
  "step": 825
1174
  },
1175
  {
1176
  "epoch": 3.32,
1177
- "grad_norm": 0.8134447336196899,
1178
  "learning_rate": 6.25e-05,
1179
- "loss": 0.0557,
1180
  "step": 830
1181
  },
1182
  {
1183
  "epoch": 3.34,
1184
- "grad_norm": 1.1599044799804688,
1185
  "learning_rate": 6.25e-05,
1186
- "loss": 0.0596,
1187
  "step": 835
1188
  },
1189
  {
1190
  "epoch": 3.36,
1191
- "grad_norm": 0.5582810044288635,
1192
  "learning_rate": 6.25e-05,
1193
- "loss": 0.05,
1194
  "step": 840
1195
  },
1196
  {
1197
  "epoch": 3.38,
1198
- "grad_norm": 0.6436423659324646,
1199
  "learning_rate": 6.25e-05,
1200
- "loss": 0.048,
1201
  "step": 845
1202
  },
1203
  {
1204
  "epoch": 3.4,
1205
- "grad_norm": 1.0337690114974976,
1206
  "learning_rate": 6.25e-05,
1207
- "loss": 0.0512,
1208
  "step": 850
1209
  },
1210
  {
1211
  "epoch": 3.42,
1212
- "grad_norm": 1.2385281324386597,
1213
  "learning_rate": 6.25e-05,
1214
- "loss": 0.0541,
1215
  "step": 855
1216
  },
1217
  {
1218
  "epoch": 3.44,
1219
- "grad_norm": 1.335816740989685,
1220
  "learning_rate": 6.25e-05,
1221
- "loss": 0.054,
1222
  "step": 860
1223
  },
1224
  {
1225
  "epoch": 3.46,
1226
- "grad_norm": 0.8935145139694214,
1227
  "learning_rate": 6.25e-05,
1228
- "loss": 0.0529,
1229
  "step": 865
1230
  },
1231
  {
1232
  "epoch": 3.48,
1233
- "grad_norm": 0.897282600402832,
1234
  "learning_rate": 6.25e-05,
1235
- "loss": 0.0569,
1236
  "step": 870
1237
  },
1238
  {
1239
  "epoch": 3.5,
1240
- "grad_norm": 0.5967718362808228,
1241
  "learning_rate": 6.25e-05,
1242
- "loss": 0.0554,
1243
  "step": 875
1244
  },
1245
  {
1246
  "epoch": 3.52,
1247
- "grad_norm": 0.6769823431968689,
1248
  "learning_rate": 6.25e-05,
1249
- "loss": 0.0541,
1250
  "step": 880
1251
  },
1252
  {
1253
  "epoch": 3.54,
1254
- "grad_norm": 0.6052355170249939,
1255
  "learning_rate": 6.25e-05,
1256
- "loss": 0.0453,
1257
  "step": 885
1258
  },
1259
  {
1260
  "epoch": 3.56,
1261
- "grad_norm": 0.6003367900848389,
1262
  "learning_rate": 6.25e-05,
1263
- "loss": 0.0488,
1264
  "step": 890
1265
  },
1266
  {
1267
  "epoch": 3.58,
1268
- "grad_norm": 1.1685441732406616,
1269
  "learning_rate": 6.25e-05,
1270
- "loss": 0.0518,
1271
  "step": 895
1272
  },
1273
  {
1274
  "epoch": 3.6,
1275
- "grad_norm": 1.1867949962615967,
1276
  "learning_rate": 6.25e-05,
1277
- "loss": 0.0581,
1278
  "step": 900
1279
  },
1280
  {
1281
  "epoch": 3.62,
1282
- "grad_norm": 0.8192417025566101,
1283
  "learning_rate": 6.25e-05,
1284
- "loss": 0.0564,
1285
  "step": 905
1286
  },
1287
  {
1288
  "epoch": 3.64,
1289
- "grad_norm": 0.9395178556442261,
1290
  "learning_rate": 6.25e-05,
1291
- "loss": 0.057,
1292
  "step": 910
1293
  },
1294
  {
1295
  "epoch": 3.66,
1296
- "grad_norm": 0.8012380003929138,
1297
  "learning_rate": 6.25e-05,
1298
- "loss": 0.0492,
1299
  "step": 915
1300
  },
1301
  {
1302
  "epoch": 3.68,
1303
- "grad_norm": 0.6032869815826416,
1304
  "learning_rate": 6.25e-05,
1305
- "loss": 0.0551,
1306
  "step": 920
1307
  },
1308
  {
1309
  "epoch": 3.7,
1310
- "grad_norm": 0.6057426333427429,
1311
  "learning_rate": 6.25e-05,
1312
- "loss": 0.0648,
1313
  "step": 925
1314
  },
1315
  {
1316
  "epoch": 3.7199999999999998,
1317
- "grad_norm": 0.752521276473999,
1318
  "learning_rate": 6.25e-05,
1319
- "loss": 0.0519,
1320
  "step": 930
1321
  },
1322
  {
1323
  "epoch": 3.74,
1324
- "grad_norm": 0.9516021609306335,
1325
  "learning_rate": 6.25e-05,
1326
- "loss": 0.0594,
1327
  "step": 935
1328
  },
1329
  {
1330
  "epoch": 3.76,
1331
- "grad_norm": 1.0422921180725098,
1332
  "learning_rate": 6.25e-05,
1333
- "loss": 0.0547,
1334
  "step": 940
1335
  },
1336
  {
1337
  "epoch": 3.7800000000000002,
1338
- "grad_norm": 0.7628741264343262,
1339
  "learning_rate": 6.25e-05,
1340
- "loss": 0.0575,
1341
  "step": 945
1342
  },
1343
  {
1344
  "epoch": 3.8,
1345
- "grad_norm": 0.6722723841667175,
1346
  "learning_rate": 6.25e-05,
1347
- "loss": 0.0464,
1348
  "step": 950
1349
  },
1350
  {
1351
  "epoch": 3.82,
1352
- "grad_norm": 1.0617127418518066,
1353
  "learning_rate": 6.25e-05,
1354
- "loss": 0.0477,
1355
  "step": 955
1356
  },
1357
  {
1358
  "epoch": 3.84,
1359
- "grad_norm": 0.689552903175354,
1360
  "learning_rate": 6.25e-05,
1361
- "loss": 0.0462,
1362
  "step": 960
1363
  },
1364
  {
1365
  "epoch": 3.86,
1366
- "grad_norm": 0.7258830666542053,
1367
  "learning_rate": 6.25e-05,
1368
- "loss": 0.0543,
1369
  "step": 965
1370
  },
1371
  {
1372
  "epoch": 3.88,
1373
- "grad_norm": 0.9028825759887695,
1374
  "learning_rate": 6.25e-05,
1375
- "loss": 0.0561,
1376
  "step": 970
1377
  },
1378
  {
1379
  "epoch": 3.9,
1380
- "grad_norm": 1.1875150203704834,
1381
  "learning_rate": 6.25e-05,
1382
- "loss": 0.0477,
1383
  "step": 975
1384
  },
1385
  {
1386
  "epoch": 3.92,
1387
- "grad_norm": 1.2121100425720215,
1388
  "learning_rate": 6.25e-05,
1389
- "loss": 0.0541,
1390
  "step": 980
1391
  },
1392
  {
1393
  "epoch": 3.94,
1394
- "grad_norm": 0.7245278358459473,
1395
  "learning_rate": 6.25e-05,
1396
- "loss": 0.0499,
1397
  "step": 985
1398
  },
1399
  {
1400
  "epoch": 3.96,
1401
- "grad_norm": 0.7678513526916504,
1402
  "learning_rate": 6.25e-05,
1403
- "loss": 0.0548,
1404
  "step": 990
1405
  },
1406
  {
1407
  "epoch": 3.98,
1408
- "grad_norm": 0.621068000793457,
1409
  "learning_rate": 6.25e-05,
1410
- "loss": 0.0413,
1411
  "step": 995
1412
  },
1413
  {
1414
  "epoch": 4.0,
1415
- "grad_norm": 0.6947305798530579,
1416
  "learning_rate": 6.25e-05,
1417
- "loss": 0.0576,
1418
  "step": 1000
1419
  },
1420
  {
1421
  "epoch": 4.0,
1422
- "eval_cer": 0.04981744137059402,
1423
- "eval_loss": 0.06959603726863861,
1424
- "eval_runtime": 490.1374,
1425
- "eval_samples_per_second": 2.04,
1426
- "eval_steps_per_second": 0.51,
1427
  "step": 1000
1428
  },
1429
  {
1430
  "epoch": 4.02,
1431
- "grad_norm": 0.480295330286026,
1432
  "learning_rate": 6.25e-05,
1433
  "loss": 0.0432,
1434
  "step": 1005
1435
  },
1436
  {
1437
  "epoch": 4.04,
1438
- "grad_norm": 0.5454816818237305,
1439
  "learning_rate": 6.25e-05,
1440
- "loss": 0.0466,
1441
  "step": 1010
1442
  },
1443
  {
1444
  "epoch": 4.06,
1445
- "grad_norm": 0.5476812124252319,
1446
  "learning_rate": 6.25e-05,
1447
- "loss": 0.0426,
1448
  "step": 1015
1449
  },
1450
  {
1451
  "epoch": 4.08,
1452
- "grad_norm": 0.6141966581344604,
1453
  "learning_rate": 6.25e-05,
1454
- "loss": 0.0362,
1455
  "step": 1020
1456
  },
1457
  {
1458
  "epoch": 4.1,
1459
- "grad_norm": 0.5896350741386414,
1460
  "learning_rate": 6.25e-05,
1461
- "loss": 0.0402,
1462
  "step": 1025
1463
  },
1464
  {
1465
  "epoch": 4.12,
1466
- "grad_norm": 0.6800291538238525,
1467
  "learning_rate": 6.25e-05,
1468
- "loss": 0.0408,
1469
  "step": 1030
1470
  },
1471
  {
1472
  "epoch": 4.14,
1473
- "grad_norm": 1.0093313455581665,
1474
  "learning_rate": 6.25e-05,
1475
- "loss": 0.0412,
1476
  "step": 1035
1477
  },
1478
  {
1479
  "epoch": 4.16,
1480
- "grad_norm": 0.627837061882019,
1481
  "learning_rate": 6.25e-05,
1482
- "loss": 0.0417,
1483
  "step": 1040
1484
  },
1485
  {
1486
  "epoch": 4.18,
1487
- "grad_norm": 0.8183801770210266,
1488
  "learning_rate": 6.25e-05,
1489
- "loss": 0.0438,
1490
  "step": 1045
1491
  },
1492
  {
1493
  "epoch": 4.2,
1494
- "grad_norm": 0.6498594284057617,
1495
  "learning_rate": 6.25e-05,
1496
- "loss": 0.0482,
1497
  "step": 1050
1498
  },
1499
  {
1500
  "epoch": 4.22,
1501
- "grad_norm": 0.7509708404541016,
1502
  "learning_rate": 6.25e-05,
1503
- "loss": 0.0495,
1504
  "step": 1055
1505
  },
1506
  {
1507
  "epoch": 4.24,
1508
- "grad_norm": 0.6019679307937622,
1509
  "learning_rate": 6.25e-05,
1510
- "loss": 0.0389,
1511
  "step": 1060
1512
  },
1513
  {
1514
  "epoch": 4.26,
1515
- "grad_norm": 0.8582165837287903,
1516
  "learning_rate": 6.25e-05,
1517
- "loss": 0.0479,
1518
  "step": 1065
1519
  },
1520
  {
1521
  "epoch": 4.28,
1522
- "grad_norm": 0.5960012674331665,
1523
  "learning_rate": 6.25e-05,
1524
- "loss": 0.0461,
1525
  "step": 1070
1526
  },
1527
  {
1528
  "epoch": 4.3,
1529
- "grad_norm": 0.8688340783119202,
1530
  "learning_rate": 6.25e-05,
1531
- "loss": 0.0452,
1532
  "step": 1075
1533
  },
1534
  {
1535
  "epoch": 4.32,
1536
- "grad_norm": 0.924772322177887,
1537
  "learning_rate": 6.25e-05,
1538
- "loss": 0.0501,
1539
  "step": 1080
1540
  },
1541
  {
1542
  "epoch": 4.34,
1543
- "grad_norm": 0.6622461676597595,
1544
  "learning_rate": 6.25e-05,
1545
- "loss": 0.0409,
1546
  "step": 1085
1547
  },
1548
  {
1549
  "epoch": 4.36,
1550
- "grad_norm": 0.4645944833755493,
1551
  "learning_rate": 6.25e-05,
1552
- "loss": 0.0435,
1553
  "step": 1090
1554
  },
1555
  {
1556
  "epoch": 4.38,
1557
- "grad_norm": 0.6435367465019226,
1558
  "learning_rate": 6.25e-05,
1559
- "loss": 0.0422,
1560
  "step": 1095
1561
  },
1562
  {
1563
  "epoch": 4.4,
1564
- "grad_norm": 0.785290539264679,
1565
  "learning_rate": 6.25e-05,
1566
- "loss": 0.0521,
1567
  "step": 1100
1568
  },
1569
  {
1570
  "epoch": 4.42,
1571
- "grad_norm": 0.5324105024337769,
1572
  "learning_rate": 6.25e-05,
1573
- "loss": 0.038,
1574
  "step": 1105
1575
  },
1576
  {
1577
  "epoch": 4.44,
1578
- "grad_norm": 0.7105346322059631,
1579
  "learning_rate": 6.25e-05,
1580
- "loss": 0.0472,
1581
  "step": 1110
1582
  },
1583
  {
1584
  "epoch": 4.46,
1585
- "grad_norm": 0.5921624898910522,
1586
  "learning_rate": 6.25e-05,
1587
- "loss": 0.0384,
1588
  "step": 1115
1589
  },
1590
  {
1591
  "epoch": 4.48,
1592
- "grad_norm": 0.6056894659996033,
1593
  "learning_rate": 6.25e-05,
1594
- "loss": 0.0473,
1595
  "step": 1120
1596
  },
1597
  {
1598
  "epoch": 4.5,
1599
- "grad_norm": 0.6180445551872253,
1600
  "learning_rate": 6.25e-05,
1601
- "loss": 0.0535,
1602
  "step": 1125
1603
  },
1604
  {
1605
  "epoch": 4.52,
1606
- "grad_norm": 0.7500606775283813,
1607
  "learning_rate": 6.25e-05,
1608
- "loss": 0.0471,
1609
  "step": 1130
1610
  },
1611
  {
1612
  "epoch": 4.54,
1613
- "grad_norm": 1.4782558679580688,
1614
  "learning_rate": 6.25e-05,
1615
- "loss": 0.0498,
1616
  "step": 1135
1617
  },
1618
  {
1619
  "epoch": 4.5600000000000005,
1620
- "grad_norm": 0.7095569372177124,
1621
  "learning_rate": 6.25e-05,
1622
- "loss": 0.0451,
1623
  "step": 1140
1624
  },
1625
  {
1626
  "epoch": 4.58,
1627
- "grad_norm": 0.5338032841682434,
1628
  "learning_rate": 6.25e-05,
1629
- "loss": 0.0428,
1630
  "step": 1145
1631
  },
1632
  {
1633
  "epoch": 4.6,
1634
- "grad_norm": 0.6758018732070923,
1635
  "learning_rate": 6.25e-05,
1636
- "loss": 0.044,
1637
  "step": 1150
1638
  },
1639
  {
1640
  "epoch": 4.62,
1641
- "grad_norm": 0.6550755500793457,
1642
  "learning_rate": 6.25e-05,
1643
- "loss": 0.0455,
1644
  "step": 1155
1645
  },
1646
  {
1647
  "epoch": 4.64,
1648
- "grad_norm": 1.0559263229370117,
1649
  "learning_rate": 6.25e-05,
1650
- "loss": 0.0474,
1651
  "step": 1160
1652
  },
1653
  {
1654
  "epoch": 4.66,
1655
- "grad_norm": 0.6216355562210083,
1656
  "learning_rate": 6.25e-05,
1657
- "loss": 0.0401,
1658
  "step": 1165
1659
  },
1660
  {
1661
  "epoch": 4.68,
1662
- "grad_norm": 0.756000816822052,
1663
  "learning_rate": 6.25e-05,
1664
- "loss": 0.0492,
1665
  "step": 1170
1666
  },
1667
  {
1668
  "epoch": 4.7,
1669
- "grad_norm": 0.6948612928390503,
1670
  "learning_rate": 6.25e-05,
1671
- "loss": 0.0409,
1672
  "step": 1175
1673
  },
1674
  {
1675
  "epoch": 4.72,
1676
- "grad_norm": 0.705374002456665,
1677
  "learning_rate": 6.25e-05,
1678
- "loss": 0.0433,
1679
  "step": 1180
1680
  },
1681
  {
1682
  "epoch": 4.74,
1683
- "grad_norm": 0.683259129524231,
1684
  "learning_rate": 6.25e-05,
1685
- "loss": 0.0475,
1686
  "step": 1185
1687
  },
1688
  {
1689
  "epoch": 4.76,
1690
- "grad_norm": 0.7598377466201782,
1691
  "learning_rate": 6.25e-05,
1692
- "loss": 0.0394,
1693
  "step": 1190
1694
  },
1695
  {
1696
  "epoch": 4.78,
1697
- "grad_norm": 0.6855999231338501,
1698
  "learning_rate": 6.25e-05,
1699
- "loss": 0.0414,
1700
  "step": 1195
1701
  },
1702
  {
1703
  "epoch": 4.8,
1704
- "grad_norm": 0.7347081899642944,
1705
  "learning_rate": 6.25e-05,
1706
- "loss": 0.0429,
1707
  "step": 1200
1708
  },
1709
  {
1710
  "epoch": 4.82,
1711
- "grad_norm": 0.5922674536705017,
1712
  "learning_rate": 6.25e-05,
1713
- "loss": 0.04,
1714
  "step": 1205
1715
  },
1716
  {
1717
  "epoch": 4.84,
1718
- "grad_norm": 0.51841801404953,
1719
  "learning_rate": 6.25e-05,
1720
- "loss": 0.0415,
1721
  "step": 1210
1722
  },
1723
  {
1724
  "epoch": 4.86,
1725
- "grad_norm": 0.6993411183357239,
1726
  "learning_rate": 6.25e-05,
1727
- "loss": 0.0394,
1728
  "step": 1215
1729
  },
1730
  {
1731
  "epoch": 4.88,
1732
- "grad_norm": 0.8511929512023926,
1733
  "learning_rate": 6.25e-05,
1734
- "loss": 0.0484,
1735
  "step": 1220
1736
  },
1737
  {
1738
  "epoch": 4.9,
1739
- "grad_norm": 0.8586690425872803,
1740
  "learning_rate": 6.25e-05,
1741
- "loss": 0.0443,
1742
  "step": 1225
1743
  },
1744
  {
1745
  "epoch": 4.92,
1746
- "grad_norm": 0.49296078085899353,
1747
  "learning_rate": 6.25e-05,
1748
- "loss": 0.0494,
1749
  "step": 1230
1750
  },
1751
  {
1752
  "epoch": 4.9399999999999995,
1753
- "grad_norm": 0.6447588205337524,
1754
  "learning_rate": 6.25e-05,
1755
- "loss": 0.0474,
1756
  "step": 1235
1757
  },
1758
  {
1759
  "epoch": 4.96,
1760
- "grad_norm": 1.132325530052185,
1761
  "learning_rate": 6.25e-05,
1762
- "loss": 0.0489,
1763
  "step": 1240
1764
  },
1765
  {
1766
  "epoch": 4.98,
1767
- "grad_norm": 0.682565450668335,
1768
  "learning_rate": 6.25e-05,
1769
- "loss": 0.0523,
1770
  "step": 1245
1771
  },
1772
  {
1773
  "epoch": 5.0,
1774
- "grad_norm": 0.5855417847633362,
1775
  "learning_rate": 6.25e-05,
1776
- "loss": 0.0456,
 
 
 
 
 
 
 
 
 
1777
  "step": 1250
1778
  },
1779
  {
1780
  "epoch": 5.02,
1781
- "grad_norm": 0.7072991728782654,
1782
  "learning_rate": 6.25e-05,
1783
- "loss": 0.0381,
1784
  "step": 1255
1785
  },
1786
  {
1787
  "epoch": 5.04,
1788
- "grad_norm": 0.8943975567817688,
1789
  "learning_rate": 6.25e-05,
1790
- "loss": 0.0457,
1791
  "step": 1260
1792
  },
1793
  {
1794
  "epoch": 5.06,
1795
- "grad_norm": 0.7641831040382385,
1796
  "learning_rate": 6.25e-05,
1797
- "loss": 0.0408,
1798
  "step": 1265
1799
  },
1800
  {
1801
  "epoch": 5.08,
1802
- "grad_norm": 0.8705000281333923,
1803
  "learning_rate": 6.25e-05,
1804
- "loss": 0.0417,
1805
  "step": 1270
1806
  },
1807
  {
1808
  "epoch": 5.1,
1809
- "grad_norm": 0.6074972748756409,
1810
  "learning_rate": 6.25e-05,
1811
- "loss": 0.0363,
1812
  "step": 1275
1813
  },
1814
  {
1815
  "epoch": 5.12,
1816
- "grad_norm": 0.6037366390228271,
1817
  "learning_rate": 6.25e-05,
1818
- "loss": 0.0328,
1819
  "step": 1280
1820
  },
1821
  {
1822
  "epoch": 5.14,
1823
- "grad_norm": 0.5875853896141052,
1824
  "learning_rate": 6.25e-05,
1825
- "loss": 0.042,
1826
  "step": 1285
1827
  },
1828
  {
1829
  "epoch": 5.16,
1830
- "grad_norm": 0.6076292991638184,
1831
  "learning_rate": 6.25e-05,
1832
- "loss": 0.0394,
1833
  "step": 1290
1834
  },
1835
  {
1836
  "epoch": 5.18,
1837
- "grad_norm": 0.6495656967163086,
1838
  "learning_rate": 6.25e-05,
1839
- "loss": 0.0379,
1840
  "step": 1295
1841
  },
1842
  {
1843
  "epoch": 5.2,
1844
- "grad_norm": 0.7505232095718384,
1845
  "learning_rate": 6.25e-05,
1846
- "loss": 0.0396,
1847
  "step": 1300
1848
  },
1849
  {
1850
  "epoch": 5.22,
1851
- "grad_norm": 0.792845606803894,
1852
  "learning_rate": 6.25e-05,
1853
- "loss": 0.0352,
1854
  "step": 1305
1855
  },
1856
  {
1857
  "epoch": 5.24,
1858
- "grad_norm": 0.7115448117256165,
1859
  "learning_rate": 6.25e-05,
1860
- "loss": 0.0358,
1861
  "step": 1310
1862
  },
1863
  {
1864
  "epoch": 5.26,
1865
- "grad_norm": 0.9783137440681458,
1866
  "learning_rate": 6.25e-05,
1867
- "loss": 0.0375,
1868
  "step": 1315
1869
  },
1870
  {
1871
  "epoch": 5.28,
1872
- "grad_norm": 0.43996962904930115,
1873
  "learning_rate": 6.25e-05,
1874
- "loss": 0.0304,
1875
  "step": 1320
1876
  },
1877
  {
1878
  "epoch": 5.3,
1879
- "grad_norm": 0.5700478553771973,
1880
  "learning_rate": 6.25e-05,
1881
- "loss": 0.0407,
1882
  "step": 1325
1883
  },
1884
  {
1885
  "epoch": 5.32,
1886
- "grad_norm": 0.7441264390945435,
1887
  "learning_rate": 6.25e-05,
1888
- "loss": 0.0428,
1889
  "step": 1330
1890
  },
1891
  {
1892
  "epoch": 5.34,
1893
- "grad_norm": 0.5275558829307556,
1894
  "learning_rate": 6.25e-05,
1895
- "loss": 0.0403,
1896
  "step": 1335
1897
  },
1898
  {
1899
  "epoch": 5.36,
1900
- "grad_norm": 0.7380858063697815,
1901
  "learning_rate": 6.25e-05,
1902
- "loss": 0.0353,
1903
  "step": 1340
1904
  },
1905
  {
1906
  "epoch": 5.38,
1907
- "grad_norm": 0.5178393721580505,
1908
  "learning_rate": 6.25e-05,
1909
- "loss": 0.0384,
1910
  "step": 1345
1911
  },
1912
  {
1913
  "epoch": 5.4,
1914
- "grad_norm": 0.7126160264015198,
1915
  "learning_rate": 6.25e-05,
1916
- "loss": 0.0356,
1917
  "step": 1350
1918
  },
1919
  {
1920
  "epoch": 5.42,
1921
- "grad_norm": 0.48968952894210815,
1922
  "learning_rate": 6.25e-05,
1923
- "loss": 0.0346,
1924
  "step": 1355
1925
  },
1926
  {
1927
  "epoch": 5.44,
1928
- "grad_norm": 0.6505790948867798,
1929
  "learning_rate": 6.25e-05,
1930
- "loss": 0.0362,
1931
  "step": 1360
1932
  },
1933
  {
1934
  "epoch": 5.46,
1935
- "grad_norm": 0.7864866852760315,
1936
  "learning_rate": 6.25e-05,
1937
- "loss": 0.0332,
1938
  "step": 1365
1939
  },
1940
  {
1941
  "epoch": 5.48,
1942
- "grad_norm": 1.0162925720214844,
1943
  "learning_rate": 6.25e-05,
1944
- "loss": 0.0514,
1945
  "step": 1370
1946
  },
1947
  {
1948
  "epoch": 5.5,
1949
- "grad_norm": 0.5575432181358337,
1950
  "learning_rate": 6.25e-05,
1951
- "loss": 0.037,
1952
  "step": 1375
1953
  },
1954
  {
1955
  "epoch": 5.52,
1956
- "grad_norm": 0.5482836365699768,
1957
  "learning_rate": 6.25e-05,
1958
- "loss": 0.0379,
1959
  "step": 1380
1960
  },
1961
  {
1962
  "epoch": 5.54,
1963
- "grad_norm": 0.5818304419517517,
1964
  "learning_rate": 6.25e-05,
1965
- "loss": 0.0367,
1966
  "step": 1385
1967
  },
1968
  {
1969
  "epoch": 5.5600000000000005,
1970
- "grad_norm": 0.7295159697532654,
1971
  "learning_rate": 6.25e-05,
1972
- "loss": 0.0506,
1973
  "step": 1390
1974
  },
1975
  {
1976
  "epoch": 5.58,
1977
- "grad_norm": 0.68539959192276,
1978
  "learning_rate": 6.25e-05,
1979
- "loss": 0.0393,
1980
  "step": 1395
1981
  },
1982
  {
1983
  "epoch": 5.6,
1984
- "grad_norm": 0.736714243888855,
1985
  "learning_rate": 6.25e-05,
1986
- "loss": 0.0301,
1987
  "step": 1400
1988
  },
1989
  {
1990
  "epoch": 5.62,
1991
- "grad_norm": 0.49973201751708984,
1992
  "learning_rate": 6.25e-05,
1993
- "loss": 0.0347,
1994
  "step": 1405
1995
  },
1996
  {
1997
  "epoch": 5.64,
1998
- "grad_norm": 0.35752153396606445,
1999
  "learning_rate": 6.25e-05,
2000
- "loss": 0.036,
2001
  "step": 1410
2002
  },
2003
  {
2004
  "epoch": 5.66,
2005
- "grad_norm": 0.4985082447528839,
2006
  "learning_rate": 6.25e-05,
2007
- "loss": 0.0404,
2008
  "step": 1415
2009
  },
2010
  {
2011
  "epoch": 5.68,
2012
- "grad_norm": 0.5711241364479065,
2013
  "learning_rate": 6.25e-05,
2014
- "loss": 0.041,
2015
  "step": 1420
2016
  },
2017
  {
2018
  "epoch": 5.7,
2019
- "grad_norm": 0.8970484137535095,
2020
  "learning_rate": 6.25e-05,
2021
- "loss": 0.0343,
2022
  "step": 1425
2023
  },
2024
  {
2025
  "epoch": 5.72,
2026
- "grad_norm": 0.46022269129753113,
2027
  "learning_rate": 6.25e-05,
2028
- "loss": 0.035,
2029
  "step": 1430
2030
  },
2031
  {
2032
  "epoch": 5.74,
2033
- "grad_norm": 0.8625376224517822,
2034
  "learning_rate": 6.25e-05,
2035
- "loss": 0.0426,
2036
  "step": 1435
2037
  },
2038
  {
2039
  "epoch": 5.76,
2040
- "grad_norm": 0.7475701570510864,
2041
  "learning_rate": 6.25e-05,
2042
- "loss": 0.042,
2043
  "step": 1440
2044
  },
2045
  {
2046
  "epoch": 5.78,
2047
- "grad_norm": 0.5920872688293457,
2048
  "learning_rate": 6.25e-05,
2049
- "loss": 0.0369,
2050
  "step": 1445
2051
  },
2052
  {
2053
  "epoch": 5.8,
2054
- "grad_norm": 0.41128072142601013,
2055
  "learning_rate": 6.25e-05,
2056
- "loss": 0.0327,
2057
  "step": 1450
2058
  },
2059
  {
2060
  "epoch": 5.82,
2061
- "grad_norm": 0.5794464945793152,
2062
  "learning_rate": 6.25e-05,
2063
  "loss": 0.0414,
2064
  "step": 1455
2065
  },
2066
  {
2067
  "epoch": 5.84,
2068
- "grad_norm": 0.6729599833488464,
2069
  "learning_rate": 6.25e-05,
2070
- "loss": 0.0423,
2071
  "step": 1460
2072
  },
2073
  {
2074
  "epoch": 5.86,
2075
- "grad_norm": 0.9547153115272522,
2076
  "learning_rate": 6.25e-05,
2077
- "loss": 0.0402,
2078
  "step": 1465
2079
  },
2080
  {
2081
  "epoch": 5.88,
2082
- "grad_norm": 0.7813019752502441,
2083
  "learning_rate": 6.25e-05,
2084
- "loss": 0.0388,
2085
  "step": 1470
2086
  },
2087
  {
2088
  "epoch": 5.9,
2089
- "grad_norm": 0.6496958136558533,
2090
  "learning_rate": 6.25e-05,
2091
- "loss": 0.0375,
2092
  "step": 1475
2093
  },
2094
  {
2095
  "epoch": 5.92,
2096
- "grad_norm": 0.5207449793815613,
2097
  "learning_rate": 6.25e-05,
2098
- "loss": 0.0326,
2099
  "step": 1480
2100
  },
2101
  {
2102
  "epoch": 5.9399999999999995,
2103
- "grad_norm": 0.5855777263641357,
2104
  "learning_rate": 6.25e-05,
2105
- "loss": 0.0415,
2106
  "step": 1485
2107
  },
2108
  {
2109
  "epoch": 5.96,
2110
- "grad_norm": 0.505793035030365,
2111
  "learning_rate": 6.25e-05,
2112
- "loss": 0.0365,
2113
  "step": 1490
2114
  },
2115
  {
2116
  "epoch": 5.98,
2117
- "grad_norm": 0.7970417737960815,
2118
  "learning_rate": 6.25e-05,
2119
- "loss": 0.0426,
2120
  "step": 1495
2121
  },
2122
  {
2123
  "epoch": 6.0,
2124
- "grad_norm": 0.6662250757217407,
2125
  "learning_rate": 6.25e-05,
2126
- "loss": 0.0381,
2127
  "step": 1500
2128
  },
2129
  {
2130
  "epoch": 6.0,
2131
- "eval_cer": 0.07167181575621402,
2132
- "eval_loss": 0.0693308636546135,
2133
- "eval_runtime": 496.5135,
2134
- "eval_samples_per_second": 2.014,
2135
- "eval_steps_per_second": 0.504,
2136
  "step": 1500
2137
  },
2138
  {
2139
  "epoch": 6.02,
2140
- "grad_norm": 0.4985259473323822,
2141
  "learning_rate": 6.25e-05,
2142
- "loss": 0.0319,
2143
  "step": 1505
2144
  },
2145
  {
2146
  "epoch": 6.04,
2147
- "grad_norm": 0.49594366550445557,
2148
  "learning_rate": 6.25e-05,
2149
- "loss": 0.0293,
2150
  "step": 1510
2151
  },
2152
  {
2153
  "epoch": 6.06,
2154
- "grad_norm": 0.570300817489624,
2155
  "learning_rate": 6.25e-05,
2156
  "loss": 0.0314,
2157
  "step": 1515
2158
  },
2159
  {
2160
  "epoch": 6.08,
2161
- "grad_norm": 0.772651731967926,
2162
  "learning_rate": 6.25e-05,
2163
- "loss": 0.0301,
2164
  "step": 1520
2165
  },
2166
  {
2167
  "epoch": 6.1,
2168
- "grad_norm": 0.5414010882377625,
2169
  "learning_rate": 6.25e-05,
2170
- "loss": 0.0306,
2171
  "step": 1525
2172
  },
2173
  {
2174
  "epoch": 6.12,
2175
- "grad_norm": 0.7267195582389832,
2176
  "learning_rate": 6.25e-05,
2177
- "loss": 0.0285,
2178
  "step": 1530
2179
  },
2180
  {
2181
  "epoch": 6.14,
2182
- "grad_norm": 0.4645964205265045,
2183
  "learning_rate": 6.25e-05,
2184
- "loss": 0.0276,
2185
  "step": 1535
2186
  },
2187
  {
2188
  "epoch": 6.16,
2189
- "grad_norm": 0.5566327571868896,
2190
  "learning_rate": 6.25e-05,
2191
- "loss": 0.0318,
2192
  "step": 1540
2193
  },
2194
  {
2195
  "epoch": 6.18,
2196
- "grad_norm": 0.6852025389671326,
2197
  "learning_rate": 6.25e-05,
2198
- "loss": 0.0344,
2199
  "step": 1545
2200
  },
2201
  {
2202
  "epoch": 6.2,
2203
- "grad_norm": 0.6733210682868958,
2204
  "learning_rate": 6.25e-05,
2205
- "loss": 0.0363,
2206
  "step": 1550
2207
  },
2208
  {
2209
  "epoch": 6.22,
2210
- "grad_norm": 0.9218971133232117,
2211
  "learning_rate": 6.25e-05,
2212
- "loss": 0.0378,
2213
  "step": 1555
2214
  },
2215
  {
2216
  "epoch": 6.24,
2217
- "grad_norm": 0.9552198648452759,
2218
  "learning_rate": 6.25e-05,
2219
- "loss": 0.0336,
2220
  "step": 1560
2221
  },
2222
  {
2223
  "epoch": 6.26,
2224
- "grad_norm": 0.8390860557556152,
2225
  "learning_rate": 6.25e-05,
2226
- "loss": 0.0388,
2227
  "step": 1565
2228
  },
2229
  {
2230
  "epoch": 6.28,
2231
- "grad_norm": 0.5966670513153076,
2232
  "learning_rate": 6.25e-05,
2233
- "loss": 0.0347,
2234
  "step": 1570
2235
  },
2236
  {
2237
  "epoch": 6.3,
2238
- "grad_norm": 1.0041142702102661,
2239
  "learning_rate": 6.25e-05,
2240
- "loss": 0.0358,
2241
  "step": 1575
2242
  },
2243
  {
2244
  "epoch": 6.32,
2245
- "grad_norm": 0.8135663270950317,
2246
  "learning_rate": 6.25e-05,
2247
- "loss": 0.037,
2248
  "step": 1580
2249
  },
2250
  {
2251
  "epoch": 6.34,
2252
- "grad_norm": 0.5268656611442566,
2253
  "learning_rate": 6.25e-05,
2254
- "loss": 0.0306,
2255
  "step": 1585
2256
  },
2257
  {
2258
  "epoch": 6.36,
2259
- "grad_norm": 0.6037728190422058,
2260
  "learning_rate": 6.25e-05,
2261
- "loss": 0.0364,
2262
  "step": 1590
2263
  },
2264
  {
2265
  "epoch": 6.38,
2266
- "grad_norm": 0.8840550184249878,
2267
  "learning_rate": 6.25e-05,
2268
- "loss": 0.0342,
2269
  "step": 1595
2270
  },
2271
  {
2272
  "epoch": 6.4,
2273
- "grad_norm": 0.8145713210105896,
2274
  "learning_rate": 6.25e-05,
2275
- "loss": 0.0306,
2276
  "step": 1600
2277
  },
2278
  {
2279
  "epoch": 6.42,
2280
- "grad_norm": 0.705767810344696,
2281
  "learning_rate": 6.25e-05,
2282
- "loss": 0.0405,
2283
  "step": 1605
2284
  },
2285
  {
2286
  "epoch": 6.44,
2287
- "grad_norm": 0.6563853621482849,
2288
  "learning_rate": 6.25e-05,
2289
- "loss": 0.0326,
2290
  "step": 1610
2291
  },
2292
  {
2293
  "epoch": 6.46,
2294
- "grad_norm": 0.619769275188446,
2295
  "learning_rate": 6.25e-05,
2296
- "loss": 0.0344,
2297
  "step": 1615
2298
  },
2299
  {
2300
  "epoch": 6.48,
2301
- "grad_norm": 0.983218252658844,
2302
  "learning_rate": 6.25e-05,
2303
- "loss": 0.0379,
2304
  "step": 1620
2305
  },
2306
  {
2307
  "epoch": 6.5,
2308
- "grad_norm": 0.6717543601989746,
2309
  "learning_rate": 6.25e-05,
2310
- "loss": 0.0326,
2311
  "step": 1625
2312
  },
2313
  {
2314
  "epoch": 6.52,
2315
- "grad_norm": 0.5207704901695251,
2316
  "learning_rate": 6.25e-05,
2317
- "loss": 0.0368,
2318
  "step": 1630
2319
  },
2320
  {
2321
  "epoch": 6.54,
2322
- "grad_norm": 0.5211384892463684,
2323
  "learning_rate": 6.25e-05,
2324
- "loss": 0.042,
2325
  "step": 1635
2326
  },
2327
  {
2328
  "epoch": 6.5600000000000005,
2329
- "grad_norm": 0.7764495015144348,
2330
  "learning_rate": 6.25e-05,
2331
- "loss": 0.0364,
2332
  "step": 1640
2333
  },
2334
  {
2335
  "epoch": 6.58,
2336
- "grad_norm": 0.599217414855957,
2337
  "learning_rate": 6.25e-05,
2338
- "loss": 0.0385,
2339
  "step": 1645
2340
  },
2341
  {
2342
  "epoch": 6.6,
2343
- "grad_norm": 0.7842808365821838,
2344
  "learning_rate": 6.25e-05,
2345
- "loss": 0.03,
2346
  "step": 1650
2347
  },
2348
  {
2349
  "epoch": 6.62,
2350
- "grad_norm": 0.4464091360569,
2351
  "learning_rate": 6.25e-05,
2352
- "loss": 0.0297,
2353
  "step": 1655
2354
  },
2355
  {
2356
  "epoch": 6.64,
2357
- "grad_norm": 0.5299388766288757,
2358
  "learning_rate": 6.25e-05,
2359
- "loss": 0.0288,
2360
  "step": 1660
2361
  },
2362
  {
2363
  "epoch": 6.66,
2364
- "grad_norm": 0.5369200706481934,
2365
  "learning_rate": 6.25e-05,
2366
- "loss": 0.0384,
2367
  "step": 1665
2368
  },
2369
  {
2370
  "epoch": 6.68,
2371
- "grad_norm": 0.4734521806240082,
2372
  "learning_rate": 6.25e-05,
2373
- "loss": 0.0277,
2374
  "step": 1670
2375
  },
2376
  {
2377
  "epoch": 6.7,
2378
- "grad_norm": 0.4279540479183197,
2379
  "learning_rate": 6.25e-05,
2380
- "loss": 0.0374,
2381
  "step": 1675
2382
  },
2383
  {
2384
  "epoch": 6.72,
2385
- "grad_norm": 0.6549651026725769,
2386
  "learning_rate": 6.25e-05,
2387
- "loss": 0.0383,
2388
  "step": 1680
2389
  },
2390
  {
2391
  "epoch": 6.74,
2392
- "grad_norm": 0.5408864617347717,
2393
  "learning_rate": 6.25e-05,
2394
- "loss": 0.0323,
2395
  "step": 1685
2396
  },
2397
  {
2398
  "epoch": 6.76,
2399
- "grad_norm": 0.45350977778434753,
2400
  "learning_rate": 6.25e-05,
2401
- "loss": 0.0312,
2402
  "step": 1690
2403
  },
2404
  {
2405
  "epoch": 6.78,
2406
- "grad_norm": 0.5761439204216003,
2407
  "learning_rate": 6.25e-05,
2408
- "loss": 0.0335,
2409
  "step": 1695
2410
  },
2411
  {
2412
  "epoch": 6.8,
2413
- "grad_norm": 0.6389983892440796,
2414
  "learning_rate": 6.25e-05,
2415
- "loss": 0.0342,
2416
  "step": 1700
2417
  },
2418
  {
2419
  "epoch": 6.82,
2420
- "grad_norm": 0.7613130807876587,
2421
  "learning_rate": 6.25e-05,
2422
- "loss": 0.0313,
2423
  "step": 1705
2424
  },
2425
  {
2426
  "epoch": 6.84,
2427
- "grad_norm": 0.6593891978263855,
2428
  "learning_rate": 6.25e-05,
2429
- "loss": 0.0292,
2430
  "step": 1710
2431
  },
2432
  {
2433
  "epoch": 6.86,
2434
- "grad_norm": 0.762394905090332,
2435
  "learning_rate": 6.25e-05,
2436
- "loss": 0.0371,
2437
  "step": 1715
2438
  },
2439
  {
2440
  "epoch": 6.88,
2441
- "grad_norm": 0.3643024265766144,
2442
  "learning_rate": 6.25e-05,
2443
- "loss": 0.028,
2444
  "step": 1720
2445
  },
2446
  {
2447
  "epoch": 6.9,
2448
- "grad_norm": 0.5596332550048828,
2449
  "learning_rate": 6.25e-05,
2450
- "loss": 0.0375,
2451
  "step": 1725
2452
  },
2453
  {
2454
  "epoch": 6.92,
2455
- "grad_norm": 0.8684321045875549,
2456
  "learning_rate": 6.25e-05,
2457
  "loss": 0.0374,
2458
  "step": 1730
2459
  },
2460
  {
2461
  "epoch": 6.9399999999999995,
2462
- "grad_norm": 0.6257331371307373,
2463
  "learning_rate": 6.25e-05,
2464
- "loss": 0.0385,
2465
  "step": 1735
2466
  },
2467
  {
2468
  "epoch": 6.96,
2469
- "grad_norm": 0.3411272168159485,
2470
  "learning_rate": 6.25e-05,
2471
- "loss": 0.0321,
2472
  "step": 1740
2473
  },
2474
  {
2475
  "epoch": 6.98,
2476
- "grad_norm": 0.5448806285858154,
2477
  "learning_rate": 6.25e-05,
2478
- "loss": 0.0404,
2479
  "step": 1745
2480
  },
2481
  {
2482
  "epoch": 7.0,
2483
- "grad_norm": 0.6229183077812195,
2484
  "learning_rate": 6.25e-05,
2485
- "loss": 0.034,
 
 
 
 
 
 
 
 
 
2486
  "step": 1750
2487
  },
2488
  {
2489
  "epoch": 7.02,
2490
- "grad_norm": 0.5916796326637268,
2491
  "learning_rate": 6.25e-05,
2492
- "loss": 0.0263,
2493
  "step": 1755
2494
  },
2495
  {
2496
  "epoch": 7.04,
2497
- "grad_norm": 0.40633058547973633,
2498
  "learning_rate": 6.25e-05,
2499
- "loss": 0.0281,
2500
  "step": 1760
2501
  },
2502
  {
2503
  "epoch": 7.06,
2504
- "grad_norm": 0.5697972178459167,
2505
  "learning_rate": 6.25e-05,
2506
- "loss": 0.036,
2507
  "step": 1765
2508
  },
2509
  {
2510
  "epoch": 7.08,
2511
- "grad_norm": 0.7487204670906067,
2512
  "learning_rate": 6.25e-05,
2513
- "loss": 0.0285,
2514
  "step": 1770
2515
  },
2516
  {
2517
  "epoch": 7.1,
2518
- "grad_norm": 0.4468456506729126,
2519
  "learning_rate": 6.25e-05,
2520
- "loss": 0.0272,
2521
  "step": 1775
2522
  },
2523
  {
2524
  "epoch": 7.12,
2525
- "grad_norm": 0.5434139370918274,
2526
  "learning_rate": 6.25e-05,
2527
- "loss": 0.0264,
2528
  "step": 1780
2529
  },
2530
  {
2531
  "epoch": 7.14,
2532
- "grad_norm": 0.5725220441818237,
2533
  "learning_rate": 6.25e-05,
2534
- "loss": 0.0257,
2535
  "step": 1785
2536
  },
2537
  {
2538
  "epoch": 7.16,
2539
- "grad_norm": 0.6501635313034058,
2540
  "learning_rate": 6.25e-05,
2541
- "loss": 0.0267,
2542
  "step": 1790
2543
  },
2544
  {
2545
  "epoch": 7.18,
2546
- "grad_norm": 0.5863832235336304,
2547
  "learning_rate": 6.25e-05,
2548
- "loss": 0.0347,
2549
  "step": 1795
2550
  },
2551
  {
2552
  "epoch": 7.2,
2553
- "grad_norm": 0.3922972083091736,
2554
  "learning_rate": 6.25e-05,
2555
- "loss": 0.029,
2556
  "step": 1800
2557
  },
2558
  {
2559
  "epoch": 7.22,
2560
- "grad_norm": 0.6584299802780151,
2561
  "learning_rate": 6.25e-05,
2562
- "loss": 0.0299,
2563
  "step": 1805
2564
  },
2565
  {
2566
  "epoch": 7.24,
2567
- "grad_norm": 0.49968084692955017,
2568
  "learning_rate": 6.25e-05,
2569
  "loss": 0.0329,
2570
  "step": 1810
2571
  },
2572
  {
2573
  "epoch": 7.26,
2574
- "grad_norm": 0.6583006978034973,
2575
  "learning_rate": 6.25e-05,
2576
- "loss": 0.0287,
2577
  "step": 1815
2578
  },
2579
  {
2580
  "epoch": 7.28,
2581
- "grad_norm": 0.43482792377471924,
2582
  "learning_rate": 6.25e-05,
2583
- "loss": 0.0266,
2584
  "step": 1820
2585
  },
2586
  {
2587
  "epoch": 7.3,
2588
- "grad_norm": 0.3462773561477661,
2589
  "learning_rate": 6.25e-05,
2590
- "loss": 0.0305,
2591
  "step": 1825
2592
  },
2593
  {
2594
  "epoch": 7.32,
2595
- "grad_norm": 0.45115697383880615,
2596
  "learning_rate": 6.25e-05,
2597
- "loss": 0.0288,
2598
  "step": 1830
2599
  },
2600
  {
2601
  "epoch": 7.34,
2602
- "grad_norm": 0.6678561568260193,
2603
  "learning_rate": 6.25e-05,
2604
  "loss": 0.0311,
2605
  "step": 1835
2606
  },
2607
  {
2608
  "epoch": 7.36,
2609
- "grad_norm": 0.5643042922019958,
2610
  "learning_rate": 6.25e-05,
2611
- "loss": 0.0258,
2612
  "step": 1840
2613
  },
2614
  {
2615
  "epoch": 7.38,
2616
- "grad_norm": 0.611005425453186,
2617
  "learning_rate": 6.25e-05,
2618
- "loss": 0.0287,
2619
  "step": 1845
2620
  },
2621
  {
2622
  "epoch": 7.4,
2623
- "grad_norm": 0.49410074949264526,
2624
  "learning_rate": 6.25e-05,
2625
- "loss": 0.0218,
2626
  "step": 1850
2627
  },
2628
  {
2629
  "epoch": 7.42,
2630
- "grad_norm": 0.6302474141120911,
2631
  "learning_rate": 6.25e-05,
2632
- "loss": 0.03,
2633
  "step": 1855
2634
  },
2635
  {
2636
  "epoch": 7.44,
2637
- "grad_norm": 0.4754486382007599,
2638
  "learning_rate": 6.25e-05,
2639
- "loss": 0.0286,
2640
  "step": 1860
2641
  },
2642
  {
2643
  "epoch": 7.46,
2644
- "grad_norm": 0.4861878454685211,
2645
  "learning_rate": 6.25e-05,
2646
- "loss": 0.0317,
2647
  "step": 1865
2648
  },
2649
  {
2650
  "epoch": 7.48,
2651
- "grad_norm": 0.7281424403190613,
2652
  "learning_rate": 6.25e-05,
2653
- "loss": 0.0312,
2654
  "step": 1870
2655
  },
2656
  {
2657
  "epoch": 7.5,
2658
- "grad_norm": 0.6567294597625732,
2659
  "learning_rate": 6.25e-05,
2660
  "loss": 0.027,
2661
  "step": 1875
2662
  },
2663
  {
2664
  "epoch": 7.52,
2665
- "grad_norm": 0.5121676325798035,
2666
  "learning_rate": 6.25e-05,
2667
- "loss": 0.0321,
2668
  "step": 1880
2669
  },
2670
  {
2671
  "epoch": 7.54,
2672
- "grad_norm": 0.4087779223918915,
2673
  "learning_rate": 6.25e-05,
2674
- "loss": 0.03,
2675
  "step": 1885
2676
  },
2677
  {
2678
  "epoch": 7.5600000000000005,
2679
- "grad_norm": 0.7357320785522461,
2680
  "learning_rate": 6.25e-05,
2681
- "loss": 0.0294,
2682
  "step": 1890
2683
  },
2684
  {
2685
  "epoch": 7.58,
2686
- "grad_norm": 0.6601704359054565,
2687
  "learning_rate": 6.25e-05,
2688
- "loss": 0.0262,
2689
  "step": 1895
2690
  },
2691
  {
2692
  "epoch": 7.6,
2693
- "grad_norm": 0.8189958930015564,
2694
  "learning_rate": 6.25e-05,
2695
- "loss": 0.031,
2696
  "step": 1900
2697
  },
2698
  {
2699
  "epoch": 7.62,
2700
- "grad_norm": 0.6652852892875671,
2701
  "learning_rate": 6.25e-05,
2702
- "loss": 0.0291,
2703
  "step": 1905
2704
  },
2705
  {
2706
  "epoch": 7.64,
2707
- "grad_norm": 0.6241147518157959,
2708
  "learning_rate": 6.25e-05,
2709
- "loss": 0.029,
2710
  "step": 1910
2711
  },
2712
  {
2713
  "epoch": 7.66,
2714
- "grad_norm": 0.4204230010509491,
2715
  "learning_rate": 6.25e-05,
2716
  "loss": 0.0288,
2717
  "step": 1915
2718
  },
2719
  {
2720
  "epoch": 7.68,
2721
- "grad_norm": 0.5172626972198486,
2722
  "learning_rate": 6.25e-05,
2723
- "loss": 0.0284,
2724
  "step": 1920
2725
  },
2726
  {
2727
  "epoch": 7.7,
2728
- "grad_norm": 0.48357152938842773,
2729
  "learning_rate": 6.25e-05,
2730
- "loss": 0.0262,
2731
  "step": 1925
2732
  },
2733
  {
2734
  "epoch": 7.72,
2735
- "grad_norm": 0.5994135737419128,
2736
  "learning_rate": 6.25e-05,
2737
- "loss": 0.0337,
2738
  "step": 1930
2739
  },
2740
  {
2741
  "epoch": 7.74,
2742
- "grad_norm": 0.5427165627479553,
2743
  "learning_rate": 6.25e-05,
2744
- "loss": 0.0332,
2745
  "step": 1935
2746
  },
2747
  {
2748
  "epoch": 7.76,
2749
- "grad_norm": 0.623006284236908,
2750
  "learning_rate": 6.25e-05,
2751
- "loss": 0.0309,
2752
  "step": 1940
2753
  },
2754
  {
2755
  "epoch": 7.78,
2756
- "grad_norm": 0.7233545184135437,
2757
  "learning_rate": 6.25e-05,
2758
- "loss": 0.0253,
2759
  "step": 1945
2760
  },
2761
  {
2762
  "epoch": 7.8,
2763
- "grad_norm": 0.6122242212295532,
2764
  "learning_rate": 6.25e-05,
2765
- "loss": 0.0321,
2766
  "step": 1950
2767
  },
2768
  {
2769
  "epoch": 7.82,
2770
- "grad_norm": 0.6996561884880066,
2771
  "learning_rate": 6.25e-05,
2772
- "loss": 0.025,
2773
  "step": 1955
2774
  },
2775
  {
2776
  "epoch": 7.84,
2777
- "grad_norm": 0.5072382688522339,
2778
  "learning_rate": 6.25e-05,
2779
- "loss": 0.0277,
2780
  "step": 1960
2781
  },
2782
  {
2783
  "epoch": 7.86,
2784
- "grad_norm": 0.6833447217941284,
2785
  "learning_rate": 6.25e-05,
2786
- "loss": 0.029,
2787
  "step": 1965
2788
  },
2789
  {
2790
  "epoch": 7.88,
2791
- "grad_norm": 0.4576677978038788,
2792
  "learning_rate": 6.25e-05,
2793
- "loss": 0.0306,
2794
  "step": 1970
2795
  },
2796
  {
2797
  "epoch": 7.9,
2798
- "grad_norm": 0.3679646849632263,
2799
  "learning_rate": 6.25e-05,
2800
- "loss": 0.0281,
2801
  "step": 1975
2802
  },
2803
  {
2804
  "epoch": 7.92,
2805
- "grad_norm": 0.6050195097923279,
2806
  "learning_rate": 6.25e-05,
2807
- "loss": 0.0292,
2808
  "step": 1980
2809
  },
2810
  {
2811
  "epoch": 7.9399999999999995,
2812
- "grad_norm": 0.5424927473068237,
2813
  "learning_rate": 6.25e-05,
2814
- "loss": 0.027,
2815
  "step": 1985
2816
  },
2817
  {
2818
  "epoch": 7.96,
2819
- "grad_norm": 1.0631431341171265,
2820
  "learning_rate": 6.25e-05,
2821
- "loss": 0.0336,
2822
  "step": 1990
2823
  },
2824
  {
2825
  "epoch": 7.98,
2826
- "grad_norm": 0.48741209506988525,
2827
  "learning_rate": 6.25e-05,
2828
- "loss": 0.0311,
2829
  "step": 1995
2830
  },
2831
  {
2832
  "epoch": 8.0,
2833
- "grad_norm": 0.5710260272026062,
2834
  "learning_rate": 6.25e-05,
2835
- "loss": 0.029,
2836
  "step": 2000
2837
  },
2838
  {
2839
  "epoch": 8.0,
2840
- "eval_cer": 0.13070495716893696,
2841
- "eval_loss": 0.07547412067651749,
2842
- "eval_runtime": 495.3218,
2843
- "eval_samples_per_second": 2.019,
2844
- "eval_steps_per_second": 0.505,
2845
  "step": 2000
2846
  }
2847
  ],
@@ -2849,7 +2885,7 @@
2849
  "max_steps": 5000,
2850
  "num_input_tokens_seen": 0,
2851
  "num_train_epochs": 20,
2852
- "save_steps": 500,
2853
  "stateful_callbacks": {
2854
  "TrainerControl": {
2855
  "args": {
 
1
  {
2
+ "best_metric": 0.046749928297655986,
3
  "best_model_checkpoint": "./whisper-large-v3-turbo-finetuned-lora/checkpoint-1000",
4
  "epoch": 8.0,
5
+ "eval_steps": 250,
6
  "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
+ "grad_norm": 11.320270538330078,
14
+ "learning_rate": 1.25e-06,
15
+ "loss": 3.7364,
16
  "step": 5
17
  },
18
  {
19
  "epoch": 0.04,
20
+ "grad_norm": 10.51279354095459,
21
+ "learning_rate": 2.8124999999999998e-06,
22
+ "loss": 3.669,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.06,
27
+ "grad_norm": 10.274462699890137,
28
+ "learning_rate": 4.3750000000000005e-06,
29
+ "loss": 3.6416,
30
  "step": 15
31
  },
32
  {
33
  "epoch": 0.08,
34
+ "grad_norm": 10.032905578613281,
35
+ "learning_rate": 5.9375e-06,
36
+ "loss": 3.5981,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.1,
41
+ "grad_norm": 9.448946952819824,
42
+ "learning_rate": 7.1875e-06,
43
+ "loss": 3.4937,
44
  "step": 25
45
  },
46
  {
47
  "epoch": 0.12,
48
+ "grad_norm": 8.844466209411621,
49
+ "learning_rate": 8.750000000000001e-06,
50
+ "loss": 3.3145,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.14,
55
+ "grad_norm": 8.622856140136719,
56
+ "learning_rate": 1.03125e-05,
57
+ "loss": 3.0978,
58
  "step": 35
59
  },
60
  {
61
  "epoch": 0.16,
62
+ "grad_norm": 6.81293249130249,
63
+ "learning_rate": 1.1875e-05,
64
+ "loss": 2.9583,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.18,
69
+ "grad_norm": 5.21970272064209,
70
+ "learning_rate": 1.34375e-05,
71
+ "loss": 2.7668,
72
  "step": 45
73
  },
74
  {
75
  "epoch": 0.2,
76
+ "grad_norm": 4.440727710723877,
77
+ "learning_rate": 1.5e-05,
78
+ "loss": 2.5467,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.22,
83
+ "grad_norm": 4.219883918762207,
84
+ "learning_rate": 1.6562500000000003e-05,
85
+ "loss": 2.3237,
86
  "step": 55
87
  },
88
  {
89
  "epoch": 0.24,
90
+ "grad_norm": 3.6006925106048584,
91
+ "learning_rate": 1.8125e-05,
92
+ "loss": 2.1307,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.26,
97
+ "grad_norm": 3.07859206199646,
98
+ "learning_rate": 1.96875e-05,
99
+ "loss": 1.9725,
100
  "step": 65
101
  },
102
  {
103
  "epoch": 0.28,
104
+ "grad_norm": 3.0515847206115723,
105
+ "learning_rate": 2.125e-05,
106
+ "loss": 1.8568,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.3,
111
+ "grad_norm": 2.7134475708007812,
112
+ "learning_rate": 2.28125e-05,
113
+ "loss": 1.6583,
114
  "step": 75
115
  },
116
  {
117
  "epoch": 0.32,
118
+ "grad_norm": 2.7643449306488037,
119
+ "learning_rate": 2.4375000000000003e-05,
120
+ "loss": 1.4433,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.34,
125
+ "grad_norm": 2.427220582962036,
126
+ "learning_rate": 2.59375e-05,
127
+ "loss": 1.3435,
128
  "step": 85
129
  },
130
  {
131
  "epoch": 0.36,
132
+ "grad_norm": 2.0288472175598145,
133
+ "learning_rate": 2.75e-05,
134
+ "loss": 1.2446,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.38,
139
+ "grad_norm": 2.7967100143432617,
140
+ "learning_rate": 2.90625e-05,
141
+ "loss": 1.1031,
142
  "step": 95
143
  },
144
  {
145
  "epoch": 0.4,
146
+ "grad_norm": 2.407944440841675,
147
+ "learning_rate": 3.0625e-05,
148
+ "loss": 0.9777,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.42,
153
+ "grad_norm": 3.4412190914154053,
154
+ "learning_rate": 3.21875e-05,
155
+ "loss": 0.9294,
156
  "step": 105
157
  },
158
  {
159
  "epoch": 0.44,
160
+ "grad_norm": 2.0439155101776123,
161
+ "learning_rate": 3.375e-05,
162
+ "loss": 0.8108,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.46,
167
+ "grad_norm": 1.616652011871338,
168
+ "learning_rate": 3.53125e-05,
169
+ "loss": 0.6951,
170
  "step": 115
171
  },
172
  {
173
  "epoch": 0.48,
174
+ "grad_norm": 1.690824031829834,
175
+ "learning_rate": 3.6875e-05,
176
+ "loss": 0.6352,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.5,
181
+ "grad_norm": 2.7902116775512695,
182
+ "learning_rate": 3.84375e-05,
183
+ "loss": 0.5562,
184
  "step": 125
185
  },
186
  {
187
  "epoch": 0.52,
188
+ "grad_norm": 2.4872801303863525,
189
+ "learning_rate": 4e-05,
190
+ "loss": 0.5111,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.54,
195
+ "grad_norm": 2.4177122116088867,
196
+ "learning_rate": 4.15625e-05,
197
+ "loss": 0.4696,
198
  "step": 135
199
  },
200
  {
201
  "epoch": 0.56,
202
+ "grad_norm": 2.0231056213378906,
203
+ "learning_rate": 4.3125e-05,
204
+ "loss": 0.4462,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.58,
209
+ "grad_norm": 2.004688024520874,
210
+ "learning_rate": 4.46875e-05,
211
+ "loss": 0.4224,
212
  "step": 145
213
  },
214
  {
215
  "epoch": 0.6,
216
+ "grad_norm": 3.170652389526367,
217
+ "learning_rate": 4.625e-05,
218
+ "loss": 0.3967,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.62,
223
+ "grad_norm": 1.8620476722717285,
224
+ "learning_rate": 4.7812500000000003e-05,
225
+ "loss": 0.3739,
226
  "step": 155
227
  },
228
  {
229
  "epoch": 0.64,
230
+ "grad_norm": 2.4667856693267822,
231
+ "learning_rate": 4.9375e-05,
232
+ "loss": 0.3542,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.66,
237
+ "grad_norm": 1.7935612201690674,
238
+ "learning_rate": 5.09375e-05,
239
+ "loss": 0.3409,
240
  "step": 165
241
  },
242
  {
243
  "epoch": 0.68,
244
+ "grad_norm": 1.6230987310409546,
245
+ "learning_rate": 5.25e-05,
246
+ "loss": 0.3068,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.7,
251
+ "grad_norm": 2.739957094192505,
252
+ "learning_rate": 5.40625e-05,
253
+ "loss": 0.2963,
254
  "step": 175
255
  },
256
  {
257
  "epoch": 0.72,
258
+ "grad_norm": 1.7342944145202637,
259
+ "learning_rate": 5.5625000000000004e-05,
260
+ "loss": 0.253,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.74,
265
+ "grad_norm": 2.0191333293914795,
266
+ "learning_rate": 5.71875e-05,
267
+ "loss": 0.2175,
268
  "step": 185
269
  },
270
  {
271
  "epoch": 0.76,
272
+ "grad_norm": 1.6039254665374756,
273
+ "learning_rate": 5.875e-05,
274
+ "loss": 0.2009,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.78,
279
+ "grad_norm": 2.2860054969787598,
280
+ "learning_rate": 6.03125e-05,
281
+ "loss": 0.1774,
282
  "step": 195
283
  },
284
  {
285
  "epoch": 0.8,
286
+ "grad_norm": 1.528680443763733,
287
+ "learning_rate": 6.1875e-05,
288
+ "loss": 0.1603,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.82,
293
+ "grad_norm": 1.526693344116211,
294
+ "learning_rate": 6.25e-05,
295
+ "loss": 0.1504,
296
  "step": 205
297
  },
298
  {
299
  "epoch": 0.84,
300
+ "grad_norm": 2.199506998062134,
301
+ "learning_rate": 6.25e-05,
302
+ "loss": 0.1357,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.86,
307
+ "grad_norm": 2.170020341873169,
308
+ "learning_rate": 6.25e-05,
309
+ "loss": 0.1519,
310
  "step": 215
311
  },
312
  {
313
  "epoch": 0.88,
314
+ "grad_norm": 1.5418131351470947,
315
+ "learning_rate": 6.25e-05,
316
+ "loss": 0.1524,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.9,
321
+ "grad_norm": 2.1583192348480225,
322
+ "learning_rate": 6.25e-05,
323
+ "loss": 0.1264,
324
  "step": 225
325
  },
326
  {
327
  "epoch": 0.92,
328
+ "grad_norm": 1.908937692642212,
329
+ "learning_rate": 6.25e-05,
330
+ "loss": 0.1221,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.94,
335
+ "grad_norm": 1.4072145223617554,
336
+ "learning_rate": 6.25e-05,
337
+ "loss": 0.1254,
338
  "step": 235
339
  },
340
  {
341
  "epoch": 0.96,
342
+ "grad_norm": 1.3102571964263916,
343
+ "learning_rate": 6.25e-05,
344
+ "loss": 0.1412,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.98,
349
+ "grad_norm": 1.4941678047180176,
350
+ "learning_rate": 6.25e-05,
351
+ "loss": 0.1203,
352
  "step": 245
353
  },
354
  {
355
  "epoch": 1.0,
356
+ "grad_norm": 1.8279727697372437,
357
+ "learning_rate": 6.25e-05,
358
+ "loss": 0.1107,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "eval_cer": 0.07099835736448257,
364
+ "eval_loss": 0.11958163231611252,
365
+ "eval_runtime": 280.2233,
366
+ "eval_samples_per_second": 1.784,
367
+ "eval_steps_per_second": 0.446,
368
  "step": 250
369
  },
370
  {
371
  "epoch": 1.02,
372
+ "grad_norm": 1.0286716222763062,
373
+ "learning_rate": 6.25e-05,
374
+ "loss": 0.1047,
375
  "step": 255
376
  },
377
  {
378
  "epoch": 1.04,
379
+ "grad_norm": 1.7464964389801025,
380
+ "learning_rate": 6.25e-05,
381
+ "loss": 0.1108,
382
  "step": 260
383
  },
384
  {
385
  "epoch": 1.06,
386
+ "grad_norm": 1.3992992639541626,
387
+ "learning_rate": 6.25e-05,
388
+ "loss": 0.1176,
389
  "step": 265
390
  },
391
  {
392
  "epoch": 1.08,
393
+ "grad_norm": 1.150010347366333,
394
+ "learning_rate": 6.25e-05,
395
+ "loss": 0.1059,
396
  "step": 270
397
  },
398
  {
399
  "epoch": 1.1,
400
+ "grad_norm": 1.983775019645691,
401
+ "learning_rate": 6.25e-05,
402
+ "loss": 0.1223,
403
  "step": 275
404
  },
405
  {
406
  "epoch": 1.12,
407
+ "grad_norm": 1.0895100831985474,
408
+ "learning_rate": 6.25e-05,
409
+ "loss": 0.0929,
410
  "step": 280
411
  },
412
  {
413
  "epoch": 1.1400000000000001,
414
+ "grad_norm": 1.631362795829773,
415
+ "learning_rate": 6.25e-05,
416
+ "loss": 0.1005,
417
  "step": 285
418
  },
419
  {
420
  "epoch": 1.16,
421
+ "grad_norm": 1.39686918258667,
422
+ "learning_rate": 6.25e-05,
423
+ "loss": 0.1022,
424
  "step": 290
425
  },
426
  {
427
  "epoch": 1.18,
428
+ "grad_norm": 1.2856369018554688,
429
+ "learning_rate": 6.25e-05,
430
+ "loss": 0.0908,
431
  "step": 295
432
  },
433
  {
434
  "epoch": 1.2,
435
+ "grad_norm": 1.2866718769073486,
436
+ "learning_rate": 6.25e-05,
437
+ "loss": 0.103,
438
  "step": 300
439
  },
440
  {
441
  "epoch": 1.22,
442
+ "grad_norm": 1.1115745306015015,
443
  "learning_rate": 6.25e-05,
444
+ "loss": 0.0934,
445
  "step": 305
446
  },
447
  {
448
  "epoch": 1.24,
449
+ "grad_norm": 1.2674397230148315,
450
  "learning_rate": 6.25e-05,
451
+ "loss": 0.0807,
452
  "step": 310
453
  },
454
  {
455
  "epoch": 1.26,
456
+ "grad_norm": 3.003493547439575,
457
  "learning_rate": 6.25e-05,
458
+ "loss": 0.1163,
459
  "step": 315
460
  },
461
  {
462
  "epoch": 1.28,
463
+ "grad_norm": 1.3472819328308105,
464
  "learning_rate": 6.25e-05,
465
+ "loss": 0.0939,
466
  "step": 320
467
  },
468
  {
469
  "epoch": 1.3,
470
+ "grad_norm": 1.576393485069275,
471
  "learning_rate": 6.25e-05,
472
+ "loss": 0.1023,
473
  "step": 325
474
  },
475
  {
476
  "epoch": 1.32,
477
+ "grad_norm": 1.2895311117172241,
478
  "learning_rate": 6.25e-05,
479
+ "loss": 0.0908,
480
  "step": 330
481
  },
482
  {
483
  "epoch": 1.34,
484
+ "grad_norm": 1.7044769525527954,
485
  "learning_rate": 6.25e-05,
486
+ "loss": 0.0849,
487
  "step": 335
488
  },
489
  {
490
  "epoch": 1.3599999999999999,
491
+ "grad_norm": 1.316157341003418,
492
  "learning_rate": 6.25e-05,
493
+ "loss": 0.0865,
494
  "step": 340
495
  },
496
  {
497
  "epoch": 1.38,
498
+ "grad_norm": 1.4973046779632568,
499
  "learning_rate": 6.25e-05,
500
+ "loss": 0.0864,
501
  "step": 345
502
  },
503
  {
504
  "epoch": 1.4,
505
+ "grad_norm": 0.7230541706085205,
506
  "learning_rate": 6.25e-05,
507
+ "loss": 0.074,
508
  "step": 350
509
  },
510
  {
511
  "epoch": 1.42,
512
+ "grad_norm": 1.026584267616272,
513
  "learning_rate": 6.25e-05,
514
+ "loss": 0.0788,
515
  "step": 355
516
  },
517
  {
518
  "epoch": 1.44,
519
+ "grad_norm": 1.3976407051086426,
520
  "learning_rate": 6.25e-05,
521
+ "loss": 0.0862,
522
  "step": 360
523
  },
524
  {
525
  "epoch": 1.46,
526
+ "grad_norm": 1.3054964542388916,
527
  "learning_rate": 6.25e-05,
528
+ "loss": 0.0757,
529
  "step": 365
530
  },
531
  {
532
  "epoch": 1.48,
533
+ "grad_norm": 1.8163508176803589,
534
  "learning_rate": 6.25e-05,
535
+ "loss": 0.0822,
536
  "step": 370
537
  },
538
  {
539
  "epoch": 1.5,
540
+ "grad_norm": 1.23239004611969,
541
  "learning_rate": 6.25e-05,
542
+ "loss": 0.0886,
543
  "step": 375
544
  },
545
  {
546
  "epoch": 1.52,
547
+ "grad_norm": 1.1320103406906128,
548
  "learning_rate": 6.25e-05,
549
+ "loss": 0.0834,
550
  "step": 380
551
  },
552
  {
553
  "epoch": 1.54,
554
+ "grad_norm": 1.8913005590438843,
555
  "learning_rate": 6.25e-05,
556
+ "loss": 0.0917,
557
  "step": 385
558
  },
559
  {
560
  "epoch": 1.56,
561
+ "grad_norm": 1.3522365093231201,
562
  "learning_rate": 6.25e-05,
563
+ "loss": 0.0927,
564
  "step": 390
565
  },
566
  {
567
  "epoch": 1.58,
568
+ "grad_norm": 1.3687407970428467,
569
  "learning_rate": 6.25e-05,
570
+ "loss": 0.0701,
571
  "step": 395
572
  },
573
  {
574
  "epoch": 1.6,
575
+ "grad_norm": 1.6905425786972046,
576
  "learning_rate": 6.25e-05,
577
+ "loss": 0.0914,
578
  "step": 400
579
  },
580
  {
581
  "epoch": 1.62,
582
+ "grad_norm": 1.3366554975509644,
583
  "learning_rate": 6.25e-05,
584
+ "loss": 0.0833,
585
  "step": 405
586
  },
587
  {
588
  "epoch": 1.6400000000000001,
589
+ "grad_norm": 1.0540807247161865,
590
  "learning_rate": 6.25e-05,
591
+ "loss": 0.088,
592
  "step": 410
593
  },
594
  {
595
  "epoch": 1.6600000000000001,
596
+ "grad_norm": 1.0234986543655396,
597
  "learning_rate": 6.25e-05,
598
+ "loss": 0.075,
599
  "step": 415
600
  },
601
  {
602
  "epoch": 1.6800000000000002,
603
+ "grad_norm": 1.3205113410949707,
604
  "learning_rate": 6.25e-05,
605
+ "loss": 0.0931,
606
  "step": 420
607
  },
608
  {
609
  "epoch": 1.7,
610
+ "grad_norm": 0.8865799307823181,
611
  "learning_rate": 6.25e-05,
612
+ "loss": 0.0831,
613
  "step": 425
614
  },
615
  {
616
  "epoch": 1.72,
617
+ "grad_norm": 1.2646653652191162,
618
  "learning_rate": 6.25e-05,
619
+ "loss": 0.0825,
620
  "step": 430
621
  },
622
  {
623
  "epoch": 1.74,
624
+ "grad_norm": 0.9991198182106018,
625
  "learning_rate": 6.25e-05,
626
+ "loss": 0.0825,
627
  "step": 435
628
  },
629
  {
630
  "epoch": 1.76,
631
+ "grad_norm": 0.8784312605857849,
632
  "learning_rate": 6.25e-05,
633
+ "loss": 0.082,
634
  "step": 440
635
  },
636
  {
637
  "epoch": 1.78,
638
+ "grad_norm": 1.304877519607544,
639
  "learning_rate": 6.25e-05,
640
+ "loss": 0.0775,
641
  "step": 445
642
  },
643
  {
644
  "epoch": 1.8,
645
+ "grad_norm": 1.2007408142089844,
646
  "learning_rate": 6.25e-05,
647
+ "loss": 0.0796,
648
  "step": 450
649
  },
650
  {
651
  "epoch": 1.8199999999999998,
652
+ "grad_norm": 0.9978143572807312,
653
  "learning_rate": 6.25e-05,
654
+ "loss": 0.0794,
655
  "step": 455
656
  },
657
  {
658
  "epoch": 1.8399999999999999,
659
+ "grad_norm": 1.2883387804031372,
660
  "learning_rate": 6.25e-05,
661
+ "loss": 0.0744,
662
  "step": 460
663
  },
664
  {
665
  "epoch": 1.8599999999999999,
666
+ "grad_norm": 0.8542335629463196,
667
  "learning_rate": 6.25e-05,
668
+ "loss": 0.074,
669
  "step": 465
670
  },
671
  {
672
  "epoch": 1.88,
673
+ "grad_norm": 1.0009572505950928,
674
  "learning_rate": 6.25e-05,
675
+ "loss": 0.0823,
676
  "step": 470
677
  },
678
  {
679
  "epoch": 1.9,
680
+ "grad_norm": 1.126528263092041,
681
  "learning_rate": 6.25e-05,
682
+ "loss": 0.0805,
683
  "step": 475
684
  },
685
  {
686
  "epoch": 1.92,
687
+ "grad_norm": 0.9336584210395813,
688
  "learning_rate": 6.25e-05,
689
+ "loss": 0.0722,
690
  "step": 480
691
  },
692
  {
693
  "epoch": 1.94,
694
+ "grad_norm": 1.0387274026870728,
695
  "learning_rate": 6.25e-05,
696
+ "loss": 0.0722,
697
  "step": 485
698
  },
699
  {
700
  "epoch": 1.96,
701
+ "grad_norm": 1.4692296981811523,
702
  "learning_rate": 6.25e-05,
703
+ "loss": 0.071,
704
  "step": 490
705
  },
706
  {
707
  "epoch": 1.98,
708
+ "grad_norm": 0.9859362244606018,
709
  "learning_rate": 6.25e-05,
710
+ "loss": 0.0795,
711
  "step": 495
712
  },
713
  {
714
  "epoch": 2.0,
715
+ "grad_norm": 1.0557219982147217,
716
  "learning_rate": 6.25e-05,
717
+ "loss": 0.0741,
718
  "step": 500
719
  },
720
  {
721
  "epoch": 2.0,
722
+ "eval_cer": 0.05483273798659818,
723
+ "eval_loss": 0.0840950757265091,
724
+ "eval_runtime": 280.5964,
725
+ "eval_samples_per_second": 1.782,
726
+ "eval_steps_per_second": 0.445,
727
  "step": 500
728
  },
729
  {
730
  "epoch": 2.02,
731
+ "grad_norm": 1.1070911884307861,
732
  "learning_rate": 6.25e-05,
733
+ "loss": 0.0552,
734
  "step": 505
735
  },
736
  {
737
  "epoch": 2.04,
738
+ "grad_norm": 0.7629583477973938,
739
  "learning_rate": 6.25e-05,
740
+ "loss": 0.0613,
741
  "step": 510
742
  },
743
  {
744
  "epoch": 2.06,
745
+ "grad_norm": 1.3060976266860962,
746
  "learning_rate": 6.25e-05,
747
+ "loss": 0.0746,
748
  "step": 515
749
  },
750
  {
751
  "epoch": 2.08,
752
+ "grad_norm": 1.067309021949768,
753
  "learning_rate": 6.25e-05,
754
+ "loss": 0.0717,
755
  "step": 520
756
  },
757
  {
758
  "epoch": 2.1,
759
+ "grad_norm": 1.1334558725357056,
760
  "learning_rate": 6.25e-05,
761
+ "loss": 0.0582,
762
  "step": 525
763
  },
764
  {
765
  "epoch": 2.12,
766
+ "grad_norm": 0.8565890192985535,
767
  "learning_rate": 6.25e-05,
768
+ "loss": 0.066,
769
  "step": 530
770
  },
771
  {
772
  "epoch": 2.14,
773
+ "grad_norm": 1.042038083076477,
774
  "learning_rate": 6.25e-05,
775
+ "loss": 0.0686,
776
  "step": 535
777
  },
778
  {
779
  "epoch": 2.16,
780
+ "grad_norm": 0.8787774443626404,
781
  "learning_rate": 6.25e-05,
782
+ "loss": 0.0714,
783
  "step": 540
784
  },
785
  {
786
  "epoch": 2.18,
787
+ "grad_norm": 1.2969956398010254,
788
  "learning_rate": 6.25e-05,
789
+ "loss": 0.069,
790
  "step": 545
791
  },
792
  {
793
  "epoch": 2.2,
794
+ "grad_norm": 1.0625072717666626,
795
  "learning_rate": 6.25e-05,
796
+ "loss": 0.0701,
797
  "step": 550
798
  },
799
  {
800
  "epoch": 2.22,
801
+ "grad_norm": 1.1786212921142578,
802
  "learning_rate": 6.25e-05,
803
+ "loss": 0.0672,
804
  "step": 555
805
  },
806
  {
807
  "epoch": 2.24,
808
+ "grad_norm": 0.7678006887435913,
809
  "learning_rate": 6.25e-05,
810
+ "loss": 0.0637,
811
  "step": 560
812
  },
813
  {
814
  "epoch": 2.26,
815
+ "grad_norm": 1.087916612625122,
816
  "learning_rate": 6.25e-05,
817
+ "loss": 0.0592,
818
  "step": 565
819
  },
820
  {
821
  "epoch": 2.2800000000000002,
822
+ "grad_norm": 1.0709354877471924,
823
  "learning_rate": 6.25e-05,
824
+ "loss": 0.0641,
825
  "step": 570
826
  },
827
  {
828
  "epoch": 2.3,
829
+ "grad_norm": 0.9933990240097046,
830
  "learning_rate": 6.25e-05,
831
+ "loss": 0.0724,
832
  "step": 575
833
  },
834
  {
835
  "epoch": 2.32,
836
+ "grad_norm": 0.9537047147750854,
837
  "learning_rate": 6.25e-05,
838
+ "loss": 0.0565,
839
  "step": 580
840
  },
841
  {
842
  "epoch": 2.34,
843
+ "grad_norm": 0.8913723230361938,
844
  "learning_rate": 6.25e-05,
845
+ "loss": 0.0601,
846
  "step": 585
847
  },
848
  {
849
  "epoch": 2.36,
850
+ "grad_norm": 1.4037823677062988,
851
  "learning_rate": 6.25e-05,
852
+ "loss": 0.0656,
853
  "step": 590
854
  },
855
  {
856
  "epoch": 2.38,
857
+ "grad_norm": 0.8686001896858215,
858
  "learning_rate": 6.25e-05,
859
+ "loss": 0.0617,
860
  "step": 595
861
  },
862
  {
863
  "epoch": 2.4,
864
+ "grad_norm": 1.1040139198303223,
865
  "learning_rate": 6.25e-05,
866
+ "loss": 0.0612,
867
  "step": 600
868
  },
869
  {
870
  "epoch": 2.42,
871
+ "grad_norm": 0.8995397090911865,
872
  "learning_rate": 6.25e-05,
873
+ "loss": 0.0528,
874
  "step": 605
875
  },
876
  {
877
  "epoch": 2.44,
878
+ "grad_norm": 1.0924474000930786,
879
  "learning_rate": 6.25e-05,
880
+ "loss": 0.0551,
881
  "step": 610
882
  },
883
  {
884
  "epoch": 2.46,
885
+ "grad_norm": 1.0748484134674072,
886
  "learning_rate": 6.25e-05,
887
+ "loss": 0.0573,
888
  "step": 615
889
  },
890
  {
891
  "epoch": 2.48,
892
+ "grad_norm": 0.8827953338623047,
893
  "learning_rate": 6.25e-05,
894
+ "loss": 0.0595,
895
  "step": 620
896
  },
897
  {
898
  "epoch": 2.5,
899
+ "grad_norm": 0.8614113926887512,
900
  "learning_rate": 6.25e-05,
901
+ "loss": 0.0694,
902
  "step": 625
903
  },
904
  {
905
  "epoch": 2.52,
906
+ "grad_norm": 0.6579775810241699,
907
  "learning_rate": 6.25e-05,
908
+ "loss": 0.0608,
909
  "step": 630
910
  },
911
  {
912
  "epoch": 2.54,
913
+ "grad_norm": 0.923587441444397,
914
  "learning_rate": 6.25e-05,
915
+ "loss": 0.0684,
916
  "step": 635
917
  },
918
  {
919
  "epoch": 2.56,
920
+ "grad_norm": 1.119313359260559,
921
  "learning_rate": 6.25e-05,
922
+ "loss": 0.0651,
923
  "step": 640
924
  },
925
  {
926
  "epoch": 2.58,
927
+ "grad_norm": 1.1630853414535522,
928
  "learning_rate": 6.25e-05,
929
+ "loss": 0.0773,
930
  "step": 645
931
  },
932
  {
933
  "epoch": 2.6,
934
+ "grad_norm": 0.9517636299133301,
935
  "learning_rate": 6.25e-05,
936
+ "loss": 0.0574,
937
  "step": 650
938
  },
939
  {
940
  "epoch": 2.62,
941
+ "grad_norm": 0.767271101474762,
942
  "learning_rate": 6.25e-05,
943
+ "loss": 0.071,
944
  "step": 655
945
  },
946
  {
947
  "epoch": 2.64,
948
+ "grad_norm": 1.3324207067489624,
949
  "learning_rate": 6.25e-05,
950
+ "loss": 0.0672,
951
  "step": 660
952
  },
953
  {
954
  "epoch": 2.66,
955
+ "grad_norm": 0.8638308048248291,
956
  "learning_rate": 6.25e-05,
957
+ "loss": 0.0602,
958
  "step": 665
959
  },
960
  {
961
  "epoch": 2.68,
962
+ "grad_norm": 0.9522351622581482,
963
  "learning_rate": 6.25e-05,
964
+ "loss": 0.0626,
965
  "step": 670
966
  },
967
  {
968
  "epoch": 2.7,
969
+ "grad_norm": 0.7264077067375183,
970
  "learning_rate": 6.25e-05,
971
+ "loss": 0.0654,
972
  "step": 675
973
  },
974
  {
975
  "epoch": 2.7199999999999998,
976
+ "grad_norm": 1.185275912284851,
977
  "learning_rate": 6.25e-05,
978
+ "loss": 0.0638,
979
  "step": 680
980
  },
981
  {
982
  "epoch": 2.74,
983
+ "grad_norm": 1.549625277519226,
984
  "learning_rate": 6.25e-05,
985
+ "loss": 0.0661,
986
  "step": 685
987
  },
988
  {
989
  "epoch": 2.76,
990
+ "grad_norm": 1.202415108680725,
991
  "learning_rate": 6.25e-05,
992
+ "loss": 0.0709,
993
  "step": 690
994
  },
995
  {
996
  "epoch": 2.7800000000000002,
997
+ "grad_norm": 0.7902194857597351,
998
  "learning_rate": 6.25e-05,
999
+ "loss": 0.0604,
1000
  "step": 695
1001
  },
1002
  {
1003
  "epoch": 2.8,
1004
+ "grad_norm": 1.0128028392791748,
1005
  "learning_rate": 6.25e-05,
1006
+ "loss": 0.0612,
1007
  "step": 700
1008
  },
1009
  {
1010
  "epoch": 2.82,
1011
+ "grad_norm": 0.8418397903442383,
1012
  "learning_rate": 6.25e-05,
1013
+ "loss": 0.0616,
1014
  "step": 705
1015
  },
1016
  {
1017
  "epoch": 2.84,
1018
+ "grad_norm": 0.9352026581764221,
1019
  "learning_rate": 6.25e-05,
1020
+ "loss": 0.0635,
1021
  "step": 710
1022
  },
1023
  {
1024
  "epoch": 2.86,
1025
+ "grad_norm": 0.679918110370636,
1026
  "learning_rate": 6.25e-05,
1027
+ "loss": 0.0588,
1028
  "step": 715
1029
  },
1030
  {
1031
  "epoch": 2.88,
1032
+ "grad_norm": 0.836438000202179,
1033
  "learning_rate": 6.25e-05,
1034
+ "loss": 0.0635,
1035
  "step": 720
1036
  },
1037
  {
1038
  "epoch": 2.9,
1039
+ "grad_norm": 0.7643904089927673,
1040
  "learning_rate": 6.25e-05,
1041
+ "loss": 0.0554,
1042
  "step": 725
1043
  },
1044
  {
1045
  "epoch": 2.92,
1046
+ "grad_norm": 0.9192042946815491,
1047
  "learning_rate": 6.25e-05,
1048
+ "loss": 0.0541,
1049
  "step": 730
1050
  },
1051
  {
1052
  "epoch": 2.94,
1053
+ "grad_norm": 0.9899188280105591,
1054
  "learning_rate": 6.25e-05,
1055
+ "loss": 0.0591,
1056
  "step": 735
1057
  },
1058
  {
1059
  "epoch": 2.96,
1060
+ "grad_norm": 1.112701654434204,
1061
  "learning_rate": 6.25e-05,
1062
+ "loss": 0.0611,
1063
  "step": 740
1064
  },
1065
  {
1066
  "epoch": 2.98,
1067
+ "grad_norm": 0.9096015095710754,
1068
  "learning_rate": 6.25e-05,
1069
+ "loss": 0.0594,
1070
  "step": 745
1071
  },
1072
  {
1073
  "epoch": 3.0,
1074
+ "grad_norm": 1.158527135848999,
1075
  "learning_rate": 6.25e-05,
1076
+ "loss": 0.0703,
1077
+ "step": 750
1078
+ },
1079
+ {
1080
+ "epoch": 3.0,
1081
+ "eval_cer": 0.05350298542486898,
1082
+ "eval_loss": 0.07945344597101212,
1083
+ "eval_runtime": 281.5513,
1084
+ "eval_samples_per_second": 1.776,
1085
+ "eval_steps_per_second": 0.444,
1086
  "step": 750
1087
  },
1088
  {
1089
  "epoch": 3.02,
1090
+ "grad_norm": 0.8544594049453735,
1091
  "learning_rate": 6.25e-05,
1092
+ "loss": 0.0461,
1093
  "step": 755
1094
  },
1095
  {
1096
  "epoch": 3.04,
1097
+ "grad_norm": 0.8411735892295837,
1098
  "learning_rate": 6.25e-05,
1099
+ "loss": 0.0429,
1100
  "step": 760
1101
  },
1102
  {
1103
  "epoch": 3.06,
1104
+ "grad_norm": 0.7515286207199097,
1105
  "learning_rate": 6.25e-05,
1106
+ "loss": 0.0559,
1107
  "step": 765
1108
  },
1109
  {
1110
  "epoch": 3.08,
1111
+ "grad_norm": 0.8125985264778137,
1112
  "learning_rate": 6.25e-05,
1113
+ "loss": 0.044,
1114
  "step": 770
1115
  },
1116
  {
1117
  "epoch": 3.1,
1118
+ "grad_norm": 0.8093322515487671,
1119
  "learning_rate": 6.25e-05,
1120
+ "loss": 0.0529,
1121
  "step": 775
1122
  },
1123
  {
1124
  "epoch": 3.12,
1125
+ "grad_norm": 0.8852378129959106,
1126
  "learning_rate": 6.25e-05,
1127
+ "loss": 0.0508,
1128
  "step": 780
1129
  },
1130
  {
1131
  "epoch": 3.14,
1132
+ "grad_norm": 0.6388903856277466,
1133
  "learning_rate": 6.25e-05,
1134
+ "loss": 0.0491,
1135
  "step": 785
1136
  },
1137
  {
1138
  "epoch": 3.16,
1139
+ "grad_norm": 0.9803158640861511,
1140
  "learning_rate": 6.25e-05,
1141
+ "loss": 0.051,
1142
  "step": 790
1143
  },
1144
  {
1145
  "epoch": 3.18,
1146
+ "grad_norm": 1.163065791130066,
1147
  "learning_rate": 6.25e-05,
1148
+ "loss": 0.0538,
1149
  "step": 795
1150
  },
1151
  {
1152
  "epoch": 3.2,
1153
+ "grad_norm": 0.942138671875,
1154
  "learning_rate": 6.25e-05,
1155
+ "loss": 0.0548,
1156
  "step": 800
1157
  },
1158
  {
1159
  "epoch": 3.22,
1160
+ "grad_norm": 0.763847827911377,
1161
  "learning_rate": 6.25e-05,
1162
+ "loss": 0.0497,
1163
  "step": 805
1164
  },
1165
  {
1166
  "epoch": 3.24,
1167
+ "grad_norm": 1.1041572093963623,
1168
  "learning_rate": 6.25e-05,
1169
+ "loss": 0.0513,
1170
  "step": 810
1171
  },
1172
  {
1173
  "epoch": 3.26,
1174
+ "grad_norm": 0.8744838237762451,
1175
  "learning_rate": 6.25e-05,
1176
+ "loss": 0.0574,
1177
  "step": 815
1178
  },
1179
  {
1180
  "epoch": 3.2800000000000002,
1181
+ "grad_norm": 0.8737279176712036,
1182
  "learning_rate": 6.25e-05,
1183
+ "loss": 0.0485,
1184
  "step": 820
1185
  },
1186
  {
1187
  "epoch": 3.3,
1188
+ "grad_norm": 0.6367043256759644,
1189
  "learning_rate": 6.25e-05,
1190
+ "loss": 0.0462,
1191
  "step": 825
1192
  },
1193
  {
1194
  "epoch": 3.32,
1195
+ "grad_norm": 0.7195335030555725,
1196
  "learning_rate": 6.25e-05,
1197
+ "loss": 0.0529,
1198
  "step": 830
1199
  },
1200
  {
1201
  "epoch": 3.34,
1202
+ "grad_norm": 0.7411594986915588,
1203
  "learning_rate": 6.25e-05,
1204
+ "loss": 0.0558,
1205
  "step": 835
1206
  },
1207
  {
1208
  "epoch": 3.36,
1209
+ "grad_norm": 0.5583875179290771,
1210
  "learning_rate": 6.25e-05,
1211
+ "loss": 0.0498,
1212
  "step": 840
1213
  },
1214
  {
1215
  "epoch": 3.38,
1216
+ "grad_norm": 0.7013912796974182,
1217
  "learning_rate": 6.25e-05,
1218
+ "loss": 0.0465,
1219
  "step": 845
1220
  },
1221
  {
1222
  "epoch": 3.4,
1223
+ "grad_norm": 1.1267294883728027,
1224
  "learning_rate": 6.25e-05,
1225
+ "loss": 0.0505,
1226
  "step": 850
1227
  },
1228
  {
1229
  "epoch": 3.42,
1230
+ "grad_norm": 1.3056484460830688,
1231
  "learning_rate": 6.25e-05,
1232
+ "loss": 0.0515,
1233
  "step": 855
1234
  },
1235
  {
1236
  "epoch": 3.44,
1237
+ "grad_norm": 1.182433843612671,
1238
  "learning_rate": 6.25e-05,
1239
+ "loss": 0.0525,
1240
  "step": 860
1241
  },
1242
  {
1243
  "epoch": 3.46,
1244
+ "grad_norm": 0.8969308733940125,
1245
  "learning_rate": 6.25e-05,
1246
+ "loss": 0.0517,
1247
  "step": 865
1248
  },
1249
  {
1250
  "epoch": 3.48,
1251
+ "grad_norm": 0.7779067158699036,
1252
  "learning_rate": 6.25e-05,
1253
+ "loss": 0.0539,
1254
  "step": 870
1255
  },
1256
  {
1257
  "epoch": 3.5,
1258
+ "grad_norm": 0.591754674911499,
1259
  "learning_rate": 6.25e-05,
1260
+ "loss": 0.0546,
1261
  "step": 875
1262
  },
1263
  {
1264
  "epoch": 3.52,
1265
+ "grad_norm": 0.8097557425498962,
1266
  "learning_rate": 6.25e-05,
1267
+ "loss": 0.0529,
1268
  "step": 880
1269
  },
1270
  {
1271
  "epoch": 3.54,
1272
+ "grad_norm": 0.7054248452186584,
1273
  "learning_rate": 6.25e-05,
1274
+ "loss": 0.0436,
1275
  "step": 885
1276
  },
1277
  {
1278
  "epoch": 3.56,
1279
+ "grad_norm": 0.5832129716873169,
1280
  "learning_rate": 6.25e-05,
1281
+ "loss": 0.048,
1282
  "step": 890
1283
  },
1284
  {
1285
  "epoch": 3.58,
1286
+ "grad_norm": 0.8104725480079651,
1287
  "learning_rate": 6.25e-05,
1288
+ "loss": 0.0503,
1289
  "step": 895
1290
  },
1291
  {
1292
  "epoch": 3.6,
1293
+ "grad_norm": 0.9961804151535034,
1294
  "learning_rate": 6.25e-05,
1295
+ "loss": 0.0565,
1296
  "step": 900
1297
  },
1298
  {
1299
  "epoch": 3.62,
1300
+ "grad_norm": 0.8466907143592834,
1301
  "learning_rate": 6.25e-05,
1302
+ "loss": 0.054,
1303
  "step": 905
1304
  },
1305
  {
1306
  "epoch": 3.64,
1307
+ "grad_norm": 0.8867480158805847,
1308
  "learning_rate": 6.25e-05,
1309
+ "loss": 0.0547,
1310
  "step": 910
1311
  },
1312
  {
1313
  "epoch": 3.66,
1314
+ "grad_norm": 0.9030736684799194,
1315
  "learning_rate": 6.25e-05,
1316
+ "loss": 0.0481,
1317
  "step": 915
1318
  },
1319
  {
1320
  "epoch": 3.68,
1321
+ "grad_norm": 0.6740151643753052,
1322
  "learning_rate": 6.25e-05,
1323
+ "loss": 0.0529,
1324
  "step": 920
1325
  },
1326
  {
1327
  "epoch": 3.7,
1328
+ "grad_norm": 0.653508722782135,
1329
  "learning_rate": 6.25e-05,
1330
+ "loss": 0.0633,
1331
  "step": 925
1332
  },
1333
  {
1334
  "epoch": 3.7199999999999998,
1335
+ "grad_norm": 0.7304302453994751,
1336
  "learning_rate": 6.25e-05,
1337
+ "loss": 0.0493,
1338
  "step": 930
1339
  },
1340
  {
1341
  "epoch": 3.74,
1342
+ "grad_norm": 0.8343582153320312,
1343
  "learning_rate": 6.25e-05,
1344
+ "loss": 0.059,
1345
  "step": 935
1346
  },
1347
  {
1348
  "epoch": 3.76,
1349
+ "grad_norm": 0.8459467887878418,
1350
  "learning_rate": 6.25e-05,
1351
+ "loss": 0.0531,
1352
  "step": 940
1353
  },
1354
  {
1355
  "epoch": 3.7800000000000002,
1356
+ "grad_norm": 0.7470009326934814,
1357
  "learning_rate": 6.25e-05,
1358
+ "loss": 0.0548,
1359
  "step": 945
1360
  },
1361
  {
1362
  "epoch": 3.8,
1363
+ "grad_norm": 0.8183557987213135,
1364
  "learning_rate": 6.25e-05,
1365
+ "loss": 0.0471,
1366
  "step": 950
1367
  },
1368
  {
1369
  "epoch": 3.82,
1370
+ "grad_norm": 0.9448140263557434,
1371
  "learning_rate": 6.25e-05,
1372
+ "loss": 0.045,
1373
  "step": 955
1374
  },
1375
  {
1376
  "epoch": 3.84,
1377
+ "grad_norm": 0.7056401371955872,
1378
  "learning_rate": 6.25e-05,
1379
+ "loss": 0.045,
1380
  "step": 960
1381
  },
1382
  {
1383
  "epoch": 3.86,
1384
+ "grad_norm": 0.7785059213638306,
1385
  "learning_rate": 6.25e-05,
1386
+ "loss": 0.0554,
1387
  "step": 965
1388
  },
1389
  {
1390
  "epoch": 3.88,
1391
+ "grad_norm": 0.8976256251335144,
1392
  "learning_rate": 6.25e-05,
1393
+ "loss": 0.0529,
1394
  "step": 970
1395
  },
1396
  {
1397
  "epoch": 3.9,
1398
+ "grad_norm": 1.0849542617797852,
1399
  "learning_rate": 6.25e-05,
1400
+ "loss": 0.0457,
1401
  "step": 975
1402
  },
1403
  {
1404
  "epoch": 3.92,
1405
+ "grad_norm": 1.1612681150436401,
1406
  "learning_rate": 6.25e-05,
1407
+ "loss": 0.0513,
1408
  "step": 980
1409
  },
1410
  {
1411
  "epoch": 3.94,
1412
+ "grad_norm": 0.6912779211997986,
1413
  "learning_rate": 6.25e-05,
1414
+ "loss": 0.0469,
1415
  "step": 985
1416
  },
1417
  {
1418
  "epoch": 3.96,
1419
+ "grad_norm": 0.7129920125007629,
1420
  "learning_rate": 6.25e-05,
1421
+ "loss": 0.0509,
1422
  "step": 990
1423
  },
1424
  {
1425
  "epoch": 3.98,
1426
+ "grad_norm": 0.6439591646194458,
1427
  "learning_rate": 6.25e-05,
1428
+ "loss": 0.0412,
1429
  "step": 995
1430
  },
1431
  {
1432
  "epoch": 4.0,
1433
+ "grad_norm": 0.7044887542724609,
1434
  "learning_rate": 6.25e-05,
1435
+ "loss": 0.0558,
1436
  "step": 1000
1437
  },
1438
  {
1439
  "epoch": 4.0,
1440
+ "eval_cer": 0.046749928297655986,
1441
+ "eval_loss": 0.07047422975301743,
1442
+ "eval_runtime": 280.6209,
1443
+ "eval_samples_per_second": 1.782,
1444
+ "eval_steps_per_second": 0.445,
1445
  "step": 1000
1446
  },
1447
  {
1448
  "epoch": 4.02,
1449
+ "grad_norm": 0.6291618943214417,
1450
  "learning_rate": 6.25e-05,
1451
  "loss": 0.0432,
1452
  "step": 1005
1453
  },
1454
  {
1455
  "epoch": 4.04,
1456
+ "grad_norm": 0.5485780239105225,
1457
  "learning_rate": 6.25e-05,
1458
+ "loss": 0.0459,
1459
  "step": 1010
1460
  },
1461
  {
1462
  "epoch": 4.06,
1463
+ "grad_norm": 0.5912005305290222,
1464
  "learning_rate": 6.25e-05,
1465
+ "loss": 0.0416,
1466
  "step": 1015
1467
  },
1468
  {
1469
  "epoch": 4.08,
1470
+ "grad_norm": 0.5929523706436157,
1471
  "learning_rate": 6.25e-05,
1472
+ "loss": 0.0358,
1473
  "step": 1020
1474
  },
1475
  {
1476
  "epoch": 4.1,
1477
+ "grad_norm": 0.4929662346839905,
1478
  "learning_rate": 6.25e-05,
1479
+ "loss": 0.0389,
1480
  "step": 1025
1481
  },
1482
  {
1483
  "epoch": 4.12,
1484
+ "grad_norm": 0.6707394123077393,
1485
  "learning_rate": 6.25e-05,
1486
+ "loss": 0.0388,
1487
  "step": 1030
1488
  },
1489
  {
1490
  "epoch": 4.14,
1491
+ "grad_norm": 0.9774329662322998,
1492
  "learning_rate": 6.25e-05,
1493
+ "loss": 0.0401,
1494
  "step": 1035
1495
  },
1496
  {
1497
  "epoch": 4.16,
1498
+ "grad_norm": 0.6821659803390503,
1499
  "learning_rate": 6.25e-05,
1500
+ "loss": 0.0403,
1501
  "step": 1040
1502
  },
1503
  {
1504
  "epoch": 4.18,
1505
+ "grad_norm": 0.796459436416626,
1506
  "learning_rate": 6.25e-05,
1507
+ "loss": 0.0425,
1508
  "step": 1045
1509
  },
1510
  {
1511
  "epoch": 4.2,
1512
+ "grad_norm": 0.6956031918525696,
1513
  "learning_rate": 6.25e-05,
1514
+ "loss": 0.0475,
1515
  "step": 1050
1516
  },
1517
  {
1518
  "epoch": 4.22,
1519
+ "grad_norm": 0.7577043175697327,
1520
  "learning_rate": 6.25e-05,
1521
+ "loss": 0.0483,
1522
  "step": 1055
1523
  },
1524
  {
1525
  "epoch": 4.24,
1526
+ "grad_norm": 0.5384642481803894,
1527
  "learning_rate": 6.25e-05,
1528
+ "loss": 0.0372,
1529
  "step": 1060
1530
  },
1531
  {
1532
  "epoch": 4.26,
1533
+ "grad_norm": 0.791437566280365,
1534
  "learning_rate": 6.25e-05,
1535
+ "loss": 0.0485,
1536
  "step": 1065
1537
  },
1538
  {
1539
  "epoch": 4.28,
1540
+ "grad_norm": 0.5820832252502441,
1541
  "learning_rate": 6.25e-05,
1542
+ "loss": 0.0466,
1543
  "step": 1070
1544
  },
1545
  {
1546
  "epoch": 4.3,
1547
+ "grad_norm": 0.9597232341766357,
1548
  "learning_rate": 6.25e-05,
1549
+ "loss": 0.0437,
1550
  "step": 1075
1551
  },
1552
  {
1553
  "epoch": 4.32,
1554
+ "grad_norm": 0.9876553416252136,
1555
  "learning_rate": 6.25e-05,
1556
+ "loss": 0.05,
1557
  "step": 1080
1558
  },
1559
  {
1560
  "epoch": 4.34,
1561
+ "grad_norm": 0.6902226805686951,
1562
  "learning_rate": 6.25e-05,
1563
+ "loss": 0.0401,
1564
  "step": 1085
1565
  },
1566
  {
1567
  "epoch": 4.36,
1568
+ "grad_norm": 0.5399324893951416,
1569
  "learning_rate": 6.25e-05,
1570
+ "loss": 0.043,
1571
  "step": 1090
1572
  },
1573
  {
1574
  "epoch": 4.38,
1575
+ "grad_norm": 0.7499954700469971,
1576
  "learning_rate": 6.25e-05,
1577
+ "loss": 0.0426,
1578
  "step": 1095
1579
  },
1580
  {
1581
  "epoch": 4.4,
1582
+ "grad_norm": 0.7145591378211975,
1583
  "learning_rate": 6.25e-05,
1584
+ "loss": 0.0503,
1585
  "step": 1100
1586
  },
1587
  {
1588
  "epoch": 4.42,
1589
+ "grad_norm": 0.5746826529502869,
1590
  "learning_rate": 6.25e-05,
1591
+ "loss": 0.0383,
1592
  "step": 1105
1593
  },
1594
  {
1595
  "epoch": 4.44,
1596
+ "grad_norm": 0.7018007040023804,
1597
  "learning_rate": 6.25e-05,
1598
+ "loss": 0.0466,
1599
  "step": 1110
1600
  },
1601
  {
1602
  "epoch": 4.46,
1603
+ "grad_norm": 0.6607512831687927,
1604
  "learning_rate": 6.25e-05,
1605
+ "loss": 0.038,
1606
  "step": 1115
1607
  },
1608
  {
1609
  "epoch": 4.48,
1610
+ "grad_norm": 0.5863096714019775,
1611
  "learning_rate": 6.25e-05,
1612
+ "loss": 0.0462,
1613
  "step": 1120
1614
  },
1615
  {
1616
  "epoch": 4.5,
1617
+ "grad_norm": 0.674934983253479,
1618
  "learning_rate": 6.25e-05,
1619
+ "loss": 0.0523,
1620
  "step": 1125
1621
  },
1622
  {
1623
  "epoch": 4.52,
1624
+ "grad_norm": 0.7824676036834717,
1625
  "learning_rate": 6.25e-05,
1626
+ "loss": 0.0467,
1627
  "step": 1130
1628
  },
1629
  {
1630
  "epoch": 4.54,
1631
+ "grad_norm": 1.4591455459594727,
1632
  "learning_rate": 6.25e-05,
1633
+ "loss": 0.0485,
1634
  "step": 1135
1635
  },
1636
  {
1637
  "epoch": 4.5600000000000005,
1638
+ "grad_norm": 0.6413418650627136,
1639
  "learning_rate": 6.25e-05,
1640
+ "loss": 0.0435,
1641
  "step": 1140
1642
  },
1643
  {
1644
  "epoch": 4.58,
1645
+ "grad_norm": 0.5044887065887451,
1646
  "learning_rate": 6.25e-05,
1647
+ "loss": 0.0432,
1648
  "step": 1145
1649
  },
1650
  {
1651
  "epoch": 4.6,
1652
+ "grad_norm": 0.4768076539039612,
1653
  "learning_rate": 6.25e-05,
1654
+ "loss": 0.0422,
1655
  "step": 1150
1656
  },
1657
  {
1658
  "epoch": 4.62,
1659
+ "grad_norm": 0.7008136510848999,
1660
  "learning_rate": 6.25e-05,
1661
+ "loss": 0.045,
1662
  "step": 1155
1663
  },
1664
  {
1665
  "epoch": 4.64,
1666
+ "grad_norm": 1.1213037967681885,
1667
  "learning_rate": 6.25e-05,
1668
+ "loss": 0.0469,
1669
  "step": 1160
1670
  },
1671
  {
1672
  "epoch": 4.66,
1673
+ "grad_norm": 0.6898444890975952,
1674
  "learning_rate": 6.25e-05,
1675
+ "loss": 0.0398,
1676
  "step": 1165
1677
  },
1678
  {
1679
  "epoch": 4.68,
1680
+ "grad_norm": 0.6885802149772644,
1681
  "learning_rate": 6.25e-05,
1682
+ "loss": 0.0475,
1683
  "step": 1170
1684
  },
1685
  {
1686
  "epoch": 4.7,
1687
+ "grad_norm": 0.644440770149231,
1688
  "learning_rate": 6.25e-05,
1689
+ "loss": 0.0403,
1690
  "step": 1175
1691
  },
1692
  {
1693
  "epoch": 4.72,
1694
+ "grad_norm": 0.6610418558120728,
1695
  "learning_rate": 6.25e-05,
1696
+ "loss": 0.0415,
1697
  "step": 1180
1698
  },
1699
  {
1700
  "epoch": 4.74,
1701
+ "grad_norm": 0.7127951979637146,
1702
  "learning_rate": 6.25e-05,
1703
+ "loss": 0.0466,
1704
  "step": 1185
1705
  },
1706
  {
1707
  "epoch": 4.76,
1708
+ "grad_norm": 0.7608262300491333,
1709
  "learning_rate": 6.25e-05,
1710
+ "loss": 0.0398,
1711
  "step": 1190
1712
  },
1713
  {
1714
  "epoch": 4.78,
1715
+ "grad_norm": 0.6554054021835327,
1716
  "learning_rate": 6.25e-05,
1717
+ "loss": 0.0395,
1718
  "step": 1195
1719
  },
1720
  {
1721
  "epoch": 4.8,
1722
+ "grad_norm": 0.7710177302360535,
1723
  "learning_rate": 6.25e-05,
1724
+ "loss": 0.0412,
1725
  "step": 1200
1726
  },
1727
  {
1728
  "epoch": 4.82,
1729
+ "grad_norm": 0.5044788718223572,
1730
  "learning_rate": 6.25e-05,
1731
+ "loss": 0.0378,
1732
  "step": 1205
1733
  },
1734
  {
1735
  "epoch": 4.84,
1736
+ "grad_norm": 0.4640452265739441,
1737
  "learning_rate": 6.25e-05,
1738
+ "loss": 0.0394,
1739
  "step": 1210
1740
  },
1741
  {
1742
  "epoch": 4.86,
1743
+ "grad_norm": 0.6121119260787964,
1744
  "learning_rate": 6.25e-05,
1745
+ "loss": 0.0373,
1746
  "step": 1215
1747
  },
1748
  {
1749
  "epoch": 4.88,
1750
+ "grad_norm": 0.7307333946228027,
1751
  "learning_rate": 6.25e-05,
1752
+ "loss": 0.0462,
1753
  "step": 1220
1754
  },
1755
  {
1756
  "epoch": 4.9,
1757
+ "grad_norm": 0.841369092464447,
1758
  "learning_rate": 6.25e-05,
1759
+ "loss": 0.0433,
1760
  "step": 1225
1761
  },
1762
  {
1763
  "epoch": 4.92,
1764
+ "grad_norm": 0.48274680972099304,
1765
  "learning_rate": 6.25e-05,
1766
+ "loss": 0.0481,
1767
  "step": 1230
1768
  },
1769
  {
1770
  "epoch": 4.9399999999999995,
1771
+ "grad_norm": 0.6552777290344238,
1772
  "learning_rate": 6.25e-05,
1773
+ "loss": 0.0449,
1774
  "step": 1235
1775
  },
1776
  {
1777
  "epoch": 4.96,
1778
+ "grad_norm": 1.0837739706039429,
1779
  "learning_rate": 6.25e-05,
1780
+ "loss": 0.0465,
1781
  "step": 1240
1782
  },
1783
  {
1784
  "epoch": 4.98,
1785
+ "grad_norm": 0.7444823384284973,
1786
  "learning_rate": 6.25e-05,
1787
+ "loss": 0.0513,
1788
  "step": 1245
1789
  },
1790
  {
1791
  "epoch": 5.0,
1792
+ "grad_norm": 0.561403214931488,
1793
  "learning_rate": 6.25e-05,
1794
+ "loss": 0.0458,
1795
+ "step": 1250
1796
+ },
1797
+ {
1798
+ "epoch": 5.0,
1799
+ "eval_cer": 0.07253669856334576,
1800
+ "eval_loss": 0.06918226927518845,
1801
+ "eval_runtime": 282.8276,
1802
+ "eval_samples_per_second": 1.768,
1803
+ "eval_steps_per_second": 0.442,
1804
  "step": 1250
1805
  },
1806
  {
1807
  "epoch": 5.02,
1808
+ "grad_norm": 0.67482990026474,
1809
  "learning_rate": 6.25e-05,
1810
+ "loss": 0.037,
1811
  "step": 1255
1812
  },
1813
  {
1814
  "epoch": 5.04,
1815
+ "grad_norm": 0.6839190721511841,
1816
  "learning_rate": 6.25e-05,
1817
+ "loss": 0.0445,
1818
  "step": 1260
1819
  },
1820
  {
1821
  "epoch": 5.06,
1822
+ "grad_norm": 0.8001631498336792,
1823
  "learning_rate": 6.25e-05,
1824
+ "loss": 0.0386,
1825
  "step": 1265
1826
  },
1827
  {
1828
  "epoch": 5.08,
1829
+ "grad_norm": 0.8353962898254395,
1830
  "learning_rate": 6.25e-05,
1831
+ "loss": 0.0407,
1832
  "step": 1270
1833
  },
1834
  {
1835
  "epoch": 5.1,
1836
+ "grad_norm": 0.556709885597229,
1837
  "learning_rate": 6.25e-05,
1838
+ "loss": 0.0355,
1839
  "step": 1275
1840
  },
1841
  {
1842
  "epoch": 5.12,
1843
+ "grad_norm": 0.5634174942970276,
1844
  "learning_rate": 6.25e-05,
1845
+ "loss": 0.0322,
1846
  "step": 1280
1847
  },
1848
  {
1849
  "epoch": 5.14,
1850
+ "grad_norm": 0.6530662775039673,
1851
  "learning_rate": 6.25e-05,
1852
+ "loss": 0.041,
1853
  "step": 1285
1854
  },
1855
  {
1856
  "epoch": 5.16,
1857
+ "grad_norm": 0.5771991610527039,
1858
  "learning_rate": 6.25e-05,
1859
+ "loss": 0.0375,
1860
  "step": 1290
1861
  },
1862
  {
1863
  "epoch": 5.18,
1864
+ "grad_norm": 0.5936269164085388,
1865
  "learning_rate": 6.25e-05,
1866
+ "loss": 0.0362,
1867
  "step": 1295
1868
  },
1869
  {
1870
  "epoch": 5.2,
1871
+ "grad_norm": 0.6964532136917114,
1872
  "learning_rate": 6.25e-05,
1873
+ "loss": 0.0361,
1874
  "step": 1300
1875
  },
1876
  {
1877
  "epoch": 5.22,
1878
+ "grad_norm": 1.0432935953140259,
1879
  "learning_rate": 6.25e-05,
1880
+ "loss": 0.0346,
1881
  "step": 1305
1882
  },
1883
  {
1884
  "epoch": 5.24,
1885
+ "grad_norm": 0.6481297016143799,
1886
  "learning_rate": 6.25e-05,
1887
+ "loss": 0.0351,
1888
  "step": 1310
1889
  },
1890
  {
1891
  "epoch": 5.26,
1892
+ "grad_norm": 0.9188110828399658,
1893
  "learning_rate": 6.25e-05,
1894
+ "loss": 0.0378,
1895
  "step": 1315
1896
  },
1897
  {
1898
  "epoch": 5.28,
1899
+ "grad_norm": 0.4248051345348358,
1900
  "learning_rate": 6.25e-05,
1901
+ "loss": 0.0296,
1902
  "step": 1320
1903
  },
1904
  {
1905
  "epoch": 5.3,
1906
+ "grad_norm": 0.5334679484367371,
1907
  "learning_rate": 6.25e-05,
1908
+ "loss": 0.0397,
1909
  "step": 1325
1910
  },
1911
  {
1912
  "epoch": 5.32,
1913
+ "grad_norm": 0.7321200370788574,
1914
  "learning_rate": 6.25e-05,
1915
+ "loss": 0.0414,
1916
  "step": 1330
1917
  },
1918
  {
1919
  "epoch": 5.34,
1920
+ "grad_norm": 0.5322144627571106,
1921
  "learning_rate": 6.25e-05,
1922
+ "loss": 0.0381,
1923
  "step": 1335
1924
  },
1925
  {
1926
  "epoch": 5.36,
1927
+ "grad_norm": 0.8044850826263428,
1928
  "learning_rate": 6.25e-05,
1929
+ "loss": 0.0348,
1930
  "step": 1340
1931
  },
1932
  {
1933
  "epoch": 5.38,
1934
+ "grad_norm": 0.6011214256286621,
1935
  "learning_rate": 6.25e-05,
1936
+ "loss": 0.0379,
1937
  "step": 1345
1938
  },
1939
  {
1940
  "epoch": 5.4,
1941
+ "grad_norm": 0.7421667575836182,
1942
  "learning_rate": 6.25e-05,
1943
+ "loss": 0.0379,
1944
  "step": 1350
1945
  },
1946
  {
1947
  "epoch": 5.42,
1948
+ "grad_norm": 0.4418427348136902,
1949
  "learning_rate": 6.25e-05,
1950
+ "loss": 0.0328,
1951
  "step": 1355
1952
  },
1953
  {
1954
  "epoch": 5.44,
1955
+ "grad_norm": 0.6037031412124634,
1956
  "learning_rate": 6.25e-05,
1957
+ "loss": 0.0351,
1958
  "step": 1360
1959
  },
1960
  {
1961
  "epoch": 5.46,
1962
+ "grad_norm": 0.7416286468505859,
1963
  "learning_rate": 6.25e-05,
1964
+ "loss": 0.0344,
1965
  "step": 1365
1966
  },
1967
  {
1968
  "epoch": 5.48,
1969
+ "grad_norm": 0.9417647123336792,
1970
  "learning_rate": 6.25e-05,
1971
+ "loss": 0.0504,
1972
  "step": 1370
1973
  },
1974
  {
1975
  "epoch": 5.5,
1976
+ "grad_norm": 0.5485287308692932,
1977
  "learning_rate": 6.25e-05,
1978
+ "loss": 0.0383,
1979
  "step": 1375
1980
  },
1981
  {
1982
  "epoch": 5.52,
1983
+ "grad_norm": 0.647965133190155,
1984
  "learning_rate": 6.25e-05,
1985
+ "loss": 0.0388,
1986
  "step": 1380
1987
  },
1988
  {
1989
  "epoch": 5.54,
1990
+ "grad_norm": 0.7375500202178955,
1991
  "learning_rate": 6.25e-05,
1992
+ "loss": 0.0368,
1993
  "step": 1385
1994
  },
1995
  {
1996
  "epoch": 5.5600000000000005,
1997
+ "grad_norm": 0.7219087481498718,
1998
  "learning_rate": 6.25e-05,
1999
+ "loss": 0.0497,
2000
  "step": 1390
2001
  },
2002
  {
2003
  "epoch": 5.58,
2004
+ "grad_norm": 0.49959471821784973,
2005
  "learning_rate": 6.25e-05,
2006
+ "loss": 0.038,
2007
  "step": 1395
2008
  },
2009
  {
2010
  "epoch": 5.6,
2011
+ "grad_norm": 0.5299109816551208,
2012
  "learning_rate": 6.25e-05,
2013
+ "loss": 0.0303,
2014
  "step": 1400
2015
  },
2016
  {
2017
  "epoch": 5.62,
2018
+ "grad_norm": 0.48730289936065674,
2019
  "learning_rate": 6.25e-05,
2020
+ "loss": 0.0337,
2021
  "step": 1405
2022
  },
2023
  {
2024
  "epoch": 5.64,
2025
+ "grad_norm": 0.3811701834201813,
2026
  "learning_rate": 6.25e-05,
2027
+ "loss": 0.0367,
2028
  "step": 1410
2029
  },
2030
  {
2031
  "epoch": 5.66,
2032
+ "grad_norm": 0.4611757695674896,
2033
  "learning_rate": 6.25e-05,
2034
+ "loss": 0.0396,
2035
  "step": 1415
2036
  },
2037
  {
2038
  "epoch": 5.68,
2039
+ "grad_norm": 0.5509118437767029,
2040
  "learning_rate": 6.25e-05,
2041
+ "loss": 0.0397,
2042
  "step": 1420
2043
  },
2044
  {
2045
  "epoch": 5.7,
2046
+ "grad_norm": 0.8130658268928528,
2047
  "learning_rate": 6.25e-05,
2048
+ "loss": 0.0346,
2049
  "step": 1425
2050
  },
2051
  {
2052
  "epoch": 5.72,
2053
+ "grad_norm": 0.4248274266719818,
2054
  "learning_rate": 6.25e-05,
2055
+ "loss": 0.0334,
2056
  "step": 1430
2057
  },
2058
  {
2059
  "epoch": 5.74,
2060
+ "grad_norm": 1.1918326616287231,
2061
  "learning_rate": 6.25e-05,
2062
+ "loss": 0.041,
2063
  "step": 1435
2064
  },
2065
  {
2066
  "epoch": 5.76,
2067
+ "grad_norm": 0.6501240730285645,
2068
  "learning_rate": 6.25e-05,
2069
+ "loss": 0.0423,
2070
  "step": 1440
2071
  },
2072
  {
2073
  "epoch": 5.78,
2074
+ "grad_norm": 1.216350793838501,
2075
  "learning_rate": 6.25e-05,
2076
+ "loss": 0.0366,
2077
  "step": 1445
2078
  },
2079
  {
2080
  "epoch": 5.8,
2081
+ "grad_norm": 0.48442235589027405,
2082
  "learning_rate": 6.25e-05,
2083
+ "loss": 0.0335,
2084
  "step": 1450
2085
  },
2086
  {
2087
  "epoch": 5.82,
2088
+ "grad_norm": 0.5834723711013794,
2089
  "learning_rate": 6.25e-05,
2090
  "loss": 0.0414,
2091
  "step": 1455
2092
  },
2093
  {
2094
  "epoch": 5.84,
2095
+ "grad_norm": 0.7862647771835327,
2096
  "learning_rate": 6.25e-05,
2097
+ "loss": 0.0438,
2098
  "step": 1460
2099
  },
2100
  {
2101
  "epoch": 5.86,
2102
+ "grad_norm": 0.8282245397567749,
2103
  "learning_rate": 6.25e-05,
2104
+ "loss": 0.0384,
2105
  "step": 1465
2106
  },
2107
  {
2108
  "epoch": 5.88,
2109
+ "grad_norm": 0.8185272812843323,
2110
  "learning_rate": 6.25e-05,
2111
+ "loss": 0.0386,
2112
  "step": 1470
2113
  },
2114
  {
2115
  "epoch": 5.9,
2116
+ "grad_norm": 0.6197579503059387,
2117
  "learning_rate": 6.25e-05,
2118
+ "loss": 0.036,
2119
  "step": 1475
2120
  },
2121
  {
2122
  "epoch": 5.92,
2123
+ "grad_norm": 0.5256204009056091,
2124
  "learning_rate": 6.25e-05,
2125
+ "loss": 0.0331,
2126
  "step": 1480
2127
  },
2128
  {
2129
  "epoch": 5.9399999999999995,
2130
+ "grad_norm": 0.5693526864051819,
2131
  "learning_rate": 6.25e-05,
2132
+ "loss": 0.0404,
2133
  "step": 1485
2134
  },
2135
  {
2136
  "epoch": 5.96,
2137
+ "grad_norm": 0.505524754524231,
2138
  "learning_rate": 6.25e-05,
2139
+ "loss": 0.0345,
2140
  "step": 1490
2141
  },
2142
  {
2143
  "epoch": 5.98,
2144
+ "grad_norm": 0.7480014562606812,
2145
  "learning_rate": 6.25e-05,
2146
+ "loss": 0.0421,
2147
  "step": 1495
2148
  },
2149
  {
2150
  "epoch": 6.0,
2151
+ "grad_norm": 0.6769825220108032,
2152
  "learning_rate": 6.25e-05,
2153
+ "loss": 0.0364,
2154
  "step": 1500
2155
  },
2156
  {
2157
  "epoch": 6.0,
2158
+ "eval_cer": 0.04693244335514823,
2159
+ "eval_loss": 0.07296038419008255,
2160
+ "eval_runtime": 281.2143,
2161
+ "eval_samples_per_second": 1.778,
2162
+ "eval_steps_per_second": 0.445,
2163
  "step": 1500
2164
  },
2165
  {
2166
  "epoch": 6.02,
2167
+ "grad_norm": 0.4684133231639862,
2168
  "learning_rate": 6.25e-05,
2169
+ "loss": 0.0311,
2170
  "step": 1505
2171
  },
2172
  {
2173
  "epoch": 6.04,
2174
+ "grad_norm": 0.43542611598968506,
2175
  "learning_rate": 6.25e-05,
2176
+ "loss": 0.0276,
2177
  "step": 1510
2178
  },
2179
  {
2180
  "epoch": 6.06,
2181
+ "grad_norm": 0.5249391198158264,
2182
  "learning_rate": 6.25e-05,
2183
  "loss": 0.0314,
2184
  "step": 1515
2185
  },
2186
  {
2187
  "epoch": 6.08,
2188
+ "grad_norm": 0.7982779741287231,
2189
  "learning_rate": 6.25e-05,
2190
+ "loss": 0.03,
2191
  "step": 1520
2192
  },
2193
  {
2194
  "epoch": 6.1,
2195
+ "grad_norm": 0.5430174469947815,
2196
  "learning_rate": 6.25e-05,
2197
+ "loss": 0.0289,
2198
  "step": 1525
2199
  },
2200
  {
2201
  "epoch": 6.12,
2202
+ "grad_norm": 0.6563279628753662,
2203
  "learning_rate": 6.25e-05,
2204
+ "loss": 0.0299,
2205
  "step": 1530
2206
  },
2207
  {
2208
  "epoch": 6.14,
2209
+ "grad_norm": 0.45975133776664734,
2210
  "learning_rate": 6.25e-05,
2211
+ "loss": 0.026,
2212
  "step": 1535
2213
  },
2214
  {
2215
  "epoch": 6.16,
2216
+ "grad_norm": 0.6540797352790833,
2217
  "learning_rate": 6.25e-05,
2218
+ "loss": 0.0313,
2219
  "step": 1540
2220
  },
2221
  {
2222
  "epoch": 6.18,
2223
+ "grad_norm": 0.6721683740615845,
2224
  "learning_rate": 6.25e-05,
2225
+ "loss": 0.0335,
2226
  "step": 1545
2227
  },
2228
  {
2229
  "epoch": 6.2,
2230
+ "grad_norm": 0.5357054471969604,
2231
  "learning_rate": 6.25e-05,
2232
+ "loss": 0.0357,
2233
  "step": 1550
2234
  },
2235
  {
2236
  "epoch": 6.22,
2237
+ "grad_norm": 1.2677907943725586,
2238
  "learning_rate": 6.25e-05,
2239
+ "loss": 0.0362,
2240
  "step": 1555
2241
  },
2242
  {
2243
  "epoch": 6.24,
2244
+ "grad_norm": 0.8369943499565125,
2245
  "learning_rate": 6.25e-05,
2246
+ "loss": 0.0331,
2247
  "step": 1560
2248
  },
2249
  {
2250
  "epoch": 6.26,
2251
+ "grad_norm": 0.8115782141685486,
2252
  "learning_rate": 6.25e-05,
2253
+ "loss": 0.0381,
2254
  "step": 1565
2255
  },
2256
  {
2257
  "epoch": 6.28,
2258
+ "grad_norm": 0.598883330821991,
2259
  "learning_rate": 6.25e-05,
2260
+ "loss": 0.0343,
2261
  "step": 1570
2262
  },
2263
  {
2264
  "epoch": 6.3,
2265
+ "grad_norm": 0.9512626528739929,
2266
  "learning_rate": 6.25e-05,
2267
+ "loss": 0.0362,
2268
  "step": 1575
2269
  },
2270
  {
2271
  "epoch": 6.32,
2272
+ "grad_norm": 0.8760331273078918,
2273
  "learning_rate": 6.25e-05,
2274
+ "loss": 0.0357,
2275
  "step": 1580
2276
  },
2277
  {
2278
  "epoch": 6.34,
2279
+ "grad_norm": 0.502618134021759,
2280
  "learning_rate": 6.25e-05,
2281
+ "loss": 0.0273,
2282
  "step": 1585
2283
  },
2284
  {
2285
  "epoch": 6.36,
2286
+ "grad_norm": 0.5483182072639465,
2287
  "learning_rate": 6.25e-05,
2288
+ "loss": 0.0353,
2289
  "step": 1590
2290
  },
2291
  {
2292
  "epoch": 6.38,
2293
+ "grad_norm": 0.7582818865776062,
2294
  "learning_rate": 6.25e-05,
2295
+ "loss": 0.0325,
2296
  "step": 1595
2297
  },
2298
  {
2299
  "epoch": 6.4,
2300
+ "grad_norm": 0.7242081761360168,
2301
  "learning_rate": 6.25e-05,
2302
+ "loss": 0.0294,
2303
  "step": 1600
2304
  },
2305
  {
2306
  "epoch": 6.42,
2307
+ "grad_norm": 0.6686793565750122,
2308
  "learning_rate": 6.25e-05,
2309
+ "loss": 0.0376,
2310
  "step": 1605
2311
  },
2312
  {
2313
  "epoch": 6.44,
2314
+ "grad_norm": 0.6351500153541565,
2315
  "learning_rate": 6.25e-05,
2316
+ "loss": 0.0322,
2317
  "step": 1610
2318
  },
2319
  {
2320
  "epoch": 6.46,
2321
+ "grad_norm": 0.7319616675376892,
2322
  "learning_rate": 6.25e-05,
2323
+ "loss": 0.0306,
2324
  "step": 1615
2325
  },
2326
  {
2327
  "epoch": 6.48,
2328
+ "grad_norm": 0.6641121506690979,
2329
  "learning_rate": 6.25e-05,
2330
+ "loss": 0.035,
2331
  "step": 1620
2332
  },
2333
  {
2334
  "epoch": 6.5,
2335
+ "grad_norm": 0.6666487455368042,
2336
  "learning_rate": 6.25e-05,
2337
+ "loss": 0.0304,
2338
  "step": 1625
2339
  },
2340
  {
2341
  "epoch": 6.52,
2342
+ "grad_norm": 0.540726363658905,
2343
  "learning_rate": 6.25e-05,
2344
+ "loss": 0.036,
2345
  "step": 1630
2346
  },
2347
  {
2348
  "epoch": 6.54,
2349
+ "grad_norm": 0.5046465992927551,
2350
  "learning_rate": 6.25e-05,
2351
+ "loss": 0.0384,
2352
  "step": 1635
2353
  },
2354
  {
2355
  "epoch": 6.5600000000000005,
2356
+ "grad_norm": 0.5858854651451111,
2357
  "learning_rate": 6.25e-05,
2358
+ "loss": 0.0349,
2359
  "step": 1640
2360
  },
2361
  {
2362
  "epoch": 6.58,
2363
+ "grad_norm": 0.6154960989952087,
2364
  "learning_rate": 6.25e-05,
2365
+ "loss": 0.0381,
2366
  "step": 1645
2367
  },
2368
  {
2369
  "epoch": 6.6,
2370
+ "grad_norm": 0.9321079254150391,
2371
  "learning_rate": 6.25e-05,
2372
+ "loss": 0.0298,
2373
  "step": 1650
2374
  },
2375
  {
2376
  "epoch": 6.62,
2377
+ "grad_norm": 0.4276799261569977,
2378
  "learning_rate": 6.25e-05,
2379
+ "loss": 0.0295,
2380
  "step": 1655
2381
  },
2382
  {
2383
  "epoch": 6.64,
2384
+ "grad_norm": 0.545616090297699,
2385
  "learning_rate": 6.25e-05,
2386
+ "loss": 0.0279,
2387
  "step": 1660
2388
  },
2389
  {
2390
  "epoch": 6.66,
2391
+ "grad_norm": 0.5112252235412598,
2392
  "learning_rate": 6.25e-05,
2393
+ "loss": 0.0371,
2394
  "step": 1665
2395
  },
2396
  {
2397
  "epoch": 6.68,
2398
+ "grad_norm": 0.6641426086425781,
2399
  "learning_rate": 6.25e-05,
2400
+ "loss": 0.0288,
2401
  "step": 1670
2402
  },
2403
  {
2404
  "epoch": 6.7,
2405
+ "grad_norm": 0.4481450021266937,
2406
  "learning_rate": 6.25e-05,
2407
+ "loss": 0.0342,
2408
  "step": 1675
2409
  },
2410
  {
2411
  "epoch": 6.72,
2412
+ "grad_norm": 0.6158471703529358,
2413
  "learning_rate": 6.25e-05,
2414
+ "loss": 0.0378,
2415
  "step": 1680
2416
  },
2417
  {
2418
  "epoch": 6.74,
2419
+ "grad_norm": 0.47170689702033997,
2420
  "learning_rate": 6.25e-05,
2421
+ "loss": 0.0314,
2422
  "step": 1685
2423
  },
2424
  {
2425
  "epoch": 6.76,
2426
+ "grad_norm": 0.33950161933898926,
2427
  "learning_rate": 6.25e-05,
2428
+ "loss": 0.0301,
2429
  "step": 1690
2430
  },
2431
  {
2432
  "epoch": 6.78,
2433
+ "grad_norm": 0.572180449962616,
2434
  "learning_rate": 6.25e-05,
2435
+ "loss": 0.0336,
2436
  "step": 1695
2437
  },
2438
  {
2439
  "epoch": 6.8,
2440
+ "grad_norm": 0.7031643986701965,
2441
  "learning_rate": 6.25e-05,
2442
+ "loss": 0.0329,
2443
  "step": 1700
2444
  },
2445
  {
2446
  "epoch": 6.82,
2447
+ "grad_norm": 0.9186747074127197,
2448
  "learning_rate": 6.25e-05,
2449
+ "loss": 0.0332,
2450
  "step": 1705
2451
  },
2452
  {
2453
  "epoch": 6.84,
2454
+ "grad_norm": 0.6245182156562805,
2455
  "learning_rate": 6.25e-05,
2456
+ "loss": 0.0279,
2457
  "step": 1710
2458
  },
2459
  {
2460
  "epoch": 6.86,
2461
+ "grad_norm": 0.6732586026191711,
2462
  "learning_rate": 6.25e-05,
2463
+ "loss": 0.0373,
2464
  "step": 1715
2465
  },
2466
  {
2467
  "epoch": 6.88,
2468
+ "grad_norm": 0.4248816967010498,
2469
  "learning_rate": 6.25e-05,
2470
+ "loss": 0.029,
2471
  "step": 1720
2472
  },
2473
  {
2474
  "epoch": 6.9,
2475
+ "grad_norm": 0.6019976735115051,
2476
  "learning_rate": 6.25e-05,
2477
+ "loss": 0.0361,
2478
  "step": 1725
2479
  },
2480
  {
2481
  "epoch": 6.92,
2482
+ "grad_norm": 0.7918051481246948,
2483
  "learning_rate": 6.25e-05,
2484
  "loss": 0.0374,
2485
  "step": 1730
2486
  },
2487
  {
2488
  "epoch": 6.9399999999999995,
2489
+ "grad_norm": 0.6711762547492981,
2490
  "learning_rate": 6.25e-05,
2491
+ "loss": 0.0364,
2492
  "step": 1735
2493
  },
2494
  {
2495
  "epoch": 6.96,
2496
+ "grad_norm": 0.41297319531440735,
2497
  "learning_rate": 6.25e-05,
2498
+ "loss": 0.0309,
2499
  "step": 1740
2500
  },
2501
  {
2502
  "epoch": 6.98,
2503
+ "grad_norm": 0.5152994990348816,
2504
  "learning_rate": 6.25e-05,
2505
+ "loss": 0.0405,
2506
  "step": 1745
2507
  },
2508
  {
2509
  "epoch": 7.0,
2510
+ "grad_norm": 0.6014075875282288,
2511
  "learning_rate": 6.25e-05,
2512
+ "loss": 0.0331,
2513
+ "step": 1750
2514
+ },
2515
+ {
2516
+ "epoch": 7.0,
2517
+ "eval_cer": 0.06257659114019763,
2518
+ "eval_loss": 0.07013064622879028,
2519
+ "eval_runtime": 282.3729,
2520
+ "eval_samples_per_second": 1.771,
2521
+ "eval_steps_per_second": 0.443,
2522
  "step": 1750
2523
  },
2524
  {
2525
  "epoch": 7.02,
2526
+ "grad_norm": 0.5180896520614624,
2527
  "learning_rate": 6.25e-05,
2528
+ "loss": 0.0261,
2529
  "step": 1755
2530
  },
2531
  {
2532
  "epoch": 7.04,
2533
+ "grad_norm": 0.45584699511528015,
2534
  "learning_rate": 6.25e-05,
2535
+ "loss": 0.0272,
2536
  "step": 1760
2537
  },
2538
  {
2539
  "epoch": 7.06,
2540
+ "grad_norm": 0.5258373618125916,
2541
  "learning_rate": 6.25e-05,
2542
+ "loss": 0.0346,
2543
  "step": 1765
2544
  },
2545
  {
2546
  "epoch": 7.08,
2547
+ "grad_norm": 0.7696036696434021,
2548
  "learning_rate": 6.25e-05,
2549
+ "loss": 0.0264,
2550
  "step": 1770
2551
  },
2552
  {
2553
  "epoch": 7.1,
2554
+ "grad_norm": 0.3956270217895508,
2555
  "learning_rate": 6.25e-05,
2556
+ "loss": 0.0268,
2557
  "step": 1775
2558
  },
2559
  {
2560
  "epoch": 7.12,
2561
+ "grad_norm": 0.5123682618141174,
2562
  "learning_rate": 6.25e-05,
2563
+ "loss": 0.0265,
2564
  "step": 1780
2565
  },
2566
  {
2567
  "epoch": 7.14,
2568
+ "grad_norm": 0.7040964961051941,
2569
  "learning_rate": 6.25e-05,
2570
+ "loss": 0.0256,
2571
  "step": 1785
2572
  },
2573
  {
2574
  "epoch": 7.16,
2575
+ "grad_norm": 0.6306995749473572,
2576
  "learning_rate": 6.25e-05,
2577
+ "loss": 0.0252,
2578
  "step": 1790
2579
  },
2580
  {
2581
  "epoch": 7.18,
2582
+ "grad_norm": 0.6259332299232483,
2583
  "learning_rate": 6.25e-05,
2584
+ "loss": 0.0351,
2585
  "step": 1795
2586
  },
2587
  {
2588
  "epoch": 7.2,
2589
+ "grad_norm": 0.5234382152557373,
2590
  "learning_rate": 6.25e-05,
2591
+ "loss": 0.0291,
2592
  "step": 1800
2593
  },
2594
  {
2595
  "epoch": 7.22,
2596
+ "grad_norm": 0.6372880339622498,
2597
  "learning_rate": 6.25e-05,
2598
+ "loss": 0.0286,
2599
  "step": 1805
2600
  },
2601
  {
2602
  "epoch": 7.24,
2603
+ "grad_norm": 0.47996407747268677,
2604
  "learning_rate": 6.25e-05,
2605
  "loss": 0.0329,
2606
  "step": 1810
2607
  },
2608
  {
2609
  "epoch": 7.26,
2610
+ "grad_norm": 1.0856232643127441,
2611
  "learning_rate": 6.25e-05,
2612
+ "loss": 0.0289,
2613
  "step": 1815
2614
  },
2615
  {
2616
  "epoch": 7.28,
2617
+ "grad_norm": 0.41406768560409546,
2618
  "learning_rate": 6.25e-05,
2619
+ "loss": 0.0279,
2620
  "step": 1820
2621
  },
2622
  {
2623
  "epoch": 7.3,
2624
+ "grad_norm": 0.39531177282333374,
2625
  "learning_rate": 6.25e-05,
2626
+ "loss": 0.03,
2627
  "step": 1825
2628
  },
2629
  {
2630
  "epoch": 7.32,
2631
+ "grad_norm": 0.38410675525665283,
2632
  "learning_rate": 6.25e-05,
2633
+ "loss": 0.0289,
2634
  "step": 1830
2635
  },
2636
  {
2637
  "epoch": 7.34,
2638
+ "grad_norm": 0.6105230450630188,
2639
  "learning_rate": 6.25e-05,
2640
  "loss": 0.0311,
2641
  "step": 1835
2642
  },
2643
  {
2644
  "epoch": 7.36,
2645
+ "grad_norm": 0.5529378652572632,
2646
  "learning_rate": 6.25e-05,
2647
+ "loss": 0.026,
2648
  "step": 1840
2649
  },
2650
  {
2651
  "epoch": 7.38,
2652
+ "grad_norm": 0.5076948404312134,
2653
  "learning_rate": 6.25e-05,
2654
+ "loss": 0.0291,
2655
  "step": 1845
2656
  },
2657
  {
2658
  "epoch": 7.4,
2659
+ "grad_norm": 0.5312634110450745,
2660
  "learning_rate": 6.25e-05,
2661
+ "loss": 0.022,
2662
  "step": 1850
2663
  },
2664
  {
2665
  "epoch": 7.42,
2666
+ "grad_norm": 0.705721378326416,
2667
  "learning_rate": 6.25e-05,
2668
+ "loss": 0.0282,
2669
  "step": 1855
2670
  },
2671
  {
2672
  "epoch": 7.44,
2673
+ "grad_norm": 0.5436967015266418,
2674
  "learning_rate": 6.25e-05,
2675
+ "loss": 0.029,
2676
  "step": 1860
2677
  },
2678
  {
2679
  "epoch": 7.46,
2680
+ "grad_norm": 0.5614831447601318,
2681
  "learning_rate": 6.25e-05,
2682
+ "loss": 0.0295,
2683
  "step": 1865
2684
  },
2685
  {
2686
  "epoch": 7.48,
2687
+ "grad_norm": 0.5950233340263367,
2688
  "learning_rate": 6.25e-05,
2689
+ "loss": 0.0291,
2690
  "step": 1870
2691
  },
2692
  {
2693
  "epoch": 7.5,
2694
+ "grad_norm": 0.6754202246665955,
2695
  "learning_rate": 6.25e-05,
2696
  "loss": 0.027,
2697
  "step": 1875
2698
  },
2699
  {
2700
  "epoch": 7.52,
2701
+ "grad_norm": 0.666309654712677,
2702
  "learning_rate": 6.25e-05,
2703
+ "loss": 0.0312,
2704
  "step": 1880
2705
  },
2706
  {
2707
  "epoch": 7.54,
2708
+ "grad_norm": 0.47739177942276,
2709
  "learning_rate": 6.25e-05,
2710
+ "loss": 0.0313,
2711
  "step": 1885
2712
  },
2713
  {
2714
  "epoch": 7.5600000000000005,
2715
+ "grad_norm": 0.7252377867698669,
2716
  "learning_rate": 6.25e-05,
2717
+ "loss": 0.0293,
2718
  "step": 1890
2719
  },
2720
  {
2721
  "epoch": 7.58,
2722
+ "grad_norm": 0.6819180846214294,
2723
  "learning_rate": 6.25e-05,
2724
+ "loss": 0.0256,
2725
  "step": 1895
2726
  },
2727
  {
2728
  "epoch": 7.6,
2729
+ "grad_norm": 0.7461476922035217,
2730
  "learning_rate": 6.25e-05,
2731
+ "loss": 0.0307,
2732
  "step": 1900
2733
  },
2734
  {
2735
  "epoch": 7.62,
2736
+ "grad_norm": 0.5476313829421997,
2737
  "learning_rate": 6.25e-05,
2738
+ "loss": 0.0286,
2739
  "step": 1905
2740
  },
2741
  {
2742
  "epoch": 7.64,
2743
+ "grad_norm": 0.5961133241653442,
2744
  "learning_rate": 6.25e-05,
2745
+ "loss": 0.0275,
2746
  "step": 1910
2747
  },
2748
  {
2749
  "epoch": 7.66,
2750
+ "grad_norm": 0.39747199416160583,
2751
  "learning_rate": 6.25e-05,
2752
  "loss": 0.0288,
2753
  "step": 1915
2754
  },
2755
  {
2756
  "epoch": 7.68,
2757
+ "grad_norm": 0.6783774495124817,
2758
  "learning_rate": 6.25e-05,
2759
+ "loss": 0.0287,
2760
  "step": 1920
2761
  },
2762
  {
2763
  "epoch": 7.7,
2764
+ "grad_norm": 0.532483696937561,
2765
  "learning_rate": 6.25e-05,
2766
+ "loss": 0.026,
2767
  "step": 1925
2768
  },
2769
  {
2770
  "epoch": 7.72,
2771
+ "grad_norm": 0.6823123097419739,
2772
  "learning_rate": 6.25e-05,
2773
+ "loss": 0.0324,
2774
  "step": 1930
2775
  },
2776
  {
2777
  "epoch": 7.74,
2778
+ "grad_norm": 0.5105339288711548,
2779
  "learning_rate": 6.25e-05,
2780
+ "loss": 0.0311,
2781
  "step": 1935
2782
  },
2783
  {
2784
  "epoch": 7.76,
2785
+ "grad_norm": 0.5813966989517212,
2786
  "learning_rate": 6.25e-05,
2787
+ "loss": 0.0305,
2788
  "step": 1940
2789
  },
2790
  {
2791
  "epoch": 7.78,
2792
+ "grad_norm": 0.7668830156326294,
2793
  "learning_rate": 6.25e-05,
2794
+ "loss": 0.0252,
2795
  "step": 1945
2796
  },
2797
  {
2798
  "epoch": 7.8,
2799
+ "grad_norm": 0.6099779605865479,
2800
  "learning_rate": 6.25e-05,
2801
+ "loss": 0.0305,
2802
  "step": 1950
2803
  },
2804
  {
2805
  "epoch": 7.82,
2806
+ "grad_norm": 0.6207693219184875,
2807
  "learning_rate": 6.25e-05,
2808
+ "loss": 0.0248,
2809
  "step": 1955
2810
  },
2811
  {
2812
  "epoch": 7.84,
2813
+ "grad_norm": 0.5245756506919861,
2814
  "learning_rate": 6.25e-05,
2815
+ "loss": 0.0286,
2816
  "step": 1960
2817
  },
2818
  {
2819
  "epoch": 7.86,
2820
+ "grad_norm": 0.6205756068229675,
2821
  "learning_rate": 6.25e-05,
2822
+ "loss": 0.0286,
2823
  "step": 1965
2824
  },
2825
  {
2826
  "epoch": 7.88,
2827
+ "grad_norm": 0.5376330018043518,
2828
  "learning_rate": 6.25e-05,
2829
+ "loss": 0.0314,
2830
  "step": 1970
2831
  },
2832
  {
2833
  "epoch": 7.9,
2834
+ "grad_norm": 0.3462754786014557,
2835
  "learning_rate": 6.25e-05,
2836
+ "loss": 0.0277,
2837
  "step": 1975
2838
  },
2839
  {
2840
  "epoch": 7.92,
2841
+ "grad_norm": 0.6299518346786499,
2842
  "learning_rate": 6.25e-05,
2843
+ "loss": 0.029,
2844
  "step": 1980
2845
  },
2846
  {
2847
  "epoch": 7.9399999999999995,
2848
+ "grad_norm": 0.5435101985931396,
2849
  "learning_rate": 6.25e-05,
2850
+ "loss": 0.0288,
2851
  "step": 1985
2852
  },
2853
  {
2854
  "epoch": 7.96,
2855
+ "grad_norm": 0.9162132740020752,
2856
  "learning_rate": 6.25e-05,
2857
+ "loss": 0.0328,
2858
  "step": 1990
2859
  },
2860
  {
2861
  "epoch": 7.98,
2862
+ "grad_norm": 0.49135687947273254,
2863
  "learning_rate": 6.25e-05,
2864
+ "loss": 0.0319,
2865
  "step": 1995
2866
  },
2867
  {
2868
  "epoch": 8.0,
2869
+ "grad_norm": 0.6287830471992493,
2870
  "learning_rate": 6.25e-05,
2871
+ "loss": 0.0281,
2872
  "step": 2000
2873
  },
2874
  {
2875
  "epoch": 8.0,
2876
+ "eval_cer": 0.08247073240685214,
2877
+ "eval_loss": 0.07459326088428497,
2878
+ "eval_runtime": 283.7181,
2879
+ "eval_samples_per_second": 1.762,
2880
+ "eval_steps_per_second": 0.441,
2881
  "step": 2000
2882
  }
2883
  ],
 
2885
  "max_steps": 5000,
2886
  "num_input_tokens_seen": 0,
2887
  "num_train_epochs": 20,
2888
+ "save_steps": 250,
2889
  "stateful_callbacks": {
2890
  "TrainerControl": {
2891
  "args": {
checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:380ff1a9921ff96ab779d6709926f10f78099a5595ab698c3919c0c657657de1
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:105d055d6d84eb987fbbb4fc9493aa207f4712b04ab60a83adb7510815397317
3
  size 5432
runs/Feb11_15-08-30_5d7eb40c28a7/events.out.tfevents.1739286511.5d7eb40c28a7.5353.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dd98be88afd8022e07b1f0f1e6488633638231dc4ce76481a9c4f02bb9c5800
3
- size 99440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db603ef6a46d5820da12718b2490e8dd31b8b672af532650908ac47132ddc7e0
3
+ size 105137
runs/Feb11_17-49-47_5d7eb40c28a7/events.out.tfevents.1739296188.5d7eb40c28a7.6631.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86969232f02d13c985cb25513266d2ba408af96db5bfdbcfb0de4480f9ed50fe
3
+ size 6264
runs/Feb11_17-52-20_5d7eb40c28a7/events.out.tfevents.1739296341.5d7eb40c28a7.6851.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19ce9eeb007bf6fb9437a767a9bf9def0a27a085d34bae65504fe7ae6d0c7ba9
3
+ size 98602