geninhu commited on
Commit
aa1dbb5
1 Parent(s): d2f4d5b

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.13,
3
- "eval_cer": 8.984656437625084,
4
- "eval_loss": 0.6150030493736267,
5
- "eval_runtime": 902.9168,
6
- "eval_samples_per_second": 1.37,
7
- "eval_steps_per_second": 0.086,
8
- "eval_wer": 20.49791620969511,
9
- "train_loss": 0.12916588606834412,
10
- "train_runtime": 40862.8523,
11
- "train_samples_per_second": 3.916,
12
- "train_steps_per_second": 0.122
13
  }
 
1
  {
2
+ "epoch": 6.13,
3
+ "eval_cer": 8.437625083388927,
4
+ "eval_loss": 0.7791085243225098,
5
+ "eval_runtime": 942.0284,
6
+ "eval_samples_per_second": 1.313,
7
+ "eval_steps_per_second": 0.041,
8
+ "eval_wer": 19.368282518096073,
9
+ "train_loss": 0.04111601705942303,
10
+ "train_runtime": 74950.7687,
11
+ "train_samples_per_second": 4.269,
12
+ "train_steps_per_second": 0.067
13
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 3.13,
3
- "eval_cer": 8.984656437625084,
4
- "eval_loss": 0.6150030493736267,
5
- "eval_runtime": 902.9168,
6
- "eval_samples_per_second": 1.37,
7
- "eval_steps_per_second": 0.086,
8
- "eval_wer": 20.49791620969511
9
  }
 
1
  {
2
+ "epoch": 6.13,
3
+ "eval_cer": 8.437625083388927,
4
+ "eval_loss": 0.7791085243225098,
5
+ "eval_runtime": 942.0284,
6
+ "eval_samples_per_second": 1.313,
7
+ "eval_steps_per_second": 0.041,
8
+ "eval_wer": 19.368282518096073
9
  }
runs/Dec16_14-32-26_129-213-131-48/events.out.tfevents.1671277192.129-213-131-48.2969237.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3361a4cd3c65c9449f5485ebad36b45e484598b6b2148b14d0a0dfdd6843023e
3
+ size 405
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 3.13,
3
- "train_loss": 0.12916588606834412,
4
- "train_runtime": 40862.8523,
5
- "train_samples_per_second": 3.916,
6
- "train_steps_per_second": 0.122
7
  }
 
1
  {
2
+ "epoch": 6.13,
3
+ "train_loss": 0.04111601705942303,
4
+ "train_runtime": 74950.7687,
5
+ "train_samples_per_second": 4.269,
6
+ "train_steps_per_second": 0.067
7
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 20.49791620969511,
3
- "best_model_checkpoint": "./checkpoint-5000",
4
- "epoch": 3.1252,
5
  "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
@@ -9,1267 +9,1267 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.01,
12
- "learning_rate": 4.5999999999999995e-08,
13
- "loss": 1.42,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.01,
18
- "learning_rate": 9.6e-08,
19
- "loss": 1.5013,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.01,
24
- "learning_rate": 1.4599999999999998e-07,
25
- "loss": 1.4979,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.02,
30
- "learning_rate": 1.96e-07,
31
- "loss": 1.3085,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.03,
36
- "learning_rate": 2.46e-07,
37
- "loss": 1.0474,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.03,
42
- "learning_rate": 2.9599999999999995e-07,
43
- "loss": 0.9708,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 0.04,
48
- "learning_rate": 3.4599999999999995e-07,
49
- "loss": 0.8137,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 0.04,
54
- "learning_rate": 3.96e-07,
55
- "loss": 0.6283,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.04,
60
- "learning_rate": 4.46e-07,
61
- "loss": 0.544,
62
  "step": 225
63
  },
64
  {
65
  "epoch": 0.05,
66
- "learning_rate": 4.96e-07,
67
- "loss": 0.492,
68
  "step": 250
69
  },
70
  {
71
  "epoch": 0.06,
72
- "learning_rate": 5.46e-07,
73
- "loss": 0.4598,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.06,
78
- "learning_rate": 5.96e-07,
79
- "loss": 0.3992,
80
  "step": 300
81
  },
82
  {
83
  "epoch": 0.07,
84
- "learning_rate": 6.46e-07,
85
- "loss": 0.3701,
86
  "step": 325
87
  },
88
  {
89
  "epoch": 0.07,
90
- "learning_rate": 6.959999999999999e-07,
91
- "loss": 0.3612,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.07,
96
- "learning_rate": 7.459999999999999e-07,
97
- "loss": 0.3257,
98
  "step": 375
99
  },
100
  {
101
  "epoch": 0.08,
102
- "learning_rate": 7.96e-07,
103
- "loss": 0.2667,
104
  "step": 400
105
  },
106
  {
107
  "epoch": 0.09,
108
- "learning_rate": 8.459999999999999e-07,
109
- "loss": 0.2553,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 0.09,
114
- "learning_rate": 8.96e-07,
115
- "loss": 0.2584,
116
  "step": 450
117
  },
118
  {
119
  "epoch": 0.1,
120
- "learning_rate": 9.459999999999999e-07,
121
- "loss": 0.2449,
122
  "step": 475
123
  },
124
  {
125
  "epoch": 0.1,
126
- "learning_rate": 9.959999999999999e-07,
127
- "loss": 0.2331,
128
  "step": 500
129
  },
130
  {
131
  "epoch": 0.1,
132
- "learning_rate": 9.948888888888888e-07,
133
- "loss": 0.2259,
134
  "step": 525
135
  },
136
  {
137
  "epoch": 0.11,
138
- "learning_rate": 9.893333333333332e-07,
139
- "loss": 0.2184,
140
  "step": 550
141
  },
142
  {
143
  "epoch": 0.12,
144
- "learning_rate": 9.837777777777778e-07,
145
- "loss": 0.1952,
146
  "step": 575
147
  },
148
  {
149
  "epoch": 0.12,
150
- "learning_rate": 9.782222222222222e-07,
151
- "loss": 0.2016,
152
  "step": 600
153
  },
154
  {
155
  "epoch": 0.12,
156
- "learning_rate": 9.726666666666666e-07,
157
- "loss": 0.2159,
158
  "step": 625
159
  },
160
  {
161
  "epoch": 0.13,
162
- "learning_rate": 9.67111111111111e-07,
163
- "loss": 0.2069,
164
  "step": 650
165
  },
166
  {
167
  "epoch": 0.14,
168
- "learning_rate": 9.615555555555556e-07,
169
- "loss": 0.1451,
170
  "step": 675
171
  },
172
  {
173
  "epoch": 0.14,
174
- "learning_rate": 9.559999999999998e-07,
175
- "loss": 0.14,
176
  "step": 700
177
  },
178
  {
179
  "epoch": 0.14,
180
- "learning_rate": 9.504444444444443e-07,
181
- "loss": 0.1481,
182
  "step": 725
183
  },
184
  {
185
- "epoch": 0.15,
186
- "learning_rate": 9.448888888888888e-07,
187
- "loss": 0.16,
188
  "step": 750
189
  },
190
  {
191
- "epoch": 0.15,
192
- "learning_rate": 9.393333333333334e-07,
193
- "loss": 0.1278,
194
  "step": 775
195
  },
196
  {
197
- "epoch": 0.16,
198
- "learning_rate": 9.337777777777778e-07,
199
- "loss": 0.1233,
200
  "step": 800
201
  },
202
  {
203
- "epoch": 0.17,
204
- "learning_rate": 9.282222222222222e-07,
205
- "loss": 0.1304,
206
  "step": 825
207
  },
208
  {
209
- "epoch": 0.17,
210
- "learning_rate": 9.226666666666666e-07,
211
- "loss": 0.1241,
212
  "step": 850
213
  },
214
  {
215
- "epoch": 0.17,
216
- "learning_rate": 9.171111111111111e-07,
217
- "loss": 0.1275,
218
  "step": 875
219
  },
220
  {
221
- "epoch": 0.18,
222
- "learning_rate": 9.115555555555555e-07,
223
- "loss": 0.1449,
224
  "step": 900
225
  },
226
  {
227
- "epoch": 0.18,
228
- "learning_rate": 9.06e-07,
229
- "loss": 0.1295,
230
  "step": 925
231
  },
232
  {
233
- "epoch": 0.19,
234
- "learning_rate": 9.004444444444445e-07,
235
- "loss": 0.1186,
236
  "step": 950
237
  },
238
  {
239
- "epoch": 0.2,
240
- "learning_rate": 8.948888888888888e-07,
241
- "loss": 0.1009,
242
  "step": 975
243
  },
244
  {
245
- "epoch": 0.2,
246
- "learning_rate": 8.893333333333333e-07,
247
- "loss": 0.1041,
248
  "step": 1000
249
  },
250
  {
251
- "epoch": 0.2,
252
- "eval_cer": 9.71314209472982,
253
- "eval_loss": 0.5132914185523987,
254
- "eval_runtime": 890.2064,
255
- "eval_samples_per_second": 1.39,
256
- "eval_steps_per_second": 0.088,
257
- "eval_wer": 22.614608466769027,
258
  "step": 1000
259
  },
260
  {
261
- "epoch": 0.2,
262
- "learning_rate": 8.837777777777777e-07,
263
- "loss": 0.1115,
264
  "step": 1025
265
  },
266
  {
267
- "epoch": 0.21,
268
- "learning_rate": 8.782222222222222e-07,
269
- "loss": 0.122,
270
  "step": 1050
271
  },
272
  {
273
- "epoch": 0.21,
274
- "learning_rate": 8.726666666666666e-07,
275
- "loss": 0.1163,
276
  "step": 1075
277
  },
278
  {
279
- "epoch": 0.22,
280
- "learning_rate": 8.671111111111111e-07,
281
- "loss": 0.1208,
282
  "step": 1100
283
  },
284
  {
285
- "epoch": 0.23,
286
- "learning_rate": 8.615555555555555e-07,
287
- "loss": 0.112,
288
  "step": 1125
289
  },
290
  {
291
- "epoch": 0.23,
292
- "learning_rate": 8.559999999999999e-07,
293
- "loss": 0.099,
294
  "step": 1150
295
  },
296
  {
297
- "epoch": 0.23,
298
- "learning_rate": 8.504444444444444e-07,
299
- "loss": 0.0813,
300
  "step": 1175
301
  },
302
  {
303
- "epoch": 0.24,
304
- "learning_rate": 8.448888888888888e-07,
305
- "loss": 0.1203,
306
  "step": 1200
307
  },
308
  {
309
- "epoch": 0.24,
310
- "learning_rate": 8.393333333333334e-07,
311
- "loss": 0.1505,
312
  "step": 1225
313
  },
314
  {
315
- "epoch": 0.25,
316
- "learning_rate": 8.337777777777777e-07,
317
- "loss": 0.109,
318
  "step": 1250
319
  },
320
  {
321
- "epoch": 0.26,
322
- "learning_rate": 8.282222222222222e-07,
323
- "loss": 0.1293,
324
  "step": 1275
325
  },
326
  {
327
- "epoch": 0.26,
328
- "learning_rate": 8.226666666666666e-07,
329
- "loss": 0.1126,
330
  "step": 1300
331
  },
332
  {
333
- "epoch": 0.27,
334
- "learning_rate": 8.171111111111111e-07,
335
- "loss": 0.0969,
336
  "step": 1325
337
  },
338
  {
339
- "epoch": 0.27,
340
- "learning_rate": 8.115555555555556e-07,
341
- "loss": 0.0864,
342
  "step": 1350
343
  },
344
  {
345
- "epoch": 0.28,
346
- "learning_rate": 8.06e-07,
347
- "loss": 0.0894,
348
  "step": 1375
349
  },
350
  {
351
- "epoch": 0.28,
352
- "learning_rate": 8.004444444444444e-07,
353
- "loss": 0.0823,
354
  "step": 1400
355
  },
356
  {
357
- "epoch": 0.28,
358
- "learning_rate": 7.948888888888888e-07,
359
- "loss": 0.0851,
360
  "step": 1425
361
  },
362
  {
363
- "epoch": 0.29,
364
- "learning_rate": 7.893333333333333e-07,
365
- "loss": 0.0845,
366
  "step": 1450
367
  },
368
  {
369
- "epoch": 1.0,
370
- "learning_rate": 7.837777777777777e-07,
371
- "loss": 0.1286,
372
  "step": 1475
373
  },
374
  {
375
- "epoch": 1.01,
376
- "learning_rate": 7.782222222222222e-07,
377
- "loss": 0.135,
378
  "step": 1500
379
  },
380
  {
381
- "epoch": 1.01,
382
- "learning_rate": 7.726666666666666e-07,
383
- "loss": 0.1162,
384
  "step": 1525
385
  },
386
  {
387
- "epoch": 1.02,
388
- "learning_rate": 7.67111111111111e-07,
389
- "loss": 0.1213,
390
  "step": 1550
391
  },
392
  {
393
- "epoch": 1.02,
394
- "learning_rate": 7.615555555555555e-07,
395
- "loss": 0.082,
396
  "step": 1575
397
  },
398
  {
399
- "epoch": 1.03,
400
- "learning_rate": 7.559999999999999e-07,
401
- "loss": 0.0702,
402
  "step": 1600
403
  },
404
  {
405
- "epoch": 1.03,
406
- "learning_rate": 7.504444444444444e-07,
407
- "loss": 0.0725,
408
  "step": 1625
409
  },
410
  {
411
- "epoch": 1.04,
412
- "learning_rate": 7.448888888888889e-07,
413
- "loss": 0.058,
414
  "step": 1650
415
  },
416
  {
417
- "epoch": 1.04,
418
- "learning_rate": 7.393333333333333e-07,
419
- "loss": 0.0598,
420
  "step": 1675
421
  },
422
  {
423
- "epoch": 1.05,
424
- "learning_rate": 7.337777777777778e-07,
425
- "loss": 0.0757,
426
  "step": 1700
427
  },
428
  {
429
- "epoch": 1.05,
430
- "learning_rate": 7.282222222222222e-07,
431
- "loss": 0.0782,
432
  "step": 1725
433
  },
434
  {
435
- "epoch": 1.06,
436
- "learning_rate": 7.226666666666667e-07,
437
- "loss": 0.0583,
438
  "step": 1750
439
  },
440
  {
441
- "epoch": 1.06,
442
- "learning_rate": 7.171111111111111e-07,
443
- "loss": 0.0677,
444
  "step": 1775
445
  },
446
  {
447
- "epoch": 1.07,
448
- "learning_rate": 7.115555555555556e-07,
449
- "loss": 0.0825,
450
  "step": 1800
451
  },
452
  {
453
- "epoch": 1.07,
454
- "learning_rate": 7.059999999999999e-07,
455
- "loss": 0.0791,
456
  "step": 1825
457
  },
458
  {
459
- "epoch": 1.08,
460
- "learning_rate": 7.004444444444444e-07,
461
- "loss": 0.055,
462
  "step": 1850
463
  },
464
  {
465
- "epoch": 1.08,
466
- "learning_rate": 6.948888888888889e-07,
467
- "loss": 0.0575,
468
  "step": 1875
469
  },
470
  {
471
- "epoch": 1.09,
472
- "learning_rate": 6.893333333333333e-07,
473
- "loss": 0.0723,
474
  "step": 1900
475
  },
476
  {
477
- "epoch": 1.09,
478
- "learning_rate": 6.837777777777778e-07,
479
- "loss": 0.0672,
480
  "step": 1925
481
  },
482
  {
483
- "epoch": 1.1,
484
- "learning_rate": 6.782222222222221e-07,
485
- "loss": 0.0751,
486
  "step": 1950
487
  },
488
  {
489
- "epoch": 1.1,
490
- "learning_rate": 6.726666666666666e-07,
491
- "loss": 0.0807,
492
  "step": 1975
493
  },
494
  {
495
- "epoch": 1.11,
496
- "learning_rate": 6.67111111111111e-07,
497
- "loss": 0.074,
498
  "step": 2000
499
  },
500
  {
501
- "epoch": 1.11,
502
- "eval_cer": 9.41961307538359,
503
- "eval_loss": 0.5532106161117554,
504
- "eval_runtime": 889.2915,
505
- "eval_samples_per_second": 1.391,
506
- "eval_steps_per_second": 0.088,
507
- "eval_wer": 21.61658258389998,
508
  "step": 2000
509
  },
510
  {
511
- "epoch": 1.11,
512
- "learning_rate": 6.615555555555555e-07,
513
- "loss": 0.073,
514
  "step": 2025
515
  },
516
  {
517
- "epoch": 1.12,
518
- "learning_rate": 6.56e-07,
519
- "loss": 0.0629,
520
  "step": 2050
521
  },
522
  {
523
- "epoch": 1.12,
524
- "learning_rate": 6.504444444444445e-07,
525
- "loss": 0.0888,
526
  "step": 2075
527
  },
528
  {
529
- "epoch": 1.13,
530
- "learning_rate": 6.448888888888889e-07,
531
- "loss": 0.0747,
532
  "step": 2100
533
  },
534
  {
535
- "epoch": 1.13,
536
- "learning_rate": 6.393333333333333e-07,
537
- "loss": 0.0638,
538
  "step": 2125
539
  },
540
  {
541
- "epoch": 1.14,
542
- "learning_rate": 6.337777777777778e-07,
543
- "loss": 0.0414,
544
  "step": 2150
545
  },
546
  {
547
- "epoch": 1.14,
548
- "learning_rate": 6.282222222222222e-07,
549
- "loss": 0.057,
550
  "step": 2175
551
  },
552
  {
553
- "epoch": 1.15,
554
- "learning_rate": 6.226666666666667e-07,
555
- "loss": 0.0599,
556
  "step": 2200
557
  },
558
  {
559
- "epoch": 1.15,
560
- "learning_rate": 6.171111111111111e-07,
561
- "loss": 0.0561,
562
  "step": 2225
563
  },
564
  {
565
- "epoch": 1.16,
566
- "learning_rate": 6.115555555555555e-07,
567
- "loss": 0.0443,
568
  "step": 2250
569
  },
570
  {
571
- "epoch": 1.16,
572
- "learning_rate": 6.06e-07,
573
- "loss": 0.0551,
574
  "step": 2275
575
  },
576
  {
577
- "epoch": 1.17,
578
- "learning_rate": 6.004444444444444e-07,
579
- "loss": 0.0525,
580
  "step": 2300
581
  },
582
  {
583
- "epoch": 1.17,
584
- "learning_rate": 5.948888888888889e-07,
585
- "loss": 0.0595,
586
  "step": 2325
587
  },
588
  {
589
- "epoch": 1.18,
590
- "learning_rate": 5.893333333333333e-07,
591
- "loss": 0.0665,
592
  "step": 2350
593
  },
594
  {
595
- "epoch": 1.18,
596
- "learning_rate": 5.837777777777777e-07,
597
- "loss": 0.0708,
598
  "step": 2375
599
  },
600
  {
601
- "epoch": 1.19,
602
- "learning_rate": 5.782222222222221e-07,
603
- "loss": 0.0547,
604
  "step": 2400
605
  },
606
  {
607
- "epoch": 1.19,
608
- "learning_rate": 5.726666666666666e-07,
609
- "loss": 0.0464,
610
  "step": 2425
611
  },
612
  {
613
- "epoch": 1.2,
614
- "learning_rate": 5.671111111111111e-07,
615
- "loss": 0.0409,
616
  "step": 2450
617
  },
618
  {
619
- "epoch": 1.2,
620
- "learning_rate": 5.615555555555555e-07,
621
- "loss": 0.0557,
622
  "step": 2475
623
  },
624
  {
625
- "epoch": 1.21,
626
- "learning_rate": 5.560000000000001e-07,
627
- "loss": 0.0689,
628
  "step": 2500
629
  },
630
  {
631
- "epoch": 1.21,
632
- "learning_rate": 5.504444444444444e-07,
633
- "loss": 0.0644,
634
  "step": 2525
635
  },
636
  {
637
- "epoch": 1.22,
638
- "learning_rate": 5.448888888888889e-07,
639
- "loss": 0.057,
640
  "step": 2550
641
  },
642
  {
643
- "epoch": 1.22,
644
- "learning_rate": 5.393333333333333e-07,
645
- "loss": 0.0707,
646
  "step": 2575
647
  },
648
  {
649
- "epoch": 1.23,
650
- "learning_rate": 5.337777777777778e-07,
651
- "loss": 0.0548,
652
  "step": 2600
653
  },
654
  {
655
- "epoch": 1.23,
656
- "learning_rate": 5.282222222222223e-07,
657
- "loss": 0.0371,
658
  "step": 2625
659
  },
660
  {
661
- "epoch": 1.24,
662
- "learning_rate": 5.226666666666666e-07,
663
- "loss": 0.0521,
664
  "step": 2650
665
  },
666
  {
667
- "epoch": 1.24,
668
- "learning_rate": 5.171111111111111e-07,
669
- "loss": 0.0843,
670
  "step": 2675
671
  },
672
  {
673
- "epoch": 1.25,
674
- "learning_rate": 5.115555555555555e-07,
675
- "loss": 0.0779,
676
  "step": 2700
677
  },
678
  {
679
- "epoch": 1.25,
680
- "learning_rate": 5.06e-07,
681
- "loss": 0.0677,
682
  "step": 2725
683
  },
684
  {
685
- "epoch": 1.26,
686
- "learning_rate": 5.004444444444444e-07,
687
- "loss": 0.083,
688
  "step": 2750
689
  },
690
  {
691
- "epoch": 1.26,
692
- "learning_rate": 4.948888888888888e-07,
693
- "loss": 0.0482,
694
  "step": 2775
695
  },
696
  {
697
- "epoch": 1.27,
698
- "learning_rate": 4.893333333333333e-07,
699
- "loss": 0.0467,
700
  "step": 2800
701
  },
702
  {
703
- "epoch": 1.27,
704
- "learning_rate": 4.837777777777777e-07,
705
- "loss": 0.0539,
706
  "step": 2825
707
  },
708
  {
709
- "epoch": 1.28,
710
- "learning_rate": 4.782222222222222e-07,
711
- "loss": 0.0379,
712
  "step": 2850
713
  },
714
  {
715
- "epoch": 1.28,
716
- "learning_rate": 4.7266666666666664e-07,
717
- "loss": 0.0411,
718
  "step": 2875
719
  },
720
  {
721
- "epoch": 1.29,
722
- "learning_rate": 4.6711111111111104e-07,
723
- "loss": 0.0501,
724
  "step": 2900
725
  },
726
  {
727
- "epoch": 2.0,
728
- "learning_rate": 4.6155555555555555e-07,
729
- "loss": 0.0698,
730
  "step": 2925
731
  },
732
  {
733
- "epoch": 2.01,
734
- "learning_rate": 4.56e-07,
735
- "loss": 0.0881,
736
  "step": 2950
737
  },
738
  {
739
- "epoch": 2.01,
740
- "learning_rate": 4.504444444444444e-07,
741
- "loss": 0.0847,
742
  "step": 2975
743
  },
744
  {
745
- "epoch": 2.02,
746
- "learning_rate": 4.4488888888888887e-07,
747
- "loss": 0.0796,
748
  "step": 3000
749
  },
750
  {
751
- "epoch": 2.02,
752
- "eval_cer": 9.243495663775851,
753
- "eval_loss": 0.6025404930114746,
754
- "eval_runtime": 911.8299,
755
- "eval_samples_per_second": 1.357,
756
- "eval_steps_per_second": 0.086,
757
- "eval_wer": 21.33143233165168,
758
  "step": 3000
759
  },
760
  {
761
- "epoch": 2.02,
762
- "learning_rate": 4.393333333333333e-07,
763
- "loss": 0.0536,
764
  "step": 3025
765
  },
766
  {
767
- "epoch": 2.03,
768
- "learning_rate": 4.3377777777777773e-07,
769
- "loss": 0.0476,
770
  "step": 3050
771
  },
772
  {
773
- "epoch": 2.03,
774
- "learning_rate": 4.282222222222222e-07,
775
- "loss": 0.0385,
776
  "step": 3075
777
  },
778
  {
779
- "epoch": 2.04,
780
- "learning_rate": 4.226666666666667e-07,
781
- "loss": 0.0302,
782
  "step": 3100
783
  },
784
  {
785
- "epoch": 2.04,
786
- "learning_rate": 4.171111111111111e-07,
787
- "loss": 0.0301,
788
  "step": 3125
789
  },
790
  {
791
- "epoch": 2.05,
792
- "learning_rate": 4.1155555555555555e-07,
793
- "loss": 0.0422,
794
  "step": 3150
795
  },
796
  {
797
- "epoch": 2.05,
798
- "learning_rate": 4.06e-07,
799
- "loss": 0.0509,
800
  "step": 3175
801
  },
802
  {
803
- "epoch": 2.06,
804
- "learning_rate": 4.004444444444444e-07,
805
- "loss": 0.0374,
806
  "step": 3200
807
  },
808
  {
809
- "epoch": 2.06,
810
- "learning_rate": 3.9488888888888887e-07,
811
- "loss": 0.0375,
812
  "step": 3225
813
  },
814
  {
815
- "epoch": 2.07,
816
- "learning_rate": 3.8933333333333327e-07,
817
- "loss": 0.0468,
818
  "step": 3250
819
  },
820
  {
821
- "epoch": 2.07,
822
- "learning_rate": 3.837777777777778e-07,
823
- "loss": 0.0572,
824
  "step": 3275
825
  },
826
  {
827
- "epoch": 2.08,
828
- "learning_rate": 3.7822222222222224e-07,
829
- "loss": 0.0362,
830
  "step": 3300
831
  },
832
  {
833
- "epoch": 2.08,
834
- "learning_rate": 3.7266666666666664e-07,
835
- "loss": 0.0298,
836
  "step": 3325
837
  },
838
  {
839
- "epoch": 2.09,
840
- "learning_rate": 3.671111111111111e-07,
841
- "loss": 0.0448,
842
  "step": 3350
843
  },
844
  {
845
- "epoch": 2.09,
846
- "learning_rate": 3.6155555555555555e-07,
847
- "loss": 0.0438,
848
  "step": 3375
849
  },
850
  {
851
- "epoch": 2.1,
852
- "learning_rate": 3.5599999999999996e-07,
853
- "loss": 0.0436,
854
  "step": 3400
855
  },
856
  {
857
- "epoch": 2.1,
858
- "learning_rate": 3.504444444444444e-07,
859
- "loss": 0.0545,
860
  "step": 3425
861
  },
862
  {
863
- "epoch": 2.11,
864
- "learning_rate": 3.4488888888888887e-07,
865
- "loss": 0.0468,
866
  "step": 3450
867
  },
868
  {
869
- "epoch": 2.11,
870
- "learning_rate": 3.3933333333333333e-07,
871
- "loss": 0.0492,
872
  "step": 3475
873
  },
874
  {
875
- "epoch": 2.12,
876
- "learning_rate": 3.337777777777778e-07,
877
- "loss": 0.0337,
878
  "step": 3500
879
  },
880
  {
881
- "epoch": 2.12,
882
- "learning_rate": 3.2822222222222224e-07,
883
- "loss": 0.0567,
884
  "step": 3525
885
  },
886
  {
887
- "epoch": 2.13,
888
- "learning_rate": 3.2266666666666664e-07,
889
- "loss": 0.0619,
890
  "step": 3550
891
  },
892
  {
893
- "epoch": 2.13,
894
- "learning_rate": 3.171111111111111e-07,
895
- "loss": 0.0395,
896
  "step": 3575
897
  },
898
  {
899
- "epoch": 2.14,
900
- "learning_rate": 3.115555555555555e-07,
901
- "loss": 0.0293,
902
  "step": 3600
903
  },
904
  {
905
- "epoch": 2.14,
906
- "learning_rate": 3.0599999999999996e-07,
907
- "loss": 0.025,
908
  "step": 3625
909
  },
910
  {
911
- "epoch": 2.15,
912
- "learning_rate": 3.0044444444444447e-07,
913
- "loss": 0.0432,
914
  "step": 3650
915
  },
916
  {
917
- "epoch": 2.15,
918
- "learning_rate": 2.948888888888889e-07,
919
- "loss": 0.0316,
920
  "step": 3675
921
  },
922
  {
923
- "epoch": 2.16,
924
- "learning_rate": 2.8933333333333333e-07,
925
- "loss": 0.0299,
926
  "step": 3700
927
  },
928
  {
929
- "epoch": 2.16,
930
- "learning_rate": 2.837777777777778e-07,
931
- "loss": 0.0341,
932
  "step": 3725
933
  },
934
  {
935
- "epoch": 2.17,
936
- "learning_rate": 2.782222222222222e-07,
937
- "loss": 0.0368,
938
  "step": 3750
939
  },
940
  {
941
- "epoch": 2.17,
942
- "learning_rate": 2.7266666666666665e-07,
943
- "loss": 0.0373,
944
  "step": 3775
945
  },
946
  {
947
- "epoch": 2.18,
948
- "learning_rate": 2.671111111111111e-07,
949
- "loss": 0.0458,
950
  "step": 3800
951
  },
952
  {
953
- "epoch": 2.18,
954
- "learning_rate": 2.615555555555555e-07,
955
- "loss": 0.05,
956
  "step": 3825
957
  },
958
  {
959
- "epoch": 2.19,
960
- "learning_rate": 2.56e-07,
961
- "loss": 0.0406,
962
  "step": 3850
963
  },
964
  {
965
- "epoch": 2.19,
966
- "learning_rate": 2.5044444444444447e-07,
967
- "loss": 0.03,
968
  "step": 3875
969
  },
970
  {
971
- "epoch": 2.2,
972
- "learning_rate": 2.448888888888889e-07,
973
- "loss": 0.0296,
974
  "step": 3900
975
  },
976
  {
977
- "epoch": 2.2,
978
- "learning_rate": 2.3933333333333333e-07,
979
- "loss": 0.035,
980
  "step": 3925
981
  },
982
  {
983
- "epoch": 2.21,
984
- "learning_rate": 2.3377777777777776e-07,
985
- "loss": 0.0381,
986
  "step": 3950
987
  },
988
  {
989
- "epoch": 2.21,
990
- "learning_rate": 2.282222222222222e-07,
991
- "loss": 0.0546,
992
  "step": 3975
993
  },
994
  {
995
- "epoch": 2.22,
996
- "learning_rate": 2.2266666666666668e-07,
997
- "loss": 0.0422,
998
  "step": 4000
999
  },
1000
  {
1001
- "epoch": 2.22,
1002
- "eval_cer": 9.027351567711808,
1003
- "eval_loss": 0.6029081344604492,
1004
- "eval_runtime": 892.3275,
1005
- "eval_samples_per_second": 1.386,
1006
- "eval_steps_per_second": 0.087,
1007
- "eval_wer": 20.73919719236675,
1008
  "step": 4000
1009
  },
1010
  {
1011
- "epoch": 2.22,
1012
- "learning_rate": 2.171111111111111e-07,
1013
- "loss": 0.0576,
1014
  "step": 4025
1015
  },
1016
  {
1017
- "epoch": 2.23,
1018
- "learning_rate": 2.1155555555555554e-07,
1019
- "loss": 0.0378,
1020
  "step": 4050
1021
  },
1022
  {
1023
- "epoch": 2.23,
1024
- "learning_rate": 2.06e-07,
1025
- "loss": 0.0308,
1026
  "step": 4075
1027
  },
1028
  {
1029
- "epoch": 2.24,
1030
- "learning_rate": 2.0044444444444445e-07,
1031
- "loss": 0.0224,
1032
  "step": 4100
1033
  },
1034
  {
1035
- "epoch": 2.24,
1036
- "learning_rate": 1.9488888888888888e-07,
1037
- "loss": 0.0568,
1038
  "step": 4125
1039
  },
1040
  {
1041
- "epoch": 2.25,
1042
- "learning_rate": 1.893333333333333e-07,
1043
- "loss": 0.0644,
1044
  "step": 4150
1045
  },
1046
  {
1047
- "epoch": 2.25,
1048
- "learning_rate": 1.837777777777778e-07,
1049
- "loss": 0.0313,
1050
  "step": 4175
1051
  },
1052
  {
1053
- "epoch": 2.26,
1054
- "learning_rate": 1.7822222222222222e-07,
1055
- "loss": 0.0769,
1056
  "step": 4200
1057
  },
1058
  {
1059
- "epoch": 2.26,
1060
- "learning_rate": 1.7266666666666665e-07,
1061
- "loss": 0.0442,
1062
  "step": 4225
1063
  },
1064
  {
1065
- "epoch": 2.27,
1066
- "learning_rate": 1.6711111111111108e-07,
1067
- "loss": 0.0304,
1068
  "step": 4250
1069
  },
1070
  {
1071
- "epoch": 2.27,
1072
- "learning_rate": 1.6155555555555556e-07,
1073
- "loss": 0.0385,
1074
  "step": 4275
1075
  },
1076
  {
1077
- "epoch": 2.28,
1078
- "learning_rate": 1.56e-07,
1079
- "loss": 0.0285,
1080
  "step": 4300
1081
  },
1082
  {
1083
- "epoch": 2.28,
1084
- "learning_rate": 1.5044444444444442e-07,
1085
- "loss": 0.0344,
1086
  "step": 4325
1087
  },
1088
  {
1089
- "epoch": 2.29,
1090
- "learning_rate": 1.448888888888889e-07,
1091
- "loss": 0.0326,
1092
  "step": 4350
1093
  },
1094
  {
1095
- "epoch": 3.0,
1096
- "learning_rate": 1.3933333333333334e-07,
1097
- "loss": 0.0428,
1098
  "step": 4375
1099
  },
1100
  {
1101
- "epoch": 3.01,
1102
- "learning_rate": 1.3377777777777777e-07,
1103
- "loss": 0.072,
1104
  "step": 4400
1105
  },
1106
  {
1107
- "epoch": 3.01,
1108
- "learning_rate": 1.282222222222222e-07,
1109
- "loss": 0.0718,
1110
  "step": 4425
1111
  },
1112
  {
1113
- "epoch": 3.02,
1114
- "learning_rate": 1.2266666666666665e-07,
1115
- "loss": 0.0662,
1116
  "step": 4450
1117
  },
1118
  {
1119
- "epoch": 3.02,
1120
- "learning_rate": 1.1711111111111111e-07,
1121
- "loss": 0.0455,
1122
  "step": 4475
1123
  },
1124
  {
1125
- "epoch": 3.03,
1126
- "learning_rate": 1.1155555555555555e-07,
1127
- "loss": 0.0367,
1128
  "step": 4500
1129
  },
1130
  {
1131
- "epoch": 3.03,
1132
- "learning_rate": 1.06e-07,
1133
- "loss": 0.0312,
1134
  "step": 4525
1135
  },
1136
  {
1137
- "epoch": 3.04,
1138
- "learning_rate": 1.0044444444444444e-07,
1139
- "loss": 0.0246,
1140
  "step": 4550
1141
  },
1142
  {
1143
- "epoch": 3.04,
1144
- "learning_rate": 9.488888888888888e-08,
1145
- "loss": 0.0212,
1146
  "step": 4575
1147
  },
1148
  {
1149
- "epoch": 3.05,
1150
- "learning_rate": 8.933333333333333e-08,
1151
- "loss": 0.0215,
1152
  "step": 4600
1153
  },
1154
  {
1155
- "epoch": 3.05,
1156
- "learning_rate": 8.377777777777778e-08,
1157
- "loss": 0.0356,
1158
  "step": 4625
1159
  },
1160
  {
1161
- "epoch": 3.06,
1162
- "learning_rate": 7.822222222222221e-08,
1163
- "loss": 0.0314,
1164
  "step": 4650
1165
  },
1166
  {
1167
- "epoch": 3.06,
1168
- "learning_rate": 7.266666666666667e-08,
1169
- "loss": 0.0253,
1170
  "step": 4675
1171
  },
1172
  {
1173
- "epoch": 3.07,
1174
- "learning_rate": 6.71111111111111e-08,
1175
- "loss": 0.0309,
1176
  "step": 4700
1177
  },
1178
  {
1179
- "epoch": 3.07,
1180
- "learning_rate": 6.155555555555556e-08,
1181
- "loss": 0.0418,
1182
  "step": 4725
1183
  },
1184
  {
1185
- "epoch": 3.08,
1186
- "learning_rate": 5.6e-08,
1187
- "loss": 0.0323,
1188
  "step": 4750
1189
  },
1190
  {
1191
- "epoch": 3.08,
1192
- "learning_rate": 5.044444444444444e-08,
1193
- "loss": 0.0309,
1194
  "step": 4775
1195
  },
1196
  {
1197
- "epoch": 3.09,
1198
- "learning_rate": 4.4888888888888885e-08,
1199
- "loss": 0.0285,
1200
  "step": 4800
1201
  },
1202
  {
1203
- "epoch": 3.09,
1204
- "learning_rate": 3.933333333333333e-08,
1205
- "loss": 0.0357,
1206
  "step": 4825
1207
  },
1208
  {
1209
- "epoch": 3.1,
1210
- "learning_rate": 3.377777777777777e-08,
1211
- "loss": 0.0341,
1212
  "step": 4850
1213
  },
1214
  {
1215
- "epoch": 3.1,
1216
- "learning_rate": 2.822222222222222e-08,
1217
- "loss": 0.0444,
1218
  "step": 4875
1219
  },
1220
  {
1221
- "epoch": 3.11,
1222
- "learning_rate": 2.2666666666666668e-08,
1223
- "loss": 0.032,
1224
  "step": 4900
1225
  },
1226
  {
1227
- "epoch": 3.11,
1228
- "learning_rate": 1.711111111111111e-08,
1229
- "loss": 0.0426,
1230
  "step": 4925
1231
  },
1232
  {
1233
- "epoch": 3.12,
1234
- "learning_rate": 1.1555555555555554e-08,
1235
- "loss": 0.0284,
1236
  "step": 4950
1237
  },
1238
  {
1239
- "epoch": 3.12,
1240
- "learning_rate": 6e-09,
1241
- "loss": 0.0374,
1242
  "step": 4975
1243
  },
1244
  {
1245
- "epoch": 3.13,
1246
- "learning_rate": 4.4444444444444443e-10,
1247
- "loss": 0.0517,
1248
  "step": 5000
1249
  },
1250
  {
1251
- "epoch": 3.13,
1252
- "eval_cer": 8.984656437625084,
1253
- "eval_loss": 0.6150030493736267,
1254
- "eval_runtime": 893.8591,
1255
- "eval_samples_per_second": 1.384,
1256
- "eval_steps_per_second": 0.087,
1257
- "eval_wer": 20.49791620969511,
1258
  "step": 5000
1259
  },
1260
  {
1261
- "epoch": 3.13,
1262
  "step": 5000,
1263
- "total_flos": 1.6324473435291648e+20,
1264
- "train_loss": 0.12916588606834412,
1265
- "train_runtime": 40862.8523,
1266
- "train_samples_per_second": 3.916,
1267
- "train_steps_per_second": 0.122
1268
  }
1269
  ],
1270
  "max_steps": 5000,
1271
  "num_train_epochs": 9223372036854775807,
1272
- "total_flos": 1.6324473435291648e+20,
1273
  "trial_name": null,
1274
  "trial_params": null
1275
  }
 
1
  {
2
+ "best_metric": 19.368282518096073,
3
+ "best_model_checkpoint": "./checkpoint-4000",
4
+ "epoch": 6.1252,
5
  "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.01,
12
+ "learning_rate": 4.800000000000001e-07,
13
+ "loss": 1.4296,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.01,
18
+ "learning_rate": 9.800000000000001e-07,
19
+ "loss": 1.1127,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.01,
24
+ "learning_rate": 1.48e-06,
25
+ "loss": 0.5985,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.02,
30
+ "learning_rate": 1.98e-06,
31
+ "loss": 0.3874,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.03,
36
+ "learning_rate": 2.4800000000000004e-06,
37
+ "loss": 0.2839,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.03,
42
+ "learning_rate": 2.9800000000000003e-06,
43
+ "loss": 0.2291,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 0.04,
48
+ "learning_rate": 3.48e-06,
49
+ "loss": 0.1973,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 0.04,
54
+ "learning_rate": 3.980000000000001e-06,
55
+ "loss": 0.1512,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.04,
60
+ "learning_rate": 4.48e-06,
61
+ "loss": 0.1399,
62
  "step": 225
63
  },
64
  {
65
  "epoch": 0.05,
66
+ "learning_rate": 4.980000000000001e-06,
67
+ "loss": 0.1351,
68
  "step": 250
69
  },
70
  {
71
  "epoch": 0.06,
72
+ "learning_rate": 5.480000000000001e-06,
73
+ "loss": 0.1304,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.06,
78
+ "learning_rate": 5.98e-06,
79
+ "loss": 0.1156,
80
  "step": 300
81
  },
82
  {
83
  "epoch": 0.07,
84
+ "learning_rate": 6.480000000000001e-06,
85
+ "loss": 0.1333,
86
  "step": 325
87
  },
88
  {
89
  "epoch": 0.07,
90
+ "learning_rate": 6.98e-06,
91
+ "loss": 0.0772,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.07,
96
+ "learning_rate": 7.48e-06,
97
+ "loss": 0.095,
98
  "step": 375
99
  },
100
  {
101
  "epoch": 0.08,
102
+ "learning_rate": 7.980000000000002e-06,
103
+ "loss": 0.0779,
104
  "step": 400
105
  },
106
  {
107
  "epoch": 0.09,
108
+ "learning_rate": 8.48e-06,
109
+ "loss": 0.0813,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 0.09,
114
+ "learning_rate": 8.98e-06,
115
+ "loss": 0.0933,
116
  "step": 450
117
  },
118
  {
119
  "epoch": 0.1,
120
+ "learning_rate": 9.48e-06,
121
+ "loss": 0.0805,
122
  "step": 475
123
  },
124
  {
125
  "epoch": 0.1,
126
+ "learning_rate": 9.980000000000001e-06,
127
+ "loss": 0.0665,
128
  "step": 500
129
  },
130
  {
131
  "epoch": 0.1,
132
+ "learning_rate": 9.946666666666667e-06,
133
+ "loss": 0.0787,
134
  "step": 525
135
  },
136
  {
137
  "epoch": 0.11,
138
+ "learning_rate": 9.891111111111113e-06,
139
+ "loss": 0.0872,
140
  "step": 550
141
  },
142
  {
143
  "epoch": 0.12,
144
+ "learning_rate": 9.835555555555556e-06,
145
+ "loss": 0.0749,
146
  "step": 575
147
  },
148
  {
149
  "epoch": 0.12,
150
+ "learning_rate": 9.780000000000001e-06,
151
+ "loss": 0.0721,
152
  "step": 600
153
  },
154
  {
155
  "epoch": 0.12,
156
+ "learning_rate": 9.724444444444445e-06,
157
+ "loss": 0.0908,
158
  "step": 625
159
  },
160
  {
161
  "epoch": 0.13,
162
+ "learning_rate": 9.66888888888889e-06,
163
+ "loss": 0.0845,
164
  "step": 650
165
  },
166
  {
167
  "epoch": 0.14,
168
+ "learning_rate": 9.613333333333335e-06,
169
+ "loss": 0.0622,
170
  "step": 675
171
  },
172
  {
173
  "epoch": 0.14,
174
+ "learning_rate": 9.557777777777777e-06,
175
+ "loss": 0.0559,
176
  "step": 700
177
  },
178
  {
179
  "epoch": 0.14,
180
+ "learning_rate": 9.502222222222223e-06,
181
+ "loss": 0.0556,
182
  "step": 725
183
  },
184
  {
185
+ "epoch": 1.0,
186
+ "learning_rate": 9.446666666666667e-06,
187
+ "loss": 0.0966,
188
  "step": 750
189
  },
190
  {
191
+ "epoch": 1.01,
192
+ "learning_rate": 9.391111111111111e-06,
193
+ "loss": 0.0812,
194
  "step": 775
195
  },
196
  {
197
+ "epoch": 1.01,
198
+ "learning_rate": 9.335555555555557e-06,
199
+ "loss": 0.0492,
200
  "step": 800
201
  },
202
  {
203
+ "epoch": 1.02,
204
+ "learning_rate": 9.280000000000001e-06,
205
+ "loss": 0.0381,
206
  "step": 825
207
  },
208
  {
209
+ "epoch": 1.02,
210
+ "learning_rate": 9.224444444444445e-06,
211
+ "loss": 0.0428,
212
  "step": 850
213
  },
214
  {
215
+ "epoch": 1.03,
216
+ "learning_rate": 9.168888888888889e-06,
217
+ "loss": 0.0405,
218
  "step": 875
219
  },
220
  {
221
+ "epoch": 1.03,
222
+ "learning_rate": 9.113333333333335e-06,
223
+ "loss": 0.0474,
224
  "step": 900
225
  },
226
  {
227
+ "epoch": 1.04,
228
+ "learning_rate": 9.057777777777779e-06,
229
+ "loss": 0.041,
230
  "step": 925
231
  },
232
  {
233
+ "epoch": 1.04,
234
+ "learning_rate": 9.002222222222223e-06,
235
+ "loss": 0.0394,
236
  "step": 950
237
  },
238
  {
239
+ "epoch": 1.05,
240
+ "learning_rate": 8.946666666666669e-06,
241
+ "loss": 0.0437,
242
  "step": 975
243
  },
244
  {
245
+ "epoch": 1.05,
246
+ "learning_rate": 8.891111111111111e-06,
247
+ "loss": 0.0471,
248
  "step": 1000
249
  },
250
  {
251
+ "epoch": 1.05,
252
+ "eval_cer": 8.981987991994664,
253
+ "eval_loss": 0.5961337685585022,
254
+ "eval_runtime": 892.0726,
255
+ "eval_samples_per_second": 1.387,
256
+ "eval_steps_per_second": 0.044,
257
+ "eval_wer": 20.289537179205965,
258
  "step": 1000
259
  },
260
  {
261
+ "epoch": 1.06,
262
+ "learning_rate": 8.835555555555557e-06,
263
+ "loss": 0.0379,
264
  "step": 1025
265
  },
266
  {
267
+ "epoch": 1.06,
268
+ "learning_rate": 8.78e-06,
269
+ "loss": 0.0464,
270
  "step": 1050
271
  },
272
  {
273
+ "epoch": 1.07,
274
+ "learning_rate": 8.724444444444445e-06,
275
+ "loss": 0.025,
276
  "step": 1075
277
  },
278
  {
279
+ "epoch": 1.07,
280
+ "learning_rate": 8.66888888888889e-06,
281
+ "loss": 0.0324,
282
  "step": 1100
283
  },
284
  {
285
+ "epoch": 1.08,
286
+ "learning_rate": 8.613333333333333e-06,
287
+ "loss": 0.025,
288
  "step": 1125
289
  },
290
  {
291
+ "epoch": 1.08,
292
+ "learning_rate": 8.557777777777778e-06,
293
+ "loss": 0.0304,
294
  "step": 1150
295
  },
296
  {
297
+ "epoch": 1.09,
298
+ "learning_rate": 8.502222222222223e-06,
299
+ "loss": 0.0356,
300
  "step": 1175
301
  },
302
  {
303
+ "epoch": 1.09,
304
+ "learning_rate": 8.446666666666668e-06,
305
+ "loss": 0.031,
306
  "step": 1200
307
  },
308
  {
309
+ "epoch": 1.1,
310
+ "learning_rate": 8.391111111111112e-06,
311
+ "loss": 0.0182,
312
  "step": 1225
313
  },
314
  {
315
+ "epoch": 1.1,
316
+ "learning_rate": 8.335555555555556e-06,
317
+ "loss": 0.0344,
318
  "step": 1250
319
  },
320
  {
321
+ "epoch": 1.11,
322
+ "learning_rate": 8.28e-06,
323
+ "loss": 0.0329,
324
  "step": 1275
325
  },
326
  {
327
+ "epoch": 1.11,
328
+ "learning_rate": 8.224444444444444e-06,
329
+ "loss": 0.032,
330
  "step": 1300
331
  },
332
  {
333
+ "epoch": 1.12,
334
+ "learning_rate": 8.16888888888889e-06,
335
+ "loss": 0.0192,
336
  "step": 1325
337
  },
338
  {
339
+ "epoch": 1.12,
340
+ "learning_rate": 8.113333333333334e-06,
341
+ "loss": 0.0359,
342
  "step": 1350
343
  },
344
  {
345
+ "epoch": 1.13,
346
+ "learning_rate": 8.057777777777778e-06,
347
+ "loss": 0.0386,
348
  "step": 1375
349
  },
350
  {
351
+ "epoch": 1.13,
352
+ "learning_rate": 8.002222222222222e-06,
353
+ "loss": 0.0201,
354
  "step": 1400
355
  },
356
  {
357
+ "epoch": 1.14,
358
+ "learning_rate": 7.946666666666666e-06,
359
+ "loss": 0.0213,
360
  "step": 1425
361
  },
362
  {
363
+ "epoch": 1.14,
364
+ "learning_rate": 7.891111111111112e-06,
365
+ "loss": 0.0195,
366
  "step": 1450
367
  },
368
  {
369
+ "epoch": 2.0,
370
+ "learning_rate": 7.835555555555556e-06,
371
+ "loss": 0.0423,
372
  "step": 1475
373
  },
374
  {
375
+ "epoch": 2.01,
376
+ "learning_rate": 7.78e-06,
377
+ "loss": 0.042,
378
  "step": 1500
379
  },
380
  {
381
+ "epoch": 2.01,
382
+ "learning_rate": 7.724444444444446e-06,
383
+ "loss": 0.0225,
384
  "step": 1525
385
  },
386
  {
387
+ "epoch": 2.02,
388
+ "learning_rate": 7.66888888888889e-06,
389
+ "loss": 0.0139,
390
  "step": 1550
391
  },
392
  {
393
+ "epoch": 2.02,
394
+ "learning_rate": 7.613333333333334e-06,
395
+ "loss": 0.0132,
396
  "step": 1575
397
  },
398
  {
399
+ "epoch": 2.03,
400
+ "learning_rate": 7.557777777777779e-06,
401
+ "loss": 0.017,
402
  "step": 1600
403
  },
404
  {
405
+ "epoch": 2.03,
406
+ "learning_rate": 7.502222222222223e-06,
407
+ "loss": 0.016,
408
  "step": 1625
409
  },
410
  {
411
+ "epoch": 2.04,
412
+ "learning_rate": 7.446666666666668e-06,
413
+ "loss": 0.0206,
414
  "step": 1650
415
  },
416
  {
417
+ "epoch": 2.04,
418
+ "learning_rate": 7.3911111111111125e-06,
419
+ "loss": 0.0157,
420
  "step": 1675
421
  },
422
  {
423
+ "epoch": 2.05,
424
+ "learning_rate": 7.335555555555556e-06,
425
+ "loss": 0.019,
426
  "step": 1700
427
  },
428
  {
429
+ "epoch": 2.05,
430
+ "learning_rate": 7.280000000000001e-06,
431
+ "loss": 0.0208,
432
  "step": 1725
433
  },
434
  {
435
+ "epoch": 2.06,
436
+ "learning_rate": 7.224444444444445e-06,
437
+ "loss": 0.0148,
438
  "step": 1750
439
  },
440
  {
441
+ "epoch": 2.06,
442
+ "learning_rate": 7.1688888888888895e-06,
443
+ "loss": 0.0231,
444
  "step": 1775
445
  },
446
  {
447
+ "epoch": 2.07,
448
+ "learning_rate": 7.113333333333334e-06,
449
+ "loss": 0.0121,
450
  "step": 1800
451
  },
452
  {
453
+ "epoch": 2.07,
454
+ "learning_rate": 7.057777777777778e-06,
455
+ "loss": 0.0121,
456
  "step": 1825
457
  },
458
  {
459
+ "epoch": 2.08,
460
+ "learning_rate": 7.0022222222222225e-06,
461
+ "loss": 0.0115,
462
  "step": 1850
463
  },
464
  {
465
+ "epoch": 2.08,
466
+ "learning_rate": 6.946666666666667e-06,
467
+ "loss": 0.0136,
468
  "step": 1875
469
  },
470
  {
471
+ "epoch": 2.09,
472
+ "learning_rate": 6.891111111111111e-06,
473
+ "loss": 0.0146,
474
  "step": 1900
475
  },
476
  {
477
+ "epoch": 2.09,
478
+ "learning_rate": 6.835555555555556e-06,
479
+ "loss": 0.0148,
480
  "step": 1925
481
  },
482
  {
483
+ "epoch": 2.1,
484
+ "learning_rate": 6.780000000000001e-06,
485
+ "loss": 0.0093,
486
  "step": 1950
487
  },
488
  {
489
+ "epoch": 2.1,
490
+ "learning_rate": 6.724444444444444e-06,
491
+ "loss": 0.0154,
492
  "step": 1975
493
  },
494
  {
495
+ "epoch": 2.11,
496
+ "learning_rate": 6.668888888888889e-06,
497
+ "loss": 0.0194,
498
  "step": 2000
499
  },
500
  {
501
+ "epoch": 2.11,
502
+ "eval_cer": 9.710473649099399,
503
+ "eval_loss": 1.0999209880828857,
504
+ "eval_runtime": 990.357,
505
+ "eval_samples_per_second": 1.249,
506
+ "eval_steps_per_second": 0.039,
507
+ "eval_wer": 22.614608466769027,
508
  "step": 2000
509
  },
510
  {
511
+ "epoch": 2.11,
512
+ "learning_rate": 6.613333333333334e-06,
513
+ "loss": 0.0166,
514
  "step": 2025
515
  },
516
  {
517
+ "epoch": 2.12,
518
+ "learning_rate": 6.557777777777778e-06,
519
+ "loss": 0.0068,
520
  "step": 2050
521
  },
522
  {
523
+ "epoch": 2.12,
524
+ "learning_rate": 6.502222222222223e-06,
525
+ "loss": 0.0149,
526
  "step": 2075
527
  },
528
  {
529
+ "epoch": 2.13,
530
+ "learning_rate": 6.446666666666668e-06,
531
+ "loss": 0.0184,
532
  "step": 2100
533
  },
534
  {
535
+ "epoch": 2.13,
536
+ "learning_rate": 6.391111111111111e-06,
537
+ "loss": 0.0097,
538
  "step": 2125
539
  },
540
  {
541
+ "epoch": 2.14,
542
+ "learning_rate": 6.335555555555556e-06,
543
+ "loss": 0.0089,
544
  "step": 2150
545
  },
546
  {
547
+ "epoch": 2.14,
548
+ "learning_rate": 6.280000000000001e-06,
549
+ "loss": 0.0097,
550
  "step": 2175
551
  },
552
  {
553
+ "epoch": 3.0,
554
+ "learning_rate": 6.224444444444445e-06,
555
+ "loss": 0.0176,
556
  "step": 2200
557
  },
558
  {
559
+ "epoch": 3.01,
560
+ "learning_rate": 6.16888888888889e-06,
561
+ "loss": 0.0239,
562
  "step": 2225
563
  },
564
  {
565
+ "epoch": 3.01,
566
+ "learning_rate": 6.113333333333333e-06,
567
+ "loss": 0.0129,
568
  "step": 2250
569
  },
570
  {
571
+ "epoch": 3.02,
572
+ "learning_rate": 6.057777777777778e-06,
573
+ "loss": 0.006,
574
  "step": 2275
575
  },
576
  {
577
+ "epoch": 3.02,
578
+ "learning_rate": 6.002222222222223e-06,
579
+ "loss": 0.0041,
580
  "step": 2300
581
  },
582
  {
583
+ "epoch": 3.03,
584
+ "learning_rate": 5.946666666666668e-06,
585
+ "loss": 0.0076,
586
  "step": 2325
587
  },
588
  {
589
+ "epoch": 3.03,
590
+ "learning_rate": 5.891111111111112e-06,
591
+ "loss": 0.0049,
592
  "step": 2350
593
  },
594
  {
595
+ "epoch": 3.04,
596
+ "learning_rate": 5.8355555555555565e-06,
597
+ "loss": 0.0085,
598
  "step": 2375
599
  },
600
  {
601
+ "epoch": 3.04,
602
+ "learning_rate": 5.78e-06,
603
+ "loss": 0.0082,
604
  "step": 2400
605
  },
606
  {
607
+ "epoch": 3.05,
608
+ "learning_rate": 5.724444444444445e-06,
609
+ "loss": 0.008,
610
  "step": 2425
611
  },
612
  {
613
+ "epoch": 3.05,
614
+ "learning_rate": 5.6688888888888895e-06,
615
+ "loss": 0.0086,
616
  "step": 2450
617
  },
618
  {
619
+ "epoch": 3.06,
620
+ "learning_rate": 5.613333333333334e-06,
621
+ "loss": 0.0081,
622
  "step": 2475
623
  },
624
  {
625
+ "epoch": 3.06,
626
+ "learning_rate": 5.557777777777778e-06,
627
+ "loss": 0.0089,
628
  "step": 2500
629
  },
630
  {
631
+ "epoch": 3.07,
632
+ "learning_rate": 5.5022222222222224e-06,
633
+ "loss": 0.0077,
634
  "step": 2525
635
  },
636
  {
637
+ "epoch": 3.07,
638
+ "learning_rate": 5.4466666666666665e-06,
639
+ "loss": 0.0044,
640
  "step": 2550
641
  },
642
  {
643
+ "epoch": 3.08,
644
+ "learning_rate": 5.391111111111111e-06,
645
+ "loss": 0.0056,
646
  "step": 2575
647
  },
648
  {
649
+ "epoch": 3.08,
650
+ "learning_rate": 5.335555555555556e-06,
651
+ "loss": 0.0068,
652
  "step": 2600
653
  },
654
  {
655
+ "epoch": 3.09,
656
+ "learning_rate": 5.28e-06,
657
+ "loss": 0.0058,
658
  "step": 2625
659
  },
660
  {
661
+ "epoch": 3.09,
662
+ "learning_rate": 5.224444444444445e-06,
663
+ "loss": 0.0082,
664
  "step": 2650
665
  },
666
  {
667
+ "epoch": 3.1,
668
+ "learning_rate": 5.168888888888889e-06,
669
+ "loss": 0.0038,
670
  "step": 2675
671
  },
672
  {
673
+ "epoch": 3.1,
674
+ "learning_rate": 5.113333333333333e-06,
675
+ "loss": 0.0066,
676
  "step": 2700
677
  },
678
  {
679
+ "epoch": 3.11,
680
+ "learning_rate": 5.057777777777778e-06,
681
+ "loss": 0.0093,
682
  "step": 2725
683
  },
684
  {
685
+ "epoch": 3.11,
686
+ "learning_rate": 5.002222222222223e-06,
687
+ "loss": 0.0074,
688
  "step": 2750
689
  },
690
  {
691
+ "epoch": 3.12,
692
+ "learning_rate": 4.946666666666667e-06,
693
+ "loss": 0.0033,
694
  "step": 2775
695
  },
696
  {
697
+ "epoch": 3.12,
698
+ "learning_rate": 4.891111111111111e-06,
699
+ "loss": 0.0071,
700
  "step": 2800
701
  },
702
  {
703
+ "epoch": 3.13,
704
+ "learning_rate": 4.835555555555556e-06,
705
+ "loss": 0.0076,
706
  "step": 2825
707
  },
708
  {
709
+ "epoch": 3.13,
710
+ "learning_rate": 4.78e-06,
711
+ "loss": 0.0045,
712
  "step": 2850
713
  },
714
  {
715
+ "epoch": 3.14,
716
+ "learning_rate": 4.724444444444445e-06,
717
+ "loss": 0.0058,
718
  "step": 2875
719
  },
720
  {
721
+ "epoch": 3.14,
722
+ "learning_rate": 4.66888888888889e-06,
723
+ "loss": 0.0026,
724
  "step": 2900
725
  },
726
  {
727
+ "epoch": 4.0,
728
+ "learning_rate": 4.613333333333334e-06,
729
+ "loss": 0.0065,
730
  "step": 2925
731
  },
732
  {
733
+ "epoch": 4.01,
734
+ "learning_rate": 4.557777777777778e-06,
735
+ "loss": 0.0097,
736
  "step": 2950
737
  },
738
  {
739
+ "epoch": 4.01,
740
+ "learning_rate": 4.502222222222223e-06,
741
+ "loss": 0.006,
742
  "step": 2975
743
  },
744
  {
745
+ "epoch": 4.02,
746
+ "learning_rate": 4.446666666666667e-06,
747
+ "loss": 0.002,
748
  "step": 3000
749
  },
750
  {
751
+ "epoch": 4.02,
752
+ "eval_cer": 8.837891927951969,
753
+ "eval_loss": 0.7289467453956604,
754
+ "eval_runtime": 919.6749,
755
+ "eval_samples_per_second": 1.345,
756
+ "eval_steps_per_second": 0.042,
757
+ "eval_wer": 20.201798640052644,
758
  "step": 3000
759
  },
760
  {
761
+ "epoch": 4.02,
762
+ "learning_rate": 4.391111111111112e-06,
763
+ "loss": 0.0018,
764
  "step": 3025
765
  },
766
  {
767
+ "epoch": 4.03,
768
+ "learning_rate": 4.3355555555555565e-06,
769
+ "loss": 0.0034,
770
  "step": 3050
771
  },
772
  {
773
+ "epoch": 4.03,
774
+ "learning_rate": 4.2800000000000005e-06,
775
+ "loss": 0.0022,
776
  "step": 3075
777
  },
778
  {
779
+ "epoch": 4.04,
780
+ "learning_rate": 4.2244444444444446e-06,
781
+ "loss": 0.0038,
782
  "step": 3100
783
  },
784
  {
785
+ "epoch": 4.04,
786
+ "learning_rate": 4.168888888888889e-06,
787
+ "loss": 0.0019,
788
  "step": 3125
789
  },
790
  {
791
+ "epoch": 4.05,
792
+ "learning_rate": 4.1133333333333335e-06,
793
+ "loss": 0.0043,
794
  "step": 3150
795
  },
796
  {
797
+ "epoch": 4.05,
798
+ "learning_rate": 4.057777777777778e-06,
799
+ "loss": 0.0033,
800
  "step": 3175
801
  },
802
  {
803
+ "epoch": 4.06,
804
+ "learning_rate": 4.002222222222222e-06,
805
+ "loss": 0.0024,
806
  "step": 3200
807
  },
808
  {
809
+ "epoch": 4.06,
810
+ "learning_rate": 3.946666666666667e-06,
811
+ "loss": 0.0038,
812
  "step": 3225
813
  },
814
  {
815
+ "epoch": 4.07,
816
+ "learning_rate": 3.891111111111111e-06,
817
+ "loss": 0.0026,
818
  "step": 3250
819
  },
820
  {
821
+ "epoch": 4.07,
822
+ "learning_rate": 3.835555555555555e-06,
823
+ "loss": 0.0012,
824
  "step": 3275
825
  },
826
  {
827
+ "epoch": 4.08,
828
+ "learning_rate": 3.7800000000000002e-06,
829
+ "loss": 0.0019,
830
  "step": 3300
831
  },
832
  {
833
+ "epoch": 4.08,
834
+ "learning_rate": 3.724444444444445e-06,
835
+ "loss": 0.0026,
836
  "step": 3325
837
  },
838
  {
839
+ "epoch": 4.09,
840
+ "learning_rate": 3.668888888888889e-06,
841
+ "loss": 0.0023,
842
  "step": 3350
843
  },
844
  {
845
+ "epoch": 4.09,
846
+ "learning_rate": 3.6133333333333336e-06,
847
+ "loss": 0.0031,
848
  "step": 3375
849
  },
850
  {
851
+ "epoch": 4.1,
852
+ "learning_rate": 3.5577777777777785e-06,
853
+ "loss": 0.0012,
854
  "step": 3400
855
  },
856
  {
857
+ "epoch": 4.1,
858
+ "learning_rate": 3.5022222222222225e-06,
859
+ "loss": 0.0036,
860
  "step": 3425
861
  },
862
  {
863
+ "epoch": 4.11,
864
+ "learning_rate": 3.446666666666667e-06,
865
+ "loss": 0.0038,
866
  "step": 3450
867
  },
868
  {
869
+ "epoch": 4.11,
870
+ "learning_rate": 3.391111111111111e-06,
871
+ "loss": 0.0024,
872
  "step": 3475
873
  },
874
  {
875
+ "epoch": 4.12,
876
+ "learning_rate": 3.335555555555556e-06,
877
+ "loss": 0.0013,
878
  "step": 3500
879
  },
880
  {
881
+ "epoch": 4.12,
882
+ "learning_rate": 3.2800000000000004e-06,
883
+ "loss": 0.0022,
884
  "step": 3525
885
  },
886
  {
887
+ "epoch": 4.13,
888
+ "learning_rate": 3.2244444444444444e-06,
889
+ "loss": 0.002,
890
  "step": 3550
891
  },
892
  {
893
+ "epoch": 4.13,
894
+ "learning_rate": 3.1688888888888893e-06,
895
+ "loss": 0.0029,
896
  "step": 3575
897
  },
898
  {
899
+ "epoch": 4.14,
900
+ "learning_rate": 3.1133333333333337e-06,
901
+ "loss": 0.0028,
902
  "step": 3600
903
  },
904
  {
905
+ "epoch": 4.14,
906
+ "learning_rate": 3.0577777777777778e-06,
907
+ "loss": 0.0014,
908
  "step": 3625
909
  },
910
  {
911
+ "epoch": 5.0,
912
+ "learning_rate": 3.0022222222222227e-06,
913
+ "loss": 0.0021,
914
  "step": 3650
915
  },
916
  {
917
+ "epoch": 5.01,
918
+ "learning_rate": 2.946666666666667e-06,
919
+ "loss": 0.0042,
920
  "step": 3675
921
  },
922
  {
923
+ "epoch": 5.01,
924
+ "learning_rate": 2.891111111111111e-06,
925
+ "loss": 0.0021,
926
  "step": 3700
927
  },
928
  {
929
+ "epoch": 5.02,
930
+ "learning_rate": 2.835555555555556e-06,
931
+ "loss": 0.0014,
932
  "step": 3725
933
  },
934
  {
935
+ "epoch": 5.02,
936
+ "learning_rate": 2.7800000000000005e-06,
937
+ "loss": 0.0008,
938
  "step": 3750
939
  },
940
  {
941
+ "epoch": 5.03,
942
+ "learning_rate": 2.7244444444444445e-06,
943
+ "loss": 0.0008,
944
  "step": 3775
945
  },
946
  {
947
+ "epoch": 5.03,
948
+ "learning_rate": 2.6688888888888894e-06,
949
+ "loss": 0.0006,
950
  "step": 3800
951
  },
952
  {
953
+ "epoch": 5.04,
954
+ "learning_rate": 2.6133333333333334e-06,
955
+ "loss": 0.0012,
956
  "step": 3825
957
  },
958
  {
959
+ "epoch": 5.04,
960
+ "learning_rate": 2.557777777777778e-06,
961
+ "loss": 0.0017,
962
  "step": 3850
963
  },
964
  {
965
+ "epoch": 5.05,
966
+ "learning_rate": 2.5022222222222224e-06,
967
+ "loss": 0.0019,
968
  "step": 3875
969
  },
970
  {
971
+ "epoch": 5.05,
972
+ "learning_rate": 2.446666666666667e-06,
973
+ "loss": 0.0012,
974
  "step": 3900
975
  },
976
  {
977
+ "epoch": 5.06,
978
+ "learning_rate": 2.3911111111111113e-06,
979
+ "loss": 0.0009,
980
  "step": 3925
981
  },
982
  {
983
+ "epoch": 5.06,
984
+ "learning_rate": 2.3355555555555557e-06,
985
+ "loss": 0.0011,
986
  "step": 3950
987
  },
988
  {
989
+ "epoch": 5.07,
990
+ "learning_rate": 2.28e-06,
991
+ "loss": 0.0009,
992
  "step": 3975
993
  },
994
  {
995
+ "epoch": 5.07,
996
+ "learning_rate": 2.2244444444444447e-06,
997
+ "loss": 0.0006,
998
  "step": 4000
999
  },
1000
  {
1001
+ "epoch": 5.07,
1002
+ "eval_cer": 8.437625083388927,
1003
+ "eval_loss": 0.7791085243225098,
1004
+ "eval_runtime": 930.9266,
1005
+ "eval_samples_per_second": 1.329,
1006
+ "eval_steps_per_second": 0.042,
1007
+ "eval_wer": 19.368282518096073,
1008
  "step": 4000
1009
  },
1010
  {
1011
+ "epoch": 5.08,
1012
+ "learning_rate": 2.168888888888889e-06,
1013
+ "loss": 0.0006,
1014
  "step": 4025
1015
  },
1016
  {
1017
+ "epoch": 5.08,
1018
+ "learning_rate": 2.1133333333333336e-06,
1019
+ "loss": 0.0009,
1020
  "step": 4050
1021
  },
1022
  {
1023
+ "epoch": 5.09,
1024
+ "learning_rate": 2.057777777777778e-06,
1025
+ "loss": 0.0008,
1026
  "step": 4075
1027
  },
1028
  {
1029
+ "epoch": 5.09,
1030
+ "learning_rate": 2.0022222222222225e-06,
1031
+ "loss": 0.0013,
1032
  "step": 4100
1033
  },
1034
  {
1035
+ "epoch": 5.1,
1036
+ "learning_rate": 1.9466666666666665e-06,
1037
+ "loss": 0.0004,
1038
  "step": 4125
1039
  },
1040
  {
1041
+ "epoch": 5.1,
1042
+ "learning_rate": 1.8911111111111114e-06,
1043
+ "loss": 0.0005,
1044
  "step": 4150
1045
  },
1046
  {
1047
+ "epoch": 5.11,
1048
+ "learning_rate": 1.8355555555555557e-06,
1049
+ "loss": 0.0016,
1050
  "step": 4175
1051
  },
1052
  {
1053
+ "epoch": 5.11,
1054
+ "learning_rate": 1.7800000000000001e-06,
1055
+ "loss": 0.0009,
1056
  "step": 4200
1057
  },
1058
  {
1059
+ "epoch": 5.12,
1060
+ "learning_rate": 1.7244444444444448e-06,
1061
+ "loss": 0.0004,
1062
  "step": 4225
1063
  },
1064
  {
1065
+ "epoch": 5.12,
1066
+ "learning_rate": 1.668888888888889e-06,
1067
+ "loss": 0.0006,
1068
  "step": 4250
1069
  },
1070
  {
1071
+ "epoch": 5.13,
1072
+ "learning_rate": 1.6133333333333335e-06,
1073
+ "loss": 0.0007,
1074
  "step": 4275
1075
  },
1076
  {
1077
+ "epoch": 5.13,
1078
+ "learning_rate": 1.5577777777777777e-06,
1079
+ "loss": 0.0013,
1080
  "step": 4300
1081
  },
1082
  {
1083
+ "epoch": 5.14,
1084
+ "learning_rate": 1.5022222222222224e-06,
1085
+ "loss": 0.0009,
1086
  "step": 4325
1087
  },
1088
  {
1089
+ "epoch": 5.14,
1090
+ "learning_rate": 1.4466666666666669e-06,
1091
+ "loss": 0.0005,
1092
  "step": 4350
1093
  },
1094
  {
1095
+ "epoch": 6.0,
1096
+ "learning_rate": 1.3911111111111111e-06,
1097
+ "loss": 0.0003,
1098
  "step": 4375
1099
  },
1100
  {
1101
+ "epoch": 6.01,
1102
+ "learning_rate": 1.3355555555555558e-06,
1103
+ "loss": 0.0011,
1104
  "step": 4400
1105
  },
1106
  {
1107
+ "epoch": 6.01,
1108
+ "learning_rate": 1.28e-06,
1109
+ "loss": 0.0005,
1110
  "step": 4425
1111
  },
1112
  {
1113
+ "epoch": 6.02,
1114
+ "learning_rate": 1.2244444444444445e-06,
1115
+ "loss": 0.0004,
1116
  "step": 4450
1117
  },
1118
  {
1119
+ "epoch": 6.02,
1120
+ "learning_rate": 1.168888888888889e-06,
1121
+ "loss": 0.0002,
1122
  "step": 4475
1123
  },
1124
  {
1125
+ "epoch": 6.03,
1126
+ "learning_rate": 1.1133333333333334e-06,
1127
+ "loss": 0.0003,
1128
  "step": 4500
1129
  },
1130
  {
1131
+ "epoch": 6.03,
1132
+ "learning_rate": 1.0577777777777779e-06,
1133
+ "loss": 0.0003,
1134
  "step": 4525
1135
  },
1136
  {
1137
+ "epoch": 6.04,
1138
+ "learning_rate": 1.0022222222222223e-06,
1139
+ "loss": 0.0003,
1140
  "step": 4550
1141
  },
1142
  {
1143
+ "epoch": 6.04,
1144
+ "learning_rate": 9.466666666666667e-07,
1145
+ "loss": 0.0005,
1146
  "step": 4575
1147
  },
1148
  {
1149
+ "epoch": 6.05,
1150
+ "learning_rate": 8.911111111111112e-07,
1151
+ "loss": 0.0003,
1152
  "step": 4600
1153
  },
1154
  {
1155
+ "epoch": 6.05,
1156
+ "learning_rate": 8.355555555555556e-07,
1157
+ "loss": 0.0003,
1158
  "step": 4625
1159
  },
1160
  {
1161
+ "epoch": 6.06,
1162
+ "learning_rate": 7.8e-07,
1163
+ "loss": 0.0004,
1164
  "step": 4650
1165
  },
1166
  {
1167
+ "epoch": 6.06,
1168
+ "learning_rate": 7.244444444444446e-07,
1169
+ "loss": 0.0003,
1170
  "step": 4675
1171
  },
1172
  {
1173
+ "epoch": 6.07,
1174
+ "learning_rate": 6.68888888888889e-07,
1175
+ "loss": 0.0003,
1176
  "step": 4700
1177
  },
1178
  {
1179
+ "epoch": 6.07,
1180
+ "learning_rate": 6.133333333333333e-07,
1181
+ "loss": 0.0003,
1182
  "step": 4725
1183
  },
1184
  {
1185
+ "epoch": 6.08,
1186
+ "learning_rate": 5.577777777777779e-07,
1187
+ "loss": 0.0003,
1188
  "step": 4750
1189
  },
1190
  {
1191
+ "epoch": 6.08,
1192
+ "learning_rate": 5.022222222222222e-07,
1193
+ "loss": 0.0002,
1194
  "step": 4775
1195
  },
1196
  {
1197
+ "epoch": 6.09,
1198
+ "learning_rate": 4.466666666666667e-07,
1199
+ "loss": 0.0002,
1200
  "step": 4800
1201
  },
1202
  {
1203
+ "epoch": 6.09,
1204
+ "learning_rate": 3.9111111111111115e-07,
1205
+ "loss": 0.0004,
1206
  "step": 4825
1207
  },
1208
  {
1209
+ "epoch": 6.1,
1210
+ "learning_rate": 3.3555555555555556e-07,
1211
+ "loss": 0.0003,
1212
  "step": 4850
1213
  },
1214
  {
1215
+ "epoch": 6.1,
1216
+ "learning_rate": 2.8e-07,
1217
+ "loss": 0.0002,
1218
  "step": 4875
1219
  },
1220
  {
1221
+ "epoch": 6.11,
1222
+ "learning_rate": 2.2444444444444445e-07,
1223
+ "loss": 0.0004,
1224
  "step": 4900
1225
  },
1226
  {
1227
+ "epoch": 6.11,
1228
+ "learning_rate": 1.6888888888888888e-07,
1229
+ "loss": 0.0003,
1230
  "step": 4925
1231
  },
1232
  {
1233
+ "epoch": 6.12,
1234
+ "learning_rate": 1.1333333333333336e-07,
1235
+ "loss": 0.0003,
1236
  "step": 4950
1237
  },
1238
  {
1239
+ "epoch": 6.12,
1240
+ "learning_rate": 5.777777777777778e-08,
1241
+ "loss": 0.0003,
1242
  "step": 4975
1243
  },
1244
  {
1245
+ "epoch": 6.13,
1246
+ "learning_rate": 2.2222222222222225e-09,
1247
+ "loss": 0.0003,
1248
  "step": 5000
1249
  },
1250
  {
1251
+ "epoch": 6.13,
1252
+ "eval_cer": 8.728485657104738,
1253
+ "eval_loss": 0.7427007555961609,
1254
+ "eval_runtime": 932.7395,
1255
+ "eval_samples_per_second": 1.326,
1256
+ "eval_steps_per_second": 0.042,
1257
+ "eval_wer": 19.390217152884404,
1258
  "step": 5000
1259
  },
1260
  {
1261
+ "epoch": 6.13,
1262
  "step": 5000,
1263
+ "total_flos": 3.2648946870583296e+20,
1264
+ "train_loss": 0.04111601705942303,
1265
+ "train_runtime": 74950.7687,
1266
+ "train_samples_per_second": 4.269,
1267
+ "train_steps_per_second": 0.067
1268
  }
1269
  ],
1270
  "max_steps": 5000,
1271
  "num_train_epochs": 9223372036854775807,
1272
+ "total_flos": 3.2648946870583296e+20,
1273
  "trial_name": null,
1274
  "trial_params": null
1275
  }