Jobaula commited on
Commit
2f8b2e3
1 Parent(s): 5ff915f

End of training

Browse files
README.md CHANGED
@@ -3,19 +3,20 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: openai/whisper-medium
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
  - audiofolder
9
  metrics:
10
  - wer
11
  model-index:
12
- - name: openai/whisper-medium
13
  results:
14
  - task:
15
  name: Automatic Speech Recognition
16
  type: automatic-speech-recognition
17
  dataset:
18
- name: audiofolder
19
  type: audiofolder
20
  config: nan-tw
21
  split: test
@@ -29,9 +30,9 @@ model-index:
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
  should probably proofread and complete it, then remove this comment. -->
31
 
32
- # openai/whisper-medium
33
 
34
- This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the audiofolder dataset.
35
  It achieves the following results on the evaluation set:
36
  - Loss: 0.0141
37
  - Model Preparation Time: 0.0121
 
3
  license: apache-2.0
4
  base_model: openai/whisper-medium
5
  tags:
6
+ - whisper-event
7
  - generated_from_trainer
8
  datasets:
9
  - audiofolder
10
  metrics:
11
  - wer
12
  model-index:
13
+ - name: Whisper medium nan-tw common voice
14
  results:
15
  - task:
16
  name: Automatic Speech Recognition
17
  type: automatic-speech-recognition
18
  dataset:
19
+ name: audiofolder nan-tw
20
  type: audiofolder
21
  config: nan-tw
22
  split: test
 
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
31
  should probably proofread and complete it, then remove this comment. -->
32
 
33
+ # Whisper medium nan-tw common voice
34
 
35
+ This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the audiofolder nan-tw dataset.
36
  It achieves the following results on the evaluation set:
37
  - Loss: 0.0141
38
  - Model Preparation Time: 0.0121
all_results.json CHANGED
@@ -1,14 +1,15 @@
1
  {
2
- "epoch": 6.05,
3
- "eval_cer": 134.28571428571428,
4
- "eval_loss": 2.4461984634399414,
5
  "eval_model_preparation_time": 0.0121,
6
- "eval_runtime": 134.5644,
7
- "eval_samples_per_second": 2.229,
8
- "eval_steps_per_second": 1.115,
9
- "eval_wer": 87.5,
10
- "train_loss": 0.3284327008752851,
11
- "train_runtime": 3725.671,
12
- "train_samples_per_second": 2.684,
13
- "train_steps_per_second": 1.342
 
14
  }
 
1
  {
2
+ "epoch": 2.2776,
3
+ "eval_cer": 0.9523809523809524,
4
+ "eval_loss": 0.014132725074887276,
5
  "eval_model_preparation_time": 0.0121,
6
+ "eval_runtime": 117.6465,
7
+ "eval_samples_per_second": 2.55,
8
+ "eval_steps_per_second": 1.275,
9
+ "eval_wer": 0.9615384615384616,
10
+ "total_flos": 1.02060490752e+19,
11
+ "train_loss": 0.5477467903137208,
12
+ "train_runtime": 5409.143,
13
+ "train_samples_per_second": 1.849,
14
+ "train_steps_per_second": 0.924
15
  }
eval_results.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "eval_cer": 134.28571428571428,
3
- "eval_loss": 2.4461984634399414,
 
4
  "eval_model_preparation_time": 0.0121,
5
- "eval_runtime": 134.5644,
6
- "eval_samples_per_second": 2.229,
7
- "eval_steps_per_second": 1.115,
8
- "eval_wer": 87.5
9
  }
 
1
  {
2
+ "epoch": 2.2776,
3
+ "eval_cer": 0.9523809523809524,
4
+ "eval_loss": 0.014132725074887276,
5
  "eval_model_preparation_time": 0.0121,
6
+ "eval_runtime": 117.6465,
7
+ "eval_samples_per_second": 2.55,
8
+ "eval_steps_per_second": 1.275,
9
+ "eval_wer": 0.9615384615384616
10
  }
runs/Nov29_01-15-14_19115951741f/events.out.tfevents.1732848929.19115951741f.2594.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4f0b1c0b3df21e47fca5d383f121ca6e36b0441045ee54496e4495233100aa
3
+ size 519
train_results.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
- "epoch": 6.05,
3
- "train_loss": 0.3284327008752851,
4
- "train_runtime": 3725.671,
5
- "train_samples_per_second": 2.684,
6
- "train_steps_per_second": 1.342
 
7
  }
 
1
  {
2
+ "epoch": 2.2776,
3
+ "total_flos": 1.02060490752e+19,
4
+ "train_loss": 0.5477467903137208,
5
+ "train_runtime": 5409.143,
6
+ "train_samples_per_second": 1.849,
7
+ "train_steps_per_second": 0.924
8
  }
trainer_state.json CHANGED
@@ -1,1275 +1,1497 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.0532,
 
5
  "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.01,
 
12
  "learning_rate": 4.2000000000000006e-07,
13
- "loss": 2.3482,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.01,
 
18
  "learning_rate": 9.200000000000001e-07,
19
- "loss": 2.3433,
20
  "step": 50
21
  },
22
  {
23
- "epoch": 0.01,
 
24
  "learning_rate": 1.42e-06,
25
- "loss": 1.761,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.02,
 
30
  "learning_rate": 1.9200000000000003e-06,
31
- "loss": 1.5988,
32
  "step": 100
33
  },
34
  {
35
- "epoch": 0.03,
36
- "learning_rate": 2.4000000000000003e-06,
37
- "loss": 1.4245,
 
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.03,
42
- "learning_rate": 2.9e-06,
43
- "loss": 1.5665,
 
44
  "step": 150
45
  },
46
  {
47
- "epoch": 0.04,
48
- "learning_rate": 3.4000000000000005e-06,
49
- "loss": 1.4588,
 
50
  "step": 175
51
  },
52
  {
53
  "epoch": 0.04,
54
- "learning_rate": 3.900000000000001e-06,
55
- "loss": 1.2609,
 
56
  "step": 200
57
  },
58
  {
59
- "epoch": 0.04,
60
- "learning_rate": 4.4e-06,
61
- "loss": 1.2301,
 
62
  "step": 225
63
  },
64
  {
65
  "epoch": 0.05,
66
- "learning_rate": 4.9000000000000005e-06,
67
- "loss": 1.4295,
 
68
  "step": 250
69
  },
70
  {
71
- "epoch": 0.06,
72
- "learning_rate": 5.400000000000001e-06,
73
- "loss": 1.415,
 
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.06,
78
- "learning_rate": 5.9e-06,
79
- "loss": 1.1804,
 
80
  "step": 300
81
  },
82
  {
83
- "epoch": 0.07,
 
84
  "learning_rate": 6.4000000000000006e-06,
85
- "loss": 1.3484,
86
  "step": 325
87
  },
88
  {
89
  "epoch": 0.07,
 
90
  "learning_rate": 6.9e-06,
91
- "loss": 1.3593,
92
  "step": 350
93
  },
94
  {
95
- "epoch": 0.07,
 
96
  "learning_rate": 7.4e-06,
97
- "loss": 1.179,
98
  "step": 375
99
  },
100
  {
101
  "epoch": 0.08,
 
102
  "learning_rate": 7.9e-06,
103
- "loss": 1.3824,
104
  "step": 400
105
  },
106
  {
107
- "epoch": 0.09,
 
108
  "learning_rate": 8.400000000000001e-06,
109
- "loss": 1.1744,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 0.09,
 
114
  "learning_rate": 8.900000000000001e-06,
115
- "loss": 1.2079,
116
  "step": 450
117
  },
118
  {
119
- "epoch": 0.1,
 
120
  "learning_rate": 9.4e-06,
121
- "loss": 1.2476,
122
  "step": 475
123
  },
124
  {
125
  "epoch": 0.1,
 
126
  "learning_rate": 9.9e-06,
127
- "loss": 1.3404,
128
  "step": 500
129
  },
130
  {
131
- "epoch": 0.1,
 
132
  "learning_rate": 9.955555555555556e-06,
133
- "loss": 1.0947,
134
  "step": 525
135
  },
136
  {
137
  "epoch": 0.11,
 
138
  "learning_rate": 9.9e-06,
139
- "loss": 1.3787,
140
  "step": 550
141
  },
142
  {
143
- "epoch": 0.12,
 
144
  "learning_rate": 9.844444444444446e-06,
145
- "loss": 1.0917,
146
  "step": 575
147
  },
148
  {
149
  "epoch": 0.12,
 
150
  "learning_rate": 9.78888888888889e-06,
151
- "loss": 1.1884,
152
  "step": 600
153
  },
154
  {
155
- "epoch": 0.12,
 
156
  "learning_rate": 9.733333333333334e-06,
157
- "loss": 1.2814,
158
  "step": 625
159
  },
160
  {
161
  "epoch": 0.13,
 
162
  "learning_rate": 9.677777777777778e-06,
163
- "loss": 1.1693,
164
  "step": 650
165
  },
166
  {
167
- "epoch": 0.14,
 
168
  "learning_rate": 9.622222222222222e-06,
169
- "loss": 1.3852,
170
  "step": 675
171
  },
172
  {
173
  "epoch": 0.14,
 
174
  "learning_rate": 9.566666666666668e-06,
175
- "loss": 1.0322,
176
  "step": 700
177
  },
178
  {
179
- "epoch": 0.14,
 
180
  "learning_rate": 9.511111111111112e-06,
181
- "loss": 1.1455,
182
  "step": 725
183
  },
184
  {
185
  "epoch": 0.15,
 
186
  "learning_rate": 9.455555555555557e-06,
187
- "loss": 0.9578,
188
  "step": 750
189
  },
190
  {
191
- "epoch": 0.15,
 
192
  "learning_rate": 9.4e-06,
193
- "loss": 1.2149,
194
  "step": 775
195
  },
196
  {
197
- "epoch": 1.0,
 
198
  "learning_rate": 9.344444444444446e-06,
199
- "loss": 0.9243,
200
  "step": 800
201
  },
202
  {
203
- "epoch": 1.01,
 
204
  "learning_rate": 9.28888888888889e-06,
205
- "loss": 0.7373,
206
  "step": 825
207
  },
208
  {
209
- "epoch": 1.01,
 
210
  "learning_rate": 9.233333333333334e-06,
211
- "loss": 0.7298,
212
  "step": 850
213
  },
214
  {
215
- "epoch": 1.02,
 
216
  "learning_rate": 9.17777777777778e-06,
217
- "loss": 0.7556,
218
  "step": 875
219
  },
220
  {
221
- "epoch": 1.02,
 
222
  "learning_rate": 9.122222222222223e-06,
223
- "loss": 0.6975,
224
  "step": 900
225
  },
226
  {
227
- "epoch": 1.03,
 
228
  "learning_rate": 9.066666666666667e-06,
229
- "loss": 0.7299,
230
  "step": 925
231
  },
232
  {
233
- "epoch": 1.03,
 
234
  "learning_rate": 9.011111111111111e-06,
235
- "loss": 0.6555,
236
  "step": 950
237
  },
238
  {
239
- "epoch": 1.04,
 
240
  "learning_rate": 8.955555555555555e-06,
241
- "loss": 0.6755,
242
  "step": 975
243
  },
244
  {
245
- "epoch": 1.04,
 
246
  "learning_rate": 8.900000000000001e-06,
247
- "loss": 0.5832,
248
  "step": 1000
249
  },
250
  {
251
- "epoch": 1.04,
252
- "eval_cer": 56.474494415937215,
253
- "eval_loss": 1.0634080171585083,
254
- "eval_runtime": 219.1746,
255
- "eval_samples_per_second": 4.499,
256
- "eval_steps_per_second": 2.249,
257
- "eval_wer": 56.30534351145038,
 
258
  "step": 1000
259
  },
260
  {
261
- "epoch": 1.05,
 
262
  "learning_rate": 8.844444444444445e-06,
263
- "loss": 0.5757,
264
  "step": 1025
265
  },
266
  {
267
- "epoch": 1.05,
 
268
  "learning_rate": 8.788888888888891e-06,
269
- "loss": 0.6065,
270
  "step": 1050
271
  },
272
  {
273
- "epoch": 1.06,
 
274
  "learning_rate": 8.733333333333333e-06,
275
- "loss": 0.5487,
276
  "step": 1075
277
  },
278
  {
279
- "epoch": 1.06,
 
280
  "learning_rate": 8.677777777777779e-06,
281
- "loss": 0.6339,
282
  "step": 1100
283
  },
284
  {
285
- "epoch": 1.07,
 
286
  "learning_rate": 8.622222222222223e-06,
287
- "loss": 0.5477,
288
  "step": 1125
289
  },
290
  {
291
- "epoch": 1.07,
 
292
  "learning_rate": 8.566666666666667e-06,
293
- "loss": 0.597,
294
  "step": 1150
295
  },
296
  {
297
- "epoch": 1.08,
 
298
  "learning_rate": 8.511111111111113e-06,
299
- "loss": 0.5394,
300
  "step": 1175
301
  },
302
  {
303
- "epoch": 1.08,
 
304
  "learning_rate": 8.455555555555555e-06,
305
- "loss": 0.52,
306
  "step": 1200
307
  },
308
  {
309
- "epoch": 1.09,
 
310
  "learning_rate": 8.400000000000001e-06,
311
- "loss": 0.6005,
312
  "step": 1225
313
  },
314
  {
315
- "epoch": 1.09,
 
316
  "learning_rate": 8.344444444444445e-06,
317
- "loss": 0.4988,
318
  "step": 1250
319
  },
320
  {
321
- "epoch": 1.1,
 
322
  "learning_rate": 8.288888888888889e-06,
323
- "loss": 0.5381,
324
  "step": 1275
325
  },
326
  {
327
- "epoch": 1.1,
 
328
  "learning_rate": 8.233333333333335e-06,
329
- "loss": 0.5005,
330
  "step": 1300
331
  },
332
  {
333
- "epoch": 1.11,
 
334
  "learning_rate": 8.177777777777779e-06,
335
- "loss": 0.3982,
336
  "step": 1325
337
  },
338
  {
339
- "epoch": 1.11,
 
340
  "learning_rate": 8.122222222222223e-06,
341
- "loss": 0.5537,
342
  "step": 1350
343
  },
344
  {
345
- "epoch": 1.12,
 
346
  "learning_rate": 8.066666666666667e-06,
347
- "loss": 0.3985,
348
  "step": 1375
349
  },
350
  {
351
- "epoch": 1.12,
 
352
  "learning_rate": 8.011111111111113e-06,
353
- "loss": 0.3653,
354
  "step": 1400
355
  },
356
  {
357
- "epoch": 1.13,
 
358
  "learning_rate": 7.955555555555557e-06,
359
- "loss": 0.4001,
360
  "step": 1425
361
  },
362
  {
363
- "epoch": 1.13,
364
- "learning_rate": 7.9e-06,
365
- "loss": 0.4278,
 
366
  "step": 1450
367
  },
368
  {
369
- "epoch": 1.14,
370
- "learning_rate": 7.844444444444446e-06,
371
- "loss": 0.3982,
 
372
  "step": 1475
373
  },
374
  {
375
- "epoch": 1.14,
376
- "learning_rate": 7.788888888888889e-06,
377
- "loss": 0.3967,
 
378
  "step": 1500
379
  },
380
  {
381
- "epoch": 1.15,
382
- "learning_rate": 7.733333333333334e-06,
383
- "loss": 0.4366,
 
384
  "step": 1525
385
  },
386
  {
387
- "epoch": 1.15,
388
- "learning_rate": 7.677777777777778e-06,
389
- "loss": 0.4665,
 
390
  "step": 1550
391
  },
392
  {
393
- "epoch": 1.16,
394
- "learning_rate": 7.622222222222223e-06,
395
- "loss": 0.4292,
 
396
  "step": 1575
397
  },
398
  {
399
- "epoch": 2.0,
400
- "learning_rate": 7.566666666666667e-06,
401
- "loss": 0.2518,
 
402
  "step": 1600
403
  },
404
  {
405
- "epoch": 2.01,
406
- "learning_rate": 7.511111111111111e-06,
407
- "loss": 0.1433,
 
408
  "step": 1625
409
  },
410
  {
411
- "epoch": 2.01,
412
- "learning_rate": 7.455555555555556e-06,
413
- "loss": 0.1932,
 
414
  "step": 1650
415
  },
416
  {
417
- "epoch": 2.02,
418
- "learning_rate": 7.4e-06,
419
- "loss": 0.1875,
 
420
  "step": 1675
421
  },
422
  {
423
- "epoch": 2.02,
424
- "learning_rate": 7.344444444444445e-06,
425
- "loss": 0.2236,
 
426
  "step": 1700
427
  },
428
  {
429
- "epoch": 2.03,
430
- "learning_rate": 7.28888888888889e-06,
431
- "loss": 0.2445,
 
432
  "step": 1725
433
  },
434
  {
435
- "epoch": 2.03,
436
- "learning_rate": 7.233333333333334e-06,
437
- "loss": 0.1751,
 
438
  "step": 1750
439
  },
440
  {
441
- "epoch": 2.04,
442
- "learning_rate": 7.177777777777778e-06,
443
- "loss": 0.1976,
 
444
  "step": 1775
445
  },
446
  {
447
- "epoch": 2.04,
448
- "learning_rate": 7.122222222222222e-06,
449
- "loss": 0.1307,
 
450
  "step": 1800
451
  },
452
  {
453
- "epoch": 2.05,
454
- "learning_rate": 7.066666666666667e-06,
455
- "loss": 0.1849,
 
456
  "step": 1825
457
  },
458
  {
459
- "epoch": 2.05,
460
- "learning_rate": 7.011111111111112e-06,
461
- "loss": 0.1233,
 
462
  "step": 1850
463
  },
464
  {
465
- "epoch": 2.06,
466
- "learning_rate": 6.955555555555557e-06,
467
- "loss": 0.136,
 
468
  "step": 1875
469
  },
470
  {
471
- "epoch": 2.06,
472
- "learning_rate": 6.9e-06,
473
- "loss": 0.1014,
 
474
  "step": 1900
475
  },
476
  {
477
- "epoch": 2.07,
478
- "learning_rate": 6.844444444444445e-06,
479
- "loss": 0.1366,
 
480
  "step": 1925
481
  },
482
  {
483
- "epoch": 2.07,
484
- "learning_rate": 6.788888888888889e-06,
485
- "loss": 0.2011,
 
486
  "step": 1950
487
  },
488
  {
489
- "epoch": 2.08,
490
- "learning_rate": 6.733333333333334e-06,
491
- "loss": 0.0962,
 
492
  "step": 1975
493
  },
494
  {
495
- "epoch": 2.08,
496
- "learning_rate": 6.677777777777779e-06,
497
- "loss": 0.1467,
 
498
  "step": 2000
499
  },
500
  {
501
- "epoch": 2.08,
502
- "eval_cer": 51.01116812556595,
503
- "eval_loss": 1.040711760520935,
504
- "eval_runtime": 219.0004,
505
- "eval_samples_per_second": 4.502,
506
- "eval_steps_per_second": 2.251,
507
- "eval_wer": 50.961832061068705,
 
508
  "step": 2000
509
  },
510
  {
511
- "epoch": 2.09,
512
- "learning_rate": 6.6222222222222236e-06,
513
- "loss": 0.1376,
 
514
  "step": 2025
515
  },
516
  {
517
- "epoch": 2.09,
518
- "learning_rate": 6.566666666666667e-06,
519
- "loss": 0.1322,
 
520
  "step": 2050
521
  },
522
  {
523
- "epoch": 2.1,
524
- "learning_rate": 6.511111111111112e-06,
525
- "loss": 0.0889,
 
526
  "step": 2075
527
  },
528
  {
529
- "epoch": 2.1,
530
- "learning_rate": 6.455555555555556e-06,
531
- "loss": 0.0725,
 
532
  "step": 2100
533
  },
534
  {
535
- "epoch": 2.11,
536
- "learning_rate": 6.4000000000000006e-06,
537
- "loss": 0.084,
 
538
  "step": 2125
539
  },
540
  {
541
- "epoch": 2.11,
542
- "learning_rate": 6.3444444444444454e-06,
543
- "loss": 0.078,
 
544
  "step": 2150
545
  },
546
  {
547
- "epoch": 2.12,
548
- "learning_rate": 6.28888888888889e-06,
549
- "loss": 0.0666,
 
550
  "step": 2175
551
  },
552
  {
553
- "epoch": 2.12,
554
- "learning_rate": 6.2333333333333335e-06,
555
- "loss": 0.1335,
 
556
  "step": 2200
557
  },
558
  {
559
- "epoch": 2.13,
560
- "learning_rate": 6.177777777777778e-06,
561
- "loss": 0.0919,
 
562
  "step": 2225
563
  },
564
  {
565
- "epoch": 2.13,
566
- "learning_rate": 6.1222222222222224e-06,
567
- "loss": 0.0966,
 
568
  "step": 2250
569
  },
570
  {
571
- "epoch": 2.14,
572
- "learning_rate": 6.066666666666667e-06,
573
- "loss": 0.0947,
 
574
  "step": 2275
575
  },
576
  {
577
- "epoch": 2.14,
578
- "learning_rate": 6.011111111111112e-06,
579
- "loss": 0.0731,
 
580
  "step": 2300
581
  },
582
  {
583
- "epoch": 2.15,
584
- "learning_rate": 5.955555555555555e-06,
585
- "loss": 0.1585,
 
586
  "step": 2325
587
  },
588
  {
589
- "epoch": 2.15,
590
- "learning_rate": 5.9e-06,
591
- "loss": 0.1475,
 
592
  "step": 2350
593
  },
594
  {
595
- "epoch": 3.0,
596
- "learning_rate": 5.844444444444445e-06,
597
- "loss": 0.0635,
 
598
  "step": 2375
599
  },
600
  {
601
- "epoch": 3.01,
602
- "learning_rate": 5.788888888888889e-06,
603
- "loss": 0.0489,
 
604
  "step": 2400
605
  },
606
  {
607
- "epoch": 3.01,
608
- "learning_rate": 5.733333333333334e-06,
609
- "loss": 0.0325,
 
610
  "step": 2425
611
  },
612
  {
613
- "epoch": 3.02,
614
- "learning_rate": 5.677777777777779e-06,
615
- "loss": 0.0661,
 
616
  "step": 2450
617
  },
618
  {
619
- "epoch": 3.02,
620
- "learning_rate": 5.622222222222222e-06,
621
- "loss": 0.0433,
 
622
  "step": 2475
623
  },
624
  {
625
- "epoch": 3.03,
626
- "learning_rate": 5.566666666666667e-06,
627
- "loss": 0.0215,
 
628
  "step": 2500
629
  },
630
  {
631
- "epoch": 3.03,
632
- "learning_rate": 5.511111111111112e-06,
633
- "loss": 0.0477,
 
634
  "step": 2525
635
  },
636
  {
637
- "epoch": 3.04,
638
- "learning_rate": 5.455555555555556e-06,
639
- "loss": 0.0315,
 
640
  "step": 2550
641
  },
642
  {
643
- "epoch": 3.04,
644
- "learning_rate": 5.400000000000001e-06,
645
- "loss": 0.0798,
 
646
  "step": 2575
647
  },
648
  {
649
- "epoch": 3.05,
650
- "learning_rate": 5.344444444444446e-06,
651
- "loss": 0.0298,
 
652
  "step": 2600
653
  },
654
  {
655
- "epoch": 3.05,
656
- "learning_rate": 5.288888888888889e-06,
657
- "loss": 0.033,
 
658
  "step": 2625
659
  },
660
  {
661
- "epoch": 3.06,
662
- "learning_rate": 5.233333333333334e-06,
663
- "loss": 0.0151,
 
664
  "step": 2650
665
  },
666
  {
667
- "epoch": 3.06,
668
- "learning_rate": 5.177777777777779e-06,
669
- "loss": 0.0465,
 
670
  "step": 2675
671
  },
672
  {
673
- "epoch": 3.07,
674
- "learning_rate": 5.122222222222223e-06,
675
- "loss": 0.0138,
 
676
  "step": 2700
677
  },
678
  {
679
- "epoch": 3.07,
680
- "learning_rate": 5.0666666666666676e-06,
681
- "loss": 0.0293,
 
682
  "step": 2725
683
  },
684
  {
685
- "epoch": 3.08,
686
- "learning_rate": 5.011111111111111e-06,
687
- "loss": 0.0296,
 
688
  "step": 2750
689
  },
690
  {
691
- "epoch": 3.08,
692
- "learning_rate": 4.9555555555555565e-06,
693
- "loss": 0.0336,
 
694
  "step": 2775
695
  },
696
  {
697
- "epoch": 3.09,
698
- "learning_rate": 4.9000000000000005e-06,
699
- "loss": 0.0377,
 
700
  "step": 2800
701
  },
702
  {
703
- "epoch": 3.09,
704
- "learning_rate": 4.8444444444444446e-06,
705
- "loss": 0.0345,
 
706
  "step": 2825
707
  },
708
  {
709
- "epoch": 3.1,
710
- "learning_rate": 4.7888888888888894e-06,
711
- "loss": 0.0129,
 
712
  "step": 2850
713
  },
714
  {
715
- "epoch": 3.1,
716
- "learning_rate": 4.7333333333333335e-06,
717
- "loss": 0.0141,
 
718
  "step": 2875
719
  },
720
  {
721
- "epoch": 3.11,
722
- "learning_rate": 4.677777777777778e-06,
723
- "loss": 0.0165,
 
724
  "step": 2900
725
  },
726
  {
727
- "epoch": 3.11,
728
- "learning_rate": 4.622222222222222e-06,
729
- "loss": 0.0318,
 
730
  "step": 2925
731
  },
732
  {
733
- "epoch": 3.12,
734
- "learning_rate": 4.566666666666667e-06,
735
- "loss": 0.0161,
 
736
  "step": 2950
737
  },
738
  {
739
- "epoch": 3.12,
740
- "learning_rate": 4.511111111111111e-06,
741
- "loss": 0.028,
 
742
  "step": 2975
743
  },
744
  {
745
- "epoch": 3.13,
746
- "learning_rate": 4.455555555555555e-06,
747
- "loss": 0.016,
 
748
  "step": 3000
749
  },
750
  {
751
- "epoch": 3.13,
752
- "eval_cer": 46.5137337760338,
753
- "eval_loss": 1.0225664377212524,
754
- "eval_runtime": 217.5747,
755
- "eval_samples_per_second": 4.532,
756
- "eval_steps_per_second": 2.266,
757
- "eval_wer": 46.44274809160305,
 
758
  "step": 3000
759
  },
760
  {
761
- "epoch": 3.13,
762
- "learning_rate": 4.4e-06,
763
- "loss": 0.0509,
 
764
  "step": 3025
765
  },
766
  {
767
- "epoch": 3.14,
768
- "learning_rate": 4.344444444444445e-06,
769
- "loss": 0.0254,
 
770
  "step": 3050
771
  },
772
  {
773
- "epoch": 3.14,
774
- "learning_rate": 4.288888888888889e-06,
775
- "loss": 0.0075,
 
776
  "step": 3075
777
  },
778
  {
779
- "epoch": 3.15,
780
- "learning_rate": 4.233333333333334e-06,
781
- "loss": 0.0303,
 
782
  "step": 3100
783
  },
784
  {
785
- "epoch": 3.15,
786
- "learning_rate": 4.177777777777778e-06,
787
- "loss": 0.0127,
 
788
  "step": 3125
789
  },
790
  {
791
- "epoch": 3.16,
792
- "learning_rate": 4.122222222222222e-06,
793
- "loss": 0.0184,
 
794
  "step": 3150
795
  },
796
  {
797
- "epoch": 4.0,
798
- "learning_rate": 4.066666666666667e-06,
799
- "loss": 0.0311,
 
800
  "step": 3175
801
  },
802
  {
803
- "epoch": 4.01,
804
- "learning_rate": 4.011111111111111e-06,
805
- "loss": 0.0012,
 
806
  "step": 3200
807
  },
808
  {
809
- "epoch": 4.01,
810
- "learning_rate": 3.955555555555556e-06,
811
- "loss": 0.0106,
 
812
  "step": 3225
813
  },
814
  {
815
- "epoch": 4.02,
816
- "learning_rate": 3.900000000000001e-06,
817
- "loss": 0.0085,
 
818
  "step": 3250
819
  },
820
  {
821
- "epoch": 4.02,
822
- "learning_rate": 3.844444444444445e-06,
823
- "loss": 0.0016,
 
824
  "step": 3275
825
  },
826
  {
827
- "epoch": 4.03,
828
- "learning_rate": 3.7888888888888893e-06,
829
- "loss": 0.0083,
 
830
  "step": 3300
831
  },
832
  {
833
- "epoch": 4.03,
834
- "learning_rate": 3.7333333333333337e-06,
835
- "loss": 0.0099,
 
836
  "step": 3325
837
  },
838
  {
839
- "epoch": 4.04,
840
- "learning_rate": 3.6777777777777778e-06,
841
- "loss": 0.0017,
 
842
  "step": 3350
843
  },
844
  {
845
- "epoch": 4.04,
846
- "learning_rate": 3.6222222222222226e-06,
847
- "loss": 0.0097,
 
848
  "step": 3375
849
  },
850
  {
851
- "epoch": 4.05,
852
- "learning_rate": 3.566666666666667e-06,
853
- "loss": 0.0083,
 
854
  "step": 3400
855
  },
856
  {
857
- "epoch": 4.05,
858
- "learning_rate": 3.511111111111111e-06,
859
- "loss": 0.0182,
 
860
  "step": 3425
861
  },
862
  {
863
- "epoch": 4.06,
864
- "learning_rate": 3.455555555555556e-06,
865
- "loss": 0.0041,
 
866
  "step": 3450
867
  },
868
  {
869
- "epoch": 4.06,
870
- "learning_rate": 3.4000000000000005e-06,
871
- "loss": 0.0321,
 
872
  "step": 3475
873
  },
874
  {
875
- "epoch": 4.07,
876
- "learning_rate": 3.3444444444444445e-06,
877
- "loss": 0.0009,
 
878
  "step": 3500
879
  },
880
  {
881
- "epoch": 4.07,
882
- "learning_rate": 3.2888888888888894e-06,
883
- "loss": 0.0008,
 
884
  "step": 3525
885
  },
886
  {
887
- "epoch": 4.08,
888
- "learning_rate": 3.2333333333333334e-06,
889
- "loss": 0.0016,
 
890
  "step": 3550
891
  },
892
  {
893
- "epoch": 4.08,
894
- "learning_rate": 3.177777777777778e-06,
895
- "loss": 0.0368,
 
896
  "step": 3575
897
  },
898
  {
899
- "epoch": 4.09,
900
- "learning_rate": 3.1222222222222228e-06,
901
- "loss": 0.006,
 
902
  "step": 3600
903
  },
904
  {
905
- "epoch": 4.09,
906
- "learning_rate": 3.066666666666667e-06,
907
- "loss": 0.0023,
 
908
  "step": 3625
909
  },
910
  {
911
- "epoch": 4.1,
912
- "learning_rate": 3.0111111111111113e-06,
913
- "loss": 0.001,
 
914
  "step": 3650
915
  },
916
  {
917
- "epoch": 4.1,
918
- "learning_rate": 2.955555555555556e-06,
919
- "loss": 0.0012,
 
920
  "step": 3675
921
  },
922
  {
923
- "epoch": 4.11,
924
- "learning_rate": 2.9e-06,
925
- "loss": 0.0009,
 
926
  "step": 3700
927
  },
928
  {
929
- "epoch": 4.11,
930
- "learning_rate": 2.8444444444444446e-06,
931
- "loss": 0.0009,
 
932
  "step": 3725
933
  },
934
  {
935
- "epoch": 4.12,
936
- "learning_rate": 2.788888888888889e-06,
937
- "loss": 0.0005,
 
938
  "step": 3750
939
  },
940
  {
941
- "epoch": 4.12,
942
- "learning_rate": 2.7333333333333336e-06,
943
- "loss": 0.0095,
 
944
  "step": 3775
945
  },
946
  {
947
- "epoch": 4.13,
948
- "learning_rate": 2.677777777777778e-06,
949
- "loss": 0.0072,
 
950
  "step": 3800
951
  },
952
  {
953
- "epoch": 4.13,
954
- "learning_rate": 2.6222222222222225e-06,
955
- "loss": 0.0203,
 
956
  "step": 3825
957
  },
958
  {
959
- "epoch": 4.14,
960
- "learning_rate": 2.566666666666667e-06,
961
- "loss": 0.001,
 
962
  "step": 3850
963
  },
964
  {
965
- "epoch": 4.14,
966
- "learning_rate": 2.5111111111111114e-06,
967
- "loss": 0.0004,
 
968
  "step": 3875
969
  },
970
  {
971
- "epoch": 4.15,
972
- "learning_rate": 2.455555555555556e-06,
973
- "loss": 0.0224,
 
974
  "step": 3900
975
  },
976
  {
977
- "epoch": 4.15,
978
- "learning_rate": 2.4000000000000003e-06,
979
- "loss": 0.0012,
 
980
  "step": 3925
981
  },
982
  {
983
- "epoch": 5.0,
984
- "learning_rate": 2.3444444444444448e-06,
985
- "loss": 0.0002,
 
986
  "step": 3950
987
  },
988
  {
989
- "epoch": 5.01,
990
- "learning_rate": 2.2888888888888892e-06,
991
- "loss": 0.0003,
 
992
  "step": 3975
993
  },
994
  {
995
- "epoch": 5.01,
996
- "learning_rate": 2.2333333333333333e-06,
997
- "loss": 0.0001,
 
998
  "step": 4000
999
  },
1000
  {
1001
- "epoch": 5.01,
1002
- "eval_cer": 45.60821008149713,
1003
- "eval_loss": 0.9974298477172852,
1004
- "eval_runtime": 214.4422,
1005
- "eval_samples_per_second": 4.598,
1006
- "eval_steps_per_second": 2.299,
1007
- "eval_wer": 45.465648854961835,
 
1008
  "step": 4000
1009
  },
1010
  {
1011
- "epoch": 5.02,
1012
- "learning_rate": 2.1777777777777777e-06,
1013
- "loss": 0.0002,
 
1014
  "step": 4025
1015
  },
1016
  {
1017
- "epoch": 5.02,
1018
- "learning_rate": 2.1222222222222226e-06,
1019
- "loss": 0.0003,
 
1020
  "step": 4050
1021
  },
1022
  {
1023
- "epoch": 5.03,
1024
- "learning_rate": 2.0666666666666666e-06,
1025
- "loss": 0.0011,
 
1026
  "step": 4075
1027
  },
1028
  {
1029
- "epoch": 5.03,
1030
- "learning_rate": 2.011111111111111e-06,
1031
- "loss": 0.0002,
 
1032
  "step": 4100
1033
  },
1034
  {
1035
- "epoch": 5.04,
1036
- "learning_rate": 1.955555555555556e-06,
1037
- "loss": 0.0005,
 
1038
  "step": 4125
1039
  },
1040
  {
1041
- "epoch": 5.04,
1042
- "learning_rate": 1.9000000000000002e-06,
1043
- "loss": 0.0003,
 
1044
  "step": 4150
1045
  },
1046
  {
1047
- "epoch": 5.05,
1048
- "learning_rate": 1.8444444444444445e-06,
1049
- "loss": 0.0001,
 
1050
  "step": 4175
1051
  },
1052
  {
1053
- "epoch": 5.05,
1054
- "learning_rate": 1.788888888888889e-06,
1055
- "loss": 0.0002,
 
1056
  "step": 4200
1057
  },
1058
  {
1059
- "epoch": 5.06,
1060
- "learning_rate": 1.7333333333333336e-06,
1061
- "loss": 0.0018,
 
1062
  "step": 4225
1063
  },
1064
  {
1065
- "epoch": 5.06,
1066
- "learning_rate": 1.6777777777777779e-06,
1067
- "loss": 0.0002,
 
1068
  "step": 4250
1069
  },
1070
  {
1071
- "epoch": 5.07,
1072
- "learning_rate": 1.6222222222222223e-06,
1073
- "loss": 0.0001,
 
1074
  "step": 4275
1075
  },
1076
  {
1077
- "epoch": 5.07,
1078
- "learning_rate": 1.566666666666667e-06,
1079
- "loss": 0.0037,
 
1080
  "step": 4300
1081
  },
1082
  {
1083
- "epoch": 5.08,
1084
- "learning_rate": 1.5111111111111112e-06,
1085
- "loss": 0.0002,
 
1086
  "step": 4325
1087
  },
1088
  {
1089
- "epoch": 5.08,
1090
- "learning_rate": 1.4555555555555557e-06,
1091
- "loss": 0.0001,
 
1092
  "step": 4350
1093
  },
1094
  {
1095
- "epoch": 5.09,
1096
- "learning_rate": 1.4000000000000001e-06,
1097
- "loss": 0.0001,
 
1098
  "step": 4375
1099
  },
1100
  {
1101
- "epoch": 5.09,
1102
- "learning_rate": 1.3444444444444446e-06,
1103
- "loss": 0.0004,
 
1104
  "step": 4400
1105
  },
1106
  {
1107
- "epoch": 5.1,
1108
- "learning_rate": 1.288888888888889e-06,
1109
- "loss": 0.0001,
 
1110
  "step": 4425
1111
  },
1112
  {
1113
- "epoch": 5.1,
1114
- "learning_rate": 1.2333333333333335e-06,
1115
- "loss": 0.0001,
 
1116
  "step": 4450
1117
  },
1118
  {
1119
- "epoch": 5.11,
1120
- "learning_rate": 1.1777777777777778e-06,
1121
- "loss": 0.0001,
 
1122
  "step": 4475
1123
  },
1124
  {
1125
- "epoch": 5.11,
1126
- "learning_rate": 1.1222222222222222e-06,
1127
- "loss": 0.0005,
 
1128
  "step": 4500
1129
  },
1130
  {
1131
- "epoch": 5.12,
1132
- "learning_rate": 1.066666666666667e-06,
1133
- "loss": 0.0014,
 
1134
  "step": 4525
1135
  },
1136
  {
1137
- "epoch": 5.12,
1138
- "learning_rate": 1.0111111111111111e-06,
1139
- "loss": 0.0001,
 
1140
  "step": 4550
1141
  },
1142
  {
1143
- "epoch": 5.13,
1144
- "learning_rate": 9.555555555555556e-07,
1145
- "loss": 0.0022,
 
1146
  "step": 4575
1147
  },
1148
  {
1149
- "epoch": 5.13,
1150
- "learning_rate": 9.000000000000001e-07,
1151
- "loss": 0.0001,
 
1152
  "step": 4600
1153
  },
1154
  {
1155
- "epoch": 5.14,
1156
- "learning_rate": 8.444444444444445e-07,
1157
- "loss": 0.0003,
 
1158
  "step": 4625
1159
  },
1160
  {
1161
- "epoch": 5.14,
1162
- "learning_rate": 7.888888888888889e-07,
1163
- "loss": 0.0001,
 
1164
  "step": 4650
1165
  },
1166
  {
1167
- "epoch": 5.15,
1168
- "learning_rate": 7.333333333333334e-07,
1169
- "loss": 0.0003,
 
1170
  "step": 4675
1171
  },
1172
  {
1173
- "epoch": 5.15,
1174
- "learning_rate": 6.777777777777779e-07,
1175
- "loss": 0.0001,
 
1176
  "step": 4700
1177
  },
1178
  {
1179
- "epoch": 5.16,
1180
- "learning_rate": 6.222222222222223e-07,
1181
- "loss": 0.0001,
 
1182
  "step": 4725
1183
  },
1184
  {
1185
- "epoch": 6.0,
1186
- "learning_rate": 5.666666666666667e-07,
1187
- "loss": 0.0001,
 
1188
  "step": 4750
1189
  },
1190
  {
1191
- "epoch": 6.01,
1192
- "learning_rate": 5.111111111111112e-07,
1193
- "loss": 0.0001,
 
1194
  "step": 4775
1195
  },
1196
  {
1197
- "epoch": 6.01,
1198
- "learning_rate": 4.5555555555555563e-07,
1199
- "loss": 0.0001,
 
1200
  "step": 4800
1201
  },
1202
  {
1203
- "epoch": 6.02,
1204
- "learning_rate": 4.0000000000000003e-07,
1205
- "loss": 0.0001,
 
1206
  "step": 4825
1207
  },
1208
  {
1209
- "epoch": 6.02,
1210
- "learning_rate": 3.444444444444445e-07,
1211
- "loss": 0.0001,
 
1212
  "step": 4850
1213
  },
1214
  {
1215
- "epoch": 6.03,
1216
- "learning_rate": 2.888888888888889e-07,
1217
- "loss": 0.0001,
 
1218
  "step": 4875
1219
  },
1220
  {
1221
- "epoch": 6.03,
1222
- "learning_rate": 2.3333333333333336e-07,
1223
- "loss": 0.0001,
 
1224
  "step": 4900
1225
  },
1226
  {
1227
- "epoch": 6.04,
1228
- "learning_rate": 1.777777777777778e-07,
1229
- "loss": 0.0001,
 
1230
  "step": 4925
1231
  },
1232
  {
1233
- "epoch": 6.04,
1234
- "learning_rate": 1.2222222222222225e-07,
1235
- "loss": 0.0001,
 
1236
  "step": 4950
1237
  },
1238
  {
1239
- "epoch": 6.05,
1240
- "learning_rate": 6.666666666666668e-08,
1241
- "loss": 0.0001,
 
1242
  "step": 4975
1243
  },
1244
  {
1245
- "epoch": 6.05,
1246
- "learning_rate": 1.1111111111111112e-08,
1247
- "loss": 0.0001,
 
1248
  "step": 5000
1249
  },
1250
  {
1251
- "epoch": 6.05,
1252
- "eval_cer": 45.36673709628735,
1253
- "eval_loss": 0.994395911693573,
1254
- "eval_runtime": 216.4096,
1255
- "eval_samples_per_second": 4.556,
1256
- "eval_steps_per_second": 2.278,
1257
- "eval_wer": 45.2824427480916,
 
1258
  "step": 5000
1259
  },
1260
  {
1261
- "epoch": 6.05,
1262
  "step": 5000,
1263
- "total_flos": 1.019992544575488e+19,
1264
- "train_loss": 0.3284327008752851,
1265
- "train_runtime": 3725.671,
1266
- "train_samples_per_second": 2.684,
1267
- "train_steps_per_second": 1.342
1268
  }
1269
  ],
 
1270
  "max_steps": 5000,
 
1271
  "num_train_epochs": 9223372036854775807,
1272
- "total_flos": 1.019992544575488e+19,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1273
  "trial_name": null,
1274
  "trial_params": null
1275
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.2776,
5
+ "eval_steps": 1000,
6
  "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.005,
13
+ "grad_norm": 88.54288482666016,
14
  "learning_rate": 4.2000000000000006e-07,
15
+ "loss": 2.5968,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.01,
20
+ "grad_norm": 71.25262451171875,
21
  "learning_rate": 9.200000000000001e-07,
22
+ "loss": 2.3534,
23
  "step": 50
24
  },
25
  {
26
+ "epoch": 0.015,
27
+ "grad_norm": 80.52667999267578,
28
  "learning_rate": 1.42e-06,
29
+ "loss": 1.9642,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 0.02,
34
+ "grad_norm": 57.064151763916016,
35
  "learning_rate": 1.9200000000000003e-06,
36
+ "loss": 1.6321,
37
  "step": 100
38
  },
39
  {
40
+ "epoch": 0.025,
41
+ "grad_norm": 50.983245849609375,
42
+ "learning_rate": 2.42e-06,
43
+ "loss": 1.5842,
44
  "step": 125
45
  },
46
  {
47
  "epoch": 0.03,
48
+ "grad_norm": 52.22635269165039,
49
+ "learning_rate": 2.92e-06,
50
+ "loss": 1.4106,
51
  "step": 150
52
  },
53
  {
54
+ "epoch": 0.035,
55
+ "grad_norm": 53.9218864440918,
56
+ "learning_rate": 3.4200000000000007e-06,
57
+ "loss": 1.417,
58
  "step": 175
59
  },
60
  {
61
  "epoch": 0.04,
62
+ "grad_norm": 48.82598876953125,
63
+ "learning_rate": 3.920000000000001e-06,
64
+ "loss": 1.4496,
65
  "step": 200
66
  },
67
  {
68
+ "epoch": 0.045,
69
+ "grad_norm": 63.08296585083008,
70
+ "learning_rate": 4.42e-06,
71
+ "loss": 1.3191,
72
  "step": 225
73
  },
74
  {
75
  "epoch": 0.05,
76
+ "grad_norm": 41.98011016845703,
77
+ "learning_rate": 4.92e-06,
78
+ "loss": 1.0756,
79
  "step": 250
80
  },
81
  {
82
+ "epoch": 0.055,
83
+ "grad_norm": 53.777217864990234,
84
+ "learning_rate": 5.420000000000001e-06,
85
+ "loss": 1.2538,
86
  "step": 275
87
  },
88
  {
89
  "epoch": 0.06,
90
+ "grad_norm": 57.49655532836914,
91
+ "learning_rate": 5.92e-06,
92
+ "loss": 1.2225,
93
  "step": 300
94
  },
95
  {
96
+ "epoch": 0.065,
97
+ "grad_norm": 52.660003662109375,
98
  "learning_rate": 6.4000000000000006e-06,
99
+ "loss": 1.2031,
100
  "step": 325
101
  },
102
  {
103
  "epoch": 0.07,
104
+ "grad_norm": 45.03782272338867,
105
  "learning_rate": 6.9e-06,
106
+ "loss": 1.2953,
107
  "step": 350
108
  },
109
  {
110
+ "epoch": 0.075,
111
+ "grad_norm": 49.226261138916016,
112
  "learning_rate": 7.4e-06,
113
+ "loss": 1.0642,
114
  "step": 375
115
  },
116
  {
117
  "epoch": 0.08,
118
+ "grad_norm": 33.70058822631836,
119
  "learning_rate": 7.9e-06,
120
+ "loss": 1.0962,
121
  "step": 400
122
  },
123
  {
124
+ "epoch": 0.085,
125
+ "grad_norm": 47.71772766113281,
126
  "learning_rate": 8.400000000000001e-06,
127
+ "loss": 1.3042,
128
  "step": 425
129
  },
130
  {
131
  "epoch": 0.09,
132
+ "grad_norm": 55.22367477416992,
133
  "learning_rate": 8.900000000000001e-06,
134
+ "loss": 1.3794,
135
  "step": 450
136
  },
137
  {
138
+ "epoch": 0.095,
139
+ "grad_norm": 62.34690475463867,
140
  "learning_rate": 9.4e-06,
141
+ "loss": 1.1516,
142
  "step": 475
143
  },
144
  {
145
  "epoch": 0.1,
146
+ "grad_norm": 84.5876693725586,
147
  "learning_rate": 9.9e-06,
148
+ "loss": 1.2527,
149
  "step": 500
150
  },
151
  {
152
+ "epoch": 0.105,
153
+ "grad_norm": 53.2620735168457,
154
  "learning_rate": 9.955555555555556e-06,
155
+ "loss": 1.1349,
156
  "step": 525
157
  },
158
  {
159
  "epoch": 0.11,
160
+ "grad_norm": 54.91654586791992,
161
  "learning_rate": 9.9e-06,
162
+ "loss": 1.2107,
163
  "step": 550
164
  },
165
  {
166
+ "epoch": 0.115,
167
+ "grad_norm": 43.851173400878906,
168
  "learning_rate": 9.844444444444446e-06,
169
+ "loss": 1.4065,
170
  "step": 575
171
  },
172
  {
173
  "epoch": 0.12,
174
+ "grad_norm": 48.8751335144043,
175
  "learning_rate": 9.78888888888889e-06,
176
+ "loss": 1.2255,
177
  "step": 600
178
  },
179
  {
180
+ "epoch": 0.125,
181
+ "grad_norm": 58.15187454223633,
182
  "learning_rate": 9.733333333333334e-06,
183
+ "loss": 1.1948,
184
  "step": 625
185
  },
186
  {
187
  "epoch": 0.13,
188
+ "grad_norm": 53.069602966308594,
189
  "learning_rate": 9.677777777777778e-06,
190
+ "loss": 1.2878,
191
  "step": 650
192
  },
193
  {
194
+ "epoch": 0.135,
195
+ "grad_norm": 45.524715423583984,
196
  "learning_rate": 9.622222222222222e-06,
197
+ "loss": 1.1325,
198
  "step": 675
199
  },
200
  {
201
  "epoch": 0.14,
202
+ "grad_norm": 26.86398696899414,
203
  "learning_rate": 9.566666666666668e-06,
204
+ "loss": 1.0762,
205
  "step": 700
206
  },
207
  {
208
+ "epoch": 0.145,
209
+ "grad_norm": 48.63008499145508,
210
  "learning_rate": 9.511111111111112e-06,
211
+ "loss": 1.279,
212
  "step": 725
213
  },
214
  {
215
  "epoch": 0.15,
216
+ "grad_norm": 50.58299255371094,
217
  "learning_rate": 9.455555555555557e-06,
218
+ "loss": 1.0627,
219
  "step": 750
220
  },
221
  {
222
+ "epoch": 0.155,
223
+ "grad_norm": 32.71992492675781,
224
  "learning_rate": 9.4e-06,
225
+ "loss": 1.0438,
226
  "step": 775
227
  },
228
  {
229
+ "epoch": 0.16,
230
+ "grad_norm": 42.12641143798828,
231
  "learning_rate": 9.344444444444446e-06,
232
+ "loss": 1.1204,
233
  "step": 800
234
  },
235
  {
236
+ "epoch": 0.165,
237
+ "grad_norm": 44.01963806152344,
238
  "learning_rate": 9.28888888888889e-06,
239
+ "loss": 0.908,
240
  "step": 825
241
  },
242
  {
243
+ "epoch": 0.17,
244
+ "grad_norm": 39.93177795410156,
245
  "learning_rate": 9.233333333333334e-06,
246
+ "loss": 1.158,
247
  "step": 850
248
  },
249
  {
250
+ "epoch": 0.175,
251
+ "grad_norm": 38.166259765625,
252
  "learning_rate": 9.17777777777778e-06,
253
+ "loss": 1.2021,
254
  "step": 875
255
  },
256
  {
257
+ "epoch": 0.18,
258
+ "grad_norm": 40.696678161621094,
259
  "learning_rate": 9.122222222222223e-06,
260
+ "loss": 1.0054,
261
  "step": 900
262
  },
263
  {
264
+ "epoch": 0.185,
265
+ "grad_norm": 48.79116439819336,
266
  "learning_rate": 9.066666666666667e-06,
267
+ "loss": 0.9587,
268
  "step": 925
269
  },
270
  {
271
+ "epoch": 0.19,
272
+ "grad_norm": 39.57270431518555,
273
  "learning_rate": 9.011111111111111e-06,
274
+ "loss": 1.1084,
275
  "step": 950
276
  },
277
  {
278
+ "epoch": 0.195,
279
+ "grad_norm": 54.647216796875,
280
  "learning_rate": 8.955555555555555e-06,
281
+ "loss": 1.0416,
282
  "step": 975
283
  },
284
  {
285
+ "epoch": 0.2,
286
+ "grad_norm": 42.69215774536133,
287
  "learning_rate": 8.900000000000001e-06,
288
+ "loss": 0.97,
289
  "step": 1000
290
  },
291
  {
292
+ "epoch": 0.2,
293
+ "eval_cer": 38.476190476190474,
294
+ "eval_loss": 0.7355929613113403,
295
+ "eval_model_preparation_time": 0.0121,
296
+ "eval_runtime": 116.5648,
297
+ "eval_samples_per_second": 2.574,
298
+ "eval_steps_per_second": 1.287,
299
+ "eval_wer": 38.17307692307692,
300
  "step": 1000
301
  },
302
  {
303
+ "epoch": 0.205,
304
+ "grad_norm": 53.78874206542969,
305
  "learning_rate": 8.844444444444445e-06,
306
+ "loss": 1.2455,
307
  "step": 1025
308
  },
309
  {
310
+ "epoch": 0.21,
311
+ "grad_norm": 34.4659423828125,
312
  "learning_rate": 8.788888888888891e-06,
313
+ "loss": 1.0515,
314
  "step": 1050
315
  },
316
  {
317
+ "epoch": 0.215,
318
+ "grad_norm": 51.5352668762207,
319
  "learning_rate": 8.733333333333333e-06,
320
+ "loss": 0.9412,
321
  "step": 1075
322
  },
323
  {
324
+ "epoch": 0.22,
325
+ "grad_norm": 41.004310607910156,
326
  "learning_rate": 8.677777777777779e-06,
327
+ "loss": 1.0831,
328
  "step": 1100
329
  },
330
  {
331
+ "epoch": 0.225,
332
+ "grad_norm": 43.79686737060547,
333
  "learning_rate": 8.622222222222223e-06,
334
+ "loss": 0.8895,
335
  "step": 1125
336
  },
337
  {
338
+ "epoch": 0.23,
339
+ "grad_norm": 32.330204010009766,
340
  "learning_rate": 8.566666666666667e-06,
341
+ "loss": 0.8984,
342
  "step": 1150
343
  },
344
  {
345
+ "epoch": 0.235,
346
+ "grad_norm": 43.38914108276367,
347
  "learning_rate": 8.511111111111113e-06,
348
+ "loss": 1.1215,
349
  "step": 1175
350
  },
351
  {
352
+ "epoch": 0.24,
353
+ "grad_norm": 62.52465057373047,
354
  "learning_rate": 8.455555555555555e-06,
355
+ "loss": 0.9912,
356
  "step": 1200
357
  },
358
  {
359
+ "epoch": 0.245,
360
+ "grad_norm": 39.1291618347168,
361
  "learning_rate": 8.400000000000001e-06,
362
+ "loss": 1.0422,
363
  "step": 1225
364
  },
365
  {
366
+ "epoch": 0.25,
367
+ "grad_norm": 53.39470291137695,
368
  "learning_rate": 8.344444444444445e-06,
369
+ "loss": 1.1502,
370
  "step": 1250
371
  },
372
  {
373
+ "epoch": 0.255,
374
+ "grad_norm": 42.28481674194336,
375
  "learning_rate": 8.288888888888889e-06,
376
+ "loss": 1.112,
377
  "step": 1275
378
  },
379
  {
380
+ "epoch": 0.26,
381
+ "grad_norm": 37.374542236328125,
382
  "learning_rate": 8.233333333333335e-06,
383
+ "loss": 1.0152,
384
  "step": 1300
385
  },
386
  {
387
+ "epoch": 0.265,
388
+ "grad_norm": 52.94354248046875,
389
  "learning_rate": 8.177777777777779e-06,
390
+ "loss": 0.9147,
391
  "step": 1325
392
  },
393
  {
394
+ "epoch": 0.27,
395
+ "grad_norm": 46.23354721069336,
396
  "learning_rate": 8.122222222222223e-06,
397
+ "loss": 0.8429,
398
  "step": 1350
399
  },
400
  {
401
+ "epoch": 0.275,
402
+ "grad_norm": 40.089210510253906,
403
  "learning_rate": 8.066666666666667e-06,
404
+ "loss": 0.8026,
405
  "step": 1375
406
  },
407
  {
408
+ "epoch": 0.28,
409
+ "grad_norm": 42.95212173461914,
410
  "learning_rate": 8.011111111111113e-06,
411
+ "loss": 0.7804,
412
  "step": 1400
413
  },
414
  {
415
+ "epoch": 0.285,
416
+ "grad_norm": 56.216678619384766,
417
  "learning_rate": 7.955555555555557e-06,
418
+ "loss": 0.8045,
419
  "step": 1425
420
  },
421
  {
422
+ "epoch": 0.29,
423
+ "grad_norm": 45.01803207397461,
424
+ "learning_rate": 7.902222222222223e-06,
425
+ "loss": 0.9807,
426
  "step": 1450
427
  },
428
  {
429
+ "epoch": 0.295,
430
+ "grad_norm": 63.81135940551758,
431
+ "learning_rate": 7.846666666666667e-06,
432
+ "loss": 0.9447,
433
  "step": 1475
434
  },
435
  {
436
+ "epoch": 0.3,
437
+ "grad_norm": 38.958457946777344,
438
+ "learning_rate": 7.791111111111111e-06,
439
+ "loss": 1.0144,
440
  "step": 1500
441
  },
442
  {
443
+ "epoch": 0.305,
444
+ "grad_norm": 48.87187957763672,
445
+ "learning_rate": 7.735555555555557e-06,
446
+ "loss": 1.0261,
447
  "step": 1525
448
  },
449
  {
450
+ "epoch": 0.31,
451
+ "grad_norm": 34.80329513549805,
452
+ "learning_rate": 7.680000000000001e-06,
453
+ "loss": 0.8152,
454
  "step": 1550
455
  },
456
  {
457
+ "epoch": 0.315,
458
+ "grad_norm": 45.12413024902344,
459
+ "learning_rate": 7.624444444444445e-06,
460
+ "loss": 0.8943,
461
  "step": 1575
462
  },
463
  {
464
+ "epoch": 0.32,
465
+ "grad_norm": 38.103729248046875,
466
+ "learning_rate": 7.56888888888889e-06,
467
+ "loss": 1.085,
468
  "step": 1600
469
  },
470
  {
471
+ "epoch": 0.325,
472
+ "grad_norm": 38.715492248535156,
473
+ "learning_rate": 7.513333333333334e-06,
474
+ "loss": 0.9853,
475
  "step": 1625
476
  },
477
  {
478
+ "epoch": 0.33,
479
+ "grad_norm": 42.01364517211914,
480
+ "learning_rate": 7.457777777777778e-06,
481
+ "loss": 0.8214,
482
  "step": 1650
483
  },
484
  {
485
+ "epoch": 0.335,
486
+ "grad_norm": 37.475799560546875,
487
+ "learning_rate": 7.402222222222223e-06,
488
+ "loss": 0.7706,
489
  "step": 1675
490
  },
491
  {
492
+ "epoch": 0.34,
493
+ "grad_norm": 40.387210845947266,
494
+ "learning_rate": 7.346666666666668e-06,
495
+ "loss": 1.081,
496
  "step": 1700
497
  },
498
  {
499
+ "epoch": 0.345,
500
+ "grad_norm": 21.215946197509766,
501
+ "learning_rate": 7.291111111111112e-06,
502
+ "loss": 0.8047,
503
  "step": 1725
504
  },
505
  {
506
+ "epoch": 0.35,
507
+ "grad_norm": 21.454147338867188,
508
+ "learning_rate": 7.235555555555556e-06,
509
+ "loss": 0.836,
510
  "step": 1750
511
  },
512
  {
513
+ "epoch": 0.355,
514
+ "grad_norm": 46.05310821533203,
515
+ "learning_rate": 7.180000000000001e-06,
516
+ "loss": 0.8883,
517
  "step": 1775
518
  },
519
  {
520
+ "epoch": 0.36,
521
+ "grad_norm": 61.988643646240234,
522
+ "learning_rate": 7.124444444444445e-06,
523
+ "loss": 0.8254,
524
  "step": 1800
525
  },
526
  {
527
+ "epoch": 1.0038,
528
+ "grad_norm": 34.00222396850586,
529
+ "learning_rate": 7.06888888888889e-06,
530
+ "loss": 0.5202,
531
  "step": 1825
532
  },
533
  {
534
+ "epoch": 1.0088,
535
+ "grad_norm": 23.966150283813477,
536
+ "learning_rate": 7.0133333333333345e-06,
537
+ "loss": 0.362,
538
  "step": 1850
539
  },
540
  {
541
+ "epoch": 1.0138,
542
+ "grad_norm": 19.050518035888672,
543
+ "learning_rate": 6.9577777777777785e-06,
544
+ "loss": 0.2891,
545
  "step": 1875
546
  },
547
  {
548
+ "epoch": 1.0188,
549
+ "grad_norm": 34.59785079956055,
550
+ "learning_rate": 6.902222222222223e-06,
551
+ "loss": 0.3438,
552
  "step": 1900
553
  },
554
  {
555
+ "epoch": 1.0238,
556
+ "grad_norm": 29.40850257873535,
557
+ "learning_rate": 6.846666666666667e-06,
558
+ "loss": 0.2822,
559
  "step": 1925
560
  },
561
  {
562
+ "epoch": 1.0288,
563
+ "grad_norm": 15.476716041564941,
564
+ "learning_rate": 6.7911111111111115e-06,
565
+ "loss": 0.4843,
566
  "step": 1950
567
  },
568
  {
569
+ "epoch": 1.0338,
570
+ "grad_norm": 33.912174224853516,
571
+ "learning_rate": 6.735555555555556e-06,
572
+ "loss": 0.3741,
573
  "step": 1975
574
  },
575
  {
576
+ "epoch": 1.0388,
577
+ "grad_norm": 30.411020278930664,
578
+ "learning_rate": 6.680000000000001e-06,
579
+ "loss": 0.3044,
580
  "step": 2000
581
  },
582
  {
583
+ "epoch": 1.0388,
584
+ "eval_cer": 23.904761904761905,
585
+ "eval_loss": 0.309874564409256,
586
+ "eval_model_preparation_time": 0.0121,
587
+ "eval_runtime": 119.6464,
588
+ "eval_samples_per_second": 2.507,
589
+ "eval_steps_per_second": 1.254,
590
+ "eval_wer": 23.46153846153846,
591
  "step": 2000
592
  },
593
  {
594
+ "epoch": 1.0438,
595
+ "grad_norm": 19.959854125976562,
596
+ "learning_rate": 6.6244444444444445e-06,
597
+ "loss": 0.2255,
598
  "step": 2025
599
  },
600
  {
601
+ "epoch": 1.0488,
602
+ "grad_norm": 20.751602172851562,
603
+ "learning_rate": 6.568888888888889e-06,
604
+ "loss": 0.3341,
605
  "step": 2050
606
  },
607
  {
608
+ "epoch": 1.0538,
609
+ "grad_norm": 24.52460479736328,
610
+ "learning_rate": 6.513333333333333e-06,
611
+ "loss": 0.4861,
612
  "step": 2075
613
  },
614
  {
615
+ "epoch": 1.0588,
616
+ "grad_norm": 5.740904331207275,
617
+ "learning_rate": 6.457777777777778e-06,
618
+ "loss": 0.4165,
619
  "step": 2100
620
  },
621
  {
622
+ "epoch": 1.0638,
623
+ "grad_norm": 24.452116012573242,
624
+ "learning_rate": 6.402222222222223e-06,
625
+ "loss": 0.4478,
626
  "step": 2125
627
  },
628
  {
629
+ "epoch": 1.0688,
630
+ "grad_norm": 29.230716705322266,
631
+ "learning_rate": 6.346666666666668e-06,
632
+ "loss": 0.4387,
633
  "step": 2150
634
  },
635
  {
636
+ "epoch": 1.0738,
637
+ "grad_norm": 41.07571792602539,
638
+ "learning_rate": 6.291111111111111e-06,
639
+ "loss": 0.2466,
640
  "step": 2175
641
  },
642
  {
643
+ "epoch": 1.0788,
644
+ "grad_norm": 19.89525032043457,
645
+ "learning_rate": 6.235555555555556e-06,
646
+ "loss": 0.3156,
647
  "step": 2200
648
  },
649
  {
650
+ "epoch": 1.0838,
651
+ "grad_norm": 33.628971099853516,
652
+ "learning_rate": 6.18e-06,
653
+ "loss": 0.3624,
654
  "step": 2225
655
  },
656
  {
657
+ "epoch": 1.0888,
658
+ "grad_norm": 23.34870147705078,
659
+ "learning_rate": 6.124444444444445e-06,
660
+ "loss": 0.4263,
661
  "step": 2250
662
  },
663
  {
664
+ "epoch": 1.0937999999999999,
665
+ "grad_norm": 30.75408172607422,
666
+ "learning_rate": 6.06888888888889e-06,
667
+ "loss": 0.4153,
668
  "step": 2275
669
  },
670
  {
671
+ "epoch": 1.0988,
672
+ "grad_norm": 7.229944705963135,
673
+ "learning_rate": 6.013333333333335e-06,
674
+ "loss": 0.3068,
675
  "step": 2300
676
  },
677
  {
678
+ "epoch": 1.1038000000000001,
679
+ "grad_norm": 37.87436294555664,
680
+ "learning_rate": 5.957777777777778e-06,
681
+ "loss": 0.3049,
682
  "step": 2325
683
  },
684
  {
685
+ "epoch": 1.1088,
686
+ "grad_norm": 33.34481430053711,
687
+ "learning_rate": 5.902222222222223e-06,
688
+ "loss": 0.2759,
689
  "step": 2350
690
  },
691
  {
692
+ "epoch": 1.1138,
693
+ "grad_norm": 24.211904525756836,
694
+ "learning_rate": 5.846666666666667e-06,
695
+ "loss": 0.35,
696
  "step": 2375
697
  },
698
  {
699
+ "epoch": 1.1188,
700
+ "grad_norm": 41.01383590698242,
701
+ "learning_rate": 5.791111111111112e-06,
702
+ "loss": 0.3992,
703
  "step": 2400
704
  },
705
  {
706
+ "epoch": 1.1238,
707
+ "grad_norm": 6.543262004852295,
708
+ "learning_rate": 5.735555555555557e-06,
709
+ "loss": 0.3356,
710
  "step": 2425
711
  },
712
  {
713
+ "epoch": 1.1288,
714
+ "grad_norm": 34.344913482666016,
715
+ "learning_rate": 5.68e-06,
716
+ "loss": 0.3364,
717
  "step": 2450
718
  },
719
  {
720
+ "epoch": 1.1338,
721
+ "grad_norm": 9.35561752319336,
722
+ "learning_rate": 5.624444444444445e-06,
723
+ "loss": 0.3606,
724
  "step": 2475
725
  },
726
  {
727
+ "epoch": 1.1388,
728
+ "grad_norm": 21.762096405029297,
729
+ "learning_rate": 5.56888888888889e-06,
730
+ "loss": 0.3437,
731
  "step": 2500
732
  },
733
  {
734
+ "epoch": 1.1438,
735
+ "grad_norm": 29.119796752929688,
736
+ "learning_rate": 5.513333333333334e-06,
737
+ "loss": 0.428,
738
  "step": 2525
739
  },
740
  {
741
+ "epoch": 1.1488,
742
+ "grad_norm": 46.66371536254883,
743
+ "learning_rate": 5.4577777777777785e-06,
744
+ "loss": 0.4566,
745
  "step": 2550
746
  },
747
  {
748
+ "epoch": 1.1538,
749
+ "grad_norm": 15.0108642578125,
750
+ "learning_rate": 5.402222222222223e-06,
751
+ "loss": 0.3162,
752
  "step": 2575
753
  },
754
  {
755
+ "epoch": 1.1588,
756
+ "grad_norm": 60.40862274169922,
757
+ "learning_rate": 5.346666666666667e-06,
758
+ "loss": 0.3696,
759
  "step": 2600
760
  },
761
  {
762
+ "epoch": 1.1638,
763
+ "grad_norm": 26.4654598236084,
764
+ "learning_rate": 5.2911111111111115e-06,
765
+ "loss": 0.2831,
766
  "step": 2625
767
  },
768
  {
769
+ "epoch": 1.1688,
770
+ "grad_norm": 23.651691436767578,
771
+ "learning_rate": 5.235555555555556e-06,
772
+ "loss": 0.4484,
773
  "step": 2650
774
  },
775
  {
776
+ "epoch": 1.1738,
777
+ "grad_norm": 33.675167083740234,
778
+ "learning_rate": 5.18e-06,
779
+ "loss": 0.3062,
780
  "step": 2675
781
  },
782
  {
783
+ "epoch": 1.1788,
784
+ "grad_norm": 27.336896896362305,
785
+ "learning_rate": 5.124444444444445e-06,
786
+ "loss": 0.3738,
787
  "step": 2700
788
  },
789
  {
790
+ "epoch": 1.1838,
791
+ "grad_norm": 17.323768615722656,
792
+ "learning_rate": 5.06888888888889e-06,
793
+ "loss": 0.3182,
794
  "step": 2725
795
  },
796
  {
797
+ "epoch": 1.1888,
798
+ "grad_norm": 18.657014846801758,
799
+ "learning_rate": 5.013333333333333e-06,
800
+ "loss": 0.3589,
801
  "step": 2750
802
  },
803
  {
804
+ "epoch": 1.1938,
805
+ "grad_norm": 39.54949188232422,
806
+ "learning_rate": 4.957777777777778e-06,
807
+ "loss": 0.4187,
808
  "step": 2775
809
  },
810
  {
811
+ "epoch": 1.1988,
812
+ "grad_norm": 57.646602630615234,
813
+ "learning_rate": 4.902222222222222e-06,
814
+ "loss": 0.3374,
815
  "step": 2800
816
  },
817
  {
818
+ "epoch": 1.2038,
819
+ "grad_norm": 24.029294967651367,
820
+ "learning_rate": 4.846666666666667e-06,
821
+ "loss": 0.2373,
822
  "step": 2825
823
  },
824
  {
825
+ "epoch": 1.2088,
826
+ "grad_norm": 34.673709869384766,
827
+ "learning_rate": 4.791111111111111e-06,
828
+ "loss": 0.3274,
829
  "step": 2850
830
  },
831
  {
832
+ "epoch": 1.2138,
833
+ "grad_norm": 40.28670120239258,
834
+ "learning_rate": 4.735555555555556e-06,
835
+ "loss": 0.3105,
836
  "step": 2875
837
  },
838
  {
839
+ "epoch": 1.2187999999999999,
840
+ "grad_norm": 22.120824813842773,
841
+ "learning_rate": 4.680000000000001e-06,
842
+ "loss": 0.2678,
843
  "step": 2900
844
  },
845
  {
846
+ "epoch": 1.2238,
847
+ "grad_norm": 21.942718505859375,
848
+ "learning_rate": 4.624444444444445e-06,
849
+ "loss": 0.5025,
850
  "step": 2925
851
  },
852
  {
853
+ "epoch": 1.2288000000000001,
854
+ "grad_norm": 31.645906448364258,
855
+ "learning_rate": 4.568888888888889e-06,
856
+ "loss": 0.3753,
857
  "step": 2950
858
  },
859
  {
860
+ "epoch": 1.2338,
861
+ "grad_norm": 19.00498390197754,
862
+ "learning_rate": 4.513333333333333e-06,
863
+ "loss": 0.3421,
864
  "step": 2975
865
  },
866
  {
867
+ "epoch": 1.2388,
868
+ "grad_norm": 42.73635482788086,
869
+ "learning_rate": 4.457777777777778e-06,
870
+ "loss": 0.3108,
871
  "step": 3000
872
  },
873
  {
874
+ "epoch": 1.2388,
875
+ "eval_cer": 7.7142857142857135,
876
+ "eval_loss": 0.11533673852682114,
877
+ "eval_model_preparation_time": 0.0121,
878
+ "eval_runtime": 120.7728,
879
+ "eval_samples_per_second": 2.484,
880
+ "eval_steps_per_second": 1.242,
881
+ "eval_wer": 7.5,
882
  "step": 3000
883
  },
884
  {
885
+ "epoch": 1.2438,
886
+ "grad_norm": 20.023395538330078,
887
+ "learning_rate": 4.402222222222223e-06,
888
+ "loss": 0.3144,
889
  "step": 3025
890
  },
891
  {
892
+ "epoch": 1.2488,
893
+ "grad_norm": 32.98371505737305,
894
+ "learning_rate": 4.346666666666667e-06,
895
+ "loss": 0.2618,
896
  "step": 3050
897
  },
898
  {
899
+ "epoch": 1.2538,
900
+ "grad_norm": 42.568119049072266,
901
+ "learning_rate": 4.291111111111112e-06,
902
+ "loss": 0.331,
903
  "step": 3075
904
  },
905
  {
906
+ "epoch": 1.2588,
907
+ "grad_norm": 0.5769469738006592,
908
+ "learning_rate": 4.235555555555556e-06,
909
+ "loss": 0.3391,
910
  "step": 3100
911
  },
912
  {
913
+ "epoch": 1.2638,
914
+ "grad_norm": 20.518579483032227,
915
+ "learning_rate": 4.18e-06,
916
+ "loss": 0.2234,
917
  "step": 3125
918
  },
919
  {
920
+ "epoch": 1.2688,
921
+ "grad_norm": 27.69402313232422,
922
+ "learning_rate": 4.124444444444445e-06,
923
+ "loss": 0.295,
924
  "step": 3150
925
  },
926
  {
927
+ "epoch": 1.2738,
928
+ "grad_norm": 31.121999740600586,
929
+ "learning_rate": 4.0688888888888896e-06,
930
+ "loss": 0.3753,
931
  "step": 3175
932
  },
933
  {
934
+ "epoch": 1.2788,
935
+ "grad_norm": 14.77844524383545,
936
+ "learning_rate": 4.013333333333334e-06,
937
+ "loss": 0.3706,
938
  "step": 3200
939
  },
940
  {
941
+ "epoch": 1.2838,
942
+ "grad_norm": 27.002138137817383,
943
+ "learning_rate": 3.9577777777777785e-06,
944
+ "loss": 0.2957,
945
  "step": 3225
946
  },
947
  {
948
+ "epoch": 1.2888,
949
+ "grad_norm": 13.426039695739746,
950
+ "learning_rate": 3.9022222222222225e-06,
951
+ "loss": 0.2902,
952
  "step": 3250
953
  },
954
  {
955
+ "epoch": 1.2938,
956
+ "grad_norm": 41.75296401977539,
957
+ "learning_rate": 3.8466666666666665e-06,
958
+ "loss": 0.2924,
959
  "step": 3275
960
  },
961
  {
962
+ "epoch": 1.2988,
963
+ "grad_norm": 52.886409759521484,
964
+ "learning_rate": 3.7911111111111114e-06,
965
+ "loss": 0.2519,
966
  "step": 3300
967
  },
968
  {
969
+ "epoch": 1.3038,
970
+ "grad_norm": 33.73487854003906,
971
+ "learning_rate": 3.7355555555555555e-06,
972
+ "loss": 0.2966,
973
  "step": 3325
974
  },
975
  {
976
+ "epoch": 1.3088,
977
+ "grad_norm": 27.994157791137695,
978
+ "learning_rate": 3.6800000000000003e-06,
979
+ "loss": 0.2887,
980
  "step": 3350
981
  },
982
  {
983
+ "epoch": 1.3138,
984
+ "grad_norm": 32.397579193115234,
985
+ "learning_rate": 3.624444444444445e-06,
986
+ "loss": 0.2368,
987
  "step": 3375
988
  },
989
  {
990
+ "epoch": 1.3188,
991
+ "grad_norm": 25.96181869506836,
992
+ "learning_rate": 3.568888888888889e-06,
993
+ "loss": 0.2911,
994
  "step": 3400
995
  },
996
  {
997
+ "epoch": 1.3237999999999999,
998
+ "grad_norm": 7.705018520355225,
999
+ "learning_rate": 3.5133333333333337e-06,
1000
+ "loss": 0.2647,
1001
  "step": 3425
1002
  },
1003
  {
1004
+ "epoch": 1.3288,
1005
+ "grad_norm": 28.499221801757812,
1006
+ "learning_rate": 3.457777777777778e-06,
1007
+ "loss": 0.4149,
1008
  "step": 3450
1009
  },
1010
  {
1011
+ "epoch": 1.3338,
1012
+ "grad_norm": 40.06334686279297,
1013
+ "learning_rate": 3.4022222222222222e-06,
1014
+ "loss": 0.2952,
1015
  "step": 3475
1016
  },
1017
  {
1018
+ "epoch": 1.3388,
1019
+ "grad_norm": 1.474075198173523,
1020
+ "learning_rate": 3.346666666666667e-06,
1021
+ "loss": 0.207,
1022
  "step": 3500
1023
  },
1024
  {
1025
+ "epoch": 1.3437999999999999,
1026
+ "grad_norm": 27.121145248413086,
1027
+ "learning_rate": 3.2911111111111116e-06,
1028
+ "loss": 0.3432,
1029
  "step": 3525
1030
  },
1031
  {
1032
+ "epoch": 1.3488,
1033
+ "grad_norm": 22.38213539123535,
1034
+ "learning_rate": 3.2355555555555556e-06,
1035
+ "loss": 0.3522,
1036
  "step": 3550
1037
  },
1038
  {
1039
+ "epoch": 1.3538000000000001,
1040
+ "grad_norm": 42.13764190673828,
1041
+ "learning_rate": 3.1800000000000005e-06,
1042
+ "loss": 0.3729,
1043
  "step": 3575
1044
  },
1045
  {
1046
+ "epoch": 1.3588,
1047
+ "grad_norm": 27.436649322509766,
1048
+ "learning_rate": 3.124444444444445e-06,
1049
+ "loss": 0.1482,
1050
  "step": 3600
1051
  },
1052
  {
1053
+ "epoch": 2.0026,
1054
+ "grad_norm": 16.43301010131836,
1055
+ "learning_rate": 3.068888888888889e-06,
1056
+ "loss": 0.1802,
1057
  "step": 3625
1058
  },
1059
  {
1060
+ "epoch": 2.0076,
1061
+ "grad_norm": 11.994711875915527,
1062
+ "learning_rate": 3.013333333333334e-06,
1063
+ "loss": 0.0922,
1064
  "step": 3650
1065
  },
1066
  {
1067
+ "epoch": 2.0126,
1068
+ "grad_norm": 7.826560020446777,
1069
+ "learning_rate": 2.957777777777778e-06,
1070
+ "loss": 0.0312,
1071
  "step": 3675
1072
  },
1073
  {
1074
+ "epoch": 2.0176,
1075
+ "grad_norm": 6.457350730895996,
1076
+ "learning_rate": 2.9022222222222223e-06,
1077
+ "loss": 0.0343,
1078
  "step": 3700
1079
  },
1080
  {
1081
+ "epoch": 2.0226,
1082
+ "grad_norm": 1.624599575996399,
1083
+ "learning_rate": 2.8466666666666672e-06,
1084
+ "loss": 0.0751,
1085
  "step": 3725
1086
  },
1087
  {
1088
+ "epoch": 2.0276,
1089
+ "grad_norm": 4.553808212280273,
1090
+ "learning_rate": 2.7911111111111113e-06,
1091
+ "loss": 0.0905,
1092
  "step": 3750
1093
  },
1094
  {
1095
+ "epoch": 2.0326,
1096
+ "grad_norm": 4.33929967880249,
1097
+ "learning_rate": 2.7355555555555557e-06,
1098
+ "loss": 0.079,
1099
  "step": 3775
1100
  },
1101
  {
1102
+ "epoch": 2.0376,
1103
+ "grad_norm": 11.390565872192383,
1104
+ "learning_rate": 2.68e-06,
1105
+ "loss": 0.064,
1106
  "step": 3800
1107
  },
1108
  {
1109
+ "epoch": 2.0426,
1110
+ "grad_norm": 0.5454270243644714,
1111
+ "learning_rate": 2.6244444444444446e-06,
1112
+ "loss": 0.0351,
1113
  "step": 3825
1114
  },
1115
  {
1116
+ "epoch": 2.0476,
1117
+ "grad_norm": 25.50840950012207,
1118
+ "learning_rate": 2.568888888888889e-06,
1119
+ "loss": 0.0726,
1120
  "step": 3850
1121
  },
1122
  {
1123
+ "epoch": 2.0526,
1124
+ "grad_norm": 5.380075931549072,
1125
+ "learning_rate": 2.5133333333333336e-06,
1126
+ "loss": 0.0405,
1127
  "step": 3875
1128
  },
1129
  {
1130
+ "epoch": 2.0576,
1131
+ "grad_norm": 1.0126858949661255,
1132
+ "learning_rate": 2.457777777777778e-06,
1133
+ "loss": 0.0858,
1134
  "step": 3900
1135
  },
1136
  {
1137
+ "epoch": 2.0626,
1138
+ "grad_norm": 1.7211323976516724,
1139
+ "learning_rate": 2.4022222222222225e-06,
1140
+ "loss": 0.0431,
1141
  "step": 3925
1142
  },
1143
  {
1144
+ "epoch": 2.0676,
1145
+ "grad_norm": 2.049405574798584,
1146
+ "learning_rate": 2.346666666666667e-06,
1147
+ "loss": 0.0127,
1148
  "step": 3950
1149
  },
1150
  {
1151
+ "epoch": 2.0726,
1152
+ "grad_norm": 1.535748839378357,
1153
+ "learning_rate": 2.2911111111111114e-06,
1154
+ "loss": 0.0559,
1155
  "step": 3975
1156
  },
1157
  {
1158
+ "epoch": 2.0776,
1159
+ "grad_norm": 19.566328048706055,
1160
+ "learning_rate": 2.235555555555556e-06,
1161
+ "loss": 0.0544,
1162
  "step": 4000
1163
  },
1164
  {
1165
+ "epoch": 2.0776,
1166
+ "eval_cer": 2.2857142857142856,
1167
+ "eval_loss": 0.029534637928009033,
1168
+ "eval_model_preparation_time": 0.0121,
1169
+ "eval_runtime": 119.7419,
1170
+ "eval_samples_per_second": 2.505,
1171
+ "eval_steps_per_second": 1.253,
1172
+ "eval_wer": 2.307692307692308,
1173
  "step": 4000
1174
  },
1175
  {
1176
+ "epoch": 2.0826000000000002,
1177
+ "grad_norm": 0.4915294945240021,
1178
+ "learning_rate": 2.1800000000000003e-06,
1179
+ "loss": 0.0556,
1180
  "step": 4025
1181
  },
1182
  {
1183
+ "epoch": 2.0876,
1184
+ "grad_norm": 13.67375659942627,
1185
+ "learning_rate": 2.1244444444444443e-06,
1186
+ "loss": 0.0377,
1187
  "step": 4050
1188
  },
1189
  {
1190
+ "epoch": 2.0926,
1191
+ "grad_norm": 0.06471225619316101,
1192
+ "learning_rate": 2.0688888888888892e-06,
1193
+ "loss": 0.0535,
1194
  "step": 4075
1195
  },
1196
  {
1197
+ "epoch": 2.0976,
1198
+ "grad_norm": 0.1591552495956421,
1199
+ "learning_rate": 2.0133333333333337e-06,
1200
+ "loss": 0.037,
1201
  "step": 4100
1202
  },
1203
  {
1204
+ "epoch": 2.1026,
1205
+ "grad_norm": 1.6957018375396729,
1206
+ "learning_rate": 1.9577777777777777e-06,
1207
+ "loss": 0.0548,
1208
  "step": 4125
1209
  },
1210
  {
1211
+ "epoch": 2.1076,
1212
+ "grad_norm": 7.906589508056641,
1213
+ "learning_rate": 1.9022222222222222e-06,
1214
+ "loss": 0.066,
1215
  "step": 4150
1216
  },
1217
  {
1218
+ "epoch": 2.1126,
1219
+ "grad_norm": 0.4227987825870514,
1220
+ "learning_rate": 1.8466666666666668e-06,
1221
+ "loss": 0.0687,
1222
  "step": 4175
1223
  },
1224
  {
1225
+ "epoch": 2.1176,
1226
+ "grad_norm": 0.3206275999546051,
1227
+ "learning_rate": 1.7911111111111113e-06,
1228
+ "loss": 0.0263,
1229
  "step": 4200
1230
  },
1231
  {
1232
+ "epoch": 2.1226,
1233
+ "grad_norm": 0.16232123970985413,
1234
+ "learning_rate": 1.7355555555555555e-06,
1235
+ "loss": 0.0257,
1236
  "step": 4225
1237
  },
1238
  {
1239
+ "epoch": 2.1276,
1240
+ "grad_norm": 21.80668830871582,
1241
+ "learning_rate": 1.6800000000000002e-06,
1242
+ "loss": 0.0346,
1243
  "step": 4250
1244
  },
1245
  {
1246
+ "epoch": 2.1326,
1247
+ "grad_norm": 3.021885871887207,
1248
+ "learning_rate": 1.6244444444444447e-06,
1249
+ "loss": 0.0715,
1250
  "step": 4275
1251
  },
1252
  {
1253
+ "epoch": 2.1376,
1254
+ "grad_norm": 9.51052188873291,
1255
+ "learning_rate": 1.568888888888889e-06,
1256
+ "loss": 0.0686,
1257
  "step": 4300
1258
  },
1259
  {
1260
+ "epoch": 2.1426,
1261
+ "grad_norm": 7.770538806915283,
1262
+ "learning_rate": 1.5133333333333334e-06,
1263
+ "loss": 0.0681,
1264
  "step": 4325
1265
  },
1266
  {
1267
+ "epoch": 2.1476,
1268
+ "grad_norm": 24.541362762451172,
1269
+ "learning_rate": 1.457777777777778e-06,
1270
+ "loss": 0.0589,
1271
  "step": 4350
1272
  },
1273
  {
1274
+ "epoch": 2.1526,
1275
+ "grad_norm": 0.2108163684606552,
1276
+ "learning_rate": 1.4022222222222223e-06,
1277
+ "loss": 0.0407,
1278
  "step": 4375
1279
  },
1280
  {
1281
+ "epoch": 2.1576,
1282
+ "grad_norm": 39.73221206665039,
1283
+ "learning_rate": 1.3466666666666668e-06,
1284
+ "loss": 0.0895,
1285
  "step": 4400
1286
  },
1287
  {
1288
+ "epoch": 2.1626,
1289
+ "grad_norm": 4.3332438468933105,
1290
+ "learning_rate": 1.2911111111111112e-06,
1291
+ "loss": 0.0645,
1292
  "step": 4425
1293
  },
1294
  {
1295
+ "epoch": 2.1676,
1296
+ "grad_norm": 1.142255425453186,
1297
+ "learning_rate": 1.2355555555555557e-06,
1298
+ "loss": 0.0592,
1299
  "step": 4450
1300
  },
1301
  {
1302
+ "epoch": 2.1726,
1303
+ "grad_norm": 0.046843066811561584,
1304
+ "learning_rate": 1.1800000000000001e-06,
1305
+ "loss": 0.034,
1306
  "step": 4475
1307
  },
1308
  {
1309
+ "epoch": 2.1776,
1310
+ "grad_norm": 14.69985580444336,
1311
+ "learning_rate": 1.1244444444444446e-06,
1312
+ "loss": 0.0704,
1313
  "step": 4500
1314
  },
1315
  {
1316
+ "epoch": 2.1826,
1317
+ "grad_norm": 11.02341079711914,
1318
+ "learning_rate": 1.068888888888889e-06,
1319
+ "loss": 0.0621,
1320
  "step": 4525
1321
  },
1322
  {
1323
+ "epoch": 2.1875999999999998,
1324
+ "grad_norm": 0.05967080965638161,
1325
+ "learning_rate": 1.0133333333333333e-06,
1326
+ "loss": 0.0331,
1327
  "step": 4550
1328
  },
1329
  {
1330
+ "epoch": 2.1926,
1331
+ "grad_norm": 9.536999702453613,
1332
+ "learning_rate": 9.57777777777778e-07,
1333
+ "loss": 0.0762,
1334
  "step": 4575
1335
  },
1336
  {
1337
+ "epoch": 2.1976,
1338
+ "grad_norm": 3.740366220474243,
1339
+ "learning_rate": 9.022222222222222e-07,
1340
+ "loss": 0.0346,
1341
  "step": 4600
1342
  },
1343
  {
1344
+ "epoch": 2.2026,
1345
+ "grad_norm": 1.1851881742477417,
1346
+ "learning_rate": 8.466666666666668e-07,
1347
+ "loss": 0.0502,
1348
  "step": 4625
1349
  },
1350
  {
1351
+ "epoch": 2.2076000000000002,
1352
+ "grad_norm": 23.711048126220703,
1353
+ "learning_rate": 7.911111111111111e-07,
1354
+ "loss": 0.0661,
1355
  "step": 4650
1356
  },
1357
  {
1358
+ "epoch": 2.2126,
1359
+ "grad_norm": 1.0020112991333008,
1360
+ "learning_rate": 7.355555555555556e-07,
1361
+ "loss": 0.0252,
1362
  "step": 4675
1363
  },
1364
  {
1365
+ "epoch": 2.2176,
1366
+ "grad_norm": 21.021774291992188,
1367
+ "learning_rate": 6.800000000000001e-07,
1368
+ "loss": 0.0415,
1369
  "step": 4700
1370
  },
1371
  {
1372
+ "epoch": 2.2226,
1373
+ "grad_norm": 0.8725367784500122,
1374
+ "learning_rate": 6.244444444444445e-07,
1375
+ "loss": 0.0293,
1376
  "step": 4725
1377
  },
1378
  {
1379
+ "epoch": 2.2276,
1380
+ "grad_norm": 0.3742901086807251,
1381
+ "learning_rate": 5.68888888888889e-07,
1382
+ "loss": 0.0146,
1383
  "step": 4750
1384
  },
1385
  {
1386
+ "epoch": 2.2326,
1387
+ "grad_norm": 12.225566864013672,
1388
+ "learning_rate": 5.133333333333334e-07,
1389
+ "loss": 0.0775,
1390
  "step": 4775
1391
  },
1392
  {
1393
+ "epoch": 2.2376,
1394
+ "grad_norm": 0.5857837200164795,
1395
+ "learning_rate": 4.5777777777777784e-07,
1396
+ "loss": 0.0335,
1397
  "step": 4800
1398
  },
1399
  {
1400
+ "epoch": 2.2426,
1401
+ "grad_norm": 2.0213797092437744,
1402
+ "learning_rate": 4.0222222222222224e-07,
1403
+ "loss": 0.06,
1404
  "step": 4825
1405
  },
1406
  {
1407
+ "epoch": 2.2476,
1408
+ "grad_norm": 0.22115977108478546,
1409
+ "learning_rate": 3.466666666666667e-07,
1410
+ "loss": 0.0437,
1411
  "step": 4850
1412
  },
1413
  {
1414
+ "epoch": 2.2526,
1415
+ "grad_norm": 11.783333778381348,
1416
+ "learning_rate": 2.9111111111111116e-07,
1417
+ "loss": 0.0454,
1418
  "step": 4875
1419
  },
1420
  {
1421
+ "epoch": 2.2576,
1422
+ "grad_norm": 2.67048978805542,
1423
+ "learning_rate": 2.3555555555555556e-07,
1424
+ "loss": 0.093,
1425
  "step": 4900
1426
  },
1427
  {
1428
+ "epoch": 2.2626,
1429
+ "grad_norm": 0.16820040345191956,
1430
+ "learning_rate": 1.8e-07,
1431
+ "loss": 0.1193,
1432
  "step": 4925
1433
  },
1434
  {
1435
+ "epoch": 2.2676,
1436
+ "grad_norm": 1.518930435180664,
1437
+ "learning_rate": 1.2444444444444446e-07,
1438
+ "loss": 0.0493,
1439
  "step": 4950
1440
  },
1441
  {
1442
+ "epoch": 2.2726,
1443
+ "grad_norm": 0.10118613392114639,
1444
+ "learning_rate": 6.888888888888889e-08,
1445
+ "loss": 0.0538,
1446
  "step": 4975
1447
  },
1448
  {
1449
+ "epoch": 2.2776,
1450
+ "grad_norm": 0.5119066834449768,
1451
+ "learning_rate": 1.3333333333333334e-08,
1452
+ "loss": 0.0678,
1453
  "step": 5000
1454
  },
1455
  {
1456
+ "epoch": 2.2776,
1457
+ "eval_cer": 0.9523809523809524,
1458
+ "eval_loss": 0.014132725074887276,
1459
+ "eval_model_preparation_time": 0.0121,
1460
+ "eval_runtime": 119.856,
1461
+ "eval_samples_per_second": 2.503,
1462
+ "eval_steps_per_second": 1.252,
1463
+ "eval_wer": 0.9615384615384616,
1464
  "step": 5000
1465
  },
1466
  {
1467
+ "epoch": 2.2776,
1468
  "step": 5000,
1469
+ "total_flos": 1.02060490752e+19,
1470
+ "train_loss": 0.5477467903137208,
1471
+ "train_runtime": 5409.143,
1472
+ "train_samples_per_second": 1.849,
1473
+ "train_steps_per_second": 0.924
1474
  }
1475
  ],
1476
+ "logging_steps": 25,
1477
  "max_steps": 5000,
1478
+ "num_input_tokens_seen": 0,
1479
  "num_train_epochs": 9223372036854775807,
1480
+ "save_steps": 1000,
1481
+ "stateful_callbacks": {
1482
+ "TrainerControl": {
1483
+ "args": {
1484
+ "should_epoch_stop": false,
1485
+ "should_evaluate": false,
1486
+ "should_log": false,
1487
+ "should_save": true,
1488
+ "should_training_stop": true
1489
+ },
1490
+ "attributes": {}
1491
+ }
1492
+ },
1493
+ "total_flos": 1.02060490752e+19,
1494
+ "train_batch_size": 2,
1495
  "trial_name": null,
1496
  "trial_params": null
1497
  }