{
  "best_metric": 0.3606263447286636,
  "best_model_checkpoint": "w2v-bert-2.0-CV_Fleurs-lg-10hrs-v4/checkpoint-25137",
  "epoch": 55.0,
  "eval_steps": 500,
  "global_step": 28215,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 30.792112350463867,
      "learning_rate": 9.998031496062993e-05,
      "loss": 1.4106,
      "step": 513
    },
    {
      "epoch": 1.0,
      "eval_cer": 0.1264185884550246,
      "eval_loss": 0.5293365716934204,
      "eval_runtime": 31.0205,
      "eval_samples_per_second": 26.628,
      "eval_steps_per_second": 6.673,
      "eval_wer": 0.5665790102797036,
      "step": 513
    },
    {
      "epoch": 2.0,
      "grad_norm": 10.16055679321289,
      "learning_rate": 9.897047244094488e-05,
      "loss": 0.4775,
      "step": 1026
    },
    {
      "epoch": 2.0,
      "eval_cer": 0.11182513176846737,
      "eval_loss": 0.4882170259952545,
      "eval_runtime": 30.4398,
      "eval_samples_per_second": 27.135,
      "eval_steps_per_second": 6.8,
      "eval_wer": 0.5251016017212526,
      "step": 1026
    },
    {
      "epoch": 3.0,
      "grad_norm": 24.60751724243164,
      "learning_rate": 9.796062992125985e-05,
      "loss": 0.3696,
      "step": 1539
    },
    {
      "epoch": 3.0,
      "eval_cer": 0.09981164627486981,
      "eval_loss": 0.397329181432724,
      "eval_runtime": 30.5019,
      "eval_samples_per_second": 27.08,
      "eval_steps_per_second": 6.786,
      "eval_wer": 0.4564905570164953,
      "step": 1539
    },
    {
      "epoch": 4.0,
      "grad_norm": 8.973501205444336,
      "learning_rate": 9.695078740157481e-05,
      "loss": 0.3007,
      "step": 2052
    },
    {
      "epoch": 4.0,
      "eval_cer": 0.09322717991737761,
      "eval_loss": 0.3895888030529022,
      "eval_runtime": 29.8209,
      "eval_samples_per_second": 27.699,
      "eval_steps_per_second": 6.941,
      "eval_wer": 0.4311498924217069,
      "step": 2052
    },
    {
      "epoch": 5.0,
      "grad_norm": 3.461097240447998,
      "learning_rate": 9.594094488188978e-05,
      "loss": 0.2564,
      "step": 2565
    },
    {
      "epoch": 5.0,
      "eval_cer": 0.09659855331676664,
      "eval_loss": 0.3743041455745697,
      "eval_runtime": 29.9644,
      "eval_samples_per_second": 27.566,
      "eval_steps_per_second": 6.908,
      "eval_wer": 0.4372459956968683,
      "step": 2565
    },
    {
      "epoch": 6.0,
      "grad_norm": 9.60593032836914,
      "learning_rate": 9.493110236220472e-05,
      "loss": 0.2273,
      "step": 3078
    },
    {
      "epoch": 6.0,
      "eval_cer": 0.0879880973108153,
      "eval_loss": 0.3549309968948364,
      "eval_runtime": 30.2119,
      "eval_samples_per_second": 27.34,
      "eval_steps_per_second": 6.852,
      "eval_wer": 0.41836002868754485,
      "step": 3078
    },
    {
      "epoch": 7.0,
      "grad_norm": 2.7023348808288574,
      "learning_rate": 9.392125984251969e-05,
      "loss": 0.203,
      "step": 3591
    },
    {
      "epoch": 7.0,
      "eval_cer": 0.08468003608794061,
      "eval_loss": 0.3835783302783966,
      "eval_runtime": 29.6315,
      "eval_samples_per_second": 27.876,
      "eval_steps_per_second": 6.986,
      "eval_wer": 0.4000717188620607,
      "step": 3591
    },
    {
      "epoch": 8.0,
      "grad_norm": 2.196669340133667,
      "learning_rate": 9.291141732283465e-05,
      "loss": 0.1775,
      "step": 4104
    },
    {
      "epoch": 8.0,
      "eval_cer": 0.08550309438262714,
      "eval_loss": 0.3693934977054596,
      "eval_runtime": 30.2095,
      "eval_samples_per_second": 27.342,
      "eval_steps_per_second": 6.852,
      "eval_wer": 0.4132201769065264,
      "step": 4104
    },
    {
      "epoch": 9.0,
      "grad_norm": 3.045036792755127,
      "learning_rate": 9.190354330708662e-05,
      "loss": 0.159,
      "step": 4617
    },
    {
      "epoch": 9.0,
      "eval_cer": 0.0889694360467877,
      "eval_loss": 0.3487217128276825,
      "eval_runtime": 29.9136,
      "eval_samples_per_second": 27.613,
      "eval_steps_per_second": 6.92,
      "eval_wer": 0.4136983026535979,
      "step": 4617
    },
    {
      "epoch": 10.0,
      "grad_norm": 3.3439998626708984,
      "learning_rate": 9.089370078740158e-05,
      "loss": 0.1444,
      "step": 5130
    },
    {
      "epoch": 10.0,
      "eval_cer": 0.08915937257633075,
      "eval_loss": 0.3705226480960846,
      "eval_runtime": 30.2455,
      "eval_samples_per_second": 27.31,
      "eval_steps_per_second": 6.844,
      "eval_wer": 0.42768348075543866,
      "step": 5130
    },
    {
      "epoch": 11.0,
      "grad_norm": 3.339082956314087,
      "learning_rate": 8.988385826771653e-05,
      "loss": 0.1262,
      "step": 5643
    },
    {
      "epoch": 11.0,
      "eval_cer": 0.08284398296902452,
      "eval_loss": 0.3619905114173889,
      "eval_runtime": 30.7416,
      "eval_samples_per_second": 26.869,
      "eval_steps_per_second": 6.734,
      "eval_wer": 0.3981592158737748,
      "step": 5643
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.1660577654838562,
      "learning_rate": 8.88740157480315e-05,
      "loss": 0.1139,
      "step": 6156
    },
    {
      "epoch": 12.0,
      "eval_cer": 0.08422102280821159,
      "eval_loss": 0.3665634095668793,
      "eval_runtime": 30.3996,
      "eval_samples_per_second": 27.171,
      "eval_steps_per_second": 6.809,
      "eval_wer": 0.40545063351661487,
      "step": 6156
    },
    {
      "epoch": 13.0,
      "grad_norm": 0.19767990708351135,
      "learning_rate": 8.786417322834646e-05,
      "loss": 0.1015,
      "step": 6669
    },
    {
      "epoch": 13.0,
      "eval_cer": 0.08355624495481094,
      "eval_loss": 0.3656393885612488,
      "eval_runtime": 30.0647,
      "eval_samples_per_second": 27.474,
      "eval_steps_per_second": 6.885,
      "eval_wer": 0.40114750179297154,
      "step": 6669
    },
    {
      "epoch": 14.0,
      "grad_norm": 2.6208112239837646,
      "learning_rate": 8.685433070866143e-05,
      "loss": 0.0914,
      "step": 7182
    },
    {
      "epoch": 14.0,
      "eval_cer": 0.08339796451352506,
      "eval_loss": 0.3572923541069031,
      "eval_runtime": 30.3516,
      "eval_samples_per_second": 27.214,
      "eval_steps_per_second": 6.82,
      "eval_wer": 0.39278030121922064,
      "step": 7182
    },
    {
      "epoch": 15.0,
      "grad_norm": 3.129692316055298,
      "learning_rate": 8.584448818897639e-05,
      "loss": 0.0836,
      "step": 7695
    },
    {
      "epoch": 15.0,
      "eval_cer": 0.07895028411339211,
      "eval_loss": 0.379753053188324,
      "eval_runtime": 30.1224,
      "eval_samples_per_second": 27.421,
      "eval_steps_per_second": 6.872,
      "eval_wer": 0.3795123117379871,
      "step": 7695
    },
    {
      "epoch": 16.0,
      "grad_norm": 4.029957294464111,
      "learning_rate": 8.483464566929134e-05,
      "loss": 0.0729,
      "step": 8208
    },
    {
      "epoch": 16.0,
      "eval_cer": 0.08219503315975245,
      "eval_loss": 0.4288513660430908,
      "eval_runtime": 30.1728,
      "eval_samples_per_second": 27.376,
      "eval_steps_per_second": 6.86,
      "eval_wer": 0.3895529524264882,
      "step": 8208
    },
    {
      "epoch": 17.0,
      "grad_norm": 0.5638870596885681,
      "learning_rate": 8.38248031496063e-05,
      "loss": 0.0652,
      "step": 8721
    },
    {
      "epoch": 17.0,
      "eval_cer": 0.087386631633929,
      "eval_loss": 0.4778367280960083,
      "eval_runtime": 30.6189,
      "eval_samples_per_second": 26.977,
      "eval_steps_per_second": 6.761,
      "eval_wer": 0.4157303370786517,
      "step": 8721
    },
    {
      "epoch": 18.0,
      "grad_norm": 0.010239621624350548,
      "learning_rate": 8.281496062992127e-05,
      "loss": 0.0599,
      "step": 9234
    },
    {
      "epoch": 18.0,
      "eval_cer": 0.07918770477532092,
      "eval_loss": 0.4570063352584839,
      "eval_runtime": 30.2821,
      "eval_samples_per_second": 27.277,
      "eval_steps_per_second": 6.836,
      "eval_wer": 0.37963184317475496,
      "step": 9234
    },
    {
      "epoch": 19.0,
      "grad_norm": 3.2985517978668213,
      "learning_rate": 8.180511811023623e-05,
      "loss": 0.0504,
      "step": 9747
    },
    {
      "epoch": 19.0,
      "eval_cer": 0.08564554677978442,
      "eval_loss": 0.44442683458328247,
      "eval_runtime": 30.5951,
      "eval_samples_per_second": 26.998,
      "eval_steps_per_second": 6.766,
      "eval_wer": 0.4125029882859192,
      "step": 9747
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.014229847118258476,
      "learning_rate": 8.079724409448819e-05,
      "loss": 0.0498,
      "step": 10260
    },
    {
      "epoch": 20.0,
      "eval_cer": 0.08422102280821159,
      "eval_loss": 0.4611632823944092,
      "eval_runtime": 30.4634,
      "eval_samples_per_second": 27.115,
      "eval_steps_per_second": 6.795,
      "eval_wer": 0.3944537413339708,
      "step": 10260
    },
    {
      "epoch": 21.0,
      "grad_norm": 0.029411843046545982,
      "learning_rate": 7.978740157480316e-05,
      "loss": 0.0434,
      "step": 10773
    },
    {
      "epoch": 21.0,
      "eval_cer": 0.08431599107298311,
      "eval_loss": 0.48810598254203796,
      "eval_runtime": 30.3298,
      "eval_samples_per_second": 27.234,
      "eval_steps_per_second": 6.825,
      "eval_wer": 0.40019125029882857,
      "step": 10773
    },
    {
      "epoch": 22.0,
      "grad_norm": 0.19160379469394684,
      "learning_rate": 7.877755905511811e-05,
      "loss": 0.0386,
      "step": 11286
    },
    {
      "epoch": 22.0,
      "eval_cer": 0.08035898004083636,
      "eval_loss": 0.509917676448822,
      "eval_runtime": 31.249,
      "eval_samples_per_second": 26.433,
      "eval_steps_per_second": 6.624,
      "eval_wer": 0.377719340186469,
      "step": 11286
    },
    {
      "epoch": 23.0,
      "grad_norm": 0.18706804513931274,
      "learning_rate": 7.776771653543308e-05,
      "loss": 0.0381,
      "step": 11799
    },
    {
      "epoch": 23.0,
      "eval_cer": 0.08241662577755267,
      "eval_loss": 0.49042966961860657,
      "eval_runtime": 30.3145,
      "eval_samples_per_second": 27.248,
      "eval_steps_per_second": 6.828,
      "eval_wer": 0.38656466650729143,
      "step": 11799
    },
    {
      "epoch": 24.0,
      "grad_norm": 0.048221975564956665,
      "learning_rate": 7.675984251968504e-05,
      "loss": 0.0344,
      "step": 12312
    },
    {
      "epoch": 24.0,
      "eval_cer": 0.08344544864591083,
      "eval_loss": 0.4621882736682892,
      "eval_runtime": 30.4774,
      "eval_samples_per_second": 27.102,
      "eval_steps_per_second": 6.792,
      "eval_wer": 0.4028209419077217,
      "step": 12312
    },
    {
      "epoch": 25.0,
      "grad_norm": 1.851014494895935,
      "learning_rate": 7.575e-05,
      "loss": 0.0302,
      "step": 12825
    },
    {
      "epoch": 25.0,
      "eval_cer": 0.08198926858608081,
      "eval_loss": 0.4985555112361908,
      "eval_runtime": 30.5557,
      "eval_samples_per_second": 27.033,
      "eval_steps_per_second": 6.775,
      "eval_wer": 0.39182404972507767,
      "step": 12825
    },
    {
      "epoch": 26.0,
      "grad_norm": 0.03737611323595047,
      "learning_rate": 7.474015748031497e-05,
      "loss": 0.0268,
      "step": 13338
    },
    {
      "epoch": 26.0,
      "eval_cer": 0.08118203833552289,
      "eval_loss": 0.5162001252174377,
      "eval_runtime": 30.3567,
      "eval_samples_per_second": 27.21,
      "eval_steps_per_second": 6.819,
      "eval_wer": 0.3954099928281138,
      "step": 13338
    },
    {
      "epoch": 27.0,
      "grad_norm": 0.08066482096910477,
      "learning_rate": 7.373031496062992e-05,
      "loss": 0.0272,
      "step": 13851
    },
    {
      "epoch": 27.0,
      "eval_cer": 0.07914022064293515,
      "eval_loss": 0.4747571647167206,
      "eval_runtime": 30.1451,
      "eval_samples_per_second": 27.401,
      "eval_steps_per_second": 6.867,
      "eval_wer": 0.3773607458761654,
      "step": 13851
    },
    {
      "epoch": 28.0,
      "grad_norm": 1.6533007621765137,
      "learning_rate": 7.27224409448819e-05,
      "loss": 0.0235,
      "step": 14364
    },
    {
      "epoch": 28.0,
      "eval_cer": 0.07853875496604884,
      "eval_loss": 0.47181904315948486,
      "eval_runtime": 30.7257,
      "eval_samples_per_second": 26.883,
      "eval_steps_per_second": 6.737,
      "eval_wer": 0.3822615347836481,
      "step": 14364
    },
    {
      "epoch": 29.0,
      "grad_norm": 0.020692087709903717,
      "learning_rate": 7.171259842519685e-05,
      "loss": 0.0219,
      "step": 14877
    },
    {
      "epoch": 29.0,
      "eval_cer": 0.0796942021874357,
      "eval_loss": 0.5317575335502625,
      "eval_runtime": 29.8983,
      "eval_samples_per_second": 27.627,
      "eval_steps_per_second": 6.923,
      "eval_wer": 0.37377480277312936,
      "step": 14877
    },
    {
      "epoch": 30.0,
      "grad_norm": 0.030614351853728294,
      "learning_rate": 7.070275590551182e-05,
      "loss": 0.0205,
      "step": 15390
    },
    {
      "epoch": 30.0,
      "eval_cer": 0.07830133430412003,
      "eval_loss": 0.5195885300636292,
      "eval_runtime": 30.3135,
      "eval_samples_per_second": 27.249,
      "eval_steps_per_second": 6.829,
      "eval_wer": 0.37688262012909396,
      "step": 15390
    },
    {
      "epoch": 31.0,
      "grad_norm": 1.4625964164733887,
      "learning_rate": 6.969291338582678e-05,
      "loss": 0.0203,
      "step": 15903
    },
    {
      "epoch": 31.0,
      "eval_cer": 0.07879200367210623,
      "eval_loss": 0.5203195214271545,
      "eval_runtime": 29.9013,
      "eval_samples_per_second": 27.624,
      "eval_steps_per_second": 6.923,
      "eval_wer": 0.37413339708343296,
      "step": 15903
    },
    {
      "epoch": 32.0,
      "grad_norm": 0.2324291467666626,
      "learning_rate": 6.868307086614174e-05,
      "loss": 0.019,
      "step": 16416
    },
    {
      "epoch": 32.0,
      "eval_cer": 0.08089713354120831,
      "eval_loss": 0.5031083822250366,
      "eval_runtime": 30.2,
      "eval_samples_per_second": 27.351,
      "eval_steps_per_second": 6.854,
      "eval_wer": 0.3858474778866842,
      "step": 16416
    },
    {
      "epoch": 33.0,
      "grad_norm": 0.04213400557637215,
      "learning_rate": 6.76732283464567e-05,
      "loss": 0.0179,
      "step": 16929
    },
    {
      "epoch": 33.0,
      "eval_cer": 0.08099210180597984,
      "eval_loss": 0.5772231221199036,
      "eval_runtime": 30.1355,
      "eval_samples_per_second": 27.41,
      "eval_steps_per_second": 6.869,
      "eval_wer": 0.37449199139373657,
      "step": 16929
    },
    {
      "epoch": 34.0,
      "grad_norm": 1.1665908098220825,
      "learning_rate": 6.666338582677165e-05,
      "loss": 0.0175,
      "step": 17442
    },
    {
      "epoch": 34.0,
      "eval_cer": 0.07630700074391808,
      "eval_loss": 0.4905574917793274,
      "eval_runtime": 30.2826,
      "eval_samples_per_second": 27.276,
      "eval_steps_per_second": 6.836,
      "eval_wer": 0.3675591680612001,
      "step": 17442
    },
    {
      "epoch": 35.0,
      "grad_norm": 0.009321126155555248,
      "learning_rate": 6.565354330708662e-05,
      "loss": 0.0166,
      "step": 17955
    },
    {
      "epoch": 35.0,
      "eval_cer": 0.0786020671425632,
      "eval_loss": 0.5370538830757141,
      "eval_runtime": 30.0507,
      "eval_samples_per_second": 27.487,
      "eval_steps_per_second": 6.888,
      "eval_wer": 0.3693521396127181,
      "step": 17955
    },
    {
      "epoch": 36.0,
      "grad_norm": 0.08575043827295303,
      "learning_rate": 6.464370078740158e-05,
      "loss": 0.0138,
      "step": 18468
    },
    {
      "epoch": 36.0,
      "eval_cer": 0.07882365976036342,
      "eval_loss": 0.5748464465141296,
      "eval_runtime": 30.2194,
      "eval_samples_per_second": 27.333,
      "eval_steps_per_second": 6.85,
      "eval_wer": 0.37437245995696866,
      "step": 18468
    },
    {
      "epoch": 37.0,
      "grad_norm": 0.006642826832830906,
      "learning_rate": 6.363582677165355e-05,
      "loss": 0.0134,
      "step": 18981
    },
    {
      "epoch": 37.0,
      "eval_cer": 0.07782649298026242,
      "eval_loss": 0.5342956781387329,
      "eval_runtime": 30.2477,
      "eval_samples_per_second": 27.308,
      "eval_steps_per_second": 6.843,
      "eval_wer": 0.36971073392302173,
      "step": 18981
    },
    {
      "epoch": 38.0,
      "grad_norm": 0.032926104962825775,
      "learning_rate": 6.26259842519685e-05,
      "loss": 0.0135,
      "step": 19494
    },
    {
      "epoch": 38.0,
      "eval_cer": 0.08035898004083636,
      "eval_loss": 0.5406966805458069,
      "eval_runtime": 30.4073,
      "eval_samples_per_second": 27.164,
      "eval_steps_per_second": 6.808,
      "eval_wer": 0.3839349748983983,
      "step": 19494
    },
    {
      "epoch": 39.0,
      "grad_norm": 0.4430481791496277,
      "learning_rate": 6.161614173228348e-05,
      "loss": 0.0123,
      "step": 20007
    },
    {
      "epoch": 39.0,
      "eval_cer": 0.07668687380300417,
      "eval_loss": 0.5342694520950317,
      "eval_runtime": 29.877,
      "eval_samples_per_second": 27.647,
      "eval_steps_per_second": 6.928,
      "eval_wer": 0.36612479081998567,
      "step": 20007
    },
    {
      "epoch": 40.0,
      "grad_norm": 0.005585499573498964,
      "learning_rate": 6.0606299212598426e-05,
      "loss": 0.0124,
      "step": 20520
    },
    {
      "epoch": 40.0,
      "eval_cer": 0.08170436379176625,
      "eval_loss": 0.5632651448249817,
      "eval_runtime": 30.3597,
      "eval_samples_per_second": 27.207,
      "eval_steps_per_second": 6.818,
      "eval_wer": 0.3801099689218264,
      "step": 20520
    },
    {
      "epoch": 41.0,
      "grad_norm": 0.006700966972857714,
      "learning_rate": 5.9596456692913396e-05,
      "loss": 0.0131,
      "step": 21033
    },
    {
      "epoch": 41.0,
      "eval_cer": 0.07735165165640481,
      "eval_loss": 0.5580958724021912,
      "eval_runtime": 30.4235,
      "eval_samples_per_second": 27.15,
      "eval_steps_per_second": 6.804,
      "eval_wer": 0.36325603633755676,
      "step": 21033
    },
    {
      "epoch": 42.0,
      "grad_norm": 0.0002161410520784557,
      "learning_rate": 5.858661417322835e-05,
      "loss": 0.0094,
      "step": 21546
    },
    {
      "epoch": 42.0,
      "eval_cer": 0.07888697193687776,
      "eval_loss": 0.5862228870391846,
      "eval_runtime": 30.2348,
      "eval_samples_per_second": 27.319,
      "eval_steps_per_second": 6.846,
      "eval_wer": 0.3683958881185752,
      "step": 21546
    },
    {
      "epoch": 43.0,
      "grad_norm": 0.005379769951105118,
      "learning_rate": 5.75767716535433e-05,
      "loss": 0.0101,
      "step": 22059
    },
    {
      "epoch": 43.0,
      "eval_cer": 0.07608540812611786,
      "eval_loss": 0.5479421615600586,
      "eval_runtime": 30.4733,
      "eval_samples_per_second": 27.106,
      "eval_steps_per_second": 6.793,
      "eval_wer": 0.36457088214200334,
      "step": 22059
    },
    {
      "epoch": 44.0,
      "grad_norm": 0.008168138563632965,
      "learning_rate": 5.656692913385827e-05,
      "loss": 0.0094,
      "step": 22572
    },
    {
      "epoch": 44.0,
      "eval_cer": 0.07605375203786068,
      "eval_loss": 0.5737643241882324,
      "eval_runtime": 30.1141,
      "eval_samples_per_second": 27.429,
      "eval_steps_per_second": 6.874,
      "eval_wer": 0.3620607219698781,
      "step": 22572
    },
    {
      "epoch": 45.0,
      "grad_norm": 0.001390150049701333,
      "learning_rate": 5.555708661417323e-05,
      "loss": 0.0078,
      "step": 23085
    },
    {
      "epoch": 45.0,
      "eval_cer": 0.0777473527596195,
      "eval_loss": 0.5284022688865662,
      "eval_runtime": 30.2759,
      "eval_samples_per_second": 27.282,
      "eval_steps_per_second": 6.837,
      "eval_wer": 0.37819746593354053,
      "step": 23085
    },
    {
      "epoch": 46.0,
      "grad_norm": NaN,
      "learning_rate": 5.4551181102362206e-05,
      "loss": 0.0074,
      "step": 23598
    },
    {
      "epoch": 46.0,
      "eval_cer": 0.07898194020164928,
      "eval_loss": 0.627711832523346,
      "eval_runtime": 30.4473,
      "eval_samples_per_second": 27.129,
      "eval_steps_per_second": 6.799,
      "eval_wer": 0.3724599569686828,
      "step": 23598
    },
    {
      "epoch": 47.0,
      "grad_norm": 0.0076849376782774925,
      "learning_rate": 5.354133858267717e-05,
      "loss": 0.01,
      "step": 24111
    },
    {
      "epoch": 47.0,
      "eval_cer": 0.0765127653175897,
      "eval_loss": 0.5825507640838623,
      "eval_runtime": 30.4404,
      "eval_samples_per_second": 27.135,
      "eval_steps_per_second": 6.8,
      "eval_wer": 0.3686349509921109,
      "step": 24111
    },
    {
      "epoch": 48.0,
      "grad_norm": 0.015365912578999996,
      "learning_rate": 5.2531496062992125e-05,
      "loss": 0.0088,
      "step": 24624
    },
    {
      "epoch": 48.0,
      "eval_cer": 0.07611706421437503,
      "eval_loss": 0.5601234436035156,
      "eval_runtime": 30.4389,
      "eval_samples_per_second": 27.136,
      "eval_steps_per_second": 6.801,
      "eval_wer": 0.36600525938321776,
      "step": 24624
    },
    {
      "epoch": 49.0,
      "grad_norm": 0.0013850124087184668,
      "learning_rate": 5.152165354330709e-05,
      "loss": 0.0083,
      "step": 25137
    },
    {
      "epoch": 49.0,
      "eval_cer": 0.07686098228841862,
      "eval_loss": 0.5409994721412659,
      "eval_runtime": 30.7659,
      "eval_samples_per_second": 26.848,
      "eval_steps_per_second": 6.728,
      "eval_wer": 0.3606263447286636,
      "step": 25137
    },
    {
      "epoch": 50.0,
      "grad_norm": 0.0006066646310500801,
      "learning_rate": 5.0513779527559055e-05,
      "loss": 0.0074,
      "step": 25650
    },
    {
      "epoch": 50.0,
      "eval_cer": 0.07804808559806264,
      "eval_loss": 0.559197723865509,
      "eval_runtime": 30.2523,
      "eval_samples_per_second": 27.304,
      "eval_steps_per_second": 6.842,
      "eval_wer": 0.3613435333492709,
      "step": 25650
    },
    {
      "epoch": 51.0,
      "grad_norm": 0.04679730907082558,
      "learning_rate": 4.950393700787402e-05,
      "loss": 0.007,
      "step": 26163
    },
    {
      "epoch": 51.0,
      "eval_cer": 0.07793728928916253,
      "eval_loss": 0.5890862941741943,
      "eval_runtime": 30.6371,
      "eval_samples_per_second": 26.961,
      "eval_steps_per_second": 6.757,
      "eval_wer": 0.3689935453024145,
      "step": 26163
    },
    {
      "epoch": 52.0,
      "grad_norm": 0.012076578103005886,
      "learning_rate": 4.849409448818898e-05,
      "loss": 0.0067,
      "step": 26676
    },
    {
      "epoch": 52.0,
      "eval_cer": 0.07788980515677678,
      "eval_loss": 0.5806664228439331,
      "eval_runtime": 30.6637,
      "eval_samples_per_second": 26.937,
      "eval_steps_per_second": 6.751,
      "eval_wer": 0.3662443222567535,
      "step": 26676
    },
    {
      "epoch": 53.0,
      "grad_norm": 0.0049696676433086395,
      "learning_rate": 4.748425196850394e-05,
      "loss": 0.0067,
      "step": 27189
    },
    {
      "epoch": 53.0,
      "eval_cer": 0.07728833947989047,
      "eval_loss": 0.5851139426231384,
      "eval_runtime": 30.4226,
      "eval_samples_per_second": 27.151,
      "eval_steps_per_second": 6.804,
      "eval_wer": 0.363973224958164,
      "step": 27189
    },
    {
      "epoch": 54.0,
      "grad_norm": 0.0018264329992234707,
      "learning_rate": 4.64744094488189e-05,
      "loss": 0.0065,
      "step": 27702
    },
    {
      "epoch": 54.0,
      "eval_cer": 0.07667104575887558,
      "eval_loss": 0.5988566875457764,
      "eval_runtime": 29.9692,
      "eval_samples_per_second": 27.562,
      "eval_steps_per_second": 6.907,
      "eval_wer": 0.36672244800382503,
      "step": 27702
    },
    {
      "epoch": 55.0,
      "grad_norm": 5.14693021774292,
      "learning_rate": 4.5464566929133864e-05,
      "loss": 0.005,
      "step": 28215
    },
    {
      "epoch": 55.0,
      "eval_cer": 0.07852292692192026,
      "eval_loss": 0.5746394395828247,
      "eval_runtime": 30.2216,
      "eval_samples_per_second": 27.331,
      "eval_steps_per_second": 6.849,
      "eval_wer": 0.37568730576141524,
      "step": 28215
    }
  ],
  "logging_steps": 500,
  "max_steps": 51300,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 100,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 20,
        "early_stopping_threshold": 0.001
      },
      "attributes": {
        "early_stopping_patience_counter": 6
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.001178563865598e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}