medium-poleval-le15 / trainer_state.json
{
"best_metric": 0.1458934662818158,
"best_model_checkpoint": "results15\\checkpoint-64000",
"epoch": 1.151192,
"eval_steps": 4000,
"global_step": 68000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0008,
"grad_norm": 12.430644989013672,
"learning_rate": 1.8800000000000002e-06,
"loss": 1.5617,
"step": 100
},
{
"epoch": 0.0016,
"grad_norm": 11.342061042785645,
"learning_rate": 3.88e-06,
"loss": 0.7924,
"step": 200
},
{
"epoch": 0.0024,
"grad_norm": 6.638418674468994,
"learning_rate": 5.8800000000000005e-06,
"loss": 0.3435,
"step": 300
},
{
"epoch": 0.0032,
"grad_norm": 19.131393432617188,
"learning_rate": 7.88e-06,
"loss": 0.2877,
"step": 400
},
{
"epoch": 0.004,
"grad_norm": 11.609911918640137,
"learning_rate": 9.88e-06,
"loss": 0.3076,
"step": 500
},
{
"epoch": 0.0048,
"grad_norm": 8.114494323730469,
"learning_rate": 9.992449799196789e-06,
"loss": 0.2996,
"step": 600
},
{
"epoch": 0.0056,
"grad_norm": 11.082118034362793,
"learning_rate": 9.984417670682733e-06,
"loss": 0.3146,
"step": 700
},
{
"epoch": 0.0064,
"grad_norm": 10.258833885192871,
"learning_rate": 9.976385542168675e-06,
"loss": 0.2866,
"step": 800
},
{
"epoch": 0.0072,
"grad_norm": 4.887807369232178,
"learning_rate": 9.968353413654619e-06,
"loss": 0.2909,
"step": 900
},
{
"epoch": 0.008,
"grad_norm": 6.694243431091309,
"learning_rate": 9.960321285140563e-06,
"loss": 0.3067,
"step": 1000
},
{
"epoch": 0.0088,
"grad_norm": 9.998162269592285,
"learning_rate": 9.952289156626507e-06,
"loss": 0.305,
"step": 1100
},
{
"epoch": 0.0096,
"grad_norm": 14.1092529296875,
"learning_rate": 9.94425702811245e-06,
"loss": 0.2772,
"step": 1200
},
{
"epoch": 0.0104,
"grad_norm": 11.840794563293457,
"learning_rate": 9.936224899598395e-06,
"loss": 0.3012,
"step": 1300
},
{
"epoch": 0.0112,
"grad_norm": 9.802504539489746,
"learning_rate": 9.928192771084338e-06,
"loss": 0.2899,
"step": 1400
},
{
"epoch": 0.012,
"grad_norm": 21.836496353149414,
"learning_rate": 9.920160642570282e-06,
"loss": 0.3063,
"step": 1500
},
{
"epoch": 0.0128,
"grad_norm": 8.876431465148926,
"learning_rate": 9.912128514056226e-06,
"loss": 0.3292,
"step": 1600
},
{
"epoch": 0.0136,
"grad_norm": 13.00693416595459,
"learning_rate": 9.904096385542169e-06,
"loss": 0.305,
"step": 1700
},
{
"epoch": 0.0144,
"grad_norm": 10.583467483520508,
"learning_rate": 9.896064257028112e-06,
"loss": 0.2992,
"step": 1800
},
{
"epoch": 0.0152,
"grad_norm": 10.112874031066895,
"learning_rate": 9.888032128514056e-06,
"loss": 0.2896,
"step": 1900
},
{
"epoch": 0.016,
"grad_norm": 7.511252403259277,
"learning_rate": 9.88e-06,
"loss": 0.3163,
"step": 2000
},
{
"epoch": 0.0168,
"grad_norm": 9.708840370178223,
"learning_rate": 9.871967871485944e-06,
"loss": 0.274,
"step": 2100
},
{
"epoch": 0.0176,
"grad_norm": 8.038116455078125,
"learning_rate": 9.863935742971888e-06,
"loss": 0.2818,
"step": 2200
},
{
"epoch": 0.0184,
"grad_norm": 9.183858871459961,
"learning_rate": 9.855903614457832e-06,
"loss": 0.2909,
"step": 2300
},
{
"epoch": 0.0192,
"grad_norm": 13.90609073638916,
"learning_rate": 9.847871485943776e-06,
"loss": 0.2839,
"step": 2400
},
{
"epoch": 0.02,
"grad_norm": 17.10437774658203,
"learning_rate": 9.83983935742972e-06,
"loss": 0.3,
"step": 2500
},
{
"epoch": 0.0208,
"grad_norm": 19.14161491394043,
"learning_rate": 9.831807228915664e-06,
"loss": 0.2813,
"step": 2600
},
{
"epoch": 0.0216,
"grad_norm": 13.615035057067871,
"learning_rate": 9.823775100401608e-06,
"loss": 0.3083,
"step": 2700
},
{
"epoch": 0.0224,
"grad_norm": 15.303385734558105,
"learning_rate": 9.81574297188755e-06,
"loss": 0.2885,
"step": 2800
},
{
"epoch": 0.0232,
"grad_norm": 13.770358085632324,
"learning_rate": 9.807710843373494e-06,
"loss": 0.2998,
"step": 2900
},
{
"epoch": 0.024,
"grad_norm": 8.218709945678711,
"learning_rate": 9.799678714859438e-06,
"loss": 0.3097,
"step": 3000
},
{
"epoch": 0.0248,
"grad_norm": 11.543137550354004,
"learning_rate": 9.791646586345382e-06,
"loss": 0.3023,
"step": 3100
},
{
"epoch": 0.0256,
"grad_norm": 17.413169860839844,
"learning_rate": 9.783614457831326e-06,
"loss": 0.2775,
"step": 3200
},
{
"epoch": 0.0264,
"grad_norm": 11.18759536743164,
"learning_rate": 9.77558232931727e-06,
"loss": 0.2624,
"step": 3300
},
{
"epoch": 0.0272,
"grad_norm": 9.198080062866211,
"learning_rate": 9.767550200803213e-06,
"loss": 0.2866,
"step": 3400
},
{
"epoch": 0.028,
"grad_norm": 11.68255615234375,
"learning_rate": 9.759518072289157e-06,
"loss": 0.2834,
"step": 3500
},
{
"epoch": 0.0288,
"grad_norm": 5.951743125915527,
"learning_rate": 9.751485943775101e-06,
"loss": 0.2766,
"step": 3600
},
{
"epoch": 0.0296,
"grad_norm": 11.82551383972168,
"learning_rate": 9.743453815261045e-06,
"loss": 0.2837,
"step": 3700
},
{
"epoch": 0.0304,
"grad_norm": 12.415241241455078,
"learning_rate": 9.735421686746989e-06,
"loss": 0.2834,
"step": 3800
},
{
"epoch": 0.0312,
"grad_norm": 11.2643404006958,
"learning_rate": 9.727389558232933e-06,
"loss": 0.2929,
"step": 3900
},
{
"epoch": 0.032,
"grad_norm": 10.987563133239746,
"learning_rate": 9.719357429718877e-06,
"loss": 0.2566,
"step": 4000
},
{
"epoch": 0.032,
"eval_test1_cer": 0.14171147957278363,
"eval_test1_cer_norm": 0.10705693087137129,
"eval_test1_loss": 0.265484482049942,
"eval_test1_runtime": 1239.3947,
"eval_test1_samples_per_second": 2.017,
"eval_test1_steps_per_second": 0.504,
"eval_test1_wer": 0.30916353246450334,
"eval_test1_wer_norm": 0.24156170548525674,
"step": 4000
},
{
"epoch": 0.032,
"eval_test2_cer": 0.26862377272557586,
"eval_test2_cer_norm": 0.208736829872947,
"eval_test2_loss": 0.44017764925956726,
"eval_test2_runtime": 1372.2097,
"eval_test2_samples_per_second": 1.822,
"eval_test2_steps_per_second": 0.455,
"eval_test2_wer": 0.47839894712748915,
"eval_test2_wer_norm": 0.4037073114829246,
"step": 4000
},
{
"epoch": 0.0328,
"grad_norm": 10.9562406539917,
"learning_rate": 9.711325301204821e-06,
"loss": 0.3019,
"step": 4100
},
{
"epoch": 0.0336,
"grad_norm": 5.758121013641357,
"learning_rate": 9.703293172690765e-06,
"loss": 0.2978,
"step": 4200
},
{
"epoch": 0.0344,
"grad_norm": 10.012574195861816,
"learning_rate": 9.695261044176709e-06,
"loss": 0.295,
"step": 4300
},
{
"epoch": 0.0352,
"grad_norm": 8.023552894592285,
"learning_rate": 9.687228915662651e-06,
"loss": 0.2642,
"step": 4400
},
{
"epoch": 0.036,
"grad_norm": 7.264355182647705,
"learning_rate": 9.679196787148595e-06,
"loss": 0.2856,
"step": 4500
},
{
"epoch": 0.0368,
"grad_norm": 17.713090896606445,
"learning_rate": 9.671164658634539e-06,
"loss": 0.2838,
"step": 4600
},
{
"epoch": 0.0376,
"grad_norm": 6.670265197753906,
"learning_rate": 9.663132530120483e-06,
"loss": 0.2555,
"step": 4700
},
{
"epoch": 0.0384,
"grad_norm": 8.260579109191895,
"learning_rate": 9.655100401606427e-06,
"loss": 0.2664,
"step": 4800
},
{
"epoch": 0.0392,
"grad_norm": 6.691250801086426,
"learning_rate": 9.64706827309237e-06,
"loss": 0.2731,
"step": 4900
},
{
"epoch": 0.04,
"grad_norm": 9.206613540649414,
"learning_rate": 9.639036144578314e-06,
"loss": 0.2425,
"step": 5000
},
{
"epoch": 0.0408,
"grad_norm": 8.662139892578125,
"learning_rate": 9.631004016064258e-06,
"loss": 0.2774,
"step": 5100
},
{
"epoch": 0.0416,
"grad_norm": 7.598929405212402,
"learning_rate": 9.622971887550202e-06,
"loss": 0.2874,
"step": 5200
},
{
"epoch": 0.0424,
"grad_norm": 6.341280937194824,
"learning_rate": 9.614939759036145e-06,
"loss": 0.2905,
"step": 5300
},
{
"epoch": 0.0432,
"grad_norm": 9.12799072265625,
"learning_rate": 9.606907630522088e-06,
"loss": 0.2913,
"step": 5400
},
{
"epoch": 0.044,
"grad_norm": 6.347501754760742,
"learning_rate": 9.598875502008032e-06,
"loss": 0.2809,
"step": 5500
},
{
"epoch": 0.0448,
"grad_norm": 8.827099800109863,
"learning_rate": 9.590843373493976e-06,
"loss": 0.2826,
"step": 5600
},
{
"epoch": 0.0456,
"grad_norm": 12.478800773620605,
"learning_rate": 9.58281124497992e-06,
"loss": 0.2742,
"step": 5700
},
{
"epoch": 0.0464,
"grad_norm": 8.185443878173828,
"learning_rate": 9.574779116465864e-06,
"loss": 0.2874,
"step": 5800
},
{
"epoch": 0.0472,
"grad_norm": 29.066831588745117,
"learning_rate": 9.566746987951808e-06,
"loss": 0.2753,
"step": 5900
},
{
"epoch": 0.048,
"grad_norm": 8.861905097961426,
"learning_rate": 9.558714859437752e-06,
"loss": 0.2649,
"step": 6000
},
{
"epoch": 0.0488,
"grad_norm": 11.275808334350586,
"learning_rate": 9.550682730923696e-06,
"loss": 0.2835,
"step": 6100
},
{
"epoch": 0.0496,
"grad_norm": 7.473984241485596,
"learning_rate": 9.542650602409638e-06,
"loss": 0.2571,
"step": 6200
},
{
"epoch": 0.0504,
"grad_norm": 6.271098613739014,
"learning_rate": 9.534618473895582e-06,
"loss": 0.2563,
"step": 6300
},
{
"epoch": 0.0512,
"grad_norm": 7.11587381362915,
"learning_rate": 9.526586345381526e-06,
"loss": 0.2642,
"step": 6400
},
{
"epoch": 0.052,
"grad_norm": 6.847942352294922,
"learning_rate": 9.51855421686747e-06,
"loss": 0.2651,
"step": 6500
},
{
"epoch": 0.0528,
"grad_norm": 9.094855308532715,
"learning_rate": 9.510522088353414e-06,
"loss": 0.283,
"step": 6600
},
{
"epoch": 0.0536,
"grad_norm": 7.255010604858398,
"learning_rate": 9.502489959839358e-06,
"loss": 0.2583,
"step": 6700
},
{
"epoch": 0.0544,
"grad_norm": 10.873835563659668,
"learning_rate": 9.494457831325302e-06,
"loss": 0.2707,
"step": 6800
},
{
"epoch": 0.0552,
"grad_norm": 9.488873481750488,
"learning_rate": 9.486425702811246e-06,
"loss": 0.2759,
"step": 6900
},
{
"epoch": 0.056,
"grad_norm": 8.395374298095703,
"learning_rate": 9.47839357429719e-06,
"loss": 0.2754,
"step": 7000
},
{
"epoch": 0.0568,
"grad_norm": 8.9957857131958,
"learning_rate": 9.470361445783133e-06,
"loss": 0.2672,
"step": 7100
},
{
"epoch": 0.0576,
"grad_norm": 12.339411735534668,
"learning_rate": 9.462329317269077e-06,
"loss": 0.2651,
"step": 7200
},
{
"epoch": 0.0584,
"grad_norm": 6.282168388366699,
"learning_rate": 9.454297188755021e-06,
"loss": 0.2746,
"step": 7300
},
{
"epoch": 0.0592,
"grad_norm": 11.324309349060059,
"learning_rate": 9.446265060240965e-06,
"loss": 0.2659,
"step": 7400
},
{
"epoch": 0.06,
"grad_norm": 19.300565719604492,
"learning_rate": 9.438232931726909e-06,
"loss": 0.2594,
"step": 7500
},
{
"epoch": 0.0608,
"grad_norm": 9.297099113464355,
"learning_rate": 9.430200803212853e-06,
"loss": 0.2841,
"step": 7600
},
{
"epoch": 0.0616,
"grad_norm": 8.913684844970703,
"learning_rate": 9.422168674698797e-06,
"loss": 0.2468,
"step": 7700
},
{
"epoch": 0.0624,
"grad_norm": 9.521671295166016,
"learning_rate": 9.414136546184741e-06,
"loss": 0.2612,
"step": 7800
},
{
"epoch": 0.0632,
"grad_norm": 4.213898181915283,
"learning_rate": 9.406104417670685e-06,
"loss": 0.2692,
"step": 7900
},
{
"epoch": 0.064,
"grad_norm": 12.735309600830078,
"learning_rate": 9.398072289156627e-06,
"loss": 0.2556,
"step": 8000
},
{
"epoch": 0.064,
"eval_test1_cer": 0.10348046904175069,
"eval_test1_cer_norm": 0.07815208778543473,
"eval_test1_loss": 0.24871498346328735,
"eval_test1_runtime": 1203.5945,
"eval_test1_samples_per_second": 2.077,
"eval_test1_steps_per_second": 0.519,
"eval_test1_wer": 0.2385200734715298,
"eval_test1_wer_norm": 0.17586720827610391,
"step": 8000
},
{
"epoch": 0.064,
"eval_test2_cer": 0.17687684324485758,
"eval_test2_cer_norm": 0.13818465292841647,
"eval_test2_loss": 0.41706421971321106,
"eval_test2_runtime": 1259.4129,
"eval_test2_samples_per_second": 1.985,
"eval_test2_steps_per_second": 0.496,
"eval_test2_wer": 0.3423552300297551,
"eval_test2_wer_norm": 0.27343570937428374,
"step": 8000
},
{
"epoch": 0.0648,
"grad_norm": 13.473925590515137,
"learning_rate": 9.390040160642571e-06,
"loss": 0.282,
"step": 8100
},
{
"epoch": 0.0656,
"grad_norm": 10.660150527954102,
"learning_rate": 9.382008032128515e-06,
"loss": 0.2741,
"step": 8200
},
{
"epoch": 0.0664,
"grad_norm": 6.230668544769287,
"learning_rate": 9.373975903614459e-06,
"loss": 0.2686,
"step": 8300
},
{
"epoch": 0.0672,
"grad_norm": 5.246466159820557,
"learning_rate": 9.365943775100403e-06,
"loss": 0.2382,
"step": 8400
},
{
"epoch": 0.068,
"grad_norm": 5.55143928527832,
"learning_rate": 9.357911646586347e-06,
"loss": 0.2386,
"step": 8500
},
{
"epoch": 0.0688,
"grad_norm": 7.775327682495117,
"learning_rate": 9.34987951807229e-06,
"loss": 0.2767,
"step": 8600
},
{
"epoch": 0.0696,
"grad_norm": 9.929217338562012,
"learning_rate": 9.341847389558234e-06,
"loss": 0.241,
"step": 8700
},
{
"epoch": 0.0704,
"grad_norm": 6.488868236541748,
"learning_rate": 9.333815261044178e-06,
"loss": 0.2761,
"step": 8800
},
{
"epoch": 0.0712,
"grad_norm": 10.308218955993652,
"learning_rate": 9.325783132530122e-06,
"loss": 0.2927,
"step": 8900
},
{
"epoch": 0.072,
"grad_norm": 16.63512420654297,
"learning_rate": 9.317751004016065e-06,
"loss": 0.262,
"step": 9000
},
{
"epoch": 0.0728,
"grad_norm": 8.194585800170898,
"learning_rate": 9.309718875502008e-06,
"loss": 0.2535,
"step": 9100
},
{
"epoch": 0.0736,
"grad_norm": 10.36143970489502,
"learning_rate": 9.301686746987952e-06,
"loss": 0.2692,
"step": 9200
},
{
"epoch": 0.0744,
"grad_norm": 8.524481773376465,
"learning_rate": 9.293654618473896e-06,
"loss": 0.2785,
"step": 9300
},
{
"epoch": 0.0752,
"grad_norm": 5.517594337463379,
"learning_rate": 9.28562248995984e-06,
"loss": 0.26,
"step": 9400
},
{
"epoch": 0.076,
"grad_norm": 8.915616989135742,
"learning_rate": 9.277670682730925e-06,
"loss": 0.2491,
"step": 9500
},
{
"epoch": 0.0768,
"grad_norm": 10.76728343963623,
"learning_rate": 9.269638554216868e-06,
"loss": 0.2721,
"step": 9600
},
{
"epoch": 0.0776,
"grad_norm": 9.26211929321289,
"learning_rate": 9.261606425702812e-06,
"loss": 0.2614,
"step": 9700
},
{
"epoch": 0.0784,
"grad_norm": 7.043396472930908,
"learning_rate": 9.253574297188756e-06,
"loss": 0.2445,
"step": 9800
},
{
"epoch": 0.0792,
"grad_norm": 6.651730537414551,
"learning_rate": 9.2455421686747e-06,
"loss": 0.2872,
"step": 9900
},
{
"epoch": 0.08,
"grad_norm": 19.39574432373047,
"learning_rate": 9.237510040160642e-06,
"loss": 0.2755,
"step": 10000
},
{
"epoch": 0.0808,
"grad_norm": 7.920570373535156,
"learning_rate": 9.229477911646586e-06,
"loss": 0.2596,
"step": 10100
},
{
"epoch": 0.0816,
"grad_norm": 8.022053718566895,
"learning_rate": 9.22144578313253e-06,
"loss": 0.257,
"step": 10200
},
{
"epoch": 0.0824,
"grad_norm": 11.654303550720215,
"learning_rate": 9.213413654618474e-06,
"loss": 0.257,
"step": 10300
},
{
"epoch": 0.0832,
"grad_norm": 8.477298736572266,
"learning_rate": 9.205381526104418e-06,
"loss": 0.2405,
"step": 10400
},
{
"epoch": 0.084,
"grad_norm": 6.778027534484863,
"learning_rate": 9.197349397590362e-06,
"loss": 0.2688,
"step": 10500
},
{
"epoch": 0.0848,
"grad_norm": 3.938582420349121,
"learning_rate": 9.189317269076306e-06,
"loss": 0.2512,
"step": 10600
},
{
"epoch": 0.0856,
"grad_norm": 7.830332279205322,
"learning_rate": 9.18128514056225e-06,
"loss": 0.2595,
"step": 10700
},
{
"epoch": 0.0864,
"grad_norm": 6.613556861877441,
"learning_rate": 9.173253012048194e-06,
"loss": 0.2182,
"step": 10800
},
{
"epoch": 0.0872,
"grad_norm": 7.161635398864746,
"learning_rate": 9.165220883534138e-06,
"loss": 0.2548,
"step": 10900
},
{
"epoch": 0.088,
"grad_norm": 8.175447463989258,
"learning_rate": 9.157188755020082e-06,
"loss": 0.2754,
"step": 11000
},
{
"epoch": 0.0888,
"grad_norm": 8.775857925415039,
"learning_rate": 9.149156626506026e-06,
"loss": 0.2552,
"step": 11100
},
{
"epoch": 0.0896,
"grad_norm": 8.940972328186035,
"learning_rate": 9.14112449799197e-06,
"loss": 0.247,
"step": 11200
},
{
"epoch": 0.0904,
"grad_norm": 7.314133644104004,
"learning_rate": 9.133092369477913e-06,
"loss": 0.2463,
"step": 11300
},
{
"epoch": 0.0912,
"grad_norm": 13.069714546203613,
"learning_rate": 9.125060240963857e-06,
"loss": 0.2368,
"step": 11400
},
{
"epoch": 0.092,
"grad_norm": 3.7738101482391357,
"learning_rate": 9.1170281124498e-06,
"loss": 0.2429,
"step": 11500
},
{
"epoch": 0.0928,
"grad_norm": 4.591954708099365,
"learning_rate": 9.108995983935743e-06,
"loss": 0.2465,
"step": 11600
},
{
"epoch": 0.0936,
"grad_norm": 7.4973225593566895,
"learning_rate": 9.100963855421687e-06,
"loss": 0.2526,
"step": 11700
},
{
"epoch": 0.0944,
"grad_norm": 16.275197982788086,
"learning_rate": 9.092931726907631e-06,
"loss": 0.2469,
"step": 11800
},
{
"epoch": 0.0952,
"grad_norm": 13.245892524719238,
"learning_rate": 9.084899598393575e-06,
"loss": 0.2715,
"step": 11900
},
{
"epoch": 0.096,
"grad_norm": 10.107722282409668,
"learning_rate": 9.076867469879519e-06,
"loss": 0.2288,
"step": 12000
},
{
"epoch": 0.096,
"eval_test1_cer": 0.08446299200836507,
"eval_test1_cer_norm": 0.05986985857324657,
"eval_test1_loss": 0.24487894773483276,
"eval_test1_runtime": 1174.9494,
"eval_test1_samples_per_second": 2.128,
"eval_test1_steps_per_second": 0.532,
"eval_test1_wer": 0.2207936091431237,
"eval_test1_wer_norm": 0.15590753674858995,
"step": 12000
},
{
"epoch": 0.096,
"eval_test2_cer": 0.21867417030649194,
"eval_test2_cer_norm": 0.17033041524635884,
"eval_test2_loss": 0.4188956022262573,
"eval_test2_runtime": 1303.0061,
"eval_test2_samples_per_second": 1.919,
"eval_test2_steps_per_second": 0.48,
"eval_test2_wer": 0.38798924238956284,
"eval_test2_wer_norm": 0.31910382764153106,
"step": 12000
},
{
"epoch": 0.0968,
"grad_norm": 13.199896812438965,
"learning_rate": 9.068835341365463e-06,
"loss": 0.2436,
"step": 12100
},
{
"epoch": 0.0976,
"grad_norm": 8.258294105529785,
"learning_rate": 9.060803212851407e-06,
"loss": 0.2677,
"step": 12200
},
{
"epoch": 0.0984,
"grad_norm": 6.465874671936035,
"learning_rate": 9.052771084337351e-06,
"loss": 0.2659,
"step": 12300
},
{
"epoch": 0.0992,
"grad_norm": 8.216276168823242,
"learning_rate": 9.044738955823293e-06,
"loss": 0.2553,
"step": 12400
},
{
"epoch": 0.1,
"grad_norm": 7.199169158935547,
"learning_rate": 9.036706827309237e-06,
"loss": 0.2244,
"step": 12500
},
{
"epoch": 0.1008,
"grad_norm": 5.689333438873291,
"learning_rate": 9.028674698795181e-06,
"loss": 0.2366,
"step": 12600
},
{
"epoch": 0.1016,
"grad_norm": 9.028944969177246,
"learning_rate": 9.020642570281125e-06,
"loss": 0.2152,
"step": 12700
},
{
"epoch": 0.1024,
"grad_norm": 6.4437127113342285,
"learning_rate": 9.012610441767069e-06,
"loss": 0.2549,
"step": 12800
},
{
"epoch": 0.1032,
"grad_norm": 9.835594177246094,
"learning_rate": 9.004578313253013e-06,
"loss": 0.2383,
"step": 12900
},
{
"epoch": 0.104,
"grad_norm": 7.06815767288208,
"learning_rate": 8.996546184738957e-06,
"loss": 0.2381,
"step": 13000
},
{
"epoch": 0.1048,
"grad_norm": 5.8962297439575195,
"learning_rate": 8.9885140562249e-06,
"loss": 0.251,
"step": 13100
},
{
"epoch": 0.1056,
"grad_norm": 8.3475980758667,
"learning_rate": 8.980481927710844e-06,
"loss": 0.2315,
"step": 13200
},
{
"epoch": 0.1064,
"grad_norm": 9.03033447265625,
"learning_rate": 8.972449799196787e-06,
"loss": 0.2229,
"step": 13300
},
{
"epoch": 0.1072,
"grad_norm": 2.628983497619629,
"learning_rate": 8.96441767068273e-06,
"loss": 0.2563,
"step": 13400
},
{
"epoch": 0.108,
"grad_norm": 12.840631484985352,
"learning_rate": 8.956385542168675e-06,
"loss": 0.2499,
"step": 13500
},
{
"epoch": 0.1088,
"grad_norm": 5.249873161315918,
"learning_rate": 8.94843373493976e-06,
"loss": 0.2406,
"step": 13600
},
{
"epoch": 0.1096,
"grad_norm": 7.155534267425537,
"learning_rate": 8.940401606425704e-06,
"loss": 0.2345,
"step": 13700
},
{
"epoch": 0.1104,
"grad_norm": 6.4776787757873535,
"learning_rate": 8.932369477911648e-06,
"loss": 0.2504,
"step": 13800
},
{
"epoch": 0.1112,
"grad_norm": 13.455100059509277,
"learning_rate": 8.924337349397592e-06,
"loss": 0.2557,
"step": 13900
},
{
"epoch": 0.112,
"grad_norm": 7.236751079559326,
"learning_rate": 8.916305220883535e-06,
"loss": 0.2355,
"step": 14000
},
{
"epoch": 0.1128,
"grad_norm": 2.6910972595214844,
"learning_rate": 8.908273092369478e-06,
"loss": 0.2406,
"step": 14100
},
{
"epoch": 0.1136,
"grad_norm": 10.298126220703125,
"learning_rate": 8.900240963855422e-06,
"loss": 0.2418,
"step": 14200
},
{
"epoch": 0.1144,
"grad_norm": 9.934380531311035,
"learning_rate": 8.892208835341366e-06,
"loss": 0.2457,
"step": 14300
},
{
"epoch": 0.1152,
"grad_norm": 5.105091571807861,
"learning_rate": 8.88417670682731e-06,
"loss": 0.2278,
"step": 14400
},
{
"epoch": 0.116,
"grad_norm": 8.721418380737305,
"learning_rate": 8.876144578313254e-06,
"loss": 0.2355,
"step": 14500
},
{
"epoch": 0.1168,
"grad_norm": 10.942416191101074,
"learning_rate": 8.868112449799198e-06,
"loss": 0.2311,
"step": 14600
},
{
"epoch": 0.1176,
"grad_norm": 5.4364705085754395,
"learning_rate": 8.860080321285142e-06,
"loss": 0.26,
"step": 14700
},
{
"epoch": 0.1184,
"grad_norm": 10.078380584716797,
"learning_rate": 8.852048192771086e-06,
"loss": 0.2596,
"step": 14800
},
{
"epoch": 0.1192,
"grad_norm": 9.630817413330078,
"learning_rate": 8.844016064257028e-06,
"loss": 0.2182,
"step": 14900
},
{
"epoch": 0.12,
"grad_norm": 9.832001686096191,
"learning_rate": 8.835983935742972e-06,
"loss": 0.2581,
"step": 15000
},
{
"epoch": 0.1208,
"grad_norm": 8.944684028625488,
"learning_rate": 8.827951807228916e-06,
"loss": 0.2388,
"step": 15100
},
{
"epoch": 0.1216,
"grad_norm": 8.616948127746582,
"learning_rate": 8.81991967871486e-06,
"loss": 0.2433,
"step": 15200
},
{
"epoch": 0.1224,
"grad_norm": 7.957048416137695,
"learning_rate": 8.811887550200804e-06,
"loss": 0.2338,
"step": 15300
},
{
"epoch": 0.1232,
"grad_norm": 6.853145122528076,
"learning_rate": 8.803855421686748e-06,
"loss": 0.2328,
"step": 15400
},
{
"epoch": 0.124,
"grad_norm": 5.673152923583984,
"learning_rate": 8.795823293172692e-06,
"loss": 0.2492,
"step": 15500
},
{
"epoch": 0.1248,
"grad_norm": 9.295065879821777,
"learning_rate": 8.787871485943776e-06,
"loss": 0.2342,
"step": 15600
},
{
"epoch": 0.1256,
"grad_norm": 7.0689215660095215,
"learning_rate": 8.77983935742972e-06,
"loss": 0.2481,
"step": 15700
},
{
"epoch": 0.1264,
"grad_norm": 3.174997329711914,
"learning_rate": 8.771807228915664e-06,
"loss": 0.2384,
"step": 15800
},
{
"epoch": 0.1272,
"grad_norm": 10.203186988830566,
"learning_rate": 8.763775100401608e-06,
"loss": 0.2393,
"step": 15900
},
{
"epoch": 0.128,
"grad_norm": 7.220344066619873,
"learning_rate": 8.755742971887552e-06,
"loss": 0.2696,
"step": 16000
},
{
"epoch": 0.128,
"eval_test1_cer": 0.061454365523937564,
"eval_test1_cer_norm": 0.04506927269671285,
"eval_test1_loss": 0.22977162897586823,
"eval_test1_runtime": 1134.9922,
"eval_test1_samples_per_second": 2.203,
"eval_test1_steps_per_second": 0.551,
"eval_test1_wer": 0.17863494562523688,
"eval_test1_wer_norm": 0.11663111137087583,
"step": 16000
},
{
"epoch": 0.128,
"eval_test2_cer": 0.16853791018031133,
"eval_test2_cer_norm": 0.13256313913851875,
"eval_test2_loss": 0.4023756980895996,
"eval_test2_runtime": 1236.1872,
"eval_test2_samples_per_second": 2.022,
"eval_test2_steps_per_second": 0.506,
"eval_test2_wer": 0.32633325703822386,
"eval_test2_wer_norm": 0.2594545037818015,
"step": 16000
},
{
"epoch": 0.1288,
"grad_norm": 6.053903102874756,
"learning_rate": 8.747710843373496e-06,
"loss": 0.2549,
"step": 16100
},
{
"epoch": 0.1296,
"grad_norm": 7.160613536834717,
"learning_rate": 8.73967871485944e-06,
"loss": 0.247,
"step": 16200
},
{
"epoch": 0.1304,
"grad_norm": 9.369489669799805,
"learning_rate": 8.731726907630524e-06,
"loss": 0.2391,
"step": 16300
},
{
"epoch": 0.1312,
"grad_norm": 7.7761549949646,
"learning_rate": 8.723694779116466e-06,
"loss": 0.2477,
"step": 16400
},
{
"epoch": 0.132,
"grad_norm": 15.849722862243652,
"learning_rate": 8.71566265060241e-06,
"loss": 0.2477,
"step": 16500
},
{
"epoch": 0.1328,
"grad_norm": 4.059557914733887,
"learning_rate": 8.707630522088354e-06,
"loss": 0.2358,
"step": 16600
},
{
"epoch": 0.1336,
"grad_norm": 7.216188907623291,
"learning_rate": 8.699598393574298e-06,
"loss": 0.2663,
"step": 16700
},
{
"epoch": 0.1344,
"grad_norm": 5.579075336456299,
"learning_rate": 8.691566265060242e-06,
"loss": 0.2388,
"step": 16800
},
{
"epoch": 0.1352,
"grad_norm": 7.436819553375244,
"learning_rate": 8.683534136546186e-06,
"loss": 0.2262,
"step": 16900
},
{
"epoch": 0.136,
"grad_norm": 9.426555633544922,
"learning_rate": 8.67550200803213e-06,
"loss": 0.2334,
"step": 17000
},
{
"epoch": 0.1368,
"grad_norm": 11.434511184692383,
"learning_rate": 8.667469879518073e-06,
"loss": 0.2536,
"step": 17100
},
{
"epoch": 0.1376,
"grad_norm": 9.391510963439941,
"learning_rate": 8.659437751004017e-06,
"loss": 0.2311,
"step": 17200
},
{
"epoch": 0.1384,
"grad_norm": 14.07253646850586,
"learning_rate": 8.65140562248996e-06,
"loss": 0.2272,
"step": 17300
},
{
"epoch": 0.1392,
"grad_norm": 10.38249683380127,
"learning_rate": 8.643373493975904e-06,
"loss": 0.2602,
"step": 17400
},
{
"epoch": 0.14,
"grad_norm": 11.22042465209961,
"learning_rate": 8.635341365461847e-06,
"loss": 0.2664,
"step": 17500
},
{
"epoch": 0.1408,
"grad_norm": 8.036049842834473,
"learning_rate": 8.627309236947791e-06,
"loss": 0.2423,
"step": 17600
},
{
"epoch": 0.1416,
"grad_norm": 8.139640808105469,
"learning_rate": 8.619277108433735e-06,
"loss": 0.2336,
"step": 17700
},
{
"epoch": 0.1424,
"grad_norm": 10.12194538116455,
"learning_rate": 8.61124497991968e-06,
"loss": 0.2329,
"step": 17800
},
{
"epoch": 0.1432,
"grad_norm": 8.494665145874023,
"learning_rate": 8.603212851405623e-06,
"loss": 0.2411,
"step": 17900
},
{
"epoch": 0.144,
"grad_norm": 7.564216136932373,
"learning_rate": 8.595180722891567e-06,
"loss": 0.2477,
"step": 18000
},
{
"epoch": 0.1448,
"grad_norm": 6.718033313751221,
"learning_rate": 8.587148594377511e-06,
"loss": 0.243,
"step": 18100
},
{
"epoch": 0.1456,
"grad_norm": 4.666207313537598,
"learning_rate": 8.579116465863455e-06,
"loss": 0.2492,
"step": 18200
},
{
"epoch": 0.1464,
"grad_norm": 9.410490036010742,
"learning_rate": 8.571084337349397e-06,
"loss": 0.2265,
"step": 18300
},
{
"epoch": 0.1472,
"grad_norm": 6.1468119621276855,
"learning_rate": 8.563052208835341e-06,
"loss": 0.2409,
"step": 18400
},
{
"epoch": 0.148,
"grad_norm": 9.485304832458496,
"learning_rate": 8.555020080321285e-06,
"loss": 0.2584,
"step": 18500
},
{
"epoch": 0.1488,
"grad_norm": 4.741575717926025,
"learning_rate": 8.546987951807229e-06,
"loss": 0.2309,
"step": 18600
},
{
"epoch": 0.1496,
"grad_norm": 7.345742225646973,
"learning_rate": 8.538955823293173e-06,
"loss": 0.233,
"step": 18700
},
{
"epoch": 0.1504,
"grad_norm": 17.133358001708984,
"learning_rate": 8.530923694779117e-06,
"loss": 0.2335,
"step": 18800
},
{
"epoch": 0.1512,
"grad_norm": 7.573137283325195,
"learning_rate": 8.52289156626506e-06,
"loss": 0.2346,
"step": 18900
},
{
"epoch": 0.152,
"grad_norm": 11.430822372436523,
"learning_rate": 8.514859437751005e-06,
"loss": 0.2435,
"step": 19000
},
{
"epoch": 0.1528,
"grad_norm": 7.896162986755371,
"learning_rate": 8.506827309236948e-06,
"loss": 0.239,
"step": 19100
},
{
"epoch": 0.1536,
"grad_norm": 7.543678283691406,
"learning_rate": 8.498795180722892e-06,
"loss": 0.2509,
"step": 19200
},
{
"epoch": 0.1544,
"grad_norm": 11.438505172729492,
"learning_rate": 8.490763052208836e-06,
"loss": 0.2244,
"step": 19300
},
{
"epoch": 0.1552,
"grad_norm": 5.8480730056762695,
"learning_rate": 8.48281124497992e-06,
"loss": 0.2428,
"step": 19400
},
{
"epoch": 0.156,
"grad_norm": 6.436691761016846,
"learning_rate": 8.474779116465865e-06,
"loss": 0.2367,
"step": 19500
},
{
"epoch": 0.1568,
"grad_norm": 7.197071075439453,
"learning_rate": 8.466746987951808e-06,
"loss": 0.2162,
"step": 19600
},
{
"epoch": 0.1576,
"grad_norm": 4.8729634284973145,
"learning_rate": 8.458714859437752e-06,
"loss": 0.2231,
"step": 19700
},
{
"epoch": 0.1584,
"grad_norm": 7.567530155181885,
"learning_rate": 8.450682730923695e-06,
"loss": 0.2208,
"step": 19800
},
{
"epoch": 0.1592,
"grad_norm": 6.356396675109863,
"learning_rate": 8.442650602409639e-06,
"loss": 0.2217,
"step": 19900
},
{
"epoch": 0.16,
"grad_norm": 11.299875259399414,
"learning_rate": 8.434618473895582e-06,
"loss": 0.2376,
"step": 20000
},
{
"epoch": 0.16,
"eval_test1_cer": 0.0942891552767197,
"eval_test1_cer_norm": 0.06996422311330948,
"eval_test1_loss": 0.22547538578510284,
"eval_test1_runtime": 1189.7817,
"eval_test1_samples_per_second": 2.101,
"eval_test1_steps_per_second": 0.525,
"eval_test1_wer": 0.22478789469080732,
"eval_test1_wer_norm": 0.16227826646015373,
"step": 20000
},
{
"epoch": 0.16,
"eval_test2_cer": 0.17272370926195543,
"eval_test2_cer_norm": 0.13756488224356989,
"eval_test2_loss": 0.3925027847290039,
"eval_test2_runtime": 1241.7968,
"eval_test2_samples_per_second": 2.013,
"eval_test2_steps_per_second": 0.503,
"eval_test2_wer": 0.3331712062256809,
"eval_test2_wer_norm": 0.26449690579876234,
"step": 20000
},
{
"epoch": 0.1608,
"grad_norm": 3.2471466064453125,
"learning_rate": 8.426586345381526e-06,
"loss": 0.2182,
"step": 20100
},
{
"epoch": 0.1616,
"grad_norm": 8.284675598144531,
"learning_rate": 8.41855421686747e-06,
"loss": 0.2382,
"step": 20200
},
{
"epoch": 0.1624,
"grad_norm": 5.969610214233398,
"learning_rate": 8.410522088353414e-06,
"loss": 0.2372,
"step": 20300
},
{
"epoch": 0.1632,
"grad_norm": 12.841833114624023,
"learning_rate": 8.402489959839358e-06,
"loss": 0.2389,
"step": 20400
},
{
"epoch": 0.164,
"grad_norm": 6.656698703765869,
"learning_rate": 8.394457831325302e-06,
"loss": 0.2368,
"step": 20500
},
{
"epoch": 0.1648,
"grad_norm": 5.6654276847839355,
"learning_rate": 8.386425702811246e-06,
"loss": 0.2122,
"step": 20600
},
{
"epoch": 0.1656,
"grad_norm": 4.837275505065918,
"learning_rate": 8.378393574297188e-06,
"loss": 0.2088,
"step": 20700
},
{
"epoch": 0.1664,
"grad_norm": 7.151382923126221,
"learning_rate": 8.370361445783132e-06,
"loss": 0.2328,
"step": 20800
},
{
"epoch": 0.1672,
"grad_norm": 10.351961135864258,
"learning_rate": 8.362329317269076e-06,
"loss": 0.2217,
"step": 20900
},
{
"epoch": 0.168,
"grad_norm": 4.848665714263916,
"learning_rate": 8.35429718875502e-06,
"loss": 0.2372,
"step": 21000
},
{
"epoch": 0.1688,
"grad_norm": 4.431462287902832,
"learning_rate": 8.346265060240964e-06,
"loss": 0.2193,
"step": 21100
},
{
"epoch": 0.1696,
"grad_norm": 4.42501163482666,
"learning_rate": 8.338232931726908e-06,
"loss": 0.2198,
"step": 21200
},
{
"epoch": 0.1704,
"grad_norm": 11.573799133300781,
"learning_rate": 8.330200803212852e-06,
"loss": 0.2454,
"step": 21300
},
{
"epoch": 0.1712,
"grad_norm": 5.41486120223999,
"learning_rate": 8.322168674698796e-06,
"loss": 0.2328,
"step": 21400
},
{
"epoch": 0.172,
"grad_norm": 5.6592488288879395,
"learning_rate": 8.31413654618474e-06,
"loss": 0.2388,
"step": 21500
},
{
"epoch": 0.1728,
"grad_norm": 5.244436264038086,
"learning_rate": 8.306104417670683e-06,
"loss": 0.2384,
"step": 21600
},
{
"epoch": 0.1736,
"grad_norm": 4.57738733291626,
"learning_rate": 8.298072289156627e-06,
"loss": 0.2227,
"step": 21700
},
{
"epoch": 0.1744,
"grad_norm": 11.43252944946289,
"learning_rate": 8.290040160642571e-06,
"loss": 0.2265,
"step": 21800
},
{
"epoch": 0.1752,
"grad_norm": 4.118782997131348,
"learning_rate": 8.282008032128515e-06,
"loss": 0.2211,
"step": 21900
},
{
"epoch": 0.176,
"grad_norm": 7.751514434814453,
"learning_rate": 8.273975903614459e-06,
"loss": 0.2359,
"step": 22000
},
{
"epoch": 0.1768,
"grad_norm": 11.36629867553711,
"learning_rate": 8.265943775100403e-06,
"loss": 0.234,
"step": 22100
},
{
"epoch": 0.1776,
"grad_norm": 4.669189453125,
"learning_rate": 8.257911646586347e-06,
"loss": 0.222,
"step": 22200
},
{
"epoch": 0.1784,
"grad_norm": 8.595556259155273,
"learning_rate": 8.249879518072291e-06,
"loss": 0.2188,
"step": 22300
},
{
"epoch": 0.1792,
"grad_norm": 9.70352840423584,
"learning_rate": 8.241847389558235e-06,
"loss": 0.2289,
"step": 22400
},
{
"epoch": 0.18,
"grad_norm": 6.345508575439453,
"learning_rate": 8.233815261044177e-06,
"loss": 0.222,
"step": 22500
},
{
"epoch": 0.1808,
"grad_norm": 4.9803643226623535,
"learning_rate": 8.225783132530121e-06,
"loss": 0.2338,
"step": 22600
},
{
"epoch": 0.1816,
"grad_norm": 9.151689529418945,
"learning_rate": 8.217751004016065e-06,
"loss": 0.2475,
"step": 22700
},
{
"epoch": 0.1824,
"grad_norm": 7.338719844818115,
"learning_rate": 8.209718875502009e-06,
"loss": 0.2194,
"step": 22800
},
{
"epoch": 0.1832,
"grad_norm": 15.189725875854492,
"learning_rate": 8.201686746987953e-06,
"loss": 0.2406,
"step": 22900
},
{
"epoch": 0.184,
"grad_norm": 6.638333320617676,
"learning_rate": 8.193654618473897e-06,
"loss": 0.2365,
"step": 23000
},
{
"epoch": 0.1848,
"grad_norm": 11.742992401123047,
"learning_rate": 8.18562248995984e-06,
"loss": 0.2187,
"step": 23100
},
{
"epoch": 0.1856,
"grad_norm": 13.45174789428711,
"learning_rate": 8.177590361445784e-06,
"loss": 0.215,
"step": 23200
},
{
"epoch": 0.1864,
"grad_norm": 4.851787090301514,
"learning_rate": 8.169558232931728e-06,
"loss": 0.2547,
"step": 23300
},
{
"epoch": 0.1872,
"grad_norm": 15.86487865447998,
"learning_rate": 8.161526104417672e-06,
"loss": 0.205,
"step": 23400
},
{
"epoch": 0.188,
"grad_norm": 7.100121974945068,
"learning_rate": 8.153574297188755e-06,
"loss": 0.2496,
"step": 23500
},
{
"epoch": 0.1888,
"grad_norm": 4.416376113891602,
"learning_rate": 8.145542168674699e-06,
"loss": 0.2205,
"step": 23600
},
{
"epoch": 0.1896,
"grad_norm": 8.244140625,
"learning_rate": 8.137510040160643e-06,
"loss": 0.216,
"step": 23700
},
{
"epoch": 0.1904,
"grad_norm": 7.894223690032959,
"learning_rate": 8.129477911646587e-06,
"loss": 0.2482,
"step": 23800
},
{
"epoch": 0.1912,
"grad_norm": 7.517197608947754,
"learning_rate": 8.121526104417673e-06,
"loss": 0.2349,
"step": 23900
},
{
"epoch": 0.192,
"grad_norm": 18.533597946166992,
"learning_rate": 8.113493975903615e-06,
"loss": 0.2205,
"step": 24000
},
{
"epoch": 0.192,
"eval_test1_cer": 0.08556464261707372,
"eval_test1_cer_norm": 0.06268398684178933,
"eval_test1_loss": 0.22241076827049255,
"eval_test1_runtime": 1196.1164,
"eval_test1_samples_per_second": 2.09,
"eval_test1_steps_per_second": 0.523,
"eval_test1_wer": 0.21058923000670574,
"eval_test1_wer_norm": 0.14939068938309127,
"step": 24000
},
{
"epoch": 0.192,
"eval_test2_cer": 0.13958729980960913,
"eval_test2_cer_norm": 0.1119799736597459,
"eval_test2_loss": 0.3912878632545471,
"eval_test2_runtime": 1198.2977,
"eval_test2_samples_per_second": 2.086,
"eval_test2_steps_per_second": 0.522,
"eval_test2_wer": 0.28147173266193637,
"eval_test2_wer_norm": 0.21352853541141417,
"step": 24000
},
{
"epoch": 0.1928,
"grad_norm": 7.209935665130615,
"learning_rate": 8.1055421686747e-06,
"loss": 0.2215,
"step": 24100
},
{
"epoch": 0.1936,
"grad_norm": 8.64651107788086,
"learning_rate": 8.097590361445784e-06,
"loss": 0.2163,
"step": 24200
},
{
"epoch": 0.1944,
"grad_norm": 11.35953426361084,
"learning_rate": 8.089558232931727e-06,
"loss": 0.2356,
"step": 24300
},
{
"epoch": 0.1952,
"grad_norm": 10.773683547973633,
"learning_rate": 8.081526104417671e-06,
"loss": 0.2359,
"step": 24400
},
{
"epoch": 0.196,
"grad_norm": 7.809534072875977,
"learning_rate": 8.073493975903615e-06,
"loss": 0.2224,
"step": 24500
},
{
"epoch": 0.1968,
"grad_norm": 6.901273727416992,
"learning_rate": 8.06546184738956e-06,
"loss": 0.2193,
"step": 24600
},
{
"epoch": 0.1976,
"grad_norm": 10.301507949829102,
"learning_rate": 8.057429718875503e-06,
"loss": 0.238,
"step": 24700
},
{
"epoch": 0.1984,
"grad_norm": 6.89518404006958,
"learning_rate": 8.049397590361447e-06,
"loss": 0.2358,
"step": 24800
},
{
"epoch": 0.1992,
"grad_norm": 3.8990767002105713,
"learning_rate": 8.041365461847391e-06,
"loss": 0.2192,
"step": 24900
},
{
"epoch": 0.2,
"grad_norm": 6.363491058349609,
"learning_rate": 8.033333333333335e-06,
"loss": 0.2334,
"step": 25000
},
{
"epoch": 0.2008,
"grad_norm": 21.063215255737305,
"learning_rate": 8.025301204819279e-06,
"loss": 0.236,
"step": 25100
},
{
"epoch": 0.2016,
"grad_norm": 7.973589897155762,
"learning_rate": 8.017269076305221e-06,
"loss": 0.237,
"step": 25200
},
{
"epoch": 0.2024,
"grad_norm": 8.546441078186035,
"learning_rate": 8.009236947791165e-06,
"loss": 0.2591,
"step": 25300
},
{
"epoch": 0.2032,
"grad_norm": 14.01245403289795,
"learning_rate": 8.001204819277109e-06,
"loss": 0.2364,
"step": 25400
},
{
"epoch": 0.204,
"grad_norm": 3.486008405685425,
"learning_rate": 7.993172690763053e-06,
"loss": 0.2349,
"step": 25500
},
{
"epoch": 0.2048,
"grad_norm": 8.98222827911377,
"learning_rate": 7.985140562248997e-06,
"loss": 0.2138,
"step": 25600
},
{
"epoch": 0.2056,
"grad_norm": 10.626484870910645,
"learning_rate": 7.97710843373494e-06,
"loss": 0.2229,
"step": 25700
},
{
"epoch": 0.2064,
"grad_norm": 6.444272041320801,
"learning_rate": 7.969076305220885e-06,
"loss": 0.2145,
"step": 25800
},
{
"epoch": 0.2072,
"grad_norm": 6.722916603088379,
"learning_rate": 7.961044176706828e-06,
"loss": 0.2189,
"step": 25900
},
{
"epoch": 0.208,
"grad_norm": 10.45226001739502,
"learning_rate": 7.953012048192772e-06,
"loss": 0.214,
"step": 26000
},
{
"epoch": 0.2088,
"grad_norm": 7.136618614196777,
"learning_rate": 7.944979919678716e-06,
"loss": 0.2147,
"step": 26100
},
{
"epoch": 0.2096,
"grad_norm": 5.5284953117370605,
"learning_rate": 7.937028112449799e-06,
"loss": 0.2236,
"step": 26200
},
{
"epoch": 0.2104,
"grad_norm": 7.089993000030518,
"learning_rate": 7.928995983935743e-06,
"loss": 0.2083,
"step": 26300
},
{
"epoch": 0.2112,
"grad_norm": 9.043608665466309,
"learning_rate": 7.920963855421687e-06,
"loss": 0.2478,
"step": 26400
},
{
"epoch": 0.212,
"grad_norm": 5.048727989196777,
"learning_rate": 7.91293172690763e-06,
"loss": 0.2063,
"step": 26500
},
{
"epoch": 0.2128,
"grad_norm": 9.47496509552002,
"learning_rate": 7.904899598393575e-06,
"loss": 0.2401,
"step": 26600
},
{
"epoch": 0.2136,
"grad_norm": 5.263045787811279,
"learning_rate": 7.896867469879519e-06,
"loss": 0.2373,
"step": 26700
},
{
"epoch": 0.2144,
"grad_norm": 10.800714492797852,
"learning_rate": 7.888835341365462e-06,
"loss": 0.2123,
"step": 26800
},
{
"epoch": 0.2152,
"grad_norm": 5.966376304626465,
"learning_rate": 7.880803212851406e-06,
"loss": 0.2335,
"step": 26900
},
{
"epoch": 0.216,
"grad_norm": 4.061577320098877,
"learning_rate": 7.87277108433735e-06,
"loss": 0.2183,
"step": 27000
},
{
"epoch": 0.2168,
"grad_norm": 10.104215621948242,
"learning_rate": 7.864738955823293e-06,
"loss": 0.1969,
"step": 27100
},
{
"epoch": 0.2176,
"grad_norm": 6.445654392242432,
"learning_rate": 7.856706827309236e-06,
"loss": 0.2144,
"step": 27200
},
{
"epoch": 0.2184,
"grad_norm": 9.509758949279785,
"learning_rate": 7.84867469879518e-06,
"loss": 0.2022,
"step": 27300
},
{
"epoch": 0.2192,
"grad_norm": 11.04575252532959,
"learning_rate": 7.840642570281124e-06,
"loss": 0.2153,
"step": 27400
},
{
"epoch": 0.22,
"grad_norm": 10.30285358428955,
"learning_rate": 7.832610441767068e-06,
"loss": 0.2269,
"step": 27500
},
{
"epoch": 0.2208,
"grad_norm": 9.692445755004883,
"learning_rate": 7.824578313253012e-06,
"loss": 0.2372,
"step": 27600
},
{
"epoch": 0.2216,
"grad_norm": 7.303224563598633,
"learning_rate": 7.816546184738956e-06,
"loss": 0.2229,
"step": 27700
},
{
"epoch": 0.2224,
"grad_norm": 7.364916801452637,
"learning_rate": 7.8085140562249e-06,
"loss": 0.2312,
"step": 27800
},
{
"epoch": 0.2232,
"grad_norm": 8.000985145568848,
"learning_rate": 7.800481927710844e-06,
"loss": 0.2146,
"step": 27900
},
{
"epoch": 0.224,
"grad_norm": 7.1691131591796875,
"learning_rate": 7.792449799196788e-06,
"loss": 0.2328,
"step": 28000
},
{
"epoch": 0.224,
"eval_test1_cer": 0.06472664127268653,
"eval_test1_cer_norm": 0.04759046269839364,
"eval_test1_loss": 0.21469315886497498,
"eval_test1_runtime": 1210.6322,
"eval_test1_samples_per_second": 2.065,
"eval_test1_steps_per_second": 0.516,
"eval_test1_wer": 0.18274585264876528,
"eval_test1_wer_norm": 0.12037172331161052,
"step": 28000
},
{
"epoch": 0.224,
"eval_test2_cer": 0.17353100384514877,
"eval_test2_cer_norm": 0.13871242640223116,
"eval_test2_loss": 0.37395521998405457,
"eval_test2_runtime": 1313.1792,
"eval_test2_samples_per_second": 1.904,
"eval_test2_steps_per_second": 0.476,
"eval_test2_wer": 0.35056649118791483,
"eval_test2_wer_norm": 0.2835205134082054,
"step": 28000
},
{
"epoch": 0.2248,
"grad_norm": 5.209496021270752,
"learning_rate": 7.784417670682732e-06,
"loss": 0.2212,
"step": 28100
},
{
"epoch": 0.2256,
"grad_norm": 6.241885185241699,
"learning_rate": 7.776465863453816e-06,
"loss": 0.2175,
"step": 28200
},
{
"epoch": 0.2264,
"grad_norm": 6.768528461456299,
"learning_rate": 7.76843373493976e-06,
"loss": 0.2111,
"step": 28300
},
{
"epoch": 0.2272,
"grad_norm": 16.620553970336914,
"learning_rate": 7.760401606425704e-06,
"loss": 0.2201,
"step": 28400
},
{
"epoch": 0.228,
"grad_norm": 5.785097122192383,
"learning_rate": 7.752369477911648e-06,
"loss": 0.2195,
"step": 28500
},
{
"epoch": 0.2288,
"grad_norm": 7.3884663581848145,
"learning_rate": 7.744337349397592e-06,
"loss": 0.2107,
"step": 28600
},
{
"epoch": 0.2296,
"grad_norm": 6.965432643890381,
"learning_rate": 7.736305220883534e-06,
"loss": 0.247,
"step": 28700
},
{
"epoch": 0.2304,
"grad_norm": 7.899501323699951,
"learning_rate": 7.728273092369478e-06,
"loss": 0.2315,
"step": 28800
},
{
"epoch": 0.2312,
"grad_norm": 8.336009979248047,
"learning_rate": 7.720240963855422e-06,
"loss": 0.2262,
"step": 28900
},
{
"epoch": 0.232,
"grad_norm": 7.90683126449585,
"learning_rate": 7.712208835341366e-06,
"loss": 0.2322,
"step": 29000
},
{
"epoch": 0.2328,
"grad_norm": 6.586302757263184,
"learning_rate": 7.70417670682731e-06,
"loss": 0.2341,
"step": 29100
},
{
"epoch": 0.2336,
"grad_norm": 6.594479084014893,
"learning_rate": 7.696144578313254e-06,
"loss": 0.2226,
"step": 29200
},
{
"epoch": 0.2344,
"grad_norm": 7.850062370300293,
"learning_rate": 7.688112449799197e-06,
"loss": 0.2094,
"step": 29300
},
{
"epoch": 0.2352,
"grad_norm": 12.644471168518066,
"learning_rate": 7.680080321285141e-06,
"loss": 0.226,
"step": 29400
},
{
"epoch": 0.236,
"grad_norm": 7.289064884185791,
"learning_rate": 7.672048192771085e-06,
"loss": 0.2214,
"step": 29500
},
{
"epoch": 0.2368,
"grad_norm": 10.635286331176758,
"learning_rate": 7.664016064257028e-06,
"loss": 0.2142,
"step": 29600
},
{
"epoch": 0.2376,
"grad_norm": 5.214193344116211,
"learning_rate": 7.655983935742971e-06,
"loss": 0.2218,
"step": 29700
},
{
"epoch": 0.2384,
"grad_norm": 6.800753593444824,
"learning_rate": 7.647951807228915e-06,
"loss": 0.2179,
"step": 29800
},
{
"epoch": 0.2392,
"grad_norm": 8.79254150390625,
"learning_rate": 7.63991967871486e-06,
"loss": 0.2066,
"step": 29900
},
{
"epoch": 0.24,
"grad_norm": 7.172235488891602,
"learning_rate": 7.631887550200803e-06,
"loss": 0.2207,
"step": 30000
},
{
"epoch": 0.2408,
"grad_norm": 5.066342830657959,
"learning_rate": 7.623855421686748e-06,
"loss": 0.2034,
"step": 30100
},
{
"epoch": 0.2416,
"grad_norm": 5.975930690765381,
"learning_rate": 7.615903614457832e-06,
"loss": 0.2136,
"step": 30200
},
{
"epoch": 0.2424,
"grad_norm": 5.854097366333008,
"learning_rate": 7.607871485943775e-06,
"loss": 0.2174,
"step": 30300
},
{
"epoch": 0.2432,
"grad_norm": 6.811086177825928,
"learning_rate": 7.599839357429719e-06,
"loss": 0.2232,
"step": 30400
},
{
"epoch": 0.244,
"grad_norm": 7.538053035736084,
"learning_rate": 7.591807228915663e-06,
"loss": 0.2003,
"step": 30500
},
{
"epoch": 0.2448,
"grad_norm": 5.63380241394043,
"learning_rate": 7.583775100401607e-06,
"loss": 0.22,
"step": 30600
},
{
"epoch": 0.2456,
"grad_norm": 7.308541297912598,
"learning_rate": 7.575742971887551e-06,
"loss": 0.194,
"step": 30700
},
{
"epoch": 0.2464,
"grad_norm": 12.48105239868164,
"learning_rate": 7.567710843373495e-06,
"loss": 0.2254,
"step": 30800
},
{
"epoch": 0.2472,
"grad_norm": 6.443123817443848,
"learning_rate": 7.559678714859439e-06,
"loss": 0.2157,
"step": 30900
},
{
"epoch": 0.248,
"grad_norm": 5.197587013244629,
"learning_rate": 7.551646586345383e-06,
"loss": 0.2095,
"step": 31000
},
{
"epoch": 0.2488,
"grad_norm": 5.799818515777588,
"learning_rate": 7.543614457831327e-06,
"loss": 0.2432,
"step": 31100
},
{
"epoch": 0.2496,
"grad_norm": 6.902918338775635,
"learning_rate": 7.535582329317269e-06,
"loss": 0.2106,
"step": 31200
},
{
"epoch": 0.2504,
"grad_norm": 12.562540054321289,
"learning_rate": 7.527550200803213e-06,
"loss": 0.2241,
"step": 31300
},
{
"epoch": 0.2512,
"grad_norm": 6.103118419647217,
"learning_rate": 7.519518072289157e-06,
"loss": 0.2141,
"step": 31400
},
{
"epoch": 0.252,
"grad_norm": 3.2933740615844727,
"learning_rate": 7.511485943775101e-06,
"loss": 0.2079,
"step": 31500
},
{
"epoch": 0.2528,
"grad_norm": 4.528769493103027,
"learning_rate": 7.503453815261045e-06,
"loss": 0.2072,
"step": 31600
},
{
"epoch": 0.2536,
"grad_norm": 5.217092990875244,
"learning_rate": 7.4954216867469886e-06,
"loss": 0.2165,
"step": 31700
},
{
"epoch": 0.2544,
"grad_norm": 6.123150825500488,
"learning_rate": 7.4873895582329325e-06,
"loss": 0.1907,
"step": 31800
},
{
"epoch": 0.2552,
"grad_norm": 4.618309020996094,
"learning_rate": 7.479357429718876e-06,
"loss": 0.2214,
"step": 31900
},
{
"epoch": 0.256,
"grad_norm": 8.602248191833496,
"learning_rate": 7.47132530120482e-06,
"loss": 0.2075,
"step": 32000
},
{
"epoch": 0.256,
"eval_test1_cer": 0.06202853088356113,
"eval_test1_cer_norm": 0.046020121497346744,
"eval_test1_loss": 0.21270166337490082,
"eval_test1_runtime": 1209.2653,
"eval_test1_samples_per_second": 2.067,
"eval_test1_steps_per_second": 0.517,
"eval_test1_wer": 0.18376629056240706,
"eval_test1_wer_norm": 0.12262193518220871,
"step": 32000
},
{
"epoch": 0.256,
"eval_test2_cer": 0.12037555530667861,
"eval_test2_cer_norm": 0.09361442516268981,
"eval_test2_loss": 0.3685109317302704,
"eval_test2_runtime": 1222.5262,
"eval_test2_samples_per_second": 2.045,
"eval_test2_steps_per_second": 0.511,
"eval_test2_wer": 0.2525177386129549,
"eval_test2_wer_norm": 0.18381847352738942,
"step": 32000
},
{
"epoch": 0.2568,
"grad_norm": 4.757772922515869,
"learning_rate": 7.463293172690763e-06,
"loss": 0.2147,
"step": 32100
},
{
"epoch": 0.2576,
"grad_norm": 2.9504878520965576,
"learning_rate": 7.455261044176707e-06,
"loss": 0.225,
"step": 32200
},
{
"epoch": 0.2584,
"grad_norm": 6.651636600494385,
"learning_rate": 7.447228915662651e-06,
"loss": 0.2233,
"step": 32300
},
{
"epoch": 0.2592,
"grad_norm": 6.254730224609375,
"learning_rate": 7.4392771084337355e-06,
"loss": 0.2067,
"step": 32400
},
{
"epoch": 0.26,
"grad_norm": 8.979650497436523,
"learning_rate": 7.4312449799196795e-06,
"loss": 0.2281,
"step": 32500
},
{
"epoch": 0.2608,
"grad_norm": 4.179977893829346,
"learning_rate": 7.423212851405623e-06,
"loss": 0.1986,
"step": 32600
},
{
"epoch": 0.2616,
"grad_norm": 5.462186336517334,
"learning_rate": 7.415180722891567e-06,
"loss": 0.2104,
"step": 32700
},
{
"epoch": 0.2624,
"grad_norm": 7.806402683258057,
"learning_rate": 7.40714859437751e-06,
"loss": 0.2153,
"step": 32800
},
{
"epoch": 0.2632,
"grad_norm": 5.667836666107178,
"learning_rate": 7.399116465863454e-06,
"loss": 0.1883,
"step": 32900
},
{
"epoch": 0.264,
"grad_norm": 8.295492172241211,
"learning_rate": 7.391084337349398e-06,
"loss": 0.2139,
"step": 33000
},
{
"epoch": 0.2648,
"grad_norm": 10.960628509521484,
"learning_rate": 7.383052208835342e-06,
"loss": 0.2415,
"step": 33100
},
{
"epoch": 0.2656,
"grad_norm": 8.189926147460938,
"learning_rate": 7.375020080321286e-06,
"loss": 0.2168,
"step": 33200
},
{
"epoch": 0.2664,
"grad_norm": 8.110308647155762,
"learning_rate": 7.36698795180723e-06,
"loss": 0.2169,
"step": 33300
},
{
"epoch": 0.2672,
"grad_norm": 7.833503723144531,
"learning_rate": 7.358955823293174e-06,
"loss": 0.2131,
"step": 33400
},
{
"epoch": 0.268,
"grad_norm": 4.76417350769043,
"learning_rate": 7.350923694779117e-06,
"loss": 0.2093,
"step": 33500
},
{
"epoch": 0.2688,
"grad_norm": 6.401252269744873,
"learning_rate": 7.342891566265061e-06,
"loss": 0.2128,
"step": 33600
},
{
"epoch": 0.2696,
"grad_norm": 4.773887634277344,
"learning_rate": 7.334859437751004e-06,
"loss": 0.2042,
"step": 33700
},
{
"epoch": 0.2704,
"grad_norm": 3.329989433288574,
"learning_rate": 7.326827309236948e-06,
"loss": 0.1916,
"step": 33800
},
{
"epoch": 0.2712,
"grad_norm": 5.542182445526123,
"learning_rate": 7.318795180722892e-06,
"loss": 0.2039,
"step": 33900
},
{
"epoch": 0.272,
"grad_norm": 10.034430503845215,
"learning_rate": 7.310763052208836e-06,
"loss": 0.207,
"step": 34000
},
{
"epoch": 0.2728,
"grad_norm": 6.576022624969482,
"learning_rate": 7.30273092369478e-06,
"loss": 0.2136,
"step": 34100
},
{
"epoch": 0.2736,
"grad_norm": 5.687163829803467,
"learning_rate": 7.2946987951807236e-06,
"loss": 0.1901,
"step": 34200
},
{
"epoch": 0.2744,
"grad_norm": 7.63690710067749,
"learning_rate": 7.2866666666666675e-06,
"loss": 0.2094,
"step": 34300
},
{
"epoch": 0.2752,
"grad_norm": 17.66728401184082,
"learning_rate": 7.278714859437752e-06,
"loss": 0.2149,
"step": 34400
},
{
"epoch": 0.276,
"grad_norm": 6.838101863861084,
"learning_rate": 7.270682730923695e-06,
"loss": 0.2181,
"step": 34500
},
{
"epoch": 0.2768,
"grad_norm": 9.850558280944824,
"learning_rate": 7.262650602409639e-06,
"loss": 0.1968,
"step": 34600
},
{
"epoch": 0.2776,
"grad_norm": 5.048129558563232,
"learning_rate": 7.254618473895583e-06,
"loss": 0.2264,
"step": 34700
},
{
"epoch": 0.2784,
"grad_norm": 14.177302360534668,
"learning_rate": 7.246586345381527e-06,
"loss": 0.2079,
"step": 34800
},
{
"epoch": 0.2792,
"grad_norm": 6.149969577789307,
"learning_rate": 7.2385542168674706e-06,
"loss": 0.2301,
"step": 34900
},
{
"epoch": 0.28,
"grad_norm": 5.608641624450684,
"learning_rate": 7.2305220883534145e-06,
"loss": 0.2208,
"step": 35000
},
{
"epoch": 0.2808,
"grad_norm": 6.874965667724609,
"learning_rate": 7.222489959839358e-06,
"loss": 0.208,
"step": 35100
},
{
"epoch": 0.2816,
"grad_norm": 5.777594089508057,
"learning_rate": 7.214457831325302e-06,
"loss": 0.1997,
"step": 35200
},
{
"epoch": 0.2824,
"grad_norm": 2.708245277404785,
"learning_rate": 7.206425702811246e-06,
"loss": 0.2111,
"step": 35300
},
{
"epoch": 0.2832,
"grad_norm": 3.1390602588653564,
"learning_rate": 7.1983935742971885e-06,
"loss": 0.2094,
"step": 35400
},
{
"epoch": 0.284,
"grad_norm": 4.028870105743408,
"learning_rate": 7.190361445783132e-06,
"loss": 0.2246,
"step": 35500
},
{
"epoch": 0.2848,
"grad_norm": 15.425012588500977,
"learning_rate": 7.182329317269076e-06,
"loss": 0.1956,
"step": 35600
},
{
"epoch": 0.2856,
"grad_norm": 5.734979152679443,
"learning_rate": 7.17429718875502e-06,
"loss": 0.2125,
"step": 35700
},
{
"epoch": 0.2864,
"grad_norm": 5.9506916999816895,
"learning_rate": 7.166265060240964e-06,
"loss": 0.205,
"step": 35800
},
{
"epoch": 0.2872,
"grad_norm": 6.654294013977051,
"learning_rate": 7.158232931726908e-06,
"loss": 0.2183,
"step": 35900
},
{
"epoch": 0.288,
"grad_norm": 4.6381754875183105,
"learning_rate": 7.150200803212852e-06,
"loss": 0.2108,
"step": 36000
},
{
"epoch": 0.288,
"eval_test1_cer": 0.05035850324893569,
"eval_test1_cer_norm": 0.034854851489903235,
"eval_test1_loss": 0.2031903713941574,
"eval_test1_runtime": 1180.1384,
"eval_test1_samples_per_second": 2.118,
"eval_test1_steps_per_second": 0.53,
"eval_test1_wer": 0.15638939910784572,
"eval_test1_wer_norm": 0.09538560448873433,
"step": 36000
},
{
"epoch": 0.288,
"eval_test2_cer": 0.15337197147870235,
"eval_test2_cer_norm": 0.11982878834831112,
"eval_test2_loss": 0.36121314764022827,
"eval_test2_runtime": 1277.8248,
"eval_test2_samples_per_second": 1.956,
"eval_test2_steps_per_second": 0.489,
"eval_test2_wer": 0.2875944151979858,
"eval_test2_wer_norm": 0.22060508824203529,
"step": 36000
},
{
"epoch": 0.2888,
"grad_norm": 6.482578277587891,
"learning_rate": 7.142168674698796e-06,
"loss": 0.2319,
"step": 36100
},
{
"epoch": 0.2896,
"grad_norm": 6.14495849609375,
"learning_rate": 7.13413654618474e-06,
"loss": 0.2008,
"step": 36200
},
{
"epoch": 0.2904,
"grad_norm": 7.641204357147217,
"learning_rate": 7.126104417670683e-06,
"loss": 0.2154,
"step": 36300
},
{
"epoch": 0.2912,
"grad_norm": 10.709319114685059,
"learning_rate": 7.118072289156627e-06,
"loss": 0.1996,
"step": 36400
},
{
"epoch": 0.292,
"grad_norm": 9.637004852294922,
"learning_rate": 7.110120481927711e-06,
"loss": 0.214,
"step": 36500
},
{
"epoch": 0.2928,
"grad_norm": 2.7376303672790527,
"learning_rate": 7.102088353413655e-06,
"loss": 0.2064,
"step": 36600
},
{
"epoch": 0.2936,
"grad_norm": 5.24441385269165,
"learning_rate": 7.094056224899599e-06,
"loss": 0.2105,
"step": 36700
},
{
"epoch": 0.2944,
"grad_norm": 2.739924907684326,
"learning_rate": 7.086024096385543e-06,
"loss": 0.231,
"step": 36800
},
{
"epoch": 0.2952,
"grad_norm": 10.428742408752441,
"learning_rate": 7.077991967871487e-06,
"loss": 0.1984,
"step": 36900
},
{
"epoch": 0.296,
"grad_norm": 7.877098560333252,
"learning_rate": 7.06995983935743e-06,
"loss": 0.2054,
"step": 37000
},
{
"epoch": 0.2968,
"grad_norm": 4.027400493621826,
"learning_rate": 7.061927710843374e-06,
"loss": 0.2088,
"step": 37100
},
{
"epoch": 0.2976,
"grad_norm": 2.8548176288604736,
"learning_rate": 7.053895582329318e-06,
"loss": 0.2126,
"step": 37200
},
{
"epoch": 0.2984,
"grad_norm": 8.31721019744873,
"learning_rate": 7.045863453815262e-06,
"loss": 0.2093,
"step": 37300
},
{
"epoch": 0.2992,
"grad_norm": 10.732782363891602,
"learning_rate": 7.0378313253012056e-06,
"loss": 0.2148,
"step": 37400
},
{
"epoch": 0.3,
"grad_norm": 6.584483623504639,
"learning_rate": 7.0297991967871495e-06,
"loss": 0.215,
"step": 37500
},
{
"epoch": 0.3008,
"grad_norm": 5.9521684646606445,
"learning_rate": 7.021767068273093e-06,
"loss": 0.1889,
"step": 37600
},
{
"epoch": 0.3016,
"grad_norm": 4.7626166343688965,
"learning_rate": 7.013734939759037e-06,
"loss": 0.2022,
"step": 37700
},
{
"epoch": 0.3024,
"grad_norm": 4.184934616088867,
"learning_rate": 7.005702811244981e-06,
"loss": 0.2237,
"step": 37800
},
{
"epoch": 0.3032,
"grad_norm": 5.2891998291015625,
"learning_rate": 6.9976706827309235e-06,
"loss": 0.1974,
"step": 37900
},
{
"epoch": 0.304,
"grad_norm": 12.890066146850586,
"learning_rate": 6.989638554216867e-06,
"loss": 0.2317,
"step": 38000
},
{
"epoch": 0.3048,
"grad_norm": 13.514941215515137,
"learning_rate": 6.981606425702811e-06,
"loss": 0.2175,
"step": 38100
},
{
"epoch": 0.3056,
"grad_norm": 5.781461238861084,
"learning_rate": 6.973574297188755e-06,
"loss": 0.1852,
"step": 38200
},
{
"epoch": 0.3064,
"grad_norm": 8.979798316955566,
"learning_rate": 6.965542168674699e-06,
"loss": 0.2156,
"step": 38300
},
{
"epoch": 0.3072,
"grad_norm": 6.392210006713867,
"learning_rate": 6.957510040160643e-06,
"loss": 0.1815,
"step": 38400
},
{
"epoch": 0.308,
"grad_norm": 7.205411434173584,
"learning_rate": 6.949558232931728e-06,
"loss": 0.2074,
"step": 38500
},
{
"epoch": 0.3088,
"grad_norm": 6.6332621574401855,
"learning_rate": 6.9415261044176704e-06,
"loss": 0.2037,
"step": 38600
},
{
"epoch": 0.3096,
"grad_norm": 6.583282470703125,
"learning_rate": 6.933493975903614e-06,
"loss": 0.2161,
"step": 38700
},
{
"epoch": 0.3104,
"grad_norm": 5.363083362579346,
"learning_rate": 6.925461847389558e-06,
"loss": 0.1842,
"step": 38800
},
{
"epoch": 0.3112,
"grad_norm": 11.732734680175781,
"learning_rate": 6.917429718875502e-06,
"loss": 0.1955,
"step": 38900
},
{
"epoch": 0.312,
"grad_norm": 4.4274444580078125,
"learning_rate": 6.909397590361446e-06,
"loss": 0.2065,
"step": 39000
},
{
"epoch": 0.3128,
"grad_norm": 8.530516624450684,
"learning_rate": 6.90136546184739e-06,
"loss": 0.2015,
"step": 39100
},
{
"epoch": 0.3136,
"grad_norm": 4.273055076599121,
"learning_rate": 6.893333333333334e-06,
"loss": 0.1997,
"step": 39200
},
{
"epoch": 0.3144,
"grad_norm": 5.483185291290283,
"learning_rate": 6.885301204819278e-06,
"loss": 0.1987,
"step": 39300
},
{
"epoch": 0.3152,
"grad_norm": 8.107812881469727,
"learning_rate": 6.877269076305222e-06,
"loss": 0.2146,
"step": 39400
},
{
"epoch": 0.316,
"grad_norm": 11.944705963134766,
"learning_rate": 6.869236947791165e-06,
"loss": 0.2243,
"step": 39500
},
{
"epoch": 0.3168,
"grad_norm": 6.820212364196777,
"learning_rate": 6.861204819277109e-06,
"loss": 0.2106,
"step": 39600
},
{
"epoch": 0.3176,
"grad_norm": 16.74178123474121,
"learning_rate": 6.853172690763053e-06,
"loss": 0.208,
"step": 39700
},
{
"epoch": 0.3184,
"grad_norm": 8.6283597946167,
"learning_rate": 6.845140562248997e-06,
"loss": 0.191,
"step": 39800
},
{
"epoch": 0.3192,
"grad_norm": 6.8946661949157715,
"learning_rate": 6.8371084337349406e-06,
"loss": 0.2055,
"step": 39900
},
{
"epoch": 0.32,
"grad_norm": 6.649729251861572,
"learning_rate": 6.8290763052208845e-06,
"loss": 0.2005,
"step": 40000
},
{
"epoch": 0.32,
"eval_test1_cer": 0.04994771827619688,
"eval_test1_cer_norm": 0.03569524815713017,
"eval_test1_loss": 0.20380495488643646,
"eval_test1_runtime": 1179.5149,
"eval_test1_samples_per_second": 2.12,
"eval_test1_steps_per_second": 0.53,
"eval_test1_wer": 0.16318260007580396,
"eval_test1_wer_norm": 0.10292527543177767,
"step": 40000
},
{
"epoch": 0.32,
"eval_test2_cer": 0.10917142643819763,
"eval_test2_cer_norm": 0.08240529129222188,
"eval_test2_loss": 0.36028367280960083,
"eval_test2_runtime": 1209.014,
"eval_test2_samples_per_second": 2.068,
"eval_test2_steps_per_second": 0.517,
"eval_test2_wer": 0.24101625085831999,
"eval_test2_wer_norm": 0.17456451982580792,
"step": 40000
},
{
"epoch": 0.3208,
"grad_norm": 6.898298740386963,
"learning_rate": 6.821044176706828e-06,
"loss": 0.1872,
"step": 40100
},
{
"epoch": 0.3216,
"grad_norm": 5.272704124450684,
"learning_rate": 6.813012048192772e-06,
"loss": 0.207,
"step": 40200
},
{
"epoch": 0.3224,
"grad_norm": 3.6596715450286865,
"learning_rate": 6.804979919678716e-06,
"loss": 0.2143,
"step": 40300
},
{
"epoch": 0.3232,
"grad_norm": 3.9344301223754883,
"learning_rate": 6.7969477911646585e-06,
"loss": 0.1844,
"step": 40400
},
{
"epoch": 0.324,
"grad_norm": 10.56173324584961,
"learning_rate": 6.788915662650602e-06,
"loss": 0.2096,
"step": 40500
},
{
"epoch": 0.3248,
"grad_norm": 4.7323713302612305,
"learning_rate": 6.780883534136546e-06,
"loss": 0.2012,
"step": 40600
},
{
"epoch": 0.3256,
"grad_norm": 13.328306198120117,
"learning_rate": 6.7729317269076315e-06,
"loss": 0.1891,
"step": 40700
},
{
"epoch": 0.3264,
"grad_norm": 6.378972053527832,
"learning_rate": 6.764899598393575e-06,
"loss": 0.2113,
"step": 40800
},
{
"epoch": 0.3272,
"grad_norm": 2.7503671646118164,
"learning_rate": 6.756867469879519e-06,
"loss": 0.2242,
"step": 40900
},
{
"epoch": 0.328,
"grad_norm": 7.0376296043396,
"learning_rate": 6.748835341365463e-06,
"loss": 0.1938,
"step": 41000
},
{
"epoch": 0.3288,
"grad_norm": 7.667542457580566,
"learning_rate": 6.740803212851406e-06,
"loss": 0.2199,
"step": 41100
},
{
"epoch": 0.3296,
"grad_norm": 7.959465026855469,
"learning_rate": 6.732771084337349e-06,
"loss": 0.2046,
"step": 41200
},
{
"epoch": 0.3304,
"grad_norm": 6.901830196380615,
"learning_rate": 6.724738955823293e-06,
"loss": 0.2257,
"step": 41300
},
{
"epoch": 0.3312,
"grad_norm": 3.7971105575561523,
"learning_rate": 6.716706827309237e-06,
"loss": 0.1972,
"step": 41400
},
{
"epoch": 0.332,
"grad_norm": 7.321595191955566,
"learning_rate": 6.708674698795181e-06,
"loss": 0.1896,
"step": 41500
},
{
"epoch": 0.3328,
"grad_norm": 4.369442462921143,
"learning_rate": 6.700642570281125e-06,
"loss": 0.2164,
"step": 41600
},
{
"epoch": 0.3336,
"grad_norm": 6.851342678070068,
"learning_rate": 6.692610441767069e-06,
"loss": 0.2097,
"step": 41700
},
{
"epoch": 0.3344,
"grad_norm": 7.6141228675842285,
"learning_rate": 6.684578313253013e-06,
"loss": 0.2214,
"step": 41800
},
{
"epoch": 0.3352,
"grad_norm": 2.4399728775024414,
"learning_rate": 6.676546184738957e-06,
"loss": 0.2117,
"step": 41900
},
{
"epoch": 0.336,
"grad_norm": 4.702713966369629,
"learning_rate": 6.668514056224901e-06,
"loss": 0.2122,
"step": 42000
},
{
"epoch": 0.3368,
"grad_norm": 13.61172866821289,
"learning_rate": 6.660481927710844e-06,
"loss": 0.2114,
"step": 42100
},
{
"epoch": 0.3376,
"grad_norm": 8.276487350463867,
"learning_rate": 6.652449799196788e-06,
"loss": 0.2041,
"step": 42200
},
{
"epoch": 0.3384,
"grad_norm": 7.3196539878845215,
"learning_rate": 6.644417670682732e-06,
"loss": 0.2107,
"step": 42300
},
{
"epoch": 0.3392,
"grad_norm": 6.650379657745361,
"learning_rate": 6.6363855421686756e-06,
"loss": 0.2114,
"step": 42400
},
{
"epoch": 0.34,
"grad_norm": 9.550190925598145,
"learning_rate": 6.628353413654619e-06,
"loss": 0.1889,
"step": 42500
},
{
"epoch": 0.3408,
"grad_norm": 4.141082286834717,
"learning_rate": 6.6203212851405626e-06,
"loss": 0.2006,
"step": 42600
},
{
"epoch": 0.3416,
"grad_norm": 8.406717300415039,
"learning_rate": 6.6122891566265065e-06,
"loss": 0.2016,
"step": 42700
},
{
"epoch": 0.3424,
"grad_norm": 11.38437557220459,
"learning_rate": 6.604337349397591e-06,
"loss": 0.2128,
"step": 42800
},
{
"epoch": 0.3432,
"grad_norm": 4.217543125152588,
"learning_rate": 6.596305220883535e-06,
"loss": 0.2126,
"step": 42900
},
{
"epoch": 0.344,
"grad_norm": 7.4914703369140625,
"learning_rate": 6.588273092369479e-06,
"loss": 0.1981,
"step": 43000
},
{
"epoch": 0.3448,
"grad_norm": 9.509758949279785,
"learning_rate": 6.580240963855422e-06,
"loss": 0.2041,
"step": 43100
},
{
"epoch": 0.3456,
"grad_norm": 6.551538467407227,
"learning_rate": 6.572208835341366e-06,
"loss": 0.2226,
"step": 43200
},
{
"epoch": 0.3464,
"grad_norm": 6.811914443969727,
"learning_rate": 6.5641767068273095e-06,
"loss": 0.2015,
"step": 43300
},
{
"epoch": 0.3472,
"grad_norm": 4.834348201751709,
"learning_rate": 6.5561445783132535e-06,
"loss": 0.2036,
"step": 43400
},
{
"epoch": 0.348,
"grad_norm": 10.421252250671387,
"learning_rate": 6.548112449799197e-06,
"loss": 0.2204,
"step": 43500
},
{
"epoch": 0.3488,
"grad_norm": 7.1061882972717285,
"learning_rate": 6.540080321285141e-06,
"loss": 0.1971,
"step": 43600
},
{
"epoch": 0.3496,
"grad_norm": 3.9507927894592285,
"learning_rate": 6.532048192771084e-06,
"loss": 0.217,
"step": 43700
},
{
"epoch": 0.3504,
"grad_norm": 6.255137920379639,
"learning_rate": 6.524016064257028e-06,
"loss": 0.199,
"step": 43800
},
{
"epoch": 0.3512,
"grad_norm": 6.308269500732422,
"learning_rate": 6.515983935742972e-06,
"loss": 0.2024,
"step": 43900
},
{
"epoch": 0.352,
"grad_norm": 2.7685184478759766,
"learning_rate": 6.507951807228916e-06,
"loss": 0.2076,
"step": 44000
},
{
"epoch": 0.352,
"eval_test1_cer": 0.05585275225931735,
"eval_test1_cer_norm": 0.04021898336014599,
"eval_test1_loss": 0.19787286221981049,
"eval_test1_runtime": 1201.2365,
"eval_test1_samples_per_second": 2.081,
"eval_test1_steps_per_second": 0.52,
"eval_test1_wer": 0.17079215137467565,
"eval_test1_wer_norm": 0.10894532277389754,
"step": 44000
},
{
"epoch": 0.352,
"eval_test2_cer": 0.11820565946167917,
"eval_test2_cer_norm": 0.0912660753021382,
"eval_test2_loss": 0.3588273823261261,
"eval_test2_runtime": 1231.6351,
"eval_test2_samples_per_second": 2.03,
"eval_test2_steps_per_second": 0.507,
"eval_test2_wer": 0.252431906614786,
"eval_test2_wer_norm": 0.18519367407746964,
"step": 44000
},
{
"epoch": 0.3528,
"grad_norm": 10.823402404785156,
"learning_rate": 6.49991967871486e-06,
"loss": 0.1976,
"step": 44100
},
{
"epoch": 0.3536,
"grad_norm": 3.3830673694610596,
"learning_rate": 6.491887550200804e-06,
"loss": 0.1952,
"step": 44200
},
{
"epoch": 0.3544,
"grad_norm": 8.530442237854004,
"learning_rate": 6.483855421686748e-06,
"loss": 0.2122,
"step": 44300
},
{
"epoch": 0.3552,
"grad_norm": 4.178443908691406,
"learning_rate": 6.475823293172692e-06,
"loss": 0.2014,
"step": 44400
},
{
"epoch": 0.356,
"grad_norm": 13.146326065063477,
"learning_rate": 6.467791164658636e-06,
"loss": 0.179,
"step": 44500
},
{
"epoch": 0.3568,
"grad_norm": 9.41238021850586,
"learning_rate": 6.459759036144578e-06,
"loss": 0.2135,
"step": 44600
},
{
"epoch": 0.3576,
"grad_norm": 4.255426406860352,
"learning_rate": 6.451726907630522e-06,
"loss": 0.1957,
"step": 44700
},
{
"epoch": 0.3584,
"grad_norm": 6.541229248046875,
"learning_rate": 6.443775100401607e-06,
"loss": 0.2,
"step": 44800
},
{
"epoch": 0.3592,
"grad_norm": 4.5669636726379395,
"learning_rate": 6.435742971887551e-06,
"loss": 0.187,
"step": 44900
},
{
"epoch": 0.36,
"grad_norm": 6.742759704589844,
"learning_rate": 6.427710843373495e-06,
"loss": 0.1961,
"step": 45000
},
{
"epoch": 0.3608,
"grad_norm": 6.676318168640137,
"learning_rate": 6.419678714859439e-06,
"loss": 0.2023,
"step": 45100
},
{
"epoch": 0.3616,
"grad_norm": 5.580842018127441,
"learning_rate": 6.411646586345383e-06,
"loss": 0.2257,
"step": 45200
},
{
"epoch": 0.3624,
"grad_norm": 12.327486038208008,
"learning_rate": 6.403614457831325e-06,
"loss": 0.1922,
"step": 45300
},
{
"epoch": 0.3632,
"grad_norm": 3.540501117706299,
"learning_rate": 6.395582329317269e-06,
"loss": 0.1914,
"step": 45400
},
{
"epoch": 0.364,
"grad_norm": 5.7018351554870605,
"learning_rate": 6.387550200803213e-06,
"loss": 0.1921,
"step": 45500
},
{
"epoch": 0.3648,
"grad_norm": 5.11850118637085,
"learning_rate": 6.379518072289157e-06,
"loss": 0.1847,
"step": 45600
},
{
"epoch": 0.3656,
"grad_norm": 9.415154457092285,
"learning_rate": 6.371485943775101e-06,
"loss": 0.1992,
"step": 45700
},
{
"epoch": 0.3664,
"grad_norm": 5.810830116271973,
"learning_rate": 6.3634538152610445e-06,
"loss": 0.2,
"step": 45800
},
{
"epoch": 0.3672,
"grad_norm": 6.580953598022461,
"learning_rate": 6.3554216867469885e-06,
"loss": 0.1853,
"step": 45900
},
{
"epoch": 0.368,
"grad_norm": 5.693816184997559,
"learning_rate": 6.347389558232932e-06,
"loss": 0.1935,
"step": 46000
},
{
"epoch": 0.3688,
"grad_norm": 3.0731186866760254,
"learning_rate": 6.339357429718876e-06,
"loss": 0.1779,
"step": 46100
},
{
"epoch": 0.3696,
"grad_norm": 7.017910003662109,
"learning_rate": 6.331325301204819e-06,
"loss": 0.189,
"step": 46200
},
{
"epoch": 0.3704,
"grad_norm": 13.572850227355957,
"learning_rate": 6.323293172690763e-06,
"loss": 0.1949,
"step": 46300
},
{
"epoch": 0.3712,
"grad_norm": 4.851452827453613,
"learning_rate": 6.315261044176707e-06,
"loss": 0.1865,
"step": 46400
},
{
"epoch": 0.372,
"grad_norm": 3.416046619415283,
"learning_rate": 6.307228915662651e-06,
"loss": 0.2003,
"step": 46500
},
{
"epoch": 0.3728,
"grad_norm": 7.885288715362549,
"learning_rate": 6.299196787148595e-06,
"loss": 0.2126,
"step": 46600
},
{
"epoch": 0.3736,
"grad_norm": 3.072671890258789,
"learning_rate": 6.291164658634539e-06,
"loss": 0.2006,
"step": 46700
},
{
"epoch": 0.3744,
"grad_norm": 3.8454980850219727,
"learning_rate": 6.283132530120483e-06,
"loss": 0.1911,
"step": 46800
},
{
"epoch": 0.3752,
"grad_norm": 1.3022770881652832,
"learning_rate": 6.275100401606427e-06,
"loss": 0.1904,
"step": 46900
},
{
"epoch": 0.376,
"grad_norm": 9.75496768951416,
"learning_rate": 6.267068273092371e-06,
"loss": 0.1902,
"step": 47000
},
{
"epoch": 0.3768,
"grad_norm": 5.822065353393555,
"learning_rate": 6.259116465863454e-06,
"loss": 0.2156,
"step": 47100
},
{
"epoch": 0.3776,
"grad_norm": 11.050053596496582,
"learning_rate": 6.251084337349398e-06,
"loss": 0.2068,
"step": 47200
},
{
"epoch": 0.3784,
"grad_norm": 11.641470909118652,
"learning_rate": 6.243052208835342e-06,
"loss": 0.1784,
"step": 47300
},
{
"epoch": 0.3792,
"grad_norm": 6.159931182861328,
"learning_rate": 6.235020080321286e-06,
"loss": 0.1869,
"step": 47400
},
{
"epoch": 0.38,
"grad_norm": 5.114069938659668,
"learning_rate": 6.22698795180723e-06,
"loss": 0.1897,
"step": 47500
},
{
"epoch": 0.3808,
"grad_norm": 4.50574254989624,
"learning_rate": 6.218955823293174e-06,
"loss": 0.1936,
"step": 47600
},
{
"epoch": 0.3816,
"grad_norm": 8.144811630249023,
"learning_rate": 6.210923694779118e-06,
"loss": 0.2113,
"step": 47700
},
{
"epoch": 0.3824,
"grad_norm": 4.306349754333496,
"learning_rate": 6.202891566265062e-06,
"loss": 0.1951,
"step": 47800
},
{
"epoch": 0.3832,
"grad_norm": 6.88608455657959,
"learning_rate": 6.194859437751004e-06,
"loss": 0.2107,
"step": 47900
},
{
"epoch": 0.384,
"grad_norm": 10.85550594329834,
"learning_rate": 6.186827309236948e-06,
"loss": 0.2117,
"step": 48000
},
{
"epoch": 0.384,
"eval_test1_cer": 0.05253379640002988,
"eval_test1_cer_norm": 0.035930559223953704,
"eval_test1_loss": 0.1944696605205536,
"eval_test1_runtime": 2131.2533,
"eval_test1_samples_per_second": 1.173,
"eval_test1_steps_per_second": 0.293,
"eval_test1_wer": 0.15726406017668154,
"eval_test1_wer_norm": 0.09462579268827259,
"step": 48000
},
{
"epoch": 0.384,
"eval_test2_cer": 0.11918094598125957,
"eval_test2_cer_norm": 0.08796870158041524,
"eval_test2_loss": 0.3547360897064209,
"eval_test2_runtime": 2218.4059,
"eval_test2_samples_per_second": 1.127,
"eval_test2_steps_per_second": 0.282,
"eval_test2_wer": 0.2637045090409705,
"eval_test2_wer_norm": 0.1967396286958515,
"step": 48000
},
{
"epoch": 0.3848,
"grad_norm": 8.570365905761719,
"learning_rate": 6.178795180722892e-06,
"loss": 0.2176,
"step": 48100
},
{
"epoch": 0.3856,
"grad_norm": 6.4876604080200195,
"learning_rate": 6.170763052208836e-06,
"loss": 0.2062,
"step": 48200
},
{
"epoch": 0.3864,
"grad_norm": 4.121545314788818,
"learning_rate": 6.1627309236947796e-06,
"loss": 0.1855,
"step": 48300
},
{
"epoch": 0.3872,
"grad_norm": 8.148366928100586,
"learning_rate": 6.1546987951807235e-06,
"loss": 0.1953,
"step": 48400
},
{
"epoch": 0.388,
"grad_norm": 5.750129222869873,
"learning_rate": 6.146666666666667e-06,
"loss": 0.1947,
"step": 48500
},
{
"epoch": 0.3888,
"grad_norm": 7.019141674041748,
"learning_rate": 6.138634538152611e-06,
"loss": 0.1868,
"step": 48600
},
{
"epoch": 0.3896,
"grad_norm": 4.4510273933410645,
"learning_rate": 6.130602409638555e-06,
"loss": 0.2102,
"step": 48700
},
{
"epoch": 0.3904,
"grad_norm": 7.094069480895996,
"learning_rate": 6.122570281124498e-06,
"loss": 0.1899,
"step": 48800
},
{
"epoch": 0.3912,
"grad_norm": 6.22620153427124,
"learning_rate": 6.114538152610442e-06,
"loss": 0.2206,
"step": 48900
},
{
"epoch": 0.392,
"grad_norm": 7.25572395324707,
"learning_rate": 6.106506024096386e-06,
"loss": 0.1914,
"step": 49000
},
{
"epoch": 0.3928,
"grad_norm": 5.544373512268066,
"learning_rate": 6.0985542168674705e-06,
"loss": 0.1877,
"step": 49100
},
{
"epoch": 1.000792,
"grad_norm": 5.080130577087402,
"learning_rate": 6.090522088353414e-06,
"loss": 0.1799,
"step": 49200
},
{
"epoch": 1.001592,
"grad_norm": 2.341817617416382,
"learning_rate": 6.082489959839358e-06,
"loss": 0.1597,
"step": 49300
},
{
"epoch": 1.002392,
"grad_norm": 3.7877089977264404,
"learning_rate": 6.074457831325302e-06,
"loss": 0.1463,
"step": 49400
},
{
"epoch": 1.003192,
"grad_norm": 2.4026944637298584,
"learning_rate": 6.066425702811245e-06,
"loss": 0.1515,
"step": 49500
},
{
"epoch": 1.003992,
"grad_norm": 4.575283527374268,
"learning_rate": 6.058393574297189e-06,
"loss": 0.1579,
"step": 49600
},
{
"epoch": 1.004792,
"grad_norm": 5.175034999847412,
"learning_rate": 6.050361445783133e-06,
"loss": 0.1313,
"step": 49700
},
{
"epoch": 1.005592,
"grad_norm": 4.901139259338379,
"learning_rate": 6.042329317269077e-06,
"loss": 0.1501,
"step": 49800
},
{
"epoch": 1.006392,
"grad_norm": 3.304086685180664,
"learning_rate": 6.034297188755021e-06,
"loss": 0.1295,
"step": 49900
},
{
"epoch": 1.007192,
"grad_norm": 3.6357131004333496,
"learning_rate": 6.026265060240965e-06,
"loss": 0.127,
"step": 50000
},
{
"epoch": 1.007992,
"grad_norm": 23.502784729003906,
"learning_rate": 6.018232931726908e-06,
"loss": 0.1354,
"step": 50100
},
{
"epoch": 1.008792,
"grad_norm": 2.340665817260742,
"learning_rate": 6.010200803212852e-06,
"loss": 0.1405,
"step": 50200
},
{
"epoch": 1.009592,
"grad_norm": 4.528487205505371,
"learning_rate": 6.002168674698796e-06,
"loss": 0.1299,
"step": 50300
},
{
"epoch": 1.010392,
"grad_norm": 3.589071035385132,
"learning_rate": 5.994136546184739e-06,
"loss": 0.1478,
"step": 50400
},
{
"epoch": 1.011192,
"grad_norm": 11.359587669372559,
"learning_rate": 5.986104417670683e-06,
"loss": 0.133,
"step": 50500
},
{
"epoch": 1.011992,
"grad_norm": 2.8059144020080566,
"learning_rate": 5.978072289156627e-06,
"loss": 0.1366,
"step": 50600
},
{
"epoch": 1.012792,
"grad_norm": 8.648773193359375,
"learning_rate": 5.970040160642571e-06,
"loss": 0.1499,
"step": 50700
},
{
"epoch": 1.013592,
"grad_norm": 7.918379306793213,
"learning_rate": 5.9620080321285146e-06,
"loss": 0.1493,
"step": 50800
},
{
"epoch": 1.014392,
"grad_norm": 5.396472454071045,
"learning_rate": 5.9539759036144585e-06,
"loss": 0.1433,
"step": 50900
},
{
"epoch": 1.015192,
"grad_norm": 6.0125412940979,
"learning_rate": 5.945943775100402e-06,
"loss": 0.1216,
"step": 51000
},
{
"epoch": 1.015992,
"grad_norm": 5.78593111038208,
"learning_rate": 5.937911646586346e-06,
"loss": 0.1524,
"step": 51100
},
{
"epoch": 1.016792,
"grad_norm": 2.646674156188965,
"learning_rate": 5.92995983935743e-06,
"loss": 0.1303,
"step": 51200
},
{
"epoch": 1.017592,
"grad_norm": 4.267711639404297,
"learning_rate": 5.921927710843374e-06,
"loss": 0.1298,
"step": 51300
},
{
"epoch": 1.018392,
"grad_norm": 6.027831554412842,
"learning_rate": 5.913895582329318e-06,
"loss": 0.1361,
"step": 51400
},
{
"epoch": 1.019192,
"grad_norm": 7.927816390991211,
"learning_rate": 5.9058634538152615e-06,
"loss": 0.1341,
"step": 51500
},
{
"epoch": 1.019992,
"grad_norm": 9.55642318725586,
"learning_rate": 5.8978313253012055e-06,
"loss": 0.1351,
"step": 51600
},
{
"epoch": 1.020792,
"grad_norm": 5.5017409324646,
"learning_rate": 5.889799196787149e-06,
"loss": 0.1325,
"step": 51700
},
{
"epoch": 1.021592,
"grad_norm": 5.213791370391846,
"learning_rate": 5.881767068273093e-06,
"loss": 0.1418,
"step": 51800
},
{
"epoch": 1.022392,
"grad_norm": 2.497223138809204,
"learning_rate": 5.873734939759037e-06,
"loss": 0.1407,
"step": 51900
},
{
"epoch": 1.023192,
"grad_norm": 3.251509428024292,
"learning_rate": 5.8657028112449794e-06,
"loss": 0.133,
"step": 52000
},
{
"epoch": 1.023192,
"eval_test1_cer": 0.04808518186571066,
"eval_test1_cer_norm": 0.032487334021658223,
"eval_test1_loss": 0.19697345793247223,
"eval_test1_runtime": 1190.0899,
"eval_test1_samples_per_second": 2.101,
"eval_test1_steps_per_second": 0.525,
"eval_test1_wer": 0.1493629551881979,
"eval_test1_wer_norm": 0.08699845115286829,
"step": 52000
},
{
"epoch": 1.023192,
"eval_test2_cer": 0.11403385970806734,
"eval_test2_cer_norm": 0.08345115432290053,
"eval_test2_loss": 0.36047160625457764,
"eval_test2_runtime": 1226.7275,
"eval_test2_samples_per_second": 2.038,
"eval_test2_steps_per_second": 0.509,
"eval_test2_wer": 0.24490730144197756,
"eval_test2_wer_norm": 0.17745817098326838,
"step": 52000
},
{
"epoch": 1.023992,
"grad_norm": 7.025960922241211,
"learning_rate": 5.857670682730923e-06,
"loss": 0.1494,
"step": 52100
},
{
"epoch": 1.024792,
"grad_norm": 4.487267971038818,
"learning_rate": 5.849638554216867e-06,
"loss": 0.1403,
"step": 52200
},
{
"epoch": 1.025592,
"grad_norm": 4.388223648071289,
"learning_rate": 5.841606425702811e-06,
"loss": 0.1172,
"step": 52300
},
{
"epoch": 1.026392,
"grad_norm": 6.941164493560791,
"learning_rate": 5.833574297188755e-06,
"loss": 0.1218,
"step": 52400
},
{
"epoch": 1.027192,
"grad_norm": 6.419257640838623,
"learning_rate": 5.825542168674699e-06,
"loss": 0.1317,
"step": 52500
},
{
"epoch": 1.027992,
"grad_norm": 6.4062323570251465,
"learning_rate": 5.817510040160643e-06,
"loss": 0.1345,
"step": 52600
},
{
"epoch": 1.028792,
"grad_norm": 8.374984741210938,
"learning_rate": 5.809477911646587e-06,
"loss": 0.126,
"step": 52700
},
{
"epoch": 1.029592,
"grad_norm": 9.091133117675781,
"learning_rate": 5.801445783132531e-06,
"loss": 0.1247,
"step": 52800
},
{
"epoch": 1.030392,
"grad_norm": 6.033024311065674,
"learning_rate": 5.793413654618474e-06,
"loss": 0.1273,
"step": 52900
},
{
"epoch": 1.031192,
"grad_norm": 4.625008583068848,
"learning_rate": 5.785381526104418e-06,
"loss": 0.1448,
"step": 53000
},
{
"epoch": 1.031992,
"grad_norm": 3.7176895141601562,
"learning_rate": 5.777349397590362e-06,
"loss": 0.1105,
"step": 53100
},
{
"epoch": 1.032792,
"grad_norm": 4.577524185180664,
"learning_rate": 5.769317269076306e-06,
"loss": 0.1568,
"step": 53200
},
{
"epoch": 1.033592,
"grad_norm": 3.0853545665740967,
"learning_rate": 5.7612851405622496e-06,
"loss": 0.1369,
"step": 53300
},
{
"epoch": 1.034392,
"grad_norm": 5.356490135192871,
"learning_rate": 5.7532530120481935e-06,
"loss": 0.1373,
"step": 53400
},
{
"epoch": 1.035192,
"grad_norm": 7.52249813079834,
"learning_rate": 5.745220883534137e-06,
"loss": 0.1256,
"step": 53500
},
{
"epoch": 1.035992,
"grad_norm": 6.246498107910156,
"learning_rate": 5.737188755020081e-06,
"loss": 0.1473,
"step": 53600
},
{
"epoch": 1.036792,
"grad_norm": 6.8541693687438965,
"learning_rate": 5.729236947791165e-06,
"loss": 0.1281,
"step": 53700
},
{
"epoch": 1.037592,
"grad_norm": 2.0510551929473877,
"learning_rate": 5.721204819277109e-06,
"loss": 0.1243,
"step": 53800
},
{
"epoch": 1.038392,
"grad_norm": 6.91987419128418,
"learning_rate": 5.713172690763053e-06,
"loss": 0.1199,
"step": 53900
},
{
"epoch": 1.039192,
"grad_norm": 3.939608097076416,
"learning_rate": 5.7051405622489965e-06,
"loss": 0.1334,
"step": 54000
},
{
"epoch": 1.039992,
"grad_norm": 8.798985481262207,
"learning_rate": 5.6971084337349405e-06,
"loss": 0.1119,
"step": 54100
},
{
"epoch": 1.040792,
"grad_norm": 6.9459123611450195,
"learning_rate": 5.689076305220884e-06,
"loss": 0.1324,
"step": 54200
},
{
"epoch": 1.041592,
"grad_norm": 7.139726161956787,
"learning_rate": 5.681044176706828e-06,
"loss": 0.1342,
"step": 54300
},
{
"epoch": 1.042392,
"grad_norm": 1.988398790359497,
"learning_rate": 5.673012048192772e-06,
"loss": 0.123,
"step": 54400
},
{
"epoch": 1.043192,
"grad_norm": 1.8150029182434082,
"learning_rate": 5.664979919678716e-06,
"loss": 0.1376,
"step": 54500
},
{
"epoch": 1.043992,
"grad_norm": 8.445022583007812,
"learning_rate": 5.656947791164658e-06,
"loss": 0.1393,
"step": 54600
},
{
"epoch": 1.044792,
"grad_norm": 6.788310527801514,
"learning_rate": 5.648915662650602e-06,
"loss": 0.1412,
"step": 54700
},
{
"epoch": 1.045592,
"grad_norm": 6.29990816116333,
"learning_rate": 5.640883534136546e-06,
"loss": 0.1312,
"step": 54800
},
{
"epoch": 1.046392,
"grad_norm": 1.2310799360275269,
"learning_rate": 5.63285140562249e-06,
"loss": 0.1331,
"step": 54900
},
{
"epoch": 1.047192,
"grad_norm": 5.38347053527832,
"learning_rate": 5.624819277108434e-06,
"loss": 0.1295,
"step": 55000
},
{
"epoch": 1.047992,
"grad_norm": 3.4652884006500244,
"learning_rate": 5.616787148594378e-06,
"loss": 0.1215,
"step": 55100
},
{
"epoch": 1.048792,
"grad_norm": 7.579988479614258,
"learning_rate": 5.608755020080322e-06,
"loss": 0.1388,
"step": 55200
},
{
"epoch": 1.049592,
"grad_norm": 0.8664081692695618,
"learning_rate": 5.600722891566266e-06,
"loss": 0.1199,
"step": 55300
},
{
"epoch": 1.050392,
"grad_norm": 6.584039211273193,
"learning_rate": 5.59269076305221e-06,
"loss": 0.123,
"step": 55400
},
{
"epoch": 1.051192,
"grad_norm": 4.955043792724609,
"learning_rate": 5.584658634538153e-06,
"loss": 0.1223,
"step": 55500
},
{
"epoch": 1.051992,
"grad_norm": 2.857651710510254,
"learning_rate": 5.576626506024097e-06,
"loss": 0.1334,
"step": 55600
},
{
"epoch": 1.052792,
"grad_norm": 6.896506309509277,
"learning_rate": 5.568594377510041e-06,
"loss": 0.1377,
"step": 55700
},
{
"epoch": 1.053592,
"grad_norm": 7.342093467712402,
"learning_rate": 5.5605622489959846e-06,
"loss": 0.1272,
"step": 55800
},
{
"epoch": 1.054392,
"grad_norm": 4.082319259643555,
"learning_rate": 5.552610441767069e-06,
"loss": 0.1355,
"step": 55900
},
{
"epoch": 1.055192,
"grad_norm": 5.364993572235107,
"learning_rate": 5.544578313253013e-06,
"loss": 0.1305,
"step": 56000
},
{
"epoch": 1.055192,
"eval_test1_cer": 0.05301460153857644,
"eval_test1_cer_norm": 0.03525824189017216,
"eval_test1_loss": 0.19662749767303467,
"eval_test1_runtime": 1188.1388,
"eval_test1_samples_per_second": 2.104,
"eval_test1_steps_per_second": 0.526,
"eval_test1_wer": 0.15286159946354122,
"eval_test1_wer_norm": 0.09231713375610041,
"step": 56000
},
{
"epoch": 1.055192,
"eval_test2_cer": 0.1047943032067794,
"eval_test2_cer_norm": 0.08095754570808801,
"eval_test2_loss": 0.36192458868026733,
"eval_test2_runtime": 1219.4681,
"eval_test2_samples_per_second": 2.05,
"eval_test2_steps_per_second": 0.513,
"eval_test2_wer": 0.22871366445410848,
"eval_test2_wer_norm": 0.1619012147604859,
"step": 56000
},
{
"epoch": 1.055992,
"grad_norm": 5.108110427856445,
"learning_rate": 5.536546184738957e-06,
"loss": 0.1304,
"step": 56100
},
{
"epoch": 1.056792,
"grad_norm": 1.9956285953521729,
"learning_rate": 5.5285140562249e-06,
"loss": 0.1325,
"step": 56200
},
{
"epoch": 1.057592,
"grad_norm": 7.1683220863342285,
"learning_rate": 5.520481927710844e-06,
"loss": 0.1315,
"step": 56300
},
{
"epoch": 1.058392,
"grad_norm": 6.5139546394348145,
"learning_rate": 5.512449799196788e-06,
"loss": 0.1287,
"step": 56400
},
{
"epoch": 1.059192,
"grad_norm": 6.996273040771484,
"learning_rate": 5.5044176706827315e-06,
"loss": 0.1273,
"step": 56500
},
{
"epoch": 1.059992,
"grad_norm": 1.6335968971252441,
"learning_rate": 5.4963855421686755e-06,
"loss": 0.1157,
"step": 56600
},
{
"epoch": 1.060792,
"grad_norm": 4.1844892501831055,
"learning_rate": 5.488353413654619e-06,
"loss": 0.1316,
"step": 56700
},
{
"epoch": 1.061592,
"grad_norm": 8.634051322937012,
"learning_rate": 5.480321285140563e-06,
"loss": 0.1083,
"step": 56800
},
{
"epoch": 1.062392,
"grad_norm": 8.461755752563477,
"learning_rate": 5.472289156626507e-06,
"loss": 0.1307,
"step": 56900
},
{
"epoch": 1.063192,
"grad_norm": 5.679012298583984,
"learning_rate": 5.464257028112451e-06,
"loss": 0.133,
"step": 57000
},
{
"epoch": 1.063992,
"grad_norm": 5.279029846191406,
"learning_rate": 5.456224899598393e-06,
"loss": 0.1203,
"step": 57100
},
{
"epoch": 1.064792,
"grad_norm": 6.086945533752441,
"learning_rate": 5.448192771084337e-06,
"loss": 0.1371,
"step": 57200
},
{
"epoch": 1.065592,
"grad_norm": 4.381275177001953,
"learning_rate": 5.440160642570281e-06,
"loss": 0.134,
"step": 57300
},
{
"epoch": 1.066392,
"grad_norm": 3.3568952083587646,
"learning_rate": 5.432128514056225e-06,
"loss": 0.1294,
"step": 57400
},
{
"epoch": 1.067192,
"grad_norm": 5.998335838317871,
"learning_rate": 5.424096385542169e-06,
"loss": 0.1074,
"step": 57500
},
{
"epoch": 1.067992,
"grad_norm": 5.995476245880127,
"learning_rate": 5.416064257028113e-06,
"loss": 0.1129,
"step": 57600
},
{
"epoch": 1.068792,
"grad_norm": 7.281689167022705,
"learning_rate": 5.408112449799197e-06,
"loss": 0.1342,
"step": 57700
},
{
"epoch": 1.069592,
"grad_norm": 3.4488370418548584,
"learning_rate": 5.40008032128514e-06,
"loss": 0.1126,
"step": 57800
},
{
"epoch": 1.070392,
"grad_norm": 6.784258842468262,
"learning_rate": 5.392048192771084e-06,
"loss": 0.129,
"step": 57900
},
{
"epoch": 1.071192,
"grad_norm": 3.7626733779907227,
"learning_rate": 5.384016064257028e-06,
"loss": 0.1323,
"step": 58000
},
{
"epoch": 1.071992,
"grad_norm": 6.320082187652588,
"learning_rate": 5.375983935742972e-06,
"loss": 0.1275,
"step": 58100
},
{
"epoch": 1.072792,
"grad_norm": 10.221648216247559,
"learning_rate": 5.367951807228916e-06,
"loss": 0.1229,
"step": 58200
},
{
"epoch": 1.073592,
"grad_norm": 3.2260794639587402,
"learning_rate": 5.35991967871486e-06,
"loss": 0.1237,
"step": 58300
},
{
"epoch": 1.074392,
"grad_norm": 7.026370525360107,
"learning_rate": 5.351887550200804e-06,
"loss": 0.1351,
"step": 58400
},
{
"epoch": 1.075192,
"grad_norm": 3.7613117694854736,
"learning_rate": 5.343855421686748e-06,
"loss": 0.1246,
"step": 58500
},
{
"epoch": 1.075992,
"grad_norm": 19.284780502319336,
"learning_rate": 5.335823293172692e-06,
"loss": 0.1211,
"step": 58600
},
{
"epoch": 1.076792,
"grad_norm": 2.5568318367004395,
"learning_rate": 5.327791164658635e-06,
"loss": 0.1386,
"step": 58700
},
{
"epoch": 1.077592,
"grad_norm": 8.899717330932617,
"learning_rate": 5.319759036144579e-06,
"loss": 0.1297,
"step": 58800
},
{
"epoch": 1.078392,
"grad_norm": 7.388535022735596,
"learning_rate": 5.311726907630523e-06,
"loss": 0.1218,
"step": 58900
},
{
"epoch": 1.079192,
"grad_norm": 7.612710952758789,
"learning_rate": 5.3036947791164666e-06,
"loss": 0.1342,
"step": 59000
},
{
"epoch": 1.079992,
"grad_norm": 6.390445232391357,
"learning_rate": 5.29566265060241e-06,
"loss": 0.138,
"step": 59100
},
{
"epoch": 1.080792,
"grad_norm": 2.2183034420013428,
"learning_rate": 5.2876305220883535e-06,
"loss": 0.1212,
"step": 59200
},
{
"epoch": 1.081592,
"grad_norm": 4.985264778137207,
"learning_rate": 5.2795983935742975e-06,
"loss": 0.1187,
"step": 59300
},
{
"epoch": 1.082392,
"grad_norm": 10.64930534362793,
"learning_rate": 5.271566265060241e-06,
"loss": 0.1109,
"step": 59400
},
{
"epoch": 1.083192,
"grad_norm": 6.063853740692139,
"learning_rate": 5.263534136546185e-06,
"loss": 0.1136,
"step": 59500
},
{
"epoch": 1.083992,
"grad_norm": 6.947314739227295,
"learning_rate": 5.255502008032128e-06,
"loss": 0.1289,
"step": 59600
},
{
"epoch": 1.084792,
"grad_norm": 2.270883560180664,
"learning_rate": 5.247469879518072e-06,
"loss": 0.1298,
"step": 59700
},
{
"epoch": 1.0855920000000001,
"grad_norm": 0.7966949939727783,
"learning_rate": 5.239437751004016e-06,
"loss": 0.1272,
"step": 59800
},
{
"epoch": 1.086392,
"grad_norm": 2.719560384750366,
"learning_rate": 5.23140562248996e-06,
"loss": 0.0987,
"step": 59900
},
{
"epoch": 1.087192,
"grad_norm": 6.565618515014648,
"learning_rate": 5.223373493975904e-06,
"loss": 0.1165,
"step": 60000
},
{
"epoch": 1.087192,
"eval_test1_cer": 0.05625886922100232,
"eval_test1_cer_norm": 0.0387686988258458,
"eval_test1_loss": 0.199602872133255,
"eval_test1_runtime": 1190.6217,
"eval_test1_samples_per_second": 2.1,
"eval_test1_steps_per_second": 0.525,
"eval_test1_wer": 0.1555438934079711,
"eval_test1_wer_norm": 0.09591162804290014,
"step": 60000
},
{
"epoch": 1.087192,
"eval_test2_cer": 0.12617127711203197,
"eval_test2_cer_norm": 0.09629202819956616,
"eval_test2_loss": 0.36100009083747864,
"eval_test2_runtime": 1239.0925,
"eval_test2_samples_per_second": 2.018,
"eval_test2_steps_per_second": 0.504,
"eval_test2_wer": 0.25226024261844815,
"eval_test2_wer_norm": 0.18728512491404997,
"step": 60000
},
{
"epoch": 1.087992,
"grad_norm": 6.482507228851318,
"learning_rate": 5.215341365461848e-06,
"loss": 0.1414,
"step": 60100
},
{
"epoch": 1.088792,
"grad_norm": 6.777385234832764,
"learning_rate": 5.207309236947792e-06,
"loss": 0.1258,
"step": 60200
},
{
"epoch": 1.0895920000000001,
"grad_norm": 6.040877819061279,
"learning_rate": 5.199277108433736e-06,
"loss": 0.1167,
"step": 60300
},
{
"epoch": 1.090392,
"grad_norm": 4.490113258361816,
"learning_rate": 5.19124497991968e-06,
"loss": 0.113,
"step": 60400
},
{
"epoch": 1.091192,
"grad_norm": 7.03712797164917,
"learning_rate": 5.183212851405624e-06,
"loss": 0.1084,
"step": 60500
},
{
"epoch": 1.091992,
"grad_norm": 2.478569984436035,
"learning_rate": 5.175180722891566e-06,
"loss": 0.1162,
"step": 60600
},
{
"epoch": 1.092792,
"grad_norm": 2.2656235694885254,
"learning_rate": 5.16714859437751e-06,
"loss": 0.1138,
"step": 60700
},
{
"epoch": 1.093592,
"grad_norm": 6.424910545349121,
"learning_rate": 5.159116465863454e-06,
"loss": 0.1212,
"step": 60800
},
{
"epoch": 1.094392,
"grad_norm": 4.551820278167725,
"learning_rate": 5.151084337349398e-06,
"loss": 0.1148,
"step": 60900
},
{
"epoch": 1.095192,
"grad_norm": 4.301316738128662,
"learning_rate": 5.1430522088353416e-06,
"loss": 0.1316,
"step": 61000
},
{
"epoch": 1.095992,
"grad_norm": 5.7936530113220215,
"learning_rate": 5.1350200803212855e-06,
"loss": 0.1036,
"step": 61100
},
{
"epoch": 1.096792,
"grad_norm": 7.100841045379639,
"learning_rate": 5.126987951807229e-06,
"loss": 0.1138,
"step": 61200
},
{
"epoch": 1.097592,
"grad_norm": 2.908416986465454,
"learning_rate": 5.118955823293173e-06,
"loss": 0.1315,
"step": 61300
},
{
"epoch": 1.098392,
"grad_norm": 3.4737966060638428,
"learning_rate": 5.110923694779117e-06,
"loss": 0.1333,
"step": 61400
},
{
"epoch": 1.099192,
"grad_norm": 4.395986080169678,
"learning_rate": 5.10289156626506e-06,
"loss": 0.1168,
"step": 61500
},
{
"epoch": 1.099992,
"grad_norm": 5.61540412902832,
"learning_rate": 5.094859437751004e-06,
"loss": 0.1006,
"step": 61600
},
{
"epoch": 1.100792,
"grad_norm": 7.517305850982666,
"learning_rate": 5.0869076305220885e-06,
"loss": 0.1156,
"step": 61700
},
{
"epoch": 1.101592,
"grad_norm": 3.4721179008483887,
"learning_rate": 5.0788755020080325e-06,
"loss": 0.1029,
"step": 61800
},
{
"epoch": 1.102392,
"grad_norm": 8.264998435974121,
"learning_rate": 5.070843373493976e-06,
"loss": 0.1173,
"step": 61900
},
{
"epoch": 1.103192,
"grad_norm": 5.457461833953857,
"learning_rate": 5.06281124497992e-06,
"loss": 0.1189,
"step": 62000
},
{
"epoch": 1.103992,
"grad_norm": 5.589449882507324,
"learning_rate": 5.054779116465864e-06,
"loss": 0.1162,
"step": 62100
},
{
"epoch": 1.104792,
"grad_norm": 6.30127477645874,
"learning_rate": 5.046746987951807e-06,
"loss": 0.1196,
"step": 62200
},
{
"epoch": 1.105592,
"grad_norm": 3.9898123741149902,
"learning_rate": 5.038714859437751e-06,
"loss": 0.1033,
"step": 62300
},
{
"epoch": 1.106392,
"grad_norm": 4.696240425109863,
"learning_rate": 5.030682730923695e-06,
"loss": 0.1028,
"step": 62400
},
{
"epoch": 1.107192,
"grad_norm": 5.9711384773254395,
"learning_rate": 5.022650602409639e-06,
"loss": 0.1151,
"step": 62500
},
{
"epoch": 1.107992,
"grad_norm": 4.918034076690674,
"learning_rate": 5.014618473895583e-06,
"loss": 0.1182,
"step": 62600
},
{
"epoch": 1.108792,
"grad_norm": 2.1706135272979736,
"learning_rate": 5.006586345381527e-06,
"loss": 0.1219,
"step": 62700
},
{
"epoch": 1.109592,
"grad_norm": 3.3897037506103516,
"learning_rate": 4.998554216867471e-06,
"loss": 0.109,
"step": 62800
},
{
"epoch": 1.110392,
"grad_norm": 5.085102558135986,
"learning_rate": 4.990522088353414e-06,
"loss": 0.1182,
"step": 62900
},
{
"epoch": 1.111192,
"grad_norm": 6.255206108093262,
"learning_rate": 4.982489959839358e-06,
"loss": 0.1228,
"step": 63000
},
{
"epoch": 1.111992,
"grad_norm": 5.1899309158325195,
"learning_rate": 4.974457831325302e-06,
"loss": 0.1158,
"step": 63100
},
{
"epoch": 1.112792,
"grad_norm": 7.743951320648193,
"learning_rate": 4.966425702811246e-06,
"loss": 0.1247,
"step": 63200
},
{
"epoch": 1.113592,
"grad_norm": 4.700534343719482,
"learning_rate": 4.958393574297189e-06,
"loss": 0.1152,
"step": 63300
},
{
"epoch": 1.114392,
"grad_norm": 5.209968090057373,
"learning_rate": 4.950361445783133e-06,
"loss": 0.1098,
"step": 63400
},
{
"epoch": 1.115192,
"grad_norm": 6.524351119995117,
"learning_rate": 4.9423293172690766e-06,
"loss": 0.1073,
"step": 63500
},
{
"epoch": 1.115992,
"grad_norm": 8.537145614624023,
"learning_rate": 4.9342971887550205e-06,
"loss": 0.1137,
"step": 63600
},
{
"epoch": 1.116792,
"grad_norm": 8.046852111816406,
"learning_rate": 4.926265060240964e-06,
"loss": 0.1059,
"step": 63700
},
{
"epoch": 1.117592,
"grad_norm": 6.976133346557617,
"learning_rate": 4.9182329317269075e-06,
"loss": 0.1238,
"step": 63800
},
{
"epoch": 1.118392,
"grad_norm": 3.892697334289551,
"learning_rate": 4.910200803212851e-06,
"loss": 0.1257,
"step": 63900
},
{
"epoch": 1.119192,
"grad_norm": 4.94338321685791,
"learning_rate": 4.902168674698795e-06,
"loss": 0.1004,
"step": 64000
},
{
"epoch": 1.119192,
"eval_test1_cer": 0.046203973411009035,
"eval_test1_cer_norm": 0.031037049487358033,
"eval_test1_loss": 0.20126041769981384,
"eval_test1_runtime": 1185.1944,
"eval_test1_samples_per_second": 2.109,
"eval_test1_steps_per_second": 0.527,
"eval_test1_wer": 0.1458934662818158,
"eval_test1_wer_norm": 0.08451445103597417,
"step": 64000
},
{
"epoch": 1.119192,
"eval_test2_cer": 0.09468212192481427,
"eval_test2_cer_norm": 0.07436764022311744,
"eval_test2_loss": 0.36592334508895874,
"eval_test2_runtime": 1203.0799,
"eval_test2_samples_per_second": 2.078,
"eval_test2_steps_per_second": 0.519,
"eval_test2_wer": 0.21420805676356144,
"eval_test2_wer_norm": 0.14769080907632362,
"step": 64000
},
{
"epoch": 1.119992,
"grad_norm": 7.434136867523193,
"learning_rate": 4.894136546184739e-06,
"loss": 0.1235,
"step": 64100
},
{
"epoch": 1.120792,
"grad_norm": 4.429540157318115,
"learning_rate": 4.886104417670683e-06,
"loss": 0.1095,
"step": 64200
},
{
"epoch": 1.121592,
"grad_norm": 3.018216133117676,
"learning_rate": 4.878072289156627e-06,
"loss": 0.1165,
"step": 64300
},
{
"epoch": 1.122392,
"grad_norm": 3.923384189605713,
"learning_rate": 4.870040160642571e-06,
"loss": 0.1174,
"step": 64400
},
{
"epoch": 1.123192,
"grad_norm": 9.313278198242188,
"learning_rate": 4.862008032128515e-06,
"loss": 0.1145,
"step": 64500
},
{
"epoch": 1.123992,
"grad_norm": 4.272242069244385,
"learning_rate": 4.853975903614459e-06,
"loss": 0.1187,
"step": 64600
},
{
"epoch": 1.124792,
"grad_norm": 5.922658920288086,
"learning_rate": 4.845943775100402e-06,
"loss": 0.1142,
"step": 64700
},
{
"epoch": 1.125592,
"grad_norm": 6.251376628875732,
"learning_rate": 4.837911646586346e-06,
"loss": 0.115,
"step": 64800
},
{
"epoch": 1.126392,
"grad_norm": 7.813810348510742,
"learning_rate": 4.82987951807229e-06,
"loss": 0.119,
"step": 64900
},
{
"epoch": 1.127192,
"grad_norm": 2.6300604343414307,
"learning_rate": 4.821847389558234e-06,
"loss": 0.1097,
"step": 65000
},
{
"epoch": 1.1279919999999999,
"grad_norm": 3.9435408115386963,
"learning_rate": 4.813815261044177e-06,
"loss": 0.1368,
"step": 65100
},
{
"epoch": 1.128792,
"grad_norm": 3.7023379802703857,
"learning_rate": 4.805783132530121e-06,
"loss": 0.1216,
"step": 65200
},
{
"epoch": 1.129592,
"grad_norm": 3.5150551795959473,
"learning_rate": 4.797751004016065e-06,
"loss": 0.1188,
"step": 65300
},
{
"epoch": 1.130392,
"grad_norm": 8.510544776916504,
"learning_rate": 4.7897188755020085e-06,
"loss": 0.1071,
"step": 65400
},
{
"epoch": 1.131192,
"grad_norm": 4.728481292724609,
"learning_rate": 4.7816867469879524e-06,
"loss": 0.1149,
"step": 65500
},
{
"epoch": 1.1319919999999999,
"grad_norm": 2.2337489128112793,
"learning_rate": 4.7736546184738955e-06,
"loss": 0.1155,
"step": 65600
},
{
"epoch": 1.132792,
"grad_norm": 8.161867141723633,
"learning_rate": 4.765622489959839e-06,
"loss": 0.115,
"step": 65700
},
{
"epoch": 1.133592,
"grad_norm": 5.036279678344727,
"learning_rate": 4.757590361445783e-06,
"loss": 0.1316,
"step": 65800
},
{
"epoch": 1.134392,
"grad_norm": 9.957592964172363,
"learning_rate": 4.749558232931727e-06,
"loss": 0.1127,
"step": 65900
},
{
"epoch": 1.135192,
"grad_norm": 4.2074809074401855,
"learning_rate": 4.741526104417671e-06,
"loss": 0.1092,
"step": 66000
},
{
"epoch": 1.135992,
"grad_norm": 3.1782147884368896,
"learning_rate": 4.733493975903615e-06,
"loss": 0.111,
"step": 66100
},
{
"epoch": 1.136792,
"grad_norm": 3.371274709701538,
"learning_rate": 4.725461847389559e-06,
"loss": 0.1286,
"step": 66200
},
{
"epoch": 1.137592,
"grad_norm": 3.8433542251586914,
"learning_rate": 4.717429718875502e-06,
"loss": 0.109,
"step": 66300
},
{
"epoch": 1.138392,
"grad_norm": 5.162513256072998,
"learning_rate": 4.709477911646586e-06,
"loss": 0.1066,
"step": 66400
},
{
"epoch": 1.139192,
"grad_norm": 3.1563637256622314,
"learning_rate": 4.70144578313253e-06,
"loss": 0.1252,
"step": 66500
},
{
"epoch": 1.139992,
"grad_norm": 10.928020477294922,
"learning_rate": 4.693413654618474e-06,
"loss": 0.1255,
"step": 66600
},
{
"epoch": 1.140792,
"grad_norm": 12.533303260803223,
"learning_rate": 4.685381526104418e-06,
"loss": 0.1141,
"step": 66700
},
{
"epoch": 1.141592,
"grad_norm": 4.71325159072876,
"learning_rate": 4.677349397590361e-06,
"loss": 0.1074,
"step": 66800
},
{
"epoch": 1.142392,
"grad_norm": 4.4170427322387695,
"learning_rate": 4.669317269076305e-06,
"loss": 0.1081,
"step": 66900
},
{
"epoch": 1.143192,
"grad_norm": 7.2962965965271,
"learning_rate": 4.661285140562249e-06,
"loss": 0.1115,
"step": 67000
},
{
"epoch": 1.143992,
"grad_norm": 6.532419204711914,
"learning_rate": 4.653253012048193e-06,
"loss": 0.1194,
"step": 67100
},
{
"epoch": 1.144792,
"grad_norm": 9.143524169921875,
"learning_rate": 4.645220883534137e-06,
"loss": 0.1148,
"step": 67200
},
{
"epoch": 1.145592,
"grad_norm": 2.3634395599365234,
"learning_rate": 4.637188755020081e-06,
"loss": 0.1322,
"step": 67300
},
{
"epoch": 1.146392,
"grad_norm": 8.534736633300781,
"learning_rate": 4.629156626506025e-06,
"loss": 0.1033,
"step": 67400
},
{
"epoch": 1.147192,
"grad_norm": 7.3824944496154785,
"learning_rate": 4.621124497991969e-06,
"loss": 0.1109,
"step": 67500
},
{
"epoch": 1.147992,
"grad_norm": 3.094473123550415,
"learning_rate": 4.613092369477913e-06,
"loss": 0.1201,
"step": 67600
},
{
"epoch": 1.148792,
"grad_norm": 4.593748569488525,
"learning_rate": 4.605060240963856e-06,
"loss": 0.1096,
"step": 67700
},
{
"epoch": 1.149592,
"grad_norm": 4.944604396820068,
"learning_rate": 4.5970281124498e-06,
"loss": 0.1128,
"step": 67800
},
{
"epoch": 1.150392,
"grad_norm": 6.725574493408203,
"learning_rate": 4.5889959839357435e-06,
"loss": 0.1129,
"step": 67900
},
{
"epoch": 1.151192,
"grad_norm": 8.640876770019531,
"learning_rate": 4.5809638554216874e-06,
"loss": 0.0995,
"step": 68000
},
{
"epoch": 1.151192,
"eval_test1_cer": 0.05561935170662484,
"eval_test1_cer_norm": 0.03838931975892621,
"eval_test1_loss": 0.19823457300662994,
"eval_test1_runtime": 1196.2654,
"eval_test1_samples_per_second": 2.09,
"eval_test1_steps_per_second": 0.522,
"eval_test1_wer": 0.15913000379019795,
"eval_test1_wer_norm": 0.09649609865863994,
"step": 68000
},
{
"epoch": 1.151192,
"eval_test2_cer": 0.09447679844700788,
"eval_test2_cer_norm": 0.07438216609854353,
"eval_test2_loss": 0.36025092005729675,
"eval_test2_runtime": 1407.094,
"eval_test2_samples_per_second": 1.777,
"eval_test2_steps_per_second": 0.444,
"eval_test2_wer": 0.21621080338750287,
"eval_test2_wer_norm": 0.14837840935136373,
"step": 68000
}
],
"logging_steps": 100,
"max_steps": 125000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.7760453484544e+20,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}