whisper-tiny-lv / trainer_state.json
Raivis Dejus
Adding model files
071f60e
raw
history blame
81.3 kB
{
"best_metric": 21.95886308189698,
"best_model_checkpoint": "./whisper-tiny-lv/checkpoint-91000",
"epoch": 41.91616766467066,
"eval_steps": 1000,
"global_step": 91000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"grad_norm": 6.921171188354492,
"learning_rate": 4.9000000000000005e-06,
"loss": 1.8377,
"step": 250
},
{
"epoch": 0.23,
"grad_norm": 6.424713611602783,
"learning_rate": 9.9e-06,
"loss": 0.7472,
"step": 500
},
{
"epoch": 0.35,
"grad_norm": 6.457923889160156,
"learning_rate": 9.988688827331488e-06,
"loss": 0.5694,
"step": 750
},
{
"epoch": 0.46,
"grad_norm": 5.4181013107299805,
"learning_rate": 9.977146814404432e-06,
"loss": 0.481,
"step": 1000
},
{
"epoch": 0.46,
"eval_loss": 0.511210560798645,
"eval_runtime": 658.2418,
"eval_samples_per_second": 10.258,
"eval_steps_per_second": 0.321,
"eval_wer": 51.379495011603225,
"step": 1000
},
{
"epoch": 0.58,
"grad_norm": 5.981955528259277,
"learning_rate": 9.965604801477378e-06,
"loss": 0.4294,
"step": 1250
},
{
"epoch": 0.69,
"grad_norm": 6.043950080871582,
"learning_rate": 9.954062788550324e-06,
"loss": 0.3919,
"step": 1500
},
{
"epoch": 0.81,
"grad_norm": 5.050214767456055,
"learning_rate": 9.94252077562327e-06,
"loss": 0.3599,
"step": 1750
},
{
"epoch": 0.92,
"grad_norm": 5.506693363189697,
"learning_rate": 9.930978762696215e-06,
"loss": 0.3399,
"step": 2000
},
{
"epoch": 0.92,
"eval_loss": 0.39190948009490967,
"eval_runtime": 666.4927,
"eval_samples_per_second": 10.131,
"eval_steps_per_second": 0.317,
"eval_wer": 42.10881250371899,
"step": 2000
},
{
"epoch": 1.04,
"grad_norm": 4.966064929962158,
"learning_rate": 9.91943674976916e-06,
"loss": 0.3048,
"step": 2250
},
{
"epoch": 1.15,
"grad_norm": 4.654673099517822,
"learning_rate": 9.907894736842107e-06,
"loss": 0.2753,
"step": 2500
},
{
"epoch": 1.27,
"grad_norm": 4.946408271789551,
"learning_rate": 9.896352723915051e-06,
"loss": 0.2609,
"step": 2750
},
{
"epoch": 1.38,
"grad_norm": 4.584148406982422,
"learning_rate": 9.884810710987997e-06,
"loss": 0.2539,
"step": 3000
},
{
"epoch": 1.38,
"eval_loss": 0.33731845021247864,
"eval_runtime": 657.5879,
"eval_samples_per_second": 10.268,
"eval_steps_per_second": 0.321,
"eval_wer": 38.419580696987126,
"step": 3000
},
{
"epoch": 1.5,
"grad_norm": 4.824137210845947,
"learning_rate": 9.873268698060943e-06,
"loss": 0.2474,
"step": 3250
},
{
"epoch": 1.61,
"grad_norm": 4.751330852508545,
"learning_rate": 9.861726685133887e-06,
"loss": 0.2374,
"step": 3500
},
{
"epoch": 1.73,
"grad_norm": 5.100882530212402,
"learning_rate": 9.850184672206833e-06,
"loss": 0.2293,
"step": 3750
},
{
"epoch": 1.84,
"grad_norm": 4.561028480529785,
"learning_rate": 9.83864265927978e-06,
"loss": 0.2252,
"step": 4000
},
{
"epoch": 1.84,
"eval_loss": 0.30130764842033386,
"eval_runtime": 668.7558,
"eval_samples_per_second": 10.096,
"eval_steps_per_second": 0.316,
"eval_wer": 35.210345716722536,
"step": 4000
},
{
"epoch": 1.96,
"grad_norm": 4.661614418029785,
"learning_rate": 9.827100646352725e-06,
"loss": 0.2127,
"step": 4250
},
{
"epoch": 2.07,
"grad_norm": 4.2613959312438965,
"learning_rate": 9.81555863342567e-06,
"loss": 0.1886,
"step": 4500
},
{
"epoch": 2.19,
"grad_norm": 4.164435386657715,
"learning_rate": 9.804016620498615e-06,
"loss": 0.1725,
"step": 4750
},
{
"epoch": 2.3,
"grad_norm": 4.097248077392578,
"learning_rate": 9.792474607571561e-06,
"loss": 0.1715,
"step": 5000
},
{
"epoch": 2.3,
"eval_loss": 0.283372163772583,
"eval_runtime": 661.6835,
"eval_samples_per_second": 10.204,
"eval_steps_per_second": 0.319,
"eval_wer": 33.31415990638079,
"step": 5000
},
{
"epoch": 2.42,
"grad_norm": 4.298040866851807,
"learning_rate": 9.780932594644506e-06,
"loss": 0.1657,
"step": 5250
},
{
"epoch": 2.53,
"grad_norm": 4.593989849090576,
"learning_rate": 9.769390581717453e-06,
"loss": 0.1637,
"step": 5500
},
{
"epoch": 2.65,
"grad_norm": 4.4265055656433105,
"learning_rate": 9.757848568790398e-06,
"loss": 0.1619,
"step": 5750
},
{
"epoch": 2.76,
"grad_norm": 3.670001983642578,
"learning_rate": 9.746306555863344e-06,
"loss": 0.1562,
"step": 6000
},
{
"epoch": 2.76,
"eval_loss": 0.2656751573085785,
"eval_runtime": 654.0062,
"eval_samples_per_second": 10.324,
"eval_steps_per_second": 0.323,
"eval_wer": 31.9693754090882,
"step": 6000
},
{
"epoch": 2.88,
"grad_norm": 4.299009799957275,
"learning_rate": 9.73476454293629e-06,
"loss": 0.1547,
"step": 6250
},
{
"epoch": 2.99,
"grad_norm": 4.178277492523193,
"learning_rate": 9.723222530009234e-06,
"loss": 0.1523,
"step": 6500
},
{
"epoch": 3.11,
"grad_norm": 3.7556698322296143,
"learning_rate": 9.71168051708218e-06,
"loss": 0.1205,
"step": 6750
},
{
"epoch": 3.22,
"grad_norm": 4.128946304321289,
"learning_rate": 9.700138504155126e-06,
"loss": 0.1177,
"step": 7000
},
{
"epoch": 3.22,
"eval_loss": 0.25489723682403564,
"eval_runtime": 667.3883,
"eval_samples_per_second": 10.117,
"eval_steps_per_second": 0.316,
"eval_wer": 30.705912688180575,
"step": 7000
},
{
"epoch": 3.34,
"grad_norm": 3.462374448776245,
"learning_rate": 9.68859649122807e-06,
"loss": 0.1173,
"step": 7250
},
{
"epoch": 3.45,
"grad_norm": 4.692279815673828,
"learning_rate": 9.677054478301016e-06,
"loss": 0.117,
"step": 7500
},
{
"epoch": 3.57,
"grad_norm": 3.5876481533050537,
"learning_rate": 9.665512465373962e-06,
"loss": 0.1147,
"step": 7750
},
{
"epoch": 3.68,
"grad_norm": 4.460909843444824,
"learning_rate": 9.653970452446908e-06,
"loss": 0.1149,
"step": 8000
},
{
"epoch": 3.68,
"eval_loss": 0.24221286177635193,
"eval_runtime": 671.3545,
"eval_samples_per_second": 10.057,
"eval_steps_per_second": 0.314,
"eval_wer": 29.97004978479481,
"step": 8000
},
{
"epoch": 3.8,
"grad_norm": 3.741210460662842,
"learning_rate": 9.642428439519853e-06,
"loss": 0.1124,
"step": 8250
},
{
"epoch": 3.92,
"grad_norm": 4.417487144470215,
"learning_rate": 9.630886426592799e-06,
"loss": 0.1124,
"step": 8500
},
{
"epoch": 4.03,
"grad_norm": 3.8332269191741943,
"learning_rate": 9.619344413665745e-06,
"loss": 0.1028,
"step": 8750
},
{
"epoch": 4.15,
"grad_norm": 3.3890438079833984,
"learning_rate": 9.607802400738689e-06,
"loss": 0.0834,
"step": 9000
},
{
"epoch": 4.15,
"eval_loss": 0.23551978170871735,
"eval_runtime": 665.3587,
"eval_samples_per_second": 10.148,
"eval_steps_per_second": 0.317,
"eval_wer": 29.347243985163736,
"step": 9000
},
{
"epoch": 4.26,
"grad_norm": 3.3677661418914795,
"learning_rate": 9.596260387811635e-06,
"loss": 0.0835,
"step": 9250
},
{
"epoch": 4.38,
"grad_norm": 3.392284631729126,
"learning_rate": 9.584718374884581e-06,
"loss": 0.0835,
"step": 9500
},
{
"epoch": 4.49,
"grad_norm": 3.0857667922973633,
"learning_rate": 9.573176361957525e-06,
"loss": 0.0813,
"step": 9750
},
{
"epoch": 4.61,
"grad_norm": 3.726276159286499,
"learning_rate": 9.561634349030471e-06,
"loss": 0.0825,
"step": 10000
},
{
"epoch": 4.61,
"eval_loss": 0.229040265083313,
"eval_runtime": 671.1646,
"eval_samples_per_second": 10.06,
"eval_steps_per_second": 0.314,
"eval_wer": 28.81567725172065,
"step": 10000
},
{
"epoch": 4.72,
"grad_norm": 3.28595232963562,
"learning_rate": 9.550092336103417e-06,
"loss": 0.0816,
"step": 10250
},
{
"epoch": 4.84,
"grad_norm": 3.427420139312744,
"learning_rate": 9.538550323176363e-06,
"loss": 0.0814,
"step": 10500
},
{
"epoch": 4.95,
"grad_norm": 3.8041698932647705,
"learning_rate": 9.527008310249308e-06,
"loss": 0.0802,
"step": 10750
},
{
"epoch": 5.07,
"grad_norm": 2.9603447914123535,
"learning_rate": 9.515466297322253e-06,
"loss": 0.0669,
"step": 11000
},
{
"epoch": 5.07,
"eval_loss": 0.22645771503448486,
"eval_runtime": 667.9924,
"eval_samples_per_second": 10.108,
"eval_steps_per_second": 0.316,
"eval_wer": 28.53402622131424,
"step": 11000
},
{
"epoch": 5.18,
"grad_norm": 3.0245766639709473,
"learning_rate": 9.5039242843952e-06,
"loss": 0.057,
"step": 11250
},
{
"epoch": 5.3,
"grad_norm": 2.435096502304077,
"learning_rate": 9.492382271468144e-06,
"loss": 0.0588,
"step": 11500
},
{
"epoch": 5.41,
"grad_norm": 2.9605906009674072,
"learning_rate": 9.480840258541091e-06,
"loss": 0.0583,
"step": 11750
},
{
"epoch": 5.53,
"grad_norm": 2.877732515335083,
"learning_rate": 9.469298245614036e-06,
"loss": 0.0567,
"step": 12000
},
{
"epoch": 5.53,
"eval_loss": 0.2239210605621338,
"eval_runtime": 671.9438,
"eval_samples_per_second": 10.048,
"eval_steps_per_second": 0.314,
"eval_wer": 27.875518178392213,
"step": 12000
},
{
"epoch": 5.64,
"grad_norm": 3.2738921642303467,
"learning_rate": 9.457848568790397e-06,
"loss": 0.0589,
"step": 12250
},
{
"epoch": 5.76,
"grad_norm": 2.727008104324341,
"learning_rate": 9.446306555863343e-06,
"loss": 0.0586,
"step": 12500
},
{
"epoch": 5.87,
"grad_norm": 3.6808159351348877,
"learning_rate": 9.43476454293629e-06,
"loss": 0.0602,
"step": 12750
},
{
"epoch": 5.99,
"grad_norm": 3.2749545574188232,
"learning_rate": 9.423222530009234e-06,
"loss": 0.0589,
"step": 13000
},
{
"epoch": 5.99,
"eval_loss": 0.21996423602104187,
"eval_runtime": 674.0776,
"eval_samples_per_second": 10.017,
"eval_steps_per_second": 0.313,
"eval_wer": 27.879485094313427,
"step": 13000
},
{
"epoch": 6.1,
"grad_norm": 2.8687331676483154,
"learning_rate": 9.41168051708218e-06,
"loss": 0.0406,
"step": 13250
},
{
"epoch": 6.22,
"grad_norm": 2.4594838619232178,
"learning_rate": 9.400138504155126e-06,
"loss": 0.0405,
"step": 13500
},
{
"epoch": 6.33,
"grad_norm": 2.6956489086151123,
"learning_rate": 9.388596491228072e-06,
"loss": 0.0398,
"step": 13750
},
{
"epoch": 6.45,
"grad_norm": 2.6623504161834717,
"learning_rate": 9.377054478301016e-06,
"loss": 0.0415,
"step": 14000
},
{
"epoch": 6.45,
"eval_loss": 0.22312334179878235,
"eval_runtime": 673.796,
"eval_samples_per_second": 10.021,
"eval_steps_per_second": 0.313,
"eval_wer": 26.97701172223655,
"step": 14000
},
{
"epoch": 6.56,
"grad_norm": 3.0151000022888184,
"learning_rate": 9.365512465373962e-06,
"loss": 0.0422,
"step": 14250
},
{
"epoch": 6.68,
"grad_norm": 3.232048749923706,
"learning_rate": 9.353970452446908e-06,
"loss": 0.0412,
"step": 14500
},
{
"epoch": 6.79,
"grad_norm": 2.809514284133911,
"learning_rate": 9.342428439519852e-06,
"loss": 0.0411,
"step": 14750
},
{
"epoch": 6.91,
"grad_norm": 2.0289547443389893,
"learning_rate": 9.3308864265928e-06,
"loss": 0.0423,
"step": 15000
},
{
"epoch": 6.91,
"eval_loss": 0.2184455841779709,
"eval_runtime": 688.9862,
"eval_samples_per_second": 9.8,
"eval_steps_per_second": 0.306,
"eval_wer": 27.084118452109408,
"step": 15000
},
{
"epoch": 7.02,
"grad_norm": 3.098123788833618,
"learning_rate": 9.319344413665744e-06,
"loss": 0.0375,
"step": 15250
},
{
"epoch": 7.14,
"grad_norm": 2.8762073516845703,
"learning_rate": 9.307802400738688e-06,
"loss": 0.0266,
"step": 15500
},
{
"epoch": 7.25,
"grad_norm": 4.211934566497803,
"learning_rate": 9.296260387811636e-06,
"loss": 0.0278,
"step": 15750
},
{
"epoch": 7.37,
"grad_norm": 2.860619306564331,
"learning_rate": 9.28471837488458e-06,
"loss": 0.0281,
"step": 16000
},
{
"epoch": 7.37,
"eval_loss": 0.22083307802677155,
"eval_runtime": 665.4984,
"eval_samples_per_second": 10.146,
"eval_steps_per_second": 0.317,
"eval_wer": 27.224943967312615,
"step": 16000
},
{
"epoch": 7.49,
"grad_norm": 2.3288867473602295,
"learning_rate": 9.273176361957526e-06,
"loss": 0.0278,
"step": 16250
},
{
"epoch": 7.6,
"grad_norm": 2.9130966663360596,
"learning_rate": 9.261634349030472e-06,
"loss": 0.0281,
"step": 16500
},
{
"epoch": 7.72,
"grad_norm": 1.9348437786102295,
"learning_rate": 9.250092336103417e-06,
"loss": 0.0294,
"step": 16750
},
{
"epoch": 7.83,
"grad_norm": 3.404127836227417,
"learning_rate": 9.238550323176363e-06,
"loss": 0.0296,
"step": 17000
},
{
"epoch": 7.83,
"eval_loss": 0.2222394496202469,
"eval_runtime": 668.3229,
"eval_samples_per_second": 10.103,
"eval_steps_per_second": 0.316,
"eval_wer": 26.802467421702996,
"step": 17000
},
{
"epoch": 7.95,
"grad_norm": 2.1417627334594727,
"learning_rate": 9.227008310249309e-06,
"loss": 0.0287,
"step": 17250
},
{
"epoch": 8.06,
"grad_norm": 1.783292531967163,
"learning_rate": 9.215512465373963e-06,
"loss": 0.023,
"step": 17500
},
{
"epoch": 8.18,
"grad_norm": 1.875301480293274,
"learning_rate": 9.203970452446908e-06,
"loss": 0.0185,
"step": 17750
},
{
"epoch": 8.29,
"grad_norm": 1.9140523672103882,
"learning_rate": 9.192428439519852e-06,
"loss": 0.0186,
"step": 18000
},
{
"epoch": 8.29,
"eval_loss": 0.2229994833469391,
"eval_runtime": 670.5805,
"eval_samples_per_second": 10.069,
"eval_steps_per_second": 0.315,
"eval_wer": 26.498998353729892,
"step": 18000
},
{
"epoch": 8.41,
"grad_norm": 2.4175968170166016,
"learning_rate": 9.1808864265928e-06,
"loss": 0.0192,
"step": 18250
},
{
"epoch": 8.52,
"grad_norm": 2.8320376873016357,
"learning_rate": 9.169344413665744e-06,
"loss": 0.0198,
"step": 18500
},
{
"epoch": 8.64,
"grad_norm": 2.431974172592163,
"learning_rate": 9.15780240073869e-06,
"loss": 0.0196,
"step": 18750
},
{
"epoch": 8.75,
"grad_norm": 2.0679523944854736,
"learning_rate": 9.146260387811636e-06,
"loss": 0.0201,
"step": 19000
},
{
"epoch": 8.75,
"eval_loss": 0.22365085780620575,
"eval_runtime": 657.293,
"eval_samples_per_second": 10.272,
"eval_steps_per_second": 0.321,
"eval_wer": 26.042803022789933,
"step": 19000
},
{
"epoch": 8.87,
"grad_norm": 2.1873040199279785,
"learning_rate": 9.13471837488458e-06,
"loss": 0.0202,
"step": 19250
},
{
"epoch": 8.98,
"grad_norm": 2.5239417552948,
"learning_rate": 9.123222530009235e-06,
"loss": 0.0201,
"step": 19500
},
{
"epoch": 9.1,
"grad_norm": 2.638354778289795,
"learning_rate": 9.111680517082179e-06,
"loss": 0.0134,
"step": 19750
},
{
"epoch": 9.21,
"grad_norm": 1.6381027698516846,
"learning_rate": 9.100138504155125e-06,
"loss": 0.0127,
"step": 20000
},
{
"epoch": 9.21,
"eval_loss": 0.22688329219818115,
"eval_runtime": 665.6628,
"eval_samples_per_second": 10.143,
"eval_steps_per_second": 0.317,
"eval_wer": 26.134042088977928,
"step": 20000
},
{
"epoch": 9.33,
"grad_norm": 1.1191093921661377,
"learning_rate": 9.088596491228071e-06,
"loss": 0.0133,
"step": 20250
},
{
"epoch": 9.44,
"grad_norm": 2.025820016860962,
"learning_rate": 9.077054478301015e-06,
"loss": 0.0133,
"step": 20500
},
{
"epoch": 9.56,
"grad_norm": 1.148484468460083,
"learning_rate": 9.065512465373963e-06,
"loss": 0.0136,
"step": 20750
},
{
"epoch": 9.67,
"grad_norm": 2.849606513977051,
"learning_rate": 9.053970452446907e-06,
"loss": 0.0135,
"step": 21000
},
{
"epoch": 9.67,
"eval_loss": 0.22943329811096191,
"eval_runtime": 665.6291,
"eval_samples_per_second": 10.144,
"eval_steps_per_second": 0.317,
"eval_wer": 26.330404427078168,
"step": 21000
},
{
"epoch": 9.79,
"grad_norm": 2.421963691711426,
"learning_rate": 9.042428439519853e-06,
"loss": 0.014,
"step": 21250
},
{
"epoch": 9.9,
"grad_norm": 1.5668810606002808,
"learning_rate": 9.0308864265928e-06,
"loss": 0.0148,
"step": 21500
},
{
"epoch": 10.02,
"grad_norm": 0.8170527815818787,
"learning_rate": 9.019390581717452e-06,
"loss": 0.0135,
"step": 21750
},
{
"epoch": 10.13,
"grad_norm": 1.8164241313934326,
"learning_rate": 9.007848568790398e-06,
"loss": 0.0086,
"step": 22000
},
{
"epoch": 10.13,
"eval_loss": 0.23072278499603271,
"eval_runtime": 665.1317,
"eval_samples_per_second": 10.151,
"eval_steps_per_second": 0.317,
"eval_wer": 26.07057143423845,
"step": 22000
},
{
"epoch": 10.25,
"grad_norm": 1.6295300722122192,
"learning_rate": 8.996306555863344e-06,
"loss": 0.0091,
"step": 22250
},
{
"epoch": 10.36,
"grad_norm": 1.5732313394546509,
"learning_rate": 8.984764542936288e-06,
"loss": 0.0093,
"step": 22500
},
{
"epoch": 10.48,
"grad_norm": 1.6755157709121704,
"learning_rate": 8.973222530009234e-06,
"loss": 0.0095,
"step": 22750
},
{
"epoch": 10.59,
"grad_norm": 1.9076685905456543,
"learning_rate": 8.96168051708218e-06,
"loss": 0.0097,
"step": 23000
},
{
"epoch": 10.59,
"eval_loss": 0.2347114235162735,
"eval_runtime": 674.6572,
"eval_samples_per_second": 10.008,
"eval_steps_per_second": 0.313,
"eval_wer": 25.425947597040683,
"step": 23000
},
{
"epoch": 10.71,
"grad_norm": 1.4558827877044678,
"learning_rate": 8.950138504155126e-06,
"loss": 0.0097,
"step": 23250
},
{
"epoch": 10.82,
"grad_norm": 2.6334474086761475,
"learning_rate": 8.93859649122807e-06,
"loss": 0.0103,
"step": 23500
},
{
"epoch": 10.94,
"grad_norm": 1.4682759046554565,
"learning_rate": 8.927100646352724e-06,
"loss": 0.0104,
"step": 23750
},
{
"epoch": 11.05,
"grad_norm": 1.105055332183838,
"learning_rate": 8.915558633425671e-06,
"loss": 0.0082,
"step": 24000
},
{
"epoch": 11.05,
"eval_loss": 0.23529361188411713,
"eval_runtime": 677.9021,
"eval_samples_per_second": 9.96,
"eval_steps_per_second": 0.311,
"eval_wer": 25.18594918380705,
"step": 24000
},
{
"epoch": 11.17,
"grad_norm": 0.9997087717056274,
"learning_rate": 8.904016620498616e-06,
"loss": 0.0062,
"step": 24250
},
{
"epoch": 11.29,
"grad_norm": 1.274902582168579,
"learning_rate": 8.89247460757156e-06,
"loss": 0.0067,
"step": 24500
},
{
"epoch": 11.4,
"grad_norm": 1.1603277921676636,
"learning_rate": 8.880932594644508e-06,
"loss": 0.0068,
"step": 24750
},
{
"epoch": 11.52,
"grad_norm": 1.450378179550171,
"learning_rate": 8.869390581717452e-06,
"loss": 0.0069,
"step": 25000
},
{
"epoch": 11.52,
"eval_loss": 0.23996803164482117,
"eval_runtime": 672.8137,
"eval_samples_per_second": 10.035,
"eval_steps_per_second": 0.314,
"eval_wer": 26.251066108653827,
"step": 25000
},
{
"epoch": 11.63,
"grad_norm": 1.0134578943252563,
"learning_rate": 8.857848568790398e-06,
"loss": 0.0072,
"step": 25250
},
{
"epoch": 11.75,
"grad_norm": 1.8789595365524292,
"learning_rate": 8.846306555863344e-06,
"loss": 0.0072,
"step": 25500
},
{
"epoch": 11.86,
"grad_norm": 0.972827136516571,
"learning_rate": 8.834764542936288e-06,
"loss": 0.0076,
"step": 25750
},
{
"epoch": 11.98,
"grad_norm": 2.2902746200561523,
"learning_rate": 8.823268698060943e-06,
"loss": 0.0075,
"step": 26000
},
{
"epoch": 11.98,
"eval_loss": 0.23599743843078613,
"eval_runtime": 667.2943,
"eval_samples_per_second": 10.118,
"eval_steps_per_second": 0.316,
"eval_wer": 25.400162643552772,
"step": 26000
},
{
"epoch": 12.09,
"grad_norm": 1.1216572523117065,
"learning_rate": 8.811726685133887e-06,
"loss": 0.0054,
"step": 26250
},
{
"epoch": 12.21,
"grad_norm": 1.168253779411316,
"learning_rate": 8.800184672206835e-06,
"loss": 0.0048,
"step": 26500
},
{
"epoch": 12.32,
"grad_norm": 0.6528610587120056,
"learning_rate": 8.788642659279779e-06,
"loss": 0.0049,
"step": 26750
},
{
"epoch": 12.44,
"grad_norm": 1.3029311895370483,
"learning_rate": 8.777100646352723e-06,
"loss": 0.0052,
"step": 27000
},
{
"epoch": 12.44,
"eval_loss": 0.24270391464233398,
"eval_runtime": 668.33,
"eval_samples_per_second": 10.103,
"eval_steps_per_second": 0.316,
"eval_wer": 24.73967114267013,
"step": 27000
},
{
"epoch": 12.55,
"grad_norm": 1.0006558895111084,
"learning_rate": 8.765558633425671e-06,
"loss": 0.0053,
"step": 27250
},
{
"epoch": 12.67,
"grad_norm": 1.4560002088546753,
"learning_rate": 8.754016620498615e-06,
"loss": 0.0052,
"step": 27500
},
{
"epoch": 12.78,
"grad_norm": 1.9307841062545776,
"learning_rate": 8.742474607571561e-06,
"loss": 0.0053,
"step": 27750
},
{
"epoch": 12.9,
"grad_norm": 1.977569580078125,
"learning_rate": 8.730932594644507e-06,
"loss": 0.0056,
"step": 28000
},
{
"epoch": 12.9,
"eval_loss": 0.24133123457431793,
"eval_runtime": 682.6727,
"eval_samples_per_second": 9.891,
"eval_steps_per_second": 0.309,
"eval_wer": 25.247436380585913,
"step": 28000
},
{
"epoch": 13.01,
"grad_norm": 0.9914600849151611,
"learning_rate": 8.719390581717452e-06,
"loss": 0.0056,
"step": 28250
},
{
"epoch": 13.13,
"grad_norm": 0.8464221358299255,
"learning_rate": 8.707848568790398e-06,
"loss": 0.0037,
"step": 28500
},
{
"epoch": 13.24,
"grad_norm": 0.6425495147705078,
"learning_rate": 8.69635272391505e-06,
"loss": 0.0038,
"step": 28750
},
{
"epoch": 13.36,
"grad_norm": 2.0510671138763428,
"learning_rate": 8.684810710987998e-06,
"loss": 0.0041,
"step": 29000
},
{
"epoch": 13.36,
"eval_loss": 0.24362993240356445,
"eval_runtime": 666.1767,
"eval_samples_per_second": 10.135,
"eval_steps_per_second": 0.317,
"eval_wer": 25.074875538012968,
"step": 29000
},
{
"epoch": 13.47,
"grad_norm": 1.125919222831726,
"learning_rate": 8.673268698060943e-06,
"loss": 0.0041,
"step": 29250
},
{
"epoch": 13.59,
"grad_norm": 1.247779369354248,
"learning_rate": 8.661726685133889e-06,
"loss": 0.0044,
"step": 29500
},
{
"epoch": 13.7,
"grad_norm": 1.009018063545227,
"learning_rate": 8.650184672206835e-06,
"loss": 0.0046,
"step": 29750
},
{
"epoch": 13.82,
"grad_norm": 1.848225712776184,
"learning_rate": 8.638642659279779e-06,
"loss": 0.0048,
"step": 30000
},
{
"epoch": 13.82,
"eval_loss": 0.24545399844646454,
"eval_runtime": 667.6447,
"eval_samples_per_second": 10.113,
"eval_steps_per_second": 0.316,
"eval_wer": 25.29900628756174,
"step": 30000
},
{
"epoch": 13.93,
"grad_norm": 0.4666302502155304,
"learning_rate": 8.627100646352725e-06,
"loss": 0.0046,
"step": 30250
},
{
"epoch": 14.05,
"grad_norm": 0.6961706280708313,
"learning_rate": 8.61555863342567e-06,
"loss": 0.004,
"step": 30500
},
{
"epoch": 14.16,
"grad_norm": 0.9278767108917236,
"learning_rate": 8.604016620498615e-06,
"loss": 0.003,
"step": 30750
},
{
"epoch": 14.28,
"grad_norm": 0.9175160527229309,
"learning_rate": 8.59252077562327e-06,
"loss": 0.0033,
"step": 31000
},
{
"epoch": 14.28,
"eval_loss": 0.247360959649086,
"eval_runtime": 674.4213,
"eval_samples_per_second": 10.012,
"eval_steps_per_second": 0.313,
"eval_wer": 25.13041236091001,
"step": 31000
},
{
"epoch": 14.39,
"grad_norm": 0.6542864441871643,
"learning_rate": 8.580978762696216e-06,
"loss": 0.0034,
"step": 31250
},
{
"epoch": 14.51,
"grad_norm": 14.155986785888672,
"learning_rate": 8.56943674976916e-06,
"loss": 0.0037,
"step": 31500
},
{
"epoch": 14.62,
"grad_norm": 0.7710313200950623,
"learning_rate": 8.557940904893815e-06,
"loss": 0.0039,
"step": 31750
},
{
"epoch": 14.74,
"grad_norm": 1.6279186010360718,
"learning_rate": 8.546398891966759e-06,
"loss": 0.0038,
"step": 32000
},
{
"epoch": 14.74,
"eval_loss": 0.25091758370399475,
"eval_runtime": 686.3757,
"eval_samples_per_second": 9.837,
"eval_steps_per_second": 0.307,
"eval_wer": 25.287105539798084,
"step": 32000
},
{
"epoch": 14.85,
"grad_norm": 0.883815586566925,
"learning_rate": 8.534856879039707e-06,
"loss": 0.0038,
"step": 32250
},
{
"epoch": 14.97,
"grad_norm": 0.6397805213928223,
"learning_rate": 8.523314866112651e-06,
"loss": 0.0037,
"step": 32500
},
{
"epoch": 15.09,
"grad_norm": 0.40614956617355347,
"learning_rate": 8.511772853185595e-06,
"loss": 0.003,
"step": 32750
},
{
"epoch": 15.2,
"grad_norm": 0.3507106602191925,
"learning_rate": 8.500230840258543e-06,
"loss": 0.0026,
"step": 33000
},
{
"epoch": 15.2,
"eval_loss": 0.2525634467601776,
"eval_runtime": 668.1892,
"eval_samples_per_second": 10.105,
"eval_steps_per_second": 0.316,
"eval_wer": 25.26925441815261,
"step": 33000
},
{
"epoch": 15.32,
"grad_norm": 0.236125648021698,
"learning_rate": 8.488688827331487e-06,
"loss": 0.0028,
"step": 33250
},
{
"epoch": 15.43,
"grad_norm": 1.2400788068771362,
"learning_rate": 8.477146814404433e-06,
"loss": 0.0028,
"step": 33500
},
{
"epoch": 15.55,
"grad_norm": 1.4743680953979492,
"learning_rate": 8.465604801477379e-06,
"loss": 0.0032,
"step": 33750
},
{
"epoch": 15.66,
"grad_norm": 1.9355671405792236,
"learning_rate": 8.454062788550323e-06,
"loss": 0.0035,
"step": 34000
},
{
"epoch": 15.66,
"eval_loss": 0.2545054256916046,
"eval_runtime": 681.2267,
"eval_samples_per_second": 9.912,
"eval_steps_per_second": 0.31,
"eval_wer": 25.43784834480433,
"step": 34000
},
{
"epoch": 15.78,
"grad_norm": 0.8273574709892273,
"learning_rate": 8.442566943674978e-06,
"loss": 0.0033,
"step": 34250
},
{
"epoch": 15.89,
"grad_norm": 1.760032057762146,
"learning_rate": 8.431024930747922e-06,
"loss": 0.0035,
"step": 34500
},
{
"epoch": 16.01,
"grad_norm": 1.946081280708313,
"learning_rate": 8.419482917820868e-06,
"loss": 0.0033,
"step": 34750
},
{
"epoch": 16.12,
"grad_norm": 0.564914345741272,
"learning_rate": 8.407940904893814e-06,
"loss": 0.0024,
"step": 35000
},
{
"epoch": 16.12,
"eval_loss": 0.2501762807369232,
"eval_runtime": 670.7755,
"eval_samples_per_second": 10.066,
"eval_steps_per_second": 0.315,
"eval_wer": 24.212071325148262,
"step": 35000
},
{
"epoch": 16.24,
"grad_norm": 1.6005988121032715,
"learning_rate": 8.396398891966759e-06,
"loss": 0.0024,
"step": 35250
},
{
"epoch": 16.35,
"grad_norm": 0.8374671339988708,
"learning_rate": 8.384856879039706e-06,
"loss": 0.0028,
"step": 35500
},
{
"epoch": 16.47,
"grad_norm": 1.0652960538864136,
"learning_rate": 8.37331486611265e-06,
"loss": 0.003,
"step": 35750
},
{
"epoch": 16.58,
"grad_norm": 1.4033843278884888,
"learning_rate": 8.361772853185595e-06,
"loss": 0.0027,
"step": 36000
},
{
"epoch": 16.58,
"eval_loss": 0.2562379240989685,
"eval_runtime": 678.952,
"eval_samples_per_second": 9.945,
"eval_steps_per_second": 0.311,
"eval_wer": 24.701985441418568,
"step": 36000
},
{
"epoch": 16.7,
"grad_norm": 0.6179186701774597,
"learning_rate": 8.350230840258543e-06,
"loss": 0.0029,
"step": 36250
},
{
"epoch": 16.81,
"grad_norm": 1.806768774986267,
"learning_rate": 8.338688827331487e-06,
"loss": 0.0031,
"step": 36500
},
{
"epoch": 16.93,
"grad_norm": 0.528777003288269,
"learning_rate": 8.327146814404433e-06,
"loss": 0.0031,
"step": 36750
},
{
"epoch": 17.04,
"grad_norm": 0.18216899037361145,
"learning_rate": 8.315604801477379e-06,
"loss": 0.0029,
"step": 37000
},
{
"epoch": 17.04,
"eval_loss": 0.25483274459838867,
"eval_runtime": 673.8208,
"eval_samples_per_second": 10.02,
"eval_steps_per_second": 0.313,
"eval_wer": 24.735704226748915,
"step": 37000
},
{
"epoch": 17.16,
"grad_norm": 0.3349086046218872,
"learning_rate": 8.304062788550323e-06,
"loss": 0.0019,
"step": 37250
},
{
"epoch": 17.27,
"grad_norm": 1.109415888786316,
"learning_rate": 8.29252077562327e-06,
"loss": 0.0023,
"step": 37500
},
{
"epoch": 17.39,
"grad_norm": 0.5039780735969543,
"learning_rate": 8.280978762696215e-06,
"loss": 0.0024,
"step": 37750
},
{
"epoch": 17.5,
"grad_norm": 1.2728774547576904,
"learning_rate": 8.269436749769161e-06,
"loss": 0.0026,
"step": 38000
},
{
"epoch": 17.5,
"eval_loss": 0.25514695048332214,
"eval_runtime": 676.5647,
"eval_samples_per_second": 9.98,
"eval_steps_per_second": 0.312,
"eval_wer": 24.102981137314792,
"step": 38000
},
{
"epoch": 17.62,
"grad_norm": 0.5640347003936768,
"learning_rate": 8.257894736842105e-06,
"loss": 0.0027,
"step": 38250
},
{
"epoch": 17.73,
"grad_norm": 0.6144846677780151,
"learning_rate": 8.24639889196676e-06,
"loss": 0.0027,
"step": 38500
},
{
"epoch": 17.85,
"grad_norm": 0.42385855317115784,
"learning_rate": 8.234856879039706e-06,
"loss": 0.0027,
"step": 38750
},
{
"epoch": 17.96,
"grad_norm": 0.5083030462265015,
"learning_rate": 8.22331486611265e-06,
"loss": 0.0026,
"step": 39000
},
{
"epoch": 17.96,
"eval_loss": 0.2563398778438568,
"eval_runtime": 693.9966,
"eval_samples_per_second": 9.729,
"eval_steps_per_second": 0.304,
"eval_wer": 24.52942459884563,
"step": 39000
},
{
"epoch": 18.08,
"grad_norm": 0.7999371290206909,
"learning_rate": 8.211772853185596e-06,
"loss": 0.0022,
"step": 39250
},
{
"epoch": 18.19,
"grad_norm": 0.47253143787384033,
"learning_rate": 8.200230840258542e-06,
"loss": 0.0019,
"step": 39500
},
{
"epoch": 18.31,
"grad_norm": 0.9011787176132202,
"learning_rate": 8.188688827331487e-06,
"loss": 0.0019,
"step": 39750
},
{
"epoch": 18.42,
"grad_norm": 1.1234568357467651,
"learning_rate": 8.177146814404433e-06,
"loss": 0.002,
"step": 40000
},
{
"epoch": 18.42,
"eval_loss": 0.2568005323410034,
"eval_runtime": 670.3005,
"eval_samples_per_second": 10.073,
"eval_steps_per_second": 0.315,
"eval_wer": 24.36479758811512,
"step": 40000
},
{
"epoch": 18.54,
"grad_norm": 0.7889108657836914,
"learning_rate": 8.165604801477379e-06,
"loss": 0.002,
"step": 40250
},
{
"epoch": 18.65,
"grad_norm": 1.1920995712280273,
"learning_rate": 8.154062788550325e-06,
"loss": 0.0021,
"step": 40500
},
{
"epoch": 18.77,
"grad_norm": 0.8429755568504333,
"learning_rate": 8.142520775623269e-06,
"loss": 0.0022,
"step": 40750
},
{
"epoch": 18.89,
"grad_norm": 3.1110446453094482,
"learning_rate": 8.131024930747923e-06,
"loss": 0.0028,
"step": 41000
},
{
"epoch": 18.89,
"eval_loss": 0.2559947073459625,
"eval_runtime": 691.8949,
"eval_samples_per_second": 9.759,
"eval_steps_per_second": 0.305,
"eval_wer": 24.575044131939624,
"step": 41000
},
{
"epoch": 19.0,
"grad_norm": 0.3139660656452179,
"learning_rate": 8.11948291782087e-06,
"loss": 0.0026,
"step": 41250
},
{
"epoch": 19.12,
"grad_norm": 0.21102827787399292,
"learning_rate": 8.107940904893814e-06,
"loss": 0.0017,
"step": 41500
},
{
"epoch": 19.23,
"grad_norm": 0.2366773635149002,
"learning_rate": 8.09639889196676e-06,
"loss": 0.0015,
"step": 41750
},
{
"epoch": 19.35,
"grad_norm": 0.5561370253562927,
"learning_rate": 8.084903047091414e-06,
"loss": 0.0019,
"step": 42000
},
{
"epoch": 19.35,
"eval_loss": 0.26248979568481445,
"eval_runtime": 678.0519,
"eval_samples_per_second": 9.958,
"eval_steps_per_second": 0.311,
"eval_wer": 24.596862169506316,
"step": 42000
},
{
"epoch": 19.46,
"grad_norm": 0.6396375894546509,
"learning_rate": 8.073361034164359e-06,
"loss": 0.0019,
"step": 42250
},
{
"epoch": 19.58,
"grad_norm": 0.4240398406982422,
"learning_rate": 8.061819021237305e-06,
"loss": 0.0021,
"step": 42500
},
{
"epoch": 19.69,
"grad_norm": 2.101404905319214,
"learning_rate": 8.05027700831025e-06,
"loss": 0.002,
"step": 42750
},
{
"epoch": 19.81,
"grad_norm": 0.4409444034099579,
"learning_rate": 8.038734995383195e-06,
"loss": 0.0021,
"step": 43000
},
{
"epoch": 19.81,
"eval_loss": 0.26329436898231506,
"eval_runtime": 674.3068,
"eval_samples_per_second": 10.013,
"eval_steps_per_second": 0.313,
"eval_wer": 24.102981137314792,
"step": 43000
},
{
"epoch": 19.92,
"grad_norm": 1.5778237581253052,
"learning_rate": 8.027192982456141e-06,
"loss": 0.0021,
"step": 43250
},
{
"epoch": 20.04,
"grad_norm": 0.34445250034332275,
"learning_rate": 8.015650969529087e-06,
"loss": 0.002,
"step": 43500
},
{
"epoch": 20.15,
"grad_norm": 0.8328190445899963,
"learning_rate": 8.004108956602033e-06,
"loss": 0.0015,
"step": 43750
},
{
"epoch": 20.27,
"grad_norm": 1.4873714447021484,
"learning_rate": 7.992566943674977e-06,
"loss": 0.0015,
"step": 44000
},
{
"epoch": 20.27,
"eval_loss": 0.2650669813156128,
"eval_runtime": 678.5481,
"eval_samples_per_second": 9.951,
"eval_steps_per_second": 0.311,
"eval_wer": 24.23983973659678,
"step": 44000
},
{
"epoch": 20.38,
"grad_norm": 2.7649030685424805,
"learning_rate": 7.981024930747923e-06,
"loss": 0.0016,
"step": 44250
},
{
"epoch": 20.5,
"grad_norm": 0.20013980567455292,
"learning_rate": 7.969529085872578e-06,
"loss": 0.0019,
"step": 44500
},
{
"epoch": 20.61,
"grad_norm": 1.8381603956222534,
"learning_rate": 7.957987072945522e-06,
"loss": 0.0021,
"step": 44750
},
{
"epoch": 20.73,
"grad_norm": 1.9389904737472534,
"learning_rate": 7.946445060018468e-06,
"loss": 0.0018,
"step": 45000
},
{
"epoch": 20.73,
"eval_loss": 0.26352566480636597,
"eval_runtime": 684.4278,
"eval_samples_per_second": 9.865,
"eval_steps_per_second": 0.308,
"eval_wer": 24.100997679354187,
"step": 45000
},
{
"epoch": 20.84,
"grad_norm": 0.1582639217376709,
"learning_rate": 7.934903047091414e-06,
"loss": 0.0022,
"step": 45250
},
{
"epoch": 20.96,
"grad_norm": 1.1058118343353271,
"learning_rate": 7.923361034164358e-06,
"loss": 0.0021,
"step": 45500
},
{
"epoch": 21.07,
"grad_norm": 0.4075948894023895,
"learning_rate": 7.911819021237304e-06,
"loss": 0.0018,
"step": 45750
},
{
"epoch": 21.19,
"grad_norm": 0.894478440284729,
"learning_rate": 7.90027700831025e-06,
"loss": 0.0013,
"step": 46000
},
{
"epoch": 21.19,
"eval_loss": 0.2585604190826416,
"eval_runtime": 674.5545,
"eval_samples_per_second": 10.01,
"eval_steps_per_second": 0.313,
"eval_wer": 23.849098518356904,
"step": 46000
},
{
"epoch": 21.3,
"grad_norm": 0.8086264729499817,
"learning_rate": 7.888734995383196e-06,
"loss": 0.0016,
"step": 46250
},
{
"epoch": 21.42,
"grad_norm": 0.5767725706100464,
"learning_rate": 7.87719298245614e-06,
"loss": 0.0016,
"step": 46500
},
{
"epoch": 21.53,
"grad_norm": 1.622611403465271,
"learning_rate": 7.865650969529087e-06,
"loss": 0.0019,
"step": 46750
},
{
"epoch": 21.65,
"grad_norm": 2.0076467990875244,
"learning_rate": 7.854108956602033e-06,
"loss": 0.0018,
"step": 47000
},
{
"epoch": 21.65,
"eval_loss": 0.2612689435482025,
"eval_runtime": 680.9382,
"eval_samples_per_second": 9.916,
"eval_steps_per_second": 0.31,
"eval_wer": 23.940337584544896,
"step": 47000
},
{
"epoch": 21.76,
"grad_norm": 0.3226953446865082,
"learning_rate": 7.842566943674977e-06,
"loss": 0.0018,
"step": 47250
},
{
"epoch": 21.88,
"grad_norm": 0.5793449878692627,
"learning_rate": 7.831024930747923e-06,
"loss": 0.0019,
"step": 47500
},
{
"epoch": 21.99,
"grad_norm": 0.3840419352054596,
"learning_rate": 7.819482917820869e-06,
"loss": 0.0019,
"step": 47750
},
{
"epoch": 22.11,
"grad_norm": 2.221217155456543,
"learning_rate": 7.807940904893813e-06,
"loss": 0.0014,
"step": 48000
},
{
"epoch": 22.11,
"eval_loss": 0.26184049248695374,
"eval_runtime": 679.8585,
"eval_samples_per_second": 9.931,
"eval_steps_per_second": 0.31,
"eval_wer": 23.591248983477794,
"step": 48000
},
{
"epoch": 22.22,
"grad_norm": 0.1267741620540619,
"learning_rate": 7.79639889196676e-06,
"loss": 0.0013,
"step": 48250
},
{
"epoch": 22.34,
"grad_norm": 0.3871385157108307,
"learning_rate": 7.784856879039705e-06,
"loss": 0.0016,
"step": 48500
},
{
"epoch": 22.46,
"grad_norm": 0.319933146238327,
"learning_rate": 7.773361034164358e-06,
"loss": 0.0016,
"step": 48750
},
{
"epoch": 22.57,
"grad_norm": 0.21517297625541687,
"learning_rate": 7.761819021237304e-06,
"loss": 0.0017,
"step": 49000
},
{
"epoch": 22.57,
"eval_loss": 0.2654561698436737,
"eval_runtime": 676.7053,
"eval_samples_per_second": 9.978,
"eval_steps_per_second": 0.312,
"eval_wer": 23.755875994208303,
"step": 49000
},
{
"epoch": 22.69,
"grad_norm": 0.17343254387378693,
"learning_rate": 7.75027700831025e-06,
"loss": 0.0017,
"step": 49250
},
{
"epoch": 22.8,
"grad_norm": 0.31837859749794006,
"learning_rate": 7.738734995383196e-06,
"loss": 0.0015,
"step": 49500
},
{
"epoch": 22.92,
"grad_norm": 1.0666159391403198,
"learning_rate": 7.72719298245614e-06,
"loss": 0.0015,
"step": 49750
},
{
"epoch": 23.03,
"grad_norm": 0.5933089852333069,
"learning_rate": 7.715650969529086e-06,
"loss": 0.0016,
"step": 50000
},
{
"epoch": 23.03,
"eval_loss": 0.2641240656375885,
"eval_runtime": 701.7818,
"eval_samples_per_second": 9.621,
"eval_steps_per_second": 0.301,
"eval_wer": 23.57141440387171,
"step": 50000
},
{
"epoch": 23.15,
"grad_norm": 4.56223201751709,
"learning_rate": 7.704108956602032e-06,
"loss": 0.0012,
"step": 50250
},
{
"epoch": 23.26,
"grad_norm": 0.13189882040023804,
"learning_rate": 7.692566943674977e-06,
"loss": 0.0013,
"step": 50500
},
{
"epoch": 23.38,
"grad_norm": 0.3501899242401123,
"learning_rate": 7.681024930747923e-06,
"loss": 0.0014,
"step": 50750
},
{
"epoch": 23.49,
"grad_norm": 0.3943934738636017,
"learning_rate": 7.669529085872577e-06,
"loss": 0.0014,
"step": 51000
},
{
"epoch": 23.49,
"eval_loss": 0.26596611738204956,
"eval_runtime": 667.3547,
"eval_samples_per_second": 10.118,
"eval_steps_per_second": 0.316,
"eval_wer": 23.60910010512327,
"step": 51000
},
{
"epoch": 23.61,
"grad_norm": 0.6058038473129272,
"learning_rate": 7.657987072945522e-06,
"loss": 0.0014,
"step": 51250
},
{
"epoch": 23.72,
"grad_norm": 0.18997740745544434,
"learning_rate": 7.646445060018468e-06,
"loss": 0.0018,
"step": 51500
},
{
"epoch": 23.84,
"grad_norm": 0.4755234122276306,
"learning_rate": 7.634903047091414e-06,
"loss": 0.0017,
"step": 51750
},
{
"epoch": 23.95,
"grad_norm": 0.6140190362930298,
"learning_rate": 7.62336103416436e-06,
"loss": 0.0018,
"step": 52000
},
{
"epoch": 23.95,
"eval_loss": 0.2636994421482086,
"eval_runtime": 678.6943,
"eval_samples_per_second": 9.949,
"eval_steps_per_second": 0.311,
"eval_wer": 23.870916555923596,
"step": 52000
},
{
"epoch": 24.07,
"grad_norm": 0.6709560751914978,
"learning_rate": 7.611819021237305e-06,
"loss": 0.0013,
"step": 52250
},
{
"epoch": 24.18,
"grad_norm": 0.40510040521621704,
"learning_rate": 7.60027700831025e-06,
"loss": 0.0011,
"step": 52500
},
{
"epoch": 24.3,
"grad_norm": 0.464121550321579,
"learning_rate": 7.588734995383196e-06,
"loss": 0.0011,
"step": 52750
},
{
"epoch": 24.41,
"grad_norm": 0.415995329618454,
"learning_rate": 7.577192982456141e-06,
"loss": 0.0012,
"step": 53000
},
{
"epoch": 24.41,
"eval_loss": 0.2662787139415741,
"eval_runtime": 679.0891,
"eval_samples_per_second": 9.943,
"eval_steps_per_second": 0.311,
"eval_wer": 23.48017533768372,
"step": 53000
},
{
"epoch": 24.53,
"grad_norm": 0.6342004537582397,
"learning_rate": 7.565650969529087e-06,
"loss": 0.0014,
"step": 53250
},
{
"epoch": 24.64,
"grad_norm": 0.36562052369117737,
"learning_rate": 7.554155124653741e-06,
"loss": 0.0014,
"step": 53500
},
{
"epoch": 24.76,
"grad_norm": 0.47582271695137024,
"learning_rate": 7.542613111726685e-06,
"loss": 0.0015,
"step": 53750
},
{
"epoch": 24.87,
"grad_norm": 0.7419930696487427,
"learning_rate": 7.531071098799632e-06,
"loss": 0.0015,
"step": 54000
},
{
"epoch": 24.87,
"eval_loss": 0.2703973352909088,
"eval_runtime": 665.4932,
"eval_samples_per_second": 10.146,
"eval_steps_per_second": 0.317,
"eval_wer": 23.75190907828709,
"step": 54000
},
{
"epoch": 24.99,
"grad_norm": 0.7274812459945679,
"learning_rate": 7.519529085872577e-06,
"loss": 0.0016,
"step": 54250
},
{
"epoch": 25.1,
"grad_norm": 1.2647254467010498,
"learning_rate": 7.507987072945521e-06,
"loss": 0.0012,
"step": 54500
},
{
"epoch": 25.22,
"grad_norm": 0.27594470977783203,
"learning_rate": 7.496445060018468e-06,
"loss": 0.0011,
"step": 54750
},
{
"epoch": 25.33,
"grad_norm": 0.14190466701984406,
"learning_rate": 7.484903047091413e-06,
"loss": 0.0012,
"step": 55000
},
{
"epoch": 25.33,
"eval_loss": 0.2656785249710083,
"eval_runtime": 668.1719,
"eval_samples_per_second": 10.105,
"eval_steps_per_second": 0.316,
"eval_wer": 24.233889362714958,
"step": 55000
},
{
"epoch": 25.45,
"grad_norm": 0.559374213218689,
"learning_rate": 7.473361034164359e-06,
"loss": 0.0013,
"step": 55250
},
{
"epoch": 25.56,
"grad_norm": 3.8242385387420654,
"learning_rate": 7.461865189289012e-06,
"loss": 0.0012,
"step": 55500
},
{
"epoch": 25.68,
"grad_norm": 0.23001307249069214,
"learning_rate": 7.450323176361957e-06,
"loss": 0.0014,
"step": 55750
},
{
"epoch": 25.79,
"grad_norm": 0.45375123620033264,
"learning_rate": 7.438781163434904e-06,
"loss": 0.0013,
"step": 56000
},
{
"epoch": 25.79,
"eval_loss": 0.2668148875236511,
"eval_runtime": 675.9749,
"eval_samples_per_second": 9.989,
"eval_steps_per_second": 0.312,
"eval_wer": 23.287779915504693,
"step": 56000
},
{
"epoch": 25.91,
"grad_norm": 0.11875366419553757,
"learning_rate": 7.427239150507849e-06,
"loss": 0.0014,
"step": 56250
},
{
"epoch": 26.02,
"grad_norm": 0.21367508172988892,
"learning_rate": 7.415697137580795e-06,
"loss": 0.0011,
"step": 56500
},
{
"epoch": 26.14,
"grad_norm": 0.2883310317993164,
"learning_rate": 7.4041551246537405e-06,
"loss": 0.001,
"step": 56750
},
{
"epoch": 26.26,
"grad_norm": 0.4850456118583679,
"learning_rate": 7.392613111726686e-06,
"loss": 0.001,
"step": 57000
},
{
"epoch": 26.26,
"eval_loss": 0.26939988136291504,
"eval_runtime": 668.5911,
"eval_samples_per_second": 10.099,
"eval_steps_per_second": 0.316,
"eval_wer": 23.285796457544084,
"step": 57000
},
{
"epoch": 26.37,
"grad_norm": 0.21186549961566925,
"learning_rate": 7.381071098799632e-06,
"loss": 0.0014,
"step": 57250
},
{
"epoch": 26.49,
"grad_norm": 1.3765850067138672,
"learning_rate": 7.369529085872577e-06,
"loss": 0.0014,
"step": 57500
},
{
"epoch": 26.6,
"grad_norm": 1.716868281364441,
"learning_rate": 7.358033240997231e-06,
"loss": 0.0012,
"step": 57750
},
{
"epoch": 26.72,
"grad_norm": 1.3002432584762573,
"learning_rate": 7.3464912280701765e-06,
"loss": 0.0013,
"step": 58000
},
{
"epoch": 26.72,
"eval_loss": 0.2650892734527588,
"eval_runtime": 675.4132,
"eval_samples_per_second": 9.997,
"eval_steps_per_second": 0.312,
"eval_wer": 23.279846083662257,
"step": 58000
},
{
"epoch": 26.83,
"grad_norm": 0.3222731053829193,
"learning_rate": 7.334949215143121e-06,
"loss": 0.0013,
"step": 58250
},
{
"epoch": 26.95,
"grad_norm": 0.376442015171051,
"learning_rate": 7.323407202216068e-06,
"loss": 0.0013,
"step": 58500
},
{
"epoch": 27.06,
"grad_norm": 0.1525341123342514,
"learning_rate": 7.311865189289013e-06,
"loss": 0.001,
"step": 58750
},
{
"epoch": 27.18,
"grad_norm": 0.3236662745475769,
"learning_rate": 7.300323176361959e-06,
"loss": 0.0009,
"step": 59000
},
{
"epoch": 27.18,
"eval_loss": 0.26985055208206177,
"eval_runtime": 680.6209,
"eval_samples_per_second": 9.92,
"eval_steps_per_second": 0.31,
"eval_wer": 23.258028046095564,
"step": 59000
},
{
"epoch": 27.29,
"grad_norm": 0.12151502072811127,
"learning_rate": 7.288827331486612e-06,
"loss": 0.0012,
"step": 59250
},
{
"epoch": 27.41,
"grad_norm": 1.5825998783111572,
"learning_rate": 7.277285318559557e-06,
"loss": 0.0012,
"step": 59500
},
{
"epoch": 27.52,
"grad_norm": 0.1484094262123108,
"learning_rate": 7.265743305632504e-06,
"loss": 0.0012,
"step": 59750
},
{
"epoch": 27.64,
"grad_norm": 0.6981366872787476,
"learning_rate": 7.254201292705448e-06,
"loss": 0.001,
"step": 60000
},
{
"epoch": 27.64,
"eval_loss": 0.2713184356689453,
"eval_runtime": 670.5479,
"eval_samples_per_second": 10.069,
"eval_steps_per_second": 0.315,
"eval_wer": 23.236210008528868,
"step": 60000
},
{
"epoch": 27.75,
"grad_norm": 0.1501929610967636,
"learning_rate": 7.242659279778393e-06,
"loss": 0.0011,
"step": 60250
},
{
"epoch": 27.87,
"grad_norm": 3.85675048828125,
"learning_rate": 7.23111726685134e-06,
"loss": 0.0014,
"step": 60500
},
{
"epoch": 27.98,
"grad_norm": 0.3616078197956085,
"learning_rate": 7.219575253924285e-06,
"loss": 0.0013,
"step": 60750
},
{
"epoch": 28.1,
"grad_norm": 0.20366336405277252,
"learning_rate": 7.208033240997231e-06,
"loss": 0.0011,
"step": 61000
},
{
"epoch": 28.1,
"eval_loss": 0.2708372175693512,
"eval_runtime": 673.0742,
"eval_samples_per_second": 10.032,
"eval_steps_per_second": 0.313,
"eval_wer": 23.390919729456332,
"step": 61000
},
{
"epoch": 28.21,
"grad_norm": 0.40390634536743164,
"learning_rate": 7.196491228070176e-06,
"loss": 0.001,
"step": 61250
},
{
"epoch": 28.33,
"grad_norm": 1.1840142011642456,
"learning_rate": 7.184949215143121e-06,
"loss": 0.0009,
"step": 61500
},
{
"epoch": 28.44,
"grad_norm": 1.5266140699386597,
"learning_rate": 7.173407202216067e-06,
"loss": 0.0009,
"step": 61750
},
{
"epoch": 28.56,
"grad_norm": 0.1660241335630417,
"learning_rate": 7.1618651892890125e-06,
"loss": 0.0012,
"step": 62000
},
{
"epoch": 28.56,
"eval_loss": 0.27276286482810974,
"eval_runtime": 683.6949,
"eval_samples_per_second": 9.876,
"eval_steps_per_second": 0.309,
"eval_wer": 22.956542436083065,
"step": 62000
},
{
"epoch": 28.67,
"grad_norm": 1.5870364904403687,
"learning_rate": 7.1503231763619585e-06,
"loss": 0.0011,
"step": 62250
},
{
"epoch": 28.79,
"grad_norm": 0.21234387159347534,
"learning_rate": 7.138781163434904e-06,
"loss": 0.0014,
"step": 62500
},
{
"epoch": 28.9,
"grad_norm": 0.25426217913627625,
"learning_rate": 7.127239150507849e-06,
"loss": 0.0012,
"step": 62750
},
{
"epoch": 29.02,
"grad_norm": 0.1948922723531723,
"learning_rate": 7.115697137580795e-06,
"loss": 0.0013,
"step": 63000
},
{
"epoch": 29.02,
"eval_loss": 0.27328047156333923,
"eval_runtime": 662.6991,
"eval_samples_per_second": 10.189,
"eval_steps_per_second": 0.318,
"eval_wer": 22.79389888331317,
"step": 63000
},
{
"epoch": 29.13,
"grad_norm": 0.1855873465538025,
"learning_rate": 7.10415512465374e-06,
"loss": 0.0008,
"step": 63250
},
{
"epoch": 29.25,
"grad_norm": 1.3260753154754639,
"learning_rate": 7.092613111726686e-06,
"loss": 0.0007,
"step": 63500
},
{
"epoch": 29.36,
"grad_norm": 0.13366416096687317,
"learning_rate": 7.08111726685134e-06,
"loss": 0.0009,
"step": 63750
},
{
"epoch": 29.48,
"grad_norm": 0.8121051788330078,
"learning_rate": 7.069575253924285e-06,
"loss": 0.0009,
"step": 64000
},
{
"epoch": 29.48,
"eval_loss": 0.272777795791626,
"eval_runtime": 672.0892,
"eval_samples_per_second": 10.046,
"eval_steps_per_second": 0.314,
"eval_wer": 22.861336453973855,
"step": 64000
},
{
"epoch": 29.59,
"grad_norm": 0.20612682402133942,
"learning_rate": 7.058033240997231e-06,
"loss": 0.0011,
"step": 64250
},
{
"epoch": 29.71,
"grad_norm": 0.37823590636253357,
"learning_rate": 7.046491228070176e-06,
"loss": 0.0011,
"step": 64500
},
{
"epoch": 29.82,
"grad_norm": 2.2586910724639893,
"learning_rate": 7.034949215143122e-06,
"loss": 0.0011,
"step": 64750
},
{
"epoch": 29.94,
"grad_norm": 0.2618952989578247,
"learning_rate": 7.023407202216067e-06,
"loss": 0.001,
"step": 65000
},
{
"epoch": 29.94,
"eval_loss": 0.27243489027023315,
"eval_runtime": 683.4018,
"eval_samples_per_second": 9.88,
"eval_steps_per_second": 0.309,
"eval_wer": 22.87522065969812,
"step": 65000
},
{
"epoch": 30.06,
"grad_norm": 0.1083679348230362,
"learning_rate": 7.011865189289012e-06,
"loss": 0.0009,
"step": 65250
},
{
"epoch": 30.17,
"grad_norm": 0.12795807421207428,
"learning_rate": 7.000323176361958e-06,
"loss": 0.0006,
"step": 65500
},
{
"epoch": 30.29,
"grad_norm": 1.4113242626190186,
"learning_rate": 6.988781163434903e-06,
"loss": 0.0005,
"step": 65750
},
{
"epoch": 30.4,
"grad_norm": 0.4899054765701294,
"learning_rate": 6.977285318559557e-06,
"loss": 0.0009,
"step": 66000
},
{
"epoch": 30.4,
"eval_loss": 0.2714119553565979,
"eval_runtime": 679.0994,
"eval_samples_per_second": 9.943,
"eval_steps_per_second": 0.311,
"eval_wer": 23.03588075450741,
"step": 66000
},
{
"epoch": 30.52,
"grad_norm": 0.13397559523582458,
"learning_rate": 6.965743305632503e-06,
"loss": 0.0008,
"step": 66250
},
{
"epoch": 30.63,
"grad_norm": 0.22414207458496094,
"learning_rate": 6.954201292705448e-06,
"loss": 0.0008,
"step": 66500
},
{
"epoch": 30.75,
"grad_norm": 0.2454010248184204,
"learning_rate": 6.942659279778394e-06,
"loss": 0.0011,
"step": 66750
},
{
"epoch": 30.86,
"grad_norm": 0.5344116687774658,
"learning_rate": 6.9311172668513394e-06,
"loss": 0.0014,
"step": 67000
},
{
"epoch": 30.86,
"eval_loss": 0.27881717681884766,
"eval_runtime": 673.8635,
"eval_samples_per_second": 10.02,
"eval_steps_per_second": 0.313,
"eval_wer": 23.321498700835036,
"step": 67000
},
{
"epoch": 30.98,
"grad_norm": 1.2090712785720825,
"learning_rate": 6.919575253924285e-06,
"loss": 0.0012,
"step": 67250
},
{
"epoch": 31.09,
"grad_norm": 0.17009182274341583,
"learning_rate": 6.908033240997231e-06,
"loss": 0.0009,
"step": 67500
},
{
"epoch": 31.21,
"grad_norm": 0.47179415822029114,
"learning_rate": 6.896491228070176e-06,
"loss": 0.0007,
"step": 67750
},
{
"epoch": 31.32,
"grad_norm": 0.2590140402317047,
"learning_rate": 6.884949215143122e-06,
"loss": 0.0007,
"step": 68000
},
{
"epoch": 31.32,
"eval_loss": 0.278424471616745,
"eval_runtime": 660.7801,
"eval_samples_per_second": 10.218,
"eval_steps_per_second": 0.319,
"eval_wer": 23.246127298331913,
"step": 68000
},
{
"epoch": 31.44,
"grad_norm": 0.6639719009399414,
"learning_rate": 6.873407202216067e-06,
"loss": 0.0009,
"step": 68250
},
{
"epoch": 31.55,
"grad_norm": 0.8088191151618958,
"learning_rate": 6.861911357340721e-06,
"loss": 0.001,
"step": 68500
},
{
"epoch": 31.67,
"grad_norm": 0.9694509506225586,
"learning_rate": 6.850369344413667e-06,
"loss": 0.001,
"step": 68750
},
{
"epoch": 31.78,
"grad_norm": 1.2024418115615845,
"learning_rate": 6.838827331486612e-06,
"loss": 0.0009,
"step": 69000
},
{
"epoch": 31.78,
"eval_loss": 0.27510857582092285,
"eval_runtime": 673.7472,
"eval_samples_per_second": 10.022,
"eval_steps_per_second": 0.313,
"eval_wer": 23.137037110498444,
"step": 69000
},
{
"epoch": 31.9,
"grad_norm": 0.2124684602022171,
"learning_rate": 6.827285318559558e-06,
"loss": 0.0012,
"step": 69250
},
{
"epoch": 32.01,
"grad_norm": 0.1560162901878357,
"learning_rate": 6.815743305632503e-06,
"loss": 0.001,
"step": 69500
},
{
"epoch": 32.13,
"grad_norm": 0.1794072687625885,
"learning_rate": 6.804201292705448e-06,
"loss": 0.0006,
"step": 69750
},
{
"epoch": 32.24,
"grad_norm": 0.2194598764181137,
"learning_rate": 6.792659279778394e-06,
"loss": 0.0005,
"step": 70000
},
{
"epoch": 32.24,
"eval_loss": 0.27563953399658203,
"eval_runtime": 687.9789,
"eval_samples_per_second": 9.814,
"eval_steps_per_second": 0.307,
"eval_wer": 22.787948509431345,
"step": 70000
},
{
"epoch": 32.36,
"grad_norm": 0.11972519010305405,
"learning_rate": 6.781117266851339e-06,
"loss": 0.0007,
"step": 70250
},
{
"epoch": 32.47,
"grad_norm": 1.2118364572525024,
"learning_rate": 6.769575253924285e-06,
"loss": 0.0008,
"step": 70500
},
{
"epoch": 32.59,
"grad_norm": 0.159651979804039,
"learning_rate": 6.75803324099723e-06,
"loss": 0.0009,
"step": 70750
},
{
"epoch": 32.7,
"grad_norm": 1.5151838064193726,
"learning_rate": 6.7464912280701755e-06,
"loss": 0.0009,
"step": 71000
},
{
"epoch": 32.7,
"eval_loss": 0.27915722131729126,
"eval_runtime": 689.7748,
"eval_samples_per_second": 9.789,
"eval_steps_per_second": 0.306,
"eval_wer": 22.797865799234383,
"step": 71000
},
{
"epoch": 32.82,
"grad_norm": 0.10878114402294159,
"learning_rate": 6.73499538319483e-06,
"loss": 0.0008,
"step": 71250
},
{
"epoch": 32.93,
"grad_norm": 0.13962584733963013,
"learning_rate": 6.723453370267775e-06,
"loss": 0.001,
"step": 71500
},
{
"epoch": 33.05,
"grad_norm": 2.416551113128662,
"learning_rate": 6.711911357340721e-06,
"loss": 0.0009,
"step": 71750
},
{
"epoch": 33.16,
"grad_norm": 0.14477728307247162,
"learning_rate": 6.700369344413666e-06,
"loss": 0.0007,
"step": 72000
},
{
"epoch": 33.16,
"eval_loss": 0.2731185853481293,
"eval_runtime": 673.8757,
"eval_samples_per_second": 10.02,
"eval_steps_per_second": 0.313,
"eval_wer": 23.30364757918956,
"step": 72000
},
{
"epoch": 33.28,
"grad_norm": 0.0625206008553505,
"learning_rate": 6.6888273314866115e-06,
"loss": 0.0007,
"step": 72250
},
{
"epoch": 33.39,
"grad_norm": 0.1424214392900467,
"learning_rate": 6.6772853185595575e-06,
"loss": 0.0008,
"step": 72500
},
{
"epoch": 33.51,
"grad_norm": 0.3345101773738861,
"learning_rate": 6.665743305632503e-06,
"loss": 0.0008,
"step": 72750
},
{
"epoch": 33.63,
"grad_norm": 1.2112958431243896,
"learning_rate": 6.654201292705449e-06,
"loss": 0.0009,
"step": 73000
},
{
"epoch": 33.63,
"eval_loss": 0.2806909680366516,
"eval_runtime": 699.4048,
"eval_samples_per_second": 9.654,
"eval_steps_per_second": 0.302,
"eval_wer": 22.60348691909475,
"step": 73000
},
{
"epoch": 33.74,
"grad_norm": 0.6432926058769226,
"learning_rate": 6.642659279778394e-06,
"loss": 0.001,
"step": 73250
},
{
"epoch": 33.86,
"grad_norm": 0.5472640991210938,
"learning_rate": 6.6311634349030475e-06,
"loss": 0.0008,
"step": 73500
},
{
"epoch": 33.97,
"grad_norm": 0.05136106163263321,
"learning_rate": 6.6196214219759935e-06,
"loss": 0.0008,
"step": 73750
},
{
"epoch": 34.09,
"grad_norm": 0.16334278881549835,
"learning_rate": 6.608079409048939e-06,
"loss": 0.0008,
"step": 74000
},
{
"epoch": 34.09,
"eval_loss": 0.2772423326969147,
"eval_runtime": 671.9826,
"eval_samples_per_second": 10.048,
"eval_steps_per_second": 0.314,
"eval_wer": 22.41307495487633,
"step": 74000
},
{
"epoch": 34.2,
"grad_norm": 0.17405687272548676,
"learning_rate": 6.596537396121884e-06,
"loss": 0.0008,
"step": 74250
},
{
"epoch": 34.32,
"grad_norm": 1.0651663541793823,
"learning_rate": 6.58499538319483e-06,
"loss": 0.0008,
"step": 74500
},
{
"epoch": 34.43,
"grad_norm": 0.22232329845428467,
"learning_rate": 6.573453370267775e-06,
"loss": 0.0007,
"step": 74750
},
{
"epoch": 34.55,
"grad_norm": 0.10098864883184433,
"learning_rate": 6.561911357340721e-06,
"loss": 0.0007,
"step": 75000
},
{
"epoch": 34.55,
"eval_loss": 0.2794438600540161,
"eval_runtime": 681.119,
"eval_samples_per_second": 9.913,
"eval_steps_per_second": 0.31,
"eval_wer": 22.53604934843406,
"step": 75000
},
{
"epoch": 34.66,
"grad_norm": 0.09176724404096603,
"learning_rate": 6.550369344413666e-06,
"loss": 0.0008,
"step": 75250
},
{
"epoch": 34.78,
"grad_norm": 0.3179700970649719,
"learning_rate": 6.538827331486611e-06,
"loss": 0.0008,
"step": 75500
},
{
"epoch": 34.89,
"grad_norm": 0.38459789752960205,
"learning_rate": 6.527285318559557e-06,
"loss": 0.0008,
"step": 75750
},
{
"epoch": 35.01,
"grad_norm": 0.2603273391723633,
"learning_rate": 6.515743305632502e-06,
"loss": 0.0008,
"step": 76000
},
{
"epoch": 35.01,
"eval_loss": 0.2777673900127411,
"eval_runtime": 692.7589,
"eval_samples_per_second": 9.747,
"eval_steps_per_second": 0.305,
"eval_wer": 22.811750004958643,
"step": 76000
},
{
"epoch": 35.12,
"grad_norm": 0.051916543394327164,
"learning_rate": 6.504247460757157e-06,
"loss": 0.0007,
"step": 76250
},
{
"epoch": 35.24,
"grad_norm": 0.30884623527526855,
"learning_rate": 6.492705447830102e-06,
"loss": 0.0006,
"step": 76500
},
{
"epoch": 35.35,
"grad_norm": 0.1257990300655365,
"learning_rate": 6.481163434903047e-06,
"loss": 0.0007,
"step": 76750
},
{
"epoch": 35.47,
"grad_norm": 0.08370446413755417,
"learning_rate": 6.469621421975993e-06,
"loss": 0.0008,
"step": 77000
},
{
"epoch": 35.47,
"eval_loss": 0.2764694094657898,
"eval_runtime": 692.6631,
"eval_samples_per_second": 9.748,
"eval_steps_per_second": 0.305,
"eval_wer": 22.82960112660412,
"step": 77000
},
{
"epoch": 35.58,
"grad_norm": 0.16313733160495758,
"learning_rate": 6.458079409048938e-06,
"loss": 0.0007,
"step": 77250
},
{
"epoch": 35.7,
"grad_norm": 1.0557291507720947,
"learning_rate": 6.446537396121884e-06,
"loss": 0.0007,
"step": 77500
},
{
"epoch": 35.81,
"grad_norm": 0.2264009267091751,
"learning_rate": 6.4349953831948295e-06,
"loss": 0.0008,
"step": 77750
},
{
"epoch": 35.93,
"grad_norm": 0.2705702781677246,
"learning_rate": 6.423453370267775e-06,
"loss": 0.0009,
"step": 78000
},
{
"epoch": 35.93,
"eval_loss": 0.27600711584091187,
"eval_runtime": 689.6167,
"eval_samples_per_second": 9.791,
"eval_steps_per_second": 0.306,
"eval_wer": 22.551917012118928,
"step": 78000
},
{
"epoch": 36.04,
"grad_norm": 0.2169518917798996,
"learning_rate": 6.411911357340721e-06,
"loss": 0.0006,
"step": 78250
},
{
"epoch": 36.16,
"grad_norm": 0.19748559594154358,
"learning_rate": 6.400415512465374e-06,
"loss": 0.0006,
"step": 78500
},
{
"epoch": 36.27,
"grad_norm": 1.7767668962478638,
"learning_rate": 6.3888734995383196e-06,
"loss": 0.0006,
"step": 78750
},
{
"epoch": 36.39,
"grad_norm": 0.2516990303993225,
"learning_rate": 6.3773314866112655e-06,
"loss": 0.0005,
"step": 79000
},
{
"epoch": 36.39,
"eval_loss": 0.2752860188484192,
"eval_runtime": 664.3231,
"eval_samples_per_second": 10.164,
"eval_steps_per_second": 0.318,
"eval_wer": 22.64315607830692,
"step": 79000
},
{
"epoch": 36.5,
"grad_norm": 0.05742982402443886,
"learning_rate": 6.365789473684211e-06,
"loss": 0.0005,
"step": 79250
},
{
"epoch": 36.62,
"grad_norm": 5.542628765106201,
"learning_rate": 6.354247460757157e-06,
"loss": 0.0009,
"step": 79500
},
{
"epoch": 36.73,
"grad_norm": 0.12612101435661316,
"learning_rate": 6.342705447830102e-06,
"loss": 0.0009,
"step": 79750
},
{
"epoch": 36.85,
"grad_norm": 1.6482515335083008,
"learning_rate": 6.331163434903047e-06,
"loss": 0.0007,
"step": 80000
},
{
"epoch": 36.85,
"eval_loss": 0.2798755466938019,
"eval_runtime": 679.0069,
"eval_samples_per_second": 9.944,
"eval_steps_per_second": 0.311,
"eval_wer": 22.450760656127894,
"step": 80000
},
{
"epoch": 36.96,
"grad_norm": 0.1331368237733841,
"learning_rate": 6.319621421975993e-06,
"loss": 0.0007,
"step": 80250
},
{
"epoch": 37.08,
"grad_norm": 0.097502700984478,
"learning_rate": 6.308079409048938e-06,
"loss": 0.0006,
"step": 80500
},
{
"epoch": 37.19,
"grad_norm": 0.05282368138432503,
"learning_rate": 6.296537396121884e-06,
"loss": 0.0005,
"step": 80750
},
{
"epoch": 37.31,
"grad_norm": 0.7441471815109253,
"learning_rate": 6.284995383194829e-06,
"loss": 0.0006,
"step": 81000
},
{
"epoch": 37.31,
"eval_loss": 0.2776803970336914,
"eval_runtime": 685.1247,
"eval_samples_per_second": 9.855,
"eval_steps_per_second": 0.308,
"eval_wer": 22.208778784933653,
"step": 81000
},
{
"epoch": 37.43,
"grad_norm": 0.12682919204235077,
"learning_rate": 6.273499538319483e-06,
"loss": 0.0006,
"step": 81250
},
{
"epoch": 37.54,
"grad_norm": 0.14379066228866577,
"learning_rate": 6.261957525392429e-06,
"loss": 0.0006,
"step": 81500
},
{
"epoch": 37.66,
"grad_norm": 0.23371708393096924,
"learning_rate": 6.250415512465374e-06,
"loss": 0.0006,
"step": 81750
},
{
"epoch": 37.77,
"grad_norm": 0.21299830079078674,
"learning_rate": 6.238919667590029e-06,
"loss": 0.0008,
"step": 82000
},
{
"epoch": 37.77,
"eval_loss": 0.27769771218299866,
"eval_runtime": 686.8168,
"eval_samples_per_second": 9.831,
"eval_steps_per_second": 0.307,
"eval_wer": 22.746295892258566,
"step": 82000
},
{
"epoch": 37.89,
"grad_norm": 0.1677282303571701,
"learning_rate": 6.227377654662974e-06,
"loss": 0.0008,
"step": 82250
},
{
"epoch": 38.0,
"grad_norm": 1.1451334953308105,
"learning_rate": 6.215835641735919e-06,
"loss": 0.0009,
"step": 82500
},
{
"epoch": 38.12,
"grad_norm": 0.19697508215904236,
"learning_rate": 6.204293628808865e-06,
"loss": 0.0006,
"step": 82750
},
{
"epoch": 38.23,
"grad_norm": 0.07527792453765869,
"learning_rate": 6.19275161588181e-06,
"loss": 0.0007,
"step": 83000
},
{
"epoch": 38.23,
"eval_loss": 0.2804949879646301,
"eval_runtime": 675.5771,
"eval_samples_per_second": 9.994,
"eval_steps_per_second": 0.312,
"eval_wer": 22.71257710692822,
"step": 83000
},
{
"epoch": 38.35,
"grad_norm": 1.9222028255462646,
"learning_rate": 6.181209602954756e-06,
"loss": 0.0007,
"step": 83250
},
{
"epoch": 38.46,
"grad_norm": 0.09556487202644348,
"learning_rate": 6.169667590027701e-06,
"loss": 0.0007,
"step": 83500
},
{
"epoch": 38.58,
"grad_norm": 0.21437525749206543,
"learning_rate": 6.1581255771006465e-06,
"loss": 0.0006,
"step": 83750
},
{
"epoch": 38.69,
"grad_norm": 0.1807592660188675,
"learning_rate": 6.1465835641735925e-06,
"loss": 0.0007,
"step": 84000
},
{
"epoch": 38.69,
"eval_loss": 0.2847980260848999,
"eval_runtime": 672.9632,
"eval_samples_per_second": 10.033,
"eval_steps_per_second": 0.314,
"eval_wer": 22.424975702639983,
"step": 84000
},
{
"epoch": 38.81,
"grad_norm": 0.16911369562149048,
"learning_rate": 6.135041551246538e-06,
"loss": 0.0007,
"step": 84250
},
{
"epoch": 38.92,
"grad_norm": 1.3031611442565918,
"learning_rate": 6.123545706371191e-06,
"loss": 0.0008,
"step": 84500
},
{
"epoch": 39.04,
"grad_norm": 0.04561692103743553,
"learning_rate": 6.112003693444137e-06,
"loss": 0.0006,
"step": 84750
},
{
"epoch": 39.15,
"grad_norm": 0.061062462627887726,
"learning_rate": 6.1004616805170825e-06,
"loss": 0.0003,
"step": 85000
},
{
"epoch": 39.15,
"eval_loss": 0.2790899872779846,
"eval_runtime": 674.5949,
"eval_samples_per_second": 10.009,
"eval_steps_per_second": 0.313,
"eval_wer": 22.01439990479402,
"step": 85000
},
{
"epoch": 39.27,
"grad_norm": 0.07837537676095963,
"learning_rate": 6.0889196675900285e-06,
"loss": 0.0004,
"step": 85250
},
{
"epoch": 39.38,
"grad_norm": 0.05536266788840294,
"learning_rate": 6.077377654662974e-06,
"loss": 0.0006,
"step": 85500
},
{
"epoch": 39.5,
"grad_norm": 5.097941875457764,
"learning_rate": 6.065835641735919e-06,
"loss": 0.0009,
"step": 85750
},
{
"epoch": 39.61,
"grad_norm": 0.07944060117006302,
"learning_rate": 6.054293628808865e-06,
"loss": 0.0006,
"step": 86000
},
{
"epoch": 39.61,
"eval_loss": 0.2777423858642578,
"eval_runtime": 687.4077,
"eval_samples_per_second": 9.822,
"eval_steps_per_second": 0.307,
"eval_wer": 22.262332149870083,
"step": 86000
},
{
"epoch": 39.73,
"grad_norm": 1.522270917892456,
"learning_rate": 6.04275161588181e-06,
"loss": 0.0006,
"step": 86250
},
{
"epoch": 39.84,
"grad_norm": 0.05595465004444122,
"learning_rate": 6.031209602954756e-06,
"loss": 0.0006,
"step": 86500
},
{
"epoch": 39.96,
"grad_norm": 0.5327405333518982,
"learning_rate": 6.019667590027701e-06,
"loss": 0.0005,
"step": 86750
},
{
"epoch": 40.07,
"grad_norm": 0.18009261786937714,
"learning_rate": 6.008125577100646e-06,
"loss": 0.0003,
"step": 87000
},
{
"epoch": 40.07,
"eval_loss": 0.2798568308353424,
"eval_runtime": 666.6019,
"eval_samples_per_second": 10.129,
"eval_steps_per_second": 0.317,
"eval_wer": 22.0798540174941,
"step": 87000
},
{
"epoch": 40.19,
"grad_norm": 0.15836778283119202,
"learning_rate": 5.996583564173592e-06,
"loss": 0.0005,
"step": 87250
},
{
"epoch": 40.3,
"grad_norm": 0.06244779750704765,
"learning_rate": 5.985041551246537e-06,
"loss": 0.0005,
"step": 87500
},
{
"epoch": 40.42,
"grad_norm": 0.47360849380493164,
"learning_rate": 5.973499538319484e-06,
"loss": 0.0005,
"step": 87750
},
{
"epoch": 40.53,
"grad_norm": 0.12535277009010315,
"learning_rate": 5.9619575253924285e-06,
"loss": 0.0005,
"step": 88000
},
{
"epoch": 40.53,
"eval_loss": 0.2800135612487793,
"eval_runtime": 683.9129,
"eval_samples_per_second": 9.873,
"eval_steps_per_second": 0.309,
"eval_wer": 22.23059682250035,
"step": 88000
},
{
"epoch": 40.65,
"grad_norm": 0.06063379347324371,
"learning_rate": 5.950415512465374e-06,
"loss": 0.0007,
"step": 88250
},
{
"epoch": 40.76,
"grad_norm": 0.14233584702014923,
"learning_rate": 5.9388734995383205e-06,
"loss": 0.0005,
"step": 88500
},
{
"epoch": 40.88,
"grad_norm": 2.8523402214050293,
"learning_rate": 5.927331486611266e-06,
"loss": 0.0006,
"step": 88750
},
{
"epoch": 40.99,
"grad_norm": 0.20620940625667572,
"learning_rate": 5.915789473684212e-06,
"loss": 0.0007,
"step": 89000
},
{
"epoch": 40.99,
"eval_loss": 0.2811349332332611,
"eval_runtime": 676.2894,
"eval_samples_per_second": 9.984,
"eval_steps_per_second": 0.312,
"eval_wer": 22.298034393161036,
"step": 89000
},
{
"epoch": 41.11,
"grad_norm": 0.5996530652046204,
"learning_rate": 5.904247460757157e-06,
"loss": 0.0004,
"step": 89250
},
{
"epoch": 41.23,
"grad_norm": 0.19429056346416473,
"learning_rate": 5.892705447830102e-06,
"loss": 0.0004,
"step": 89500
},
{
"epoch": 41.34,
"grad_norm": 0.07230094075202942,
"learning_rate": 5.881163434903048e-06,
"loss": 0.0005,
"step": 89750
},
{
"epoch": 41.46,
"grad_norm": 1.4591439962387085,
"learning_rate": 5.869621421975993e-06,
"loss": 0.0004,
"step": 90000
},
{
"epoch": 41.46,
"eval_loss": 0.2819642424583435,
"eval_runtime": 679.526,
"eval_samples_per_second": 9.936,
"eval_steps_per_second": 0.311,
"eval_wer": 22.71257710692822,
"step": 90000
},
{
"epoch": 41.57,
"grad_norm": 0.07382282614707947,
"learning_rate": 5.858079409048939e-06,
"loss": 0.0005,
"step": 90250
},
{
"epoch": 41.69,
"grad_norm": 0.9907983541488647,
"learning_rate": 5.846583564173593e-06,
"loss": 0.0006,
"step": 90500
},
{
"epoch": 41.8,
"grad_norm": 0.11259205639362335,
"learning_rate": 5.835041551246537e-06,
"loss": 0.0006,
"step": 90750
},
{
"epoch": 41.92,
"grad_norm": 0.06520923972129822,
"learning_rate": 5.823499538319484e-06,
"loss": 0.0006,
"step": 91000
},
{
"epoch": 41.92,
"eval_loss": 0.28300294280052185,
"eval_runtime": 684.3308,
"eval_samples_per_second": 9.867,
"eval_steps_per_second": 0.308,
"eval_wer": 21.95886308189698,
"step": 91000
}
],
"logging_steps": 250,
"max_steps": 217100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 1000,
"total_flos": 1.4334909633589248e+20,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}