{ "best_metric": 21.95886308189698, "best_model_checkpoint": "./whisper-tiny-lv/checkpoint-91000", "epoch": 41.91616766467066, "eval_steps": 1000, "global_step": 91000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "grad_norm": 6.921171188354492, "learning_rate": 4.9000000000000005e-06, "loss": 1.8377, "step": 250 }, { "epoch": 0.23, "grad_norm": 6.424713611602783, "learning_rate": 9.9e-06, "loss": 0.7472, "step": 500 }, { "epoch": 0.35, "grad_norm": 6.457923889160156, "learning_rate": 9.988688827331488e-06, "loss": 0.5694, "step": 750 }, { "epoch": 0.46, "grad_norm": 5.4181013107299805, "learning_rate": 9.977146814404432e-06, "loss": 0.481, "step": 1000 }, { "epoch": 0.46, "eval_loss": 0.511210560798645, "eval_runtime": 658.2418, "eval_samples_per_second": 10.258, "eval_steps_per_second": 0.321, "eval_wer": 51.379495011603225, "step": 1000 }, { "epoch": 0.58, "grad_norm": 5.981955528259277, "learning_rate": 9.965604801477378e-06, "loss": 0.4294, "step": 1250 }, { "epoch": 0.69, "grad_norm": 6.043950080871582, "learning_rate": 9.954062788550324e-06, "loss": 0.3919, "step": 1500 }, { "epoch": 0.81, "grad_norm": 5.050214767456055, "learning_rate": 9.94252077562327e-06, "loss": 0.3599, "step": 1750 }, { "epoch": 0.92, "grad_norm": 5.506693363189697, "learning_rate": 9.930978762696215e-06, "loss": 0.3399, "step": 2000 }, { "epoch": 0.92, "eval_loss": 0.39190948009490967, "eval_runtime": 666.4927, "eval_samples_per_second": 10.131, "eval_steps_per_second": 0.317, "eval_wer": 42.10881250371899, "step": 2000 }, { "epoch": 1.04, "grad_norm": 4.966064929962158, "learning_rate": 9.91943674976916e-06, "loss": 0.3048, "step": 2250 }, { "epoch": 1.15, "grad_norm": 4.654673099517822, "learning_rate": 9.907894736842107e-06, "loss": 0.2753, "step": 2500 }, { "epoch": 1.27, "grad_norm": 4.946408271789551, "learning_rate": 9.896352723915051e-06, "loss": 0.2609, "step": 2750 }, { "epoch": 1.38, "grad_norm": 4.584148406982422, "learning_rate": 9.884810710987997e-06, "loss": 0.2539, "step": 3000 }, { "epoch": 1.38, "eval_loss": 0.33731845021247864, "eval_runtime": 657.5879, "eval_samples_per_second": 10.268, "eval_steps_per_second": 0.321, "eval_wer": 38.419580696987126, "step": 3000 }, { "epoch": 1.5, "grad_norm": 4.824137210845947, "learning_rate": 9.873268698060943e-06, "loss": 0.2474, "step": 3250 }, { "epoch": 1.61, "grad_norm": 4.751330852508545, "learning_rate": 9.861726685133887e-06, "loss": 0.2374, "step": 3500 }, { "epoch": 1.73, "grad_norm": 5.100882530212402, "learning_rate": 9.850184672206833e-06, "loss": 0.2293, "step": 3750 }, { "epoch": 1.84, "grad_norm": 4.561028480529785, "learning_rate": 9.83864265927978e-06, "loss": 0.2252, "step": 4000 }, { "epoch": 1.84, "eval_loss": 0.30130764842033386, "eval_runtime": 668.7558, "eval_samples_per_second": 10.096, "eval_steps_per_second": 0.316, "eval_wer": 35.210345716722536, "step": 4000 }, { "epoch": 1.96, "grad_norm": 4.661614418029785, "learning_rate": 9.827100646352725e-06, "loss": 0.2127, "step": 4250 }, { "epoch": 2.07, "grad_norm": 4.2613959312438965, "learning_rate": 9.81555863342567e-06, "loss": 0.1886, "step": 4500 }, { "epoch": 2.19, "grad_norm": 4.164435386657715, "learning_rate": 9.804016620498615e-06, "loss": 0.1725, "step": 4750 }, { "epoch": 2.3, "grad_norm": 4.097248077392578, "learning_rate": 9.792474607571561e-06, "loss": 0.1715, "step": 5000 }, { "epoch": 2.3, "eval_loss": 0.283372163772583, "eval_runtime": 661.6835, "eval_samples_per_second": 10.204, "eval_steps_per_second": 0.319, "eval_wer": 33.31415990638079, "step": 5000 }, { "epoch": 2.42, "grad_norm": 4.298040866851807, "learning_rate": 9.780932594644506e-06, "loss": 0.1657, "step": 5250 }, { "epoch": 2.53, "grad_norm": 4.593989849090576, "learning_rate": 9.769390581717453e-06, "loss": 0.1637, "step": 5500 }, { "epoch": 2.65, "grad_norm": 4.4265055656433105, "learning_rate": 9.757848568790398e-06, "loss": 0.1619, "step": 5750 }, { "epoch": 2.76, "grad_norm": 3.670001983642578, "learning_rate": 9.746306555863344e-06, "loss": 0.1562, "step": 6000 }, { "epoch": 2.76, "eval_loss": 0.2656751573085785, "eval_runtime": 654.0062, "eval_samples_per_second": 10.324, "eval_steps_per_second": 0.323, "eval_wer": 31.9693754090882, "step": 6000 }, { "epoch": 2.88, "grad_norm": 4.299009799957275, "learning_rate": 9.73476454293629e-06, "loss": 0.1547, "step": 6250 }, { "epoch": 2.99, "grad_norm": 4.178277492523193, "learning_rate": 9.723222530009234e-06, "loss": 0.1523, "step": 6500 }, { "epoch": 3.11, "grad_norm": 3.7556698322296143, "learning_rate": 9.71168051708218e-06, "loss": 0.1205, "step": 6750 }, { "epoch": 3.22, "grad_norm": 4.128946304321289, "learning_rate": 9.700138504155126e-06, "loss": 0.1177, "step": 7000 }, { "epoch": 3.22, "eval_loss": 0.25489723682403564, "eval_runtime": 667.3883, "eval_samples_per_second": 10.117, "eval_steps_per_second": 0.316, "eval_wer": 30.705912688180575, "step": 7000 }, { "epoch": 3.34, "grad_norm": 3.462374448776245, "learning_rate": 9.68859649122807e-06, "loss": 0.1173, "step": 7250 }, { "epoch": 3.45, "grad_norm": 4.692279815673828, "learning_rate": 9.677054478301016e-06, "loss": 0.117, "step": 7500 }, { "epoch": 3.57, "grad_norm": 3.5876481533050537, "learning_rate": 9.665512465373962e-06, "loss": 0.1147, "step": 7750 }, { "epoch": 3.68, "grad_norm": 4.460909843444824, "learning_rate": 9.653970452446908e-06, "loss": 0.1149, "step": 8000 }, { "epoch": 3.68, "eval_loss": 0.24221286177635193, "eval_runtime": 671.3545, "eval_samples_per_second": 10.057, "eval_steps_per_second": 0.314, "eval_wer": 29.97004978479481, "step": 8000 }, { "epoch": 3.8, "grad_norm": 3.741210460662842, "learning_rate": 9.642428439519853e-06, "loss": 0.1124, "step": 8250 }, { "epoch": 3.92, "grad_norm": 4.417487144470215, "learning_rate": 9.630886426592799e-06, "loss": 0.1124, "step": 8500 }, { "epoch": 4.03, "grad_norm": 3.8332269191741943, "learning_rate": 9.619344413665745e-06, "loss": 0.1028, "step": 8750 }, { "epoch": 4.15, "grad_norm": 3.3890438079833984, "learning_rate": 9.607802400738689e-06, "loss": 0.0834, "step": 9000 }, { "epoch": 4.15, "eval_loss": 0.23551978170871735, "eval_runtime": 665.3587, "eval_samples_per_second": 10.148, "eval_steps_per_second": 0.317, "eval_wer": 29.347243985163736, "step": 9000 }, { "epoch": 4.26, "grad_norm": 3.3677661418914795, "learning_rate": 9.596260387811635e-06, "loss": 0.0835, "step": 9250 }, { "epoch": 4.38, "grad_norm": 3.392284631729126, "learning_rate": 9.584718374884581e-06, "loss": 0.0835, "step": 9500 }, { "epoch": 4.49, "grad_norm": 3.0857667922973633, "learning_rate": 9.573176361957525e-06, "loss": 0.0813, "step": 9750 }, { "epoch": 4.61, "grad_norm": 3.726276159286499, "learning_rate": 9.561634349030471e-06, "loss": 0.0825, "step": 10000 }, { "epoch": 4.61, "eval_loss": 0.229040265083313, "eval_runtime": 671.1646, "eval_samples_per_second": 10.06, "eval_steps_per_second": 0.314, "eval_wer": 28.81567725172065, "step": 10000 }, { "epoch": 4.72, "grad_norm": 3.28595232963562, "learning_rate": 9.550092336103417e-06, "loss": 0.0816, "step": 10250 }, { "epoch": 4.84, "grad_norm": 3.427420139312744, "learning_rate": 9.538550323176363e-06, "loss": 0.0814, "step": 10500 }, { "epoch": 4.95, "grad_norm": 3.8041698932647705, "learning_rate": 9.527008310249308e-06, "loss": 0.0802, "step": 10750 }, { "epoch": 5.07, "grad_norm": 2.9603447914123535, "learning_rate": 9.515466297322253e-06, "loss": 0.0669, "step": 11000 }, { "epoch": 5.07, "eval_loss": 0.22645771503448486, "eval_runtime": 667.9924, "eval_samples_per_second": 10.108, "eval_steps_per_second": 0.316, "eval_wer": 28.53402622131424, "step": 11000 }, { "epoch": 5.18, "grad_norm": 3.0245766639709473, "learning_rate": 9.5039242843952e-06, "loss": 0.057, "step": 11250 }, { "epoch": 5.3, "grad_norm": 2.435096502304077, "learning_rate": 9.492382271468144e-06, "loss": 0.0588, "step": 11500 }, { "epoch": 5.41, "grad_norm": 2.9605906009674072, "learning_rate": 9.480840258541091e-06, "loss": 0.0583, "step": 11750 }, { "epoch": 5.53, "grad_norm": 2.877732515335083, "learning_rate": 9.469298245614036e-06, "loss": 0.0567, "step": 12000 }, { "epoch": 5.53, "eval_loss": 0.2239210605621338, "eval_runtime": 671.9438, "eval_samples_per_second": 10.048, "eval_steps_per_second": 0.314, "eval_wer": 27.875518178392213, "step": 12000 }, { "epoch": 5.64, "grad_norm": 3.2738921642303467, "learning_rate": 9.457848568790397e-06, "loss": 0.0589, "step": 12250 }, { "epoch": 5.76, "grad_norm": 2.727008104324341, "learning_rate": 9.446306555863343e-06, "loss": 0.0586, "step": 12500 }, { "epoch": 5.87, "grad_norm": 3.6808159351348877, "learning_rate": 9.43476454293629e-06, "loss": 0.0602, "step": 12750 }, { "epoch": 5.99, "grad_norm": 3.2749545574188232, "learning_rate": 9.423222530009234e-06, "loss": 0.0589, "step": 13000 }, { "epoch": 5.99, "eval_loss": 0.21996423602104187, "eval_runtime": 674.0776, "eval_samples_per_second": 10.017, "eval_steps_per_second": 0.313, "eval_wer": 27.879485094313427, "step": 13000 }, { "epoch": 6.1, "grad_norm": 2.8687331676483154, "learning_rate": 9.41168051708218e-06, "loss": 0.0406, "step": 13250 }, { "epoch": 6.22, "grad_norm": 2.4594838619232178, "learning_rate": 9.400138504155126e-06, "loss": 0.0405, "step": 13500 }, { "epoch": 6.33, "grad_norm": 2.6956489086151123, "learning_rate": 9.388596491228072e-06, "loss": 0.0398, "step": 13750 }, { "epoch": 6.45, "grad_norm": 2.6623504161834717, "learning_rate": 9.377054478301016e-06, "loss": 0.0415, "step": 14000 }, { "epoch": 6.45, "eval_loss": 0.22312334179878235, "eval_runtime": 673.796, "eval_samples_per_second": 10.021, "eval_steps_per_second": 0.313, "eval_wer": 26.97701172223655, "step": 14000 }, { "epoch": 6.56, "grad_norm": 3.0151000022888184, "learning_rate": 9.365512465373962e-06, "loss": 0.0422, "step": 14250 }, { "epoch": 6.68, "grad_norm": 3.232048749923706, "learning_rate": 9.353970452446908e-06, "loss": 0.0412, "step": 14500 }, { "epoch": 6.79, "grad_norm": 2.809514284133911, "learning_rate": 9.342428439519852e-06, "loss": 0.0411, "step": 14750 }, { "epoch": 6.91, "grad_norm": 2.0289547443389893, "learning_rate": 9.3308864265928e-06, "loss": 0.0423, "step": 15000 }, { "epoch": 6.91, "eval_loss": 0.2184455841779709, "eval_runtime": 688.9862, "eval_samples_per_second": 9.8, "eval_steps_per_second": 0.306, "eval_wer": 27.084118452109408, "step": 15000 }, { "epoch": 7.02, "grad_norm": 3.098123788833618, "learning_rate": 9.319344413665744e-06, "loss": 0.0375, "step": 15250 }, { "epoch": 7.14, "grad_norm": 2.8762073516845703, "learning_rate": 9.307802400738688e-06, "loss": 0.0266, "step": 15500 }, { "epoch": 7.25, "grad_norm": 4.211934566497803, "learning_rate": 9.296260387811636e-06, "loss": 0.0278, "step": 15750 }, { "epoch": 7.37, "grad_norm": 2.860619306564331, "learning_rate": 9.28471837488458e-06, "loss": 0.0281, "step": 16000 }, { "epoch": 7.37, "eval_loss": 0.22083307802677155, "eval_runtime": 665.4984, "eval_samples_per_second": 10.146, "eval_steps_per_second": 0.317, "eval_wer": 27.224943967312615, "step": 16000 }, { "epoch": 7.49, "grad_norm": 2.3288867473602295, "learning_rate": 9.273176361957526e-06, "loss": 0.0278, "step": 16250 }, { "epoch": 7.6, "grad_norm": 2.9130966663360596, "learning_rate": 9.261634349030472e-06, "loss": 0.0281, "step": 16500 }, { "epoch": 7.72, "grad_norm": 1.9348437786102295, "learning_rate": 9.250092336103417e-06, "loss": 0.0294, "step": 16750 }, { "epoch": 7.83, "grad_norm": 3.404127836227417, "learning_rate": 9.238550323176363e-06, "loss": 0.0296, "step": 17000 }, { "epoch": 7.83, "eval_loss": 0.2222394496202469, "eval_runtime": 668.3229, "eval_samples_per_second": 10.103, "eval_steps_per_second": 0.316, "eval_wer": 26.802467421702996, "step": 17000 }, { "epoch": 7.95, "grad_norm": 2.1417627334594727, "learning_rate": 9.227008310249309e-06, "loss": 0.0287, "step": 17250 }, { "epoch": 8.06, "grad_norm": 1.783292531967163, "learning_rate": 9.215512465373963e-06, "loss": 0.023, "step": 17500 }, { "epoch": 8.18, "grad_norm": 1.875301480293274, "learning_rate": 9.203970452446908e-06, "loss": 0.0185, "step": 17750 }, { "epoch": 8.29, "grad_norm": 1.9140523672103882, "learning_rate": 9.192428439519852e-06, "loss": 0.0186, "step": 18000 }, { "epoch": 8.29, "eval_loss": 0.2229994833469391, "eval_runtime": 670.5805, "eval_samples_per_second": 10.069, "eval_steps_per_second": 0.315, "eval_wer": 26.498998353729892, "step": 18000 }, { "epoch": 8.41, "grad_norm": 2.4175968170166016, "learning_rate": 9.1808864265928e-06, "loss": 0.0192, "step": 18250 }, { "epoch": 8.52, "grad_norm": 2.8320376873016357, "learning_rate": 9.169344413665744e-06, "loss": 0.0198, "step": 18500 }, { "epoch": 8.64, "grad_norm": 2.431974172592163, "learning_rate": 9.15780240073869e-06, "loss": 0.0196, "step": 18750 }, { "epoch": 8.75, "grad_norm": 2.0679523944854736, "learning_rate": 9.146260387811636e-06, "loss": 0.0201, "step": 19000 }, { "epoch": 8.75, "eval_loss": 0.22365085780620575, "eval_runtime": 657.293, "eval_samples_per_second": 10.272, "eval_steps_per_second": 0.321, "eval_wer": 26.042803022789933, "step": 19000 }, { "epoch": 8.87, "grad_norm": 2.1873040199279785, "learning_rate": 9.13471837488458e-06, "loss": 0.0202, "step": 19250 }, { "epoch": 8.98, "grad_norm": 2.5239417552948, "learning_rate": 9.123222530009235e-06, "loss": 0.0201, "step": 19500 }, { "epoch": 9.1, "grad_norm": 2.638354778289795, "learning_rate": 9.111680517082179e-06, "loss": 0.0134, "step": 19750 }, { "epoch": 9.21, "grad_norm": 1.6381027698516846, "learning_rate": 9.100138504155125e-06, "loss": 0.0127, "step": 20000 }, { "epoch": 9.21, "eval_loss": 0.22688329219818115, "eval_runtime": 665.6628, "eval_samples_per_second": 10.143, "eval_steps_per_second": 0.317, "eval_wer": 26.134042088977928, "step": 20000 }, { "epoch": 9.33, "grad_norm": 1.1191093921661377, "learning_rate": 9.088596491228071e-06, "loss": 0.0133, "step": 20250 }, { "epoch": 9.44, "grad_norm": 2.025820016860962, "learning_rate": 9.077054478301015e-06, "loss": 0.0133, "step": 20500 }, { "epoch": 9.56, "grad_norm": 1.148484468460083, "learning_rate": 9.065512465373963e-06, "loss": 0.0136, "step": 20750 }, { "epoch": 9.67, "grad_norm": 2.849606513977051, "learning_rate": 9.053970452446907e-06, "loss": 0.0135, "step": 21000 }, { "epoch": 9.67, "eval_loss": 0.22943329811096191, "eval_runtime": 665.6291, "eval_samples_per_second": 10.144, "eval_steps_per_second": 0.317, "eval_wer": 26.330404427078168, "step": 21000 }, { "epoch": 9.79, "grad_norm": 2.421963691711426, "learning_rate": 9.042428439519853e-06, "loss": 0.014, "step": 21250 }, { "epoch": 9.9, "grad_norm": 1.5668810606002808, "learning_rate": 9.0308864265928e-06, "loss": 0.0148, "step": 21500 }, { "epoch": 10.02, "grad_norm": 0.8170527815818787, "learning_rate": 9.019390581717452e-06, "loss": 0.0135, "step": 21750 }, { "epoch": 10.13, "grad_norm": 1.8164241313934326, "learning_rate": 9.007848568790398e-06, "loss": 0.0086, "step": 22000 }, { "epoch": 10.13, "eval_loss": 0.23072278499603271, "eval_runtime": 665.1317, "eval_samples_per_second": 10.151, "eval_steps_per_second": 0.317, "eval_wer": 26.07057143423845, "step": 22000 }, { "epoch": 10.25, "grad_norm": 1.6295300722122192, "learning_rate": 8.996306555863344e-06, "loss": 0.0091, "step": 22250 }, { "epoch": 10.36, "grad_norm": 1.5732313394546509, "learning_rate": 8.984764542936288e-06, "loss": 0.0093, "step": 22500 }, { "epoch": 10.48, "grad_norm": 1.6755157709121704, "learning_rate": 8.973222530009234e-06, "loss": 0.0095, "step": 22750 }, { "epoch": 10.59, "grad_norm": 1.9076685905456543, "learning_rate": 8.96168051708218e-06, "loss": 0.0097, "step": 23000 }, { "epoch": 10.59, "eval_loss": 0.2347114235162735, "eval_runtime": 674.6572, "eval_samples_per_second": 10.008, "eval_steps_per_second": 0.313, "eval_wer": 25.425947597040683, "step": 23000 }, { "epoch": 10.71, "grad_norm": 1.4558827877044678, "learning_rate": 8.950138504155126e-06, "loss": 0.0097, "step": 23250 }, { "epoch": 10.82, "grad_norm": 2.6334474086761475, "learning_rate": 8.93859649122807e-06, "loss": 0.0103, "step": 23500 }, { "epoch": 10.94, "grad_norm": 1.4682759046554565, "learning_rate": 8.927100646352724e-06, "loss": 0.0104, "step": 23750 }, { "epoch": 11.05, "grad_norm": 1.105055332183838, "learning_rate": 8.915558633425671e-06, "loss": 0.0082, "step": 24000 }, { "epoch": 11.05, "eval_loss": 0.23529361188411713, "eval_runtime": 677.9021, "eval_samples_per_second": 9.96, "eval_steps_per_second": 0.311, "eval_wer": 25.18594918380705, "step": 24000 }, { "epoch": 11.17, "grad_norm": 0.9997087717056274, "learning_rate": 8.904016620498616e-06, "loss": 0.0062, "step": 24250 }, { "epoch": 11.29, "grad_norm": 1.274902582168579, "learning_rate": 8.89247460757156e-06, "loss": 0.0067, "step": 24500 }, { "epoch": 11.4, "grad_norm": 1.1603277921676636, "learning_rate": 8.880932594644508e-06, "loss": 0.0068, "step": 24750 }, { "epoch": 11.52, "grad_norm": 1.450378179550171, "learning_rate": 8.869390581717452e-06, "loss": 0.0069, "step": 25000 }, { "epoch": 11.52, "eval_loss": 0.23996803164482117, "eval_runtime": 672.8137, "eval_samples_per_second": 10.035, "eval_steps_per_second": 0.314, "eval_wer": 26.251066108653827, "step": 25000 }, { "epoch": 11.63, "grad_norm": 1.0134578943252563, "learning_rate": 8.857848568790398e-06, "loss": 0.0072, "step": 25250 }, { "epoch": 11.75, "grad_norm": 1.8789595365524292, "learning_rate": 8.846306555863344e-06, "loss": 0.0072, "step": 25500 }, { "epoch": 11.86, "grad_norm": 0.972827136516571, "learning_rate": 8.834764542936288e-06, "loss": 0.0076, "step": 25750 }, { "epoch": 11.98, "grad_norm": 2.2902746200561523, "learning_rate": 8.823268698060943e-06, "loss": 0.0075, "step": 26000 }, { "epoch": 11.98, "eval_loss": 0.23599743843078613, "eval_runtime": 667.2943, "eval_samples_per_second": 10.118, "eval_steps_per_second": 0.316, "eval_wer": 25.400162643552772, "step": 26000 }, { "epoch": 12.09, "grad_norm": 1.1216572523117065, "learning_rate": 8.811726685133887e-06, "loss": 0.0054, "step": 26250 }, { "epoch": 12.21, "grad_norm": 1.168253779411316, "learning_rate": 8.800184672206835e-06, "loss": 0.0048, "step": 26500 }, { "epoch": 12.32, "grad_norm": 0.6528610587120056, "learning_rate": 8.788642659279779e-06, "loss": 0.0049, "step": 26750 }, { "epoch": 12.44, "grad_norm": 1.3029311895370483, "learning_rate": 8.777100646352723e-06, "loss": 0.0052, "step": 27000 }, { "epoch": 12.44, "eval_loss": 0.24270391464233398, "eval_runtime": 668.33, "eval_samples_per_second": 10.103, "eval_steps_per_second": 0.316, "eval_wer": 24.73967114267013, "step": 27000 }, { "epoch": 12.55, "grad_norm": 1.0006558895111084, "learning_rate": 8.765558633425671e-06, "loss": 0.0053, "step": 27250 }, { "epoch": 12.67, "grad_norm": 1.4560002088546753, "learning_rate": 8.754016620498615e-06, "loss": 0.0052, "step": 27500 }, { "epoch": 12.78, "grad_norm": 1.9307841062545776, "learning_rate": 8.742474607571561e-06, "loss": 0.0053, "step": 27750 }, { "epoch": 12.9, "grad_norm": 1.977569580078125, "learning_rate": 8.730932594644507e-06, "loss": 0.0056, "step": 28000 }, { "epoch": 12.9, "eval_loss": 0.24133123457431793, "eval_runtime": 682.6727, "eval_samples_per_second": 9.891, "eval_steps_per_second": 0.309, "eval_wer": 25.247436380585913, "step": 28000 }, { "epoch": 13.01, "grad_norm": 0.9914600849151611, "learning_rate": 8.719390581717452e-06, "loss": 0.0056, "step": 28250 }, { "epoch": 13.13, "grad_norm": 0.8464221358299255, "learning_rate": 8.707848568790398e-06, "loss": 0.0037, "step": 28500 }, { "epoch": 13.24, "grad_norm": 0.6425495147705078, "learning_rate": 8.69635272391505e-06, "loss": 0.0038, "step": 28750 }, { "epoch": 13.36, "grad_norm": 2.0510671138763428, "learning_rate": 8.684810710987998e-06, "loss": 0.0041, "step": 29000 }, { "epoch": 13.36, "eval_loss": 0.24362993240356445, "eval_runtime": 666.1767, "eval_samples_per_second": 10.135, "eval_steps_per_second": 0.317, "eval_wer": 25.074875538012968, "step": 29000 }, { "epoch": 13.47, "grad_norm": 1.125919222831726, "learning_rate": 8.673268698060943e-06, "loss": 0.0041, "step": 29250 }, { "epoch": 13.59, "grad_norm": 1.247779369354248, "learning_rate": 8.661726685133889e-06, "loss": 0.0044, "step": 29500 }, { "epoch": 13.7, "grad_norm": 1.009018063545227, "learning_rate": 8.650184672206835e-06, "loss": 0.0046, "step": 29750 }, { "epoch": 13.82, "grad_norm": 1.848225712776184, "learning_rate": 8.638642659279779e-06, "loss": 0.0048, "step": 30000 }, { "epoch": 13.82, "eval_loss": 0.24545399844646454, "eval_runtime": 667.6447, "eval_samples_per_second": 10.113, "eval_steps_per_second": 0.316, "eval_wer": 25.29900628756174, "step": 30000 }, { "epoch": 13.93, "grad_norm": 0.4666302502155304, "learning_rate": 8.627100646352725e-06, "loss": 0.0046, "step": 30250 }, { "epoch": 14.05, "grad_norm": 0.6961706280708313, "learning_rate": 8.61555863342567e-06, "loss": 0.004, "step": 30500 }, { "epoch": 14.16, "grad_norm": 0.9278767108917236, "learning_rate": 8.604016620498615e-06, "loss": 0.003, "step": 30750 }, { "epoch": 14.28, "grad_norm": 0.9175160527229309, "learning_rate": 8.59252077562327e-06, "loss": 0.0033, "step": 31000 }, { "epoch": 14.28, "eval_loss": 0.247360959649086, "eval_runtime": 674.4213, "eval_samples_per_second": 10.012, "eval_steps_per_second": 0.313, "eval_wer": 25.13041236091001, "step": 31000 }, { "epoch": 14.39, "grad_norm": 0.6542864441871643, "learning_rate": 8.580978762696216e-06, "loss": 0.0034, "step": 31250 }, { "epoch": 14.51, "grad_norm": 14.155986785888672, "learning_rate": 8.56943674976916e-06, "loss": 0.0037, "step": 31500 }, { "epoch": 14.62, "grad_norm": 0.7710313200950623, "learning_rate": 8.557940904893815e-06, "loss": 0.0039, "step": 31750 }, { "epoch": 14.74, "grad_norm": 1.6279186010360718, "learning_rate": 8.546398891966759e-06, "loss": 0.0038, "step": 32000 }, { "epoch": 14.74, "eval_loss": 0.25091758370399475, "eval_runtime": 686.3757, "eval_samples_per_second": 9.837, "eval_steps_per_second": 0.307, "eval_wer": 25.287105539798084, "step": 32000 }, { "epoch": 14.85, "grad_norm": 0.883815586566925, "learning_rate": 8.534856879039707e-06, "loss": 0.0038, "step": 32250 }, { "epoch": 14.97, "grad_norm": 0.6397805213928223, "learning_rate": 8.523314866112651e-06, "loss": 0.0037, "step": 32500 }, { "epoch": 15.09, "grad_norm": 0.40614956617355347, "learning_rate": 8.511772853185595e-06, "loss": 0.003, "step": 32750 }, { "epoch": 15.2, "grad_norm": 0.3507106602191925, "learning_rate": 8.500230840258543e-06, "loss": 0.0026, "step": 33000 }, { "epoch": 15.2, "eval_loss": 0.2525634467601776, "eval_runtime": 668.1892, "eval_samples_per_second": 10.105, "eval_steps_per_second": 0.316, "eval_wer": 25.26925441815261, "step": 33000 }, { "epoch": 15.32, "grad_norm": 0.236125648021698, "learning_rate": 8.488688827331487e-06, "loss": 0.0028, "step": 33250 }, { "epoch": 15.43, "grad_norm": 1.2400788068771362, "learning_rate": 8.477146814404433e-06, "loss": 0.0028, "step": 33500 }, { "epoch": 15.55, "grad_norm": 1.4743680953979492, "learning_rate": 8.465604801477379e-06, "loss": 0.0032, "step": 33750 }, { "epoch": 15.66, "grad_norm": 1.9355671405792236, "learning_rate": 8.454062788550323e-06, "loss": 0.0035, "step": 34000 }, { "epoch": 15.66, "eval_loss": 0.2545054256916046, "eval_runtime": 681.2267, "eval_samples_per_second": 9.912, "eval_steps_per_second": 0.31, "eval_wer": 25.43784834480433, "step": 34000 }, { "epoch": 15.78, "grad_norm": 0.8273574709892273, "learning_rate": 8.442566943674978e-06, "loss": 0.0033, "step": 34250 }, { "epoch": 15.89, "grad_norm": 1.760032057762146, "learning_rate": 8.431024930747922e-06, "loss": 0.0035, "step": 34500 }, { "epoch": 16.01, "grad_norm": 1.946081280708313, "learning_rate": 8.419482917820868e-06, "loss": 0.0033, "step": 34750 }, { "epoch": 16.12, "grad_norm": 0.564914345741272, "learning_rate": 8.407940904893814e-06, "loss": 0.0024, "step": 35000 }, { "epoch": 16.12, "eval_loss": 0.2501762807369232, "eval_runtime": 670.7755, "eval_samples_per_second": 10.066, "eval_steps_per_second": 0.315, "eval_wer": 24.212071325148262, "step": 35000 }, { "epoch": 16.24, "grad_norm": 1.6005988121032715, "learning_rate": 8.396398891966759e-06, "loss": 0.0024, "step": 35250 }, { "epoch": 16.35, "grad_norm": 0.8374671339988708, "learning_rate": 8.384856879039706e-06, "loss": 0.0028, "step": 35500 }, { "epoch": 16.47, "grad_norm": 1.0652960538864136, "learning_rate": 8.37331486611265e-06, "loss": 0.003, "step": 35750 }, { "epoch": 16.58, "grad_norm": 1.4033843278884888, "learning_rate": 8.361772853185595e-06, "loss": 0.0027, "step": 36000 }, { "epoch": 16.58, "eval_loss": 0.2562379240989685, "eval_runtime": 678.952, "eval_samples_per_second": 9.945, "eval_steps_per_second": 0.311, "eval_wer": 24.701985441418568, "step": 36000 }, { "epoch": 16.7, "grad_norm": 0.6179186701774597, "learning_rate": 8.350230840258543e-06, "loss": 0.0029, "step": 36250 }, { "epoch": 16.81, "grad_norm": 1.806768774986267, "learning_rate": 8.338688827331487e-06, "loss": 0.0031, "step": 36500 }, { "epoch": 16.93, "grad_norm": 0.528777003288269, "learning_rate": 8.327146814404433e-06, "loss": 0.0031, "step": 36750 }, { "epoch": 17.04, "grad_norm": 0.18216899037361145, "learning_rate": 8.315604801477379e-06, "loss": 0.0029, "step": 37000 }, { "epoch": 17.04, "eval_loss": 0.25483274459838867, "eval_runtime": 673.8208, "eval_samples_per_second": 10.02, "eval_steps_per_second": 0.313, "eval_wer": 24.735704226748915, "step": 37000 }, { "epoch": 17.16, "grad_norm": 0.3349086046218872, "learning_rate": 8.304062788550323e-06, "loss": 0.0019, "step": 37250 }, { "epoch": 17.27, "grad_norm": 1.109415888786316, "learning_rate": 8.29252077562327e-06, "loss": 0.0023, "step": 37500 }, { "epoch": 17.39, "grad_norm": 0.5039780735969543, "learning_rate": 8.280978762696215e-06, "loss": 0.0024, "step": 37750 }, { "epoch": 17.5, "grad_norm": 1.2728774547576904, "learning_rate": 8.269436749769161e-06, "loss": 0.0026, "step": 38000 }, { "epoch": 17.5, "eval_loss": 0.25514695048332214, "eval_runtime": 676.5647, "eval_samples_per_second": 9.98, "eval_steps_per_second": 0.312, "eval_wer": 24.102981137314792, "step": 38000 }, { "epoch": 17.62, "grad_norm": 0.5640347003936768, "learning_rate": 8.257894736842105e-06, "loss": 0.0027, "step": 38250 }, { "epoch": 17.73, "grad_norm": 0.6144846677780151, "learning_rate": 8.24639889196676e-06, "loss": 0.0027, "step": 38500 }, { "epoch": 17.85, "grad_norm": 0.42385855317115784, "learning_rate": 8.234856879039706e-06, "loss": 0.0027, "step": 38750 }, { "epoch": 17.96, "grad_norm": 0.5083030462265015, "learning_rate": 8.22331486611265e-06, "loss": 0.0026, "step": 39000 }, { "epoch": 17.96, "eval_loss": 0.2563398778438568, "eval_runtime": 693.9966, "eval_samples_per_second": 9.729, "eval_steps_per_second": 0.304, "eval_wer": 24.52942459884563, "step": 39000 }, { "epoch": 18.08, "grad_norm": 0.7999371290206909, "learning_rate": 8.211772853185596e-06, "loss": 0.0022, "step": 39250 }, { "epoch": 18.19, "grad_norm": 0.47253143787384033, "learning_rate": 8.200230840258542e-06, "loss": 0.0019, "step": 39500 }, { "epoch": 18.31, "grad_norm": 0.9011787176132202, "learning_rate": 8.188688827331487e-06, "loss": 0.0019, "step": 39750 }, { "epoch": 18.42, "grad_norm": 1.1234568357467651, "learning_rate": 8.177146814404433e-06, "loss": 0.002, "step": 40000 }, { "epoch": 18.42, "eval_loss": 0.2568005323410034, "eval_runtime": 670.3005, "eval_samples_per_second": 10.073, "eval_steps_per_second": 0.315, "eval_wer": 24.36479758811512, "step": 40000 }, { "epoch": 18.54, "grad_norm": 0.7889108657836914, "learning_rate": 8.165604801477379e-06, "loss": 0.002, "step": 40250 }, { "epoch": 18.65, "grad_norm": 1.1920995712280273, "learning_rate": 8.154062788550325e-06, "loss": 0.0021, "step": 40500 }, { "epoch": 18.77, "grad_norm": 0.8429755568504333, "learning_rate": 8.142520775623269e-06, "loss": 0.0022, "step": 40750 }, { "epoch": 18.89, "grad_norm": 3.1110446453094482, "learning_rate": 8.131024930747923e-06, "loss": 0.0028, "step": 41000 }, { "epoch": 18.89, "eval_loss": 0.2559947073459625, "eval_runtime": 691.8949, "eval_samples_per_second": 9.759, "eval_steps_per_second": 0.305, "eval_wer": 24.575044131939624, "step": 41000 }, { "epoch": 19.0, "grad_norm": 0.3139660656452179, "learning_rate": 8.11948291782087e-06, "loss": 0.0026, "step": 41250 }, { "epoch": 19.12, "grad_norm": 0.21102827787399292, "learning_rate": 8.107940904893814e-06, "loss": 0.0017, "step": 41500 }, { "epoch": 19.23, "grad_norm": 0.2366773635149002, "learning_rate": 8.09639889196676e-06, "loss": 0.0015, "step": 41750 }, { "epoch": 19.35, "grad_norm": 0.5561370253562927, "learning_rate": 8.084903047091414e-06, "loss": 0.0019, "step": 42000 }, { "epoch": 19.35, "eval_loss": 0.26248979568481445, "eval_runtime": 678.0519, "eval_samples_per_second": 9.958, "eval_steps_per_second": 0.311, "eval_wer": 24.596862169506316, "step": 42000 }, { "epoch": 19.46, "grad_norm": 0.6396375894546509, "learning_rate": 8.073361034164359e-06, "loss": 0.0019, "step": 42250 }, { "epoch": 19.58, "grad_norm": 0.4240398406982422, "learning_rate": 8.061819021237305e-06, "loss": 0.0021, "step": 42500 }, { "epoch": 19.69, "grad_norm": 2.101404905319214, "learning_rate": 8.05027700831025e-06, "loss": 0.002, "step": 42750 }, { "epoch": 19.81, "grad_norm": 0.4409444034099579, "learning_rate": 8.038734995383195e-06, "loss": 0.0021, "step": 43000 }, { "epoch": 19.81, "eval_loss": 0.26329436898231506, "eval_runtime": 674.3068, "eval_samples_per_second": 10.013, "eval_steps_per_second": 0.313, "eval_wer": 24.102981137314792, "step": 43000 }, { "epoch": 19.92, "grad_norm": 1.5778237581253052, "learning_rate": 8.027192982456141e-06, "loss": 0.0021, "step": 43250 }, { "epoch": 20.04, "grad_norm": 0.34445250034332275, "learning_rate": 8.015650969529087e-06, "loss": 0.002, "step": 43500 }, { "epoch": 20.15, "grad_norm": 0.8328190445899963, "learning_rate": 8.004108956602033e-06, "loss": 0.0015, "step": 43750 }, { "epoch": 20.27, "grad_norm": 1.4873714447021484, "learning_rate": 7.992566943674977e-06, "loss": 0.0015, "step": 44000 }, { "epoch": 20.27, "eval_loss": 0.2650669813156128, "eval_runtime": 678.5481, "eval_samples_per_second": 9.951, "eval_steps_per_second": 0.311, "eval_wer": 24.23983973659678, "step": 44000 }, { "epoch": 20.38, "grad_norm": 2.7649030685424805, "learning_rate": 7.981024930747923e-06, "loss": 0.0016, "step": 44250 }, { "epoch": 20.5, "grad_norm": 0.20013980567455292, "learning_rate": 7.969529085872578e-06, "loss": 0.0019, "step": 44500 }, { "epoch": 20.61, "grad_norm": 1.8381603956222534, "learning_rate": 7.957987072945522e-06, "loss": 0.0021, "step": 44750 }, { "epoch": 20.73, "grad_norm": 1.9389904737472534, "learning_rate": 7.946445060018468e-06, "loss": 0.0018, "step": 45000 }, { "epoch": 20.73, "eval_loss": 0.26352566480636597, "eval_runtime": 684.4278, "eval_samples_per_second": 9.865, "eval_steps_per_second": 0.308, "eval_wer": 24.100997679354187, "step": 45000 }, { "epoch": 20.84, "grad_norm": 0.1582639217376709, "learning_rate": 7.934903047091414e-06, "loss": 0.0022, "step": 45250 }, { "epoch": 20.96, "grad_norm": 1.1058118343353271, "learning_rate": 7.923361034164358e-06, "loss": 0.0021, "step": 45500 }, { "epoch": 21.07, "grad_norm": 0.4075948894023895, "learning_rate": 7.911819021237304e-06, "loss": 0.0018, "step": 45750 }, { "epoch": 21.19, "grad_norm": 0.894478440284729, "learning_rate": 7.90027700831025e-06, "loss": 0.0013, "step": 46000 }, { "epoch": 21.19, "eval_loss": 0.2585604190826416, "eval_runtime": 674.5545, "eval_samples_per_second": 10.01, "eval_steps_per_second": 0.313, "eval_wer": 23.849098518356904, "step": 46000 }, { "epoch": 21.3, "grad_norm": 0.8086264729499817, "learning_rate": 7.888734995383196e-06, "loss": 0.0016, "step": 46250 }, { "epoch": 21.42, "grad_norm": 0.5767725706100464, "learning_rate": 7.87719298245614e-06, "loss": 0.0016, "step": 46500 }, { "epoch": 21.53, "grad_norm": 1.622611403465271, "learning_rate": 7.865650969529087e-06, "loss": 0.0019, "step": 46750 }, { "epoch": 21.65, "grad_norm": 2.0076467990875244, "learning_rate": 7.854108956602033e-06, "loss": 0.0018, "step": 47000 }, { "epoch": 21.65, "eval_loss": 0.2612689435482025, "eval_runtime": 680.9382, "eval_samples_per_second": 9.916, "eval_steps_per_second": 0.31, "eval_wer": 23.940337584544896, "step": 47000 }, { "epoch": 21.76, "grad_norm": 0.3226953446865082, "learning_rate": 7.842566943674977e-06, "loss": 0.0018, "step": 47250 }, { "epoch": 21.88, "grad_norm": 0.5793449878692627, "learning_rate": 7.831024930747923e-06, "loss": 0.0019, "step": 47500 }, { "epoch": 21.99, "grad_norm": 0.3840419352054596, "learning_rate": 7.819482917820869e-06, "loss": 0.0019, "step": 47750 }, { "epoch": 22.11, "grad_norm": 2.221217155456543, "learning_rate": 7.807940904893813e-06, "loss": 0.0014, "step": 48000 }, { "epoch": 22.11, "eval_loss": 0.26184049248695374, "eval_runtime": 679.8585, "eval_samples_per_second": 9.931, "eval_steps_per_second": 0.31, "eval_wer": 23.591248983477794, "step": 48000 }, { "epoch": 22.22, "grad_norm": 0.1267741620540619, "learning_rate": 7.79639889196676e-06, "loss": 0.0013, "step": 48250 }, { "epoch": 22.34, "grad_norm": 0.3871385157108307, "learning_rate": 7.784856879039705e-06, "loss": 0.0016, "step": 48500 }, { "epoch": 22.46, "grad_norm": 0.319933146238327, "learning_rate": 7.773361034164358e-06, "loss": 0.0016, "step": 48750 }, { "epoch": 22.57, "grad_norm": 0.21517297625541687, "learning_rate": 7.761819021237304e-06, "loss": 0.0017, "step": 49000 }, { "epoch": 22.57, "eval_loss": 0.2654561698436737, "eval_runtime": 676.7053, "eval_samples_per_second": 9.978, "eval_steps_per_second": 0.312, "eval_wer": 23.755875994208303, "step": 49000 }, { "epoch": 22.69, "grad_norm": 0.17343254387378693, "learning_rate": 7.75027700831025e-06, "loss": 0.0017, "step": 49250 }, { "epoch": 22.8, "grad_norm": 0.31837859749794006, "learning_rate": 7.738734995383196e-06, "loss": 0.0015, "step": 49500 }, { "epoch": 22.92, "grad_norm": 1.0666159391403198, "learning_rate": 7.72719298245614e-06, "loss": 0.0015, "step": 49750 }, { "epoch": 23.03, "grad_norm": 0.5933089852333069, "learning_rate": 7.715650969529086e-06, "loss": 0.0016, "step": 50000 }, { "epoch": 23.03, "eval_loss": 0.2641240656375885, "eval_runtime": 701.7818, "eval_samples_per_second": 9.621, "eval_steps_per_second": 0.301, "eval_wer": 23.57141440387171, "step": 50000 }, { "epoch": 23.15, "grad_norm": 4.56223201751709, "learning_rate": 7.704108956602032e-06, "loss": 0.0012, "step": 50250 }, { "epoch": 23.26, "grad_norm": 0.13189882040023804, "learning_rate": 7.692566943674977e-06, "loss": 0.0013, "step": 50500 }, { "epoch": 23.38, "grad_norm": 0.3501899242401123, "learning_rate": 7.681024930747923e-06, "loss": 0.0014, "step": 50750 }, { "epoch": 23.49, "grad_norm": 0.3943934738636017, "learning_rate": 7.669529085872577e-06, "loss": 0.0014, "step": 51000 }, { "epoch": 23.49, "eval_loss": 0.26596611738204956, "eval_runtime": 667.3547, "eval_samples_per_second": 10.118, "eval_steps_per_second": 0.316, "eval_wer": 23.60910010512327, "step": 51000 }, { "epoch": 23.61, "grad_norm": 0.6058038473129272, "learning_rate": 7.657987072945522e-06, "loss": 0.0014, "step": 51250 }, { "epoch": 23.72, "grad_norm": 0.18997740745544434, "learning_rate": 7.646445060018468e-06, "loss": 0.0018, "step": 51500 }, { "epoch": 23.84, "grad_norm": 0.4755234122276306, "learning_rate": 7.634903047091414e-06, "loss": 0.0017, "step": 51750 }, { "epoch": 23.95, "grad_norm": 0.6140190362930298, "learning_rate": 7.62336103416436e-06, "loss": 0.0018, "step": 52000 }, { "epoch": 23.95, "eval_loss": 0.2636994421482086, "eval_runtime": 678.6943, "eval_samples_per_second": 9.949, "eval_steps_per_second": 0.311, "eval_wer": 23.870916555923596, "step": 52000 }, { "epoch": 24.07, "grad_norm": 0.6709560751914978, "learning_rate": 7.611819021237305e-06, "loss": 0.0013, "step": 52250 }, { "epoch": 24.18, "grad_norm": 0.40510040521621704, "learning_rate": 7.60027700831025e-06, "loss": 0.0011, "step": 52500 }, { "epoch": 24.3, "grad_norm": 0.464121550321579, "learning_rate": 7.588734995383196e-06, "loss": 0.0011, "step": 52750 }, { "epoch": 24.41, "grad_norm": 0.415995329618454, "learning_rate": 7.577192982456141e-06, "loss": 0.0012, "step": 53000 }, { "epoch": 24.41, "eval_loss": 0.2662787139415741, "eval_runtime": 679.0891, "eval_samples_per_second": 9.943, "eval_steps_per_second": 0.311, "eval_wer": 23.48017533768372, "step": 53000 }, { "epoch": 24.53, "grad_norm": 0.6342004537582397, "learning_rate": 7.565650969529087e-06, "loss": 0.0014, "step": 53250 }, { "epoch": 24.64, "grad_norm": 0.36562052369117737, "learning_rate": 7.554155124653741e-06, "loss": 0.0014, "step": 53500 }, { "epoch": 24.76, "grad_norm": 0.47582271695137024, "learning_rate": 7.542613111726685e-06, "loss": 0.0015, "step": 53750 }, { "epoch": 24.87, "grad_norm": 0.7419930696487427, "learning_rate": 7.531071098799632e-06, "loss": 0.0015, "step": 54000 }, { "epoch": 24.87, "eval_loss": 0.2703973352909088, "eval_runtime": 665.4932, "eval_samples_per_second": 10.146, "eval_steps_per_second": 0.317, "eval_wer": 23.75190907828709, "step": 54000 }, { "epoch": 24.99, "grad_norm": 0.7274812459945679, "learning_rate": 7.519529085872577e-06, "loss": 0.0016, "step": 54250 }, { "epoch": 25.1, "grad_norm": 1.2647254467010498, "learning_rate": 7.507987072945521e-06, "loss": 0.0012, "step": 54500 }, { "epoch": 25.22, "grad_norm": 0.27594470977783203, "learning_rate": 7.496445060018468e-06, "loss": 0.0011, "step": 54750 }, { "epoch": 25.33, "grad_norm": 0.14190466701984406, "learning_rate": 7.484903047091413e-06, "loss": 0.0012, "step": 55000 }, { "epoch": 25.33, "eval_loss": 0.2656785249710083, "eval_runtime": 668.1719, "eval_samples_per_second": 10.105, "eval_steps_per_second": 0.316, "eval_wer": 24.233889362714958, "step": 55000 }, { "epoch": 25.45, "grad_norm": 0.559374213218689, "learning_rate": 7.473361034164359e-06, "loss": 0.0013, "step": 55250 }, { "epoch": 25.56, "grad_norm": 3.8242385387420654, "learning_rate": 7.461865189289012e-06, "loss": 0.0012, "step": 55500 }, { "epoch": 25.68, "grad_norm": 0.23001307249069214, "learning_rate": 7.450323176361957e-06, "loss": 0.0014, "step": 55750 }, { "epoch": 25.79, "grad_norm": 0.45375123620033264, "learning_rate": 7.438781163434904e-06, "loss": 0.0013, "step": 56000 }, { "epoch": 25.79, "eval_loss": 0.2668148875236511, "eval_runtime": 675.9749, "eval_samples_per_second": 9.989, "eval_steps_per_second": 0.312, "eval_wer": 23.287779915504693, "step": 56000 }, { "epoch": 25.91, "grad_norm": 0.11875366419553757, "learning_rate": 7.427239150507849e-06, "loss": 0.0014, "step": 56250 }, { "epoch": 26.02, "grad_norm": 0.21367508172988892, "learning_rate": 7.415697137580795e-06, "loss": 0.0011, "step": 56500 }, { "epoch": 26.14, "grad_norm": 0.2883310317993164, "learning_rate": 7.4041551246537405e-06, "loss": 0.001, "step": 56750 }, { "epoch": 26.26, "grad_norm": 0.4850456118583679, "learning_rate": 7.392613111726686e-06, "loss": 0.001, "step": 57000 }, { "epoch": 26.26, "eval_loss": 0.26939988136291504, "eval_runtime": 668.5911, "eval_samples_per_second": 10.099, "eval_steps_per_second": 0.316, "eval_wer": 23.285796457544084, "step": 57000 }, { "epoch": 26.37, "grad_norm": 0.21186549961566925, "learning_rate": 7.381071098799632e-06, "loss": 0.0014, "step": 57250 }, { "epoch": 26.49, "grad_norm": 1.3765850067138672, "learning_rate": 7.369529085872577e-06, "loss": 0.0014, "step": 57500 }, { "epoch": 26.6, "grad_norm": 1.716868281364441, "learning_rate": 7.358033240997231e-06, "loss": 0.0012, "step": 57750 }, { "epoch": 26.72, "grad_norm": 1.3002432584762573, "learning_rate": 7.3464912280701765e-06, "loss": 0.0013, "step": 58000 }, { "epoch": 26.72, "eval_loss": 0.2650892734527588, "eval_runtime": 675.4132, "eval_samples_per_second": 9.997, "eval_steps_per_second": 0.312, "eval_wer": 23.279846083662257, "step": 58000 }, { "epoch": 26.83, "grad_norm": 0.3222731053829193, "learning_rate": 7.334949215143121e-06, "loss": 0.0013, "step": 58250 }, { "epoch": 26.95, "grad_norm": 0.376442015171051, "learning_rate": 7.323407202216068e-06, "loss": 0.0013, "step": 58500 }, { "epoch": 27.06, "grad_norm": 0.1525341123342514, "learning_rate": 7.311865189289013e-06, "loss": 0.001, "step": 58750 }, { "epoch": 27.18, "grad_norm": 0.3236662745475769, "learning_rate": 7.300323176361959e-06, "loss": 0.0009, "step": 59000 }, { "epoch": 27.18, "eval_loss": 0.26985055208206177, "eval_runtime": 680.6209, "eval_samples_per_second": 9.92, "eval_steps_per_second": 0.31, "eval_wer": 23.258028046095564, "step": 59000 }, { "epoch": 27.29, "grad_norm": 0.12151502072811127, "learning_rate": 7.288827331486612e-06, "loss": 0.0012, "step": 59250 }, { "epoch": 27.41, "grad_norm": 1.5825998783111572, "learning_rate": 7.277285318559557e-06, "loss": 0.0012, "step": 59500 }, { "epoch": 27.52, "grad_norm": 0.1484094262123108, "learning_rate": 7.265743305632504e-06, "loss": 0.0012, "step": 59750 }, { "epoch": 27.64, "grad_norm": 0.6981366872787476, "learning_rate": 7.254201292705448e-06, "loss": 0.001, "step": 60000 }, { "epoch": 27.64, "eval_loss": 0.2713184356689453, "eval_runtime": 670.5479, "eval_samples_per_second": 10.069, "eval_steps_per_second": 0.315, "eval_wer": 23.236210008528868, "step": 60000 }, { "epoch": 27.75, "grad_norm": 0.1501929610967636, "learning_rate": 7.242659279778393e-06, "loss": 0.0011, "step": 60250 }, { "epoch": 27.87, "grad_norm": 3.85675048828125, "learning_rate": 7.23111726685134e-06, "loss": 0.0014, "step": 60500 }, { "epoch": 27.98, "grad_norm": 0.3616078197956085, "learning_rate": 7.219575253924285e-06, "loss": 0.0013, "step": 60750 }, { "epoch": 28.1, "grad_norm": 0.20366336405277252, "learning_rate": 7.208033240997231e-06, "loss": 0.0011, "step": 61000 }, { "epoch": 28.1, "eval_loss": 0.2708372175693512, "eval_runtime": 673.0742, "eval_samples_per_second": 10.032, "eval_steps_per_second": 0.313, "eval_wer": 23.390919729456332, "step": 61000 }, { "epoch": 28.21, "grad_norm": 0.40390634536743164, "learning_rate": 7.196491228070176e-06, "loss": 0.001, "step": 61250 }, { "epoch": 28.33, "grad_norm": 1.1840142011642456, "learning_rate": 7.184949215143121e-06, "loss": 0.0009, "step": 61500 }, { "epoch": 28.44, "grad_norm": 1.5266140699386597, "learning_rate": 7.173407202216067e-06, "loss": 0.0009, "step": 61750 }, { "epoch": 28.56, "grad_norm": 0.1660241335630417, "learning_rate": 7.1618651892890125e-06, "loss": 0.0012, "step": 62000 }, { "epoch": 28.56, "eval_loss": 0.27276286482810974, "eval_runtime": 683.6949, "eval_samples_per_second": 9.876, "eval_steps_per_second": 0.309, "eval_wer": 22.956542436083065, "step": 62000 }, { "epoch": 28.67, "grad_norm": 1.5870364904403687, "learning_rate": 7.1503231763619585e-06, "loss": 0.0011, "step": 62250 }, { "epoch": 28.79, "grad_norm": 0.21234387159347534, "learning_rate": 7.138781163434904e-06, "loss": 0.0014, "step": 62500 }, { "epoch": 28.9, "grad_norm": 0.25426217913627625, "learning_rate": 7.127239150507849e-06, "loss": 0.0012, "step": 62750 }, { "epoch": 29.02, "grad_norm": 0.1948922723531723, "learning_rate": 7.115697137580795e-06, "loss": 0.0013, "step": 63000 }, { "epoch": 29.02, "eval_loss": 0.27328047156333923, "eval_runtime": 662.6991, "eval_samples_per_second": 10.189, "eval_steps_per_second": 0.318, "eval_wer": 22.79389888331317, "step": 63000 }, { "epoch": 29.13, "grad_norm": 0.1855873465538025, "learning_rate": 7.10415512465374e-06, "loss": 0.0008, "step": 63250 }, { "epoch": 29.25, "grad_norm": 1.3260753154754639, "learning_rate": 7.092613111726686e-06, "loss": 0.0007, "step": 63500 }, { "epoch": 29.36, "grad_norm": 0.13366416096687317, "learning_rate": 7.08111726685134e-06, "loss": 0.0009, "step": 63750 }, { "epoch": 29.48, "grad_norm": 0.8121051788330078, "learning_rate": 7.069575253924285e-06, "loss": 0.0009, "step": 64000 }, { "epoch": 29.48, "eval_loss": 0.272777795791626, "eval_runtime": 672.0892, "eval_samples_per_second": 10.046, "eval_steps_per_second": 0.314, "eval_wer": 22.861336453973855, "step": 64000 }, { "epoch": 29.59, "grad_norm": 0.20612682402133942, "learning_rate": 7.058033240997231e-06, "loss": 0.0011, "step": 64250 }, { "epoch": 29.71, "grad_norm": 0.37823590636253357, "learning_rate": 7.046491228070176e-06, "loss": 0.0011, "step": 64500 }, { "epoch": 29.82, "grad_norm": 2.2586910724639893, "learning_rate": 7.034949215143122e-06, "loss": 0.0011, "step": 64750 }, { "epoch": 29.94, "grad_norm": 0.2618952989578247, "learning_rate": 7.023407202216067e-06, "loss": 0.001, "step": 65000 }, { "epoch": 29.94, "eval_loss": 0.27243489027023315, "eval_runtime": 683.4018, "eval_samples_per_second": 9.88, "eval_steps_per_second": 0.309, "eval_wer": 22.87522065969812, "step": 65000 }, { "epoch": 30.06, "grad_norm": 0.1083679348230362, "learning_rate": 7.011865189289012e-06, "loss": 0.0009, "step": 65250 }, { "epoch": 30.17, "grad_norm": 0.12795807421207428, "learning_rate": 7.000323176361958e-06, "loss": 0.0006, "step": 65500 }, { "epoch": 30.29, "grad_norm": 1.4113242626190186, "learning_rate": 6.988781163434903e-06, "loss": 0.0005, "step": 65750 }, { "epoch": 30.4, "grad_norm": 0.4899054765701294, "learning_rate": 6.977285318559557e-06, "loss": 0.0009, "step": 66000 }, { "epoch": 30.4, "eval_loss": 0.2714119553565979, "eval_runtime": 679.0994, "eval_samples_per_second": 9.943, "eval_steps_per_second": 0.311, "eval_wer": 23.03588075450741, "step": 66000 }, { "epoch": 30.52, "grad_norm": 0.13397559523582458, "learning_rate": 6.965743305632503e-06, "loss": 0.0008, "step": 66250 }, { "epoch": 30.63, "grad_norm": 0.22414207458496094, "learning_rate": 6.954201292705448e-06, "loss": 0.0008, "step": 66500 }, { "epoch": 30.75, "grad_norm": 0.2454010248184204, "learning_rate": 6.942659279778394e-06, "loss": 0.0011, "step": 66750 }, { "epoch": 30.86, "grad_norm": 0.5344116687774658, "learning_rate": 6.9311172668513394e-06, "loss": 0.0014, "step": 67000 }, { "epoch": 30.86, "eval_loss": 0.27881717681884766, "eval_runtime": 673.8635, "eval_samples_per_second": 10.02, "eval_steps_per_second": 0.313, "eval_wer": 23.321498700835036, "step": 67000 }, { "epoch": 30.98, "grad_norm": 1.2090712785720825, "learning_rate": 6.919575253924285e-06, "loss": 0.0012, "step": 67250 }, { "epoch": 31.09, "grad_norm": 0.17009182274341583, "learning_rate": 6.908033240997231e-06, "loss": 0.0009, "step": 67500 }, { "epoch": 31.21, "grad_norm": 0.47179415822029114, "learning_rate": 6.896491228070176e-06, "loss": 0.0007, "step": 67750 }, { "epoch": 31.32, "grad_norm": 0.2590140402317047, "learning_rate": 6.884949215143122e-06, "loss": 0.0007, "step": 68000 }, { "epoch": 31.32, "eval_loss": 0.278424471616745, "eval_runtime": 660.7801, "eval_samples_per_second": 10.218, "eval_steps_per_second": 0.319, "eval_wer": 23.246127298331913, "step": 68000 }, { "epoch": 31.44, "grad_norm": 0.6639719009399414, "learning_rate": 6.873407202216067e-06, "loss": 0.0009, "step": 68250 }, { "epoch": 31.55, "grad_norm": 0.8088191151618958, "learning_rate": 6.861911357340721e-06, "loss": 0.001, "step": 68500 }, { "epoch": 31.67, "grad_norm": 0.9694509506225586, "learning_rate": 6.850369344413667e-06, "loss": 0.001, "step": 68750 }, { "epoch": 31.78, "grad_norm": 1.2024418115615845, "learning_rate": 6.838827331486612e-06, "loss": 0.0009, "step": 69000 }, { "epoch": 31.78, "eval_loss": 0.27510857582092285, "eval_runtime": 673.7472, "eval_samples_per_second": 10.022, "eval_steps_per_second": 0.313, "eval_wer": 23.137037110498444, "step": 69000 }, { "epoch": 31.9, "grad_norm": 0.2124684602022171, "learning_rate": 6.827285318559558e-06, "loss": 0.0012, "step": 69250 }, { "epoch": 32.01, "grad_norm": 0.1560162901878357, "learning_rate": 6.815743305632503e-06, "loss": 0.001, "step": 69500 }, { "epoch": 32.13, "grad_norm": 0.1794072687625885, "learning_rate": 6.804201292705448e-06, "loss": 0.0006, "step": 69750 }, { "epoch": 32.24, "grad_norm": 0.2194598764181137, "learning_rate": 6.792659279778394e-06, "loss": 0.0005, "step": 70000 }, { "epoch": 32.24, "eval_loss": 0.27563953399658203, "eval_runtime": 687.9789, "eval_samples_per_second": 9.814, "eval_steps_per_second": 0.307, "eval_wer": 22.787948509431345, "step": 70000 }, { "epoch": 32.36, "grad_norm": 0.11972519010305405, "learning_rate": 6.781117266851339e-06, "loss": 0.0007, "step": 70250 }, { "epoch": 32.47, "grad_norm": 1.2118364572525024, "learning_rate": 6.769575253924285e-06, "loss": 0.0008, "step": 70500 }, { "epoch": 32.59, "grad_norm": 0.159651979804039, "learning_rate": 6.75803324099723e-06, "loss": 0.0009, "step": 70750 }, { "epoch": 32.7, "grad_norm": 1.5151838064193726, "learning_rate": 6.7464912280701755e-06, "loss": 0.0009, "step": 71000 }, { "epoch": 32.7, "eval_loss": 0.27915722131729126, "eval_runtime": 689.7748, "eval_samples_per_second": 9.789, "eval_steps_per_second": 0.306, "eval_wer": 22.797865799234383, "step": 71000 }, { "epoch": 32.82, "grad_norm": 0.10878114402294159, "learning_rate": 6.73499538319483e-06, "loss": 0.0008, "step": 71250 }, { "epoch": 32.93, "grad_norm": 0.13962584733963013, "learning_rate": 6.723453370267775e-06, "loss": 0.001, "step": 71500 }, { "epoch": 33.05, "grad_norm": 2.416551113128662, "learning_rate": 6.711911357340721e-06, "loss": 0.0009, "step": 71750 }, { "epoch": 33.16, "grad_norm": 0.14477728307247162, "learning_rate": 6.700369344413666e-06, "loss": 0.0007, "step": 72000 }, { "epoch": 33.16, "eval_loss": 0.2731185853481293, "eval_runtime": 673.8757, "eval_samples_per_second": 10.02, "eval_steps_per_second": 0.313, "eval_wer": 23.30364757918956, "step": 72000 }, { "epoch": 33.28, "grad_norm": 0.0625206008553505, "learning_rate": 6.6888273314866115e-06, "loss": 0.0007, "step": 72250 }, { "epoch": 33.39, "grad_norm": 0.1424214392900467, "learning_rate": 6.6772853185595575e-06, "loss": 0.0008, "step": 72500 }, { "epoch": 33.51, "grad_norm": 0.3345101773738861, "learning_rate": 6.665743305632503e-06, "loss": 0.0008, "step": 72750 }, { "epoch": 33.63, "grad_norm": 1.2112958431243896, "learning_rate": 6.654201292705449e-06, "loss": 0.0009, "step": 73000 }, { "epoch": 33.63, "eval_loss": 0.2806909680366516, "eval_runtime": 699.4048, "eval_samples_per_second": 9.654, "eval_steps_per_second": 0.302, "eval_wer": 22.60348691909475, "step": 73000 }, { "epoch": 33.74, "grad_norm": 0.6432926058769226, "learning_rate": 6.642659279778394e-06, "loss": 0.001, "step": 73250 }, { "epoch": 33.86, "grad_norm": 0.5472640991210938, "learning_rate": 6.6311634349030475e-06, "loss": 0.0008, "step": 73500 }, { "epoch": 33.97, "grad_norm": 0.05136106163263321, "learning_rate": 6.6196214219759935e-06, "loss": 0.0008, "step": 73750 }, { "epoch": 34.09, "grad_norm": 0.16334278881549835, "learning_rate": 6.608079409048939e-06, "loss": 0.0008, "step": 74000 }, { "epoch": 34.09, "eval_loss": 0.2772423326969147, "eval_runtime": 671.9826, "eval_samples_per_second": 10.048, "eval_steps_per_second": 0.314, "eval_wer": 22.41307495487633, "step": 74000 }, { "epoch": 34.2, "grad_norm": 0.17405687272548676, "learning_rate": 6.596537396121884e-06, "loss": 0.0008, "step": 74250 }, { "epoch": 34.32, "grad_norm": 1.0651663541793823, "learning_rate": 6.58499538319483e-06, "loss": 0.0008, "step": 74500 }, { "epoch": 34.43, "grad_norm": 0.22232329845428467, "learning_rate": 6.573453370267775e-06, "loss": 0.0007, "step": 74750 }, { "epoch": 34.55, "grad_norm": 0.10098864883184433, "learning_rate": 6.561911357340721e-06, "loss": 0.0007, "step": 75000 }, { "epoch": 34.55, "eval_loss": 0.2794438600540161, "eval_runtime": 681.119, "eval_samples_per_second": 9.913, "eval_steps_per_second": 0.31, "eval_wer": 22.53604934843406, "step": 75000 }, { "epoch": 34.66, "grad_norm": 0.09176724404096603, "learning_rate": 6.550369344413666e-06, "loss": 0.0008, "step": 75250 }, { "epoch": 34.78, "grad_norm": 0.3179700970649719, "learning_rate": 6.538827331486611e-06, "loss": 0.0008, "step": 75500 }, { "epoch": 34.89, "grad_norm": 0.38459789752960205, "learning_rate": 6.527285318559557e-06, "loss": 0.0008, "step": 75750 }, { "epoch": 35.01, "grad_norm": 0.2603273391723633, "learning_rate": 6.515743305632502e-06, "loss": 0.0008, "step": 76000 }, { "epoch": 35.01, "eval_loss": 0.2777673900127411, "eval_runtime": 692.7589, "eval_samples_per_second": 9.747, "eval_steps_per_second": 0.305, "eval_wer": 22.811750004958643, "step": 76000 }, { "epoch": 35.12, "grad_norm": 0.051916543394327164, "learning_rate": 6.504247460757157e-06, "loss": 0.0007, "step": 76250 }, { "epoch": 35.24, "grad_norm": 0.30884623527526855, "learning_rate": 6.492705447830102e-06, "loss": 0.0006, "step": 76500 }, { "epoch": 35.35, "grad_norm": 0.1257990300655365, "learning_rate": 6.481163434903047e-06, "loss": 0.0007, "step": 76750 }, { "epoch": 35.47, "grad_norm": 0.08370446413755417, "learning_rate": 6.469621421975993e-06, "loss": 0.0008, "step": 77000 }, { "epoch": 35.47, "eval_loss": 0.2764694094657898, "eval_runtime": 692.6631, "eval_samples_per_second": 9.748, "eval_steps_per_second": 0.305, "eval_wer": 22.82960112660412, "step": 77000 }, { "epoch": 35.58, "grad_norm": 0.16313733160495758, "learning_rate": 6.458079409048938e-06, "loss": 0.0007, "step": 77250 }, { "epoch": 35.7, "grad_norm": 1.0557291507720947, "learning_rate": 6.446537396121884e-06, "loss": 0.0007, "step": 77500 }, { "epoch": 35.81, "grad_norm": 0.2264009267091751, "learning_rate": 6.4349953831948295e-06, "loss": 0.0008, "step": 77750 }, { "epoch": 35.93, "grad_norm": 0.2705702781677246, "learning_rate": 6.423453370267775e-06, "loss": 0.0009, "step": 78000 }, { "epoch": 35.93, "eval_loss": 0.27600711584091187, "eval_runtime": 689.6167, "eval_samples_per_second": 9.791, "eval_steps_per_second": 0.306, "eval_wer": 22.551917012118928, "step": 78000 }, { "epoch": 36.04, "grad_norm": 0.2169518917798996, "learning_rate": 6.411911357340721e-06, "loss": 0.0006, "step": 78250 }, { "epoch": 36.16, "grad_norm": 0.19748559594154358, "learning_rate": 6.400415512465374e-06, "loss": 0.0006, "step": 78500 }, { "epoch": 36.27, "grad_norm": 1.7767668962478638, "learning_rate": 6.3888734995383196e-06, "loss": 0.0006, "step": 78750 }, { "epoch": 36.39, "grad_norm": 0.2516990303993225, "learning_rate": 6.3773314866112655e-06, "loss": 0.0005, "step": 79000 }, { "epoch": 36.39, "eval_loss": 0.2752860188484192, "eval_runtime": 664.3231, "eval_samples_per_second": 10.164, "eval_steps_per_second": 0.318, "eval_wer": 22.64315607830692, "step": 79000 }, { "epoch": 36.5, "grad_norm": 0.05742982402443886, "learning_rate": 6.365789473684211e-06, "loss": 0.0005, "step": 79250 }, { "epoch": 36.62, "grad_norm": 5.542628765106201, "learning_rate": 6.354247460757157e-06, "loss": 0.0009, "step": 79500 }, { "epoch": 36.73, "grad_norm": 0.12612101435661316, "learning_rate": 6.342705447830102e-06, "loss": 0.0009, "step": 79750 }, { "epoch": 36.85, "grad_norm": 1.6482515335083008, "learning_rate": 6.331163434903047e-06, "loss": 0.0007, "step": 80000 }, { "epoch": 36.85, "eval_loss": 0.2798755466938019, "eval_runtime": 679.0069, "eval_samples_per_second": 9.944, "eval_steps_per_second": 0.311, "eval_wer": 22.450760656127894, "step": 80000 }, { "epoch": 36.96, "grad_norm": 0.1331368237733841, "learning_rate": 6.319621421975993e-06, "loss": 0.0007, "step": 80250 }, { "epoch": 37.08, "grad_norm": 0.097502700984478, "learning_rate": 6.308079409048938e-06, "loss": 0.0006, "step": 80500 }, { "epoch": 37.19, "grad_norm": 0.05282368138432503, "learning_rate": 6.296537396121884e-06, "loss": 0.0005, "step": 80750 }, { "epoch": 37.31, "grad_norm": 0.7441471815109253, "learning_rate": 6.284995383194829e-06, "loss": 0.0006, "step": 81000 }, { "epoch": 37.31, "eval_loss": 0.2776803970336914, "eval_runtime": 685.1247, "eval_samples_per_second": 9.855, "eval_steps_per_second": 0.308, "eval_wer": 22.208778784933653, "step": 81000 }, { "epoch": 37.43, "grad_norm": 0.12682919204235077, "learning_rate": 6.273499538319483e-06, "loss": 0.0006, "step": 81250 }, { "epoch": 37.54, "grad_norm": 0.14379066228866577, "learning_rate": 6.261957525392429e-06, "loss": 0.0006, "step": 81500 }, { "epoch": 37.66, "grad_norm": 0.23371708393096924, "learning_rate": 6.250415512465374e-06, "loss": 0.0006, "step": 81750 }, { "epoch": 37.77, "grad_norm": 0.21299830079078674, "learning_rate": 6.238919667590029e-06, "loss": 0.0008, "step": 82000 }, { "epoch": 37.77, "eval_loss": 0.27769771218299866, "eval_runtime": 686.8168, "eval_samples_per_second": 9.831, "eval_steps_per_second": 0.307, "eval_wer": 22.746295892258566, "step": 82000 }, { "epoch": 37.89, "grad_norm": 0.1677282303571701, "learning_rate": 6.227377654662974e-06, "loss": 0.0008, "step": 82250 }, { "epoch": 38.0, "grad_norm": 1.1451334953308105, "learning_rate": 6.215835641735919e-06, "loss": 0.0009, "step": 82500 }, { "epoch": 38.12, "grad_norm": 0.19697508215904236, "learning_rate": 6.204293628808865e-06, "loss": 0.0006, "step": 82750 }, { "epoch": 38.23, "grad_norm": 0.07527792453765869, "learning_rate": 6.19275161588181e-06, "loss": 0.0007, "step": 83000 }, { "epoch": 38.23, "eval_loss": 0.2804949879646301, "eval_runtime": 675.5771, "eval_samples_per_second": 9.994, "eval_steps_per_second": 0.312, "eval_wer": 22.71257710692822, "step": 83000 }, { "epoch": 38.35, "grad_norm": 1.9222028255462646, "learning_rate": 6.181209602954756e-06, "loss": 0.0007, "step": 83250 }, { "epoch": 38.46, "grad_norm": 0.09556487202644348, "learning_rate": 6.169667590027701e-06, "loss": 0.0007, "step": 83500 }, { "epoch": 38.58, "grad_norm": 0.21437525749206543, "learning_rate": 6.1581255771006465e-06, "loss": 0.0006, "step": 83750 }, { "epoch": 38.69, "grad_norm": 0.1807592660188675, "learning_rate": 6.1465835641735925e-06, "loss": 0.0007, "step": 84000 }, { "epoch": 38.69, "eval_loss": 0.2847980260848999, "eval_runtime": 672.9632, "eval_samples_per_second": 10.033, "eval_steps_per_second": 0.314, "eval_wer": 22.424975702639983, "step": 84000 }, { "epoch": 38.81, "grad_norm": 0.16911369562149048, "learning_rate": 6.135041551246538e-06, "loss": 0.0007, "step": 84250 }, { "epoch": 38.92, "grad_norm": 1.3031611442565918, "learning_rate": 6.123545706371191e-06, "loss": 0.0008, "step": 84500 }, { "epoch": 39.04, "grad_norm": 0.04561692103743553, "learning_rate": 6.112003693444137e-06, "loss": 0.0006, "step": 84750 }, { "epoch": 39.15, "grad_norm": 0.061062462627887726, "learning_rate": 6.1004616805170825e-06, "loss": 0.0003, "step": 85000 }, { "epoch": 39.15, "eval_loss": 0.2790899872779846, "eval_runtime": 674.5949, "eval_samples_per_second": 10.009, "eval_steps_per_second": 0.313, "eval_wer": 22.01439990479402, "step": 85000 }, { "epoch": 39.27, "grad_norm": 0.07837537676095963, "learning_rate": 6.0889196675900285e-06, "loss": 0.0004, "step": 85250 }, { "epoch": 39.38, "grad_norm": 0.05536266788840294, "learning_rate": 6.077377654662974e-06, "loss": 0.0006, "step": 85500 }, { "epoch": 39.5, "grad_norm": 5.097941875457764, "learning_rate": 6.065835641735919e-06, "loss": 0.0009, "step": 85750 }, { "epoch": 39.61, "grad_norm": 0.07944060117006302, "learning_rate": 6.054293628808865e-06, "loss": 0.0006, "step": 86000 }, { "epoch": 39.61, "eval_loss": 0.2777423858642578, "eval_runtime": 687.4077, "eval_samples_per_second": 9.822, "eval_steps_per_second": 0.307, "eval_wer": 22.262332149870083, "step": 86000 }, { "epoch": 39.73, "grad_norm": 1.522270917892456, "learning_rate": 6.04275161588181e-06, "loss": 0.0006, "step": 86250 }, { "epoch": 39.84, "grad_norm": 0.05595465004444122, "learning_rate": 6.031209602954756e-06, "loss": 0.0006, "step": 86500 }, { "epoch": 39.96, "grad_norm": 0.5327405333518982, "learning_rate": 6.019667590027701e-06, "loss": 0.0005, "step": 86750 }, { "epoch": 40.07, "grad_norm": 0.18009261786937714, "learning_rate": 6.008125577100646e-06, "loss": 0.0003, "step": 87000 }, { "epoch": 40.07, "eval_loss": 0.2798568308353424, "eval_runtime": 666.6019, "eval_samples_per_second": 10.129, "eval_steps_per_second": 0.317, "eval_wer": 22.0798540174941, "step": 87000 }, { "epoch": 40.19, "grad_norm": 0.15836778283119202, "learning_rate": 5.996583564173592e-06, "loss": 0.0005, "step": 87250 }, { "epoch": 40.3, "grad_norm": 0.06244779750704765, "learning_rate": 5.985041551246537e-06, "loss": 0.0005, "step": 87500 }, { "epoch": 40.42, "grad_norm": 0.47360849380493164, "learning_rate": 5.973499538319484e-06, "loss": 0.0005, "step": 87750 }, { "epoch": 40.53, "grad_norm": 0.12535277009010315, "learning_rate": 5.9619575253924285e-06, "loss": 0.0005, "step": 88000 }, { "epoch": 40.53, "eval_loss": 0.2800135612487793, "eval_runtime": 683.9129, "eval_samples_per_second": 9.873, "eval_steps_per_second": 0.309, "eval_wer": 22.23059682250035, "step": 88000 }, { "epoch": 40.65, "grad_norm": 0.06063379347324371, "learning_rate": 5.950415512465374e-06, "loss": 0.0007, "step": 88250 }, { "epoch": 40.76, "grad_norm": 0.14233584702014923, "learning_rate": 5.9388734995383205e-06, "loss": 0.0005, "step": 88500 }, { "epoch": 40.88, "grad_norm": 2.8523402214050293, "learning_rate": 5.927331486611266e-06, "loss": 0.0006, "step": 88750 }, { "epoch": 40.99, "grad_norm": 0.20620940625667572, "learning_rate": 5.915789473684212e-06, "loss": 0.0007, "step": 89000 }, { "epoch": 40.99, "eval_loss": 0.2811349332332611, "eval_runtime": 676.2894, "eval_samples_per_second": 9.984, "eval_steps_per_second": 0.312, "eval_wer": 22.298034393161036, "step": 89000 }, { "epoch": 41.11, "grad_norm": 0.5996530652046204, "learning_rate": 5.904247460757157e-06, "loss": 0.0004, "step": 89250 }, { "epoch": 41.23, "grad_norm": 0.19429056346416473, "learning_rate": 5.892705447830102e-06, "loss": 0.0004, "step": 89500 }, { "epoch": 41.34, "grad_norm": 0.07230094075202942, "learning_rate": 5.881163434903048e-06, "loss": 0.0005, "step": 89750 }, { "epoch": 41.46, "grad_norm": 1.4591439962387085, "learning_rate": 5.869621421975993e-06, "loss": 0.0004, "step": 90000 }, { "epoch": 41.46, "eval_loss": 0.2819642424583435, "eval_runtime": 679.526, "eval_samples_per_second": 9.936, "eval_steps_per_second": 0.311, "eval_wer": 22.71257710692822, "step": 90000 }, { "epoch": 41.57, "grad_norm": 0.07382282614707947, "learning_rate": 5.858079409048939e-06, "loss": 0.0005, "step": 90250 }, { "epoch": 41.69, "grad_norm": 0.9907983541488647, "learning_rate": 5.846583564173593e-06, "loss": 0.0006, "step": 90500 }, { "epoch": 41.8, "grad_norm": 0.11259205639362335, "learning_rate": 5.835041551246537e-06, "loss": 0.0006, "step": 90750 }, { "epoch": 41.92, "grad_norm": 0.06520923972129822, "learning_rate": 5.823499538319484e-06, "loss": 0.0006, "step": 91000 }, { "epoch": 41.92, "eval_loss": 0.28300294280052185, "eval_runtime": 684.3308, "eval_samples_per_second": 9.867, "eval_steps_per_second": 0.308, "eval_wer": 21.95886308189698, "step": 91000 } ], "logging_steps": 250, "max_steps": 217100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 1000, "total_flos": 1.4334909633589248e+20, "train_batch_size": 64, "trial_name": null, "trial_params": null }