diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4979 @@ +{ + "best_metric": 9.556300454211874, + "best_model_checkpoint": "./whisper-small-lv/checkpoint-134000", + "epoch": 30.861354214647626, + "eval_steps": 1000, + "global_step": 134000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06, + "grad_norm": 10.87213134765625, + "learning_rate": 4.92e-06, + "loss": 1.6097, + "step": 250 + }, + { + "epoch": 0.12, + "grad_norm": 5.319793701171875, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4717, + "step": 500 + }, + { + "epoch": 0.17, + "grad_norm": 5.155162811279297, + "learning_rate": 9.994327876412267e-06, + "loss": 0.3114, + "step": 750 + }, + { + "epoch": 0.23, + "grad_norm": 4.760919570922852, + "learning_rate": 9.9885635231727e-06, + "loss": 0.2588, + "step": 1000 + }, + { + "epoch": 0.23, + "eval_loss": 0.29197508096694946, + "eval_runtime": 858.8311, + "eval_samples_per_second": 7.862, + "eval_steps_per_second": 0.246, + "eval_wer": 31.485411666699726, + "step": 1000 + }, + { + "epoch": 0.29, + "grad_norm": 4.729026794433594, + "learning_rate": 9.982799169933133e-06, + "loss": 0.2303, + "step": 1250 + }, + { + "epoch": 0.35, + "grad_norm": 4.435351848602295, + "learning_rate": 9.977034816693568e-06, + "loss": 0.2142, + "step": 1500 + }, + { + "epoch": 0.4, + "grad_norm": 4.544332504272461, + "learning_rate": 9.971270463454001e-06, + "loss": 0.1945, + "step": 1750 + }, + { + "epoch": 0.46, + "grad_norm": 3.7481513023376465, + "learning_rate": 9.965506110214434e-06, + "loss": 0.1786, + "step": 2000 + }, + { + "epoch": 0.46, + "eval_loss": 0.2217991054058075, + "eval_runtime": 859.3081, + "eval_samples_per_second": 7.857, + "eval_steps_per_second": 0.246, + "eval_wer": 25.128428902949402, + "step": 2000 + }, + { + "epoch": 0.52, + "grad_norm": 3.2256762981414795, + "learning_rate": 9.959741756974867e-06, + "loss": 0.1682, + "step": 2250 + }, + { + "epoch": 0.58, + "grad_norm": 3.6426024436950684, + "learning_rate": 9.953977403735302e-06, + "loss": 0.1599, + "step": 2500 + }, + { + "epoch": 0.63, + "grad_norm": 4.202388286590576, + "learning_rate": 9.948213050495735e-06, + "loss": 0.1499, + "step": 2750 + }, + { + "epoch": 0.69, + "grad_norm": 4.096649169921875, + "learning_rate": 9.942448697256168e-06, + "loss": 0.1433, + "step": 3000 + }, + { + "epoch": 0.69, + "eval_loss": 0.18866823613643646, + "eval_runtime": 871.5329, + "eval_samples_per_second": 7.747, + "eval_steps_per_second": 0.242, + "eval_wer": 21.99853224110915, + "step": 3000 + }, + { + "epoch": 0.75, + "grad_norm": 3.3941469192504883, + "learning_rate": 9.936684344016601e-06, + "loss": 0.1369, + "step": 3250 + }, + { + "epoch": 0.81, + "grad_norm": 3.8051373958587646, + "learning_rate": 9.930919990777036e-06, + "loss": 0.1302, + "step": 3500 + }, + { + "epoch": 0.86, + "grad_norm": 4.107229232788086, + "learning_rate": 9.925155637537469e-06, + "loss": 0.1232, + "step": 3750 + }, + { + "epoch": 0.92, + "grad_norm": 3.520193338394165, + "learning_rate": 9.919391284297902e-06, + "loss": 0.1222, + "step": 4000 + }, + { + "epoch": 0.92, + "eval_loss": 0.16477710008621216, + "eval_runtime": 864.1221, + "eval_samples_per_second": 7.814, + "eval_steps_per_second": 0.244, + "eval_wer": 19.57871352916675, + "step": 4000 + }, + { + "epoch": 0.98, + "grad_norm": 4.162296295166016, + "learning_rate": 9.913626931058335e-06, + "loss": 0.1125, + "step": 4250 + }, + { + "epoch": 1.04, + "grad_norm": 2.3214519023895264, + "learning_rate": 9.90786257781877e-06, + "loss": 0.0832, + "step": 4500 + }, + { + "epoch": 1.09, + "grad_norm": 3.2043275833129883, + "learning_rate": 9.902098224579203e-06, + "loss": 0.0666, + "step": 4750 + }, + { + "epoch": 1.15, + "grad_norm": 2.7628753185272217, + "learning_rate": 9.896333871339636e-06, + "loss": 0.0645, + "step": 5000 + }, + { + "epoch": 1.15, + "eval_loss": 0.15404021739959717, + "eval_runtime": 868.0851, + "eval_samples_per_second": 7.778, + "eval_steps_per_second": 0.243, + "eval_wer": 18.128805759961917, + "step": 5000 + }, + { + "epoch": 1.21, + "grad_norm": 2.9250879287719727, + "learning_rate": 9.89056951810007e-06, + "loss": 0.0624, + "step": 5250 + }, + { + "epoch": 1.27, + "grad_norm": 2.2111053466796875, + "learning_rate": 9.884805164860504e-06, + "loss": 0.0653, + "step": 5500 + }, + { + "epoch": 1.32, + "grad_norm": 3.372081995010376, + "learning_rate": 9.879040811620937e-06, + "loss": 0.0625, + "step": 5750 + }, + { + "epoch": 1.38, + "grad_norm": 1.3998991250991821, + "learning_rate": 9.87327645838137e-06, + "loss": 0.06, + "step": 6000 + }, + { + "epoch": 1.38, + "eval_loss": 0.14527586102485657, + "eval_runtime": 867.9425, + "eval_samples_per_second": 7.779, + "eval_steps_per_second": 0.243, + "eval_wer": 17.113275284130353, + "step": 6000 + }, + { + "epoch": 1.44, + "grad_norm": 2.6196086406707764, + "learning_rate": 9.867512105141804e-06, + "loss": 0.0585, + "step": 6250 + }, + { + "epoch": 1.5, + "grad_norm": 2.8574118614196777, + "learning_rate": 9.861747751902238e-06, + "loss": 0.0604, + "step": 6500 + }, + { + "epoch": 1.55, + "grad_norm": 2.2665085792541504, + "learning_rate": 9.85598339866267e-06, + "loss": 0.0581, + "step": 6750 + }, + { + "epoch": 1.61, + "grad_norm": 2.5183279514312744, + "learning_rate": 9.850219045423104e-06, + "loss": 0.0561, + "step": 7000 + }, + { + "epoch": 1.61, + "eval_loss": 0.13737037777900696, + "eval_runtime": 874.2241, + "eval_samples_per_second": 7.723, + "eval_steps_per_second": 0.241, + "eval_wer": 16.181050042644348, + "step": 7000 + }, + { + "epoch": 1.67, + "grad_norm": 2.2866740226745605, + "learning_rate": 9.844454692183538e-06, + "loss": 0.0555, + "step": 7250 + }, + { + "epoch": 1.73, + "grad_norm": 3.0550711154937744, + "learning_rate": 9.838690338943971e-06, + "loss": 0.0551, + "step": 7500 + }, + { + "epoch": 1.78, + "grad_norm": 2.4893910884857178, + "learning_rate": 9.832925985704405e-06, + "loss": 0.0535, + "step": 7750 + }, + { + "epoch": 1.84, + "grad_norm": 3.216590642929077, + "learning_rate": 9.827161632464838e-06, + "loss": 0.0538, + "step": 8000 + }, + { + "epoch": 1.84, + "eval_loss": 0.12932516634464264, + "eval_runtime": 868.0214, + "eval_samples_per_second": 7.779, + "eval_steps_per_second": 0.243, + "eval_wer": 15.659400599004304, + "step": 8000 + }, + { + "epoch": 1.9, + "grad_norm": 2.0171263217926025, + "learning_rate": 9.821397279225272e-06, + "loss": 0.0512, + "step": 8250 + }, + { + "epoch": 1.96, + "grad_norm": 2.5316624641418457, + "learning_rate": 9.815655983398664e-06, + "loss": 0.0505, + "step": 8500 + }, + { + "epoch": 2.02, + "grad_norm": 1.9595179557800293, + "learning_rate": 9.809891630159097e-06, + "loss": 0.0442, + "step": 8750 + }, + { + "epoch": 2.07, + "grad_norm": 2.2406492233276367, + "learning_rate": 9.80412727691953e-06, + "loss": 0.021, + "step": 9000 + }, + { + "epoch": 2.07, + "eval_loss": 0.1289907842874527, + "eval_runtime": 855.2407, + "eval_samples_per_second": 7.895, + "eval_steps_per_second": 0.247, + "eval_wer": 14.508994981851359, + "step": 9000 + }, + { + "epoch": 2.13, + "grad_norm": 1.1805692911148071, + "learning_rate": 9.798362923679965e-06, + "loss": 0.0219, + "step": 9250 + }, + { + "epoch": 2.19, + "grad_norm": 1.599218487739563, + "learning_rate": 9.792598570440398e-06, + "loss": 0.0224, + "step": 9500 + }, + { + "epoch": 2.25, + "grad_norm": 2.063124418258667, + "learning_rate": 9.78683421720083e-06, + "loss": 0.0234, + "step": 9750 + }, + { + "epoch": 2.3, + "grad_norm": 2.7844250202178955, + "learning_rate": 9.781092921374222e-06, + "loss": 0.0231, + "step": 10000 + }, + { + "epoch": 2.3, + "eval_loss": 0.12939909100532532, + "eval_runtime": 857.9085, + "eval_samples_per_second": 7.87, + "eval_steps_per_second": 0.246, + "eval_wer": 14.405855167899716, + "step": 10000 + }, + { + "epoch": 2.36, + "grad_norm": 1.8661818504333496, + "learning_rate": 9.775328568134657e-06, + "loss": 0.0226, + "step": 10250 + }, + { + "epoch": 2.42, + "grad_norm": 1.9496829509735107, + "learning_rate": 9.76956421489509e-06, + "loss": 0.0221, + "step": 10500 + }, + { + "epoch": 2.48, + "grad_norm": 2.1408982276916504, + "learning_rate": 9.763799861655523e-06, + "loss": 0.0229, + "step": 10750 + }, + { + "epoch": 2.53, + "grad_norm": 2.1212332248687744, + "learning_rate": 9.758035508415956e-06, + "loss": 0.0223, + "step": 11000 + }, + { + "epoch": 2.53, + "eval_loss": 0.12141337990760803, + "eval_runtime": 863.1344, + "eval_samples_per_second": 7.823, + "eval_steps_per_second": 0.244, + "eval_wer": 13.654124600829084, + "step": 11000 + }, + { + "epoch": 2.59, + "grad_norm": 2.9175450801849365, + "learning_rate": 9.75227115517639e-06, + "loss": 0.0216, + "step": 11250 + }, + { + "epoch": 2.65, + "grad_norm": 2.1823952198028564, + "learning_rate": 9.746506801936824e-06, + "loss": 0.022, + "step": 11500 + }, + { + "epoch": 2.71, + "grad_norm": 2.2630937099456787, + "learning_rate": 9.740742448697257e-06, + "loss": 0.0216, + "step": 11750 + }, + { + "epoch": 2.76, + "grad_norm": 1.8593631982803345, + "learning_rate": 9.73497809545769e-06, + "loss": 0.0217, + "step": 12000 + }, + { + "epoch": 2.76, + "eval_loss": 0.12216547876596451, + "eval_runtime": 860.7599, + "eval_samples_per_second": 7.844, + "eval_steps_per_second": 0.245, + "eval_wer": 13.650157684907867, + "step": 12000 + }, + { + "epoch": 2.82, + "grad_norm": 1.9656251668930054, + "learning_rate": 9.729213742218125e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 2.88, + "grad_norm": 2.4857194423675537, + "learning_rate": 9.723449388978558e-06, + "loss": 0.022, + "step": 12500 + }, + { + "epoch": 2.94, + "grad_norm": 2.0210490226745605, + "learning_rate": 9.71768503573899e-06, + "loss": 0.0219, + "step": 12750 + }, + { + "epoch": 2.99, + "grad_norm": 1.4191805124282837, + "learning_rate": 9.711920682499424e-06, + "loss": 0.0223, + "step": 13000 + }, + { + "epoch": 2.99, + "eval_loss": 0.1153900995850563, + "eval_runtime": 867.7389, + "eval_samples_per_second": 7.781, + "eval_steps_per_second": 0.243, + "eval_wer": 13.041236091001052, + "step": 13000 + }, + { + "epoch": 3.05, + "grad_norm": 0.8872393369674683, + "learning_rate": 9.706156329259858e-06, + "loss": 0.0113, + "step": 13250 + }, + { + "epoch": 3.11, + "grad_norm": 1.113614559173584, + "learning_rate": 9.700391976020292e-06, + "loss": 0.0099, + "step": 13500 + }, + { + "epoch": 3.17, + "grad_norm": 2.4680979251861572, + "learning_rate": 9.694627622780725e-06, + "loss": 0.0105, + "step": 13750 + }, + { + "epoch": 3.22, + "grad_norm": 0.9727824926376343, + "learning_rate": 9.688886326954116e-06, + "loss": 0.0107, + "step": 14000 + }, + { + "epoch": 3.22, + "eval_loss": 0.12547273933887482, + "eval_runtime": 855.5477, + "eval_samples_per_second": 7.892, + "eval_steps_per_second": 0.247, + "eval_wer": 13.029335343237399, + "step": 14000 + }, + { + "epoch": 3.28, + "grad_norm": 0.43644458055496216, + "learning_rate": 9.68312197371455e-06, + "loss": 0.0103, + "step": 14250 + }, + { + "epoch": 3.34, + "grad_norm": 0.9514774680137634, + "learning_rate": 9.677357620474984e-06, + "loss": 0.0105, + "step": 14500 + }, + { + "epoch": 3.4, + "grad_norm": 1.8206623792648315, + "learning_rate": 9.671593267235417e-06, + "loss": 0.0111, + "step": 14750 + }, + { + "epoch": 3.45, + "grad_norm": 1.4537785053253174, + "learning_rate": 9.66582891399585e-06, + "loss": 0.0106, + "step": 15000 + }, + { + "epoch": 3.45, + "eval_loss": 0.12262353301048279, + "eval_runtime": 865.9365, + "eval_samples_per_second": 7.797, + "eval_steps_per_second": 0.244, + "eval_wer": 12.821072257373507, + "step": 15000 + }, + { + "epoch": 3.51, + "grad_norm": 1.7742477655410767, + "learning_rate": 9.660064560756283e-06, + "loss": 0.011, + "step": 15250 + }, + { + "epoch": 3.57, + "grad_norm": 1.4441837072372437, + "learning_rate": 9.654300207516718e-06, + "loss": 0.0117, + "step": 15500 + }, + { + "epoch": 3.63, + "grad_norm": 0.9210599064826965, + "learning_rate": 9.64853585427715e-06, + "loss": 0.0121, + "step": 15750 + }, + { + "epoch": 3.68, + "grad_norm": 3.534360647201538, + "learning_rate": 9.642771501037584e-06, + "loss": 0.0116, + "step": 16000 + }, + { + "epoch": 3.68, + "eval_loss": 0.12254315614700317, + "eval_runtime": 864.7376, + "eval_samples_per_second": 7.808, + "eval_steps_per_second": 0.244, + "eval_wer": 12.650494872761172, + "step": 16000 + }, + { + "epoch": 3.74, + "grad_norm": 1.7790424823760986, + "learning_rate": 9.637007147798017e-06, + "loss": 0.0115, + "step": 16250 + }, + { + "epoch": 3.8, + "grad_norm": 1.789859414100647, + "learning_rate": 9.631242794558452e-06, + "loss": 0.0116, + "step": 16500 + }, + { + "epoch": 3.86, + "grad_norm": 1.898427963256836, + "learning_rate": 9.625478441318885e-06, + "loss": 0.0115, + "step": 16750 + }, + { + "epoch": 3.92, + "grad_norm": 2.6319220066070557, + "learning_rate": 9.619714088079318e-06, + "loss": 0.0118, + "step": 17000 + }, + { + "epoch": 3.92, + "eval_loss": 0.1181536465883255, + "eval_runtime": 866.1146, + "eval_samples_per_second": 7.796, + "eval_steps_per_second": 0.244, + "eval_wer": 12.787353472043161, + "step": 17000 + }, + { + "epoch": 3.97, + "grad_norm": 2.027708053588867, + "learning_rate": 9.61394973483975e-06, + "loss": 0.0113, + "step": 17250 + }, + { + "epoch": 4.03, + "grad_norm": 0.5843227505683899, + "learning_rate": 9.608185381600186e-06, + "loss": 0.0092, + "step": 17500 + }, + { + "epoch": 4.09, + "grad_norm": 1.0360699892044067, + "learning_rate": 9.602421028360619e-06, + "loss": 0.0059, + "step": 17750 + }, + { + "epoch": 4.15, + "grad_norm": 2.0503389835357666, + "learning_rate": 9.596656675121052e-06, + "loss": 0.0069, + "step": 18000 + }, + { + "epoch": 4.15, + "eval_loss": 0.12232159078121185, + "eval_runtime": 874.5807, + "eval_samples_per_second": 7.72, + "eval_steps_per_second": 0.241, + "eval_wer": 12.555288890651964, + "step": 18000 + }, + { + "epoch": 4.2, + "grad_norm": 1.467950701713562, + "learning_rate": 9.590892321881485e-06, + "loss": 0.0062, + "step": 18250 + }, + { + "epoch": 4.26, + "grad_norm": 1.5525797605514526, + "learning_rate": 9.58512796864192e-06, + "loss": 0.0066, + "step": 18500 + }, + { + "epoch": 4.32, + "grad_norm": 1.1637401580810547, + "learning_rate": 9.57938667281531e-06, + "loss": 0.0074, + "step": 18750 + }, + { + "epoch": 4.38, + "grad_norm": 0.30910471081733704, + "learning_rate": 9.573622319575744e-06, + "loss": 0.0069, + "step": 19000 + }, + { + "epoch": 4.38, + "eval_loss": 0.12405180186033249, + "eval_runtime": 867.8174, + "eval_samples_per_second": 7.78, + "eval_steps_per_second": 0.243, + "eval_wer": 12.285538608009203, + "step": 19000 + }, + { + "epoch": 4.43, + "grad_norm": 2.687534809112549, + "learning_rate": 9.567857966336179e-06, + "loss": 0.0077, + "step": 19250 + }, + { + "epoch": 4.49, + "grad_norm": 1.3895576000213623, + "learning_rate": 9.562093613096612e-06, + "loss": 0.0074, + "step": 19500 + }, + { + "epoch": 4.55, + "grad_norm": 1.3012845516204834, + "learning_rate": 9.556329259857045e-06, + "loss": 0.0077, + "step": 19750 + }, + { + "epoch": 4.61, + "grad_norm": 1.299644112586975, + "learning_rate": 9.55056490661748e-06, + "loss": 0.0075, + "step": 20000 + }, + { + "epoch": 4.61, + "eval_loss": 0.12338556349277496, + "eval_runtime": 867.2958, + "eval_samples_per_second": 7.785, + "eval_steps_per_second": 0.243, + "eval_wer": 12.166531130372691, + "step": 20000 + }, + { + "epoch": 4.66, + "grad_norm": 1.9311870336532593, + "learning_rate": 9.544800553377913e-06, + "loss": 0.0074, + "step": 20250 + }, + { + "epoch": 4.72, + "grad_norm": 0.8604072332382202, + "learning_rate": 9.539036200138346e-06, + "loss": 0.0072, + "step": 20500 + }, + { + "epoch": 4.78, + "grad_norm": 1.3878774642944336, + "learning_rate": 9.533271846898779e-06, + "loss": 0.0075, + "step": 20750 + }, + { + "epoch": 4.84, + "grad_norm": 0.29127371311187744, + "learning_rate": 9.52753055107217e-06, + "loss": 0.0075, + "step": 21000 + }, + { + "epoch": 4.84, + "eval_loss": 0.12316736578941345, + "eval_runtime": 874.2652, + "eval_samples_per_second": 7.723, + "eval_steps_per_second": 0.241, + "eval_wer": 12.237935616954598, + "step": 21000 + }, + { + "epoch": 4.89, + "grad_norm": 0.9361172914505005, + "learning_rate": 9.521789255245563e-06, + "loss": 0.0077, + "step": 21250 + }, + { + "epoch": 4.95, + "grad_norm": 0.9276347160339355, + "learning_rate": 9.516024902005996e-06, + "loss": 0.0083, + "step": 21500 + }, + { + "epoch": 5.01, + "grad_norm": 0.9278080463409424, + "learning_rate": 9.510260548766429e-06, + "loss": 0.0073, + "step": 21750 + }, + { + "epoch": 5.07, + "grad_norm": 1.3185243606567383, + "learning_rate": 9.504496195526862e-06, + "loss": 0.0048, + "step": 22000 + }, + { + "epoch": 5.07, + "eval_loss": 0.12173668295145035, + "eval_runtime": 864.9202, + "eval_samples_per_second": 7.807, + "eval_steps_per_second": 0.244, + "eval_wer": 11.658765892456909, + "step": 22000 + }, + { + "epoch": 5.12, + "grad_norm": 0.7605379223823547, + "learning_rate": 9.498731842287297e-06, + "loss": 0.0051, + "step": 22250 + }, + { + "epoch": 5.18, + "grad_norm": 0.9030295014381409, + "learning_rate": 9.49296748904773e-06, + "loss": 0.0049, + "step": 22500 + }, + { + "epoch": 5.24, + "grad_norm": 0.5184769630432129, + "learning_rate": 9.487203135808163e-06, + "loss": 0.0051, + "step": 22750 + }, + { + "epoch": 5.3, + "grad_norm": 0.11390486359596252, + "learning_rate": 9.481438782568596e-06, + "loss": 0.0052, + "step": 23000 + }, + { + "epoch": 5.3, + "eval_loss": 0.1290554255247116, + "eval_runtime": 859.7689, + "eval_samples_per_second": 7.853, + "eval_steps_per_second": 0.245, + "eval_wer": 12.015788325366444, + "step": 23000 + }, + { + "epoch": 5.35, + "grad_norm": 0.6451162695884705, + "learning_rate": 9.475674429329031e-06, + "loss": 0.0054, + "step": 23250 + }, + { + "epoch": 5.41, + "grad_norm": 1.7149757146835327, + "learning_rate": 9.469910076089464e-06, + "loss": 0.0058, + "step": 23500 + }, + { + "epoch": 5.47, + "grad_norm": 0.8278890252113342, + "learning_rate": 9.464145722849897e-06, + "loss": 0.0055, + "step": 23750 + }, + { + "epoch": 5.53, + "grad_norm": 1.2663037776947021, + "learning_rate": 9.458381369610332e-06, + "loss": 0.0053, + "step": 24000 + }, + { + "epoch": 5.53, + "eval_loss": 0.12593936920166016, + "eval_runtime": 862.6306, + "eval_samples_per_second": 7.827, + "eval_steps_per_second": 0.245, + "eval_wer": 11.84719439871472, + "step": 24000 + }, + { + "epoch": 5.58, + "grad_norm": 0.23468834161758423, + "learning_rate": 9.452617016370765e-06, + "loss": 0.006, + "step": 24250 + }, + { + "epoch": 5.64, + "grad_norm": 2.092036247253418, + "learning_rate": 9.446852663131198e-06, + "loss": 0.0057, + "step": 24500 + }, + { + "epoch": 5.7, + "grad_norm": 1.3907628059387207, + "learning_rate": 9.441088309891631e-06, + "loss": 0.0058, + "step": 24750 + }, + { + "epoch": 5.76, + "grad_norm": 0.8375486135482788, + "learning_rate": 9.435347014065022e-06, + "loss": 0.0064, + "step": 25000 + }, + { + "epoch": 5.76, + "eval_loss": 0.12751518189907074, + "eval_runtime": 855.0746, + "eval_samples_per_second": 7.896, + "eval_steps_per_second": 0.247, + "eval_wer": 12.069341690302874, + "step": 25000 + }, + { + "epoch": 5.82, + "grad_norm": 1.525996208190918, + "learning_rate": 9.429582660825455e-06, + "loss": 0.0064, + "step": 25250 + }, + { + "epoch": 5.87, + "grad_norm": 1.7118737697601318, + "learning_rate": 9.423818307585888e-06, + "loss": 0.0061, + "step": 25500 + }, + { + "epoch": 5.93, + "grad_norm": 1.0815613269805908, + "learning_rate": 9.418053954346323e-06, + "loss": 0.0063, + "step": 25750 + }, + { + "epoch": 5.99, + "grad_norm": 1.5884686708450317, + "learning_rate": 9.412289601106756e-06, + "loss": 0.0056, + "step": 26000 + }, + { + "epoch": 5.99, + "eval_loss": 0.1244601458311081, + "eval_runtime": 859.6409, + "eval_samples_per_second": 7.854, + "eval_steps_per_second": 0.245, + "eval_wer": 11.96620187635123, + "step": 26000 + }, + { + "epoch": 6.05, + "grad_norm": 1.1554352045059204, + "learning_rate": 9.40652524786719e-06, + "loss": 0.0044, + "step": 26250 + }, + { + "epoch": 6.1, + "grad_norm": 1.1166563034057617, + "learning_rate": 9.400760894627624e-06, + "loss": 0.0037, + "step": 26500 + }, + { + "epoch": 6.16, + "grad_norm": 0.11678412556648254, + "learning_rate": 9.394996541388057e-06, + "loss": 0.0043, + "step": 26750 + }, + { + "epoch": 6.22, + "grad_norm": 0.4431307315826416, + "learning_rate": 9.38923218814849e-06, + "loss": 0.0039, + "step": 27000 + }, + { + "epoch": 6.22, + "eval_loss": 0.12606889009475708, + "eval_runtime": 869.3384, + "eval_samples_per_second": 7.767, + "eval_steps_per_second": 0.243, + "eval_wer": 12.134795803002955, + "step": 27000 + }, + { + "epoch": 6.28, + "grad_norm": 0.2918953597545624, + "learning_rate": 9.383467834908923e-06, + "loss": 0.0042, + "step": 27250 + }, + { + "epoch": 6.33, + "grad_norm": 1.4891304969787598, + "learning_rate": 9.377703481669358e-06, + "loss": 0.0043, + "step": 27500 + }, + { + "epoch": 6.39, + "grad_norm": 1.534620761871338, + "learning_rate": 9.371939128429791e-06, + "loss": 0.0043, + "step": 27750 + }, + { + "epoch": 6.45, + "grad_norm": 0.4249553680419922, + "learning_rate": 9.366174775190224e-06, + "loss": 0.0048, + "step": 28000 + }, + { + "epoch": 6.45, + "eval_loss": 0.12901394069194794, + "eval_runtime": 878.5914, + "eval_samples_per_second": 7.685, + "eval_steps_per_second": 0.24, + "eval_wer": 11.863062062399587, + "step": 28000 + }, + { + "epoch": 6.51, + "grad_norm": 0.7702826261520386, + "learning_rate": 9.360410421950657e-06, + "loss": 0.005, + "step": 28250 + }, + { + "epoch": 6.56, + "grad_norm": 1.9649180173873901, + "learning_rate": 9.354646068711092e-06, + "loss": 0.0054, + "step": 28500 + }, + { + "epoch": 6.62, + "grad_norm": 2.576911211013794, + "learning_rate": 9.348881715471525e-06, + "loss": 0.005, + "step": 28750 + }, + { + "epoch": 6.68, + "grad_norm": 1.0807923078536987, + "learning_rate": 9.343117362231958e-06, + "loss": 0.0047, + "step": 29000 + }, + { + "epoch": 6.68, + "eval_loss": 0.12914614379405975, + "eval_runtime": 865.8374, + "eval_samples_per_second": 7.798, + "eval_steps_per_second": 0.244, + "eval_wer": 11.928516175099668, + "step": 29000 + }, + { + "epoch": 6.74, + "grad_norm": 1.7776553630828857, + "learning_rate": 9.337353008992391e-06, + "loss": 0.0048, + "step": 29250 + }, + { + "epoch": 6.79, + "grad_norm": 1.3761777877807617, + "learning_rate": 9.331588655752826e-06, + "loss": 0.0055, + "step": 29500 + }, + { + "epoch": 6.85, + "grad_norm": 2.5688774585723877, + "learning_rate": 9.325824302513259e-06, + "loss": 0.0049, + "step": 29750 + }, + { + "epoch": 6.91, + "grad_norm": 0.4477705657482147, + "learning_rate": 9.320059949273692e-06, + "loss": 0.0051, + "step": 30000 + }, + { + "epoch": 6.91, + "eval_loss": 0.1264464259147644, + "eval_runtime": 860.2965, + "eval_samples_per_second": 7.848, + "eval_steps_per_second": 0.245, + "eval_wer": 11.884880099966281, + "step": 30000 + }, + { + "epoch": 6.97, + "grad_norm": 2.3739101886749268, + "learning_rate": 9.314318653447085e-06, + "loss": 0.0061, + "step": 30250 + }, + { + "epoch": 7.02, + "grad_norm": 0.3228144645690918, + "learning_rate": 9.308554300207518e-06, + "loss": 0.0041, + "step": 30500 + }, + { + "epoch": 7.08, + "grad_norm": 1.3196961879730225, + "learning_rate": 9.302789946967951e-06, + "loss": 0.003, + "step": 30750 + }, + { + "epoch": 7.14, + "grad_norm": 1.6070516109466553, + "learning_rate": 9.297025593728384e-06, + "loss": 0.0036, + "step": 31000 + }, + { + "epoch": 7.14, + "eval_loss": 0.1240488588809967, + "eval_runtime": 867.7656, + "eval_samples_per_second": 7.781, + "eval_steps_per_second": 0.243, + "eval_wer": 11.387032151853543, + "step": 31000 + }, + { + "epoch": 7.2, + "grad_norm": 0.3357069492340088, + "learning_rate": 9.291284297901775e-06, + "loss": 0.0032, + "step": 31250 + }, + { + "epoch": 7.25, + "grad_norm": 0.24253146350383759, + "learning_rate": 9.28551994466221e-06, + "loss": 0.0039, + "step": 31500 + }, + { + "epoch": 7.31, + "grad_norm": 2.30023455619812, + "learning_rate": 9.279755591422643e-06, + "loss": 0.0042, + "step": 31750 + }, + { + "epoch": 7.37, + "grad_norm": 0.30979445576667786, + "learning_rate": 9.273991238183076e-06, + "loss": 0.0041, + "step": 32000 + }, + { + "epoch": 7.37, + "eval_loss": 0.12816853821277618, + "eval_runtime": 929.7626, + "eval_samples_per_second": 7.262, + "eval_steps_per_second": 0.227, + "eval_wer": 11.621080191205348, + "step": 32000 + }, + { + "epoch": 7.43, + "grad_norm": 1.5548877716064453, + "learning_rate": 9.26822688494351e-06, + "loss": 0.0043, + "step": 32250 + }, + { + "epoch": 7.49, + "grad_norm": 0.23581494390964508, + "learning_rate": 9.262462531703944e-06, + "loss": 0.004, + "step": 32500 + }, + { + "epoch": 7.54, + "grad_norm": 0.1430416703224182, + "learning_rate": 9.256698178464377e-06, + "loss": 0.0042, + "step": 32750 + }, + { + "epoch": 7.6, + "grad_norm": 1.1397004127502441, + "learning_rate": 9.25093382522481e-06, + "loss": 0.0042, + "step": 33000 + }, + { + "epoch": 7.6, + "eval_loss": 0.13014425337314606, + "eval_runtime": 972.7499, + "eval_samples_per_second": 6.941, + "eval_steps_per_second": 0.217, + "eval_wer": 11.75198841660551, + "step": 33000 + }, + { + "epoch": 7.66, + "grad_norm": 2.46602463722229, + "learning_rate": 9.245169471985243e-06, + "loss": 0.0047, + "step": 33250 + }, + { + "epoch": 7.72, + "grad_norm": 0.6565764546394348, + "learning_rate": 9.239405118745678e-06, + "loss": 0.0039, + "step": 33500 + }, + { + "epoch": 7.77, + "grad_norm": 0.5344523787498474, + "learning_rate": 9.233640765506111e-06, + "loss": 0.0048, + "step": 33750 + }, + { + "epoch": 7.83, + "grad_norm": 1.3736058473587036, + "learning_rate": 9.227876412266544e-06, + "loss": 0.0042, + "step": 34000 + }, + { + "epoch": 7.83, + "eval_loss": 0.12513211369514465, + "eval_runtime": 868.4948, + "eval_samples_per_second": 7.774, + "eval_steps_per_second": 0.243, + "eval_wer": 11.38901560981415, + "step": 34000 + }, + { + "epoch": 7.89, + "grad_norm": 0.4617059826850891, + "learning_rate": 9.222112059026977e-06, + "loss": 0.0041, + "step": 34250 + }, + { + "epoch": 7.95, + "grad_norm": 0.7892825603485107, + "learning_rate": 9.216347705787412e-06, + "loss": 0.004, + "step": 34500 + }, + { + "epoch": 8.0, + "grad_norm": 0.2047586441040039, + "learning_rate": 9.210583352547845e-06, + "loss": 0.0043, + "step": 34750 + }, + { + "epoch": 8.06, + "grad_norm": 0.3856526017189026, + "learning_rate": 9.204818999308278e-06, + "loss": 0.0029, + "step": 35000 + }, + { + "epoch": 8.06, + "eval_loss": 0.12612038850784302, + "eval_runtime": 863.5386, + "eval_samples_per_second": 7.819, + "eval_steps_per_second": 0.244, + "eval_wer": 11.222405141123033, + "step": 35000 + }, + { + "epoch": 8.12, + "grad_norm": 1.465882420539856, + "learning_rate": 9.199054646068711e-06, + "loss": 0.0028, + "step": 35250 + }, + { + "epoch": 8.18, + "grad_norm": 0.45589736104011536, + "learning_rate": 9.193290292829146e-06, + "loss": 0.0028, + "step": 35500 + }, + { + "epoch": 8.23, + "grad_norm": 1.3802299499511719, + "learning_rate": 9.187525939589579e-06, + "loss": 0.0032, + "step": 35750 + }, + { + "epoch": 8.29, + "grad_norm": 1.1035882234573364, + "learning_rate": 9.181761586350012e-06, + "loss": 0.0034, + "step": 36000 + }, + { + "epoch": 8.29, + "eval_loss": 0.1280359923839569, + "eval_runtime": 861.7939, + "eval_samples_per_second": 7.835, + "eval_steps_per_second": 0.245, + "eval_wer": 11.510006545411269, + "step": 36000 + }, + { + "epoch": 8.35, + "grad_norm": 1.5274240970611572, + "learning_rate": 9.176020290523405e-06, + "loss": 0.0039, + "step": 36250 + }, + { + "epoch": 8.41, + "grad_norm": 2.005580186843872, + "learning_rate": 9.170255937283838e-06, + "loss": 0.0033, + "step": 36500 + }, + { + "epoch": 8.46, + "grad_norm": 0.9045758843421936, + "learning_rate": 9.164491584044271e-06, + "loss": 0.0046, + "step": 36750 + }, + { + "epoch": 8.52, + "grad_norm": 1.8008005619049072, + "learning_rate": 9.158727230804704e-06, + "loss": 0.0035, + "step": 37000 + }, + { + "epoch": 8.52, + "eval_loss": 0.13088497519493103, + "eval_runtime": 856.6628, + "eval_samples_per_second": 7.882, + "eval_steps_per_second": 0.246, + "eval_wer": 11.527857667056747, + "step": 37000 + }, + { + "epoch": 8.58, + "grad_norm": 1.5927373170852661, + "learning_rate": 9.152962877565137e-06, + "loss": 0.0037, + "step": 37250 + }, + { + "epoch": 8.64, + "grad_norm": 1.2567116022109985, + "learning_rate": 9.14722158173853e-06, + "loss": 0.0035, + "step": 37500 + }, + { + "epoch": 8.69, + "grad_norm": 1.1662616729736328, + "learning_rate": 9.141457228498963e-06, + "loss": 0.0039, + "step": 37750 + }, + { + "epoch": 8.75, + "grad_norm": 1.5941380262374878, + "learning_rate": 9.135692875259396e-06, + "loss": 0.0043, + "step": 38000 + }, + { + "epoch": 8.75, + "eval_loss": 0.1274692267179489, + "eval_runtime": 865.0862, + "eval_samples_per_second": 7.805, + "eval_steps_per_second": 0.244, + "eval_wer": 11.513973461332487, + "step": 38000 + }, + { + "epoch": 8.81, + "grad_norm": 1.7074130773544312, + "learning_rate": 9.12992852201983e-06, + "loss": 0.004, + "step": 38250 + }, + { + "epoch": 8.87, + "grad_norm": 1.6893212795257568, + "learning_rate": 9.124164168780264e-06, + "loss": 0.004, + "step": 38500 + }, + { + "epoch": 8.92, + "grad_norm": 1.044587254524231, + "learning_rate": 9.118399815540697e-06, + "loss": 0.0035, + "step": 38750 + }, + { + "epoch": 8.98, + "grad_norm": 0.12974177300930023, + "learning_rate": 9.11263546230113e-06, + "loss": 0.004, + "step": 39000 + }, + { + "epoch": 8.98, + "eval_loss": 0.12444661557674408, + "eval_runtime": 872.943, + "eval_samples_per_second": 7.735, + "eval_steps_per_second": 0.242, + "eval_wer": 11.139099906777476, + "step": 39000 + }, + { + "epoch": 9.04, + "grad_norm": 1.5708075761795044, + "learning_rate": 9.106871109061563e-06, + "loss": 0.0031, + "step": 39250 + }, + { + "epoch": 9.1, + "grad_norm": 1.6801316738128662, + "learning_rate": 9.101106755821998e-06, + "loss": 0.0023, + "step": 39500 + }, + { + "epoch": 9.15, + "grad_norm": 1.937129020690918, + "learning_rate": 9.095342402582431e-06, + "loss": 0.0025, + "step": 39750 + }, + { + "epoch": 9.21, + "grad_norm": 0.7914566993713379, + "learning_rate": 9.089578049342864e-06, + "loss": 0.0032, + "step": 40000 + }, + { + "epoch": 9.21, + "eval_loss": 0.12887698411941528, + "eval_runtime": 862.542, + "eval_samples_per_second": 7.828, + "eval_steps_per_second": 0.245, + "eval_wer": 11.561576452387092, + "step": 40000 + }, + { + "epoch": 9.27, + "grad_norm": 0.04653190076351166, + "learning_rate": 9.083813696103297e-06, + "loss": 0.0027, + "step": 40250 + }, + { + "epoch": 9.33, + "grad_norm": 0.34394124150276184, + "learning_rate": 9.07807240027669e-06, + "loss": 0.0033, + "step": 40500 + }, + { + "epoch": 9.39, + "grad_norm": 0.496722936630249, + "learning_rate": 9.072308047037123e-06, + "loss": 0.003, + "step": 40750 + }, + { + "epoch": 9.44, + "grad_norm": 0.2492765635251999, + "learning_rate": 9.066543693797556e-06, + "loss": 0.0029, + "step": 41000 + }, + { + "epoch": 9.44, + "eval_loss": 0.12612169981002808, + "eval_runtime": 862.7524, + "eval_samples_per_second": 7.826, + "eval_steps_per_second": 0.245, + "eval_wer": 11.252157010532162, + "step": 41000 + }, + { + "epoch": 9.5, + "grad_norm": 2.209688425064087, + "learning_rate": 9.06077934055799e-06, + "loss": 0.0031, + "step": 41250 + }, + { + "epoch": 9.56, + "grad_norm": 0.5090010166168213, + "learning_rate": 9.055014987318424e-06, + "loss": 0.0032, + "step": 41500 + }, + { + "epoch": 9.62, + "grad_norm": 0.561119794845581, + "learning_rate": 9.049250634078857e-06, + "loss": 0.0039, + "step": 41750 + }, + { + "epoch": 9.67, + "grad_norm": 0.20059019327163696, + "learning_rate": 9.04348628083929e-06, + "loss": 0.0031, + "step": 42000 + }, + { + "epoch": 9.67, + "eval_loss": 0.1292741298675537, + "eval_runtime": 864.5766, + "eval_samples_per_second": 7.81, + "eval_steps_per_second": 0.244, + "eval_wer": 11.369181030208065, + "step": 42000 + }, + { + "epoch": 9.73, + "grad_norm": 0.5912229418754578, + "learning_rate": 9.037721927599723e-06, + "loss": 0.0032, + "step": 42250 + }, + { + "epoch": 9.79, + "grad_norm": 0.274827241897583, + "learning_rate": 9.031957574360158e-06, + "loss": 0.0034, + "step": 42500 + }, + { + "epoch": 9.85, + "grad_norm": 1.7666079998016357, + "learning_rate": 9.026193221120591e-06, + "loss": 0.0037, + "step": 42750 + }, + { + "epoch": 9.9, + "grad_norm": 0.4969967305660248, + "learning_rate": 9.020428867881024e-06, + "loss": 0.0031, + "step": 43000 + }, + { + "epoch": 9.9, + "eval_loss": 0.1289782077074051, + "eval_runtime": 871.6635, + "eval_samples_per_second": 7.746, + "eval_steps_per_second": 0.242, + "eval_wer": 11.055794672431919, + "step": 43000 + }, + { + "epoch": 9.96, + "grad_norm": 2.4347357749938965, + "learning_rate": 9.014687572054416e-06, + "loss": 0.0033, + "step": 43250 + }, + { + "epoch": 10.02, + "grad_norm": 0.36096087098121643, + "learning_rate": 9.008946276227809e-06, + "loss": 0.004, + "step": 43500 + }, + { + "epoch": 10.08, + "grad_norm": 0.30262959003448486, + "learning_rate": 9.003181922988242e-06, + "loss": 0.0022, + "step": 43750 + }, + { + "epoch": 10.13, + "grad_norm": 0.9803399443626404, + "learning_rate": 8.997417569748675e-06, + "loss": 0.0021, + "step": 44000 + }, + { + "epoch": 10.13, + "eval_loss": 0.12753725051879883, + "eval_runtime": 848.9932, + "eval_samples_per_second": 7.953, + "eval_steps_per_second": 0.249, + "eval_wer": 11.154967570462345, + "step": 44000 + }, + { + "epoch": 10.19, + "grad_norm": 0.08501572906970978, + "learning_rate": 8.991653216509108e-06, + "loss": 0.0023, + "step": 44250 + }, + { + "epoch": 10.25, + "grad_norm": 0.17812615633010864, + "learning_rate": 8.985888863269543e-06, + "loss": 0.0022, + "step": 44500 + }, + { + "epoch": 10.31, + "grad_norm": 0.192487895488739, + "learning_rate": 8.980124510029976e-06, + "loss": 0.0023, + "step": 44750 + }, + { + "epoch": 10.36, + "grad_norm": 1.9704803228378296, + "learning_rate": 8.974360156790409e-06, + "loss": 0.0026, + "step": 45000 + }, + { + "epoch": 10.36, + "eval_loss": 0.1309252232313156, + "eval_runtime": 858.8815, + "eval_samples_per_second": 7.861, + "eval_steps_per_second": 0.246, + "eval_wer": 11.109348037368347, + "step": 45000 + }, + { + "epoch": 10.42, + "grad_norm": 0.22806860506534576, + "learning_rate": 8.968595803550842e-06, + "loss": 0.0027, + "step": 45250 + }, + { + "epoch": 10.48, + "grad_norm": 0.2697317600250244, + "learning_rate": 8.962831450311276e-06, + "loss": 0.0031, + "step": 45500 + }, + { + "epoch": 10.54, + "grad_norm": 0.503118097782135, + "learning_rate": 8.95706709707171e-06, + "loss": 0.0034, + "step": 45750 + }, + { + "epoch": 10.59, + "grad_norm": 1.2143537998199463, + "learning_rate": 8.951302743832143e-06, + "loss": 0.0031, + "step": 46000 + }, + { + "epoch": 10.59, + "eval_loss": 0.13377217948436737, + "eval_runtime": 857.5578, + "eval_samples_per_second": 7.874, + "eval_steps_per_second": 0.246, + "eval_wer": 11.539758414820398, + "step": 46000 + }, + { + "epoch": 10.65, + "grad_norm": 0.5072382688522339, + "learning_rate": 8.945538390592576e-06, + "loss": 0.0034, + "step": 46250 + }, + { + "epoch": 10.71, + "grad_norm": 0.45895808935165405, + "learning_rate": 8.939774037353009e-06, + "loss": 0.0029, + "step": 46500 + }, + { + "epoch": 10.77, + "grad_norm": 1.3219285011291504, + "learning_rate": 8.934009684113443e-06, + "loss": 0.0031, + "step": 46750 + }, + { + "epoch": 10.82, + "grad_norm": 0.9804620742797852, + "learning_rate": 8.928245330873877e-06, + "loss": 0.0035, + "step": 47000 + }, + { + "epoch": 10.82, + "eval_loss": 0.12685059010982513, + "eval_runtime": 862.1359, + "eval_samples_per_second": 7.832, + "eval_steps_per_second": 0.245, + "eval_wer": 11.055794672431919, + "step": 47000 + }, + { + "epoch": 10.88, + "grad_norm": 0.4526994228363037, + "learning_rate": 8.92248097763431e-06, + "loss": 0.0034, + "step": 47250 + }, + { + "epoch": 10.94, + "grad_norm": 0.8010832667350769, + "learning_rate": 8.916739681807703e-06, + "loss": 0.0064, + "step": 47500 + }, + { + "epoch": 11.0, + "grad_norm": 0.5813642144203186, + "learning_rate": 8.910975328568136e-06, + "loss": 0.0031, + "step": 47750 + }, + { + "epoch": 11.05, + "grad_norm": 0.029226483777165413, + "learning_rate": 8.905210975328569e-06, + "loss": 0.0021, + "step": 48000 + }, + { + "epoch": 11.05, + "eval_loss": 0.1262122392654419, + "eval_runtime": 848.9631, + "eval_samples_per_second": 7.953, + "eval_steps_per_second": 0.249, + "eval_wer": 11.028026260983397, + "step": 48000 + }, + { + "epoch": 11.11, + "grad_norm": 1.0615992546081543, + "learning_rate": 8.899446622089002e-06, + "loss": 0.0021, + "step": 48250 + }, + { + "epoch": 11.17, + "grad_norm": 1.0726251602172852, + "learning_rate": 8.893682268849437e-06, + "loss": 0.0019, + "step": 48500 + }, + { + "epoch": 11.23, + "grad_norm": 0.3940158486366272, + "learning_rate": 8.88791791560987e-06, + "loss": 0.0026, + "step": 48750 + }, + { + "epoch": 11.29, + "grad_norm": 0.5974787473678589, + "learning_rate": 8.882153562370303e-06, + "loss": 0.0025, + "step": 49000 + }, + { + "epoch": 11.29, + "eval_loss": 0.12749157845973969, + "eval_runtime": 861.4829, + "eval_samples_per_second": 7.838, + "eval_steps_per_second": 0.245, + "eval_wer": 11.279925421980682, + "step": 49000 + }, + { + "epoch": 11.34, + "grad_norm": 1.4202208518981934, + "learning_rate": 8.876389209130736e-06, + "loss": 0.0023, + "step": 49250 + }, + { + "epoch": 11.4, + "grad_norm": 1.4934862852096558, + "learning_rate": 8.87062485589117e-06, + "loss": 0.0026, + "step": 49500 + }, + { + "epoch": 11.46, + "grad_norm": 1.8105831146240234, + "learning_rate": 8.864860502651604e-06, + "loss": 0.0026, + "step": 49750 + }, + { + "epoch": 11.52, + "grad_norm": 0.6586973071098328, + "learning_rate": 8.859096149412037e-06, + "loss": 0.0024, + "step": 50000 + }, + { + "epoch": 11.52, + "eval_loss": 0.13289304077625275, + "eval_runtime": 861.8006, + "eval_samples_per_second": 7.835, + "eval_steps_per_second": 0.245, + "eval_wer": 11.06769542019557, + "step": 50000 + }, + { + "epoch": 11.57, + "grad_norm": 0.08556759357452393, + "learning_rate": 8.85333179617247e-06, + "loss": 0.0027, + "step": 50250 + }, + { + "epoch": 11.63, + "grad_norm": 0.31020841002464294, + "learning_rate": 8.847567442932904e-06, + "loss": 0.0028, + "step": 50500 + }, + { + "epoch": 11.69, + "grad_norm": 0.2600391209125519, + "learning_rate": 8.841803089693337e-06, + "loss": 0.003, + "step": 50750 + }, + { + "epoch": 11.75, + "grad_norm": 1.2706468105316162, + "learning_rate": 8.83603873645377e-06, + "loss": 0.0031, + "step": 51000 + }, + { + "epoch": 11.75, + "eval_loss": 0.13028421998023987, + "eval_runtime": 861.6838, + "eval_samples_per_second": 7.836, + "eval_steps_per_second": 0.245, + "eval_wer": 10.93083682091358, + "step": 51000 + }, + { + "epoch": 11.8, + "grad_norm": 0.8451604247093201, + "learning_rate": 8.830274383214205e-06, + "loss": 0.003, + "step": 51250 + }, + { + "epoch": 11.86, + "grad_norm": 1.8342962265014648, + "learning_rate": 8.824510029974638e-06, + "loss": 0.0033, + "step": 51500 + }, + { + "epoch": 11.92, + "grad_norm": 0.15427502989768982, + "learning_rate": 8.818745676735071e-06, + "loss": 0.0028, + "step": 51750 + }, + { + "epoch": 11.98, + "grad_norm": 1.4967540502548218, + "learning_rate": 8.812981323495504e-06, + "loss": 0.0028, + "step": 52000 + }, + { + "epoch": 11.98, + "eval_loss": 0.13051362335681915, + "eval_runtime": 865.4409, + "eval_samples_per_second": 7.802, + "eval_steps_per_second": 0.244, + "eval_wer": 11.228355515004859, + "step": 52000 + }, + { + "epoch": 12.03, + "grad_norm": 1.1010907888412476, + "learning_rate": 8.807240027668896e-06, + "loss": 0.0022, + "step": 52250 + }, + { + "epoch": 12.09, + "grad_norm": 0.7162982821464539, + "learning_rate": 8.801475674429329e-06, + "loss": 0.0018, + "step": 52500 + }, + { + "epoch": 12.15, + "grad_norm": 0.30756324529647827, + "learning_rate": 8.795711321189762e-06, + "loss": 0.0023, + "step": 52750 + }, + { + "epoch": 12.21, + "grad_norm": 1.2408708333969116, + "learning_rate": 8.789946967950197e-06, + "loss": 0.002, + "step": 53000 + }, + { + "epoch": 12.21, + "eval_loss": 0.13401812314987183, + "eval_runtime": 859.7266, + "eval_samples_per_second": 7.854, + "eval_steps_per_second": 0.245, + "eval_wer": 11.105381121447131, + "step": 53000 + }, + { + "epoch": 12.26, + "grad_norm": 1.0242916345596313, + "learning_rate": 8.78418261471063e-06, + "loss": 0.0022, + "step": 53250 + }, + { + "epoch": 12.32, + "grad_norm": 0.08462110161781311, + "learning_rate": 8.778418261471063e-06, + "loss": 0.0023, + "step": 53500 + }, + { + "epoch": 12.38, + "grad_norm": 0.19610953330993652, + "learning_rate": 8.772653908231497e-06, + "loss": 0.0021, + "step": 53750 + }, + { + "epoch": 12.44, + "grad_norm": 2.1351606845855713, + "learning_rate": 8.76688955499193e-06, + "loss": 0.0025, + "step": 54000 + }, + { + "epoch": 12.44, + "eval_loss": 0.12935225665569305, + "eval_runtime": 862.1306, + "eval_samples_per_second": 7.832, + "eval_steps_per_second": 0.245, + "eval_wer": 10.899101493543844, + "step": 54000 + }, + { + "epoch": 12.49, + "grad_norm": 1.4103162288665771, + "learning_rate": 8.761125201752364e-06, + "loss": 0.0021, + "step": 54250 + }, + { + "epoch": 12.55, + "grad_norm": 0.27089348435401917, + "learning_rate": 8.755360848512797e-06, + "loss": 0.0023, + "step": 54500 + }, + { + "epoch": 12.61, + "grad_norm": 2.1209752559661865, + "learning_rate": 8.749596495273231e-06, + "loss": 0.0026, + "step": 54750 + }, + { + "epoch": 12.67, + "grad_norm": 0.4095695912837982, + "learning_rate": 8.743855199446623e-06, + "loss": 0.0025, + "step": 55000 + }, + { + "epoch": 12.67, + "eval_loss": 0.12996189296245575, + "eval_runtime": 859.3218, + "eval_samples_per_second": 7.857, + "eval_steps_per_second": 0.246, + "eval_wer": 10.893151119662019, + "step": 55000 + }, + { + "epoch": 12.72, + "grad_norm": 0.7925490736961365, + "learning_rate": 8.738090846207057e-06, + "loss": 0.0027, + "step": 55250 + }, + { + "epoch": 12.78, + "grad_norm": 0.7041000723838806, + "learning_rate": 8.73232649296749e-06, + "loss": 0.0021, + "step": 55500 + }, + { + "epoch": 12.84, + "grad_norm": 1.976866364479065, + "learning_rate": 8.726562139727924e-06, + "loss": 0.0028, + "step": 55750 + }, + { + "epoch": 12.9, + "grad_norm": 0.39583584666252136, + "learning_rate": 8.720820843901315e-06, + "loss": 0.0025, + "step": 56000 + }, + { + "epoch": 12.9, + "eval_loss": 0.13089019060134888, + "eval_runtime": 856.9052, + "eval_samples_per_second": 7.88, + "eval_steps_per_second": 0.246, + "eval_wer": 10.791994763670983, + "step": 56000 + }, + { + "epoch": 12.95, + "grad_norm": 0.4872288405895233, + "learning_rate": 8.715056490661748e-06, + "loss": 0.0027, + "step": 56250 + }, + { + "epoch": 13.01, + "grad_norm": 2.2456576824188232, + "learning_rate": 8.709292137422181e-06, + "loss": 0.0025, + "step": 56500 + }, + { + "epoch": 13.07, + "grad_norm": 0.20264917612075806, + "learning_rate": 8.703527784182614e-06, + "loss": 0.0018, + "step": 56750 + }, + { + "epoch": 13.13, + "grad_norm": 0.3795398771762848, + "learning_rate": 8.697763430943049e-06, + "loss": 0.0016, + "step": 57000 + }, + { + "epoch": 13.13, + "eval_loss": 0.12614800035953522, + "eval_runtime": 860.8165, + "eval_samples_per_second": 7.844, + "eval_steps_per_second": 0.245, + "eval_wer": 10.69083840767995, + "step": 57000 + }, + { + "epoch": 13.19, + "grad_norm": 2.3237907886505127, + "learning_rate": 8.691999077703482e-06, + "loss": 0.0017, + "step": 57250 + }, + { + "epoch": 13.24, + "grad_norm": 0.7427815794944763, + "learning_rate": 8.686234724463915e-06, + "loss": 0.002, + "step": 57500 + }, + { + "epoch": 13.3, + "grad_norm": 0.1203819140791893, + "learning_rate": 8.68047037122435e-06, + "loss": 0.002, + "step": 57750 + }, + { + "epoch": 13.36, + "grad_norm": 0.11056603491306305, + "learning_rate": 8.674706017984783e-06, + "loss": 0.0024, + "step": 58000 + }, + { + "epoch": 13.36, + "eval_loss": 0.13082969188690186, + "eval_runtime": 861.4091, + "eval_samples_per_second": 7.838, + "eval_steps_per_second": 0.245, + "eval_wer": 10.873316540055933, + "step": 58000 + }, + { + "epoch": 13.42, + "grad_norm": 0.3026280701160431, + "learning_rate": 8.668941664745216e-06, + "loss": 0.002, + "step": 58250 + }, + { + "epoch": 13.47, + "grad_norm": 1.0132057666778564, + "learning_rate": 8.663177311505649e-06, + "loss": 0.0023, + "step": 58500 + }, + { + "epoch": 13.53, + "grad_norm": 0.26512446999549866, + "learning_rate": 8.657412958266084e-06, + "loss": 0.0025, + "step": 58750 + }, + { + "epoch": 13.59, + "grad_norm": 1.13190758228302, + "learning_rate": 8.651648605026517e-06, + "loss": 0.0022, + "step": 59000 + }, + { + "epoch": 13.59, + "eval_loss": 0.13342134654521942, + "eval_runtime": 863.3109, + "eval_samples_per_second": 7.821, + "eval_steps_per_second": 0.244, + "eval_wer": 10.825713549001328, + "step": 59000 + }, + { + "epoch": 13.65, + "grad_norm": 0.338905930519104, + "learning_rate": 8.64588425178695e-06, + "loss": 0.0022, + "step": 59250 + }, + { + "epoch": 13.7, + "grad_norm": 0.8769376873970032, + "learning_rate": 8.640119898547383e-06, + "loss": 0.0021, + "step": 59500 + }, + { + "epoch": 13.76, + "grad_norm": 0.6874887943267822, + "learning_rate": 8.634355545307818e-06, + "loss": 0.0023, + "step": 59750 + }, + { + "epoch": 13.82, + "grad_norm": 0.38465040922164917, + "learning_rate": 8.62859119206825e-06, + "loss": 0.0027, + "step": 60000 + }, + { + "epoch": 13.82, + "eval_loss": 0.1326436996459961, + "eval_runtime": 862.9972, + "eval_samples_per_second": 7.824, + "eval_steps_per_second": 0.244, + "eval_wer": 10.978439811968185, + "step": 60000 + }, + { + "epoch": 13.88, + "grad_norm": 0.63032466173172, + "learning_rate": 8.622826838828684e-06, + "loss": 0.0025, + "step": 60250 + }, + { + "epoch": 13.93, + "grad_norm": 1.5229909420013428, + "learning_rate": 8.617085543002077e-06, + "loss": 0.0028, + "step": 60500 + }, + { + "epoch": 13.99, + "grad_norm": 1.10037100315094, + "learning_rate": 8.61132118976251e-06, + "loss": 0.0023, + "step": 60750 + }, + { + "epoch": 14.05, + "grad_norm": 0.09242303669452667, + "learning_rate": 8.605556836522943e-06, + "loss": 0.0014, + "step": 61000 + }, + { + "epoch": 14.05, + "eval_loss": 0.13251730799674988, + "eval_runtime": 858.8112, + "eval_samples_per_second": 7.862, + "eval_steps_per_second": 0.246, + "eval_wer": 10.694805323601166, + "step": 61000 + }, + { + "epoch": 14.11, + "grad_norm": 2.284032106399536, + "learning_rate": 8.599792483283378e-06, + "loss": 0.002, + "step": 61250 + }, + { + "epoch": 14.16, + "grad_norm": 0.04196714237332344, + "learning_rate": 8.59402813004381e-06, + "loss": 0.0016, + "step": 61500 + }, + { + "epoch": 14.22, + "grad_norm": 1.1200813055038452, + "learning_rate": 8.588263776804244e-06, + "loss": 0.0017, + "step": 61750 + }, + { + "epoch": 14.28, + "grad_norm": 0.09567234665155411, + "learning_rate": 8.582499423564677e-06, + "loss": 0.002, + "step": 62000 + }, + { + "epoch": 14.28, + "eval_loss": 0.13003449141979218, + "eval_runtime": 856.542, + "eval_samples_per_second": 7.883, + "eval_steps_per_second": 0.246, + "eval_wer": 10.75629252038003, + "step": 62000 + }, + { + "epoch": 14.34, + "grad_norm": 0.9637365937232971, + "learning_rate": 8.576781185151026e-06, + "loss": 0.0019, + "step": 62250 + }, + { + "epoch": 14.39, + "grad_norm": 0.9736013412475586, + "learning_rate": 8.571016831911461e-06, + "loss": 0.0019, + "step": 62500 + }, + { + "epoch": 14.45, + "grad_norm": 0.8420233130455017, + "learning_rate": 8.565252478671894e-06, + "loss": 0.0022, + "step": 62750 + }, + { + "epoch": 14.51, + "grad_norm": 0.3317677974700928, + "learning_rate": 8.559488125432327e-06, + "loss": 0.0022, + "step": 63000 + }, + { + "epoch": 14.51, + "eval_loss": 0.13171714544296265, + "eval_runtime": 847.1312, + "eval_samples_per_second": 7.97, + "eval_steps_per_second": 0.249, + "eval_wer": 10.712656445246644, + "step": 63000 + }, + { + "epoch": 14.57, + "grad_norm": 1.777006983757019, + "learning_rate": 8.55372377219276e-06, + "loss": 0.0024, + "step": 63250 + }, + { + "epoch": 14.62, + "grad_norm": 0.9992290735244751, + "learning_rate": 8.547959418953195e-06, + "loss": 0.0023, + "step": 63500 + }, + { + "epoch": 14.68, + "grad_norm": 1.5052534341812134, + "learning_rate": 8.542195065713628e-06, + "loss": 0.0024, + "step": 63750 + }, + { + "epoch": 14.74, + "grad_norm": 0.3194350600242615, + "learning_rate": 8.536430712474061e-06, + "loss": 0.0025, + "step": 64000 + }, + { + "epoch": 14.74, + "eval_loss": 0.12849073112010956, + "eval_runtime": 852.5941, + "eval_samples_per_second": 7.919, + "eval_steps_per_second": 0.247, + "eval_wer": 10.587698593728307, + "step": 64000 + }, + { + "epoch": 14.8, + "grad_norm": 0.10011152178049088, + "learning_rate": 8.530666359234494e-06, + "loss": 0.0021, + "step": 64250 + }, + { + "epoch": 14.85, + "grad_norm": 2.3586697578430176, + "learning_rate": 8.524902005994929e-06, + "loss": 0.0024, + "step": 64500 + }, + { + "epoch": 14.91, + "grad_norm": 0.3267650008201599, + "learning_rate": 8.519137652755362e-06, + "loss": 0.0023, + "step": 64750 + }, + { + "epoch": 14.97, + "grad_norm": 0.058121953159570694, + "learning_rate": 8.513373299515795e-06, + "loss": 0.0025, + "step": 65000 + }, + { + "epoch": 14.97, + "eval_loss": 0.12760809063911438, + "eval_runtime": 865.2509, + "eval_samples_per_second": 7.804, + "eval_steps_per_second": 0.244, + "eval_wer": 10.496459527540313, + "step": 65000 + }, + { + "epoch": 15.03, + "grad_norm": 0.024574730545282364, + "learning_rate": 8.50760894627623e-06, + "loss": 0.0016, + "step": 65250 + }, + { + "epoch": 15.09, + "grad_norm": 0.04577196016907692, + "learning_rate": 8.501844593036663e-06, + "loss": 0.0016, + "step": 65500 + }, + { + "epoch": 15.14, + "grad_norm": 1.1313352584838867, + "learning_rate": 8.496080239797096e-06, + "loss": 0.0018, + "step": 65750 + }, + { + "epoch": 15.2, + "grad_norm": 0.08705839514732361, + "learning_rate": 8.490315886557529e-06, + "loss": 0.0013, + "step": 66000 + }, + { + "epoch": 15.2, + "eval_loss": 0.13220834732055664, + "eval_runtime": 857.8748, + "eval_samples_per_second": 7.871, + "eval_steps_per_second": 0.246, + "eval_wer": 10.619433921098043, + "step": 66000 + }, + { + "epoch": 15.26, + "grad_norm": 1.0441516637802124, + "learning_rate": 8.484551533317962e-06, + "loss": 0.0014, + "step": 66250 + }, + { + "epoch": 15.32, + "grad_norm": 1.5698387622833252, + "learning_rate": 8.478787180078397e-06, + "loss": 0.0016, + "step": 66500 + }, + { + "epoch": 15.37, + "grad_norm": 1.2932049036026, + "learning_rate": 8.47302282683883e-06, + "loss": 0.0021, + "step": 66750 + }, + { + "epoch": 15.43, + "grad_norm": 0.2987584173679352, + "learning_rate": 8.467258473599263e-06, + "loss": 0.0022, + "step": 67000 + }, + { + "epoch": 15.43, + "eval_loss": 0.13362213969230652, + "eval_runtime": 860.791, + "eval_samples_per_second": 7.844, + "eval_steps_per_second": 0.245, + "eval_wer": 10.728524108931511, + "step": 67000 + }, + { + "epoch": 15.49, + "grad_norm": 0.15311214327812195, + "learning_rate": 8.461494120359696e-06, + "loss": 0.0019, + "step": 67250 + }, + { + "epoch": 15.55, + "grad_norm": 0.13635623455047607, + "learning_rate": 8.455752824533087e-06, + "loss": 0.0021, + "step": 67500 + }, + { + "epoch": 15.6, + "grad_norm": 0.4941750168800354, + "learning_rate": 8.449988471293522e-06, + "loss": 0.002, + "step": 67750 + }, + { + "epoch": 15.66, + "grad_norm": 0.04697488993406296, + "learning_rate": 8.444224118053955e-06, + "loss": 0.0025, + "step": 68000 + }, + { + "epoch": 15.66, + "eval_loss": 0.13006770610809326, + "eval_runtime": 863.9415, + "eval_samples_per_second": 7.815, + "eval_steps_per_second": 0.244, + "eval_wer": 10.506376817343357, + "step": 68000 + }, + { + "epoch": 15.72, + "grad_norm": 1.116538166999817, + "learning_rate": 8.438459764814388e-06, + "loss": 0.0021, + "step": 68250 + }, + { + "epoch": 15.78, + "grad_norm": 0.79182368516922, + "learning_rate": 8.432695411574821e-06, + "loss": 0.0019, + "step": 68500 + }, + { + "epoch": 15.83, + "grad_norm": 0.18393570184707642, + "learning_rate": 8.426931058335256e-06, + "loss": 0.0018, + "step": 68750 + }, + { + "epoch": 15.89, + "grad_norm": 0.31326913833618164, + "learning_rate": 8.421166705095689e-06, + "loss": 0.0022, + "step": 69000 + }, + { + "epoch": 15.89, + "eval_loss": 0.1315659135580063, + "eval_runtime": 863.1675, + "eval_samples_per_second": 7.822, + "eval_steps_per_second": 0.244, + "eval_wer": 10.50042644346153, + "step": 69000 + }, + { + "epoch": 15.95, + "grad_norm": 2.326347827911377, + "learning_rate": 8.415402351856122e-06, + "loss": 0.0018, + "step": 69250 + }, + { + "epoch": 16.01, + "grad_norm": 0.02481095865368843, + "learning_rate": 8.409637998616555e-06, + "loss": 0.0019, + "step": 69500 + }, + { + "epoch": 16.06, + "grad_norm": 0.3131246566772461, + "learning_rate": 8.40387364537699e-06, + "loss": 0.0013, + "step": 69750 + }, + { + "epoch": 16.12, + "grad_norm": 0.11796487122774124, + "learning_rate": 8.398109292137423e-06, + "loss": 0.0019, + "step": 70000 + }, + { + "epoch": 16.12, + "eval_loss": 0.13220052421092987, + "eval_runtime": 862.684, + "eval_samples_per_second": 7.827, + "eval_steps_per_second": 0.245, + "eval_wer": 10.432988872800841, + "step": 70000 + }, + { + "epoch": 16.18, + "grad_norm": 0.130937397480011, + "learning_rate": 8.392344938897856e-06, + "loss": 0.0017, + "step": 70250 + }, + { + "epoch": 16.24, + "grad_norm": 0.2919136881828308, + "learning_rate": 8.386580585658289e-06, + "loss": 0.0016, + "step": 70500 + }, + { + "epoch": 16.29, + "grad_norm": 0.09921443462371826, + "learning_rate": 8.380816232418724e-06, + "loss": 0.0014, + "step": 70750 + }, + { + "epoch": 16.35, + "grad_norm": 0.490876168012619, + "learning_rate": 8.375051879179157e-06, + "loss": 0.0018, + "step": 71000 + }, + { + "epoch": 16.35, + "eval_loss": 0.1313299685716629, + "eval_runtime": 868.192, + "eval_samples_per_second": 7.777, + "eval_steps_per_second": 0.243, + "eval_wer": 10.671003828073863, + "step": 71000 + }, + { + "epoch": 16.41, + "grad_norm": 0.18082498013973236, + "learning_rate": 8.36928752593959e-06, + "loss": 0.0018, + "step": 71250 + }, + { + "epoch": 16.47, + "grad_norm": 0.24912157654762268, + "learning_rate": 8.363523172700023e-06, + "loss": 0.0017, + "step": 71500 + }, + { + "epoch": 16.52, + "grad_norm": 0.12575741112232208, + "learning_rate": 8.357758819460458e-06, + "loss": 0.0017, + "step": 71750 + }, + { + "epoch": 16.58, + "grad_norm": 1.444352626800537, + "learning_rate": 8.35199446622089e-06, + "loss": 0.0021, + "step": 72000 + }, + { + "epoch": 16.58, + "eval_loss": 0.13000106811523438, + "eval_runtime": 860.5334, + "eval_samples_per_second": 7.846, + "eval_steps_per_second": 0.245, + "eval_wer": 10.692821865640557, + "step": 72000 + }, + { + "epoch": 16.64, + "grad_norm": 0.48708584904670715, + "learning_rate": 8.346230112981324e-06, + "loss": 0.0023, + "step": 72250 + }, + { + "epoch": 16.7, + "grad_norm": 1.296075463294983, + "learning_rate": 8.340488817154715e-06, + "loss": 0.002, + "step": 72500 + }, + { + "epoch": 16.75, + "grad_norm": 0.3294328451156616, + "learning_rate": 8.334747521328108e-06, + "loss": 0.0021, + "step": 72750 + }, + { + "epoch": 16.81, + "grad_norm": 0.04183769226074219, + "learning_rate": 8.328983168088541e-06, + "loss": 0.0023, + "step": 73000 + }, + { + "epoch": 16.81, + "eval_loss": 0.12990330159664154, + "eval_runtime": 882.105, + "eval_samples_per_second": 7.654, + "eval_steps_per_second": 0.239, + "eval_wer": 10.319931769046155, + "step": 73000 + }, + { + "epoch": 16.87, + "grad_norm": 0.42695850133895874, + "learning_rate": 8.323241872261933e-06, + "loss": 0.002, + "step": 73250 + }, + { + "epoch": 16.93, + "grad_norm": 0.2880449593067169, + "learning_rate": 8.317477519022367e-06, + "loss": 0.0021, + "step": 73500 + }, + { + "epoch": 16.99, + "grad_norm": 1.023850917816162, + "learning_rate": 8.3117131657828e-06, + "loss": 0.0017, + "step": 73750 + }, + { + "epoch": 17.04, + "grad_norm": 1.6181273460388184, + "learning_rate": 8.305948812543234e-06, + "loss": 0.0015, + "step": 74000 + }, + { + "epoch": 17.04, + "eval_loss": 0.13121721148490906, + "eval_runtime": 859.3639, + "eval_samples_per_second": 7.857, + "eval_steps_per_second": 0.246, + "eval_wer": 10.403237003391713, + "step": 74000 + }, + { + "epoch": 17.1, + "grad_norm": 0.26772114634513855, + "learning_rate": 8.300184459303667e-06, + "loss": 0.0014, + "step": 74250 + }, + { + "epoch": 17.16, + "grad_norm": 3.353755235671997, + "learning_rate": 8.294420106064101e-06, + "loss": 0.0009, + "step": 74500 + }, + { + "epoch": 17.22, + "grad_norm": 1.0177912712097168, + "learning_rate": 8.288655752824534e-06, + "loss": 0.0013, + "step": 74750 + }, + { + "epoch": 17.27, + "grad_norm": 1.6711407899856567, + "learning_rate": 8.282891399584967e-06, + "loss": 0.0016, + "step": 75000 + }, + { + "epoch": 17.27, + "eval_loss": 0.13091835379600525, + "eval_runtime": 876.3454, + "eval_samples_per_second": 7.705, + "eval_steps_per_second": 0.241, + "eval_wer": 10.928853362952973, + "step": 75000 + }, + { + "epoch": 17.33, + "grad_norm": 2.060011148452759, + "learning_rate": 8.2771270463454e-06, + "loss": 0.0015, + "step": 75250 + }, + { + "epoch": 17.39, + "grad_norm": 0.3818737864494324, + "learning_rate": 8.271362693105834e-06, + "loss": 0.0015, + "step": 75500 + }, + { + "epoch": 17.45, + "grad_norm": 0.08700312674045563, + "learning_rate": 8.265598339866268e-06, + "loss": 0.0014, + "step": 75750 + }, + { + "epoch": 17.5, + "grad_norm": 0.17121294140815735, + "learning_rate": 8.259833986626701e-06, + "loss": 0.0019, + "step": 76000 + }, + { + "epoch": 17.5, + "eval_loss": 0.13205237686634064, + "eval_runtime": 856.9406, + "eval_samples_per_second": 7.879, + "eval_steps_per_second": 0.246, + "eval_wer": 10.40918737727354, + "step": 76000 + }, + { + "epoch": 17.56, + "grad_norm": 0.05054645985364914, + "learning_rate": 8.254069633387134e-06, + "loss": 0.0017, + "step": 76250 + }, + { + "epoch": 17.62, + "grad_norm": 0.03382011130452156, + "learning_rate": 8.248305280147568e-06, + "loss": 0.002, + "step": 76500 + }, + { + "epoch": 17.68, + "grad_norm": 0.9019294381141663, + "learning_rate": 8.242540926908002e-06, + "loss": 0.0016, + "step": 76750 + }, + { + "epoch": 17.73, + "grad_norm": 0.12265095859766006, + "learning_rate": 8.236776573668435e-06, + "loss": 0.0016, + "step": 77000 + }, + { + "epoch": 17.73, + "eval_loss": 0.12913843989372253, + "eval_runtime": 853.8374, + "eval_samples_per_second": 7.908, + "eval_steps_per_second": 0.247, + "eval_wer": 10.337782890691633, + "step": 77000 + }, + { + "epoch": 17.79, + "grad_norm": 1.6407935619354248, + "learning_rate": 8.231012220428868e-06, + "loss": 0.0019, + "step": 77250 + }, + { + "epoch": 17.85, + "grad_norm": 0.5586839914321899, + "learning_rate": 8.22527092460226e-06, + "loss": 0.0016, + "step": 77500 + }, + { + "epoch": 17.91, + "grad_norm": 2.7545528411865234, + "learning_rate": 8.219506571362694e-06, + "loss": 0.0019, + "step": 77750 + }, + { + "epoch": 17.96, + "grad_norm": 0.10660357773303986, + "learning_rate": 8.213742218123128e-06, + "loss": 0.0019, + "step": 78000 + }, + { + "epoch": 17.96, + "eval_loss": 0.12859106063842773, + "eval_runtime": 860.0021, + "eval_samples_per_second": 7.851, + "eval_steps_per_second": 0.245, + "eval_wer": 10.310014479243112, + "step": 78000 + }, + { + "epoch": 18.02, + "grad_norm": 0.057170018553733826, + "learning_rate": 8.20797786488356e-06, + "loss": 0.0019, + "step": 78250 + }, + { + "epoch": 18.08, + "grad_norm": 0.8357897996902466, + "learning_rate": 8.202213511643994e-06, + "loss": 0.0013, + "step": 78500 + }, + { + "epoch": 18.14, + "grad_norm": 0.2379799783229828, + "learning_rate": 8.196449158404428e-06, + "loss": 0.0015, + "step": 78750 + }, + { + "epoch": 18.19, + "grad_norm": 0.020079199224710464, + "learning_rate": 8.190684805164861e-06, + "loss": 0.0015, + "step": 79000 + }, + { + "epoch": 18.19, + "eval_loss": 0.13000167906284332, + "eval_runtime": 861.6458, + "eval_samples_per_second": 7.836, + "eval_steps_per_second": 0.245, + "eval_wer": 10.403237003391713, + "step": 79000 + }, + { + "epoch": 18.25, + "grad_norm": 1.2580838203430176, + "learning_rate": 8.184920451925294e-06, + "loss": 0.0012, + "step": 79250 + }, + { + "epoch": 18.31, + "grad_norm": 0.2000938206911087, + "learning_rate": 8.179156098685728e-06, + "loss": 0.0012, + "step": 79500 + }, + { + "epoch": 18.37, + "grad_norm": 0.07198411971330643, + "learning_rate": 8.173391745446162e-06, + "loss": 0.0019, + "step": 79750 + }, + { + "epoch": 18.42, + "grad_norm": 0.19537071883678436, + "learning_rate": 8.167627392206595e-06, + "loss": 0.0016, + "step": 80000 + }, + { + "epoch": 18.42, + "eval_loss": 0.13053175806999207, + "eval_runtime": 860.9809, + "eval_samples_per_second": 7.842, + "eval_steps_per_second": 0.245, + "eval_wer": 10.30009718944007, + "step": 80000 + }, + { + "epoch": 18.48, + "grad_norm": 0.3851453363895416, + "learning_rate": 8.161863038967028e-06, + "loss": 0.002, + "step": 80250 + }, + { + "epoch": 18.54, + "grad_norm": 1.5883591175079346, + "learning_rate": 8.156098685727461e-06, + "loss": 0.0016, + "step": 80500 + }, + { + "epoch": 18.6, + "grad_norm": 0.21738067269325256, + "learning_rate": 8.150334332487896e-06, + "loss": 0.0018, + "step": 80750 + }, + { + "epoch": 18.65, + "grad_norm": 0.2855512201786041, + "learning_rate": 8.14456997924833e-06, + "loss": 0.0016, + "step": 81000 + }, + { + "epoch": 18.65, + "eval_loss": 0.13212721049785614, + "eval_runtime": 864.7213, + "eval_samples_per_second": 7.808, + "eval_steps_per_second": 0.244, + "eval_wer": 10.468691116091794, + "step": 81000 + }, + { + "epoch": 18.71, + "grad_norm": 0.42549794912338257, + "learning_rate": 8.138805626008762e-06, + "loss": 0.0018, + "step": 81250 + }, + { + "epoch": 18.77, + "grad_norm": 0.1744888573884964, + "learning_rate": 8.133064330182154e-06, + "loss": 0.0017, + "step": 81500 + }, + { + "epoch": 18.83, + "grad_norm": 0.4819861650466919, + "learning_rate": 8.127299976942587e-06, + "loss": 0.0018, + "step": 81750 + }, + { + "epoch": 18.89, + "grad_norm": 0.4029196798801422, + "learning_rate": 8.121535623703021e-06, + "loss": 0.0017, + "step": 82000 + }, + { + "epoch": 18.89, + "eval_loss": 0.1323300451040268, + "eval_runtime": 863.1777, + "eval_samples_per_second": 7.822, + "eval_steps_per_second": 0.244, + "eval_wer": 10.282246067794592, + "step": 82000 + }, + { + "epoch": 18.94, + "grad_norm": 1.2428728342056274, + "learning_rate": 8.115771270463455e-06, + "loss": 0.0018, + "step": 82250 + }, + { + "epoch": 19.0, + "grad_norm": 0.11099331825971603, + "learning_rate": 8.110006917223888e-06, + "loss": 0.0015, + "step": 82500 + }, + { + "epoch": 19.06, + "grad_norm": 0.02784816548228264, + "learning_rate": 8.10424256398432e-06, + "loss": 0.0011, + "step": 82750 + }, + { + "epoch": 19.12, + "grad_norm": 0.06138940528035164, + "learning_rate": 8.098478210744755e-06, + "loss": 0.0012, + "step": 83000 + }, + { + "epoch": 19.12, + "eval_loss": 0.13197582960128784, + "eval_runtime": 855.8423, + "eval_samples_per_second": 7.889, + "eval_steps_per_second": 0.247, + "eval_wer": 10.191007001606602, + "step": 83000 + }, + { + "epoch": 19.17, + "grad_norm": 1.163352608680725, + "learning_rate": 8.092713857505188e-06, + "loss": 0.0011, + "step": 83250 + }, + { + "epoch": 19.23, + "grad_norm": 0.19741587340831757, + "learning_rate": 8.086949504265622e-06, + "loss": 0.0011, + "step": 83500 + }, + { + "epoch": 19.29, + "grad_norm": 0.019343126565217972, + "learning_rate": 8.081185151026055e-06, + "loss": 0.001, + "step": 83750 + }, + { + "epoch": 19.35, + "grad_norm": 1.0601211786270142, + "learning_rate": 8.075420797786488e-06, + "loss": 0.0012, + "step": 84000 + }, + { + "epoch": 19.35, + "eval_loss": 0.13453201949596405, + "eval_runtime": 855.5505, + "eval_samples_per_second": 7.892, + "eval_steps_per_second": 0.247, + "eval_wer": 10.371501676021976, + "step": 84000 + }, + { + "epoch": 19.4, + "grad_norm": 0.49607208371162415, + "learning_rate": 8.069656444546922e-06, + "loss": 0.0012, + "step": 84250 + }, + { + "epoch": 19.46, + "grad_norm": 0.5492927432060242, + "learning_rate": 8.063892091307355e-06, + "loss": 0.0014, + "step": 84500 + }, + { + "epoch": 19.52, + "grad_norm": 0.07558491080999374, + "learning_rate": 8.058127738067788e-06, + "loss": 0.0017, + "step": 84750 + }, + { + "epoch": 19.58, + "grad_norm": 0.2147219181060791, + "learning_rate": 8.052363384828223e-06, + "loss": 0.0021, + "step": 85000 + }, + { + "epoch": 19.58, + "eval_loss": 0.13526426255702972, + "eval_runtime": 862.7391, + "eval_samples_per_second": 7.826, + "eval_steps_per_second": 0.245, + "eval_wer": 10.405220461352322, + "step": 85000 + }, + { + "epoch": 19.63, + "grad_norm": 0.6740741729736328, + "learning_rate": 8.046599031588656e-06, + "loss": 0.0013, + "step": 85250 + }, + { + "epoch": 19.69, + "grad_norm": 0.10711955279111862, + "learning_rate": 8.04083467834909e-06, + "loss": 0.0018, + "step": 85500 + }, + { + "epoch": 19.75, + "grad_norm": 0.09672527760267258, + "learning_rate": 8.035070325109522e-06, + "loss": 0.0013, + "step": 85750 + }, + { + "epoch": 19.81, + "grad_norm": 2.067840814590454, + "learning_rate": 8.029305971869957e-06, + "loss": 0.0018, + "step": 86000 + }, + { + "epoch": 19.81, + "eval_loss": 0.13280877470970154, + "eval_runtime": 867.5864, + "eval_samples_per_second": 7.783, + "eval_steps_per_second": 0.243, + "eval_wer": 10.284229525755203, + "step": 86000 + }, + { + "epoch": 19.86, + "grad_norm": 1.6060312986373901, + "learning_rate": 8.023587733456307e-06, + "loss": 0.0019, + "step": 86250 + }, + { + "epoch": 19.92, + "grad_norm": 1.6165310144424438, + "learning_rate": 8.01782338021674e-06, + "loss": 0.0017, + "step": 86500 + }, + { + "epoch": 19.98, + "grad_norm": 0.09126902371644974, + "learning_rate": 8.012059026977173e-06, + "loss": 0.0017, + "step": 86750 + }, + { + "epoch": 20.04, + "grad_norm": 0.06976249814033508, + "learning_rate": 8.006294673737606e-06, + "loss": 0.0016, + "step": 87000 + }, + { + "epoch": 20.04, + "eval_loss": 0.13026085495948792, + "eval_runtime": 862.5128, + "eval_samples_per_second": 7.828, + "eval_steps_per_second": 0.245, + "eval_wer": 10.250510740424856, + "step": 87000 + }, + { + "epoch": 20.09, + "grad_norm": 0.02732229419052601, + "learning_rate": 8.00053032049804e-06, + "loss": 0.0011, + "step": 87250 + }, + { + "epoch": 20.15, + "grad_norm": 0.67994225025177, + "learning_rate": 7.994765967258474e-06, + "loss": 0.0014, + "step": 87500 + }, + { + "epoch": 20.21, + "grad_norm": 0.10088305920362473, + "learning_rate": 7.989001614018907e-06, + "loss": 0.001, + "step": 87750 + }, + { + "epoch": 20.27, + "grad_norm": 0.11000318080186844, + "learning_rate": 7.98323726077934e-06, + "loss": 0.001, + "step": 88000 + }, + { + "epoch": 20.27, + "eval_loss": 0.12965452671051025, + "eval_runtime": 856.312, + "eval_samples_per_second": 7.885, + "eval_steps_per_second": 0.246, + "eval_wer": 10.175139337921733, + "step": 88000 + }, + { + "epoch": 20.32, + "grad_norm": 0.09820275008678436, + "learning_rate": 7.977472907539775e-06, + "loss": 0.0014, + "step": 88250 + }, + { + "epoch": 20.38, + "grad_norm": 0.023317014798521996, + "learning_rate": 7.971708554300208e-06, + "loss": 0.0014, + "step": 88500 + }, + { + "epoch": 20.44, + "grad_norm": 0.012326917611062527, + "learning_rate": 7.96594420106064e-06, + "loss": 0.0013, + "step": 88750 + }, + { + "epoch": 20.5, + "grad_norm": 0.0349048413336277, + "learning_rate": 7.960179847821076e-06, + "loss": 0.0011, + "step": 89000 + }, + { + "epoch": 20.5, + "eval_loss": 0.13150520622730255, + "eval_runtime": 865.4062, + "eval_samples_per_second": 7.802, + "eval_steps_per_second": 0.244, + "eval_wer": 10.230676160818772, + "step": 89000 + }, + { + "epoch": 20.56, + "grad_norm": 0.019593272358179092, + "learning_rate": 7.954415494581509e-06, + "loss": 0.0014, + "step": 89250 + }, + { + "epoch": 20.61, + "grad_norm": 1.248373031616211, + "learning_rate": 7.948651141341942e-06, + "loss": 0.0016, + "step": 89500 + }, + { + "epoch": 20.67, + "grad_norm": 0.050250094383955, + "learning_rate": 7.942886788102375e-06, + "loss": 0.0014, + "step": 89750 + }, + { + "epoch": 20.73, + "grad_norm": 0.13364633917808533, + "learning_rate": 7.93712243486281e-06, + "loss": 0.0017, + "step": 90000 + }, + { + "epoch": 20.73, + "eval_loss": 0.13066557049751282, + "eval_runtime": 845.7111, + "eval_samples_per_second": 7.984, + "eval_steps_per_second": 0.249, + "eval_wer": 10.333815974770415, + "step": 90000 + }, + { + "epoch": 20.79, + "grad_norm": 1.2950758934020996, + "learning_rate": 7.931358081623242e-06, + "loss": 0.0014, + "step": 90250 + }, + { + "epoch": 20.84, + "grad_norm": 0.03415424004197121, + "learning_rate": 7.925593728383676e-06, + "loss": 0.0014, + "step": 90500 + }, + { + "epoch": 20.9, + "grad_norm": 0.10316485166549683, + "learning_rate": 7.919852432557069e-06, + "loss": 0.0016, + "step": 90750 + }, + { + "epoch": 20.96, + "grad_norm": 0.19864577054977417, + "learning_rate": 7.914088079317502e-06, + "loss": 0.0013, + "step": 91000 + }, + { + "epoch": 20.96, + "eval_loss": 0.12971824407577515, + "eval_runtime": 865.4465, + "eval_samples_per_second": 7.802, + "eval_steps_per_second": 0.244, + "eval_wer": 10.139437094630779, + "step": 91000 + }, + { + "epoch": 21.02, + "grad_norm": 1.215699553489685, + "learning_rate": 7.908323726077935e-06, + "loss": 0.0012, + "step": 91250 + }, + { + "epoch": 21.07, + "grad_norm": 0.38624367117881775, + "learning_rate": 7.90255937283837e-06, + "loss": 0.0013, + "step": 91500 + }, + { + "epoch": 21.13, + "grad_norm": 0.0665733814239502, + "learning_rate": 7.896795019598802e-06, + "loss": 0.0013, + "step": 91750 + }, + { + "epoch": 21.19, + "grad_norm": 1.9759693145751953, + "learning_rate": 7.891030666359236e-06, + "loss": 0.0013, + "step": 92000 + }, + { + "epoch": 21.19, + "eval_loss": 0.1314748227596283, + "eval_runtime": 846.5805, + "eval_samples_per_second": 7.976, + "eval_steps_per_second": 0.249, + "eval_wer": 10.242576908582421, + "step": 92000 + }, + { + "epoch": 21.25, + "grad_norm": 1.912919521331787, + "learning_rate": 7.885266313119669e-06, + "loss": 0.0014, + "step": 92250 + }, + { + "epoch": 21.3, + "grad_norm": 2.836052417755127, + "learning_rate": 7.879501959880103e-06, + "loss": 0.0015, + "step": 92500 + }, + { + "epoch": 21.36, + "grad_norm": 0.8251251578330994, + "learning_rate": 7.873737606640536e-06, + "loss": 0.0013, + "step": 92750 + }, + { + "epoch": 21.42, + "grad_norm": 0.017601924017071724, + "learning_rate": 7.86797325340097e-06, + "loss": 0.0013, + "step": 93000 + }, + { + "epoch": 21.42, + "eval_loss": 0.1311779022216797, + "eval_runtime": 857.0196, + "eval_samples_per_second": 7.878, + "eval_steps_per_second": 0.246, + "eval_wer": 10.206874665291469, + "step": 93000 + }, + { + "epoch": 21.48, + "grad_norm": 0.2548398971557617, + "learning_rate": 7.86223195757436e-06, + "loss": 0.0013, + "step": 93250 + }, + { + "epoch": 21.53, + "grad_norm": 0.050698913633823395, + "learning_rate": 7.856467604334794e-06, + "loss": 0.0012, + "step": 93500 + }, + { + "epoch": 21.59, + "grad_norm": 0.15648888051509857, + "learning_rate": 7.850703251095227e-06, + "loss": 0.0009, + "step": 93750 + }, + { + "epoch": 21.65, + "grad_norm": 0.6790530681610107, + "learning_rate": 7.844938897855662e-06, + "loss": 0.0016, + "step": 94000 + }, + { + "epoch": 21.65, + "eval_loss": 0.13513444364070892, + "eval_runtime": 855.2346, + "eval_samples_per_second": 7.895, + "eval_steps_per_second": 0.247, + "eval_wer": 10.276295693912768, + "step": 94000 + }, + { + "epoch": 21.71, + "grad_norm": 0.8243337869644165, + "learning_rate": 7.839197602029053e-06, + "loss": 0.0013, + "step": 94250 + }, + { + "epoch": 21.76, + "grad_norm": 0.05715777724981308, + "learning_rate": 7.833433248789486e-06, + "loss": 0.0015, + "step": 94500 + }, + { + "epoch": 21.82, + "grad_norm": 0.10455400496721268, + "learning_rate": 7.827668895549921e-06, + "loss": 0.0018, + "step": 94750 + }, + { + "epoch": 21.88, + "grad_norm": 0.06954806298017502, + "learning_rate": 7.821904542310354e-06, + "loss": 0.0012, + "step": 95000 + }, + { + "epoch": 21.88, + "eval_loss": 0.13230295479297638, + "eval_runtime": 864.2691, + "eval_samples_per_second": 7.812, + "eval_steps_per_second": 0.244, + "eval_wer": 9.964892794097228, + "step": 95000 + }, + { + "epoch": 21.94, + "grad_norm": 0.35232919454574585, + "learning_rate": 7.816140189070787e-06, + "loss": 0.0016, + "step": 95250 + }, + { + "epoch": 21.99, + "grad_norm": 0.27226054668426514, + "learning_rate": 7.81037583583122e-06, + "loss": 0.0015, + "step": 95500 + }, + { + "epoch": 22.05, + "grad_norm": 0.03192569315433502, + "learning_rate": 7.804611482591655e-06, + "loss": 0.0009, + "step": 95750 + }, + { + "epoch": 22.11, + "grad_norm": 0.772456705570221, + "learning_rate": 7.798847129352088e-06, + "loss": 0.0009, + "step": 96000 + }, + { + "epoch": 22.11, + "eval_loss": 0.13247309625148773, + "eval_runtime": 846.1981, + "eval_samples_per_second": 7.979, + "eval_steps_per_second": 0.249, + "eval_wer": 10.012495785151835, + "step": 96000 + }, + { + "epoch": 22.17, + "grad_norm": 0.05217467620968819, + "learning_rate": 7.793082776112521e-06, + "loss": 0.0009, + "step": 96250 + }, + { + "epoch": 22.22, + "grad_norm": 0.03777674213051796, + "learning_rate": 7.787318422872956e-06, + "loss": 0.0012, + "step": 96500 + }, + { + "epoch": 22.28, + "grad_norm": 0.20706115663051605, + "learning_rate": 7.781554069633389e-06, + "loss": 0.0011, + "step": 96750 + }, + { + "epoch": 22.34, + "grad_norm": 0.07679043710231781, + "learning_rate": 7.775789716393822e-06, + "loss": 0.0008, + "step": 97000 + }, + { + "epoch": 22.34, + "eval_loss": 0.13062846660614014, + "eval_runtime": 859.6538, + "eval_samples_per_second": 7.854, + "eval_steps_per_second": 0.245, + "eval_wer": 9.780431203760637, + "step": 97000 + }, + { + "epoch": 22.4, + "grad_norm": 0.2341454029083252, + "learning_rate": 7.770025363154255e-06, + "loss": 0.001, + "step": 97250 + }, + { + "epoch": 22.46, + "grad_norm": 0.5662194490432739, + "learning_rate": 7.764284067327646e-06, + "loss": 0.0013, + "step": 97500 + }, + { + "epoch": 22.51, + "grad_norm": 0.08940891921520233, + "learning_rate": 7.75851971408808e-06, + "loss": 0.0012, + "step": 97750 + }, + { + "epoch": 22.57, + "grad_norm": 0.28177976608276367, + "learning_rate": 7.752755360848512e-06, + "loss": 0.0015, + "step": 98000 + }, + { + "epoch": 22.57, + "eval_loss": 0.13175997138023376, + "eval_runtime": 865.6294, + "eval_samples_per_second": 7.8, + "eval_steps_per_second": 0.244, + "eval_wer": 10.066049150088265, + "step": 98000 + }, + { + "epoch": 22.63, + "grad_norm": 0.4019626975059509, + "learning_rate": 7.746991007608947e-06, + "loss": 0.0012, + "step": 98250 + }, + { + "epoch": 22.69, + "grad_norm": 0.06278575956821442, + "learning_rate": 7.74122665436938e-06, + "loss": 0.0019, + "step": 98500 + }, + { + "epoch": 22.74, + "grad_norm": 2.755483388900757, + "learning_rate": 7.735462301129813e-06, + "loss": 0.0013, + "step": 98750 + }, + { + "epoch": 22.8, + "grad_norm": 0.478965163230896, + "learning_rate": 7.729697947890248e-06, + "loss": 0.0012, + "step": 99000 + }, + { + "epoch": 22.8, + "eval_loss": 0.13538804650306702, + "eval_runtime": 848.5461, + "eval_samples_per_second": 7.957, + "eval_steps_per_second": 0.249, + "eval_wer": 9.996628121466966, + "step": 99000 + }, + { + "epoch": 22.86, + "grad_norm": 0.18010005354881287, + "learning_rate": 7.723933594650681e-06, + "loss": 0.0014, + "step": 99250 + }, + { + "epoch": 22.92, + "grad_norm": 0.32389816641807556, + "learning_rate": 7.718169241411114e-06, + "loss": 0.0011, + "step": 99500 + }, + { + "epoch": 22.97, + "grad_norm": 0.031109152361750603, + "learning_rate": 7.712427945584507e-06, + "loss": 0.0013, + "step": 99750 + }, + { + "epoch": 23.03, + "grad_norm": 0.40925395488739014, + "learning_rate": 7.70666359234494e-06, + "loss": 0.0013, + "step": 100000 + }, + { + "epoch": 23.03, + "eval_loss": 0.13106635212898254, + "eval_runtime": 855.4169, + "eval_samples_per_second": 7.893, + "eval_steps_per_second": 0.247, + "eval_wer": 9.673324473887776, + "step": 100000 + }, + { + "epoch": 23.09, + "grad_norm": 2.2084646224975586, + "learning_rate": 7.700922296518331e-06, + "loss": 0.0015, + "step": 100250 + }, + { + "epoch": 23.15, + "grad_norm": 0.03353571146726608, + "learning_rate": 7.695157943278765e-06, + "loss": 0.0007, + "step": 100500 + }, + { + "epoch": 23.2, + "grad_norm": 0.27213382720947266, + "learning_rate": 7.689393590039198e-06, + "loss": 0.0008, + "step": 100750 + }, + { + "epoch": 23.26, + "grad_norm": 1.3013813495635986, + "learning_rate": 7.68362923679963e-06, + "loss": 0.001, + "step": 101000 + }, + { + "epoch": 23.26, + "eval_loss": 0.13327930867671967, + "eval_runtime": 860.1438, + "eval_samples_per_second": 7.85, + "eval_steps_per_second": 0.245, + "eval_wer": 10.161255132197473, + "step": 101000 + }, + { + "epoch": 23.32, + "grad_norm": 1.0605800151824951, + "learning_rate": 7.677864883560065e-06, + "loss": 0.0016, + "step": 101250 + }, + { + "epoch": 23.38, + "grad_norm": 1.6323225498199463, + "learning_rate": 7.672100530320498e-06, + "loss": 0.0016, + "step": 101500 + }, + { + "epoch": 23.43, + "grad_norm": 0.3052765130996704, + "learning_rate": 7.666336177080931e-06, + "loss": 0.001, + "step": 101750 + }, + { + "epoch": 23.49, + "grad_norm": 1.3335562944412231, + "learning_rate": 7.660571823841365e-06, + "loss": 0.0013, + "step": 102000 + }, + { + "epoch": 23.49, + "eval_loss": 0.1332116574048996, + "eval_runtime": 862.515, + "eval_samples_per_second": 7.828, + "eval_steps_per_second": 0.245, + "eval_wer": 10.036297280679136, + "step": 102000 + }, + { + "epoch": 23.55, + "grad_norm": 1.166803240776062, + "learning_rate": 7.6548074706018e-06, + "loss": 0.0014, + "step": 102250 + }, + { + "epoch": 23.61, + "grad_norm": 0.06531043350696564, + "learning_rate": 7.649043117362232e-06, + "loss": 0.0013, + "step": 102500 + }, + { + "epoch": 23.66, + "grad_norm": 0.2673274874687195, + "learning_rate": 7.643278764122665e-06, + "loss": 0.0013, + "step": 102750 + }, + { + "epoch": 23.72, + "grad_norm": 0.007462978828698397, + "learning_rate": 7.6375144108831e-06, + "loss": 0.0013, + "step": 103000 + }, + { + "epoch": 23.72, + "eval_loss": 0.13317851722240448, + "eval_runtime": 854.6131, + "eval_samples_per_second": 7.901, + "eval_steps_per_second": 0.247, + "eval_wer": 10.012495785151835, + "step": 103000 + }, + { + "epoch": 23.78, + "grad_norm": 1.090072512626648, + "learning_rate": 7.631750057643533e-06, + "loss": 0.0016, + "step": 103250 + }, + { + "epoch": 23.84, + "grad_norm": 0.10874152928590775, + "learning_rate": 7.625985704403966e-06, + "loss": 0.0014, + "step": 103500 + }, + { + "epoch": 23.89, + "grad_norm": 0.3460334837436676, + "learning_rate": 7.620221351164399e-06, + "loss": 0.0012, + "step": 103750 + }, + { + "epoch": 23.95, + "grad_norm": 0.0227680541574955, + "learning_rate": 7.614456997924834e-06, + "loss": 0.0012, + "step": 104000 + }, + { + "epoch": 23.95, + "eval_loss": 0.13361206650733948, + "eval_runtime": 864.3568, + "eval_samples_per_second": 7.812, + "eval_steps_per_second": 0.244, + "eval_wer": 10.058115318245829, + "step": 104000 + }, + { + "epoch": 24.01, + "grad_norm": 1.1514167785644531, + "learning_rate": 7.608692644685267e-06, + "loss": 0.0015, + "step": 104250 + }, + { + "epoch": 24.07, + "grad_norm": 0.04206667095422745, + "learning_rate": 7.6029282914457e-06, + "loss": 0.0009, + "step": 104500 + }, + { + "epoch": 24.12, + "grad_norm": 0.03346676006913185, + "learning_rate": 7.597163938206133e-06, + "loss": 0.0007, + "step": 104750 + }, + { + "epoch": 24.18, + "grad_norm": 0.005905906204134226, + "learning_rate": 7.591399584966568e-06, + "loss": 0.0009, + "step": 105000 + }, + { + "epoch": 24.18, + "eval_loss": 0.13135230541229248, + "eval_runtime": 859.844, + "eval_samples_per_second": 7.853, + "eval_steps_per_second": 0.245, + "eval_wer": 9.89745522343654, + "step": 105000 + }, + { + "epoch": 24.24, + "grad_norm": 0.48378223180770874, + "learning_rate": 7.585635231727001e-06, + "loss": 0.001, + "step": 105250 + }, + { + "epoch": 24.3, + "grad_norm": 0.013876562006771564, + "learning_rate": 7.579870878487434e-06, + "loss": 0.0009, + "step": 105500 + }, + { + "epoch": 24.36, + "grad_norm": 0.7280595898628235, + "learning_rate": 7.574106525247867e-06, + "loss": 0.0013, + "step": 105750 + }, + { + "epoch": 24.41, + "grad_norm": 1.0038032531738281, + "learning_rate": 7.568342172008302e-06, + "loss": 0.0012, + "step": 106000 + }, + { + "epoch": 24.41, + "eval_loss": 0.13419808447360992, + "eval_runtime": 868.3234, + "eval_samples_per_second": 7.776, + "eval_steps_per_second": 0.243, + "eval_wer": 10.222742328976338, + "step": 106000 + }, + { + "epoch": 24.47, + "grad_norm": 1.810659646987915, + "learning_rate": 7.562600876181692e-06, + "loss": 0.0015, + "step": 106250 + }, + { + "epoch": 24.53, + "grad_norm": 0.027089903131127357, + "learning_rate": 7.556836522942127e-06, + "loss": 0.0014, + "step": 106500 + }, + { + "epoch": 24.59, + "grad_norm": 0.1833794265985489, + "learning_rate": 7.55107216970256e-06, + "loss": 0.001, + "step": 106750 + }, + { + "epoch": 24.64, + "grad_norm": 0.4848494529724121, + "learning_rate": 7.5453308738759516e-06, + "loss": 0.0011, + "step": 107000 + }, + { + "epoch": 24.64, + "eval_loss": 0.134404718875885, + "eval_runtime": 871.0128, + "eval_samples_per_second": 7.752, + "eval_steps_per_second": 0.242, + "eval_wer": 9.812166531130373, + "step": 107000 + }, + { + "epoch": 24.7, + "grad_norm": 0.03687746450304985, + "learning_rate": 7.5395665206363855e-06, + "loss": 0.0011, + "step": 107250 + }, + { + "epoch": 24.76, + "grad_norm": 1.3746637105941772, + "learning_rate": 7.5338021673968185e-06, + "loss": 0.0015, + "step": 107500 + }, + { + "epoch": 24.82, + "grad_norm": 0.5606046319007874, + "learning_rate": 7.528037814157252e-06, + "loss": 0.0016, + "step": 107750 + }, + { + "epoch": 24.87, + "grad_norm": 0.23363818228244781, + "learning_rate": 7.522273460917686e-06, + "loss": 0.0011, + "step": 108000 + }, + { + "epoch": 24.87, + "eval_loss": 0.131892129778862, + "eval_runtime": 866.9375, + "eval_samples_per_second": 7.788, + "eval_steps_per_second": 0.243, + "eval_wer": 9.820100362972807, + "step": 108000 + }, + { + "epoch": 24.93, + "grad_norm": 0.14785051345825195, + "learning_rate": 7.516509107678119e-06, + "loss": 0.0012, + "step": 108250 + }, + { + "epoch": 24.99, + "grad_norm": 0.17186763882637024, + "learning_rate": 7.5107447544385524e-06, + "loss": 0.001, + "step": 108500 + }, + { + "epoch": 25.05, + "grad_norm": 0.7404451370239258, + "learning_rate": 7.5049804011989855e-06, + "loss": 0.0012, + "step": 108750 + }, + { + "epoch": 25.1, + "grad_norm": 0.11903788894414902, + "learning_rate": 7.49921604795942e-06, + "loss": 0.0008, + "step": 109000 + }, + { + "epoch": 25.1, + "eval_loss": 0.1291409283876419, + "eval_runtime": 862.8202, + "eval_samples_per_second": 7.826, + "eval_steps_per_second": 0.245, + "eval_wer": 9.744728960469683, + "step": 109000 + }, + { + "epoch": 25.16, + "grad_norm": 0.028790809214115143, + "learning_rate": 7.493451694719853e-06, + "loss": 0.0008, + "step": 109250 + }, + { + "epoch": 25.22, + "grad_norm": 0.08509263396263123, + "learning_rate": 7.487687341480286e-06, + "loss": 0.0009, + "step": 109500 + }, + { + "epoch": 25.28, + "grad_norm": 3.7736191749572754, + "learning_rate": 7.481922988240719e-06, + "loss": 0.001, + "step": 109750 + }, + { + "epoch": 25.33, + "grad_norm": 1.2739046812057495, + "learning_rate": 7.476158635001154e-06, + "loss": 0.0011, + "step": 110000 + }, + { + "epoch": 25.33, + "eval_loss": 0.12932626903057098, + "eval_runtime": 872.4168, + "eval_samples_per_second": 7.739, + "eval_steps_per_second": 0.242, + "eval_wer": 9.643572604478647, + "step": 110000 + }, + { + "epoch": 25.39, + "grad_norm": 1.4399291276931763, + "learning_rate": 7.470394281761587e-06, + "loss": 0.0009, + "step": 110250 + }, + { + "epoch": 25.45, + "grad_norm": 0.06352319568395615, + "learning_rate": 7.46462992852202e-06, + "loss": 0.0011, + "step": 110500 + }, + { + "epoch": 25.51, + "grad_norm": 0.0745418593287468, + "learning_rate": 7.458865575282453e-06, + "loss": 0.0008, + "step": 110750 + }, + { + "epoch": 25.56, + "grad_norm": 2.0110867023468018, + "learning_rate": 7.453101222042888e-06, + "loss": 0.0015, + "step": 111000 + }, + { + "epoch": 25.56, + "eval_loss": 0.1350392997264862, + "eval_runtime": 859.6078, + "eval_samples_per_second": 7.855, + "eval_steps_per_second": 0.245, + "eval_wer": 9.728861296784814, + "step": 111000 + }, + { + "epoch": 25.62, + "grad_norm": 1.9989393949508667, + "learning_rate": 7.447359926216279e-06, + "loss": 0.0012, + "step": 111250 + }, + { + "epoch": 25.68, + "grad_norm": 0.7481454610824585, + "learning_rate": 7.441595572976713e-06, + "loss": 0.0012, + "step": 111500 + }, + { + "epoch": 25.74, + "grad_norm": 0.42684781551361084, + "learning_rate": 7.435854277150104e-06, + "loss": 0.0013, + "step": 111750 + }, + { + "epoch": 25.79, + "grad_norm": 0.21273395419120789, + "learning_rate": 7.430089923910537e-06, + "loss": 0.0009, + "step": 112000 + }, + { + "epoch": 25.79, + "eval_loss": 0.13176259398460388, + "eval_runtime": 853.5624, + "eval_samples_per_second": 7.91, + "eval_steps_per_second": 0.247, + "eval_wer": 9.653489894281691, + "step": 112000 + }, + { + "epoch": 25.85, + "grad_norm": 0.798125147819519, + "learning_rate": 7.424325570670972e-06, + "loss": 0.0011, + "step": 112250 + }, + { + "epoch": 25.91, + "grad_norm": 0.3447425663471222, + "learning_rate": 7.418561217431405e-06, + "loss": 0.0012, + "step": 112500 + }, + { + "epoch": 25.97, + "grad_norm": 1.2573052644729614, + "learning_rate": 7.412819921604797e-06, + "loss": 0.0009, + "step": 112750 + }, + { + "epoch": 26.02, + "grad_norm": 0.2099132090806961, + "learning_rate": 7.40705556836523e-06, + "loss": 0.001, + "step": 113000 + }, + { + "epoch": 26.02, + "eval_loss": 0.1325974017381668, + "eval_runtime": 859.4858, + "eval_samples_per_second": 7.856, + "eval_steps_per_second": 0.245, + "eval_wer": 9.685225221651427, + "step": 113000 + }, + { + "epoch": 26.08, + "grad_norm": 0.06933692842721939, + "learning_rate": 7.401291215125663e-06, + "loss": 0.0007, + "step": 113250 + }, + { + "epoch": 26.14, + "grad_norm": 0.12258395552635193, + "learning_rate": 7.395526861886096e-06, + "loss": 0.0008, + "step": 113500 + }, + { + "epoch": 26.2, + "grad_norm": 0.2960332930088043, + "learning_rate": 7.389762508646531e-06, + "loss": 0.0008, + "step": 113750 + }, + { + "epoch": 26.26, + "grad_norm": 0.03250851482152939, + "learning_rate": 7.383998155406964e-06, + "loss": 0.0006, + "step": 114000 + }, + { + "epoch": 26.26, + "eval_loss": 0.1351398229598999, + "eval_runtime": 869.8853, + "eval_samples_per_second": 7.762, + "eval_steps_per_second": 0.243, + "eval_wer": 9.845885316460718, + "step": 114000 + }, + { + "epoch": 26.31, + "grad_norm": 0.2106870859861374, + "learning_rate": 7.378233802167397e-06, + "loss": 0.0013, + "step": 114250 + }, + { + "epoch": 26.37, + "grad_norm": 0.6854831576347351, + "learning_rate": 7.372469448927832e-06, + "loss": 0.0012, + "step": 114500 + }, + { + "epoch": 26.43, + "grad_norm": 1.116827368736267, + "learning_rate": 7.366705095688265e-06, + "loss": 0.0009, + "step": 114750 + }, + { + "epoch": 26.49, + "grad_norm": 0.060012660920619965, + "learning_rate": 7.360940742448698e-06, + "loss": 0.0011, + "step": 115000 + }, + { + "epoch": 26.49, + "eval_loss": 0.13296382129192352, + "eval_runtime": 858.1234, + "eval_samples_per_second": 7.868, + "eval_steps_per_second": 0.246, + "eval_wer": 9.891504849554714, + "step": 115000 + }, + { + "epoch": 26.54, + "grad_norm": 0.2378009855747223, + "learning_rate": 7.355176389209131e-06, + "loss": 0.0011, + "step": 115250 + }, + { + "epoch": 26.6, + "grad_norm": 0.07352372258901596, + "learning_rate": 7.349412035969566e-06, + "loss": 0.0015, + "step": 115500 + }, + { + "epoch": 26.66, + "grad_norm": 0.9573454856872559, + "learning_rate": 7.343647682729999e-06, + "loss": 0.0009, + "step": 115750 + }, + { + "epoch": 26.72, + "grad_norm": 2.2814531326293945, + "learning_rate": 7.337883329490432e-06, + "loss": 0.0013, + "step": 116000 + }, + { + "epoch": 26.72, + "eval_loss": 0.13392788171768188, + "eval_runtime": 863.6577, + "eval_samples_per_second": 7.818, + "eval_steps_per_second": 0.244, + "eval_wer": 10.036297280679136, + "step": 116000 + }, + { + "epoch": 26.77, + "grad_norm": 0.2348572313785553, + "learning_rate": 7.332118976250865e-06, + "loss": 0.001, + "step": 116250 + }, + { + "epoch": 26.83, + "grad_norm": 0.3893941640853882, + "learning_rate": 7.3263546230112995e-06, + "loss": 0.0011, + "step": 116500 + }, + { + "epoch": 26.89, + "grad_norm": 1.920695424079895, + "learning_rate": 7.320590269771733e-06, + "loss": 0.0012, + "step": 116750 + }, + { + "epoch": 26.95, + "grad_norm": 0.17352449893951416, + "learning_rate": 7.314825916532166e-06, + "loss": 0.0011, + "step": 117000 + }, + { + "epoch": 26.95, + "eval_loss": 0.13272777199745178, + "eval_runtime": 857.9116, + "eval_samples_per_second": 7.87, + "eval_steps_per_second": 0.246, + "eval_wer": 10.083900271733741, + "step": 117000 + }, + { + "epoch": 27.0, + "grad_norm": 0.03900158777832985, + "learning_rate": 7.309061563292599e-06, + "loss": 0.0012, + "step": 117250 + }, + { + "epoch": 27.06, + "grad_norm": 0.03566018119454384, + "learning_rate": 7.3032972100530334e-06, + "loss": 0.0006, + "step": 117500 + }, + { + "epoch": 27.12, + "grad_norm": 0.057484470307826996, + "learning_rate": 7.2975328568134665e-06, + "loss": 0.0006, + "step": 117750 + }, + { + "epoch": 27.18, + "grad_norm": 0.03549089655280113, + "learning_rate": 7.2917685035738995e-06, + "loss": 0.0008, + "step": 118000 + }, + { + "epoch": 27.18, + "eval_loss": 0.13313508033752441, + "eval_runtime": 854.7837, + "eval_samples_per_second": 7.899, + "eval_steps_per_second": 0.247, + "eval_wer": 9.935140924688103, + "step": 118000 + }, + { + "epoch": 27.23, + "grad_norm": 0.009218300692737103, + "learning_rate": 7.286004150334333e-06, + "loss": 0.0008, + "step": 118250 + }, + { + "epoch": 27.29, + "grad_norm": 0.0295584537088871, + "learning_rate": 7.2802397970947665e-06, + "loss": 0.0008, + "step": 118500 + }, + { + "epoch": 27.35, + "grad_norm": 0.41446352005004883, + "learning_rate": 7.2744754438552e-06, + "loss": 0.0009, + "step": 118750 + }, + { + "epoch": 27.41, + "grad_norm": 0.4680302143096924, + "learning_rate": 7.2687110906156335e-06, + "loss": 0.0009, + "step": 119000 + }, + { + "epoch": 27.41, + "eval_loss": 0.1315101832151413, + "eval_runtime": 865.9401, + "eval_samples_per_second": 7.797, + "eval_steps_per_second": 0.244, + "eval_wer": 9.919273261003234, + "step": 119000 + }, + { + "epoch": 27.46, + "grad_norm": 4.383372783660889, + "learning_rate": 7.2629467373760665e-06, + "loss": 0.0009, + "step": 119250 + }, + { + "epoch": 27.52, + "grad_norm": 0.050101153552532196, + "learning_rate": 7.2571823841365e-06, + "loss": 0.0013, + "step": 119500 + }, + { + "epoch": 27.58, + "grad_norm": 0.22907772660255432, + "learning_rate": 7.251418030896934e-06, + "loss": 0.0011, + "step": 119750 + }, + { + "epoch": 27.64, + "grad_norm": 0.2788342237472534, + "learning_rate": 7.245653677657367e-06, + "loss": 0.0014, + "step": 120000 + }, + { + "epoch": 27.64, + "eval_loss": 0.1343310922384262, + "eval_runtime": 855.2599, + "eval_samples_per_second": 7.895, + "eval_steps_per_second": 0.247, + "eval_wer": 9.832001110736458, + "step": 120000 + }, + { + "epoch": 27.69, + "grad_norm": 0.027169860899448395, + "learning_rate": 7.239889324417801e-06, + "loss": 0.0011, + "step": 120250 + }, + { + "epoch": 27.75, + "grad_norm": 0.3283644914627075, + "learning_rate": 7.234124971178234e-06, + "loss": 0.001, + "step": 120500 + }, + { + "epoch": 27.81, + "grad_norm": 0.9259850382804871, + "learning_rate": 7.228383675351626e-06, + "loss": 0.0008, + "step": 120750 + }, + { + "epoch": 27.87, + "grad_norm": 0.20162805914878845, + "learning_rate": 7.22261932211206e-06, + "loss": 0.0013, + "step": 121000 + }, + { + "epoch": 27.87, + "eval_loss": 0.1318797767162323, + "eval_runtime": 854.2094, + "eval_samples_per_second": 7.904, + "eval_steps_per_second": 0.247, + "eval_wer": 9.94902513041236, + "step": 121000 + }, + { + "epoch": 27.92, + "grad_norm": 0.029574675485491753, + "learning_rate": 7.2168549688724935e-06, + "loss": 0.0008, + "step": 121250 + }, + { + "epoch": 27.98, + "grad_norm": 0.36377763748168945, + "learning_rate": 7.2110906156329266e-06, + "loss": 0.0007, + "step": 121500 + }, + { + "epoch": 28.04, + "grad_norm": 0.15056151151657104, + "learning_rate": 7.20532626239336e-06, + "loss": 0.0008, + "step": 121750 + }, + { + "epoch": 28.1, + "grad_norm": 0.8695113658905029, + "learning_rate": 7.1995619091537935e-06, + "loss": 0.0006, + "step": 122000 + }, + { + "epoch": 28.1, + "eval_loss": 0.135031595826149, + "eval_runtime": 850.3861, + "eval_samples_per_second": 7.94, + "eval_steps_per_second": 0.248, + "eval_wer": 9.851835690342543, + "step": 122000 + }, + { + "epoch": 28.16, + "grad_norm": 0.009558724239468575, + "learning_rate": 7.193797555914227e-06, + "loss": 0.0008, + "step": 122250 + }, + { + "epoch": 28.21, + "grad_norm": 0.4410775303840637, + "learning_rate": 7.1880332026746605e-06, + "loss": 0.0008, + "step": 122500 + }, + { + "epoch": 28.27, + "grad_norm": 0.3716282546520233, + "learning_rate": 7.182268849435094e-06, + "loss": 0.0007, + "step": 122750 + }, + { + "epoch": 28.33, + "grad_norm": 0.018515612930059433, + "learning_rate": 7.176527553608486e-06, + "loss": 0.001, + "step": 123000 + }, + { + "epoch": 28.33, + "eval_loss": 0.13689374923706055, + "eval_runtime": 864.0622, + "eval_samples_per_second": 7.814, + "eval_steps_per_second": 0.244, + "eval_wer": 9.968859710018446, + "step": 123000 + }, + { + "epoch": 28.39, + "grad_norm": 0.07072445005178452, + "learning_rate": 7.170763200368919e-06, + "loss": 0.0011, + "step": 123250 + }, + { + "epoch": 28.44, + "grad_norm": 0.7424390316009521, + "learning_rate": 7.164998847129353e-06, + "loss": 0.0013, + "step": 123500 + }, + { + "epoch": 28.5, + "grad_norm": 0.031032560393214226, + "learning_rate": 7.159234493889787e-06, + "loss": 0.001, + "step": 123750 + }, + { + "epoch": 28.56, + "grad_norm": 1.3974592685699463, + "learning_rate": 7.15347014065022e-06, + "loss": 0.001, + "step": 124000 + }, + { + "epoch": 28.56, + "eval_loss": 0.13558751344680786, + "eval_runtime": 859.3374, + "eval_samples_per_second": 7.857, + "eval_steps_per_second": 0.246, + "eval_wer": 9.889521391594105, + "step": 124000 + }, + { + "epoch": 28.62, + "grad_norm": 2.0855116844177246, + "learning_rate": 7.147705787410653e-06, + "loss": 0.0008, + "step": 124250 + }, + { + "epoch": 28.67, + "grad_norm": 0.008047823794186115, + "learning_rate": 7.141941434171087e-06, + "loss": 0.0009, + "step": 124500 + }, + { + "epoch": 28.73, + "grad_norm": 0.1283062994480133, + "learning_rate": 7.13617708093152e-06, + "loss": 0.0009, + "step": 124750 + }, + { + "epoch": 28.79, + "grad_norm": 0.0888696163892746, + "learning_rate": 7.1304127276919536e-06, + "loss": 0.0009, + "step": 125000 + }, + { + "epoch": 28.79, + "eval_loss": 0.13283681869506836, + "eval_runtime": 852.2523, + "eval_samples_per_second": 7.923, + "eval_steps_per_second": 0.248, + "eval_wer": 9.724894380863597, + "step": 125000 + }, + { + "epoch": 28.85, + "grad_norm": 0.011558549478650093, + "learning_rate": 7.1246483744523875e-06, + "loss": 0.0009, + "step": 125250 + }, + { + "epoch": 28.9, + "grad_norm": 0.0639125183224678, + "learning_rate": 7.1188840212128205e-06, + "loss": 0.0009, + "step": 125500 + }, + { + "epoch": 28.96, + "grad_norm": 0.038483619689941406, + "learning_rate": 7.1131196679732536e-06, + "loss": 0.0009, + "step": 125750 + }, + { + "epoch": 29.02, + "grad_norm": 0.053386442363262177, + "learning_rate": 7.1073553147336875e-06, + "loss": 0.0008, + "step": 126000 + }, + { + "epoch": 29.02, + "eval_loss": 0.13507717847824097, + "eval_runtime": 865.2057, + "eval_samples_per_second": 7.804, + "eval_steps_per_second": 0.244, + "eval_wer": 9.788365035603071, + "step": 126000 + }, + { + "epoch": 29.08, + "grad_norm": 0.5218244791030884, + "learning_rate": 7.101590961494121e-06, + "loss": 0.0005, + "step": 126250 + }, + { + "epoch": 29.13, + "grad_norm": 0.046457044780254364, + "learning_rate": 7.0958266082545544e-06, + "loss": 0.0005, + "step": 126500 + }, + { + "epoch": 29.19, + "grad_norm": 0.017029544338583946, + "learning_rate": 7.0900622550149875e-06, + "loss": 0.0007, + "step": 126750 + }, + { + "epoch": 29.25, + "grad_norm": 0.09011877328157425, + "learning_rate": 7.08432095918838e-06, + "loss": 0.0007, + "step": 127000 + }, + { + "epoch": 29.25, + "eval_loss": 0.13347487151622772, + "eval_runtime": 859.4608, + "eval_samples_per_second": 7.856, + "eval_steps_per_second": 0.246, + "eval_wer": 9.659440268163516, + "step": 127000 + }, + { + "epoch": 29.31, + "grad_norm": 0.025057533755898476, + "learning_rate": 7.078556605948813e-06, + "loss": 0.0006, + "step": 127250 + }, + { + "epoch": 29.36, + "grad_norm": 0.673778235912323, + "learning_rate": 7.072815310122205e-06, + "loss": 0.0011, + "step": 127500 + }, + { + "epoch": 29.42, + "grad_norm": 0.3776152729988098, + "learning_rate": 7.067050956882638e-06, + "loss": 0.0008, + "step": 127750 + }, + { + "epoch": 29.48, + "grad_norm": 0.062430597841739655, + "learning_rate": 7.061286603643072e-06, + "loss": 0.0007, + "step": 128000 + }, + { + "epoch": 29.48, + "eval_loss": 0.1375785619020462, + "eval_runtime": 869.3107, + "eval_samples_per_second": 7.767, + "eval_steps_per_second": 0.243, + "eval_wer": 9.968859710018446, + "step": 128000 + }, + { + "epoch": 29.54, + "grad_norm": 0.054513975977897644, + "learning_rate": 7.055545307816464e-06, + "loss": 0.001, + "step": 128250 + }, + { + "epoch": 29.59, + "grad_norm": 0.19355349242687225, + "learning_rate": 7.049780954576897e-06, + "loss": 0.001, + "step": 128500 + }, + { + "epoch": 29.65, + "grad_norm": 0.01914064772427082, + "learning_rate": 7.04401660133733e-06, + "loss": 0.0009, + "step": 128750 + }, + { + "epoch": 29.71, + "grad_norm": 0.566556453704834, + "learning_rate": 7.038252248097764e-06, + "loss": 0.0006, + "step": 129000 + }, + { + "epoch": 29.71, + "eval_loss": 0.13309036195278168, + "eval_runtime": 871.0297, + "eval_samples_per_second": 7.752, + "eval_steps_per_second": 0.242, + "eval_wer": 9.810183073169764, + "step": 129000 + }, + { + "epoch": 29.77, + "grad_norm": 0.0412914976477623, + "learning_rate": 7.032487894858197e-06, + "loss": 0.001, + "step": 129250 + }, + { + "epoch": 29.82, + "grad_norm": 0.22140279412269592, + "learning_rate": 7.026723541618631e-06, + "loss": 0.0009, + "step": 129500 + }, + { + "epoch": 29.88, + "grad_norm": 1.2251185178756714, + "learning_rate": 7.020959188379064e-06, + "loss": 0.0009, + "step": 129750 + }, + { + "epoch": 29.94, + "grad_norm": 0.04262693226337433, + "learning_rate": 7.015194835139498e-06, + "loss": 0.0012, + "step": 130000 + }, + { + "epoch": 29.94, + "eval_loss": 0.13242684304714203, + "eval_runtime": 871.8865, + "eval_samples_per_second": 7.744, + "eval_steps_per_second": 0.242, + "eval_wer": 9.822083820933415, + "step": 130000 + }, + { + "epoch": 30.0, + "grad_norm": 0.08901894092559814, + "learning_rate": 7.009430481899931e-06, + "loss": 0.0009, + "step": 130250 + }, + { + "epoch": 30.06, + "grad_norm": 0.27203115820884705, + "learning_rate": 7.003666128660365e-06, + "loss": 0.0006, + "step": 130500 + }, + { + "epoch": 30.11, + "grad_norm": 0.07738136500120163, + "learning_rate": 6.997924832833756e-06, + "loss": 0.0006, + "step": 130750 + }, + { + "epoch": 30.17, + "grad_norm": 0.07566077262163162, + "learning_rate": 6.99216047959419e-06, + "loss": 0.0006, + "step": 131000 + }, + { + "epoch": 30.17, + "eval_loss": 0.13523517549037933, + "eval_runtime": 849.9774, + "eval_samples_per_second": 7.944, + "eval_steps_per_second": 0.248, + "eval_wer": 9.86770335402741, + "step": 131000 + }, + { + "epoch": 30.23, + "grad_norm": 0.04363285005092621, + "learning_rate": 6.986396126354623e-06, + "loss": 0.0007, + "step": 131250 + }, + { + "epoch": 30.29, + "grad_norm": 0.8319553136825562, + "learning_rate": 6.980631773115057e-06, + "loss": 0.0006, + "step": 131500 + }, + { + "epoch": 30.34, + "grad_norm": 0.05418200418353081, + "learning_rate": 6.97486741987549e-06, + "loss": 0.0009, + "step": 131750 + }, + { + "epoch": 30.4, + "grad_norm": 0.023919006809592247, + "learning_rate": 6.969103066635924e-06, + "loss": 0.001, + "step": 132000 + }, + { + "epoch": 30.4, + "eval_loss": 0.13232696056365967, + "eval_runtime": 869.6064, + "eval_samples_per_second": 7.764, + "eval_steps_per_second": 0.243, + "eval_wer": 9.730844754745423, + "step": 132000 + }, + { + "epoch": 30.46, + "grad_norm": 0.04671400040388107, + "learning_rate": 6.963338713396357e-06, + "loss": 0.001, + "step": 132250 + }, + { + "epoch": 30.52, + "grad_norm": 0.01245614979416132, + "learning_rate": 6.957574360156791e-06, + "loss": 0.0009, + "step": 132500 + }, + { + "epoch": 30.57, + "grad_norm": 1.1713380813598633, + "learning_rate": 6.951810006917224e-06, + "loss": 0.001, + "step": 132750 + }, + { + "epoch": 30.63, + "grad_norm": 0.01822400651872158, + "learning_rate": 6.946045653677658e-06, + "loss": 0.0008, + "step": 133000 + }, + { + "epoch": 30.63, + "eval_loss": 0.13655835390090942, + "eval_runtime": 855.6562, + "eval_samples_per_second": 7.891, + "eval_steps_per_second": 0.247, + "eval_wer": 9.90935597120019, + "step": 133000 + }, + { + "epoch": 30.69, + "grad_norm": 0.11439549922943115, + "learning_rate": 6.940281300438091e-06, + "loss": 0.0009, + "step": 133250 + }, + { + "epoch": 30.75, + "grad_norm": 0.2022576779127121, + "learning_rate": 6.934516947198525e-06, + "loss": 0.0011, + "step": 133500 + }, + { + "epoch": 30.8, + "grad_norm": 0.0333305299282074, + "learning_rate": 6.928752593958958e-06, + "loss": 0.001, + "step": 133750 + }, + { + "epoch": 30.86, + "grad_norm": 0.27165189385414124, + "learning_rate": 6.922988240719391e-06, + "loss": 0.0008, + "step": 134000 + }, + { + "epoch": 30.86, + "eval_loss": 0.13061845302581787, + "eval_runtime": 861.0795, + "eval_samples_per_second": 7.841, + "eval_steps_per_second": 0.245, + "eval_wer": 9.556300454211874, + "step": 134000 + } + ], + "logging_steps": 250, + "max_steps": 434200, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 1000, + "total_flos": 1.2371858121498627e+21, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}