{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.352781546811396, "eval_steps": 400, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "grad_norm": 4.026116371154785, "learning_rate": 0.00023999999999999998, "loss": 5.8396, "step": 400 }, { "epoch": 0.27, "eval_loss": 0.9751555919647217, "eval_runtime": 211.3976, "eval_samples_per_second": 6.618, "eval_steps_per_second": 0.828, "eval_wer": 0.8373902326297951, "step": 400 }, { "epoch": 0.54, "grad_norm": 1.9866678714752197, "learning_rate": 0.00029794144556267153, "loss": 0.6967, "step": 800 }, { "epoch": 0.54, "eval_loss": 0.38119566440582275, "eval_runtime": 210.7469, "eval_samples_per_second": 6.638, "eval_steps_per_second": 0.83, "eval_wer": 0.5935140964412263, "step": 800 }, { "epoch": 0.81, "grad_norm": 1.540819525718689, "learning_rate": 0.00029519670631290027, "loss": 0.4806, "step": 1200 }, { "epoch": 0.81, "eval_loss": 0.3368071913719177, "eval_runtime": 211.9206, "eval_samples_per_second": 6.602, "eval_steps_per_second": 0.826, "eval_wer": 0.4757356339547065, "step": 1200 }, { "epoch": 1.09, "grad_norm": 0.9303820729255676, "learning_rate": 0.000292451967063129, "loss": 0.3996, "step": 1600 }, { "epoch": 1.09, "eval_loss": 0.199427992105484, "eval_runtime": 210.2196, "eval_samples_per_second": 6.655, "eval_steps_per_second": 0.832, "eval_wer": 0.3142813125866584, "step": 1600 }, { "epoch": 1.36, "grad_norm": 0.5186350345611572, "learning_rate": 0.0002897072278133577, "loss": 0.3497, "step": 2000 }, { "epoch": 1.36, "eval_loss": 0.16836047172546387, "eval_runtime": 210.345, "eval_samples_per_second": 6.651, "eval_steps_per_second": 0.832, "eval_wer": 0.2564319827453397, "step": 2000 }, { "epoch": 1.63, "grad_norm": 0.9710769057273865, "learning_rate": 0.00028696248856358643, "loss": 0.3372, "step": 2400 }, { "epoch": 1.63, "eval_loss": 0.15862207114696503, "eval_runtime": 212.5997, "eval_samples_per_second": 6.58, "eval_steps_per_second": 0.823, "eval_wer": 0.2418733631181636, "step": 2400 }, { "epoch": 1.9, "grad_norm": 0.8559458255767822, "learning_rate": 0.00028421774931381517, "loss": 0.312, "step": 2800 }, { "epoch": 1.9, "eval_loss": 0.1412586122751236, "eval_runtime": 211.389, "eval_samples_per_second": 6.618, "eval_steps_per_second": 0.828, "eval_wer": 0.2226929594823602, "step": 2800 }, { "epoch": 2.17, "grad_norm": 0.6652762293815613, "learning_rate": 0.0002814730100640439, "loss": 0.2797, "step": 3200 }, { "epoch": 2.17, "eval_loss": 0.1465640813112259, "eval_runtime": 210.5595, "eval_samples_per_second": 6.644, "eval_steps_per_second": 0.831, "eval_wer": 0.22731474349098751, "step": 3200 }, { "epoch": 2.44, "grad_norm": 0.361288458108902, "learning_rate": 0.0002787282708142726, "loss": 0.2554, "step": 3600 }, { "epoch": 2.44, "eval_loss": 0.1542745977640152, "eval_runtime": 211.2213, "eval_samples_per_second": 6.623, "eval_steps_per_second": 0.829, "eval_wer": 0.2365583115082422, "step": 3600 }, { "epoch": 2.71, "grad_norm": 0.5574541091918945, "learning_rate": 0.00027598353156450134, "loss": 0.2613, "step": 4000 }, { "epoch": 2.71, "eval_loss": 0.14501063525676727, "eval_runtime": 212.1205, "eval_samples_per_second": 6.595, "eval_steps_per_second": 0.825, "eval_wer": 0.23255276536743183, "step": 4000 }, { "epoch": 2.99, "grad_norm": 0.4116342067718506, "learning_rate": 0.0002732387923147301, "loss": 0.2399, "step": 4400 }, { "epoch": 2.99, "eval_loss": 0.12376150488853455, "eval_runtime": 212.9821, "eval_samples_per_second": 6.569, "eval_steps_per_second": 0.822, "eval_wer": 0.20297334771221692, "step": 4400 }, { "epoch": 3.26, "grad_norm": 0.834938645362854, "learning_rate": 0.0002704940530649588, "loss": 0.2125, "step": 4800 }, { "epoch": 3.26, "eval_loss": 0.09888482838869095, "eval_runtime": 213.2706, "eval_samples_per_second": 6.56, "eval_steps_per_second": 0.821, "eval_wer": 0.16099214296718534, "step": 4800 }, { "epoch": 3.53, "grad_norm": 0.4636085331439972, "learning_rate": 0.0002677493138151875, "loss": 0.2144, "step": 5200 }, { "epoch": 3.53, "eval_loss": 0.09842105209827423, "eval_runtime": 210.8983, "eval_samples_per_second": 6.634, "eval_steps_per_second": 0.83, "eval_wer": 0.1612232321676167, "step": 5200 }, { "epoch": 3.8, "grad_norm": 0.4843950569629669, "learning_rate": 0.0002650045745654163, "loss": 0.212, "step": 5600 }, { "epoch": 3.8, "eval_loss": 0.08756741881370544, "eval_runtime": 210.9696, "eval_samples_per_second": 6.631, "eval_steps_per_second": 0.83, "eval_wer": 0.1507471884147281, "step": 5600 }, { "epoch": 4.07, "grad_norm": 0.847865641117096, "learning_rate": 0.000262259835315645, "loss": 0.1964, "step": 6000 }, { "epoch": 4.07, "eval_loss": 0.1017051413655281, "eval_runtime": 211.736, "eval_samples_per_second": 6.607, "eval_steps_per_second": 0.827, "eval_wer": 0.17531967339393006, "step": 6000 }, { "epoch": 4.34, "grad_norm": 0.35252293944358826, "learning_rate": 0.0002595150960658737, "loss": 0.1814, "step": 6400 }, { "epoch": 4.34, "eval_loss": 0.0966610386967659, "eval_runtime": 211.5792, "eval_samples_per_second": 6.612, "eval_steps_per_second": 0.827, "eval_wer": 0.1653828377753813, "step": 6400 }, { "epoch": 4.61, "grad_norm": 0.38534751534461975, "learning_rate": 0.00025677035681610246, "loss": 0.1772, "step": 6800 }, { "epoch": 4.61, "eval_loss": 0.0955633744597435, "eval_runtime": 211.2723, "eval_samples_per_second": 6.622, "eval_steps_per_second": 0.828, "eval_wer": 0.16314897550454474, "step": 6800 }, { "epoch": 4.88, "grad_norm": 0.3151361346244812, "learning_rate": 0.0002540256175663312, "loss": 0.1748, "step": 7200 }, { "epoch": 4.88, "eval_loss": 0.08700825273990631, "eval_runtime": 213.02, "eval_samples_per_second": 6.567, "eval_steps_per_second": 0.822, "eval_wer": 0.14828223694346018, "step": 7200 }, { "epoch": 5.16, "grad_norm": 1.1274446249008179, "learning_rate": 0.0002512808783165599, "loss": 0.1706, "step": 7600 }, { "epoch": 5.16, "eval_loss": 0.07706322520971298, "eval_runtime": 212.6781, "eval_samples_per_second": 6.578, "eval_steps_per_second": 0.823, "eval_wer": 0.1306424279771992, "step": 7600 }, { "epoch": 5.43, "grad_norm": 1.3828215599060059, "learning_rate": 0.0002485361390667886, "loss": 0.1545, "step": 8000 }, { "epoch": 5.43, "eval_loss": 0.06534561514854431, "eval_runtime": 211.3365, "eval_samples_per_second": 6.62, "eval_steps_per_second": 0.828, "eval_wer": 0.1198582652904021, "step": 8000 }, { "epoch": 5.7, "grad_norm": 0.4036734402179718, "learning_rate": 0.00024579139981701736, "loss": 0.1627, "step": 8400 }, { "epoch": 5.7, "eval_loss": 0.060022782534360886, "eval_runtime": 211.4007, "eval_samples_per_second": 6.618, "eval_steps_per_second": 0.828, "eval_wer": 0.11030657833923894, "step": 8400 }, { "epoch": 5.97, "grad_norm": 0.3543277084827423, "learning_rate": 0.0002430466605672461, "loss": 0.1541, "step": 8800 }, { "epoch": 5.97, "eval_loss": 0.058936990797519684, "eval_runtime": 212.2764, "eval_samples_per_second": 6.59, "eval_steps_per_second": 0.824, "eval_wer": 0.10676321059929132, "step": 8800 }, { "epoch": 6.24, "grad_norm": 0.23857054114341736, "learning_rate": 0.00024030192131747481, "loss": 0.1382, "step": 9200 }, { "epoch": 6.24, "eval_loss": 0.07100619375705719, "eval_runtime": 213.3512, "eval_samples_per_second": 6.557, "eval_steps_per_second": 0.82, "eval_wer": 0.12309351409644123, "step": 9200 }, { "epoch": 6.51, "grad_norm": 0.6902241706848145, "learning_rate": 0.00023755718206770355, "loss": 0.1397, "step": 9600 }, { "epoch": 6.51, "eval_loss": 0.06513579189777374, "eval_runtime": 211.6334, "eval_samples_per_second": 6.61, "eval_steps_per_second": 0.827, "eval_wer": 0.12478816823293791, "step": 9600 }, { "epoch": 6.78, "grad_norm": 0.3040298819541931, "learning_rate": 0.00023481244281793227, "loss": 0.1345, "step": 10000 }, { "epoch": 6.78, "eval_loss": 0.06700535863637924, "eval_runtime": 211.9668, "eval_samples_per_second": 6.6, "eval_steps_per_second": 0.826, "eval_wer": 0.11939608688953936, "step": 10000 }, { "epoch": 7.06, "grad_norm": 1.0794196128845215, "learning_rate": 0.000232067703568161, "loss": 0.1281, "step": 10400 }, { "epoch": 7.06, "eval_loss": 0.054103270173072815, "eval_runtime": 209.427, "eval_samples_per_second": 6.68, "eval_steps_per_second": 0.836, "eval_wer": 0.10060083192112156, "step": 10400 }, { "epoch": 7.33, "grad_norm": 0.8765202164649963, "learning_rate": 0.00022932296431838972, "loss": 0.1315, "step": 10800 }, { "epoch": 7.33, "eval_loss": 0.05593947321176529, "eval_runtime": 208.8917, "eval_samples_per_second": 6.697, "eval_steps_per_second": 0.838, "eval_wer": 0.10622400246495148, "step": 10800 }, { "epoch": 7.6, "grad_norm": 0.5989521741867065, "learning_rate": 0.00022657822506861846, "loss": 0.1234, "step": 11200 }, { "epoch": 7.6, "eval_loss": 0.05276945233345032, "eval_runtime": 209.5765, "eval_samples_per_second": 6.675, "eval_steps_per_second": 0.835, "eval_wer": 0.09698043444769681, "step": 11200 }, { "epoch": 7.87, "grad_norm": 0.3679282069206238, "learning_rate": 0.00022383348581884717, "loss": 0.1248, "step": 11600 }, { "epoch": 7.87, "eval_loss": 0.044818080961704254, "eval_runtime": 209.7397, "eval_samples_per_second": 6.67, "eval_steps_per_second": 0.834, "eval_wer": 0.0865043906948082, "step": 11600 }, { "epoch": 8.14, "grad_norm": 0.3467447757720947, "learning_rate": 0.00022108874656907594, "loss": 0.115, "step": 12000 }, { "epoch": 8.14, "eval_loss": 0.0545777752995491, "eval_runtime": 209.6701, "eval_samples_per_second": 6.672, "eval_steps_per_second": 0.835, "eval_wer": 0.0993683561854876, "step": 12000 }, { "epoch": 8.41, "grad_norm": 1.4704556465148926, "learning_rate": 0.00021834400731930465, "loss": 0.1143, "step": 12400 }, { "epoch": 8.41, "eval_loss": 0.05952217057347298, "eval_runtime": 210.7235, "eval_samples_per_second": 6.639, "eval_steps_per_second": 0.83, "eval_wer": 0.10861192420274225, "step": 12400 }, { "epoch": 8.68, "grad_norm": 0.2509690821170807, "learning_rate": 0.0002155992680695334, "loss": 0.1169, "step": 12800 }, { "epoch": 8.68, "eval_loss": 0.04853290319442749, "eval_runtime": 209.7854, "eval_samples_per_second": 6.669, "eval_steps_per_second": 0.834, "eval_wer": 0.08742874749653366, "step": 12800 }, { "epoch": 8.96, "grad_norm": 0.2134234458208084, "learning_rate": 0.0002128545288197621, "loss": 0.1165, "step": 13200 }, { "epoch": 8.96, "eval_loss": 0.05237515643239021, "eval_runtime": 210.2596, "eval_samples_per_second": 6.654, "eval_steps_per_second": 0.832, "eval_wer": 0.09767370204899091, "step": 13200 }, { "epoch": 9.23, "grad_norm": 0.4314160943031311, "learning_rate": 0.00021010978956999084, "loss": 0.1035, "step": 13600 }, { "epoch": 9.23, "eval_loss": 0.044478464871644974, "eval_runtime": 210.5273, "eval_samples_per_second": 6.645, "eval_steps_per_second": 0.831, "eval_wer": 0.0837313202896318, "step": 13600 }, { "epoch": 9.5, "grad_norm": 0.4133451581001282, "learning_rate": 0.00020736505032021955, "loss": 0.1017, "step": 14000 }, { "epoch": 9.5, "eval_loss": 0.04130551964044571, "eval_runtime": 210.2347, "eval_samples_per_second": 6.654, "eval_steps_per_second": 0.832, "eval_wer": 0.0791865660144816, "step": 14000 }, { "epoch": 9.77, "grad_norm": 0.29300200939178467, "learning_rate": 0.0002046203110704483, "loss": 0.109, "step": 14400 }, { "epoch": 9.77, "eval_loss": 0.04202074185013771, "eval_runtime": 211.2523, "eval_samples_per_second": 6.622, "eval_steps_per_second": 0.828, "eval_wer": 0.08326914188876906, "step": 14400 }, { "epoch": 10.04, "grad_norm": 0.2547191083431244, "learning_rate": 0.000201875571820677, "loss": 0.1018, "step": 14800 }, { "epoch": 10.04, "eval_loss": 0.04540720209479332, "eval_runtime": 210.6476, "eval_samples_per_second": 6.641, "eval_steps_per_second": 0.831, "eval_wer": 0.0823447850870436, "step": 14800 }, { "epoch": 10.31, "grad_norm": 0.2799816131591797, "learning_rate": 0.00019913083257090574, "loss": 0.0929, "step": 15200 }, { "epoch": 10.31, "eval_loss": 0.042868729680776596, "eval_runtime": 209.1492, "eval_samples_per_second": 6.689, "eval_steps_per_second": 0.837, "eval_wer": 0.0785703281466646, "step": 15200 }, { "epoch": 10.58, "grad_norm": 0.18747101724147797, "learning_rate": 0.00019638609332113446, "loss": 0.0956, "step": 15600 }, { "epoch": 10.58, "eval_loss": 0.04033521190285683, "eval_runtime": 209.4853, "eval_samples_per_second": 6.678, "eval_steps_per_second": 0.835, "eval_wer": 0.07718379294407642, "step": 15600 }, { "epoch": 10.85, "grad_norm": 0.3078967034816742, "learning_rate": 0.0001936413540713632, "loss": 0.0986, "step": 16000 }, { "epoch": 10.85, "eval_loss": 0.046831514686346054, "eval_runtime": 209.7149, "eval_samples_per_second": 6.671, "eval_steps_per_second": 0.834, "eval_wer": 0.09058696656909566, "step": 16000 }, { "epoch": 11.13, "grad_norm": 0.10461205244064331, "learning_rate": 0.0001908966148215919, "loss": 0.0941, "step": 16400 }, { "epoch": 11.13, "eval_loss": 0.03615270182490349, "eval_runtime": 209.392, "eval_samples_per_second": 6.681, "eval_steps_per_second": 0.836, "eval_wer": 0.06940378986288707, "step": 16400 }, { "epoch": 11.4, "grad_norm": 0.5313912630081177, "learning_rate": 0.00018815187557182068, "loss": 0.0845, "step": 16800 }, { "epoch": 11.4, "eval_loss": 0.038695286959409714, "eval_runtime": 210.5021, "eval_samples_per_second": 6.646, "eval_steps_per_second": 0.831, "eval_wer": 0.0701740871976583, "step": 16800 }, { "epoch": 11.67, "grad_norm": 0.5434714555740356, "learning_rate": 0.0001854071363220494, "loss": 0.0955, "step": 17200 }, { "epoch": 11.67, "eval_loss": 0.03512905538082123, "eval_runtime": 210.6242, "eval_samples_per_second": 6.642, "eval_steps_per_second": 0.831, "eval_wer": 0.06270220305037745, "step": 17200 }, { "epoch": 11.94, "grad_norm": 0.6010161638259888, "learning_rate": 0.00018266239707227813, "loss": 0.089, "step": 17600 }, { "epoch": 11.94, "eval_loss": 0.03609244525432587, "eval_runtime": 210.1814, "eval_samples_per_second": 6.656, "eval_steps_per_second": 0.833, "eval_wer": 0.06747804652595903, "step": 17600 }, { "epoch": 12.21, "grad_norm": 0.2907910943031311, "learning_rate": 0.00017991765782250684, "loss": 0.0806, "step": 18000 }, { "epoch": 12.21, "eval_loss": 0.03811544552445412, "eval_runtime": 210.0932, "eval_samples_per_second": 6.659, "eval_steps_per_second": 0.833, "eval_wer": 0.06847943306116161, "step": 18000 }, { "epoch": 12.48, "grad_norm": 0.25918644666671753, "learning_rate": 0.00017717291857273558, "loss": 0.0803, "step": 18400 }, { "epoch": 12.48, "eval_loss": 0.0369856134057045, "eval_runtime": 210.0481, "eval_samples_per_second": 6.66, "eval_steps_per_second": 0.833, "eval_wer": 0.06747804652595903, "step": 18400 }, { "epoch": 12.75, "grad_norm": 0.3781072199344635, "learning_rate": 0.0001744281793229643, "loss": 0.0839, "step": 18800 }, { "epoch": 12.75, "eval_loss": 0.0333174392580986, "eval_runtime": 210.517, "eval_samples_per_second": 6.646, "eval_steps_per_second": 0.831, "eval_wer": 0.06193190571560622, "step": 18800 }, { "epoch": 13.03, "grad_norm": 0.42045047879219055, "learning_rate": 0.00017168344007319303, "loss": 0.0834, "step": 19200 }, { "epoch": 13.03, "eval_loss": 0.033445805311203, "eval_runtime": 210.6516, "eval_samples_per_second": 6.641, "eval_steps_per_second": 0.831, "eval_wer": 0.057695270374364505, "step": 19200 }, { "epoch": 13.3, "grad_norm": 0.4078648090362549, "learning_rate": 0.00016893870082342174, "loss": 0.0779, "step": 19600 }, { "epoch": 13.3, "eval_loss": 0.03577824681997299, "eval_runtime": 210.5433, "eval_samples_per_second": 6.645, "eval_steps_per_second": 0.831, "eval_wer": 0.06208596518256047, "step": 19600 }, { "epoch": 13.57, "grad_norm": 0.8884561657905579, "learning_rate": 0.00016619396157365048, "loss": 0.0773, "step": 20000 }, { "epoch": 13.57, "eval_loss": 0.032973628491163254, "eval_runtime": 210.0131, "eval_samples_per_second": 6.661, "eval_steps_per_second": 0.833, "eval_wer": 0.05646279463873055, "step": 20000 }, { "epoch": 13.84, "grad_norm": 0.5232133269309998, "learning_rate": 0.0001634492223238792, "loss": 0.0717, "step": 20400 }, { "epoch": 13.84, "eval_loss": 0.03495289012789726, "eval_runtime": 210.8453, "eval_samples_per_second": 6.635, "eval_steps_per_second": 0.83, "eval_wer": 0.0625481435834232, "step": 20400 }, { "epoch": 14.11, "grad_norm": 0.3025607466697693, "learning_rate": 0.00016070448307410794, "loss": 0.0737, "step": 20800 }, { "epoch": 14.11, "eval_loss": 0.03549855947494507, "eval_runtime": 209.8898, "eval_samples_per_second": 6.665, "eval_steps_per_second": 0.834, "eval_wer": 0.060314281312586655, "step": 20800 }, { "epoch": 14.38, "grad_norm": 1.34682297706604, "learning_rate": 0.00015795974382433665, "loss": 0.075, "step": 21200 }, { "epoch": 14.38, "eval_loss": 0.03607061505317688, "eval_runtime": 209.844, "eval_samples_per_second": 6.667, "eval_steps_per_second": 0.834, "eval_wer": 0.06262517331690032, "step": 21200 }, { "epoch": 14.65, "grad_norm": 0.1968027800321579, "learning_rate": 0.00015521500457456541, "loss": 0.0715, "step": 21600 }, { "epoch": 14.65, "eval_loss": 0.03143769130110741, "eval_runtime": 212.2, "eval_samples_per_second": 6.593, "eval_steps_per_second": 0.825, "eval_wer": 0.057464181173933135, "step": 21600 }, { "epoch": 14.93, "grad_norm": 0.19861555099487305, "learning_rate": 0.00015247026532479413, "loss": 0.0722, "step": 22000 }, { "epoch": 14.93, "eval_loss": 0.03097483143210411, "eval_runtime": 209.3416, "eval_samples_per_second": 6.683, "eval_steps_per_second": 0.836, "eval_wer": 0.05754121090741026, "step": 22000 }, { "epoch": 15.2, "grad_norm": 0.333710640668869, "learning_rate": 0.00014972552607502287, "loss": 0.0666, "step": 22400 }, { "epoch": 15.2, "eval_loss": 0.031425874680280685, "eval_runtime": 210.734, "eval_samples_per_second": 6.639, "eval_steps_per_second": 0.83, "eval_wer": 0.05592358650439069, "step": 22400 }, { "epoch": 15.47, "grad_norm": 0.4220418632030487, "learning_rate": 0.00014698078682525158, "loss": 0.0672, "step": 22800 }, { "epoch": 15.47, "eval_loss": 0.030694805085659027, "eval_runtime": 208.4031, "eval_samples_per_second": 6.713, "eval_steps_per_second": 0.84, "eval_wer": 0.05345863503312279, "step": 22800 }, { "epoch": 15.74, "grad_norm": 0.9596304297447205, "learning_rate": 0.00014423604757548032, "loss": 0.0664, "step": 23200 }, { "epoch": 15.74, "eval_loss": 0.03153248876333237, "eval_runtime": 209.4047, "eval_samples_per_second": 6.681, "eval_steps_per_second": 0.836, "eval_wer": 0.05515328916961947, "step": 23200 }, { "epoch": 16.01, "grad_norm": 0.5310277938842773, "learning_rate": 0.00014149130832570906, "loss": 0.0678, "step": 23600 }, { "epoch": 16.01, "eval_loss": 0.031200062483549118, "eval_runtime": 212.7627, "eval_samples_per_second": 6.575, "eval_steps_per_second": 0.823, "eval_wer": 0.054845170235710984, "step": 23600 }, { "epoch": 16.28, "grad_norm": 0.3012249767780304, "learning_rate": 0.00013874656907593777, "loss": 0.0616, "step": 24000 }, { "epoch": 16.28, "eval_loss": 0.0315285250544548, "eval_runtime": 208.0351, "eval_samples_per_second": 6.725, "eval_steps_per_second": 0.841, "eval_wer": 0.052688337698351566, "step": 24000 }, { "epoch": 16.55, "grad_norm": 0.2116428166627884, "learning_rate": 0.0001360018298261665, "loss": 0.0644, "step": 24400 }, { "epoch": 16.55, "eval_loss": 0.026890287175774574, "eval_runtime": 209.0713, "eval_samples_per_second": 6.691, "eval_steps_per_second": 0.837, "eval_wer": 0.048066553689724234, "step": 24400 }, { "epoch": 16.82, "grad_norm": 0.44349026679992676, "learning_rate": 0.00013325709057639522, "loss": 0.062, "step": 24800 }, { "epoch": 16.82, "eval_loss": 0.030771076679229736, "eval_runtime": 210.1625, "eval_samples_per_second": 6.657, "eval_steps_per_second": 0.833, "eval_wer": 0.05130180249576336, "step": 24800 }, { "epoch": 17.1, "grad_norm": 0.5564978718757629, "learning_rate": 0.00013051235132662396, "loss": 0.0584, "step": 25200 }, { "epoch": 17.1, "eval_loss": 0.02937587909400463, "eval_runtime": 209.0218, "eval_samples_per_second": 6.693, "eval_steps_per_second": 0.837, "eval_wer": 0.05022338622708365, "step": 25200 }, { "epoch": 17.37, "grad_norm": 0.3329567015171051, "learning_rate": 0.0001277676120768527, "loss": 0.0563, "step": 25600 }, { "epoch": 17.37, "eval_loss": 0.02935618720948696, "eval_runtime": 208.6661, "eval_samples_per_second": 6.704, "eval_steps_per_second": 0.839, "eval_wer": 0.04922199969188107, "step": 25600 }, { "epoch": 17.64, "grad_norm": 1.3281415700912476, "learning_rate": 0.0001250228728270814, "loss": 0.0547, "step": 26000 }, { "epoch": 17.64, "eval_loss": 0.028093835338950157, "eval_runtime": 208.0055, "eval_samples_per_second": 6.726, "eval_steps_per_second": 0.841, "eval_wer": 0.045216453551070714, "step": 26000 }, { "epoch": 17.91, "grad_norm": 0.33733347058296204, "learning_rate": 0.00012227813357731015, "loss": 0.056, "step": 26400 }, { "epoch": 17.91, "eval_loss": 0.02792263962328434, "eval_runtime": 208.746, "eval_samples_per_second": 6.702, "eval_steps_per_second": 0.838, "eval_wer": 0.04513942381759359, "step": 26400 }, { "epoch": 18.18, "grad_norm": 0.172608882188797, "learning_rate": 0.00011953339432753888, "loss": 0.0572, "step": 26800 }, { "epoch": 18.18, "eval_loss": 0.029318030923604965, "eval_runtime": 208.963, "eval_samples_per_second": 6.695, "eval_steps_per_second": 0.837, "eval_wer": 0.045986750885841934, "step": 26800 }, { "epoch": 18.45, "grad_norm": 1.2717292308807373, "learning_rate": 0.0001167886550777676, "loss": 0.0544, "step": 27200 }, { "epoch": 18.45, "eval_loss": 0.02826772816479206, "eval_runtime": 208.8935, "eval_samples_per_second": 6.697, "eval_steps_per_second": 0.838, "eval_wer": 0.04644892928670467, "step": 27200 }, { "epoch": 18.72, "grad_norm": 0.9943646192550659, "learning_rate": 0.00011404391582799633, "loss": 0.052, "step": 27600 }, { "epoch": 18.72, "eval_loss": 0.02736526168882847, "eval_runtime": 209.8515, "eval_samples_per_second": 6.667, "eval_steps_per_second": 0.834, "eval_wer": 0.04375288861500539, "step": 27600 }, { "epoch": 19.0, "grad_norm": 0.289604514837265, "learning_rate": 0.00011129917657822506, "loss": 0.0533, "step": 28000 }, { "epoch": 19.0, "eval_loss": 0.0264176856726408, "eval_runtime": 210.0905, "eval_samples_per_second": 6.659, "eval_steps_per_second": 0.833, "eval_wer": 0.041287937143737484, "step": 28000 }, { "epoch": 19.27, "grad_norm": 0.4070824086666107, "learning_rate": 0.00010855443732845378, "loss": 0.046, "step": 28400 }, { "epoch": 19.27, "eval_loss": 0.02758474461734295, "eval_runtime": 209.6786, "eval_samples_per_second": 6.672, "eval_steps_per_second": 0.835, "eval_wer": 0.04121090741026036, "step": 28400 }, { "epoch": 19.54, "grad_norm": 0.8942829966545105, "learning_rate": 0.00010580969807868251, "loss": 0.0498, "step": 28800 }, { "epoch": 19.54, "eval_loss": 0.02821315824985504, "eval_runtime": 210.0298, "eval_samples_per_second": 6.661, "eval_steps_per_second": 0.833, "eval_wer": 0.04190417501155446, "step": 28800 }, { "epoch": 19.81, "grad_norm": 0.6549252271652222, "learning_rate": 0.00010306495882891125, "loss": 0.0454, "step": 29200 }, { "epoch": 19.81, "eval_loss": 0.027856331318616867, "eval_runtime": 210.3168, "eval_samples_per_second": 6.652, "eval_steps_per_second": 0.832, "eval_wer": 0.04167308581112309, "step": 29200 }, { "epoch": 20.08, "grad_norm": 7.543890953063965, "learning_rate": 0.00010032021957913997, "loss": 0.0483, "step": 29600 }, { "epoch": 20.08, "eval_loss": 0.025978881865739822, "eval_runtime": 210.6554, "eval_samples_per_second": 6.641, "eval_steps_per_second": 0.831, "eval_wer": 0.0395932830072408, "step": 29600 }, { "epoch": 20.35, "grad_norm": 0.2715985178947449, "learning_rate": 9.75754803293687e-05, "loss": 0.0447, "step": 30000 }, { "epoch": 20.35, "eval_loss": 0.026736732572317123, "eval_runtime": 209.4915, "eval_samples_per_second": 6.678, "eval_steps_per_second": 0.835, "eval_wer": 0.041750115544600216, "step": 30000 } ], "logging_steps": 400, "max_steps": 44220, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "total_flos": 9.150437899797026e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }