SpideyDLK's picture
Training in progress, step 30000, checkpoint
ebc403b verified
raw
history blame
30.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.352781546811396,
"eval_steps": 400,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.27,
"grad_norm": 4.026116371154785,
"learning_rate": 0.00023999999999999998,
"loss": 5.8396,
"step": 400
},
{
"epoch": 0.27,
"eval_loss": 0.9751555919647217,
"eval_runtime": 211.3976,
"eval_samples_per_second": 6.618,
"eval_steps_per_second": 0.828,
"eval_wer": 0.8373902326297951,
"step": 400
},
{
"epoch": 0.54,
"grad_norm": 1.9866678714752197,
"learning_rate": 0.00029794144556267153,
"loss": 0.6967,
"step": 800
},
{
"epoch": 0.54,
"eval_loss": 0.38119566440582275,
"eval_runtime": 210.7469,
"eval_samples_per_second": 6.638,
"eval_steps_per_second": 0.83,
"eval_wer": 0.5935140964412263,
"step": 800
},
{
"epoch": 0.81,
"grad_norm": 1.540819525718689,
"learning_rate": 0.00029519670631290027,
"loss": 0.4806,
"step": 1200
},
{
"epoch": 0.81,
"eval_loss": 0.3368071913719177,
"eval_runtime": 211.9206,
"eval_samples_per_second": 6.602,
"eval_steps_per_second": 0.826,
"eval_wer": 0.4757356339547065,
"step": 1200
},
{
"epoch": 1.09,
"grad_norm": 0.9303820729255676,
"learning_rate": 0.000292451967063129,
"loss": 0.3996,
"step": 1600
},
{
"epoch": 1.09,
"eval_loss": 0.199427992105484,
"eval_runtime": 210.2196,
"eval_samples_per_second": 6.655,
"eval_steps_per_second": 0.832,
"eval_wer": 0.3142813125866584,
"step": 1600
},
{
"epoch": 1.36,
"grad_norm": 0.5186350345611572,
"learning_rate": 0.0002897072278133577,
"loss": 0.3497,
"step": 2000
},
{
"epoch": 1.36,
"eval_loss": 0.16836047172546387,
"eval_runtime": 210.345,
"eval_samples_per_second": 6.651,
"eval_steps_per_second": 0.832,
"eval_wer": 0.2564319827453397,
"step": 2000
},
{
"epoch": 1.63,
"grad_norm": 0.9710769057273865,
"learning_rate": 0.00028696248856358643,
"loss": 0.3372,
"step": 2400
},
{
"epoch": 1.63,
"eval_loss": 0.15862207114696503,
"eval_runtime": 212.5997,
"eval_samples_per_second": 6.58,
"eval_steps_per_second": 0.823,
"eval_wer": 0.2418733631181636,
"step": 2400
},
{
"epoch": 1.9,
"grad_norm": 0.8559458255767822,
"learning_rate": 0.00028421774931381517,
"loss": 0.312,
"step": 2800
},
{
"epoch": 1.9,
"eval_loss": 0.1412586122751236,
"eval_runtime": 211.389,
"eval_samples_per_second": 6.618,
"eval_steps_per_second": 0.828,
"eval_wer": 0.2226929594823602,
"step": 2800
},
{
"epoch": 2.17,
"grad_norm": 0.6652762293815613,
"learning_rate": 0.0002814730100640439,
"loss": 0.2797,
"step": 3200
},
{
"epoch": 2.17,
"eval_loss": 0.1465640813112259,
"eval_runtime": 210.5595,
"eval_samples_per_second": 6.644,
"eval_steps_per_second": 0.831,
"eval_wer": 0.22731474349098751,
"step": 3200
},
{
"epoch": 2.44,
"grad_norm": 0.361288458108902,
"learning_rate": 0.0002787282708142726,
"loss": 0.2554,
"step": 3600
},
{
"epoch": 2.44,
"eval_loss": 0.1542745977640152,
"eval_runtime": 211.2213,
"eval_samples_per_second": 6.623,
"eval_steps_per_second": 0.829,
"eval_wer": 0.2365583115082422,
"step": 3600
},
{
"epoch": 2.71,
"grad_norm": 0.5574541091918945,
"learning_rate": 0.00027598353156450134,
"loss": 0.2613,
"step": 4000
},
{
"epoch": 2.71,
"eval_loss": 0.14501063525676727,
"eval_runtime": 212.1205,
"eval_samples_per_second": 6.595,
"eval_steps_per_second": 0.825,
"eval_wer": 0.23255276536743183,
"step": 4000
},
{
"epoch": 2.99,
"grad_norm": 0.4116342067718506,
"learning_rate": 0.0002732387923147301,
"loss": 0.2399,
"step": 4400
},
{
"epoch": 2.99,
"eval_loss": 0.12376150488853455,
"eval_runtime": 212.9821,
"eval_samples_per_second": 6.569,
"eval_steps_per_second": 0.822,
"eval_wer": 0.20297334771221692,
"step": 4400
},
{
"epoch": 3.26,
"grad_norm": 0.834938645362854,
"learning_rate": 0.0002704940530649588,
"loss": 0.2125,
"step": 4800
},
{
"epoch": 3.26,
"eval_loss": 0.09888482838869095,
"eval_runtime": 213.2706,
"eval_samples_per_second": 6.56,
"eval_steps_per_second": 0.821,
"eval_wer": 0.16099214296718534,
"step": 4800
},
{
"epoch": 3.53,
"grad_norm": 0.4636085331439972,
"learning_rate": 0.0002677493138151875,
"loss": 0.2144,
"step": 5200
},
{
"epoch": 3.53,
"eval_loss": 0.09842105209827423,
"eval_runtime": 210.8983,
"eval_samples_per_second": 6.634,
"eval_steps_per_second": 0.83,
"eval_wer": 0.1612232321676167,
"step": 5200
},
{
"epoch": 3.8,
"grad_norm": 0.4843950569629669,
"learning_rate": 0.0002650045745654163,
"loss": 0.212,
"step": 5600
},
{
"epoch": 3.8,
"eval_loss": 0.08756741881370544,
"eval_runtime": 210.9696,
"eval_samples_per_second": 6.631,
"eval_steps_per_second": 0.83,
"eval_wer": 0.1507471884147281,
"step": 5600
},
{
"epoch": 4.07,
"grad_norm": 0.847865641117096,
"learning_rate": 0.000262259835315645,
"loss": 0.1964,
"step": 6000
},
{
"epoch": 4.07,
"eval_loss": 0.1017051413655281,
"eval_runtime": 211.736,
"eval_samples_per_second": 6.607,
"eval_steps_per_second": 0.827,
"eval_wer": 0.17531967339393006,
"step": 6000
},
{
"epoch": 4.34,
"grad_norm": 0.35252293944358826,
"learning_rate": 0.0002595150960658737,
"loss": 0.1814,
"step": 6400
},
{
"epoch": 4.34,
"eval_loss": 0.0966610386967659,
"eval_runtime": 211.5792,
"eval_samples_per_second": 6.612,
"eval_steps_per_second": 0.827,
"eval_wer": 0.1653828377753813,
"step": 6400
},
{
"epoch": 4.61,
"grad_norm": 0.38534751534461975,
"learning_rate": 0.00025677035681610246,
"loss": 0.1772,
"step": 6800
},
{
"epoch": 4.61,
"eval_loss": 0.0955633744597435,
"eval_runtime": 211.2723,
"eval_samples_per_second": 6.622,
"eval_steps_per_second": 0.828,
"eval_wer": 0.16314897550454474,
"step": 6800
},
{
"epoch": 4.88,
"grad_norm": 0.3151361346244812,
"learning_rate": 0.0002540256175663312,
"loss": 0.1748,
"step": 7200
},
{
"epoch": 4.88,
"eval_loss": 0.08700825273990631,
"eval_runtime": 213.02,
"eval_samples_per_second": 6.567,
"eval_steps_per_second": 0.822,
"eval_wer": 0.14828223694346018,
"step": 7200
},
{
"epoch": 5.16,
"grad_norm": 1.1274446249008179,
"learning_rate": 0.0002512808783165599,
"loss": 0.1706,
"step": 7600
},
{
"epoch": 5.16,
"eval_loss": 0.07706322520971298,
"eval_runtime": 212.6781,
"eval_samples_per_second": 6.578,
"eval_steps_per_second": 0.823,
"eval_wer": 0.1306424279771992,
"step": 7600
},
{
"epoch": 5.43,
"grad_norm": 1.3828215599060059,
"learning_rate": 0.0002485361390667886,
"loss": 0.1545,
"step": 8000
},
{
"epoch": 5.43,
"eval_loss": 0.06534561514854431,
"eval_runtime": 211.3365,
"eval_samples_per_second": 6.62,
"eval_steps_per_second": 0.828,
"eval_wer": 0.1198582652904021,
"step": 8000
},
{
"epoch": 5.7,
"grad_norm": 0.4036734402179718,
"learning_rate": 0.00024579139981701736,
"loss": 0.1627,
"step": 8400
},
{
"epoch": 5.7,
"eval_loss": 0.060022782534360886,
"eval_runtime": 211.4007,
"eval_samples_per_second": 6.618,
"eval_steps_per_second": 0.828,
"eval_wer": 0.11030657833923894,
"step": 8400
},
{
"epoch": 5.97,
"grad_norm": 0.3543277084827423,
"learning_rate": 0.0002430466605672461,
"loss": 0.1541,
"step": 8800
},
{
"epoch": 5.97,
"eval_loss": 0.058936990797519684,
"eval_runtime": 212.2764,
"eval_samples_per_second": 6.59,
"eval_steps_per_second": 0.824,
"eval_wer": 0.10676321059929132,
"step": 8800
},
{
"epoch": 6.24,
"grad_norm": 0.23857054114341736,
"learning_rate": 0.00024030192131747481,
"loss": 0.1382,
"step": 9200
},
{
"epoch": 6.24,
"eval_loss": 0.07100619375705719,
"eval_runtime": 213.3512,
"eval_samples_per_second": 6.557,
"eval_steps_per_second": 0.82,
"eval_wer": 0.12309351409644123,
"step": 9200
},
{
"epoch": 6.51,
"grad_norm": 0.6902241706848145,
"learning_rate": 0.00023755718206770355,
"loss": 0.1397,
"step": 9600
},
{
"epoch": 6.51,
"eval_loss": 0.06513579189777374,
"eval_runtime": 211.6334,
"eval_samples_per_second": 6.61,
"eval_steps_per_second": 0.827,
"eval_wer": 0.12478816823293791,
"step": 9600
},
{
"epoch": 6.78,
"grad_norm": 0.3040298819541931,
"learning_rate": 0.00023481244281793227,
"loss": 0.1345,
"step": 10000
},
{
"epoch": 6.78,
"eval_loss": 0.06700535863637924,
"eval_runtime": 211.9668,
"eval_samples_per_second": 6.6,
"eval_steps_per_second": 0.826,
"eval_wer": 0.11939608688953936,
"step": 10000
},
{
"epoch": 7.06,
"grad_norm": 1.0794196128845215,
"learning_rate": 0.000232067703568161,
"loss": 0.1281,
"step": 10400
},
{
"epoch": 7.06,
"eval_loss": 0.054103270173072815,
"eval_runtime": 209.427,
"eval_samples_per_second": 6.68,
"eval_steps_per_second": 0.836,
"eval_wer": 0.10060083192112156,
"step": 10400
},
{
"epoch": 7.33,
"grad_norm": 0.8765202164649963,
"learning_rate": 0.00022932296431838972,
"loss": 0.1315,
"step": 10800
},
{
"epoch": 7.33,
"eval_loss": 0.05593947321176529,
"eval_runtime": 208.8917,
"eval_samples_per_second": 6.697,
"eval_steps_per_second": 0.838,
"eval_wer": 0.10622400246495148,
"step": 10800
},
{
"epoch": 7.6,
"grad_norm": 0.5989521741867065,
"learning_rate": 0.00022657822506861846,
"loss": 0.1234,
"step": 11200
},
{
"epoch": 7.6,
"eval_loss": 0.05276945233345032,
"eval_runtime": 209.5765,
"eval_samples_per_second": 6.675,
"eval_steps_per_second": 0.835,
"eval_wer": 0.09698043444769681,
"step": 11200
},
{
"epoch": 7.87,
"grad_norm": 0.3679282069206238,
"learning_rate": 0.00022383348581884717,
"loss": 0.1248,
"step": 11600
},
{
"epoch": 7.87,
"eval_loss": 0.044818080961704254,
"eval_runtime": 209.7397,
"eval_samples_per_second": 6.67,
"eval_steps_per_second": 0.834,
"eval_wer": 0.0865043906948082,
"step": 11600
},
{
"epoch": 8.14,
"grad_norm": 0.3467447757720947,
"learning_rate": 0.00022108874656907594,
"loss": 0.115,
"step": 12000
},
{
"epoch": 8.14,
"eval_loss": 0.0545777752995491,
"eval_runtime": 209.6701,
"eval_samples_per_second": 6.672,
"eval_steps_per_second": 0.835,
"eval_wer": 0.0993683561854876,
"step": 12000
},
{
"epoch": 8.41,
"grad_norm": 1.4704556465148926,
"learning_rate": 0.00021834400731930465,
"loss": 0.1143,
"step": 12400
},
{
"epoch": 8.41,
"eval_loss": 0.05952217057347298,
"eval_runtime": 210.7235,
"eval_samples_per_second": 6.639,
"eval_steps_per_second": 0.83,
"eval_wer": 0.10861192420274225,
"step": 12400
},
{
"epoch": 8.68,
"grad_norm": 0.2509690821170807,
"learning_rate": 0.0002155992680695334,
"loss": 0.1169,
"step": 12800
},
{
"epoch": 8.68,
"eval_loss": 0.04853290319442749,
"eval_runtime": 209.7854,
"eval_samples_per_second": 6.669,
"eval_steps_per_second": 0.834,
"eval_wer": 0.08742874749653366,
"step": 12800
},
{
"epoch": 8.96,
"grad_norm": 0.2134234458208084,
"learning_rate": 0.0002128545288197621,
"loss": 0.1165,
"step": 13200
},
{
"epoch": 8.96,
"eval_loss": 0.05237515643239021,
"eval_runtime": 210.2596,
"eval_samples_per_second": 6.654,
"eval_steps_per_second": 0.832,
"eval_wer": 0.09767370204899091,
"step": 13200
},
{
"epoch": 9.23,
"grad_norm": 0.4314160943031311,
"learning_rate": 0.00021010978956999084,
"loss": 0.1035,
"step": 13600
},
{
"epoch": 9.23,
"eval_loss": 0.044478464871644974,
"eval_runtime": 210.5273,
"eval_samples_per_second": 6.645,
"eval_steps_per_second": 0.831,
"eval_wer": 0.0837313202896318,
"step": 13600
},
{
"epoch": 9.5,
"grad_norm": 0.4133451581001282,
"learning_rate": 0.00020736505032021955,
"loss": 0.1017,
"step": 14000
},
{
"epoch": 9.5,
"eval_loss": 0.04130551964044571,
"eval_runtime": 210.2347,
"eval_samples_per_second": 6.654,
"eval_steps_per_second": 0.832,
"eval_wer": 0.0791865660144816,
"step": 14000
},
{
"epoch": 9.77,
"grad_norm": 0.29300200939178467,
"learning_rate": 0.0002046203110704483,
"loss": 0.109,
"step": 14400
},
{
"epoch": 9.77,
"eval_loss": 0.04202074185013771,
"eval_runtime": 211.2523,
"eval_samples_per_second": 6.622,
"eval_steps_per_second": 0.828,
"eval_wer": 0.08326914188876906,
"step": 14400
},
{
"epoch": 10.04,
"grad_norm": 0.2547191083431244,
"learning_rate": 0.000201875571820677,
"loss": 0.1018,
"step": 14800
},
{
"epoch": 10.04,
"eval_loss": 0.04540720209479332,
"eval_runtime": 210.6476,
"eval_samples_per_second": 6.641,
"eval_steps_per_second": 0.831,
"eval_wer": 0.0823447850870436,
"step": 14800
},
{
"epoch": 10.31,
"grad_norm": 0.2799816131591797,
"learning_rate": 0.00019913083257090574,
"loss": 0.0929,
"step": 15200
},
{
"epoch": 10.31,
"eval_loss": 0.042868729680776596,
"eval_runtime": 209.1492,
"eval_samples_per_second": 6.689,
"eval_steps_per_second": 0.837,
"eval_wer": 0.0785703281466646,
"step": 15200
},
{
"epoch": 10.58,
"grad_norm": 0.18747101724147797,
"learning_rate": 0.00019638609332113446,
"loss": 0.0956,
"step": 15600
},
{
"epoch": 10.58,
"eval_loss": 0.04033521190285683,
"eval_runtime": 209.4853,
"eval_samples_per_second": 6.678,
"eval_steps_per_second": 0.835,
"eval_wer": 0.07718379294407642,
"step": 15600
},
{
"epoch": 10.85,
"grad_norm": 0.3078967034816742,
"learning_rate": 0.0001936413540713632,
"loss": 0.0986,
"step": 16000
},
{
"epoch": 10.85,
"eval_loss": 0.046831514686346054,
"eval_runtime": 209.7149,
"eval_samples_per_second": 6.671,
"eval_steps_per_second": 0.834,
"eval_wer": 0.09058696656909566,
"step": 16000
},
{
"epoch": 11.13,
"grad_norm": 0.10461205244064331,
"learning_rate": 0.0001908966148215919,
"loss": 0.0941,
"step": 16400
},
{
"epoch": 11.13,
"eval_loss": 0.03615270182490349,
"eval_runtime": 209.392,
"eval_samples_per_second": 6.681,
"eval_steps_per_second": 0.836,
"eval_wer": 0.06940378986288707,
"step": 16400
},
{
"epoch": 11.4,
"grad_norm": 0.5313912630081177,
"learning_rate": 0.00018815187557182068,
"loss": 0.0845,
"step": 16800
},
{
"epoch": 11.4,
"eval_loss": 0.038695286959409714,
"eval_runtime": 210.5021,
"eval_samples_per_second": 6.646,
"eval_steps_per_second": 0.831,
"eval_wer": 0.0701740871976583,
"step": 16800
},
{
"epoch": 11.67,
"grad_norm": 0.5434714555740356,
"learning_rate": 0.0001854071363220494,
"loss": 0.0955,
"step": 17200
},
{
"epoch": 11.67,
"eval_loss": 0.03512905538082123,
"eval_runtime": 210.6242,
"eval_samples_per_second": 6.642,
"eval_steps_per_second": 0.831,
"eval_wer": 0.06270220305037745,
"step": 17200
},
{
"epoch": 11.94,
"grad_norm": 0.6010161638259888,
"learning_rate": 0.00018266239707227813,
"loss": 0.089,
"step": 17600
},
{
"epoch": 11.94,
"eval_loss": 0.03609244525432587,
"eval_runtime": 210.1814,
"eval_samples_per_second": 6.656,
"eval_steps_per_second": 0.833,
"eval_wer": 0.06747804652595903,
"step": 17600
},
{
"epoch": 12.21,
"grad_norm": 0.2907910943031311,
"learning_rate": 0.00017991765782250684,
"loss": 0.0806,
"step": 18000
},
{
"epoch": 12.21,
"eval_loss": 0.03811544552445412,
"eval_runtime": 210.0932,
"eval_samples_per_second": 6.659,
"eval_steps_per_second": 0.833,
"eval_wer": 0.06847943306116161,
"step": 18000
},
{
"epoch": 12.48,
"grad_norm": 0.25918644666671753,
"learning_rate": 0.00017717291857273558,
"loss": 0.0803,
"step": 18400
},
{
"epoch": 12.48,
"eval_loss": 0.0369856134057045,
"eval_runtime": 210.0481,
"eval_samples_per_second": 6.66,
"eval_steps_per_second": 0.833,
"eval_wer": 0.06747804652595903,
"step": 18400
},
{
"epoch": 12.75,
"grad_norm": 0.3781072199344635,
"learning_rate": 0.0001744281793229643,
"loss": 0.0839,
"step": 18800
},
{
"epoch": 12.75,
"eval_loss": 0.0333174392580986,
"eval_runtime": 210.517,
"eval_samples_per_second": 6.646,
"eval_steps_per_second": 0.831,
"eval_wer": 0.06193190571560622,
"step": 18800
},
{
"epoch": 13.03,
"grad_norm": 0.42045047879219055,
"learning_rate": 0.00017168344007319303,
"loss": 0.0834,
"step": 19200
},
{
"epoch": 13.03,
"eval_loss": 0.033445805311203,
"eval_runtime": 210.6516,
"eval_samples_per_second": 6.641,
"eval_steps_per_second": 0.831,
"eval_wer": 0.057695270374364505,
"step": 19200
},
{
"epoch": 13.3,
"grad_norm": 0.4078648090362549,
"learning_rate": 0.00016893870082342174,
"loss": 0.0779,
"step": 19600
},
{
"epoch": 13.3,
"eval_loss": 0.03577824681997299,
"eval_runtime": 210.5433,
"eval_samples_per_second": 6.645,
"eval_steps_per_second": 0.831,
"eval_wer": 0.06208596518256047,
"step": 19600
},
{
"epoch": 13.57,
"grad_norm": 0.8884561657905579,
"learning_rate": 0.00016619396157365048,
"loss": 0.0773,
"step": 20000
},
{
"epoch": 13.57,
"eval_loss": 0.032973628491163254,
"eval_runtime": 210.0131,
"eval_samples_per_second": 6.661,
"eval_steps_per_second": 0.833,
"eval_wer": 0.05646279463873055,
"step": 20000
},
{
"epoch": 13.84,
"grad_norm": 0.5232133269309998,
"learning_rate": 0.0001634492223238792,
"loss": 0.0717,
"step": 20400
},
{
"epoch": 13.84,
"eval_loss": 0.03495289012789726,
"eval_runtime": 210.8453,
"eval_samples_per_second": 6.635,
"eval_steps_per_second": 0.83,
"eval_wer": 0.0625481435834232,
"step": 20400
},
{
"epoch": 14.11,
"grad_norm": 0.3025607466697693,
"learning_rate": 0.00016070448307410794,
"loss": 0.0737,
"step": 20800
},
{
"epoch": 14.11,
"eval_loss": 0.03549855947494507,
"eval_runtime": 209.8898,
"eval_samples_per_second": 6.665,
"eval_steps_per_second": 0.834,
"eval_wer": 0.060314281312586655,
"step": 20800
},
{
"epoch": 14.38,
"grad_norm": 1.34682297706604,
"learning_rate": 0.00015795974382433665,
"loss": 0.075,
"step": 21200
},
{
"epoch": 14.38,
"eval_loss": 0.03607061505317688,
"eval_runtime": 209.844,
"eval_samples_per_second": 6.667,
"eval_steps_per_second": 0.834,
"eval_wer": 0.06262517331690032,
"step": 21200
},
{
"epoch": 14.65,
"grad_norm": 0.1968027800321579,
"learning_rate": 0.00015521500457456541,
"loss": 0.0715,
"step": 21600
},
{
"epoch": 14.65,
"eval_loss": 0.03143769130110741,
"eval_runtime": 212.2,
"eval_samples_per_second": 6.593,
"eval_steps_per_second": 0.825,
"eval_wer": 0.057464181173933135,
"step": 21600
},
{
"epoch": 14.93,
"grad_norm": 0.19861555099487305,
"learning_rate": 0.00015247026532479413,
"loss": 0.0722,
"step": 22000
},
{
"epoch": 14.93,
"eval_loss": 0.03097483143210411,
"eval_runtime": 209.3416,
"eval_samples_per_second": 6.683,
"eval_steps_per_second": 0.836,
"eval_wer": 0.05754121090741026,
"step": 22000
},
{
"epoch": 15.2,
"grad_norm": 0.333710640668869,
"learning_rate": 0.00014972552607502287,
"loss": 0.0666,
"step": 22400
},
{
"epoch": 15.2,
"eval_loss": 0.031425874680280685,
"eval_runtime": 210.734,
"eval_samples_per_second": 6.639,
"eval_steps_per_second": 0.83,
"eval_wer": 0.05592358650439069,
"step": 22400
},
{
"epoch": 15.47,
"grad_norm": 0.4220418632030487,
"learning_rate": 0.00014698078682525158,
"loss": 0.0672,
"step": 22800
},
{
"epoch": 15.47,
"eval_loss": 0.030694805085659027,
"eval_runtime": 208.4031,
"eval_samples_per_second": 6.713,
"eval_steps_per_second": 0.84,
"eval_wer": 0.05345863503312279,
"step": 22800
},
{
"epoch": 15.74,
"grad_norm": 0.9596304297447205,
"learning_rate": 0.00014423604757548032,
"loss": 0.0664,
"step": 23200
},
{
"epoch": 15.74,
"eval_loss": 0.03153248876333237,
"eval_runtime": 209.4047,
"eval_samples_per_second": 6.681,
"eval_steps_per_second": 0.836,
"eval_wer": 0.05515328916961947,
"step": 23200
},
{
"epoch": 16.01,
"grad_norm": 0.5310277938842773,
"learning_rate": 0.00014149130832570906,
"loss": 0.0678,
"step": 23600
},
{
"epoch": 16.01,
"eval_loss": 0.031200062483549118,
"eval_runtime": 212.7627,
"eval_samples_per_second": 6.575,
"eval_steps_per_second": 0.823,
"eval_wer": 0.054845170235710984,
"step": 23600
},
{
"epoch": 16.28,
"grad_norm": 0.3012249767780304,
"learning_rate": 0.00013874656907593777,
"loss": 0.0616,
"step": 24000
},
{
"epoch": 16.28,
"eval_loss": 0.0315285250544548,
"eval_runtime": 208.0351,
"eval_samples_per_second": 6.725,
"eval_steps_per_second": 0.841,
"eval_wer": 0.052688337698351566,
"step": 24000
},
{
"epoch": 16.55,
"grad_norm": 0.2116428166627884,
"learning_rate": 0.0001360018298261665,
"loss": 0.0644,
"step": 24400
},
{
"epoch": 16.55,
"eval_loss": 0.026890287175774574,
"eval_runtime": 209.0713,
"eval_samples_per_second": 6.691,
"eval_steps_per_second": 0.837,
"eval_wer": 0.048066553689724234,
"step": 24400
},
{
"epoch": 16.82,
"grad_norm": 0.44349026679992676,
"learning_rate": 0.00013325709057639522,
"loss": 0.062,
"step": 24800
},
{
"epoch": 16.82,
"eval_loss": 0.030771076679229736,
"eval_runtime": 210.1625,
"eval_samples_per_second": 6.657,
"eval_steps_per_second": 0.833,
"eval_wer": 0.05130180249576336,
"step": 24800
},
{
"epoch": 17.1,
"grad_norm": 0.5564978718757629,
"learning_rate": 0.00013051235132662396,
"loss": 0.0584,
"step": 25200
},
{
"epoch": 17.1,
"eval_loss": 0.02937587909400463,
"eval_runtime": 209.0218,
"eval_samples_per_second": 6.693,
"eval_steps_per_second": 0.837,
"eval_wer": 0.05022338622708365,
"step": 25200
},
{
"epoch": 17.37,
"grad_norm": 0.3329567015171051,
"learning_rate": 0.0001277676120768527,
"loss": 0.0563,
"step": 25600
},
{
"epoch": 17.37,
"eval_loss": 0.02935618720948696,
"eval_runtime": 208.6661,
"eval_samples_per_second": 6.704,
"eval_steps_per_second": 0.839,
"eval_wer": 0.04922199969188107,
"step": 25600
},
{
"epoch": 17.64,
"grad_norm": 1.3281415700912476,
"learning_rate": 0.0001250228728270814,
"loss": 0.0547,
"step": 26000
},
{
"epoch": 17.64,
"eval_loss": 0.028093835338950157,
"eval_runtime": 208.0055,
"eval_samples_per_second": 6.726,
"eval_steps_per_second": 0.841,
"eval_wer": 0.045216453551070714,
"step": 26000
},
{
"epoch": 17.91,
"grad_norm": 0.33733347058296204,
"learning_rate": 0.00012227813357731015,
"loss": 0.056,
"step": 26400
},
{
"epoch": 17.91,
"eval_loss": 0.02792263962328434,
"eval_runtime": 208.746,
"eval_samples_per_second": 6.702,
"eval_steps_per_second": 0.838,
"eval_wer": 0.04513942381759359,
"step": 26400
},
{
"epoch": 18.18,
"grad_norm": 0.172608882188797,
"learning_rate": 0.00011953339432753888,
"loss": 0.0572,
"step": 26800
},
{
"epoch": 18.18,
"eval_loss": 0.029318030923604965,
"eval_runtime": 208.963,
"eval_samples_per_second": 6.695,
"eval_steps_per_second": 0.837,
"eval_wer": 0.045986750885841934,
"step": 26800
},
{
"epoch": 18.45,
"grad_norm": 1.2717292308807373,
"learning_rate": 0.0001167886550777676,
"loss": 0.0544,
"step": 27200
},
{
"epoch": 18.45,
"eval_loss": 0.02826772816479206,
"eval_runtime": 208.8935,
"eval_samples_per_second": 6.697,
"eval_steps_per_second": 0.838,
"eval_wer": 0.04644892928670467,
"step": 27200
},
{
"epoch": 18.72,
"grad_norm": 0.9943646192550659,
"learning_rate": 0.00011404391582799633,
"loss": 0.052,
"step": 27600
},
{
"epoch": 18.72,
"eval_loss": 0.02736526168882847,
"eval_runtime": 209.8515,
"eval_samples_per_second": 6.667,
"eval_steps_per_second": 0.834,
"eval_wer": 0.04375288861500539,
"step": 27600
},
{
"epoch": 19.0,
"grad_norm": 0.289604514837265,
"learning_rate": 0.00011129917657822506,
"loss": 0.0533,
"step": 28000
},
{
"epoch": 19.0,
"eval_loss": 0.0264176856726408,
"eval_runtime": 210.0905,
"eval_samples_per_second": 6.659,
"eval_steps_per_second": 0.833,
"eval_wer": 0.041287937143737484,
"step": 28000
},
{
"epoch": 19.27,
"grad_norm": 0.4070824086666107,
"learning_rate": 0.00010855443732845378,
"loss": 0.046,
"step": 28400
},
{
"epoch": 19.27,
"eval_loss": 0.02758474461734295,
"eval_runtime": 209.6786,
"eval_samples_per_second": 6.672,
"eval_steps_per_second": 0.835,
"eval_wer": 0.04121090741026036,
"step": 28400
},
{
"epoch": 19.54,
"grad_norm": 0.8942829966545105,
"learning_rate": 0.00010580969807868251,
"loss": 0.0498,
"step": 28800
},
{
"epoch": 19.54,
"eval_loss": 0.02821315824985504,
"eval_runtime": 210.0298,
"eval_samples_per_second": 6.661,
"eval_steps_per_second": 0.833,
"eval_wer": 0.04190417501155446,
"step": 28800
},
{
"epoch": 19.81,
"grad_norm": 0.6549252271652222,
"learning_rate": 0.00010306495882891125,
"loss": 0.0454,
"step": 29200
},
{
"epoch": 19.81,
"eval_loss": 0.027856331318616867,
"eval_runtime": 210.3168,
"eval_samples_per_second": 6.652,
"eval_steps_per_second": 0.832,
"eval_wer": 0.04167308581112309,
"step": 29200
},
{
"epoch": 20.08,
"grad_norm": 7.543890953063965,
"learning_rate": 0.00010032021957913997,
"loss": 0.0483,
"step": 29600
},
{
"epoch": 20.08,
"eval_loss": 0.025978881865739822,
"eval_runtime": 210.6554,
"eval_samples_per_second": 6.641,
"eval_steps_per_second": 0.831,
"eval_wer": 0.0395932830072408,
"step": 29600
},
{
"epoch": 20.35,
"grad_norm": 0.2715985178947449,
"learning_rate": 9.75754803293687e-05,
"loss": 0.0447,
"step": 30000
},
{
"epoch": 20.35,
"eval_loss": 0.026736732572317123,
"eval_runtime": 209.4915,
"eval_samples_per_second": 6.678,
"eval_steps_per_second": 0.835,
"eval_wer": 0.041750115544600216,
"step": 30000
}
],
"logging_steps": 400,
"max_steps": 44220,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 400,
"total_flos": 9.150437899797026e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}