md_d_l2_arctic / trainer_state.json
tuanio's picture
End of training
d1757e9 verified
{
"best_metric": 0.5420793793724458,
"best_model_checkpoint": "md_d_l2_arctic/checkpoint-3700",
"epoch": 100.0,
"eval_steps": 100,
"global_step": 7900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.27,
"grad_norm": 92.3280258178711,
"learning_rate": 2.4050632911392408e-06,
"loss": 14.3657,
"step": 100
},
{
"epoch": 1.27,
"eval_loss": 9.120972633361816,
"eval_runtime": 15.883,
"eval_samples_per_second": 28.332,
"eval_steps_per_second": 1.448,
"eval_wer": 2.4374870125372308,
"step": 100
},
{
"epoch": 2.53,
"grad_norm": 1.9874589443206787,
"learning_rate": 4.9113924050632915e-06,
"loss": 4.382,
"step": 200
},
{
"epoch": 2.53,
"eval_loss": 3.421921730041504,
"eval_runtime": 10.9345,
"eval_samples_per_second": 41.154,
"eval_steps_per_second": 2.103,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 3.8,
"grad_norm": 3.151137590408325,
"learning_rate": 7.443037974683544e-06,
"loss": 3.2514,
"step": 300
},
{
"epoch": 3.8,
"eval_loss": 2.788123369216919,
"eval_runtime": 10.9315,
"eval_samples_per_second": 41.165,
"eval_steps_per_second": 2.104,
"eval_wer": 0.9979912724250191,
"step": 300
},
{
"epoch": 5.06,
"grad_norm": 3.4454073905944824,
"learning_rate": 9.974683544303799e-06,
"loss": 2.4508,
"step": 400
},
{
"epoch": 5.06,
"eval_loss": 1.8000441789627075,
"eval_runtime": 10.9542,
"eval_samples_per_second": 41.08,
"eval_steps_per_second": 2.1,
"eval_wer": 0.7380342176352428,
"step": 400
},
{
"epoch": 6.33,
"grad_norm": 2.928997039794922,
"learning_rate": 1.2506329113924051e-05,
"loss": 1.6168,
"step": 500
},
{
"epoch": 6.33,
"eval_loss": 1.131545901298523,
"eval_runtime": 11.2822,
"eval_samples_per_second": 39.886,
"eval_steps_per_second": 2.039,
"eval_wer": 0.967791092332202,
"step": 500
},
{
"epoch": 7.59,
"grad_norm": 2.174651861190796,
"learning_rate": 1.5037974683544306e-05,
"loss": 1.1212,
"step": 600
},
{
"epoch": 7.59,
"eval_loss": 0.8748846054077148,
"eval_runtime": 11.1275,
"eval_samples_per_second": 40.44,
"eval_steps_per_second": 2.067,
"eval_wer": 1.065803144697652,
"step": 600
},
{
"epoch": 8.86,
"grad_norm": 7.105869770050049,
"learning_rate": 1.7569620253164558e-05,
"loss": 0.8953,
"step": 700
},
{
"epoch": 8.86,
"eval_loss": 0.7655201554298401,
"eval_runtime": 11.2119,
"eval_samples_per_second": 40.136,
"eval_steps_per_second": 2.051,
"eval_wer": 0.9655052988848098,
"step": 700
},
{
"epoch": 10.13,
"grad_norm": 2.2692487239837646,
"learning_rate": 1.99887482419128e-05,
"loss": 0.7684,
"step": 800
},
{
"epoch": 10.13,
"eval_loss": 0.6687426567077637,
"eval_runtime": 11.2107,
"eval_samples_per_second": 40.14,
"eval_steps_per_second": 2.052,
"eval_wer": 0.7621389485350142,
"step": 800
},
{
"epoch": 11.39,
"grad_norm": 3.137871742248535,
"learning_rate": 1.970745428973277e-05,
"loss": 0.6661,
"step": 900
},
{
"epoch": 11.39,
"eval_loss": 0.6319410800933838,
"eval_runtime": 11.2724,
"eval_samples_per_second": 39.921,
"eval_steps_per_second": 2.04,
"eval_wer": 0.6755558634065248,
"step": 900
},
{
"epoch": 12.66,
"grad_norm": 2.894819498062134,
"learning_rate": 1.9426160337552744e-05,
"loss": 0.6306,
"step": 1000
},
{
"epoch": 12.66,
"eval_loss": 0.6195651292800903,
"eval_runtime": 11.3009,
"eval_samples_per_second": 39.82,
"eval_steps_per_second": 2.035,
"eval_wer": 0.6963358038373624,
"step": 1000
},
{
"epoch": 13.92,
"grad_norm": 3.4255077838897705,
"learning_rate": 1.9144866385372717e-05,
"loss": 0.5759,
"step": 1100
},
{
"epoch": 13.92,
"eval_loss": 0.5875259637832642,
"eval_runtime": 11.1702,
"eval_samples_per_second": 40.286,
"eval_steps_per_second": 2.059,
"eval_wer": 0.5965228233012398,
"step": 1100
},
{
"epoch": 15.19,
"grad_norm": 2.189448118209839,
"learning_rate": 1.8863572433192687e-05,
"loss": 0.5417,
"step": 1200
},
{
"epoch": 15.19,
"eval_loss": 0.5779715776443481,
"eval_runtime": 11.2889,
"eval_samples_per_second": 39.862,
"eval_steps_per_second": 2.037,
"eval_wer": 0.6528364618688093,
"step": 1200
},
{
"epoch": 16.46,
"grad_norm": 6.146909713745117,
"learning_rate": 1.858227848101266e-05,
"loss": 0.528,
"step": 1300
},
{
"epoch": 16.46,
"eval_loss": 0.5798078179359436,
"eval_runtime": 11.2225,
"eval_samples_per_second": 40.098,
"eval_steps_per_second": 2.049,
"eval_wer": 0.6539447253584539,
"step": 1300
},
{
"epoch": 17.72,
"grad_norm": 3.1426494121551514,
"learning_rate": 1.8300984528832633e-05,
"loss": 0.4857,
"step": 1400
},
{
"epoch": 17.72,
"eval_loss": 0.5568873286247253,
"eval_runtime": 11.201,
"eval_samples_per_second": 40.175,
"eval_steps_per_second": 2.053,
"eval_wer": 0.5724873588695713,
"step": 1400
},
{
"epoch": 18.99,
"grad_norm": 1.9612127542495728,
"learning_rate": 1.8019690576652603e-05,
"loss": 0.4655,
"step": 1500
},
{
"epoch": 18.99,
"eval_loss": 0.549960196018219,
"eval_runtime": 11.3494,
"eval_samples_per_second": 39.65,
"eval_steps_per_second": 2.027,
"eval_wer": 0.575535083466094,
"step": 1500
},
{
"epoch": 20.25,
"grad_norm": 2.946164131164551,
"learning_rate": 1.7738396624472576e-05,
"loss": 0.4526,
"step": 1600
},
{
"epoch": 20.25,
"eval_loss": 0.5582863092422485,
"eval_runtime": 11.3026,
"eval_samples_per_second": 39.814,
"eval_steps_per_second": 2.035,
"eval_wer": 0.5776130775091778,
"step": 1600
},
{
"epoch": 21.52,
"grad_norm": 2.338858127593994,
"learning_rate": 1.745710267229255e-05,
"loss": 0.4287,
"step": 1700
},
{
"epoch": 21.52,
"eval_loss": 0.555654764175415,
"eval_runtime": 11.2714,
"eval_samples_per_second": 39.924,
"eval_steps_per_second": 2.041,
"eval_wer": 0.5609891251645078,
"step": 1700
},
{
"epoch": 22.78,
"grad_norm": 2.5261471271514893,
"learning_rate": 1.717580872011252e-05,
"loss": 0.4149,
"step": 1800
},
{
"epoch": 22.78,
"eval_loss": 0.5575445294380188,
"eval_runtime": 11.3966,
"eval_samples_per_second": 39.486,
"eval_steps_per_second": 2.018,
"eval_wer": 0.5748424187850661,
"step": 1800
},
{
"epoch": 24.05,
"grad_norm": 3.1321146488189697,
"learning_rate": 1.689451476793249e-05,
"loss": 0.3983,
"step": 1900
},
{
"epoch": 24.05,
"eval_loss": 0.5648804306983948,
"eval_runtime": 11.191,
"eval_samples_per_second": 40.211,
"eval_steps_per_second": 2.055,
"eval_wer": 0.6003324790468934,
"step": 1900
},
{
"epoch": 25.32,
"grad_norm": 2.3534066677093506,
"learning_rate": 1.661322081575246e-05,
"loss": 0.4001,
"step": 2000
},
{
"epoch": 25.32,
"eval_loss": 0.567441463470459,
"eval_runtime": 11.1757,
"eval_samples_per_second": 40.266,
"eval_steps_per_second": 2.058,
"eval_wer": 0.5976310867908845,
"step": 2000
},
{
"epoch": 26.58,
"grad_norm": 2.417196750640869,
"learning_rate": 1.6331926863572434e-05,
"loss": 0.3649,
"step": 2100
},
{
"epoch": 26.58,
"eval_loss": 0.579703152179718,
"eval_runtime": 11.126,
"eval_samples_per_second": 40.446,
"eval_steps_per_second": 2.067,
"eval_wer": 0.5804530027013922,
"step": 2100
},
{
"epoch": 27.85,
"grad_norm": 2.8033077716827393,
"learning_rate": 1.6050632911392404e-05,
"loss": 0.3711,
"step": 2200
},
{
"epoch": 27.85,
"eval_loss": 0.5839091539382935,
"eval_runtime": 11.2012,
"eval_samples_per_second": 40.174,
"eval_steps_per_second": 2.053,
"eval_wer": 0.6546373900394818,
"step": 2200
},
{
"epoch": 29.11,
"grad_norm": 2.2273740768432617,
"learning_rate": 1.576933895921238e-05,
"loss": 0.3547,
"step": 2300
},
{
"epoch": 29.11,
"eval_loss": 0.5734866261482239,
"eval_runtime": 11.079,
"eval_samples_per_second": 40.617,
"eval_steps_per_second": 2.076,
"eval_wer": 0.5904273741081942,
"step": 2300
},
{
"epoch": 30.38,
"grad_norm": 2.2852895259857178,
"learning_rate": 1.548804500703235e-05,
"loss": 0.3402,
"step": 2400
},
{
"epoch": 30.38,
"eval_loss": 0.5698839426040649,
"eval_runtime": 11.0702,
"eval_samples_per_second": 40.65,
"eval_steps_per_second": 2.078,
"eval_wer": 0.5426335111172681,
"step": 2400
},
{
"epoch": 31.65,
"grad_norm": 1.869658350944519,
"learning_rate": 1.5206751054852323e-05,
"loss": 0.3414,
"step": 2500
},
{
"epoch": 31.65,
"eval_loss": 0.5700486302375793,
"eval_runtime": 11.3836,
"eval_samples_per_second": 39.531,
"eval_steps_per_second": 2.02,
"eval_wer": 0.5421486458405486,
"step": 2500
},
{
"epoch": 32.91,
"grad_norm": 1.7649214267730713,
"learning_rate": 1.4925457102672294e-05,
"loss": 0.3255,
"step": 2600
},
{
"epoch": 32.91,
"eval_loss": 0.5744786262512207,
"eval_runtime": 11.2135,
"eval_samples_per_second": 40.13,
"eval_steps_per_second": 2.051,
"eval_wer": 0.5663226432084228,
"step": 2600
},
{
"epoch": 34.18,
"grad_norm": 2.0651187896728516,
"learning_rate": 1.4644163150492266e-05,
"loss": 0.3093,
"step": 2700
},
{
"epoch": 34.18,
"eval_loss": 0.5957615971565247,
"eval_runtime": 11.1504,
"eval_samples_per_second": 40.357,
"eval_steps_per_second": 2.063,
"eval_wer": 0.5931980328323059,
"step": 2700
},
{
"epoch": 35.44,
"grad_norm": 2.1554198265075684,
"learning_rate": 1.4362869198312237e-05,
"loss": 0.315,
"step": 2800
},
{
"epoch": 35.44,
"eval_loss": 0.5933964848518372,
"eval_runtime": 11.202,
"eval_samples_per_second": 40.171,
"eval_steps_per_second": 2.053,
"eval_wer": 0.5905659070443998,
"step": 2800
},
{
"epoch": 36.71,
"grad_norm": 2.880059003829956,
"learning_rate": 1.4081575246132208e-05,
"loss": 0.31,
"step": 2900
},
{
"epoch": 36.71,
"eval_loss": 0.6071695685386658,
"eval_runtime": 11.2335,
"eval_samples_per_second": 40.059,
"eval_steps_per_second": 2.047,
"eval_wer": 0.601094410196024,
"step": 2900
},
{
"epoch": 37.97,
"grad_norm": 2.7784523963928223,
"learning_rate": 1.380028129395218e-05,
"loss": 0.3026,
"step": 3000
},
{
"epoch": 37.97,
"eval_loss": 0.6038002371788025,
"eval_runtime": 11.1544,
"eval_samples_per_second": 40.343,
"eval_steps_per_second": 2.062,
"eval_wer": 0.5760199487428136,
"step": 3000
},
{
"epoch": 39.24,
"grad_norm": 2.2978756427764893,
"learning_rate": 1.3518987341772155e-05,
"loss": 0.2802,
"step": 3100
},
{
"epoch": 39.24,
"eval_loss": 0.6079789400100708,
"eval_runtime": 11.1769,
"eval_samples_per_second": 40.262,
"eval_steps_per_second": 2.058,
"eval_wer": 0.5776823439772806,
"step": 3100
},
{
"epoch": 40.51,
"grad_norm": 2.1417360305786133,
"learning_rate": 1.3237693389592126e-05,
"loss": 0.2835,
"step": 3200
},
{
"epoch": 40.51,
"eval_loss": 0.6061974167823792,
"eval_runtime": 11.1226,
"eval_samples_per_second": 40.458,
"eval_steps_per_second": 2.068,
"eval_wer": 0.5743575535083466,
"step": 3200
},
{
"epoch": 41.77,
"grad_norm": 1.6934860944747925,
"learning_rate": 1.2956399437412097e-05,
"loss": 0.2585,
"step": 3300
},
{
"epoch": 41.77,
"eval_loss": 0.6224856972694397,
"eval_runtime": 11.0743,
"eval_samples_per_second": 40.634,
"eval_steps_per_second": 2.077,
"eval_wer": 0.5784442751264113,
"step": 3300
},
{
"epoch": 43.04,
"grad_norm": 1.937164068222046,
"learning_rate": 1.2675105485232069e-05,
"loss": 0.2699,
"step": 3400
},
{
"epoch": 43.04,
"eval_loss": 0.6225900650024414,
"eval_runtime": 11.1534,
"eval_samples_per_second": 40.347,
"eval_steps_per_second": 2.062,
"eval_wer": 0.5664611761446284,
"step": 3400
},
{
"epoch": 44.3,
"grad_norm": 2.5404953956604004,
"learning_rate": 1.239381153305204e-05,
"loss": 0.2785,
"step": 3500
},
{
"epoch": 44.3,
"eval_loss": 0.6240466833114624,
"eval_runtime": 11.1649,
"eval_samples_per_second": 40.305,
"eval_steps_per_second": 2.06,
"eval_wer": 0.5713790953799266,
"step": 3500
},
{
"epoch": 45.57,
"grad_norm": 5.070058822631836,
"learning_rate": 1.2112517580872011e-05,
"loss": 0.2689,
"step": 3600
},
{
"epoch": 45.57,
"eval_loss": 0.6294780969619751,
"eval_runtime": 11.118,
"eval_samples_per_second": 40.475,
"eval_steps_per_second": 2.069,
"eval_wer": 0.5648680473782642,
"step": 3600
},
{
"epoch": 46.84,
"grad_norm": 2.5456833839416504,
"learning_rate": 1.1831223628691983e-05,
"loss": 0.2514,
"step": 3700
},
{
"epoch": 46.84,
"eval_loss": 0.6424580812454224,
"eval_runtime": 11.271,
"eval_samples_per_second": 39.926,
"eval_steps_per_second": 2.041,
"eval_wer": 0.5420793793724458,
"step": 3700
},
{
"epoch": 48.1,
"grad_norm": 2.153717279434204,
"learning_rate": 1.1549929676511956e-05,
"loss": 0.2433,
"step": 3800
},
{
"epoch": 48.1,
"eval_loss": 0.6667928099632263,
"eval_runtime": 11.226,
"eval_samples_per_second": 40.086,
"eval_steps_per_second": 2.049,
"eval_wer": 0.606774260580453,
"step": 3800
},
{
"epoch": 49.37,
"grad_norm": 2.5784971714019775,
"learning_rate": 1.1268635724331929e-05,
"loss": 0.2403,
"step": 3900
},
{
"epoch": 49.37,
"eval_loss": 0.6562526226043701,
"eval_runtime": 11.112,
"eval_samples_per_second": 40.497,
"eval_steps_per_second": 2.07,
"eval_wer": 0.5749809517212717,
"step": 3900
},
{
"epoch": 50.63,
"grad_norm": 2.656663656234741,
"learning_rate": 1.09873417721519e-05,
"loss": 0.2287,
"step": 4000
},
{
"epoch": 50.63,
"eval_loss": 0.6695858240127563,
"eval_runtime": 11.0907,
"eval_samples_per_second": 40.575,
"eval_steps_per_second": 2.074,
"eval_wer": 0.5932672993004087,
"step": 4000
},
{
"epoch": 51.9,
"grad_norm": 2.9128212928771973,
"learning_rate": 1.0706047819971872e-05,
"loss": 0.2366,
"step": 4100
},
{
"epoch": 51.9,
"eval_loss": 0.6738879680633545,
"eval_runtime": 11.1372,
"eval_samples_per_second": 40.405,
"eval_steps_per_second": 2.065,
"eval_wer": 0.5731107570824964,
"step": 4100
},
{
"epoch": 53.16,
"grad_norm": 1.8941991329193115,
"learning_rate": 1.0424753867791843e-05,
"loss": 0.2295,
"step": 4200
},
{
"epoch": 53.16,
"eval_loss": 0.680944561958313,
"eval_runtime": 11.1339,
"eval_samples_per_second": 40.417,
"eval_steps_per_second": 2.066,
"eval_wer": 0.6090600540278451,
"step": 4200
},
{
"epoch": 54.43,
"grad_norm": 2.2944602966308594,
"learning_rate": 1.0143459915611814e-05,
"loss": 0.2274,
"step": 4300
},
{
"epoch": 54.43,
"eval_loss": 0.6874995827674866,
"eval_runtime": 11.1569,
"eval_samples_per_second": 40.334,
"eval_steps_per_second": 2.061,
"eval_wer": 0.5913971046616333,
"step": 4300
},
{
"epoch": 55.7,
"grad_norm": 2.4684228897094727,
"learning_rate": 9.862165963431787e-06,
"loss": 0.2178,
"step": 4400
},
{
"epoch": 55.7,
"eval_loss": 0.6899309158325195,
"eval_runtime": 11.21,
"eval_samples_per_second": 40.143,
"eval_steps_per_second": 2.052,
"eval_wer": 0.5949296945348757,
"step": 4400
},
{
"epoch": 56.96,
"grad_norm": 2.0573887825012207,
"learning_rate": 9.580872011251759e-06,
"loss": 0.2176,
"step": 4500
},
{
"epoch": 56.96,
"eval_loss": 0.6924750208854675,
"eval_runtime": 11.1413,
"eval_samples_per_second": 40.39,
"eval_steps_per_second": 2.064,
"eval_wer": 0.5828080626168871,
"step": 4500
},
{
"epoch": 58.23,
"grad_norm": 1.9555515050888062,
"learning_rate": 9.299578059071732e-06,
"loss": 0.2064,
"step": 4600
},
{
"epoch": 58.23,
"eval_loss": 0.7009023427963257,
"eval_runtime": 11.0931,
"eval_samples_per_second": 40.566,
"eval_steps_per_second": 2.073,
"eval_wer": 0.598462284408118,
"step": 4600
},
{
"epoch": 59.49,
"grad_norm": 3.208376169204712,
"learning_rate": 9.018284106891703e-06,
"loss": 0.2081,
"step": 4700
},
{
"epoch": 59.49,
"eval_loss": 0.701277494430542,
"eval_runtime": 11.216,
"eval_samples_per_second": 40.121,
"eval_steps_per_second": 2.051,
"eval_wer": 0.5995705478977627,
"step": 4700
},
{
"epoch": 60.76,
"grad_norm": 2.5926976203918457,
"learning_rate": 8.736990154711675e-06,
"loss": 0.2093,
"step": 4800
},
{
"epoch": 60.76,
"eval_loss": 0.725727379322052,
"eval_runtime": 11.1473,
"eval_samples_per_second": 40.369,
"eval_steps_per_second": 2.063,
"eval_wer": 0.6086444552192284,
"step": 4800
},
{
"epoch": 62.03,
"grad_norm": 2.2040176391601562,
"learning_rate": 8.455696202531646e-06,
"loss": 0.2024,
"step": 4900
},
{
"epoch": 62.03,
"eval_loss": 0.7215314507484436,
"eval_runtime": 11.4213,
"eval_samples_per_second": 39.4,
"eval_steps_per_second": 2.014,
"eval_wer": 0.6003324790468934,
"step": 4900
},
{
"epoch": 63.29,
"grad_norm": 1.6947568655014038,
"learning_rate": 8.174402250351619e-06,
"loss": 0.1999,
"step": 5000
},
{
"epoch": 63.29,
"eval_loss": 0.7332788109779358,
"eval_runtime": 11.2504,
"eval_samples_per_second": 39.999,
"eval_steps_per_second": 2.044,
"eval_wer": 0.6090600540278451,
"step": 5000
},
{
"epoch": 64.56,
"grad_norm": 2.657949686050415,
"learning_rate": 7.89310829817159e-06,
"loss": 0.2064,
"step": 5100
},
{
"epoch": 64.56,
"eval_loss": 0.7529835104942322,
"eval_runtime": 11.2168,
"eval_samples_per_second": 40.118,
"eval_steps_per_second": 2.05,
"eval_wer": 0.6397450993973818,
"step": 5100
},
{
"epoch": 65.82,
"grad_norm": 2.161647081375122,
"learning_rate": 7.611814345991562e-06,
"loss": 0.186,
"step": 5200
},
{
"epoch": 65.82,
"eval_loss": 0.7542085647583008,
"eval_runtime": 11.1628,
"eval_samples_per_second": 40.312,
"eval_steps_per_second": 2.06,
"eval_wer": 0.6348964466301863,
"step": 5200
},
{
"epoch": 67.09,
"grad_norm": 1.5503740310668945,
"learning_rate": 7.330520393811533e-06,
"loss": 0.186,
"step": 5300
},
{
"epoch": 67.09,
"eval_loss": 0.7416096925735474,
"eval_runtime": 11.1272,
"eval_samples_per_second": 40.441,
"eval_steps_per_second": 2.067,
"eval_wer": 0.6270000692664681,
"step": 5300
},
{
"epoch": 68.35,
"grad_norm": 2.8439977169036865,
"learning_rate": 7.049226441631506e-06,
"loss": 0.1807,
"step": 5400
},
{
"epoch": 68.35,
"eval_loss": 0.7548705339431763,
"eval_runtime": 11.1833,
"eval_samples_per_second": 40.239,
"eval_steps_per_second": 2.057,
"eval_wer": 0.6352427789707003,
"step": 5400
},
{
"epoch": 69.62,
"grad_norm": 3.1191818714141846,
"learning_rate": 6.7679324894514775e-06,
"loss": 0.1784,
"step": 5500
},
{
"epoch": 69.62,
"eval_loss": 0.7506438493728638,
"eval_runtime": 11.1231,
"eval_samples_per_second": 40.456,
"eval_steps_per_second": 2.068,
"eval_wer": 0.5844011913832514,
"step": 5500
},
{
"epoch": 70.89,
"grad_norm": 2.1088929176330566,
"learning_rate": 6.486638537271449e-06,
"loss": 0.1824,
"step": 5600
},
{
"epoch": 70.89,
"eval_loss": 0.7611370086669922,
"eval_runtime": 11.0844,
"eval_samples_per_second": 40.598,
"eval_steps_per_second": 2.075,
"eval_wer": 0.6252684075638983,
"step": 5600
},
{
"epoch": 72.15,
"grad_norm": 3.0879805088043213,
"learning_rate": 6.208157524613221e-06,
"loss": 0.1769,
"step": 5700
},
{
"epoch": 72.15,
"eval_loss": 0.771263837814331,
"eval_runtime": 11.226,
"eval_samples_per_second": 40.085,
"eval_steps_per_second": 2.049,
"eval_wer": 0.5927131675555863,
"step": 5700
},
{
"epoch": 73.42,
"grad_norm": 2.1844921112060547,
"learning_rate": 5.926863572433193e-06,
"loss": 0.1843,
"step": 5800
},
{
"epoch": 73.42,
"eval_loss": 0.7719753980636597,
"eval_runtime": 11.1675,
"eval_samples_per_second": 40.295,
"eval_steps_per_second": 2.06,
"eval_wer": 0.5955530927478008,
"step": 5800
},
{
"epoch": 74.68,
"grad_norm": 2.3081653118133545,
"learning_rate": 5.645569620253165e-06,
"loss": 0.1709,
"step": 5900
},
{
"epoch": 74.68,
"eval_loss": 0.7804738879203796,
"eval_runtime": 11.2408,
"eval_samples_per_second": 40.033,
"eval_steps_per_second": 2.046,
"eval_wer": 0.6258225393087207,
"step": 5900
},
{
"epoch": 75.95,
"grad_norm": 3.2150704860687256,
"learning_rate": 5.364275668073137e-06,
"loss": 0.1691,
"step": 6000
},
{
"epoch": 75.95,
"eval_loss": 0.7865281105041504,
"eval_runtime": 11.2778,
"eval_samples_per_second": 39.901,
"eval_steps_per_second": 2.039,
"eval_wer": 0.6281775992242156,
"step": 6000
},
{
"epoch": 77.22,
"grad_norm": 1.8031377792358398,
"learning_rate": 5.082981715893108e-06,
"loss": 0.1701,
"step": 6100
},
{
"epoch": 77.22,
"eval_loss": 0.7807941436767578,
"eval_runtime": 11.2568,
"eval_samples_per_second": 39.976,
"eval_steps_per_second": 2.043,
"eval_wer": 0.6218050841587588,
"step": 6100
},
{
"epoch": 78.48,
"grad_norm": 1.8435957431793213,
"learning_rate": 4.8016877637130805e-06,
"loss": 0.1735,
"step": 6200
},
{
"epoch": 78.48,
"eval_loss": 0.7789934873580933,
"eval_runtime": 11.2304,
"eval_samples_per_second": 40.07,
"eval_steps_per_second": 2.048,
"eval_wer": 0.5965920897693426,
"step": 6200
},
{
"epoch": 79.75,
"grad_norm": 1.9381072521209717,
"learning_rate": 4.520393811533053e-06,
"loss": 0.1746,
"step": 6300
},
{
"epoch": 79.75,
"eval_loss": 0.7949352264404297,
"eval_runtime": 11.4313,
"eval_samples_per_second": 39.366,
"eval_steps_per_second": 2.012,
"eval_wer": 0.6430698898663157,
"step": 6300
},
{
"epoch": 81.01,
"grad_norm": 1.8376802206039429,
"learning_rate": 4.239099859353024e-06,
"loss": 0.1745,
"step": 6400
},
{
"epoch": 81.01,
"eval_loss": 0.8125633001327515,
"eval_runtime": 11.456,
"eval_samples_per_second": 39.281,
"eval_steps_per_second": 2.008,
"eval_wer": 0.6284546650966267,
"step": 6400
},
{
"epoch": 82.28,
"grad_norm": 2.187868118286133,
"learning_rate": 3.957805907172996e-06,
"loss": 0.1605,
"step": 6500
},
{
"epoch": 82.28,
"eval_loss": 0.8113065361976624,
"eval_runtime": 11.3144,
"eval_samples_per_second": 39.772,
"eval_steps_per_second": 2.033,
"eval_wer": 0.6194500242432638,
"step": 6500
},
{
"epoch": 83.54,
"grad_norm": 2.5882511138916016,
"learning_rate": 3.676511954992968e-06,
"loss": 0.1579,
"step": 6600
},
{
"epoch": 83.54,
"eval_loss": 0.7976768612861633,
"eval_runtime": 11.4022,
"eval_samples_per_second": 39.466,
"eval_steps_per_second": 2.017,
"eval_wer": 0.6155018355614047,
"step": 6600
},
{
"epoch": 84.81,
"grad_norm": 2.416449546813965,
"learning_rate": 3.39521800281294e-06,
"loss": 0.1704,
"step": 6700
},
{
"epoch": 84.81,
"eval_loss": 0.8016535043716431,
"eval_runtime": 11.3366,
"eval_samples_per_second": 39.694,
"eval_steps_per_second": 2.029,
"eval_wer": 0.6139779732631433,
"step": 6700
},
{
"epoch": 86.08,
"grad_norm": 1.5159286260604858,
"learning_rate": 3.1139240506329116e-06,
"loss": 0.1659,
"step": 6800
},
{
"epoch": 86.08,
"eval_loss": 0.8146914839744568,
"eval_runtime": 11.289,
"eval_samples_per_second": 39.862,
"eval_steps_per_second": 2.037,
"eval_wer": 0.6279005333518044,
"step": 6800
},
{
"epoch": 87.34,
"grad_norm": 2.9167511463165283,
"learning_rate": 2.832630098452884e-06,
"loss": 0.166,
"step": 6900
},
{
"epoch": 87.34,
"eval_loss": 0.8088270425796509,
"eval_runtime": 11.28,
"eval_samples_per_second": 39.893,
"eval_steps_per_second": 2.039,
"eval_wer": 0.6350349795663919,
"step": 6900
},
{
"epoch": 88.61,
"grad_norm": 2.3707916736602783,
"learning_rate": 2.5513361462728552e-06,
"loss": 0.1539,
"step": 7000
},
{
"epoch": 88.61,
"eval_loss": 0.8052927255630493,
"eval_runtime": 11.3196,
"eval_samples_per_second": 39.754,
"eval_steps_per_second": 2.032,
"eval_wer": 0.616402299646741,
"step": 7000
},
{
"epoch": 89.87,
"grad_norm": 2.315516471862793,
"learning_rate": 2.270042194092827e-06,
"loss": 0.1589,
"step": 7100
},
{
"epoch": 89.87,
"eval_loss": 0.8188755512237549,
"eval_runtime": 11.3039,
"eval_samples_per_second": 39.809,
"eval_steps_per_second": 2.035,
"eval_wer": 0.6357276442474198,
"step": 7100
},
{
"epoch": 91.14,
"grad_norm": 1.863142967224121,
"learning_rate": 1.9887482419127992e-06,
"loss": 0.1559,
"step": 7200
},
{
"epoch": 91.14,
"eval_loss": 0.8152031898498535,
"eval_runtime": 11.2054,
"eval_samples_per_second": 40.159,
"eval_steps_per_second": 2.053,
"eval_wer": 0.6258225393087207,
"step": 7200
},
{
"epoch": 92.41,
"grad_norm": 3.034898519515991,
"learning_rate": 1.7074542897327708e-06,
"loss": 0.1564,
"step": 7300
},
{
"epoch": 92.41,
"eval_loss": 0.8190972208976746,
"eval_runtime": 11.3427,
"eval_samples_per_second": 39.673,
"eval_steps_per_second": 2.028,
"eval_wer": 0.6245064764147676,
"step": 7300
},
{
"epoch": 93.67,
"grad_norm": 1.673194408416748,
"learning_rate": 1.4261603375527428e-06,
"loss": 0.158,
"step": 7400
},
{
"epoch": 93.67,
"eval_loss": 0.8254526853561401,
"eval_runtime": 11.282,
"eval_samples_per_second": 39.887,
"eval_steps_per_second": 2.039,
"eval_wer": 0.6333033178638221,
"step": 7400
},
{
"epoch": 94.94,
"grad_norm": 1.6554739475250244,
"learning_rate": 1.1448663853727146e-06,
"loss": 0.1595,
"step": 7500
},
{
"epoch": 94.94,
"eval_loss": 0.8184179663658142,
"eval_runtime": 11.2214,
"eval_samples_per_second": 40.102,
"eval_steps_per_second": 2.05,
"eval_wer": 0.6205582877329086,
"step": 7500
},
{
"epoch": 96.2,
"grad_norm": 2.310234785079956,
"learning_rate": 8.635724331926865e-07,
"loss": 0.1638,
"step": 7600
},
{
"epoch": 96.2,
"eval_loss": 0.8229891061782837,
"eval_runtime": 11.1115,
"eval_samples_per_second": 40.499,
"eval_steps_per_second": 2.07,
"eval_wer": 0.6364203089284477,
"step": 7600
},
{
"epoch": 97.47,
"grad_norm": 1.6313074827194214,
"learning_rate": 5.822784810126583e-07,
"loss": 0.1629,
"step": 7700
},
{
"epoch": 97.47,
"eval_loss": 0.8244702219963074,
"eval_runtime": 11.1778,
"eval_samples_per_second": 40.258,
"eval_steps_per_second": 2.058,
"eval_wer": 0.6312253238207384,
"step": 7700
},
{
"epoch": 98.73,
"grad_norm": 2.2342050075531006,
"learning_rate": 3.009845288326301e-07,
"loss": 0.1531,
"step": 7800
},
{
"epoch": 98.73,
"eval_loss": 0.8226235508918762,
"eval_runtime": 11.0989,
"eval_samples_per_second": 40.545,
"eval_steps_per_second": 2.072,
"eval_wer": 0.626723003394057,
"step": 7800
},
{
"epoch": 100.0,
"grad_norm": 3.3648197650909424,
"learning_rate": 1.9690576652601972e-08,
"loss": 0.1572,
"step": 7900
},
{
"epoch": 100.0,
"eval_loss": 0.8258158564567566,
"eval_runtime": 11.1124,
"eval_samples_per_second": 40.495,
"eval_steps_per_second": 2.07,
"eval_wer": 0.6288702639052435,
"step": 7900
},
{
"epoch": 100.0,
"step": 7900,
"total_flos": 3.6740184088961606e+19,
"train_loss": 0.6056561310683625,
"train_runtime": 25369.8434,
"train_samples_per_second": 12.404,
"train_steps_per_second": 0.311
}
],
"logging_steps": 100,
"max_steps": 7900,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 100,
"total_flos": 3.6740184088961606e+19,
"train_batch_size": 20,
"trial_name": null,
"trial_params": null
}