{ "best_metric": 0.5420793793724458, "best_model_checkpoint": "md_d_l2_arctic/checkpoint-3700", "epoch": 100.0, "eval_steps": 100, "global_step": 7900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.27, "grad_norm": 92.3280258178711, "learning_rate": 2.4050632911392408e-06, "loss": 14.3657, "step": 100 }, { "epoch": 1.27, "eval_loss": 9.120972633361816, "eval_runtime": 15.883, "eval_samples_per_second": 28.332, "eval_steps_per_second": 1.448, "eval_wer": 2.4374870125372308, "step": 100 }, { "epoch": 2.53, "grad_norm": 1.9874589443206787, "learning_rate": 4.9113924050632915e-06, "loss": 4.382, "step": 200 }, { "epoch": 2.53, "eval_loss": 3.421921730041504, "eval_runtime": 10.9345, "eval_samples_per_second": 41.154, "eval_steps_per_second": 2.103, "eval_wer": 1.0, "step": 200 }, { "epoch": 3.8, "grad_norm": 3.151137590408325, "learning_rate": 7.443037974683544e-06, "loss": 3.2514, "step": 300 }, { "epoch": 3.8, "eval_loss": 2.788123369216919, "eval_runtime": 10.9315, "eval_samples_per_second": 41.165, "eval_steps_per_second": 2.104, "eval_wer": 0.9979912724250191, "step": 300 }, { "epoch": 5.06, "grad_norm": 3.4454073905944824, "learning_rate": 9.974683544303799e-06, "loss": 2.4508, "step": 400 }, { "epoch": 5.06, "eval_loss": 1.8000441789627075, "eval_runtime": 10.9542, "eval_samples_per_second": 41.08, "eval_steps_per_second": 2.1, "eval_wer": 0.7380342176352428, "step": 400 }, { "epoch": 6.33, "grad_norm": 2.928997039794922, "learning_rate": 1.2506329113924051e-05, "loss": 1.6168, "step": 500 }, { "epoch": 6.33, "eval_loss": 1.131545901298523, "eval_runtime": 11.2822, "eval_samples_per_second": 39.886, "eval_steps_per_second": 2.039, "eval_wer": 0.967791092332202, "step": 500 }, { "epoch": 7.59, "grad_norm": 2.174651861190796, "learning_rate": 1.5037974683544306e-05, "loss": 1.1212, "step": 600 }, { "epoch": 7.59, "eval_loss": 0.8748846054077148, "eval_runtime": 11.1275, "eval_samples_per_second": 40.44, "eval_steps_per_second": 2.067, "eval_wer": 1.065803144697652, "step": 600 }, { "epoch": 8.86, "grad_norm": 7.105869770050049, "learning_rate": 1.7569620253164558e-05, "loss": 0.8953, "step": 700 }, { "epoch": 8.86, "eval_loss": 0.7655201554298401, "eval_runtime": 11.2119, "eval_samples_per_second": 40.136, "eval_steps_per_second": 2.051, "eval_wer": 0.9655052988848098, "step": 700 }, { "epoch": 10.13, "grad_norm": 2.2692487239837646, "learning_rate": 1.99887482419128e-05, "loss": 0.7684, "step": 800 }, { "epoch": 10.13, "eval_loss": 0.6687426567077637, "eval_runtime": 11.2107, "eval_samples_per_second": 40.14, "eval_steps_per_second": 2.052, "eval_wer": 0.7621389485350142, "step": 800 }, { "epoch": 11.39, "grad_norm": 3.137871742248535, "learning_rate": 1.970745428973277e-05, "loss": 0.6661, "step": 900 }, { "epoch": 11.39, "eval_loss": 0.6319410800933838, "eval_runtime": 11.2724, "eval_samples_per_second": 39.921, "eval_steps_per_second": 2.04, "eval_wer": 0.6755558634065248, "step": 900 }, { "epoch": 12.66, "grad_norm": 2.894819498062134, "learning_rate": 1.9426160337552744e-05, "loss": 0.6306, "step": 1000 }, { "epoch": 12.66, "eval_loss": 0.6195651292800903, "eval_runtime": 11.3009, "eval_samples_per_second": 39.82, "eval_steps_per_second": 2.035, "eval_wer": 0.6963358038373624, "step": 1000 }, { "epoch": 13.92, "grad_norm": 3.4255077838897705, "learning_rate": 1.9144866385372717e-05, "loss": 0.5759, "step": 1100 }, { "epoch": 13.92, "eval_loss": 0.5875259637832642, "eval_runtime": 11.1702, "eval_samples_per_second": 40.286, "eval_steps_per_second": 2.059, "eval_wer": 0.5965228233012398, "step": 1100 }, { "epoch": 15.19, "grad_norm": 2.189448118209839, "learning_rate": 1.8863572433192687e-05, "loss": 0.5417, "step": 1200 }, { "epoch": 15.19, "eval_loss": 0.5779715776443481, "eval_runtime": 11.2889, "eval_samples_per_second": 39.862, "eval_steps_per_second": 2.037, "eval_wer": 0.6528364618688093, "step": 1200 }, { "epoch": 16.46, "grad_norm": 6.146909713745117, "learning_rate": 1.858227848101266e-05, "loss": 0.528, "step": 1300 }, { "epoch": 16.46, "eval_loss": 0.5798078179359436, "eval_runtime": 11.2225, "eval_samples_per_second": 40.098, "eval_steps_per_second": 2.049, "eval_wer": 0.6539447253584539, "step": 1300 }, { "epoch": 17.72, "grad_norm": 3.1426494121551514, "learning_rate": 1.8300984528832633e-05, "loss": 0.4857, "step": 1400 }, { "epoch": 17.72, "eval_loss": 0.5568873286247253, "eval_runtime": 11.201, "eval_samples_per_second": 40.175, "eval_steps_per_second": 2.053, "eval_wer": 0.5724873588695713, "step": 1400 }, { "epoch": 18.99, "grad_norm": 1.9612127542495728, "learning_rate": 1.8019690576652603e-05, "loss": 0.4655, "step": 1500 }, { "epoch": 18.99, "eval_loss": 0.549960196018219, "eval_runtime": 11.3494, "eval_samples_per_second": 39.65, "eval_steps_per_second": 2.027, "eval_wer": 0.575535083466094, "step": 1500 }, { "epoch": 20.25, "grad_norm": 2.946164131164551, "learning_rate": 1.7738396624472576e-05, "loss": 0.4526, "step": 1600 }, { "epoch": 20.25, "eval_loss": 0.5582863092422485, "eval_runtime": 11.3026, "eval_samples_per_second": 39.814, "eval_steps_per_second": 2.035, "eval_wer": 0.5776130775091778, "step": 1600 }, { "epoch": 21.52, "grad_norm": 2.338858127593994, "learning_rate": 1.745710267229255e-05, "loss": 0.4287, "step": 1700 }, { "epoch": 21.52, "eval_loss": 0.555654764175415, "eval_runtime": 11.2714, "eval_samples_per_second": 39.924, "eval_steps_per_second": 2.041, "eval_wer": 0.5609891251645078, "step": 1700 }, { "epoch": 22.78, "grad_norm": 2.5261471271514893, "learning_rate": 1.717580872011252e-05, "loss": 0.4149, "step": 1800 }, { "epoch": 22.78, "eval_loss": 0.5575445294380188, "eval_runtime": 11.3966, "eval_samples_per_second": 39.486, "eval_steps_per_second": 2.018, "eval_wer": 0.5748424187850661, "step": 1800 }, { "epoch": 24.05, "grad_norm": 3.1321146488189697, "learning_rate": 1.689451476793249e-05, "loss": 0.3983, "step": 1900 }, { "epoch": 24.05, "eval_loss": 0.5648804306983948, "eval_runtime": 11.191, "eval_samples_per_second": 40.211, "eval_steps_per_second": 2.055, "eval_wer": 0.6003324790468934, "step": 1900 }, { "epoch": 25.32, "grad_norm": 2.3534066677093506, "learning_rate": 1.661322081575246e-05, "loss": 0.4001, "step": 2000 }, { "epoch": 25.32, "eval_loss": 0.567441463470459, "eval_runtime": 11.1757, "eval_samples_per_second": 40.266, "eval_steps_per_second": 2.058, "eval_wer": 0.5976310867908845, "step": 2000 }, { "epoch": 26.58, "grad_norm": 2.417196750640869, "learning_rate": 1.6331926863572434e-05, "loss": 0.3649, "step": 2100 }, { "epoch": 26.58, "eval_loss": 0.579703152179718, "eval_runtime": 11.126, "eval_samples_per_second": 40.446, "eval_steps_per_second": 2.067, "eval_wer": 0.5804530027013922, "step": 2100 }, { "epoch": 27.85, "grad_norm": 2.8033077716827393, "learning_rate": 1.6050632911392404e-05, "loss": 0.3711, "step": 2200 }, { "epoch": 27.85, "eval_loss": 0.5839091539382935, "eval_runtime": 11.2012, "eval_samples_per_second": 40.174, "eval_steps_per_second": 2.053, "eval_wer": 0.6546373900394818, "step": 2200 }, { "epoch": 29.11, "grad_norm": 2.2273740768432617, "learning_rate": 1.576933895921238e-05, "loss": 0.3547, "step": 2300 }, { "epoch": 29.11, "eval_loss": 0.5734866261482239, "eval_runtime": 11.079, "eval_samples_per_second": 40.617, "eval_steps_per_second": 2.076, "eval_wer": 0.5904273741081942, "step": 2300 }, { "epoch": 30.38, "grad_norm": 2.2852895259857178, "learning_rate": 1.548804500703235e-05, "loss": 0.3402, "step": 2400 }, { "epoch": 30.38, "eval_loss": 0.5698839426040649, "eval_runtime": 11.0702, "eval_samples_per_second": 40.65, "eval_steps_per_second": 2.078, "eval_wer": 0.5426335111172681, "step": 2400 }, { "epoch": 31.65, "grad_norm": 1.869658350944519, "learning_rate": 1.5206751054852323e-05, "loss": 0.3414, "step": 2500 }, { "epoch": 31.65, "eval_loss": 0.5700486302375793, "eval_runtime": 11.3836, "eval_samples_per_second": 39.531, "eval_steps_per_second": 2.02, "eval_wer": 0.5421486458405486, "step": 2500 }, { "epoch": 32.91, "grad_norm": 1.7649214267730713, "learning_rate": 1.4925457102672294e-05, "loss": 0.3255, "step": 2600 }, { "epoch": 32.91, "eval_loss": 0.5744786262512207, "eval_runtime": 11.2135, "eval_samples_per_second": 40.13, "eval_steps_per_second": 2.051, "eval_wer": 0.5663226432084228, "step": 2600 }, { "epoch": 34.18, "grad_norm": 2.0651187896728516, "learning_rate": 1.4644163150492266e-05, "loss": 0.3093, "step": 2700 }, { "epoch": 34.18, "eval_loss": 0.5957615971565247, "eval_runtime": 11.1504, "eval_samples_per_second": 40.357, "eval_steps_per_second": 2.063, "eval_wer": 0.5931980328323059, "step": 2700 }, { "epoch": 35.44, "grad_norm": 2.1554198265075684, "learning_rate": 1.4362869198312237e-05, "loss": 0.315, "step": 2800 }, { "epoch": 35.44, "eval_loss": 0.5933964848518372, "eval_runtime": 11.202, "eval_samples_per_second": 40.171, "eval_steps_per_second": 2.053, "eval_wer": 0.5905659070443998, "step": 2800 }, { "epoch": 36.71, "grad_norm": 2.880059003829956, "learning_rate": 1.4081575246132208e-05, "loss": 0.31, "step": 2900 }, { "epoch": 36.71, "eval_loss": 0.6071695685386658, "eval_runtime": 11.2335, "eval_samples_per_second": 40.059, "eval_steps_per_second": 2.047, "eval_wer": 0.601094410196024, "step": 2900 }, { "epoch": 37.97, "grad_norm": 2.7784523963928223, "learning_rate": 1.380028129395218e-05, "loss": 0.3026, "step": 3000 }, { "epoch": 37.97, "eval_loss": 0.6038002371788025, "eval_runtime": 11.1544, "eval_samples_per_second": 40.343, "eval_steps_per_second": 2.062, "eval_wer": 0.5760199487428136, "step": 3000 }, { "epoch": 39.24, "grad_norm": 2.2978756427764893, "learning_rate": 1.3518987341772155e-05, "loss": 0.2802, "step": 3100 }, { "epoch": 39.24, "eval_loss": 0.6079789400100708, "eval_runtime": 11.1769, "eval_samples_per_second": 40.262, "eval_steps_per_second": 2.058, "eval_wer": 0.5776823439772806, "step": 3100 }, { "epoch": 40.51, "grad_norm": 2.1417360305786133, "learning_rate": 1.3237693389592126e-05, "loss": 0.2835, "step": 3200 }, { "epoch": 40.51, "eval_loss": 0.6061974167823792, "eval_runtime": 11.1226, "eval_samples_per_second": 40.458, "eval_steps_per_second": 2.068, "eval_wer": 0.5743575535083466, "step": 3200 }, { "epoch": 41.77, "grad_norm": 1.6934860944747925, "learning_rate": 1.2956399437412097e-05, "loss": 0.2585, "step": 3300 }, { "epoch": 41.77, "eval_loss": 0.6224856972694397, "eval_runtime": 11.0743, "eval_samples_per_second": 40.634, "eval_steps_per_second": 2.077, "eval_wer": 0.5784442751264113, "step": 3300 }, { "epoch": 43.04, "grad_norm": 1.937164068222046, "learning_rate": 1.2675105485232069e-05, "loss": 0.2699, "step": 3400 }, { "epoch": 43.04, "eval_loss": 0.6225900650024414, "eval_runtime": 11.1534, "eval_samples_per_second": 40.347, "eval_steps_per_second": 2.062, "eval_wer": 0.5664611761446284, "step": 3400 }, { "epoch": 44.3, "grad_norm": 2.5404953956604004, "learning_rate": 1.239381153305204e-05, "loss": 0.2785, "step": 3500 }, { "epoch": 44.3, "eval_loss": 0.6240466833114624, "eval_runtime": 11.1649, "eval_samples_per_second": 40.305, "eval_steps_per_second": 2.06, "eval_wer": 0.5713790953799266, "step": 3500 }, { "epoch": 45.57, "grad_norm": 5.070058822631836, "learning_rate": 1.2112517580872011e-05, "loss": 0.2689, "step": 3600 }, { "epoch": 45.57, "eval_loss": 0.6294780969619751, "eval_runtime": 11.118, "eval_samples_per_second": 40.475, "eval_steps_per_second": 2.069, "eval_wer": 0.5648680473782642, "step": 3600 }, { "epoch": 46.84, "grad_norm": 2.5456833839416504, "learning_rate": 1.1831223628691983e-05, "loss": 0.2514, "step": 3700 }, { "epoch": 46.84, "eval_loss": 0.6424580812454224, "eval_runtime": 11.271, "eval_samples_per_second": 39.926, "eval_steps_per_second": 2.041, "eval_wer": 0.5420793793724458, "step": 3700 }, { "epoch": 48.1, "grad_norm": 2.153717279434204, "learning_rate": 1.1549929676511956e-05, "loss": 0.2433, "step": 3800 }, { "epoch": 48.1, "eval_loss": 0.6667928099632263, "eval_runtime": 11.226, "eval_samples_per_second": 40.086, "eval_steps_per_second": 2.049, "eval_wer": 0.606774260580453, "step": 3800 }, { "epoch": 49.37, "grad_norm": 2.5784971714019775, "learning_rate": 1.1268635724331929e-05, "loss": 0.2403, "step": 3900 }, { "epoch": 49.37, "eval_loss": 0.6562526226043701, "eval_runtime": 11.112, "eval_samples_per_second": 40.497, "eval_steps_per_second": 2.07, "eval_wer": 0.5749809517212717, "step": 3900 }, { "epoch": 50.63, "grad_norm": 2.656663656234741, "learning_rate": 1.09873417721519e-05, "loss": 0.2287, "step": 4000 }, { "epoch": 50.63, "eval_loss": 0.6695858240127563, "eval_runtime": 11.0907, "eval_samples_per_second": 40.575, "eval_steps_per_second": 2.074, "eval_wer": 0.5932672993004087, "step": 4000 }, { "epoch": 51.9, "grad_norm": 2.9128212928771973, "learning_rate": 1.0706047819971872e-05, "loss": 0.2366, "step": 4100 }, { "epoch": 51.9, "eval_loss": 0.6738879680633545, "eval_runtime": 11.1372, "eval_samples_per_second": 40.405, "eval_steps_per_second": 2.065, "eval_wer": 0.5731107570824964, "step": 4100 }, { "epoch": 53.16, "grad_norm": 1.8941991329193115, "learning_rate": 1.0424753867791843e-05, "loss": 0.2295, "step": 4200 }, { "epoch": 53.16, "eval_loss": 0.680944561958313, "eval_runtime": 11.1339, "eval_samples_per_second": 40.417, "eval_steps_per_second": 2.066, "eval_wer": 0.6090600540278451, "step": 4200 }, { "epoch": 54.43, "grad_norm": 2.2944602966308594, "learning_rate": 1.0143459915611814e-05, "loss": 0.2274, "step": 4300 }, { "epoch": 54.43, "eval_loss": 0.6874995827674866, "eval_runtime": 11.1569, "eval_samples_per_second": 40.334, "eval_steps_per_second": 2.061, "eval_wer": 0.5913971046616333, "step": 4300 }, { "epoch": 55.7, "grad_norm": 2.4684228897094727, "learning_rate": 9.862165963431787e-06, "loss": 0.2178, "step": 4400 }, { "epoch": 55.7, "eval_loss": 0.6899309158325195, "eval_runtime": 11.21, "eval_samples_per_second": 40.143, "eval_steps_per_second": 2.052, "eval_wer": 0.5949296945348757, "step": 4400 }, { "epoch": 56.96, "grad_norm": 2.0573887825012207, "learning_rate": 9.580872011251759e-06, "loss": 0.2176, "step": 4500 }, { "epoch": 56.96, "eval_loss": 0.6924750208854675, "eval_runtime": 11.1413, "eval_samples_per_second": 40.39, "eval_steps_per_second": 2.064, "eval_wer": 0.5828080626168871, "step": 4500 }, { "epoch": 58.23, "grad_norm": 1.9555515050888062, "learning_rate": 9.299578059071732e-06, "loss": 0.2064, "step": 4600 }, { "epoch": 58.23, "eval_loss": 0.7009023427963257, "eval_runtime": 11.0931, "eval_samples_per_second": 40.566, "eval_steps_per_second": 2.073, "eval_wer": 0.598462284408118, "step": 4600 }, { "epoch": 59.49, "grad_norm": 3.208376169204712, "learning_rate": 9.018284106891703e-06, "loss": 0.2081, "step": 4700 }, { "epoch": 59.49, "eval_loss": 0.701277494430542, "eval_runtime": 11.216, "eval_samples_per_second": 40.121, "eval_steps_per_second": 2.051, "eval_wer": 0.5995705478977627, "step": 4700 }, { "epoch": 60.76, "grad_norm": 2.5926976203918457, "learning_rate": 8.736990154711675e-06, "loss": 0.2093, "step": 4800 }, { "epoch": 60.76, "eval_loss": 0.725727379322052, "eval_runtime": 11.1473, "eval_samples_per_second": 40.369, "eval_steps_per_second": 2.063, "eval_wer": 0.6086444552192284, "step": 4800 }, { "epoch": 62.03, "grad_norm": 2.2040176391601562, "learning_rate": 8.455696202531646e-06, "loss": 0.2024, "step": 4900 }, { "epoch": 62.03, "eval_loss": 0.7215314507484436, "eval_runtime": 11.4213, "eval_samples_per_second": 39.4, "eval_steps_per_second": 2.014, "eval_wer": 0.6003324790468934, "step": 4900 }, { "epoch": 63.29, "grad_norm": 1.6947568655014038, "learning_rate": 8.174402250351619e-06, "loss": 0.1999, "step": 5000 }, { "epoch": 63.29, "eval_loss": 0.7332788109779358, "eval_runtime": 11.2504, "eval_samples_per_second": 39.999, "eval_steps_per_second": 2.044, "eval_wer": 0.6090600540278451, "step": 5000 }, { "epoch": 64.56, "grad_norm": 2.657949686050415, "learning_rate": 7.89310829817159e-06, "loss": 0.2064, "step": 5100 }, { "epoch": 64.56, "eval_loss": 0.7529835104942322, "eval_runtime": 11.2168, "eval_samples_per_second": 40.118, "eval_steps_per_second": 2.05, "eval_wer": 0.6397450993973818, "step": 5100 }, { "epoch": 65.82, "grad_norm": 2.161647081375122, "learning_rate": 7.611814345991562e-06, "loss": 0.186, "step": 5200 }, { "epoch": 65.82, "eval_loss": 0.7542085647583008, "eval_runtime": 11.1628, "eval_samples_per_second": 40.312, "eval_steps_per_second": 2.06, "eval_wer": 0.6348964466301863, "step": 5200 }, { "epoch": 67.09, "grad_norm": 1.5503740310668945, "learning_rate": 7.330520393811533e-06, "loss": 0.186, "step": 5300 }, { "epoch": 67.09, "eval_loss": 0.7416096925735474, "eval_runtime": 11.1272, "eval_samples_per_second": 40.441, "eval_steps_per_second": 2.067, "eval_wer": 0.6270000692664681, "step": 5300 }, { "epoch": 68.35, "grad_norm": 2.8439977169036865, "learning_rate": 7.049226441631506e-06, "loss": 0.1807, "step": 5400 }, { "epoch": 68.35, "eval_loss": 0.7548705339431763, "eval_runtime": 11.1833, "eval_samples_per_second": 40.239, "eval_steps_per_second": 2.057, "eval_wer": 0.6352427789707003, "step": 5400 }, { "epoch": 69.62, "grad_norm": 3.1191818714141846, "learning_rate": 6.7679324894514775e-06, "loss": 0.1784, "step": 5500 }, { "epoch": 69.62, "eval_loss": 0.7506438493728638, "eval_runtime": 11.1231, "eval_samples_per_second": 40.456, "eval_steps_per_second": 2.068, "eval_wer": 0.5844011913832514, "step": 5500 }, { "epoch": 70.89, "grad_norm": 2.1088929176330566, "learning_rate": 6.486638537271449e-06, "loss": 0.1824, "step": 5600 }, { "epoch": 70.89, "eval_loss": 0.7611370086669922, "eval_runtime": 11.0844, "eval_samples_per_second": 40.598, "eval_steps_per_second": 2.075, "eval_wer": 0.6252684075638983, "step": 5600 }, { "epoch": 72.15, "grad_norm": 3.0879805088043213, "learning_rate": 6.208157524613221e-06, "loss": 0.1769, "step": 5700 }, { "epoch": 72.15, "eval_loss": 0.771263837814331, "eval_runtime": 11.226, "eval_samples_per_second": 40.085, "eval_steps_per_second": 2.049, "eval_wer": 0.5927131675555863, "step": 5700 }, { "epoch": 73.42, "grad_norm": 2.1844921112060547, "learning_rate": 5.926863572433193e-06, "loss": 0.1843, "step": 5800 }, { "epoch": 73.42, "eval_loss": 0.7719753980636597, "eval_runtime": 11.1675, "eval_samples_per_second": 40.295, "eval_steps_per_second": 2.06, "eval_wer": 0.5955530927478008, "step": 5800 }, { "epoch": 74.68, "grad_norm": 2.3081653118133545, "learning_rate": 5.645569620253165e-06, "loss": 0.1709, "step": 5900 }, { "epoch": 74.68, "eval_loss": 0.7804738879203796, "eval_runtime": 11.2408, "eval_samples_per_second": 40.033, "eval_steps_per_second": 2.046, "eval_wer": 0.6258225393087207, "step": 5900 }, { "epoch": 75.95, "grad_norm": 3.2150704860687256, "learning_rate": 5.364275668073137e-06, "loss": 0.1691, "step": 6000 }, { "epoch": 75.95, "eval_loss": 0.7865281105041504, "eval_runtime": 11.2778, "eval_samples_per_second": 39.901, "eval_steps_per_second": 2.039, "eval_wer": 0.6281775992242156, "step": 6000 }, { "epoch": 77.22, "grad_norm": 1.8031377792358398, "learning_rate": 5.082981715893108e-06, "loss": 0.1701, "step": 6100 }, { "epoch": 77.22, "eval_loss": 0.7807941436767578, "eval_runtime": 11.2568, "eval_samples_per_second": 39.976, "eval_steps_per_second": 2.043, "eval_wer": 0.6218050841587588, "step": 6100 }, { "epoch": 78.48, "grad_norm": 1.8435957431793213, "learning_rate": 4.8016877637130805e-06, "loss": 0.1735, "step": 6200 }, { "epoch": 78.48, "eval_loss": 0.7789934873580933, "eval_runtime": 11.2304, "eval_samples_per_second": 40.07, "eval_steps_per_second": 2.048, "eval_wer": 0.5965920897693426, "step": 6200 }, { "epoch": 79.75, "grad_norm": 1.9381072521209717, "learning_rate": 4.520393811533053e-06, "loss": 0.1746, "step": 6300 }, { "epoch": 79.75, "eval_loss": 0.7949352264404297, "eval_runtime": 11.4313, "eval_samples_per_second": 39.366, "eval_steps_per_second": 2.012, "eval_wer": 0.6430698898663157, "step": 6300 }, { "epoch": 81.01, "grad_norm": 1.8376802206039429, "learning_rate": 4.239099859353024e-06, "loss": 0.1745, "step": 6400 }, { "epoch": 81.01, "eval_loss": 0.8125633001327515, "eval_runtime": 11.456, "eval_samples_per_second": 39.281, "eval_steps_per_second": 2.008, "eval_wer": 0.6284546650966267, "step": 6400 }, { "epoch": 82.28, "grad_norm": 2.187868118286133, "learning_rate": 3.957805907172996e-06, "loss": 0.1605, "step": 6500 }, { "epoch": 82.28, "eval_loss": 0.8113065361976624, "eval_runtime": 11.3144, "eval_samples_per_second": 39.772, "eval_steps_per_second": 2.033, "eval_wer": 0.6194500242432638, "step": 6500 }, { "epoch": 83.54, "grad_norm": 2.5882511138916016, "learning_rate": 3.676511954992968e-06, "loss": 0.1579, "step": 6600 }, { "epoch": 83.54, "eval_loss": 0.7976768612861633, "eval_runtime": 11.4022, "eval_samples_per_second": 39.466, "eval_steps_per_second": 2.017, "eval_wer": 0.6155018355614047, "step": 6600 }, { "epoch": 84.81, "grad_norm": 2.416449546813965, "learning_rate": 3.39521800281294e-06, "loss": 0.1704, "step": 6700 }, { "epoch": 84.81, "eval_loss": 0.8016535043716431, "eval_runtime": 11.3366, "eval_samples_per_second": 39.694, "eval_steps_per_second": 2.029, "eval_wer": 0.6139779732631433, "step": 6700 }, { "epoch": 86.08, "grad_norm": 1.5159286260604858, "learning_rate": 3.1139240506329116e-06, "loss": 0.1659, "step": 6800 }, { "epoch": 86.08, "eval_loss": 0.8146914839744568, "eval_runtime": 11.289, "eval_samples_per_second": 39.862, "eval_steps_per_second": 2.037, "eval_wer": 0.6279005333518044, "step": 6800 }, { "epoch": 87.34, "grad_norm": 2.9167511463165283, "learning_rate": 2.832630098452884e-06, "loss": 0.166, "step": 6900 }, { "epoch": 87.34, "eval_loss": 0.8088270425796509, "eval_runtime": 11.28, "eval_samples_per_second": 39.893, "eval_steps_per_second": 2.039, "eval_wer": 0.6350349795663919, "step": 6900 }, { "epoch": 88.61, "grad_norm": 2.3707916736602783, "learning_rate": 2.5513361462728552e-06, "loss": 0.1539, "step": 7000 }, { "epoch": 88.61, "eval_loss": 0.8052927255630493, "eval_runtime": 11.3196, "eval_samples_per_second": 39.754, "eval_steps_per_second": 2.032, "eval_wer": 0.616402299646741, "step": 7000 }, { "epoch": 89.87, "grad_norm": 2.315516471862793, "learning_rate": 2.270042194092827e-06, "loss": 0.1589, "step": 7100 }, { "epoch": 89.87, "eval_loss": 0.8188755512237549, "eval_runtime": 11.3039, "eval_samples_per_second": 39.809, "eval_steps_per_second": 2.035, "eval_wer": 0.6357276442474198, "step": 7100 }, { "epoch": 91.14, "grad_norm": 1.863142967224121, "learning_rate": 1.9887482419127992e-06, "loss": 0.1559, "step": 7200 }, { "epoch": 91.14, "eval_loss": 0.8152031898498535, "eval_runtime": 11.2054, "eval_samples_per_second": 40.159, "eval_steps_per_second": 2.053, "eval_wer": 0.6258225393087207, "step": 7200 }, { "epoch": 92.41, "grad_norm": 3.034898519515991, "learning_rate": 1.7074542897327708e-06, "loss": 0.1564, "step": 7300 }, { "epoch": 92.41, "eval_loss": 0.8190972208976746, "eval_runtime": 11.3427, "eval_samples_per_second": 39.673, "eval_steps_per_second": 2.028, "eval_wer": 0.6245064764147676, "step": 7300 }, { "epoch": 93.67, "grad_norm": 1.673194408416748, "learning_rate": 1.4261603375527428e-06, "loss": 0.158, "step": 7400 }, { "epoch": 93.67, "eval_loss": 0.8254526853561401, "eval_runtime": 11.282, "eval_samples_per_second": 39.887, "eval_steps_per_second": 2.039, "eval_wer": 0.6333033178638221, "step": 7400 }, { "epoch": 94.94, "grad_norm": 1.6554739475250244, "learning_rate": 1.1448663853727146e-06, "loss": 0.1595, "step": 7500 }, { "epoch": 94.94, "eval_loss": 0.8184179663658142, "eval_runtime": 11.2214, "eval_samples_per_second": 40.102, "eval_steps_per_second": 2.05, "eval_wer": 0.6205582877329086, "step": 7500 }, { "epoch": 96.2, "grad_norm": 2.310234785079956, "learning_rate": 8.635724331926865e-07, "loss": 0.1638, "step": 7600 }, { "epoch": 96.2, "eval_loss": 0.8229891061782837, "eval_runtime": 11.1115, "eval_samples_per_second": 40.499, "eval_steps_per_second": 2.07, "eval_wer": 0.6364203089284477, "step": 7600 }, { "epoch": 97.47, "grad_norm": 1.6313074827194214, "learning_rate": 5.822784810126583e-07, "loss": 0.1629, "step": 7700 }, { "epoch": 97.47, "eval_loss": 0.8244702219963074, "eval_runtime": 11.1778, "eval_samples_per_second": 40.258, "eval_steps_per_second": 2.058, "eval_wer": 0.6312253238207384, "step": 7700 }, { "epoch": 98.73, "grad_norm": 2.2342050075531006, "learning_rate": 3.009845288326301e-07, "loss": 0.1531, "step": 7800 }, { "epoch": 98.73, "eval_loss": 0.8226235508918762, "eval_runtime": 11.0989, "eval_samples_per_second": 40.545, "eval_steps_per_second": 2.072, "eval_wer": 0.626723003394057, "step": 7800 }, { "epoch": 100.0, "grad_norm": 3.3648197650909424, "learning_rate": 1.9690576652601972e-08, "loss": 0.1572, "step": 7900 }, { "epoch": 100.0, "eval_loss": 0.8258158564567566, "eval_runtime": 11.1124, "eval_samples_per_second": 40.495, "eval_steps_per_second": 2.07, "eval_wer": 0.6288702639052435, "step": 7900 }, { "epoch": 100.0, "step": 7900, "total_flos": 3.6740184088961606e+19, "train_loss": 0.6056561310683625, "train_runtime": 25369.8434, "train_samples_per_second": 12.404, "train_steps_per_second": 0.311 } ], "logging_steps": 100, "max_steps": 7900, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 100, "total_flos": 3.6740184088961606e+19, "train_batch_size": 20, "trial_name": null, "trial_params": null }