|
{ |
|
"best_metric": 0.5420793793724458, |
|
"best_model_checkpoint": "md_d_l2_arctic/checkpoint-3700", |
|
"epoch": 100.0, |
|
"eval_steps": 100, |
|
"global_step": 7900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 92.3280258178711, |
|
"learning_rate": 2.4050632911392408e-06, |
|
"loss": 14.3657, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 9.120972633361816, |
|
"eval_runtime": 15.883, |
|
"eval_samples_per_second": 28.332, |
|
"eval_steps_per_second": 1.448, |
|
"eval_wer": 2.4374870125372308, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 1.9874589443206787, |
|
"learning_rate": 4.9113924050632915e-06, |
|
"loss": 4.382, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 3.421921730041504, |
|
"eval_runtime": 10.9345, |
|
"eval_samples_per_second": 41.154, |
|
"eval_steps_per_second": 2.103, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 3.151137590408325, |
|
"learning_rate": 7.443037974683544e-06, |
|
"loss": 3.2514, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 2.788123369216919, |
|
"eval_runtime": 10.9315, |
|
"eval_samples_per_second": 41.165, |
|
"eval_steps_per_second": 2.104, |
|
"eval_wer": 0.9979912724250191, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 3.4454073905944824, |
|
"learning_rate": 9.974683544303799e-06, |
|
"loss": 2.4508, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_loss": 1.8000441789627075, |
|
"eval_runtime": 10.9542, |
|
"eval_samples_per_second": 41.08, |
|
"eval_steps_per_second": 2.1, |
|
"eval_wer": 0.7380342176352428, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"grad_norm": 2.928997039794922, |
|
"learning_rate": 1.2506329113924051e-05, |
|
"loss": 1.6168, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 1.131545901298523, |
|
"eval_runtime": 11.2822, |
|
"eval_samples_per_second": 39.886, |
|
"eval_steps_per_second": 2.039, |
|
"eval_wer": 0.967791092332202, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"grad_norm": 2.174651861190796, |
|
"learning_rate": 1.5037974683544306e-05, |
|
"loss": 1.1212, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"eval_loss": 0.8748846054077148, |
|
"eval_runtime": 11.1275, |
|
"eval_samples_per_second": 40.44, |
|
"eval_steps_per_second": 2.067, |
|
"eval_wer": 1.065803144697652, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"grad_norm": 7.105869770050049, |
|
"learning_rate": 1.7569620253164558e-05, |
|
"loss": 0.8953, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_loss": 0.7655201554298401, |
|
"eval_runtime": 11.2119, |
|
"eval_samples_per_second": 40.136, |
|
"eval_steps_per_second": 2.051, |
|
"eval_wer": 0.9655052988848098, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"grad_norm": 2.2692487239837646, |
|
"learning_rate": 1.99887482419128e-05, |
|
"loss": 0.7684, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"eval_loss": 0.6687426567077637, |
|
"eval_runtime": 11.2107, |
|
"eval_samples_per_second": 40.14, |
|
"eval_steps_per_second": 2.052, |
|
"eval_wer": 0.7621389485350142, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"grad_norm": 3.137871742248535, |
|
"learning_rate": 1.970745428973277e-05, |
|
"loss": 0.6661, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"eval_loss": 0.6319410800933838, |
|
"eval_runtime": 11.2724, |
|
"eval_samples_per_second": 39.921, |
|
"eval_steps_per_second": 2.04, |
|
"eval_wer": 0.6755558634065248, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"grad_norm": 2.894819498062134, |
|
"learning_rate": 1.9426160337552744e-05, |
|
"loss": 0.6306, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"eval_loss": 0.6195651292800903, |
|
"eval_runtime": 11.3009, |
|
"eval_samples_per_second": 39.82, |
|
"eval_steps_per_second": 2.035, |
|
"eval_wer": 0.6963358038373624, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"grad_norm": 3.4255077838897705, |
|
"learning_rate": 1.9144866385372717e-05, |
|
"loss": 0.5759, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"eval_loss": 0.5875259637832642, |
|
"eval_runtime": 11.1702, |
|
"eval_samples_per_second": 40.286, |
|
"eval_steps_per_second": 2.059, |
|
"eval_wer": 0.5965228233012398, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"grad_norm": 2.189448118209839, |
|
"learning_rate": 1.8863572433192687e-05, |
|
"loss": 0.5417, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"eval_loss": 0.5779715776443481, |
|
"eval_runtime": 11.2889, |
|
"eval_samples_per_second": 39.862, |
|
"eval_steps_per_second": 2.037, |
|
"eval_wer": 0.6528364618688093, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 16.46, |
|
"grad_norm": 6.146909713745117, |
|
"learning_rate": 1.858227848101266e-05, |
|
"loss": 0.528, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 16.46, |
|
"eval_loss": 0.5798078179359436, |
|
"eval_runtime": 11.2225, |
|
"eval_samples_per_second": 40.098, |
|
"eval_steps_per_second": 2.049, |
|
"eval_wer": 0.6539447253584539, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"grad_norm": 3.1426494121551514, |
|
"learning_rate": 1.8300984528832633e-05, |
|
"loss": 0.4857, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"eval_loss": 0.5568873286247253, |
|
"eval_runtime": 11.201, |
|
"eval_samples_per_second": 40.175, |
|
"eval_steps_per_second": 2.053, |
|
"eval_wer": 0.5724873588695713, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"grad_norm": 1.9612127542495728, |
|
"learning_rate": 1.8019690576652603e-05, |
|
"loss": 0.4655, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_loss": 0.549960196018219, |
|
"eval_runtime": 11.3494, |
|
"eval_samples_per_second": 39.65, |
|
"eval_steps_per_second": 2.027, |
|
"eval_wer": 0.575535083466094, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 20.25, |
|
"grad_norm": 2.946164131164551, |
|
"learning_rate": 1.7738396624472576e-05, |
|
"loss": 0.4526, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 20.25, |
|
"eval_loss": 0.5582863092422485, |
|
"eval_runtime": 11.3026, |
|
"eval_samples_per_second": 39.814, |
|
"eval_steps_per_second": 2.035, |
|
"eval_wer": 0.5776130775091778, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 21.52, |
|
"grad_norm": 2.338858127593994, |
|
"learning_rate": 1.745710267229255e-05, |
|
"loss": 0.4287, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 21.52, |
|
"eval_loss": 0.555654764175415, |
|
"eval_runtime": 11.2714, |
|
"eval_samples_per_second": 39.924, |
|
"eval_steps_per_second": 2.041, |
|
"eval_wer": 0.5609891251645078, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"grad_norm": 2.5261471271514893, |
|
"learning_rate": 1.717580872011252e-05, |
|
"loss": 0.4149, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"eval_loss": 0.5575445294380188, |
|
"eval_runtime": 11.3966, |
|
"eval_samples_per_second": 39.486, |
|
"eval_steps_per_second": 2.018, |
|
"eval_wer": 0.5748424187850661, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 24.05, |
|
"grad_norm": 3.1321146488189697, |
|
"learning_rate": 1.689451476793249e-05, |
|
"loss": 0.3983, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 24.05, |
|
"eval_loss": 0.5648804306983948, |
|
"eval_runtime": 11.191, |
|
"eval_samples_per_second": 40.211, |
|
"eval_steps_per_second": 2.055, |
|
"eval_wer": 0.6003324790468934, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"grad_norm": 2.3534066677093506, |
|
"learning_rate": 1.661322081575246e-05, |
|
"loss": 0.4001, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"eval_loss": 0.567441463470459, |
|
"eval_runtime": 11.1757, |
|
"eval_samples_per_second": 40.266, |
|
"eval_steps_per_second": 2.058, |
|
"eval_wer": 0.5976310867908845, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 26.58, |
|
"grad_norm": 2.417196750640869, |
|
"learning_rate": 1.6331926863572434e-05, |
|
"loss": 0.3649, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 26.58, |
|
"eval_loss": 0.579703152179718, |
|
"eval_runtime": 11.126, |
|
"eval_samples_per_second": 40.446, |
|
"eval_steps_per_second": 2.067, |
|
"eval_wer": 0.5804530027013922, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 27.85, |
|
"grad_norm": 2.8033077716827393, |
|
"learning_rate": 1.6050632911392404e-05, |
|
"loss": 0.3711, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 27.85, |
|
"eval_loss": 0.5839091539382935, |
|
"eval_runtime": 11.2012, |
|
"eval_samples_per_second": 40.174, |
|
"eval_steps_per_second": 2.053, |
|
"eval_wer": 0.6546373900394818, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 29.11, |
|
"grad_norm": 2.2273740768432617, |
|
"learning_rate": 1.576933895921238e-05, |
|
"loss": 0.3547, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 29.11, |
|
"eval_loss": 0.5734866261482239, |
|
"eval_runtime": 11.079, |
|
"eval_samples_per_second": 40.617, |
|
"eval_steps_per_second": 2.076, |
|
"eval_wer": 0.5904273741081942, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 30.38, |
|
"grad_norm": 2.2852895259857178, |
|
"learning_rate": 1.548804500703235e-05, |
|
"loss": 0.3402, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 30.38, |
|
"eval_loss": 0.5698839426040649, |
|
"eval_runtime": 11.0702, |
|
"eval_samples_per_second": 40.65, |
|
"eval_steps_per_second": 2.078, |
|
"eval_wer": 0.5426335111172681, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 31.65, |
|
"grad_norm": 1.869658350944519, |
|
"learning_rate": 1.5206751054852323e-05, |
|
"loss": 0.3414, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 31.65, |
|
"eval_loss": 0.5700486302375793, |
|
"eval_runtime": 11.3836, |
|
"eval_samples_per_second": 39.531, |
|
"eval_steps_per_second": 2.02, |
|
"eval_wer": 0.5421486458405486, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 32.91, |
|
"grad_norm": 1.7649214267730713, |
|
"learning_rate": 1.4925457102672294e-05, |
|
"loss": 0.3255, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 32.91, |
|
"eval_loss": 0.5744786262512207, |
|
"eval_runtime": 11.2135, |
|
"eval_samples_per_second": 40.13, |
|
"eval_steps_per_second": 2.051, |
|
"eval_wer": 0.5663226432084228, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 34.18, |
|
"grad_norm": 2.0651187896728516, |
|
"learning_rate": 1.4644163150492266e-05, |
|
"loss": 0.3093, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 34.18, |
|
"eval_loss": 0.5957615971565247, |
|
"eval_runtime": 11.1504, |
|
"eval_samples_per_second": 40.357, |
|
"eval_steps_per_second": 2.063, |
|
"eval_wer": 0.5931980328323059, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 35.44, |
|
"grad_norm": 2.1554198265075684, |
|
"learning_rate": 1.4362869198312237e-05, |
|
"loss": 0.315, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 35.44, |
|
"eval_loss": 0.5933964848518372, |
|
"eval_runtime": 11.202, |
|
"eval_samples_per_second": 40.171, |
|
"eval_steps_per_second": 2.053, |
|
"eval_wer": 0.5905659070443998, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"grad_norm": 2.880059003829956, |
|
"learning_rate": 1.4081575246132208e-05, |
|
"loss": 0.31, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"eval_loss": 0.6071695685386658, |
|
"eval_runtime": 11.2335, |
|
"eval_samples_per_second": 40.059, |
|
"eval_steps_per_second": 2.047, |
|
"eval_wer": 0.601094410196024, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 37.97, |
|
"grad_norm": 2.7784523963928223, |
|
"learning_rate": 1.380028129395218e-05, |
|
"loss": 0.3026, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 37.97, |
|
"eval_loss": 0.6038002371788025, |
|
"eval_runtime": 11.1544, |
|
"eval_samples_per_second": 40.343, |
|
"eval_steps_per_second": 2.062, |
|
"eval_wer": 0.5760199487428136, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 39.24, |
|
"grad_norm": 2.2978756427764893, |
|
"learning_rate": 1.3518987341772155e-05, |
|
"loss": 0.2802, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 39.24, |
|
"eval_loss": 0.6079789400100708, |
|
"eval_runtime": 11.1769, |
|
"eval_samples_per_second": 40.262, |
|
"eval_steps_per_second": 2.058, |
|
"eval_wer": 0.5776823439772806, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 40.51, |
|
"grad_norm": 2.1417360305786133, |
|
"learning_rate": 1.3237693389592126e-05, |
|
"loss": 0.2835, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 40.51, |
|
"eval_loss": 0.6061974167823792, |
|
"eval_runtime": 11.1226, |
|
"eval_samples_per_second": 40.458, |
|
"eval_steps_per_second": 2.068, |
|
"eval_wer": 0.5743575535083466, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 41.77, |
|
"grad_norm": 1.6934860944747925, |
|
"learning_rate": 1.2956399437412097e-05, |
|
"loss": 0.2585, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 41.77, |
|
"eval_loss": 0.6224856972694397, |
|
"eval_runtime": 11.0743, |
|
"eval_samples_per_second": 40.634, |
|
"eval_steps_per_second": 2.077, |
|
"eval_wer": 0.5784442751264113, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 43.04, |
|
"grad_norm": 1.937164068222046, |
|
"learning_rate": 1.2675105485232069e-05, |
|
"loss": 0.2699, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 43.04, |
|
"eval_loss": 0.6225900650024414, |
|
"eval_runtime": 11.1534, |
|
"eval_samples_per_second": 40.347, |
|
"eval_steps_per_second": 2.062, |
|
"eval_wer": 0.5664611761446284, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 44.3, |
|
"grad_norm": 2.5404953956604004, |
|
"learning_rate": 1.239381153305204e-05, |
|
"loss": 0.2785, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 44.3, |
|
"eval_loss": 0.6240466833114624, |
|
"eval_runtime": 11.1649, |
|
"eval_samples_per_second": 40.305, |
|
"eval_steps_per_second": 2.06, |
|
"eval_wer": 0.5713790953799266, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"grad_norm": 5.070058822631836, |
|
"learning_rate": 1.2112517580872011e-05, |
|
"loss": 0.2689, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"eval_loss": 0.6294780969619751, |
|
"eval_runtime": 11.118, |
|
"eval_samples_per_second": 40.475, |
|
"eval_steps_per_second": 2.069, |
|
"eval_wer": 0.5648680473782642, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 46.84, |
|
"grad_norm": 2.5456833839416504, |
|
"learning_rate": 1.1831223628691983e-05, |
|
"loss": 0.2514, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 46.84, |
|
"eval_loss": 0.6424580812454224, |
|
"eval_runtime": 11.271, |
|
"eval_samples_per_second": 39.926, |
|
"eval_steps_per_second": 2.041, |
|
"eval_wer": 0.5420793793724458, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 48.1, |
|
"grad_norm": 2.153717279434204, |
|
"learning_rate": 1.1549929676511956e-05, |
|
"loss": 0.2433, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 48.1, |
|
"eval_loss": 0.6667928099632263, |
|
"eval_runtime": 11.226, |
|
"eval_samples_per_second": 40.086, |
|
"eval_steps_per_second": 2.049, |
|
"eval_wer": 0.606774260580453, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 49.37, |
|
"grad_norm": 2.5784971714019775, |
|
"learning_rate": 1.1268635724331929e-05, |
|
"loss": 0.2403, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 49.37, |
|
"eval_loss": 0.6562526226043701, |
|
"eval_runtime": 11.112, |
|
"eval_samples_per_second": 40.497, |
|
"eval_steps_per_second": 2.07, |
|
"eval_wer": 0.5749809517212717, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 50.63, |
|
"grad_norm": 2.656663656234741, |
|
"learning_rate": 1.09873417721519e-05, |
|
"loss": 0.2287, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 50.63, |
|
"eval_loss": 0.6695858240127563, |
|
"eval_runtime": 11.0907, |
|
"eval_samples_per_second": 40.575, |
|
"eval_steps_per_second": 2.074, |
|
"eval_wer": 0.5932672993004087, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 51.9, |
|
"grad_norm": 2.9128212928771973, |
|
"learning_rate": 1.0706047819971872e-05, |
|
"loss": 0.2366, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 51.9, |
|
"eval_loss": 0.6738879680633545, |
|
"eval_runtime": 11.1372, |
|
"eval_samples_per_second": 40.405, |
|
"eval_steps_per_second": 2.065, |
|
"eval_wer": 0.5731107570824964, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 53.16, |
|
"grad_norm": 1.8941991329193115, |
|
"learning_rate": 1.0424753867791843e-05, |
|
"loss": 0.2295, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 53.16, |
|
"eval_loss": 0.680944561958313, |
|
"eval_runtime": 11.1339, |
|
"eval_samples_per_second": 40.417, |
|
"eval_steps_per_second": 2.066, |
|
"eval_wer": 0.6090600540278451, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 54.43, |
|
"grad_norm": 2.2944602966308594, |
|
"learning_rate": 1.0143459915611814e-05, |
|
"loss": 0.2274, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 54.43, |
|
"eval_loss": 0.6874995827674866, |
|
"eval_runtime": 11.1569, |
|
"eval_samples_per_second": 40.334, |
|
"eval_steps_per_second": 2.061, |
|
"eval_wer": 0.5913971046616333, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 55.7, |
|
"grad_norm": 2.4684228897094727, |
|
"learning_rate": 9.862165963431787e-06, |
|
"loss": 0.2178, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 55.7, |
|
"eval_loss": 0.6899309158325195, |
|
"eval_runtime": 11.21, |
|
"eval_samples_per_second": 40.143, |
|
"eval_steps_per_second": 2.052, |
|
"eval_wer": 0.5949296945348757, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 56.96, |
|
"grad_norm": 2.0573887825012207, |
|
"learning_rate": 9.580872011251759e-06, |
|
"loss": 0.2176, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 56.96, |
|
"eval_loss": 0.6924750208854675, |
|
"eval_runtime": 11.1413, |
|
"eval_samples_per_second": 40.39, |
|
"eval_steps_per_second": 2.064, |
|
"eval_wer": 0.5828080626168871, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 58.23, |
|
"grad_norm": 1.9555515050888062, |
|
"learning_rate": 9.299578059071732e-06, |
|
"loss": 0.2064, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 58.23, |
|
"eval_loss": 0.7009023427963257, |
|
"eval_runtime": 11.0931, |
|
"eval_samples_per_second": 40.566, |
|
"eval_steps_per_second": 2.073, |
|
"eval_wer": 0.598462284408118, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 59.49, |
|
"grad_norm": 3.208376169204712, |
|
"learning_rate": 9.018284106891703e-06, |
|
"loss": 0.2081, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 59.49, |
|
"eval_loss": 0.701277494430542, |
|
"eval_runtime": 11.216, |
|
"eval_samples_per_second": 40.121, |
|
"eval_steps_per_second": 2.051, |
|
"eval_wer": 0.5995705478977627, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 60.76, |
|
"grad_norm": 2.5926976203918457, |
|
"learning_rate": 8.736990154711675e-06, |
|
"loss": 0.2093, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 60.76, |
|
"eval_loss": 0.725727379322052, |
|
"eval_runtime": 11.1473, |
|
"eval_samples_per_second": 40.369, |
|
"eval_steps_per_second": 2.063, |
|
"eval_wer": 0.6086444552192284, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 62.03, |
|
"grad_norm": 2.2040176391601562, |
|
"learning_rate": 8.455696202531646e-06, |
|
"loss": 0.2024, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 62.03, |
|
"eval_loss": 0.7215314507484436, |
|
"eval_runtime": 11.4213, |
|
"eval_samples_per_second": 39.4, |
|
"eval_steps_per_second": 2.014, |
|
"eval_wer": 0.6003324790468934, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 63.29, |
|
"grad_norm": 1.6947568655014038, |
|
"learning_rate": 8.174402250351619e-06, |
|
"loss": 0.1999, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 63.29, |
|
"eval_loss": 0.7332788109779358, |
|
"eval_runtime": 11.2504, |
|
"eval_samples_per_second": 39.999, |
|
"eval_steps_per_second": 2.044, |
|
"eval_wer": 0.6090600540278451, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 64.56, |
|
"grad_norm": 2.657949686050415, |
|
"learning_rate": 7.89310829817159e-06, |
|
"loss": 0.2064, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 64.56, |
|
"eval_loss": 0.7529835104942322, |
|
"eval_runtime": 11.2168, |
|
"eval_samples_per_second": 40.118, |
|
"eval_steps_per_second": 2.05, |
|
"eval_wer": 0.6397450993973818, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 65.82, |
|
"grad_norm": 2.161647081375122, |
|
"learning_rate": 7.611814345991562e-06, |
|
"loss": 0.186, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 65.82, |
|
"eval_loss": 0.7542085647583008, |
|
"eval_runtime": 11.1628, |
|
"eval_samples_per_second": 40.312, |
|
"eval_steps_per_second": 2.06, |
|
"eval_wer": 0.6348964466301863, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 67.09, |
|
"grad_norm": 1.5503740310668945, |
|
"learning_rate": 7.330520393811533e-06, |
|
"loss": 0.186, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 67.09, |
|
"eval_loss": 0.7416096925735474, |
|
"eval_runtime": 11.1272, |
|
"eval_samples_per_second": 40.441, |
|
"eval_steps_per_second": 2.067, |
|
"eval_wer": 0.6270000692664681, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 68.35, |
|
"grad_norm": 2.8439977169036865, |
|
"learning_rate": 7.049226441631506e-06, |
|
"loss": 0.1807, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 68.35, |
|
"eval_loss": 0.7548705339431763, |
|
"eval_runtime": 11.1833, |
|
"eval_samples_per_second": 40.239, |
|
"eval_steps_per_second": 2.057, |
|
"eval_wer": 0.6352427789707003, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 69.62, |
|
"grad_norm": 3.1191818714141846, |
|
"learning_rate": 6.7679324894514775e-06, |
|
"loss": 0.1784, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 69.62, |
|
"eval_loss": 0.7506438493728638, |
|
"eval_runtime": 11.1231, |
|
"eval_samples_per_second": 40.456, |
|
"eval_steps_per_second": 2.068, |
|
"eval_wer": 0.5844011913832514, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 70.89, |
|
"grad_norm": 2.1088929176330566, |
|
"learning_rate": 6.486638537271449e-06, |
|
"loss": 0.1824, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 70.89, |
|
"eval_loss": 0.7611370086669922, |
|
"eval_runtime": 11.0844, |
|
"eval_samples_per_second": 40.598, |
|
"eval_steps_per_second": 2.075, |
|
"eval_wer": 0.6252684075638983, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 72.15, |
|
"grad_norm": 3.0879805088043213, |
|
"learning_rate": 6.208157524613221e-06, |
|
"loss": 0.1769, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 72.15, |
|
"eval_loss": 0.771263837814331, |
|
"eval_runtime": 11.226, |
|
"eval_samples_per_second": 40.085, |
|
"eval_steps_per_second": 2.049, |
|
"eval_wer": 0.5927131675555863, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 73.42, |
|
"grad_norm": 2.1844921112060547, |
|
"learning_rate": 5.926863572433193e-06, |
|
"loss": 0.1843, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 73.42, |
|
"eval_loss": 0.7719753980636597, |
|
"eval_runtime": 11.1675, |
|
"eval_samples_per_second": 40.295, |
|
"eval_steps_per_second": 2.06, |
|
"eval_wer": 0.5955530927478008, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 74.68, |
|
"grad_norm": 2.3081653118133545, |
|
"learning_rate": 5.645569620253165e-06, |
|
"loss": 0.1709, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 74.68, |
|
"eval_loss": 0.7804738879203796, |
|
"eval_runtime": 11.2408, |
|
"eval_samples_per_second": 40.033, |
|
"eval_steps_per_second": 2.046, |
|
"eval_wer": 0.6258225393087207, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 75.95, |
|
"grad_norm": 3.2150704860687256, |
|
"learning_rate": 5.364275668073137e-06, |
|
"loss": 0.1691, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 75.95, |
|
"eval_loss": 0.7865281105041504, |
|
"eval_runtime": 11.2778, |
|
"eval_samples_per_second": 39.901, |
|
"eval_steps_per_second": 2.039, |
|
"eval_wer": 0.6281775992242156, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 77.22, |
|
"grad_norm": 1.8031377792358398, |
|
"learning_rate": 5.082981715893108e-06, |
|
"loss": 0.1701, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 77.22, |
|
"eval_loss": 0.7807941436767578, |
|
"eval_runtime": 11.2568, |
|
"eval_samples_per_second": 39.976, |
|
"eval_steps_per_second": 2.043, |
|
"eval_wer": 0.6218050841587588, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 78.48, |
|
"grad_norm": 1.8435957431793213, |
|
"learning_rate": 4.8016877637130805e-06, |
|
"loss": 0.1735, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 78.48, |
|
"eval_loss": 0.7789934873580933, |
|
"eval_runtime": 11.2304, |
|
"eval_samples_per_second": 40.07, |
|
"eval_steps_per_second": 2.048, |
|
"eval_wer": 0.5965920897693426, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 79.75, |
|
"grad_norm": 1.9381072521209717, |
|
"learning_rate": 4.520393811533053e-06, |
|
"loss": 0.1746, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 79.75, |
|
"eval_loss": 0.7949352264404297, |
|
"eval_runtime": 11.4313, |
|
"eval_samples_per_second": 39.366, |
|
"eval_steps_per_second": 2.012, |
|
"eval_wer": 0.6430698898663157, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 81.01, |
|
"grad_norm": 1.8376802206039429, |
|
"learning_rate": 4.239099859353024e-06, |
|
"loss": 0.1745, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 81.01, |
|
"eval_loss": 0.8125633001327515, |
|
"eval_runtime": 11.456, |
|
"eval_samples_per_second": 39.281, |
|
"eval_steps_per_second": 2.008, |
|
"eval_wer": 0.6284546650966267, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 82.28, |
|
"grad_norm": 2.187868118286133, |
|
"learning_rate": 3.957805907172996e-06, |
|
"loss": 0.1605, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 82.28, |
|
"eval_loss": 0.8113065361976624, |
|
"eval_runtime": 11.3144, |
|
"eval_samples_per_second": 39.772, |
|
"eval_steps_per_second": 2.033, |
|
"eval_wer": 0.6194500242432638, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 83.54, |
|
"grad_norm": 2.5882511138916016, |
|
"learning_rate": 3.676511954992968e-06, |
|
"loss": 0.1579, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 83.54, |
|
"eval_loss": 0.7976768612861633, |
|
"eval_runtime": 11.4022, |
|
"eval_samples_per_second": 39.466, |
|
"eval_steps_per_second": 2.017, |
|
"eval_wer": 0.6155018355614047, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 84.81, |
|
"grad_norm": 2.416449546813965, |
|
"learning_rate": 3.39521800281294e-06, |
|
"loss": 0.1704, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 84.81, |
|
"eval_loss": 0.8016535043716431, |
|
"eval_runtime": 11.3366, |
|
"eval_samples_per_second": 39.694, |
|
"eval_steps_per_second": 2.029, |
|
"eval_wer": 0.6139779732631433, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 86.08, |
|
"grad_norm": 1.5159286260604858, |
|
"learning_rate": 3.1139240506329116e-06, |
|
"loss": 0.1659, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 86.08, |
|
"eval_loss": 0.8146914839744568, |
|
"eval_runtime": 11.289, |
|
"eval_samples_per_second": 39.862, |
|
"eval_steps_per_second": 2.037, |
|
"eval_wer": 0.6279005333518044, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 87.34, |
|
"grad_norm": 2.9167511463165283, |
|
"learning_rate": 2.832630098452884e-06, |
|
"loss": 0.166, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 87.34, |
|
"eval_loss": 0.8088270425796509, |
|
"eval_runtime": 11.28, |
|
"eval_samples_per_second": 39.893, |
|
"eval_steps_per_second": 2.039, |
|
"eval_wer": 0.6350349795663919, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 88.61, |
|
"grad_norm": 2.3707916736602783, |
|
"learning_rate": 2.5513361462728552e-06, |
|
"loss": 0.1539, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 88.61, |
|
"eval_loss": 0.8052927255630493, |
|
"eval_runtime": 11.3196, |
|
"eval_samples_per_second": 39.754, |
|
"eval_steps_per_second": 2.032, |
|
"eval_wer": 0.616402299646741, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 89.87, |
|
"grad_norm": 2.315516471862793, |
|
"learning_rate": 2.270042194092827e-06, |
|
"loss": 0.1589, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 89.87, |
|
"eval_loss": 0.8188755512237549, |
|
"eval_runtime": 11.3039, |
|
"eval_samples_per_second": 39.809, |
|
"eval_steps_per_second": 2.035, |
|
"eval_wer": 0.6357276442474198, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 91.14, |
|
"grad_norm": 1.863142967224121, |
|
"learning_rate": 1.9887482419127992e-06, |
|
"loss": 0.1559, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 91.14, |
|
"eval_loss": 0.8152031898498535, |
|
"eval_runtime": 11.2054, |
|
"eval_samples_per_second": 40.159, |
|
"eval_steps_per_second": 2.053, |
|
"eval_wer": 0.6258225393087207, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 92.41, |
|
"grad_norm": 3.034898519515991, |
|
"learning_rate": 1.7074542897327708e-06, |
|
"loss": 0.1564, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 92.41, |
|
"eval_loss": 0.8190972208976746, |
|
"eval_runtime": 11.3427, |
|
"eval_samples_per_second": 39.673, |
|
"eval_steps_per_second": 2.028, |
|
"eval_wer": 0.6245064764147676, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 93.67, |
|
"grad_norm": 1.673194408416748, |
|
"learning_rate": 1.4261603375527428e-06, |
|
"loss": 0.158, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 93.67, |
|
"eval_loss": 0.8254526853561401, |
|
"eval_runtime": 11.282, |
|
"eval_samples_per_second": 39.887, |
|
"eval_steps_per_second": 2.039, |
|
"eval_wer": 0.6333033178638221, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 94.94, |
|
"grad_norm": 1.6554739475250244, |
|
"learning_rate": 1.1448663853727146e-06, |
|
"loss": 0.1595, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 94.94, |
|
"eval_loss": 0.8184179663658142, |
|
"eval_runtime": 11.2214, |
|
"eval_samples_per_second": 40.102, |
|
"eval_steps_per_second": 2.05, |
|
"eval_wer": 0.6205582877329086, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 96.2, |
|
"grad_norm": 2.310234785079956, |
|
"learning_rate": 8.635724331926865e-07, |
|
"loss": 0.1638, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 96.2, |
|
"eval_loss": 0.8229891061782837, |
|
"eval_runtime": 11.1115, |
|
"eval_samples_per_second": 40.499, |
|
"eval_steps_per_second": 2.07, |
|
"eval_wer": 0.6364203089284477, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 97.47, |
|
"grad_norm": 1.6313074827194214, |
|
"learning_rate": 5.822784810126583e-07, |
|
"loss": 0.1629, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 97.47, |
|
"eval_loss": 0.8244702219963074, |
|
"eval_runtime": 11.1778, |
|
"eval_samples_per_second": 40.258, |
|
"eval_steps_per_second": 2.058, |
|
"eval_wer": 0.6312253238207384, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 98.73, |
|
"grad_norm": 2.2342050075531006, |
|
"learning_rate": 3.009845288326301e-07, |
|
"loss": 0.1531, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 98.73, |
|
"eval_loss": 0.8226235508918762, |
|
"eval_runtime": 11.0989, |
|
"eval_samples_per_second": 40.545, |
|
"eval_steps_per_second": 2.072, |
|
"eval_wer": 0.626723003394057, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 3.3648197650909424, |
|
"learning_rate": 1.9690576652601972e-08, |
|
"loss": 0.1572, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 0.8258158564567566, |
|
"eval_runtime": 11.1124, |
|
"eval_samples_per_second": 40.495, |
|
"eval_steps_per_second": 2.07, |
|
"eval_wer": 0.6288702639052435, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 7900, |
|
"total_flos": 3.6740184088961606e+19, |
|
"train_loss": 0.6056561310683625, |
|
"train_runtime": 25369.8434, |
|
"train_samples_per_second": 12.404, |
|
"train_steps_per_second": 0.311 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 7900, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 100, |
|
"total_flos": 3.6740184088961606e+19, |
|
"train_batch_size": 20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|