{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.352781546811396,
  "eval_steps": 400,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.27,
      "grad_norm": 4.026116371154785,
      "learning_rate": 0.00023999999999999998,
      "loss": 5.8396,
      "step": 400
    },
    {
      "epoch": 0.27,
      "eval_loss": 0.9751555919647217,
      "eval_runtime": 211.3976,
      "eval_samples_per_second": 6.618,
      "eval_steps_per_second": 0.828,
      "eval_wer": 0.8373902326297951,
      "step": 400
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9866678714752197,
      "learning_rate": 0.00029794144556267153,
      "loss": 0.6967,
      "step": 800
    },
    {
      "epoch": 0.54,
      "eval_loss": 0.38119566440582275,
      "eval_runtime": 210.7469,
      "eval_samples_per_second": 6.638,
      "eval_steps_per_second": 0.83,
      "eval_wer": 0.5935140964412263,
      "step": 800
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.540819525718689,
      "learning_rate": 0.00029519670631290027,
      "loss": 0.4806,
      "step": 1200
    },
    {
      "epoch": 0.81,
      "eval_loss": 0.3368071913719177,
      "eval_runtime": 211.9206,
      "eval_samples_per_second": 6.602,
      "eval_steps_per_second": 0.826,
      "eval_wer": 0.4757356339547065,
      "step": 1200
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.9303820729255676,
      "learning_rate": 0.000292451967063129,
      "loss": 0.3996,
      "step": 1600
    },
    {
      "epoch": 1.09,
      "eval_loss": 0.199427992105484,
      "eval_runtime": 210.2196,
      "eval_samples_per_second": 6.655,
      "eval_steps_per_second": 0.832,
      "eval_wer": 0.3142813125866584,
      "step": 1600
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.5186350345611572,
      "learning_rate": 0.0002897072278133577,
      "loss": 0.3497,
      "step": 2000
    },
    {
      "epoch": 1.36,
      "eval_loss": 0.16836047172546387,
      "eval_runtime": 210.345,
      "eval_samples_per_second": 6.651,
      "eval_steps_per_second": 0.832,
      "eval_wer": 0.2564319827453397,
      "step": 2000
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.9710769057273865,
      "learning_rate": 0.00028696248856358643,
      "loss": 0.3372,
      "step": 2400
    },
    {
      "epoch": 1.63,
      "eval_loss": 0.15862207114696503,
      "eval_runtime": 212.5997,
      "eval_samples_per_second": 6.58,
      "eval_steps_per_second": 0.823,
      "eval_wer": 0.2418733631181636,
      "step": 2400
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.8559458255767822,
      "learning_rate": 0.00028421774931381517,
      "loss": 0.312,
      "step": 2800
    },
    {
      "epoch": 1.9,
      "eval_loss": 0.1412586122751236,
      "eval_runtime": 211.389,
      "eval_samples_per_second": 6.618,
      "eval_steps_per_second": 0.828,
      "eval_wer": 0.2226929594823602,
      "step": 2800
    },
    {
      "epoch": 2.17,
      "grad_norm": 0.6652762293815613,
      "learning_rate": 0.0002814730100640439,
      "loss": 0.2797,
      "step": 3200
    },
    {
      "epoch": 2.17,
      "eval_loss": 0.1465640813112259,
      "eval_runtime": 210.5595,
      "eval_samples_per_second": 6.644,
      "eval_steps_per_second": 0.831,
      "eval_wer": 0.22731474349098751,
      "step": 3200
    },
    {
      "epoch": 2.44,
      "grad_norm": 0.361288458108902,
      "learning_rate": 0.0002787282708142726,
      "loss": 0.2554,
      "step": 3600
    },
    {
      "epoch": 2.44,
      "eval_loss": 0.1542745977640152,
      "eval_runtime": 211.2213,
      "eval_samples_per_second": 6.623,
      "eval_steps_per_second": 0.829,
      "eval_wer": 0.2365583115082422,
      "step": 3600
    },
    {
      "epoch": 2.71,
      "grad_norm": 0.5574541091918945,
      "learning_rate": 0.00027598353156450134,
      "loss": 0.2613,
      "step": 4000
    },
    {
      "epoch": 2.71,
      "eval_loss": 0.14501063525676727,
      "eval_runtime": 212.1205,
      "eval_samples_per_second": 6.595,
      "eval_steps_per_second": 0.825,
      "eval_wer": 0.23255276536743183,
      "step": 4000
    },
    {
      "epoch": 2.99,
      "grad_norm": 0.4116342067718506,
      "learning_rate": 0.0002732387923147301,
      "loss": 0.2399,
      "step": 4400
    },
    {
      "epoch": 2.99,
      "eval_loss": 0.12376150488853455,
      "eval_runtime": 212.9821,
      "eval_samples_per_second": 6.569,
      "eval_steps_per_second": 0.822,
      "eval_wer": 0.20297334771221692,
      "step": 4400
    },
    {
      "epoch": 3.26,
      "grad_norm": 0.834938645362854,
      "learning_rate": 0.0002704940530649588,
      "loss": 0.2125,
      "step": 4800
    },
    {
      "epoch": 3.26,
      "eval_loss": 0.09888482838869095,
      "eval_runtime": 213.2706,
      "eval_samples_per_second": 6.56,
      "eval_steps_per_second": 0.821,
      "eval_wer": 0.16099214296718534,
      "step": 4800
    },
    {
      "epoch": 3.53,
      "grad_norm": 0.4636085331439972,
      "learning_rate": 0.0002677493138151875,
      "loss": 0.2144,
      "step": 5200
    },
    {
      "epoch": 3.53,
      "eval_loss": 0.09842105209827423,
      "eval_runtime": 210.8983,
      "eval_samples_per_second": 6.634,
      "eval_steps_per_second": 0.83,
      "eval_wer": 0.1612232321676167,
      "step": 5200
    },
    {
      "epoch": 3.8,
      "grad_norm": 0.4843950569629669,
      "learning_rate": 0.0002650045745654163,
      "loss": 0.212,
      "step": 5600
    },
    {
      "epoch": 3.8,
      "eval_loss": 0.08756741881370544,
      "eval_runtime": 210.9696,
      "eval_samples_per_second": 6.631,
      "eval_steps_per_second": 0.83,
      "eval_wer": 0.1507471884147281,
      "step": 5600
    },
    {
      "epoch": 4.07,
      "grad_norm": 0.847865641117096,
      "learning_rate": 0.000262259835315645,
      "loss": 0.1964,
      "step": 6000
    },
    {
      "epoch": 4.07,
      "eval_loss": 0.1017051413655281,
      "eval_runtime": 211.736,
      "eval_samples_per_second": 6.607,
      "eval_steps_per_second": 0.827,
      "eval_wer": 0.17531967339393006,
      "step": 6000
    },
    {
      "epoch": 4.34,
      "grad_norm": 0.35252293944358826,
      "learning_rate": 0.0002595150960658737,
      "loss": 0.1814,
      "step": 6400
    },
    {
      "epoch": 4.34,
      "eval_loss": 0.0966610386967659,
      "eval_runtime": 211.5792,
      "eval_samples_per_second": 6.612,
      "eval_steps_per_second": 0.827,
      "eval_wer": 0.1653828377753813,
      "step": 6400
    },
    {
      "epoch": 4.61,
      "grad_norm": 0.38534751534461975,
      "learning_rate": 0.00025677035681610246,
      "loss": 0.1772,
      "step": 6800
    },
    {
      "epoch": 4.61,
      "eval_loss": 0.0955633744597435,
      "eval_runtime": 211.2723,
      "eval_samples_per_second": 6.622,
      "eval_steps_per_second": 0.828,
      "eval_wer": 0.16314897550454474,
      "step": 6800
    },
    {
      "epoch": 4.88,
      "grad_norm": 0.3151361346244812,
      "learning_rate": 0.0002540256175663312,
      "loss": 0.1748,
      "step": 7200
    },
    {
      "epoch": 4.88,
      "eval_loss": 0.08700825273990631,
      "eval_runtime": 213.02,
      "eval_samples_per_second": 6.567,
      "eval_steps_per_second": 0.822,
      "eval_wer": 0.14828223694346018,
      "step": 7200
    },
    {
      "epoch": 5.16,
      "grad_norm": 1.1274446249008179,
      "learning_rate": 0.0002512808783165599,
      "loss": 0.1706,
      "step": 7600
    },
    {
      "epoch": 5.16,
      "eval_loss": 0.07706322520971298,
      "eval_runtime": 212.6781,
      "eval_samples_per_second": 6.578,
      "eval_steps_per_second": 0.823,
      "eval_wer": 0.1306424279771992,
      "step": 7600
    },
    {
      "epoch": 5.43,
      "grad_norm": 1.3828215599060059,
      "learning_rate": 0.0002485361390667886,
      "loss": 0.1545,
      "step": 8000
    },
    {
      "epoch": 5.43,
      "eval_loss": 0.06534561514854431,
      "eval_runtime": 211.3365,
      "eval_samples_per_second": 6.62,
      "eval_steps_per_second": 0.828,
      "eval_wer": 0.1198582652904021,
      "step": 8000
    },
    {
      "epoch": 5.7,
      "grad_norm": 0.4036734402179718,
      "learning_rate": 0.00024579139981701736,
      "loss": 0.1627,
      "step": 8400
    },
    {
      "epoch": 5.7,
      "eval_loss": 0.060022782534360886,
      "eval_runtime": 211.4007,
      "eval_samples_per_second": 6.618,
      "eval_steps_per_second": 0.828,
      "eval_wer": 0.11030657833923894,
      "step": 8400
    },
    {
      "epoch": 5.97,
      "grad_norm": 0.3543277084827423,
      "learning_rate": 0.0002430466605672461,
      "loss": 0.1541,
      "step": 8800
    },
    {
      "epoch": 5.97,
      "eval_loss": 0.058936990797519684,
      "eval_runtime": 212.2764,
      "eval_samples_per_second": 6.59,
      "eval_steps_per_second": 0.824,
      "eval_wer": 0.10676321059929132,
      "step": 8800
    },
    {
      "epoch": 6.24,
      "grad_norm": 0.23857054114341736,
      "learning_rate": 0.00024030192131747481,
      "loss": 0.1382,
      "step": 9200
    },
    {
      "epoch": 6.24,
      "eval_loss": 0.07100619375705719,
      "eval_runtime": 213.3512,
      "eval_samples_per_second": 6.557,
      "eval_steps_per_second": 0.82,
      "eval_wer": 0.12309351409644123,
      "step": 9200
    },
    {
      "epoch": 6.51,
      "grad_norm": 0.6902241706848145,
      "learning_rate": 0.00023755718206770355,
      "loss": 0.1397,
      "step": 9600
    },
    {
      "epoch": 6.51,
      "eval_loss": 0.06513579189777374,
      "eval_runtime": 211.6334,
      "eval_samples_per_second": 6.61,
      "eval_steps_per_second": 0.827,
      "eval_wer": 0.12478816823293791,
      "step": 9600
    },
    {
      "epoch": 6.78,
      "grad_norm": 0.3040298819541931,
      "learning_rate": 0.00023481244281793227,
      "loss": 0.1345,
      "step": 10000
    },
    {
      "epoch": 6.78,
      "eval_loss": 0.06700535863637924,
      "eval_runtime": 211.9668,
      "eval_samples_per_second": 6.6,
      "eval_steps_per_second": 0.826,
      "eval_wer": 0.11939608688953936,
      "step": 10000
    },
    {
      "epoch": 7.06,
      "grad_norm": 1.0794196128845215,
      "learning_rate": 0.000232067703568161,
      "loss": 0.1281,
      "step": 10400
    },
    {
      "epoch": 7.06,
      "eval_loss": 0.054103270173072815,
      "eval_runtime": 209.427,
      "eval_samples_per_second": 6.68,
      "eval_steps_per_second": 0.836,
      "eval_wer": 0.10060083192112156,
      "step": 10400
    },
    {
      "epoch": 7.33,
      "grad_norm": 0.8765202164649963,
      "learning_rate": 0.00022932296431838972,
      "loss": 0.1315,
      "step": 10800
    },
    {
      "epoch": 7.33,
      "eval_loss": 0.05593947321176529,
      "eval_runtime": 208.8917,
      "eval_samples_per_second": 6.697,
      "eval_steps_per_second": 0.838,
      "eval_wer": 0.10622400246495148,
      "step": 10800
    },
    {
      "epoch": 7.6,
      "grad_norm": 0.5989521741867065,
      "learning_rate": 0.00022657822506861846,
      "loss": 0.1234,
      "step": 11200
    },
    {
      "epoch": 7.6,
      "eval_loss": 0.05276945233345032,
      "eval_runtime": 209.5765,
      "eval_samples_per_second": 6.675,
      "eval_steps_per_second": 0.835,
      "eval_wer": 0.09698043444769681,
      "step": 11200
    },
    {
      "epoch": 7.87,
      "grad_norm": 0.3679282069206238,
      "learning_rate": 0.00022383348581884717,
      "loss": 0.1248,
      "step": 11600
    },
    {
      "epoch": 7.87,
      "eval_loss": 0.044818080961704254,
      "eval_runtime": 209.7397,
      "eval_samples_per_second": 6.67,
      "eval_steps_per_second": 0.834,
      "eval_wer": 0.0865043906948082,
      "step": 11600
    },
    {
      "epoch": 8.14,
      "grad_norm": 0.3467447757720947,
      "learning_rate": 0.00022108874656907594,
      "loss": 0.115,
      "step": 12000
    },
    {
      "epoch": 8.14,
      "eval_loss": 0.0545777752995491,
      "eval_runtime": 209.6701,
      "eval_samples_per_second": 6.672,
      "eval_steps_per_second": 0.835,
      "eval_wer": 0.0993683561854876,
      "step": 12000
    },
    {
      "epoch": 8.41,
      "grad_norm": 1.4704556465148926,
      "learning_rate": 0.00021834400731930465,
      "loss": 0.1143,
      "step": 12400
    },
    {
      "epoch": 8.41,
      "eval_loss": 0.05952217057347298,
      "eval_runtime": 210.7235,
      "eval_samples_per_second": 6.639,
      "eval_steps_per_second": 0.83,
      "eval_wer": 0.10861192420274225,
      "step": 12400
    },
    {
      "epoch": 8.68,
      "grad_norm": 0.2509690821170807,
      "learning_rate": 0.0002155992680695334,
      "loss": 0.1169,
      "step": 12800
    },
    {
      "epoch": 8.68,
      "eval_loss": 0.04853290319442749,
      "eval_runtime": 209.7854,
      "eval_samples_per_second": 6.669,
      "eval_steps_per_second": 0.834,
      "eval_wer": 0.08742874749653366,
      "step": 12800
    },
    {
      "epoch": 8.96,
      "grad_norm": 0.2134234458208084,
      "learning_rate": 0.0002128545288197621,
      "loss": 0.1165,
      "step": 13200
    },
    {
      "epoch": 8.96,
      "eval_loss": 0.05237515643239021,
      "eval_runtime": 210.2596,
      "eval_samples_per_second": 6.654,
      "eval_steps_per_second": 0.832,
      "eval_wer": 0.09767370204899091,
      "step": 13200
    },
    {
      "epoch": 9.23,
      "grad_norm": 0.4314160943031311,
      "learning_rate": 0.00021010978956999084,
      "loss": 0.1035,
      "step": 13600
    },
    {
      "epoch": 9.23,
      "eval_loss": 0.044478464871644974,
      "eval_runtime": 210.5273,
      "eval_samples_per_second": 6.645,
      "eval_steps_per_second": 0.831,
      "eval_wer": 0.0837313202896318,
      "step": 13600
    },
    {
      "epoch": 9.5,
      "grad_norm": 0.4133451581001282,
      "learning_rate": 0.00020736505032021955,
      "loss": 0.1017,
      "step": 14000
    },
    {
      "epoch": 9.5,
      "eval_loss": 0.04130551964044571,
      "eval_runtime": 210.2347,
      "eval_samples_per_second": 6.654,
      "eval_steps_per_second": 0.832,
      "eval_wer": 0.0791865660144816,
      "step": 14000
    },
    {
      "epoch": 9.77,
      "grad_norm": 0.29300200939178467,
      "learning_rate": 0.0002046203110704483,
      "loss": 0.109,
      "step": 14400
    },
    {
      "epoch": 9.77,
      "eval_loss": 0.04202074185013771,
      "eval_runtime": 211.2523,
      "eval_samples_per_second": 6.622,
      "eval_steps_per_second": 0.828,
      "eval_wer": 0.08326914188876906,
      "step": 14400
    },
    {
      "epoch": 10.04,
      "grad_norm": 0.2547191083431244,
      "learning_rate": 0.000201875571820677,
      "loss": 0.1018,
      "step": 14800
    },
    {
      "epoch": 10.04,
      "eval_loss": 0.04540720209479332,
      "eval_runtime": 210.6476,
      "eval_samples_per_second": 6.641,
      "eval_steps_per_second": 0.831,
      "eval_wer": 0.0823447850870436,
      "step": 14800
    },
    {
      "epoch": 10.31,
      "grad_norm": 0.2799816131591797,
      "learning_rate": 0.00019913083257090574,
      "loss": 0.0929,
      "step": 15200
    },
    {
      "epoch": 10.31,
      "eval_loss": 0.042868729680776596,
      "eval_runtime": 209.1492,
      "eval_samples_per_second": 6.689,
      "eval_steps_per_second": 0.837,
      "eval_wer": 0.0785703281466646,
      "step": 15200
    },
    {
      "epoch": 10.58,
      "grad_norm": 0.18747101724147797,
      "learning_rate": 0.00019638609332113446,
      "loss": 0.0956,
      "step": 15600
    },
    {
      "epoch": 10.58,
      "eval_loss": 0.04033521190285683,
      "eval_runtime": 209.4853,
      "eval_samples_per_second": 6.678,
      "eval_steps_per_second": 0.835,
      "eval_wer": 0.07718379294407642,
      "step": 15600
    },
    {
      "epoch": 10.85,
      "grad_norm": 0.3078967034816742,
      "learning_rate": 0.0001936413540713632,
      "loss": 0.0986,
      "step": 16000
    },
    {
      "epoch": 10.85,
      "eval_loss": 0.046831514686346054,
      "eval_runtime": 209.7149,
      "eval_samples_per_second": 6.671,
      "eval_steps_per_second": 0.834,
      "eval_wer": 0.09058696656909566,
      "step": 16000
    },
    {
      "epoch": 11.13,
      "grad_norm": 0.10461205244064331,
      "learning_rate": 0.0001908966148215919,
      "loss": 0.0941,
      "step": 16400
    },
    {
      "epoch": 11.13,
      "eval_loss": 0.03615270182490349,
      "eval_runtime": 209.392,
      "eval_samples_per_second": 6.681,
      "eval_steps_per_second": 0.836,
      "eval_wer": 0.06940378986288707,
      "step": 16400
    },
    {
      "epoch": 11.4,
      "grad_norm": 0.5313912630081177,
      "learning_rate": 0.00018815187557182068,
      "loss": 0.0845,
      "step": 16800
    },
    {
      "epoch": 11.4,
      "eval_loss": 0.038695286959409714,
      "eval_runtime": 210.5021,
      "eval_samples_per_second": 6.646,
      "eval_steps_per_second": 0.831,
      "eval_wer": 0.0701740871976583,
      "step": 16800
    },
    {
      "epoch": 11.67,
      "grad_norm": 0.5434714555740356,
      "learning_rate": 0.0001854071363220494,
      "loss": 0.0955,
      "step": 17200
    },
    {
      "epoch": 11.67,
      "eval_loss": 0.03512905538082123,
      "eval_runtime": 210.6242,
      "eval_samples_per_second": 6.642,
      "eval_steps_per_second": 0.831,
      "eval_wer": 0.06270220305037745,
      "step": 17200
    },
    {
      "epoch": 11.94,
      "grad_norm": 0.6010161638259888,
      "learning_rate": 0.00018266239707227813,
      "loss": 0.089,
      "step": 17600
    },
    {
      "epoch": 11.94,
      "eval_loss": 0.03609244525432587,
      "eval_runtime": 210.1814,
      "eval_samples_per_second": 6.656,
      "eval_steps_per_second": 0.833,
      "eval_wer": 0.06747804652595903,
      "step": 17600
    },
    {
      "epoch": 12.21,
      "grad_norm": 0.2907910943031311,
      "learning_rate": 0.00017991765782250684,
      "loss": 0.0806,
      "step": 18000
    },
    {
      "epoch": 12.21,
      "eval_loss": 0.03811544552445412,
      "eval_runtime": 210.0932,
      "eval_samples_per_second": 6.659,
      "eval_steps_per_second": 0.833,
      "eval_wer": 0.06847943306116161,
      "step": 18000
    },
    {
      "epoch": 12.48,
      "grad_norm": 0.25918644666671753,
      "learning_rate": 0.00017717291857273558,
      "loss": 0.0803,
      "step": 18400
    },
    {
      "epoch": 12.48,
      "eval_loss": 0.0369856134057045,
      "eval_runtime": 210.0481,
      "eval_samples_per_second": 6.66,
      "eval_steps_per_second": 0.833,
      "eval_wer": 0.06747804652595903,
      "step": 18400
    },
    {
      "epoch": 12.75,
      "grad_norm": 0.3781072199344635,
      "learning_rate": 0.0001744281793229643,
      "loss": 0.0839,
      "step": 18800
    },
    {
      "epoch": 12.75,
      "eval_loss": 0.0333174392580986,
      "eval_runtime": 210.517,
      "eval_samples_per_second": 6.646,
      "eval_steps_per_second": 0.831,
      "eval_wer": 0.06193190571560622,
      "step": 18800
    },
    {
      "epoch": 13.03,
      "grad_norm": 0.42045047879219055,
      "learning_rate": 0.00017168344007319303,
      "loss": 0.0834,
      "step": 19200
    },
    {
      "epoch": 13.03,
      "eval_loss": 0.033445805311203,
      "eval_runtime": 210.6516,
      "eval_samples_per_second": 6.641,
      "eval_steps_per_second": 0.831,
      "eval_wer": 0.057695270374364505,
      "step": 19200
    },
    {
      "epoch": 13.3,
      "grad_norm": 0.4078648090362549,
      "learning_rate": 0.00016893870082342174,
      "loss": 0.0779,
      "step": 19600
    },
    {
      "epoch": 13.3,
      "eval_loss": 0.03577824681997299,
      "eval_runtime": 210.5433,
      "eval_samples_per_second": 6.645,
      "eval_steps_per_second": 0.831,
      "eval_wer": 0.06208596518256047,
      "step": 19600
    },
    {
      "epoch": 13.57,
      "grad_norm": 0.8884561657905579,
      "learning_rate": 0.00016619396157365048,
      "loss": 0.0773,
      "step": 20000
    },
    {
      "epoch": 13.57,
      "eval_loss": 0.032973628491163254,
      "eval_runtime": 210.0131,
      "eval_samples_per_second": 6.661,
      "eval_steps_per_second": 0.833,
      "eval_wer": 0.05646279463873055,
      "step": 20000
    },
    {
      "epoch": 13.84,
      "grad_norm": 0.5232133269309998,
      "learning_rate": 0.0001634492223238792,
      "loss": 0.0717,
      "step": 20400
    },
    {
      "epoch": 13.84,
      "eval_loss": 0.03495289012789726,
      "eval_runtime": 210.8453,
      "eval_samples_per_second": 6.635,
      "eval_steps_per_second": 0.83,
      "eval_wer": 0.0625481435834232,
      "step": 20400
    },
    {
      "epoch": 14.11,
      "grad_norm": 0.3025607466697693,
      "learning_rate": 0.00016070448307410794,
      "loss": 0.0737,
      "step": 20800
    },
    {
      "epoch": 14.11,
      "eval_loss": 0.03549855947494507,
      "eval_runtime": 209.8898,
      "eval_samples_per_second": 6.665,
      "eval_steps_per_second": 0.834,
      "eval_wer": 0.060314281312586655,
      "step": 20800
    },
    {
      "epoch": 14.38,
      "grad_norm": 1.34682297706604,
      "learning_rate": 0.00015795974382433665,
      "loss": 0.075,
      "step": 21200
    },
    {
      "epoch": 14.38,
      "eval_loss": 0.03607061505317688,
      "eval_runtime": 209.844,
      "eval_samples_per_second": 6.667,
      "eval_steps_per_second": 0.834,
      "eval_wer": 0.06262517331690032,
      "step": 21200
    },
    {
      "epoch": 14.65,
      "grad_norm": 0.1968027800321579,
      "learning_rate": 0.00015521500457456541,
      "loss": 0.0715,
      "step": 21600
    },
    {
      "epoch": 14.65,
      "eval_loss": 0.03143769130110741,
      "eval_runtime": 212.2,
      "eval_samples_per_second": 6.593,
      "eval_steps_per_second": 0.825,
      "eval_wer": 0.057464181173933135,
      "step": 21600
    },
    {
      "epoch": 14.93,
      "grad_norm": 0.19861555099487305,
      "learning_rate": 0.00015247026532479413,
      "loss": 0.0722,
      "step": 22000
    },
    {
      "epoch": 14.93,
      "eval_loss": 0.03097483143210411,
      "eval_runtime": 209.3416,
      "eval_samples_per_second": 6.683,
      "eval_steps_per_second": 0.836,
      "eval_wer": 0.05754121090741026,
      "step": 22000
    },
    {
      "epoch": 15.2,
      "grad_norm": 0.333710640668869,
      "learning_rate": 0.00014972552607502287,
      "loss": 0.0666,
      "step": 22400
    },
    {
      "epoch": 15.2,
      "eval_loss": 0.031425874680280685,
      "eval_runtime": 210.734,
      "eval_samples_per_second": 6.639,
      "eval_steps_per_second": 0.83,
      "eval_wer": 0.05592358650439069,
      "step": 22400
    },
    {
      "epoch": 15.47,
      "grad_norm": 0.4220418632030487,
      "learning_rate": 0.00014698078682525158,
      "loss": 0.0672,
      "step": 22800
    },
    {
      "epoch": 15.47,
      "eval_loss": 0.030694805085659027,
      "eval_runtime": 208.4031,
      "eval_samples_per_second": 6.713,
      "eval_steps_per_second": 0.84,
      "eval_wer": 0.05345863503312279,
      "step": 22800
    },
    {
      "epoch": 15.74,
      "grad_norm": 0.9596304297447205,
      "learning_rate": 0.00014423604757548032,
      "loss": 0.0664,
      "step": 23200
    },
    {
      "epoch": 15.74,
      "eval_loss": 0.03153248876333237,
      "eval_runtime": 209.4047,
      "eval_samples_per_second": 6.681,
      "eval_steps_per_second": 0.836,
      "eval_wer": 0.05515328916961947,
      "step": 23200
    },
    {
      "epoch": 16.01,
      "grad_norm": 0.5310277938842773,
      "learning_rate": 0.00014149130832570906,
      "loss": 0.0678,
      "step": 23600
    },
    {
      "epoch": 16.01,
      "eval_loss": 0.031200062483549118,
      "eval_runtime": 212.7627,
      "eval_samples_per_second": 6.575,
      "eval_steps_per_second": 0.823,
      "eval_wer": 0.054845170235710984,
      "step": 23600
    },
    {
      "epoch": 16.28,
      "grad_norm": 0.3012249767780304,
      "learning_rate": 0.00013874656907593777,
      "loss": 0.0616,
      "step": 24000
    },
    {
      "epoch": 16.28,
      "eval_loss": 0.0315285250544548,
      "eval_runtime": 208.0351,
      "eval_samples_per_second": 6.725,
      "eval_steps_per_second": 0.841,
      "eval_wer": 0.052688337698351566,
      "step": 24000
    },
    {
      "epoch": 16.55,
      "grad_norm": 0.2116428166627884,
      "learning_rate": 0.0001360018298261665,
      "loss": 0.0644,
      "step": 24400
    },
    {
      "epoch": 16.55,
      "eval_loss": 0.026890287175774574,
      "eval_runtime": 209.0713,
      "eval_samples_per_second": 6.691,
      "eval_steps_per_second": 0.837,
      "eval_wer": 0.048066553689724234,
      "step": 24400
    },
    {
      "epoch": 16.82,
      "grad_norm": 0.44349026679992676,
      "learning_rate": 0.00013325709057639522,
      "loss": 0.062,
      "step": 24800
    },
    {
      "epoch": 16.82,
      "eval_loss": 0.030771076679229736,
      "eval_runtime": 210.1625,
      "eval_samples_per_second": 6.657,
      "eval_steps_per_second": 0.833,
      "eval_wer": 0.05130180249576336,
      "step": 24800
    },
    {
      "epoch": 17.1,
      "grad_norm": 0.5564978718757629,
      "learning_rate": 0.00013051235132662396,
      "loss": 0.0584,
      "step": 25200
    },
    {
      "epoch": 17.1,
      "eval_loss": 0.02937587909400463,
      "eval_runtime": 209.0218,
      "eval_samples_per_second": 6.693,
      "eval_steps_per_second": 0.837,
      "eval_wer": 0.05022338622708365,
      "step": 25200
    },
    {
      "epoch": 17.37,
      "grad_norm": 0.3329567015171051,
      "learning_rate": 0.0001277676120768527,
      "loss": 0.0563,
      "step": 25600
    },
    {
      "epoch": 17.37,
      "eval_loss": 0.02935618720948696,
      "eval_runtime": 208.6661,
      "eval_samples_per_second": 6.704,
      "eval_steps_per_second": 0.839,
      "eval_wer": 0.04922199969188107,
      "step": 25600
    },
    {
      "epoch": 17.64,
      "grad_norm": 1.3281415700912476,
      "learning_rate": 0.0001250228728270814,
      "loss": 0.0547,
      "step": 26000
    },
    {
      "epoch": 17.64,
      "eval_loss": 0.028093835338950157,
      "eval_runtime": 208.0055,
      "eval_samples_per_second": 6.726,
      "eval_steps_per_second": 0.841,
      "eval_wer": 0.045216453551070714,
      "step": 26000
    },
    {
      "epoch": 17.91,
      "grad_norm": 0.33733347058296204,
      "learning_rate": 0.00012227813357731015,
      "loss": 0.056,
      "step": 26400
    },
    {
      "epoch": 17.91,
      "eval_loss": 0.02792263962328434,
      "eval_runtime": 208.746,
      "eval_samples_per_second": 6.702,
      "eval_steps_per_second": 0.838,
      "eval_wer": 0.04513942381759359,
      "step": 26400
    },
    {
      "epoch": 18.18,
      "grad_norm": 0.172608882188797,
      "learning_rate": 0.00011953339432753888,
      "loss": 0.0572,
      "step": 26800
    },
    {
      "epoch": 18.18,
      "eval_loss": 0.029318030923604965,
      "eval_runtime": 208.963,
      "eval_samples_per_second": 6.695,
      "eval_steps_per_second": 0.837,
      "eval_wer": 0.045986750885841934,
      "step": 26800
    },
    {
      "epoch": 18.45,
      "grad_norm": 1.2717292308807373,
      "learning_rate": 0.0001167886550777676,
      "loss": 0.0544,
      "step": 27200
    },
    {
      "epoch": 18.45,
      "eval_loss": 0.02826772816479206,
      "eval_runtime": 208.8935,
      "eval_samples_per_second": 6.697,
      "eval_steps_per_second": 0.838,
      "eval_wer": 0.04644892928670467,
      "step": 27200
    },
    {
      "epoch": 18.72,
      "grad_norm": 0.9943646192550659,
      "learning_rate": 0.00011404391582799633,
      "loss": 0.052,
      "step": 27600
    },
    {
      "epoch": 18.72,
      "eval_loss": 0.02736526168882847,
      "eval_runtime": 209.8515,
      "eval_samples_per_second": 6.667,
      "eval_steps_per_second": 0.834,
      "eval_wer": 0.04375288861500539,
      "step": 27600
    },
    {
      "epoch": 19.0,
      "grad_norm": 0.289604514837265,
      "learning_rate": 0.00011129917657822506,
      "loss": 0.0533,
      "step": 28000
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.0264176856726408,
      "eval_runtime": 210.0905,
      "eval_samples_per_second": 6.659,
      "eval_steps_per_second": 0.833,
      "eval_wer": 0.041287937143737484,
      "step": 28000
    },
    {
      "epoch": 19.27,
      "grad_norm": 0.4070824086666107,
      "learning_rate": 0.00010855443732845378,
      "loss": 0.046,
      "step": 28400
    },
    {
      "epoch": 19.27,
      "eval_loss": 0.02758474461734295,
      "eval_runtime": 209.6786,
      "eval_samples_per_second": 6.672,
      "eval_steps_per_second": 0.835,
      "eval_wer": 0.04121090741026036,
      "step": 28400
    },
    {
      "epoch": 19.54,
      "grad_norm": 0.8942829966545105,
      "learning_rate": 0.00010580969807868251,
      "loss": 0.0498,
      "step": 28800
    },
    {
      "epoch": 19.54,
      "eval_loss": 0.02821315824985504,
      "eval_runtime": 210.0298,
      "eval_samples_per_second": 6.661,
      "eval_steps_per_second": 0.833,
      "eval_wer": 0.04190417501155446,
      "step": 28800
    },
    {
      "epoch": 19.81,
      "grad_norm": 0.6549252271652222,
      "learning_rate": 0.00010306495882891125,
      "loss": 0.0454,
      "step": 29200
    },
    {
      "epoch": 19.81,
      "eval_loss": 0.027856331318616867,
      "eval_runtime": 210.3168,
      "eval_samples_per_second": 6.652,
      "eval_steps_per_second": 0.832,
      "eval_wer": 0.04167308581112309,
      "step": 29200
    },
    {
      "epoch": 20.08,
      "grad_norm": 7.543890953063965,
      "learning_rate": 0.00010032021957913997,
      "loss": 0.0483,
      "step": 29600
    },
    {
      "epoch": 20.08,
      "eval_loss": 0.025978881865739822,
      "eval_runtime": 210.6554,
      "eval_samples_per_second": 6.641,
      "eval_steps_per_second": 0.831,
      "eval_wer": 0.0395932830072408,
      "step": 29600
    },
    {
      "epoch": 20.35,
      "grad_norm": 0.2715985178947449,
      "learning_rate": 9.75754803293687e-05,
      "loss": 0.0447,
      "step": 30000
    },
    {
      "epoch": 20.35,
      "eval_loss": 0.026736732572317123,
      "eval_runtime": 209.4915,
      "eval_samples_per_second": 6.678,
      "eval_steps_per_second": 0.835,
      "eval_wer": 0.041750115544600216,
      "step": 30000
    }
  ],
  "logging_steps": 400,
  "max_steps": 44220,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 400,
  "total_flos": 9.150437899797026e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}