{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.537318712415989,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017686593562079942,
      "grad_norm": 11.730201721191406,
      "learning_rate": 4.800000000000001e-07,
      "loss": 1.906,
      "step": 25
    },
    {
      "epoch": 0.035373187124159884,
      "grad_norm": 9.43655014038086,
      "learning_rate": 9.800000000000001e-07,
      "loss": 1.5891,
      "step": 50
    },
    {
      "epoch": 0.05305978068623983,
      "grad_norm": 8.554606437683105,
      "learning_rate": 1.48e-06,
      "loss": 0.9582,
      "step": 75
    },
    {
      "epoch": 0.07074637424831977,
      "grad_norm": 7.075723171234131,
      "learning_rate": 1.98e-06,
      "loss": 0.8164,
      "step": 100
    },
    {
      "epoch": 0.08843296781039972,
      "grad_norm": 7.806982517242432,
      "learning_rate": 2.46e-06,
      "loss": 0.7283,
      "step": 125
    },
    {
      "epoch": 0.10611956137247966,
      "grad_norm": 6.871402740478516,
      "learning_rate": 2.96e-06,
      "loss": 0.721,
      "step": 150
    },
    {
      "epoch": 0.1238061549345596,
      "grad_norm": 7.813017845153809,
      "learning_rate": 3.46e-06,
      "loss": 0.68,
      "step": 175
    },
    {
      "epoch": 0.14149274849663954,
      "grad_norm": 7.54969596862793,
      "learning_rate": 3.96e-06,
      "loss": 0.7248,
      "step": 200
    },
    {
      "epoch": 0.1591793420587195,
      "grad_norm": 7.196774482727051,
      "learning_rate": 4.4600000000000005e-06,
      "loss": 0.6417,
      "step": 225
    },
    {
      "epoch": 0.17686593562079944,
      "grad_norm": 8.30762004852295,
      "learning_rate": 4.960000000000001e-06,
      "loss": 0.6594,
      "step": 250
    },
    {
      "epoch": 0.19455252918287938,
      "grad_norm": 6.641123294830322,
      "learning_rate": 5.460000000000001e-06,
      "loss": 0.6359,
      "step": 275
    },
    {
      "epoch": 0.21223912274495932,
      "grad_norm": 8.47127914428711,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 0.6455,
      "step": 300
    },
    {
      "epoch": 0.22992571630703926,
      "grad_norm": 8.078540802001953,
      "learning_rate": 6.460000000000001e-06,
      "loss": 0.6563,
      "step": 325
    },
    {
      "epoch": 0.2476123098691192,
      "grad_norm": 7.481531620025635,
      "learning_rate": 6.96e-06,
      "loss": 0.6099,
      "step": 350
    },
    {
      "epoch": 0.26529890343119916,
      "grad_norm": 8.171069145202637,
      "learning_rate": 7.4600000000000006e-06,
      "loss": 0.5922,
      "step": 375
    },
    {
      "epoch": 0.2829854969932791,
      "grad_norm": 7.401958465576172,
      "learning_rate": 7.960000000000002e-06,
      "loss": 0.6232,
      "step": 400
    },
    {
      "epoch": 0.30067209055535904,
      "grad_norm": 6.448988437652588,
      "learning_rate": 8.46e-06,
      "loss": 0.6328,
      "step": 425
    },
    {
      "epoch": 0.318358684117439,
      "grad_norm": 6.956504821777344,
      "learning_rate": 8.96e-06,
      "loss": 0.6026,
      "step": 450
    },
    {
      "epoch": 0.3360452776795189,
      "grad_norm": 8.774309158325195,
      "learning_rate": 9.460000000000001e-06,
      "loss": 0.6318,
      "step": 475
    },
    {
      "epoch": 0.3537318712415989,
      "grad_norm": 6.220521450042725,
      "learning_rate": 9.960000000000001e-06,
      "loss": 0.6186,
      "step": 500
    },
    {
      "epoch": 0.3714184648036788,
      "grad_norm": 6.165685176849365,
      "learning_rate": 9.94888888888889e-06,
      "loss": 0.5509,
      "step": 525
    },
    {
      "epoch": 0.38910505836575876,
      "grad_norm": 6.3863348960876465,
      "learning_rate": 9.893333333333334e-06,
      "loss": 0.63,
      "step": 550
    },
    {
      "epoch": 0.40679165192783867,
      "grad_norm": 6.65989351272583,
      "learning_rate": 9.837777777777778e-06,
      "loss": 0.5981,
      "step": 575
    },
    {
      "epoch": 0.42447824548991864,
      "grad_norm": 5.605571746826172,
      "learning_rate": 9.782222222222222e-06,
      "loss": 0.5611,
      "step": 600
    },
    {
      "epoch": 0.4421648390519986,
      "grad_norm": 7.080026149749756,
      "learning_rate": 9.726666666666668e-06,
      "loss": 0.5498,
      "step": 625
    },
    {
      "epoch": 0.4598514326140785,
      "grad_norm": 5.857707977294922,
      "learning_rate": 9.671111111111112e-06,
      "loss": 0.5942,
      "step": 650
    },
    {
      "epoch": 0.4775380261761585,
      "grad_norm": 6.002816200256348,
      "learning_rate": 9.615555555555558e-06,
      "loss": 0.5488,
      "step": 675
    },
    {
      "epoch": 0.4952246197382384,
      "grad_norm": 6.393026828765869,
      "learning_rate": 9.56e-06,
      "loss": 0.5464,
      "step": 700
    },
    {
      "epoch": 0.5129112133003184,
      "grad_norm": 6.212553024291992,
      "learning_rate": 9.504444444444446e-06,
      "loss": 0.5416,
      "step": 725
    },
    {
      "epoch": 0.5305978068623983,
      "grad_norm": 6.353979110717773,
      "learning_rate": 9.44888888888889e-06,
      "loss": 0.5493,
      "step": 750
    },
    {
      "epoch": 0.5482844004244782,
      "grad_norm": 5.198184013366699,
      "learning_rate": 9.393333333333334e-06,
      "loss": 0.562,
      "step": 775
    },
    {
      "epoch": 0.5659709939865581,
      "grad_norm": 5.739233493804932,
      "learning_rate": 9.33777777777778e-06,
      "loss": 0.5245,
      "step": 800
    },
    {
      "epoch": 0.5836575875486382,
      "grad_norm": 5.637094974517822,
      "learning_rate": 9.282222222222222e-06,
      "loss": 0.5056,
      "step": 825
    },
    {
      "epoch": 0.6013441811107181,
      "grad_norm": 5.2869181632995605,
      "learning_rate": 9.226666666666668e-06,
      "loss": 0.495,
      "step": 850
    },
    {
      "epoch": 0.619030774672798,
      "grad_norm": 6.895484447479248,
      "learning_rate": 9.171111111111112e-06,
      "loss": 0.5311,
      "step": 875
    },
    {
      "epoch": 0.636717368234878,
      "grad_norm": 5.642014026641846,
      "learning_rate": 9.115555555555556e-06,
      "loss": 0.4611,
      "step": 900
    },
    {
      "epoch": 0.6544039617969579,
      "grad_norm": 6.39215087890625,
      "learning_rate": 9.060000000000001e-06,
      "loss": 0.5128,
      "step": 925
    },
    {
      "epoch": 0.6720905553590378,
      "grad_norm": 4.879695892333984,
      "learning_rate": 9.004444444444445e-06,
      "loss": 0.502,
      "step": 950
    },
    {
      "epoch": 0.6897771489211177,
      "grad_norm": 6.497096538543701,
      "learning_rate": 8.94888888888889e-06,
      "loss": 0.4927,
      "step": 975
    },
    {
      "epoch": 0.7074637424831978,
      "grad_norm": 6.05295991897583,
      "learning_rate": 8.893333333333333e-06,
      "loss": 0.4825,
      "step": 1000
    },
    {
      "epoch": 0.7074637424831978,
      "eval_loss": 0.2707957625389099,
      "eval_runtime": 4792.033,
      "eval_samples_per_second": 2.246,
      "eval_steps_per_second": 0.14,
      "eval_wer": 0.18103144801689744,
      "step": 1000
    },
    {
      "epoch": 0.7251503360452777,
      "grad_norm": 6.068356990814209,
      "learning_rate": 8.83777777777778e-06,
      "loss": 0.4787,
      "step": 1025
    },
    {
      "epoch": 0.7428369296073576,
      "grad_norm": 6.832317352294922,
      "learning_rate": 8.782222222222223e-06,
      "loss": 0.4583,
      "step": 1050
    },
    {
      "epoch": 0.7605235231694376,
      "grad_norm": 5.528912544250488,
      "learning_rate": 8.726666666666667e-06,
      "loss": 0.4733,
      "step": 1075
    },
    {
      "epoch": 0.7782101167315175,
      "grad_norm": 6.220973968505859,
      "learning_rate": 8.671111111111113e-06,
      "loss": 0.4994,
      "step": 1100
    },
    {
      "epoch": 0.7958967102935974,
      "grad_norm": 6.543335914611816,
      "learning_rate": 8.615555555555555e-06,
      "loss": 0.4694,
      "step": 1125
    },
    {
      "epoch": 0.8135833038556773,
      "grad_norm": 4.369144439697266,
      "learning_rate": 8.560000000000001e-06,
      "loss": 0.4525,
      "step": 1150
    },
    {
      "epoch": 0.8312698974177574,
      "grad_norm": 4.928637981414795,
      "learning_rate": 8.504444444444445e-06,
      "loss": 0.4802,
      "step": 1175
    },
    {
      "epoch": 0.8489564909798373,
      "grad_norm": 5.016990661621094,
      "learning_rate": 8.448888888888889e-06,
      "loss": 0.462,
      "step": 1200
    },
    {
      "epoch": 0.8666430845419172,
      "grad_norm": 5.185001373291016,
      "learning_rate": 8.393333333333335e-06,
      "loss": 0.4189,
      "step": 1225
    },
    {
      "epoch": 0.8843296781039972,
      "grad_norm": 5.9500627517700195,
      "learning_rate": 8.337777777777777e-06,
      "loss": 0.4862,
      "step": 1250
    },
    {
      "epoch": 0.9020162716660771,
      "grad_norm": 4.7840166091918945,
      "learning_rate": 8.282222222222223e-06,
      "loss": 0.4201,
      "step": 1275
    },
    {
      "epoch": 0.919702865228157,
      "grad_norm": 5.590398788452148,
      "learning_rate": 8.226666666666667e-06,
      "loss": 0.438,
      "step": 1300
    },
    {
      "epoch": 0.937389458790237,
      "grad_norm": 5.356804370880127,
      "learning_rate": 8.171111111111113e-06,
      "loss": 0.4343,
      "step": 1325
    },
    {
      "epoch": 0.955076052352317,
      "grad_norm": 5.994735240936279,
      "learning_rate": 8.115555555555557e-06,
      "loss": 0.4281,
      "step": 1350
    },
    {
      "epoch": 0.9727626459143969,
      "grad_norm": 4.92245626449585,
      "learning_rate": 8.06e-06,
      "loss": 0.4711,
      "step": 1375
    },
    {
      "epoch": 0.9904492394764768,
      "grad_norm": 6.673391342163086,
      "learning_rate": 8.004444444444445e-06,
      "loss": 0.4396,
      "step": 1400
    },
    {
      "epoch": 1.0081358330385568,
      "grad_norm": 4.157800197601318,
      "learning_rate": 7.948888888888889e-06,
      "loss": 0.3655,
      "step": 1425
    },
    {
      "epoch": 1.0258224266006368,
      "grad_norm": 4.4156060218811035,
      "learning_rate": 7.893333333333335e-06,
      "loss": 0.3032,
      "step": 1450
    },
    {
      "epoch": 1.0435090201627166,
      "grad_norm": 3.762485980987549,
      "learning_rate": 7.837777777777779e-06,
      "loss": 0.2597,
      "step": 1475
    },
    {
      "epoch": 1.0611956137247966,
      "grad_norm": 3.0248515605926514,
      "learning_rate": 7.782222222222223e-06,
      "loss": 0.2829,
      "step": 1500
    },
    {
      "epoch": 1.0788822072868764,
      "grad_norm": 5.492848873138428,
      "learning_rate": 7.726666666666667e-06,
      "loss": 0.229,
      "step": 1525
    },
    {
      "epoch": 1.0965688008489565,
      "grad_norm": 5.225216388702393,
      "learning_rate": 7.67111111111111e-06,
      "loss": 0.2677,
      "step": 1550
    },
    {
      "epoch": 1.1142553944110365,
      "grad_norm": 3.8239035606384277,
      "learning_rate": 7.6155555555555564e-06,
      "loss": 0.2517,
      "step": 1575
    },
    {
      "epoch": 1.1319419879731163,
      "grad_norm": 5.077475070953369,
      "learning_rate": 7.5600000000000005e-06,
      "loss": 0.2489,
      "step": 1600
    },
    {
      "epoch": 1.1496285815351963,
      "grad_norm": 4.364721298217773,
      "learning_rate": 7.504444444444445e-06,
      "loss": 0.2598,
      "step": 1625
    },
    {
      "epoch": 1.1673151750972763,
      "grad_norm": 3.3700525760650635,
      "learning_rate": 7.44888888888889e-06,
      "loss": 0.2472,
      "step": 1650
    },
    {
      "epoch": 1.1850017686593561,
      "grad_norm": 4.548206329345703,
      "learning_rate": 7.393333333333333e-06,
      "loss": 0.2635,
      "step": 1675
    },
    {
      "epoch": 1.2026883622214362,
      "grad_norm": 4.475757122039795,
      "learning_rate": 7.337777777777778e-06,
      "loss": 0.2345,
      "step": 1700
    },
    {
      "epoch": 1.2203749557835162,
      "grad_norm": 4.205091953277588,
      "learning_rate": 7.282222222222222e-06,
      "loss": 0.2464,
      "step": 1725
    },
    {
      "epoch": 1.238061549345596,
      "grad_norm": 4.778344631195068,
      "learning_rate": 7.226666666666667e-06,
      "loss": 0.238,
      "step": 1750
    },
    {
      "epoch": 1.255748142907676,
      "grad_norm": 3.172917604446411,
      "learning_rate": 7.171111111111112e-06,
      "loss": 0.2364,
      "step": 1775
    },
    {
      "epoch": 1.2734347364697558,
      "grad_norm": 4.275852203369141,
      "learning_rate": 7.115555555555557e-06,
      "loss": 0.2573,
      "step": 1800
    },
    {
      "epoch": 1.2911213300318358,
      "grad_norm": 4.308451175689697,
      "learning_rate": 7.06e-06,
      "loss": 0.2569,
      "step": 1825
    },
    {
      "epoch": 1.3088079235939158,
      "grad_norm": 4.079460144042969,
      "learning_rate": 7.004444444444445e-06,
      "loss": 0.2261,
      "step": 1850
    },
    {
      "epoch": 1.3264945171559956,
      "grad_norm": 6.641946792602539,
      "learning_rate": 6.948888888888889e-06,
      "loss": 0.2334,
      "step": 1875
    },
    {
      "epoch": 1.3441811107180757,
      "grad_norm": 4.23126745223999,
      "learning_rate": 6.893333333333334e-06,
      "loss": 0.2307,
      "step": 1900
    },
    {
      "epoch": 1.3618677042801557,
      "grad_norm": 3.1530086994171143,
      "learning_rate": 6.837777777777779e-06,
      "loss": 0.2154,
      "step": 1925
    },
    {
      "epoch": 1.3795542978422355,
      "grad_norm": 3.8190791606903076,
      "learning_rate": 6.782222222222222e-06,
      "loss": 0.2274,
      "step": 1950
    },
    {
      "epoch": 1.3972408914043155,
      "grad_norm": 3.953749179840088,
      "learning_rate": 6.726666666666667e-06,
      "loss": 0.2426,
      "step": 1975
    },
    {
      "epoch": 1.4149274849663955,
      "grad_norm": 3.527085304260254,
      "learning_rate": 6.671111111111112e-06,
      "loss": 0.2262,
      "step": 2000
    },
    {
      "epoch": 1.4149274849663955,
      "eval_loss": 0.2485629916191101,
      "eval_runtime": 4723.0929,
      "eval_samples_per_second": 2.279,
      "eval_steps_per_second": 0.142,
      "eval_wer": 0.15938160056324807,
      "step": 2000
    },
    {
      "epoch": 1.4326140785284753,
      "grad_norm": 4.03377628326416,
      "learning_rate": 6.615555555555556e-06,
      "loss": 0.2367,
      "step": 2025
    },
    {
      "epoch": 1.4503006720905554,
      "grad_norm": 3.128779888153076,
      "learning_rate": 6.560000000000001e-06,
      "loss": 0.2372,
      "step": 2050
    },
    {
      "epoch": 1.4679872656526354,
      "grad_norm": 3.7657673358917236,
      "learning_rate": 6.504444444444446e-06,
      "loss": 0.2394,
      "step": 2075
    },
    {
      "epoch": 1.4856738592147152,
      "grad_norm": 3.009976387023926,
      "learning_rate": 6.448888888888889e-06,
      "loss": 0.2285,
      "step": 2100
    },
    {
      "epoch": 1.5033604527767952,
      "grad_norm": 3.916525363922119,
      "learning_rate": 6.393333333333334e-06,
      "loss": 0.2359,
      "step": 2125
    },
    {
      "epoch": 1.5210470463388752,
      "grad_norm": 4.418327808380127,
      "learning_rate": 6.3377777777777786e-06,
      "loss": 0.2499,
      "step": 2150
    },
    {
      "epoch": 1.538733639900955,
      "grad_norm": 4.2388105392456055,
      "learning_rate": 6.282222222222223e-06,
      "loss": 0.226,
      "step": 2175
    },
    {
      "epoch": 1.556420233463035,
      "grad_norm": 3.664215564727783,
      "learning_rate": 6.2266666666666675e-06,
      "loss": 0.2316,
      "step": 2200
    },
    {
      "epoch": 1.574106827025115,
      "grad_norm": 4.053125858306885,
      "learning_rate": 6.171111111111112e-06,
      "loss": 0.228,
      "step": 2225
    },
    {
      "epoch": 1.5917934205871949,
      "grad_norm": 4.452697277069092,
      "learning_rate": 6.1155555555555555e-06,
      "loss": 0.2196,
      "step": 2250
    },
    {
      "epoch": 1.6094800141492749,
      "grad_norm": 3.919962167739868,
      "learning_rate": 6.0600000000000004e-06,
      "loss": 0.2495,
      "step": 2275
    },
    {
      "epoch": 1.627166607711355,
      "grad_norm": 4.33357572555542,
      "learning_rate": 6.004444444444445e-06,
      "loss": 0.2245,
      "step": 2300
    },
    {
      "epoch": 1.6448532012734347,
      "grad_norm": 4.634097099304199,
      "learning_rate": 5.948888888888889e-06,
      "loss": 0.2438,
      "step": 2325
    },
    {
      "epoch": 1.6625397948355147,
      "grad_norm": 4.564420223236084,
      "learning_rate": 5.893333333333334e-06,
      "loss": 0.2047,
      "step": 2350
    },
    {
      "epoch": 1.6802263883975948,
      "grad_norm": 4.051385879516602,
      "learning_rate": 5.837777777777777e-06,
      "loss": 0.2392,
      "step": 2375
    },
    {
      "epoch": 1.6979129819596745,
      "grad_norm": 3.313216209411621,
      "learning_rate": 5.782222222222222e-06,
      "loss": 0.2286,
      "step": 2400
    },
    {
      "epoch": 1.7155995755217543,
      "grad_norm": 3.334672451019287,
      "learning_rate": 5.726666666666667e-06,
      "loss": 0.2296,
      "step": 2425
    },
    {
      "epoch": 1.7332861690838346,
      "grad_norm": 3.519235134124756,
      "learning_rate": 5.671111111111112e-06,
      "loss": 0.2107,
      "step": 2450
    },
    {
      "epoch": 1.7509727626459144,
      "grad_norm": 4.1753339767456055,
      "learning_rate": 5.615555555555556e-06,
      "loss": 0.2464,
      "step": 2475
    },
    {
      "epoch": 1.7686593562079942,
      "grad_norm": 3.395427703857422,
      "learning_rate": 5.560000000000001e-06,
      "loss": 0.2323,
      "step": 2500
    },
    {
      "epoch": 1.7863459497700744,
      "grad_norm": 3.480686902999878,
      "learning_rate": 5.504444444444444e-06,
      "loss": 0.2205,
      "step": 2525
    },
    {
      "epoch": 1.8040325433321542,
      "grad_norm": 4.275322914123535,
      "learning_rate": 5.448888888888889e-06,
      "loss": 0.2147,
      "step": 2550
    },
    {
      "epoch": 1.821719136894234,
      "grad_norm": 3.6306569576263428,
      "learning_rate": 5.393333333333334e-06,
      "loss": 0.2404,
      "step": 2575
    },
    {
      "epoch": 1.839405730456314,
      "grad_norm": 3.426995038986206,
      "learning_rate": 5.337777777777779e-06,
      "loss": 0.2259,
      "step": 2600
    },
    {
      "epoch": 1.857092324018394,
      "grad_norm": 4.106579303741455,
      "learning_rate": 5.282222222222223e-06,
      "loss": 0.2043,
      "step": 2625
    },
    {
      "epoch": 1.8747789175804739,
      "grad_norm": 3.365471601486206,
      "learning_rate": 5.226666666666667e-06,
      "loss": 0.2179,
      "step": 2650
    },
    {
      "epoch": 1.892465511142554,
      "grad_norm": 3.4525575637817383,
      "learning_rate": 5.171111111111111e-06,
      "loss": 0.2185,
      "step": 2675
    },
    {
      "epoch": 1.910152104704634,
      "grad_norm": 3.5960958003997803,
      "learning_rate": 5.115555555555556e-06,
      "loss": 0.2007,
      "step": 2700
    },
    {
      "epoch": 1.9278386982667137,
      "grad_norm": 3.91766619682312,
      "learning_rate": 5.060000000000001e-06,
      "loss": 0.2284,
      "step": 2725
    },
    {
      "epoch": 1.9455252918287937,
      "grad_norm": 4.662227630615234,
      "learning_rate": 5.004444444444445e-06,
      "loss": 0.2238,
      "step": 2750
    },
    {
      "epoch": 1.9632118853908738,
      "grad_norm": 3.227084159851074,
      "learning_rate": 4.94888888888889e-06,
      "loss": 0.2068,
      "step": 2775
    },
    {
      "epoch": 1.9808984789529536,
      "grad_norm": 3.7078394889831543,
      "learning_rate": 4.893333333333334e-06,
      "loss": 0.225,
      "step": 2800
    },
    {
      "epoch": 1.9985850725150336,
      "grad_norm": 3.540781259536743,
      "learning_rate": 4.837777777777778e-06,
      "loss": 0.2115,
      "step": 2825
    },
    {
      "epoch": 2.0162716660771136,
      "grad_norm": 2.6315805912017822,
      "learning_rate": 4.7822222222222226e-06,
      "loss": 0.1164,
      "step": 2850
    },
    {
      "epoch": 2.0339582596391934,
      "grad_norm": 3.7077784538269043,
      "learning_rate": 4.7266666666666674e-06,
      "loss": 0.1052,
      "step": 2875
    },
    {
      "epoch": 2.0516448532012737,
      "grad_norm": 2.5548624992370605,
      "learning_rate": 4.6711111111111115e-06,
      "loss": 0.1033,
      "step": 2900
    },
    {
      "epoch": 2.0693314467633535,
      "grad_norm": 3.648667097091675,
      "learning_rate": 4.6155555555555555e-06,
      "loss": 0.0954,
      "step": 2925
    },
    {
      "epoch": 2.0870180403254333,
      "grad_norm": 1.9462636709213257,
      "learning_rate": 4.56e-06,
      "loss": 0.0907,
      "step": 2950
    },
    {
      "epoch": 2.104704633887513,
      "grad_norm": 2.353156089782715,
      "learning_rate": 4.504444444444444e-06,
      "loss": 0.0962,
      "step": 2975
    },
    {
      "epoch": 2.1223912274495933,
      "grad_norm": 1.8443711996078491,
      "learning_rate": 4.448888888888889e-06,
      "loss": 0.0867,
      "step": 3000
    },
    {
      "epoch": 2.1223912274495933,
      "eval_loss": 0.25060394406318665,
      "eval_runtime": 4656.2877,
      "eval_samples_per_second": 2.311,
      "eval_steps_per_second": 0.145,
      "eval_wer": 0.15110889462567473,
      "step": 3000
    },
    {
      "epoch": 2.140077821011673,
      "grad_norm": 2.377577781677246,
      "learning_rate": 4.393333333333334e-06,
      "loss": 0.0825,
      "step": 3025
    },
    {
      "epoch": 2.157764414573753,
      "grad_norm": 2.523731231689453,
      "learning_rate": 4.337777777777778e-06,
      "loss": 0.1005,
      "step": 3050
    },
    {
      "epoch": 2.175451008135833,
      "grad_norm": 3.5590755939483643,
      "learning_rate": 4.282222222222222e-06,
      "loss": 0.1085,
      "step": 3075
    },
    {
      "epoch": 2.193137601697913,
      "grad_norm": 2.7134900093078613,
      "learning_rate": 4.226666666666667e-06,
      "loss": 0.0957,
      "step": 3100
    },
    {
      "epoch": 2.2108241952599927,
      "grad_norm": 3.198664903640747,
      "learning_rate": 4.171111111111111e-06,
      "loss": 0.0898,
      "step": 3125
    },
    {
      "epoch": 2.228510788822073,
      "grad_norm": 2.207460403442383,
      "learning_rate": 4.115555555555556e-06,
      "loss": 0.0922,
      "step": 3150
    },
    {
      "epoch": 2.246197382384153,
      "grad_norm": 3.6293723583221436,
      "learning_rate": 4.060000000000001e-06,
      "loss": 0.0847,
      "step": 3175
    },
    {
      "epoch": 2.2638839759462326,
      "grad_norm": 2.8405864238739014,
      "learning_rate": 4.004444444444445e-06,
      "loss": 0.097,
      "step": 3200
    },
    {
      "epoch": 2.281570569508313,
      "grad_norm": 3.193572521209717,
      "learning_rate": 3.948888888888889e-06,
      "loss": 0.0997,
      "step": 3225
    },
    {
      "epoch": 2.2992571630703926,
      "grad_norm": 2.4953181743621826,
      "learning_rate": 3.893333333333333e-06,
      "loss": 0.0947,
      "step": 3250
    },
    {
      "epoch": 2.3169437566324724,
      "grad_norm": 2.287153959274292,
      "learning_rate": 3.837777777777778e-06,
      "loss": 0.0829,
      "step": 3275
    },
    {
      "epoch": 2.3346303501945527,
      "grad_norm": 3.0835964679718018,
      "learning_rate": 3.782222222222223e-06,
      "loss": 0.0986,
      "step": 3300
    },
    {
      "epoch": 2.3523169437566325,
      "grad_norm": 3.700576066970825,
      "learning_rate": 3.726666666666667e-06,
      "loss": 0.0938,
      "step": 3325
    },
    {
      "epoch": 2.3700035373187123,
      "grad_norm": 2.685814380645752,
      "learning_rate": 3.6711111111111113e-06,
      "loss": 0.0912,
      "step": 3350
    },
    {
      "epoch": 2.3876901308807925,
      "grad_norm": 2.80168080329895,
      "learning_rate": 3.615555555555556e-06,
      "loss": 0.1046,
      "step": 3375
    },
    {
      "epoch": 2.4053767244428723,
      "grad_norm": 1.993931531906128,
      "learning_rate": 3.5600000000000002e-06,
      "loss": 0.0956,
      "step": 3400
    },
    {
      "epoch": 2.423063318004952,
      "grad_norm": 2.2069132328033447,
      "learning_rate": 3.5044444444444447e-06,
      "loss": 0.0897,
      "step": 3425
    },
    {
      "epoch": 2.4407499115670324,
      "grad_norm": 1.9554574489593506,
      "learning_rate": 3.4488888888888896e-06,
      "loss": 0.0916,
      "step": 3450
    },
    {
      "epoch": 2.458436505129112,
      "grad_norm": 2.17236590385437,
      "learning_rate": 3.3933333333333336e-06,
      "loss": 0.0914,
      "step": 3475
    },
    {
      "epoch": 2.476123098691192,
      "grad_norm": 1.3875476121902466,
      "learning_rate": 3.337777777777778e-06,
      "loss": 0.0841,
      "step": 3500
    },
    {
      "epoch": 2.493809692253272,
      "grad_norm": 2.773275852203369,
      "learning_rate": 3.282222222222223e-06,
      "loss": 0.0776,
      "step": 3525
    },
    {
      "epoch": 2.511496285815352,
      "grad_norm": 2.6522185802459717,
      "learning_rate": 3.226666666666667e-06,
      "loss": 0.1017,
      "step": 3550
    },
    {
      "epoch": 2.529182879377432,
      "grad_norm": 2.5875155925750732,
      "learning_rate": 3.1711111111111114e-06,
      "loss": 0.0873,
      "step": 3575
    },
    {
      "epoch": 2.5468694729395116,
      "grad_norm": 2.877279281616211,
      "learning_rate": 3.1155555555555555e-06,
      "loss": 0.1071,
      "step": 3600
    },
    {
      "epoch": 2.564556066501592,
      "grad_norm": 2.6924633979797363,
      "learning_rate": 3.0600000000000003e-06,
      "loss": 0.0885,
      "step": 3625
    },
    {
      "epoch": 2.5822426600636716,
      "grad_norm": 3.1496784687042236,
      "learning_rate": 3.004444444444445e-06,
      "loss": 0.0875,
      "step": 3650
    },
    {
      "epoch": 2.599929253625752,
      "grad_norm": 2.8723342418670654,
      "learning_rate": 2.948888888888889e-06,
      "loss": 0.0849,
      "step": 3675
    },
    {
      "epoch": 2.6176158471878317,
      "grad_norm": 2.9840595722198486,
      "learning_rate": 2.8933333333333337e-06,
      "loss": 0.085,
      "step": 3700
    },
    {
      "epoch": 2.6353024407499115,
      "grad_norm": 2.006192684173584,
      "learning_rate": 2.837777777777778e-06,
      "loss": 0.082,
      "step": 3725
    },
    {
      "epoch": 2.6529890343119913,
      "grad_norm": 2.020374298095703,
      "learning_rate": 2.7822222222222222e-06,
      "loss": 0.083,
      "step": 3750
    },
    {
      "epoch": 2.6706756278740715,
      "grad_norm": 2.8968160152435303,
      "learning_rate": 2.726666666666667e-06,
      "loss": 0.0896,
      "step": 3775
    },
    {
      "epoch": 2.6883622214361513,
      "grad_norm": 2.3445401191711426,
      "learning_rate": 2.6711111111111116e-06,
      "loss": 0.0956,
      "step": 3800
    },
    {
      "epoch": 2.7060488149982316,
      "grad_norm": 2.18906307220459,
      "learning_rate": 2.6155555555555556e-06,
      "loss": 0.0968,
      "step": 3825
    },
    {
      "epoch": 2.7237354085603114,
      "grad_norm": 3.4939792156219482,
      "learning_rate": 2.56e-06,
      "loss": 0.0807,
      "step": 3850
    },
    {
      "epoch": 2.741422002122391,
      "grad_norm": 2.769930839538574,
      "learning_rate": 2.504444444444445e-06,
      "loss": 0.0807,
      "step": 3875
    },
    {
      "epoch": 2.759108595684471,
      "grad_norm": 2.675159215927124,
      "learning_rate": 2.448888888888889e-06,
      "loss": 0.0852,
      "step": 3900
    },
    {
      "epoch": 2.776795189246551,
      "grad_norm": 1.9144662618637085,
      "learning_rate": 2.3933333333333334e-06,
      "loss": 0.0827,
      "step": 3925
    },
    {
      "epoch": 2.794481782808631,
      "grad_norm": 2.1294009685516357,
      "learning_rate": 2.337777777777778e-06,
      "loss": 0.0723,
      "step": 3950
    },
    {
      "epoch": 2.8121683763707113,
      "grad_norm": 2.5433287620544434,
      "learning_rate": 2.2822222222222223e-06,
      "loss": 0.0895,
      "step": 3975
    },
    {
      "epoch": 2.829854969932791,
      "grad_norm": 2.70267391204834,
      "learning_rate": 2.226666666666667e-06,
      "loss": 0.0973,
      "step": 4000
    },
    {
      "epoch": 2.829854969932791,
      "eval_loss": 0.24439197778701782,
      "eval_runtime": 4587.2812,
      "eval_samples_per_second": 2.346,
      "eval_steps_per_second": 0.147,
      "eval_wer": 0.14902605022295237,
      "step": 4000
    },
    {
      "epoch": 2.847541563494871,
      "grad_norm": 2.82698392868042,
      "learning_rate": 2.1711111111111113e-06,
      "loss": 0.087,
      "step": 4025
    },
    {
      "epoch": 2.8652281570569507,
      "grad_norm": 2.4205260276794434,
      "learning_rate": 2.1155555555555557e-06,
      "loss": 0.073,
      "step": 4050
    },
    {
      "epoch": 2.882914750619031,
      "grad_norm": 2.8398914337158203,
      "learning_rate": 2.06e-06,
      "loss": 0.0829,
      "step": 4075
    },
    {
      "epoch": 2.9006013441811107,
      "grad_norm": 2.544459581375122,
      "learning_rate": 2.0044444444444446e-06,
      "loss": 0.0944,
      "step": 4100
    },
    {
      "epoch": 2.9182879377431905,
      "grad_norm": 3.273761034011841,
      "learning_rate": 1.948888888888889e-06,
      "loss": 0.0894,
      "step": 4125
    },
    {
      "epoch": 2.9359745313052708,
      "grad_norm": 2.9628796577453613,
      "learning_rate": 1.8933333333333333e-06,
      "loss": 0.0779,
      "step": 4150
    },
    {
      "epoch": 2.9536611248673506,
      "grad_norm": 2.375798225402832,
      "learning_rate": 1.837777777777778e-06,
      "loss": 0.0887,
      "step": 4175
    },
    {
      "epoch": 2.9713477184294304,
      "grad_norm": 3.536142587661743,
      "learning_rate": 1.7822222222222225e-06,
      "loss": 0.0802,
      "step": 4200
    },
    {
      "epoch": 2.9890343119915106,
      "grad_norm": 2.959638833999634,
      "learning_rate": 1.7266666666666667e-06,
      "loss": 0.0736,
      "step": 4225
    },
    {
      "epoch": 3.0067209055535904,
      "grad_norm": 1.6278612613677979,
      "learning_rate": 1.6711111111111112e-06,
      "loss": 0.0726,
      "step": 4250
    },
    {
      "epoch": 3.02440749911567,
      "grad_norm": 2.070136308670044,
      "learning_rate": 1.6155555555555559e-06,
      "loss": 0.0342,
      "step": 4275
    },
    {
      "epoch": 3.0420940926777504,
      "grad_norm": 1.3914729356765747,
      "learning_rate": 1.56e-06,
      "loss": 0.0358,
      "step": 4300
    },
    {
      "epoch": 3.0597806862398302,
      "grad_norm": 2.264988422393799,
      "learning_rate": 1.5044444444444446e-06,
      "loss": 0.0333,
      "step": 4325
    },
    {
      "epoch": 3.07746727980191,
      "grad_norm": 2.115565538406372,
      "learning_rate": 1.4488888888888892e-06,
      "loss": 0.0441,
      "step": 4350
    },
    {
      "epoch": 3.0951538733639903,
      "grad_norm": 1.555413007736206,
      "learning_rate": 1.3933333333333335e-06,
      "loss": 0.0333,
      "step": 4375
    },
    {
      "epoch": 3.11284046692607,
      "grad_norm": 1.6975215673446655,
      "learning_rate": 1.337777777777778e-06,
      "loss": 0.0347,
      "step": 4400
    },
    {
      "epoch": 3.13052706048815,
      "grad_norm": 1.0206265449523926,
      "learning_rate": 1.2822222222222222e-06,
      "loss": 0.0288,
      "step": 4425
    },
    {
      "epoch": 3.14821365405023,
      "grad_norm": 0.8475884199142456,
      "learning_rate": 1.2266666666666666e-06,
      "loss": 0.0282,
      "step": 4450
    },
    {
      "epoch": 3.16590024761231,
      "grad_norm": 1.2225841283798218,
      "learning_rate": 1.171111111111111e-06,
      "loss": 0.0419,
      "step": 4475
    },
    {
      "epoch": 3.1835868411743897,
      "grad_norm": 1.5350502729415894,
      "learning_rate": 1.1155555555555558e-06,
      "loss": 0.033,
      "step": 4500
    },
    {
      "epoch": 3.2012734347364695,
      "grad_norm": 1.7746856212615967,
      "learning_rate": 1.06e-06,
      "loss": 0.0314,
      "step": 4525
    },
    {
      "epoch": 3.2189600282985498,
      "grad_norm": 0.9352214932441711,
      "learning_rate": 1.0044444444444445e-06,
      "loss": 0.0309,
      "step": 4550
    },
    {
      "epoch": 3.2366466218606296,
      "grad_norm": 1.1960421800613403,
      "learning_rate": 9.488888888888889e-07,
      "loss": 0.0432,
      "step": 4575
    },
    {
      "epoch": 3.25433321542271,
      "grad_norm": 2.057264566421509,
      "learning_rate": 8.933333333333334e-07,
      "loss": 0.0361,
      "step": 4600
    },
    {
      "epoch": 3.2720198089847896,
      "grad_norm": 1.3137741088867188,
      "learning_rate": 8.37777777777778e-07,
      "loss": 0.0274,
      "step": 4625
    },
    {
      "epoch": 3.2897064025468694,
      "grad_norm": 2.9971253871917725,
      "learning_rate": 7.822222222222223e-07,
      "loss": 0.0334,
      "step": 4650
    },
    {
      "epoch": 3.307392996108949,
      "grad_norm": 1.1959739923477173,
      "learning_rate": 7.266666666666668e-07,
      "loss": 0.0321,
      "step": 4675
    },
    {
      "epoch": 3.3250795896710295,
      "grad_norm": 1.4771987199783325,
      "learning_rate": 6.711111111111111e-07,
      "loss": 0.0304,
      "step": 4700
    },
    {
      "epoch": 3.3427661832331093,
      "grad_norm": 1.407965898513794,
      "learning_rate": 6.155555555555556e-07,
      "loss": 0.0358,
      "step": 4725
    },
    {
      "epoch": 3.360452776795189,
      "grad_norm": 1.603348970413208,
      "learning_rate": 5.6e-07,
      "loss": 0.0253,
      "step": 4750
    },
    {
      "epoch": 3.3781393703572693,
      "grad_norm": 0.6914874911308289,
      "learning_rate": 5.044444444444445e-07,
      "loss": 0.0314,
      "step": 4775
    },
    {
      "epoch": 3.395825963919349,
      "grad_norm": 0.9917079210281372,
      "learning_rate": 4.488888888888889e-07,
      "loss": 0.0366,
      "step": 4800
    },
    {
      "epoch": 3.413512557481429,
      "grad_norm": 1.6925675868988037,
      "learning_rate": 3.9333333333333336e-07,
      "loss": 0.0325,
      "step": 4825
    },
    {
      "epoch": 3.431199151043509,
      "grad_norm": 1.986444115638733,
      "learning_rate": 3.3777777777777777e-07,
      "loss": 0.0281,
      "step": 4850
    },
    {
      "epoch": 3.448885744605589,
      "grad_norm": 0.704007089138031,
      "learning_rate": 2.822222222222222e-07,
      "loss": 0.0313,
      "step": 4875
    },
    {
      "epoch": 3.4665723381676687,
      "grad_norm": 1.0404950380325317,
      "learning_rate": 2.266666666666667e-07,
      "loss": 0.0311,
      "step": 4900
    },
    {
      "epoch": 3.484258931729749,
      "grad_norm": 1.7548632621765137,
      "learning_rate": 1.7111111111111114e-07,
      "loss": 0.029,
      "step": 4925
    },
    {
      "epoch": 3.501945525291829,
      "grad_norm": 2.3233606815338135,
      "learning_rate": 1.1555555555555556e-07,
      "loss": 0.0364,
      "step": 4950
    },
    {
      "epoch": 3.5196321188539086,
      "grad_norm": 1.7240501642227173,
      "learning_rate": 6.000000000000001e-08,
      "loss": 0.035,
      "step": 4975
    },
    {
      "epoch": 3.537318712415989,
      "grad_norm": 0.9116590023040771,
      "learning_rate": 4.444444444444445e-09,
      "loss": 0.0303,
      "step": 5000
    },
    {
      "epoch": 3.537318712415989,
      "eval_loss": 0.27435705065727234,
      "eval_runtime": 4637.7857,
      "eval_samples_per_second": 2.321,
      "eval_steps_per_second": 0.145,
      "eval_wer": 0.14744191504341703,
      "step": 5000
    },
    {
      "epoch": 3.537318712415989,
      "step": 5000,
      "total_flos": 5.435079965953229e+20,
      "train_loss": 0.26693406739234926,
      "train_runtime": 74036.7422,
      "train_samples_per_second": 2.161,
      "train_steps_per_second": 0.068
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.435079965953229e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}