|
{ |
|
"best_metric": 29.697292972396323, |
|
"best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-6000", |
|
"epoch": 1.9286403085824495, |
|
"eval_steps": 500, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 48.035465240478516, |
|
"learning_rate": 2.5e-06, |
|
"loss": 6.8285, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 142.98660278320312, |
|
"learning_rate": 5e-06, |
|
"loss": 3.2493, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 27.529985427856445, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 2.2763, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 21.324289321899414, |
|
"learning_rate": 1e-05, |
|
"loss": 1.7193, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.650047302246094, |
|
"learning_rate": 9.91697110594487e-06, |
|
"loss": 1.3177, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 14.939111709594727, |
|
"learning_rate": 9.83394221188974e-06, |
|
"loss": 1.2486, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 14.85805606842041, |
|
"learning_rate": 9.750913317834608e-06, |
|
"loss": 1.1991, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 15.52128791809082, |
|
"learning_rate": 9.667884423779476e-06, |
|
"loss": 1.1126, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 22.642026901245117, |
|
"learning_rate": 9.584855529724345e-06, |
|
"loss": 1.0437, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 16.712217330932617, |
|
"learning_rate": 9.501826635669213e-06, |
|
"loss": 0.9986, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 13.973222732543945, |
|
"learning_rate": 9.418797741614083e-06, |
|
"loss": 0.9188, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 13.404074668884277, |
|
"learning_rate": 9.335768847558952e-06, |
|
"loss": 0.9244, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 12.21960163116455, |
|
"learning_rate": 9.25273995350382e-06, |
|
"loss": 0.9018, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 14.460400581359863, |
|
"learning_rate": 9.169711059448689e-06, |
|
"loss": 0.8398, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 14.076154708862305, |
|
"learning_rate": 9.086682165393557e-06, |
|
"loss": 0.8828, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 13.204269409179688, |
|
"learning_rate": 9.003653271338426e-06, |
|
"loss": 0.8503, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.978958129882812, |
|
"learning_rate": 8.920624377283296e-06, |
|
"loss": 0.8198, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 13.963995933532715, |
|
"learning_rate": 8.837595483228164e-06, |
|
"loss": 0.8202, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 13.337563514709473, |
|
"learning_rate": 8.754566589173033e-06, |
|
"loss": 0.7536, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 12.711252212524414, |
|
"learning_rate": 8.671537695117903e-06, |
|
"loss": 0.7938, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_cer": 55.83411121482864, |
|
"eval_loss": 0.7767874002456665, |
|
"eval_runtime": 1802.202, |
|
"eval_samples_per_second": 2.461, |
|
"eval_steps_per_second": 0.308, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 13.947765350341797, |
|
"learning_rate": 8.588508801062771e-06, |
|
"loss": 0.7784, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.999704360961914, |
|
"learning_rate": 8.50547990700764e-06, |
|
"loss": 0.7646, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 12.103652954101562, |
|
"learning_rate": 8.422451012952508e-06, |
|
"loss": 0.7001, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 13.490057945251465, |
|
"learning_rate": 8.339422118897376e-06, |
|
"loss": 0.7941, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 13.992444038391113, |
|
"learning_rate": 8.256393224842247e-06, |
|
"loss": 0.6561, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 14.403618812561035, |
|
"learning_rate": 8.173364330787115e-06, |
|
"loss": 0.6618, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 12.38306713104248, |
|
"learning_rate": 8.090335436731984e-06, |
|
"loss": 0.7515, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 13.88232135772705, |
|
"learning_rate": 8.007306542676852e-06, |
|
"loss": 0.6823, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 14.506720542907715, |
|
"learning_rate": 7.92427764862172e-06, |
|
"loss": 0.662, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 11.101289749145508, |
|
"learning_rate": 7.841248754566589e-06, |
|
"loss": 0.6974, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 10.762197494506836, |
|
"learning_rate": 7.758219860511459e-06, |
|
"loss": 0.6643, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 14.123621940612793, |
|
"learning_rate": 7.675190966456327e-06, |
|
"loss": 0.6878, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 13.668756484985352, |
|
"learning_rate": 7.592162072401196e-06, |
|
"loss": 0.6102, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 13.39156723022461, |
|
"learning_rate": 7.509133178346065e-06, |
|
"loss": 0.6133, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 11.687459945678711, |
|
"learning_rate": 7.426104284290934e-06, |
|
"loss": 0.6139, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 14.004112243652344, |
|
"learning_rate": 7.343075390235803e-06, |
|
"loss": 0.6257, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 13.317120552062988, |
|
"learning_rate": 7.260046496180671e-06, |
|
"loss": 0.6616, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 13.344803810119629, |
|
"learning_rate": 7.17701760212554e-06, |
|
"loss": 0.6086, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 12.64527416229248, |
|
"learning_rate": 7.09398870807041e-06, |
|
"loss": 0.5734, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 9.928169250488281, |
|
"learning_rate": 7.0109598140152775e-06, |
|
"loss": 0.5845, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_cer": 41.15215362048273, |
|
"eval_loss": 0.5947259068489075, |
|
"eval_runtime": 1807.1895, |
|
"eval_samples_per_second": 2.455, |
|
"eval_steps_per_second": 0.307, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 13.225513458251953, |
|
"learning_rate": 6.927930919960146e-06, |
|
"loss": 0.6458, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.804333686828613, |
|
"learning_rate": 6.844902025905016e-06, |
|
"loss": 0.5106, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 10.24815559387207, |
|
"learning_rate": 6.7618731318498845e-06, |
|
"loss": 0.523, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 11.681272506713867, |
|
"learning_rate": 6.678844237794753e-06, |
|
"loss": 0.5585, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 10.01819133758545, |
|
"learning_rate": 6.595815343739622e-06, |
|
"loss": 0.5943, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 11.715396881103516, |
|
"learning_rate": 6.512786449684491e-06, |
|
"loss": 0.5472, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 10.607870101928711, |
|
"learning_rate": 6.429757555629359e-06, |
|
"loss": 0.5579, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 12.249415397644043, |
|
"learning_rate": 6.3467286615742285e-06, |
|
"loss": 0.5269, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 12.76510238647461, |
|
"learning_rate": 6.263699767519097e-06, |
|
"loss": 0.5273, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 8.935369491577148, |
|
"learning_rate": 6.180670873463966e-06, |
|
"loss": 0.525, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 11.15725040435791, |
|
"learning_rate": 6.097641979408835e-06, |
|
"loss": 0.4792, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 11.681845664978027, |
|
"learning_rate": 6.014613085353703e-06, |
|
"loss": 0.5462, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 13.019536972045898, |
|
"learning_rate": 5.931584191298572e-06, |
|
"loss": 0.4996, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 11.789406776428223, |
|
"learning_rate": 5.848555297243441e-06, |
|
"loss": 0.5088, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 13.898345947265625, |
|
"learning_rate": 5.765526403188309e-06, |
|
"loss": 0.5069, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 11.257216453552246, |
|
"learning_rate": 5.682497509133179e-06, |
|
"loss": 0.4942, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 11.38137149810791, |
|
"learning_rate": 5.599468615078048e-06, |
|
"loss": 0.4532, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 10.852495193481445, |
|
"learning_rate": 5.516439721022916e-06, |
|
"loss": 0.5231, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 14.178400039672852, |
|
"learning_rate": 5.4334108269677856e-06, |
|
"loss": 0.5041, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 11.18582534790039, |
|
"learning_rate": 5.350381932912654e-06, |
|
"loss": 0.459, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_cer": 37.618349113215096, |
|
"eval_loss": 0.5131608247756958, |
|
"eval_runtime": 1799.4249, |
|
"eval_samples_per_second": 2.465, |
|
"eval_steps_per_second": 0.308, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 9.844304084777832, |
|
"learning_rate": 5.267353038857523e-06, |
|
"loss": 0.5035, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 11.165616035461426, |
|
"learning_rate": 5.184324144802392e-06, |
|
"loss": 0.449, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 10.43535327911377, |
|
"learning_rate": 5.10129525074726e-06, |
|
"loss": 0.4471, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 9.737510681152344, |
|
"learning_rate": 5.0182663566921295e-06, |
|
"loss": 0.4779, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 10.221022605895996, |
|
"learning_rate": 4.935237462636998e-06, |
|
"loss": 0.4266, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 7.031712532043457, |
|
"learning_rate": 4.852208568581867e-06, |
|
"loss": 0.4355, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 10.13843822479248, |
|
"learning_rate": 4.769179674526736e-06, |
|
"loss": 0.4506, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 14.100777626037598, |
|
"learning_rate": 4.686150780471604e-06, |
|
"loss": 0.4484, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 11.218331336975098, |
|
"learning_rate": 4.603121886416473e-06, |
|
"loss": 0.4637, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 9.891203880310059, |
|
"learning_rate": 4.520092992361343e-06, |
|
"loss": 0.4142, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 9.585916519165039, |
|
"learning_rate": 4.437064098306211e-06, |
|
"loss": 0.4202, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 10.81905460357666, |
|
"learning_rate": 4.35403520425108e-06, |
|
"loss": 0.4459, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 13.257423400878906, |
|
"learning_rate": 4.271006310195949e-06, |
|
"loss": 0.448, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 9.057276725769043, |
|
"learning_rate": 4.187977416140817e-06, |
|
"loss": 0.4043, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 11.002601623535156, |
|
"learning_rate": 4.104948522085686e-06, |
|
"loss": 0.4011, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 15.421494483947754, |
|
"learning_rate": 4.021919628030555e-06, |
|
"loss": 0.4208, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 12.186066627502441, |
|
"learning_rate": 3.938890733975424e-06, |
|
"loss": 0.389, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 8.680899620056152, |
|
"learning_rate": 3.855861839920293e-06, |
|
"loss": 0.4189, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 10.597740173339844, |
|
"learning_rate": 3.7728329458651612e-06, |
|
"loss": 0.3654, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 10.675308227539062, |
|
"learning_rate": 3.6898040518100305e-06, |
|
"loss": 0.3512, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_cer": 35.404720629417255, |
|
"eval_loss": 0.4709227383136749, |
|
"eval_runtime": 1802.3985, |
|
"eval_samples_per_second": 2.461, |
|
"eval_steps_per_second": 0.308, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 9.923101425170898, |
|
"learning_rate": 3.6067751577548985e-06, |
|
"loss": 0.4361, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 9.01765251159668, |
|
"learning_rate": 3.523746263699768e-06, |
|
"loss": 0.3896, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 11.23643684387207, |
|
"learning_rate": 3.4407173696446367e-06, |
|
"loss": 0.3453, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 9.193674087524414, |
|
"learning_rate": 3.3576884755895056e-06, |
|
"loss": 0.3888, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 8.438018798828125, |
|
"learning_rate": 3.274659581534374e-06, |
|
"loss": 0.3798, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 9.949082374572754, |
|
"learning_rate": 3.191630687479243e-06, |
|
"loss": 0.3828, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 12.07507610321045, |
|
"learning_rate": 3.1086017934241117e-06, |
|
"loss": 0.4027, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 12.350488662719727, |
|
"learning_rate": 3.025572899368981e-06, |
|
"loss": 0.3791, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 12.681595802307129, |
|
"learning_rate": 2.9425440053138495e-06, |
|
"loss": 0.3863, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 13.789870262145996, |
|
"learning_rate": 2.8595151112587184e-06, |
|
"loss": 0.3688, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 11.885881423950195, |
|
"learning_rate": 2.7764862172035872e-06, |
|
"loss": 0.3661, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 10.707484245300293, |
|
"learning_rate": 2.6934573231484557e-06, |
|
"loss": 0.3731, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 12.371014595031738, |
|
"learning_rate": 2.6104284290933245e-06, |
|
"loss": 0.3651, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 12.415855407714844, |
|
"learning_rate": 2.5273995350381934e-06, |
|
"loss": 0.3529, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 12.046368598937988, |
|
"learning_rate": 2.4443706409830623e-06, |
|
"loss": 0.3565, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 10.2451810836792, |
|
"learning_rate": 2.361341746927931e-06, |
|
"loss": 0.3337, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 7.761926174163818, |
|
"learning_rate": 2.2783128528728e-06, |
|
"loss": 0.3636, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.736420631408691, |
|
"learning_rate": 2.1952839588176684e-06, |
|
"loss": 0.346, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.760013580322266, |
|
"learning_rate": 2.1122550647625377e-06, |
|
"loss": 0.3535, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.893476486206055, |
|
"learning_rate": 2.029226170707406e-06, |
|
"loss": 0.3758, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_cer": 33.57781037471663, |
|
"eval_loss": 0.43632233142852783, |
|
"eval_runtime": 1812.7817, |
|
"eval_samples_per_second": 2.447, |
|
"eval_steps_per_second": 0.306, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 11.384421348571777, |
|
"learning_rate": 1.946197276652275e-06, |
|
"loss": 0.3466, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 9.347311973571777, |
|
"learning_rate": 1.863168382597144e-06, |
|
"loss": 0.3558, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 9.740177154541016, |
|
"learning_rate": 1.7801394885420128e-06, |
|
"loss": 0.4067, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 10.038185119628906, |
|
"learning_rate": 1.6971105944868814e-06, |
|
"loss": 0.3431, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 8.289875984191895, |
|
"learning_rate": 1.6140817004317505e-06, |
|
"loss": 0.3821, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 11.42772388458252, |
|
"learning_rate": 1.5310528063766192e-06, |
|
"loss": 0.3611, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 8.776933670043945, |
|
"learning_rate": 1.4480239123214878e-06, |
|
"loss": 0.3241, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 14.44870376586914, |
|
"learning_rate": 1.364995018266357e-06, |
|
"loss": 0.3659, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 7.697235584259033, |
|
"learning_rate": 1.2819661242112256e-06, |
|
"loss": 0.317, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 9.33436107635498, |
|
"learning_rate": 1.1989372301560944e-06, |
|
"loss": 0.3243, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 7.878904819488525, |
|
"learning_rate": 1.115908336100963e-06, |
|
"loss": 0.2832, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 8.996261596679688, |
|
"learning_rate": 1.032879442045832e-06, |
|
"loss": 0.3585, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 10.357467651367188, |
|
"learning_rate": 9.498505479907008e-07, |
|
"loss": 0.3256, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 10.002203941345215, |
|
"learning_rate": 8.668216539355696e-07, |
|
"loss": 0.3459, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 10.587177276611328, |
|
"learning_rate": 7.837927598804385e-07, |
|
"loss": 0.3161, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 7.054004192352295, |
|
"learning_rate": 7.007638658253073e-07, |
|
"loss": 0.3362, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 10.065168380737305, |
|
"learning_rate": 6.177349717701761e-07, |
|
"loss": 0.3107, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 9.83284854888916, |
|
"learning_rate": 5.347060777150448e-07, |
|
"loss": 0.3301, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 11.811662673950195, |
|
"learning_rate": 4.5167718365991366e-07, |
|
"loss": 0.3792, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 11.07596206665039, |
|
"learning_rate": 3.686482896047825e-07, |
|
"loss": 0.3191, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_cer": 32.611014801973596, |
|
"eval_loss": 0.4216199815273285, |
|
"eval_runtime": 1822.6334, |
|
"eval_samples_per_second": 2.434, |
|
"eval_steps_per_second": 0.305, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 8.368192672729492, |
|
"learning_rate": 5.222149624305782e-06, |
|
"loss": 0.3044, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 13.727489471435547, |
|
"learning_rate": 5.181313296308397e-06, |
|
"loss": 0.3297, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 9.884183883666992, |
|
"learning_rate": 5.14047696831101e-06, |
|
"loss": 0.3753, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 10.073676109313965, |
|
"learning_rate": 5.0996406403136236e-06, |
|
"loss": 0.3247, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 9.34837532043457, |
|
"learning_rate": 5.0588043123162365e-06, |
|
"loss": 0.303, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 8.949431419372559, |
|
"learning_rate": 5.01796798431885e-06, |
|
"loss": 0.2329, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 7.911171913146973, |
|
"learning_rate": 4.977131656321464e-06, |
|
"loss": 0.2096, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 6.246947288513184, |
|
"learning_rate": 4.936295328324078e-06, |
|
"loss": 0.2208, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 7.6554059982299805, |
|
"learning_rate": 4.895459000326691e-06, |
|
"loss": 0.2508, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 9.491788864135742, |
|
"learning_rate": 4.8546226723293045e-06, |
|
"loss": 0.2274, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 9.15794849395752, |
|
"learning_rate": 4.813786344331918e-06, |
|
"loss": 0.2257, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 9.842211723327637, |
|
"learning_rate": 4.772950016334531e-06, |
|
"loss": 0.227, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 7.734405040740967, |
|
"learning_rate": 4.732113688337145e-06, |
|
"loss": 0.2207, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 8.951905250549316, |
|
"learning_rate": 4.691277360339759e-06, |
|
"loss": 0.2307, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 7.3573222160339355, |
|
"learning_rate": 4.650441032342372e-06, |
|
"loss": 0.2233, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 8.47739315032959, |
|
"learning_rate": 4.6096047043449855e-06, |
|
"loss": 0.2361, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 6.945776462554932, |
|
"learning_rate": 4.568768376347599e-06, |
|
"loss": 0.2353, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 8.67324161529541, |
|
"learning_rate": 4.527932048350212e-06, |
|
"loss": 0.2302, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 6.912210464477539, |
|
"learning_rate": 4.487095720352827e-06, |
|
"loss": 0.2292, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 8.434404373168945, |
|
"learning_rate": 4.44625939235544e-06, |
|
"loss": 0.2295, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_cer": 32.49766635551407, |
|
"eval_loss": 0.42611706256866455, |
|
"eval_runtime": 1848.4809, |
|
"eval_samples_per_second": 2.4, |
|
"eval_steps_per_second": 0.3, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 9.02902603149414, |
|
"learning_rate": 4.405423064358053e-06, |
|
"loss": 0.2382, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 8.05671215057373, |
|
"learning_rate": 4.364586736360667e-06, |
|
"loss": 0.222, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 10.546473503112793, |
|
"learning_rate": 4.32375040836328e-06, |
|
"loss": 0.2131, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 8.884702682495117, |
|
"learning_rate": 4.282914080365894e-06, |
|
"loss": 0.1886, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 7.569803237915039, |
|
"learning_rate": 4.242077752368508e-06, |
|
"loss": 0.222, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 6.256328105926514, |
|
"learning_rate": 4.201241424371121e-06, |
|
"loss": 0.2083, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 6.724915027618408, |
|
"learning_rate": 4.1604050963737345e-06, |
|
"loss": 0.2452, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 11.219491004943848, |
|
"learning_rate": 4.119568768376348e-06, |
|
"loss": 0.2217, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 6.6789469718933105, |
|
"learning_rate": 4.078732440378961e-06, |
|
"loss": 0.1958, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 7.929986476898193, |
|
"learning_rate": 4.037896112381575e-06, |
|
"loss": 0.1863, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 8.032015800476074, |
|
"learning_rate": 3.997059784384189e-06, |
|
"loss": 0.2153, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 8.176934242248535, |
|
"learning_rate": 3.956223456386802e-06, |
|
"loss": 0.1971, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 10.322613716125488, |
|
"learning_rate": 3.9153871283894155e-06, |
|
"loss": 0.1991, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 7.837410926818848, |
|
"learning_rate": 3.874550800392029e-06, |
|
"loss": 0.2043, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 12.684860229492188, |
|
"learning_rate": 3.833714472394642e-06, |
|
"loss": 0.2008, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 7.476794242858887, |
|
"learning_rate": 3.7928781443972564e-06, |
|
"loss": 0.2199, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 4.45359992980957, |
|
"learning_rate": 3.7520418163998693e-06, |
|
"loss": 0.196, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 9.373842239379883, |
|
"learning_rate": 3.7112054884024835e-06, |
|
"loss": 0.1939, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 6.383950233459473, |
|
"learning_rate": 3.670369160405097e-06, |
|
"loss": 0.2066, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 5.862789154052734, |
|
"learning_rate": 3.6295328324077102e-06, |
|
"loss": 0.1806, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_cer": 31.99093212428324, |
|
"eval_loss": 0.4084797203540802, |
|
"eval_runtime": 1898.761, |
|
"eval_samples_per_second": 2.336, |
|
"eval_steps_per_second": 0.292, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 7.52218770980835, |
|
"learning_rate": 3.588696504410324e-06, |
|
"loss": 0.1955, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 8.115983963012695, |
|
"learning_rate": 3.5478601764129374e-06, |
|
"loss": 0.1802, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 10.009458541870117, |
|
"learning_rate": 3.5070238484155507e-06, |
|
"loss": 0.1866, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 8.48315715789795, |
|
"learning_rate": 3.4661875204181645e-06, |
|
"loss": 0.2193, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 7.425174713134766, |
|
"learning_rate": 3.425351192420778e-06, |
|
"loss": 0.2096, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 5.02262544631958, |
|
"learning_rate": 3.384514864423391e-06, |
|
"loss": 0.1892, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 7.6967010498046875, |
|
"learning_rate": 3.343678536426005e-06, |
|
"loss": 0.1869, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 7.067899703979492, |
|
"learning_rate": 3.3028422084286183e-06, |
|
"loss": 0.1842, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 9.152185440063477, |
|
"learning_rate": 3.262005880431232e-06, |
|
"loss": 0.1601, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 5.13536262512207, |
|
"learning_rate": 3.2211695524338455e-06, |
|
"loss": 0.1766, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 8.453483581542969, |
|
"learning_rate": 3.180333224436459e-06, |
|
"loss": 0.1775, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 8.888550758361816, |
|
"learning_rate": 3.1394968964390726e-06, |
|
"loss": 0.1966, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 6.241116046905518, |
|
"learning_rate": 3.098660568441686e-06, |
|
"loss": 0.1602, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 10.21055793762207, |
|
"learning_rate": 3.0578242404442993e-06, |
|
"loss": 0.188, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 6.382270812988281, |
|
"learning_rate": 3.016987912446913e-06, |
|
"loss": 0.1686, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 8.593984603881836, |
|
"learning_rate": 2.9761515844495264e-06, |
|
"loss": 0.1959, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 8.248409271240234, |
|
"learning_rate": 2.93531525645214e-06, |
|
"loss": 0.1848, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 7.425219535827637, |
|
"learning_rate": 2.8944789284547536e-06, |
|
"loss": 0.1711, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 6.594272613525391, |
|
"learning_rate": 2.853642600457367e-06, |
|
"loss": 0.1601, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 6.964175224304199, |
|
"learning_rate": 2.8128062724599807e-06, |
|
"loss": 0.16, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_cer": 31.170822776370184, |
|
"eval_loss": 0.3913029432296753, |
|
"eval_runtime": 1863.6532, |
|
"eval_samples_per_second": 2.38, |
|
"eval_steps_per_second": 0.298, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 7.741447448730469, |
|
"learning_rate": 2.771969944462594e-06, |
|
"loss": 0.1552, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 6.088663101196289, |
|
"learning_rate": 2.7311336164652074e-06, |
|
"loss": 0.1759, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 9.178170204162598, |
|
"learning_rate": 2.690297288467821e-06, |
|
"loss": 0.1635, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 9.006258010864258, |
|
"learning_rate": 2.6494609604704345e-06, |
|
"loss": 0.1986, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 7.974513530731201, |
|
"learning_rate": 2.608624632473048e-06, |
|
"loss": 0.1781, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 8.142216682434082, |
|
"learning_rate": 2.5677883044756617e-06, |
|
"loss": 0.1509, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 5.452117919921875, |
|
"learning_rate": 2.526951976478275e-06, |
|
"loss": 0.1682, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 6.821118354797363, |
|
"learning_rate": 2.4861156484808888e-06, |
|
"loss": 0.1559, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 9.8412446975708, |
|
"learning_rate": 2.4452793204835026e-06, |
|
"loss": 0.1519, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 7.750609874725342, |
|
"learning_rate": 2.404442992486116e-06, |
|
"loss": 0.1394, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 8.334457397460938, |
|
"learning_rate": 2.3636066644887293e-06, |
|
"loss": 0.1605, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 5.586342811584473, |
|
"learning_rate": 2.322770336491343e-06, |
|
"loss": 0.1622, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 8.146045684814453, |
|
"learning_rate": 2.2819340084939564e-06, |
|
"loss": 0.175, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 5.796145915985107, |
|
"learning_rate": 2.24109768049657e-06, |
|
"loss": 0.1702, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 8.852866172790527, |
|
"learning_rate": 2.2002613524991835e-06, |
|
"loss": 0.1522, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 7.19501256942749, |
|
"learning_rate": 2.159425024501797e-06, |
|
"loss": 0.1465, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 7.024486064910889, |
|
"learning_rate": 2.1185886965044107e-06, |
|
"loss": 0.1539, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 7.009402751922607, |
|
"learning_rate": 2.077752368507024e-06, |
|
"loss": 0.1442, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 4.041311264038086, |
|
"learning_rate": 2.0369160405096374e-06, |
|
"loss": 0.151, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 8.767594337463379, |
|
"learning_rate": 1.996079712512251e-06, |
|
"loss": 0.1603, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_cer": 30.38405120682758, |
|
"eval_loss": 0.383564829826355, |
|
"eval_runtime": 1857.7457, |
|
"eval_samples_per_second": 2.388, |
|
"eval_steps_per_second": 0.299, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 5.629392147064209, |
|
"learning_rate": 1.9552433845148645e-06, |
|
"loss": 0.1515, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 5.018975734710693, |
|
"learning_rate": 1.914407056517478e-06, |
|
"loss": 0.157, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 6.182131767272949, |
|
"learning_rate": 1.8735707285200916e-06, |
|
"loss": 0.13, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 9.000260353088379, |
|
"learning_rate": 1.8327344005227052e-06, |
|
"loss": 0.1726, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 6.853832244873047, |
|
"learning_rate": 1.7918980725253188e-06, |
|
"loss": 0.1451, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 5.68117618560791, |
|
"learning_rate": 1.7510617445279321e-06, |
|
"loss": 0.1518, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 4.632532119750977, |
|
"learning_rate": 1.7102254165305457e-06, |
|
"loss": 0.144, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 8.772269248962402, |
|
"learning_rate": 1.6693890885331592e-06, |
|
"loss": 0.1525, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 8.809287071228027, |
|
"learning_rate": 1.6285527605357728e-06, |
|
"loss": 0.132, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 7.337480545043945, |
|
"learning_rate": 1.5877164325383862e-06, |
|
"loss": 0.1549, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 5.269392013549805, |
|
"learning_rate": 1.5468801045409997e-06, |
|
"loss": 0.1524, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 7.877448558807373, |
|
"learning_rate": 1.5060437765436133e-06, |
|
"loss": 0.1421, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 6.454422950744629, |
|
"learning_rate": 1.4652074485462266e-06, |
|
"loss": 0.1377, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 7.873298645019531, |
|
"learning_rate": 1.4243711205488402e-06, |
|
"loss": 0.1446, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 6.6517486572265625, |
|
"learning_rate": 1.383534792551454e-06, |
|
"loss": 0.1482, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 9.937956809997559, |
|
"learning_rate": 1.3426984645540676e-06, |
|
"loss": 0.1306, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 4.228558540344238, |
|
"learning_rate": 1.301862136556681e-06, |
|
"loss": 0.1229, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 4.710421085357666, |
|
"learning_rate": 1.2610258085592945e-06, |
|
"loss": 0.1374, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 4.934779644012451, |
|
"learning_rate": 1.220189480561908e-06, |
|
"loss": 0.1321, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 9.244394302368164, |
|
"learning_rate": 1.1793531525645214e-06, |
|
"loss": 0.1343, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_cer": 30.15735431390852, |
|
"eval_loss": 0.3783666491508484, |
|
"eval_runtime": 1871.1459, |
|
"eval_samples_per_second": 2.371, |
|
"eval_steps_per_second": 0.297, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 7.236656188964844, |
|
"learning_rate": 1.138516824567135e-06, |
|
"loss": 0.1295, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 6.239099502563477, |
|
"learning_rate": 1.0976804965697485e-06, |
|
"loss": 0.1378, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 4.9148945808410645, |
|
"learning_rate": 1.056844168572362e-06, |
|
"loss": 0.1272, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 7.572327136993408, |
|
"learning_rate": 1.0160078405749757e-06, |
|
"loss": 0.1405, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 6.76165771484375, |
|
"learning_rate": 9.751715125775892e-07, |
|
"loss": 0.1351, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 10.984220504760742, |
|
"learning_rate": 9.343351845802026e-07, |
|
"loss": 0.1467, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 4.543166637420654, |
|
"learning_rate": 8.934988565828162e-07, |
|
"loss": 0.1175, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 8.191649436950684, |
|
"learning_rate": 8.526625285854297e-07, |
|
"loss": 0.1388, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 7.215826988220215, |
|
"learning_rate": 8.118262005880432e-07, |
|
"loss": 0.1354, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 5.940629959106445, |
|
"learning_rate": 7.709898725906567e-07, |
|
"loss": 0.1283, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 6.796767234802246, |
|
"learning_rate": 7.301535445932702e-07, |
|
"loss": 0.1274, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 8.403697967529297, |
|
"learning_rate": 6.893172165958838e-07, |
|
"loss": 0.1333, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 6.127229690551758, |
|
"learning_rate": 6.484808885984972e-07, |
|
"loss": 0.1406, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 7.16465950012207, |
|
"learning_rate": 6.076445606011108e-07, |
|
"loss": 0.132, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 5.777968406677246, |
|
"learning_rate": 5.668082326037243e-07, |
|
"loss": 0.1437, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 6.021764755249023, |
|
"learning_rate": 5.259719046063379e-07, |
|
"loss": 0.1203, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 5.480493068695068, |
|
"learning_rate": 4.851355766089514e-07, |
|
"loss": 0.1398, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 7.609493732452393, |
|
"learning_rate": 4.442992486115649e-07, |
|
"loss": 0.1274, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 5.910650730133057, |
|
"learning_rate": 4.034629206141784e-07, |
|
"loss": 0.1352, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 4.371640682220459, |
|
"learning_rate": 3.626265926167919e-07, |
|
"loss": 0.1265, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_cer": 29.697292972396323, |
|
"eval_loss": 0.37359777092933655, |
|
"eval_runtime": 1867.3275, |
|
"eval_samples_per_second": 2.376, |
|
"eval_steps_per_second": 0.297, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 6222, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 2.770419843072e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|