{ "best_metric": 29.697292972396323, "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-6000", "epoch": 1.9286403085824495, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 48.035465240478516, "learning_rate": 2.5e-06, "loss": 6.8285, "step": 25 }, { "epoch": 0.02, "grad_norm": 142.98660278320312, "learning_rate": 5e-06, "loss": 3.2493, "step": 50 }, { "epoch": 0.02, "grad_norm": 27.529985427856445, "learning_rate": 7.500000000000001e-06, "loss": 2.2763, "step": 75 }, { "epoch": 0.03, "grad_norm": 21.324289321899414, "learning_rate": 1e-05, "loss": 1.7193, "step": 100 }, { "epoch": 0.04, "grad_norm": 13.650047302246094, "learning_rate": 9.91697110594487e-06, "loss": 1.3177, "step": 125 }, { "epoch": 0.05, "grad_norm": 14.939111709594727, "learning_rate": 9.83394221188974e-06, "loss": 1.2486, "step": 150 }, { "epoch": 0.06, "grad_norm": 14.85805606842041, "learning_rate": 9.750913317834608e-06, "loss": 1.1991, "step": 175 }, { "epoch": 0.06, "grad_norm": 15.52128791809082, "learning_rate": 9.667884423779476e-06, "loss": 1.1126, "step": 200 }, { "epoch": 0.07, "grad_norm": 22.642026901245117, "learning_rate": 9.584855529724345e-06, "loss": 1.0437, "step": 225 }, { "epoch": 0.08, "grad_norm": 16.712217330932617, "learning_rate": 9.501826635669213e-06, "loss": 0.9986, "step": 250 }, { "epoch": 0.09, "grad_norm": 13.973222732543945, "learning_rate": 9.418797741614083e-06, "loss": 0.9188, "step": 275 }, { "epoch": 0.1, "grad_norm": 13.404074668884277, "learning_rate": 9.335768847558952e-06, "loss": 0.9244, "step": 300 }, { "epoch": 0.1, "grad_norm": 12.21960163116455, "learning_rate": 9.25273995350382e-06, "loss": 0.9018, "step": 325 }, { "epoch": 0.11, "grad_norm": 14.460400581359863, "learning_rate": 9.169711059448689e-06, "loss": 0.8398, "step": 350 }, { "epoch": 0.12, "grad_norm": 14.076154708862305, "learning_rate": 9.086682165393557e-06, "loss": 0.8828, "step": 375 }, { "epoch": 0.13, "grad_norm": 13.204269409179688, "learning_rate": 9.003653271338426e-06, "loss": 0.8503, "step": 400 }, { "epoch": 0.14, "grad_norm": 10.978958129882812, "learning_rate": 8.920624377283296e-06, "loss": 0.8198, "step": 425 }, { "epoch": 0.14, "grad_norm": 13.963995933532715, "learning_rate": 8.837595483228164e-06, "loss": 0.8202, "step": 450 }, { "epoch": 0.15, "grad_norm": 13.337563514709473, "learning_rate": 8.754566589173033e-06, "loss": 0.7536, "step": 475 }, { "epoch": 0.16, "grad_norm": 12.711252212524414, "learning_rate": 8.671537695117903e-06, "loss": 0.7938, "step": 500 }, { "epoch": 0.16, "eval_cer": 55.83411121482864, "eval_loss": 0.7767874002456665, "eval_runtime": 1802.202, "eval_samples_per_second": 2.461, "eval_steps_per_second": 0.308, "step": 500 }, { "epoch": 0.17, "grad_norm": 13.947765350341797, "learning_rate": 8.588508801062771e-06, "loss": 0.7784, "step": 525 }, { "epoch": 0.18, "grad_norm": 11.999704360961914, "learning_rate": 8.50547990700764e-06, "loss": 0.7646, "step": 550 }, { "epoch": 0.18, "grad_norm": 12.103652954101562, "learning_rate": 8.422451012952508e-06, "loss": 0.7001, "step": 575 }, { "epoch": 0.19, "grad_norm": 13.490057945251465, "learning_rate": 8.339422118897376e-06, "loss": 0.7941, "step": 600 }, { "epoch": 0.2, "grad_norm": 13.992444038391113, "learning_rate": 8.256393224842247e-06, "loss": 0.6561, "step": 625 }, { "epoch": 0.21, "grad_norm": 14.403618812561035, "learning_rate": 8.173364330787115e-06, "loss": 0.6618, "step": 650 }, { "epoch": 0.22, "grad_norm": 12.38306713104248, "learning_rate": 8.090335436731984e-06, "loss": 0.7515, "step": 675 }, { "epoch": 0.23, "grad_norm": 13.88232135772705, "learning_rate": 8.007306542676852e-06, "loss": 0.6823, "step": 700 }, { "epoch": 0.23, "grad_norm": 14.506720542907715, "learning_rate": 7.92427764862172e-06, "loss": 0.662, "step": 725 }, { "epoch": 0.24, "grad_norm": 11.101289749145508, "learning_rate": 7.841248754566589e-06, "loss": 0.6974, "step": 750 }, { "epoch": 0.25, "grad_norm": 10.762197494506836, "learning_rate": 7.758219860511459e-06, "loss": 0.6643, "step": 775 }, { "epoch": 0.26, "grad_norm": 14.123621940612793, "learning_rate": 7.675190966456327e-06, "loss": 0.6878, "step": 800 }, { "epoch": 0.27, "grad_norm": 13.668756484985352, "learning_rate": 7.592162072401196e-06, "loss": 0.6102, "step": 825 }, { "epoch": 0.27, "grad_norm": 13.39156723022461, "learning_rate": 7.509133178346065e-06, "loss": 0.6133, "step": 850 }, { "epoch": 0.28, "grad_norm": 11.687459945678711, "learning_rate": 7.426104284290934e-06, "loss": 0.6139, "step": 875 }, { "epoch": 0.29, "grad_norm": 14.004112243652344, "learning_rate": 7.343075390235803e-06, "loss": 0.6257, "step": 900 }, { "epoch": 0.3, "grad_norm": 13.317120552062988, "learning_rate": 7.260046496180671e-06, "loss": 0.6616, "step": 925 }, { "epoch": 0.31, "grad_norm": 13.344803810119629, "learning_rate": 7.17701760212554e-06, "loss": 0.6086, "step": 950 }, { "epoch": 0.31, "grad_norm": 12.64527416229248, "learning_rate": 7.09398870807041e-06, "loss": 0.5734, "step": 975 }, { "epoch": 0.32, "grad_norm": 9.928169250488281, "learning_rate": 7.0109598140152775e-06, "loss": 0.5845, "step": 1000 }, { "epoch": 0.32, "eval_cer": 41.15215362048273, "eval_loss": 0.5947259068489075, "eval_runtime": 1807.1895, "eval_samples_per_second": 2.455, "eval_steps_per_second": 0.307, "step": 1000 }, { "epoch": 0.33, "grad_norm": 13.225513458251953, "learning_rate": 6.927930919960146e-06, "loss": 0.6458, "step": 1025 }, { "epoch": 0.34, "grad_norm": 10.804333686828613, "learning_rate": 6.844902025905016e-06, "loss": 0.5106, "step": 1050 }, { "epoch": 0.35, "grad_norm": 10.24815559387207, "learning_rate": 6.7618731318498845e-06, "loss": 0.523, "step": 1075 }, { "epoch": 0.35, "grad_norm": 11.681272506713867, "learning_rate": 6.678844237794753e-06, "loss": 0.5585, "step": 1100 }, { "epoch": 0.36, "grad_norm": 10.01819133758545, "learning_rate": 6.595815343739622e-06, "loss": 0.5943, "step": 1125 }, { "epoch": 0.37, "grad_norm": 11.715396881103516, "learning_rate": 6.512786449684491e-06, "loss": 0.5472, "step": 1150 }, { "epoch": 0.38, "grad_norm": 10.607870101928711, "learning_rate": 6.429757555629359e-06, "loss": 0.5579, "step": 1175 }, { "epoch": 0.39, "grad_norm": 12.249415397644043, "learning_rate": 6.3467286615742285e-06, "loss": 0.5269, "step": 1200 }, { "epoch": 0.39, "grad_norm": 12.76510238647461, "learning_rate": 6.263699767519097e-06, "loss": 0.5273, "step": 1225 }, { "epoch": 0.4, "grad_norm": 8.935369491577148, "learning_rate": 6.180670873463966e-06, "loss": 0.525, "step": 1250 }, { "epoch": 0.41, "grad_norm": 11.15725040435791, "learning_rate": 6.097641979408835e-06, "loss": 0.4792, "step": 1275 }, { "epoch": 0.42, "grad_norm": 11.681845664978027, "learning_rate": 6.014613085353703e-06, "loss": 0.5462, "step": 1300 }, { "epoch": 0.43, "grad_norm": 13.019536972045898, "learning_rate": 5.931584191298572e-06, "loss": 0.4996, "step": 1325 }, { "epoch": 0.43, "grad_norm": 11.789406776428223, "learning_rate": 5.848555297243441e-06, "loss": 0.5088, "step": 1350 }, { "epoch": 0.44, "grad_norm": 13.898345947265625, "learning_rate": 5.765526403188309e-06, "loss": 0.5069, "step": 1375 }, { "epoch": 0.45, "grad_norm": 11.257216453552246, "learning_rate": 5.682497509133179e-06, "loss": 0.4942, "step": 1400 }, { "epoch": 0.46, "grad_norm": 11.38137149810791, "learning_rate": 5.599468615078048e-06, "loss": 0.4532, "step": 1425 }, { "epoch": 0.47, "grad_norm": 10.852495193481445, "learning_rate": 5.516439721022916e-06, "loss": 0.5231, "step": 1450 }, { "epoch": 0.47, "grad_norm": 14.178400039672852, "learning_rate": 5.4334108269677856e-06, "loss": 0.5041, "step": 1475 }, { "epoch": 0.48, "grad_norm": 11.18582534790039, "learning_rate": 5.350381932912654e-06, "loss": 0.459, "step": 1500 }, { "epoch": 0.48, "eval_cer": 37.618349113215096, "eval_loss": 0.5131608247756958, "eval_runtime": 1799.4249, "eval_samples_per_second": 2.465, "eval_steps_per_second": 0.308, "step": 1500 }, { "epoch": 0.49, "grad_norm": 9.844304084777832, "learning_rate": 5.267353038857523e-06, "loss": 0.5035, "step": 1525 }, { "epoch": 0.5, "grad_norm": 11.165616035461426, "learning_rate": 5.184324144802392e-06, "loss": 0.449, "step": 1550 }, { "epoch": 0.51, "grad_norm": 10.43535327911377, "learning_rate": 5.10129525074726e-06, "loss": 0.4471, "step": 1575 }, { "epoch": 0.51, "grad_norm": 9.737510681152344, "learning_rate": 5.0182663566921295e-06, "loss": 0.4779, "step": 1600 }, { "epoch": 0.52, "grad_norm": 10.221022605895996, "learning_rate": 4.935237462636998e-06, "loss": 0.4266, "step": 1625 }, { "epoch": 0.53, "grad_norm": 7.031712532043457, "learning_rate": 4.852208568581867e-06, "loss": 0.4355, "step": 1650 }, { "epoch": 0.54, "grad_norm": 10.13843822479248, "learning_rate": 4.769179674526736e-06, "loss": 0.4506, "step": 1675 }, { "epoch": 0.55, "grad_norm": 14.100777626037598, "learning_rate": 4.686150780471604e-06, "loss": 0.4484, "step": 1700 }, { "epoch": 0.55, "grad_norm": 11.218331336975098, "learning_rate": 4.603121886416473e-06, "loss": 0.4637, "step": 1725 }, { "epoch": 0.56, "grad_norm": 9.891203880310059, "learning_rate": 4.520092992361343e-06, "loss": 0.4142, "step": 1750 }, { "epoch": 0.57, "grad_norm": 9.585916519165039, "learning_rate": 4.437064098306211e-06, "loss": 0.4202, "step": 1775 }, { "epoch": 0.58, "grad_norm": 10.81905460357666, "learning_rate": 4.35403520425108e-06, "loss": 0.4459, "step": 1800 }, { "epoch": 0.59, "grad_norm": 13.257423400878906, "learning_rate": 4.271006310195949e-06, "loss": 0.448, "step": 1825 }, { "epoch": 0.59, "grad_norm": 9.057276725769043, "learning_rate": 4.187977416140817e-06, "loss": 0.4043, "step": 1850 }, { "epoch": 0.6, "grad_norm": 11.002601623535156, "learning_rate": 4.104948522085686e-06, "loss": 0.4011, "step": 1875 }, { "epoch": 0.61, "grad_norm": 15.421494483947754, "learning_rate": 4.021919628030555e-06, "loss": 0.4208, "step": 1900 }, { "epoch": 0.62, "grad_norm": 12.186066627502441, "learning_rate": 3.938890733975424e-06, "loss": 0.389, "step": 1925 }, { "epoch": 0.63, "grad_norm": 8.680899620056152, "learning_rate": 3.855861839920293e-06, "loss": 0.4189, "step": 1950 }, { "epoch": 0.63, "grad_norm": 10.597740173339844, "learning_rate": 3.7728329458651612e-06, "loss": 0.3654, "step": 1975 }, { "epoch": 0.64, "grad_norm": 10.675308227539062, "learning_rate": 3.6898040518100305e-06, "loss": 0.3512, "step": 2000 }, { "epoch": 0.64, "eval_cer": 35.404720629417255, "eval_loss": 0.4709227383136749, "eval_runtime": 1802.3985, "eval_samples_per_second": 2.461, "eval_steps_per_second": 0.308, "step": 2000 }, { "epoch": 0.65, "grad_norm": 9.923101425170898, "learning_rate": 3.6067751577548985e-06, "loss": 0.4361, "step": 2025 }, { "epoch": 0.66, "grad_norm": 9.01765251159668, "learning_rate": 3.523746263699768e-06, "loss": 0.3896, "step": 2050 }, { "epoch": 0.67, "grad_norm": 11.23643684387207, "learning_rate": 3.4407173696446367e-06, "loss": 0.3453, "step": 2075 }, { "epoch": 0.68, "grad_norm": 9.193674087524414, "learning_rate": 3.3576884755895056e-06, "loss": 0.3888, "step": 2100 }, { "epoch": 0.68, "grad_norm": 8.438018798828125, "learning_rate": 3.274659581534374e-06, "loss": 0.3798, "step": 2125 }, { "epoch": 0.69, "grad_norm": 9.949082374572754, "learning_rate": 3.191630687479243e-06, "loss": 0.3828, "step": 2150 }, { "epoch": 0.7, "grad_norm": 12.07507610321045, "learning_rate": 3.1086017934241117e-06, "loss": 0.4027, "step": 2175 }, { "epoch": 0.71, "grad_norm": 12.350488662719727, "learning_rate": 3.025572899368981e-06, "loss": 0.3791, "step": 2200 }, { "epoch": 0.72, "grad_norm": 12.681595802307129, "learning_rate": 2.9425440053138495e-06, "loss": 0.3863, "step": 2225 }, { "epoch": 0.72, "grad_norm": 13.789870262145996, "learning_rate": 2.8595151112587184e-06, "loss": 0.3688, "step": 2250 }, { "epoch": 0.73, "grad_norm": 11.885881423950195, "learning_rate": 2.7764862172035872e-06, "loss": 0.3661, "step": 2275 }, { "epoch": 0.74, "grad_norm": 10.707484245300293, "learning_rate": 2.6934573231484557e-06, "loss": 0.3731, "step": 2300 }, { "epoch": 0.75, "grad_norm": 12.371014595031738, "learning_rate": 2.6104284290933245e-06, "loss": 0.3651, "step": 2325 }, { "epoch": 0.76, "grad_norm": 12.415855407714844, "learning_rate": 2.5273995350381934e-06, "loss": 0.3529, "step": 2350 }, { "epoch": 0.76, "grad_norm": 12.046368598937988, "learning_rate": 2.4443706409830623e-06, "loss": 0.3565, "step": 2375 }, { "epoch": 0.77, "grad_norm": 10.2451810836792, "learning_rate": 2.361341746927931e-06, "loss": 0.3337, "step": 2400 }, { "epoch": 0.78, "grad_norm": 7.761926174163818, "learning_rate": 2.2783128528728e-06, "loss": 0.3636, "step": 2425 }, { "epoch": 0.79, "grad_norm": 9.736420631408691, "learning_rate": 2.1952839588176684e-06, "loss": 0.346, "step": 2450 }, { "epoch": 0.8, "grad_norm": 9.760013580322266, "learning_rate": 2.1122550647625377e-06, "loss": 0.3535, "step": 2475 }, { "epoch": 0.8, "grad_norm": 9.893476486206055, "learning_rate": 2.029226170707406e-06, "loss": 0.3758, "step": 2500 }, { "epoch": 0.8, "eval_cer": 33.57781037471663, "eval_loss": 0.43632233142852783, "eval_runtime": 1812.7817, "eval_samples_per_second": 2.447, "eval_steps_per_second": 0.306, "step": 2500 }, { "epoch": 0.81, "grad_norm": 11.384421348571777, "learning_rate": 1.946197276652275e-06, "loss": 0.3466, "step": 2525 }, { "epoch": 0.82, "grad_norm": 9.347311973571777, "learning_rate": 1.863168382597144e-06, "loss": 0.3558, "step": 2550 }, { "epoch": 0.83, "grad_norm": 9.740177154541016, "learning_rate": 1.7801394885420128e-06, "loss": 0.4067, "step": 2575 }, { "epoch": 0.84, "grad_norm": 10.038185119628906, "learning_rate": 1.6971105944868814e-06, "loss": 0.3431, "step": 2600 }, { "epoch": 0.84, "grad_norm": 8.289875984191895, "learning_rate": 1.6140817004317505e-06, "loss": 0.3821, "step": 2625 }, { "epoch": 0.85, "grad_norm": 11.42772388458252, "learning_rate": 1.5310528063766192e-06, "loss": 0.3611, "step": 2650 }, { "epoch": 0.86, "grad_norm": 8.776933670043945, "learning_rate": 1.4480239123214878e-06, "loss": 0.3241, "step": 2675 }, { "epoch": 0.87, "grad_norm": 14.44870376586914, "learning_rate": 1.364995018266357e-06, "loss": 0.3659, "step": 2700 }, { "epoch": 0.88, "grad_norm": 7.697235584259033, "learning_rate": 1.2819661242112256e-06, "loss": 0.317, "step": 2725 }, { "epoch": 0.88, "grad_norm": 9.33436107635498, "learning_rate": 1.1989372301560944e-06, "loss": 0.3243, "step": 2750 }, { "epoch": 0.89, "grad_norm": 7.878904819488525, "learning_rate": 1.115908336100963e-06, "loss": 0.2832, "step": 2775 }, { "epoch": 0.9, "grad_norm": 8.996261596679688, "learning_rate": 1.032879442045832e-06, "loss": 0.3585, "step": 2800 }, { "epoch": 0.91, "grad_norm": 10.357467651367188, "learning_rate": 9.498505479907008e-07, "loss": 0.3256, "step": 2825 }, { "epoch": 0.92, "grad_norm": 10.002203941345215, "learning_rate": 8.668216539355696e-07, "loss": 0.3459, "step": 2850 }, { "epoch": 0.92, "grad_norm": 10.587177276611328, "learning_rate": 7.837927598804385e-07, "loss": 0.3161, "step": 2875 }, { "epoch": 0.93, "grad_norm": 7.054004192352295, "learning_rate": 7.007638658253073e-07, "loss": 0.3362, "step": 2900 }, { "epoch": 0.94, "grad_norm": 10.065168380737305, "learning_rate": 6.177349717701761e-07, "loss": 0.3107, "step": 2925 }, { "epoch": 0.95, "grad_norm": 9.83284854888916, "learning_rate": 5.347060777150448e-07, "loss": 0.3301, "step": 2950 }, { "epoch": 0.96, "grad_norm": 11.811662673950195, "learning_rate": 4.5167718365991366e-07, "loss": 0.3792, "step": 2975 }, { "epoch": 0.96, "grad_norm": 11.07596206665039, "learning_rate": 3.686482896047825e-07, "loss": 0.3191, "step": 3000 }, { "epoch": 0.96, "eval_cer": 32.611014801973596, "eval_loss": 0.4216199815273285, "eval_runtime": 1822.6334, "eval_samples_per_second": 2.434, "eval_steps_per_second": 0.305, "step": 3000 }, { "epoch": 0.97, "grad_norm": 8.368192672729492, "learning_rate": 5.222149624305782e-06, "loss": 0.3044, "step": 3025 }, { "epoch": 0.98, "grad_norm": 13.727489471435547, "learning_rate": 5.181313296308397e-06, "loss": 0.3297, "step": 3050 }, { "epoch": 0.99, "grad_norm": 9.884183883666992, "learning_rate": 5.14047696831101e-06, "loss": 0.3753, "step": 3075 }, { "epoch": 1.0, "grad_norm": 10.073676109313965, "learning_rate": 5.0996406403136236e-06, "loss": 0.3247, "step": 3100 }, { "epoch": 1.0, "grad_norm": 9.34837532043457, "learning_rate": 5.0588043123162365e-06, "loss": 0.303, "step": 3125 }, { "epoch": 1.01, "grad_norm": 8.949431419372559, "learning_rate": 5.01796798431885e-06, "loss": 0.2329, "step": 3150 }, { "epoch": 1.02, "grad_norm": 7.911171913146973, "learning_rate": 4.977131656321464e-06, "loss": 0.2096, "step": 3175 }, { "epoch": 1.03, "grad_norm": 6.246947288513184, "learning_rate": 4.936295328324078e-06, "loss": 0.2208, "step": 3200 }, { "epoch": 1.04, "grad_norm": 7.6554059982299805, "learning_rate": 4.895459000326691e-06, "loss": 0.2508, "step": 3225 }, { "epoch": 1.04, "grad_norm": 9.491788864135742, "learning_rate": 4.8546226723293045e-06, "loss": 0.2274, "step": 3250 }, { "epoch": 1.05, "grad_norm": 9.15794849395752, "learning_rate": 4.813786344331918e-06, "loss": 0.2257, "step": 3275 }, { "epoch": 1.06, "grad_norm": 9.842211723327637, "learning_rate": 4.772950016334531e-06, "loss": 0.227, "step": 3300 }, { "epoch": 1.07, "grad_norm": 7.734405040740967, "learning_rate": 4.732113688337145e-06, "loss": 0.2207, "step": 3325 }, { "epoch": 1.08, "grad_norm": 8.951905250549316, "learning_rate": 4.691277360339759e-06, "loss": 0.2307, "step": 3350 }, { "epoch": 1.08, "grad_norm": 7.3573222160339355, "learning_rate": 4.650441032342372e-06, "loss": 0.2233, "step": 3375 }, { "epoch": 1.09, "grad_norm": 8.47739315032959, "learning_rate": 4.6096047043449855e-06, "loss": 0.2361, "step": 3400 }, { "epoch": 1.1, "grad_norm": 6.945776462554932, "learning_rate": 4.568768376347599e-06, "loss": 0.2353, "step": 3425 }, { "epoch": 1.11, "grad_norm": 8.67324161529541, "learning_rate": 4.527932048350212e-06, "loss": 0.2302, "step": 3450 }, { "epoch": 1.12, "grad_norm": 6.912210464477539, "learning_rate": 4.487095720352827e-06, "loss": 0.2292, "step": 3475 }, { "epoch": 1.13, "grad_norm": 8.434404373168945, "learning_rate": 4.44625939235544e-06, "loss": 0.2295, "step": 3500 }, { "epoch": 1.13, "eval_cer": 32.49766635551407, "eval_loss": 0.42611706256866455, "eval_runtime": 1848.4809, "eval_samples_per_second": 2.4, "eval_steps_per_second": 0.3, "step": 3500 }, { "epoch": 1.13, "grad_norm": 9.02902603149414, "learning_rate": 4.405423064358053e-06, "loss": 0.2382, "step": 3525 }, { "epoch": 1.14, "grad_norm": 8.05671215057373, "learning_rate": 4.364586736360667e-06, "loss": 0.222, "step": 3550 }, { "epoch": 1.15, "grad_norm": 10.546473503112793, "learning_rate": 4.32375040836328e-06, "loss": 0.2131, "step": 3575 }, { "epoch": 1.16, "grad_norm": 8.884702682495117, "learning_rate": 4.282914080365894e-06, "loss": 0.1886, "step": 3600 }, { "epoch": 1.17, "grad_norm": 7.569803237915039, "learning_rate": 4.242077752368508e-06, "loss": 0.222, "step": 3625 }, { "epoch": 1.17, "grad_norm": 6.256328105926514, "learning_rate": 4.201241424371121e-06, "loss": 0.2083, "step": 3650 }, { "epoch": 1.18, "grad_norm": 6.724915027618408, "learning_rate": 4.1604050963737345e-06, "loss": 0.2452, "step": 3675 }, { "epoch": 1.19, "grad_norm": 11.219491004943848, "learning_rate": 4.119568768376348e-06, "loss": 0.2217, "step": 3700 }, { "epoch": 1.2, "grad_norm": 6.6789469718933105, "learning_rate": 4.078732440378961e-06, "loss": 0.1958, "step": 3725 }, { "epoch": 1.21, "grad_norm": 7.929986476898193, "learning_rate": 4.037896112381575e-06, "loss": 0.1863, "step": 3750 }, { "epoch": 1.21, "grad_norm": 8.032015800476074, "learning_rate": 3.997059784384189e-06, "loss": 0.2153, "step": 3775 }, { "epoch": 1.22, "grad_norm": 8.176934242248535, "learning_rate": 3.956223456386802e-06, "loss": 0.1971, "step": 3800 }, { "epoch": 1.23, "grad_norm": 10.322613716125488, "learning_rate": 3.9153871283894155e-06, "loss": 0.1991, "step": 3825 }, { "epoch": 1.24, "grad_norm": 7.837410926818848, "learning_rate": 3.874550800392029e-06, "loss": 0.2043, "step": 3850 }, { "epoch": 1.25, "grad_norm": 12.684860229492188, "learning_rate": 3.833714472394642e-06, "loss": 0.2008, "step": 3875 }, { "epoch": 1.25, "grad_norm": 7.476794242858887, "learning_rate": 3.7928781443972564e-06, "loss": 0.2199, "step": 3900 }, { "epoch": 1.26, "grad_norm": 4.45359992980957, "learning_rate": 3.7520418163998693e-06, "loss": 0.196, "step": 3925 }, { "epoch": 1.27, "grad_norm": 9.373842239379883, "learning_rate": 3.7112054884024835e-06, "loss": 0.1939, "step": 3950 }, { "epoch": 1.28, "grad_norm": 6.383950233459473, "learning_rate": 3.670369160405097e-06, "loss": 0.2066, "step": 3975 }, { "epoch": 1.29, "grad_norm": 5.862789154052734, "learning_rate": 3.6295328324077102e-06, "loss": 0.1806, "step": 4000 }, { "epoch": 1.29, "eval_cer": 31.99093212428324, "eval_loss": 0.4084797203540802, "eval_runtime": 1898.761, "eval_samples_per_second": 2.336, "eval_steps_per_second": 0.292, "step": 4000 }, { "epoch": 1.29, "grad_norm": 7.52218770980835, "learning_rate": 3.588696504410324e-06, "loss": 0.1955, "step": 4025 }, { "epoch": 1.3, "grad_norm": 8.115983963012695, "learning_rate": 3.5478601764129374e-06, "loss": 0.1802, "step": 4050 }, { "epoch": 1.31, "grad_norm": 10.009458541870117, "learning_rate": 3.5070238484155507e-06, "loss": 0.1866, "step": 4075 }, { "epoch": 1.32, "grad_norm": 8.48315715789795, "learning_rate": 3.4661875204181645e-06, "loss": 0.2193, "step": 4100 }, { "epoch": 1.33, "grad_norm": 7.425174713134766, "learning_rate": 3.425351192420778e-06, "loss": 0.2096, "step": 4125 }, { "epoch": 1.33, "grad_norm": 5.02262544631958, "learning_rate": 3.384514864423391e-06, "loss": 0.1892, "step": 4150 }, { "epoch": 1.34, "grad_norm": 7.6967010498046875, "learning_rate": 3.343678536426005e-06, "loss": 0.1869, "step": 4175 }, { "epoch": 1.35, "grad_norm": 7.067899703979492, "learning_rate": 3.3028422084286183e-06, "loss": 0.1842, "step": 4200 }, { "epoch": 1.36, "grad_norm": 9.152185440063477, "learning_rate": 3.262005880431232e-06, "loss": 0.1601, "step": 4225 }, { "epoch": 1.37, "grad_norm": 5.13536262512207, "learning_rate": 3.2211695524338455e-06, "loss": 0.1766, "step": 4250 }, { "epoch": 1.37, "grad_norm": 8.453483581542969, "learning_rate": 3.180333224436459e-06, "loss": 0.1775, "step": 4275 }, { "epoch": 1.38, "grad_norm": 8.888550758361816, "learning_rate": 3.1394968964390726e-06, "loss": 0.1966, "step": 4300 }, { "epoch": 1.39, "grad_norm": 6.241116046905518, "learning_rate": 3.098660568441686e-06, "loss": 0.1602, "step": 4325 }, { "epoch": 1.4, "grad_norm": 10.21055793762207, "learning_rate": 3.0578242404442993e-06, "loss": 0.188, "step": 4350 }, { "epoch": 1.41, "grad_norm": 6.382270812988281, "learning_rate": 3.016987912446913e-06, "loss": 0.1686, "step": 4375 }, { "epoch": 1.41, "grad_norm": 8.593984603881836, "learning_rate": 2.9761515844495264e-06, "loss": 0.1959, "step": 4400 }, { "epoch": 1.42, "grad_norm": 8.248409271240234, "learning_rate": 2.93531525645214e-06, "loss": 0.1848, "step": 4425 }, { "epoch": 1.43, "grad_norm": 7.425219535827637, "learning_rate": 2.8944789284547536e-06, "loss": 0.1711, "step": 4450 }, { "epoch": 1.44, "grad_norm": 6.594272613525391, "learning_rate": 2.853642600457367e-06, "loss": 0.1601, "step": 4475 }, { "epoch": 1.45, "grad_norm": 6.964175224304199, "learning_rate": 2.8128062724599807e-06, "loss": 0.16, "step": 4500 }, { "epoch": 1.45, "eval_cer": 31.170822776370184, "eval_loss": 0.3913029432296753, "eval_runtime": 1863.6532, "eval_samples_per_second": 2.38, "eval_steps_per_second": 0.298, "step": 4500 }, { "epoch": 1.45, "grad_norm": 7.741447448730469, "learning_rate": 2.771969944462594e-06, "loss": 0.1552, "step": 4525 }, { "epoch": 1.46, "grad_norm": 6.088663101196289, "learning_rate": 2.7311336164652074e-06, "loss": 0.1759, "step": 4550 }, { "epoch": 1.47, "grad_norm": 9.178170204162598, "learning_rate": 2.690297288467821e-06, "loss": 0.1635, "step": 4575 }, { "epoch": 1.48, "grad_norm": 9.006258010864258, "learning_rate": 2.6494609604704345e-06, "loss": 0.1986, "step": 4600 }, { "epoch": 1.49, "grad_norm": 7.974513530731201, "learning_rate": 2.608624632473048e-06, "loss": 0.1781, "step": 4625 }, { "epoch": 1.49, "grad_norm": 8.142216682434082, "learning_rate": 2.5677883044756617e-06, "loss": 0.1509, "step": 4650 }, { "epoch": 1.5, "grad_norm": 5.452117919921875, "learning_rate": 2.526951976478275e-06, "loss": 0.1682, "step": 4675 }, { "epoch": 1.51, "grad_norm": 6.821118354797363, "learning_rate": 2.4861156484808888e-06, "loss": 0.1559, "step": 4700 }, { "epoch": 1.52, "grad_norm": 9.8412446975708, "learning_rate": 2.4452793204835026e-06, "loss": 0.1519, "step": 4725 }, { "epoch": 1.53, "grad_norm": 7.750609874725342, "learning_rate": 2.404442992486116e-06, "loss": 0.1394, "step": 4750 }, { "epoch": 1.53, "grad_norm": 8.334457397460938, "learning_rate": 2.3636066644887293e-06, "loss": 0.1605, "step": 4775 }, { "epoch": 1.54, "grad_norm": 5.586342811584473, "learning_rate": 2.322770336491343e-06, "loss": 0.1622, "step": 4800 }, { "epoch": 1.55, "grad_norm": 8.146045684814453, "learning_rate": 2.2819340084939564e-06, "loss": 0.175, "step": 4825 }, { "epoch": 1.56, "grad_norm": 5.796145915985107, "learning_rate": 2.24109768049657e-06, "loss": 0.1702, "step": 4850 }, { "epoch": 1.57, "grad_norm": 8.852866172790527, "learning_rate": 2.2002613524991835e-06, "loss": 0.1522, "step": 4875 }, { "epoch": 1.58, "grad_norm": 7.19501256942749, "learning_rate": 2.159425024501797e-06, "loss": 0.1465, "step": 4900 }, { "epoch": 1.58, "grad_norm": 7.024486064910889, "learning_rate": 2.1185886965044107e-06, "loss": 0.1539, "step": 4925 }, { "epoch": 1.59, "grad_norm": 7.009402751922607, "learning_rate": 2.077752368507024e-06, "loss": 0.1442, "step": 4950 }, { "epoch": 1.6, "grad_norm": 4.041311264038086, "learning_rate": 2.0369160405096374e-06, "loss": 0.151, "step": 4975 }, { "epoch": 1.61, "grad_norm": 8.767594337463379, "learning_rate": 1.996079712512251e-06, "loss": 0.1603, "step": 5000 }, { "epoch": 1.61, "eval_cer": 30.38405120682758, "eval_loss": 0.383564829826355, "eval_runtime": 1857.7457, "eval_samples_per_second": 2.388, "eval_steps_per_second": 0.299, "step": 5000 }, { "epoch": 1.62, "grad_norm": 5.629392147064209, "learning_rate": 1.9552433845148645e-06, "loss": 0.1515, "step": 5025 }, { "epoch": 1.62, "grad_norm": 5.018975734710693, "learning_rate": 1.914407056517478e-06, "loss": 0.157, "step": 5050 }, { "epoch": 1.63, "grad_norm": 6.182131767272949, "learning_rate": 1.8735707285200916e-06, "loss": 0.13, "step": 5075 }, { "epoch": 1.64, "grad_norm": 9.000260353088379, "learning_rate": 1.8327344005227052e-06, "loss": 0.1726, "step": 5100 }, { "epoch": 1.65, "grad_norm": 6.853832244873047, "learning_rate": 1.7918980725253188e-06, "loss": 0.1451, "step": 5125 }, { "epoch": 1.66, "grad_norm": 5.68117618560791, "learning_rate": 1.7510617445279321e-06, "loss": 0.1518, "step": 5150 }, { "epoch": 1.66, "grad_norm": 4.632532119750977, "learning_rate": 1.7102254165305457e-06, "loss": 0.144, "step": 5175 }, { "epoch": 1.67, "grad_norm": 8.772269248962402, "learning_rate": 1.6693890885331592e-06, "loss": 0.1525, "step": 5200 }, { "epoch": 1.68, "grad_norm": 8.809287071228027, "learning_rate": 1.6285527605357728e-06, "loss": 0.132, "step": 5225 }, { "epoch": 1.69, "grad_norm": 7.337480545043945, "learning_rate": 1.5877164325383862e-06, "loss": 0.1549, "step": 5250 }, { "epoch": 1.7, "grad_norm": 5.269392013549805, "learning_rate": 1.5468801045409997e-06, "loss": 0.1524, "step": 5275 }, { "epoch": 1.7, "grad_norm": 7.877448558807373, "learning_rate": 1.5060437765436133e-06, "loss": 0.1421, "step": 5300 }, { "epoch": 1.71, "grad_norm": 6.454422950744629, "learning_rate": 1.4652074485462266e-06, "loss": 0.1377, "step": 5325 }, { "epoch": 1.72, "grad_norm": 7.873298645019531, "learning_rate": 1.4243711205488402e-06, "loss": 0.1446, "step": 5350 }, { "epoch": 1.73, "grad_norm": 6.6517486572265625, "learning_rate": 1.383534792551454e-06, "loss": 0.1482, "step": 5375 }, { "epoch": 1.74, "grad_norm": 9.937956809997559, "learning_rate": 1.3426984645540676e-06, "loss": 0.1306, "step": 5400 }, { "epoch": 1.74, "grad_norm": 4.228558540344238, "learning_rate": 1.301862136556681e-06, "loss": 0.1229, "step": 5425 }, { "epoch": 1.75, "grad_norm": 4.710421085357666, "learning_rate": 1.2610258085592945e-06, "loss": 0.1374, "step": 5450 }, { "epoch": 1.76, "grad_norm": 4.934779644012451, "learning_rate": 1.220189480561908e-06, "loss": 0.1321, "step": 5475 }, { "epoch": 1.77, "grad_norm": 9.244394302368164, "learning_rate": 1.1793531525645214e-06, "loss": 0.1343, "step": 5500 }, { "epoch": 1.77, "eval_cer": 30.15735431390852, "eval_loss": 0.3783666491508484, "eval_runtime": 1871.1459, "eval_samples_per_second": 2.371, "eval_steps_per_second": 0.297, "step": 5500 }, { "epoch": 1.78, "grad_norm": 7.236656188964844, "learning_rate": 1.138516824567135e-06, "loss": 0.1295, "step": 5525 }, { "epoch": 1.78, "grad_norm": 6.239099502563477, "learning_rate": 1.0976804965697485e-06, "loss": 0.1378, "step": 5550 }, { "epoch": 1.79, "grad_norm": 4.9148945808410645, "learning_rate": 1.056844168572362e-06, "loss": 0.1272, "step": 5575 }, { "epoch": 1.8, "grad_norm": 7.572327136993408, "learning_rate": 1.0160078405749757e-06, "loss": 0.1405, "step": 5600 }, { "epoch": 1.81, "grad_norm": 6.76165771484375, "learning_rate": 9.751715125775892e-07, "loss": 0.1351, "step": 5625 }, { "epoch": 1.82, "grad_norm": 10.984220504760742, "learning_rate": 9.343351845802026e-07, "loss": 0.1467, "step": 5650 }, { "epoch": 1.82, "grad_norm": 4.543166637420654, "learning_rate": 8.934988565828162e-07, "loss": 0.1175, "step": 5675 }, { "epoch": 1.83, "grad_norm": 8.191649436950684, "learning_rate": 8.526625285854297e-07, "loss": 0.1388, "step": 5700 }, { "epoch": 1.84, "grad_norm": 7.215826988220215, "learning_rate": 8.118262005880432e-07, "loss": 0.1354, "step": 5725 }, { "epoch": 1.85, "grad_norm": 5.940629959106445, "learning_rate": 7.709898725906567e-07, "loss": 0.1283, "step": 5750 }, { "epoch": 1.86, "grad_norm": 6.796767234802246, "learning_rate": 7.301535445932702e-07, "loss": 0.1274, "step": 5775 }, { "epoch": 1.86, "grad_norm": 8.403697967529297, "learning_rate": 6.893172165958838e-07, "loss": 0.1333, "step": 5800 }, { "epoch": 1.87, "grad_norm": 6.127229690551758, "learning_rate": 6.484808885984972e-07, "loss": 0.1406, "step": 5825 }, { "epoch": 1.88, "grad_norm": 7.16465950012207, "learning_rate": 6.076445606011108e-07, "loss": 0.132, "step": 5850 }, { "epoch": 1.89, "grad_norm": 5.777968406677246, "learning_rate": 5.668082326037243e-07, "loss": 0.1437, "step": 5875 }, { "epoch": 1.9, "grad_norm": 6.021764755249023, "learning_rate": 5.259719046063379e-07, "loss": 0.1203, "step": 5900 }, { "epoch": 1.9, "grad_norm": 5.480493068695068, "learning_rate": 4.851355766089514e-07, "loss": 0.1398, "step": 5925 }, { "epoch": 1.91, "grad_norm": 7.609493732452393, "learning_rate": 4.442992486115649e-07, "loss": 0.1274, "step": 5950 }, { "epoch": 1.92, "grad_norm": 5.910650730133057, "learning_rate": 4.034629206141784e-07, "loss": 0.1352, "step": 5975 }, { "epoch": 1.93, "grad_norm": 4.371640682220459, "learning_rate": 3.626265926167919e-07, "loss": 0.1265, "step": 6000 }, { "epoch": 1.93, "eval_cer": 29.697292972396323, "eval_loss": 0.37359777092933655, "eval_runtime": 1867.3275, "eval_samples_per_second": 2.376, "eval_steps_per_second": 0.297, "step": 6000 } ], "logging_steps": 25, "max_steps": 6222, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 2.770419843072e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }