{
"best_metric": 29.697292972396323,
"best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-6000",
"epoch": 1.9286403085824495,
"eval_steps": 500,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 48.035465240478516,
"learning_rate": 2.5e-06,
"loss": 6.8285,
"step": 25
},
{
"epoch": 0.02,
"grad_norm": 142.98660278320312,
"learning_rate": 5e-06,
"loss": 3.2493,
"step": 50
},
{
"epoch": 0.02,
"grad_norm": 27.529985427856445,
"learning_rate": 7.500000000000001e-06,
"loss": 2.2763,
"step": 75
},
{
"epoch": 0.03,
"grad_norm": 21.324289321899414,
"learning_rate": 1e-05,
"loss": 1.7193,
"step": 100
},
{
"epoch": 0.04,
"grad_norm": 13.650047302246094,
"learning_rate": 9.91697110594487e-06,
"loss": 1.3177,
"step": 125
},
{
"epoch": 0.05,
"grad_norm": 14.939111709594727,
"learning_rate": 9.83394221188974e-06,
"loss": 1.2486,
"step": 150
},
{
"epoch": 0.06,
"grad_norm": 14.85805606842041,
"learning_rate": 9.750913317834608e-06,
"loss": 1.1991,
"step": 175
},
{
"epoch": 0.06,
"grad_norm": 15.52128791809082,
"learning_rate": 9.667884423779476e-06,
"loss": 1.1126,
"step": 200
},
{
"epoch": 0.07,
"grad_norm": 22.642026901245117,
"learning_rate": 9.584855529724345e-06,
"loss": 1.0437,
"step": 225
},
{
"epoch": 0.08,
"grad_norm": 16.712217330932617,
"learning_rate": 9.501826635669213e-06,
"loss": 0.9986,
"step": 250
},
{
"epoch": 0.09,
"grad_norm": 13.973222732543945,
"learning_rate": 9.418797741614083e-06,
"loss": 0.9188,
"step": 275
},
{
"epoch": 0.1,
"grad_norm": 13.404074668884277,
"learning_rate": 9.335768847558952e-06,
"loss": 0.9244,
"step": 300
},
{
"epoch": 0.1,
"grad_norm": 12.21960163116455,
"learning_rate": 9.25273995350382e-06,
"loss": 0.9018,
"step": 325
},
{
"epoch": 0.11,
"grad_norm": 14.460400581359863,
"learning_rate": 9.169711059448689e-06,
"loss": 0.8398,
"step": 350
},
{
"epoch": 0.12,
"grad_norm": 14.076154708862305,
"learning_rate": 9.086682165393557e-06,
"loss": 0.8828,
"step": 375
},
{
"epoch": 0.13,
"grad_norm": 13.204269409179688,
"learning_rate": 9.003653271338426e-06,
"loss": 0.8503,
"step": 400
},
{
"epoch": 0.14,
"grad_norm": 10.978958129882812,
"learning_rate": 8.920624377283296e-06,
"loss": 0.8198,
"step": 425
},
{
"epoch": 0.14,
"grad_norm": 13.963995933532715,
"learning_rate": 8.837595483228164e-06,
"loss": 0.8202,
"step": 450
},
{
"epoch": 0.15,
"grad_norm": 13.337563514709473,
"learning_rate": 8.754566589173033e-06,
"loss": 0.7536,
"step": 475
},
{
"epoch": 0.16,
"grad_norm": 12.711252212524414,
"learning_rate": 8.671537695117903e-06,
"loss": 0.7938,
"step": 500
},
{
"epoch": 0.16,
"eval_cer": 55.83411121482864,
"eval_loss": 0.7767874002456665,
"eval_runtime": 1802.202,
"eval_samples_per_second": 2.461,
"eval_steps_per_second": 0.308,
"step": 500
},
{
"epoch": 0.17,
"grad_norm": 13.947765350341797,
"learning_rate": 8.588508801062771e-06,
"loss": 0.7784,
"step": 525
},
{
"epoch": 0.18,
"grad_norm": 11.999704360961914,
"learning_rate": 8.50547990700764e-06,
"loss": 0.7646,
"step": 550
},
{
"epoch": 0.18,
"grad_norm": 12.103652954101562,
"learning_rate": 8.422451012952508e-06,
"loss": 0.7001,
"step": 575
},
{
"epoch": 0.19,
"grad_norm": 13.490057945251465,
"learning_rate": 8.339422118897376e-06,
"loss": 0.7941,
"step": 600
},
{
"epoch": 0.2,
"grad_norm": 13.992444038391113,
"learning_rate": 8.256393224842247e-06,
"loss": 0.6561,
"step": 625
},
{
"epoch": 0.21,
"grad_norm": 14.403618812561035,
"learning_rate": 8.173364330787115e-06,
"loss": 0.6618,
"step": 650
},
{
"epoch": 0.22,
"grad_norm": 12.38306713104248,
"learning_rate": 8.090335436731984e-06,
"loss": 0.7515,
"step": 675
},
{
"epoch": 0.23,
"grad_norm": 13.88232135772705,
"learning_rate": 8.007306542676852e-06,
"loss": 0.6823,
"step": 700
},
{
"epoch": 0.23,
"grad_norm": 14.506720542907715,
"learning_rate": 7.92427764862172e-06,
"loss": 0.662,
"step": 725
},
{
"epoch": 0.24,
"grad_norm": 11.101289749145508,
"learning_rate": 7.841248754566589e-06,
"loss": 0.6974,
"step": 750
},
{
"epoch": 0.25,
"grad_norm": 10.762197494506836,
"learning_rate": 7.758219860511459e-06,
"loss": 0.6643,
"step": 775
},
{
"epoch": 0.26,
"grad_norm": 14.123621940612793,
"learning_rate": 7.675190966456327e-06,
"loss": 0.6878,
"step": 800
},
{
"epoch": 0.27,
"grad_norm": 13.668756484985352,
"learning_rate": 7.592162072401196e-06,
"loss": 0.6102,
"step": 825
},
{
"epoch": 0.27,
"grad_norm": 13.39156723022461,
"learning_rate": 7.509133178346065e-06,
"loss": 0.6133,
"step": 850
},
{
"epoch": 0.28,
"grad_norm": 11.687459945678711,
"learning_rate": 7.426104284290934e-06,
"loss": 0.6139,
"step": 875
},
{
"epoch": 0.29,
"grad_norm": 14.004112243652344,
"learning_rate": 7.343075390235803e-06,
"loss": 0.6257,
"step": 900
},
{
"epoch": 0.3,
"grad_norm": 13.317120552062988,
"learning_rate": 7.260046496180671e-06,
"loss": 0.6616,
"step": 925
},
{
"epoch": 0.31,
"grad_norm": 13.344803810119629,
"learning_rate": 7.17701760212554e-06,
"loss": 0.6086,
"step": 950
},
{
"epoch": 0.31,
"grad_norm": 12.64527416229248,
"learning_rate": 7.09398870807041e-06,
"loss": 0.5734,
"step": 975
},
{
"epoch": 0.32,
"grad_norm": 9.928169250488281,
"learning_rate": 7.0109598140152775e-06,
"loss": 0.5845,
"step": 1000
},
{
"epoch": 0.32,
"eval_cer": 41.15215362048273,
"eval_loss": 0.5947259068489075,
"eval_runtime": 1807.1895,
"eval_samples_per_second": 2.455,
"eval_steps_per_second": 0.307,
"step": 1000
},
{
"epoch": 0.33,
"grad_norm": 13.225513458251953,
"learning_rate": 6.927930919960146e-06,
"loss": 0.6458,
"step": 1025
},
{
"epoch": 0.34,
"grad_norm": 10.804333686828613,
"learning_rate": 6.844902025905016e-06,
"loss": 0.5106,
"step": 1050
},
{
"epoch": 0.35,
"grad_norm": 10.24815559387207,
"learning_rate": 6.7618731318498845e-06,
"loss": 0.523,
"step": 1075
},
{
"epoch": 0.35,
"grad_norm": 11.681272506713867,
"learning_rate": 6.678844237794753e-06,
"loss": 0.5585,
"step": 1100
},
{
"epoch": 0.36,
"grad_norm": 10.01819133758545,
"learning_rate": 6.595815343739622e-06,
"loss": 0.5943,
"step": 1125
},
{
"epoch": 0.37,
"grad_norm": 11.715396881103516,
"learning_rate": 6.512786449684491e-06,
"loss": 0.5472,
"step": 1150
},
{
"epoch": 0.38,
"grad_norm": 10.607870101928711,
"learning_rate": 6.429757555629359e-06,
"loss": 0.5579,
"step": 1175
},
{
"epoch": 0.39,
"grad_norm": 12.249415397644043,
"learning_rate": 6.3467286615742285e-06,
"loss": 0.5269,
"step": 1200
},
{
"epoch": 0.39,
"grad_norm": 12.76510238647461,
"learning_rate": 6.263699767519097e-06,
"loss": 0.5273,
"step": 1225
},
{
"epoch": 0.4,
"grad_norm": 8.935369491577148,
"learning_rate": 6.180670873463966e-06,
"loss": 0.525,
"step": 1250
},
{
"epoch": 0.41,
"grad_norm": 11.15725040435791,
"learning_rate": 6.097641979408835e-06,
"loss": 0.4792,
"step": 1275
},
{
"epoch": 0.42,
"grad_norm": 11.681845664978027,
"learning_rate": 6.014613085353703e-06,
"loss": 0.5462,
"step": 1300
},
{
"epoch": 0.43,
"grad_norm": 13.019536972045898,
"learning_rate": 5.931584191298572e-06,
"loss": 0.4996,
"step": 1325
},
{
"epoch": 0.43,
"grad_norm": 11.789406776428223,
"learning_rate": 5.848555297243441e-06,
"loss": 0.5088,
"step": 1350
},
{
"epoch": 0.44,
"grad_norm": 13.898345947265625,
"learning_rate": 5.765526403188309e-06,
"loss": 0.5069,
"step": 1375
},
{
"epoch": 0.45,
"grad_norm": 11.257216453552246,
"learning_rate": 5.682497509133179e-06,
"loss": 0.4942,
"step": 1400
},
{
"epoch": 0.46,
"grad_norm": 11.38137149810791,
"learning_rate": 5.599468615078048e-06,
"loss": 0.4532,
"step": 1425
},
{
"epoch": 0.47,
"grad_norm": 10.852495193481445,
"learning_rate": 5.516439721022916e-06,
"loss": 0.5231,
"step": 1450
},
{
"epoch": 0.47,
"grad_norm": 14.178400039672852,
"learning_rate": 5.4334108269677856e-06,
"loss": 0.5041,
"step": 1475
},
{
"epoch": 0.48,
"grad_norm": 11.18582534790039,
"learning_rate": 5.350381932912654e-06,
"loss": 0.459,
"step": 1500
},
{
"epoch": 0.48,
"eval_cer": 37.618349113215096,
"eval_loss": 0.5131608247756958,
"eval_runtime": 1799.4249,
"eval_samples_per_second": 2.465,
"eval_steps_per_second": 0.308,
"step": 1500
},
{
"epoch": 0.49,
"grad_norm": 9.844304084777832,
"learning_rate": 5.267353038857523e-06,
"loss": 0.5035,
"step": 1525
},
{
"epoch": 0.5,
"grad_norm": 11.165616035461426,
"learning_rate": 5.184324144802392e-06,
"loss": 0.449,
"step": 1550
},
{
"epoch": 0.51,
"grad_norm": 10.43535327911377,
"learning_rate": 5.10129525074726e-06,
"loss": 0.4471,
"step": 1575
},
{
"epoch": 0.51,
"grad_norm": 9.737510681152344,
"learning_rate": 5.0182663566921295e-06,
"loss": 0.4779,
"step": 1600
},
{
"epoch": 0.52,
"grad_norm": 10.221022605895996,
"learning_rate": 4.935237462636998e-06,
"loss": 0.4266,
"step": 1625
},
{
"epoch": 0.53,
"grad_norm": 7.031712532043457,
"learning_rate": 4.852208568581867e-06,
"loss": 0.4355,
"step": 1650
},
{
"epoch": 0.54,
"grad_norm": 10.13843822479248,
"learning_rate": 4.769179674526736e-06,
"loss": 0.4506,
"step": 1675
},
{
"epoch": 0.55,
"grad_norm": 14.100777626037598,
"learning_rate": 4.686150780471604e-06,
"loss": 0.4484,
"step": 1700
},
{
"epoch": 0.55,
"grad_norm": 11.218331336975098,
"learning_rate": 4.603121886416473e-06,
"loss": 0.4637,
"step": 1725
},
{
"epoch": 0.56,
"grad_norm": 9.891203880310059,
"learning_rate": 4.520092992361343e-06,
"loss": 0.4142,
"step": 1750
},
{
"epoch": 0.57,
"grad_norm": 9.585916519165039,
"learning_rate": 4.437064098306211e-06,
"loss": 0.4202,
"step": 1775
},
{
"epoch": 0.58,
"grad_norm": 10.81905460357666,
"learning_rate": 4.35403520425108e-06,
"loss": 0.4459,
"step": 1800
},
{
"epoch": 0.59,
"grad_norm": 13.257423400878906,
"learning_rate": 4.271006310195949e-06,
"loss": 0.448,
"step": 1825
},
{
"epoch": 0.59,
"grad_norm": 9.057276725769043,
"learning_rate": 4.187977416140817e-06,
"loss": 0.4043,
"step": 1850
},
{
"epoch": 0.6,
"grad_norm": 11.002601623535156,
"learning_rate": 4.104948522085686e-06,
"loss": 0.4011,
"step": 1875
},
{
"epoch": 0.61,
"grad_norm": 15.421494483947754,
"learning_rate": 4.021919628030555e-06,
"loss": 0.4208,
"step": 1900
},
{
"epoch": 0.62,
"grad_norm": 12.186066627502441,
"learning_rate": 3.938890733975424e-06,
"loss": 0.389,
"step": 1925
},
{
"epoch": 0.63,
"grad_norm": 8.680899620056152,
"learning_rate": 3.855861839920293e-06,
"loss": 0.4189,
"step": 1950
},
{
"epoch": 0.63,
"grad_norm": 10.597740173339844,
"learning_rate": 3.7728329458651612e-06,
"loss": 0.3654,
"step": 1975
},
{
"epoch": 0.64,
"grad_norm": 10.675308227539062,
"learning_rate": 3.6898040518100305e-06,
"loss": 0.3512,
"step": 2000
},
{
"epoch": 0.64,
"eval_cer": 35.404720629417255,
"eval_loss": 0.4709227383136749,
"eval_runtime": 1802.3985,
"eval_samples_per_second": 2.461,
"eval_steps_per_second": 0.308,
"step": 2000
},
{
"epoch": 0.65,
"grad_norm": 9.923101425170898,
"learning_rate": 3.6067751577548985e-06,
"loss": 0.4361,
"step": 2025
},
{
"epoch": 0.66,
"grad_norm": 9.01765251159668,
"learning_rate": 3.523746263699768e-06,
"loss": 0.3896,
"step": 2050
},
{
"epoch": 0.67,
"grad_norm": 11.23643684387207,
"learning_rate": 3.4407173696446367e-06,
"loss": 0.3453,
"step": 2075
},
{
"epoch": 0.68,
"grad_norm": 9.193674087524414,
"learning_rate": 3.3576884755895056e-06,
"loss": 0.3888,
"step": 2100
},
{
"epoch": 0.68,
"grad_norm": 8.438018798828125,
"learning_rate": 3.274659581534374e-06,
"loss": 0.3798,
"step": 2125
},
{
"epoch": 0.69,
"grad_norm": 9.949082374572754,
"learning_rate": 3.191630687479243e-06,
"loss": 0.3828,
"step": 2150
},
{
"epoch": 0.7,
"grad_norm": 12.07507610321045,
"learning_rate": 3.1086017934241117e-06,
"loss": 0.4027,
"step": 2175
},
{
"epoch": 0.71,
"grad_norm": 12.350488662719727,
"learning_rate": 3.025572899368981e-06,
"loss": 0.3791,
"step": 2200
},
{
"epoch": 0.72,
"grad_norm": 12.681595802307129,
"learning_rate": 2.9425440053138495e-06,
"loss": 0.3863,
"step": 2225
},
{
"epoch": 0.72,
"grad_norm": 13.789870262145996,
"learning_rate": 2.8595151112587184e-06,
"loss": 0.3688,
"step": 2250
},
{
"epoch": 0.73,
"grad_norm": 11.885881423950195,
"learning_rate": 2.7764862172035872e-06,
"loss": 0.3661,
"step": 2275
},
{
"epoch": 0.74,
"grad_norm": 10.707484245300293,
"learning_rate": 2.6934573231484557e-06,
"loss": 0.3731,
"step": 2300
},
{
"epoch": 0.75,
"grad_norm": 12.371014595031738,
"learning_rate": 2.6104284290933245e-06,
"loss": 0.3651,
"step": 2325
},
{
"epoch": 0.76,
"grad_norm": 12.415855407714844,
"learning_rate": 2.5273995350381934e-06,
"loss": 0.3529,
"step": 2350
},
{
"epoch": 0.76,
"grad_norm": 12.046368598937988,
"learning_rate": 2.4443706409830623e-06,
"loss": 0.3565,
"step": 2375
},
{
"epoch": 0.77,
"grad_norm": 10.2451810836792,
"learning_rate": 2.361341746927931e-06,
"loss": 0.3337,
"step": 2400
},
{
"epoch": 0.78,
"grad_norm": 7.761926174163818,
"learning_rate": 2.2783128528728e-06,
"loss": 0.3636,
"step": 2425
},
{
"epoch": 0.79,
"grad_norm": 9.736420631408691,
"learning_rate": 2.1952839588176684e-06,
"loss": 0.346,
"step": 2450
},
{
"epoch": 0.8,
"grad_norm": 9.760013580322266,
"learning_rate": 2.1122550647625377e-06,
"loss": 0.3535,
"step": 2475
},
{
"epoch": 0.8,
"grad_norm": 9.893476486206055,
"learning_rate": 2.029226170707406e-06,
"loss": 0.3758,
"step": 2500
},
{
"epoch": 0.8,
"eval_cer": 33.57781037471663,
"eval_loss": 0.43632233142852783,
"eval_runtime": 1812.7817,
"eval_samples_per_second": 2.447,
"eval_steps_per_second": 0.306,
"step": 2500
},
{
"epoch": 0.81,
"grad_norm": 11.384421348571777,
"learning_rate": 1.946197276652275e-06,
"loss": 0.3466,
"step": 2525
},
{
"epoch": 0.82,
"grad_norm": 9.347311973571777,
"learning_rate": 1.863168382597144e-06,
"loss": 0.3558,
"step": 2550
},
{
"epoch": 0.83,
"grad_norm": 9.740177154541016,
"learning_rate": 1.7801394885420128e-06,
"loss": 0.4067,
"step": 2575
},
{
"epoch": 0.84,
"grad_norm": 10.038185119628906,
"learning_rate": 1.6971105944868814e-06,
"loss": 0.3431,
"step": 2600
},
{
"epoch": 0.84,
"grad_norm": 8.289875984191895,
"learning_rate": 1.6140817004317505e-06,
"loss": 0.3821,
"step": 2625
},
{
"epoch": 0.85,
"grad_norm": 11.42772388458252,
"learning_rate": 1.5310528063766192e-06,
"loss": 0.3611,
"step": 2650
},
{
"epoch": 0.86,
"grad_norm": 8.776933670043945,
"learning_rate": 1.4480239123214878e-06,
"loss": 0.3241,
"step": 2675
},
{
"epoch": 0.87,
"grad_norm": 14.44870376586914,
"learning_rate": 1.364995018266357e-06,
"loss": 0.3659,
"step": 2700
},
{
"epoch": 0.88,
"grad_norm": 7.697235584259033,
"learning_rate": 1.2819661242112256e-06,
"loss": 0.317,
"step": 2725
},
{
"epoch": 0.88,
"grad_norm": 9.33436107635498,
"learning_rate": 1.1989372301560944e-06,
"loss": 0.3243,
"step": 2750
},
{
"epoch": 0.89,
"grad_norm": 7.878904819488525,
"learning_rate": 1.115908336100963e-06,
"loss": 0.2832,
"step": 2775
},
{
"epoch": 0.9,
"grad_norm": 8.996261596679688,
"learning_rate": 1.032879442045832e-06,
"loss": 0.3585,
"step": 2800
},
{
"epoch": 0.91,
"grad_norm": 10.357467651367188,
"learning_rate": 9.498505479907008e-07,
"loss": 0.3256,
"step": 2825
},
{
"epoch": 0.92,
"grad_norm": 10.002203941345215,
"learning_rate": 8.668216539355696e-07,
"loss": 0.3459,
"step": 2850
},
{
"epoch": 0.92,
"grad_norm": 10.587177276611328,
"learning_rate": 7.837927598804385e-07,
"loss": 0.3161,
"step": 2875
},
{
"epoch": 0.93,
"grad_norm": 7.054004192352295,
"learning_rate": 7.007638658253073e-07,
"loss": 0.3362,
"step": 2900
},
{
"epoch": 0.94,
"grad_norm": 10.065168380737305,
"learning_rate": 6.177349717701761e-07,
"loss": 0.3107,
"step": 2925
},
{
"epoch": 0.95,
"grad_norm": 9.83284854888916,
"learning_rate": 5.347060777150448e-07,
"loss": 0.3301,
"step": 2950
},
{
"epoch": 0.96,
"grad_norm": 11.811662673950195,
"learning_rate": 4.5167718365991366e-07,
"loss": 0.3792,
"step": 2975
},
{
"epoch": 0.96,
"grad_norm": 11.07596206665039,
"learning_rate": 3.686482896047825e-07,
"loss": 0.3191,
"step": 3000
},
{
"epoch": 0.96,
"eval_cer": 32.611014801973596,
"eval_loss": 0.4216199815273285,
"eval_runtime": 1822.6334,
"eval_samples_per_second": 2.434,
"eval_steps_per_second": 0.305,
"step": 3000
},
{
"epoch": 0.97,
"grad_norm": 8.368192672729492,
"learning_rate": 5.222149624305782e-06,
"loss": 0.3044,
"step": 3025
},
{
"epoch": 0.98,
"grad_norm": 13.727489471435547,
"learning_rate": 5.181313296308397e-06,
"loss": 0.3297,
"step": 3050
},
{
"epoch": 0.99,
"grad_norm": 9.884183883666992,
"learning_rate": 5.14047696831101e-06,
"loss": 0.3753,
"step": 3075
},
{
"epoch": 1.0,
"grad_norm": 10.073676109313965,
"learning_rate": 5.0996406403136236e-06,
"loss": 0.3247,
"step": 3100
},
{
"epoch": 1.0,
"grad_norm": 9.34837532043457,
"learning_rate": 5.0588043123162365e-06,
"loss": 0.303,
"step": 3125
},
{
"epoch": 1.01,
"grad_norm": 8.949431419372559,
"learning_rate": 5.01796798431885e-06,
"loss": 0.2329,
"step": 3150
},
{
"epoch": 1.02,
"grad_norm": 7.911171913146973,
"learning_rate": 4.977131656321464e-06,
"loss": 0.2096,
"step": 3175
},
{
"epoch": 1.03,
"grad_norm": 6.246947288513184,
"learning_rate": 4.936295328324078e-06,
"loss": 0.2208,
"step": 3200
},
{
"epoch": 1.04,
"grad_norm": 7.6554059982299805,
"learning_rate": 4.895459000326691e-06,
"loss": 0.2508,
"step": 3225
},
{
"epoch": 1.04,
"grad_norm": 9.491788864135742,
"learning_rate": 4.8546226723293045e-06,
"loss": 0.2274,
"step": 3250
},
{
"epoch": 1.05,
"grad_norm": 9.15794849395752,
"learning_rate": 4.813786344331918e-06,
"loss": 0.2257,
"step": 3275
},
{
"epoch": 1.06,
"grad_norm": 9.842211723327637,
"learning_rate": 4.772950016334531e-06,
"loss": 0.227,
"step": 3300
},
{
"epoch": 1.07,
"grad_norm": 7.734405040740967,
"learning_rate": 4.732113688337145e-06,
"loss": 0.2207,
"step": 3325
},
{
"epoch": 1.08,
"grad_norm": 8.951905250549316,
"learning_rate": 4.691277360339759e-06,
"loss": 0.2307,
"step": 3350
},
{
"epoch": 1.08,
"grad_norm": 7.3573222160339355,
"learning_rate": 4.650441032342372e-06,
"loss": 0.2233,
"step": 3375
},
{
"epoch": 1.09,
"grad_norm": 8.47739315032959,
"learning_rate": 4.6096047043449855e-06,
"loss": 0.2361,
"step": 3400
},
{
"epoch": 1.1,
"grad_norm": 6.945776462554932,
"learning_rate": 4.568768376347599e-06,
"loss": 0.2353,
"step": 3425
},
{
"epoch": 1.11,
"grad_norm": 8.67324161529541,
"learning_rate": 4.527932048350212e-06,
"loss": 0.2302,
"step": 3450
},
{
"epoch": 1.12,
"grad_norm": 6.912210464477539,
"learning_rate": 4.487095720352827e-06,
"loss": 0.2292,
"step": 3475
},
{
"epoch": 1.13,
"grad_norm": 8.434404373168945,
"learning_rate": 4.44625939235544e-06,
"loss": 0.2295,
"step": 3500
},
{
"epoch": 1.13,
"eval_cer": 32.49766635551407,
"eval_loss": 0.42611706256866455,
"eval_runtime": 1848.4809,
"eval_samples_per_second": 2.4,
"eval_steps_per_second": 0.3,
"step": 3500
},
{
"epoch": 1.13,
"grad_norm": 9.02902603149414,
"learning_rate": 4.405423064358053e-06,
"loss": 0.2382,
"step": 3525
},
{
"epoch": 1.14,
"grad_norm": 8.05671215057373,
"learning_rate": 4.364586736360667e-06,
"loss": 0.222,
"step": 3550
},
{
"epoch": 1.15,
"grad_norm": 10.546473503112793,
"learning_rate": 4.32375040836328e-06,
"loss": 0.2131,
"step": 3575
},
{
"epoch": 1.16,
"grad_norm": 8.884702682495117,
"learning_rate": 4.282914080365894e-06,
"loss": 0.1886,
"step": 3600
},
{
"epoch": 1.17,
"grad_norm": 7.569803237915039,
"learning_rate": 4.242077752368508e-06,
"loss": 0.222,
"step": 3625
},
{
"epoch": 1.17,
"grad_norm": 6.256328105926514,
"learning_rate": 4.201241424371121e-06,
"loss": 0.2083,
"step": 3650
},
{
"epoch": 1.18,
"grad_norm": 6.724915027618408,
"learning_rate": 4.1604050963737345e-06,
"loss": 0.2452,
"step": 3675
},
{
"epoch": 1.19,
"grad_norm": 11.219491004943848,
"learning_rate": 4.119568768376348e-06,
"loss": 0.2217,
"step": 3700
},
{
"epoch": 1.2,
"grad_norm": 6.6789469718933105,
"learning_rate": 4.078732440378961e-06,
"loss": 0.1958,
"step": 3725
},
{
"epoch": 1.21,
"grad_norm": 7.929986476898193,
"learning_rate": 4.037896112381575e-06,
"loss": 0.1863,
"step": 3750
},
{
"epoch": 1.21,
"grad_norm": 8.032015800476074,
"learning_rate": 3.997059784384189e-06,
"loss": 0.2153,
"step": 3775
},
{
"epoch": 1.22,
"grad_norm": 8.176934242248535,
"learning_rate": 3.956223456386802e-06,
"loss": 0.1971,
"step": 3800
},
{
"epoch": 1.23,
"grad_norm": 10.322613716125488,
"learning_rate": 3.9153871283894155e-06,
"loss": 0.1991,
"step": 3825
},
{
"epoch": 1.24,
"grad_norm": 7.837410926818848,
"learning_rate": 3.874550800392029e-06,
"loss": 0.2043,
"step": 3850
},
{
"epoch": 1.25,
"grad_norm": 12.684860229492188,
"learning_rate": 3.833714472394642e-06,
"loss": 0.2008,
"step": 3875
},
{
"epoch": 1.25,
"grad_norm": 7.476794242858887,
"learning_rate": 3.7928781443972564e-06,
"loss": 0.2199,
"step": 3900
},
{
"epoch": 1.26,
"grad_norm": 4.45359992980957,
"learning_rate": 3.7520418163998693e-06,
"loss": 0.196,
"step": 3925
},
{
"epoch": 1.27,
"grad_norm": 9.373842239379883,
"learning_rate": 3.7112054884024835e-06,
"loss": 0.1939,
"step": 3950
},
{
"epoch": 1.28,
"grad_norm": 6.383950233459473,
"learning_rate": 3.670369160405097e-06,
"loss": 0.2066,
"step": 3975
},
{
"epoch": 1.29,
"grad_norm": 5.862789154052734,
"learning_rate": 3.6295328324077102e-06,
"loss": 0.1806,
"step": 4000
},
{
"epoch": 1.29,
"eval_cer": 31.99093212428324,
"eval_loss": 0.4084797203540802,
"eval_runtime": 1898.761,
"eval_samples_per_second": 2.336,
"eval_steps_per_second": 0.292,
"step": 4000
},
{
"epoch": 1.29,
"grad_norm": 7.52218770980835,
"learning_rate": 3.588696504410324e-06,
"loss": 0.1955,
"step": 4025
},
{
"epoch": 1.3,
"grad_norm": 8.115983963012695,
"learning_rate": 3.5478601764129374e-06,
"loss": 0.1802,
"step": 4050
},
{
"epoch": 1.31,
"grad_norm": 10.009458541870117,
"learning_rate": 3.5070238484155507e-06,
"loss": 0.1866,
"step": 4075
},
{
"epoch": 1.32,
"grad_norm": 8.48315715789795,
"learning_rate": 3.4661875204181645e-06,
"loss": 0.2193,
"step": 4100
},
{
"epoch": 1.33,
"grad_norm": 7.425174713134766,
"learning_rate": 3.425351192420778e-06,
"loss": 0.2096,
"step": 4125
},
{
"epoch": 1.33,
"grad_norm": 5.02262544631958,
"learning_rate": 3.384514864423391e-06,
"loss": 0.1892,
"step": 4150
},
{
"epoch": 1.34,
"grad_norm": 7.6967010498046875,
"learning_rate": 3.343678536426005e-06,
"loss": 0.1869,
"step": 4175
},
{
"epoch": 1.35,
"grad_norm": 7.067899703979492,
"learning_rate": 3.3028422084286183e-06,
"loss": 0.1842,
"step": 4200
},
{
"epoch": 1.36,
"grad_norm": 9.152185440063477,
"learning_rate": 3.262005880431232e-06,
"loss": 0.1601,
"step": 4225
},
{
"epoch": 1.37,
"grad_norm": 5.13536262512207,
"learning_rate": 3.2211695524338455e-06,
"loss": 0.1766,
"step": 4250
},
{
"epoch": 1.37,
"grad_norm": 8.453483581542969,
"learning_rate": 3.180333224436459e-06,
"loss": 0.1775,
"step": 4275
},
{
"epoch": 1.38,
"grad_norm": 8.888550758361816,
"learning_rate": 3.1394968964390726e-06,
"loss": 0.1966,
"step": 4300
},
{
"epoch": 1.39,
"grad_norm": 6.241116046905518,
"learning_rate": 3.098660568441686e-06,
"loss": 0.1602,
"step": 4325
},
{
"epoch": 1.4,
"grad_norm": 10.21055793762207,
"learning_rate": 3.0578242404442993e-06,
"loss": 0.188,
"step": 4350
},
{
"epoch": 1.41,
"grad_norm": 6.382270812988281,
"learning_rate": 3.016987912446913e-06,
"loss": 0.1686,
"step": 4375
},
{
"epoch": 1.41,
"grad_norm": 8.593984603881836,
"learning_rate": 2.9761515844495264e-06,
"loss": 0.1959,
"step": 4400
},
{
"epoch": 1.42,
"grad_norm": 8.248409271240234,
"learning_rate": 2.93531525645214e-06,
"loss": 0.1848,
"step": 4425
},
{
"epoch": 1.43,
"grad_norm": 7.425219535827637,
"learning_rate": 2.8944789284547536e-06,
"loss": 0.1711,
"step": 4450
},
{
"epoch": 1.44,
"grad_norm": 6.594272613525391,
"learning_rate": 2.853642600457367e-06,
"loss": 0.1601,
"step": 4475
},
{
"epoch": 1.45,
"grad_norm": 6.964175224304199,
"learning_rate": 2.8128062724599807e-06,
"loss": 0.16,
"step": 4500
},
{
"epoch": 1.45,
"eval_cer": 31.170822776370184,
"eval_loss": 0.3913029432296753,
"eval_runtime": 1863.6532,
"eval_samples_per_second": 2.38,
"eval_steps_per_second": 0.298,
"step": 4500
},
{
"epoch": 1.45,
"grad_norm": 7.741447448730469,
"learning_rate": 2.771969944462594e-06,
"loss": 0.1552,
"step": 4525
},
{
"epoch": 1.46,
"grad_norm": 6.088663101196289,
"learning_rate": 2.7311336164652074e-06,
"loss": 0.1759,
"step": 4550
},
{
"epoch": 1.47,
"grad_norm": 9.178170204162598,
"learning_rate": 2.690297288467821e-06,
"loss": 0.1635,
"step": 4575
},
{
"epoch": 1.48,
"grad_norm": 9.006258010864258,
"learning_rate": 2.6494609604704345e-06,
"loss": 0.1986,
"step": 4600
},
{
"epoch": 1.49,
"grad_norm": 7.974513530731201,
"learning_rate": 2.608624632473048e-06,
"loss": 0.1781,
"step": 4625
},
{
"epoch": 1.49,
"grad_norm": 8.142216682434082,
"learning_rate": 2.5677883044756617e-06,
"loss": 0.1509,
"step": 4650
},
{
"epoch": 1.5,
"grad_norm": 5.452117919921875,
"learning_rate": 2.526951976478275e-06,
"loss": 0.1682,
"step": 4675
},
{
"epoch": 1.51,
"grad_norm": 6.821118354797363,
"learning_rate": 2.4861156484808888e-06,
"loss": 0.1559,
"step": 4700
},
{
"epoch": 1.52,
"grad_norm": 9.8412446975708,
"learning_rate": 2.4452793204835026e-06,
"loss": 0.1519,
"step": 4725
},
{
"epoch": 1.53,
"grad_norm": 7.750609874725342,
"learning_rate": 2.404442992486116e-06,
"loss": 0.1394,
"step": 4750
},
{
"epoch": 1.53,
"grad_norm": 8.334457397460938,
"learning_rate": 2.3636066644887293e-06,
"loss": 0.1605,
"step": 4775
},
{
"epoch": 1.54,
"grad_norm": 5.586342811584473,
"learning_rate": 2.322770336491343e-06,
"loss": 0.1622,
"step": 4800
},
{
"epoch": 1.55,
"grad_norm": 8.146045684814453,
"learning_rate": 2.2819340084939564e-06,
"loss": 0.175,
"step": 4825
},
{
"epoch": 1.56,
"grad_norm": 5.796145915985107,
"learning_rate": 2.24109768049657e-06,
"loss": 0.1702,
"step": 4850
},
{
"epoch": 1.57,
"grad_norm": 8.852866172790527,
"learning_rate": 2.2002613524991835e-06,
"loss": 0.1522,
"step": 4875
},
{
"epoch": 1.58,
"grad_norm": 7.19501256942749,
"learning_rate": 2.159425024501797e-06,
"loss": 0.1465,
"step": 4900
},
{
"epoch": 1.58,
"grad_norm": 7.024486064910889,
"learning_rate": 2.1185886965044107e-06,
"loss": 0.1539,
"step": 4925
},
{
"epoch": 1.59,
"grad_norm": 7.009402751922607,
"learning_rate": 2.077752368507024e-06,
"loss": 0.1442,
"step": 4950
},
{
"epoch": 1.6,
"grad_norm": 4.041311264038086,
"learning_rate": 2.0369160405096374e-06,
"loss": 0.151,
"step": 4975
},
{
"epoch": 1.61,
"grad_norm": 8.767594337463379,
"learning_rate": 1.996079712512251e-06,
"loss": 0.1603,
"step": 5000
},
{
"epoch": 1.61,
"eval_cer": 30.38405120682758,
"eval_loss": 0.383564829826355,
"eval_runtime": 1857.7457,
"eval_samples_per_second": 2.388,
"eval_steps_per_second": 0.299,
"step": 5000
},
{
"epoch": 1.62,
"grad_norm": 5.629392147064209,
"learning_rate": 1.9552433845148645e-06,
"loss": 0.1515,
"step": 5025
},
{
"epoch": 1.62,
"grad_norm": 5.018975734710693,
"learning_rate": 1.914407056517478e-06,
"loss": 0.157,
"step": 5050
},
{
"epoch": 1.63,
"grad_norm": 6.182131767272949,
"learning_rate": 1.8735707285200916e-06,
"loss": 0.13,
"step": 5075
},
{
"epoch": 1.64,
"grad_norm": 9.000260353088379,
"learning_rate": 1.8327344005227052e-06,
"loss": 0.1726,
"step": 5100
},
{
"epoch": 1.65,
"grad_norm": 6.853832244873047,
"learning_rate": 1.7918980725253188e-06,
"loss": 0.1451,
"step": 5125
},
{
"epoch": 1.66,
"grad_norm": 5.68117618560791,
"learning_rate": 1.7510617445279321e-06,
"loss": 0.1518,
"step": 5150
},
{
"epoch": 1.66,
"grad_norm": 4.632532119750977,
"learning_rate": 1.7102254165305457e-06,
"loss": 0.144,
"step": 5175
},
{
"epoch": 1.67,
"grad_norm": 8.772269248962402,
"learning_rate": 1.6693890885331592e-06,
"loss": 0.1525,
"step": 5200
},
{
"epoch": 1.68,
"grad_norm": 8.809287071228027,
"learning_rate": 1.6285527605357728e-06,
"loss": 0.132,
"step": 5225
},
{
"epoch": 1.69,
"grad_norm": 7.337480545043945,
"learning_rate": 1.5877164325383862e-06,
"loss": 0.1549,
"step": 5250
},
{
"epoch": 1.7,
"grad_norm": 5.269392013549805,
"learning_rate": 1.5468801045409997e-06,
"loss": 0.1524,
"step": 5275
},
{
"epoch": 1.7,
"grad_norm": 7.877448558807373,
"learning_rate": 1.5060437765436133e-06,
"loss": 0.1421,
"step": 5300
},
{
"epoch": 1.71,
"grad_norm": 6.454422950744629,
"learning_rate": 1.4652074485462266e-06,
"loss": 0.1377,
"step": 5325
},
{
"epoch": 1.72,
"grad_norm": 7.873298645019531,
"learning_rate": 1.4243711205488402e-06,
"loss": 0.1446,
"step": 5350
},
{
"epoch": 1.73,
"grad_norm": 6.6517486572265625,
"learning_rate": 1.383534792551454e-06,
"loss": 0.1482,
"step": 5375
},
{
"epoch": 1.74,
"grad_norm": 9.937956809997559,
"learning_rate": 1.3426984645540676e-06,
"loss": 0.1306,
"step": 5400
},
{
"epoch": 1.74,
"grad_norm": 4.228558540344238,
"learning_rate": 1.301862136556681e-06,
"loss": 0.1229,
"step": 5425
},
{
"epoch": 1.75,
"grad_norm": 4.710421085357666,
"learning_rate": 1.2610258085592945e-06,
"loss": 0.1374,
"step": 5450
},
{
"epoch": 1.76,
"grad_norm": 4.934779644012451,
"learning_rate": 1.220189480561908e-06,
"loss": 0.1321,
"step": 5475
},
{
"epoch": 1.77,
"grad_norm": 9.244394302368164,
"learning_rate": 1.1793531525645214e-06,
"loss": 0.1343,
"step": 5500
},
{
"epoch": 1.77,
"eval_cer": 30.15735431390852,
"eval_loss": 0.3783666491508484,
"eval_runtime": 1871.1459,
"eval_samples_per_second": 2.371,
"eval_steps_per_second": 0.297,
"step": 5500
},
{
"epoch": 1.78,
"grad_norm": 7.236656188964844,
"learning_rate": 1.138516824567135e-06,
"loss": 0.1295,
"step": 5525
},
{
"epoch": 1.78,
"grad_norm": 6.239099502563477,
"learning_rate": 1.0976804965697485e-06,
"loss": 0.1378,
"step": 5550
},
{
"epoch": 1.79,
"grad_norm": 4.9148945808410645,
"learning_rate": 1.056844168572362e-06,
"loss": 0.1272,
"step": 5575
},
{
"epoch": 1.8,
"grad_norm": 7.572327136993408,
"learning_rate": 1.0160078405749757e-06,
"loss": 0.1405,
"step": 5600
},
{
"epoch": 1.81,
"grad_norm": 6.76165771484375,
"learning_rate": 9.751715125775892e-07,
"loss": 0.1351,
"step": 5625
},
{
"epoch": 1.82,
"grad_norm": 10.984220504760742,
"learning_rate": 9.343351845802026e-07,
"loss": 0.1467,
"step": 5650
},
{
"epoch": 1.82,
"grad_norm": 4.543166637420654,
"learning_rate": 8.934988565828162e-07,
"loss": 0.1175,
"step": 5675
},
{
"epoch": 1.83,
"grad_norm": 8.191649436950684,
"learning_rate": 8.526625285854297e-07,
"loss": 0.1388,
"step": 5700
},
{
"epoch": 1.84,
"grad_norm": 7.215826988220215,
"learning_rate": 8.118262005880432e-07,
"loss": 0.1354,
"step": 5725
},
{
"epoch": 1.85,
"grad_norm": 5.940629959106445,
"learning_rate": 7.709898725906567e-07,
"loss": 0.1283,
"step": 5750
},
{
"epoch": 1.86,
"grad_norm": 6.796767234802246,
"learning_rate": 7.301535445932702e-07,
"loss": 0.1274,
"step": 5775
},
{
"epoch": 1.86,
"grad_norm": 8.403697967529297,
"learning_rate": 6.893172165958838e-07,
"loss": 0.1333,
"step": 5800
},
{
"epoch": 1.87,
"grad_norm": 6.127229690551758,
"learning_rate": 6.484808885984972e-07,
"loss": 0.1406,
"step": 5825
},
{
"epoch": 1.88,
"grad_norm": 7.16465950012207,
"learning_rate": 6.076445606011108e-07,
"loss": 0.132,
"step": 5850
},
{
"epoch": 1.89,
"grad_norm": 5.777968406677246,
"learning_rate": 5.668082326037243e-07,
"loss": 0.1437,
"step": 5875
},
{
"epoch": 1.9,
"grad_norm": 6.021764755249023,
"learning_rate": 5.259719046063379e-07,
"loss": 0.1203,
"step": 5900
},
{
"epoch": 1.9,
"grad_norm": 5.480493068695068,
"learning_rate": 4.851355766089514e-07,
"loss": 0.1398,
"step": 5925
},
{
"epoch": 1.91,
"grad_norm": 7.609493732452393,
"learning_rate": 4.442992486115649e-07,
"loss": 0.1274,
"step": 5950
},
{
"epoch": 1.92,
"grad_norm": 5.910650730133057,
"learning_rate": 4.034629206141784e-07,
"loss": 0.1352,
"step": 5975
},
{
"epoch": 1.93,
"grad_norm": 4.371640682220459,
"learning_rate": 3.626265926167919e-07,
"loss": 0.1265,
"step": 6000
},
{
"epoch": 1.93,
"eval_cer": 29.697292972396323,
"eval_loss": 0.37359777092933655,
"eval_runtime": 1867.3275,
"eval_samples_per_second": 2.376,
"eval_steps_per_second": 0.297,
"step": 6000
}
],
"logging_steps": 25,
"max_steps": 6222,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 2.770419843072e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}