{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.05060984867655246,
"eval_steps": 200,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 5.060984867655246e-05,
"grad_norm": 5.005204677581787,
"learning_rate": 4.999915650252206e-05,
"loss": 0.722,
"step": 2
},
{
"epoch": 0.00010121969735310492,
"grad_norm": 4.560483932495117,
"learning_rate": 4.9998313005044116e-05,
"loss": 0.3625,
"step": 4
},
{
"epoch": 0.00015182954602965736,
"grad_norm": 9.806407928466797,
"learning_rate": 4.9997469507566176e-05,
"loss": 0.786,
"step": 6
},
{
"epoch": 0.00020243939470620983,
"grad_norm": 11.181573867797852,
"learning_rate": 4.999662601008823e-05,
"loss": 0.6063,
"step": 8
},
{
"epoch": 0.0002530492433827623,
"grad_norm": 3.7916452884674072,
"learning_rate": 4.999578251261029e-05,
"loss": 0.4681,
"step": 10
},
{
"epoch": 0.0003036590920593147,
"grad_norm": 10.196318626403809,
"learning_rate": 4.999493901513235e-05,
"loss": 0.8192,
"step": 12
},
{
"epoch": 0.0003542689407358672,
"grad_norm": 8.210321426391602,
"learning_rate": 4.9994095517654403e-05,
"loss": 0.7953,
"step": 14
},
{
"epoch": 0.00040487878941241966,
"grad_norm": 8.648380279541016,
"learning_rate": 4.9993252020176464e-05,
"loss": 0.5404,
"step": 16
},
{
"epoch": 0.0004554886380889721,
"grad_norm": null,
"learning_rate": 4.999283027143749e-05,
"loss": 0.7557,
"step": 18
},
{
"epoch": 0.0005060984867655246,
"grad_norm": 3.2002220153808594,
"learning_rate": 4.999198677395955e-05,
"loss": 0.5948,
"step": 20
},
{
"epoch": 0.000556708335442077,
"grad_norm": 6.236138343811035,
"learning_rate": 4.9991143276481604e-05,
"loss": 1.1657,
"step": 22
},
{
"epoch": 0.0006073181841186294,
"grad_norm": 8.113347053527832,
"learning_rate": 4.9990299779003664e-05,
"loss": 0.6126,
"step": 24
},
{
"epoch": 0.0006579280327951819,
"grad_norm": 6.6634979248046875,
"learning_rate": 4.998945628152572e-05,
"loss": 0.5794,
"step": 26
},
{
"epoch": 0.0007085378814717344,
"grad_norm": 7.940774440765381,
"learning_rate": 4.998861278404778e-05,
"loss": 0.3316,
"step": 28
},
{
"epoch": 0.0007591477301482868,
"grad_norm": 7.296051025390625,
"learning_rate": 4.998776928656984e-05,
"loss": 0.8218,
"step": 30
},
{
"epoch": 0.0008097575788248393,
"grad_norm": 6.318060398101807,
"learning_rate": 4.998692578909189e-05,
"loss": 0.7266,
"step": 32
},
{
"epoch": 0.0008603674275013917,
"grad_norm": 13.293371200561523,
"learning_rate": 4.998608229161395e-05,
"loss": 0.96,
"step": 34
},
{
"epoch": 0.0009109772761779442,
"grad_norm": null,
"learning_rate": 4.998566054287498e-05,
"loss": 0.9009,
"step": 36
},
{
"epoch": 0.0009615871248544967,
"grad_norm": 4.929239749908447,
"learning_rate": 4.998481704539704e-05,
"loss": 0.7153,
"step": 38
},
{
"epoch": 0.001012196973531049,
"grad_norm": 8.825164794921875,
"learning_rate": 4.998397354791909e-05,
"loss": 0.4486,
"step": 40
},
{
"epoch": 0.0010628068222076016,
"grad_norm": 8.393811225891113,
"learning_rate": 4.998313005044115e-05,
"loss": 0.7293,
"step": 42
},
{
"epoch": 0.001113416670884154,
"grad_norm": 4.109868049621582,
"learning_rate": 4.9982286552963206e-05,
"loss": 0.5638,
"step": 44
},
{
"epoch": 0.0011640265195607066,
"grad_norm": 13.631953239440918,
"learning_rate": 4.9981443055485266e-05,
"loss": 1.0084,
"step": 46
},
{
"epoch": 0.0012146363682372589,
"grad_norm": 9.624829292297363,
"learning_rate": 4.9980599558007326e-05,
"loss": 0.9545,
"step": 48
},
{
"epoch": 0.0012652462169138114,
"grad_norm": 5.674628257751465,
"learning_rate": 4.997975606052938e-05,
"loss": 0.5188,
"step": 50
},
{
"epoch": 0.0013158560655903639,
"grad_norm": 6.339033603668213,
"learning_rate": 4.997891256305144e-05,
"loss": 0.5308,
"step": 52
},
{
"epoch": 0.0013664659142669164,
"grad_norm": 3.895756244659424,
"learning_rate": 4.997806906557349e-05,
"loss": 0.6042,
"step": 54
},
{
"epoch": 0.0014170757629434689,
"grad_norm": 4.451659679412842,
"learning_rate": 4.997722556809555e-05,
"loss": 0.4957,
"step": 56
},
{
"epoch": 0.0014676856116200212,
"grad_norm": 5.246840476989746,
"learning_rate": 4.9976382070617613e-05,
"loss": 0.7031,
"step": 58
},
{
"epoch": 0.0015182954602965737,
"grad_norm": 7.467830657958984,
"learning_rate": 4.997553857313967e-05,
"loss": 0.8567,
"step": 60
},
{
"epoch": 0.0015689053089731262,
"grad_norm": 6.088858127593994,
"learning_rate": 4.997469507566173e-05,
"loss": 0.5998,
"step": 62
},
{
"epoch": 0.0016195151576496787,
"grad_norm": 10.719757080078125,
"learning_rate": 4.997385157818378e-05,
"loss": 0.8178,
"step": 64
},
{
"epoch": 0.0016701250063262312,
"grad_norm": 5.923684120178223,
"learning_rate": 4.997300808070584e-05,
"loss": 0.9103,
"step": 66
},
{
"epoch": 0.0017207348550027834,
"grad_norm": 8.114771842956543,
"learning_rate": 4.99721645832279e-05,
"loss": 0.4154,
"step": 68
},
{
"epoch": 0.001771344703679336,
"grad_norm": 9.67381477355957,
"learning_rate": 4.9971321085749954e-05,
"loss": 0.6067,
"step": 70
},
{
"epoch": 0.0018219545523558884,
"grad_norm": 7.255108833312988,
"learning_rate": 4.9970477588272014e-05,
"loss": 0.4104,
"step": 72
},
{
"epoch": 0.001872564401032441,
"grad_norm": 6.330812454223633,
"learning_rate": 4.996963409079407e-05,
"loss": 0.7361,
"step": 74
},
{
"epoch": 0.0019231742497089934,
"grad_norm": 10.727275848388672,
"learning_rate": 4.996879059331613e-05,
"loss": 0.8339,
"step": 76
},
{
"epoch": 0.001973784098385546,
"grad_norm": 5.6855950355529785,
"learning_rate": 4.996794709583819e-05,
"loss": 0.6211,
"step": 78
},
{
"epoch": 0.002024393947062098,
"grad_norm": 9.52706527709961,
"learning_rate": 4.996710359836024e-05,
"loss": 0.6736,
"step": 80
},
{
"epoch": 0.002075003795738651,
"grad_norm": 9.556534767150879,
"learning_rate": 4.99662601008823e-05,
"loss": 0.5151,
"step": 82
},
{
"epoch": 0.002125613644415203,
"grad_norm": 4.778113842010498,
"learning_rate": 4.9965416603404355e-05,
"loss": 0.5348,
"step": 84
},
{
"epoch": 0.0021762234930917555,
"grad_norm": 10.250826835632324,
"learning_rate": 4.9964573105926416e-05,
"loss": 0.5674,
"step": 86
},
{
"epoch": 0.002226833341768308,
"grad_norm": 6.03010892868042,
"learning_rate": 4.9963729608448476e-05,
"loss": 0.4641,
"step": 88
},
{
"epoch": 0.0022774431904448605,
"grad_norm": 10.152463912963867,
"learning_rate": 4.996288611097053e-05,
"loss": 1.0214,
"step": 90
},
{
"epoch": 0.002328053039121413,
"grad_norm": 8.930192947387695,
"learning_rate": 4.996204261349259e-05,
"loss": 0.7772,
"step": 92
},
{
"epoch": 0.0023786628877979655,
"grad_norm": 10.251153945922852,
"learning_rate": 4.996119911601464e-05,
"loss": 0.9428,
"step": 94
},
{
"epoch": 0.0024292727364745178,
"grad_norm": 6.223593711853027,
"learning_rate": 4.99603556185367e-05,
"loss": 0.6318,
"step": 96
},
{
"epoch": 0.0024798825851510705,
"grad_norm": 2.4227373600006104,
"learning_rate": 4.995951212105876e-05,
"loss": 0.436,
"step": 98
},
{
"epoch": 0.0025304924338276228,
"grad_norm": 6.225770473480225,
"learning_rate": 4.9958668623580817e-05,
"loss": 0.7753,
"step": 100
},
{
"epoch": 0.0025811022825041755,
"grad_norm": 5.370884895324707,
"learning_rate": 4.995782512610288e-05,
"loss": 0.5137,
"step": 102
},
{
"epoch": 0.0026317121311807278,
"grad_norm": 5.137220859527588,
"learning_rate": 4.995698162862493e-05,
"loss": 0.5953,
"step": 104
},
{
"epoch": 0.00268232197985728,
"grad_norm": 5.909286975860596,
"learning_rate": 4.995613813114699e-05,
"loss": 0.8687,
"step": 106
},
{
"epoch": 0.0027329318285338328,
"grad_norm": 6.003445148468018,
"learning_rate": 4.995529463366905e-05,
"loss": 0.53,
"step": 108
},
{
"epoch": 0.002783541677210385,
"grad_norm": 3.8742566108703613,
"learning_rate": 4.9954451136191104e-05,
"loss": 0.5512,
"step": 110
},
{
"epoch": 0.0028341515258869378,
"grad_norm": 3.5072569847106934,
"learning_rate": 4.9953607638713164e-05,
"loss": 0.3383,
"step": 112
},
{
"epoch": 0.00288476137456349,
"grad_norm": 5.679248809814453,
"learning_rate": 4.995276414123522e-05,
"loss": 0.7801,
"step": 114
},
{
"epoch": 0.0029353712232400423,
"grad_norm": 5.291722297668457,
"learning_rate": 4.995192064375728e-05,
"loss": 0.6538,
"step": 116
},
{
"epoch": 0.002985981071916595,
"grad_norm": 4.748198986053467,
"learning_rate": 4.995107714627934e-05,
"loss": 0.398,
"step": 118
},
{
"epoch": 0.0030365909205931473,
"grad_norm": 4.336450576782227,
"learning_rate": 4.995023364880139e-05,
"loss": 0.3351,
"step": 120
},
{
"epoch": 0.0030872007692697,
"grad_norm": 5.251417636871338,
"learning_rate": 4.994939015132345e-05,
"loss": 0.4014,
"step": 122
},
{
"epoch": 0.0031378106179462523,
"grad_norm": 7.911617755889893,
"learning_rate": 4.9948546653845505e-05,
"loss": 0.3151,
"step": 124
},
{
"epoch": 0.0031884204666228046,
"grad_norm": 6.031822681427002,
"learning_rate": 4.9947703156367565e-05,
"loss": 0.9222,
"step": 126
},
{
"epoch": 0.0032390303152993573,
"grad_norm": 10.08216381072998,
"learning_rate": 4.994685965888962e-05,
"loss": 0.6676,
"step": 128
},
{
"epoch": 0.0032896401639759096,
"grad_norm": 3.475123167037964,
"learning_rate": 4.994601616141168e-05,
"loss": 0.2461,
"step": 130
},
{
"epoch": 0.0033402500126524623,
"grad_norm": 4.408476829528809,
"learning_rate": 4.994517266393374e-05,
"loss": 0.26,
"step": 132
},
{
"epoch": 0.0033908598613290146,
"grad_norm": 6.929357528686523,
"learning_rate": 4.994432916645579e-05,
"loss": 0.9818,
"step": 134
},
{
"epoch": 0.003441469710005567,
"grad_norm": 6.456033229827881,
"learning_rate": 4.994348566897785e-05,
"loss": 0.4414,
"step": 136
},
{
"epoch": 0.0034920795586821196,
"grad_norm": 26.87461280822754,
"learning_rate": 4.9942642171499906e-05,
"loss": 0.7951,
"step": 138
},
{
"epoch": 0.003542689407358672,
"grad_norm": 7.420943737030029,
"learning_rate": 4.9941798674021966e-05,
"loss": 0.5827,
"step": 140
},
{
"epoch": 0.0035932992560352246,
"grad_norm": 5.9366583824157715,
"learning_rate": 4.9940955176544027e-05,
"loss": 0.6941,
"step": 142
},
{
"epoch": 0.003643909104711777,
"grad_norm": 5.8037543296813965,
"learning_rate": 4.994011167906608e-05,
"loss": 0.6301,
"step": 144
},
{
"epoch": 0.003694518953388329,
"grad_norm": 4.243365287780762,
"learning_rate": 4.993926818158814e-05,
"loss": 0.6995,
"step": 146
},
{
"epoch": 0.003745128802064882,
"grad_norm": 7.5515522956848145,
"learning_rate": 4.9938424684110194e-05,
"loss": 0.5537,
"step": 148
},
{
"epoch": 0.003795738650741434,
"grad_norm": 8.966812133789062,
"learning_rate": 4.9937581186632254e-05,
"loss": 0.5416,
"step": 150
},
{
"epoch": 0.003846348499417987,
"grad_norm": 7.70654296875,
"learning_rate": 4.9936737689154314e-05,
"loss": 0.6907,
"step": 152
},
{
"epoch": 0.003896958348094539,
"grad_norm": 6.371860504150391,
"learning_rate": 4.993589419167637e-05,
"loss": 0.468,
"step": 154
},
{
"epoch": 0.003947568196771092,
"grad_norm": 6.1000542640686035,
"learning_rate": 4.993505069419843e-05,
"loss": 0.6523,
"step": 156
},
{
"epoch": 0.003998178045447644,
"grad_norm": 5.390421390533447,
"learning_rate": 4.993420719672048e-05,
"loss": 1.0799,
"step": 158
},
{
"epoch": 0.004048787894124196,
"grad_norm": 7.502729415893555,
"learning_rate": 4.993336369924254e-05,
"loss": 0.7921,
"step": 160
},
{
"epoch": 0.004099397742800749,
"grad_norm": 3.19514536857605,
"learning_rate": 4.99325202017646e-05,
"loss": 0.2293,
"step": 162
},
{
"epoch": 0.004150007591477302,
"grad_norm": 7.2197585105896,
"learning_rate": 4.9931676704286655e-05,
"loss": 0.6778,
"step": 164
},
{
"epoch": 0.004200617440153854,
"grad_norm": 4.662430286407471,
"learning_rate": 4.9930833206808715e-05,
"loss": 0.4956,
"step": 166
},
{
"epoch": 0.004251227288830406,
"grad_norm": 5.545823574066162,
"learning_rate": 4.992998970933077e-05,
"loss": 0.4587,
"step": 168
},
{
"epoch": 0.004301837137506959,
"grad_norm": 8.578537940979004,
"learning_rate": 4.992914621185283e-05,
"loss": 0.2751,
"step": 170
},
{
"epoch": 0.004352446986183511,
"grad_norm": 4.3413472175598145,
"learning_rate": 4.992830271437489e-05,
"loss": 0.3712,
"step": 172
},
{
"epoch": 0.004403056834860064,
"grad_norm": 3.3616743087768555,
"learning_rate": 4.992745921689694e-05,
"loss": 0.4654,
"step": 174
},
{
"epoch": 0.004453666683536616,
"grad_norm": 3.7586441040039062,
"learning_rate": 4.9926615719419e-05,
"loss": 0.5338,
"step": 176
},
{
"epoch": 0.004504276532213168,
"grad_norm": 4.674256801605225,
"learning_rate": 4.9925772221941056e-05,
"loss": 0.3885,
"step": 178
},
{
"epoch": 0.004554886380889721,
"grad_norm": 6.164566993713379,
"learning_rate": 4.9924928724463116e-05,
"loss": 0.3949,
"step": 180
},
{
"epoch": 0.004605496229566274,
"grad_norm": 12.5003023147583,
"learning_rate": 4.9924085226985176e-05,
"loss": 0.6829,
"step": 182
},
{
"epoch": 0.004656106078242826,
"grad_norm": 8.045877456665039,
"learning_rate": 4.992324172950723e-05,
"loss": 0.7711,
"step": 184
},
{
"epoch": 0.004706715926919378,
"grad_norm": 5.4890570640563965,
"learning_rate": 4.992239823202929e-05,
"loss": 0.6287,
"step": 186
},
{
"epoch": 0.004757325775595931,
"grad_norm": 3.246534585952759,
"learning_rate": 4.992155473455134e-05,
"loss": 0.3895,
"step": 188
},
{
"epoch": 0.004807935624272484,
"grad_norm": 7.648909568786621,
"learning_rate": 4.9920711237073403e-05,
"loss": 0.7327,
"step": 190
},
{
"epoch": 0.0048585454729490355,
"grad_norm": 5.417934894561768,
"learning_rate": 4.9919867739595464e-05,
"loss": 0.5529,
"step": 192
},
{
"epoch": 0.004909155321625588,
"grad_norm": 18.588844299316406,
"learning_rate": 4.991902424211752e-05,
"loss": 1.1181,
"step": 194
},
{
"epoch": 0.004959765170302141,
"grad_norm": 11.765092849731445,
"learning_rate": 4.991818074463958e-05,
"loss": 0.7821,
"step": 196
},
{
"epoch": 0.005010375018978693,
"grad_norm": 6.446319580078125,
"learning_rate": 4.991733724716163e-05,
"loss": 0.7071,
"step": 198
},
{
"epoch": 0.0050609848676552455,
"grad_norm": 7.347968101501465,
"learning_rate": 4.991649374968369e-05,
"loss": 0.4164,
"step": 200
},
{
"epoch": 0.0050609848676552455,
"eval_cer": 0.23587196123692475,
"eval_loss": 0.37637796998023987,
"eval_runtime": 2775.7084,
"eval_samples_per_second": 5.647,
"eval_steps_per_second": 0.353,
"step": 200
},
{
"epoch": 0.005111594716331798,
"grad_norm": 10.311556816101074,
"learning_rate": 4.991565025220575e-05,
"loss": 1.1246,
"step": 202
},
{
"epoch": 0.005162204565008351,
"grad_norm": 10.519822120666504,
"learning_rate": 4.9914806754727805e-05,
"loss": 0.6127,
"step": 204
},
{
"epoch": 0.005212814413684903,
"grad_norm": 6.434566974639893,
"learning_rate": 4.9913963257249865e-05,
"loss": 0.5269,
"step": 206
},
{
"epoch": 0.0052634242623614555,
"grad_norm": 8.08187198638916,
"learning_rate": 4.991311975977192e-05,
"loss": 1.0978,
"step": 208
},
{
"epoch": 0.005314034111038008,
"grad_norm": 7.574239730834961,
"learning_rate": 4.991227626229398e-05,
"loss": 0.7327,
"step": 210
},
{
"epoch": 0.00536464395971456,
"grad_norm": 7.921819686889648,
"learning_rate": 4.991143276481603e-05,
"loss": 1.194,
"step": 212
},
{
"epoch": 0.005415253808391113,
"grad_norm": 9.017950057983398,
"learning_rate": 4.991058926733809e-05,
"loss": 0.9906,
"step": 214
},
{
"epoch": 0.0054658636570676655,
"grad_norm": 6.131129741668701,
"learning_rate": 4.990974576986015e-05,
"loss": 0.8681,
"step": 216
},
{
"epoch": 0.005516473505744218,
"grad_norm": 6.411858081817627,
"learning_rate": 4.9908902272382206e-05,
"loss": 0.6271,
"step": 218
},
{
"epoch": 0.00556708335442077,
"grad_norm": 6.6365227699279785,
"learning_rate": 4.9908058774904266e-05,
"loss": 0.9051,
"step": 220
},
{
"epoch": 0.005617693203097323,
"grad_norm": 7.575653553009033,
"learning_rate": 4.990721527742632e-05,
"loss": 0.9199,
"step": 222
},
{
"epoch": 0.0056683030517738755,
"grad_norm": 8.839277267456055,
"learning_rate": 4.990637177994838e-05,
"loss": 0.909,
"step": 224
},
{
"epoch": 0.005718912900450427,
"grad_norm": 8.077840805053711,
"learning_rate": 4.990552828247044e-05,
"loss": 0.6063,
"step": 226
},
{
"epoch": 0.00576952274912698,
"grad_norm": 8.73996639251709,
"learning_rate": 4.990468478499249e-05,
"loss": 0.9447,
"step": 228
},
{
"epoch": 0.005820132597803533,
"grad_norm": 6.638965129852295,
"learning_rate": 4.990384128751455e-05,
"loss": 0.6174,
"step": 230
},
{
"epoch": 0.005870742446480085,
"grad_norm": 5.351879119873047,
"learning_rate": 4.990299779003661e-05,
"loss": 0.4632,
"step": 232
},
{
"epoch": 0.005921352295156637,
"grad_norm": 6.497000694274902,
"learning_rate": 4.990215429255867e-05,
"loss": 0.5266,
"step": 234
},
{
"epoch": 0.00597196214383319,
"grad_norm": 10.24178695678711,
"learning_rate": 4.990131079508073e-05,
"loss": 0.8037,
"step": 236
},
{
"epoch": 0.006022571992509743,
"grad_norm": 5.579001426696777,
"learning_rate": 4.990046729760278e-05,
"loss": 0.6132,
"step": 238
},
{
"epoch": 0.006073181841186295,
"grad_norm": 3.613511562347412,
"learning_rate": 4.989962380012484e-05,
"loss": 0.3918,
"step": 240
},
{
"epoch": 0.006123791689862847,
"grad_norm": 9.699933052062988,
"learning_rate": 4.9898780302646894e-05,
"loss": 0.8956,
"step": 242
},
{
"epoch": 0.0061744015385394,
"grad_norm": 10.87297534942627,
"learning_rate": 4.9897936805168954e-05,
"loss": 1.1579,
"step": 244
},
{
"epoch": 0.006225011387215952,
"grad_norm": 6.29569673538208,
"learning_rate": 4.9897093307691014e-05,
"loss": 0.4237,
"step": 246
},
{
"epoch": 0.006275621235892505,
"grad_norm": 9.018596649169922,
"learning_rate": 4.989624981021307e-05,
"loss": 0.7854,
"step": 248
},
{
"epoch": 0.006326231084569057,
"grad_norm": 9.707599639892578,
"learning_rate": 4.989540631273513e-05,
"loss": 1.3102,
"step": 250
},
{
"epoch": 0.006376840933245609,
"grad_norm": 5.1023430824279785,
"learning_rate": 4.989456281525718e-05,
"loss": 0.5906,
"step": 252
},
{
"epoch": 0.006427450781922162,
"grad_norm": 2.3029062747955322,
"learning_rate": 4.989371931777924e-05,
"loss": 0.7183,
"step": 254
},
{
"epoch": 0.006478060630598715,
"grad_norm": 11.459485054016113,
"learning_rate": 4.98928758203013e-05,
"loss": 0.5505,
"step": 256
},
{
"epoch": 0.006528670479275267,
"grad_norm": 7.620695114135742,
"learning_rate": 4.9892032322823355e-05,
"loss": 0.9374,
"step": 258
},
{
"epoch": 0.006579280327951819,
"grad_norm": 5.305224895477295,
"learning_rate": 4.9891188825345416e-05,
"loss": 0.7362,
"step": 260
},
{
"epoch": 0.006629890176628372,
"grad_norm": 7.901217460632324,
"learning_rate": 4.989034532786747e-05,
"loss": 0.9518,
"step": 262
},
{
"epoch": 0.006680500025304925,
"grad_norm": 7.463654041290283,
"learning_rate": 4.988950183038953e-05,
"loss": 0.9789,
"step": 264
},
{
"epoch": 0.0067311098739814765,
"grad_norm": 4.86021089553833,
"learning_rate": 4.988865833291159e-05,
"loss": 0.8441,
"step": 266
},
{
"epoch": 0.006781719722658029,
"grad_norm": 5.242839813232422,
"learning_rate": 4.988781483543364e-05,
"loss": 0.8258,
"step": 268
},
{
"epoch": 0.006832329571334582,
"grad_norm": 3.524228572845459,
"learning_rate": 4.98869713379557e-05,
"loss": 0.3924,
"step": 270
},
{
"epoch": 0.006882939420011134,
"grad_norm": 3.8527095317840576,
"learning_rate": 4.9886127840477756e-05,
"loss": 0.5253,
"step": 272
},
{
"epoch": 0.0069335492686876865,
"grad_norm": 3.4836299419403076,
"learning_rate": 4.9885284342999817e-05,
"loss": 0.5859,
"step": 274
},
{
"epoch": 0.006984159117364239,
"grad_norm": 9.650673866271973,
"learning_rate": 4.988444084552188e-05,
"loss": 0.7122,
"step": 276
},
{
"epoch": 0.007034768966040792,
"grad_norm": 5.5393829345703125,
"learning_rate": 4.988359734804393e-05,
"loss": 0.5434,
"step": 278
},
{
"epoch": 0.007085378814717344,
"grad_norm": 11.345093727111816,
"learning_rate": 4.988275385056599e-05,
"loss": 0.9166,
"step": 280
},
{
"epoch": 0.0071359886633938965,
"grad_norm": 4.247881889343262,
"learning_rate": 4.9881910353088044e-05,
"loss": 0.6162,
"step": 282
},
{
"epoch": 0.007186598512070449,
"grad_norm": 5.472718238830566,
"learning_rate": 4.9881066855610104e-05,
"loss": 0.8378,
"step": 284
},
{
"epoch": 0.007237208360747001,
"grad_norm": 6.520363807678223,
"learning_rate": 4.9880223358132164e-05,
"loss": 0.6258,
"step": 286
},
{
"epoch": 0.007287818209423554,
"grad_norm": 4.629631996154785,
"learning_rate": 4.987937986065422e-05,
"loss": 0.5166,
"step": 288
},
{
"epoch": 0.0073384280581001065,
"grad_norm": 9.533284187316895,
"learning_rate": 4.987853636317628e-05,
"loss": 1.1145,
"step": 290
},
{
"epoch": 0.007389037906776658,
"grad_norm": 7.794739246368408,
"learning_rate": 4.987769286569833e-05,
"loss": 1.0134,
"step": 292
},
{
"epoch": 0.007439647755453211,
"grad_norm": 8.504875183105469,
"learning_rate": 4.987684936822039e-05,
"loss": 0.8261,
"step": 294
},
{
"epoch": 0.007490257604129764,
"grad_norm": 6.671535015106201,
"learning_rate": 4.9876005870742445e-05,
"loss": 0.7141,
"step": 296
},
{
"epoch": 0.0075408674528063165,
"grad_norm": 11.959641456604004,
"learning_rate": 4.9875162373264505e-05,
"loss": 0.8247,
"step": 298
},
{
"epoch": 0.007591477301482868,
"grad_norm": 4.4155378341674805,
"learning_rate": 4.9874318875786565e-05,
"loss": 0.348,
"step": 300
},
{
"epoch": 0.007642087150159421,
"grad_norm": 5.849681377410889,
"learning_rate": 4.987347537830862e-05,
"loss": 0.8355,
"step": 302
},
{
"epoch": 0.007692696998835974,
"grad_norm": 10.244682312011719,
"learning_rate": 4.987263188083068e-05,
"loss": 0.7634,
"step": 304
},
{
"epoch": 0.007743306847512526,
"grad_norm": 3.5256447792053223,
"learning_rate": 4.987178838335273e-05,
"loss": 0.451,
"step": 306
},
{
"epoch": 0.007793916696189078,
"grad_norm": 7.318663120269775,
"learning_rate": 4.987094488587479e-05,
"loss": 0.7009,
"step": 308
},
{
"epoch": 0.007844526544865631,
"grad_norm": 5.343123912811279,
"learning_rate": 4.987010138839685e-05,
"loss": 0.4291,
"step": 310
},
{
"epoch": 0.007895136393542184,
"grad_norm": 6.702011585235596,
"learning_rate": 4.9869257890918906e-05,
"loss": 0.9008,
"step": 312
},
{
"epoch": 0.007945746242218736,
"grad_norm": 9.040878295898438,
"learning_rate": 4.9868414393440966e-05,
"loss": 1.0295,
"step": 314
},
{
"epoch": 0.007996356090895287,
"grad_norm": 5.2994585037231445,
"learning_rate": 4.986757089596302e-05,
"loss": 0.6427,
"step": 316
},
{
"epoch": 0.00804696593957184,
"grad_norm": 8.381704330444336,
"learning_rate": 4.986672739848508e-05,
"loss": 0.8675,
"step": 318
},
{
"epoch": 0.008097575788248393,
"grad_norm": 9.162700653076172,
"learning_rate": 4.986588390100714e-05,
"loss": 1.1891,
"step": 320
},
{
"epoch": 0.008148185636924946,
"grad_norm": 10.618518829345703,
"learning_rate": 4.9865040403529194e-05,
"loss": 0.7131,
"step": 322
},
{
"epoch": 0.008198795485601498,
"grad_norm": 7.559556484222412,
"learning_rate": 4.9864196906051254e-05,
"loss": 0.7327,
"step": 324
},
{
"epoch": 0.008249405334278051,
"grad_norm": 7.372714519500732,
"learning_rate": 4.986335340857331e-05,
"loss": 0.5496,
"step": 326
},
{
"epoch": 0.008300015182954604,
"grad_norm": 5.887473106384277,
"learning_rate": 4.986250991109537e-05,
"loss": 0.4605,
"step": 328
},
{
"epoch": 0.008350625031631155,
"grad_norm": 3.8664021492004395,
"learning_rate": 4.986166641361743e-05,
"loss": 0.8836,
"step": 330
},
{
"epoch": 0.008401234880307707,
"grad_norm": 6.713327407836914,
"learning_rate": 4.986082291613948e-05,
"loss": 0.7846,
"step": 332
},
{
"epoch": 0.00845184472898426,
"grad_norm": 6.036464691162109,
"learning_rate": 4.985997941866154e-05,
"loss": 0.8055,
"step": 334
},
{
"epoch": 0.008502454577660813,
"grad_norm": 4.087986946105957,
"learning_rate": 4.9859135921183595e-05,
"loss": 0.7829,
"step": 336
},
{
"epoch": 0.008553064426337366,
"grad_norm": 9.335679054260254,
"learning_rate": 4.9858292423705655e-05,
"loss": 1.279,
"step": 338
},
{
"epoch": 0.008603674275013918,
"grad_norm": 6.914140701293945,
"learning_rate": 4.9857448926227715e-05,
"loss": 0.8773,
"step": 340
},
{
"epoch": 0.008654284123690471,
"grad_norm": 5.106595993041992,
"learning_rate": 4.985660542874977e-05,
"loss": 0.7357,
"step": 342
},
{
"epoch": 0.008704893972367022,
"grad_norm": 5.217001914978027,
"learning_rate": 4.985576193127183e-05,
"loss": 1.0202,
"step": 344
},
{
"epoch": 0.008755503821043575,
"grad_norm": 7.956677436828613,
"learning_rate": 4.985491843379388e-05,
"loss": 0.8517,
"step": 346
},
{
"epoch": 0.008806113669720127,
"grad_norm": 6.432283878326416,
"learning_rate": 4.985407493631594e-05,
"loss": 0.7963,
"step": 348
},
{
"epoch": 0.00885672351839668,
"grad_norm": 25.808292388916016,
"learning_rate": 4.9853231438838e-05,
"loss": 0.5176,
"step": 350
},
{
"epoch": 0.008907333367073233,
"grad_norm": 6.786406517028809,
"learning_rate": 4.9852387941360056e-05,
"loss": 0.9981,
"step": 352
},
{
"epoch": 0.008957943215749786,
"grad_norm": 13.001777648925781,
"learning_rate": 4.9851544443882116e-05,
"loss": 0.6546,
"step": 354
},
{
"epoch": 0.009008553064426337,
"grad_norm": 5.085973262786865,
"learning_rate": 4.985070094640417e-05,
"loss": 0.4445,
"step": 356
},
{
"epoch": 0.00905916291310289,
"grad_norm": 10.445619583129883,
"learning_rate": 4.984985744892623e-05,
"loss": 0.519,
"step": 358
},
{
"epoch": 0.009109772761779442,
"grad_norm": 7.5755510330200195,
"learning_rate": 4.984901395144829e-05,
"loss": 0.9253,
"step": 360
},
{
"epoch": 0.009160382610455995,
"grad_norm": 12.85741901397705,
"learning_rate": 4.984817045397034e-05,
"loss": 0.7553,
"step": 362
},
{
"epoch": 0.009210992459132547,
"grad_norm": 4.291009426116943,
"learning_rate": 4.9847326956492404e-05,
"loss": 0.7912,
"step": 364
},
{
"epoch": 0.0092616023078091,
"grad_norm": 9.602625846862793,
"learning_rate": 4.984648345901446e-05,
"loss": 0.7954,
"step": 366
},
{
"epoch": 0.009312212156485653,
"grad_norm": 10.099987030029297,
"learning_rate": 4.984563996153652e-05,
"loss": 0.6241,
"step": 368
},
{
"epoch": 0.009362822005162204,
"grad_norm": 9.353877067565918,
"learning_rate": 4.984479646405858e-05,
"loss": 0.7845,
"step": 370
},
{
"epoch": 0.009413431853838757,
"grad_norm": 5.5436506271362305,
"learning_rate": 4.984395296658063e-05,
"loss": 0.6337,
"step": 372
},
{
"epoch": 0.00946404170251531,
"grad_norm": 6.538369655609131,
"learning_rate": 4.984310946910269e-05,
"loss": 0.4818,
"step": 374
},
{
"epoch": 0.009514651551191862,
"grad_norm": 11.956756591796875,
"learning_rate": 4.9842265971624744e-05,
"loss": 0.9237,
"step": 376
},
{
"epoch": 0.009565261399868415,
"grad_norm": 11.44477367401123,
"learning_rate": 4.9841422474146805e-05,
"loss": 1.0572,
"step": 378
},
{
"epoch": 0.009615871248544967,
"grad_norm": 9.988565444946289,
"learning_rate": 4.984057897666886e-05,
"loss": 0.6858,
"step": 380
},
{
"epoch": 0.00966648109722152,
"grad_norm": 4.8977813720703125,
"learning_rate": 4.983973547919092e-05,
"loss": 0.5965,
"step": 382
},
{
"epoch": 0.009717090945898071,
"grad_norm": 6.308709144592285,
"learning_rate": 4.983889198171298e-05,
"loss": 0.4817,
"step": 384
},
{
"epoch": 0.009767700794574624,
"grad_norm": 5.661224842071533,
"learning_rate": 4.983804848423503e-05,
"loss": 0.5738,
"step": 386
},
{
"epoch": 0.009818310643251177,
"grad_norm": 5.926614284515381,
"learning_rate": 4.983720498675709e-05,
"loss": 0.5391,
"step": 388
},
{
"epoch": 0.00986892049192773,
"grad_norm": 4.607276916503906,
"learning_rate": 4.9836361489279145e-05,
"loss": 0.4223,
"step": 390
},
{
"epoch": 0.009919530340604282,
"grad_norm": 14.739141464233398,
"learning_rate": 4.9835517991801206e-05,
"loss": 1.0047,
"step": 392
},
{
"epoch": 0.009970140189280835,
"grad_norm": 4.335879325866699,
"learning_rate": 4.9834674494323266e-05,
"loss": 0.7172,
"step": 394
},
{
"epoch": 0.010020750037957386,
"grad_norm": 8.950626373291016,
"learning_rate": 4.983383099684532e-05,
"loss": 0.8787,
"step": 396
},
{
"epoch": 0.010071359886633938,
"grad_norm": 5.630717754364014,
"learning_rate": 4.983298749936738e-05,
"loss": 0.5813,
"step": 398
},
{
"epoch": 0.010121969735310491,
"grad_norm": 9.189420700073242,
"learning_rate": 4.983214400188943e-05,
"loss": 0.9012,
"step": 400
},
{
"epoch": 0.010121969735310491,
"eval_cer": 0.22362332855033584,
"eval_loss": 0.3546978533267975,
"eval_runtime": 2758.4684,
"eval_samples_per_second": 5.682,
"eval_steps_per_second": 0.355,
"step": 400
},
{
"epoch": 0.010172579583987044,
"grad_norm": 3.5027172565460205,
"learning_rate": 4.983130050441149e-05,
"loss": 0.5343,
"step": 402
},
{
"epoch": 0.010223189432663597,
"grad_norm": 7.1599836349487305,
"learning_rate": 4.983045700693355e-05,
"loss": 0.64,
"step": 404
},
{
"epoch": 0.01027379928134015,
"grad_norm": 6.179046630859375,
"learning_rate": 4.982961350945561e-05,
"loss": 0.6958,
"step": 406
},
{
"epoch": 0.010324409130016702,
"grad_norm": 6.452561855316162,
"learning_rate": 4.982877001197767e-05,
"loss": 0.9579,
"step": 408
},
{
"epoch": 0.010375018978693253,
"grad_norm": 6.45066499710083,
"learning_rate": 4.982792651449972e-05,
"loss": 0.8967,
"step": 410
},
{
"epoch": 0.010425628827369806,
"grad_norm": 8.554861068725586,
"learning_rate": 4.982708301702178e-05,
"loss": 1.1857,
"step": 412
},
{
"epoch": 0.010476238676046358,
"grad_norm": 3.6959705352783203,
"learning_rate": 4.982623951954384e-05,
"loss": 0.4889,
"step": 414
},
{
"epoch": 0.010526848524722911,
"grad_norm": 12.848132133483887,
"learning_rate": 4.9825396022065894e-05,
"loss": 0.64,
"step": 416
},
{
"epoch": 0.010577458373399464,
"grad_norm": 6.260450839996338,
"learning_rate": 4.9824552524587954e-05,
"loss": 1.0134,
"step": 418
},
{
"epoch": 0.010628068222076016,
"grad_norm": 6.700595378875732,
"learning_rate": 4.982370902711001e-05,
"loss": 0.8566,
"step": 420
},
{
"epoch": 0.01067867807075257,
"grad_norm": 6.211639404296875,
"learning_rate": 4.982286552963207e-05,
"loss": 0.9721,
"step": 422
},
{
"epoch": 0.01072928791942912,
"grad_norm": 7.872774600982666,
"learning_rate": 4.982202203215413e-05,
"loss": 0.4064,
"step": 424
},
{
"epoch": 0.010779897768105673,
"grad_norm": 9.730685234069824,
"learning_rate": 4.982117853467618e-05,
"loss": 1.044,
"step": 426
},
{
"epoch": 0.010830507616782226,
"grad_norm": 4.908615589141846,
"learning_rate": 4.982033503719824e-05,
"loss": 0.6847,
"step": 428
},
{
"epoch": 0.010881117465458778,
"grad_norm": 7.32842493057251,
"learning_rate": 4.9819491539720295e-05,
"loss": 0.8148,
"step": 430
},
{
"epoch": 0.010931727314135331,
"grad_norm": 4.537167072296143,
"learning_rate": 4.9818648042242355e-05,
"loss": 0.4907,
"step": 432
},
{
"epoch": 0.010982337162811884,
"grad_norm": 5.290032863616943,
"learning_rate": 4.9817804544764416e-05,
"loss": 0.6806,
"step": 434
},
{
"epoch": 0.011032947011488436,
"grad_norm": 6.21295166015625,
"learning_rate": 4.981696104728647e-05,
"loss": 0.6636,
"step": 436
},
{
"epoch": 0.011083556860164987,
"grad_norm": 5.210526466369629,
"learning_rate": 4.981611754980853e-05,
"loss": 0.7971,
"step": 438
},
{
"epoch": 0.01113416670884154,
"grad_norm": 40.482147216796875,
"learning_rate": 4.981527405233058e-05,
"loss": 0.7819,
"step": 440
},
{
"epoch": 0.011184776557518093,
"grad_norm": 6.940312385559082,
"learning_rate": 4.981443055485264e-05,
"loss": 0.8548,
"step": 442
},
{
"epoch": 0.011235386406194646,
"grad_norm": 10.026724815368652,
"learning_rate": 4.98135870573747e-05,
"loss": 0.5857,
"step": 444
},
{
"epoch": 0.011285996254871198,
"grad_norm": 10.645378112792969,
"learning_rate": 4.9812743559896756e-05,
"loss": 0.7426,
"step": 446
},
{
"epoch": 0.011336606103547751,
"grad_norm": 17.70381736755371,
"learning_rate": 4.981190006241882e-05,
"loss": 1.0465,
"step": 448
},
{
"epoch": 0.011387215952224302,
"grad_norm": 4.017563819885254,
"learning_rate": 4.981105656494087e-05,
"loss": 0.6183,
"step": 450
},
{
"epoch": 0.011437825800900855,
"grad_norm": 7.969531059265137,
"learning_rate": 4.981021306746293e-05,
"loss": 0.8661,
"step": 452
},
{
"epoch": 0.011488435649577407,
"grad_norm": 12.08060359954834,
"learning_rate": 4.980936956998499e-05,
"loss": 0.7461,
"step": 454
},
{
"epoch": 0.01153904549825396,
"grad_norm": 6.960525989532471,
"learning_rate": 4.9808526072507044e-05,
"loss": 0.5767,
"step": 456
},
{
"epoch": 0.011589655346930513,
"grad_norm": 10.034902572631836,
"learning_rate": 4.9807682575029104e-05,
"loss": 0.7012,
"step": 458
},
{
"epoch": 0.011640265195607066,
"grad_norm": 11.939443588256836,
"learning_rate": 4.980683907755116e-05,
"loss": 0.6601,
"step": 460
},
{
"epoch": 0.011690875044283618,
"grad_norm": 5.776790618896484,
"learning_rate": 4.980599558007322e-05,
"loss": 0.7514,
"step": 462
},
{
"epoch": 0.01174148489296017,
"grad_norm": 9.15820598602295,
"learning_rate": 4.980515208259527e-05,
"loss": 0.8779,
"step": 464
},
{
"epoch": 0.011792094741636722,
"grad_norm": 7.228218078613281,
"learning_rate": 4.980430858511733e-05,
"loss": 1.0643,
"step": 466
},
{
"epoch": 0.011842704590313275,
"grad_norm": 4.174966812133789,
"learning_rate": 4.980346508763939e-05,
"loss": 0.3511,
"step": 468
},
{
"epoch": 0.011893314438989827,
"grad_norm": 9.501602172851562,
"learning_rate": 4.9802621590161445e-05,
"loss": 0.8323,
"step": 470
},
{
"epoch": 0.01194392428766638,
"grad_norm": 4.438223361968994,
"learning_rate": 4.9801778092683505e-05,
"loss": 0.3112,
"step": 472
},
{
"epoch": 0.011994534136342933,
"grad_norm": 8.708006858825684,
"learning_rate": 4.980093459520556e-05,
"loss": 0.6483,
"step": 474
},
{
"epoch": 0.012045143985019486,
"grad_norm": 4.928430557250977,
"learning_rate": 4.980009109772762e-05,
"loss": 0.7263,
"step": 476
},
{
"epoch": 0.012095753833696037,
"grad_norm": 11.870718955993652,
"learning_rate": 4.979924760024968e-05,
"loss": 0.9143,
"step": 478
},
{
"epoch": 0.01214636368237259,
"grad_norm": 6.654867649078369,
"learning_rate": 4.979840410277173e-05,
"loss": 0.5257,
"step": 480
},
{
"epoch": 0.012196973531049142,
"grad_norm": 4.725414752960205,
"learning_rate": 4.979756060529379e-05,
"loss": 0.5919,
"step": 482
},
{
"epoch": 0.012247583379725695,
"grad_norm": 5.607127666473389,
"learning_rate": 4.9796717107815846e-05,
"loss": 0.5198,
"step": 484
},
{
"epoch": 0.012298193228402247,
"grad_norm": 5.8716864585876465,
"learning_rate": 4.9795873610337906e-05,
"loss": 0.7554,
"step": 486
},
{
"epoch": 0.0123488030770788,
"grad_norm": 7.410449504852295,
"learning_rate": 4.9795030112859966e-05,
"loss": 0.8967,
"step": 488
},
{
"epoch": 0.012399412925755351,
"grad_norm": 10.435539245605469,
"learning_rate": 4.979418661538202e-05,
"loss": 0.7446,
"step": 490
},
{
"epoch": 0.012450022774431904,
"grad_norm": 4.085461139678955,
"learning_rate": 4.979334311790408e-05,
"loss": 0.56,
"step": 492
},
{
"epoch": 0.012500632623108457,
"grad_norm": 7.522024154663086,
"learning_rate": 4.9792499620426133e-05,
"loss": 0.7217,
"step": 494
},
{
"epoch": 0.01255124247178501,
"grad_norm": 6.347813606262207,
"learning_rate": 4.9791656122948194e-05,
"loss": 0.5247,
"step": 496
},
{
"epoch": 0.012601852320461562,
"grad_norm": 8.447410583496094,
"learning_rate": 4.9790812625470254e-05,
"loss": 0.74,
"step": 498
},
{
"epoch": 0.012652462169138115,
"grad_norm": 5.143301486968994,
"learning_rate": 4.978996912799231e-05,
"loss": 0.5553,
"step": 500
},
{
"epoch": 0.012703072017814667,
"grad_norm": 5.69724702835083,
"learning_rate": 4.978912563051437e-05,
"loss": 0.9869,
"step": 502
},
{
"epoch": 0.012753681866491218,
"grad_norm": 3.5676660537719727,
"learning_rate": 4.978828213303642e-05,
"loss": 0.423,
"step": 504
},
{
"epoch": 0.012804291715167771,
"grad_norm": 5.91968297958374,
"learning_rate": 4.978743863555848e-05,
"loss": 0.7037,
"step": 506
},
{
"epoch": 0.012854901563844324,
"grad_norm": 9.412429809570312,
"learning_rate": 4.978659513808054e-05,
"loss": 1.1408,
"step": 508
},
{
"epoch": 0.012905511412520877,
"grad_norm": 8.342994689941406,
"learning_rate": 4.9785751640602595e-05,
"loss": 0.8661,
"step": 510
},
{
"epoch": 0.01295612126119743,
"grad_norm": 7.621118068695068,
"learning_rate": 4.9784908143124655e-05,
"loss": 0.677,
"step": 512
},
{
"epoch": 0.013006731109873982,
"grad_norm": 4.431066036224365,
"learning_rate": 4.978406464564671e-05,
"loss": 0.637,
"step": 514
},
{
"epoch": 0.013057340958550535,
"grad_norm": 9.607247352600098,
"learning_rate": 4.978322114816877e-05,
"loss": 0.5467,
"step": 516
},
{
"epoch": 0.013107950807227086,
"grad_norm": 8.333694458007812,
"learning_rate": 4.978237765069083e-05,
"loss": 1.0044,
"step": 518
},
{
"epoch": 0.013158560655903638,
"grad_norm": 3.5092146396636963,
"learning_rate": 4.978153415321288e-05,
"loss": 0.3496,
"step": 520
},
{
"epoch": 0.013209170504580191,
"grad_norm": 5.619466781616211,
"learning_rate": 4.978069065573494e-05,
"loss": 0.5608,
"step": 522
},
{
"epoch": 0.013259780353256744,
"grad_norm": 10.299610137939453,
"learning_rate": 4.9779847158256996e-05,
"loss": 0.7437,
"step": 524
},
{
"epoch": 0.013310390201933297,
"grad_norm": 9.14527416229248,
"learning_rate": 4.9779003660779056e-05,
"loss": 0.7961,
"step": 526
},
{
"epoch": 0.01336100005060985,
"grad_norm": 4.311985015869141,
"learning_rate": 4.9778160163301116e-05,
"loss": 0.5069,
"step": 528
},
{
"epoch": 0.013411609899286402,
"grad_norm": 7.6014862060546875,
"learning_rate": 4.977731666582317e-05,
"loss": 0.8133,
"step": 530
},
{
"epoch": 0.013462219747962953,
"grad_norm": 5.695495128631592,
"learning_rate": 4.977647316834523e-05,
"loss": 0.4957,
"step": 532
},
{
"epoch": 0.013512829596639506,
"grad_norm": 8.789469718933105,
"learning_rate": 4.977562967086728e-05,
"loss": 0.9009,
"step": 534
},
{
"epoch": 0.013563439445316058,
"grad_norm": 6.521892547607422,
"learning_rate": 4.977478617338934e-05,
"loss": 0.4392,
"step": 536
},
{
"epoch": 0.013614049293992611,
"grad_norm": 8.529374122619629,
"learning_rate": 4.9773942675911404e-05,
"loss": 0.507,
"step": 538
},
{
"epoch": 0.013664659142669164,
"grad_norm": 5.722716808319092,
"learning_rate": 4.977309917843346e-05,
"loss": 0.5071,
"step": 540
},
{
"epoch": 0.013715268991345717,
"grad_norm": 4.3265156745910645,
"learning_rate": 4.977225568095552e-05,
"loss": 0.7658,
"step": 542
},
{
"epoch": 0.013765878840022268,
"grad_norm": 5.346278190612793,
"learning_rate": 4.977141218347757e-05,
"loss": 0.5658,
"step": 544
},
{
"epoch": 0.01381648868869882,
"grad_norm": 7.274371147155762,
"learning_rate": 4.977056868599963e-05,
"loss": 0.8856,
"step": 546
},
{
"epoch": 0.013867098537375373,
"grad_norm": 5.354006767272949,
"learning_rate": 4.9769725188521684e-05,
"loss": 0.5224,
"step": 548
},
{
"epoch": 0.013917708386051926,
"grad_norm": 8.813762664794922,
"learning_rate": 4.9768881691043744e-05,
"loss": 1.1558,
"step": 550
},
{
"epoch": 0.013968318234728478,
"grad_norm": 6.957085132598877,
"learning_rate": 4.9768038193565805e-05,
"loss": 0.4142,
"step": 552
},
{
"epoch": 0.014018928083405031,
"grad_norm": 12.203534126281738,
"learning_rate": 4.976719469608786e-05,
"loss": 0.5945,
"step": 554
},
{
"epoch": 0.014069537932081584,
"grad_norm": 6.845212459564209,
"learning_rate": 4.976635119860992e-05,
"loss": 0.4359,
"step": 556
},
{
"epoch": 0.014120147780758135,
"grad_norm": 6.074872970581055,
"learning_rate": 4.976550770113197e-05,
"loss": 0.7704,
"step": 558
},
{
"epoch": 0.014170757629434688,
"grad_norm": 5.776919364929199,
"learning_rate": 4.976466420365403e-05,
"loss": 0.7799,
"step": 560
},
{
"epoch": 0.01422136747811124,
"grad_norm": 11.90626335144043,
"learning_rate": 4.976382070617609e-05,
"loss": 0.9279,
"step": 562
},
{
"epoch": 0.014271977326787793,
"grad_norm": 4.0126848220825195,
"learning_rate": 4.9762977208698145e-05,
"loss": 0.8148,
"step": 564
},
{
"epoch": 0.014322587175464346,
"grad_norm": 5.540651798248291,
"learning_rate": 4.9762133711220206e-05,
"loss": 0.528,
"step": 566
},
{
"epoch": 0.014373197024140898,
"grad_norm": 4.119876861572266,
"learning_rate": 4.976129021374226e-05,
"loss": 0.8705,
"step": 568
},
{
"epoch": 0.014423806872817451,
"grad_norm": 11.188005447387695,
"learning_rate": 4.976044671626432e-05,
"loss": 0.467,
"step": 570
},
{
"epoch": 0.014474416721494002,
"grad_norm": 6.426743030548096,
"learning_rate": 4.975960321878638e-05,
"loss": 0.5936,
"step": 572
},
{
"epoch": 0.014525026570170555,
"grad_norm": 6.039766788482666,
"learning_rate": 4.975875972130843e-05,
"loss": 0.7222,
"step": 574
},
{
"epoch": 0.014575636418847107,
"grad_norm": 12.856761932373047,
"learning_rate": 4.975791622383049e-05,
"loss": 0.501,
"step": 576
},
{
"epoch": 0.01462624626752366,
"grad_norm": 13.36010456085205,
"learning_rate": 4.9757072726352547e-05,
"loss": 0.473,
"step": 578
},
{
"epoch": 0.014676856116200213,
"grad_norm": 13.758210182189941,
"learning_rate": 4.975622922887461e-05,
"loss": 1.1569,
"step": 580
},
{
"epoch": 0.014727465964876766,
"grad_norm": 5.978826522827148,
"learning_rate": 4.975538573139667e-05,
"loss": 0.5853,
"step": 582
},
{
"epoch": 0.014778075813553317,
"grad_norm": 4.91432523727417,
"learning_rate": 4.975454223391872e-05,
"loss": 0.5073,
"step": 584
},
{
"epoch": 0.01482868566222987,
"grad_norm": 4.914419174194336,
"learning_rate": 4.975369873644078e-05,
"loss": 0.5638,
"step": 586
},
{
"epoch": 0.014879295510906422,
"grad_norm": 6.7935967445373535,
"learning_rate": 4.9752855238962834e-05,
"loss": 0.773,
"step": 588
},
{
"epoch": 0.014929905359582975,
"grad_norm": 5.500904083251953,
"learning_rate": 4.9752011741484894e-05,
"loss": 0.6985,
"step": 590
},
{
"epoch": 0.014980515208259527,
"grad_norm": 10.455467224121094,
"learning_rate": 4.9751168244006954e-05,
"loss": 0.5856,
"step": 592
},
{
"epoch": 0.01503112505693608,
"grad_norm": 14.375707626342773,
"learning_rate": 4.975032474652901e-05,
"loss": 0.8323,
"step": 594
},
{
"epoch": 0.015081734905612633,
"grad_norm": 8.137022972106934,
"learning_rate": 4.974948124905107e-05,
"loss": 0.7872,
"step": 596
},
{
"epoch": 0.015132344754289184,
"grad_norm": 4.684526443481445,
"learning_rate": 4.974863775157312e-05,
"loss": 0.7216,
"step": 598
},
{
"epoch": 0.015182954602965737,
"grad_norm": 6.151386737823486,
"learning_rate": 4.974779425409518e-05,
"loss": 0.7315,
"step": 600
},
{
"epoch": 0.015182954602965737,
"eval_cer": 0.21082101191590244,
"eval_loss": 0.33135300874710083,
"eval_runtime": 2751.8944,
"eval_samples_per_second": 5.696,
"eval_steps_per_second": 0.356,
"step": 600
},
{
"epoch": 0.01523356445164229,
"grad_norm": 7.473769664764404,
"learning_rate": 4.974695075661724e-05,
"loss": 0.3884,
"step": 602
},
{
"epoch": 0.015284174300318842,
"grad_norm": 6.925198078155518,
"learning_rate": 4.9746107259139295e-05,
"loss": 0.6214,
"step": 604
},
{
"epoch": 0.015334784148995395,
"grad_norm": 7.6508684158325195,
"learning_rate": 4.9745263761661355e-05,
"loss": 0.3224,
"step": 606
},
{
"epoch": 0.015385393997671947,
"grad_norm": 8.625561714172363,
"learning_rate": 4.974442026418341e-05,
"loss": 1.1576,
"step": 608
},
{
"epoch": 0.0154360038463485,
"grad_norm": 4.799185276031494,
"learning_rate": 4.974357676670547e-05,
"loss": 0.4953,
"step": 610
},
{
"epoch": 0.015486613695025051,
"grad_norm": 6.022134780883789,
"learning_rate": 4.974273326922753e-05,
"loss": 0.417,
"step": 612
},
{
"epoch": 0.015537223543701604,
"grad_norm": 6.1436333656311035,
"learning_rate": 4.974188977174958e-05,
"loss": 0.7479,
"step": 614
},
{
"epoch": 0.015587833392378157,
"grad_norm": 11.182157516479492,
"learning_rate": 4.974104627427164e-05,
"loss": 1.1078,
"step": 616
},
{
"epoch": 0.01563844324105471,
"grad_norm": 7.718019485473633,
"learning_rate": 4.9740202776793696e-05,
"loss": 0.7953,
"step": 618
},
{
"epoch": 0.015689053089731262,
"grad_norm": 6.890071392059326,
"learning_rate": 4.9739359279315756e-05,
"loss": 0.8054,
"step": 620
},
{
"epoch": 0.015739662938407815,
"grad_norm": 9.046427726745605,
"learning_rate": 4.973851578183782e-05,
"loss": 0.8913,
"step": 622
},
{
"epoch": 0.015790272787084367,
"grad_norm": 7.701432704925537,
"learning_rate": 4.973767228435987e-05,
"loss": 1.2825,
"step": 624
},
{
"epoch": 0.01584088263576092,
"grad_norm": 6.6972808837890625,
"learning_rate": 4.973682878688193e-05,
"loss": 0.7063,
"step": 626
},
{
"epoch": 0.015891492484437473,
"grad_norm": 7.926259517669678,
"learning_rate": 4.9735985289403984e-05,
"loss": 0.6431,
"step": 628
},
{
"epoch": 0.015942102333114026,
"grad_norm": 6.506617546081543,
"learning_rate": 4.9735141791926044e-05,
"loss": 0.7283,
"step": 630
},
{
"epoch": 0.015992712181790575,
"grad_norm": 5.657668590545654,
"learning_rate": 4.97342982944481e-05,
"loss": 0.7906,
"step": 632
},
{
"epoch": 0.016043322030467128,
"grad_norm": 5.814841270446777,
"learning_rate": 4.973345479697016e-05,
"loss": 0.7872,
"step": 634
},
{
"epoch": 0.01609393187914368,
"grad_norm": 4.063594818115234,
"learning_rate": 4.973261129949222e-05,
"loss": 0.7104,
"step": 636
},
{
"epoch": 0.016144541727820233,
"grad_norm": 2.756787061691284,
"learning_rate": 4.973176780201427e-05,
"loss": 0.3609,
"step": 638
},
{
"epoch": 0.016195151576496786,
"grad_norm": 6.163934707641602,
"learning_rate": 4.973092430453633e-05,
"loss": 0.7657,
"step": 640
},
{
"epoch": 0.01624576142517334,
"grad_norm": 7.708083629608154,
"learning_rate": 4.9730080807058385e-05,
"loss": 0.7486,
"step": 642
},
{
"epoch": 0.01629637127384989,
"grad_norm": 6.693125247955322,
"learning_rate": 4.9729237309580445e-05,
"loss": 0.799,
"step": 644
},
{
"epoch": 0.016346981122526444,
"grad_norm": 8.108495712280273,
"learning_rate": 4.9728393812102505e-05,
"loss": 0.8542,
"step": 646
},
{
"epoch": 0.016397590971202997,
"grad_norm": 13.466230392456055,
"learning_rate": 4.972755031462456e-05,
"loss": 0.9259,
"step": 648
},
{
"epoch": 0.01644820081987955,
"grad_norm": 7.1850056648254395,
"learning_rate": 4.972670681714662e-05,
"loss": 0.4547,
"step": 650
},
{
"epoch": 0.016498810668556102,
"grad_norm": 10.240496635437012,
"learning_rate": 4.972586331966867e-05,
"loss": 0.7534,
"step": 652
},
{
"epoch": 0.016549420517232655,
"grad_norm": 7.284756183624268,
"learning_rate": 4.972501982219073e-05,
"loss": 0.7111,
"step": 654
},
{
"epoch": 0.016600030365909207,
"grad_norm": 4.935502052307129,
"learning_rate": 4.972417632471279e-05,
"loss": 0.587,
"step": 656
},
{
"epoch": 0.016650640214585757,
"grad_norm": 9.42276382446289,
"learning_rate": 4.9723332827234846e-05,
"loss": 0.6914,
"step": 658
},
{
"epoch": 0.01670125006326231,
"grad_norm": 8.303751945495605,
"learning_rate": 4.9722489329756906e-05,
"loss": 0.7424,
"step": 660
},
{
"epoch": 0.016751859911938862,
"grad_norm": 5.425510883331299,
"learning_rate": 4.972164583227896e-05,
"loss": 0.4959,
"step": 662
},
{
"epoch": 0.016802469760615415,
"grad_norm": 8.581243515014648,
"learning_rate": 4.972080233480102e-05,
"loss": 0.5844,
"step": 664
},
{
"epoch": 0.016853079609291968,
"grad_norm": 7.702110290527344,
"learning_rate": 4.971995883732308e-05,
"loss": 0.6744,
"step": 666
},
{
"epoch": 0.01690368945796852,
"grad_norm": 5.569732189178467,
"learning_rate": 4.9719115339845133e-05,
"loss": 0.5237,
"step": 668
},
{
"epoch": 0.016954299306645073,
"grad_norm": 7.927607536315918,
"learning_rate": 4.9718271842367194e-05,
"loss": 0.6969,
"step": 670
},
{
"epoch": 0.017004909155321626,
"grad_norm": 6.792113304138184,
"learning_rate": 4.971742834488925e-05,
"loss": 0.4953,
"step": 672
},
{
"epoch": 0.01705551900399818,
"grad_norm": 41.776954650878906,
"learning_rate": 4.971658484741131e-05,
"loss": 0.5495,
"step": 674
},
{
"epoch": 0.01710612885267473,
"grad_norm": 5.085512638092041,
"learning_rate": 4.971574134993337e-05,
"loss": 0.6303,
"step": 676
},
{
"epoch": 0.017156738701351284,
"grad_norm": 5.323431491851807,
"learning_rate": 4.971489785245542e-05,
"loss": 0.7973,
"step": 678
},
{
"epoch": 0.017207348550027837,
"grad_norm": 5.844650745391846,
"learning_rate": 4.971405435497748e-05,
"loss": 1.0397,
"step": 680
},
{
"epoch": 0.01725795839870439,
"grad_norm": 1.8864692449569702,
"learning_rate": 4.9713210857499535e-05,
"loss": 0.3274,
"step": 682
},
{
"epoch": 0.017308568247380942,
"grad_norm": 3.4474141597747803,
"learning_rate": 4.9712367360021595e-05,
"loss": 0.5992,
"step": 684
},
{
"epoch": 0.01735917809605749,
"grad_norm": 5.180785655975342,
"learning_rate": 4.9711523862543655e-05,
"loss": 0.6462,
"step": 686
},
{
"epoch": 0.017409787944734044,
"grad_norm": 5.768479347229004,
"learning_rate": 4.971068036506571e-05,
"loss": 0.6288,
"step": 688
},
{
"epoch": 0.017460397793410597,
"grad_norm": 5.524656772613525,
"learning_rate": 4.970983686758777e-05,
"loss": 0.6045,
"step": 690
},
{
"epoch": 0.01751100764208715,
"grad_norm": 6.222484111785889,
"learning_rate": 4.970899337010982e-05,
"loss": 0.4882,
"step": 692
},
{
"epoch": 0.017561617490763702,
"grad_norm": 2.960340738296509,
"learning_rate": 4.970814987263188e-05,
"loss": 0.3803,
"step": 694
},
{
"epoch": 0.017612227339440255,
"grad_norm": 3.2236385345458984,
"learning_rate": 4.970730637515394e-05,
"loss": 0.5617,
"step": 696
},
{
"epoch": 0.017662837188116808,
"grad_norm": 8.282496452331543,
"learning_rate": 4.9706462877675996e-05,
"loss": 0.6835,
"step": 698
},
{
"epoch": 0.01771344703679336,
"grad_norm": 7.472006797790527,
"learning_rate": 4.9705619380198056e-05,
"loss": 0.4747,
"step": 700
},
{
"epoch": 0.017764056885469913,
"grad_norm": 6.8331780433654785,
"learning_rate": 4.970477588272011e-05,
"loss": 0.4765,
"step": 702
},
{
"epoch": 0.017814666734146466,
"grad_norm": 4.116718292236328,
"learning_rate": 4.970393238524217e-05,
"loss": 0.4177,
"step": 704
},
{
"epoch": 0.01786527658282302,
"grad_norm": 14.779850959777832,
"learning_rate": 4.970308888776423e-05,
"loss": 0.5939,
"step": 706
},
{
"epoch": 0.01791588643149957,
"grad_norm": 10.066930770874023,
"learning_rate": 4.970224539028628e-05,
"loss": 0.7392,
"step": 708
},
{
"epoch": 0.017966496280176124,
"grad_norm": 5.495025634765625,
"learning_rate": 4.9701401892808343e-05,
"loss": 0.6957,
"step": 710
},
{
"epoch": 0.018017106128852673,
"grad_norm": 9.583708763122559,
"learning_rate": 4.97005583953304e-05,
"loss": 0.8032,
"step": 712
},
{
"epoch": 0.018067715977529226,
"grad_norm": 6.409923076629639,
"learning_rate": 4.969971489785246e-05,
"loss": 0.9505,
"step": 714
},
{
"epoch": 0.01811832582620578,
"grad_norm": 8.952066421508789,
"learning_rate": 4.969887140037451e-05,
"loss": 0.7739,
"step": 716
},
{
"epoch": 0.01816893567488233,
"grad_norm": 6.334309101104736,
"learning_rate": 4.969802790289657e-05,
"loss": 1.0117,
"step": 718
},
{
"epoch": 0.018219545523558884,
"grad_norm": 8.889698028564453,
"learning_rate": 4.969718440541863e-05,
"loss": 0.6535,
"step": 720
},
{
"epoch": 0.018270155372235437,
"grad_norm": 2.9195971488952637,
"learning_rate": 4.9696340907940684e-05,
"loss": 0.3021,
"step": 722
},
{
"epoch": 0.01832076522091199,
"grad_norm": 8.154908180236816,
"learning_rate": 4.9695497410462744e-05,
"loss": 0.8477,
"step": 724
},
{
"epoch": 0.018371375069588542,
"grad_norm": 3.179515838623047,
"learning_rate": 4.96946539129848e-05,
"loss": 0.5856,
"step": 726
},
{
"epoch": 0.018421984918265095,
"grad_norm": 7.491313457489014,
"learning_rate": 4.969381041550686e-05,
"loss": 0.8113,
"step": 728
},
{
"epoch": 0.018472594766941648,
"grad_norm": 6.968503475189209,
"learning_rate": 4.969296691802892e-05,
"loss": 0.6095,
"step": 730
},
{
"epoch": 0.0185232046156182,
"grad_norm": 7.4488348960876465,
"learning_rate": 4.969212342055097e-05,
"loss": 1.0515,
"step": 732
},
{
"epoch": 0.018573814464294753,
"grad_norm": 2.959810733795166,
"learning_rate": 4.969127992307303e-05,
"loss": 0.5783,
"step": 734
},
{
"epoch": 0.018624424312971306,
"grad_norm": 9.092787742614746,
"learning_rate": 4.9690436425595085e-05,
"loss": 0.878,
"step": 736
},
{
"epoch": 0.01867503416164786,
"grad_norm": 7.564940929412842,
"learning_rate": 4.9689592928117146e-05,
"loss": 0.8122,
"step": 738
},
{
"epoch": 0.018725644010324408,
"grad_norm": 7.550844192504883,
"learning_rate": 4.9688749430639206e-05,
"loss": 0.4964,
"step": 740
},
{
"epoch": 0.01877625385900096,
"grad_norm": 11.271805763244629,
"learning_rate": 4.968790593316126e-05,
"loss": 0.9631,
"step": 742
},
{
"epoch": 0.018826863707677513,
"grad_norm": 13.20101547241211,
"learning_rate": 4.968706243568332e-05,
"loss": 0.6274,
"step": 744
},
{
"epoch": 0.018877473556354066,
"grad_norm": 5.18681526184082,
"learning_rate": 4.968621893820537e-05,
"loss": 0.6049,
"step": 746
},
{
"epoch": 0.01892808340503062,
"grad_norm": 3.5832290649414062,
"learning_rate": 4.968537544072743e-05,
"loss": 0.363,
"step": 748
},
{
"epoch": 0.01897869325370717,
"grad_norm": 4.764203071594238,
"learning_rate": 4.968453194324949e-05,
"loss": 0.4824,
"step": 750
},
{
"epoch": 0.019029303102383724,
"grad_norm": 3.9694879055023193,
"learning_rate": 4.9683688445771547e-05,
"loss": 0.6074,
"step": 752
},
{
"epoch": 0.019079912951060277,
"grad_norm": 4.219832897186279,
"learning_rate": 4.968284494829361e-05,
"loss": 0.3649,
"step": 754
},
{
"epoch": 0.01913052279973683,
"grad_norm": 25.03742027282715,
"learning_rate": 4.968200145081566e-05,
"loss": 0.742,
"step": 756
},
{
"epoch": 0.019181132648413382,
"grad_norm": 8.844772338867188,
"learning_rate": 4.968115795333772e-05,
"loss": 0.6859,
"step": 758
},
{
"epoch": 0.019231742497089935,
"grad_norm": 7.3545918464660645,
"learning_rate": 4.968031445585978e-05,
"loss": 0.5395,
"step": 760
},
{
"epoch": 0.019282352345766487,
"grad_norm": 3.499608278274536,
"learning_rate": 4.9679470958381834e-05,
"loss": 0.613,
"step": 762
},
{
"epoch": 0.01933296219444304,
"grad_norm": 4.7259440422058105,
"learning_rate": 4.9678627460903894e-05,
"loss": 0.7538,
"step": 764
},
{
"epoch": 0.01938357204311959,
"grad_norm": 5.475869178771973,
"learning_rate": 4.967778396342595e-05,
"loss": 0.7714,
"step": 766
},
{
"epoch": 0.019434181891796142,
"grad_norm": 18.313730239868164,
"learning_rate": 4.967694046594801e-05,
"loss": 0.9697,
"step": 768
},
{
"epoch": 0.019484791740472695,
"grad_norm": 8.159904479980469,
"learning_rate": 4.967609696847007e-05,
"loss": 0.5248,
"step": 770
},
{
"epoch": 0.019535401589149248,
"grad_norm": 5.177513122558594,
"learning_rate": 4.967525347099212e-05,
"loss": 0.4911,
"step": 772
},
{
"epoch": 0.0195860114378258,
"grad_norm": 4.758183479309082,
"learning_rate": 4.967440997351418e-05,
"loss": 0.7151,
"step": 774
},
{
"epoch": 0.019636621286502353,
"grad_norm": 8.288613319396973,
"learning_rate": 4.9673566476036235e-05,
"loss": 0.49,
"step": 776
},
{
"epoch": 0.019687231135178906,
"grad_norm": 8.448434829711914,
"learning_rate": 4.9672722978558295e-05,
"loss": 0.7352,
"step": 778
},
{
"epoch": 0.01973784098385546,
"grad_norm": 11.22861385345459,
"learning_rate": 4.9671879481080355e-05,
"loss": 0.4209,
"step": 780
},
{
"epoch": 0.01978845083253201,
"grad_norm": 8.725863456726074,
"learning_rate": 4.967103598360241e-05,
"loss": 0.5673,
"step": 782
},
{
"epoch": 0.019839060681208564,
"grad_norm": 6.322774410247803,
"learning_rate": 4.967019248612447e-05,
"loss": 0.6288,
"step": 784
},
{
"epoch": 0.019889670529885117,
"grad_norm": 3.711097240447998,
"learning_rate": 4.966934898864652e-05,
"loss": 0.6343,
"step": 786
},
{
"epoch": 0.01994028037856167,
"grad_norm": 7.519350528717041,
"learning_rate": 4.966850549116858e-05,
"loss": 0.7308,
"step": 788
},
{
"epoch": 0.019990890227238222,
"grad_norm": 7.28798246383667,
"learning_rate": 4.966766199369064e-05,
"loss": 0.5802,
"step": 790
},
{
"epoch": 0.02004150007591477,
"grad_norm": 5.8284783363342285,
"learning_rate": 4.9666818496212696e-05,
"loss": 0.5121,
"step": 792
},
{
"epoch": 0.020092109924591324,
"grad_norm": 6.361229419708252,
"learning_rate": 4.9665974998734757e-05,
"loss": 0.7215,
"step": 794
},
{
"epoch": 0.020142719773267877,
"grad_norm": 5.134431838989258,
"learning_rate": 4.966513150125681e-05,
"loss": 0.4415,
"step": 796
},
{
"epoch": 0.02019332962194443,
"grad_norm": 6.047237873077393,
"learning_rate": 4.966428800377887e-05,
"loss": 0.8233,
"step": 798
},
{
"epoch": 0.020243939470620982,
"grad_norm": 3.95216965675354,
"learning_rate": 4.9663444506300924e-05,
"loss": 0.3292,
"step": 800
},
{
"epoch": 0.020243939470620982,
"eval_cer": 0.21470199846112256,
"eval_loss": 0.33481982350349426,
"eval_runtime": 2771.1883,
"eval_samples_per_second": 5.656,
"eval_steps_per_second": 0.354,
"step": 800
},
{
"epoch": 0.020294549319297535,
"grad_norm": 11.878438949584961,
"learning_rate": 4.9662601008822984e-05,
"loss": 0.6688,
"step": 802
},
{
"epoch": 0.020345159167974088,
"grad_norm": 4.3031392097473145,
"learning_rate": 4.9661757511345044e-05,
"loss": 0.5529,
"step": 804
},
{
"epoch": 0.02039576901665064,
"grad_norm": 3.877115249633789,
"learning_rate": 4.96609140138671e-05,
"loss": 0.4497,
"step": 806
},
{
"epoch": 0.020446378865327193,
"grad_norm": 4.481441497802734,
"learning_rate": 4.966007051638916e-05,
"loss": 0.5141,
"step": 808
},
{
"epoch": 0.020496988714003746,
"grad_norm": 8.904651641845703,
"learning_rate": 4.965922701891121e-05,
"loss": 0.5108,
"step": 810
},
{
"epoch": 0.0205475985626803,
"grad_norm": 7.119351863861084,
"learning_rate": 4.965838352143327e-05,
"loss": 1.0192,
"step": 812
},
{
"epoch": 0.02059820841135685,
"grad_norm": 9.75696849822998,
"learning_rate": 4.965754002395533e-05,
"loss": 0.8766,
"step": 814
},
{
"epoch": 0.020648818260033404,
"grad_norm": 3.441471576690674,
"learning_rate": 4.9656696526477385e-05,
"loss": 0.4397,
"step": 816
},
{
"epoch": 0.020699428108709957,
"grad_norm": 5.399097442626953,
"learning_rate": 4.9655853028999445e-05,
"loss": 0.8645,
"step": 818
},
{
"epoch": 0.020750037957386506,
"grad_norm": 8.58830738067627,
"learning_rate": 4.96550095315215e-05,
"loss": 0.6172,
"step": 820
},
{
"epoch": 0.02080064780606306,
"grad_norm": 4.000862121582031,
"learning_rate": 4.965416603404356e-05,
"loss": 0.4244,
"step": 822
},
{
"epoch": 0.02085125765473961,
"grad_norm": 5.518575668334961,
"learning_rate": 4.965332253656562e-05,
"loss": 0.7934,
"step": 824
},
{
"epoch": 0.020901867503416164,
"grad_norm": 5.466125011444092,
"learning_rate": 4.965247903908767e-05,
"loss": 0.5228,
"step": 826
},
{
"epoch": 0.020952477352092717,
"grad_norm": 8.060519218444824,
"learning_rate": 4.965163554160973e-05,
"loss": 0.7465,
"step": 828
},
{
"epoch": 0.02100308720076927,
"grad_norm": 7.969659805297852,
"learning_rate": 4.9650792044131786e-05,
"loss": 0.7079,
"step": 830
},
{
"epoch": 0.021053697049445822,
"grad_norm": 5.721604347229004,
"learning_rate": 4.9649948546653846e-05,
"loss": 0.5805,
"step": 832
},
{
"epoch": 0.021104306898122375,
"grad_norm": 5.849686145782471,
"learning_rate": 4.9649105049175906e-05,
"loss": 0.7729,
"step": 834
},
{
"epoch": 0.021154916746798928,
"grad_norm": 3.3855998516082764,
"learning_rate": 4.964826155169796e-05,
"loss": 0.7193,
"step": 836
},
{
"epoch": 0.02120552659547548,
"grad_norm": 3.7108068466186523,
"learning_rate": 4.964741805422002e-05,
"loss": 0.5297,
"step": 838
},
{
"epoch": 0.021256136444152033,
"grad_norm": 7.068669319152832,
"learning_rate": 4.964657455674207e-05,
"loss": 0.8856,
"step": 840
},
{
"epoch": 0.021306746292828586,
"grad_norm": 3.230989694595337,
"learning_rate": 4.9645731059264133e-05,
"loss": 0.4015,
"step": 842
},
{
"epoch": 0.02135735614150514,
"grad_norm": 3.4090418815612793,
"learning_rate": 4.9644887561786194e-05,
"loss": 0.2911,
"step": 844
},
{
"epoch": 0.021407965990181688,
"grad_norm": 4.298464298248291,
"learning_rate": 4.964404406430825e-05,
"loss": 0.6512,
"step": 846
},
{
"epoch": 0.02145857583885824,
"grad_norm": 2.939181327819824,
"learning_rate": 4.964320056683031e-05,
"loss": 0.4267,
"step": 848
},
{
"epoch": 0.021509185687534793,
"grad_norm": 6.4090728759765625,
"learning_rate": 4.964235706935236e-05,
"loss": 0.5932,
"step": 850
},
{
"epoch": 0.021559795536211346,
"grad_norm": 10.887248992919922,
"learning_rate": 4.964151357187442e-05,
"loss": 0.8047,
"step": 852
},
{
"epoch": 0.0216104053848879,
"grad_norm": 5.774937629699707,
"learning_rate": 4.964067007439648e-05,
"loss": 0.7478,
"step": 854
},
{
"epoch": 0.02166101523356445,
"grad_norm": 4.135079860687256,
"learning_rate": 4.9639826576918535e-05,
"loss": 0.5991,
"step": 856
},
{
"epoch": 0.021711625082241004,
"grad_norm": 5.668390274047852,
"learning_rate": 4.9638983079440595e-05,
"loss": 0.4716,
"step": 858
},
{
"epoch": 0.021762234930917557,
"grad_norm": 2.0646257400512695,
"learning_rate": 4.963813958196265e-05,
"loss": 0.3783,
"step": 860
},
{
"epoch": 0.02181284477959411,
"grad_norm": 5.5543532371521,
"learning_rate": 4.963729608448471e-05,
"loss": 0.8937,
"step": 862
},
{
"epoch": 0.021863454628270662,
"grad_norm": 4.02618408203125,
"learning_rate": 4.963645258700677e-05,
"loss": 0.4268,
"step": 864
},
{
"epoch": 0.021914064476947215,
"grad_norm": 5.774693012237549,
"learning_rate": 4.963560908952882e-05,
"loss": 0.7588,
"step": 866
},
{
"epoch": 0.021964674325623768,
"grad_norm": 3.130143880844116,
"learning_rate": 4.963476559205088e-05,
"loss": 0.4623,
"step": 868
},
{
"epoch": 0.02201528417430032,
"grad_norm": 7.7838521003723145,
"learning_rate": 4.9633922094572936e-05,
"loss": 0.8172,
"step": 870
},
{
"epoch": 0.022065894022976873,
"grad_norm": 7.278140544891357,
"learning_rate": 4.9633078597094996e-05,
"loss": 0.4937,
"step": 872
},
{
"epoch": 0.022116503871653422,
"grad_norm": 3.3574812412261963,
"learning_rate": 4.9632235099617056e-05,
"loss": 0.4263,
"step": 874
},
{
"epoch": 0.022167113720329975,
"grad_norm": 5.792145729064941,
"learning_rate": 4.963139160213911e-05,
"loss": 0.6564,
"step": 876
},
{
"epoch": 0.022217723569006528,
"grad_norm": 4.801455020904541,
"learning_rate": 4.963054810466117e-05,
"loss": 0.4392,
"step": 878
},
{
"epoch": 0.02226833341768308,
"grad_norm": 5.278634548187256,
"learning_rate": 4.962970460718322e-05,
"loss": 0.6453,
"step": 880
},
{
"epoch": 0.022318943266359633,
"grad_norm": 4.173251628875732,
"learning_rate": 4.962886110970528e-05,
"loss": 0.5477,
"step": 882
},
{
"epoch": 0.022369553115036186,
"grad_norm": 3.603672981262207,
"learning_rate": 4.962801761222734e-05,
"loss": 0.8842,
"step": 884
},
{
"epoch": 0.02242016296371274,
"grad_norm": 18.358938217163086,
"learning_rate": 4.96271741147494e-05,
"loss": 0.8393,
"step": 886
},
{
"epoch": 0.02247077281238929,
"grad_norm": 6.532278537750244,
"learning_rate": 4.962633061727146e-05,
"loss": 0.5507,
"step": 888
},
{
"epoch": 0.022521382661065844,
"grad_norm": 6.95924711227417,
"learning_rate": 4.962548711979351e-05,
"loss": 0.6536,
"step": 890
},
{
"epoch": 0.022571992509742397,
"grad_norm": 3.4727678298950195,
"learning_rate": 4.962464362231557e-05,
"loss": 0.435,
"step": 892
},
{
"epoch": 0.02262260235841895,
"grad_norm": 5.473514080047607,
"learning_rate": 4.9623800124837624e-05,
"loss": 0.3681,
"step": 894
},
{
"epoch": 0.022673212207095502,
"grad_norm": 7.168368339538574,
"learning_rate": 4.9622956627359684e-05,
"loss": 0.6088,
"step": 896
},
{
"epoch": 0.022723822055772055,
"grad_norm": 9.777496337890625,
"learning_rate": 4.9622113129881744e-05,
"loss": 0.8791,
"step": 898
},
{
"epoch": 0.022774431904448604,
"grad_norm": 4.331769943237305,
"learning_rate": 4.96212696324038e-05,
"loss": 0.4703,
"step": 900
},
{
"epoch": 0.022825041753125157,
"grad_norm": 11.051033973693848,
"learning_rate": 4.962042613492586e-05,
"loss": 0.6771,
"step": 902
},
{
"epoch": 0.02287565160180171,
"grad_norm": 4.339256763458252,
"learning_rate": 4.961958263744791e-05,
"loss": 0.3235,
"step": 904
},
{
"epoch": 0.022926261450478262,
"grad_norm": 9.73657512664795,
"learning_rate": 4.961873913996997e-05,
"loss": 0.8238,
"step": 906
},
{
"epoch": 0.022976871299154815,
"grad_norm": 8.267867088317871,
"learning_rate": 4.961789564249203e-05,
"loss": 0.4584,
"step": 908
},
{
"epoch": 0.023027481147831368,
"grad_norm": 4.065835952758789,
"learning_rate": 4.9617052145014085e-05,
"loss": 0.5364,
"step": 910
},
{
"epoch": 0.02307809099650792,
"grad_norm": 9.213961601257324,
"learning_rate": 4.9616208647536146e-05,
"loss": 0.7232,
"step": 912
},
{
"epoch": 0.023128700845184473,
"grad_norm": 3.7316653728485107,
"learning_rate": 4.96153651500582e-05,
"loss": 0.417,
"step": 914
},
{
"epoch": 0.023179310693861026,
"grad_norm": 4.829885959625244,
"learning_rate": 4.961452165258026e-05,
"loss": 0.4171,
"step": 916
},
{
"epoch": 0.02322992054253758,
"grad_norm": 13.912676811218262,
"learning_rate": 4.961367815510232e-05,
"loss": 0.5439,
"step": 918
},
{
"epoch": 0.02328053039121413,
"grad_norm": 6.918741226196289,
"learning_rate": 4.961283465762437e-05,
"loss": 0.6434,
"step": 920
},
{
"epoch": 0.023331140239890684,
"grad_norm": 7.197675704956055,
"learning_rate": 4.961199116014643e-05,
"loss": 0.7114,
"step": 922
},
{
"epoch": 0.023381750088567237,
"grad_norm": 6.958329200744629,
"learning_rate": 4.9611147662668486e-05,
"loss": 0.6549,
"step": 924
},
{
"epoch": 0.02343235993724379,
"grad_norm": 7.052434921264648,
"learning_rate": 4.9610304165190547e-05,
"loss": 0.8423,
"step": 926
},
{
"epoch": 0.02348296978592034,
"grad_norm": 4.447729110717773,
"learning_rate": 4.960946066771261e-05,
"loss": 0.4996,
"step": 928
},
{
"epoch": 0.02353357963459689,
"grad_norm": 8.922036170959473,
"learning_rate": 4.960861717023466e-05,
"loss": 0.7087,
"step": 930
},
{
"epoch": 0.023584189483273444,
"grad_norm": 5.072062969207764,
"learning_rate": 4.960777367275672e-05,
"loss": 0.6203,
"step": 932
},
{
"epoch": 0.023634799331949997,
"grad_norm": 2.5875844955444336,
"learning_rate": 4.9606930175278774e-05,
"loss": 0.3663,
"step": 934
},
{
"epoch": 0.02368540918062655,
"grad_norm": 5.000091552734375,
"learning_rate": 4.9606086677800834e-05,
"loss": 0.2929,
"step": 936
},
{
"epoch": 0.023736019029303102,
"grad_norm": 5.237270355224609,
"learning_rate": 4.9605243180322894e-05,
"loss": 0.632,
"step": 938
},
{
"epoch": 0.023786628877979655,
"grad_norm": 9.747302055358887,
"learning_rate": 4.960439968284495e-05,
"loss": 0.7382,
"step": 940
},
{
"epoch": 0.023837238726656208,
"grad_norm": 7.886781215667725,
"learning_rate": 4.960355618536701e-05,
"loss": 0.6935,
"step": 942
},
{
"epoch": 0.02388784857533276,
"grad_norm": 6.9037885665893555,
"learning_rate": 4.960271268788906e-05,
"loss": 0.631,
"step": 944
},
{
"epoch": 0.023938458424009313,
"grad_norm": 4.556064128875732,
"learning_rate": 4.960186919041112e-05,
"loss": 0.5162,
"step": 946
},
{
"epoch": 0.023989068272685866,
"grad_norm": 6.1615519523620605,
"learning_rate": 4.960102569293318e-05,
"loss": 0.6383,
"step": 948
},
{
"epoch": 0.02403967812136242,
"grad_norm": 142.0865020751953,
"learning_rate": 4.9600182195455235e-05,
"loss": 0.7586,
"step": 950
},
{
"epoch": 0.02409028797003897,
"grad_norm": 10.337366104125977,
"learning_rate": 4.9599338697977295e-05,
"loss": 0.5753,
"step": 952
},
{
"epoch": 0.02414089781871552,
"grad_norm": 6.216306686401367,
"learning_rate": 4.959849520049935e-05,
"loss": 0.6553,
"step": 954
},
{
"epoch": 0.024191507667392073,
"grad_norm": 2.8385775089263916,
"learning_rate": 4.959765170302141e-05,
"loss": 0.3002,
"step": 956
},
{
"epoch": 0.024242117516068626,
"grad_norm": 5.520168304443359,
"learning_rate": 4.959680820554347e-05,
"loss": 0.6907,
"step": 958
},
{
"epoch": 0.02429272736474518,
"grad_norm": 3.9114444255828857,
"learning_rate": 4.959596470806552e-05,
"loss": 0.419,
"step": 960
},
{
"epoch": 0.02434333721342173,
"grad_norm": 6.165211200714111,
"learning_rate": 4.959512121058758e-05,
"loss": 0.6344,
"step": 962
},
{
"epoch": 0.024393947062098284,
"grad_norm": 4.263137340545654,
"learning_rate": 4.9594277713109636e-05,
"loss": 0.5867,
"step": 964
},
{
"epoch": 0.024444556910774837,
"grad_norm": 2.720306873321533,
"learning_rate": 4.9593434215631696e-05,
"loss": 0.3657,
"step": 966
},
{
"epoch": 0.02449516675945139,
"grad_norm": 6.873605251312256,
"learning_rate": 4.959259071815375e-05,
"loss": 0.6407,
"step": 968
},
{
"epoch": 0.024545776608127942,
"grad_norm": 16.427352905273438,
"learning_rate": 4.959174722067581e-05,
"loss": 0.8508,
"step": 970
},
{
"epoch": 0.024596386456804495,
"grad_norm": 8.866124153137207,
"learning_rate": 4.959090372319787e-05,
"loss": 0.803,
"step": 972
},
{
"epoch": 0.024646996305481048,
"grad_norm": 5.541032791137695,
"learning_rate": 4.9590060225719924e-05,
"loss": 0.6485,
"step": 974
},
{
"epoch": 0.0246976061541576,
"grad_norm": 2.8217546939849854,
"learning_rate": 4.9589216728241984e-05,
"loss": 0.3261,
"step": 976
},
{
"epoch": 0.024748216002834153,
"grad_norm": 6.076355457305908,
"learning_rate": 4.958837323076404e-05,
"loss": 0.5189,
"step": 978
},
{
"epoch": 0.024798825851510702,
"grad_norm": 2.687714099884033,
"learning_rate": 4.95875297332861e-05,
"loss": 0.3242,
"step": 980
},
{
"epoch": 0.024849435700187255,
"grad_norm": 6.32763671875,
"learning_rate": 4.958668623580816e-05,
"loss": 0.6136,
"step": 982
},
{
"epoch": 0.024900045548863808,
"grad_norm": 11.270010948181152,
"learning_rate": 4.958584273833021e-05,
"loss": 0.8674,
"step": 984
},
{
"epoch": 0.02495065539754036,
"grad_norm": 5.883991718292236,
"learning_rate": 4.958499924085227e-05,
"loss": 0.5242,
"step": 986
},
{
"epoch": 0.025001265246216913,
"grad_norm": 6.999844551086426,
"learning_rate": 4.9584155743374325e-05,
"loss": 0.5351,
"step": 988
},
{
"epoch": 0.025051875094893466,
"grad_norm": 6.340963363647461,
"learning_rate": 4.9583312245896385e-05,
"loss": 0.5624,
"step": 990
},
{
"epoch": 0.02510248494357002,
"grad_norm": 7.818021774291992,
"learning_rate": 4.9582468748418445e-05,
"loss": 0.9044,
"step": 992
},
{
"epoch": 0.02515309479224657,
"grad_norm": 6.447050094604492,
"learning_rate": 4.95816252509405e-05,
"loss": 0.7352,
"step": 994
},
{
"epoch": 0.025203704640923124,
"grad_norm": 4.902888774871826,
"learning_rate": 4.958078175346256e-05,
"loss": 0.4516,
"step": 996
},
{
"epoch": 0.025254314489599677,
"grad_norm": 71.4312973022461,
"learning_rate": 4.957993825598461e-05,
"loss": 0.6593,
"step": 998
},
{
"epoch": 0.02530492433827623,
"grad_norm": 5.481997489929199,
"learning_rate": 4.957909475850667e-05,
"loss": 0.4801,
"step": 1000
},
{
"epoch": 0.02530492433827623,
"eval_cer": 0.20989040697069894,
"eval_loss": 0.3260483741760254,
"eval_runtime": 2649.7137,
"eval_samples_per_second": 5.915,
"eval_steps_per_second": 0.37,
"step": 1000
},
{
"epoch": 0.025355534186952782,
"grad_norm": 13.54730224609375,
"learning_rate": 4.957825126102873e-05,
"loss": 0.5685,
"step": 1002
},
{
"epoch": 0.025406144035629335,
"grad_norm": 8.101643562316895,
"learning_rate": 4.9577407763550786e-05,
"loss": 0.6376,
"step": 1004
},
{
"epoch": 0.025456753884305888,
"grad_norm": 5.483541965484619,
"learning_rate": 4.9576564266072846e-05,
"loss": 0.5417,
"step": 1006
},
{
"epoch": 0.025507363732982437,
"grad_norm": 5.6926493644714355,
"learning_rate": 4.95757207685949e-05,
"loss": 0.5145,
"step": 1008
},
{
"epoch": 0.02555797358165899,
"grad_norm": 4.287508487701416,
"learning_rate": 4.957487727111696e-05,
"loss": 0.5817,
"step": 1010
},
{
"epoch": 0.025608583430335542,
"grad_norm": 3.4692742824554443,
"learning_rate": 4.957403377363902e-05,
"loss": 0.4781,
"step": 1012
},
{
"epoch": 0.025659193279012095,
"grad_norm": 6.066647052764893,
"learning_rate": 4.957319027616107e-05,
"loss": 0.5475,
"step": 1014
},
{
"epoch": 0.025709803127688648,
"grad_norm": 3.1936872005462646,
"learning_rate": 4.9572346778683134e-05,
"loss": 0.6294,
"step": 1016
},
{
"epoch": 0.0257604129763652,
"grad_norm": 10.364377975463867,
"learning_rate": 4.957150328120519e-05,
"loss": 0.7561,
"step": 1018
},
{
"epoch": 0.025811022825041753,
"grad_norm": 5.162211894989014,
"learning_rate": 4.957065978372725e-05,
"loss": 0.5821,
"step": 1020
},
{
"epoch": 0.025861632673718306,
"grad_norm": 8.685132026672363,
"learning_rate": 4.956981628624931e-05,
"loss": 0.7048,
"step": 1022
},
{
"epoch": 0.02591224252239486,
"grad_norm": 4.4980316162109375,
"learning_rate": 4.956897278877136e-05,
"loss": 0.5405,
"step": 1024
},
{
"epoch": 0.02596285237107141,
"grad_norm": 5.45166540145874,
"learning_rate": 4.956812929129342e-05,
"loss": 0.3885,
"step": 1026
},
{
"epoch": 0.026013462219747964,
"grad_norm": 7.379059314727783,
"learning_rate": 4.9567285793815474e-05,
"loss": 0.5148,
"step": 1028
},
{
"epoch": 0.026064072068424517,
"grad_norm": 4.766157150268555,
"learning_rate": 4.9566442296337535e-05,
"loss": 0.7287,
"step": 1030
},
{
"epoch": 0.02611468191710107,
"grad_norm": 7.131243705749512,
"learning_rate": 4.9565598798859595e-05,
"loss": 0.6868,
"step": 1032
},
{
"epoch": 0.02616529176577762,
"grad_norm": 9.707527160644531,
"learning_rate": 4.956475530138165e-05,
"loss": 0.6592,
"step": 1034
},
{
"epoch": 0.02621590161445417,
"grad_norm": 5.630888938903809,
"learning_rate": 4.956391180390371e-05,
"loss": 0.5028,
"step": 1036
},
{
"epoch": 0.026266511463130724,
"grad_norm": 3.7485404014587402,
"learning_rate": 4.956306830642576e-05,
"loss": 0.3452,
"step": 1038
},
{
"epoch": 0.026317121311807277,
"grad_norm": 6.3416643142700195,
"learning_rate": 4.956222480894782e-05,
"loss": 0.4316,
"step": 1040
},
{
"epoch": 0.02636773116048383,
"grad_norm": 8.458013534545898,
"learning_rate": 4.956138131146988e-05,
"loss": 0.8142,
"step": 1042
},
{
"epoch": 0.026418341009160382,
"grad_norm": 9.704322814941406,
"learning_rate": 4.9560537813991936e-05,
"loss": 0.7614,
"step": 1044
},
{
"epoch": 0.026468950857836935,
"grad_norm": 3.311298370361328,
"learning_rate": 4.9559694316513996e-05,
"loss": 0.5417,
"step": 1046
},
{
"epoch": 0.026519560706513488,
"grad_norm": 7.961212635040283,
"learning_rate": 4.955885081903605e-05,
"loss": 0.6401,
"step": 1048
},
{
"epoch": 0.02657017055519004,
"grad_norm": 6.631721496582031,
"learning_rate": 4.955800732155811e-05,
"loss": 0.5357,
"step": 1050
},
{
"epoch": 0.026620780403866593,
"grad_norm": 5.143334865570068,
"learning_rate": 4.955716382408016e-05,
"loss": 0.545,
"step": 1052
},
{
"epoch": 0.026671390252543146,
"grad_norm": 8.77175235748291,
"learning_rate": 4.955632032660222e-05,
"loss": 0.5938,
"step": 1054
},
{
"epoch": 0.0267220001012197,
"grad_norm": 10.350188255310059,
"learning_rate": 4.955547682912428e-05,
"loss": 0.9662,
"step": 1056
},
{
"epoch": 0.02677260994989625,
"grad_norm": 6.271733283996582,
"learning_rate": 4.955463333164634e-05,
"loss": 0.5715,
"step": 1058
},
{
"epoch": 0.026823219798572804,
"grad_norm": 5.548452854156494,
"learning_rate": 4.95537898341684e-05,
"loss": 0.5213,
"step": 1060
},
{
"epoch": 0.026873829647249353,
"grad_norm": 5.460413455963135,
"learning_rate": 4.955294633669045e-05,
"loss": 0.3746,
"step": 1062
},
{
"epoch": 0.026924439495925906,
"grad_norm": 10.801025390625,
"learning_rate": 4.955210283921251e-05,
"loss": 0.66,
"step": 1064
},
{
"epoch": 0.02697504934460246,
"grad_norm": 35.335445404052734,
"learning_rate": 4.955125934173457e-05,
"loss": 0.5122,
"step": 1066
},
{
"epoch": 0.02702565919327901,
"grad_norm": 3.974865436553955,
"learning_rate": 4.9550415844256624e-05,
"loss": 0.3642,
"step": 1068
},
{
"epoch": 0.027076269041955564,
"grad_norm": 6.508527755737305,
"learning_rate": 4.9549572346778684e-05,
"loss": 0.6126,
"step": 1070
},
{
"epoch": 0.027126878890632117,
"grad_norm": 11.70407772064209,
"learning_rate": 4.954872884930074e-05,
"loss": 0.4939,
"step": 1072
},
{
"epoch": 0.02717748873930867,
"grad_norm": 9.648119926452637,
"learning_rate": 4.95478853518228e-05,
"loss": 0.6462,
"step": 1074
},
{
"epoch": 0.027228098587985222,
"grad_norm": 8.660693168640137,
"learning_rate": 4.954704185434486e-05,
"loss": 0.5923,
"step": 1076
},
{
"epoch": 0.027278708436661775,
"grad_norm": 17.504438400268555,
"learning_rate": 4.954619835686691e-05,
"loss": 0.9972,
"step": 1078
},
{
"epoch": 0.027329318285338328,
"grad_norm": 6.019506454467773,
"learning_rate": 4.954535485938897e-05,
"loss": 0.5884,
"step": 1080
},
{
"epoch": 0.02737992813401488,
"grad_norm": 11.403207778930664,
"learning_rate": 4.9544511361911025e-05,
"loss": 0.7651,
"step": 1082
},
{
"epoch": 0.027430537982691433,
"grad_norm": 3.842545747756958,
"learning_rate": 4.9543667864433085e-05,
"loss": 0.3572,
"step": 1084
},
{
"epoch": 0.027481147831367986,
"grad_norm": 4.691946029663086,
"learning_rate": 4.9542824366955146e-05,
"loss": 0.556,
"step": 1086
},
{
"epoch": 0.027531757680044535,
"grad_norm": 8.138900756835938,
"learning_rate": 4.95419808694772e-05,
"loss": 0.545,
"step": 1088
},
{
"epoch": 0.027582367528721088,
"grad_norm": 7.7400431632995605,
"learning_rate": 4.954113737199926e-05,
"loss": 0.6399,
"step": 1090
},
{
"epoch": 0.02763297737739764,
"grad_norm": 18.444286346435547,
"learning_rate": 4.954029387452131e-05,
"loss": 0.9819,
"step": 1092
},
{
"epoch": 0.027683587226074193,
"grad_norm": 4.818946838378906,
"learning_rate": 4.953945037704337e-05,
"loss": 0.5091,
"step": 1094
},
{
"epoch": 0.027734197074750746,
"grad_norm": 6.969218730926514,
"learning_rate": 4.953860687956543e-05,
"loss": 0.6144,
"step": 1096
},
{
"epoch": 0.0277848069234273,
"grad_norm": 5.701696395874023,
"learning_rate": 4.9537763382087486e-05,
"loss": 0.6378,
"step": 1098
},
{
"epoch": 0.02783541677210385,
"grad_norm": 4.948043346405029,
"learning_rate": 4.953691988460955e-05,
"loss": 0.61,
"step": 1100
},
{
"epoch": 0.027886026620780404,
"grad_norm": 6.133516788482666,
"learning_rate": 4.95360763871316e-05,
"loss": 0.5396,
"step": 1102
},
{
"epoch": 0.027936636469456957,
"grad_norm": 4.206554889678955,
"learning_rate": 4.953523288965366e-05,
"loss": 0.3066,
"step": 1104
},
{
"epoch": 0.02798724631813351,
"grad_norm": 3.7985496520996094,
"learning_rate": 4.953438939217572e-05,
"loss": 0.5389,
"step": 1106
},
{
"epoch": 0.028037856166810062,
"grad_norm": 6.991200923919678,
"learning_rate": 4.9533545894697774e-05,
"loss": 0.7457,
"step": 1108
},
{
"epoch": 0.028088466015486615,
"grad_norm": 5.731369495391846,
"learning_rate": 4.9532702397219834e-05,
"loss": 0.3976,
"step": 1110
},
{
"epoch": 0.028139075864163168,
"grad_norm": 7.29412317276001,
"learning_rate": 4.953185889974189e-05,
"loss": 0.56,
"step": 1112
},
{
"epoch": 0.02818968571283972,
"grad_norm": 10.055305480957031,
"learning_rate": 4.953101540226395e-05,
"loss": 0.8518,
"step": 1114
},
{
"epoch": 0.02824029556151627,
"grad_norm": 5.1682047843933105,
"learning_rate": 4.953017190478601e-05,
"loss": 0.3921,
"step": 1116
},
{
"epoch": 0.028290905410192822,
"grad_norm": 7.5293049812316895,
"learning_rate": 4.952932840730806e-05,
"loss": 0.732,
"step": 1118
},
{
"epoch": 0.028341515258869375,
"grad_norm": 7.173330783843994,
"learning_rate": 4.952848490983012e-05,
"loss": 0.6786,
"step": 1120
},
{
"epoch": 0.028392125107545928,
"grad_norm": 7.453824520111084,
"learning_rate": 4.9527641412352175e-05,
"loss": 0.6836,
"step": 1122
},
{
"epoch": 0.02844273495622248,
"grad_norm": 16.70603370666504,
"learning_rate": 4.9526797914874235e-05,
"loss": 0.8831,
"step": 1124
},
{
"epoch": 0.028493344804899033,
"grad_norm": 8.2377290725708,
"learning_rate": 4.952595441739629e-05,
"loss": 0.8214,
"step": 1126
},
{
"epoch": 0.028543954653575586,
"grad_norm": 4.513237953186035,
"learning_rate": 4.952511091991835e-05,
"loss": 0.7878,
"step": 1128
},
{
"epoch": 0.02859456450225214,
"grad_norm": 6.024347305297852,
"learning_rate": 4.952426742244041e-05,
"loss": 0.2515,
"step": 1130
},
{
"epoch": 0.02864517435092869,
"grad_norm": 7.672776699066162,
"learning_rate": 4.952342392496246e-05,
"loss": 0.9578,
"step": 1132
},
{
"epoch": 0.028695784199605244,
"grad_norm": 11.622359275817871,
"learning_rate": 4.952258042748452e-05,
"loss": 0.6932,
"step": 1134
},
{
"epoch": 0.028746394048281797,
"grad_norm": 6.2994232177734375,
"learning_rate": 4.9521736930006576e-05,
"loss": 0.5317,
"step": 1136
},
{
"epoch": 0.02879700389695835,
"grad_norm": 8.886155128479004,
"learning_rate": 4.9520893432528636e-05,
"loss": 0.9717,
"step": 1138
},
{
"epoch": 0.028847613745634902,
"grad_norm": 3.2561750411987305,
"learning_rate": 4.9520049935050696e-05,
"loss": 0.3471,
"step": 1140
},
{
"epoch": 0.02889822359431145,
"grad_norm": 6.805208683013916,
"learning_rate": 4.951920643757275e-05,
"loss": 0.4706,
"step": 1142
},
{
"epoch": 0.028948833442988004,
"grad_norm": 6.713986396789551,
"learning_rate": 4.951836294009481e-05,
"loss": 0.7055,
"step": 1144
},
{
"epoch": 0.028999443291664557,
"grad_norm": 5.203835964202881,
"learning_rate": 4.9517519442616863e-05,
"loss": 0.6585,
"step": 1146
},
{
"epoch": 0.02905005314034111,
"grad_norm": 3.168962001800537,
"learning_rate": 4.9516675945138924e-05,
"loss": 0.4794,
"step": 1148
},
{
"epoch": 0.029100662989017662,
"grad_norm": 11.134737014770508,
"learning_rate": 4.9515832447660984e-05,
"loss": 0.899,
"step": 1150
},
{
"epoch": 0.029151272837694215,
"grad_norm": 5.522861480712891,
"learning_rate": 4.951498895018304e-05,
"loss": 0.4002,
"step": 1152
},
{
"epoch": 0.029201882686370768,
"grad_norm": 2.908127784729004,
"learning_rate": 4.95141454527051e-05,
"loss": 0.4008,
"step": 1154
},
{
"epoch": 0.02925249253504732,
"grad_norm": 11.76309871673584,
"learning_rate": 4.951330195522715e-05,
"loss": 0.3493,
"step": 1156
},
{
"epoch": 0.029303102383723873,
"grad_norm": 4.505433082580566,
"learning_rate": 4.951245845774921e-05,
"loss": 0.3181,
"step": 1158
},
{
"epoch": 0.029353712232400426,
"grad_norm": 4.662505149841309,
"learning_rate": 4.951161496027127e-05,
"loss": 0.6274,
"step": 1160
},
{
"epoch": 0.02940432208107698,
"grad_norm": 4.870098114013672,
"learning_rate": 4.9510771462793325e-05,
"loss": 0.5994,
"step": 1162
},
{
"epoch": 0.02945493192975353,
"grad_norm": 4.153433322906494,
"learning_rate": 4.9509927965315385e-05,
"loss": 0.3769,
"step": 1164
},
{
"epoch": 0.029505541778430084,
"grad_norm": 8.398072242736816,
"learning_rate": 4.950908446783744e-05,
"loss": 0.8386,
"step": 1166
},
{
"epoch": 0.029556151627106633,
"grad_norm": 5.86764669418335,
"learning_rate": 4.95082409703595e-05,
"loss": 0.3906,
"step": 1168
},
{
"epoch": 0.029606761475783186,
"grad_norm": 4.7525200843811035,
"learning_rate": 4.950739747288156e-05,
"loss": 0.5805,
"step": 1170
},
{
"epoch": 0.02965737132445974,
"grad_norm": 14.163674354553223,
"learning_rate": 4.950655397540361e-05,
"loss": 0.714,
"step": 1172
},
{
"epoch": 0.02970798117313629,
"grad_norm": 6.36476469039917,
"learning_rate": 4.950571047792567e-05,
"loss": 0.7714,
"step": 1174
},
{
"epoch": 0.029758591021812844,
"grad_norm": 3.8334286212921143,
"learning_rate": 4.9504866980447726e-05,
"loss": 0.4451,
"step": 1176
},
{
"epoch": 0.029809200870489397,
"grad_norm": 3.1083927154541016,
"learning_rate": 4.9504023482969786e-05,
"loss": 0.3391,
"step": 1178
},
{
"epoch": 0.02985981071916595,
"grad_norm": 11.239181518554688,
"learning_rate": 4.9503179985491846e-05,
"loss": 0.6345,
"step": 1180
},
{
"epoch": 0.029910420567842502,
"grad_norm": 6.135115623474121,
"learning_rate": 4.95023364880139e-05,
"loss": 0.8366,
"step": 1182
},
{
"epoch": 0.029961030416519055,
"grad_norm": 3.8036446571350098,
"learning_rate": 4.950149299053596e-05,
"loss": 0.4256,
"step": 1184
},
{
"epoch": 0.030011640265195608,
"grad_norm": 11.396409034729004,
"learning_rate": 4.950064949305801e-05,
"loss": 0.5988,
"step": 1186
},
{
"epoch": 0.03006225011387216,
"grad_norm": 6.251826286315918,
"learning_rate": 4.949980599558007e-05,
"loss": 0.4033,
"step": 1188
},
{
"epoch": 0.030112859962548713,
"grad_norm": 6.115148544311523,
"learning_rate": 4.9498962498102134e-05,
"loss": 0.4699,
"step": 1190
},
{
"epoch": 0.030163469811225266,
"grad_norm": 8.004677772521973,
"learning_rate": 4.949811900062419e-05,
"loss": 0.7723,
"step": 1192
},
{
"epoch": 0.03021407965990182,
"grad_norm": 8.397205352783203,
"learning_rate": 4.949727550314625e-05,
"loss": 0.4565,
"step": 1194
},
{
"epoch": 0.030264689508578368,
"grad_norm": 7.252643585205078,
"learning_rate": 4.94964320056683e-05,
"loss": 0.7132,
"step": 1196
},
{
"epoch": 0.03031529935725492,
"grad_norm": 5.881059169769287,
"learning_rate": 4.949558850819036e-05,
"loss": 0.7198,
"step": 1198
},
{
"epoch": 0.030365909205931473,
"grad_norm": 11.146910667419434,
"learning_rate": 4.949474501071242e-05,
"loss": 0.5668,
"step": 1200
},
{
"epoch": 0.030365909205931473,
"eval_cer": 0.2020166573086281,
"eval_loss": 0.3112943768501282,
"eval_runtime": 2732.7913,
"eval_samples_per_second": 5.736,
"eval_steps_per_second": 0.359,
"step": 1200
},
{
"epoch": 0.030416519054608026,
"grad_norm": 5.239530563354492,
"learning_rate": 4.9493901513234474e-05,
"loss": 0.8572,
"step": 1202
},
{
"epoch": 0.03046712890328458,
"grad_norm": 6.763473987579346,
"learning_rate": 4.9493058015756535e-05,
"loss": 0.4804,
"step": 1204
},
{
"epoch": 0.03051773875196113,
"grad_norm": 15.63022232055664,
"learning_rate": 4.949221451827859e-05,
"loss": 0.6519,
"step": 1206
},
{
"epoch": 0.030568348600637684,
"grad_norm": 6.332169055938721,
"learning_rate": 4.949137102080065e-05,
"loss": 0.4457,
"step": 1208
},
{
"epoch": 0.030618958449314237,
"grad_norm": 8.737252235412598,
"learning_rate": 4.94905275233227e-05,
"loss": 0.6122,
"step": 1210
},
{
"epoch": 0.03066956829799079,
"grad_norm": 9.297114372253418,
"learning_rate": 4.948968402584476e-05,
"loss": 0.7155,
"step": 1212
},
{
"epoch": 0.030720178146667342,
"grad_norm": 7.846271991729736,
"learning_rate": 4.948884052836682e-05,
"loss": 0.7046,
"step": 1214
},
{
"epoch": 0.030770787995343895,
"grad_norm": 9.104436874389648,
"learning_rate": 4.9487997030888875e-05,
"loss": 0.5831,
"step": 1216
},
{
"epoch": 0.030821397844020448,
"grad_norm": 4.265629768371582,
"learning_rate": 4.9487153533410936e-05,
"loss": 0.2498,
"step": 1218
},
{
"epoch": 0.030872007692697,
"grad_norm": 3.7431282997131348,
"learning_rate": 4.948631003593299e-05,
"loss": 0.6634,
"step": 1220
},
{
"epoch": 0.03092261754137355,
"grad_norm": 5.563877105712891,
"learning_rate": 4.948546653845505e-05,
"loss": 0.7814,
"step": 1222
},
{
"epoch": 0.030973227390050102,
"grad_norm": 5.606101036071777,
"learning_rate": 4.948462304097711e-05,
"loss": 0.7189,
"step": 1224
},
{
"epoch": 0.031023837238726655,
"grad_norm": 9.97654914855957,
"learning_rate": 4.948377954349916e-05,
"loss": 0.7406,
"step": 1226
},
{
"epoch": 0.031074447087403208,
"grad_norm": 10.628059387207031,
"learning_rate": 4.948293604602122e-05,
"loss": 0.904,
"step": 1228
},
{
"epoch": 0.03112505693607976,
"grad_norm": 2.7047297954559326,
"learning_rate": 4.9482092548543277e-05,
"loss": 0.3665,
"step": 1230
},
{
"epoch": 0.031175666784756313,
"grad_norm": 8.546875953674316,
"learning_rate": 4.948124905106534e-05,
"loss": 0.5705,
"step": 1232
},
{
"epoch": 0.031226276633432866,
"grad_norm": 6.664468765258789,
"learning_rate": 4.94804055535874e-05,
"loss": 0.518,
"step": 1234
},
{
"epoch": 0.03127688648210942,
"grad_norm": 8.334696769714355,
"learning_rate": 4.947956205610945e-05,
"loss": 0.5282,
"step": 1236
},
{
"epoch": 0.03132749633078597,
"grad_norm": 7.652597427368164,
"learning_rate": 4.947871855863151e-05,
"loss": 0.6678,
"step": 1238
},
{
"epoch": 0.031378106179462524,
"grad_norm": 5.162440776824951,
"learning_rate": 4.9477875061153564e-05,
"loss": 0.4045,
"step": 1240
},
{
"epoch": 0.03142871602813908,
"grad_norm": 6.496342658996582,
"learning_rate": 4.9477031563675624e-05,
"loss": 0.4663,
"step": 1242
},
{
"epoch": 0.03147932587681563,
"grad_norm": 7.366580963134766,
"learning_rate": 4.9476188066197684e-05,
"loss": 0.6894,
"step": 1244
},
{
"epoch": 0.03152993572549218,
"grad_norm": 13.762931823730469,
"learning_rate": 4.947534456871974e-05,
"loss": 0.7944,
"step": 1246
},
{
"epoch": 0.031580545574168735,
"grad_norm": 12.880459785461426,
"learning_rate": 4.94745010712418e-05,
"loss": 0.7428,
"step": 1248
},
{
"epoch": 0.03163115542284529,
"grad_norm": 6.380914211273193,
"learning_rate": 4.947365757376385e-05,
"loss": 0.6661,
"step": 1250
},
{
"epoch": 0.03168176527152184,
"grad_norm": 7.456797122955322,
"learning_rate": 4.947281407628591e-05,
"loss": 0.7024,
"step": 1252
},
{
"epoch": 0.03173237512019839,
"grad_norm": 4.948566913604736,
"learning_rate": 4.947197057880797e-05,
"loss": 0.5951,
"step": 1254
},
{
"epoch": 0.031782984968874946,
"grad_norm": 7.60888147354126,
"learning_rate": 4.9471127081330025e-05,
"loss": 0.4111,
"step": 1256
},
{
"epoch": 0.0318335948175515,
"grad_norm": 5.659879684448242,
"learning_rate": 4.9470283583852085e-05,
"loss": 0.6262,
"step": 1258
},
{
"epoch": 0.03188420466622805,
"grad_norm": 6.134880542755127,
"learning_rate": 4.946944008637414e-05,
"loss": 0.5249,
"step": 1260
},
{
"epoch": 0.0319348145149046,
"grad_norm": 6.531559467315674,
"learning_rate": 4.94685965888962e-05,
"loss": 0.5775,
"step": 1262
},
{
"epoch": 0.03198542436358115,
"grad_norm": 12.462188720703125,
"learning_rate": 4.946775309141826e-05,
"loss": 0.6286,
"step": 1264
},
{
"epoch": 0.0320360342122577,
"grad_norm": 2.809241533279419,
"learning_rate": 4.946690959394031e-05,
"loss": 0.3842,
"step": 1266
},
{
"epoch": 0.032086644060934255,
"grad_norm": 5.521834850311279,
"learning_rate": 4.946606609646237e-05,
"loss": 0.4794,
"step": 1268
},
{
"epoch": 0.03213725390961081,
"grad_norm": 5.101696491241455,
"learning_rate": 4.9465222598984426e-05,
"loss": 0.4713,
"step": 1270
},
{
"epoch": 0.03218786375828736,
"grad_norm": 6.392932891845703,
"learning_rate": 4.9464379101506486e-05,
"loss": 0.6618,
"step": 1272
},
{
"epoch": 0.03223847360696391,
"grad_norm": 4.8280768394470215,
"learning_rate": 4.946353560402855e-05,
"loss": 0.3516,
"step": 1274
},
{
"epoch": 0.032289083455640466,
"grad_norm": 3.3262429237365723,
"learning_rate": 4.94626921065506e-05,
"loss": 0.5898,
"step": 1276
},
{
"epoch": 0.03233969330431702,
"grad_norm": 5.624804496765137,
"learning_rate": 4.946184860907266e-05,
"loss": 0.5552,
"step": 1278
},
{
"epoch": 0.03239030315299357,
"grad_norm": 3.1129558086395264,
"learning_rate": 4.9461005111594714e-05,
"loss": 0.6172,
"step": 1280
},
{
"epoch": 0.032440913001670124,
"grad_norm": 3.5634653568267822,
"learning_rate": 4.9460161614116774e-05,
"loss": 0.3858,
"step": 1282
},
{
"epoch": 0.03249152285034668,
"grad_norm": 7.236937046051025,
"learning_rate": 4.9459318116638834e-05,
"loss": 0.6032,
"step": 1284
},
{
"epoch": 0.03254213269902323,
"grad_norm": 3.344967842102051,
"learning_rate": 4.945847461916089e-05,
"loss": 0.5774,
"step": 1286
},
{
"epoch": 0.03259274254769978,
"grad_norm": 3.4330480098724365,
"learning_rate": 4.945763112168295e-05,
"loss": 0.7142,
"step": 1288
},
{
"epoch": 0.032643352396376335,
"grad_norm": 5.767812728881836,
"learning_rate": 4.9456787624205e-05,
"loss": 0.5658,
"step": 1290
},
{
"epoch": 0.03269396224505289,
"grad_norm": 5.187238693237305,
"learning_rate": 4.945594412672707e-05,
"loss": 0.346,
"step": 1292
},
{
"epoch": 0.03274457209372944,
"grad_norm": 5.828567028045654,
"learning_rate": 4.945510062924912e-05,
"loss": 0.3802,
"step": 1294
},
{
"epoch": 0.03279518194240599,
"grad_norm": 6.394260406494141,
"learning_rate": 4.945425713177118e-05,
"loss": 1.0168,
"step": 1296
},
{
"epoch": 0.032845791791082546,
"grad_norm": 4.123929977416992,
"learning_rate": 4.945341363429324e-05,
"loss": 0.4564,
"step": 1298
},
{
"epoch": 0.0328964016397591,
"grad_norm": 6.843530178070068,
"learning_rate": 4.9452570136815295e-05,
"loss": 0.6159,
"step": 1300
},
{
"epoch": 0.03294701148843565,
"grad_norm": 10.560795783996582,
"learning_rate": 4.9451726639337356e-05,
"loss": 0.517,
"step": 1302
},
{
"epoch": 0.032997621337112204,
"grad_norm": 2.8675217628479004,
"learning_rate": 4.945088314185941e-05,
"loss": 0.6848,
"step": 1304
},
{
"epoch": 0.03304823118578876,
"grad_norm": 3.9155211448669434,
"learning_rate": 4.945003964438147e-05,
"loss": 0.6188,
"step": 1306
},
{
"epoch": 0.03309884103446531,
"grad_norm": 6.229773998260498,
"learning_rate": 4.944919614690352e-05,
"loss": 0.4295,
"step": 1308
},
{
"epoch": 0.03314945088314186,
"grad_norm": 4.002541542053223,
"learning_rate": 4.944835264942558e-05,
"loss": 0.5816,
"step": 1310
},
{
"epoch": 0.033200060731818415,
"grad_norm": 6.393809795379639,
"learning_rate": 4.944750915194764e-05,
"loss": 0.8652,
"step": 1312
},
{
"epoch": 0.03325067058049497,
"grad_norm": 3.2906885147094727,
"learning_rate": 4.9446665654469696e-05,
"loss": 0.6938,
"step": 1314
},
{
"epoch": 0.03330128042917151,
"grad_norm": 2.870098352432251,
"learning_rate": 4.944582215699176e-05,
"loss": 0.2623,
"step": 1316
},
{
"epoch": 0.033351890277848066,
"grad_norm": 5.652560710906982,
"learning_rate": 4.944497865951381e-05,
"loss": 0.5474,
"step": 1318
},
{
"epoch": 0.03340250012652462,
"grad_norm": 12.31924819946289,
"learning_rate": 4.944413516203587e-05,
"loss": 0.7774,
"step": 1320
},
{
"epoch": 0.03345310997520117,
"grad_norm": 4.865198135375977,
"learning_rate": 4.944329166455793e-05,
"loss": 0.2116,
"step": 1322
},
{
"epoch": 0.033503719823877724,
"grad_norm": 4.536335468292236,
"learning_rate": 4.9442448167079984e-05,
"loss": 0.541,
"step": 1324
},
{
"epoch": 0.03355432967255428,
"grad_norm": 14.479270935058594,
"learning_rate": 4.9441604669602044e-05,
"loss": 0.3704,
"step": 1326
},
{
"epoch": 0.03360493952123083,
"grad_norm": 7.071706295013428,
"learning_rate": 4.94407611721241e-05,
"loss": 0.5868,
"step": 1328
},
{
"epoch": 0.03365554936990738,
"grad_norm": 12.812637329101562,
"learning_rate": 4.943991767464616e-05,
"loss": 0.7385,
"step": 1330
},
{
"epoch": 0.033706159218583935,
"grad_norm": 10.205668449401855,
"learning_rate": 4.943907417716822e-05,
"loss": 0.5933,
"step": 1332
},
{
"epoch": 0.03375676906726049,
"grad_norm": 6.436938762664795,
"learning_rate": 4.943823067969027e-05,
"loss": 1.0516,
"step": 1334
},
{
"epoch": 0.03380737891593704,
"grad_norm": 4.715915679931641,
"learning_rate": 4.943738718221233e-05,
"loss": 0.5055,
"step": 1336
},
{
"epoch": 0.03385798876461359,
"grad_norm": 5.58711051940918,
"learning_rate": 4.9436543684734385e-05,
"loss": 0.4184,
"step": 1338
},
{
"epoch": 0.033908598613290146,
"grad_norm": 16.581960678100586,
"learning_rate": 4.9435700187256445e-05,
"loss": 0.478,
"step": 1340
},
{
"epoch": 0.0339592084619667,
"grad_norm": 7.326168060302734,
"learning_rate": 4.9434856689778505e-05,
"loss": 0.7105,
"step": 1342
},
{
"epoch": 0.03400981831064325,
"grad_norm": 4.85835075378418,
"learning_rate": 4.943401319230056e-05,
"loss": 0.7862,
"step": 1344
},
{
"epoch": 0.034060428159319804,
"grad_norm": 3.9227964878082275,
"learning_rate": 4.943316969482262e-05,
"loss": 0.5298,
"step": 1346
},
{
"epoch": 0.03411103800799636,
"grad_norm": 6.2520904541015625,
"learning_rate": 4.943232619734467e-05,
"loss": 0.5795,
"step": 1348
},
{
"epoch": 0.03416164785667291,
"grad_norm": 6.502024173736572,
"learning_rate": 4.943148269986673e-05,
"loss": 0.4504,
"step": 1350
},
{
"epoch": 0.03421225770534946,
"grad_norm": 6.299930095672607,
"learning_rate": 4.943063920238879e-05,
"loss": 0.8499,
"step": 1352
},
{
"epoch": 0.034262867554026015,
"grad_norm": 3.0065736770629883,
"learning_rate": 4.9429795704910846e-05,
"loss": 0.3785,
"step": 1354
},
{
"epoch": 0.03431347740270257,
"grad_norm": 6.053264617919922,
"learning_rate": 4.9428952207432906e-05,
"loss": 0.4405,
"step": 1356
},
{
"epoch": 0.03436408725137912,
"grad_norm": 2.8478128910064697,
"learning_rate": 4.942810870995496e-05,
"loss": 0.3576,
"step": 1358
},
{
"epoch": 0.03441469710005567,
"grad_norm": 4.3910369873046875,
"learning_rate": 4.942726521247702e-05,
"loss": 0.6643,
"step": 1360
},
{
"epoch": 0.034465306948732226,
"grad_norm": 8.032602310180664,
"learning_rate": 4.942642171499908e-05,
"loss": 0.3172,
"step": 1362
},
{
"epoch": 0.03451591679740878,
"grad_norm": 4.423160552978516,
"learning_rate": 4.9425578217521134e-05,
"loss": 0.5107,
"step": 1364
},
{
"epoch": 0.03456652664608533,
"grad_norm": 4.310211181640625,
"learning_rate": 4.9424734720043194e-05,
"loss": 0.7117,
"step": 1366
},
{
"epoch": 0.034617136494761884,
"grad_norm": 7.594289779663086,
"learning_rate": 4.942389122256525e-05,
"loss": 0.3758,
"step": 1368
},
{
"epoch": 0.03466774634343843,
"grad_norm": 10.414362907409668,
"learning_rate": 4.942304772508731e-05,
"loss": 0.9048,
"step": 1370
},
{
"epoch": 0.03471835619211498,
"grad_norm": 5.809612274169922,
"learning_rate": 4.942220422760937e-05,
"loss": 0.4148,
"step": 1372
},
{
"epoch": 0.034768966040791535,
"grad_norm": 7.8530449867248535,
"learning_rate": 4.942136073013142e-05,
"loss": 0.6252,
"step": 1374
},
{
"epoch": 0.03481957588946809,
"grad_norm": 9.151339530944824,
"learning_rate": 4.942051723265348e-05,
"loss": 0.6158,
"step": 1376
},
{
"epoch": 0.03487018573814464,
"grad_norm": 20.164710998535156,
"learning_rate": 4.9419673735175535e-05,
"loss": 0.6702,
"step": 1378
},
{
"epoch": 0.03492079558682119,
"grad_norm": 6.756007194519043,
"learning_rate": 4.9418830237697595e-05,
"loss": 0.2371,
"step": 1380
},
{
"epoch": 0.034971405435497746,
"grad_norm": 6.241485595703125,
"learning_rate": 4.9417986740219655e-05,
"loss": 0.7635,
"step": 1382
},
{
"epoch": 0.0350220152841743,
"grad_norm": 10.091927528381348,
"learning_rate": 4.941714324274171e-05,
"loss": 0.8506,
"step": 1384
},
{
"epoch": 0.03507262513285085,
"grad_norm": 7.4213080406188965,
"learning_rate": 4.941629974526377e-05,
"loss": 0.5124,
"step": 1386
},
{
"epoch": 0.035123234981527404,
"grad_norm": 11.214619636535645,
"learning_rate": 4.941545624778582e-05,
"loss": 0.4202,
"step": 1388
},
{
"epoch": 0.03517384483020396,
"grad_norm": 8.220820426940918,
"learning_rate": 4.941461275030788e-05,
"loss": 0.9393,
"step": 1390
},
{
"epoch": 0.03522445467888051,
"grad_norm": 8.862229347229004,
"learning_rate": 4.9413769252829936e-05,
"loss": 0.813,
"step": 1392
},
{
"epoch": 0.03527506452755706,
"grad_norm": 3.892012596130371,
"learning_rate": 4.9412925755351996e-05,
"loss": 0.4279,
"step": 1394
},
{
"epoch": 0.035325674376233615,
"grad_norm": 13.525033950805664,
"learning_rate": 4.9412082257874056e-05,
"loss": 0.4542,
"step": 1396
},
{
"epoch": 0.03537628422491017,
"grad_norm": 7.098147869110107,
"learning_rate": 4.941123876039611e-05,
"loss": 0.613,
"step": 1398
},
{
"epoch": 0.03542689407358672,
"grad_norm": 6.280513286590576,
"learning_rate": 4.941039526291817e-05,
"loss": 0.8402,
"step": 1400
},
{
"epoch": 0.03542689407358672,
"eval_cer": 0.19522168153555014,
"eval_loss": 0.2994668483734131,
"eval_runtime": 2672.9422,
"eval_samples_per_second": 5.864,
"eval_steps_per_second": 0.367,
"step": 1400
},
{
"epoch": 0.03547750392226327,
"grad_norm": 20.038259506225586,
"learning_rate": 4.940955176544022e-05,
"loss": 0.4221,
"step": 1402
},
{
"epoch": 0.035528113770939826,
"grad_norm": 4.178603649139404,
"learning_rate": 4.940870826796228e-05,
"loss": 0.5002,
"step": 1404
},
{
"epoch": 0.03557872361961638,
"grad_norm": 4.7133469581604,
"learning_rate": 4.9407864770484344e-05,
"loss": 0.7616,
"step": 1406
},
{
"epoch": 0.03562933346829293,
"grad_norm": 5.610054969787598,
"learning_rate": 4.94070212730064e-05,
"loss": 0.5404,
"step": 1408
},
{
"epoch": 0.035679943316969484,
"grad_norm": 12.473624229431152,
"learning_rate": 4.940617777552846e-05,
"loss": 0.4731,
"step": 1410
},
{
"epoch": 0.03573055316564604,
"grad_norm": 5.456740856170654,
"learning_rate": 4.940533427805051e-05,
"loss": 0.3353,
"step": 1412
},
{
"epoch": 0.03578116301432259,
"grad_norm": 8.414679527282715,
"learning_rate": 4.940449078057257e-05,
"loss": 0.5136,
"step": 1414
},
{
"epoch": 0.03583177286299914,
"grad_norm": 6.136534214019775,
"learning_rate": 4.940364728309463e-05,
"loss": 0.4605,
"step": 1416
},
{
"epoch": 0.035882382711675695,
"grad_norm": 6.461178779602051,
"learning_rate": 4.9402803785616684e-05,
"loss": 0.395,
"step": 1418
},
{
"epoch": 0.03593299256035225,
"grad_norm": 8.947699546813965,
"learning_rate": 4.9401960288138745e-05,
"loss": 0.8262,
"step": 1420
},
{
"epoch": 0.0359836024090288,
"grad_norm": 7.892368793487549,
"learning_rate": 4.94011167906608e-05,
"loss": 0.529,
"step": 1422
},
{
"epoch": 0.036034212257705346,
"grad_norm": 6.520657062530518,
"learning_rate": 4.940027329318286e-05,
"loss": 0.6126,
"step": 1424
},
{
"epoch": 0.0360848221063819,
"grad_norm": 17.974655151367188,
"learning_rate": 4.939942979570492e-05,
"loss": 0.748,
"step": 1426
},
{
"epoch": 0.03613543195505845,
"grad_norm": 3.9267520904541016,
"learning_rate": 4.939858629822697e-05,
"loss": 0.3001,
"step": 1428
},
{
"epoch": 0.036186041803735004,
"grad_norm": 14.933813095092773,
"learning_rate": 4.939774280074903e-05,
"loss": 0.766,
"step": 1430
},
{
"epoch": 0.03623665165241156,
"grad_norm": 9.554229736328125,
"learning_rate": 4.9396899303271085e-05,
"loss": 0.5409,
"step": 1432
},
{
"epoch": 0.03628726150108811,
"grad_norm": 8.939488410949707,
"learning_rate": 4.9396055805793146e-05,
"loss": 0.4732,
"step": 1434
},
{
"epoch": 0.03633787134976466,
"grad_norm": 3.313821315765381,
"learning_rate": 4.9395212308315206e-05,
"loss": 0.3792,
"step": 1436
},
{
"epoch": 0.036388481198441215,
"grad_norm": 15.510714530944824,
"learning_rate": 4.939436881083726e-05,
"loss": 0.7043,
"step": 1438
},
{
"epoch": 0.03643909104711777,
"grad_norm": 8.56005573272705,
"learning_rate": 4.939352531335932e-05,
"loss": 0.4035,
"step": 1440
},
{
"epoch": 0.03648970089579432,
"grad_norm": 5.8778252601623535,
"learning_rate": 4.939268181588137e-05,
"loss": 0.3744,
"step": 1442
},
{
"epoch": 0.03654031074447087,
"grad_norm": 6.815622806549072,
"learning_rate": 4.939183831840343e-05,
"loss": 0.5826,
"step": 1444
},
{
"epoch": 0.036590920593147426,
"grad_norm": 7.3577680587768555,
"learning_rate": 4.939099482092549e-05,
"loss": 0.505,
"step": 1446
},
{
"epoch": 0.03664153044182398,
"grad_norm": 8.071341514587402,
"learning_rate": 4.939015132344755e-05,
"loss": 0.7999,
"step": 1448
},
{
"epoch": 0.03669214029050053,
"grad_norm": 10.3243989944458,
"learning_rate": 4.938930782596961e-05,
"loss": 0.6247,
"step": 1450
},
{
"epoch": 0.036742750139177084,
"grad_norm": 4.544474124908447,
"learning_rate": 4.938846432849166e-05,
"loss": 0.5807,
"step": 1452
},
{
"epoch": 0.03679335998785364,
"grad_norm": 9.284850120544434,
"learning_rate": 4.938762083101372e-05,
"loss": 0.4313,
"step": 1454
},
{
"epoch": 0.03684396983653019,
"grad_norm": 7.1159749031066895,
"learning_rate": 4.938677733353578e-05,
"loss": 0.4875,
"step": 1456
},
{
"epoch": 0.03689457968520674,
"grad_norm": 7.782135009765625,
"learning_rate": 4.9385933836057834e-05,
"loss": 0.8014,
"step": 1458
},
{
"epoch": 0.036945189533883295,
"grad_norm": 9.787236213684082,
"learning_rate": 4.9385090338579894e-05,
"loss": 0.9414,
"step": 1460
},
{
"epoch": 0.03699579938255985,
"grad_norm": 5.5786848068237305,
"learning_rate": 4.938424684110195e-05,
"loss": 0.6361,
"step": 1462
},
{
"epoch": 0.0370464092312364,
"grad_norm": 4.597846508026123,
"learning_rate": 4.938340334362401e-05,
"loss": 0.2795,
"step": 1464
},
{
"epoch": 0.03709701907991295,
"grad_norm": 5.9186553955078125,
"learning_rate": 4.938255984614607e-05,
"loss": 0.4051,
"step": 1466
},
{
"epoch": 0.037147628928589506,
"grad_norm": 5.021423816680908,
"learning_rate": 4.938171634866812e-05,
"loss": 0.4298,
"step": 1468
},
{
"epoch": 0.03719823877726606,
"grad_norm": 3.111738681793213,
"learning_rate": 4.938087285119018e-05,
"loss": 0.3101,
"step": 1470
},
{
"epoch": 0.03724884862594261,
"grad_norm": 6.111041069030762,
"learning_rate": 4.9380029353712235e-05,
"loss": 0.6118,
"step": 1472
},
{
"epoch": 0.037299458474619164,
"grad_norm": 30.16288185119629,
"learning_rate": 4.9379185856234295e-05,
"loss": 0.6206,
"step": 1474
},
{
"epoch": 0.03735006832329572,
"grad_norm": 9.460359573364258,
"learning_rate": 4.937834235875635e-05,
"loss": 0.3322,
"step": 1476
},
{
"epoch": 0.03740067817197226,
"grad_norm": 7.554260730743408,
"learning_rate": 4.937749886127841e-05,
"loss": 0.5655,
"step": 1478
},
{
"epoch": 0.037451288020648815,
"grad_norm": 8.392956733703613,
"learning_rate": 4.937665536380047e-05,
"loss": 0.6669,
"step": 1480
},
{
"epoch": 0.03750189786932537,
"grad_norm": 7.463123321533203,
"learning_rate": 4.937581186632252e-05,
"loss": 0.6082,
"step": 1482
},
{
"epoch": 0.03755250771800192,
"grad_norm": 2.9462671279907227,
"learning_rate": 4.937496836884458e-05,
"loss": 0.7547,
"step": 1484
},
{
"epoch": 0.03760311756667847,
"grad_norm": 9.454707145690918,
"learning_rate": 4.9374124871366636e-05,
"loss": 0.5665,
"step": 1486
},
{
"epoch": 0.037653727415355026,
"grad_norm": 2.5308687686920166,
"learning_rate": 4.9373281373888696e-05,
"loss": 0.4683,
"step": 1488
},
{
"epoch": 0.03770433726403158,
"grad_norm": 3.8949882984161377,
"learning_rate": 4.937243787641076e-05,
"loss": 0.3981,
"step": 1490
},
{
"epoch": 0.03775494711270813,
"grad_norm": 3.255460739135742,
"learning_rate": 4.937159437893281e-05,
"loss": 0.3254,
"step": 1492
},
{
"epoch": 0.037805556961384684,
"grad_norm": 10.862367630004883,
"learning_rate": 4.937075088145487e-05,
"loss": 0.8337,
"step": 1494
},
{
"epoch": 0.03785616681006124,
"grad_norm": 12.926139831542969,
"learning_rate": 4.9369907383976924e-05,
"loss": 0.8218,
"step": 1496
},
{
"epoch": 0.03790677665873779,
"grad_norm": 5.084683418273926,
"learning_rate": 4.9369063886498984e-05,
"loss": 0.4073,
"step": 1498
},
{
"epoch": 0.03795738650741434,
"grad_norm": 5.375326633453369,
"learning_rate": 4.9368220389021044e-05,
"loss": 0.4468,
"step": 1500
},
{
"epoch": 0.038007996356090895,
"grad_norm": 2.891730785369873,
"learning_rate": 4.93673768915431e-05,
"loss": 0.4952,
"step": 1502
},
{
"epoch": 0.03805860620476745,
"grad_norm": 6.7384161949157715,
"learning_rate": 4.936653339406516e-05,
"loss": 0.6817,
"step": 1504
},
{
"epoch": 0.038109216053444,
"grad_norm": 4.95111608505249,
"learning_rate": 4.936568989658721e-05,
"loss": 0.7322,
"step": 1506
},
{
"epoch": 0.03815982590212055,
"grad_norm": 4.689101219177246,
"learning_rate": 4.936484639910927e-05,
"loss": 0.6041,
"step": 1508
},
{
"epoch": 0.038210435750797106,
"grad_norm": 6.9700798988342285,
"learning_rate": 4.936400290163133e-05,
"loss": 0.4521,
"step": 1510
},
{
"epoch": 0.03826104559947366,
"grad_norm": 5.602224826812744,
"learning_rate": 4.9363159404153385e-05,
"loss": 0.6519,
"step": 1512
},
{
"epoch": 0.03831165544815021,
"grad_norm": 5.881453037261963,
"learning_rate": 4.9362315906675445e-05,
"loss": 0.6069,
"step": 1514
},
{
"epoch": 0.038362265296826764,
"grad_norm": 5.395936012268066,
"learning_rate": 4.93614724091975e-05,
"loss": 0.3001,
"step": 1516
},
{
"epoch": 0.03841287514550332,
"grad_norm": 5.4586663246154785,
"learning_rate": 4.936062891171956e-05,
"loss": 0.6286,
"step": 1518
},
{
"epoch": 0.03846348499417987,
"grad_norm": 5.338792324066162,
"learning_rate": 4.935978541424162e-05,
"loss": 0.4526,
"step": 1520
},
{
"epoch": 0.03851409484285642,
"grad_norm": 2.985135555267334,
"learning_rate": 4.935894191676367e-05,
"loss": 0.2444,
"step": 1522
},
{
"epoch": 0.038564704691532975,
"grad_norm": 7.952456474304199,
"learning_rate": 4.935809841928573e-05,
"loss": 0.5033,
"step": 1524
},
{
"epoch": 0.03861531454020953,
"grad_norm": 7.913976192474365,
"learning_rate": 4.9357254921807786e-05,
"loss": 0.5823,
"step": 1526
},
{
"epoch": 0.03866592438888608,
"grad_norm": 7.781038284301758,
"learning_rate": 4.9356411424329846e-05,
"loss": 0.6915,
"step": 1528
},
{
"epoch": 0.03871653423756263,
"grad_norm": 5.074882984161377,
"learning_rate": 4.9355567926851906e-05,
"loss": 0.8664,
"step": 1530
},
{
"epoch": 0.03876714408623918,
"grad_norm": 3.08144474029541,
"learning_rate": 4.935472442937396e-05,
"loss": 0.3723,
"step": 1532
},
{
"epoch": 0.03881775393491573,
"grad_norm": 6.625777244567871,
"learning_rate": 4.935388093189602e-05,
"loss": 0.5908,
"step": 1534
},
{
"epoch": 0.038868363783592284,
"grad_norm": 8.451767921447754,
"learning_rate": 4.9353037434418073e-05,
"loss": 0.8605,
"step": 1536
},
{
"epoch": 0.03891897363226884,
"grad_norm": 5.070049285888672,
"learning_rate": 4.9352193936940134e-05,
"loss": 0.3042,
"step": 1538
},
{
"epoch": 0.03896958348094539,
"grad_norm": 7.37528133392334,
"learning_rate": 4.9351350439462194e-05,
"loss": 0.7376,
"step": 1540
},
{
"epoch": 0.03902019332962194,
"grad_norm": 3.3460285663604736,
"learning_rate": 4.935050694198425e-05,
"loss": 0.6662,
"step": 1542
},
{
"epoch": 0.039070803178298495,
"grad_norm": 3.999098062515259,
"learning_rate": 4.934966344450631e-05,
"loss": 0.3612,
"step": 1544
},
{
"epoch": 0.03912141302697505,
"grad_norm": 6.4491868019104,
"learning_rate": 4.934881994702836e-05,
"loss": 0.7708,
"step": 1546
},
{
"epoch": 0.0391720228756516,
"grad_norm": 4.037872314453125,
"learning_rate": 4.934797644955042e-05,
"loss": 0.4529,
"step": 1548
},
{
"epoch": 0.03922263272432815,
"grad_norm": 7.757660865783691,
"learning_rate": 4.9347132952072475e-05,
"loss": 0.6572,
"step": 1550
},
{
"epoch": 0.039273242573004706,
"grad_norm": 10.399569511413574,
"learning_rate": 4.9346289454594535e-05,
"loss": 0.971,
"step": 1552
},
{
"epoch": 0.03932385242168126,
"grad_norm": 4.098683834075928,
"learning_rate": 4.9345445957116595e-05,
"loss": 0.5687,
"step": 1554
},
{
"epoch": 0.03937446227035781,
"grad_norm": 4.029207706451416,
"learning_rate": 4.934460245963865e-05,
"loss": 0.4164,
"step": 1556
},
{
"epoch": 0.039425072119034364,
"grad_norm": 9.3286771774292,
"learning_rate": 4.934375896216071e-05,
"loss": 0.8485,
"step": 1558
},
{
"epoch": 0.03947568196771092,
"grad_norm": 8.822758674621582,
"learning_rate": 4.934291546468276e-05,
"loss": 0.5747,
"step": 1560
},
{
"epoch": 0.03952629181638747,
"grad_norm": 6.740304946899414,
"learning_rate": 4.934207196720482e-05,
"loss": 0.5014,
"step": 1562
},
{
"epoch": 0.03957690166506402,
"grad_norm": 4.9694929122924805,
"learning_rate": 4.934122846972688e-05,
"loss": 0.5403,
"step": 1564
},
{
"epoch": 0.039627511513740575,
"grad_norm": 7.032242298126221,
"learning_rate": 4.9340384972248936e-05,
"loss": 0.3896,
"step": 1566
},
{
"epoch": 0.03967812136241713,
"grad_norm": 3.1985256671905518,
"learning_rate": 4.9339541474770996e-05,
"loss": 0.4138,
"step": 1568
},
{
"epoch": 0.03972873121109368,
"grad_norm": 4.911308288574219,
"learning_rate": 4.933869797729305e-05,
"loss": 0.6613,
"step": 1570
},
{
"epoch": 0.03977934105977023,
"grad_norm": 10.836835861206055,
"learning_rate": 4.933785447981511e-05,
"loss": 0.368,
"step": 1572
},
{
"epoch": 0.039829950908446786,
"grad_norm": 7.5843377113342285,
"learning_rate": 4.933701098233717e-05,
"loss": 0.8429,
"step": 1574
},
{
"epoch": 0.03988056075712334,
"grad_norm": 4.915452003479004,
"learning_rate": 4.933616748485922e-05,
"loss": 0.4179,
"step": 1576
},
{
"epoch": 0.03993117060579989,
"grad_norm": 4.324033260345459,
"learning_rate": 4.9335323987381283e-05,
"loss": 0.5588,
"step": 1578
},
{
"epoch": 0.039981780454476444,
"grad_norm": 5.227353096008301,
"learning_rate": 4.933448048990334e-05,
"loss": 0.4188,
"step": 1580
},
{
"epoch": 0.040032390303153,
"grad_norm": 6.144440650939941,
"learning_rate": 4.93336369924254e-05,
"loss": 0.7091,
"step": 1582
},
{
"epoch": 0.04008300015182954,
"grad_norm": 9.614253044128418,
"learning_rate": 4.933279349494746e-05,
"loss": 0.5816,
"step": 1584
},
{
"epoch": 0.040133610000506095,
"grad_norm": 5.539832592010498,
"learning_rate": 4.933194999746951e-05,
"loss": 0.2897,
"step": 1586
},
{
"epoch": 0.04018421984918265,
"grad_norm": 4.285924911499023,
"learning_rate": 4.933110649999157e-05,
"loss": 0.4647,
"step": 1588
},
{
"epoch": 0.0402348296978592,
"grad_norm": 5.418667316436768,
"learning_rate": 4.9330263002513624e-05,
"loss": 0.377,
"step": 1590
},
{
"epoch": 0.04028543954653575,
"grad_norm": 7.248344421386719,
"learning_rate": 4.9329419505035684e-05,
"loss": 0.5599,
"step": 1592
},
{
"epoch": 0.040336049395212306,
"grad_norm": 7.9554643630981445,
"learning_rate": 4.9328576007557745e-05,
"loss": 0.8153,
"step": 1594
},
{
"epoch": 0.04038665924388886,
"grad_norm": 8.028340339660645,
"learning_rate": 4.93277325100798e-05,
"loss": 0.5364,
"step": 1596
},
{
"epoch": 0.04043726909256541,
"grad_norm": 7.2708964347839355,
"learning_rate": 4.932688901260186e-05,
"loss": 0.8303,
"step": 1598
},
{
"epoch": 0.040487878941241964,
"grad_norm": 5.855560779571533,
"learning_rate": 4.932604551512391e-05,
"loss": 0.4462,
"step": 1600
},
{
"epoch": 0.040487878941241964,
"eval_cer": 0.18224000249547695,
"eval_loss": 0.28801780939102173,
"eval_runtime": 2605.2542,
"eval_samples_per_second": 6.016,
"eval_steps_per_second": 0.376,
"step": 1600
},
{
"epoch": 0.04053848878991852,
"grad_norm": 10.296344757080078,
"learning_rate": 4.932520201764597e-05,
"loss": 0.5008,
"step": 1602
},
{
"epoch": 0.04058909863859507,
"grad_norm": 8.924449920654297,
"learning_rate": 4.932435852016803e-05,
"loss": 0.3729,
"step": 1604
},
{
"epoch": 0.04063970848727162,
"grad_norm": 5.110000133514404,
"learning_rate": 4.9323515022690085e-05,
"loss": 0.5821,
"step": 1606
},
{
"epoch": 0.040690318335948175,
"grad_norm": 5.40463399887085,
"learning_rate": 4.9322671525212146e-05,
"loss": 0.6681,
"step": 1608
},
{
"epoch": 0.04074092818462473,
"grad_norm": 10.994048118591309,
"learning_rate": 4.93218280277342e-05,
"loss": 0.453,
"step": 1610
},
{
"epoch": 0.04079153803330128,
"grad_norm": 6.396229267120361,
"learning_rate": 4.932098453025626e-05,
"loss": 0.4864,
"step": 1612
},
{
"epoch": 0.04084214788197783,
"grad_norm": 6.884303569793701,
"learning_rate": 4.932014103277832e-05,
"loss": 0.9021,
"step": 1614
},
{
"epoch": 0.040892757730654386,
"grad_norm": 15.925917625427246,
"learning_rate": 4.931929753530037e-05,
"loss": 0.6345,
"step": 1616
},
{
"epoch": 0.04094336757933094,
"grad_norm": 3.3225505352020264,
"learning_rate": 4.931845403782243e-05,
"loss": 0.5429,
"step": 1618
},
{
"epoch": 0.04099397742800749,
"grad_norm": 5.548943519592285,
"learning_rate": 4.9317610540344487e-05,
"loss": 0.5081,
"step": 1620
},
{
"epoch": 0.041044587276684044,
"grad_norm": 8.253005027770996,
"learning_rate": 4.931676704286655e-05,
"loss": 0.4432,
"step": 1622
},
{
"epoch": 0.0410951971253606,
"grad_norm": 10.242237091064453,
"learning_rate": 4.931592354538861e-05,
"loss": 0.6442,
"step": 1624
},
{
"epoch": 0.04114580697403715,
"grad_norm": 4.957180976867676,
"learning_rate": 4.931508004791066e-05,
"loss": 0.4146,
"step": 1626
},
{
"epoch": 0.0411964168227137,
"grad_norm": 7.067080497741699,
"learning_rate": 4.931423655043272e-05,
"loss": 0.424,
"step": 1628
},
{
"epoch": 0.041247026671390255,
"grad_norm": 5.9971842765808105,
"learning_rate": 4.9313393052954774e-05,
"loss": 0.4941,
"step": 1630
},
{
"epoch": 0.04129763652006681,
"grad_norm": 5.412482261657715,
"learning_rate": 4.9312549555476834e-05,
"loss": 0.4065,
"step": 1632
},
{
"epoch": 0.04134824636874336,
"grad_norm": 13.345954895019531,
"learning_rate": 4.931170605799889e-05,
"loss": 0.8215,
"step": 1634
},
{
"epoch": 0.04139885621741991,
"grad_norm": 9.993549346923828,
"learning_rate": 4.931086256052095e-05,
"loss": 0.8707,
"step": 1636
},
{
"epoch": 0.04144946606609646,
"grad_norm": 27.219308853149414,
"learning_rate": 4.931001906304301e-05,
"loss": 0.5455,
"step": 1638
},
{
"epoch": 0.04150007591477301,
"grad_norm": 5.13199520111084,
"learning_rate": 4.930917556556506e-05,
"loss": 0.5479,
"step": 1640
},
{
"epoch": 0.041550685763449564,
"grad_norm": 4.877132892608643,
"learning_rate": 4.930833206808712e-05,
"loss": 0.5966,
"step": 1642
},
{
"epoch": 0.04160129561212612,
"grad_norm": 3.1169495582580566,
"learning_rate": 4.9307488570609175e-05,
"loss": 0.5371,
"step": 1644
},
{
"epoch": 0.04165190546080267,
"grad_norm": 4.428645133972168,
"learning_rate": 4.9306645073131235e-05,
"loss": 0.3974,
"step": 1646
},
{
"epoch": 0.04170251530947922,
"grad_norm": 12.36322021484375,
"learning_rate": 4.9305801575653295e-05,
"loss": 0.6203,
"step": 1648
},
{
"epoch": 0.041753125158155775,
"grad_norm": 4.944615840911865,
"learning_rate": 4.930495807817535e-05,
"loss": 0.6994,
"step": 1650
},
{
"epoch": 0.04180373500683233,
"grad_norm": 3.7057061195373535,
"learning_rate": 4.930411458069741e-05,
"loss": 0.2836,
"step": 1652
},
{
"epoch": 0.04185434485550888,
"grad_norm": 5.559707164764404,
"learning_rate": 4.930327108321946e-05,
"loss": 0.7595,
"step": 1654
},
{
"epoch": 0.04190495470418543,
"grad_norm": 6.805633544921875,
"learning_rate": 4.930242758574152e-05,
"loss": 0.497,
"step": 1656
},
{
"epoch": 0.041955564552861986,
"grad_norm": 6.29421329498291,
"learning_rate": 4.930158408826358e-05,
"loss": 0.5615,
"step": 1658
},
{
"epoch": 0.04200617440153854,
"grad_norm": 3.3329832553863525,
"learning_rate": 4.9300740590785636e-05,
"loss": 0.2914,
"step": 1660
},
{
"epoch": 0.04205678425021509,
"grad_norm": 5.0592827796936035,
"learning_rate": 4.9299897093307696e-05,
"loss": 0.3799,
"step": 1662
},
{
"epoch": 0.042107394098891644,
"grad_norm": 4.176607131958008,
"learning_rate": 4.929905359582975e-05,
"loss": 0.3987,
"step": 1664
},
{
"epoch": 0.0421580039475682,
"grad_norm": 6.9350175857543945,
"learning_rate": 4.929821009835181e-05,
"loss": 0.337,
"step": 1666
},
{
"epoch": 0.04220861379624475,
"grad_norm": 12.642129898071289,
"learning_rate": 4.929736660087387e-05,
"loss": 0.5035,
"step": 1668
},
{
"epoch": 0.0422592236449213,
"grad_norm": 3.18143892288208,
"learning_rate": 4.9296523103395924e-05,
"loss": 0.2602,
"step": 1670
},
{
"epoch": 0.042309833493597855,
"grad_norm": 7.448511600494385,
"learning_rate": 4.9295679605917984e-05,
"loss": 0.6351,
"step": 1672
},
{
"epoch": 0.04236044334227441,
"grad_norm": 9.632100105285645,
"learning_rate": 4.929483610844004e-05,
"loss": 0.6639,
"step": 1674
},
{
"epoch": 0.04241105319095096,
"grad_norm": 4.993129730224609,
"learning_rate": 4.92939926109621e-05,
"loss": 0.5902,
"step": 1676
},
{
"epoch": 0.04246166303962751,
"grad_norm": 8.809192657470703,
"learning_rate": 4.929314911348416e-05,
"loss": 0.7136,
"step": 1678
},
{
"epoch": 0.042512272888304066,
"grad_norm": 4.831273078918457,
"learning_rate": 4.929230561600621e-05,
"loss": 0.702,
"step": 1680
},
{
"epoch": 0.04256288273698062,
"grad_norm": 3.406562089920044,
"learning_rate": 4.929146211852827e-05,
"loss": 0.7114,
"step": 1682
},
{
"epoch": 0.04261349258565717,
"grad_norm": 4.17376184463501,
"learning_rate": 4.9290618621050325e-05,
"loss": 0.5236,
"step": 1684
},
{
"epoch": 0.042664102434333724,
"grad_norm": 3.0841195583343506,
"learning_rate": 4.9289775123572385e-05,
"loss": 0.4595,
"step": 1686
},
{
"epoch": 0.04271471228301028,
"grad_norm": 6.880139350891113,
"learning_rate": 4.9288931626094445e-05,
"loss": 0.6339,
"step": 1688
},
{
"epoch": 0.04276532213168683,
"grad_norm": 3.7621781826019287,
"learning_rate": 4.92880881286165e-05,
"loss": 0.4469,
"step": 1690
},
{
"epoch": 0.042815931980363375,
"grad_norm": 9.83583927154541,
"learning_rate": 4.928724463113856e-05,
"loss": 0.7447,
"step": 1692
},
{
"epoch": 0.04286654182903993,
"grad_norm": 3.6700632572174072,
"learning_rate": 4.928640113366061e-05,
"loss": 0.3185,
"step": 1694
},
{
"epoch": 0.04291715167771648,
"grad_norm": 5.93882417678833,
"learning_rate": 4.928555763618267e-05,
"loss": 0.5691,
"step": 1696
},
{
"epoch": 0.042967761526393033,
"grad_norm": 2.9145689010620117,
"learning_rate": 4.928471413870473e-05,
"loss": 0.3396,
"step": 1698
},
{
"epoch": 0.043018371375069586,
"grad_norm": 4.243858814239502,
"learning_rate": 4.9283870641226786e-05,
"loss": 0.3018,
"step": 1700
},
{
"epoch": 0.04306898122374614,
"grad_norm": 4.957646369934082,
"learning_rate": 4.9283027143748846e-05,
"loss": 0.4555,
"step": 1702
},
{
"epoch": 0.04311959107242269,
"grad_norm": 5.5672831535339355,
"learning_rate": 4.92821836462709e-05,
"loss": 0.4606,
"step": 1704
},
{
"epoch": 0.043170200921099244,
"grad_norm": 4.047713756561279,
"learning_rate": 4.928134014879296e-05,
"loss": 0.3793,
"step": 1706
},
{
"epoch": 0.0432208107697758,
"grad_norm": 7.607228755950928,
"learning_rate": 4.928049665131502e-05,
"loss": 0.6637,
"step": 1708
},
{
"epoch": 0.04327142061845235,
"grad_norm": 11.504854202270508,
"learning_rate": 4.9279653153837073e-05,
"loss": 0.6826,
"step": 1710
},
{
"epoch": 0.0433220304671289,
"grad_norm": 5.851675510406494,
"learning_rate": 4.9278809656359134e-05,
"loss": 0.526,
"step": 1712
},
{
"epoch": 0.043372640315805455,
"grad_norm": 14.779943466186523,
"learning_rate": 4.927796615888119e-05,
"loss": 0.6321,
"step": 1714
},
{
"epoch": 0.04342325016448201,
"grad_norm": 9.94005012512207,
"learning_rate": 4.927712266140325e-05,
"loss": 0.7401,
"step": 1716
},
{
"epoch": 0.04347386001315856,
"grad_norm": 4.635720729827881,
"learning_rate": 4.92762791639253e-05,
"loss": 0.5283,
"step": 1718
},
{
"epoch": 0.04352446986183511,
"grad_norm": 3.568844795227051,
"learning_rate": 4.927543566644736e-05,
"loss": 0.4053,
"step": 1720
},
{
"epoch": 0.043575079710511666,
"grad_norm": 6.748814582824707,
"learning_rate": 4.927459216896942e-05,
"loss": 0.5624,
"step": 1722
},
{
"epoch": 0.04362568955918822,
"grad_norm": 7.1615424156188965,
"learning_rate": 4.9273748671491475e-05,
"loss": 0.8786,
"step": 1724
},
{
"epoch": 0.04367629940786477,
"grad_norm": 5.897001266479492,
"learning_rate": 4.9272905174013535e-05,
"loss": 0.7447,
"step": 1726
},
{
"epoch": 0.043726909256541324,
"grad_norm": 6.555644512176514,
"learning_rate": 4.927206167653559e-05,
"loss": 0.4951,
"step": 1728
},
{
"epoch": 0.04377751910521788,
"grad_norm": 8.615190505981445,
"learning_rate": 4.927121817905765e-05,
"loss": 0.6687,
"step": 1730
},
{
"epoch": 0.04382812895389443,
"grad_norm": 6.572555065155029,
"learning_rate": 4.927037468157971e-05,
"loss": 0.4269,
"step": 1732
},
{
"epoch": 0.04387873880257098,
"grad_norm": 6.60528564453125,
"learning_rate": 4.926953118410176e-05,
"loss": 0.4845,
"step": 1734
},
{
"epoch": 0.043929348651247535,
"grad_norm": 5.96574592590332,
"learning_rate": 4.926868768662382e-05,
"loss": 0.6266,
"step": 1736
},
{
"epoch": 0.04397995849992409,
"grad_norm": 2.911107063293457,
"learning_rate": 4.9267844189145876e-05,
"loss": 0.312,
"step": 1738
},
{
"epoch": 0.04403056834860064,
"grad_norm": 6.024972438812256,
"learning_rate": 4.9267000691667936e-05,
"loss": 0.5205,
"step": 1740
},
{
"epoch": 0.04408117819727719,
"grad_norm": 12.015813827514648,
"learning_rate": 4.9266157194189996e-05,
"loss": 0.621,
"step": 1742
},
{
"epoch": 0.044131788045953746,
"grad_norm": 17.426437377929688,
"learning_rate": 4.926531369671205e-05,
"loss": 0.7822,
"step": 1744
},
{
"epoch": 0.04418239789463029,
"grad_norm": 8.361766815185547,
"learning_rate": 4.926447019923411e-05,
"loss": 0.8237,
"step": 1746
},
{
"epoch": 0.044233007743306844,
"grad_norm": 6.699423789978027,
"learning_rate": 4.926362670175616e-05,
"loss": 0.5673,
"step": 1748
},
{
"epoch": 0.0442836175919834,
"grad_norm": 3.933328151702881,
"learning_rate": 4.926278320427822e-05,
"loss": 0.6946,
"step": 1750
},
{
"epoch": 0.04433422744065995,
"grad_norm": 5.092313766479492,
"learning_rate": 4.9261939706800283e-05,
"loss": 0.6024,
"step": 1752
},
{
"epoch": 0.0443848372893365,
"grad_norm": 16.1398868560791,
"learning_rate": 4.926109620932234e-05,
"loss": 0.7255,
"step": 1754
},
{
"epoch": 0.044435447138013055,
"grad_norm": 4.958022117614746,
"learning_rate": 4.92602527118444e-05,
"loss": 0.4381,
"step": 1756
},
{
"epoch": 0.04448605698668961,
"grad_norm": 6.70586633682251,
"learning_rate": 4.925940921436645e-05,
"loss": 0.5228,
"step": 1758
},
{
"epoch": 0.04453666683536616,
"grad_norm": 3.8433001041412354,
"learning_rate": 4.925856571688851e-05,
"loss": 0.3635,
"step": 1760
},
{
"epoch": 0.04458727668404271,
"grad_norm": 3.4007515907287598,
"learning_rate": 4.925772221941057e-05,
"loss": 0.2397,
"step": 1762
},
{
"epoch": 0.044637886532719266,
"grad_norm": 12.362958908081055,
"learning_rate": 4.9256878721932624e-05,
"loss": 0.5092,
"step": 1764
},
{
"epoch": 0.04468849638139582,
"grad_norm": 3.942413568496704,
"learning_rate": 4.9256035224454684e-05,
"loss": 0.4432,
"step": 1766
},
{
"epoch": 0.04473910623007237,
"grad_norm": 5.850658893585205,
"learning_rate": 4.925519172697674e-05,
"loss": 0.8487,
"step": 1768
},
{
"epoch": 0.044789716078748924,
"grad_norm": 3.8962783813476562,
"learning_rate": 4.92543482294988e-05,
"loss": 0.6375,
"step": 1770
},
{
"epoch": 0.04484032592742548,
"grad_norm": 10.159829139709473,
"learning_rate": 4.925350473202086e-05,
"loss": 0.5135,
"step": 1772
},
{
"epoch": 0.04489093577610203,
"grad_norm": 2.924187183380127,
"learning_rate": 4.925266123454291e-05,
"loss": 0.2508,
"step": 1774
},
{
"epoch": 0.04494154562477858,
"grad_norm": 6.0558342933654785,
"learning_rate": 4.925181773706497e-05,
"loss": 0.3182,
"step": 1776
},
{
"epoch": 0.044992155473455135,
"grad_norm": 5.556451320648193,
"learning_rate": 4.9250974239587025e-05,
"loss": 0.492,
"step": 1778
},
{
"epoch": 0.04504276532213169,
"grad_norm": 4.185290813446045,
"learning_rate": 4.9250130742109086e-05,
"loss": 0.4214,
"step": 1780
},
{
"epoch": 0.04509337517080824,
"grad_norm": 5.0443196296691895,
"learning_rate": 4.9249287244631146e-05,
"loss": 0.6803,
"step": 1782
},
{
"epoch": 0.04514398501948479,
"grad_norm": 10.363567352294922,
"learning_rate": 4.92484437471532e-05,
"loss": 0.5701,
"step": 1784
},
{
"epoch": 0.045194594868161346,
"grad_norm": 6.5281243324279785,
"learning_rate": 4.924760024967526e-05,
"loss": 0.417,
"step": 1786
},
{
"epoch": 0.0452452047168379,
"grad_norm": 9.699246406555176,
"learning_rate": 4.924675675219731e-05,
"loss": 0.7565,
"step": 1788
},
{
"epoch": 0.04529581456551445,
"grad_norm": 13.578716278076172,
"learning_rate": 4.924591325471937e-05,
"loss": 0.5528,
"step": 1790
},
{
"epoch": 0.045346424414191004,
"grad_norm": 6.151464462280273,
"learning_rate": 4.924506975724143e-05,
"loss": 0.655,
"step": 1792
},
{
"epoch": 0.04539703426286756,
"grad_norm": 11.576047897338867,
"learning_rate": 4.9244226259763487e-05,
"loss": 0.8649,
"step": 1794
},
{
"epoch": 0.04544764411154411,
"grad_norm": 6.639153003692627,
"learning_rate": 4.924338276228555e-05,
"loss": 0.553,
"step": 1796
},
{
"epoch": 0.04549825396022066,
"grad_norm": 4.5320563316345215,
"learning_rate": 4.92425392648076e-05,
"loss": 0.4832,
"step": 1798
},
{
"epoch": 0.04554886380889721,
"grad_norm": 3.708400249481201,
"learning_rate": 4.924169576732966e-05,
"loss": 0.5646,
"step": 1800
},
{
"epoch": 0.04554886380889721,
"eval_cer": 0.19456141992638343,
"eval_loss": 0.29710084199905396,
"eval_runtime": 2630.6631,
"eval_samples_per_second": 5.958,
"eval_steps_per_second": 0.373,
"step": 1800
},
{
"epoch": 0.04559947365757376,
"grad_norm": 4.805381774902344,
"learning_rate": 4.9240852269851714e-05,
"loss": 0.5711,
"step": 1802
},
{
"epoch": 0.045650083506250314,
"grad_norm": 3.130221366882324,
"learning_rate": 4.9240008772373774e-05,
"loss": 0.4231,
"step": 1804
},
{
"epoch": 0.045700693354926866,
"grad_norm": 4.455588340759277,
"learning_rate": 4.9239165274895834e-05,
"loss": 0.3281,
"step": 1806
},
{
"epoch": 0.04575130320360342,
"grad_norm": 2.951422929763794,
"learning_rate": 4.923832177741789e-05,
"loss": 0.4477,
"step": 1808
},
{
"epoch": 0.04580191305227997,
"grad_norm": 5.397090435028076,
"learning_rate": 4.923747827993995e-05,
"loss": 0.4032,
"step": 1810
},
{
"epoch": 0.045852522900956524,
"grad_norm": 5.767248630523682,
"learning_rate": 4.9236634782462e-05,
"loss": 0.6842,
"step": 1812
},
{
"epoch": 0.04590313274963308,
"grad_norm": 3.4613723754882812,
"learning_rate": 4.923579128498406e-05,
"loss": 0.2943,
"step": 1814
},
{
"epoch": 0.04595374259830963,
"grad_norm": 4.715557098388672,
"learning_rate": 4.923494778750612e-05,
"loss": 0.3851,
"step": 1816
},
{
"epoch": 0.04600435244698618,
"grad_norm": 4.709720611572266,
"learning_rate": 4.9234104290028175e-05,
"loss": 0.2846,
"step": 1818
},
{
"epoch": 0.046054962295662735,
"grad_norm": 2.6926629543304443,
"learning_rate": 4.9233260792550235e-05,
"loss": 0.474,
"step": 1820
},
{
"epoch": 0.04610557214433929,
"grad_norm": 7.4501752853393555,
"learning_rate": 4.923241729507229e-05,
"loss": 0.6805,
"step": 1822
},
{
"epoch": 0.04615618199301584,
"grad_norm": 4.941989421844482,
"learning_rate": 4.923157379759435e-05,
"loss": 0.4549,
"step": 1824
},
{
"epoch": 0.04620679184169239,
"grad_norm": 7.671848297119141,
"learning_rate": 4.923073030011641e-05,
"loss": 0.4181,
"step": 1826
},
{
"epoch": 0.046257401690368946,
"grad_norm": 1.92340886592865,
"learning_rate": 4.922988680263846e-05,
"loss": 0.2794,
"step": 1828
},
{
"epoch": 0.0463080115390455,
"grad_norm": 4.5162200927734375,
"learning_rate": 4.922904330516052e-05,
"loss": 0.4803,
"step": 1830
},
{
"epoch": 0.04635862138772205,
"grad_norm": 6.522688388824463,
"learning_rate": 4.9228199807682576e-05,
"loss": 0.5851,
"step": 1832
},
{
"epoch": 0.046409231236398604,
"grad_norm": 3.49273419380188,
"learning_rate": 4.9227356310204636e-05,
"loss": 0.5751,
"step": 1834
},
{
"epoch": 0.04645984108507516,
"grad_norm": 14.866829872131348,
"learning_rate": 4.9226512812726697e-05,
"loss": 0.5211,
"step": 1836
},
{
"epoch": 0.04651045093375171,
"grad_norm": 6.658441066741943,
"learning_rate": 4.922566931524875e-05,
"loss": 0.5325,
"step": 1838
},
{
"epoch": 0.04656106078242826,
"grad_norm": 3.8592264652252197,
"learning_rate": 4.922482581777081e-05,
"loss": 0.5917,
"step": 1840
},
{
"epoch": 0.046611670631104815,
"grad_norm": 10.734745025634766,
"learning_rate": 4.9223982320292864e-05,
"loss": 0.3665,
"step": 1842
},
{
"epoch": 0.04666228047978137,
"grad_norm": 7.1689534187316895,
"learning_rate": 4.9223138822814924e-05,
"loss": 0.386,
"step": 1844
},
{
"epoch": 0.04671289032845792,
"grad_norm": 4.963681697845459,
"learning_rate": 4.9222295325336984e-05,
"loss": 0.4437,
"step": 1846
},
{
"epoch": 0.04676350017713447,
"grad_norm": 2.5440425872802734,
"learning_rate": 4.922145182785904e-05,
"loss": 0.3268,
"step": 1848
},
{
"epoch": 0.046814110025811026,
"grad_norm": 14.03139877319336,
"learning_rate": 4.92206083303811e-05,
"loss": 0.7161,
"step": 1850
},
{
"epoch": 0.04686471987448758,
"grad_norm": 7.766716480255127,
"learning_rate": 4.921976483290315e-05,
"loss": 0.4935,
"step": 1852
},
{
"epoch": 0.046915329723164124,
"grad_norm": 13.12921142578125,
"learning_rate": 4.921892133542521e-05,
"loss": 0.4105,
"step": 1854
},
{
"epoch": 0.04696593957184068,
"grad_norm": 3.441727876663208,
"learning_rate": 4.921807783794727e-05,
"loss": 0.8493,
"step": 1856
},
{
"epoch": 0.04701654942051723,
"grad_norm": 6.923675060272217,
"learning_rate": 4.9217234340469325e-05,
"loss": 0.819,
"step": 1858
},
{
"epoch": 0.04706715926919378,
"grad_norm": 8.306994438171387,
"learning_rate": 4.9216390842991385e-05,
"loss": 0.6548,
"step": 1860
},
{
"epoch": 0.047117769117870335,
"grad_norm": 3.314638614654541,
"learning_rate": 4.921554734551344e-05,
"loss": 0.6389,
"step": 1862
},
{
"epoch": 0.04716837896654689,
"grad_norm": 8.720133781433105,
"learning_rate": 4.92147038480355e-05,
"loss": 0.5129,
"step": 1864
},
{
"epoch": 0.04721898881522344,
"grad_norm": 15.914939880371094,
"learning_rate": 4.921386035055756e-05,
"loss": 0.5298,
"step": 1866
},
{
"epoch": 0.047269598663899993,
"grad_norm": 4.567658424377441,
"learning_rate": 4.921301685307961e-05,
"loss": 0.426,
"step": 1868
},
{
"epoch": 0.047320208512576546,
"grad_norm": 2.2042078971862793,
"learning_rate": 4.921217335560167e-05,
"loss": 0.2168,
"step": 1870
},
{
"epoch": 0.0473708183612531,
"grad_norm": 2.475092649459839,
"learning_rate": 4.9211329858123726e-05,
"loss": 0.5226,
"step": 1872
},
{
"epoch": 0.04742142820992965,
"grad_norm": 3.972266435623169,
"learning_rate": 4.9210486360645786e-05,
"loss": 0.5745,
"step": 1874
},
{
"epoch": 0.047472038058606204,
"grad_norm": 4.532939910888672,
"learning_rate": 4.9209642863167846e-05,
"loss": 0.6833,
"step": 1876
},
{
"epoch": 0.04752264790728276,
"grad_norm": 7.014007568359375,
"learning_rate": 4.92087993656899e-05,
"loss": 0.5458,
"step": 1878
},
{
"epoch": 0.04757325775595931,
"grad_norm": 8.639058113098145,
"learning_rate": 4.920795586821196e-05,
"loss": 0.698,
"step": 1880
},
{
"epoch": 0.04762386760463586,
"grad_norm": 4.487074375152588,
"learning_rate": 4.920711237073401e-05,
"loss": 0.4481,
"step": 1882
},
{
"epoch": 0.047674477453312415,
"grad_norm": 7.813543319702148,
"learning_rate": 4.9206268873256074e-05,
"loss": 0.5677,
"step": 1884
},
{
"epoch": 0.04772508730198897,
"grad_norm": 14.59656047821045,
"learning_rate": 4.920542537577813e-05,
"loss": 0.5587,
"step": 1886
},
{
"epoch": 0.04777569715066552,
"grad_norm": 7.641439437866211,
"learning_rate": 4.920458187830019e-05,
"loss": 0.9166,
"step": 1888
},
{
"epoch": 0.04782630699934207,
"grad_norm": 11.967202186584473,
"learning_rate": 4.920373838082225e-05,
"loss": 0.6675,
"step": 1890
},
{
"epoch": 0.047876916848018626,
"grad_norm": 3.4140710830688477,
"learning_rate": 4.92028948833443e-05,
"loss": 0.5182,
"step": 1892
},
{
"epoch": 0.04792752669669518,
"grad_norm": 11.876256942749023,
"learning_rate": 4.920205138586636e-05,
"loss": 0.5383,
"step": 1894
},
{
"epoch": 0.04797813654537173,
"grad_norm": 3.3766562938690186,
"learning_rate": 4.9201207888388414e-05,
"loss": 0.4677,
"step": 1896
},
{
"epoch": 0.048028746394048284,
"grad_norm": 4.277167320251465,
"learning_rate": 4.9200364390910475e-05,
"loss": 0.4177,
"step": 1898
},
{
"epoch": 0.04807935624272484,
"grad_norm": 5.8467936515808105,
"learning_rate": 4.9199520893432535e-05,
"loss": 0.5603,
"step": 1900
},
{
"epoch": 0.04812996609140139,
"grad_norm": 5.7047038078308105,
"learning_rate": 4.919867739595459e-05,
"loss": 0.416,
"step": 1902
},
{
"epoch": 0.04818057594007794,
"grad_norm": 8.13704776763916,
"learning_rate": 4.919783389847665e-05,
"loss": 0.2622,
"step": 1904
},
{
"epoch": 0.048231185788754495,
"grad_norm": 7.6252923011779785,
"learning_rate": 4.91969904009987e-05,
"loss": 0.9887,
"step": 1906
},
{
"epoch": 0.04828179563743104,
"grad_norm": 6.5191497802734375,
"learning_rate": 4.919614690352076e-05,
"loss": 0.6701,
"step": 1908
},
{
"epoch": 0.048332405486107594,
"grad_norm": 98.16575622558594,
"learning_rate": 4.919530340604282e-05,
"loss": 1.0357,
"step": 1910
},
{
"epoch": 0.048383015334784146,
"grad_norm": 3.221017837524414,
"learning_rate": 4.9194459908564876e-05,
"loss": 0.396,
"step": 1912
},
{
"epoch": 0.0484336251834607,
"grad_norm": 4.679788112640381,
"learning_rate": 4.9193616411086936e-05,
"loss": 0.3766,
"step": 1914
},
{
"epoch": 0.04848423503213725,
"grad_norm": 7.324192047119141,
"learning_rate": 4.919277291360899e-05,
"loss": 0.6765,
"step": 1916
},
{
"epoch": 0.048534844880813804,
"grad_norm": 5.692389488220215,
"learning_rate": 4.919192941613105e-05,
"loss": 0.7648,
"step": 1918
},
{
"epoch": 0.04858545472949036,
"grad_norm": 8.046274185180664,
"learning_rate": 4.919108591865311e-05,
"loss": 0.717,
"step": 1920
},
{
"epoch": 0.04863606457816691,
"grad_norm": 8.431305885314941,
"learning_rate": 4.919024242117516e-05,
"loss": 0.5176,
"step": 1922
},
{
"epoch": 0.04868667442684346,
"grad_norm": 4.852190017700195,
"learning_rate": 4.918939892369722e-05,
"loss": 0.4792,
"step": 1924
},
{
"epoch": 0.048737284275520015,
"grad_norm": 3.7859811782836914,
"learning_rate": 4.918855542621928e-05,
"loss": 0.4534,
"step": 1926
},
{
"epoch": 0.04878789412419657,
"grad_norm": 3.5033421516418457,
"learning_rate": 4.918771192874134e-05,
"loss": 0.5004,
"step": 1928
},
{
"epoch": 0.04883850397287312,
"grad_norm": 19.82146453857422,
"learning_rate": 4.91868684312634e-05,
"loss": 0.7809,
"step": 1930
},
{
"epoch": 0.04888911382154967,
"grad_norm": 7.877125263214111,
"learning_rate": 4.918602493378545e-05,
"loss": 0.4266,
"step": 1932
},
{
"epoch": 0.048939723670226226,
"grad_norm": 9.093170166015625,
"learning_rate": 4.918518143630751e-05,
"loss": 0.7279,
"step": 1934
},
{
"epoch": 0.04899033351890278,
"grad_norm": 6.574698448181152,
"learning_rate": 4.9184337938829564e-05,
"loss": 0.4833,
"step": 1936
},
{
"epoch": 0.04904094336757933,
"grad_norm": 4.608344078063965,
"learning_rate": 4.9183494441351624e-05,
"loss": 0.5268,
"step": 1938
},
{
"epoch": 0.049091553216255884,
"grad_norm": 8.013690948486328,
"learning_rate": 4.9182650943873685e-05,
"loss": 0.5434,
"step": 1940
},
{
"epoch": 0.04914216306493244,
"grad_norm": 4.216137409210205,
"learning_rate": 4.918180744639574e-05,
"loss": 0.4351,
"step": 1942
},
{
"epoch": 0.04919277291360899,
"grad_norm": 7.3226776123046875,
"learning_rate": 4.91809639489178e-05,
"loss": 0.3895,
"step": 1944
},
{
"epoch": 0.04924338276228554,
"grad_norm": 4.8168840408325195,
"learning_rate": 4.918012045143985e-05,
"loss": 0.3544,
"step": 1946
},
{
"epoch": 0.049293992610962095,
"grad_norm": 8.331216812133789,
"learning_rate": 4.917927695396191e-05,
"loss": 0.2721,
"step": 1948
},
{
"epoch": 0.04934460245963865,
"grad_norm": 10.902067184448242,
"learning_rate": 4.917843345648397e-05,
"loss": 0.4973,
"step": 1950
},
{
"epoch": 0.0493952123083152,
"grad_norm": 5.606088161468506,
"learning_rate": 4.9177589959006025e-05,
"loss": 0.5057,
"step": 1952
},
{
"epoch": 0.04944582215699175,
"grad_norm": 14.297886848449707,
"learning_rate": 4.9176746461528086e-05,
"loss": 0.6632,
"step": 1954
},
{
"epoch": 0.049496432005668306,
"grad_norm": 5.292407989501953,
"learning_rate": 4.917590296405014e-05,
"loss": 0.8784,
"step": 1956
},
{
"epoch": 0.04954704185434486,
"grad_norm": 2.2982068061828613,
"learning_rate": 4.91750594665722e-05,
"loss": 0.3005,
"step": 1958
},
{
"epoch": 0.049597651703021405,
"grad_norm": 5.021894931793213,
"learning_rate": 4.917421596909426e-05,
"loss": 0.5095,
"step": 1960
},
{
"epoch": 0.04964826155169796,
"grad_norm": 6.863503932952881,
"learning_rate": 4.917337247161631e-05,
"loss": 0.5115,
"step": 1962
},
{
"epoch": 0.04969887140037451,
"grad_norm": 8.457030296325684,
"learning_rate": 4.917252897413837e-05,
"loss": 0.6833,
"step": 1964
},
{
"epoch": 0.04974948124905106,
"grad_norm": 7.3646674156188965,
"learning_rate": 4.9171685476660426e-05,
"loss": 0.6812,
"step": 1966
},
{
"epoch": 0.049800091097727615,
"grad_norm": 4.478794574737549,
"learning_rate": 4.917084197918249e-05,
"loss": 0.3277,
"step": 1968
},
{
"epoch": 0.04985070094640417,
"grad_norm": 8.332141876220703,
"learning_rate": 4.916999848170454e-05,
"loss": 0.3486,
"step": 1970
},
{
"epoch": 0.04990131079508072,
"grad_norm": 5.418234348297119,
"learning_rate": 4.91691549842266e-05,
"loss": 0.5811,
"step": 1972
},
{
"epoch": 0.049951920643757274,
"grad_norm": 7.849460124969482,
"learning_rate": 4.916831148674866e-05,
"loss": 0.9629,
"step": 1974
},
{
"epoch": 0.050002530492433826,
"grad_norm": 4.812062740325928,
"learning_rate": 4.9167467989270714e-05,
"loss": 0.4392,
"step": 1976
},
{
"epoch": 0.05005314034111038,
"grad_norm": 7.80704402923584,
"learning_rate": 4.9166624491792774e-05,
"loss": 0.9341,
"step": 1978
},
{
"epoch": 0.05010375018978693,
"grad_norm": 4.2226243019104,
"learning_rate": 4.916578099431483e-05,
"loss": 0.8323,
"step": 1980
},
{
"epoch": 0.050154360038463484,
"grad_norm": 5.1408867835998535,
"learning_rate": 4.916493749683689e-05,
"loss": 0.3631,
"step": 1982
},
{
"epoch": 0.05020496988714004,
"grad_norm": 6.943533897399902,
"learning_rate": 4.916409399935895e-05,
"loss": 0.4674,
"step": 1984
},
{
"epoch": 0.05025557973581659,
"grad_norm": 2.5346226692199707,
"learning_rate": 4.9163250501881e-05,
"loss": 0.3056,
"step": 1986
},
{
"epoch": 0.05030618958449314,
"grad_norm": 7.4098944664001465,
"learning_rate": 4.916240700440306e-05,
"loss": 0.7281,
"step": 1988
},
{
"epoch": 0.050356799433169695,
"grad_norm": 5.00679349899292,
"learning_rate": 4.9161563506925115e-05,
"loss": 0.5875,
"step": 1990
},
{
"epoch": 0.05040740928184625,
"grad_norm": 7.140769004821777,
"learning_rate": 4.9160720009447175e-05,
"loss": 0.4304,
"step": 1992
},
{
"epoch": 0.0504580191305228,
"grad_norm": 4.408178806304932,
"learning_rate": 4.9159876511969235e-05,
"loss": 0.3655,
"step": 1994
},
{
"epoch": 0.05050862897919935,
"grad_norm": 6.3958353996276855,
"learning_rate": 4.915903301449129e-05,
"loss": 0.5034,
"step": 1996
},
{
"epoch": 0.050559238827875906,
"grad_norm": 11.404850959777832,
"learning_rate": 4.915818951701335e-05,
"loss": 0.6704,
"step": 1998
},
{
"epoch": 0.05060984867655246,
"grad_norm": 22.483257293701172,
"learning_rate": 4.91573460195354e-05,
"loss": 0.5152,
"step": 2000
},
{
"epoch": 0.05060984867655246,
"eval_cer": 0.1947745752490278,
"eval_loss": 0.306193083524704,
"eval_runtime": 2641.4126,
"eval_samples_per_second": 5.934,
"eval_steps_per_second": 0.371,
"step": 2000
}
],
"logging_steps": 2,
"max_steps": 118554,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"total_flos": 4.299636354947482e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}