{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999990712551615, "eval_steps": 500, "global_step": 44863, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001114493806200672, "grad_norm": 0.933451771736145, "learning_rate": 1.9999999387038595e-05, "loss": 0.7486, "step": 5 }, { "epoch": 0.0002228987612401344, "grad_norm": 0.8256990313529968, "learning_rate": 1.999999754815445e-05, "loss": 0.6341, "step": 10 }, { "epoch": 0.00033434814186020164, "grad_norm": 0.9633138179779053, "learning_rate": 1.99999944833478e-05, "loss": 0.5651, "step": 15 }, { "epoch": 0.0004457975224802688, "grad_norm": 0.5811278820037842, "learning_rate": 1.999999019261901e-05, "loss": 0.6119, "step": 20 }, { "epoch": 0.000557246903100336, "grad_norm": 0.7817385196685791, "learning_rate": 1.999998467596861e-05, "loss": 0.5496, "step": 25 }, { "epoch": 0.0006686962837204033, "grad_norm": 0.9045018553733826, "learning_rate": 1.999997793339728e-05, "loss": 0.6103, "step": 30 }, { "epoch": 0.0007801456643404704, "grad_norm": 0.8104248046875, "learning_rate": 1.999996996490584e-05, "loss": 0.5454, "step": 35 }, { "epoch": 0.0008915950449605376, "grad_norm": 0.6950963139533997, "learning_rate": 1.9999960770495273e-05, "loss": 0.4731, "step": 40 }, { "epoch": 0.0010030444255806049, "grad_norm": 1.1192351579666138, "learning_rate": 1.9999950350166698e-05, "loss": 0.4454, "step": 45 }, { "epoch": 0.001114493806200672, "grad_norm": 0.8689408302307129, "learning_rate": 1.9999938703921403e-05, "loss": 0.4379, "step": 50 }, { "epoch": 0.0012259431868207392, "grad_norm": 0.7909908294677734, "learning_rate": 1.9999925831760807e-05, "loss": 0.4556, "step": 55 }, { "epoch": 0.0013373925674408065, "grad_norm": 0.8446680903434753, "learning_rate": 1.9999911733686496e-05, "loss": 0.4575, "step": 60 }, { "epoch": 0.0014488419480608737, "grad_norm": 1.1718509197235107, "learning_rate": 1.999989640970019e-05, "loss": 0.6243, "step": 65 }, { "epoch": 0.0015602913286809409, "grad_norm": 0.540774405002594, "learning_rate": 1.9999879859803775e-05, "loss": 0.4596, "step": 70 }, { "epoch": 0.001671740709301008, "grad_norm": 0.8521000742912292, "learning_rate": 1.9999862083999276e-05, "loss": 0.6575, "step": 75 }, { "epoch": 0.0017831900899210752, "grad_norm": 0.9445694088935852, "learning_rate": 1.999984308228887e-05, "loss": 0.5966, "step": 80 }, { "epoch": 0.0018946394705411426, "grad_norm": 0.7841966152191162, "learning_rate": 1.999982285467489e-05, "loss": 0.4704, "step": 85 }, { "epoch": 0.0020060888511612097, "grad_norm": 0.7752586603164673, "learning_rate": 1.9999801401159815e-05, "loss": 0.609, "step": 90 }, { "epoch": 0.002117538231781277, "grad_norm": 0.6920070648193359, "learning_rate": 1.9999778721746276e-05, "loss": 0.4078, "step": 95 }, { "epoch": 0.002228987612401344, "grad_norm": 0.8577648401260376, "learning_rate": 1.9999754816437052e-05, "loss": 0.548, "step": 100 }, { "epoch": 0.002340436993021411, "grad_norm": 0.7144039869308472, "learning_rate": 1.999972968523507e-05, "loss": 0.5046, "step": 105 }, { "epoch": 0.0024518863736414783, "grad_norm": 0.8289719223976135, "learning_rate": 1.999970332814342e-05, "loss": 0.5335, "step": 110 }, { "epoch": 0.0025633357542615455, "grad_norm": 0.7193917632102966, "learning_rate": 1.9999675745165322e-05, "loss": 0.493, "step": 115 }, { "epoch": 0.002674785134881613, "grad_norm": 0.7550503015518188, "learning_rate": 1.999964693630417e-05, "loss": 0.5015, "step": 120 }, { "epoch": 0.0027862345155016803, "grad_norm": 0.7383673191070557, "learning_rate": 1.9999616901563486e-05, "loss": 0.4139, "step": 125 }, { "epoch": 0.0028976838961217474, "grad_norm": 0.5490477085113525, "learning_rate": 1.999958564094695e-05, "loss": 0.5196, "step": 130 }, { "epoch": 0.0030091332767418146, "grad_norm": 0.6082173585891724, "learning_rate": 1.9999553154458405e-05, "loss": 0.5463, "step": 135 }, { "epoch": 0.0031205826573618817, "grad_norm": 0.7065110206604004, "learning_rate": 1.999951944210183e-05, "loss": 0.6414, "step": 140 }, { "epoch": 0.003232032037981949, "grad_norm": 0.9052994847297668, "learning_rate": 1.9999484503881354e-05, "loss": 0.5787, "step": 145 }, { "epoch": 0.003343481418602016, "grad_norm": 0.8806249499320984, "learning_rate": 1.9999448339801258e-05, "loss": 0.5622, "step": 150 }, { "epoch": 0.003454930799222083, "grad_norm": 0.8756687045097351, "learning_rate": 1.9999410949865984e-05, "loss": 0.5655, "step": 155 }, { "epoch": 0.0035663801798421504, "grad_norm": 0.5419703722000122, "learning_rate": 1.9999372334080108e-05, "loss": 0.4233, "step": 160 }, { "epoch": 0.0036778295604622175, "grad_norm": 0.6752704977989197, "learning_rate": 1.9999332492448368e-05, "loss": 0.5489, "step": 165 }, { "epoch": 0.003789278941082285, "grad_norm": 0.8393696546554565, "learning_rate": 1.999929142497565e-05, "loss": 0.4443, "step": 170 }, { "epoch": 0.0039007283217023523, "grad_norm": 0.6518959403038025, "learning_rate": 1.9999249131666983e-05, "loss": 0.5434, "step": 175 }, { "epoch": 0.004012177702322419, "grad_norm": 0.7919310331344604, "learning_rate": 1.9999205612527556e-05, "loss": 0.4413, "step": 180 }, { "epoch": 0.004123627082942487, "grad_norm": 0.7572973966598511, "learning_rate": 1.9999160867562702e-05, "loss": 0.4197, "step": 185 }, { "epoch": 0.004235076463562554, "grad_norm": 0.8709051012992859, "learning_rate": 1.999911489677791e-05, "loss": 0.53, "step": 190 }, { "epoch": 0.004346525844182621, "grad_norm": 0.7445703744888306, "learning_rate": 1.999906770017881e-05, "loss": 0.4986, "step": 195 }, { "epoch": 0.004457975224802688, "grad_norm": 0.7933171391487122, "learning_rate": 1.9999019277771192e-05, "loss": 0.5453, "step": 200 }, { "epoch": 0.004569424605422755, "grad_norm": 0.5811159610748291, "learning_rate": 1.999896962956099e-05, "loss": 0.5268, "step": 205 }, { "epoch": 0.004680873986042822, "grad_norm": 0.7632311582565308, "learning_rate": 1.9998918755554295e-05, "loss": 0.548, "step": 210 }, { "epoch": 0.0047923233666628895, "grad_norm": 0.6502264142036438, "learning_rate": 1.999886665575734e-05, "loss": 0.6303, "step": 215 }, { "epoch": 0.004903772747282957, "grad_norm": 0.7639764547348022, "learning_rate": 1.9998813330176507e-05, "loss": 0.4808, "step": 220 }, { "epoch": 0.005015222127903024, "grad_norm": 0.6532260179519653, "learning_rate": 1.9998758778818342e-05, "loss": 0.5684, "step": 225 }, { "epoch": 0.005126671508523091, "grad_norm": 0.7039293646812439, "learning_rate": 1.999870300168953e-05, "loss": 0.4445, "step": 230 }, { "epoch": 0.005238120889143158, "grad_norm": 0.5982739329338074, "learning_rate": 1.999864599879691e-05, "loss": 0.449, "step": 235 }, { "epoch": 0.005349570269763226, "grad_norm": 0.7111895084381104, "learning_rate": 1.9998587770147465e-05, "loss": 0.5256, "step": 240 }, { "epoch": 0.005461019650383293, "grad_norm": 0.5618811249732971, "learning_rate": 1.9998528315748338e-05, "loss": 0.4843, "step": 245 }, { "epoch": 0.0055724690310033605, "grad_norm": 0.7400057315826416, "learning_rate": 1.9998467635606813e-05, "loss": 0.5675, "step": 250 }, { "epoch": 0.005683918411623428, "grad_norm": 0.9986342191696167, "learning_rate": 1.9998405729730338e-05, "loss": 0.6181, "step": 255 }, { "epoch": 0.005795367792243495, "grad_norm": 0.7148557305335999, "learning_rate": 1.999834259812649e-05, "loss": 0.451, "step": 260 }, { "epoch": 0.005906817172863562, "grad_norm": 0.6029154062271118, "learning_rate": 1.9998278240803018e-05, "loss": 0.4362, "step": 265 }, { "epoch": 0.006018266553483629, "grad_norm": 0.7607182264328003, "learning_rate": 1.999821265776781e-05, "loss": 0.5407, "step": 270 }, { "epoch": 0.006129715934103696, "grad_norm": 0.7768608927726746, "learning_rate": 1.9998145849028906e-05, "loss": 0.7043, "step": 275 }, { "epoch": 0.0062411653147237635, "grad_norm": 0.6231095790863037, "learning_rate": 1.999807781459449e-05, "loss": 0.4728, "step": 280 }, { "epoch": 0.006352614695343831, "grad_norm": 0.5471903085708618, "learning_rate": 1.999800855447291e-05, "loss": 0.4453, "step": 285 }, { "epoch": 0.006464064075963898, "grad_norm": 0.8246671557426453, "learning_rate": 1.9997938068672652e-05, "loss": 0.5344, "step": 290 }, { "epoch": 0.006575513456583965, "grad_norm": 0.5547112822532654, "learning_rate": 1.999786635720236e-05, "loss": 0.4327, "step": 295 }, { "epoch": 0.006686962837204032, "grad_norm": 0.7208492755889893, "learning_rate": 1.9997793420070826e-05, "loss": 0.5299, "step": 300 }, { "epoch": 0.006798412217824099, "grad_norm": 0.7412981390953064, "learning_rate": 1.9997719257286988e-05, "loss": 0.5218, "step": 305 }, { "epoch": 0.006909861598444166, "grad_norm": 0.7224171161651611, "learning_rate": 1.999764386885994e-05, "loss": 0.5424, "step": 310 }, { "epoch": 0.0070213109790642336, "grad_norm": 0.6104803681373596, "learning_rate": 1.9997567254798925e-05, "loss": 0.5609, "step": 315 }, { "epoch": 0.007132760359684301, "grad_norm": 0.6950530409812927, "learning_rate": 1.999748941511333e-05, "loss": 0.4333, "step": 320 }, { "epoch": 0.007244209740304368, "grad_norm": 1.157394289970398, "learning_rate": 1.9997410349812705e-05, "loss": 0.4037, "step": 325 }, { "epoch": 0.007355659120924435, "grad_norm": 0.6917374134063721, "learning_rate": 1.9997330058906738e-05, "loss": 0.3879, "step": 330 }, { "epoch": 0.007467108501544503, "grad_norm": 0.7056661248207092, "learning_rate": 1.9997248542405273e-05, "loss": 0.4963, "step": 335 }, { "epoch": 0.00757855788216457, "grad_norm": 0.8213403820991516, "learning_rate": 1.9997165800318304e-05, "loss": 0.4699, "step": 340 }, { "epoch": 0.007690007262784637, "grad_norm": 0.7679407000541687, "learning_rate": 1.9997081832655976e-05, "loss": 0.4822, "step": 345 }, { "epoch": 0.0078014566434047045, "grad_norm": 0.9080938100814819, "learning_rate": 1.9996996639428578e-05, "loss": 0.4325, "step": 350 }, { "epoch": 0.00791290602402477, "grad_norm": 0.5678904056549072, "learning_rate": 1.999691022064656e-05, "loss": 0.4739, "step": 355 }, { "epoch": 0.008024355404644839, "grad_norm": 0.567152202129364, "learning_rate": 1.999682257632051e-05, "loss": 0.4516, "step": 360 }, { "epoch": 0.008135804785264905, "grad_norm": 0.7391321659088135, "learning_rate": 1.999673370646118e-05, "loss": 0.4987, "step": 365 }, { "epoch": 0.008247254165884973, "grad_norm": 0.7149109840393066, "learning_rate": 1.9996643611079457e-05, "loss": 0.6195, "step": 370 }, { "epoch": 0.00835870354650504, "grad_norm": 0.7918938994407654, "learning_rate": 1.999655229018639e-05, "loss": 0.4269, "step": 375 }, { "epoch": 0.008470152927125107, "grad_norm": 0.6578711271286011, "learning_rate": 1.9996459743793175e-05, "loss": 0.4386, "step": 380 }, { "epoch": 0.008581602307745176, "grad_norm": 0.518247663974762, "learning_rate": 1.9996365971911155e-05, "loss": 0.4827, "step": 385 }, { "epoch": 0.008693051688365242, "grad_norm": 0.8139423131942749, "learning_rate": 1.9996270974551824e-05, "loss": 0.4481, "step": 390 }, { "epoch": 0.00880450106898531, "grad_norm": 0.728428304195404, "learning_rate": 1.999617475172684e-05, "loss": 0.4442, "step": 395 }, { "epoch": 0.008915950449605376, "grad_norm": 0.6173394918441772, "learning_rate": 1.999607730344798e-05, "loss": 0.6014, "step": 400 }, { "epoch": 0.009027399830225444, "grad_norm": 0.5675866603851318, "learning_rate": 1.9995978629727207e-05, "loss": 0.5472, "step": 405 }, { "epoch": 0.00913884921084551, "grad_norm": 0.8469188809394836, "learning_rate": 1.999587873057661e-05, "loss": 0.4522, "step": 410 }, { "epoch": 0.009250298591465578, "grad_norm": 0.5388155579566956, "learning_rate": 1.9995777606008434e-05, "loss": 0.3067, "step": 415 }, { "epoch": 0.009361747972085645, "grad_norm": 0.6457868218421936, "learning_rate": 1.9995675256035082e-05, "loss": 0.4966, "step": 420 }, { "epoch": 0.009473197352705713, "grad_norm": 0.586306095123291, "learning_rate": 1.9995571680669096e-05, "loss": 0.5018, "step": 425 }, { "epoch": 0.009584646733325779, "grad_norm": 0.797963559627533, "learning_rate": 1.999546687992318e-05, "loss": 0.4784, "step": 430 }, { "epoch": 0.009696096113945847, "grad_norm": 0.6302655339241028, "learning_rate": 1.9995360853810172e-05, "loss": 0.3656, "step": 435 }, { "epoch": 0.009807545494565913, "grad_norm": 0.6582913398742676, "learning_rate": 1.9995253602343082e-05, "loss": 0.6217, "step": 440 }, { "epoch": 0.009918994875185981, "grad_norm": 0.663432776927948, "learning_rate": 1.9995145125535047e-05, "loss": 0.368, "step": 445 }, { "epoch": 0.010030444255806048, "grad_norm": 0.712199330329895, "learning_rate": 1.9995035423399373e-05, "loss": 0.6665, "step": 450 }, { "epoch": 0.010141893636426116, "grad_norm": 0.6557328104972839, "learning_rate": 1.9994924495949503e-05, "loss": 0.5076, "step": 455 }, { "epoch": 0.010253343017046182, "grad_norm": 0.7125101685523987, "learning_rate": 1.999481234319904e-05, "loss": 0.4728, "step": 460 }, { "epoch": 0.01036479239766625, "grad_norm": 0.6319306492805481, "learning_rate": 1.9994698965161736e-05, "loss": 0.5616, "step": 465 }, { "epoch": 0.010476241778286316, "grad_norm": 0.6128279566764832, "learning_rate": 1.999458436185148e-05, "loss": 0.4844, "step": 470 }, { "epoch": 0.010587691158906384, "grad_norm": 0.6070976853370667, "learning_rate": 1.999446853328233e-05, "loss": 0.551, "step": 475 }, { "epoch": 0.010699140539526452, "grad_norm": 0.9927272796630859, "learning_rate": 1.9994351479468484e-05, "loss": 0.477, "step": 480 }, { "epoch": 0.010810589920146519, "grad_norm": 0.6825286746025085, "learning_rate": 1.9994233200424288e-05, "loss": 0.5127, "step": 485 }, { "epoch": 0.010922039300766587, "grad_norm": 0.6259206533432007, "learning_rate": 1.999411369616425e-05, "loss": 0.5682, "step": 490 }, { "epoch": 0.011033488681386653, "grad_norm": 0.6004756689071655, "learning_rate": 1.9993992966703012e-05, "loss": 0.557, "step": 495 }, { "epoch": 0.011144938062006721, "grad_norm": 0.4804564416408539, "learning_rate": 1.999387101205538e-05, "loss": 0.4895, "step": 500 }, { "epoch": 0.011256387442626787, "grad_norm": 0.8419182896614075, "learning_rate": 1.99937478322363e-05, "loss": 0.5776, "step": 505 }, { "epoch": 0.011367836823246855, "grad_norm": 0.6336909532546997, "learning_rate": 1.999362342726088e-05, "loss": 0.4448, "step": 510 }, { "epoch": 0.011479286203866922, "grad_norm": 0.7228277921676636, "learning_rate": 1.9993497797144363e-05, "loss": 0.6556, "step": 515 }, { "epoch": 0.01159073558448699, "grad_norm": 0.7878555655479431, "learning_rate": 1.9993370941902154e-05, "loss": 0.4965, "step": 520 }, { "epoch": 0.011702184965107056, "grad_norm": 0.7645102739334106, "learning_rate": 1.999324286154981e-05, "loss": 0.3559, "step": 525 }, { "epoch": 0.011813634345727124, "grad_norm": 0.8857200145721436, "learning_rate": 1.9993113556103024e-05, "loss": 0.3633, "step": 530 }, { "epoch": 0.01192508372634719, "grad_norm": 0.6778533458709717, "learning_rate": 1.9992983025577647e-05, "loss": 0.4346, "step": 535 }, { "epoch": 0.012036533106967258, "grad_norm": 0.5672030448913574, "learning_rate": 1.9992851269989687e-05, "loss": 0.488, "step": 540 }, { "epoch": 0.012147982487587325, "grad_norm": 0.5006288886070251, "learning_rate": 1.9992718289355296e-05, "loss": 0.4945, "step": 545 }, { "epoch": 0.012259431868207393, "grad_norm": 0.6481898427009583, "learning_rate": 1.9992584083690777e-05, "loss": 0.477, "step": 550 }, { "epoch": 0.012370881248827459, "grad_norm": 0.5722131729125977, "learning_rate": 1.9992448653012576e-05, "loss": 0.3468, "step": 555 }, { "epoch": 0.012482330629447527, "grad_norm": 0.6108018755912781, "learning_rate": 1.9992311997337302e-05, "loss": 0.4508, "step": 560 }, { "epoch": 0.012593780010067593, "grad_norm": 0.5560242533683777, "learning_rate": 1.9992174116681706e-05, "loss": 0.4088, "step": 565 }, { "epoch": 0.012705229390687661, "grad_norm": 0.7010998725891113, "learning_rate": 1.999203501106269e-05, "loss": 0.4018, "step": 570 }, { "epoch": 0.01281667877130773, "grad_norm": 0.7132105827331543, "learning_rate": 1.999189468049731e-05, "loss": 0.4272, "step": 575 }, { "epoch": 0.012928128151927796, "grad_norm": 0.628948986530304, "learning_rate": 1.9991753125002766e-05, "loss": 0.594, "step": 580 }, { "epoch": 0.013039577532547864, "grad_norm": 0.6215829849243164, "learning_rate": 1.9991610344596416e-05, "loss": 0.5651, "step": 585 }, { "epoch": 0.01315102691316793, "grad_norm": 0.47377511858940125, "learning_rate": 1.9991466339295758e-05, "loss": 0.514, "step": 590 }, { "epoch": 0.013262476293787998, "grad_norm": 0.6468937397003174, "learning_rate": 1.999132110911845e-05, "loss": 0.4809, "step": 595 }, { "epoch": 0.013373925674408064, "grad_norm": 0.5864740014076233, "learning_rate": 1.9991174654082296e-05, "loss": 0.4404, "step": 600 }, { "epoch": 0.013485375055028132, "grad_norm": 0.6367695331573486, "learning_rate": 1.9991026974205247e-05, "loss": 0.3821, "step": 605 }, { "epoch": 0.013596824435648198, "grad_norm": 0.6551821231842041, "learning_rate": 1.999087806950541e-05, "loss": 0.4741, "step": 610 }, { "epoch": 0.013708273816268267, "grad_norm": 0.6759845614433289, "learning_rate": 1.9990727940001043e-05, "loss": 0.447, "step": 615 }, { "epoch": 0.013819723196888333, "grad_norm": 0.5439283847808838, "learning_rate": 1.9990576585710543e-05, "loss": 0.3345, "step": 620 }, { "epoch": 0.0139311725775084, "grad_norm": 0.7672117948532104, "learning_rate": 1.9990424006652475e-05, "loss": 0.4169, "step": 625 }, { "epoch": 0.014042621958128467, "grad_norm": 0.5180301070213318, "learning_rate": 1.999027020284553e-05, "loss": 0.5224, "step": 630 }, { "epoch": 0.014154071338748535, "grad_norm": 0.5923333168029785, "learning_rate": 1.9990115174308577e-05, "loss": 0.3539, "step": 635 }, { "epoch": 0.014265520719368601, "grad_norm": 0.5843740105628967, "learning_rate": 1.9989958921060613e-05, "loss": 0.4949, "step": 640 }, { "epoch": 0.01437697009998867, "grad_norm": 0.6855534911155701, "learning_rate": 1.9989801443120796e-05, "loss": 0.4121, "step": 645 }, { "epoch": 0.014488419480608736, "grad_norm": 0.4852747321128845, "learning_rate": 1.9989642740508435e-05, "loss": 0.5217, "step": 650 }, { "epoch": 0.014599868861228804, "grad_norm": 0.541433572769165, "learning_rate": 1.998948281324298e-05, "loss": 0.4084, "step": 655 }, { "epoch": 0.01471131824184887, "grad_norm": 0.7746926546096802, "learning_rate": 1.9989321661344036e-05, "loss": 0.394, "step": 660 }, { "epoch": 0.014822767622468938, "grad_norm": 0.6820917129516602, "learning_rate": 1.9989159284831365e-05, "loss": 0.5741, "step": 665 }, { "epoch": 0.014934217003089006, "grad_norm": 0.7556787133216858, "learning_rate": 1.998899568372487e-05, "loss": 0.4957, "step": 670 }, { "epoch": 0.015045666383709072, "grad_norm": 0.49043309688568115, "learning_rate": 1.998883085804461e-05, "loss": 0.3866, "step": 675 }, { "epoch": 0.01515711576432914, "grad_norm": 0.5872183442115784, "learning_rate": 1.9988664807810784e-05, "loss": 0.5766, "step": 680 }, { "epoch": 0.015268565144949207, "grad_norm": 0.6925109028816223, "learning_rate": 1.9988497533043755e-05, "loss": 0.4167, "step": 685 }, { "epoch": 0.015380014525569275, "grad_norm": 0.7696684002876282, "learning_rate": 1.998832903376403e-05, "loss": 0.5314, "step": 690 }, { "epoch": 0.015491463906189341, "grad_norm": 0.5626276135444641, "learning_rate": 1.998815930999226e-05, "loss": 0.5398, "step": 695 }, { "epoch": 0.015602913286809409, "grad_norm": 0.7252472639083862, "learning_rate": 1.998798836174926e-05, "loss": 0.3591, "step": 700 }, { "epoch": 0.015714362667429475, "grad_norm": 0.6303166747093201, "learning_rate": 1.9987816189055977e-05, "loss": 0.555, "step": 705 }, { "epoch": 0.01582581204804954, "grad_norm": 0.5740355849266052, "learning_rate": 1.9987642791933525e-05, "loss": 0.4974, "step": 710 }, { "epoch": 0.01593726142866961, "grad_norm": 0.49339422583580017, "learning_rate": 1.998746817040316e-05, "loss": 0.4822, "step": 715 }, { "epoch": 0.016048710809289678, "grad_norm": 0.6468168497085571, "learning_rate": 1.998729232448629e-05, "loss": 0.4763, "step": 720 }, { "epoch": 0.016160160189909744, "grad_norm": 0.7275257706642151, "learning_rate": 1.998711525420447e-05, "loss": 0.3596, "step": 725 }, { "epoch": 0.01627160957052981, "grad_norm": 0.673309862613678, "learning_rate": 1.9986936959579406e-05, "loss": 0.5256, "step": 730 }, { "epoch": 0.01638305895114988, "grad_norm": 0.6239281296730042, "learning_rate": 1.998675744063296e-05, "loss": 0.4836, "step": 735 }, { "epoch": 0.016494508331769946, "grad_norm": 0.47714993357658386, "learning_rate": 1.9986576697387142e-05, "loss": 0.4293, "step": 740 }, { "epoch": 0.016605957712390013, "grad_norm": 0.6969321370124817, "learning_rate": 1.9986394729864102e-05, "loss": 0.4619, "step": 745 }, { "epoch": 0.01671740709301008, "grad_norm": 0.6592143177986145, "learning_rate": 1.998621153808615e-05, "loss": 0.4271, "step": 750 }, { "epoch": 0.01682885647363015, "grad_norm": 0.3665013611316681, "learning_rate": 1.9986027122075746e-05, "loss": 0.4901, "step": 755 }, { "epoch": 0.016940305854250215, "grad_norm": 0.5377305150032043, "learning_rate": 1.9985841481855495e-05, "loss": 0.4967, "step": 760 }, { "epoch": 0.01705175523487028, "grad_norm": 0.5931578278541565, "learning_rate": 1.998565461744816e-05, "loss": 0.3903, "step": 765 }, { "epoch": 0.01716320461549035, "grad_norm": 0.7513961791992188, "learning_rate": 1.9985466528876646e-05, "loss": 0.5286, "step": 770 }, { "epoch": 0.017274653996110417, "grad_norm": 0.708732008934021, "learning_rate": 1.998527721616401e-05, "loss": 0.5049, "step": 775 }, { "epoch": 0.017386103376730484, "grad_norm": 0.5988253951072693, "learning_rate": 1.9985086679333462e-05, "loss": 0.4331, "step": 780 }, { "epoch": 0.01749755275735055, "grad_norm": 0.6259390711784363, "learning_rate": 1.998489491840836e-05, "loss": 0.4295, "step": 785 }, { "epoch": 0.01760900213797062, "grad_norm": 0.9363119602203369, "learning_rate": 1.9984701933412217e-05, "loss": 0.4609, "step": 790 }, { "epoch": 0.017720451518590686, "grad_norm": 0.6658714413642883, "learning_rate": 1.9984507724368682e-05, "loss": 0.5327, "step": 795 }, { "epoch": 0.017831900899210752, "grad_norm": 0.6230865716934204, "learning_rate": 1.998431229130157e-05, "loss": 0.561, "step": 800 }, { "epoch": 0.01794335027983082, "grad_norm": 0.44577354192733765, "learning_rate": 1.998411563423484e-05, "loss": 0.4457, "step": 805 }, { "epoch": 0.01805479966045089, "grad_norm": 0.6300340890884399, "learning_rate": 1.9983917753192596e-05, "loss": 0.5296, "step": 810 }, { "epoch": 0.018166249041070955, "grad_norm": 0.6478850245475769, "learning_rate": 1.99837186481991e-05, "loss": 0.4569, "step": 815 }, { "epoch": 0.01827769842169102, "grad_norm": 0.5392821431159973, "learning_rate": 1.998351831927876e-05, "loss": 0.4988, "step": 820 }, { "epoch": 0.018389147802311087, "grad_norm": 0.6933488845825195, "learning_rate": 1.998331676645614e-05, "loss": 0.5505, "step": 825 }, { "epoch": 0.018500597182931157, "grad_norm": 0.6098037362098694, "learning_rate": 1.9983113989755936e-05, "loss": 0.4964, "step": 830 }, { "epoch": 0.018612046563551223, "grad_norm": 0.7463369369506836, "learning_rate": 1.9982909989203022e-05, "loss": 0.5317, "step": 835 }, { "epoch": 0.01872349594417129, "grad_norm": 0.5679046511650085, "learning_rate": 1.9982704764822394e-05, "loss": 0.4199, "step": 840 }, { "epoch": 0.018834945324791356, "grad_norm": 0.5825939178466797, "learning_rate": 1.9982498316639217e-05, "loss": 0.4962, "step": 845 }, { "epoch": 0.018946394705411426, "grad_norm": 0.771053671836853, "learning_rate": 1.9982290644678805e-05, "loss": 0.4801, "step": 850 }, { "epoch": 0.019057844086031492, "grad_norm": 0.9269528388977051, "learning_rate": 1.9982081748966604e-05, "loss": 0.4249, "step": 855 }, { "epoch": 0.019169293466651558, "grad_norm": 0.5291380882263184, "learning_rate": 1.9981871629528236e-05, "loss": 0.4816, "step": 860 }, { "epoch": 0.019280742847271628, "grad_norm": 0.8677108287811279, "learning_rate": 1.9981660286389452e-05, "loss": 0.5593, "step": 865 }, { "epoch": 0.019392192227891694, "grad_norm": 0.6123939156532288, "learning_rate": 1.9981447719576163e-05, "loss": 0.5049, "step": 870 }, { "epoch": 0.01950364160851176, "grad_norm": 0.7051258683204651, "learning_rate": 1.998123392911443e-05, "loss": 0.4572, "step": 875 }, { "epoch": 0.019615090989131827, "grad_norm": 0.7395469546318054, "learning_rate": 1.998101891503046e-05, "loss": 0.4729, "step": 880 }, { "epoch": 0.019726540369751897, "grad_norm": 0.5871098041534424, "learning_rate": 1.998080267735061e-05, "loss": 0.3922, "step": 885 }, { "epoch": 0.019837989750371963, "grad_norm": 0.6082477569580078, "learning_rate": 1.9980585216101397e-05, "loss": 0.4535, "step": 890 }, { "epoch": 0.01994943913099203, "grad_norm": 0.5871613025665283, "learning_rate": 1.998036653130947e-05, "loss": 0.4683, "step": 895 }, { "epoch": 0.020060888511612095, "grad_norm": 0.48536795377731323, "learning_rate": 1.9980146623001645e-05, "loss": 0.639, "step": 900 }, { "epoch": 0.020172337892232165, "grad_norm": 0.6489545106887817, "learning_rate": 1.997992549120488e-05, "loss": 0.451, "step": 905 }, { "epoch": 0.02028378727285223, "grad_norm": 0.4703137278556824, "learning_rate": 1.997970313594628e-05, "loss": 0.4035, "step": 910 }, { "epoch": 0.020395236653472298, "grad_norm": 0.4589080512523651, "learning_rate": 1.997947955725311e-05, "loss": 0.5589, "step": 915 }, { "epoch": 0.020506686034092364, "grad_norm": 0.5990431904792786, "learning_rate": 1.9979254755152774e-05, "loss": 0.4947, "step": 920 }, { "epoch": 0.020618135414712434, "grad_norm": 0.5349087715148926, "learning_rate": 1.9979028729672832e-05, "loss": 0.51, "step": 925 }, { "epoch": 0.0207295847953325, "grad_norm": 0.6614366769790649, "learning_rate": 1.9978801480840996e-05, "loss": 0.5226, "step": 930 }, { "epoch": 0.020841034175952566, "grad_norm": 0.5134681463241577, "learning_rate": 1.9978573008685122e-05, "loss": 0.4575, "step": 935 }, { "epoch": 0.020952483556572633, "grad_norm": 0.8158881068229675, "learning_rate": 1.9978343313233223e-05, "loss": 0.4214, "step": 940 }, { "epoch": 0.021063932937192702, "grad_norm": 0.47573769092559814, "learning_rate": 1.9978112394513453e-05, "loss": 0.5866, "step": 945 }, { "epoch": 0.02117538231781277, "grad_norm": 0.7340862154960632, "learning_rate": 1.997788025255412e-05, "loss": 0.5066, "step": 950 }, { "epoch": 0.021286831698432835, "grad_norm": 0.8693744540214539, "learning_rate": 1.997764688738369e-05, "loss": 0.5558, "step": 955 }, { "epoch": 0.021398281079052905, "grad_norm": 0.42990386486053467, "learning_rate": 1.9977412299030764e-05, "loss": 0.5234, "step": 960 }, { "epoch": 0.02150973045967297, "grad_norm": 0.7284458875656128, "learning_rate": 1.9977176487524104e-05, "loss": 0.4541, "step": 965 }, { "epoch": 0.021621179840293037, "grad_norm": 0.7727136611938477, "learning_rate": 1.997693945289262e-05, "loss": 0.4682, "step": 970 }, { "epoch": 0.021732629220913104, "grad_norm": 0.48860886693000793, "learning_rate": 1.9976701195165367e-05, "loss": 0.4349, "step": 975 }, { "epoch": 0.021844078601533173, "grad_norm": 0.41914358735084534, "learning_rate": 1.997646171437156e-05, "loss": 0.4263, "step": 980 }, { "epoch": 0.02195552798215324, "grad_norm": 0.6689707040786743, "learning_rate": 1.997622101054055e-05, "loss": 0.5567, "step": 985 }, { "epoch": 0.022066977362773306, "grad_norm": 0.7929695844650269, "learning_rate": 1.997597908370185e-05, "loss": 0.4281, "step": 990 }, { "epoch": 0.022178426743393372, "grad_norm": 0.48860880732536316, "learning_rate": 1.9975735933885115e-05, "loss": 0.4514, "step": 995 }, { "epoch": 0.022289876124013442, "grad_norm": 0.5418014526367188, "learning_rate": 1.9975491561120158e-05, "loss": 0.4661, "step": 1000 }, { "epoch": 0.02240132550463351, "grad_norm": 0.7025567293167114, "learning_rate": 1.997524596543693e-05, "loss": 0.5681, "step": 1005 }, { "epoch": 0.022512774885253575, "grad_norm": 0.5372076034545898, "learning_rate": 1.997499914686555e-05, "loss": 0.4425, "step": 1010 }, { "epoch": 0.02262422426587364, "grad_norm": 0.5473577976226807, "learning_rate": 1.9974751105436266e-05, "loss": 0.4982, "step": 1015 }, { "epoch": 0.02273567364649371, "grad_norm": 0.47598281502723694, "learning_rate": 1.997450184117949e-05, "loss": 0.6274, "step": 1020 }, { "epoch": 0.022847123027113777, "grad_norm": 0.585905909538269, "learning_rate": 1.997425135412578e-05, "loss": 0.5617, "step": 1025 }, { "epoch": 0.022958572407733843, "grad_norm": 0.5420157313346863, "learning_rate": 1.9973999644305846e-05, "loss": 0.4755, "step": 1030 }, { "epoch": 0.02307002178835391, "grad_norm": 0.6367788314819336, "learning_rate": 1.9973746711750538e-05, "loss": 0.5466, "step": 1035 }, { "epoch": 0.02318147116897398, "grad_norm": 0.5344269275665283, "learning_rate": 1.997349255649087e-05, "loss": 0.4518, "step": 1040 }, { "epoch": 0.023292920549594046, "grad_norm": 0.466888964176178, "learning_rate": 1.9973237178558003e-05, "loss": 0.3619, "step": 1045 }, { "epoch": 0.023404369930214112, "grad_norm": 0.6306031346321106, "learning_rate": 1.9972980577983233e-05, "loss": 0.4003, "step": 1050 }, { "epoch": 0.02351581931083418, "grad_norm": 0.617495596408844, "learning_rate": 1.9972722754798028e-05, "loss": 0.5175, "step": 1055 }, { "epoch": 0.023627268691454248, "grad_norm": 0.5849382877349854, "learning_rate": 1.9972463709033988e-05, "loss": 0.4024, "step": 1060 }, { "epoch": 0.023738718072074314, "grad_norm": 0.556148886680603, "learning_rate": 1.9972203440722877e-05, "loss": 0.3669, "step": 1065 }, { "epoch": 0.02385016745269438, "grad_norm": 0.9622534513473511, "learning_rate": 1.9971941949896598e-05, "loss": 0.2972, "step": 1070 }, { "epoch": 0.02396161683331445, "grad_norm": 0.4846252501010895, "learning_rate": 1.99716792365872e-05, "loss": 0.43, "step": 1075 }, { "epoch": 0.024073066213934517, "grad_norm": 0.6357453465461731, "learning_rate": 1.9971415300826903e-05, "loss": 0.5174, "step": 1080 }, { "epoch": 0.024184515594554583, "grad_norm": 0.47302985191345215, "learning_rate": 1.9971150142648056e-05, "loss": 0.3586, "step": 1085 }, { "epoch": 0.02429596497517465, "grad_norm": 0.6432552337646484, "learning_rate": 1.997088376208317e-05, "loss": 0.4996, "step": 1090 }, { "epoch": 0.02440741435579472, "grad_norm": 0.6288877129554749, "learning_rate": 1.9970616159164896e-05, "loss": 0.5208, "step": 1095 }, { "epoch": 0.024518863736414785, "grad_norm": 0.6514771580696106, "learning_rate": 1.997034733392604e-05, "loss": 0.4348, "step": 1100 }, { "epoch": 0.02463031311703485, "grad_norm": 0.6467877626419067, "learning_rate": 1.997007728639956e-05, "loss": 0.5967, "step": 1105 }, { "epoch": 0.024741762497654918, "grad_norm": 0.547593355178833, "learning_rate": 1.9969806016618566e-05, "loss": 0.4113, "step": 1110 }, { "epoch": 0.024853211878274988, "grad_norm": 0.6538119316101074, "learning_rate": 1.9969533524616306e-05, "loss": 0.4445, "step": 1115 }, { "epoch": 0.024964661258895054, "grad_norm": 0.7080554962158203, "learning_rate": 1.9969259810426192e-05, "loss": 0.3812, "step": 1120 }, { "epoch": 0.02507611063951512, "grad_norm": 0.45334309339523315, "learning_rate": 1.9968984874081773e-05, "loss": 0.4389, "step": 1125 }, { "epoch": 0.025187560020135186, "grad_norm": 0.4973078966140747, "learning_rate": 1.9968708715616757e-05, "loss": 0.5143, "step": 1130 }, { "epoch": 0.025299009400755256, "grad_norm": 0.807214081287384, "learning_rate": 1.9968431335065006e-05, "loss": 0.4306, "step": 1135 }, { "epoch": 0.025410458781375322, "grad_norm": 0.6170802712440491, "learning_rate": 1.996815273246051e-05, "loss": 0.4428, "step": 1140 }, { "epoch": 0.02552190816199539, "grad_norm": 0.5940778255462646, "learning_rate": 1.9967872907837433e-05, "loss": 0.5215, "step": 1145 }, { "epoch": 0.02563335754261546, "grad_norm": 0.48376041650772095, "learning_rate": 1.9967591861230077e-05, "loss": 0.4973, "step": 1150 }, { "epoch": 0.025744806923235525, "grad_norm": 0.4939127564430237, "learning_rate": 1.99673095926729e-05, "loss": 0.3881, "step": 1155 }, { "epoch": 0.02585625630385559, "grad_norm": 0.6013267636299133, "learning_rate": 1.9967026102200503e-05, "loss": 0.3574, "step": 1160 }, { "epoch": 0.025967705684475657, "grad_norm": 0.5341581106185913, "learning_rate": 1.9966741389847634e-05, "loss": 0.5351, "step": 1165 }, { "epoch": 0.026079155065095727, "grad_norm": 0.4685159921646118, "learning_rate": 1.9966455455649207e-05, "loss": 0.3158, "step": 1170 }, { "epoch": 0.026190604445715793, "grad_norm": 0.7413740158081055, "learning_rate": 1.9966168299640268e-05, "loss": 0.4989, "step": 1175 }, { "epoch": 0.02630205382633586, "grad_norm": 0.46080586314201355, "learning_rate": 1.9965879921856023e-05, "loss": 0.5247, "step": 1180 }, { "epoch": 0.026413503206955926, "grad_norm": 0.6611289381980896, "learning_rate": 1.9965590322331825e-05, "loss": 0.467, "step": 1185 }, { "epoch": 0.026524952587575996, "grad_norm": 0.6633203029632568, "learning_rate": 1.9965299501103178e-05, "loss": 0.4813, "step": 1190 }, { "epoch": 0.026636401968196062, "grad_norm": 0.5674930810928345, "learning_rate": 1.9965007458205727e-05, "loss": 0.4263, "step": 1195 }, { "epoch": 0.02674785134881613, "grad_norm": 0.513952910900116, "learning_rate": 1.9964714193675287e-05, "loss": 0.416, "step": 1200 }, { "epoch": 0.026859300729436195, "grad_norm": 0.6331111192703247, "learning_rate": 1.9964419707547797e-05, "loss": 0.383, "step": 1205 }, { "epoch": 0.026970750110056264, "grad_norm": 0.5278674364089966, "learning_rate": 1.9964123999859365e-05, "loss": 0.515, "step": 1210 }, { "epoch": 0.02708219949067633, "grad_norm": 0.963834822177887, "learning_rate": 1.9963827070646245e-05, "loss": 0.5118, "step": 1215 }, { "epoch": 0.027193648871296397, "grad_norm": 0.7036752700805664, "learning_rate": 1.996352891994483e-05, "loss": 0.53, "step": 1220 }, { "epoch": 0.027305098251916463, "grad_norm": 0.4653802216053009, "learning_rate": 1.996322954779168e-05, "loss": 0.4224, "step": 1225 }, { "epoch": 0.027416547632536533, "grad_norm": 0.6808333992958069, "learning_rate": 1.996292895422349e-05, "loss": 0.5377, "step": 1230 }, { "epoch": 0.0275279970131566, "grad_norm": 0.5279821157455444, "learning_rate": 1.9962627139277114e-05, "loss": 0.4485, "step": 1235 }, { "epoch": 0.027639446393776666, "grad_norm": 0.40944090485572815, "learning_rate": 1.996232410298955e-05, "loss": 0.5459, "step": 1240 }, { "epoch": 0.027750895774396735, "grad_norm": 0.6649956107139587, "learning_rate": 1.9962019845397945e-05, "loss": 0.539, "step": 1245 }, { "epoch": 0.0278623451550168, "grad_norm": 0.5300599336624146, "learning_rate": 1.9961714366539607e-05, "loss": 0.5001, "step": 1250 }, { "epoch": 0.027973794535636868, "grad_norm": 0.6410908102989197, "learning_rate": 1.9961407666451978e-05, "loss": 0.3778, "step": 1255 }, { "epoch": 0.028085243916256934, "grad_norm": 0.5215335488319397, "learning_rate": 1.996109974517266e-05, "loss": 0.5084, "step": 1260 }, { "epoch": 0.028196693296877004, "grad_norm": 0.7652701735496521, "learning_rate": 1.99607906027394e-05, "loss": 0.4056, "step": 1265 }, { "epoch": 0.02830814267749707, "grad_norm": 0.5366575717926025, "learning_rate": 1.9960480239190095e-05, "loss": 0.4365, "step": 1270 }, { "epoch": 0.028419592058117137, "grad_norm": 0.6422768235206604, "learning_rate": 1.99601686545628e-05, "loss": 0.4169, "step": 1275 }, { "epoch": 0.028531041438737203, "grad_norm": 0.686603844165802, "learning_rate": 1.9959855848895707e-05, "loss": 0.3715, "step": 1280 }, { "epoch": 0.028642490819357273, "grad_norm": 0.4541156589984894, "learning_rate": 1.9959541822227164e-05, "loss": 0.5185, "step": 1285 }, { "epoch": 0.02875394019997734, "grad_norm": 0.44781601428985596, "learning_rate": 1.995922657459567e-05, "loss": 0.4079, "step": 1290 }, { "epoch": 0.028865389580597405, "grad_norm": 0.487728089094162, "learning_rate": 1.9958910106039875e-05, "loss": 0.287, "step": 1295 }, { "epoch": 0.02897683896121747, "grad_norm": 0.45344364643096924, "learning_rate": 1.995859241659857e-05, "loss": 0.4283, "step": 1300 }, { "epoch": 0.02908828834183754, "grad_norm": 0.7202114462852478, "learning_rate": 1.9958273506310703e-05, "loss": 0.4479, "step": 1305 }, { "epoch": 0.029199737722457608, "grad_norm": 0.5753505229949951, "learning_rate": 1.9957953375215368e-05, "loss": 0.4883, "step": 1310 }, { "epoch": 0.029311187103077674, "grad_norm": 0.6523656249046326, "learning_rate": 1.9957632023351815e-05, "loss": 0.3157, "step": 1315 }, { "epoch": 0.02942263648369774, "grad_norm": 0.5862640142440796, "learning_rate": 1.995730945075944e-05, "loss": 0.3809, "step": 1320 }, { "epoch": 0.02953408586431781, "grad_norm": 0.6741471290588379, "learning_rate": 1.995698565747778e-05, "loss": 0.4532, "step": 1325 }, { "epoch": 0.029645535244937876, "grad_norm": 0.6238347887992859, "learning_rate": 1.9956660643546538e-05, "loss": 0.4625, "step": 1330 }, { "epoch": 0.029756984625557942, "grad_norm": 0.44492000341415405, "learning_rate": 1.9956334409005553e-05, "loss": 0.4245, "step": 1335 }, { "epoch": 0.029868434006178012, "grad_norm": 0.5926109552383423, "learning_rate": 1.9956006953894817e-05, "loss": 0.3555, "step": 1340 }, { "epoch": 0.02997988338679808, "grad_norm": 0.5521535277366638, "learning_rate": 1.9955678278254483e-05, "loss": 0.4892, "step": 1345 }, { "epoch": 0.030091332767418145, "grad_norm": 0.5029247999191284, "learning_rate": 1.9955348382124836e-05, "loss": 0.4331, "step": 1350 }, { "epoch": 0.03020278214803821, "grad_norm": 0.4701353907585144, "learning_rate": 1.995501726554632e-05, "loss": 0.4166, "step": 1355 }, { "epoch": 0.03031423152865828, "grad_norm": 0.5947720408439636, "learning_rate": 1.995468492855953e-05, "loss": 0.4421, "step": 1360 }, { "epoch": 0.030425680909278347, "grad_norm": 0.5445137023925781, "learning_rate": 1.9954351371205202e-05, "loss": 0.3011, "step": 1365 }, { "epoch": 0.030537130289898413, "grad_norm": 0.5879339575767517, "learning_rate": 1.9954016593524236e-05, "loss": 0.3486, "step": 1370 }, { "epoch": 0.03064857967051848, "grad_norm": 0.4083978831768036, "learning_rate": 1.9953680595557668e-05, "loss": 0.4505, "step": 1375 }, { "epoch": 0.03076002905113855, "grad_norm": 0.7546375393867493, "learning_rate": 1.9953343377346688e-05, "loss": 0.5002, "step": 1380 }, { "epoch": 0.030871478431758616, "grad_norm": 0.5613773465156555, "learning_rate": 1.995300493893264e-05, "loss": 0.5096, "step": 1385 }, { "epoch": 0.030982927812378682, "grad_norm": 0.6815944910049438, "learning_rate": 1.995266528035701e-05, "loss": 0.527, "step": 1390 }, { "epoch": 0.03109437719299875, "grad_norm": 0.48527321219444275, "learning_rate": 1.995232440166144e-05, "loss": 0.4285, "step": 1395 }, { "epoch": 0.031205826573618818, "grad_norm": 0.5341393351554871, "learning_rate": 1.995198230288772e-05, "loss": 0.5031, "step": 1400 }, { "epoch": 0.03131727595423888, "grad_norm": 0.5466798543930054, "learning_rate": 1.9951638984077784e-05, "loss": 0.4208, "step": 1405 }, { "epoch": 0.03142872533485895, "grad_norm": 0.5577125549316406, "learning_rate": 1.9951294445273725e-05, "loss": 0.4641, "step": 1410 }, { "epoch": 0.03154017471547902, "grad_norm": 0.47489219903945923, "learning_rate": 1.995094868651778e-05, "loss": 0.4353, "step": 1415 }, { "epoch": 0.03165162409609908, "grad_norm": 0.4973861277103424, "learning_rate": 1.9950601707852337e-05, "loss": 0.3991, "step": 1420 }, { "epoch": 0.03176307347671915, "grad_norm": 0.5270541310310364, "learning_rate": 1.9950253509319928e-05, "loss": 0.4269, "step": 1425 }, { "epoch": 0.03187452285733922, "grad_norm": 0.618984043598175, "learning_rate": 1.9949904090963245e-05, "loss": 0.3404, "step": 1430 }, { "epoch": 0.031985972237959286, "grad_norm": 0.5443921089172363, "learning_rate": 1.9949553452825122e-05, "loss": 0.4597, "step": 1435 }, { "epoch": 0.032097421618579355, "grad_norm": 0.7535853981971741, "learning_rate": 1.9949201594948544e-05, "loss": 0.4893, "step": 1440 }, { "epoch": 0.032208870999199425, "grad_norm": 0.5268767476081848, "learning_rate": 1.9948848517376644e-05, "loss": 0.4357, "step": 1445 }, { "epoch": 0.03232032037981949, "grad_norm": 0.7284941077232361, "learning_rate": 1.9948494220152714e-05, "loss": 0.4246, "step": 1450 }, { "epoch": 0.03243176976043956, "grad_norm": 0.555818498134613, "learning_rate": 1.9948138703320178e-05, "loss": 0.4174, "step": 1455 }, { "epoch": 0.03254321914105962, "grad_norm": 0.5622695088386536, "learning_rate": 1.9947781966922628e-05, "loss": 0.3668, "step": 1460 }, { "epoch": 0.03265466852167969, "grad_norm": 0.7035006880760193, "learning_rate": 1.9947424011003795e-05, "loss": 0.4184, "step": 1465 }, { "epoch": 0.03276611790229976, "grad_norm": 0.6183652877807617, "learning_rate": 1.994706483560756e-05, "loss": 0.4978, "step": 1470 }, { "epoch": 0.03287756728291982, "grad_norm": 0.5243300795555115, "learning_rate": 1.9946704440777952e-05, "loss": 0.4918, "step": 1475 }, { "epoch": 0.03298901666353989, "grad_norm": 0.6121699213981628, "learning_rate": 1.9946342826559162e-05, "loss": 0.4743, "step": 1480 }, { "epoch": 0.03310046604415996, "grad_norm": 0.6591364741325378, "learning_rate": 1.994597999299551e-05, "loss": 0.4328, "step": 1485 }, { "epoch": 0.033211915424780025, "grad_norm": 0.42131346464157104, "learning_rate": 1.9945615940131486e-05, "loss": 0.357, "step": 1490 }, { "epoch": 0.033323364805400095, "grad_norm": 0.49311649799346924, "learning_rate": 1.9945250668011714e-05, "loss": 0.3668, "step": 1495 }, { "epoch": 0.03343481418602016, "grad_norm": 0.6135536432266235, "learning_rate": 1.9944884176680975e-05, "loss": 0.4503, "step": 1500 }, { "epoch": 0.03354626356664023, "grad_norm": 0.32271111011505127, "learning_rate": 1.9944516466184202e-05, "loss": 0.3132, "step": 1505 }, { "epoch": 0.0336577129472603, "grad_norm": 0.5943491458892822, "learning_rate": 1.994414753656647e-05, "loss": 0.5199, "step": 1510 }, { "epoch": 0.03376916232788036, "grad_norm": 0.5975769758224487, "learning_rate": 1.9943777387873006e-05, "loss": 0.367, "step": 1515 }, { "epoch": 0.03388061170850043, "grad_norm": 0.73946613073349, "learning_rate": 1.9943406020149185e-05, "loss": 0.4344, "step": 1520 }, { "epoch": 0.0339920610891205, "grad_norm": 0.6030755043029785, "learning_rate": 1.994303343344054e-05, "loss": 0.5145, "step": 1525 }, { "epoch": 0.03410351046974056, "grad_norm": 0.5314607620239258, "learning_rate": 1.9942659627792746e-05, "loss": 0.4169, "step": 1530 }, { "epoch": 0.03421495985036063, "grad_norm": 0.47629496455192566, "learning_rate": 1.9942284603251624e-05, "loss": 0.3843, "step": 1535 }, { "epoch": 0.0343264092309807, "grad_norm": 0.5412185788154602, "learning_rate": 1.9941908359863152e-05, "loss": 0.3675, "step": 1540 }, { "epoch": 0.034437858611600765, "grad_norm": 0.6103938221931458, "learning_rate": 1.994153089767346e-05, "loss": 0.4595, "step": 1545 }, { "epoch": 0.034549307992220835, "grad_norm": 0.6166775226593018, "learning_rate": 1.9941152216728813e-05, "loss": 0.4521, "step": 1550 }, { "epoch": 0.0346607573728409, "grad_norm": 0.4617781341075897, "learning_rate": 1.9940772317075638e-05, "loss": 0.4703, "step": 1555 }, { "epoch": 0.03477220675346097, "grad_norm": 0.5079278349876404, "learning_rate": 1.9940391198760508e-05, "loss": 0.5156, "step": 1560 }, { "epoch": 0.03488365613408104, "grad_norm": 0.5550146102905273, "learning_rate": 1.9940008861830146e-05, "loss": 0.5081, "step": 1565 }, { "epoch": 0.0349951055147011, "grad_norm": 0.6835542321205139, "learning_rate": 1.9939625306331423e-05, "loss": 0.4996, "step": 1570 }, { "epoch": 0.03510655489532117, "grad_norm": 0.6561864018440247, "learning_rate": 1.993924053231136e-05, "loss": 0.4664, "step": 1575 }, { "epoch": 0.03521800427594124, "grad_norm": 0.5839830636978149, "learning_rate": 1.9938854539817124e-05, "loss": 0.4001, "step": 1580 }, { "epoch": 0.0353294536565613, "grad_norm": 0.38994866609573364, "learning_rate": 1.9938467328896038e-05, "loss": 0.5137, "step": 1585 }, { "epoch": 0.03544090303718137, "grad_norm": 0.7057561874389648, "learning_rate": 1.993807889959557e-05, "loss": 0.5832, "step": 1590 }, { "epoch": 0.035552352417801435, "grad_norm": 0.4783352315425873, "learning_rate": 1.9937689251963347e-05, "loss": 0.3321, "step": 1595 }, { "epoch": 0.035663801798421504, "grad_norm": 0.44710680842399597, "learning_rate": 1.993729838604712e-05, "loss": 0.3661, "step": 1600 }, { "epoch": 0.035775251179041574, "grad_norm": 0.6399690508842468, "learning_rate": 1.993690630189482e-05, "loss": 0.4862, "step": 1605 }, { "epoch": 0.03588670055966164, "grad_norm": 0.8524630069732666, "learning_rate": 1.9936512999554505e-05, "loss": 0.4287, "step": 1610 }, { "epoch": 0.03599814994028171, "grad_norm": 0.6386929750442505, "learning_rate": 1.9936118479074394e-05, "loss": 0.358, "step": 1615 }, { "epoch": 0.03610959932090178, "grad_norm": 0.3798363208770752, "learning_rate": 1.9935722740502857e-05, "loss": 0.3936, "step": 1620 }, { "epoch": 0.03622104870152184, "grad_norm": 0.5178596377372742, "learning_rate": 1.9935325783888403e-05, "loss": 0.3659, "step": 1625 }, { "epoch": 0.03633249808214191, "grad_norm": 0.5204070210456848, "learning_rate": 1.9934927609279695e-05, "loss": 0.4623, "step": 1630 }, { "epoch": 0.03644394746276198, "grad_norm": 0.6260115504264832, "learning_rate": 1.9934528216725547e-05, "loss": 0.3808, "step": 1635 }, { "epoch": 0.03655539684338204, "grad_norm": 0.3944597542285919, "learning_rate": 1.9934127606274925e-05, "loss": 0.557, "step": 1640 }, { "epoch": 0.03666684622400211, "grad_norm": 0.567103385925293, "learning_rate": 1.9933725777976937e-05, "loss": 0.4482, "step": 1645 }, { "epoch": 0.036778295604622174, "grad_norm": 0.478750616312027, "learning_rate": 1.9933322731880845e-05, "loss": 0.4589, "step": 1650 }, { "epoch": 0.036889744985242244, "grad_norm": 0.6372083425521851, "learning_rate": 1.9932918468036057e-05, "loss": 0.4494, "step": 1655 }, { "epoch": 0.037001194365862314, "grad_norm": 0.6181703805923462, "learning_rate": 1.993251298649214e-05, "loss": 0.42, "step": 1660 }, { "epoch": 0.03711264374648238, "grad_norm": 0.7616068720817566, "learning_rate": 1.9932106287298795e-05, "loss": 0.467, "step": 1665 }, { "epoch": 0.037224093127102446, "grad_norm": 0.6794231534004211, "learning_rate": 1.9931698370505884e-05, "loss": 0.6037, "step": 1670 }, { "epoch": 0.037335542507722516, "grad_norm": 0.562717854976654, "learning_rate": 1.9931289236163414e-05, "loss": 0.4844, "step": 1675 }, { "epoch": 0.03744699188834258, "grad_norm": 0.5788027048110962, "learning_rate": 1.9930878884321542e-05, "loss": 0.4101, "step": 1680 }, { "epoch": 0.03755844126896265, "grad_norm": 0.6554732918739319, "learning_rate": 1.9930467315030572e-05, "loss": 0.5106, "step": 1685 }, { "epoch": 0.03766989064958271, "grad_norm": 0.6344287395477295, "learning_rate": 1.9930054528340963e-05, "loss": 0.4831, "step": 1690 }, { "epoch": 0.03778134003020278, "grad_norm": 0.6484274864196777, "learning_rate": 1.9929640524303314e-05, "loss": 0.399, "step": 1695 }, { "epoch": 0.03789278941082285, "grad_norm": 0.49392303824424744, "learning_rate": 1.992922530296838e-05, "loss": 0.4336, "step": 1700 }, { "epoch": 0.038004238791442914, "grad_norm": 0.5903943777084351, "learning_rate": 1.9928808864387073e-05, "loss": 0.4583, "step": 1705 }, { "epoch": 0.038115688172062984, "grad_norm": 0.7169772982597351, "learning_rate": 1.992839120861043e-05, "loss": 0.4634, "step": 1710 }, { "epoch": 0.03822713755268305, "grad_norm": 0.6074687838554382, "learning_rate": 1.9927972335689667e-05, "loss": 0.5118, "step": 1715 }, { "epoch": 0.038338586933303116, "grad_norm": 0.4693703055381775, "learning_rate": 1.9927552245676124e-05, "loss": 0.4277, "step": 1720 }, { "epoch": 0.038450036313923186, "grad_norm": 0.444767564535141, "learning_rate": 1.9927130938621305e-05, "loss": 0.3628, "step": 1725 }, { "epoch": 0.038561485694543256, "grad_norm": 0.4447241723537445, "learning_rate": 1.992670841457686e-05, "loss": 0.3805, "step": 1730 }, { "epoch": 0.03867293507516332, "grad_norm": 0.5440791249275208, "learning_rate": 1.9926284673594588e-05, "loss": 0.3269, "step": 1735 }, { "epoch": 0.03878438445578339, "grad_norm": 0.5838759541511536, "learning_rate": 1.992585971572643e-05, "loss": 0.4433, "step": 1740 }, { "epoch": 0.03889583383640345, "grad_norm": 0.5916067361831665, "learning_rate": 1.9925433541024488e-05, "loss": 0.4893, "step": 1745 }, { "epoch": 0.03900728321702352, "grad_norm": 0.5731961131095886, "learning_rate": 1.9925006149541003e-05, "loss": 0.4612, "step": 1750 }, { "epoch": 0.03911873259764359, "grad_norm": 0.5600702166557312, "learning_rate": 1.9924577541328378e-05, "loss": 0.4457, "step": 1755 }, { "epoch": 0.039230181978263653, "grad_norm": 0.7729988098144531, "learning_rate": 1.9924147716439152e-05, "loss": 0.5437, "step": 1760 }, { "epoch": 0.03934163135888372, "grad_norm": 0.6786412596702576, "learning_rate": 1.9923716674926018e-05, "loss": 0.531, "step": 1765 }, { "epoch": 0.03945308073950379, "grad_norm": 0.505881130695343, "learning_rate": 1.992328441684182e-05, "loss": 0.4025, "step": 1770 }, { "epoch": 0.039564530120123856, "grad_norm": 0.5706116557121277, "learning_rate": 1.9922850942239544e-05, "loss": 0.4751, "step": 1775 }, { "epoch": 0.039675979500743926, "grad_norm": 0.4059732258319855, "learning_rate": 1.992241625117234e-05, "loss": 0.4051, "step": 1780 }, { "epoch": 0.03978742888136399, "grad_norm": 0.5178811550140381, "learning_rate": 1.992198034369349e-05, "loss": 0.5105, "step": 1785 }, { "epoch": 0.03989887826198406, "grad_norm": 0.6659989953041077, "learning_rate": 1.9921543219856437e-05, "loss": 0.4047, "step": 1790 }, { "epoch": 0.04001032764260413, "grad_norm": 0.4662383198738098, "learning_rate": 1.992110487971477e-05, "loss": 0.4747, "step": 1795 }, { "epoch": 0.04012177702322419, "grad_norm": 0.7052854299545288, "learning_rate": 1.992066532332222e-05, "loss": 0.3694, "step": 1800 }, { "epoch": 0.04023322640384426, "grad_norm": 0.5693308711051941, "learning_rate": 1.9920224550732682e-05, "loss": 0.4154, "step": 1805 }, { "epoch": 0.04034467578446433, "grad_norm": 0.5711913704872131, "learning_rate": 1.991978256200018e-05, "loss": 0.3909, "step": 1810 }, { "epoch": 0.04045612516508439, "grad_norm": 0.42563021183013916, "learning_rate": 1.9919339357178907e-05, "loss": 0.5499, "step": 1815 }, { "epoch": 0.04056757454570446, "grad_norm": 0.521532416343689, "learning_rate": 1.9918894936323197e-05, "loss": 0.3627, "step": 1820 }, { "epoch": 0.04067902392632453, "grad_norm": 0.506631076335907, "learning_rate": 1.991844929948753e-05, "loss": 0.4728, "step": 1825 }, { "epoch": 0.040790473306944595, "grad_norm": 0.6046636700630188, "learning_rate": 1.9918002446726535e-05, "loss": 0.3791, "step": 1830 }, { "epoch": 0.040901922687564665, "grad_norm": 0.620905339717865, "learning_rate": 1.9917554378094997e-05, "loss": 0.4688, "step": 1835 }, { "epoch": 0.04101337206818473, "grad_norm": 0.5445390939712524, "learning_rate": 1.991710509364784e-05, "loss": 0.4985, "step": 1840 }, { "epoch": 0.0411248214488048, "grad_norm": 0.6072726249694824, "learning_rate": 1.9916654593440152e-05, "loss": 0.4017, "step": 1845 }, { "epoch": 0.04123627082942487, "grad_norm": 0.5444666147232056, "learning_rate": 1.9916202877527153e-05, "loss": 0.4537, "step": 1850 }, { "epoch": 0.04134772021004493, "grad_norm": 0.6237067580223083, "learning_rate": 1.9915749945964228e-05, "loss": 0.3398, "step": 1855 }, { "epoch": 0.041459169590665, "grad_norm": 0.49777600169181824, "learning_rate": 1.9915295798806888e-05, "loss": 0.4411, "step": 1860 }, { "epoch": 0.04157061897128507, "grad_norm": 0.5367849469184875, "learning_rate": 1.9914840436110824e-05, "loss": 0.4353, "step": 1865 }, { "epoch": 0.04168206835190513, "grad_norm": 0.562211811542511, "learning_rate": 1.9914383857931853e-05, "loss": 0.4252, "step": 1870 }, { "epoch": 0.0417935177325252, "grad_norm": 0.6259203553199768, "learning_rate": 1.9913926064325946e-05, "loss": 0.42, "step": 1875 }, { "epoch": 0.041904967113145265, "grad_norm": 0.48725616931915283, "learning_rate": 1.9913467055349227e-05, "loss": 0.4408, "step": 1880 }, { "epoch": 0.042016416493765335, "grad_norm": 0.538391649723053, "learning_rate": 1.9913006831057967e-05, "loss": 0.4785, "step": 1885 }, { "epoch": 0.042127865874385405, "grad_norm": 0.4485551416873932, "learning_rate": 1.991254539150859e-05, "loss": 0.34, "step": 1890 }, { "epoch": 0.04223931525500547, "grad_norm": 0.4810604453086853, "learning_rate": 1.991208273675766e-05, "loss": 0.3773, "step": 1895 }, { "epoch": 0.04235076463562554, "grad_norm": 0.8326770067214966, "learning_rate": 1.9911618866861894e-05, "loss": 0.3926, "step": 1900 }, { "epoch": 0.04246221401624561, "grad_norm": 0.6135021448135376, "learning_rate": 1.991115378187816e-05, "loss": 0.5185, "step": 1905 }, { "epoch": 0.04257366339686567, "grad_norm": 0.5827608108520508, "learning_rate": 1.9910687481863478e-05, "loss": 0.5723, "step": 1910 }, { "epoch": 0.04268511277748574, "grad_norm": 0.5833299160003662, "learning_rate": 1.9910219966875007e-05, "loss": 0.4621, "step": 1915 }, { "epoch": 0.04279656215810581, "grad_norm": 0.7341663837432861, "learning_rate": 1.9909751236970064e-05, "loss": 0.4476, "step": 1920 }, { "epoch": 0.04290801153872587, "grad_norm": 0.6236368417739868, "learning_rate": 1.990928129220611e-05, "loss": 0.4315, "step": 1925 }, { "epoch": 0.04301946091934594, "grad_norm": 0.6765170097351074, "learning_rate": 1.990881013264076e-05, "loss": 0.5065, "step": 1930 }, { "epoch": 0.043130910299966005, "grad_norm": 0.498177170753479, "learning_rate": 1.990833775833177e-05, "loss": 0.4905, "step": 1935 }, { "epoch": 0.043242359680586075, "grad_norm": 0.7168980836868286, "learning_rate": 1.9907864169337053e-05, "loss": 0.4488, "step": 1940 }, { "epoch": 0.043353809061206144, "grad_norm": 0.5037057995796204, "learning_rate": 1.9907389365714662e-05, "loss": 0.3357, "step": 1945 }, { "epoch": 0.04346525844182621, "grad_norm": 0.608124852180481, "learning_rate": 1.9906913347522812e-05, "loss": 0.587, "step": 1950 }, { "epoch": 0.04357670782244628, "grad_norm": 0.400680273771286, "learning_rate": 1.9906436114819858e-05, "loss": 0.3273, "step": 1955 }, { "epoch": 0.04368815720306635, "grad_norm": 0.6874983310699463, "learning_rate": 1.9905957667664296e-05, "loss": 0.4827, "step": 1960 }, { "epoch": 0.04379960658368641, "grad_norm": 0.5223805904388428, "learning_rate": 1.990547800611479e-05, "loss": 0.5685, "step": 1965 }, { "epoch": 0.04391105596430648, "grad_norm": 0.43834665417671204, "learning_rate": 1.9904997130230135e-05, "loss": 0.3613, "step": 1970 }, { "epoch": 0.04402250534492654, "grad_norm": 0.7025169730186462, "learning_rate": 1.990451504006929e-05, "loss": 0.4489, "step": 1975 }, { "epoch": 0.04413395472554661, "grad_norm": 0.665053129196167, "learning_rate": 1.990403173569135e-05, "loss": 0.4566, "step": 1980 }, { "epoch": 0.04424540410616668, "grad_norm": 0.5735465288162231, "learning_rate": 1.9903547217155567e-05, "loss": 0.2985, "step": 1985 }, { "epoch": 0.044356853486786745, "grad_norm": 0.7619782090187073, "learning_rate": 1.990306148452134e-05, "loss": 0.6006, "step": 1990 }, { "epoch": 0.044468302867406814, "grad_norm": 0.491385281085968, "learning_rate": 1.9902574537848216e-05, "loss": 0.5219, "step": 1995 }, { "epoch": 0.044579752248026884, "grad_norm": 0.6306560039520264, "learning_rate": 1.990208637719589e-05, "loss": 0.4113, "step": 2000 }, { "epoch": 0.04469120162864695, "grad_norm": 0.4792928993701935, "learning_rate": 1.9901597002624204e-05, "loss": 0.565, "step": 2005 }, { "epoch": 0.04480265100926702, "grad_norm": 0.7919230461120605, "learning_rate": 1.9901106414193153e-05, "loss": 0.4039, "step": 2010 }, { "epoch": 0.044914100389887086, "grad_norm": 0.808326780796051, "learning_rate": 1.9900614611962882e-05, "loss": 0.481, "step": 2015 }, { "epoch": 0.04502554977050715, "grad_norm": 0.4971373677253723, "learning_rate": 1.9900121595993683e-05, "loss": 0.5343, "step": 2020 }, { "epoch": 0.04513699915112722, "grad_norm": 0.6289275884628296, "learning_rate": 1.989962736634599e-05, "loss": 0.4459, "step": 2025 }, { "epoch": 0.04524844853174728, "grad_norm": 0.5500627160072327, "learning_rate": 1.98991319230804e-05, "loss": 0.3991, "step": 2030 }, { "epoch": 0.04535989791236735, "grad_norm": 0.47303643822669983, "learning_rate": 1.9898635266257643e-05, "loss": 0.5046, "step": 2035 }, { "epoch": 0.04547134729298742, "grad_norm": 5.436945915222168, "learning_rate": 1.9898137395938605e-05, "loss": 0.4981, "step": 2040 }, { "epoch": 0.045582796673607484, "grad_norm": 0.40001043677330017, "learning_rate": 1.9897638312184328e-05, "loss": 0.4359, "step": 2045 }, { "epoch": 0.045694246054227554, "grad_norm": 0.5678738355636597, "learning_rate": 1.989713801505599e-05, "loss": 0.5342, "step": 2050 }, { "epoch": 0.045805695434847624, "grad_norm": 0.539551854133606, "learning_rate": 1.989663650461493e-05, "loss": 0.3701, "step": 2055 }, { "epoch": 0.045917144815467686, "grad_norm": 0.5696111917495728, "learning_rate": 1.989613378092262e-05, "loss": 0.5771, "step": 2060 }, { "epoch": 0.046028594196087756, "grad_norm": 0.5799300670623779, "learning_rate": 1.9895629844040697e-05, "loss": 0.4812, "step": 2065 }, { "epoch": 0.04614004357670782, "grad_norm": 0.5338211059570312, "learning_rate": 1.9895124694030934e-05, "loss": 0.5903, "step": 2070 }, { "epoch": 0.04625149295732789, "grad_norm": 0.5052824020385742, "learning_rate": 1.9894618330955268e-05, "loss": 0.3809, "step": 2075 }, { "epoch": 0.04636294233794796, "grad_norm": 0.47797662019729614, "learning_rate": 1.9894110754875763e-05, "loss": 0.371, "step": 2080 }, { "epoch": 0.04647439171856802, "grad_norm": 0.6397122144699097, "learning_rate": 1.9893601965854653e-05, "loss": 0.4875, "step": 2085 }, { "epoch": 0.04658584109918809, "grad_norm": 0.5474061369895935, "learning_rate": 1.9893091963954312e-05, "loss": 0.4379, "step": 2090 }, { "epoch": 0.04669729047980816, "grad_norm": 0.5695924162864685, "learning_rate": 1.9892580749237256e-05, "loss": 0.4498, "step": 2095 }, { "epoch": 0.046808739860428224, "grad_norm": 0.4939964711666107, "learning_rate": 1.989206832176616e-05, "loss": 0.4, "step": 2100 }, { "epoch": 0.046920189241048293, "grad_norm": 0.5758333206176758, "learning_rate": 1.9891554681603844e-05, "loss": 0.4681, "step": 2105 }, { "epoch": 0.04703163862166836, "grad_norm": 0.6062471270561218, "learning_rate": 1.9891039828813272e-05, "loss": 0.4122, "step": 2110 }, { "epoch": 0.047143088002288426, "grad_norm": 0.45578670501708984, "learning_rate": 1.9890523763457567e-05, "loss": 0.5645, "step": 2115 }, { "epoch": 0.047254537382908496, "grad_norm": 0.5675963759422302, "learning_rate": 1.989000648559999e-05, "loss": 0.4202, "step": 2120 }, { "epoch": 0.04736598676352856, "grad_norm": 0.6070412993431091, "learning_rate": 1.9889487995303958e-05, "loss": 0.2939, "step": 2125 }, { "epoch": 0.04747743614414863, "grad_norm": 0.7796115279197693, "learning_rate": 1.9888968292633032e-05, "loss": 0.529, "step": 2130 }, { "epoch": 0.0475888855247687, "grad_norm": 0.5633937120437622, "learning_rate": 1.9888447377650926e-05, "loss": 0.5273, "step": 2135 }, { "epoch": 0.04770033490538876, "grad_norm": 0.45713990926742554, "learning_rate": 1.9887925250421494e-05, "loss": 0.3204, "step": 2140 }, { "epoch": 0.04781178428600883, "grad_norm": 0.457570880651474, "learning_rate": 1.9887401911008758e-05, "loss": 0.5638, "step": 2145 }, { "epoch": 0.0479232336666289, "grad_norm": 0.5453818440437317, "learning_rate": 1.988687735947686e-05, "loss": 0.3383, "step": 2150 }, { "epoch": 0.04803468304724896, "grad_norm": 0.550879955291748, "learning_rate": 1.9886351595890114e-05, "loss": 0.446, "step": 2155 }, { "epoch": 0.04814613242786903, "grad_norm": 0.5164986848831177, "learning_rate": 1.9885824620312975e-05, "loss": 0.4184, "step": 2160 }, { "epoch": 0.048257581808489096, "grad_norm": 0.5737098455429077, "learning_rate": 1.988529643281004e-05, "loss": 0.4013, "step": 2165 }, { "epoch": 0.048369031189109166, "grad_norm": 0.6029147505760193, "learning_rate": 1.988476703344607e-05, "loss": 0.4013, "step": 2170 }, { "epoch": 0.048480480569729235, "grad_norm": 0.4032836854457855, "learning_rate": 1.988423642228596e-05, "loss": 0.543, "step": 2175 }, { "epoch": 0.0485919299503493, "grad_norm": 0.58678138256073, "learning_rate": 1.988370459939476e-05, "loss": 0.4044, "step": 2180 }, { "epoch": 0.04870337933096937, "grad_norm": 0.5044936537742615, "learning_rate": 1.988317156483766e-05, "loss": 0.2857, "step": 2185 }, { "epoch": 0.04881482871158944, "grad_norm": 0.6275551915168762, "learning_rate": 1.988263731868002e-05, "loss": 0.5061, "step": 2190 }, { "epoch": 0.0489262780922095, "grad_norm": 0.7103505730628967, "learning_rate": 1.988210186098732e-05, "loss": 0.4178, "step": 2195 }, { "epoch": 0.04903772747282957, "grad_norm": 0.5122005343437195, "learning_rate": 1.9881565191825218e-05, "loss": 0.3263, "step": 2200 }, { "epoch": 0.04914917685344964, "grad_norm": 0.7140627503395081, "learning_rate": 1.9881027311259487e-05, "loss": 0.4232, "step": 2205 }, { "epoch": 0.0492606262340697, "grad_norm": 0.6395582556724548, "learning_rate": 1.9880488219356086e-05, "loss": 0.5721, "step": 2210 }, { "epoch": 0.04937207561468977, "grad_norm": 0.5498226284980774, "learning_rate": 1.9879947916181096e-05, "loss": 0.3603, "step": 2215 }, { "epoch": 0.049483524995309836, "grad_norm": 0.5085523724555969, "learning_rate": 1.9879406401800746e-05, "loss": 0.4792, "step": 2220 }, { "epoch": 0.049594974375929905, "grad_norm": 0.8438782691955566, "learning_rate": 1.987886367628143e-05, "loss": 0.4226, "step": 2225 }, { "epoch": 0.049706423756549975, "grad_norm": 0.6150681972503662, "learning_rate": 1.9878319739689686e-05, "loss": 0.3928, "step": 2230 }, { "epoch": 0.04981787313717004, "grad_norm": 0.4985312223434448, "learning_rate": 1.987777459209219e-05, "loss": 0.4044, "step": 2235 }, { "epoch": 0.04992932251779011, "grad_norm": 0.5724480748176575, "learning_rate": 1.987722823355577e-05, "loss": 0.5321, "step": 2240 }, { "epoch": 0.05004077189841018, "grad_norm": 0.6445596218109131, "learning_rate": 1.9876680664147408e-05, "loss": 0.3881, "step": 2245 }, { "epoch": 0.05015222127903024, "grad_norm": 0.48637256026268005, "learning_rate": 1.9876131883934235e-05, "loss": 0.46, "step": 2250 }, { "epoch": 0.05026367065965031, "grad_norm": 0.4218536615371704, "learning_rate": 1.9875581892983527e-05, "loss": 0.5875, "step": 2255 }, { "epoch": 0.05037512004027037, "grad_norm": 0.48131638765335083, "learning_rate": 1.9875030691362705e-05, "loss": 0.4047, "step": 2260 }, { "epoch": 0.05048656942089044, "grad_norm": 0.6676334142684937, "learning_rate": 1.9874478279139343e-05, "loss": 0.3902, "step": 2265 }, { "epoch": 0.05059801880151051, "grad_norm": 0.4519447386264801, "learning_rate": 1.9873924656381164e-05, "loss": 0.4285, "step": 2270 }, { "epoch": 0.050709468182130575, "grad_norm": 0.3770105838775635, "learning_rate": 1.9873369823156038e-05, "loss": 0.4169, "step": 2275 }, { "epoch": 0.050820917562750645, "grad_norm": 0.46062755584716797, "learning_rate": 1.987281377953198e-05, "loss": 0.4212, "step": 2280 }, { "epoch": 0.050932366943370715, "grad_norm": 0.4402807950973511, "learning_rate": 1.987225652557716e-05, "loss": 0.4225, "step": 2285 }, { "epoch": 0.05104381632399078, "grad_norm": 0.5167077779769897, "learning_rate": 1.9871698061359893e-05, "loss": 0.4731, "step": 2290 }, { "epoch": 0.05115526570461085, "grad_norm": 0.6223405599594116, "learning_rate": 1.987113838694864e-05, "loss": 0.435, "step": 2295 }, { "epoch": 0.05126671508523092, "grad_norm": 0.6904946565628052, "learning_rate": 1.9870577502412015e-05, "loss": 0.4087, "step": 2300 }, { "epoch": 0.05137816446585098, "grad_norm": 0.6623885035514832, "learning_rate": 1.987001540781878e-05, "loss": 0.4721, "step": 2305 }, { "epoch": 0.05148961384647105, "grad_norm": 0.44664719700813293, "learning_rate": 1.9869452103237838e-05, "loss": 0.4547, "step": 2310 }, { "epoch": 0.05160106322709111, "grad_norm": 0.5109519362449646, "learning_rate": 1.986888758873825e-05, "loss": 0.3584, "step": 2315 }, { "epoch": 0.05171251260771118, "grad_norm": 0.5872670412063599, "learning_rate": 1.9868321864389216e-05, "loss": 0.4298, "step": 2320 }, { "epoch": 0.05182396198833125, "grad_norm": 0.6067822575569153, "learning_rate": 1.9867754930260098e-05, "loss": 0.3886, "step": 2325 }, { "epoch": 0.051935411368951315, "grad_norm": 0.40194398164749146, "learning_rate": 1.9867186786420388e-05, "loss": 0.4291, "step": 2330 }, { "epoch": 0.052046860749571384, "grad_norm": 0.39779698848724365, "learning_rate": 1.9866617432939746e-05, "loss": 0.4259, "step": 2335 }, { "epoch": 0.052158310130191454, "grad_norm": 0.5225132703781128, "learning_rate": 1.986604686988796e-05, "loss": 0.4568, "step": 2340 }, { "epoch": 0.05226975951081152, "grad_norm": 0.5338283777236938, "learning_rate": 1.986547509733499e-05, "loss": 0.5001, "step": 2345 }, { "epoch": 0.05238120889143159, "grad_norm": 0.37600216269493103, "learning_rate": 1.9864902115350918e-05, "loss": 0.3789, "step": 2350 }, { "epoch": 0.05249265827205165, "grad_norm": 0.4644761383533478, "learning_rate": 1.986432792400599e-05, "loss": 0.5704, "step": 2355 }, { "epoch": 0.05260410765267172, "grad_norm": 0.5336574912071228, "learning_rate": 1.9863752523370602e-05, "loss": 0.4756, "step": 2360 }, { "epoch": 0.05271555703329179, "grad_norm": 0.4611615538597107, "learning_rate": 1.9863175913515287e-05, "loss": 0.4586, "step": 2365 }, { "epoch": 0.05282700641391185, "grad_norm": 0.5109054446220398, "learning_rate": 1.9862598094510743e-05, "loss": 0.4997, "step": 2370 }, { "epoch": 0.05293845579453192, "grad_norm": 0.5393320322036743, "learning_rate": 1.98620190664278e-05, "loss": 0.4339, "step": 2375 }, { "epoch": 0.05304990517515199, "grad_norm": 0.5946705937385559, "learning_rate": 1.986143882933744e-05, "loss": 0.5266, "step": 2380 }, { "epoch": 0.053161354555772054, "grad_norm": 0.39486464858055115, "learning_rate": 1.9860857383310795e-05, "loss": 0.5178, "step": 2385 }, { "epoch": 0.053272803936392124, "grad_norm": 0.6416144967079163, "learning_rate": 1.9860274728419155e-05, "loss": 0.5279, "step": 2390 }, { "epoch": 0.053384253317012194, "grad_norm": 0.868364691734314, "learning_rate": 1.9859690864733942e-05, "loss": 0.2764, "step": 2395 }, { "epoch": 0.05349570269763226, "grad_norm": 0.5830197334289551, "learning_rate": 1.9859105792326733e-05, "loss": 0.4276, "step": 2400 }, { "epoch": 0.053607152078252326, "grad_norm": 0.6217747330665588, "learning_rate": 1.985851951126925e-05, "loss": 0.4741, "step": 2405 }, { "epoch": 0.05371860145887239, "grad_norm": 0.47893911600112915, "learning_rate": 1.9857932021633377e-05, "loss": 0.4266, "step": 2410 }, { "epoch": 0.05383005083949246, "grad_norm": 0.5307624340057373, "learning_rate": 1.9857343323491127e-05, "loss": 0.3482, "step": 2415 }, { "epoch": 0.05394150022011253, "grad_norm": 0.5524867177009583, "learning_rate": 1.9856753416914673e-05, "loss": 0.4334, "step": 2420 }, { "epoch": 0.05405294960073259, "grad_norm": 0.41400259733200073, "learning_rate": 1.9856162301976333e-05, "loss": 0.3792, "step": 2425 }, { "epoch": 0.05416439898135266, "grad_norm": 0.9114057421684265, "learning_rate": 1.9855569978748575e-05, "loss": 0.5029, "step": 2430 }, { "epoch": 0.05427584836197273, "grad_norm": 0.5691152811050415, "learning_rate": 1.9854976447304005e-05, "loss": 0.5097, "step": 2435 }, { "epoch": 0.054387297742592794, "grad_norm": 0.5087013840675354, "learning_rate": 1.9854381707715396e-05, "loss": 0.5009, "step": 2440 }, { "epoch": 0.054498747123212864, "grad_norm": 0.596858561038971, "learning_rate": 1.9853785760055652e-05, "loss": 0.4261, "step": 2445 }, { "epoch": 0.054610196503832927, "grad_norm": 0.547972559928894, "learning_rate": 1.9853188604397834e-05, "loss": 0.3686, "step": 2450 }, { "epoch": 0.054721645884452996, "grad_norm": 0.63202965259552, "learning_rate": 1.985259024081515e-05, "loss": 0.4538, "step": 2455 }, { "epoch": 0.054833095265073066, "grad_norm": 0.5696936249732971, "learning_rate": 1.9851990669380948e-05, "loss": 0.4632, "step": 2460 }, { "epoch": 0.05494454464569313, "grad_norm": 0.5310059189796448, "learning_rate": 1.9851389890168738e-05, "loss": 0.4243, "step": 2465 }, { "epoch": 0.0550559940263132, "grad_norm": 0.5194794535636902, "learning_rate": 1.985078790325217e-05, "loss": 0.4771, "step": 2470 }, { "epoch": 0.05516744340693327, "grad_norm": 0.4808349013328552, "learning_rate": 1.985018470870504e-05, "loss": 0.4842, "step": 2475 }, { "epoch": 0.05527889278755333, "grad_norm": 0.49285992980003357, "learning_rate": 1.9849580306601298e-05, "loss": 0.3485, "step": 2480 }, { "epoch": 0.0553903421681734, "grad_norm": 0.47631382942199707, "learning_rate": 1.9848974697015038e-05, "loss": 0.3182, "step": 2485 }, { "epoch": 0.05550179154879347, "grad_norm": 0.6178653836250305, "learning_rate": 1.98483678800205e-05, "loss": 0.3605, "step": 2490 }, { "epoch": 0.055613240929413534, "grad_norm": 0.6378611922264099, "learning_rate": 1.9847759855692078e-05, "loss": 0.4466, "step": 2495 }, { "epoch": 0.0557246903100336, "grad_norm": 0.6486213207244873, "learning_rate": 1.9847150624104313e-05, "loss": 0.4764, "step": 2500 }, { "epoch": 0.055836139690653666, "grad_norm": 0.5121660232543945, "learning_rate": 1.9846540185331886e-05, "loss": 0.4384, "step": 2505 }, { "epoch": 0.055947589071273736, "grad_norm": 0.5070223212242126, "learning_rate": 1.984592853944964e-05, "loss": 0.3902, "step": 2510 }, { "epoch": 0.056059038451893806, "grad_norm": 0.6691417694091797, "learning_rate": 1.984531568653255e-05, "loss": 0.4287, "step": 2515 }, { "epoch": 0.05617048783251387, "grad_norm": 0.6232566237449646, "learning_rate": 1.9844701626655753e-05, "loss": 0.3921, "step": 2520 }, { "epoch": 0.05628193721313394, "grad_norm": 0.596046507358551, "learning_rate": 1.9844086359894525e-05, "loss": 0.2237, "step": 2525 }, { "epoch": 0.05639338659375401, "grad_norm": 0.5020400285720825, "learning_rate": 1.9843469886324294e-05, "loss": 0.445, "step": 2530 }, { "epoch": 0.05650483597437407, "grad_norm": 0.6604124903678894, "learning_rate": 1.9842852206020637e-05, "loss": 0.3864, "step": 2535 }, { "epoch": 0.05661628535499414, "grad_norm": 0.4494327902793884, "learning_rate": 1.9842233319059274e-05, "loss": 0.4462, "step": 2540 }, { "epoch": 0.0567277347356142, "grad_norm": 0.6409807801246643, "learning_rate": 1.9841613225516077e-05, "loss": 0.4498, "step": 2545 }, { "epoch": 0.05683918411623427, "grad_norm": 0.6100894808769226, "learning_rate": 1.9840991925467064e-05, "loss": 0.3684, "step": 2550 }, { "epoch": 0.05695063349685434, "grad_norm": 0.4814474880695343, "learning_rate": 1.9840369418988397e-05, "loss": 0.242, "step": 2555 }, { "epoch": 0.057062082877474406, "grad_norm": 0.49156680703163147, "learning_rate": 1.98397457061564e-05, "loss": 0.3756, "step": 2560 }, { "epoch": 0.057173532258094475, "grad_norm": 0.4226768910884857, "learning_rate": 1.983912078704753e-05, "loss": 0.3711, "step": 2565 }, { "epoch": 0.057284981638714545, "grad_norm": 0.7404083609580994, "learning_rate": 1.98384946617384e-05, "loss": 0.4515, "step": 2570 }, { "epoch": 0.05739643101933461, "grad_norm": 0.6239614486694336, "learning_rate": 1.983786733030576e-05, "loss": 0.4969, "step": 2575 }, { "epoch": 0.05750788039995468, "grad_norm": 0.6774435639381409, "learning_rate": 1.9837238792826526e-05, "loss": 0.4344, "step": 2580 }, { "epoch": 0.05761932978057475, "grad_norm": 0.5329133868217468, "learning_rate": 1.983660904937775e-05, "loss": 0.3811, "step": 2585 }, { "epoch": 0.05773077916119481, "grad_norm": 0.6280587315559387, "learning_rate": 1.9835978100036625e-05, "loss": 0.4216, "step": 2590 }, { "epoch": 0.05784222854181488, "grad_norm": 0.717054009437561, "learning_rate": 1.9835345944880512e-05, "loss": 0.5021, "step": 2595 }, { "epoch": 0.05795367792243494, "grad_norm": 0.5728350281715393, "learning_rate": 1.9834712583986904e-05, "loss": 0.3976, "step": 2600 }, { "epoch": 0.05806512730305501, "grad_norm": 0.45669928193092346, "learning_rate": 1.9834078017433446e-05, "loss": 0.5191, "step": 2605 }, { "epoch": 0.05817657668367508, "grad_norm": 0.40955591201782227, "learning_rate": 1.9833442245297923e-05, "loss": 0.4159, "step": 2610 }, { "epoch": 0.058288026064295145, "grad_norm": 0.6732655167579651, "learning_rate": 1.983280526765829e-05, "loss": 0.5921, "step": 2615 }, { "epoch": 0.058399475444915215, "grad_norm": 0.6169721484184265, "learning_rate": 1.9832167084592628e-05, "loss": 0.4072, "step": 2620 }, { "epoch": 0.058510924825535285, "grad_norm": 0.5245190262794495, "learning_rate": 1.9831527696179173e-05, "loss": 0.4807, "step": 2625 }, { "epoch": 0.05862237420615535, "grad_norm": 0.6576266884803772, "learning_rate": 1.983088710249631e-05, "loss": 0.4389, "step": 2630 }, { "epoch": 0.05873382358677542, "grad_norm": 0.5251596570014954, "learning_rate": 1.9830245303622573e-05, "loss": 0.5181, "step": 2635 }, { "epoch": 0.05884527296739548, "grad_norm": 0.46695682406425476, "learning_rate": 1.9829602299636637e-05, "loss": 0.3301, "step": 2640 }, { "epoch": 0.05895672234801555, "grad_norm": 0.7640698552131653, "learning_rate": 1.9828958090617334e-05, "loss": 0.5782, "step": 2645 }, { "epoch": 0.05906817172863562, "grad_norm": 0.5179789662361145, "learning_rate": 1.9828312676643638e-05, "loss": 0.5458, "step": 2650 }, { "epoch": 0.05917962110925568, "grad_norm": 0.7178815007209778, "learning_rate": 1.9827666057794668e-05, "loss": 0.373, "step": 2655 }, { "epoch": 0.05929107048987575, "grad_norm": 0.5658371448516846, "learning_rate": 1.9827018234149696e-05, "loss": 0.3873, "step": 2660 }, { "epoch": 0.05940251987049582, "grad_norm": 0.6456442475318909, "learning_rate": 1.9826369205788144e-05, "loss": 0.3859, "step": 2665 }, { "epoch": 0.059513969251115885, "grad_norm": 0.5403016209602356, "learning_rate": 1.9825718972789576e-05, "loss": 0.4351, "step": 2670 }, { "epoch": 0.059625418631735955, "grad_norm": 0.39102426171302795, "learning_rate": 1.9825067535233703e-05, "loss": 0.2348, "step": 2675 }, { "epoch": 0.059736868012356024, "grad_norm": 0.539016842842102, "learning_rate": 1.982441489320039e-05, "loss": 0.4694, "step": 2680 }, { "epoch": 0.05984831739297609, "grad_norm": 0.6451724171638489, "learning_rate": 1.9823761046769644e-05, "loss": 0.521, "step": 2685 }, { "epoch": 0.05995976677359616, "grad_norm": 0.5299963355064392, "learning_rate": 1.9823105996021618e-05, "loss": 0.4558, "step": 2690 }, { "epoch": 0.06007121615421622, "grad_norm": 0.5168675184249878, "learning_rate": 1.9822449741036626e-05, "loss": 0.5146, "step": 2695 }, { "epoch": 0.06018266553483629, "grad_norm": 0.5516669750213623, "learning_rate": 1.9821792281895108e-05, "loss": 0.4997, "step": 2700 }, { "epoch": 0.06029411491545636, "grad_norm": 0.3844130337238312, "learning_rate": 1.9821133618677672e-05, "loss": 0.5141, "step": 2705 }, { "epoch": 0.06040556429607642, "grad_norm": 0.8562740087509155, "learning_rate": 1.9820473751465056e-05, "loss": 0.5316, "step": 2710 }, { "epoch": 0.06051701367669649, "grad_norm": 0.413327693939209, "learning_rate": 1.9819812680338167e-05, "loss": 0.456, "step": 2715 }, { "epoch": 0.06062846305731656, "grad_norm": 0.5889877080917358, "learning_rate": 1.9819150405378037e-05, "loss": 0.4245, "step": 2720 }, { "epoch": 0.060739912437936625, "grad_norm": 0.6145250201225281, "learning_rate": 1.981848692666586e-05, "loss": 0.554, "step": 2725 }, { "epoch": 0.060851361818556694, "grad_norm": 0.5503904819488525, "learning_rate": 1.9817822244282973e-05, "loss": 0.3949, "step": 2730 }, { "epoch": 0.06096281119917676, "grad_norm": 0.5144293308258057, "learning_rate": 1.981715635831086e-05, "loss": 0.337, "step": 2735 }, { "epoch": 0.06107426057979683, "grad_norm": 0.4838838577270508, "learning_rate": 1.981648926883116e-05, "loss": 0.4807, "step": 2740 }, { "epoch": 0.0611857099604169, "grad_norm": 0.7387391924858093, "learning_rate": 1.981582097592564e-05, "loss": 0.3634, "step": 2745 }, { "epoch": 0.06129715934103696, "grad_norm": 0.46244555711746216, "learning_rate": 1.981515147967624e-05, "loss": 0.5305, "step": 2750 }, { "epoch": 0.06140860872165703, "grad_norm": 0.435006707906723, "learning_rate": 1.9814480780165026e-05, "loss": 0.4762, "step": 2755 }, { "epoch": 0.0615200581022771, "grad_norm": 0.6218363642692566, "learning_rate": 1.981380887747423e-05, "loss": 0.5001, "step": 2760 }, { "epoch": 0.06163150748289716, "grad_norm": 0.5311275124549866, "learning_rate": 1.9813135771686213e-05, "loss": 0.3546, "step": 2765 }, { "epoch": 0.06174295686351723, "grad_norm": 0.4174991846084595, "learning_rate": 1.9812461462883496e-05, "loss": 0.3359, "step": 2770 }, { "epoch": 0.0618544062441373, "grad_norm": 0.5177382826805115, "learning_rate": 1.9811785951148744e-05, "loss": 0.3957, "step": 2775 }, { "epoch": 0.061965855624757364, "grad_norm": 0.6851249933242798, "learning_rate": 1.981110923656477e-05, "loss": 0.4732, "step": 2780 }, { "epoch": 0.062077305005377434, "grad_norm": 0.6137966513633728, "learning_rate": 1.981043131921453e-05, "loss": 0.4677, "step": 2785 }, { "epoch": 0.0621887543859975, "grad_norm": 0.6141671538352966, "learning_rate": 1.980975219918114e-05, "loss": 0.4041, "step": 2790 }, { "epoch": 0.062300203766617566, "grad_norm": 0.5481983423233032, "learning_rate": 1.9809071876547848e-05, "loss": 0.4604, "step": 2795 }, { "epoch": 0.062411653147237636, "grad_norm": 0.40346822142601013, "learning_rate": 1.9808390351398063e-05, "loss": 0.443, "step": 2800 }, { "epoch": 0.0625231025278577, "grad_norm": 0.6395840644836426, "learning_rate": 1.9807707623815323e-05, "loss": 0.38, "step": 2805 }, { "epoch": 0.06263455190847776, "grad_norm": 0.6557417511940002, "learning_rate": 1.9807023693883337e-05, "loss": 0.378, "step": 2810 }, { "epoch": 0.06274600128909784, "grad_norm": 0.6313578486442566, "learning_rate": 1.9806338561685946e-05, "loss": 0.5004, "step": 2815 }, { "epoch": 0.0628574506697179, "grad_norm": 0.4933686852455139, "learning_rate": 1.9805652227307137e-05, "loss": 0.3973, "step": 2820 }, { "epoch": 0.06296890005033796, "grad_norm": 0.5198767781257629, "learning_rate": 1.9804964690831055e-05, "loss": 0.4025, "step": 2825 }, { "epoch": 0.06308034943095804, "grad_norm": 0.45765766501426697, "learning_rate": 1.9804275952341983e-05, "loss": 0.4031, "step": 2830 }, { "epoch": 0.0631917988115781, "grad_norm": 0.5890164971351624, "learning_rate": 1.980358601192436e-05, "loss": 0.5368, "step": 2835 }, { "epoch": 0.06330324819219817, "grad_norm": 0.44444313645362854, "learning_rate": 1.9802894869662757e-05, "loss": 0.3634, "step": 2840 }, { "epoch": 0.06341469757281824, "grad_norm": 0.4453275501728058, "learning_rate": 1.9802202525641915e-05, "loss": 0.2877, "step": 2845 }, { "epoch": 0.0635261469534383, "grad_norm": 0.5745603442192078, "learning_rate": 1.9801508979946703e-05, "loss": 0.3674, "step": 2850 }, { "epoch": 0.06363759633405837, "grad_norm": 0.5141566395759583, "learning_rate": 1.980081423266215e-05, "loss": 0.3664, "step": 2855 }, { "epoch": 0.06374904571467845, "grad_norm": 0.5238634347915649, "learning_rate": 1.980011828387342e-05, "loss": 0.4089, "step": 2860 }, { "epoch": 0.06386049509529851, "grad_norm": 0.47008633613586426, "learning_rate": 1.979942113366583e-05, "loss": 0.4745, "step": 2865 }, { "epoch": 0.06397194447591857, "grad_norm": 0.5868954658508301, "learning_rate": 1.9798722782124854e-05, "loss": 0.3342, "step": 2870 }, { "epoch": 0.06408339385653865, "grad_norm": 0.5734167098999023, "learning_rate": 1.9798023229336097e-05, "loss": 0.5794, "step": 2875 }, { "epoch": 0.06419484323715871, "grad_norm": 0.6237195730209351, "learning_rate": 1.9797322475385323e-05, "loss": 0.5441, "step": 2880 }, { "epoch": 0.06430629261777877, "grad_norm": 0.7441937923431396, "learning_rate": 1.9796620520358436e-05, "loss": 0.3542, "step": 2885 }, { "epoch": 0.06441774199839885, "grad_norm": 0.6220014691352844, "learning_rate": 1.979591736434149e-05, "loss": 0.4135, "step": 2890 }, { "epoch": 0.06452919137901891, "grad_norm": 0.3563708961009979, "learning_rate": 1.9795213007420692e-05, "loss": 0.3922, "step": 2895 }, { "epoch": 0.06464064075963898, "grad_norm": 0.652351975440979, "learning_rate": 1.9794507449682383e-05, "loss": 0.5544, "step": 2900 }, { "epoch": 0.06475209014025904, "grad_norm": 0.6000990271568298, "learning_rate": 1.9793800691213065e-05, "loss": 0.3983, "step": 2905 }, { "epoch": 0.06486353952087912, "grad_norm": 0.6168727874755859, "learning_rate": 1.979309273209938e-05, "loss": 0.4477, "step": 2910 }, { "epoch": 0.06497498890149918, "grad_norm": 0.6351516246795654, "learning_rate": 1.9792383572428112e-05, "loss": 0.5369, "step": 2915 }, { "epoch": 0.06508643828211924, "grad_norm": 0.8037847280502319, "learning_rate": 1.9791673212286208e-05, "loss": 0.5326, "step": 2920 }, { "epoch": 0.06519788766273932, "grad_norm": 0.519878089427948, "learning_rate": 1.9790961651760744e-05, "loss": 0.3582, "step": 2925 }, { "epoch": 0.06530933704335938, "grad_norm": 0.6907233595848083, "learning_rate": 1.9790248890938958e-05, "loss": 0.4825, "step": 2930 }, { "epoch": 0.06542078642397944, "grad_norm": 0.5148009657859802, "learning_rate": 1.978953492990823e-05, "loss": 0.5087, "step": 2935 }, { "epoch": 0.06553223580459952, "grad_norm": 0.618364155292511, "learning_rate": 1.978881976875608e-05, "loss": 0.4869, "step": 2940 }, { "epoch": 0.06564368518521958, "grad_norm": 0.7060839533805847, "learning_rate": 1.9788103407570187e-05, "loss": 0.3459, "step": 2945 }, { "epoch": 0.06575513456583965, "grad_norm": 0.5656926035881042, "learning_rate": 1.978738584643837e-05, "loss": 0.4501, "step": 2950 }, { "epoch": 0.06586658394645972, "grad_norm": 0.626718282699585, "learning_rate": 1.978666708544859e-05, "loss": 0.341, "step": 2955 }, { "epoch": 0.06597803332707979, "grad_norm": 0.37863433361053467, "learning_rate": 1.978594712468897e-05, "loss": 0.4533, "step": 2960 }, { "epoch": 0.06608948270769985, "grad_norm": 0.6492586731910706, "learning_rate": 1.978522596424777e-05, "loss": 0.5158, "step": 2965 }, { "epoch": 0.06620093208831992, "grad_norm": 0.6117286086082458, "learning_rate": 1.978450360421339e-05, "loss": 0.4592, "step": 2970 }, { "epoch": 0.06631238146893999, "grad_norm": 0.5867244005203247, "learning_rate": 1.9783780044674402e-05, "loss": 0.2563, "step": 2975 }, { "epoch": 0.06642383084956005, "grad_norm": 0.4272123873233795, "learning_rate": 1.9783055285719498e-05, "loss": 0.3653, "step": 2980 }, { "epoch": 0.06653528023018013, "grad_norm": 0.5771167278289795, "learning_rate": 1.9782329327437524e-05, "loss": 0.3544, "step": 2985 }, { "epoch": 0.06664672961080019, "grad_norm": 0.776767373085022, "learning_rate": 1.9781602169917485e-05, "loss": 0.5062, "step": 2990 }, { "epoch": 0.06675817899142025, "grad_norm": 0.537855327129364, "learning_rate": 1.9780873813248525e-05, "loss": 0.4165, "step": 2995 }, { "epoch": 0.06686962837204032, "grad_norm": 0.46249496936798096, "learning_rate": 1.9780144257519928e-05, "loss": 0.4141, "step": 3000 }, { "epoch": 0.06698107775266039, "grad_norm": 0.6784607768058777, "learning_rate": 1.977941350282114e-05, "loss": 0.3485, "step": 3005 }, { "epoch": 0.06709252713328046, "grad_norm": 0.3660302758216858, "learning_rate": 1.977868154924174e-05, "loss": 0.4052, "step": 3010 }, { "epoch": 0.06720397651390052, "grad_norm": 0.5289759039878845, "learning_rate": 1.9777948396871464e-05, "loss": 0.3597, "step": 3015 }, { "epoch": 0.0673154258945206, "grad_norm": 0.7300575971603394, "learning_rate": 1.977721404580019e-05, "loss": 0.5077, "step": 3020 }, { "epoch": 0.06742687527514066, "grad_norm": 0.6065353155136108, "learning_rate": 1.9776478496117937e-05, "loss": 0.3955, "step": 3025 }, { "epoch": 0.06753832465576072, "grad_norm": 0.3663788139820099, "learning_rate": 1.9775741747914886e-05, "loss": 0.3011, "step": 3030 }, { "epoch": 0.0676497740363808, "grad_norm": 0.6221305131912231, "learning_rate": 1.9775003801281355e-05, "loss": 0.403, "step": 3035 }, { "epoch": 0.06776122341700086, "grad_norm": 0.6479603052139282, "learning_rate": 1.9774264656307805e-05, "loss": 0.4332, "step": 3040 }, { "epoch": 0.06787267279762092, "grad_norm": 0.5434275269508362, "learning_rate": 1.9773524313084857e-05, "loss": 0.4223, "step": 3045 }, { "epoch": 0.067984122178241, "grad_norm": 0.5634822249412537, "learning_rate": 1.977278277170327e-05, "loss": 0.36, "step": 3050 }, { "epoch": 0.06809557155886106, "grad_norm": 0.4549255073070526, "learning_rate": 1.9772040032253947e-05, "loss": 0.3791, "step": 3055 }, { "epoch": 0.06820702093948112, "grad_norm": 1.17770516872406, "learning_rate": 1.9771296094827948e-05, "loss": 0.4246, "step": 3060 }, { "epoch": 0.0683184703201012, "grad_norm": 0.6360183954238892, "learning_rate": 1.9770550959516466e-05, "loss": 0.5389, "step": 3065 }, { "epoch": 0.06842991970072126, "grad_norm": 0.4954073131084442, "learning_rate": 1.9769804626410856e-05, "loss": 0.35, "step": 3070 }, { "epoch": 0.06854136908134133, "grad_norm": 0.496670126914978, "learning_rate": 1.976905709560261e-05, "loss": 0.3511, "step": 3075 }, { "epoch": 0.0686528184619614, "grad_norm": 0.574720561504364, "learning_rate": 1.976830836718337e-05, "loss": 0.4003, "step": 3080 }, { "epoch": 0.06876426784258147, "grad_norm": 0.695163905620575, "learning_rate": 1.976755844124492e-05, "loss": 0.3023, "step": 3085 }, { "epoch": 0.06887571722320153, "grad_norm": 0.7089998722076416, "learning_rate": 1.9766807317879204e-05, "loss": 0.3988, "step": 3090 }, { "epoch": 0.06898716660382159, "grad_norm": 0.5017579197883606, "learning_rate": 1.9766054997178297e-05, "loss": 0.4265, "step": 3095 }, { "epoch": 0.06909861598444167, "grad_norm": 0.43221917748451233, "learning_rate": 1.9765301479234428e-05, "loss": 0.452, "step": 3100 }, { "epoch": 0.06921006536506173, "grad_norm": 0.385077565908432, "learning_rate": 1.9764546764139978e-05, "loss": 0.3379, "step": 3105 }, { "epoch": 0.0693215147456818, "grad_norm": 0.5521353483200073, "learning_rate": 1.9763790851987465e-05, "loss": 0.4577, "step": 3110 }, { "epoch": 0.06943296412630187, "grad_norm": 0.6436454653739929, "learning_rate": 1.9763033742869556e-05, "loss": 0.4443, "step": 3115 }, { "epoch": 0.06954441350692193, "grad_norm": 0.48646727204322815, "learning_rate": 1.976227543687907e-05, "loss": 0.3216, "step": 3120 }, { "epoch": 0.069655862887542, "grad_norm": 0.3888736665248871, "learning_rate": 1.976151593410897e-05, "loss": 0.4476, "step": 3125 }, { "epoch": 0.06976731226816207, "grad_norm": 0.5393504500389099, "learning_rate": 1.976075523465236e-05, "loss": 0.4556, "step": 3130 }, { "epoch": 0.06987876164878214, "grad_norm": 0.6784513592720032, "learning_rate": 1.9759993338602506e-05, "loss": 0.4144, "step": 3135 }, { "epoch": 0.0699902110294022, "grad_norm": 0.665978729724884, "learning_rate": 1.97592302460528e-05, "loss": 0.4303, "step": 3140 }, { "epoch": 0.07010166041002228, "grad_norm": 0.5168861746788025, "learning_rate": 1.9758465957096796e-05, "loss": 0.4082, "step": 3145 }, { "epoch": 0.07021310979064234, "grad_norm": 0.5755696296691895, "learning_rate": 1.975770047182819e-05, "loss": 0.3753, "step": 3150 }, { "epoch": 0.0703245591712624, "grad_norm": 0.6826760172843933, "learning_rate": 1.9756933790340823e-05, "loss": 0.4313, "step": 3155 }, { "epoch": 0.07043600855188248, "grad_norm": 0.4687601923942566, "learning_rate": 1.9756165912728687e-05, "loss": 0.4579, "step": 3160 }, { "epoch": 0.07054745793250254, "grad_norm": 0.5872018933296204, "learning_rate": 1.975539683908591e-05, "loss": 0.4517, "step": 3165 }, { "epoch": 0.0706589073131226, "grad_norm": 0.6635053157806396, "learning_rate": 1.9754626569506786e-05, "loss": 0.3935, "step": 3170 }, { "epoch": 0.07077035669374268, "grad_norm": 0.8101974129676819, "learning_rate": 1.975385510408574e-05, "loss": 0.4217, "step": 3175 }, { "epoch": 0.07088180607436274, "grad_norm": 0.488703191280365, "learning_rate": 1.9753082442917346e-05, "loss": 0.3935, "step": 3180 }, { "epoch": 0.0709932554549828, "grad_norm": 0.4950294494628906, "learning_rate": 1.9752308586096326e-05, "loss": 0.4707, "step": 3185 }, { "epoch": 0.07110470483560287, "grad_norm": 0.5157277584075928, "learning_rate": 1.975153353371755e-05, "loss": 0.4748, "step": 3190 }, { "epoch": 0.07121615421622295, "grad_norm": 0.5851638913154602, "learning_rate": 1.9750757285876032e-05, "loss": 0.4026, "step": 3195 }, { "epoch": 0.07132760359684301, "grad_norm": 0.5148910880088806, "learning_rate": 1.9749979842666934e-05, "loss": 0.4397, "step": 3200 }, { "epoch": 0.07143905297746307, "grad_norm": 0.504661500453949, "learning_rate": 1.974920120418557e-05, "loss": 0.4302, "step": 3205 }, { "epoch": 0.07155050235808315, "grad_norm": 0.6598942279815674, "learning_rate": 1.9748421370527383e-05, "loss": 0.4024, "step": 3210 }, { "epoch": 0.07166195173870321, "grad_norm": 0.5633904933929443, "learning_rate": 1.974764034178799e-05, "loss": 0.3654, "step": 3215 }, { "epoch": 0.07177340111932327, "grad_norm": 0.5808898210525513, "learning_rate": 1.974685811806313e-05, "loss": 0.4784, "step": 3220 }, { "epoch": 0.07188485049994335, "grad_norm": 0.6391786932945251, "learning_rate": 1.9746074699448697e-05, "loss": 0.4359, "step": 3225 }, { "epoch": 0.07199629988056341, "grad_norm": 0.4155711233615875, "learning_rate": 1.974529008604073e-05, "loss": 0.4878, "step": 3230 }, { "epoch": 0.07210774926118348, "grad_norm": 0.5104745626449585, "learning_rate": 1.9744504277935425e-05, "loss": 0.4309, "step": 3235 }, { "epoch": 0.07221919864180355, "grad_norm": 0.7546597719192505, "learning_rate": 1.9743717275229114e-05, "loss": 0.3166, "step": 3240 }, { "epoch": 0.07233064802242362, "grad_norm": 0.5601435303688049, "learning_rate": 1.974292907801827e-05, "loss": 0.4617, "step": 3245 }, { "epoch": 0.07244209740304368, "grad_norm": 0.3191545009613037, "learning_rate": 1.9742139686399527e-05, "loss": 0.5098, "step": 3250 }, { "epoch": 0.07255354678366376, "grad_norm": 0.4238179922103882, "learning_rate": 1.974134910046966e-05, "loss": 0.2844, "step": 3255 }, { "epoch": 0.07266499616428382, "grad_norm": 0.5163947343826294, "learning_rate": 1.9740557320325578e-05, "loss": 0.348, "step": 3260 }, { "epoch": 0.07277644554490388, "grad_norm": 0.5601313710212708, "learning_rate": 1.973976434606436e-05, "loss": 0.5619, "step": 3265 }, { "epoch": 0.07288789492552396, "grad_norm": 0.6028194427490234, "learning_rate": 1.9738970177783206e-05, "loss": 0.4684, "step": 3270 }, { "epoch": 0.07299934430614402, "grad_norm": 0.663697361946106, "learning_rate": 1.9738174815579486e-05, "loss": 0.5199, "step": 3275 }, { "epoch": 0.07311079368676408, "grad_norm": 0.39535772800445557, "learning_rate": 1.97373782595507e-05, "loss": 0.2407, "step": 3280 }, { "epoch": 0.07322224306738415, "grad_norm": 0.6364858746528625, "learning_rate": 1.9736580509794503e-05, "loss": 0.3724, "step": 3285 }, { "epoch": 0.07333369244800422, "grad_norm": 0.5598808526992798, "learning_rate": 1.973578156640869e-05, "loss": 0.4292, "step": 3290 }, { "epoch": 0.07344514182862429, "grad_norm": 0.5156586170196533, "learning_rate": 1.97349814294912e-05, "loss": 0.4021, "step": 3295 }, { "epoch": 0.07355659120924435, "grad_norm": 0.6328551173210144, "learning_rate": 1.9734180099140135e-05, "loss": 0.4461, "step": 3300 }, { "epoch": 0.07366804058986443, "grad_norm": 0.5238905549049377, "learning_rate": 1.9733377575453724e-05, "loss": 0.2508, "step": 3305 }, { "epoch": 0.07377948997048449, "grad_norm": 0.5785579085350037, "learning_rate": 1.9732573858530353e-05, "loss": 0.4404, "step": 3310 }, { "epoch": 0.07389093935110455, "grad_norm": 0.7258238196372986, "learning_rate": 1.973176894846855e-05, "loss": 0.5267, "step": 3315 }, { "epoch": 0.07400238873172463, "grad_norm": 0.6168790459632874, "learning_rate": 1.9730962845366993e-05, "loss": 0.3609, "step": 3320 }, { "epoch": 0.07411383811234469, "grad_norm": 0.6070130467414856, "learning_rate": 1.9730155549324502e-05, "loss": 0.3515, "step": 3325 }, { "epoch": 0.07422528749296475, "grad_norm": 0.7390276789665222, "learning_rate": 1.9729347060440046e-05, "loss": 0.413, "step": 3330 }, { "epoch": 0.07433673687358483, "grad_norm": 0.7556608319282532, "learning_rate": 1.9728537378812738e-05, "loss": 0.4181, "step": 3335 }, { "epoch": 0.07444818625420489, "grad_norm": 0.38995063304901123, "learning_rate": 1.9727726504541838e-05, "loss": 0.4586, "step": 3340 }, { "epoch": 0.07455963563482496, "grad_norm": 0.4750341773033142, "learning_rate": 1.9726914437726763e-05, "loss": 0.4457, "step": 3345 }, { "epoch": 0.07467108501544503, "grad_norm": 0.6445577144622803, "learning_rate": 1.972610117846705e-05, "loss": 0.349, "step": 3350 }, { "epoch": 0.0747825343960651, "grad_norm": 0.6933650970458984, "learning_rate": 1.9725286726862412e-05, "loss": 0.4251, "step": 3355 }, { "epoch": 0.07489398377668516, "grad_norm": 0.4839894771575928, "learning_rate": 1.972447108301269e-05, "loss": 0.4288, "step": 3360 }, { "epoch": 0.07500543315730523, "grad_norm": 0.4249872863292694, "learning_rate": 1.9723654247017867e-05, "loss": 0.3755, "step": 3365 }, { "epoch": 0.0751168825379253, "grad_norm": 0.3455201983451843, "learning_rate": 1.9722836218978094e-05, "loss": 0.4046, "step": 3370 }, { "epoch": 0.07522833191854536, "grad_norm": 0.5376846194267273, "learning_rate": 1.972201699899365e-05, "loss": 0.3663, "step": 3375 }, { "epoch": 0.07533978129916542, "grad_norm": 0.48555299639701843, "learning_rate": 1.9721196587164963e-05, "loss": 0.2518, "step": 3380 }, { "epoch": 0.0754512306797855, "grad_norm": 0.5334995985031128, "learning_rate": 1.972037498359261e-05, "loss": 0.4687, "step": 3385 }, { "epoch": 0.07556268006040556, "grad_norm": 0.604278028011322, "learning_rate": 1.9719552188377314e-05, "loss": 0.3836, "step": 3390 }, { "epoch": 0.07567412944102563, "grad_norm": 0.5833326578140259, "learning_rate": 1.971872820161994e-05, "loss": 0.4573, "step": 3395 }, { "epoch": 0.0757855788216457, "grad_norm": 0.5681135654449463, "learning_rate": 1.971790302342151e-05, "loss": 0.4283, "step": 3400 }, { "epoch": 0.07589702820226576, "grad_norm": 0.71799236536026, "learning_rate": 1.971707665388318e-05, "loss": 0.2283, "step": 3405 }, { "epoch": 0.07600847758288583, "grad_norm": 0.3602961599826813, "learning_rate": 1.9716249093106255e-05, "loss": 0.4057, "step": 3410 }, { "epoch": 0.0761199269635059, "grad_norm": 0.37014782428741455, "learning_rate": 1.9715420341192192e-05, "loss": 0.4281, "step": 3415 }, { "epoch": 0.07623137634412597, "grad_norm": 0.6142109036445618, "learning_rate": 1.971459039824258e-05, "loss": 0.4084, "step": 3420 }, { "epoch": 0.07634282572474603, "grad_norm": 0.5882206559181213, "learning_rate": 1.9713759264359175e-05, "loss": 0.3734, "step": 3425 }, { "epoch": 0.0764542751053661, "grad_norm": 0.7564554810523987, "learning_rate": 1.9712926939643864e-05, "loss": 0.3457, "step": 3430 }, { "epoch": 0.07656572448598617, "grad_norm": 0.5783476829528809, "learning_rate": 1.9712093424198682e-05, "loss": 0.4603, "step": 3435 }, { "epoch": 0.07667717386660623, "grad_norm": 0.5960418581962585, "learning_rate": 1.971125871812581e-05, "loss": 0.4154, "step": 3440 }, { "epoch": 0.07678862324722631, "grad_norm": 0.5827411413192749, "learning_rate": 1.971042282152758e-05, "loss": 0.4036, "step": 3445 }, { "epoch": 0.07690007262784637, "grad_norm": 0.5955131649971008, "learning_rate": 1.970958573450646e-05, "loss": 0.5522, "step": 3450 }, { "epoch": 0.07701152200846643, "grad_norm": 0.5336333513259888, "learning_rate": 1.9708747457165083e-05, "loss": 0.3985, "step": 3455 }, { "epoch": 0.07712297138908651, "grad_norm": 0.5097236633300781, "learning_rate": 1.9707907989606204e-05, "loss": 0.407, "step": 3460 }, { "epoch": 0.07723442076970657, "grad_norm": 0.6677082180976868, "learning_rate": 1.970706733193274e-05, "loss": 0.4819, "step": 3465 }, { "epoch": 0.07734587015032664, "grad_norm": 0.4805845320224762, "learning_rate": 1.9706225484247746e-05, "loss": 0.4058, "step": 3470 }, { "epoch": 0.0774573195309467, "grad_norm": 0.5422244668006897, "learning_rate": 1.9705382446654432e-05, "loss": 0.5239, "step": 3475 }, { "epoch": 0.07756876891156678, "grad_norm": 0.6769611239433289, "learning_rate": 1.9704538219256143e-05, "loss": 0.386, "step": 3480 }, { "epoch": 0.07768021829218684, "grad_norm": 0.447390079498291, "learning_rate": 1.9703692802156373e-05, "loss": 0.3574, "step": 3485 }, { "epoch": 0.0777916676728069, "grad_norm": 0.6275390982627869, "learning_rate": 1.9702846195458768e-05, "loss": 0.4229, "step": 3490 }, { "epoch": 0.07790311705342698, "grad_norm": 0.6409914493560791, "learning_rate": 1.9701998399267116e-05, "loss": 0.4774, "step": 3495 }, { "epoch": 0.07801456643404704, "grad_norm": 0.7508968710899353, "learning_rate": 1.9701149413685346e-05, "loss": 0.5063, "step": 3500 }, { "epoch": 0.0781260158146671, "grad_norm": 0.4104915261268616, "learning_rate": 1.970029923881754e-05, "loss": 0.386, "step": 3505 }, { "epoch": 0.07823746519528718, "grad_norm": 0.4921630024909973, "learning_rate": 1.969944787476792e-05, "loss": 0.4103, "step": 3510 }, { "epoch": 0.07834891457590724, "grad_norm": 0.6276881694793701, "learning_rate": 1.9698595321640864e-05, "loss": 0.3788, "step": 3515 }, { "epoch": 0.07846036395652731, "grad_norm": 0.3884750008583069, "learning_rate": 1.969774157954088e-05, "loss": 0.4529, "step": 3520 }, { "epoch": 0.07857181333714738, "grad_norm": 0.7288042902946472, "learning_rate": 1.9696886648572632e-05, "loss": 0.4934, "step": 3525 }, { "epoch": 0.07868326271776745, "grad_norm": 0.7034710645675659, "learning_rate": 1.9696030528840932e-05, "loss": 0.42, "step": 3530 }, { "epoch": 0.07879471209838751, "grad_norm": 0.5408551692962646, "learning_rate": 1.9695173220450733e-05, "loss": 0.5281, "step": 3535 }, { "epoch": 0.07890616147900759, "grad_norm": 0.6238879561424255, "learning_rate": 1.9694314723507128e-05, "loss": 0.4802, "step": 3540 }, { "epoch": 0.07901761085962765, "grad_norm": 0.5341750979423523, "learning_rate": 1.969345503811537e-05, "loss": 0.5339, "step": 3545 }, { "epoch": 0.07912906024024771, "grad_norm": 0.6059457063674927, "learning_rate": 1.969259416438084e-05, "loss": 0.3619, "step": 3550 }, { "epoch": 0.07924050962086779, "grad_norm": 0.5542227029800415, "learning_rate": 1.9691732102409086e-05, "loss": 0.4189, "step": 3555 }, { "epoch": 0.07935195900148785, "grad_norm": 0.46328842639923096, "learning_rate": 1.9690868852305782e-05, "loss": 0.3014, "step": 3560 }, { "epoch": 0.07946340838210791, "grad_norm": 0.5214948058128357, "learning_rate": 1.9690004414176764e-05, "loss": 0.4184, "step": 3565 }, { "epoch": 0.07957485776272798, "grad_norm": 0.7112562656402588, "learning_rate": 1.9689138788127994e-05, "loss": 0.4326, "step": 3570 }, { "epoch": 0.07968630714334805, "grad_norm": 0.8344321250915527, "learning_rate": 1.9688271974265603e-05, "loss": 0.4086, "step": 3575 }, { "epoch": 0.07979775652396812, "grad_norm": 0.587770938873291, "learning_rate": 1.9687403972695844e-05, "loss": 0.3255, "step": 3580 }, { "epoch": 0.07990920590458818, "grad_norm": 0.6727814674377441, "learning_rate": 1.9686534783525136e-05, "loss": 0.4838, "step": 3585 }, { "epoch": 0.08002065528520826, "grad_norm": 0.7931737899780273, "learning_rate": 1.9685664406860033e-05, "loss": 0.5761, "step": 3590 }, { "epoch": 0.08013210466582832, "grad_norm": 0.49970170855522156, "learning_rate": 1.9684792842807235e-05, "loss": 0.3854, "step": 3595 }, { "epoch": 0.08024355404644838, "grad_norm": 0.5609394907951355, "learning_rate": 1.968392009147359e-05, "loss": 0.3726, "step": 3600 }, { "epoch": 0.08035500342706846, "grad_norm": 0.6844983696937561, "learning_rate": 1.968304615296609e-05, "loss": 0.4116, "step": 3605 }, { "epoch": 0.08046645280768852, "grad_norm": 0.6218194365501404, "learning_rate": 1.9682171027391873e-05, "loss": 0.4588, "step": 3610 }, { "epoch": 0.08057790218830858, "grad_norm": 0.49494484066963196, "learning_rate": 1.9681294714858224e-05, "loss": 0.4596, "step": 3615 }, { "epoch": 0.08068935156892866, "grad_norm": 0.6498164534568787, "learning_rate": 1.9680417215472566e-05, "loss": 0.3559, "step": 3620 }, { "epoch": 0.08080080094954872, "grad_norm": 0.5791107416152954, "learning_rate": 1.9679538529342487e-05, "loss": 0.554, "step": 3625 }, { "epoch": 0.08091225033016879, "grad_norm": 0.5231840014457703, "learning_rate": 1.9678658656575692e-05, "loss": 0.4091, "step": 3630 }, { "epoch": 0.08102369971078886, "grad_norm": 0.6502806544303894, "learning_rate": 1.9677777597280055e-05, "loss": 0.3455, "step": 3635 }, { "epoch": 0.08113514909140893, "grad_norm": 0.6524990200996399, "learning_rate": 1.967689535156359e-05, "loss": 0.3828, "step": 3640 }, { "epoch": 0.08124659847202899, "grad_norm": 0.5977895855903625, "learning_rate": 1.9676011919534447e-05, "loss": 0.4543, "step": 3645 }, { "epoch": 0.08135804785264907, "grad_norm": 0.6692437529563904, "learning_rate": 1.9675127301300927e-05, "loss": 0.3642, "step": 3650 }, { "epoch": 0.08146949723326913, "grad_norm": 0.6440483927726746, "learning_rate": 1.967424149697148e-05, "loss": 0.47, "step": 3655 }, { "epoch": 0.08158094661388919, "grad_norm": 0.5908140540122986, "learning_rate": 1.9673354506654703e-05, "loss": 0.424, "step": 3660 }, { "epoch": 0.08169239599450925, "grad_norm": 0.5895307064056396, "learning_rate": 1.967246633045933e-05, "loss": 0.3909, "step": 3665 }, { "epoch": 0.08180384537512933, "grad_norm": 0.5629228353500366, "learning_rate": 1.967157696849424e-05, "loss": 0.5223, "step": 3670 }, { "epoch": 0.0819152947557494, "grad_norm": 0.645937979221344, "learning_rate": 1.9670686420868472e-05, "loss": 0.3936, "step": 3675 }, { "epoch": 0.08202674413636946, "grad_norm": 0.5599381923675537, "learning_rate": 1.9669794687691192e-05, "loss": 0.4481, "step": 3680 }, { "epoch": 0.08213819351698953, "grad_norm": 0.5673943758010864, "learning_rate": 1.9668901769071723e-05, "loss": 0.4666, "step": 3685 }, { "epoch": 0.0822496428976096, "grad_norm": 0.5841967463493347, "learning_rate": 1.966800766511953e-05, "loss": 0.4553, "step": 3690 }, { "epoch": 0.08236109227822966, "grad_norm": 0.6012502312660217, "learning_rate": 1.9667112375944226e-05, "loss": 0.4002, "step": 3695 }, { "epoch": 0.08247254165884974, "grad_norm": 0.874318540096283, "learning_rate": 1.966621590165556e-05, "loss": 0.3171, "step": 3700 }, { "epoch": 0.0825839910394698, "grad_norm": 0.5734549760818481, "learning_rate": 1.9665318242363437e-05, "loss": 0.4828, "step": 3705 }, { "epoch": 0.08269544042008986, "grad_norm": 0.4378792345523834, "learning_rate": 1.96644193981779e-05, "loss": 0.458, "step": 3710 }, { "epoch": 0.08280688980070994, "grad_norm": 0.5063863396644592, "learning_rate": 1.9663519369209147e-05, "loss": 0.4166, "step": 3715 }, { "epoch": 0.08291833918133, "grad_norm": 0.7026726603507996, "learning_rate": 1.9662618155567507e-05, "loss": 0.4646, "step": 3720 }, { "epoch": 0.08302978856195006, "grad_norm": 0.5438389778137207, "learning_rate": 1.9661715757363467e-05, "loss": 0.3754, "step": 3725 }, { "epoch": 0.08314123794257014, "grad_norm": 0.687262773513794, "learning_rate": 1.966081217470765e-05, "loss": 0.2916, "step": 3730 }, { "epoch": 0.0832526873231902, "grad_norm": 0.5479100942611694, "learning_rate": 1.9659907407710836e-05, "loss": 0.4759, "step": 3735 }, { "epoch": 0.08336413670381027, "grad_norm": 0.39713597297668457, "learning_rate": 1.965900145648393e-05, "loss": 0.5065, "step": 3740 }, { "epoch": 0.08347558608443034, "grad_norm": 0.48706549406051636, "learning_rate": 1.9658094321138e-05, "loss": 0.4385, "step": 3745 }, { "epoch": 0.0835870354650504, "grad_norm": 0.45029741525650024, "learning_rate": 1.9657186001784262e-05, "loss": 0.4206, "step": 3750 }, { "epoch": 0.08369848484567047, "grad_norm": 0.5025975108146667, "learning_rate": 1.965627649853406e-05, "loss": 0.3257, "step": 3755 }, { "epoch": 0.08380993422629053, "grad_norm": 0.3801042437553406, "learning_rate": 1.9655365811498894e-05, "loss": 0.3938, "step": 3760 }, { "epoch": 0.08392138360691061, "grad_norm": 0.5360175967216492, "learning_rate": 1.9654453940790405e-05, "loss": 0.5218, "step": 3765 }, { "epoch": 0.08403283298753067, "grad_norm": 0.5435302257537842, "learning_rate": 1.9653540886520387e-05, "loss": 0.3871, "step": 3770 }, { "epoch": 0.08414428236815073, "grad_norm": 0.545606255531311, "learning_rate": 1.965262664880077e-05, "loss": 0.3986, "step": 3775 }, { "epoch": 0.08425573174877081, "grad_norm": 0.571175754070282, "learning_rate": 1.9651711227743633e-05, "loss": 0.4242, "step": 3780 }, { "epoch": 0.08436718112939087, "grad_norm": 0.6667168140411377, "learning_rate": 1.9650794623461198e-05, "loss": 0.5204, "step": 3785 }, { "epoch": 0.08447863051001094, "grad_norm": 0.5052681565284729, "learning_rate": 1.9649876836065836e-05, "loss": 0.395, "step": 3790 }, { "epoch": 0.08459007989063101, "grad_norm": 0.6449118256568909, "learning_rate": 1.9648957865670057e-05, "loss": 0.451, "step": 3795 }, { "epoch": 0.08470152927125107, "grad_norm": 0.4299212098121643, "learning_rate": 1.9648037712386527e-05, "loss": 0.4676, "step": 3800 }, { "epoch": 0.08481297865187114, "grad_norm": 0.5785726308822632, "learning_rate": 1.964711637632804e-05, "loss": 0.4101, "step": 3805 }, { "epoch": 0.08492442803249121, "grad_norm": 0.38612961769104004, "learning_rate": 1.964619385760755e-05, "loss": 0.4847, "step": 3810 }, { "epoch": 0.08503587741311128, "grad_norm": 0.5214675664901733, "learning_rate": 1.9645270156338153e-05, "loss": 0.4197, "step": 3815 }, { "epoch": 0.08514732679373134, "grad_norm": 0.5284664034843445, "learning_rate": 1.9644345272633083e-05, "loss": 0.3452, "step": 3820 }, { "epoch": 0.08525877617435142, "grad_norm": 0.4067220985889435, "learning_rate": 1.9643419206605726e-05, "loss": 0.384, "step": 3825 }, { "epoch": 0.08537022555497148, "grad_norm": 0.44208478927612305, "learning_rate": 1.964249195836961e-05, "loss": 0.4052, "step": 3830 }, { "epoch": 0.08548167493559154, "grad_norm": 0.5821109414100647, "learning_rate": 1.964156352803841e-05, "loss": 0.5098, "step": 3835 }, { "epoch": 0.08559312431621162, "grad_norm": 0.5415028929710388, "learning_rate": 1.964063391572594e-05, "loss": 0.3839, "step": 3840 }, { "epoch": 0.08570457369683168, "grad_norm": 0.6177157163619995, "learning_rate": 1.9639703121546168e-05, "loss": 0.3516, "step": 3845 }, { "epoch": 0.08581602307745174, "grad_norm": 0.5327863097190857, "learning_rate": 1.9638771145613197e-05, "loss": 0.2842, "step": 3850 }, { "epoch": 0.08592747245807181, "grad_norm": 0.5437836050987244, "learning_rate": 1.9637837988041288e-05, "loss": 0.4526, "step": 3855 }, { "epoch": 0.08603892183869188, "grad_norm": 0.49541836977005005, "learning_rate": 1.9636903648944833e-05, "loss": 0.584, "step": 3860 }, { "epoch": 0.08615037121931195, "grad_norm": 0.4200514853000641, "learning_rate": 1.9635968128438376e-05, "loss": 0.4595, "step": 3865 }, { "epoch": 0.08626182059993201, "grad_norm": 0.41504156589508057, "learning_rate": 1.9635031426636603e-05, "loss": 0.4522, "step": 3870 }, { "epoch": 0.08637326998055209, "grad_norm": 0.5315532684326172, "learning_rate": 1.9634093543654355e-05, "loss": 0.4135, "step": 3875 }, { "epoch": 0.08648471936117215, "grad_norm": 0.6478207111358643, "learning_rate": 1.9633154479606597e-05, "loss": 0.4747, "step": 3880 }, { "epoch": 0.08659616874179221, "grad_norm": 0.593681275844574, "learning_rate": 1.9632214234608455e-05, "loss": 0.3559, "step": 3885 }, { "epoch": 0.08670761812241229, "grad_norm": 0.6857460737228394, "learning_rate": 1.9631272808775196e-05, "loss": 0.3462, "step": 3890 }, { "epoch": 0.08681906750303235, "grad_norm": 0.5769230723381042, "learning_rate": 1.9630330202222238e-05, "loss": 0.362, "step": 3895 }, { "epoch": 0.08693051688365241, "grad_norm": 0.6051479578018188, "learning_rate": 1.962938641506513e-05, "loss": 0.4291, "step": 3900 }, { "epoch": 0.08704196626427249, "grad_norm": 0.5377400517463684, "learning_rate": 1.9628441447419573e-05, "loss": 0.4074, "step": 3905 }, { "epoch": 0.08715341564489255, "grad_norm": 0.4809214174747467, "learning_rate": 1.9627495299401415e-05, "loss": 0.3452, "step": 3910 }, { "epoch": 0.08726486502551262, "grad_norm": 0.6425767540931702, "learning_rate": 1.9626547971126646e-05, "loss": 0.4252, "step": 3915 }, { "epoch": 0.0873763144061327, "grad_norm": 0.47710588574409485, "learning_rate": 1.9625599462711403e-05, "loss": 0.3737, "step": 3920 }, { "epoch": 0.08748776378675276, "grad_norm": 0.39346760511398315, "learning_rate": 1.9624649774271962e-05, "loss": 0.3413, "step": 3925 }, { "epoch": 0.08759921316737282, "grad_norm": 0.6022504568099976, "learning_rate": 1.9623698905924754e-05, "loss": 0.3669, "step": 3930 }, { "epoch": 0.0877106625479929, "grad_norm": 0.6236966252326965, "learning_rate": 1.962274685778634e-05, "loss": 0.4755, "step": 3935 }, { "epoch": 0.08782211192861296, "grad_norm": 0.4284822642803192, "learning_rate": 1.962179362997344e-05, "loss": 0.5267, "step": 3940 }, { "epoch": 0.08793356130923302, "grad_norm": 0.5431884527206421, "learning_rate": 1.96208392226029e-05, "loss": 0.3777, "step": 3945 }, { "epoch": 0.08804501068985308, "grad_norm": 0.4668923318386078, "learning_rate": 1.9619883635791745e-05, "loss": 0.378, "step": 3950 }, { "epoch": 0.08815646007047316, "grad_norm": 0.5473476648330688, "learning_rate": 1.9618926869657103e-05, "loss": 0.3816, "step": 3955 }, { "epoch": 0.08826790945109322, "grad_norm": 0.5855584740638733, "learning_rate": 1.961796892431628e-05, "loss": 0.4891, "step": 3960 }, { "epoch": 0.08837935883171329, "grad_norm": 0.587864100933075, "learning_rate": 1.96170097998867e-05, "loss": 0.5786, "step": 3965 }, { "epoch": 0.08849080821233336, "grad_norm": 0.4843202233314514, "learning_rate": 1.9616049496485954e-05, "loss": 0.4364, "step": 3970 }, { "epoch": 0.08860225759295343, "grad_norm": 0.5611356496810913, "learning_rate": 1.9615088014231765e-05, "loss": 0.4311, "step": 3975 }, { "epoch": 0.08871370697357349, "grad_norm": 0.6092740893363953, "learning_rate": 1.9614125353242e-05, "loss": 0.3972, "step": 3980 }, { "epoch": 0.08882515635419357, "grad_norm": 0.7620090842247009, "learning_rate": 1.9613161513634678e-05, "loss": 0.4782, "step": 3985 }, { "epoch": 0.08893660573481363, "grad_norm": 0.47108519077301025, "learning_rate": 1.9612196495527956e-05, "loss": 0.4183, "step": 3990 }, { "epoch": 0.08904805511543369, "grad_norm": 0.9083523154258728, "learning_rate": 1.961123029904014e-05, "loss": 0.539, "step": 3995 }, { "epoch": 0.08915950449605377, "grad_norm": 0.485842227935791, "learning_rate": 1.9610262924289674e-05, "loss": 0.3816, "step": 4000 }, { "epoch": 0.08927095387667383, "grad_norm": 0.4859134554862976, "learning_rate": 1.9609294371395154e-05, "loss": 0.4024, "step": 4005 }, { "epoch": 0.0893824032572939, "grad_norm": 0.5020127892494202, "learning_rate": 1.9608324640475315e-05, "loss": 0.3913, "step": 4010 }, { "epoch": 0.08949385263791397, "grad_norm": 0.5572119355201721, "learning_rate": 1.9607353731649045e-05, "loss": 0.346, "step": 4015 }, { "epoch": 0.08960530201853403, "grad_norm": 0.6680156588554382, "learning_rate": 1.960638164503536e-05, "loss": 0.4647, "step": 4020 }, { "epoch": 0.0897167513991541, "grad_norm": 0.48068952560424805, "learning_rate": 1.9605408380753438e-05, "loss": 0.4862, "step": 4025 }, { "epoch": 0.08982820077977417, "grad_norm": 0.6120688915252686, "learning_rate": 1.960443393892259e-05, "loss": 0.4473, "step": 4030 }, { "epoch": 0.08993965016039424, "grad_norm": 0.6175330877304077, "learning_rate": 1.9603458319662274e-05, "loss": 0.3948, "step": 4035 }, { "epoch": 0.0900510995410143, "grad_norm": 0.49549204111099243, "learning_rate": 1.9602481523092097e-05, "loss": 0.307, "step": 4040 }, { "epoch": 0.09016254892163436, "grad_norm": 0.5401844382286072, "learning_rate": 1.9601503549331803e-05, "loss": 0.4125, "step": 4045 }, { "epoch": 0.09027399830225444, "grad_norm": 0.6865761876106262, "learning_rate": 1.960052439850129e-05, "loss": 0.3436, "step": 4050 }, { "epoch": 0.0903854476828745, "grad_norm": 0.6297030448913574, "learning_rate": 1.9599544070720588e-05, "loss": 0.3901, "step": 4055 }, { "epoch": 0.09049689706349456, "grad_norm": 0.4896922707557678, "learning_rate": 1.959856256610988e-05, "loss": 0.4191, "step": 4060 }, { "epoch": 0.09060834644411464, "grad_norm": 0.38811594247817993, "learning_rate": 1.959757988478949e-05, "loss": 0.4305, "step": 4065 }, { "epoch": 0.0907197958247347, "grad_norm": 0.8301783204078674, "learning_rate": 1.9596596026879893e-05, "loss": 0.4498, "step": 4070 }, { "epoch": 0.09083124520535477, "grad_norm": 0.5923734307289124, "learning_rate": 1.9595610992501694e-05, "loss": 0.3545, "step": 4075 }, { "epoch": 0.09094269458597484, "grad_norm": 0.4734951853752136, "learning_rate": 1.9594624781775655e-05, "loss": 0.3817, "step": 4080 }, { "epoch": 0.0910541439665949, "grad_norm": 0.47061291337013245, "learning_rate": 1.9593637394822673e-05, "loss": 0.4529, "step": 4085 }, { "epoch": 0.09116559334721497, "grad_norm": 0.5556663274765015, "learning_rate": 1.9592648831763804e-05, "loss": 0.4694, "step": 4090 }, { "epoch": 0.09127704272783504, "grad_norm": 0.49430355429649353, "learning_rate": 1.9591659092720226e-05, "loss": 0.4425, "step": 4095 }, { "epoch": 0.09138849210845511, "grad_norm": 0.6225961446762085, "learning_rate": 1.9590668177813284e-05, "loss": 0.4133, "step": 4100 }, { "epoch": 0.09149994148907517, "grad_norm": 0.8296196460723877, "learning_rate": 1.958967608716445e-05, "loss": 0.599, "step": 4105 }, { "epoch": 0.09161139086969525, "grad_norm": 0.4928753972053528, "learning_rate": 1.9588682820895352e-05, "loss": 0.4644, "step": 4110 }, { "epoch": 0.09172284025031531, "grad_norm": 0.6599757671356201, "learning_rate": 1.958768837912775e-05, "loss": 0.3729, "step": 4115 }, { "epoch": 0.09183428963093537, "grad_norm": 0.436012864112854, "learning_rate": 1.958669276198356e-05, "loss": 0.4988, "step": 4120 }, { "epoch": 0.09194573901155545, "grad_norm": 0.41031414270401, "learning_rate": 1.958569596958483e-05, "loss": 0.2872, "step": 4125 }, { "epoch": 0.09205718839217551, "grad_norm": 0.5444058775901794, "learning_rate": 1.958469800205377e-05, "loss": 0.3723, "step": 4130 }, { "epoch": 0.09216863777279558, "grad_norm": 0.5348300337791443, "learning_rate": 1.9583698859512715e-05, "loss": 0.3589, "step": 4135 }, { "epoch": 0.09228008715341564, "grad_norm": 0.6096326112747192, "learning_rate": 1.958269854208416e-05, "loss": 0.3855, "step": 4140 }, { "epoch": 0.09239153653403571, "grad_norm": 0.4446529746055603, "learning_rate": 1.9581697049890723e-05, "loss": 0.4177, "step": 4145 }, { "epoch": 0.09250298591465578, "grad_norm": 0.6660559177398682, "learning_rate": 1.958069438305519e-05, "loss": 0.4488, "step": 4150 }, { "epoch": 0.09261443529527584, "grad_norm": 0.5148131847381592, "learning_rate": 1.957969054170048e-05, "loss": 0.4624, "step": 4155 }, { "epoch": 0.09272588467589592, "grad_norm": 0.43481868505477905, "learning_rate": 1.957868552594965e-05, "loss": 0.3381, "step": 4160 }, { "epoch": 0.09283733405651598, "grad_norm": 0.47481676936149597, "learning_rate": 1.957767933592591e-05, "loss": 0.4867, "step": 4165 }, { "epoch": 0.09294878343713604, "grad_norm": 0.5280261039733887, "learning_rate": 1.9576671971752615e-05, "loss": 0.4098, "step": 4170 }, { "epoch": 0.09306023281775612, "grad_norm": 0.5896614789962769, "learning_rate": 1.9575663433553257e-05, "loss": 0.4239, "step": 4175 }, { "epoch": 0.09317168219837618, "grad_norm": 0.6356634497642517, "learning_rate": 1.9574653721451472e-05, "loss": 0.4121, "step": 4180 }, { "epoch": 0.09328313157899625, "grad_norm": 0.6598286032676697, "learning_rate": 1.9573642835571046e-05, "loss": 0.3416, "step": 4185 }, { "epoch": 0.09339458095961632, "grad_norm": 0.506007194519043, "learning_rate": 1.9572630776035904e-05, "loss": 0.2665, "step": 4190 }, { "epoch": 0.09350603034023638, "grad_norm": 0.4136298894882202, "learning_rate": 1.9571617542970122e-05, "loss": 0.4143, "step": 4195 }, { "epoch": 0.09361747972085645, "grad_norm": 0.7333647608757019, "learning_rate": 1.957060313649791e-05, "loss": 0.506, "step": 4200 }, { "epoch": 0.09372892910147652, "grad_norm": 0.505630612373352, "learning_rate": 1.9569587556743627e-05, "loss": 0.4094, "step": 4205 }, { "epoch": 0.09384037848209659, "grad_norm": 0.5177976489067078, "learning_rate": 1.9568570803831776e-05, "loss": 0.2533, "step": 4210 }, { "epoch": 0.09395182786271665, "grad_norm": 0.43218210339546204, "learning_rate": 1.9567552877887e-05, "loss": 0.3037, "step": 4215 }, { "epoch": 0.09406327724333673, "grad_norm": 0.6493254899978638, "learning_rate": 1.9566533779034094e-05, "loss": 0.5223, "step": 4220 }, { "epoch": 0.09417472662395679, "grad_norm": 0.5709115862846375, "learning_rate": 1.9565513507397987e-05, "loss": 0.3979, "step": 4225 }, { "epoch": 0.09428617600457685, "grad_norm": 0.5558260083198547, "learning_rate": 1.9564492063103762e-05, "loss": 0.5181, "step": 4230 }, { "epoch": 0.09439762538519691, "grad_norm": 0.5373645424842834, "learning_rate": 1.9563469446276634e-05, "loss": 0.441, "step": 4235 }, { "epoch": 0.09450907476581699, "grad_norm": 0.5070239901542664, "learning_rate": 1.9562445657041967e-05, "loss": 0.4075, "step": 4240 }, { "epoch": 0.09462052414643705, "grad_norm": 0.510416567325592, "learning_rate": 1.956142069552528e-05, "loss": 0.4303, "step": 4245 }, { "epoch": 0.09473197352705712, "grad_norm": 0.6932322978973389, "learning_rate": 1.9560394561852214e-05, "loss": 0.4119, "step": 4250 }, { "epoch": 0.0948434229076772, "grad_norm": 0.5977771878242493, "learning_rate": 1.955936725614857e-05, "loss": 0.2711, "step": 4255 }, { "epoch": 0.09495487228829726, "grad_norm": 0.42069804668426514, "learning_rate": 1.955833877854029e-05, "loss": 0.4121, "step": 4260 }, { "epoch": 0.09506632166891732, "grad_norm": 0.6174266934394836, "learning_rate": 1.9557309129153454e-05, "loss": 0.3022, "step": 4265 }, { "epoch": 0.0951777710495374, "grad_norm": 0.8936811685562134, "learning_rate": 1.9556278308114287e-05, "loss": 0.4353, "step": 4270 }, { "epoch": 0.09528922043015746, "grad_norm": 0.6502985954284668, "learning_rate": 1.9555246315549166e-05, "loss": 0.3721, "step": 4275 }, { "epoch": 0.09540066981077752, "grad_norm": 0.4536944329738617, "learning_rate": 1.95542131515846e-05, "loss": 0.4085, "step": 4280 }, { "epoch": 0.0955121191913976, "grad_norm": 0.5095523595809937, "learning_rate": 1.955317881634725e-05, "loss": 0.3132, "step": 4285 }, { "epoch": 0.09562356857201766, "grad_norm": 0.4880094826221466, "learning_rate": 1.9552143309963917e-05, "loss": 0.4069, "step": 4290 }, { "epoch": 0.09573501795263772, "grad_norm": 1.030686855316162, "learning_rate": 1.955110663256154e-05, "loss": 0.422, "step": 4295 }, { "epoch": 0.0958464673332578, "grad_norm": 0.6700113415718079, "learning_rate": 1.9550068784267217e-05, "loss": 0.3764, "step": 4300 }, { "epoch": 0.09595791671387786, "grad_norm": 0.5926774740219116, "learning_rate": 1.9549029765208177e-05, "loss": 0.5163, "step": 4305 }, { "epoch": 0.09606936609449793, "grad_norm": 0.7712245583534241, "learning_rate": 1.9547989575511796e-05, "loss": 0.3977, "step": 4310 }, { "epoch": 0.096180815475118, "grad_norm": 0.4940590560436249, "learning_rate": 1.954694821530559e-05, "loss": 0.466, "step": 4315 }, { "epoch": 0.09629226485573807, "grad_norm": 0.36600184440612793, "learning_rate": 1.9545905684717222e-05, "loss": 0.3899, "step": 4320 }, { "epoch": 0.09640371423635813, "grad_norm": 0.7541760802268982, "learning_rate": 1.9544861983874504e-05, "loss": 0.3956, "step": 4325 }, { "epoch": 0.09651516361697819, "grad_norm": 0.4955383539199829, "learning_rate": 1.9543817112905383e-05, "loss": 0.4192, "step": 4330 }, { "epoch": 0.09662661299759827, "grad_norm": 0.5017595291137695, "learning_rate": 1.954277107193795e-05, "loss": 0.3995, "step": 4335 }, { "epoch": 0.09673806237821833, "grad_norm": 0.5852905511856079, "learning_rate": 1.954172386110044e-05, "loss": 0.4308, "step": 4340 }, { "epoch": 0.0968495117588384, "grad_norm": 0.5507896542549133, "learning_rate": 1.9540675480521234e-05, "loss": 0.4659, "step": 4345 }, { "epoch": 0.09696096113945847, "grad_norm": 0.5486690402030945, "learning_rate": 1.953962593032886e-05, "loss": 0.4153, "step": 4350 }, { "epoch": 0.09707241052007853, "grad_norm": 0.5381020307540894, "learning_rate": 1.953857521065198e-05, "loss": 0.3121, "step": 4355 }, { "epoch": 0.0971838599006986, "grad_norm": 0.5972719192504883, "learning_rate": 1.9537523321619406e-05, "loss": 0.4231, "step": 4360 }, { "epoch": 0.09729530928131867, "grad_norm": 0.6945480108261108, "learning_rate": 1.9536470263360093e-05, "loss": 0.4381, "step": 4365 }, { "epoch": 0.09740675866193874, "grad_norm": 0.4568721354007721, "learning_rate": 1.9535416036003132e-05, "loss": 0.3924, "step": 4370 }, { "epoch": 0.0975182080425588, "grad_norm": 0.4959554374217987, "learning_rate": 1.953436063967777e-05, "loss": 0.3647, "step": 4375 }, { "epoch": 0.09762965742317888, "grad_norm": 0.4809156060218811, "learning_rate": 1.953330407451339e-05, "loss": 0.4009, "step": 4380 }, { "epoch": 0.09774110680379894, "grad_norm": 0.5318039655685425, "learning_rate": 1.953224634063951e-05, "loss": 0.4031, "step": 4385 }, { "epoch": 0.097852556184419, "grad_norm": 0.6752211451530457, "learning_rate": 1.9531187438185812e-05, "loss": 0.342, "step": 4390 }, { "epoch": 0.09796400556503908, "grad_norm": 0.4800848364830017, "learning_rate": 1.95301273672821e-05, "loss": 0.3931, "step": 4395 }, { "epoch": 0.09807545494565914, "grad_norm": 0.5006334185600281, "learning_rate": 1.9529066128058333e-05, "loss": 0.3935, "step": 4400 }, { "epoch": 0.0981869043262792, "grad_norm": 0.7476497292518616, "learning_rate": 1.9528003720644615e-05, "loss": 0.4759, "step": 4405 }, { "epoch": 0.09829835370689928, "grad_norm": 0.5515742897987366, "learning_rate": 1.9526940145171185e-05, "loss": 0.3507, "step": 4410 }, { "epoch": 0.09840980308751934, "grad_norm": 0.44880327582359314, "learning_rate": 1.952587540176843e-05, "loss": 0.4261, "step": 4415 }, { "epoch": 0.0985212524681394, "grad_norm": 0.4958060681819916, "learning_rate": 1.9524809490566878e-05, "loss": 0.3289, "step": 4420 }, { "epoch": 0.09863270184875947, "grad_norm": 0.4644846022129059, "learning_rate": 1.9523742411697205e-05, "loss": 0.5159, "step": 4425 }, { "epoch": 0.09874415122937955, "grad_norm": 0.5666079521179199, "learning_rate": 1.952267416529022e-05, "loss": 0.3887, "step": 4430 }, { "epoch": 0.09885560060999961, "grad_norm": 0.8068232536315918, "learning_rate": 1.952160475147689e-05, "loss": 0.4436, "step": 4435 }, { "epoch": 0.09896704999061967, "grad_norm": 0.6327325701713562, "learning_rate": 1.9520534170388314e-05, "loss": 0.386, "step": 4440 }, { "epoch": 0.09907849937123975, "grad_norm": 0.583257794380188, "learning_rate": 1.9519462422155733e-05, "loss": 0.4371, "step": 4445 }, { "epoch": 0.09918994875185981, "grad_norm": 0.42879775166511536, "learning_rate": 1.951838950691054e-05, "loss": 0.38, "step": 4450 }, { "epoch": 0.09930139813247987, "grad_norm": 0.5124005079269409, "learning_rate": 1.9517315424784263e-05, "loss": 0.4692, "step": 4455 }, { "epoch": 0.09941284751309995, "grad_norm": 0.4689030945301056, "learning_rate": 1.9516240175908578e-05, "loss": 0.4573, "step": 4460 }, { "epoch": 0.09952429689372001, "grad_norm": 0.5528591871261597, "learning_rate": 1.95151637604153e-05, "loss": 0.4225, "step": 4465 }, { "epoch": 0.09963574627434008, "grad_norm": 0.5473395586013794, "learning_rate": 1.9514086178436393e-05, "loss": 0.4231, "step": 4470 }, { "epoch": 0.09974719565496015, "grad_norm": 0.5362890958786011, "learning_rate": 1.9513007430103954e-05, "loss": 0.5017, "step": 4475 }, { "epoch": 0.09985864503558022, "grad_norm": 0.5761915445327759, "learning_rate": 1.9511927515550235e-05, "loss": 0.3971, "step": 4480 }, { "epoch": 0.09997009441620028, "grad_norm": 0.7011691331863403, "learning_rate": 1.9510846434907626e-05, "loss": 0.4316, "step": 4485 }, { "epoch": 0.10008154379682035, "grad_norm": 0.40270212292671204, "learning_rate": 1.9509764188308654e-05, "loss": 0.3871, "step": 4490 }, { "epoch": 0.10019299317744042, "grad_norm": 0.7045685052871704, "learning_rate": 1.9508680775886e-05, "loss": 0.3752, "step": 4495 }, { "epoch": 0.10030444255806048, "grad_norm": 0.4979664087295532, "learning_rate": 1.9507596197772474e-05, "loss": 0.4033, "step": 4500 }, { "epoch": 0.10041589193868056, "grad_norm": 0.6763628125190735, "learning_rate": 1.9506510454101045e-05, "loss": 0.3748, "step": 4505 }, { "epoch": 0.10052734131930062, "grad_norm": 0.7423232793807983, "learning_rate": 1.950542354500481e-05, "loss": 0.408, "step": 4510 }, { "epoch": 0.10063879069992068, "grad_norm": 0.48339027166366577, "learning_rate": 1.9504335470617023e-05, "loss": 0.4648, "step": 4515 }, { "epoch": 0.10075024008054075, "grad_norm": 0.6129444241523743, "learning_rate": 1.9503246231071068e-05, "loss": 0.4098, "step": 4520 }, { "epoch": 0.10086168946116082, "grad_norm": 0.5484494566917419, "learning_rate": 1.9502155826500477e-05, "loss": 0.3202, "step": 4525 }, { "epoch": 0.10097313884178089, "grad_norm": 0.40450939536094666, "learning_rate": 1.9501064257038928e-05, "loss": 0.3256, "step": 4530 }, { "epoch": 0.10108458822240095, "grad_norm": 0.5657923221588135, "learning_rate": 1.9499971522820238e-05, "loss": 0.4207, "step": 4535 }, { "epoch": 0.10119603760302102, "grad_norm": 0.4858623445034027, "learning_rate": 1.949887762397837e-05, "loss": 0.4609, "step": 4540 }, { "epoch": 0.10130748698364109, "grad_norm": 0.5333701372146606, "learning_rate": 1.9497782560647424e-05, "loss": 0.3616, "step": 4545 }, { "epoch": 0.10141893636426115, "grad_norm": 0.5602688789367676, "learning_rate": 1.9496686332961646e-05, "loss": 0.3678, "step": 4550 }, { "epoch": 0.10153038574488123, "grad_norm": 0.3975955545902252, "learning_rate": 1.9495588941055428e-05, "loss": 0.3447, "step": 4555 }, { "epoch": 0.10164183512550129, "grad_norm": 0.48473915457725525, "learning_rate": 1.9494490385063303e-05, "loss": 0.2937, "step": 4560 }, { "epoch": 0.10175328450612135, "grad_norm": 0.5309877991676331, "learning_rate": 1.949339066511994e-05, "loss": 0.4019, "step": 4565 }, { "epoch": 0.10186473388674143, "grad_norm": 0.5357742309570312, "learning_rate": 1.9492289781360158e-05, "loss": 0.3802, "step": 4570 }, { "epoch": 0.10197618326736149, "grad_norm": 0.6399753093719482, "learning_rate": 1.949118773391892e-05, "loss": 0.3296, "step": 4575 }, { "epoch": 0.10208763264798155, "grad_norm": 0.48185470700263977, "learning_rate": 1.9490084522931326e-05, "loss": 0.3292, "step": 4580 }, { "epoch": 0.10219908202860163, "grad_norm": 0.4180678725242615, "learning_rate": 1.9488980148532622e-05, "loss": 0.3201, "step": 4585 }, { "epoch": 0.1023105314092217, "grad_norm": 0.5125147104263306, "learning_rate": 1.94878746108582e-05, "loss": 0.3645, "step": 4590 }, { "epoch": 0.10242198078984176, "grad_norm": 0.999405562877655, "learning_rate": 1.9486767910043577e-05, "loss": 0.4721, "step": 4595 }, { "epoch": 0.10253343017046183, "grad_norm": 0.5479783415794373, "learning_rate": 1.948566004622444e-05, "loss": 0.4055, "step": 4600 }, { "epoch": 0.1026448795510819, "grad_norm": 0.5214530825614929, "learning_rate": 1.94845510195366e-05, "loss": 0.4083, "step": 4605 }, { "epoch": 0.10275632893170196, "grad_norm": 0.5216266512870789, "learning_rate": 1.9483440830116015e-05, "loss": 0.3945, "step": 4610 }, { "epoch": 0.10286777831232202, "grad_norm": 0.6572335362434387, "learning_rate": 1.948232947809878e-05, "loss": 0.4189, "step": 4615 }, { "epoch": 0.1029792276929421, "grad_norm": 0.4009169042110443, "learning_rate": 1.9481216963621147e-05, "loss": 0.323, "step": 4620 }, { "epoch": 0.10309067707356216, "grad_norm": 0.5584750175476074, "learning_rate": 1.94801032868195e-05, "loss": 0.3341, "step": 4625 }, { "epoch": 0.10320212645418222, "grad_norm": 0.7415927648544312, "learning_rate": 1.947898844783036e-05, "loss": 0.3254, "step": 4630 }, { "epoch": 0.1033135758348023, "grad_norm": 0.521470308303833, "learning_rate": 1.9477872446790407e-05, "loss": 0.3841, "step": 4635 }, { "epoch": 0.10342502521542236, "grad_norm": 0.49424269795417786, "learning_rate": 1.9476755283836448e-05, "loss": 0.3904, "step": 4640 }, { "epoch": 0.10353647459604243, "grad_norm": 0.44216257333755493, "learning_rate": 1.947563695910544e-05, "loss": 0.4693, "step": 4645 }, { "epoch": 0.1036479239766625, "grad_norm": 0.36706945300102234, "learning_rate": 1.9474517472734483e-05, "loss": 0.3081, "step": 4650 }, { "epoch": 0.10375937335728257, "grad_norm": 0.7147274017333984, "learning_rate": 1.9473396824860818e-05, "loss": 0.4634, "step": 4655 }, { "epoch": 0.10387082273790263, "grad_norm": 0.4979982078075409, "learning_rate": 1.9472275015621823e-05, "loss": 0.398, "step": 4660 }, { "epoch": 0.1039822721185227, "grad_norm": 0.5600672364234924, "learning_rate": 1.9471152045155028e-05, "loss": 0.4202, "step": 4665 }, { "epoch": 0.10409372149914277, "grad_norm": 0.6161574721336365, "learning_rate": 1.9470027913598094e-05, "loss": 0.3974, "step": 4670 }, { "epoch": 0.10420517087976283, "grad_norm": 1.0517438650131226, "learning_rate": 1.9468902621088838e-05, "loss": 0.4727, "step": 4675 }, { "epoch": 0.10431662026038291, "grad_norm": 0.7160767912864685, "learning_rate": 1.946777616776521e-05, "loss": 0.3719, "step": 4680 }, { "epoch": 0.10442806964100297, "grad_norm": 0.561278223991394, "learning_rate": 1.94666485537653e-05, "loss": 0.4472, "step": 4685 }, { "epoch": 0.10453951902162303, "grad_norm": 0.5287666320800781, "learning_rate": 1.9465519779227354e-05, "loss": 0.5288, "step": 4690 }, { "epoch": 0.10465096840224311, "grad_norm": 0.6254017949104309, "learning_rate": 1.9464389844289742e-05, "loss": 0.4221, "step": 4695 }, { "epoch": 0.10476241778286317, "grad_norm": 0.4677393436431885, "learning_rate": 1.946325874909099e-05, "loss": 0.3627, "step": 4700 }, { "epoch": 0.10487386716348324, "grad_norm": 0.37834852933883667, "learning_rate": 1.946212649376976e-05, "loss": 0.4134, "step": 4705 }, { "epoch": 0.1049853165441033, "grad_norm": 0.4782722592353821, "learning_rate": 1.946099307846486e-05, "loss": 0.3583, "step": 4710 }, { "epoch": 0.10509676592472338, "grad_norm": 0.6644002795219421, "learning_rate": 1.9459858503315236e-05, "loss": 0.5085, "step": 4715 }, { "epoch": 0.10520821530534344, "grad_norm": 0.5693483948707581, "learning_rate": 1.9458722768459976e-05, "loss": 0.5201, "step": 4720 }, { "epoch": 0.1053196646859635, "grad_norm": 0.5851562023162842, "learning_rate": 1.9457585874038316e-05, "loss": 0.4057, "step": 4725 }, { "epoch": 0.10543111406658358, "grad_norm": 0.4632630944252014, "learning_rate": 1.9456447820189634e-05, "loss": 0.4006, "step": 4730 }, { "epoch": 0.10554256344720364, "grad_norm": 0.49858537316322327, "learning_rate": 1.9455308607053435e-05, "loss": 0.4134, "step": 4735 }, { "epoch": 0.1056540128278237, "grad_norm": 0.6325331330299377, "learning_rate": 1.9454168234769388e-05, "loss": 0.3621, "step": 4740 }, { "epoch": 0.10576546220844378, "grad_norm": 0.528812825679779, "learning_rate": 1.9453026703477288e-05, "loss": 0.285, "step": 4745 }, { "epoch": 0.10587691158906384, "grad_norm": 0.5538693070411682, "learning_rate": 1.9451884013317078e-05, "loss": 0.398, "step": 4750 }, { "epoch": 0.1059883609696839, "grad_norm": 0.7025234699249268, "learning_rate": 1.945074016442885e-05, "loss": 0.4313, "step": 4755 }, { "epoch": 0.10609981035030398, "grad_norm": 0.5137503147125244, "learning_rate": 1.9449595156952827e-05, "loss": 0.3942, "step": 4760 }, { "epoch": 0.10621125973092405, "grad_norm": 0.5194128155708313, "learning_rate": 1.9448448991029375e-05, "loss": 0.5149, "step": 4765 }, { "epoch": 0.10632270911154411, "grad_norm": 0.5486317276954651, "learning_rate": 1.944730166679901e-05, "loss": 0.3688, "step": 4770 }, { "epoch": 0.10643415849216419, "grad_norm": 0.4897007942199707, "learning_rate": 1.944615318440238e-05, "loss": 0.3045, "step": 4775 }, { "epoch": 0.10654560787278425, "grad_norm": 0.5833176970481873, "learning_rate": 1.9445003543980282e-05, "loss": 0.6282, "step": 4780 }, { "epoch": 0.10665705725340431, "grad_norm": 0.5699679851531982, "learning_rate": 1.944385274567366e-05, "loss": 0.4778, "step": 4785 }, { "epoch": 0.10676850663402439, "grad_norm": 0.405729740858078, "learning_rate": 1.9442700789623578e-05, "loss": 0.4771, "step": 4790 }, { "epoch": 0.10687995601464445, "grad_norm": 0.5367416739463806, "learning_rate": 1.944154767597127e-05, "loss": 0.4213, "step": 4795 }, { "epoch": 0.10699140539526451, "grad_norm": 0.9690253734588623, "learning_rate": 1.9440393404858095e-05, "loss": 0.4605, "step": 4800 }, { "epoch": 0.10710285477588458, "grad_norm": 0.31591367721557617, "learning_rate": 1.9439237976425554e-05, "loss": 0.3637, "step": 4805 }, { "epoch": 0.10721430415650465, "grad_norm": 0.6415589451789856, "learning_rate": 1.94380813908153e-05, "loss": 0.4701, "step": 4810 }, { "epoch": 0.10732575353712472, "grad_norm": 0.5109201669692993, "learning_rate": 1.943692364816912e-05, "loss": 0.5069, "step": 4815 }, { "epoch": 0.10743720291774478, "grad_norm": 0.6550776958465576, "learning_rate": 1.9435764748628938e-05, "loss": 0.3794, "step": 4820 }, { "epoch": 0.10754865229836486, "grad_norm": 0.5925948023796082, "learning_rate": 1.9434604692336833e-05, "loss": 0.4779, "step": 4825 }, { "epoch": 0.10766010167898492, "grad_norm": 0.6550899744033813, "learning_rate": 1.9433443479435017e-05, "loss": 0.4021, "step": 4830 }, { "epoch": 0.10777155105960498, "grad_norm": 0.5794579386711121, "learning_rate": 1.9432281110065845e-05, "loss": 0.5415, "step": 4835 }, { "epoch": 0.10788300044022506, "grad_norm": 0.5639534592628479, "learning_rate": 1.9431117584371816e-05, "loss": 0.39, "step": 4840 }, { "epoch": 0.10799444982084512, "grad_norm": 0.43835127353668213, "learning_rate": 1.9429952902495564e-05, "loss": 0.3971, "step": 4845 }, { "epoch": 0.10810589920146518, "grad_norm": 0.672596275806427, "learning_rate": 1.942878706457988e-05, "loss": 0.4264, "step": 4850 }, { "epoch": 0.10821734858208526, "grad_norm": 0.6578832268714905, "learning_rate": 1.942762007076768e-05, "loss": 0.449, "step": 4855 }, { "epoch": 0.10832879796270532, "grad_norm": 0.463405042886734, "learning_rate": 1.9426451921202026e-05, "loss": 0.4623, "step": 4860 }, { "epoch": 0.10844024734332539, "grad_norm": 0.48977985978126526, "learning_rate": 1.942528261602613e-05, "loss": 0.406, "step": 4865 }, { "epoch": 0.10855169672394546, "grad_norm": 0.47064387798309326, "learning_rate": 1.9424112155383334e-05, "loss": 0.3864, "step": 4870 }, { "epoch": 0.10866314610456553, "grad_norm": 0.4514538049697876, "learning_rate": 1.9422940539417133e-05, "loss": 0.4726, "step": 4875 }, { "epoch": 0.10877459548518559, "grad_norm": 0.3914186656475067, "learning_rate": 1.9421767768271156e-05, "loss": 0.4734, "step": 4880 }, { "epoch": 0.10888604486580566, "grad_norm": 0.6711552739143372, "learning_rate": 1.9420593842089178e-05, "loss": 0.4208, "step": 4885 }, { "epoch": 0.10899749424642573, "grad_norm": 0.598562479019165, "learning_rate": 1.9419418761015107e-05, "loss": 0.632, "step": 4890 }, { "epoch": 0.10910894362704579, "grad_norm": 0.5351366996765137, "learning_rate": 1.9418242525193002e-05, "loss": 0.3546, "step": 4895 }, { "epoch": 0.10922039300766585, "grad_norm": 0.546393871307373, "learning_rate": 1.9417065134767067e-05, "loss": 0.3914, "step": 4900 }, { "epoch": 0.10933184238828593, "grad_norm": 0.5991262197494507, "learning_rate": 1.941588658988163e-05, "loss": 0.4898, "step": 4905 }, { "epoch": 0.10944329176890599, "grad_norm": 0.6237379908561707, "learning_rate": 1.9414706890681177e-05, "loss": 0.3676, "step": 4910 }, { "epoch": 0.10955474114952606, "grad_norm": 0.5884041786193848, "learning_rate": 1.9413526037310332e-05, "loss": 0.4046, "step": 4915 }, { "epoch": 0.10966619053014613, "grad_norm": 0.5601520538330078, "learning_rate": 1.9412344029913854e-05, "loss": 0.3836, "step": 4920 }, { "epoch": 0.1097776399107662, "grad_norm": 0.5114838480949402, "learning_rate": 1.941116086863665e-05, "loss": 0.4927, "step": 4925 }, { "epoch": 0.10988908929138626, "grad_norm": 0.5547128915786743, "learning_rate": 1.9409976553623767e-05, "loss": 0.4657, "step": 4930 }, { "epoch": 0.11000053867200633, "grad_norm": 0.6494245529174805, "learning_rate": 1.940879108502039e-05, "loss": 0.3642, "step": 4935 }, { "epoch": 0.1101119880526264, "grad_norm": 0.34890300035476685, "learning_rate": 1.9407604462971856e-05, "loss": 0.2909, "step": 4940 }, { "epoch": 0.11022343743324646, "grad_norm": 0.6003003716468811, "learning_rate": 1.9406416687623625e-05, "loss": 0.5144, "step": 4945 }, { "epoch": 0.11033488681386654, "grad_norm": 0.48884761333465576, "learning_rate": 1.9405227759121318e-05, "loss": 0.4649, "step": 4950 }, { "epoch": 0.1104463361944866, "grad_norm": 0.6424091458320618, "learning_rate": 1.9404037677610685e-05, "loss": 0.4965, "step": 4955 }, { "epoch": 0.11055778557510666, "grad_norm": 0.6459415555000305, "learning_rate": 1.940284644323762e-05, "loss": 0.4513, "step": 4960 }, { "epoch": 0.11066923495572674, "grad_norm": 0.4841134548187256, "learning_rate": 1.940165405614816e-05, "loss": 0.3238, "step": 4965 }, { "epoch": 0.1107806843363468, "grad_norm": 0.7165957093238831, "learning_rate": 1.940046051648848e-05, "loss": 0.5479, "step": 4970 }, { "epoch": 0.11089213371696686, "grad_norm": 0.5024096369743347, "learning_rate": 1.9399265824404903e-05, "loss": 0.4196, "step": 4975 }, { "epoch": 0.11100358309758694, "grad_norm": 0.5883904695510864, "learning_rate": 1.9398069980043885e-05, "loss": 0.391, "step": 4980 }, { "epoch": 0.111115032478207, "grad_norm": 0.6263939142227173, "learning_rate": 1.9396872983552033e-05, "loss": 0.4796, "step": 4985 }, { "epoch": 0.11122648185882707, "grad_norm": 0.5873487591743469, "learning_rate": 1.9395674835076085e-05, "loss": 0.2901, "step": 4990 }, { "epoch": 0.11133793123944713, "grad_norm": 0.5534051060676575, "learning_rate": 1.9394475534762925e-05, "loss": 0.4489, "step": 4995 }, { "epoch": 0.1114493806200672, "grad_norm": 0.5187072157859802, "learning_rate": 1.939327508275958e-05, "loss": 0.345, "step": 5000 }, { "epoch": 0.11156083000068727, "grad_norm": 0.60208660364151, "learning_rate": 1.9392073479213213e-05, "loss": 0.3369, "step": 5005 }, { "epoch": 0.11167227938130733, "grad_norm": 0.585619330406189, "learning_rate": 1.9390870724271133e-05, "loss": 0.4317, "step": 5010 }, { "epoch": 0.11178372876192741, "grad_norm": 0.5309601426124573, "learning_rate": 1.9389666818080787e-05, "loss": 0.4831, "step": 5015 }, { "epoch": 0.11189517814254747, "grad_norm": 1.0372563600540161, "learning_rate": 1.9388461760789773e-05, "loss": 0.4236, "step": 5020 }, { "epoch": 0.11200662752316753, "grad_norm": 0.49995213747024536, "learning_rate": 1.938725555254581e-05, "loss": 0.3901, "step": 5025 }, { "epoch": 0.11211807690378761, "grad_norm": 0.4487152695655823, "learning_rate": 1.9386048193496774e-05, "loss": 0.413, "step": 5030 }, { "epoch": 0.11222952628440767, "grad_norm": 0.3875434696674347, "learning_rate": 1.938483968379068e-05, "loss": 0.3672, "step": 5035 }, { "epoch": 0.11234097566502774, "grad_norm": 0.691291868686676, "learning_rate": 1.938363002357568e-05, "loss": 0.5052, "step": 5040 }, { "epoch": 0.11245242504564781, "grad_norm": 0.406793475151062, "learning_rate": 1.938241921300007e-05, "loss": 0.4285, "step": 5045 }, { "epoch": 0.11256387442626788, "grad_norm": 0.7369039058685303, "learning_rate": 1.9381207252212286e-05, "loss": 0.3757, "step": 5050 }, { "epoch": 0.11267532380688794, "grad_norm": 0.5105433464050293, "learning_rate": 1.9379994141360906e-05, "loss": 0.3474, "step": 5055 }, { "epoch": 0.11278677318750802, "grad_norm": 0.6076253652572632, "learning_rate": 1.9378779880594643e-05, "loss": 0.495, "step": 5060 }, { "epoch": 0.11289822256812808, "grad_norm": 0.4707455635070801, "learning_rate": 1.937756447006236e-05, "loss": 0.3853, "step": 5065 }, { "epoch": 0.11300967194874814, "grad_norm": 0.629932701587677, "learning_rate": 1.937634790991306e-05, "loss": 0.4166, "step": 5070 }, { "epoch": 0.11312112132936822, "grad_norm": 0.5261717438697815, "learning_rate": 1.937513020029588e-05, "loss": 0.3943, "step": 5075 }, { "epoch": 0.11323257070998828, "grad_norm": 0.8253975510597229, "learning_rate": 1.93739113413601e-05, "loss": 0.4549, "step": 5080 }, { "epoch": 0.11334402009060834, "grad_norm": 0.49206769466400146, "learning_rate": 1.937269133325515e-05, "loss": 0.337, "step": 5085 }, { "epoch": 0.1134554694712284, "grad_norm": 0.5231266021728516, "learning_rate": 1.937147017613058e-05, "loss": 0.3369, "step": 5090 }, { "epoch": 0.11356691885184848, "grad_norm": 0.5962830781936646, "learning_rate": 1.937024787013611e-05, "loss": 0.3433, "step": 5095 }, { "epoch": 0.11367836823246855, "grad_norm": 0.6776148080825806, "learning_rate": 1.9369024415421576e-05, "loss": 0.2904, "step": 5100 }, { "epoch": 0.11378981761308861, "grad_norm": 0.46378564834594727, "learning_rate": 1.9367799812136967e-05, "loss": 0.2613, "step": 5105 }, { "epoch": 0.11390126699370869, "grad_norm": 0.6109187602996826, "learning_rate": 1.936657406043241e-05, "loss": 0.3001, "step": 5110 }, { "epoch": 0.11401271637432875, "grad_norm": 0.4546675384044647, "learning_rate": 1.9365347160458172e-05, "loss": 0.4848, "step": 5115 }, { "epoch": 0.11412416575494881, "grad_norm": 0.632952868938446, "learning_rate": 1.9364119112364662e-05, "loss": 0.2835, "step": 5120 }, { "epoch": 0.11423561513556889, "grad_norm": 0.7153362035751343, "learning_rate": 1.9362889916302428e-05, "loss": 0.4732, "step": 5125 }, { "epoch": 0.11434706451618895, "grad_norm": 0.5372235774993896, "learning_rate": 1.9361659572422158e-05, "loss": 0.4101, "step": 5130 }, { "epoch": 0.11445851389680901, "grad_norm": 0.604742169380188, "learning_rate": 1.9360428080874687e-05, "loss": 0.4882, "step": 5135 }, { "epoch": 0.11456996327742909, "grad_norm": 0.5875693559646606, "learning_rate": 1.9359195441810988e-05, "loss": 0.4322, "step": 5140 }, { "epoch": 0.11468141265804915, "grad_norm": 0.6571182012557983, "learning_rate": 1.9357961655382165e-05, "loss": 0.4533, "step": 5145 }, { "epoch": 0.11479286203866922, "grad_norm": 0.4525604248046875, "learning_rate": 1.9356726721739476e-05, "loss": 0.3682, "step": 5150 }, { "epoch": 0.11490431141928929, "grad_norm": 0.542430579662323, "learning_rate": 1.9355490641034315e-05, "loss": 0.3799, "step": 5155 }, { "epoch": 0.11501576079990936, "grad_norm": 1.2807742357254028, "learning_rate": 1.9354253413418215e-05, "loss": 0.4531, "step": 5160 }, { "epoch": 0.11512721018052942, "grad_norm": 0.5787119269371033, "learning_rate": 1.9353015039042852e-05, "loss": 0.4159, "step": 5165 }, { "epoch": 0.1152386595611495, "grad_norm": 0.3594432771205902, "learning_rate": 1.9351775518060036e-05, "loss": 0.4104, "step": 5170 }, { "epoch": 0.11535010894176956, "grad_norm": 0.7893831729888916, "learning_rate": 1.9350534850621728e-05, "loss": 0.4584, "step": 5175 }, { "epoch": 0.11546155832238962, "grad_norm": 0.46665945649147034, "learning_rate": 1.9349293036880023e-05, "loss": 0.4721, "step": 5180 }, { "epoch": 0.11557300770300968, "grad_norm": 0.5082299113273621, "learning_rate": 1.9348050076987155e-05, "loss": 0.3949, "step": 5185 }, { "epoch": 0.11568445708362976, "grad_norm": 0.6792308688163757, "learning_rate": 1.9346805971095504e-05, "loss": 0.4241, "step": 5190 }, { "epoch": 0.11579590646424982, "grad_norm": 0.5918766856193542, "learning_rate": 1.934556071935759e-05, "loss": 0.3717, "step": 5195 }, { "epoch": 0.11590735584486989, "grad_norm": 0.36979660391807556, "learning_rate": 1.9344314321926065e-05, "loss": 0.3797, "step": 5200 }, { "epoch": 0.11601880522548996, "grad_norm": 0.5370672345161438, "learning_rate": 1.9343066778953733e-05, "loss": 0.4361, "step": 5205 }, { "epoch": 0.11613025460611003, "grad_norm": 0.546700656414032, "learning_rate": 1.9341818090593532e-05, "loss": 0.4005, "step": 5210 }, { "epoch": 0.11624170398673009, "grad_norm": 0.45658764243125916, "learning_rate": 1.934056825699854e-05, "loss": 0.3926, "step": 5215 }, { "epoch": 0.11635315336735017, "grad_norm": 0.4908159375190735, "learning_rate": 1.9339317278321975e-05, "loss": 0.4419, "step": 5220 }, { "epoch": 0.11646460274797023, "grad_norm": 0.5328987240791321, "learning_rate": 1.9338065154717203e-05, "loss": 0.3964, "step": 5225 }, { "epoch": 0.11657605212859029, "grad_norm": 0.5930885076522827, "learning_rate": 1.9336811886337723e-05, "loss": 0.409, "step": 5230 }, { "epoch": 0.11668750150921037, "grad_norm": 0.3622898459434509, "learning_rate": 1.9335557473337174e-05, "loss": 0.3064, "step": 5235 }, { "epoch": 0.11679895088983043, "grad_norm": 0.5278002023696899, "learning_rate": 1.9334301915869334e-05, "loss": 0.4698, "step": 5240 }, { "epoch": 0.11691040027045049, "grad_norm": 0.46072134375572205, "learning_rate": 1.9333045214088132e-05, "loss": 0.4194, "step": 5245 }, { "epoch": 0.11702184965107057, "grad_norm": 0.8218250870704651, "learning_rate": 1.933178736814763e-05, "loss": 0.5006, "step": 5250 }, { "epoch": 0.11713329903169063, "grad_norm": 0.5179040431976318, "learning_rate": 1.9330528378202024e-05, "loss": 0.5059, "step": 5255 }, { "epoch": 0.1172447484123107, "grad_norm": 0.5304727554321289, "learning_rate": 1.932926824440566e-05, "loss": 0.5422, "step": 5260 }, { "epoch": 0.11735619779293077, "grad_norm": 0.5799547433853149, "learning_rate": 1.932800696691302e-05, "loss": 0.4012, "step": 5265 }, { "epoch": 0.11746764717355083, "grad_norm": 0.6032615900039673, "learning_rate": 1.9326744545878727e-05, "loss": 0.4134, "step": 5270 }, { "epoch": 0.1175790965541709, "grad_norm": 0.6024173498153687, "learning_rate": 1.9325480981457542e-05, "loss": 0.4608, "step": 5275 }, { "epoch": 0.11769054593479096, "grad_norm": 0.6025891900062561, "learning_rate": 1.9324216273804373e-05, "loss": 0.4729, "step": 5280 }, { "epoch": 0.11780199531541104, "grad_norm": 0.6101847887039185, "learning_rate": 1.932295042307426e-05, "loss": 0.4191, "step": 5285 }, { "epoch": 0.1179134446960311, "grad_norm": 0.5292050838470459, "learning_rate": 1.9321683429422386e-05, "loss": 0.3268, "step": 5290 }, { "epoch": 0.11802489407665116, "grad_norm": 0.6705468893051147, "learning_rate": 1.932041529300408e-05, "loss": 0.3897, "step": 5295 }, { "epoch": 0.11813634345727124, "grad_norm": 1.0549614429473877, "learning_rate": 1.9319146013974795e-05, "loss": 0.3921, "step": 5300 }, { "epoch": 0.1182477928378913, "grad_norm": 0.4721240997314453, "learning_rate": 1.9317875592490146e-05, "loss": 0.3079, "step": 5305 }, { "epoch": 0.11835924221851137, "grad_norm": 0.656427800655365, "learning_rate": 1.9316604028705874e-05, "loss": 0.3989, "step": 5310 }, { "epoch": 0.11847069159913144, "grad_norm": 0.6683394908905029, "learning_rate": 1.931533132277786e-05, "loss": 0.432, "step": 5315 }, { "epoch": 0.1185821409797515, "grad_norm": 0.6674720048904419, "learning_rate": 1.9314057474862125e-05, "loss": 0.3063, "step": 5320 }, { "epoch": 0.11869359036037157, "grad_norm": 0.46994155645370483, "learning_rate": 1.931278248511484e-05, "loss": 0.3768, "step": 5325 }, { "epoch": 0.11880503974099164, "grad_norm": 0.603171169757843, "learning_rate": 1.9311506353692305e-05, "loss": 0.3091, "step": 5330 }, { "epoch": 0.11891648912161171, "grad_norm": 0.520969808101654, "learning_rate": 1.9310229080750967e-05, "loss": 0.3635, "step": 5335 }, { "epoch": 0.11902793850223177, "grad_norm": 0.5163702368736267, "learning_rate": 1.9308950666447404e-05, "loss": 0.5842, "step": 5340 }, { "epoch": 0.11913938788285185, "grad_norm": 0.4493872821331024, "learning_rate": 1.930767111093835e-05, "loss": 0.3607, "step": 5345 }, { "epoch": 0.11925083726347191, "grad_norm": 0.48946818709373474, "learning_rate": 1.9306390414380655e-05, "loss": 0.4147, "step": 5350 }, { "epoch": 0.11936228664409197, "grad_norm": 0.48165857791900635, "learning_rate": 1.9305108576931336e-05, "loss": 0.4171, "step": 5355 }, { "epoch": 0.11947373602471205, "grad_norm": 0.5254682302474976, "learning_rate": 1.9303825598747524e-05, "loss": 0.4702, "step": 5360 }, { "epoch": 0.11958518540533211, "grad_norm": 0.5365452170372009, "learning_rate": 1.930254147998651e-05, "loss": 0.3702, "step": 5365 }, { "epoch": 0.11969663478595217, "grad_norm": 0.36603885889053345, "learning_rate": 1.9301256220805715e-05, "loss": 0.4523, "step": 5370 }, { "epoch": 0.11980808416657224, "grad_norm": 0.45412683486938477, "learning_rate": 1.9299969821362702e-05, "loss": 0.4379, "step": 5375 }, { "epoch": 0.11991953354719231, "grad_norm": 0.4885343611240387, "learning_rate": 1.9298682281815176e-05, "loss": 0.3207, "step": 5380 }, { "epoch": 0.12003098292781238, "grad_norm": 0.6273423433303833, "learning_rate": 1.929739360232097e-05, "loss": 0.3908, "step": 5385 }, { "epoch": 0.12014243230843244, "grad_norm": 0.6662055253982544, "learning_rate": 1.929610378303808e-05, "loss": 0.4066, "step": 5390 }, { "epoch": 0.12025388168905252, "grad_norm": 0.5559515357017517, "learning_rate": 1.9294812824124623e-05, "loss": 0.3538, "step": 5395 }, { "epoch": 0.12036533106967258, "grad_norm": 0.4622573256492615, "learning_rate": 1.9293520725738855e-05, "loss": 0.4457, "step": 5400 }, { "epoch": 0.12047678045029264, "grad_norm": 0.6049358248710632, "learning_rate": 1.929222748803918e-05, "loss": 0.3599, "step": 5405 }, { "epoch": 0.12058822983091272, "grad_norm": 0.5943440794944763, "learning_rate": 1.9290933111184142e-05, "loss": 0.3622, "step": 5410 }, { "epoch": 0.12069967921153278, "grad_norm": 0.48504123091697693, "learning_rate": 1.9289637595332418e-05, "loss": 0.4892, "step": 5415 }, { "epoch": 0.12081112859215284, "grad_norm": 0.48124271631240845, "learning_rate": 1.928834094064283e-05, "loss": 0.3757, "step": 5420 }, { "epoch": 0.12092257797277292, "grad_norm": 0.545174241065979, "learning_rate": 1.9287043147274338e-05, "loss": 0.3642, "step": 5425 }, { "epoch": 0.12103402735339298, "grad_norm": 0.5016522407531738, "learning_rate": 1.9285744215386043e-05, "loss": 0.3268, "step": 5430 }, { "epoch": 0.12114547673401305, "grad_norm": 0.5860884189605713, "learning_rate": 1.928444414513718e-05, "loss": 0.3471, "step": 5435 }, { "epoch": 0.12125692611463312, "grad_norm": 0.5948898792266846, "learning_rate": 1.928314293668713e-05, "loss": 0.283, "step": 5440 }, { "epoch": 0.12136837549525319, "grad_norm": 0.5052125453948975, "learning_rate": 1.9281840590195412e-05, "loss": 0.4477, "step": 5445 }, { "epoch": 0.12147982487587325, "grad_norm": 0.6505773067474365, "learning_rate": 1.928053710582168e-05, "loss": 0.3336, "step": 5450 }, { "epoch": 0.12159127425649333, "grad_norm": 0.6417970657348633, "learning_rate": 1.9279232483725735e-05, "loss": 0.3932, "step": 5455 }, { "epoch": 0.12170272363711339, "grad_norm": 0.44236230850219727, "learning_rate": 1.9277926724067512e-05, "loss": 0.4703, "step": 5460 }, { "epoch": 0.12181417301773345, "grad_norm": 0.49172699451446533, "learning_rate": 1.927661982700709e-05, "loss": 0.3714, "step": 5465 }, { "epoch": 0.12192562239835351, "grad_norm": 0.48851385712623596, "learning_rate": 1.9275311792704676e-05, "loss": 0.3447, "step": 5470 }, { "epoch": 0.12203707177897359, "grad_norm": 0.6325857639312744, "learning_rate": 1.9274002621320633e-05, "loss": 0.3943, "step": 5475 }, { "epoch": 0.12214852115959365, "grad_norm": 0.49979910254478455, "learning_rate": 1.9272692313015456e-05, "loss": 0.3352, "step": 5480 }, { "epoch": 0.12225997054021372, "grad_norm": 0.505219042301178, "learning_rate": 1.9271380867949774e-05, "loss": 0.3244, "step": 5485 }, { "epoch": 0.1223714199208338, "grad_norm": 0.5624701380729675, "learning_rate": 1.927006828628436e-05, "loss": 0.4565, "step": 5490 }, { "epoch": 0.12248286930145386, "grad_norm": 0.6194440722465515, "learning_rate": 1.9268754568180128e-05, "loss": 0.3445, "step": 5495 }, { "epoch": 0.12259431868207392, "grad_norm": 0.5660005807876587, "learning_rate": 1.9267439713798132e-05, "loss": 0.3507, "step": 5500 }, { "epoch": 0.122705768062694, "grad_norm": 0.43132275342941284, "learning_rate": 1.9266123723299558e-05, "loss": 0.4045, "step": 5505 }, { "epoch": 0.12281721744331406, "grad_norm": 0.7432733774185181, "learning_rate": 1.9264806596845742e-05, "loss": 0.4289, "step": 5510 }, { "epoch": 0.12292866682393412, "grad_norm": 0.6572379469871521, "learning_rate": 1.926348833459815e-05, "loss": 0.3649, "step": 5515 }, { "epoch": 0.1230401162045542, "grad_norm": 0.39170458912849426, "learning_rate": 1.926216893671839e-05, "loss": 0.4512, "step": 5520 }, { "epoch": 0.12315156558517426, "grad_norm": 0.6171269416809082, "learning_rate": 1.926084840336821e-05, "loss": 0.4068, "step": 5525 }, { "epoch": 0.12326301496579432, "grad_norm": 0.6351962089538574, "learning_rate": 1.9259526734709503e-05, "loss": 0.471, "step": 5530 }, { "epoch": 0.1233744643464144, "grad_norm": 0.4887767434120178, "learning_rate": 1.9258203930904286e-05, "loss": 0.4869, "step": 5535 }, { "epoch": 0.12348591372703446, "grad_norm": 0.7329266667366028, "learning_rate": 1.9256879992114734e-05, "loss": 0.47, "step": 5540 }, { "epoch": 0.12359736310765453, "grad_norm": 0.48019132018089294, "learning_rate": 1.9255554918503144e-05, "loss": 0.3257, "step": 5545 }, { "epoch": 0.1237088124882746, "grad_norm": 0.5862034559249878, "learning_rate": 1.9254228710231962e-05, "loss": 0.3933, "step": 5550 }, { "epoch": 0.12382026186889467, "grad_norm": 0.5435554385185242, "learning_rate": 1.9252901367463773e-05, "loss": 0.4413, "step": 5555 }, { "epoch": 0.12393171124951473, "grad_norm": 0.5873213410377502, "learning_rate": 1.92515728903613e-05, "loss": 0.4661, "step": 5560 }, { "epoch": 0.12404316063013479, "grad_norm": 0.5576989650726318, "learning_rate": 1.92502432790874e-05, "loss": 0.4491, "step": 5565 }, { "epoch": 0.12415461001075487, "grad_norm": 0.4655047059059143, "learning_rate": 1.9248912533805077e-05, "loss": 0.455, "step": 5570 }, { "epoch": 0.12426605939137493, "grad_norm": 0.44688910245895386, "learning_rate": 1.924758065467746e-05, "loss": 0.3578, "step": 5575 }, { "epoch": 0.124377508771995, "grad_norm": 0.612274706363678, "learning_rate": 1.9246247641867843e-05, "loss": 0.5784, "step": 5580 }, { "epoch": 0.12448895815261507, "grad_norm": 0.46077173948287964, "learning_rate": 1.9244913495539636e-05, "loss": 0.4116, "step": 5585 }, { "epoch": 0.12460040753323513, "grad_norm": 0.5178104639053345, "learning_rate": 1.924357821585639e-05, "loss": 0.402, "step": 5590 }, { "epoch": 0.1247118569138552, "grad_norm": 0.6142487525939941, "learning_rate": 1.924224180298181e-05, "loss": 0.4987, "step": 5595 }, { "epoch": 0.12482330629447527, "grad_norm": 0.5576684474945068, "learning_rate": 1.9240904257079716e-05, "loss": 0.4058, "step": 5600 }, { "epoch": 0.12493475567509534, "grad_norm": 0.6418728828430176, "learning_rate": 1.923956557831409e-05, "loss": 0.3919, "step": 5605 }, { "epoch": 0.1250462050557154, "grad_norm": 0.6118012070655823, "learning_rate": 1.9238225766849048e-05, "loss": 0.4424, "step": 5610 }, { "epoch": 0.12515765443633547, "grad_norm": 0.6719847917556763, "learning_rate": 1.923688482284883e-05, "loss": 0.3275, "step": 5615 }, { "epoch": 0.12526910381695552, "grad_norm": 0.6712337136268616, "learning_rate": 1.9235542746477835e-05, "loss": 0.3622, "step": 5620 }, { "epoch": 0.1253805531975756, "grad_norm": 0.5340763926506042, "learning_rate": 1.9234199537900586e-05, "loss": 0.3324, "step": 5625 }, { "epoch": 0.12549200257819568, "grad_norm": 0.5426451563835144, "learning_rate": 1.923285519728175e-05, "loss": 0.2466, "step": 5630 }, { "epoch": 0.12560345195881573, "grad_norm": 0.5238773822784424, "learning_rate": 1.9231509724786132e-05, "loss": 0.4723, "step": 5635 }, { "epoch": 0.1257149013394358, "grad_norm": 0.5981005430221558, "learning_rate": 1.9230163120578677e-05, "loss": 0.341, "step": 5640 }, { "epoch": 0.12582635072005588, "grad_norm": 0.5886806845664978, "learning_rate": 1.9228815384824472e-05, "loss": 0.3968, "step": 5645 }, { "epoch": 0.12593780010067593, "grad_norm": 0.6732558012008667, "learning_rate": 1.9227466517688738e-05, "loss": 0.569, "step": 5650 }, { "epoch": 0.126049249481296, "grad_norm": 0.6471225023269653, "learning_rate": 1.922611651933683e-05, "loss": 0.3136, "step": 5655 }, { "epoch": 0.12616069886191608, "grad_norm": 0.5462857484817505, "learning_rate": 1.9224765389934253e-05, "loss": 0.4516, "step": 5660 }, { "epoch": 0.12627214824253613, "grad_norm": 0.5605525970458984, "learning_rate": 1.9223413129646645e-05, "loss": 0.4971, "step": 5665 }, { "epoch": 0.1263835976231562, "grad_norm": 0.4602338373661041, "learning_rate": 1.922205973863978e-05, "loss": 0.3303, "step": 5670 }, { "epoch": 0.12649504700377628, "grad_norm": 0.5662568211555481, "learning_rate": 1.9220705217079573e-05, "loss": 0.4036, "step": 5675 }, { "epoch": 0.12660649638439633, "grad_norm": 0.6493194699287415, "learning_rate": 1.9219349565132083e-05, "loss": 0.3845, "step": 5680 }, { "epoch": 0.1267179457650164, "grad_norm": 0.5647873282432556, "learning_rate": 1.9217992782963493e-05, "loss": 0.3857, "step": 5685 }, { "epoch": 0.1268293951456365, "grad_norm": 0.5588977336883545, "learning_rate": 1.9216634870740144e-05, "loss": 0.3897, "step": 5690 }, { "epoch": 0.12694084452625654, "grad_norm": 0.537597119808197, "learning_rate": 1.92152758286285e-05, "loss": 0.4161, "step": 5695 }, { "epoch": 0.1270522939068766, "grad_norm": 0.7306565642356873, "learning_rate": 1.921391565679517e-05, "loss": 0.5014, "step": 5700 }, { "epoch": 0.1271637432874967, "grad_norm": 0.5383148789405823, "learning_rate": 1.9212554355406904e-05, "loss": 0.4513, "step": 5705 }, { "epoch": 0.12727519266811674, "grad_norm": 0.6623514890670776, "learning_rate": 1.921119192463058e-05, "loss": 0.4172, "step": 5710 }, { "epoch": 0.12738664204873681, "grad_norm": 0.5583232641220093, "learning_rate": 1.9209828364633227e-05, "loss": 0.4372, "step": 5715 }, { "epoch": 0.1274980914293569, "grad_norm": 0.5252357721328735, "learning_rate": 1.9208463675582006e-05, "loss": 0.3622, "step": 5720 }, { "epoch": 0.12760954080997694, "grad_norm": 0.6623414754867554, "learning_rate": 1.9207097857644216e-05, "loss": 0.4213, "step": 5725 }, { "epoch": 0.12772099019059702, "grad_norm": 0.5552505850791931, "learning_rate": 1.92057309109873e-05, "loss": 0.3906, "step": 5730 }, { "epoch": 0.1278324395712171, "grad_norm": 0.4317275881767273, "learning_rate": 1.920436283577883e-05, "loss": 0.3954, "step": 5735 }, { "epoch": 0.12794388895183714, "grad_norm": 0.6078845262527466, "learning_rate": 1.920299363218652e-05, "loss": 0.4249, "step": 5740 }, { "epoch": 0.12805533833245722, "grad_norm": 0.7265923619270325, "learning_rate": 1.9201623300378228e-05, "loss": 0.414, "step": 5745 }, { "epoch": 0.1281667877130773, "grad_norm": 0.6118583679199219, "learning_rate": 1.9200251840521946e-05, "loss": 0.3573, "step": 5750 }, { "epoch": 0.12827823709369734, "grad_norm": 0.5438648462295532, "learning_rate": 1.91988792527858e-05, "loss": 0.4139, "step": 5755 }, { "epoch": 0.12838968647431742, "grad_norm": 0.639155387878418, "learning_rate": 1.919750553733807e-05, "loss": 0.4046, "step": 5760 }, { "epoch": 0.1285011358549375, "grad_norm": 0.5353571772575378, "learning_rate": 1.919613069434715e-05, "loss": 0.5334, "step": 5765 }, { "epoch": 0.12861258523555755, "grad_norm": 0.6160265803337097, "learning_rate": 1.919475472398159e-05, "loss": 0.4462, "step": 5770 }, { "epoch": 0.12872403461617762, "grad_norm": 0.5473472476005554, "learning_rate": 1.9193377626410076e-05, "loss": 0.4203, "step": 5775 }, { "epoch": 0.1288354839967977, "grad_norm": 0.4638007581233978, "learning_rate": 1.919199940180142e-05, "loss": 0.4141, "step": 5780 }, { "epoch": 0.12894693337741775, "grad_norm": 0.6351835131645203, "learning_rate": 1.9190620050324596e-05, "loss": 0.4435, "step": 5785 }, { "epoch": 0.12905838275803783, "grad_norm": 0.642963171005249, "learning_rate": 1.918923957214869e-05, "loss": 0.3589, "step": 5790 }, { "epoch": 0.1291698321386579, "grad_norm": 0.5821643471717834, "learning_rate": 1.9187857967442945e-05, "loss": 0.2967, "step": 5795 }, { "epoch": 0.12928128151927795, "grad_norm": 0.4989367425441742, "learning_rate": 1.9186475236376733e-05, "loss": 0.3938, "step": 5800 }, { "epoch": 0.12939273089989803, "grad_norm": 0.49929431080818176, "learning_rate": 1.9185091379119566e-05, "loss": 0.3356, "step": 5805 }, { "epoch": 0.12950418028051808, "grad_norm": 0.4640662372112274, "learning_rate": 1.9183706395841092e-05, "loss": 0.2594, "step": 5810 }, { "epoch": 0.12961562966113815, "grad_norm": 0.5312515497207642, "learning_rate": 1.9182320286711104e-05, "loss": 0.3543, "step": 5815 }, { "epoch": 0.12972707904175823, "grad_norm": 0.5994097590446472, "learning_rate": 1.9180933051899523e-05, "loss": 0.2664, "step": 5820 }, { "epoch": 0.12983852842237828, "grad_norm": 0.6394380927085876, "learning_rate": 1.9179544691576416e-05, "loss": 0.4029, "step": 5825 }, { "epoch": 0.12994997780299836, "grad_norm": 0.5699619054794312, "learning_rate": 1.9178155205911984e-05, "loss": 0.2986, "step": 5830 }, { "epoch": 0.13006142718361843, "grad_norm": 0.6062706112861633, "learning_rate": 1.917676459507657e-05, "loss": 0.4173, "step": 5835 }, { "epoch": 0.13017287656423848, "grad_norm": 0.5544490218162537, "learning_rate": 1.917537285924065e-05, "loss": 0.4053, "step": 5840 }, { "epoch": 0.13028432594485856, "grad_norm": 0.5762254595756531, "learning_rate": 1.9173979998574838e-05, "loss": 0.4464, "step": 5845 }, { "epoch": 0.13039577532547864, "grad_norm": 0.4747742712497711, "learning_rate": 1.9172586013249894e-05, "loss": 0.4227, "step": 5850 }, { "epoch": 0.13050722470609868, "grad_norm": 0.6070486903190613, "learning_rate": 1.9171190903436705e-05, "loss": 0.3773, "step": 5855 }, { "epoch": 0.13061867408671876, "grad_norm": 0.4321900010108948, "learning_rate": 1.9169794669306303e-05, "loss": 0.4154, "step": 5860 }, { "epoch": 0.13073012346733884, "grad_norm": 0.49118393659591675, "learning_rate": 1.9168397311029854e-05, "loss": 0.2462, "step": 5865 }, { "epoch": 0.1308415728479589, "grad_norm": 0.8460988998413086, "learning_rate": 1.9166998828778665e-05, "loss": 0.4493, "step": 5870 }, { "epoch": 0.13095302222857896, "grad_norm": 0.6519992351531982, "learning_rate": 1.9165599222724175e-05, "loss": 0.4105, "step": 5875 }, { "epoch": 0.13106447160919904, "grad_norm": 0.6495894193649292, "learning_rate": 1.916419849303797e-05, "loss": 0.2441, "step": 5880 }, { "epoch": 0.1311759209898191, "grad_norm": 0.5206788182258606, "learning_rate": 1.9162796639891766e-05, "loss": 0.4636, "step": 5885 }, { "epoch": 0.13128737037043917, "grad_norm": 0.5899707078933716, "learning_rate": 1.9161393663457418e-05, "loss": 0.3734, "step": 5890 }, { "epoch": 0.13139881975105924, "grad_norm": 0.760115385055542, "learning_rate": 1.9159989563906922e-05, "loss": 0.3605, "step": 5895 }, { "epoch": 0.1315102691316793, "grad_norm": 0.5802580714225769, "learning_rate": 1.9158584341412414e-05, "loss": 0.3356, "step": 5900 }, { "epoch": 0.13162171851229937, "grad_norm": 0.6110792756080627, "learning_rate": 1.9157177996146156e-05, "loss": 0.459, "step": 5905 }, { "epoch": 0.13173316789291944, "grad_norm": 0.5127723217010498, "learning_rate": 1.9155770528280556e-05, "loss": 0.2911, "step": 5910 }, { "epoch": 0.1318446172735395, "grad_norm": 0.6098167300224304, "learning_rate": 1.9154361937988162e-05, "loss": 0.5592, "step": 5915 }, { "epoch": 0.13195606665415957, "grad_norm": 1.627808928489685, "learning_rate": 1.9152952225441657e-05, "loss": 0.3981, "step": 5920 }, { "epoch": 0.13206751603477965, "grad_norm": 0.5559998750686646, "learning_rate": 1.9151541390813863e-05, "loss": 0.3277, "step": 5925 }, { "epoch": 0.1321789654153997, "grad_norm": 0.527961790561676, "learning_rate": 1.915012943427773e-05, "loss": 0.3684, "step": 5930 }, { "epoch": 0.13229041479601977, "grad_norm": 0.4821566641330719, "learning_rate": 1.9148716356006358e-05, "loss": 0.3656, "step": 5935 }, { "epoch": 0.13240186417663985, "grad_norm": 0.3770541846752167, "learning_rate": 1.914730215617297e-05, "loss": 0.3135, "step": 5940 }, { "epoch": 0.1325133135572599, "grad_norm": 0.46859049797058105, "learning_rate": 1.914588683495095e-05, "loss": 0.4604, "step": 5945 }, { "epoch": 0.13262476293787998, "grad_norm": 0.6820949912071228, "learning_rate": 1.91444703925138e-05, "loss": 0.4089, "step": 5950 }, { "epoch": 0.13273621231850005, "grad_norm": 0.6557464599609375, "learning_rate": 1.9143052829035166e-05, "loss": 0.374, "step": 5955 }, { "epoch": 0.1328476616991201, "grad_norm": 0.8806049823760986, "learning_rate": 1.914163414468883e-05, "loss": 0.4139, "step": 5960 }, { "epoch": 0.13295911107974018, "grad_norm": 0.5537307262420654, "learning_rate": 1.9140214339648704e-05, "loss": 0.3534, "step": 5965 }, { "epoch": 0.13307056046036025, "grad_norm": 0.6633182168006897, "learning_rate": 1.9138793414088856e-05, "loss": 0.4184, "step": 5970 }, { "epoch": 0.1331820098409803, "grad_norm": 0.5307210087776184, "learning_rate": 1.9137371368183472e-05, "loss": 0.372, "step": 5975 }, { "epoch": 0.13329345922160038, "grad_norm": 0.48524239659309387, "learning_rate": 1.913594820210689e-05, "loss": 0.3055, "step": 5980 }, { "epoch": 0.13340490860222046, "grad_norm": 0.6917334198951721, "learning_rate": 1.9134523916033577e-05, "loss": 0.3744, "step": 5985 }, { "epoch": 0.1335163579828405, "grad_norm": 0.4943813383579254, "learning_rate": 1.913309851013814e-05, "loss": 0.4207, "step": 5990 }, { "epoch": 0.13362780736346058, "grad_norm": 0.586175799369812, "learning_rate": 1.9131671984595325e-05, "loss": 0.4356, "step": 5995 }, { "epoch": 0.13373925674408063, "grad_norm": 0.7961429953575134, "learning_rate": 1.9130244339580007e-05, "loss": 0.2789, "step": 6000 }, { "epoch": 0.1338507061247007, "grad_norm": 0.5163123607635498, "learning_rate": 1.912881557526721e-05, "loss": 0.3108, "step": 6005 }, { "epoch": 0.13396215550532078, "grad_norm": 0.5538989901542664, "learning_rate": 1.9127385691832084e-05, "loss": 0.5069, "step": 6010 }, { "epoch": 0.13407360488594083, "grad_norm": 0.4295978546142578, "learning_rate": 1.912595468944993e-05, "loss": 0.4329, "step": 6015 }, { "epoch": 0.1341850542665609, "grad_norm": 0.5083966255187988, "learning_rate": 1.9124522568296166e-05, "loss": 0.3951, "step": 6020 }, { "epoch": 0.134296503647181, "grad_norm": 0.5359913110733032, "learning_rate": 1.912308932854637e-05, "loss": 0.3751, "step": 6025 }, { "epoch": 0.13440795302780104, "grad_norm": 0.5720531344413757, "learning_rate": 1.912165497037624e-05, "loss": 0.278, "step": 6030 }, { "epoch": 0.1345194024084211, "grad_norm": 0.5642151832580566, "learning_rate": 1.912021949396162e-05, "loss": 0.4278, "step": 6035 }, { "epoch": 0.1346308517890412, "grad_norm": 0.7164551615715027, "learning_rate": 1.9118782899478488e-05, "loss": 0.4543, "step": 6040 }, { "epoch": 0.13474230116966124, "grad_norm": 0.7330194711685181, "learning_rate": 1.9117345187102958e-05, "loss": 0.321, "step": 6045 }, { "epoch": 0.13485375055028131, "grad_norm": 0.7292217016220093, "learning_rate": 1.9115906357011283e-05, "loss": 0.3806, "step": 6050 }, { "epoch": 0.1349651999309014, "grad_norm": 0.4237048625946045, "learning_rate": 1.911446640937985e-05, "loss": 0.3959, "step": 6055 }, { "epoch": 0.13507664931152144, "grad_norm": 0.5631638169288635, "learning_rate": 1.911302534438519e-05, "loss": 0.5221, "step": 6060 }, { "epoch": 0.13518809869214152, "grad_norm": 0.5988253355026245, "learning_rate": 1.9111583162203965e-05, "loss": 0.42, "step": 6065 }, { "epoch": 0.1352995480727616, "grad_norm": 0.44528236985206604, "learning_rate": 1.9110139863012978e-05, "loss": 0.3945, "step": 6070 }, { "epoch": 0.13541099745338164, "grad_norm": 0.6617679595947266, "learning_rate": 1.9108695446989158e-05, "loss": 0.6861, "step": 6075 }, { "epoch": 0.13552244683400172, "grad_norm": 0.6147041916847229, "learning_rate": 1.9107249914309586e-05, "loss": 0.3679, "step": 6080 }, { "epoch": 0.1356338962146218, "grad_norm": 0.4293046295642853, "learning_rate": 1.9105803265151474e-05, "loss": 0.3888, "step": 6085 }, { "epoch": 0.13574534559524185, "grad_norm": 0.5585926175117493, "learning_rate": 1.9104355499692166e-05, "loss": 0.3788, "step": 6090 }, { "epoch": 0.13585679497586192, "grad_norm": 0.6501102447509766, "learning_rate": 1.910290661810915e-05, "loss": 0.4541, "step": 6095 }, { "epoch": 0.135968244356482, "grad_norm": 0.50469571352005, "learning_rate": 1.9101456620580044e-05, "loss": 0.4489, "step": 6100 }, { "epoch": 0.13607969373710205, "grad_norm": 0.5148460268974304, "learning_rate": 1.910000550728261e-05, "loss": 0.5077, "step": 6105 }, { "epoch": 0.13619114311772212, "grad_norm": 0.383065789937973, "learning_rate": 1.9098553278394744e-05, "loss": 0.3584, "step": 6110 }, { "epoch": 0.1363025924983422, "grad_norm": 0.5884717106819153, "learning_rate": 1.909709993409447e-05, "loss": 0.3969, "step": 6115 }, { "epoch": 0.13641404187896225, "grad_norm": 0.48174750804901123, "learning_rate": 1.9095645474559967e-05, "loss": 0.4417, "step": 6120 }, { "epoch": 0.13652549125958233, "grad_norm": 0.5044527649879456, "learning_rate": 1.909418989996954e-05, "loss": 0.4233, "step": 6125 }, { "epoch": 0.1366369406402024, "grad_norm": 0.45409488677978516, "learning_rate": 1.909273321050162e-05, "loss": 0.4688, "step": 6130 }, { "epoch": 0.13674839002082245, "grad_norm": 0.5548310875892639, "learning_rate": 1.90912754063348e-05, "loss": 0.3716, "step": 6135 }, { "epoch": 0.13685983940144253, "grad_norm": 0.7075448036193848, "learning_rate": 1.9089816487647786e-05, "loss": 0.364, "step": 6140 }, { "epoch": 0.1369712887820626, "grad_norm": 0.5327110290527344, "learning_rate": 1.9088356454619433e-05, "loss": 0.4024, "step": 6145 }, { "epoch": 0.13708273816268265, "grad_norm": 0.4292064905166626, "learning_rate": 1.9086895307428733e-05, "loss": 0.3842, "step": 6150 }, { "epoch": 0.13719418754330273, "grad_norm": 0.5394851565361023, "learning_rate": 1.9085433046254805e-05, "loss": 0.3289, "step": 6155 }, { "epoch": 0.1373056369239228, "grad_norm": 0.5409968495368958, "learning_rate": 1.9083969671276915e-05, "loss": 0.4648, "step": 6160 }, { "epoch": 0.13741708630454286, "grad_norm": 0.6410302519798279, "learning_rate": 1.9082505182674462e-05, "loss": 0.3038, "step": 6165 }, { "epoch": 0.13752853568516293, "grad_norm": 0.8804435133934021, "learning_rate": 1.908103958062698e-05, "loss": 0.4235, "step": 6170 }, { "epoch": 0.137639985065783, "grad_norm": 0.3780747056007385, "learning_rate": 1.907957286531414e-05, "loss": 0.3038, "step": 6175 }, { "epoch": 0.13775143444640306, "grad_norm": 0.44706881046295166, "learning_rate": 1.907810503691575e-05, "loss": 0.3553, "step": 6180 }, { "epoch": 0.13786288382702314, "grad_norm": 0.4074099063873291, "learning_rate": 1.9076636095611752e-05, "loss": 0.4158, "step": 6185 }, { "epoch": 0.13797433320764318, "grad_norm": 0.545442521572113, "learning_rate": 1.907516604158223e-05, "loss": 0.3531, "step": 6190 }, { "epoch": 0.13808578258826326, "grad_norm": 0.6031177043914795, "learning_rate": 1.9073694875007403e-05, "loss": 0.4075, "step": 6195 }, { "epoch": 0.13819723196888334, "grad_norm": 0.4987606108188629, "learning_rate": 1.9072222596067626e-05, "loss": 0.392, "step": 6200 }, { "epoch": 0.1383086813495034, "grad_norm": 0.6515766382217407, "learning_rate": 1.907074920494338e-05, "loss": 0.4539, "step": 6205 }, { "epoch": 0.13842013073012346, "grad_norm": 0.5816649198532104, "learning_rate": 1.90692747018153e-05, "loss": 0.4451, "step": 6210 }, { "epoch": 0.13853158011074354, "grad_norm": 0.5591769218444824, "learning_rate": 1.906779908686414e-05, "loss": 0.4067, "step": 6215 }, { "epoch": 0.1386430294913636, "grad_norm": 0.6279967427253723, "learning_rate": 1.906632236027081e-05, "loss": 0.3641, "step": 6220 }, { "epoch": 0.13875447887198367, "grad_norm": 0.7580907344818115, "learning_rate": 1.906484452221634e-05, "loss": 0.3638, "step": 6225 }, { "epoch": 0.13886592825260374, "grad_norm": 0.7279596924781799, "learning_rate": 1.90633655728819e-05, "loss": 0.3655, "step": 6230 }, { "epoch": 0.1389773776332238, "grad_norm": 0.38095614314079285, "learning_rate": 1.9061885512448797e-05, "loss": 0.3642, "step": 6235 }, { "epoch": 0.13908882701384387, "grad_norm": 0.4549018144607544, "learning_rate": 1.9060404341098483e-05, "loss": 0.3173, "step": 6240 }, { "epoch": 0.13920027639446395, "grad_norm": 0.6030601859092712, "learning_rate": 1.905892205901253e-05, "loss": 0.4833, "step": 6245 }, { "epoch": 0.139311725775084, "grad_norm": 0.47454383969306946, "learning_rate": 1.9057438666372653e-05, "loss": 0.3674, "step": 6250 }, { "epoch": 0.13942317515570407, "grad_norm": 0.6735197305679321, "learning_rate": 1.905595416336071e-05, "loss": 0.3834, "step": 6255 }, { "epoch": 0.13953462453632415, "grad_norm": 0.6685248613357544, "learning_rate": 1.9054468550158688e-05, "loss": 0.3735, "step": 6260 }, { "epoch": 0.1396460739169442, "grad_norm": 0.5561477541923523, "learning_rate": 1.9052981826948715e-05, "loss": 0.3955, "step": 6265 }, { "epoch": 0.13975752329756427, "grad_norm": 0.40665513277053833, "learning_rate": 1.9051493993913044e-05, "loss": 0.4795, "step": 6270 }, { "epoch": 0.13986897267818435, "grad_norm": 0.4884146451950073, "learning_rate": 1.9050005051234078e-05, "loss": 0.3805, "step": 6275 }, { "epoch": 0.1399804220588044, "grad_norm": 0.5093684196472168, "learning_rate": 1.904851499909435e-05, "loss": 0.4379, "step": 6280 }, { "epoch": 0.14009187143942448, "grad_norm": 0.8419882655143738, "learning_rate": 1.9047023837676525e-05, "loss": 0.4529, "step": 6285 }, { "epoch": 0.14020332082004455, "grad_norm": 0.7996892333030701, "learning_rate": 1.904553156716341e-05, "loss": 0.4288, "step": 6290 }, { "epoch": 0.1403147702006646, "grad_norm": 0.4805566668510437, "learning_rate": 1.9044038187737944e-05, "loss": 0.4511, "step": 6295 }, { "epoch": 0.14042621958128468, "grad_norm": 0.5690134763717651, "learning_rate": 1.9042543699583204e-05, "loss": 0.408, "step": 6300 }, { "epoch": 0.14053766896190475, "grad_norm": 0.7435726523399353, "learning_rate": 1.9041048102882408e-05, "loss": 0.3276, "step": 6305 }, { "epoch": 0.1406491183425248, "grad_norm": 0.7217281460762024, "learning_rate": 1.90395513978189e-05, "loss": 0.4166, "step": 6310 }, { "epoch": 0.14076056772314488, "grad_norm": 0.562565267086029, "learning_rate": 1.9038053584576165e-05, "loss": 0.3688, "step": 6315 }, { "epoch": 0.14087201710376496, "grad_norm": 0.5799996852874756, "learning_rate": 1.9036554663337824e-05, "loss": 0.3326, "step": 6320 }, { "epoch": 0.140983466484385, "grad_norm": 0.4111635982990265, "learning_rate": 1.903505463428763e-05, "loss": 0.3406, "step": 6325 }, { "epoch": 0.14109491586500508, "grad_norm": 0.5106934309005737, "learning_rate": 1.903355349760948e-05, "loss": 0.414, "step": 6330 }, { "epoch": 0.14120636524562516, "grad_norm": 0.40918323397636414, "learning_rate": 1.90320512534874e-05, "loss": 0.3693, "step": 6335 }, { "epoch": 0.1413178146262452, "grad_norm": 0.48011714220046997, "learning_rate": 1.9030547902105554e-05, "loss": 0.3422, "step": 6340 }, { "epoch": 0.14142926400686529, "grad_norm": 0.5929650664329529, "learning_rate": 1.902904344364824e-05, "loss": 0.5012, "step": 6345 }, { "epoch": 0.14154071338748536, "grad_norm": 0.471110463142395, "learning_rate": 1.902753787829989e-05, "loss": 0.3024, "step": 6350 }, { "epoch": 0.1416521627681054, "grad_norm": 0.6223836541175842, "learning_rate": 1.9026031206245077e-05, "loss": 0.3263, "step": 6355 }, { "epoch": 0.1417636121487255, "grad_norm": 0.37485095858573914, "learning_rate": 1.9024523427668514e-05, "loss": 0.4027, "step": 6360 }, { "epoch": 0.14187506152934556, "grad_norm": 0.49914073944091797, "learning_rate": 1.9023014542755035e-05, "loss": 0.316, "step": 6365 }, { "epoch": 0.1419865109099656, "grad_norm": 0.5025352835655212, "learning_rate": 1.902150455168962e-05, "loss": 0.6833, "step": 6370 }, { "epoch": 0.1420979602905857, "grad_norm": 0.7094488739967346, "learning_rate": 1.901999345465738e-05, "loss": 0.3986, "step": 6375 }, { "epoch": 0.14220940967120574, "grad_norm": 0.634559690952301, "learning_rate": 1.901848125184357e-05, "loss": 0.4283, "step": 6380 }, { "epoch": 0.14232085905182582, "grad_norm": 0.6422039270401001, "learning_rate": 1.9016967943433568e-05, "loss": 0.3884, "step": 6385 }, { "epoch": 0.1424323084324459, "grad_norm": 0.4295634329319, "learning_rate": 1.90154535296129e-05, "loss": 0.443, "step": 6390 }, { "epoch": 0.14254375781306594, "grad_norm": 0.7843777537345886, "learning_rate": 1.9013938010567215e-05, "loss": 0.2879, "step": 6395 }, { "epoch": 0.14265520719368602, "grad_norm": 0.7074270844459534, "learning_rate": 1.9012421386482308e-05, "loss": 0.4682, "step": 6400 }, { "epoch": 0.1427666565743061, "grad_norm": 0.6161109805107117, "learning_rate": 1.90109036575441e-05, "loss": 0.3507, "step": 6405 }, { "epoch": 0.14287810595492614, "grad_norm": 0.5402871370315552, "learning_rate": 1.9009384823938663e-05, "loss": 0.4624, "step": 6410 }, { "epoch": 0.14298955533554622, "grad_norm": 0.7705127000808716, "learning_rate": 1.9007864885852182e-05, "loss": 0.461, "step": 6415 }, { "epoch": 0.1431010047161663, "grad_norm": 0.662533700466156, "learning_rate": 1.9006343843471003e-05, "loss": 0.3798, "step": 6420 }, { "epoch": 0.14321245409678635, "grad_norm": 0.5392851233482361, "learning_rate": 1.9004821696981585e-05, "loss": 0.3014, "step": 6425 }, { "epoch": 0.14332390347740642, "grad_norm": 0.9114376902580261, "learning_rate": 1.900329844657053e-05, "loss": 0.3915, "step": 6430 }, { "epoch": 0.1434353528580265, "grad_norm": 0.7330493927001953, "learning_rate": 1.9001774092424583e-05, "loss": 0.4391, "step": 6435 }, { "epoch": 0.14354680223864655, "grad_norm": 0.5785829424858093, "learning_rate": 1.9000248634730613e-05, "loss": 0.3804, "step": 6440 }, { "epoch": 0.14365825161926662, "grad_norm": 0.5360656380653381, "learning_rate": 1.8998722073675636e-05, "loss": 0.4382, "step": 6445 }, { "epoch": 0.1437697009998867, "grad_norm": 0.4669645130634308, "learning_rate": 1.8997194409446787e-05, "loss": 0.3544, "step": 6450 }, { "epoch": 0.14388115038050675, "grad_norm": 0.5305682420730591, "learning_rate": 1.8995665642231354e-05, "loss": 0.4464, "step": 6455 }, { "epoch": 0.14399259976112683, "grad_norm": 0.5815770626068115, "learning_rate": 1.899413577221675e-05, "loss": 0.3564, "step": 6460 }, { "epoch": 0.1441040491417469, "grad_norm": 0.5688044428825378, "learning_rate": 1.8992604799590526e-05, "loss": 0.3542, "step": 6465 }, { "epoch": 0.14421549852236695, "grad_norm": 0.4591280519962311, "learning_rate": 1.8991072724540364e-05, "loss": 0.3537, "step": 6470 }, { "epoch": 0.14432694790298703, "grad_norm": 0.5264057517051697, "learning_rate": 1.8989539547254084e-05, "loss": 0.4731, "step": 6475 }, { "epoch": 0.1444383972836071, "grad_norm": 0.4802411198616028, "learning_rate": 1.8988005267919644e-05, "loss": 0.271, "step": 6480 }, { "epoch": 0.14454984666422716, "grad_norm": 0.6552708745002747, "learning_rate": 1.8986469886725135e-05, "loss": 0.3038, "step": 6485 }, { "epoch": 0.14466129604484723, "grad_norm": 0.41324883699417114, "learning_rate": 1.8984933403858786e-05, "loss": 0.296, "step": 6490 }, { "epoch": 0.1447727454254673, "grad_norm": 0.4937549829483032, "learning_rate": 1.8983395819508955e-05, "loss": 0.4804, "step": 6495 }, { "epoch": 0.14488419480608736, "grad_norm": 0.7439261674880981, "learning_rate": 1.8981857133864136e-05, "loss": 0.3872, "step": 6500 }, { "epoch": 0.14499564418670743, "grad_norm": 0.5570749044418335, "learning_rate": 1.898031734711296e-05, "loss": 0.2656, "step": 6505 }, { "epoch": 0.1451070935673275, "grad_norm": 0.5962104201316833, "learning_rate": 1.8978776459444196e-05, "loss": 0.3265, "step": 6510 }, { "epoch": 0.14521854294794756, "grad_norm": 0.5931865572929382, "learning_rate": 1.8977234471046743e-05, "loss": 0.3804, "step": 6515 }, { "epoch": 0.14532999232856764, "grad_norm": 0.5354698896408081, "learning_rate": 1.897569138210964e-05, "loss": 0.4132, "step": 6520 }, { "epoch": 0.1454414417091877, "grad_norm": 0.4454991817474365, "learning_rate": 1.8974147192822053e-05, "loss": 0.4958, "step": 6525 }, { "epoch": 0.14555289108980776, "grad_norm": 0.5197862982749939, "learning_rate": 1.897260190337329e-05, "loss": 0.3653, "step": 6530 }, { "epoch": 0.14566434047042784, "grad_norm": 0.6639044284820557, "learning_rate": 1.897105551395279e-05, "loss": 0.4826, "step": 6535 }, { "epoch": 0.14577578985104792, "grad_norm": 0.6526901721954346, "learning_rate": 1.8969508024750137e-05, "loss": 0.3015, "step": 6540 }, { "epoch": 0.14588723923166796, "grad_norm": 0.4506591260433197, "learning_rate": 1.8967959435955027e-05, "loss": 0.5079, "step": 6545 }, { "epoch": 0.14599868861228804, "grad_norm": 0.46098169684410095, "learning_rate": 1.8966409747757314e-05, "loss": 0.3314, "step": 6550 }, { "epoch": 0.14611013799290812, "grad_norm": 0.5385696291923523, "learning_rate": 1.8964858960346976e-05, "loss": 0.5161, "step": 6555 }, { "epoch": 0.14622158737352817, "grad_norm": 0.6555469036102295, "learning_rate": 1.896330707391413e-05, "loss": 0.4846, "step": 6560 }, { "epoch": 0.14633303675414824, "grad_norm": 0.5580034255981445, "learning_rate": 1.8961754088649018e-05, "loss": 0.4174, "step": 6565 }, { "epoch": 0.1464444861347683, "grad_norm": 0.5875076651573181, "learning_rate": 1.896020000474203e-05, "loss": 0.5164, "step": 6570 }, { "epoch": 0.14655593551538837, "grad_norm": 0.6634327173233032, "learning_rate": 1.8958644822383688e-05, "loss": 0.4792, "step": 6575 }, { "epoch": 0.14666738489600845, "grad_norm": 0.5859067440032959, "learning_rate": 1.8957088541764637e-05, "loss": 0.4333, "step": 6580 }, { "epoch": 0.1467788342766285, "grad_norm": 0.513789176940918, "learning_rate": 1.895553116307567e-05, "loss": 0.5009, "step": 6585 }, { "epoch": 0.14689028365724857, "grad_norm": 0.5502701997756958, "learning_rate": 1.8953972686507707e-05, "loss": 0.441, "step": 6590 }, { "epoch": 0.14700173303786865, "grad_norm": 0.4884522557258606, "learning_rate": 1.895241311225181e-05, "loss": 0.3899, "step": 6595 }, { "epoch": 0.1471131824184887, "grad_norm": 0.6013541221618652, "learning_rate": 1.8950852440499163e-05, "loss": 0.44, "step": 6600 }, { "epoch": 0.14722463179910877, "grad_norm": 0.4754367172718048, "learning_rate": 1.8949290671441097e-05, "loss": 0.3826, "step": 6605 }, { "epoch": 0.14733608117972885, "grad_norm": 0.563018798828125, "learning_rate": 1.894772780526908e-05, "loss": 0.4355, "step": 6610 }, { "epoch": 0.1474475305603489, "grad_norm": 0.6593184471130371, "learning_rate": 1.8946163842174692e-05, "loss": 0.4039, "step": 6615 }, { "epoch": 0.14755897994096898, "grad_norm": 0.644548773765564, "learning_rate": 1.8944598782349675e-05, "loss": 0.333, "step": 6620 }, { "epoch": 0.14767042932158905, "grad_norm": 0.5700538158416748, "learning_rate": 1.8943032625985885e-05, "loss": 0.4342, "step": 6625 }, { "epoch": 0.1477818787022091, "grad_norm": 0.4651893675327301, "learning_rate": 1.894146537327533e-05, "loss": 0.268, "step": 6630 }, { "epoch": 0.14789332808282918, "grad_norm": 0.6951509118080139, "learning_rate": 1.8939897024410134e-05, "loss": 0.4129, "step": 6635 }, { "epoch": 0.14800477746344926, "grad_norm": 0.5308429598808289, "learning_rate": 1.893832757958257e-05, "loss": 0.2941, "step": 6640 }, { "epoch": 0.1481162268440693, "grad_norm": 0.5369426608085632, "learning_rate": 1.8936757038985037e-05, "loss": 0.2803, "step": 6645 }, { "epoch": 0.14822767622468938, "grad_norm": 0.5123085975646973, "learning_rate": 1.893518540281007e-05, "loss": 0.3713, "step": 6650 }, { "epoch": 0.14833912560530946, "grad_norm": 0.6187421083450317, "learning_rate": 1.8933612671250345e-05, "loss": 0.3995, "step": 6655 }, { "epoch": 0.1484505749859295, "grad_norm": 0.5980579257011414, "learning_rate": 1.893203884449866e-05, "loss": 0.3866, "step": 6660 }, { "epoch": 0.14856202436654958, "grad_norm": 0.7539716958999634, "learning_rate": 1.8930463922747965e-05, "loss": 0.502, "step": 6665 }, { "epoch": 0.14867347374716966, "grad_norm": 0.571130633354187, "learning_rate": 1.892888790619132e-05, "loss": 0.4009, "step": 6670 }, { "epoch": 0.1487849231277897, "grad_norm": 0.4514002799987793, "learning_rate": 1.8927310795021938e-05, "loss": 0.4243, "step": 6675 }, { "epoch": 0.14889637250840979, "grad_norm": 0.7586348652839661, "learning_rate": 1.892573258943316e-05, "loss": 0.4853, "step": 6680 }, { "epoch": 0.14900782188902986, "grad_norm": 0.36333855986595154, "learning_rate": 1.8924153289618466e-05, "loss": 0.3791, "step": 6685 }, { "epoch": 0.1491192712696499, "grad_norm": 0.43393850326538086, "learning_rate": 1.8922572895771458e-05, "loss": 0.3568, "step": 6690 }, { "epoch": 0.14923072065027, "grad_norm": 0.5546851754188538, "learning_rate": 1.892099140808589e-05, "loss": 0.381, "step": 6695 }, { "epoch": 0.14934217003089006, "grad_norm": 0.4489659368991852, "learning_rate": 1.8919408826755628e-05, "loss": 0.3822, "step": 6700 }, { "epoch": 0.1494536194115101, "grad_norm": 0.5850082635879517, "learning_rate": 1.8917825151974698e-05, "loss": 0.4964, "step": 6705 }, { "epoch": 0.1495650687921302, "grad_norm": 0.6391961574554443, "learning_rate": 1.8916240383937236e-05, "loss": 0.4219, "step": 6710 }, { "epoch": 0.14967651817275027, "grad_norm": 0.5481122732162476, "learning_rate": 1.8914654522837525e-05, "loss": 0.5315, "step": 6715 }, { "epoch": 0.14978796755337032, "grad_norm": 0.5840669274330139, "learning_rate": 1.8913067568869984e-05, "loss": 0.3674, "step": 6720 }, { "epoch": 0.1498994169339904, "grad_norm": 0.5937899947166443, "learning_rate": 1.8911479522229154e-05, "loss": 0.3358, "step": 6725 }, { "epoch": 0.15001086631461047, "grad_norm": 0.6355303525924683, "learning_rate": 1.890989038310972e-05, "loss": 0.3997, "step": 6730 }, { "epoch": 0.15012231569523052, "grad_norm": 0.5027339458465576, "learning_rate": 1.89083001517065e-05, "loss": 0.4098, "step": 6735 }, { "epoch": 0.1502337650758506, "grad_norm": 0.8207029700279236, "learning_rate": 1.8906708828214445e-05, "loss": 0.5281, "step": 6740 }, { "epoch": 0.15034521445647067, "grad_norm": 0.47056856751441956, "learning_rate": 1.8905116412828636e-05, "loss": 0.3632, "step": 6745 }, { "epoch": 0.15045666383709072, "grad_norm": 0.8110126852989197, "learning_rate": 1.890352290574429e-05, "loss": 0.227, "step": 6750 }, { "epoch": 0.1505681132177108, "grad_norm": 0.7214770913124084, "learning_rate": 1.8901928307156762e-05, "loss": 0.4157, "step": 6755 }, { "epoch": 0.15067956259833085, "grad_norm": 0.5110815763473511, "learning_rate": 1.8900332617261535e-05, "loss": 0.2864, "step": 6760 }, { "epoch": 0.15079101197895092, "grad_norm": 0.5272954106330872, "learning_rate": 1.889873583625423e-05, "loss": 0.3965, "step": 6765 }, { "epoch": 0.150902461359571, "grad_norm": 0.6545156836509705, "learning_rate": 1.8897137964330595e-05, "loss": 0.4683, "step": 6770 }, { "epoch": 0.15101391074019105, "grad_norm": 0.5515015721321106, "learning_rate": 1.8895539001686526e-05, "loss": 0.5359, "step": 6775 }, { "epoch": 0.15112536012081113, "grad_norm": 0.5681248903274536, "learning_rate": 1.8893938948518038e-05, "loss": 0.4592, "step": 6780 }, { "epoch": 0.1512368095014312, "grad_norm": 0.7550321221351624, "learning_rate": 1.8892337805021282e-05, "loss": 0.3513, "step": 6785 }, { "epoch": 0.15134825888205125, "grad_norm": 0.5479450821876526, "learning_rate": 1.8890735571392557e-05, "loss": 0.3816, "step": 6790 }, { "epoch": 0.15145970826267133, "grad_norm": 0.5612640380859375, "learning_rate": 1.8889132247828267e-05, "loss": 0.3922, "step": 6795 }, { "epoch": 0.1515711576432914, "grad_norm": 0.4440547823905945, "learning_rate": 1.8887527834524983e-05, "loss": 0.3642, "step": 6800 }, { "epoch": 0.15168260702391145, "grad_norm": 0.4283455014228821, "learning_rate": 1.8885922331679388e-05, "loss": 0.389, "step": 6805 }, { "epoch": 0.15179405640453153, "grad_norm": 0.5806257128715515, "learning_rate": 1.88843157394883e-05, "loss": 0.3685, "step": 6810 }, { "epoch": 0.1519055057851516, "grad_norm": 0.5102450847625732, "learning_rate": 1.8882708058148683e-05, "loss": 0.3832, "step": 6815 }, { "epoch": 0.15201695516577166, "grad_norm": 0.6629839539527893, "learning_rate": 1.888109928785762e-05, "loss": 0.5464, "step": 6820 }, { "epoch": 0.15212840454639173, "grad_norm": 0.5725881457328796, "learning_rate": 1.8879489428812335e-05, "loss": 0.2462, "step": 6825 }, { "epoch": 0.1522398539270118, "grad_norm": 2.398331642150879, "learning_rate": 1.887787848121019e-05, "loss": 0.4956, "step": 6830 }, { "epoch": 0.15235130330763186, "grad_norm": 0.6613421440124512, "learning_rate": 1.8876266445248664e-05, "loss": 0.4017, "step": 6835 }, { "epoch": 0.15246275268825193, "grad_norm": 0.5280817151069641, "learning_rate": 1.8874653321125388e-05, "loss": 0.37, "step": 6840 }, { "epoch": 0.152574202068872, "grad_norm": 0.5924332141876221, "learning_rate": 1.8873039109038115e-05, "loss": 0.4621, "step": 6845 }, { "epoch": 0.15268565144949206, "grad_norm": 0.5465047955513, "learning_rate": 1.887142380918474e-05, "loss": 0.4276, "step": 6850 }, { "epoch": 0.15279710083011214, "grad_norm": 0.6491557955741882, "learning_rate": 1.886980742176328e-05, "loss": 0.3074, "step": 6855 }, { "epoch": 0.1529085502107322, "grad_norm": 0.6065066456794739, "learning_rate": 1.8868189946971895e-05, "loss": 0.4331, "step": 6860 }, { "epoch": 0.15301999959135226, "grad_norm": 0.6120703816413879, "learning_rate": 1.886657138500888e-05, "loss": 0.272, "step": 6865 }, { "epoch": 0.15313144897197234, "grad_norm": 0.4606035649776459, "learning_rate": 1.8864951736072643e-05, "loss": 0.3616, "step": 6870 }, { "epoch": 0.15324289835259242, "grad_norm": 0.5181193351745605, "learning_rate": 1.8863331000361755e-05, "loss": 0.4053, "step": 6875 }, { "epoch": 0.15335434773321246, "grad_norm": 0.3439059853553772, "learning_rate": 1.88617091780749e-05, "loss": 0.3357, "step": 6880 }, { "epoch": 0.15346579711383254, "grad_norm": 0.5444257855415344, "learning_rate": 1.8860086269410905e-05, "loss": 0.4992, "step": 6885 }, { "epoch": 0.15357724649445262, "grad_norm": 0.729205846786499, "learning_rate": 1.885846227456872e-05, "loss": 0.3517, "step": 6890 }, { "epoch": 0.15368869587507267, "grad_norm": 0.5775438547134399, "learning_rate": 1.8856837193747436e-05, "loss": 0.4049, "step": 6895 }, { "epoch": 0.15380014525569274, "grad_norm": 0.49706968665122986, "learning_rate": 1.885521102714628e-05, "loss": 0.3226, "step": 6900 }, { "epoch": 0.15391159463631282, "grad_norm": 0.5086760520935059, "learning_rate": 1.8853583774964598e-05, "loss": 0.4231, "step": 6905 }, { "epoch": 0.15402304401693287, "grad_norm": 0.5242869257926941, "learning_rate": 1.885195543740189e-05, "loss": 0.3973, "step": 6910 }, { "epoch": 0.15413449339755295, "grad_norm": 0.6669424176216125, "learning_rate": 1.8850326014657765e-05, "loss": 0.43, "step": 6915 }, { "epoch": 0.15424594277817302, "grad_norm": 0.6157550811767578, "learning_rate": 1.8848695506931995e-05, "loss": 0.4676, "step": 6920 }, { "epoch": 0.15435739215879307, "grad_norm": 0.3835831880569458, "learning_rate": 1.8847063914424447e-05, "loss": 0.3631, "step": 6925 }, { "epoch": 0.15446884153941315, "grad_norm": 0.564392626285553, "learning_rate": 1.8845431237335158e-05, "loss": 0.4559, "step": 6930 }, { "epoch": 0.15458029092003323, "grad_norm": 0.5257560610771179, "learning_rate": 1.8843797475864274e-05, "loss": 0.4221, "step": 6935 }, { "epoch": 0.15469174030065327, "grad_norm": 0.5841484665870667, "learning_rate": 1.8842162630212083e-05, "loss": 0.3595, "step": 6940 }, { "epoch": 0.15480318968127335, "grad_norm": 0.5178191065788269, "learning_rate": 1.8840526700579004e-05, "loss": 0.3742, "step": 6945 }, { "epoch": 0.1549146390618934, "grad_norm": 0.4463173747062683, "learning_rate": 1.8838889687165592e-05, "loss": 0.3189, "step": 6950 }, { "epoch": 0.15502608844251348, "grad_norm": 0.5193114280700684, "learning_rate": 1.883725159017253e-05, "loss": 0.2913, "step": 6955 }, { "epoch": 0.15513753782313355, "grad_norm": 0.4916064143180847, "learning_rate": 1.8835612409800634e-05, "loss": 0.2726, "step": 6960 }, { "epoch": 0.1552489872037536, "grad_norm": 0.6708848476409912, "learning_rate": 1.883397214625086e-05, "loss": 0.4509, "step": 6965 }, { "epoch": 0.15536043658437368, "grad_norm": 0.5058689117431641, "learning_rate": 1.8832330799724285e-05, "loss": 0.3408, "step": 6970 }, { "epoch": 0.15547188596499376, "grad_norm": 0.5161834359169006, "learning_rate": 1.8830688370422127e-05, "loss": 0.2951, "step": 6975 }, { "epoch": 0.1555833353456138, "grad_norm": 1.0592902898788452, "learning_rate": 1.8829044858545744e-05, "loss": 0.4411, "step": 6980 }, { "epoch": 0.15569478472623388, "grad_norm": 0.4401077628135681, "learning_rate": 1.8827400264296606e-05, "loss": 0.3229, "step": 6985 }, { "epoch": 0.15580623410685396, "grad_norm": 0.48951423168182373, "learning_rate": 1.8825754587876335e-05, "loss": 0.4544, "step": 6990 }, { "epoch": 0.155917683487474, "grad_norm": 0.5329950451850891, "learning_rate": 1.8824107829486674e-05, "loss": 0.4696, "step": 6995 }, { "epoch": 0.15602913286809408, "grad_norm": 0.4187164604663849, "learning_rate": 1.8822459989329508e-05, "loss": 0.4175, "step": 7000 }, { "epoch": 0.15614058224871416, "grad_norm": 0.5629317164421082, "learning_rate": 1.8820811067606844e-05, "loss": 0.4599, "step": 7005 }, { "epoch": 0.1562520316293342, "grad_norm": 0.5468907952308655, "learning_rate": 1.881916106452083e-05, "loss": 0.3809, "step": 7010 }, { "epoch": 0.15636348100995429, "grad_norm": 0.4851939380168915, "learning_rate": 1.8817509980273742e-05, "loss": 0.384, "step": 7015 }, { "epoch": 0.15647493039057436, "grad_norm": 0.6714935898780823, "learning_rate": 1.8815857815067994e-05, "loss": 0.3595, "step": 7020 }, { "epoch": 0.1565863797711944, "grad_norm": 0.6519018411636353, "learning_rate": 1.8814204569106124e-05, "loss": 0.4568, "step": 7025 }, { "epoch": 0.1566978291518145, "grad_norm": 0.6960158348083496, "learning_rate": 1.8812550242590805e-05, "loss": 0.4064, "step": 7030 }, { "epoch": 0.15680927853243457, "grad_norm": 0.4918147623538971, "learning_rate": 1.8810894835724854e-05, "loss": 0.4429, "step": 7035 }, { "epoch": 0.15692072791305461, "grad_norm": 0.45890048146247864, "learning_rate": 1.8809238348711206e-05, "loss": 0.346, "step": 7040 }, { "epoch": 0.1570321772936747, "grad_norm": 0.6213566660881042, "learning_rate": 1.8807580781752932e-05, "loss": 0.4872, "step": 7045 }, { "epoch": 0.15714362667429477, "grad_norm": 0.4078867435455322, "learning_rate": 1.880592213505324e-05, "loss": 0.2985, "step": 7050 }, { "epoch": 0.15725507605491482, "grad_norm": 0.46203523874282837, "learning_rate": 1.880426240881546e-05, "loss": 0.4425, "step": 7055 }, { "epoch": 0.1573665254355349, "grad_norm": 0.6020154356956482, "learning_rate": 1.880260160324307e-05, "loss": 0.4224, "step": 7060 }, { "epoch": 0.15747797481615497, "grad_norm": 0.692721426486969, "learning_rate": 1.880093971853967e-05, "loss": 0.5081, "step": 7065 }, { "epoch": 0.15758942419677502, "grad_norm": 0.49963024258613586, "learning_rate": 1.8799276754908992e-05, "loss": 0.4919, "step": 7070 }, { "epoch": 0.1577008735773951, "grad_norm": 0.5270716547966003, "learning_rate": 1.8797612712554904e-05, "loss": 0.3817, "step": 7075 }, { "epoch": 0.15781232295801517, "grad_norm": 0.6648659706115723, "learning_rate": 1.879594759168141e-05, "loss": 0.3285, "step": 7080 }, { "epoch": 0.15792377233863522, "grad_norm": 0.39034026861190796, "learning_rate": 1.8794281392492627e-05, "loss": 0.2896, "step": 7085 }, { "epoch": 0.1580352217192553, "grad_norm": 1.2855838537216187, "learning_rate": 1.8792614115192834e-05, "loss": 0.4575, "step": 7090 }, { "epoch": 0.15814667109987537, "grad_norm": 0.5977685451507568, "learning_rate": 1.8790945759986414e-05, "loss": 0.4771, "step": 7095 }, { "epoch": 0.15825812048049542, "grad_norm": 0.8264610767364502, "learning_rate": 1.87892763270779e-05, "loss": 0.4239, "step": 7100 }, { "epoch": 0.1583695698611155, "grad_norm": 0.5660960078239441, "learning_rate": 1.8787605816671956e-05, "loss": 0.3571, "step": 7105 }, { "epoch": 0.15848101924173558, "grad_norm": 0.6069998741149902, "learning_rate": 1.8785934228973364e-05, "loss": 0.3975, "step": 7110 }, { "epoch": 0.15859246862235563, "grad_norm": 0.6635084748268127, "learning_rate": 1.8784261564187053e-05, "loss": 0.4047, "step": 7115 }, { "epoch": 0.1587039180029757, "grad_norm": 0.5507485866546631, "learning_rate": 1.878258782251808e-05, "loss": 0.4622, "step": 7120 }, { "epoch": 0.15881536738359578, "grad_norm": 0.5426681637763977, "learning_rate": 1.8780913004171628e-05, "loss": 0.4192, "step": 7125 }, { "epoch": 0.15892681676421583, "grad_norm": 0.7298119068145752, "learning_rate": 1.8779237109353023e-05, "loss": 0.4339, "step": 7130 }, { "epoch": 0.1590382661448359, "grad_norm": 0.6015418767929077, "learning_rate": 1.8777560138267712e-05, "loss": 0.4617, "step": 7135 }, { "epoch": 0.15914971552545595, "grad_norm": 0.4312678873538971, "learning_rate": 1.8775882091121282e-05, "loss": 0.283, "step": 7140 }, { "epoch": 0.15926116490607603, "grad_norm": 0.6461197137832642, "learning_rate": 1.8774202968119447e-05, "loss": 0.3925, "step": 7145 }, { "epoch": 0.1593726142866961, "grad_norm": 0.5625909566879272, "learning_rate": 1.8772522769468054e-05, "loss": 0.4194, "step": 7150 }, { "epoch": 0.15948406366731616, "grad_norm": 0.6251127123832703, "learning_rate": 1.8770841495373083e-05, "loss": 0.4162, "step": 7155 }, { "epoch": 0.15959551304793623, "grad_norm": 0.49051955342292786, "learning_rate": 1.8769159146040644e-05, "loss": 0.4761, "step": 7160 }, { "epoch": 0.1597069624285563, "grad_norm": 0.590154767036438, "learning_rate": 1.876747572167698e-05, "loss": 0.3741, "step": 7165 }, { "epoch": 0.15981841180917636, "grad_norm": 0.5282180905342102, "learning_rate": 1.8765791222488472e-05, "loss": 0.3672, "step": 7170 }, { "epoch": 0.15992986118979644, "grad_norm": 0.3630257844924927, "learning_rate": 1.8764105648681615e-05, "loss": 0.3332, "step": 7175 }, { "epoch": 0.1600413105704165, "grad_norm": 0.48569926619529724, "learning_rate": 1.876241900046306e-05, "loss": 0.3211, "step": 7180 }, { "epoch": 0.16015275995103656, "grad_norm": 0.6470745205879211, "learning_rate": 1.876073127803957e-05, "loss": 0.4411, "step": 7185 }, { "epoch": 0.16026420933165664, "grad_norm": 0.49332156777381897, "learning_rate": 1.8759042481618047e-05, "loss": 0.4793, "step": 7190 }, { "epoch": 0.16037565871227671, "grad_norm": 0.5013951659202576, "learning_rate": 1.8757352611405525e-05, "loss": 0.337, "step": 7195 }, { "epoch": 0.16048710809289676, "grad_norm": 0.47468462586402893, "learning_rate": 1.8755661667609167e-05, "loss": 0.4142, "step": 7200 }, { "epoch": 0.16059855747351684, "grad_norm": 0.5139350891113281, "learning_rate": 1.8753969650436274e-05, "loss": 0.4156, "step": 7205 }, { "epoch": 0.16071000685413692, "grad_norm": 0.6101190447807312, "learning_rate": 1.8752276560094273e-05, "loss": 0.3468, "step": 7210 }, { "epoch": 0.16082145623475697, "grad_norm": 0.5023723840713501, "learning_rate": 1.875058239679072e-05, "loss": 0.4566, "step": 7215 }, { "epoch": 0.16093290561537704, "grad_norm": 0.48146092891693115, "learning_rate": 1.8748887160733315e-05, "loss": 0.3785, "step": 7220 }, { "epoch": 0.16104435499599712, "grad_norm": 0.6446987986564636, "learning_rate": 1.8747190852129868e-05, "loss": 0.318, "step": 7225 }, { "epoch": 0.16115580437661717, "grad_norm": 0.5768639445304871, "learning_rate": 1.8745493471188348e-05, "loss": 0.348, "step": 7230 }, { "epoch": 0.16126725375723724, "grad_norm": 0.5417771935462952, "learning_rate": 1.8743795018116827e-05, "loss": 0.3935, "step": 7235 }, { "epoch": 0.16137870313785732, "grad_norm": 0.3816154897212982, "learning_rate": 1.874209549312353e-05, "loss": 0.4261, "step": 7240 }, { "epoch": 0.16149015251847737, "grad_norm": 0.6149434447288513, "learning_rate": 1.8740394896416806e-05, "loss": 0.483, "step": 7245 }, { "epoch": 0.16160160189909745, "grad_norm": 0.7580816745758057, "learning_rate": 1.873869322820513e-05, "loss": 0.4668, "step": 7250 }, { "epoch": 0.16171305127971752, "grad_norm": 0.5908399820327759, "learning_rate": 1.873699048869712e-05, "loss": 0.4884, "step": 7255 }, { "epoch": 0.16182450066033757, "grad_norm": 0.7933946251869202, "learning_rate": 1.8735286678101515e-05, "loss": 0.5547, "step": 7260 }, { "epoch": 0.16193595004095765, "grad_norm": 0.39468976855278015, "learning_rate": 1.8733581796627187e-05, "loss": 0.375, "step": 7265 }, { "epoch": 0.16204739942157773, "grad_norm": 0.6789149641990662, "learning_rate": 1.8731875844483145e-05, "loss": 0.2763, "step": 7270 }, { "epoch": 0.16215884880219777, "grad_norm": 0.684878408908844, "learning_rate": 1.8730168821878527e-05, "loss": 0.3293, "step": 7275 }, { "epoch": 0.16227029818281785, "grad_norm": 0.5506591796875, "learning_rate": 1.8728460729022592e-05, "loss": 0.3465, "step": 7280 }, { "epoch": 0.16238174756343793, "grad_norm": 0.509867787361145, "learning_rate": 1.8726751566124747e-05, "loss": 0.4675, "step": 7285 }, { "epoch": 0.16249319694405798, "grad_norm": 0.7684085369110107, "learning_rate": 1.8725041333394523e-05, "loss": 0.4364, "step": 7290 }, { "epoch": 0.16260464632467805, "grad_norm": 0.5433658361434937, "learning_rate": 1.872333003104158e-05, "loss": 0.3737, "step": 7295 }, { "epoch": 0.16271609570529813, "grad_norm": 0.6474670171737671, "learning_rate": 1.87216176592757e-05, "loss": 0.3013, "step": 7300 }, { "epoch": 0.16282754508591818, "grad_norm": 0.5131295919418335, "learning_rate": 1.8719904218306822e-05, "loss": 0.388, "step": 7305 }, { "epoch": 0.16293899446653826, "grad_norm": 0.5305821895599365, "learning_rate": 1.8718189708344997e-05, "loss": 0.3985, "step": 7310 }, { "epoch": 0.16305044384715833, "grad_norm": 0.5121385455131531, "learning_rate": 1.8716474129600403e-05, "loss": 0.4053, "step": 7315 }, { "epoch": 0.16316189322777838, "grad_norm": 0.6189393401145935, "learning_rate": 1.871475748228336e-05, "loss": 0.3405, "step": 7320 }, { "epoch": 0.16327334260839846, "grad_norm": 0.34280717372894287, "learning_rate": 1.8713039766604325e-05, "loss": 0.4189, "step": 7325 }, { "epoch": 0.1633847919890185, "grad_norm": 0.4888545274734497, "learning_rate": 1.8711320982773863e-05, "loss": 0.3416, "step": 7330 }, { "epoch": 0.16349624136963858, "grad_norm": 0.6265230178833008, "learning_rate": 1.870960113100269e-05, "loss": 0.4796, "step": 7335 }, { "epoch": 0.16360769075025866, "grad_norm": 0.5085820555686951, "learning_rate": 1.8707880211501646e-05, "loss": 0.4041, "step": 7340 }, { "epoch": 0.1637191401308787, "grad_norm": 0.5407921075820923, "learning_rate": 1.8706158224481704e-05, "loss": 0.2705, "step": 7345 }, { "epoch": 0.1638305895114988, "grad_norm": 0.38955292105674744, "learning_rate": 1.8704435170153963e-05, "loss": 0.3524, "step": 7350 }, { "epoch": 0.16394203889211886, "grad_norm": 0.46001070737838745, "learning_rate": 1.870271104872966e-05, "loss": 0.4483, "step": 7355 }, { "epoch": 0.1640534882727389, "grad_norm": 0.6388995051383972, "learning_rate": 1.8700985860420156e-05, "loss": 0.4532, "step": 7360 }, { "epoch": 0.164164937653359, "grad_norm": 0.6003766655921936, "learning_rate": 1.869925960543695e-05, "loss": 0.4178, "step": 7365 }, { "epoch": 0.16427638703397907, "grad_norm": 0.45454123616218567, "learning_rate": 1.869753228399166e-05, "loss": 0.2413, "step": 7370 }, { "epoch": 0.16438783641459911, "grad_norm": 0.604836106300354, "learning_rate": 1.8695803896296048e-05, "loss": 0.3807, "step": 7375 }, { "epoch": 0.1644992857952192, "grad_norm": 0.42217013239860535, "learning_rate": 1.8694074442562e-05, "loss": 0.2366, "step": 7380 }, { "epoch": 0.16461073517583927, "grad_norm": 0.4794704020023346, "learning_rate": 1.869234392300153e-05, "loss": 0.3015, "step": 7385 }, { "epoch": 0.16472218455645932, "grad_norm": 0.4159165620803833, "learning_rate": 1.8690612337826795e-05, "loss": 0.3359, "step": 7390 }, { "epoch": 0.1648336339370794, "grad_norm": 0.5214026570320129, "learning_rate": 1.8688879687250067e-05, "loss": 0.3039, "step": 7395 }, { "epoch": 0.16494508331769947, "grad_norm": 0.45507606863975525, "learning_rate": 1.8687145971483757e-05, "loss": 0.338, "step": 7400 }, { "epoch": 0.16505653269831952, "grad_norm": 0.5133429765701294, "learning_rate": 1.8685411190740404e-05, "loss": 0.3267, "step": 7405 }, { "epoch": 0.1651679820789396, "grad_norm": 0.5593928098678589, "learning_rate": 1.8683675345232683e-05, "loss": 0.4158, "step": 7410 }, { "epoch": 0.16527943145955967, "grad_norm": 0.5510695576667786, "learning_rate": 1.868193843517339e-05, "loss": 0.3397, "step": 7415 }, { "epoch": 0.16539088084017972, "grad_norm": 0.4579789340496063, "learning_rate": 1.868020046077546e-05, "loss": 0.4361, "step": 7420 }, { "epoch": 0.1655023302207998, "grad_norm": 0.5290041565895081, "learning_rate": 1.8678461422251956e-05, "loss": 0.3467, "step": 7425 }, { "epoch": 0.16561377960141987, "grad_norm": 0.5120608806610107, "learning_rate": 1.8676721319816064e-05, "loss": 0.3066, "step": 7430 }, { "epoch": 0.16572522898203992, "grad_norm": 0.6638226509094238, "learning_rate": 1.8674980153681116e-05, "loss": 0.4378, "step": 7435 }, { "epoch": 0.16583667836266, "grad_norm": 0.5974465012550354, "learning_rate": 1.867323792406056e-05, "loss": 0.3986, "step": 7440 }, { "epoch": 0.16594812774328008, "grad_norm": 0.7560718655586243, "learning_rate": 1.8671494631167982e-05, "loss": 0.3244, "step": 7445 }, { "epoch": 0.16605957712390013, "grad_norm": 0.674818217754364, "learning_rate": 1.8669750275217097e-05, "loss": 0.4048, "step": 7450 }, { "epoch": 0.1661710265045202, "grad_norm": 0.7948163151741028, "learning_rate": 1.8668004856421748e-05, "loss": 0.2647, "step": 7455 }, { "epoch": 0.16628247588514028, "grad_norm": 0.5481162071228027, "learning_rate": 1.8666258374995912e-05, "loss": 0.2952, "step": 7460 }, { "epoch": 0.16639392526576033, "grad_norm": 0.5210584402084351, "learning_rate": 1.866451083115369e-05, "loss": 0.3767, "step": 7465 }, { "epoch": 0.1665053746463804, "grad_norm": 0.9675779938697815, "learning_rate": 1.866276222510932e-05, "loss": 0.4264, "step": 7470 }, { "epoch": 0.16661682402700048, "grad_norm": 0.6672073602676392, "learning_rate": 1.8661012557077167e-05, "loss": 0.3939, "step": 7475 }, { "epoch": 0.16672827340762053, "grad_norm": 0.50336754322052, "learning_rate": 1.865926182727173e-05, "loss": 0.3563, "step": 7480 }, { "epoch": 0.1668397227882406, "grad_norm": 0.538811445236206, "learning_rate": 1.865751003590763e-05, "loss": 0.4235, "step": 7485 }, { "epoch": 0.16695117216886068, "grad_norm": 0.5374696850776672, "learning_rate": 1.8655757183199624e-05, "loss": 0.2565, "step": 7490 }, { "epoch": 0.16706262154948073, "grad_norm": 0.5080285668373108, "learning_rate": 1.8654003269362602e-05, "loss": 0.3744, "step": 7495 }, { "epoch": 0.1671740709301008, "grad_norm": 0.9205856919288635, "learning_rate": 1.8652248294611576e-05, "loss": 0.3381, "step": 7500 }, { "epoch": 0.1672855203107209, "grad_norm": 0.5005443096160889, "learning_rate": 1.8650492259161696e-05, "loss": 0.4025, "step": 7505 }, { "epoch": 0.16739696969134094, "grad_norm": 0.5921610593795776, "learning_rate": 1.8648735163228235e-05, "loss": 0.2706, "step": 7510 }, { "epoch": 0.167508419071961, "grad_norm": 0.551630973815918, "learning_rate": 1.8646977007026602e-05, "loss": 0.3583, "step": 7515 }, { "epoch": 0.16761986845258106, "grad_norm": 0.6462824940681458, "learning_rate": 1.8645217790772333e-05, "loss": 0.3964, "step": 7520 }, { "epoch": 0.16773131783320114, "grad_norm": 0.4856247305870056, "learning_rate": 1.8643457514681093e-05, "loss": 0.4055, "step": 7525 }, { "epoch": 0.16784276721382121, "grad_norm": 0.6250535845756531, "learning_rate": 1.864169617896868e-05, "loss": 0.4841, "step": 7530 }, { "epoch": 0.16795421659444126, "grad_norm": 0.40336018800735474, "learning_rate": 1.863993378385102e-05, "loss": 0.3998, "step": 7535 }, { "epoch": 0.16806566597506134, "grad_norm": 0.5048686265945435, "learning_rate": 1.8638170329544164e-05, "loss": 0.4064, "step": 7540 }, { "epoch": 0.16817711535568142, "grad_norm": 0.5588499307632446, "learning_rate": 1.8636405816264303e-05, "loss": 0.3946, "step": 7545 }, { "epoch": 0.16828856473630147, "grad_norm": 0.6174390316009521, "learning_rate": 1.8634640244227756e-05, "loss": 0.4433, "step": 7550 }, { "epoch": 0.16840001411692154, "grad_norm": 0.5336619019508362, "learning_rate": 1.863287361365096e-05, "loss": 0.3856, "step": 7555 }, { "epoch": 0.16851146349754162, "grad_norm": 1.3543431758880615, "learning_rate": 1.8631105924750496e-05, "loss": 0.3679, "step": 7560 }, { "epoch": 0.16862291287816167, "grad_norm": 0.7976319193840027, "learning_rate": 1.8629337177743067e-05, "loss": 0.2943, "step": 7565 }, { "epoch": 0.16873436225878174, "grad_norm": 0.5987356305122375, "learning_rate": 1.862756737284551e-05, "loss": 0.3568, "step": 7570 }, { "epoch": 0.16884581163940182, "grad_norm": 0.4379819631576538, "learning_rate": 1.8625796510274785e-05, "loss": 0.352, "step": 7575 }, { "epoch": 0.16895726102002187, "grad_norm": 1.3696235418319702, "learning_rate": 1.862402459024799e-05, "loss": 0.4194, "step": 7580 }, { "epoch": 0.16906871040064195, "grad_norm": 0.6878867745399475, "learning_rate": 1.8622251612982347e-05, "loss": 0.3598, "step": 7585 }, { "epoch": 0.16918015978126202, "grad_norm": 0.5670821070671082, "learning_rate": 1.862047757869521e-05, "loss": 0.4934, "step": 7590 }, { "epoch": 0.16929160916188207, "grad_norm": 0.5964428186416626, "learning_rate": 1.8618702487604064e-05, "loss": 0.464, "step": 7595 }, { "epoch": 0.16940305854250215, "grad_norm": 0.8312402963638306, "learning_rate": 1.8616926339926515e-05, "loss": 0.5313, "step": 7600 }, { "epoch": 0.16951450792312223, "grad_norm": 0.4638909697532654, "learning_rate": 1.8615149135880312e-05, "loss": 0.351, "step": 7605 }, { "epoch": 0.16962595730374228, "grad_norm": 0.7792471647262573, "learning_rate": 1.8613370875683327e-05, "loss": 0.4167, "step": 7610 }, { "epoch": 0.16973740668436235, "grad_norm": 0.5532524585723877, "learning_rate": 1.861159155955355e-05, "loss": 0.402, "step": 7615 }, { "epoch": 0.16984885606498243, "grad_norm": 0.567893922328949, "learning_rate": 1.8609811187709124e-05, "loss": 0.4829, "step": 7620 }, { "epoch": 0.16996030544560248, "grad_norm": 0.6022348999977112, "learning_rate": 1.8608029760368302e-05, "loss": 0.3435, "step": 7625 }, { "epoch": 0.17007175482622255, "grad_norm": 0.803606390953064, "learning_rate": 1.8606247277749476e-05, "loss": 0.3426, "step": 7630 }, { "epoch": 0.17018320420684263, "grad_norm": 0.49966391921043396, "learning_rate": 1.8604463740071165e-05, "loss": 0.4837, "step": 7635 }, { "epoch": 0.17029465358746268, "grad_norm": 0.4341173470020294, "learning_rate": 1.8602679147552014e-05, "loss": 0.4424, "step": 7640 }, { "epoch": 0.17040610296808276, "grad_norm": 0.5094113945960999, "learning_rate": 1.8600893500410803e-05, "loss": 0.3534, "step": 7645 }, { "epoch": 0.17051755234870283, "grad_norm": 0.6519873738288879, "learning_rate": 1.8599106798866438e-05, "loss": 0.3653, "step": 7650 }, { "epoch": 0.17062900172932288, "grad_norm": 0.5553015470504761, "learning_rate": 1.8597319043137952e-05, "loss": 0.2955, "step": 7655 }, { "epoch": 0.17074045110994296, "grad_norm": 0.5292229652404785, "learning_rate": 1.8595530233444514e-05, "loss": 0.3143, "step": 7660 }, { "epoch": 0.17085190049056304, "grad_norm": 0.5739569664001465, "learning_rate": 1.8593740370005415e-05, "loss": 0.3195, "step": 7665 }, { "epoch": 0.17096334987118308, "grad_norm": 0.502048671245575, "learning_rate": 1.8591949453040083e-05, "loss": 0.3717, "step": 7670 }, { "epoch": 0.17107479925180316, "grad_norm": 0.5392212867736816, "learning_rate": 1.8590157482768064e-05, "loss": 0.3698, "step": 7675 }, { "epoch": 0.17118624863242324, "grad_norm": 0.4482336938381195, "learning_rate": 1.858836445940905e-05, "loss": 0.4866, "step": 7680 }, { "epoch": 0.1712976980130433, "grad_norm": 0.5693212151527405, "learning_rate": 1.858657038318284e-05, "loss": 0.3281, "step": 7685 }, { "epoch": 0.17140914739366336, "grad_norm": 0.49769172072410583, "learning_rate": 1.8584775254309378e-05, "loss": 0.3215, "step": 7690 }, { "epoch": 0.17152059677428344, "grad_norm": 0.5934991240501404, "learning_rate": 1.858297907300874e-05, "loss": 0.3258, "step": 7695 }, { "epoch": 0.1716320461549035, "grad_norm": 0.5538322329521179, "learning_rate": 1.858118183950111e-05, "loss": 0.4904, "step": 7700 }, { "epoch": 0.17174349553552357, "grad_norm": 0.6971874833106995, "learning_rate": 1.8579383554006833e-05, "loss": 0.5043, "step": 7705 }, { "epoch": 0.17185494491614361, "grad_norm": 0.6677750945091248, "learning_rate": 1.8577584216746345e-05, "loss": 0.4446, "step": 7710 }, { "epoch": 0.1719663942967637, "grad_norm": 0.5418416857719421, "learning_rate": 1.8575783827940245e-05, "loss": 0.4064, "step": 7715 }, { "epoch": 0.17207784367738377, "grad_norm": 0.7056881189346313, "learning_rate": 1.8573982387809244e-05, "loss": 0.4147, "step": 7720 }, { "epoch": 0.17218929305800382, "grad_norm": 0.6976264715194702, "learning_rate": 1.8572179896574184e-05, "loss": 0.4984, "step": 7725 }, { "epoch": 0.1723007424386239, "grad_norm": 0.5107178688049316, "learning_rate": 1.8570376354456033e-05, "loss": 0.3481, "step": 7730 }, { "epoch": 0.17241219181924397, "grad_norm": 0.9620299935340881, "learning_rate": 1.8568571761675893e-05, "loss": 0.3581, "step": 7735 }, { "epoch": 0.17252364119986402, "grad_norm": 0.4947699010372162, "learning_rate": 1.8566766118454996e-05, "loss": 0.4885, "step": 7740 }, { "epoch": 0.1726350905804841, "grad_norm": 0.4762234389781952, "learning_rate": 1.85649594250147e-05, "loss": 0.4067, "step": 7745 }, { "epoch": 0.17274653996110417, "grad_norm": 0.9069308042526245, "learning_rate": 1.8563151681576487e-05, "loss": 0.2346, "step": 7750 }, { "epoch": 0.17285798934172422, "grad_norm": 0.5567780137062073, "learning_rate": 1.8561342888361978e-05, "loss": 0.3078, "step": 7755 }, { "epoch": 0.1729694387223443, "grad_norm": 0.641209602355957, "learning_rate": 1.855953304559291e-05, "loss": 0.4647, "step": 7760 }, { "epoch": 0.17308088810296438, "grad_norm": 0.5287478566169739, "learning_rate": 1.8557722153491166e-05, "loss": 0.3828, "step": 7765 }, { "epoch": 0.17319233748358442, "grad_norm": 0.5798740386962891, "learning_rate": 1.855591021227874e-05, "loss": 0.3984, "step": 7770 }, { "epoch": 0.1733037868642045, "grad_norm": 0.6225942373275757, "learning_rate": 1.855409722217776e-05, "loss": 0.3252, "step": 7775 }, { "epoch": 0.17341523624482458, "grad_norm": 0.6901559829711914, "learning_rate": 1.855228318341049e-05, "loss": 0.3742, "step": 7780 }, { "epoch": 0.17352668562544463, "grad_norm": 0.5212215185165405, "learning_rate": 1.8550468096199314e-05, "loss": 0.2777, "step": 7785 }, { "epoch": 0.1736381350060647, "grad_norm": 0.8276028037071228, "learning_rate": 1.854865196076675e-05, "loss": 0.4952, "step": 7790 }, { "epoch": 0.17374958438668478, "grad_norm": 0.48634928464889526, "learning_rate": 1.854683477733544e-05, "loss": 0.3508, "step": 7795 }, { "epoch": 0.17386103376730483, "grad_norm": 0.4943958520889282, "learning_rate": 1.8545016546128162e-05, "loss": 0.4288, "step": 7800 }, { "epoch": 0.1739724831479249, "grad_norm": 0.5155841708183289, "learning_rate": 1.8543197267367807e-05, "loss": 0.459, "step": 7805 }, { "epoch": 0.17408393252854498, "grad_norm": 0.3615794777870178, "learning_rate": 1.8541376941277414e-05, "loss": 0.4031, "step": 7810 }, { "epoch": 0.17419538190916503, "grad_norm": 0.6325530409812927, "learning_rate": 1.8539555568080134e-05, "loss": 0.4214, "step": 7815 }, { "epoch": 0.1743068312897851, "grad_norm": 0.513111412525177, "learning_rate": 1.8537733147999262e-05, "loss": 0.3848, "step": 7820 }, { "epoch": 0.17441828067040518, "grad_norm": 0.5642367005348206, "learning_rate": 1.8535909681258202e-05, "loss": 0.3574, "step": 7825 }, { "epoch": 0.17452973005102523, "grad_norm": 0.7676248550415039, "learning_rate": 1.8534085168080503e-05, "loss": 0.3844, "step": 7830 }, { "epoch": 0.1746411794316453, "grad_norm": 0.5915855765342712, "learning_rate": 1.853225960868984e-05, "loss": 0.3365, "step": 7835 }, { "epoch": 0.1747526288122654, "grad_norm": 0.5565099120140076, "learning_rate": 1.8530433003310003e-05, "loss": 0.4917, "step": 7840 }, { "epoch": 0.17486407819288544, "grad_norm": 0.6923288702964783, "learning_rate": 1.8528605352164926e-05, "loss": 0.3355, "step": 7845 }, { "epoch": 0.1749755275735055, "grad_norm": 0.4434451460838318, "learning_rate": 1.8526776655478663e-05, "loss": 0.4119, "step": 7850 }, { "epoch": 0.1750869769541256, "grad_norm": 0.587926983833313, "learning_rate": 1.85249469134754e-05, "loss": 0.3956, "step": 7855 }, { "epoch": 0.17519842633474564, "grad_norm": 0.5707252025604248, "learning_rate": 1.852311612637945e-05, "loss": 0.45, "step": 7860 }, { "epoch": 0.17530987571536572, "grad_norm": 0.6604421138763428, "learning_rate": 1.8521284294415247e-05, "loss": 0.3159, "step": 7865 }, { "epoch": 0.1754213250959858, "grad_norm": 0.6285608410835266, "learning_rate": 1.8519451417807364e-05, "loss": 0.4533, "step": 7870 }, { "epoch": 0.17553277447660584, "grad_norm": 0.45332780480384827, "learning_rate": 1.8517617496780497e-05, "loss": 0.403, "step": 7875 }, { "epoch": 0.17564422385722592, "grad_norm": 0.5412122011184692, "learning_rate": 1.8515782531559474e-05, "loss": 0.3701, "step": 7880 }, { "epoch": 0.175755673237846, "grad_norm": 0.7073056101799011, "learning_rate": 1.8513946522369242e-05, "loss": 0.3685, "step": 7885 }, { "epoch": 0.17586712261846604, "grad_norm": 0.5671860575675964, "learning_rate": 1.851210946943489e-05, "loss": 0.4571, "step": 7890 }, { "epoch": 0.17597857199908612, "grad_norm": 0.6075953245162964, "learning_rate": 1.8510271372981612e-05, "loss": 0.4485, "step": 7895 }, { "epoch": 0.17609002137970617, "grad_norm": 0.6864980459213257, "learning_rate": 1.8508432233234755e-05, "loss": 0.3992, "step": 7900 }, { "epoch": 0.17620147076032625, "grad_norm": 0.5237451195716858, "learning_rate": 1.8506592050419783e-05, "loss": 0.4678, "step": 7905 }, { "epoch": 0.17631292014094632, "grad_norm": 0.6632329821586609, "learning_rate": 1.8504750824762285e-05, "loss": 0.32, "step": 7910 }, { "epoch": 0.17642436952156637, "grad_norm": 0.6396775245666504, "learning_rate": 1.8502908556487985e-05, "loss": 0.3531, "step": 7915 }, { "epoch": 0.17653581890218645, "grad_norm": 0.5833070874214172, "learning_rate": 1.8501065245822726e-05, "loss": 0.2939, "step": 7920 }, { "epoch": 0.17664726828280652, "grad_norm": 0.6542580723762512, "learning_rate": 1.8499220892992483e-05, "loss": 0.4936, "step": 7925 }, { "epoch": 0.17675871766342657, "grad_norm": 0.4529288411140442, "learning_rate": 1.849737549822337e-05, "loss": 0.3176, "step": 7930 }, { "epoch": 0.17687016704404665, "grad_norm": 0.6152017712593079, "learning_rate": 1.8495529061741602e-05, "loss": 0.4047, "step": 7935 }, { "epoch": 0.17698161642466673, "grad_norm": 0.4641212522983551, "learning_rate": 1.8493681583773556e-05, "loss": 0.2954, "step": 7940 }, { "epoch": 0.17709306580528678, "grad_norm": 0.4853191375732422, "learning_rate": 1.8491833064545705e-05, "loss": 0.4545, "step": 7945 }, { "epoch": 0.17720451518590685, "grad_norm": 0.544423520565033, "learning_rate": 1.8489983504284664e-05, "loss": 0.4811, "step": 7950 }, { "epoch": 0.17731596456652693, "grad_norm": 0.32580846548080444, "learning_rate": 1.848813290321718e-05, "loss": 0.4182, "step": 7955 }, { "epoch": 0.17742741394714698, "grad_norm": 0.5929962396621704, "learning_rate": 1.848628126157012e-05, "loss": 0.517, "step": 7960 }, { "epoch": 0.17753886332776705, "grad_norm": 0.6212632656097412, "learning_rate": 1.8484428579570482e-05, "loss": 0.2642, "step": 7965 }, { "epoch": 0.17765031270838713, "grad_norm": 0.373495489358902, "learning_rate": 1.848257485744539e-05, "loss": 0.4516, "step": 7970 }, { "epoch": 0.17776176208900718, "grad_norm": 0.693138599395752, "learning_rate": 1.8480720095422096e-05, "loss": 0.3265, "step": 7975 }, { "epoch": 0.17787321146962726, "grad_norm": 0.4824640154838562, "learning_rate": 1.847886429372798e-05, "loss": 0.2924, "step": 7980 }, { "epoch": 0.17798466085024733, "grad_norm": 0.562910795211792, "learning_rate": 1.8477007452590546e-05, "loss": 0.4182, "step": 7985 }, { "epoch": 0.17809611023086738, "grad_norm": 0.5726629495620728, "learning_rate": 1.8475149572237434e-05, "loss": 0.3874, "step": 7990 }, { "epoch": 0.17820755961148746, "grad_norm": 0.5748285055160522, "learning_rate": 1.8473290652896398e-05, "loss": 0.4022, "step": 7995 }, { "epoch": 0.17831900899210754, "grad_norm": 0.5068720579147339, "learning_rate": 1.8471430694795336e-05, "loss": 0.3074, "step": 8000 }, { "epoch": 0.17843045837272759, "grad_norm": 0.5286265015602112, "learning_rate": 1.846956969816226e-05, "loss": 0.3784, "step": 8005 }, { "epoch": 0.17854190775334766, "grad_norm": 0.6678787469863892, "learning_rate": 1.8467707663225312e-05, "loss": 0.3808, "step": 8010 }, { "epoch": 0.17865335713396774, "grad_norm": 0.49153807759284973, "learning_rate": 1.8465844590212767e-05, "loss": 0.4211, "step": 8015 }, { "epoch": 0.1787648065145878, "grad_norm": 0.4546610713005066, "learning_rate": 1.8463980479353018e-05, "loss": 0.3546, "step": 8020 }, { "epoch": 0.17887625589520786, "grad_norm": 0.8939555883407593, "learning_rate": 1.8462115330874598e-05, "loss": 0.3224, "step": 8025 }, { "epoch": 0.17898770527582794, "grad_norm": 0.5169472098350525, "learning_rate": 1.8460249145006156e-05, "loss": 0.3166, "step": 8030 }, { "epoch": 0.179099154656448, "grad_norm": 0.5126698613166809, "learning_rate": 1.8458381921976468e-05, "loss": 0.3324, "step": 8035 }, { "epoch": 0.17921060403706807, "grad_norm": 0.5317251086235046, "learning_rate": 1.845651366201445e-05, "loss": 0.3892, "step": 8040 }, { "epoch": 0.17932205341768814, "grad_norm": 0.6299559473991394, "learning_rate": 1.8454644365349127e-05, "loss": 0.364, "step": 8045 }, { "epoch": 0.1794335027983082, "grad_norm": 0.6288081407546997, "learning_rate": 1.8452774032209667e-05, "loss": 0.4554, "step": 8050 }, { "epoch": 0.17954495217892827, "grad_norm": 0.7996246814727783, "learning_rate": 1.8450902662825357e-05, "loss": 0.2982, "step": 8055 }, { "epoch": 0.17965640155954835, "grad_norm": 0.7523775696754456, "learning_rate": 1.844903025742561e-05, "loss": 0.3866, "step": 8060 }, { "epoch": 0.1797678509401684, "grad_norm": 0.5479750633239746, "learning_rate": 1.8447156816239967e-05, "loss": 0.3827, "step": 8065 }, { "epoch": 0.17987930032078847, "grad_norm": 0.6241227388381958, "learning_rate": 1.8445282339498105e-05, "loss": 0.3712, "step": 8070 }, { "epoch": 0.17999074970140855, "grad_norm": 0.4241069555282593, "learning_rate": 1.8443406827429816e-05, "loss": 0.4409, "step": 8075 }, { "epoch": 0.1801021990820286, "grad_norm": 0.527955174446106, "learning_rate": 1.844153028026502e-05, "loss": 0.4218, "step": 8080 }, { "epoch": 0.18021364846264867, "grad_norm": 0.4392964541912079, "learning_rate": 1.8439652698233773e-05, "loss": 0.2986, "step": 8085 }, { "epoch": 0.18032509784326872, "grad_norm": 0.6911497712135315, "learning_rate": 1.8437774081566248e-05, "loss": 0.3723, "step": 8090 }, { "epoch": 0.1804365472238888, "grad_norm": 0.3347485363483429, "learning_rate": 1.843589443049275e-05, "loss": 0.3233, "step": 8095 }, { "epoch": 0.18054799660450888, "grad_norm": 0.6009610891342163, "learning_rate": 1.843401374524371e-05, "loss": 0.4063, "step": 8100 }, { "epoch": 0.18065944598512892, "grad_norm": 0.5331124067306519, "learning_rate": 1.8432132026049685e-05, "loss": 0.5214, "step": 8105 }, { "epoch": 0.180770895365749, "grad_norm": 0.9455961585044861, "learning_rate": 1.843024927314136e-05, "loss": 0.3784, "step": 8110 }, { "epoch": 0.18088234474636908, "grad_norm": 0.5959413051605225, "learning_rate": 1.8428365486749545e-05, "loss": 0.3704, "step": 8115 }, { "epoch": 0.18099379412698913, "grad_norm": 0.7602490186691284, "learning_rate": 1.8426480667105178e-05, "loss": 0.4113, "step": 8120 }, { "epoch": 0.1811052435076092, "grad_norm": 0.76889568567276, "learning_rate": 1.8424594814439326e-05, "loss": 0.5736, "step": 8125 }, { "epoch": 0.18121669288822928, "grad_norm": 0.873080849647522, "learning_rate": 1.8422707928983173e-05, "loss": 0.3585, "step": 8130 }, { "epoch": 0.18132814226884933, "grad_norm": 0.3947627544403076, "learning_rate": 1.8420820010968046e-05, "loss": 0.4465, "step": 8135 }, { "epoch": 0.1814395916494694, "grad_norm": 0.5145520567893982, "learning_rate": 1.8418931060625386e-05, "loss": 0.3982, "step": 8140 }, { "epoch": 0.18155104103008948, "grad_norm": 0.5094866156578064, "learning_rate": 1.8417041078186757e-05, "loss": 0.2775, "step": 8145 }, { "epoch": 0.18166249041070953, "grad_norm": 0.5887768864631653, "learning_rate": 1.841515006388386e-05, "loss": 0.4656, "step": 8150 }, { "epoch": 0.1817739397913296, "grad_norm": 0.6336820721626282, "learning_rate": 1.8413258017948527e-05, "loss": 0.2485, "step": 8155 }, { "epoch": 0.18188538917194969, "grad_norm": 0.8006008267402649, "learning_rate": 1.84113649406127e-05, "loss": 0.3981, "step": 8160 }, { "epoch": 0.18199683855256973, "grad_norm": 0.5995430946350098, "learning_rate": 1.8409470832108452e-05, "loss": 0.3706, "step": 8165 }, { "epoch": 0.1821082879331898, "grad_norm": 0.5357914566993713, "learning_rate": 1.8407575692667997e-05, "loss": 0.4349, "step": 8170 }, { "epoch": 0.1822197373138099, "grad_norm": 0.4280491769313812, "learning_rate": 1.8405679522523656e-05, "loss": 0.5198, "step": 8175 }, { "epoch": 0.18233118669442994, "grad_norm": 0.5123806595802307, "learning_rate": 1.8403782321907888e-05, "loss": 0.2347, "step": 8180 }, { "epoch": 0.18244263607505, "grad_norm": 0.6792116761207581, "learning_rate": 1.840188409105327e-05, "loss": 0.3305, "step": 8185 }, { "epoch": 0.1825540854556701, "grad_norm": 0.5935506224632263, "learning_rate": 1.8399984830192522e-05, "loss": 0.4718, "step": 8190 }, { "epoch": 0.18266553483629014, "grad_norm": 0.4705546200275421, "learning_rate": 1.839808453955847e-05, "loss": 0.4294, "step": 8195 }, { "epoch": 0.18277698421691022, "grad_norm": 0.44560903310775757, "learning_rate": 1.8396183219384073e-05, "loss": 0.3661, "step": 8200 }, { "epoch": 0.1828884335975303, "grad_norm": 0.596440315246582, "learning_rate": 1.8394280869902423e-05, "loss": 0.4454, "step": 8205 }, { "epoch": 0.18299988297815034, "grad_norm": 0.6796529293060303, "learning_rate": 1.8392377491346734e-05, "loss": 0.3922, "step": 8210 }, { "epoch": 0.18311133235877042, "grad_norm": 0.5746434926986694, "learning_rate": 1.8390473083950346e-05, "loss": 0.3445, "step": 8215 }, { "epoch": 0.1832227817393905, "grad_norm": 0.689755916595459, "learning_rate": 1.8388567647946718e-05, "loss": 0.4528, "step": 8220 }, { "epoch": 0.18333423112001054, "grad_norm": 0.5509458780288696, "learning_rate": 1.8386661183569446e-05, "loss": 0.4711, "step": 8225 }, { "epoch": 0.18344568050063062, "grad_norm": 0.5880481600761414, "learning_rate": 1.8384753691052252e-05, "loss": 0.3628, "step": 8230 }, { "epoch": 0.1835571298812507, "grad_norm": 0.4933379888534546, "learning_rate": 1.8382845170628973e-05, "loss": 0.3164, "step": 8235 }, { "epoch": 0.18366857926187075, "grad_norm": 0.5734802484512329, "learning_rate": 1.838093562253358e-05, "loss": 0.4686, "step": 8240 }, { "epoch": 0.18378002864249082, "grad_norm": 0.6775012612342834, "learning_rate": 1.8379025047000177e-05, "loss": 0.3105, "step": 8245 }, { "epoch": 0.1838914780231109, "grad_norm": 0.7611976861953735, "learning_rate": 1.8377113444262973e-05, "loss": 0.3797, "step": 8250 }, { "epoch": 0.18400292740373095, "grad_norm": 0.5081771016120911, "learning_rate": 1.8375200814556325e-05, "loss": 0.2762, "step": 8255 }, { "epoch": 0.18411437678435102, "grad_norm": 0.41866931319236755, "learning_rate": 1.8373287158114702e-05, "loss": 0.2894, "step": 8260 }, { "epoch": 0.1842258261649711, "grad_norm": 0.42981696128845215, "learning_rate": 1.8371372475172705e-05, "loss": 0.387, "step": 8265 }, { "epoch": 0.18433727554559115, "grad_norm": 0.4748207628726959, "learning_rate": 1.8369456765965064e-05, "loss": 0.2789, "step": 8270 }, { "epoch": 0.18444872492621123, "grad_norm": 0.7153117060661316, "learning_rate": 1.8367540030726624e-05, "loss": 0.4423, "step": 8275 }, { "epoch": 0.18456017430683128, "grad_norm": 0.5909894108772278, "learning_rate": 1.8365622269692362e-05, "loss": 0.3757, "step": 8280 }, { "epoch": 0.18467162368745135, "grad_norm": 0.4618348479270935, "learning_rate": 1.836370348309738e-05, "loss": 0.4254, "step": 8285 }, { "epoch": 0.18478307306807143, "grad_norm": 0.4978313148021698, "learning_rate": 1.8361783671176915e-05, "loss": 0.3347, "step": 8290 }, { "epoch": 0.18489452244869148, "grad_norm": 0.5355519652366638, "learning_rate": 1.835986283416631e-05, "loss": 0.2233, "step": 8295 }, { "epoch": 0.18500597182931156, "grad_norm": 0.5492738485336304, "learning_rate": 1.8357940972301055e-05, "loss": 0.3731, "step": 8300 }, { "epoch": 0.18511742120993163, "grad_norm": 0.5359588265419006, "learning_rate": 1.8356018085816744e-05, "loss": 0.318, "step": 8305 }, { "epoch": 0.18522887059055168, "grad_norm": 0.6668272614479065, "learning_rate": 1.8354094174949117e-05, "loss": 0.3873, "step": 8310 }, { "epoch": 0.18534031997117176, "grad_norm": 0.592851459980011, "learning_rate": 1.8352169239934028e-05, "loss": 0.3334, "step": 8315 }, { "epoch": 0.18545176935179183, "grad_norm": 0.627230167388916, "learning_rate": 1.835024328100746e-05, "loss": 0.4218, "step": 8320 }, { "epoch": 0.18556321873241188, "grad_norm": 0.40039190649986267, "learning_rate": 1.834831629840552e-05, "loss": 0.3812, "step": 8325 }, { "epoch": 0.18567466811303196, "grad_norm": 0.34311795234680176, "learning_rate": 1.8346388292364438e-05, "loss": 0.3433, "step": 8330 }, { "epoch": 0.18578611749365204, "grad_norm": 0.6989755630493164, "learning_rate": 1.8344459263120575e-05, "loss": 0.3556, "step": 8335 }, { "epoch": 0.18589756687427209, "grad_norm": 0.6018476486206055, "learning_rate": 1.834252921091042e-05, "loss": 0.4649, "step": 8340 }, { "epoch": 0.18600901625489216, "grad_norm": 0.5775113105773926, "learning_rate": 1.8340598135970577e-05, "loss": 0.3353, "step": 8345 }, { "epoch": 0.18612046563551224, "grad_norm": 0.48347654938697815, "learning_rate": 1.833866603853778e-05, "loss": 0.4051, "step": 8350 }, { "epoch": 0.1862319150161323, "grad_norm": 1.136330008506775, "learning_rate": 1.8336732918848894e-05, "loss": 0.391, "step": 8355 }, { "epoch": 0.18634336439675236, "grad_norm": 0.4948228597640991, "learning_rate": 1.8334798777140902e-05, "loss": 0.2786, "step": 8360 }, { "epoch": 0.18645481377737244, "grad_norm": 0.5982722640037537, "learning_rate": 1.8332863613650912e-05, "loss": 0.416, "step": 8365 }, { "epoch": 0.1865662631579925, "grad_norm": 0.5050781965255737, "learning_rate": 1.8330927428616162e-05, "loss": 0.2546, "step": 8370 }, { "epoch": 0.18667771253861257, "grad_norm": 0.4478650689125061, "learning_rate": 1.8328990222274018e-05, "loss": 0.2382, "step": 8375 }, { "epoch": 0.18678916191923264, "grad_norm": 0.6107395887374878, "learning_rate": 1.832705199486196e-05, "loss": 0.4187, "step": 8380 }, { "epoch": 0.1869006112998527, "grad_norm": 0.5083065032958984, "learning_rate": 1.8325112746617603e-05, "loss": 0.3965, "step": 8385 }, { "epoch": 0.18701206068047277, "grad_norm": 0.5986996293067932, "learning_rate": 1.8323172477778683e-05, "loss": 0.3662, "step": 8390 }, { "epoch": 0.18712351006109285, "grad_norm": 0.5546152591705322, "learning_rate": 1.832123118858306e-05, "loss": 0.4417, "step": 8395 }, { "epoch": 0.1872349594417129, "grad_norm": 0.5741242170333862, "learning_rate": 1.8319288879268727e-05, "loss": 0.4439, "step": 8400 }, { "epoch": 0.18734640882233297, "grad_norm": 0.5699114799499512, "learning_rate": 1.8317345550073792e-05, "loss": 0.4508, "step": 8405 }, { "epoch": 0.18745785820295305, "grad_norm": 0.6300404071807861, "learning_rate": 1.8315401201236492e-05, "loss": 0.347, "step": 8410 }, { "epoch": 0.1875693075835731, "grad_norm": 0.5789498090744019, "learning_rate": 1.831345583299519e-05, "loss": 0.408, "step": 8415 }, { "epoch": 0.18768075696419317, "grad_norm": 0.6178359985351562, "learning_rate": 1.831150944558837e-05, "loss": 0.4726, "step": 8420 }, { "epoch": 0.18779220634481325, "grad_norm": 0.4740115702152252, "learning_rate": 1.8309562039254652e-05, "loss": 0.3297, "step": 8425 }, { "epoch": 0.1879036557254333, "grad_norm": 0.39913448691368103, "learning_rate": 1.8307613614232765e-05, "loss": 0.3211, "step": 8430 }, { "epoch": 0.18801510510605338, "grad_norm": 0.4314505159854889, "learning_rate": 1.8305664170761576e-05, "loss": 0.3395, "step": 8435 }, { "epoch": 0.18812655448667345, "grad_norm": 0.5156731605529785, "learning_rate": 1.8303713709080067e-05, "loss": 0.344, "step": 8440 }, { "epoch": 0.1882380038672935, "grad_norm": 0.3862886130809784, "learning_rate": 1.8301762229427352e-05, "loss": 0.4049, "step": 8445 }, { "epoch": 0.18834945324791358, "grad_norm": 0.5977081060409546, "learning_rate": 1.829980973204267e-05, "loss": 0.3067, "step": 8450 }, { "epoch": 0.18846090262853366, "grad_norm": 0.5924422740936279, "learning_rate": 1.8297856217165376e-05, "loss": 0.4148, "step": 8455 }, { "epoch": 0.1885723520091537, "grad_norm": 0.6521643400192261, "learning_rate": 1.8295901685034964e-05, "loss": 0.4086, "step": 8460 }, { "epoch": 0.18868380138977378, "grad_norm": 0.6108610033988953, "learning_rate": 1.8293946135891038e-05, "loss": 0.4278, "step": 8465 }, { "epoch": 0.18879525077039383, "grad_norm": 0.4995235502719879, "learning_rate": 1.8291989569973332e-05, "loss": 0.3273, "step": 8470 }, { "epoch": 0.1889067001510139, "grad_norm": 0.6318859457969666, "learning_rate": 1.8290031987521714e-05, "loss": 0.4842, "step": 8475 }, { "epoch": 0.18901814953163398, "grad_norm": 0.5363388657569885, "learning_rate": 1.828807338877616e-05, "loss": 0.4514, "step": 8480 }, { "epoch": 0.18912959891225403, "grad_norm": 0.5494492650032043, "learning_rate": 1.8286113773976782e-05, "loss": 0.3342, "step": 8485 }, { "epoch": 0.1892410482928741, "grad_norm": 0.5755802989006042, "learning_rate": 1.8284153143363818e-05, "loss": 0.3305, "step": 8490 }, { "epoch": 0.18935249767349419, "grad_norm": 0.632031261920929, "learning_rate": 1.828219149717762e-05, "loss": 0.3841, "step": 8495 }, { "epoch": 0.18946394705411423, "grad_norm": 0.6508500576019287, "learning_rate": 1.8280228835658675e-05, "loss": 0.3259, "step": 8500 }, { "epoch": 0.1895753964347343, "grad_norm": 0.5723993182182312, "learning_rate": 1.8278265159047585e-05, "loss": 0.3421, "step": 8505 }, { "epoch": 0.1896868458153544, "grad_norm": 0.6716227531433105, "learning_rate": 1.8276300467585087e-05, "loss": 0.3498, "step": 8510 }, { "epoch": 0.18979829519597444, "grad_norm": 0.5189501047134399, "learning_rate": 1.8274334761512037e-05, "loss": 0.3458, "step": 8515 }, { "epoch": 0.1899097445765945, "grad_norm": 0.6275649070739746, "learning_rate": 1.827236804106941e-05, "loss": 0.3402, "step": 8520 }, { "epoch": 0.1900211939572146, "grad_norm": 0.6184311509132385, "learning_rate": 1.8270400306498313e-05, "loss": 0.3706, "step": 8525 }, { "epoch": 0.19013264333783464, "grad_norm": 0.5152689814567566, "learning_rate": 1.826843155803998e-05, "loss": 0.3156, "step": 8530 }, { "epoch": 0.19024409271845472, "grad_norm": 0.3839017450809479, "learning_rate": 1.8266461795935758e-05, "loss": 0.2933, "step": 8535 }, { "epoch": 0.1903555420990748, "grad_norm": 0.48906683921813965, "learning_rate": 1.8264491020427128e-05, "loss": 0.4327, "step": 8540 }, { "epoch": 0.19046699147969484, "grad_norm": 0.6690248847007751, "learning_rate": 1.8262519231755694e-05, "loss": 0.4718, "step": 8545 }, { "epoch": 0.19057844086031492, "grad_norm": 0.49581602215766907, "learning_rate": 1.8260546430163173e-05, "loss": 0.3557, "step": 8550 }, { "epoch": 0.190689890240935, "grad_norm": 0.8193932771682739, "learning_rate": 1.8258572615891427e-05, "loss": 0.3851, "step": 8555 }, { "epoch": 0.19080133962155504, "grad_norm": 0.7166968584060669, "learning_rate": 1.825659778918242e-05, "loss": 0.5081, "step": 8560 }, { "epoch": 0.19091278900217512, "grad_norm": 0.7662790417671204, "learning_rate": 1.8254621950278258e-05, "loss": 0.4056, "step": 8565 }, { "epoch": 0.1910242383827952, "grad_norm": 0.6887685656547546, "learning_rate": 1.825264509942116e-05, "loss": 0.3094, "step": 8570 }, { "epoch": 0.19113568776341525, "grad_norm": 0.6675335168838501, "learning_rate": 1.8250667236853473e-05, "loss": 0.4079, "step": 8575 }, { "epoch": 0.19124713714403532, "grad_norm": 0.6034323573112488, "learning_rate": 1.824868836281767e-05, "loss": 0.487, "step": 8580 }, { "epoch": 0.1913585865246554, "grad_norm": 0.4529895782470703, "learning_rate": 1.824670847755634e-05, "loss": 0.3206, "step": 8585 }, { "epoch": 0.19147003590527545, "grad_norm": 0.43103736639022827, "learning_rate": 1.824472758131221e-05, "loss": 0.3437, "step": 8590 }, { "epoch": 0.19158148528589553, "grad_norm": 0.7910636067390442, "learning_rate": 1.8242745674328114e-05, "loss": 0.3933, "step": 8595 }, { "epoch": 0.1916929346665156, "grad_norm": 0.552842915058136, "learning_rate": 1.8240762756847024e-05, "loss": 0.3755, "step": 8600 }, { "epoch": 0.19180438404713565, "grad_norm": 0.5862413644790649, "learning_rate": 1.823877882911203e-05, "loss": 0.3324, "step": 8605 }, { "epoch": 0.19191583342775573, "grad_norm": 0.42190515995025635, "learning_rate": 1.8236793891366346e-05, "loss": 0.3371, "step": 8610 }, { "epoch": 0.1920272828083758, "grad_norm": 0.8216304779052734, "learning_rate": 1.823480794385331e-05, "loss": 0.396, "step": 8615 }, { "epoch": 0.19213873218899585, "grad_norm": 0.4263015389442444, "learning_rate": 1.8232820986816376e-05, "loss": 0.3182, "step": 8620 }, { "epoch": 0.19225018156961593, "grad_norm": 0.608733594417572, "learning_rate": 1.8230833020499145e-05, "loss": 0.4568, "step": 8625 }, { "epoch": 0.192361630950236, "grad_norm": 0.5924399495124817, "learning_rate": 1.8228844045145312e-05, "loss": 0.3843, "step": 8630 }, { "epoch": 0.19247308033085606, "grad_norm": 0.609131395816803, "learning_rate": 1.822685406099872e-05, "loss": 0.3626, "step": 8635 }, { "epoch": 0.19258452971147613, "grad_norm": 0.40351277589797974, "learning_rate": 1.8224863068303322e-05, "loss": 0.2846, "step": 8640 }, { "epoch": 0.1926959790920962, "grad_norm": 0.6606740951538086, "learning_rate": 1.822287106730319e-05, "loss": 0.3876, "step": 8645 }, { "epoch": 0.19280742847271626, "grad_norm": 0.3179605007171631, "learning_rate": 1.8220878058242545e-05, "loss": 0.4238, "step": 8650 }, { "epoch": 0.19291887785333633, "grad_norm": 0.6481133699417114, "learning_rate": 1.82188840413657e-05, "loss": 0.3526, "step": 8655 }, { "epoch": 0.19303032723395638, "grad_norm": 0.5002349019050598, "learning_rate": 1.8216889016917116e-05, "loss": 0.4173, "step": 8660 }, { "epoch": 0.19314177661457646, "grad_norm": 0.5035709738731384, "learning_rate": 1.8214892985141363e-05, "loss": 0.3237, "step": 8665 }, { "epoch": 0.19325322599519654, "grad_norm": 0.7374876737594604, "learning_rate": 1.8212895946283134e-05, "loss": 0.3935, "step": 8670 }, { "epoch": 0.19336467537581659, "grad_norm": 0.5722633004188538, "learning_rate": 1.8210897900587263e-05, "loss": 0.4176, "step": 8675 }, { "epoch": 0.19347612475643666, "grad_norm": 0.56236332654953, "learning_rate": 1.8208898848298684e-05, "loss": 0.2532, "step": 8680 }, { "epoch": 0.19358757413705674, "grad_norm": 0.505115270614624, "learning_rate": 1.820689878966247e-05, "loss": 0.362, "step": 8685 }, { "epoch": 0.1936990235176768, "grad_norm": 0.7024111747741699, "learning_rate": 1.820489772492381e-05, "loss": 0.4478, "step": 8690 }, { "epoch": 0.19381047289829686, "grad_norm": 0.4140602648258209, "learning_rate": 1.8202895654328023e-05, "loss": 0.3881, "step": 8695 }, { "epoch": 0.19392192227891694, "grad_norm": 0.5594643950462341, "learning_rate": 1.8200892578120544e-05, "loss": 0.4082, "step": 8700 }, { "epoch": 0.194033371659537, "grad_norm": 0.5588990449905396, "learning_rate": 1.8198888496546936e-05, "loss": 0.5088, "step": 8705 }, { "epoch": 0.19414482104015707, "grad_norm": 0.3396087884902954, "learning_rate": 1.8196883409852886e-05, "loss": 0.3792, "step": 8710 }, { "epoch": 0.19425627042077714, "grad_norm": 0.40732541680336, "learning_rate": 1.81948773182842e-05, "loss": 0.3382, "step": 8715 }, { "epoch": 0.1943677198013972, "grad_norm": 0.4649874269962311, "learning_rate": 1.8192870222086805e-05, "loss": 0.3198, "step": 8720 }, { "epoch": 0.19447916918201727, "grad_norm": 0.5772071480751038, "learning_rate": 1.8190862121506766e-05, "loss": 0.4351, "step": 8725 }, { "epoch": 0.19459061856263735, "grad_norm": 0.7937235832214355, "learning_rate": 1.8188853016790252e-05, "loss": 0.4203, "step": 8730 }, { "epoch": 0.1947020679432574, "grad_norm": 0.5506812930107117, "learning_rate": 1.8186842908183568e-05, "loss": 0.2649, "step": 8735 }, { "epoch": 0.19481351732387747, "grad_norm": 0.43412524461746216, "learning_rate": 1.8184831795933134e-05, "loss": 0.3928, "step": 8740 }, { "epoch": 0.19492496670449755, "grad_norm": 0.5557735562324524, "learning_rate": 1.81828196802855e-05, "loss": 0.3544, "step": 8745 }, { "epoch": 0.1950364160851176, "grad_norm": 0.6762316823005676, "learning_rate": 1.8180806561487333e-05, "loss": 0.3823, "step": 8750 }, { "epoch": 0.19514786546573767, "grad_norm": 0.5124315619468689, "learning_rate": 1.817879243978543e-05, "loss": 0.3836, "step": 8755 }, { "epoch": 0.19525931484635775, "grad_norm": 0.6040787100791931, "learning_rate": 1.8176777315426703e-05, "loss": 0.3722, "step": 8760 }, { "epoch": 0.1953707642269778, "grad_norm": 0.8811430335044861, "learning_rate": 1.8174761188658196e-05, "loss": 0.3777, "step": 8765 }, { "epoch": 0.19548221360759788, "grad_norm": 0.40771323442459106, "learning_rate": 1.817274405972706e-05, "loss": 0.3874, "step": 8770 }, { "epoch": 0.19559366298821795, "grad_norm": 0.46913596987724304, "learning_rate": 1.8170725928880593e-05, "loss": 0.3714, "step": 8775 }, { "epoch": 0.195705112368838, "grad_norm": 0.7020705342292786, "learning_rate": 1.8168706796366192e-05, "loss": 0.3432, "step": 8780 }, { "epoch": 0.19581656174945808, "grad_norm": 0.5612475872039795, "learning_rate": 1.8166686662431388e-05, "loss": 0.2656, "step": 8785 }, { "epoch": 0.19592801113007816, "grad_norm": 0.4503854513168335, "learning_rate": 1.816466552732384e-05, "loss": 0.449, "step": 8790 }, { "epoch": 0.1960394605106982, "grad_norm": 0.6024184226989746, "learning_rate": 1.816264339129132e-05, "loss": 0.3781, "step": 8795 }, { "epoch": 0.19615090989131828, "grad_norm": 0.4631085991859436, "learning_rate": 1.8160620254581727e-05, "loss": 0.226, "step": 8800 }, { "epoch": 0.19626235927193836, "grad_norm": 0.6948122978210449, "learning_rate": 1.8158596117443078e-05, "loss": 0.3695, "step": 8805 }, { "epoch": 0.1963738086525584, "grad_norm": 0.8757532238960266, "learning_rate": 1.815657098012352e-05, "loss": 0.6121, "step": 8810 }, { "epoch": 0.19648525803317848, "grad_norm": 0.5438439249992371, "learning_rate": 1.8154544842871323e-05, "loss": 0.3557, "step": 8815 }, { "epoch": 0.19659670741379856, "grad_norm": 0.5261885523796082, "learning_rate": 1.815251770593487e-05, "loss": 0.4272, "step": 8820 }, { "epoch": 0.1967081567944186, "grad_norm": 0.6569040417671204, "learning_rate": 1.815048956956267e-05, "loss": 0.4891, "step": 8825 }, { "epoch": 0.19681960617503869, "grad_norm": 0.5094635486602783, "learning_rate": 1.8148460434003363e-05, "loss": 0.365, "step": 8830 }, { "epoch": 0.19693105555565876, "grad_norm": 0.5861701965332031, "learning_rate": 1.8146430299505705e-05, "loss": 0.3636, "step": 8835 }, { "epoch": 0.1970425049362788, "grad_norm": 0.5916622281074524, "learning_rate": 1.814439916631857e-05, "loss": 0.2662, "step": 8840 }, { "epoch": 0.1971539543168989, "grad_norm": 0.6713497638702393, "learning_rate": 1.8142367034690967e-05, "loss": 0.4473, "step": 8845 }, { "epoch": 0.19726540369751894, "grad_norm": 0.38671842217445374, "learning_rate": 1.8140333904872013e-05, "loss": 0.3107, "step": 8850 }, { "epoch": 0.19737685307813901, "grad_norm": 0.35884755849838257, "learning_rate": 1.8138299777110953e-05, "loss": 0.3558, "step": 8855 }, { "epoch": 0.1974883024587591, "grad_norm": 0.6742395758628845, "learning_rate": 1.813626465165716e-05, "loss": 0.4218, "step": 8860 }, { "epoch": 0.19759975183937914, "grad_norm": 0.6200085878372192, "learning_rate": 1.8134228528760124e-05, "loss": 0.2595, "step": 8865 }, { "epoch": 0.19771120121999922, "grad_norm": 0.30860522389411926, "learning_rate": 1.8132191408669458e-05, "loss": 0.3566, "step": 8870 }, { "epoch": 0.1978226506006193, "grad_norm": 0.5478228330612183, "learning_rate": 1.8130153291634893e-05, "loss": 0.2773, "step": 8875 }, { "epoch": 0.19793409998123934, "grad_norm": 0.6432132720947266, "learning_rate": 1.8128114177906292e-05, "loss": 0.3881, "step": 8880 }, { "epoch": 0.19804554936185942, "grad_norm": 0.5374806523323059, "learning_rate": 1.812607406773363e-05, "loss": 0.2712, "step": 8885 }, { "epoch": 0.1981569987424795, "grad_norm": 0.5458959937095642, "learning_rate": 1.8124032961367012e-05, "loss": 0.4764, "step": 8890 }, { "epoch": 0.19826844812309954, "grad_norm": 0.671455979347229, "learning_rate": 1.8121990859056664e-05, "loss": 0.3211, "step": 8895 }, { "epoch": 0.19837989750371962, "grad_norm": 0.6042885780334473, "learning_rate": 1.8119947761052924e-05, "loss": 0.3583, "step": 8900 }, { "epoch": 0.1984913468843397, "grad_norm": 0.7543115019798279, "learning_rate": 1.811790366760627e-05, "loss": 0.41, "step": 8905 }, { "epoch": 0.19860279626495975, "grad_norm": 0.40042057633399963, "learning_rate": 1.8115858578967283e-05, "loss": 0.4059, "step": 8910 }, { "epoch": 0.19871424564557982, "grad_norm": 0.5100334286689758, "learning_rate": 1.811381249538668e-05, "loss": 0.3285, "step": 8915 }, { "epoch": 0.1988256950261999, "grad_norm": 0.7095329165458679, "learning_rate": 1.8111765417115292e-05, "loss": 0.3764, "step": 8920 }, { "epoch": 0.19893714440681995, "grad_norm": 0.4244353771209717, "learning_rate": 1.810971734440408e-05, "loss": 0.2824, "step": 8925 }, { "epoch": 0.19904859378744003, "grad_norm": 0.6726874709129333, "learning_rate": 1.810766827750412e-05, "loss": 0.3899, "step": 8930 }, { "epoch": 0.1991600431680601, "grad_norm": 0.635212779045105, "learning_rate": 1.810561821666661e-05, "loss": 0.3438, "step": 8935 }, { "epoch": 0.19927149254868015, "grad_norm": 0.5831514596939087, "learning_rate": 1.810356716214287e-05, "loss": 0.4083, "step": 8940 }, { "epoch": 0.19938294192930023, "grad_norm": 0.4919126331806183, "learning_rate": 1.8101515114184348e-05, "loss": 0.3371, "step": 8945 }, { "epoch": 0.1994943913099203, "grad_norm": 0.5806923508644104, "learning_rate": 1.8099462073042607e-05, "loss": 0.3543, "step": 8950 }, { "epoch": 0.19960584069054035, "grad_norm": 0.5970994234085083, "learning_rate": 1.8097408038969332e-05, "loss": 0.3174, "step": 8955 }, { "epoch": 0.19971729007116043, "grad_norm": 0.8126015067100525, "learning_rate": 1.8095353012216334e-05, "loss": 0.3524, "step": 8960 }, { "epoch": 0.1998287394517805, "grad_norm": 0.7372391819953918, "learning_rate": 1.8093296993035546e-05, "loss": 0.427, "step": 8965 }, { "epoch": 0.19994018883240056, "grad_norm": 0.523418664932251, "learning_rate": 1.8091239981679016e-05, "loss": 0.4781, "step": 8970 }, { "epoch": 0.20005163821302063, "grad_norm": 0.553030252456665, "learning_rate": 1.808918197839892e-05, "loss": 0.3167, "step": 8975 }, { "epoch": 0.2001630875936407, "grad_norm": 0.7697877287864685, "learning_rate": 1.8087122983447548e-05, "loss": 0.4279, "step": 8980 }, { "epoch": 0.20027453697426076, "grad_norm": 0.5038485527038574, "learning_rate": 1.8085062997077326e-05, "loss": 0.3461, "step": 8985 }, { "epoch": 0.20038598635488084, "grad_norm": 0.44742345809936523, "learning_rate": 1.8083002019540784e-05, "loss": 0.3014, "step": 8990 }, { "epoch": 0.2004974357355009, "grad_norm": 0.5444981455802917, "learning_rate": 1.808094005109059e-05, "loss": 0.4331, "step": 8995 }, { "epoch": 0.20060888511612096, "grad_norm": 0.6124820709228516, "learning_rate": 1.807887709197952e-05, "loss": 0.3692, "step": 9000 }, { "epoch": 0.20072033449674104, "grad_norm": 0.533619225025177, "learning_rate": 1.8076813142460478e-05, "loss": 0.2453, "step": 9005 }, { "epoch": 0.20083178387736111, "grad_norm": 0.5671890377998352, "learning_rate": 1.8074748202786484e-05, "loss": 0.2173, "step": 9010 }, { "epoch": 0.20094323325798116, "grad_norm": 0.6472200155258179, "learning_rate": 1.8072682273210692e-05, "loss": 0.4271, "step": 9015 }, { "epoch": 0.20105468263860124, "grad_norm": 0.8244146704673767, "learning_rate": 1.8070615353986362e-05, "loss": 0.3664, "step": 9020 }, { "epoch": 0.20116613201922132, "grad_norm": 0.4256848990917206, "learning_rate": 1.8068547445366885e-05, "loss": 0.2756, "step": 9025 }, { "epoch": 0.20127758139984137, "grad_norm": 0.8401066064834595, "learning_rate": 1.8066478547605774e-05, "loss": 0.3697, "step": 9030 }, { "epoch": 0.20138903078046144, "grad_norm": 0.49587559700012207, "learning_rate": 1.8064408660956652e-05, "loss": 0.3255, "step": 9035 }, { "epoch": 0.2015004801610815, "grad_norm": 0.4269142150878906, "learning_rate": 1.8062337785673284e-05, "loss": 0.3515, "step": 9040 }, { "epoch": 0.20161192954170157, "grad_norm": 0.38900962471961975, "learning_rate": 1.806026592200953e-05, "loss": 0.3871, "step": 9045 }, { "epoch": 0.20172337892232164, "grad_norm": 0.6002835631370544, "learning_rate": 1.8058193070219387e-05, "loss": 0.3051, "step": 9050 }, { "epoch": 0.2018348283029417, "grad_norm": 0.5478485226631165, "learning_rate": 1.8056119230556978e-05, "loss": 0.3902, "step": 9055 }, { "epoch": 0.20194627768356177, "grad_norm": 0.5159756541252136, "learning_rate": 1.8054044403276534e-05, "loss": 0.4322, "step": 9060 }, { "epoch": 0.20205772706418185, "grad_norm": 0.731988251209259, "learning_rate": 1.8051968588632413e-05, "loss": 0.4703, "step": 9065 }, { "epoch": 0.2021691764448019, "grad_norm": 0.6769568920135498, "learning_rate": 1.8049891786879093e-05, "loss": 0.2977, "step": 9070 }, { "epoch": 0.20228062582542197, "grad_norm": 0.5943720936775208, "learning_rate": 1.804781399827118e-05, "loss": 0.4456, "step": 9075 }, { "epoch": 0.20239207520604205, "grad_norm": 0.4914182424545288, "learning_rate": 1.8045735223063384e-05, "loss": 0.3044, "step": 9080 }, { "epoch": 0.2025035245866621, "grad_norm": 0.5965585112571716, "learning_rate": 1.8043655461510558e-05, "loss": 0.2634, "step": 9085 }, { "epoch": 0.20261497396728217, "grad_norm": 0.39462730288505554, "learning_rate": 1.8041574713867658e-05, "loss": 0.3493, "step": 9090 }, { "epoch": 0.20272642334790225, "grad_norm": 0.6479722857475281, "learning_rate": 1.803949298038977e-05, "loss": 0.2632, "step": 9095 }, { "epoch": 0.2028378727285223, "grad_norm": 0.5808325409889221, "learning_rate": 1.80374102613321e-05, "loss": 0.3428, "step": 9100 }, { "epoch": 0.20294932210914238, "grad_norm": 2.2530341148376465, "learning_rate": 1.8035326556949968e-05, "loss": 0.3293, "step": 9105 }, { "epoch": 0.20306077148976245, "grad_norm": 0.5054280161857605, "learning_rate": 1.8033241867498826e-05, "loss": 0.3918, "step": 9110 }, { "epoch": 0.2031722208703825, "grad_norm": 0.5822464227676392, "learning_rate": 1.8031156193234237e-05, "loss": 0.3969, "step": 9115 }, { "epoch": 0.20328367025100258, "grad_norm": 0.7139219045639038, "learning_rate": 1.802906953441189e-05, "loss": 0.4328, "step": 9120 }, { "epoch": 0.20339511963162266, "grad_norm": 0.7321597933769226, "learning_rate": 1.8026981891287593e-05, "loss": 0.3183, "step": 9125 }, { "epoch": 0.2035065690122427, "grad_norm": 0.6543447375297546, "learning_rate": 1.8024893264117275e-05, "loss": 0.4174, "step": 9130 }, { "epoch": 0.20361801839286278, "grad_norm": 0.6497056484222412, "learning_rate": 1.8022803653156983e-05, "loss": 0.4571, "step": 9135 }, { "epoch": 0.20372946777348286, "grad_norm": 0.5019301176071167, "learning_rate": 1.8020713058662894e-05, "loss": 0.4811, "step": 9140 }, { "epoch": 0.2038409171541029, "grad_norm": 0.6254482269287109, "learning_rate": 1.8018621480891292e-05, "loss": 0.3229, "step": 9145 }, { "epoch": 0.20395236653472298, "grad_norm": 0.542697548866272, "learning_rate": 1.801652892009859e-05, "loss": 0.3139, "step": 9150 }, { "epoch": 0.20406381591534306, "grad_norm": 0.5235081315040588, "learning_rate": 1.8014435376541325e-05, "loss": 0.3208, "step": 9155 }, { "epoch": 0.2041752652959631, "grad_norm": 0.7484645843505859, "learning_rate": 1.801234085047614e-05, "loss": 0.3552, "step": 9160 }, { "epoch": 0.2042867146765832, "grad_norm": 0.9511454105377197, "learning_rate": 1.8010245342159812e-05, "loss": 0.2527, "step": 9165 }, { "epoch": 0.20439816405720326, "grad_norm": 0.4497017562389374, "learning_rate": 1.8008148851849237e-05, "loss": 0.3487, "step": 9170 }, { "epoch": 0.2045096134378233, "grad_norm": 0.5408441424369812, "learning_rate": 1.8006051379801425e-05, "loss": 0.3263, "step": 9175 }, { "epoch": 0.2046210628184434, "grad_norm": 0.6289383172988892, "learning_rate": 1.800395292627351e-05, "loss": 0.4548, "step": 9180 }, { "epoch": 0.20473251219906347, "grad_norm": 0.5261548757553101, "learning_rate": 1.8001853491522753e-05, "loss": 0.3654, "step": 9185 }, { "epoch": 0.20484396157968351, "grad_norm": 0.5952202081680298, "learning_rate": 1.7999753075806516e-05, "loss": 0.4186, "step": 9190 }, { "epoch": 0.2049554109603036, "grad_norm": 0.9061971306800842, "learning_rate": 1.7997651679382303e-05, "loss": 0.2387, "step": 9195 }, { "epoch": 0.20506686034092367, "grad_norm": 0.661060094833374, "learning_rate": 1.7995549302507725e-05, "loss": 0.3395, "step": 9200 }, { "epoch": 0.20517830972154372, "grad_norm": 0.520475447177887, "learning_rate": 1.7993445945440523e-05, "loss": 0.3408, "step": 9205 }, { "epoch": 0.2052897591021638, "grad_norm": 0.5380303859710693, "learning_rate": 1.7991341608438546e-05, "loss": 0.3459, "step": 9210 }, { "epoch": 0.20540120848278387, "grad_norm": 0.674584686756134, "learning_rate": 1.798923629175977e-05, "loss": 0.3531, "step": 9215 }, { "epoch": 0.20551265786340392, "grad_norm": 0.5248068571090698, "learning_rate": 1.798712999566229e-05, "loss": 0.343, "step": 9220 }, { "epoch": 0.205624107244024, "grad_norm": 0.46953219175338745, "learning_rate": 1.7985022720404332e-05, "loss": 0.4012, "step": 9225 }, { "epoch": 0.20573555662464404, "grad_norm": 0.5328274369239807, "learning_rate": 1.7982914466244216e-05, "loss": 0.4516, "step": 9230 }, { "epoch": 0.20584700600526412, "grad_norm": 0.5312590003013611, "learning_rate": 1.7980805233440406e-05, "loss": 0.3619, "step": 9235 }, { "epoch": 0.2059584553858842, "grad_norm": 0.4466384947299957, "learning_rate": 1.797869502225148e-05, "loss": 0.3922, "step": 9240 }, { "epoch": 0.20606990476650425, "grad_norm": 0.433711975812912, "learning_rate": 1.7976583832936124e-05, "loss": 0.4335, "step": 9245 }, { "epoch": 0.20618135414712432, "grad_norm": 0.612484335899353, "learning_rate": 1.7974471665753164e-05, "loss": 0.3176, "step": 9250 }, { "epoch": 0.2062928035277444, "grad_norm": 0.5125216841697693, "learning_rate": 1.7972358520961535e-05, "loss": 0.389, "step": 9255 }, { "epoch": 0.20640425290836445, "grad_norm": 0.7068095803260803, "learning_rate": 1.7970244398820283e-05, "loss": 0.3994, "step": 9260 }, { "epoch": 0.20651570228898453, "grad_norm": 0.6456936597824097, "learning_rate": 1.7968129299588592e-05, "loss": 0.3172, "step": 9265 }, { "epoch": 0.2066271516696046, "grad_norm": 0.5113691091537476, "learning_rate": 1.796601322352575e-05, "loss": 0.3565, "step": 9270 }, { "epoch": 0.20673860105022465, "grad_norm": 0.6549059152603149, "learning_rate": 1.796389617089118e-05, "loss": 0.3518, "step": 9275 }, { "epoch": 0.20685005043084473, "grad_norm": 0.5204808115959167, "learning_rate": 1.7961778141944407e-05, "loss": 0.2559, "step": 9280 }, { "epoch": 0.2069614998114648, "grad_norm": 0.6168663501739502, "learning_rate": 1.795965913694509e-05, "loss": 0.4356, "step": 9285 }, { "epoch": 0.20707294919208485, "grad_norm": 0.47157636284828186, "learning_rate": 1.7957539156153003e-05, "loss": 0.3213, "step": 9290 }, { "epoch": 0.20718439857270493, "grad_norm": 0.4153405427932739, "learning_rate": 1.7955418199828035e-05, "loss": 0.4165, "step": 9295 }, { "epoch": 0.207295847953325, "grad_norm": 0.6035289764404297, "learning_rate": 1.7953296268230203e-05, "loss": 0.4489, "step": 9300 }, { "epoch": 0.20740729733394506, "grad_norm": 0.9584718942642212, "learning_rate": 1.795117336161964e-05, "loss": 0.3088, "step": 9305 }, { "epoch": 0.20751874671456513, "grad_norm": 0.533146858215332, "learning_rate": 1.7949049480256596e-05, "loss": 0.3791, "step": 9310 }, { "epoch": 0.2076301960951852, "grad_norm": 0.5689879655838013, "learning_rate": 1.7946924624401445e-05, "loss": 0.4243, "step": 9315 }, { "epoch": 0.20774164547580526, "grad_norm": 0.7022379636764526, "learning_rate": 1.7944798794314676e-05, "loss": 0.3408, "step": 9320 }, { "epoch": 0.20785309485642534, "grad_norm": 0.6284753084182739, "learning_rate": 1.7942671990256895e-05, "loss": 0.288, "step": 9325 }, { "epoch": 0.2079645442370454, "grad_norm": 0.5168965458869934, "learning_rate": 1.794054421248884e-05, "loss": 0.4631, "step": 9330 }, { "epoch": 0.20807599361766546, "grad_norm": 0.797507107257843, "learning_rate": 1.7938415461271356e-05, "loss": 0.392, "step": 9335 }, { "epoch": 0.20818744299828554, "grad_norm": 0.45130276679992676, "learning_rate": 1.793628573686541e-05, "loss": 0.3337, "step": 9340 }, { "epoch": 0.20829889237890561, "grad_norm": 0.5223218202590942, "learning_rate": 1.793415503953209e-05, "loss": 0.3618, "step": 9345 }, { "epoch": 0.20841034175952566, "grad_norm": 0.5980708003044128, "learning_rate": 1.793202336953261e-05, "loss": 0.5088, "step": 9350 }, { "epoch": 0.20852179114014574, "grad_norm": 0.507690966129303, "learning_rate": 1.7929890727128287e-05, "loss": 0.309, "step": 9355 }, { "epoch": 0.20863324052076582, "grad_norm": 0.565941572189331, "learning_rate": 1.792775711258057e-05, "loss": 0.4123, "step": 9360 }, { "epoch": 0.20874468990138587, "grad_norm": 0.4737381637096405, "learning_rate": 1.7925622526151022e-05, "loss": 0.3996, "step": 9365 }, { "epoch": 0.20885613928200594, "grad_norm": 0.610497772693634, "learning_rate": 1.7923486968101332e-05, "loss": 0.4446, "step": 9370 }, { "epoch": 0.20896758866262602, "grad_norm": 0.6589722037315369, "learning_rate": 1.79213504386933e-05, "loss": 0.3663, "step": 9375 }, { "epoch": 0.20907903804324607, "grad_norm": 0.518922746181488, "learning_rate": 1.7919212938188843e-05, "loss": 0.296, "step": 9380 }, { "epoch": 0.20919048742386614, "grad_norm": 0.6134815216064453, "learning_rate": 1.7917074466850012e-05, "loss": 0.5138, "step": 9385 }, { "epoch": 0.20930193680448622, "grad_norm": 0.42902672290802, "learning_rate": 1.791493502493896e-05, "loss": 0.3863, "step": 9390 }, { "epoch": 0.20941338618510627, "grad_norm": 0.4924355149269104, "learning_rate": 1.7912794612717968e-05, "loss": 0.3656, "step": 9395 }, { "epoch": 0.20952483556572635, "grad_norm": 0.4996645748615265, "learning_rate": 1.7910653230449434e-05, "loss": 0.5112, "step": 9400 }, { "epoch": 0.20963628494634642, "grad_norm": 0.6504985690116882, "learning_rate": 1.7908510878395874e-05, "loss": 0.3295, "step": 9405 }, { "epoch": 0.20974773432696647, "grad_norm": 0.5653944611549377, "learning_rate": 1.7906367556819925e-05, "loss": 0.3313, "step": 9410 }, { "epoch": 0.20985918370758655, "grad_norm": 0.3870103359222412, "learning_rate": 1.790422326598434e-05, "loss": 0.3556, "step": 9415 }, { "epoch": 0.2099706330882066, "grad_norm": 0.5160222053527832, "learning_rate": 1.7902078006151996e-05, "loss": 0.3648, "step": 9420 }, { "epoch": 0.21008208246882668, "grad_norm": 0.4862663149833679, "learning_rate": 1.789993177758588e-05, "loss": 0.3417, "step": 9425 }, { "epoch": 0.21019353184944675, "grad_norm": 0.5651835203170776, "learning_rate": 1.789778458054911e-05, "loss": 0.4301, "step": 9430 }, { "epoch": 0.2103049812300668, "grad_norm": 0.6121236085891724, "learning_rate": 1.789563641530491e-05, "loss": 0.4275, "step": 9435 }, { "epoch": 0.21041643061068688, "grad_norm": 0.5317579507827759, "learning_rate": 1.789348728211663e-05, "loss": 0.3764, "step": 9440 }, { "epoch": 0.21052787999130695, "grad_norm": 0.8041971325874329, "learning_rate": 1.7891337181247738e-05, "loss": 0.3525, "step": 9445 }, { "epoch": 0.210639329371927, "grad_norm": 0.5068250894546509, "learning_rate": 1.788918611296182e-05, "loss": 0.3994, "step": 9450 }, { "epoch": 0.21075077875254708, "grad_norm": 0.6096692681312561, "learning_rate": 1.788703407752258e-05, "loss": 0.388, "step": 9455 }, { "epoch": 0.21086222813316716, "grad_norm": 0.4330832064151764, "learning_rate": 1.788488107519384e-05, "loss": 0.3733, "step": 9460 }, { "epoch": 0.2109736775137872, "grad_norm": 0.3921918272972107, "learning_rate": 1.7882727106239542e-05, "loss": 0.3411, "step": 9465 }, { "epoch": 0.21108512689440728, "grad_norm": 0.6018163561820984, "learning_rate": 1.7880572170923747e-05, "loss": 0.3416, "step": 9470 }, { "epoch": 0.21119657627502736, "grad_norm": 0.6241925954818726, "learning_rate": 1.787841626951063e-05, "loss": 0.3981, "step": 9475 }, { "epoch": 0.2113080256556474, "grad_norm": 0.6851200461387634, "learning_rate": 1.7876259402264496e-05, "loss": 0.2918, "step": 9480 }, { "epoch": 0.21141947503626748, "grad_norm": 0.6253845691680908, "learning_rate": 1.787410156944975e-05, "loss": 0.4206, "step": 9485 }, { "epoch": 0.21153092441688756, "grad_norm": 0.5972841382026672, "learning_rate": 1.7871942771330932e-05, "loss": 0.3712, "step": 9490 }, { "epoch": 0.2116423737975076, "grad_norm": 0.6291491389274597, "learning_rate": 1.786978300817269e-05, "loss": 0.3548, "step": 9495 }, { "epoch": 0.2117538231781277, "grad_norm": 0.5687154531478882, "learning_rate": 1.78676222802398e-05, "loss": 0.3425, "step": 9500 }, { "epoch": 0.21186527255874776, "grad_norm": 0.7182362079620361, "learning_rate": 1.7865460587797142e-05, "loss": 0.3582, "step": 9505 }, { "epoch": 0.2119767219393678, "grad_norm": 0.6228922605514526, "learning_rate": 1.7863297931109733e-05, "loss": 0.3037, "step": 9510 }, { "epoch": 0.2120881713199879, "grad_norm": 0.3843955397605896, "learning_rate": 1.7861134310442692e-05, "loss": 0.4066, "step": 9515 }, { "epoch": 0.21219962070060797, "grad_norm": 0.47894778847694397, "learning_rate": 1.7858969726061262e-05, "loss": 0.3569, "step": 9520 }, { "epoch": 0.21231107008122801, "grad_norm": 0.6425552368164062, "learning_rate": 1.7856804178230805e-05, "loss": 0.4807, "step": 9525 }, { "epoch": 0.2124225194618481, "grad_norm": 0.5509926676750183, "learning_rate": 1.7854637667216802e-05, "loss": 0.4651, "step": 9530 }, { "epoch": 0.21253396884246817, "grad_norm": 0.6003994941711426, "learning_rate": 1.7852470193284848e-05, "loss": 0.2946, "step": 9535 }, { "epoch": 0.21264541822308822, "grad_norm": 0.4288851022720337, "learning_rate": 1.785030175670066e-05, "loss": 0.2704, "step": 9540 }, { "epoch": 0.2127568676037083, "grad_norm": 0.5786031484603882, "learning_rate": 1.7848132357730078e-05, "loss": 0.2123, "step": 9545 }, { "epoch": 0.21286831698432837, "grad_norm": 0.4634719789028168, "learning_rate": 1.784596199663904e-05, "loss": 0.3564, "step": 9550 }, { "epoch": 0.21297976636494842, "grad_norm": 0.518600583076477, "learning_rate": 1.7843790673693627e-05, "loss": 0.3986, "step": 9555 }, { "epoch": 0.2130912157455685, "grad_norm": 0.46757927536964417, "learning_rate": 1.784161838916002e-05, "loss": 0.3545, "step": 9560 }, { "epoch": 0.21320266512618857, "grad_norm": 0.660696804523468, "learning_rate": 1.7839445143304524e-05, "loss": 0.3854, "step": 9565 }, { "epoch": 0.21331411450680862, "grad_norm": 0.41932207345962524, "learning_rate": 1.783727093639357e-05, "loss": 0.3081, "step": 9570 }, { "epoch": 0.2134255638874287, "grad_norm": 0.5107130408287048, "learning_rate": 1.783509576869369e-05, "loss": 0.3752, "step": 9575 }, { "epoch": 0.21353701326804878, "grad_norm": 0.5685948133468628, "learning_rate": 1.7832919640471548e-05, "loss": 0.3624, "step": 9580 }, { "epoch": 0.21364846264866882, "grad_norm": 0.4625929296016693, "learning_rate": 1.7830742551993916e-05, "loss": 0.4257, "step": 9585 }, { "epoch": 0.2137599120292889, "grad_norm": 0.6444830894470215, "learning_rate": 1.7828564503527696e-05, "loss": 0.2999, "step": 9590 }, { "epoch": 0.21387136140990898, "grad_norm": 0.5308654308319092, "learning_rate": 1.7826385495339892e-05, "loss": 0.5006, "step": 9595 }, { "epoch": 0.21398281079052903, "grad_norm": 0.549062192440033, "learning_rate": 1.7824205527697635e-05, "loss": 0.3594, "step": 9600 }, { "epoch": 0.2140942601711491, "grad_norm": 0.4222000241279602, "learning_rate": 1.782202460086818e-05, "loss": 0.3319, "step": 9605 }, { "epoch": 0.21420570955176915, "grad_norm": 0.32505983114242554, "learning_rate": 1.781984271511888e-05, "loss": 0.3907, "step": 9610 }, { "epoch": 0.21431715893238923, "grad_norm": 0.5011066198348999, "learning_rate": 1.7817659870717227e-05, "loss": 0.3543, "step": 9615 }, { "epoch": 0.2144286083130093, "grad_norm": 0.5036129355430603, "learning_rate": 1.7815476067930816e-05, "loss": 0.4492, "step": 9620 }, { "epoch": 0.21454005769362935, "grad_norm": 0.40307238698005676, "learning_rate": 1.7813291307027363e-05, "loss": 0.2483, "step": 9625 }, { "epoch": 0.21465150707424943, "grad_norm": 0.8827428221702576, "learning_rate": 1.7811105588274705e-05, "loss": 0.337, "step": 9630 }, { "epoch": 0.2147629564548695, "grad_norm": 0.497211217880249, "learning_rate": 1.7808918911940795e-05, "loss": 0.4192, "step": 9635 }, { "epoch": 0.21487440583548956, "grad_norm": 0.5260987281799316, "learning_rate": 1.7806731278293706e-05, "loss": 0.4036, "step": 9640 }, { "epoch": 0.21498585521610963, "grad_norm": 0.6299923658370972, "learning_rate": 1.7804542687601614e-05, "loss": 0.3552, "step": 9645 }, { "epoch": 0.2150973045967297, "grad_norm": 0.5604374408721924, "learning_rate": 1.7802353140132835e-05, "loss": 0.2949, "step": 9650 }, { "epoch": 0.21520875397734976, "grad_norm": 0.7191600203514099, "learning_rate": 1.7800162636155786e-05, "loss": 0.4364, "step": 9655 }, { "epoch": 0.21532020335796984, "grad_norm": 0.6973847150802612, "learning_rate": 1.7797971175939004e-05, "loss": 0.3598, "step": 9660 }, { "epoch": 0.2154316527385899, "grad_norm": 0.6464542150497437, "learning_rate": 1.779577875975115e-05, "loss": 0.3229, "step": 9665 }, { "epoch": 0.21554310211920996, "grad_norm": 0.5332272052764893, "learning_rate": 1.779358538786099e-05, "loss": 0.3024, "step": 9670 }, { "epoch": 0.21565455149983004, "grad_norm": 0.5820956826210022, "learning_rate": 1.779139106053742e-05, "loss": 0.2843, "step": 9675 }, { "epoch": 0.21576600088045012, "grad_norm": 0.746467113494873, "learning_rate": 1.7789195778049448e-05, "loss": 0.3948, "step": 9680 }, { "epoch": 0.21587745026107016, "grad_norm": 0.593795895576477, "learning_rate": 1.7786999540666197e-05, "loss": 0.4381, "step": 9685 }, { "epoch": 0.21598889964169024, "grad_norm": 0.568673849105835, "learning_rate": 1.7784802348656906e-05, "loss": 0.2747, "step": 9690 }, { "epoch": 0.21610034902231032, "grad_norm": 0.5895990133285522, "learning_rate": 1.778260420229094e-05, "loss": 0.266, "step": 9695 }, { "epoch": 0.21621179840293037, "grad_norm": 0.45582717657089233, "learning_rate": 1.778040510183777e-05, "loss": 0.3459, "step": 9700 }, { "epoch": 0.21632324778355044, "grad_norm": 0.5722758769989014, "learning_rate": 1.7778205047566987e-05, "loss": 0.3353, "step": 9705 }, { "epoch": 0.21643469716417052, "grad_norm": 0.492655873298645, "learning_rate": 1.7776004039748307e-05, "loss": 0.3778, "step": 9710 }, { "epoch": 0.21654614654479057, "grad_norm": 0.7253314256668091, "learning_rate": 1.777380207865155e-05, "loss": 0.4439, "step": 9715 }, { "epoch": 0.21665759592541065, "grad_norm": 0.6914671659469604, "learning_rate": 1.777159916454667e-05, "loss": 0.3464, "step": 9720 }, { "epoch": 0.21676904530603072, "grad_norm": 0.4587441682815552, "learning_rate": 1.7769395297703715e-05, "loss": 0.3654, "step": 9725 }, { "epoch": 0.21688049468665077, "grad_norm": 0.6213597059249878, "learning_rate": 1.7767190478392863e-05, "loss": 0.4613, "step": 9730 }, { "epoch": 0.21699194406727085, "grad_norm": 0.4911822974681854, "learning_rate": 1.7764984706884417e-05, "loss": 0.2427, "step": 9735 }, { "epoch": 0.21710339344789092, "grad_norm": 0.4868038296699524, "learning_rate": 1.776277798344878e-05, "loss": 0.438, "step": 9740 }, { "epoch": 0.21721484282851097, "grad_norm": 0.8504787683486938, "learning_rate": 1.7760570308356485e-05, "loss": 0.3359, "step": 9745 }, { "epoch": 0.21732629220913105, "grad_norm": 0.5720949172973633, "learning_rate": 1.775836168187817e-05, "loss": 0.5129, "step": 9750 }, { "epoch": 0.21743774158975113, "grad_norm": 0.43098098039627075, "learning_rate": 1.7756152104284602e-05, "loss": 0.3171, "step": 9755 }, { "epoch": 0.21754919097037118, "grad_norm": 0.5028066635131836, "learning_rate": 1.775394157584665e-05, "loss": 0.3363, "step": 9760 }, { "epoch": 0.21766064035099125, "grad_norm": 0.47206565737724304, "learning_rate": 1.7751730096835314e-05, "loss": 0.3361, "step": 9765 }, { "epoch": 0.21777208973161133, "grad_norm": 0.5294880270957947, "learning_rate": 1.7749517667521702e-05, "loss": 0.494, "step": 9770 }, { "epoch": 0.21788353911223138, "grad_norm": 0.723375678062439, "learning_rate": 1.7747304288177037e-05, "loss": 0.3211, "step": 9775 }, { "epoch": 0.21799498849285145, "grad_norm": 0.5910930633544922, "learning_rate": 1.7745089959072672e-05, "loss": 0.3621, "step": 9780 }, { "epoch": 0.21810643787347153, "grad_norm": 0.790056586265564, "learning_rate": 1.7742874680480057e-05, "loss": 0.4886, "step": 9785 }, { "epoch": 0.21821788725409158, "grad_norm": 0.5438470840454102, "learning_rate": 1.7740658452670775e-05, "loss": 0.4396, "step": 9790 }, { "epoch": 0.21832933663471166, "grad_norm": 0.9391214847564697, "learning_rate": 1.7738441275916515e-05, "loss": 0.3882, "step": 9795 }, { "epoch": 0.2184407860153317, "grad_norm": 0.6383192539215088, "learning_rate": 1.7736223150489085e-05, "loss": 0.4133, "step": 9800 }, { "epoch": 0.21855223539595178, "grad_norm": 0.5294129848480225, "learning_rate": 1.7734004076660413e-05, "loss": 0.3046, "step": 9805 }, { "epoch": 0.21866368477657186, "grad_norm": 0.3723093569278717, "learning_rate": 1.7731784054702538e-05, "loss": 0.4108, "step": 9810 }, { "epoch": 0.2187751341571919, "grad_norm": 0.4764557480812073, "learning_rate": 1.7729563084887615e-05, "loss": 0.373, "step": 9815 }, { "epoch": 0.21888658353781199, "grad_norm": 0.8178969025611877, "learning_rate": 1.7727341167487925e-05, "loss": 0.3919, "step": 9820 }, { "epoch": 0.21899803291843206, "grad_norm": 0.5052682757377625, "learning_rate": 1.772511830277585e-05, "loss": 0.3084, "step": 9825 }, { "epoch": 0.2191094822990521, "grad_norm": 0.617719829082489, "learning_rate": 1.77228944910239e-05, "loss": 0.3815, "step": 9830 }, { "epoch": 0.2192209316796722, "grad_norm": 0.43401801586151123, "learning_rate": 1.77206697325047e-05, "loss": 0.3949, "step": 9835 }, { "epoch": 0.21933238106029226, "grad_norm": 0.4182285964488983, "learning_rate": 1.7718444027490983e-05, "loss": 0.3994, "step": 9840 }, { "epoch": 0.2194438304409123, "grad_norm": 0.431234210729599, "learning_rate": 1.7716217376255608e-05, "loss": 0.4415, "step": 9845 }, { "epoch": 0.2195552798215324, "grad_norm": 0.6229223608970642, "learning_rate": 1.771398977907154e-05, "loss": 0.4124, "step": 9850 }, { "epoch": 0.21966672920215247, "grad_norm": 0.5385680198669434, "learning_rate": 1.771176123621187e-05, "loss": 0.5053, "step": 9855 }, { "epoch": 0.21977817858277252, "grad_norm": 0.5043733716011047, "learning_rate": 1.7709531747949796e-05, "loss": 0.4626, "step": 9860 }, { "epoch": 0.2198896279633926, "grad_norm": 0.4025327265262604, "learning_rate": 1.770730131455864e-05, "loss": 0.2598, "step": 9865 }, { "epoch": 0.22000107734401267, "grad_norm": 0.6064236760139465, "learning_rate": 1.7705069936311836e-05, "loss": 0.4956, "step": 9870 }, { "epoch": 0.22011252672463272, "grad_norm": 0.28435173630714417, "learning_rate": 1.7702837613482925e-05, "loss": 0.2965, "step": 9875 }, { "epoch": 0.2202239761052528, "grad_norm": 0.5732418894767761, "learning_rate": 1.7700604346345588e-05, "loss": 0.4326, "step": 9880 }, { "epoch": 0.22033542548587287, "grad_norm": 0.6132063269615173, "learning_rate": 1.769837013517359e-05, "loss": 0.2965, "step": 9885 }, { "epoch": 0.22044687486649292, "grad_norm": 0.8895071148872375, "learning_rate": 1.769613498024084e-05, "loss": 0.3603, "step": 9890 }, { "epoch": 0.220558324247113, "grad_norm": 0.6477868556976318, "learning_rate": 1.7693898881821344e-05, "loss": 0.3832, "step": 9895 }, { "epoch": 0.22066977362773307, "grad_norm": 0.7001681923866272, "learning_rate": 1.7691661840189235e-05, "loss": 0.2867, "step": 9900 }, { "epoch": 0.22078122300835312, "grad_norm": 0.5996546745300293, "learning_rate": 1.7689423855618754e-05, "loss": 0.3618, "step": 9905 }, { "epoch": 0.2208926723889732, "grad_norm": 0.4318576753139496, "learning_rate": 1.7687184928384263e-05, "loss": 0.4265, "step": 9910 }, { "epoch": 0.22100412176959328, "grad_norm": 0.519368052482605, "learning_rate": 1.7684945058760235e-05, "loss": 0.3522, "step": 9915 }, { "epoch": 0.22111557115021332, "grad_norm": 0.28598570823669434, "learning_rate": 1.7682704247021262e-05, "loss": 0.3078, "step": 9920 }, { "epoch": 0.2212270205308334, "grad_norm": 0.5853939056396484, "learning_rate": 1.768046249344205e-05, "loss": 0.4695, "step": 9925 }, { "epoch": 0.22133846991145348, "grad_norm": 0.7244415283203125, "learning_rate": 1.7678219798297417e-05, "loss": 0.4155, "step": 9930 }, { "epoch": 0.22144991929207353, "grad_norm": 0.712857723236084, "learning_rate": 1.7675976161862303e-05, "loss": 0.3553, "step": 9935 }, { "epoch": 0.2215613686726936, "grad_norm": 0.46348175406455994, "learning_rate": 1.7673731584411766e-05, "loss": 0.3931, "step": 9940 }, { "epoch": 0.22167281805331368, "grad_norm": 0.6503481864929199, "learning_rate": 1.7671486066220965e-05, "loss": 0.4813, "step": 9945 }, { "epoch": 0.22178426743393373, "grad_norm": 0.47859442234039307, "learning_rate": 1.7669239607565193e-05, "loss": 0.3663, "step": 9950 }, { "epoch": 0.2218957168145538, "grad_norm": 0.514894425868988, "learning_rate": 1.7666992208719835e-05, "loss": 0.4583, "step": 9955 }, { "epoch": 0.22200716619517388, "grad_norm": 0.5483409762382507, "learning_rate": 1.7664743869960416e-05, "loss": 0.3314, "step": 9960 }, { "epoch": 0.22211861557579393, "grad_norm": 0.6043745279312134, "learning_rate": 1.766249459156256e-05, "loss": 0.3185, "step": 9965 }, { "epoch": 0.222230064956414, "grad_norm": 0.3309432864189148, "learning_rate": 1.7660244373802014e-05, "loss": 0.3373, "step": 9970 }, { "epoch": 0.22234151433703409, "grad_norm": 0.6270701885223389, "learning_rate": 1.7657993216954635e-05, "loss": 0.5158, "step": 9975 }, { "epoch": 0.22245296371765413, "grad_norm": 0.6763097643852234, "learning_rate": 1.76557411212964e-05, "loss": 0.4119, "step": 9980 }, { "epoch": 0.2225644130982742, "grad_norm": 0.7081453800201416, "learning_rate": 1.7653488087103393e-05, "loss": 0.3731, "step": 9985 }, { "epoch": 0.22267586247889426, "grad_norm": 0.4833124577999115, "learning_rate": 1.7651234114651826e-05, "loss": 0.3278, "step": 9990 }, { "epoch": 0.22278731185951434, "grad_norm": 0.47149044275283813, "learning_rate": 1.764897920421801e-05, "loss": 0.3962, "step": 9995 }, { "epoch": 0.2228987612401344, "grad_norm": 0.5421935319900513, "learning_rate": 1.764672335607839e-05, "loss": 0.3723, "step": 10000 }, { "epoch": 0.22301021062075446, "grad_norm": 0.5773723125457764, "learning_rate": 1.7644466570509508e-05, "loss": 0.4249, "step": 10005 }, { "epoch": 0.22312166000137454, "grad_norm": 0.425523579120636, "learning_rate": 1.764220884778803e-05, "loss": 0.4544, "step": 10010 }, { "epoch": 0.22323310938199462, "grad_norm": 0.5259734392166138, "learning_rate": 1.7639950188190735e-05, "loss": 0.3608, "step": 10015 }, { "epoch": 0.22334455876261466, "grad_norm": 0.566461980342865, "learning_rate": 1.763769059199452e-05, "loss": 0.3499, "step": 10020 }, { "epoch": 0.22345600814323474, "grad_norm": 0.5393753051757812, "learning_rate": 1.7635430059476396e-05, "loss": 0.2665, "step": 10025 }, { "epoch": 0.22356745752385482, "grad_norm": 0.5863169431686401, "learning_rate": 1.763316859091348e-05, "loss": 0.4295, "step": 10030 }, { "epoch": 0.22367890690447487, "grad_norm": 0.5091935396194458, "learning_rate": 1.7630906186583012e-05, "loss": 0.4251, "step": 10035 }, { "epoch": 0.22379035628509494, "grad_norm": 0.5334644913673401, "learning_rate": 1.7628642846762348e-05, "loss": 0.4067, "step": 10040 }, { "epoch": 0.22390180566571502, "grad_norm": 0.5561589598655701, "learning_rate": 1.7626378571728958e-05, "loss": 0.3785, "step": 10045 }, { "epoch": 0.22401325504633507, "grad_norm": 0.4826967120170593, "learning_rate": 1.7624113361760418e-05, "loss": 0.3507, "step": 10050 }, { "epoch": 0.22412470442695515, "grad_norm": 0.5006000399589539, "learning_rate": 1.762184721713443e-05, "loss": 0.3652, "step": 10055 }, { "epoch": 0.22423615380757522, "grad_norm": 0.7852641344070435, "learning_rate": 1.7619580138128805e-05, "loss": 0.2905, "step": 10060 }, { "epoch": 0.22434760318819527, "grad_norm": 0.7374182939529419, "learning_rate": 1.7617312125021468e-05, "loss": 0.3224, "step": 10065 }, { "epoch": 0.22445905256881535, "grad_norm": 0.5154327750205994, "learning_rate": 1.7615043178090464e-05, "loss": 0.2448, "step": 10070 }, { "epoch": 0.22457050194943542, "grad_norm": 0.6623854637145996, "learning_rate": 1.7612773297613945e-05, "loss": 0.2127, "step": 10075 }, { "epoch": 0.22468195133005547, "grad_norm": 0.4166138172149658, "learning_rate": 1.761050248387018e-05, "loss": 0.4585, "step": 10080 }, { "epoch": 0.22479340071067555, "grad_norm": 0.5983569622039795, "learning_rate": 1.7608230737137555e-05, "loss": 0.2964, "step": 10085 }, { "epoch": 0.22490485009129563, "grad_norm": 0.6410354971885681, "learning_rate": 1.7605958057694564e-05, "loss": 0.2499, "step": 10090 }, { "epoch": 0.22501629947191568, "grad_norm": 0.5239635109901428, "learning_rate": 1.7603684445819832e-05, "loss": 0.3103, "step": 10095 }, { "epoch": 0.22512774885253575, "grad_norm": 0.702729344367981, "learning_rate": 1.7601409901792074e-05, "loss": 0.4165, "step": 10100 }, { "epoch": 0.22523919823315583, "grad_norm": 0.5636135935783386, "learning_rate": 1.7599134425890136e-05, "loss": 0.4647, "step": 10105 }, { "epoch": 0.22535064761377588, "grad_norm": 0.5451076030731201, "learning_rate": 1.7596858018392974e-05, "loss": 0.3212, "step": 10110 }, { "epoch": 0.22546209699439596, "grad_norm": 0.849031925201416, "learning_rate": 1.7594580679579654e-05, "loss": 0.3979, "step": 10115 }, { "epoch": 0.22557354637501603, "grad_norm": 0.7917623519897461, "learning_rate": 1.759230240972937e-05, "loss": 0.3505, "step": 10120 }, { "epoch": 0.22568499575563608, "grad_norm": 0.5842856168746948, "learning_rate": 1.7590023209121412e-05, "loss": 0.3653, "step": 10125 }, { "epoch": 0.22579644513625616, "grad_norm": 0.5458952188491821, "learning_rate": 1.7587743078035196e-05, "loss": 0.4657, "step": 10130 }, { "epoch": 0.22590789451687623, "grad_norm": 0.3667095899581909, "learning_rate": 1.7585462016750245e-05, "loss": 0.313, "step": 10135 }, { "epoch": 0.22601934389749628, "grad_norm": 0.7304571270942688, "learning_rate": 1.7583180025546202e-05, "loss": 0.3666, "step": 10140 }, { "epoch": 0.22613079327811636, "grad_norm": 0.5201693177223206, "learning_rate": 1.7580897104702818e-05, "loss": 0.4025, "step": 10145 }, { "epoch": 0.22624224265873644, "grad_norm": 0.5686962604522705, "learning_rate": 1.757861325449997e-05, "loss": 0.3395, "step": 10150 }, { "epoch": 0.22635369203935649, "grad_norm": 0.5492640733718872, "learning_rate": 1.757632847521763e-05, "loss": 0.2222, "step": 10155 }, { "epoch": 0.22646514141997656, "grad_norm": 0.5925264358520508, "learning_rate": 1.75740427671359e-05, "loss": 0.334, "step": 10160 }, { "epoch": 0.22657659080059664, "grad_norm": 0.6872429251670837, "learning_rate": 1.7571756130534994e-05, "loss": 0.3493, "step": 10165 }, { "epoch": 0.2266880401812167, "grad_norm": 0.7906669974327087, "learning_rate": 1.7569468565695227e-05, "loss": 0.4535, "step": 10170 }, { "epoch": 0.22679948956183676, "grad_norm": 0.5554289221763611, "learning_rate": 1.7567180072897043e-05, "loss": 0.305, "step": 10175 }, { "epoch": 0.2269109389424568, "grad_norm": 0.3369906544685364, "learning_rate": 1.7564890652420993e-05, "loss": 0.37, "step": 10180 }, { "epoch": 0.2270223883230769, "grad_norm": 0.5694258809089661, "learning_rate": 1.7562600304547735e-05, "loss": 0.3359, "step": 10185 }, { "epoch": 0.22713383770369697, "grad_norm": 0.38094398379325867, "learning_rate": 1.756030902955806e-05, "loss": 0.454, "step": 10190 }, { "epoch": 0.22724528708431702, "grad_norm": 0.5839552879333496, "learning_rate": 1.755801682773285e-05, "loss": 0.3097, "step": 10195 }, { "epoch": 0.2273567364649371, "grad_norm": 0.5562794804573059, "learning_rate": 1.7555723699353124e-05, "loss": 0.3604, "step": 10200 }, { "epoch": 0.22746818584555717, "grad_norm": 0.6823106408119202, "learning_rate": 1.7553429644699988e-05, "loss": 0.3474, "step": 10205 }, { "epoch": 0.22757963522617722, "grad_norm": 0.618422269821167, "learning_rate": 1.755113466405468e-05, "loss": 0.5486, "step": 10210 }, { "epoch": 0.2276910846067973, "grad_norm": 0.5379924178123474, "learning_rate": 1.754883875769855e-05, "loss": 0.5348, "step": 10215 }, { "epoch": 0.22780253398741737, "grad_norm": 0.617397129535675, "learning_rate": 1.7546541925913054e-05, "loss": 0.3263, "step": 10220 }, { "epoch": 0.22791398336803742, "grad_norm": 0.5682933926582336, "learning_rate": 1.754424416897977e-05, "loss": 0.4884, "step": 10225 }, { "epoch": 0.2280254327486575, "grad_norm": 0.5430388450622559, "learning_rate": 1.7541945487180383e-05, "loss": 0.3395, "step": 10230 }, { "epoch": 0.22813688212927757, "grad_norm": 0.7244122624397278, "learning_rate": 1.7539645880796694e-05, "loss": 0.4879, "step": 10235 }, { "epoch": 0.22824833150989762, "grad_norm": 0.37884101271629333, "learning_rate": 1.753734535011062e-05, "loss": 0.3292, "step": 10240 }, { "epoch": 0.2283597808905177, "grad_norm": 0.5905236601829529, "learning_rate": 1.753504389540418e-05, "loss": 0.3041, "step": 10245 }, { "epoch": 0.22847123027113778, "grad_norm": 0.4799133837223053, "learning_rate": 1.7532741516959527e-05, "loss": 0.3097, "step": 10250 }, { "epoch": 0.22858267965175783, "grad_norm": 0.6521375775337219, "learning_rate": 1.7530438215058902e-05, "loss": 0.4394, "step": 10255 }, { "epoch": 0.2286941290323779, "grad_norm": 0.6131927967071533, "learning_rate": 1.752813398998468e-05, "loss": 0.365, "step": 10260 }, { "epoch": 0.22880557841299798, "grad_norm": 0.6607373356819153, "learning_rate": 1.752582884201934e-05, "loss": 0.4046, "step": 10265 }, { "epoch": 0.22891702779361803, "grad_norm": 0.5813794136047363, "learning_rate": 1.7523522771445475e-05, "loss": 0.3887, "step": 10270 }, { "epoch": 0.2290284771742381, "grad_norm": 0.6334280967712402, "learning_rate": 1.752121577854579e-05, "loss": 0.4146, "step": 10275 }, { "epoch": 0.22913992655485818, "grad_norm": 0.5421414375305176, "learning_rate": 1.7518907863603102e-05, "loss": 0.3437, "step": 10280 }, { "epoch": 0.22925137593547823, "grad_norm": 0.5887249112129211, "learning_rate": 1.7516599026900352e-05, "loss": 0.328, "step": 10285 }, { "epoch": 0.2293628253160983, "grad_norm": 0.6338925957679749, "learning_rate": 1.751428926872058e-05, "loss": 0.3079, "step": 10290 }, { "epoch": 0.22947427469671838, "grad_norm": 0.43777555227279663, "learning_rate": 1.751197858934694e-05, "loss": 0.2509, "step": 10295 }, { "epoch": 0.22958572407733843, "grad_norm": 0.4322524666786194, "learning_rate": 1.7509666989062713e-05, "loss": 0.3394, "step": 10300 }, { "epoch": 0.2296971734579585, "grad_norm": 0.39146319031715393, "learning_rate": 1.750735446815128e-05, "loss": 0.3528, "step": 10305 }, { "epoch": 0.22980862283857859, "grad_norm": 0.9868649244308472, "learning_rate": 1.7505041026896133e-05, "loss": 0.3424, "step": 10310 }, { "epoch": 0.22992007221919863, "grad_norm": 0.6405900716781616, "learning_rate": 1.7502726665580887e-05, "loss": 0.4043, "step": 10315 }, { "epoch": 0.2300315215998187, "grad_norm": 0.4962483048439026, "learning_rate": 1.7500411384489267e-05, "loss": 0.3992, "step": 10320 }, { "epoch": 0.2301429709804388, "grad_norm": 0.5950126051902771, "learning_rate": 1.74980951839051e-05, "loss": 0.2763, "step": 10325 }, { "epoch": 0.23025442036105884, "grad_norm": 0.6838284134864807, "learning_rate": 1.7495778064112347e-05, "loss": 0.409, "step": 10330 }, { "epoch": 0.2303658697416789, "grad_norm": 0.6459113359451294, "learning_rate": 1.7493460025395056e-05, "loss": 0.3499, "step": 10335 }, { "epoch": 0.230477319122299, "grad_norm": 0.5087669491767883, "learning_rate": 1.749114106803741e-05, "loss": 0.2831, "step": 10340 }, { "epoch": 0.23058876850291904, "grad_norm": 0.5966417789459229, "learning_rate": 1.748882119232369e-05, "loss": 0.3628, "step": 10345 }, { "epoch": 0.23070021788353912, "grad_norm": 0.6546626687049866, "learning_rate": 1.7486500398538298e-05, "loss": 0.4678, "step": 10350 }, { "epoch": 0.2308116672641592, "grad_norm": 0.7793821692466736, "learning_rate": 1.7484178686965744e-05, "loss": 0.4621, "step": 10355 }, { "epoch": 0.23092311664477924, "grad_norm": 0.4167221486568451, "learning_rate": 1.7481856057890652e-05, "loss": 0.3032, "step": 10360 }, { "epoch": 0.23103456602539932, "grad_norm": 0.5104589462280273, "learning_rate": 1.7479532511597758e-05, "loss": 0.3208, "step": 10365 }, { "epoch": 0.23114601540601937, "grad_norm": 0.5882985591888428, "learning_rate": 1.7477208048371908e-05, "loss": 0.3434, "step": 10370 }, { "epoch": 0.23125746478663944, "grad_norm": 0.4183305501937866, "learning_rate": 1.7474882668498072e-05, "loss": 0.291, "step": 10375 }, { "epoch": 0.23136891416725952, "grad_norm": 0.40787050127983093, "learning_rate": 1.7472556372261316e-05, "loss": 0.2915, "step": 10380 }, { "epoch": 0.23148036354787957, "grad_norm": 0.621015727519989, "learning_rate": 1.7470229159946827e-05, "loss": 0.3706, "step": 10385 }, { "epoch": 0.23159181292849965, "grad_norm": 0.5765045881271362, "learning_rate": 1.7467901031839906e-05, "loss": 0.3946, "step": 10390 }, { "epoch": 0.23170326230911972, "grad_norm": 0.5098361968994141, "learning_rate": 1.746557198822596e-05, "loss": 0.3383, "step": 10395 }, { "epoch": 0.23181471168973977, "grad_norm": 0.5407850742340088, "learning_rate": 1.7463242029390516e-05, "loss": 0.3399, "step": 10400 }, { "epoch": 0.23192616107035985, "grad_norm": 0.5743176937103271, "learning_rate": 1.7460911155619205e-05, "loss": 0.3745, "step": 10405 }, { "epoch": 0.23203761045097993, "grad_norm": 0.6098573207855225, "learning_rate": 1.7458579367197773e-05, "loss": 0.3642, "step": 10410 }, { "epoch": 0.23214905983159997, "grad_norm": 0.7372178435325623, "learning_rate": 1.7456246664412085e-05, "loss": 0.3546, "step": 10415 }, { "epoch": 0.23226050921222005, "grad_norm": 0.6569801568984985, "learning_rate": 1.745391304754811e-05, "loss": 0.3821, "step": 10420 }, { "epoch": 0.23237195859284013, "grad_norm": 0.6863839030265808, "learning_rate": 1.745157851689193e-05, "loss": 0.3249, "step": 10425 }, { "epoch": 0.23248340797346018, "grad_norm": 0.6198316216468811, "learning_rate": 1.744924307272974e-05, "loss": 0.3759, "step": 10430 }, { "epoch": 0.23259485735408025, "grad_norm": 0.4969221353530884, "learning_rate": 1.7446906715347852e-05, "loss": 0.2658, "step": 10435 }, { "epoch": 0.23270630673470033, "grad_norm": 0.7487539649009705, "learning_rate": 1.7444569445032677e-05, "loss": 0.3694, "step": 10440 }, { "epoch": 0.23281775611532038, "grad_norm": 0.7681021690368652, "learning_rate": 1.7442231262070756e-05, "loss": 0.3903, "step": 10445 }, { "epoch": 0.23292920549594046, "grad_norm": 0.6074617505073547, "learning_rate": 1.7439892166748726e-05, "loss": 0.2939, "step": 10450 }, { "epoch": 0.23304065487656053, "grad_norm": 0.46127086877822876, "learning_rate": 1.7437552159353338e-05, "loss": 0.3318, "step": 10455 }, { "epoch": 0.23315210425718058, "grad_norm": 0.6152943968772888, "learning_rate": 1.743521124017147e-05, "loss": 0.3491, "step": 10460 }, { "epoch": 0.23326355363780066, "grad_norm": 0.6032249927520752, "learning_rate": 1.7432869409490095e-05, "loss": 0.3311, "step": 10465 }, { "epoch": 0.23337500301842073, "grad_norm": 0.7437279224395752, "learning_rate": 1.7430526667596305e-05, "loss": 0.414, "step": 10470 }, { "epoch": 0.23348645239904078, "grad_norm": 0.7488385438919067, "learning_rate": 1.7428183014777293e-05, "loss": 0.3895, "step": 10475 }, { "epoch": 0.23359790177966086, "grad_norm": 0.6262264251708984, "learning_rate": 1.7425838451320387e-05, "loss": 0.3258, "step": 10480 }, { "epoch": 0.23370935116028094, "grad_norm": 0.7204452157020569, "learning_rate": 1.7423492977513004e-05, "loss": 0.4216, "step": 10485 }, { "epoch": 0.23382080054090099, "grad_norm": 0.462632954120636, "learning_rate": 1.7421146593642683e-05, "loss": 0.3316, "step": 10490 }, { "epoch": 0.23393224992152106, "grad_norm": 0.758755624294281, "learning_rate": 1.741879929999707e-05, "loss": 0.4178, "step": 10495 }, { "epoch": 0.23404369930214114, "grad_norm": 0.46575212478637695, "learning_rate": 1.7416451096863928e-05, "loss": 0.452, "step": 10500 }, { "epoch": 0.2341551486827612, "grad_norm": 0.7237605452537537, "learning_rate": 1.7414101984531127e-05, "loss": 0.3371, "step": 10505 }, { "epoch": 0.23426659806338127, "grad_norm": 0.49028345942497253, "learning_rate": 1.741175196328665e-05, "loss": 0.3716, "step": 10510 }, { "epoch": 0.23437804744400134, "grad_norm": 0.5918816328048706, "learning_rate": 1.7409401033418596e-05, "loss": 0.4963, "step": 10515 }, { "epoch": 0.2344894968246214, "grad_norm": 0.48566198348999023, "learning_rate": 1.7407049195215167e-05, "loss": 0.3863, "step": 10520 }, { "epoch": 0.23460094620524147, "grad_norm": 0.6173545718193054, "learning_rate": 1.7404696448964676e-05, "loss": 0.3703, "step": 10525 }, { "epoch": 0.23471239558586154, "grad_norm": 0.6073485612869263, "learning_rate": 1.740234279495556e-05, "loss": 0.4293, "step": 10530 }, { "epoch": 0.2348238449664816, "grad_norm": 0.5335012078285217, "learning_rate": 1.739998823347635e-05, "loss": 0.3025, "step": 10535 }, { "epoch": 0.23493529434710167, "grad_norm": 0.5193667411804199, "learning_rate": 1.7397632764815706e-05, "loss": 0.4183, "step": 10540 }, { "epoch": 0.23504674372772175, "grad_norm": 0.580973207950592, "learning_rate": 1.7395276389262387e-05, "loss": 0.402, "step": 10545 }, { "epoch": 0.2351581931083418, "grad_norm": 0.6126024723052979, "learning_rate": 1.7392919107105262e-05, "loss": 0.3532, "step": 10550 }, { "epoch": 0.23526964248896187, "grad_norm": 0.5873859524726868, "learning_rate": 1.739056091863332e-05, "loss": 0.4641, "step": 10555 }, { "epoch": 0.23538109186958192, "grad_norm": 0.721373975276947, "learning_rate": 1.7388201824135658e-05, "loss": 0.3835, "step": 10560 }, { "epoch": 0.235492541250202, "grad_norm": 0.5195067524909973, "learning_rate": 1.7385841823901478e-05, "loss": 0.4342, "step": 10565 }, { "epoch": 0.23560399063082207, "grad_norm": 0.5306962132453918, "learning_rate": 1.73834809182201e-05, "loss": 0.2926, "step": 10570 }, { "epoch": 0.23571544001144212, "grad_norm": 0.6120619177818298, "learning_rate": 1.7381119107380956e-05, "loss": 0.3548, "step": 10575 }, { "epoch": 0.2358268893920622, "grad_norm": 0.669775128364563, "learning_rate": 1.7378756391673578e-05, "loss": 0.4739, "step": 10580 }, { "epoch": 0.23593833877268228, "grad_norm": 0.4187053442001343, "learning_rate": 1.7376392771387623e-05, "loss": 0.2801, "step": 10585 }, { "epoch": 0.23604978815330233, "grad_norm": 0.4181480407714844, "learning_rate": 1.7374028246812855e-05, "loss": 0.4613, "step": 10590 }, { "epoch": 0.2361612375339224, "grad_norm": 0.48722395300865173, "learning_rate": 1.737166281823914e-05, "loss": 0.3843, "step": 10595 }, { "epoch": 0.23627268691454248, "grad_norm": 0.4884631931781769, "learning_rate": 1.7369296485956465e-05, "loss": 0.3571, "step": 10600 }, { "epoch": 0.23638413629516253, "grad_norm": 0.568600594997406, "learning_rate": 1.7366929250254926e-05, "loss": 0.3935, "step": 10605 }, { "epoch": 0.2364955856757826, "grad_norm": 0.5506514310836792, "learning_rate": 1.7364561111424717e-05, "loss": 0.4381, "step": 10610 }, { "epoch": 0.23660703505640268, "grad_norm": 0.5655339956283569, "learning_rate": 1.7362192069756168e-05, "loss": 0.3502, "step": 10615 }, { "epoch": 0.23671848443702273, "grad_norm": 0.5412592887878418, "learning_rate": 1.7359822125539695e-05, "loss": 0.342, "step": 10620 }, { "epoch": 0.2368299338176428, "grad_norm": 0.6305738687515259, "learning_rate": 1.735745127906584e-05, "loss": 0.2947, "step": 10625 }, { "epoch": 0.23694138319826288, "grad_norm": 0.8514442443847656, "learning_rate": 1.735507953062525e-05, "loss": 0.2562, "step": 10630 }, { "epoch": 0.23705283257888293, "grad_norm": 0.6014148592948914, "learning_rate": 1.735270688050868e-05, "loss": 0.3063, "step": 10635 }, { "epoch": 0.237164281959503, "grad_norm": 0.5961599349975586, "learning_rate": 1.7350333329007e-05, "loss": 0.2601, "step": 10640 }, { "epoch": 0.23727573134012309, "grad_norm": 0.5256707668304443, "learning_rate": 1.7347958876411196e-05, "loss": 0.3067, "step": 10645 }, { "epoch": 0.23738718072074314, "grad_norm": 0.45404037833213806, "learning_rate": 1.7345583523012344e-05, "loss": 0.3712, "step": 10650 }, { "epoch": 0.2374986301013632, "grad_norm": 0.5770034790039062, "learning_rate": 1.7343207269101655e-05, "loss": 0.391, "step": 10655 }, { "epoch": 0.2376100794819833, "grad_norm": 0.5898873805999756, "learning_rate": 1.7340830114970435e-05, "loss": 0.4271, "step": 10660 }, { "epoch": 0.23772152886260334, "grad_norm": 0.5621415972709656, "learning_rate": 1.7338452060910104e-05, "loss": 0.292, "step": 10665 }, { "epoch": 0.23783297824322341, "grad_norm": 0.42645710706710815, "learning_rate": 1.7336073107212197e-05, "loss": 0.4255, "step": 10670 }, { "epoch": 0.2379444276238435, "grad_norm": 0.678278386592865, "learning_rate": 1.733369325416835e-05, "loss": 0.4151, "step": 10675 }, { "epoch": 0.23805587700446354, "grad_norm": 0.6612429618835449, "learning_rate": 1.733131250207032e-05, "loss": 0.3753, "step": 10680 }, { "epoch": 0.23816732638508362, "grad_norm": 0.6525142192840576, "learning_rate": 1.7328930851209963e-05, "loss": 0.312, "step": 10685 }, { "epoch": 0.2382787757657037, "grad_norm": 0.5441312193870544, "learning_rate": 1.7326548301879258e-05, "loss": 0.3952, "step": 10690 }, { "epoch": 0.23839022514632374, "grad_norm": 0.600470781326294, "learning_rate": 1.7324164854370283e-05, "loss": 0.3412, "step": 10695 }, { "epoch": 0.23850167452694382, "grad_norm": 0.5109327435493469, "learning_rate": 1.7321780508975226e-05, "loss": 0.3853, "step": 10700 }, { "epoch": 0.2386131239075639, "grad_norm": 0.6196373701095581, "learning_rate": 1.73193952659864e-05, "loss": 0.4093, "step": 10705 }, { "epoch": 0.23872457328818394, "grad_norm": 0.5077962279319763, "learning_rate": 1.7317009125696208e-05, "loss": 0.2362, "step": 10710 }, { "epoch": 0.23883602266880402, "grad_norm": 0.6553756594657898, "learning_rate": 1.7314622088397177e-05, "loss": 0.3759, "step": 10715 }, { "epoch": 0.2389474720494241, "grad_norm": 0.5542757511138916, "learning_rate": 1.731223415438194e-05, "loss": 0.3064, "step": 10720 }, { "epoch": 0.23905892143004415, "grad_norm": 0.6027953028678894, "learning_rate": 1.7309845323943236e-05, "loss": 0.3629, "step": 10725 }, { "epoch": 0.23917037081066422, "grad_norm": 0.7495735287666321, "learning_rate": 1.7307455597373916e-05, "loss": 0.3856, "step": 10730 }, { "epoch": 0.2392818201912843, "grad_norm": 0.6348993182182312, "learning_rate": 1.7305064974966946e-05, "loss": 0.4807, "step": 10735 }, { "epoch": 0.23939326957190435, "grad_norm": 0.6500808596611023, "learning_rate": 1.73026734570154e-05, "loss": 0.3925, "step": 10740 }, { "epoch": 0.23950471895252443, "grad_norm": 0.47195371985435486, "learning_rate": 1.7300281043812453e-05, "loss": 0.2649, "step": 10745 }, { "epoch": 0.23961616833314447, "grad_norm": 0.59906005859375, "learning_rate": 1.72978877356514e-05, "loss": 0.3296, "step": 10750 }, { "epoch": 0.23972761771376455, "grad_norm": 0.479704350233078, "learning_rate": 1.7295493532825643e-05, "loss": 0.3996, "step": 10755 }, { "epoch": 0.23983906709438463, "grad_norm": 0.5302325487136841, "learning_rate": 1.729309843562869e-05, "loss": 0.2719, "step": 10760 }, { "epoch": 0.23995051647500468, "grad_norm": 0.4929358959197998, "learning_rate": 1.729070244435416e-05, "loss": 0.3027, "step": 10765 }, { "epoch": 0.24006196585562475, "grad_norm": 0.5257954597473145, "learning_rate": 1.7288305559295793e-05, "loss": 0.344, "step": 10770 }, { "epoch": 0.24017341523624483, "grad_norm": 0.5391680598258972, "learning_rate": 1.7285907780747417e-05, "loss": 0.3992, "step": 10775 }, { "epoch": 0.24028486461686488, "grad_norm": 0.49891191720962524, "learning_rate": 1.728350910900299e-05, "loss": 0.4104, "step": 10780 }, { "epoch": 0.24039631399748496, "grad_norm": 0.6244639158248901, "learning_rate": 1.7281109544356556e-05, "loss": 0.4117, "step": 10785 }, { "epoch": 0.24050776337810503, "grad_norm": 0.5571478009223938, "learning_rate": 1.72787090871023e-05, "loss": 0.4336, "step": 10790 }, { "epoch": 0.24061921275872508, "grad_norm": 0.5005432963371277, "learning_rate": 1.7276307737534496e-05, "loss": 0.461, "step": 10795 }, { "epoch": 0.24073066213934516, "grad_norm": 0.8142117261886597, "learning_rate": 1.7273905495947522e-05, "loss": 0.3993, "step": 10800 }, { "epoch": 0.24084211151996524, "grad_norm": 0.6402087807655334, "learning_rate": 1.7271502362635883e-05, "loss": 0.3807, "step": 10805 }, { "epoch": 0.24095356090058528, "grad_norm": 0.37803009152412415, "learning_rate": 1.726909833789418e-05, "loss": 0.3521, "step": 10810 }, { "epoch": 0.24106501028120536, "grad_norm": 0.3475203216075897, "learning_rate": 1.7266693422017133e-05, "loss": 0.5104, "step": 10815 }, { "epoch": 0.24117645966182544, "grad_norm": 0.7365702986717224, "learning_rate": 1.726428761529956e-05, "loss": 0.3797, "step": 10820 }, { "epoch": 0.2412879090424455, "grad_norm": 0.7421954870223999, "learning_rate": 1.72618809180364e-05, "loss": 0.3804, "step": 10825 }, { "epoch": 0.24139935842306556, "grad_norm": 0.40259018540382385, "learning_rate": 1.725947333052269e-05, "loss": 0.4523, "step": 10830 }, { "epoch": 0.24151080780368564, "grad_norm": 0.548994243144989, "learning_rate": 1.7257064853053586e-05, "loss": 0.3184, "step": 10835 }, { "epoch": 0.2416222571843057, "grad_norm": 0.5792094469070435, "learning_rate": 1.7254655485924346e-05, "loss": 0.5211, "step": 10840 }, { "epoch": 0.24173370656492577, "grad_norm": 0.648899495601654, "learning_rate": 1.7252245229430336e-05, "loss": 0.3558, "step": 10845 }, { "epoch": 0.24184515594554584, "grad_norm": 0.47642436623573303, "learning_rate": 1.7249834083867048e-05, "loss": 0.4062, "step": 10850 }, { "epoch": 0.2419566053261659, "grad_norm": 0.6215870976448059, "learning_rate": 1.7247422049530056e-05, "loss": 0.3595, "step": 10855 }, { "epoch": 0.24206805470678597, "grad_norm": 0.8642063736915588, "learning_rate": 1.7245009126715065e-05, "loss": 0.496, "step": 10860 }, { "epoch": 0.24217950408740604, "grad_norm": 0.682572066783905, "learning_rate": 1.7242595315717876e-05, "loss": 0.3212, "step": 10865 }, { "epoch": 0.2422909534680261, "grad_norm": 0.6249151229858398, "learning_rate": 1.7240180616834407e-05, "loss": 0.479, "step": 10870 }, { "epoch": 0.24240240284864617, "grad_norm": 0.7562622427940369, "learning_rate": 1.723776503036068e-05, "loss": 0.3625, "step": 10875 }, { "epoch": 0.24251385222926625, "grad_norm": 0.53129643201828, "learning_rate": 1.7235348556592826e-05, "loss": 0.2043, "step": 10880 }, { "epoch": 0.2426253016098863, "grad_norm": 0.5382141470909119, "learning_rate": 1.7232931195827086e-05, "loss": 0.3235, "step": 10885 }, { "epoch": 0.24273675099050637, "grad_norm": 0.8118866682052612, "learning_rate": 1.7230512948359816e-05, "loss": 0.2424, "step": 10890 }, { "epoch": 0.24284820037112645, "grad_norm": 0.7187613248825073, "learning_rate": 1.7228093814487464e-05, "loss": 0.4476, "step": 10895 }, { "epoch": 0.2429596497517465, "grad_norm": 0.5982451438903809, "learning_rate": 1.7225673794506604e-05, "loss": 0.2502, "step": 10900 }, { "epoch": 0.24307109913236657, "grad_norm": 0.6305253505706787, "learning_rate": 1.7223252888713914e-05, "loss": 0.2767, "step": 10905 }, { "epoch": 0.24318254851298665, "grad_norm": 0.4656547009944916, "learning_rate": 1.722083109740617e-05, "loss": 0.3105, "step": 10910 }, { "epoch": 0.2432939978936067, "grad_norm": 0.411578506231308, "learning_rate": 1.7218408420880273e-05, "loss": 0.3901, "step": 10915 }, { "epoch": 0.24340544727422678, "grad_norm": 0.43576860427856445, "learning_rate": 1.721598485943322e-05, "loss": 0.2305, "step": 10920 }, { "epoch": 0.24351689665484685, "grad_norm": 0.5072635412216187, "learning_rate": 1.7213560413362122e-05, "loss": 0.3357, "step": 10925 }, { "epoch": 0.2436283460354669, "grad_norm": 0.7047842741012573, "learning_rate": 1.7211135082964198e-05, "loss": 0.3911, "step": 10930 }, { "epoch": 0.24373979541608698, "grad_norm": 0.36933034658432007, "learning_rate": 1.7208708868536775e-05, "loss": 0.3613, "step": 10935 }, { "epoch": 0.24385124479670703, "grad_norm": 0.6863102912902832, "learning_rate": 1.7206281770377285e-05, "loss": 0.3389, "step": 10940 }, { "epoch": 0.2439626941773271, "grad_norm": 0.5595670938491821, "learning_rate": 1.7203853788783275e-05, "loss": 0.2501, "step": 10945 }, { "epoch": 0.24407414355794718, "grad_norm": 0.8564665913581848, "learning_rate": 1.7201424924052396e-05, "loss": 0.3184, "step": 10950 }, { "epoch": 0.24418559293856723, "grad_norm": 0.6190245151519775, "learning_rate": 1.7198995176482407e-05, "loss": 0.3406, "step": 10955 }, { "epoch": 0.2442970423191873, "grad_norm": 0.5113375782966614, "learning_rate": 1.7196564546371178e-05, "loss": 0.3252, "step": 10960 }, { "epoch": 0.24440849169980738, "grad_norm": 0.5863358974456787, "learning_rate": 1.7194133034016686e-05, "loss": 0.288, "step": 10965 }, { "epoch": 0.24451994108042743, "grad_norm": 0.5502198934555054, "learning_rate": 1.7191700639717012e-05, "loss": 0.431, "step": 10970 }, { "epoch": 0.2446313904610475, "grad_norm": 0.5362652540206909, "learning_rate": 1.7189267363770352e-05, "loss": 0.4535, "step": 10975 }, { "epoch": 0.2447428398416676, "grad_norm": 0.9066315293312073, "learning_rate": 1.7186833206475004e-05, "loss": 0.325, "step": 10980 }, { "epoch": 0.24485428922228764, "grad_norm": 0.7099729776382446, "learning_rate": 1.7184398168129378e-05, "loss": 0.3492, "step": 10985 }, { "epoch": 0.2449657386029077, "grad_norm": 0.4674534201622009, "learning_rate": 1.7181962249031995e-05, "loss": 0.3179, "step": 10990 }, { "epoch": 0.2450771879835278, "grad_norm": 0.48811131715774536, "learning_rate": 1.7179525449481475e-05, "loss": 0.4254, "step": 10995 }, { "epoch": 0.24518863736414784, "grad_norm": 0.6559020280838013, "learning_rate": 1.7177087769776556e-05, "loss": 0.3824, "step": 11000 }, { "epoch": 0.24530008674476791, "grad_norm": 0.683711051940918, "learning_rate": 1.717464921021607e-05, "loss": 0.3422, "step": 11005 }, { "epoch": 0.245411536125388, "grad_norm": 0.5838513374328613, "learning_rate": 1.7172209771098974e-05, "loss": 0.301, "step": 11010 }, { "epoch": 0.24552298550600804, "grad_norm": 0.6140018105506897, "learning_rate": 1.716976945272432e-05, "loss": 0.3821, "step": 11015 }, { "epoch": 0.24563443488662812, "grad_norm": 0.49847617745399475, "learning_rate": 1.716732825539127e-05, "loss": 0.3379, "step": 11020 }, { "epoch": 0.2457458842672482, "grad_norm": 0.6614333391189575, "learning_rate": 1.7164886179399102e-05, "loss": 0.496, "step": 11025 }, { "epoch": 0.24585733364786824, "grad_norm": 0.3504364490509033, "learning_rate": 1.7162443225047192e-05, "loss": 0.223, "step": 11030 }, { "epoch": 0.24596878302848832, "grad_norm": 0.7113224864006042, "learning_rate": 1.715999939263503e-05, "loss": 0.4054, "step": 11035 }, { "epoch": 0.2460802324091084, "grad_norm": 0.7698032259941101, "learning_rate": 1.715755468246221e-05, "loss": 0.3, "step": 11040 }, { "epoch": 0.24619168178972844, "grad_norm": 0.7142345309257507, "learning_rate": 1.715510909482843e-05, "loss": 0.2923, "step": 11045 }, { "epoch": 0.24630313117034852, "grad_norm": 0.5872173309326172, "learning_rate": 1.7152662630033506e-05, "loss": 0.3531, "step": 11050 }, { "epoch": 0.2464145805509686, "grad_norm": 0.7580005526542664, "learning_rate": 1.7150215288377352e-05, "loss": 0.3401, "step": 11055 }, { "epoch": 0.24652602993158865, "grad_norm": 0.5612766742706299, "learning_rate": 1.7147767070159995e-05, "loss": 0.4963, "step": 11060 }, { "epoch": 0.24663747931220872, "grad_norm": 0.5882358551025391, "learning_rate": 1.714531797568157e-05, "loss": 0.2482, "step": 11065 }, { "epoch": 0.2467489286928288, "grad_norm": 0.4978226125240326, "learning_rate": 1.714286800524231e-05, "loss": 0.4981, "step": 11070 }, { "epoch": 0.24686037807344885, "grad_norm": 0.4974943995475769, "learning_rate": 1.7140417159142572e-05, "loss": 0.2582, "step": 11075 }, { "epoch": 0.24697182745406893, "grad_norm": 0.5015186071395874, "learning_rate": 1.7137965437682803e-05, "loss": 0.3072, "step": 11080 }, { "epoch": 0.247083276834689, "grad_norm": 0.7087413668632507, "learning_rate": 1.713551284116357e-05, "loss": 0.4249, "step": 11085 }, { "epoch": 0.24719472621530905, "grad_norm": 0.5860006213188171, "learning_rate": 1.713305936988554e-05, "loss": 0.3173, "step": 11090 }, { "epoch": 0.24730617559592913, "grad_norm": 0.5131428241729736, "learning_rate": 1.713060502414949e-05, "loss": 0.3511, "step": 11095 }, { "epoch": 0.2474176249765492, "grad_norm": 0.5306171178817749, "learning_rate": 1.7128149804256303e-05, "loss": 0.3353, "step": 11100 }, { "epoch": 0.24752907435716925, "grad_norm": 0.5503345727920532, "learning_rate": 1.7125693710506973e-05, "loss": 0.3021, "step": 11105 }, { "epoch": 0.24764052373778933, "grad_norm": 0.5068588852882385, "learning_rate": 1.7123236743202592e-05, "loss": 0.377, "step": 11110 }, { "epoch": 0.2477519731184094, "grad_norm": 0.5373303294181824, "learning_rate": 1.7120778902644374e-05, "loss": 0.3971, "step": 11115 }, { "epoch": 0.24786342249902946, "grad_norm": 0.594527006149292, "learning_rate": 1.711832018913362e-05, "loss": 0.2949, "step": 11120 }, { "epoch": 0.24797487187964953, "grad_norm": 0.5444244146347046, "learning_rate": 1.7115860602971758e-05, "loss": 0.4187, "step": 11125 }, { "epoch": 0.24808632126026958, "grad_norm": 0.5815946459770203, "learning_rate": 1.7113400144460315e-05, "loss": 0.3258, "step": 11130 }, { "epoch": 0.24819777064088966, "grad_norm": 0.5695800185203552, "learning_rate": 1.711093881390092e-05, "loss": 0.3724, "step": 11135 }, { "epoch": 0.24830922002150974, "grad_norm": 0.7198867201805115, "learning_rate": 1.7108476611595317e-05, "loss": 0.3852, "step": 11140 }, { "epoch": 0.24842066940212978, "grad_norm": 0.9782135486602783, "learning_rate": 1.7106013537845346e-05, "loss": 0.4175, "step": 11145 }, { "epoch": 0.24853211878274986, "grad_norm": 0.7631874084472656, "learning_rate": 1.7103549592952967e-05, "loss": 0.4098, "step": 11150 }, { "epoch": 0.24864356816336994, "grad_norm": 0.6426448822021484, "learning_rate": 1.7101084777220242e-05, "loss": 0.3423, "step": 11155 }, { "epoch": 0.24875501754399, "grad_norm": 0.45032215118408203, "learning_rate": 1.709861909094933e-05, "loss": 0.2994, "step": 11160 }, { "epoch": 0.24886646692461006, "grad_norm": 0.5834007263183594, "learning_rate": 1.7096152534442515e-05, "loss": 0.3597, "step": 11165 }, { "epoch": 0.24897791630523014, "grad_norm": 0.49693334102630615, "learning_rate": 1.709368510800217e-05, "loss": 0.5088, "step": 11170 }, { "epoch": 0.2490893656858502, "grad_norm": 0.5814831852912903, "learning_rate": 1.7091216811930788e-05, "loss": 0.2275, "step": 11175 }, { "epoch": 0.24920081506647027, "grad_norm": 0.6035648584365845, "learning_rate": 1.7088747646530958e-05, "loss": 0.4597, "step": 11180 }, { "epoch": 0.24931226444709034, "grad_norm": 0.8806386590003967, "learning_rate": 1.7086277612105384e-05, "loss": 0.4899, "step": 11185 }, { "epoch": 0.2494237138277104, "grad_norm": 0.7509183287620544, "learning_rate": 1.708380670895687e-05, "loss": 0.351, "step": 11190 }, { "epoch": 0.24953516320833047, "grad_norm": 0.7867349982261658, "learning_rate": 1.7081334937388335e-05, "loss": 0.4327, "step": 11195 }, { "epoch": 0.24964661258895054, "grad_norm": 0.4728323221206665, "learning_rate": 1.7078862297702797e-05, "loss": 0.2547, "step": 11200 }, { "epoch": 0.2497580619695706, "grad_norm": 0.6497337222099304, "learning_rate": 1.707638879020338e-05, "loss": 0.4015, "step": 11205 }, { "epoch": 0.24986951135019067, "grad_norm": 0.6320534944534302, "learning_rate": 1.7073914415193322e-05, "loss": 0.3609, "step": 11210 }, { "epoch": 0.24998096073081075, "grad_norm": 0.5705777406692505, "learning_rate": 1.7071439172975956e-05, "loss": 0.2253, "step": 11215 }, { "epoch": 0.2500924101114308, "grad_norm": 0.40593892335891724, "learning_rate": 1.706896306385473e-05, "loss": 0.3606, "step": 11220 }, { "epoch": 0.2502038594920509, "grad_norm": 0.5855808854103088, "learning_rate": 1.7066486088133197e-05, "loss": 0.3391, "step": 11225 }, { "epoch": 0.25031530887267095, "grad_norm": 0.5672227740287781, "learning_rate": 1.7064008246115014e-05, "loss": 0.4191, "step": 11230 }, { "epoch": 0.250426758253291, "grad_norm": 0.5538031458854675, "learning_rate": 1.706152953810395e-05, "loss": 0.3872, "step": 11235 }, { "epoch": 0.25053820763391105, "grad_norm": 0.8302563428878784, "learning_rate": 1.7059049964403868e-05, "loss": 0.3195, "step": 11240 }, { "epoch": 0.2506496570145311, "grad_norm": 0.506974458694458, "learning_rate": 1.705656952531875e-05, "loss": 0.3242, "step": 11245 }, { "epoch": 0.2507611063951512, "grad_norm": 0.5065387487411499, "learning_rate": 1.7054088221152673e-05, "loss": 0.4196, "step": 11250 }, { "epoch": 0.2508725557757713, "grad_norm": 0.5343696475028992, "learning_rate": 1.705160605220983e-05, "loss": 0.3731, "step": 11255 }, { "epoch": 0.25098400515639135, "grad_norm": 0.7063585519790649, "learning_rate": 1.7049123018794515e-05, "loss": 0.3776, "step": 11260 }, { "epoch": 0.25109545453701143, "grad_norm": 0.49221959710121155, "learning_rate": 1.7046639121211127e-05, "loss": 0.3237, "step": 11265 }, { "epoch": 0.25120690391763145, "grad_norm": 0.7863365411758423, "learning_rate": 1.704415435976418e-05, "loss": 0.4793, "step": 11270 }, { "epoch": 0.25131835329825153, "grad_norm": 0.44239675998687744, "learning_rate": 1.7041668734758275e-05, "loss": 0.3187, "step": 11275 }, { "epoch": 0.2514298026788716, "grad_norm": 0.41563260555267334, "learning_rate": 1.7039182246498143e-05, "loss": 0.3485, "step": 11280 }, { "epoch": 0.2515412520594917, "grad_norm": 0.7104516625404358, "learning_rate": 1.7036694895288596e-05, "loss": 0.3551, "step": 11285 }, { "epoch": 0.25165270144011176, "grad_norm": 0.43757396936416626, "learning_rate": 1.7034206681434574e-05, "loss": 0.3258, "step": 11290 }, { "epoch": 0.25176415082073184, "grad_norm": 0.6314382553100586, "learning_rate": 1.7031717605241106e-05, "loss": 0.3952, "step": 11295 }, { "epoch": 0.25187560020135186, "grad_norm": 0.6406825184822083, "learning_rate": 1.7029227667013337e-05, "loss": 0.4376, "step": 11300 }, { "epoch": 0.25198704958197193, "grad_norm": 0.6163500547409058, "learning_rate": 1.702673686705651e-05, "loss": 0.4085, "step": 11305 }, { "epoch": 0.252098498962592, "grad_norm": 0.6222765445709229, "learning_rate": 1.7024245205675986e-05, "loss": 0.3946, "step": 11310 }, { "epoch": 0.2522099483432121, "grad_norm": 0.7866464257240295, "learning_rate": 1.702175268317722e-05, "loss": 0.4104, "step": 11315 }, { "epoch": 0.25232139772383216, "grad_norm": 0.6964669823646545, "learning_rate": 1.701925929986577e-05, "loss": 0.2906, "step": 11320 }, { "epoch": 0.25243284710445224, "grad_norm": 0.6061902642250061, "learning_rate": 1.701676505604731e-05, "loss": 0.3071, "step": 11325 }, { "epoch": 0.25254429648507226, "grad_norm": 0.4792914092540741, "learning_rate": 1.7014269952027617e-05, "loss": 0.4317, "step": 11330 }, { "epoch": 0.25265574586569234, "grad_norm": 0.5628675222396851, "learning_rate": 1.701177398811257e-05, "loss": 0.4445, "step": 11335 }, { "epoch": 0.2527671952463124, "grad_norm": 0.5403448939323425, "learning_rate": 1.7009277164608155e-05, "loss": 0.2302, "step": 11340 }, { "epoch": 0.2528786446269325, "grad_norm": 0.6103495359420776, "learning_rate": 1.7006779481820462e-05, "loss": 0.4181, "step": 11345 }, { "epoch": 0.25299009400755257, "grad_norm": 0.3714889883995056, "learning_rate": 1.700428094005569e-05, "loss": 0.369, "step": 11350 }, { "epoch": 0.25310154338817265, "grad_norm": 0.7734758257865906, "learning_rate": 1.7001781539620135e-05, "loss": 0.5424, "step": 11355 }, { "epoch": 0.25321299276879267, "grad_norm": 0.6347566246986389, "learning_rate": 1.6999281280820214e-05, "loss": 0.4152, "step": 11360 }, { "epoch": 0.25332444214941274, "grad_norm": 0.7742220759391785, "learning_rate": 1.6996780163962432e-05, "loss": 0.3414, "step": 11365 }, { "epoch": 0.2534358915300328, "grad_norm": 0.6309844255447388, "learning_rate": 1.699427818935341e-05, "loss": 0.4047, "step": 11370 }, { "epoch": 0.2535473409106529, "grad_norm": 0.7506263256072998, "learning_rate": 1.6991775357299867e-05, "loss": 0.3317, "step": 11375 }, { "epoch": 0.253658790291273, "grad_norm": 0.6002774834632874, "learning_rate": 1.6989271668108632e-05, "loss": 0.3938, "step": 11380 }, { "epoch": 0.25377023967189305, "grad_norm": 0.6120704412460327, "learning_rate": 1.6986767122086644e-05, "loss": 0.2883, "step": 11385 }, { "epoch": 0.25388168905251307, "grad_norm": 0.5829112529754639, "learning_rate": 1.6984261719540935e-05, "loss": 0.4873, "step": 11390 }, { "epoch": 0.25399313843313315, "grad_norm": 0.5210264921188354, "learning_rate": 1.698175546077865e-05, "loss": 0.3445, "step": 11395 }, { "epoch": 0.2541045878137532, "grad_norm": 0.5267536640167236, "learning_rate": 1.6979248346107037e-05, "loss": 0.2697, "step": 11400 }, { "epoch": 0.2542160371943733, "grad_norm": 0.6789841651916504, "learning_rate": 1.6976740375833444e-05, "loss": 0.2485, "step": 11405 }, { "epoch": 0.2543274865749934, "grad_norm": 0.5412545800209045, "learning_rate": 1.6974231550265338e-05, "loss": 0.3462, "step": 11410 }, { "epoch": 0.2544389359556134, "grad_norm": 0.40079477429389954, "learning_rate": 1.6971721869710275e-05, "loss": 0.4159, "step": 11415 }, { "epoch": 0.2545503853362335, "grad_norm": 0.5569183826446533, "learning_rate": 1.6969211334475923e-05, "loss": 0.3448, "step": 11420 }, { "epoch": 0.25466183471685355, "grad_norm": 0.6156569719314575, "learning_rate": 1.6966699944870052e-05, "loss": 0.2425, "step": 11425 }, { "epoch": 0.25477328409747363, "grad_norm": 0.681559145450592, "learning_rate": 1.696418770120055e-05, "loss": 0.4076, "step": 11430 }, { "epoch": 0.2548847334780937, "grad_norm": 0.6507387161254883, "learning_rate": 1.6961674603775388e-05, "loss": 0.4648, "step": 11435 }, { "epoch": 0.2549961828587138, "grad_norm": 0.5616104602813721, "learning_rate": 1.6959160652902654e-05, "loss": 0.3365, "step": 11440 }, { "epoch": 0.2551076322393338, "grad_norm": 0.689851701259613, "learning_rate": 1.695664584889054e-05, "loss": 0.328, "step": 11445 }, { "epoch": 0.2552190816199539, "grad_norm": 0.6259694695472717, "learning_rate": 1.6954130192047346e-05, "loss": 0.5502, "step": 11450 }, { "epoch": 0.25533053100057396, "grad_norm": 0.6690799593925476, "learning_rate": 1.6951613682681465e-05, "loss": 0.3411, "step": 11455 }, { "epoch": 0.25544198038119403, "grad_norm": 0.6366260647773743, "learning_rate": 1.6949096321101404e-05, "loss": 0.3378, "step": 11460 }, { "epoch": 0.2555534297618141, "grad_norm": 0.578250527381897, "learning_rate": 1.6946578107615774e-05, "loss": 0.4496, "step": 11465 }, { "epoch": 0.2556648791424342, "grad_norm": 0.5326183438301086, "learning_rate": 1.694405904253329e-05, "loss": 0.2232, "step": 11470 }, { "epoch": 0.2557763285230542, "grad_norm": 0.6691266298294067, "learning_rate": 1.6941539126162766e-05, "loss": 0.4133, "step": 11475 }, { "epoch": 0.2558877779036743, "grad_norm": 0.5321334004402161, "learning_rate": 1.6939018358813122e-05, "loss": 0.2617, "step": 11480 }, { "epoch": 0.25599922728429436, "grad_norm": 0.6388453245162964, "learning_rate": 1.6936496740793395e-05, "loss": 0.3585, "step": 11485 }, { "epoch": 0.25611067666491444, "grad_norm": 0.4702873229980469, "learning_rate": 1.69339742724127e-05, "loss": 0.3783, "step": 11490 }, { "epoch": 0.2562221260455345, "grad_norm": 0.6191978454589844, "learning_rate": 1.6931450953980285e-05, "loss": 0.4226, "step": 11495 }, { "epoch": 0.2563335754261546, "grad_norm": 0.46512070298194885, "learning_rate": 1.692892678580549e-05, "loss": 0.3928, "step": 11500 }, { "epoch": 0.2564450248067746, "grad_norm": 0.6981931924819946, "learning_rate": 1.692640176819775e-05, "loss": 0.2401, "step": 11505 }, { "epoch": 0.2565564741873947, "grad_norm": 0.6449040174484253, "learning_rate": 1.692387590146662e-05, "loss": 0.3707, "step": 11510 }, { "epoch": 0.25666792356801477, "grad_norm": 0.6683300733566284, "learning_rate": 1.6921349185921744e-05, "loss": 0.3094, "step": 11515 }, { "epoch": 0.25677937294863484, "grad_norm": 0.4521041810512543, "learning_rate": 1.6918821621872886e-05, "loss": 0.436, "step": 11520 }, { "epoch": 0.2568908223292549, "grad_norm": 0.6098304390907288, "learning_rate": 1.69162932096299e-05, "loss": 0.3315, "step": 11525 }, { "epoch": 0.257002271709875, "grad_norm": 0.4085136950016022, "learning_rate": 1.6913763949502754e-05, "loss": 0.3717, "step": 11530 }, { "epoch": 0.257113721090495, "grad_norm": 0.6487431526184082, "learning_rate": 1.691123384180151e-05, "loss": 0.3725, "step": 11535 }, { "epoch": 0.2572251704711151, "grad_norm": 0.6593152284622192, "learning_rate": 1.690870288683635e-05, "loss": 0.4701, "step": 11540 }, { "epoch": 0.25733661985173517, "grad_norm": 0.5475544929504395, "learning_rate": 1.690617108491754e-05, "loss": 0.3776, "step": 11545 }, { "epoch": 0.25744806923235525, "grad_norm": 0.6540517807006836, "learning_rate": 1.690363843635546e-05, "loss": 0.3261, "step": 11550 }, { "epoch": 0.2575595186129753, "grad_norm": 0.628803014755249, "learning_rate": 1.69011049414606e-05, "loss": 0.4249, "step": 11555 }, { "epoch": 0.2576709679935954, "grad_norm": 0.7063155174255371, "learning_rate": 1.689857060054354e-05, "loss": 0.3277, "step": 11560 }, { "epoch": 0.2577824173742154, "grad_norm": 0.7075435519218445, "learning_rate": 1.689603541391497e-05, "loss": 0.3694, "step": 11565 }, { "epoch": 0.2578938667548355, "grad_norm": 0.5980352759361267, "learning_rate": 1.6893499381885693e-05, "loss": 0.2849, "step": 11570 }, { "epoch": 0.2580053161354556, "grad_norm": 0.7767646908760071, "learning_rate": 1.68909625047666e-05, "loss": 0.3956, "step": 11575 }, { "epoch": 0.25811676551607565, "grad_norm": 1.3503165245056152, "learning_rate": 1.6888424782868692e-05, "loss": 0.3896, "step": 11580 }, { "epoch": 0.25822821489669573, "grad_norm": 0.6707198619842529, "learning_rate": 1.6885886216503077e-05, "loss": 0.3116, "step": 11585 }, { "epoch": 0.2583396642773158, "grad_norm": 0.781755268573761, "learning_rate": 1.6883346805980964e-05, "loss": 0.3923, "step": 11590 }, { "epoch": 0.2584511136579358, "grad_norm": 0.6143796443939209, "learning_rate": 1.6880806551613662e-05, "loss": 0.443, "step": 11595 }, { "epoch": 0.2585625630385559, "grad_norm": 0.6381718516349792, "learning_rate": 1.6878265453712587e-05, "loss": 0.3774, "step": 11600 }, { "epoch": 0.258674012419176, "grad_norm": 0.7067936062812805, "learning_rate": 1.6875723512589264e-05, "loss": 0.4032, "step": 11605 }, { "epoch": 0.25878546179979606, "grad_norm": 0.5545554757118225, "learning_rate": 1.687318072855531e-05, "loss": 0.3526, "step": 11610 }, { "epoch": 0.25889691118041613, "grad_norm": 0.8011837005615234, "learning_rate": 1.687063710192245e-05, "loss": 0.5166, "step": 11615 }, { "epoch": 0.25900836056103616, "grad_norm": 0.5507569909095764, "learning_rate": 1.6868092633002514e-05, "loss": 0.4307, "step": 11620 }, { "epoch": 0.25911980994165623, "grad_norm": 0.2780143916606903, "learning_rate": 1.6865547322107434e-05, "loss": 0.2921, "step": 11625 }, { "epoch": 0.2592312593222763, "grad_norm": 0.44053661823272705, "learning_rate": 1.686300116954925e-05, "loss": 0.3779, "step": 11630 }, { "epoch": 0.2593427087028964, "grad_norm": 0.5424075722694397, "learning_rate": 1.6860454175640094e-05, "loss": 0.4212, "step": 11635 }, { "epoch": 0.25945415808351646, "grad_norm": 0.4896464943885803, "learning_rate": 1.685790634069221e-05, "loss": 0.3013, "step": 11640 }, { "epoch": 0.25956560746413654, "grad_norm": 0.7107864022254944, "learning_rate": 1.6855357665017944e-05, "loss": 0.263, "step": 11645 }, { "epoch": 0.25967705684475656, "grad_norm": 0.6286140084266663, "learning_rate": 1.6852808148929745e-05, "loss": 0.4639, "step": 11650 }, { "epoch": 0.25978850622537664, "grad_norm": 0.6039460301399231, "learning_rate": 1.685025779274016e-05, "loss": 0.3377, "step": 11655 }, { "epoch": 0.2598999556059967, "grad_norm": 0.7437496185302734, "learning_rate": 1.6847706596761848e-05, "loss": 0.3962, "step": 11660 }, { "epoch": 0.2600114049866168, "grad_norm": 0.6204871535301208, "learning_rate": 1.6845154561307562e-05, "loss": 0.1816, "step": 11665 }, { "epoch": 0.26012285436723687, "grad_norm": 0.5137778520584106, "learning_rate": 1.6842601686690162e-05, "loss": 0.3548, "step": 11670 }, { "epoch": 0.26023430374785694, "grad_norm": 0.7160062789916992, "learning_rate": 1.6840047973222615e-05, "loss": 0.3825, "step": 11675 }, { "epoch": 0.26034575312847696, "grad_norm": 0.42000359296798706, "learning_rate": 1.6837493421217982e-05, "loss": 0.439, "step": 11680 }, { "epoch": 0.26045720250909704, "grad_norm": 0.6821982264518738, "learning_rate": 1.6834938030989433e-05, "loss": 0.3363, "step": 11685 }, { "epoch": 0.2605686518897171, "grad_norm": 0.585969090461731, "learning_rate": 1.683238180285024e-05, "loss": 0.2724, "step": 11690 }, { "epoch": 0.2606801012703372, "grad_norm": 0.588124692440033, "learning_rate": 1.6829824737113775e-05, "loss": 0.4278, "step": 11695 }, { "epoch": 0.26079155065095727, "grad_norm": 0.5431883931159973, "learning_rate": 1.6827266834093517e-05, "loss": 0.4278, "step": 11700 }, { "epoch": 0.26090300003157735, "grad_norm": 0.38738054037094116, "learning_rate": 1.6824708094103043e-05, "loss": 0.3955, "step": 11705 }, { "epoch": 0.26101444941219737, "grad_norm": 0.3914400041103363, "learning_rate": 1.6822148517456033e-05, "loss": 0.231, "step": 11710 }, { "epoch": 0.26112589879281745, "grad_norm": 0.7311981916427612, "learning_rate": 1.6819588104466275e-05, "loss": 0.3843, "step": 11715 }, { "epoch": 0.2612373481734375, "grad_norm": 0.4924669563770294, "learning_rate": 1.6817026855447657e-05, "loss": 0.3251, "step": 11720 }, { "epoch": 0.2613487975540576, "grad_norm": 0.5667435526847839, "learning_rate": 1.6814464770714162e-05, "loss": 0.2467, "step": 11725 }, { "epoch": 0.2614602469346777, "grad_norm": 0.6493402123451233, "learning_rate": 1.6811901850579884e-05, "loss": 0.3649, "step": 11730 }, { "epoch": 0.26157169631529775, "grad_norm": 0.5301352739334106, "learning_rate": 1.6809338095359026e-05, "loss": 0.5046, "step": 11735 }, { "epoch": 0.2616831456959178, "grad_norm": 0.4617167115211487, "learning_rate": 1.6806773505365873e-05, "loss": 0.3497, "step": 11740 }, { "epoch": 0.26179459507653785, "grad_norm": 0.5654996633529663, "learning_rate": 1.6804208080914824e-05, "loss": 0.3925, "step": 11745 }, { "epoch": 0.2619060444571579, "grad_norm": 0.4751913845539093, "learning_rate": 1.6801641822320392e-05, "loss": 0.3844, "step": 11750 }, { "epoch": 0.262017493837778, "grad_norm": 0.703079342842102, "learning_rate": 1.6799074729897173e-05, "loss": 0.3697, "step": 11755 }, { "epoch": 0.2621289432183981, "grad_norm": 0.5409825444221497, "learning_rate": 1.679650680395987e-05, "loss": 0.5028, "step": 11760 }, { "epoch": 0.26224039259901816, "grad_norm": 0.5090709328651428, "learning_rate": 1.6793938044823295e-05, "loss": 0.4512, "step": 11765 }, { "epoch": 0.2623518419796382, "grad_norm": 0.35236915946006775, "learning_rate": 1.6791368452802356e-05, "loss": 0.3593, "step": 11770 }, { "epoch": 0.26246329136025826, "grad_norm": 0.46630239486694336, "learning_rate": 1.6788798028212068e-05, "loss": 0.3396, "step": 11775 }, { "epoch": 0.26257474074087833, "grad_norm": 0.5448976159095764, "learning_rate": 1.678622677136754e-05, "loss": 0.3422, "step": 11780 }, { "epoch": 0.2626861901214984, "grad_norm": 0.5812391042709351, "learning_rate": 1.6783654682583995e-05, "loss": 0.3253, "step": 11785 }, { "epoch": 0.2627976395021185, "grad_norm": 0.3605920374393463, "learning_rate": 1.6781081762176745e-05, "loss": 0.3841, "step": 11790 }, { "epoch": 0.26290908888273856, "grad_norm": 0.6160728931427002, "learning_rate": 1.6778508010461213e-05, "loss": 0.3686, "step": 11795 }, { "epoch": 0.2630205382633586, "grad_norm": 0.5062189102172852, "learning_rate": 1.6775933427752922e-05, "loss": 0.3245, "step": 11800 }, { "epoch": 0.26313198764397866, "grad_norm": 0.6029192209243774, "learning_rate": 1.6773358014367494e-05, "loss": 0.2799, "step": 11805 }, { "epoch": 0.26324343702459874, "grad_norm": 0.5310506820678711, "learning_rate": 1.677078177062066e-05, "loss": 0.3318, "step": 11810 }, { "epoch": 0.2633548864052188, "grad_norm": 0.4977557957172394, "learning_rate": 1.6768204696828236e-05, "loss": 0.2938, "step": 11815 }, { "epoch": 0.2634663357858389, "grad_norm": 0.543250322341919, "learning_rate": 1.6765626793306164e-05, "loss": 0.3897, "step": 11820 }, { "epoch": 0.2635777851664589, "grad_norm": 0.572293221950531, "learning_rate": 1.676304806037047e-05, "loss": 0.3455, "step": 11825 }, { "epoch": 0.263689234547079, "grad_norm": 0.859261155128479, "learning_rate": 1.6760468498337285e-05, "loss": 0.3914, "step": 11830 }, { "epoch": 0.26380068392769906, "grad_norm": 0.5799628496170044, "learning_rate": 1.6757888107522845e-05, "loss": 0.4242, "step": 11835 }, { "epoch": 0.26391213330831914, "grad_norm": 0.4707748293876648, "learning_rate": 1.6755306888243487e-05, "loss": 0.4087, "step": 11840 }, { "epoch": 0.2640235826889392, "grad_norm": 0.6998196244239807, "learning_rate": 1.6752724840815643e-05, "loss": 0.4391, "step": 11845 }, { "epoch": 0.2641350320695593, "grad_norm": 0.3416438698768616, "learning_rate": 1.6750141965555858e-05, "loss": 0.3157, "step": 11850 }, { "epoch": 0.2642464814501793, "grad_norm": 0.582663893699646, "learning_rate": 1.6747558262780775e-05, "loss": 0.2197, "step": 11855 }, { "epoch": 0.2643579308307994, "grad_norm": 0.6217628717422485, "learning_rate": 1.6744973732807128e-05, "loss": 0.3317, "step": 11860 }, { "epoch": 0.26446938021141947, "grad_norm": 0.4774651527404785, "learning_rate": 1.6742388375951767e-05, "loss": 0.341, "step": 11865 }, { "epoch": 0.26458082959203955, "grad_norm": 0.6677722334861755, "learning_rate": 1.6739802192531633e-05, "loss": 0.3857, "step": 11870 }, { "epoch": 0.2646922789726596, "grad_norm": 0.5869291424751282, "learning_rate": 1.6737215182863775e-05, "loss": 0.3785, "step": 11875 }, { "epoch": 0.2648037283532797, "grad_norm": 0.7363972067832947, "learning_rate": 1.673462734726534e-05, "loss": 0.3204, "step": 11880 }, { "epoch": 0.2649151777338997, "grad_norm": 0.676572859287262, "learning_rate": 1.6732038686053572e-05, "loss": 0.3858, "step": 11885 }, { "epoch": 0.2650266271145198, "grad_norm": 0.4292004108428955, "learning_rate": 1.6729449199545828e-05, "loss": 0.2801, "step": 11890 }, { "epoch": 0.2651380764951399, "grad_norm": 0.6198212504386902, "learning_rate": 1.6726858888059553e-05, "loss": 0.3531, "step": 11895 }, { "epoch": 0.26524952587575995, "grad_norm": 0.5823439955711365, "learning_rate": 1.67242677519123e-05, "loss": 0.3468, "step": 11900 }, { "epoch": 0.26536097525638, "grad_norm": 0.5388737916946411, "learning_rate": 1.672167579142173e-05, "loss": 0.3273, "step": 11905 }, { "epoch": 0.2654724246370001, "grad_norm": 0.5490552186965942, "learning_rate": 1.6719083006905586e-05, "loss": 0.2722, "step": 11910 }, { "epoch": 0.2655838740176201, "grad_norm": 0.5554761290550232, "learning_rate": 1.671648939868173e-05, "loss": 0.319, "step": 11915 }, { "epoch": 0.2656953233982402, "grad_norm": 0.6012527346611023, "learning_rate": 1.6713894967068117e-05, "loss": 0.4606, "step": 11920 }, { "epoch": 0.2658067727788603, "grad_norm": 0.7309210896492004, "learning_rate": 1.6711299712382807e-05, "loss": 0.3519, "step": 11925 }, { "epoch": 0.26591822215948036, "grad_norm": 0.5101271271705627, "learning_rate": 1.6708703634943954e-05, "loss": 0.2698, "step": 11930 }, { "epoch": 0.26602967154010043, "grad_norm": 0.7300100326538086, "learning_rate": 1.6706106735069817e-05, "loss": 0.4509, "step": 11935 }, { "epoch": 0.2661411209207205, "grad_norm": 0.5938910245895386, "learning_rate": 1.6703509013078756e-05, "loss": 0.357, "step": 11940 }, { "epoch": 0.26625257030134053, "grad_norm": 0.6169475317001343, "learning_rate": 1.6700910469289238e-05, "loss": 0.3244, "step": 11945 }, { "epoch": 0.2663640196819606, "grad_norm": 0.5854972004890442, "learning_rate": 1.669831110401982e-05, "loss": 0.3573, "step": 11950 }, { "epoch": 0.2664754690625807, "grad_norm": 0.4386449158191681, "learning_rate": 1.6695710917589156e-05, "loss": 0.359, "step": 11955 }, { "epoch": 0.26658691844320076, "grad_norm": 0.6892266273498535, "learning_rate": 1.669310991031602e-05, "loss": 0.4041, "step": 11960 }, { "epoch": 0.26669836782382084, "grad_norm": 0.5055707693099976, "learning_rate": 1.6690508082519275e-05, "loss": 0.3592, "step": 11965 }, { "epoch": 0.2668098172044409, "grad_norm": 0.38702186942100525, "learning_rate": 1.668790543451788e-05, "loss": 0.4146, "step": 11970 }, { "epoch": 0.26692126658506093, "grad_norm": 0.5644632577896118, "learning_rate": 1.6685301966630903e-05, "loss": 0.4233, "step": 11975 }, { "epoch": 0.267032715965681, "grad_norm": 0.5067944526672363, "learning_rate": 1.6682697679177506e-05, "loss": 0.2752, "step": 11980 }, { "epoch": 0.2671441653463011, "grad_norm": 0.54136723279953, "learning_rate": 1.6680092572476956e-05, "loss": 0.2532, "step": 11985 }, { "epoch": 0.26725561472692116, "grad_norm": 0.4757809042930603, "learning_rate": 1.667748664684862e-05, "loss": 0.3329, "step": 11990 }, { "epoch": 0.26736706410754124, "grad_norm": 0.4774401783943176, "learning_rate": 1.6674879902611964e-05, "loss": 0.438, "step": 11995 }, { "epoch": 0.26747851348816126, "grad_norm": 0.6735679507255554, "learning_rate": 1.667227234008655e-05, "loss": 0.4542, "step": 12000 }, { "epoch": 0.26758996286878134, "grad_norm": 0.6603081822395325, "learning_rate": 1.666966395959205e-05, "loss": 0.3418, "step": 12005 }, { "epoch": 0.2677014122494014, "grad_norm": 0.5100643634796143, "learning_rate": 1.6667054761448233e-05, "loss": 0.342, "step": 12010 }, { "epoch": 0.2678128616300215, "grad_norm": 0.5056988000869751, "learning_rate": 1.6664444745974964e-05, "loss": 0.4094, "step": 12015 }, { "epoch": 0.26792431101064157, "grad_norm": 0.504252016544342, "learning_rate": 1.666183391349221e-05, "loss": 0.3087, "step": 12020 }, { "epoch": 0.26803576039126165, "grad_norm": 0.658033013343811, "learning_rate": 1.6659222264320038e-05, "loss": 0.3112, "step": 12025 }, { "epoch": 0.26814720977188167, "grad_norm": 0.43579134345054626, "learning_rate": 1.665660979877862e-05, "loss": 0.1689, "step": 12030 }, { "epoch": 0.26825865915250174, "grad_norm": 0.7889179587364197, "learning_rate": 1.6653996517188224e-05, "loss": 0.2845, "step": 12035 }, { "epoch": 0.2683701085331218, "grad_norm": 0.4142740070819855, "learning_rate": 1.665138241986921e-05, "loss": 0.2759, "step": 12040 }, { "epoch": 0.2684815579137419, "grad_norm": 0.5060544610023499, "learning_rate": 1.6648767507142056e-05, "loss": 0.3471, "step": 12045 }, { "epoch": 0.268593007294362, "grad_norm": 0.6566082835197449, "learning_rate": 1.6646151779327326e-05, "loss": 0.4258, "step": 12050 }, { "epoch": 0.26870445667498205, "grad_norm": 0.9004737734794617, "learning_rate": 1.664353523674569e-05, "loss": 0.4168, "step": 12055 }, { "epoch": 0.26881590605560207, "grad_norm": 0.6746425032615662, "learning_rate": 1.6640917879717907e-05, "loss": 0.2759, "step": 12060 }, { "epoch": 0.26892735543622215, "grad_norm": 0.6046661138534546, "learning_rate": 1.6638299708564856e-05, "loss": 0.4257, "step": 12065 }, { "epoch": 0.2690388048168422, "grad_norm": 0.7445327639579773, "learning_rate": 1.6635680723607507e-05, "loss": 0.3879, "step": 12070 }, { "epoch": 0.2691502541974623, "grad_norm": 0.46639499068260193, "learning_rate": 1.6633060925166914e-05, "loss": 0.4192, "step": 12075 }, { "epoch": 0.2692617035780824, "grad_norm": 0.5227963328361511, "learning_rate": 1.663044031356425e-05, "loss": 0.3057, "step": 12080 }, { "epoch": 0.26937315295870246, "grad_norm": 0.6597884893417358, "learning_rate": 1.6627818889120787e-05, "loss": 0.4571, "step": 12085 }, { "epoch": 0.2694846023393225, "grad_norm": 0.6032409071922302, "learning_rate": 1.6625196652157883e-05, "loss": 0.3079, "step": 12090 }, { "epoch": 0.26959605171994255, "grad_norm": 0.5562949776649475, "learning_rate": 1.662257360299701e-05, "loss": 0.3328, "step": 12095 }, { "epoch": 0.26970750110056263, "grad_norm": 0.5631253123283386, "learning_rate": 1.6619949741959734e-05, "loss": 0.3595, "step": 12100 }, { "epoch": 0.2698189504811827, "grad_norm": 0.7083554863929749, "learning_rate": 1.6617325069367715e-05, "loss": 0.3311, "step": 12105 }, { "epoch": 0.2699303998618028, "grad_norm": 0.5315176844596863, "learning_rate": 1.6614699585542722e-05, "loss": 0.2935, "step": 12110 }, { "epoch": 0.27004184924242286, "grad_norm": 0.8315207958221436, "learning_rate": 1.6612073290806617e-05, "loss": 0.4054, "step": 12115 }, { "epoch": 0.2701532986230429, "grad_norm": 0.6667119860649109, "learning_rate": 1.660944618548136e-05, "loss": 0.4269, "step": 12120 }, { "epoch": 0.27026474800366296, "grad_norm": 0.6344430446624756, "learning_rate": 1.660681826988902e-05, "loss": 0.448, "step": 12125 }, { "epoch": 0.27037619738428303, "grad_norm": 0.5542057156562805, "learning_rate": 1.660418954435176e-05, "loss": 0.4683, "step": 12130 }, { "epoch": 0.2704876467649031, "grad_norm": 0.634754478931427, "learning_rate": 1.6601560009191837e-05, "loss": 0.3437, "step": 12135 }, { "epoch": 0.2705990961455232, "grad_norm": 0.892122745513916, "learning_rate": 1.6598929664731613e-05, "loss": 0.5243, "step": 12140 }, { "epoch": 0.27071054552614326, "grad_norm": 0.7992680668830872, "learning_rate": 1.659629851129355e-05, "loss": 0.3108, "step": 12145 }, { "epoch": 0.2708219949067633, "grad_norm": 0.6725826263427734, "learning_rate": 1.6593666549200202e-05, "loss": 0.3965, "step": 12150 }, { "epoch": 0.27093344428738336, "grad_norm": 0.6233682036399841, "learning_rate": 1.659103377877423e-05, "loss": 0.4429, "step": 12155 }, { "epoch": 0.27104489366800344, "grad_norm": 0.5028186440467834, "learning_rate": 1.658840020033839e-05, "loss": 0.3574, "step": 12160 }, { "epoch": 0.2711563430486235, "grad_norm": 0.46699824929237366, "learning_rate": 1.6585765814215547e-05, "loss": 0.364, "step": 12165 }, { "epoch": 0.2712677924292436, "grad_norm": 0.5705031752586365, "learning_rate": 1.6583130620728643e-05, "loss": 0.3392, "step": 12170 }, { "epoch": 0.27137924180986367, "grad_norm": 0.7525768876075745, "learning_rate": 1.658049462020074e-05, "loss": 0.3572, "step": 12175 }, { "epoch": 0.2714906911904837, "grad_norm": 0.49981358647346497, "learning_rate": 1.6577857812954994e-05, "loss": 0.3044, "step": 12180 }, { "epoch": 0.27160214057110377, "grad_norm": 0.6712756156921387, "learning_rate": 1.657522019931465e-05, "loss": 0.3959, "step": 12185 }, { "epoch": 0.27171358995172384, "grad_norm": 0.5159211158752441, "learning_rate": 1.6572581779603062e-05, "loss": 0.3127, "step": 12190 }, { "epoch": 0.2718250393323439, "grad_norm": 0.8316812515258789, "learning_rate": 1.6569942554143686e-05, "loss": 0.3857, "step": 12195 }, { "epoch": 0.271936488712964, "grad_norm": 0.5915578603744507, "learning_rate": 1.6567302523260057e-05, "loss": 0.3696, "step": 12200 }, { "epoch": 0.272047938093584, "grad_norm": 0.5893950462341309, "learning_rate": 1.6564661687275836e-05, "loss": 0.5104, "step": 12205 }, { "epoch": 0.2721593874742041, "grad_norm": 0.4903256595134735, "learning_rate": 1.6562020046514763e-05, "loss": 0.4969, "step": 12210 }, { "epoch": 0.27227083685482417, "grad_norm": 0.7330714464187622, "learning_rate": 1.6559377601300683e-05, "loss": 0.3735, "step": 12215 }, { "epoch": 0.27238228623544425, "grad_norm": 0.6421142220497131, "learning_rate": 1.6556734351957534e-05, "loss": 0.2828, "step": 12220 }, { "epoch": 0.2724937356160643, "grad_norm": 0.43036210536956787, "learning_rate": 1.6554090298809372e-05, "loss": 0.416, "step": 12225 }, { "epoch": 0.2726051849966844, "grad_norm": 0.48010724782943726, "learning_rate": 1.6551445442180325e-05, "loss": 0.3933, "step": 12230 }, { "epoch": 0.2727166343773044, "grad_norm": 0.4992339015007019, "learning_rate": 1.6548799782394637e-05, "loss": 0.3862, "step": 12235 }, { "epoch": 0.2728280837579245, "grad_norm": 0.5170784592628479, "learning_rate": 1.6546153319776644e-05, "loss": 0.3221, "step": 12240 }, { "epoch": 0.2729395331385446, "grad_norm": 0.753477156162262, "learning_rate": 1.654350605465078e-05, "loss": 0.3116, "step": 12245 }, { "epoch": 0.27305098251916465, "grad_norm": 0.6622695922851562, "learning_rate": 1.654085798734159e-05, "loss": 0.3437, "step": 12250 }, { "epoch": 0.27316243189978473, "grad_norm": 0.588044285774231, "learning_rate": 1.6538209118173688e-05, "loss": 0.3554, "step": 12255 }, { "epoch": 0.2732738812804048, "grad_norm": 0.8749666810035706, "learning_rate": 1.653555944747182e-05, "loss": 0.3499, "step": 12260 }, { "epoch": 0.27338533066102483, "grad_norm": 0.6158493757247925, "learning_rate": 1.6532908975560813e-05, "loss": 0.2875, "step": 12265 }, { "epoch": 0.2734967800416449, "grad_norm": 0.46477964520454407, "learning_rate": 1.653025770276559e-05, "loss": 0.3704, "step": 12270 }, { "epoch": 0.273608229422265, "grad_norm": 0.6315426826477051, "learning_rate": 1.652760562941118e-05, "loss": 0.4398, "step": 12275 }, { "epoch": 0.27371967880288506, "grad_norm": 0.6809016466140747, "learning_rate": 1.6524952755822702e-05, "loss": 0.4204, "step": 12280 }, { "epoch": 0.27383112818350513, "grad_norm": 0.45368507504463196, "learning_rate": 1.6522299082325385e-05, "loss": 0.3416, "step": 12285 }, { "epoch": 0.2739425775641252, "grad_norm": 0.6435974836349487, "learning_rate": 1.651964460924454e-05, "loss": 0.2746, "step": 12290 }, { "epoch": 0.27405402694474523, "grad_norm": 0.585220992565155, "learning_rate": 1.6516989336905594e-05, "loss": 0.3996, "step": 12295 }, { "epoch": 0.2741654763253653, "grad_norm": 0.632941484451294, "learning_rate": 1.6514333265634058e-05, "loss": 0.3072, "step": 12300 }, { "epoch": 0.2742769257059854, "grad_norm": 0.5073537230491638, "learning_rate": 1.6511676395755546e-05, "loss": 0.3857, "step": 12305 }, { "epoch": 0.27438837508660546, "grad_norm": 0.5666691660881042, "learning_rate": 1.650901872759577e-05, "loss": 0.4126, "step": 12310 }, { "epoch": 0.27449982446722554, "grad_norm": 0.44631102681159973, "learning_rate": 1.6506360261480537e-05, "loss": 0.3289, "step": 12315 }, { "epoch": 0.2746112738478456, "grad_norm": 0.560738205909729, "learning_rate": 1.6503700997735764e-05, "loss": 0.4752, "step": 12320 }, { "epoch": 0.27472272322846564, "grad_norm": 0.5568654537200928, "learning_rate": 1.6501040936687444e-05, "loss": 0.3097, "step": 12325 }, { "epoch": 0.2748341726090857, "grad_norm": 0.430672287940979, "learning_rate": 1.6498380078661686e-05, "loss": 0.2771, "step": 12330 }, { "epoch": 0.2749456219897058, "grad_norm": 0.7009373307228088, "learning_rate": 1.649571842398469e-05, "loss": 0.315, "step": 12335 }, { "epoch": 0.27505707137032587, "grad_norm": 0.5214234590530396, "learning_rate": 1.649305597298276e-05, "loss": 0.2682, "step": 12340 }, { "epoch": 0.27516852075094594, "grad_norm": 0.6840612292289734, "learning_rate": 1.649039272598228e-05, "loss": 0.299, "step": 12345 }, { "epoch": 0.275279970131566, "grad_norm": 0.6062028408050537, "learning_rate": 1.6487728683309754e-05, "loss": 0.4087, "step": 12350 }, { "epoch": 0.27539141951218604, "grad_norm": 0.48450252413749695, "learning_rate": 1.6485063845291767e-05, "loss": 0.3104, "step": 12355 }, { "epoch": 0.2755028688928061, "grad_norm": 0.6246992945671082, "learning_rate": 1.648239821225501e-05, "loss": 0.4994, "step": 12360 }, { "epoch": 0.2756143182734262, "grad_norm": 0.6224850416183472, "learning_rate": 1.647973178452627e-05, "loss": 0.2987, "step": 12365 }, { "epoch": 0.27572576765404627, "grad_norm": 0.6870371103286743, "learning_rate": 1.6477064562432428e-05, "loss": 0.4026, "step": 12370 }, { "epoch": 0.27583721703466635, "grad_norm": 0.6845866441726685, "learning_rate": 1.6474396546300464e-05, "loss": 0.5957, "step": 12375 }, { "epoch": 0.27594866641528637, "grad_norm": 0.5691341161727905, "learning_rate": 1.647172773645746e-05, "loss": 0.2552, "step": 12380 }, { "epoch": 0.27606011579590645, "grad_norm": 0.5626338720321655, "learning_rate": 1.6469058133230588e-05, "loss": 0.3779, "step": 12385 }, { "epoch": 0.2761715651765265, "grad_norm": 0.7906394004821777, "learning_rate": 1.646638773694712e-05, "loss": 0.3081, "step": 12390 }, { "epoch": 0.2762830145571466, "grad_norm": 0.519107460975647, "learning_rate": 1.6463716547934433e-05, "loss": 0.3364, "step": 12395 }, { "epoch": 0.2763944639377667, "grad_norm": 0.45310643315315247, "learning_rate": 1.6461044566519993e-05, "loss": 0.3508, "step": 12400 }, { "epoch": 0.27650591331838675, "grad_norm": 0.5506815314292908, "learning_rate": 1.6458371793031353e-05, "loss": 0.4634, "step": 12405 }, { "epoch": 0.2766173626990068, "grad_norm": 0.41375917196273804, "learning_rate": 1.645569822779619e-05, "loss": 0.2951, "step": 12410 }, { "epoch": 0.27672881207962685, "grad_norm": 0.7495025992393494, "learning_rate": 1.6453023871142245e-05, "loss": 0.4052, "step": 12415 }, { "epoch": 0.27684026146024693, "grad_norm": 0.827620804309845, "learning_rate": 1.6450348723397388e-05, "loss": 0.4226, "step": 12420 }, { "epoch": 0.276951710840867, "grad_norm": 0.5709133148193359, "learning_rate": 1.6447672784889567e-05, "loss": 0.3413, "step": 12425 }, { "epoch": 0.2770631602214871, "grad_norm": 0.5625734329223633, "learning_rate": 1.6444996055946833e-05, "loss": 0.3169, "step": 12430 }, { "epoch": 0.27717460960210716, "grad_norm": 0.568760335445404, "learning_rate": 1.6442318536897325e-05, "loss": 0.2628, "step": 12435 }, { "epoch": 0.2772860589827272, "grad_norm": 0.5465920567512512, "learning_rate": 1.6439640228069297e-05, "loss": 0.3317, "step": 12440 }, { "epoch": 0.27739750836334726, "grad_norm": 0.5634509325027466, "learning_rate": 1.6436961129791077e-05, "loss": 0.325, "step": 12445 }, { "epoch": 0.27750895774396733, "grad_norm": 0.6779350638389587, "learning_rate": 1.6434281242391113e-05, "loss": 0.4495, "step": 12450 }, { "epoch": 0.2776204071245874, "grad_norm": 0.7128263115882874, "learning_rate": 1.6431600566197934e-05, "loss": 0.2684, "step": 12455 }, { "epoch": 0.2777318565052075, "grad_norm": 0.5098510980606079, "learning_rate": 1.6428919101540168e-05, "loss": 0.3573, "step": 12460 }, { "epoch": 0.27784330588582756, "grad_norm": 0.5363258123397827, "learning_rate": 1.6426236848746543e-05, "loss": 0.2721, "step": 12465 }, { "epoch": 0.2779547552664476, "grad_norm": 0.5816171169281006, "learning_rate": 1.6423553808145886e-05, "loss": 0.4065, "step": 12470 }, { "epoch": 0.27806620464706766, "grad_norm": 0.9319338798522949, "learning_rate": 1.642086998006711e-05, "loss": 0.336, "step": 12475 }, { "epoch": 0.27817765402768774, "grad_norm": 0.6476007699966431, "learning_rate": 1.6418185364839242e-05, "loss": 0.2388, "step": 12480 }, { "epoch": 0.2782891034083078, "grad_norm": 0.6227739453315735, "learning_rate": 1.6415499962791383e-05, "loss": 0.3853, "step": 12485 }, { "epoch": 0.2784005527889279, "grad_norm": 0.4589061141014099, "learning_rate": 1.6412813774252755e-05, "loss": 0.2953, "step": 12490 }, { "epoch": 0.27851200216954797, "grad_norm": 0.4495225250720978, "learning_rate": 1.6410126799552653e-05, "loss": 0.2907, "step": 12495 }, { "epoch": 0.278623451550168, "grad_norm": 0.654646635055542, "learning_rate": 1.6407439039020485e-05, "loss": 0.2302, "step": 12500 }, { "epoch": 0.27873490093078807, "grad_norm": 0.7446420192718506, "learning_rate": 1.6404750492985748e-05, "loss": 0.4201, "step": 12505 }, { "epoch": 0.27884635031140814, "grad_norm": 0.7043203711509705, "learning_rate": 1.640206116177804e-05, "loss": 0.4006, "step": 12510 }, { "epoch": 0.2789577996920282, "grad_norm": 0.530685544013977, "learning_rate": 1.6399371045727045e-05, "loss": 0.3329, "step": 12515 }, { "epoch": 0.2790692490726483, "grad_norm": 0.6326469779014587, "learning_rate": 1.639668014516256e-05, "loss": 0.3888, "step": 12520 }, { "epoch": 0.27918069845326837, "grad_norm": 0.5652064681053162, "learning_rate": 1.6393988460414462e-05, "loss": 0.3634, "step": 12525 }, { "epoch": 0.2792921478338884, "grad_norm": 0.5635384321212769, "learning_rate": 1.6391295991812735e-05, "loss": 0.2965, "step": 12530 }, { "epoch": 0.27940359721450847, "grad_norm": 0.708854079246521, "learning_rate": 1.638860273968745e-05, "loss": 0.4356, "step": 12535 }, { "epoch": 0.27951504659512855, "grad_norm": 0.5619444847106934, "learning_rate": 1.6385908704368784e-05, "loss": 0.1672, "step": 12540 }, { "epoch": 0.2796264959757486, "grad_norm": 0.48927435278892517, "learning_rate": 1.6383213886187e-05, "loss": 0.3496, "step": 12545 }, { "epoch": 0.2797379453563687, "grad_norm": 0.47531449794769287, "learning_rate": 1.6380518285472468e-05, "loss": 0.3092, "step": 12550 }, { "epoch": 0.2798493947369888, "grad_norm": 0.7148705720901489, "learning_rate": 1.637782190255564e-05, "loss": 0.3725, "step": 12555 }, { "epoch": 0.2799608441176088, "grad_norm": 0.5635217428207397, "learning_rate": 1.6375124737767077e-05, "loss": 0.3899, "step": 12560 }, { "epoch": 0.2800722934982289, "grad_norm": 0.576865553855896, "learning_rate": 1.637242679143743e-05, "loss": 0.466, "step": 12565 }, { "epoch": 0.28018374287884895, "grad_norm": 0.6125386953353882, "learning_rate": 1.6369728063897445e-05, "loss": 0.4707, "step": 12570 }, { "epoch": 0.28029519225946903, "grad_norm": 0.6960713863372803, "learning_rate": 1.6367028555477967e-05, "loss": 0.4153, "step": 12575 }, { "epoch": 0.2804066416400891, "grad_norm": 0.7176638841629028, "learning_rate": 1.6364328266509937e-05, "loss": 0.3106, "step": 12580 }, { "epoch": 0.2805180910207091, "grad_norm": 0.4757578670978546, "learning_rate": 1.6361627197324382e-05, "loss": 0.3273, "step": 12585 }, { "epoch": 0.2806295404013292, "grad_norm": 0.7011920809745789, "learning_rate": 1.6358925348252438e-05, "loss": 0.2884, "step": 12590 }, { "epoch": 0.2807409897819493, "grad_norm": 0.35939499735832214, "learning_rate": 1.6356222719625332e-05, "loss": 0.2344, "step": 12595 }, { "epoch": 0.28085243916256936, "grad_norm": 0.79154372215271, "learning_rate": 1.6353519311774383e-05, "loss": 0.4193, "step": 12600 }, { "epoch": 0.28096388854318943, "grad_norm": 0.6778676509857178, "learning_rate": 1.6350815125031005e-05, "loss": 0.3332, "step": 12605 }, { "epoch": 0.2810753379238095, "grad_norm": 0.4393715262413025, "learning_rate": 1.6348110159726715e-05, "loss": 0.3775, "step": 12610 }, { "epoch": 0.28118678730442953, "grad_norm": 0.4823377728462219, "learning_rate": 1.6345404416193117e-05, "loss": 0.2871, "step": 12615 }, { "epoch": 0.2812982366850496, "grad_norm": 0.5828414559364319, "learning_rate": 1.6342697894761923e-05, "loss": 0.416, "step": 12620 }, { "epoch": 0.2814096860656697, "grad_norm": 0.566920280456543, "learning_rate": 1.633999059576492e-05, "loss": 0.4064, "step": 12625 }, { "epoch": 0.28152113544628976, "grad_norm": 0.593452513217926, "learning_rate": 1.6337282519534005e-05, "loss": 0.3254, "step": 12630 }, { "epoch": 0.28163258482690984, "grad_norm": 0.7168657779693604, "learning_rate": 1.6334573666401173e-05, "loss": 0.3073, "step": 12635 }, { "epoch": 0.2817440342075299, "grad_norm": 0.6673802733421326, "learning_rate": 1.633186403669851e-05, "loss": 0.4074, "step": 12640 }, { "epoch": 0.28185548358814994, "grad_norm": 0.7347943186759949, "learning_rate": 1.6329153630758183e-05, "loss": 0.3531, "step": 12645 }, { "epoch": 0.28196693296877, "grad_norm": 0.42925938963890076, "learning_rate": 1.6326442448912475e-05, "loss": 0.2911, "step": 12650 }, { "epoch": 0.2820783823493901, "grad_norm": 0.5090445280075073, "learning_rate": 1.6323730491493757e-05, "loss": 0.3901, "step": 12655 }, { "epoch": 0.28218983173001017, "grad_norm": 0.4907485246658325, "learning_rate": 1.632101775883449e-05, "loss": 0.4705, "step": 12660 }, { "epoch": 0.28230128111063024, "grad_norm": 0.547439694404602, "learning_rate": 1.6318304251267242e-05, "loss": 0.3237, "step": 12665 }, { "epoch": 0.2824127304912503, "grad_norm": 0.4126209616661072, "learning_rate": 1.6315589969124663e-05, "loss": 0.3427, "step": 12670 }, { "epoch": 0.28252417987187034, "grad_norm": 0.47196847200393677, "learning_rate": 1.6312874912739495e-05, "loss": 0.3371, "step": 12675 }, { "epoch": 0.2826356292524904, "grad_norm": 0.5562902092933655, "learning_rate": 1.6310159082444593e-05, "loss": 0.3015, "step": 12680 }, { "epoch": 0.2827470786331105, "grad_norm": 0.5090768933296204, "learning_rate": 1.6307442478572898e-05, "loss": 0.3171, "step": 12685 }, { "epoch": 0.28285852801373057, "grad_norm": 0.685769259929657, "learning_rate": 1.6304725101457442e-05, "loss": 0.2784, "step": 12690 }, { "epoch": 0.28296997739435065, "grad_norm": 0.4524206221103668, "learning_rate": 1.6302006951431348e-05, "loss": 0.3828, "step": 12695 }, { "epoch": 0.2830814267749707, "grad_norm": 0.5083780288696289, "learning_rate": 1.629928802882785e-05, "loss": 0.323, "step": 12700 }, { "epoch": 0.28319287615559074, "grad_norm": 0.6181834936141968, "learning_rate": 1.629656833398026e-05, "loss": 0.2939, "step": 12705 }, { "epoch": 0.2833043255362108, "grad_norm": 0.6792620420455933, "learning_rate": 1.6293847867222e-05, "loss": 0.2741, "step": 12710 }, { "epoch": 0.2834157749168309, "grad_norm": 0.5394588708877563, "learning_rate": 1.6291126628886566e-05, "loss": 0.2861, "step": 12715 }, { "epoch": 0.283527224297451, "grad_norm": 0.7299884557723999, "learning_rate": 1.628840461930757e-05, "loss": 0.407, "step": 12720 }, { "epoch": 0.28363867367807105, "grad_norm": 0.6590754985809326, "learning_rate": 1.6285681838818707e-05, "loss": 0.3828, "step": 12725 }, { "epoch": 0.28375012305869113, "grad_norm": 0.388741135597229, "learning_rate": 1.6282958287753767e-05, "loss": 0.4551, "step": 12730 }, { "epoch": 0.28386157243931115, "grad_norm": 0.7172895073890686, "learning_rate": 1.628023396644664e-05, "loss": 0.3955, "step": 12735 }, { "epoch": 0.2839730218199312, "grad_norm": 0.6117028594017029, "learning_rate": 1.6277508875231304e-05, "loss": 0.3407, "step": 12740 }, { "epoch": 0.2840844712005513, "grad_norm": 0.5097143054008484, "learning_rate": 1.6274783014441833e-05, "loss": 0.3437, "step": 12745 }, { "epoch": 0.2841959205811714, "grad_norm": 0.4291749596595764, "learning_rate": 1.62720563844124e-05, "loss": 0.3326, "step": 12750 }, { "epoch": 0.28430736996179146, "grad_norm": 0.4608302414417267, "learning_rate": 1.6269328985477267e-05, "loss": 0.3271, "step": 12755 }, { "epoch": 0.2844188193424115, "grad_norm": 0.671491265296936, "learning_rate": 1.6266600817970794e-05, "loss": 0.4097, "step": 12760 }, { "epoch": 0.28453026872303155, "grad_norm": 0.6661823391914368, "learning_rate": 1.6263871882227426e-05, "loss": 0.381, "step": 12765 }, { "epoch": 0.28464171810365163, "grad_norm": 0.46518033742904663, "learning_rate": 1.626114217858172e-05, "loss": 0.4233, "step": 12770 }, { "epoch": 0.2847531674842717, "grad_norm": 0.39857664704322815, "learning_rate": 1.625841170736831e-05, "loss": 0.3127, "step": 12775 }, { "epoch": 0.2848646168648918, "grad_norm": 0.6866069436073303, "learning_rate": 1.6255680468921932e-05, "loss": 0.3427, "step": 12780 }, { "epoch": 0.28497606624551186, "grad_norm": 0.46831992268562317, "learning_rate": 1.625294846357741e-05, "loss": 0.3559, "step": 12785 }, { "epoch": 0.2850875156261319, "grad_norm": 0.5224331617355347, "learning_rate": 1.6250215691669683e-05, "loss": 0.3373, "step": 12790 }, { "epoch": 0.28519896500675196, "grad_norm": 0.7276885509490967, "learning_rate": 1.624748215353375e-05, "loss": 0.429, "step": 12795 }, { "epoch": 0.28531041438737204, "grad_norm": 0.49990314245224, "learning_rate": 1.6244747849504724e-05, "loss": 0.3134, "step": 12800 }, { "epoch": 0.2854218637679921, "grad_norm": 0.5923623442649841, "learning_rate": 1.6242012779917818e-05, "loss": 0.3458, "step": 12805 }, { "epoch": 0.2855333131486122, "grad_norm": 0.7208021879196167, "learning_rate": 1.6239276945108327e-05, "loss": 0.395, "step": 12810 }, { "epoch": 0.28564476252923227, "grad_norm": 0.6818788647651672, "learning_rate": 1.6236540345411646e-05, "loss": 0.3351, "step": 12815 }, { "epoch": 0.2857562119098523, "grad_norm": 0.5643772482872009, "learning_rate": 1.623380298116325e-05, "loss": 0.3587, "step": 12820 }, { "epoch": 0.28586766129047236, "grad_norm": 0.4340428113937378, "learning_rate": 1.623106485269873e-05, "loss": 0.3104, "step": 12825 }, { "epoch": 0.28597911067109244, "grad_norm": 0.5968391299247742, "learning_rate": 1.6228325960353752e-05, "loss": 0.4524, "step": 12830 }, { "epoch": 0.2860905600517125, "grad_norm": 0.5892665386199951, "learning_rate": 1.6225586304464093e-05, "loss": 0.3047, "step": 12835 }, { "epoch": 0.2862020094323326, "grad_norm": 0.531557559967041, "learning_rate": 1.6222845885365603e-05, "loss": 0.4443, "step": 12840 }, { "epoch": 0.28631345881295267, "grad_norm": 0.49256327748298645, "learning_rate": 1.6220104703394237e-05, "loss": 0.405, "step": 12845 }, { "epoch": 0.2864249081935727, "grad_norm": 0.5449532866477966, "learning_rate": 1.621736275888605e-05, "loss": 0.3217, "step": 12850 }, { "epoch": 0.28653635757419277, "grad_norm": 0.7042402625083923, "learning_rate": 1.621462005217718e-05, "loss": 0.4748, "step": 12855 }, { "epoch": 0.28664780695481284, "grad_norm": 0.5613042116165161, "learning_rate": 1.621187658360386e-05, "loss": 0.4733, "step": 12860 }, { "epoch": 0.2867592563354329, "grad_norm": 0.7253202795982361, "learning_rate": 1.620913235350242e-05, "loss": 0.255, "step": 12865 }, { "epoch": 0.286870705716053, "grad_norm": 1.0904386043548584, "learning_rate": 1.620638736220928e-05, "loss": 0.4042, "step": 12870 }, { "epoch": 0.2869821550966731, "grad_norm": 0.5551652312278748, "learning_rate": 1.6203641610060956e-05, "loss": 0.3905, "step": 12875 }, { "epoch": 0.2870936044772931, "grad_norm": 0.4741367995738983, "learning_rate": 1.6200895097394056e-05, "loss": 0.3241, "step": 12880 }, { "epoch": 0.2872050538579132, "grad_norm": 0.5723397135734558, "learning_rate": 1.6198147824545278e-05, "loss": 0.3918, "step": 12885 }, { "epoch": 0.28731650323853325, "grad_norm": 0.4763239920139313, "learning_rate": 1.6195399791851422e-05, "loss": 0.3494, "step": 12890 }, { "epoch": 0.2874279526191533, "grad_norm": 0.713117241859436, "learning_rate": 1.619265099964937e-05, "loss": 0.3322, "step": 12895 }, { "epoch": 0.2875394019997734, "grad_norm": 0.7806963920593262, "learning_rate": 1.6189901448276106e-05, "loss": 0.3423, "step": 12900 }, { "epoch": 0.2876508513803935, "grad_norm": 0.6539384126663208, "learning_rate": 1.6187151138068707e-05, "loss": 0.3596, "step": 12905 }, { "epoch": 0.2877623007610135, "grad_norm": 0.6627002954483032, "learning_rate": 1.618440006936433e-05, "loss": 0.2949, "step": 12910 }, { "epoch": 0.2878737501416336, "grad_norm": 0.5374137163162231, "learning_rate": 1.6181648242500246e-05, "loss": 0.4181, "step": 12915 }, { "epoch": 0.28798519952225365, "grad_norm": 0.5556002855300903, "learning_rate": 1.61788956578138e-05, "loss": 0.26, "step": 12920 }, { "epoch": 0.28809664890287373, "grad_norm": 0.8424591422080994, "learning_rate": 1.6176142315642438e-05, "loss": 0.3883, "step": 12925 }, { "epoch": 0.2882080982834938, "grad_norm": 0.5209367871284485, "learning_rate": 1.6173388216323704e-05, "loss": 0.2877, "step": 12930 }, { "epoch": 0.2883195476641139, "grad_norm": 0.5887765884399414, "learning_rate": 1.6170633360195226e-05, "loss": 0.4278, "step": 12935 }, { "epoch": 0.2884309970447339, "grad_norm": 0.7472568154335022, "learning_rate": 1.6167877747594724e-05, "loss": 0.2795, "step": 12940 }, { "epoch": 0.288542446425354, "grad_norm": 0.5088253617286682, "learning_rate": 1.616512137886002e-05, "loss": 0.335, "step": 12945 }, { "epoch": 0.28865389580597406, "grad_norm": 0.5196222066879272, "learning_rate": 1.6162364254329026e-05, "loss": 0.3659, "step": 12950 }, { "epoch": 0.28876534518659414, "grad_norm": 0.5183055996894836, "learning_rate": 1.6159606374339736e-05, "loss": 0.3415, "step": 12955 }, { "epoch": 0.2888767945672142, "grad_norm": 0.4333648979663849, "learning_rate": 1.6156847739230254e-05, "loss": 0.3549, "step": 12960 }, { "epoch": 0.28898824394783423, "grad_norm": 0.6129320859909058, "learning_rate": 1.6154088349338758e-05, "loss": 0.5242, "step": 12965 }, { "epoch": 0.2890996933284543, "grad_norm": 0.6342664361000061, "learning_rate": 1.6151328205003538e-05, "loss": 0.355, "step": 12970 }, { "epoch": 0.2892111427090744, "grad_norm": 0.6179186701774597, "learning_rate": 1.6148567306562958e-05, "loss": 0.4005, "step": 12975 }, { "epoch": 0.28932259208969446, "grad_norm": 0.8301606774330139, "learning_rate": 1.6145805654355484e-05, "loss": 0.2717, "step": 12980 }, { "epoch": 0.28943404147031454, "grad_norm": 0.5873782634735107, "learning_rate": 1.614304324871968e-05, "loss": 0.3209, "step": 12985 }, { "epoch": 0.2895454908509346, "grad_norm": 0.5506610870361328, "learning_rate": 1.614028008999419e-05, "loss": 0.35, "step": 12990 }, { "epoch": 0.28965694023155464, "grad_norm": 0.5306448936462402, "learning_rate": 1.613751617851775e-05, "loss": 0.339, "step": 12995 }, { "epoch": 0.2897683896121747, "grad_norm": 0.49982750415802, "learning_rate": 1.613475151462921e-05, "loss": 0.4548, "step": 13000 }, { "epoch": 0.2898798389927948, "grad_norm": 0.42073002457618713, "learning_rate": 1.613198609866748e-05, "loss": 0.4259, "step": 13005 }, { "epoch": 0.28999128837341487, "grad_norm": 0.7801643013954163, "learning_rate": 1.6129219930971588e-05, "loss": 0.412, "step": 13010 }, { "epoch": 0.29010273775403495, "grad_norm": 0.6633349061012268, "learning_rate": 1.6126453011880644e-05, "loss": 0.2692, "step": 13015 }, { "epoch": 0.290214187134655, "grad_norm": 0.7223657965660095, "learning_rate": 1.612368534173385e-05, "loss": 0.2686, "step": 13020 }, { "epoch": 0.29032563651527504, "grad_norm": 0.509113073348999, "learning_rate": 1.61209169208705e-05, "loss": 0.3325, "step": 13025 }, { "epoch": 0.2904370858958951, "grad_norm": 0.8219191431999207, "learning_rate": 1.6118147749629982e-05, "loss": 0.4306, "step": 13030 }, { "epoch": 0.2905485352765152, "grad_norm": 0.5687716007232666, "learning_rate": 1.6115377828351773e-05, "loss": 0.239, "step": 13035 }, { "epoch": 0.2906599846571353, "grad_norm": 0.5741727352142334, "learning_rate": 1.6112607157375447e-05, "loss": 0.3715, "step": 13040 }, { "epoch": 0.29077143403775535, "grad_norm": 0.7006103992462158, "learning_rate": 1.6109835737040666e-05, "loss": 0.4331, "step": 13045 }, { "epoch": 0.2908828834183754, "grad_norm": 0.6321763396263123, "learning_rate": 1.6107063567687183e-05, "loss": 0.3468, "step": 13050 }, { "epoch": 0.29099433279899545, "grad_norm": 0.5299533009529114, "learning_rate": 1.6104290649654847e-05, "loss": 0.3427, "step": 13055 }, { "epoch": 0.2911057821796155, "grad_norm": 0.6118397116661072, "learning_rate": 1.61015169832836e-05, "loss": 0.3445, "step": 13060 }, { "epoch": 0.2912172315602356, "grad_norm": 0.5570360422134399, "learning_rate": 1.609874256891346e-05, "loss": 0.3531, "step": 13065 }, { "epoch": 0.2913286809408557, "grad_norm": 0.5952074527740479, "learning_rate": 1.6095967406884558e-05, "loss": 0.403, "step": 13070 }, { "epoch": 0.29144013032147575, "grad_norm": 0.5052128434181213, "learning_rate": 1.6093191497537106e-05, "loss": 0.2412, "step": 13075 }, { "epoch": 0.29155157970209583, "grad_norm": 0.5345081686973572, "learning_rate": 1.609041484121141e-05, "loss": 0.2459, "step": 13080 }, { "epoch": 0.29166302908271585, "grad_norm": 0.4836462438106537, "learning_rate": 1.6087637438247863e-05, "loss": 0.2964, "step": 13085 }, { "epoch": 0.29177447846333593, "grad_norm": 0.7367750406265259, "learning_rate": 1.6084859288986957e-05, "loss": 0.3672, "step": 13090 }, { "epoch": 0.291885927843956, "grad_norm": 0.7815617322921753, "learning_rate": 1.608208039376927e-05, "loss": 0.4404, "step": 13095 }, { "epoch": 0.2919973772245761, "grad_norm": 0.7470172047615051, "learning_rate": 1.607930075293547e-05, "loss": 0.3288, "step": 13100 }, { "epoch": 0.29210882660519616, "grad_norm": 0.5423603653907776, "learning_rate": 1.6076520366826326e-05, "loss": 0.3442, "step": 13105 }, { "epoch": 0.29222027598581624, "grad_norm": 0.7404739856719971, "learning_rate": 1.6073739235782688e-05, "loss": 0.3763, "step": 13110 }, { "epoch": 0.29233172536643626, "grad_norm": 0.5424173474311829, "learning_rate": 1.6070957360145502e-05, "loss": 0.2754, "step": 13115 }, { "epoch": 0.29244317474705633, "grad_norm": 0.9737645387649536, "learning_rate": 1.6068174740255803e-05, "loss": 0.389, "step": 13120 }, { "epoch": 0.2925546241276764, "grad_norm": 0.7803316712379456, "learning_rate": 1.6065391376454722e-05, "loss": 0.3917, "step": 13125 }, { "epoch": 0.2926660735082965, "grad_norm": 0.8863366842269897, "learning_rate": 1.6062607269083475e-05, "loss": 0.5252, "step": 13130 }, { "epoch": 0.29277752288891656, "grad_norm": 0.4519311189651489, "learning_rate": 1.6059822418483375e-05, "loss": 0.3182, "step": 13135 }, { "epoch": 0.2928889722695366, "grad_norm": 0.5549803972244263, "learning_rate": 1.6057036824995814e-05, "loss": 0.4129, "step": 13140 }, { "epoch": 0.29300042165015666, "grad_norm": 0.5345078110694885, "learning_rate": 1.60542504889623e-05, "loss": 0.2777, "step": 13145 }, { "epoch": 0.29311187103077674, "grad_norm": 0.6359322667121887, "learning_rate": 1.6051463410724405e-05, "loss": 0.3406, "step": 13150 }, { "epoch": 0.2932233204113968, "grad_norm": 0.7798189520835876, "learning_rate": 1.60486755906238e-05, "loss": 0.3654, "step": 13155 }, { "epoch": 0.2933347697920169, "grad_norm": 0.662034273147583, "learning_rate": 1.6045887029002265e-05, "loss": 0.258, "step": 13160 }, { "epoch": 0.29344621917263697, "grad_norm": 0.5370074510574341, "learning_rate": 1.6043097726201645e-05, "loss": 0.3456, "step": 13165 }, { "epoch": 0.293557668553257, "grad_norm": 0.9667613506317139, "learning_rate": 1.6040307682563888e-05, "loss": 0.3283, "step": 13170 }, { "epoch": 0.29366911793387707, "grad_norm": 0.41724613308906555, "learning_rate": 1.6037516898431032e-05, "loss": 0.3669, "step": 13175 }, { "epoch": 0.29378056731449714, "grad_norm": 0.5329295992851257, "learning_rate": 1.6034725374145206e-05, "loss": 0.3201, "step": 13180 }, { "epoch": 0.2938920166951172, "grad_norm": 0.49996498227119446, "learning_rate": 1.6031933110048633e-05, "loss": 0.3333, "step": 13185 }, { "epoch": 0.2940034660757373, "grad_norm": 0.6060029864311218, "learning_rate": 1.6029140106483617e-05, "loss": 0.3471, "step": 13190 }, { "epoch": 0.2941149154563574, "grad_norm": 0.4627971947193146, "learning_rate": 1.6026346363792565e-05, "loss": 0.2648, "step": 13195 }, { "epoch": 0.2942263648369774, "grad_norm": 0.7191663384437561, "learning_rate": 1.6023551882317964e-05, "loss": 0.474, "step": 13200 }, { "epoch": 0.29433781421759747, "grad_norm": 0.45818057656288147, "learning_rate": 1.6020756662402398e-05, "loss": 0.4591, "step": 13205 }, { "epoch": 0.29444926359821755, "grad_norm": 0.6689664125442505, "learning_rate": 1.6017960704388535e-05, "loss": 0.3238, "step": 13210 }, { "epoch": 0.2945607129788376, "grad_norm": 0.7070868611335754, "learning_rate": 1.6015164008619143e-05, "loss": 0.2863, "step": 13215 }, { "epoch": 0.2946721623594577, "grad_norm": 0.5390009880065918, "learning_rate": 1.6012366575437074e-05, "loss": 0.3581, "step": 13220 }, { "epoch": 0.2947836117400778, "grad_norm": 0.46126458048820496, "learning_rate": 1.600956840518527e-05, "loss": 0.3509, "step": 13225 }, { "epoch": 0.2948950611206978, "grad_norm": 0.4514496922492981, "learning_rate": 1.6006769498206767e-05, "loss": 0.4155, "step": 13230 }, { "epoch": 0.2950065105013179, "grad_norm": 0.7030669450759888, "learning_rate": 1.600396985484469e-05, "loss": 0.3635, "step": 13235 }, { "epoch": 0.29511795988193795, "grad_norm": 0.619882345199585, "learning_rate": 1.600116947544225e-05, "loss": 0.3775, "step": 13240 }, { "epoch": 0.29522940926255803, "grad_norm": 0.6607558727264404, "learning_rate": 1.5998368360342756e-05, "loss": 0.3034, "step": 13245 }, { "epoch": 0.2953408586431781, "grad_norm": 0.5937981009483337, "learning_rate": 1.59955665098896e-05, "loss": 0.389, "step": 13250 }, { "epoch": 0.2954523080237982, "grad_norm": 0.7993833422660828, "learning_rate": 1.5992763924426272e-05, "loss": 0.3678, "step": 13255 }, { "epoch": 0.2955637574044182, "grad_norm": 0.5195314288139343, "learning_rate": 1.598996060429634e-05, "loss": 0.3452, "step": 13260 }, { "epoch": 0.2956752067850383, "grad_norm": 0.6692262291908264, "learning_rate": 1.5987156549843474e-05, "loss": 0.2995, "step": 13265 }, { "epoch": 0.29578665616565836, "grad_norm": 0.6648061871528625, "learning_rate": 1.598435176141143e-05, "loss": 0.3632, "step": 13270 }, { "epoch": 0.29589810554627843, "grad_norm": 0.5224160552024841, "learning_rate": 1.598154623934405e-05, "loss": 0.3567, "step": 13275 }, { "epoch": 0.2960095549268985, "grad_norm": 0.8477574586868286, "learning_rate": 1.5978739983985273e-05, "loss": 0.3707, "step": 13280 }, { "epoch": 0.2961210043075186, "grad_norm": 0.5074322819709778, "learning_rate": 1.5975932995679123e-05, "loss": 0.4027, "step": 13285 }, { "epoch": 0.2962324536881386, "grad_norm": 0.6792076230049133, "learning_rate": 1.5973125274769715e-05, "loss": 0.4334, "step": 13290 }, { "epoch": 0.2963439030687587, "grad_norm": 0.3721717298030853, "learning_rate": 1.5970316821601253e-05, "loss": 0.3296, "step": 13295 }, { "epoch": 0.29645535244937876, "grad_norm": 0.5924360752105713, "learning_rate": 1.596750763651803e-05, "loss": 0.3489, "step": 13300 }, { "epoch": 0.29656680182999884, "grad_norm": 0.6146001815795898, "learning_rate": 1.5964697719864437e-05, "loss": 0.3179, "step": 13305 }, { "epoch": 0.2966782512106189, "grad_norm": 0.45257607102394104, "learning_rate": 1.5961887071984944e-05, "loss": 0.3409, "step": 13310 }, { "epoch": 0.296789700591239, "grad_norm": 0.8483275771141052, "learning_rate": 1.595907569322411e-05, "loss": 0.2899, "step": 13315 }, { "epoch": 0.296901149971859, "grad_norm": 0.7063986659049988, "learning_rate": 1.5956263583926598e-05, "loss": 0.2711, "step": 13320 }, { "epoch": 0.2970125993524791, "grad_norm": 0.6052030324935913, "learning_rate": 1.5953450744437144e-05, "loss": 0.4041, "step": 13325 }, { "epoch": 0.29712404873309917, "grad_norm": 0.6279826164245605, "learning_rate": 1.5950637175100583e-05, "loss": 0.315, "step": 13330 }, { "epoch": 0.29723549811371924, "grad_norm": 0.6276046633720398, "learning_rate": 1.5947822876261835e-05, "loss": 0.3207, "step": 13335 }, { "epoch": 0.2973469474943393, "grad_norm": 0.5449913144111633, "learning_rate": 1.5945007848265912e-05, "loss": 0.2947, "step": 13340 }, { "epoch": 0.29745839687495934, "grad_norm": 0.633040726184845, "learning_rate": 1.5942192091457918e-05, "loss": 0.298, "step": 13345 }, { "epoch": 0.2975698462555794, "grad_norm": 0.5707663297653198, "learning_rate": 1.5939375606183035e-05, "loss": 0.2653, "step": 13350 }, { "epoch": 0.2976812956361995, "grad_norm": 0.6369941234588623, "learning_rate": 1.5936558392786553e-05, "loss": 0.324, "step": 13355 }, { "epoch": 0.29779274501681957, "grad_norm": 0.7818615436553955, "learning_rate": 1.5933740451613836e-05, "loss": 0.2791, "step": 13360 }, { "epoch": 0.29790419439743965, "grad_norm": 0.6412309408187866, "learning_rate": 1.5930921783010336e-05, "loss": 0.2938, "step": 13365 }, { "epoch": 0.2980156437780597, "grad_norm": 0.6033828258514404, "learning_rate": 1.592810238732161e-05, "loss": 0.2937, "step": 13370 }, { "epoch": 0.29812709315867975, "grad_norm": 0.8528613448143005, "learning_rate": 1.5925282264893283e-05, "loss": 0.3686, "step": 13375 }, { "epoch": 0.2982385425392998, "grad_norm": 0.6302016973495483, "learning_rate": 1.592246141607109e-05, "loss": 0.3047, "step": 13380 }, { "epoch": 0.2983499919199199, "grad_norm": 0.7522174715995789, "learning_rate": 1.5919639841200843e-05, "loss": 0.2914, "step": 13385 }, { "epoch": 0.29846144130054, "grad_norm": 0.8643938899040222, "learning_rate": 1.591681754062844e-05, "loss": 0.4014, "step": 13390 }, { "epoch": 0.29857289068116005, "grad_norm": 0.5454531311988831, "learning_rate": 1.5913994514699883e-05, "loss": 0.3496, "step": 13395 }, { "epoch": 0.29868434006178013, "grad_norm": 0.5838807225227356, "learning_rate": 1.591117076376125e-05, "loss": 0.3131, "step": 13400 }, { "epoch": 0.29879578944240015, "grad_norm": 0.4497433602809906, "learning_rate": 1.59083462881587e-05, "loss": 0.4624, "step": 13405 }, { "epoch": 0.2989072388230202, "grad_norm": 0.5543870329856873, "learning_rate": 1.59055210882385e-05, "loss": 0.3888, "step": 13410 }, { "epoch": 0.2990186882036403, "grad_norm": 0.6825234293937683, "learning_rate": 1.5902695164347007e-05, "loss": 0.3123, "step": 13415 }, { "epoch": 0.2991301375842604, "grad_norm": 0.42260441184043884, "learning_rate": 1.5899868516830643e-05, "loss": 0.2898, "step": 13420 }, { "epoch": 0.29924158696488046, "grad_norm": 0.6578460335731506, "learning_rate": 1.589704114603594e-05, "loss": 0.3062, "step": 13425 }, { "epoch": 0.29935303634550053, "grad_norm": 0.6747757196426392, "learning_rate": 1.589421305230951e-05, "loss": 0.276, "step": 13430 }, { "epoch": 0.29946448572612056, "grad_norm": 0.6671870350837708, "learning_rate": 1.5891384235998058e-05, "loss": 0.332, "step": 13435 }, { "epoch": 0.29957593510674063, "grad_norm": 0.5130128860473633, "learning_rate": 1.5888554697448372e-05, "loss": 0.2472, "step": 13440 }, { "epoch": 0.2996873844873607, "grad_norm": 0.525191605091095, "learning_rate": 1.5885724437007332e-05, "loss": 0.3166, "step": 13445 }, { "epoch": 0.2997988338679808, "grad_norm": 0.5181118845939636, "learning_rate": 1.5882893455021906e-05, "loss": 0.2823, "step": 13450 }, { "epoch": 0.29991028324860086, "grad_norm": 0.8248529434204102, "learning_rate": 1.5880061751839153e-05, "loss": 0.4481, "step": 13455 }, { "epoch": 0.30002173262922094, "grad_norm": 0.509216845035553, "learning_rate": 1.5877229327806217e-05, "loss": 0.282, "step": 13460 }, { "epoch": 0.30013318200984096, "grad_norm": 0.791086733341217, "learning_rate": 1.587439618327033e-05, "loss": 0.4165, "step": 13465 }, { "epoch": 0.30024463139046104, "grad_norm": 0.39396199584007263, "learning_rate": 1.5871562318578814e-05, "loss": 0.3594, "step": 13470 }, { "epoch": 0.3003560807710811, "grad_norm": 0.6425427198410034, "learning_rate": 1.5868727734079078e-05, "loss": 0.3538, "step": 13475 }, { "epoch": 0.3004675301517012, "grad_norm": 0.5383874177932739, "learning_rate": 1.5865892430118623e-05, "loss": 0.3805, "step": 13480 }, { "epoch": 0.30057897953232127, "grad_norm": 0.529425323009491, "learning_rate": 1.5863056407045034e-05, "loss": 0.4096, "step": 13485 }, { "epoch": 0.30069042891294134, "grad_norm": 0.5081347227096558, "learning_rate": 1.5860219665205985e-05, "loss": 0.2414, "step": 13490 }, { "epoch": 0.30080187829356136, "grad_norm": 0.5437273979187012, "learning_rate": 1.585738220494924e-05, "loss": 0.3649, "step": 13495 }, { "epoch": 0.30091332767418144, "grad_norm": 0.6691897511482239, "learning_rate": 1.5854544026622648e-05, "loss": 0.4306, "step": 13500 }, { "epoch": 0.3010247770548015, "grad_norm": 0.5725626945495605, "learning_rate": 1.5851705130574147e-05, "loss": 0.3724, "step": 13505 }, { "epoch": 0.3011362264354216, "grad_norm": 1.0486339330673218, "learning_rate": 1.5848865517151762e-05, "loss": 0.3816, "step": 13510 }, { "epoch": 0.30124767581604167, "grad_norm": 0.5338178873062134, "learning_rate": 1.584602518670362e-05, "loss": 0.4815, "step": 13515 }, { "epoch": 0.3013591251966617, "grad_norm": 0.4364151656627655, "learning_rate": 1.5843184139577908e-05, "loss": 0.2972, "step": 13520 }, { "epoch": 0.30147057457728177, "grad_norm": 0.43141815066337585, "learning_rate": 1.5840342376122927e-05, "loss": 0.2521, "step": 13525 }, { "epoch": 0.30158202395790185, "grad_norm": 0.5989120006561279, "learning_rate": 1.5837499896687048e-05, "loss": 0.3116, "step": 13530 }, { "epoch": 0.3016934733385219, "grad_norm": 0.7535663843154907, "learning_rate": 1.583465670161874e-05, "loss": 0.3741, "step": 13535 }, { "epoch": 0.301804922719142, "grad_norm": 0.5637946724891663, "learning_rate": 1.5831812791266557e-05, "loss": 0.2601, "step": 13540 }, { "epoch": 0.3019163720997621, "grad_norm": 0.6944532990455627, "learning_rate": 1.582896816597914e-05, "loss": 0.3492, "step": 13545 }, { "epoch": 0.3020278214803821, "grad_norm": 0.40424805879592896, "learning_rate": 1.5826122826105224e-05, "loss": 0.2823, "step": 13550 }, { "epoch": 0.3021392708610022, "grad_norm": 0.41228148341178894, "learning_rate": 1.5823276771993617e-05, "loss": 0.3248, "step": 13555 }, { "epoch": 0.30225072024162225, "grad_norm": 0.5896701812744141, "learning_rate": 1.5820430003993226e-05, "loss": 0.3773, "step": 13560 }, { "epoch": 0.3023621696222423, "grad_norm": 0.6878036856651306, "learning_rate": 1.5817582522453042e-05, "loss": 0.3773, "step": 13565 }, { "epoch": 0.3024736190028624, "grad_norm": 0.5029690265655518, "learning_rate": 1.581473432772215e-05, "loss": 0.4281, "step": 13570 }, { "epoch": 0.3025850683834825, "grad_norm": 0.4946044385433197, "learning_rate": 1.581188542014971e-05, "loss": 0.3541, "step": 13575 }, { "epoch": 0.3026965177641025, "grad_norm": 0.4801938235759735, "learning_rate": 1.5809035800084974e-05, "loss": 0.4324, "step": 13580 }, { "epoch": 0.3028079671447226, "grad_norm": 0.5312267541885376, "learning_rate": 1.5806185467877293e-05, "loss": 0.32, "step": 13585 }, { "epoch": 0.30291941652534266, "grad_norm": 0.8016061782836914, "learning_rate": 1.5803334423876088e-05, "loss": 0.4019, "step": 13590 }, { "epoch": 0.30303086590596273, "grad_norm": 0.7053002715110779, "learning_rate": 1.580048266843088e-05, "loss": 0.3772, "step": 13595 }, { "epoch": 0.3031423152865828, "grad_norm": 0.5228629112243652, "learning_rate": 1.5797630201891267e-05, "loss": 0.5045, "step": 13600 }, { "epoch": 0.3032537646672029, "grad_norm": 0.5898446440696716, "learning_rate": 1.579477702460694e-05, "loss": 0.4242, "step": 13605 }, { "epoch": 0.3033652140478229, "grad_norm": 0.6774618625640869, "learning_rate": 1.5791923136927684e-05, "loss": 0.3472, "step": 13610 }, { "epoch": 0.303476663428443, "grad_norm": 0.36860859394073486, "learning_rate": 1.5789068539203356e-05, "loss": 0.3184, "step": 13615 }, { "epoch": 0.30358811280906306, "grad_norm": 0.4877468943595886, "learning_rate": 1.578621323178391e-05, "loss": 0.34, "step": 13620 }, { "epoch": 0.30369956218968314, "grad_norm": 0.5136805176734924, "learning_rate": 1.5783357215019383e-05, "loss": 0.3786, "step": 13625 }, { "epoch": 0.3038110115703032, "grad_norm": 0.8331414461135864, "learning_rate": 1.5780500489259907e-05, "loss": 0.3808, "step": 13630 }, { "epoch": 0.3039224609509233, "grad_norm": 0.7533968687057495, "learning_rate": 1.5777643054855684e-05, "loss": 0.298, "step": 13635 }, { "epoch": 0.3040339103315433, "grad_norm": 0.6575278639793396, "learning_rate": 1.577478491215702e-05, "loss": 0.355, "step": 13640 }, { "epoch": 0.3041453597121634, "grad_norm": 0.5389758348464966, "learning_rate": 1.5771926061514302e-05, "loss": 0.3802, "step": 13645 }, { "epoch": 0.30425680909278346, "grad_norm": 0.5021669864654541, "learning_rate": 1.5769066503278e-05, "loss": 0.2789, "step": 13650 }, { "epoch": 0.30436825847340354, "grad_norm": 0.7188048362731934, "learning_rate": 1.5766206237798677e-05, "loss": 0.3787, "step": 13655 }, { "epoch": 0.3044797078540236, "grad_norm": 0.5826491713523865, "learning_rate": 1.5763345265426978e-05, "loss": 0.3537, "step": 13660 }, { "epoch": 0.3045911572346437, "grad_norm": 0.5045832395553589, "learning_rate": 1.5760483586513632e-05, "loss": 0.3441, "step": 13665 }, { "epoch": 0.3047026066152637, "grad_norm": 0.7873217463493347, "learning_rate": 1.5757621201409463e-05, "loss": 0.4069, "step": 13670 }, { "epoch": 0.3048140559958838, "grad_norm": 0.5248357653617859, "learning_rate": 1.575475811046538e-05, "loss": 0.3679, "step": 13675 }, { "epoch": 0.30492550537650387, "grad_norm": 0.6609611511230469, "learning_rate": 1.575189431403237e-05, "loss": 0.3589, "step": 13680 }, { "epoch": 0.30503695475712395, "grad_norm": 0.5051944851875305, "learning_rate": 1.5749029812461515e-05, "loss": 0.269, "step": 13685 }, { "epoch": 0.305148404137744, "grad_norm": 0.6227126121520996, "learning_rate": 1.5746164606103983e-05, "loss": 0.3709, "step": 13690 }, { "epoch": 0.3052598535183641, "grad_norm": 0.5263267159461975, "learning_rate": 1.574329869531102e-05, "loss": 0.2782, "step": 13695 }, { "epoch": 0.3053713028989841, "grad_norm": 0.5302555561065674, "learning_rate": 1.5740432080433974e-05, "loss": 0.2429, "step": 13700 }, { "epoch": 0.3054827522796042, "grad_norm": 0.5816635489463806, "learning_rate": 1.5737564761824257e-05, "loss": 0.2038, "step": 13705 }, { "epoch": 0.3055942016602243, "grad_norm": 0.5670195817947388, "learning_rate": 1.5734696739833392e-05, "loss": 0.4132, "step": 13710 }, { "epoch": 0.30570565104084435, "grad_norm": 0.5609695315361023, "learning_rate": 1.573182801481297e-05, "loss": 0.3791, "step": 13715 }, { "epoch": 0.3058171004214644, "grad_norm": 0.7394962310791016, "learning_rate": 1.5728958587114677e-05, "loss": 0.3235, "step": 13720 }, { "epoch": 0.30592854980208445, "grad_norm": 0.6420606970787048, "learning_rate": 1.5726088457090284e-05, "loss": 0.3705, "step": 13725 }, { "epoch": 0.3060399991827045, "grad_norm": 0.5575365424156189, "learning_rate": 1.5723217625091645e-05, "loss": 0.3184, "step": 13730 }, { "epoch": 0.3061514485633246, "grad_norm": 0.5209715366363525, "learning_rate": 1.5720346091470697e-05, "loss": 0.3651, "step": 13735 }, { "epoch": 0.3062628979439447, "grad_norm": 0.6402333974838257, "learning_rate": 1.5717473856579475e-05, "loss": 0.3487, "step": 13740 }, { "epoch": 0.30637434732456476, "grad_norm": 0.5436682105064392, "learning_rate": 1.571460092077009e-05, "loss": 0.3645, "step": 13745 }, { "epoch": 0.30648579670518483, "grad_norm": 0.5848494172096252, "learning_rate": 1.5711727284394745e-05, "loss": 0.2991, "step": 13750 }, { "epoch": 0.30659724608580485, "grad_norm": 0.6025956273078918, "learning_rate": 1.5708852947805717e-05, "loss": 0.3323, "step": 13755 }, { "epoch": 0.30670869546642493, "grad_norm": 0.44370484352111816, "learning_rate": 1.5705977911355388e-05, "loss": 0.3734, "step": 13760 }, { "epoch": 0.306820144847045, "grad_norm": 0.7109201550483704, "learning_rate": 1.5703102175396208e-05, "loss": 0.316, "step": 13765 }, { "epoch": 0.3069315942276651, "grad_norm": 0.6673081517219543, "learning_rate": 1.5700225740280725e-05, "loss": 0.2519, "step": 13770 }, { "epoch": 0.30704304360828516, "grad_norm": 0.41183361411094666, "learning_rate": 1.5697348606361564e-05, "loss": 0.4071, "step": 13775 }, { "epoch": 0.30715449298890524, "grad_norm": 0.6063647270202637, "learning_rate": 1.5694470773991438e-05, "loss": 0.384, "step": 13780 }, { "epoch": 0.30726594236952526, "grad_norm": 0.5366505980491638, "learning_rate": 1.5691592243523154e-05, "loss": 0.4145, "step": 13785 }, { "epoch": 0.30737739175014533, "grad_norm": 0.5829271078109741, "learning_rate": 1.5688713015309592e-05, "loss": 0.4262, "step": 13790 }, { "epoch": 0.3074888411307654, "grad_norm": 0.7547322511672974, "learning_rate": 1.5685833089703718e-05, "loss": 0.2761, "step": 13795 }, { "epoch": 0.3076002905113855, "grad_norm": 0.5357990860939026, "learning_rate": 1.5682952467058603e-05, "loss": 0.3992, "step": 13800 }, { "epoch": 0.30771173989200556, "grad_norm": 0.44748038053512573, "learning_rate": 1.568007114772738e-05, "loss": 0.3155, "step": 13805 }, { "epoch": 0.30782318927262564, "grad_norm": 0.6524558067321777, "learning_rate": 1.5677189132063278e-05, "loss": 0.3054, "step": 13810 }, { "epoch": 0.30793463865324566, "grad_norm": 0.42701128125190735, "learning_rate": 1.5674306420419606e-05, "loss": 0.3253, "step": 13815 }, { "epoch": 0.30804608803386574, "grad_norm": 0.417032390832901, "learning_rate": 1.567142301314977e-05, "loss": 0.4835, "step": 13820 }, { "epoch": 0.3081575374144858, "grad_norm": 0.6458917260169983, "learning_rate": 1.5668538910607244e-05, "loss": 0.3511, "step": 13825 }, { "epoch": 0.3082689867951059, "grad_norm": 0.42670947313308716, "learning_rate": 1.5665654113145606e-05, "loss": 0.352, "step": 13830 }, { "epoch": 0.30838043617572597, "grad_norm": 0.37725549936294556, "learning_rate": 1.5662768621118507e-05, "loss": 0.4173, "step": 13835 }, { "epoch": 0.30849188555634605, "grad_norm": 0.5069001317024231, "learning_rate": 1.565988243487968e-05, "loss": 0.2881, "step": 13840 }, { "epoch": 0.30860333493696607, "grad_norm": 0.684617817401886, "learning_rate": 1.5656995554782957e-05, "loss": 0.408, "step": 13845 }, { "epoch": 0.30871478431758614, "grad_norm": 0.729494035243988, "learning_rate": 1.5654107981182247e-05, "loss": 0.3553, "step": 13850 }, { "epoch": 0.3088262336982062, "grad_norm": 0.5434486269950867, "learning_rate": 1.565121971443154e-05, "loss": 0.303, "step": 13855 }, { "epoch": 0.3089376830788263, "grad_norm": 0.6943069100379944, "learning_rate": 1.5648330754884917e-05, "loss": 0.3397, "step": 13860 }, { "epoch": 0.3090491324594464, "grad_norm": 0.49677422642707825, "learning_rate": 1.5645441102896536e-05, "loss": 0.3348, "step": 13865 }, { "epoch": 0.30916058184006645, "grad_norm": 0.5370576977729797, "learning_rate": 1.5642550758820657e-05, "loss": 0.4424, "step": 13870 }, { "epoch": 0.30927203122068647, "grad_norm": 0.7557622194290161, "learning_rate": 1.563965972301161e-05, "loss": 0.3526, "step": 13875 }, { "epoch": 0.30938348060130655, "grad_norm": 0.39046844840049744, "learning_rate": 1.563676799582381e-05, "loss": 0.3893, "step": 13880 }, { "epoch": 0.3094949299819266, "grad_norm": 0.5151693820953369, "learning_rate": 1.5633875577611765e-05, "loss": 0.4531, "step": 13885 }, { "epoch": 0.3096063793625467, "grad_norm": 0.5232051014900208, "learning_rate": 1.563098246873006e-05, "loss": 0.4816, "step": 13890 }, { "epoch": 0.3097178287431668, "grad_norm": 0.6085663437843323, "learning_rate": 1.562808866953337e-05, "loss": 0.4044, "step": 13895 }, { "epoch": 0.3098292781237868, "grad_norm": 0.7641314268112183, "learning_rate": 1.5625194180376446e-05, "loss": 0.3029, "step": 13900 }, { "epoch": 0.3099407275044069, "grad_norm": 0.4355248212814331, "learning_rate": 1.5622299001614138e-05, "loss": 0.368, "step": 13905 }, { "epoch": 0.31005217688502695, "grad_norm": 0.5732108354568481, "learning_rate": 1.561940313360137e-05, "loss": 0.4079, "step": 13910 }, { "epoch": 0.31016362626564703, "grad_norm": 0.6047371625900269, "learning_rate": 1.5616506576693155e-05, "loss": 0.3951, "step": 13915 }, { "epoch": 0.3102750756462671, "grad_norm": 0.6488727331161499, "learning_rate": 1.5613609331244584e-05, "loss": 0.3739, "step": 13920 }, { "epoch": 0.3103865250268872, "grad_norm": 0.9942343235015869, "learning_rate": 1.561071139761084e-05, "loss": 0.2652, "step": 13925 }, { "epoch": 0.3104979744075072, "grad_norm": 0.6689128279685974, "learning_rate": 1.5607812776147192e-05, "loss": 0.3878, "step": 13930 }, { "epoch": 0.3106094237881273, "grad_norm": 0.6173014044761658, "learning_rate": 1.5604913467208977e-05, "loss": 0.1834, "step": 13935 }, { "epoch": 0.31072087316874736, "grad_norm": 0.48626089096069336, "learning_rate": 1.5602013471151634e-05, "loss": 0.2309, "step": 13940 }, { "epoch": 0.31083232254936743, "grad_norm": 0.431443452835083, "learning_rate": 1.5599112788330685e-05, "loss": 0.2777, "step": 13945 }, { "epoch": 0.3109437719299875, "grad_norm": 0.7525061368942261, "learning_rate": 1.5596211419101723e-05, "loss": 0.4271, "step": 13950 }, { "epoch": 0.3110552213106076, "grad_norm": 0.5650204420089722, "learning_rate": 1.5593309363820437e-05, "loss": 0.3672, "step": 13955 }, { "epoch": 0.3111666706912276, "grad_norm": 0.471719890832901, "learning_rate": 1.55904066228426e-05, "loss": 0.4063, "step": 13960 }, { "epoch": 0.3112781200718477, "grad_norm": 0.4454389810562134, "learning_rate": 1.558750319652406e-05, "loss": 0.4041, "step": 13965 }, { "epoch": 0.31138956945246776, "grad_norm": 0.6640139222145081, "learning_rate": 1.5584599085220754e-05, "loss": 0.2817, "step": 13970 }, { "epoch": 0.31150101883308784, "grad_norm": 0.8577756285667419, "learning_rate": 1.558169428928871e-05, "loss": 0.3628, "step": 13975 }, { "epoch": 0.3116124682137079, "grad_norm": 0.6317833065986633, "learning_rate": 1.5578788809084033e-05, "loss": 0.2419, "step": 13980 }, { "epoch": 0.311723917594328, "grad_norm": 0.6040417551994324, "learning_rate": 1.5575882644962902e-05, "loss": 0.3732, "step": 13985 }, { "epoch": 0.311835366974948, "grad_norm": 0.5538058280944824, "learning_rate": 1.5572975797281603e-05, "loss": 0.3231, "step": 13990 }, { "epoch": 0.3119468163555681, "grad_norm": 0.566226065158844, "learning_rate": 1.557006826639649e-05, "loss": 0.3924, "step": 13995 }, { "epoch": 0.31205826573618817, "grad_norm": 0.46765801310539246, "learning_rate": 1.5567160052664002e-05, "loss": 0.3625, "step": 14000 }, { "epoch": 0.31216971511680824, "grad_norm": 0.5231557488441467, "learning_rate": 1.556425115644066e-05, "loss": 0.3253, "step": 14005 }, { "epoch": 0.3122811644974283, "grad_norm": 0.495217889547348, "learning_rate": 1.5561341578083075e-05, "loss": 0.275, "step": 14010 }, { "epoch": 0.3123926138780484, "grad_norm": 0.5250295996665955, "learning_rate": 1.5558431317947943e-05, "loss": 0.487, "step": 14015 }, { "epoch": 0.3125040632586684, "grad_norm": 0.7416672110557556, "learning_rate": 1.555552037639203e-05, "loss": 0.3936, "step": 14020 }, { "epoch": 0.3126155126392885, "grad_norm": 0.5672133564949036, "learning_rate": 1.5552608753772207e-05, "loss": 0.4152, "step": 14025 }, { "epoch": 0.31272696201990857, "grad_norm": 0.5304513573646545, "learning_rate": 1.554969645044541e-05, "loss": 0.3631, "step": 14030 }, { "epoch": 0.31283841140052865, "grad_norm": 0.45727986097335815, "learning_rate": 1.554678346676866e-05, "loss": 0.3183, "step": 14035 }, { "epoch": 0.3129498607811487, "grad_norm": 0.5454281568527222, "learning_rate": 1.5543869803099077e-05, "loss": 0.3309, "step": 14040 }, { "epoch": 0.3130613101617688, "grad_norm": 0.5051997900009155, "learning_rate": 1.5540955459793847e-05, "loss": 0.1792, "step": 14045 }, { "epoch": 0.3131727595423888, "grad_norm": 0.6837823987007141, "learning_rate": 1.5538040437210247e-05, "loss": 0.4055, "step": 14050 }, { "epoch": 0.3132842089230089, "grad_norm": 0.6566972136497498, "learning_rate": 1.5535124735705634e-05, "loss": 0.264, "step": 14055 }, { "epoch": 0.313395658303629, "grad_norm": 0.5439302325248718, "learning_rate": 1.5532208355637454e-05, "loss": 0.3285, "step": 14060 }, { "epoch": 0.31350710768424905, "grad_norm": 0.5949956774711609, "learning_rate": 1.5529291297363235e-05, "loss": 0.3884, "step": 14065 }, { "epoch": 0.31361855706486913, "grad_norm": 0.6063193082809448, "learning_rate": 1.552637356124058e-05, "loss": 0.3661, "step": 14070 }, { "epoch": 0.3137300064454892, "grad_norm": 0.4883115291595459, "learning_rate": 1.5523455147627182e-05, "loss": 0.3501, "step": 14075 }, { "epoch": 0.31384145582610923, "grad_norm": 0.6186197996139526, "learning_rate": 1.552053605688082e-05, "loss": 0.3345, "step": 14080 }, { "epoch": 0.3139529052067293, "grad_norm": 0.678356945514679, "learning_rate": 1.551761628935935e-05, "loss": 0.3084, "step": 14085 }, { "epoch": 0.3140643545873494, "grad_norm": 0.7062575221061707, "learning_rate": 1.551469584542071e-05, "loss": 0.3178, "step": 14090 }, { "epoch": 0.31417580396796946, "grad_norm": 0.4921148419380188, "learning_rate": 1.5511774725422924e-05, "loss": 0.3214, "step": 14095 }, { "epoch": 0.31428725334858953, "grad_norm": 0.6326109170913696, "learning_rate": 1.5508852929724107e-05, "loss": 0.3674, "step": 14100 }, { "epoch": 0.31439870272920956, "grad_norm": 0.412112832069397, "learning_rate": 1.550593045868244e-05, "loss": 0.3656, "step": 14105 }, { "epoch": 0.31451015210982963, "grad_norm": 0.46066349744796753, "learning_rate": 1.5503007312656198e-05, "loss": 0.294, "step": 14110 }, { "epoch": 0.3146216014904497, "grad_norm": 0.5353672504425049, "learning_rate": 1.5500083492003737e-05, "loss": 0.271, "step": 14115 }, { "epoch": 0.3147330508710698, "grad_norm": 0.5529379844665527, "learning_rate": 1.5497158997083492e-05, "loss": 0.2766, "step": 14120 }, { "epoch": 0.31484450025168986, "grad_norm": 0.5734739899635315, "learning_rate": 1.5494233828253985e-05, "loss": 0.3251, "step": 14125 }, { "epoch": 0.31495594963230994, "grad_norm": 0.5877709984779358, "learning_rate": 1.5491307985873822e-05, "loss": 0.2302, "step": 14130 }, { "epoch": 0.31506739901292996, "grad_norm": 0.4589202404022217, "learning_rate": 1.5488381470301685e-05, "loss": 0.3152, "step": 14135 }, { "epoch": 0.31517884839355004, "grad_norm": 0.5067848563194275, "learning_rate": 1.5485454281896346e-05, "loss": 0.3822, "step": 14140 }, { "epoch": 0.3152902977741701, "grad_norm": 0.6330682635307312, "learning_rate": 1.548252642101665e-05, "loss": 0.3133, "step": 14145 }, { "epoch": 0.3154017471547902, "grad_norm": 0.6754536628723145, "learning_rate": 1.5479597888021537e-05, "loss": 0.2672, "step": 14150 }, { "epoch": 0.31551319653541027, "grad_norm": 0.7370808124542236, "learning_rate": 1.5476668683270016e-05, "loss": 0.3993, "step": 14155 }, { "epoch": 0.31562464591603034, "grad_norm": 2.0108695030212402, "learning_rate": 1.547373880712119e-05, "loss": 0.3878, "step": 14160 }, { "epoch": 0.31573609529665037, "grad_norm": 0.3049313724040985, "learning_rate": 1.5470808259934235e-05, "loss": 0.3542, "step": 14165 }, { "epoch": 0.31584754467727044, "grad_norm": 0.5608581900596619, "learning_rate": 1.5467877042068415e-05, "loss": 0.4755, "step": 14170 }, { "epoch": 0.3159589940578905, "grad_norm": 0.6225017309188843, "learning_rate": 1.5464945153883076e-05, "loss": 0.4222, "step": 14175 }, { "epoch": 0.3160704434385106, "grad_norm": 0.7026190161705017, "learning_rate": 1.5462012595737643e-05, "loss": 0.4397, "step": 14180 }, { "epoch": 0.31618189281913067, "grad_norm": 0.6331912279129028, "learning_rate": 1.5459079367991626e-05, "loss": 0.4302, "step": 14185 }, { "epoch": 0.31629334219975075, "grad_norm": 0.4649040102958679, "learning_rate": 1.545614547100462e-05, "loss": 0.3459, "step": 14190 }, { "epoch": 0.31640479158037077, "grad_norm": 0.6613979339599609, "learning_rate": 1.545321090513629e-05, "loss": 0.2516, "step": 14195 }, { "epoch": 0.31651624096099085, "grad_norm": 0.6959325671195984, "learning_rate": 1.54502756707464e-05, "loss": 0.3448, "step": 14200 }, { "epoch": 0.3166276903416109, "grad_norm": 0.41933050751686096, "learning_rate": 1.5447339768194777e-05, "loss": 0.2984, "step": 14205 }, { "epoch": 0.316739139722231, "grad_norm": 0.7498952150344849, "learning_rate": 1.5444403197841345e-05, "loss": 0.3256, "step": 14210 }, { "epoch": 0.3168505891028511, "grad_norm": 0.470032662153244, "learning_rate": 1.544146596004611e-05, "loss": 0.4236, "step": 14215 }, { "epoch": 0.31696203848347115, "grad_norm": 0.527711808681488, "learning_rate": 1.5438528055169148e-05, "loss": 0.3678, "step": 14220 }, { "epoch": 0.3170734878640912, "grad_norm": 0.7611703872680664, "learning_rate": 1.5435589483570627e-05, "loss": 0.3765, "step": 14225 }, { "epoch": 0.31718493724471125, "grad_norm": 0.7579947710037231, "learning_rate": 1.5432650245610788e-05, "loss": 0.3067, "step": 14230 }, { "epoch": 0.31729638662533133, "grad_norm": 0.559687077999115, "learning_rate": 1.5429710341649963e-05, "loss": 0.3298, "step": 14235 }, { "epoch": 0.3174078360059514, "grad_norm": 0.6190071702003479, "learning_rate": 1.5426769772048567e-05, "loss": 0.429, "step": 14240 }, { "epoch": 0.3175192853865715, "grad_norm": 0.5103845000267029, "learning_rate": 1.542382853716708e-05, "loss": 0.3116, "step": 14245 }, { "epoch": 0.31763073476719156, "grad_norm": 0.5446650981903076, "learning_rate": 1.542088663736608e-05, "loss": 0.3435, "step": 14250 }, { "epoch": 0.3177421841478116, "grad_norm": 0.49055203795433044, "learning_rate": 1.541794407300622e-05, "loss": 0.29, "step": 14255 }, { "epoch": 0.31785363352843166, "grad_norm": 0.5283357501029968, "learning_rate": 1.5415000844448244e-05, "loss": 0.3255, "step": 14260 }, { "epoch": 0.31796508290905173, "grad_norm": 0.7132277488708496, "learning_rate": 1.5412056952052955e-05, "loss": 0.4841, "step": 14265 }, { "epoch": 0.3180765322896718, "grad_norm": 0.7739210724830627, "learning_rate": 1.5409112396181257e-05, "loss": 0.2972, "step": 14270 }, { "epoch": 0.3181879816702919, "grad_norm": 0.6999651193618774, "learning_rate": 1.5406167177194134e-05, "loss": 0.2844, "step": 14275 }, { "epoch": 0.3182994310509119, "grad_norm": 0.6541236042976379, "learning_rate": 1.5403221295452647e-05, "loss": 0.377, "step": 14280 }, { "epoch": 0.318410880431532, "grad_norm": 0.5203682780265808, "learning_rate": 1.540027475131793e-05, "loss": 0.4102, "step": 14285 }, { "epoch": 0.31852232981215206, "grad_norm": 0.4930424988269806, "learning_rate": 1.5397327545151214e-05, "loss": 0.2815, "step": 14290 }, { "epoch": 0.31863377919277214, "grad_norm": 0.8869831562042236, "learning_rate": 1.5394379677313805e-05, "loss": 0.528, "step": 14295 }, { "epoch": 0.3187452285733922, "grad_norm": 0.5916035175323486, "learning_rate": 1.5391431148167084e-05, "loss": 0.4043, "step": 14300 }, { "epoch": 0.3188566779540123, "grad_norm": 0.538135290145874, "learning_rate": 1.5388481958072517e-05, "loss": 0.3168, "step": 14305 }, { "epoch": 0.3189681273346323, "grad_norm": 0.3676489591598511, "learning_rate": 1.538553210739166e-05, "loss": 0.2399, "step": 14310 }, { "epoch": 0.3190795767152524, "grad_norm": 0.5923545360565186, "learning_rate": 1.5382581596486133e-05, "loss": 0.3974, "step": 14315 }, { "epoch": 0.31919102609587247, "grad_norm": 0.47751757502555847, "learning_rate": 1.537963042571765e-05, "loss": 0.2995, "step": 14320 }, { "epoch": 0.31930247547649254, "grad_norm": 0.8567208051681519, "learning_rate": 1.5376678595448002e-05, "loss": 0.268, "step": 14325 }, { "epoch": 0.3194139248571126, "grad_norm": 0.3531752824783325, "learning_rate": 1.537372610603906e-05, "loss": 0.277, "step": 14330 }, { "epoch": 0.3195253742377327, "grad_norm": 0.5776049494743347, "learning_rate": 1.5370772957852774e-05, "loss": 0.2297, "step": 14335 }, { "epoch": 0.3196368236183527, "grad_norm": 0.6118939518928528, "learning_rate": 1.536781915125118e-05, "loss": 0.2609, "step": 14340 }, { "epoch": 0.3197482729989728, "grad_norm": 0.6777652502059937, "learning_rate": 1.5364864686596393e-05, "loss": 0.3072, "step": 14345 }, { "epoch": 0.31985972237959287, "grad_norm": 0.547516942024231, "learning_rate": 1.5361909564250606e-05, "loss": 0.3243, "step": 14350 }, { "epoch": 0.31997117176021295, "grad_norm": 0.6201531887054443, "learning_rate": 1.5358953784576093e-05, "loss": 0.3129, "step": 14355 }, { "epoch": 0.320082621140833, "grad_norm": 0.3952672481536865, "learning_rate": 1.5355997347935207e-05, "loss": 0.3897, "step": 14360 }, { "epoch": 0.3201940705214531, "grad_norm": 0.6198890805244446, "learning_rate": 1.5353040254690396e-05, "loss": 0.3446, "step": 14365 }, { "epoch": 0.3203055199020731, "grad_norm": 0.5419462323188782, "learning_rate": 1.5350082505204163e-05, "loss": 0.3506, "step": 14370 }, { "epoch": 0.3204169692826932, "grad_norm": 0.564018964767456, "learning_rate": 1.5347124099839113e-05, "loss": 0.3479, "step": 14375 }, { "epoch": 0.3205284186633133, "grad_norm": 0.8541130423545837, "learning_rate": 1.534416503895792e-05, "loss": 0.3654, "step": 14380 }, { "epoch": 0.32063986804393335, "grad_norm": 0.5771231055259705, "learning_rate": 1.5341205322923344e-05, "loss": 0.3991, "step": 14385 }, { "epoch": 0.32075131742455343, "grad_norm": 0.8024408221244812, "learning_rate": 1.5338244952098228e-05, "loss": 0.3302, "step": 14390 }, { "epoch": 0.3208627668051735, "grad_norm": 0.649458110332489, "learning_rate": 1.533528392684548e-05, "loss": 0.419, "step": 14395 }, { "epoch": 0.3209742161857935, "grad_norm": 0.39435210824012756, "learning_rate": 1.5332322247528105e-05, "loss": 0.4264, "step": 14400 }, { "epoch": 0.3210856655664136, "grad_norm": 0.5552257895469666, "learning_rate": 1.5329359914509188e-05, "loss": 0.3141, "step": 14405 }, { "epoch": 0.3211971149470337, "grad_norm": 0.6719105243682861, "learning_rate": 1.5326396928151875e-05, "loss": 0.2793, "step": 14410 }, { "epoch": 0.32130856432765376, "grad_norm": 0.52122563123703, "learning_rate": 1.5323433288819414e-05, "loss": 0.2675, "step": 14415 }, { "epoch": 0.32142001370827383, "grad_norm": 0.5365925431251526, "learning_rate": 1.5320468996875125e-05, "loss": 0.4213, "step": 14420 }, { "epoch": 0.3215314630888939, "grad_norm": 0.5100508332252502, "learning_rate": 1.53175040526824e-05, "loss": 0.4192, "step": 14425 }, { "epoch": 0.32164291246951393, "grad_norm": 0.600318193435669, "learning_rate": 1.531453845660473e-05, "loss": 0.3496, "step": 14430 }, { "epoch": 0.321754361850134, "grad_norm": 0.5246594548225403, "learning_rate": 1.5311572209005662e-05, "loss": 0.3804, "step": 14435 }, { "epoch": 0.3218658112307541, "grad_norm": 0.5853553414344788, "learning_rate": 1.5308605310248837e-05, "loss": 0.446, "step": 14440 }, { "epoch": 0.32197726061137416, "grad_norm": 0.48245969414711, "learning_rate": 1.530563776069798e-05, "loss": 0.2857, "step": 14445 }, { "epoch": 0.32208870999199424, "grad_norm": 0.4787314832210541, "learning_rate": 1.5302669560716892e-05, "loss": 0.409, "step": 14450 }, { "epoch": 0.3222001593726143, "grad_norm": 0.40312430262565613, "learning_rate": 1.529970071066944e-05, "loss": 0.3718, "step": 14455 }, { "epoch": 0.32231160875323434, "grad_norm": 0.5702295899391174, "learning_rate": 1.529673121091959e-05, "loss": 0.3722, "step": 14460 }, { "epoch": 0.3224230581338544, "grad_norm": 0.5956227779388428, "learning_rate": 1.529376106183138e-05, "loss": 0.2894, "step": 14465 }, { "epoch": 0.3225345075144745, "grad_norm": 0.5669090747833252, "learning_rate": 1.529079026376892e-05, "loss": 0.2773, "step": 14470 }, { "epoch": 0.32264595689509457, "grad_norm": 1.0934644937515259, "learning_rate": 1.528781881709642e-05, "loss": 0.4747, "step": 14475 }, { "epoch": 0.32275740627571464, "grad_norm": 0.4202762544155121, "learning_rate": 1.5284846722178145e-05, "loss": 0.4186, "step": 14480 }, { "epoch": 0.32286885565633466, "grad_norm": 0.7583221793174744, "learning_rate": 1.5281873979378453e-05, "loss": 0.3649, "step": 14485 }, { "epoch": 0.32298030503695474, "grad_norm": 0.7628973126411438, "learning_rate": 1.5278900589061786e-05, "loss": 0.4465, "step": 14490 }, { "epoch": 0.3230917544175748, "grad_norm": 0.4392721354961395, "learning_rate": 1.527592655159265e-05, "loss": 0.3713, "step": 14495 }, { "epoch": 0.3232032037981949, "grad_norm": 0.5354841947555542, "learning_rate": 1.527295186733564e-05, "loss": 0.4206, "step": 14500 }, { "epoch": 0.32331465317881497, "grad_norm": 0.6088135838508606, "learning_rate": 1.5269976536655432e-05, "loss": 0.3972, "step": 14505 }, { "epoch": 0.32342610255943505, "grad_norm": 0.7526804208755493, "learning_rate": 1.5267000559916786e-05, "loss": 0.3269, "step": 14510 }, { "epoch": 0.32353755194005507, "grad_norm": 0.4515046775341034, "learning_rate": 1.526402393748452e-05, "loss": 0.3799, "step": 14515 }, { "epoch": 0.32364900132067514, "grad_norm": 0.5819978713989258, "learning_rate": 1.526104666972355e-05, "loss": 0.2439, "step": 14520 }, { "epoch": 0.3237604507012952, "grad_norm": 0.5523484945297241, "learning_rate": 1.5258068756998874e-05, "loss": 0.4368, "step": 14525 }, { "epoch": 0.3238719000819153, "grad_norm": 0.5664416551589966, "learning_rate": 1.5255090199675549e-05, "loss": 0.2417, "step": 14530 }, { "epoch": 0.3239833494625354, "grad_norm": 0.5326518416404724, "learning_rate": 1.525211099811873e-05, "loss": 0.2991, "step": 14535 }, { "epoch": 0.32409479884315545, "grad_norm": 0.5060669779777527, "learning_rate": 1.5249131152693644e-05, "loss": 0.4009, "step": 14540 }, { "epoch": 0.3242062482237755, "grad_norm": 0.5437809228897095, "learning_rate": 1.5246150663765597e-05, "loss": 0.3516, "step": 14545 }, { "epoch": 0.32431769760439555, "grad_norm": 0.47817185521125793, "learning_rate": 1.5243169531699966e-05, "loss": 0.312, "step": 14550 }, { "epoch": 0.3244291469850156, "grad_norm": 0.7032877206802368, "learning_rate": 1.5240187756862227e-05, "loss": 0.3905, "step": 14555 }, { "epoch": 0.3245405963656357, "grad_norm": 0.49623337388038635, "learning_rate": 1.5237205339617917e-05, "loss": 0.3021, "step": 14560 }, { "epoch": 0.3246520457462558, "grad_norm": 0.42450109124183655, "learning_rate": 1.5234222280332658e-05, "loss": 0.3865, "step": 14565 }, { "epoch": 0.32476349512687586, "grad_norm": 0.583397388458252, "learning_rate": 1.5231238579372145e-05, "loss": 0.4173, "step": 14570 }, { "epoch": 0.3248749445074959, "grad_norm": 0.40309199690818787, "learning_rate": 1.5228254237102164e-05, "loss": 0.3815, "step": 14575 }, { "epoch": 0.32498639388811595, "grad_norm": 0.6588791608810425, "learning_rate": 1.522526925388857e-05, "loss": 0.3177, "step": 14580 }, { "epoch": 0.32509784326873603, "grad_norm": 0.5301122665405273, "learning_rate": 1.5222283630097298e-05, "loss": 0.3032, "step": 14585 }, { "epoch": 0.3252092926493561, "grad_norm": 0.7074852585792542, "learning_rate": 1.5219297366094364e-05, "loss": 0.3527, "step": 14590 }, { "epoch": 0.3253207420299762, "grad_norm": 0.44125667214393616, "learning_rate": 1.5216310462245858e-05, "loss": 0.3452, "step": 14595 }, { "epoch": 0.32543219141059626, "grad_norm": 0.4817579686641693, "learning_rate": 1.5213322918917954e-05, "loss": 0.3088, "step": 14600 }, { "epoch": 0.3255436407912163, "grad_norm": 0.4762619733810425, "learning_rate": 1.5210334736476898e-05, "loss": 0.3784, "step": 14605 }, { "epoch": 0.32565509017183636, "grad_norm": 0.5899630784988403, "learning_rate": 1.5207345915289023e-05, "loss": 0.3804, "step": 14610 }, { "epoch": 0.32576653955245644, "grad_norm": 0.38415637612342834, "learning_rate": 1.5204356455720735e-05, "loss": 0.4236, "step": 14615 }, { "epoch": 0.3258779889330765, "grad_norm": 0.5962705016136169, "learning_rate": 1.5201366358138514e-05, "loss": 0.4957, "step": 14620 }, { "epoch": 0.3259894383136966, "grad_norm": 0.6160600781440735, "learning_rate": 1.5198375622908926e-05, "loss": 0.362, "step": 14625 }, { "epoch": 0.32610088769431667, "grad_norm": 0.5748515725135803, "learning_rate": 1.5195384250398614e-05, "loss": 0.3849, "step": 14630 }, { "epoch": 0.3262123370749367, "grad_norm": 0.6279200315475464, "learning_rate": 1.5192392240974296e-05, "loss": 0.4086, "step": 14635 }, { "epoch": 0.32632378645555676, "grad_norm": 0.671965479850769, "learning_rate": 1.5189399595002767e-05, "loss": 0.2291, "step": 14640 }, { "epoch": 0.32643523583617684, "grad_norm": 0.49965712428092957, "learning_rate": 1.5186406312850901e-05, "loss": 0.4113, "step": 14645 }, { "epoch": 0.3265466852167969, "grad_norm": 0.6102264523506165, "learning_rate": 1.518341239488566e-05, "loss": 0.3486, "step": 14650 }, { "epoch": 0.326658134597417, "grad_norm": 0.7976998686790466, "learning_rate": 1.5180417841474063e-05, "loss": 0.3541, "step": 14655 }, { "epoch": 0.326769583978037, "grad_norm": 0.7241818308830261, "learning_rate": 1.5177422652983227e-05, "loss": 0.3369, "step": 14660 }, { "epoch": 0.3268810333586571, "grad_norm": 0.4444121718406677, "learning_rate": 1.5174426829780337e-05, "loss": 0.2688, "step": 14665 }, { "epoch": 0.32699248273927717, "grad_norm": 0.5494623184204102, "learning_rate": 1.5171430372232661e-05, "loss": 0.4802, "step": 14670 }, { "epoch": 0.32710393211989724, "grad_norm": 0.8373895883560181, "learning_rate": 1.5168433280707536e-05, "loss": 0.382, "step": 14675 }, { "epoch": 0.3272153815005173, "grad_norm": 0.784369707107544, "learning_rate": 1.5165435555572386e-05, "loss": 0.3099, "step": 14680 }, { "epoch": 0.3273268308811374, "grad_norm": 0.6406736969947815, "learning_rate": 1.5162437197194707e-05, "loss": 0.4039, "step": 14685 }, { "epoch": 0.3274382802617574, "grad_norm": 0.4019657075405121, "learning_rate": 1.5159438205942078e-05, "loss": 0.2698, "step": 14690 }, { "epoch": 0.3275497296423775, "grad_norm": 0.5189796090126038, "learning_rate": 1.5156438582182147e-05, "loss": 0.3633, "step": 14695 }, { "epoch": 0.3276611790229976, "grad_norm": 0.6062685251235962, "learning_rate": 1.515343832628265e-05, "loss": 0.4351, "step": 14700 }, { "epoch": 0.32777262840361765, "grad_norm": 0.5360396504402161, "learning_rate": 1.515043743861139e-05, "loss": 0.3736, "step": 14705 }, { "epoch": 0.3278840777842377, "grad_norm": 0.5746144652366638, "learning_rate": 1.5147435919536254e-05, "loss": 0.2686, "step": 14710 }, { "epoch": 0.3279955271648578, "grad_norm": 0.515260636806488, "learning_rate": 1.5144433769425212e-05, "loss": 0.2981, "step": 14715 }, { "epoch": 0.3281069765454778, "grad_norm": 0.6790237426757812, "learning_rate": 1.5141430988646294e-05, "loss": 0.3976, "step": 14720 }, { "epoch": 0.3282184259260979, "grad_norm": 0.5881702899932861, "learning_rate": 1.5138427577567627e-05, "loss": 0.3953, "step": 14725 }, { "epoch": 0.328329875306718, "grad_norm": 0.4543622136116028, "learning_rate": 1.5135423536557397e-05, "loss": 0.4129, "step": 14730 }, { "epoch": 0.32844132468733805, "grad_norm": 0.7353951930999756, "learning_rate": 1.513241886598388e-05, "loss": 0.2967, "step": 14735 }, { "epoch": 0.32855277406795813, "grad_norm": 0.6022626757621765, "learning_rate": 1.5129413566215432e-05, "loss": 0.2963, "step": 14740 }, { "epoch": 0.3286642234485782, "grad_norm": 0.5606439709663391, "learning_rate": 1.5126407637620469e-05, "loss": 0.3192, "step": 14745 }, { "epoch": 0.32877567282919823, "grad_norm": 0.6193286180496216, "learning_rate": 1.51234010805675e-05, "loss": 0.3371, "step": 14750 }, { "epoch": 0.3288871222098183, "grad_norm": 0.5279430150985718, "learning_rate": 1.5120393895425108e-05, "loss": 0.2623, "step": 14755 }, { "epoch": 0.3289985715904384, "grad_norm": 1.206921100616455, "learning_rate": 1.5117386082561947e-05, "loss": 0.2733, "step": 14760 }, { "epoch": 0.32911002097105846, "grad_norm": 0.579546332359314, "learning_rate": 1.5114377642346747e-05, "loss": 0.19, "step": 14765 }, { "epoch": 0.32922147035167854, "grad_norm": 0.5887970924377441, "learning_rate": 1.511136857514833e-05, "loss": 0.3501, "step": 14770 }, { "epoch": 0.3293329197322986, "grad_norm": 0.9834648370742798, "learning_rate": 1.510835888133558e-05, "loss": 0.4039, "step": 14775 }, { "epoch": 0.32944436911291863, "grad_norm": 0.5637308955192566, "learning_rate": 1.510534856127746e-05, "loss": 0.3722, "step": 14780 }, { "epoch": 0.3295558184935387, "grad_norm": 0.7502883672714233, "learning_rate": 1.5102337615343014e-05, "loss": 0.3558, "step": 14785 }, { "epoch": 0.3296672678741588, "grad_norm": 0.6586925983428955, "learning_rate": 1.5099326043901361e-05, "loss": 0.2808, "step": 14790 }, { "epoch": 0.32977871725477886, "grad_norm": 0.48584112524986267, "learning_rate": 1.5096313847321696e-05, "loss": 0.4449, "step": 14795 }, { "epoch": 0.32989016663539894, "grad_norm": 0.6589898467063904, "learning_rate": 1.5093301025973289e-05, "loss": 0.2944, "step": 14800 }, { "epoch": 0.330001616016019, "grad_norm": 0.6377086639404297, "learning_rate": 1.5090287580225492e-05, "loss": 0.3999, "step": 14805 }, { "epoch": 0.33011306539663904, "grad_norm": 0.5869620442390442, "learning_rate": 1.5087273510447726e-05, "loss": 0.3394, "step": 14810 }, { "epoch": 0.3302245147772591, "grad_norm": 0.5796343684196472, "learning_rate": 1.5084258817009496e-05, "loss": 0.2943, "step": 14815 }, { "epoch": 0.3303359641578792, "grad_norm": 0.5942478775978088, "learning_rate": 1.5081243500280383e-05, "loss": 0.2228, "step": 14820 }, { "epoch": 0.33044741353849927, "grad_norm": 0.589439868927002, "learning_rate": 1.5078227560630035e-05, "loss": 0.3606, "step": 14825 }, { "epoch": 0.33055886291911935, "grad_norm": 0.563065767288208, "learning_rate": 1.5075210998428187e-05, "loss": 0.3575, "step": 14830 }, { "epoch": 0.3306703122997394, "grad_norm": 0.607187032699585, "learning_rate": 1.5072193814044645e-05, "loss": 0.2671, "step": 14835 }, { "epoch": 0.33078176168035944, "grad_norm": 0.6348040103912354, "learning_rate": 1.506917600784929e-05, "loss": 0.3359, "step": 14840 }, { "epoch": 0.3308932110609795, "grad_norm": 0.6133410930633545, "learning_rate": 1.506615758021209e-05, "loss": 0.3673, "step": 14845 }, { "epoch": 0.3310046604415996, "grad_norm": 0.5856065154075623, "learning_rate": 1.5063138531503075e-05, "loss": 0.3409, "step": 14850 }, { "epoch": 0.3311161098222197, "grad_norm": 0.5683413147926331, "learning_rate": 1.5060118862092354e-05, "loss": 0.4316, "step": 14855 }, { "epoch": 0.33122755920283975, "grad_norm": 0.5754731893539429, "learning_rate": 1.505709857235012e-05, "loss": 0.3709, "step": 14860 }, { "epoch": 0.33133900858345977, "grad_norm": 0.4092066287994385, "learning_rate": 1.5054077662646633e-05, "loss": 0.3438, "step": 14865 }, { "epoch": 0.33145045796407985, "grad_norm": 0.6222695112228394, "learning_rate": 1.505105613335224e-05, "loss": 0.363, "step": 14870 }, { "epoch": 0.3315619073446999, "grad_norm": 0.6781238317489624, "learning_rate": 1.5048033984837352e-05, "loss": 0.2977, "step": 14875 }, { "epoch": 0.33167335672532, "grad_norm": 0.4596276879310608, "learning_rate": 1.5045011217472463e-05, "loss": 0.3093, "step": 14880 }, { "epoch": 0.3317848061059401, "grad_norm": 0.3854585289955139, "learning_rate": 1.5041987831628137e-05, "loss": 0.4025, "step": 14885 }, { "epoch": 0.33189625548656015, "grad_norm": 0.5611217021942139, "learning_rate": 1.5038963827675024e-05, "loss": 0.3191, "step": 14890 }, { "epoch": 0.3320077048671802, "grad_norm": 0.48890677094459534, "learning_rate": 1.503593920598384e-05, "loss": 0.3349, "step": 14895 }, { "epoch": 0.33211915424780025, "grad_norm": 0.6745815277099609, "learning_rate": 1.5032913966925382e-05, "loss": 0.2311, "step": 14900 }, { "epoch": 0.33223060362842033, "grad_norm": 0.5865761041641235, "learning_rate": 1.5029888110870516e-05, "loss": 0.3149, "step": 14905 }, { "epoch": 0.3323420530090404, "grad_norm": 0.6866218447685242, "learning_rate": 1.5026861638190196e-05, "loss": 0.3871, "step": 14910 }, { "epoch": 0.3324535023896605, "grad_norm": 0.6699501872062683, "learning_rate": 1.5023834549255441e-05, "loss": 0.4077, "step": 14915 }, { "epoch": 0.33256495177028056, "grad_norm": 0.6308091878890991, "learning_rate": 1.5020806844437345e-05, "loss": 0.3688, "step": 14920 }, { "epoch": 0.3326764011509006, "grad_norm": 0.5703872442245483, "learning_rate": 1.5017778524107088e-05, "loss": 0.3708, "step": 14925 }, { "epoch": 0.33278785053152066, "grad_norm": 0.7351306676864624, "learning_rate": 1.5014749588635914e-05, "loss": 0.358, "step": 14930 }, { "epoch": 0.33289929991214073, "grad_norm": 0.9598005414009094, "learning_rate": 1.5011720038395145e-05, "loss": 0.2703, "step": 14935 }, { "epoch": 0.3330107492927608, "grad_norm": 0.46301475167274475, "learning_rate": 1.5008689873756189e-05, "loss": 0.3201, "step": 14940 }, { "epoch": 0.3331221986733809, "grad_norm": 0.6332523822784424, "learning_rate": 1.5005659095090513e-05, "loss": 0.3842, "step": 14945 }, { "epoch": 0.33323364805400096, "grad_norm": 0.5854825377464294, "learning_rate": 1.500262770276967e-05, "loss": 0.3669, "step": 14950 }, { "epoch": 0.333345097434621, "grad_norm": 0.5210273861885071, "learning_rate": 1.4999595697165286e-05, "loss": 0.2296, "step": 14955 }, { "epoch": 0.33345654681524106, "grad_norm": 0.6245068311691284, "learning_rate": 1.499656307864906e-05, "loss": 0.2669, "step": 14960 }, { "epoch": 0.33356799619586114, "grad_norm": 0.5508987903594971, "learning_rate": 1.4993529847592766e-05, "loss": 0.326, "step": 14965 }, { "epoch": 0.3336794455764812, "grad_norm": 0.37212294340133667, "learning_rate": 1.499049600436826e-05, "loss": 0.3557, "step": 14970 }, { "epoch": 0.3337908949571013, "grad_norm": 0.4018285274505615, "learning_rate": 1.4987461549347462e-05, "loss": 0.2277, "step": 14975 }, { "epoch": 0.33390234433772137, "grad_norm": 0.5052530765533447, "learning_rate": 1.4984426482902377e-05, "loss": 0.2757, "step": 14980 }, { "epoch": 0.3340137937183414, "grad_norm": 0.6615517139434814, "learning_rate": 1.4981390805405079e-05, "loss": 0.3558, "step": 14985 }, { "epoch": 0.33412524309896147, "grad_norm": 0.6946066617965698, "learning_rate": 1.497835451722772e-05, "loss": 0.4595, "step": 14990 }, { "epoch": 0.33423669247958154, "grad_norm": 0.4988650977611542, "learning_rate": 1.4975317618742518e-05, "loss": 0.2057, "step": 14995 }, { "epoch": 0.3343481418602016, "grad_norm": 0.43746042251586914, "learning_rate": 1.4972280110321787e-05, "loss": 0.2913, "step": 15000 }, { "epoch": 0.3344595912408217, "grad_norm": 0.352634459733963, "learning_rate": 1.4969241992337891e-05, "loss": 0.2988, "step": 15005 }, { "epoch": 0.3345710406214418, "grad_norm": 0.8164475560188293, "learning_rate": 1.4966203265163284e-05, "loss": 0.428, "step": 15010 }, { "epoch": 0.3346824900020618, "grad_norm": 0.6539212465286255, "learning_rate": 1.496316392917049e-05, "loss": 0.3029, "step": 15015 }, { "epoch": 0.33479393938268187, "grad_norm": 0.6618625521659851, "learning_rate": 1.4960123984732109e-05, "loss": 0.2792, "step": 15020 }, { "epoch": 0.33490538876330195, "grad_norm": 0.6794106960296631, "learning_rate": 1.4957083432220811e-05, "loss": 0.2843, "step": 15025 }, { "epoch": 0.335016838143922, "grad_norm": 0.46583518385887146, "learning_rate": 1.4954042272009348e-05, "loss": 0.235, "step": 15030 }, { "epoch": 0.3351282875245421, "grad_norm": 0.6059388518333435, "learning_rate": 1.4951000504470543e-05, "loss": 0.3461, "step": 15035 }, { "epoch": 0.3352397369051621, "grad_norm": 0.4841199517250061, "learning_rate": 1.4947958129977292e-05, "loss": 0.4458, "step": 15040 }, { "epoch": 0.3353511862857822, "grad_norm": 0.56889408826828, "learning_rate": 1.4944915148902564e-05, "loss": 0.2941, "step": 15045 }, { "epoch": 0.3354626356664023, "grad_norm": 0.741605281829834, "learning_rate": 1.494187156161941e-05, "loss": 0.3782, "step": 15050 }, { "epoch": 0.33557408504702235, "grad_norm": 0.6679457426071167, "learning_rate": 1.493882736850095e-05, "loss": 0.3299, "step": 15055 }, { "epoch": 0.33568553442764243, "grad_norm": 0.5985062122344971, "learning_rate": 1.4935782569920373e-05, "loss": 0.2703, "step": 15060 }, { "epoch": 0.3357969838082625, "grad_norm": 0.44864389300346375, "learning_rate": 1.4932737166250953e-05, "loss": 0.4695, "step": 15065 }, { "epoch": 0.3359084331888825, "grad_norm": 0.6617417931556702, "learning_rate": 1.492969115786603e-05, "loss": 0.3258, "step": 15070 }, { "epoch": 0.3360198825695026, "grad_norm": 0.41781333088874817, "learning_rate": 1.4926644545139025e-05, "loss": 0.1761, "step": 15075 }, { "epoch": 0.3361313319501227, "grad_norm": 0.5821076035499573, "learning_rate": 1.4923597328443423e-05, "loss": 0.423, "step": 15080 }, { "epoch": 0.33624278133074276, "grad_norm": 0.6323464512825012, "learning_rate": 1.4920549508152795e-05, "loss": 0.4038, "step": 15085 }, { "epoch": 0.33635423071136283, "grad_norm": 0.5762990117073059, "learning_rate": 1.4917501084640777e-05, "loss": 0.3248, "step": 15090 }, { "epoch": 0.3364656800919829, "grad_norm": 0.8302823305130005, "learning_rate": 1.4914452058281086e-05, "loss": 0.3412, "step": 15095 }, { "epoch": 0.33657712947260293, "grad_norm": 0.5417741537094116, "learning_rate": 1.4911402429447503e-05, "loss": 0.3437, "step": 15100 }, { "epoch": 0.336688578853223, "grad_norm": 0.6161333322525024, "learning_rate": 1.4908352198513894e-05, "loss": 0.3852, "step": 15105 }, { "epoch": 0.3368000282338431, "grad_norm": 0.49575358629226685, "learning_rate": 1.4905301365854193e-05, "loss": 0.2876, "step": 15110 }, { "epoch": 0.33691147761446316, "grad_norm": 0.5573312640190125, "learning_rate": 1.4902249931842407e-05, "loss": 0.3939, "step": 15115 }, { "epoch": 0.33702292699508324, "grad_norm": 0.5083134770393372, "learning_rate": 1.4899197896852618e-05, "loss": 0.2197, "step": 15120 }, { "epoch": 0.3371343763757033, "grad_norm": 0.5789259672164917, "learning_rate": 1.4896145261258982e-05, "loss": 0.3829, "step": 15125 }, { "epoch": 0.33724582575632334, "grad_norm": 0.4595588445663452, "learning_rate": 1.4893092025435733e-05, "loss": 0.2697, "step": 15130 }, { "epoch": 0.3373572751369434, "grad_norm": 0.5526938438415527, "learning_rate": 1.4890038189757166e-05, "loss": 0.3557, "step": 15135 }, { "epoch": 0.3374687245175635, "grad_norm": 0.5678578019142151, "learning_rate": 1.4886983754597667e-05, "loss": 0.3549, "step": 15140 }, { "epoch": 0.33758017389818357, "grad_norm": 0.5942708849906921, "learning_rate": 1.4883928720331677e-05, "loss": 0.2335, "step": 15145 }, { "epoch": 0.33769162327880364, "grad_norm": 0.5128694772720337, "learning_rate": 1.4880873087333726e-05, "loss": 0.3975, "step": 15150 }, { "epoch": 0.3378030726594237, "grad_norm": 0.6574785709381104, "learning_rate": 1.4877816855978409e-05, "loss": 0.2745, "step": 15155 }, { "epoch": 0.33791452204004374, "grad_norm": 0.7634928822517395, "learning_rate": 1.4874760026640398e-05, "loss": 0.3363, "step": 15160 }, { "epoch": 0.3380259714206638, "grad_norm": 0.5223698616027832, "learning_rate": 1.4871702599694433e-05, "loss": 0.3408, "step": 15165 }, { "epoch": 0.3381374208012839, "grad_norm": 0.46809354424476624, "learning_rate": 1.4868644575515334e-05, "loss": 0.4533, "step": 15170 }, { "epoch": 0.33824887018190397, "grad_norm": 0.6846073865890503, "learning_rate": 1.486558595447799e-05, "loss": 0.3388, "step": 15175 }, { "epoch": 0.33836031956252405, "grad_norm": 0.6507226824760437, "learning_rate": 1.4862526736957363e-05, "loss": 0.3976, "step": 15180 }, { "epoch": 0.3384717689431441, "grad_norm": 0.7572437524795532, "learning_rate": 1.4859466923328494e-05, "loss": 0.3125, "step": 15185 }, { "epoch": 0.33858321832376415, "grad_norm": 0.49744054675102234, "learning_rate": 1.4856406513966487e-05, "loss": 0.3406, "step": 15190 }, { "epoch": 0.3386946677043842, "grad_norm": 0.5798356533050537, "learning_rate": 1.4853345509246528e-05, "loss": 0.4069, "step": 15195 }, { "epoch": 0.3388061170850043, "grad_norm": 0.5558682680130005, "learning_rate": 1.4850283909543873e-05, "loss": 0.4139, "step": 15200 }, { "epoch": 0.3389175664656244, "grad_norm": 0.6416339874267578, "learning_rate": 1.4847221715233846e-05, "loss": 0.3324, "step": 15205 }, { "epoch": 0.33902901584624445, "grad_norm": 0.6704056262969971, "learning_rate": 1.4844158926691857e-05, "loss": 0.4212, "step": 15210 }, { "epoch": 0.33914046522686453, "grad_norm": 0.5458512306213379, "learning_rate": 1.4841095544293369e-05, "loss": 0.2544, "step": 15215 }, { "epoch": 0.33925191460748455, "grad_norm": 0.22037500143051147, "learning_rate": 1.4838031568413937e-05, "loss": 0.2862, "step": 15220 }, { "epoch": 0.3393633639881046, "grad_norm": 0.5767005681991577, "learning_rate": 1.4834966999429179e-05, "loss": 0.3191, "step": 15225 }, { "epoch": 0.3394748133687247, "grad_norm": 0.5186335444450378, "learning_rate": 1.4831901837714786e-05, "loss": 0.3202, "step": 15230 }, { "epoch": 0.3395862627493448, "grad_norm": 0.5099804997444153, "learning_rate": 1.4828836083646526e-05, "loss": 0.2643, "step": 15235 }, { "epoch": 0.33969771212996486, "grad_norm": 0.7531489133834839, "learning_rate": 1.4825769737600232e-05, "loss": 0.3556, "step": 15240 }, { "epoch": 0.3398091615105849, "grad_norm": 1.1307042837142944, "learning_rate": 1.482270279995182e-05, "loss": 0.1962, "step": 15245 }, { "epoch": 0.33992061089120496, "grad_norm": 0.8670387268066406, "learning_rate": 1.4819635271077269e-05, "loss": 0.3373, "step": 15250 }, { "epoch": 0.34003206027182503, "grad_norm": 0.7208353281021118, "learning_rate": 1.4816567151352637e-05, "loss": 0.3233, "step": 15255 }, { "epoch": 0.3401435096524451, "grad_norm": 0.5600586533546448, "learning_rate": 1.4813498441154048e-05, "loss": 0.4405, "step": 15260 }, { "epoch": 0.3402549590330652, "grad_norm": 0.7927045822143555, "learning_rate": 1.4810429140857707e-05, "loss": 0.427, "step": 15265 }, { "epoch": 0.34036640841368526, "grad_norm": 0.6728307604789734, "learning_rate": 1.4807359250839886e-05, "loss": 0.4482, "step": 15270 }, { "epoch": 0.3404778577943053, "grad_norm": 0.5486059784889221, "learning_rate": 1.4804288771476923e-05, "loss": 0.3259, "step": 15275 }, { "epoch": 0.34058930717492536, "grad_norm": 0.6788325905799866, "learning_rate": 1.4801217703145243e-05, "loss": 0.4149, "step": 15280 }, { "epoch": 0.34070075655554544, "grad_norm": 0.6441235542297363, "learning_rate": 1.4798146046221332e-05, "loss": 0.3206, "step": 15285 }, { "epoch": 0.3408122059361655, "grad_norm": 0.6822008490562439, "learning_rate": 1.4795073801081752e-05, "loss": 0.3903, "step": 15290 }, { "epoch": 0.3409236553167856, "grad_norm": 0.6025763750076294, "learning_rate": 1.4792000968103132e-05, "loss": 0.3727, "step": 15295 }, { "epoch": 0.34103510469740567, "grad_norm": 0.6513890027999878, "learning_rate": 1.4788927547662188e-05, "loss": 0.367, "step": 15300 }, { "epoch": 0.3411465540780257, "grad_norm": 0.5683618783950806, "learning_rate": 1.4785853540135689e-05, "loss": 0.2873, "step": 15305 }, { "epoch": 0.34125800345864576, "grad_norm": 0.6003490090370178, "learning_rate": 1.4782778945900486e-05, "loss": 0.5232, "step": 15310 }, { "epoch": 0.34136945283926584, "grad_norm": 0.47557592391967773, "learning_rate": 1.4779703765333504e-05, "loss": 0.309, "step": 15315 }, { "epoch": 0.3414809022198859, "grad_norm": 0.4998873174190521, "learning_rate": 1.4776627998811733e-05, "loss": 0.3484, "step": 15320 }, { "epoch": 0.341592351600506, "grad_norm": 0.5946129560470581, "learning_rate": 1.4773551646712242e-05, "loss": 0.2308, "step": 15325 }, { "epoch": 0.34170380098112607, "grad_norm": 0.5998285412788391, "learning_rate": 1.477047470941216e-05, "loss": 0.3165, "step": 15330 }, { "epoch": 0.3418152503617461, "grad_norm": 0.5759714841842651, "learning_rate": 1.47673971872887e-05, "loss": 0.3408, "step": 15335 }, { "epoch": 0.34192669974236617, "grad_norm": 0.8474906086921692, "learning_rate": 1.4764319080719149e-05, "loss": 0.2878, "step": 15340 }, { "epoch": 0.34203814912298625, "grad_norm": 0.262577086687088, "learning_rate": 1.476124039008085e-05, "loss": 0.335, "step": 15345 }, { "epoch": 0.3421495985036063, "grad_norm": 0.788374662399292, "learning_rate": 1.4758161115751234e-05, "loss": 0.2989, "step": 15350 }, { "epoch": 0.3422610478842264, "grad_norm": 0.6692611575126648, "learning_rate": 1.475508125810779e-05, "loss": 0.3803, "step": 15355 }, { "epoch": 0.3423724972648465, "grad_norm": 0.7293594479560852, "learning_rate": 1.475200081752809e-05, "loss": 0.4263, "step": 15360 }, { "epoch": 0.3424839466454665, "grad_norm": 0.689411997795105, "learning_rate": 1.4748919794389767e-05, "loss": 0.4555, "step": 15365 }, { "epoch": 0.3425953960260866, "grad_norm": 0.5694615244865417, "learning_rate": 1.4745838189070531e-05, "loss": 0.4428, "step": 15370 }, { "epoch": 0.34270684540670665, "grad_norm": 0.8198974132537842, "learning_rate": 1.4742756001948171e-05, "loss": 0.2321, "step": 15375 }, { "epoch": 0.3428182947873267, "grad_norm": 0.3840519189834595, "learning_rate": 1.4739673233400528e-05, "loss": 0.3065, "step": 15380 }, { "epoch": 0.3429297441679468, "grad_norm": 0.5746693015098572, "learning_rate": 1.4736589883805535e-05, "loss": 0.2873, "step": 15385 }, { "epoch": 0.3430411935485669, "grad_norm": 0.7387751936912537, "learning_rate": 1.473350595354118e-05, "loss": 0.4415, "step": 15390 }, { "epoch": 0.3431526429291869, "grad_norm": 0.6387646198272705, "learning_rate": 1.4730421442985534e-05, "loss": 0.413, "step": 15395 }, { "epoch": 0.343264092309807, "grad_norm": 0.7444597482681274, "learning_rate": 1.4727336352516733e-05, "loss": 0.4287, "step": 15400 }, { "epoch": 0.34337554169042706, "grad_norm": 0.6035224795341492, "learning_rate": 1.4724250682512985e-05, "loss": 0.2612, "step": 15405 }, { "epoch": 0.34348699107104713, "grad_norm": 0.6849896907806396, "learning_rate": 1.4721164433352568e-05, "loss": 0.2332, "step": 15410 }, { "epoch": 0.3435984404516672, "grad_norm": 0.5554786920547485, "learning_rate": 1.4718077605413832e-05, "loss": 0.4821, "step": 15415 }, { "epoch": 0.34370988983228723, "grad_norm": 0.5494076013565063, "learning_rate": 1.47149901990752e-05, "loss": 0.2667, "step": 15420 }, { "epoch": 0.3438213392129073, "grad_norm": 0.6117123961448669, "learning_rate": 1.4711902214715165e-05, "loss": 0.4004, "step": 15425 }, { "epoch": 0.3439327885935274, "grad_norm": 0.6088406443595886, "learning_rate": 1.4708813652712287e-05, "loss": 0.3147, "step": 15430 }, { "epoch": 0.34404423797414746, "grad_norm": 0.605383038520813, "learning_rate": 1.4705724513445204e-05, "loss": 0.2673, "step": 15435 }, { "epoch": 0.34415568735476754, "grad_norm": 0.9843233823776245, "learning_rate": 1.4702634797292618e-05, "loss": 0.459, "step": 15440 }, { "epoch": 0.3442671367353876, "grad_norm": 0.6190301179885864, "learning_rate": 1.4699544504633303e-05, "loss": 0.3502, "step": 15445 }, { "epoch": 0.34437858611600763, "grad_norm": 0.4855058193206787, "learning_rate": 1.4696453635846109e-05, "loss": 0.3083, "step": 15450 }, { "epoch": 0.3444900354966277, "grad_norm": 0.8662997484207153, "learning_rate": 1.4693362191309948e-05, "loss": 0.3554, "step": 15455 }, { "epoch": 0.3446014848772478, "grad_norm": 0.710433840751648, "learning_rate": 1.4690270171403809e-05, "loss": 0.3623, "step": 15460 }, { "epoch": 0.34471293425786786, "grad_norm": 0.4728683531284332, "learning_rate": 1.4687177576506752e-05, "loss": 0.3923, "step": 15465 }, { "epoch": 0.34482438363848794, "grad_norm": 0.3298479914665222, "learning_rate": 1.4684084406997903e-05, "loss": 0.3616, "step": 15470 }, { "epoch": 0.344935833019108, "grad_norm": 0.8117642402648926, "learning_rate": 1.468099066325646e-05, "loss": 0.3516, "step": 15475 }, { "epoch": 0.34504728239972804, "grad_norm": 0.5667406916618347, "learning_rate": 1.4677896345661697e-05, "loss": 0.3093, "step": 15480 }, { "epoch": 0.3451587317803481, "grad_norm": 0.47480669617652893, "learning_rate": 1.4674801454592949e-05, "loss": 0.3733, "step": 15485 }, { "epoch": 0.3452701811609682, "grad_norm": 0.670015811920166, "learning_rate": 1.467170599042962e-05, "loss": 0.3697, "step": 15490 }, { "epoch": 0.34538163054158827, "grad_norm": 0.5766051411628723, "learning_rate": 1.4668609953551205e-05, "loss": 0.3838, "step": 15495 }, { "epoch": 0.34549307992220835, "grad_norm": 1.1956382989883423, "learning_rate": 1.4665513344337243e-05, "loss": 0.3496, "step": 15500 }, { "epoch": 0.3456045293028284, "grad_norm": 0.8127216696739197, "learning_rate": 1.4662416163167354e-05, "loss": 0.2764, "step": 15505 }, { "epoch": 0.34571597868344844, "grad_norm": 0.693279504776001, "learning_rate": 1.4659318410421237e-05, "loss": 0.5523, "step": 15510 }, { "epoch": 0.3458274280640685, "grad_norm": 0.696718692779541, "learning_rate": 1.4656220086478645e-05, "loss": 0.2758, "step": 15515 }, { "epoch": 0.3459388774446886, "grad_norm": 0.41814491152763367, "learning_rate": 1.465312119171941e-05, "loss": 0.2746, "step": 15520 }, { "epoch": 0.3460503268253087, "grad_norm": 0.6163225769996643, "learning_rate": 1.4650021726523433e-05, "loss": 0.3639, "step": 15525 }, { "epoch": 0.34616177620592875, "grad_norm": 0.5217748880386353, "learning_rate": 1.4646921691270688e-05, "loss": 0.3989, "step": 15530 }, { "epoch": 0.3462732255865488, "grad_norm": 0.5386656522750854, "learning_rate": 1.464382108634121e-05, "loss": 0.4572, "step": 15535 }, { "epoch": 0.34638467496716885, "grad_norm": 0.7713764905929565, "learning_rate": 1.4640719912115114e-05, "loss": 0.3086, "step": 15540 }, { "epoch": 0.3464961243477889, "grad_norm": 0.3597823977470398, "learning_rate": 1.4637618168972574e-05, "loss": 0.2486, "step": 15545 }, { "epoch": 0.346607573728409, "grad_norm": 0.7092816233634949, "learning_rate": 1.4634515857293845e-05, "loss": 0.3379, "step": 15550 }, { "epoch": 0.3467190231090291, "grad_norm": 0.7376322746276855, "learning_rate": 1.4631412977459248e-05, "loss": 0.3819, "step": 15555 }, { "epoch": 0.34683047248964916, "grad_norm": 0.623254656791687, "learning_rate": 1.4628309529849164e-05, "loss": 0.3446, "step": 15560 }, { "epoch": 0.34694192187026923, "grad_norm": 0.6260783076286316, "learning_rate": 1.4625205514844061e-05, "loss": 0.385, "step": 15565 }, { "epoch": 0.34705337125088925, "grad_norm": 0.5267546772956848, "learning_rate": 1.462210093282446e-05, "loss": 0.2952, "step": 15570 }, { "epoch": 0.34716482063150933, "grad_norm": 0.6364974975585938, "learning_rate": 1.4618995784170961e-05, "loss": 0.3519, "step": 15575 }, { "epoch": 0.3472762700121294, "grad_norm": 0.6822920441627502, "learning_rate": 1.4615890069264237e-05, "loss": 0.3488, "step": 15580 }, { "epoch": 0.3473877193927495, "grad_norm": 0.5061941742897034, "learning_rate": 1.4612783788485014e-05, "loss": 0.25, "step": 15585 }, { "epoch": 0.34749916877336956, "grad_norm": 0.5946047902107239, "learning_rate": 1.4609676942214108e-05, "loss": 0.4002, "step": 15590 }, { "epoch": 0.34761061815398964, "grad_norm": 0.5874724388122559, "learning_rate": 1.4606569530832385e-05, "loss": 0.1921, "step": 15595 }, { "epoch": 0.34772206753460966, "grad_norm": 0.789135754108429, "learning_rate": 1.46034615547208e-05, "loss": 0.3302, "step": 15600 }, { "epoch": 0.34783351691522973, "grad_norm": 0.846127986907959, "learning_rate": 1.460035301426036e-05, "loss": 0.3297, "step": 15605 }, { "epoch": 0.3479449662958498, "grad_norm": 0.6058344841003418, "learning_rate": 1.4597243909832149e-05, "loss": 0.1896, "step": 15610 }, { "epoch": 0.3480564156764699, "grad_norm": 0.6141452789306641, "learning_rate": 1.459413424181732e-05, "loss": 0.249, "step": 15615 }, { "epoch": 0.34816786505708996, "grad_norm": 0.614362895488739, "learning_rate": 1.4591024010597094e-05, "loss": 0.2335, "step": 15620 }, { "epoch": 0.34827931443771, "grad_norm": 0.5771622061729431, "learning_rate": 1.4587913216552765e-05, "loss": 0.2674, "step": 15625 }, { "epoch": 0.34839076381833006, "grad_norm": 0.7412368059158325, "learning_rate": 1.4584801860065683e-05, "loss": 0.4087, "step": 15630 }, { "epoch": 0.34850221319895014, "grad_norm": 0.6473712921142578, "learning_rate": 1.4581689941517285e-05, "loss": 0.3875, "step": 15635 }, { "epoch": 0.3486136625795702, "grad_norm": 0.575176477432251, "learning_rate": 1.4578577461289068e-05, "loss": 0.3011, "step": 15640 }, { "epoch": 0.3487251119601903, "grad_norm": 0.9265764951705933, "learning_rate": 1.4575464419762592e-05, "loss": 0.4058, "step": 15645 }, { "epoch": 0.34883656134081037, "grad_norm": 0.5604076981544495, "learning_rate": 1.4572350817319497e-05, "loss": 0.3007, "step": 15650 }, { "epoch": 0.3489480107214304, "grad_norm": 0.7239184975624084, "learning_rate": 1.4569236654341486e-05, "loss": 0.4001, "step": 15655 }, { "epoch": 0.34905946010205047, "grad_norm": 0.7642486095428467, "learning_rate": 1.4566121931210326e-05, "loss": 0.2504, "step": 15660 }, { "epoch": 0.34917090948267054, "grad_norm": 0.6194584965705872, "learning_rate": 1.4563006648307865e-05, "loss": 0.2743, "step": 15665 }, { "epoch": 0.3492823588632906, "grad_norm": 0.4331457316875458, "learning_rate": 1.455989080601601e-05, "loss": 0.3977, "step": 15670 }, { "epoch": 0.3493938082439107, "grad_norm": 0.4841763973236084, "learning_rate": 1.4556774404716738e-05, "loss": 0.3041, "step": 15675 }, { "epoch": 0.3495052576245308, "grad_norm": 0.7557531595230103, "learning_rate": 1.45536574447921e-05, "loss": 0.3077, "step": 15680 }, { "epoch": 0.3496167070051508, "grad_norm": 0.5951617360115051, "learning_rate": 1.4550539926624206e-05, "loss": 0.4016, "step": 15685 }, { "epoch": 0.34972815638577087, "grad_norm": 0.5638185739517212, "learning_rate": 1.454742185059524e-05, "loss": 0.3353, "step": 15690 }, { "epoch": 0.34983960576639095, "grad_norm": 0.5489707589149475, "learning_rate": 1.4544303217087459e-05, "loss": 0.4071, "step": 15695 }, { "epoch": 0.349951055147011, "grad_norm": 0.5517125129699707, "learning_rate": 1.4541184026483176e-05, "loss": 0.3495, "step": 15700 }, { "epoch": 0.3500625045276311, "grad_norm": 0.5938624143600464, "learning_rate": 1.4538064279164787e-05, "loss": 0.3706, "step": 15705 }, { "epoch": 0.3501739539082512, "grad_norm": 0.6009719371795654, "learning_rate": 1.4534943975514746e-05, "loss": 0.4807, "step": 15710 }, { "epoch": 0.3502854032888712, "grad_norm": 0.7321330904960632, "learning_rate": 1.453182311591558e-05, "loss": 0.4177, "step": 15715 }, { "epoch": 0.3503968526694913, "grad_norm": 0.826553463935852, "learning_rate": 1.4528701700749876e-05, "loss": 0.33, "step": 15720 }, { "epoch": 0.35050830205011135, "grad_norm": 0.4826429486274719, "learning_rate": 1.4525579730400301e-05, "loss": 0.3708, "step": 15725 }, { "epoch": 0.35061975143073143, "grad_norm": 0.5381823778152466, "learning_rate": 1.4522457205249584e-05, "loss": 0.3267, "step": 15730 }, { "epoch": 0.3507312008113515, "grad_norm": 1.0298216342926025, "learning_rate": 1.4519334125680522e-05, "loss": 0.3464, "step": 15735 }, { "epoch": 0.3508426501919716, "grad_norm": 0.6540161967277527, "learning_rate": 1.451621049207598e-05, "loss": 0.3396, "step": 15740 }, { "epoch": 0.3509540995725916, "grad_norm": 0.5737782716751099, "learning_rate": 1.451308630481889e-05, "loss": 0.3299, "step": 15745 }, { "epoch": 0.3510655489532117, "grad_norm": 0.7006227970123291, "learning_rate": 1.4509961564292257e-05, "loss": 0.382, "step": 15750 }, { "epoch": 0.35117699833383176, "grad_norm": 0.5406222343444824, "learning_rate": 1.4506836270879143e-05, "loss": 0.3064, "step": 15755 }, { "epoch": 0.35128844771445183, "grad_norm": 0.8543760776519775, "learning_rate": 1.4503710424962693e-05, "loss": 0.3355, "step": 15760 }, { "epoch": 0.3513998970950719, "grad_norm": 0.7554575204849243, "learning_rate": 1.450058402692611e-05, "loss": 0.372, "step": 15765 }, { "epoch": 0.351511346475692, "grad_norm": 0.8103945851325989, "learning_rate": 1.449745707715266e-05, "loss": 0.3812, "step": 15770 }, { "epoch": 0.351622795856312, "grad_norm": 0.5388813614845276, "learning_rate": 1.4494329576025691e-05, "loss": 0.3267, "step": 15775 }, { "epoch": 0.3517342452369321, "grad_norm": 0.6109858155250549, "learning_rate": 1.4491201523928608e-05, "loss": 0.4573, "step": 15780 }, { "epoch": 0.35184569461755216, "grad_norm": 0.6300851702690125, "learning_rate": 1.4488072921244883e-05, "loss": 0.2466, "step": 15785 }, { "epoch": 0.35195714399817224, "grad_norm": 0.6552715301513672, "learning_rate": 1.4484943768358059e-05, "loss": 0.3671, "step": 15790 }, { "epoch": 0.3520685933787923, "grad_norm": 0.9018075466156006, "learning_rate": 1.448181406565175e-05, "loss": 0.3814, "step": 15795 }, { "epoch": 0.35218004275941234, "grad_norm": 0.5913105607032776, "learning_rate": 1.4478683813509629e-05, "loss": 0.2968, "step": 15800 }, { "epoch": 0.3522914921400324, "grad_norm": 0.6036539077758789, "learning_rate": 1.4475553012315441e-05, "loss": 0.25, "step": 15805 }, { "epoch": 0.3524029415206525, "grad_norm": 0.5692189931869507, "learning_rate": 1.4472421662453004e-05, "loss": 0.2111, "step": 15810 }, { "epoch": 0.35251439090127257, "grad_norm": 0.676845133304596, "learning_rate": 1.446928976430619e-05, "loss": 0.338, "step": 15815 }, { "epoch": 0.35262584028189264, "grad_norm": 0.8736307621002197, "learning_rate": 1.4466157318258952e-05, "loss": 0.288, "step": 15820 }, { "epoch": 0.3527372896625127, "grad_norm": 0.6261550784111023, "learning_rate": 1.4463024324695295e-05, "loss": 0.3869, "step": 15825 }, { "epoch": 0.35284873904313274, "grad_norm": 0.5824673771858215, "learning_rate": 1.4459890783999308e-05, "loss": 0.3307, "step": 15830 }, { "epoch": 0.3529601884237528, "grad_norm": 0.6068733334541321, "learning_rate": 1.4456756696555137e-05, "loss": 0.3239, "step": 15835 }, { "epoch": 0.3530716378043729, "grad_norm": 0.4593876302242279, "learning_rate": 1.4453622062746995e-05, "loss": 0.3784, "step": 15840 }, { "epoch": 0.35318308718499297, "grad_norm": 0.7499799132347107, "learning_rate": 1.4450486882959162e-05, "loss": 0.3009, "step": 15845 }, { "epoch": 0.35329453656561305, "grad_norm": 0.7290626168251038, "learning_rate": 1.4447351157575993e-05, "loss": 0.3016, "step": 15850 }, { "epoch": 0.3534059859462331, "grad_norm": 0.625694990158081, "learning_rate": 1.44442148869819e-05, "loss": 0.3197, "step": 15855 }, { "epoch": 0.35351743532685315, "grad_norm": 0.6635383367538452, "learning_rate": 1.4441078071561363e-05, "loss": 0.2795, "step": 15860 }, { "epoch": 0.3536288847074732, "grad_norm": 0.6525633335113525, "learning_rate": 1.4437940711698936e-05, "loss": 0.3183, "step": 15865 }, { "epoch": 0.3537403340880933, "grad_norm": 0.5589553713798523, "learning_rate": 1.4434802807779238e-05, "loss": 0.4735, "step": 15870 }, { "epoch": 0.3538517834687134, "grad_norm": 0.6205970048904419, "learning_rate": 1.4431664360186942e-05, "loss": 0.2494, "step": 15875 }, { "epoch": 0.35396323284933345, "grad_norm": 0.5392980575561523, "learning_rate": 1.4428525369306803e-05, "loss": 0.2474, "step": 15880 }, { "epoch": 0.35407468222995353, "grad_norm": 0.6593893766403198, "learning_rate": 1.4425385835523638e-05, "loss": 0.373, "step": 15885 }, { "epoch": 0.35418613161057355, "grad_norm": 1.365324854850769, "learning_rate": 1.4422245759222326e-05, "loss": 0.2801, "step": 15890 }, { "epoch": 0.35429758099119363, "grad_norm": 0.8453717231750488, "learning_rate": 1.4419105140787819e-05, "loss": 0.2617, "step": 15895 }, { "epoch": 0.3544090303718137, "grad_norm": 0.4204340875148773, "learning_rate": 1.4415963980605136e-05, "loss": 0.2475, "step": 15900 }, { "epoch": 0.3545204797524338, "grad_norm": 0.48751235008239746, "learning_rate": 1.441282227905935e-05, "loss": 0.3031, "step": 15905 }, { "epoch": 0.35463192913305386, "grad_norm": 0.5883186459541321, "learning_rate": 1.4409680036535618e-05, "loss": 0.2444, "step": 15910 }, { "epoch": 0.35474337851367393, "grad_norm": 0.37586283683776855, "learning_rate": 1.4406537253419149e-05, "loss": 0.3133, "step": 15915 }, { "epoch": 0.35485482789429396, "grad_norm": 0.6607975959777832, "learning_rate": 1.4403393930095227e-05, "loss": 0.4004, "step": 15920 }, { "epoch": 0.35496627727491403, "grad_norm": 0.814619243144989, "learning_rate": 1.4400250066949198e-05, "loss": 0.4083, "step": 15925 }, { "epoch": 0.3550777266555341, "grad_norm": 0.5426995754241943, "learning_rate": 1.4397105664366474e-05, "loss": 0.2856, "step": 15930 }, { "epoch": 0.3551891760361542, "grad_norm": 0.6581703424453735, "learning_rate": 1.4393960722732538e-05, "loss": 0.3279, "step": 15935 }, { "epoch": 0.35530062541677426, "grad_norm": 0.58955979347229, "learning_rate": 1.4390815242432932e-05, "loss": 0.3484, "step": 15940 }, { "epoch": 0.35541207479739434, "grad_norm": 0.49699389934539795, "learning_rate": 1.438766922385327e-05, "loss": 0.3763, "step": 15945 }, { "epoch": 0.35552352417801436, "grad_norm": 0.4076257646083832, "learning_rate": 1.4384522667379229e-05, "loss": 0.2196, "step": 15950 }, { "epoch": 0.35563497355863444, "grad_norm": 0.6451223492622375, "learning_rate": 1.4381375573396552e-05, "loss": 0.387, "step": 15955 }, { "epoch": 0.3557464229392545, "grad_norm": 0.727311372756958, "learning_rate": 1.437822794229105e-05, "loss": 0.2124, "step": 15960 }, { "epoch": 0.3558578723198746, "grad_norm": 0.48559942841529846, "learning_rate": 1.4375079774448595e-05, "loss": 0.4022, "step": 15965 }, { "epoch": 0.35596932170049467, "grad_norm": 0.5659855008125305, "learning_rate": 1.437193107025513e-05, "loss": 0.2158, "step": 15970 }, { "epoch": 0.35608077108111474, "grad_norm": 0.8604714274406433, "learning_rate": 1.4368781830096662e-05, "loss": 0.3405, "step": 15975 }, { "epoch": 0.35619222046173477, "grad_norm": 0.5525174736976624, "learning_rate": 1.4365632054359267e-05, "loss": 0.3046, "step": 15980 }, { "epoch": 0.35630366984235484, "grad_norm": 0.6220390200614929, "learning_rate": 1.4362481743429073e-05, "loss": 0.2765, "step": 15985 }, { "epoch": 0.3564151192229749, "grad_norm": 0.6702668070793152, "learning_rate": 1.4359330897692296e-05, "loss": 0.3382, "step": 15990 }, { "epoch": 0.356526568603595, "grad_norm": 0.5980839133262634, "learning_rate": 1.4356179517535199e-05, "loss": 0.3194, "step": 15995 }, { "epoch": 0.35663801798421507, "grad_norm": 0.3999008536338806, "learning_rate": 1.4353027603344115e-05, "loss": 0.3779, "step": 16000 }, { "epoch": 0.3567494673648351, "grad_norm": 0.531395673751831, "learning_rate": 1.4349875155505448e-05, "loss": 0.4263, "step": 16005 }, { "epoch": 0.35686091674545517, "grad_norm": 0.8333300352096558, "learning_rate": 1.4346722174405666e-05, "loss": 0.2635, "step": 16010 }, { "epoch": 0.35697236612607525, "grad_norm": 0.33808475732803345, "learning_rate": 1.4343568660431293e-05, "loss": 0.319, "step": 16015 }, { "epoch": 0.3570838155066953, "grad_norm": 0.7113955020904541, "learning_rate": 1.4340414613968929e-05, "loss": 0.4791, "step": 16020 }, { "epoch": 0.3571952648873154, "grad_norm": 0.592242419719696, "learning_rate": 1.433726003540524e-05, "loss": 0.2668, "step": 16025 }, { "epoch": 0.3573067142679355, "grad_norm": 0.6215150952339172, "learning_rate": 1.4334104925126945e-05, "loss": 0.3935, "step": 16030 }, { "epoch": 0.3574181636485555, "grad_norm": 0.4412376582622528, "learning_rate": 1.4330949283520843e-05, "loss": 0.2753, "step": 16035 }, { "epoch": 0.3575296130291756, "grad_norm": 0.46210625767707825, "learning_rate": 1.432779311097379e-05, "loss": 0.4574, "step": 16040 }, { "epoch": 0.35764106240979565, "grad_norm": 0.5504388213157654, "learning_rate": 1.4324636407872705e-05, "loss": 0.3069, "step": 16045 }, { "epoch": 0.35775251179041573, "grad_norm": 0.3885088562965393, "learning_rate": 1.432147917460458e-05, "loss": 0.3624, "step": 16050 }, { "epoch": 0.3578639611710358, "grad_norm": 0.44459834694862366, "learning_rate": 1.4318321411556462e-05, "loss": 0.2526, "step": 16055 }, { "epoch": 0.3579754105516559, "grad_norm": 0.36986035108566284, "learning_rate": 1.4315163119115472e-05, "loss": 0.369, "step": 16060 }, { "epoch": 0.3580868599322759, "grad_norm": 0.6682476997375488, "learning_rate": 1.4312004297668791e-05, "loss": 0.3287, "step": 16065 }, { "epoch": 0.358198309312896, "grad_norm": 0.5184884667396545, "learning_rate": 1.4308844947603666e-05, "loss": 0.2998, "step": 16070 }, { "epoch": 0.35830975869351606, "grad_norm": 0.7321197986602783, "learning_rate": 1.4305685069307412e-05, "loss": 0.4231, "step": 16075 }, { "epoch": 0.35842120807413613, "grad_norm": 0.8538438677787781, "learning_rate": 1.4302524663167403e-05, "loss": 0.339, "step": 16080 }, { "epoch": 0.3585326574547562, "grad_norm": 0.5270879864692688, "learning_rate": 1.4299363729571081e-05, "loss": 0.3379, "step": 16085 }, { "epoch": 0.3586441068353763, "grad_norm": 0.8474531769752502, "learning_rate": 1.4296202268905948e-05, "loss": 0.3172, "step": 16090 }, { "epoch": 0.3587555562159963, "grad_norm": 0.6586465239524841, "learning_rate": 1.429304028155958e-05, "loss": 0.2714, "step": 16095 }, { "epoch": 0.3588670055966164, "grad_norm": 0.5502996444702148, "learning_rate": 1.4289877767919613e-05, "loss": 0.2873, "step": 16100 }, { "epoch": 0.35897845497723646, "grad_norm": 0.8836638927459717, "learning_rate": 1.4286714728373743e-05, "loss": 0.3991, "step": 16105 }, { "epoch": 0.35908990435785654, "grad_norm": 0.4775274991989136, "learning_rate": 1.4283551163309735e-05, "loss": 0.309, "step": 16110 }, { "epoch": 0.3592013537384766, "grad_norm": 0.579677164554596, "learning_rate": 1.428038707311542e-05, "loss": 0.3486, "step": 16115 }, { "epoch": 0.3593128031190967, "grad_norm": 0.5680740475654602, "learning_rate": 1.4277222458178688e-05, "loss": 0.2838, "step": 16120 }, { "epoch": 0.3594242524997167, "grad_norm": 0.5583674907684326, "learning_rate": 1.4274057318887495e-05, "loss": 0.403, "step": 16125 }, { "epoch": 0.3595357018803368, "grad_norm": 0.6949165463447571, "learning_rate": 1.427089165562987e-05, "loss": 0.2891, "step": 16130 }, { "epoch": 0.35964715126095687, "grad_norm": 0.6850028038024902, "learning_rate": 1.4267725468793895e-05, "loss": 0.3734, "step": 16135 }, { "epoch": 0.35975860064157694, "grad_norm": 0.6475020051002502, "learning_rate": 1.4264558758767715e-05, "loss": 0.3214, "step": 16140 }, { "epoch": 0.359870050022197, "grad_norm": 0.5016544461250305, "learning_rate": 1.426139152593955e-05, "loss": 0.308, "step": 16145 }, { "epoch": 0.3599814994028171, "grad_norm": 0.5636915564537048, "learning_rate": 1.4258223770697679e-05, "loss": 0.3707, "step": 16150 }, { "epoch": 0.3600929487834371, "grad_norm": 0.7824536561965942, "learning_rate": 1.4255055493430441e-05, "loss": 0.2622, "step": 16155 }, { "epoch": 0.3602043981640572, "grad_norm": 0.7711067795753479, "learning_rate": 1.425188669452624e-05, "loss": 0.2371, "step": 16160 }, { "epoch": 0.36031584754467727, "grad_norm": 0.6058388352394104, "learning_rate": 1.4248717374373554e-05, "loss": 0.2209, "step": 16165 }, { "epoch": 0.36042729692529735, "grad_norm": 0.5630457401275635, "learning_rate": 1.4245547533360912e-05, "loss": 0.3454, "step": 16170 }, { "epoch": 0.3605387463059174, "grad_norm": 0.43767404556274414, "learning_rate": 1.4242377171876913e-05, "loss": 0.3509, "step": 16175 }, { "epoch": 0.36065019568653744, "grad_norm": 0.6815534234046936, "learning_rate": 1.423920629031022e-05, "loss": 0.4217, "step": 16180 }, { "epoch": 0.3607616450671575, "grad_norm": 0.5987027883529663, "learning_rate": 1.4236034889049554e-05, "loss": 0.2651, "step": 16185 }, { "epoch": 0.3608730944477776, "grad_norm": 0.651369035243988, "learning_rate": 1.4232862968483711e-05, "loss": 0.3665, "step": 16190 }, { "epoch": 0.3609845438283977, "grad_norm": 0.5900524854660034, "learning_rate": 1.4229690529001538e-05, "loss": 0.3257, "step": 16195 }, { "epoch": 0.36109599320901775, "grad_norm": 0.5773907899856567, "learning_rate": 1.4226517570991955e-05, "loss": 0.2899, "step": 16200 }, { "epoch": 0.36120744258963783, "grad_norm": 0.5796383023262024, "learning_rate": 1.4223344094843945e-05, "loss": 0.3728, "step": 16205 }, { "epoch": 0.36131889197025785, "grad_norm": 0.6338873505592346, "learning_rate": 1.4220170100946547e-05, "loss": 0.3826, "step": 16210 }, { "epoch": 0.3614303413508779, "grad_norm": 0.7792302370071411, "learning_rate": 1.4216995589688865e-05, "loss": 0.3319, "step": 16215 }, { "epoch": 0.361541790731498, "grad_norm": 0.39774107933044434, "learning_rate": 1.421382056146008e-05, "loss": 0.3335, "step": 16220 }, { "epoch": 0.3616532401121181, "grad_norm": 0.6285306215286255, "learning_rate": 1.4210645016649416e-05, "loss": 0.3625, "step": 16225 }, { "epoch": 0.36176468949273816, "grad_norm": 0.85149747133255, "learning_rate": 1.4207468955646171e-05, "loss": 0.3553, "step": 16230 }, { "epoch": 0.36187613887335823, "grad_norm": 0.6813942790031433, "learning_rate": 1.4204292378839714e-05, "loss": 0.3652, "step": 16235 }, { "epoch": 0.36198758825397825, "grad_norm": 0.7618511915206909, "learning_rate": 1.4201115286619464e-05, "loss": 0.2329, "step": 16240 }, { "epoch": 0.36209903763459833, "grad_norm": 0.6078253388404846, "learning_rate": 1.4197937679374904e-05, "loss": 0.3757, "step": 16245 }, { "epoch": 0.3622104870152184, "grad_norm": 0.5501529574394226, "learning_rate": 1.419475955749559e-05, "loss": 0.2823, "step": 16250 }, { "epoch": 0.3623219363958385, "grad_norm": 0.5846516489982605, "learning_rate": 1.4191580921371134e-05, "loss": 0.3182, "step": 16255 }, { "epoch": 0.36243338577645856, "grad_norm": 0.7952144145965576, "learning_rate": 1.4188401771391207e-05, "loss": 0.4785, "step": 16260 }, { "epoch": 0.36254483515707864, "grad_norm": 0.5714855194091797, "learning_rate": 1.4185222107945553e-05, "loss": 0.4005, "step": 16265 }, { "epoch": 0.36265628453769866, "grad_norm": 0.6147738695144653, "learning_rate": 1.4182041931423976e-05, "loss": 0.546, "step": 16270 }, { "epoch": 0.36276773391831874, "grad_norm": 0.6777298450469971, "learning_rate": 1.4178861242216338e-05, "loss": 0.4361, "step": 16275 }, { "epoch": 0.3628791832989388, "grad_norm": 0.6393991708755493, "learning_rate": 1.4175680040712567e-05, "loss": 0.4514, "step": 16280 }, { "epoch": 0.3629906326795589, "grad_norm": 0.6732271313667297, "learning_rate": 1.4172498327302653e-05, "loss": 0.3916, "step": 16285 }, { "epoch": 0.36310208206017897, "grad_norm": 0.6620044708251953, "learning_rate": 1.4169316102376653e-05, "loss": 0.2325, "step": 16290 }, { "epoch": 0.36321353144079904, "grad_norm": 0.35400888323783875, "learning_rate": 1.4166133366324682e-05, "loss": 0.2397, "step": 16295 }, { "epoch": 0.36332498082141906, "grad_norm": 0.5362957715988159, "learning_rate": 1.4162950119536913e-05, "loss": 0.3582, "step": 16300 }, { "epoch": 0.36343643020203914, "grad_norm": 0.5642344951629639, "learning_rate": 1.4159766362403597e-05, "loss": 0.3506, "step": 16305 }, { "epoch": 0.3635478795826592, "grad_norm": 0.59524005651474, "learning_rate": 1.4156582095315032e-05, "loss": 0.4598, "step": 16310 }, { "epoch": 0.3636593289632793, "grad_norm": 0.6816734671592712, "learning_rate": 1.4153397318661588e-05, "loss": 0.4182, "step": 16315 }, { "epoch": 0.36377077834389937, "grad_norm": 0.36529749631881714, "learning_rate": 1.4150212032833687e-05, "loss": 0.2804, "step": 16320 }, { "epoch": 0.36388222772451945, "grad_norm": 0.6353341937065125, "learning_rate": 1.4147026238221831e-05, "loss": 0.3277, "step": 16325 }, { "epoch": 0.36399367710513947, "grad_norm": 0.4412643015384674, "learning_rate": 1.4143839935216565e-05, "loss": 0.1712, "step": 16330 }, { "epoch": 0.36410512648575954, "grad_norm": 0.36294540762901306, "learning_rate": 1.414065312420851e-05, "loss": 0.3183, "step": 16335 }, { "epoch": 0.3642165758663796, "grad_norm": 0.5242140293121338, "learning_rate": 1.4137465805588342e-05, "loss": 0.4187, "step": 16340 }, { "epoch": 0.3643280252469997, "grad_norm": 0.5565961599349976, "learning_rate": 1.4134277979746803e-05, "loss": 0.3671, "step": 16345 }, { "epoch": 0.3644394746276198, "grad_norm": 0.5486041307449341, "learning_rate": 1.4131089647074697e-05, "loss": 0.448, "step": 16350 }, { "epoch": 0.36455092400823985, "grad_norm": 0.6204886436462402, "learning_rate": 1.4127900807962882e-05, "loss": 0.1796, "step": 16355 }, { "epoch": 0.3646623733888599, "grad_norm": 0.5635268092155457, "learning_rate": 1.4124711462802296e-05, "loss": 0.2629, "step": 16360 }, { "epoch": 0.36477382276947995, "grad_norm": 0.4247026741504669, "learning_rate": 1.4121521611983921e-05, "loss": 0.3895, "step": 16365 }, { "epoch": 0.3648852721501, "grad_norm": 0.8973407745361328, "learning_rate": 1.4118331255898807e-05, "loss": 0.4073, "step": 16370 }, { "epoch": 0.3649967215307201, "grad_norm": 0.6370887756347656, "learning_rate": 1.4115140394938074e-05, "loss": 0.3349, "step": 16375 }, { "epoch": 0.3651081709113402, "grad_norm": 0.6443496346473694, "learning_rate": 1.4111949029492891e-05, "loss": 0.5014, "step": 16380 }, { "epoch": 0.3652196202919602, "grad_norm": 0.5404614210128784, "learning_rate": 1.4108757159954496e-05, "loss": 0.312, "step": 16385 }, { "epoch": 0.3653310696725803, "grad_norm": 0.6759606003761292, "learning_rate": 1.4105564786714185e-05, "loss": 0.2964, "step": 16390 }, { "epoch": 0.36544251905320035, "grad_norm": 0.763971745967865, "learning_rate": 1.4102371910163326e-05, "loss": 0.3385, "step": 16395 }, { "epoch": 0.36555396843382043, "grad_norm": 0.6019205451011658, "learning_rate": 1.4099178530693333e-05, "loss": 0.1829, "step": 16400 }, { "epoch": 0.3656654178144405, "grad_norm": 0.599514365196228, "learning_rate": 1.4095984648695695e-05, "loss": 0.2784, "step": 16405 }, { "epoch": 0.3657768671950606, "grad_norm": 0.49529388546943665, "learning_rate": 1.4092790264561955e-05, "loss": 0.3325, "step": 16410 }, { "epoch": 0.3658883165756806, "grad_norm": 0.8132462501525879, "learning_rate": 1.4089595378683719e-05, "loss": 0.5318, "step": 16415 }, { "epoch": 0.3659997659563007, "grad_norm": 0.8020228147506714, "learning_rate": 1.4086399991452658e-05, "loss": 0.5165, "step": 16420 }, { "epoch": 0.36611121533692076, "grad_norm": 0.5684449672698975, "learning_rate": 1.4083204103260498e-05, "loss": 0.2872, "step": 16425 }, { "epoch": 0.36622266471754084, "grad_norm": 0.9270157814025879, "learning_rate": 1.4080007714499033e-05, "loss": 0.257, "step": 16430 }, { "epoch": 0.3663341140981609, "grad_norm": 0.6331964731216431, "learning_rate": 1.4076810825560116e-05, "loss": 0.4288, "step": 16435 }, { "epoch": 0.366445563478781, "grad_norm": 0.6294323205947876, "learning_rate": 1.4073613436835661e-05, "loss": 0.3198, "step": 16440 }, { "epoch": 0.366557012859401, "grad_norm": 0.627004086971283, "learning_rate": 1.407041554871764e-05, "loss": 0.3088, "step": 16445 }, { "epoch": 0.3666684622400211, "grad_norm": 0.652871310710907, "learning_rate": 1.406721716159809e-05, "loss": 0.3399, "step": 16450 }, { "epoch": 0.36677991162064116, "grad_norm": 0.9713679552078247, "learning_rate": 1.4064018275869116e-05, "loss": 0.3622, "step": 16455 }, { "epoch": 0.36689136100126124, "grad_norm": 0.5056372880935669, "learning_rate": 1.4060818891922865e-05, "loss": 0.4097, "step": 16460 }, { "epoch": 0.3670028103818813, "grad_norm": 0.46640658378601074, "learning_rate": 1.4057619010151564e-05, "loss": 0.3042, "step": 16465 }, { "epoch": 0.3671142597625014, "grad_norm": 0.7078487873077393, "learning_rate": 1.4054418630947495e-05, "loss": 0.309, "step": 16470 }, { "epoch": 0.3672257091431214, "grad_norm": 0.7127984762191772, "learning_rate": 1.4051217754702995e-05, "loss": 0.3671, "step": 16475 }, { "epoch": 0.3673371585237415, "grad_norm": 0.41462767124176025, "learning_rate": 1.4048016381810467e-05, "loss": 0.3339, "step": 16480 }, { "epoch": 0.36744860790436157, "grad_norm": 0.5943542718887329, "learning_rate": 1.404481451266238e-05, "loss": 0.4385, "step": 16485 }, { "epoch": 0.36756005728498164, "grad_norm": 0.769970715045929, "learning_rate": 1.4041612147651252e-05, "loss": 0.3697, "step": 16490 }, { "epoch": 0.3676715066656017, "grad_norm": 0.5637467503547668, "learning_rate": 1.4038409287169672e-05, "loss": 0.3749, "step": 16495 }, { "epoch": 0.3677829560462218, "grad_norm": 0.6345033645629883, "learning_rate": 1.4035205931610288e-05, "loss": 0.3776, "step": 16500 }, { "epoch": 0.3678944054268418, "grad_norm": 0.7196531295776367, "learning_rate": 1.4032002081365801e-05, "loss": 0.3372, "step": 16505 }, { "epoch": 0.3680058548074619, "grad_norm": 1.9599355459213257, "learning_rate": 1.402879773682898e-05, "loss": 0.4251, "step": 16510 }, { "epoch": 0.368117304188082, "grad_norm": 0.6104928851127625, "learning_rate": 1.4025592898392658e-05, "loss": 0.2613, "step": 16515 }, { "epoch": 0.36822875356870205, "grad_norm": 0.6330108046531677, "learning_rate": 1.4022387566449715e-05, "loss": 0.284, "step": 16520 }, { "epoch": 0.3683402029493221, "grad_norm": 0.49859699606895447, "learning_rate": 1.4019181741393106e-05, "loss": 0.1871, "step": 16525 }, { "epoch": 0.3684516523299422, "grad_norm": 0.604479968547821, "learning_rate": 1.401597542361584e-05, "loss": 0.2761, "step": 16530 }, { "epoch": 0.3685631017105622, "grad_norm": 0.46823564171791077, "learning_rate": 1.4012768613510985e-05, "loss": 0.2745, "step": 16535 }, { "epoch": 0.3686745510911823, "grad_norm": 0.5509923100471497, "learning_rate": 1.400956131147167e-05, "loss": 0.2638, "step": 16540 }, { "epoch": 0.3687860004718024, "grad_norm": 0.6106875538825989, "learning_rate": 1.400635351789109e-05, "loss": 0.3422, "step": 16545 }, { "epoch": 0.36889744985242245, "grad_norm": 0.7943158745765686, "learning_rate": 1.4003145233162495e-05, "loss": 0.3621, "step": 16550 }, { "epoch": 0.36900889923304253, "grad_norm": 0.46912720799446106, "learning_rate": 1.3999936457679189e-05, "loss": 0.4506, "step": 16555 }, { "epoch": 0.36912034861366255, "grad_norm": 0.421644926071167, "learning_rate": 1.399672719183455e-05, "loss": 0.3087, "step": 16560 }, { "epoch": 0.36923179799428263, "grad_norm": 0.4038814604282379, "learning_rate": 1.3993517436022006e-05, "loss": 0.3402, "step": 16565 }, { "epoch": 0.3693432473749027, "grad_norm": 0.3322283923625946, "learning_rate": 1.399030719063505e-05, "loss": 0.2699, "step": 16570 }, { "epoch": 0.3694546967555228, "grad_norm": 0.4782276749610901, "learning_rate": 1.3987096456067236e-05, "loss": 0.2706, "step": 16575 }, { "epoch": 0.36956614613614286, "grad_norm": 0.7985139489173889, "learning_rate": 1.398388523271217e-05, "loss": 0.2949, "step": 16580 }, { "epoch": 0.36967759551676294, "grad_norm": 0.6678852438926697, "learning_rate": 1.3980673520963524e-05, "loss": 0.2423, "step": 16585 }, { "epoch": 0.36978904489738296, "grad_norm": 0.4688909351825714, "learning_rate": 1.3977461321215034e-05, "loss": 0.337, "step": 16590 }, { "epoch": 0.36990049427800303, "grad_norm": 0.6942318081855774, "learning_rate": 1.3974248633860486e-05, "loss": 0.4337, "step": 16595 }, { "epoch": 0.3700119436586231, "grad_norm": 0.5391799807548523, "learning_rate": 1.3971035459293729e-05, "loss": 0.2623, "step": 16600 }, { "epoch": 0.3701233930392432, "grad_norm": 0.7162173390388489, "learning_rate": 1.3967821797908678e-05, "loss": 0.391, "step": 16605 }, { "epoch": 0.37023484241986326, "grad_norm": 0.656777560710907, "learning_rate": 1.3964607650099302e-05, "loss": 0.3275, "step": 16610 }, { "epoch": 0.37034629180048334, "grad_norm": 0.7742978930473328, "learning_rate": 1.396139301625963e-05, "loss": 0.3674, "step": 16615 }, { "epoch": 0.37045774118110336, "grad_norm": 0.8095855712890625, "learning_rate": 1.3958177896783751e-05, "loss": 0.2723, "step": 16620 }, { "epoch": 0.37056919056172344, "grad_norm": 0.3930739164352417, "learning_rate": 1.3954962292065814e-05, "loss": 0.2473, "step": 16625 }, { "epoch": 0.3706806399423435, "grad_norm": 0.6105589270591736, "learning_rate": 1.395174620250003e-05, "loss": 0.3796, "step": 16630 }, { "epoch": 0.3707920893229636, "grad_norm": 0.6673600077629089, "learning_rate": 1.394852962848066e-05, "loss": 0.3638, "step": 16635 }, { "epoch": 0.37090353870358367, "grad_norm": 0.58013916015625, "learning_rate": 1.394531257040204e-05, "loss": 0.3143, "step": 16640 }, { "epoch": 0.37101498808420375, "grad_norm": 0.776722252368927, "learning_rate": 1.3942095028658553e-05, "loss": 0.2441, "step": 16645 }, { "epoch": 0.37112643746482377, "grad_norm": 0.57895427942276, "learning_rate": 1.3938877003644639e-05, "loss": 0.2568, "step": 16650 }, { "epoch": 0.37123788684544384, "grad_norm": 0.609281599521637, "learning_rate": 1.3935658495754809e-05, "loss": 0.2602, "step": 16655 }, { "epoch": 0.3713493362260639, "grad_norm": 0.44535091519355774, "learning_rate": 1.3932439505383628e-05, "loss": 0.4303, "step": 16660 }, { "epoch": 0.371460785606684, "grad_norm": 0.9552980065345764, "learning_rate": 1.3929220032925716e-05, "loss": 0.2524, "step": 16665 }, { "epoch": 0.3715722349873041, "grad_norm": 0.8034424781799316, "learning_rate": 1.3926000078775757e-05, "loss": 0.3437, "step": 16670 }, { "epoch": 0.37168368436792415, "grad_norm": 1.8572295904159546, "learning_rate": 1.3922779643328492e-05, "loss": 0.3697, "step": 16675 }, { "epoch": 0.37179513374854417, "grad_norm": 0.6209281086921692, "learning_rate": 1.3919558726978724e-05, "loss": 0.2803, "step": 16680 }, { "epoch": 0.37190658312916425, "grad_norm": 0.628084659576416, "learning_rate": 1.3916337330121308e-05, "loss": 0.3693, "step": 16685 }, { "epoch": 0.3720180325097843, "grad_norm": 0.64268559217453, "learning_rate": 1.3913115453151166e-05, "loss": 0.274, "step": 16690 }, { "epoch": 0.3721294818904044, "grad_norm": 0.389698326587677, "learning_rate": 1.3909893096463274e-05, "loss": 0.2581, "step": 16695 }, { "epoch": 0.3722409312710245, "grad_norm": 0.6302729845046997, "learning_rate": 1.3906670260452668e-05, "loss": 0.2739, "step": 16700 }, { "epoch": 0.37235238065164455, "grad_norm": 0.6693209409713745, "learning_rate": 1.3903446945514445e-05, "loss": 0.2398, "step": 16705 }, { "epoch": 0.3724638300322646, "grad_norm": 0.8056045770645142, "learning_rate": 1.390022315204375e-05, "loss": 0.4227, "step": 16710 }, { "epoch": 0.37257527941288465, "grad_norm": 0.8639228940010071, "learning_rate": 1.3896998880435807e-05, "loss": 0.3866, "step": 16715 }, { "epoch": 0.37268672879350473, "grad_norm": 0.6434595584869385, "learning_rate": 1.3893774131085885e-05, "loss": 0.3445, "step": 16720 }, { "epoch": 0.3727981781741248, "grad_norm": 0.5078811645507812, "learning_rate": 1.38905489043893e-05, "loss": 0.2029, "step": 16725 }, { "epoch": 0.3729096275547449, "grad_norm": 0.6022213101387024, "learning_rate": 1.3887323200741457e-05, "loss": 0.3117, "step": 16730 }, { "epoch": 0.37302107693536496, "grad_norm": 0.6306881308555603, "learning_rate": 1.3884097020537794e-05, "loss": 0.3548, "step": 16735 }, { "epoch": 0.373132526315985, "grad_norm": 0.5963431000709534, "learning_rate": 1.3880870364173815e-05, "loss": 0.341, "step": 16740 }, { "epoch": 0.37324397569660506, "grad_norm": 0.7082194685935974, "learning_rate": 1.3877643232045086e-05, "loss": 0.2662, "step": 16745 }, { "epoch": 0.37335542507722513, "grad_norm": 0.4736379086971283, "learning_rate": 1.3874415624547228e-05, "loss": 0.2598, "step": 16750 }, { "epoch": 0.3734668744578452, "grad_norm": 0.6545222401618958, "learning_rate": 1.387118754207592e-05, "loss": 0.239, "step": 16755 }, { "epoch": 0.3735783238384653, "grad_norm": 0.5867891907691956, "learning_rate": 1.38679589850269e-05, "loss": 0.2697, "step": 16760 }, { "epoch": 0.3736897732190853, "grad_norm": 0.6038467288017273, "learning_rate": 1.3864729953795965e-05, "loss": 0.3925, "step": 16765 }, { "epoch": 0.3738012225997054, "grad_norm": 0.6953158378601074, "learning_rate": 1.3861500448778968e-05, "loss": 0.4082, "step": 16770 }, { "epoch": 0.37391267198032546, "grad_norm": 0.5023922324180603, "learning_rate": 1.3858270470371826e-05, "loss": 0.3093, "step": 16775 }, { "epoch": 0.37402412136094554, "grad_norm": 0.6265695095062256, "learning_rate": 1.38550400189705e-05, "loss": 0.3559, "step": 16780 }, { "epoch": 0.3741355707415656, "grad_norm": 0.532604455947876, "learning_rate": 1.3851809094971028e-05, "loss": 0.2924, "step": 16785 }, { "epoch": 0.3742470201221857, "grad_norm": 0.5418358445167542, "learning_rate": 1.3848577698769491e-05, "loss": 0.246, "step": 16790 }, { "epoch": 0.3743584695028057, "grad_norm": 0.6811532378196716, "learning_rate": 1.3845345830762033e-05, "loss": 0.3921, "step": 16795 }, { "epoch": 0.3744699188834258, "grad_norm": 0.5974913239479065, "learning_rate": 1.384211349134486e-05, "loss": 0.305, "step": 16800 }, { "epoch": 0.37458136826404587, "grad_norm": 0.6165561676025391, "learning_rate": 1.3838880680914229e-05, "loss": 0.2876, "step": 16805 }, { "epoch": 0.37469281764466594, "grad_norm": 0.4414345920085907, "learning_rate": 1.3835647399866459e-05, "loss": 0.3331, "step": 16810 }, { "epoch": 0.374804267025286, "grad_norm": 0.5125650763511658, "learning_rate": 1.3832413648597923e-05, "loss": 0.3586, "step": 16815 }, { "epoch": 0.3749157164059061, "grad_norm": 0.5107496976852417, "learning_rate": 1.3829179427505052e-05, "loss": 0.2738, "step": 16820 }, { "epoch": 0.3750271657865261, "grad_norm": 0.48095574975013733, "learning_rate": 1.3825944736984348e-05, "loss": 0.3208, "step": 16825 }, { "epoch": 0.3751386151671462, "grad_norm": 0.6544551253318787, "learning_rate": 1.3822709577432345e-05, "loss": 0.2271, "step": 16830 }, { "epoch": 0.37525006454776627, "grad_norm": 0.6666854023933411, "learning_rate": 1.3819473949245655e-05, "loss": 0.4917, "step": 16835 }, { "epoch": 0.37536151392838635, "grad_norm": 0.556831955909729, "learning_rate": 1.3816237852820945e-05, "loss": 0.266, "step": 16840 }, { "epoch": 0.3754729633090064, "grad_norm": 0.6979274749755859, "learning_rate": 1.3813001288554925e-05, "loss": 0.3474, "step": 16845 }, { "epoch": 0.3755844126896265, "grad_norm": 0.7797757387161255, "learning_rate": 1.380976425684438e-05, "loss": 0.3212, "step": 16850 }, { "epoch": 0.3756958620702465, "grad_norm": 0.4977809488773346, "learning_rate": 1.3806526758086148e-05, "loss": 0.3759, "step": 16855 }, { "epoch": 0.3758073114508666, "grad_norm": 0.634545087814331, "learning_rate": 1.3803288792677116e-05, "loss": 0.3098, "step": 16860 }, { "epoch": 0.3759187608314867, "grad_norm": 0.8404287695884705, "learning_rate": 1.380005036101423e-05, "loss": 0.2756, "step": 16865 }, { "epoch": 0.37603021021210675, "grad_norm": 0.596894383430481, "learning_rate": 1.3796811463494508e-05, "loss": 0.327, "step": 16870 }, { "epoch": 0.37614165959272683, "grad_norm": 0.5277706384658813, "learning_rate": 1.3793572100515004e-05, "loss": 0.3358, "step": 16875 }, { "epoch": 0.3762531089733469, "grad_norm": 0.5064794421195984, "learning_rate": 1.3790332272472844e-05, "loss": 0.3757, "step": 16880 }, { "epoch": 0.3763645583539669, "grad_norm": 0.576138973236084, "learning_rate": 1.3787091979765203e-05, "loss": 0.3069, "step": 16885 }, { "epoch": 0.376476007734587, "grad_norm": 0.6503730416297913, "learning_rate": 1.378385122278932e-05, "loss": 0.4241, "step": 16890 }, { "epoch": 0.3765874571152071, "grad_norm": 0.5472214818000793, "learning_rate": 1.3780610001942481e-05, "loss": 0.2996, "step": 16895 }, { "epoch": 0.37669890649582716, "grad_norm": 0.34399139881134033, "learning_rate": 1.3777368317622038e-05, "loss": 0.3402, "step": 16900 }, { "epoch": 0.37681035587644723, "grad_norm": 0.4152921140193939, "learning_rate": 1.3774126170225398e-05, "loss": 0.2246, "step": 16905 }, { "epoch": 0.3769218052570673, "grad_norm": 0.6326379179954529, "learning_rate": 1.3770883560150017e-05, "loss": 0.4047, "step": 16910 }, { "epoch": 0.37703325463768733, "grad_norm": 0.5722355246543884, "learning_rate": 1.3767640487793423e-05, "loss": 0.4291, "step": 16915 }, { "epoch": 0.3771447040183074, "grad_norm": 0.5854982137680054, "learning_rate": 1.3764396953553182e-05, "loss": 0.4298, "step": 16920 }, { "epoch": 0.3772561533989275, "grad_norm": 0.47443121671676636, "learning_rate": 1.3761152957826935e-05, "loss": 0.4424, "step": 16925 }, { "epoch": 0.37736760277954756, "grad_norm": 0.4873077869415283, "learning_rate": 1.3757908501012368e-05, "loss": 0.3093, "step": 16930 }, { "epoch": 0.37747905216016764, "grad_norm": 0.614548921585083, "learning_rate": 1.3754663583507222e-05, "loss": 0.3204, "step": 16935 }, { "epoch": 0.37759050154078766, "grad_norm": 0.7937530875205994, "learning_rate": 1.3751418205709301e-05, "loss": 0.3612, "step": 16940 }, { "epoch": 0.37770195092140774, "grad_norm": 0.5327603816986084, "learning_rate": 1.3748172368016471e-05, "loss": 0.385, "step": 16945 }, { "epoch": 0.3778134003020278, "grad_norm": 0.634777307510376, "learning_rate": 1.3744926070826636e-05, "loss": 0.3743, "step": 16950 }, { "epoch": 0.3779248496826479, "grad_norm": 0.5578102469444275, "learning_rate": 1.3741679314537772e-05, "loss": 0.3718, "step": 16955 }, { "epoch": 0.37803629906326797, "grad_norm": 0.41441190242767334, "learning_rate": 1.3738432099547903e-05, "loss": 0.2758, "step": 16960 }, { "epoch": 0.37814774844388804, "grad_norm": 0.7478371858596802, "learning_rate": 1.3735184426255117e-05, "loss": 0.2887, "step": 16965 }, { "epoch": 0.37825919782450806, "grad_norm": 0.7181201577186584, "learning_rate": 1.3731936295057552e-05, "loss": 0.3466, "step": 16970 }, { "epoch": 0.37837064720512814, "grad_norm": 0.49926698207855225, "learning_rate": 1.3728687706353401e-05, "loss": 0.3399, "step": 16975 }, { "epoch": 0.3784820965857482, "grad_norm": 0.6402499079704285, "learning_rate": 1.3725438660540922e-05, "loss": 0.2987, "step": 16980 }, { "epoch": 0.3785935459663683, "grad_norm": 0.4854130744934082, "learning_rate": 1.3722189158018415e-05, "loss": 0.3407, "step": 16985 }, { "epoch": 0.37870499534698837, "grad_norm": 0.5425702333450317, "learning_rate": 1.371893919918425e-05, "loss": 0.4215, "step": 16990 }, { "epoch": 0.37881644472760845, "grad_norm": 0.5966536402702332, "learning_rate": 1.3715688784436847e-05, "loss": 0.2236, "step": 16995 }, { "epoch": 0.37892789410822847, "grad_norm": 0.7263554930686951, "learning_rate": 1.3712437914174676e-05, "loss": 0.4078, "step": 17000 }, { "epoch": 0.37903934348884855, "grad_norm": 0.6094164252281189, "learning_rate": 1.3709186588796275e-05, "loss": 0.3422, "step": 17005 }, { "epoch": 0.3791507928694686, "grad_norm": 0.3622407019138336, "learning_rate": 1.3705934808700227e-05, "loss": 0.287, "step": 17010 }, { "epoch": 0.3792622422500887, "grad_norm": 0.546227753162384, "learning_rate": 1.370268257428518e-05, "loss": 0.2921, "step": 17015 }, { "epoch": 0.3793736916307088, "grad_norm": 0.4682996869087219, "learning_rate": 1.3699429885949826e-05, "loss": 0.2827, "step": 17020 }, { "epoch": 0.37948514101132885, "grad_norm": 0.642169177532196, "learning_rate": 1.3696176744092924e-05, "loss": 0.262, "step": 17025 }, { "epoch": 0.3795965903919489, "grad_norm": 0.6648514270782471, "learning_rate": 1.3692923149113286e-05, "loss": 0.2944, "step": 17030 }, { "epoch": 0.37970803977256895, "grad_norm": 1.3251914978027344, "learning_rate": 1.3689669101409773e-05, "loss": 0.4648, "step": 17035 }, { "epoch": 0.379819489153189, "grad_norm": 0.42268428206443787, "learning_rate": 1.3686414601381309e-05, "loss": 0.2584, "step": 17040 }, { "epoch": 0.3799309385338091, "grad_norm": 0.6230071783065796, "learning_rate": 1.3683159649426867e-05, "loss": 0.3298, "step": 17045 }, { "epoch": 0.3800423879144292, "grad_norm": 0.5669429302215576, "learning_rate": 1.3679904245945481e-05, "loss": 0.3647, "step": 17050 }, { "epoch": 0.38015383729504926, "grad_norm": 0.6155208349227905, "learning_rate": 1.3676648391336245e-05, "loss": 0.4041, "step": 17055 }, { "epoch": 0.3802652866756693, "grad_norm": 0.7474114894866943, "learning_rate": 1.367339208599829e-05, "loss": 0.2239, "step": 17060 }, { "epoch": 0.38037673605628936, "grad_norm": 0.8320900797843933, "learning_rate": 1.3670135330330819e-05, "loss": 0.2889, "step": 17065 }, { "epoch": 0.38048818543690943, "grad_norm": 0.5396373271942139, "learning_rate": 1.366687812473309e-05, "loss": 0.2983, "step": 17070 }, { "epoch": 0.3805996348175295, "grad_norm": 0.7233791351318359, "learning_rate": 1.3663620469604406e-05, "loss": 0.4327, "step": 17075 }, { "epoch": 0.3807110841981496, "grad_norm": 0.37970295548439026, "learning_rate": 1.3660362365344126e-05, "loss": 0.3472, "step": 17080 }, { "epoch": 0.38082253357876966, "grad_norm": 0.797645092010498, "learning_rate": 1.365710381235168e-05, "loss": 0.2166, "step": 17085 }, { "epoch": 0.3809339829593897, "grad_norm": 0.7819150686264038, "learning_rate": 1.3653844811026534e-05, "loss": 0.2911, "step": 17090 }, { "epoch": 0.38104543234000976, "grad_norm": 0.7489306330680847, "learning_rate": 1.3650585361768212e-05, "loss": 0.3647, "step": 17095 }, { "epoch": 0.38115688172062984, "grad_norm": 0.4779724180698395, "learning_rate": 1.364732546497631e-05, "loss": 0.2732, "step": 17100 }, { "epoch": 0.3812683311012499, "grad_norm": 0.5058176517486572, "learning_rate": 1.3644065121050456e-05, "loss": 0.2747, "step": 17105 }, { "epoch": 0.38137978048187, "grad_norm": 0.4260009229183197, "learning_rate": 1.3640804330390345e-05, "loss": 0.3045, "step": 17110 }, { "epoch": 0.38149122986249007, "grad_norm": 0.6472933292388916, "learning_rate": 1.3637543093395727e-05, "loss": 0.396, "step": 17115 }, { "epoch": 0.3816026792431101, "grad_norm": 0.5300203561782837, "learning_rate": 1.3634281410466404e-05, "loss": 0.2754, "step": 17120 }, { "epoch": 0.38171412862373016, "grad_norm": 0.618672251701355, "learning_rate": 1.3631019282002231e-05, "loss": 0.3196, "step": 17125 }, { "epoch": 0.38182557800435024, "grad_norm": 0.5094271302223206, "learning_rate": 1.3627756708403122e-05, "loss": 0.3136, "step": 17130 }, { "epoch": 0.3819370273849703, "grad_norm": 0.5453209280967712, "learning_rate": 1.3624493690069042e-05, "loss": 0.3624, "step": 17135 }, { "epoch": 0.3820484767655904, "grad_norm": 0.610127866268158, "learning_rate": 1.3621230227400013e-05, "loss": 0.3844, "step": 17140 }, { "epoch": 0.3821599261462104, "grad_norm": 0.6166819930076599, "learning_rate": 1.361796632079611e-05, "loss": 0.3333, "step": 17145 }, { "epoch": 0.3822713755268305, "grad_norm": 0.7649003267288208, "learning_rate": 1.3614701970657463e-05, "loss": 0.2924, "step": 17150 }, { "epoch": 0.38238282490745057, "grad_norm": 0.4538317024707794, "learning_rate": 1.3611437177384252e-05, "loss": 0.2623, "step": 17155 }, { "epoch": 0.38249427428807065, "grad_norm": 0.5183296799659729, "learning_rate": 1.3608171941376722e-05, "loss": 0.5353, "step": 17160 }, { "epoch": 0.3826057236686907, "grad_norm": 0.9330891966819763, "learning_rate": 1.3604906263035162e-05, "loss": 0.3002, "step": 17165 }, { "epoch": 0.3827171730493108, "grad_norm": 0.5934903621673584, "learning_rate": 1.3601640142759922e-05, "loss": 0.35, "step": 17170 }, { "epoch": 0.3828286224299308, "grad_norm": 0.4814862012863159, "learning_rate": 1.3598373580951399e-05, "loss": 0.3452, "step": 17175 }, { "epoch": 0.3829400718105509, "grad_norm": 0.5025895237922668, "learning_rate": 1.3595106578010051e-05, "loss": 0.2542, "step": 17180 }, { "epoch": 0.383051521191171, "grad_norm": 0.758365273475647, "learning_rate": 1.3591839134336384e-05, "loss": 0.3329, "step": 17185 }, { "epoch": 0.38316297057179105, "grad_norm": 1.2039666175842285, "learning_rate": 1.3588571250330965e-05, "loss": 0.3772, "step": 17190 }, { "epoch": 0.3832744199524111, "grad_norm": 0.5428191423416138, "learning_rate": 1.3585302926394412e-05, "loss": 0.3924, "step": 17195 }, { "epoch": 0.3833858693330312, "grad_norm": 0.38816478848457336, "learning_rate": 1.3582034162927393e-05, "loss": 0.2923, "step": 17200 }, { "epoch": 0.3834973187136512, "grad_norm": 0.47817766666412354, "learning_rate": 1.3578764960330632e-05, "loss": 0.3491, "step": 17205 }, { "epoch": 0.3836087680942713, "grad_norm": 0.5975366830825806, "learning_rate": 1.3575495319004917e-05, "loss": 0.3218, "step": 17210 }, { "epoch": 0.3837202174748914, "grad_norm": 0.5096039175987244, "learning_rate": 1.357222523935107e-05, "loss": 0.2884, "step": 17215 }, { "epoch": 0.38383166685551146, "grad_norm": 0.46383658051490784, "learning_rate": 1.3568954721769982e-05, "loss": 0.3117, "step": 17220 }, { "epoch": 0.38394311623613153, "grad_norm": 0.47448253631591797, "learning_rate": 1.3565683766662597e-05, "loss": 0.2814, "step": 17225 }, { "epoch": 0.3840545656167516, "grad_norm": 0.6228605508804321, "learning_rate": 1.3562412374429903e-05, "loss": 0.3834, "step": 17230 }, { "epoch": 0.38416601499737163, "grad_norm": 0.5391026139259338, "learning_rate": 1.3559140545472948e-05, "loss": 0.3386, "step": 17235 }, { "epoch": 0.3842774643779917, "grad_norm": 0.44915348291397095, "learning_rate": 1.3555868280192835e-05, "loss": 0.2299, "step": 17240 }, { "epoch": 0.3843889137586118, "grad_norm": 0.6540490388870239, "learning_rate": 1.3552595578990719e-05, "loss": 0.3504, "step": 17245 }, { "epoch": 0.38450036313923186, "grad_norm": 0.404447078704834, "learning_rate": 1.3549322442267805e-05, "loss": 0.3026, "step": 17250 }, { "epoch": 0.38461181251985194, "grad_norm": 0.9255741834640503, "learning_rate": 1.3546048870425356e-05, "loss": 0.351, "step": 17255 }, { "epoch": 0.384723261900472, "grad_norm": 0.4792323708534241, "learning_rate": 1.3542774863864692e-05, "loss": 0.303, "step": 17260 }, { "epoch": 0.38483471128109203, "grad_norm": 0.5489178895950317, "learning_rate": 1.353950042298717e-05, "loss": 0.3484, "step": 17265 }, { "epoch": 0.3849461606617121, "grad_norm": 0.5653095841407776, "learning_rate": 1.3536225548194219e-05, "loss": 0.2875, "step": 17270 }, { "epoch": 0.3850576100423322, "grad_norm": 0.5252937078475952, "learning_rate": 1.353295023988731e-05, "loss": 0.2503, "step": 17275 }, { "epoch": 0.38516905942295226, "grad_norm": 0.6666718125343323, "learning_rate": 1.3529674498467974e-05, "loss": 0.2796, "step": 17280 }, { "epoch": 0.38528050880357234, "grad_norm": 0.64290452003479, "learning_rate": 1.3526398324337788e-05, "loss": 0.4103, "step": 17285 }, { "epoch": 0.3853919581841924, "grad_norm": 0.9748024344444275, "learning_rate": 1.3523121717898387e-05, "loss": 0.3458, "step": 17290 }, { "epoch": 0.38550340756481244, "grad_norm": 0.8246389627456665, "learning_rate": 1.3519844679551456e-05, "loss": 0.3598, "step": 17295 }, { "epoch": 0.3856148569454325, "grad_norm": 0.6088479161262512, "learning_rate": 1.3516567209698739e-05, "loss": 0.3518, "step": 17300 }, { "epoch": 0.3857263063260526, "grad_norm": 0.6825111508369446, "learning_rate": 1.3513289308742028e-05, "loss": 0.377, "step": 17305 }, { "epoch": 0.38583775570667267, "grad_norm": 0.4026068449020386, "learning_rate": 1.3510010977083159e-05, "loss": 0.2406, "step": 17310 }, { "epoch": 0.38594920508729275, "grad_norm": 0.5761940479278564, "learning_rate": 1.3506732215124044e-05, "loss": 0.3949, "step": 17315 }, { "epoch": 0.38606065446791277, "grad_norm": 0.5035435557365417, "learning_rate": 1.3503453023266626e-05, "loss": 0.2624, "step": 17320 }, { "epoch": 0.38617210384853284, "grad_norm": 0.6900637149810791, "learning_rate": 1.3500173401912908e-05, "loss": 0.336, "step": 17325 }, { "epoch": 0.3862835532291529, "grad_norm": 0.574922502040863, "learning_rate": 1.3496893351464948e-05, "loss": 0.3015, "step": 17330 }, { "epoch": 0.386395002609773, "grad_norm": 0.4953921437263489, "learning_rate": 1.3493612872324857e-05, "loss": 0.3169, "step": 17335 }, { "epoch": 0.3865064519903931, "grad_norm": 0.6877520084381104, "learning_rate": 1.3490331964894792e-05, "loss": 0.3994, "step": 17340 }, { "epoch": 0.38661790137101315, "grad_norm": 0.6951386332511902, "learning_rate": 1.348705062957697e-05, "loss": 0.3787, "step": 17345 }, { "epoch": 0.38672935075163317, "grad_norm": 0.46507614850997925, "learning_rate": 1.3483768866773659e-05, "loss": 0.2933, "step": 17350 }, { "epoch": 0.38684080013225325, "grad_norm": 0.5548629760742188, "learning_rate": 1.3480486676887172e-05, "loss": 0.3405, "step": 17355 }, { "epoch": 0.3869522495128733, "grad_norm": 0.581392228603363, "learning_rate": 1.3477204060319884e-05, "loss": 0.2563, "step": 17360 }, { "epoch": 0.3870636988934934, "grad_norm": 0.5281866192817688, "learning_rate": 1.3473921017474221e-05, "loss": 0.378, "step": 17365 }, { "epoch": 0.3871751482741135, "grad_norm": 0.5699111223220825, "learning_rate": 1.3470637548752657e-05, "loss": 0.3965, "step": 17370 }, { "epoch": 0.38728659765473356, "grad_norm": 0.45156022906303406, "learning_rate": 1.3467353654557713e-05, "loss": 0.225, "step": 17375 }, { "epoch": 0.3873980470353536, "grad_norm": 0.6937002539634705, "learning_rate": 1.3464069335291979e-05, "loss": 0.4284, "step": 17380 }, { "epoch": 0.38750949641597365, "grad_norm": 0.5318115949630737, "learning_rate": 1.3460784591358083e-05, "loss": 0.3061, "step": 17385 }, { "epoch": 0.38762094579659373, "grad_norm": 0.46002933382987976, "learning_rate": 1.3457499423158706e-05, "loss": 0.2477, "step": 17390 }, { "epoch": 0.3877323951772138, "grad_norm": 0.565171480178833, "learning_rate": 1.345421383109659e-05, "loss": 0.2647, "step": 17395 }, { "epoch": 0.3878438445578339, "grad_norm": 0.5259685516357422, "learning_rate": 1.3450927815574522e-05, "loss": 0.2421, "step": 17400 }, { "epoch": 0.38795529393845396, "grad_norm": 0.5806754231452942, "learning_rate": 1.3447641376995337e-05, "loss": 0.3067, "step": 17405 }, { "epoch": 0.388066743319074, "grad_norm": 0.48702603578567505, "learning_rate": 1.3444354515761935e-05, "loss": 0.4088, "step": 17410 }, { "epoch": 0.38817819269969406, "grad_norm": 0.5694561004638672, "learning_rate": 1.3441067232277255e-05, "loss": 0.2687, "step": 17415 }, { "epoch": 0.38828964208031413, "grad_norm": 0.5904139876365662, "learning_rate": 1.343777952694429e-05, "loss": 0.3926, "step": 17420 }, { "epoch": 0.3884010914609342, "grad_norm": 0.5209612846374512, "learning_rate": 1.3434491400166095e-05, "loss": 0.3796, "step": 17425 }, { "epoch": 0.3885125408415543, "grad_norm": 1.1780948638916016, "learning_rate": 1.343120285234576e-05, "loss": 0.3414, "step": 17430 }, { "epoch": 0.38862399022217436, "grad_norm": 0.5466594099998474, "learning_rate": 1.342791388388644e-05, "loss": 0.3755, "step": 17435 }, { "epoch": 0.3887354396027944, "grad_norm": 0.3503721058368683, "learning_rate": 1.3424624495191344e-05, "loss": 0.25, "step": 17440 }, { "epoch": 0.38884688898341446, "grad_norm": 0.571108877658844, "learning_rate": 1.3421334686663714e-05, "loss": 0.3724, "step": 17445 }, { "epoch": 0.38895833836403454, "grad_norm": 0.6269806623458862, "learning_rate": 1.3418044458706859e-05, "loss": 0.2822, "step": 17450 }, { "epoch": 0.3890697877446546, "grad_norm": 0.42661252617836, "learning_rate": 1.3414753811724139e-05, "loss": 0.2892, "step": 17455 }, { "epoch": 0.3891812371252747, "grad_norm": 0.6101808547973633, "learning_rate": 1.341146274611896e-05, "loss": 0.2023, "step": 17460 }, { "epoch": 0.38929268650589477, "grad_norm": 0.5674259066581726, "learning_rate": 1.3408171262294778e-05, "loss": 0.4122, "step": 17465 }, { "epoch": 0.3894041358865148, "grad_norm": 0.5061613917350769, "learning_rate": 1.3404879360655108e-05, "loss": 0.3471, "step": 17470 }, { "epoch": 0.38951558526713487, "grad_norm": 0.5070977210998535, "learning_rate": 1.3401587041603512e-05, "loss": 0.3126, "step": 17475 }, { "epoch": 0.38962703464775494, "grad_norm": 0.6381949186325073, "learning_rate": 1.3398294305543597e-05, "loss": 0.3114, "step": 17480 }, { "epoch": 0.389738484028375, "grad_norm": 0.9275464415550232, "learning_rate": 1.3395001152879033e-05, "loss": 0.3702, "step": 17485 }, { "epoch": 0.3898499334089951, "grad_norm": 0.7014368176460266, "learning_rate": 1.3391707584013533e-05, "loss": 0.3544, "step": 17490 }, { "epoch": 0.3899613827896152, "grad_norm": 0.38658320903778076, "learning_rate": 1.3388413599350862e-05, "loss": 0.3118, "step": 17495 }, { "epoch": 0.3900728321702352, "grad_norm": 0.6096714735031128, "learning_rate": 1.338511919929484e-05, "loss": 0.4444, "step": 17500 }, { "epoch": 0.39018428155085527, "grad_norm": 0.45221275091171265, "learning_rate": 1.338182438424933e-05, "loss": 0.3064, "step": 17505 }, { "epoch": 0.39029573093147535, "grad_norm": 0.6235794425010681, "learning_rate": 1.3378529154618258e-05, "loss": 0.3525, "step": 17510 }, { "epoch": 0.3904071803120954, "grad_norm": 0.49644261598587036, "learning_rate": 1.337523351080559e-05, "loss": 0.3963, "step": 17515 }, { "epoch": 0.3905186296927155, "grad_norm": 0.39976486563682556, "learning_rate": 1.3371937453215344e-05, "loss": 0.372, "step": 17520 }, { "epoch": 0.3906300790733355, "grad_norm": 0.582975447177887, "learning_rate": 1.3368640982251595e-05, "loss": 0.4404, "step": 17525 }, { "epoch": 0.3907415284539556, "grad_norm": 0.7980674505233765, "learning_rate": 1.3365344098318463e-05, "loss": 0.4305, "step": 17530 }, { "epoch": 0.3908529778345757, "grad_norm": 0.5849584937095642, "learning_rate": 1.3362046801820122e-05, "loss": 0.2463, "step": 17535 }, { "epoch": 0.39096442721519575, "grad_norm": 0.6233468651771545, "learning_rate": 1.3358749093160792e-05, "loss": 0.3788, "step": 17540 }, { "epoch": 0.39107587659581583, "grad_norm": 0.7556803226470947, "learning_rate": 1.335545097274475e-05, "loss": 0.3482, "step": 17545 }, { "epoch": 0.3911873259764359, "grad_norm": 0.521452009677887, "learning_rate": 1.335215244097632e-05, "loss": 0.2991, "step": 17550 }, { "epoch": 0.39129877535705593, "grad_norm": 0.6280778050422668, "learning_rate": 1.3348853498259872e-05, "loss": 0.425, "step": 17555 }, { "epoch": 0.391410224737676, "grad_norm": 0.5022149682044983, "learning_rate": 1.3345554144999837e-05, "loss": 0.2878, "step": 17560 }, { "epoch": 0.3915216741182961, "grad_norm": 0.6211374998092651, "learning_rate": 1.3342254381600688e-05, "loss": 0.2642, "step": 17565 }, { "epoch": 0.39163312349891616, "grad_norm": 0.634395182132721, "learning_rate": 1.3338954208466948e-05, "loss": 0.247, "step": 17570 }, { "epoch": 0.39174457287953623, "grad_norm": 0.8194277882575989, "learning_rate": 1.3335653626003196e-05, "loss": 0.2874, "step": 17575 }, { "epoch": 0.3918560222601563, "grad_norm": 0.37737780809402466, "learning_rate": 1.3332352634614057e-05, "loss": 0.2985, "step": 17580 }, { "epoch": 0.39196747164077633, "grad_norm": 0.5371142029762268, "learning_rate": 1.3329051234704209e-05, "loss": 0.3103, "step": 17585 }, { "epoch": 0.3920789210213964, "grad_norm": 0.5832655429840088, "learning_rate": 1.3325749426678369e-05, "loss": 0.2908, "step": 17590 }, { "epoch": 0.3921903704020165, "grad_norm": 0.5224264860153198, "learning_rate": 1.3322447210941328e-05, "loss": 0.2937, "step": 17595 }, { "epoch": 0.39230181978263656, "grad_norm": 0.7488956451416016, "learning_rate": 1.33191445878979e-05, "loss": 0.3981, "step": 17600 }, { "epoch": 0.39241326916325664, "grad_norm": 0.5862334966659546, "learning_rate": 1.3315841557952967e-05, "loss": 0.3055, "step": 17605 }, { "epoch": 0.3925247185438767, "grad_norm": 0.7196325063705444, "learning_rate": 1.331253812151145e-05, "loss": 0.3212, "step": 17610 }, { "epoch": 0.39263616792449674, "grad_norm": 0.3566960096359253, "learning_rate": 1.3309234278978332e-05, "loss": 0.2657, "step": 17615 }, { "epoch": 0.3927476173051168, "grad_norm": 0.5377562642097473, "learning_rate": 1.3305930030758632e-05, "loss": 0.2786, "step": 17620 }, { "epoch": 0.3928590666857369, "grad_norm": 0.9269275069236755, "learning_rate": 1.330262537725743e-05, "loss": 0.4352, "step": 17625 }, { "epoch": 0.39297051606635697, "grad_norm": 0.6867557168006897, "learning_rate": 1.3299320318879849e-05, "loss": 0.4177, "step": 17630 }, { "epoch": 0.39308196544697704, "grad_norm": 1.2014501094818115, "learning_rate": 1.3296014856031062e-05, "loss": 0.4319, "step": 17635 }, { "epoch": 0.3931934148275971, "grad_norm": 0.56941819190979, "learning_rate": 1.32927089891163e-05, "loss": 0.2071, "step": 17640 }, { "epoch": 0.39330486420821714, "grad_norm": 0.6028541922569275, "learning_rate": 1.3289402718540826e-05, "loss": 0.414, "step": 17645 }, { "epoch": 0.3934163135888372, "grad_norm": 0.5946452021598816, "learning_rate": 1.328609604470997e-05, "loss": 0.3379, "step": 17650 }, { "epoch": 0.3935277629694573, "grad_norm": 0.5714167356491089, "learning_rate": 1.3282788968029108e-05, "loss": 0.3109, "step": 17655 }, { "epoch": 0.39363921235007737, "grad_norm": 0.8042417764663696, "learning_rate": 1.3279481488903651e-05, "loss": 0.3508, "step": 17660 }, { "epoch": 0.39375066173069745, "grad_norm": 0.46548306941986084, "learning_rate": 1.3276173607739082e-05, "loss": 0.2213, "step": 17665 }, { "epoch": 0.3938621111113175, "grad_norm": 0.7371535897254944, "learning_rate": 1.3272865324940916e-05, "loss": 0.3666, "step": 17670 }, { "epoch": 0.39397356049193755, "grad_norm": 0.7542550563812256, "learning_rate": 1.3269556640914726e-05, "loss": 0.3432, "step": 17675 }, { "epoch": 0.3940850098725576, "grad_norm": 0.6199653148651123, "learning_rate": 1.3266247556066122e-05, "loss": 0.3421, "step": 17680 }, { "epoch": 0.3941964592531777, "grad_norm": 0.5280221104621887, "learning_rate": 1.3262938070800786e-05, "loss": 0.2772, "step": 17685 }, { "epoch": 0.3943079086337978, "grad_norm": 0.6193488240242004, "learning_rate": 1.3259628185524426e-05, "loss": 0.3734, "step": 17690 }, { "epoch": 0.39441935801441785, "grad_norm": 0.6555532217025757, "learning_rate": 1.325631790064281e-05, "loss": 0.3828, "step": 17695 }, { "epoch": 0.3945308073950379, "grad_norm": 0.44673195481300354, "learning_rate": 1.3253007216561755e-05, "loss": 0.2106, "step": 17700 }, { "epoch": 0.39464225677565795, "grad_norm": 0.8975861668586731, "learning_rate": 1.3249696133687127e-05, "loss": 0.3603, "step": 17705 }, { "epoch": 0.39475370615627803, "grad_norm": 0.5637463331222534, "learning_rate": 1.3246384652424832e-05, "loss": 0.3267, "step": 17710 }, { "epoch": 0.3948651555368981, "grad_norm": 0.5970215797424316, "learning_rate": 1.3243072773180841e-05, "loss": 0.3672, "step": 17715 }, { "epoch": 0.3949766049175182, "grad_norm": 0.47597572207450867, "learning_rate": 1.323976049636116e-05, "loss": 0.246, "step": 17720 }, { "epoch": 0.39508805429813826, "grad_norm": 0.5651100873947144, "learning_rate": 1.323644782237185e-05, "loss": 0.4041, "step": 17725 }, { "epoch": 0.3951995036787583, "grad_norm": 0.4764869809150696, "learning_rate": 1.3233134751619018e-05, "loss": 0.3636, "step": 17730 }, { "epoch": 0.39531095305937836, "grad_norm": 0.44785913825035095, "learning_rate": 1.3229821284508817e-05, "loss": 0.2818, "step": 17735 }, { "epoch": 0.39542240243999843, "grad_norm": 0.5839234590530396, "learning_rate": 1.322650742144746e-05, "loss": 0.2325, "step": 17740 }, { "epoch": 0.3955338518206185, "grad_norm": 0.7260363698005676, "learning_rate": 1.32231931628412e-05, "loss": 0.2708, "step": 17745 }, { "epoch": 0.3956453012012386, "grad_norm": 0.4467025399208069, "learning_rate": 1.3219878509096336e-05, "loss": 0.304, "step": 17750 }, { "epoch": 0.39575675058185866, "grad_norm": 0.783078134059906, "learning_rate": 1.321656346061922e-05, "loss": 0.3295, "step": 17755 }, { "epoch": 0.3958681999624787, "grad_norm": 1.3599778413772583, "learning_rate": 1.3213248017816251e-05, "loss": 0.3469, "step": 17760 }, { "epoch": 0.39597964934309876, "grad_norm": 0.6114479899406433, "learning_rate": 1.320993218109388e-05, "loss": 0.3666, "step": 17765 }, { "epoch": 0.39609109872371884, "grad_norm": 0.5643268823623657, "learning_rate": 1.32066159508586e-05, "loss": 0.317, "step": 17770 }, { "epoch": 0.3962025481043389, "grad_norm": 0.5749050974845886, "learning_rate": 1.3203299327516951e-05, "loss": 0.3648, "step": 17775 }, { "epoch": 0.396313997484959, "grad_norm": 0.6513046622276306, "learning_rate": 1.3199982311475536e-05, "loss": 0.3595, "step": 17780 }, { "epoch": 0.39642544686557907, "grad_norm": 0.49400731921195984, "learning_rate": 1.3196664903140986e-05, "loss": 0.2408, "step": 17785 }, { "epoch": 0.3965368962461991, "grad_norm": 0.5998428463935852, "learning_rate": 1.3193347102919995e-05, "loss": 0.2253, "step": 17790 }, { "epoch": 0.39664834562681917, "grad_norm": 0.5260230898857117, "learning_rate": 1.3190028911219299e-05, "loss": 0.3894, "step": 17795 }, { "epoch": 0.39675979500743924, "grad_norm": 0.8185827136039734, "learning_rate": 1.3186710328445681e-05, "loss": 0.402, "step": 17800 }, { "epoch": 0.3968712443880593, "grad_norm": 0.5452010631561279, "learning_rate": 1.3183391355005972e-05, "loss": 0.3864, "step": 17805 }, { "epoch": 0.3969826937686794, "grad_norm": 0.5198045969009399, "learning_rate": 1.3180071991307058e-05, "loss": 0.3525, "step": 17810 }, { "epoch": 0.39709414314929947, "grad_norm": 0.49183177947998047, "learning_rate": 1.3176752237755866e-05, "loss": 0.1952, "step": 17815 }, { "epoch": 0.3972055925299195, "grad_norm": 0.9827027320861816, "learning_rate": 1.3173432094759364e-05, "loss": 0.3477, "step": 17820 }, { "epoch": 0.39731704191053957, "grad_norm": 0.6063224673271179, "learning_rate": 1.3170111562724586e-05, "loss": 0.2805, "step": 17825 }, { "epoch": 0.39742849129115965, "grad_norm": 0.5842794179916382, "learning_rate": 1.31667906420586e-05, "loss": 0.389, "step": 17830 }, { "epoch": 0.3975399406717797, "grad_norm": 0.5279350280761719, "learning_rate": 1.3163469333168526e-05, "loss": 0.309, "step": 17835 }, { "epoch": 0.3976513900523998, "grad_norm": 0.578009843826294, "learning_rate": 1.3160147636461527e-05, "loss": 0.3562, "step": 17840 }, { "epoch": 0.3977628394330199, "grad_norm": 0.5486703515052795, "learning_rate": 1.3156825552344822e-05, "loss": 0.4434, "step": 17845 }, { "epoch": 0.3978742888136399, "grad_norm": 0.536373496055603, "learning_rate": 1.315350308122567e-05, "loss": 0.3216, "step": 17850 }, { "epoch": 0.39798573819426, "grad_norm": 0.4132682979106903, "learning_rate": 1.3150180223511383e-05, "loss": 0.2851, "step": 17855 }, { "epoch": 0.39809718757488005, "grad_norm": 0.4532261788845062, "learning_rate": 1.314685697960932e-05, "loss": 0.3908, "step": 17860 }, { "epoch": 0.39820863695550013, "grad_norm": 0.5398720502853394, "learning_rate": 1.3143533349926875e-05, "loss": 0.3451, "step": 17865 }, { "epoch": 0.3983200863361202, "grad_norm": 0.5097636580467224, "learning_rate": 1.314020933487151e-05, "loss": 0.3461, "step": 17870 }, { "epoch": 0.3984315357167403, "grad_norm": 0.6569482684135437, "learning_rate": 1.3136884934850719e-05, "loss": 0.4025, "step": 17875 }, { "epoch": 0.3985429850973603, "grad_norm": 0.5743000507354736, "learning_rate": 1.3133560150272047e-05, "loss": 0.4101, "step": 17880 }, { "epoch": 0.3986544344779804, "grad_norm": 0.49067550897598267, "learning_rate": 1.3130234981543087e-05, "loss": 0.2034, "step": 17885 }, { "epoch": 0.39876588385860046, "grad_norm": 0.6841097474098206, "learning_rate": 1.312690942907148e-05, "loss": 0.3888, "step": 17890 }, { "epoch": 0.39887733323922053, "grad_norm": 0.5429337024688721, "learning_rate": 1.3123583493264913e-05, "loss": 0.4961, "step": 17895 }, { "epoch": 0.3989887826198406, "grad_norm": 0.56031334400177, "learning_rate": 1.312025717453112e-05, "loss": 0.3289, "step": 17900 }, { "epoch": 0.39910023200046063, "grad_norm": 0.6107448935508728, "learning_rate": 1.3116930473277884e-05, "loss": 0.3042, "step": 17905 }, { "epoch": 0.3992116813810807, "grad_norm": 0.30036771297454834, "learning_rate": 1.3113603389913027e-05, "loss": 0.2888, "step": 17910 }, { "epoch": 0.3993231307617008, "grad_norm": 0.5498743653297424, "learning_rate": 1.3110275924844432e-05, "loss": 0.3323, "step": 17915 }, { "epoch": 0.39943458014232086, "grad_norm": 0.31906214356422424, "learning_rate": 1.3106948078480014e-05, "loss": 0.2188, "step": 17920 }, { "epoch": 0.39954602952294094, "grad_norm": 0.8935547471046448, "learning_rate": 1.3103619851227744e-05, "loss": 0.3219, "step": 17925 }, { "epoch": 0.399657478903561, "grad_norm": 0.5760512351989746, "learning_rate": 1.3100291243495637e-05, "loss": 0.2671, "step": 17930 }, { "epoch": 0.39976892828418104, "grad_norm": 0.7435246706008911, "learning_rate": 1.3096962255691755e-05, "loss": 0.4353, "step": 17935 }, { "epoch": 0.3998803776648011, "grad_norm": 0.6022874712944031, "learning_rate": 1.3093632888224206e-05, "loss": 0.3313, "step": 17940 }, { "epoch": 0.3999918270454212, "grad_norm": 0.897080659866333, "learning_rate": 1.3090303141501139e-05, "loss": 0.2699, "step": 17945 }, { "epoch": 0.40010327642604127, "grad_norm": 0.7807203531265259, "learning_rate": 1.3086973015930763e-05, "loss": 0.3183, "step": 17950 }, { "epoch": 0.40021472580666134, "grad_norm": 0.9843493103981018, "learning_rate": 1.3083642511921325e-05, "loss": 0.3, "step": 17955 }, { "epoch": 0.4003261751872814, "grad_norm": 0.47445183992385864, "learning_rate": 1.3080311629881112e-05, "loss": 0.4984, "step": 17960 }, { "epoch": 0.40043762456790144, "grad_norm": 0.8390313982963562, "learning_rate": 1.3076980370218478e-05, "loss": 0.3191, "step": 17965 }, { "epoch": 0.4005490739485215, "grad_norm": 0.61275714635849, "learning_rate": 1.3073648733341796e-05, "loss": 0.3963, "step": 17970 }, { "epoch": 0.4006605233291416, "grad_norm": 0.8645874261856079, "learning_rate": 1.3070316719659503e-05, "loss": 0.2564, "step": 17975 }, { "epoch": 0.40077197270976167, "grad_norm": 0.5309740900993347, "learning_rate": 1.3066984329580081e-05, "loss": 0.2978, "step": 17980 }, { "epoch": 0.40088342209038175, "grad_norm": 0.5536980628967285, "learning_rate": 1.3063651563512054e-05, "loss": 0.3188, "step": 17985 }, { "epoch": 0.4009948714710018, "grad_norm": 0.788591742515564, "learning_rate": 1.3060318421863994e-05, "loss": 0.3653, "step": 17990 }, { "epoch": 0.40110632085162184, "grad_norm": 0.5309394598007202, "learning_rate": 1.3056984905044516e-05, "loss": 0.2452, "step": 17995 }, { "epoch": 0.4012177702322419, "grad_norm": 0.5781253576278687, "learning_rate": 1.3053651013462285e-05, "loss": 0.2701, "step": 18000 }, { "epoch": 0.401329219612862, "grad_norm": 0.552573561668396, "learning_rate": 1.305031674752601e-05, "loss": 0.2345, "step": 18005 }, { "epoch": 0.4014406689934821, "grad_norm": 0.5335693359375, "learning_rate": 1.3046982107644448e-05, "loss": 0.268, "step": 18010 }, { "epoch": 0.40155211837410215, "grad_norm": 0.6344022750854492, "learning_rate": 1.3043647094226397e-05, "loss": 0.3166, "step": 18015 }, { "epoch": 0.40166356775472223, "grad_norm": 0.6636702418327332, "learning_rate": 1.3040311707680706e-05, "loss": 0.4501, "step": 18020 }, { "epoch": 0.40177501713534225, "grad_norm": 0.33217310905456543, "learning_rate": 1.3036975948416268e-05, "loss": 0.2042, "step": 18025 }, { "epoch": 0.4018864665159623, "grad_norm": 0.7773119807243347, "learning_rate": 1.3033639816842023e-05, "loss": 0.3389, "step": 18030 }, { "epoch": 0.4019979158965824, "grad_norm": 0.4810592234134674, "learning_rate": 1.3030303313366948e-05, "loss": 0.2472, "step": 18035 }, { "epoch": 0.4021093652772025, "grad_norm": 0.5222720503807068, "learning_rate": 1.3026966438400079e-05, "loss": 0.3122, "step": 18040 }, { "epoch": 0.40222081465782256, "grad_norm": 0.6859990954399109, "learning_rate": 1.302362919235049e-05, "loss": 0.3732, "step": 18045 }, { "epoch": 0.40233226403844263, "grad_norm": 0.5260218381881714, "learning_rate": 1.30202915756273e-05, "loss": 0.3085, "step": 18050 }, { "epoch": 0.40244371341906265, "grad_norm": 0.6136817336082458, "learning_rate": 1.3016953588639676e-05, "loss": 0.3202, "step": 18055 }, { "epoch": 0.40255516279968273, "grad_norm": 0.7072620987892151, "learning_rate": 1.3013615231796829e-05, "loss": 0.2627, "step": 18060 }, { "epoch": 0.4026666121803028, "grad_norm": 0.5630493760108948, "learning_rate": 1.3010276505508017e-05, "loss": 0.2619, "step": 18065 }, { "epoch": 0.4027780615609229, "grad_norm": 0.5899954438209534, "learning_rate": 1.3006937410182539e-05, "loss": 0.3468, "step": 18070 }, { "epoch": 0.40288951094154296, "grad_norm": 0.6174592971801758, "learning_rate": 1.3003597946229747e-05, "loss": 0.3401, "step": 18075 }, { "epoch": 0.403000960322163, "grad_norm": 0.9012115001678467, "learning_rate": 1.300025811405903e-05, "loss": 0.2373, "step": 18080 }, { "epoch": 0.40311240970278306, "grad_norm": 0.49508702754974365, "learning_rate": 1.2996917914079826e-05, "loss": 0.3124, "step": 18085 }, { "epoch": 0.40322385908340314, "grad_norm": 0.8170999884605408, "learning_rate": 1.299357734670162e-05, "loss": 0.2728, "step": 18090 }, { "epoch": 0.4033353084640232, "grad_norm": 0.4755793809890747, "learning_rate": 1.299023641233394e-05, "loss": 0.4259, "step": 18095 }, { "epoch": 0.4034467578446433, "grad_norm": 0.520043671131134, "learning_rate": 1.2986895111386351e-05, "loss": 0.3457, "step": 18100 }, { "epoch": 0.40355820722526337, "grad_norm": 0.5027348399162292, "learning_rate": 1.298355344426848e-05, "loss": 0.3179, "step": 18105 }, { "epoch": 0.4036696566058834, "grad_norm": 0.5023277401924133, "learning_rate": 1.2980211411389987e-05, "loss": 0.1961, "step": 18110 }, { "epoch": 0.40378110598650346, "grad_norm": 0.5529801249504089, "learning_rate": 1.2976869013160577e-05, "loss": 0.2631, "step": 18115 }, { "epoch": 0.40389255536712354, "grad_norm": 0.652068555355072, "learning_rate": 1.2973526249990006e-05, "loss": 0.3448, "step": 18120 }, { "epoch": 0.4040040047477436, "grad_norm": 0.5933851599693298, "learning_rate": 1.2970183122288068e-05, "loss": 0.421, "step": 18125 }, { "epoch": 0.4041154541283637, "grad_norm": 0.4105674922466278, "learning_rate": 1.2966839630464603e-05, "loss": 0.2904, "step": 18130 }, { "epoch": 0.40422690350898377, "grad_norm": 0.7906683683395386, "learning_rate": 1.2963495774929505e-05, "loss": 0.2608, "step": 18135 }, { "epoch": 0.4043383528896038, "grad_norm": 0.8182550072669983, "learning_rate": 1.2960151556092694e-05, "loss": 0.3075, "step": 18140 }, { "epoch": 0.40444980227022387, "grad_norm": 0.7050228118896484, "learning_rate": 1.2956806974364153e-05, "loss": 0.2282, "step": 18145 }, { "epoch": 0.40456125165084394, "grad_norm": 0.5624240040779114, "learning_rate": 1.2953462030153902e-05, "loss": 0.2506, "step": 18150 }, { "epoch": 0.404672701031464, "grad_norm": 0.6622910499572754, "learning_rate": 1.2950116723872002e-05, "loss": 0.2953, "step": 18155 }, { "epoch": 0.4047841504120841, "grad_norm": 0.5642595291137695, "learning_rate": 1.2946771055928562e-05, "loss": 0.3647, "step": 18160 }, { "epoch": 0.4048955997927042, "grad_norm": 0.29868683218955994, "learning_rate": 1.2943425026733738e-05, "loss": 0.2502, "step": 18165 }, { "epoch": 0.4050070491733242, "grad_norm": 0.7383465766906738, "learning_rate": 1.2940078636697726e-05, "loss": 0.3526, "step": 18170 }, { "epoch": 0.4051184985539443, "grad_norm": 0.6359235048294067, "learning_rate": 1.2936731886230761e-05, "loss": 0.3998, "step": 18175 }, { "epoch": 0.40522994793456435, "grad_norm": 0.4913898706436157, "learning_rate": 1.2933384775743143e-05, "loss": 0.252, "step": 18180 }, { "epoch": 0.4053413973151844, "grad_norm": 0.7118620872497559, "learning_rate": 1.2930037305645191e-05, "loss": 0.3759, "step": 18185 }, { "epoch": 0.4054528466958045, "grad_norm": 0.6543040871620178, "learning_rate": 1.292668947634728e-05, "loss": 0.1789, "step": 18190 }, { "epoch": 0.4055642960764246, "grad_norm": 0.7008522152900696, "learning_rate": 1.292334128825983e-05, "loss": 0.311, "step": 18195 }, { "epoch": 0.4056757454570446, "grad_norm": 0.5294999480247498, "learning_rate": 1.2919992741793306e-05, "loss": 0.2591, "step": 18200 }, { "epoch": 0.4057871948376647, "grad_norm": 0.44648101925849915, "learning_rate": 1.2916643837358205e-05, "loss": 0.2552, "step": 18205 }, { "epoch": 0.40589864421828475, "grad_norm": 0.6527552604675293, "learning_rate": 1.2913294575365086e-05, "loss": 0.357, "step": 18210 }, { "epoch": 0.40601009359890483, "grad_norm": 0.5339484214782715, "learning_rate": 1.290994495622454e-05, "loss": 0.28, "step": 18215 }, { "epoch": 0.4061215429795249, "grad_norm": 0.5636955499649048, "learning_rate": 1.2906594980347202e-05, "loss": 0.368, "step": 18220 }, { "epoch": 0.406232992360145, "grad_norm": 0.5656212568283081, "learning_rate": 1.2903244648143756e-05, "loss": 0.3734, "step": 18225 }, { "epoch": 0.406344441740765, "grad_norm": 0.9620150923728943, "learning_rate": 1.2899893960024926e-05, "loss": 0.3122, "step": 18230 }, { "epoch": 0.4064558911213851, "grad_norm": 0.7658068537712097, "learning_rate": 1.2896542916401477e-05, "loss": 0.3063, "step": 18235 }, { "epoch": 0.40656734050200516, "grad_norm": 0.49050748348236084, "learning_rate": 1.289319151768423e-05, "loss": 0.2702, "step": 18240 }, { "epoch": 0.40667878988262524, "grad_norm": 0.6088207364082336, "learning_rate": 1.2889839764284032e-05, "loss": 0.4151, "step": 18245 }, { "epoch": 0.4067902392632453, "grad_norm": 0.5077471733093262, "learning_rate": 1.2886487656611786e-05, "loss": 0.3534, "step": 18250 }, { "epoch": 0.4069016886438654, "grad_norm": 1.2243595123291016, "learning_rate": 1.2883135195078431e-05, "loss": 0.3367, "step": 18255 }, { "epoch": 0.4070131380244854, "grad_norm": 0.4729726314544678, "learning_rate": 1.2879782380094958e-05, "loss": 0.295, "step": 18260 }, { "epoch": 0.4071245874051055, "grad_norm": 0.428756982088089, "learning_rate": 1.2876429212072391e-05, "loss": 0.267, "step": 18265 }, { "epoch": 0.40723603678572556, "grad_norm": 0.5256748795509338, "learning_rate": 1.2873075691421808e-05, "loss": 0.3028, "step": 18270 }, { "epoch": 0.40734748616634564, "grad_norm": 0.512870728969574, "learning_rate": 1.2869721818554321e-05, "loss": 0.2903, "step": 18275 }, { "epoch": 0.4074589355469657, "grad_norm": 0.811591386795044, "learning_rate": 1.286636759388109e-05, "loss": 0.3306, "step": 18280 }, { "epoch": 0.40757038492758574, "grad_norm": 0.556670069694519, "learning_rate": 1.2863013017813316e-05, "loss": 0.3757, "step": 18285 }, { "epoch": 0.4076818343082058, "grad_norm": 0.57549649477005, "learning_rate": 1.2859658090762248e-05, "loss": 0.3665, "step": 18290 }, { "epoch": 0.4077932836888259, "grad_norm": 0.6680387854576111, "learning_rate": 1.285630281313917e-05, "loss": 0.2735, "step": 18295 }, { "epoch": 0.40790473306944597, "grad_norm": 0.6254687309265137, "learning_rate": 1.2852947185355409e-05, "loss": 0.3771, "step": 18300 }, { "epoch": 0.40801618245006605, "grad_norm": 0.9815579056739807, "learning_rate": 1.2849591207822351e-05, "loss": 0.4015, "step": 18305 }, { "epoch": 0.4081276318306861, "grad_norm": 0.4263515770435333, "learning_rate": 1.2846234880951406e-05, "loss": 0.3862, "step": 18310 }, { "epoch": 0.40823908121130614, "grad_norm": 0.582800030708313, "learning_rate": 1.284287820515403e-05, "loss": 0.2104, "step": 18315 }, { "epoch": 0.4083505305919262, "grad_norm": 0.6399257779121399, "learning_rate": 1.2839521180841735e-05, "loss": 0.3136, "step": 18320 }, { "epoch": 0.4084619799725463, "grad_norm": 0.599901556968689, "learning_rate": 1.283616380842606e-05, "loss": 0.3759, "step": 18325 }, { "epoch": 0.4085734293531664, "grad_norm": 0.5146269798278809, "learning_rate": 1.2832806088318587e-05, "loss": 0.3023, "step": 18330 }, { "epoch": 0.40868487873378645, "grad_norm": 0.5369811058044434, "learning_rate": 1.2829448020930959e-05, "loss": 0.2933, "step": 18335 }, { "epoch": 0.4087963281144065, "grad_norm": 0.4636971056461334, "learning_rate": 1.2826089606674845e-05, "loss": 0.3844, "step": 18340 }, { "epoch": 0.40890777749502655, "grad_norm": 0.6950342655181885, "learning_rate": 1.2822730845961958e-05, "loss": 0.3928, "step": 18345 }, { "epoch": 0.4090192268756466, "grad_norm": 0.4408503770828247, "learning_rate": 1.2819371739204054e-05, "loss": 0.2776, "step": 18350 }, { "epoch": 0.4091306762562667, "grad_norm": 0.44306161999702454, "learning_rate": 1.2816012286812942e-05, "loss": 0.3134, "step": 18355 }, { "epoch": 0.4092421256368868, "grad_norm": 0.4013633131980896, "learning_rate": 1.2812652489200457e-05, "loss": 0.3159, "step": 18360 }, { "epoch": 0.40935357501750685, "grad_norm": 0.7099652290344238, "learning_rate": 1.2809292346778488e-05, "loss": 0.3841, "step": 18365 }, { "epoch": 0.40946502439812693, "grad_norm": 0.6522414088249207, "learning_rate": 1.2805931859958962e-05, "loss": 0.4007, "step": 18370 }, { "epoch": 0.40957647377874695, "grad_norm": 0.3149759769439697, "learning_rate": 1.2802571029153848e-05, "loss": 0.3963, "step": 18375 }, { "epoch": 0.40968792315936703, "grad_norm": 0.7824275493621826, "learning_rate": 1.279920985477516e-05, "loss": 0.2676, "step": 18380 }, { "epoch": 0.4097993725399871, "grad_norm": 0.501352846622467, "learning_rate": 1.2795848337234946e-05, "loss": 0.2539, "step": 18385 }, { "epoch": 0.4099108219206072, "grad_norm": 0.6711282134056091, "learning_rate": 1.279248647694531e-05, "loss": 0.3712, "step": 18390 }, { "epoch": 0.41002227130122726, "grad_norm": 0.5977444648742676, "learning_rate": 1.2789124274318386e-05, "loss": 0.4149, "step": 18395 }, { "epoch": 0.41013372068184734, "grad_norm": 0.3847135305404663, "learning_rate": 1.2785761729766356e-05, "loss": 0.278, "step": 18400 }, { "epoch": 0.41024517006246736, "grad_norm": 0.5048947334289551, "learning_rate": 1.278239884370144e-05, "loss": 0.3682, "step": 18405 }, { "epoch": 0.41035661944308743, "grad_norm": 0.495900958776474, "learning_rate": 1.27790356165359e-05, "loss": 0.3405, "step": 18410 }, { "epoch": 0.4104680688237075, "grad_norm": 0.40374094247817993, "learning_rate": 1.2775672048682047e-05, "loss": 0.2201, "step": 18415 }, { "epoch": 0.4105795182043276, "grad_norm": 0.5748105049133301, "learning_rate": 1.2772308140552224e-05, "loss": 0.2755, "step": 18420 }, { "epoch": 0.41069096758494766, "grad_norm": 0.5982359647750854, "learning_rate": 1.2768943892558823e-05, "loss": 0.3694, "step": 18425 }, { "epoch": 0.41080241696556774, "grad_norm": 0.48614931106567383, "learning_rate": 1.2765579305114276e-05, "loss": 0.3353, "step": 18430 }, { "epoch": 0.41091386634618776, "grad_norm": 0.6232022047042847, "learning_rate": 1.2762214378631049e-05, "loss": 0.2576, "step": 18435 }, { "epoch": 0.41102531572680784, "grad_norm": 0.7191541790962219, "learning_rate": 1.275884911352166e-05, "loss": 0.3179, "step": 18440 }, { "epoch": 0.4111367651074279, "grad_norm": 0.6166452169418335, "learning_rate": 1.2755483510198668e-05, "loss": 0.2123, "step": 18445 }, { "epoch": 0.411248214488048, "grad_norm": 0.6613633632659912, "learning_rate": 1.2752117569074667e-05, "loss": 0.4291, "step": 18450 }, { "epoch": 0.41135966386866807, "grad_norm": 0.6093412637710571, "learning_rate": 1.2748751290562291e-05, "loss": 0.3531, "step": 18455 }, { "epoch": 0.4114711132492881, "grad_norm": 0.9398419857025146, "learning_rate": 1.2745384675074228e-05, "loss": 0.2857, "step": 18460 }, { "epoch": 0.41158256262990817, "grad_norm": 0.6028172969818115, "learning_rate": 1.2742017723023194e-05, "loss": 0.3597, "step": 18465 }, { "epoch": 0.41169401201052824, "grad_norm": 0.5513226985931396, "learning_rate": 1.2738650434821952e-05, "loss": 0.3979, "step": 18470 }, { "epoch": 0.4118054613911483, "grad_norm": 0.5270368456840515, "learning_rate": 1.2735282810883303e-05, "loss": 0.4054, "step": 18475 }, { "epoch": 0.4119169107717684, "grad_norm": 0.48779648542404175, "learning_rate": 1.2731914851620099e-05, "loss": 0.3752, "step": 18480 }, { "epoch": 0.4120283601523885, "grad_norm": 0.5824145674705505, "learning_rate": 1.2728546557445218e-05, "loss": 0.3695, "step": 18485 }, { "epoch": 0.4121398095330085, "grad_norm": 0.5668191909790039, "learning_rate": 1.2725177928771591e-05, "loss": 0.2761, "step": 18490 }, { "epoch": 0.41225125891362857, "grad_norm": 0.4143696129322052, "learning_rate": 1.2721808966012188e-05, "loss": 0.3978, "step": 18495 }, { "epoch": 0.41236270829424865, "grad_norm": 0.4864271283149719, "learning_rate": 1.2718439669580009e-05, "loss": 0.2801, "step": 18500 }, { "epoch": 0.4124741576748687, "grad_norm": 0.6782664656639099, "learning_rate": 1.2715070039888116e-05, "loss": 0.3272, "step": 18505 }, { "epoch": 0.4125856070554888, "grad_norm": 0.7863107323646545, "learning_rate": 1.2711700077349589e-05, "loss": 0.254, "step": 18510 }, { "epoch": 0.4126970564361089, "grad_norm": 0.7221528887748718, "learning_rate": 1.2708329782377565e-05, "loss": 0.3955, "step": 18515 }, { "epoch": 0.4128085058167289, "grad_norm": 0.6611953973770142, "learning_rate": 1.2704959155385217e-05, "loss": 0.2752, "step": 18520 }, { "epoch": 0.412919955197349, "grad_norm": 0.48354998230934143, "learning_rate": 1.2701588196785755e-05, "loss": 0.2948, "step": 18525 }, { "epoch": 0.41303140457796905, "grad_norm": 0.4558368921279907, "learning_rate": 1.2698216906992426e-05, "loss": 0.4377, "step": 18530 }, { "epoch": 0.41314285395858913, "grad_norm": 0.6224683523178101, "learning_rate": 1.269484528641854e-05, "loss": 0.2484, "step": 18535 }, { "epoch": 0.4132543033392092, "grad_norm": 0.5457311272621155, "learning_rate": 1.269147333547742e-05, "loss": 0.3035, "step": 18540 }, { "epoch": 0.4133657527198293, "grad_norm": 0.6461859345436096, "learning_rate": 1.2688101054582443e-05, "loss": 0.3112, "step": 18545 }, { "epoch": 0.4134772021004493, "grad_norm": 0.46919846534729004, "learning_rate": 1.2684728444147027e-05, "loss": 0.3344, "step": 18550 }, { "epoch": 0.4135886514810694, "grad_norm": 0.4080178737640381, "learning_rate": 1.2681355504584627e-05, "loss": 0.3146, "step": 18555 }, { "epoch": 0.41370010086168946, "grad_norm": 1.5496551990509033, "learning_rate": 1.2677982236308737e-05, "loss": 0.2363, "step": 18560 }, { "epoch": 0.41381155024230953, "grad_norm": 0.5933051109313965, "learning_rate": 1.2674608639732896e-05, "loss": 0.361, "step": 18565 }, { "epoch": 0.4139229996229296, "grad_norm": 0.5045031905174255, "learning_rate": 1.2671234715270684e-05, "loss": 0.3109, "step": 18570 }, { "epoch": 0.4140344490035497, "grad_norm": 0.7789613604545593, "learning_rate": 1.2667860463335711e-05, "loss": 0.4204, "step": 18575 }, { "epoch": 0.4141458983841697, "grad_norm": 0.4235498309135437, "learning_rate": 1.2664485884341638e-05, "loss": 0.3569, "step": 18580 }, { "epoch": 0.4142573477647898, "grad_norm": 0.4231083393096924, "learning_rate": 1.2661110978702164e-05, "loss": 0.3355, "step": 18585 }, { "epoch": 0.41436879714540986, "grad_norm": 0.49784886837005615, "learning_rate": 1.2657735746831023e-05, "loss": 0.1982, "step": 18590 }, { "epoch": 0.41448024652602994, "grad_norm": 0.6310487985610962, "learning_rate": 1.2654360189141997e-05, "loss": 0.3131, "step": 18595 }, { "epoch": 0.41459169590665, "grad_norm": 0.48694488406181335, "learning_rate": 1.2650984306048897e-05, "loss": 0.2527, "step": 18600 }, { "epoch": 0.4147031452872701, "grad_norm": 0.6880688667297363, "learning_rate": 1.2647608097965585e-05, "loss": 0.3681, "step": 18605 }, { "epoch": 0.4148145946678901, "grad_norm": 0.7431567907333374, "learning_rate": 1.2644231565305955e-05, "loss": 0.3137, "step": 18610 }, { "epoch": 0.4149260440485102, "grad_norm": 0.5349269509315491, "learning_rate": 1.2640854708483948e-05, "loss": 0.3026, "step": 18615 }, { "epoch": 0.41503749342913027, "grad_norm": 0.86985182762146, "learning_rate": 1.2637477527913538e-05, "loss": 0.445, "step": 18620 }, { "epoch": 0.41514894280975034, "grad_norm": 0.6898271441459656, "learning_rate": 1.2634100024008737e-05, "loss": 0.329, "step": 18625 }, { "epoch": 0.4152603921903704, "grad_norm": 0.612746000289917, "learning_rate": 1.2630722197183611e-05, "loss": 0.271, "step": 18630 }, { "epoch": 0.4153718415709905, "grad_norm": 0.5866104364395142, "learning_rate": 1.2627344047852246e-05, "loss": 0.4016, "step": 18635 }, { "epoch": 0.4154832909516105, "grad_norm": 0.7675313949584961, "learning_rate": 1.2623965576428781e-05, "loss": 0.3276, "step": 18640 }, { "epoch": 0.4155947403322306, "grad_norm": 0.6587178707122803, "learning_rate": 1.2620586783327392e-05, "loss": 0.1957, "step": 18645 }, { "epoch": 0.41570618971285067, "grad_norm": 0.7516689896583557, "learning_rate": 1.2617207668962287e-05, "loss": 0.2169, "step": 18650 }, { "epoch": 0.41581763909347075, "grad_norm": 0.6809471249580383, "learning_rate": 1.2613828233747726e-05, "loss": 0.3006, "step": 18655 }, { "epoch": 0.4159290884740908, "grad_norm": 0.3378174304962158, "learning_rate": 1.2610448478098003e-05, "loss": 0.3007, "step": 18660 }, { "epoch": 0.41604053785471085, "grad_norm": 0.40882351994514465, "learning_rate": 1.2607068402427444e-05, "loss": 0.2965, "step": 18665 }, { "epoch": 0.4161519872353309, "grad_norm": 0.6255966424942017, "learning_rate": 1.2603688007150418e-05, "loss": 0.404, "step": 18670 }, { "epoch": 0.416263436615951, "grad_norm": 0.6284846067428589, "learning_rate": 1.2600307292681345e-05, "loss": 0.4282, "step": 18675 }, { "epoch": 0.4163748859965711, "grad_norm": 0.536528468132019, "learning_rate": 1.2596926259434668e-05, "loss": 0.3725, "step": 18680 }, { "epoch": 0.41648633537719115, "grad_norm": 0.39987242221832275, "learning_rate": 1.2593544907824872e-05, "loss": 0.2594, "step": 18685 }, { "epoch": 0.41659778475781123, "grad_norm": 0.5609185695648193, "learning_rate": 1.2590163238266494e-05, "loss": 0.2777, "step": 18690 }, { "epoch": 0.41670923413843125, "grad_norm": 0.6560683846473694, "learning_rate": 1.2586781251174098e-05, "loss": 0.3646, "step": 18695 }, { "epoch": 0.4168206835190513, "grad_norm": 0.781556248664856, "learning_rate": 1.2583398946962284e-05, "loss": 0.2337, "step": 18700 }, { "epoch": 0.4169321328996714, "grad_norm": 0.5400981903076172, "learning_rate": 1.25800163260457e-05, "loss": 0.4057, "step": 18705 }, { "epoch": 0.4170435822802915, "grad_norm": 0.6012236475944519, "learning_rate": 1.2576633388839032e-05, "loss": 0.2896, "step": 18710 }, { "epoch": 0.41715503166091156, "grad_norm": 0.4528880715370178, "learning_rate": 1.2573250135756996e-05, "loss": 0.4724, "step": 18715 }, { "epoch": 0.41726648104153163, "grad_norm": 0.5520021319389343, "learning_rate": 1.2569866567214354e-05, "loss": 0.3262, "step": 18720 }, { "epoch": 0.41737793042215166, "grad_norm": 0.5487710237503052, "learning_rate": 1.2566482683625914e-05, "loss": 0.3624, "step": 18725 }, { "epoch": 0.41748937980277173, "grad_norm": 0.7504733204841614, "learning_rate": 1.2563098485406502e-05, "loss": 0.259, "step": 18730 }, { "epoch": 0.4176008291833918, "grad_norm": 0.5932433605194092, "learning_rate": 1.2559713972971002e-05, "loss": 0.3458, "step": 18735 }, { "epoch": 0.4177122785640119, "grad_norm": 0.6621396541595459, "learning_rate": 1.2556329146734326e-05, "loss": 0.313, "step": 18740 }, { "epoch": 0.41782372794463196, "grad_norm": 0.6199692487716675, "learning_rate": 1.2552944007111424e-05, "loss": 0.2457, "step": 18745 }, { "epoch": 0.41793517732525204, "grad_norm": 0.5559074282646179, "learning_rate": 1.2549558554517298e-05, "loss": 0.3902, "step": 18750 }, { "epoch": 0.41804662670587206, "grad_norm": 0.3973815143108368, "learning_rate": 1.2546172789366973e-05, "loss": 0.443, "step": 18755 }, { "epoch": 0.41815807608649214, "grad_norm": 0.3972376585006714, "learning_rate": 1.2542786712075516e-05, "loss": 0.3741, "step": 18760 }, { "epoch": 0.4182695254671122, "grad_norm": 0.6444116234779358, "learning_rate": 1.2539400323058035e-05, "loss": 0.3151, "step": 18765 }, { "epoch": 0.4183809748477323, "grad_norm": 0.6359716057777405, "learning_rate": 1.2536013622729677e-05, "loss": 0.4012, "step": 18770 }, { "epoch": 0.41849242422835237, "grad_norm": 0.7078494429588318, "learning_rate": 1.2532626611505623e-05, "loss": 0.3266, "step": 18775 }, { "epoch": 0.41860387360897244, "grad_norm": 0.5959888696670532, "learning_rate": 1.2529239289801093e-05, "loss": 0.3894, "step": 18780 }, { "epoch": 0.41871532298959246, "grad_norm": 0.6465641260147095, "learning_rate": 1.252585165803135e-05, "loss": 0.338, "step": 18785 }, { "epoch": 0.41882677237021254, "grad_norm": 0.40250465273857117, "learning_rate": 1.2522463716611693e-05, "loss": 0.304, "step": 18790 }, { "epoch": 0.4189382217508326, "grad_norm": 0.5716389417648315, "learning_rate": 1.2519075465957451e-05, "loss": 0.2614, "step": 18795 }, { "epoch": 0.4190496711314527, "grad_norm": 0.7511163353919983, "learning_rate": 1.2515686906484006e-05, "loss": 0.3442, "step": 18800 }, { "epoch": 0.41916112051207277, "grad_norm": 0.5761300921440125, "learning_rate": 1.2512298038606759e-05, "loss": 0.3672, "step": 18805 }, { "epoch": 0.41927256989269285, "grad_norm": 0.585167407989502, "learning_rate": 1.2508908862741166e-05, "loss": 0.3066, "step": 18810 }, { "epoch": 0.41938401927331287, "grad_norm": 0.5810245871543884, "learning_rate": 1.2505519379302715e-05, "loss": 0.3823, "step": 18815 }, { "epoch": 0.41949546865393295, "grad_norm": 0.627994179725647, "learning_rate": 1.2502129588706926e-05, "loss": 0.3069, "step": 18820 }, { "epoch": 0.419606918034553, "grad_norm": 0.45014336705207825, "learning_rate": 1.2498739491369362e-05, "loss": 0.2936, "step": 18825 }, { "epoch": 0.4197183674151731, "grad_norm": 0.56611567735672, "learning_rate": 1.2495349087705625e-05, "loss": 0.427, "step": 18830 }, { "epoch": 0.4198298167957932, "grad_norm": 0.6289352774620056, "learning_rate": 1.249195837813135e-05, "loss": 0.2673, "step": 18835 }, { "epoch": 0.4199412661764132, "grad_norm": 0.5408057570457458, "learning_rate": 1.2488567363062213e-05, "loss": 0.3397, "step": 18840 }, { "epoch": 0.4200527155570333, "grad_norm": 0.6514496207237244, "learning_rate": 1.2485176042913926e-05, "loss": 0.2976, "step": 18845 }, { "epoch": 0.42016416493765335, "grad_norm": 0.903830885887146, "learning_rate": 1.248178441810224e-05, "loss": 0.3012, "step": 18850 }, { "epoch": 0.4202756143182734, "grad_norm": 0.6694687008857727, "learning_rate": 1.247839248904294e-05, "loss": 0.2519, "step": 18855 }, { "epoch": 0.4203870636988935, "grad_norm": 0.7829026579856873, "learning_rate": 1.2475000256151852e-05, "loss": 0.335, "step": 18860 }, { "epoch": 0.4204985130795136, "grad_norm": 0.7483092546463013, "learning_rate": 1.2471607719844833e-05, "loss": 0.3928, "step": 18865 }, { "epoch": 0.4206099624601336, "grad_norm": 0.5407531261444092, "learning_rate": 1.2468214880537788e-05, "loss": 0.333, "step": 18870 }, { "epoch": 0.4207214118407537, "grad_norm": 0.45422080159187317, "learning_rate": 1.2464821738646652e-05, "loss": 0.356, "step": 18875 }, { "epoch": 0.42083286122137376, "grad_norm": 0.6194318532943726, "learning_rate": 1.2461428294587394e-05, "loss": 0.2601, "step": 18880 }, { "epoch": 0.42094431060199383, "grad_norm": 0.4982631802558899, "learning_rate": 1.2458034548776026e-05, "loss": 0.3319, "step": 18885 }, { "epoch": 0.4210557599826139, "grad_norm": 0.5249817967414856, "learning_rate": 1.2454640501628599e-05, "loss": 0.3689, "step": 18890 }, { "epoch": 0.421167209363234, "grad_norm": 0.7947955131530762, "learning_rate": 1.2451246153561191e-05, "loss": 0.3169, "step": 18895 }, { "epoch": 0.421278658743854, "grad_norm": 0.43422529101371765, "learning_rate": 1.2447851504989922e-05, "loss": 0.3823, "step": 18900 }, { "epoch": 0.4213901081244741, "grad_norm": 0.5541136264801025, "learning_rate": 1.2444456556330957e-05, "loss": 0.2701, "step": 18905 }, { "epoch": 0.42150155750509416, "grad_norm": 0.6233346462249756, "learning_rate": 1.2441061308000489e-05, "loss": 0.4002, "step": 18910 }, { "epoch": 0.42161300688571424, "grad_norm": 0.6174611449241638, "learning_rate": 1.2437665760414742e-05, "loss": 0.3123, "step": 18915 }, { "epoch": 0.4217244562663343, "grad_norm": 0.7869670391082764, "learning_rate": 1.243426991398999e-05, "loss": 0.2451, "step": 18920 }, { "epoch": 0.4218359056469544, "grad_norm": 0.6006391644477844, "learning_rate": 1.2430873769142538e-05, "loss": 0.3733, "step": 18925 }, { "epoch": 0.4219473550275744, "grad_norm": 0.38537976145744324, "learning_rate": 1.2427477326288722e-05, "loss": 0.4187, "step": 18930 }, { "epoch": 0.4220588044081945, "grad_norm": 0.40693843364715576, "learning_rate": 1.2424080585844924e-05, "loss": 0.345, "step": 18935 }, { "epoch": 0.42217025378881456, "grad_norm": 0.6330365538597107, "learning_rate": 1.2420683548227558e-05, "loss": 0.3277, "step": 18940 }, { "epoch": 0.42228170316943464, "grad_norm": 0.9879538416862488, "learning_rate": 1.2417286213853075e-05, "loss": 0.3467, "step": 18945 }, { "epoch": 0.4223931525500547, "grad_norm": 0.7751918435096741, "learning_rate": 1.241388858313796e-05, "loss": 0.2522, "step": 18950 }, { "epoch": 0.4225046019306748, "grad_norm": 0.6003888249397278, "learning_rate": 1.2410490656498736e-05, "loss": 0.4377, "step": 18955 }, { "epoch": 0.4226160513112948, "grad_norm": 0.671985924243927, "learning_rate": 1.2407092434351965e-05, "loss": 0.3896, "step": 18960 }, { "epoch": 0.4227275006919149, "grad_norm": 0.5274528861045837, "learning_rate": 1.240369391711424e-05, "loss": 0.3564, "step": 18965 }, { "epoch": 0.42283895007253497, "grad_norm": 0.5010458827018738, "learning_rate": 1.2400295105202194e-05, "loss": 0.2859, "step": 18970 }, { "epoch": 0.42295039945315505, "grad_norm": 0.5250893235206604, "learning_rate": 1.23968959990325e-05, "loss": 0.2515, "step": 18975 }, { "epoch": 0.4230618488337751, "grad_norm": 0.8268576860427856, "learning_rate": 1.2393496599021853e-05, "loss": 0.3041, "step": 18980 }, { "epoch": 0.4231732982143952, "grad_norm": 0.7334866523742676, "learning_rate": 1.2390096905586997e-05, "loss": 0.4155, "step": 18985 }, { "epoch": 0.4232847475950152, "grad_norm": 0.6723551154136658, "learning_rate": 1.2386696919144715e-05, "loss": 0.2755, "step": 18990 }, { "epoch": 0.4233961969756353, "grad_norm": 0.8720344305038452, "learning_rate": 1.2383296640111805e-05, "loss": 0.2926, "step": 18995 }, { "epoch": 0.4235076463562554, "grad_norm": 0.4712897837162018, "learning_rate": 1.2379896068905131e-05, "loss": 0.312, "step": 19000 }, { "epoch": 0.42361909573687545, "grad_norm": 0.6474051475524902, "learning_rate": 1.2376495205941566e-05, "loss": 0.2337, "step": 19005 }, { "epoch": 0.4237305451174955, "grad_norm": 0.6197468042373657, "learning_rate": 1.237309405163803e-05, "loss": 0.4293, "step": 19010 }, { "epoch": 0.4238419944981156, "grad_norm": 0.446269690990448, "learning_rate": 1.2369692606411486e-05, "loss": 0.2745, "step": 19015 }, { "epoch": 0.4239534438787356, "grad_norm": 0.5081597566604614, "learning_rate": 1.2366290870678914e-05, "loss": 0.2961, "step": 19020 }, { "epoch": 0.4240648932593557, "grad_norm": 0.5012861490249634, "learning_rate": 1.2362888844857349e-05, "loss": 0.2736, "step": 19025 }, { "epoch": 0.4241763426399758, "grad_norm": 0.6631560921669006, "learning_rate": 1.2359486529363851e-05, "loss": 0.3143, "step": 19030 }, { "epoch": 0.42428779202059586, "grad_norm": 1.1381235122680664, "learning_rate": 1.2356083924615516e-05, "loss": 0.5141, "step": 19035 }, { "epoch": 0.42439924140121593, "grad_norm": 0.6382589340209961, "learning_rate": 1.2352681031029476e-05, "loss": 0.2542, "step": 19040 }, { "epoch": 0.42451069078183595, "grad_norm": 0.7608498930931091, "learning_rate": 1.2349277849022905e-05, "loss": 0.33, "step": 19045 }, { "epoch": 0.42462214016245603, "grad_norm": 0.4310937523841858, "learning_rate": 1.2345874379013004e-05, "loss": 0.2368, "step": 19050 }, { "epoch": 0.4247335895430761, "grad_norm": 0.7651890516281128, "learning_rate": 1.2342470621417006e-05, "loss": 0.2911, "step": 19055 }, { "epoch": 0.4248450389236962, "grad_norm": 0.7039048075675964, "learning_rate": 1.2339066576652194e-05, "loss": 0.2134, "step": 19060 }, { "epoch": 0.42495648830431626, "grad_norm": 0.6038171052932739, "learning_rate": 1.2335662245135877e-05, "loss": 0.1983, "step": 19065 }, { "epoch": 0.42506793768493634, "grad_norm": 0.8028332591056824, "learning_rate": 1.2332257627285395e-05, "loss": 0.3403, "step": 19070 }, { "epoch": 0.42517938706555636, "grad_norm": 0.752396821975708, "learning_rate": 1.2328852723518127e-05, "loss": 0.3764, "step": 19075 }, { "epoch": 0.42529083644617643, "grad_norm": 0.598617434501648, "learning_rate": 1.2325447534251497e-05, "loss": 0.3535, "step": 19080 }, { "epoch": 0.4254022858267965, "grad_norm": 0.9264512658119202, "learning_rate": 1.2322042059902946e-05, "loss": 0.3442, "step": 19085 }, { "epoch": 0.4255137352074166, "grad_norm": 0.6805040240287781, "learning_rate": 1.2318636300889963e-05, "loss": 0.3218, "step": 19090 }, { "epoch": 0.42562518458803666, "grad_norm": 0.5993042588233948, "learning_rate": 1.2315230257630066e-05, "loss": 0.378, "step": 19095 }, { "epoch": 0.42573663396865674, "grad_norm": 0.49872323870658875, "learning_rate": 1.231182393054081e-05, "loss": 0.3688, "step": 19100 }, { "epoch": 0.42584808334927676, "grad_norm": 0.6361995935440063, "learning_rate": 1.2308417320039785e-05, "loss": 0.3241, "step": 19105 }, { "epoch": 0.42595953272989684, "grad_norm": 0.6568997502326965, "learning_rate": 1.2305010426544615e-05, "loss": 0.3265, "step": 19110 }, { "epoch": 0.4260709821105169, "grad_norm": 0.5412352085113525, "learning_rate": 1.2301603250472958e-05, "loss": 0.344, "step": 19115 }, { "epoch": 0.426182431491137, "grad_norm": 0.47443583607673645, "learning_rate": 1.229819579224251e-05, "loss": 0.2534, "step": 19120 }, { "epoch": 0.42629388087175707, "grad_norm": 0.6884459853172302, "learning_rate": 1.2294788052270996e-05, "loss": 0.2869, "step": 19125 }, { "epoch": 0.42640533025237715, "grad_norm": 0.5194846391677856, "learning_rate": 1.2291380030976177e-05, "loss": 0.3046, "step": 19130 }, { "epoch": 0.42651677963299717, "grad_norm": 0.8681108951568604, "learning_rate": 1.2287971728775856e-05, "loss": 0.4133, "step": 19135 }, { "epoch": 0.42662822901361724, "grad_norm": 0.49403271079063416, "learning_rate": 1.2284563146087862e-05, "loss": 0.3009, "step": 19140 }, { "epoch": 0.4267396783942373, "grad_norm": 0.6868266463279724, "learning_rate": 1.2281154283330059e-05, "loss": 0.2997, "step": 19145 }, { "epoch": 0.4268511277748574, "grad_norm": 0.7888140082359314, "learning_rate": 1.2277745140920347e-05, "loss": 0.3482, "step": 19150 }, { "epoch": 0.4269625771554775, "grad_norm": 0.8175737857818604, "learning_rate": 1.2274335719276666e-05, "loss": 0.376, "step": 19155 }, { "epoch": 0.42707402653609755, "grad_norm": 0.3791024088859558, "learning_rate": 1.2270926018816978e-05, "loss": 0.3969, "step": 19160 }, { "epoch": 0.42718547591671757, "grad_norm": 0.528655469417572, "learning_rate": 1.2267516039959289e-05, "loss": 0.4267, "step": 19165 }, { "epoch": 0.42729692529733765, "grad_norm": 0.54219651222229, "learning_rate": 1.2264105783121639e-05, "loss": 0.2982, "step": 19170 }, { "epoch": 0.4274083746779577, "grad_norm": 0.6535061001777649, "learning_rate": 1.2260695248722096e-05, "loss": 0.238, "step": 19175 }, { "epoch": 0.4275198240585778, "grad_norm": 0.4789026081562042, "learning_rate": 1.2257284437178761e-05, "loss": 0.3183, "step": 19180 }, { "epoch": 0.4276312734391979, "grad_norm": 0.5575686097145081, "learning_rate": 1.2253873348909782e-05, "loss": 0.4224, "step": 19185 }, { "epoch": 0.42774272281981796, "grad_norm": 0.6034550666809082, "learning_rate": 1.2250461984333326e-05, "loss": 0.4497, "step": 19190 }, { "epoch": 0.427854172200438, "grad_norm": 0.7348048686981201, "learning_rate": 1.22470503438676e-05, "loss": 0.2831, "step": 19195 }, { "epoch": 0.42796562158105805, "grad_norm": 0.5611327886581421, "learning_rate": 1.2243638427930848e-05, "loss": 0.3723, "step": 19200 }, { "epoch": 0.42807707096167813, "grad_norm": 0.6347730159759521, "learning_rate": 1.2240226236941344e-05, "loss": 0.2247, "step": 19205 }, { "epoch": 0.4281885203422982, "grad_norm": 0.7379160523414612, "learning_rate": 1.2236813771317392e-05, "loss": 0.4229, "step": 19210 }, { "epoch": 0.4282999697229183, "grad_norm": 0.4423249661922455, "learning_rate": 1.2233401031477342e-05, "loss": 0.299, "step": 19215 }, { "epoch": 0.4284114191035383, "grad_norm": 0.7548907995223999, "learning_rate": 1.2229988017839563e-05, "loss": 0.3763, "step": 19220 }, { "epoch": 0.4285228684841584, "grad_norm": 0.5771735310554504, "learning_rate": 1.2226574730822463e-05, "loss": 0.4532, "step": 19225 }, { "epoch": 0.42863431786477846, "grad_norm": 0.6492488384246826, "learning_rate": 1.222316117084449e-05, "loss": 0.2664, "step": 19230 }, { "epoch": 0.42874576724539853, "grad_norm": 0.7255642414093018, "learning_rate": 1.2219747338324117e-05, "loss": 0.2886, "step": 19235 }, { "epoch": 0.4288572166260186, "grad_norm": 0.548874020576477, "learning_rate": 1.2216333233679853e-05, "loss": 0.2665, "step": 19240 }, { "epoch": 0.4289686660066387, "grad_norm": 0.7727565765380859, "learning_rate": 1.2212918857330244e-05, "loss": 0.3573, "step": 19245 }, { "epoch": 0.4290801153872587, "grad_norm": 0.5158136487007141, "learning_rate": 1.2209504209693863e-05, "loss": 0.2059, "step": 19250 }, { "epoch": 0.4291915647678788, "grad_norm": 0.6173778176307678, "learning_rate": 1.2206089291189322e-05, "loss": 0.3144, "step": 19255 }, { "epoch": 0.42930301414849886, "grad_norm": 0.5652750134468079, "learning_rate": 1.2202674102235264e-05, "loss": 0.3406, "step": 19260 }, { "epoch": 0.42941446352911894, "grad_norm": 0.4517764151096344, "learning_rate": 1.219925864325036e-05, "loss": 0.3573, "step": 19265 }, { "epoch": 0.429525912909739, "grad_norm": 0.47026604413986206, "learning_rate": 1.2195842914653321e-05, "loss": 0.2306, "step": 19270 }, { "epoch": 0.4296373622903591, "grad_norm": 0.6984983682632446, "learning_rate": 1.2192426916862892e-05, "loss": 0.3728, "step": 19275 }, { "epoch": 0.4297488116709791, "grad_norm": 0.5496819615364075, "learning_rate": 1.2189010650297848e-05, "loss": 0.361, "step": 19280 }, { "epoch": 0.4298602610515992, "grad_norm": 0.8521873950958252, "learning_rate": 1.2185594115376991e-05, "loss": 0.3353, "step": 19285 }, { "epoch": 0.42997171043221927, "grad_norm": 0.6980066895484924, "learning_rate": 1.2182177312519166e-05, "loss": 0.283, "step": 19290 }, { "epoch": 0.43008315981283934, "grad_norm": 0.6652234792709351, "learning_rate": 1.2178760242143248e-05, "loss": 0.315, "step": 19295 }, { "epoch": 0.4301946091934594, "grad_norm": 0.6429911255836487, "learning_rate": 1.2175342904668139e-05, "loss": 0.4032, "step": 19300 }, { "epoch": 0.4303060585740795, "grad_norm": 0.35845664143562317, "learning_rate": 1.2171925300512783e-05, "loss": 0.24, "step": 19305 }, { "epoch": 0.4304175079546995, "grad_norm": 0.6370695233345032, "learning_rate": 1.2168507430096152e-05, "loss": 0.3106, "step": 19310 }, { "epoch": 0.4305289573353196, "grad_norm": 0.44975778460502625, "learning_rate": 1.2165089293837245e-05, "loss": 0.3312, "step": 19315 }, { "epoch": 0.43064040671593967, "grad_norm": 0.7515400052070618, "learning_rate": 1.2161670892155106e-05, "loss": 0.2437, "step": 19320 }, { "epoch": 0.43075185609655975, "grad_norm": 0.34712356328964233, "learning_rate": 1.2158252225468799e-05, "loss": 0.3372, "step": 19325 }, { "epoch": 0.4308633054771798, "grad_norm": 0.6354972720146179, "learning_rate": 1.2154833294197427e-05, "loss": 0.3335, "step": 19330 }, { "epoch": 0.4309747548577999, "grad_norm": 0.6496874094009399, "learning_rate": 1.2151414098760129e-05, "loss": 0.3384, "step": 19335 }, { "epoch": 0.4310862042384199, "grad_norm": 0.6100443601608276, "learning_rate": 1.2147994639576064e-05, "loss": 0.364, "step": 19340 }, { "epoch": 0.43119765361904, "grad_norm": 1.147022008895874, "learning_rate": 1.214457491706444e-05, "loss": 0.3455, "step": 19345 }, { "epoch": 0.4313091029996601, "grad_norm": 0.4176161587238312, "learning_rate": 1.2141154931644484e-05, "loss": 0.2617, "step": 19350 }, { "epoch": 0.43142055238028015, "grad_norm": 1.2032032012939453, "learning_rate": 1.2137734683735463e-05, "loss": 0.4088, "step": 19355 }, { "epoch": 0.43153200176090023, "grad_norm": 0.6207007765769958, "learning_rate": 1.2134314173756664e-05, "loss": 0.3417, "step": 19360 }, { "epoch": 0.4316434511415203, "grad_norm": 0.6498280167579651, "learning_rate": 1.2130893402127427e-05, "loss": 0.2873, "step": 19365 }, { "epoch": 0.43175490052214033, "grad_norm": 0.5154594779014587, "learning_rate": 1.2127472369267105e-05, "loss": 0.3284, "step": 19370 }, { "epoch": 0.4318663499027604, "grad_norm": 0.5628852844238281, "learning_rate": 1.2124051075595094e-05, "loss": 0.3134, "step": 19375 }, { "epoch": 0.4319777992833805, "grad_norm": 0.6029967069625854, "learning_rate": 1.2120629521530813e-05, "loss": 0.4147, "step": 19380 }, { "epoch": 0.43208924866400056, "grad_norm": 0.36349472403526306, "learning_rate": 1.2117207707493726e-05, "loss": 0.2858, "step": 19385 }, { "epoch": 0.43220069804462063, "grad_norm": 0.5155287384986877, "learning_rate": 1.2113785633903315e-05, "loss": 0.3876, "step": 19390 }, { "epoch": 0.4323121474252407, "grad_norm": 0.5897233486175537, "learning_rate": 1.2110363301179096e-05, "loss": 0.2948, "step": 19395 }, { "epoch": 0.43242359680586073, "grad_norm": 0.6986548900604248, "learning_rate": 1.2106940709740631e-05, "loss": 0.4823, "step": 19400 }, { "epoch": 0.4325350461864808, "grad_norm": 0.6543066501617432, "learning_rate": 1.2103517860007498e-05, "loss": 0.3273, "step": 19405 }, { "epoch": 0.4326464955671009, "grad_norm": 0.5895144939422607, "learning_rate": 1.2100094752399307e-05, "loss": 0.3373, "step": 19410 }, { "epoch": 0.43275794494772096, "grad_norm": 0.8023034334182739, "learning_rate": 1.2096671387335717e-05, "loss": 0.4212, "step": 19415 }, { "epoch": 0.43286939432834104, "grad_norm": 0.4318520426750183, "learning_rate": 1.2093247765236395e-05, "loss": 0.3291, "step": 19420 }, { "epoch": 0.43298084370896106, "grad_norm": 0.7532975673675537, "learning_rate": 1.2089823886521054e-05, "loss": 0.3317, "step": 19425 }, { "epoch": 0.43309229308958114, "grad_norm": 0.7808141708374023, "learning_rate": 1.2086399751609435e-05, "loss": 0.4682, "step": 19430 }, { "epoch": 0.4332037424702012, "grad_norm": 0.43736621737480164, "learning_rate": 1.2082975360921314e-05, "loss": 0.2712, "step": 19435 }, { "epoch": 0.4333151918508213, "grad_norm": 0.3133240342140198, "learning_rate": 1.2079550714876489e-05, "loss": 0.2126, "step": 19440 }, { "epoch": 0.43342664123144137, "grad_norm": 0.6244284510612488, "learning_rate": 1.2076125813894797e-05, "loss": 0.4448, "step": 19445 }, { "epoch": 0.43353809061206144, "grad_norm": 0.7542661428451538, "learning_rate": 1.207270065839611e-05, "loss": 0.2544, "step": 19450 }, { "epoch": 0.43364953999268147, "grad_norm": 0.5900478959083557, "learning_rate": 1.2069275248800315e-05, "loss": 0.2778, "step": 19455 }, { "epoch": 0.43376098937330154, "grad_norm": 0.7354373335838318, "learning_rate": 1.2065849585527354e-05, "loss": 0.2657, "step": 19460 }, { "epoch": 0.4338724387539216, "grad_norm": 0.7068347334861755, "learning_rate": 1.2062423668997174e-05, "loss": 0.4206, "step": 19465 }, { "epoch": 0.4339838881345417, "grad_norm": 0.6501403450965881, "learning_rate": 1.2058997499629775e-05, "loss": 0.2959, "step": 19470 }, { "epoch": 0.43409533751516177, "grad_norm": 0.6633725166320801, "learning_rate": 1.2055571077845175e-05, "loss": 0.1783, "step": 19475 }, { "epoch": 0.43420678689578185, "grad_norm": 0.4296213388442993, "learning_rate": 1.2052144404063423e-05, "loss": 0.2178, "step": 19480 }, { "epoch": 0.43431823627640187, "grad_norm": 0.6821916103363037, "learning_rate": 1.2048717478704614e-05, "loss": 0.3981, "step": 19485 }, { "epoch": 0.43442968565702195, "grad_norm": 0.5226448774337769, "learning_rate": 1.2045290302188852e-05, "loss": 0.2762, "step": 19490 }, { "epoch": 0.434541135037642, "grad_norm": 0.6372525095939636, "learning_rate": 1.2041862874936288e-05, "loss": 0.3254, "step": 19495 }, { "epoch": 0.4346525844182621, "grad_norm": 0.6730618476867676, "learning_rate": 1.2038435197367093e-05, "loss": 0.3431, "step": 19500 }, { "epoch": 0.4347640337988822, "grad_norm": 0.6753670573234558, "learning_rate": 1.203500726990148e-05, "loss": 0.3882, "step": 19505 }, { "epoch": 0.43487548317950225, "grad_norm": 0.5922030806541443, "learning_rate": 1.2031579092959685e-05, "loss": 0.4237, "step": 19510 }, { "epoch": 0.4349869325601223, "grad_norm": 0.7718773484230042, "learning_rate": 1.2028150666961968e-05, "loss": 0.2151, "step": 19515 }, { "epoch": 0.43509838194074235, "grad_norm": 0.8648079633712769, "learning_rate": 1.2024721992328638e-05, "loss": 0.3575, "step": 19520 }, { "epoch": 0.43520983132136243, "grad_norm": 0.6470637321472168, "learning_rate": 1.2021293069480023e-05, "loss": 0.3105, "step": 19525 }, { "epoch": 0.4353212807019825, "grad_norm": 0.4397228956222534, "learning_rate": 1.2017863898836479e-05, "loss": 0.397, "step": 19530 }, { "epoch": 0.4354327300826026, "grad_norm": 0.49957188963890076, "learning_rate": 1.201443448081839e-05, "loss": 0.2596, "step": 19535 }, { "epoch": 0.43554417946322266, "grad_norm": 0.5893974900245667, "learning_rate": 1.201100481584619e-05, "loss": 0.3685, "step": 19540 }, { "epoch": 0.4356556288438427, "grad_norm": 0.7479491233825684, "learning_rate": 1.2007574904340322e-05, "loss": 0.4322, "step": 19545 }, { "epoch": 0.43576707822446276, "grad_norm": 0.8564417362213135, "learning_rate": 1.200414474672126e-05, "loss": 0.3602, "step": 19550 }, { "epoch": 0.43587852760508283, "grad_norm": 0.6858411431312561, "learning_rate": 1.200071434340953e-05, "loss": 0.2759, "step": 19555 }, { "epoch": 0.4359899769857029, "grad_norm": 0.9257897734642029, "learning_rate": 1.1997283694825661e-05, "loss": 0.3571, "step": 19560 }, { "epoch": 0.436101426366323, "grad_norm": 0.7651895880699158, "learning_rate": 1.1993852801390227e-05, "loss": 0.3078, "step": 19565 }, { "epoch": 0.43621287574694306, "grad_norm": 0.819974422454834, "learning_rate": 1.1990421663523829e-05, "loss": 0.4032, "step": 19570 }, { "epoch": 0.4363243251275631, "grad_norm": 0.508603572845459, "learning_rate": 1.1986990281647101e-05, "loss": 0.3394, "step": 19575 }, { "epoch": 0.43643577450818316, "grad_norm": 0.43608325719833374, "learning_rate": 1.19835586561807e-05, "loss": 0.3911, "step": 19580 }, { "epoch": 0.43654722388880324, "grad_norm": 0.8884719610214233, "learning_rate": 1.198012678754532e-05, "loss": 0.4198, "step": 19585 }, { "epoch": 0.4366586732694233, "grad_norm": 0.6997649073600769, "learning_rate": 1.197669467616168e-05, "loss": 0.299, "step": 19590 }, { "epoch": 0.4367701226500434, "grad_norm": 0.5541196465492249, "learning_rate": 1.1973262322450527e-05, "loss": 0.3458, "step": 19595 }, { "epoch": 0.4368815720306634, "grad_norm": 0.42259481549263, "learning_rate": 1.196982972683265e-05, "loss": 0.2937, "step": 19600 }, { "epoch": 0.4369930214112835, "grad_norm": 0.5829482078552246, "learning_rate": 1.1966396889728848e-05, "loss": 0.2873, "step": 19605 }, { "epoch": 0.43710447079190357, "grad_norm": 0.5691021680831909, "learning_rate": 1.1962963811559969e-05, "loss": 0.2757, "step": 19610 }, { "epoch": 0.43721592017252364, "grad_norm": 0.412564754486084, "learning_rate": 1.1959530492746879e-05, "loss": 0.3327, "step": 19615 }, { "epoch": 0.4373273695531437, "grad_norm": 0.46707937121391296, "learning_rate": 1.1956096933710476e-05, "loss": 0.2798, "step": 19620 }, { "epoch": 0.4374388189337638, "grad_norm": 0.8313919305801392, "learning_rate": 1.1952663134871681e-05, "loss": 0.4392, "step": 19625 }, { "epoch": 0.4375502683143838, "grad_norm": 0.7636642456054688, "learning_rate": 1.1949229096651465e-05, "loss": 0.3606, "step": 19630 }, { "epoch": 0.4376617176950039, "grad_norm": 0.510886013507843, "learning_rate": 1.1945794819470805e-05, "loss": 0.2675, "step": 19635 }, { "epoch": 0.43777316707562397, "grad_norm": 0.6083784103393555, "learning_rate": 1.194236030375072e-05, "loss": 0.3092, "step": 19640 }, { "epoch": 0.43788461645624405, "grad_norm": 1.0390868186950684, "learning_rate": 1.1938925549912255e-05, "loss": 0.4237, "step": 19645 }, { "epoch": 0.4379960658368641, "grad_norm": 0.44526880979537964, "learning_rate": 1.1935490558376484e-05, "loss": 0.3021, "step": 19650 }, { "epoch": 0.4381075152174842, "grad_norm": 0.5297619700431824, "learning_rate": 1.1932055329564507e-05, "loss": 0.3156, "step": 19655 }, { "epoch": 0.4382189645981042, "grad_norm": 0.7630996704101562, "learning_rate": 1.1928619863897461e-05, "loss": 0.2666, "step": 19660 }, { "epoch": 0.4383304139787243, "grad_norm": 0.8829711079597473, "learning_rate": 1.192518416179651e-05, "loss": 0.3796, "step": 19665 }, { "epoch": 0.4384418633593444, "grad_norm": 0.6145585775375366, "learning_rate": 1.1921748223682837e-05, "loss": 0.3302, "step": 19670 }, { "epoch": 0.43855331273996445, "grad_norm": 0.5251651406288147, "learning_rate": 1.1918312049977665e-05, "loss": 0.3594, "step": 19675 }, { "epoch": 0.43866476212058453, "grad_norm": 0.5702129602432251, "learning_rate": 1.1914875641102246e-05, "loss": 0.3224, "step": 19680 }, { "epoch": 0.4387762115012046, "grad_norm": 0.5534759759902954, "learning_rate": 1.1911438997477854e-05, "loss": 0.3794, "step": 19685 }, { "epoch": 0.4388876608818246, "grad_norm": 0.32037588953971863, "learning_rate": 1.1908002119525792e-05, "loss": 0.2029, "step": 19690 }, { "epoch": 0.4389991102624447, "grad_norm": 0.6656951904296875, "learning_rate": 1.1904565007667399e-05, "loss": 0.3419, "step": 19695 }, { "epoch": 0.4391105596430648, "grad_norm": 0.6678401827812195, "learning_rate": 1.190112766232404e-05, "loss": 0.387, "step": 19700 }, { "epoch": 0.43922200902368486, "grad_norm": 0.4325374364852905, "learning_rate": 1.1897690083917098e-05, "loss": 0.325, "step": 19705 }, { "epoch": 0.43933345840430493, "grad_norm": 0.49688783288002014, "learning_rate": 1.1894252272868e-05, "loss": 0.2698, "step": 19710 }, { "epoch": 0.439444907784925, "grad_norm": 0.5682390928268433, "learning_rate": 1.18908142295982e-05, "loss": 0.2784, "step": 19715 }, { "epoch": 0.43955635716554503, "grad_norm": 0.5705307722091675, "learning_rate": 1.1887375954529167e-05, "loss": 0.3702, "step": 19720 }, { "epoch": 0.4396678065461651, "grad_norm": 0.6163852214813232, "learning_rate": 1.1883937448082414e-05, "loss": 0.2932, "step": 19725 }, { "epoch": 0.4397792559267852, "grad_norm": 0.5091946721076965, "learning_rate": 1.1880498710679466e-05, "loss": 0.3564, "step": 19730 }, { "epoch": 0.43989070530740526, "grad_norm": 0.7378758788108826, "learning_rate": 1.1877059742741895e-05, "loss": 0.2929, "step": 19735 }, { "epoch": 0.44000215468802534, "grad_norm": 0.3604847192764282, "learning_rate": 1.1873620544691288e-05, "loss": 0.3412, "step": 19740 }, { "epoch": 0.4401136040686454, "grad_norm": 0.5514371991157532, "learning_rate": 1.1870181116949261e-05, "loss": 0.3556, "step": 19745 }, { "epoch": 0.44022505344926544, "grad_norm": 0.5001578330993652, "learning_rate": 1.1866741459937467e-05, "loss": 0.35, "step": 19750 }, { "epoch": 0.4403365028298855, "grad_norm": 0.8715097308158875, "learning_rate": 1.1863301574077581e-05, "loss": 0.2616, "step": 19755 }, { "epoch": 0.4404479522105056, "grad_norm": 0.7427131533622742, "learning_rate": 1.1859861459791305e-05, "loss": 0.4198, "step": 19760 }, { "epoch": 0.44055940159112567, "grad_norm": 0.8536766767501831, "learning_rate": 1.1856421117500364e-05, "loss": 0.231, "step": 19765 }, { "epoch": 0.44067085097174574, "grad_norm": 0.3912745416164398, "learning_rate": 1.185298054762653e-05, "loss": 0.1782, "step": 19770 }, { "epoch": 0.4407823003523658, "grad_norm": 0.5229966640472412, "learning_rate": 1.184953975059158e-05, "loss": 0.2969, "step": 19775 }, { "epoch": 0.44089374973298584, "grad_norm": 0.8363884687423706, "learning_rate": 1.1846098726817332e-05, "loss": 0.3296, "step": 19780 }, { "epoch": 0.4410051991136059, "grad_norm": 0.5975040793418884, "learning_rate": 1.1842657476725632e-05, "loss": 0.2876, "step": 19785 }, { "epoch": 0.441116648494226, "grad_norm": 0.42045867443084717, "learning_rate": 1.1839216000738349e-05, "loss": 0.3232, "step": 19790 }, { "epoch": 0.44122809787484607, "grad_norm": 0.6494802832603455, "learning_rate": 1.1835774299277379e-05, "loss": 0.2857, "step": 19795 }, { "epoch": 0.44133954725546615, "grad_norm": 0.631358802318573, "learning_rate": 1.1832332372764649e-05, "loss": 0.3664, "step": 19800 }, { "epoch": 0.44145099663608617, "grad_norm": 0.5753572583198547, "learning_rate": 1.1828890221622117e-05, "loss": 0.4741, "step": 19805 }, { "epoch": 0.44156244601670624, "grad_norm": 0.4872659146785736, "learning_rate": 1.1825447846271758e-05, "loss": 0.3375, "step": 19810 }, { "epoch": 0.4416738953973263, "grad_norm": 0.5573753714561462, "learning_rate": 1.1822005247135584e-05, "loss": 0.4541, "step": 19815 }, { "epoch": 0.4417853447779464, "grad_norm": 0.5151143074035645, "learning_rate": 1.1818562424635631e-05, "loss": 0.2904, "step": 19820 }, { "epoch": 0.4418967941585665, "grad_norm": 0.6065669655799866, "learning_rate": 1.1815119379193962e-05, "loss": 0.3648, "step": 19825 }, { "epoch": 0.44200824353918655, "grad_norm": 0.4966534972190857, "learning_rate": 1.1811676111232668e-05, "loss": 0.2602, "step": 19830 }, { "epoch": 0.4421196929198066, "grad_norm": 0.37646156549453735, "learning_rate": 1.1808232621173866e-05, "loss": 0.327, "step": 19835 }, { "epoch": 0.44223114230042665, "grad_norm": 0.6118890047073364, "learning_rate": 1.1804788909439702e-05, "loss": 0.3577, "step": 19840 }, { "epoch": 0.4423425916810467, "grad_norm": 0.4959801137447357, "learning_rate": 1.1801344976452348e-05, "loss": 0.347, "step": 19845 }, { "epoch": 0.4424540410616668, "grad_norm": 0.5995858311653137, "learning_rate": 1.1797900822634007e-05, "loss": 0.2956, "step": 19850 }, { "epoch": 0.4425654904422869, "grad_norm": 0.3750613331794739, "learning_rate": 1.17944564484069e-05, "loss": 0.3752, "step": 19855 }, { "epoch": 0.44267693982290696, "grad_norm": 0.6806448698043823, "learning_rate": 1.1791011854193282e-05, "loss": 0.33, "step": 19860 }, { "epoch": 0.442788389203527, "grad_norm": 0.6760001182556152, "learning_rate": 1.1787567040415437e-05, "loss": 0.3706, "step": 19865 }, { "epoch": 0.44289983858414705, "grad_norm": 0.39426669478416443, "learning_rate": 1.178412200749567e-05, "loss": 0.3819, "step": 19870 }, { "epoch": 0.44301128796476713, "grad_norm": 0.44993576407432556, "learning_rate": 1.1780676755856317e-05, "loss": 0.3555, "step": 19875 }, { "epoch": 0.4431227373453872, "grad_norm": 0.8157110810279846, "learning_rate": 1.1777231285919742e-05, "loss": 0.3558, "step": 19880 }, { "epoch": 0.4432341867260073, "grad_norm": 0.5687642693519592, "learning_rate": 1.1773785598108326e-05, "loss": 0.241, "step": 19885 }, { "epoch": 0.44334563610662736, "grad_norm": 0.7683583498001099, "learning_rate": 1.1770339692844484e-05, "loss": 0.2754, "step": 19890 }, { "epoch": 0.4434570854872474, "grad_norm": 0.72896409034729, "learning_rate": 1.1766893570550666e-05, "loss": 0.3862, "step": 19895 }, { "epoch": 0.44356853486786746, "grad_norm": 0.5326892733573914, "learning_rate": 1.1763447231649332e-05, "loss": 0.3575, "step": 19900 }, { "epoch": 0.44367998424848754, "grad_norm": 0.5236831903457642, "learning_rate": 1.1760000676562977e-05, "loss": 0.199, "step": 19905 }, { "epoch": 0.4437914336291076, "grad_norm": 0.6456315517425537, "learning_rate": 1.175655390571413e-05, "loss": 0.2572, "step": 19910 }, { "epoch": 0.4439028830097277, "grad_norm": 0.6855353713035583, "learning_rate": 1.175310691952533e-05, "loss": 0.4176, "step": 19915 }, { "epoch": 0.44401433239034777, "grad_norm": 0.9236936569213867, "learning_rate": 1.1749659718419151e-05, "loss": 0.4013, "step": 19920 }, { "epoch": 0.4441257817709678, "grad_norm": 0.5553421378135681, "learning_rate": 1.1746212302818196e-05, "loss": 0.2543, "step": 19925 }, { "epoch": 0.44423723115158786, "grad_norm": 0.7569783926010132, "learning_rate": 1.1742764673145095e-05, "loss": 0.396, "step": 19930 }, { "epoch": 0.44434868053220794, "grad_norm": 0.5262734293937683, "learning_rate": 1.1739316829822496e-05, "loss": 0.2578, "step": 19935 }, { "epoch": 0.444460129912828, "grad_norm": 0.37517091631889343, "learning_rate": 1.1735868773273076e-05, "loss": 0.3594, "step": 19940 }, { "epoch": 0.4445715792934481, "grad_norm": 0.5958526730537415, "learning_rate": 1.1732420503919547e-05, "loss": 0.2276, "step": 19945 }, { "epoch": 0.44468302867406817, "grad_norm": 0.8476828336715698, "learning_rate": 1.1728972022184636e-05, "loss": 0.3202, "step": 19950 }, { "epoch": 0.4447944780546882, "grad_norm": 0.4996603727340698, "learning_rate": 1.17255233284911e-05, "loss": 0.2563, "step": 19955 }, { "epoch": 0.44490592743530827, "grad_norm": 0.6085326671600342, "learning_rate": 1.1722074423261723e-05, "loss": 0.3926, "step": 19960 }, { "epoch": 0.44501737681592834, "grad_norm": 1.2089165449142456, "learning_rate": 1.1718625306919312e-05, "loss": 0.3213, "step": 19965 }, { "epoch": 0.4451288261965484, "grad_norm": 0.47199055552482605, "learning_rate": 1.1715175979886708e-05, "loss": 0.3736, "step": 19970 }, { "epoch": 0.4452402755771685, "grad_norm": 0.9002397656440735, "learning_rate": 1.1711726442586764e-05, "loss": 0.2109, "step": 19975 }, { "epoch": 0.4453517249577885, "grad_norm": 0.8258025646209717, "learning_rate": 1.1708276695442371e-05, "loss": 0.301, "step": 19980 }, { "epoch": 0.4454631743384086, "grad_norm": 0.4305465817451477, "learning_rate": 1.1704826738876445e-05, "loss": 0.347, "step": 19985 }, { "epoch": 0.4455746237190287, "grad_norm": 0.46003684401512146, "learning_rate": 1.1701376573311918e-05, "loss": 0.3968, "step": 19990 }, { "epoch": 0.44568607309964875, "grad_norm": 0.6948609948158264, "learning_rate": 1.1697926199171754e-05, "loss": 0.3136, "step": 19995 }, { "epoch": 0.4457975224802688, "grad_norm": 0.42964625358581543, "learning_rate": 1.1694475616878947e-05, "loss": 0.2609, "step": 20000 }, { "epoch": 0.4459089718608889, "grad_norm": 0.6982073783874512, "learning_rate": 1.1691024826856507e-05, "loss": 0.361, "step": 20005 }, { "epoch": 0.4460204212415089, "grad_norm": 0.5581613183021545, "learning_rate": 1.1687573829527474e-05, "loss": 0.3218, "step": 20010 }, { "epoch": 0.446131870622129, "grad_norm": 0.6846082806587219, "learning_rate": 1.1684122625314918e-05, "loss": 0.3306, "step": 20015 }, { "epoch": 0.4462433200027491, "grad_norm": 0.6660178303718567, "learning_rate": 1.1680671214641927e-05, "loss": 0.3782, "step": 20020 }, { "epoch": 0.44635476938336915, "grad_norm": 0.47997379302978516, "learning_rate": 1.1677219597931617e-05, "loss": 0.2564, "step": 20025 }, { "epoch": 0.44646621876398923, "grad_norm": 0.4481871724128723, "learning_rate": 1.1673767775607133e-05, "loss": 0.2694, "step": 20030 }, { "epoch": 0.4465776681446093, "grad_norm": 0.6611993908882141, "learning_rate": 1.1670315748091639e-05, "loss": 0.3976, "step": 20035 }, { "epoch": 0.44668911752522933, "grad_norm": 1.1613658666610718, "learning_rate": 1.1666863515808323e-05, "loss": 0.2544, "step": 20040 }, { "epoch": 0.4468005669058494, "grad_norm": 0.5730067491531372, "learning_rate": 1.1663411079180409e-05, "loss": 0.3335, "step": 20045 }, { "epoch": 0.4469120162864695, "grad_norm": 0.7078298330307007, "learning_rate": 1.1659958438631138e-05, "loss": 0.3033, "step": 20050 }, { "epoch": 0.44702346566708956, "grad_norm": 0.7347967624664307, "learning_rate": 1.1656505594583774e-05, "loss": 0.3882, "step": 20055 }, { "epoch": 0.44713491504770964, "grad_norm": 0.7464125752449036, "learning_rate": 1.165305254746161e-05, "loss": 0.3353, "step": 20060 }, { "epoch": 0.4472463644283297, "grad_norm": 0.5959119200706482, "learning_rate": 1.164959929768796e-05, "loss": 0.3464, "step": 20065 }, { "epoch": 0.44735781380894973, "grad_norm": 0.6540177464485168, "learning_rate": 1.1646145845686175e-05, "loss": 0.3472, "step": 20070 }, { "epoch": 0.4474692631895698, "grad_norm": 0.5438899397850037, "learning_rate": 1.1642692191879613e-05, "loss": 0.3067, "step": 20075 }, { "epoch": 0.4475807125701899, "grad_norm": 0.5280618071556091, "learning_rate": 1.1639238336691666e-05, "loss": 0.3269, "step": 20080 }, { "epoch": 0.44769216195080996, "grad_norm": 0.548997700214386, "learning_rate": 1.1635784280545755e-05, "loss": 0.3355, "step": 20085 }, { "epoch": 0.44780361133143004, "grad_norm": 0.5856815576553345, "learning_rate": 1.1632330023865315e-05, "loss": 0.2652, "step": 20090 }, { "epoch": 0.4479150607120501, "grad_norm": 0.4746699035167694, "learning_rate": 1.1628875567073816e-05, "loss": 0.2406, "step": 20095 }, { "epoch": 0.44802651009267014, "grad_norm": 0.5286392569541931, "learning_rate": 1.1625420910594745e-05, "loss": 0.3352, "step": 20100 }, { "epoch": 0.4481379594732902, "grad_norm": 0.7887224555015564, "learning_rate": 1.1621966054851614e-05, "loss": 0.4244, "step": 20105 }, { "epoch": 0.4482494088539103, "grad_norm": 0.6540002822875977, "learning_rate": 1.1618511000267966e-05, "loss": 0.3291, "step": 20110 }, { "epoch": 0.44836085823453037, "grad_norm": 0.7543660998344421, "learning_rate": 1.1615055747267366e-05, "loss": 0.2833, "step": 20115 }, { "epoch": 0.44847230761515045, "grad_norm": 0.5979798436164856, "learning_rate": 1.1611600296273391e-05, "loss": 0.3342, "step": 20120 }, { "epoch": 0.4485837569957705, "grad_norm": 0.5423582196235657, "learning_rate": 1.1608144647709664e-05, "loss": 0.3795, "step": 20125 }, { "epoch": 0.44869520637639054, "grad_norm": 0.8526514172554016, "learning_rate": 1.1604688801999817e-05, "loss": 0.4085, "step": 20130 }, { "epoch": 0.4488066557570106, "grad_norm": 0.6163521409034729, "learning_rate": 1.1601232759567504e-05, "loss": 0.3199, "step": 20135 }, { "epoch": 0.4489181051376307, "grad_norm": 0.7942532896995544, "learning_rate": 1.159777652083642e-05, "loss": 0.3802, "step": 20140 }, { "epoch": 0.4490295545182508, "grad_norm": 0.46986180543899536, "learning_rate": 1.1594320086230265e-05, "loss": 0.286, "step": 20145 }, { "epoch": 0.44914100389887085, "grad_norm": 0.5512806177139282, "learning_rate": 1.1590863456172772e-05, "loss": 0.3589, "step": 20150 }, { "epoch": 0.4492524532794909, "grad_norm": 0.8379069566726685, "learning_rate": 1.1587406631087701e-05, "loss": 0.325, "step": 20155 }, { "epoch": 0.44936390266011095, "grad_norm": 0.7071454524993896, "learning_rate": 1.158394961139883e-05, "loss": 0.3405, "step": 20160 }, { "epoch": 0.449475352040731, "grad_norm": 0.49624374508857727, "learning_rate": 1.158049239752996e-05, "loss": 0.2863, "step": 20165 }, { "epoch": 0.4495868014213511, "grad_norm": 0.5125814080238342, "learning_rate": 1.1577034989904923e-05, "loss": 0.3024, "step": 20170 }, { "epoch": 0.4496982508019712, "grad_norm": 0.6997113227844238, "learning_rate": 1.1573577388947573e-05, "loss": 0.3309, "step": 20175 }, { "epoch": 0.44980970018259125, "grad_norm": 0.58835768699646, "learning_rate": 1.1570119595081777e-05, "loss": 0.3265, "step": 20180 }, { "epoch": 0.4499211495632113, "grad_norm": 0.7760397791862488, "learning_rate": 1.156666160873144e-05, "loss": 0.3216, "step": 20185 }, { "epoch": 0.45003259894383135, "grad_norm": 0.63795405626297, "learning_rate": 1.1563203430320482e-05, "loss": 0.359, "step": 20190 }, { "epoch": 0.45014404832445143, "grad_norm": 0.4398214817047119, "learning_rate": 1.1559745060272849e-05, "loss": 0.2296, "step": 20195 }, { "epoch": 0.4502554977050715, "grad_norm": 0.5632160902023315, "learning_rate": 1.1556286499012512e-05, "loss": 0.4828, "step": 20200 }, { "epoch": 0.4503669470856916, "grad_norm": 0.663616955280304, "learning_rate": 1.155282774696346e-05, "loss": 0.3925, "step": 20205 }, { "epoch": 0.45047839646631166, "grad_norm": 0.7086315155029297, "learning_rate": 1.1549368804549716e-05, "loss": 0.2877, "step": 20210 }, { "epoch": 0.4505898458469317, "grad_norm": 0.5059530138969421, "learning_rate": 1.1545909672195315e-05, "loss": 0.4004, "step": 20215 }, { "epoch": 0.45070129522755176, "grad_norm": 0.724844753742218, "learning_rate": 1.1542450350324321e-05, "loss": 0.3196, "step": 20220 }, { "epoch": 0.45081274460817183, "grad_norm": 0.34039250016212463, "learning_rate": 1.1538990839360818e-05, "loss": 0.232, "step": 20225 }, { "epoch": 0.4509241939887919, "grad_norm": 0.7491649389266968, "learning_rate": 1.1535531139728918e-05, "loss": 0.3064, "step": 20230 }, { "epoch": 0.451035643369412, "grad_norm": 0.7688721418380737, "learning_rate": 1.1532071251852753e-05, "loss": 0.2946, "step": 20235 }, { "epoch": 0.45114709275003206, "grad_norm": 0.566525399684906, "learning_rate": 1.1528611176156477e-05, "loss": 0.2713, "step": 20240 }, { "epoch": 0.4512585421306521, "grad_norm": 0.6546362042427063, "learning_rate": 1.152515091306427e-05, "loss": 0.2791, "step": 20245 }, { "epoch": 0.45136999151127216, "grad_norm": 0.7522642016410828, "learning_rate": 1.1521690463000336e-05, "loss": 0.3468, "step": 20250 }, { "epoch": 0.45148144089189224, "grad_norm": 0.5377618670463562, "learning_rate": 1.1518229826388898e-05, "loss": 0.3282, "step": 20255 }, { "epoch": 0.4515928902725123, "grad_norm": 0.7234406471252441, "learning_rate": 1.1514769003654195e-05, "loss": 0.4184, "step": 20260 }, { "epoch": 0.4517043396531324, "grad_norm": 0.5777047872543335, "learning_rate": 1.1511307995220511e-05, "loss": 0.2963, "step": 20265 }, { "epoch": 0.45181578903375247, "grad_norm": 0.523595929145813, "learning_rate": 1.1507846801512132e-05, "loss": 0.2322, "step": 20270 }, { "epoch": 0.4519272384143725, "grad_norm": 0.6133381724357605, "learning_rate": 1.150438542295337e-05, "loss": 0.2527, "step": 20275 }, { "epoch": 0.45203868779499257, "grad_norm": 0.8271573781967163, "learning_rate": 1.1500923859968572e-05, "loss": 0.2821, "step": 20280 }, { "epoch": 0.45215013717561264, "grad_norm": 0.524104118347168, "learning_rate": 1.1497462112982092e-05, "loss": 0.3312, "step": 20285 }, { "epoch": 0.4522615865562327, "grad_norm": 0.6851401329040527, "learning_rate": 1.1494000182418315e-05, "loss": 0.336, "step": 20290 }, { "epoch": 0.4523730359368528, "grad_norm": 0.6665769219398499, "learning_rate": 1.1490538068701646e-05, "loss": 0.2889, "step": 20295 }, { "epoch": 0.4524844853174729, "grad_norm": 0.544818639755249, "learning_rate": 1.1487075772256517e-05, "loss": 0.2402, "step": 20300 }, { "epoch": 0.4525959346980929, "grad_norm": 0.43808647990226746, "learning_rate": 1.1483613293507376e-05, "loss": 0.268, "step": 20305 }, { "epoch": 0.45270738407871297, "grad_norm": 0.5447205305099487, "learning_rate": 1.1480150632878697e-05, "loss": 0.2629, "step": 20310 }, { "epoch": 0.45281883345933305, "grad_norm": 0.6161884665489197, "learning_rate": 1.1476687790794978e-05, "loss": 0.25, "step": 20315 }, { "epoch": 0.4529302828399531, "grad_norm": 0.5583766102790833, "learning_rate": 1.147322476768073e-05, "loss": 0.2625, "step": 20320 }, { "epoch": 0.4530417322205732, "grad_norm": 0.5555024147033691, "learning_rate": 1.1469761563960503e-05, "loss": 0.3512, "step": 20325 }, { "epoch": 0.4531531816011933, "grad_norm": 0.7176376581192017, "learning_rate": 1.1466298180058847e-05, "loss": 0.3411, "step": 20330 }, { "epoch": 0.4532646309818133, "grad_norm": 0.5666157603263855, "learning_rate": 1.1462834616400353e-05, "loss": 0.2999, "step": 20335 }, { "epoch": 0.4533760803624334, "grad_norm": 0.679897665977478, "learning_rate": 1.145937087340963e-05, "loss": 0.3178, "step": 20340 }, { "epoch": 0.45348752974305345, "grad_norm": 0.4235386848449707, "learning_rate": 1.14559069515113e-05, "loss": 0.2108, "step": 20345 }, { "epoch": 0.45359897912367353, "grad_norm": 0.6555066704750061, "learning_rate": 1.1452442851130017e-05, "loss": 0.3308, "step": 20350 }, { "epoch": 0.4537104285042936, "grad_norm": 0.6095037460327148, "learning_rate": 1.1448978572690448e-05, "loss": 0.2004, "step": 20355 }, { "epoch": 0.4538218778849136, "grad_norm": 0.5765591859817505, "learning_rate": 1.1445514116617296e-05, "loss": 0.3616, "step": 20360 }, { "epoch": 0.4539333272655337, "grad_norm": 0.5720357894897461, "learning_rate": 1.1442049483335267e-05, "loss": 0.3972, "step": 20365 }, { "epoch": 0.4540447766461538, "grad_norm": 0.6138008832931519, "learning_rate": 1.1438584673269102e-05, "loss": 0.3684, "step": 20370 }, { "epoch": 0.45415622602677386, "grad_norm": 0.5443325638771057, "learning_rate": 1.143511968684356e-05, "loss": 0.4133, "step": 20375 }, { "epoch": 0.45426767540739393, "grad_norm": 0.742470920085907, "learning_rate": 1.1431654524483425e-05, "loss": 0.2467, "step": 20380 }, { "epoch": 0.454379124788014, "grad_norm": 0.5554812550544739, "learning_rate": 1.1428189186613491e-05, "loss": 0.4247, "step": 20385 }, { "epoch": 0.45449057416863403, "grad_norm": 0.4698553681373596, "learning_rate": 1.1424723673658593e-05, "loss": 0.3415, "step": 20390 }, { "epoch": 0.4546020235492541, "grad_norm": 0.5283235907554626, "learning_rate": 1.1421257986043566e-05, "loss": 0.2199, "step": 20395 }, { "epoch": 0.4547134729298742, "grad_norm": 0.5475237369537354, "learning_rate": 1.141779212419328e-05, "loss": 0.3577, "step": 20400 }, { "epoch": 0.45482492231049426, "grad_norm": 0.6595231294631958, "learning_rate": 1.1414326088532625e-05, "loss": 0.4771, "step": 20405 }, { "epoch": 0.45493637169111434, "grad_norm": 0.6333425641059875, "learning_rate": 1.1410859879486509e-05, "loss": 0.2621, "step": 20410 }, { "epoch": 0.4550478210717344, "grad_norm": 0.4809962809085846, "learning_rate": 1.140739349747986e-05, "loss": 0.3684, "step": 20415 }, { "epoch": 0.45515927045235444, "grad_norm": 0.360771507024765, "learning_rate": 1.1403926942937631e-05, "loss": 0.4388, "step": 20420 }, { "epoch": 0.4552707198329745, "grad_norm": 0.5278193950653076, "learning_rate": 1.1400460216284799e-05, "loss": 0.2437, "step": 20425 }, { "epoch": 0.4553821692135946, "grad_norm": 0.7080212235450745, "learning_rate": 1.139699331794635e-05, "loss": 0.335, "step": 20430 }, { "epoch": 0.45549361859421467, "grad_norm": 0.9361355900764465, "learning_rate": 1.1393526248347304e-05, "loss": 0.372, "step": 20435 }, { "epoch": 0.45560506797483474, "grad_norm": 0.7043543457984924, "learning_rate": 1.13900590079127e-05, "loss": 0.314, "step": 20440 }, { "epoch": 0.4557165173554548, "grad_norm": 0.5692029595375061, "learning_rate": 1.1386591597067586e-05, "loss": 0.2636, "step": 20445 }, { "epoch": 0.45582796673607484, "grad_norm": 0.46591368317604065, "learning_rate": 1.138312401623705e-05, "loss": 0.3487, "step": 20450 }, { "epoch": 0.4559394161166949, "grad_norm": 0.5977901816368103, "learning_rate": 1.1379656265846185e-05, "loss": 0.3386, "step": 20455 }, { "epoch": 0.456050865497315, "grad_norm": 0.7537121176719666, "learning_rate": 1.1376188346320107e-05, "loss": 0.4314, "step": 20460 }, { "epoch": 0.45616231487793507, "grad_norm": 0.6459237933158875, "learning_rate": 1.1372720258083965e-05, "loss": 0.3015, "step": 20465 }, { "epoch": 0.45627376425855515, "grad_norm": 0.8141577243804932, "learning_rate": 1.1369252001562911e-05, "loss": 0.4319, "step": 20470 }, { "epoch": 0.4563852136391752, "grad_norm": 0.5266554355621338, "learning_rate": 1.1365783577182132e-05, "loss": 0.3016, "step": 20475 }, { "epoch": 0.45649666301979525, "grad_norm": 0.9065921306610107, "learning_rate": 1.136231498536683e-05, "loss": 0.3809, "step": 20480 }, { "epoch": 0.4566081124004153, "grad_norm": 0.5425348281860352, "learning_rate": 1.135884622654223e-05, "loss": 0.3662, "step": 20485 }, { "epoch": 0.4567195617810354, "grad_norm": 0.45135268568992615, "learning_rate": 1.1355377301133564e-05, "loss": 0.2974, "step": 20490 }, { "epoch": 0.4568310111616555, "grad_norm": 0.4818071722984314, "learning_rate": 1.1351908209566104e-05, "loss": 0.232, "step": 20495 }, { "epoch": 0.45694246054227555, "grad_norm": 0.6810379028320312, "learning_rate": 1.1348438952265138e-05, "loss": 0.3733, "step": 20500 }, { "epoch": 0.45705390992289563, "grad_norm": 0.5900397896766663, "learning_rate": 1.134496952965596e-05, "loss": 0.4092, "step": 20505 }, { "epoch": 0.45716535930351565, "grad_norm": 0.5401953458786011, "learning_rate": 1.13414999421639e-05, "loss": 0.4204, "step": 20510 }, { "epoch": 0.4572768086841357, "grad_norm": 0.381527841091156, "learning_rate": 1.1338030190214304e-05, "loss": 0.2127, "step": 20515 }, { "epoch": 0.4573882580647558, "grad_norm": 0.7745471000671387, "learning_rate": 1.1334560274232531e-05, "loss": 0.3037, "step": 20520 }, { "epoch": 0.4574997074453759, "grad_norm": 0.7092231512069702, "learning_rate": 1.1331090194643972e-05, "loss": 0.2768, "step": 20525 }, { "epoch": 0.45761115682599596, "grad_norm": 0.9606593251228333, "learning_rate": 1.1327619951874029e-05, "loss": 0.3453, "step": 20530 }, { "epoch": 0.45772260620661603, "grad_norm": 0.9122305512428284, "learning_rate": 1.132414954634813e-05, "loss": 0.2957, "step": 20535 }, { "epoch": 0.45783405558723606, "grad_norm": 0.4970526397228241, "learning_rate": 1.1320678978491713e-05, "loss": 0.299, "step": 20540 }, { "epoch": 0.45794550496785613, "grad_norm": 0.5332255959510803, "learning_rate": 1.1317208248730252e-05, "loss": 0.3238, "step": 20545 }, { "epoch": 0.4580569543484762, "grad_norm": 0.5015629529953003, "learning_rate": 1.1313737357489223e-05, "loss": 0.3711, "step": 20550 }, { "epoch": 0.4581684037290963, "grad_norm": 0.6893032193183899, "learning_rate": 1.1310266305194136e-05, "loss": 0.2776, "step": 20555 }, { "epoch": 0.45827985310971636, "grad_norm": 0.6035255789756775, "learning_rate": 1.1306795092270512e-05, "loss": 0.3867, "step": 20560 }, { "epoch": 0.4583913024903364, "grad_norm": 0.4319852888584137, "learning_rate": 1.1303323719143902e-05, "loss": 0.2329, "step": 20565 }, { "epoch": 0.45850275187095646, "grad_norm": 0.7130627036094666, "learning_rate": 1.1299852186239859e-05, "loss": 0.4149, "step": 20570 }, { "epoch": 0.45861420125157654, "grad_norm": 0.5139709115028381, "learning_rate": 1.1296380493983971e-05, "loss": 0.3604, "step": 20575 }, { "epoch": 0.4587256506321966, "grad_norm": 0.6675410270690918, "learning_rate": 1.1292908642801845e-05, "loss": 0.267, "step": 20580 }, { "epoch": 0.4588371000128167, "grad_norm": 0.5854967832565308, "learning_rate": 1.1289436633119095e-05, "loss": 0.4298, "step": 20585 }, { "epoch": 0.45894854939343677, "grad_norm": 0.7468124628067017, "learning_rate": 1.128596446536137e-05, "loss": 0.439, "step": 20590 }, { "epoch": 0.4590599987740568, "grad_norm": 0.7832880020141602, "learning_rate": 1.1282492139954326e-05, "loss": 0.3148, "step": 20595 }, { "epoch": 0.45917144815467686, "grad_norm": 0.5017052292823792, "learning_rate": 1.1279019657323644e-05, "loss": 0.3527, "step": 20600 }, { "epoch": 0.45928289753529694, "grad_norm": 0.7330566048622131, "learning_rate": 1.1275547017895027e-05, "loss": 0.332, "step": 20605 }, { "epoch": 0.459394346915917, "grad_norm": 0.667830228805542, "learning_rate": 1.1272074222094189e-05, "loss": 0.3643, "step": 20610 }, { "epoch": 0.4595057962965371, "grad_norm": 0.5111591219902039, "learning_rate": 1.126860127034687e-05, "loss": 0.2477, "step": 20615 }, { "epoch": 0.45961724567715717, "grad_norm": 0.5095810890197754, "learning_rate": 1.1265128163078831e-05, "loss": 0.3729, "step": 20620 }, { "epoch": 0.4597286950577772, "grad_norm": 0.6718348264694214, "learning_rate": 1.1261654900715844e-05, "loss": 0.3688, "step": 20625 }, { "epoch": 0.45984014443839727, "grad_norm": 0.6186143755912781, "learning_rate": 1.1258181483683699e-05, "loss": 0.2758, "step": 20630 }, { "epoch": 0.45995159381901735, "grad_norm": 0.6600292325019836, "learning_rate": 1.125470791240822e-05, "loss": 0.3646, "step": 20635 }, { "epoch": 0.4600630431996374, "grad_norm": 0.6270229816436768, "learning_rate": 1.1251234187315237e-05, "loss": 0.3948, "step": 20640 }, { "epoch": 0.4601744925802575, "grad_norm": 0.6568154692649841, "learning_rate": 1.12477603088306e-05, "loss": 0.4049, "step": 20645 }, { "epoch": 0.4602859419608776, "grad_norm": 0.6456282138824463, "learning_rate": 1.1244286277380176e-05, "loss": 0.3546, "step": 20650 }, { "epoch": 0.4603973913414976, "grad_norm": 0.5540902018547058, "learning_rate": 1.1240812093389865e-05, "loss": 0.2044, "step": 20655 }, { "epoch": 0.4605088407221177, "grad_norm": 0.7089858055114746, "learning_rate": 1.1237337757285564e-05, "loss": 0.4284, "step": 20660 }, { "epoch": 0.46062029010273775, "grad_norm": 0.6283511519432068, "learning_rate": 1.1233863269493208e-05, "loss": 0.327, "step": 20665 }, { "epoch": 0.4607317394833578, "grad_norm": 0.5950222611427307, "learning_rate": 1.1230388630438737e-05, "loss": 0.3742, "step": 20670 }, { "epoch": 0.4608431888639779, "grad_norm": 0.4111473560333252, "learning_rate": 1.1226913840548119e-05, "loss": 0.3084, "step": 20675 }, { "epoch": 0.460954638244598, "grad_norm": 0.5442538261413574, "learning_rate": 1.1223438900247334e-05, "loss": 0.2162, "step": 20680 }, { "epoch": 0.461066087625218, "grad_norm": 0.5340818762779236, "learning_rate": 1.1219963809962382e-05, "loss": 0.3153, "step": 20685 }, { "epoch": 0.4611775370058381, "grad_norm": 0.5781068801879883, "learning_rate": 1.1216488570119283e-05, "loss": 0.3198, "step": 20690 }, { "epoch": 0.46128898638645816, "grad_norm": 0.6196689009666443, "learning_rate": 1.1213013181144079e-05, "loss": 0.3136, "step": 20695 }, { "epoch": 0.46140043576707823, "grad_norm": 0.7419324517250061, "learning_rate": 1.120953764346282e-05, "loss": 0.245, "step": 20700 }, { "epoch": 0.4615118851476983, "grad_norm": 0.5698023438453674, "learning_rate": 1.120606195750158e-05, "loss": 0.3621, "step": 20705 }, { "epoch": 0.4616233345283184, "grad_norm": 0.7995551228523254, "learning_rate": 1.1202586123686457e-05, "loss": 0.2984, "step": 20710 }, { "epoch": 0.4617347839089384, "grad_norm": 0.8355708718299866, "learning_rate": 1.1199110142443557e-05, "loss": 0.3809, "step": 20715 }, { "epoch": 0.4618462332895585, "grad_norm": 0.35497555136680603, "learning_rate": 1.1195634014199006e-05, "loss": 0.3197, "step": 20720 }, { "epoch": 0.46195768267017856, "grad_norm": 0.6658803224563599, "learning_rate": 1.1192157739378958e-05, "loss": 0.3327, "step": 20725 }, { "epoch": 0.46206913205079864, "grad_norm": 0.6382262110710144, "learning_rate": 1.1188681318409571e-05, "loss": 0.4237, "step": 20730 }, { "epoch": 0.4621805814314187, "grad_norm": 0.6009849905967712, "learning_rate": 1.118520475171703e-05, "loss": 0.3045, "step": 20735 }, { "epoch": 0.46229203081203873, "grad_norm": 0.6650320291519165, "learning_rate": 1.1181728039727532e-05, "loss": 0.4768, "step": 20740 }, { "epoch": 0.4624034801926588, "grad_norm": 0.4834892749786377, "learning_rate": 1.1178251182867302e-05, "loss": 0.3506, "step": 20745 }, { "epoch": 0.4625149295732789, "grad_norm": 0.6801639795303345, "learning_rate": 1.1174774181562568e-05, "loss": 0.3021, "step": 20750 }, { "epoch": 0.46262637895389896, "grad_norm": 0.5477128028869629, "learning_rate": 1.117129703623959e-05, "loss": 0.3267, "step": 20755 }, { "epoch": 0.46273782833451904, "grad_norm": 0.7352733612060547, "learning_rate": 1.1167819747324635e-05, "loss": 0.2959, "step": 20760 }, { "epoch": 0.4628492777151391, "grad_norm": 0.6437637209892273, "learning_rate": 1.1164342315243997e-05, "loss": 0.2679, "step": 20765 }, { "epoch": 0.46296072709575914, "grad_norm": 0.586779773235321, "learning_rate": 1.1160864740423971e-05, "loss": 0.4493, "step": 20770 }, { "epoch": 0.4630721764763792, "grad_norm": 0.6587520837783813, "learning_rate": 1.1157387023290896e-05, "loss": 0.325, "step": 20775 }, { "epoch": 0.4631836258569993, "grad_norm": 0.8112159371376038, "learning_rate": 1.1153909164271101e-05, "loss": 0.2332, "step": 20780 }, { "epoch": 0.46329507523761937, "grad_norm": 0.8643737435340881, "learning_rate": 1.1150431163790951e-05, "loss": 0.2957, "step": 20785 }, { "epoch": 0.46340652461823945, "grad_norm": 0.6423876285552979, "learning_rate": 1.1146953022276819e-05, "loss": 0.28, "step": 20790 }, { "epoch": 0.4635179739988595, "grad_norm": 0.4466862082481384, "learning_rate": 1.11434747401551e-05, "loss": 0.3576, "step": 20795 }, { "epoch": 0.46362942337947954, "grad_norm": 0.4856477975845337, "learning_rate": 1.1139996317852204e-05, "loss": 0.2646, "step": 20800 }, { "epoch": 0.4637408727600996, "grad_norm": 0.6780421733856201, "learning_rate": 1.1136517755794559e-05, "loss": 0.3032, "step": 20805 }, { "epoch": 0.4638523221407197, "grad_norm": 0.9612930417060852, "learning_rate": 1.1133039054408612e-05, "loss": 0.2347, "step": 20810 }, { "epoch": 0.4639637715213398, "grad_norm": 0.5880045294761658, "learning_rate": 1.112956021412082e-05, "loss": 0.4147, "step": 20815 }, { "epoch": 0.46407522090195985, "grad_norm": 0.5519404411315918, "learning_rate": 1.1126081235357667e-05, "loss": 0.2711, "step": 20820 }, { "epoch": 0.4641866702825799, "grad_norm": 0.7037073969841003, "learning_rate": 1.1122602118545642e-05, "loss": 0.2904, "step": 20825 }, { "epoch": 0.46429811966319995, "grad_norm": 0.4632646441459656, "learning_rate": 1.1119122864111264e-05, "loss": 0.2557, "step": 20830 }, { "epoch": 0.46440956904382, "grad_norm": 0.5045544505119324, "learning_rate": 1.1115643472481067e-05, "loss": 0.1933, "step": 20835 }, { "epoch": 0.4645210184244401, "grad_norm": 0.5202092528343201, "learning_rate": 1.1112163944081585e-05, "loss": 0.3419, "step": 20840 }, { "epoch": 0.4646324678050602, "grad_norm": 0.4976825714111328, "learning_rate": 1.1108684279339388e-05, "loss": 0.2634, "step": 20845 }, { "epoch": 0.46474391718568026, "grad_norm": 0.5703532695770264, "learning_rate": 1.110520447868106e-05, "loss": 0.3883, "step": 20850 }, { "epoch": 0.46485536656630033, "grad_norm": 0.7237419486045837, "learning_rate": 1.1101724542533195e-05, "loss": 0.3212, "step": 20855 }, { "epoch": 0.46496681594692035, "grad_norm": 0.4750984013080597, "learning_rate": 1.1098244471322397e-05, "loss": 0.1943, "step": 20860 }, { "epoch": 0.46507826532754043, "grad_norm": 0.32952219247817993, "learning_rate": 1.109476426547531e-05, "loss": 0.1731, "step": 20865 }, { "epoch": 0.4651897147081605, "grad_norm": 0.6741600632667542, "learning_rate": 1.1091283925418577e-05, "loss": 0.3286, "step": 20870 }, { "epoch": 0.4653011640887806, "grad_norm": 0.7948526740074158, "learning_rate": 1.1087803451578854e-05, "loss": 0.4216, "step": 20875 }, { "epoch": 0.46541261346940066, "grad_norm": 0.6836048364639282, "learning_rate": 1.1084322844382822e-05, "loss": 0.3021, "step": 20880 }, { "epoch": 0.46552406285002074, "grad_norm": 0.5596360564231873, "learning_rate": 1.1080842104257183e-05, "loss": 0.2108, "step": 20885 }, { "epoch": 0.46563551223064076, "grad_norm": 0.668707549571991, "learning_rate": 1.1077361231628646e-05, "loss": 0.4101, "step": 20890 }, { "epoch": 0.46574696161126083, "grad_norm": 0.49210840463638306, "learning_rate": 1.1073880226923933e-05, "loss": 0.291, "step": 20895 }, { "epoch": 0.4658584109918809, "grad_norm": 0.6215358376502991, "learning_rate": 1.1070399090569796e-05, "loss": 0.3779, "step": 20900 }, { "epoch": 0.465969860372501, "grad_norm": 0.5351458787918091, "learning_rate": 1.1066917822992992e-05, "loss": 0.2616, "step": 20905 }, { "epoch": 0.46608130975312106, "grad_norm": 0.24766775965690613, "learning_rate": 1.1063436424620302e-05, "loss": 0.3989, "step": 20910 }, { "epoch": 0.46619275913374114, "grad_norm": 0.5665358304977417, "learning_rate": 1.1059954895878512e-05, "loss": 0.2567, "step": 20915 }, { "epoch": 0.46630420851436116, "grad_norm": 0.5747869610786438, "learning_rate": 1.1056473237194434e-05, "loss": 0.299, "step": 20920 }, { "epoch": 0.46641565789498124, "grad_norm": 0.4050520360469818, "learning_rate": 1.105299144899489e-05, "loss": 0.3226, "step": 20925 }, { "epoch": 0.4665271072756013, "grad_norm": 0.8657063245773315, "learning_rate": 1.1049509531706721e-05, "loss": 0.3553, "step": 20930 }, { "epoch": 0.4666385566562214, "grad_norm": 0.7300972938537598, "learning_rate": 1.104602748575679e-05, "loss": 0.3177, "step": 20935 }, { "epoch": 0.46675000603684147, "grad_norm": 0.8992680907249451, "learning_rate": 1.1042545311571957e-05, "loss": 0.2669, "step": 20940 }, { "epoch": 0.4668614554174615, "grad_norm": 0.7697744369506836, "learning_rate": 1.103906300957912e-05, "loss": 0.3603, "step": 20945 }, { "epoch": 0.46697290479808157, "grad_norm": 0.7392646074295044, "learning_rate": 1.1035580580205179e-05, "loss": 0.3591, "step": 20950 }, { "epoch": 0.46708435417870164, "grad_norm": 0.5674382448196411, "learning_rate": 1.103209802387705e-05, "loss": 0.4146, "step": 20955 }, { "epoch": 0.4671958035593217, "grad_norm": 0.6826933026313782, "learning_rate": 1.1028615341021669e-05, "loss": 0.2796, "step": 20960 }, { "epoch": 0.4673072529399418, "grad_norm": 0.6870999336242676, "learning_rate": 1.102513253206599e-05, "loss": 0.2892, "step": 20965 }, { "epoch": 0.4674187023205619, "grad_norm": 0.6140218377113342, "learning_rate": 1.1021649597436971e-05, "loss": 0.3268, "step": 20970 }, { "epoch": 0.4675301517011819, "grad_norm": 0.5515140891075134, "learning_rate": 1.10181665375616e-05, "loss": 0.3496, "step": 20975 }, { "epoch": 0.46764160108180197, "grad_norm": 0.8796146512031555, "learning_rate": 1.1014683352866873e-05, "loss": 0.2802, "step": 20980 }, { "epoch": 0.46775305046242205, "grad_norm": 1.1398389339447021, "learning_rate": 1.1011200043779795e-05, "loss": 0.3306, "step": 20985 }, { "epoch": 0.4678644998430421, "grad_norm": 0.6144253015518188, "learning_rate": 1.10077166107274e-05, "loss": 0.3811, "step": 20990 }, { "epoch": 0.4679759492236622, "grad_norm": 0.7339516878128052, "learning_rate": 1.1004233054136726e-05, "loss": 0.4931, "step": 20995 }, { "epoch": 0.4680873986042823, "grad_norm": 0.5207570195198059, "learning_rate": 1.1000749374434826e-05, "loss": 0.3024, "step": 21000 }, { "epoch": 0.4681988479849023, "grad_norm": 0.5385868549346924, "learning_rate": 1.0997265572048785e-05, "loss": 0.333, "step": 21005 }, { "epoch": 0.4683102973655224, "grad_norm": 0.6920881271362305, "learning_rate": 1.0993781647405679e-05, "loss": 0.2503, "step": 21010 }, { "epoch": 0.46842174674614245, "grad_norm": 0.6983755230903625, "learning_rate": 1.0990297600932614e-05, "loss": 0.342, "step": 21015 }, { "epoch": 0.46853319612676253, "grad_norm": 0.6789685487747192, "learning_rate": 1.0986813433056707e-05, "loss": 0.3334, "step": 21020 }, { "epoch": 0.4686446455073826, "grad_norm": 0.6402018070220947, "learning_rate": 1.0983329144205092e-05, "loss": 0.4244, "step": 21025 }, { "epoch": 0.4687560948880027, "grad_norm": 0.671718955039978, "learning_rate": 1.0979844734804912e-05, "loss": 0.3912, "step": 21030 }, { "epoch": 0.4688675442686227, "grad_norm": 0.5738045573234558, "learning_rate": 1.097636020528333e-05, "loss": 0.4052, "step": 21035 }, { "epoch": 0.4689789936492428, "grad_norm": 0.6680939197540283, "learning_rate": 1.0972875556067526e-05, "loss": 0.3295, "step": 21040 }, { "epoch": 0.46909044302986286, "grad_norm": 0.6745684146881104, "learning_rate": 1.0969390787584683e-05, "loss": 0.3101, "step": 21045 }, { "epoch": 0.46920189241048293, "grad_norm": 0.6045733094215393, "learning_rate": 1.0965905900262019e-05, "loss": 0.2997, "step": 21050 }, { "epoch": 0.469313341791103, "grad_norm": 0.5858403444290161, "learning_rate": 1.0962420894526744e-05, "loss": 0.3838, "step": 21055 }, { "epoch": 0.4694247911717231, "grad_norm": 0.9184689521789551, "learning_rate": 1.0958935770806094e-05, "loss": 0.3696, "step": 21060 }, { "epoch": 0.4695362405523431, "grad_norm": 0.715875506401062, "learning_rate": 1.0955450529527323e-05, "loss": 0.2313, "step": 21065 }, { "epoch": 0.4696476899329632, "grad_norm": 0.5318100452423096, "learning_rate": 1.0951965171117691e-05, "loss": 0.2672, "step": 21070 }, { "epoch": 0.46975913931358326, "grad_norm": 0.5228089094161987, "learning_rate": 1.0948479696004479e-05, "loss": 0.2672, "step": 21075 }, { "epoch": 0.46987058869420334, "grad_norm": 0.5848378539085388, "learning_rate": 1.0944994104614974e-05, "loss": 0.3674, "step": 21080 }, { "epoch": 0.4699820380748234, "grad_norm": 0.5758753418922424, "learning_rate": 1.094150839737649e-05, "loss": 0.246, "step": 21085 }, { "epoch": 0.4700934874554435, "grad_norm": 0.4807438850402832, "learning_rate": 1.093802257471634e-05, "loss": 0.3168, "step": 21090 }, { "epoch": 0.4702049368360635, "grad_norm": 0.8553235530853271, "learning_rate": 1.0934536637061865e-05, "loss": 0.3348, "step": 21095 }, { "epoch": 0.4703163862166836, "grad_norm": 0.75001460313797, "learning_rate": 1.0931050584840413e-05, "loss": 0.3114, "step": 21100 }, { "epoch": 0.47042783559730367, "grad_norm": 0.706753134727478, "learning_rate": 1.0927564418479342e-05, "loss": 0.3645, "step": 21105 }, { "epoch": 0.47053928497792374, "grad_norm": 0.6393307447433472, "learning_rate": 1.0924078138406037e-05, "loss": 0.3728, "step": 21110 }, { "epoch": 0.4706507343585438, "grad_norm": 0.6542025208473206, "learning_rate": 1.0920591745047882e-05, "loss": 0.2668, "step": 21115 }, { "epoch": 0.47076218373916384, "grad_norm": 0.7120665311813354, "learning_rate": 1.0917105238832286e-05, "loss": 0.2934, "step": 21120 }, { "epoch": 0.4708736331197839, "grad_norm": 0.4809533357620239, "learning_rate": 1.0913618620186666e-05, "loss": 0.3022, "step": 21125 }, { "epoch": 0.470985082500404, "grad_norm": 0.6897178292274475, "learning_rate": 1.0910131889538457e-05, "loss": 0.3523, "step": 21130 }, { "epoch": 0.47109653188102407, "grad_norm": 0.540787398815155, "learning_rate": 1.0906645047315103e-05, "loss": 0.3457, "step": 21135 }, { "epoch": 0.47120798126164415, "grad_norm": 0.7425426840782166, "learning_rate": 1.0903158093944061e-05, "loss": 0.3859, "step": 21140 }, { "epoch": 0.4713194306422642, "grad_norm": 0.5876283049583435, "learning_rate": 1.0899671029852811e-05, "loss": 0.3606, "step": 21145 }, { "epoch": 0.47143088002288425, "grad_norm": 0.7805973291397095, "learning_rate": 1.089618385546884e-05, "loss": 0.2542, "step": 21150 }, { "epoch": 0.4715423294035043, "grad_norm": 0.6115480661392212, "learning_rate": 1.0892696571219644e-05, "loss": 0.3854, "step": 21155 }, { "epoch": 0.4716537787841244, "grad_norm": 0.5104815363883972, "learning_rate": 1.0889209177532736e-05, "loss": 0.3008, "step": 21160 }, { "epoch": 0.4717652281647445, "grad_norm": 0.5803892016410828, "learning_rate": 1.0885721674835647e-05, "loss": 0.3487, "step": 21165 }, { "epoch": 0.47187667754536455, "grad_norm": 0.5066009759902954, "learning_rate": 1.0882234063555918e-05, "loss": 0.3002, "step": 21170 }, { "epoch": 0.47198812692598463, "grad_norm": 0.5209431648254395, "learning_rate": 1.0878746344121105e-05, "loss": 0.3443, "step": 21175 }, { "epoch": 0.47209957630660465, "grad_norm": 0.5825982093811035, "learning_rate": 1.087525851695877e-05, "loss": 0.2403, "step": 21180 }, { "epoch": 0.47221102568722473, "grad_norm": 0.5618570446968079, "learning_rate": 1.0871770582496499e-05, "loss": 0.2063, "step": 21185 }, { "epoch": 0.4723224750678448, "grad_norm": 0.6874715089797974, "learning_rate": 1.0868282541161882e-05, "loss": 0.2414, "step": 21190 }, { "epoch": 0.4724339244484649, "grad_norm": 0.4891948997974396, "learning_rate": 1.086479439338253e-05, "loss": 0.4205, "step": 21195 }, { "epoch": 0.47254537382908496, "grad_norm": 0.7241100668907166, "learning_rate": 1.0861306139586056e-05, "loss": 0.359, "step": 21200 }, { "epoch": 0.47265682320970503, "grad_norm": 0.6043018698692322, "learning_rate": 1.0857817780200102e-05, "loss": 0.3749, "step": 21205 }, { "epoch": 0.47276827259032506, "grad_norm": 0.7084778547286987, "learning_rate": 1.085432931565231e-05, "loss": 0.2436, "step": 21210 }, { "epoch": 0.47287972197094513, "grad_norm": 0.817477822303772, "learning_rate": 1.0850840746370333e-05, "loss": 0.3669, "step": 21215 }, { "epoch": 0.4729911713515652, "grad_norm": 0.7407671809196472, "learning_rate": 1.0847352072781852e-05, "loss": 0.3582, "step": 21220 }, { "epoch": 0.4731026207321853, "grad_norm": 0.5965331792831421, "learning_rate": 1.0843863295314549e-05, "loss": 0.3101, "step": 21225 }, { "epoch": 0.47321407011280536, "grad_norm": 0.5705615282058716, "learning_rate": 1.0840374414396115e-05, "loss": 0.29, "step": 21230 }, { "epoch": 0.47332551949342544, "grad_norm": 0.4807664453983307, "learning_rate": 1.0836885430454267e-05, "loss": 0.3527, "step": 21235 }, { "epoch": 0.47343696887404546, "grad_norm": 0.6452370882034302, "learning_rate": 1.0833396343916726e-05, "loss": 0.3127, "step": 21240 }, { "epoch": 0.47354841825466554, "grad_norm": 0.6671549081802368, "learning_rate": 1.0829907155211224e-05, "loss": 0.3212, "step": 21245 }, { "epoch": 0.4736598676352856, "grad_norm": 0.7048370242118835, "learning_rate": 1.0826417864765511e-05, "loss": 0.3144, "step": 21250 }, { "epoch": 0.4737713170159057, "grad_norm": 0.47000741958618164, "learning_rate": 1.0822928473007348e-05, "loss": 0.2763, "step": 21255 }, { "epoch": 0.47388276639652577, "grad_norm": 0.7111234068870544, "learning_rate": 1.0819438980364504e-05, "loss": 0.3689, "step": 21260 }, { "epoch": 0.47399421577714584, "grad_norm": 0.6803306937217712, "learning_rate": 1.0815949387264766e-05, "loss": 0.2767, "step": 21265 }, { "epoch": 0.47410566515776587, "grad_norm": 0.7065713405609131, "learning_rate": 1.0812459694135934e-05, "loss": 0.2115, "step": 21270 }, { "epoch": 0.47421711453838594, "grad_norm": 0.4053919017314911, "learning_rate": 1.0808969901405816e-05, "loss": 0.2379, "step": 21275 }, { "epoch": 0.474328563919006, "grad_norm": 0.6302461624145508, "learning_rate": 1.080548000950223e-05, "loss": 0.3157, "step": 21280 }, { "epoch": 0.4744400132996261, "grad_norm": 0.8447433114051819, "learning_rate": 1.080199001885301e-05, "loss": 0.3754, "step": 21285 }, { "epoch": 0.47455146268024617, "grad_norm": 0.6583344340324402, "learning_rate": 1.0798499929886008e-05, "loss": 0.2893, "step": 21290 }, { "epoch": 0.47466291206086625, "grad_norm": 0.7092258334159851, "learning_rate": 1.0795009743029074e-05, "loss": 0.3731, "step": 21295 }, { "epoch": 0.47477436144148627, "grad_norm": 0.9176077842712402, "learning_rate": 1.0791519458710085e-05, "loss": 0.2761, "step": 21300 }, { "epoch": 0.47488581082210635, "grad_norm": 0.7795170545578003, "learning_rate": 1.078802907735692e-05, "loss": 0.2331, "step": 21305 }, { "epoch": 0.4749972602027264, "grad_norm": 0.8962420225143433, "learning_rate": 1.0784538599397472e-05, "loss": 0.3119, "step": 21310 }, { "epoch": 0.4751087095833465, "grad_norm": 0.38173189759254456, "learning_rate": 1.0781048025259648e-05, "loss": 0.354, "step": 21315 }, { "epoch": 0.4752201589639666, "grad_norm": 0.6000009775161743, "learning_rate": 1.0777557355371364e-05, "loss": 0.4249, "step": 21320 }, { "epoch": 0.4753316083445866, "grad_norm": 0.7955544590950012, "learning_rate": 1.0774066590160551e-05, "loss": 0.3269, "step": 21325 }, { "epoch": 0.4754430577252067, "grad_norm": 0.5262717604637146, "learning_rate": 1.0770575730055149e-05, "loss": 0.2005, "step": 21330 }, { "epoch": 0.47555450710582675, "grad_norm": 0.6059525609016418, "learning_rate": 1.0767084775483107e-05, "loss": 0.2447, "step": 21335 }, { "epoch": 0.47566595648644683, "grad_norm": 0.4977636933326721, "learning_rate": 1.0763593726872395e-05, "loss": 0.2031, "step": 21340 }, { "epoch": 0.4757774058670669, "grad_norm": 1.0840247869491577, "learning_rate": 1.076010258465099e-05, "loss": 0.3704, "step": 21345 }, { "epoch": 0.475888855247687, "grad_norm": 1.2776250839233398, "learning_rate": 1.0756611349246871e-05, "loss": 0.1845, "step": 21350 }, { "epoch": 0.476000304628307, "grad_norm": 0.8424351215362549, "learning_rate": 1.0753120021088037e-05, "loss": 0.2985, "step": 21355 }, { "epoch": 0.4761117540089271, "grad_norm": 0.38192832469940186, "learning_rate": 1.0749628600602509e-05, "loss": 0.2605, "step": 21360 }, { "epoch": 0.47622320338954716, "grad_norm": 0.5329508185386658, "learning_rate": 1.0746137088218298e-05, "loss": 0.3446, "step": 21365 }, { "epoch": 0.47633465277016723, "grad_norm": 0.5951401591300964, "learning_rate": 1.0742645484363437e-05, "loss": 0.2271, "step": 21370 }, { "epoch": 0.4764461021507873, "grad_norm": 0.5628944039344788, "learning_rate": 1.0739153789465974e-05, "loss": 0.2752, "step": 21375 }, { "epoch": 0.4765575515314074, "grad_norm": 0.4774826467037201, "learning_rate": 1.0735662003953964e-05, "loss": 0.3852, "step": 21380 }, { "epoch": 0.4766690009120274, "grad_norm": 0.612303614616394, "learning_rate": 1.0732170128255467e-05, "loss": 0.331, "step": 21385 }, { "epoch": 0.4767804502926475, "grad_norm": 0.6990157961845398, "learning_rate": 1.0728678162798565e-05, "loss": 0.3232, "step": 21390 }, { "epoch": 0.47689189967326756, "grad_norm": 0.6447412967681885, "learning_rate": 1.0725186108011348e-05, "loss": 0.3336, "step": 21395 }, { "epoch": 0.47700334905388764, "grad_norm": 0.6951749920845032, "learning_rate": 1.0721693964321906e-05, "loss": 0.3038, "step": 21400 }, { "epoch": 0.4771147984345077, "grad_norm": 0.6879447102546692, "learning_rate": 1.0718201732158357e-05, "loss": 0.2384, "step": 21405 }, { "epoch": 0.4772262478151278, "grad_norm": 0.5735766887664795, "learning_rate": 1.071470941194882e-05, "loss": 0.2334, "step": 21410 }, { "epoch": 0.4773376971957478, "grad_norm": 0.6717875599861145, "learning_rate": 1.0711217004121425e-05, "loss": 0.2281, "step": 21415 }, { "epoch": 0.4774491465763679, "grad_norm": 0.6436781883239746, "learning_rate": 1.0707724509104318e-05, "loss": 0.408, "step": 21420 }, { "epoch": 0.47756059595698797, "grad_norm": 0.6138870120048523, "learning_rate": 1.0704231927325646e-05, "loss": 0.3461, "step": 21425 }, { "epoch": 0.47767204533760804, "grad_norm": 0.5195964574813843, "learning_rate": 1.0700739259213577e-05, "loss": 0.3413, "step": 21430 }, { "epoch": 0.4777834947182281, "grad_norm": 0.4784146249294281, "learning_rate": 1.0697246505196282e-05, "loss": 0.3437, "step": 21435 }, { "epoch": 0.4778949440988482, "grad_norm": 0.6470903754234314, "learning_rate": 1.0693753665701949e-05, "loss": 0.2982, "step": 21440 }, { "epoch": 0.4780063934794682, "grad_norm": 0.4970431327819824, "learning_rate": 1.069026074115877e-05, "loss": 0.2671, "step": 21445 }, { "epoch": 0.4781178428600883, "grad_norm": 0.5180191397666931, "learning_rate": 1.0686767731994952e-05, "loss": 0.2682, "step": 21450 }, { "epoch": 0.47822929224070837, "grad_norm": 0.5427197813987732, "learning_rate": 1.0683274638638712e-05, "loss": 0.2806, "step": 21455 }, { "epoch": 0.47834074162132845, "grad_norm": 0.7320225238800049, "learning_rate": 1.0679781461518275e-05, "loss": 0.3579, "step": 21460 }, { "epoch": 0.4784521910019485, "grad_norm": 0.4966415464878082, "learning_rate": 1.0676288201061878e-05, "loss": 0.2459, "step": 21465 }, { "epoch": 0.4785636403825686, "grad_norm": 0.5143702626228333, "learning_rate": 1.0672794857697769e-05, "loss": 0.3453, "step": 21470 }, { "epoch": 0.4786750897631886, "grad_norm": 0.4696958363056183, "learning_rate": 1.0669301431854203e-05, "loss": 0.2583, "step": 21475 }, { "epoch": 0.4787865391438087, "grad_norm": 0.6180020570755005, "learning_rate": 1.0665807923959446e-05, "loss": 0.4172, "step": 21480 }, { "epoch": 0.4788979885244288, "grad_norm": 0.9005603790283203, "learning_rate": 1.0662314334441779e-05, "loss": 0.294, "step": 21485 }, { "epoch": 0.47900943790504885, "grad_norm": 0.43045490980148315, "learning_rate": 1.0658820663729486e-05, "loss": 0.244, "step": 21490 }, { "epoch": 0.47912088728566893, "grad_norm": 0.7310406565666199, "learning_rate": 1.0655326912250863e-05, "loss": 0.252, "step": 21495 }, { "epoch": 0.47923233666628895, "grad_norm": 0.6532853841781616, "learning_rate": 1.0651833080434222e-05, "loss": 0.397, "step": 21500 }, { "epoch": 0.479343786046909, "grad_norm": 0.6540833115577698, "learning_rate": 1.0648339168707875e-05, "loss": 0.2388, "step": 21505 }, { "epoch": 0.4794552354275291, "grad_norm": 0.3894214928150177, "learning_rate": 1.0644845177500152e-05, "loss": 0.2435, "step": 21510 }, { "epoch": 0.4795666848081492, "grad_norm": 0.6973043084144592, "learning_rate": 1.0641351107239384e-05, "loss": 0.3917, "step": 21515 }, { "epoch": 0.47967813418876926, "grad_norm": 0.6568936705589294, "learning_rate": 1.0637856958353925e-05, "loss": 0.3837, "step": 21520 }, { "epoch": 0.47978958356938933, "grad_norm": 0.426479697227478, "learning_rate": 1.0634362731272123e-05, "loss": 0.3434, "step": 21525 }, { "epoch": 0.47990103295000935, "grad_norm": 0.8263179659843445, "learning_rate": 1.0630868426422346e-05, "loss": 0.1714, "step": 21530 }, { "epoch": 0.48001248233062943, "grad_norm": 0.4639967978000641, "learning_rate": 1.0627374044232973e-05, "loss": 0.2999, "step": 21535 }, { "epoch": 0.4801239317112495, "grad_norm": 0.6227473020553589, "learning_rate": 1.0623879585132384e-05, "loss": 0.3419, "step": 21540 }, { "epoch": 0.4802353810918696, "grad_norm": 0.47798821330070496, "learning_rate": 1.0620385049548976e-05, "loss": 0.2656, "step": 21545 }, { "epoch": 0.48034683047248966, "grad_norm": 0.6013455390930176, "learning_rate": 1.0616890437911146e-05, "loss": 0.3157, "step": 21550 }, { "epoch": 0.48045827985310974, "grad_norm": 0.667288601398468, "learning_rate": 1.0613395750647308e-05, "loss": 0.291, "step": 21555 }, { "epoch": 0.48056972923372976, "grad_norm": 0.808897852897644, "learning_rate": 1.060990098818589e-05, "loss": 0.3802, "step": 21560 }, { "epoch": 0.48068117861434984, "grad_norm": 0.49802616238594055, "learning_rate": 1.0606406150955315e-05, "loss": 0.3905, "step": 21565 }, { "epoch": 0.4807926279949699, "grad_norm": 0.7375686168670654, "learning_rate": 1.0602911239384028e-05, "loss": 0.3577, "step": 21570 }, { "epoch": 0.48090407737559, "grad_norm": 0.5488283038139343, "learning_rate": 1.0599416253900478e-05, "loss": 0.4146, "step": 21575 }, { "epoch": 0.48101552675621007, "grad_norm": 0.5572513341903687, "learning_rate": 1.0595921194933124e-05, "loss": 0.1912, "step": 21580 }, { "epoch": 0.48112697613683014, "grad_norm": 1.139878273010254, "learning_rate": 1.0592426062910426e-05, "loss": 0.3301, "step": 21585 }, { "epoch": 0.48123842551745016, "grad_norm": 0.5184080004692078, "learning_rate": 1.0588930858260869e-05, "loss": 0.4067, "step": 21590 }, { "epoch": 0.48134987489807024, "grad_norm": 0.6013784408569336, "learning_rate": 1.0585435581412933e-05, "loss": 0.2887, "step": 21595 }, { "epoch": 0.4814613242786903, "grad_norm": 0.8848481774330139, "learning_rate": 1.0581940232795114e-05, "loss": 0.3174, "step": 21600 }, { "epoch": 0.4815727736593104, "grad_norm": 0.8106632232666016, "learning_rate": 1.0578444812835914e-05, "loss": 0.2261, "step": 21605 }, { "epoch": 0.48168422303993047, "grad_norm": 0.6408175826072693, "learning_rate": 1.0574949321963845e-05, "loss": 0.2829, "step": 21610 }, { "epoch": 0.48179567242055055, "grad_norm": 0.7407748103141785, "learning_rate": 1.0571453760607428e-05, "loss": 0.3721, "step": 21615 }, { "epoch": 0.48190712180117057, "grad_norm": 0.6626582145690918, "learning_rate": 1.056795812919519e-05, "loss": 0.1896, "step": 21620 }, { "epoch": 0.48201857118179064, "grad_norm": 0.5842767357826233, "learning_rate": 1.0564462428155671e-05, "loss": 0.3176, "step": 21625 }, { "epoch": 0.4821300205624107, "grad_norm": 0.6812451481819153, "learning_rate": 1.0560966657917414e-05, "loss": 0.2068, "step": 21630 }, { "epoch": 0.4822414699430308, "grad_norm": 0.5823196172714233, "learning_rate": 1.0557470818908973e-05, "loss": 0.3458, "step": 21635 }, { "epoch": 0.4823529193236509, "grad_norm": 0.5161350965499878, "learning_rate": 1.0553974911558916e-05, "loss": 0.3995, "step": 21640 }, { "epoch": 0.48246436870427095, "grad_norm": 0.3206993639469147, "learning_rate": 1.055047893629581e-05, "loss": 0.2528, "step": 21645 }, { "epoch": 0.482575818084891, "grad_norm": 0.5969932675361633, "learning_rate": 1.0546982893548234e-05, "loss": 0.2286, "step": 21650 }, { "epoch": 0.48268726746551105, "grad_norm": 0.7618280053138733, "learning_rate": 1.0543486783744777e-05, "loss": 0.288, "step": 21655 }, { "epoch": 0.4827987168461311, "grad_norm": 0.726722002029419, "learning_rate": 1.0539990607314036e-05, "loss": 0.3569, "step": 21660 }, { "epoch": 0.4829101662267512, "grad_norm": 0.5121591687202454, "learning_rate": 1.0536494364684615e-05, "loss": 0.3177, "step": 21665 }, { "epoch": 0.4830216156073713, "grad_norm": 0.5842480659484863, "learning_rate": 1.053299805628513e-05, "loss": 0.3213, "step": 21670 }, { "epoch": 0.48313306498799136, "grad_norm": 0.6678650379180908, "learning_rate": 1.052950168254419e-05, "loss": 0.329, "step": 21675 }, { "epoch": 0.4832445143686114, "grad_norm": 0.6162123084068298, "learning_rate": 1.0526005243890434e-05, "loss": 0.3396, "step": 21680 }, { "epoch": 0.48335596374923145, "grad_norm": 0.5280946493148804, "learning_rate": 1.0522508740752498e-05, "loss": 0.3658, "step": 21685 }, { "epoch": 0.48346741312985153, "grad_norm": 0.4918314814567566, "learning_rate": 1.051901217355902e-05, "loss": 0.2598, "step": 21690 }, { "epoch": 0.4835788625104716, "grad_norm": 1.1120338439941406, "learning_rate": 1.0515515542738655e-05, "loss": 0.2327, "step": 21695 }, { "epoch": 0.4836903118910917, "grad_norm": 0.4988137483596802, "learning_rate": 1.0512018848720067e-05, "loss": 0.3211, "step": 21700 }, { "epoch": 0.4838017612717117, "grad_norm": 0.6864381432533264, "learning_rate": 1.050852209193192e-05, "loss": 0.2267, "step": 21705 }, { "epoch": 0.4839132106523318, "grad_norm": 0.924082338809967, "learning_rate": 1.0505025272802885e-05, "loss": 0.3091, "step": 21710 }, { "epoch": 0.48402466003295186, "grad_norm": 0.5768184065818787, "learning_rate": 1.0501528391761655e-05, "loss": 0.236, "step": 21715 }, { "epoch": 0.48413610941357194, "grad_norm": 0.6007546782493591, "learning_rate": 1.0498031449236914e-05, "loss": 0.387, "step": 21720 }, { "epoch": 0.484247558794192, "grad_norm": 0.683722198009491, "learning_rate": 1.0494534445657358e-05, "loss": 0.3609, "step": 21725 }, { "epoch": 0.4843590081748121, "grad_norm": 0.7989122867584229, "learning_rate": 1.04910373814517e-05, "loss": 0.3164, "step": 21730 }, { "epoch": 0.4844704575554321, "grad_norm": 0.45317602157592773, "learning_rate": 1.048754025704865e-05, "loss": 0.3638, "step": 21735 }, { "epoch": 0.4845819069360522, "grad_norm": 0.6904126405715942, "learning_rate": 1.0484043072876927e-05, "loss": 0.3686, "step": 21740 }, { "epoch": 0.48469335631667226, "grad_norm": 0.683846652507782, "learning_rate": 1.0480545829365257e-05, "loss": 0.3373, "step": 21745 }, { "epoch": 0.48480480569729234, "grad_norm": 0.8189532160758972, "learning_rate": 1.0477048526942379e-05, "loss": 0.3134, "step": 21750 }, { "epoch": 0.4849162550779124, "grad_norm": 0.7070431113243103, "learning_rate": 1.0473551166037035e-05, "loss": 0.2997, "step": 21755 }, { "epoch": 0.4850277044585325, "grad_norm": 0.7763333916664124, "learning_rate": 1.047005374707797e-05, "loss": 0.316, "step": 21760 }, { "epoch": 0.4851391538391525, "grad_norm": 0.6913332343101501, "learning_rate": 1.0466556270493948e-05, "loss": 0.3186, "step": 21765 }, { "epoch": 0.4852506032197726, "grad_norm": 0.5847534537315369, "learning_rate": 1.0463058736713728e-05, "loss": 0.4038, "step": 21770 }, { "epoch": 0.48536205260039267, "grad_norm": 0.5487058758735657, "learning_rate": 1.045956114616608e-05, "loss": 0.3587, "step": 21775 }, { "epoch": 0.48547350198101274, "grad_norm": 0.7386172413825989, "learning_rate": 1.0456063499279783e-05, "loss": 0.3604, "step": 21780 }, { "epoch": 0.4855849513616328, "grad_norm": 0.4023517966270447, "learning_rate": 1.0452565796483618e-05, "loss": 0.2326, "step": 21785 }, { "epoch": 0.4856964007422529, "grad_norm": 0.5407540202140808, "learning_rate": 1.0449068038206385e-05, "loss": 0.2755, "step": 21790 }, { "epoch": 0.4858078501228729, "grad_norm": 0.5274724960327148, "learning_rate": 1.0445570224876873e-05, "loss": 0.3664, "step": 21795 }, { "epoch": 0.485919299503493, "grad_norm": 1.241453766822815, "learning_rate": 1.0442072356923893e-05, "loss": 0.244, "step": 21800 }, { "epoch": 0.4860307488841131, "grad_norm": 0.7574704885482788, "learning_rate": 1.0438574434776255e-05, "loss": 0.3552, "step": 21805 }, { "epoch": 0.48614219826473315, "grad_norm": 0.492057204246521, "learning_rate": 1.0435076458862774e-05, "loss": 0.2605, "step": 21810 }, { "epoch": 0.4862536476453532, "grad_norm": 0.6264439821243286, "learning_rate": 1.0431578429612277e-05, "loss": 0.1561, "step": 21815 }, { "epoch": 0.4863650970259733, "grad_norm": 0.5074672102928162, "learning_rate": 1.0428080347453597e-05, "loss": 0.3695, "step": 21820 }, { "epoch": 0.4864765464065933, "grad_norm": 0.49057939648628235, "learning_rate": 1.0424582212815572e-05, "loss": 0.324, "step": 21825 }, { "epoch": 0.4865879957872134, "grad_norm": 0.6426665186882019, "learning_rate": 1.0421084026127043e-05, "loss": 0.4274, "step": 21830 }, { "epoch": 0.4866994451678335, "grad_norm": 0.515211284160614, "learning_rate": 1.0417585787816861e-05, "loss": 0.3988, "step": 21835 }, { "epoch": 0.48681089454845355, "grad_norm": 0.5060247778892517, "learning_rate": 1.041408749831389e-05, "loss": 0.4018, "step": 21840 }, { "epoch": 0.48692234392907363, "grad_norm": 0.4912254512310028, "learning_rate": 1.0410589158046986e-05, "loss": 0.2778, "step": 21845 }, { "epoch": 0.4870337933096937, "grad_norm": 1.7002813816070557, "learning_rate": 1.0407090767445018e-05, "loss": 0.3479, "step": 21850 }, { "epoch": 0.48714524269031373, "grad_norm": 1.522349238395691, "learning_rate": 1.0403592326936867e-05, "loss": 0.2538, "step": 21855 }, { "epoch": 0.4872566920709338, "grad_norm": 0.7782891988754272, "learning_rate": 1.0400093836951412e-05, "loss": 0.3399, "step": 21860 }, { "epoch": 0.4873681414515539, "grad_norm": 0.6137044429779053, "learning_rate": 1.0396595297917535e-05, "loss": 0.3428, "step": 21865 }, { "epoch": 0.48747959083217396, "grad_norm": 0.5912491679191589, "learning_rate": 1.0393096710264143e-05, "loss": 0.2999, "step": 21870 }, { "epoch": 0.48759104021279404, "grad_norm": 0.6235821843147278, "learning_rate": 1.0389598074420126e-05, "loss": 0.428, "step": 21875 }, { "epoch": 0.48770248959341406, "grad_norm": 0.5283631086349487, "learning_rate": 1.0386099390814393e-05, "loss": 0.2223, "step": 21880 }, { "epoch": 0.48781393897403413, "grad_norm": 0.5237429141998291, "learning_rate": 1.0382600659875852e-05, "loss": 0.3552, "step": 21885 }, { "epoch": 0.4879253883546542, "grad_norm": 1.0724443197250366, "learning_rate": 1.0379101882033427e-05, "loss": 0.4636, "step": 21890 }, { "epoch": 0.4880368377352743, "grad_norm": 0.5929918885231018, "learning_rate": 1.0375603057716035e-05, "loss": 0.3857, "step": 21895 }, { "epoch": 0.48814828711589436, "grad_norm": 0.5061531066894531, "learning_rate": 1.0372104187352608e-05, "loss": 0.2441, "step": 21900 }, { "epoch": 0.48825973649651444, "grad_norm": 0.8347680568695068, "learning_rate": 1.0368605271372082e-05, "loss": 0.3225, "step": 21905 }, { "epoch": 0.48837118587713446, "grad_norm": 0.550164520740509, "learning_rate": 1.0365106310203392e-05, "loss": 0.3056, "step": 21910 }, { "epoch": 0.48848263525775454, "grad_norm": 0.7784298062324524, "learning_rate": 1.0361607304275487e-05, "loss": 0.2766, "step": 21915 }, { "epoch": 0.4885940846383746, "grad_norm": 0.8520126342773438, "learning_rate": 1.0358108254017319e-05, "loss": 0.3047, "step": 21920 }, { "epoch": 0.4887055340189947, "grad_norm": 0.5592504739761353, "learning_rate": 1.035460915985784e-05, "loss": 0.3923, "step": 21925 }, { "epoch": 0.48881698339961477, "grad_norm": 0.44507914781570435, "learning_rate": 1.0351110022226021e-05, "loss": 0.2968, "step": 21930 }, { "epoch": 0.48892843278023485, "grad_norm": 0.7121692299842834, "learning_rate": 1.0347610841550821e-05, "loss": 0.2145, "step": 21935 }, { "epoch": 0.48903988216085487, "grad_norm": 0.49924153089523315, "learning_rate": 1.034411161826121e-05, "loss": 0.1799, "step": 21940 }, { "epoch": 0.48915133154147494, "grad_norm": 0.5004662275314331, "learning_rate": 1.0340612352786177e-05, "loss": 0.249, "step": 21945 }, { "epoch": 0.489262780922095, "grad_norm": 0.5530099868774414, "learning_rate": 1.0337113045554696e-05, "loss": 0.3849, "step": 21950 }, { "epoch": 0.4893742303027151, "grad_norm": 0.8083066344261169, "learning_rate": 1.033361369699576e-05, "loss": 0.3543, "step": 21955 }, { "epoch": 0.4894856796833352, "grad_norm": 0.7340057492256165, "learning_rate": 1.0330114307538357e-05, "loss": 0.3615, "step": 21960 }, { "epoch": 0.48959712906395525, "grad_norm": 0.5993553400039673, "learning_rate": 1.032661487761149e-05, "loss": 0.3492, "step": 21965 }, { "epoch": 0.48970857844457527, "grad_norm": 0.708200216293335, "learning_rate": 1.0323115407644158e-05, "loss": 0.3004, "step": 21970 }, { "epoch": 0.48982002782519535, "grad_norm": 0.5713046789169312, "learning_rate": 1.031961589806537e-05, "loss": 0.3034, "step": 21975 }, { "epoch": 0.4899314772058154, "grad_norm": 0.4956084191799164, "learning_rate": 1.0316116349304144e-05, "loss": 0.2956, "step": 21980 }, { "epoch": 0.4900429265864355, "grad_norm": 0.6252642273902893, "learning_rate": 1.0312616761789492e-05, "loss": 0.2746, "step": 21985 }, { "epoch": 0.4901543759670556, "grad_norm": 0.8498284220695496, "learning_rate": 1.0309117135950437e-05, "loss": 0.2398, "step": 21990 }, { "epoch": 0.49026582534767565, "grad_norm": 0.5617647767066956, "learning_rate": 1.0305617472216008e-05, "loss": 0.3071, "step": 21995 }, { "epoch": 0.4903772747282957, "grad_norm": 0.31070756912231445, "learning_rate": 1.030211777101524e-05, "loss": 0.2756, "step": 22000 }, { "epoch": 0.49048872410891575, "grad_norm": 0.6978440880775452, "learning_rate": 1.0298618032777159e-05, "loss": 0.1967, "step": 22005 }, { "epoch": 0.49060017348953583, "grad_norm": 0.57720947265625, "learning_rate": 1.0295118257930813e-05, "loss": 0.3435, "step": 22010 }, { "epoch": 0.4907116228701559, "grad_norm": 0.5618628263473511, "learning_rate": 1.029161844690525e-05, "loss": 0.2808, "step": 22015 }, { "epoch": 0.490823072250776, "grad_norm": 0.4151650667190552, "learning_rate": 1.0288118600129511e-05, "loss": 0.2021, "step": 22020 }, { "epoch": 0.49093452163139606, "grad_norm": 0.6705240607261658, "learning_rate": 1.0284618718032659e-05, "loss": 0.3617, "step": 22025 }, { "epoch": 0.4910459710120161, "grad_norm": 0.4305287003517151, "learning_rate": 1.028111880104375e-05, "loss": 0.2537, "step": 22030 }, { "epoch": 0.49115742039263616, "grad_norm": 0.48318371176719666, "learning_rate": 1.0277618849591845e-05, "loss": 0.3553, "step": 22035 }, { "epoch": 0.49126886977325623, "grad_norm": 0.6259504556655884, "learning_rate": 1.027411886410601e-05, "loss": 0.3473, "step": 22040 }, { "epoch": 0.4913803191538763, "grad_norm": 0.32242828607559204, "learning_rate": 1.027061884501532e-05, "loss": 0.3195, "step": 22045 }, { "epoch": 0.4914917685344964, "grad_norm": 1.1589083671569824, "learning_rate": 1.0267118792748846e-05, "loss": 0.3997, "step": 22050 }, { "epoch": 0.49160321791511646, "grad_norm": 0.6759958267211914, "learning_rate": 1.0263618707735676e-05, "loss": 0.37, "step": 22055 }, { "epoch": 0.4917146672957365, "grad_norm": 0.6928285360336304, "learning_rate": 1.0260118590404881e-05, "loss": 0.2775, "step": 22060 }, { "epoch": 0.49182611667635656, "grad_norm": 0.7928364276885986, "learning_rate": 1.0256618441185557e-05, "loss": 0.2353, "step": 22065 }, { "epoch": 0.49193756605697664, "grad_norm": 0.5948072075843811, "learning_rate": 1.0253118260506793e-05, "loss": 0.3934, "step": 22070 }, { "epoch": 0.4920490154375967, "grad_norm": 0.8399053812026978, "learning_rate": 1.0249618048797687e-05, "loss": 0.3526, "step": 22075 }, { "epoch": 0.4921604648182168, "grad_norm": 0.44372832775115967, "learning_rate": 1.0246117806487328e-05, "loss": 0.3371, "step": 22080 }, { "epoch": 0.4922719141988368, "grad_norm": 0.5984147787094116, "learning_rate": 1.0242617534004831e-05, "loss": 0.177, "step": 22085 }, { "epoch": 0.4923833635794569, "grad_norm": 0.4956534206867218, "learning_rate": 1.0239117231779297e-05, "loss": 0.3363, "step": 22090 }, { "epoch": 0.49249481296007697, "grad_norm": 0.5281473994255066, "learning_rate": 1.0235616900239831e-05, "loss": 0.2418, "step": 22095 }, { "epoch": 0.49260626234069704, "grad_norm": 0.5945612788200378, "learning_rate": 1.0232116539815558e-05, "loss": 0.3105, "step": 22100 }, { "epoch": 0.4927177117213171, "grad_norm": 0.5451594591140747, "learning_rate": 1.0228616150935587e-05, "loss": 0.2799, "step": 22105 }, { "epoch": 0.4928291611019372, "grad_norm": 0.6400839686393738, "learning_rate": 1.0225115734029039e-05, "loss": 0.288, "step": 22110 }, { "epoch": 0.4929406104825572, "grad_norm": 0.43941572308540344, "learning_rate": 1.022161528952504e-05, "loss": 0.3765, "step": 22115 }, { "epoch": 0.4930520598631773, "grad_norm": 0.6126463413238525, "learning_rate": 1.0218114817852718e-05, "loss": 0.3841, "step": 22120 }, { "epoch": 0.49316350924379737, "grad_norm": 1.0110007524490356, "learning_rate": 1.0214614319441202e-05, "loss": 0.3867, "step": 22125 }, { "epoch": 0.49327495862441745, "grad_norm": 0.7327465415000916, "learning_rate": 1.0211113794719627e-05, "loss": 0.2004, "step": 22130 }, { "epoch": 0.4933864080050375, "grad_norm": 0.44659993052482605, "learning_rate": 1.0207613244117132e-05, "loss": 0.2642, "step": 22135 }, { "epoch": 0.4934978573856576, "grad_norm": 0.701119601726532, "learning_rate": 1.0204112668062853e-05, "loss": 0.2889, "step": 22140 }, { "epoch": 0.4936093067662776, "grad_norm": 0.42930060625076294, "learning_rate": 1.020061206698594e-05, "loss": 0.3402, "step": 22145 }, { "epoch": 0.4937207561468977, "grad_norm": 0.6302748322486877, "learning_rate": 1.0197111441315532e-05, "loss": 0.3178, "step": 22150 }, { "epoch": 0.4938322055275178, "grad_norm": 0.5891337394714355, "learning_rate": 1.0193610791480789e-05, "loss": 0.281, "step": 22155 }, { "epoch": 0.49394365490813785, "grad_norm": 0.9075787663459778, "learning_rate": 1.019011011791085e-05, "loss": 0.3301, "step": 22160 }, { "epoch": 0.49405510428875793, "grad_norm": 1.0279183387756348, "learning_rate": 1.018660942103488e-05, "loss": 0.2902, "step": 22165 }, { "epoch": 0.494166553669378, "grad_norm": 0.4951561689376831, "learning_rate": 1.018310870128204e-05, "loss": 0.3365, "step": 22170 }, { "epoch": 0.494278003049998, "grad_norm": 0.5057305693626404, "learning_rate": 1.0179607959081482e-05, "loss": 0.4527, "step": 22175 }, { "epoch": 0.4943894524306181, "grad_norm": 0.640472948551178, "learning_rate": 1.0176107194862377e-05, "loss": 0.2863, "step": 22180 }, { "epoch": 0.4945009018112382, "grad_norm": 0.7052475214004517, "learning_rate": 1.0172606409053887e-05, "loss": 0.4188, "step": 22185 }, { "epoch": 0.49461235119185826, "grad_norm": 0.6334758996963501, "learning_rate": 1.0169105602085182e-05, "loss": 0.3385, "step": 22190 }, { "epoch": 0.49472380057247833, "grad_norm": 0.6031564474105835, "learning_rate": 1.0165604774385441e-05, "loss": 0.2381, "step": 22195 }, { "epoch": 0.4948352499530984, "grad_norm": 0.6718378067016602, "learning_rate": 1.0162103926383828e-05, "loss": 0.2258, "step": 22200 }, { "epoch": 0.49494669933371843, "grad_norm": 0.5733515024185181, "learning_rate": 1.0158603058509527e-05, "loss": 0.2035, "step": 22205 }, { "epoch": 0.4950581487143385, "grad_norm": 0.674278736114502, "learning_rate": 1.0155102171191717e-05, "loss": 0.3144, "step": 22210 }, { "epoch": 0.4951695980949586, "grad_norm": 0.5201547741889954, "learning_rate": 1.0151601264859577e-05, "loss": 0.2628, "step": 22215 }, { "epoch": 0.49528104747557866, "grad_norm": 0.5437789559364319, "learning_rate": 1.0148100339942288e-05, "loss": 0.1872, "step": 22220 }, { "epoch": 0.49539249685619874, "grad_norm": 0.5899885892868042, "learning_rate": 1.0144599396869046e-05, "loss": 0.4151, "step": 22225 }, { "epoch": 0.4955039462368188, "grad_norm": 1.0787954330444336, "learning_rate": 1.0141098436069032e-05, "loss": 0.314, "step": 22230 }, { "epoch": 0.49561539561743884, "grad_norm": 0.6664475202560425, "learning_rate": 1.0137597457971437e-05, "loss": 0.3002, "step": 22235 }, { "epoch": 0.4957268449980589, "grad_norm": 0.5675019025802612, "learning_rate": 1.0134096463005459e-05, "loss": 0.4102, "step": 22240 }, { "epoch": 0.495838294378679, "grad_norm": 0.6319025754928589, "learning_rate": 1.0130595451600289e-05, "loss": 0.2957, "step": 22245 }, { "epoch": 0.49594974375929907, "grad_norm": 0.5857518315315247, "learning_rate": 1.0127094424185122e-05, "loss": 0.3533, "step": 22250 }, { "epoch": 0.49606119313991914, "grad_norm": 0.5915650725364685, "learning_rate": 1.0123593381189161e-05, "loss": 0.3708, "step": 22255 }, { "epoch": 0.49617264252053916, "grad_norm": 0.5313299894332886, "learning_rate": 1.0120092323041606e-05, "loss": 0.3931, "step": 22260 }, { "epoch": 0.49628409190115924, "grad_norm": 0.5031399726867676, "learning_rate": 1.011659125017166e-05, "loss": 0.4215, "step": 22265 }, { "epoch": 0.4963955412817793, "grad_norm": 0.8380836248397827, "learning_rate": 1.0113090163008525e-05, "loss": 0.3264, "step": 22270 }, { "epoch": 0.4965069906623994, "grad_norm": 0.6201096177101135, "learning_rate": 1.0109589061981411e-05, "loss": 0.2423, "step": 22275 }, { "epoch": 0.49661844004301947, "grad_norm": 0.7164033651351929, "learning_rate": 1.0106087947519521e-05, "loss": 0.2604, "step": 22280 }, { "epoch": 0.49672988942363955, "grad_norm": 0.5825961828231812, "learning_rate": 1.010258682005207e-05, "loss": 0.3742, "step": 22285 }, { "epoch": 0.49684133880425957, "grad_norm": 0.5495718717575073, "learning_rate": 1.0099085680008265e-05, "loss": 0.2993, "step": 22290 }, { "epoch": 0.49695278818487965, "grad_norm": 0.6401649713516235, "learning_rate": 1.0095584527817319e-05, "loss": 0.3758, "step": 22295 }, { "epoch": 0.4970642375654997, "grad_norm": 0.5806993246078491, "learning_rate": 1.0092083363908454e-05, "loss": 0.3059, "step": 22300 }, { "epoch": 0.4971756869461198, "grad_norm": 0.6632343530654907, "learning_rate": 1.0088582188710876e-05, "loss": 0.4287, "step": 22305 }, { "epoch": 0.4972871363267399, "grad_norm": 0.33431074023246765, "learning_rate": 1.0085081002653802e-05, "loss": 0.2605, "step": 22310 }, { "epoch": 0.49739858570735995, "grad_norm": 0.5944840312004089, "learning_rate": 1.0081579806166456e-05, "loss": 0.3084, "step": 22315 }, { "epoch": 0.49751003508798, "grad_norm": 0.5025819540023804, "learning_rate": 1.0078078599678058e-05, "loss": 0.2872, "step": 22320 }, { "epoch": 0.49762148446860005, "grad_norm": 0.5288582444190979, "learning_rate": 1.0074577383617822e-05, "loss": 0.3557, "step": 22325 }, { "epoch": 0.4977329338492201, "grad_norm": 0.5158663392066956, "learning_rate": 1.0071076158414977e-05, "loss": 0.2734, "step": 22330 }, { "epoch": 0.4978443832298402, "grad_norm": 0.7956809401512146, "learning_rate": 1.0067574924498744e-05, "loss": 0.3969, "step": 22335 }, { "epoch": 0.4979558326104603, "grad_norm": 0.5699462890625, "learning_rate": 1.0064073682298346e-05, "loss": 0.2774, "step": 22340 }, { "epoch": 0.49806728199108036, "grad_norm": 0.5688024759292603, "learning_rate": 1.0060572432243007e-05, "loss": 0.4471, "step": 22345 }, { "epoch": 0.4981787313717004, "grad_norm": 0.4060691297054291, "learning_rate": 1.0057071174761962e-05, "loss": 0.271, "step": 22350 }, { "epoch": 0.49829018075232046, "grad_norm": 0.6036604642868042, "learning_rate": 1.0053569910284424e-05, "loss": 0.3759, "step": 22355 }, { "epoch": 0.49840163013294053, "grad_norm": 0.6653122305870056, "learning_rate": 1.0050068639239632e-05, "loss": 0.1751, "step": 22360 }, { "epoch": 0.4985130795135606, "grad_norm": 0.554861843585968, "learning_rate": 1.0046567362056811e-05, "loss": 0.3071, "step": 22365 }, { "epoch": 0.4986245288941807, "grad_norm": 0.6937253475189209, "learning_rate": 1.0043066079165194e-05, "loss": 0.2901, "step": 22370 }, { "epoch": 0.49873597827480076, "grad_norm": 0.5724537968635559, "learning_rate": 1.0039564790994002e-05, "loss": 0.2876, "step": 22375 }, { "epoch": 0.4988474276554208, "grad_norm": 0.5790332555770874, "learning_rate": 1.0036063497972475e-05, "loss": 0.3521, "step": 22380 }, { "epoch": 0.49895887703604086, "grad_norm": 0.48917481303215027, "learning_rate": 1.0032562200529843e-05, "loss": 0.302, "step": 22385 }, { "epoch": 0.49907032641666094, "grad_norm": 0.5448917150497437, "learning_rate": 1.0029060899095335e-05, "loss": 0.3715, "step": 22390 }, { "epoch": 0.499181775797281, "grad_norm": 0.502838134765625, "learning_rate": 1.0025559594098185e-05, "loss": 0.2759, "step": 22395 }, { "epoch": 0.4992932251779011, "grad_norm": 0.4868786334991455, "learning_rate": 1.0022058285967627e-05, "loss": 0.2882, "step": 22400 }, { "epoch": 0.49940467455852117, "grad_norm": 0.671261191368103, "learning_rate": 1.0018556975132891e-05, "loss": 0.3673, "step": 22405 }, { "epoch": 0.4995161239391412, "grad_norm": 0.553508996963501, "learning_rate": 1.0015055662023217e-05, "loss": 0.2697, "step": 22410 }, { "epoch": 0.49962757331976126, "grad_norm": 0.6089357733726501, "learning_rate": 1.0011554347067832e-05, "loss": 0.4386, "step": 22415 }, { "epoch": 0.49973902270038134, "grad_norm": 0.7945957183837891, "learning_rate": 1.0008053030695972e-05, "loss": 0.2507, "step": 22420 }, { "epoch": 0.4998504720810014, "grad_norm": 0.658442497253418, "learning_rate": 1.0004551713336876e-05, "loss": 0.2068, "step": 22425 }, { "epoch": 0.4999619214616215, "grad_norm": 0.527000367641449, "learning_rate": 1.000105039541977e-05, "loss": 0.2783, "step": 22430 }, { "epoch": 0.5000733708422416, "grad_norm": 0.4612204432487488, "learning_rate": 9.997549077373898e-06, "loss": 0.2835, "step": 22435 }, { "epoch": 0.5001848202228616, "grad_norm": 0.3289565145969391, "learning_rate": 9.994047759628491e-06, "loss": 0.239, "step": 22440 }, { "epoch": 0.5002962696034817, "grad_norm": 0.7831099033355713, "learning_rate": 9.990546442612783e-06, "loss": 0.2561, "step": 22445 }, { "epoch": 0.5004077189841017, "grad_norm": 0.6953529715538025, "learning_rate": 9.987045126756005e-06, "loss": 0.3294, "step": 22450 }, { "epoch": 0.5005191683647218, "grad_norm": 0.6163928508758545, "learning_rate": 9.983543812487397e-06, "loss": 0.3615, "step": 22455 }, { "epoch": 0.5006306177453419, "grad_norm": 0.34678909182548523, "learning_rate": 9.980042500236187e-06, "loss": 0.3397, "step": 22460 }, { "epoch": 0.5007420671259619, "grad_norm": 0.5026941299438477, "learning_rate": 9.976541190431616e-06, "loss": 0.2778, "step": 22465 }, { "epoch": 0.500853516506582, "grad_norm": 0.6653352975845337, "learning_rate": 9.973039883502912e-06, "loss": 0.2742, "step": 22470 }, { "epoch": 0.5009649658872021, "grad_norm": 0.6822400093078613, "learning_rate": 9.969538579879312e-06, "loss": 0.3038, "step": 22475 }, { "epoch": 0.5010764152678221, "grad_norm": 0.7787721157073975, "learning_rate": 9.966037279990044e-06, "loss": 0.2925, "step": 22480 }, { "epoch": 0.5011878646484422, "grad_norm": 0.5366182327270508, "learning_rate": 9.962535984264347e-06, "loss": 0.3434, "step": 22485 }, { "epoch": 0.5012993140290622, "grad_norm": 0.7126238346099854, "learning_rate": 9.95903469313145e-06, "loss": 0.3031, "step": 22490 }, { "epoch": 0.5014107634096824, "grad_norm": 0.3990797996520996, "learning_rate": 9.955533407020577e-06, "loss": 0.4007, "step": 22495 }, { "epoch": 0.5015222127903024, "grad_norm": 0.680857241153717, "learning_rate": 9.952032126360968e-06, "loss": 0.294, "step": 22500 }, { "epoch": 0.5016336621709225, "grad_norm": 0.660984992980957, "learning_rate": 9.948530851581853e-06, "loss": 0.3406, "step": 22505 }, { "epoch": 0.5017451115515426, "grad_norm": 1.1851459741592407, "learning_rate": 9.945029583112456e-06, "loss": 0.3037, "step": 22510 }, { "epoch": 0.5018565609321626, "grad_norm": 0.5630490779876709, "learning_rate": 9.941528321382008e-06, "loss": 0.2537, "step": 22515 }, { "epoch": 0.5019680103127827, "grad_norm": 0.5093672275543213, "learning_rate": 9.938027066819738e-06, "loss": 0.3292, "step": 22520 }, { "epoch": 0.5020794596934027, "grad_norm": 0.6856085062026978, "learning_rate": 9.934525819854868e-06, "loss": 0.3213, "step": 22525 }, { "epoch": 0.5021909090740229, "grad_norm": 0.5624552965164185, "learning_rate": 9.931024580916626e-06, "loss": 0.3323, "step": 22530 }, { "epoch": 0.5023023584546429, "grad_norm": 0.6010044813156128, "learning_rate": 9.927523350434243e-06, "loss": 0.3975, "step": 22535 }, { "epoch": 0.5024138078352629, "grad_norm": 0.6860697865486145, "learning_rate": 9.924022128836937e-06, "loss": 0.3569, "step": 22540 }, { "epoch": 0.502525257215883, "grad_norm": 0.6552989482879639, "learning_rate": 9.92052091655393e-06, "loss": 0.3626, "step": 22545 }, { "epoch": 0.5026367065965031, "grad_norm": 0.5610737204551697, "learning_rate": 9.917019714014445e-06, "loss": 0.3625, "step": 22550 }, { "epoch": 0.5027481559771232, "grad_norm": 0.7716104388237, "learning_rate": 9.913518521647702e-06, "loss": 0.4169, "step": 22555 }, { "epoch": 0.5028596053577432, "grad_norm": 0.3971973955631256, "learning_rate": 9.910017339882924e-06, "loss": 0.2835, "step": 22560 }, { "epoch": 0.5029710547383633, "grad_norm": 0.5556774139404297, "learning_rate": 9.90651616914932e-06, "loss": 0.2799, "step": 22565 }, { "epoch": 0.5030825041189834, "grad_norm": 0.6435865759849548, "learning_rate": 9.903015009876115e-06, "loss": 0.3382, "step": 22570 }, { "epoch": 0.5031939534996034, "grad_norm": 0.6683843731880188, "learning_rate": 9.899513862492521e-06, "loss": 0.3051, "step": 22575 }, { "epoch": 0.5033054028802235, "grad_norm": 0.5448920726776123, "learning_rate": 9.896012727427754e-06, "loss": 0.3286, "step": 22580 }, { "epoch": 0.5034168522608435, "grad_norm": 0.6063219308853149, "learning_rate": 9.892511605111024e-06, "loss": 0.253, "step": 22585 }, { "epoch": 0.5035283016414637, "grad_norm": 0.877792477607727, "learning_rate": 9.889010495971538e-06, "loss": 0.3547, "step": 22590 }, { "epoch": 0.5036397510220837, "grad_norm": 0.4884467124938965, "learning_rate": 9.885509400438512e-06, "loss": 0.3147, "step": 22595 }, { "epoch": 0.5037512004027037, "grad_norm": 0.6876420974731445, "learning_rate": 9.882008318941145e-06, "loss": 0.2954, "step": 22600 }, { "epoch": 0.5038626497833238, "grad_norm": 0.5861078500747681, "learning_rate": 9.87850725190865e-06, "loss": 0.252, "step": 22605 }, { "epoch": 0.5039740991639439, "grad_norm": 0.5254116654396057, "learning_rate": 9.87500619977023e-06, "loss": 0.4148, "step": 22610 }, { "epoch": 0.504085548544564, "grad_norm": 0.6513392925262451, "learning_rate": 9.871505162955084e-06, "loss": 0.2827, "step": 22615 }, { "epoch": 0.504196997925184, "grad_norm": 0.5880435705184937, "learning_rate": 9.868004141892412e-06, "loss": 0.3372, "step": 22620 }, { "epoch": 0.5043084473058042, "grad_norm": 0.5500809550285339, "learning_rate": 9.864503137011416e-06, "loss": 0.4289, "step": 22625 }, { "epoch": 0.5044198966864242, "grad_norm": 0.7847594022750854, "learning_rate": 9.86100214874129e-06, "loss": 0.3283, "step": 22630 }, { "epoch": 0.5045313460670442, "grad_norm": 0.46724045276641846, "learning_rate": 9.85750117751122e-06, "loss": 0.2784, "step": 22635 }, { "epoch": 0.5046427954476643, "grad_norm": 1.0864177942276, "learning_rate": 9.854000223750412e-06, "loss": 0.4388, "step": 22640 }, { "epoch": 0.5047542448282843, "grad_norm": 0.3599768877029419, "learning_rate": 9.85049928788805e-06, "loss": 0.2593, "step": 22645 }, { "epoch": 0.5048656942089045, "grad_norm": 0.3865143060684204, "learning_rate": 9.846998370353318e-06, "loss": 0.2861, "step": 22650 }, { "epoch": 0.5049771435895245, "grad_norm": 0.5876185894012451, "learning_rate": 9.843497471575409e-06, "loss": 0.3759, "step": 22655 }, { "epoch": 0.5050885929701445, "grad_norm": 0.48538899421691895, "learning_rate": 9.8399965919835e-06, "loss": 0.276, "step": 22660 }, { "epoch": 0.5052000423507647, "grad_norm": 0.6529558897018433, "learning_rate": 9.836495732006774e-06, "loss": 0.4206, "step": 22665 }, { "epoch": 0.5053114917313847, "grad_norm": 0.6429377794265747, "learning_rate": 9.832994892074404e-06, "loss": 0.3484, "step": 22670 }, { "epoch": 0.5054229411120048, "grad_norm": 0.5969396233558655, "learning_rate": 9.829494072615579e-06, "loss": 0.2981, "step": 22675 }, { "epoch": 0.5055343904926248, "grad_norm": 0.44130802154541016, "learning_rate": 9.825993274059463e-06, "loss": 0.1855, "step": 22680 }, { "epoch": 0.5056458398732449, "grad_norm": 0.6406823992729187, "learning_rate": 9.822492496835225e-06, "loss": 0.3845, "step": 22685 }, { "epoch": 0.505757289253865, "grad_norm": 0.7552033066749573, "learning_rate": 9.81899174137204e-06, "loss": 0.3, "step": 22690 }, { "epoch": 0.505868738634485, "grad_norm": 0.5428248643875122, "learning_rate": 9.815491008099068e-06, "loss": 0.2329, "step": 22695 }, { "epoch": 0.5059801880151051, "grad_norm": 0.5956423878669739, "learning_rate": 9.811990297445477e-06, "loss": 0.2874, "step": 22700 }, { "epoch": 0.5060916373957252, "grad_norm": 0.5944362282752991, "learning_rate": 9.80848960984042e-06, "loss": 0.2433, "step": 22705 }, { "epoch": 0.5062030867763453, "grad_norm": 0.7279775738716125, "learning_rate": 9.804988945713062e-06, "loss": 0.3592, "step": 22710 }, { "epoch": 0.5063145361569653, "grad_norm": 0.7761232852935791, "learning_rate": 9.801488305492553e-06, "loss": 0.4098, "step": 22715 }, { "epoch": 0.5064259855375853, "grad_norm": 0.5941323637962341, "learning_rate": 9.797987689608047e-06, "loss": 0.2724, "step": 22720 }, { "epoch": 0.5065374349182055, "grad_norm": 0.6884803175926208, "learning_rate": 9.79448709848869e-06, "loss": 0.3842, "step": 22725 }, { "epoch": 0.5066488842988255, "grad_norm": 0.4874166250228882, "learning_rate": 9.790986532563627e-06, "loss": 0.2648, "step": 22730 }, { "epoch": 0.5067603336794456, "grad_norm": 0.5346237421035767, "learning_rate": 9.787485992262004e-06, "loss": 0.3159, "step": 22735 }, { "epoch": 0.5068717830600656, "grad_norm": 0.6470242738723755, "learning_rate": 9.783985478012954e-06, "loss": 0.3349, "step": 22740 }, { "epoch": 0.5069832324406857, "grad_norm": 0.5535328984260559, "learning_rate": 9.780484990245619e-06, "loss": 0.2287, "step": 22745 }, { "epoch": 0.5070946818213058, "grad_norm": 0.7618942856788635, "learning_rate": 9.776984529389132e-06, "loss": 0.3348, "step": 22750 }, { "epoch": 0.5072061312019258, "grad_norm": 0.524957001209259, "learning_rate": 9.77348409587262e-06, "loss": 0.3553, "step": 22755 }, { "epoch": 0.507317580582546, "grad_norm": 0.6703402400016785, "learning_rate": 9.769983690125208e-06, "loss": 0.323, "step": 22760 }, { "epoch": 0.507429029963166, "grad_norm": 0.7614783048629761, "learning_rate": 9.766483312576022e-06, "loss": 0.2614, "step": 22765 }, { "epoch": 0.5075404793437861, "grad_norm": 0.6554802060127258, "learning_rate": 9.762982963654178e-06, "loss": 0.327, "step": 22770 }, { "epoch": 0.5076519287244061, "grad_norm": 0.4557334780693054, "learning_rate": 9.759482643788792e-06, "loss": 0.3442, "step": 22775 }, { "epoch": 0.5077633781050261, "grad_norm": 0.43107250332832336, "learning_rate": 9.755982353408976e-06, "loss": 0.2592, "step": 22780 }, { "epoch": 0.5078748274856463, "grad_norm": 0.5819715261459351, "learning_rate": 9.752482092943844e-06, "loss": 0.3049, "step": 22785 }, { "epoch": 0.5079862768662663, "grad_norm": 0.6508607864379883, "learning_rate": 9.748981862822494e-06, "loss": 0.2143, "step": 22790 }, { "epoch": 0.5080977262468864, "grad_norm": 0.7444683313369751, "learning_rate": 9.745481663474033e-06, "loss": 0.3441, "step": 22795 }, { "epoch": 0.5082091756275064, "grad_norm": 0.34373778104782104, "learning_rate": 9.741981495327555e-06, "loss": 0.3651, "step": 22800 }, { "epoch": 0.5083206250081265, "grad_norm": 0.6767758131027222, "learning_rate": 9.738481358812152e-06, "loss": 0.2992, "step": 22805 }, { "epoch": 0.5084320743887466, "grad_norm": 0.5160681009292603, "learning_rate": 9.734981254356913e-06, "loss": 0.3437, "step": 22810 }, { "epoch": 0.5085435237693666, "grad_norm": 0.5730963945388794, "learning_rate": 9.731481182390932e-06, "loss": 0.3474, "step": 22815 }, { "epoch": 0.5086549731499868, "grad_norm": 0.36844706535339355, "learning_rate": 9.727981143343284e-06, "loss": 0.2352, "step": 22820 }, { "epoch": 0.5087664225306068, "grad_norm": 0.5087379813194275, "learning_rate": 9.724481137643047e-06, "loss": 0.4335, "step": 22825 }, { "epoch": 0.5088778719112268, "grad_norm": 0.9651373624801636, "learning_rate": 9.7209811657193e-06, "loss": 0.3421, "step": 22830 }, { "epoch": 0.5089893212918469, "grad_norm": 0.5327233076095581, "learning_rate": 9.717481228001103e-06, "loss": 0.2985, "step": 22835 }, { "epoch": 0.509100770672467, "grad_norm": 0.6139131188392639, "learning_rate": 9.713981324917529e-06, "loss": 0.3014, "step": 22840 }, { "epoch": 0.5092122200530871, "grad_norm": 0.766156017780304, "learning_rate": 9.710481456897633e-06, "loss": 0.2784, "step": 22845 }, { "epoch": 0.5093236694337071, "grad_norm": 0.5155205130577087, "learning_rate": 9.706981624370481e-06, "loss": 0.2782, "step": 22850 }, { "epoch": 0.5094351188143272, "grad_norm": 0.5603840947151184, "learning_rate": 9.703481827765117e-06, "loss": 0.2385, "step": 22855 }, { "epoch": 0.5095465681949473, "grad_norm": 0.9208455085754395, "learning_rate": 9.699982067510595e-06, "loss": 0.2422, "step": 22860 }, { "epoch": 0.5096580175755673, "grad_norm": 0.5859543681144714, "learning_rate": 9.696482344035954e-06, "loss": 0.3954, "step": 22865 }, { "epoch": 0.5097694669561874, "grad_norm": 0.6649774312973022, "learning_rate": 9.692982657770236e-06, "loss": 0.2863, "step": 22870 }, { "epoch": 0.5098809163368074, "grad_norm": 0.6705203652381897, "learning_rate": 9.689483009142475e-06, "loss": 0.3382, "step": 22875 }, { "epoch": 0.5099923657174276, "grad_norm": 0.46043553948402405, "learning_rate": 9.685983398581698e-06, "loss": 0.299, "step": 22880 }, { "epoch": 0.5101038150980476, "grad_norm": 0.9133014678955078, "learning_rate": 9.68248382651693e-06, "loss": 0.2856, "step": 22885 }, { "epoch": 0.5102152644786676, "grad_norm": 0.866908073425293, "learning_rate": 9.678984293377198e-06, "loss": 0.3087, "step": 22890 }, { "epoch": 0.5103267138592877, "grad_norm": 0.683866024017334, "learning_rate": 9.675484799591515e-06, "loss": 0.3811, "step": 22895 }, { "epoch": 0.5104381632399078, "grad_norm": 0.585959792137146, "learning_rate": 9.671985345588887e-06, "loss": 0.3753, "step": 22900 }, { "epoch": 0.5105496126205279, "grad_norm": 0.7192093729972839, "learning_rate": 9.668485931798327e-06, "loss": 0.2072, "step": 22905 }, { "epoch": 0.5106610620011479, "grad_norm": 0.527054488658905, "learning_rate": 9.66498655864883e-06, "loss": 0.3902, "step": 22910 }, { "epoch": 0.510772511381768, "grad_norm": 0.5373089909553528, "learning_rate": 9.661487226569397e-06, "loss": 0.2589, "step": 22915 }, { "epoch": 0.5108839607623881, "grad_norm": 0.458723247051239, "learning_rate": 9.657987935989014e-06, "loss": 0.2575, "step": 22920 }, { "epoch": 0.5109954101430081, "grad_norm": 0.5586103200912476, "learning_rate": 9.654488687336673e-06, "loss": 0.3368, "step": 22925 }, { "epoch": 0.5111068595236282, "grad_norm": 1.0093234777450562, "learning_rate": 9.65098948104135e-06, "loss": 0.2647, "step": 22930 }, { "epoch": 0.5112183089042482, "grad_norm": 0.7439244985580444, "learning_rate": 9.647490317532026e-06, "loss": 0.3487, "step": 22935 }, { "epoch": 0.5113297582848684, "grad_norm": 0.7570911645889282, "learning_rate": 9.643991197237668e-06, "loss": 0.2823, "step": 22940 }, { "epoch": 0.5114412076654884, "grad_norm": 0.5762554407119751, "learning_rate": 9.640492120587237e-06, "loss": 0.4238, "step": 22945 }, { "epoch": 0.5115526570461084, "grad_norm": 0.42711982131004333, "learning_rate": 9.636993088009703e-06, "loss": 0.3863, "step": 22950 }, { "epoch": 0.5116641064267285, "grad_norm": 0.5918402075767517, "learning_rate": 9.633494099934008e-06, "loss": 0.3001, "step": 22955 }, { "epoch": 0.5117755558073486, "grad_norm": 0.4869014322757721, "learning_rate": 9.62999515678911e-06, "loss": 0.2149, "step": 22960 }, { "epoch": 0.5118870051879687, "grad_norm": 0.6010558605194092, "learning_rate": 9.62649625900395e-06, "loss": 0.3213, "step": 22965 }, { "epoch": 0.5119984545685887, "grad_norm": 0.5538244247436523, "learning_rate": 9.622997407007467e-06, "loss": 0.2962, "step": 22970 }, { "epoch": 0.5121099039492089, "grad_norm": 0.5053831338882446, "learning_rate": 9.61949860122859e-06, "loss": 0.3091, "step": 22975 }, { "epoch": 0.5122213533298289, "grad_norm": 0.4428863525390625, "learning_rate": 9.61599984209625e-06, "loss": 0.4138, "step": 22980 }, { "epoch": 0.5123328027104489, "grad_norm": 0.8053079843521118, "learning_rate": 9.612501130039364e-06, "loss": 0.4502, "step": 22985 }, { "epoch": 0.512444252091069, "grad_norm": 0.6244291067123413, "learning_rate": 9.609002465486845e-06, "loss": 0.3958, "step": 22990 }, { "epoch": 0.512555701471689, "grad_norm": 0.6197370290756226, "learning_rate": 9.605503848867608e-06, "loss": 0.3291, "step": 22995 }, { "epoch": 0.5126671508523092, "grad_norm": 0.7503328323364258, "learning_rate": 9.602005280610556e-06, "loss": 0.2703, "step": 23000 }, { "epoch": 0.5127786002329292, "grad_norm": 0.608693540096283, "learning_rate": 9.598506761144582e-06, "loss": 0.3146, "step": 23005 }, { "epoch": 0.5128900496135492, "grad_norm": 0.6147252917289734, "learning_rate": 9.595008290898582e-06, "loss": 0.2822, "step": 23010 }, { "epoch": 0.5130014989941694, "grad_norm": 0.6361362934112549, "learning_rate": 9.59150987030144e-06, "loss": 0.2855, "step": 23015 }, { "epoch": 0.5131129483747894, "grad_norm": 0.5929672122001648, "learning_rate": 9.588011499782033e-06, "loss": 0.3091, "step": 23020 }, { "epoch": 0.5132243977554095, "grad_norm": 0.5976213216781616, "learning_rate": 9.584513179769233e-06, "loss": 0.344, "step": 23025 }, { "epoch": 0.5133358471360295, "grad_norm": 0.612542450428009, "learning_rate": 9.581014910691915e-06, "loss": 0.3292, "step": 23030 }, { "epoch": 0.5134472965166496, "grad_norm": 0.6661171317100525, "learning_rate": 9.577516692978935e-06, "loss": 0.3059, "step": 23035 }, { "epoch": 0.5135587458972697, "grad_norm": 0.6575843691825867, "learning_rate": 9.574018527059144e-06, "loss": 0.2872, "step": 23040 }, { "epoch": 0.5136701952778897, "grad_norm": 0.5535309314727783, "learning_rate": 9.570520413361398e-06, "loss": 0.2884, "step": 23045 }, { "epoch": 0.5137816446585098, "grad_norm": 0.7088550925254822, "learning_rate": 9.56702235231453e-06, "loss": 0.3696, "step": 23050 }, { "epoch": 0.5138930940391299, "grad_norm": 0.6000550985336304, "learning_rate": 9.563524344347384e-06, "loss": 0.5036, "step": 23055 }, { "epoch": 0.51400454341975, "grad_norm": 0.6340003609657288, "learning_rate": 9.560026389888777e-06, "loss": 0.3908, "step": 23060 }, { "epoch": 0.51411599280037, "grad_norm": 0.6714855432510376, "learning_rate": 9.556528489367545e-06, "loss": 0.2814, "step": 23065 }, { "epoch": 0.51422744218099, "grad_norm": 0.7142869830131531, "learning_rate": 9.553030643212494e-06, "loss": 0.2587, "step": 23070 }, { "epoch": 0.5143388915616102, "grad_norm": 0.667551577091217, "learning_rate": 9.54953285185244e-06, "loss": 0.2403, "step": 23075 }, { "epoch": 0.5144503409422302, "grad_norm": 0.5600367784500122, "learning_rate": 9.546035115716178e-06, "loss": 0.221, "step": 23080 }, { "epoch": 0.5145617903228503, "grad_norm": 0.3773113489151001, "learning_rate": 9.542537435232508e-06, "loss": 0.2968, "step": 23085 }, { "epoch": 0.5146732397034703, "grad_norm": 0.6152325868606567, "learning_rate": 9.539039810830217e-06, "loss": 0.3402, "step": 23090 }, { "epoch": 0.5147846890840904, "grad_norm": 0.6277587413787842, "learning_rate": 9.535542242938084e-06, "loss": 0.3275, "step": 23095 }, { "epoch": 0.5148961384647105, "grad_norm": 0.4143803119659424, "learning_rate": 9.532044731984889e-06, "loss": 0.2836, "step": 23100 }, { "epoch": 0.5150075878453305, "grad_norm": 0.5582551956176758, "learning_rate": 9.5285472783994e-06, "loss": 0.4107, "step": 23105 }, { "epoch": 0.5151190372259506, "grad_norm": 0.48419493436813354, "learning_rate": 9.525049882610374e-06, "loss": 0.2143, "step": 23110 }, { "epoch": 0.5152304866065707, "grad_norm": 0.7525806427001953, "learning_rate": 9.521552545046566e-06, "loss": 0.3405, "step": 23115 }, { "epoch": 0.5153419359871908, "grad_norm": 0.5951476693153381, "learning_rate": 9.518055266136725e-06, "loss": 0.2919, "step": 23120 }, { "epoch": 0.5154533853678108, "grad_norm": 0.7247080206871033, "learning_rate": 9.514558046309585e-06, "loss": 0.3474, "step": 23125 }, { "epoch": 0.5155648347484308, "grad_norm": 0.8065999150276184, "learning_rate": 9.511060885993883e-06, "loss": 0.2848, "step": 23130 }, { "epoch": 0.515676284129051, "grad_norm": 0.5279972553253174, "learning_rate": 9.507563785618343e-06, "loss": 0.3519, "step": 23135 }, { "epoch": 0.515787733509671, "grad_norm": 0.6969117522239685, "learning_rate": 9.504066745611682e-06, "loss": 0.281, "step": 23140 }, { "epoch": 0.5158991828902911, "grad_norm": 0.4764590263366699, "learning_rate": 9.500569766402607e-06, "loss": 0.1984, "step": 23145 }, { "epoch": 0.5160106322709112, "grad_norm": 0.5910069942474365, "learning_rate": 9.497072848419828e-06, "loss": 0.3462, "step": 23150 }, { "epoch": 0.5161220816515312, "grad_norm": 0.4852128326892853, "learning_rate": 9.493575992092035e-06, "loss": 0.245, "step": 23155 }, { "epoch": 0.5162335310321513, "grad_norm": 0.629984438419342, "learning_rate": 9.490079197847915e-06, "loss": 0.265, "step": 23160 }, { "epoch": 0.5163449804127713, "grad_norm": 0.4971965551376343, "learning_rate": 9.486582466116147e-06, "loss": 0.1871, "step": 23165 }, { "epoch": 0.5164564297933915, "grad_norm": 0.781303346157074, "learning_rate": 9.483085797325408e-06, "loss": 0.2245, "step": 23170 }, { "epoch": 0.5165678791740115, "grad_norm": 0.6366822719573975, "learning_rate": 9.47958919190436e-06, "loss": 0.322, "step": 23175 }, { "epoch": 0.5166793285546316, "grad_norm": 0.6994455456733704, "learning_rate": 9.476092650281661e-06, "loss": 0.3182, "step": 23180 }, { "epoch": 0.5167907779352516, "grad_norm": 0.5690954923629761, "learning_rate": 9.472596172885962e-06, "loss": 0.332, "step": 23185 }, { "epoch": 0.5169022273158717, "grad_norm": 0.3832198977470398, "learning_rate": 9.469099760145896e-06, "loss": 0.3166, "step": 23190 }, { "epoch": 0.5170136766964918, "grad_norm": 0.8475557565689087, "learning_rate": 9.465603412490105e-06, "loss": 0.2004, "step": 23195 }, { "epoch": 0.5171251260771118, "grad_norm": 0.5788425207138062, "learning_rate": 9.462107130347206e-06, "loss": 0.2165, "step": 23200 }, { "epoch": 0.5172365754577319, "grad_norm": 0.5698291063308716, "learning_rate": 9.458610914145826e-06, "loss": 0.3324, "step": 23205 }, { "epoch": 0.517348024838352, "grad_norm": 0.5328242778778076, "learning_rate": 9.455114764314566e-06, "loss": 0.3755, "step": 23210 }, { "epoch": 0.517459474218972, "grad_norm": 0.49963513016700745, "learning_rate": 9.451618681282034e-06, "loss": 0.3728, "step": 23215 }, { "epoch": 0.5175709235995921, "grad_norm": 0.5842313170433044, "learning_rate": 9.448122665476814e-06, "loss": 0.3258, "step": 23220 }, { "epoch": 0.5176823729802121, "grad_norm": 0.6654723286628723, "learning_rate": 9.444626717327499e-06, "loss": 0.2704, "step": 23225 }, { "epoch": 0.5177938223608323, "grad_norm": 0.3355579674243927, "learning_rate": 9.441130837262662e-06, "loss": 0.3187, "step": 23230 }, { "epoch": 0.5179052717414523, "grad_norm": 0.6794620752334595, "learning_rate": 9.437635025710863e-06, "loss": 0.3512, "step": 23235 }, { "epoch": 0.5180167211220723, "grad_norm": 0.9019680619239807, "learning_rate": 9.434139283100674e-06, "loss": 0.4195, "step": 23240 }, { "epoch": 0.5181281705026924, "grad_norm": 0.7227687835693359, "learning_rate": 9.430643609860644e-06, "loss": 0.3432, "step": 23245 }, { "epoch": 0.5182396198833125, "grad_norm": 0.42706429958343506, "learning_rate": 9.427148006419312e-06, "loss": 0.2238, "step": 23250 }, { "epoch": 0.5183510692639326, "grad_norm": 0.6106488704681396, "learning_rate": 9.42365247320521e-06, "loss": 0.2121, "step": 23255 }, { "epoch": 0.5184625186445526, "grad_norm": 0.7338367104530334, "learning_rate": 9.42015701064687e-06, "loss": 0.424, "step": 23260 }, { "epoch": 0.5185739680251727, "grad_norm": 0.5893292427062988, "learning_rate": 9.4166616191728e-06, "loss": 0.3253, "step": 23265 }, { "epoch": 0.5186854174057928, "grad_norm": 0.6662803292274475, "learning_rate": 9.41316629921151e-06, "loss": 0.4152, "step": 23270 }, { "epoch": 0.5187968667864128, "grad_norm": 0.5518497228622437, "learning_rate": 9.40967105119151e-06, "loss": 0.3649, "step": 23275 }, { "epoch": 0.5189083161670329, "grad_norm": 0.6257652044296265, "learning_rate": 9.40617587554128e-06, "loss": 0.4227, "step": 23280 }, { "epoch": 0.519019765547653, "grad_norm": 0.7822826504707336, "learning_rate": 9.402680772689303e-06, "loss": 0.2985, "step": 23285 }, { "epoch": 0.5191312149282731, "grad_norm": 0.6136816143989563, "learning_rate": 9.399185743064055e-06, "loss": 0.3674, "step": 23290 }, { "epoch": 0.5192426643088931, "grad_norm": 0.7093392014503479, "learning_rate": 9.395690787093995e-06, "loss": 0.2932, "step": 23295 }, { "epoch": 0.5193541136895131, "grad_norm": 0.5666813254356384, "learning_rate": 9.392195905207581e-06, "loss": 0.2642, "step": 23300 }, { "epoch": 0.5194655630701333, "grad_norm": 0.5724304914474487, "learning_rate": 9.388701097833252e-06, "loss": 0.3962, "step": 23305 }, { "epoch": 0.5195770124507533, "grad_norm": 0.5531351566314697, "learning_rate": 9.385206365399457e-06, "loss": 0.279, "step": 23310 }, { "epoch": 0.5196884618313734, "grad_norm": 0.5691862106323242, "learning_rate": 9.381711708334613e-06, "loss": 0.2841, "step": 23315 }, { "epoch": 0.5197999112119934, "grad_norm": 0.4270021319389343, "learning_rate": 9.378217127067144e-06, "loss": 0.3192, "step": 23320 }, { "epoch": 0.5199113605926136, "grad_norm": 0.684860348701477, "learning_rate": 9.374722622025454e-06, "loss": 0.3051, "step": 23325 }, { "epoch": 0.5200228099732336, "grad_norm": 0.4866192638874054, "learning_rate": 9.371228193637943e-06, "loss": 0.3273, "step": 23330 }, { "epoch": 0.5201342593538536, "grad_norm": 0.6408313512802124, "learning_rate": 9.367733842333004e-06, "loss": 0.34, "step": 23335 }, { "epoch": 0.5202457087344737, "grad_norm": 0.8118225336074829, "learning_rate": 9.364239568539012e-06, "loss": 0.3525, "step": 23340 }, { "epoch": 0.5203571581150938, "grad_norm": 0.5743765830993652, "learning_rate": 9.360745372684346e-06, "loss": 0.2737, "step": 23345 }, { "epoch": 0.5204686074957139, "grad_norm": 0.47640687227249146, "learning_rate": 9.35725125519736e-06, "loss": 0.2322, "step": 23350 }, { "epoch": 0.5205800568763339, "grad_norm": 0.4566701352596283, "learning_rate": 9.353757216506411e-06, "loss": 0.2681, "step": 23355 }, { "epoch": 0.5206915062569539, "grad_norm": 1.2294862270355225, "learning_rate": 9.350263257039837e-06, "loss": 0.2964, "step": 23360 }, { "epoch": 0.5208029556375741, "grad_norm": 0.4802538752555847, "learning_rate": 9.346769377225974e-06, "loss": 0.2547, "step": 23365 }, { "epoch": 0.5209144050181941, "grad_norm": 0.6217039227485657, "learning_rate": 9.343275577493146e-06, "loss": 0.3537, "step": 23370 }, { "epoch": 0.5210258543988142, "grad_norm": 0.41138404607772827, "learning_rate": 9.339781858269655e-06, "loss": 0.1877, "step": 23375 }, { "epoch": 0.5211373037794342, "grad_norm": 0.6733947396278381, "learning_rate": 9.336288219983817e-06, "loss": 0.4094, "step": 23380 }, { "epoch": 0.5212487531600544, "grad_norm": 0.7708743810653687, "learning_rate": 9.332794663063922e-06, "loss": 0.3424, "step": 23385 }, { "epoch": 0.5213602025406744, "grad_norm": 0.5516330003738403, "learning_rate": 9.329301187938249e-06, "loss": 0.2601, "step": 23390 }, { "epoch": 0.5214716519212944, "grad_norm": 0.8098486661911011, "learning_rate": 9.325807795035074e-06, "loss": 0.3064, "step": 23395 }, { "epoch": 0.5215831013019145, "grad_norm": 0.628699779510498, "learning_rate": 9.32231448478266e-06, "loss": 0.2739, "step": 23400 }, { "epoch": 0.5216945506825346, "grad_norm": 0.47968655824661255, "learning_rate": 9.318821257609256e-06, "loss": 0.3494, "step": 23405 }, { "epoch": 0.5218060000631547, "grad_norm": 0.37704741954803467, "learning_rate": 9.315328113943111e-06, "loss": 0.3228, "step": 23410 }, { "epoch": 0.5219174494437747, "grad_norm": 0.8004491925239563, "learning_rate": 9.311835054212452e-06, "loss": 0.3071, "step": 23415 }, { "epoch": 0.5220288988243947, "grad_norm": 0.6016726493835449, "learning_rate": 9.308342078845506e-06, "loss": 0.4179, "step": 23420 }, { "epoch": 0.5221403482050149, "grad_norm": 0.7621659636497498, "learning_rate": 9.304849188270481e-06, "loss": 0.3633, "step": 23425 }, { "epoch": 0.5222517975856349, "grad_norm": 0.6040893793106079, "learning_rate": 9.30135638291558e-06, "loss": 0.3027, "step": 23430 }, { "epoch": 0.522363246966255, "grad_norm": 0.8616956472396851, "learning_rate": 9.297863663208995e-06, "loss": 0.2637, "step": 23435 }, { "epoch": 0.522474696346875, "grad_norm": 0.5304297208786011, "learning_rate": 9.294371029578905e-06, "loss": 0.2171, "step": 23440 }, { "epoch": 0.5225861457274951, "grad_norm": 0.6328489780426025, "learning_rate": 9.290878482453481e-06, "loss": 0.4316, "step": 23445 }, { "epoch": 0.5226975951081152, "grad_norm": 0.4458315074443817, "learning_rate": 9.287386022260877e-06, "loss": 0.278, "step": 23450 }, { "epoch": 0.5228090444887352, "grad_norm": 0.750149130821228, "learning_rate": 9.283893649429248e-06, "loss": 0.3191, "step": 23455 }, { "epoch": 0.5229204938693554, "grad_norm": 0.4905281662940979, "learning_rate": 9.280401364386731e-06, "loss": 0.2809, "step": 23460 }, { "epoch": 0.5230319432499754, "grad_norm": 0.8535996079444885, "learning_rate": 9.276909167561454e-06, "loss": 0.3205, "step": 23465 }, { "epoch": 0.5231433926305955, "grad_norm": 0.43157458305358887, "learning_rate": 9.27341705938153e-06, "loss": 0.1815, "step": 23470 }, { "epoch": 0.5232548420112155, "grad_norm": 0.8516057729721069, "learning_rate": 9.269925040275066e-06, "loss": 0.2436, "step": 23475 }, { "epoch": 0.5233662913918355, "grad_norm": 0.6305704116821289, "learning_rate": 9.266433110670157e-06, "loss": 0.2275, "step": 23480 }, { "epoch": 0.5234777407724557, "grad_norm": 0.39994460344314575, "learning_rate": 9.262941270994886e-06, "loss": 0.3378, "step": 23485 }, { "epoch": 0.5235891901530757, "grad_norm": 0.6393985748291016, "learning_rate": 9.259449521677323e-06, "loss": 0.3433, "step": 23490 }, { "epoch": 0.5237006395336958, "grad_norm": 0.6144167184829712, "learning_rate": 9.255957863145538e-06, "loss": 0.3347, "step": 23495 }, { "epoch": 0.5238120889143159, "grad_norm": 0.6233029961585999, "learning_rate": 9.252466295827572e-06, "loss": 0.3765, "step": 23500 }, { "epoch": 0.5239235382949359, "grad_norm": 0.5702547430992126, "learning_rate": 9.248974820151472e-06, "loss": 0.2417, "step": 23505 }, { "epoch": 0.524034987675556, "grad_norm": 0.47504016757011414, "learning_rate": 9.24548343654526e-06, "loss": 0.3524, "step": 23510 }, { "epoch": 0.524146437056176, "grad_norm": 0.6024385094642639, "learning_rate": 9.241992145436953e-06, "loss": 0.2762, "step": 23515 }, { "epoch": 0.5242578864367962, "grad_norm": 0.3721349835395813, "learning_rate": 9.238500947254558e-06, "loss": 0.2902, "step": 23520 }, { "epoch": 0.5243693358174162, "grad_norm": 0.8836500644683838, "learning_rate": 9.23500984242607e-06, "loss": 0.4422, "step": 23525 }, { "epoch": 0.5244807851980363, "grad_norm": 0.3741062879562378, "learning_rate": 9.23151883137947e-06, "loss": 0.2211, "step": 23530 }, { "epoch": 0.5245922345786563, "grad_norm": 0.5023152232170105, "learning_rate": 9.228027914542733e-06, "loss": 0.289, "step": 23535 }, { "epoch": 0.5247036839592764, "grad_norm": 0.3265573978424072, "learning_rate": 9.224537092343812e-06, "loss": 0.2948, "step": 23540 }, { "epoch": 0.5248151333398965, "grad_norm": 0.5927016735076904, "learning_rate": 9.221046365210659e-06, "loss": 0.4409, "step": 23545 }, { "epoch": 0.5249265827205165, "grad_norm": 0.5317661762237549, "learning_rate": 9.217555733571208e-06, "loss": 0.4329, "step": 23550 }, { "epoch": 0.5250380321011366, "grad_norm": 0.6136202216148376, "learning_rate": 9.214065197853383e-06, "loss": 0.2988, "step": 23555 }, { "epoch": 0.5251494814817567, "grad_norm": 0.7506454586982727, "learning_rate": 9.2105747584851e-06, "loss": 0.2864, "step": 23560 }, { "epoch": 0.5252609308623767, "grad_norm": 0.5869132876396179, "learning_rate": 9.207084415894258e-06, "loss": 0.375, "step": 23565 }, { "epoch": 0.5253723802429968, "grad_norm": 0.6257401704788208, "learning_rate": 9.203594170508747e-06, "loss": 0.3417, "step": 23570 }, { "epoch": 0.5254838296236168, "grad_norm": 0.7179450392723083, "learning_rate": 9.200104022756443e-06, "loss": 0.296, "step": 23575 }, { "epoch": 0.525595279004237, "grad_norm": 0.4368216097354889, "learning_rate": 9.196613973065212e-06, "loss": 0.2702, "step": 23580 }, { "epoch": 0.525706728384857, "grad_norm": 0.5079479813575745, "learning_rate": 9.193124021862907e-06, "loss": 0.2096, "step": 23585 }, { "epoch": 0.5258181777654771, "grad_norm": 0.6696041226387024, "learning_rate": 9.189634169577365e-06, "loss": 0.3816, "step": 23590 }, { "epoch": 0.5259296271460971, "grad_norm": 0.8143077492713928, "learning_rate": 9.186144416636422e-06, "loss": 0.4566, "step": 23595 }, { "epoch": 0.5260410765267172, "grad_norm": 0.8123489618301392, "learning_rate": 9.18265476346789e-06, "loss": 0.3496, "step": 23600 }, { "epoch": 0.5261525259073373, "grad_norm": 0.405198335647583, "learning_rate": 9.179165210499579e-06, "loss": 0.2687, "step": 23605 }, { "epoch": 0.5262639752879573, "grad_norm": 0.5814400911331177, "learning_rate": 9.175675758159273e-06, "loss": 0.2619, "step": 23610 }, { "epoch": 0.5263754246685775, "grad_norm": 0.7175332307815552, "learning_rate": 9.17218640687476e-06, "loss": 0.3822, "step": 23615 }, { "epoch": 0.5264868740491975, "grad_norm": 0.5167621970176697, "learning_rate": 9.1686971570738e-06, "loss": 0.2828, "step": 23620 }, { "epoch": 0.5265983234298175, "grad_norm": 0.5517563819885254, "learning_rate": 9.16520800918415e-06, "loss": 0.3361, "step": 23625 }, { "epoch": 0.5267097728104376, "grad_norm": 1.081498146057129, "learning_rate": 9.16171896363356e-06, "loss": 0.2769, "step": 23630 }, { "epoch": 0.5268212221910576, "grad_norm": 0.5097198486328125, "learning_rate": 9.158230020849754e-06, "loss": 0.2926, "step": 23635 }, { "epoch": 0.5269326715716778, "grad_norm": 0.9057488441467285, "learning_rate": 9.154741181260446e-06, "loss": 0.2774, "step": 23640 }, { "epoch": 0.5270441209522978, "grad_norm": 0.8249532580375671, "learning_rate": 9.151252445293349e-06, "loss": 0.2342, "step": 23645 }, { "epoch": 0.5271555703329178, "grad_norm": 0.5287266969680786, "learning_rate": 9.14776381337615e-06, "loss": 0.276, "step": 23650 }, { "epoch": 0.527267019713538, "grad_norm": 0.8385326266288757, "learning_rate": 9.144275285936527e-06, "loss": 0.3539, "step": 23655 }, { "epoch": 0.527378469094158, "grad_norm": 0.6637365818023682, "learning_rate": 9.140786863402148e-06, "loss": 0.3495, "step": 23660 }, { "epoch": 0.5274899184747781, "grad_norm": 0.8243251442909241, "learning_rate": 9.13729854620067e-06, "loss": 0.3066, "step": 23665 }, { "epoch": 0.5276013678553981, "grad_norm": 0.42031049728393555, "learning_rate": 9.13381033475973e-06, "loss": 0.3982, "step": 23670 }, { "epoch": 0.5277128172360183, "grad_norm": 0.656934380531311, "learning_rate": 9.130322229506958e-06, "loss": 0.2919, "step": 23675 }, { "epoch": 0.5278242666166383, "grad_norm": 0.742068886756897, "learning_rate": 9.12683423086997e-06, "loss": 0.4047, "step": 23680 }, { "epoch": 0.5279357159972583, "grad_norm": 0.6763992309570312, "learning_rate": 9.12334633927636e-06, "loss": 0.1972, "step": 23685 }, { "epoch": 0.5280471653778784, "grad_norm": 0.514451265335083, "learning_rate": 9.119858555153727e-06, "loss": 0.2933, "step": 23690 }, { "epoch": 0.5281586147584985, "grad_norm": 0.512758731842041, "learning_rate": 9.116370878929637e-06, "loss": 0.2829, "step": 23695 }, { "epoch": 0.5282700641391186, "grad_norm": 0.5397109389305115, "learning_rate": 9.112883311031661e-06, "loss": 0.2403, "step": 23700 }, { "epoch": 0.5283815135197386, "grad_norm": 0.5646653771400452, "learning_rate": 9.10939585188734e-06, "loss": 0.2965, "step": 23705 }, { "epoch": 0.5284929629003586, "grad_norm": 0.4140741527080536, "learning_rate": 9.105908501924217e-06, "loss": 0.3376, "step": 23710 }, { "epoch": 0.5286044122809788, "grad_norm": 0.5725622177124023, "learning_rate": 9.102421261569807e-06, "loss": 0.1281, "step": 23715 }, { "epoch": 0.5287158616615988, "grad_norm": 0.42752501368522644, "learning_rate": 9.098934131251624e-06, "loss": 0.2581, "step": 23720 }, { "epoch": 0.5288273110422189, "grad_norm": 0.5737465023994446, "learning_rate": 9.095447111397162e-06, "loss": 0.2171, "step": 23725 }, { "epoch": 0.5289387604228389, "grad_norm": 0.9149386882781982, "learning_rate": 9.091960202433897e-06, "loss": 0.2777, "step": 23730 }, { "epoch": 0.5290502098034591, "grad_norm": 0.5786307454109192, "learning_rate": 9.088473404789306e-06, "loss": 0.2615, "step": 23735 }, { "epoch": 0.5291616591840791, "grad_norm": 0.8515751957893372, "learning_rate": 9.08498671889084e-06, "loss": 0.449, "step": 23740 }, { "epoch": 0.5292731085646991, "grad_norm": 0.7798794507980347, "learning_rate": 9.08150014516594e-06, "loss": 0.2805, "step": 23745 }, { "epoch": 0.5293845579453192, "grad_norm": 0.7772887349128723, "learning_rate": 9.078013684042032e-06, "loss": 0.2888, "step": 23750 }, { "epoch": 0.5294960073259393, "grad_norm": 0.4915827512741089, "learning_rate": 9.07452733594653e-06, "loss": 0.2785, "step": 23755 }, { "epoch": 0.5296074567065594, "grad_norm": 0.2885194718837738, "learning_rate": 9.071041101306832e-06, "loss": 0.205, "step": 23760 }, { "epoch": 0.5297189060871794, "grad_norm": 0.7066339254379272, "learning_rate": 9.067554980550322e-06, "loss": 0.3628, "step": 23765 }, { "epoch": 0.5298303554677994, "grad_norm": 0.6108125448226929, "learning_rate": 9.064068974104377e-06, "loss": 0.2408, "step": 23770 }, { "epoch": 0.5299418048484196, "grad_norm": 0.8365517258644104, "learning_rate": 9.060583082396353e-06, "loss": 0.3107, "step": 23775 }, { "epoch": 0.5300532542290396, "grad_norm": 0.5426750779151917, "learning_rate": 9.057097305853589e-06, "loss": 0.2681, "step": 23780 }, { "epoch": 0.5301647036096597, "grad_norm": 0.7357817888259888, "learning_rate": 9.05361164490342e-06, "loss": 0.3611, "step": 23785 }, { "epoch": 0.5302761529902797, "grad_norm": 0.7073951959609985, "learning_rate": 9.050126099973155e-06, "loss": 0.2575, "step": 23790 }, { "epoch": 0.5303876023708998, "grad_norm": 0.5723925828933716, "learning_rate": 9.0466406714901e-06, "loss": 0.4243, "step": 23795 }, { "epoch": 0.5304990517515199, "grad_norm": 0.6931876540184021, "learning_rate": 9.043155359881538e-06, "loss": 0.2974, "step": 23800 }, { "epoch": 0.5306105011321399, "grad_norm": 0.568409264087677, "learning_rate": 9.039670165574747e-06, "loss": 0.2453, "step": 23805 }, { "epoch": 0.53072195051276, "grad_norm": 0.5373852252960205, "learning_rate": 9.036185088996978e-06, "loss": 0.3074, "step": 23810 }, { "epoch": 0.5308333998933801, "grad_norm": 0.657902181148529, "learning_rate": 9.03270013057548e-06, "loss": 0.304, "step": 23815 }, { "epoch": 0.5309448492740002, "grad_norm": 0.5374367833137512, "learning_rate": 9.02921529073748e-06, "loss": 0.3381, "step": 23820 }, { "epoch": 0.5310562986546202, "grad_norm": 0.579827070236206, "learning_rate": 9.025730569910189e-06, "loss": 0.2773, "step": 23825 }, { "epoch": 0.5311677480352403, "grad_norm": 0.6020088195800781, "learning_rate": 9.022245968520812e-06, "loss": 0.2669, "step": 23830 }, { "epoch": 0.5312791974158604, "grad_norm": 0.24233196675777435, "learning_rate": 9.018761486996529e-06, "loss": 0.2554, "step": 23835 }, { "epoch": 0.5313906467964804, "grad_norm": 1.0846818685531616, "learning_rate": 9.015277125764515e-06, "loss": 0.3221, "step": 23840 }, { "epoch": 0.5315020961771005, "grad_norm": 0.4313613772392273, "learning_rate": 9.011792885251926e-06, "loss": 0.308, "step": 23845 }, { "epoch": 0.5316135455577206, "grad_norm": 0.7550637125968933, "learning_rate": 9.008308765885903e-06, "loss": 0.3228, "step": 23850 }, { "epoch": 0.5317249949383406, "grad_norm": 0.5088363289833069, "learning_rate": 9.004824768093567e-06, "loss": 0.2836, "step": 23855 }, { "epoch": 0.5318364443189607, "grad_norm": 0.6121699213981628, "learning_rate": 9.001340892302038e-06, "loss": 0.2657, "step": 23860 }, { "epoch": 0.5319478936995807, "grad_norm": 0.649071991443634, "learning_rate": 8.997857138938404e-06, "loss": 0.4241, "step": 23865 }, { "epoch": 0.5320593430802009, "grad_norm": 0.7292037010192871, "learning_rate": 8.994373508429745e-06, "loss": 0.37, "step": 23870 }, { "epoch": 0.5321707924608209, "grad_norm": 0.4739864468574524, "learning_rate": 8.990890001203136e-06, "loss": 0.3927, "step": 23875 }, { "epoch": 0.532282241841441, "grad_norm": 0.7016490697860718, "learning_rate": 8.987406617685625e-06, "loss": 0.3176, "step": 23880 }, { "epoch": 0.532393691222061, "grad_norm": 0.5824190974235535, "learning_rate": 8.983923358304242e-06, "loss": 0.1939, "step": 23885 }, { "epoch": 0.5325051406026811, "grad_norm": 0.7529845237731934, "learning_rate": 8.980440223486019e-06, "loss": 0.3123, "step": 23890 }, { "epoch": 0.5326165899833012, "grad_norm": 0.47603124380111694, "learning_rate": 8.976957213657952e-06, "loss": 0.2305, "step": 23895 }, { "epoch": 0.5327280393639212, "grad_norm": 0.7694602012634277, "learning_rate": 8.973474329247031e-06, "loss": 0.2075, "step": 23900 }, { "epoch": 0.5328394887445413, "grad_norm": 0.6289934515953064, "learning_rate": 8.969991570680238e-06, "loss": 0.3209, "step": 23905 }, { "epoch": 0.5329509381251614, "grad_norm": 0.5181615352630615, "learning_rate": 8.966508938384522e-06, "loss": 0.2942, "step": 23910 }, { "epoch": 0.5330623875057814, "grad_norm": 0.714810848236084, "learning_rate": 8.963026432786839e-06, "loss": 0.2761, "step": 23915 }, { "epoch": 0.5331738368864015, "grad_norm": 0.5382199287414551, "learning_rate": 8.959544054314107e-06, "loss": 0.2745, "step": 23920 }, { "epoch": 0.5332852862670215, "grad_norm": 0.6060092449188232, "learning_rate": 8.956061803393246e-06, "loss": 0.4041, "step": 23925 }, { "epoch": 0.5333967356476417, "grad_norm": 0.46849480271339417, "learning_rate": 8.952579680451147e-06, "loss": 0.2914, "step": 23930 }, { "epoch": 0.5335081850282617, "grad_norm": 0.5679491758346558, "learning_rate": 8.949097685914698e-06, "loss": 0.2236, "step": 23935 }, { "epoch": 0.5336196344088818, "grad_norm": 0.6544236540794373, "learning_rate": 8.94561582021076e-06, "loss": 0.2899, "step": 23940 }, { "epoch": 0.5337310837895018, "grad_norm": 0.5217140913009644, "learning_rate": 8.942134083766182e-06, "loss": 0.3035, "step": 23945 }, { "epoch": 0.5338425331701219, "grad_norm": 0.4291645586490631, "learning_rate": 8.938652477007799e-06, "loss": 0.3018, "step": 23950 }, { "epoch": 0.533953982550742, "grad_norm": 0.5148656964302063, "learning_rate": 8.935171000362433e-06, "loss": 0.2073, "step": 23955 }, { "epoch": 0.534065431931362, "grad_norm": 0.629658579826355, "learning_rate": 8.931689654256883e-06, "loss": 0.2964, "step": 23960 }, { "epoch": 0.5341768813119822, "grad_norm": 0.5942751169204712, "learning_rate": 8.928208439117932e-06, "loss": 0.3437, "step": 23965 }, { "epoch": 0.5342883306926022, "grad_norm": 0.7543257474899292, "learning_rate": 8.924727355372356e-06, "loss": 0.3154, "step": 23970 }, { "epoch": 0.5343997800732222, "grad_norm": 0.5518947839736938, "learning_rate": 8.921246403446907e-06, "loss": 0.2256, "step": 23975 }, { "epoch": 0.5345112294538423, "grad_norm": 0.6838600039482117, "learning_rate": 8.917765583768316e-06, "loss": 0.373, "step": 23980 }, { "epoch": 0.5346226788344624, "grad_norm": 0.284435898065567, "learning_rate": 8.914284896763317e-06, "loss": 0.1701, "step": 23985 }, { "epoch": 0.5347341282150825, "grad_norm": 0.5570893287658691, "learning_rate": 8.91080434285861e-06, "loss": 0.3315, "step": 23990 }, { "epoch": 0.5348455775957025, "grad_norm": 0.6959690451622009, "learning_rate": 8.90732392248088e-06, "loss": 0.3038, "step": 23995 }, { "epoch": 0.5349570269763225, "grad_norm": 0.8016462326049805, "learning_rate": 8.903843636056805e-06, "loss": 0.394, "step": 24000 }, { "epoch": 0.5350684763569427, "grad_norm": 0.5051465034484863, "learning_rate": 8.900363484013041e-06, "loss": 0.3425, "step": 24005 }, { "epoch": 0.5351799257375627, "grad_norm": 0.4948039650917053, "learning_rate": 8.896883466776223e-06, "loss": 0.1765, "step": 24010 }, { "epoch": 0.5352913751181828, "grad_norm": 0.764173686504364, "learning_rate": 8.893403584772975e-06, "loss": 0.2309, "step": 24015 }, { "epoch": 0.5354028244988028, "grad_norm": 1.018734097480774, "learning_rate": 8.889923838429909e-06, "loss": 0.3934, "step": 24020 }, { "epoch": 0.535514273879423, "grad_norm": 0.648385226726532, "learning_rate": 8.88644422817361e-06, "loss": 0.2999, "step": 24025 }, { "epoch": 0.535625723260043, "grad_norm": 0.44655290246009827, "learning_rate": 8.882964754430658e-06, "loss": 0.3726, "step": 24030 }, { "epoch": 0.535737172640663, "grad_norm": 0.5479897260665894, "learning_rate": 8.879485417627602e-06, "loss": 0.2738, "step": 24035 }, { "epoch": 0.5358486220212831, "grad_norm": 0.36989253759384155, "learning_rate": 8.876006218190985e-06, "loss": 0.3391, "step": 24040 }, { "epoch": 0.5359600714019032, "grad_norm": 0.6240692734718323, "learning_rate": 8.872527156547329e-06, "loss": 0.2586, "step": 24045 }, { "epoch": 0.5360715207825233, "grad_norm": 0.416449636220932, "learning_rate": 8.86904823312314e-06, "loss": 0.2799, "step": 24050 }, { "epoch": 0.5361829701631433, "grad_norm": 0.5444673895835876, "learning_rate": 8.865569448344909e-06, "loss": 0.3169, "step": 24055 }, { "epoch": 0.5362944195437633, "grad_norm": 0.6248395442962646, "learning_rate": 8.862090802639106e-06, "loss": 0.2957, "step": 24060 }, { "epoch": 0.5364058689243835, "grad_norm": 0.641238272190094, "learning_rate": 8.85861229643219e-06, "loss": 0.4187, "step": 24065 }, { "epoch": 0.5365173183050035, "grad_norm": 0.8127478957176208, "learning_rate": 8.855133930150594e-06, "loss": 0.3227, "step": 24070 }, { "epoch": 0.5366287676856236, "grad_norm": 0.7973223924636841, "learning_rate": 8.851655704220743e-06, "loss": 0.3224, "step": 24075 }, { "epoch": 0.5367402170662436, "grad_norm": 0.7173725962638855, "learning_rate": 8.848177619069039e-06, "loss": 0.3197, "step": 24080 }, { "epoch": 0.5368516664468638, "grad_norm": 0.5283668637275696, "learning_rate": 8.844699675121865e-06, "loss": 0.3195, "step": 24085 }, { "epoch": 0.5369631158274838, "grad_norm": 0.5162988901138306, "learning_rate": 8.841221872805595e-06, "loss": 0.3324, "step": 24090 }, { "epoch": 0.5370745652081038, "grad_norm": 0.807227611541748, "learning_rate": 8.83774421254658e-06, "loss": 0.262, "step": 24095 }, { "epoch": 0.537186014588724, "grad_norm": 0.5646213889122009, "learning_rate": 8.834266694771151e-06, "loss": 0.2243, "step": 24100 }, { "epoch": 0.537297463969344, "grad_norm": 0.5768153667449951, "learning_rate": 8.83078931990563e-06, "loss": 0.2814, "step": 24105 }, { "epoch": 0.5374089133499641, "grad_norm": 0.6003533601760864, "learning_rate": 8.827312088376315e-06, "loss": 0.3577, "step": 24110 }, { "epoch": 0.5375203627305841, "grad_norm": 0.5767202377319336, "learning_rate": 8.823835000609482e-06, "loss": 0.2962, "step": 24115 }, { "epoch": 0.5376318121112041, "grad_norm": 0.34857234358787537, "learning_rate": 8.820358057031397e-06, "loss": 0.2262, "step": 24120 }, { "epoch": 0.5377432614918243, "grad_norm": 0.615218997001648, "learning_rate": 8.816881258068313e-06, "loss": 0.2648, "step": 24125 }, { "epoch": 0.5378547108724443, "grad_norm": 0.3658369779586792, "learning_rate": 8.813404604146457e-06, "loss": 0.2492, "step": 24130 }, { "epoch": 0.5379661602530644, "grad_norm": 0.380628764629364, "learning_rate": 8.809928095692033e-06, "loss": 0.379, "step": 24135 }, { "epoch": 0.5380776096336845, "grad_norm": 0.4054970443248749, "learning_rate": 8.806451733131241e-06, "loss": 0.2889, "step": 24140 }, { "epoch": 0.5381890590143046, "grad_norm": 0.5545241236686707, "learning_rate": 8.802975516890252e-06, "loss": 0.254, "step": 24145 }, { "epoch": 0.5383005083949246, "grad_norm": 0.43503686785697937, "learning_rate": 8.799499447395227e-06, "loss": 0.2407, "step": 24150 }, { "epoch": 0.5384119577755446, "grad_norm": 0.817979633808136, "learning_rate": 8.7960235250723e-06, "loss": 0.3682, "step": 24155 }, { "epoch": 0.5385234071561648, "grad_norm": 0.45893165469169617, "learning_rate": 8.792547750347597e-06, "loss": 0.385, "step": 24160 }, { "epoch": 0.5386348565367848, "grad_norm": 0.5781266689300537, "learning_rate": 8.78907212364722e-06, "loss": 0.1913, "step": 24165 }, { "epoch": 0.5387463059174049, "grad_norm": 0.70738685131073, "learning_rate": 8.785596645397256e-06, "loss": 0.2322, "step": 24170 }, { "epoch": 0.5388577552980249, "grad_norm": 0.5673659443855286, "learning_rate": 8.782121316023769e-06, "loss": 0.3694, "step": 24175 }, { "epoch": 0.538969204678645, "grad_norm": 0.48852887749671936, "learning_rate": 8.778646135952805e-06, "loss": 0.2731, "step": 24180 }, { "epoch": 0.5390806540592651, "grad_norm": 0.7373262643814087, "learning_rate": 8.7751711056104e-06, "loss": 0.2755, "step": 24185 }, { "epoch": 0.5391921034398851, "grad_norm": 0.5007327795028687, "learning_rate": 8.771696225422559e-06, "loss": 0.2156, "step": 24190 }, { "epoch": 0.5393035528205052, "grad_norm": 0.7125994563102722, "learning_rate": 8.768221495815282e-06, "loss": 0.2494, "step": 24195 }, { "epoch": 0.5394150022011253, "grad_norm": 0.5907738208770752, "learning_rate": 8.764746917214543e-06, "loss": 0.3115, "step": 24200 }, { "epoch": 0.5395264515817453, "grad_norm": 0.6450021862983704, "learning_rate": 8.761272490046299e-06, "loss": 0.2341, "step": 24205 }, { "epoch": 0.5396379009623654, "grad_norm": 0.5347104072570801, "learning_rate": 8.757798214736483e-06, "loss": 0.2313, "step": 24210 }, { "epoch": 0.5397493503429854, "grad_norm": 0.5462760329246521, "learning_rate": 8.754324091711021e-06, "loss": 0.3637, "step": 24215 }, { "epoch": 0.5398607997236056, "grad_norm": 0.39873236417770386, "learning_rate": 8.75085012139581e-06, "loss": 0.4349, "step": 24220 }, { "epoch": 0.5399722491042256, "grad_norm": 0.5908456444740295, "learning_rate": 8.747376304216726e-06, "loss": 0.3994, "step": 24225 }, { "epoch": 0.5400836984848457, "grad_norm": 0.6254735589027405, "learning_rate": 8.74390264059964e-06, "loss": 0.3037, "step": 24230 }, { "epoch": 0.5401951478654657, "grad_norm": 0.7801318168640137, "learning_rate": 8.740429130970399e-06, "loss": 0.1759, "step": 24235 }, { "epoch": 0.5403065972460858, "grad_norm": 1.0558874607086182, "learning_rate": 8.736955775754821e-06, "loss": 0.2624, "step": 24240 }, { "epoch": 0.5404180466267059, "grad_norm": 0.5824124813079834, "learning_rate": 8.733482575378718e-06, "loss": 0.304, "step": 24245 }, { "epoch": 0.5405294960073259, "grad_norm": 0.7328165173530579, "learning_rate": 8.730009530267876e-06, "loss": 0.2544, "step": 24250 }, { "epoch": 0.540640945387946, "grad_norm": 0.7363235950469971, "learning_rate": 8.72653664084806e-06, "loss": 0.2932, "step": 24255 }, { "epoch": 0.5407523947685661, "grad_norm": 0.43137842416763306, "learning_rate": 8.72306390754502e-06, "loss": 0.3143, "step": 24260 }, { "epoch": 0.5408638441491861, "grad_norm": 0.7054587602615356, "learning_rate": 8.719591330784493e-06, "loss": 0.3246, "step": 24265 }, { "epoch": 0.5409752935298062, "grad_norm": 0.6189272999763489, "learning_rate": 8.716118910992185e-06, "loss": 0.2671, "step": 24270 }, { "epoch": 0.5410867429104262, "grad_norm": 0.8482842445373535, "learning_rate": 8.712646648593786e-06, "loss": 0.3937, "step": 24275 }, { "epoch": 0.5411981922910464, "grad_norm": 0.7510126233100891, "learning_rate": 8.709174544014974e-06, "loss": 0.2735, "step": 24280 }, { "epoch": 0.5413096416716664, "grad_norm": 0.804165780544281, "learning_rate": 8.705702597681399e-06, "loss": 0.3779, "step": 24285 }, { "epoch": 0.5414210910522865, "grad_norm": 0.6668807864189148, "learning_rate": 8.702230810018695e-06, "loss": 0.2744, "step": 24290 }, { "epoch": 0.5415325404329066, "grad_norm": 0.33997178077697754, "learning_rate": 8.698759181452472e-06, "loss": 0.2683, "step": 24295 }, { "epoch": 0.5416439898135266, "grad_norm": 0.5800212621688843, "learning_rate": 8.695287712408333e-06, "loss": 0.3001, "step": 24300 }, { "epoch": 0.5417554391941467, "grad_norm": 0.7384917140007019, "learning_rate": 8.691816403311849e-06, "loss": 0.3023, "step": 24305 }, { "epoch": 0.5418668885747667, "grad_norm": 0.42454642057418823, "learning_rate": 8.688345254588579e-06, "loss": 0.2291, "step": 24310 }, { "epoch": 0.5419783379553869, "grad_norm": 0.7141265869140625, "learning_rate": 8.684874266664054e-06, "loss": 0.3026, "step": 24315 }, { "epoch": 0.5420897873360069, "grad_norm": 0.49102476239204407, "learning_rate": 8.681403439963793e-06, "loss": 0.3357, "step": 24320 }, { "epoch": 0.5422012367166269, "grad_norm": 0.7738983631134033, "learning_rate": 8.677932774913292e-06, "loss": 0.3483, "step": 24325 }, { "epoch": 0.542312686097247, "grad_norm": 0.7534337043762207, "learning_rate": 8.674462271938026e-06, "loss": 0.2816, "step": 24330 }, { "epoch": 0.542424135477867, "grad_norm": 0.4947792887687683, "learning_rate": 8.670991931463454e-06, "loss": 0.3452, "step": 24335 }, { "epoch": 0.5425355848584872, "grad_norm": 0.5832251310348511, "learning_rate": 8.667521753915018e-06, "loss": 0.3181, "step": 24340 }, { "epoch": 0.5426470342391072, "grad_norm": 0.9216941595077515, "learning_rate": 8.664051739718127e-06, "loss": 0.4427, "step": 24345 }, { "epoch": 0.5427584836197273, "grad_norm": 0.6378898024559021, "learning_rate": 8.660581889298178e-06, "loss": 0.2333, "step": 24350 }, { "epoch": 0.5428699330003474, "grad_norm": 1.0858154296875, "learning_rate": 8.657112203080555e-06, "loss": 0.4015, "step": 24355 }, { "epoch": 0.5429813823809674, "grad_norm": 0.6010323166847229, "learning_rate": 8.653642681490608e-06, "loss": 0.3105, "step": 24360 }, { "epoch": 0.5430928317615875, "grad_norm": 0.7363236546516418, "learning_rate": 8.650173324953675e-06, "loss": 0.318, "step": 24365 }, { "epoch": 0.5432042811422075, "grad_norm": 0.5955904722213745, "learning_rate": 8.646704133895074e-06, "loss": 0.2549, "step": 24370 }, { "epoch": 0.5433157305228277, "grad_norm": 0.6519381999969482, "learning_rate": 8.643235108740102e-06, "loss": 0.3682, "step": 24375 }, { "epoch": 0.5434271799034477, "grad_norm": 0.7452640533447266, "learning_rate": 8.639766249914032e-06, "loss": 0.2038, "step": 24380 }, { "epoch": 0.5435386292840677, "grad_norm": 0.8365711569786072, "learning_rate": 8.636297557842122e-06, "loss": 0.3316, "step": 24385 }, { "epoch": 0.5436500786646878, "grad_norm": 0.49274158477783203, "learning_rate": 8.632829032949604e-06, "loss": 0.3712, "step": 24390 }, { "epoch": 0.5437615280453079, "grad_norm": 0.4177057445049286, "learning_rate": 8.629360675661693e-06, "loss": 0.3004, "step": 24395 }, { "epoch": 0.543872977425928, "grad_norm": 0.6827482581138611, "learning_rate": 8.62589248640358e-06, "loss": 0.295, "step": 24400 }, { "epoch": 0.543984426806548, "grad_norm": 0.626742422580719, "learning_rate": 8.622424465600448e-06, "loss": 0.1827, "step": 24405 }, { "epoch": 0.544095876187168, "grad_norm": 0.6676792502403259, "learning_rate": 8.618956613677438e-06, "loss": 0.3464, "step": 24410 }, { "epoch": 0.5442073255677882, "grad_norm": 0.5386606454849243, "learning_rate": 8.615488931059693e-06, "loss": 0.2527, "step": 24415 }, { "epoch": 0.5443187749484082, "grad_norm": 0.6125070452690125, "learning_rate": 8.612021418172316e-06, "loss": 0.2888, "step": 24420 }, { "epoch": 0.5444302243290283, "grad_norm": 0.8366973996162415, "learning_rate": 8.608554075440397e-06, "loss": 0.3497, "step": 24425 }, { "epoch": 0.5445416737096483, "grad_norm": 0.41520625352859497, "learning_rate": 8.605086903289011e-06, "loss": 0.3532, "step": 24430 }, { "epoch": 0.5446531230902685, "grad_norm": 0.5745276212692261, "learning_rate": 8.601619902143204e-06, "loss": 0.2881, "step": 24435 }, { "epoch": 0.5447645724708885, "grad_norm": 0.7710216641426086, "learning_rate": 8.598153072427998e-06, "loss": 0.3121, "step": 24440 }, { "epoch": 0.5448760218515085, "grad_norm": 0.5294174551963806, "learning_rate": 8.594686414568407e-06, "loss": 0.2459, "step": 24445 }, { "epoch": 0.5449874712321287, "grad_norm": 0.43676459789276123, "learning_rate": 8.591219928989418e-06, "loss": 0.3917, "step": 24450 }, { "epoch": 0.5450989206127487, "grad_norm": 0.49319544434547424, "learning_rate": 8.587753616115988e-06, "loss": 0.3877, "step": 24455 }, { "epoch": 0.5452103699933688, "grad_norm": 0.5562756061553955, "learning_rate": 8.584287476373066e-06, "loss": 0.3236, "step": 24460 }, { "epoch": 0.5453218193739888, "grad_norm": 0.5114259719848633, "learning_rate": 8.580821510185571e-06, "loss": 0.29, "step": 24465 }, { "epoch": 0.5454332687546088, "grad_norm": 0.507517397403717, "learning_rate": 8.577355717978403e-06, "loss": 0.2529, "step": 24470 }, { "epoch": 0.545544718135229, "grad_norm": 0.6263054609298706, "learning_rate": 8.57389010017644e-06, "loss": 0.2756, "step": 24475 }, { "epoch": 0.545656167515849, "grad_norm": 0.8092488050460815, "learning_rate": 8.570424657204548e-06, "loss": 0.3541, "step": 24480 }, { "epoch": 0.5457676168964691, "grad_norm": 0.6228552460670471, "learning_rate": 8.56695938948756e-06, "loss": 0.2658, "step": 24485 }, { "epoch": 0.5458790662770892, "grad_norm": 0.6549887657165527, "learning_rate": 8.563494297450285e-06, "loss": 0.3416, "step": 24490 }, { "epoch": 0.5459905156577093, "grad_norm": 0.5719853639602661, "learning_rate": 8.560029381517524e-06, "loss": 0.2552, "step": 24495 }, { "epoch": 0.5461019650383293, "grad_norm": 0.5304532051086426, "learning_rate": 8.556564642114044e-06, "loss": 0.2077, "step": 24500 }, { "epoch": 0.5462134144189493, "grad_norm": 0.6370610594749451, "learning_rate": 8.553100079664598e-06, "loss": 0.2462, "step": 24505 }, { "epoch": 0.5463248637995695, "grad_norm": 0.7064418792724609, "learning_rate": 8.549635694593911e-06, "loss": 0.2964, "step": 24510 }, { "epoch": 0.5464363131801895, "grad_norm": 0.5713470578193665, "learning_rate": 8.546171487326698e-06, "loss": 0.3, "step": 24515 }, { "epoch": 0.5465477625608096, "grad_norm": 0.5088363885879517, "learning_rate": 8.542707458287635e-06, "loss": 0.4, "step": 24520 }, { "epoch": 0.5466592119414296, "grad_norm": 0.6283735632896423, "learning_rate": 8.539243607901391e-06, "loss": 0.328, "step": 24525 }, { "epoch": 0.5467706613220497, "grad_norm": 0.45291775465011597, "learning_rate": 8.535779936592607e-06, "loss": 0.2645, "step": 24530 }, { "epoch": 0.5468821107026698, "grad_norm": 0.5215762257575989, "learning_rate": 8.532316444785898e-06, "loss": 0.3545, "step": 24535 }, { "epoch": 0.5469935600832898, "grad_norm": 0.6499525308609009, "learning_rate": 8.528853132905865e-06, "loss": 0.281, "step": 24540 }, { "epoch": 0.5471050094639099, "grad_norm": 0.6608521938323975, "learning_rate": 8.525390001377078e-06, "loss": 0.3618, "step": 24545 }, { "epoch": 0.54721645884453, "grad_norm": 0.7306557297706604, "learning_rate": 8.521927050624097e-06, "loss": 0.2263, "step": 24550 }, { "epoch": 0.54732790822515, "grad_norm": 0.46093496680259705, "learning_rate": 8.518464281071454e-06, "loss": 0.3256, "step": 24555 }, { "epoch": 0.5474393576057701, "grad_norm": 0.7847006916999817, "learning_rate": 8.515001693143654e-06, "loss": 0.2738, "step": 24560 }, { "epoch": 0.5475508069863901, "grad_norm": 0.6807790994644165, "learning_rate": 8.511539287265183e-06, "loss": 0.348, "step": 24565 }, { "epoch": 0.5476622563670103, "grad_norm": 0.36090344190597534, "learning_rate": 8.508077063860505e-06, "loss": 0.3948, "step": 24570 }, { "epoch": 0.5477737057476303, "grad_norm": 0.7280330061912537, "learning_rate": 8.504615023354066e-06, "loss": 0.3484, "step": 24575 }, { "epoch": 0.5478851551282504, "grad_norm": 0.7046535611152649, "learning_rate": 8.501153166170277e-06, "loss": 0.3831, "step": 24580 }, { "epoch": 0.5479966045088704, "grad_norm": 0.6159283518791199, "learning_rate": 8.497691492733543e-06, "loss": 0.2587, "step": 24585 }, { "epoch": 0.5481080538894905, "grad_norm": 0.6410984396934509, "learning_rate": 8.494230003468239e-06, "loss": 0.2617, "step": 24590 }, { "epoch": 0.5482195032701106, "grad_norm": 0.728539228439331, "learning_rate": 8.49076869879871e-06, "loss": 0.2402, "step": 24595 }, { "epoch": 0.5483309526507306, "grad_norm": 0.42051032185554504, "learning_rate": 8.487307579149293e-06, "loss": 0.2624, "step": 24600 }, { "epoch": 0.5484424020313508, "grad_norm": 0.5762730240821838, "learning_rate": 8.483846644944289e-06, "loss": 0.3665, "step": 24605 }, { "epoch": 0.5485538514119708, "grad_norm": 0.6362734436988831, "learning_rate": 8.480385896607981e-06, "loss": 0.2072, "step": 24610 }, { "epoch": 0.5486653007925908, "grad_norm": 0.7404980063438416, "learning_rate": 8.476925334564631e-06, "loss": 0.4376, "step": 24615 }, { "epoch": 0.5487767501732109, "grad_norm": 0.6925135254859924, "learning_rate": 8.473464959238485e-06, "loss": 0.2248, "step": 24620 }, { "epoch": 0.548888199553831, "grad_norm": 0.6552592515945435, "learning_rate": 8.47000477105375e-06, "loss": 0.2665, "step": 24625 }, { "epoch": 0.5489996489344511, "grad_norm": 0.6084296107292175, "learning_rate": 8.466544770434617e-06, "loss": 0.2409, "step": 24630 }, { "epoch": 0.5491110983150711, "grad_norm": 0.6576414108276367, "learning_rate": 8.463084957805263e-06, "loss": 0.2778, "step": 24635 }, { "epoch": 0.5492225476956912, "grad_norm": 0.5849794745445251, "learning_rate": 8.459625333589828e-06, "loss": 0.2869, "step": 24640 }, { "epoch": 0.5493339970763113, "grad_norm": 0.6730411052703857, "learning_rate": 8.456165898212438e-06, "loss": 0.2549, "step": 24645 }, { "epoch": 0.5494454464569313, "grad_norm": 0.6720399856567383, "learning_rate": 8.452706652097187e-06, "loss": 0.3656, "step": 24650 }, { "epoch": 0.5495568958375514, "grad_norm": 0.4191713333129883, "learning_rate": 8.449247595668164e-06, "loss": 0.3909, "step": 24655 }, { "epoch": 0.5496683452181714, "grad_norm": 0.7045153379440308, "learning_rate": 8.445788729349412e-06, "loss": 0.3864, "step": 24660 }, { "epoch": 0.5497797945987916, "grad_norm": 0.41460931301116943, "learning_rate": 8.442330053564969e-06, "loss": 0.2533, "step": 24665 }, { "epoch": 0.5498912439794116, "grad_norm": 1.074849009513855, "learning_rate": 8.438871568738833e-06, "loss": 0.256, "step": 24670 }, { "epoch": 0.5500026933600316, "grad_norm": 0.7419118881225586, "learning_rate": 8.435413275294998e-06, "loss": 0.3451, "step": 24675 }, { "epoch": 0.5501141427406517, "grad_norm": 0.6535670757293701, "learning_rate": 8.431955173657416e-06, "loss": 0.3289, "step": 24680 }, { "epoch": 0.5502255921212718, "grad_norm": 0.6609795689582825, "learning_rate": 8.428497264250023e-06, "loss": 0.3248, "step": 24685 }, { "epoch": 0.5503370415018919, "grad_norm": 0.35083624720573425, "learning_rate": 8.425039547496737e-06, "loss": 0.3534, "step": 24690 }, { "epoch": 0.5504484908825119, "grad_norm": 0.7437844276428223, "learning_rate": 8.421582023821448e-06, "loss": 0.2265, "step": 24695 }, { "epoch": 0.550559940263132, "grad_norm": 0.790406346321106, "learning_rate": 8.41812469364802e-06, "loss": 0.3658, "step": 24700 }, { "epoch": 0.5506713896437521, "grad_norm": 1.019261121749878, "learning_rate": 8.414667557400293e-06, "loss": 0.3364, "step": 24705 }, { "epoch": 0.5507828390243721, "grad_norm": 0.5490015149116516, "learning_rate": 8.411210615502087e-06, "loss": 0.1999, "step": 24710 }, { "epoch": 0.5508942884049922, "grad_norm": 0.6740686297416687, "learning_rate": 8.407753868377194e-06, "loss": 0.261, "step": 24715 }, { "epoch": 0.5510057377856122, "grad_norm": 0.6299969553947449, "learning_rate": 8.404297316449387e-06, "loss": 0.3937, "step": 24720 }, { "epoch": 0.5511171871662324, "grad_norm": 0.4909580647945404, "learning_rate": 8.40084096014241e-06, "loss": 0.2368, "step": 24725 }, { "epoch": 0.5512286365468524, "grad_norm": 0.9306431412696838, "learning_rate": 8.397384799879993e-06, "loss": 0.399, "step": 24730 }, { "epoch": 0.5513400859274724, "grad_norm": 0.46534305810928345, "learning_rate": 8.393928836085827e-06, "loss": 0.2315, "step": 24735 }, { "epoch": 0.5514515353080925, "grad_norm": 0.5373953580856323, "learning_rate": 8.39047306918359e-06, "loss": 0.355, "step": 24740 }, { "epoch": 0.5515629846887126, "grad_norm": 0.6013240814208984, "learning_rate": 8.38701749959693e-06, "loss": 0.2399, "step": 24745 }, { "epoch": 0.5516744340693327, "grad_norm": 0.5150963664054871, "learning_rate": 8.383562127749473e-06, "loss": 0.2574, "step": 24750 }, { "epoch": 0.5517858834499527, "grad_norm": 0.5833569169044495, "learning_rate": 8.38010695406482e-06, "loss": 0.3758, "step": 24755 }, { "epoch": 0.5518973328305727, "grad_norm": 0.4889329969882965, "learning_rate": 8.376651978966555e-06, "loss": 0.2779, "step": 24760 }, { "epoch": 0.5520087822111929, "grad_norm": 0.7517090439796448, "learning_rate": 8.373197202878224e-06, "loss": 0.3864, "step": 24765 }, { "epoch": 0.5521202315918129, "grad_norm": 0.7434937357902527, "learning_rate": 8.369742626223363e-06, "loss": 0.2573, "step": 24770 }, { "epoch": 0.552231680972433, "grad_norm": 0.6837606430053711, "learning_rate": 8.36628824942547e-06, "loss": 0.2357, "step": 24775 }, { "epoch": 0.552343130353053, "grad_norm": 0.8016228079795837, "learning_rate": 8.362834072908028e-06, "loss": 0.3943, "step": 24780 }, { "epoch": 0.5524545797336732, "grad_norm": 0.549381673336029, "learning_rate": 8.35938009709449e-06, "loss": 0.3435, "step": 24785 }, { "epoch": 0.5525660291142932, "grad_norm": 1.0602985620498657, "learning_rate": 8.355926322408287e-06, "loss": 0.2506, "step": 24790 }, { "epoch": 0.5526774784949132, "grad_norm": 0.526971161365509, "learning_rate": 8.352472749272831e-06, "loss": 0.47, "step": 24795 }, { "epoch": 0.5527889278755334, "grad_norm": 0.8049795627593994, "learning_rate": 8.349019378111496e-06, "loss": 0.298, "step": 24800 }, { "epoch": 0.5529003772561534, "grad_norm": 0.5035505294799805, "learning_rate": 8.345566209347644e-06, "loss": 0.2329, "step": 24805 }, { "epoch": 0.5530118266367735, "grad_norm": 0.6278955340385437, "learning_rate": 8.342113243404602e-06, "loss": 0.2987, "step": 24810 }, { "epoch": 0.5531232760173935, "grad_norm": 0.38567596673965454, "learning_rate": 8.338660480705683e-06, "loss": 0.2863, "step": 24815 }, { "epoch": 0.5532347253980135, "grad_norm": 0.6542585492134094, "learning_rate": 8.335207921674165e-06, "loss": 0.4058, "step": 24820 }, { "epoch": 0.5533461747786337, "grad_norm": 0.8733132481575012, "learning_rate": 8.3317555667333e-06, "loss": 0.2498, "step": 24825 }, { "epoch": 0.5534576241592537, "grad_norm": 0.46271783113479614, "learning_rate": 8.328303416306329e-06, "loss": 0.2544, "step": 24830 }, { "epoch": 0.5535690735398738, "grad_norm": 0.8014933466911316, "learning_rate": 8.324851470816457e-06, "loss": 0.3084, "step": 24835 }, { "epoch": 0.5536805229204939, "grad_norm": 0.5467740893363953, "learning_rate": 8.321399730686865e-06, "loss": 0.3583, "step": 24840 }, { "epoch": 0.553791972301114, "grad_norm": 0.7579909563064575, "learning_rate": 8.317948196340706e-06, "loss": 0.3352, "step": 24845 }, { "epoch": 0.553903421681734, "grad_norm": 0.7478350400924683, "learning_rate": 8.314496868201118e-06, "loss": 0.4136, "step": 24850 }, { "epoch": 0.554014871062354, "grad_norm": 0.46065306663513184, "learning_rate": 8.3110457466912e-06, "loss": 0.2237, "step": 24855 }, { "epoch": 0.5541263204429742, "grad_norm": 0.5404456257820129, "learning_rate": 8.307594832234037e-06, "loss": 0.2682, "step": 24860 }, { "epoch": 0.5542377698235942, "grad_norm": 0.7918099761009216, "learning_rate": 8.304144125252687e-06, "loss": 0.4182, "step": 24865 }, { "epoch": 0.5543492192042143, "grad_norm": 0.4383431673049927, "learning_rate": 8.300693626170178e-06, "loss": 0.3092, "step": 24870 }, { "epoch": 0.5544606685848343, "grad_norm": 0.424964040517807, "learning_rate": 8.297243335409511e-06, "loss": 0.3116, "step": 24875 }, { "epoch": 0.5545721179654544, "grad_norm": 0.6739875674247742, "learning_rate": 8.29379325339367e-06, "loss": 0.3016, "step": 24880 }, { "epoch": 0.5546835673460745, "grad_norm": 0.6436865329742432, "learning_rate": 8.290343380545606e-06, "loss": 0.2368, "step": 24885 }, { "epoch": 0.5547950167266945, "grad_norm": 0.5790094137191772, "learning_rate": 8.286893717288246e-06, "loss": 0.3702, "step": 24890 }, { "epoch": 0.5549064661073146, "grad_norm": 0.5759456157684326, "learning_rate": 8.283444264044491e-06, "loss": 0.2727, "step": 24895 }, { "epoch": 0.5550179154879347, "grad_norm": 0.8519607186317444, "learning_rate": 8.279995021237223e-06, "loss": 0.4072, "step": 24900 }, { "epoch": 0.5551293648685548, "grad_norm": 0.6983477473258972, "learning_rate": 8.276545989289286e-06, "loss": 0.2188, "step": 24905 }, { "epoch": 0.5552408142491748, "grad_norm": 0.7973755598068237, "learning_rate": 8.273097168623511e-06, "loss": 0.3562, "step": 24910 }, { "epoch": 0.5553522636297948, "grad_norm": 1.0104763507843018, "learning_rate": 8.269648559662694e-06, "loss": 0.2745, "step": 24915 }, { "epoch": 0.555463713010415, "grad_norm": 0.37829214334487915, "learning_rate": 8.266200162829605e-06, "loss": 0.3069, "step": 24920 }, { "epoch": 0.555575162391035, "grad_norm": 0.5734246969223022, "learning_rate": 8.262751978546995e-06, "loss": 0.222, "step": 24925 }, { "epoch": 0.5556866117716551, "grad_norm": 0.5025404691696167, "learning_rate": 8.259304007237578e-06, "loss": 0.2063, "step": 24930 }, { "epoch": 0.5557980611522751, "grad_norm": 0.43454474210739136, "learning_rate": 8.255856249324058e-06, "loss": 0.1827, "step": 24935 }, { "epoch": 0.5559095105328952, "grad_norm": 0.6172532439231873, "learning_rate": 8.252408705229098e-06, "loss": 0.2765, "step": 24940 }, { "epoch": 0.5560209599135153, "grad_norm": 0.3773133456707001, "learning_rate": 8.248961375375341e-06, "loss": 0.2087, "step": 24945 }, { "epoch": 0.5561324092941353, "grad_norm": 0.5615310668945312, "learning_rate": 8.245514260185403e-06, "loss": 0.361, "step": 24950 }, { "epoch": 0.5562438586747555, "grad_norm": 0.5559342503547668, "learning_rate": 8.242067360081878e-06, "loss": 0.3179, "step": 24955 }, { "epoch": 0.5563553080553755, "grad_norm": 0.47637277841567993, "learning_rate": 8.238620675487323e-06, "loss": 0.3062, "step": 24960 }, { "epoch": 0.5564667574359955, "grad_norm": 0.580187976360321, "learning_rate": 8.235174206824278e-06, "loss": 0.4512, "step": 24965 }, { "epoch": 0.5565782068166156, "grad_norm": 0.2807926833629608, "learning_rate": 8.231727954515247e-06, "loss": 0.2401, "step": 24970 }, { "epoch": 0.5566896561972356, "grad_norm": 0.6485481858253479, "learning_rate": 8.228281918982726e-06, "loss": 0.4448, "step": 24975 }, { "epoch": 0.5568011055778558, "grad_norm": 0.42547407746315, "learning_rate": 8.224836100649166e-06, "loss": 0.3134, "step": 24980 }, { "epoch": 0.5569125549584758, "grad_norm": 0.6035627126693726, "learning_rate": 8.221390499936997e-06, "loss": 0.4542, "step": 24985 }, { "epoch": 0.5570240043390959, "grad_norm": 0.49706533551216125, "learning_rate": 8.217945117268624e-06, "loss": 0.1768, "step": 24990 }, { "epoch": 0.557135453719716, "grad_norm": 0.5365151762962341, "learning_rate": 8.214499953066423e-06, "loss": 0.2971, "step": 24995 }, { "epoch": 0.557246903100336, "grad_norm": 0.7276906967163086, "learning_rate": 8.211055007752749e-06, "loss": 0.2899, "step": 25000 }, { "epoch": 0.5573583524809561, "grad_norm": 0.6989277601242065, "learning_rate": 8.207610281749918e-06, "loss": 0.3771, "step": 25005 }, { "epoch": 0.5574698018615761, "grad_norm": 0.8797715306282043, "learning_rate": 8.204165775480233e-06, "loss": 0.2723, "step": 25010 }, { "epoch": 0.5575812512421963, "grad_norm": 0.5198432207107544, "learning_rate": 8.200721489365962e-06, "loss": 0.3965, "step": 25015 }, { "epoch": 0.5576927006228163, "grad_norm": 0.6038433313369751, "learning_rate": 8.197277423829351e-06, "loss": 0.2742, "step": 25020 }, { "epoch": 0.5578041500034363, "grad_norm": 0.6098718047142029, "learning_rate": 8.19383357929261e-06, "loss": 0.2459, "step": 25025 }, { "epoch": 0.5579155993840564, "grad_norm": 0.5177041292190552, "learning_rate": 8.190389956177934e-06, "loss": 0.322, "step": 25030 }, { "epoch": 0.5580270487646765, "grad_norm": 0.8710949420928955, "learning_rate": 8.18694655490748e-06, "loss": 0.3124, "step": 25035 }, { "epoch": 0.5581384981452966, "grad_norm": 0.6304861307144165, "learning_rate": 8.183503375903378e-06, "loss": 0.2803, "step": 25040 }, { "epoch": 0.5582499475259166, "grad_norm": 0.5343223214149475, "learning_rate": 8.180060419587746e-06, "loss": 0.2192, "step": 25045 }, { "epoch": 0.5583613969065367, "grad_norm": 1.02570641040802, "learning_rate": 8.17661768638266e-06, "loss": 0.1673, "step": 25050 }, { "epoch": 0.5584728462871568, "grad_norm": 0.5182636976242065, "learning_rate": 8.173175176710172e-06, "loss": 0.3125, "step": 25055 }, { "epoch": 0.5585842956677768, "grad_norm": 0.9790181517601013, "learning_rate": 8.169732890992302e-06, "loss": 0.447, "step": 25060 }, { "epoch": 0.5586957450483969, "grad_norm": 0.646925151348114, "learning_rate": 8.166290829651056e-06, "loss": 0.2969, "step": 25065 }, { "epoch": 0.5588071944290169, "grad_norm": 0.5242272019386292, "learning_rate": 8.162848993108396e-06, "loss": 0.3618, "step": 25070 }, { "epoch": 0.5589186438096371, "grad_norm": 0.5497309565544128, "learning_rate": 8.159407381786268e-06, "loss": 0.3532, "step": 25075 }, { "epoch": 0.5590300931902571, "grad_norm": 0.6220980286598206, "learning_rate": 8.155965996106594e-06, "loss": 0.2132, "step": 25080 }, { "epoch": 0.5591415425708771, "grad_norm": 0.3434116244316101, "learning_rate": 8.152524836491254e-06, "loss": 0.3954, "step": 25085 }, { "epoch": 0.5592529919514972, "grad_norm": 0.7324555516242981, "learning_rate": 8.149083903362105e-06, "loss": 0.2732, "step": 25090 }, { "epoch": 0.5593644413321173, "grad_norm": 0.517315685749054, "learning_rate": 8.145643197140986e-06, "loss": 0.2802, "step": 25095 }, { "epoch": 0.5594758907127374, "grad_norm": 0.7378593683242798, "learning_rate": 8.142202718249697e-06, "loss": 0.3438, "step": 25100 }, { "epoch": 0.5595873400933574, "grad_norm": 0.5122184753417969, "learning_rate": 8.138762467110014e-06, "loss": 0.3632, "step": 25105 }, { "epoch": 0.5596987894739776, "grad_norm": 0.43752196431159973, "learning_rate": 8.135322444143683e-06, "loss": 0.2482, "step": 25110 }, { "epoch": 0.5598102388545976, "grad_norm": 0.7723203897476196, "learning_rate": 8.13188264977243e-06, "loss": 0.259, "step": 25115 }, { "epoch": 0.5599216882352176, "grad_norm": 0.7775996327400208, "learning_rate": 8.128443084417942e-06, "loss": 0.4302, "step": 25120 }, { "epoch": 0.5600331376158377, "grad_norm": 0.33896544575691223, "learning_rate": 8.125003748501891e-06, "loss": 0.2196, "step": 25125 }, { "epoch": 0.5601445869964577, "grad_norm": 0.6470742225646973, "learning_rate": 8.121564642445907e-06, "loss": 0.4144, "step": 25130 }, { "epoch": 0.5602560363770779, "grad_norm": 0.4051295518875122, "learning_rate": 8.118125766671594e-06, "loss": 0.2509, "step": 25135 }, { "epoch": 0.5603674857576979, "grad_norm": 0.4324702024459839, "learning_rate": 8.114687121600541e-06, "loss": 0.422, "step": 25140 }, { "epoch": 0.5604789351383179, "grad_norm": 0.7880158424377441, "learning_rate": 8.111248707654288e-06, "loss": 0.338, "step": 25145 }, { "epoch": 0.5605903845189381, "grad_norm": 0.6531829237937927, "learning_rate": 8.10781052525437e-06, "loss": 0.2991, "step": 25150 }, { "epoch": 0.5607018338995581, "grad_norm": 0.49740514159202576, "learning_rate": 8.104372574822274e-06, "loss": 0.3692, "step": 25155 }, { "epoch": 0.5608132832801782, "grad_norm": 0.6843011975288391, "learning_rate": 8.10093485677947e-06, "loss": 0.306, "step": 25160 }, { "epoch": 0.5609247326607982, "grad_norm": 0.9157106876373291, "learning_rate": 8.097497371547392e-06, "loss": 0.3139, "step": 25165 }, { "epoch": 0.5610361820414183, "grad_norm": 0.39046990871429443, "learning_rate": 8.094060119547455e-06, "loss": 0.3031, "step": 25170 }, { "epoch": 0.5611476314220384, "grad_norm": 0.5675200819969177, "learning_rate": 8.090623101201035e-06, "loss": 0.4418, "step": 25175 }, { "epoch": 0.5612590808026584, "grad_norm": 0.56672203540802, "learning_rate": 8.087186316929479e-06, "loss": 0.2393, "step": 25180 }, { "epoch": 0.5613705301832785, "grad_norm": 0.5712127089500427, "learning_rate": 8.083749767154118e-06, "loss": 0.3301, "step": 25185 }, { "epoch": 0.5614819795638986, "grad_norm": 0.8723680377006531, "learning_rate": 8.080313452296246e-06, "loss": 0.2967, "step": 25190 }, { "epoch": 0.5615934289445187, "grad_norm": 0.7300506234169006, "learning_rate": 8.07687737277713e-06, "loss": 0.2007, "step": 25195 }, { "epoch": 0.5617048783251387, "grad_norm": 0.44683921337127686, "learning_rate": 8.073441529017998e-06, "loss": 0.2307, "step": 25200 }, { "epoch": 0.5618163277057587, "grad_norm": 0.6372109651565552, "learning_rate": 8.07000592144007e-06, "loss": 0.1934, "step": 25205 }, { "epoch": 0.5619277770863789, "grad_norm": 0.6536151170730591, "learning_rate": 8.066570550464515e-06, "loss": 0.3503, "step": 25210 }, { "epoch": 0.5620392264669989, "grad_norm": 0.8571807146072388, "learning_rate": 8.063135416512483e-06, "loss": 0.3751, "step": 25215 }, { "epoch": 0.562150675847619, "grad_norm": 0.6964419484138489, "learning_rate": 8.059700520005104e-06, "loss": 0.3219, "step": 25220 }, { "epoch": 0.562262125228239, "grad_norm": 0.5729572772979736, "learning_rate": 8.056265861363464e-06, "loss": 0.3504, "step": 25225 }, { "epoch": 0.5623735746088591, "grad_norm": 0.701521098613739, "learning_rate": 8.052831441008626e-06, "loss": 0.2278, "step": 25230 }, { "epoch": 0.5624850239894792, "grad_norm": 0.4984922409057617, "learning_rate": 8.049397259361622e-06, "loss": 0.4445, "step": 25235 }, { "epoch": 0.5625964733700992, "grad_norm": 0.560855507850647, "learning_rate": 8.045963316843461e-06, "loss": 0.3214, "step": 25240 }, { "epoch": 0.5627079227507193, "grad_norm": 0.6652450561523438, "learning_rate": 8.042529613875111e-06, "loss": 0.2324, "step": 25245 }, { "epoch": 0.5628193721313394, "grad_norm": 0.5668606758117676, "learning_rate": 8.039096150877518e-06, "loss": 0.3622, "step": 25250 }, { "epoch": 0.5629308215119595, "grad_norm": 0.7954594492912292, "learning_rate": 8.035662928271607e-06, "loss": 0.3204, "step": 25255 }, { "epoch": 0.5630422708925795, "grad_norm": 0.7688658237457275, "learning_rate": 8.032229946478254e-06, "loss": 0.3322, "step": 25260 }, { "epoch": 0.5631537202731995, "grad_norm": 0.7299761176109314, "learning_rate": 8.028797205918326e-06, "loss": 0.353, "step": 25265 }, { "epoch": 0.5632651696538197, "grad_norm": 0.6364347338676453, "learning_rate": 8.025364707012644e-06, "loss": 0.2966, "step": 25270 }, { "epoch": 0.5633766190344397, "grad_norm": 0.7999971508979797, "learning_rate": 8.021932450182004e-06, "loss": 0.3605, "step": 25275 }, { "epoch": 0.5634880684150598, "grad_norm": 0.5598938465118408, "learning_rate": 8.01850043584718e-06, "loss": 0.2989, "step": 25280 }, { "epoch": 0.5635995177956798, "grad_norm": 0.6162434220314026, "learning_rate": 8.015068664428905e-06, "loss": 0.313, "step": 25285 }, { "epoch": 0.5637109671762999, "grad_norm": 0.8101426362991333, "learning_rate": 8.011637136347892e-06, "loss": 0.1561, "step": 25290 }, { "epoch": 0.56382241655692, "grad_norm": 0.6738321781158447, "learning_rate": 8.008205852024817e-06, "loss": 0.3467, "step": 25295 }, { "epoch": 0.56393386593754, "grad_norm": 0.5133274793624878, "learning_rate": 8.004774811880333e-06, "loss": 0.3507, "step": 25300 }, { "epoch": 0.5640453153181602, "grad_norm": 0.4188895523548126, "learning_rate": 8.001344016335054e-06, "loss": 0.2576, "step": 25305 }, { "epoch": 0.5641567646987802, "grad_norm": 0.520499587059021, "learning_rate": 7.997913465809575e-06, "loss": 0.3331, "step": 25310 }, { "epoch": 0.5642682140794002, "grad_norm": 0.8256027102470398, "learning_rate": 7.994483160724449e-06, "loss": 0.2773, "step": 25315 }, { "epoch": 0.5643796634600203, "grad_norm": 0.5647714734077454, "learning_rate": 7.991053101500204e-06, "loss": 0.3196, "step": 25320 }, { "epoch": 0.5644911128406404, "grad_norm": 0.5048403739929199, "learning_rate": 7.987623288557343e-06, "loss": 0.2535, "step": 25325 }, { "epoch": 0.5646025622212605, "grad_norm": 0.8115688562393188, "learning_rate": 7.984193722316338e-06, "loss": 0.2521, "step": 25330 }, { "epoch": 0.5647140116018805, "grad_norm": 0.49011000990867615, "learning_rate": 7.98076440319762e-06, "loss": 0.305, "step": 25335 }, { "epoch": 0.5648254609825006, "grad_norm": 0.7082293033599854, "learning_rate": 7.977335331621601e-06, "loss": 0.3085, "step": 25340 }, { "epoch": 0.5649369103631207, "grad_norm": 0.673119843006134, "learning_rate": 7.973906508008659e-06, "loss": 0.3354, "step": 25345 }, { "epoch": 0.5650483597437407, "grad_norm": 0.6760952472686768, "learning_rate": 7.970477932779137e-06, "loss": 0.4585, "step": 25350 }, { "epoch": 0.5651598091243608, "grad_norm": 1.0284398794174194, "learning_rate": 7.967049606353354e-06, "loss": 0.3098, "step": 25355 }, { "epoch": 0.5652712585049808, "grad_norm": 0.5097162127494812, "learning_rate": 7.963621529151601e-06, "loss": 0.3228, "step": 25360 }, { "epoch": 0.565382707885601, "grad_norm": 0.5417636632919312, "learning_rate": 7.96019370159413e-06, "loss": 0.258, "step": 25365 }, { "epoch": 0.565494157266221, "grad_norm": 0.47898048162460327, "learning_rate": 7.956766124101164e-06, "loss": 0.3204, "step": 25370 }, { "epoch": 0.565605606646841, "grad_norm": 0.5756115913391113, "learning_rate": 7.953338797092902e-06, "loss": 0.2885, "step": 25375 }, { "epoch": 0.5657170560274611, "grad_norm": 0.6981114745140076, "learning_rate": 7.949911720989503e-06, "loss": 0.2448, "step": 25380 }, { "epoch": 0.5658285054080812, "grad_norm": 0.805111289024353, "learning_rate": 7.946484896211106e-06, "loss": 0.2254, "step": 25385 }, { "epoch": 0.5659399547887013, "grad_norm": 0.5139040946960449, "learning_rate": 7.943058323177807e-06, "loss": 0.177, "step": 25390 }, { "epoch": 0.5660514041693213, "grad_norm": 0.5966136455535889, "learning_rate": 7.939632002309681e-06, "loss": 0.3185, "step": 25395 }, { "epoch": 0.5661628535499414, "grad_norm": 0.8083769679069519, "learning_rate": 7.936205934026769e-06, "loss": 0.4075, "step": 25400 }, { "epoch": 0.5662743029305615, "grad_norm": 0.45511701703071594, "learning_rate": 7.93278011874908e-06, "loss": 0.2769, "step": 25405 }, { "epoch": 0.5663857523111815, "grad_norm": 0.8094688653945923, "learning_rate": 7.929354556896592e-06, "loss": 0.3354, "step": 25410 }, { "epoch": 0.5664972016918016, "grad_norm": 0.5739903450012207, "learning_rate": 7.92592924888925e-06, "loss": 0.2835, "step": 25415 }, { "epoch": 0.5666086510724216, "grad_norm": 0.7249305248260498, "learning_rate": 7.922504195146975e-06, "loss": 0.3656, "step": 25420 }, { "epoch": 0.5667201004530418, "grad_norm": 0.7731884121894836, "learning_rate": 7.919079396089648e-06, "loss": 0.3511, "step": 25425 }, { "epoch": 0.5668315498336618, "grad_norm": 0.652795672416687, "learning_rate": 7.915654852137124e-06, "loss": 0.3149, "step": 25430 }, { "epoch": 0.5669429992142818, "grad_norm": 0.6516653895378113, "learning_rate": 7.91223056370923e-06, "loss": 0.3051, "step": 25435 }, { "epoch": 0.567054448594902, "grad_norm": 0.5057012438774109, "learning_rate": 7.908806531225753e-06, "loss": 0.2262, "step": 25440 }, { "epoch": 0.567165897975522, "grad_norm": 0.513541579246521, "learning_rate": 7.905382755106452e-06, "loss": 0.286, "step": 25445 }, { "epoch": 0.5672773473561421, "grad_norm": 0.5020678639411926, "learning_rate": 7.90195923577106e-06, "loss": 0.2685, "step": 25450 }, { "epoch": 0.5673887967367621, "grad_norm": 0.5548171997070312, "learning_rate": 7.898535973639272e-06, "loss": 0.3232, "step": 25455 }, { "epoch": 0.5675002461173823, "grad_norm": 0.4107208251953125, "learning_rate": 7.895112969130745e-06, "loss": 0.238, "step": 25460 }, { "epoch": 0.5676116954980023, "grad_norm": 0.6904474496841431, "learning_rate": 7.891690222665126e-06, "loss": 0.2172, "step": 25465 }, { "epoch": 0.5677231448786223, "grad_norm": 0.3023183345794678, "learning_rate": 7.888267734662013e-06, "loss": 0.3178, "step": 25470 }, { "epoch": 0.5678345942592424, "grad_norm": 0.6031567454338074, "learning_rate": 7.884845505540975e-06, "loss": 0.2792, "step": 25475 }, { "epoch": 0.5679460436398625, "grad_norm": 0.5487951636314392, "learning_rate": 7.881423535721553e-06, "loss": 0.3045, "step": 25480 }, { "epoch": 0.5680574930204826, "grad_norm": 0.6568580865859985, "learning_rate": 7.87800182562325e-06, "loss": 0.2478, "step": 25485 }, { "epoch": 0.5681689424011026, "grad_norm": 0.6862227916717529, "learning_rate": 7.874580375665546e-06, "loss": 0.3681, "step": 25490 }, { "epoch": 0.5682803917817226, "grad_norm": 0.18388068675994873, "learning_rate": 7.871159186267881e-06, "loss": 0.2117, "step": 25495 }, { "epoch": 0.5683918411623428, "grad_norm": 0.4341485798358917, "learning_rate": 7.867738257849665e-06, "loss": 0.3259, "step": 25500 }, { "epoch": 0.5685032905429628, "grad_norm": 0.7467966079711914, "learning_rate": 7.864317590830284e-06, "loss": 0.4328, "step": 25505 }, { "epoch": 0.5686147399235829, "grad_norm": 0.867165744304657, "learning_rate": 7.860897185629078e-06, "loss": 0.283, "step": 25510 }, { "epoch": 0.5687261893042029, "grad_norm": 0.675286591053009, "learning_rate": 7.857477042665369e-06, "loss": 0.2941, "step": 25515 }, { "epoch": 0.568837638684823, "grad_norm": 0.8349703550338745, "learning_rate": 7.854057162358436e-06, "loss": 0.2432, "step": 25520 }, { "epoch": 0.5689490880654431, "grad_norm": 0.5063271522521973, "learning_rate": 7.85063754512753e-06, "loss": 0.3295, "step": 25525 }, { "epoch": 0.5690605374460631, "grad_norm": 0.4811290204524994, "learning_rate": 7.847218191391873e-06, "loss": 0.2728, "step": 25530 }, { "epoch": 0.5691719868266832, "grad_norm": 0.5109490752220154, "learning_rate": 7.843799101570645e-06, "loss": 0.4188, "step": 25535 }, { "epoch": 0.5692834362073033, "grad_norm": 0.4581867754459381, "learning_rate": 7.840380276083002e-06, "loss": 0.3957, "step": 25540 }, { "epoch": 0.5693948855879234, "grad_norm": 0.507814347743988, "learning_rate": 7.836961715348072e-06, "loss": 0.2872, "step": 25545 }, { "epoch": 0.5695063349685434, "grad_norm": 0.5380224585533142, "learning_rate": 7.833543419784941e-06, "loss": 0.2411, "step": 25550 }, { "epoch": 0.5696177843491634, "grad_norm": 0.7744241952896118, "learning_rate": 7.83012538981266e-06, "loss": 0.3722, "step": 25555 }, { "epoch": 0.5697292337297836, "grad_norm": 0.4554356634616852, "learning_rate": 7.826707625850261e-06, "loss": 0.2234, "step": 25560 }, { "epoch": 0.5698406831104036, "grad_norm": 0.6055052876472473, "learning_rate": 7.82329012831673e-06, "loss": 0.3317, "step": 25565 }, { "epoch": 0.5699521324910237, "grad_norm": 0.5006867051124573, "learning_rate": 7.819872897631024e-06, "loss": 0.2603, "step": 25570 }, { "epoch": 0.5700635818716437, "grad_norm": 0.6414822936058044, "learning_rate": 7.816455934212078e-06, "loss": 0.3273, "step": 25575 }, { "epoch": 0.5701750312522638, "grad_norm": 0.4626764953136444, "learning_rate": 7.813039238478778e-06, "loss": 0.2381, "step": 25580 }, { "epoch": 0.5702864806328839, "grad_norm": 0.7485530376434326, "learning_rate": 7.809622810849986e-06, "loss": 0.2817, "step": 25585 }, { "epoch": 0.5703979300135039, "grad_norm": 0.6687384247779846, "learning_rate": 7.806206651744534e-06, "loss": 0.4064, "step": 25590 }, { "epoch": 0.570509379394124, "grad_norm": 0.6886327862739563, "learning_rate": 7.80279076158121e-06, "loss": 0.3425, "step": 25595 }, { "epoch": 0.5706208287747441, "grad_norm": 0.47214236855506897, "learning_rate": 7.799375140778778e-06, "loss": 0.2447, "step": 25600 }, { "epoch": 0.5707322781553642, "grad_norm": 0.9890148639678955, "learning_rate": 7.795959789755967e-06, "loss": 0.2846, "step": 25605 }, { "epoch": 0.5708437275359842, "grad_norm": 0.6087769269943237, "learning_rate": 7.792544708931475e-06, "loss": 0.3436, "step": 25610 }, { "epoch": 0.5709551769166042, "grad_norm": 0.7241442799568176, "learning_rate": 7.789129898723962e-06, "loss": 0.3694, "step": 25615 }, { "epoch": 0.5710666262972244, "grad_norm": 0.6570469737052917, "learning_rate": 7.78571535955206e-06, "loss": 0.3063, "step": 25620 }, { "epoch": 0.5711780756778444, "grad_norm": 0.561510443687439, "learning_rate": 7.782301091834363e-06, "loss": 0.286, "step": 25625 }, { "epoch": 0.5712895250584645, "grad_norm": 0.6006831526756287, "learning_rate": 7.778887095989433e-06, "loss": 0.4609, "step": 25630 }, { "epoch": 0.5714009744390846, "grad_norm": 0.4968348443508148, "learning_rate": 7.775473372435803e-06, "loss": 0.2656, "step": 25635 }, { "epoch": 0.5715124238197046, "grad_norm": 0.5182333588600159, "learning_rate": 7.77205992159196e-06, "loss": 0.2532, "step": 25640 }, { "epoch": 0.5716238732003247, "grad_norm": 0.6506955623626709, "learning_rate": 7.768646743876379e-06, "loss": 0.2729, "step": 25645 }, { "epoch": 0.5717353225809447, "grad_norm": 0.5487266778945923, "learning_rate": 7.765233839707483e-06, "loss": 0.234, "step": 25650 }, { "epoch": 0.5718467719615649, "grad_norm": 0.564389169216156, "learning_rate": 7.761821209503669e-06, "loss": 0.1993, "step": 25655 }, { "epoch": 0.5719582213421849, "grad_norm": 0.5629382729530334, "learning_rate": 7.758408853683296e-06, "loss": 0.3271, "step": 25660 }, { "epoch": 0.572069670722805, "grad_norm": 0.5799145698547363, "learning_rate": 7.7549967726647e-06, "loss": 0.2574, "step": 25665 }, { "epoch": 0.572181120103425, "grad_norm": 0.3119282126426697, "learning_rate": 7.75158496686617e-06, "loss": 0.1811, "step": 25670 }, { "epoch": 0.572292569484045, "grad_norm": 0.7346594333648682, "learning_rate": 7.748173436705962e-06, "loss": 0.3085, "step": 25675 }, { "epoch": 0.5724040188646652, "grad_norm": 0.6520366072654724, "learning_rate": 7.744762182602313e-06, "loss": 0.2386, "step": 25680 }, { "epoch": 0.5725154682452852, "grad_norm": 0.5619764924049377, "learning_rate": 7.741351204973414e-06, "loss": 0.4687, "step": 25685 }, { "epoch": 0.5726269176259053, "grad_norm": 1.1763639450073242, "learning_rate": 7.737940504237421e-06, "loss": 0.1898, "step": 25690 }, { "epoch": 0.5727383670065254, "grad_norm": 0.7920293807983398, "learning_rate": 7.734530080812463e-06, "loss": 0.1965, "step": 25695 }, { "epoch": 0.5728498163871454, "grad_norm": 0.557349681854248, "learning_rate": 7.731119935116632e-06, "loss": 0.3269, "step": 25700 }, { "epoch": 0.5729612657677655, "grad_norm": 0.7871140241622925, "learning_rate": 7.727710067567982e-06, "loss": 0.2021, "step": 25705 }, { "epoch": 0.5730727151483855, "grad_norm": 0.7102549076080322, "learning_rate": 7.724300478584535e-06, "loss": 0.3496, "step": 25710 }, { "epoch": 0.5731841645290057, "grad_norm": 0.9377176761627197, "learning_rate": 7.720891168584288e-06, "loss": 0.3166, "step": 25715 }, { "epoch": 0.5732956139096257, "grad_norm": 0.7283439636230469, "learning_rate": 7.717482137985193e-06, "loss": 0.4105, "step": 25720 }, { "epoch": 0.5734070632902457, "grad_norm": 0.668796956539154, "learning_rate": 7.714073387205165e-06, "loss": 0.245, "step": 25725 }, { "epoch": 0.5735185126708658, "grad_norm": 0.39624908566474915, "learning_rate": 7.7106649166621e-06, "loss": 0.2321, "step": 25730 }, { "epoch": 0.5736299620514859, "grad_norm": 0.5537715554237366, "learning_rate": 7.707256726773841e-06, "loss": 0.3214, "step": 25735 }, { "epoch": 0.573741411432106, "grad_norm": 0.6404997110366821, "learning_rate": 7.703848817958213e-06, "loss": 0.3117, "step": 25740 }, { "epoch": 0.573852860812726, "grad_norm": 0.6337887048721313, "learning_rate": 7.700441190632992e-06, "loss": 0.3031, "step": 25745 }, { "epoch": 0.5739643101933461, "grad_norm": 0.667795717716217, "learning_rate": 7.697033845215936e-06, "loss": 0.3622, "step": 25750 }, { "epoch": 0.5740757595739662, "grad_norm": 0.44926580786705017, "learning_rate": 7.693626782124751e-06, "loss": 0.3497, "step": 25755 }, { "epoch": 0.5741872089545862, "grad_norm": 0.6890833377838135, "learning_rate": 7.690220001777124e-06, "loss": 0.2375, "step": 25760 }, { "epoch": 0.5742986583352063, "grad_norm": 0.5325068235397339, "learning_rate": 7.686813504590693e-06, "loss": 0.3192, "step": 25765 }, { "epoch": 0.5744101077158263, "grad_norm": 0.6580883860588074, "learning_rate": 7.68340729098307e-06, "loss": 0.3099, "step": 25770 }, { "epoch": 0.5745215570964465, "grad_norm": 0.5879032015800476, "learning_rate": 7.680001361371837e-06, "loss": 0.2587, "step": 25775 }, { "epoch": 0.5746330064770665, "grad_norm": 0.48277339339256287, "learning_rate": 7.676595716174522e-06, "loss": 0.2754, "step": 25780 }, { "epoch": 0.5747444558576865, "grad_norm": 0.7396294474601746, "learning_rate": 7.673190355808643e-06, "loss": 0.3878, "step": 25785 }, { "epoch": 0.5748559052383067, "grad_norm": 0.6438273191452026, "learning_rate": 7.669785280691667e-06, "loss": 0.3425, "step": 25790 }, { "epoch": 0.5749673546189267, "grad_norm": 0.5543471574783325, "learning_rate": 7.666380491241027e-06, "loss": 0.2511, "step": 25795 }, { "epoch": 0.5750788039995468, "grad_norm": 0.4841437339782715, "learning_rate": 7.662975987874127e-06, "loss": 0.3244, "step": 25800 }, { "epoch": 0.5751902533801668, "grad_norm": 0.786459743976593, "learning_rate": 7.659571771008333e-06, "loss": 0.3308, "step": 25805 }, { "epoch": 0.575301702760787, "grad_norm": 0.700810968875885, "learning_rate": 7.656167841060975e-06, "loss": 0.3623, "step": 25810 }, { "epoch": 0.575413152141407, "grad_norm": 0.6509367227554321, "learning_rate": 7.652764198449342e-06, "loss": 0.3097, "step": 25815 }, { "epoch": 0.575524601522027, "grad_norm": 0.5181035995483398, "learning_rate": 7.649360843590704e-06, "loss": 0.2473, "step": 25820 }, { "epoch": 0.5756360509026471, "grad_norm": 0.8579918146133423, "learning_rate": 7.645957776902284e-06, "loss": 0.3062, "step": 25825 }, { "epoch": 0.5757475002832672, "grad_norm": 0.5640930533409119, "learning_rate": 7.642554998801268e-06, "loss": 0.3244, "step": 25830 }, { "epoch": 0.5758589496638873, "grad_norm": 0.6464223861694336, "learning_rate": 7.639152509704815e-06, "loss": 0.3029, "step": 25835 }, { "epoch": 0.5759703990445073, "grad_norm": 0.6057314276695251, "learning_rate": 7.635750310030041e-06, "loss": 0.2792, "step": 25840 }, { "epoch": 0.5760818484251273, "grad_norm": 0.4716472029685974, "learning_rate": 7.632348400194027e-06, "loss": 0.3339, "step": 25845 }, { "epoch": 0.5761932978057475, "grad_norm": 0.5490631461143494, "learning_rate": 7.6289467806138225e-06, "loss": 0.2401, "step": 25850 }, { "epoch": 0.5763047471863675, "grad_norm": 0.4313923418521881, "learning_rate": 7.6255454517064466e-06, "loss": 0.209, "step": 25855 }, { "epoch": 0.5764161965669876, "grad_norm": 0.6104652285575867, "learning_rate": 7.622144413888868e-06, "loss": 0.3952, "step": 25860 }, { "epoch": 0.5765276459476076, "grad_norm": 0.5978262424468994, "learning_rate": 7.618743667578029e-06, "loss": 0.2015, "step": 25865 }, { "epoch": 0.5766390953282278, "grad_norm": 0.38337233662605286, "learning_rate": 7.615343213190838e-06, "loss": 0.2155, "step": 25870 }, { "epoch": 0.5767505447088478, "grad_norm": 0.6329131126403809, "learning_rate": 7.611943051144159e-06, "loss": 0.3101, "step": 25875 }, { "epoch": 0.5768619940894678, "grad_norm": 0.6377989649772644, "learning_rate": 7.608543181854832e-06, "loss": 0.3393, "step": 25880 }, { "epoch": 0.5769734434700879, "grad_norm": 0.6451283693313599, "learning_rate": 7.605143605739648e-06, "loss": 0.2839, "step": 25885 }, { "epoch": 0.577084892850708, "grad_norm": 0.571865975856781, "learning_rate": 7.601744323215376e-06, "loss": 0.2775, "step": 25890 }, { "epoch": 0.5771963422313281, "grad_norm": 0.45400145649909973, "learning_rate": 7.598345334698737e-06, "loss": 0.248, "step": 25895 }, { "epoch": 0.5773077916119481, "grad_norm": 0.36165305972099304, "learning_rate": 7.594946640606423e-06, "loss": 0.33, "step": 25900 }, { "epoch": 0.5774192409925681, "grad_norm": 0.5473405122756958, "learning_rate": 7.591548241355085e-06, "loss": 0.3083, "step": 25905 }, { "epoch": 0.5775306903731883, "grad_norm": 0.5231678485870361, "learning_rate": 7.588150137361345e-06, "loss": 0.3748, "step": 25910 }, { "epoch": 0.5776421397538083, "grad_norm": 0.9739239811897278, "learning_rate": 7.58475232904178e-06, "loss": 0.3063, "step": 25915 }, { "epoch": 0.5777535891344284, "grad_norm": 0.7371828556060791, "learning_rate": 7.5813548168129335e-06, "loss": 0.3693, "step": 25920 }, { "epoch": 0.5778650385150484, "grad_norm": 0.6414753198623657, "learning_rate": 7.577957601091319e-06, "loss": 0.3757, "step": 25925 }, { "epoch": 0.5779764878956685, "grad_norm": 0.7251477241516113, "learning_rate": 7.57456068229341e-06, "loss": 0.4134, "step": 25930 }, { "epoch": 0.5780879372762886, "grad_norm": 0.3665677011013031, "learning_rate": 7.5711640608356405e-06, "loss": 0.2698, "step": 25935 }, { "epoch": 0.5781993866569086, "grad_norm": 0.5627968907356262, "learning_rate": 7.567767737134406e-06, "loss": 0.2704, "step": 25940 }, { "epoch": 0.5783108360375288, "grad_norm": 0.5136057734489441, "learning_rate": 7.564371711606075e-06, "loss": 0.3786, "step": 25945 }, { "epoch": 0.5784222854181488, "grad_norm": 0.6626278758049011, "learning_rate": 7.560975984666971e-06, "loss": 0.3273, "step": 25950 }, { "epoch": 0.5785337347987689, "grad_norm": 0.25327253341674805, "learning_rate": 7.557580556733384e-06, "loss": 0.2196, "step": 25955 }, { "epoch": 0.5786451841793889, "grad_norm": 0.7648833990097046, "learning_rate": 7.554185428221569e-06, "loss": 0.2873, "step": 25960 }, { "epoch": 0.578756633560009, "grad_norm": 0.5665774941444397, "learning_rate": 7.550790599547742e-06, "loss": 0.2172, "step": 25965 }, { "epoch": 0.5788680829406291, "grad_norm": 0.5095011591911316, "learning_rate": 7.547396071128082e-06, "loss": 0.2808, "step": 25970 }, { "epoch": 0.5789795323212491, "grad_norm": 0.7390519976615906, "learning_rate": 7.544001843378734e-06, "loss": 0.3288, "step": 25975 }, { "epoch": 0.5790909817018692, "grad_norm": 0.4616258442401886, "learning_rate": 7.540607916715803e-06, "loss": 0.1979, "step": 25980 }, { "epoch": 0.5792024310824893, "grad_norm": 0.7574527859687805, "learning_rate": 7.537214291555356e-06, "loss": 0.2186, "step": 25985 }, { "epoch": 0.5793138804631093, "grad_norm": 0.6136702299118042, "learning_rate": 7.533820968313425e-06, "loss": 0.2186, "step": 25990 }, { "epoch": 0.5794253298437294, "grad_norm": 0.5772611498832703, "learning_rate": 7.530427947406011e-06, "loss": 0.334, "step": 25995 }, { "epoch": 0.5795367792243494, "grad_norm": 0.4464194178581238, "learning_rate": 7.527035229249066e-06, "loss": 0.2133, "step": 26000 }, { "epoch": 0.5796482286049696, "grad_norm": 0.473961740732193, "learning_rate": 7.523642814258516e-06, "loss": 0.1995, "step": 26005 }, { "epoch": 0.5797596779855896, "grad_norm": 0.6521080136299133, "learning_rate": 7.520250702850242e-06, "loss": 0.2104, "step": 26010 }, { "epoch": 0.5798711273662097, "grad_norm": 0.5367646217346191, "learning_rate": 7.51685889544009e-06, "loss": 0.2848, "step": 26015 }, { "epoch": 0.5799825767468297, "grad_norm": 0.5130108594894409, "learning_rate": 7.513467392443872e-06, "loss": 0.3441, "step": 26020 }, { "epoch": 0.5800940261274498, "grad_norm": 0.5340881943702698, "learning_rate": 7.5100761942773585e-06, "loss": 0.3413, "step": 26025 }, { "epoch": 0.5802054755080699, "grad_norm": 0.7103127241134644, "learning_rate": 7.50668530135628e-06, "loss": 0.3688, "step": 26030 }, { "epoch": 0.5803169248886899, "grad_norm": 0.6038323640823364, "learning_rate": 7.503294714096341e-06, "loss": 0.3647, "step": 26035 }, { "epoch": 0.58042837426931, "grad_norm": 0.533886194229126, "learning_rate": 7.4999044329132e-06, "loss": 0.426, "step": 26040 }, { "epoch": 0.5805398236499301, "grad_norm": 0.48127058148384094, "learning_rate": 7.496514458222475e-06, "loss": 0.2537, "step": 26045 }, { "epoch": 0.5806512730305501, "grad_norm": 0.5195320248603821, "learning_rate": 7.4931247904397564e-06, "loss": 0.2595, "step": 26050 }, { "epoch": 0.5807627224111702, "grad_norm": 0.9014118313789368, "learning_rate": 7.489735429980589e-06, "loss": 0.348, "step": 26055 }, { "epoch": 0.5808741717917902, "grad_norm": 0.7020335793495178, "learning_rate": 7.4863463772604785e-06, "loss": 0.2999, "step": 26060 }, { "epoch": 0.5809856211724104, "grad_norm": 0.7003258466720581, "learning_rate": 7.482957632694898e-06, "loss": 0.2775, "step": 26065 }, { "epoch": 0.5810970705530304, "grad_norm": 0.5625821948051453, "learning_rate": 7.479569196699287e-06, "loss": 0.2492, "step": 26070 }, { "epoch": 0.5812085199336504, "grad_norm": 0.6482052803039551, "learning_rate": 7.476181069689038e-06, "loss": 0.2566, "step": 26075 }, { "epoch": 0.5813199693142705, "grad_norm": 0.6843783855438232, "learning_rate": 7.472793252079506e-06, "loss": 0.3194, "step": 26080 }, { "epoch": 0.5814314186948906, "grad_norm": 0.8143817782402039, "learning_rate": 7.469405744286018e-06, "loss": 0.3681, "step": 26085 }, { "epoch": 0.5815428680755107, "grad_norm": 0.5542212724685669, "learning_rate": 7.46601854672385e-06, "loss": 0.207, "step": 26090 }, { "epoch": 0.5816543174561307, "grad_norm": 0.4516535699367523, "learning_rate": 7.462631659808251e-06, "loss": 0.3429, "step": 26095 }, { "epoch": 0.5817657668367509, "grad_norm": 0.4697490930557251, "learning_rate": 7.45924508395442e-06, "loss": 0.2176, "step": 26100 }, { "epoch": 0.5818772162173709, "grad_norm": 0.6717570424079895, "learning_rate": 7.455858819577535e-06, "loss": 0.3194, "step": 26105 }, { "epoch": 0.5819886655979909, "grad_norm": 0.8826180100440979, "learning_rate": 7.452472867092718e-06, "loss": 0.3914, "step": 26110 }, { "epoch": 0.582100114978611, "grad_norm": 0.44467124342918396, "learning_rate": 7.449087226915066e-06, "loss": 0.2664, "step": 26115 }, { "epoch": 0.582211564359231, "grad_norm": 0.5231371521949768, "learning_rate": 7.4457018994596305e-06, "loss": 0.3211, "step": 26120 }, { "epoch": 0.5823230137398512, "grad_norm": 0.50714111328125, "learning_rate": 7.442316885141423e-06, "loss": 0.2728, "step": 26125 }, { "epoch": 0.5824344631204712, "grad_norm": 0.752540111541748, "learning_rate": 7.438932184375427e-06, "loss": 0.3526, "step": 26130 }, { "epoch": 0.5825459125010912, "grad_norm": 0.8013758063316345, "learning_rate": 7.435547797576571e-06, "loss": 0.278, "step": 26135 }, { "epoch": 0.5826573618817114, "grad_norm": 0.7866904735565186, "learning_rate": 7.4321637251597625e-06, "loss": 0.2892, "step": 26140 }, { "epoch": 0.5827688112623314, "grad_norm": 0.5477409362792969, "learning_rate": 7.428779967539863e-06, "loss": 0.2566, "step": 26145 }, { "epoch": 0.5828802606429515, "grad_norm": 0.5327290892601013, "learning_rate": 7.425396525131694e-06, "loss": 0.3314, "step": 26150 }, { "epoch": 0.5829917100235715, "grad_norm": 0.8647708892822266, "learning_rate": 7.422013398350035e-06, "loss": 0.2462, "step": 26155 }, { "epoch": 0.5831031594041917, "grad_norm": 0.7855604290962219, "learning_rate": 7.418630587609636e-06, "loss": 0.4267, "step": 26160 }, { "epoch": 0.5832146087848117, "grad_norm": 0.6840621829032898, "learning_rate": 7.415248093325203e-06, "loss": 0.22, "step": 26165 }, { "epoch": 0.5833260581654317, "grad_norm": 0.7037733197212219, "learning_rate": 7.411865915911397e-06, "loss": 0.3764, "step": 26170 }, { "epoch": 0.5834375075460518, "grad_norm": 0.5002277493476868, "learning_rate": 7.408484055782854e-06, "loss": 0.1966, "step": 26175 }, { "epoch": 0.5835489569266719, "grad_norm": 0.6145541667938232, "learning_rate": 7.405102513354166e-06, "loss": 0.3244, "step": 26180 }, { "epoch": 0.583660406307292, "grad_norm": 0.515566349029541, "learning_rate": 7.4017212890398786e-06, "loss": 0.343, "step": 26185 }, { "epoch": 0.583771855687912, "grad_norm": 0.5336402654647827, "learning_rate": 7.398340383254507e-06, "loss": 0.258, "step": 26190 }, { "epoch": 0.583883305068532, "grad_norm": 0.5561584830284119, "learning_rate": 7.394959796412522e-06, "loss": 0.285, "step": 26195 }, { "epoch": 0.5839947544491522, "grad_norm": 0.8551725149154663, "learning_rate": 7.3915795289283565e-06, "loss": 0.2469, "step": 26200 }, { "epoch": 0.5841062038297722, "grad_norm": 1.5717085599899292, "learning_rate": 7.388199581216404e-06, "loss": 0.3854, "step": 26205 }, { "epoch": 0.5842176532103923, "grad_norm": 0.5521072745323181, "learning_rate": 7.384819953691028e-06, "loss": 0.2584, "step": 26210 }, { "epoch": 0.5843291025910123, "grad_norm": 0.4260997474193573, "learning_rate": 7.38144064676654e-06, "loss": 0.3264, "step": 26215 }, { "epoch": 0.5844405519716325, "grad_norm": 0.7784475684165955, "learning_rate": 7.378061660857213e-06, "loss": 0.3542, "step": 26220 }, { "epoch": 0.5845520013522525, "grad_norm": 0.5338385701179504, "learning_rate": 7.374682996377292e-06, "loss": 0.3085, "step": 26225 }, { "epoch": 0.5846634507328725, "grad_norm": 0.5612271428108215, "learning_rate": 7.371304653740967e-06, "loss": 0.2138, "step": 26230 }, { "epoch": 0.5847749001134926, "grad_norm": 0.5885851979255676, "learning_rate": 7.367926633362405e-06, "loss": 0.272, "step": 26235 }, { "epoch": 0.5848863494941127, "grad_norm": 0.6166054606437683, "learning_rate": 7.364548935655717e-06, "loss": 0.2572, "step": 26240 }, { "epoch": 0.5849977988747328, "grad_norm": 0.4326198697090149, "learning_rate": 7.36117156103499e-06, "loss": 0.3473, "step": 26245 }, { "epoch": 0.5851092482553528, "grad_norm": 0.9689270853996277, "learning_rate": 7.35779450991426e-06, "loss": 0.3694, "step": 26250 }, { "epoch": 0.5852206976359728, "grad_norm": 0.6381075382232666, "learning_rate": 7.354417782707529e-06, "loss": 0.4034, "step": 26255 }, { "epoch": 0.585332147016593, "grad_norm": 0.8201387524604797, "learning_rate": 7.351041379828756e-06, "loss": 0.3866, "step": 26260 }, { "epoch": 0.585443596397213, "grad_norm": 0.39132338762283325, "learning_rate": 7.347665301691865e-06, "loss": 0.2545, "step": 26265 }, { "epoch": 0.5855550457778331, "grad_norm": 0.8254690766334534, "learning_rate": 7.344289548710734e-06, "loss": 0.2786, "step": 26270 }, { "epoch": 0.5856664951584531, "grad_norm": 0.697370707988739, "learning_rate": 7.340914121299203e-06, "loss": 0.2387, "step": 26275 }, { "epoch": 0.5857779445390732, "grad_norm": 0.5445848703384399, "learning_rate": 7.337539019871078e-06, "loss": 0.1327, "step": 26280 }, { "epoch": 0.5858893939196933, "grad_norm": 0.44236284494400024, "learning_rate": 7.334164244840118e-06, "loss": 0.2674, "step": 26285 }, { "epoch": 0.5860008433003133, "grad_norm": 0.6897943019866943, "learning_rate": 7.330789796620047e-06, "loss": 0.3231, "step": 26290 }, { "epoch": 0.5861122926809335, "grad_norm": 0.4481407105922699, "learning_rate": 7.327415675624541e-06, "loss": 0.2518, "step": 26295 }, { "epoch": 0.5862237420615535, "grad_norm": 0.6133534908294678, "learning_rate": 7.3240418822672454e-06, "loss": 0.2758, "step": 26300 }, { "epoch": 0.5863351914421736, "grad_norm": 0.6441121697425842, "learning_rate": 7.320668416961758e-06, "loss": 0.2207, "step": 26305 }, { "epoch": 0.5864466408227936, "grad_norm": 0.9060331583023071, "learning_rate": 7.317295280121639e-06, "loss": 0.3035, "step": 26310 }, { "epoch": 0.5865580902034137, "grad_norm": 0.3640364408493042, "learning_rate": 7.313922472160415e-06, "loss": 0.2682, "step": 26315 }, { "epoch": 0.5866695395840338, "grad_norm": 0.5085642337799072, "learning_rate": 7.310549993491565e-06, "loss": 0.3085, "step": 26320 }, { "epoch": 0.5867809889646538, "grad_norm": 0.6459940671920776, "learning_rate": 7.307177844528521e-06, "loss": 0.3039, "step": 26325 }, { "epoch": 0.5868924383452739, "grad_norm": 0.7363597750663757, "learning_rate": 7.303806025684692e-06, "loss": 0.3756, "step": 26330 }, { "epoch": 0.587003887725894, "grad_norm": 0.2980023920536041, "learning_rate": 7.300434537373433e-06, "loss": 0.2816, "step": 26335 }, { "epoch": 0.587115337106514, "grad_norm": 0.5422041416168213, "learning_rate": 7.29706338000806e-06, "loss": 0.3713, "step": 26340 }, { "epoch": 0.5872267864871341, "grad_norm": 0.585302472114563, "learning_rate": 7.29369255400185e-06, "loss": 0.3396, "step": 26345 }, { "epoch": 0.5873382358677541, "grad_norm": 0.7089430689811707, "learning_rate": 7.290322059768049e-06, "loss": 0.3635, "step": 26350 }, { "epoch": 0.5874496852483743, "grad_norm": 0.6561121344566345, "learning_rate": 7.286951897719845e-06, "loss": 0.2654, "step": 26355 }, { "epoch": 0.5875611346289943, "grad_norm": 0.43730154633522034, "learning_rate": 7.283582068270398e-06, "loss": 0.3302, "step": 26360 }, { "epoch": 0.5876725840096144, "grad_norm": 0.7563340663909912, "learning_rate": 7.280212571832824e-06, "loss": 0.2343, "step": 26365 }, { "epoch": 0.5877840333902344, "grad_norm": 0.9669647216796875, "learning_rate": 7.2768434088201924e-06, "loss": 0.3343, "step": 26370 }, { "epoch": 0.5878954827708545, "grad_norm": 0.5285956263542175, "learning_rate": 7.273474579645541e-06, "loss": 0.4108, "step": 26375 }, { "epoch": 0.5880069321514746, "grad_norm": 0.6726597547531128, "learning_rate": 7.270106084721856e-06, "loss": 0.4043, "step": 26380 }, { "epoch": 0.5881183815320946, "grad_norm": 0.5744706392288208, "learning_rate": 7.2667379244621e-06, "loss": 0.2229, "step": 26385 }, { "epoch": 0.5882298309127147, "grad_norm": 0.4486239552497864, "learning_rate": 7.263370099279173e-06, "loss": 0.2721, "step": 26390 }, { "epoch": 0.5883412802933348, "grad_norm": 0.6839447617530823, "learning_rate": 7.260002609585949e-06, "loss": 0.4493, "step": 26395 }, { "epoch": 0.5884527296739548, "grad_norm": 0.46326157450675964, "learning_rate": 7.256635455795256e-06, "loss": 0.2774, "step": 26400 }, { "epoch": 0.5885641790545749, "grad_norm": 0.49713730812072754, "learning_rate": 7.25326863831988e-06, "loss": 0.2504, "step": 26405 }, { "epoch": 0.5886756284351949, "grad_norm": 0.39275237917900085, "learning_rate": 7.24990215757257e-06, "loss": 0.2447, "step": 26410 }, { "epoch": 0.5887870778158151, "grad_norm": 0.5270885229110718, "learning_rate": 7.246536013966021e-06, "loss": 0.2935, "step": 26415 }, { "epoch": 0.5888985271964351, "grad_norm": 0.6464756727218628, "learning_rate": 7.243170207912909e-06, "loss": 0.327, "step": 26420 }, { "epoch": 0.5890099765770552, "grad_norm": 0.9072006344795227, "learning_rate": 7.239804739825849e-06, "loss": 0.3532, "step": 26425 }, { "epoch": 0.5891214259576752, "grad_norm": 0.5739597678184509, "learning_rate": 7.2364396101174235e-06, "loss": 0.3507, "step": 26430 }, { "epoch": 0.5892328753382953, "grad_norm": 0.820540189743042, "learning_rate": 7.233074819200169e-06, "loss": 0.2704, "step": 26435 }, { "epoch": 0.5893443247189154, "grad_norm": 0.6239163279533386, "learning_rate": 7.229710367486585e-06, "loss": 0.2627, "step": 26440 }, { "epoch": 0.5894557740995354, "grad_norm": 0.6565598249435425, "learning_rate": 7.226346255389125e-06, "loss": 0.2404, "step": 26445 }, { "epoch": 0.5895672234801556, "grad_norm": 0.7946537137031555, "learning_rate": 7.222982483320204e-06, "loss": 0.2855, "step": 26450 }, { "epoch": 0.5896786728607756, "grad_norm": 0.85628741979599, "learning_rate": 7.219619051692198e-06, "loss": 0.2938, "step": 26455 }, { "epoch": 0.5897901222413956, "grad_norm": 0.9541513323783875, "learning_rate": 7.216255960917435e-06, "loss": 0.3255, "step": 26460 }, { "epoch": 0.5899015716220157, "grad_norm": 0.6425460577011108, "learning_rate": 7.212893211408203e-06, "loss": 0.3304, "step": 26465 }, { "epoch": 0.5900130210026358, "grad_norm": 0.6187822818756104, "learning_rate": 7.209530803576752e-06, "loss": 0.2497, "step": 26470 }, { "epoch": 0.5901244703832559, "grad_norm": 0.6000394225120544, "learning_rate": 7.206168737835284e-06, "loss": 0.3631, "step": 26475 }, { "epoch": 0.5902359197638759, "grad_norm": 0.7396858930587769, "learning_rate": 7.202807014595962e-06, "loss": 0.2548, "step": 26480 }, { "epoch": 0.5903473691444959, "grad_norm": 0.8461358547210693, "learning_rate": 7.199445634270908e-06, "loss": 0.1718, "step": 26485 }, { "epoch": 0.5904588185251161, "grad_norm": 0.44261434674263, "learning_rate": 7.196084597272206e-06, "loss": 0.2134, "step": 26490 }, { "epoch": 0.5905702679057361, "grad_norm": 0.597154438495636, "learning_rate": 7.1927239040118865e-06, "loss": 0.2971, "step": 26495 }, { "epoch": 0.5906817172863562, "grad_norm": 0.5538870692253113, "learning_rate": 7.189363554901951e-06, "loss": 0.2702, "step": 26500 }, { "epoch": 0.5907931666669762, "grad_norm": 0.6188107132911682, "learning_rate": 7.186003550354348e-06, "loss": 0.3845, "step": 26505 }, { "epoch": 0.5909046160475964, "grad_norm": 0.7189980149269104, "learning_rate": 7.182643890780987e-06, "loss": 0.2398, "step": 26510 }, { "epoch": 0.5910160654282164, "grad_norm": 0.4853551387786865, "learning_rate": 7.179284576593741e-06, "loss": 0.3118, "step": 26515 }, { "epoch": 0.5911275148088364, "grad_norm": 0.6885067820549011, "learning_rate": 7.175925608204428e-06, "loss": 0.1962, "step": 26520 }, { "epoch": 0.5912389641894565, "grad_norm": 0.7876604795455933, "learning_rate": 7.172566986024843e-06, "loss": 0.3829, "step": 26525 }, { "epoch": 0.5913504135700766, "grad_norm": 0.460191011428833, "learning_rate": 7.169208710466717e-06, "loss": 0.2921, "step": 26530 }, { "epoch": 0.5914618629506967, "grad_norm": 0.7385380864143372, "learning_rate": 7.165850781941757e-06, "loss": 0.3389, "step": 26535 }, { "epoch": 0.5915733123313167, "grad_norm": 0.7774286270141602, "learning_rate": 7.162493200861611e-06, "loss": 0.2618, "step": 26540 }, { "epoch": 0.5916847617119367, "grad_norm": 0.4776510000228882, "learning_rate": 7.159135967637898e-06, "loss": 0.2776, "step": 26545 }, { "epoch": 0.5917962110925569, "grad_norm": 0.42568615078926086, "learning_rate": 7.155779082682188e-06, "loss": 0.3234, "step": 26550 }, { "epoch": 0.5919076604731769, "grad_norm": 0.620646595954895, "learning_rate": 7.152422546406007e-06, "loss": 0.3686, "step": 26555 }, { "epoch": 0.592019109853797, "grad_norm": 0.516928493976593, "learning_rate": 7.14906635922084e-06, "loss": 0.3181, "step": 26560 }, { "epoch": 0.592130559234417, "grad_norm": 0.6994913220405579, "learning_rate": 7.145710521538134e-06, "loss": 0.4317, "step": 26565 }, { "epoch": 0.5922420086150372, "grad_norm": 0.8918303847312927, "learning_rate": 7.142355033769286e-06, "loss": 0.215, "step": 26570 }, { "epoch": 0.5923534579956572, "grad_norm": 0.8608641028404236, "learning_rate": 7.138999896325654e-06, "loss": 0.3661, "step": 26575 }, { "epoch": 0.5924649073762772, "grad_norm": 0.5170380473136902, "learning_rate": 7.135645109618552e-06, "loss": 0.2329, "step": 26580 }, { "epoch": 0.5925763567568973, "grad_norm": 0.5056709051132202, "learning_rate": 7.1322906740592476e-06, "loss": 0.3267, "step": 26585 }, { "epoch": 0.5926878061375174, "grad_norm": 0.7833072543144226, "learning_rate": 7.128936590058973e-06, "loss": 0.2842, "step": 26590 }, { "epoch": 0.5927992555181375, "grad_norm": 0.5779724717140198, "learning_rate": 7.125582858028908e-06, "loss": 0.2362, "step": 26595 }, { "epoch": 0.5929107048987575, "grad_norm": 0.731823742389679, "learning_rate": 7.1222294783802015e-06, "loss": 0.3527, "step": 26600 }, { "epoch": 0.5930221542793775, "grad_norm": 0.5753491520881653, "learning_rate": 7.118876451523946e-06, "loss": 0.3011, "step": 26605 }, { "epoch": 0.5931336036599977, "grad_norm": 0.4684869050979614, "learning_rate": 7.1155237778712006e-06, "loss": 0.2555, "step": 26610 }, { "epoch": 0.5932450530406177, "grad_norm": 0.5428973436355591, "learning_rate": 7.112171457832973e-06, "loss": 0.282, "step": 26615 }, { "epoch": 0.5933565024212378, "grad_norm": 0.6345245242118835, "learning_rate": 7.1088194918202355e-06, "loss": 0.2939, "step": 26620 }, { "epoch": 0.5934679518018579, "grad_norm": 0.8436496257781982, "learning_rate": 7.105467880243913e-06, "loss": 0.3502, "step": 26625 }, { "epoch": 0.593579401182478, "grad_norm": 0.5694996118545532, "learning_rate": 7.102116623514881e-06, "loss": 0.2252, "step": 26630 }, { "epoch": 0.593690850563098, "grad_norm": 1.0282306671142578, "learning_rate": 7.098765722043985e-06, "loss": 0.3636, "step": 26635 }, { "epoch": 0.593802299943718, "grad_norm": 0.7384717464447021, "learning_rate": 7.09541517624202e-06, "loss": 0.2732, "step": 26640 }, { "epoch": 0.5939137493243382, "grad_norm": 0.7744261622428894, "learning_rate": 7.0920649865197336e-06, "loss": 0.4055, "step": 26645 }, { "epoch": 0.5940251987049582, "grad_norm": 0.7789164781570435, "learning_rate": 7.088715153287833e-06, "loss": 0.3471, "step": 26650 }, { "epoch": 0.5941366480855783, "grad_norm": 0.7392175793647766, "learning_rate": 7.085365676956983e-06, "loss": 0.4146, "step": 26655 }, { "epoch": 0.5942480974661983, "grad_norm": 0.43524813652038574, "learning_rate": 7.0820165579378035e-06, "loss": 0.3001, "step": 26660 }, { "epoch": 0.5943595468468184, "grad_norm": 0.5696089863777161, "learning_rate": 7.078667796640868e-06, "loss": 0.2282, "step": 26665 }, { "epoch": 0.5944709962274385, "grad_norm": 0.5634101033210754, "learning_rate": 7.075319393476716e-06, "loss": 0.2777, "step": 26670 }, { "epoch": 0.5945824456080585, "grad_norm": 0.5696731805801392, "learning_rate": 7.07197134885583e-06, "loss": 0.3796, "step": 26675 }, { "epoch": 0.5946938949886786, "grad_norm": 0.5457412600517273, "learning_rate": 7.068623663188654e-06, "loss": 0.2276, "step": 26680 }, { "epoch": 0.5948053443692987, "grad_norm": 0.792374849319458, "learning_rate": 7.065276336885594e-06, "loss": 0.3329, "step": 26685 }, { "epoch": 0.5949167937499187, "grad_norm": 0.6463175415992737, "learning_rate": 7.061929370357001e-06, "loss": 0.2245, "step": 26690 }, { "epoch": 0.5950282431305388, "grad_norm": 0.578173041343689, "learning_rate": 7.058582764013187e-06, "loss": 0.3041, "step": 26695 }, { "epoch": 0.5951396925111588, "grad_norm": 0.6091515421867371, "learning_rate": 7.055236518264419e-06, "loss": 0.2944, "step": 26700 }, { "epoch": 0.595251141891779, "grad_norm": 0.7556412816047668, "learning_rate": 7.051890633520928e-06, "loss": 0.3857, "step": 26705 }, { "epoch": 0.595362591272399, "grad_norm": 0.3958643674850464, "learning_rate": 7.048545110192888e-06, "loss": 0.2731, "step": 26710 }, { "epoch": 0.5954740406530191, "grad_norm": 0.23419268429279327, "learning_rate": 7.045199948690438e-06, "loss": 0.2714, "step": 26715 }, { "epoch": 0.5955854900336391, "grad_norm": 0.7274675965309143, "learning_rate": 7.0418551494236665e-06, "loss": 0.2134, "step": 26720 }, { "epoch": 0.5956969394142592, "grad_norm": 0.6608673930168152, "learning_rate": 7.0385107128026174e-06, "loss": 0.2355, "step": 26725 }, { "epoch": 0.5958083887948793, "grad_norm": 0.8258979916572571, "learning_rate": 7.035166639237299e-06, "loss": 0.2381, "step": 26730 }, { "epoch": 0.5959198381754993, "grad_norm": 0.4890570044517517, "learning_rate": 7.031822929137661e-06, "loss": 0.2922, "step": 26735 }, { "epoch": 0.5960312875561194, "grad_norm": 0.5419553518295288, "learning_rate": 7.028479582913625e-06, "loss": 0.352, "step": 26740 }, { "epoch": 0.5961427369367395, "grad_norm": 0.5165709853172302, "learning_rate": 7.025136600975054e-06, "loss": 0.3279, "step": 26745 }, { "epoch": 0.5962541863173595, "grad_norm": 0.6278826594352722, "learning_rate": 7.021793983731775e-06, "loss": 0.2826, "step": 26750 }, { "epoch": 0.5963656356979796, "grad_norm": 0.9872399568557739, "learning_rate": 7.018451731593564e-06, "loss": 0.4879, "step": 26755 }, { "epoch": 0.5964770850785996, "grad_norm": 0.32107967138290405, "learning_rate": 7.015109844970158e-06, "loss": 0.2365, "step": 26760 }, { "epoch": 0.5965885344592198, "grad_norm": 0.6850024461746216, "learning_rate": 7.011768324271246e-06, "loss": 0.2676, "step": 26765 }, { "epoch": 0.5966999838398398, "grad_norm": 0.6033693552017212, "learning_rate": 7.008427169906466e-06, "loss": 0.3631, "step": 26770 }, { "epoch": 0.5968114332204599, "grad_norm": 0.6413952708244324, "learning_rate": 7.005086382285426e-06, "loss": 0.2698, "step": 26775 }, { "epoch": 0.59692288260108, "grad_norm": 0.3277706801891327, "learning_rate": 7.001745961817682e-06, "loss": 0.267, "step": 26780 }, { "epoch": 0.5970343319817, "grad_norm": 0.6097815632820129, "learning_rate": 6.9984059089127394e-06, "loss": 0.2035, "step": 26785 }, { "epoch": 0.5971457813623201, "grad_norm": 0.736315131187439, "learning_rate": 6.9950662239800605e-06, "loss": 0.2213, "step": 26790 }, { "epoch": 0.5972572307429401, "grad_norm": 1.0850633382797241, "learning_rate": 6.991726907429072e-06, "loss": 0.2451, "step": 26795 }, { "epoch": 0.5973686801235603, "grad_norm": 0.6129161715507507, "learning_rate": 6.988387959669141e-06, "loss": 0.3486, "step": 26800 }, { "epoch": 0.5974801295041803, "grad_norm": 0.8613978624343872, "learning_rate": 6.985049381109599e-06, "loss": 0.4207, "step": 26805 }, { "epoch": 0.5975915788848003, "grad_norm": 0.615011990070343, "learning_rate": 6.981711172159733e-06, "loss": 0.3018, "step": 26810 }, { "epoch": 0.5977030282654204, "grad_norm": 0.6447067260742188, "learning_rate": 6.978373333228782e-06, "loss": 0.3114, "step": 26815 }, { "epoch": 0.5978144776460405, "grad_norm": 0.6088956594467163, "learning_rate": 6.975035864725934e-06, "loss": 0.3613, "step": 26820 }, { "epoch": 0.5979259270266606, "grad_norm": 0.39137575030326843, "learning_rate": 6.971698767060342e-06, "loss": 0.2536, "step": 26825 }, { "epoch": 0.5980373764072806, "grad_norm": 0.6503191590309143, "learning_rate": 6.9683620406411036e-06, "loss": 0.3193, "step": 26830 }, { "epoch": 0.5981488257879006, "grad_norm": 0.46391865611076355, "learning_rate": 6.965025685877281e-06, "loss": 0.2987, "step": 26835 }, { "epoch": 0.5982602751685208, "grad_norm": 0.5320603847503662, "learning_rate": 6.961689703177879e-06, "loss": 0.2605, "step": 26840 }, { "epoch": 0.5983717245491408, "grad_norm": 0.6308834552764893, "learning_rate": 6.95835409295187e-06, "loss": 0.2241, "step": 26845 }, { "epoch": 0.5984831739297609, "grad_norm": 0.5442749857902527, "learning_rate": 6.955018855608171e-06, "loss": 0.2204, "step": 26850 }, { "epoch": 0.5985946233103809, "grad_norm": 0.7064063549041748, "learning_rate": 6.951683991555658e-06, "loss": 0.3439, "step": 26855 }, { "epoch": 0.5987060726910011, "grad_norm": 0.9378949999809265, "learning_rate": 6.948349501203157e-06, "loss": 0.3433, "step": 26860 }, { "epoch": 0.5988175220716211, "grad_norm": 1.0083317756652832, "learning_rate": 6.945015384959452e-06, "loss": 0.3341, "step": 26865 }, { "epoch": 0.5989289714522411, "grad_norm": 0.7770516872406006, "learning_rate": 6.941681643233281e-06, "loss": 0.4177, "step": 26870 }, { "epoch": 0.5990404208328612, "grad_norm": 0.7873477935791016, "learning_rate": 6.93834827643333e-06, "loss": 0.2319, "step": 26875 }, { "epoch": 0.5991518702134813, "grad_norm": 0.4507529139518738, "learning_rate": 6.9350152849682515e-06, "loss": 0.2637, "step": 26880 }, { "epoch": 0.5992633195941014, "grad_norm": 0.5220516324043274, "learning_rate": 6.93168266924664e-06, "loss": 0.3425, "step": 26885 }, { "epoch": 0.5993747689747214, "grad_norm": 0.5274975895881653, "learning_rate": 6.928350429677051e-06, "loss": 0.2999, "step": 26890 }, { "epoch": 0.5994862183553414, "grad_norm": 0.4189387261867523, "learning_rate": 6.925018566667988e-06, "loss": 0.2648, "step": 26895 }, { "epoch": 0.5995976677359616, "grad_norm": 0.6953780651092529, "learning_rate": 6.9216870806279156e-06, "loss": 0.2682, "step": 26900 }, { "epoch": 0.5997091171165816, "grad_norm": 0.574653148651123, "learning_rate": 6.918355971965247e-06, "loss": 0.2319, "step": 26905 }, { "epoch": 0.5998205664972017, "grad_norm": 0.34783512353897095, "learning_rate": 6.915025241088344e-06, "loss": 0.3696, "step": 26910 }, { "epoch": 0.5999320158778217, "grad_norm": 0.5067845582962036, "learning_rate": 6.911694888405536e-06, "loss": 0.2181, "step": 26915 }, { "epoch": 0.6000434652584419, "grad_norm": 0.6084128022193909, "learning_rate": 6.9083649143251e-06, "loss": 0.4194, "step": 26920 }, { "epoch": 0.6001549146390619, "grad_norm": 0.6096634864807129, "learning_rate": 6.905035319255259e-06, "loss": 0.3532, "step": 26925 }, { "epoch": 0.6002663640196819, "grad_norm": 0.7994610071182251, "learning_rate": 6.9017061036042e-06, "loss": 0.3452, "step": 26930 }, { "epoch": 0.600377813400302, "grad_norm": 0.5439140796661377, "learning_rate": 6.898377267780059e-06, "loss": 0.2444, "step": 26935 }, { "epoch": 0.6004892627809221, "grad_norm": 0.5204330086708069, "learning_rate": 6.895048812190921e-06, "loss": 0.2858, "step": 26940 }, { "epoch": 0.6006007121615422, "grad_norm": 0.5798391699790955, "learning_rate": 6.8917207372448295e-06, "loss": 0.4046, "step": 26945 }, { "epoch": 0.6007121615421622, "grad_norm": 0.5817235708236694, "learning_rate": 6.8883930433497884e-06, "loss": 0.2308, "step": 26950 }, { "epoch": 0.6008236109227822, "grad_norm": 0.4607628583908081, "learning_rate": 6.885065730913741e-06, "loss": 0.3217, "step": 26955 }, { "epoch": 0.6009350603034024, "grad_norm": 0.5161969065666199, "learning_rate": 6.881738800344591e-06, "loss": 0.2642, "step": 26960 }, { "epoch": 0.6010465096840224, "grad_norm": 0.8000197410583496, "learning_rate": 6.878412252050196e-06, "loss": 0.3186, "step": 26965 }, { "epoch": 0.6011579590646425, "grad_norm": 0.7184973955154419, "learning_rate": 6.875086086438363e-06, "loss": 0.2563, "step": 26970 }, { "epoch": 0.6012694084452626, "grad_norm": 0.6139436960220337, "learning_rate": 6.871760303916855e-06, "loss": 0.2642, "step": 26975 }, { "epoch": 0.6013808578258827, "grad_norm": 0.4902457892894745, "learning_rate": 6.868434904893385e-06, "loss": 0.2255, "step": 26980 }, { "epoch": 0.6014923072065027, "grad_norm": 0.4996844530105591, "learning_rate": 6.8651098897756276e-06, "loss": 0.3407, "step": 26985 }, { "epoch": 0.6016037565871227, "grad_norm": 0.48853209614753723, "learning_rate": 6.861785258971198e-06, "loss": 0.2316, "step": 26990 }, { "epoch": 0.6017152059677429, "grad_norm": 0.7358313202857971, "learning_rate": 6.858461012887674e-06, "loss": 0.3769, "step": 26995 }, { "epoch": 0.6018266553483629, "grad_norm": 0.5391202569007874, "learning_rate": 6.85513715193258e-06, "loss": 0.3337, "step": 27000 }, { "epoch": 0.601938104728983, "grad_norm": 0.5830953121185303, "learning_rate": 6.851813676513397e-06, "loss": 0.3003, "step": 27005 }, { "epoch": 0.602049554109603, "grad_norm": 0.53678297996521, "learning_rate": 6.848490587037557e-06, "loss": 0.3026, "step": 27010 }, { "epoch": 0.6021610034902231, "grad_norm": 0.7724995017051697, "learning_rate": 6.845167883912442e-06, "loss": 0.3513, "step": 27015 }, { "epoch": 0.6022724528708432, "grad_norm": 0.7503898739814758, "learning_rate": 6.8418455675453944e-06, "loss": 0.3203, "step": 27020 }, { "epoch": 0.6023839022514632, "grad_norm": 0.8009513020515442, "learning_rate": 6.838523638343705e-06, "loss": 0.2895, "step": 27025 }, { "epoch": 0.6024953516320833, "grad_norm": 0.6689680814743042, "learning_rate": 6.835202096714615e-06, "loss": 0.3026, "step": 27030 }, { "epoch": 0.6026068010127034, "grad_norm": 0.6379620432853699, "learning_rate": 6.831880943065316e-06, "loss": 0.3278, "step": 27035 }, { "epoch": 0.6027182503933234, "grad_norm": 0.6119515895843506, "learning_rate": 6.8285601778029635e-06, "loss": 0.3328, "step": 27040 }, { "epoch": 0.6028296997739435, "grad_norm": 0.7183812856674194, "learning_rate": 6.8252398013346536e-06, "loss": 0.3237, "step": 27045 }, { "epoch": 0.6029411491545635, "grad_norm": 0.5734810829162598, "learning_rate": 6.821919814067432e-06, "loss": 0.229, "step": 27050 }, { "epoch": 0.6030525985351837, "grad_norm": 0.49161458015441895, "learning_rate": 6.818600216408314e-06, "loss": 0.2327, "step": 27055 }, { "epoch": 0.6031640479158037, "grad_norm": 0.8137242197990417, "learning_rate": 6.815281008764255e-06, "loss": 0.2162, "step": 27060 }, { "epoch": 0.6032754972964238, "grad_norm": 0.6309579014778137, "learning_rate": 6.8119621915421595e-06, "loss": 0.2806, "step": 27065 }, { "epoch": 0.6033869466770438, "grad_norm": 0.5926720499992371, "learning_rate": 6.808643765148895e-06, "loss": 0.3955, "step": 27070 }, { "epoch": 0.6034983960576639, "grad_norm": 0.5575020909309387, "learning_rate": 6.805325729991269e-06, "loss": 0.2786, "step": 27075 }, { "epoch": 0.603609845438284, "grad_norm": 0.81143718957901, "learning_rate": 6.802008086476049e-06, "loss": 0.3453, "step": 27080 }, { "epoch": 0.603721294818904, "grad_norm": 0.8011807799339294, "learning_rate": 6.798690835009953e-06, "loss": 0.248, "step": 27085 }, { "epoch": 0.6038327441995242, "grad_norm": 0.567529022693634, "learning_rate": 6.795373975999648e-06, "loss": 0.3641, "step": 27090 }, { "epoch": 0.6039441935801442, "grad_norm": 0.5633978843688965, "learning_rate": 6.792057509851762e-06, "loss": 0.2786, "step": 27095 }, { "epoch": 0.6040556429607642, "grad_norm": 0.6928328275680542, "learning_rate": 6.788741436972861e-06, "loss": 0.2781, "step": 27100 }, { "epoch": 0.6041670923413843, "grad_norm": 0.3641568720340729, "learning_rate": 6.785425757769475e-06, "loss": 0.2688, "step": 27105 }, { "epoch": 0.6042785417220043, "grad_norm": 0.6293769478797913, "learning_rate": 6.782110472648076e-06, "loss": 0.2609, "step": 27110 }, { "epoch": 0.6043899911026245, "grad_norm": 0.349682480096817, "learning_rate": 6.778795582015096e-06, "loss": 0.1502, "step": 27115 }, { "epoch": 0.6045014404832445, "grad_norm": 0.5193562507629395, "learning_rate": 6.7754810862769145e-06, "loss": 0.1376, "step": 27120 }, { "epoch": 0.6046128898638646, "grad_norm": 0.6919031143188477, "learning_rate": 6.772166985839859e-06, "loss": 0.3575, "step": 27125 }, { "epoch": 0.6047243392444847, "grad_norm": 0.3963978588581085, "learning_rate": 6.768853281110217e-06, "loss": 0.3116, "step": 27130 }, { "epoch": 0.6048357886251047, "grad_norm": 0.6030378937721252, "learning_rate": 6.765539972494225e-06, "loss": 0.2672, "step": 27135 }, { "epoch": 0.6049472380057248, "grad_norm": 0.6921826601028442, "learning_rate": 6.762227060398065e-06, "loss": 0.4098, "step": 27140 }, { "epoch": 0.6050586873863448, "grad_norm": 0.7611590623855591, "learning_rate": 6.758914545227875e-06, "loss": 0.435, "step": 27145 }, { "epoch": 0.605170136766965, "grad_norm": 0.9621880650520325, "learning_rate": 6.755602427389746e-06, "loss": 0.3439, "step": 27150 }, { "epoch": 0.605281586147585, "grad_norm": 0.7084947824478149, "learning_rate": 6.752290707289715e-06, "loss": 0.3519, "step": 27155 }, { "epoch": 0.605393035528205, "grad_norm": 0.6749106645584106, "learning_rate": 6.748979385333772e-06, "loss": 0.2787, "step": 27160 }, { "epoch": 0.6055044849088251, "grad_norm": 0.47607892751693726, "learning_rate": 6.745668461927866e-06, "loss": 0.4617, "step": 27165 }, { "epoch": 0.6056159342894452, "grad_norm": 0.7611442804336548, "learning_rate": 6.742357937477887e-06, "loss": 0.3738, "step": 27170 }, { "epoch": 0.6057273836700653, "grad_norm": 0.800377368927002, "learning_rate": 6.739047812389679e-06, "loss": 0.3432, "step": 27175 }, { "epoch": 0.6058388330506853, "grad_norm": 0.6022168397903442, "learning_rate": 6.7357380870690395e-06, "loss": 0.2759, "step": 27180 }, { "epoch": 0.6059502824313054, "grad_norm": 0.6572964191436768, "learning_rate": 6.732428761921712e-06, "loss": 0.2319, "step": 27185 }, { "epoch": 0.6060617318119255, "grad_norm": 0.5233751535415649, "learning_rate": 6.7291198373533994e-06, "loss": 0.2188, "step": 27190 }, { "epoch": 0.6061731811925455, "grad_norm": 0.7602106928825378, "learning_rate": 6.725811313769742e-06, "loss": 0.2614, "step": 27195 }, { "epoch": 0.6062846305731656, "grad_norm": 0.5607755780220032, "learning_rate": 6.722503191576351e-06, "loss": 0.2624, "step": 27200 }, { "epoch": 0.6063960799537856, "grad_norm": 1.0890930891036987, "learning_rate": 6.719195471178766e-06, "loss": 0.3478, "step": 27205 }, { "epoch": 0.6065075293344058, "grad_norm": 0.6860947608947754, "learning_rate": 6.715888152982495e-06, "loss": 0.264, "step": 27210 }, { "epoch": 0.6066189787150258, "grad_norm": 0.6374927759170532, "learning_rate": 6.712581237392988e-06, "loss": 0.34, "step": 27215 }, { "epoch": 0.6067304280956458, "grad_norm": 0.7704671025276184, "learning_rate": 6.709274724815643e-06, "loss": 0.3104, "step": 27220 }, { "epoch": 0.606841877476266, "grad_norm": 0.616899847984314, "learning_rate": 6.705968615655819e-06, "loss": 0.2688, "step": 27225 }, { "epoch": 0.606953326856886, "grad_norm": 0.7842974066734314, "learning_rate": 6.702662910318814e-06, "loss": 0.2945, "step": 27230 }, { "epoch": 0.6070647762375061, "grad_norm": 0.49267229437828064, "learning_rate": 6.699357609209884e-06, "loss": 0.4059, "step": 27235 }, { "epoch": 0.6071762256181261, "grad_norm": 0.75706547498703, "learning_rate": 6.6960527127342375e-06, "loss": 0.4469, "step": 27240 }, { "epoch": 0.6072876749987461, "grad_norm": 0.9641366600990295, "learning_rate": 6.6927482212970254e-06, "loss": 0.3216, "step": 27245 }, { "epoch": 0.6073991243793663, "grad_norm": 0.5527195930480957, "learning_rate": 6.689444135303351e-06, "loss": 0.3861, "step": 27250 }, { "epoch": 0.6075105737599863, "grad_norm": 0.8383334279060364, "learning_rate": 6.686140455158275e-06, "loss": 0.3467, "step": 27255 }, { "epoch": 0.6076220231406064, "grad_norm": 0.4666925072669983, "learning_rate": 6.682837181266799e-06, "loss": 0.3701, "step": 27260 }, { "epoch": 0.6077334725212264, "grad_norm": 0.6067997217178345, "learning_rate": 6.679534314033878e-06, "loss": 0.2877, "step": 27265 }, { "epoch": 0.6078449219018466, "grad_norm": 0.561024010181427, "learning_rate": 6.67623185386442e-06, "loss": 0.2082, "step": 27270 }, { "epoch": 0.6079563712824666, "grad_norm": 0.4686552882194519, "learning_rate": 6.6729298011632835e-06, "loss": 0.3118, "step": 27275 }, { "epoch": 0.6080678206630866, "grad_norm": 0.6984442472457886, "learning_rate": 6.6696281563352714e-06, "loss": 0.2798, "step": 27280 }, { "epoch": 0.6081792700437068, "grad_norm": 0.5045234560966492, "learning_rate": 6.666326919785142e-06, "loss": 0.3132, "step": 27285 }, { "epoch": 0.6082907194243268, "grad_norm": 0.6149911284446716, "learning_rate": 6.6630260919176e-06, "loss": 0.2368, "step": 27290 }, { "epoch": 0.6084021688049469, "grad_norm": 0.4781946837902069, "learning_rate": 6.659725673137301e-06, "loss": 0.3103, "step": 27295 }, { "epoch": 0.6085136181855669, "grad_norm": 0.5364055037498474, "learning_rate": 6.656425663848848e-06, "loss": 0.2564, "step": 27300 }, { "epoch": 0.608625067566187, "grad_norm": 0.9337091445922852, "learning_rate": 6.653126064456805e-06, "loss": 0.3776, "step": 27305 }, { "epoch": 0.6087365169468071, "grad_norm": 0.6068729162216187, "learning_rate": 6.649826875365672e-06, "loss": 0.346, "step": 27310 }, { "epoch": 0.6088479663274271, "grad_norm": 0.563932478427887, "learning_rate": 6.646528096979903e-06, "loss": 0.3165, "step": 27315 }, { "epoch": 0.6089594157080472, "grad_norm": 0.6620405316352844, "learning_rate": 6.643229729703906e-06, "loss": 0.4277, "step": 27320 }, { "epoch": 0.6090708650886673, "grad_norm": 0.540409505367279, "learning_rate": 6.639931773942033e-06, "loss": 0.2479, "step": 27325 }, { "epoch": 0.6091823144692874, "grad_norm": 0.6847338080406189, "learning_rate": 6.636634230098591e-06, "loss": 0.2437, "step": 27330 }, { "epoch": 0.6092937638499074, "grad_norm": 0.6418032646179199, "learning_rate": 6.633337098577826e-06, "loss": 0.2825, "step": 27335 }, { "epoch": 0.6094052132305274, "grad_norm": 0.7292640805244446, "learning_rate": 6.630040379783949e-06, "loss": 0.3244, "step": 27340 }, { "epoch": 0.6095166626111476, "grad_norm": 0.6510348916053772, "learning_rate": 6.62674407412111e-06, "loss": 0.2052, "step": 27345 }, { "epoch": 0.6096281119917676, "grad_norm": 0.3380892872810364, "learning_rate": 6.6234481819934106e-06, "loss": 0.2416, "step": 27350 }, { "epoch": 0.6097395613723877, "grad_norm": 0.738538920879364, "learning_rate": 6.6201527038049005e-06, "loss": 0.3871, "step": 27355 }, { "epoch": 0.6098510107530077, "grad_norm": 0.5535542964935303, "learning_rate": 6.616857639959579e-06, "loss": 0.2765, "step": 27360 }, { "epoch": 0.6099624601336278, "grad_norm": 0.45810580253601074, "learning_rate": 6.6135629908614e-06, "loss": 0.2714, "step": 27365 }, { "epoch": 0.6100739095142479, "grad_norm": 0.661761999130249, "learning_rate": 6.610268756914254e-06, "loss": 0.3585, "step": 27370 }, { "epoch": 0.6101853588948679, "grad_norm": 0.9758643507957458, "learning_rate": 6.606974938521996e-06, "loss": 0.275, "step": 27375 }, { "epoch": 0.610296808275488, "grad_norm": 0.5419698357582092, "learning_rate": 6.603681536088422e-06, "loss": 0.2785, "step": 27380 }, { "epoch": 0.6104082576561081, "grad_norm": 0.5564716458320618, "learning_rate": 6.6003885500172775e-06, "loss": 0.2769, "step": 27385 }, { "epoch": 0.6105197070367282, "grad_norm": 1.0348775386810303, "learning_rate": 6.5970959807122515e-06, "loss": 0.3956, "step": 27390 }, { "epoch": 0.6106311564173482, "grad_norm": 0.6051989793777466, "learning_rate": 6.593803828576996e-06, "loss": 0.2114, "step": 27395 }, { "epoch": 0.6107426057979682, "grad_norm": 0.699422299861908, "learning_rate": 6.590512094015098e-06, "loss": 0.2821, "step": 27400 }, { "epoch": 0.6108540551785884, "grad_norm": 0.48418572545051575, "learning_rate": 6.587220777430097e-06, "loss": 0.2364, "step": 27405 }, { "epoch": 0.6109655045592084, "grad_norm": 0.4478502869606018, "learning_rate": 6.583929879225487e-06, "loss": 0.2317, "step": 27410 }, { "epoch": 0.6110769539398285, "grad_norm": 0.6911597847938538, "learning_rate": 6.580639399804709e-06, "loss": 0.3018, "step": 27415 }, { "epoch": 0.6111884033204485, "grad_norm": 0.6295296549797058, "learning_rate": 6.577349339571144e-06, "loss": 0.2588, "step": 27420 }, { "epoch": 0.6112998527010686, "grad_norm": 0.45536351203918457, "learning_rate": 6.574059698928133e-06, "loss": 0.24, "step": 27425 }, { "epoch": 0.6114113020816887, "grad_norm": 0.4310155510902405, "learning_rate": 6.570770478278961e-06, "loss": 0.2478, "step": 27430 }, { "epoch": 0.6115227514623087, "grad_norm": 0.4848770201206207, "learning_rate": 6.567481678026854e-06, "loss": 0.3281, "step": 27435 }, { "epoch": 0.6116342008429289, "grad_norm": 0.7289760708808899, "learning_rate": 6.564193298574998e-06, "loss": 0.464, "step": 27440 }, { "epoch": 0.6117456502235489, "grad_norm": 0.5454939603805542, "learning_rate": 6.560905340326527e-06, "loss": 0.3648, "step": 27445 }, { "epoch": 0.6118570996041689, "grad_norm": 0.5365239977836609, "learning_rate": 6.557617803684515e-06, "loss": 0.4343, "step": 27450 }, { "epoch": 0.611968548984789, "grad_norm": 0.4437295198440552, "learning_rate": 6.554330689051987e-06, "loss": 0.3891, "step": 27455 }, { "epoch": 0.612079998365409, "grad_norm": 0.7840902209281921, "learning_rate": 6.551043996831923e-06, "loss": 0.1994, "step": 27460 }, { "epoch": 0.6121914477460292, "grad_norm": 0.6054477095603943, "learning_rate": 6.54775772742724e-06, "loss": 0.294, "step": 27465 }, { "epoch": 0.6123028971266492, "grad_norm": 0.7534658312797546, "learning_rate": 6.544471881240815e-06, "loss": 0.3178, "step": 27470 }, { "epoch": 0.6124143465072693, "grad_norm": 0.576594889163971, "learning_rate": 6.541186458675461e-06, "loss": 0.3334, "step": 27475 }, { "epoch": 0.6125257958878894, "grad_norm": 0.7999580502510071, "learning_rate": 6.537901460133953e-06, "loss": 0.2468, "step": 27480 }, { "epoch": 0.6126372452685094, "grad_norm": 0.9341402649879456, "learning_rate": 6.534616886019e-06, "loss": 0.3492, "step": 27485 }, { "epoch": 0.6127486946491295, "grad_norm": 1.0711827278137207, "learning_rate": 6.531332736733271e-06, "loss": 0.3198, "step": 27490 }, { "epoch": 0.6128601440297495, "grad_norm": 0.7271906137466431, "learning_rate": 6.5280490126793724e-06, "loss": 0.3124, "step": 27495 }, { "epoch": 0.6129715934103697, "grad_norm": 0.603204071521759, "learning_rate": 6.5247657142598686e-06, "loss": 0.3847, "step": 27500 }, { "epoch": 0.6130830427909897, "grad_norm": 0.7971246242523193, "learning_rate": 6.521482841877263e-06, "loss": 0.4809, "step": 27505 }, { "epoch": 0.6131944921716097, "grad_norm": 0.7611426711082458, "learning_rate": 6.518200395934004e-06, "loss": 0.2798, "step": 27510 }, { "epoch": 0.6133059415522298, "grad_norm": 0.6165714859962463, "learning_rate": 6.514918376832506e-06, "loss": 0.4297, "step": 27515 }, { "epoch": 0.6134173909328499, "grad_norm": 0.512054979801178, "learning_rate": 6.5116367849751146e-06, "loss": 0.2774, "step": 27520 }, { "epoch": 0.61352884031347, "grad_norm": 0.6778582334518433, "learning_rate": 6.508355620764129e-06, "loss": 0.3278, "step": 27525 }, { "epoch": 0.61364028969409, "grad_norm": 0.6414464116096497, "learning_rate": 6.505074884601791e-06, "loss": 0.2077, "step": 27530 }, { "epoch": 0.6137517390747101, "grad_norm": 0.3846264183521271, "learning_rate": 6.501794576890297e-06, "loss": 0.2684, "step": 27535 }, { "epoch": 0.6138631884553302, "grad_norm": 0.5332993268966675, "learning_rate": 6.4985146980317835e-06, "loss": 0.2662, "step": 27540 }, { "epoch": 0.6139746378359502, "grad_norm": 0.8535371422767639, "learning_rate": 6.495235248428339e-06, "loss": 0.2598, "step": 27545 }, { "epoch": 0.6140860872165703, "grad_norm": 0.4994587004184723, "learning_rate": 6.491956228482004e-06, "loss": 0.3186, "step": 27550 }, { "epoch": 0.6141975365971903, "grad_norm": 0.7860655188560486, "learning_rate": 6.488677638594758e-06, "loss": 0.246, "step": 27555 }, { "epoch": 0.6143089859778105, "grad_norm": 0.5616355538368225, "learning_rate": 6.485399479168528e-06, "loss": 0.3059, "step": 27560 }, { "epoch": 0.6144204353584305, "grad_norm": 0.7185654640197754, "learning_rate": 6.4821217506051945e-06, "loss": 0.2726, "step": 27565 }, { "epoch": 0.6145318847390505, "grad_norm": 0.7038310170173645, "learning_rate": 6.478844453306582e-06, "loss": 0.4596, "step": 27570 }, { "epoch": 0.6146433341196706, "grad_norm": 0.6382894515991211, "learning_rate": 6.475567587674457e-06, "loss": 0.2459, "step": 27575 }, { "epoch": 0.6147547835002907, "grad_norm": 0.43890437483787537, "learning_rate": 6.4722911541105435e-06, "loss": 0.2813, "step": 27580 }, { "epoch": 0.6148662328809108, "grad_norm": 0.6502318978309631, "learning_rate": 6.469015153016502e-06, "loss": 0.3317, "step": 27585 }, { "epoch": 0.6149776822615308, "grad_norm": 0.594805896282196, "learning_rate": 6.465739584793949e-06, "loss": 0.2856, "step": 27590 }, { "epoch": 0.6150891316421508, "grad_norm": 0.5505577325820923, "learning_rate": 6.462464449844446e-06, "loss": 0.3047, "step": 27595 }, { "epoch": 0.615200581022771, "grad_norm": 0.6672086119651794, "learning_rate": 6.459189748569493e-06, "loss": 0.3137, "step": 27600 }, { "epoch": 0.615312030403391, "grad_norm": 0.5111857056617737, "learning_rate": 6.455915481370545e-06, "loss": 0.2823, "step": 27605 }, { "epoch": 0.6154234797840111, "grad_norm": 0.45580384135246277, "learning_rate": 6.452641648649006e-06, "loss": 0.3184, "step": 27610 }, { "epoch": 0.6155349291646312, "grad_norm": 0.7010921239852905, "learning_rate": 6.449368250806218e-06, "loss": 0.3076, "step": 27615 }, { "epoch": 0.6156463785452513, "grad_norm": 0.6821643114089966, "learning_rate": 6.446095288243473e-06, "loss": 0.2631, "step": 27620 }, { "epoch": 0.6157578279258713, "grad_norm": 0.7827234268188477, "learning_rate": 6.442822761362015e-06, "loss": 0.3193, "step": 27625 }, { "epoch": 0.6158692773064913, "grad_norm": 0.4832581579685211, "learning_rate": 6.439550670563031e-06, "loss": 0.3358, "step": 27630 }, { "epoch": 0.6159807266871115, "grad_norm": 0.7612441778182983, "learning_rate": 6.436279016247652e-06, "loss": 0.4278, "step": 27635 }, { "epoch": 0.6160921760677315, "grad_norm": 0.4451126754283905, "learning_rate": 6.43300779881696e-06, "loss": 0.2232, "step": 27640 }, { "epoch": 0.6162036254483516, "grad_norm": 0.70438152551651, "learning_rate": 6.429737018671976e-06, "loss": 0.2801, "step": 27645 }, { "epoch": 0.6163150748289716, "grad_norm": 0.5213625431060791, "learning_rate": 6.426466676213675e-06, "loss": 0.3101, "step": 27650 }, { "epoch": 0.6164265242095917, "grad_norm": 0.7046170830726624, "learning_rate": 6.423196771842975e-06, "loss": 0.2849, "step": 27655 }, { "epoch": 0.6165379735902118, "grad_norm": 0.27048227190971375, "learning_rate": 6.419927305960745e-06, "loss": 0.3052, "step": 27660 }, { "epoch": 0.6166494229708318, "grad_norm": 0.8334628939628601, "learning_rate": 6.416658278967793e-06, "loss": 0.2801, "step": 27665 }, { "epoch": 0.6167608723514519, "grad_norm": 0.7342699766159058, "learning_rate": 6.413389691264875e-06, "loss": 0.2398, "step": 27670 }, { "epoch": 0.616872321732072, "grad_norm": 0.703302800655365, "learning_rate": 6.4101215432526985e-06, "loss": 0.3231, "step": 27675 }, { "epoch": 0.6169837711126921, "grad_norm": 0.6903324127197266, "learning_rate": 6.40685383533191e-06, "loss": 0.3583, "step": 27680 }, { "epoch": 0.6170952204933121, "grad_norm": 0.5930154323577881, "learning_rate": 6.403586567903108e-06, "loss": 0.2944, "step": 27685 }, { "epoch": 0.6172066698739321, "grad_norm": 0.41322100162506104, "learning_rate": 6.400319741366829e-06, "loss": 0.3527, "step": 27690 }, { "epoch": 0.6173181192545523, "grad_norm": 0.8043963313102722, "learning_rate": 6.3970533561235685e-06, "loss": 0.3136, "step": 27695 }, { "epoch": 0.6174295686351723, "grad_norm": 0.5982251167297363, "learning_rate": 6.393787412573753e-06, "loss": 0.2659, "step": 27700 }, { "epoch": 0.6175410180157924, "grad_norm": 0.7102477550506592, "learning_rate": 6.390521911117768e-06, "loss": 0.3109, "step": 27705 }, { "epoch": 0.6176524673964124, "grad_norm": 0.8494806289672852, "learning_rate": 6.387256852155934e-06, "loss": 0.2982, "step": 27710 }, { "epoch": 0.6177639167770325, "grad_norm": 0.7837997674942017, "learning_rate": 6.383992236088525e-06, "loss": 0.3226, "step": 27715 }, { "epoch": 0.6178753661576526, "grad_norm": 0.5590851306915283, "learning_rate": 6.3807280633157565e-06, "loss": 0.3012, "step": 27720 }, { "epoch": 0.6179868155382726, "grad_norm": 0.6607463359832764, "learning_rate": 6.377464334237786e-06, "loss": 0.2411, "step": 27725 }, { "epoch": 0.6180982649188927, "grad_norm": 0.7981462478637695, "learning_rate": 6.374201049254731e-06, "loss": 0.3484, "step": 27730 }, { "epoch": 0.6182097142995128, "grad_norm": 0.6522349715232849, "learning_rate": 6.370938208766642e-06, "loss": 0.313, "step": 27735 }, { "epoch": 0.6183211636801329, "grad_norm": 0.6056199669837952, "learning_rate": 6.367675813173515e-06, "loss": 0.3073, "step": 27740 }, { "epoch": 0.6184326130607529, "grad_norm": 0.5482789874076843, "learning_rate": 6.364413862875295e-06, "loss": 0.3135, "step": 27745 }, { "epoch": 0.6185440624413729, "grad_norm": 0.21319426596164703, "learning_rate": 6.361152358271875e-06, "loss": 0.2503, "step": 27750 }, { "epoch": 0.6186555118219931, "grad_norm": 0.6866772770881653, "learning_rate": 6.357891299763086e-06, "loss": 0.1807, "step": 27755 }, { "epoch": 0.6187669612026131, "grad_norm": 0.7949379682540894, "learning_rate": 6.35463068774871e-06, "loss": 0.3076, "step": 27760 }, { "epoch": 0.6188784105832332, "grad_norm": 0.5552226305007935, "learning_rate": 6.351370522628475e-06, "loss": 0.3984, "step": 27765 }, { "epoch": 0.6189898599638533, "grad_norm": 0.4902004599571228, "learning_rate": 6.348110804802052e-06, "loss": 0.3095, "step": 27770 }, { "epoch": 0.6191013093444733, "grad_norm": 0.5670919418334961, "learning_rate": 6.344851534669057e-06, "loss": 0.2612, "step": 27775 }, { "epoch": 0.6192127587250934, "grad_norm": 0.5468574166297913, "learning_rate": 6.341592712629049e-06, "loss": 0.2413, "step": 27780 }, { "epoch": 0.6193242081057134, "grad_norm": 0.47762176394462585, "learning_rate": 6.338334339081538e-06, "loss": 0.3156, "step": 27785 }, { "epoch": 0.6194356574863336, "grad_norm": 0.5758314728736877, "learning_rate": 6.335076414425969e-06, "loss": 0.1764, "step": 27790 }, { "epoch": 0.6195471068669536, "grad_norm": 0.5514389872550964, "learning_rate": 6.331818939061742e-06, "loss": 0.2183, "step": 27795 }, { "epoch": 0.6196585562475736, "grad_norm": 0.40423551201820374, "learning_rate": 6.328561913388203e-06, "loss": 0.3397, "step": 27800 }, { "epoch": 0.6197700056281937, "grad_norm": 0.4383307099342346, "learning_rate": 6.325305337804633e-06, "loss": 0.3033, "step": 27805 }, { "epoch": 0.6198814550088138, "grad_norm": 0.43310850858688354, "learning_rate": 6.322049212710265e-06, "loss": 0.3374, "step": 27810 }, { "epoch": 0.6199929043894339, "grad_norm": 0.5410407781600952, "learning_rate": 6.318793538504276e-06, "loss": 0.2061, "step": 27815 }, { "epoch": 0.6201043537700539, "grad_norm": 0.5015875101089478, "learning_rate": 6.315538315585781e-06, "loss": 0.176, "step": 27820 }, { "epoch": 0.620215803150674, "grad_norm": 0.7569509744644165, "learning_rate": 6.31228354435385e-06, "loss": 0.4063, "step": 27825 }, { "epoch": 0.6203272525312941, "grad_norm": 0.8083080649375916, "learning_rate": 6.309029225207489e-06, "loss": 0.2622, "step": 27830 }, { "epoch": 0.6204387019119141, "grad_norm": 0.6677647233009338, "learning_rate": 6.305775358545659e-06, "loss": 0.259, "step": 27835 }, { "epoch": 0.6205501512925342, "grad_norm": 0.7124935388565063, "learning_rate": 6.302521944767253e-06, "loss": 0.2878, "step": 27840 }, { "epoch": 0.6206616006731542, "grad_norm": 0.667360782623291, "learning_rate": 6.2992689842711184e-06, "loss": 0.2577, "step": 27845 }, { "epoch": 0.6207730500537744, "grad_norm": 0.9954953789710999, "learning_rate": 6.29601647745604e-06, "loss": 0.3319, "step": 27850 }, { "epoch": 0.6208844994343944, "grad_norm": 0.7336387038230896, "learning_rate": 6.292764424720752e-06, "loss": 0.2353, "step": 27855 }, { "epoch": 0.6209959488150144, "grad_norm": 0.5347248911857605, "learning_rate": 6.2895128264639295e-06, "loss": 0.2903, "step": 27860 }, { "epoch": 0.6211073981956345, "grad_norm": 0.5746235847473145, "learning_rate": 6.28626168308419e-06, "loss": 0.3182, "step": 27865 }, { "epoch": 0.6212188475762546, "grad_norm": 0.6404165029525757, "learning_rate": 6.283010994980106e-06, "loss": 0.2627, "step": 27870 }, { "epoch": 0.6213302969568747, "grad_norm": 0.48679685592651367, "learning_rate": 6.2797607625501845e-06, "loss": 0.2974, "step": 27875 }, { "epoch": 0.6214417463374947, "grad_norm": 0.9068040251731873, "learning_rate": 6.276510986192876e-06, "loss": 0.2187, "step": 27880 }, { "epoch": 0.6215531957181148, "grad_norm": 0.905701220035553, "learning_rate": 6.27326166630658e-06, "loss": 0.4345, "step": 27885 }, { "epoch": 0.6216646450987349, "grad_norm": 0.45995256304740906, "learning_rate": 6.270012803289639e-06, "loss": 0.272, "step": 27890 }, { "epoch": 0.6217760944793549, "grad_norm": 0.6941897869110107, "learning_rate": 6.266764397540334e-06, "loss": 0.3543, "step": 27895 }, { "epoch": 0.621887543859975, "grad_norm": 0.7971600890159607, "learning_rate": 6.263516449456898e-06, "loss": 0.3165, "step": 27900 }, { "epoch": 0.621998993240595, "grad_norm": 0.5076633095741272, "learning_rate": 6.260268959437507e-06, "loss": 0.3159, "step": 27905 }, { "epoch": 0.6221104426212152, "grad_norm": 0.5933796167373657, "learning_rate": 6.257021927880276e-06, "loss": 0.2812, "step": 27910 }, { "epoch": 0.6222218920018352, "grad_norm": 0.6237947940826416, "learning_rate": 6.253775355183261e-06, "loss": 0.3239, "step": 27915 }, { "epoch": 0.6223333413824552, "grad_norm": 0.3354548513889313, "learning_rate": 6.250529241744475e-06, "loss": 0.2003, "step": 27920 }, { "epoch": 0.6224447907630754, "grad_norm": 0.7078173756599426, "learning_rate": 6.247283587961862e-06, "loss": 0.2688, "step": 27925 }, { "epoch": 0.6225562401436954, "grad_norm": 0.8186478614807129, "learning_rate": 6.244038394233313e-06, "loss": 0.3333, "step": 27930 }, { "epoch": 0.6226676895243155, "grad_norm": 0.5947364568710327, "learning_rate": 6.240793660956662e-06, "loss": 0.2854, "step": 27935 }, { "epoch": 0.6227791389049355, "grad_norm": 0.8700222373008728, "learning_rate": 6.237549388529696e-06, "loss": 0.2826, "step": 27940 }, { "epoch": 0.6228905882855557, "grad_norm": 0.4603564143180847, "learning_rate": 6.234305577350133e-06, "loss": 0.1922, "step": 27945 }, { "epoch": 0.6230020376661757, "grad_norm": 0.4369763731956482, "learning_rate": 6.23106222781564e-06, "loss": 0.2934, "step": 27950 }, { "epoch": 0.6231134870467957, "grad_norm": 0.5998416543006897, "learning_rate": 6.227819340323826e-06, "loss": 0.2065, "step": 27955 }, { "epoch": 0.6232249364274158, "grad_norm": 0.71073979139328, "learning_rate": 6.224576915272242e-06, "loss": 0.3077, "step": 27960 }, { "epoch": 0.6233363858080359, "grad_norm": 1.1015069484710693, "learning_rate": 6.221334953058389e-06, "loss": 0.3986, "step": 27965 }, { "epoch": 0.623447835188656, "grad_norm": 0.227636456489563, "learning_rate": 6.2180934540797e-06, "loss": 0.3346, "step": 27970 }, { "epoch": 0.623559284569276, "grad_norm": 0.6981043815612793, "learning_rate": 6.214852418733566e-06, "loss": 0.3099, "step": 27975 }, { "epoch": 0.623670733949896, "grad_norm": 0.45622771978378296, "learning_rate": 6.211611847417306e-06, "loss": 0.2065, "step": 27980 }, { "epoch": 0.6237821833305162, "grad_norm": 0.5265591740608215, "learning_rate": 6.208371740528194e-06, "loss": 0.2861, "step": 27985 }, { "epoch": 0.6238936327111362, "grad_norm": 0.8384089469909668, "learning_rate": 6.205132098463437e-06, "loss": 0.3414, "step": 27990 }, { "epoch": 0.6240050820917563, "grad_norm": 1.005171775817871, "learning_rate": 6.201892921620197e-06, "loss": 0.2219, "step": 27995 }, { "epoch": 0.6241165314723763, "grad_norm": 0.691701352596283, "learning_rate": 6.1986542103955646e-06, "loss": 0.4161, "step": 28000 }, { "epoch": 0.6242279808529964, "grad_norm": 0.38245102763175964, "learning_rate": 6.195415965186582e-06, "loss": 0.3582, "step": 28005 }, { "epoch": 0.6243394302336165, "grad_norm": 0.5451532602310181, "learning_rate": 6.192178186390237e-06, "loss": 0.3372, "step": 28010 }, { "epoch": 0.6244508796142365, "grad_norm": 0.6231672167778015, "learning_rate": 6.188940874403456e-06, "loss": 0.341, "step": 28015 }, { "epoch": 0.6245623289948566, "grad_norm": 0.6475543975830078, "learning_rate": 6.185704029623107e-06, "loss": 0.2429, "step": 28020 }, { "epoch": 0.6246737783754767, "grad_norm": 0.3510415852069855, "learning_rate": 6.1824676524460015e-06, "loss": 0.2533, "step": 28025 }, { "epoch": 0.6247852277560968, "grad_norm": 0.8009830713272095, "learning_rate": 6.179231743268896e-06, "loss": 0.3173, "step": 28030 }, { "epoch": 0.6248966771367168, "grad_norm": 0.31814971566200256, "learning_rate": 6.175996302488485e-06, "loss": 0.2053, "step": 28035 }, { "epoch": 0.6250081265173368, "grad_norm": 0.37845292687416077, "learning_rate": 6.172761330501409e-06, "loss": 0.2341, "step": 28040 }, { "epoch": 0.625119575897957, "grad_norm": 0.581320583820343, "learning_rate": 6.169526827704255e-06, "loss": 0.2602, "step": 28045 }, { "epoch": 0.625231025278577, "grad_norm": 0.666366696357727, "learning_rate": 6.166292794493545e-06, "loss": 0.4235, "step": 28050 }, { "epoch": 0.6253424746591971, "grad_norm": 0.4768614172935486, "learning_rate": 6.163059231265747e-06, "loss": 0.2679, "step": 28055 }, { "epoch": 0.6254539240398171, "grad_norm": 0.5455437302589417, "learning_rate": 6.159826138417271e-06, "loss": 0.2806, "step": 28060 }, { "epoch": 0.6255653734204372, "grad_norm": 0.512651264667511, "learning_rate": 6.1565935163444665e-06, "loss": 0.2676, "step": 28065 }, { "epoch": 0.6256768228010573, "grad_norm": 1.0292948484420776, "learning_rate": 6.153361365443633e-06, "loss": 0.326, "step": 28070 }, { "epoch": 0.6257882721816773, "grad_norm": 0.43563035130500793, "learning_rate": 6.1501296861109995e-06, "loss": 0.2994, "step": 28075 }, { "epoch": 0.6258997215622975, "grad_norm": 0.5008480548858643, "learning_rate": 6.146898478742756e-06, "loss": 0.354, "step": 28080 }, { "epoch": 0.6260111709429175, "grad_norm": 0.650435209274292, "learning_rate": 6.143667743735013e-06, "loss": 0.284, "step": 28085 }, { "epoch": 0.6261226203235376, "grad_norm": 0.6315786242485046, "learning_rate": 6.140437481483843e-06, "loss": 0.2181, "step": 28090 }, { "epoch": 0.6262340697041576, "grad_norm": 0.45991653203964233, "learning_rate": 6.137207692385246e-06, "loss": 0.3351, "step": 28095 }, { "epoch": 0.6263455190847776, "grad_norm": 0.6725899577140808, "learning_rate": 6.133978376835167e-06, "loss": 0.2262, "step": 28100 }, { "epoch": 0.6264569684653978, "grad_norm": 0.6889006495475769, "learning_rate": 6.1307495352295e-06, "loss": 0.3143, "step": 28105 }, { "epoch": 0.6265684178460178, "grad_norm": 0.6997217535972595, "learning_rate": 6.127521167964072e-06, "loss": 0.2375, "step": 28110 }, { "epoch": 0.6266798672266379, "grad_norm": 0.5511771440505981, "learning_rate": 6.124293275434659e-06, "loss": 0.2455, "step": 28115 }, { "epoch": 0.626791316607258, "grad_norm": 0.8514209389686584, "learning_rate": 6.121065858036972e-06, "loss": 0.3233, "step": 28120 }, { "epoch": 0.626902765987878, "grad_norm": 0.5625143051147461, "learning_rate": 6.117838916166674e-06, "loss": 0.3249, "step": 28125 }, { "epoch": 0.6270142153684981, "grad_norm": 0.5047087669372559, "learning_rate": 6.114612450219356e-06, "loss": 0.2841, "step": 28130 }, { "epoch": 0.6271256647491181, "grad_norm": 0.7418507933616638, "learning_rate": 6.111386460590564e-06, "loss": 0.2969, "step": 28135 }, { "epoch": 0.6272371141297383, "grad_norm": 0.4277876019477844, "learning_rate": 6.1081609476757766e-06, "loss": 0.2599, "step": 28140 }, { "epoch": 0.6273485635103583, "grad_norm": 0.43965139985084534, "learning_rate": 6.104935911870413e-06, "loss": 0.2074, "step": 28145 }, { "epoch": 0.6274600128909784, "grad_norm": 0.42385342717170715, "learning_rate": 6.101711353569841e-06, "loss": 0.1684, "step": 28150 }, { "epoch": 0.6275714622715984, "grad_norm": 0.5323282480239868, "learning_rate": 6.098487273169371e-06, "loss": 0.2288, "step": 28155 }, { "epoch": 0.6276829116522185, "grad_norm": 0.6577903032302856, "learning_rate": 6.095263671064243e-06, "loss": 0.3944, "step": 28160 }, { "epoch": 0.6277943610328386, "grad_norm": 0.5340499877929688, "learning_rate": 6.0920405476496515e-06, "loss": 0.312, "step": 28165 }, { "epoch": 0.6279058104134586, "grad_norm": 1.0899690389633179, "learning_rate": 6.088817903320725e-06, "loss": 0.2366, "step": 28170 }, { "epoch": 0.6280172597940787, "grad_norm": 0.7473883628845215, "learning_rate": 6.085595738472529e-06, "loss": 0.2867, "step": 28175 }, { "epoch": 0.6281287091746988, "grad_norm": 0.4160996377468109, "learning_rate": 6.082374053500087e-06, "loss": 0.2274, "step": 28180 }, { "epoch": 0.6282401585553188, "grad_norm": 0.6289620995521545, "learning_rate": 6.07915284879834e-06, "loss": 0.3257, "step": 28185 }, { "epoch": 0.6283516079359389, "grad_norm": 0.367827445268631, "learning_rate": 6.075932124762194e-06, "loss": 0.3148, "step": 28190 }, { "epoch": 0.6284630573165589, "grad_norm": 0.6318298578262329, "learning_rate": 6.072711881786477e-06, "loss": 0.2852, "step": 28195 }, { "epoch": 0.6285745066971791, "grad_norm": 0.7384268641471863, "learning_rate": 6.069492120265974e-06, "loss": 0.2835, "step": 28200 }, { "epoch": 0.6286859560777991, "grad_norm": 0.7134256362915039, "learning_rate": 6.066272840595394e-06, "loss": 0.3431, "step": 28205 }, { "epoch": 0.6287974054584191, "grad_norm": 0.48338642716407776, "learning_rate": 6.063054043169403e-06, "loss": 0.2961, "step": 28210 }, { "epoch": 0.6289088548390392, "grad_norm": 0.41167038679122925, "learning_rate": 6.059835728382597e-06, "loss": 0.2717, "step": 28215 }, { "epoch": 0.6290203042196593, "grad_norm": 0.38216400146484375, "learning_rate": 6.056617896629515e-06, "loss": 0.2197, "step": 28220 }, { "epoch": 0.6291317536002794, "grad_norm": 0.9967228770256042, "learning_rate": 6.053400548304641e-06, "loss": 0.3655, "step": 28225 }, { "epoch": 0.6292432029808994, "grad_norm": 0.27996689081192017, "learning_rate": 6.0501836838024e-06, "loss": 0.3875, "step": 28230 }, { "epoch": 0.6293546523615196, "grad_norm": 0.8248032331466675, "learning_rate": 6.04696730351715e-06, "loss": 0.4108, "step": 28235 }, { "epoch": 0.6294661017421396, "grad_norm": 0.6017432808876038, "learning_rate": 6.043751407843195e-06, "loss": 0.213, "step": 28240 }, { "epoch": 0.6295775511227596, "grad_norm": 0.5813285708427429, "learning_rate": 6.040535997174781e-06, "loss": 0.2602, "step": 28245 }, { "epoch": 0.6296890005033797, "grad_norm": 0.7789047956466675, "learning_rate": 6.0373210719060894e-06, "loss": 0.2884, "step": 28250 }, { "epoch": 0.6298004498839997, "grad_norm": 1.083784818649292, "learning_rate": 6.0341066324312455e-06, "loss": 0.3943, "step": 28255 }, { "epoch": 0.6299118992646199, "grad_norm": 0.5226530432701111, "learning_rate": 6.03089267914432e-06, "loss": 0.2488, "step": 28260 }, { "epoch": 0.6300233486452399, "grad_norm": 0.7086270451545715, "learning_rate": 6.0276792124393165e-06, "loss": 0.2931, "step": 28265 }, { "epoch": 0.6301347980258599, "grad_norm": 0.5156954526901245, "learning_rate": 6.024466232710178e-06, "loss": 0.263, "step": 28270 }, { "epoch": 0.63024624740648, "grad_norm": 0.6274219155311584, "learning_rate": 6.021253740350793e-06, "loss": 0.3188, "step": 28275 }, { "epoch": 0.6303576967871001, "grad_norm": 0.6668805480003357, "learning_rate": 6.018041735754989e-06, "loss": 0.3313, "step": 28280 }, { "epoch": 0.6304691461677202, "grad_norm": 0.9630991220474243, "learning_rate": 6.014830219316531e-06, "loss": 0.3118, "step": 28285 }, { "epoch": 0.6305805955483402, "grad_norm": 0.4004746377468109, "learning_rate": 6.0116191914291255e-06, "loss": 0.1991, "step": 28290 }, { "epoch": 0.6306920449289604, "grad_norm": 0.3665456175804138, "learning_rate": 6.0084086524864235e-06, "loss": 0.1839, "step": 28295 }, { "epoch": 0.6308034943095804, "grad_norm": 0.5296953320503235, "learning_rate": 6.005198602882011e-06, "loss": 0.3783, "step": 28300 }, { "epoch": 0.6309149436902004, "grad_norm": 0.659162163734436, "learning_rate": 6.001989043009415e-06, "loss": 0.2793, "step": 28305 }, { "epoch": 0.6310263930708205, "grad_norm": 0.6633775234222412, "learning_rate": 5.9987799732621035e-06, "loss": 0.2492, "step": 28310 }, { "epoch": 0.6311378424514406, "grad_norm": 0.5430098176002502, "learning_rate": 5.995571394033479e-06, "loss": 0.2439, "step": 28315 }, { "epoch": 0.6312492918320607, "grad_norm": 0.7841137647628784, "learning_rate": 5.9923633057168954e-06, "loss": 0.355, "step": 28320 }, { "epoch": 0.6313607412126807, "grad_norm": 0.7403837442398071, "learning_rate": 5.989155708705632e-06, "loss": 0.2966, "step": 28325 }, { "epoch": 0.6314721905933007, "grad_norm": 0.5251857042312622, "learning_rate": 5.985948603392923e-06, "loss": 0.2467, "step": 28330 }, { "epoch": 0.6315836399739209, "grad_norm": 0.5293110609054565, "learning_rate": 5.982741990171932e-06, "loss": 0.3541, "step": 28335 }, { "epoch": 0.6316950893545409, "grad_norm": 0.6655994057655334, "learning_rate": 5.979535869435764e-06, "loss": 0.2069, "step": 28340 }, { "epoch": 0.631806538735161, "grad_norm": 0.813114583492279, "learning_rate": 5.976330241577464e-06, "loss": 0.2699, "step": 28345 }, { "epoch": 0.631917988115781, "grad_norm": 0.35635751485824585, "learning_rate": 5.973125106990021e-06, "loss": 0.2387, "step": 28350 }, { "epoch": 0.6320294374964011, "grad_norm": 0.6354801058769226, "learning_rate": 5.9699204660663554e-06, "loss": 0.3574, "step": 28355 }, { "epoch": 0.6321408868770212, "grad_norm": 0.4692534804344177, "learning_rate": 5.96671631919933e-06, "loss": 0.3614, "step": 28360 }, { "epoch": 0.6322523362576412, "grad_norm": 0.5889471769332886, "learning_rate": 5.963512666781754e-06, "loss": 0.3419, "step": 28365 }, { "epoch": 0.6323637856382613, "grad_norm": 0.7773043513298035, "learning_rate": 5.960309509206371e-06, "loss": 0.3964, "step": 28370 }, { "epoch": 0.6324752350188814, "grad_norm": 0.6574989557266235, "learning_rate": 5.95710684686586e-06, "loss": 0.2782, "step": 28375 }, { "epoch": 0.6325866843995015, "grad_norm": 0.8650151491165161, "learning_rate": 5.953904680152841e-06, "loss": 0.2922, "step": 28380 }, { "epoch": 0.6326981337801215, "grad_norm": 0.6198773980140686, "learning_rate": 5.9507030094598794e-06, "loss": 0.2723, "step": 28385 }, { "epoch": 0.6328095831607415, "grad_norm": 0.34654122591018677, "learning_rate": 5.94750183517947e-06, "loss": 0.2669, "step": 28390 }, { "epoch": 0.6329210325413617, "grad_norm": 0.8061956763267517, "learning_rate": 5.9443011577040545e-06, "loss": 0.2312, "step": 28395 }, { "epoch": 0.6330324819219817, "grad_norm": 0.799089252948761, "learning_rate": 5.9411009774260154e-06, "loss": 0.3356, "step": 28400 }, { "epoch": 0.6331439313026018, "grad_norm": 0.45396721363067627, "learning_rate": 5.937901294737667e-06, "loss": 0.1851, "step": 28405 }, { "epoch": 0.6332553806832218, "grad_norm": 0.5224248766899109, "learning_rate": 5.934702110031263e-06, "loss": 0.3263, "step": 28410 }, { "epoch": 0.6333668300638419, "grad_norm": 0.6665075421333313, "learning_rate": 5.931503423699002e-06, "loss": 0.3244, "step": 28415 }, { "epoch": 0.633478279444462, "grad_norm": 0.9394651055335999, "learning_rate": 5.928305236133016e-06, "loss": 0.3177, "step": 28420 }, { "epoch": 0.633589728825082, "grad_norm": 0.7035577297210693, "learning_rate": 5.925107547725382e-06, "loss": 0.3348, "step": 28425 }, { "epoch": 0.6337011782057022, "grad_norm": 0.9956908226013184, "learning_rate": 5.921910358868103e-06, "loss": 0.2843, "step": 28430 }, { "epoch": 0.6338126275863222, "grad_norm": 0.48321256041526794, "learning_rate": 5.918713669953143e-06, "loss": 0.2137, "step": 28435 }, { "epoch": 0.6339240769669423, "grad_norm": 0.597806453704834, "learning_rate": 5.915517481372382e-06, "loss": 0.237, "step": 28440 }, { "epoch": 0.6340355263475623, "grad_norm": 0.7039462327957153, "learning_rate": 5.9123217935176505e-06, "loss": 0.3598, "step": 28445 }, { "epoch": 0.6341469757281823, "grad_norm": 0.7864067554473877, "learning_rate": 5.909126606780717e-06, "loss": 0.2633, "step": 28450 }, { "epoch": 0.6342584251088025, "grad_norm": 0.49334821105003357, "learning_rate": 5.905931921553282e-06, "loss": 0.2726, "step": 28455 }, { "epoch": 0.6343698744894225, "grad_norm": 0.6459826827049255, "learning_rate": 5.9027377382269935e-06, "loss": 0.4212, "step": 28460 }, { "epoch": 0.6344813238700426, "grad_norm": 0.39496904611587524, "learning_rate": 5.899544057193429e-06, "loss": 0.3046, "step": 28465 }, { "epoch": 0.6345927732506627, "grad_norm": 0.708795964717865, "learning_rate": 5.896350878844115e-06, "loss": 0.3406, "step": 28470 }, { "epoch": 0.6347042226312827, "grad_norm": 1.2843683958053589, "learning_rate": 5.893158203570508e-06, "loss": 0.3394, "step": 28475 }, { "epoch": 0.6348156720119028, "grad_norm": 0.5526872277259827, "learning_rate": 5.889966031764007e-06, "loss": 0.279, "step": 28480 }, { "epoch": 0.6349271213925228, "grad_norm": 0.5944620370864868, "learning_rate": 5.886774363815944e-06, "loss": 0.2616, "step": 28485 }, { "epoch": 0.635038570773143, "grad_norm": 0.5583816170692444, "learning_rate": 5.883583200117595e-06, "loss": 0.3036, "step": 28490 }, { "epoch": 0.635150020153763, "grad_norm": 0.36957287788391113, "learning_rate": 5.880392541060174e-06, "loss": 0.3225, "step": 28495 }, { "epoch": 0.6352614695343831, "grad_norm": 0.6170169711112976, "learning_rate": 5.877202387034824e-06, "loss": 0.3002, "step": 28500 }, { "epoch": 0.6353729189150031, "grad_norm": 0.4157882332801819, "learning_rate": 5.874012738432639e-06, "loss": 0.2491, "step": 28505 }, { "epoch": 0.6354843682956232, "grad_norm": 0.4403998553752899, "learning_rate": 5.870823595644648e-06, "loss": 0.2913, "step": 28510 }, { "epoch": 0.6355958176762433, "grad_norm": 0.5965349078178406, "learning_rate": 5.8676349590618095e-06, "loss": 0.3229, "step": 28515 }, { "epoch": 0.6357072670568633, "grad_norm": 0.7371415495872498, "learning_rate": 5.86444682907503e-06, "loss": 0.2964, "step": 28520 }, { "epoch": 0.6358187164374834, "grad_norm": 0.7189732193946838, "learning_rate": 5.861259206075147e-06, "loss": 0.3191, "step": 28525 }, { "epoch": 0.6359301658181035, "grad_norm": 0.8875950574874878, "learning_rate": 5.858072090452939e-06, "loss": 0.3036, "step": 28530 }, { "epoch": 0.6360416151987235, "grad_norm": 0.7388046383857727, "learning_rate": 5.854885482599118e-06, "loss": 0.3218, "step": 28535 }, { "epoch": 0.6361530645793436, "grad_norm": 0.5969105958938599, "learning_rate": 5.8516993829043465e-06, "loss": 0.3614, "step": 28540 }, { "epoch": 0.6362645139599636, "grad_norm": 0.801123321056366, "learning_rate": 5.8485137917592115e-06, "loss": 0.2599, "step": 28545 }, { "epoch": 0.6363759633405838, "grad_norm": 0.5359708666801453, "learning_rate": 5.845328709554237e-06, "loss": 0.3185, "step": 28550 }, { "epoch": 0.6364874127212038, "grad_norm": 0.9001630544662476, "learning_rate": 5.842144136679897e-06, "loss": 0.3986, "step": 28555 }, { "epoch": 0.6365988621018238, "grad_norm": 0.3510948121547699, "learning_rate": 5.838960073526589e-06, "loss": 0.2436, "step": 28560 }, { "epoch": 0.636710311482444, "grad_norm": 0.5558143854141235, "learning_rate": 5.835776520484664e-06, "loss": 0.3203, "step": 28565 }, { "epoch": 0.636821760863064, "grad_norm": 0.7191622257232666, "learning_rate": 5.832593477944386e-06, "loss": 0.327, "step": 28570 }, { "epoch": 0.6369332102436841, "grad_norm": 0.8400184512138367, "learning_rate": 5.829410946295981e-06, "loss": 0.2707, "step": 28575 }, { "epoch": 0.6370446596243041, "grad_norm": 0.6664688587188721, "learning_rate": 5.826228925929607e-06, "loss": 0.2452, "step": 28580 }, { "epoch": 0.6371561090049243, "grad_norm": 0.7116914391517639, "learning_rate": 5.82304741723535e-06, "loss": 0.3667, "step": 28585 }, { "epoch": 0.6372675583855443, "grad_norm": 0.8650065660476685, "learning_rate": 5.819866420603237e-06, "loss": 0.2962, "step": 28590 }, { "epoch": 0.6373790077661643, "grad_norm": 0.3095763623714447, "learning_rate": 5.8166859364232365e-06, "loss": 0.2899, "step": 28595 }, { "epoch": 0.6374904571467844, "grad_norm": 0.6253182291984558, "learning_rate": 5.813505965085245e-06, "loss": 0.3305, "step": 28600 }, { "epoch": 0.6376019065274044, "grad_norm": 0.41500774025917053, "learning_rate": 5.810326506979109e-06, "loss": 0.3192, "step": 28605 }, { "epoch": 0.6377133559080246, "grad_norm": 0.4850252568721771, "learning_rate": 5.8071475624946065e-06, "loss": 0.2845, "step": 28610 }, { "epoch": 0.6378248052886446, "grad_norm": 0.771271824836731, "learning_rate": 5.8039691320214495e-06, "loss": 0.3584, "step": 28615 }, { "epoch": 0.6379362546692646, "grad_norm": 0.5867313742637634, "learning_rate": 5.800791215949289e-06, "loss": 0.4466, "step": 28620 }, { "epoch": 0.6380477040498848, "grad_norm": 0.6335778832435608, "learning_rate": 5.797613814667711e-06, "loss": 0.4406, "step": 28625 }, { "epoch": 0.6381591534305048, "grad_norm": 0.5767678618431091, "learning_rate": 5.794436928566238e-06, "loss": 0.303, "step": 28630 }, { "epoch": 0.6382706028111249, "grad_norm": 0.666451096534729, "learning_rate": 5.7912605580343415e-06, "loss": 0.1921, "step": 28635 }, { "epoch": 0.6383820521917449, "grad_norm": 0.5831393003463745, "learning_rate": 5.7880847034614125e-06, "loss": 0.298, "step": 28640 }, { "epoch": 0.6384935015723651, "grad_norm": 0.6831397414207458, "learning_rate": 5.784909365236784e-06, "loss": 0.3438, "step": 28645 }, { "epoch": 0.6386049509529851, "grad_norm": 0.7521395087242126, "learning_rate": 5.781734543749737e-06, "loss": 0.4049, "step": 28650 }, { "epoch": 0.6387164003336051, "grad_norm": 0.46241921186447144, "learning_rate": 5.7785602393894745e-06, "loss": 0.3837, "step": 28655 }, { "epoch": 0.6388278497142252, "grad_norm": 0.8582910895347595, "learning_rate": 5.775386452545142e-06, "loss": 0.2823, "step": 28660 }, { "epoch": 0.6389392990948453, "grad_norm": 0.6524258852005005, "learning_rate": 5.772213183605817e-06, "loss": 0.3483, "step": 28665 }, { "epoch": 0.6390507484754654, "grad_norm": 0.8292897939682007, "learning_rate": 5.7690404329605255e-06, "loss": 0.2351, "step": 28670 }, { "epoch": 0.6391621978560854, "grad_norm": 0.5087527632713318, "learning_rate": 5.765868200998219e-06, "loss": 0.3299, "step": 28675 }, { "epoch": 0.6392736472367054, "grad_norm": 0.5977237224578857, "learning_rate": 5.7626964881077865e-06, "loss": 0.3581, "step": 28680 }, { "epoch": 0.6393850966173256, "grad_norm": 0.6168895959854126, "learning_rate": 5.75952529467806e-06, "loss": 0.2462, "step": 28685 }, { "epoch": 0.6394965459979456, "grad_norm": 0.6840497255325317, "learning_rate": 5.756354621097802e-06, "loss": 0.4014, "step": 28690 }, { "epoch": 0.6396079953785657, "grad_norm": 0.6457498073577881, "learning_rate": 5.753184467755706e-06, "loss": 0.3981, "step": 28695 }, { "epoch": 0.6397194447591857, "grad_norm": 0.6398038268089294, "learning_rate": 5.7500148350404184e-06, "loss": 0.3208, "step": 28700 }, { "epoch": 0.6398308941398059, "grad_norm": 0.704741358757019, "learning_rate": 5.746845723340509e-06, "loss": 0.4027, "step": 28705 }, { "epoch": 0.6399423435204259, "grad_norm": 0.5821196436882019, "learning_rate": 5.743677133044483e-06, "loss": 0.2992, "step": 28710 }, { "epoch": 0.6400537929010459, "grad_norm": 0.7517719268798828, "learning_rate": 5.7405090645407825e-06, "loss": 0.2866, "step": 28715 }, { "epoch": 0.640165242281666, "grad_norm": 0.5159407258033752, "learning_rate": 5.737341518217796e-06, "loss": 0.2866, "step": 28720 }, { "epoch": 0.6402766916622861, "grad_norm": 0.701301634311676, "learning_rate": 5.734174494463834e-06, "loss": 0.2667, "step": 28725 }, { "epoch": 0.6403881410429062, "grad_norm": 0.48736444115638733, "learning_rate": 5.731007993667155e-06, "loss": 0.2607, "step": 28730 }, { "epoch": 0.6404995904235262, "grad_norm": 0.6046923398971558, "learning_rate": 5.7278420162159455e-06, "loss": 0.2917, "step": 28735 }, { "epoch": 0.6406110398041462, "grad_norm": 0.6669983267784119, "learning_rate": 5.724676562498328e-06, "loss": 0.2034, "step": 28740 }, { "epoch": 0.6407224891847664, "grad_norm": 0.6515795588493347, "learning_rate": 5.721511632902364e-06, "loss": 0.1844, "step": 28745 }, { "epoch": 0.6408339385653864, "grad_norm": 0.38362544775009155, "learning_rate": 5.718347227816044e-06, "loss": 0.2074, "step": 28750 }, { "epoch": 0.6409453879460065, "grad_norm": 0.8220067620277405, "learning_rate": 5.715183347627305e-06, "loss": 0.2547, "step": 28755 }, { "epoch": 0.6410568373266265, "grad_norm": 0.6852700710296631, "learning_rate": 5.7120199927240186e-06, "loss": 0.4051, "step": 28760 }, { "epoch": 0.6411682867072466, "grad_norm": 0.6596646904945374, "learning_rate": 5.708857163493981e-06, "loss": 0.2388, "step": 28765 }, { "epoch": 0.6412797360878667, "grad_norm": 0.8401342034339905, "learning_rate": 5.705694860324935e-06, "loss": 0.301, "step": 28770 }, { "epoch": 0.6413911854684867, "grad_norm": 0.7203656435012817, "learning_rate": 5.702533083604551e-06, "loss": 0.3037, "step": 28775 }, { "epoch": 0.6415026348491069, "grad_norm": 0.509978711605072, "learning_rate": 5.6993718337204394e-06, "loss": 0.3031, "step": 28780 }, { "epoch": 0.6416140842297269, "grad_norm": 0.6036132574081421, "learning_rate": 5.696211111060142e-06, "loss": 0.3665, "step": 28785 }, { "epoch": 0.641725533610347, "grad_norm": 0.613450288772583, "learning_rate": 5.693050916011141e-06, "loss": 0.3481, "step": 28790 }, { "epoch": 0.641836982990967, "grad_norm": 0.8690477609634399, "learning_rate": 5.68989124896086e-06, "loss": 0.2436, "step": 28795 }, { "epoch": 0.641948432371587, "grad_norm": 0.548732340335846, "learning_rate": 5.68673211029664e-06, "loss": 0.2267, "step": 28800 }, { "epoch": 0.6420598817522072, "grad_norm": 0.6367558240890503, "learning_rate": 5.68357350040577e-06, "loss": 0.2659, "step": 28805 }, { "epoch": 0.6421713311328272, "grad_norm": 0.7086297869682312, "learning_rate": 5.680415419675472e-06, "loss": 0.2234, "step": 28810 }, { "epoch": 0.6422827805134473, "grad_norm": 0.7409600019454956, "learning_rate": 5.677257868492898e-06, "loss": 0.3479, "step": 28815 }, { "epoch": 0.6423942298940674, "grad_norm": 0.26541799306869507, "learning_rate": 5.674100847245142e-06, "loss": 0.1698, "step": 28820 }, { "epoch": 0.6425056792746874, "grad_norm": 0.750893235206604, "learning_rate": 5.6709443563192355e-06, "loss": 0.2711, "step": 28825 }, { "epoch": 0.6426171286553075, "grad_norm": 0.6889334917068481, "learning_rate": 5.667788396102136e-06, "loss": 0.3132, "step": 28830 }, { "epoch": 0.6427285780359275, "grad_norm": 0.6183418035507202, "learning_rate": 5.66463296698074e-06, "loss": 0.2488, "step": 28835 }, { "epoch": 0.6428400274165477, "grad_norm": 0.6999242305755615, "learning_rate": 5.661478069341877e-06, "loss": 0.4003, "step": 28840 }, { "epoch": 0.6429514767971677, "grad_norm": 0.8293443322181702, "learning_rate": 5.658323703572313e-06, "loss": 0.2561, "step": 28845 }, { "epoch": 0.6430629261777878, "grad_norm": 0.6160246133804321, "learning_rate": 5.655169870058752e-06, "loss": 0.2749, "step": 28850 }, { "epoch": 0.6431743755584078, "grad_norm": 0.7358884215354919, "learning_rate": 5.652016569187823e-06, "loss": 0.2327, "step": 28855 }, { "epoch": 0.6432858249390279, "grad_norm": 0.5642139315605164, "learning_rate": 5.648863801346108e-06, "loss": 0.3409, "step": 28860 }, { "epoch": 0.643397274319648, "grad_norm": 0.47250843048095703, "learning_rate": 5.645711566920105e-06, "loss": 0.3037, "step": 28865 }, { "epoch": 0.643508723700268, "grad_norm": 0.6742610335350037, "learning_rate": 5.642559866296253e-06, "loss": 0.3204, "step": 28870 }, { "epoch": 0.6436201730808881, "grad_norm": 0.6448606848716736, "learning_rate": 5.639408699860927e-06, "loss": 0.2055, "step": 28875 }, { "epoch": 0.6437316224615082, "grad_norm": 0.35170137882232666, "learning_rate": 5.636258068000433e-06, "loss": 0.2345, "step": 28880 }, { "epoch": 0.6438430718421282, "grad_norm": 0.5862758755683899, "learning_rate": 5.633107971101019e-06, "loss": 0.3759, "step": 28885 }, { "epoch": 0.6439545212227483, "grad_norm": 0.5817404985427856, "learning_rate": 5.629958409548859e-06, "loss": 0.201, "step": 28890 }, { "epoch": 0.6440659706033683, "grad_norm": 0.9205470681190491, "learning_rate": 5.626809383730069e-06, "loss": 0.3098, "step": 28895 }, { "epoch": 0.6441774199839885, "grad_norm": 0.5637964606285095, "learning_rate": 5.623660894030691e-06, "loss": 0.3565, "step": 28900 }, { "epoch": 0.6442888693646085, "grad_norm": 0.5298755168914795, "learning_rate": 5.620512940836711e-06, "loss": 0.3908, "step": 28905 }, { "epoch": 0.6444003187452286, "grad_norm": 0.7002055048942566, "learning_rate": 5.617365524534033e-06, "loss": 0.2143, "step": 28910 }, { "epoch": 0.6445117681258486, "grad_norm": 0.6193220615386963, "learning_rate": 5.614218645508518e-06, "loss": 0.2276, "step": 28915 }, { "epoch": 0.6446232175064687, "grad_norm": 0.5418049097061157, "learning_rate": 5.611072304145944e-06, "loss": 0.4421, "step": 28920 }, { "epoch": 0.6447346668870888, "grad_norm": 0.5820499062538147, "learning_rate": 5.607926500832024e-06, "loss": 0.2382, "step": 28925 }, { "epoch": 0.6448461162677088, "grad_norm": 0.5766317844390869, "learning_rate": 5.604781235952418e-06, "loss": 0.2872, "step": 28930 }, { "epoch": 0.644957565648329, "grad_norm": 0.896270215511322, "learning_rate": 5.601636509892706e-06, "loss": 0.3515, "step": 28935 }, { "epoch": 0.645069015028949, "grad_norm": 0.613178551197052, "learning_rate": 5.5984923230384045e-06, "loss": 0.2528, "step": 28940 }, { "epoch": 0.645180464409569, "grad_norm": 0.7325118184089661, "learning_rate": 5.595348675774972e-06, "loss": 0.2991, "step": 28945 }, { "epoch": 0.6452919137901891, "grad_norm": 0.6555901169776917, "learning_rate": 5.5922055684877956e-06, "loss": 0.2551, "step": 28950 }, { "epoch": 0.6454033631708092, "grad_norm": 0.7699524760246277, "learning_rate": 5.589063001562191e-06, "loss": 0.3519, "step": 28955 }, { "epoch": 0.6455148125514293, "grad_norm": 0.8284532427787781, "learning_rate": 5.585920975383413e-06, "loss": 0.2222, "step": 28960 }, { "epoch": 0.6456262619320493, "grad_norm": 0.41853615641593933, "learning_rate": 5.582779490336656e-06, "loss": 0.2515, "step": 28965 }, { "epoch": 0.6457377113126693, "grad_norm": 0.780164361000061, "learning_rate": 5.579638546807037e-06, "loss": 0.4116, "step": 28970 }, { "epoch": 0.6458491606932895, "grad_norm": 0.6371381878852844, "learning_rate": 5.5764981451796085e-06, "loss": 0.2392, "step": 28975 }, { "epoch": 0.6459606100739095, "grad_norm": 0.6431651711463928, "learning_rate": 5.573358285839367e-06, "loss": 0.3508, "step": 28980 }, { "epoch": 0.6460720594545296, "grad_norm": 0.3787180781364441, "learning_rate": 5.57021896917123e-06, "loss": 0.2437, "step": 28985 }, { "epoch": 0.6461835088351496, "grad_norm": 0.7805829048156738, "learning_rate": 5.567080195560057e-06, "loss": 0.1728, "step": 28990 }, { "epoch": 0.6462949582157698, "grad_norm": 0.7594588398933411, "learning_rate": 5.563941965390631e-06, "loss": 0.3961, "step": 28995 }, { "epoch": 0.6464064075963898, "grad_norm": 0.6046558022499084, "learning_rate": 5.560804279047682e-06, "loss": 0.3928, "step": 29000 }, { "epoch": 0.6465178569770098, "grad_norm": 0.5299666523933411, "learning_rate": 5.557667136915859e-06, "loss": 0.2343, "step": 29005 }, { "epoch": 0.6466293063576299, "grad_norm": 0.5828954577445984, "learning_rate": 5.554530539379759e-06, "loss": 0.2749, "step": 29010 }, { "epoch": 0.64674075573825, "grad_norm": 0.7023400664329529, "learning_rate": 5.551394486823903e-06, "loss": 0.1958, "step": 29015 }, { "epoch": 0.6468522051188701, "grad_norm": 0.7730718851089478, "learning_rate": 5.548258979632743e-06, "loss": 0.2952, "step": 29020 }, { "epoch": 0.6469636544994901, "grad_norm": 0.4209458529949188, "learning_rate": 5.545124018190671e-06, "loss": 0.2644, "step": 29025 }, { "epoch": 0.6470751038801101, "grad_norm": 0.526593029499054, "learning_rate": 5.541989602882003e-06, "loss": 0.1792, "step": 29030 }, { "epoch": 0.6471865532607303, "grad_norm": 0.4635657072067261, "learning_rate": 5.5388557340909995e-06, "loss": 0.2978, "step": 29035 }, { "epoch": 0.6472980026413503, "grad_norm": 0.4689362347126007, "learning_rate": 5.535722412201854e-06, "loss": 0.2753, "step": 29040 }, { "epoch": 0.6474094520219704, "grad_norm": 0.532612681388855, "learning_rate": 5.53258963759868e-06, "loss": 0.3198, "step": 29045 }, { "epoch": 0.6475209014025904, "grad_norm": 0.6630828976631165, "learning_rate": 5.529457410665533e-06, "loss": 0.2862, "step": 29050 }, { "epoch": 0.6476323507832106, "grad_norm": 0.8753209114074707, "learning_rate": 5.526325731786402e-06, "loss": 0.2556, "step": 29055 }, { "epoch": 0.6477438001638306, "grad_norm": 0.49083131551742554, "learning_rate": 5.523194601345199e-06, "loss": 0.2725, "step": 29060 }, { "epoch": 0.6478552495444506, "grad_norm": 0.6012879014015198, "learning_rate": 5.520064019725787e-06, "loss": 0.2719, "step": 29065 }, { "epoch": 0.6479666989250707, "grad_norm": 0.6715513467788696, "learning_rate": 5.516933987311942e-06, "loss": 0.3033, "step": 29070 }, { "epoch": 0.6480781483056908, "grad_norm": 0.72255939245224, "learning_rate": 5.51380450448739e-06, "loss": 0.301, "step": 29075 }, { "epoch": 0.6481895976863109, "grad_norm": 0.6323819756507874, "learning_rate": 5.5106755716357796e-06, "loss": 0.326, "step": 29080 }, { "epoch": 0.6483010470669309, "grad_norm": 0.5553443431854248, "learning_rate": 5.50754718914069e-06, "loss": 0.2591, "step": 29085 }, { "epoch": 0.648412496447551, "grad_norm": 0.8041399717330933, "learning_rate": 5.504419357385639e-06, "loss": 0.3453, "step": 29090 }, { "epoch": 0.6485239458281711, "grad_norm": 0.6085423827171326, "learning_rate": 5.50129207675407e-06, "loss": 0.2194, "step": 29095 }, { "epoch": 0.6486353952087911, "grad_norm": 0.6338213682174683, "learning_rate": 5.498165347629367e-06, "loss": 0.4204, "step": 29100 }, { "epoch": 0.6487468445894112, "grad_norm": 0.97102952003479, "learning_rate": 5.495039170394849e-06, "loss": 0.2265, "step": 29105 }, { "epoch": 0.6488582939700313, "grad_norm": 0.42732176184654236, "learning_rate": 5.491913545433756e-06, "loss": 0.3023, "step": 29110 }, { "epoch": 0.6489697433506513, "grad_norm": 0.7507066130638123, "learning_rate": 5.488788473129264e-06, "loss": 0.2947, "step": 29115 }, { "epoch": 0.6490811927312714, "grad_norm": 0.6872125864028931, "learning_rate": 5.485663953864484e-06, "loss": 0.2854, "step": 29120 }, { "epoch": 0.6491926421118914, "grad_norm": 1.0153363943099976, "learning_rate": 5.482539988022455e-06, "loss": 0.3717, "step": 29125 }, { "epoch": 0.6493040914925116, "grad_norm": 1.0214664936065674, "learning_rate": 5.4794165759861565e-06, "loss": 0.2702, "step": 29130 }, { "epoch": 0.6494155408731316, "grad_norm": 0.470058798789978, "learning_rate": 5.47629371813849e-06, "loss": 0.2975, "step": 29135 }, { "epoch": 0.6495269902537517, "grad_norm": 0.4673933684825897, "learning_rate": 5.473171414862299e-06, "loss": 0.3017, "step": 29140 }, { "epoch": 0.6496384396343717, "grad_norm": 0.741070032119751, "learning_rate": 5.470049666540352e-06, "loss": 0.3026, "step": 29145 }, { "epoch": 0.6497498890149918, "grad_norm": 0.710664689540863, "learning_rate": 5.466928473555347e-06, "loss": 0.2238, "step": 29150 }, { "epoch": 0.6498613383956119, "grad_norm": 0.4912746846675873, "learning_rate": 5.463807836289921e-06, "loss": 0.2674, "step": 29155 }, { "epoch": 0.6499727877762319, "grad_norm": 0.48481565713882446, "learning_rate": 5.460687755126641e-06, "loss": 0.342, "step": 29160 }, { "epoch": 0.650084237156852, "grad_norm": 0.6787352561950684, "learning_rate": 5.457568230448005e-06, "loss": 0.3587, "step": 29165 }, { "epoch": 0.6501956865374721, "grad_norm": 0.5041036605834961, "learning_rate": 5.454449262636443e-06, "loss": 0.2712, "step": 29170 }, { "epoch": 0.6503071359180921, "grad_norm": 0.8429849743843079, "learning_rate": 5.4513308520743105e-06, "loss": 0.3708, "step": 29175 }, { "epoch": 0.6504185852987122, "grad_norm": 0.7078865766525269, "learning_rate": 5.448212999143909e-06, "loss": 0.3688, "step": 29180 }, { "epoch": 0.6505300346793322, "grad_norm": 0.44548001885414124, "learning_rate": 5.445095704227459e-06, "loss": 0.2585, "step": 29185 }, { "epoch": 0.6506414840599524, "grad_norm": 0.5557127594947815, "learning_rate": 5.4419789677071135e-06, "loss": 0.2925, "step": 29190 }, { "epoch": 0.6507529334405724, "grad_norm": 0.5642905831336975, "learning_rate": 5.438862789964969e-06, "loss": 0.2724, "step": 29195 }, { "epoch": 0.6508643828211925, "grad_norm": 0.38652265071868896, "learning_rate": 5.435747171383039e-06, "loss": 0.2282, "step": 29200 }, { "epoch": 0.6509758322018125, "grad_norm": 0.721820592880249, "learning_rate": 5.432632112343274e-06, "loss": 0.3158, "step": 29205 }, { "epoch": 0.6510872815824326, "grad_norm": 0.8188399076461792, "learning_rate": 5.429517613227555e-06, "loss": 0.4775, "step": 29210 }, { "epoch": 0.6511987309630527, "grad_norm": 0.5114537477493286, "learning_rate": 5.426403674417701e-06, "loss": 0.246, "step": 29215 }, { "epoch": 0.6513101803436727, "grad_norm": 0.6441953182220459, "learning_rate": 5.423290296295452e-06, "loss": 0.2664, "step": 29220 }, { "epoch": 0.6514216297242928, "grad_norm": 0.7435140013694763, "learning_rate": 5.420177479242488e-06, "loss": 0.2502, "step": 29225 }, { "epoch": 0.6515330791049129, "grad_norm": 0.6647602319717407, "learning_rate": 5.4170652236404144e-06, "loss": 0.2905, "step": 29230 }, { "epoch": 0.6516445284855329, "grad_norm": 0.4995235502719879, "learning_rate": 5.413953529870769e-06, "loss": 0.3982, "step": 29235 }, { "epoch": 0.651755977866153, "grad_norm": 0.8494893312454224, "learning_rate": 5.410842398315022e-06, "loss": 0.2187, "step": 29240 }, { "epoch": 0.651867427246773, "grad_norm": 0.42740708589553833, "learning_rate": 5.407731829354572e-06, "loss": 0.3034, "step": 29245 }, { "epoch": 0.6519788766273932, "grad_norm": 0.7548797726631165, "learning_rate": 5.4046218233707505e-06, "loss": 0.2383, "step": 29250 }, { "epoch": 0.6520903260080132, "grad_norm": 0.5193314552307129, "learning_rate": 5.4015123807448286e-06, "loss": 0.3382, "step": 29255 }, { "epoch": 0.6522017753886333, "grad_norm": 0.4508189260959625, "learning_rate": 5.3984035018579924e-06, "loss": 0.4111, "step": 29260 }, { "epoch": 0.6523132247692534, "grad_norm": 0.7551596760749817, "learning_rate": 5.39529518709137e-06, "loss": 0.343, "step": 29265 }, { "epoch": 0.6524246741498734, "grad_norm": 0.5172317624092102, "learning_rate": 5.392187436826012e-06, "loss": 0.3155, "step": 29270 }, { "epoch": 0.6525361235304935, "grad_norm": 0.7789238691329956, "learning_rate": 5.3890802514429045e-06, "loss": 0.2763, "step": 29275 }, { "epoch": 0.6526475729111135, "grad_norm": 0.43609416484832764, "learning_rate": 5.3859736313229715e-06, "loss": 0.311, "step": 29280 }, { "epoch": 0.6527590222917337, "grad_norm": 0.8336686491966248, "learning_rate": 5.382867576847053e-06, "loss": 0.3602, "step": 29285 }, { "epoch": 0.6528704716723537, "grad_norm": 0.5604103803634644, "learning_rate": 5.379762088395935e-06, "loss": 0.3437, "step": 29290 }, { "epoch": 0.6529819210529737, "grad_norm": 0.3999236226081848, "learning_rate": 5.3766571663503205e-06, "loss": 0.2477, "step": 29295 }, { "epoch": 0.6530933704335938, "grad_norm": 0.5317768454551697, "learning_rate": 5.373552811090852e-06, "loss": 0.2595, "step": 29300 }, { "epoch": 0.6532048198142139, "grad_norm": 0.6594027280807495, "learning_rate": 5.3704490229980975e-06, "loss": 0.2629, "step": 29305 }, { "epoch": 0.653316269194834, "grad_norm": 0.7733690738677979, "learning_rate": 5.367345802452555e-06, "loss": 0.2808, "step": 29310 }, { "epoch": 0.653427718575454, "grad_norm": 0.3656593859195709, "learning_rate": 5.364243149834658e-06, "loss": 0.1783, "step": 29315 }, { "epoch": 0.653539167956074, "grad_norm": 0.8090857863426208, "learning_rate": 5.361141065524773e-06, "loss": 0.2217, "step": 29320 }, { "epoch": 0.6536506173366942, "grad_norm": 0.5806332230567932, "learning_rate": 5.358039549903186e-06, "loss": 0.283, "step": 29325 }, { "epoch": 0.6537620667173142, "grad_norm": 0.4583825170993805, "learning_rate": 5.354938603350119e-06, "loss": 0.2812, "step": 29330 }, { "epoch": 0.6538735160979343, "grad_norm": 0.4804151952266693, "learning_rate": 5.3518382262457265e-06, "loss": 0.2342, "step": 29335 }, { "epoch": 0.6539849654785543, "grad_norm": 0.5588163733482361, "learning_rate": 5.348738418970084e-06, "loss": 0.2548, "step": 29340 }, { "epoch": 0.6540964148591745, "grad_norm": 0.5237212777137756, "learning_rate": 5.345639181903214e-06, "loss": 0.2743, "step": 29345 }, { "epoch": 0.6542078642397945, "grad_norm": 0.49168357253074646, "learning_rate": 5.34254051542505e-06, "loss": 0.2248, "step": 29350 }, { "epoch": 0.6543193136204145, "grad_norm": 0.34626704454421997, "learning_rate": 5.339442419915472e-06, "loss": 0.3692, "step": 29355 }, { "epoch": 0.6544307630010346, "grad_norm": 0.8723349571228027, "learning_rate": 5.336344895754279e-06, "loss": 0.3255, "step": 29360 }, { "epoch": 0.6545422123816547, "grad_norm": 0.5037609934806824, "learning_rate": 5.333247943321205e-06, "loss": 0.2013, "step": 29365 }, { "epoch": 0.6546536617622748, "grad_norm": 0.5711085200309753, "learning_rate": 5.330151562995908e-06, "loss": 0.2588, "step": 29370 }, { "epoch": 0.6547651111428948, "grad_norm": 0.5225061178207397, "learning_rate": 5.327055755157986e-06, "loss": 0.2935, "step": 29375 }, { "epoch": 0.6548765605235148, "grad_norm": 0.7341488599777222, "learning_rate": 5.323960520186959e-06, "loss": 0.2403, "step": 29380 }, { "epoch": 0.654988009904135, "grad_norm": 0.5305912494659424, "learning_rate": 5.3208658584622765e-06, "loss": 0.3251, "step": 29385 }, { "epoch": 0.655099459284755, "grad_norm": 0.5498141646385193, "learning_rate": 5.317771770363325e-06, "loss": 0.3644, "step": 29390 }, { "epoch": 0.6552109086653751, "grad_norm": 0.9295704960823059, "learning_rate": 5.314678256269413e-06, "loss": 0.2262, "step": 29395 }, { "epoch": 0.6553223580459951, "grad_norm": 0.522474467754364, "learning_rate": 5.311585316559782e-06, "loss": 0.3555, "step": 29400 }, { "epoch": 0.6554338074266153, "grad_norm": 0.6643032431602478, "learning_rate": 5.308492951613597e-06, "loss": 0.3139, "step": 29405 }, { "epoch": 0.6555452568072353, "grad_norm": 0.9323588013648987, "learning_rate": 5.305401161809969e-06, "loss": 0.4577, "step": 29410 }, { "epoch": 0.6556567061878553, "grad_norm": 0.7106078863143921, "learning_rate": 5.3023099475279216e-06, "loss": 0.2728, "step": 29415 }, { "epoch": 0.6557681555684755, "grad_norm": 0.6369653344154358, "learning_rate": 5.299219309146411e-06, "loss": 0.3196, "step": 29420 }, { "epoch": 0.6558796049490955, "grad_norm": 0.5267141461372375, "learning_rate": 5.29612924704433e-06, "loss": 0.223, "step": 29425 }, { "epoch": 0.6559910543297156, "grad_norm": 0.4096876084804535, "learning_rate": 5.293039761600496e-06, "loss": 0.2565, "step": 29430 }, { "epoch": 0.6561025037103356, "grad_norm": 0.7868233919143677, "learning_rate": 5.2899508531936526e-06, "loss": 0.2941, "step": 29435 }, { "epoch": 0.6562139530909556, "grad_norm": 0.8961450457572937, "learning_rate": 5.28686252220248e-06, "loss": 0.275, "step": 29440 }, { "epoch": 0.6563254024715758, "grad_norm": 0.6578887104988098, "learning_rate": 5.283774769005585e-06, "loss": 0.1974, "step": 29445 }, { "epoch": 0.6564368518521958, "grad_norm": 0.38261333107948303, "learning_rate": 5.280687593981497e-06, "loss": 0.3405, "step": 29450 }, { "epoch": 0.6565483012328159, "grad_norm": 0.85776686668396, "learning_rate": 5.277600997508681e-06, "loss": 0.3478, "step": 29455 }, { "epoch": 0.656659750613436, "grad_norm": 0.46715691685676575, "learning_rate": 5.274514979965535e-06, "loss": 0.1674, "step": 29460 }, { "epoch": 0.6567711999940561, "grad_norm": 0.4936739206314087, "learning_rate": 5.2714295417303715e-06, "loss": 0.3113, "step": 29465 }, { "epoch": 0.6568826493746761, "grad_norm": 0.7164607644081116, "learning_rate": 5.268344683181452e-06, "loss": 0.3478, "step": 29470 }, { "epoch": 0.6569940987552961, "grad_norm": 0.6708316206932068, "learning_rate": 5.265260404696952e-06, "loss": 0.2512, "step": 29475 }, { "epoch": 0.6571055481359163, "grad_norm": 0.6708416938781738, "learning_rate": 5.26217670665498e-06, "loss": 0.3026, "step": 29480 }, { "epoch": 0.6572169975165363, "grad_norm": 0.6250762939453125, "learning_rate": 5.259093589433573e-06, "loss": 0.2766, "step": 29485 }, { "epoch": 0.6573284468971564, "grad_norm": 0.6325827240943909, "learning_rate": 5.2560110534106944e-06, "loss": 0.2531, "step": 29490 }, { "epoch": 0.6574398962777764, "grad_norm": 0.6853511333465576, "learning_rate": 5.252929098964246e-06, "loss": 0.3013, "step": 29495 }, { "epoch": 0.6575513456583965, "grad_norm": 0.6903412938117981, "learning_rate": 5.249847726472044e-06, "loss": 0.3223, "step": 29500 }, { "epoch": 0.6576627950390166, "grad_norm": 0.8328612446784973, "learning_rate": 5.246766936311849e-06, "loss": 0.4012, "step": 29505 }, { "epoch": 0.6577742444196366, "grad_norm": 0.6383739113807678, "learning_rate": 5.243686728861339e-06, "loss": 0.2335, "step": 29510 }, { "epoch": 0.6578856938002567, "grad_norm": 0.3972104787826538, "learning_rate": 5.240607104498123e-06, "loss": 0.3859, "step": 29515 }, { "epoch": 0.6579971431808768, "grad_norm": 0.8156102299690247, "learning_rate": 5.237528063599739e-06, "loss": 0.3795, "step": 29520 }, { "epoch": 0.6581085925614968, "grad_norm": 0.7300518155097961, "learning_rate": 5.23444960654365e-06, "loss": 0.3428, "step": 29525 }, { "epoch": 0.6582200419421169, "grad_norm": 0.9030614495277405, "learning_rate": 5.231371733707253e-06, "loss": 0.2376, "step": 29530 }, { "epoch": 0.6583314913227369, "grad_norm": 0.6202174425125122, "learning_rate": 5.2282944454678795e-06, "loss": 0.2795, "step": 29535 }, { "epoch": 0.6584429407033571, "grad_norm": 0.6512044072151184, "learning_rate": 5.225217742202775e-06, "loss": 0.3253, "step": 29540 }, { "epoch": 0.6585543900839771, "grad_norm": 0.42493924498558044, "learning_rate": 5.222141624289118e-06, "loss": 0.2762, "step": 29545 }, { "epoch": 0.6586658394645972, "grad_norm": 0.5045167207717896, "learning_rate": 5.219066092104019e-06, "loss": 0.2826, "step": 29550 }, { "epoch": 0.6587772888452172, "grad_norm": 0.5697253346443176, "learning_rate": 5.21599114602451e-06, "loss": 0.2514, "step": 29555 }, { "epoch": 0.6588887382258373, "grad_norm": 0.6183579564094543, "learning_rate": 5.212916786427562e-06, "loss": 0.2274, "step": 29560 }, { "epoch": 0.6590001876064574, "grad_norm": 0.4757140278816223, "learning_rate": 5.2098430136900665e-06, "loss": 0.2824, "step": 29565 }, { "epoch": 0.6591116369870774, "grad_norm": 0.5445846319198608, "learning_rate": 5.206769828188844e-06, "loss": 0.3084, "step": 29570 }, { "epoch": 0.6592230863676976, "grad_norm": 0.7208926677703857, "learning_rate": 5.203697230300643e-06, "loss": 0.2515, "step": 29575 }, { "epoch": 0.6593345357483176, "grad_norm": 0.6429301500320435, "learning_rate": 5.200625220402139e-06, "loss": 0.3822, "step": 29580 }, { "epoch": 0.6594459851289376, "grad_norm": 0.575545608997345, "learning_rate": 5.197553798869939e-06, "loss": 0.2806, "step": 29585 }, { "epoch": 0.6595574345095577, "grad_norm": 0.5600603222846985, "learning_rate": 5.1944829660805675e-06, "loss": 0.3525, "step": 29590 }, { "epoch": 0.6596688838901777, "grad_norm": 0.49977508187294006, "learning_rate": 5.1914127224104935e-06, "loss": 0.2609, "step": 29595 }, { "epoch": 0.6597803332707979, "grad_norm": 0.5002358555793762, "learning_rate": 5.188343068236106e-06, "loss": 0.2928, "step": 29600 }, { "epoch": 0.6598917826514179, "grad_norm": 0.7009340524673462, "learning_rate": 5.185274003933719e-06, "loss": 0.1986, "step": 29605 }, { "epoch": 0.660003232032038, "grad_norm": 0.5574341416358948, "learning_rate": 5.1822055298795744e-06, "loss": 0.3342, "step": 29610 }, { "epoch": 0.660114681412658, "grad_norm": 0.5994423031806946, "learning_rate": 5.179137646449845e-06, "loss": 0.2479, "step": 29615 }, { "epoch": 0.6602261307932781, "grad_norm": 0.8221856951713562, "learning_rate": 5.176070354020624e-06, "loss": 0.2727, "step": 29620 }, { "epoch": 0.6603375801738982, "grad_norm": 0.6682111024856567, "learning_rate": 5.173003652967947e-06, "loss": 0.2282, "step": 29625 }, { "epoch": 0.6604490295545182, "grad_norm": 0.5746262073516846, "learning_rate": 5.169937543667759e-06, "loss": 0.2221, "step": 29630 }, { "epoch": 0.6605604789351384, "grad_norm": 0.6253163814544678, "learning_rate": 5.16687202649595e-06, "loss": 0.265, "step": 29635 }, { "epoch": 0.6606719283157584, "grad_norm": 0.5271322131156921, "learning_rate": 5.163807101828324e-06, "loss": 0.2783, "step": 29640 }, { "epoch": 0.6607833776963784, "grad_norm": 0.7899476289749146, "learning_rate": 5.160742770040619e-06, "loss": 0.4335, "step": 29645 }, { "epoch": 0.6608948270769985, "grad_norm": 0.5249226093292236, "learning_rate": 5.157679031508492e-06, "loss": 0.2563, "step": 29650 }, { "epoch": 0.6610062764576186, "grad_norm": 0.8132089376449585, "learning_rate": 5.154615886607544e-06, "loss": 0.2834, "step": 29655 }, { "epoch": 0.6611177258382387, "grad_norm": 0.8233852386474609, "learning_rate": 5.151553335713286e-06, "loss": 0.4334, "step": 29660 }, { "epoch": 0.6612291752188587, "grad_norm": 0.6578496098518372, "learning_rate": 5.148491379201161e-06, "loss": 0.2377, "step": 29665 }, { "epoch": 0.6613406245994788, "grad_norm": 0.6311374306678772, "learning_rate": 5.145430017446551e-06, "loss": 0.2669, "step": 29670 }, { "epoch": 0.6614520739800989, "grad_norm": 0.761950671672821, "learning_rate": 5.142369250824747e-06, "loss": 0.255, "step": 29675 }, { "epoch": 0.6615635233607189, "grad_norm": 0.6191284656524658, "learning_rate": 5.1393090797109745e-06, "loss": 0.2739, "step": 29680 }, { "epoch": 0.661674972741339, "grad_norm": 0.47322070598602295, "learning_rate": 5.136249504480395e-06, "loss": 0.2319, "step": 29685 }, { "epoch": 0.661786422121959, "grad_norm": 0.8531567454338074, "learning_rate": 5.133190525508083e-06, "loss": 0.2633, "step": 29690 }, { "epoch": 0.6618978715025792, "grad_norm": 0.32951730489730835, "learning_rate": 5.130132143169046e-06, "loss": 0.2739, "step": 29695 }, { "epoch": 0.6620093208831992, "grad_norm": 0.760873019695282, "learning_rate": 5.127074357838218e-06, "loss": 0.3018, "step": 29700 }, { "epoch": 0.6621207702638192, "grad_norm": 0.6249333620071411, "learning_rate": 5.124017169890458e-06, "loss": 0.1994, "step": 29705 }, { "epoch": 0.6622322196444393, "grad_norm": 0.588768720626831, "learning_rate": 5.12096057970056e-06, "loss": 0.3882, "step": 29710 }, { "epoch": 0.6623436690250594, "grad_norm": 0.4670078456401825, "learning_rate": 5.117904587643231e-06, "loss": 0.3119, "step": 29715 }, { "epoch": 0.6624551184056795, "grad_norm": 0.7735868096351624, "learning_rate": 5.114849194093119e-06, "loss": 0.3149, "step": 29720 }, { "epoch": 0.6625665677862995, "grad_norm": 0.601441502571106, "learning_rate": 5.1117943994247875e-06, "loss": 0.2163, "step": 29725 }, { "epoch": 0.6626780171669195, "grad_norm": 0.617202877998352, "learning_rate": 5.108740204012732e-06, "loss": 0.2872, "step": 29730 }, { "epoch": 0.6627894665475397, "grad_norm": 0.626083254814148, "learning_rate": 5.105686608231373e-06, "loss": 0.316, "step": 29735 }, { "epoch": 0.6629009159281597, "grad_norm": 0.7950212359428406, "learning_rate": 5.1026336124550545e-06, "loss": 0.3991, "step": 29740 }, { "epoch": 0.6630123653087798, "grad_norm": 0.7873651385307312, "learning_rate": 5.099581217058052e-06, "loss": 0.3129, "step": 29745 }, { "epoch": 0.6631238146893998, "grad_norm": 0.576119065284729, "learning_rate": 5.096529422414571e-06, "loss": 0.351, "step": 29750 }, { "epoch": 0.66323526407002, "grad_norm": 0.7593878507614136, "learning_rate": 5.093478228898734e-06, "loss": 0.2498, "step": 29755 }, { "epoch": 0.66334671345064, "grad_norm": 0.5120570063591003, "learning_rate": 5.090427636884593e-06, "loss": 0.2429, "step": 29760 }, { "epoch": 0.66345816283126, "grad_norm": 0.24186640977859497, "learning_rate": 5.087377646746128e-06, "loss": 0.251, "step": 29765 }, { "epoch": 0.6635696122118802, "grad_norm": 0.586061418056488, "learning_rate": 5.084328258857241e-06, "loss": 0.2237, "step": 29770 }, { "epoch": 0.6636810615925002, "grad_norm": 0.36794933676719666, "learning_rate": 5.081279473591765e-06, "loss": 0.2927, "step": 29775 }, { "epoch": 0.6637925109731203, "grad_norm": 0.5301176309585571, "learning_rate": 5.078231291323463e-06, "loss": 0.3115, "step": 29780 }, { "epoch": 0.6639039603537403, "grad_norm": 0.712449848651886, "learning_rate": 5.075183712426014e-06, "loss": 0.2553, "step": 29785 }, { "epoch": 0.6640154097343604, "grad_norm": 0.803022027015686, "learning_rate": 5.072136737273029e-06, "loss": 0.1746, "step": 29790 }, { "epoch": 0.6641268591149805, "grad_norm": 0.48893946409225464, "learning_rate": 5.069090366238042e-06, "loss": 0.1848, "step": 29795 }, { "epoch": 0.6642383084956005, "grad_norm": 0.5302146077156067, "learning_rate": 5.066044599694515e-06, "loss": 0.298, "step": 29800 }, { "epoch": 0.6643497578762206, "grad_norm": 0.5051549673080444, "learning_rate": 5.062999438015834e-06, "loss": 0.2469, "step": 29805 }, { "epoch": 0.6644612072568407, "grad_norm": 0.7402228116989136, "learning_rate": 5.059954881575313e-06, "loss": 0.3926, "step": 29810 }, { "epoch": 0.6645726566374608, "grad_norm": 0.3407340943813324, "learning_rate": 5.056910930746195e-06, "loss": 0.2767, "step": 29815 }, { "epoch": 0.6646841060180808, "grad_norm": 0.5036594271659851, "learning_rate": 5.0538675859016425e-06, "loss": 0.3033, "step": 29820 }, { "epoch": 0.6647955553987008, "grad_norm": 0.4008634388446808, "learning_rate": 5.0508248474147455e-06, "loss": 0.3629, "step": 29825 }, { "epoch": 0.664907004779321, "grad_norm": 0.9136576652526855, "learning_rate": 5.047782715658523e-06, "loss": 0.4994, "step": 29830 }, { "epoch": 0.665018454159941, "grad_norm": 0.7496482729911804, "learning_rate": 5.044741191005908e-06, "loss": 0.3205, "step": 29835 }, { "epoch": 0.6651299035405611, "grad_norm": 0.6265192031860352, "learning_rate": 5.041700273829778e-06, "loss": 0.2063, "step": 29840 }, { "epoch": 0.6652413529211811, "grad_norm": 0.591755747795105, "learning_rate": 5.038659964502919e-06, "loss": 0.244, "step": 29845 }, { "epoch": 0.6653528023018012, "grad_norm": 0.6225812435150146, "learning_rate": 5.035620263398056e-06, "loss": 0.3191, "step": 29850 }, { "epoch": 0.6654642516824213, "grad_norm": 0.43200963735580444, "learning_rate": 5.032581170887831e-06, "loss": 0.198, "step": 29855 }, { "epoch": 0.6655757010630413, "grad_norm": 0.43655383586883545, "learning_rate": 5.02954268734481e-06, "loss": 0.3137, "step": 29860 }, { "epoch": 0.6656871504436614, "grad_norm": 0.5098513960838318, "learning_rate": 5.026504813141487e-06, "loss": 0.3615, "step": 29865 }, { "epoch": 0.6657985998242815, "grad_norm": 0.764817476272583, "learning_rate": 5.023467548650288e-06, "loss": 0.224, "step": 29870 }, { "epoch": 0.6659100492049015, "grad_norm": 0.5321330428123474, "learning_rate": 5.020430894243556e-06, "loss": 0.3052, "step": 29875 }, { "epoch": 0.6660214985855216, "grad_norm": 0.48160234093666077, "learning_rate": 5.017394850293553e-06, "loss": 0.2948, "step": 29880 }, { "epoch": 0.6661329479661416, "grad_norm": 0.6200422048568726, "learning_rate": 5.0143594171724875e-06, "loss": 0.2417, "step": 29885 }, { "epoch": 0.6662443973467618, "grad_norm": 0.7394756078720093, "learning_rate": 5.011324595252474e-06, "loss": 0.305, "step": 29890 }, { "epoch": 0.6663558467273818, "grad_norm": 0.6181401610374451, "learning_rate": 5.0082903849055585e-06, "loss": 0.3193, "step": 29895 }, { "epoch": 0.6664672961080019, "grad_norm": 0.46570515632629395, "learning_rate": 5.0052567865037075e-06, "loss": 0.2006, "step": 29900 }, { "epoch": 0.666578745488622, "grad_norm": 0.6175560355186462, "learning_rate": 5.002223800418824e-06, "loss": 0.3857, "step": 29905 }, { "epoch": 0.666690194869242, "grad_norm": 0.40701529383659363, "learning_rate": 4.9991914270227274e-06, "loss": 0.3133, "step": 29910 }, { "epoch": 0.6668016442498621, "grad_norm": 0.529159665107727, "learning_rate": 4.996159666687156e-06, "loss": 0.2521, "step": 29915 }, { "epoch": 0.6669130936304821, "grad_norm": 0.5332446694374084, "learning_rate": 4.993128519783791e-06, "loss": 0.1586, "step": 29920 }, { "epoch": 0.6670245430111023, "grad_norm": 0.7735430598258972, "learning_rate": 4.990097986684221e-06, "loss": 0.204, "step": 29925 }, { "epoch": 0.6671359923917223, "grad_norm": 0.5558745861053467, "learning_rate": 4.987068067759965e-06, "loss": 0.361, "step": 29930 }, { "epoch": 0.6672474417723423, "grad_norm": 0.6907291412353516, "learning_rate": 4.984038763382473e-06, "loss": 0.3075, "step": 29935 }, { "epoch": 0.6673588911529624, "grad_norm": 0.3587920069694519, "learning_rate": 4.981010073923112e-06, "loss": 0.3012, "step": 29940 }, { "epoch": 0.6674703405335825, "grad_norm": 0.6192587018013, "learning_rate": 4.977981999753174e-06, "loss": 0.3315, "step": 29945 }, { "epoch": 0.6675817899142026, "grad_norm": 0.5411548614501953, "learning_rate": 4.974954541243876e-06, "loss": 0.4462, "step": 29950 }, { "epoch": 0.6676932392948226, "grad_norm": 0.7996327877044678, "learning_rate": 4.971927698766367e-06, "loss": 0.1447, "step": 29955 }, { "epoch": 0.6678046886754427, "grad_norm": 0.7160996198654175, "learning_rate": 4.9689014726917085e-06, "loss": 0.3103, "step": 29960 }, { "epoch": 0.6679161380560628, "grad_norm": 0.5828596353530884, "learning_rate": 4.965875863390898e-06, "loss": 0.352, "step": 29965 }, { "epoch": 0.6680275874366828, "grad_norm": 0.47592854499816895, "learning_rate": 4.96285087123485e-06, "loss": 0.3189, "step": 29970 }, { "epoch": 0.6681390368173029, "grad_norm": 0.5826191902160645, "learning_rate": 4.9598264965944044e-06, "loss": 0.2964, "step": 29975 }, { "epoch": 0.6682504861979229, "grad_norm": 0.5355210304260254, "learning_rate": 4.956802739840325e-06, "loss": 0.2218, "step": 29980 }, { "epoch": 0.6683619355785431, "grad_norm": 0.9146451950073242, "learning_rate": 4.953779601343299e-06, "loss": 0.2733, "step": 29985 }, { "epoch": 0.6684733849591631, "grad_norm": 0.916202962398529, "learning_rate": 4.9507570814739435e-06, "loss": 0.2596, "step": 29990 }, { "epoch": 0.6685848343397831, "grad_norm": 0.873465895652771, "learning_rate": 4.9477351806028e-06, "loss": 0.4759, "step": 29995 }, { "epoch": 0.6686962837204032, "grad_norm": 0.6375863552093506, "learning_rate": 4.944713899100324e-06, "loss": 0.1947, "step": 30000 }, { "epoch": 0.6688077331010233, "grad_norm": 0.5613588690757751, "learning_rate": 4.941693237336904e-06, "loss": 0.3548, "step": 30005 }, { "epoch": 0.6689191824816434, "grad_norm": 0.5151784420013428, "learning_rate": 4.938673195682849e-06, "loss": 0.2757, "step": 30010 }, { "epoch": 0.6690306318622634, "grad_norm": 0.433434396982193, "learning_rate": 4.935653774508393e-06, "loss": 0.2052, "step": 30015 }, { "epoch": 0.6691420812428835, "grad_norm": 0.7155526280403137, "learning_rate": 4.93263497418369e-06, "loss": 0.2852, "step": 30020 }, { "epoch": 0.6692535306235036, "grad_norm": 0.8238219022750854, "learning_rate": 4.929616795078825e-06, "loss": 0.3083, "step": 30025 }, { "epoch": 0.6693649800041236, "grad_norm": 0.531947135925293, "learning_rate": 4.926599237563807e-06, "loss": 0.2996, "step": 30030 }, { "epoch": 0.6694764293847437, "grad_norm": 0.6209695339202881, "learning_rate": 4.923582302008562e-06, "loss": 0.3227, "step": 30035 }, { "epoch": 0.6695878787653637, "grad_norm": 0.6829642057418823, "learning_rate": 4.920565988782943e-06, "loss": 0.3823, "step": 30040 }, { "epoch": 0.6696993281459839, "grad_norm": 1.1301989555358887, "learning_rate": 4.917550298256726e-06, "loss": 0.2317, "step": 30045 }, { "epoch": 0.6698107775266039, "grad_norm": 0.6049754023551941, "learning_rate": 4.914535230799609e-06, "loss": 0.341, "step": 30050 }, { "epoch": 0.6699222269072239, "grad_norm": 0.8974992036819458, "learning_rate": 4.911520786781218e-06, "loss": 0.191, "step": 30055 }, { "epoch": 0.670033676287844, "grad_norm": 0.7844485640525818, "learning_rate": 4.908506966571106e-06, "loss": 0.2516, "step": 30060 }, { "epoch": 0.6701451256684641, "grad_norm": 0.5584155321121216, "learning_rate": 4.905493770538739e-06, "loss": 0.3154, "step": 30065 }, { "epoch": 0.6702565750490842, "grad_norm": 0.5932765603065491, "learning_rate": 4.902481199053512e-06, "loss": 0.3324, "step": 30070 }, { "epoch": 0.6703680244297042, "grad_norm": 0.6151829957962036, "learning_rate": 4.899469252484744e-06, "loss": 0.282, "step": 30075 }, { "epoch": 0.6704794738103242, "grad_norm": 0.6490795016288757, "learning_rate": 4.896457931201671e-06, "loss": 0.2691, "step": 30080 }, { "epoch": 0.6705909231909444, "grad_norm": 0.4294373393058777, "learning_rate": 4.8934472355734675e-06, "loss": 0.3277, "step": 30085 }, { "epoch": 0.6707023725715644, "grad_norm": 0.48455068469047546, "learning_rate": 4.890437165969212e-06, "loss": 0.2612, "step": 30090 }, { "epoch": 0.6708138219521845, "grad_norm": 0.6728994250297546, "learning_rate": 4.887427722757924e-06, "loss": 0.2395, "step": 30095 }, { "epoch": 0.6709252713328046, "grad_norm": 0.431524395942688, "learning_rate": 4.884418906308533e-06, "loss": 0.2241, "step": 30100 }, { "epoch": 0.6710367207134247, "grad_norm": 0.7784751653671265, "learning_rate": 4.881410716989899e-06, "loss": 0.2834, "step": 30105 }, { "epoch": 0.6711481700940447, "grad_norm": 0.8710717558860779, "learning_rate": 4.878403155170801e-06, "loss": 0.3092, "step": 30110 }, { "epoch": 0.6712596194746647, "grad_norm": 0.6971104145050049, "learning_rate": 4.87539622121994e-06, "loss": 0.2769, "step": 30115 }, { "epoch": 0.6713710688552849, "grad_norm": 0.7268978953361511, "learning_rate": 4.872389915505951e-06, "loss": 0.3372, "step": 30120 }, { "epoch": 0.6714825182359049, "grad_norm": 0.5553570985794067, "learning_rate": 4.869384238397375e-06, "loss": 0.2756, "step": 30125 }, { "epoch": 0.671593967616525, "grad_norm": 0.554465115070343, "learning_rate": 4.866379190262692e-06, "loss": 0.2625, "step": 30130 }, { "epoch": 0.671705416997145, "grad_norm": 0.904084324836731, "learning_rate": 4.863374771470296e-06, "loss": 0.2299, "step": 30135 }, { "epoch": 0.671816866377765, "grad_norm": 0.6341699957847595, "learning_rate": 4.860370982388504e-06, "loss": 0.2554, "step": 30140 }, { "epoch": 0.6719283157583852, "grad_norm": 0.4441910684108734, "learning_rate": 4.8573678233855534e-06, "loss": 0.2788, "step": 30145 }, { "epoch": 0.6720397651390052, "grad_norm": 0.45818713307380676, "learning_rate": 4.854365294829617e-06, "loss": 0.2327, "step": 30150 }, { "epoch": 0.6721512145196253, "grad_norm": 0.771054744720459, "learning_rate": 4.851363397088777e-06, "loss": 0.2874, "step": 30155 }, { "epoch": 0.6722626639002454, "grad_norm": 0.7584509253501892, "learning_rate": 4.848362130531039e-06, "loss": 0.3167, "step": 30160 }, { "epoch": 0.6723741132808655, "grad_norm": 0.45884889364242554, "learning_rate": 4.845361495524343e-06, "loss": 0.2909, "step": 30165 }, { "epoch": 0.6724855626614855, "grad_norm": 0.36979031562805176, "learning_rate": 4.842361492436541e-06, "loss": 0.1748, "step": 30170 }, { "epoch": 0.6725970120421055, "grad_norm": 0.7901503443717957, "learning_rate": 4.839362121635405e-06, "loss": 0.2478, "step": 30175 }, { "epoch": 0.6727084614227257, "grad_norm": 0.6436774134635925, "learning_rate": 4.836363383488643e-06, "loss": 0.3461, "step": 30180 }, { "epoch": 0.6728199108033457, "grad_norm": 0.30818986892700195, "learning_rate": 4.833365278363872e-06, "loss": 0.2585, "step": 30185 }, { "epoch": 0.6729313601839658, "grad_norm": 0.6664495468139648, "learning_rate": 4.830367806628637e-06, "loss": 0.2956, "step": 30190 }, { "epoch": 0.6730428095645858, "grad_norm": 0.5725700855255127, "learning_rate": 4.827370968650403e-06, "loss": 0.1498, "step": 30195 }, { "epoch": 0.6731542589452059, "grad_norm": 0.552641749382019, "learning_rate": 4.824374764796565e-06, "loss": 0.2645, "step": 30200 }, { "epoch": 0.673265708325826, "grad_norm": 0.7079885601997375, "learning_rate": 4.8213791954344315e-06, "loss": 0.3482, "step": 30205 }, { "epoch": 0.673377157706446, "grad_norm": 0.8411067128181458, "learning_rate": 4.818384260931233e-06, "loss": 0.3391, "step": 30210 }, { "epoch": 0.6734886070870661, "grad_norm": 0.696851909160614, "learning_rate": 4.81538996165413e-06, "loss": 0.4519, "step": 30215 }, { "epoch": 0.6736000564676862, "grad_norm": 0.6157433986663818, "learning_rate": 4.8123962979702e-06, "loss": 0.2456, "step": 30220 }, { "epoch": 0.6737115058483063, "grad_norm": 1.0309218168258667, "learning_rate": 4.809403270246441e-06, "loss": 0.2318, "step": 30225 }, { "epoch": 0.6738229552289263, "grad_norm": 0.4081147015094757, "learning_rate": 4.806410878849776e-06, "loss": 0.3775, "step": 30230 }, { "epoch": 0.6739344046095463, "grad_norm": 0.8992989659309387, "learning_rate": 4.803419124147045e-06, "loss": 0.2688, "step": 30235 }, { "epoch": 0.6740458539901665, "grad_norm": 0.4808000326156616, "learning_rate": 4.800428006505018e-06, "loss": 0.2405, "step": 30240 }, { "epoch": 0.6741573033707865, "grad_norm": 0.3560895323753357, "learning_rate": 4.797437526290386e-06, "loss": 0.3898, "step": 30245 }, { "epoch": 0.6742687527514066, "grad_norm": 0.4232325851917267, "learning_rate": 4.794447683869756e-06, "loss": 0.337, "step": 30250 }, { "epoch": 0.6743802021320267, "grad_norm": 0.6125756502151489, "learning_rate": 4.791458479609661e-06, "loss": 0.3283, "step": 30255 }, { "epoch": 0.6744916515126467, "grad_norm": 0.6041368246078491, "learning_rate": 4.78846991387655e-06, "loss": 0.314, "step": 30260 }, { "epoch": 0.6746031008932668, "grad_norm": 1.6415461301803589, "learning_rate": 4.785481987036799e-06, "loss": 0.2928, "step": 30265 }, { "epoch": 0.6747145502738868, "grad_norm": 0.7918848395347595, "learning_rate": 4.782494699456706e-06, "loss": 0.2784, "step": 30270 }, { "epoch": 0.674825999654507, "grad_norm": 0.638884961605072, "learning_rate": 4.779508051502492e-06, "loss": 0.1836, "step": 30275 }, { "epoch": 0.674937449035127, "grad_norm": 0.6684740781784058, "learning_rate": 4.776522043540297e-06, "loss": 0.3748, "step": 30280 }, { "epoch": 0.675048898415747, "grad_norm": 0.5826922655105591, "learning_rate": 4.773536675936179e-06, "loss": 0.2958, "step": 30285 }, { "epoch": 0.6751603477963671, "grad_norm": 0.4849734902381897, "learning_rate": 4.770551949056123e-06, "loss": 0.1645, "step": 30290 }, { "epoch": 0.6752717971769872, "grad_norm": 0.6118282079696655, "learning_rate": 4.767567863266029e-06, "loss": 0.2709, "step": 30295 }, { "epoch": 0.6753832465576073, "grad_norm": 0.5682799816131592, "learning_rate": 4.764584418931731e-06, "loss": 0.3137, "step": 30300 }, { "epoch": 0.6754946959382273, "grad_norm": 0.7277367115020752, "learning_rate": 4.761601616418968e-06, "loss": 0.2859, "step": 30305 }, { "epoch": 0.6756061453188474, "grad_norm": 0.7065811157226562, "learning_rate": 4.758619456093417e-06, "loss": 0.3519, "step": 30310 }, { "epoch": 0.6757175946994675, "grad_norm": 0.39767614006996155, "learning_rate": 4.755637938320662e-06, "loss": 0.2527, "step": 30315 }, { "epoch": 0.6758290440800875, "grad_norm": 0.6266661286354065, "learning_rate": 4.752657063466217e-06, "loss": 0.27, "step": 30320 }, { "epoch": 0.6759404934607076, "grad_norm": 0.6225513219833374, "learning_rate": 4.749676831895512e-06, "loss": 0.2394, "step": 30325 }, { "epoch": 0.6760519428413276, "grad_norm": 0.5919443964958191, "learning_rate": 4.7466972439738965e-06, "loss": 0.3048, "step": 30330 }, { "epoch": 0.6761633922219478, "grad_norm": 0.9167764186859131, "learning_rate": 4.7437183000666555e-06, "loss": 0.2479, "step": 30335 }, { "epoch": 0.6762748416025678, "grad_norm": 0.7809853553771973, "learning_rate": 4.740740000538973e-06, "loss": 0.2459, "step": 30340 }, { "epoch": 0.6763862909831878, "grad_norm": 0.8190988302230835, "learning_rate": 4.737762345755975e-06, "loss": 0.309, "step": 30345 }, { "epoch": 0.6764977403638079, "grad_norm": 0.9799290895462036, "learning_rate": 4.734785336082697e-06, "loss": 0.261, "step": 30350 }, { "epoch": 0.676609189744428, "grad_norm": 0.6642264723777771, "learning_rate": 4.731808971884095e-06, "loss": 0.2865, "step": 30355 }, { "epoch": 0.6767206391250481, "grad_norm": 0.6557360291481018, "learning_rate": 4.728833253525043e-06, "loss": 0.3017, "step": 30360 }, { "epoch": 0.6768320885056681, "grad_norm": 0.6903407573699951, "learning_rate": 4.725858181370352e-06, "loss": 0.2997, "step": 30365 }, { "epoch": 0.6769435378862882, "grad_norm": 0.7355604767799377, "learning_rate": 4.7228837557847385e-06, "loss": 0.3865, "step": 30370 }, { "epoch": 0.6770549872669083, "grad_norm": 0.43351057171821594, "learning_rate": 4.71990997713284e-06, "loss": 0.278, "step": 30375 }, { "epoch": 0.6771664366475283, "grad_norm": 0.41541969776153564, "learning_rate": 4.716936845779224e-06, "loss": 0.3562, "step": 30380 }, { "epoch": 0.6772778860281484, "grad_norm": 0.5344095826148987, "learning_rate": 4.713964362088374e-06, "loss": 0.1759, "step": 30385 }, { "epoch": 0.6773893354087684, "grad_norm": 0.7391599416732788, "learning_rate": 4.710992526424686e-06, "loss": 0.2791, "step": 30390 }, { "epoch": 0.6775007847893886, "grad_norm": 0.7321291565895081, "learning_rate": 4.708021339152493e-06, "loss": 0.3204, "step": 30395 }, { "epoch": 0.6776122341700086, "grad_norm": 0.5888240933418274, "learning_rate": 4.7050508006360365e-06, "loss": 0.3577, "step": 30400 }, { "epoch": 0.6777236835506286, "grad_norm": 0.5763120055198669, "learning_rate": 4.702080911239482e-06, "loss": 0.2414, "step": 30405 }, { "epoch": 0.6778351329312488, "grad_norm": 0.6024255156517029, "learning_rate": 4.699111671326909e-06, "loss": 0.3128, "step": 30410 }, { "epoch": 0.6779465823118688, "grad_norm": 0.7076147794723511, "learning_rate": 4.6961430812623334e-06, "loss": 0.2945, "step": 30415 }, { "epoch": 0.6780580316924889, "grad_norm": 0.6222317218780518, "learning_rate": 4.693175141409675e-06, "loss": 0.3651, "step": 30420 }, { "epoch": 0.6781694810731089, "grad_norm": 0.5933769941329956, "learning_rate": 4.69020785213278e-06, "loss": 0.2369, "step": 30425 }, { "epoch": 0.6782809304537291, "grad_norm": 0.669907808303833, "learning_rate": 4.687241213795419e-06, "loss": 0.3003, "step": 30430 }, { "epoch": 0.6783923798343491, "grad_norm": 0.7214773297309875, "learning_rate": 4.6842752267612775e-06, "loss": 0.3474, "step": 30435 }, { "epoch": 0.6785038292149691, "grad_norm": 0.6140491962432861, "learning_rate": 4.6813098913939635e-06, "loss": 0.313, "step": 30440 }, { "epoch": 0.6786152785955892, "grad_norm": 0.40183714032173157, "learning_rate": 4.678345208056998e-06, "loss": 0.2156, "step": 30445 }, { "epoch": 0.6787267279762093, "grad_norm": 0.765648365020752, "learning_rate": 4.675381177113837e-06, "loss": 0.2835, "step": 30450 }, { "epoch": 0.6788381773568294, "grad_norm": 0.4940396845340729, "learning_rate": 4.67241779892784e-06, "loss": 0.2926, "step": 30455 }, { "epoch": 0.6789496267374494, "grad_norm": 0.6889997124671936, "learning_rate": 4.669455073862302e-06, "loss": 0.3213, "step": 30460 }, { "epoch": 0.6790610761180694, "grad_norm": 0.5608032941818237, "learning_rate": 4.666493002280426e-06, "loss": 0.2041, "step": 30465 }, { "epoch": 0.6791725254986896, "grad_norm": 0.3555319011211395, "learning_rate": 4.66353158454534e-06, "loss": 0.3921, "step": 30470 }, { "epoch": 0.6792839748793096, "grad_norm": 0.7849695682525635, "learning_rate": 4.660570821020091e-06, "loss": 0.1967, "step": 30475 }, { "epoch": 0.6793954242599297, "grad_norm": 0.5523033142089844, "learning_rate": 4.6576107120676415e-06, "loss": 0.341, "step": 30480 }, { "epoch": 0.6795068736405497, "grad_norm": 0.7489277124404907, "learning_rate": 4.654651258050881e-06, "loss": 0.3402, "step": 30485 }, { "epoch": 0.6796183230211698, "grad_norm": 0.5791055560112, "learning_rate": 4.651692459332621e-06, "loss": 0.3398, "step": 30490 }, { "epoch": 0.6797297724017899, "grad_norm": 0.7251898646354675, "learning_rate": 4.648734316275583e-06, "loss": 0.2289, "step": 30495 }, { "epoch": 0.6798412217824099, "grad_norm": 0.43119266629219055, "learning_rate": 4.645776829242411e-06, "loss": 0.323, "step": 30500 }, { "epoch": 0.67995267116303, "grad_norm": 0.3917275369167328, "learning_rate": 4.642819998595672e-06, "loss": 0.2227, "step": 30505 }, { "epoch": 0.6800641205436501, "grad_norm": 0.7229493856430054, "learning_rate": 4.639863824697848e-06, "loss": 0.2777, "step": 30510 }, { "epoch": 0.6801755699242702, "grad_norm": 1.0337713956832886, "learning_rate": 4.6369083079113475e-06, "loss": 0.2911, "step": 30515 }, { "epoch": 0.6802870193048902, "grad_norm": 0.5802996754646301, "learning_rate": 4.633953448598489e-06, "loss": 0.2701, "step": 30520 }, { "epoch": 0.6803984686855102, "grad_norm": 0.43975260853767395, "learning_rate": 4.63099924712152e-06, "loss": 0.2523, "step": 30525 }, { "epoch": 0.6805099180661304, "grad_norm": 0.5821147561073303, "learning_rate": 4.628045703842602e-06, "loss": 0.2978, "step": 30530 }, { "epoch": 0.6806213674467504, "grad_norm": 0.9624720215797424, "learning_rate": 4.625092819123815e-06, "loss": 0.3141, "step": 30535 }, { "epoch": 0.6807328168273705, "grad_norm": 0.5244483351707458, "learning_rate": 4.622140593327163e-06, "loss": 0.2627, "step": 30540 }, { "epoch": 0.6808442662079905, "grad_norm": 0.5924509763717651, "learning_rate": 4.619189026814556e-06, "loss": 0.2249, "step": 30545 }, { "epoch": 0.6809557155886106, "grad_norm": 0.5367047786712646, "learning_rate": 4.616238119947843e-06, "loss": 0.2293, "step": 30550 }, { "epoch": 0.6810671649692307, "grad_norm": 0.5647961497306824, "learning_rate": 4.613287873088784e-06, "loss": 0.3609, "step": 30555 }, { "epoch": 0.6811786143498507, "grad_norm": 0.7824503183364868, "learning_rate": 4.610338286599053e-06, "loss": 0.2663, "step": 30560 }, { "epoch": 0.6812900637304709, "grad_norm": 0.9510183930397034, "learning_rate": 4.607389360840245e-06, "loss": 0.3119, "step": 30565 }, { "epoch": 0.6814015131110909, "grad_norm": 0.4036903381347656, "learning_rate": 4.604441096173878e-06, "loss": 0.279, "step": 30570 }, { "epoch": 0.681512962491711, "grad_norm": 0.6872173547744751, "learning_rate": 4.601493492961381e-06, "loss": 0.3826, "step": 30575 }, { "epoch": 0.681624411872331, "grad_norm": 0.5271679162979126, "learning_rate": 4.5985465515641156e-06, "loss": 0.2456, "step": 30580 }, { "epoch": 0.681735861252951, "grad_norm": 0.7497157454490662, "learning_rate": 4.595600272343347e-06, "loss": 0.1737, "step": 30585 }, { "epoch": 0.6818473106335712, "grad_norm": 0.23075701296329498, "learning_rate": 4.592654655660273e-06, "loss": 0.2424, "step": 30590 }, { "epoch": 0.6819587600141912, "grad_norm": 0.6322120428085327, "learning_rate": 4.589709701875999e-06, "loss": 0.2902, "step": 30595 }, { "epoch": 0.6820702093948113, "grad_norm": 0.7525787353515625, "learning_rate": 4.586765411351555e-06, "loss": 0.3234, "step": 30600 }, { "epoch": 0.6821816587754314, "grad_norm": 0.7649107575416565, "learning_rate": 4.583821784447884e-06, "loss": 0.288, "step": 30605 }, { "epoch": 0.6822931081560514, "grad_norm": 0.8376215696334839, "learning_rate": 4.580878821525859e-06, "loss": 0.3664, "step": 30610 }, { "epoch": 0.6824045575366715, "grad_norm": 0.5996965169906616, "learning_rate": 4.577936522946261e-06, "loss": 0.3438, "step": 30615 }, { "epoch": 0.6825160069172915, "grad_norm": 0.5214008092880249, "learning_rate": 4.574994889069791e-06, "loss": 0.2935, "step": 30620 }, { "epoch": 0.6826274562979117, "grad_norm": 0.5297601222991943, "learning_rate": 4.5720539202570766e-06, "loss": 0.2882, "step": 30625 }, { "epoch": 0.6827389056785317, "grad_norm": 0.6422613263130188, "learning_rate": 4.569113616868654e-06, "loss": 0.2625, "step": 30630 }, { "epoch": 0.6828503550591517, "grad_norm": 0.4508661925792694, "learning_rate": 4.566173979264982e-06, "loss": 0.2579, "step": 30635 }, { "epoch": 0.6829618044397718, "grad_norm": 0.3404235541820526, "learning_rate": 4.563235007806432e-06, "loss": 0.2441, "step": 30640 }, { "epoch": 0.6830732538203919, "grad_norm": 0.8853699564933777, "learning_rate": 4.560296702853311e-06, "loss": 0.3594, "step": 30645 }, { "epoch": 0.683184703201012, "grad_norm": 1.0352290868759155, "learning_rate": 4.557359064765825e-06, "loss": 0.4762, "step": 30650 }, { "epoch": 0.683296152581632, "grad_norm": 0.7401602268218994, "learning_rate": 4.554422093904103e-06, "loss": 0.3626, "step": 30655 }, { "epoch": 0.6834076019622521, "grad_norm": 0.9809353947639465, "learning_rate": 4.551485790628203e-06, "loss": 0.264, "step": 30660 }, { "epoch": 0.6835190513428722, "grad_norm": 0.6002543568611145, "learning_rate": 4.548550155298089e-06, "loss": 0.2303, "step": 30665 }, { "epoch": 0.6836305007234922, "grad_norm": 0.6429628729820251, "learning_rate": 4.545615188273643e-06, "loss": 0.3119, "step": 30670 }, { "epoch": 0.6837419501041123, "grad_norm": 0.7270705699920654, "learning_rate": 4.542680889914678e-06, "loss": 0.2012, "step": 30675 }, { "epoch": 0.6838533994847323, "grad_norm": 0.4127255976200104, "learning_rate": 4.539747260580911e-06, "loss": 0.3944, "step": 30680 }, { "epoch": 0.6839648488653525, "grad_norm": 0.5112218260765076, "learning_rate": 4.536814300631984e-06, "loss": 0.2155, "step": 30685 }, { "epoch": 0.6840762982459725, "grad_norm": 0.620757520198822, "learning_rate": 4.533882010427451e-06, "loss": 0.3096, "step": 30690 }, { "epoch": 0.6841877476265925, "grad_norm": 0.7408103346824646, "learning_rate": 4.530950390326794e-06, "loss": 0.3529, "step": 30695 }, { "epoch": 0.6842991970072126, "grad_norm": 0.6948126554489136, "learning_rate": 4.528019440689401e-06, "loss": 0.3586, "step": 30700 }, { "epoch": 0.6844106463878327, "grad_norm": 0.5368820428848267, "learning_rate": 4.525089161874592e-06, "loss": 0.2216, "step": 30705 }, { "epoch": 0.6845220957684528, "grad_norm": 0.4925262928009033, "learning_rate": 4.522159554241591e-06, "loss": 0.2569, "step": 30710 }, { "epoch": 0.6846335451490728, "grad_norm": 0.5296086668968201, "learning_rate": 4.519230618149547e-06, "loss": 0.2763, "step": 30715 }, { "epoch": 0.684744994529693, "grad_norm": 0.7307214140892029, "learning_rate": 4.516302353957523e-06, "loss": 0.3427, "step": 30720 }, { "epoch": 0.684856443910313, "grad_norm": 0.7109100222587585, "learning_rate": 4.513374762024501e-06, "loss": 0.2628, "step": 30725 }, { "epoch": 0.684967893290933, "grad_norm": 0.7910977005958557, "learning_rate": 4.510447842709386e-06, "loss": 0.3581, "step": 30730 }, { "epoch": 0.6850793426715531, "grad_norm": 0.6768742799758911, "learning_rate": 4.507521596370987e-06, "loss": 0.2831, "step": 30735 }, { "epoch": 0.6851907920521731, "grad_norm": 0.827703595161438, "learning_rate": 4.504596023368051e-06, "loss": 0.337, "step": 30740 }, { "epoch": 0.6853022414327933, "grad_norm": 0.6112938523292542, "learning_rate": 4.501671124059224e-06, "loss": 0.2836, "step": 30745 }, { "epoch": 0.6854136908134133, "grad_norm": 0.43153080344200134, "learning_rate": 4.498746898803076e-06, "loss": 0.2882, "step": 30750 }, { "epoch": 0.6855251401940333, "grad_norm": 0.6515633463859558, "learning_rate": 4.4958233479580945e-06, "loss": 0.2517, "step": 30755 }, { "epoch": 0.6856365895746535, "grad_norm": 0.4262503385543823, "learning_rate": 4.4929004718826815e-06, "loss": 0.2563, "step": 30760 }, { "epoch": 0.6857480389552735, "grad_norm": 0.8875929713249207, "learning_rate": 4.489978270935164e-06, "loss": 0.2603, "step": 30765 }, { "epoch": 0.6858594883358936, "grad_norm": 0.3773414194583893, "learning_rate": 4.487056745473781e-06, "loss": 0.4201, "step": 30770 }, { "epoch": 0.6859709377165136, "grad_norm": 0.5433903336524963, "learning_rate": 4.4841358958566885e-06, "loss": 0.1744, "step": 30775 }, { "epoch": 0.6860823870971338, "grad_norm": 0.6163174510002136, "learning_rate": 4.481215722441959e-06, "loss": 0.3012, "step": 30780 }, { "epoch": 0.6861938364777538, "grad_norm": 0.5906506776809692, "learning_rate": 4.4782962255875835e-06, "loss": 0.3484, "step": 30785 }, { "epoch": 0.6863052858583738, "grad_norm": 0.6859926581382751, "learning_rate": 4.475377405651468e-06, "loss": 0.242, "step": 30790 }, { "epoch": 0.6864167352389939, "grad_norm": 0.49034976959228516, "learning_rate": 4.472459262991441e-06, "loss": 0.3221, "step": 30795 }, { "epoch": 0.686528184619614, "grad_norm": 0.7555416226387024, "learning_rate": 4.469541797965238e-06, "loss": 0.2177, "step": 30800 }, { "epoch": 0.6866396340002341, "grad_norm": 0.5270309448242188, "learning_rate": 4.466625010930526e-06, "loss": 0.2009, "step": 30805 }, { "epoch": 0.6867510833808541, "grad_norm": 0.4625611901283264, "learning_rate": 4.463708902244878e-06, "loss": 0.3301, "step": 30810 }, { "epoch": 0.6868625327614741, "grad_norm": 0.6135231256484985, "learning_rate": 4.4607934722657834e-06, "loss": 0.308, "step": 30815 }, { "epoch": 0.6869739821420943, "grad_norm": 0.5562835335731506, "learning_rate": 4.457878721350653e-06, "loss": 0.381, "step": 30820 }, { "epoch": 0.6870854315227143, "grad_norm": 0.6260277628898621, "learning_rate": 4.45496464985681e-06, "loss": 0.2883, "step": 30825 }, { "epoch": 0.6871968809033344, "grad_norm": 0.4729725420475006, "learning_rate": 4.452051258141503e-06, "loss": 0.3035, "step": 30830 }, { "epoch": 0.6873083302839544, "grad_norm": 0.45729824900627136, "learning_rate": 4.4491385465618846e-06, "loss": 0.1677, "step": 30835 }, { "epoch": 0.6874197796645745, "grad_norm": 0.5871806144714355, "learning_rate": 4.4462265154750386e-06, "loss": 0.2998, "step": 30840 }, { "epoch": 0.6875312290451946, "grad_norm": 0.4161869287490845, "learning_rate": 4.443315165237951e-06, "loss": 0.1802, "step": 30845 }, { "epoch": 0.6876426784258146, "grad_norm": 0.6632789373397827, "learning_rate": 4.440404496207536e-06, "loss": 0.2441, "step": 30850 }, { "epoch": 0.6877541278064347, "grad_norm": 0.424626886844635, "learning_rate": 4.43749450874061e-06, "loss": 0.2589, "step": 30855 }, { "epoch": 0.6878655771870548, "grad_norm": 0.8885061740875244, "learning_rate": 4.434585203193927e-06, "loss": 0.3274, "step": 30860 }, { "epoch": 0.6879770265676749, "grad_norm": 0.6270710229873657, "learning_rate": 4.431676579924139e-06, "loss": 0.2532, "step": 30865 }, { "epoch": 0.6880884759482949, "grad_norm": 0.5457544326782227, "learning_rate": 4.4287686392878185e-06, "loss": 0.3691, "step": 30870 }, { "epoch": 0.6881999253289149, "grad_norm": 0.5432224273681641, "learning_rate": 4.425861381641462e-06, "loss": 0.1776, "step": 30875 }, { "epoch": 0.6883113747095351, "grad_norm": 0.5192307233810425, "learning_rate": 4.4229548073414745e-06, "loss": 0.3711, "step": 30880 }, { "epoch": 0.6884228240901551, "grad_norm": 0.6740368008613586, "learning_rate": 4.420048916744176e-06, "loss": 0.4051, "step": 30885 }, { "epoch": 0.6885342734707752, "grad_norm": 0.7884381413459778, "learning_rate": 4.417143710205814e-06, "loss": 0.2601, "step": 30890 }, { "epoch": 0.6886457228513952, "grad_norm": 0.5081226229667664, "learning_rate": 4.4142391880825386e-06, "loss": 0.2706, "step": 30895 }, { "epoch": 0.6887571722320153, "grad_norm": 0.5837457776069641, "learning_rate": 4.411335350730425e-06, "loss": 0.3546, "step": 30900 }, { "epoch": 0.6888686216126354, "grad_norm": 0.828184962272644, "learning_rate": 4.408432198505454e-06, "loss": 0.1952, "step": 30905 }, { "epoch": 0.6889800709932554, "grad_norm": 0.5400125980377197, "learning_rate": 4.40552973176354e-06, "loss": 0.2742, "step": 30910 }, { "epoch": 0.6890915203738756, "grad_norm": 0.22060178220272064, "learning_rate": 4.402627950860494e-06, "loss": 0.2226, "step": 30915 }, { "epoch": 0.6892029697544956, "grad_norm": 0.5295904278755188, "learning_rate": 4.3997268561520615e-06, "loss": 0.2693, "step": 30920 }, { "epoch": 0.6893144191351157, "grad_norm": 0.8292050957679749, "learning_rate": 4.396826447993887e-06, "loss": 0.3038, "step": 30925 }, { "epoch": 0.6894258685157357, "grad_norm": 0.5134817361831665, "learning_rate": 4.393926726741541e-06, "loss": 0.2658, "step": 30930 }, { "epoch": 0.6895373178963558, "grad_norm": 0.7239783406257629, "learning_rate": 4.391027692750506e-06, "loss": 0.3571, "step": 30935 }, { "epoch": 0.6896487672769759, "grad_norm": 0.4626108705997467, "learning_rate": 4.388129346376177e-06, "loss": 0.3237, "step": 30940 }, { "epoch": 0.6897602166575959, "grad_norm": 0.6538516283035278, "learning_rate": 4.385231687973878e-06, "loss": 0.1788, "step": 30945 }, { "epoch": 0.689871666038216, "grad_norm": 0.7186077833175659, "learning_rate": 4.38233471789883e-06, "loss": 0.2678, "step": 30950 }, { "epoch": 0.6899831154188361, "grad_norm": 0.5353816151618958, "learning_rate": 4.379438436506187e-06, "loss": 0.2909, "step": 30955 }, { "epoch": 0.6900945647994561, "grad_norm": 0.8315975666046143, "learning_rate": 4.376542844151009e-06, "loss": 0.2689, "step": 30960 }, { "epoch": 0.6902060141800762, "grad_norm": 0.7231079339981079, "learning_rate": 4.373647941188272e-06, "loss": 0.2647, "step": 30965 }, { "epoch": 0.6903174635606962, "grad_norm": 0.6835340261459351, "learning_rate": 4.3707537279728674e-06, "loss": 0.2836, "step": 30970 }, { "epoch": 0.6904289129413164, "grad_norm": 0.7447507381439209, "learning_rate": 4.367860204859601e-06, "loss": 0.2892, "step": 30975 }, { "epoch": 0.6905403623219364, "grad_norm": 0.6612392067909241, "learning_rate": 4.3649673722032e-06, "loss": 0.341, "step": 30980 }, { "epoch": 0.6906518117025565, "grad_norm": 0.5242640972137451, "learning_rate": 4.362075230358308e-06, "loss": 0.3152, "step": 30985 }, { "epoch": 0.6907632610831765, "grad_norm": 0.8434734344482422, "learning_rate": 4.359183779679475e-06, "loss": 0.4032, "step": 30990 }, { "epoch": 0.6908747104637966, "grad_norm": 0.6736511588096619, "learning_rate": 4.35629302052117e-06, "loss": 0.204, "step": 30995 }, { "epoch": 0.6909861598444167, "grad_norm": 0.5592665672302246, "learning_rate": 4.353402953237776e-06, "loss": 0.3241, "step": 31000 }, { "epoch": 0.6910976092250367, "grad_norm": 0.49798890948295593, "learning_rate": 4.350513578183593e-06, "loss": 0.2848, "step": 31005 }, { "epoch": 0.6912090586056568, "grad_norm": 0.5979819297790527, "learning_rate": 4.347624895712837e-06, "loss": 0.3554, "step": 31010 }, { "epoch": 0.6913205079862769, "grad_norm": 0.6463135480880737, "learning_rate": 4.344736906179644e-06, "loss": 0.2998, "step": 31015 }, { "epoch": 0.6914319573668969, "grad_norm": 0.8632110953330994, "learning_rate": 4.341849609938054e-06, "loss": 0.2684, "step": 31020 }, { "epoch": 0.691543406747517, "grad_norm": 0.6110043525695801, "learning_rate": 4.338963007342027e-06, "loss": 0.2602, "step": 31025 }, { "epoch": 0.691654856128137, "grad_norm": 0.9202972054481506, "learning_rate": 4.336077098745439e-06, "loss": 0.2615, "step": 31030 }, { "epoch": 0.6917663055087572, "grad_norm": 0.619824230670929, "learning_rate": 4.3331918845020805e-06, "loss": 0.2498, "step": 31035 }, { "epoch": 0.6918777548893772, "grad_norm": 0.5901528596878052, "learning_rate": 4.330307364965652e-06, "loss": 0.2758, "step": 31040 }, { "epoch": 0.6919892042699972, "grad_norm": 0.5944219827651978, "learning_rate": 4.327423540489777e-06, "loss": 0.4121, "step": 31045 }, { "epoch": 0.6921006536506173, "grad_norm": 0.49613940715789795, "learning_rate": 4.324540411427994e-06, "loss": 0.3705, "step": 31050 }, { "epoch": 0.6922121030312374, "grad_norm": 0.6901610493659973, "learning_rate": 4.3216579781337485e-06, "loss": 0.2918, "step": 31055 }, { "epoch": 0.6923235524118575, "grad_norm": 0.7814179062843323, "learning_rate": 4.318776240960406e-06, "loss": 0.2877, "step": 31060 }, { "epoch": 0.6924350017924775, "grad_norm": 0.6709560751914978, "learning_rate": 4.315895200261243e-06, "loss": 0.3923, "step": 31065 }, { "epoch": 0.6925464511730977, "grad_norm": 0.86536705493927, "learning_rate": 4.31301485638945e-06, "loss": 0.3328, "step": 31070 }, { "epoch": 0.6926579005537177, "grad_norm": 0.9728611707687378, "learning_rate": 4.310135209698143e-06, "loss": 0.3971, "step": 31075 }, { "epoch": 0.6927693499343377, "grad_norm": 0.39398086071014404, "learning_rate": 4.307256260540337e-06, "loss": 0.279, "step": 31080 }, { "epoch": 0.6928807993149578, "grad_norm": 0.4623780846595764, "learning_rate": 4.304378009268976e-06, "loss": 0.3175, "step": 31085 }, { "epoch": 0.6929922486955779, "grad_norm": 0.4939644932746887, "learning_rate": 4.301500456236907e-06, "loss": 0.2116, "step": 31090 }, { "epoch": 0.693103698076198, "grad_norm": 0.5080740451812744, "learning_rate": 4.2986236017968956e-06, "loss": 0.3565, "step": 31095 }, { "epoch": 0.693215147456818, "grad_norm": 0.7656640410423279, "learning_rate": 4.2957474463016206e-06, "loss": 0.2778, "step": 31100 }, { "epoch": 0.693326596837438, "grad_norm": 0.7635650038719177, "learning_rate": 4.2928719901036805e-06, "loss": 0.3594, "step": 31105 }, { "epoch": 0.6934380462180582, "grad_norm": 0.6077600717544556, "learning_rate": 4.289997233555584e-06, "loss": 0.3699, "step": 31110 }, { "epoch": 0.6935494955986782, "grad_norm": 0.7991203665733337, "learning_rate": 4.287123177009747e-06, "loss": 0.2534, "step": 31115 }, { "epoch": 0.6936609449792983, "grad_norm": 0.9318343997001648, "learning_rate": 4.284249820818517e-06, "loss": 0.4215, "step": 31120 }, { "epoch": 0.6937723943599183, "grad_norm": 0.4393812119960785, "learning_rate": 4.2813771653341395e-06, "loss": 0.2498, "step": 31125 }, { "epoch": 0.6938838437405385, "grad_norm": 0.6677595973014832, "learning_rate": 4.27850521090878e-06, "loss": 0.3103, "step": 31130 }, { "epoch": 0.6939952931211585, "grad_norm": 1.0747525691986084, "learning_rate": 4.275633957894516e-06, "loss": 0.4051, "step": 31135 }, { "epoch": 0.6941067425017785, "grad_norm": 0.584563672542572, "learning_rate": 4.2727634066433465e-06, "loss": 0.2511, "step": 31140 }, { "epoch": 0.6942181918823986, "grad_norm": 0.516092836856842, "learning_rate": 4.269893557507175e-06, "loss": 0.3257, "step": 31145 }, { "epoch": 0.6943296412630187, "grad_norm": 0.6570064425468445, "learning_rate": 4.267024410837821e-06, "loss": 0.344, "step": 31150 }, { "epoch": 0.6944410906436388, "grad_norm": 0.5576528906822205, "learning_rate": 4.264155966987026e-06, "loss": 0.3629, "step": 31155 }, { "epoch": 0.6945525400242588, "grad_norm": 0.4276863932609558, "learning_rate": 4.261288226306436e-06, "loss": 0.143, "step": 31160 }, { "epoch": 0.6946639894048788, "grad_norm": 0.6324317455291748, "learning_rate": 4.258421189147609e-06, "loss": 0.2573, "step": 31165 }, { "epoch": 0.694775438785499, "grad_norm": 0.4620887041091919, "learning_rate": 4.2555548558620294e-06, "loss": 0.1637, "step": 31170 }, { "epoch": 0.694886888166119, "grad_norm": 0.39362654089927673, "learning_rate": 4.2526892268010844e-06, "loss": 0.1964, "step": 31175 }, { "epoch": 0.6949983375467391, "grad_norm": 0.6800695657730103, "learning_rate": 4.249824302316079e-06, "loss": 0.3853, "step": 31180 }, { "epoch": 0.6951097869273591, "grad_norm": 0.598951518535614, "learning_rate": 4.246960082758225e-06, "loss": 0.2922, "step": 31185 }, { "epoch": 0.6952212363079793, "grad_norm": 0.5495995879173279, "learning_rate": 4.244096568478662e-06, "loss": 0.302, "step": 31190 }, { "epoch": 0.6953326856885993, "grad_norm": 0.36194702982902527, "learning_rate": 4.241233759828426e-06, "loss": 0.2135, "step": 31195 }, { "epoch": 0.6954441350692193, "grad_norm": 0.8240219354629517, "learning_rate": 4.238371657158486e-06, "loss": 0.3262, "step": 31200 }, { "epoch": 0.6955555844498394, "grad_norm": 0.6485922336578369, "learning_rate": 4.235510260819707e-06, "loss": 0.2685, "step": 31205 }, { "epoch": 0.6956670338304595, "grad_norm": 0.4704461097717285, "learning_rate": 4.232649571162874e-06, "loss": 0.2412, "step": 31210 }, { "epoch": 0.6957784832110796, "grad_norm": 0.627560555934906, "learning_rate": 4.229789588538687e-06, "loss": 0.2287, "step": 31215 }, { "epoch": 0.6958899325916996, "grad_norm": 0.5985351800918579, "learning_rate": 4.226930313297754e-06, "loss": 0.2742, "step": 31220 }, { "epoch": 0.6960013819723196, "grad_norm": 0.5910149812698364, "learning_rate": 4.224071745790603e-06, "loss": 0.3414, "step": 31225 }, { "epoch": 0.6961128313529398, "grad_norm": 0.9427435398101807, "learning_rate": 4.221213886367677e-06, "loss": 0.3136, "step": 31230 }, { "epoch": 0.6962242807335598, "grad_norm": 0.6015107035636902, "learning_rate": 4.218356735379322e-06, "loss": 0.2943, "step": 31235 }, { "epoch": 0.6963357301141799, "grad_norm": 0.8204703330993652, "learning_rate": 4.215500293175805e-06, "loss": 0.2389, "step": 31240 }, { "epoch": 0.6964471794948, "grad_norm": 0.7436009645462036, "learning_rate": 4.212644560107302e-06, "loss": 0.3549, "step": 31245 }, { "epoch": 0.69655862887542, "grad_norm": 0.6757791638374329, "learning_rate": 4.209789536523905e-06, "loss": 0.3306, "step": 31250 }, { "epoch": 0.6966700782560401, "grad_norm": 0.3792746365070343, "learning_rate": 4.206935222775612e-06, "loss": 0.1995, "step": 31255 }, { "epoch": 0.6967815276366601, "grad_norm": 0.5856655836105347, "learning_rate": 4.2040816192123465e-06, "loss": 0.2582, "step": 31260 }, { "epoch": 0.6968929770172803, "grad_norm": 0.4346431791782379, "learning_rate": 4.20122872618394e-06, "loss": 0.2341, "step": 31265 }, { "epoch": 0.6970044263979003, "grad_norm": 0.6606493592262268, "learning_rate": 4.198376544040132e-06, "loss": 0.3211, "step": 31270 }, { "epoch": 0.6971158757785204, "grad_norm": 0.6540193557739258, "learning_rate": 4.195525073130578e-06, "loss": 0.2868, "step": 31275 }, { "epoch": 0.6972273251591404, "grad_norm": 0.686784565448761, "learning_rate": 4.192674313804847e-06, "loss": 0.3434, "step": 31280 }, { "epoch": 0.6973387745397605, "grad_norm": 0.7588372230529785, "learning_rate": 4.189824266412416e-06, "loss": 0.3459, "step": 31285 }, { "epoch": 0.6974502239203806, "grad_norm": 0.7323045134544373, "learning_rate": 4.186974931302685e-06, "loss": 0.4094, "step": 31290 }, { "epoch": 0.6975616733010006, "grad_norm": 0.5145244598388672, "learning_rate": 4.184126308824954e-06, "loss": 0.3327, "step": 31295 }, { "epoch": 0.6976731226816207, "grad_norm": 0.4681999385356903, "learning_rate": 4.18127839932845e-06, "loss": 0.2122, "step": 31300 }, { "epoch": 0.6977845720622408, "grad_norm": 0.27826374769210815, "learning_rate": 4.178431203162301e-06, "loss": 0.2309, "step": 31305 }, { "epoch": 0.6978960214428608, "grad_norm": 0.5219650864601135, "learning_rate": 4.175584720675551e-06, "loss": 0.3067, "step": 31310 }, { "epoch": 0.6980074708234809, "grad_norm": 0.7450000047683716, "learning_rate": 4.172738952217151e-06, "loss": 0.29, "step": 31315 }, { "epoch": 0.6981189202041009, "grad_norm": 0.7271034121513367, "learning_rate": 4.169893898135981e-06, "loss": 0.3266, "step": 31320 }, { "epoch": 0.6982303695847211, "grad_norm": 0.583061158657074, "learning_rate": 4.167049558780818e-06, "loss": 0.274, "step": 31325 }, { "epoch": 0.6983418189653411, "grad_norm": 0.4795846939086914, "learning_rate": 4.164205934500351e-06, "loss": 0.1614, "step": 31330 }, { "epoch": 0.6984532683459612, "grad_norm": 0.6440160274505615, "learning_rate": 4.161363025643196e-06, "loss": 0.2446, "step": 31335 }, { "epoch": 0.6985647177265812, "grad_norm": 0.5630683302879333, "learning_rate": 4.158520832557866e-06, "loss": 0.2658, "step": 31340 }, { "epoch": 0.6986761671072013, "grad_norm": 0.5534509420394897, "learning_rate": 4.155679355592792e-06, "loss": 0.2804, "step": 31345 }, { "epoch": 0.6987876164878214, "grad_norm": 0.6446009874343872, "learning_rate": 4.152838595096316e-06, "loss": 0.2733, "step": 31350 }, { "epoch": 0.6988990658684414, "grad_norm": 0.6161692142486572, "learning_rate": 4.149998551416697e-06, "loss": 0.2515, "step": 31355 }, { "epoch": 0.6990105152490615, "grad_norm": 0.6872076988220215, "learning_rate": 4.147159224902101e-06, "loss": 0.3784, "step": 31360 }, { "epoch": 0.6991219646296816, "grad_norm": 0.9411196112632751, "learning_rate": 4.144320615900603e-06, "loss": 0.2868, "step": 31365 }, { "epoch": 0.6992334140103016, "grad_norm": 0.514918327331543, "learning_rate": 4.1414827247602016e-06, "loss": 0.3274, "step": 31370 }, { "epoch": 0.6993448633909217, "grad_norm": 0.576237678527832, "learning_rate": 4.138645551828799e-06, "loss": 0.2588, "step": 31375 }, { "epoch": 0.6994563127715417, "grad_norm": 0.840854287147522, "learning_rate": 4.135809097454204e-06, "loss": 0.3152, "step": 31380 }, { "epoch": 0.6995677621521619, "grad_norm": 0.8221232891082764, "learning_rate": 4.1329733619841535e-06, "loss": 0.3219, "step": 31385 }, { "epoch": 0.6996792115327819, "grad_norm": 0.6220507621765137, "learning_rate": 4.130138345766283e-06, "loss": 0.3625, "step": 31390 }, { "epoch": 0.6997906609134019, "grad_norm": 0.6762942671775818, "learning_rate": 4.127304049148142e-06, "loss": 0.2756, "step": 31395 }, { "epoch": 0.699902110294022, "grad_norm": 0.5760985016822815, "learning_rate": 4.12447047247719e-06, "loss": 0.2999, "step": 31400 }, { "epoch": 0.7000135596746421, "grad_norm": 0.927649199962616, "learning_rate": 4.121637616100811e-06, "loss": 0.3766, "step": 31405 }, { "epoch": 0.7001250090552622, "grad_norm": 1.2276265621185303, "learning_rate": 4.1188054803662814e-06, "loss": 0.3655, "step": 31410 }, { "epoch": 0.7002364584358822, "grad_norm": 0.5870280265808105, "learning_rate": 4.115974065620809e-06, "loss": 0.2976, "step": 31415 }, { "epoch": 0.7003479078165024, "grad_norm": 0.5980759263038635, "learning_rate": 4.113143372211498e-06, "loss": 0.2414, "step": 31420 }, { "epoch": 0.7004593571971224, "grad_norm": 0.766173779964447, "learning_rate": 4.110313400485369e-06, "loss": 0.2918, "step": 31425 }, { "epoch": 0.7005708065777424, "grad_norm": 0.5566070079803467, "learning_rate": 4.107484150789356e-06, "loss": 0.318, "step": 31430 }, { "epoch": 0.7006822559583625, "grad_norm": 0.6765812039375305, "learning_rate": 4.1046556234703e-06, "loss": 0.2702, "step": 31435 }, { "epoch": 0.7007937053389826, "grad_norm": 0.8657772541046143, "learning_rate": 4.101827818874962e-06, "loss": 0.3333, "step": 31440 }, { "epoch": 0.7009051547196027, "grad_norm": 0.6400044560432434, "learning_rate": 4.099000737350004e-06, "loss": 0.3406, "step": 31445 }, { "epoch": 0.7010166041002227, "grad_norm": 0.5417999029159546, "learning_rate": 4.09617437924201e-06, "loss": 0.2273, "step": 31450 }, { "epoch": 0.7011280534808427, "grad_norm": 0.7063996195793152, "learning_rate": 4.093348744897467e-06, "loss": 0.2746, "step": 31455 }, { "epoch": 0.7012395028614629, "grad_norm": 0.5358220934867859, "learning_rate": 4.090523834662775e-06, "loss": 0.2756, "step": 31460 }, { "epoch": 0.7013509522420829, "grad_norm": 0.37458446621894836, "learning_rate": 4.087699648884248e-06, "loss": 0.2767, "step": 31465 }, { "epoch": 0.701462401622703, "grad_norm": 0.7166943550109863, "learning_rate": 4.084876187908104e-06, "loss": 0.3122, "step": 31470 }, { "epoch": 0.701573851003323, "grad_norm": 0.6304473280906677, "learning_rate": 4.08205345208048e-06, "loss": 0.3681, "step": 31475 }, { "epoch": 0.7016853003839432, "grad_norm": 0.5616061091423035, "learning_rate": 4.079231441747428e-06, "loss": 0.1783, "step": 31480 }, { "epoch": 0.7017967497645632, "grad_norm": 0.7790914177894592, "learning_rate": 4.0764101572549e-06, "loss": 0.2155, "step": 31485 }, { "epoch": 0.7019081991451832, "grad_norm": 0.6581530570983887, "learning_rate": 4.0735895989487625e-06, "loss": 0.3486, "step": 31490 }, { "epoch": 0.7020196485258033, "grad_norm": 0.7142459750175476, "learning_rate": 4.070769767174797e-06, "loss": 0.2639, "step": 31495 }, { "epoch": 0.7021310979064234, "grad_norm": 0.6214120388031006, "learning_rate": 4.067950662278687e-06, "loss": 0.3789, "step": 31500 }, { "epoch": 0.7022425472870435, "grad_norm": 0.4268057942390442, "learning_rate": 4.065132284606038e-06, "loss": 0.3095, "step": 31505 }, { "epoch": 0.7023539966676635, "grad_norm": 0.6592011451721191, "learning_rate": 4.062314634502364e-06, "loss": 0.2152, "step": 31510 }, { "epoch": 0.7024654460482835, "grad_norm": 0.4133501648902893, "learning_rate": 4.059497712313083e-06, "loss": 0.3253, "step": 31515 }, { "epoch": 0.7025768954289037, "grad_norm": 0.9275414943695068, "learning_rate": 4.0566815183835295e-06, "loss": 0.2372, "step": 31520 }, { "epoch": 0.7026883448095237, "grad_norm": 0.6263979077339172, "learning_rate": 4.0538660530589466e-06, "loss": 0.318, "step": 31525 }, { "epoch": 0.7027997941901438, "grad_norm": 0.6248124241828918, "learning_rate": 4.051051316684486e-06, "loss": 0.2939, "step": 31530 }, { "epoch": 0.7029112435707638, "grad_norm": 0.3651900291442871, "learning_rate": 4.048237309605216e-06, "loss": 0.2063, "step": 31535 }, { "epoch": 0.703022692951384, "grad_norm": 0.5976786017417908, "learning_rate": 4.04542403216611e-06, "loss": 0.326, "step": 31540 }, { "epoch": 0.703134142332004, "grad_norm": 0.7850168347358704, "learning_rate": 4.042611484712058e-06, "loss": 0.4271, "step": 31545 }, { "epoch": 0.703245591712624, "grad_norm": 0.9984448552131653, "learning_rate": 4.039799667587855e-06, "loss": 0.386, "step": 31550 }, { "epoch": 0.7033570410932442, "grad_norm": 0.5956363677978516, "learning_rate": 4.036988581138206e-06, "loss": 0.2853, "step": 31555 }, { "epoch": 0.7034684904738642, "grad_norm": 0.6415675282478333, "learning_rate": 4.03417822570773e-06, "loss": 0.2615, "step": 31560 }, { "epoch": 0.7035799398544843, "grad_norm": 0.6364458799362183, "learning_rate": 4.031368601640951e-06, "loss": 0.2817, "step": 31565 }, { "epoch": 0.7036913892351043, "grad_norm": 0.7403188943862915, "learning_rate": 4.028559709282314e-06, "loss": 0.217, "step": 31570 }, { "epoch": 0.7038028386157243, "grad_norm": 0.6373766660690308, "learning_rate": 4.02575154897616e-06, "loss": 0.1846, "step": 31575 }, { "epoch": 0.7039142879963445, "grad_norm": 0.781013548374176, "learning_rate": 4.022944121066757e-06, "loss": 0.2312, "step": 31580 }, { "epoch": 0.7040257373769645, "grad_norm": 0.48915186524391174, "learning_rate": 4.020137425898267e-06, "loss": 0.3111, "step": 31585 }, { "epoch": 0.7041371867575846, "grad_norm": 0.7576313614845276, "learning_rate": 4.017331463814772e-06, "loss": 0.3434, "step": 31590 }, { "epoch": 0.7042486361382047, "grad_norm": 0.4817739725112915, "learning_rate": 4.014526235160258e-06, "loss": 0.3492, "step": 31595 }, { "epoch": 0.7043600855188247, "grad_norm": 0.6662478446960449, "learning_rate": 4.011721740278629e-06, "loss": 0.4293, "step": 31600 }, { "epoch": 0.7044715348994448, "grad_norm": 0.576723575592041, "learning_rate": 4.008917979513692e-06, "loss": 0.242, "step": 31605 }, { "epoch": 0.7045829842800648, "grad_norm": 0.6015423536300659, "learning_rate": 4.006114953209165e-06, "loss": 0.2857, "step": 31610 }, { "epoch": 0.704694433660685, "grad_norm": 0.6793724298477173, "learning_rate": 4.0033126617086815e-06, "loss": 0.2909, "step": 31615 }, { "epoch": 0.704805883041305, "grad_norm": 0.5891973376274109, "learning_rate": 4.0005111053557776e-06, "loss": 0.2533, "step": 31620 }, { "epoch": 0.7049173324219251, "grad_norm": 0.594723105430603, "learning_rate": 3.997710284493901e-06, "loss": 0.4283, "step": 31625 }, { "epoch": 0.7050287818025451, "grad_norm": 0.5770767331123352, "learning_rate": 3.994910199466415e-06, "loss": 0.2623, "step": 31630 }, { "epoch": 0.7051402311831652, "grad_norm": 0.5853124260902405, "learning_rate": 3.992110850616587e-06, "loss": 0.1828, "step": 31635 }, { "epoch": 0.7052516805637853, "grad_norm": 0.3737061023712158, "learning_rate": 3.989312238287596e-06, "loss": 0.3689, "step": 31640 }, { "epoch": 0.7053631299444053, "grad_norm": 0.5344130992889404, "learning_rate": 3.986514362822524e-06, "loss": 0.2212, "step": 31645 }, { "epoch": 0.7054745793250254, "grad_norm": 0.581981897354126, "learning_rate": 3.983717224564378e-06, "loss": 0.1676, "step": 31650 }, { "epoch": 0.7055860287056455, "grad_norm": 0.7280535697937012, "learning_rate": 3.9809208238560624e-06, "loss": 0.295, "step": 31655 }, { "epoch": 0.7056974780862655, "grad_norm": 0.7952364087104797, "learning_rate": 3.97812516104039e-06, "loss": 0.3424, "step": 31660 }, { "epoch": 0.7058089274668856, "grad_norm": 0.36092883348464966, "learning_rate": 3.9753302364600955e-06, "loss": 0.3084, "step": 31665 }, { "epoch": 0.7059203768475056, "grad_norm": 0.5385135412216187, "learning_rate": 3.972536050457809e-06, "loss": 0.3352, "step": 31670 }, { "epoch": 0.7060318262281258, "grad_norm": 0.503911554813385, "learning_rate": 3.969742603376079e-06, "loss": 0.1922, "step": 31675 }, { "epoch": 0.7061432756087458, "grad_norm": 0.43139663338661194, "learning_rate": 3.966949895557355e-06, "loss": 0.241, "step": 31680 }, { "epoch": 0.7062547249893659, "grad_norm": 0.7122260928153992, "learning_rate": 3.96415792734401e-06, "loss": 0.318, "step": 31685 }, { "epoch": 0.7063661743699859, "grad_norm": 0.7296402454376221, "learning_rate": 3.961366699078309e-06, "loss": 0.2224, "step": 31690 }, { "epoch": 0.706477623750606, "grad_norm": 0.5677131414413452, "learning_rate": 3.958576211102445e-06, "loss": 0.3132, "step": 31695 }, { "epoch": 0.7065890731312261, "grad_norm": 0.4357840120792389, "learning_rate": 3.955786463758503e-06, "loss": 0.2895, "step": 31700 }, { "epoch": 0.7067005225118461, "grad_norm": 0.660984992980957, "learning_rate": 3.952997457388488e-06, "loss": 0.2977, "step": 31705 }, { "epoch": 0.7068119718924663, "grad_norm": 0.46307283639907837, "learning_rate": 3.950209192334308e-06, "loss": 0.2648, "step": 31710 }, { "epoch": 0.7069234212730863, "grad_norm": 0.6149296760559082, "learning_rate": 3.94742166893778e-06, "loss": 0.3036, "step": 31715 }, { "epoch": 0.7070348706537063, "grad_norm": 0.4931444227695465, "learning_rate": 3.944634887540637e-06, "loss": 0.2578, "step": 31720 }, { "epoch": 0.7071463200343264, "grad_norm": 0.7630481719970703, "learning_rate": 3.941848848484521e-06, "loss": 0.2542, "step": 31725 }, { "epoch": 0.7072577694149464, "grad_norm": 0.6333499550819397, "learning_rate": 3.939063552110973e-06, "loss": 0.281, "step": 31730 }, { "epoch": 0.7073692187955666, "grad_norm": 0.6649298071861267, "learning_rate": 3.9362789987614514e-06, "loss": 0.3235, "step": 31735 }, { "epoch": 0.7074806681761866, "grad_norm": 0.5478550791740417, "learning_rate": 3.933495188777318e-06, "loss": 0.243, "step": 31740 }, { "epoch": 0.7075921175568067, "grad_norm": 0.7835191488265991, "learning_rate": 3.930712122499847e-06, "loss": 0.3897, "step": 31745 }, { "epoch": 0.7077035669374268, "grad_norm": 0.6654976606369019, "learning_rate": 3.9279298002702245e-06, "loss": 0.2984, "step": 31750 }, { "epoch": 0.7078150163180468, "grad_norm": 0.8832136988639832, "learning_rate": 3.925148222429536e-06, "loss": 0.4235, "step": 31755 }, { "epoch": 0.7079264656986669, "grad_norm": 0.5009804964065552, "learning_rate": 3.922367389318788e-06, "loss": 0.2761, "step": 31760 }, { "epoch": 0.7080379150792869, "grad_norm": 0.456746906042099, "learning_rate": 3.919587301278886e-06, "loss": 0.3327, "step": 31765 }, { "epoch": 0.7081493644599071, "grad_norm": 0.2070566713809967, "learning_rate": 3.916807958650647e-06, "loss": 0.2253, "step": 31770 }, { "epoch": 0.7082608138405271, "grad_norm": 0.8645603656768799, "learning_rate": 3.914029361774798e-06, "loss": 0.323, "step": 31775 }, { "epoch": 0.7083722632211471, "grad_norm": 0.609553337097168, "learning_rate": 3.911251510991969e-06, "loss": 0.2522, "step": 31780 }, { "epoch": 0.7084837126017672, "grad_norm": 0.454108327627182, "learning_rate": 3.90847440664271e-06, "loss": 0.2475, "step": 31785 }, { "epoch": 0.7085951619823873, "grad_norm": 1.1150599718093872, "learning_rate": 3.905698049067466e-06, "loss": 0.2728, "step": 31790 }, { "epoch": 0.7087066113630074, "grad_norm": 0.6766877174377441, "learning_rate": 3.902922438606603e-06, "loss": 0.2768, "step": 31795 }, { "epoch": 0.7088180607436274, "grad_norm": 0.7520423531532288, "learning_rate": 3.9001475756003884e-06, "loss": 0.4019, "step": 31800 }, { "epoch": 0.7089295101242474, "grad_norm": 0.3129575848579407, "learning_rate": 3.8973734603889965e-06, "loss": 0.2247, "step": 31805 }, { "epoch": 0.7090409595048676, "grad_norm": 0.3450331687927246, "learning_rate": 3.8946000933125104e-06, "loss": 0.246, "step": 31810 }, { "epoch": 0.7091524088854876, "grad_norm": 0.7495473623275757, "learning_rate": 3.89182747471093e-06, "loss": 0.3223, "step": 31815 }, { "epoch": 0.7092638582661077, "grad_norm": 0.6589322686195374, "learning_rate": 3.889055604924152e-06, "loss": 0.341, "step": 31820 }, { "epoch": 0.7093753076467277, "grad_norm": 0.6266372203826904, "learning_rate": 3.886284484291985e-06, "loss": 0.2727, "step": 31825 }, { "epoch": 0.7094867570273479, "grad_norm": 0.5666084289550781, "learning_rate": 3.883514113154154e-06, "loss": 0.2586, "step": 31830 }, { "epoch": 0.7095982064079679, "grad_norm": 0.47962307929992676, "learning_rate": 3.88074449185028e-06, "loss": 0.3381, "step": 31835 }, { "epoch": 0.7097096557885879, "grad_norm": 0.4192776083946228, "learning_rate": 3.877975620719893e-06, "loss": 0.22, "step": 31840 }, { "epoch": 0.709821105169208, "grad_norm": 0.6102100014686584, "learning_rate": 3.8752075001024455e-06, "loss": 0.2446, "step": 31845 }, { "epoch": 0.7099325545498281, "grad_norm": 0.801941990852356, "learning_rate": 3.872440130337282e-06, "loss": 0.2804, "step": 31850 }, { "epoch": 0.7100440039304482, "grad_norm": 0.7876242995262146, "learning_rate": 3.869673511763661e-06, "loss": 0.3862, "step": 31855 }, { "epoch": 0.7101554533110682, "grad_norm": 0.3692980706691742, "learning_rate": 3.866907644720744e-06, "loss": 0.3608, "step": 31860 }, { "epoch": 0.7102669026916882, "grad_norm": 0.65399169921875, "learning_rate": 3.864142529547614e-06, "loss": 0.27, "step": 31865 }, { "epoch": 0.7103783520723084, "grad_norm": 0.6146334409713745, "learning_rate": 3.861378166583248e-06, "loss": 0.3073, "step": 31870 }, { "epoch": 0.7104898014529284, "grad_norm": 0.7848692536354065, "learning_rate": 3.858614556166532e-06, "loss": 0.2501, "step": 31875 }, { "epoch": 0.7106012508335485, "grad_norm": 0.5123720765113831, "learning_rate": 3.855851698636271e-06, "loss": 0.3775, "step": 31880 }, { "epoch": 0.7107127002141685, "grad_norm": 0.823817789554596, "learning_rate": 3.853089594331168e-06, "loss": 0.2522, "step": 31885 }, { "epoch": 0.7108241495947887, "grad_norm": 0.5935199856758118, "learning_rate": 3.850328243589832e-06, "loss": 0.3239, "step": 31890 }, { "epoch": 0.7109355989754087, "grad_norm": 0.7279103994369507, "learning_rate": 3.847567646750782e-06, "loss": 0.2592, "step": 31895 }, { "epoch": 0.7110470483560287, "grad_norm": 0.7195340991020203, "learning_rate": 3.8448078041524515e-06, "loss": 0.249, "step": 31900 }, { "epoch": 0.7111584977366489, "grad_norm": 0.619198739528656, "learning_rate": 3.842048716133172e-06, "loss": 0.3505, "step": 31905 }, { "epoch": 0.7112699471172689, "grad_norm": 1.096381425857544, "learning_rate": 3.8392903830311905e-06, "loss": 0.262, "step": 31910 }, { "epoch": 0.711381396497889, "grad_norm": 0.5376746654510498, "learning_rate": 3.836532805184654e-06, "loss": 0.24, "step": 31915 }, { "epoch": 0.711492845878509, "grad_norm": 0.7633156180381775, "learning_rate": 3.833775982931621e-06, "loss": 0.2999, "step": 31920 }, { "epoch": 0.711604295259129, "grad_norm": 0.5743112564086914, "learning_rate": 3.831019916610057e-06, "loss": 0.2885, "step": 31925 }, { "epoch": 0.7117157446397492, "grad_norm": 0.6933272480964661, "learning_rate": 3.82826460655783e-06, "loss": 0.3165, "step": 31930 }, { "epoch": 0.7118271940203692, "grad_norm": 0.8295173645019531, "learning_rate": 3.825510053112724e-06, "loss": 0.2561, "step": 31935 }, { "epoch": 0.7119386434009893, "grad_norm": 0.5710833668708801, "learning_rate": 3.82275625661243e-06, "loss": 0.3247, "step": 31940 }, { "epoch": 0.7120500927816094, "grad_norm": 0.7485097646713257, "learning_rate": 3.820003217394537e-06, "loss": 0.2412, "step": 31945 }, { "epoch": 0.7121615421622295, "grad_norm": 0.6061316728591919, "learning_rate": 3.817250935796547e-06, "loss": 0.2984, "step": 31950 }, { "epoch": 0.7122729915428495, "grad_norm": 0.5282992720603943, "learning_rate": 3.8144994121558698e-06, "loss": 0.1346, "step": 31955 }, { "epoch": 0.7123844409234695, "grad_norm": 0.680298924446106, "learning_rate": 3.8117486468098198e-06, "loss": 0.221, "step": 31960 }, { "epoch": 0.7124958903040897, "grad_norm": 0.802907407283783, "learning_rate": 3.8089986400956156e-06, "loss": 0.3548, "step": 31965 }, { "epoch": 0.7126073396847097, "grad_norm": 0.6295163631439209, "learning_rate": 3.806249392350392e-06, "loss": 0.3174, "step": 31970 }, { "epoch": 0.7127187890653298, "grad_norm": 0.4468139708042145, "learning_rate": 3.803500903911187e-06, "loss": 0.2824, "step": 31975 }, { "epoch": 0.7128302384459498, "grad_norm": 0.3595259487628937, "learning_rate": 3.8007531751149417e-06, "loss": 0.2313, "step": 31980 }, { "epoch": 0.7129416878265699, "grad_norm": 0.6914778351783752, "learning_rate": 3.7980062062985056e-06, "loss": 0.2235, "step": 31985 }, { "epoch": 0.71305313720719, "grad_norm": 0.939741313457489, "learning_rate": 3.795259997798638e-06, "loss": 0.2855, "step": 31990 }, { "epoch": 0.71316458658781, "grad_norm": 0.5949002504348755, "learning_rate": 3.7925145499519967e-06, "loss": 0.4313, "step": 31995 }, { "epoch": 0.7132760359684301, "grad_norm": 0.37591123580932617, "learning_rate": 3.7897698630951584e-06, "loss": 0.3378, "step": 32000 }, { "epoch": 0.7133874853490502, "grad_norm": 0.6279875040054321, "learning_rate": 3.787025937564601e-06, "loss": 0.2906, "step": 32005 }, { "epoch": 0.7134989347296702, "grad_norm": 0.37446829676628113, "learning_rate": 3.784282773696708e-06, "loss": 0.2244, "step": 32010 }, { "epoch": 0.7136103841102903, "grad_norm": 0.8711704611778259, "learning_rate": 3.78154037182777e-06, "loss": 0.2275, "step": 32015 }, { "epoch": 0.7137218334909103, "grad_norm": 0.5769466161727905, "learning_rate": 3.778798732293981e-06, "loss": 0.1817, "step": 32020 }, { "epoch": 0.7138332828715305, "grad_norm": 0.6223900318145752, "learning_rate": 3.7760578554314454e-06, "loss": 0.4379, "step": 32025 }, { "epoch": 0.7139447322521505, "grad_norm": 0.6333432197570801, "learning_rate": 3.773317741576178e-06, "loss": 0.2252, "step": 32030 }, { "epoch": 0.7140561816327706, "grad_norm": 0.3799491226673126, "learning_rate": 3.770578391064089e-06, "loss": 0.2514, "step": 32035 }, { "epoch": 0.7141676310133906, "grad_norm": 0.4907926321029663, "learning_rate": 3.7678398042310106e-06, "loss": 0.2987, "step": 32040 }, { "epoch": 0.7142790803940107, "grad_norm": 0.43275654315948486, "learning_rate": 3.7651019814126656e-06, "loss": 0.3212, "step": 32045 }, { "epoch": 0.7143905297746308, "grad_norm": 0.8039458394050598, "learning_rate": 3.7623649229446922e-06, "loss": 0.31, "step": 32050 }, { "epoch": 0.7145019791552508, "grad_norm": 0.6455947160720825, "learning_rate": 3.759628629162633e-06, "loss": 0.2957, "step": 32055 }, { "epoch": 0.714613428535871, "grad_norm": 0.9271951913833618, "learning_rate": 3.7568931004019306e-06, "loss": 0.2133, "step": 32060 }, { "epoch": 0.714724877916491, "grad_norm": 0.7006394267082214, "learning_rate": 3.7541583369979484e-06, "loss": 0.2565, "step": 32065 }, { "epoch": 0.714836327297111, "grad_norm": 0.765282928943634, "learning_rate": 3.7514243392859406e-06, "loss": 0.2578, "step": 32070 }, { "epoch": 0.7149477766777311, "grad_norm": 0.9573498368263245, "learning_rate": 3.748691107601081e-06, "loss": 0.3838, "step": 32075 }, { "epoch": 0.7150592260583511, "grad_norm": 0.3499448001384735, "learning_rate": 3.7459586422784387e-06, "loss": 0.2668, "step": 32080 }, { "epoch": 0.7151706754389713, "grad_norm": 0.5847846269607544, "learning_rate": 3.7432269436529934e-06, "loss": 0.3042, "step": 32085 }, { "epoch": 0.7152821248195913, "grad_norm": 0.8232019543647766, "learning_rate": 3.7404960120596256e-06, "loss": 0.2874, "step": 32090 }, { "epoch": 0.7153935742002114, "grad_norm": 0.8658748865127563, "learning_rate": 3.7377658478331347e-06, "loss": 0.2938, "step": 32095 }, { "epoch": 0.7155050235808315, "grad_norm": 0.7339183688163757, "learning_rate": 3.7350364513082137e-06, "loss": 0.2917, "step": 32100 }, { "epoch": 0.7156164729614515, "grad_norm": 0.61677086353302, "learning_rate": 3.732307822819462e-06, "loss": 0.3273, "step": 32105 }, { "epoch": 0.7157279223420716, "grad_norm": 0.9375027418136597, "learning_rate": 3.7295799627013964e-06, "loss": 0.3053, "step": 32110 }, { "epoch": 0.7158393717226916, "grad_norm": 0.4487803280353546, "learning_rate": 3.7268528712884254e-06, "loss": 0.2954, "step": 32115 }, { "epoch": 0.7159508211033118, "grad_norm": 0.35287606716156006, "learning_rate": 3.724126548914869e-06, "loss": 0.2638, "step": 32120 }, { "epoch": 0.7160622704839318, "grad_norm": 0.5714026093482971, "learning_rate": 3.721400995914959e-06, "loss": 0.3062, "step": 32125 }, { "epoch": 0.7161737198645518, "grad_norm": 0.6228859424591064, "learning_rate": 3.7186762126228227e-06, "loss": 0.2557, "step": 32130 }, { "epoch": 0.7162851692451719, "grad_norm": 0.761638343334198, "learning_rate": 3.715952199372499e-06, "loss": 0.278, "step": 32135 }, { "epoch": 0.716396618625792, "grad_norm": 0.8885366320610046, "learning_rate": 3.7132289564979273e-06, "loss": 0.3607, "step": 32140 }, { "epoch": 0.7165080680064121, "grad_norm": 0.7892611622810364, "learning_rate": 3.710506484332962e-06, "loss": 0.2887, "step": 32145 }, { "epoch": 0.7166195173870321, "grad_norm": 0.6966431736946106, "learning_rate": 3.7077847832113502e-06, "loss": 0.4219, "step": 32150 }, { "epoch": 0.7167309667676521, "grad_norm": 0.5574402809143066, "learning_rate": 3.705063853466759e-06, "loss": 0.2245, "step": 32155 }, { "epoch": 0.7168424161482723, "grad_norm": 0.6996751427650452, "learning_rate": 3.7023436954327507e-06, "loss": 0.3271, "step": 32160 }, { "epoch": 0.7169538655288923, "grad_norm": 0.8541873693466187, "learning_rate": 3.6996243094427955e-06, "loss": 0.391, "step": 32165 }, { "epoch": 0.7170653149095124, "grad_norm": 1.0597639083862305, "learning_rate": 3.6969056958302673e-06, "loss": 0.3295, "step": 32170 }, { "epoch": 0.7171767642901324, "grad_norm": 0.37856525182724, "learning_rate": 3.694187854928445e-06, "loss": 0.2672, "step": 32175 }, { "epoch": 0.7172882136707526, "grad_norm": 0.46974796056747437, "learning_rate": 3.6914707870705224e-06, "loss": 0.2337, "step": 32180 }, { "epoch": 0.7173996630513726, "grad_norm": 0.9543710350990295, "learning_rate": 3.6887544925895826e-06, "loss": 0.3482, "step": 32185 }, { "epoch": 0.7175111124319926, "grad_norm": 0.5527359843254089, "learning_rate": 3.6860389718186306e-06, "loss": 0.2452, "step": 32190 }, { "epoch": 0.7176225618126127, "grad_norm": 0.5871400833129883, "learning_rate": 3.6833242250905644e-06, "loss": 0.3222, "step": 32195 }, { "epoch": 0.7177340111932328, "grad_norm": 0.5523154139518738, "learning_rate": 3.6806102527381916e-06, "loss": 0.1973, "step": 32200 }, { "epoch": 0.7178454605738529, "grad_norm": 0.724926233291626, "learning_rate": 3.6778970550942227e-06, "loss": 0.2868, "step": 32205 }, { "epoch": 0.7179569099544729, "grad_norm": 0.7483214139938354, "learning_rate": 3.675184632491272e-06, "loss": 0.2372, "step": 32210 }, { "epoch": 0.7180683593350929, "grad_norm": 0.46413981914520264, "learning_rate": 3.672472985261866e-06, "loss": 0.4091, "step": 32215 }, { "epoch": 0.7181798087157131, "grad_norm": 0.6509472131729126, "learning_rate": 3.669762113738434e-06, "loss": 0.294, "step": 32220 }, { "epoch": 0.7182912580963331, "grad_norm": 0.8137038350105286, "learning_rate": 3.6670520182533054e-06, "loss": 0.3554, "step": 32225 }, { "epoch": 0.7184027074769532, "grad_norm": 0.6024363040924072, "learning_rate": 3.6643426991387167e-06, "loss": 0.2791, "step": 32230 }, { "epoch": 0.7185141568575732, "grad_norm": 0.7786808609962463, "learning_rate": 3.661634156726809e-06, "loss": 0.3118, "step": 32235 }, { "epoch": 0.7186256062381934, "grad_norm": 0.3448847234249115, "learning_rate": 3.6589263913496242e-06, "loss": 0.2843, "step": 32240 }, { "epoch": 0.7187370556188134, "grad_norm": 0.4758356213569641, "learning_rate": 3.65621940333912e-06, "loss": 0.2978, "step": 32245 }, { "epoch": 0.7188485049994334, "grad_norm": 0.5408560037612915, "learning_rate": 3.653513193027154e-06, "loss": 0.2222, "step": 32250 }, { "epoch": 0.7189599543800536, "grad_norm": 0.6804062724113464, "learning_rate": 3.6508077607454818e-06, "loss": 0.3638, "step": 32255 }, { "epoch": 0.7190714037606736, "grad_norm": 0.4784459173679352, "learning_rate": 3.648103106825771e-06, "loss": 0.2286, "step": 32260 }, { "epoch": 0.7191828531412937, "grad_norm": 0.43720412254333496, "learning_rate": 3.64539923159959e-06, "loss": 0.3287, "step": 32265 }, { "epoch": 0.7192943025219137, "grad_norm": 0.56275874376297, "learning_rate": 3.6426961353984125e-06, "loss": 0.2435, "step": 32270 }, { "epoch": 0.7194057519025338, "grad_norm": 0.3388124406337738, "learning_rate": 3.6399938185536153e-06, "loss": 0.2135, "step": 32275 }, { "epoch": 0.7195172012831539, "grad_norm": 0.8845019936561584, "learning_rate": 3.637292281396484e-06, "loss": 0.2409, "step": 32280 }, { "epoch": 0.7196286506637739, "grad_norm": 0.6715808510780334, "learning_rate": 3.6345915242582096e-06, "loss": 0.3154, "step": 32285 }, { "epoch": 0.719740100044394, "grad_norm": 0.6816524863243103, "learning_rate": 3.631891547469881e-06, "loss": 0.4458, "step": 32290 }, { "epoch": 0.7198515494250141, "grad_norm": 0.427211731672287, "learning_rate": 3.6291923513624948e-06, "loss": 0.3249, "step": 32295 }, { "epoch": 0.7199629988056342, "grad_norm": 0.7947831749916077, "learning_rate": 3.6264939362669517e-06, "loss": 0.2995, "step": 32300 }, { "epoch": 0.7200744481862542, "grad_norm": 0.7609732747077942, "learning_rate": 3.623796302514051e-06, "loss": 0.2699, "step": 32305 }, { "epoch": 0.7201858975668742, "grad_norm": 0.6190392374992371, "learning_rate": 3.621099450434512e-06, "loss": 0.2547, "step": 32310 }, { "epoch": 0.7202973469474944, "grad_norm": 0.404293030500412, "learning_rate": 3.618403380358941e-06, "loss": 0.3716, "step": 32315 }, { "epoch": 0.7204087963281144, "grad_norm": 0.45661965012550354, "learning_rate": 3.6157080926178556e-06, "loss": 0.3288, "step": 32320 }, { "epoch": 0.7205202457087345, "grad_norm": 0.49279457330703735, "learning_rate": 3.6130135875416816e-06, "loss": 0.2279, "step": 32325 }, { "epoch": 0.7206316950893545, "grad_norm": 0.6576529741287231, "learning_rate": 3.610319865460742e-06, "loss": 0.2772, "step": 32330 }, { "epoch": 0.7207431444699746, "grad_norm": 0.5785512924194336, "learning_rate": 3.607626926705262e-06, "loss": 0.3288, "step": 32335 }, { "epoch": 0.7208545938505947, "grad_norm": 0.4410577118396759, "learning_rate": 3.6049347716053838e-06, "loss": 0.3005, "step": 32340 }, { "epoch": 0.7209660432312147, "grad_norm": 0.8653692007064819, "learning_rate": 3.60224340049114e-06, "loss": 0.3509, "step": 32345 }, { "epoch": 0.7210774926118348, "grad_norm": 0.6026588082313538, "learning_rate": 3.599552813692472e-06, "loss": 0.34, "step": 32350 }, { "epoch": 0.7211889419924549, "grad_norm": 0.4423521161079407, "learning_rate": 3.596863011539221e-06, "loss": 0.2271, "step": 32355 }, { "epoch": 0.7213003913730749, "grad_norm": 0.5537447333335876, "learning_rate": 3.594173994361144e-06, "loss": 0.3005, "step": 32360 }, { "epoch": 0.721411840753695, "grad_norm": 0.42409682273864746, "learning_rate": 3.5914857624878898e-06, "loss": 0.2173, "step": 32365 }, { "epoch": 0.721523290134315, "grad_norm": 0.698688268661499, "learning_rate": 3.5887983162490125e-06, "loss": 0.2936, "step": 32370 }, { "epoch": 0.7216347395149352, "grad_norm": 0.6022817492485046, "learning_rate": 3.5861116559739772e-06, "loss": 0.3066, "step": 32375 }, { "epoch": 0.7217461888955552, "grad_norm": 0.9220947623252869, "learning_rate": 3.583425781992146e-06, "loss": 0.2283, "step": 32380 }, { "epoch": 0.7218576382761753, "grad_norm": 0.3861825466156006, "learning_rate": 3.5807406946327847e-06, "loss": 0.3059, "step": 32385 }, { "epoch": 0.7219690876567953, "grad_norm": 0.3583422899246216, "learning_rate": 3.5780563942250623e-06, "loss": 0.2054, "step": 32390 }, { "epoch": 0.7220805370374154, "grad_norm": 0.4105013608932495, "learning_rate": 3.575372881098059e-06, "loss": 0.2404, "step": 32395 }, { "epoch": 0.7221919864180355, "grad_norm": 0.5124438405036926, "learning_rate": 3.572690155580747e-06, "loss": 0.179, "step": 32400 }, { "epoch": 0.7223034357986555, "grad_norm": 0.3848888874053955, "learning_rate": 3.5700082180020147e-06, "loss": 0.3135, "step": 32405 }, { "epoch": 0.7224148851792757, "grad_norm": 0.5860435962677002, "learning_rate": 3.5673270686906424e-06, "loss": 0.2559, "step": 32410 }, { "epoch": 0.7225263345598957, "grad_norm": 0.5059179663658142, "learning_rate": 3.564646707975319e-06, "loss": 0.3659, "step": 32415 }, { "epoch": 0.7226377839405157, "grad_norm": 0.671142041683197, "learning_rate": 3.561967136184635e-06, "loss": 0.3804, "step": 32420 }, { "epoch": 0.7227492333211358, "grad_norm": 0.6313089728355408, "learning_rate": 3.5592883536470836e-06, "loss": 0.2195, "step": 32425 }, { "epoch": 0.7228606827017559, "grad_norm": 0.5060765743255615, "learning_rate": 3.5566103606910652e-06, "loss": 0.3807, "step": 32430 }, { "epoch": 0.722972132082376, "grad_norm": 0.8583391308784485, "learning_rate": 3.5539331576448854e-06, "loss": 0.3069, "step": 32435 }, { "epoch": 0.723083581462996, "grad_norm": 0.8933718204498291, "learning_rate": 3.551256744836743e-06, "loss": 0.2781, "step": 32440 }, { "epoch": 0.7231950308436161, "grad_norm": 0.7085146903991699, "learning_rate": 3.5485811225947485e-06, "loss": 0.2548, "step": 32445 }, { "epoch": 0.7233064802242362, "grad_norm": 0.6724334359169006, "learning_rate": 3.54590629124691e-06, "loss": 0.2972, "step": 32450 }, { "epoch": 0.7234179296048562, "grad_norm": 0.5674258470535278, "learning_rate": 3.5432322511211393e-06, "loss": 0.3705, "step": 32455 }, { "epoch": 0.7235293789854763, "grad_norm": 0.5062234997749329, "learning_rate": 3.5405590025452565e-06, "loss": 0.1177, "step": 32460 }, { "epoch": 0.7236408283660963, "grad_norm": 0.5411877632141113, "learning_rate": 3.5378865458469824e-06, "loss": 0.1958, "step": 32465 }, { "epoch": 0.7237522777467165, "grad_norm": 0.6080607771873474, "learning_rate": 3.53521488135394e-06, "loss": 0.2997, "step": 32470 }, { "epoch": 0.7238637271273365, "grad_norm": 0.3965998888015747, "learning_rate": 3.5325440093936513e-06, "loss": 0.2094, "step": 32475 }, { "epoch": 0.7239751765079565, "grad_norm": 0.8515360951423645, "learning_rate": 3.529873930293546e-06, "loss": 0.2023, "step": 32480 }, { "epoch": 0.7240866258885766, "grad_norm": 0.7043160200119019, "learning_rate": 3.527204644380956e-06, "loss": 0.2765, "step": 32485 }, { "epoch": 0.7241980752691967, "grad_norm": 0.5239383578300476, "learning_rate": 3.52453615198311e-06, "loss": 0.2129, "step": 32490 }, { "epoch": 0.7243095246498168, "grad_norm": 0.4187348484992981, "learning_rate": 3.5218684534271497e-06, "loss": 0.2735, "step": 32495 }, { "epoch": 0.7244209740304368, "grad_norm": 0.5309123396873474, "learning_rate": 3.5192015490401165e-06, "loss": 0.2541, "step": 32500 }, { "epoch": 0.724532423411057, "grad_norm": 0.48658499121665955, "learning_rate": 3.516535439148949e-06, "loss": 0.3096, "step": 32505 }, { "epoch": 0.724643872791677, "grad_norm": 0.49070295691490173, "learning_rate": 3.5138701240804927e-06, "loss": 0.2836, "step": 32510 }, { "epoch": 0.724755322172297, "grad_norm": 0.5202540755271912, "learning_rate": 3.5112056041614927e-06, "loss": 0.189, "step": 32515 }, { "epoch": 0.7248667715529171, "grad_norm": 0.49356088042259216, "learning_rate": 3.5085418797185977e-06, "loss": 0.2788, "step": 32520 }, { "epoch": 0.7249782209335371, "grad_norm": 0.792779803276062, "learning_rate": 3.505878951078365e-06, "loss": 0.3265, "step": 32525 }, { "epoch": 0.7250896703141573, "grad_norm": 0.5415936708450317, "learning_rate": 3.5032168185672423e-06, "loss": 0.2231, "step": 32530 }, { "epoch": 0.7252011196947773, "grad_norm": 0.9408218860626221, "learning_rate": 3.500555482511594e-06, "loss": 0.311, "step": 32535 }, { "epoch": 0.7253125690753973, "grad_norm": 0.9326852560043335, "learning_rate": 3.4978949432376753e-06, "loss": 0.3399, "step": 32540 }, { "epoch": 0.7254240184560174, "grad_norm": 0.5972670912742615, "learning_rate": 3.4952352010716472e-06, "loss": 0.3099, "step": 32545 }, { "epoch": 0.7255354678366375, "grad_norm": 0.4196832478046417, "learning_rate": 3.4925762563395714e-06, "loss": 0.2358, "step": 32550 }, { "epoch": 0.7256469172172576, "grad_norm": 0.6563181281089783, "learning_rate": 3.489918109367422e-06, "loss": 0.2752, "step": 32555 }, { "epoch": 0.7257583665978776, "grad_norm": 0.501997172832489, "learning_rate": 3.4872607604810605e-06, "loss": 0.26, "step": 32560 }, { "epoch": 0.7258698159784976, "grad_norm": 1.001268744468689, "learning_rate": 3.484604210006256e-06, "loss": 0.4104, "step": 32565 }, { "epoch": 0.7259812653591178, "grad_norm": 0.5787915587425232, "learning_rate": 3.481948458268688e-06, "loss": 0.1873, "step": 32570 }, { "epoch": 0.7260927147397378, "grad_norm": 1.107448935508728, "learning_rate": 3.479293505593927e-06, "loss": 0.3459, "step": 32575 }, { "epoch": 0.7262041641203579, "grad_norm": 0.7938663363456726, "learning_rate": 3.4766393523074504e-06, "loss": 0.2198, "step": 32580 }, { "epoch": 0.726315613500978, "grad_norm": 0.6893438696861267, "learning_rate": 3.4739859987346325e-06, "loss": 0.3321, "step": 32585 }, { "epoch": 0.7264270628815981, "grad_norm": 0.5795388221740723, "learning_rate": 3.471333445200762e-06, "loss": 0.2203, "step": 32590 }, { "epoch": 0.7265385122622181, "grad_norm": 0.863497257232666, "learning_rate": 3.4686816920310175e-06, "loss": 0.2348, "step": 32595 }, { "epoch": 0.7266499616428381, "grad_norm": 0.8609886169433594, "learning_rate": 3.466030739550481e-06, "loss": 0.3007, "step": 32600 }, { "epoch": 0.7267614110234583, "grad_norm": 0.37407514452934265, "learning_rate": 3.463380588084143e-06, "loss": 0.181, "step": 32605 }, { "epoch": 0.7268728604040783, "grad_norm": 0.6242061853408813, "learning_rate": 3.4607312379568913e-06, "loss": 0.1466, "step": 32610 }, { "epoch": 0.7269843097846984, "grad_norm": 0.6473770141601562, "learning_rate": 3.4580826894935104e-06, "loss": 0.2335, "step": 32615 }, { "epoch": 0.7270957591653184, "grad_norm": 0.6805868148803711, "learning_rate": 3.4554349430186997e-06, "loss": 0.343, "step": 32620 }, { "epoch": 0.7272072085459385, "grad_norm": 0.897735595703125, "learning_rate": 3.452787998857048e-06, "loss": 0.3243, "step": 32625 }, { "epoch": 0.7273186579265586, "grad_norm": 0.6950689554214478, "learning_rate": 3.4501418573330516e-06, "loss": 0.3108, "step": 32630 }, { "epoch": 0.7274301073071786, "grad_norm": 0.8856807351112366, "learning_rate": 3.447496518771103e-06, "loss": 0.4225, "step": 32635 }, { "epoch": 0.7275415566877987, "grad_norm": 0.5541945695877075, "learning_rate": 3.4448519834955065e-06, "loss": 0.3201, "step": 32640 }, { "epoch": 0.7276530060684188, "grad_norm": 1.209107756614685, "learning_rate": 3.4422082518304555e-06, "loss": 0.3038, "step": 32645 }, { "epoch": 0.7277644554490389, "grad_norm": 0.6372049450874329, "learning_rate": 3.4395653241000584e-06, "loss": 0.2701, "step": 32650 }, { "epoch": 0.7278759048296589, "grad_norm": 0.6735883355140686, "learning_rate": 3.4369232006283137e-06, "loss": 0.2318, "step": 32655 }, { "epoch": 0.7279873542102789, "grad_norm": 0.5158315896987915, "learning_rate": 3.4342818817391253e-06, "loss": 0.3009, "step": 32660 }, { "epoch": 0.7280988035908991, "grad_norm": 0.6828005909919739, "learning_rate": 3.4316413677562976e-06, "loss": 0.2938, "step": 32665 }, { "epoch": 0.7282102529715191, "grad_norm": 0.6131678819656372, "learning_rate": 3.4290016590035367e-06, "loss": 0.2791, "step": 32670 }, { "epoch": 0.7283217023521392, "grad_norm": 0.4129774272441864, "learning_rate": 3.4263627558044543e-06, "loss": 0.318, "step": 32675 }, { "epoch": 0.7284331517327592, "grad_norm": 0.5805781483650208, "learning_rate": 3.4237246584825545e-06, "loss": 0.2985, "step": 32680 }, { "epoch": 0.7285446011133793, "grad_norm": 0.7275981307029724, "learning_rate": 3.4210873673612534e-06, "loss": 0.2832, "step": 32685 }, { "epoch": 0.7286560504939994, "grad_norm": 0.7103040814399719, "learning_rate": 3.4184508827638597e-06, "loss": 0.3154, "step": 32690 }, { "epoch": 0.7287674998746194, "grad_norm": 0.899198055267334, "learning_rate": 3.4158152050135864e-06, "loss": 0.3092, "step": 32695 }, { "epoch": 0.7288789492552395, "grad_norm": 0.6193143725395203, "learning_rate": 3.413180334433547e-06, "loss": 0.1996, "step": 32700 }, { "epoch": 0.7289903986358596, "grad_norm": 0.46193233132362366, "learning_rate": 3.410546271346752e-06, "loss": 0.3823, "step": 32705 }, { "epoch": 0.7291018480164797, "grad_norm": 0.46639421582221985, "learning_rate": 3.4079130160761222e-06, "loss": 0.2493, "step": 32710 }, { "epoch": 0.7292132973970997, "grad_norm": 0.846572756767273, "learning_rate": 3.4052805689444757e-06, "loss": 0.3425, "step": 32715 }, { "epoch": 0.7293247467777197, "grad_norm": 1.2694923877716064, "learning_rate": 3.402648930274529e-06, "loss": 0.2782, "step": 32720 }, { "epoch": 0.7294361961583399, "grad_norm": 0.6195218563079834, "learning_rate": 3.4000181003889e-06, "loss": 0.2875, "step": 32725 }, { "epoch": 0.7295476455389599, "grad_norm": 0.8452773094177246, "learning_rate": 3.3973880796101067e-06, "loss": 0.3262, "step": 32730 }, { "epoch": 0.72965909491958, "grad_norm": 0.7911295294761658, "learning_rate": 3.394758868260568e-06, "loss": 0.3521, "step": 32735 }, { "epoch": 0.7297705443002, "grad_norm": 0.6133013367652893, "learning_rate": 3.3921304666626075e-06, "loss": 0.3355, "step": 32740 }, { "epoch": 0.7298819936808201, "grad_norm": 0.5400652885437012, "learning_rate": 3.3895028751384495e-06, "loss": 0.2788, "step": 32745 }, { "epoch": 0.7299934430614402, "grad_norm": 0.7221797704696655, "learning_rate": 3.386876094010214e-06, "loss": 0.2963, "step": 32750 }, { "epoch": 0.7301048924420602, "grad_norm": 0.6148155331611633, "learning_rate": 3.3842501235999246e-06, "loss": 0.2763, "step": 32755 }, { "epoch": 0.7302163418226804, "grad_norm": 0.6763278245925903, "learning_rate": 3.381624964229504e-06, "loss": 0.2316, "step": 32760 }, { "epoch": 0.7303277912033004, "grad_norm": 0.6103901863098145, "learning_rate": 3.3790006162207722e-06, "loss": 0.3596, "step": 32765 }, { "epoch": 0.7304392405839204, "grad_norm": 0.7152795195579529, "learning_rate": 3.3763770798954633e-06, "loss": 0.2931, "step": 32770 }, { "epoch": 0.7305506899645405, "grad_norm": 0.5556166768074036, "learning_rate": 3.3737543555751937e-06, "loss": 0.2399, "step": 32775 }, { "epoch": 0.7306621393451606, "grad_norm": 0.6293778419494629, "learning_rate": 3.3711324435814973e-06, "loss": 0.284, "step": 32780 }, { "epoch": 0.7307735887257807, "grad_norm": 0.37658339738845825, "learning_rate": 3.3685113442357963e-06, "loss": 0.3552, "step": 32785 }, { "epoch": 0.7308850381064007, "grad_norm": 0.8621169328689575, "learning_rate": 3.3658910578594173e-06, "loss": 0.2481, "step": 32790 }, { "epoch": 0.7309964874870208, "grad_norm": 0.5625035762786865, "learning_rate": 3.3632715847735875e-06, "loss": 0.383, "step": 32795 }, { "epoch": 0.7311079368676409, "grad_norm": 0.5894595384597778, "learning_rate": 3.36065292529943e-06, "loss": 0.3287, "step": 32800 }, { "epoch": 0.7312193862482609, "grad_norm": 0.8198038339614868, "learning_rate": 3.3580350797579786e-06, "loss": 0.2684, "step": 32805 }, { "epoch": 0.731330835628881, "grad_norm": 0.4880506098270416, "learning_rate": 3.355418048470156e-06, "loss": 0.4003, "step": 32810 }, { "epoch": 0.731442285009501, "grad_norm": 0.5881021618843079, "learning_rate": 3.3528018317567954e-06, "loss": 0.1828, "step": 32815 }, { "epoch": 0.7315537343901212, "grad_norm": 0.5141913890838623, "learning_rate": 3.3501864299386213e-06, "loss": 0.2842, "step": 32820 }, { "epoch": 0.7316651837707412, "grad_norm": 0.6004062294960022, "learning_rate": 3.3475718433362623e-06, "loss": 0.3737, "step": 32825 }, { "epoch": 0.7317766331513612, "grad_norm": 0.9291187524795532, "learning_rate": 3.3449580722702434e-06, "loss": 0.2161, "step": 32830 }, { "epoch": 0.7318880825319813, "grad_norm": 0.69871586561203, "learning_rate": 3.342345117060999e-06, "loss": 0.297, "step": 32835 }, { "epoch": 0.7319995319126014, "grad_norm": 1.0281239748001099, "learning_rate": 3.3397329780288546e-06, "loss": 0.2879, "step": 32840 }, { "epoch": 0.7321109812932215, "grad_norm": 0.3937816023826599, "learning_rate": 3.3371216554940367e-06, "loss": 0.2839, "step": 32845 }, { "epoch": 0.7322224306738415, "grad_norm": 2.1417198181152344, "learning_rate": 3.3345111497766713e-06, "loss": 0.2963, "step": 32850 }, { "epoch": 0.7323338800544617, "grad_norm": 0.7976836562156677, "learning_rate": 3.3319014611967936e-06, "loss": 0.2797, "step": 32855 }, { "epoch": 0.7324453294350817, "grad_norm": 0.8146697878837585, "learning_rate": 3.329292590074322e-06, "loss": 0.1875, "step": 32860 }, { "epoch": 0.7325567788157017, "grad_norm": 1.0141352415084839, "learning_rate": 3.3266845367290934e-06, "loss": 0.241, "step": 32865 }, { "epoch": 0.7326682281963218, "grad_norm": 0.8705369830131531, "learning_rate": 3.3240773014808303e-06, "loss": 0.2844, "step": 32870 }, { "epoch": 0.7327796775769418, "grad_norm": 0.5327593088150024, "learning_rate": 3.3214708846491594e-06, "loss": 0.2567, "step": 32875 }, { "epoch": 0.732891126957562, "grad_norm": 0.909140944480896, "learning_rate": 3.3188652865536074e-06, "loss": 0.3596, "step": 32880 }, { "epoch": 0.733002576338182, "grad_norm": 0.5487768054008484, "learning_rate": 3.3162605075135988e-06, "loss": 0.2859, "step": 32885 }, { "epoch": 0.733114025718802, "grad_norm": 0.5730482935905457, "learning_rate": 3.3136565478484638e-06, "loss": 0.2157, "step": 32890 }, { "epoch": 0.7332254750994222, "grad_norm": 0.7908298969268799, "learning_rate": 3.3110534078774224e-06, "loss": 0.2703, "step": 32895 }, { "epoch": 0.7333369244800422, "grad_norm": 0.5331554412841797, "learning_rate": 3.3084510879196053e-06, "loss": 0.2244, "step": 32900 }, { "epoch": 0.7334483738606623, "grad_norm": 0.7708745002746582, "learning_rate": 3.3058495882940344e-06, "loss": 0.2691, "step": 32905 }, { "epoch": 0.7335598232412823, "grad_norm": 0.6561907529830933, "learning_rate": 3.303248909319633e-06, "loss": 0.2677, "step": 32910 }, { "epoch": 0.7336712726219023, "grad_norm": 0.9045218825340271, "learning_rate": 3.3006490513152245e-06, "loss": 0.2301, "step": 32915 }, { "epoch": 0.7337827220025225, "grad_norm": 0.6481374502182007, "learning_rate": 3.2980500145995278e-06, "loss": 0.3318, "step": 32920 }, { "epoch": 0.7338941713831425, "grad_norm": 0.6650016903877258, "learning_rate": 3.2954517994911684e-06, "loss": 0.3868, "step": 32925 }, { "epoch": 0.7340056207637626, "grad_norm": 0.6965200901031494, "learning_rate": 3.2928544063086697e-06, "loss": 0.3053, "step": 32930 }, { "epoch": 0.7341170701443827, "grad_norm": 0.7827367782592773, "learning_rate": 3.290257835370451e-06, "loss": 0.3013, "step": 32935 }, { "epoch": 0.7342285195250028, "grad_norm": 0.45556485652923584, "learning_rate": 3.2876620869948294e-06, "loss": 0.259, "step": 32940 }, { "epoch": 0.7343399689056228, "grad_norm": 0.8706529140472412, "learning_rate": 3.285067161500024e-06, "loss": 0.3253, "step": 32945 }, { "epoch": 0.7344514182862428, "grad_norm": 0.7577581405639648, "learning_rate": 3.2824730592041507e-06, "loss": 0.2733, "step": 32950 }, { "epoch": 0.734562867666863, "grad_norm": 0.7123679518699646, "learning_rate": 3.27987978042523e-06, "loss": 0.3404, "step": 32955 }, { "epoch": 0.734674317047483, "grad_norm": 0.6272518038749695, "learning_rate": 3.2772873254811787e-06, "loss": 0.218, "step": 32960 }, { "epoch": 0.7347857664281031, "grad_norm": 0.4759124219417572, "learning_rate": 3.2746956946898114e-06, "loss": 0.2832, "step": 32965 }, { "epoch": 0.7348972158087231, "grad_norm": 0.7127870917320251, "learning_rate": 3.2721048883688387e-06, "loss": 0.2872, "step": 32970 }, { "epoch": 0.7350086651893432, "grad_norm": 0.7940077185630798, "learning_rate": 3.2695149068358765e-06, "loss": 0.1871, "step": 32975 }, { "epoch": 0.7351201145699633, "grad_norm": 0.6166383624076843, "learning_rate": 3.2669257504084317e-06, "loss": 0.2697, "step": 32980 }, { "epoch": 0.7352315639505833, "grad_norm": 0.5002848505973816, "learning_rate": 3.264337419403922e-06, "loss": 0.2979, "step": 32985 }, { "epoch": 0.7353430133312034, "grad_norm": 0.5366833209991455, "learning_rate": 3.2617499141396504e-06, "loss": 0.2495, "step": 32990 }, { "epoch": 0.7354544627118235, "grad_norm": 0.890483558177948, "learning_rate": 3.2591632349328305e-06, "loss": 0.3691, "step": 32995 }, { "epoch": 0.7355659120924436, "grad_norm": 0.5453323721885681, "learning_rate": 3.2565773821005663e-06, "loss": 0.2533, "step": 33000 }, { "epoch": 0.7356773614730636, "grad_norm": 0.56355220079422, "learning_rate": 3.2539923559598654e-06, "loss": 0.1984, "step": 33005 }, { "epoch": 0.7357888108536836, "grad_norm": 0.5132585763931274, "learning_rate": 3.251408156827629e-06, "loss": 0.2963, "step": 33010 }, { "epoch": 0.7359002602343038, "grad_norm": 0.5574620366096497, "learning_rate": 3.248824785020659e-06, "loss": 0.3047, "step": 33015 }, { "epoch": 0.7360117096149238, "grad_norm": 0.7912201285362244, "learning_rate": 3.246242240855663e-06, "loss": 0.2632, "step": 33020 }, { "epoch": 0.7361231589955439, "grad_norm": 0.4491974115371704, "learning_rate": 3.2436605246492337e-06, "loss": 0.2854, "step": 33025 }, { "epoch": 0.736234608376164, "grad_norm": 0.8769235014915466, "learning_rate": 3.2410796367178753e-06, "loss": 0.3817, "step": 33030 }, { "epoch": 0.736346057756784, "grad_norm": 0.6464642286300659, "learning_rate": 3.238499577377984e-06, "loss": 0.3015, "step": 33035 }, { "epoch": 0.7364575071374041, "grad_norm": 1.0190484523773193, "learning_rate": 3.235920346945852e-06, "loss": 0.3064, "step": 33040 }, { "epoch": 0.7365689565180241, "grad_norm": 0.7795517444610596, "learning_rate": 3.2333419457376734e-06, "loss": 0.279, "step": 33045 }, { "epoch": 0.7366804058986443, "grad_norm": 0.5025030374526978, "learning_rate": 3.2307643740695437e-06, "loss": 0.2187, "step": 33050 }, { "epoch": 0.7367918552792643, "grad_norm": 0.5091804265975952, "learning_rate": 3.228187632257452e-06, "loss": 0.3016, "step": 33055 }, { "epoch": 0.7369033046598844, "grad_norm": 1.0234618186950684, "learning_rate": 3.225611720617283e-06, "loss": 0.1708, "step": 33060 }, { "epoch": 0.7370147540405044, "grad_norm": 0.7228109836578369, "learning_rate": 3.223036639464829e-06, "loss": 0.4115, "step": 33065 }, { "epoch": 0.7371262034211244, "grad_norm": 0.5997233390808105, "learning_rate": 3.220462389115774e-06, "loss": 0.2367, "step": 33070 }, { "epoch": 0.7372376528017446, "grad_norm": 0.7658345103263855, "learning_rate": 3.2178889698856964e-06, "loss": 0.2493, "step": 33075 }, { "epoch": 0.7373491021823646, "grad_norm": 0.6293799877166748, "learning_rate": 3.2153163820900844e-06, "loss": 0.3571, "step": 33080 }, { "epoch": 0.7374605515629847, "grad_norm": 0.7071552276611328, "learning_rate": 3.212744626044315e-06, "loss": 0.2756, "step": 33085 }, { "epoch": 0.7375720009436048, "grad_norm": 0.5751235485076904, "learning_rate": 3.2101737020636637e-06, "loss": 0.3348, "step": 33090 }, { "epoch": 0.7376834503242248, "grad_norm": 0.8929909467697144, "learning_rate": 3.2076036104633048e-06, "loss": 0.348, "step": 33095 }, { "epoch": 0.7377948997048449, "grad_norm": 0.6280662417411804, "learning_rate": 3.205034351558317e-06, "loss": 0.3748, "step": 33100 }, { "epoch": 0.7379063490854649, "grad_norm": 0.7077507972717285, "learning_rate": 3.202465925663668e-06, "loss": 0.3992, "step": 33105 }, { "epoch": 0.7380177984660851, "grad_norm": 0.7908935546875, "learning_rate": 3.1998983330942246e-06, "loss": 0.2469, "step": 33110 }, { "epoch": 0.7381292478467051, "grad_norm": 0.39649245142936707, "learning_rate": 3.1973315741647605e-06, "loss": 0.155, "step": 33115 }, { "epoch": 0.7382406972273251, "grad_norm": 0.9612866640090942, "learning_rate": 3.194765649189937e-06, "loss": 0.3344, "step": 33120 }, { "epoch": 0.7383521466079452, "grad_norm": 0.5874886512756348, "learning_rate": 3.1922005584843163e-06, "loss": 0.2799, "step": 33125 }, { "epoch": 0.7384635959885653, "grad_norm": 0.523689866065979, "learning_rate": 3.1896363023623557e-06, "loss": 0.1934, "step": 33130 }, { "epoch": 0.7385750453691854, "grad_norm": 0.42125797271728516, "learning_rate": 3.18707288113842e-06, "loss": 0.2687, "step": 33135 }, { "epoch": 0.7386864947498054, "grad_norm": 1.1294206380844116, "learning_rate": 3.184510295126757e-06, "loss": 0.3863, "step": 33140 }, { "epoch": 0.7387979441304255, "grad_norm": 0.5358449816703796, "learning_rate": 3.1819485446415287e-06, "loss": 0.3612, "step": 33145 }, { "epoch": 0.7389093935110456, "grad_norm": 1.1763213872909546, "learning_rate": 3.179387629996782e-06, "loss": 0.3059, "step": 33150 }, { "epoch": 0.7390208428916656, "grad_norm": 0.4582350254058838, "learning_rate": 3.1768275515064638e-06, "loss": 0.3229, "step": 33155 }, { "epoch": 0.7391322922722857, "grad_norm": 0.4934980571269989, "learning_rate": 3.1742683094844206e-06, "loss": 0.4742, "step": 33160 }, { "epoch": 0.7392437416529057, "grad_norm": 0.6613831520080566, "learning_rate": 3.171709904244393e-06, "loss": 0.2208, "step": 33165 }, { "epoch": 0.7393551910335259, "grad_norm": 0.4670477509498596, "learning_rate": 3.1691523361000265e-06, "loss": 0.2436, "step": 33170 }, { "epoch": 0.7394666404141459, "grad_norm": 0.49927234649658203, "learning_rate": 3.1665956053648594e-06, "loss": 0.3054, "step": 33175 }, { "epoch": 0.7395780897947659, "grad_norm": 0.6709491610527039, "learning_rate": 3.164039712352325e-06, "loss": 0.2551, "step": 33180 }, { "epoch": 0.739689539175386, "grad_norm": 0.5243662595748901, "learning_rate": 3.1614846573757572e-06, "loss": 0.3787, "step": 33185 }, { "epoch": 0.7398009885560061, "grad_norm": 0.7863882184028625, "learning_rate": 3.1589304407483844e-06, "loss": 0.2148, "step": 33190 }, { "epoch": 0.7399124379366262, "grad_norm": 0.5960976481437683, "learning_rate": 3.1563770627833356e-06, "loss": 0.4045, "step": 33195 }, { "epoch": 0.7400238873172462, "grad_norm": 0.7527463436126709, "learning_rate": 3.1538245237936304e-06, "loss": 0.2915, "step": 33200 }, { "epoch": 0.7401353366978664, "grad_norm": 0.4676433801651001, "learning_rate": 3.1512728240921943e-06, "loss": 0.3479, "step": 33205 }, { "epoch": 0.7402467860784864, "grad_norm": 0.4947379231452942, "learning_rate": 3.1487219639918487e-06, "loss": 0.292, "step": 33210 }, { "epoch": 0.7403582354591064, "grad_norm": 0.48669198155403137, "learning_rate": 3.1461719438053073e-06, "loss": 0.3316, "step": 33215 }, { "epoch": 0.7404696848397265, "grad_norm": 0.4809311032295227, "learning_rate": 3.143622763845181e-06, "loss": 0.2504, "step": 33220 }, { "epoch": 0.7405811342203465, "grad_norm": 0.6982517838478088, "learning_rate": 3.141074424423982e-06, "loss": 0.4138, "step": 33225 }, { "epoch": 0.7406925836009667, "grad_norm": 0.7339484095573425, "learning_rate": 3.138526925854112e-06, "loss": 0.3286, "step": 33230 }, { "epoch": 0.7408040329815867, "grad_norm": 0.5350735783576965, "learning_rate": 3.135980268447879e-06, "loss": 0.2829, "step": 33235 }, { "epoch": 0.7409154823622067, "grad_norm": 0.4297383427619934, "learning_rate": 3.1334344525174854e-06, "loss": 0.3504, "step": 33240 }, { "epoch": 0.7410269317428269, "grad_norm": 0.8133834004402161, "learning_rate": 3.1308894783750265e-06, "loss": 0.2604, "step": 33245 }, { "epoch": 0.7411383811234469, "grad_norm": 0.5834691524505615, "learning_rate": 3.1283453463324966e-06, "loss": 0.2232, "step": 33250 }, { "epoch": 0.741249830504067, "grad_norm": 1.1657015085220337, "learning_rate": 3.1258020567017855e-06, "loss": 0.2746, "step": 33255 }, { "epoch": 0.741361279884687, "grad_norm": 0.6553075313568115, "learning_rate": 3.123259609794679e-06, "loss": 0.196, "step": 33260 }, { "epoch": 0.7414727292653072, "grad_norm": 0.4720059633255005, "learning_rate": 3.1207180059228657e-06, "loss": 0.3207, "step": 33265 }, { "epoch": 0.7415841786459272, "grad_norm": 0.7128260731697083, "learning_rate": 3.1181772453979242e-06, "loss": 0.2255, "step": 33270 }, { "epoch": 0.7416956280265472, "grad_norm": 0.6636922359466553, "learning_rate": 3.1156373285313346e-06, "loss": 0.3006, "step": 33275 }, { "epoch": 0.7418070774071673, "grad_norm": 0.5027670860290527, "learning_rate": 3.113098255634469e-06, "loss": 0.3242, "step": 33280 }, { "epoch": 0.7419185267877874, "grad_norm": 1.0309278964996338, "learning_rate": 3.1105600270186e-06, "loss": 0.2365, "step": 33285 }, { "epoch": 0.7420299761684075, "grad_norm": 0.7465946674346924, "learning_rate": 3.108022642994892e-06, "loss": 0.3953, "step": 33290 }, { "epoch": 0.7421414255490275, "grad_norm": 0.4628431797027588, "learning_rate": 3.1054861038744076e-06, "loss": 0.3051, "step": 33295 }, { "epoch": 0.7422528749296475, "grad_norm": 0.6248934268951416, "learning_rate": 3.102950409968113e-06, "loss": 0.2942, "step": 33300 }, { "epoch": 0.7423643243102677, "grad_norm": 0.4816969335079193, "learning_rate": 3.100415561586857e-06, "loss": 0.1637, "step": 33305 }, { "epoch": 0.7424757736908877, "grad_norm": 0.6795361042022705, "learning_rate": 3.0978815590414e-06, "loss": 0.195, "step": 33310 }, { "epoch": 0.7425872230715078, "grad_norm": 0.8494073152542114, "learning_rate": 3.0953484026423875e-06, "loss": 0.2266, "step": 33315 }, { "epoch": 0.7426986724521278, "grad_norm": 0.9777247309684753, "learning_rate": 3.092816092700366e-06, "loss": 0.2737, "step": 33320 }, { "epoch": 0.7428101218327479, "grad_norm": 0.6463773250579834, "learning_rate": 3.0902846295257715e-06, "loss": 0.2583, "step": 33325 }, { "epoch": 0.742921571213368, "grad_norm": 0.7945129871368408, "learning_rate": 3.087754013428951e-06, "loss": 0.3514, "step": 33330 }, { "epoch": 0.743033020593988, "grad_norm": 0.6409342288970947, "learning_rate": 3.0852242447201343e-06, "loss": 0.2827, "step": 33335 }, { "epoch": 0.7431444699746081, "grad_norm": 0.41631874442100525, "learning_rate": 3.082695323709447e-06, "loss": 0.1967, "step": 33340 }, { "epoch": 0.7432559193552282, "grad_norm": 0.6244701743125916, "learning_rate": 3.0801672507069237e-06, "loss": 0.3629, "step": 33345 }, { "epoch": 0.7433673687358483, "grad_norm": 0.5667757391929626, "learning_rate": 3.0776400260224825e-06, "loss": 0.3233, "step": 33350 }, { "epoch": 0.7434788181164683, "grad_norm": 0.5378469824790955, "learning_rate": 3.0751136499659384e-06, "loss": 0.1566, "step": 33355 }, { "epoch": 0.7435902674970883, "grad_norm": 0.7033135294914246, "learning_rate": 3.072588122847012e-06, "loss": 0.3346, "step": 33360 }, { "epoch": 0.7437017168777085, "grad_norm": 0.7798947691917419, "learning_rate": 3.0700634449753097e-06, "loss": 0.2435, "step": 33365 }, { "epoch": 0.7438131662583285, "grad_norm": 0.3774152398109436, "learning_rate": 3.067539616660339e-06, "loss": 0.2378, "step": 33370 }, { "epoch": 0.7439246156389486, "grad_norm": 0.49303340911865234, "learning_rate": 3.065016638211501e-06, "loss": 0.2845, "step": 33375 }, { "epoch": 0.7440360650195686, "grad_norm": 0.5648555755615234, "learning_rate": 3.06249450993809e-06, "loss": 0.2918, "step": 33380 }, { "epoch": 0.7441475144001887, "grad_norm": 0.8499997854232788, "learning_rate": 3.0599732321493025e-06, "loss": 0.2115, "step": 33385 }, { "epoch": 0.7442589637808088, "grad_norm": 0.6331583857536316, "learning_rate": 3.057452805154231e-06, "loss": 0.3151, "step": 33390 }, { "epoch": 0.7443704131614288, "grad_norm": 0.5493272542953491, "learning_rate": 3.054933229261857e-06, "loss": 0.2756, "step": 33395 }, { "epoch": 0.744481862542049, "grad_norm": 0.6646212339401245, "learning_rate": 3.0524145047810625e-06, "loss": 0.3277, "step": 33400 }, { "epoch": 0.744593311922669, "grad_norm": 0.5860058069229126, "learning_rate": 3.0498966320206213e-06, "loss": 0.2758, "step": 33405 }, { "epoch": 0.7447047613032891, "grad_norm": 0.6671875715255737, "learning_rate": 3.047379611289207e-06, "loss": 0.289, "step": 33410 }, { "epoch": 0.7448162106839091, "grad_norm": 0.7462060451507568, "learning_rate": 3.0448634428953837e-06, "loss": 0.291, "step": 33415 }, { "epoch": 0.7449276600645292, "grad_norm": 0.7190172076225281, "learning_rate": 3.042348127147616e-06, "loss": 0.4172, "step": 33420 }, { "epoch": 0.7450391094451493, "grad_norm": 0.6614157557487488, "learning_rate": 3.039833664354268e-06, "loss": 0.3088, "step": 33425 }, { "epoch": 0.7451505588257693, "grad_norm": 0.4733962118625641, "learning_rate": 3.037320054823587e-06, "loss": 0.2293, "step": 33430 }, { "epoch": 0.7452620082063894, "grad_norm": 0.6521340608596802, "learning_rate": 3.0348072988637235e-06, "loss": 0.2871, "step": 33435 }, { "epoch": 0.7453734575870095, "grad_norm": 0.6829430460929871, "learning_rate": 3.032295396782723e-06, "loss": 0.1454, "step": 33440 }, { "epoch": 0.7454849069676295, "grad_norm": 0.6285884380340576, "learning_rate": 3.0297843488885204e-06, "loss": 0.2307, "step": 33445 }, { "epoch": 0.7455963563482496, "grad_norm": 0.837503969669342, "learning_rate": 3.0272741554889563e-06, "loss": 0.2876, "step": 33450 }, { "epoch": 0.7457078057288696, "grad_norm": 0.8544986844062805, "learning_rate": 3.024764816891761e-06, "loss": 0.2674, "step": 33455 }, { "epoch": 0.7458192551094898, "grad_norm": 0.5040220618247986, "learning_rate": 3.0222563334045596e-06, "loss": 0.2078, "step": 33460 }, { "epoch": 0.7459307044901098, "grad_norm": 0.7368492484092712, "learning_rate": 3.0197487053348715e-06, "loss": 0.3709, "step": 33465 }, { "epoch": 0.7460421538707299, "grad_norm": 0.9807363748550415, "learning_rate": 3.0172419329901126e-06, "loss": 0.3531, "step": 33470 }, { "epoch": 0.7461536032513499, "grad_norm": 0.5130937695503235, "learning_rate": 3.0147360166775907e-06, "loss": 0.2775, "step": 33475 }, { "epoch": 0.74626505263197, "grad_norm": 0.49336349964141846, "learning_rate": 3.0122309567045194e-06, "loss": 0.2453, "step": 33480 }, { "epoch": 0.7463765020125901, "grad_norm": 0.9051334857940674, "learning_rate": 3.0097267533779915e-06, "loss": 0.3578, "step": 33485 }, { "epoch": 0.7464879513932101, "grad_norm": 0.3698303997516632, "learning_rate": 3.007223407005011e-06, "loss": 0.3242, "step": 33490 }, { "epoch": 0.7465994007738302, "grad_norm": 0.6289532780647278, "learning_rate": 3.004720917892464e-06, "loss": 0.2575, "step": 33495 }, { "epoch": 0.7467108501544503, "grad_norm": 0.5866841673851013, "learning_rate": 3.002219286347138e-06, "loss": 0.3025, "step": 33500 }, { "epoch": 0.7468222995350703, "grad_norm": 0.7117490768432617, "learning_rate": 2.999718512675712e-06, "loss": 0.2121, "step": 33505 }, { "epoch": 0.7469337489156904, "grad_norm": 0.6077593564987183, "learning_rate": 2.997218597184759e-06, "loss": 0.3158, "step": 33510 }, { "epoch": 0.7470451982963104, "grad_norm": 0.6103549003601074, "learning_rate": 2.9947195401807573e-06, "loss": 0.2382, "step": 33515 }, { "epoch": 0.7471566476769306, "grad_norm": 0.6339691281318665, "learning_rate": 2.992221341970064e-06, "loss": 0.2428, "step": 33520 }, { "epoch": 0.7472680970575506, "grad_norm": 0.6688328385353088, "learning_rate": 2.9897240028589447e-06, "loss": 0.3443, "step": 33525 }, { "epoch": 0.7473795464381706, "grad_norm": 0.7099976539611816, "learning_rate": 2.9872275231535518e-06, "loss": 0.3767, "step": 33530 }, { "epoch": 0.7474909958187907, "grad_norm": 0.49107834696769714, "learning_rate": 2.9847319031599353e-06, "loss": 0.3656, "step": 33535 }, { "epoch": 0.7476024451994108, "grad_norm": 0.7511234879493713, "learning_rate": 2.9822371431840346e-06, "loss": 0.2243, "step": 33540 }, { "epoch": 0.7477138945800309, "grad_norm": 0.483070433139801, "learning_rate": 2.979743243531693e-06, "loss": 0.2865, "step": 33545 }, { "epoch": 0.7478253439606509, "grad_norm": 0.7143993377685547, "learning_rate": 2.9772502045086438e-06, "loss": 0.2051, "step": 33550 }, { "epoch": 0.7479367933412711, "grad_norm": 0.5198782086372375, "learning_rate": 2.9747580264205077e-06, "loss": 0.3119, "step": 33555 }, { "epoch": 0.7480482427218911, "grad_norm": 0.8435953855514526, "learning_rate": 2.9722667095728142e-06, "loss": 0.2516, "step": 33560 }, { "epoch": 0.7481596921025111, "grad_norm": 0.5863881707191467, "learning_rate": 2.9697762542709764e-06, "loss": 0.2087, "step": 33565 }, { "epoch": 0.7482711414831312, "grad_norm": 0.5257229804992676, "learning_rate": 2.967286660820302e-06, "loss": 0.2725, "step": 33570 }, { "epoch": 0.7483825908637513, "grad_norm": 0.2609884738922119, "learning_rate": 2.964797929526002e-06, "loss": 0.318, "step": 33575 }, { "epoch": 0.7484940402443714, "grad_norm": 0.6922463178634644, "learning_rate": 2.962310060693172e-06, "loss": 0.3863, "step": 33580 }, { "epoch": 0.7486054896249914, "grad_norm": 0.4755772054195404, "learning_rate": 2.9598230546268057e-06, "loss": 0.2543, "step": 33585 }, { "epoch": 0.7487169390056114, "grad_norm": 0.4754803776741028, "learning_rate": 2.9573369116317885e-06, "loss": 0.3667, "step": 33590 }, { "epoch": 0.7488283883862316, "grad_norm": 0.7210944294929504, "learning_rate": 2.9548516320129085e-06, "loss": 0.2127, "step": 33595 }, { "epoch": 0.7489398377668516, "grad_norm": 0.5226638317108154, "learning_rate": 2.9523672160748375e-06, "loss": 0.2286, "step": 33600 }, { "epoch": 0.7490512871474717, "grad_norm": 0.4636479616165161, "learning_rate": 2.949883664122143e-06, "loss": 0.2489, "step": 33605 }, { "epoch": 0.7491627365280917, "grad_norm": 0.5760490894317627, "learning_rate": 2.947400976459297e-06, "loss": 0.3907, "step": 33610 }, { "epoch": 0.7492741859087119, "grad_norm": 0.4980350732803345, "learning_rate": 2.9449191533906527e-06, "loss": 0.2128, "step": 33615 }, { "epoch": 0.7493856352893319, "grad_norm": 0.41088926792144775, "learning_rate": 2.9424381952204638e-06, "loss": 0.2411, "step": 33620 }, { "epoch": 0.7494970846699519, "grad_norm": 0.485420286655426, "learning_rate": 2.9399581022528724e-06, "loss": 0.2114, "step": 33625 }, { "epoch": 0.749608534050572, "grad_norm": 0.648485004901886, "learning_rate": 2.937478874791926e-06, "loss": 0.4191, "step": 33630 }, { "epoch": 0.7497199834311921, "grad_norm": 0.6990744471549988, "learning_rate": 2.9350005131415514e-06, "loss": 0.4039, "step": 33635 }, { "epoch": 0.7498314328118122, "grad_norm": 0.8828688859939575, "learning_rate": 2.932523017605585e-06, "loss": 0.3454, "step": 33640 }, { "epoch": 0.7499428821924322, "grad_norm": 0.4837510287761688, "learning_rate": 2.9300463884877428e-06, "loss": 0.2088, "step": 33645 }, { "epoch": 0.7500543315730522, "grad_norm": 0.48634445667266846, "learning_rate": 2.9275706260916427e-06, "loss": 0.3017, "step": 33650 }, { "epoch": 0.7501657809536724, "grad_norm": 0.551918625831604, "learning_rate": 2.925095730720793e-06, "loss": 0.1801, "step": 33655 }, { "epoch": 0.7502772303342924, "grad_norm": 0.7558825016021729, "learning_rate": 2.922621702678593e-06, "loss": 0.3125, "step": 33660 }, { "epoch": 0.7503886797149125, "grad_norm": 0.5769863724708557, "learning_rate": 2.920148542268344e-06, "loss": 0.2318, "step": 33665 }, { "epoch": 0.7505001290955325, "grad_norm": 0.9255049824714661, "learning_rate": 2.9176762497932376e-06, "loss": 0.3873, "step": 33670 }, { "epoch": 0.7506115784761526, "grad_norm": 0.3768719732761383, "learning_rate": 2.915204825556357e-06, "loss": 0.2289, "step": 33675 }, { "epoch": 0.7507230278567727, "grad_norm": 0.6153138279914856, "learning_rate": 2.912734269860679e-06, "loss": 0.2159, "step": 33680 }, { "epoch": 0.7508344772373927, "grad_norm": 0.726574718952179, "learning_rate": 2.9102645830090724e-06, "loss": 0.3907, "step": 33685 }, { "epoch": 0.7509459266180128, "grad_norm": 0.8838280439376831, "learning_rate": 2.9077957653043022e-06, "loss": 0.3025, "step": 33690 }, { "epoch": 0.7510573759986329, "grad_norm": 0.5469415187835693, "learning_rate": 2.9053278170490262e-06, "loss": 0.2113, "step": 33695 }, { "epoch": 0.751168825379253, "grad_norm": 0.7691811919212341, "learning_rate": 2.902860738545802e-06, "loss": 0.2403, "step": 33700 }, { "epoch": 0.751280274759873, "grad_norm": 0.47786420583724976, "learning_rate": 2.9003945300970683e-06, "loss": 0.2594, "step": 33705 }, { "epoch": 0.751391724140493, "grad_norm": 0.5924757719039917, "learning_rate": 2.8979291920051655e-06, "loss": 0.3142, "step": 33710 }, { "epoch": 0.7515031735211132, "grad_norm": 0.5335355997085571, "learning_rate": 2.8954647245723245e-06, "loss": 0.2587, "step": 33715 }, { "epoch": 0.7516146229017332, "grad_norm": 0.4959110915660858, "learning_rate": 2.8930011281006685e-06, "loss": 0.1869, "step": 33720 }, { "epoch": 0.7517260722823533, "grad_norm": 0.8827904462814331, "learning_rate": 2.8905384028922144e-06, "loss": 0.4612, "step": 33725 }, { "epoch": 0.7518375216629734, "grad_norm": 0.3391810655593872, "learning_rate": 2.888076549248876e-06, "loss": 0.2682, "step": 33730 }, { "epoch": 0.7519489710435934, "grad_norm": 0.9731943607330322, "learning_rate": 2.8856155674724595e-06, "loss": 0.2434, "step": 33735 }, { "epoch": 0.7520604204242135, "grad_norm": 0.41712039709091187, "learning_rate": 2.883155457864659e-06, "loss": 0.3726, "step": 33740 }, { "epoch": 0.7521718698048335, "grad_norm": 0.7917229533195496, "learning_rate": 2.8806962207270673e-06, "loss": 0.3029, "step": 33745 }, { "epoch": 0.7522833191854537, "grad_norm": 0.7232569456100464, "learning_rate": 2.878237856361166e-06, "loss": 0.3067, "step": 33750 }, { "epoch": 0.7523947685660737, "grad_norm": 0.576991081237793, "learning_rate": 2.875780365068328e-06, "loss": 0.3046, "step": 33755 }, { "epoch": 0.7525062179466938, "grad_norm": 0.47883665561676025, "learning_rate": 2.87332374714983e-06, "loss": 0.2504, "step": 33760 }, { "epoch": 0.7526176673273138, "grad_norm": 0.4215628206729889, "learning_rate": 2.870868002906828e-06, "loss": 0.184, "step": 33765 }, { "epoch": 0.7527291167079339, "grad_norm": 0.6791629791259766, "learning_rate": 2.868413132640384e-06, "loss": 0.2995, "step": 33770 }, { "epoch": 0.752840566088554, "grad_norm": 0.5074769258499146, "learning_rate": 2.8659591366514426e-06, "loss": 0.2565, "step": 33775 }, { "epoch": 0.752952015469174, "grad_norm": 0.7514712810516357, "learning_rate": 2.8635060152408446e-06, "loss": 0.2893, "step": 33780 }, { "epoch": 0.7530634648497941, "grad_norm": 0.7852396368980408, "learning_rate": 2.86105376870932e-06, "loss": 0.3205, "step": 33785 }, { "epoch": 0.7531749142304142, "grad_norm": 0.42000624537467957, "learning_rate": 2.8586023973575027e-06, "loss": 0.3827, "step": 33790 }, { "epoch": 0.7532863636110342, "grad_norm": 0.23319512605667114, "learning_rate": 2.8561519014859087e-06, "loss": 0.2055, "step": 33795 }, { "epoch": 0.7533978129916543, "grad_norm": 0.5697981715202332, "learning_rate": 2.8537022813949456e-06, "loss": 0.1737, "step": 33800 }, { "epoch": 0.7535092623722743, "grad_norm": 0.5846883058547974, "learning_rate": 2.851253537384926e-06, "loss": 0.2479, "step": 33805 }, { "epoch": 0.7536207117528945, "grad_norm": 0.56658536195755, "learning_rate": 2.848805669756042e-06, "loss": 0.2692, "step": 33810 }, { "epoch": 0.7537321611335145, "grad_norm": 0.3844006359577179, "learning_rate": 2.8463586788083843e-06, "loss": 0.2243, "step": 33815 }, { "epoch": 0.7538436105141346, "grad_norm": 0.5789145827293396, "learning_rate": 2.843912564841932e-06, "loss": 0.115, "step": 33820 }, { "epoch": 0.7539550598947546, "grad_norm": 0.5536205172538757, "learning_rate": 2.8414673281565663e-06, "loss": 0.2793, "step": 33825 }, { "epoch": 0.7540665092753747, "grad_norm": 0.8552709221839905, "learning_rate": 2.83902296905205e-06, "loss": 0.2786, "step": 33830 }, { "epoch": 0.7541779586559948, "grad_norm": 0.5636184215545654, "learning_rate": 2.8365794878280407e-06, "loss": 0.3251, "step": 33835 }, { "epoch": 0.7542894080366148, "grad_norm": 0.7049769759178162, "learning_rate": 2.8341368847840968e-06, "loss": 0.2594, "step": 33840 }, { "epoch": 0.754400857417235, "grad_norm": 0.661155104637146, "learning_rate": 2.8316951602196574e-06, "loss": 0.2518, "step": 33845 }, { "epoch": 0.754512306797855, "grad_norm": 0.5453686118125916, "learning_rate": 2.829254314434059e-06, "loss": 0.3564, "step": 33850 }, { "epoch": 0.754623756178475, "grad_norm": 0.5124586224555969, "learning_rate": 2.8268143477265343e-06, "loss": 0.2007, "step": 33855 }, { "epoch": 0.7547352055590951, "grad_norm": 0.7682088613510132, "learning_rate": 2.8243752603962014e-06, "loss": 0.3431, "step": 33860 }, { "epoch": 0.7548466549397151, "grad_norm": 0.7964093685150146, "learning_rate": 2.821937052742075e-06, "loss": 0.2964, "step": 33865 }, { "epoch": 0.7549581043203353, "grad_norm": 0.6934346556663513, "learning_rate": 2.8194997250630574e-06, "loss": 0.2396, "step": 33870 }, { "epoch": 0.7550695537009553, "grad_norm": 0.6436589956283569, "learning_rate": 2.817063277657951e-06, "loss": 0.2745, "step": 33875 }, { "epoch": 0.7551810030815753, "grad_norm": 0.7439756989479065, "learning_rate": 2.8146277108254396e-06, "loss": 0.3674, "step": 33880 }, { "epoch": 0.7552924524621955, "grad_norm": 0.6890519857406616, "learning_rate": 2.8121930248641117e-06, "loss": 0.235, "step": 33885 }, { "epoch": 0.7554039018428155, "grad_norm": 0.8035330176353455, "learning_rate": 2.809759220072438e-06, "loss": 0.2283, "step": 33890 }, { "epoch": 0.7555153512234356, "grad_norm": 0.7052496075630188, "learning_rate": 2.8073262967487825e-06, "loss": 0.3007, "step": 33895 }, { "epoch": 0.7556268006040556, "grad_norm": 0.744096577167511, "learning_rate": 2.804894255191405e-06, "loss": 0.1672, "step": 33900 }, { "epoch": 0.7557382499846758, "grad_norm": 0.8351132869720459, "learning_rate": 2.802463095698451e-06, "loss": 0.2797, "step": 33905 }, { "epoch": 0.7558496993652958, "grad_norm": 0.47542646527290344, "learning_rate": 2.8000328185679683e-06, "loss": 0.483, "step": 33910 }, { "epoch": 0.7559611487459158, "grad_norm": 0.5174663066864014, "learning_rate": 2.7976034240978834e-06, "loss": 0.2212, "step": 33915 }, { "epoch": 0.7560725981265359, "grad_norm": 0.7283490896224976, "learning_rate": 2.7951749125860274e-06, "loss": 0.2985, "step": 33920 }, { "epoch": 0.756184047507156, "grad_norm": 0.5224509239196777, "learning_rate": 2.792747284330115e-06, "loss": 0.2678, "step": 33925 }, { "epoch": 0.7562954968877761, "grad_norm": 0.4216907024383545, "learning_rate": 2.7903205396277546e-06, "loss": 0.1627, "step": 33930 }, { "epoch": 0.7564069462683961, "grad_norm": 0.5221124291419983, "learning_rate": 2.787894678776445e-06, "loss": 0.2285, "step": 33935 }, { "epoch": 0.7565183956490161, "grad_norm": 0.7804731726646423, "learning_rate": 2.7854697020735757e-06, "loss": 0.3794, "step": 33940 }, { "epoch": 0.7566298450296363, "grad_norm": 0.5530830025672913, "learning_rate": 2.783045609816434e-06, "loss": 0.2065, "step": 33945 }, { "epoch": 0.7567412944102563, "grad_norm": 1.1412625312805176, "learning_rate": 2.7806224023021966e-06, "loss": 0.3147, "step": 33950 }, { "epoch": 0.7568527437908764, "grad_norm": 0.6325271129608154, "learning_rate": 2.7782000798279276e-06, "loss": 0.2436, "step": 33955 }, { "epoch": 0.7569641931714964, "grad_norm": 0.5092622637748718, "learning_rate": 2.775778642690585e-06, "loss": 0.2813, "step": 33960 }, { "epoch": 0.7570756425521166, "grad_norm": 0.6763238310813904, "learning_rate": 2.7733580911870195e-06, "loss": 0.2637, "step": 33965 }, { "epoch": 0.7571870919327366, "grad_norm": 0.40753841400146484, "learning_rate": 2.770938425613967e-06, "loss": 0.2771, "step": 33970 }, { "epoch": 0.7572985413133566, "grad_norm": 0.5731813311576843, "learning_rate": 2.768519646268066e-06, "loss": 0.2672, "step": 33975 }, { "epoch": 0.7574099906939767, "grad_norm": 0.6931194067001343, "learning_rate": 2.7661017534458366e-06, "loss": 0.2182, "step": 33980 }, { "epoch": 0.7575214400745968, "grad_norm": 0.7261427640914917, "learning_rate": 2.7636847474436968e-06, "loss": 0.2065, "step": 33985 }, { "epoch": 0.7576328894552169, "grad_norm": 0.5500878095626831, "learning_rate": 2.7612686285579515e-06, "loss": 0.1377, "step": 33990 }, { "epoch": 0.7577443388358369, "grad_norm": 0.6357752680778503, "learning_rate": 2.758853397084799e-06, "loss": 0.1856, "step": 33995 }, { "epoch": 0.7578557882164569, "grad_norm": 0.5131103992462158, "learning_rate": 2.756439053320322e-06, "loss": 0.2762, "step": 34000 }, { "epoch": 0.7579672375970771, "grad_norm": 0.7534242868423462, "learning_rate": 2.7540255975605103e-06, "loss": 0.2826, "step": 34005 }, { "epoch": 0.7580786869776971, "grad_norm": 0.541512668132782, "learning_rate": 2.751613030101229e-06, "loss": 0.3079, "step": 34010 }, { "epoch": 0.7581901363583172, "grad_norm": 0.83238685131073, "learning_rate": 2.7492013512382376e-06, "loss": 0.3747, "step": 34015 }, { "epoch": 0.7583015857389372, "grad_norm": 0.4771921634674072, "learning_rate": 2.7467905612671975e-06, "loss": 0.2614, "step": 34020 }, { "epoch": 0.7584130351195574, "grad_norm": 0.8062953948974609, "learning_rate": 2.7443806604836474e-06, "loss": 0.2927, "step": 34025 }, { "epoch": 0.7585244845001774, "grad_norm": 1.1034796237945557, "learning_rate": 2.741971649183024e-06, "loss": 0.2625, "step": 34030 }, { "epoch": 0.7586359338807974, "grad_norm": 0.43950408697128296, "learning_rate": 2.739563527660649e-06, "loss": 0.1446, "step": 34035 }, { "epoch": 0.7587473832614176, "grad_norm": 0.7604075074195862, "learning_rate": 2.737156296211748e-06, "loss": 0.2146, "step": 34040 }, { "epoch": 0.7588588326420376, "grad_norm": 0.5220000147819519, "learning_rate": 2.734749955131424e-06, "loss": 0.2342, "step": 34045 }, { "epoch": 0.7589702820226577, "grad_norm": 0.44721153378486633, "learning_rate": 2.7323445047146724e-06, "loss": 0.3044, "step": 34050 }, { "epoch": 0.7590817314032777, "grad_norm": 0.3938583731651306, "learning_rate": 2.7299399452563912e-06, "loss": 0.3128, "step": 34055 }, { "epoch": 0.7591931807838977, "grad_norm": 0.588921070098877, "learning_rate": 2.727536277051356e-06, "loss": 0.2765, "step": 34060 }, { "epoch": 0.7593046301645179, "grad_norm": 0.4165632128715515, "learning_rate": 2.725133500394235e-06, "loss": 0.2777, "step": 34065 }, { "epoch": 0.7594160795451379, "grad_norm": 0.638014554977417, "learning_rate": 2.722731615579597e-06, "loss": 0.3232, "step": 34070 }, { "epoch": 0.759527528925758, "grad_norm": 0.6121829152107239, "learning_rate": 2.720330622901891e-06, "loss": 0.2997, "step": 34075 }, { "epoch": 0.759638978306378, "grad_norm": 0.6752800941467285, "learning_rate": 2.7179305226554597e-06, "loss": 0.3141, "step": 34080 }, { "epoch": 0.7597504276869981, "grad_norm": 0.481235533952713, "learning_rate": 2.7155313151345354e-06, "loss": 0.2172, "step": 34085 }, { "epoch": 0.7598618770676182, "grad_norm": 0.4340780973434448, "learning_rate": 2.7131330006332467e-06, "loss": 0.2937, "step": 34090 }, { "epoch": 0.7599733264482382, "grad_norm": 0.2692956328392029, "learning_rate": 2.7107355794456026e-06, "loss": 0.3429, "step": 34095 }, { "epoch": 0.7600847758288584, "grad_norm": 0.5197578072547913, "learning_rate": 2.7083390518655163e-06, "loss": 0.2589, "step": 34100 }, { "epoch": 0.7601962252094784, "grad_norm": 0.4632870852947235, "learning_rate": 2.7059434181867783e-06, "loss": 0.3697, "step": 34105 }, { "epoch": 0.7603076745900985, "grad_norm": 0.5069155097007751, "learning_rate": 2.7035486787030753e-06, "loss": 0.2879, "step": 34110 }, { "epoch": 0.7604191239707185, "grad_norm": 0.2655554711818695, "learning_rate": 2.7011548337079853e-06, "loss": 0.2065, "step": 34115 }, { "epoch": 0.7605305733513386, "grad_norm": 0.7087981104850769, "learning_rate": 2.6987618834949715e-06, "loss": 0.3412, "step": 34120 }, { "epoch": 0.7606420227319587, "grad_norm": 0.4668249487876892, "learning_rate": 2.6963698283573958e-06, "loss": 0.274, "step": 34125 }, { "epoch": 0.7607534721125787, "grad_norm": 0.7287569642066956, "learning_rate": 2.6939786685885016e-06, "loss": 0.309, "step": 34130 }, { "epoch": 0.7608649214931988, "grad_norm": 0.5991645455360413, "learning_rate": 2.691588404481431e-06, "loss": 0.2946, "step": 34135 }, { "epoch": 0.7609763708738189, "grad_norm": 0.482779324054718, "learning_rate": 2.6891990363292107e-06, "loss": 0.2796, "step": 34140 }, { "epoch": 0.7610878202544389, "grad_norm": 0.5827972888946533, "learning_rate": 2.686810564424758e-06, "loss": 0.1942, "step": 34145 }, { "epoch": 0.761199269635059, "grad_norm": 0.673029899597168, "learning_rate": 2.6844229890608796e-06, "loss": 0.3049, "step": 34150 }, { "epoch": 0.761310719015679, "grad_norm": 0.5888857841491699, "learning_rate": 2.682036310530273e-06, "loss": 0.2724, "step": 34155 }, { "epoch": 0.7614221683962992, "grad_norm": 0.6745133399963379, "learning_rate": 2.679650529125528e-06, "loss": 0.2954, "step": 34160 }, { "epoch": 0.7615336177769192, "grad_norm": 0.6212778091430664, "learning_rate": 2.6772656451391277e-06, "loss": 0.3062, "step": 34165 }, { "epoch": 0.7616450671575393, "grad_norm": 0.9315680861473083, "learning_rate": 2.6748816588634363e-06, "loss": 0.3048, "step": 34170 }, { "epoch": 0.7617565165381593, "grad_norm": 0.6840492486953735, "learning_rate": 2.6724985705907114e-06, "loss": 0.2465, "step": 34175 }, { "epoch": 0.7618679659187794, "grad_norm": 0.7086824774742126, "learning_rate": 2.670116380613104e-06, "loss": 0.3416, "step": 34180 }, { "epoch": 0.7619794152993995, "grad_norm": 0.330198734998703, "learning_rate": 2.667735089222645e-06, "loss": 0.1631, "step": 34185 }, { "epoch": 0.7620908646800195, "grad_norm": 0.9546667337417603, "learning_rate": 2.6653546967112687e-06, "loss": 0.3951, "step": 34190 }, { "epoch": 0.7622023140606397, "grad_norm": 0.6984142065048218, "learning_rate": 2.6629752033707956e-06, "loss": 0.1967, "step": 34195 }, { "epoch": 0.7623137634412597, "grad_norm": 0.7566418051719666, "learning_rate": 2.660596609492928e-06, "loss": 0.3162, "step": 34200 }, { "epoch": 0.7624252128218797, "grad_norm": 0.680320143699646, "learning_rate": 2.658218915369265e-06, "loss": 0.2, "step": 34205 }, { "epoch": 0.7625366622024998, "grad_norm": 0.5272489786148071, "learning_rate": 2.6558421212912945e-06, "loss": 0.267, "step": 34210 }, { "epoch": 0.7626481115831198, "grad_norm": 0.8857879042625427, "learning_rate": 2.6534662275503877e-06, "loss": 0.3184, "step": 34215 }, { "epoch": 0.76275956096374, "grad_norm": 0.6333488821983337, "learning_rate": 2.651091234437817e-06, "loss": 0.2489, "step": 34220 }, { "epoch": 0.76287101034436, "grad_norm": 0.5380803346633911, "learning_rate": 2.6487171422447344e-06, "loss": 0.1641, "step": 34225 }, { "epoch": 0.7629824597249801, "grad_norm": 0.9589352011680603, "learning_rate": 2.646343951262189e-06, "loss": 0.3971, "step": 34230 }, { "epoch": 0.7630939091056002, "grad_norm": 0.3139306604862213, "learning_rate": 2.643971661781114e-06, "loss": 0.2628, "step": 34235 }, { "epoch": 0.7632053584862202, "grad_norm": 0.7108866572380066, "learning_rate": 2.641600274092334e-06, "loss": 0.2655, "step": 34240 }, { "epoch": 0.7633168078668403, "grad_norm": 0.7292156219482422, "learning_rate": 2.6392297884865613e-06, "loss": 0.2955, "step": 34245 }, { "epoch": 0.7634282572474603, "grad_norm": 0.3986571729183197, "learning_rate": 2.636860205254398e-06, "loss": 0.3701, "step": 34250 }, { "epoch": 0.7635397066280805, "grad_norm": 0.41875624656677246, "learning_rate": 2.634491524686341e-06, "loss": 0.3138, "step": 34255 }, { "epoch": 0.7636511560087005, "grad_norm": 0.8360694050788879, "learning_rate": 2.6321237470727678e-06, "loss": 0.2779, "step": 34260 }, { "epoch": 0.7637626053893205, "grad_norm": 0.3968079090118408, "learning_rate": 2.6297568727039547e-06, "loss": 0.3343, "step": 34265 }, { "epoch": 0.7638740547699406, "grad_norm": 0.8168555498123169, "learning_rate": 2.6273909018700595e-06, "loss": 0.3282, "step": 34270 }, { "epoch": 0.7639855041505607, "grad_norm": 0.8862971663475037, "learning_rate": 2.625025834861131e-06, "loss": 0.247, "step": 34275 }, { "epoch": 0.7640969535311808, "grad_norm": 0.524175763130188, "learning_rate": 2.6226616719671074e-06, "loss": 0.2793, "step": 34280 }, { "epoch": 0.7642084029118008, "grad_norm": 0.9022205471992493, "learning_rate": 2.620298413477821e-06, "loss": 0.2865, "step": 34285 }, { "epoch": 0.7643198522924208, "grad_norm": 0.5455117225646973, "learning_rate": 2.617936059682986e-06, "loss": 0.3403, "step": 34290 }, { "epoch": 0.764431301673041, "grad_norm": 0.7895170450210571, "learning_rate": 2.615574610872208e-06, "loss": 0.3012, "step": 34295 }, { "epoch": 0.764542751053661, "grad_norm": 0.4839140474796295, "learning_rate": 2.6132140673349847e-06, "loss": 0.3254, "step": 34300 }, { "epoch": 0.7646542004342811, "grad_norm": 0.6854641437530518, "learning_rate": 2.610854429360701e-06, "loss": 0.301, "step": 34305 }, { "epoch": 0.7647656498149011, "grad_norm": 0.6549224853515625, "learning_rate": 2.6084956972386245e-06, "loss": 0.3275, "step": 34310 }, { "epoch": 0.7648770991955213, "grad_norm": 0.5638486742973328, "learning_rate": 2.6061378712579255e-06, "loss": 0.228, "step": 34315 }, { "epoch": 0.7649885485761413, "grad_norm": 0.526157557964325, "learning_rate": 2.603780951707652e-06, "loss": 0.2646, "step": 34320 }, { "epoch": 0.7650999979567613, "grad_norm": 0.5761968493461609, "learning_rate": 2.6014249388767433e-06, "loss": 0.2787, "step": 34325 }, { "epoch": 0.7652114473373814, "grad_norm": 0.7018436193466187, "learning_rate": 2.5990698330540267e-06, "loss": 0.3231, "step": 34330 }, { "epoch": 0.7653228967180015, "grad_norm": 0.6506215929985046, "learning_rate": 2.596715634528224e-06, "loss": 0.2319, "step": 34335 }, { "epoch": 0.7654343460986216, "grad_norm": 0.5280274152755737, "learning_rate": 2.594362343587941e-06, "loss": 0.29, "step": 34340 }, { "epoch": 0.7655457954792416, "grad_norm": 0.6760926246643066, "learning_rate": 2.5920099605216677e-06, "loss": 0.2271, "step": 34345 }, { "epoch": 0.7656572448598616, "grad_norm": 0.6700071692466736, "learning_rate": 2.5896584856177954e-06, "loss": 0.3054, "step": 34350 }, { "epoch": 0.7657686942404818, "grad_norm": 0.5384727120399475, "learning_rate": 2.5873079191645944e-06, "loss": 0.2114, "step": 34355 }, { "epoch": 0.7658801436211018, "grad_norm": 0.6928741335868835, "learning_rate": 2.5849582614502255e-06, "loss": 0.2694, "step": 34360 }, { "epoch": 0.7659915930017219, "grad_norm": 0.7100231647491455, "learning_rate": 2.582609512762735e-06, "loss": 0.2787, "step": 34365 }, { "epoch": 0.766103042382342, "grad_norm": 0.5652828812599182, "learning_rate": 2.5802616733900674e-06, "loss": 0.3399, "step": 34370 }, { "epoch": 0.7662144917629621, "grad_norm": 0.6810146570205688, "learning_rate": 2.5779147436200448e-06, "loss": 0.2125, "step": 34375 }, { "epoch": 0.7663259411435821, "grad_norm": 0.5119982361793518, "learning_rate": 2.5755687237403882e-06, "loss": 0.2988, "step": 34380 }, { "epoch": 0.7664373905242021, "grad_norm": 0.7045231461524963, "learning_rate": 2.573223614038697e-06, "loss": 0.235, "step": 34385 }, { "epoch": 0.7665488399048223, "grad_norm": 0.6490983963012695, "learning_rate": 2.5708794148024654e-06, "loss": 0.2585, "step": 34390 }, { "epoch": 0.7666602892854423, "grad_norm": 0.7183946967124939, "learning_rate": 2.5685361263190735e-06, "loss": 0.3308, "step": 34395 }, { "epoch": 0.7667717386660624, "grad_norm": 0.7707793116569519, "learning_rate": 2.5661937488757883e-06, "loss": 0.2189, "step": 34400 }, { "epoch": 0.7668831880466824, "grad_norm": 0.335593581199646, "learning_rate": 2.563852282759768e-06, "loss": 0.2147, "step": 34405 }, { "epoch": 0.7669946374273025, "grad_norm": 0.4331473410129547, "learning_rate": 2.5615117282580627e-06, "loss": 0.2491, "step": 34410 }, { "epoch": 0.7671060868079226, "grad_norm": 0.8042201399803162, "learning_rate": 2.559172085657604e-06, "loss": 0.3988, "step": 34415 }, { "epoch": 0.7672175361885426, "grad_norm": 0.5171744227409363, "learning_rate": 2.556833355245213e-06, "loss": 0.2492, "step": 34420 }, { "epoch": 0.7673289855691627, "grad_norm": 0.6053786873817444, "learning_rate": 2.5544955373075997e-06, "loss": 0.3404, "step": 34425 }, { "epoch": 0.7674404349497828, "grad_norm": 0.437044620513916, "learning_rate": 2.552158632131363e-06, "loss": 0.2578, "step": 34430 }, { "epoch": 0.7675518843304028, "grad_norm": 0.8940054178237915, "learning_rate": 2.5498226400029867e-06, "loss": 0.2197, "step": 34435 }, { "epoch": 0.7676633337110229, "grad_norm": 0.7202461957931519, "learning_rate": 2.547487561208849e-06, "loss": 0.2935, "step": 34440 }, { "epoch": 0.7677747830916429, "grad_norm": 0.597625732421875, "learning_rate": 2.545153396035214e-06, "loss": 0.2747, "step": 34445 }, { "epoch": 0.7678862324722631, "grad_norm": 0.5841318368911743, "learning_rate": 2.54282014476823e-06, "loss": 0.2042, "step": 34450 }, { "epoch": 0.7679976818528831, "grad_norm": 0.7554879784584045, "learning_rate": 2.5404878076939364e-06, "loss": 0.3231, "step": 34455 }, { "epoch": 0.7681091312335032, "grad_norm": 0.4223545789718628, "learning_rate": 2.538156385098258e-06, "loss": 0.2919, "step": 34460 }, { "epoch": 0.7682205806141232, "grad_norm": 0.469969242811203, "learning_rate": 2.5358258772670073e-06, "loss": 0.2142, "step": 34465 }, { "epoch": 0.7683320299947433, "grad_norm": 0.563951313495636, "learning_rate": 2.5334962844858933e-06, "loss": 0.2619, "step": 34470 }, { "epoch": 0.7684434793753634, "grad_norm": 0.546368420124054, "learning_rate": 2.531167607040499e-06, "loss": 0.2622, "step": 34475 }, { "epoch": 0.7685549287559834, "grad_norm": 0.5263521671295166, "learning_rate": 2.5288398452163075e-06, "loss": 0.2952, "step": 34480 }, { "epoch": 0.7686663781366035, "grad_norm": 0.8289400935173035, "learning_rate": 2.526512999298684e-06, "loss": 0.2693, "step": 34485 }, { "epoch": 0.7687778275172236, "grad_norm": 0.7360613942146301, "learning_rate": 2.5241870695728788e-06, "loss": 0.2953, "step": 34490 }, { "epoch": 0.7688892768978436, "grad_norm": 0.689541220664978, "learning_rate": 2.5218620563240324e-06, "loss": 0.2033, "step": 34495 }, { "epoch": 0.7690007262784637, "grad_norm": 0.6824425458908081, "learning_rate": 2.5195379598371785e-06, "loss": 0.2722, "step": 34500 }, { "epoch": 0.7691121756590837, "grad_norm": 0.5263447761535645, "learning_rate": 2.51721478039723e-06, "loss": 0.3732, "step": 34505 }, { "epoch": 0.7692236250397039, "grad_norm": 0.39247003197669983, "learning_rate": 2.514892518288988e-06, "loss": 0.3426, "step": 34510 }, { "epoch": 0.7693350744203239, "grad_norm": 0.6696521639823914, "learning_rate": 2.5125711737971514e-06, "loss": 0.3763, "step": 34515 }, { "epoch": 0.769446523800944, "grad_norm": 1.1758371591567993, "learning_rate": 2.510250747206294e-06, "loss": 0.1982, "step": 34520 }, { "epoch": 0.769557973181564, "grad_norm": 0.5801847577095032, "learning_rate": 2.5079312388008825e-06, "loss": 0.3051, "step": 34525 }, { "epoch": 0.7696694225621841, "grad_norm": 0.5518688559532166, "learning_rate": 2.5056126488652686e-06, "loss": 0.3729, "step": 34530 }, { "epoch": 0.7697808719428042, "grad_norm": 0.604720413684845, "learning_rate": 2.503294977683699e-06, "loss": 0.4624, "step": 34535 }, { "epoch": 0.7698923213234242, "grad_norm": 0.9135832190513611, "learning_rate": 2.5009782255403003e-06, "loss": 0.4127, "step": 34540 }, { "epoch": 0.7700037707040444, "grad_norm": 0.5589683055877686, "learning_rate": 2.4986623927190834e-06, "loss": 0.3683, "step": 34545 }, { "epoch": 0.7701152200846644, "grad_norm": 0.9304735660552979, "learning_rate": 2.49634747950396e-06, "loss": 0.1766, "step": 34550 }, { "epoch": 0.7702266694652844, "grad_norm": 0.6019694805145264, "learning_rate": 2.4940334861787154e-06, "loss": 0.266, "step": 34555 }, { "epoch": 0.7703381188459045, "grad_norm": 0.35014772415161133, "learning_rate": 2.4917204130270255e-06, "loss": 0.2729, "step": 34560 }, { "epoch": 0.7704495682265246, "grad_norm": 0.44610536098480225, "learning_rate": 2.489408260332461e-06, "loss": 0.2708, "step": 34565 }, { "epoch": 0.7705610176071447, "grad_norm": 0.5490642786026001, "learning_rate": 2.48709702837847e-06, "loss": 0.2285, "step": 34570 }, { "epoch": 0.7706724669877647, "grad_norm": 0.5669086575508118, "learning_rate": 2.484786717448394e-06, "loss": 0.1863, "step": 34575 }, { "epoch": 0.7707839163683848, "grad_norm": 0.5582325458526611, "learning_rate": 2.4824773278254543e-06, "loss": 0.369, "step": 34580 }, { "epoch": 0.7708953657490049, "grad_norm": 0.39789506793022156, "learning_rate": 2.480168859792771e-06, "loss": 0.3403, "step": 34585 }, { "epoch": 0.7710068151296249, "grad_norm": 0.5807655453681946, "learning_rate": 2.4778613136333373e-06, "loss": 0.2493, "step": 34590 }, { "epoch": 0.771118264510245, "grad_norm": 0.6753233075141907, "learning_rate": 2.4755546896300484e-06, "loss": 0.1841, "step": 34595 }, { "epoch": 0.771229713890865, "grad_norm": 0.6643304228782654, "learning_rate": 2.4732489880656753e-06, "loss": 0.3048, "step": 34600 }, { "epoch": 0.7713411632714852, "grad_norm": 0.6019219756126404, "learning_rate": 2.4709442092228773e-06, "loss": 0.3022, "step": 34605 }, { "epoch": 0.7714526126521052, "grad_norm": 0.7255983352661133, "learning_rate": 2.468640353384205e-06, "loss": 0.3594, "step": 34610 }, { "epoch": 0.7715640620327252, "grad_norm": 0.8732434511184692, "learning_rate": 2.466337420832089e-06, "loss": 0.299, "step": 34615 }, { "epoch": 0.7716755114133453, "grad_norm": 0.5019058585166931, "learning_rate": 2.464035411848854e-06, "loss": 0.2987, "step": 34620 }, { "epoch": 0.7717869607939654, "grad_norm": 0.6579979062080383, "learning_rate": 2.4617343267167126e-06, "loss": 0.3096, "step": 34625 }, { "epoch": 0.7718984101745855, "grad_norm": 0.7526550889015198, "learning_rate": 2.4594341657177544e-06, "loss": 0.2986, "step": 34630 }, { "epoch": 0.7720098595552055, "grad_norm": 0.7801370024681091, "learning_rate": 2.457134929133965e-06, "loss": 0.2556, "step": 34635 }, { "epoch": 0.7721213089358255, "grad_norm": 0.4410828649997711, "learning_rate": 2.4548366172472103e-06, "loss": 0.1926, "step": 34640 }, { "epoch": 0.7722327583164457, "grad_norm": 0.5587494373321533, "learning_rate": 2.452539230339247e-06, "loss": 0.2875, "step": 34645 }, { "epoch": 0.7723442076970657, "grad_norm": 0.8305307626724243, "learning_rate": 2.450242768691714e-06, "loss": 0.3443, "step": 34650 }, { "epoch": 0.7724556570776858, "grad_norm": 0.5703745484352112, "learning_rate": 2.447947232586142e-06, "loss": 0.2984, "step": 34655 }, { "epoch": 0.7725671064583058, "grad_norm": 0.586417019367218, "learning_rate": 2.445652622303949e-06, "loss": 0.2462, "step": 34660 }, { "epoch": 0.772678555838926, "grad_norm": 0.5886982679367065, "learning_rate": 2.4433589381264346e-06, "loss": 0.241, "step": 34665 }, { "epoch": 0.772790005219546, "grad_norm": 0.5183648467063904, "learning_rate": 2.441066180334786e-06, "loss": 0.2852, "step": 34670 }, { "epoch": 0.772901454600166, "grad_norm": 0.5780871510505676, "learning_rate": 2.4387743492100767e-06, "loss": 0.2254, "step": 34675 }, { "epoch": 0.7730129039807861, "grad_norm": 0.44057005643844604, "learning_rate": 2.4364834450332662e-06, "loss": 0.1836, "step": 34680 }, { "epoch": 0.7731243533614062, "grad_norm": 0.4620932340621948, "learning_rate": 2.4341934680852043e-06, "loss": 0.246, "step": 34685 }, { "epoch": 0.7732358027420263, "grad_norm": 0.5267679691314697, "learning_rate": 2.4319044186466267e-06, "loss": 0.1548, "step": 34690 }, { "epoch": 0.7733472521226463, "grad_norm": 0.5535991787910461, "learning_rate": 2.4296162969981506e-06, "loss": 0.2883, "step": 34695 }, { "epoch": 0.7734587015032663, "grad_norm": 0.4434576630592346, "learning_rate": 2.4273291034202828e-06, "loss": 0.2774, "step": 34700 }, { "epoch": 0.7735701508838865, "grad_norm": 0.7226085066795349, "learning_rate": 2.4250428381934143e-06, "loss": 0.3007, "step": 34705 }, { "epoch": 0.7736816002645065, "grad_norm": 0.48912107944488525, "learning_rate": 2.4227575015978213e-06, "loss": 0.2283, "step": 34710 }, { "epoch": 0.7737930496451266, "grad_norm": 0.8533339500427246, "learning_rate": 2.4204730939136733e-06, "loss": 0.2332, "step": 34715 }, { "epoch": 0.7739044990257467, "grad_norm": 0.722251296043396, "learning_rate": 2.418189615421017e-06, "loss": 0.2628, "step": 34720 }, { "epoch": 0.7740159484063668, "grad_norm": 0.507311999797821, "learning_rate": 2.415907066399794e-06, "loss": 0.2101, "step": 34725 }, { "epoch": 0.7741273977869868, "grad_norm": 0.8455132842063904, "learning_rate": 2.413625447129825e-06, "loss": 0.2839, "step": 34730 }, { "epoch": 0.7742388471676068, "grad_norm": 0.4431849420070648, "learning_rate": 2.411344757890818e-06, "loss": 0.3066, "step": 34735 }, { "epoch": 0.774350296548227, "grad_norm": 0.3015442490577698, "learning_rate": 2.409064998962368e-06, "loss": 0.2811, "step": 34740 }, { "epoch": 0.774461745928847, "grad_norm": 0.5934215784072876, "learning_rate": 2.4067861706239524e-06, "loss": 0.1633, "step": 34745 }, { "epoch": 0.7745731953094671, "grad_norm": 0.5405876636505127, "learning_rate": 2.4045082731549463e-06, "loss": 0.3663, "step": 34750 }, { "epoch": 0.7746846446900871, "grad_norm": 0.6101316213607788, "learning_rate": 2.4022313068345937e-06, "loss": 0.3166, "step": 34755 }, { "epoch": 0.7747960940707072, "grad_norm": 0.8928530216217041, "learning_rate": 2.3999552719420403e-06, "loss": 0.3347, "step": 34760 }, { "epoch": 0.7749075434513273, "grad_norm": 0.510225772857666, "learning_rate": 2.3976801687563077e-06, "loss": 0.2394, "step": 34765 }, { "epoch": 0.7750189928319473, "grad_norm": 0.6352126002311707, "learning_rate": 2.395405997556305e-06, "loss": 0.403, "step": 34770 }, { "epoch": 0.7751304422125674, "grad_norm": 0.7462135553359985, "learning_rate": 2.3931327586208252e-06, "loss": 0.2649, "step": 34775 }, { "epoch": 0.7752418915931875, "grad_norm": 0.45706817507743835, "learning_rate": 2.3908604522285574e-06, "loss": 0.2739, "step": 34780 }, { "epoch": 0.7753533409738076, "grad_norm": 0.47235020995140076, "learning_rate": 2.3885890786580636e-06, "loss": 0.2053, "step": 34785 }, { "epoch": 0.7754647903544276, "grad_norm": 0.8027734756469727, "learning_rate": 2.3863186381877944e-06, "loss": 0.3695, "step": 34790 }, { "epoch": 0.7755762397350476, "grad_norm": 0.48161131143569946, "learning_rate": 2.3840491310960955e-06, "loss": 0.3491, "step": 34795 }, { "epoch": 0.7756876891156678, "grad_norm": 0.5025513768196106, "learning_rate": 2.381780557661186e-06, "loss": 0.2582, "step": 34800 }, { "epoch": 0.7757991384962878, "grad_norm": 0.8030077219009399, "learning_rate": 2.379512918161174e-06, "loss": 0.2948, "step": 34805 }, { "epoch": 0.7759105878769079, "grad_norm": 1.0067226886749268, "learning_rate": 2.3772462128740603e-06, "loss": 0.2108, "step": 34810 }, { "epoch": 0.7760220372575279, "grad_norm": 0.5891587734222412, "learning_rate": 2.374980442077722e-06, "loss": 0.4098, "step": 34815 }, { "epoch": 0.776133486638148, "grad_norm": 0.7874899506568909, "learning_rate": 2.3727156060499256e-06, "loss": 0.3265, "step": 34820 }, { "epoch": 0.7762449360187681, "grad_norm": 0.7964795827865601, "learning_rate": 2.3704517050683196e-06, "loss": 0.3475, "step": 34825 }, { "epoch": 0.7763563853993881, "grad_norm": 0.3685130774974823, "learning_rate": 2.3681887394104463e-06, "loss": 0.3647, "step": 34830 }, { "epoch": 0.7764678347800082, "grad_norm": 0.5425657033920288, "learning_rate": 2.3659267093537242e-06, "loss": 0.3282, "step": 34835 }, { "epoch": 0.7765792841606283, "grad_norm": 0.6460210084915161, "learning_rate": 2.363665615175459e-06, "loss": 0.2726, "step": 34840 }, { "epoch": 0.7766907335412483, "grad_norm": 0.4823559522628784, "learning_rate": 2.361405457152849e-06, "loss": 0.2596, "step": 34845 }, { "epoch": 0.7768021829218684, "grad_norm": 0.5281304121017456, "learning_rate": 2.359146235562969e-06, "loss": 0.2796, "step": 34850 }, { "epoch": 0.7769136323024884, "grad_norm": 0.6700007915496826, "learning_rate": 2.3568879506827826e-06, "loss": 0.2438, "step": 34855 }, { "epoch": 0.7770250816831086, "grad_norm": 0.8023337125778198, "learning_rate": 2.354630602789134e-06, "loss": 0.2751, "step": 34860 }, { "epoch": 0.7771365310637286, "grad_norm": 0.4087885916233063, "learning_rate": 2.352374192158764e-06, "loss": 0.2717, "step": 34865 }, { "epoch": 0.7772479804443487, "grad_norm": 0.49791598320007324, "learning_rate": 2.350118719068284e-06, "loss": 0.2264, "step": 34870 }, { "epoch": 0.7773594298249688, "grad_norm": 0.42472195625305176, "learning_rate": 2.347864183794204e-06, "loss": 0.2517, "step": 34875 }, { "epoch": 0.7774708792055888, "grad_norm": 0.8098690509796143, "learning_rate": 2.3456105866129098e-06, "loss": 0.2864, "step": 34880 }, { "epoch": 0.7775823285862089, "grad_norm": 0.4638606309890747, "learning_rate": 2.343357927800676e-06, "loss": 0.2375, "step": 34885 }, { "epoch": 0.7776937779668289, "grad_norm": 0.2630634307861328, "learning_rate": 2.341106207633658e-06, "loss": 0.3394, "step": 34890 }, { "epoch": 0.7778052273474491, "grad_norm": 1.4568618535995483, "learning_rate": 2.3388554263878992e-06, "loss": 0.2233, "step": 34895 }, { "epoch": 0.7779166767280691, "grad_norm": 0.4091511368751526, "learning_rate": 2.336605584339331e-06, "loss": 0.2923, "step": 34900 }, { "epoch": 0.7780281261086891, "grad_norm": 0.7855178713798523, "learning_rate": 2.3343566817637674e-06, "loss": 0.2633, "step": 34905 }, { "epoch": 0.7781395754893092, "grad_norm": 0.8028655052185059, "learning_rate": 2.332108718936905e-06, "loss": 0.1376, "step": 34910 }, { "epoch": 0.7782510248699293, "grad_norm": 0.869860053062439, "learning_rate": 2.329861696134328e-06, "loss": 0.4468, "step": 34915 }, { "epoch": 0.7783624742505494, "grad_norm": 0.7514600157737732, "learning_rate": 2.327615613631502e-06, "loss": 0.2536, "step": 34920 }, { "epoch": 0.7784739236311694, "grad_norm": 0.5989588499069214, "learning_rate": 2.3253704717037763e-06, "loss": 0.2255, "step": 34925 }, { "epoch": 0.7785853730117895, "grad_norm": 0.3953379988670349, "learning_rate": 2.3231262706263957e-06, "loss": 0.3115, "step": 34930 }, { "epoch": 0.7786968223924096, "grad_norm": 0.49553415179252625, "learning_rate": 2.3208830106744738e-06, "loss": 0.291, "step": 34935 }, { "epoch": 0.7788082717730296, "grad_norm": 0.6226276755332947, "learning_rate": 2.3186406921230255e-06, "loss": 0.3238, "step": 34940 }, { "epoch": 0.7789197211536497, "grad_norm": 0.8406127095222473, "learning_rate": 2.3163993152469365e-06, "loss": 0.3199, "step": 34945 }, { "epoch": 0.7790311705342697, "grad_norm": 0.5463730096817017, "learning_rate": 2.3141588803209837e-06, "loss": 0.2737, "step": 34950 }, { "epoch": 0.7791426199148899, "grad_norm": 0.46503451466560364, "learning_rate": 2.3119193876198265e-06, "loss": 0.1797, "step": 34955 }, { "epoch": 0.7792540692955099, "grad_norm": 0.6056923270225525, "learning_rate": 2.3096808374180056e-06, "loss": 0.3692, "step": 34960 }, { "epoch": 0.7793655186761299, "grad_norm": 0.7003437280654907, "learning_rate": 2.307443229989957e-06, "loss": 0.3654, "step": 34965 }, { "epoch": 0.77947696805675, "grad_norm": 0.6190140247344971, "learning_rate": 2.30520656560999e-06, "loss": 0.2616, "step": 34970 }, { "epoch": 0.7795884174373701, "grad_norm": 0.7247709631919861, "learning_rate": 2.3029708445523048e-06, "loss": 0.3933, "step": 34975 }, { "epoch": 0.7796998668179902, "grad_norm": 0.43216267228126526, "learning_rate": 2.300736067090982e-06, "loss": 0.3454, "step": 34980 }, { "epoch": 0.7798113161986102, "grad_norm": 0.7808366417884827, "learning_rate": 2.2985022334999884e-06, "loss": 0.3402, "step": 34985 }, { "epoch": 0.7799227655792303, "grad_norm": 0.4898461699485779, "learning_rate": 2.2962693440531713e-06, "loss": 0.297, "step": 34990 }, { "epoch": 0.7800342149598504, "grad_norm": 0.62904953956604, "learning_rate": 2.2940373990242724e-06, "loss": 0.2683, "step": 34995 }, { "epoch": 0.7801456643404704, "grad_norm": 0.425571471452713, "learning_rate": 2.2918063986869064e-06, "loss": 0.1824, "step": 35000 }, { "epoch": 0.7802571137210905, "grad_norm": 0.7334528565406799, "learning_rate": 2.289576343314577e-06, "loss": 0.2736, "step": 35005 }, { "epoch": 0.7803685631017105, "grad_norm": 0.4682607054710388, "learning_rate": 2.2873472331806732e-06, "loss": 0.2973, "step": 35010 }, { "epoch": 0.7804800124823307, "grad_norm": 0.6670015454292297, "learning_rate": 2.285119068558468e-06, "loss": 0.369, "step": 35015 }, { "epoch": 0.7805914618629507, "grad_norm": 0.7925013303756714, "learning_rate": 2.2828918497211107e-06, "loss": 0.2609, "step": 35020 }, { "epoch": 0.7807029112435707, "grad_norm": 0.5793088674545288, "learning_rate": 2.2806655769416496e-06, "loss": 0.2433, "step": 35025 }, { "epoch": 0.7808143606241909, "grad_norm": 0.6716881394386292, "learning_rate": 2.2784402504930047e-06, "loss": 0.3465, "step": 35030 }, { "epoch": 0.7809258100048109, "grad_norm": 0.6803726553916931, "learning_rate": 2.2762158706479833e-06, "loss": 0.211, "step": 35035 }, { "epoch": 0.781037259385431, "grad_norm": 0.46885883808135986, "learning_rate": 2.2739924376792744e-06, "loss": 0.3056, "step": 35040 }, { "epoch": 0.781148708766051, "grad_norm": 0.5113359689712524, "learning_rate": 2.2717699518594606e-06, "loss": 0.268, "step": 35045 }, { "epoch": 0.781260158146671, "grad_norm": 0.5416445136070251, "learning_rate": 2.269548413460998e-06, "loss": 0.2482, "step": 35050 }, { "epoch": 0.7813716075272912, "grad_norm": 0.88877272605896, "learning_rate": 2.2673278227562266e-06, "loss": 0.2914, "step": 35055 }, { "epoch": 0.7814830569079112, "grad_norm": 0.954880952835083, "learning_rate": 2.26510818001738e-06, "loss": 0.3863, "step": 35060 }, { "epoch": 0.7815945062885313, "grad_norm": 0.8852445483207703, "learning_rate": 2.262889485516567e-06, "loss": 0.3352, "step": 35065 }, { "epoch": 0.7817059556691514, "grad_norm": 0.4573374092578888, "learning_rate": 2.2606717395257816e-06, "loss": 0.225, "step": 35070 }, { "epoch": 0.7818174050497715, "grad_norm": 0.7477966547012329, "learning_rate": 2.258454942316899e-06, "loss": 0.1913, "step": 35075 }, { "epoch": 0.7819288544303915, "grad_norm": 0.732876181602478, "learning_rate": 2.2562390941616887e-06, "loss": 0.2876, "step": 35080 }, { "epoch": 0.7820403038110115, "grad_norm": 0.6387799382209778, "learning_rate": 2.254024195331789e-06, "loss": 0.2778, "step": 35085 }, { "epoch": 0.7821517531916317, "grad_norm": 0.8216693997383118, "learning_rate": 2.2518102460987355e-06, "loss": 0.1691, "step": 35090 }, { "epoch": 0.7822632025722517, "grad_norm": 0.337422639131546, "learning_rate": 2.249597246733941e-06, "loss": 0.232, "step": 35095 }, { "epoch": 0.7823746519528718, "grad_norm": 0.7069044709205627, "learning_rate": 2.247385197508698e-06, "loss": 0.2494, "step": 35100 }, { "epoch": 0.7824861013334918, "grad_norm": 0.7120121121406555, "learning_rate": 2.2451740986941905e-06, "loss": 0.2518, "step": 35105 }, { "epoch": 0.7825975507141119, "grad_norm": 0.5651516318321228, "learning_rate": 2.2429639505614764e-06, "loss": 0.2946, "step": 35110 }, { "epoch": 0.782709000094732, "grad_norm": 0.5717464685440063, "learning_rate": 2.2407547533815077e-06, "loss": 0.2368, "step": 35115 }, { "epoch": 0.782820449475352, "grad_norm": 0.7231550812721252, "learning_rate": 2.2385465074251166e-06, "loss": 0.2777, "step": 35120 }, { "epoch": 0.7829318988559721, "grad_norm": 0.5691319108009338, "learning_rate": 2.2363392129630147e-06, "loss": 0.2391, "step": 35125 }, { "epoch": 0.7830433482365922, "grad_norm": 0.7855725288391113, "learning_rate": 2.2341328702657985e-06, "loss": 0.3428, "step": 35130 }, { "epoch": 0.7831547976172123, "grad_norm": 0.48168542981147766, "learning_rate": 2.2319274796039493e-06, "loss": 0.2955, "step": 35135 }, { "epoch": 0.7832662469978323, "grad_norm": 0.7382097840309143, "learning_rate": 2.2297230412478275e-06, "loss": 0.3642, "step": 35140 }, { "epoch": 0.7833776963784523, "grad_norm": 0.6649189591407776, "learning_rate": 2.2275195554676864e-06, "loss": 0.2334, "step": 35145 }, { "epoch": 0.7834891457590725, "grad_norm": 0.7483057379722595, "learning_rate": 2.2253170225336517e-06, "loss": 0.2402, "step": 35150 }, { "epoch": 0.7836005951396925, "grad_norm": 0.4074045717716217, "learning_rate": 2.223115442715741e-06, "loss": 0.2334, "step": 35155 }, { "epoch": 0.7837120445203126, "grad_norm": 0.8826455473899841, "learning_rate": 2.2209148162838477e-06, "loss": 0.2745, "step": 35160 }, { "epoch": 0.7838234939009326, "grad_norm": 0.7632371783256531, "learning_rate": 2.2187151435077535e-06, "loss": 0.2734, "step": 35165 }, { "epoch": 0.7839349432815527, "grad_norm": 0.9246512055397034, "learning_rate": 2.2165164246571203e-06, "loss": 0.2973, "step": 35170 }, { "epoch": 0.7840463926621728, "grad_norm": 0.3215585947036743, "learning_rate": 2.214318660001492e-06, "loss": 0.4391, "step": 35175 }, { "epoch": 0.7841578420427928, "grad_norm": 0.6698297262191772, "learning_rate": 2.212121849810299e-06, "loss": 0.3566, "step": 35180 }, { "epoch": 0.784269291423413, "grad_norm": 0.9584089517593384, "learning_rate": 2.2099259943528573e-06, "loss": 0.3124, "step": 35185 }, { "epoch": 0.784380740804033, "grad_norm": 0.8780977129936218, "learning_rate": 2.2077310938983577e-06, "loss": 0.4167, "step": 35190 }, { "epoch": 0.7844921901846531, "grad_norm": 0.7061375379562378, "learning_rate": 2.2055371487158796e-06, "loss": 0.2632, "step": 35195 }, { "epoch": 0.7846036395652731, "grad_norm": 0.6107895970344543, "learning_rate": 2.203344159074383e-06, "loss": 0.2148, "step": 35200 }, { "epoch": 0.7847150889458931, "grad_norm": 0.7566227316856384, "learning_rate": 2.201152125242708e-06, "loss": 0.3246, "step": 35205 }, { "epoch": 0.7848265383265133, "grad_norm": 0.6975489854812622, "learning_rate": 2.1989610474895883e-06, "loss": 0.3739, "step": 35210 }, { "epoch": 0.7849379877071333, "grad_norm": 0.6445315480232239, "learning_rate": 2.196770926083627e-06, "loss": 0.2056, "step": 35215 }, { "epoch": 0.7850494370877534, "grad_norm": 0.47463878989219666, "learning_rate": 2.1945817612933194e-06, "loss": 0.2741, "step": 35220 }, { "epoch": 0.7851608864683735, "grad_norm": 0.5092577934265137, "learning_rate": 2.192393553387041e-06, "loss": 0.2682, "step": 35225 }, { "epoch": 0.7852723358489935, "grad_norm": 0.5312575697898865, "learning_rate": 2.1902063026330466e-06, "loss": 0.3165, "step": 35230 }, { "epoch": 0.7853837852296136, "grad_norm": 0.4544369578361511, "learning_rate": 2.1880200092994754e-06, "loss": 0.2641, "step": 35235 }, { "epoch": 0.7854952346102336, "grad_norm": 0.46932798624038696, "learning_rate": 2.1858346736543535e-06, "loss": 0.2427, "step": 35240 }, { "epoch": 0.7856066839908538, "grad_norm": 0.7473596334457397, "learning_rate": 2.1836502959655847e-06, "loss": 0.2826, "step": 35245 }, { "epoch": 0.7857181333714738, "grad_norm": 0.6829301714897156, "learning_rate": 2.181466876500954e-06, "loss": 0.1744, "step": 35250 }, { "epoch": 0.7858295827520938, "grad_norm": 0.4222946763038635, "learning_rate": 2.1792844155281377e-06, "loss": 0.2619, "step": 35255 }, { "epoch": 0.7859410321327139, "grad_norm": 0.5802531838417053, "learning_rate": 2.1771029133146848e-06, "loss": 0.3516, "step": 35260 }, { "epoch": 0.786052481513334, "grad_norm": 0.7257078289985657, "learning_rate": 2.174922370128032e-06, "loss": 0.2959, "step": 35265 }, { "epoch": 0.7861639308939541, "grad_norm": 0.7537233233451843, "learning_rate": 2.172742786235492e-06, "loss": 0.2253, "step": 35270 }, { "epoch": 0.7862753802745741, "grad_norm": 1.001983404159546, "learning_rate": 2.170564161904274e-06, "loss": 0.3747, "step": 35275 }, { "epoch": 0.7863868296551942, "grad_norm": 0.610106348991394, "learning_rate": 2.1683864974014545e-06, "loss": 0.2735, "step": 35280 }, { "epoch": 0.7864982790358143, "grad_norm": 0.40139901638031006, "learning_rate": 2.1662097929939975e-06, "loss": 0.2136, "step": 35285 }, { "epoch": 0.7866097284164343, "grad_norm": 0.6343426704406738, "learning_rate": 2.164034048948754e-06, "loss": 0.3794, "step": 35290 }, { "epoch": 0.7867211777970544, "grad_norm": 0.6991207599639893, "learning_rate": 2.161859265532452e-06, "loss": 0.2002, "step": 35295 }, { "epoch": 0.7868326271776744, "grad_norm": 0.6790099143981934, "learning_rate": 2.159685443011701e-06, "loss": 0.3706, "step": 35300 }, { "epoch": 0.7869440765582946, "grad_norm": 1.4308520555496216, "learning_rate": 2.1575125816529996e-06, "loss": 0.2568, "step": 35305 }, { "epoch": 0.7870555259389146, "grad_norm": 0.4401121437549591, "learning_rate": 2.1553406817227194e-06, "loss": 0.3254, "step": 35310 }, { "epoch": 0.7871669753195346, "grad_norm": 1.223118543624878, "learning_rate": 2.1531697434871215e-06, "loss": 0.3267, "step": 35315 }, { "epoch": 0.7872784247001547, "grad_norm": 0.413013756275177, "learning_rate": 2.1509997672123418e-06, "loss": 0.2134, "step": 35320 }, { "epoch": 0.7873898740807748, "grad_norm": 0.3138122260570526, "learning_rate": 2.148830753164408e-06, "loss": 0.1685, "step": 35325 }, { "epoch": 0.7875013234613949, "grad_norm": 0.6393536329269409, "learning_rate": 2.1466627016092202e-06, "loss": 0.3473, "step": 35330 }, { "epoch": 0.7876127728420149, "grad_norm": 0.8312395811080933, "learning_rate": 2.1444956128125694e-06, "loss": 0.3317, "step": 35335 }, { "epoch": 0.787724222222635, "grad_norm": 0.6563614010810852, "learning_rate": 2.1423294870401203e-06, "loss": 0.3394, "step": 35340 }, { "epoch": 0.7878356716032551, "grad_norm": 0.8131313323974609, "learning_rate": 2.1401643245574244e-06, "loss": 0.3371, "step": 35345 }, { "epoch": 0.7879471209838751, "grad_norm": 0.8174598217010498, "learning_rate": 2.1380001256299143e-06, "loss": 0.3269, "step": 35350 }, { "epoch": 0.7880585703644952, "grad_norm": 0.5772351026535034, "learning_rate": 2.135836890522901e-06, "loss": 0.3529, "step": 35355 }, { "epoch": 0.7881700197451152, "grad_norm": 0.42435768246650696, "learning_rate": 2.1336746195015845e-06, "loss": 0.2459, "step": 35360 }, { "epoch": 0.7882814691257354, "grad_norm": 0.731113076210022, "learning_rate": 2.1315133128310395e-06, "loss": 0.3766, "step": 35365 }, { "epoch": 0.7883929185063554, "grad_norm": 0.5198209285736084, "learning_rate": 2.1293529707762284e-06, "loss": 0.2979, "step": 35370 }, { "epoch": 0.7885043678869754, "grad_norm": 0.8131558895111084, "learning_rate": 2.1271935936019915e-06, "loss": 0.2811, "step": 35375 }, { "epoch": 0.7886158172675956, "grad_norm": 0.5951612591743469, "learning_rate": 2.1250351815730517e-06, "loss": 0.2405, "step": 35380 }, { "epoch": 0.7887272666482156, "grad_norm": 0.736031174659729, "learning_rate": 2.122877734954013e-06, "loss": 0.2372, "step": 35385 }, { "epoch": 0.7888387160288357, "grad_norm": 1.0694679021835327, "learning_rate": 2.120721254009359e-06, "loss": 0.392, "step": 35390 }, { "epoch": 0.7889501654094557, "grad_norm": 0.6037766337394714, "learning_rate": 2.118565739003461e-06, "loss": 0.2684, "step": 35395 }, { "epoch": 0.7890616147900757, "grad_norm": 0.8734601736068726, "learning_rate": 2.11641119020057e-06, "loss": 0.2731, "step": 35400 }, { "epoch": 0.7891730641706959, "grad_norm": 0.5328859090805054, "learning_rate": 2.114257607864816e-06, "loss": 0.181, "step": 35405 }, { "epoch": 0.7892845135513159, "grad_norm": 0.5082289576530457, "learning_rate": 2.1121049922602098e-06, "loss": 0.3103, "step": 35410 }, { "epoch": 0.789395962931936, "grad_norm": 0.49645161628723145, "learning_rate": 2.109953343650647e-06, "loss": 0.2639, "step": 35415 }, { "epoch": 0.7895074123125561, "grad_norm": 0.46683311462402344, "learning_rate": 2.1078026622999006e-06, "loss": 0.1948, "step": 35420 }, { "epoch": 0.7896188616931762, "grad_norm": 0.606785237789154, "learning_rate": 2.105652948471628e-06, "loss": 0.303, "step": 35425 }, { "epoch": 0.7897303110737962, "grad_norm": 0.42324838042259216, "learning_rate": 2.1035042024293716e-06, "loss": 0.2416, "step": 35430 }, { "epoch": 0.7898417604544162, "grad_norm": 0.785689651966095, "learning_rate": 2.101356424436549e-06, "loss": 0.2966, "step": 35435 }, { "epoch": 0.7899532098350364, "grad_norm": 0.771626353263855, "learning_rate": 2.09920961475646e-06, "loss": 0.2261, "step": 35440 }, { "epoch": 0.7900646592156564, "grad_norm": 0.8682251572608948, "learning_rate": 2.097063773652288e-06, "loss": 0.2409, "step": 35445 }, { "epoch": 0.7901761085962765, "grad_norm": 0.6335934400558472, "learning_rate": 2.0949189013870965e-06, "loss": 0.2482, "step": 35450 }, { "epoch": 0.7902875579768965, "grad_norm": 0.9981822967529297, "learning_rate": 2.0927749982238266e-06, "loss": 0.2093, "step": 35455 }, { "epoch": 0.7903990073575166, "grad_norm": 0.7123530507087708, "learning_rate": 2.09063206442531e-06, "loss": 0.3498, "step": 35460 }, { "epoch": 0.7905104567381367, "grad_norm": 0.4021548926830292, "learning_rate": 2.088490100254248e-06, "loss": 0.2242, "step": 35465 }, { "epoch": 0.7906219061187567, "grad_norm": 0.49737146496772766, "learning_rate": 2.0863491059732366e-06, "loss": 0.2516, "step": 35470 }, { "epoch": 0.7907333554993768, "grad_norm": 0.6410098075866699, "learning_rate": 2.0842090818447393e-06, "loss": 0.3899, "step": 35475 }, { "epoch": 0.7908448048799969, "grad_norm": 1.1013152599334717, "learning_rate": 2.082070028131109e-06, "loss": 0.2475, "step": 35480 }, { "epoch": 0.790956254260617, "grad_norm": 0.5911498069763184, "learning_rate": 2.0799319450945733e-06, "loss": 0.264, "step": 35485 }, { "epoch": 0.791067703641237, "grad_norm": 0.7244366407394409, "learning_rate": 2.0777948329972497e-06, "loss": 0.3184, "step": 35490 }, { "epoch": 0.791179153021857, "grad_norm": 0.6575037240982056, "learning_rate": 2.07565869210113e-06, "loss": 0.3026, "step": 35495 }, { "epoch": 0.7912906024024772, "grad_norm": 0.5350658893585205, "learning_rate": 2.073523522668086e-06, "loss": 0.261, "step": 35500 }, { "epoch": 0.7914020517830972, "grad_norm": 0.6373189091682434, "learning_rate": 2.0713893249598772e-06, "loss": 0.2683, "step": 35505 }, { "epoch": 0.7915135011637173, "grad_norm": 0.6466808319091797, "learning_rate": 2.0692560992381373e-06, "loss": 0.3699, "step": 35510 }, { "epoch": 0.7916249505443373, "grad_norm": 0.710489809513092, "learning_rate": 2.0671238457643817e-06, "loss": 0.3298, "step": 35515 }, { "epoch": 0.7917363999249574, "grad_norm": 0.45638200640678406, "learning_rate": 2.0649925648000123e-06, "loss": 0.1977, "step": 35520 }, { "epoch": 0.7918478493055775, "grad_norm": 0.6310269236564636, "learning_rate": 2.0628622566063063e-06, "loss": 0.2807, "step": 35525 }, { "epoch": 0.7919592986861975, "grad_norm": 0.8936885595321655, "learning_rate": 2.0607329214444216e-06, "loss": 0.3801, "step": 35530 }, { "epoch": 0.7920707480668177, "grad_norm": 0.5122530460357666, "learning_rate": 2.058604559575397e-06, "loss": 0.1657, "step": 35535 }, { "epoch": 0.7921821974474377, "grad_norm": 0.6624186038970947, "learning_rate": 2.0564771712601573e-06, "loss": 0.2397, "step": 35540 }, { "epoch": 0.7922936468280578, "grad_norm": 0.7633967399597168, "learning_rate": 2.0543507567594987e-06, "loss": 0.2455, "step": 35545 }, { "epoch": 0.7924050962086778, "grad_norm": 0.431619256734848, "learning_rate": 2.05222531633411e-06, "loss": 0.27, "step": 35550 }, { "epoch": 0.7925165455892978, "grad_norm": 0.6180048584938049, "learning_rate": 2.05010085024455e-06, "loss": 0.3086, "step": 35555 }, { "epoch": 0.792627994969918, "grad_norm": 0.5129493474960327, "learning_rate": 2.047977358751262e-06, "loss": 0.0996, "step": 35560 }, { "epoch": 0.792739444350538, "grad_norm": 0.2933807373046875, "learning_rate": 2.0458548421145697e-06, "loss": 0.2842, "step": 35565 }, { "epoch": 0.7928508937311581, "grad_norm": 0.3998599648475647, "learning_rate": 2.0437333005946736e-06, "loss": 0.3707, "step": 35570 }, { "epoch": 0.7929623431117782, "grad_norm": 0.8523929119110107, "learning_rate": 2.041612734451666e-06, "loss": 0.2828, "step": 35575 }, { "epoch": 0.7930737924923982, "grad_norm": 0.6669270992279053, "learning_rate": 2.0394931439455034e-06, "loss": 0.2341, "step": 35580 }, { "epoch": 0.7931852418730183, "grad_norm": 0.8018181324005127, "learning_rate": 2.037374529336039e-06, "loss": 0.3917, "step": 35585 }, { "epoch": 0.7932966912536383, "grad_norm": 1.1173460483551025, "learning_rate": 2.035256890882996e-06, "loss": 0.2755, "step": 35590 }, { "epoch": 0.7934081406342585, "grad_norm": 0.4358447194099426, "learning_rate": 2.033140228845979e-06, "loss": 0.2951, "step": 35595 }, { "epoch": 0.7935195900148785, "grad_norm": 0.9090696573257446, "learning_rate": 2.0310245434844756e-06, "loss": 0.2723, "step": 35600 }, { "epoch": 0.7936310393954985, "grad_norm": 0.7753550410270691, "learning_rate": 2.0289098350578493e-06, "loss": 0.2791, "step": 35605 }, { "epoch": 0.7937424887761186, "grad_norm": 0.887015700340271, "learning_rate": 2.0267961038253503e-06, "loss": 0.3607, "step": 35610 }, { "epoch": 0.7938539381567387, "grad_norm": 0.6061819195747375, "learning_rate": 2.024683350046107e-06, "loss": 0.2793, "step": 35615 }, { "epoch": 0.7939653875373588, "grad_norm": 0.6215338110923767, "learning_rate": 2.0225715739791265e-06, "loss": 0.3664, "step": 35620 }, { "epoch": 0.7940768369179788, "grad_norm": 0.7980818748474121, "learning_rate": 2.020460775883294e-06, "loss": 0.2984, "step": 35625 }, { "epoch": 0.7941882862985989, "grad_norm": 0.5570255517959595, "learning_rate": 2.0183509560173777e-06, "loss": 0.4127, "step": 35630 }, { "epoch": 0.794299735679219, "grad_norm": 0.47531047463417053, "learning_rate": 2.0162421146400223e-06, "loss": 0.2832, "step": 35635 }, { "epoch": 0.794411185059839, "grad_norm": 0.5249255895614624, "learning_rate": 2.0141342520097583e-06, "loss": 0.2659, "step": 35640 }, { "epoch": 0.7945226344404591, "grad_norm": 0.216322660446167, "learning_rate": 2.0120273683849965e-06, "loss": 0.2336, "step": 35645 }, { "epoch": 0.7946340838210791, "grad_norm": 0.49721142649650574, "learning_rate": 2.0099214640240227e-06, "loss": 0.203, "step": 35650 }, { "epoch": 0.7947455332016993, "grad_norm": 0.7828408479690552, "learning_rate": 2.0078165391850026e-06, "loss": 0.3225, "step": 35655 }, { "epoch": 0.7948569825823193, "grad_norm": 0.8212775588035583, "learning_rate": 2.0057125941259846e-06, "loss": 0.256, "step": 35660 }, { "epoch": 0.7949684319629393, "grad_norm": 1.2922614812850952, "learning_rate": 2.0036096291048956e-06, "loss": 0.3137, "step": 35665 }, { "epoch": 0.7950798813435594, "grad_norm": 0.9791651964187622, "learning_rate": 2.001507644379541e-06, "loss": 0.3149, "step": 35670 }, { "epoch": 0.7951913307241795, "grad_norm": 1.0271574258804321, "learning_rate": 1.999406640207612e-06, "loss": 0.2303, "step": 35675 }, { "epoch": 0.7953027801047996, "grad_norm": 0.6987234950065613, "learning_rate": 1.997306616846675e-06, "loss": 0.4131, "step": 35680 }, { "epoch": 0.7954142294854196, "grad_norm": 0.31380951404571533, "learning_rate": 1.9952075745541744e-06, "loss": 0.2285, "step": 35685 }, { "epoch": 0.7955256788660398, "grad_norm": 0.5740078091621399, "learning_rate": 1.993109513587439e-06, "loss": 0.3155, "step": 35690 }, { "epoch": 0.7956371282466598, "grad_norm": 0.6432341933250427, "learning_rate": 1.9910124342036742e-06, "loss": 0.3574, "step": 35695 }, { "epoch": 0.7957485776272798, "grad_norm": 0.7405551671981812, "learning_rate": 1.9889163366599607e-06, "loss": 0.372, "step": 35700 }, { "epoch": 0.7958600270078999, "grad_norm": 0.6038686633110046, "learning_rate": 1.986821221213272e-06, "loss": 0.183, "step": 35705 }, { "epoch": 0.79597147638852, "grad_norm": 0.5730610489845276, "learning_rate": 1.9847270881204462e-06, "loss": 0.3067, "step": 35710 }, { "epoch": 0.7960829257691401, "grad_norm": 0.7106767296791077, "learning_rate": 1.9826339376382144e-06, "loss": 0.2624, "step": 35715 }, { "epoch": 0.7961943751497601, "grad_norm": 0.48092079162597656, "learning_rate": 1.9805417700231766e-06, "loss": 0.2149, "step": 35720 }, { "epoch": 0.7963058245303801, "grad_norm": 0.692106306552887, "learning_rate": 1.978450585531817e-06, "loss": 0.3472, "step": 35725 }, { "epoch": 0.7964172739110003, "grad_norm": 0.8615431785583496, "learning_rate": 1.976360384420496e-06, "loss": 0.267, "step": 35730 }, { "epoch": 0.7965287232916203, "grad_norm": 1.365073561668396, "learning_rate": 1.974271166945463e-06, "loss": 0.3025, "step": 35735 }, { "epoch": 0.7966401726722404, "grad_norm": 0.6001205444335938, "learning_rate": 1.972182933362834e-06, "loss": 0.209, "step": 35740 }, { "epoch": 0.7967516220528604, "grad_norm": 0.3347148299217224, "learning_rate": 1.97009568392861e-06, "loss": 0.3489, "step": 35745 }, { "epoch": 0.7968630714334806, "grad_norm": 0.5797106027603149, "learning_rate": 1.9680094188986767e-06, "loss": 0.3299, "step": 35750 }, { "epoch": 0.7969745208141006, "grad_norm": 0.5811283588409424, "learning_rate": 1.965924138528791e-06, "loss": 0.3119, "step": 35755 }, { "epoch": 0.7970859701947206, "grad_norm": 0.543427050113678, "learning_rate": 1.963839843074593e-06, "loss": 0.2819, "step": 35760 }, { "epoch": 0.7971974195753407, "grad_norm": 0.9709669947624207, "learning_rate": 1.9617565327915966e-06, "loss": 0.2298, "step": 35765 }, { "epoch": 0.7973088689559608, "grad_norm": 0.5826546549797058, "learning_rate": 1.959674207935207e-06, "loss": 0.3116, "step": 35770 }, { "epoch": 0.7974203183365809, "grad_norm": 0.4199870526790619, "learning_rate": 1.9575928687606983e-06, "loss": 0.3215, "step": 35775 }, { "epoch": 0.7975317677172009, "grad_norm": 0.6433164477348328, "learning_rate": 1.9555125155232223e-06, "loss": 0.218, "step": 35780 }, { "epoch": 0.7976432170978209, "grad_norm": 0.6210421919822693, "learning_rate": 1.9534331484778212e-06, "loss": 0.2374, "step": 35785 }, { "epoch": 0.7977546664784411, "grad_norm": 0.6035399436950684, "learning_rate": 1.9513547678794065e-06, "loss": 0.4207, "step": 35790 }, { "epoch": 0.7978661158590611, "grad_norm": 0.5844293832778931, "learning_rate": 1.949277373982769e-06, "loss": 0.3032, "step": 35795 }, { "epoch": 0.7979775652396812, "grad_norm": 0.7550781965255737, "learning_rate": 1.947200967042584e-06, "loss": 0.2134, "step": 35800 }, { "epoch": 0.7980890146203012, "grad_norm": 0.4585762619972229, "learning_rate": 1.9451255473134046e-06, "loss": 0.2652, "step": 35805 }, { "epoch": 0.7982004640009213, "grad_norm": 0.7412415742874146, "learning_rate": 1.9430511150496576e-06, "loss": 0.4184, "step": 35810 }, { "epoch": 0.7983119133815414, "grad_norm": 0.7571333646774292, "learning_rate": 1.9409776705056514e-06, "loss": 0.3278, "step": 35815 }, { "epoch": 0.7984233627621614, "grad_norm": 0.6268478035926819, "learning_rate": 1.93890521393558e-06, "loss": 0.2156, "step": 35820 }, { "epoch": 0.7985348121427815, "grad_norm": 0.7117454409599304, "learning_rate": 1.936833745593504e-06, "loss": 0.2145, "step": 35825 }, { "epoch": 0.7986462615234016, "grad_norm": 0.42261865735054016, "learning_rate": 1.934763265733376e-06, "loss": 0.2468, "step": 35830 }, { "epoch": 0.7987577109040217, "grad_norm": 0.547375500202179, "learning_rate": 1.932693774609017e-06, "loss": 0.3163, "step": 35835 }, { "epoch": 0.7988691602846417, "grad_norm": 0.6775712370872498, "learning_rate": 1.9306252724741305e-06, "loss": 0.394, "step": 35840 }, { "epoch": 0.7989806096652617, "grad_norm": 1.0392708778381348, "learning_rate": 1.9285577595823002e-06, "loss": 0.2424, "step": 35845 }, { "epoch": 0.7990920590458819, "grad_norm": 1.004452109336853, "learning_rate": 1.9264912361869847e-06, "loss": 0.3627, "step": 35850 }, { "epoch": 0.7992035084265019, "grad_norm": 1.1012901067733765, "learning_rate": 1.9244257025415247e-06, "loss": 0.2651, "step": 35855 }, { "epoch": 0.799314957807122, "grad_norm": 0.5909172296524048, "learning_rate": 1.922361158899143e-06, "loss": 0.2275, "step": 35860 }, { "epoch": 0.799426407187742, "grad_norm": 0.7913104891777039, "learning_rate": 1.920297605512933e-06, "loss": 0.1997, "step": 35865 }, { "epoch": 0.7995378565683621, "grad_norm": 0.7826907634735107, "learning_rate": 1.918235042635871e-06, "loss": 0.3624, "step": 35870 }, { "epoch": 0.7996493059489822, "grad_norm": 0.6586305499076843, "learning_rate": 1.9161734705208114e-06, "loss": 0.2755, "step": 35875 }, { "epoch": 0.7997607553296022, "grad_norm": 0.6121078133583069, "learning_rate": 1.9141128894204863e-06, "loss": 0.2498, "step": 35880 }, { "epoch": 0.7998722047102224, "grad_norm": 0.5371085405349731, "learning_rate": 1.912053299587505e-06, "loss": 0.2658, "step": 35885 }, { "epoch": 0.7999836540908424, "grad_norm": 0.4897899925708771, "learning_rate": 1.909994701274359e-06, "loss": 0.2947, "step": 35890 }, { "epoch": 0.8000951034714625, "grad_norm": 0.6061673164367676, "learning_rate": 1.9079370947334218e-06, "loss": 0.2394, "step": 35895 }, { "epoch": 0.8002065528520825, "grad_norm": 0.6482059359550476, "learning_rate": 1.905880480216934e-06, "loss": 0.1842, "step": 35900 }, { "epoch": 0.8003180022327026, "grad_norm": 0.5653631091117859, "learning_rate": 1.9038248579770234e-06, "loss": 0.3537, "step": 35905 }, { "epoch": 0.8004294516133227, "grad_norm": 0.4235904812812805, "learning_rate": 1.9017702282656913e-06, "loss": 0.3208, "step": 35910 }, { "epoch": 0.8005409009939427, "grad_norm": 0.5391014814376831, "learning_rate": 1.8997165913348191e-06, "loss": 0.3414, "step": 35915 }, { "epoch": 0.8006523503745628, "grad_norm": 0.6648342609405518, "learning_rate": 1.897663947436167e-06, "loss": 0.2192, "step": 35920 }, { "epoch": 0.8007637997551829, "grad_norm": 0.6008848547935486, "learning_rate": 1.8956122968213787e-06, "loss": 0.3137, "step": 35925 }, { "epoch": 0.8008752491358029, "grad_norm": 0.9569928050041199, "learning_rate": 1.8935616397419653e-06, "loss": 0.4199, "step": 35930 }, { "epoch": 0.800986698516423, "grad_norm": 0.8108124136924744, "learning_rate": 1.8915119764493229e-06, "loss": 0.2181, "step": 35935 }, { "epoch": 0.801098147897043, "grad_norm": 0.6100592017173767, "learning_rate": 1.8894633071947245e-06, "loss": 0.2611, "step": 35940 }, { "epoch": 0.8012095972776632, "grad_norm": 0.5053665637969971, "learning_rate": 1.887415632229318e-06, "loss": 0.3694, "step": 35945 }, { "epoch": 0.8013210466582832, "grad_norm": 0.5933337211608887, "learning_rate": 1.8853689518041385e-06, "loss": 0.1836, "step": 35950 }, { "epoch": 0.8014324960389033, "grad_norm": 0.5199214220046997, "learning_rate": 1.8833232661700873e-06, "loss": 0.2262, "step": 35955 }, { "epoch": 0.8015439454195233, "grad_norm": 0.790266752243042, "learning_rate": 1.881278575577955e-06, "loss": 0.3876, "step": 35960 }, { "epoch": 0.8016553948001434, "grad_norm": 0.6568781733512878, "learning_rate": 1.8792348802784022e-06, "loss": 0.2912, "step": 35965 }, { "epoch": 0.8017668441807635, "grad_norm": 0.7379553914070129, "learning_rate": 1.8771921805219705e-06, "loss": 0.2439, "step": 35970 }, { "epoch": 0.8018782935613835, "grad_norm": 0.808842122554779, "learning_rate": 1.87515047655908e-06, "loss": 0.3986, "step": 35975 }, { "epoch": 0.8019897429420036, "grad_norm": 0.2748057544231415, "learning_rate": 1.8731097686400236e-06, "loss": 0.2027, "step": 35980 }, { "epoch": 0.8021011923226237, "grad_norm": 0.7190350294113159, "learning_rate": 1.871070057014982e-06, "loss": 0.2014, "step": 35985 }, { "epoch": 0.8022126417032437, "grad_norm": 0.8670672178268433, "learning_rate": 1.8690313419340055e-06, "loss": 0.3295, "step": 35990 }, { "epoch": 0.8023240910838638, "grad_norm": 0.8307561874389648, "learning_rate": 1.8669936236470221e-06, "loss": 0.2391, "step": 35995 }, { "epoch": 0.8024355404644838, "grad_norm": 0.5368415117263794, "learning_rate": 1.8649569024038472e-06, "loss": 0.2935, "step": 36000 }, { "epoch": 0.802546989845104, "grad_norm": 0.6144288182258606, "learning_rate": 1.8629211784541623e-06, "loss": 0.2763, "step": 36005 }, { "epoch": 0.802658439225724, "grad_norm": 0.662746012210846, "learning_rate": 1.86088645204753e-06, "loss": 0.3804, "step": 36010 }, { "epoch": 0.802769888606344, "grad_norm": 0.6758759617805481, "learning_rate": 1.8588527234333963e-06, "loss": 0.2599, "step": 36015 }, { "epoch": 0.8028813379869641, "grad_norm": 0.5104584693908691, "learning_rate": 1.8568199928610798e-06, "loss": 0.2994, "step": 36020 }, { "epoch": 0.8029927873675842, "grad_norm": 0.5354732275009155, "learning_rate": 1.8547882605797763e-06, "loss": 0.3458, "step": 36025 }, { "epoch": 0.8031042367482043, "grad_norm": 1.0208121538162231, "learning_rate": 1.8527575268385566e-06, "loss": 0.2628, "step": 36030 }, { "epoch": 0.8032156861288243, "grad_norm": 0.3906199038028717, "learning_rate": 1.8507277918863808e-06, "loss": 0.2572, "step": 36035 }, { "epoch": 0.8033271355094445, "grad_norm": 0.7649385333061218, "learning_rate": 1.848699055972073e-06, "loss": 0.2585, "step": 36040 }, { "epoch": 0.8034385848900645, "grad_norm": 0.7261055111885071, "learning_rate": 1.8466713193443442e-06, "loss": 0.2428, "step": 36045 }, { "epoch": 0.8035500342706845, "grad_norm": 1.1180109977722168, "learning_rate": 1.8446445822517778e-06, "loss": 0.2716, "step": 36050 }, { "epoch": 0.8036614836513046, "grad_norm": 0.8348441123962402, "learning_rate": 1.842618844942836e-06, "loss": 0.3549, "step": 36055 }, { "epoch": 0.8037729330319247, "grad_norm": 0.9582706689834595, "learning_rate": 1.8405941076658584e-06, "loss": 0.3277, "step": 36060 }, { "epoch": 0.8038843824125448, "grad_norm": 0.7937244176864624, "learning_rate": 1.8385703706690605e-06, "loss": 0.3877, "step": 36065 }, { "epoch": 0.8039958317931648, "grad_norm": 0.4820943772792816, "learning_rate": 1.8365476342005407e-06, "loss": 0.1227, "step": 36070 }, { "epoch": 0.8041072811737848, "grad_norm": 1.0704084634780884, "learning_rate": 1.8345258985082658e-06, "loss": 0.3512, "step": 36075 }, { "epoch": 0.804218730554405, "grad_norm": 0.836803138256073, "learning_rate": 1.8325051638400903e-06, "loss": 0.2548, "step": 36080 }, { "epoch": 0.804330179935025, "grad_norm": 0.5254769921302795, "learning_rate": 1.8304854304437391e-06, "loss": 0.225, "step": 36085 }, { "epoch": 0.8044416293156451, "grad_norm": 0.7195600271224976, "learning_rate": 1.8284666985668142e-06, "loss": 0.2046, "step": 36090 }, { "epoch": 0.8045530786962651, "grad_norm": 0.598730206489563, "learning_rate": 1.8264489684567987e-06, "loss": 0.2834, "step": 36095 }, { "epoch": 0.8046645280768853, "grad_norm": 0.7976406216621399, "learning_rate": 1.824432240361046e-06, "loss": 0.3223, "step": 36100 }, { "epoch": 0.8047759774575053, "grad_norm": 0.760412335395813, "learning_rate": 1.8224165145267947e-06, "loss": 0.3259, "step": 36105 }, { "epoch": 0.8048874268381253, "grad_norm": 0.614362359046936, "learning_rate": 1.8204017912011606e-06, "loss": 0.1954, "step": 36110 }, { "epoch": 0.8049988762187454, "grad_norm": 0.2477876991033554, "learning_rate": 1.8183880706311308e-06, "loss": 0.2068, "step": 36115 }, { "epoch": 0.8051103255993655, "grad_norm": 0.6392495632171631, "learning_rate": 1.8163753530635698e-06, "loss": 0.2686, "step": 36120 }, { "epoch": 0.8052217749799856, "grad_norm": 0.7322916388511658, "learning_rate": 1.8143636387452236e-06, "loss": 0.2283, "step": 36125 }, { "epoch": 0.8053332243606056, "grad_norm": 0.5651661157608032, "learning_rate": 1.8123529279227092e-06, "loss": 0.3185, "step": 36130 }, { "epoch": 0.8054446737412256, "grad_norm": 0.5761824250221252, "learning_rate": 1.8103432208425264e-06, "loss": 0.2629, "step": 36135 }, { "epoch": 0.8055561231218458, "grad_norm": 0.5522593855857849, "learning_rate": 1.8083345177510536e-06, "loss": 0.2671, "step": 36140 }, { "epoch": 0.8056675725024658, "grad_norm": 0.524655818939209, "learning_rate": 1.8063268188945382e-06, "loss": 0.2215, "step": 36145 }, { "epoch": 0.8057790218830859, "grad_norm": 0.8033666014671326, "learning_rate": 1.80432012451911e-06, "loss": 0.4037, "step": 36150 }, { "epoch": 0.8058904712637059, "grad_norm": 0.8125406503677368, "learning_rate": 1.8023144348707733e-06, "loss": 0.3197, "step": 36155 }, { "epoch": 0.806001920644326, "grad_norm": 0.7958818674087524, "learning_rate": 1.8003097501954081e-06, "loss": 0.2377, "step": 36160 }, { "epoch": 0.8061133700249461, "grad_norm": 0.5927658677101135, "learning_rate": 1.798306070738778e-06, "loss": 0.3152, "step": 36165 }, { "epoch": 0.8062248194055661, "grad_norm": 0.7656975984573364, "learning_rate": 1.7963033967465127e-06, "loss": 0.3435, "step": 36170 }, { "epoch": 0.8063362687861862, "grad_norm": 0.5137174129486084, "learning_rate": 1.7943017284641317e-06, "loss": 0.2395, "step": 36175 }, { "epoch": 0.8064477181668063, "grad_norm": 0.9598259329795837, "learning_rate": 1.792301066137021e-06, "loss": 0.2673, "step": 36180 }, { "epoch": 0.8065591675474264, "grad_norm": 0.6322871446609497, "learning_rate": 1.7903014100104455e-06, "loss": 0.2864, "step": 36185 }, { "epoch": 0.8066706169280464, "grad_norm": 0.7137151956558228, "learning_rate": 1.7883027603295479e-06, "loss": 0.1954, "step": 36190 }, { "epoch": 0.8067820663086664, "grad_norm": 0.5784446597099304, "learning_rate": 1.7863051173393442e-06, "loss": 0.3839, "step": 36195 }, { "epoch": 0.8068935156892866, "grad_norm": 1.2818819284439087, "learning_rate": 1.7843084812847367e-06, "loss": 0.2935, "step": 36200 }, { "epoch": 0.8070049650699066, "grad_norm": 1.0318589210510254, "learning_rate": 1.7823128524104905e-06, "loss": 0.3096, "step": 36205 }, { "epoch": 0.8071164144505267, "grad_norm": 0.47286149859428406, "learning_rate": 1.780318230961261e-06, "loss": 0.321, "step": 36210 }, { "epoch": 0.8072278638311468, "grad_norm": 0.7115129828453064, "learning_rate": 1.7783246171815694e-06, "loss": 0.3974, "step": 36215 }, { "epoch": 0.8073393132117668, "grad_norm": 0.8326082229614258, "learning_rate": 1.7763320113158188e-06, "loss": 0.3417, "step": 36220 }, { "epoch": 0.8074507625923869, "grad_norm": 0.6177644729614258, "learning_rate": 1.7743404136082843e-06, "loss": 0.3237, "step": 36225 }, { "epoch": 0.8075622119730069, "grad_norm": 0.5992699861526489, "learning_rate": 1.7723498243031246e-06, "loss": 0.3652, "step": 36230 }, { "epoch": 0.8076736613536271, "grad_norm": 0.5553713440895081, "learning_rate": 1.770360243644369e-06, "loss": 0.3442, "step": 36235 }, { "epoch": 0.8077851107342471, "grad_norm": 0.4120601415634155, "learning_rate": 1.7683716718759224e-06, "loss": 0.3071, "step": 36240 }, { "epoch": 0.8078965601148672, "grad_norm": 0.44997352361679077, "learning_rate": 1.7663841092415723e-06, "loss": 0.2397, "step": 36245 }, { "epoch": 0.8080080094954872, "grad_norm": 0.9335451126098633, "learning_rate": 1.7643975559849768e-06, "loss": 0.3183, "step": 36250 }, { "epoch": 0.8081194588761073, "grad_norm": 0.5336930751800537, "learning_rate": 1.7624120123496702e-06, "loss": 0.2958, "step": 36255 }, { "epoch": 0.8082309082567274, "grad_norm": 0.8036069273948669, "learning_rate": 1.7604274785790676e-06, "loss": 0.3444, "step": 36260 }, { "epoch": 0.8083423576373474, "grad_norm": 0.5085418224334717, "learning_rate": 1.7584439549164578e-06, "loss": 0.2722, "step": 36265 }, { "epoch": 0.8084538070179675, "grad_norm": 0.6753281354904175, "learning_rate": 1.756461441605003e-06, "loss": 0.2542, "step": 36270 }, { "epoch": 0.8085652563985876, "grad_norm": 0.4116097688674927, "learning_rate": 1.754479938887742e-06, "loss": 0.2768, "step": 36275 }, { "epoch": 0.8086767057792076, "grad_norm": 0.8679396510124207, "learning_rate": 1.7524994470075985e-06, "loss": 0.3315, "step": 36280 }, { "epoch": 0.8087881551598277, "grad_norm": 0.534657895565033, "learning_rate": 1.7505199662073624e-06, "loss": 0.1846, "step": 36285 }, { "epoch": 0.8088996045404477, "grad_norm": 0.6001737713813782, "learning_rate": 1.7485414967296988e-06, "loss": 0.2151, "step": 36290 }, { "epoch": 0.8090110539210679, "grad_norm": 0.6152361631393433, "learning_rate": 1.7465640388171589e-06, "loss": 0.3807, "step": 36295 }, { "epoch": 0.8091225033016879, "grad_norm": 0.3379671275615692, "learning_rate": 1.7445875927121602e-06, "loss": 0.2696, "step": 36300 }, { "epoch": 0.809233952682308, "grad_norm": 0.5682633519172668, "learning_rate": 1.7426121586570023e-06, "loss": 0.347, "step": 36305 }, { "epoch": 0.809345402062928, "grad_norm": 0.8797348141670227, "learning_rate": 1.7406377368938531e-06, "loss": 0.2951, "step": 36310 }, { "epoch": 0.8094568514435481, "grad_norm": 0.5881412625312805, "learning_rate": 1.7386643276647674e-06, "loss": 0.2931, "step": 36315 }, { "epoch": 0.8095683008241682, "grad_norm": 0.5972985625267029, "learning_rate": 1.7366919312116647e-06, "loss": 0.3303, "step": 36320 }, { "epoch": 0.8096797502047882, "grad_norm": 0.5516791343688965, "learning_rate": 1.7347205477763508e-06, "loss": 0.1799, "step": 36325 }, { "epoch": 0.8097911995854084, "grad_norm": 0.8413097858428955, "learning_rate": 1.7327501776004995e-06, "loss": 0.2569, "step": 36330 }, { "epoch": 0.8099026489660284, "grad_norm": 0.42086705565452576, "learning_rate": 1.7307808209256638e-06, "loss": 0.2209, "step": 36335 }, { "epoch": 0.8100140983466484, "grad_norm": 0.69650799036026, "learning_rate": 1.7288124779932701e-06, "loss": 0.2003, "step": 36340 }, { "epoch": 0.8101255477272685, "grad_norm": 0.5159130692481995, "learning_rate": 1.7268451490446203e-06, "loss": 0.2107, "step": 36345 }, { "epoch": 0.8102369971078885, "grad_norm": 0.5900051593780518, "learning_rate": 1.7248788343208966e-06, "loss": 0.2757, "step": 36350 }, { "epoch": 0.8103484464885087, "grad_norm": 0.44346192479133606, "learning_rate": 1.7229135340631565e-06, "loss": 0.3421, "step": 36355 }, { "epoch": 0.8104598958691287, "grad_norm": 0.7002114057540894, "learning_rate": 1.7209492485123281e-06, "loss": 0.2814, "step": 36360 }, { "epoch": 0.8105713452497487, "grad_norm": 0.6705095171928406, "learning_rate": 1.7189859779092166e-06, "loss": 0.3148, "step": 36365 }, { "epoch": 0.8106827946303689, "grad_norm": 0.5612956881523132, "learning_rate": 1.717023722494504e-06, "loss": 0.2804, "step": 36370 }, { "epoch": 0.8107942440109889, "grad_norm": 0.6677428483963013, "learning_rate": 1.715062482508747e-06, "loss": 0.2004, "step": 36375 }, { "epoch": 0.810905693391609, "grad_norm": 0.9647729396820068, "learning_rate": 1.7131022581923818e-06, "loss": 0.3318, "step": 36380 }, { "epoch": 0.811017142772229, "grad_norm": 0.4130144715309143, "learning_rate": 1.7111430497857118e-06, "loss": 0.1961, "step": 36385 }, { "epoch": 0.8111285921528492, "grad_norm": 0.8154301047325134, "learning_rate": 1.709184857528927e-06, "loss": 0.2715, "step": 36390 }, { "epoch": 0.8112400415334692, "grad_norm": 0.954910159111023, "learning_rate": 1.7072276816620825e-06, "loss": 0.1962, "step": 36395 }, { "epoch": 0.8113514909140892, "grad_norm": 0.5167503356933594, "learning_rate": 1.7052715224251149e-06, "loss": 0.2053, "step": 36400 }, { "epoch": 0.8114629402947093, "grad_norm": 0.8377135396003723, "learning_rate": 1.7033163800578322e-06, "loss": 0.3016, "step": 36405 }, { "epoch": 0.8115743896753294, "grad_norm": 0.7488682270050049, "learning_rate": 1.7013622547999187e-06, "loss": 0.204, "step": 36410 }, { "epoch": 0.8116858390559495, "grad_norm": 0.6839303374290466, "learning_rate": 1.699409146890938e-06, "loss": 0.2733, "step": 36415 }, { "epoch": 0.8117972884365695, "grad_norm": 0.7828803062438965, "learning_rate": 1.6974570565703263e-06, "loss": 0.2779, "step": 36420 }, { "epoch": 0.8119087378171895, "grad_norm": 0.6850530505180359, "learning_rate": 1.6955059840773947e-06, "loss": 0.2373, "step": 36425 }, { "epoch": 0.8120201871978097, "grad_norm": 0.5718730092048645, "learning_rate": 1.6935559296513271e-06, "loss": 0.2751, "step": 36430 }, { "epoch": 0.8121316365784297, "grad_norm": 0.42931827902793884, "learning_rate": 1.691606893531188e-06, "loss": 0.3998, "step": 36435 }, { "epoch": 0.8122430859590498, "grad_norm": 0.5094924569129944, "learning_rate": 1.6896588759559096e-06, "loss": 0.2464, "step": 36440 }, { "epoch": 0.8123545353396698, "grad_norm": 0.4700838029384613, "learning_rate": 1.6877118771643097e-06, "loss": 0.293, "step": 36445 }, { "epoch": 0.81246598472029, "grad_norm": 0.5998474955558777, "learning_rate": 1.6857658973950709e-06, "loss": 0.3073, "step": 36450 }, { "epoch": 0.81257743410091, "grad_norm": 0.9076191186904907, "learning_rate": 1.6838209368867586e-06, "loss": 0.2482, "step": 36455 }, { "epoch": 0.81268888348153, "grad_norm": 0.6209660172462463, "learning_rate": 1.681876995877808e-06, "loss": 0.279, "step": 36460 }, { "epoch": 0.8128003328621501, "grad_norm": 0.7067864537239075, "learning_rate": 1.679934074606533e-06, "loss": 0.2419, "step": 36465 }, { "epoch": 0.8129117822427702, "grad_norm": 0.5416485667228699, "learning_rate": 1.677992173311116e-06, "loss": 0.2501, "step": 36470 }, { "epoch": 0.8130232316233903, "grad_norm": 0.8749811053276062, "learning_rate": 1.6760512922296245e-06, "loss": 0.1969, "step": 36475 }, { "epoch": 0.8131346810040103, "grad_norm": 0.6520072221755981, "learning_rate": 1.6741114315999952e-06, "loss": 0.2354, "step": 36480 }, { "epoch": 0.8132461303846303, "grad_norm": 0.8475064635276794, "learning_rate": 1.672172591660035e-06, "loss": 0.2563, "step": 36485 }, { "epoch": 0.8133575797652505, "grad_norm": 0.5565570592880249, "learning_rate": 1.6702347726474367e-06, "loss": 0.2641, "step": 36490 }, { "epoch": 0.8134690291458705, "grad_norm": 0.5682485699653625, "learning_rate": 1.6682979747997586e-06, "loss": 0.234, "step": 36495 }, { "epoch": 0.8135804785264906, "grad_norm": 0.44536277651786804, "learning_rate": 1.6663621983544387e-06, "loss": 0.2004, "step": 36500 }, { "epoch": 0.8136919279071106, "grad_norm": 0.6493122577667236, "learning_rate": 1.664427443548785e-06, "loss": 0.4058, "step": 36505 }, { "epoch": 0.8138033772877308, "grad_norm": 0.7767170667648315, "learning_rate": 1.662493710619988e-06, "loss": 0.1867, "step": 36510 }, { "epoch": 0.8139148266683508, "grad_norm": 0.5790889263153076, "learning_rate": 1.6605609998051064e-06, "loss": 0.1945, "step": 36515 }, { "epoch": 0.8140262760489708, "grad_norm": 0.777140200138092, "learning_rate": 1.6586293113410756e-06, "loss": 0.3477, "step": 36520 }, { "epoch": 0.814137725429591, "grad_norm": 0.5361962914466858, "learning_rate": 1.6566986454647039e-06, "loss": 0.1921, "step": 36525 }, { "epoch": 0.814249174810211, "grad_norm": 0.5239769816398621, "learning_rate": 1.6547690024126796e-06, "loss": 0.3521, "step": 36530 }, { "epoch": 0.8143606241908311, "grad_norm": 0.9107983112335205, "learning_rate": 1.6528403824215588e-06, "loss": 0.2483, "step": 36535 }, { "epoch": 0.8144720735714511, "grad_norm": 0.9957351088523865, "learning_rate": 1.6509127857277784e-06, "loss": 0.2463, "step": 36540 }, { "epoch": 0.8145835229520711, "grad_norm": 0.6087895035743713, "learning_rate": 1.6489862125676459e-06, "loss": 0.3351, "step": 36545 }, { "epoch": 0.8146949723326913, "grad_norm": 0.3920477032661438, "learning_rate": 1.6470606631773433e-06, "loss": 0.2002, "step": 36550 }, { "epoch": 0.8148064217133113, "grad_norm": 0.7256163358688354, "learning_rate": 1.6451361377929298e-06, "loss": 0.3118, "step": 36555 }, { "epoch": 0.8149178710939314, "grad_norm": 0.8055761456489563, "learning_rate": 1.6432126366503331e-06, "loss": 0.2417, "step": 36560 }, { "epoch": 0.8150293204745515, "grad_norm": 0.909164309501648, "learning_rate": 1.6412901599853626e-06, "loss": 0.3491, "step": 36565 }, { "epoch": 0.8151407698551715, "grad_norm": 0.4847832918167114, "learning_rate": 1.6393687080337006e-06, "loss": 0.1905, "step": 36570 }, { "epoch": 0.8152522192357916, "grad_norm": 0.8144800066947937, "learning_rate": 1.6374482810309022e-06, "loss": 0.3172, "step": 36575 }, { "epoch": 0.8153636686164116, "grad_norm": 0.8546509742736816, "learning_rate": 1.6355288792123947e-06, "loss": 0.3299, "step": 36580 }, { "epoch": 0.8154751179970318, "grad_norm": 0.8366774916648865, "learning_rate": 1.633610502813483e-06, "loss": 0.2728, "step": 36585 }, { "epoch": 0.8155865673776518, "grad_norm": 0.6456219553947449, "learning_rate": 1.6316931520693457e-06, "loss": 0.2615, "step": 36590 }, { "epoch": 0.8156980167582719, "grad_norm": 0.5672926306724548, "learning_rate": 1.6297768272150315e-06, "loss": 0.3728, "step": 36595 }, { "epoch": 0.8158094661388919, "grad_norm": 0.545039713382721, "learning_rate": 1.6278615284854705e-06, "loss": 0.1529, "step": 36600 }, { "epoch": 0.815920915519512, "grad_norm": 0.7076376080513, "learning_rate": 1.6259472561154655e-06, "loss": 0.2669, "step": 36605 }, { "epoch": 0.8160323649001321, "grad_norm": 0.6926854848861694, "learning_rate": 1.624034010339688e-06, "loss": 0.3351, "step": 36610 }, { "epoch": 0.8161438142807521, "grad_norm": 0.8632144331932068, "learning_rate": 1.622121791392689e-06, "loss": 0.294, "step": 36615 }, { "epoch": 0.8162552636613722, "grad_norm": 0.6294080018997192, "learning_rate": 1.6202105995088912e-06, "loss": 0.2915, "step": 36620 }, { "epoch": 0.8163667130419923, "grad_norm": 0.8796729445457458, "learning_rate": 1.6183004349225895e-06, "loss": 0.2139, "step": 36625 }, { "epoch": 0.8164781624226123, "grad_norm": 0.6016055345535278, "learning_rate": 1.6163912978679587e-06, "loss": 0.2296, "step": 36630 }, { "epoch": 0.8165896118032324, "grad_norm": 0.601929783821106, "learning_rate": 1.6144831885790435e-06, "loss": 0.3887, "step": 36635 }, { "epoch": 0.8167010611838524, "grad_norm": 0.6190734505653381, "learning_rate": 1.612576107289765e-06, "loss": 0.2295, "step": 36640 }, { "epoch": 0.8168125105644726, "grad_norm": 0.6690784692764282, "learning_rate": 1.6106700542339138e-06, "loss": 0.3049, "step": 36645 }, { "epoch": 0.8169239599450926, "grad_norm": 0.5558091998100281, "learning_rate": 1.6087650296451584e-06, "loss": 0.2995, "step": 36650 }, { "epoch": 0.8170354093257127, "grad_norm": 0.34144356846809387, "learning_rate": 1.6068610337570378e-06, "loss": 0.2802, "step": 36655 }, { "epoch": 0.8171468587063327, "grad_norm": 0.6739746332168579, "learning_rate": 1.6049580668029718e-06, "loss": 0.2603, "step": 36660 }, { "epoch": 0.8172583080869528, "grad_norm": 0.7272350788116455, "learning_rate": 1.603056129016244e-06, "loss": 0.4396, "step": 36665 }, { "epoch": 0.8173697574675729, "grad_norm": 0.3853004276752472, "learning_rate": 1.6011552206300229e-06, "loss": 0.2138, "step": 36670 }, { "epoch": 0.8174812068481929, "grad_norm": 0.8203393220901489, "learning_rate": 1.5992553418773438e-06, "loss": 0.2679, "step": 36675 }, { "epoch": 0.817592656228813, "grad_norm": 0.5697523951530457, "learning_rate": 1.5973564929911144e-06, "loss": 0.3115, "step": 36680 }, { "epoch": 0.8177041056094331, "grad_norm": 0.6337721943855286, "learning_rate": 1.5954586742041212e-06, "loss": 0.3143, "step": 36685 }, { "epoch": 0.8178155549900531, "grad_norm": 0.7625837922096252, "learning_rate": 1.5935618857490198e-06, "loss": 0.2307, "step": 36690 }, { "epoch": 0.8179270043706732, "grad_norm": 0.8012816309928894, "learning_rate": 1.5916661278583444e-06, "loss": 0.2297, "step": 36695 }, { "epoch": 0.8180384537512932, "grad_norm": 0.49943870306015015, "learning_rate": 1.5897714007644983e-06, "loss": 0.215, "step": 36700 }, { "epoch": 0.8181499031319134, "grad_norm": 1.0728031396865845, "learning_rate": 1.5878777046997628e-06, "loss": 0.2713, "step": 36705 }, { "epoch": 0.8182613525125334, "grad_norm": 0.7271302342414856, "learning_rate": 1.5859850398962896e-06, "loss": 0.2224, "step": 36710 }, { "epoch": 0.8183728018931535, "grad_norm": 0.8357278108596802, "learning_rate": 1.5840934065861047e-06, "loss": 0.2635, "step": 36715 }, { "epoch": 0.8184842512737736, "grad_norm": 0.4184236526489258, "learning_rate": 1.582202805001104e-06, "loss": 0.3066, "step": 36720 }, { "epoch": 0.8185957006543936, "grad_norm": 0.4964730143547058, "learning_rate": 1.5803132353730665e-06, "loss": 0.2489, "step": 36725 }, { "epoch": 0.8187071500350137, "grad_norm": 0.7390478849411011, "learning_rate": 1.578424697933637e-06, "loss": 0.2571, "step": 36730 }, { "epoch": 0.8188185994156337, "grad_norm": 0.42086485028266907, "learning_rate": 1.5765371929143326e-06, "loss": 0.3055, "step": 36735 }, { "epoch": 0.8189300487962539, "grad_norm": 0.7112220525741577, "learning_rate": 1.574650720546551e-06, "loss": 0.359, "step": 36740 }, { "epoch": 0.8190414981768739, "grad_norm": 0.828458845615387, "learning_rate": 1.5727652810615568e-06, "loss": 0.2786, "step": 36745 }, { "epoch": 0.8191529475574939, "grad_norm": 0.2417992502450943, "learning_rate": 1.5708808746904891e-06, "loss": 0.3147, "step": 36750 }, { "epoch": 0.819264396938114, "grad_norm": 0.22680948674678802, "learning_rate": 1.5689975016643666e-06, "loss": 0.3218, "step": 36755 }, { "epoch": 0.8193758463187341, "grad_norm": 0.8272700905799866, "learning_rate": 1.5671151622140723e-06, "loss": 0.2564, "step": 36760 }, { "epoch": 0.8194872956993542, "grad_norm": 0.8969610333442688, "learning_rate": 1.5652338565703673e-06, "loss": 0.2409, "step": 36765 }, { "epoch": 0.8195987450799742, "grad_norm": 0.769127607345581, "learning_rate": 1.5633535849638825e-06, "loss": 0.2854, "step": 36770 }, { "epoch": 0.8197101944605942, "grad_norm": 0.5934464931488037, "learning_rate": 1.5614743476251294e-06, "loss": 0.3501, "step": 36775 }, { "epoch": 0.8198216438412144, "grad_norm": 0.4663730561733246, "learning_rate": 1.5595961447844843e-06, "loss": 0.2903, "step": 36780 }, { "epoch": 0.8199330932218344, "grad_norm": 0.8412685394287109, "learning_rate": 1.5577189766722034e-06, "loss": 0.3296, "step": 36785 }, { "epoch": 0.8200445426024545, "grad_norm": 0.8940569162368774, "learning_rate": 1.5558428435184126e-06, "loss": 0.2503, "step": 36790 }, { "epoch": 0.8201559919830745, "grad_norm": 0.7685565948486328, "learning_rate": 1.5539677455531088e-06, "loss": 0.258, "step": 36795 }, { "epoch": 0.8202674413636947, "grad_norm": 0.6796576380729675, "learning_rate": 1.5520936830061672e-06, "loss": 0.2695, "step": 36800 }, { "epoch": 0.8203788907443147, "grad_norm": 0.5190930366516113, "learning_rate": 1.5502206561073296e-06, "loss": 0.2867, "step": 36805 }, { "epoch": 0.8204903401249347, "grad_norm": 0.35317862033843994, "learning_rate": 1.54834866508622e-06, "loss": 0.338, "step": 36810 }, { "epoch": 0.8206017895055548, "grad_norm": 0.44570088386535645, "learning_rate": 1.5464777101723237e-06, "loss": 0.2515, "step": 36815 }, { "epoch": 0.8207132388861749, "grad_norm": 0.9127787947654724, "learning_rate": 1.5446077915950131e-06, "loss": 0.2184, "step": 36820 }, { "epoch": 0.820824688266795, "grad_norm": 0.8880887031555176, "learning_rate": 1.542738909583521e-06, "loss": 0.2583, "step": 36825 }, { "epoch": 0.820936137647415, "grad_norm": 0.588434636592865, "learning_rate": 1.5408710643669578e-06, "loss": 0.2506, "step": 36830 }, { "epoch": 0.821047587028035, "grad_norm": 0.6682761907577515, "learning_rate": 1.539004256174308e-06, "loss": 0.2622, "step": 36835 }, { "epoch": 0.8211590364086552, "grad_norm": 0.5779880881309509, "learning_rate": 1.537138485234425e-06, "loss": 0.2864, "step": 36840 }, { "epoch": 0.8212704857892752, "grad_norm": 0.5295442938804626, "learning_rate": 1.5352737517760407e-06, "loss": 0.3275, "step": 36845 }, { "epoch": 0.8213819351698953, "grad_norm": 0.866150438785553, "learning_rate": 1.5334100560277599e-06, "loss": 0.3419, "step": 36850 }, { "epoch": 0.8214933845505153, "grad_norm": 1.006531834602356, "learning_rate": 1.5315473982180528e-06, "loss": 0.2724, "step": 36855 }, { "epoch": 0.8216048339311355, "grad_norm": 0.4456956088542938, "learning_rate": 1.5296857785752694e-06, "loss": 0.2642, "step": 36860 }, { "epoch": 0.8217162833117555, "grad_norm": 0.5592550039291382, "learning_rate": 1.5278251973276281e-06, "loss": 0.2101, "step": 36865 }, { "epoch": 0.8218277326923755, "grad_norm": 0.5776540637016296, "learning_rate": 1.525965654703221e-06, "loss": 0.2222, "step": 36870 }, { "epoch": 0.8219391820729957, "grad_norm": 0.7175986766815186, "learning_rate": 1.5241071509300143e-06, "loss": 0.3245, "step": 36875 }, { "epoch": 0.8220506314536157, "grad_norm": 0.7773592472076416, "learning_rate": 1.5222496862358494e-06, "loss": 0.3101, "step": 36880 }, { "epoch": 0.8221620808342358, "grad_norm": 0.6264556646347046, "learning_rate": 1.520393260848435e-06, "loss": 0.3531, "step": 36885 }, { "epoch": 0.8222735302148558, "grad_norm": 0.6289038062095642, "learning_rate": 1.5185378749953538e-06, "loss": 0.2971, "step": 36890 }, { "epoch": 0.8223849795954759, "grad_norm": 0.6231863498687744, "learning_rate": 1.5166835289040626e-06, "loss": 0.2296, "step": 36895 }, { "epoch": 0.822496428976096, "grad_norm": 0.6649795770645142, "learning_rate": 1.5148302228018897e-06, "loss": 0.3292, "step": 36900 }, { "epoch": 0.822607878356716, "grad_norm": 0.6453999876976013, "learning_rate": 1.5129779569160342e-06, "loss": 0.2511, "step": 36905 }, { "epoch": 0.8227193277373361, "grad_norm": 0.6257741451263428, "learning_rate": 1.5111267314735712e-06, "loss": 0.2089, "step": 36910 }, { "epoch": 0.8228307771179562, "grad_norm": 0.4796069860458374, "learning_rate": 1.509276546701448e-06, "loss": 0.2014, "step": 36915 }, { "epoch": 0.8229422264985762, "grad_norm": 0.784619927406311, "learning_rate": 1.5074274028264835e-06, "loss": 0.2584, "step": 36920 }, { "epoch": 0.8230536758791963, "grad_norm": 0.4626988470554352, "learning_rate": 1.505579300075366e-06, "loss": 0.2602, "step": 36925 }, { "epoch": 0.8231651252598163, "grad_norm": 1.1895695924758911, "learning_rate": 1.503732238674659e-06, "loss": 0.3199, "step": 36930 }, { "epoch": 0.8232765746404365, "grad_norm": 0.4978344142436981, "learning_rate": 1.5018862188507965e-06, "loss": 0.1895, "step": 36935 }, { "epoch": 0.8233880240210565, "grad_norm": 0.6750534772872925, "learning_rate": 1.5000412408300914e-06, "loss": 0.2036, "step": 36940 }, { "epoch": 0.8234994734016766, "grad_norm": 0.7439817190170288, "learning_rate": 1.4981973048387177e-06, "loss": 0.229, "step": 36945 }, { "epoch": 0.8236109227822966, "grad_norm": 0.5261428356170654, "learning_rate": 1.4963544111027316e-06, "loss": 0.2848, "step": 36950 }, { "epoch": 0.8237223721629167, "grad_norm": 0.5842021703720093, "learning_rate": 1.494512559848058e-06, "loss": 0.2968, "step": 36955 }, { "epoch": 0.8238338215435368, "grad_norm": 0.6377136707305908, "learning_rate": 1.4926717513004928e-06, "loss": 0.3559, "step": 36960 }, { "epoch": 0.8239452709241568, "grad_norm": 0.5633856654167175, "learning_rate": 1.4908319856857012e-06, "loss": 0.3856, "step": 36965 }, { "epoch": 0.824056720304777, "grad_norm": 0.6232241988182068, "learning_rate": 1.4889932632292292e-06, "loss": 0.268, "step": 36970 }, { "epoch": 0.824168169685397, "grad_norm": 0.48467230796813965, "learning_rate": 1.4871555841564889e-06, "loss": 0.2913, "step": 36975 }, { "epoch": 0.824279619066017, "grad_norm": 0.643015444278717, "learning_rate": 1.4853189486927634e-06, "loss": 0.3131, "step": 36980 }, { "epoch": 0.8243910684466371, "grad_norm": 0.612481951713562, "learning_rate": 1.4834833570632123e-06, "loss": 0.1504, "step": 36985 }, { "epoch": 0.8245025178272571, "grad_norm": 0.4606129229068756, "learning_rate": 1.481648809492865e-06, "loss": 0.2039, "step": 36990 }, { "epoch": 0.8246139672078773, "grad_norm": 0.8010572195053101, "learning_rate": 1.4798153062066223e-06, "loss": 0.3349, "step": 36995 }, { "epoch": 0.8247254165884973, "grad_norm": 0.6135943531990051, "learning_rate": 1.4779828474292545e-06, "loss": 0.3115, "step": 37000 }, { "epoch": 0.8248368659691174, "grad_norm": 0.8414220213890076, "learning_rate": 1.4761514333854121e-06, "loss": 0.368, "step": 37005 }, { "epoch": 0.8249483153497374, "grad_norm": 0.7408134341239929, "learning_rate": 1.4743210642996108e-06, "loss": 0.2246, "step": 37010 }, { "epoch": 0.8250597647303575, "grad_norm": 0.5923077464103699, "learning_rate": 1.4724917403962359e-06, "loss": 0.2294, "step": 37015 }, { "epoch": 0.8251712141109776, "grad_norm": 0.7077345252037048, "learning_rate": 1.470663461899553e-06, "loss": 0.3896, "step": 37020 }, { "epoch": 0.8252826634915976, "grad_norm": 0.48106616735458374, "learning_rate": 1.4688362290336944e-06, "loss": 0.2708, "step": 37025 }, { "epoch": 0.8253941128722178, "grad_norm": 0.4276266396045685, "learning_rate": 1.4670100420226608e-06, "loss": 0.1939, "step": 37030 }, { "epoch": 0.8255055622528378, "grad_norm": 0.5082858204841614, "learning_rate": 1.465184901090334e-06, "loss": 0.1899, "step": 37035 }, { "epoch": 0.8256170116334578, "grad_norm": 0.6479561924934387, "learning_rate": 1.4633608064604598e-06, "loss": 0.2502, "step": 37040 }, { "epoch": 0.8257284610140779, "grad_norm": 0.6713269948959351, "learning_rate": 1.461537758356657e-06, "loss": 0.3826, "step": 37045 }, { "epoch": 0.825839910394698, "grad_norm": 0.7276782393455505, "learning_rate": 1.459715757002419e-06, "loss": 0.2784, "step": 37050 }, { "epoch": 0.8259513597753181, "grad_norm": 1.0333011150360107, "learning_rate": 1.457894802621106e-06, "loss": 0.3731, "step": 37055 }, { "epoch": 0.8260628091559381, "grad_norm": 0.4250289797782898, "learning_rate": 1.456074895435955e-06, "loss": 0.2192, "step": 37060 }, { "epoch": 0.8261742585365582, "grad_norm": 0.6744095683097839, "learning_rate": 1.4542560356700764e-06, "loss": 0.2691, "step": 37065 }, { "epoch": 0.8262857079171783, "grad_norm": 0.6844059228897095, "learning_rate": 1.4524382235464429e-06, "loss": 0.2293, "step": 37070 }, { "epoch": 0.8263971572977983, "grad_norm": 0.8135658502578735, "learning_rate": 1.4506214592879075e-06, "loss": 0.3692, "step": 37075 }, { "epoch": 0.8265086066784184, "grad_norm": 0.43851977586746216, "learning_rate": 1.4488057431171897e-06, "loss": 0.2361, "step": 37080 }, { "epoch": 0.8266200560590384, "grad_norm": 0.720723569393158, "learning_rate": 1.4469910752568805e-06, "loss": 0.3309, "step": 37085 }, { "epoch": 0.8267315054396586, "grad_norm": 0.7780085802078247, "learning_rate": 1.4451774559294463e-06, "loss": 0.2542, "step": 37090 }, { "epoch": 0.8268429548202786, "grad_norm": 0.5620120167732239, "learning_rate": 1.4433648853572247e-06, "loss": 0.3395, "step": 37095 }, { "epoch": 0.8269544042008986, "grad_norm": 0.2839726507663727, "learning_rate": 1.4415533637624214e-06, "loss": 0.2525, "step": 37100 }, { "epoch": 0.8270658535815187, "grad_norm": 0.6484590172767639, "learning_rate": 1.4397428913671153e-06, "loss": 0.3056, "step": 37105 }, { "epoch": 0.8271773029621388, "grad_norm": 0.5020953416824341, "learning_rate": 1.4379334683932544e-06, "loss": 0.2167, "step": 37110 }, { "epoch": 0.8272887523427589, "grad_norm": 0.7176340818405151, "learning_rate": 1.4361250950626626e-06, "loss": 0.2863, "step": 37115 }, { "epoch": 0.8274002017233789, "grad_norm": 1.1245310306549072, "learning_rate": 1.434317771597028e-06, "loss": 0.3414, "step": 37120 }, { "epoch": 0.8275116511039989, "grad_norm": 0.5871909856796265, "learning_rate": 1.4325114982179177e-06, "loss": 0.285, "step": 37125 }, { "epoch": 0.8276231004846191, "grad_norm": 0.34241896867752075, "learning_rate": 1.4307062751467693e-06, "loss": 0.1995, "step": 37130 }, { "epoch": 0.8277345498652391, "grad_norm": 0.5553747415542603, "learning_rate": 1.4289021026048865e-06, "loss": 0.3491, "step": 37135 }, { "epoch": 0.8278459992458592, "grad_norm": 0.6614499092102051, "learning_rate": 1.4270989808134483e-06, "loss": 0.3176, "step": 37140 }, { "epoch": 0.8279574486264792, "grad_norm": 0.7910409569740295, "learning_rate": 1.425296909993501e-06, "loss": 0.2726, "step": 37145 }, { "epoch": 0.8280688980070994, "grad_norm": 0.9615556597709656, "learning_rate": 1.4234958903659645e-06, "loss": 0.2541, "step": 37150 }, { "epoch": 0.8281803473877194, "grad_norm": 0.3836075961589813, "learning_rate": 1.4216959221516336e-06, "loss": 0.3331, "step": 37155 }, { "epoch": 0.8282917967683394, "grad_norm": 0.40252745151519775, "learning_rate": 1.4198970055711657e-06, "loss": 0.1764, "step": 37160 }, { "epoch": 0.8284032461489595, "grad_norm": 0.485383540391922, "learning_rate": 1.4180991408450995e-06, "loss": 0.1294, "step": 37165 }, { "epoch": 0.8285146955295796, "grad_norm": 0.5613443851470947, "learning_rate": 1.416302328193836e-06, "loss": 0.1982, "step": 37170 }, { "epoch": 0.8286261449101997, "grad_norm": 0.32537856698036194, "learning_rate": 1.4145065678376512e-06, "loss": 0.2892, "step": 37175 }, { "epoch": 0.8287375942908197, "grad_norm": 0.8405792117118835, "learning_rate": 1.4127118599966895e-06, "loss": 0.2527, "step": 37180 }, { "epoch": 0.8288490436714397, "grad_norm": 0.4815138876438141, "learning_rate": 1.410918204890972e-06, "loss": 0.2303, "step": 37185 }, { "epoch": 0.8289604930520599, "grad_norm": 0.5533608198165894, "learning_rate": 1.4091256027403855e-06, "loss": 0.1856, "step": 37190 }, { "epoch": 0.8290719424326799, "grad_norm": 0.6979275941848755, "learning_rate": 1.4073340537646863e-06, "loss": 0.2584, "step": 37195 }, { "epoch": 0.8291833918133, "grad_norm": 0.8504383563995361, "learning_rate": 1.4055435581835086e-06, "loss": 0.2773, "step": 37200 }, { "epoch": 0.82929484119392, "grad_norm": 0.7722695469856262, "learning_rate": 1.4037541162163515e-06, "loss": 0.3342, "step": 37205 }, { "epoch": 0.8294062905745402, "grad_norm": 0.326023668050766, "learning_rate": 1.4019657280825883e-06, "loss": 0.3253, "step": 37210 }, { "epoch": 0.8295177399551602, "grad_norm": 0.7090151906013489, "learning_rate": 1.4001783940014568e-06, "loss": 0.3364, "step": 37215 }, { "epoch": 0.8296291893357802, "grad_norm": 0.5238401293754578, "learning_rate": 1.3983921141920764e-06, "loss": 0.1887, "step": 37220 }, { "epoch": 0.8297406387164004, "grad_norm": 0.6227529048919678, "learning_rate": 1.3966068888734285e-06, "loss": 0.2502, "step": 37225 }, { "epoch": 0.8298520880970204, "grad_norm": 0.4360678791999817, "learning_rate": 1.3948227182643647e-06, "loss": 0.2717, "step": 37230 }, { "epoch": 0.8299635374776405, "grad_norm": 0.5678982734680176, "learning_rate": 1.393039602583618e-06, "loss": 0.2633, "step": 37235 }, { "epoch": 0.8300749868582605, "grad_norm": 0.2946786880493164, "learning_rate": 1.3912575420497798e-06, "loss": 0.2416, "step": 37240 }, { "epoch": 0.8301864362388806, "grad_norm": 0.9580309391021729, "learning_rate": 1.3894765368813145e-06, "loss": 0.2383, "step": 37245 }, { "epoch": 0.8302978856195007, "grad_norm": 0.8293169736862183, "learning_rate": 1.3876965872965654e-06, "loss": 0.227, "step": 37250 }, { "epoch": 0.8304093350001207, "grad_norm": 0.9708387851715088, "learning_rate": 1.3859176935137387e-06, "loss": 0.2806, "step": 37255 }, { "epoch": 0.8305207843807408, "grad_norm": 0.6555142402648926, "learning_rate": 1.3841398557509123e-06, "loss": 0.2103, "step": 37260 }, { "epoch": 0.8306322337613609, "grad_norm": 0.7201140522956848, "learning_rate": 1.382363074226032e-06, "loss": 0.4568, "step": 37265 }, { "epoch": 0.830743683141981, "grad_norm": 0.6834139823913574, "learning_rate": 1.3805873491569234e-06, "loss": 0.2772, "step": 37270 }, { "epoch": 0.830855132522601, "grad_norm": 0.5923340320587158, "learning_rate": 1.3788126807612722e-06, "loss": 0.332, "step": 37275 }, { "epoch": 0.830966581903221, "grad_norm": 0.604604184627533, "learning_rate": 1.3770390692566438e-06, "loss": 0.1901, "step": 37280 }, { "epoch": 0.8310780312838412, "grad_norm": 0.7125951647758484, "learning_rate": 1.3752665148604661e-06, "loss": 0.2456, "step": 37285 }, { "epoch": 0.8311894806644612, "grad_norm": 0.6670401692390442, "learning_rate": 1.3734950177900396e-06, "loss": 0.2768, "step": 37290 }, { "epoch": 0.8313009300450813, "grad_norm": 0.8545556664466858, "learning_rate": 1.3717245782625389e-06, "loss": 0.3144, "step": 37295 }, { "epoch": 0.8314123794257013, "grad_norm": 0.6221092939376831, "learning_rate": 1.369955196495003e-06, "loss": 0.1969, "step": 37300 }, { "epoch": 0.8315238288063214, "grad_norm": 0.5654235482215881, "learning_rate": 1.3681868727043478e-06, "loss": 0.2783, "step": 37305 }, { "epoch": 0.8316352781869415, "grad_norm": 0.3323666751384735, "learning_rate": 1.3664196071073521e-06, "loss": 0.2179, "step": 37310 }, { "epoch": 0.8317467275675615, "grad_norm": 0.7455230951309204, "learning_rate": 1.3646533999206745e-06, "loss": 0.3412, "step": 37315 }, { "epoch": 0.8318581769481816, "grad_norm": 0.49438366293907166, "learning_rate": 1.3628882513608343e-06, "loss": 0.3679, "step": 37320 }, { "epoch": 0.8319696263288017, "grad_norm": 0.831476092338562, "learning_rate": 1.3611241616442273e-06, "loss": 0.3278, "step": 37325 }, { "epoch": 0.8320810757094217, "grad_norm": 0.5302203297615051, "learning_rate": 1.3593611309871158e-06, "loss": 0.33, "step": 37330 }, { "epoch": 0.8321925250900418, "grad_norm": 0.6043755412101746, "learning_rate": 1.3575991596056325e-06, "loss": 0.2399, "step": 37335 }, { "epoch": 0.8323039744706618, "grad_norm": 0.4499483108520508, "learning_rate": 1.3558382477157828e-06, "loss": 0.2271, "step": 37340 }, { "epoch": 0.832415423851282, "grad_norm": 1.055702805519104, "learning_rate": 1.3540783955334425e-06, "loss": 0.3462, "step": 37345 }, { "epoch": 0.832526873231902, "grad_norm": 0.5674599409103394, "learning_rate": 1.3523196032743557e-06, "loss": 0.3179, "step": 37350 }, { "epoch": 0.8326383226125221, "grad_norm": 0.7008332014083862, "learning_rate": 1.3505618711541358e-06, "loss": 0.3123, "step": 37355 }, { "epoch": 0.8327497719931422, "grad_norm": 0.8845453858375549, "learning_rate": 1.3488051993882668e-06, "loss": 0.3611, "step": 37360 }, { "epoch": 0.8328612213737622, "grad_norm": 0.4716404676437378, "learning_rate": 1.3470495881921008e-06, "loss": 0.2243, "step": 37365 }, { "epoch": 0.8329726707543823, "grad_norm": 0.6117929220199585, "learning_rate": 1.3452950377808648e-06, "loss": 0.258, "step": 37370 }, { "epoch": 0.8330841201350023, "grad_norm": 0.7175026535987854, "learning_rate": 1.3435415483696556e-06, "loss": 0.3727, "step": 37375 }, { "epoch": 0.8331955695156225, "grad_norm": 0.6029932498931885, "learning_rate": 1.3417891201734356e-06, "loss": 0.2152, "step": 37380 }, { "epoch": 0.8333070188962425, "grad_norm": 0.7872262001037598, "learning_rate": 1.3400377534070374e-06, "loss": 0.2302, "step": 37385 }, { "epoch": 0.8334184682768625, "grad_norm": 1.1524052619934082, "learning_rate": 1.338287448285166e-06, "loss": 0.4157, "step": 37390 }, { "epoch": 0.8335299176574826, "grad_norm": 0.7111510038375854, "learning_rate": 1.336538205022393e-06, "loss": 0.3132, "step": 37395 }, { "epoch": 0.8336413670381027, "grad_norm": 0.7074114084243774, "learning_rate": 1.3347900238331668e-06, "loss": 0.2602, "step": 37400 }, { "epoch": 0.8337528164187228, "grad_norm": 0.5388669371604919, "learning_rate": 1.3330429049317972e-06, "loss": 0.3328, "step": 37405 }, { "epoch": 0.8338642657993428, "grad_norm": 0.4883374869823456, "learning_rate": 1.3312968485324695e-06, "loss": 0.2247, "step": 37410 }, { "epoch": 0.8339757151799629, "grad_norm": 0.383735716342926, "learning_rate": 1.329551854849237e-06, "loss": 0.2401, "step": 37415 }, { "epoch": 0.834087164560583, "grad_norm": 0.9760423302650452, "learning_rate": 1.3278079240960217e-06, "loss": 0.3357, "step": 37420 }, { "epoch": 0.834198613941203, "grad_norm": 0.6999823451042175, "learning_rate": 1.3260650564866151e-06, "loss": 0.2286, "step": 37425 }, { "epoch": 0.8343100633218231, "grad_norm": 0.7534862756729126, "learning_rate": 1.3243232522346783e-06, "loss": 0.3578, "step": 37430 }, { "epoch": 0.8344215127024431, "grad_norm": 0.6549674272537231, "learning_rate": 1.3225825115537472e-06, "loss": 0.2919, "step": 37435 }, { "epoch": 0.8345329620830633, "grad_norm": 0.7035777568817139, "learning_rate": 1.3208428346572189e-06, "loss": 0.3564, "step": 37440 }, { "epoch": 0.8346444114636833, "grad_norm": 0.7054711580276489, "learning_rate": 1.3191042217583672e-06, "loss": 0.3436, "step": 37445 }, { "epoch": 0.8347558608443033, "grad_norm": 0.781085193157196, "learning_rate": 1.3173666730703327e-06, "loss": 0.2596, "step": 37450 }, { "epoch": 0.8348673102249234, "grad_norm": 0.7348710894584656, "learning_rate": 1.315630188806124e-06, "loss": 0.3375, "step": 37455 }, { "epoch": 0.8349787596055435, "grad_norm": 0.7270821928977966, "learning_rate": 1.3138947691786185e-06, "loss": 0.2431, "step": 37460 }, { "epoch": 0.8350902089861636, "grad_norm": 0.18632780015468597, "learning_rate": 1.3121604144005717e-06, "loss": 0.1656, "step": 37465 }, { "epoch": 0.8352016583667836, "grad_norm": 0.4481074810028076, "learning_rate": 1.3104271246845967e-06, "loss": 0.2322, "step": 37470 }, { "epoch": 0.8353131077474037, "grad_norm": 0.7926612496376038, "learning_rate": 1.3086949002431815e-06, "loss": 0.3465, "step": 37475 }, { "epoch": 0.8354245571280238, "grad_norm": 0.4460538923740387, "learning_rate": 1.306963741288687e-06, "loss": 0.3541, "step": 37480 }, { "epoch": 0.8355360065086438, "grad_norm": 0.38178062438964844, "learning_rate": 1.3052336480333372e-06, "loss": 0.284, "step": 37485 }, { "epoch": 0.8356474558892639, "grad_norm": 0.5777224898338318, "learning_rate": 1.3035046206892277e-06, "loss": 0.2561, "step": 37490 }, { "epoch": 0.835758905269884, "grad_norm": 0.30997785925865173, "learning_rate": 1.3017766594683267e-06, "loss": 0.1854, "step": 37495 }, { "epoch": 0.8358703546505041, "grad_norm": 0.6693107485771179, "learning_rate": 1.3000497645824672e-06, "loss": 0.262, "step": 37500 }, { "epoch": 0.8359818040311241, "grad_norm": 0.5152618885040283, "learning_rate": 1.2983239362433542e-06, "loss": 0.2762, "step": 37505 }, { "epoch": 0.8360932534117441, "grad_norm": 0.5906385183334351, "learning_rate": 1.2965991746625561e-06, "loss": 0.2687, "step": 37510 }, { "epoch": 0.8362047027923643, "grad_norm": 0.6585797071456909, "learning_rate": 1.2948754800515228e-06, "loss": 0.3197, "step": 37515 }, { "epoch": 0.8363161521729843, "grad_norm": 0.7745956182479858, "learning_rate": 1.293152852621562e-06, "loss": 0.1805, "step": 37520 }, { "epoch": 0.8364276015536044, "grad_norm": 0.9326626062393188, "learning_rate": 1.2914312925838524e-06, "loss": 0.2467, "step": 37525 }, { "epoch": 0.8365390509342244, "grad_norm": 0.7191392183303833, "learning_rate": 1.2897108001494496e-06, "loss": 0.3352, "step": 37530 }, { "epoch": 0.8366505003148444, "grad_norm": 0.601437509059906, "learning_rate": 1.2879913755292683e-06, "loss": 0.2939, "step": 37535 }, { "epoch": 0.8367619496954646, "grad_norm": 0.6129541993141174, "learning_rate": 1.2862730189340989e-06, "loss": 0.2128, "step": 37540 }, { "epoch": 0.8368733990760846, "grad_norm": 0.5679721236228943, "learning_rate": 1.284555730574596e-06, "loss": 0.2247, "step": 37545 }, { "epoch": 0.8369848484567047, "grad_norm": 0.7880651950836182, "learning_rate": 1.282839510661289e-06, "loss": 0.2246, "step": 37550 }, { "epoch": 0.8370962978373248, "grad_norm": 0.6838695406913757, "learning_rate": 1.2811243594045697e-06, "loss": 0.2358, "step": 37555 }, { "epoch": 0.8372077472179449, "grad_norm": 0.373812735080719, "learning_rate": 1.2794102770147065e-06, "loss": 0.2945, "step": 37560 }, { "epoch": 0.8373191965985649, "grad_norm": 0.9102007150650024, "learning_rate": 1.2776972637018314e-06, "loss": 0.2955, "step": 37565 }, { "epoch": 0.8374306459791849, "grad_norm": 0.4074249267578125, "learning_rate": 1.2759853196759454e-06, "loss": 0.3346, "step": 37570 }, { "epoch": 0.8375420953598051, "grad_norm": 1.002528190612793, "learning_rate": 1.2742744451469202e-06, "loss": 0.2436, "step": 37575 }, { "epoch": 0.8376535447404251, "grad_norm": 0.7155662775039673, "learning_rate": 1.272564640324494e-06, "loss": 0.2921, "step": 37580 }, { "epoch": 0.8377649941210452, "grad_norm": 0.41818249225616455, "learning_rate": 1.2708559054182767e-06, "loss": 0.3699, "step": 37585 }, { "epoch": 0.8378764435016652, "grad_norm": 0.6082835793495178, "learning_rate": 1.2691482406377499e-06, "loss": 0.352, "step": 37590 }, { "epoch": 0.8379878928822853, "grad_norm": 0.7842504382133484, "learning_rate": 1.2674416461922555e-06, "loss": 0.4216, "step": 37595 }, { "epoch": 0.8380993422629054, "grad_norm": 0.8266199231147766, "learning_rate": 1.2657361222910115e-06, "loss": 0.3116, "step": 37600 }, { "epoch": 0.8382107916435254, "grad_norm": 0.6448431611061096, "learning_rate": 1.264031669143101e-06, "loss": 0.2408, "step": 37605 }, { "epoch": 0.8383222410241455, "grad_norm": 0.9584223628044128, "learning_rate": 1.2623282869574726e-06, "loss": 0.2261, "step": 37610 }, { "epoch": 0.8384336904047656, "grad_norm": 0.5301266312599182, "learning_rate": 1.2606259759429562e-06, "loss": 0.2289, "step": 37615 }, { "epoch": 0.8385451397853857, "grad_norm": 0.7165831923484802, "learning_rate": 1.2589247363082335e-06, "loss": 0.2479, "step": 37620 }, { "epoch": 0.8386565891660057, "grad_norm": 0.9761996865272522, "learning_rate": 1.25722456826187e-06, "loss": 0.3339, "step": 37625 }, { "epoch": 0.8387680385466257, "grad_norm": 0.30678683519363403, "learning_rate": 1.255525472012291e-06, "loss": 0.2204, "step": 37630 }, { "epoch": 0.8388794879272459, "grad_norm": 0.45100367069244385, "learning_rate": 1.2538274477677925e-06, "loss": 0.2811, "step": 37635 }, { "epoch": 0.8389909373078659, "grad_norm": 0.5770091414451599, "learning_rate": 1.2521304957365388e-06, "loss": 0.2808, "step": 37640 }, { "epoch": 0.839102386688486, "grad_norm": 0.5960776805877686, "learning_rate": 1.2504346161265602e-06, "loss": 0.1137, "step": 37645 }, { "epoch": 0.839213836069106, "grad_norm": 0.6756134033203125, "learning_rate": 1.2487398091457637e-06, "loss": 0.2733, "step": 37650 }, { "epoch": 0.8393252854497261, "grad_norm": 0.3762647807598114, "learning_rate": 1.2470460750019154e-06, "loss": 0.3683, "step": 37655 }, { "epoch": 0.8394367348303462, "grad_norm": 0.6110719442367554, "learning_rate": 1.2453534139026579e-06, "loss": 0.2211, "step": 37660 }, { "epoch": 0.8395481842109662, "grad_norm": 0.5859352350234985, "learning_rate": 1.2436618260554955e-06, "loss": 0.2183, "step": 37665 }, { "epoch": 0.8396596335915864, "grad_norm": 0.800769567489624, "learning_rate": 1.2419713116678056e-06, "loss": 0.3123, "step": 37670 }, { "epoch": 0.8397710829722064, "grad_norm": 0.5613384246826172, "learning_rate": 1.240281870946829e-06, "loss": 0.191, "step": 37675 }, { "epoch": 0.8398825323528264, "grad_norm": 0.6227254867553711, "learning_rate": 1.2385935040996833e-06, "loss": 0.198, "step": 37680 }, { "epoch": 0.8399939817334465, "grad_norm": 0.5670225620269775, "learning_rate": 1.2369062113333453e-06, "loss": 0.1745, "step": 37685 }, { "epoch": 0.8401054311140665, "grad_norm": 0.44551461935043335, "learning_rate": 1.2352199928546627e-06, "loss": 0.201, "step": 37690 }, { "epoch": 0.8402168804946867, "grad_norm": 0.5814508199691772, "learning_rate": 1.233534848870358e-06, "loss": 0.1739, "step": 37695 }, { "epoch": 0.8403283298753067, "grad_norm": 0.6766602396965027, "learning_rate": 1.2318507795870138e-06, "loss": 0.2415, "step": 37700 }, { "epoch": 0.8404397792559268, "grad_norm": 1.014407992362976, "learning_rate": 1.2301677852110828e-06, "loss": 0.2986, "step": 37705 }, { "epoch": 0.8405512286365469, "grad_norm": 1.3561230897903442, "learning_rate": 1.2284858659488908e-06, "loss": 0.3334, "step": 37710 }, { "epoch": 0.8406626780171669, "grad_norm": 0.46694424748420715, "learning_rate": 1.2268050220066251e-06, "loss": 0.2962, "step": 37715 }, { "epoch": 0.840774127397787, "grad_norm": 0.6729729771614075, "learning_rate": 1.2251252535903457e-06, "loss": 0.2579, "step": 37720 }, { "epoch": 0.840885576778407, "grad_norm": 1.022292971611023, "learning_rate": 1.2234465609059754e-06, "loss": 0.307, "step": 37725 }, { "epoch": 0.8409970261590272, "grad_norm": 0.6683056950569153, "learning_rate": 1.221768944159315e-06, "loss": 0.3139, "step": 37730 }, { "epoch": 0.8411084755396472, "grad_norm": 0.6963731646537781, "learning_rate": 1.2200924035560247e-06, "loss": 0.2479, "step": 37735 }, { "epoch": 0.8412199249202672, "grad_norm": 0.9116354584693909, "learning_rate": 1.218416939301633e-06, "loss": 0.2626, "step": 37740 }, { "epoch": 0.8413313743008873, "grad_norm": 0.44806307554244995, "learning_rate": 1.216742551601543e-06, "loss": 0.3445, "step": 37745 }, { "epoch": 0.8414428236815074, "grad_norm": 0.7028947472572327, "learning_rate": 1.2150692406610199e-06, "loss": 0.3144, "step": 37750 }, { "epoch": 0.8415542730621275, "grad_norm": 0.7792380452156067, "learning_rate": 1.2133970066851985e-06, "loss": 0.3816, "step": 37755 }, { "epoch": 0.8416657224427475, "grad_norm": 0.34202641248703003, "learning_rate": 1.2117258498790807e-06, "loss": 0.24, "step": 37760 }, { "epoch": 0.8417771718233676, "grad_norm": 0.4707144796848297, "learning_rate": 1.2100557704475402e-06, "loss": 0.2674, "step": 37765 }, { "epoch": 0.8418886212039877, "grad_norm": 0.9186710715293884, "learning_rate": 1.2083867685953121e-06, "loss": 0.254, "step": 37770 }, { "epoch": 0.8420000705846077, "grad_norm": 0.6091528534889221, "learning_rate": 1.2067188445270074e-06, "loss": 0.3032, "step": 37775 }, { "epoch": 0.8421115199652278, "grad_norm": 0.7481672763824463, "learning_rate": 1.2050519984470988e-06, "loss": 0.3216, "step": 37780 }, { "epoch": 0.8422229693458478, "grad_norm": 0.7646524310112, "learning_rate": 1.2033862305599275e-06, "loss": 0.4078, "step": 37785 }, { "epoch": 0.842334418726468, "grad_norm": 0.5433209538459778, "learning_rate": 1.201721541069706e-06, "loss": 0.2804, "step": 37790 }, { "epoch": 0.842445868107088, "grad_norm": 0.6066043376922607, "learning_rate": 1.2000579301805093e-06, "loss": 0.2953, "step": 37795 }, { "epoch": 0.842557317487708, "grad_norm": 0.3361576199531555, "learning_rate": 1.1983953980962848e-06, "loss": 0.3078, "step": 37800 }, { "epoch": 0.8426687668683281, "grad_norm": 0.4943036139011383, "learning_rate": 1.1967339450208492e-06, "loss": 0.1272, "step": 37805 }, { "epoch": 0.8427802162489482, "grad_norm": 0.9423078894615173, "learning_rate": 1.195073571157881e-06, "loss": 0.371, "step": 37810 }, { "epoch": 0.8428916656295683, "grad_norm": 0.3357747197151184, "learning_rate": 1.193414276710928e-06, "loss": 0.1712, "step": 37815 }, { "epoch": 0.8430031150101883, "grad_norm": 0.35928764939308167, "learning_rate": 1.1917560618834102e-06, "loss": 0.2544, "step": 37820 }, { "epoch": 0.8431145643908085, "grad_norm": 0.8321924805641174, "learning_rate": 1.190098926878609e-06, "loss": 0.3164, "step": 37825 }, { "epoch": 0.8432260137714285, "grad_norm": 0.6431793570518494, "learning_rate": 1.1884428718996755e-06, "loss": 0.3778, "step": 37830 }, { "epoch": 0.8433374631520485, "grad_norm": 0.6413928270339966, "learning_rate": 1.1867878971496305e-06, "loss": 0.3294, "step": 37835 }, { "epoch": 0.8434489125326686, "grad_norm": 0.40377914905548096, "learning_rate": 1.1851340028313652e-06, "loss": 0.3043, "step": 37840 }, { "epoch": 0.8435603619132886, "grad_norm": 0.6660176515579224, "learning_rate": 1.1834811891476294e-06, "loss": 0.1815, "step": 37845 }, { "epoch": 0.8436718112939088, "grad_norm": 0.581720232963562, "learning_rate": 1.181829456301048e-06, "loss": 0.3054, "step": 37850 }, { "epoch": 0.8437832606745288, "grad_norm": 0.28792068362236023, "learning_rate": 1.1801788044941088e-06, "loss": 0.2019, "step": 37855 }, { "epoch": 0.8438947100551488, "grad_norm": 0.6191038489341736, "learning_rate": 1.1785292339291677e-06, "loss": 0.3956, "step": 37860 }, { "epoch": 0.844006159435769, "grad_norm": 0.8252318501472473, "learning_rate": 1.1768807448084507e-06, "loss": 0.3405, "step": 37865 }, { "epoch": 0.844117608816389, "grad_norm": 0.798909068107605, "learning_rate": 1.175233337334053e-06, "loss": 0.3404, "step": 37870 }, { "epoch": 0.8442290581970091, "grad_norm": 0.5434174537658691, "learning_rate": 1.1735870117079307e-06, "loss": 0.1721, "step": 37875 }, { "epoch": 0.8443405075776291, "grad_norm": 0.5645771622657776, "learning_rate": 1.1719417681319123e-06, "loss": 0.25, "step": 37880 }, { "epoch": 0.8444519569582492, "grad_norm": 0.7520797252655029, "learning_rate": 1.1702976068076898e-06, "loss": 0.2488, "step": 37885 }, { "epoch": 0.8445634063388693, "grad_norm": 0.8642824292182922, "learning_rate": 1.1686545279368244e-06, "loss": 0.2841, "step": 37890 }, { "epoch": 0.8446748557194893, "grad_norm": 0.6720357537269592, "learning_rate": 1.1670125317207493e-06, "loss": 0.3353, "step": 37895 }, { "epoch": 0.8447863051001094, "grad_norm": 0.7542029619216919, "learning_rate": 1.1653716183607544e-06, "loss": 0.2117, "step": 37900 }, { "epoch": 0.8448977544807295, "grad_norm": 0.39105698466300964, "learning_rate": 1.163731788058009e-06, "loss": 0.2014, "step": 37905 }, { "epoch": 0.8450092038613496, "grad_norm": 0.8331542611122131, "learning_rate": 1.16209304101354e-06, "loss": 0.2795, "step": 37910 }, { "epoch": 0.8451206532419696, "grad_norm": 0.4050682783126831, "learning_rate": 1.1604553774282467e-06, "loss": 0.2269, "step": 37915 }, { "epoch": 0.8452321026225896, "grad_norm": 0.556277871131897, "learning_rate": 1.1588187975028931e-06, "loss": 0.3418, "step": 37920 }, { "epoch": 0.8453435520032098, "grad_norm": 0.8178937435150146, "learning_rate": 1.1571833014381096e-06, "loss": 0.2718, "step": 37925 }, { "epoch": 0.8454550013838298, "grad_norm": 0.5613789558410645, "learning_rate": 1.1555488894343991e-06, "loss": 0.2226, "step": 37930 }, { "epoch": 0.8455664507644499, "grad_norm": 0.5579361915588379, "learning_rate": 1.1539155616921238e-06, "loss": 0.2663, "step": 37935 }, { "epoch": 0.8456779001450699, "grad_norm": 0.7475429773330688, "learning_rate": 1.1522833184115212e-06, "loss": 0.3363, "step": 37940 }, { "epoch": 0.84578934952569, "grad_norm": 0.49127307534217834, "learning_rate": 1.1506521597926912e-06, "loss": 0.2636, "step": 37945 }, { "epoch": 0.8459007989063101, "grad_norm": 0.5424503087997437, "learning_rate": 1.1490220860355983e-06, "loss": 0.2751, "step": 37950 }, { "epoch": 0.8460122482869301, "grad_norm": 0.7346308827400208, "learning_rate": 1.147393097340077e-06, "loss": 0.3807, "step": 37955 }, { "epoch": 0.8461236976675502, "grad_norm": 0.8214137554168701, "learning_rate": 1.1457651939058335e-06, "loss": 0.2832, "step": 37960 }, { "epoch": 0.8462351470481703, "grad_norm": 0.8216602206230164, "learning_rate": 1.144138375932432e-06, "loss": 0.3962, "step": 37965 }, { "epoch": 0.8463465964287904, "grad_norm": 0.5301281809806824, "learning_rate": 1.1425126436193067e-06, "loss": 0.2104, "step": 37970 }, { "epoch": 0.8464580458094104, "grad_norm": 0.6581772565841675, "learning_rate": 1.140887997165765e-06, "loss": 0.3826, "step": 37975 }, { "epoch": 0.8465694951900304, "grad_norm": 0.584018886089325, "learning_rate": 1.1392644367709715e-06, "loss": 0.3253, "step": 37980 }, { "epoch": 0.8466809445706506, "grad_norm": 0.5445225238800049, "learning_rate": 1.137641962633963e-06, "loss": 0.334, "step": 37985 }, { "epoch": 0.8467923939512706, "grad_norm": 0.9342231154441833, "learning_rate": 1.1360205749536446e-06, "loss": 0.1585, "step": 37990 }, { "epoch": 0.8469038433318907, "grad_norm": 0.5521963238716125, "learning_rate": 1.134400273928784e-06, "loss": 0.3071, "step": 37995 }, { "epoch": 0.8470152927125107, "grad_norm": 0.7862924337387085, "learning_rate": 1.132781059758018e-06, "loss": 0.2368, "step": 38000 }, { "epoch": 0.8471267420931308, "grad_norm": 0.7552818655967712, "learning_rate": 1.1311629326398477e-06, "loss": 0.2618, "step": 38005 }, { "epoch": 0.8472381914737509, "grad_norm": 0.6027349233627319, "learning_rate": 1.1295458927726465e-06, "loss": 0.2241, "step": 38010 }, { "epoch": 0.8473496408543709, "grad_norm": 0.4127175509929657, "learning_rate": 1.1279299403546485e-06, "loss": 0.2692, "step": 38015 }, { "epoch": 0.847461090234991, "grad_norm": 1.0972000360488892, "learning_rate": 1.1263150755839592e-06, "loss": 0.3133, "step": 38020 }, { "epoch": 0.8475725396156111, "grad_norm": 0.5284757614135742, "learning_rate": 1.124701298658547e-06, "loss": 0.3089, "step": 38025 }, { "epoch": 0.8476839889962312, "grad_norm": 0.468267560005188, "learning_rate": 1.1230886097762495e-06, "loss": 0.2538, "step": 38030 }, { "epoch": 0.8477954383768512, "grad_norm": 0.7088356018066406, "learning_rate": 1.1214770091347694e-06, "loss": 0.2582, "step": 38035 }, { "epoch": 0.8479068877574713, "grad_norm": 0.5855355858802795, "learning_rate": 1.119866496931673e-06, "loss": 0.2273, "step": 38040 }, { "epoch": 0.8480183371380914, "grad_norm": 0.6103977560997009, "learning_rate": 1.1182570733644037e-06, "loss": 0.2108, "step": 38045 }, { "epoch": 0.8481297865187114, "grad_norm": 0.66413813829422, "learning_rate": 1.1166487386302571e-06, "loss": 0.3404, "step": 38050 }, { "epoch": 0.8482412358993315, "grad_norm": 0.6266335844993591, "learning_rate": 1.1150414929264087e-06, "loss": 0.2823, "step": 38055 }, { "epoch": 0.8483526852799516, "grad_norm": 0.47270530462265015, "learning_rate": 1.113435336449893e-06, "loss": 0.2533, "step": 38060 }, { "epoch": 0.8484641346605716, "grad_norm": 0.5260603427886963, "learning_rate": 1.1118302693976103e-06, "loss": 0.2751, "step": 38065 }, { "epoch": 0.8485755840411917, "grad_norm": 0.6224083304405212, "learning_rate": 1.1102262919663299e-06, "loss": 0.3943, "step": 38070 }, { "epoch": 0.8486870334218117, "grad_norm": 0.6470963954925537, "learning_rate": 1.1086234043526866e-06, "loss": 0.1906, "step": 38075 }, { "epoch": 0.8487984828024319, "grad_norm": 0.46870410442352295, "learning_rate": 1.1070216067531825e-06, "loss": 0.2331, "step": 38080 }, { "epoch": 0.8489099321830519, "grad_norm": 0.6851820349693298, "learning_rate": 1.1054208993641879e-06, "loss": 0.3887, "step": 38085 }, { "epoch": 0.8490213815636719, "grad_norm": 0.5743823647499084, "learning_rate": 1.103821282381936e-06, "loss": 0.2285, "step": 38090 }, { "epoch": 0.849132830944292, "grad_norm": 0.9163671135902405, "learning_rate": 1.1022227560025267e-06, "loss": 0.3448, "step": 38095 }, { "epoch": 0.8492442803249121, "grad_norm": 0.44099679589271545, "learning_rate": 1.1006253204219275e-06, "loss": 0.1958, "step": 38100 }, { "epoch": 0.8493557297055322, "grad_norm": 0.727388858795166, "learning_rate": 1.0990289758359685e-06, "loss": 0.27, "step": 38105 }, { "epoch": 0.8494671790861522, "grad_norm": 0.5296708941459656, "learning_rate": 1.0974337224403553e-06, "loss": 0.3284, "step": 38110 }, { "epoch": 0.8495786284667723, "grad_norm": 0.3783695697784424, "learning_rate": 1.0958395604306482e-06, "loss": 0.144, "step": 38115 }, { "epoch": 0.8496900778473924, "grad_norm": 0.9098923206329346, "learning_rate": 1.094246490002283e-06, "loss": 0.2708, "step": 38120 }, { "epoch": 0.8498015272280124, "grad_norm": 0.4576197564601898, "learning_rate": 1.092654511350556e-06, "loss": 0.2607, "step": 38125 }, { "epoch": 0.8499129766086325, "grad_norm": 0.7173288464546204, "learning_rate": 1.0910636246706318e-06, "loss": 0.2797, "step": 38130 }, { "epoch": 0.8500244259892525, "grad_norm": 0.5603467226028442, "learning_rate": 1.0894738301575414e-06, "loss": 0.3129, "step": 38135 }, { "epoch": 0.8501358753698727, "grad_norm": 0.4809896945953369, "learning_rate": 1.0878851280061787e-06, "loss": 0.3054, "step": 38140 }, { "epoch": 0.8502473247504927, "grad_norm": 0.5944100022315979, "learning_rate": 1.08629751841131e-06, "loss": 0.1697, "step": 38145 }, { "epoch": 0.8503587741311127, "grad_norm": 0.44639232754707336, "learning_rate": 1.0847110015675599e-06, "loss": 0.2338, "step": 38150 }, { "epoch": 0.8504702235117328, "grad_norm": 0.45551520586013794, "learning_rate": 1.0831255776694283e-06, "loss": 0.275, "step": 38155 }, { "epoch": 0.8505816728923529, "grad_norm": 0.7087881565093994, "learning_rate": 1.081541246911273e-06, "loss": 0.3113, "step": 38160 }, { "epoch": 0.850693122272973, "grad_norm": 0.7395936846733093, "learning_rate": 1.0799580094873208e-06, "loss": 0.2508, "step": 38165 }, { "epoch": 0.850804571653593, "grad_norm": 0.6941021680831909, "learning_rate": 1.078375865591662e-06, "loss": 0.2996, "step": 38170 }, { "epoch": 0.8509160210342132, "grad_norm": 0.5546767115592957, "learning_rate": 1.0767948154182606e-06, "loss": 0.1887, "step": 38175 }, { "epoch": 0.8510274704148332, "grad_norm": 0.4076506495475769, "learning_rate": 1.0752148591609378e-06, "loss": 0.2515, "step": 38180 }, { "epoch": 0.8511389197954532, "grad_norm": 0.5561453104019165, "learning_rate": 1.0736359970133825e-06, "loss": 0.2829, "step": 38185 }, { "epoch": 0.8512503691760733, "grad_norm": 0.7610055804252625, "learning_rate": 1.072058229169155e-06, "loss": 0.2921, "step": 38190 }, { "epoch": 0.8513618185566934, "grad_norm": 1.0838948488235474, "learning_rate": 1.0704815558216752e-06, "loss": 0.2318, "step": 38195 }, { "epoch": 0.8514732679373135, "grad_norm": 0.5526623129844666, "learning_rate": 1.0689059771642308e-06, "loss": 0.2769, "step": 38200 }, { "epoch": 0.8515847173179335, "grad_norm": 0.4516993761062622, "learning_rate": 1.0673314933899758e-06, "loss": 0.2309, "step": 38205 }, { "epoch": 0.8516961666985535, "grad_norm": 0.5029829144477844, "learning_rate": 1.0657581046919318e-06, "loss": 0.2642, "step": 38210 }, { "epoch": 0.8518076160791737, "grad_norm": 0.6774804592132568, "learning_rate": 1.0641858112629821e-06, "loss": 0.2934, "step": 38215 }, { "epoch": 0.8519190654597937, "grad_norm": 0.6830132603645325, "learning_rate": 1.062614613295876e-06, "loss": 0.4301, "step": 38220 }, { "epoch": 0.8520305148404138, "grad_norm": 0.44892117381095886, "learning_rate": 1.0610445109832335e-06, "loss": 0.2804, "step": 38225 }, { "epoch": 0.8521419642210338, "grad_norm": 0.5139585733413696, "learning_rate": 1.0594755045175363e-06, "loss": 0.3174, "step": 38230 }, { "epoch": 0.852253413601654, "grad_norm": 0.6646450161933899, "learning_rate": 1.0579075940911309e-06, "loss": 0.2326, "step": 38235 }, { "epoch": 0.852364862982274, "grad_norm": 0.811774492263794, "learning_rate": 1.0563407798962332e-06, "loss": 0.2359, "step": 38240 }, { "epoch": 0.852476312362894, "grad_norm": 0.450503408908844, "learning_rate": 1.0547750621249208e-06, "loss": 0.3047, "step": 38245 }, { "epoch": 0.8525877617435141, "grad_norm": 0.7951428890228271, "learning_rate": 1.05321044096914e-06, "loss": 0.3498, "step": 38250 }, { "epoch": 0.8526992111241342, "grad_norm": 0.8334109783172607, "learning_rate": 1.051646916620699e-06, "loss": 0.2847, "step": 38255 }, { "epoch": 0.8528106605047543, "grad_norm": 0.7562937140464783, "learning_rate": 1.050084489271277e-06, "loss": 0.2878, "step": 38260 }, { "epoch": 0.8529221098853743, "grad_norm": 0.5558695793151855, "learning_rate": 1.0485231591124113e-06, "loss": 0.2797, "step": 38265 }, { "epoch": 0.8530335592659943, "grad_norm": 0.539916455745697, "learning_rate": 1.046962926335514e-06, "loss": 0.2059, "step": 38270 }, { "epoch": 0.8531450086466145, "grad_norm": 0.856054425239563, "learning_rate": 1.045403791131855e-06, "loss": 0.236, "step": 38275 }, { "epoch": 0.8532564580272345, "grad_norm": 0.7847248315811157, "learning_rate": 1.0438457536925728e-06, "loss": 0.4156, "step": 38280 }, { "epoch": 0.8533679074078546, "grad_norm": 0.6048935651779175, "learning_rate": 1.0422888142086696e-06, "loss": 0.3339, "step": 38285 }, { "epoch": 0.8534793567884746, "grad_norm": 0.8079482913017273, "learning_rate": 1.0407329728710135e-06, "loss": 0.2025, "step": 38290 }, { "epoch": 0.8535908061690947, "grad_norm": 0.33381929993629456, "learning_rate": 1.0391782298703391e-06, "loss": 0.2348, "step": 38295 }, { "epoch": 0.8537022555497148, "grad_norm": 0.7819501757621765, "learning_rate": 1.03762458539725e-06, "loss": 0.3077, "step": 38300 }, { "epoch": 0.8538137049303348, "grad_norm": 0.6421924829483032, "learning_rate": 1.0360720396422064e-06, "loss": 0.3291, "step": 38305 }, { "epoch": 0.853925154310955, "grad_norm": 0.2999565899372101, "learning_rate": 1.0345205927955405e-06, "loss": 0.2765, "step": 38310 }, { "epoch": 0.854036603691575, "grad_norm": 0.6653572916984558, "learning_rate": 1.0329702450474477e-06, "loss": 0.2799, "step": 38315 }, { "epoch": 0.8541480530721951, "grad_norm": 0.8726109266281128, "learning_rate": 1.0314209965879852e-06, "loss": 0.3337, "step": 38320 }, { "epoch": 0.8542595024528151, "grad_norm": 0.6368909478187561, "learning_rate": 1.0298728476070818e-06, "loss": 0.3035, "step": 38325 }, { "epoch": 0.8543709518334351, "grad_norm": 0.9166002869606018, "learning_rate": 1.0283257982945305e-06, "loss": 0.3818, "step": 38330 }, { "epoch": 0.8544824012140553, "grad_norm": 0.6032201051712036, "learning_rate": 1.026779848839986e-06, "loss": 0.1349, "step": 38335 }, { "epoch": 0.8545938505946753, "grad_norm": 0.7271354794502258, "learning_rate": 1.025234999432969e-06, "loss": 0.2524, "step": 38340 }, { "epoch": 0.8547052999752954, "grad_norm": 0.5672757029533386, "learning_rate": 1.023691250262867e-06, "loss": 0.3017, "step": 38345 }, { "epoch": 0.8548167493559155, "grad_norm": 0.48506101965904236, "learning_rate": 1.0221486015189309e-06, "loss": 0.277, "step": 38350 }, { "epoch": 0.8549281987365355, "grad_norm": 0.5211977362632751, "learning_rate": 1.0206070533902756e-06, "loss": 0.2977, "step": 38355 }, { "epoch": 0.8550396481171556, "grad_norm": 0.44795161485671997, "learning_rate": 1.019066606065886e-06, "loss": 0.2916, "step": 38360 }, { "epoch": 0.8551510974977756, "grad_norm": 0.8175597786903381, "learning_rate": 1.0175272597346097e-06, "loss": 0.3533, "step": 38365 }, { "epoch": 0.8552625468783958, "grad_norm": 0.5160136818885803, "learning_rate": 1.0159890145851569e-06, "loss": 0.2598, "step": 38370 }, { "epoch": 0.8553739962590158, "grad_norm": 0.7401508092880249, "learning_rate": 1.0144518708061057e-06, "loss": 0.1902, "step": 38375 }, { "epoch": 0.8554854456396359, "grad_norm": 0.5100545287132263, "learning_rate": 1.0129158285858975e-06, "loss": 0.2488, "step": 38380 }, { "epoch": 0.8555968950202559, "grad_norm": 0.8144034147262573, "learning_rate": 1.0113808881128374e-06, "loss": 0.2565, "step": 38385 }, { "epoch": 0.855708344400876, "grad_norm": 0.6111043095588684, "learning_rate": 1.0098470495751e-06, "loss": 0.2435, "step": 38390 }, { "epoch": 0.8558197937814961, "grad_norm": 0.5661292672157288, "learning_rate": 1.0083143131607197e-06, "loss": 0.2558, "step": 38395 }, { "epoch": 0.8559312431621161, "grad_norm": 0.8154894709587097, "learning_rate": 1.0067826790576008e-06, "loss": 0.3087, "step": 38400 }, { "epoch": 0.8560426925427362, "grad_norm": 0.7612628936767578, "learning_rate": 1.005252147453508e-06, "loss": 0.3343, "step": 38405 }, { "epoch": 0.8561541419233563, "grad_norm": 0.5865107178688049, "learning_rate": 1.0037227185360743e-06, "loss": 0.231, "step": 38410 }, { "epoch": 0.8562655913039763, "grad_norm": 0.8663761615753174, "learning_rate": 1.0021943924927924e-06, "loss": 0.4532, "step": 38415 }, { "epoch": 0.8563770406845964, "grad_norm": 0.6198815107345581, "learning_rate": 1.0006671695110281e-06, "loss": 0.4073, "step": 38420 }, { "epoch": 0.8564884900652164, "grad_norm": 0.658185601234436, "learning_rate": 9.99141049778004e-07, "loss": 0.2215, "step": 38425 }, { "epoch": 0.8565999394458366, "grad_norm": 0.5928554534912109, "learning_rate": 9.976160334808094e-07, "loss": 0.1856, "step": 38430 }, { "epoch": 0.8567113888264566, "grad_norm": 1.2877331972122192, "learning_rate": 9.960921208064023e-07, "loss": 0.2063, "step": 38435 }, { "epoch": 0.8568228382070766, "grad_norm": 0.9091715216636658, "learning_rate": 9.945693119416033e-07, "loss": 0.3503, "step": 38440 }, { "epoch": 0.8569342875876967, "grad_norm": 0.626483142375946, "learning_rate": 9.930476070730944e-07, "loss": 0.2357, "step": 38445 }, { "epoch": 0.8570457369683168, "grad_norm": 0.5116456151008606, "learning_rate": 9.915270063874238e-07, "loss": 0.2662, "step": 38450 }, { "epoch": 0.8571571863489369, "grad_norm": 0.5982497930526733, "learning_rate": 9.900075100710106e-07, "loss": 0.3785, "step": 38455 }, { "epoch": 0.8572686357295569, "grad_norm": 0.6922852993011475, "learning_rate": 9.884891183101287e-07, "loss": 0.3587, "step": 38460 }, { "epoch": 0.857380085110177, "grad_norm": 0.4601098597049713, "learning_rate": 9.869718312909215e-07, "loss": 0.3211, "step": 38465 }, { "epoch": 0.8574915344907971, "grad_norm": 0.2123146653175354, "learning_rate": 9.854556491994005e-07, "loss": 0.1296, "step": 38470 }, { "epoch": 0.8576029838714171, "grad_norm": 0.546073853969574, "learning_rate": 9.839405722214345e-07, "loss": 0.2066, "step": 38475 }, { "epoch": 0.8577144332520372, "grad_norm": 0.4517442584037781, "learning_rate": 9.824266005427584e-07, "loss": 0.2971, "step": 38480 }, { "epoch": 0.8578258826326572, "grad_norm": 0.6086894869804382, "learning_rate": 9.809137343489793e-07, "loss": 0.2705, "step": 38485 }, { "epoch": 0.8579373320132774, "grad_norm": 0.49917909502983093, "learning_rate": 9.79401973825559e-07, "loss": 0.2768, "step": 38490 }, { "epoch": 0.8580487813938974, "grad_norm": 0.7907560467720032, "learning_rate": 9.778913191578287e-07, "loss": 0.2817, "step": 38495 }, { "epoch": 0.8581602307745174, "grad_norm": 0.6295052766799927, "learning_rate": 9.763817705309808e-07, "loss": 0.2585, "step": 38500 }, { "epoch": 0.8582716801551376, "grad_norm": 0.484733521938324, "learning_rate": 9.748733281300782e-07, "loss": 0.3148, "step": 38505 }, { "epoch": 0.8583831295357576, "grad_norm": 0.7300118803977966, "learning_rate": 9.733659921400395e-07, "loss": 0.2856, "step": 38510 }, { "epoch": 0.8584945789163777, "grad_norm": 0.4974004328250885, "learning_rate": 9.718597627456582e-07, "loss": 0.2531, "step": 38515 }, { "epoch": 0.8586060282969977, "grad_norm": 0.37486526370048523, "learning_rate": 9.703546401315832e-07, "loss": 0.2336, "step": 38520 }, { "epoch": 0.8587174776776179, "grad_norm": 0.6507949829101562, "learning_rate": 9.688506244823315e-07, "loss": 0.2287, "step": 38525 }, { "epoch": 0.8588289270582379, "grad_norm": 0.7002065181732178, "learning_rate": 9.673477159822832e-07, "loss": 0.2789, "step": 38530 }, { "epoch": 0.8589403764388579, "grad_norm": 0.7301084995269775, "learning_rate": 9.658459148156807e-07, "loss": 0.3805, "step": 38535 }, { "epoch": 0.859051825819478, "grad_norm": 0.6007069945335388, "learning_rate": 9.643452211666394e-07, "loss": 0.3356, "step": 38540 }, { "epoch": 0.859163275200098, "grad_norm": 0.505754292011261, "learning_rate": 9.628456352191261e-07, "loss": 0.2512, "step": 38545 }, { "epoch": 0.8592747245807182, "grad_norm": 0.893120288848877, "learning_rate": 9.613471571569843e-07, "loss": 0.3404, "step": 38550 }, { "epoch": 0.8593861739613382, "grad_norm": 0.5641831755638123, "learning_rate": 9.598497871639133e-07, "loss": 0.3276, "step": 38555 }, { "epoch": 0.8594976233419582, "grad_norm": 0.5923650860786438, "learning_rate": 9.5835352542348e-07, "loss": 0.3105, "step": 38560 }, { "epoch": 0.8596090727225784, "grad_norm": 0.6627763509750366, "learning_rate": 9.568583721191126e-07, "loss": 0.3515, "step": 38565 }, { "epoch": 0.8597205221031984, "grad_norm": 0.5730844140052795, "learning_rate": 9.55364327434105e-07, "loss": 0.3103, "step": 38570 }, { "epoch": 0.8598319714838185, "grad_norm": 0.9559993743896484, "learning_rate": 9.538713915516173e-07, "loss": 0.2309, "step": 38575 }, { "epoch": 0.8599434208644385, "grad_norm": 0.6301518082618713, "learning_rate": 9.52379564654673e-07, "loss": 0.3103, "step": 38580 }, { "epoch": 0.8600548702450587, "grad_norm": 0.6794214844703674, "learning_rate": 9.508888469261568e-07, "loss": 0.3756, "step": 38585 }, { "epoch": 0.8601663196256787, "grad_norm": 0.5919482111930847, "learning_rate": 9.493992385488204e-07, "loss": 0.2822, "step": 38590 }, { "epoch": 0.8602777690062987, "grad_norm": 0.7542113065719604, "learning_rate": 9.479107397052767e-07, "loss": 0.2241, "step": 38595 }, { "epoch": 0.8603892183869188, "grad_norm": 0.6400946974754333, "learning_rate": 9.46423350578004e-07, "loss": 0.1845, "step": 38600 }, { "epoch": 0.8605006677675389, "grad_norm": 0.5948458909988403, "learning_rate": 9.449370713493444e-07, "loss": 0.295, "step": 38605 }, { "epoch": 0.860612117148159, "grad_norm": 0.8288118839263916, "learning_rate": 9.434519022015088e-07, "loss": 0.3929, "step": 38610 }, { "epoch": 0.860723566528779, "grad_norm": 0.6763032078742981, "learning_rate": 9.419678433165624e-07, "loss": 0.471, "step": 38615 }, { "epoch": 0.860835015909399, "grad_norm": 0.933210015296936, "learning_rate": 9.404848948764422e-07, "loss": 0.3071, "step": 38620 }, { "epoch": 0.8609464652900192, "grad_norm": 0.5194078683853149, "learning_rate": 9.39003057062946e-07, "loss": 0.3275, "step": 38625 }, { "epoch": 0.8610579146706392, "grad_norm": 0.5377574563026428, "learning_rate": 9.375223300577319e-07, "loss": 0.3088, "step": 38630 }, { "epoch": 0.8611693640512593, "grad_norm": 0.6296390891075134, "learning_rate": 9.3604271404233e-07, "loss": 0.2647, "step": 38635 }, { "epoch": 0.8612808134318793, "grad_norm": 0.7914707064628601, "learning_rate": 9.345642091981288e-07, "loss": 0.2754, "step": 38640 }, { "epoch": 0.8613922628124994, "grad_norm": 0.7675842046737671, "learning_rate": 9.330868157063799e-07, "loss": 0.2453, "step": 38645 }, { "epoch": 0.8615037121931195, "grad_norm": 0.9423962831497192, "learning_rate": 9.316105337482028e-07, "loss": 0.1748, "step": 38650 }, { "epoch": 0.8616151615737395, "grad_norm": 0.6657691597938538, "learning_rate": 9.301353635045774e-07, "loss": 0.1363, "step": 38655 }, { "epoch": 0.8617266109543597, "grad_norm": 0.5235121846199036, "learning_rate": 9.286613051563476e-07, "loss": 0.298, "step": 38660 }, { "epoch": 0.8618380603349797, "grad_norm": 0.6590076684951782, "learning_rate": 9.27188358884219e-07, "loss": 0.41, "step": 38665 }, { "epoch": 0.8619495097155998, "grad_norm": 0.6933293342590332, "learning_rate": 9.257165248687694e-07, "loss": 0.2681, "step": 38670 }, { "epoch": 0.8620609590962198, "grad_norm": 0.5265421867370605, "learning_rate": 9.242458032904311e-07, "loss": 0.2567, "step": 38675 }, { "epoch": 0.8621724084768398, "grad_norm": 0.447303831577301, "learning_rate": 9.22776194329501e-07, "loss": 0.3959, "step": 38680 }, { "epoch": 0.86228385785746, "grad_norm": 0.6949535012245178, "learning_rate": 9.21307698166145e-07, "loss": 0.2809, "step": 38685 }, { "epoch": 0.86239530723808, "grad_norm": 0.42898672819137573, "learning_rate": 9.198403149803903e-07, "loss": 0.2463, "step": 38690 }, { "epoch": 0.8625067566187001, "grad_norm": 0.564953088760376, "learning_rate": 9.183740449521217e-07, "loss": 0.3246, "step": 38695 }, { "epoch": 0.8626182059993202, "grad_norm": 0.5689299702644348, "learning_rate": 9.169088882610977e-07, "loss": 0.3268, "step": 38700 }, { "epoch": 0.8627296553799402, "grad_norm": 0.6454007625579834, "learning_rate": 9.154448450869347e-07, "loss": 0.2103, "step": 38705 }, { "epoch": 0.8628411047605603, "grad_norm": 0.8169035315513611, "learning_rate": 9.139819156091101e-07, "loss": 0.2811, "step": 38710 }, { "epoch": 0.8629525541411803, "grad_norm": 0.47908321022987366, "learning_rate": 9.125201000069683e-07, "loss": 0.2798, "step": 38715 }, { "epoch": 0.8630640035218005, "grad_norm": 0.42922982573509216, "learning_rate": 9.110593984597193e-07, "loss": 0.2175, "step": 38720 }, { "epoch": 0.8631754529024205, "grad_norm": 0.5486500263214111, "learning_rate": 9.0959981114643e-07, "loss": 0.1905, "step": 38725 }, { "epoch": 0.8632869022830406, "grad_norm": 0.911518394947052, "learning_rate": 9.081413382460391e-07, "loss": 0.2032, "step": 38730 }, { "epoch": 0.8633983516636606, "grad_norm": 0.48162025213241577, "learning_rate": 9.066839799373417e-07, "loss": 0.2701, "step": 38735 }, { "epoch": 0.8635098010442807, "grad_norm": 0.5854309797286987, "learning_rate": 9.052277363989981e-07, "loss": 0.2961, "step": 38740 }, { "epoch": 0.8636212504249008, "grad_norm": 0.49761664867401123, "learning_rate": 9.037726078095344e-07, "loss": 0.2893, "step": 38745 }, { "epoch": 0.8637326998055208, "grad_norm": 0.6846062541007996, "learning_rate": 9.023185943473345e-07, "loss": 0.325, "step": 38750 }, { "epoch": 0.8638441491861409, "grad_norm": 0.672755777835846, "learning_rate": 9.008656961906526e-07, "loss": 0.3281, "step": 38755 }, { "epoch": 0.863955598566761, "grad_norm": 0.6164188981056213, "learning_rate": 8.994139135176005e-07, "loss": 0.2586, "step": 38760 }, { "epoch": 0.864067047947381, "grad_norm": 0.3592098653316498, "learning_rate": 8.979632465061594e-07, "loss": 0.2613, "step": 38765 }, { "epoch": 0.8641784973280011, "grad_norm": 0.47354990243911743, "learning_rate": 8.965136953341669e-07, "loss": 0.33, "step": 38770 }, { "epoch": 0.8642899467086211, "grad_norm": 0.7735857367515564, "learning_rate": 8.950652601793264e-07, "loss": 0.2444, "step": 38775 }, { "epoch": 0.8644013960892413, "grad_norm": 0.7215647101402283, "learning_rate": 8.93617941219207e-07, "loss": 0.2783, "step": 38780 }, { "epoch": 0.8645128454698613, "grad_norm": 0.8758747577667236, "learning_rate": 8.921717386312346e-07, "loss": 0.2587, "step": 38785 }, { "epoch": 0.8646242948504814, "grad_norm": 0.8073229193687439, "learning_rate": 8.90726652592706e-07, "loss": 0.2216, "step": 38790 }, { "epoch": 0.8647357442311014, "grad_norm": 0.4132098853588104, "learning_rate": 8.892826832807788e-07, "loss": 0.2941, "step": 38795 }, { "epoch": 0.8648471936117215, "grad_norm": 0.5358142852783203, "learning_rate": 8.8783983087247e-07, "loss": 0.225, "step": 38800 }, { "epoch": 0.8649586429923416, "grad_norm": 0.4428861141204834, "learning_rate": 8.863980955446639e-07, "loss": 0.2894, "step": 38805 }, { "epoch": 0.8650700923729616, "grad_norm": 0.7807385921478271, "learning_rate": 8.849574774741044e-07, "loss": 0.2118, "step": 38810 }, { "epoch": 0.8651815417535818, "grad_norm": 0.5106543898582458, "learning_rate": 8.835179768373992e-07, "loss": 0.2659, "step": 38815 }, { "epoch": 0.8652929911342018, "grad_norm": 0.575259268283844, "learning_rate": 8.820795938110216e-07, "loss": 0.2706, "step": 38820 }, { "epoch": 0.8654044405148218, "grad_norm": 0.7704864144325256, "learning_rate": 8.806423285713084e-07, "loss": 0.348, "step": 38825 }, { "epoch": 0.8655158898954419, "grad_norm": 0.37502098083496094, "learning_rate": 8.792061812944541e-07, "loss": 0.259, "step": 38830 }, { "epoch": 0.865627339276062, "grad_norm": 0.5361759662628174, "learning_rate": 8.777711521565213e-07, "loss": 0.2836, "step": 38835 }, { "epoch": 0.8657387886566821, "grad_norm": 0.639102041721344, "learning_rate": 8.763372413334314e-07, "loss": 0.2414, "step": 38840 }, { "epoch": 0.8658502380373021, "grad_norm": 0.7271820306777954, "learning_rate": 8.749044490009717e-07, "loss": 0.2522, "step": 38845 }, { "epoch": 0.8659616874179221, "grad_norm": 0.6150298714637756, "learning_rate": 8.734727753347916e-07, "loss": 0.1579, "step": 38850 }, { "epoch": 0.8660731367985423, "grad_norm": 0.7927728295326233, "learning_rate": 8.720422205104029e-07, "loss": 0.3589, "step": 38855 }, { "epoch": 0.8661845861791623, "grad_norm": 1.0862514972686768, "learning_rate": 8.70612784703182e-07, "loss": 0.3257, "step": 38860 }, { "epoch": 0.8662960355597824, "grad_norm": 0.499602735042572, "learning_rate": 8.691844680883654e-07, "loss": 0.2216, "step": 38865 }, { "epoch": 0.8664074849404024, "grad_norm": 1.040918231010437, "learning_rate": 8.677572708410543e-07, "loss": 0.3053, "step": 38870 }, { "epoch": 0.8665189343210226, "grad_norm": 0.5788599252700806, "learning_rate": 8.663311931362117e-07, "loss": 0.3767, "step": 38875 }, { "epoch": 0.8666303837016426, "grad_norm": 1.2706820964813232, "learning_rate": 8.649062351486626e-07, "loss": 0.3246, "step": 38880 }, { "epoch": 0.8667418330822626, "grad_norm": 0.4492451846599579, "learning_rate": 8.634823970530981e-07, "loss": 0.3951, "step": 38885 }, { "epoch": 0.8668532824628827, "grad_norm": 0.31704801321029663, "learning_rate": 8.620596790240665e-07, "loss": 0.2206, "step": 38890 }, { "epoch": 0.8669647318435028, "grad_norm": 0.4670735001564026, "learning_rate": 8.606380812359861e-07, "loss": 0.2563, "step": 38895 }, { "epoch": 0.8670761812241229, "grad_norm": 0.6560717821121216, "learning_rate": 8.592176038631328e-07, "loss": 0.2734, "step": 38900 }, { "epoch": 0.8671876306047429, "grad_norm": 0.43644067645072937, "learning_rate": 8.577982470796442e-07, "loss": 0.2596, "step": 38905 }, { "epoch": 0.8672990799853629, "grad_norm": 0.6353582143783569, "learning_rate": 8.563800110595222e-07, "loss": 0.2138, "step": 38910 }, { "epoch": 0.8674105293659831, "grad_norm": 0.5252775549888611, "learning_rate": 8.549628959766343e-07, "loss": 0.2546, "step": 38915 }, { "epoch": 0.8675219787466031, "grad_norm": 0.4174504280090332, "learning_rate": 8.535469020047071e-07, "loss": 0.1929, "step": 38920 }, { "epoch": 0.8676334281272232, "grad_norm": 0.5764095783233643, "learning_rate": 8.521320293173274e-07, "loss": 0.2603, "step": 38925 }, { "epoch": 0.8677448775078432, "grad_norm": 0.6523967385292053, "learning_rate": 8.50718278087953e-07, "loss": 0.2129, "step": 38930 }, { "epoch": 0.8678563268884634, "grad_norm": 0.6079277396202087, "learning_rate": 8.493056484898954e-07, "loss": 0.3142, "step": 38935 }, { "epoch": 0.8679677762690834, "grad_norm": 0.44901034235954285, "learning_rate": 8.478941406963315e-07, "loss": 0.358, "step": 38940 }, { "epoch": 0.8680792256497034, "grad_norm": 0.30900290608406067, "learning_rate": 8.464837548803029e-07, "loss": 0.3144, "step": 38945 }, { "epoch": 0.8681906750303235, "grad_norm": 0.6993968486785889, "learning_rate": 8.450744912147135e-07, "loss": 0.3674, "step": 38950 }, { "epoch": 0.8683021244109436, "grad_norm": 0.8762882351875305, "learning_rate": 8.436663498723252e-07, "loss": 0.3504, "step": 38955 }, { "epoch": 0.8684135737915637, "grad_norm": 0.7278645038604736, "learning_rate": 8.422593310257654e-07, "loss": 0.2525, "step": 38960 }, { "epoch": 0.8685250231721837, "grad_norm": 0.8509200215339661, "learning_rate": 8.40853434847525e-07, "loss": 0.3666, "step": 38965 }, { "epoch": 0.8686364725528037, "grad_norm": 1.1432135105133057, "learning_rate": 8.394486615099573e-07, "loss": 0.3129, "step": 38970 }, { "epoch": 0.8687479219334239, "grad_norm": 0.5976606011390686, "learning_rate": 8.380450111852722e-07, "loss": 0.2232, "step": 38975 }, { "epoch": 0.8688593713140439, "grad_norm": 0.27752354741096497, "learning_rate": 8.366424840455523e-07, "loss": 0.1715, "step": 38980 }, { "epoch": 0.868970820694664, "grad_norm": 0.5606639385223389, "learning_rate": 8.352410802627331e-07, "loss": 0.3196, "step": 38985 }, { "epoch": 0.869082270075284, "grad_norm": 0.6761105060577393, "learning_rate": 8.338408000086151e-07, "loss": 0.2986, "step": 38990 }, { "epoch": 0.8691937194559042, "grad_norm": 0.4891190826892853, "learning_rate": 8.324416434548632e-07, "loss": 0.2157, "step": 38995 }, { "epoch": 0.8693051688365242, "grad_norm": 0.589738130569458, "learning_rate": 8.310436107730035e-07, "loss": 0.2274, "step": 39000 }, { "epoch": 0.8694166182171442, "grad_norm": 0.6930895447731018, "learning_rate": 8.296467021344223e-07, "loss": 0.266, "step": 39005 }, { "epoch": 0.8695280675977644, "grad_norm": 0.5216238498687744, "learning_rate": 8.282509177103737e-07, "loss": 0.4008, "step": 39010 }, { "epoch": 0.8696395169783844, "grad_norm": 0.6839985251426697, "learning_rate": 8.268562576719663e-07, "loss": 0.2302, "step": 39015 }, { "epoch": 0.8697509663590045, "grad_norm": 0.5241414904594421, "learning_rate": 8.254627221901767e-07, "loss": 0.3208, "step": 39020 }, { "epoch": 0.8698624157396245, "grad_norm": 0.4129006564617157, "learning_rate": 8.240703114358405e-07, "loss": 0.2677, "step": 39025 }, { "epoch": 0.8699738651202445, "grad_norm": 0.6128037571907043, "learning_rate": 8.226790255796557e-07, "loss": 0.2581, "step": 39030 }, { "epoch": 0.8700853145008647, "grad_norm": 0.7427310943603516, "learning_rate": 8.212888647921835e-07, "loss": 0.3437, "step": 39035 }, { "epoch": 0.8701967638814847, "grad_norm": 0.7483739852905273, "learning_rate": 8.198998292438498e-07, "loss": 0.2322, "step": 39040 }, { "epoch": 0.8703082132621048, "grad_norm": 0.675360918045044, "learning_rate": 8.185119191049373e-07, "loss": 0.2521, "step": 39045 }, { "epoch": 0.8704196626427249, "grad_norm": 0.5865544676780701, "learning_rate": 8.171251345455943e-07, "loss": 0.2734, "step": 39050 }, { "epoch": 0.8705311120233449, "grad_norm": 0.7708106637001038, "learning_rate": 8.157394757358283e-07, "loss": 0.3535, "step": 39055 }, { "epoch": 0.870642561403965, "grad_norm": 0.8821659684181213, "learning_rate": 8.1435494284551e-07, "loss": 0.3533, "step": 39060 }, { "epoch": 0.870754010784585, "grad_norm": 0.8066558241844177, "learning_rate": 8.129715360443724e-07, "loss": 0.2142, "step": 39065 }, { "epoch": 0.8708654601652052, "grad_norm": 0.4113776385784149, "learning_rate": 8.115892555020122e-07, "loss": 0.2661, "step": 39070 }, { "epoch": 0.8709769095458252, "grad_norm": 0.5391425490379333, "learning_rate": 8.10208101387886e-07, "loss": 0.2141, "step": 39075 }, { "epoch": 0.8710883589264453, "grad_norm": 0.7116353511810303, "learning_rate": 8.088280738713139e-07, "loss": 0.2821, "step": 39080 }, { "epoch": 0.8711998083070653, "grad_norm": 0.6435947418212891, "learning_rate": 8.07449173121474e-07, "loss": 0.2571, "step": 39085 }, { "epoch": 0.8713112576876854, "grad_norm": 0.5934739112854004, "learning_rate": 8.060713993074099e-07, "loss": 0.3214, "step": 39090 }, { "epoch": 0.8714227070683055, "grad_norm": 0.6468964219093323, "learning_rate": 8.046947525980242e-07, "loss": 0.2661, "step": 39095 }, { "epoch": 0.8715341564489255, "grad_norm": 0.8659062385559082, "learning_rate": 8.033192331620843e-07, "loss": 0.3474, "step": 39100 }, { "epoch": 0.8716456058295456, "grad_norm": 0.7774683237075806, "learning_rate": 8.019448411682218e-07, "loss": 0.2422, "step": 39105 }, { "epoch": 0.8717570552101657, "grad_norm": 0.7808752059936523, "learning_rate": 8.005715767849231e-07, "loss": 0.333, "step": 39110 }, { "epoch": 0.8718685045907857, "grad_norm": 0.2276986986398697, "learning_rate": 7.991994401805414e-07, "loss": 0.176, "step": 39115 }, { "epoch": 0.8719799539714058, "grad_norm": 0.5539754629135132, "learning_rate": 7.978284315232875e-07, "loss": 0.3066, "step": 39120 }, { "epoch": 0.8720914033520258, "grad_norm": 0.6452548503875732, "learning_rate": 7.964585509812373e-07, "loss": 0.2689, "step": 39125 }, { "epoch": 0.872202852732646, "grad_norm": 0.5837932229042053, "learning_rate": 7.950897987223305e-07, "loss": 0.3347, "step": 39130 }, { "epoch": 0.872314302113266, "grad_norm": 1.0752758979797363, "learning_rate": 7.937221749143608e-07, "loss": 0.2316, "step": 39135 }, { "epoch": 0.8724257514938861, "grad_norm": 0.49105966091156006, "learning_rate": 7.923556797249921e-07, "loss": 0.2245, "step": 39140 }, { "epoch": 0.8725372008745061, "grad_norm": 0.6479565501213074, "learning_rate": 7.909903133217456e-07, "loss": 0.1952, "step": 39145 }, { "epoch": 0.8726486502551262, "grad_norm": 0.5889627933502197, "learning_rate": 7.896260758720043e-07, "loss": 0.1782, "step": 39150 }, { "epoch": 0.8727600996357463, "grad_norm": 0.5997331738471985, "learning_rate": 7.882629675430131e-07, "loss": 0.2341, "step": 39155 }, { "epoch": 0.8728715490163663, "grad_norm": 0.5359553694725037, "learning_rate": 7.869009885018764e-07, "loss": 0.2751, "step": 39160 }, { "epoch": 0.8729829983969865, "grad_norm": 0.46849164366722107, "learning_rate": 7.855401389155659e-07, "loss": 0.2439, "step": 39165 }, { "epoch": 0.8730944477776065, "grad_norm": 0.9019102454185486, "learning_rate": 7.841804189509094e-07, "loss": 0.1982, "step": 39170 }, { "epoch": 0.8732058971582265, "grad_norm": 0.6545245051383972, "learning_rate": 7.82821828774597e-07, "loss": 0.2837, "step": 39175 }, { "epoch": 0.8733173465388466, "grad_norm": 0.5654610395431519, "learning_rate": 7.814643685531842e-07, "loss": 0.1812, "step": 39180 }, { "epoch": 0.8734287959194666, "grad_norm": 0.8217921853065491, "learning_rate": 7.801080384530834e-07, "loss": 0.3776, "step": 39185 }, { "epoch": 0.8735402453000868, "grad_norm": 0.3172108829021454, "learning_rate": 7.787528386405696e-07, "loss": 0.3206, "step": 39190 }, { "epoch": 0.8736516946807068, "grad_norm": 1.0418602228164673, "learning_rate": 7.77398769281782e-07, "loss": 0.2938, "step": 39195 }, { "epoch": 0.8737631440613268, "grad_norm": 0.7183475494384766, "learning_rate": 7.760458305427176e-07, "loss": 0.2731, "step": 39200 }, { "epoch": 0.873874593441947, "grad_norm": 0.40343570709228516, "learning_rate": 7.746940225892363e-07, "loss": 0.1553, "step": 39205 }, { "epoch": 0.873986042822567, "grad_norm": 0.5755996704101562, "learning_rate": 7.733433455870575e-07, "loss": 0.2032, "step": 39210 }, { "epoch": 0.8740974922031871, "grad_norm": 0.5467020273208618, "learning_rate": 7.719937997017679e-07, "loss": 0.4656, "step": 39215 }, { "epoch": 0.8742089415838071, "grad_norm": 0.800784170627594, "learning_rate": 7.706453850988071e-07, "loss": 0.3364, "step": 39220 }, { "epoch": 0.8743203909644273, "grad_norm": 0.60468590259552, "learning_rate": 7.692981019434843e-07, "loss": 0.159, "step": 39225 }, { "epoch": 0.8744318403450473, "grad_norm": 0.20965750515460968, "learning_rate": 7.679519504009636e-07, "loss": 0.1155, "step": 39230 }, { "epoch": 0.8745432897256673, "grad_norm": 0.6301449537277222, "learning_rate": 7.666069306362733e-07, "loss": 0.1794, "step": 39235 }, { "epoch": 0.8746547391062874, "grad_norm": 0.38496220111846924, "learning_rate": 7.652630428143038e-07, "loss": 0.2565, "step": 39240 }, { "epoch": 0.8747661884869075, "grad_norm": 0.6018979549407959, "learning_rate": 7.639202870998008e-07, "loss": 0.2916, "step": 39245 }, { "epoch": 0.8748776378675276, "grad_norm": 0.3267180919647217, "learning_rate": 7.625786636573795e-07, "loss": 0.3218, "step": 39250 }, { "epoch": 0.8749890872481476, "grad_norm": 0.612133264541626, "learning_rate": 7.612381726515139e-07, "loss": 0.4414, "step": 39255 }, { "epoch": 0.8751005366287676, "grad_norm": 0.7912502884864807, "learning_rate": 7.598988142465358e-07, "loss": 0.2768, "step": 39260 }, { "epoch": 0.8752119860093878, "grad_norm": 0.9484602808952332, "learning_rate": 7.585605886066405e-07, "loss": 0.2242, "step": 39265 }, { "epoch": 0.8753234353900078, "grad_norm": 0.6952139735221863, "learning_rate": 7.572234958958846e-07, "loss": 0.203, "step": 39270 }, { "epoch": 0.8754348847706279, "grad_norm": 0.4956663250923157, "learning_rate": 7.558875362781848e-07, "loss": 0.2395, "step": 39275 }, { "epoch": 0.8755463341512479, "grad_norm": 0.6728199124336243, "learning_rate": 7.545527099173178e-07, "loss": 0.2816, "step": 39280 }, { "epoch": 0.8756577835318681, "grad_norm": 0.5598202347755432, "learning_rate": 7.532190169769238e-07, "loss": 0.2929, "step": 39285 }, { "epoch": 0.8757692329124881, "grad_norm": 0.5399930477142334, "learning_rate": 7.518864576205065e-07, "loss": 0.3053, "step": 39290 }, { "epoch": 0.8758806822931081, "grad_norm": 0.6357437372207642, "learning_rate": 7.505550320114241e-07, "loss": 0.4018, "step": 39295 }, { "epoch": 0.8759921316737282, "grad_norm": 0.667725145816803, "learning_rate": 7.492247403129016e-07, "loss": 0.3565, "step": 39300 }, { "epoch": 0.8761035810543483, "grad_norm": 0.7579306364059448, "learning_rate": 7.478955826880197e-07, "loss": 0.3909, "step": 39305 }, { "epoch": 0.8762150304349684, "grad_norm": 0.8091420531272888, "learning_rate": 7.465675592997223e-07, "loss": 0.391, "step": 39310 }, { "epoch": 0.8763264798155884, "grad_norm": 0.6669242978096008, "learning_rate": 7.452406703108173e-07, "loss": 0.3984, "step": 39315 }, { "epoch": 0.8764379291962084, "grad_norm": 1.056694507598877, "learning_rate": 7.43914915883972e-07, "loss": 0.149, "step": 39320 }, { "epoch": 0.8765493785768286, "grad_norm": 0.48080557584762573, "learning_rate": 7.425902961817111e-07, "loss": 0.2749, "step": 39325 }, { "epoch": 0.8766608279574486, "grad_norm": 0.46817928552627563, "learning_rate": 7.412668113664245e-07, "loss": 0.1438, "step": 39330 }, { "epoch": 0.8767722773380687, "grad_norm": 0.4309822916984558, "learning_rate": 7.399444616003604e-07, "loss": 0.1833, "step": 39335 }, { "epoch": 0.8768837267186887, "grad_norm": 0.7805137038230896, "learning_rate": 7.386232470456268e-07, "loss": 0.317, "step": 39340 }, { "epoch": 0.8769951760993089, "grad_norm": 0.6236761808395386, "learning_rate": 7.373031678641985e-07, "loss": 0.2534, "step": 39345 }, { "epoch": 0.8771066254799289, "grad_norm": 0.37533897161483765, "learning_rate": 7.359842242179016e-07, "loss": 0.3453, "step": 39350 }, { "epoch": 0.8772180748605489, "grad_norm": 0.4856351912021637, "learning_rate": 7.346664162684347e-07, "loss": 0.3894, "step": 39355 }, { "epoch": 0.8773295242411691, "grad_norm": 0.8379895091056824, "learning_rate": 7.333497441773474e-07, "loss": 0.2715, "step": 39360 }, { "epoch": 0.8774409736217891, "grad_norm": 0.8367623090744019, "learning_rate": 7.320342081060527e-07, "loss": 0.2622, "step": 39365 }, { "epoch": 0.8775524230024092, "grad_norm": 0.8916360139846802, "learning_rate": 7.307198082158285e-07, "loss": 0.2733, "step": 39370 }, { "epoch": 0.8776638723830292, "grad_norm": 0.56252521276474, "learning_rate": 7.294065446678045e-07, "loss": 0.2643, "step": 39375 }, { "epoch": 0.8777753217636493, "grad_norm": 0.48122766613960266, "learning_rate": 7.280944176229821e-07, "loss": 0.247, "step": 39380 }, { "epoch": 0.8778867711442694, "grad_norm": 0.7334158420562744, "learning_rate": 7.267834272422148e-07, "loss": 0.3036, "step": 39385 }, { "epoch": 0.8779982205248894, "grad_norm": 0.7368651032447815, "learning_rate": 7.254735736862228e-07, "loss": 0.3579, "step": 39390 }, { "epoch": 0.8781096699055095, "grad_norm": 0.6399995684623718, "learning_rate": 7.241648571155824e-07, "loss": 0.2535, "step": 39395 }, { "epoch": 0.8782211192861296, "grad_norm": 0.6023989915847778, "learning_rate": 7.228572776907317e-07, "loss": 0.3227, "step": 39400 }, { "epoch": 0.8783325686667496, "grad_norm": 0.6582564115524292, "learning_rate": 7.215508355719681e-07, "loss": 0.3222, "step": 39405 }, { "epoch": 0.8784440180473697, "grad_norm": 0.5214042067527771, "learning_rate": 7.202455309194556e-07, "loss": 0.2741, "step": 39410 }, { "epoch": 0.8785554674279897, "grad_norm": 0.63289874792099, "learning_rate": 7.18941363893213e-07, "loss": 0.2356, "step": 39415 }, { "epoch": 0.8786669168086099, "grad_norm": 0.9625816941261292, "learning_rate": 7.176383346531179e-07, "loss": 0.3086, "step": 39420 }, { "epoch": 0.8787783661892299, "grad_norm": 1.4271131753921509, "learning_rate": 7.163364433589159e-07, "loss": 0.2845, "step": 39425 }, { "epoch": 0.87888981556985, "grad_norm": 0.8671938180923462, "learning_rate": 7.150356901702072e-07, "loss": 0.2468, "step": 39430 }, { "epoch": 0.87900126495047, "grad_norm": 0.7523783445358276, "learning_rate": 7.137360752464517e-07, "loss": 0.3748, "step": 39435 }, { "epoch": 0.8791127143310901, "grad_norm": 0.6828460097312927, "learning_rate": 7.124375987469767e-07, "loss": 0.3695, "step": 39440 }, { "epoch": 0.8792241637117102, "grad_norm": 0.7740871906280518, "learning_rate": 7.111402608309637e-07, "loss": 0.3003, "step": 39445 }, { "epoch": 0.8793356130923302, "grad_norm": 0.8277214765548706, "learning_rate": 7.098440616574542e-07, "loss": 0.295, "step": 39450 }, { "epoch": 0.8794470624729503, "grad_norm": 0.7407256364822388, "learning_rate": 7.085490013853536e-07, "loss": 0.2262, "step": 39455 }, { "epoch": 0.8795585118535704, "grad_norm": 0.3992447853088379, "learning_rate": 7.072550801734268e-07, "loss": 0.2338, "step": 39460 }, { "epoch": 0.8796699612341904, "grad_norm": 0.42001864314079285, "learning_rate": 7.059622981802994e-07, "loss": 0.2455, "step": 39465 }, { "epoch": 0.8797814106148105, "grad_norm": 0.7254727482795715, "learning_rate": 7.046706555644522e-07, "loss": 0.2974, "step": 39470 }, { "epoch": 0.8798928599954305, "grad_norm": 0.5861344933509827, "learning_rate": 7.033801524842366e-07, "loss": 0.3441, "step": 39475 }, { "epoch": 0.8800043093760507, "grad_norm": 0.7168605923652649, "learning_rate": 7.020907890978546e-07, "loss": 0.225, "step": 39480 }, { "epoch": 0.8801157587566707, "grad_norm": 0.5098482966423035, "learning_rate": 7.008025655633743e-07, "loss": 0.2328, "step": 39485 }, { "epoch": 0.8802272081372908, "grad_norm": 0.5601391792297363, "learning_rate": 6.995154820387173e-07, "loss": 0.3758, "step": 39490 }, { "epoch": 0.8803386575179108, "grad_norm": 1.00440514087677, "learning_rate": 6.982295386816762e-07, "loss": 0.2554, "step": 39495 }, { "epoch": 0.8804501068985309, "grad_norm": 0.5242079496383667, "learning_rate": 6.969447356498938e-07, "loss": 0.2905, "step": 39500 }, { "epoch": 0.880561556279151, "grad_norm": 0.8028491735458374, "learning_rate": 6.956610731008783e-07, "loss": 0.2896, "step": 39505 }, { "epoch": 0.880673005659771, "grad_norm": 0.2605814039707184, "learning_rate": 6.943785511919976e-07, "loss": 0.3147, "step": 39510 }, { "epoch": 0.8807844550403912, "grad_norm": 1.1690866947174072, "learning_rate": 6.930971700804789e-07, "loss": 0.309, "step": 39515 }, { "epoch": 0.8808959044210112, "grad_norm": 0.595558762550354, "learning_rate": 6.918169299234079e-07, "loss": 0.2606, "step": 39520 }, { "epoch": 0.8810073538016312, "grad_norm": 0.6599328517913818, "learning_rate": 6.905378308777322e-07, "loss": 0.2377, "step": 39525 }, { "epoch": 0.8811188031822513, "grad_norm": 0.454018771648407, "learning_rate": 6.892598731002597e-07, "loss": 0.2882, "step": 39530 }, { "epoch": 0.8812302525628714, "grad_norm": 0.7266696691513062, "learning_rate": 6.879830567476609e-07, "loss": 0.282, "step": 39535 }, { "epoch": 0.8813417019434915, "grad_norm": 0.796342134475708, "learning_rate": 6.867073819764614e-07, "loss": 0.1781, "step": 39540 }, { "epoch": 0.8814531513241115, "grad_norm": 0.4576019048690796, "learning_rate": 6.854328489430495e-07, "loss": 0.3685, "step": 39545 }, { "epoch": 0.8815646007047316, "grad_norm": 0.6398374438285828, "learning_rate": 6.841594578036725e-07, "loss": 0.2192, "step": 39550 }, { "epoch": 0.8816760500853517, "grad_norm": 0.9749293923377991, "learning_rate": 6.828872087144378e-07, "loss": 0.3229, "step": 39555 }, { "epoch": 0.8817874994659717, "grad_norm": 0.6655979752540588, "learning_rate": 6.816161018313138e-07, "loss": 0.2114, "step": 39560 }, { "epoch": 0.8818989488465918, "grad_norm": 0.4514460265636444, "learning_rate": 6.803461373101306e-07, "loss": 0.2602, "step": 39565 }, { "epoch": 0.8820103982272118, "grad_norm": 1.0040894746780396, "learning_rate": 6.790773153065744e-07, "loss": 0.2762, "step": 39570 }, { "epoch": 0.882121847607832, "grad_norm": 0.5751023888587952, "learning_rate": 6.778096359761921e-07, "loss": 0.3602, "step": 39575 }, { "epoch": 0.882233296988452, "grad_norm": 0.5020691752433777, "learning_rate": 6.765430994743926e-07, "loss": 0.2579, "step": 39580 }, { "epoch": 0.882344746369072, "grad_norm": 0.8378996849060059, "learning_rate": 6.752777059564431e-07, "loss": 0.2474, "step": 39585 }, { "epoch": 0.8824561957496921, "grad_norm": 0.7655625343322754, "learning_rate": 6.740134555774702e-07, "loss": 0.2461, "step": 39590 }, { "epoch": 0.8825676451303122, "grad_norm": 0.7098322510719299, "learning_rate": 6.727503484924614e-07, "loss": 0.2742, "step": 39595 }, { "epoch": 0.8826790945109323, "grad_norm": 0.756291925907135, "learning_rate": 6.714883848562659e-07, "loss": 0.2654, "step": 39600 }, { "epoch": 0.8827905438915523, "grad_norm": 0.9852766394615173, "learning_rate": 6.70227564823589e-07, "loss": 0.4138, "step": 39605 }, { "epoch": 0.8829019932721723, "grad_norm": 0.6849904656410217, "learning_rate": 6.689678885489992e-07, "loss": 0.2707, "step": 39610 }, { "epoch": 0.8830134426527925, "grad_norm": 0.7948192358016968, "learning_rate": 6.677093561869208e-07, "loss": 0.2577, "step": 39615 }, { "epoch": 0.8831248920334125, "grad_norm": 0.5963390469551086, "learning_rate": 6.664519678916392e-07, "loss": 0.2616, "step": 39620 }, { "epoch": 0.8832363414140326, "grad_norm": 0.623665988445282, "learning_rate": 6.651957238173046e-07, "loss": 0.2896, "step": 39625 }, { "epoch": 0.8833477907946526, "grad_norm": 0.6460698843002319, "learning_rate": 6.639406241179192e-07, "loss": 0.2555, "step": 39630 }, { "epoch": 0.8834592401752728, "grad_norm": 0.7518662810325623, "learning_rate": 6.62686668947351e-07, "loss": 0.2195, "step": 39635 }, { "epoch": 0.8835706895558928, "grad_norm": 0.5728727579116821, "learning_rate": 6.614338584593249e-07, "loss": 0.3104, "step": 39640 }, { "epoch": 0.8836821389365128, "grad_norm": 0.5908369421958923, "learning_rate": 6.601821928074248e-07, "loss": 0.2138, "step": 39645 }, { "epoch": 0.883793588317133, "grad_norm": 0.7735019326210022, "learning_rate": 6.589316721450933e-07, "loss": 0.3471, "step": 39650 }, { "epoch": 0.883905037697753, "grad_norm": 0.7732353210449219, "learning_rate": 6.576822966256391e-07, "loss": 0.3198, "step": 39655 }, { "epoch": 0.8840164870783731, "grad_norm": 0.5547565221786499, "learning_rate": 6.56434066402224e-07, "loss": 0.2367, "step": 39660 }, { "epoch": 0.8841279364589931, "grad_norm": 0.9324460029602051, "learning_rate": 6.551869816278699e-07, "loss": 0.3295, "step": 39665 }, { "epoch": 0.8842393858396131, "grad_norm": 0.7827137112617493, "learning_rate": 6.539410424554604e-07, "loss": 0.2771, "step": 39670 }, { "epoch": 0.8843508352202333, "grad_norm": 0.35162538290023804, "learning_rate": 6.526962490377397e-07, "loss": 0.2388, "step": 39675 }, { "epoch": 0.8844622846008533, "grad_norm": 0.8613860607147217, "learning_rate": 6.514526015273093e-07, "loss": 0.343, "step": 39680 }, { "epoch": 0.8845737339814734, "grad_norm": 0.5753836631774902, "learning_rate": 6.502101000766281e-07, "loss": 0.3576, "step": 39685 }, { "epoch": 0.8846851833620935, "grad_norm": 0.6556292772293091, "learning_rate": 6.489687448380211e-07, "loss": 0.2278, "step": 39690 }, { "epoch": 0.8847966327427136, "grad_norm": 0.8420238494873047, "learning_rate": 6.477285359636676e-07, "loss": 0.3398, "step": 39695 }, { "epoch": 0.8849080821233336, "grad_norm": 0.515235424041748, "learning_rate": 6.464894736056071e-07, "loss": 0.3074, "step": 39700 }, { "epoch": 0.8850195315039536, "grad_norm": 0.7593355774879456, "learning_rate": 6.45251557915737e-07, "loss": 0.3446, "step": 39705 }, { "epoch": 0.8851309808845738, "grad_norm": 0.5038763284683228, "learning_rate": 6.440147890458193e-07, "loss": 0.3875, "step": 39710 }, { "epoch": 0.8852424302651938, "grad_norm": 0.6667838096618652, "learning_rate": 6.427791671474704e-07, "loss": 0.3563, "step": 39715 }, { "epoch": 0.8853538796458139, "grad_norm": 0.38335031270980835, "learning_rate": 6.415446923721713e-07, "loss": 0.2846, "step": 39720 }, { "epoch": 0.8854653290264339, "grad_norm": 0.48969194293022156, "learning_rate": 6.403113648712555e-07, "loss": 0.197, "step": 39725 }, { "epoch": 0.885576778407054, "grad_norm": 0.6656966805458069, "learning_rate": 6.39079184795921e-07, "loss": 0.2357, "step": 39730 }, { "epoch": 0.8856882277876741, "grad_norm": 0.6289982795715332, "learning_rate": 6.378481522972236e-07, "loss": 0.2108, "step": 39735 }, { "epoch": 0.8857996771682941, "grad_norm": 0.48437291383743286, "learning_rate": 6.366182675260768e-07, "loss": 0.2386, "step": 39740 }, { "epoch": 0.8859111265489142, "grad_norm": 0.5738726258277893, "learning_rate": 6.353895306332558e-07, "loss": 0.2448, "step": 39745 }, { "epoch": 0.8860225759295343, "grad_norm": 0.8107031583786011, "learning_rate": 6.341619417693956e-07, "loss": 0.2105, "step": 39750 }, { "epoch": 0.8861340253101544, "grad_norm": 0.7784823179244995, "learning_rate": 6.32935501084988e-07, "loss": 0.2757, "step": 39755 }, { "epoch": 0.8862454746907744, "grad_norm": 0.67021644115448, "learning_rate": 6.317102087303861e-07, "loss": 0.3014, "step": 39760 }, { "epoch": 0.8863569240713944, "grad_norm": 0.6922481656074524, "learning_rate": 6.304860648558009e-07, "loss": 0.3157, "step": 39765 }, { "epoch": 0.8864683734520146, "grad_norm": 0.706314742565155, "learning_rate": 6.292630696113e-07, "loss": 0.1906, "step": 39770 }, { "epoch": 0.8865798228326346, "grad_norm": 0.4553152322769165, "learning_rate": 6.28041223146818e-07, "loss": 0.3109, "step": 39775 }, { "epoch": 0.8866912722132547, "grad_norm": 0.7962448596954346, "learning_rate": 6.268205256121396e-07, "loss": 0.3001, "step": 39780 }, { "epoch": 0.8868027215938747, "grad_norm": 0.744176983833313, "learning_rate": 6.256009771569171e-07, "loss": 0.2006, "step": 39785 }, { "epoch": 0.8869141709744948, "grad_norm": 0.7229616641998291, "learning_rate": 6.243825779306555e-07, "loss": 0.2824, "step": 39790 }, { "epoch": 0.8870256203551149, "grad_norm": 0.5249221920967102, "learning_rate": 6.231653280827211e-07, "loss": 0.3406, "step": 39795 }, { "epoch": 0.8871370697357349, "grad_norm": 0.48112252354621887, "learning_rate": 6.219492277623384e-07, "loss": 0.1427, "step": 39800 }, { "epoch": 0.887248519116355, "grad_norm": 0.5653254985809326, "learning_rate": 6.20734277118592e-07, "loss": 0.3752, "step": 39805 }, { "epoch": 0.8873599684969751, "grad_norm": 0.6261930465698242, "learning_rate": 6.19520476300427e-07, "loss": 0.2933, "step": 39810 }, { "epoch": 0.8874714178775951, "grad_norm": 0.2286958545446396, "learning_rate": 6.183078254566466e-07, "loss": 0.1827, "step": 39815 }, { "epoch": 0.8875828672582152, "grad_norm": 0.706714928150177, "learning_rate": 6.170963247359119e-07, "loss": 0.2767, "step": 39820 }, { "epoch": 0.8876943166388352, "grad_norm": 0.6469764113426208, "learning_rate": 6.158859742867418e-07, "loss": 0.2535, "step": 39825 }, { "epoch": 0.8878057660194554, "grad_norm": 0.4051867723464966, "learning_rate": 6.146767742575178e-07, "loss": 0.2556, "step": 39830 }, { "epoch": 0.8879172154000754, "grad_norm": 0.3999421000480652, "learning_rate": 6.134687247964754e-07, "loss": 0.2969, "step": 39835 }, { "epoch": 0.8880286647806955, "grad_norm": 0.43835553526878357, "learning_rate": 6.122618260517166e-07, "loss": 0.1711, "step": 39840 }, { "epoch": 0.8881401141613156, "grad_norm": 0.649053156375885, "learning_rate": 6.110560781711938e-07, "loss": 0.3861, "step": 39845 }, { "epoch": 0.8882515635419356, "grad_norm": 0.9673153162002563, "learning_rate": 6.098514813027256e-07, "loss": 0.2902, "step": 39850 }, { "epoch": 0.8883630129225557, "grad_norm": 0.2699918746948242, "learning_rate": 6.08648035593985e-07, "loss": 0.1352, "step": 39855 }, { "epoch": 0.8884744623031757, "grad_norm": 0.964161217212677, "learning_rate": 6.074457411925061e-07, "loss": 0.2884, "step": 39860 }, { "epoch": 0.8885859116837959, "grad_norm": 0.8768846392631531, "learning_rate": 6.062445982456777e-07, "loss": 0.3667, "step": 39865 }, { "epoch": 0.8886973610644159, "grad_norm": 0.6985152363777161, "learning_rate": 6.050446069007543e-07, "loss": 0.2767, "step": 39870 }, { "epoch": 0.8888088104450359, "grad_norm": 0.7158140540122986, "learning_rate": 6.038457673048448e-07, "loss": 0.2725, "step": 39875 }, { "epoch": 0.888920259825656, "grad_norm": 0.6063166856765747, "learning_rate": 6.02648079604915e-07, "loss": 0.1417, "step": 39880 }, { "epoch": 0.889031709206276, "grad_norm": 0.4928998649120331, "learning_rate": 6.014515439477952e-07, "loss": 0.2431, "step": 39885 }, { "epoch": 0.8891431585868962, "grad_norm": 0.300959050655365, "learning_rate": 6.002561604801715e-07, "loss": 0.2616, "step": 39890 }, { "epoch": 0.8892546079675162, "grad_norm": 0.9015136361122131, "learning_rate": 5.990619293485866e-07, "loss": 0.3326, "step": 39895 }, { "epoch": 0.8893660573481363, "grad_norm": 0.6377741694450378, "learning_rate": 5.978688506994446e-07, "loss": 0.2272, "step": 39900 }, { "epoch": 0.8894775067287564, "grad_norm": 0.7506493926048279, "learning_rate": 5.966769246790094e-07, "loss": 0.3169, "step": 39905 }, { "epoch": 0.8895889561093764, "grad_norm": 0.6252882480621338, "learning_rate": 5.954861514333999e-07, "loss": 0.329, "step": 39910 }, { "epoch": 0.8897004054899965, "grad_norm": 0.7580475807189941, "learning_rate": 5.942965311085957e-07, "loss": 0.3943, "step": 39915 }, { "epoch": 0.8898118548706165, "grad_norm": 0.7877764701843262, "learning_rate": 5.931080638504382e-07, "loss": 0.3409, "step": 39920 }, { "epoch": 0.8899233042512367, "grad_norm": 0.3911045491695404, "learning_rate": 5.919207498046209e-07, "loss": 0.2998, "step": 39925 }, { "epoch": 0.8900347536318567, "grad_norm": 0.934512734413147, "learning_rate": 5.907345891166993e-07, "loss": 0.2028, "step": 39930 }, { "epoch": 0.8901462030124767, "grad_norm": 0.7734924554824829, "learning_rate": 5.895495819320896e-07, "loss": 0.3299, "step": 39935 }, { "epoch": 0.8902576523930968, "grad_norm": 0.6034185886383057, "learning_rate": 5.883657283960642e-07, "loss": 0.3106, "step": 39940 }, { "epoch": 0.8903691017737169, "grad_norm": 0.8035076260566711, "learning_rate": 5.871830286537539e-07, "loss": 0.329, "step": 39945 }, { "epoch": 0.890480551154337, "grad_norm": 0.7638542056083679, "learning_rate": 5.86001482850147e-07, "loss": 0.324, "step": 39950 }, { "epoch": 0.890592000534957, "grad_norm": 0.7222042679786682, "learning_rate": 5.848210911300945e-07, "loss": 0.2364, "step": 39955 }, { "epoch": 0.890703449915577, "grad_norm": 0.7595744132995605, "learning_rate": 5.836418536383015e-07, "loss": 0.3364, "step": 39960 }, { "epoch": 0.8908148992961972, "grad_norm": 0.7978435754776001, "learning_rate": 5.824637705193348e-07, "loss": 0.254, "step": 39965 }, { "epoch": 0.8909263486768172, "grad_norm": 0.6424942016601562, "learning_rate": 5.812868419176176e-07, "loss": 0.3046, "step": 39970 }, { "epoch": 0.8910377980574373, "grad_norm": 0.38849854469299316, "learning_rate": 5.801110679774325e-07, "loss": 0.2289, "step": 39975 }, { "epoch": 0.8911492474380573, "grad_norm": 0.2390509694814682, "learning_rate": 5.789364488429205e-07, "loss": 0.3562, "step": 39980 }, { "epoch": 0.8912606968186775, "grad_norm": 0.15910890698432922, "learning_rate": 5.77762984658079e-07, "loss": 0.2501, "step": 39985 }, { "epoch": 0.8913721461992975, "grad_norm": 0.932104766368866, "learning_rate": 5.765906755667682e-07, "loss": 0.3042, "step": 39990 }, { "epoch": 0.8914835955799175, "grad_norm": 0.34565722942352295, "learning_rate": 5.754195217127013e-07, "loss": 0.2862, "step": 39995 }, { "epoch": 0.8915950449605377, "grad_norm": 0.7554762959480286, "learning_rate": 5.742495232394562e-07, "loss": 0.333, "step": 40000 }, { "epoch": 0.8917064943411577, "grad_norm": 0.5055221319198608, "learning_rate": 5.730806802904631e-07, "loss": 0.241, "step": 40005 }, { "epoch": 0.8918179437217778, "grad_norm": 0.5110387206077576, "learning_rate": 5.719129930090151e-07, "loss": 0.3625, "step": 40010 }, { "epoch": 0.8919293931023978, "grad_norm": 0.8160702586174011, "learning_rate": 5.707464615382597e-07, "loss": 0.1209, "step": 40015 }, { "epoch": 0.8920408424830178, "grad_norm": 0.7044849991798401, "learning_rate": 5.695810860212047e-07, "loss": 0.3725, "step": 40020 }, { "epoch": 0.892152291863638, "grad_norm": 0.6476200819015503, "learning_rate": 5.684168666007162e-07, "loss": 0.3231, "step": 40025 }, { "epoch": 0.892263741244258, "grad_norm": 0.704243004322052, "learning_rate": 5.672538034195218e-07, "loss": 0.3767, "step": 40030 }, { "epoch": 0.8923751906248781, "grad_norm": 0.6323098540306091, "learning_rate": 5.660918966202001e-07, "loss": 0.2308, "step": 40035 }, { "epoch": 0.8924866400054982, "grad_norm": 0.4053361713886261, "learning_rate": 5.649311463451945e-07, "loss": 0.2657, "step": 40040 }, { "epoch": 0.8925980893861183, "grad_norm": 0.7476621270179749, "learning_rate": 5.637715527368015e-07, "loss": 0.3113, "step": 40045 }, { "epoch": 0.8927095387667383, "grad_norm": 0.4209010601043701, "learning_rate": 5.626131159371794e-07, "loss": 0.2733, "step": 40050 }, { "epoch": 0.8928209881473583, "grad_norm": 0.82071453332901, "learning_rate": 5.614558360883426e-07, "loss": 0.2244, "step": 40055 }, { "epoch": 0.8929324375279785, "grad_norm": 0.9399155378341675, "learning_rate": 5.602997133321686e-07, "loss": 0.3318, "step": 40060 }, { "epoch": 0.8930438869085985, "grad_norm": 0.5941539406776428, "learning_rate": 5.591447478103862e-07, "loss": 0.3065, "step": 40065 }, { "epoch": 0.8931553362892186, "grad_norm": 0.8494892716407776, "learning_rate": 5.579909396645844e-07, "loss": 0.2649, "step": 40070 }, { "epoch": 0.8932667856698386, "grad_norm": 0.5023177266120911, "learning_rate": 5.568382890362134e-07, "loss": 0.2635, "step": 40075 }, { "epoch": 0.8933782350504587, "grad_norm": 1.0533781051635742, "learning_rate": 5.556867960665768e-07, "loss": 0.2732, "step": 40080 }, { "epoch": 0.8934896844310788, "grad_norm": 0.6501057744026184, "learning_rate": 5.545364608968407e-07, "loss": 0.2353, "step": 40085 }, { "epoch": 0.8936011338116988, "grad_norm": 0.7864367961883545, "learning_rate": 5.533872836680254e-07, "loss": 0.2648, "step": 40090 }, { "epoch": 0.8937125831923189, "grad_norm": 0.5219927430152893, "learning_rate": 5.52239264521014e-07, "loss": 0.2553, "step": 40095 }, { "epoch": 0.893824032572939, "grad_norm": 0.44724053144454956, "learning_rate": 5.510924035965437e-07, "loss": 0.277, "step": 40100 }, { "epoch": 0.8939354819535591, "grad_norm": 0.5169790387153625, "learning_rate": 5.4994670103521e-07, "loss": 0.2414, "step": 40105 }, { "epoch": 0.8940469313341791, "grad_norm": 0.5901083946228027, "learning_rate": 5.488021569774682e-07, "loss": 0.2424, "step": 40110 }, { "epoch": 0.8941583807147991, "grad_norm": 0.42854076623916626, "learning_rate": 5.476587715636273e-07, "loss": 0.2814, "step": 40115 }, { "epoch": 0.8942698300954193, "grad_norm": 0.5647521615028381, "learning_rate": 5.465165449338628e-07, "loss": 0.331, "step": 40120 }, { "epoch": 0.8943812794760393, "grad_norm": 0.4333915710449219, "learning_rate": 5.453754772281972e-07, "loss": 0.2925, "step": 40125 }, { "epoch": 0.8944927288566594, "grad_norm": 0.3756270706653595, "learning_rate": 5.442355685865219e-07, "loss": 0.2895, "step": 40130 }, { "epoch": 0.8946041782372794, "grad_norm": 0.6410927176475525, "learning_rate": 5.430968191485797e-07, "loss": 0.4226, "step": 40135 }, { "epoch": 0.8947156276178995, "grad_norm": 0.5165871977806091, "learning_rate": 5.4195922905397e-07, "loss": 0.2558, "step": 40140 }, { "epoch": 0.8948270769985196, "grad_norm": 0.5397159457206726, "learning_rate": 5.408227984421521e-07, "loss": 0.3183, "step": 40145 }, { "epoch": 0.8949385263791396, "grad_norm": 0.7090404629707336, "learning_rate": 5.396875274524482e-07, "loss": 0.173, "step": 40150 }, { "epoch": 0.8950499757597598, "grad_norm": 0.9219658374786377, "learning_rate": 5.385534162240303e-07, "loss": 0.2953, "step": 40155 }, { "epoch": 0.8951614251403798, "grad_norm": 0.5412634611129761, "learning_rate": 5.374204648959314e-07, "loss": 0.3092, "step": 40160 }, { "epoch": 0.8952728745209998, "grad_norm": 0.6510096788406372, "learning_rate": 5.36288673607045e-07, "loss": 0.2088, "step": 40165 }, { "epoch": 0.8953843239016199, "grad_norm": 0.2909053564071655, "learning_rate": 5.351580424961178e-07, "loss": 0.3199, "step": 40170 }, { "epoch": 0.89549577328224, "grad_norm": 0.5638686418533325, "learning_rate": 5.340285717017568e-07, "loss": 0.2127, "step": 40175 }, { "epoch": 0.8956072226628601, "grad_norm": 0.5359810590744019, "learning_rate": 5.329002613624279e-07, "loss": 0.2579, "step": 40180 }, { "epoch": 0.8957186720434801, "grad_norm": 0.5342158675193787, "learning_rate": 5.317731116164515e-07, "loss": 0.2506, "step": 40185 }, { "epoch": 0.8958301214241002, "grad_norm": 0.5907591581344604, "learning_rate": 5.306471226020082e-07, "loss": 0.1526, "step": 40190 }, { "epoch": 0.8959415708047203, "grad_norm": 1.4923869371414185, "learning_rate": 5.295222944571365e-07, "loss": 0.3076, "step": 40195 }, { "epoch": 0.8960530201853403, "grad_norm": 0.38972416520118713, "learning_rate": 5.283986273197284e-07, "loss": 0.2718, "step": 40200 }, { "epoch": 0.8961644695659604, "grad_norm": 0.6770171523094177, "learning_rate": 5.272761213275413e-07, "loss": 0.3058, "step": 40205 }, { "epoch": 0.8962759189465804, "grad_norm": 0.23331047594547272, "learning_rate": 5.261547766181818e-07, "loss": 0.271, "step": 40210 }, { "epoch": 0.8963873683272006, "grad_norm": 0.611391007900238, "learning_rate": 5.250345933291201e-07, "loss": 0.2069, "step": 40215 }, { "epoch": 0.8964988177078206, "grad_norm": 0.4834649860858917, "learning_rate": 5.239155715976829e-07, "loss": 0.2186, "step": 40220 }, { "epoch": 0.8966102670884406, "grad_norm": 0.5032212734222412, "learning_rate": 5.227977115610528e-07, "loss": 0.2942, "step": 40225 }, { "epoch": 0.8967217164690607, "grad_norm": 0.4694267511367798, "learning_rate": 5.216810133562689e-07, "loss": 0.3242, "step": 40230 }, { "epoch": 0.8968331658496808, "grad_norm": 0.5364362597465515, "learning_rate": 5.205654771202317e-07, "loss": 0.208, "step": 40235 }, { "epoch": 0.8969446152303009, "grad_norm": 0.452486127614975, "learning_rate": 5.194511029896965e-07, "loss": 0.2727, "step": 40240 }, { "epoch": 0.8970560646109209, "grad_norm": 0.24721093475818634, "learning_rate": 5.183378911012782e-07, "loss": 0.2061, "step": 40245 }, { "epoch": 0.897167513991541, "grad_norm": 0.5605514049530029, "learning_rate": 5.172258415914478e-07, "loss": 0.2209, "step": 40250 }, { "epoch": 0.8972789633721611, "grad_norm": 0.5760518312454224, "learning_rate": 5.161149545965328e-07, "loss": 0.2893, "step": 40255 }, { "epoch": 0.8973904127527811, "grad_norm": 0.516250729560852, "learning_rate": 5.150052302527208e-07, "loss": 0.2429, "step": 40260 }, { "epoch": 0.8975018621334012, "grad_norm": 0.14454306662082672, "learning_rate": 5.138966686960534e-07, "loss": 0.2113, "step": 40265 }, { "epoch": 0.8976133115140212, "grad_norm": 0.6639364957809448, "learning_rate": 5.127892700624326e-07, "loss": 0.2789, "step": 40270 }, { "epoch": 0.8977247608946414, "grad_norm": 0.5296926498413086, "learning_rate": 5.116830344876178e-07, "loss": 0.2151, "step": 40275 }, { "epoch": 0.8978362102752614, "grad_norm": 0.7752794623374939, "learning_rate": 5.10577962107226e-07, "loss": 0.3579, "step": 40280 }, { "epoch": 0.8979476596558814, "grad_norm": 0.5278540253639221, "learning_rate": 5.094740530567277e-07, "loss": 0.1673, "step": 40285 }, { "epoch": 0.8980591090365015, "grad_norm": 0.7104523777961731, "learning_rate": 5.083713074714547e-07, "loss": 0.2614, "step": 40290 }, { "epoch": 0.8981705584171216, "grad_norm": 0.8003318905830383, "learning_rate": 5.072697254865966e-07, "loss": 0.4819, "step": 40295 }, { "epoch": 0.8982820077977417, "grad_norm": 0.7443469166755676, "learning_rate": 5.061693072371953e-07, "loss": 0.1905, "step": 40300 }, { "epoch": 0.8983934571783617, "grad_norm": 0.5459474325180054, "learning_rate": 5.050700528581554e-07, "loss": 0.3179, "step": 40305 }, { "epoch": 0.8985049065589819, "grad_norm": 1.0226274728775024, "learning_rate": 5.039719624842398e-07, "loss": 0.3921, "step": 40310 }, { "epoch": 0.8986163559396019, "grad_norm": 0.3113342821598053, "learning_rate": 5.028750362500633e-07, "loss": 0.3065, "step": 40315 }, { "epoch": 0.8987278053202219, "grad_norm": 0.8021506667137146, "learning_rate": 5.017792742901006e-07, "loss": 0.3777, "step": 40320 }, { "epoch": 0.898839254700842, "grad_norm": 0.36907243728637695, "learning_rate": 5.006846767386831e-07, "loss": 0.2731, "step": 40325 }, { "epoch": 0.898950704081462, "grad_norm": 0.45655930042266846, "learning_rate": 4.995912437299999e-07, "loss": 0.2021, "step": 40330 }, { "epoch": 0.8990621534620822, "grad_norm": 0.7580198645591736, "learning_rate": 4.984989753981007e-07, "loss": 0.2069, "step": 40335 }, { "epoch": 0.8991736028427022, "grad_norm": 0.5341771841049194, "learning_rate": 4.974078718768837e-07, "loss": 0.3496, "step": 40340 }, { "epoch": 0.8992850522233222, "grad_norm": 0.809218168258667, "learning_rate": 4.963179333001156e-07, "loss": 0.2558, "step": 40345 }, { "epoch": 0.8993965016039424, "grad_norm": 0.488911509513855, "learning_rate": 4.952291598014114e-07, "loss": 0.2288, "step": 40350 }, { "epoch": 0.8995079509845624, "grad_norm": 0.5416426062583923, "learning_rate": 4.941415515142467e-07, "loss": 0.2644, "step": 40355 }, { "epoch": 0.8996194003651825, "grad_norm": 0.9903549551963806, "learning_rate": 4.930551085719515e-07, "loss": 0.2549, "step": 40360 }, { "epoch": 0.8997308497458025, "grad_norm": 0.4512089490890503, "learning_rate": 4.919698311077203e-07, "loss": 0.2033, "step": 40365 }, { "epoch": 0.8998422991264226, "grad_norm": 0.7078744769096375, "learning_rate": 4.908857192545958e-07, "loss": 0.2584, "step": 40370 }, { "epoch": 0.8999537485070427, "grad_norm": 0.7992226481437683, "learning_rate": 4.898027731454824e-07, "loss": 0.2492, "step": 40375 }, { "epoch": 0.9000651978876627, "grad_norm": 0.7031039595603943, "learning_rate": 4.887209929131431e-07, "loss": 0.2458, "step": 40380 }, { "epoch": 0.9001766472682828, "grad_norm": 0.5648842453956604, "learning_rate": 4.876403786901939e-07, "loss": 0.2639, "step": 40385 }, { "epoch": 0.9002880966489029, "grad_norm": 0.8292831182479858, "learning_rate": 4.865609306091101e-07, "loss": 0.3474, "step": 40390 }, { "epoch": 0.900399546029523, "grad_norm": 0.8248302340507507, "learning_rate": 4.854826488022235e-07, "loss": 0.2959, "step": 40395 }, { "epoch": 0.900510995410143, "grad_norm": 0.6083874702453613, "learning_rate": 4.844055334017228e-07, "loss": 0.2092, "step": 40400 }, { "epoch": 0.900622444790763, "grad_norm": 0.8103845715522766, "learning_rate": 4.833295845396558e-07, "loss": 0.2423, "step": 40405 }, { "epoch": 0.9007338941713832, "grad_norm": 0.6734932661056519, "learning_rate": 4.822548023479234e-07, "loss": 0.1936, "step": 40410 }, { "epoch": 0.9008453435520032, "grad_norm": 0.7690288424491882, "learning_rate": 4.811811869582872e-07, "loss": 0.3171, "step": 40415 }, { "epoch": 0.9009567929326233, "grad_norm": 0.8907411098480225, "learning_rate": 4.801087385023629e-07, "loss": 0.2316, "step": 40420 }, { "epoch": 0.9010682423132433, "grad_norm": 0.8934478759765625, "learning_rate": 4.79037457111624e-07, "loss": 0.19, "step": 40425 }, { "epoch": 0.9011796916938634, "grad_norm": 0.38658207654953003, "learning_rate": 4.779673429174036e-07, "loss": 0.2257, "step": 40430 }, { "epoch": 0.9012911410744835, "grad_norm": 0.5999665260314941, "learning_rate": 4.768983960508888e-07, "loss": 0.2945, "step": 40435 }, { "epoch": 0.9014025904551035, "grad_norm": 0.6051561236381531, "learning_rate": 4.758306166431226e-07, "loss": 0.2874, "step": 40440 }, { "epoch": 0.9015140398357236, "grad_norm": 0.7040433287620544, "learning_rate": 4.747640048250068e-07, "loss": 0.3253, "step": 40445 }, { "epoch": 0.9016254892163437, "grad_norm": 0.5505911111831665, "learning_rate": 4.736985607273026e-07, "loss": 0.1973, "step": 40450 }, { "epoch": 0.9017369385969638, "grad_norm": 0.6418886780738831, "learning_rate": 4.726342844806209e-07, "loss": 0.3427, "step": 40455 }, { "epoch": 0.9018483879775838, "grad_norm": 0.46738046407699585, "learning_rate": 4.715711762154362e-07, "loss": 0.3277, "step": 40460 }, { "epoch": 0.9019598373582038, "grad_norm": 1.1336296796798706, "learning_rate": 4.7050923606207886e-07, "loss": 0.3439, "step": 40465 }, { "epoch": 0.902071286738824, "grad_norm": 0.5736339688301086, "learning_rate": 4.694484641507324e-07, "loss": 0.3776, "step": 40470 }, { "epoch": 0.902182736119444, "grad_norm": 0.4404354989528656, "learning_rate": 4.683888606114384e-07, "loss": 0.2393, "step": 40475 }, { "epoch": 0.9022941855000641, "grad_norm": 0.4591207206249237, "learning_rate": 4.673304255740974e-07, "loss": 0.3282, "step": 40480 }, { "epoch": 0.9024056348806841, "grad_norm": 0.9725980162620544, "learning_rate": 4.662731591684655e-07, "loss": 0.2651, "step": 40485 }, { "epoch": 0.9025170842613042, "grad_norm": 0.6524075269699097, "learning_rate": 4.652170615241558e-07, "loss": 0.3794, "step": 40490 }, { "epoch": 0.9026285336419243, "grad_norm": 0.6163941025733948, "learning_rate": 4.6416213277063693e-07, "loss": 0.2641, "step": 40495 }, { "epoch": 0.9027399830225443, "grad_norm": 0.8426482677459717, "learning_rate": 4.6310837303723546e-07, "loss": 0.3196, "step": 40500 }, { "epoch": 0.9028514324031645, "grad_norm": 0.47756075859069824, "learning_rate": 4.6205578245313356e-07, "loss": 0.294, "step": 40505 }, { "epoch": 0.9029628817837845, "grad_norm": 0.5749417543411255, "learning_rate": 4.6100436114737246e-07, "loss": 0.2196, "step": 40510 }, { "epoch": 0.9030743311644046, "grad_norm": 0.8055911064147949, "learning_rate": 4.5995410924884464e-07, "loss": 0.3331, "step": 40515 }, { "epoch": 0.9031857805450246, "grad_norm": 0.5792744159698486, "learning_rate": 4.5890502688630487e-07, "loss": 0.3535, "step": 40520 }, { "epoch": 0.9032972299256447, "grad_norm": 0.31290754675865173, "learning_rate": 4.5785711418836473e-07, "loss": 0.4213, "step": 40525 }, { "epoch": 0.9034086793062648, "grad_norm": 0.27813515067100525, "learning_rate": 4.5681037128348816e-07, "loss": 0.2462, "step": 40530 }, { "epoch": 0.9035201286868848, "grad_norm": 0.9764168858528137, "learning_rate": 4.55764798299998e-07, "loss": 0.2968, "step": 40535 }, { "epoch": 0.9036315780675049, "grad_norm": 0.5820105671882629, "learning_rate": 4.547203953660728e-07, "loss": 0.3481, "step": 40540 }, { "epoch": 0.903743027448125, "grad_norm": 0.6162011623382568, "learning_rate": 4.5367716260974916e-07, "loss": 0.3021, "step": 40545 }, { "epoch": 0.903854476828745, "grad_norm": 0.5713285803794861, "learning_rate": 4.5263510015891796e-07, "loss": 0.2988, "step": 40550 }, { "epoch": 0.9039659262093651, "grad_norm": 0.3402009606361389, "learning_rate": 4.5159420814133047e-07, "loss": 0.1795, "step": 40555 }, { "epoch": 0.9040773755899851, "grad_norm": 0.6031463146209717, "learning_rate": 4.505544866845901e-07, "loss": 0.2681, "step": 40560 }, { "epoch": 0.9041888249706053, "grad_norm": 0.746756911277771, "learning_rate": 4.495159359161605e-07, "loss": 0.2348, "step": 40565 }, { "epoch": 0.9043002743512253, "grad_norm": 1.0207500457763672, "learning_rate": 4.484785559633575e-07, "loss": 0.2689, "step": 40570 }, { "epoch": 0.9044117237318453, "grad_norm": 0.7450649738311768, "learning_rate": 4.474423469533562e-07, "loss": 0.2491, "step": 40575 }, { "epoch": 0.9045231731124654, "grad_norm": 0.6365007162094116, "learning_rate": 4.464073090131904e-07, "loss": 0.3264, "step": 40580 }, { "epoch": 0.9046346224930855, "grad_norm": 0.9207115769386292, "learning_rate": 4.4537344226974533e-07, "loss": 0.2552, "step": 40585 }, { "epoch": 0.9047460718737056, "grad_norm": 0.5804201364517212, "learning_rate": 4.4434074684976624e-07, "loss": 0.2411, "step": 40590 }, { "epoch": 0.9048575212543256, "grad_norm": 0.6803749203681946, "learning_rate": 4.43309222879853e-07, "loss": 0.3159, "step": 40595 }, { "epoch": 0.9049689706349457, "grad_norm": 0.34516510367393494, "learning_rate": 4.4227887048646335e-07, "loss": 0.2359, "step": 40600 }, { "epoch": 0.9050804200155658, "grad_norm": 0.8290709853172302, "learning_rate": 4.4124968979590953e-07, "loss": 0.2248, "step": 40605 }, { "epoch": 0.9051918693961858, "grad_norm": 0.6502459049224854, "learning_rate": 4.402216809343607e-07, "loss": 0.2735, "step": 40610 }, { "epoch": 0.9053033187768059, "grad_norm": 0.6392561793327332, "learning_rate": 4.3919484402784483e-07, "loss": 0.2153, "step": 40615 }, { "epoch": 0.9054147681574259, "grad_norm": 0.6268975734710693, "learning_rate": 4.3816917920224134e-07, "loss": 0.2836, "step": 40620 }, { "epoch": 0.9055262175380461, "grad_norm": 0.6285988688468933, "learning_rate": 4.371446865832918e-07, "loss": 0.2421, "step": 40625 }, { "epoch": 0.9056376669186661, "grad_norm": 0.863253653049469, "learning_rate": 4.3612136629659017e-07, "loss": 0.2221, "step": 40630 }, { "epoch": 0.9057491162992861, "grad_norm": 0.4006742238998413, "learning_rate": 4.350992184675873e-07, "loss": 0.2495, "step": 40635 }, { "epoch": 0.9058605656799062, "grad_norm": 0.44644999504089355, "learning_rate": 4.3407824322158844e-07, "loss": 0.2173, "step": 40640 }, { "epoch": 0.9059720150605263, "grad_norm": 0.7369060516357422, "learning_rate": 4.3305844068376125e-07, "loss": 0.2913, "step": 40645 }, { "epoch": 0.9060834644411464, "grad_norm": 0.7623095512390137, "learning_rate": 4.320398109791235e-07, "loss": 0.2909, "step": 40650 }, { "epoch": 0.9061949138217664, "grad_norm": 0.6151301860809326, "learning_rate": 4.310223542325509e-07, "loss": 0.1126, "step": 40655 }, { "epoch": 0.9063063632023866, "grad_norm": 0.7170618772506714, "learning_rate": 4.3000607056877697e-07, "loss": 0.2357, "step": 40660 }, { "epoch": 0.9064178125830066, "grad_norm": 0.5875582695007324, "learning_rate": 4.2899096011238986e-07, "loss": 0.2537, "step": 40665 }, { "epoch": 0.9065292619636266, "grad_norm": 0.5494327545166016, "learning_rate": 4.2797702298783327e-07, "loss": 0.3221, "step": 40670 }, { "epoch": 0.9066407113442467, "grad_norm": 0.7645689249038696, "learning_rate": 4.269642593194101e-07, "loss": 0.2921, "step": 40675 }, { "epoch": 0.9067521607248668, "grad_norm": 0.5476593375205994, "learning_rate": 4.2595266923127654e-07, "loss": 0.3128, "step": 40680 }, { "epoch": 0.9068636101054869, "grad_norm": 0.39104554057121277, "learning_rate": 4.249422528474456e-07, "loss": 0.3036, "step": 40685 }, { "epoch": 0.9069750594861069, "grad_norm": 0.6466798186302185, "learning_rate": 4.239330102917838e-07, "loss": 0.2348, "step": 40690 }, { "epoch": 0.9070865088667269, "grad_norm": 0.5702161192893982, "learning_rate": 4.229249416880221e-07, "loss": 0.2807, "step": 40695 }, { "epoch": 0.9071979582473471, "grad_norm": 0.7513668537139893, "learning_rate": 4.2191804715973725e-07, "loss": 0.2557, "step": 40700 }, { "epoch": 0.9073094076279671, "grad_norm": 0.6457399725914001, "learning_rate": 4.209123268303683e-07, "loss": 0.2531, "step": 40705 }, { "epoch": 0.9074208570085872, "grad_norm": 0.683296263217926, "learning_rate": 4.1990778082320993e-07, "loss": 0.2555, "step": 40710 }, { "epoch": 0.9075323063892072, "grad_norm": 0.4363429844379425, "learning_rate": 4.189044092614103e-07, "loss": 0.2557, "step": 40715 }, { "epoch": 0.9076437557698273, "grad_norm": 1.0468658208847046, "learning_rate": 4.179022122679754e-07, "loss": 0.2291, "step": 40720 }, { "epoch": 0.9077552051504474, "grad_norm": 0.5416340231895447, "learning_rate": 4.169011899657671e-07, "loss": 0.2986, "step": 40725 }, { "epoch": 0.9078666545310674, "grad_norm": 0.45224234461784363, "learning_rate": 4.159013424775016e-07, "loss": 0.1981, "step": 40730 }, { "epoch": 0.9079781039116875, "grad_norm": 0.7300019264221191, "learning_rate": 4.14902669925753e-07, "loss": 0.2268, "step": 40735 }, { "epoch": 0.9080895532923076, "grad_norm": 0.6923551559448242, "learning_rate": 4.1390517243295236e-07, "loss": 0.3246, "step": 40740 }, { "epoch": 0.9082010026729277, "grad_norm": 0.7699552178382874, "learning_rate": 4.1290885012138514e-07, "loss": 0.3116, "step": 40745 }, { "epoch": 0.9083124520535477, "grad_norm": 0.6291413307189941, "learning_rate": 4.119137031131926e-07, "loss": 0.2966, "step": 40750 }, { "epoch": 0.9084239014341677, "grad_norm": 0.3823592960834503, "learning_rate": 4.109197315303703e-07, "loss": 0.2266, "step": 40755 }, { "epoch": 0.9085353508147879, "grad_norm": 0.6914843320846558, "learning_rate": 4.09926935494771e-07, "loss": 0.3038, "step": 40760 }, { "epoch": 0.9086468001954079, "grad_norm": 0.8302752375602722, "learning_rate": 4.089353151281061e-07, "loss": 0.3805, "step": 40765 }, { "epoch": 0.908758249576028, "grad_norm": 0.7488309741020203, "learning_rate": 4.079448705519418e-07, "loss": 0.2704, "step": 40770 }, { "epoch": 0.908869698956648, "grad_norm": 0.5267086029052734, "learning_rate": 4.069556018876963e-07, "loss": 0.2928, "step": 40775 }, { "epoch": 0.9089811483372681, "grad_norm": 0.5103002190589905, "learning_rate": 4.059675092566462e-07, "loss": 0.1929, "step": 40780 }, { "epoch": 0.9090925977178882, "grad_norm": 0.476452499628067, "learning_rate": 4.0498059277992665e-07, "loss": 0.3, "step": 40785 }, { "epoch": 0.9092040470985082, "grad_norm": 0.5397807955741882, "learning_rate": 4.03994852578522e-07, "loss": 0.2652, "step": 40790 }, { "epoch": 0.9093154964791283, "grad_norm": 0.4290942847728729, "learning_rate": 4.030102887732801e-07, "loss": 0.3166, "step": 40795 }, { "epoch": 0.9094269458597484, "grad_norm": 0.2386811226606369, "learning_rate": 4.0202690148489766e-07, "loss": 0.3247, "step": 40800 }, { "epoch": 0.9095383952403685, "grad_norm": 0.898285448551178, "learning_rate": 4.010446908339338e-07, "loss": 0.3954, "step": 40805 }, { "epoch": 0.9096498446209885, "grad_norm": 0.5822149515151978, "learning_rate": 4.000636569407978e-07, "loss": 0.2307, "step": 40810 }, { "epoch": 0.9097612940016085, "grad_norm": 0.7847784757614136, "learning_rate": 3.9908379992575795e-07, "loss": 0.2917, "step": 40815 }, { "epoch": 0.9098727433822287, "grad_norm": 0.6533625721931458, "learning_rate": 3.9810511990893696e-07, "loss": 0.2921, "step": 40820 }, { "epoch": 0.9099841927628487, "grad_norm": 0.5961136817932129, "learning_rate": 3.971276170103111e-07, "loss": 0.3071, "step": 40825 }, { "epoch": 0.9100956421434688, "grad_norm": 0.513742983341217, "learning_rate": 3.9615129134971786e-07, "loss": 0.2203, "step": 40830 }, { "epoch": 0.9102070915240889, "grad_norm": 0.7879976630210876, "learning_rate": 3.951761430468448e-07, "loss": 0.2818, "step": 40835 }, { "epoch": 0.9103185409047089, "grad_norm": 0.7775445580482483, "learning_rate": 3.9420217222123967e-07, "loss": 0.2644, "step": 40840 }, { "epoch": 0.910429990285329, "grad_norm": 0.40710213780403137, "learning_rate": 3.932293789923036e-07, "loss": 0.4258, "step": 40845 }, { "epoch": 0.910541439665949, "grad_norm": 0.8035426139831543, "learning_rate": 3.9225776347929235e-07, "loss": 0.318, "step": 40850 }, { "epoch": 0.9106528890465692, "grad_norm": 0.41157716512680054, "learning_rate": 3.9128732580131725e-07, "loss": 0.2158, "step": 40855 }, { "epoch": 0.9107643384271892, "grad_norm": 0.32904186844825745, "learning_rate": 3.9031806607734977e-07, "loss": 0.3042, "step": 40860 }, { "epoch": 0.9108757878078093, "grad_norm": 0.37337726354599, "learning_rate": 3.8934998442621165e-07, "loss": 0.3871, "step": 40865 }, { "epoch": 0.9109872371884293, "grad_norm": 0.6850005388259888, "learning_rate": 3.8838308096658227e-07, "loss": 0.2787, "step": 40870 }, { "epoch": 0.9110986865690494, "grad_norm": 1.088067889213562, "learning_rate": 3.874173558169969e-07, "loss": 0.3177, "step": 40875 }, { "epoch": 0.9112101359496695, "grad_norm": 0.7726697325706482, "learning_rate": 3.864528090958475e-07, "loss": 0.3719, "step": 40880 }, { "epoch": 0.9113215853302895, "grad_norm": 0.6238122582435608, "learning_rate": 3.854894409213761e-07, "loss": 0.2347, "step": 40885 }, { "epoch": 0.9114330347109096, "grad_norm": 0.8500913977622986, "learning_rate": 3.8452725141168824e-07, "loss": 0.3518, "step": 40890 }, { "epoch": 0.9115444840915297, "grad_norm": 0.8673303723335266, "learning_rate": 3.835662406847385e-07, "loss": 0.2818, "step": 40895 }, { "epoch": 0.9116559334721497, "grad_norm": 0.590812623500824, "learning_rate": 3.826064088583414e-07, "loss": 0.2046, "step": 40900 }, { "epoch": 0.9117673828527698, "grad_norm": 0.7240970730781555, "learning_rate": 3.816477560501619e-07, "loss": 0.3674, "step": 40905 }, { "epoch": 0.9118788322333898, "grad_norm": 0.5596110224723816, "learning_rate": 3.80690282377727e-07, "loss": 0.3576, "step": 40910 }, { "epoch": 0.91199028161401, "grad_norm": 0.7215169072151184, "learning_rate": 3.797339879584128e-07, "loss": 0.2859, "step": 40915 }, { "epoch": 0.91210173099463, "grad_norm": 0.6473619937896729, "learning_rate": 3.7877887290945434e-07, "loss": 0.2701, "step": 40920 }, { "epoch": 0.91221318037525, "grad_norm": 0.760945737361908, "learning_rate": 3.7782493734794143e-07, "loss": 0.2415, "step": 40925 }, { "epoch": 0.9123246297558701, "grad_norm": 0.5840770602226257, "learning_rate": 3.768721813908205e-07, "loss": 0.2049, "step": 40930 }, { "epoch": 0.9124360791364902, "grad_norm": 0.779276430606842, "learning_rate": 3.759206051548914e-07, "loss": 0.2785, "step": 40935 }, { "epoch": 0.9125475285171103, "grad_norm": 0.848493754863739, "learning_rate": 3.7497020875680746e-07, "loss": 0.3365, "step": 40940 }, { "epoch": 0.9126589778977303, "grad_norm": 0.7321879267692566, "learning_rate": 3.740209923130844e-07, "loss": 0.3718, "step": 40945 }, { "epoch": 0.9127704272783504, "grad_norm": 0.6048905253410339, "learning_rate": 3.730729559400847e-07, "loss": 0.2731, "step": 40950 }, { "epoch": 0.9128818766589705, "grad_norm": 0.444602906703949, "learning_rate": 3.7212609975403325e-07, "loss": 0.3882, "step": 40955 }, { "epoch": 0.9129933260395905, "grad_norm": 0.47648975253105164, "learning_rate": 3.7118042387100596e-07, "loss": 0.2263, "step": 40960 }, { "epoch": 0.9131047754202106, "grad_norm": 0.7234205007553101, "learning_rate": 3.7023592840693566e-07, "loss": 0.3095, "step": 40965 }, { "epoch": 0.9132162248008306, "grad_norm": 0.24275638163089752, "learning_rate": 3.692926134776109e-07, "loss": 0.3063, "step": 40970 }, { "epoch": 0.9133276741814508, "grad_norm": 0.5635351538658142, "learning_rate": 3.683504791986714e-07, "loss": 0.3308, "step": 40975 }, { "epoch": 0.9134391235620708, "grad_norm": 0.5409741401672363, "learning_rate": 3.674095256856192e-07, "loss": 0.2552, "step": 40980 }, { "epoch": 0.9135505729426908, "grad_norm": 0.574982762336731, "learning_rate": 3.664697530538086e-07, "loss": 0.2514, "step": 40985 }, { "epoch": 0.913662022323311, "grad_norm": 0.5560092329978943, "learning_rate": 3.6553116141844645e-07, "loss": 0.2257, "step": 40990 }, { "epoch": 0.913773471703931, "grad_norm": 0.5724308490753174, "learning_rate": 3.6459375089459735e-07, "loss": 0.3164, "step": 40995 }, { "epoch": 0.9138849210845511, "grad_norm": 0.5456295609474182, "learning_rate": 3.636575215971805e-07, "loss": 0.2546, "step": 41000 }, { "epoch": 0.9139963704651711, "grad_norm": 0.5563053488731384, "learning_rate": 3.6272247364096845e-07, "loss": 0.3636, "step": 41005 }, { "epoch": 0.9141078198457913, "grad_norm": 0.536551296710968, "learning_rate": 3.6178860714059406e-07, "loss": 0.2576, "step": 41010 }, { "epoch": 0.9142192692264113, "grad_norm": 0.515977144241333, "learning_rate": 3.608559222105401e-07, "loss": 0.3612, "step": 41015 }, { "epoch": 0.9143307186070313, "grad_norm": 0.6344241499900818, "learning_rate": 3.599244189651485e-07, "loss": 0.2691, "step": 41020 }, { "epoch": 0.9144421679876514, "grad_norm": 1.1275960206985474, "learning_rate": 3.589940975186135e-07, "loss": 0.2796, "step": 41025 }, { "epoch": 0.9145536173682715, "grad_norm": 0.8587217926979065, "learning_rate": 3.5806495798498486e-07, "loss": 0.4254, "step": 41030 }, { "epoch": 0.9146650667488916, "grad_norm": 0.6224008202552795, "learning_rate": 3.5713700047816715e-07, "loss": 0.3392, "step": 41035 }, { "epoch": 0.9147765161295116, "grad_norm": 0.7869055867195129, "learning_rate": 3.562102251119215e-07, "loss": 0.3612, "step": 41040 }, { "epoch": 0.9148879655101316, "grad_norm": 0.5396468043327332, "learning_rate": 3.5528463199986374e-07, "loss": 0.3232, "step": 41045 }, { "epoch": 0.9149994148907518, "grad_norm": 0.7791078090667725, "learning_rate": 3.543602212554642e-07, "loss": 0.3343, "step": 41050 }, { "epoch": 0.9151108642713718, "grad_norm": 1.2158949375152588, "learning_rate": 3.5343699299205003e-07, "loss": 0.1825, "step": 41055 }, { "epoch": 0.9152223136519919, "grad_norm": 0.45359018445014954, "learning_rate": 3.5251494732279957e-07, "loss": 0.3307, "step": 41060 }, { "epoch": 0.9153337630326119, "grad_norm": 0.6725892424583435, "learning_rate": 3.5159408436075015e-07, "loss": 0.3159, "step": 41065 }, { "epoch": 0.9154452124132321, "grad_norm": 0.4623555839061737, "learning_rate": 3.506744042187904e-07, "loss": 0.2987, "step": 41070 }, { "epoch": 0.9155566617938521, "grad_norm": 0.9277505278587341, "learning_rate": 3.497559070096679e-07, "loss": 0.2633, "step": 41075 }, { "epoch": 0.9156681111744721, "grad_norm": 0.7097349762916565, "learning_rate": 3.4883859284598254e-07, "loss": 0.2369, "step": 41080 }, { "epoch": 0.9157795605550922, "grad_norm": 0.5371698141098022, "learning_rate": 3.4792246184018997e-07, "loss": 0.277, "step": 41085 }, { "epoch": 0.9158910099357123, "grad_norm": 0.5579074621200562, "learning_rate": 3.4700751410460255e-07, "loss": 0.3128, "step": 41090 }, { "epoch": 0.9160024593163324, "grad_norm": 0.9541270732879639, "learning_rate": 3.4609374975138275e-07, "loss": 0.3005, "step": 41095 }, { "epoch": 0.9161139086969524, "grad_norm": 0.6161052584648132, "learning_rate": 3.4518116889255215e-07, "loss": 0.3291, "step": 41100 }, { "epoch": 0.9162253580775724, "grad_norm": 0.6096290349960327, "learning_rate": 3.442697716399879e-07, "loss": 0.1922, "step": 41105 }, { "epoch": 0.9163368074581926, "grad_norm": 0.8514531850814819, "learning_rate": 3.4335955810541834e-07, "loss": 0.3677, "step": 41110 }, { "epoch": 0.9164482568388126, "grad_norm": 0.49064114689826965, "learning_rate": 3.424505284004276e-07, "loss": 0.254, "step": 41115 }, { "epoch": 0.9165597062194327, "grad_norm": 0.7443819046020508, "learning_rate": 3.4154268263645983e-07, "loss": 0.2203, "step": 41120 }, { "epoch": 0.9166711556000527, "grad_norm": 1.1286894083023071, "learning_rate": 3.4063602092480606e-07, "loss": 0.2697, "step": 41125 }, { "epoch": 0.9167826049806728, "grad_norm": 0.33552834391593933, "learning_rate": 3.3973054337661737e-07, "loss": 0.2624, "step": 41130 }, { "epoch": 0.9168940543612929, "grad_norm": 0.5012408494949341, "learning_rate": 3.3882625010289717e-07, "loss": 0.2549, "step": 41135 }, { "epoch": 0.9170055037419129, "grad_norm": 0.7399274706840515, "learning_rate": 3.379231412145079e-07, "loss": 0.2953, "step": 41140 }, { "epoch": 0.917116953122533, "grad_norm": 0.6067442893981934, "learning_rate": 3.3702121682216094e-07, "loss": 0.1676, "step": 41145 }, { "epoch": 0.9172284025031531, "grad_norm": 1.101588249206543, "learning_rate": 3.361204770364246e-07, "loss": 0.3673, "step": 41150 }, { "epoch": 0.9173398518837732, "grad_norm": 0.7327398061752319, "learning_rate": 3.3522092196772605e-07, "loss": 0.3734, "step": 41155 }, { "epoch": 0.9174513012643932, "grad_norm": 0.6925585865974426, "learning_rate": 3.3432255172634153e-07, "loss": 0.1508, "step": 41160 }, { "epoch": 0.9175627506450132, "grad_norm": 0.6559704542160034, "learning_rate": 3.3342536642240296e-07, "loss": 0.3934, "step": 41165 }, { "epoch": 0.9176742000256334, "grad_norm": 0.5590237379074097, "learning_rate": 3.3252936616590125e-07, "loss": 0.3641, "step": 41170 }, { "epoch": 0.9177856494062534, "grad_norm": 0.913287878036499, "learning_rate": 3.3163455106667854e-07, "loss": 0.305, "step": 41175 }, { "epoch": 0.9178970987868735, "grad_norm": 0.7776272892951965, "learning_rate": 3.3074092123443036e-07, "loss": 0.2427, "step": 41180 }, { "epoch": 0.9180085481674936, "grad_norm": 0.601886510848999, "learning_rate": 3.2984847677871025e-07, "loss": 0.1735, "step": 41185 }, { "epoch": 0.9181199975481136, "grad_norm": 0.5641916990280151, "learning_rate": 3.289572178089251e-07, "loss": 0.3247, "step": 41190 }, { "epoch": 0.9182314469287337, "grad_norm": 0.5231988430023193, "learning_rate": 3.2806714443433416e-07, "loss": 0.2847, "step": 41195 }, { "epoch": 0.9183428963093537, "grad_norm": 0.83245450258255, "learning_rate": 3.27178256764058e-07, "loss": 0.297, "step": 41200 }, { "epoch": 0.9184543456899739, "grad_norm": 0.6542052030563354, "learning_rate": 3.2629055490706386e-07, "loss": 0.3121, "step": 41205 }, { "epoch": 0.9185657950705939, "grad_norm": 0.6979672908782959, "learning_rate": 3.2540403897217907e-07, "loss": 0.1994, "step": 41210 }, { "epoch": 0.918677244451214, "grad_norm": 0.49956926703453064, "learning_rate": 3.2451870906808236e-07, "loss": 0.2602, "step": 41215 }, { "epoch": 0.918788693831834, "grad_norm": 0.7163175940513611, "learning_rate": 3.236345653033068e-07, "loss": 0.3169, "step": 41220 }, { "epoch": 0.9189001432124541, "grad_norm": 0.6471841335296631, "learning_rate": 3.227516077862447e-07, "loss": 0.2563, "step": 41225 }, { "epoch": 0.9190115925930742, "grad_norm": 0.27869361639022827, "learning_rate": 3.2186983662513826e-07, "loss": 0.2896, "step": 41230 }, { "epoch": 0.9191230419736942, "grad_norm": 0.7508506774902344, "learning_rate": 3.209892519280866e-07, "loss": 0.3923, "step": 41235 }, { "epoch": 0.9192344913543143, "grad_norm": 0.47320273518562317, "learning_rate": 3.2010985380304337e-07, "loss": 0.237, "step": 41240 }, { "epoch": 0.9193459407349344, "grad_norm": 0.3736218214035034, "learning_rate": 3.1923164235781346e-07, "loss": 0.3483, "step": 41245 }, { "epoch": 0.9194573901155544, "grad_norm": 0.48803550004959106, "learning_rate": 3.183546177000607e-07, "loss": 0.2823, "step": 41250 }, { "epoch": 0.9195688394961745, "grad_norm": 0.2997181713581085, "learning_rate": 3.1747877993729916e-07, "loss": 0.2793, "step": 41255 }, { "epoch": 0.9196802888767945, "grad_norm": 0.6676647067070007, "learning_rate": 3.1660412917690284e-07, "loss": 0.1896, "step": 41260 }, { "epoch": 0.9197917382574147, "grad_norm": 0.770282506942749, "learning_rate": 3.157306655260961e-07, "loss": 0.3209, "step": 41265 }, { "epoch": 0.9199031876380347, "grad_norm": 0.8639677166938782, "learning_rate": 3.1485838909195875e-07, "loss": 0.2109, "step": 41270 }, { "epoch": 0.9200146370186548, "grad_norm": 0.4482399523258209, "learning_rate": 3.139872999814253e-07, "loss": 0.1985, "step": 41275 }, { "epoch": 0.9201260863992748, "grad_norm": 0.8507997393608093, "learning_rate": 3.131173983012847e-07, "loss": 0.2521, "step": 41280 }, { "epoch": 0.9202375357798949, "grad_norm": 0.6366497278213501, "learning_rate": 3.122486841581773e-07, "loss": 0.3673, "step": 41285 }, { "epoch": 0.920348985160515, "grad_norm": 0.6055535674095154, "learning_rate": 3.1138115765860564e-07, "loss": 0.2085, "step": 41290 }, { "epoch": 0.920460434541135, "grad_norm": 0.547455906867981, "learning_rate": 3.1051481890891797e-07, "loss": 0.2279, "step": 41295 }, { "epoch": 0.9205718839217552, "grad_norm": 0.48422321677207947, "learning_rate": 3.096496680153238e-07, "loss": 0.2381, "step": 41300 }, { "epoch": 0.9206833333023752, "grad_norm": 0.9877715110778809, "learning_rate": 3.087857050838816e-07, "loss": 0.3086, "step": 41305 }, { "epoch": 0.9207947826829952, "grad_norm": 0.7008081674575806, "learning_rate": 3.079229302205078e-07, "loss": 0.1457, "step": 41310 }, { "epoch": 0.9209062320636153, "grad_norm": 0.7891531586647034, "learning_rate": 3.0706134353097107e-07, "loss": 0.2196, "step": 41315 }, { "epoch": 0.9210176814442353, "grad_norm": 0.44549623131752014, "learning_rate": 3.062009451208936e-07, "loss": 0.1889, "step": 41320 }, { "epoch": 0.9211291308248555, "grad_norm": 0.8684026002883911, "learning_rate": 3.053417350957577e-07, "loss": 0.332, "step": 41325 }, { "epoch": 0.9212405802054755, "grad_norm": 0.7550075650215149, "learning_rate": 3.0448371356089247e-07, "loss": 0.307, "step": 41330 }, { "epoch": 0.9213520295860955, "grad_norm": 0.7287044525146484, "learning_rate": 3.036268806214882e-07, "loss": 0.2904, "step": 41335 }, { "epoch": 0.9214634789667157, "grad_norm": 0.5059028267860413, "learning_rate": 3.02771236382583e-07, "loss": 0.2386, "step": 41340 }, { "epoch": 0.9215749283473357, "grad_norm": 0.7069331407546997, "learning_rate": 3.0191678094907306e-07, "loss": 0.3352, "step": 41345 }, { "epoch": 0.9216863777279558, "grad_norm": 0.6661747694015503, "learning_rate": 3.010635144257068e-07, "loss": 0.3008, "step": 41350 }, { "epoch": 0.9217978271085758, "grad_norm": 0.9574068784713745, "learning_rate": 3.0021143691709055e-07, "loss": 0.3011, "step": 41355 }, { "epoch": 0.921909276489196, "grad_norm": 0.45941346883773804, "learning_rate": 2.993605485276807e-07, "loss": 0.3542, "step": 41360 }, { "epoch": 0.922020725869816, "grad_norm": 0.7826938033103943, "learning_rate": 2.9851084936179054e-07, "loss": 0.3547, "step": 41365 }, { "epoch": 0.922132175250436, "grad_norm": 0.5727695226669312, "learning_rate": 2.976623395235867e-07, "loss": 0.288, "step": 41370 }, { "epoch": 0.9222436246310561, "grad_norm": 0.622995138168335, "learning_rate": 2.968150191170882e-07, "loss": 0.3068, "step": 41375 }, { "epoch": 0.9223550740116762, "grad_norm": 0.5591624975204468, "learning_rate": 2.95968888246172e-07, "loss": 0.2044, "step": 41380 }, { "epoch": 0.9224665233922963, "grad_norm": 0.40898847579956055, "learning_rate": 2.9512394701456614e-07, "loss": 0.257, "step": 41385 }, { "epoch": 0.9225779727729163, "grad_norm": 0.690396249294281, "learning_rate": 2.942801955258556e-07, "loss": 0.3743, "step": 41390 }, { "epoch": 0.9226894221535363, "grad_norm": 0.7114289999008179, "learning_rate": 2.934376338834755e-07, "loss": 0.2297, "step": 41395 }, { "epoch": 0.9228008715341565, "grad_norm": 0.8272179365158081, "learning_rate": 2.9259626219071747e-07, "loss": 0.2813, "step": 41400 }, { "epoch": 0.9229123209147765, "grad_norm": 0.4953787922859192, "learning_rate": 2.917560805507291e-07, "loss": 0.216, "step": 41405 }, { "epoch": 0.9230237702953966, "grad_norm": 0.40638652443885803, "learning_rate": 2.9091708906650806e-07, "loss": 0.218, "step": 41410 }, { "epoch": 0.9231352196760166, "grad_norm": 0.6107771992683411, "learning_rate": 2.900792878409109e-07, "loss": 0.3346, "step": 41415 }, { "epoch": 0.9232466690566368, "grad_norm": 0.4116399884223938, "learning_rate": 2.892426769766432e-07, "loss": 0.2733, "step": 41420 }, { "epoch": 0.9233581184372568, "grad_norm": 0.7331038117408752, "learning_rate": 2.8840725657626746e-07, "loss": 0.3631, "step": 41425 }, { "epoch": 0.9234695678178768, "grad_norm": 0.44384974241256714, "learning_rate": 2.8757302674220054e-07, "loss": 0.259, "step": 41430 }, { "epoch": 0.923581017198497, "grad_norm": 0.8607022762298584, "learning_rate": 2.8673998757671185e-07, "loss": 0.2981, "step": 41435 }, { "epoch": 0.923692466579117, "grad_norm": 0.5234905481338501, "learning_rate": 2.859081391819263e-07, "loss": 0.1037, "step": 41440 }, { "epoch": 0.9238039159597371, "grad_norm": 0.7386176586151123, "learning_rate": 2.8507748165982006e-07, "loss": 0.3014, "step": 41445 }, { "epoch": 0.9239153653403571, "grad_norm": 0.5484693050384521, "learning_rate": 2.8424801511222844e-07, "loss": 0.2253, "step": 41450 }, { "epoch": 0.9240268147209771, "grad_norm": 0.39193445444107056, "learning_rate": 2.8341973964083557e-07, "loss": 0.2594, "step": 41455 }, { "epoch": 0.9241382641015973, "grad_norm": 0.7376248240470886, "learning_rate": 2.825926553471825e-07, "loss": 0.3243, "step": 41460 }, { "epoch": 0.9242497134822173, "grad_norm": 0.46132493019104004, "learning_rate": 2.817667623326625e-07, "loss": 0.2577, "step": 41465 }, { "epoch": 0.9243611628628374, "grad_norm": 0.5108756422996521, "learning_rate": 2.809420606985236e-07, "loss": 0.2368, "step": 41470 }, { "epoch": 0.9244726122434574, "grad_norm": 0.814017653465271, "learning_rate": 2.8011855054586925e-07, "loss": 0.3394, "step": 41475 }, { "epoch": 0.9245840616240775, "grad_norm": 0.2798021137714386, "learning_rate": 2.7929623197565427e-07, "loss": 0.13, "step": 41480 }, { "epoch": 0.9246955110046976, "grad_norm": 0.6604013442993164, "learning_rate": 2.7847510508869025e-07, "loss": 0.2438, "step": 41485 }, { "epoch": 0.9248069603853176, "grad_norm": 0.47539040446281433, "learning_rate": 2.776551699856389e-07, "loss": 0.1872, "step": 41490 }, { "epoch": 0.9249184097659378, "grad_norm": 1.0226033926010132, "learning_rate": 2.768364267670187e-07, "loss": 0.3309, "step": 41495 }, { "epoch": 0.9250298591465578, "grad_norm": 0.5431970357894897, "learning_rate": 2.760188755332005e-07, "loss": 0.2802, "step": 41500 }, { "epoch": 0.9251413085271779, "grad_norm": 0.5110717415809631, "learning_rate": 2.7520251638440965e-07, "loss": 0.3091, "step": 41505 }, { "epoch": 0.9252527579077979, "grad_norm": 1.4647572040557861, "learning_rate": 2.7438734942072833e-07, "loss": 0.3535, "step": 41510 }, { "epoch": 0.925364207288418, "grad_norm": 0.5415831804275513, "learning_rate": 2.7357337474208767e-07, "loss": 0.2229, "step": 41515 }, { "epoch": 0.9254756566690381, "grad_norm": 0.5373227596282959, "learning_rate": 2.7276059244827455e-07, "loss": 0.2529, "step": 41520 }, { "epoch": 0.9255871060496581, "grad_norm": 0.6123536825180054, "learning_rate": 2.7194900263893043e-07, "loss": 0.372, "step": 41525 }, { "epoch": 0.9256985554302782, "grad_norm": 0.8674310445785522, "learning_rate": 2.7113860541354896e-07, "loss": 0.1822, "step": 41530 }, { "epoch": 0.9258100048108983, "grad_norm": 0.716400146484375, "learning_rate": 2.7032940087147854e-07, "loss": 0.2701, "step": 41535 }, { "epoch": 0.9259214541915183, "grad_norm": 1.3148468732833862, "learning_rate": 2.6952138911192196e-07, "loss": 0.2786, "step": 41540 }, { "epoch": 0.9260329035721384, "grad_norm": 0.747526228427887, "learning_rate": 2.6871457023393667e-07, "loss": 0.3921, "step": 41545 }, { "epoch": 0.9261443529527584, "grad_norm": 0.37125661969184875, "learning_rate": 2.679089443364313e-07, "loss": 0.1607, "step": 41550 }, { "epoch": 0.9262558023333786, "grad_norm": 0.41526857018470764, "learning_rate": 2.6710451151816807e-07, "loss": 0.3007, "step": 41555 }, { "epoch": 0.9263672517139986, "grad_norm": 0.7944679260253906, "learning_rate": 2.663012718777658e-07, "loss": 0.2242, "step": 41560 }, { "epoch": 0.9264787010946187, "grad_norm": 0.7380418181419373, "learning_rate": 2.6549922551369455e-07, "loss": 0.3003, "step": 41565 }, { "epoch": 0.9265901504752387, "grad_norm": 0.5211284756660461, "learning_rate": 2.646983725242802e-07, "loss": 0.3025, "step": 41570 }, { "epoch": 0.9267015998558588, "grad_norm": 0.5751848220825195, "learning_rate": 2.6389871300769865e-07, "loss": 0.1344, "step": 41575 }, { "epoch": 0.9268130492364789, "grad_norm": 0.5620242357254028, "learning_rate": 2.631002470619848e-07, "loss": 0.1888, "step": 41580 }, { "epoch": 0.9269244986170989, "grad_norm": 0.8619793653488159, "learning_rate": 2.623029747850236e-07, "loss": 0.3848, "step": 41585 }, { "epoch": 0.927035947997719, "grad_norm": 0.6856527924537659, "learning_rate": 2.6150689627455486e-07, "loss": 0.3158, "step": 41590 }, { "epoch": 0.9271473973783391, "grad_norm": 0.48377129435539246, "learning_rate": 2.6071201162816916e-07, "loss": 0.2322, "step": 41595 }, { "epoch": 0.9272588467589591, "grad_norm": 0.6122012734413147, "learning_rate": 2.5991832094331646e-07, "loss": 0.2448, "step": 41600 }, { "epoch": 0.9273702961395792, "grad_norm": 0.8147674798965454, "learning_rate": 2.591258243172956e-07, "loss": 0.2793, "step": 41605 }, { "epoch": 0.9274817455201992, "grad_norm": 0.7093601822853088, "learning_rate": 2.5833452184725995e-07, "loss": 0.3627, "step": 41610 }, { "epoch": 0.9275931949008194, "grad_norm": 0.39448460936546326, "learning_rate": 2.5754441363021854e-07, "loss": 0.2745, "step": 41615 }, { "epoch": 0.9277046442814394, "grad_norm": 0.6350008249282837, "learning_rate": 2.567554997630317e-07, "loss": 0.2382, "step": 41620 }, { "epoch": 0.9278160936620595, "grad_norm": 0.7153553366661072, "learning_rate": 2.559677803424143e-07, "loss": 0.2696, "step": 41625 }, { "epoch": 0.9279275430426795, "grad_norm": 0.5094320774078369, "learning_rate": 2.5518125546493356e-07, "loss": 0.256, "step": 41630 }, { "epoch": 0.9280389924232996, "grad_norm": 0.6403762698173523, "learning_rate": 2.543959252270134e-07, "loss": 0.3522, "step": 41635 }, { "epoch": 0.9281504418039197, "grad_norm": 0.4988352656364441, "learning_rate": 2.5361178972492906e-07, "loss": 0.3591, "step": 41640 }, { "epoch": 0.9282618911845397, "grad_norm": 0.8148736953735352, "learning_rate": 2.52828849054807e-07, "loss": 0.2603, "step": 41645 }, { "epoch": 0.9283733405651599, "grad_norm": 0.6698065400123596, "learning_rate": 2.520471033126326e-07, "loss": 0.3158, "step": 41650 }, { "epoch": 0.9284847899457799, "grad_norm": 0.45521312952041626, "learning_rate": 2.512665525942404e-07, "loss": 0.2776, "step": 41655 }, { "epoch": 0.9285962393263999, "grad_norm": 0.6049147248268127, "learning_rate": 2.504871969953204e-07, "loss": 0.2781, "step": 41660 }, { "epoch": 0.92870768870702, "grad_norm": 0.6182847619056702, "learning_rate": 2.497090366114152e-07, "loss": 0.2291, "step": 41665 }, { "epoch": 0.92881913808764, "grad_norm": 0.42223942279815674, "learning_rate": 2.4893207153792176e-07, "loss": 0.3403, "step": 41670 }, { "epoch": 0.9289305874682602, "grad_norm": 0.5356005430221558, "learning_rate": 2.4815630187008944e-07, "loss": 0.252, "step": 41675 }, { "epoch": 0.9290420368488802, "grad_norm": 0.6396979689598083, "learning_rate": 2.4738172770302104e-07, "loss": 0.278, "step": 41680 }, { "epoch": 0.9291534862295002, "grad_norm": 0.5384474396705627, "learning_rate": 2.466083491316751e-07, "loss": 0.3089, "step": 41685 }, { "epoch": 0.9292649356101204, "grad_norm": 0.5913751125335693, "learning_rate": 2.45836166250859e-07, "loss": 0.3418, "step": 41690 }, { "epoch": 0.9293763849907404, "grad_norm": 0.7718113660812378, "learning_rate": 2.4506517915524054e-07, "loss": 0.2465, "step": 41695 }, { "epoch": 0.9294878343713605, "grad_norm": 0.7065486907958984, "learning_rate": 2.4429538793933506e-07, "loss": 0.3416, "step": 41700 }, { "epoch": 0.9295992837519805, "grad_norm": 0.785386860370636, "learning_rate": 2.4352679269751154e-07, "loss": 0.4405, "step": 41705 }, { "epoch": 0.9297107331326007, "grad_norm": 0.5587266087532043, "learning_rate": 2.427593935239947e-07, "loss": 0.2807, "step": 41710 }, { "epoch": 0.9298221825132207, "grad_norm": 1.2147592306137085, "learning_rate": 2.419931905128614e-07, "loss": 0.211, "step": 41715 }, { "epoch": 0.9299336318938407, "grad_norm": 0.5206283330917358, "learning_rate": 2.4122818375804215e-07, "loss": 0.2633, "step": 41720 }, { "epoch": 0.9300450812744608, "grad_norm": 0.5563860535621643, "learning_rate": 2.4046437335332296e-07, "loss": 0.2558, "step": 41725 }, { "epoch": 0.9301565306550809, "grad_norm": 0.5390650629997253, "learning_rate": 2.39701759392339e-07, "loss": 0.287, "step": 41730 }, { "epoch": 0.930267980035701, "grad_norm": 0.47119244933128357, "learning_rate": 2.389403419685821e-07, "loss": 0.3024, "step": 41735 }, { "epoch": 0.930379429416321, "grad_norm": 0.6295462846755981, "learning_rate": 2.3818012117539535e-07, "loss": 0.2199, "step": 41740 }, { "epoch": 0.930490878796941, "grad_norm": 0.7683353424072266, "learning_rate": 2.374210971059754e-07, "loss": 0.2737, "step": 41745 }, { "epoch": 0.9306023281775612, "grad_norm": 0.7312949299812317, "learning_rate": 2.3666326985337328e-07, "loss": 0.2167, "step": 41750 }, { "epoch": 0.9307137775581812, "grad_norm": 0.48791399598121643, "learning_rate": 2.3590663951049141e-07, "loss": 0.2981, "step": 41755 }, { "epoch": 0.9308252269388013, "grad_norm": 0.5298525094985962, "learning_rate": 2.3515120617009112e-07, "loss": 0.2264, "step": 41760 }, { "epoch": 0.9309366763194213, "grad_norm": 0.5891234874725342, "learning_rate": 2.3439696992477834e-07, "loss": 0.39, "step": 41765 }, { "epoch": 0.9310481257000415, "grad_norm": 0.5690597891807556, "learning_rate": 2.336439308670191e-07, "loss": 0.3731, "step": 41770 }, { "epoch": 0.9311595750806615, "grad_norm": 0.5743623971939087, "learning_rate": 2.3289208908912952e-07, "loss": 0.2081, "step": 41775 }, { "epoch": 0.9312710244612815, "grad_norm": 0.7222797274589539, "learning_rate": 2.3214144468327703e-07, "loss": 0.2236, "step": 41780 }, { "epoch": 0.9313824738419016, "grad_norm": 0.739750862121582, "learning_rate": 2.3139199774148912e-07, "loss": 0.3976, "step": 41785 }, { "epoch": 0.9314939232225217, "grad_norm": 0.612297534942627, "learning_rate": 2.3064374835563896e-07, "loss": 0.3034, "step": 41790 }, { "epoch": 0.9316053726031418, "grad_norm": 0.5315260887145996, "learning_rate": 2.2989669661745872e-07, "loss": 0.2088, "step": 41795 }, { "epoch": 0.9317168219837618, "grad_norm": 0.6662878394126892, "learning_rate": 2.291508426185296e-07, "loss": 0.3021, "step": 41800 }, { "epoch": 0.9318282713643818, "grad_norm": 0.973757803440094, "learning_rate": 2.2840618645028735e-07, "loss": 0.2573, "step": 41805 }, { "epoch": 0.931939720745002, "grad_norm": 0.5593680143356323, "learning_rate": 2.2766272820402113e-07, "loss": 0.1697, "step": 41810 }, { "epoch": 0.932051170125622, "grad_norm": 0.7627435922622681, "learning_rate": 2.2692046797087475e-07, "loss": 0.352, "step": 41815 }, { "epoch": 0.9321626195062421, "grad_norm": 0.9235901832580566, "learning_rate": 2.2617940584184205e-07, "loss": 0.2901, "step": 41820 }, { "epoch": 0.9322740688868622, "grad_norm": 0.9725611805915833, "learning_rate": 2.2543954190777039e-07, "loss": 0.3149, "step": 41825 }, { "epoch": 0.9323855182674823, "grad_norm": 0.6089190244674683, "learning_rate": 2.2470087625936498e-07, "loss": 0.3041, "step": 41830 }, { "epoch": 0.9324969676481023, "grad_norm": 0.4506814479827881, "learning_rate": 2.2396340898717783e-07, "loss": 0.3103, "step": 41835 }, { "epoch": 0.9326084170287223, "grad_norm": 0.5334108471870422, "learning_rate": 2.2322714018161662e-07, "loss": 0.1976, "step": 41840 }, { "epoch": 0.9327198664093425, "grad_norm": 0.7881041169166565, "learning_rate": 2.2249206993294249e-07, "loss": 0.3227, "step": 41845 }, { "epoch": 0.9328313157899625, "grad_norm": 0.711259126663208, "learning_rate": 2.2175819833127111e-07, "loss": 0.2914, "step": 41850 }, { "epoch": 0.9329427651705826, "grad_norm": 0.6935058236122131, "learning_rate": 2.210255254665683e-07, "loss": 0.3051, "step": 41855 }, { "epoch": 0.9330542145512026, "grad_norm": 0.583777129650116, "learning_rate": 2.2029405142865225e-07, "loss": 0.3541, "step": 41860 }, { "epoch": 0.9331656639318227, "grad_norm": 0.5853663086891174, "learning_rate": 2.1956377630719895e-07, "loss": 0.3056, "step": 41865 }, { "epoch": 0.9332771133124428, "grad_norm": 0.7453263998031616, "learning_rate": 2.1883470019173346e-07, "loss": 0.2945, "step": 41870 }, { "epoch": 0.9333885626930628, "grad_norm": 0.8286758661270142, "learning_rate": 2.1810682317163323e-07, "loss": 0.2106, "step": 41875 }, { "epoch": 0.9335000120736829, "grad_norm": 0.5287289023399353, "learning_rate": 2.1738014533613349e-07, "loss": 0.1769, "step": 41880 }, { "epoch": 0.933611461454303, "grad_norm": 0.5840474963188171, "learning_rate": 2.1665466677431745e-07, "loss": 0.3766, "step": 41885 }, { "epoch": 0.933722910834923, "grad_norm": 0.9760193228721619, "learning_rate": 2.1593038757512397e-07, "loss": 0.2558, "step": 41890 }, { "epoch": 0.9338343602155431, "grad_norm": 0.5497422814369202, "learning_rate": 2.15207307827342e-07, "loss": 0.313, "step": 41895 }, { "epoch": 0.9339458095961631, "grad_norm": 0.6463967561721802, "learning_rate": 2.1448542761961844e-07, "loss": 0.2442, "step": 41900 }, { "epoch": 0.9340572589767833, "grad_norm": 0.6637454628944397, "learning_rate": 2.1376474704044693e-07, "loss": 0.2445, "step": 41905 }, { "epoch": 0.9341687083574033, "grad_norm": 0.2920180857181549, "learning_rate": 2.1304526617818121e-07, "loss": 0.2486, "step": 41910 }, { "epoch": 0.9342801577380234, "grad_norm": 0.7941288948059082, "learning_rate": 2.123269851210219e-07, "loss": 0.3052, "step": 41915 }, { "epoch": 0.9343916071186434, "grad_norm": 0.6346423625946045, "learning_rate": 2.116099039570252e-07, "loss": 0.241, "step": 41920 }, { "epoch": 0.9345030564992635, "grad_norm": 0.6398741602897644, "learning_rate": 2.1089402277409855e-07, "loss": 0.1914, "step": 41925 }, { "epoch": 0.9346145058798836, "grad_norm": 0.9809888005256653, "learning_rate": 2.101793416600051e-07, "loss": 0.1846, "step": 41930 }, { "epoch": 0.9347259552605036, "grad_norm": 0.5712922811508179, "learning_rate": 2.0946586070235808e-07, "loss": 0.3663, "step": 41935 }, { "epoch": 0.9348374046411237, "grad_norm": 0.7750124335289001, "learning_rate": 2.0875357998862422e-07, "loss": 0.2467, "step": 41940 }, { "epoch": 0.9349488540217438, "grad_norm": 0.4220404624938965, "learning_rate": 2.08042499606127e-07, "loss": 0.1812, "step": 41945 }, { "epoch": 0.9350603034023638, "grad_norm": 0.4303109347820282, "learning_rate": 2.0733261964203556e-07, "loss": 0.2075, "step": 41950 }, { "epoch": 0.9351717527829839, "grad_norm": 0.8041217923164368, "learning_rate": 2.0662394018337806e-07, "loss": 0.3333, "step": 41955 }, { "epoch": 0.9352832021636039, "grad_norm": 0.5130581855773926, "learning_rate": 2.0591646131703168e-07, "loss": 0.2355, "step": 41960 }, { "epoch": 0.9353946515442241, "grad_norm": 0.5752612352371216, "learning_rate": 2.0521018312972818e-07, "loss": 0.3229, "step": 41965 }, { "epoch": 0.9355061009248441, "grad_norm": 0.5263034701347351, "learning_rate": 2.045051057080516e-07, "loss": 0.3224, "step": 41970 }, { "epoch": 0.9356175503054642, "grad_norm": 0.40126967430114746, "learning_rate": 2.0380122913843946e-07, "loss": 0.2269, "step": 41975 }, { "epoch": 0.9357289996860843, "grad_norm": 0.5782945156097412, "learning_rate": 2.0309855350718277e-07, "loss": 0.3501, "step": 41980 }, { "epoch": 0.9358404490667043, "grad_norm": 0.6336562633514404, "learning_rate": 2.023970789004226e-07, "loss": 0.2832, "step": 41985 }, { "epoch": 0.9359518984473244, "grad_norm": 0.6401287913322449, "learning_rate": 2.016968054041546e-07, "loss": 0.3738, "step": 41990 }, { "epoch": 0.9360633478279444, "grad_norm": 0.7216722369194031, "learning_rate": 2.0099773310422676e-07, "loss": 0.3418, "step": 41995 }, { "epoch": 0.9361747972085646, "grad_norm": 0.7964975833892822, "learning_rate": 2.0029986208633943e-07, "loss": 0.3576, "step": 42000 }, { "epoch": 0.9362862465891846, "grad_norm": 0.40365028381347656, "learning_rate": 1.9960319243604753e-07, "loss": 0.3055, "step": 42005 }, { "epoch": 0.9363976959698046, "grad_norm": 1.2185330390930176, "learning_rate": 1.9890772423875715e-07, "loss": 0.3307, "step": 42010 }, { "epoch": 0.9365091453504247, "grad_norm": 0.4133281707763672, "learning_rate": 1.9821345757972787e-07, "loss": 0.3099, "step": 42015 }, { "epoch": 0.9366205947310448, "grad_norm": 0.5761286020278931, "learning_rate": 1.975203925440694e-07, "loss": 0.2464, "step": 42020 }, { "epoch": 0.9367320441116649, "grad_norm": 0.4297039806842804, "learning_rate": 1.96828529216746e-07, "loss": 0.2333, "step": 42025 }, { "epoch": 0.9368434934922849, "grad_norm": 0.4553482234477997, "learning_rate": 1.9613786768257758e-07, "loss": 0.3504, "step": 42030 }, { "epoch": 0.936954942872905, "grad_norm": 0.6616497039794922, "learning_rate": 1.95448408026232e-07, "loss": 0.2023, "step": 42035 }, { "epoch": 0.9370663922535251, "grad_norm": 0.4561408758163452, "learning_rate": 1.9476015033223273e-07, "loss": 0.2401, "step": 42040 }, { "epoch": 0.9371778416341451, "grad_norm": 0.9392362236976624, "learning_rate": 1.940730946849534e-07, "loss": 0.2696, "step": 42045 }, { "epoch": 0.9372892910147652, "grad_norm": 0.475864976644516, "learning_rate": 1.9338724116862328e-07, "loss": 0.2356, "step": 42050 }, { "epoch": 0.9374007403953852, "grad_norm": 0.5198137760162354, "learning_rate": 1.9270258986732181e-07, "loss": 0.2693, "step": 42055 }, { "epoch": 0.9375121897760054, "grad_norm": 0.5708441734313965, "learning_rate": 1.9201914086498075e-07, "loss": 0.2114, "step": 42060 }, { "epoch": 0.9376236391566254, "grad_norm": 0.585024893283844, "learning_rate": 1.913368942453886e-07, "loss": 0.3733, "step": 42065 }, { "epoch": 0.9377350885372454, "grad_norm": 0.4550107419490814, "learning_rate": 1.9065585009218069e-07, "loss": 0.4031, "step": 42070 }, { "epoch": 0.9378465379178655, "grad_norm": 0.6647144556045532, "learning_rate": 1.8997600848885023e-07, "loss": 0.1745, "step": 42075 }, { "epoch": 0.9379579872984856, "grad_norm": 0.4726318120956421, "learning_rate": 1.8929736951873946e-07, "loss": 0.2502, "step": 42080 }, { "epoch": 0.9380694366791057, "grad_norm": 0.517724871635437, "learning_rate": 1.886199332650429e-07, "loss": 0.2343, "step": 42085 }, { "epoch": 0.9381808860597257, "grad_norm": 0.577621579170227, "learning_rate": 1.8794369981081085e-07, "loss": 0.268, "step": 42090 }, { "epoch": 0.9382923354403457, "grad_norm": 0.811469316482544, "learning_rate": 1.872686692389436e-07, "loss": 0.2637, "step": 42095 }, { "epoch": 0.9384037848209659, "grad_norm": 0.29971227049827576, "learning_rate": 1.865948416321961e-07, "loss": 0.1793, "step": 42100 }, { "epoch": 0.9385152342015859, "grad_norm": 0.6150528192520142, "learning_rate": 1.8592221707317116e-07, "loss": 0.3394, "step": 42105 }, { "epoch": 0.938626683582206, "grad_norm": 0.5606695413589478, "learning_rate": 1.8525079564433057e-07, "loss": 0.2622, "step": 42110 }, { "epoch": 0.938738132962826, "grad_norm": 0.8158010840415955, "learning_rate": 1.8458057742798407e-07, "loss": 0.3565, "step": 42115 }, { "epoch": 0.9388495823434462, "grad_norm": 0.6361294984817505, "learning_rate": 1.8391156250629482e-07, "loss": 0.2246, "step": 42120 }, { "epoch": 0.9389610317240662, "grad_norm": 1.0060545206069946, "learning_rate": 1.8324375096128056e-07, "loss": 0.348, "step": 42125 }, { "epoch": 0.9390724811046862, "grad_norm": 0.9523029327392578, "learning_rate": 1.82577142874808e-07, "loss": 0.301, "step": 42130 }, { "epoch": 0.9391839304853064, "grad_norm": 0.4871150553226471, "learning_rate": 1.8191173832859955e-07, "loss": 0.267, "step": 42135 }, { "epoch": 0.9392953798659264, "grad_norm": 0.499967098236084, "learning_rate": 1.8124753740422662e-07, "loss": 0.2269, "step": 42140 }, { "epoch": 0.9394068292465465, "grad_norm": 0.7111514210700989, "learning_rate": 1.8058454018311743e-07, "loss": 0.257, "step": 42145 }, { "epoch": 0.9395182786271665, "grad_norm": 0.8823172450065613, "learning_rate": 1.7992274674654918e-07, "loss": 0.306, "step": 42150 }, { "epoch": 0.9396297280077865, "grad_norm": 0.3821994662284851, "learning_rate": 1.7926215717565255e-07, "loss": 0.256, "step": 42155 }, { "epoch": 0.9397411773884067, "grad_norm": 0.683880090713501, "learning_rate": 1.7860277155141158e-07, "loss": 0.3176, "step": 42160 }, { "epoch": 0.9398526267690267, "grad_norm": 0.7522774338722229, "learning_rate": 1.7794458995466057e-07, "loss": 0.3349, "step": 42165 }, { "epoch": 0.9399640761496468, "grad_norm": 0.8497912883758545, "learning_rate": 1.772876124660894e-07, "loss": 0.3679, "step": 42170 }, { "epoch": 0.9400755255302669, "grad_norm": 1.0213016271591187, "learning_rate": 1.7663183916623584e-07, "loss": 0.2661, "step": 42175 }, { "epoch": 0.940186974910887, "grad_norm": 1.0421154499053955, "learning_rate": 1.7597727013549448e-07, "loss": 0.4221, "step": 42180 }, { "epoch": 0.940298424291507, "grad_norm": 0.6340685486793518, "learning_rate": 1.7532390545410894e-07, "loss": 0.261, "step": 42185 }, { "epoch": 0.940409873672127, "grad_norm": 0.6523642539978027, "learning_rate": 1.7467174520217956e-07, "loss": 0.2458, "step": 42190 }, { "epoch": 0.9405213230527472, "grad_norm": 0.7212027311325073, "learning_rate": 1.7402078945965352e-07, "loss": 0.2504, "step": 42195 }, { "epoch": 0.9406327724333672, "grad_norm": 0.3738698959350586, "learning_rate": 1.7337103830633474e-07, "loss": 0.2207, "step": 42200 }, { "epoch": 0.9407442218139873, "grad_norm": 1.0090970993041992, "learning_rate": 1.7272249182187616e-07, "loss": 0.3324, "step": 42205 }, { "epoch": 0.9408556711946073, "grad_norm": 0.5669004917144775, "learning_rate": 1.7207515008578424e-07, "loss": 0.3247, "step": 42210 }, { "epoch": 0.9409671205752274, "grad_norm": 0.6298465728759766, "learning_rate": 1.7142901317741877e-07, "loss": 0.3369, "step": 42215 }, { "epoch": 0.9410785699558475, "grad_norm": 0.8187587261199951, "learning_rate": 1.7078408117599198e-07, "loss": 0.3211, "step": 42220 }, { "epoch": 0.9411900193364675, "grad_norm": 0.49201130867004395, "learning_rate": 1.701403541605673e-07, "loss": 0.3096, "step": 42225 }, { "epoch": 0.9413014687170876, "grad_norm": 0.7773210406303406, "learning_rate": 1.6949783221006044e-07, "loss": 0.2497, "step": 42230 }, { "epoch": 0.9414129180977077, "grad_norm": 0.733504056930542, "learning_rate": 1.6885651540323954e-07, "loss": 0.1281, "step": 42235 }, { "epoch": 0.9415243674783277, "grad_norm": 0.5793965458869934, "learning_rate": 1.6821640381872395e-07, "loss": 0.2486, "step": 42240 }, { "epoch": 0.9416358168589478, "grad_norm": 0.7282425761222839, "learning_rate": 1.6757749753498865e-07, "loss": 0.2126, "step": 42245 }, { "epoch": 0.9417472662395678, "grad_norm": 1.0350067615509033, "learning_rate": 1.6693979663035653e-07, "loss": 0.3799, "step": 42250 }, { "epoch": 0.941858715620188, "grad_norm": 1.0458147525787354, "learning_rate": 1.663033011830073e-07, "loss": 0.2125, "step": 42255 }, { "epoch": 0.941970165000808, "grad_norm": 0.5265653133392334, "learning_rate": 1.6566801127096855e-07, "loss": 0.2299, "step": 42260 }, { "epoch": 0.9420816143814281, "grad_norm": 0.7979380488395691, "learning_rate": 1.6503392697212128e-07, "loss": 0.3032, "step": 42265 }, { "epoch": 0.9421930637620481, "grad_norm": 0.6244803071022034, "learning_rate": 1.6440104836420112e-07, "loss": 0.3416, "step": 42270 }, { "epoch": 0.9423045131426682, "grad_norm": 0.9354427456855774, "learning_rate": 1.6376937552479154e-07, "loss": 0.2255, "step": 42275 }, { "epoch": 0.9424159625232883, "grad_norm": 0.37904810905456543, "learning_rate": 1.6313890853133397e-07, "loss": 0.2258, "step": 42280 }, { "epoch": 0.9425274119039083, "grad_norm": 0.732758104801178, "learning_rate": 1.6250964746111654e-07, "loss": 0.1669, "step": 42285 }, { "epoch": 0.9426388612845285, "grad_norm": 0.9100874066352844, "learning_rate": 1.618815923912842e-07, "loss": 0.4047, "step": 42290 }, { "epoch": 0.9427503106651485, "grad_norm": 0.612760603427887, "learning_rate": 1.6125474339882874e-07, "loss": 0.3023, "step": 42295 }, { "epoch": 0.9428617600457685, "grad_norm": 0.5571433901786804, "learning_rate": 1.6062910056059867e-07, "loss": 0.2317, "step": 42300 }, { "epoch": 0.9429732094263886, "grad_norm": 0.6878780722618103, "learning_rate": 1.6000466395329151e-07, "loss": 0.3452, "step": 42305 }, { "epoch": 0.9430846588070086, "grad_norm": 0.7174660563468933, "learning_rate": 1.5938143365346048e-07, "loss": 0.2271, "step": 42310 }, { "epoch": 0.9431961081876288, "grad_norm": 0.7692664265632629, "learning_rate": 1.587594097375078e-07, "loss": 0.2722, "step": 42315 }, { "epoch": 0.9433075575682488, "grad_norm": 0.5780901312828064, "learning_rate": 1.5813859228168805e-07, "loss": 0.302, "step": 42320 }, { "epoch": 0.9434190069488689, "grad_norm": 0.41825351119041443, "learning_rate": 1.575189813621103e-07, "loss": 0.2909, "step": 42325 }, { "epoch": 0.943530456329489, "grad_norm": 0.40317502617836, "learning_rate": 1.569005770547327e-07, "loss": 0.1812, "step": 42330 }, { "epoch": 0.943641905710109, "grad_norm": 0.5926758646965027, "learning_rate": 1.5628337943536686e-07, "loss": 0.3533, "step": 42335 }, { "epoch": 0.9437533550907291, "grad_norm": 0.9634340405464172, "learning_rate": 1.556673885796778e-07, "loss": 0.2677, "step": 42340 }, { "epoch": 0.9438648044713491, "grad_norm": 0.805266797542572, "learning_rate": 1.550526045631795e-07, "loss": 0.2779, "step": 42345 }, { "epoch": 0.9439762538519693, "grad_norm": 0.7578127980232239, "learning_rate": 1.5443902746124172e-07, "loss": 0.2362, "step": 42350 }, { "epoch": 0.9440877032325893, "grad_norm": 0.5255642533302307, "learning_rate": 1.5382665734908098e-07, "loss": 0.2249, "step": 42355 }, { "epoch": 0.9441991526132093, "grad_norm": 0.9447386860847473, "learning_rate": 1.5321549430177275e-07, "loss": 0.4072, "step": 42360 }, { "epoch": 0.9443106019938294, "grad_norm": 0.5444424152374268, "learning_rate": 1.5260553839424042e-07, "loss": 0.2678, "step": 42365 }, { "epoch": 0.9444220513744495, "grad_norm": 0.8604699969291687, "learning_rate": 1.5199678970125642e-07, "loss": 0.3507, "step": 42370 }, { "epoch": 0.9445335007550696, "grad_norm": 0.6764824986457825, "learning_rate": 1.5138924829745217e-07, "loss": 0.3744, "step": 42375 }, { "epoch": 0.9446449501356896, "grad_norm": 0.3252200484275818, "learning_rate": 1.5078291425730694e-07, "loss": 0.2754, "step": 42380 }, { "epoch": 0.9447563995163097, "grad_norm": 0.2970804274082184, "learning_rate": 1.5017778765515246e-07, "loss": 0.1979, "step": 42385 }, { "epoch": 0.9448678488969298, "grad_norm": 0.7865724563598633, "learning_rate": 1.495738685651704e-07, "loss": 0.2971, "step": 42390 }, { "epoch": 0.9449792982775498, "grad_norm": 0.7084057927131653, "learning_rate": 1.4897115706140052e-07, "loss": 0.2849, "step": 42395 }, { "epoch": 0.9450907476581699, "grad_norm": 0.7051497101783752, "learning_rate": 1.48369653217727e-07, "loss": 0.2426, "step": 42400 }, { "epoch": 0.9452021970387899, "grad_norm": 0.4434252381324768, "learning_rate": 1.4776935710789196e-07, "loss": 0.2957, "step": 42405 }, { "epoch": 0.9453136464194101, "grad_norm": 0.6347283720970154, "learning_rate": 1.4717026880548657e-07, "loss": 0.3472, "step": 42410 }, { "epoch": 0.9454250958000301, "grad_norm": 0.5560967922210693, "learning_rate": 1.4657238838395315e-07, "loss": 0.3799, "step": 42415 }, { "epoch": 0.9455365451806501, "grad_norm": 0.8887553215026855, "learning_rate": 1.4597571591658865e-07, "loss": 0.3139, "step": 42420 }, { "epoch": 0.9456479945612702, "grad_norm": 0.4831375181674957, "learning_rate": 1.45380251476539e-07, "loss": 0.2862, "step": 42425 }, { "epoch": 0.9457594439418903, "grad_norm": 0.6553042531013489, "learning_rate": 1.4478599513680468e-07, "loss": 0.2052, "step": 42430 }, { "epoch": 0.9458708933225104, "grad_norm": 0.7742103934288025, "learning_rate": 1.4419294697023745e-07, "loss": 0.3235, "step": 42435 }, { "epoch": 0.9459823427031304, "grad_norm": 0.7551333904266357, "learning_rate": 1.436011070495402e-07, "loss": 0.2466, "step": 42440 }, { "epoch": 0.9460937920837504, "grad_norm": 0.7697866559028625, "learning_rate": 1.430104754472672e-07, "loss": 0.334, "step": 42445 }, { "epoch": 0.9462052414643706, "grad_norm": 0.4702761769294739, "learning_rate": 1.4242105223582602e-07, "loss": 0.223, "step": 42450 }, { "epoch": 0.9463166908449906, "grad_norm": 0.5854142904281616, "learning_rate": 1.4183283748747446e-07, "loss": 0.2965, "step": 42455 }, { "epoch": 0.9464281402256107, "grad_norm": 0.5609765648841858, "learning_rate": 1.4124583127432257e-07, "loss": 0.3774, "step": 42460 }, { "epoch": 0.9465395896062307, "grad_norm": 0.642464280128479, "learning_rate": 1.406600336683339e-07, "loss": 0.3742, "step": 42465 }, { "epoch": 0.9466510389868509, "grad_norm": 0.7575817108154297, "learning_rate": 1.4007544474132438e-07, "loss": 0.4532, "step": 42470 }, { "epoch": 0.9467624883674709, "grad_norm": 0.32896581292152405, "learning_rate": 1.3949206456495778e-07, "loss": 0.2458, "step": 42475 }, { "epoch": 0.9468739377480909, "grad_norm": 0.5499560236930847, "learning_rate": 1.3890989321075244e-07, "loss": 0.3524, "step": 42480 }, { "epoch": 0.946985387128711, "grad_norm": 0.643747866153717, "learning_rate": 1.3832893075007902e-07, "loss": 0.3066, "step": 42485 }, { "epoch": 0.9470968365093311, "grad_norm": 0.8362199068069458, "learning_rate": 1.3774917725415727e-07, "loss": 0.3007, "step": 42490 }, { "epoch": 0.9472082858899512, "grad_norm": 0.390722393989563, "learning_rate": 1.371706327940614e-07, "loss": 0.3432, "step": 42495 }, { "epoch": 0.9473197352705712, "grad_norm": 0.3729709982872009, "learning_rate": 1.365932974407169e-07, "loss": 0.2158, "step": 42500 }, { "epoch": 0.9474311846511912, "grad_norm": 0.661213219165802, "learning_rate": 1.3601717126490056e-07, "loss": 0.3354, "step": 42505 }, { "epoch": 0.9475426340318114, "grad_norm": 1.1504225730895996, "learning_rate": 1.3544225433724024e-07, "loss": 0.1984, "step": 42510 }, { "epoch": 0.9476540834124314, "grad_norm": 0.4445840120315552, "learning_rate": 1.3486854672821848e-07, "loss": 0.2474, "step": 42515 }, { "epoch": 0.9477655327930515, "grad_norm": 0.7373872399330139, "learning_rate": 1.3429604850816346e-07, "loss": 0.4028, "step": 42520 }, { "epoch": 0.9478769821736716, "grad_norm": 0.5534309148788452, "learning_rate": 1.337247597472624e-07, "loss": 0.2287, "step": 42525 }, { "epoch": 0.9479884315542917, "grad_norm": 0.997943103313446, "learning_rate": 1.3315468051554926e-07, "loss": 0.3051, "step": 42530 }, { "epoch": 0.9480998809349117, "grad_norm": 0.5267981290817261, "learning_rate": 1.3258581088291257e-07, "loss": 0.3215, "step": 42535 }, { "epoch": 0.9482113303155317, "grad_norm": 0.734476625919342, "learning_rate": 1.32018150919091e-07, "loss": 0.2919, "step": 42540 }, { "epoch": 0.9483227796961519, "grad_norm": 0.4765477478504181, "learning_rate": 1.3145170069367552e-07, "loss": 0.2901, "step": 42545 }, { "epoch": 0.9484342290767719, "grad_norm": 0.6583042740821838, "learning_rate": 1.308864602761073e-07, "loss": 0.3807, "step": 42550 }, { "epoch": 0.948545678457392, "grad_norm": 0.9653348326683044, "learning_rate": 1.3032242973567976e-07, "loss": 0.2463, "step": 42555 }, { "epoch": 0.948657127838012, "grad_norm": 0.6304477453231812, "learning_rate": 1.2975960914154207e-07, "loss": 0.317, "step": 42560 }, { "epoch": 0.9487685772186321, "grad_norm": 0.8308120965957642, "learning_rate": 1.291979985626879e-07, "loss": 0.2333, "step": 42565 }, { "epoch": 0.9488800265992522, "grad_norm": 0.8143225908279419, "learning_rate": 1.2863759806796993e-07, "loss": 0.1493, "step": 42570 }, { "epoch": 0.9489914759798722, "grad_norm": 0.774811327457428, "learning_rate": 1.280784077260866e-07, "loss": 0.2704, "step": 42575 }, { "epoch": 0.9491029253604923, "grad_norm": 0.20292221009731293, "learning_rate": 1.275204276055919e-07, "loss": 0.1911, "step": 42580 }, { "epoch": 0.9492143747411124, "grad_norm": 0.8017863035202026, "learning_rate": 1.2696365777488673e-07, "loss": 0.2807, "step": 42585 }, { "epoch": 0.9493258241217325, "grad_norm": 0.6566117405891418, "learning_rate": 1.264080983022309e-07, "loss": 0.2286, "step": 42590 }, { "epoch": 0.9494372735023525, "grad_norm": 0.5979494452476501, "learning_rate": 1.2585374925572991e-07, "loss": 0.1646, "step": 42595 }, { "epoch": 0.9495487228829725, "grad_norm": 0.6958621740341187, "learning_rate": 1.2530061070334054e-07, "loss": 0.2779, "step": 42600 }, { "epoch": 0.9496601722635927, "grad_norm": 0.6765291094779968, "learning_rate": 1.2474868271287745e-07, "loss": 0.3752, "step": 42605 }, { "epoch": 0.9497716216442127, "grad_norm": 0.5178284645080566, "learning_rate": 1.241979653519998e-07, "loss": 0.3015, "step": 42610 }, { "epoch": 0.9498830710248328, "grad_norm": 0.7208675742149353, "learning_rate": 1.236484586882214e-07, "loss": 0.3072, "step": 42615 }, { "epoch": 0.9499945204054528, "grad_norm": 0.6745007634162903, "learning_rate": 1.2310016278890946e-07, "loss": 0.2314, "step": 42620 }, { "epoch": 0.9501059697860729, "grad_norm": 0.9178875684738159, "learning_rate": 1.2255307772127911e-07, "loss": 0.2668, "step": 42625 }, { "epoch": 0.950217419166693, "grad_norm": 0.5638184547424316, "learning_rate": 1.2200720355239893e-07, "loss": 0.2664, "step": 42630 }, { "epoch": 0.950328868547313, "grad_norm": 0.5028668642044067, "learning_rate": 1.214625403491887e-07, "loss": 0.4063, "step": 42635 }, { "epoch": 0.9504403179279332, "grad_norm": 0.840446412563324, "learning_rate": 1.209190881784217e-07, "loss": 0.2869, "step": 42640 }, { "epoch": 0.9505517673085532, "grad_norm": 0.7156557440757751, "learning_rate": 1.2037684710671905e-07, "loss": 0.2172, "step": 42645 }, { "epoch": 0.9506632166891732, "grad_norm": 1.0785118341445923, "learning_rate": 1.1983581720055537e-07, "loss": 0.2311, "step": 42650 }, { "epoch": 0.9507746660697933, "grad_norm": 1.065416932106018, "learning_rate": 1.1929599852625872e-07, "loss": 0.2657, "step": 42655 }, { "epoch": 0.9508861154504133, "grad_norm": 0.7923719882965088, "learning_rate": 1.1875739115000506e-07, "loss": 0.3733, "step": 42660 }, { "epoch": 0.9509975648310335, "grad_norm": 0.7212884426116943, "learning_rate": 1.1821999513782268e-07, "loss": 0.3436, "step": 42665 }, { "epoch": 0.9511090142116535, "grad_norm": 0.6976764798164368, "learning_rate": 1.176838105555933e-07, "loss": 0.272, "step": 42670 }, { "epoch": 0.9512204635922736, "grad_norm": 0.42368894815444946, "learning_rate": 1.1714883746904992e-07, "loss": 0.2463, "step": 42675 }, { "epoch": 0.9513319129728937, "grad_norm": 0.6565561294555664, "learning_rate": 1.1661507594377452e-07, "loss": 0.2854, "step": 42680 }, { "epoch": 0.9514433623535137, "grad_norm": 0.4275035560131073, "learning_rate": 1.1608252604520364e-07, "loss": 0.2842, "step": 42685 }, { "epoch": 0.9515548117341338, "grad_norm": 0.9225032925605774, "learning_rate": 1.1555118783862174e-07, "loss": 0.2786, "step": 42690 }, { "epoch": 0.9516662611147538, "grad_norm": 0.5609422922134399, "learning_rate": 1.1502106138916891e-07, "loss": 0.2586, "step": 42695 }, { "epoch": 0.951777710495374, "grad_norm": 0.9946783781051636, "learning_rate": 1.1449214676183429e-07, "loss": 0.2039, "step": 42700 }, { "epoch": 0.951889159875994, "grad_norm": 0.5673030614852905, "learning_rate": 1.1396444402145602e-07, "loss": 0.2096, "step": 42705 }, { "epoch": 0.952000609256614, "grad_norm": 0.5998368263244629, "learning_rate": 1.1343795323272899e-07, "loss": 0.296, "step": 42710 }, { "epoch": 0.9521120586372341, "grad_norm": 0.6101316213607788, "learning_rate": 1.1291267446019716e-07, "loss": 0.2241, "step": 42715 }, { "epoch": 0.9522235080178542, "grad_norm": 0.8632827997207642, "learning_rate": 1.1238860776825456e-07, "loss": 0.2306, "step": 42720 }, { "epoch": 0.9523349573984743, "grad_norm": 0.5945008993148804, "learning_rate": 1.1186575322114868e-07, "loss": 0.3053, "step": 42725 }, { "epoch": 0.9524464067790943, "grad_norm": 0.5753487348556519, "learning_rate": 1.1134411088297603e-07, "loss": 0.2783, "step": 42730 }, { "epoch": 0.9525578561597144, "grad_norm": 0.4323602616786957, "learning_rate": 1.1082368081768657e-07, "loss": 0.3981, "step": 42735 }, { "epoch": 0.9526693055403345, "grad_norm": 0.28826257586479187, "learning_rate": 1.1030446308908038e-07, "loss": 0.1441, "step": 42740 }, { "epoch": 0.9527807549209545, "grad_norm": 0.6044817566871643, "learning_rate": 1.0978645776081098e-07, "loss": 0.2885, "step": 42745 }, { "epoch": 0.9528922043015746, "grad_norm": 0.6922546625137329, "learning_rate": 1.0926966489638202e-07, "loss": 0.3463, "step": 42750 }, { "epoch": 0.9530036536821946, "grad_norm": 0.5683593153953552, "learning_rate": 1.0875408455914726e-07, "loss": 0.2736, "step": 42755 }, { "epoch": 0.9531151030628148, "grad_norm": 0.41599422693252563, "learning_rate": 1.0823971681231171e-07, "loss": 0.3351, "step": 42760 }, { "epoch": 0.9532265524434348, "grad_norm": 0.6246147155761719, "learning_rate": 1.0772656171893603e-07, "loss": 0.2507, "step": 42765 }, { "epoch": 0.9533380018240548, "grad_norm": 0.6976191997528076, "learning_rate": 1.0721461934192545e-07, "loss": 0.3827, "step": 42770 }, { "epoch": 0.953449451204675, "grad_norm": 0.5291700959205627, "learning_rate": 1.06703889744042e-07, "loss": 0.3821, "step": 42775 }, { "epoch": 0.953560900585295, "grad_norm": 0.6558244824409485, "learning_rate": 1.0619437298789781e-07, "loss": 0.2086, "step": 42780 }, { "epoch": 0.9536723499659151, "grad_norm": 0.5279899835586548, "learning_rate": 1.0568606913595514e-07, "loss": 0.2535, "step": 42785 }, { "epoch": 0.9537837993465351, "grad_norm": 0.4647957384586334, "learning_rate": 1.0517897825052858e-07, "loss": 0.2313, "step": 42790 }, { "epoch": 0.9538952487271553, "grad_norm": 0.9641315340995789, "learning_rate": 1.0467310039378287e-07, "loss": 0.2446, "step": 42795 }, { "epoch": 0.9540066981077753, "grad_norm": 0.5629506707191467, "learning_rate": 1.0416843562773393e-07, "loss": 0.2346, "step": 42800 }, { "epoch": 0.9541181474883953, "grad_norm": 0.786285400390625, "learning_rate": 1.0366498401425117e-07, "loss": 0.2091, "step": 42805 }, { "epoch": 0.9542295968690154, "grad_norm": 0.8931853771209717, "learning_rate": 1.0316274561505301e-07, "loss": 0.3457, "step": 42810 }, { "epoch": 0.9543410462496354, "grad_norm": 0.9373008608818054, "learning_rate": 1.026617204917102e-07, "loss": 0.1567, "step": 42815 }, { "epoch": 0.9544524956302556, "grad_norm": 1.3995561599731445, "learning_rate": 1.0216190870564579e-07, "loss": 0.1895, "step": 42820 }, { "epoch": 0.9545639450108756, "grad_norm": 0.3858010470867157, "learning_rate": 1.0166331031813082e-07, "loss": 0.2095, "step": 42825 }, { "epoch": 0.9546753943914956, "grad_norm": 0.8172832131385803, "learning_rate": 1.011659253902908e-07, "loss": 0.3237, "step": 42830 }, { "epoch": 0.9547868437721158, "grad_norm": 0.40116187930107117, "learning_rate": 1.0066975398310141e-07, "loss": 0.2896, "step": 42835 }, { "epoch": 0.9548982931527358, "grad_norm": 0.3348509669303894, "learning_rate": 1.0017479615738957e-07, "loss": 0.266, "step": 42840 }, { "epoch": 0.9550097425333559, "grad_norm": 0.6185810565948486, "learning_rate": 9.968105197383226e-08, "loss": 0.2971, "step": 42845 }, { "epoch": 0.9551211919139759, "grad_norm": 0.8081035614013672, "learning_rate": 9.918852149295777e-08, "loss": 0.2854, "step": 42850 }, { "epoch": 0.955232641294596, "grad_norm": 0.5405545830726624, "learning_rate": 9.869720477514999e-08, "loss": 0.2292, "step": 42855 }, { "epoch": 0.9553440906752161, "grad_norm": 0.4950360953807831, "learning_rate": 9.820710188063854e-08, "loss": 0.3015, "step": 42860 }, { "epoch": 0.9554555400558361, "grad_norm": 0.43373239040374756, "learning_rate": 9.771821286950533e-08, "loss": 0.2929, "step": 42865 }, { "epoch": 0.9555669894364562, "grad_norm": 0.4429517388343811, "learning_rate": 9.723053780168579e-08, "loss": 0.2409, "step": 42870 }, { "epoch": 0.9556784388170763, "grad_norm": 0.7364949584007263, "learning_rate": 9.674407673696429e-08, "loss": 0.2495, "step": 42875 }, { "epoch": 0.9557898881976964, "grad_norm": 0.6135752201080322, "learning_rate": 9.625882973497757e-08, "loss": 0.2881, "step": 42880 }, { "epoch": 0.9559013375783164, "grad_norm": 0.5923545956611633, "learning_rate": 9.577479685521363e-08, "loss": 0.2911, "step": 42885 }, { "epoch": 0.9560127869589364, "grad_norm": 0.4776538908481598, "learning_rate": 9.529197815701052e-08, "loss": 0.3263, "step": 42890 }, { "epoch": 0.9561242363395566, "grad_norm": 0.7010661363601685, "learning_rate": 9.481037369955759e-08, "loss": 0.2749, "step": 42895 }, { "epoch": 0.9562356857201766, "grad_norm": 1.0098576545715332, "learning_rate": 9.43299835418976e-08, "loss": 0.3255, "step": 42900 }, { "epoch": 0.9563471351007967, "grad_norm": 0.6457880139350891, "learning_rate": 9.385080774292122e-08, "loss": 0.2645, "step": 42905 }, { "epoch": 0.9564585844814167, "grad_norm": 0.2925399839878082, "learning_rate": 9.337284636137256e-08, "loss": 0.2653, "step": 42910 }, { "epoch": 0.9565700338620368, "grad_norm": 0.4781745672225952, "learning_rate": 9.289609945584477e-08, "loss": 0.3069, "step": 42915 }, { "epoch": 0.9566814832426569, "grad_norm": 0.5949952006340027, "learning_rate": 9.242056708478442e-08, "loss": 0.3137, "step": 42920 }, { "epoch": 0.9567929326232769, "grad_norm": 0.8055167198181152, "learning_rate": 9.19462493064871e-08, "loss": 0.2474, "step": 42925 }, { "epoch": 0.956904382003897, "grad_norm": 0.38028067350387573, "learning_rate": 9.147314617910186e-08, "loss": 0.2518, "step": 42930 }, { "epoch": 0.9570158313845171, "grad_norm": 0.39675191044807434, "learning_rate": 9.100125776062673e-08, "loss": 0.1767, "step": 42935 }, { "epoch": 0.9571272807651372, "grad_norm": 0.7536404132843018, "learning_rate": 9.053058410891214e-08, "loss": 0.2343, "step": 42940 }, { "epoch": 0.9572387301457572, "grad_norm": 0.5819075107574463, "learning_rate": 9.006112528165855e-08, "loss": 0.3314, "step": 42945 }, { "epoch": 0.9573501795263772, "grad_norm": 0.36783868074417114, "learning_rate": 8.959288133641664e-08, "loss": 0.2105, "step": 42950 }, { "epoch": 0.9574616289069974, "grad_norm": 0.6799458861351013, "learning_rate": 8.912585233059157e-08, "loss": 0.2144, "step": 42955 }, { "epoch": 0.9575730782876174, "grad_norm": 0.6199312806129456, "learning_rate": 8.866003832143644e-08, "loss": 0.1734, "step": 42960 }, { "epoch": 0.9576845276682375, "grad_norm": 0.5303043127059937, "learning_rate": 8.819543936605779e-08, "loss": 0.2384, "step": 42965 }, { "epoch": 0.9577959770488575, "grad_norm": 0.5857964754104614, "learning_rate": 8.773205552141118e-08, "loss": 0.2936, "step": 42970 }, { "epoch": 0.9579074264294776, "grad_norm": 0.6527206301689148, "learning_rate": 8.726988684430227e-08, "loss": 0.2617, "step": 42975 }, { "epoch": 0.9580188758100977, "grad_norm": 0.41805943846702576, "learning_rate": 8.680893339139241e-08, "loss": 0.2858, "step": 42980 }, { "epoch": 0.9581303251907177, "grad_norm": 0.6815401315689087, "learning_rate": 8.63491952191875e-08, "loss": 0.2616, "step": 42985 }, { "epoch": 0.9582417745713379, "grad_norm": 0.588553249835968, "learning_rate": 8.589067238404913e-08, "loss": 0.3657, "step": 42990 }, { "epoch": 0.9583532239519579, "grad_norm": 0.8243670463562012, "learning_rate": 8.543336494219123e-08, "loss": 0.289, "step": 42995 }, { "epoch": 0.9584646733325779, "grad_norm": 0.6533645987510681, "learning_rate": 8.49772729496734e-08, "loss": 0.3099, "step": 43000 }, { "epoch": 0.958576122713198, "grad_norm": 0.7068596482276917, "learning_rate": 8.452239646240867e-08, "loss": 0.2069, "step": 43005 }, { "epoch": 0.958687572093818, "grad_norm": 0.4924030303955078, "learning_rate": 8.406873553616357e-08, "loss": 0.2956, "step": 43010 }, { "epoch": 0.9587990214744382, "grad_norm": 0.646881639957428, "learning_rate": 8.361629022655138e-08, "loss": 0.2566, "step": 43015 }, { "epoch": 0.9589104708550582, "grad_norm": 0.5528478622436523, "learning_rate": 8.316506058903994e-08, "loss": 0.2912, "step": 43020 }, { "epoch": 0.9590219202356783, "grad_norm": 0.6520793437957764, "learning_rate": 8.271504667894503e-08, "loss": 0.2392, "step": 43025 }, { "epoch": 0.9591333696162984, "grad_norm": 0.8448598980903625, "learning_rate": 8.226624855143694e-08, "loss": 0.2468, "step": 43030 }, { "epoch": 0.9592448189969184, "grad_norm": 0.5260225534439087, "learning_rate": 8.181866626153278e-08, "loss": 0.2171, "step": 43035 }, { "epoch": 0.9593562683775385, "grad_norm": 0.4451325237751007, "learning_rate": 8.137229986410422e-08, "loss": 0.3226, "step": 43040 }, { "epoch": 0.9594677177581585, "grad_norm": 0.5130228400230408, "learning_rate": 8.092714941387081e-08, "loss": 0.2164, "step": 43045 }, { "epoch": 0.9595791671387787, "grad_norm": 0.5746123194694519, "learning_rate": 8.048321496540557e-08, "loss": 0.3993, "step": 43050 }, { "epoch": 0.9596906165193987, "grad_norm": 0.4763878583908081, "learning_rate": 8.004049657313162e-08, "loss": 0.2981, "step": 43055 }, { "epoch": 0.9598020659000187, "grad_norm": 0.6170870661735535, "learning_rate": 7.959899429132112e-08, "loss": 0.3722, "step": 43060 }, { "epoch": 0.9599135152806388, "grad_norm": 0.458804726600647, "learning_rate": 7.915870817410188e-08, "loss": 0.3045, "step": 43065 }, { "epoch": 0.9600249646612589, "grad_norm": 0.6858850717544556, "learning_rate": 7.871963827544738e-08, "loss": 0.245, "step": 43070 }, { "epoch": 0.960136414041879, "grad_norm": 0.5968716144561768, "learning_rate": 7.828178464918456e-08, "loss": 0.1815, "step": 43075 }, { "epoch": 0.960247863422499, "grad_norm": 0.9967861771583557, "learning_rate": 7.784514734899052e-08, "loss": 0.3042, "step": 43080 }, { "epoch": 0.9603593128031191, "grad_norm": 0.733881950378418, "learning_rate": 7.740972642839573e-08, "loss": 0.2845, "step": 43085 }, { "epoch": 0.9604707621837392, "grad_norm": 0.4205930233001709, "learning_rate": 7.69755219407764e-08, "loss": 0.28, "step": 43090 }, { "epoch": 0.9605822115643592, "grad_norm": 0.5251030325889587, "learning_rate": 7.65425339393644e-08, "loss": 0.2224, "step": 43095 }, { "epoch": 0.9606936609449793, "grad_norm": 0.43053174018859863, "learning_rate": 7.611076247724058e-08, "loss": 0.2235, "step": 43100 }, { "epoch": 0.9608051103255993, "grad_norm": 0.9452346563339233, "learning_rate": 7.568020760733707e-08, "loss": 0.3017, "step": 43105 }, { "epoch": 0.9609165597062195, "grad_norm": 0.821847140789032, "learning_rate": 7.525086938243498e-08, "loss": 0.2344, "step": 43110 }, { "epoch": 0.9610280090868395, "grad_norm": 0.7619543075561523, "learning_rate": 7.482274785516996e-08, "loss": 0.4167, "step": 43115 }, { "epoch": 0.9611394584674595, "grad_norm": 0.725729763507843, "learning_rate": 7.439584307802449e-08, "loss": 0.2355, "step": 43120 }, { "epoch": 0.9612509078480796, "grad_norm": 0.6464846730232239, "learning_rate": 7.397015510333561e-08, "loss": 0.3668, "step": 43125 }, { "epoch": 0.9613623572286997, "grad_norm": 0.6052849292755127, "learning_rate": 7.35456839832882e-08, "loss": 0.1793, "step": 43130 }, { "epoch": 0.9614738066093198, "grad_norm": 0.510604202747345, "learning_rate": 7.312242976991956e-08, "loss": 0.2486, "step": 43135 }, { "epoch": 0.9615852559899398, "grad_norm": 0.9605926275253296, "learning_rate": 7.270039251511707e-08, "loss": 0.3757, "step": 43140 }, { "epoch": 0.96169670537056, "grad_norm": 0.487678587436676, "learning_rate": 7.227957227061933e-08, "loss": 0.2236, "step": 43145 }, { "epoch": 0.96180815475118, "grad_norm": 0.420016884803772, "learning_rate": 7.185996908801618e-08, "loss": 0.3208, "step": 43150 }, { "epoch": 0.9619196041318, "grad_norm": 0.5586323738098145, "learning_rate": 7.14415830187476e-08, "loss": 0.2396, "step": 43155 }, { "epoch": 0.9620310535124201, "grad_norm": 0.484066903591156, "learning_rate": 7.102441411410366e-08, "loss": 0.2765, "step": 43160 }, { "epoch": 0.9621425028930402, "grad_norm": 0.9337204694747925, "learning_rate": 7.060846242522679e-08, "loss": 0.3034, "step": 43165 }, { "epoch": 0.9622539522736603, "grad_norm": 0.5187270045280457, "learning_rate": 7.019372800310953e-08, "loss": 0.2147, "step": 43170 }, { "epoch": 0.9623654016542803, "grad_norm": 0.6802904605865479, "learning_rate": 6.978021089859454e-08, "loss": 0.1796, "step": 43175 }, { "epoch": 0.9624768510349003, "grad_norm": 0.3484508693218231, "learning_rate": 6.936791116237574e-08, "loss": 0.2456, "step": 43180 }, { "epoch": 0.9625883004155205, "grad_norm": 0.5931937098503113, "learning_rate": 6.895682884499933e-08, "loss": 0.2011, "step": 43185 }, { "epoch": 0.9626997497961405, "grad_norm": 0.8017531037330627, "learning_rate": 6.854696399685945e-08, "loss": 0.309, "step": 43190 }, { "epoch": 0.9628111991767606, "grad_norm": 0.6763916015625, "learning_rate": 6.813831666820259e-08, "loss": 0.2995, "step": 43195 }, { "epoch": 0.9629226485573806, "grad_norm": 0.579896092414856, "learning_rate": 6.773088690912644e-08, "loss": 0.2025, "step": 43200 }, { "epoch": 0.9630340979380007, "grad_norm": 0.6687420606613159, "learning_rate": 6.73246747695766e-08, "loss": 0.2448, "step": 43205 }, { "epoch": 0.9631455473186208, "grad_norm": 0.8000922203063965, "learning_rate": 6.691968029935436e-08, "loss": 0.3776, "step": 43210 }, { "epoch": 0.9632569966992408, "grad_norm": 0.8290566802024841, "learning_rate": 6.651590354810777e-08, "loss": 0.1601, "step": 43215 }, { "epoch": 0.9633684460798609, "grad_norm": 0.5483561754226685, "learning_rate": 6.611334456533725e-08, "loss": 0.2459, "step": 43220 }, { "epoch": 0.963479895460481, "grad_norm": 0.6955622434616089, "learning_rate": 6.57120034003922e-08, "loss": 0.2317, "step": 43225 }, { "epoch": 0.9635913448411011, "grad_norm": 0.43610045313835144, "learning_rate": 6.531188010247436e-08, "loss": 0.2193, "step": 43230 }, { "epoch": 0.9637027942217211, "grad_norm": 0.7788457274436951, "learning_rate": 6.491297472063563e-08, "loss": 0.2353, "step": 43235 }, { "epoch": 0.9638142436023411, "grad_norm": 0.38781026005744934, "learning_rate": 6.45152873037802e-08, "loss": 0.2539, "step": 43240 }, { "epoch": 0.9639256929829613, "grad_norm": 0.6308822631835938, "learning_rate": 6.41188179006591e-08, "loss": 0.2511, "step": 43245 }, { "epoch": 0.9640371423635813, "grad_norm": 0.9062620401382446, "learning_rate": 6.3723566559879e-08, "loss": 0.2205, "step": 43250 }, { "epoch": 0.9641485917442014, "grad_norm": 1.0278260707855225, "learning_rate": 6.332953332989334e-08, "loss": 0.3506, "step": 43255 }, { "epoch": 0.9642600411248214, "grad_norm": 0.5123734474182129, "learning_rate": 6.293671825900571e-08, "loss": 0.282, "step": 43260 }, { "epoch": 0.9643714905054415, "grad_norm": 0.5359479784965515, "learning_rate": 6.254512139537539e-08, "loss": 0.2928, "step": 43265 }, { "epoch": 0.9644829398860616, "grad_norm": 0.8474448323249817, "learning_rate": 6.215474278700839e-08, "loss": 0.2269, "step": 43270 }, { "epoch": 0.9645943892666816, "grad_norm": 0.15656907856464386, "learning_rate": 6.17655824817609e-08, "loss": 0.1414, "step": 43275 }, { "epoch": 0.9647058386473017, "grad_norm": 0.4503397047519684, "learning_rate": 6.13776405273414e-08, "loss": 0.2113, "step": 43280 }, { "epoch": 0.9648172880279218, "grad_norm": 0.4656757414340973, "learning_rate": 6.099091697130965e-08, "loss": 0.2896, "step": 43285 }, { "epoch": 0.9649287374085419, "grad_norm": 0.571235716342926, "learning_rate": 6.060541186107327e-08, "loss": 0.2686, "step": 43290 }, { "epoch": 0.9650401867891619, "grad_norm": 0.5569755434989929, "learning_rate": 6.022112524389223e-08, "loss": 0.3137, "step": 43295 }, { "epoch": 0.965151636169782, "grad_norm": 0.4425262212753296, "learning_rate": 5.983805716687996e-08, "loss": 0.2193, "step": 43300 }, { "epoch": 0.9652630855504021, "grad_norm": 0.7848999500274658, "learning_rate": 5.9456207676993336e-08, "loss": 0.2234, "step": 43305 }, { "epoch": 0.9653745349310221, "grad_norm": 0.9052958488464355, "learning_rate": 5.9075576821048256e-08, "loss": 0.2435, "step": 43310 }, { "epoch": 0.9654859843116422, "grad_norm": 0.7334598898887634, "learning_rate": 5.869616464570516e-08, "loss": 0.2737, "step": 43315 }, { "epoch": 0.9655974336922623, "grad_norm": 0.49490249156951904, "learning_rate": 5.831797119747684e-08, "loss": 0.228, "step": 43320 }, { "epoch": 0.9657088830728823, "grad_norm": 0.7265264987945557, "learning_rate": 5.794099652272622e-08, "loss": 0.284, "step": 43325 }, { "epoch": 0.9658203324535024, "grad_norm": 0.7840932011604309, "learning_rate": 5.756524066766966e-08, "loss": 0.3362, "step": 43330 }, { "epoch": 0.9659317818341224, "grad_norm": 0.35997217893600464, "learning_rate": 5.719070367837032e-08, "loss": 0.155, "step": 43335 }, { "epoch": 0.9660432312147426, "grad_norm": 0.3674221336841583, "learning_rate": 5.681738560074479e-08, "loss": 0.2327, "step": 43340 }, { "epoch": 0.9661546805953626, "grad_norm": 0.549663245677948, "learning_rate": 5.6445286480557583e-08, "loss": 0.2507, "step": 43345 }, { "epoch": 0.9662661299759827, "grad_norm": 0.31489622592926025, "learning_rate": 5.6074406363425534e-08, "loss": 0.4149, "step": 43350 }, { "epoch": 0.9663775793566027, "grad_norm": 0.39642104506492615, "learning_rate": 5.5704745294815624e-08, "loss": 0.3571, "step": 43355 }, { "epoch": 0.9664890287372228, "grad_norm": 0.7653921842575073, "learning_rate": 5.533630332004714e-08, "loss": 0.393, "step": 43360 }, { "epoch": 0.9666004781178429, "grad_norm": 0.5353769659996033, "learning_rate": 5.496908048428618e-08, "loss": 0.2873, "step": 43365 }, { "epoch": 0.9667119274984629, "grad_norm": 0.5851602554321289, "learning_rate": 5.4603076832552284e-08, "loss": 0.1736, "step": 43370 }, { "epoch": 0.966823376879083, "grad_norm": 0.4860190451145172, "learning_rate": 5.423829240971401e-08, "loss": 0.2902, "step": 43375 }, { "epoch": 0.9669348262597031, "grad_norm": 0.8070089221000671, "learning_rate": 5.3874727260491146e-08, "loss": 0.2503, "step": 43380 }, { "epoch": 0.9670462756403231, "grad_norm": 0.6368986964225769, "learning_rate": 5.3512381429455804e-08, "loss": 0.2482, "step": 43385 }, { "epoch": 0.9671577250209432, "grad_norm": 0.4795011579990387, "learning_rate": 5.31512549610258e-08, "loss": 0.2357, "step": 43390 }, { "epoch": 0.9672691744015632, "grad_norm": 0.7794646620750427, "learning_rate": 5.27913478994746e-08, "loss": 0.327, "step": 43395 }, { "epoch": 0.9673806237821834, "grad_norm": 0.5979329347610474, "learning_rate": 5.2432660288924687e-08, "loss": 0.2538, "step": 43400 }, { "epoch": 0.9674920731628034, "grad_norm": 0.7042983770370483, "learning_rate": 5.207519217334645e-08, "loss": 0.2317, "step": 43405 }, { "epoch": 0.9676035225434234, "grad_norm": 0.5814474821090698, "learning_rate": 5.17189435965626e-08, "loss": 0.3646, "step": 43410 }, { "epoch": 0.9677149719240435, "grad_norm": 0.47899243235588074, "learning_rate": 5.136391460224821e-08, "loss": 0.1985, "step": 43415 }, { "epoch": 0.9678264213046636, "grad_norm": 0.6566064953804016, "learning_rate": 5.101010523392513e-08, "loss": 0.3045, "step": 43420 }, { "epoch": 0.9679378706852837, "grad_norm": 0.46647369861602783, "learning_rate": 5.065751553496978e-08, "loss": 0.294, "step": 43425 }, { "epoch": 0.9680493200659037, "grad_norm": 0.5277575254440308, "learning_rate": 5.030614554860647e-08, "loss": 0.2, "step": 43430 }, { "epoch": 0.9681607694465239, "grad_norm": 0.41413211822509766, "learning_rate": 4.9955995317908514e-08, "loss": 0.2468, "step": 43435 }, { "epoch": 0.9682722188271439, "grad_norm": 0.5823618173599243, "learning_rate": 4.96070648858038e-08, "loss": 0.2698, "step": 43440 }, { "epoch": 0.9683836682077639, "grad_norm": 0.4486048221588135, "learning_rate": 4.925935429506701e-08, "loss": 0.377, "step": 43445 }, { "epoch": 0.968495117588384, "grad_norm": 0.6892697215080261, "learning_rate": 4.891286358832626e-08, "loss": 0.3767, "step": 43450 }, { "epoch": 0.968606566969004, "grad_norm": 0.8102115988731384, "learning_rate": 4.8567592808057564e-08, "loss": 0.3368, "step": 43455 }, { "epoch": 0.9687180163496242, "grad_norm": 0.646845817565918, "learning_rate": 4.8223541996588186e-08, "loss": 0.2482, "step": 43460 }, { "epoch": 0.9688294657302442, "grad_norm": 0.42743000388145447, "learning_rate": 4.788071119609772e-08, "loss": 0.2973, "step": 43465 }, { "epoch": 0.9689409151108642, "grad_norm": 0.6461029648780823, "learning_rate": 4.753910044861254e-08, "loss": 0.2869, "step": 43470 }, { "epoch": 0.9690523644914844, "grad_norm": 0.45668455958366394, "learning_rate": 4.719870979601249e-08, "loss": 0.3004, "step": 43475 }, { "epoch": 0.9691638138721044, "grad_norm": 0.9763020277023315, "learning_rate": 4.685953928002751e-08, "loss": 0.2883, "step": 43480 }, { "epoch": 0.9692752632527245, "grad_norm": 0.5899277925491333, "learning_rate": 4.6521588942235466e-08, "loss": 0.2785, "step": 43485 }, { "epoch": 0.9693867126333445, "grad_norm": 0.5160545110702515, "learning_rate": 4.618485882406876e-08, "loss": 0.285, "step": 43490 }, { "epoch": 0.9694981620139647, "grad_norm": 0.6278097033500671, "learning_rate": 4.5849348966805485e-08, "loss": 0.2861, "step": 43495 }, { "epoch": 0.9696096113945847, "grad_norm": 0.45948338508605957, "learning_rate": 4.55150594115783e-08, "loss": 0.3111, "step": 43500 }, { "epoch": 0.9697210607752047, "grad_norm": 0.7423383593559265, "learning_rate": 4.518199019936886e-08, "loss": 0.3179, "step": 43505 }, { "epoch": 0.9698325101558248, "grad_norm": 0.6052937507629395, "learning_rate": 4.485014137100674e-08, "loss": 0.2932, "step": 43510 }, { "epoch": 0.9699439595364449, "grad_norm": 0.5899354219436646, "learning_rate": 4.451951296717605e-08, "loss": 0.1974, "step": 43515 }, { "epoch": 0.970055408917065, "grad_norm": 0.621191143989563, "learning_rate": 4.41901050284077e-08, "loss": 0.3295, "step": 43520 }, { "epoch": 0.970166858297685, "grad_norm": 0.6658492088317871, "learning_rate": 4.386191759508607e-08, "loss": 0.3678, "step": 43525 }, { "epoch": 0.970278307678305, "grad_norm": 0.8935458064079285, "learning_rate": 4.3534950707444515e-08, "loss": 0.2223, "step": 43530 }, { "epoch": 0.9703897570589252, "grad_norm": 0.4643547236919403, "learning_rate": 4.320920440556542e-08, "loss": 0.3042, "step": 43535 }, { "epoch": 0.9705012064395452, "grad_norm": 0.749759316444397, "learning_rate": 4.28846787293824e-08, "loss": 0.2473, "step": 43540 }, { "epoch": 0.9706126558201653, "grad_norm": 0.6472769975662231, "learning_rate": 4.256137371868141e-08, "loss": 0.2965, "step": 43545 }, { "epoch": 0.9707241052007853, "grad_norm": 0.8817629814147949, "learning_rate": 4.2239289413096296e-08, "loss": 0.1953, "step": 43550 }, { "epoch": 0.9708355545814055, "grad_norm": 0.7503361105918884, "learning_rate": 4.1918425852111034e-08, "loss": 0.2381, "step": 43555 }, { "epoch": 0.9709470039620255, "grad_norm": 0.524804413318634, "learning_rate": 4.1598783075064154e-08, "loss": 0.2617, "step": 43560 }, { "epoch": 0.9710584533426455, "grad_norm": 0.804250955581665, "learning_rate": 4.1280361121137645e-08, "loss": 0.0857, "step": 43565 }, { "epoch": 0.9711699027232656, "grad_norm": 0.9102737903594971, "learning_rate": 4.0963160029370284e-08, "loss": 0.2496, "step": 43570 }, { "epoch": 0.9712813521038857, "grad_norm": 0.7096487283706665, "learning_rate": 4.064717983864763e-08, "loss": 0.3119, "step": 43575 }, { "epoch": 0.9713928014845058, "grad_norm": 0.6348690390586853, "learning_rate": 4.033242058770648e-08, "loss": 0.253, "step": 43580 }, { "epoch": 0.9715042508651258, "grad_norm": 0.6706966161727905, "learning_rate": 4.0018882315132624e-08, "loss": 0.3657, "step": 43585 }, { "epoch": 0.9716157002457458, "grad_norm": 0.6501227617263794, "learning_rate": 3.970656505936532e-08, "loss": 0.2629, "step": 43590 }, { "epoch": 0.971727149626366, "grad_norm": 0.6011533737182617, "learning_rate": 3.939546885869172e-08, "loss": 0.3037, "step": 43595 }, { "epoch": 0.971838599006986, "grad_norm": 0.7536007165908813, "learning_rate": 3.9085593751249094e-08, "loss": 0.3844, "step": 43600 }, { "epoch": 0.9719500483876061, "grad_norm": 0.8846654891967773, "learning_rate": 3.877693977502594e-08, "loss": 0.2288, "step": 43605 }, { "epoch": 0.9720614977682261, "grad_norm": 0.35863903164863586, "learning_rate": 3.8469506967862e-08, "loss": 0.2713, "step": 43610 }, { "epoch": 0.9721729471488462, "grad_norm": 0.658648669719696, "learning_rate": 3.81632953674449e-08, "loss": 0.2827, "step": 43615 }, { "epoch": 0.9722843965294663, "grad_norm": 0.6056036353111267, "learning_rate": 3.78583050113146e-08, "loss": 0.201, "step": 43620 }, { "epoch": 0.9723958459100863, "grad_norm": 0.6864349842071533, "learning_rate": 3.755453593685898e-08, "loss": 0.3008, "step": 43625 }, { "epoch": 0.9725072952907065, "grad_norm": 0.8081383109092712, "learning_rate": 3.7251988181319365e-08, "loss": 0.3825, "step": 43630 }, { "epoch": 0.9726187446713265, "grad_norm": 0.3625475764274597, "learning_rate": 3.695066178178608e-08, "loss": 0.2838, "step": 43635 }, { "epoch": 0.9727301940519466, "grad_norm": 0.6773290038108826, "learning_rate": 3.6650556775198464e-08, "loss": 0.2896, "step": 43640 }, { "epoch": 0.9728416434325666, "grad_norm": 0.603001594543457, "learning_rate": 3.635167319834709e-08, "loss": 0.2838, "step": 43645 }, { "epoch": 0.9729530928131866, "grad_norm": 1.0788512229919434, "learning_rate": 3.6054011087873765e-08, "loss": 0.3954, "step": 43650 }, { "epoch": 0.9730645421938068, "grad_norm": 0.49225085973739624, "learning_rate": 3.575757048026818e-08, "loss": 0.3231, "step": 43655 }, { "epoch": 0.9731759915744268, "grad_norm": 0.5131734013557434, "learning_rate": 3.546235141187238e-08, "loss": 0.286, "step": 43660 }, { "epoch": 0.9732874409550469, "grad_norm": 0.43520259857177734, "learning_rate": 3.5168353918877406e-08, "loss": 0.3706, "step": 43665 }, { "epoch": 0.973398890335667, "grad_norm": 0.9183757901191711, "learning_rate": 3.487557803732555e-08, "loss": 0.3376, "step": 43670 }, { "epoch": 0.973510339716287, "grad_norm": 0.5258111953735352, "learning_rate": 3.458402380310921e-08, "loss": 0.2385, "step": 43675 }, { "epoch": 0.9736217890969071, "grad_norm": 0.6289880871772766, "learning_rate": 3.429369125197091e-08, "loss": 0.2567, "step": 43680 }, { "epoch": 0.9737332384775271, "grad_norm": 0.3663708567619324, "learning_rate": 3.4004580419502164e-08, "loss": 0.3914, "step": 43685 }, { "epoch": 0.9738446878581473, "grad_norm": 0.5350440740585327, "learning_rate": 3.3716691341146855e-08, "loss": 0.3602, "step": 43690 }, { "epoch": 0.9739561372387673, "grad_norm": 0.33239659667015076, "learning_rate": 3.343002405219564e-08, "loss": 0.294, "step": 43695 }, { "epoch": 0.9740675866193874, "grad_norm": 0.9363645315170288, "learning_rate": 3.314457858779485e-08, "loss": 0.3205, "step": 43700 }, { "epoch": 0.9741790360000074, "grad_norm": 0.6830964088439941, "learning_rate": 3.2860354982935385e-08, "loss": 0.2822, "step": 43705 }, { "epoch": 0.9742904853806275, "grad_norm": 0.8132318258285522, "learning_rate": 3.25773532724627e-08, "loss": 0.273, "step": 43710 }, { "epoch": 0.9744019347612476, "grad_norm": 0.7080520987510681, "learning_rate": 3.2295573491070157e-08, "loss": 0.3019, "step": 43715 }, { "epoch": 0.9745133841418676, "grad_norm": 0.9034405946731567, "learning_rate": 3.201501567330012e-08, "loss": 0.2615, "step": 43720 }, { "epoch": 0.9746248335224877, "grad_norm": 0.5081268548965454, "learning_rate": 3.173567985354842e-08, "loss": 0.2833, "step": 43725 }, { "epoch": 0.9747362829031078, "grad_norm": 0.7102012038230896, "learning_rate": 3.145756606605988e-08, "loss": 0.2291, "step": 43730 }, { "epoch": 0.9748477322837278, "grad_norm": 0.8138337135314941, "learning_rate": 3.118067434492833e-08, "loss": 0.29, "step": 43735 }, { "epoch": 0.9749591816643479, "grad_norm": 0.2943519055843353, "learning_rate": 3.090500472409774e-08, "loss": 0.3336, "step": 43740 }, { "epoch": 0.9750706310449679, "grad_norm": 0.5932673215866089, "learning_rate": 3.0630557237365524e-08, "loss": 0.3135, "step": 43745 }, { "epoch": 0.9751820804255881, "grad_norm": 0.9085009694099426, "learning_rate": 3.0357331918373644e-08, "loss": 0.2403, "step": 43750 }, { "epoch": 0.9752935298062081, "grad_norm": 0.5600362420082092, "learning_rate": 3.0085328800619763e-08, "loss": 0.2085, "step": 43755 }, { "epoch": 0.9754049791868281, "grad_norm": 0.8048264384269714, "learning_rate": 2.981454791744831e-08, "loss": 0.3457, "step": 43760 }, { "epoch": 0.9755164285674482, "grad_norm": 0.5526010394096375, "learning_rate": 2.9544989302056072e-08, "loss": 0.2698, "step": 43765 }, { "epoch": 0.9756278779480683, "grad_norm": 0.5081197023391724, "learning_rate": 2.927665298748772e-08, "loss": 0.2491, "step": 43770 }, { "epoch": 0.9757393273286884, "grad_norm": 0.5824170708656311, "learning_rate": 2.9009539006639165e-08, "loss": 0.2384, "step": 43775 }, { "epoch": 0.9758507767093084, "grad_norm": 1.076564908027649, "learning_rate": 2.8743647392257546e-08, "loss": 0.1521, "step": 43780 }, { "epoch": 0.9759622260899286, "grad_norm": 0.5411296486854553, "learning_rate": 2.84789781769379e-08, "loss": 0.3039, "step": 43785 }, { "epoch": 0.9760736754705486, "grad_norm": 0.696861982345581, "learning_rate": 2.8215531393126495e-08, "loss": 0.3149, "step": 43790 }, { "epoch": 0.9761851248511686, "grad_norm": 0.43493250012397766, "learning_rate": 2.7953307073121936e-08, "loss": 0.2139, "step": 43795 }, { "epoch": 0.9762965742317887, "grad_norm": 0.5109127759933472, "learning_rate": 2.7692305249068518e-08, "loss": 0.2602, "step": 43800 }, { "epoch": 0.9764080236124087, "grad_norm": 0.7707669734954834, "learning_rate": 2.7432525952965084e-08, "loss": 0.1913, "step": 43805 }, { "epoch": 0.9765194729930289, "grad_norm": 0.8124564290046692, "learning_rate": 2.717396921665727e-08, "loss": 0.265, "step": 43810 }, { "epoch": 0.9766309223736489, "grad_norm": 0.5128911137580872, "learning_rate": 2.6916635071841945e-08, "loss": 0.2216, "step": 43815 }, { "epoch": 0.9767423717542689, "grad_norm": 0.8029731512069702, "learning_rate": 2.666052355006721e-08, "loss": 0.3493, "step": 43820 }, { "epoch": 0.976853821134889, "grad_norm": 0.5877036452293396, "learning_rate": 2.6405634682729054e-08, "loss": 0.1946, "step": 43825 }, { "epoch": 0.9769652705155091, "grad_norm": 0.714207112789154, "learning_rate": 2.615196850107693e-08, "loss": 0.3386, "step": 43830 }, { "epoch": 0.9770767198961292, "grad_norm": 0.5350043773651123, "learning_rate": 2.5899525036207073e-08, "loss": 0.2033, "step": 43835 }, { "epoch": 0.9771881692767492, "grad_norm": 0.7569406628608704, "learning_rate": 2.5648304319065843e-08, "loss": 0.2975, "step": 43840 }, { "epoch": 0.9772996186573694, "grad_norm": 0.7935879826545715, "learning_rate": 2.539830638045415e-08, "loss": 0.3681, "step": 43845 }, { "epoch": 0.9774110680379894, "grad_norm": 0.6853658556938171, "learning_rate": 2.514953125101638e-08, "loss": 0.1598, "step": 43850 }, { "epoch": 0.9775225174186094, "grad_norm": 0.45932871103286743, "learning_rate": 2.4901978961253682e-08, "loss": 0.26, "step": 43855 }, { "epoch": 0.9776339667992295, "grad_norm": 0.8988186120986938, "learning_rate": 2.4655649541510674e-08, "loss": 0.3586, "step": 43860 }, { "epoch": 0.9777454161798496, "grad_norm": 0.6980366110801697, "learning_rate": 2.4410543021988753e-08, "loss": 0.2054, "step": 43865 }, { "epoch": 0.9778568655604697, "grad_norm": 0.4933127164840698, "learning_rate": 2.4166659432733884e-08, "loss": 0.1948, "step": 43870 }, { "epoch": 0.9779683149410897, "grad_norm": 0.6166961789131165, "learning_rate": 2.392399880364438e-08, "loss": 0.2251, "step": 43875 }, { "epoch": 0.9780797643217097, "grad_norm": 0.7168357968330383, "learning_rate": 2.3682561164469764e-08, "loss": 0.2675, "step": 43880 }, { "epoch": 0.9781912137023299, "grad_norm": 1.0506415367126465, "learning_rate": 2.3442346544807482e-08, "loss": 0.2907, "step": 43885 }, { "epoch": 0.9783026630829499, "grad_norm": 0.6322489380836487, "learning_rate": 2.3203354974107305e-08, "loss": 0.2523, "step": 43890 }, { "epoch": 0.97841411246357, "grad_norm": 0.29705050587654114, "learning_rate": 2.2965586481665804e-08, "loss": 0.2348, "step": 43895 }, { "epoch": 0.97852556184419, "grad_norm": 0.4134402275085449, "learning_rate": 2.2729041096632987e-08, "loss": 0.3822, "step": 43900 }, { "epoch": 0.9786370112248102, "grad_norm": 0.9109967947006226, "learning_rate": 2.2493718848006773e-08, "loss": 0.3193, "step": 43905 }, { "epoch": 0.9787484606054302, "grad_norm": 0.6592934727668762, "learning_rate": 2.22596197646352e-08, "loss": 0.3314, "step": 43910 }, { "epoch": 0.9788599099860502, "grad_norm": 0.9526196122169495, "learning_rate": 2.2026743875218637e-08, "loss": 0.2137, "step": 43915 }, { "epoch": 0.9789713593666703, "grad_norm": 0.8369274735450745, "learning_rate": 2.1795091208305363e-08, "loss": 0.1951, "step": 43920 }, { "epoch": 0.9790828087472904, "grad_norm": 0.6565558910369873, "learning_rate": 2.1564661792293773e-08, "loss": 0.2329, "step": 43925 }, { "epoch": 0.9791942581279105, "grad_norm": 0.7436085343360901, "learning_rate": 2.133545565543349e-08, "loss": 0.2862, "step": 43930 }, { "epoch": 0.9793057075085305, "grad_norm": 0.6626237630844116, "learning_rate": 2.110747282582204e-08, "loss": 0.2831, "step": 43935 }, { "epoch": 0.9794171568891505, "grad_norm": 1.3161259889602661, "learning_rate": 2.0880713331410397e-08, "loss": 0.3011, "step": 43940 }, { "epoch": 0.9795286062697707, "grad_norm": 0.7118861675262451, "learning_rate": 2.0655177199995214e-08, "loss": 0.273, "step": 43945 }, { "epoch": 0.9796400556503907, "grad_norm": 0.583651065826416, "learning_rate": 2.0430864459226594e-08, "loss": 0.31, "step": 43950 }, { "epoch": 0.9797515050310108, "grad_norm": 0.956254243850708, "learning_rate": 2.020777513660366e-08, "loss": 0.3354, "step": 43955 }, { "epoch": 0.9798629544116308, "grad_norm": 0.6450993418693542, "learning_rate": 1.9985909259475635e-08, "loss": 0.2298, "step": 43960 }, { "epoch": 0.9799744037922509, "grad_norm": 0.8870912790298462, "learning_rate": 1.9765266855041876e-08, "loss": 0.3602, "step": 43965 }, { "epoch": 0.980085853172871, "grad_norm": 0.7856428623199463, "learning_rate": 1.9545847950349638e-08, "loss": 0.2129, "step": 43970 }, { "epoch": 0.980197302553491, "grad_norm": 0.4291646480560303, "learning_rate": 1.9327652572299628e-08, "loss": 0.1795, "step": 43975 }, { "epoch": 0.9803087519341112, "grad_norm": 0.5906414985656738, "learning_rate": 1.9110680747640442e-08, "loss": 0.2781, "step": 43980 }, { "epoch": 0.9804202013147312, "grad_norm": 0.8675142526626587, "learning_rate": 1.8894932502970807e-08, "loss": 0.3785, "step": 43985 }, { "epoch": 0.9805316506953513, "grad_norm": 0.3896576166152954, "learning_rate": 1.868040786474068e-08, "loss": 0.2942, "step": 43990 }, { "epoch": 0.9806431000759713, "grad_norm": 0.6647374629974365, "learning_rate": 1.8467106859247907e-08, "loss": 0.3858, "step": 43995 }, { "epoch": 0.9807545494565914, "grad_norm": 0.6497798562049866, "learning_rate": 1.8255029512642686e-08, "loss": 0.1956, "step": 44000 }, { "epoch": 0.9808659988372115, "grad_norm": 0.571378231048584, "learning_rate": 1.8044175850924215e-08, "loss": 0.2705, "step": 44005 }, { "epoch": 0.9809774482178315, "grad_norm": 0.7403730750083923, "learning_rate": 1.7834545899939602e-08, "loss": 0.4463, "step": 44010 }, { "epoch": 0.9810888975984516, "grad_norm": 0.8009006381034851, "learning_rate": 1.7626139685389398e-08, "loss": 0.2168, "step": 44015 }, { "epoch": 0.9812003469790717, "grad_norm": 0.5484341979026794, "learning_rate": 1.7418957232823164e-08, "loss": 0.2656, "step": 44020 }, { "epoch": 0.9813117963596917, "grad_norm": 0.258148193359375, "learning_rate": 1.7212998567639473e-08, "loss": 0.2093, "step": 44025 }, { "epoch": 0.9814232457403118, "grad_norm": 0.6334442496299744, "learning_rate": 1.7008263715085904e-08, "loss": 0.2161, "step": 44030 }, { "epoch": 0.9815346951209318, "grad_norm": 0.43233025074005127, "learning_rate": 1.6804752700262385e-08, "loss": 0.3004, "step": 44035 }, { "epoch": 0.981646144501552, "grad_norm": 0.5253536105155945, "learning_rate": 1.660246554811784e-08, "loss": 0.1689, "step": 44040 }, { "epoch": 0.981757593882172, "grad_norm": 0.5353971719741821, "learning_rate": 1.640140228345133e-08, "loss": 0.275, "step": 44045 }, { "epoch": 0.9818690432627921, "grad_norm": 0.4450036287307739, "learning_rate": 1.620156293091091e-08, "loss": 0.328, "step": 44050 }, { "epoch": 0.9819804926434121, "grad_norm": 0.76844722032547, "learning_rate": 1.600294751499587e-08, "loss": 0.3037, "step": 44055 }, { "epoch": 0.9820919420240322, "grad_norm": 0.965364396572113, "learning_rate": 1.5805556060054517e-08, "loss": 0.2984, "step": 44060 }, { "epoch": 0.9822033914046523, "grad_norm": 0.49304118752479553, "learning_rate": 1.5609388590286378e-08, "loss": 0.2602, "step": 44065 }, { "epoch": 0.9823148407852723, "grad_norm": 0.8976914882659912, "learning_rate": 1.5414445129739998e-08, "loss": 0.2268, "step": 44070 }, { "epoch": 0.9824262901658924, "grad_norm": 0.7301560640335083, "learning_rate": 1.522072570231292e-08, "loss": 0.2517, "step": 44075 }, { "epoch": 0.9825377395465125, "grad_norm": 1.5539482831954956, "learning_rate": 1.5028230331753935e-08, "loss": 0.2661, "step": 44080 }, { "epoch": 0.9826491889271325, "grad_norm": 0.6516464352607727, "learning_rate": 1.4836959041661935e-08, "loss": 0.3588, "step": 44085 }, { "epoch": 0.9827606383077526, "grad_norm": 0.5414305925369263, "learning_rate": 1.4646911855484836e-08, "loss": 0.2876, "step": 44090 }, { "epoch": 0.9828720876883726, "grad_norm": 0.52645343542099, "learning_rate": 1.4458088796521775e-08, "loss": 0.2282, "step": 44095 }, { "epoch": 0.9829835370689928, "grad_norm": 0.4100842773914337, "learning_rate": 1.4270489887919792e-08, "loss": 0.3874, "step": 44100 }, { "epoch": 0.9830949864496128, "grad_norm": 0.6244540214538574, "learning_rate": 1.4084115152679379e-08, "loss": 0.2705, "step": 44105 }, { "epoch": 0.9832064358302329, "grad_norm": 0.6012434959411621, "learning_rate": 1.3898964613645593e-08, "loss": 0.3067, "step": 44110 }, { "epoch": 0.983317885210853, "grad_norm": 0.5650272369384766, "learning_rate": 1.3715038293518057e-08, "loss": 0.3253, "step": 44115 }, { "epoch": 0.983429334591473, "grad_norm": 0.7158342003822327, "learning_rate": 1.3532336214844288e-08, "loss": 0.3328, "step": 44120 }, { "epoch": 0.9835407839720931, "grad_norm": 0.40258294343948364, "learning_rate": 1.3350858400023036e-08, "loss": 0.3134, "step": 44125 }, { "epoch": 0.9836522333527131, "grad_norm": 0.8680897355079651, "learning_rate": 1.3170604871300951e-08, "loss": 0.3529, "step": 44130 }, { "epoch": 0.9837636827333333, "grad_norm": 1.1211825609207153, "learning_rate": 1.2991575650777021e-08, "loss": 0.2747, "step": 44135 }, { "epoch": 0.9838751321139533, "grad_norm": 0.41539865732192993, "learning_rate": 1.2813770760397027e-08, "loss": 0.3315, "step": 44140 }, { "epoch": 0.9839865814945733, "grad_norm": 0.3326054811477661, "learning_rate": 1.2637190221960193e-08, "loss": 0.2775, "step": 44145 }, { "epoch": 0.9840980308751934, "grad_norm": 0.5752367377281189, "learning_rate": 1.2461834057112543e-08, "loss": 0.32, "step": 44150 }, { "epoch": 0.9842094802558135, "grad_norm": 0.6004331111907959, "learning_rate": 1.2287702287352432e-08, "loss": 0.2507, "step": 44155 }, { "epoch": 0.9843209296364336, "grad_norm": 0.396384596824646, "learning_rate": 1.211479493402723e-08, "loss": 0.1781, "step": 44160 }, { "epoch": 0.9844323790170536, "grad_norm": 1.2847353219985962, "learning_rate": 1.1943112018332204e-08, "loss": 0.1996, "step": 44165 }, { "epoch": 0.9845438283976736, "grad_norm": 0.5534709692001343, "learning_rate": 1.1772653561317183e-08, "loss": 0.2638, "step": 44170 }, { "epoch": 0.9846552777782938, "grad_norm": 1.1106151342391968, "learning_rate": 1.1603419583876564e-08, "loss": 0.2106, "step": 44175 }, { "epoch": 0.9847667271589138, "grad_norm": 0.5935094952583313, "learning_rate": 1.1435410106758194e-08, "loss": 0.1783, "step": 44180 }, { "epoch": 0.9848781765395339, "grad_norm": 0.6666005253791809, "learning_rate": 1.1268625150558931e-08, "loss": 0.1647, "step": 44185 }, { "epoch": 0.9849896259201539, "grad_norm": 0.5934476852416992, "learning_rate": 1.1103064735725755e-08, "loss": 0.2673, "step": 44190 }, { "epoch": 0.9851010753007741, "grad_norm": 0.2453710436820984, "learning_rate": 1.0938728882553539e-08, "loss": 0.2299, "step": 44195 }, { "epoch": 0.9852125246813941, "grad_norm": 0.5756257176399231, "learning_rate": 1.0775617611189504e-08, "loss": 0.2414, "step": 44200 }, { "epoch": 0.9853239740620141, "grad_norm": 0.19844235479831696, "learning_rate": 1.0613730941629875e-08, "loss": 0.2291, "step": 44205 }, { "epoch": 0.9854354234426342, "grad_norm": 0.5764844417572021, "learning_rate": 1.0453068893720998e-08, "loss": 0.2601, "step": 44210 }, { "epoch": 0.9855468728232543, "grad_norm": 0.5215807557106018, "learning_rate": 1.0293631487157119e-08, "loss": 0.3564, "step": 44215 }, { "epoch": 0.9856583222038744, "grad_norm": 0.506155788898468, "learning_rate": 1.0135418741487046e-08, "loss": 0.1787, "step": 44220 }, { "epoch": 0.9857697715844944, "grad_norm": 0.3792957663536072, "learning_rate": 9.978430676103047e-09, "loss": 0.2132, "step": 44225 }, { "epoch": 0.9858812209651144, "grad_norm": 0.5018007755279541, "learning_rate": 9.822667310253054e-09, "loss": 0.2947, "step": 44230 }, { "epoch": 0.9859926703457346, "grad_norm": 0.5258249044418335, "learning_rate": 9.668128663031795e-09, "loss": 0.3304, "step": 44235 }, { "epoch": 0.9861041197263546, "grad_norm": 0.6357218623161316, "learning_rate": 9.514814753385226e-09, "loss": 0.2847, "step": 44240 }, { "epoch": 0.9862155691069747, "grad_norm": 0.7918208837509155, "learning_rate": 9.362725600106094e-09, "loss": 0.2303, "step": 44245 }, { "epoch": 0.9863270184875947, "grad_norm": 0.6385669708251953, "learning_rate": 9.2118612218417e-09, "loss": 0.2983, "step": 44250 }, { "epoch": 0.9864384678682149, "grad_norm": 0.8675650954246521, "learning_rate": 9.062221637086143e-09, "loss": 0.2412, "step": 44255 }, { "epoch": 0.9865499172488349, "grad_norm": 0.5685170888900757, "learning_rate": 8.913806864183638e-09, "loss": 0.2755, "step": 44260 }, { "epoch": 0.9866613666294549, "grad_norm": 1.0873620510101318, "learning_rate": 8.76661692132963e-09, "loss": 0.3299, "step": 44265 }, { "epoch": 0.986772816010075, "grad_norm": 0.7357842326164246, "learning_rate": 8.620651826567461e-09, "loss": 0.3163, "step": 44270 }, { "epoch": 0.9868842653906951, "grad_norm": 0.8572162389755249, "learning_rate": 8.475911597792818e-09, "loss": 0.268, "step": 44275 }, { "epoch": 0.9869957147713152, "grad_norm": 0.34092795848846436, "learning_rate": 8.332396252747066e-09, "loss": 0.2398, "step": 44280 }, { "epoch": 0.9871071641519352, "grad_norm": 0.5856196284294128, "learning_rate": 8.190105809026127e-09, "loss": 0.3095, "step": 44285 }, { "epoch": 0.9872186135325552, "grad_norm": 0.6363946795463562, "learning_rate": 8.049040284073828e-09, "loss": 0.2011, "step": 44290 }, { "epoch": 0.9873300629131754, "grad_norm": 0.6171948313713074, "learning_rate": 7.909199695183e-09, "loss": 0.356, "step": 44295 }, { "epoch": 0.9874415122937954, "grad_norm": 0.5732865929603577, "learning_rate": 7.77058405949771e-09, "loss": 0.3505, "step": 44300 }, { "epoch": 0.9875529616744155, "grad_norm": 0.724398672580719, "learning_rate": 7.633193394009919e-09, "loss": 0.1241, "step": 44305 }, { "epoch": 0.9876644110550356, "grad_norm": 0.7275500893592834, "learning_rate": 7.49702771556282e-09, "loss": 0.277, "step": 44310 }, { "epoch": 0.9877758604356557, "grad_norm": 0.48448437452316284, "learning_rate": 7.36208704085084e-09, "loss": 0.1904, "step": 44315 }, { "epoch": 0.9878873098162757, "grad_norm": 0.7425686120986938, "learning_rate": 7.228371386415189e-09, "loss": 0.3454, "step": 44320 }, { "epoch": 0.9879987591968957, "grad_norm": 0.725597083568573, "learning_rate": 7.095880768649422e-09, "loss": 0.3361, "step": 44325 }, { "epoch": 0.9881102085775159, "grad_norm": 0.9583325386047363, "learning_rate": 6.9646152037949884e-09, "loss": 0.2904, "step": 44330 }, { "epoch": 0.9882216579581359, "grad_norm": 0.633183479309082, "learning_rate": 6.834574707943464e-09, "loss": 0.451, "step": 44335 }, { "epoch": 0.988333107338756, "grad_norm": 0.7976347208023071, "learning_rate": 6.705759297038761e-09, "loss": 0.2779, "step": 44340 }, { "epoch": 0.988444556719376, "grad_norm": 0.5813356637954712, "learning_rate": 6.57816898687158e-09, "loss": 0.2151, "step": 44345 }, { "epoch": 0.988556006099996, "grad_norm": 0.7804527282714844, "learning_rate": 6.451803793082745e-09, "loss": 0.2853, "step": 44350 }, { "epoch": 0.9886674554806162, "grad_norm": 0.839613676071167, "learning_rate": 6.3266637311654164e-09, "loss": 0.2664, "step": 44355 }, { "epoch": 0.9887789048612362, "grad_norm": 0.3687819540500641, "learning_rate": 6.202748816458437e-09, "loss": 0.2546, "step": 44360 }, { "epoch": 0.9888903542418563, "grad_norm": 0.5957117676734924, "learning_rate": 6.080059064155208e-09, "loss": 0.3163, "step": 44365 }, { "epoch": 0.9890018036224764, "grad_norm": 0.45797082781791687, "learning_rate": 5.958594489295921e-09, "loss": 0.2898, "step": 44370 }, { "epoch": 0.9891132530030964, "grad_norm": 0.7213388085365295, "learning_rate": 5.8383551067697774e-09, "loss": 0.1246, "step": 44375 }, { "epoch": 0.9892247023837165, "grad_norm": 0.46411189436912537, "learning_rate": 5.719340931318318e-09, "loss": 0.3271, "step": 44380 }, { "epoch": 0.9893361517643365, "grad_norm": 0.6343370676040649, "learning_rate": 5.6015519775320935e-09, "loss": 0.1754, "step": 44385 }, { "epoch": 0.9894476011449567, "grad_norm": 0.2115870714187622, "learning_rate": 5.484988259850665e-09, "loss": 0.2141, "step": 44390 }, { "epoch": 0.9895590505255767, "grad_norm": 0.626330554485321, "learning_rate": 5.369649792563714e-09, "loss": 0.3045, "step": 44395 }, { "epoch": 0.9896704999061968, "grad_norm": 0.6680053472518921, "learning_rate": 5.255536589811039e-09, "loss": 0.2344, "step": 44400 }, { "epoch": 0.9897819492868168, "grad_norm": 0.4205387532711029, "learning_rate": 5.142648665581451e-09, "loss": 0.2797, "step": 44405 }, { "epoch": 0.9898933986674369, "grad_norm": 0.6423759460449219, "learning_rate": 5.030986033714991e-09, "loss": 0.3676, "step": 44410 }, { "epoch": 0.990004848048057, "grad_norm": 0.3676292598247528, "learning_rate": 4.920548707900707e-09, "loss": 0.2164, "step": 44415 }, { "epoch": 0.990116297428677, "grad_norm": 0.5738866925239563, "learning_rate": 4.811336701676661e-09, "loss": 0.2332, "step": 44420 }, { "epoch": 0.9902277468092971, "grad_norm": 0.475180447101593, "learning_rate": 4.703350028432141e-09, "loss": 0.1915, "step": 44425 }, { "epoch": 0.9903391961899172, "grad_norm": 0.811890184879303, "learning_rate": 4.596588701404336e-09, "loss": 0.3585, "step": 44430 }, { "epoch": 0.9904506455705372, "grad_norm": 0.5211836695671082, "learning_rate": 4.491052733682777e-09, "loss": 0.2742, "step": 44435 }, { "epoch": 0.9905620949511573, "grad_norm": 1.0613921880722046, "learning_rate": 4.386742138203781e-09, "loss": 0.1681, "step": 44440 }, { "epoch": 0.9906735443317773, "grad_norm": 0.4034428596496582, "learning_rate": 4.283656927757119e-09, "loss": 0.2306, "step": 44445 }, { "epoch": 0.9907849937123975, "grad_norm": 0.4636722505092621, "learning_rate": 4.181797114978236e-09, "loss": 0.2559, "step": 44450 }, { "epoch": 0.9908964430930175, "grad_norm": 0.7295843362808228, "learning_rate": 4.081162712354924e-09, "loss": 0.2294, "step": 44455 }, { "epoch": 0.9910078924736376, "grad_norm": 0.5972443222999573, "learning_rate": 3.981753732225091e-09, "loss": 0.1937, "step": 44460 }, { "epoch": 0.9911193418542577, "grad_norm": 0.43710991740226746, "learning_rate": 3.883570186774543e-09, "loss": 0.2792, "step": 44465 }, { "epoch": 0.9912307912348777, "grad_norm": 0.8411343097686768, "learning_rate": 3.78661208804032e-09, "loss": 0.2378, "step": 44470 }, { "epoch": 0.9913422406154978, "grad_norm": 0.7043887972831726, "learning_rate": 3.6908794479084687e-09, "loss": 0.2732, "step": 44475 }, { "epoch": 0.9914536899961178, "grad_norm": 0.6706881523132324, "learning_rate": 3.5963722781151568e-09, "loss": 0.2515, "step": 44480 }, { "epoch": 0.991565139376738, "grad_norm": 0.6557788252830505, "learning_rate": 3.5030905902455615e-09, "loss": 0.2542, "step": 44485 }, { "epoch": 0.991676588757358, "grad_norm": 0.7045385837554932, "learning_rate": 3.4110343957360904e-09, "loss": 0.2377, "step": 44490 }, { "epoch": 0.991788038137978, "grad_norm": 0.5724929571151733, "learning_rate": 3.3202037058732707e-09, "loss": 0.2809, "step": 44495 }, { "epoch": 0.9918994875185981, "grad_norm": 0.7064734697341919, "learning_rate": 3.2305985317893086e-09, "loss": 0.3664, "step": 44500 }, { "epoch": 0.9920109368992182, "grad_norm": 0.8698769211769104, "learning_rate": 3.142218884472081e-09, "loss": 0.1696, "step": 44505 }, { "epoch": 0.9921223862798383, "grad_norm": 0.723349392414093, "learning_rate": 3.0550647747540352e-09, "loss": 0.2655, "step": 44510 }, { "epoch": 0.9922338356604583, "grad_norm": 0.5983314514160156, "learning_rate": 2.9691362133210667e-09, "loss": 0.3201, "step": 44515 }, { "epoch": 0.9923452850410783, "grad_norm": 0.3199304938316345, "learning_rate": 2.8844332107058615e-09, "loss": 0.2649, "step": 44520 }, { "epoch": 0.9924567344216985, "grad_norm": 0.6981607675552368, "learning_rate": 2.800955777293446e-09, "loss": 0.3955, "step": 44525 }, { "epoch": 0.9925681838023185, "grad_norm": 1.2841216325759888, "learning_rate": 2.718703923317856e-09, "loss": 0.1887, "step": 44530 }, { "epoch": 0.9926796331829386, "grad_norm": 0.7746221423149109, "learning_rate": 2.637677658862137e-09, "loss": 0.2562, "step": 44535 }, { "epoch": 0.9927910825635586, "grad_norm": 0.929040253162384, "learning_rate": 2.557876993859454e-09, "loss": 0.3289, "step": 44540 }, { "epoch": 0.9929025319441788, "grad_norm": 0.49944812059402466, "learning_rate": 2.4793019380919825e-09, "loss": 0.3591, "step": 44545 }, { "epoch": 0.9930139813247988, "grad_norm": 0.46589112281799316, "learning_rate": 2.4019525011931277e-09, "loss": 0.1766, "step": 44550 }, { "epoch": 0.9931254307054188, "grad_norm": 0.7392093539237976, "learning_rate": 2.3258286926453043e-09, "loss": 0.345, "step": 44555 }, { "epoch": 0.9932368800860389, "grad_norm": 0.6125676035881042, "learning_rate": 2.2509305217810473e-09, "loss": 0.2584, "step": 44560 }, { "epoch": 0.993348329466659, "grad_norm": 0.7500099539756775, "learning_rate": 2.177257997781901e-09, "loss": 0.238, "step": 44565 }, { "epoch": 0.9934597788472791, "grad_norm": 0.3287777900695801, "learning_rate": 2.1048111296795293e-09, "loss": 0.1774, "step": 44570 }, { "epoch": 0.9935712282278991, "grad_norm": 0.5603126883506775, "learning_rate": 2.0335899263546065e-09, "loss": 0.1842, "step": 44575 }, { "epoch": 0.9936826776085191, "grad_norm": 0.8498858213424683, "learning_rate": 1.963594396540147e-09, "loss": 0.3448, "step": 44580 }, { "epoch": 0.9937941269891393, "grad_norm": 0.7383705377578735, "learning_rate": 1.8948245488159543e-09, "loss": 0.365, "step": 44585 }, { "epoch": 0.9939055763697593, "grad_norm": 0.6081027388572693, "learning_rate": 1.8272803916119519e-09, "loss": 0.2639, "step": 44590 }, { "epoch": 0.9940170257503794, "grad_norm": 0.5102843642234802, "learning_rate": 1.7609619332104034e-09, "loss": 0.2042, "step": 44595 }, { "epoch": 0.9941284751309994, "grad_norm": 0.9159148335456848, "learning_rate": 1.6958691817392514e-09, "loss": 0.3071, "step": 44600 }, { "epoch": 0.9942399245116196, "grad_norm": 0.5763835906982422, "learning_rate": 1.6320021451798895e-09, "loss": 0.2104, "step": 44605 }, { "epoch": 0.9943513738922396, "grad_norm": 0.46994051337242126, "learning_rate": 1.5693608313616104e-09, "loss": 0.2715, "step": 44610 }, { "epoch": 0.9944628232728596, "grad_norm": 0.6035696268081665, "learning_rate": 1.5079452479638268e-09, "loss": 0.3726, "step": 44615 }, { "epoch": 0.9945742726534798, "grad_norm": 0.8437037467956543, "learning_rate": 1.447755402514961e-09, "loss": 0.3022, "step": 44620 }, { "epoch": 0.9946857220340998, "grad_norm": 0.7784526944160461, "learning_rate": 1.3887913023946652e-09, "loss": 0.284, "step": 44625 }, { "epoch": 0.9947971714147199, "grad_norm": 0.7864732146263123, "learning_rate": 1.331052954831602e-09, "loss": 0.2185, "step": 44630 }, { "epoch": 0.9949086207953399, "grad_norm": 0.5367758274078369, "learning_rate": 1.2745403669023327e-09, "loss": 0.2567, "step": 44635 }, { "epoch": 0.99502007017596, "grad_norm": 0.4071984887123108, "learning_rate": 1.2192535455368692e-09, "loss": 0.2429, "step": 44640 }, { "epoch": 0.9951315195565801, "grad_norm": 0.656913697719574, "learning_rate": 1.1651924975120133e-09, "loss": 0.2744, "step": 44645 }, { "epoch": 0.9952429689372001, "grad_norm": 0.7914825677871704, "learning_rate": 1.112357229455796e-09, "loss": 0.2515, "step": 44650 }, { "epoch": 0.9953544183178202, "grad_norm": 1.119311809539795, "learning_rate": 1.0607477478452588e-09, "loss": 0.3854, "step": 44655 }, { "epoch": 0.9954658676984403, "grad_norm": 0.5164552927017212, "learning_rate": 1.0103640590064524e-09, "loss": 0.2533, "step": 44660 }, { "epoch": 0.9955773170790604, "grad_norm": 0.962119460105896, "learning_rate": 9.612061691166575e-10, "loss": 0.2001, "step": 44665 }, { "epoch": 0.9956887664596804, "grad_norm": 0.793340265750885, "learning_rate": 9.132740842021647e-10, "loss": 0.3941, "step": 44670 }, { "epoch": 0.9958002158403004, "grad_norm": 1.0190134048461914, "learning_rate": 8.665678101393848e-10, "loss": 0.2366, "step": 44675 }, { "epoch": 0.9959116652209206, "grad_norm": 1.1671634912490845, "learning_rate": 8.210873526537378e-10, "loss": 0.2662, "step": 44680 }, { "epoch": 0.9960231146015406, "grad_norm": 0.5475580096244812, "learning_rate": 7.768327173207635e-10, "loss": 0.2246, "step": 44685 }, { "epoch": 0.9961345639821607, "grad_norm": 0.8026213049888611, "learning_rate": 7.338039095672323e-10, "loss": 0.1997, "step": 44690 }, { "epoch": 0.9962460133627807, "grad_norm": 0.7829786539077759, "learning_rate": 6.920009346655931e-10, "loss": 0.3269, "step": 44695 }, { "epoch": 0.9963574627434008, "grad_norm": 0.6256038546562195, "learning_rate": 6.514237977417459e-10, "loss": 0.2818, "step": 44700 }, { "epoch": 0.9964689121240209, "grad_norm": 0.5961411595344543, "learning_rate": 6.120725037706e-10, "loss": 0.2174, "step": 44705 }, { "epoch": 0.9965803615046409, "grad_norm": 0.7228636741638184, "learning_rate": 5.739470575760742e-10, "loss": 0.309, "step": 44710 }, { "epoch": 0.996691810885261, "grad_norm": 0.29328784346580505, "learning_rate": 5.370474638322076e-10, "loss": 0.2438, "step": 44715 }, { "epoch": 0.9968032602658811, "grad_norm": 0.9593527913093567, "learning_rate": 5.013737270620489e-10, "loss": 0.2308, "step": 44720 }, { "epoch": 0.9969147096465011, "grad_norm": 1.333426594734192, "learning_rate": 4.669258516387665e-10, "loss": 0.3419, "step": 44725 }, { "epoch": 0.9970261590271212, "grad_norm": 1.4894150495529175, "learning_rate": 4.3370384178564875e-10, "loss": 0.2652, "step": 44730 }, { "epoch": 0.9971376084077412, "grad_norm": 0.6049108505249023, "learning_rate": 4.0170770157610393e-10, "loss": 0.2449, "step": 44735 }, { "epoch": 0.9972490577883614, "grad_norm": 0.48116618394851685, "learning_rate": 3.709374349325501e-10, "loss": 0.2508, "step": 44740 }, { "epoch": 0.9973605071689814, "grad_norm": 0.4338245987892151, "learning_rate": 3.4139304562641474e-10, "loss": 0.2205, "step": 44745 }, { "epoch": 0.9974719565496015, "grad_norm": 0.7536609172821045, "learning_rate": 3.130745372803556e-10, "loss": 0.2546, "step": 44750 }, { "epoch": 0.9975834059302215, "grad_norm": 0.8969931602478027, "learning_rate": 2.8598191336492995e-10, "loss": 0.2604, "step": 44755 }, { "epoch": 0.9976948553108416, "grad_norm": 0.6707092523574829, "learning_rate": 2.6011517720192505e-10, "loss": 0.223, "step": 44760 }, { "epoch": 0.9978063046914617, "grad_norm": 0.6238420605659485, "learning_rate": 2.3547433196324796e-10, "loss": 0.27, "step": 44765 }, { "epoch": 0.9979177540720817, "grad_norm": 0.5326305627822876, "learning_rate": 2.1205938066870547e-10, "loss": 0.2645, "step": 44770 }, { "epoch": 0.9980292034527019, "grad_norm": 0.4190085828304291, "learning_rate": 1.898703261893342e-10, "loss": 0.1725, "step": 44775 }, { "epoch": 0.9981406528333219, "grad_norm": 0.866177499294281, "learning_rate": 1.6890717124629085e-10, "loss": 0.3569, "step": 44780 }, { "epoch": 0.9982521022139419, "grad_norm": 0.6426597237586975, "learning_rate": 1.4916991840641103e-10, "loss": 0.2912, "step": 44785 }, { "epoch": 0.998363551594562, "grad_norm": 0.8013852834701538, "learning_rate": 1.3065857009331161e-10, "loss": 0.2824, "step": 44790 }, { "epoch": 0.998475000975182, "grad_norm": 0.599602222442627, "learning_rate": 1.133731285729578e-10, "loss": 0.2392, "step": 44795 }, { "epoch": 0.9985864503558022, "grad_norm": 0.8479488492012024, "learning_rate": 9.731359596587552e-11, "loss": 0.4189, "step": 44800 }, { "epoch": 0.9986978997364222, "grad_norm": 0.7829699516296387, "learning_rate": 8.247997424049026e-11, "loss": 0.2198, "step": 44805 }, { "epoch": 0.9988093491170423, "grad_norm": 0.5513267517089844, "learning_rate": 6.887226521645751e-11, "loss": 0.2213, "step": 44810 }, { "epoch": 0.9989207984976624, "grad_norm": 0.47978201508522034, "learning_rate": 5.649047056022206e-11, "loss": 0.2498, "step": 44815 }, { "epoch": 0.9990322478782824, "grad_norm": 0.4485434591770172, "learning_rate": 4.5334591791679204e-11, "loss": 0.2715, "step": 44820 }, { "epoch": 0.9991436972589025, "grad_norm": 0.5298421382904053, "learning_rate": 3.5404630276403286e-11, "loss": 0.3027, "step": 44825 }, { "epoch": 0.9992551466395225, "grad_norm": 0.6424366235733032, "learning_rate": 2.670058723230895e-11, "loss": 0.3339, "step": 44830 }, { "epoch": 0.9993665960201427, "grad_norm": 0.5604028701782227, "learning_rate": 1.922246372743075e-11, "loss": 0.2231, "step": 44835 }, { "epoch": 0.9994780454007627, "grad_norm": 0.5397953391075134, "learning_rate": 1.2970260677702684e-11, "loss": 0.2282, "step": 44840 }, { "epoch": 0.9995894947813827, "grad_norm": 0.8372544646263123, "learning_rate": 7.943978850288859e-12, "loss": 0.3187, "step": 44845 }, { "epoch": 0.9997009441620028, "grad_norm": 0.8007095456123352, "learning_rate": 4.143618860252829e-12, "loss": 0.2664, "step": 44850 }, { "epoch": 0.9998123935426229, "grad_norm": 0.5296596884727478, "learning_rate": 1.569181173888268e-12, "loss": 0.1991, "step": 44855 }, { "epoch": 0.999923842923243, "grad_norm": 0.5373436212539673, "learning_rate": 2.2066610760873575e-13, "loss": 0.3511, "step": 44860 }, { "epoch": 0.999990712551615, "step": 44863, "total_flos": 6.904376768783725e+19, "train_loss": 0.32993994381826713, "train_runtime": 381562.2902, "train_samples_per_second": 2.822, "train_steps_per_second": 0.118 } ], "logging_steps": 5, "max_steps": 44863, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.904376768783725e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }