{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999815653930811, "eval_steps": 500, "global_step": 45204, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011060764151326001, "grad_norm": 1.0098981857299805, "learning_rate": 1.9999999396251563e-05, "loss": 0.3378, "step": 5 }, { "epoch": 0.00022121528302652003, "grad_norm": 1.3027799129486084, "learning_rate": 1.9999997585006317e-05, "loss": 0.4038, "step": 10 }, { "epoch": 0.00033182292453978003, "grad_norm": 1.1290963888168335, "learning_rate": 1.9999994566264482e-05, "loss": 0.4215, "step": 15 }, { "epoch": 0.00044243056605304006, "grad_norm": 0.9720609188079834, "learning_rate": 1.999999034002642e-05, "loss": 0.4099, "step": 20 }, { "epoch": 0.0005530382075663, "grad_norm": 1.6457998752593994, "learning_rate": 1.999998490629265e-05, "loss": 0.5017, "step": 25 }, { "epoch": 0.0006636458490795601, "grad_norm": 1.5389305353164673, "learning_rate": 1.999997826506382e-05, "loss": 0.2771, "step": 30 }, { "epoch": 0.0007742534905928201, "grad_norm": 1.4694418907165527, "learning_rate": 1.999997041634074e-05, "loss": 0.4143, "step": 35 }, { "epoch": 0.0008848611321060801, "grad_norm": 1.1544642448425293, "learning_rate": 1.9999961360124343e-05, "loss": 0.3562, "step": 40 }, { "epoch": 0.00099546877361934, "grad_norm": 1.1249604225158691, "learning_rate": 1.999995109641574e-05, "loss": 0.3195, "step": 45 }, { "epoch": 0.0011060764151326, "grad_norm": 1.0745760202407837, "learning_rate": 1.999993962521616e-05, "loss": 0.4909, "step": 50 }, { "epoch": 0.00121668405664586, "grad_norm": 1.4258919954299927, "learning_rate": 1.9999926946526988e-05, "loss": 0.5486, "step": 55 }, { "epoch": 0.0013272916981591201, "grad_norm": 1.3337279558181763, "learning_rate": 1.999991306034976e-05, "loss": 0.4657, "step": 60 }, { "epoch": 0.0014378993396723801, "grad_norm": 1.997052788734436, "learning_rate": 1.999989796668615e-05, "loss": 0.4657, "step": 65 }, { "epoch": 0.0015485069811856402, "grad_norm": 0.9127364158630371, "learning_rate": 1.9999881665537977e-05, "loss": 0.3327, "step": 70 }, { "epoch": 0.0016591146226989002, "grad_norm": 0.6388740539550781, "learning_rate": 1.999986415690722e-05, "loss": 0.2447, "step": 75 }, { "epoch": 0.0017697222642121602, "grad_norm": 1.5209629535675049, "learning_rate": 1.999984544079598e-05, "loss": 0.506, "step": 80 }, { "epoch": 0.0018803299057254203, "grad_norm": 1.3830124139785767, "learning_rate": 1.9999825517206527e-05, "loss": 0.3804, "step": 85 }, { "epoch": 0.00199093754723868, "grad_norm": 0.9016902446746826, "learning_rate": 1.999980438614126e-05, "loss": 0.3776, "step": 90 }, { "epoch": 0.00210154518875194, "grad_norm": 1.0562076568603516, "learning_rate": 1.9999782047602735e-05, "loss": 0.1751, "step": 95 }, { "epoch": 0.0022121528302652, "grad_norm": 1.0515130758285522, "learning_rate": 1.999975850159365e-05, "loss": 0.329, "step": 100 }, { "epoch": 0.00232276047177846, "grad_norm": 1.1993863582611084, "learning_rate": 1.9999733748116844e-05, "loss": 0.3834, "step": 105 }, { "epoch": 0.00243336811329172, "grad_norm": 1.152951717376709, "learning_rate": 1.9999707787175308e-05, "loss": 0.3045, "step": 110 }, { "epoch": 0.00254397575480498, "grad_norm": 1.8746933937072754, "learning_rate": 1.9999680618772174e-05, "loss": 0.4204, "step": 115 }, { "epoch": 0.0026545833963182402, "grad_norm": 1.3205792903900146, "learning_rate": 1.999965224291073e-05, "loss": 0.3478, "step": 120 }, { "epoch": 0.0027651910378315003, "grad_norm": 0.994893491268158, "learning_rate": 1.9999622659594396e-05, "loss": 0.4217, "step": 125 }, { "epoch": 0.0028757986793447603, "grad_norm": 1.7540589570999146, "learning_rate": 1.9999591868826745e-05, "loss": 0.4486, "step": 130 }, { "epoch": 0.0029864063208580203, "grad_norm": 1.4211417436599731, "learning_rate": 1.9999559870611497e-05, "loss": 0.2951, "step": 135 }, { "epoch": 0.0030970139623712803, "grad_norm": 1.4985895156860352, "learning_rate": 1.9999526664952514e-05, "loss": 0.305, "step": 140 }, { "epoch": 0.0032076216038845404, "grad_norm": 1.1800671815872192, "learning_rate": 1.999949225185381e-05, "loss": 0.3975, "step": 145 }, { "epoch": 0.0033182292453978004, "grad_norm": 2.6629140377044678, "learning_rate": 1.9999456631319534e-05, "loss": 0.3503, "step": 150 }, { "epoch": 0.0034288368869110604, "grad_norm": 1.1970534324645996, "learning_rate": 1.9999419803353988e-05, "loss": 0.45, "step": 155 }, { "epoch": 0.0035394445284243205, "grad_norm": 0.9636037945747375, "learning_rate": 1.999938176796162e-05, "loss": 0.4001, "step": 160 }, { "epoch": 0.0036500521699375805, "grad_norm": 0.9508614540100098, "learning_rate": 1.999934252514703e-05, "loss": 0.3916, "step": 165 }, { "epoch": 0.0037606598114508405, "grad_norm": 1.3169060945510864, "learning_rate": 1.9999302074914944e-05, "loss": 0.3166, "step": 170 }, { "epoch": 0.0038712674529641005, "grad_norm": 1.0344523191452026, "learning_rate": 1.9999260417270253e-05, "loss": 0.3916, "step": 175 }, { "epoch": 0.00398187509447736, "grad_norm": 2.0603716373443604, "learning_rate": 1.999921755221799e-05, "loss": 0.4038, "step": 180 }, { "epoch": 0.004092482735990621, "grad_norm": 1.8023473024368286, "learning_rate": 1.9999173479763327e-05, "loss": 0.4129, "step": 185 }, { "epoch": 0.00420309037750388, "grad_norm": 1.2878496646881104, "learning_rate": 1.9999128199911583e-05, "loss": 0.4182, "step": 190 }, { "epoch": 0.004313698019017141, "grad_norm": 1.2451289892196655, "learning_rate": 1.999908171266823e-05, "loss": 0.2239, "step": 195 }, { "epoch": 0.0044243056605304, "grad_norm": 1.5519031286239624, "learning_rate": 1.9999034018038884e-05, "loss": 0.4015, "step": 200 }, { "epoch": 0.004534913302043661, "grad_norm": 1.537863850593567, "learning_rate": 1.9998985116029296e-05, "loss": 0.3253, "step": 205 }, { "epoch": 0.00464552094355692, "grad_norm": 1.4421772956848145, "learning_rate": 1.999893500664538e-05, "loss": 0.3992, "step": 210 }, { "epoch": 0.004756128585070181, "grad_norm": 0.7541108727455139, "learning_rate": 1.9998883689893173e-05, "loss": 0.3054, "step": 215 }, { "epoch": 0.00486673622658344, "grad_norm": 1.494553804397583, "learning_rate": 1.9998831165778883e-05, "loss": 0.3243, "step": 220 }, { "epoch": 0.004977343868096701, "grad_norm": 0.9286811351776123, "learning_rate": 1.9998777434308855e-05, "loss": 0.3918, "step": 225 }, { "epoch": 0.00508795150960996, "grad_norm": 1.6263800859451294, "learning_rate": 1.999872249548957e-05, "loss": 0.4307, "step": 230 }, { "epoch": 0.005198559151123221, "grad_norm": 0.9846457242965698, "learning_rate": 1.999866634932766e-05, "loss": 0.3182, "step": 235 }, { "epoch": 0.0053091667926364805, "grad_norm": 1.8512308597564697, "learning_rate": 1.999860899582991e-05, "loss": 0.4226, "step": 240 }, { "epoch": 0.005419774434149741, "grad_norm": 1.1682807207107544, "learning_rate": 1.9998550435003245e-05, "loss": 0.3332, "step": 245 }, { "epoch": 0.0055303820756630005, "grad_norm": 1.5079151391983032, "learning_rate": 1.9998490666854736e-05, "loss": 0.4352, "step": 250 }, { "epoch": 0.005640989717176261, "grad_norm": 1.2754334211349487, "learning_rate": 1.9998429691391593e-05, "loss": 0.3897, "step": 255 }, { "epoch": 0.005751597358689521, "grad_norm": 0.9918100237846375, "learning_rate": 1.999836750862119e-05, "loss": 0.3989, "step": 260 }, { "epoch": 0.005862205000202781, "grad_norm": 1.181748867034912, "learning_rate": 1.999830411855103e-05, "loss": 0.3479, "step": 265 }, { "epoch": 0.005972812641716041, "grad_norm": 1.588049292564392, "learning_rate": 1.9998239521188762e-05, "loss": 0.3783, "step": 270 }, { "epoch": 0.006083420283229301, "grad_norm": 1.7852157354354858, "learning_rate": 1.9998173716542196e-05, "loss": 0.4609, "step": 275 }, { "epoch": 0.006194027924742561, "grad_norm": 2.123424530029297, "learning_rate": 1.9998106704619272e-05, "loss": 0.4884, "step": 280 }, { "epoch": 0.006304635566255821, "grad_norm": 1.2252733707427979, "learning_rate": 1.999803848542808e-05, "loss": 0.4833, "step": 285 }, { "epoch": 0.006415243207769081, "grad_norm": 1.4725431203842163, "learning_rate": 1.9997969058976867e-05, "loss": 0.2032, "step": 290 }, { "epoch": 0.00652585084928234, "grad_norm": 1.8025238513946533, "learning_rate": 1.9997898425274008e-05, "loss": 0.4916, "step": 295 }, { "epoch": 0.006636458490795601, "grad_norm": 0.9122537970542908, "learning_rate": 1.9997826584328028e-05, "loss": 0.3415, "step": 300 }, { "epoch": 0.00674706613230886, "grad_norm": 0.8079615235328674, "learning_rate": 1.9997753536147612e-05, "loss": 0.4739, "step": 305 }, { "epoch": 0.006857673773822121, "grad_norm": 1.080998420715332, "learning_rate": 1.999767928074158e-05, "loss": 0.3517, "step": 310 }, { "epoch": 0.0069682814153353805, "grad_norm": 1.8107413053512573, "learning_rate": 1.9997603818118886e-05, "loss": 0.5321, "step": 315 }, { "epoch": 0.007078889056848641, "grad_norm": 1.4104124307632446, "learning_rate": 1.9997527148288655e-05, "loss": 0.4217, "step": 320 }, { "epoch": 0.0071894966983619005, "grad_norm": 1.2745281457901, "learning_rate": 1.999744927126014e-05, "loss": 0.4832, "step": 325 }, { "epoch": 0.007300104339875161, "grad_norm": 1.3356807231903076, "learning_rate": 1.9997370187042744e-05, "loss": 0.3479, "step": 330 }, { "epoch": 0.0074107119813884206, "grad_norm": 1.4037951231002808, "learning_rate": 1.999728989564602e-05, "loss": 0.3709, "step": 335 }, { "epoch": 0.007521319622901681, "grad_norm": 1.4552357196807861, "learning_rate": 1.9997208397079658e-05, "loss": 0.4539, "step": 340 }, { "epoch": 0.007631927264414941, "grad_norm": 1.6023696660995483, "learning_rate": 1.9997125691353503e-05, "loss": 0.3611, "step": 345 }, { "epoch": 0.007742534905928201, "grad_norm": 1.737326741218567, "learning_rate": 1.9997041778477537e-05, "loss": 0.3928, "step": 350 }, { "epoch": 0.00785314254744146, "grad_norm": 1.2300801277160645, "learning_rate": 1.99969566584619e-05, "loss": 0.294, "step": 355 }, { "epoch": 0.00796375018895472, "grad_norm": 1.4654666185379028, "learning_rate": 1.9996870331316862e-05, "loss": 0.3864, "step": 360 }, { "epoch": 0.008074357830467982, "grad_norm": 1.1602009534835815, "learning_rate": 1.9996782797052852e-05, "loss": 0.3803, "step": 365 }, { "epoch": 0.008184965471981241, "grad_norm": 1.6238704919815063, "learning_rate": 1.9996694055680437e-05, "loss": 0.4421, "step": 370 }, { "epoch": 0.0082955731134945, "grad_norm": 1.5528647899627686, "learning_rate": 1.9996604107210338e-05, "loss": 0.4134, "step": 375 }, { "epoch": 0.00840618075500776, "grad_norm": 1.038794755935669, "learning_rate": 1.999651295165341e-05, "loss": 0.3653, "step": 380 }, { "epoch": 0.008516788396521022, "grad_norm": 1.0021169185638428, "learning_rate": 1.9996420589020662e-05, "loss": 0.1995, "step": 385 }, { "epoch": 0.008627396038034281, "grad_norm": 1.4108307361602783, "learning_rate": 1.9996327019323245e-05, "loss": 0.4858, "step": 390 }, { "epoch": 0.008738003679547541, "grad_norm": 1.1560003757476807, "learning_rate": 1.9996232242572463e-05, "loss": 0.4554, "step": 395 }, { "epoch": 0.0088486113210608, "grad_norm": 1.6206974983215332, "learning_rate": 1.9996136258779755e-05, "loss": 0.4385, "step": 400 }, { "epoch": 0.008959218962574062, "grad_norm": 3.013606309890747, "learning_rate": 1.999603906795671e-05, "loss": 0.516, "step": 405 }, { "epoch": 0.009069826604087321, "grad_norm": 1.2671042680740356, "learning_rate": 1.9995940670115072e-05, "loss": 0.3246, "step": 410 }, { "epoch": 0.009180434245600581, "grad_norm": 2.3615846633911133, "learning_rate": 1.9995841065266716e-05, "loss": 0.3182, "step": 415 }, { "epoch": 0.00929104188711384, "grad_norm": 2.1914141178131104, "learning_rate": 1.9995740253423664e-05, "loss": 0.4367, "step": 420 }, { "epoch": 0.009401649528627102, "grad_norm": 1.3932421207427979, "learning_rate": 1.9995638234598104e-05, "loss": 0.323, "step": 425 }, { "epoch": 0.009512257170140362, "grad_norm": 1.5163156986236572, "learning_rate": 1.999553500880234e-05, "loss": 0.3386, "step": 430 }, { "epoch": 0.009622864811653621, "grad_norm": 1.217690348625183, "learning_rate": 1.9995430576048846e-05, "loss": 0.3996, "step": 435 }, { "epoch": 0.00973347245316688, "grad_norm": 1.5663549900054932, "learning_rate": 1.9995324936350227e-05, "loss": 0.2565, "step": 440 }, { "epoch": 0.00984408009468014, "grad_norm": 1.338636875152588, "learning_rate": 1.999521808971924e-05, "loss": 0.4414, "step": 445 }, { "epoch": 0.009954687736193402, "grad_norm": 1.7738866806030273, "learning_rate": 1.999511003616879e-05, "loss": 0.3735, "step": 450 }, { "epoch": 0.010065295377706661, "grad_norm": 1.6681926250457764, "learning_rate": 1.999500077571192e-05, "loss": 0.3949, "step": 455 }, { "epoch": 0.01017590301921992, "grad_norm": 1.3502219915390015, "learning_rate": 1.9994890308361826e-05, "loss": 0.2899, "step": 460 }, { "epoch": 0.01028651066073318, "grad_norm": 1.5999979972839355, "learning_rate": 1.9994778634131843e-05, "loss": 0.4587, "step": 465 }, { "epoch": 0.010397118302246442, "grad_norm": 1.0574697256088257, "learning_rate": 1.9994665753035463e-05, "loss": 0.4436, "step": 470 }, { "epoch": 0.010507725943759701, "grad_norm": 1.7919062376022339, "learning_rate": 1.999455166508631e-05, "loss": 0.3797, "step": 475 }, { "epoch": 0.010618333585272961, "grad_norm": 1.0036317110061646, "learning_rate": 1.999443637029816e-05, "loss": 0.4282, "step": 480 }, { "epoch": 0.01072894122678622, "grad_norm": 1.197751522064209, "learning_rate": 1.999431986868494e-05, "loss": 0.3442, "step": 485 }, { "epoch": 0.010839548868299482, "grad_norm": 1.5259722471237183, "learning_rate": 1.9994202160260712e-05, "loss": 0.4896, "step": 490 }, { "epoch": 0.010950156509812741, "grad_norm": 1.091264247894287, "learning_rate": 1.9994083245039692e-05, "loss": 0.2856, "step": 495 }, { "epoch": 0.011060764151326001, "grad_norm": 0.5953179001808167, "learning_rate": 1.999396312303624e-05, "loss": 0.267, "step": 500 }, { "epoch": 0.01117137179283926, "grad_norm": 0.9875431060791016, "learning_rate": 1.999384179426486e-05, "loss": 0.4266, "step": 505 }, { "epoch": 0.011281979434352522, "grad_norm": 1.5088143348693848, "learning_rate": 1.99937192587402e-05, "loss": 0.3642, "step": 510 }, { "epoch": 0.011392587075865782, "grad_norm": 3.114298105239868, "learning_rate": 1.9993595516477056e-05, "loss": 0.3814, "step": 515 }, { "epoch": 0.011503194717379041, "grad_norm": 1.783241629600525, "learning_rate": 1.9993470567490373e-05, "loss": 0.4153, "step": 520 }, { "epoch": 0.0116138023588923, "grad_norm": 3.163952112197876, "learning_rate": 1.9993344411795236e-05, "loss": 0.451, "step": 525 }, { "epoch": 0.011724410000405562, "grad_norm": 1.4985238313674927, "learning_rate": 1.999321704940688e-05, "loss": 0.4178, "step": 530 }, { "epoch": 0.011835017641918822, "grad_norm": 1.6766449213027954, "learning_rate": 1.9993088480340686e-05, "loss": 0.493, "step": 535 }, { "epoch": 0.011945625283432081, "grad_norm": 1.681623101234436, "learning_rate": 1.9992958704612175e-05, "loss": 0.3114, "step": 540 }, { "epoch": 0.01205623292494534, "grad_norm": 1.531205177307129, "learning_rate": 1.9992827722237015e-05, "loss": 0.5129, "step": 545 }, { "epoch": 0.012166840566458602, "grad_norm": 2.20176100730896, "learning_rate": 1.999269553323103e-05, "loss": 0.4147, "step": 550 }, { "epoch": 0.012277448207971862, "grad_norm": 1.1875768899917603, "learning_rate": 1.9992562137610174e-05, "loss": 0.2715, "step": 555 }, { "epoch": 0.012388055849485121, "grad_norm": 1.5550061464309692, "learning_rate": 1.9992427535390558e-05, "loss": 0.5162, "step": 560 }, { "epoch": 0.012498663490998381, "grad_norm": 1.5107383728027344, "learning_rate": 1.999229172658844e-05, "loss": 0.457, "step": 565 }, { "epoch": 0.012609271132511642, "grad_norm": 1.485146164894104, "learning_rate": 1.999215471122021e-05, "loss": 0.4996, "step": 570 }, { "epoch": 0.012719878774024902, "grad_norm": 1.428308367729187, "learning_rate": 1.9992016489302414e-05, "loss": 0.4139, "step": 575 }, { "epoch": 0.012830486415538161, "grad_norm": 2.3433175086975098, "learning_rate": 1.9991877060851746e-05, "loss": 0.4506, "step": 580 }, { "epoch": 0.012941094057051421, "grad_norm": 1.2056102752685547, "learning_rate": 1.9991736425885044e-05, "loss": 0.4664, "step": 585 }, { "epoch": 0.01305170169856468, "grad_norm": 3.53164005279541, "learning_rate": 1.9991594584419283e-05, "loss": 0.3991, "step": 590 }, { "epoch": 0.013162309340077942, "grad_norm": 1.4206898212432861, "learning_rate": 1.9991451536471596e-05, "loss": 0.4247, "step": 595 }, { "epoch": 0.013272916981591202, "grad_norm": 0.7530494332313538, "learning_rate": 1.9991307282059253e-05, "loss": 0.298, "step": 600 }, { "epoch": 0.013383524623104461, "grad_norm": 1.9098811149597168, "learning_rate": 1.999116182119967e-05, "loss": 0.3415, "step": 605 }, { "epoch": 0.01349413226461772, "grad_norm": 1.7797225713729858, "learning_rate": 1.9991015153910416e-05, "loss": 0.3719, "step": 610 }, { "epoch": 0.013604739906130982, "grad_norm": 1.0618116855621338, "learning_rate": 1.9990867280209204e-05, "loss": 0.3032, "step": 615 }, { "epoch": 0.013715347547644242, "grad_norm": 1.6823108196258545, "learning_rate": 1.999071820011388e-05, "loss": 0.3114, "step": 620 }, { "epoch": 0.013825955189157501, "grad_norm": 1.5610665082931519, "learning_rate": 1.999056791364245e-05, "loss": 0.331, "step": 625 }, { "epoch": 0.013936562830670761, "grad_norm": 1.9009490013122559, "learning_rate": 1.9990416420813064e-05, "loss": 0.4168, "step": 630 }, { "epoch": 0.014047170472184022, "grad_norm": 1.672075867652893, "learning_rate": 1.9990263721644013e-05, "loss": 0.2163, "step": 635 }, { "epoch": 0.014157778113697282, "grad_norm": 1.4824103116989136, "learning_rate": 1.9990109816153736e-05, "loss": 0.3707, "step": 640 }, { "epoch": 0.014268385755210541, "grad_norm": 1.1508342027664185, "learning_rate": 1.9989954704360812e-05, "loss": 0.3031, "step": 645 }, { "epoch": 0.014378993396723801, "grad_norm": 2.2810006141662598, "learning_rate": 1.998979838628398e-05, "loss": 0.4136, "step": 650 }, { "epoch": 0.014489601038237062, "grad_norm": 1.2784085273742676, "learning_rate": 1.9989640861942105e-05, "loss": 0.3318, "step": 655 }, { "epoch": 0.014600208679750322, "grad_norm": 1.5543919801712036, "learning_rate": 1.9989482131354213e-05, "loss": 0.3742, "step": 660 }, { "epoch": 0.014710816321263582, "grad_norm": 1.2674219608306885, "learning_rate": 1.9989322194539472e-05, "loss": 0.3655, "step": 665 }, { "epoch": 0.014821423962776841, "grad_norm": 1.2111879587173462, "learning_rate": 1.9989161051517193e-05, "loss": 0.4259, "step": 670 }, { "epoch": 0.014932031604290102, "grad_norm": 1.2993717193603516, "learning_rate": 1.9988998702306834e-05, "loss": 0.4833, "step": 675 }, { "epoch": 0.015042639245803362, "grad_norm": 1.402723789215088, "learning_rate": 1.9988835146928e-05, "loss": 0.5029, "step": 680 }, { "epoch": 0.015153246887316622, "grad_norm": 1.0906521081924438, "learning_rate": 1.9988670385400433e-05, "loss": 0.4803, "step": 685 }, { "epoch": 0.015263854528829881, "grad_norm": 1.2492867708206177, "learning_rate": 1.998850441774404e-05, "loss": 0.2816, "step": 690 }, { "epoch": 0.015374462170343143, "grad_norm": 1.700445294380188, "learning_rate": 1.9988337243978853e-05, "loss": 0.3833, "step": 695 }, { "epoch": 0.015485069811856402, "grad_norm": 1.3337392807006836, "learning_rate": 1.9988168864125058e-05, "loss": 0.3193, "step": 700 }, { "epoch": 0.015595677453369662, "grad_norm": 0.8198513984680176, "learning_rate": 1.9987999278202994e-05, "loss": 0.4519, "step": 705 }, { "epoch": 0.01570628509488292, "grad_norm": 1.6660990715026855, "learning_rate": 1.9987828486233133e-05, "loss": 0.401, "step": 710 }, { "epoch": 0.015816892736396183, "grad_norm": 1.3600127696990967, "learning_rate": 1.9987656488236094e-05, "loss": 0.4744, "step": 715 }, { "epoch": 0.01592750037790944, "grad_norm": 1.0047779083251953, "learning_rate": 1.9987483284232657e-05, "loss": 0.2742, "step": 720 }, { "epoch": 0.016038108019422702, "grad_norm": 1.243537425994873, "learning_rate": 1.9987308874243724e-05, "loss": 0.3769, "step": 725 }, { "epoch": 0.016148715660935963, "grad_norm": 1.0419790744781494, "learning_rate": 1.9987133258290362e-05, "loss": 0.3718, "step": 730 }, { "epoch": 0.01625932330244922, "grad_norm": 1.6565879583358765, "learning_rate": 1.9986956436393776e-05, "loss": 0.5563, "step": 735 }, { "epoch": 0.016369930943962482, "grad_norm": 1.716636300086975, "learning_rate": 1.9986778408575317e-05, "loss": 0.3668, "step": 740 }, { "epoch": 0.01648053858547574, "grad_norm": 1.5255146026611328, "learning_rate": 1.998659917485648e-05, "loss": 0.3729, "step": 745 }, { "epoch": 0.016591146226989, "grad_norm": 1.1788830757141113, "learning_rate": 1.998641873525891e-05, "loss": 0.3682, "step": 750 }, { "epoch": 0.016701753868502263, "grad_norm": 0.8350759744644165, "learning_rate": 1.998623708980439e-05, "loss": 0.1659, "step": 755 }, { "epoch": 0.01681236151001552, "grad_norm": 1.0176374912261963, "learning_rate": 1.998605423851486e-05, "loss": 0.4052, "step": 760 }, { "epoch": 0.016922969151528782, "grad_norm": 0.7744227051734924, "learning_rate": 1.99858701814124e-05, "loss": 0.2629, "step": 765 }, { "epoch": 0.017033576793042043, "grad_norm": 1.6468068361282349, "learning_rate": 1.9985684918519228e-05, "loss": 0.4399, "step": 770 }, { "epoch": 0.0171441844345553, "grad_norm": 1.1699730157852173, "learning_rate": 1.998549844985772e-05, "loss": 0.4094, "step": 775 }, { "epoch": 0.017254792076068563, "grad_norm": 1.6849980354309082, "learning_rate": 1.998531077545039e-05, "loss": 0.3937, "step": 780 }, { "epoch": 0.01736539971758182, "grad_norm": 1.1741364002227783, "learning_rate": 1.9985121895319894e-05, "loss": 0.4058, "step": 785 }, { "epoch": 0.017476007359095082, "grad_norm": 1.7319626808166504, "learning_rate": 1.998493180948905e-05, "loss": 0.4402, "step": 790 }, { "epoch": 0.017586615000608343, "grad_norm": 1.2378299236297607, "learning_rate": 1.9984740517980805e-05, "loss": 0.4103, "step": 795 }, { "epoch": 0.0176972226421216, "grad_norm": 1.4885413646697998, "learning_rate": 1.9984548020818253e-05, "loss": 0.4791, "step": 800 }, { "epoch": 0.017807830283634862, "grad_norm": 1.7748368978500366, "learning_rate": 1.9984354318024648e-05, "loss": 0.346, "step": 805 }, { "epoch": 0.017918437925148124, "grad_norm": 1.5142452716827393, "learning_rate": 1.9984159409623374e-05, "loss": 0.4659, "step": 810 }, { "epoch": 0.01802904556666138, "grad_norm": 1.2787433862686157, "learning_rate": 1.9983963295637964e-05, "loss": 0.4791, "step": 815 }, { "epoch": 0.018139653208174643, "grad_norm": 1.29265558719635, "learning_rate": 1.9983765976092106e-05, "loss": 0.3841, "step": 820 }, { "epoch": 0.0182502608496879, "grad_norm": 0.9706268906593323, "learning_rate": 1.9983567451009617e-05, "loss": 0.264, "step": 825 }, { "epoch": 0.018360868491201162, "grad_norm": 0.5566022992134094, "learning_rate": 1.9983367720414473e-05, "loss": 0.2206, "step": 830 }, { "epoch": 0.018471476132714423, "grad_norm": 1.3352006673812866, "learning_rate": 1.9983166784330797e-05, "loss": 0.3592, "step": 835 }, { "epoch": 0.01858208377422768, "grad_norm": 0.8174845576286316, "learning_rate": 1.9982964642782844e-05, "loss": 0.4067, "step": 840 }, { "epoch": 0.018692691415740943, "grad_norm": 1.8382980823516846, "learning_rate": 1.9982761295795023e-05, "loss": 0.3628, "step": 845 }, { "epoch": 0.018803299057254204, "grad_norm": 1.2672278881072998, "learning_rate": 1.9982556743391894e-05, "loss": 0.3485, "step": 850 }, { "epoch": 0.01891390669876746, "grad_norm": 2.0548276901245117, "learning_rate": 1.998235098559815e-05, "loss": 0.4359, "step": 855 }, { "epoch": 0.019024514340280723, "grad_norm": 1.918861746788025, "learning_rate": 1.998214402243864e-05, "loss": 0.377, "step": 860 }, { "epoch": 0.01913512198179398, "grad_norm": 1.6009896993637085, "learning_rate": 1.9981935853938355e-05, "loss": 0.4736, "step": 865 }, { "epoch": 0.019245729623307242, "grad_norm": 1.9495576620101929, "learning_rate": 1.998172648012243e-05, "loss": 0.3397, "step": 870 }, { "epoch": 0.019356337264820504, "grad_norm": 0.9920646548271179, "learning_rate": 1.9981515901016145e-05, "loss": 0.3479, "step": 875 }, { "epoch": 0.01946694490633376, "grad_norm": 1.6650441884994507, "learning_rate": 1.998130411664493e-05, "loss": 0.4562, "step": 880 }, { "epoch": 0.019577552547847023, "grad_norm": 1.4782465696334839, "learning_rate": 1.998109112703436e-05, "loss": 0.4136, "step": 885 }, { "epoch": 0.01968816018936028, "grad_norm": 1.5930824279785156, "learning_rate": 1.9980876932210148e-05, "loss": 0.4387, "step": 890 }, { "epoch": 0.019798767830873542, "grad_norm": 1.8932952880859375, "learning_rate": 1.9980661532198163e-05, "loss": 0.5119, "step": 895 }, { "epoch": 0.019909375472386803, "grad_norm": 1.7424213886260986, "learning_rate": 1.998044492702441e-05, "loss": 0.5431, "step": 900 }, { "epoch": 0.02001998311390006, "grad_norm": 1.1744574308395386, "learning_rate": 1.9980227116715048e-05, "loss": 0.3957, "step": 905 }, { "epoch": 0.020130590755413322, "grad_norm": 1.6281894445419312, "learning_rate": 1.9980008101296376e-05, "loss": 0.4191, "step": 910 }, { "epoch": 0.020241198396926584, "grad_norm": 0.8328661918640137, "learning_rate": 1.9979787880794836e-05, "loss": 0.2644, "step": 915 }, { "epoch": 0.02035180603843984, "grad_norm": 1.5447438955307007, "learning_rate": 1.9979566455237025e-05, "loss": 0.5107, "step": 920 }, { "epoch": 0.020462413679953103, "grad_norm": 1.04193115234375, "learning_rate": 1.9979343824649683e-05, "loss": 0.4065, "step": 925 }, { "epoch": 0.02057302132146636, "grad_norm": 1.9673240184783936, "learning_rate": 1.997911998905968e-05, "loss": 0.3997, "step": 930 }, { "epoch": 0.020683628962979622, "grad_norm": 1.4147409200668335, "learning_rate": 1.997889494849406e-05, "loss": 0.3219, "step": 935 }, { "epoch": 0.020794236604492883, "grad_norm": 1.2033170461654663, "learning_rate": 1.9978668702979983e-05, "loss": 0.3416, "step": 940 }, { "epoch": 0.02090484424600614, "grad_norm": 1.4421274662017822, "learning_rate": 1.9978441252544774e-05, "loss": 0.4987, "step": 945 }, { "epoch": 0.021015451887519403, "grad_norm": 1.2078620195388794, "learning_rate": 1.99782125972159e-05, "loss": 0.3371, "step": 950 }, { "epoch": 0.021126059529032664, "grad_norm": 1.3434959650039673, "learning_rate": 1.9977982737020967e-05, "loss": 0.3938, "step": 955 }, { "epoch": 0.021236667170545922, "grad_norm": 1.6279258728027344, "learning_rate": 1.9977751671987734e-05, "loss": 0.3512, "step": 960 }, { "epoch": 0.021347274812059183, "grad_norm": 1.2361619472503662, "learning_rate": 1.9977519402144096e-05, "loss": 0.4465, "step": 965 }, { "epoch": 0.02145788245357244, "grad_norm": 1.38743257522583, "learning_rate": 1.9977285927518108e-05, "loss": 0.3335, "step": 970 }, { "epoch": 0.021568490095085702, "grad_norm": 1.3391947746276855, "learning_rate": 1.9977051248137954e-05, "loss": 0.5303, "step": 975 }, { "epoch": 0.021679097736598964, "grad_norm": 1.6789953708648682, "learning_rate": 1.9976815364031975e-05, "loss": 0.4361, "step": 980 }, { "epoch": 0.02178970537811222, "grad_norm": 0.7590691447257996, "learning_rate": 1.9976578275228653e-05, "loss": 0.3403, "step": 985 }, { "epoch": 0.021900313019625483, "grad_norm": 1.006661057472229, "learning_rate": 1.9976339981756618e-05, "loss": 0.3892, "step": 990 }, { "epoch": 0.022010920661138744, "grad_norm": 1.4304735660552979, "learning_rate": 1.9976100483644646e-05, "loss": 0.3714, "step": 995 }, { "epoch": 0.022121528302652002, "grad_norm": 1.6930865049362183, "learning_rate": 1.997585978092165e-05, "loss": 0.4204, "step": 1000 }, { "epoch": 0.022232135944165263, "grad_norm": 1.243986964225769, "learning_rate": 1.99756178736167e-05, "loss": 0.3561, "step": 1005 }, { "epoch": 0.02234274358567852, "grad_norm": 1.486270785331726, "learning_rate": 1.9975374761759e-05, "loss": 0.4801, "step": 1010 }, { "epoch": 0.022453351227191783, "grad_norm": 1.7370812892913818, "learning_rate": 1.9975130445377916e-05, "loss": 0.4147, "step": 1015 }, { "epoch": 0.022563958868705044, "grad_norm": 1.6690140962600708, "learning_rate": 1.9974884924502944e-05, "loss": 0.3336, "step": 1020 }, { "epoch": 0.022674566510218302, "grad_norm": 1.8132838010787964, "learning_rate": 1.997463819916372e-05, "loss": 0.3275, "step": 1025 }, { "epoch": 0.022785174151731563, "grad_norm": 1.3379210233688354, "learning_rate": 1.9974390269390053e-05, "loss": 0.3698, "step": 1030 }, { "epoch": 0.02289578179324482, "grad_norm": 1.335469365119934, "learning_rate": 1.9974141135211874e-05, "loss": 0.3721, "step": 1035 }, { "epoch": 0.023006389434758082, "grad_norm": 1.099830985069275, "learning_rate": 1.9973890796659266e-05, "loss": 0.3324, "step": 1040 }, { "epoch": 0.023116997076271344, "grad_norm": 1.387379765510559, "learning_rate": 1.9973639253762453e-05, "loss": 0.3765, "step": 1045 }, { "epoch": 0.0232276047177846, "grad_norm": 1.7703676223754883, "learning_rate": 1.997338650655181e-05, "loss": 0.4624, "step": 1050 }, { "epoch": 0.023338212359297863, "grad_norm": 1.2987421751022339, "learning_rate": 1.9973132555057862e-05, "loss": 0.422, "step": 1055 }, { "epoch": 0.023448820000811124, "grad_norm": 1.845648169517517, "learning_rate": 1.997287739931127e-05, "loss": 0.4341, "step": 1060 }, { "epoch": 0.023559427642324382, "grad_norm": 1.1097899675369263, "learning_rate": 1.9972621039342844e-05, "loss": 0.3242, "step": 1065 }, { "epoch": 0.023670035283837643, "grad_norm": 1.6857101917266846, "learning_rate": 1.997236347518354e-05, "loss": 0.2187, "step": 1070 }, { "epoch": 0.0237806429253509, "grad_norm": 1.825363039970398, "learning_rate": 1.9972104706864454e-05, "loss": 0.3971, "step": 1075 }, { "epoch": 0.023891250566864163, "grad_norm": 1.2114020586013794, "learning_rate": 1.9971844734416835e-05, "loss": 0.3059, "step": 1080 }, { "epoch": 0.024001858208377424, "grad_norm": 2.094937324523926, "learning_rate": 1.997158355787208e-05, "loss": 0.3764, "step": 1085 }, { "epoch": 0.02411246584989068, "grad_norm": 1.5062401294708252, "learning_rate": 1.9971321177261723e-05, "loss": 0.4021, "step": 1090 }, { "epoch": 0.024223073491403943, "grad_norm": 1.4633724689483643, "learning_rate": 1.9971057592617442e-05, "loss": 0.3802, "step": 1095 }, { "epoch": 0.024333681132917204, "grad_norm": 1.4287408590316772, "learning_rate": 1.997079280397107e-05, "loss": 0.3656, "step": 1100 }, { "epoch": 0.024444288774430462, "grad_norm": 2.014711856842041, "learning_rate": 1.9970526811354577e-05, "loss": 0.4296, "step": 1105 }, { "epoch": 0.024554896415943724, "grad_norm": 2.345499038696289, "learning_rate": 1.997025961480008e-05, "loss": 0.3048, "step": 1110 }, { "epoch": 0.02466550405745698, "grad_norm": 1.6644213199615479, "learning_rate": 1.996999121433985e-05, "loss": 0.4274, "step": 1115 }, { "epoch": 0.024776111698970243, "grad_norm": 1.0806634426116943, "learning_rate": 1.9969721610006288e-05, "loss": 0.3113, "step": 1120 }, { "epoch": 0.024886719340483504, "grad_norm": 1.4901978969573975, "learning_rate": 1.9969450801831955e-05, "loss": 0.2797, "step": 1125 }, { "epoch": 0.024997326981996762, "grad_norm": 1.3415969610214233, "learning_rate": 1.9969178789849545e-05, "loss": 0.4265, "step": 1130 }, { "epoch": 0.025107934623510023, "grad_norm": 1.6647189855575562, "learning_rate": 1.996890557409191e-05, "loss": 0.3371, "step": 1135 }, { "epoch": 0.025218542265023285, "grad_norm": 1.3294992446899414, "learning_rate": 1.9968631154592036e-05, "loss": 0.3602, "step": 1140 }, { "epoch": 0.025329149906536542, "grad_norm": 1.5876978635787964, "learning_rate": 1.9968355531383064e-05, "loss": 0.4415, "step": 1145 }, { "epoch": 0.025439757548049804, "grad_norm": 0.9118492603302002, "learning_rate": 1.9968078704498267e-05, "loss": 0.3949, "step": 1150 }, { "epoch": 0.02555036518956306, "grad_norm": 1.7133017778396606, "learning_rate": 1.9967800673971077e-05, "loss": 0.3786, "step": 1155 }, { "epoch": 0.025660972831076323, "grad_norm": 1.5445481538772583, "learning_rate": 1.996752143983507e-05, "loss": 0.3621, "step": 1160 }, { "epoch": 0.025771580472589584, "grad_norm": 1.2922894954681396, "learning_rate": 1.9967241002123955e-05, "loss": 0.4343, "step": 1165 }, { "epoch": 0.025882188114102842, "grad_norm": 1.3445627689361572, "learning_rate": 1.9966959360871602e-05, "loss": 0.5823, "step": 1170 }, { "epoch": 0.025992795755616104, "grad_norm": 1.1735810041427612, "learning_rate": 1.9966676516112012e-05, "loss": 0.2365, "step": 1175 }, { "epoch": 0.02610340339712936, "grad_norm": 1.3847334384918213, "learning_rate": 1.9966392467879347e-05, "loss": 0.3646, "step": 1180 }, { "epoch": 0.026214011038642623, "grad_norm": 1.39348304271698, "learning_rate": 1.99661072162079e-05, "loss": 0.2908, "step": 1185 }, { "epoch": 0.026324618680155884, "grad_norm": 1.685351014137268, "learning_rate": 1.9965820761132117e-05, "loss": 0.4959, "step": 1190 }, { "epoch": 0.026435226321669142, "grad_norm": 1.3834279775619507, "learning_rate": 1.9965533102686584e-05, "loss": 0.3952, "step": 1195 }, { "epoch": 0.026545833963182403, "grad_norm": 1.5664297342300415, "learning_rate": 1.996524424090604e-05, "loss": 0.3071, "step": 1200 }, { "epoch": 0.026656441604695665, "grad_norm": 2.0145459175109863, "learning_rate": 1.996495417582536e-05, "loss": 0.3179, "step": 1205 }, { "epoch": 0.026767049246208922, "grad_norm": 1.1572273969650269, "learning_rate": 1.9964662907479578e-05, "loss": 0.4386, "step": 1210 }, { "epoch": 0.026877656887722184, "grad_norm": 1.0059760808944702, "learning_rate": 1.9964370435903856e-05, "loss": 0.3719, "step": 1215 }, { "epoch": 0.02698826452923544, "grad_norm": 1.5482217073440552, "learning_rate": 1.9964076761133515e-05, "loss": 0.593, "step": 1220 }, { "epoch": 0.027098872170748703, "grad_norm": 1.2877788543701172, "learning_rate": 1.9963781883204014e-05, "loss": 0.3617, "step": 1225 }, { "epoch": 0.027209479812261964, "grad_norm": 0.8086239695549011, "learning_rate": 1.9963485802150957e-05, "loss": 0.2571, "step": 1230 }, { "epoch": 0.027320087453775222, "grad_norm": 2.7332327365875244, "learning_rate": 1.99631885180101e-05, "loss": 0.3595, "step": 1235 }, { "epoch": 0.027430695095288483, "grad_norm": 1.3433928489685059, "learning_rate": 1.9962890030817336e-05, "loss": 0.3794, "step": 1240 }, { "epoch": 0.027541302736801745, "grad_norm": 1.103946566581726, "learning_rate": 1.9962590340608713e-05, "loss": 0.3789, "step": 1245 }, { "epoch": 0.027651910378315003, "grad_norm": 1.2393618822097778, "learning_rate": 1.996228944742041e-05, "loss": 0.4502, "step": 1250 }, { "epoch": 0.027762518019828264, "grad_norm": 1.1121129989624023, "learning_rate": 1.9961987351288772e-05, "loss": 0.3326, "step": 1255 }, { "epoch": 0.027873125661341522, "grad_norm": 1.37144935131073, "learning_rate": 1.9961684052250266e-05, "loss": 0.4388, "step": 1260 }, { "epoch": 0.027983733302854783, "grad_norm": 1.5202018022537231, "learning_rate": 1.9961379550341515e-05, "loss": 0.3995, "step": 1265 }, { "epoch": 0.028094340944368044, "grad_norm": 0.8983990550041199, "learning_rate": 1.9961073845599297e-05, "loss": 0.2948, "step": 1270 }, { "epoch": 0.028204948585881302, "grad_norm": 1.9552021026611328, "learning_rate": 1.996076693806052e-05, "loss": 0.3498, "step": 1275 }, { "epoch": 0.028315556227394564, "grad_norm": 1.4086353778839111, "learning_rate": 1.9960458827762238e-05, "loss": 0.2261, "step": 1280 }, { "epoch": 0.028426163868907825, "grad_norm": 1.3989759683609009, "learning_rate": 1.9960149514741664e-05, "loss": 0.4798, "step": 1285 }, { "epoch": 0.028536771510421083, "grad_norm": 1.1871541738510132, "learning_rate": 1.9959838999036142e-05, "loss": 0.3739, "step": 1290 }, { "epoch": 0.028647379151934344, "grad_norm": 1.1420170068740845, "learning_rate": 1.9959527280683173e-05, "loss": 0.4773, "step": 1295 }, { "epoch": 0.028757986793447602, "grad_norm": 0.9968221783638, "learning_rate": 1.995921435972039e-05, "loss": 0.3017, "step": 1300 }, { "epoch": 0.028868594434960863, "grad_norm": 1.700411081314087, "learning_rate": 1.995890023618558e-05, "loss": 0.436, "step": 1305 }, { "epoch": 0.028979202076474125, "grad_norm": 1.8762866258621216, "learning_rate": 1.9958584910116673e-05, "loss": 0.3112, "step": 1310 }, { "epoch": 0.029089809717987383, "grad_norm": 1.3299816846847534, "learning_rate": 1.995826838155175e-05, "loss": 0.3591, "step": 1315 }, { "epoch": 0.029200417359500644, "grad_norm": 1.5134090185165405, "learning_rate": 1.9957950650529023e-05, "loss": 0.3925, "step": 1320 }, { "epoch": 0.029311025001013902, "grad_norm": 1.0477466583251953, "learning_rate": 1.995763171708686e-05, "loss": 0.3352, "step": 1325 }, { "epoch": 0.029421632642527163, "grad_norm": 1.2962243556976318, "learning_rate": 1.9957311581263776e-05, "loss": 0.3664, "step": 1330 }, { "epoch": 0.029532240284040424, "grad_norm": 1.5609354972839355, "learning_rate": 1.995699024309843e-05, "loss": 0.597, "step": 1335 }, { "epoch": 0.029642847925553682, "grad_norm": 1.6431083679199219, "learning_rate": 1.9956667702629613e-05, "loss": 0.5712, "step": 1340 }, { "epoch": 0.029753455567066944, "grad_norm": 1.804472804069519, "learning_rate": 1.995634395989628e-05, "loss": 0.279, "step": 1345 }, { "epoch": 0.029864063208580205, "grad_norm": 2.435232162475586, "learning_rate": 1.995601901493752e-05, "loss": 0.3656, "step": 1350 }, { "epoch": 0.029974670850093463, "grad_norm": 2.1654512882232666, "learning_rate": 1.995569286779257e-05, "loss": 0.3938, "step": 1355 }, { "epoch": 0.030085278491606724, "grad_norm": 1.4679621458053589, "learning_rate": 1.9955365518500813e-05, "loss": 0.4131, "step": 1360 }, { "epoch": 0.030195886133119982, "grad_norm": 2.0275371074676514, "learning_rate": 1.9955036967101778e-05, "loss": 0.2963, "step": 1365 }, { "epoch": 0.030306493774633243, "grad_norm": 1.2949063777923584, "learning_rate": 1.9954707213635133e-05, "loss": 0.4831, "step": 1370 }, { "epoch": 0.030417101416146505, "grad_norm": 0.9616190791130066, "learning_rate": 1.9954376258140698e-05, "loss": 0.376, "step": 1375 }, { "epoch": 0.030527709057659762, "grad_norm": 1.4603271484375, "learning_rate": 1.9954044100658437e-05, "loss": 0.3148, "step": 1380 }, { "epoch": 0.030638316699173024, "grad_norm": 1.3098572492599487, "learning_rate": 1.9953710741228458e-05, "loss": 0.3062, "step": 1385 }, { "epoch": 0.030748924340686285, "grad_norm": 1.1274430751800537, "learning_rate": 1.995337617989101e-05, "loss": 0.4503, "step": 1390 }, { "epoch": 0.030859531982199543, "grad_norm": 1.7395604848861694, "learning_rate": 1.9953040416686494e-05, "loss": 0.4029, "step": 1395 }, { "epoch": 0.030970139623712804, "grad_norm": 3.4156806468963623, "learning_rate": 1.9952703451655458e-05, "loss": 0.3473, "step": 1400 }, { "epoch": 0.031080747265226062, "grad_norm": 1.2884804010391235, "learning_rate": 1.995236528483858e-05, "loss": 0.4756, "step": 1405 }, { "epoch": 0.031191354906739324, "grad_norm": 1.517590880393982, "learning_rate": 1.99520259162767e-05, "loss": 0.4535, "step": 1410 }, { "epoch": 0.031301962548252585, "grad_norm": 1.1176884174346924, "learning_rate": 1.99516853460108e-05, "loss": 0.2981, "step": 1415 }, { "epoch": 0.03141257018976584, "grad_norm": 1.7175956964492798, "learning_rate": 1.9951343574081995e-05, "loss": 0.5288, "step": 1420 }, { "epoch": 0.0315231778312791, "grad_norm": 1.2429533004760742, "learning_rate": 1.9951000600531563e-05, "loss": 0.4331, "step": 1425 }, { "epoch": 0.031633785472792365, "grad_norm": 1.8522565364837646, "learning_rate": 1.995065642540091e-05, "loss": 0.408, "step": 1430 }, { "epoch": 0.03174439311430562, "grad_norm": 1.7889617681503296, "learning_rate": 1.99503110487316e-05, "loss": 0.3316, "step": 1435 }, { "epoch": 0.03185500075581888, "grad_norm": 1.4390583038330078, "learning_rate": 1.9949964470565334e-05, "loss": 0.3738, "step": 1440 }, { "epoch": 0.031965608397332146, "grad_norm": 1.8827886581420898, "learning_rate": 1.9949616690943962e-05, "loss": 0.5625, "step": 1445 }, { "epoch": 0.032076216038845404, "grad_norm": 1.7436652183532715, "learning_rate": 1.9949267709909482e-05, "loss": 0.5415, "step": 1450 }, { "epoch": 0.03218682368035866, "grad_norm": 1.6764863729476929, "learning_rate": 1.994891752750403e-05, "loss": 0.525, "step": 1455 }, { "epoch": 0.032297431321871926, "grad_norm": 1.4678794145584106, "learning_rate": 1.9948566143769888e-05, "loss": 0.3004, "step": 1460 }, { "epoch": 0.032408038963385184, "grad_norm": 1.6213154792785645, "learning_rate": 1.994821355874949e-05, "loss": 0.4348, "step": 1465 }, { "epoch": 0.03251864660489844, "grad_norm": 1.3034226894378662, "learning_rate": 1.994785977248541e-05, "loss": 0.412, "step": 1470 }, { "epoch": 0.03262925424641171, "grad_norm": 1.6633925437927246, "learning_rate": 1.9947504785020367e-05, "loss": 0.3486, "step": 1475 }, { "epoch": 0.032739861887924965, "grad_norm": 2.1730141639709473, "learning_rate": 1.9947148596397222e-05, "loss": 0.3315, "step": 1480 }, { "epoch": 0.03285046952943822, "grad_norm": 1.3933545351028442, "learning_rate": 1.994679120665899e-05, "loss": 0.5148, "step": 1485 }, { "epoch": 0.03296107717095148, "grad_norm": 0.8462718725204468, "learning_rate": 1.994643261584882e-05, "loss": 0.2664, "step": 1490 }, { "epoch": 0.033071684812464745, "grad_norm": 1.7442182302474976, "learning_rate": 1.9946072824010018e-05, "loss": 0.3291, "step": 1495 }, { "epoch": 0.033182292453978, "grad_norm": 1.153548240661621, "learning_rate": 1.9945711831186026e-05, "loss": 0.3647, "step": 1500 }, { "epoch": 0.03329290009549126, "grad_norm": 1.6028947830200195, "learning_rate": 1.9945349637420436e-05, "loss": 0.5249, "step": 1505 }, { "epoch": 0.033403507737004526, "grad_norm": 1.4543222188949585, "learning_rate": 1.9944986242756978e-05, "loss": 0.5021, "step": 1510 }, { "epoch": 0.033514115378517784, "grad_norm": 1.6787489652633667, "learning_rate": 1.994462164723953e-05, "loss": 0.5502, "step": 1515 }, { "epoch": 0.03362472302003104, "grad_norm": 1.3913249969482422, "learning_rate": 1.9944255850912126e-05, "loss": 0.324, "step": 1520 }, { "epoch": 0.033735330661544306, "grad_norm": 2.1065797805786133, "learning_rate": 1.9943888853818932e-05, "loss": 0.4445, "step": 1525 }, { "epoch": 0.033845938303057564, "grad_norm": 1.8566280603408813, "learning_rate": 1.9943520656004257e-05, "loss": 0.3262, "step": 1530 }, { "epoch": 0.03395654594457082, "grad_norm": 1.7572929859161377, "learning_rate": 1.994315125751257e-05, "loss": 0.5658, "step": 1535 }, { "epoch": 0.03406715358608409, "grad_norm": 1.512519359588623, "learning_rate": 1.994278065838847e-05, "loss": 0.4253, "step": 1540 }, { "epoch": 0.034177761227597345, "grad_norm": 1.6034471988677979, "learning_rate": 1.9942408858676705e-05, "loss": 0.3643, "step": 1545 }, { "epoch": 0.0342883688691106, "grad_norm": 1.4028284549713135, "learning_rate": 1.9942035858422178e-05, "loss": 0.5422, "step": 1550 }, { "epoch": 0.03439897651062387, "grad_norm": 2.1084299087524414, "learning_rate": 1.994166165766992e-05, "loss": 0.4588, "step": 1555 }, { "epoch": 0.034509584152137125, "grad_norm": 1.6503419876098633, "learning_rate": 1.9941286256465117e-05, "loss": 0.4572, "step": 1560 }, { "epoch": 0.03462019179365038, "grad_norm": 1.8703776597976685, "learning_rate": 1.99409096548531e-05, "loss": 0.3851, "step": 1565 }, { "epoch": 0.03473079943516364, "grad_norm": 1.6890854835510254, "learning_rate": 1.9940531852879346e-05, "loss": 0.4338, "step": 1570 }, { "epoch": 0.034841407076676906, "grad_norm": 1.1552622318267822, "learning_rate": 1.9940152850589474e-05, "loss": 0.3632, "step": 1575 }, { "epoch": 0.034952014718190164, "grad_norm": 1.3504608869552612, "learning_rate": 1.9939772648029246e-05, "loss": 0.4149, "step": 1580 }, { "epoch": 0.03506262235970342, "grad_norm": 1.3849256038665771, "learning_rate": 1.993939124524457e-05, "loss": 0.3661, "step": 1585 }, { "epoch": 0.035173230001216686, "grad_norm": 1.6501619815826416, "learning_rate": 1.9939008642281504e-05, "loss": 0.42, "step": 1590 }, { "epoch": 0.035283837642729944, "grad_norm": 2.0739259719848633, "learning_rate": 1.9938624839186246e-05, "loss": 0.3056, "step": 1595 }, { "epoch": 0.0353944452842432, "grad_norm": 1.2636287212371826, "learning_rate": 1.993823983600514e-05, "loss": 0.3744, "step": 1600 }, { "epoch": 0.03550505292575647, "grad_norm": 1.0999157428741455, "learning_rate": 1.9937853632784673e-05, "loss": 0.3416, "step": 1605 }, { "epoch": 0.035615660567269725, "grad_norm": 1.2782995700836182, "learning_rate": 1.9937466229571482e-05, "loss": 0.3492, "step": 1610 }, { "epoch": 0.03572626820878298, "grad_norm": 1.1964596509933472, "learning_rate": 1.9937077626412343e-05, "loss": 0.3607, "step": 1615 }, { "epoch": 0.03583687585029625, "grad_norm": 2.1067469120025635, "learning_rate": 1.9936687823354185e-05, "loss": 0.459, "step": 1620 }, { "epoch": 0.035947483491809505, "grad_norm": 1.187377691268921, "learning_rate": 1.993629682044407e-05, "loss": 0.3152, "step": 1625 }, { "epoch": 0.03605809113332276, "grad_norm": 1.446434497833252, "learning_rate": 1.9935904617729217e-05, "loss": 0.4584, "step": 1630 }, { "epoch": 0.03616869877483602, "grad_norm": 1.6647571325302124, "learning_rate": 1.9935511215256977e-05, "loss": 0.3591, "step": 1635 }, { "epoch": 0.036279306416349286, "grad_norm": 2.774245262145996, "learning_rate": 1.993511661307486e-05, "loss": 0.4557, "step": 1640 }, { "epoch": 0.036389914057862544, "grad_norm": 1.5062856674194336, "learning_rate": 1.9934720811230513e-05, "loss": 0.3801, "step": 1645 }, { "epoch": 0.0365005216993758, "grad_norm": 0.9688113331794739, "learning_rate": 1.9934323809771723e-05, "loss": 0.2691, "step": 1650 }, { "epoch": 0.036611129340889066, "grad_norm": 1.641059398651123, "learning_rate": 1.993392560874644e-05, "loss": 0.3315, "step": 1655 }, { "epoch": 0.036721736982402324, "grad_norm": 1.0470434427261353, "learning_rate": 1.9933526208202737e-05, "loss": 0.3923, "step": 1660 }, { "epoch": 0.03683234462391558, "grad_norm": 1.3745472431182861, "learning_rate": 1.9933125608188842e-05, "loss": 0.2752, "step": 1665 }, { "epoch": 0.03694295226542885, "grad_norm": 1.0813316106796265, "learning_rate": 1.993272380875313e-05, "loss": 0.3998, "step": 1670 }, { "epoch": 0.037053559906942105, "grad_norm": 1.4880412817001343, "learning_rate": 1.9932320809944116e-05, "loss": 0.5433, "step": 1675 }, { "epoch": 0.03716416754845536, "grad_norm": 1.6901288032531738, "learning_rate": 1.9931916611810463e-05, "loss": 0.4456, "step": 1680 }, { "epoch": 0.03727477518996863, "grad_norm": 1.6344832181930542, "learning_rate": 1.9931511214400984e-05, "loss": 0.4479, "step": 1685 }, { "epoch": 0.037385382831481885, "grad_norm": 1.2788772583007812, "learning_rate": 1.993110461776462e-05, "loss": 0.406, "step": 1690 }, { "epoch": 0.03749599047299514, "grad_norm": 1.6355303525924683, "learning_rate": 1.993069682195047e-05, "loss": 0.4648, "step": 1695 }, { "epoch": 0.03760659811450841, "grad_norm": 1.7000280618667603, "learning_rate": 1.9930287827007783e-05, "loss": 0.4656, "step": 1700 }, { "epoch": 0.037717205756021666, "grad_norm": 1.7274237871170044, "learning_rate": 1.9929877632985933e-05, "loss": 0.4945, "step": 1705 }, { "epoch": 0.03782781339753492, "grad_norm": 0.9419665932655334, "learning_rate": 1.9929466239934462e-05, "loss": 0.4295, "step": 1710 }, { "epoch": 0.03793842103904818, "grad_norm": 1.9227476119995117, "learning_rate": 1.9929053647903037e-05, "loss": 0.2412, "step": 1715 }, { "epoch": 0.038049028680561446, "grad_norm": 1.8982129096984863, "learning_rate": 1.9928639856941485e-05, "loss": 0.467, "step": 1720 }, { "epoch": 0.038159636322074704, "grad_norm": 1.3025832176208496, "learning_rate": 1.9928224867099767e-05, "loss": 0.3951, "step": 1725 }, { "epoch": 0.03827024396358796, "grad_norm": 1.025184988975525, "learning_rate": 1.9927808678427998e-05, "loss": 0.3613, "step": 1730 }, { "epoch": 0.03838085160510123, "grad_norm": 1.726584792137146, "learning_rate": 1.9927391290976424e-05, "loss": 0.3916, "step": 1735 }, { "epoch": 0.038491459246614484, "grad_norm": 1.610206127166748, "learning_rate": 1.9926972704795452e-05, "loss": 0.4645, "step": 1740 }, { "epoch": 0.03860206688812774, "grad_norm": 1.6090996265411377, "learning_rate": 1.992655291993562e-05, "loss": 0.3766, "step": 1745 }, { "epoch": 0.03871267452964101, "grad_norm": 1.3677319288253784, "learning_rate": 1.9926131936447623e-05, "loss": 0.4636, "step": 1750 }, { "epoch": 0.038823282171154265, "grad_norm": 2.265345573425293, "learning_rate": 1.992570975438229e-05, "loss": 0.4446, "step": 1755 }, { "epoch": 0.03893388981266752, "grad_norm": 1.0422066450119019, "learning_rate": 1.9925286373790604e-05, "loss": 0.3976, "step": 1760 }, { "epoch": 0.03904449745418079, "grad_norm": 1.089389443397522, "learning_rate": 1.9924861794723682e-05, "loss": 0.3509, "step": 1765 }, { "epoch": 0.039155105095694046, "grad_norm": 1.9326908588409424, "learning_rate": 1.9924436017232798e-05, "loss": 0.3017, "step": 1770 }, { "epoch": 0.0392657127372073, "grad_norm": 1.2068520784378052, "learning_rate": 1.9924009041369363e-05, "loss": 0.3548, "step": 1775 }, { "epoch": 0.03937632037872056, "grad_norm": 1.1482384204864502, "learning_rate": 1.9923580867184927e-05, "loss": 0.37, "step": 1780 }, { "epoch": 0.039486928020233826, "grad_norm": 1.0593656301498413, "learning_rate": 1.99231514947312e-05, "loss": 0.401, "step": 1785 }, { "epoch": 0.039597535661747084, "grad_norm": 0.6981975436210632, "learning_rate": 1.992272092406003e-05, "loss": 0.4588, "step": 1790 }, { "epoch": 0.03970814330326034, "grad_norm": 1.0706939697265625, "learning_rate": 1.9922289155223396e-05, "loss": 0.5159, "step": 1795 }, { "epoch": 0.03981875094477361, "grad_norm": 1.0891457796096802, "learning_rate": 1.992185618827345e-05, "loss": 0.2494, "step": 1800 }, { "epoch": 0.039929358586286864, "grad_norm": 1.60609769821167, "learning_rate": 1.9921422023262465e-05, "loss": 0.3236, "step": 1805 }, { "epoch": 0.04003996622780012, "grad_norm": 1.0060046911239624, "learning_rate": 1.9920986660242866e-05, "loss": 0.4259, "step": 1810 }, { "epoch": 0.04015057386931339, "grad_norm": 1.7036231756210327, "learning_rate": 1.992055009926722e-05, "loss": 0.3989, "step": 1815 }, { "epoch": 0.040261181510826645, "grad_norm": 1.538004755973816, "learning_rate": 1.9920112340388246e-05, "loss": 0.3314, "step": 1820 }, { "epoch": 0.0403717891523399, "grad_norm": 1.2872172594070435, "learning_rate": 1.99196733836588e-05, "loss": 0.374, "step": 1825 }, { "epoch": 0.04048239679385317, "grad_norm": 1.0714995861053467, "learning_rate": 1.9919233229131897e-05, "loss": 0.3592, "step": 1830 }, { "epoch": 0.040593004435366425, "grad_norm": 1.0011204481124878, "learning_rate": 1.991879187686067e-05, "loss": 0.5065, "step": 1835 }, { "epoch": 0.04070361207687968, "grad_norm": 1.3570650815963745, "learning_rate": 1.991834932689842e-05, "loss": 0.4753, "step": 1840 }, { "epoch": 0.04081421971839295, "grad_norm": 1.37960946559906, "learning_rate": 1.991790557929858e-05, "loss": 0.3198, "step": 1845 }, { "epoch": 0.040924827359906206, "grad_norm": 1.259458065032959, "learning_rate": 1.991746063411474e-05, "loss": 0.2842, "step": 1850 }, { "epoch": 0.041035435001419464, "grad_norm": 1.3764694929122925, "learning_rate": 1.9917014491400622e-05, "loss": 0.4791, "step": 1855 }, { "epoch": 0.04114604264293272, "grad_norm": 1.5101432800292969, "learning_rate": 1.9916567151210097e-05, "loss": 0.4205, "step": 1860 }, { "epoch": 0.041256650284445986, "grad_norm": 1.1641854047775269, "learning_rate": 1.9916118613597186e-05, "loss": 0.3114, "step": 1865 }, { "epoch": 0.041367257925959244, "grad_norm": 1.8781219720840454, "learning_rate": 1.991566887861604e-05, "loss": 0.4704, "step": 1870 }, { "epoch": 0.0414778655674725, "grad_norm": 1.3368706703186035, "learning_rate": 1.9915217946320976e-05, "loss": 0.4984, "step": 1875 }, { "epoch": 0.04158847320898577, "grad_norm": 1.9992051124572754, "learning_rate": 1.991476581676644e-05, "loss": 0.3394, "step": 1880 }, { "epoch": 0.041699080850499025, "grad_norm": 1.7645069360733032, "learning_rate": 1.9914312490007025e-05, "loss": 0.39, "step": 1885 }, { "epoch": 0.04180968849201228, "grad_norm": 1.241485595703125, "learning_rate": 1.9913857966097467e-05, "loss": 0.5009, "step": 1890 }, { "epoch": 0.04192029613352555, "grad_norm": 1.6978182792663574, "learning_rate": 1.9913402245092653e-05, "loss": 0.4613, "step": 1895 }, { "epoch": 0.042030903775038805, "grad_norm": 1.1530978679656982, "learning_rate": 1.9912945327047614e-05, "loss": 0.241, "step": 1900 }, { "epoch": 0.04214151141655206, "grad_norm": 1.230265736579895, "learning_rate": 1.9912487212017518e-05, "loss": 0.2942, "step": 1905 }, { "epoch": 0.04225211905806533, "grad_norm": 1.6586536169052124, "learning_rate": 1.9912027900057683e-05, "loss": 0.5788, "step": 1910 }, { "epoch": 0.042362726699578586, "grad_norm": 2.309019088745117, "learning_rate": 1.9911567391223574e-05, "loss": 0.3873, "step": 1915 }, { "epoch": 0.042473334341091844, "grad_norm": 0.9753649234771729, "learning_rate": 1.9911105685570795e-05, "loss": 0.2559, "step": 1920 }, { "epoch": 0.0425839419826051, "grad_norm": 1.041579246520996, "learning_rate": 1.9910642783155096e-05, "loss": 0.3652, "step": 1925 }, { "epoch": 0.042694549624118366, "grad_norm": 1.5550487041473389, "learning_rate": 1.9910178684032374e-05, "loss": 0.3532, "step": 1930 }, { "epoch": 0.042805157265631624, "grad_norm": 0.9889026284217834, "learning_rate": 1.9909713388258665e-05, "loss": 0.2943, "step": 1935 }, { "epoch": 0.04291576490714488, "grad_norm": 1.839468240737915, "learning_rate": 1.990924689589016e-05, "loss": 0.5776, "step": 1940 }, { "epoch": 0.04302637254865815, "grad_norm": 1.587815523147583, "learning_rate": 1.9908779206983184e-05, "loss": 0.3313, "step": 1945 }, { "epoch": 0.043136980190171405, "grad_norm": 1.8313932418823242, "learning_rate": 1.990831032159421e-05, "loss": 0.3745, "step": 1950 }, { "epoch": 0.04324758783168466, "grad_norm": 1.0933369398117065, "learning_rate": 1.9907840239779857e-05, "loss": 0.4639, "step": 1955 }, { "epoch": 0.04335819547319793, "grad_norm": 1.320271372795105, "learning_rate": 1.9907368961596885e-05, "loss": 0.4421, "step": 1960 }, { "epoch": 0.043468803114711185, "grad_norm": 2.2063987255096436, "learning_rate": 1.9906896487102207e-05, "loss": 0.4301, "step": 1965 }, { "epoch": 0.04357941075622444, "grad_norm": 1.768778920173645, "learning_rate": 1.990642281635286e-05, "loss": 0.4766, "step": 1970 }, { "epoch": 0.04369001839773771, "grad_norm": 1.3267290592193604, "learning_rate": 1.9905947949406058e-05, "loss": 0.2591, "step": 1975 }, { "epoch": 0.043800626039250966, "grad_norm": 2.4924116134643555, "learning_rate": 1.9905471886319127e-05, "loss": 0.3974, "step": 1980 }, { "epoch": 0.043911233680764224, "grad_norm": 1.6705002784729004, "learning_rate": 1.9904994627149557e-05, "loss": 0.4426, "step": 1985 }, { "epoch": 0.04402184132227749, "grad_norm": 1.0442208051681519, "learning_rate": 1.990451617195498e-05, "loss": 0.4544, "step": 1990 }, { "epoch": 0.044132448963790746, "grad_norm": 1.330059289932251, "learning_rate": 1.9904036520793162e-05, "loss": 0.4111, "step": 1995 }, { "epoch": 0.044243056605304004, "grad_norm": 1.464342474937439, "learning_rate": 1.9903555673722024e-05, "loss": 0.5699, "step": 2000 }, { "epoch": 0.04435366424681726, "grad_norm": 2.8925960063934326, "learning_rate": 1.990307363079963e-05, "loss": 0.3261, "step": 2005 }, { "epoch": 0.04446427188833053, "grad_norm": 1.9577715396881104, "learning_rate": 1.9902590392084183e-05, "loss": 0.4503, "step": 2010 }, { "epoch": 0.044574879529843785, "grad_norm": 1.571022868156433, "learning_rate": 1.9902105957634038e-05, "loss": 0.5027, "step": 2015 }, { "epoch": 0.04468548717135704, "grad_norm": 1.7391701936721802, "learning_rate": 1.990162032750769e-05, "loss": 0.5029, "step": 2020 }, { "epoch": 0.04479609481287031, "grad_norm": 1.3139772415161133, "learning_rate": 1.9901133501763773e-05, "loss": 0.4752, "step": 2025 }, { "epoch": 0.044906702454383565, "grad_norm": 1.4060453176498413, "learning_rate": 1.990064548046108e-05, "loss": 0.4248, "step": 2030 }, { "epoch": 0.04501731009589682, "grad_norm": 1.9148207902908325, "learning_rate": 1.990015626365853e-05, "loss": 0.2633, "step": 2035 }, { "epoch": 0.04512791773741009, "grad_norm": 1.3706566095352173, "learning_rate": 1.98996658514152e-05, "loss": 0.5147, "step": 2040 }, { "epoch": 0.045238525378923346, "grad_norm": 1.4594364166259766, "learning_rate": 1.9899174243790312e-05, "loss": 0.4645, "step": 2045 }, { "epoch": 0.045349133020436604, "grad_norm": 1.0690209865570068, "learning_rate": 1.9898681440843218e-05, "loss": 0.286, "step": 2050 }, { "epoch": 0.04545974066194987, "grad_norm": 1.4011962413787842, "learning_rate": 1.989818744263343e-05, "loss": 0.3708, "step": 2055 }, { "epoch": 0.045570348303463126, "grad_norm": 1.262143850326538, "learning_rate": 1.98976922492206e-05, "loss": 0.3882, "step": 2060 }, { "epoch": 0.045680955944976384, "grad_norm": 1.0066295862197876, "learning_rate": 1.9897195860664516e-05, "loss": 0.3087, "step": 2065 }, { "epoch": 0.04579156358648964, "grad_norm": 0.944383442401886, "learning_rate": 1.989669827702512e-05, "loss": 0.4632, "step": 2070 }, { "epoch": 0.04590217122800291, "grad_norm": 1.2726325988769531, "learning_rate": 1.9896199498362495e-05, "loss": 0.3674, "step": 2075 }, { "epoch": 0.046012778869516165, "grad_norm": 3.5587821006774902, "learning_rate": 1.9895699524736867e-05, "loss": 0.441, "step": 2080 }, { "epoch": 0.04612338651102942, "grad_norm": 1.356386423110962, "learning_rate": 1.9895198356208613e-05, "loss": 0.3961, "step": 2085 }, { "epoch": 0.04623399415254269, "grad_norm": 1.6746889352798462, "learning_rate": 1.9894695992838242e-05, "loss": 0.3304, "step": 2090 }, { "epoch": 0.046344601794055945, "grad_norm": 2.1315078735351562, "learning_rate": 1.989419243468642e-05, "loss": 0.3771, "step": 2095 }, { "epoch": 0.0464552094355692, "grad_norm": 2.626690149307251, "learning_rate": 1.9893687681813946e-05, "loss": 0.3791, "step": 2100 }, { "epoch": 0.04656581707708247, "grad_norm": 1.5506622791290283, "learning_rate": 1.9893181734281772e-05, "loss": 0.4013, "step": 2105 }, { "epoch": 0.046676424718595726, "grad_norm": 1.8721537590026855, "learning_rate": 1.9892674592150995e-05, "loss": 0.4404, "step": 2110 }, { "epoch": 0.046787032360108984, "grad_norm": 1.2380080223083496, "learning_rate": 1.9892166255482843e-05, "loss": 0.3272, "step": 2115 }, { "epoch": 0.04689764000162225, "grad_norm": 1.2583808898925781, "learning_rate": 1.98916567243387e-05, "loss": 0.3597, "step": 2120 }, { "epoch": 0.047008247643135506, "grad_norm": 1.0434297323226929, "learning_rate": 1.98911459987801e-05, "loss": 0.3479, "step": 2125 }, { "epoch": 0.047118855284648764, "grad_norm": 1.6103774309158325, "learning_rate": 1.98906340788687e-05, "loss": 0.3153, "step": 2130 }, { "epoch": 0.04722946292616203, "grad_norm": 1.0700759887695312, "learning_rate": 1.9890120964666328e-05, "loss": 0.4381, "step": 2135 }, { "epoch": 0.04734007056767529, "grad_norm": 1.851691722869873, "learning_rate": 1.988960665623493e-05, "loss": 0.3591, "step": 2140 }, { "epoch": 0.047450678209188545, "grad_norm": 1.4115655422210693, "learning_rate": 1.9889091153636616e-05, "loss": 0.4714, "step": 2145 }, { "epoch": 0.0475612858507018, "grad_norm": 0.9130614995956421, "learning_rate": 1.988857445693363e-05, "loss": 0.2925, "step": 2150 }, { "epoch": 0.04767189349221507, "grad_norm": 1.3604780435562134, "learning_rate": 1.988805656618837e-05, "loss": 0.2891, "step": 2155 }, { "epoch": 0.047782501133728325, "grad_norm": 1.187685489654541, "learning_rate": 1.9887537481463356e-05, "loss": 0.3252, "step": 2160 }, { "epoch": 0.04789310877524158, "grad_norm": 1.7799403667449951, "learning_rate": 1.988701720282128e-05, "loss": 0.4632, "step": 2165 }, { "epoch": 0.04800371641675485, "grad_norm": 1.2127039432525635, "learning_rate": 1.9886495730324963e-05, "loss": 0.2733, "step": 2170 }, { "epoch": 0.048114324058268106, "grad_norm": 0.9583289623260498, "learning_rate": 1.9885973064037373e-05, "loss": 0.3977, "step": 2175 }, { "epoch": 0.04822493169978136, "grad_norm": 1.951528549194336, "learning_rate": 1.9885449204021612e-05, "loss": 0.329, "step": 2180 }, { "epoch": 0.04833553934129463, "grad_norm": 1.1556411981582642, "learning_rate": 1.988492415034095e-05, "loss": 0.4397, "step": 2185 }, { "epoch": 0.048446146982807886, "grad_norm": 1.1385254859924316, "learning_rate": 1.9884397903058784e-05, "loss": 0.2576, "step": 2190 }, { "epoch": 0.048556754624321144, "grad_norm": 1.1659611463546753, "learning_rate": 1.988387046223865e-05, "loss": 0.514, "step": 2195 }, { "epoch": 0.04866736226583441, "grad_norm": 1.6676899194717407, "learning_rate": 1.9883341827944245e-05, "loss": 0.358, "step": 2200 }, { "epoch": 0.04877796990734767, "grad_norm": 0.9930961728096008, "learning_rate": 1.9882812000239397e-05, "loss": 0.586, "step": 2205 }, { "epoch": 0.048888577548860924, "grad_norm": 1.422568678855896, "learning_rate": 1.9882280979188084e-05, "loss": 0.3946, "step": 2210 }, { "epoch": 0.04899918519037418, "grad_norm": 1.6325699090957642, "learning_rate": 1.9881748764854425e-05, "loss": 0.3169, "step": 2215 }, { "epoch": 0.04910979283188745, "grad_norm": 1.581741452217102, "learning_rate": 1.9881215357302688e-05, "loss": 0.3414, "step": 2220 }, { "epoch": 0.049220400473400705, "grad_norm": 1.2818689346313477, "learning_rate": 1.988068075659728e-05, "loss": 0.4085, "step": 2225 }, { "epoch": 0.04933100811491396, "grad_norm": 1.7279057502746582, "learning_rate": 1.988014496280275e-05, "loss": 0.2867, "step": 2230 }, { "epoch": 0.04944161575642723, "grad_norm": 1.6151331663131714, "learning_rate": 1.98796079759838e-05, "loss": 0.4529, "step": 2235 }, { "epoch": 0.049552223397940486, "grad_norm": 1.2708929777145386, "learning_rate": 1.987906979620527e-05, "loss": 0.4029, "step": 2240 }, { "epoch": 0.04966283103945374, "grad_norm": 1.2229819297790527, "learning_rate": 1.987853042353215e-05, "loss": 0.4132, "step": 2245 }, { "epoch": 0.04977343868096701, "grad_norm": 1.3457281589508057, "learning_rate": 1.9877989858029558e-05, "loss": 0.3838, "step": 2250 }, { "epoch": 0.049884046322480266, "grad_norm": 1.6834375858306885, "learning_rate": 1.987744809976277e-05, "loss": 0.3068, "step": 2255 }, { "epoch": 0.049994653963993524, "grad_norm": 1.8692505359649658, "learning_rate": 1.987690514879721e-05, "loss": 0.4849, "step": 2260 }, { "epoch": 0.05010526160550679, "grad_norm": 1.2451096773147583, "learning_rate": 1.9876361005198436e-05, "loss": 0.4452, "step": 2265 }, { "epoch": 0.05021586924702005, "grad_norm": 1.5865321159362793, "learning_rate": 1.987581566903215e-05, "loss": 0.4188, "step": 2270 }, { "epoch": 0.050326476888533304, "grad_norm": 1.5243339538574219, "learning_rate": 1.9875269140364203e-05, "loss": 0.4154, "step": 2275 }, { "epoch": 0.05043708453004657, "grad_norm": 1.1520603895187378, "learning_rate": 1.987472141926059e-05, "loss": 0.4062, "step": 2280 }, { "epoch": 0.05054769217155983, "grad_norm": 1.870670199394226, "learning_rate": 1.9874172505787446e-05, "loss": 0.469, "step": 2285 }, { "epoch": 0.050658299813073085, "grad_norm": 1.0223819017410278, "learning_rate": 1.987362240001105e-05, "loss": 0.4186, "step": 2290 }, { "epoch": 0.05076890745458634, "grad_norm": 1.347593903541565, "learning_rate": 1.9873071101997836e-05, "loss": 0.3629, "step": 2295 }, { "epoch": 0.05087951509609961, "grad_norm": 1.6196404695510864, "learning_rate": 1.9872518611814363e-05, "loss": 0.3215, "step": 2300 }, { "epoch": 0.050990122737612865, "grad_norm": 2.087160348892212, "learning_rate": 1.9871964929527353e-05, "loss": 0.4977, "step": 2305 }, { "epoch": 0.05110073037912612, "grad_norm": 2.02555775642395, "learning_rate": 1.9871410055203653e-05, "loss": 0.4595, "step": 2310 }, { "epoch": 0.05121133802063939, "grad_norm": 2.0682272911071777, "learning_rate": 1.987085398891027e-05, "loss": 0.5125, "step": 2315 }, { "epoch": 0.051321945662152646, "grad_norm": 0.9722549915313721, "learning_rate": 1.987029673071435e-05, "loss": 0.4404, "step": 2320 }, { "epoch": 0.051432553303665904, "grad_norm": 1.6655970811843872, "learning_rate": 1.9869738280683176e-05, "loss": 0.2692, "step": 2325 }, { "epoch": 0.05154316094517917, "grad_norm": 1.0775120258331299, "learning_rate": 1.9869178638884185e-05, "loss": 0.3824, "step": 2330 }, { "epoch": 0.051653768586692426, "grad_norm": 1.4419875144958496, "learning_rate": 1.9868617805384955e-05, "loss": 0.305, "step": 2335 }, { "epoch": 0.051764376228205684, "grad_norm": 1.269936442375183, "learning_rate": 1.9868055780253202e-05, "loss": 0.3549, "step": 2340 }, { "epoch": 0.05187498386971895, "grad_norm": 1.418851613998413, "learning_rate": 1.9867492563556794e-05, "loss": 0.312, "step": 2345 }, { "epoch": 0.05198559151123221, "grad_norm": 1.4426442384719849, "learning_rate": 1.986692815536374e-05, "loss": 0.2811, "step": 2350 }, { "epoch": 0.052096199152745465, "grad_norm": 1.9221850633621216, "learning_rate": 1.9866362555742185e-05, "loss": 0.4378, "step": 2355 }, { "epoch": 0.05220680679425872, "grad_norm": 1.0940755605697632, "learning_rate": 1.9865795764760432e-05, "loss": 0.2834, "step": 2360 }, { "epoch": 0.05231741443577199, "grad_norm": 1.3292614221572876, "learning_rate": 1.9865227782486917e-05, "loss": 0.2788, "step": 2365 }, { "epoch": 0.052428022077285245, "grad_norm": 1.067088007926941, "learning_rate": 1.986465860899023e-05, "loss": 0.4481, "step": 2370 }, { "epoch": 0.0525386297187985, "grad_norm": 2.0038957595825195, "learning_rate": 1.986408824433909e-05, "loss": 0.5231, "step": 2375 }, { "epoch": 0.05264923736031177, "grad_norm": 1.5398387908935547, "learning_rate": 1.9863516688602374e-05, "loss": 0.4356, "step": 2380 }, { "epoch": 0.052759845001825026, "grad_norm": 1.8451168537139893, "learning_rate": 1.9862943941849094e-05, "loss": 0.4023, "step": 2385 }, { "epoch": 0.052870452643338284, "grad_norm": 1.9335554838180542, "learning_rate": 1.986237000414841e-05, "loss": 0.3386, "step": 2390 }, { "epoch": 0.05298106028485155, "grad_norm": 1.6652709245681763, "learning_rate": 1.986179487556963e-05, "loss": 0.4227, "step": 2395 }, { "epoch": 0.053091667926364806, "grad_norm": 1.2778211832046509, "learning_rate": 1.9861218556182194e-05, "loss": 0.3373, "step": 2400 }, { "epoch": 0.053202275567878064, "grad_norm": 2.000532865524292, "learning_rate": 1.986064104605569e-05, "loss": 0.5213, "step": 2405 }, { "epoch": 0.05331288320939133, "grad_norm": 2.2895658016204834, "learning_rate": 1.986006234525986e-05, "loss": 0.3497, "step": 2410 }, { "epoch": 0.05342349085090459, "grad_norm": 1.2706068754196167, "learning_rate": 1.985948245386458e-05, "loss": 0.3636, "step": 2415 }, { "epoch": 0.053534098492417845, "grad_norm": 1.6390936374664307, "learning_rate": 1.985890137193987e-05, "loss": 0.3761, "step": 2420 }, { "epoch": 0.05364470613393111, "grad_norm": 2.2319748401641846, "learning_rate": 1.985831909955589e-05, "loss": 0.3627, "step": 2425 }, { "epoch": 0.05375531377544437, "grad_norm": 1.4689892530441284, "learning_rate": 1.9857735636782958e-05, "loss": 0.3403, "step": 2430 }, { "epoch": 0.053865921416957625, "grad_norm": 1.4020427465438843, "learning_rate": 1.9857150983691525e-05, "loss": 0.3895, "step": 2435 }, { "epoch": 0.05397652905847088, "grad_norm": 1.1936427354812622, "learning_rate": 1.9856565140352187e-05, "loss": 0.3563, "step": 2440 }, { "epoch": 0.05408713669998415, "grad_norm": 1.5421093702316284, "learning_rate": 1.985597810683568e-05, "loss": 0.4406, "step": 2445 }, { "epoch": 0.054197744341497406, "grad_norm": 1.6497217416763306, "learning_rate": 1.9855389883212895e-05, "loss": 0.3502, "step": 2450 }, { "epoch": 0.054308351983010664, "grad_norm": 1.1378445625305176, "learning_rate": 1.9854800469554857e-05, "loss": 0.4455, "step": 2455 }, { "epoch": 0.05441895962452393, "grad_norm": 2.0627214908599854, "learning_rate": 1.9854209865932734e-05, "loss": 0.3676, "step": 2460 }, { "epoch": 0.054529567266037186, "grad_norm": 1.42262864112854, "learning_rate": 1.9853618072417846e-05, "loss": 0.3325, "step": 2465 }, { "epoch": 0.054640174907550444, "grad_norm": 2.2795135974884033, "learning_rate": 1.985302508908165e-05, "loss": 0.3768, "step": 2470 }, { "epoch": 0.05475078254906371, "grad_norm": 1.2795742750167847, "learning_rate": 1.985243091599575e-05, "loss": 0.2947, "step": 2475 }, { "epoch": 0.05486139019057697, "grad_norm": 1.5031499862670898, "learning_rate": 1.985183555323189e-05, "loss": 0.4215, "step": 2480 }, { "epoch": 0.054971997832090225, "grad_norm": 1.957170844078064, "learning_rate": 1.9851239000861962e-05, "loss": 0.4555, "step": 2485 }, { "epoch": 0.05508260547360349, "grad_norm": 1.4793671369552612, "learning_rate": 1.9850641258957997e-05, "loss": 0.3989, "step": 2490 }, { "epoch": 0.05519321311511675, "grad_norm": 1.1744012832641602, "learning_rate": 1.9850042327592177e-05, "loss": 0.3949, "step": 2495 }, { "epoch": 0.055303820756630005, "grad_norm": 1.2310986518859863, "learning_rate": 1.9849442206836818e-05, "loss": 0.3773, "step": 2500 }, { "epoch": 0.05541442839814326, "grad_norm": 1.7182142734527588, "learning_rate": 1.9848840896764384e-05, "loss": 0.326, "step": 2505 }, { "epoch": 0.05552503603965653, "grad_norm": 1.5755306482315063, "learning_rate": 1.9848238397447484e-05, "loss": 0.5485, "step": 2510 }, { "epoch": 0.055635643681169786, "grad_norm": 1.2812979221343994, "learning_rate": 1.9847634708958875e-05, "loss": 0.3722, "step": 2515 }, { "epoch": 0.055746251322683044, "grad_norm": 1.5554815530776978, "learning_rate": 1.984702983137144e-05, "loss": 0.4422, "step": 2520 }, { "epoch": 0.05585685896419631, "grad_norm": 1.1201385259628296, "learning_rate": 1.984642376475823e-05, "loss": 0.3749, "step": 2525 }, { "epoch": 0.055967466605709566, "grad_norm": 1.326851725578308, "learning_rate": 1.9845816509192426e-05, "loss": 0.4321, "step": 2530 }, { "epoch": 0.056078074247222824, "grad_norm": 1.1481646299362183, "learning_rate": 1.9845208064747347e-05, "loss": 0.4789, "step": 2535 }, { "epoch": 0.05618868188873609, "grad_norm": 0.9764200448989868, "learning_rate": 1.9844598431496463e-05, "loss": 0.3737, "step": 2540 }, { "epoch": 0.05629928953024935, "grad_norm": 2.0928797721862793, "learning_rate": 1.9843987609513395e-05, "loss": 0.4845, "step": 2545 }, { "epoch": 0.056409897171762605, "grad_norm": 1.986199140548706, "learning_rate": 1.9843375598871893e-05, "loss": 0.4478, "step": 2550 }, { "epoch": 0.05652050481327587, "grad_norm": 1.6514774560928345, "learning_rate": 1.984276239964586e-05, "loss": 0.3504, "step": 2555 }, { "epoch": 0.05663111245478913, "grad_norm": 1.1027288436889648, "learning_rate": 1.9842148011909336e-05, "loss": 0.3203, "step": 2560 }, { "epoch": 0.056741720096302385, "grad_norm": 1.5458030700683594, "learning_rate": 1.9841532435736513e-05, "loss": 0.5251, "step": 2565 }, { "epoch": 0.05685232773781565, "grad_norm": 1.684320330619812, "learning_rate": 1.9840915671201717e-05, "loss": 0.3464, "step": 2570 }, { "epoch": 0.05696293537932891, "grad_norm": 1.7486944198608398, "learning_rate": 1.9840297718379426e-05, "loss": 0.5183, "step": 2575 }, { "epoch": 0.057073543020842166, "grad_norm": 1.4114190340042114, "learning_rate": 1.9839678577344256e-05, "loss": 0.3752, "step": 2580 }, { "epoch": 0.057184150662355424, "grad_norm": 2.1718153953552246, "learning_rate": 1.9839058248170966e-05, "loss": 0.395, "step": 2585 }, { "epoch": 0.05729475830386869, "grad_norm": 1.3313419818878174, "learning_rate": 1.9838436730934463e-05, "loss": 0.4639, "step": 2590 }, { "epoch": 0.057405365945381946, "grad_norm": 1.5053868293762207, "learning_rate": 1.9837814025709795e-05, "loss": 0.4566, "step": 2595 }, { "epoch": 0.057515973586895204, "grad_norm": 1.357436180114746, "learning_rate": 1.9837190132572155e-05, "loss": 0.385, "step": 2600 }, { "epoch": 0.05762658122840847, "grad_norm": 1.456974983215332, "learning_rate": 1.9836565051596873e-05, "loss": 0.2907, "step": 2605 }, { "epoch": 0.05773718886992173, "grad_norm": 2.573904037475586, "learning_rate": 1.9835938782859432e-05, "loss": 0.4614, "step": 2610 }, { "epoch": 0.057847796511434985, "grad_norm": 2.7076315879821777, "learning_rate": 1.9835311326435452e-05, "loss": 0.5164, "step": 2615 }, { "epoch": 0.05795840415294825, "grad_norm": 1.375097393989563, "learning_rate": 1.9834682682400695e-05, "loss": 0.3107, "step": 2620 }, { "epoch": 0.05806901179446151, "grad_norm": 1.1102280616760254, "learning_rate": 1.983405285083108e-05, "loss": 0.5085, "step": 2625 }, { "epoch": 0.058179619435974765, "grad_norm": 1.360790729522705, "learning_rate": 1.9833421831802644e-05, "loss": 0.2506, "step": 2630 }, { "epoch": 0.05829022707748803, "grad_norm": 1.4457511901855469, "learning_rate": 1.9832789625391595e-05, "loss": 0.4341, "step": 2635 }, { "epoch": 0.05840083471900129, "grad_norm": 1.6549619436264038, "learning_rate": 1.9832156231674264e-05, "loss": 0.3651, "step": 2640 }, { "epoch": 0.058511442360514546, "grad_norm": 1.7004015445709229, "learning_rate": 1.9831521650727138e-05, "loss": 0.4025, "step": 2645 }, { "epoch": 0.058622050002027803, "grad_norm": 1.2435928583145142, "learning_rate": 1.983088588262684e-05, "loss": 0.3558, "step": 2650 }, { "epoch": 0.05873265764354107, "grad_norm": 1.5065828561782837, "learning_rate": 1.9830248927450137e-05, "loss": 0.5151, "step": 2655 }, { "epoch": 0.058843265285054326, "grad_norm": 1.9823298454284668, "learning_rate": 1.9829610785273943e-05, "loss": 0.4064, "step": 2660 }, { "epoch": 0.058953872926567584, "grad_norm": 1.9854854345321655, "learning_rate": 1.9828971456175316e-05, "loss": 0.4743, "step": 2665 }, { "epoch": 0.05906448056808085, "grad_norm": 0.850407600402832, "learning_rate": 1.9828330940231454e-05, "loss": 0.3633, "step": 2670 }, { "epoch": 0.05917508820959411, "grad_norm": 1.316698670387268, "learning_rate": 1.9827689237519693e-05, "loss": 0.2933, "step": 2675 }, { "epoch": 0.059285695851107365, "grad_norm": 1.2008169889450073, "learning_rate": 1.982704634811753e-05, "loss": 0.3914, "step": 2680 }, { "epoch": 0.05939630349262063, "grad_norm": 1.5814332962036133, "learning_rate": 1.9826402272102578e-05, "loss": 0.4573, "step": 2685 }, { "epoch": 0.05950691113413389, "grad_norm": 1.2794475555419922, "learning_rate": 1.9825757009552622e-05, "loss": 0.4557, "step": 2690 }, { "epoch": 0.059617518775647145, "grad_norm": 1.5838959217071533, "learning_rate": 1.9825110560545573e-05, "loss": 0.473, "step": 2695 }, { "epoch": 0.05972812641716041, "grad_norm": 1.4740455150604248, "learning_rate": 1.9824462925159488e-05, "loss": 0.3811, "step": 2700 }, { "epoch": 0.05983873405867367, "grad_norm": 1.1939387321472168, "learning_rate": 1.9823814103472576e-05, "loss": 0.3934, "step": 2705 }, { "epoch": 0.059949341700186926, "grad_norm": 1.4038918018341064, "learning_rate": 1.982316409556317e-05, "loss": 0.3684, "step": 2710 }, { "epoch": 0.06005994934170019, "grad_norm": 1.8136494159698486, "learning_rate": 1.9822512901509764e-05, "loss": 0.185, "step": 2715 }, { "epoch": 0.06017055698321345, "grad_norm": 1.850573182106018, "learning_rate": 1.9821860521390993e-05, "loss": 0.3379, "step": 2720 }, { "epoch": 0.060281164624726706, "grad_norm": 1.782930850982666, "learning_rate": 1.982120695528563e-05, "loss": 0.3279, "step": 2725 }, { "epoch": 0.060391772266239964, "grad_norm": 1.2881648540496826, "learning_rate": 1.982055220327259e-05, "loss": 0.4518, "step": 2730 }, { "epoch": 0.06050237990775323, "grad_norm": 1.6763274669647217, "learning_rate": 1.9819896265430936e-05, "loss": 0.3754, "step": 2735 }, { "epoch": 0.06061298754926649, "grad_norm": 1.6848785877227783, "learning_rate": 1.981923914183987e-05, "loss": 0.5213, "step": 2740 }, { "epoch": 0.060723595190779744, "grad_norm": 1.262988567352295, "learning_rate": 1.9818580832578742e-05, "loss": 0.4077, "step": 2745 }, { "epoch": 0.06083420283229301, "grad_norm": 1.8127613067626953, "learning_rate": 1.981792133772704e-05, "loss": 0.4707, "step": 2750 }, { "epoch": 0.06094481047380627, "grad_norm": 1.5276837348937988, "learning_rate": 1.9817260657364406e-05, "loss": 0.3837, "step": 2755 }, { "epoch": 0.061055418115319525, "grad_norm": 1.525822401046753, "learning_rate": 1.9816598791570606e-05, "loss": 0.3314, "step": 2760 }, { "epoch": 0.06116602575683279, "grad_norm": 1.244940161705017, "learning_rate": 1.981593574042557e-05, "loss": 0.4327, "step": 2765 }, { "epoch": 0.06127663339834605, "grad_norm": 1.7902559041976929, "learning_rate": 1.981527150400935e-05, "loss": 0.5869, "step": 2770 }, { "epoch": 0.061387241039859305, "grad_norm": 1.6476271152496338, "learning_rate": 1.9814606082402162e-05, "loss": 0.4489, "step": 2775 }, { "epoch": 0.06149784868137257, "grad_norm": 1.5973352193832397, "learning_rate": 1.9813939475684352e-05, "loss": 0.3013, "step": 2780 }, { "epoch": 0.06160845632288583, "grad_norm": 1.082259178161621, "learning_rate": 1.981327168393641e-05, "loss": 0.4196, "step": 2785 }, { "epoch": 0.061719063964399086, "grad_norm": 1.6523863077163696, "learning_rate": 1.9812602707238975e-05, "loss": 0.3992, "step": 2790 }, { "epoch": 0.061829671605912344, "grad_norm": 1.0532516241073608, "learning_rate": 1.9811932545672828e-05, "loss": 0.282, "step": 2795 }, { "epoch": 0.06194027924742561, "grad_norm": 1.2658824920654297, "learning_rate": 1.981126119931888e-05, "loss": 0.4589, "step": 2800 }, { "epoch": 0.062050886888938867, "grad_norm": 1.5819928646087646, "learning_rate": 1.9810588668258214e-05, "loss": 0.4793, "step": 2805 }, { "epoch": 0.062161494530452124, "grad_norm": 1.5571277141571045, "learning_rate": 1.980991495257202e-05, "loss": 0.3542, "step": 2810 }, { "epoch": 0.06227210217196539, "grad_norm": 1.3344603776931763, "learning_rate": 1.980924005234166e-05, "loss": 0.344, "step": 2815 }, { "epoch": 0.06238270981347865, "grad_norm": 1.5111874341964722, "learning_rate": 1.980856396764862e-05, "loss": 0.3826, "step": 2820 }, { "epoch": 0.062493317454991905, "grad_norm": 1.517843246459961, "learning_rate": 1.9807886698574545e-05, "loss": 0.3864, "step": 2825 }, { "epoch": 0.06260392509650517, "grad_norm": 1.5268380641937256, "learning_rate": 1.980720824520121e-05, "loss": 0.3534, "step": 2830 }, { "epoch": 0.06271453273801843, "grad_norm": 1.5680739879608154, "learning_rate": 1.980652860761054e-05, "loss": 0.2989, "step": 2835 }, { "epoch": 0.06282514037953169, "grad_norm": 1.0300633907318115, "learning_rate": 1.9805847785884603e-05, "loss": 0.4135, "step": 2840 }, { "epoch": 0.06293574802104494, "grad_norm": 1.4168033599853516, "learning_rate": 1.9805165780105603e-05, "loss": 0.3015, "step": 2845 }, { "epoch": 0.0630463556625582, "grad_norm": 1.4605436325073242, "learning_rate": 1.9804482590355897e-05, "loss": 0.4102, "step": 2850 }, { "epoch": 0.06315696330407147, "grad_norm": 1.9808874130249023, "learning_rate": 1.980379821671797e-05, "loss": 0.4964, "step": 2855 }, { "epoch": 0.06326757094558473, "grad_norm": 1.988883376121521, "learning_rate": 1.9803112659274474e-05, "loss": 0.5291, "step": 2860 }, { "epoch": 0.06337817858709799, "grad_norm": 1.6521811485290527, "learning_rate": 1.9802425918108184e-05, "loss": 0.4785, "step": 2865 }, { "epoch": 0.06348878622861125, "grad_norm": 1.6402193307876587, "learning_rate": 1.980173799330202e-05, "loss": 0.3196, "step": 2870 }, { "epoch": 0.0635993938701245, "grad_norm": 2.109456777572632, "learning_rate": 1.980104888493905e-05, "loss": 0.3173, "step": 2875 }, { "epoch": 0.06371000151163776, "grad_norm": 1.4957531690597534, "learning_rate": 1.9800358593102488e-05, "loss": 0.4037, "step": 2880 }, { "epoch": 0.06382060915315103, "grad_norm": 1.630302906036377, "learning_rate": 1.9799667117875683e-05, "loss": 0.4537, "step": 2885 }, { "epoch": 0.06393121679466429, "grad_norm": 1.1496318578720093, "learning_rate": 1.9798974459342132e-05, "loss": 0.2806, "step": 2890 }, { "epoch": 0.06404182443617755, "grad_norm": 1.3213034868240356, "learning_rate": 1.9798280617585473e-05, "loss": 0.3888, "step": 2895 }, { "epoch": 0.06415243207769081, "grad_norm": 1.362597942352295, "learning_rate": 1.9797585592689486e-05, "loss": 0.3387, "step": 2900 }, { "epoch": 0.06426303971920407, "grad_norm": 1.375345230102539, "learning_rate": 1.9796889384738092e-05, "loss": 0.2847, "step": 2905 }, { "epoch": 0.06437364736071732, "grad_norm": 1.9633907079696655, "learning_rate": 1.979619199381537e-05, "loss": 0.4556, "step": 2910 }, { "epoch": 0.06448425500223058, "grad_norm": 0.9654141664505005, "learning_rate": 1.9795493420005517e-05, "loss": 0.598, "step": 2915 }, { "epoch": 0.06459486264374385, "grad_norm": 2.2244718074798584, "learning_rate": 1.9794793663392887e-05, "loss": 0.3517, "step": 2920 }, { "epoch": 0.06470547028525711, "grad_norm": 1.2480331659317017, "learning_rate": 1.979409272406198e-05, "loss": 0.3815, "step": 2925 }, { "epoch": 0.06481607792677037, "grad_norm": 1.8847757577896118, "learning_rate": 1.9793390602097435e-05, "loss": 0.4074, "step": 2930 }, { "epoch": 0.06492668556828363, "grad_norm": 0.9983028173446655, "learning_rate": 1.979268729758403e-05, "loss": 0.3947, "step": 2935 }, { "epoch": 0.06503729320979688, "grad_norm": 2.2931199073791504, "learning_rate": 1.9791982810606685e-05, "loss": 0.314, "step": 2940 }, { "epoch": 0.06514790085131014, "grad_norm": 2.666687250137329, "learning_rate": 1.9791277141250478e-05, "loss": 0.3432, "step": 2945 }, { "epoch": 0.06525850849282341, "grad_norm": 1.1360424757003784, "learning_rate": 1.9790570289600606e-05, "loss": 0.195, "step": 2950 }, { "epoch": 0.06536911613433667, "grad_norm": 1.5207988023757935, "learning_rate": 1.978986225574243e-05, "loss": 0.3815, "step": 2955 }, { "epoch": 0.06547972377584993, "grad_norm": 1.5952821969985962, "learning_rate": 1.9789153039761436e-05, "loss": 0.2901, "step": 2960 }, { "epoch": 0.06559033141736319, "grad_norm": 1.3310017585754395, "learning_rate": 1.978844264174327e-05, "loss": 0.48, "step": 2965 }, { "epoch": 0.06570093905887645, "grad_norm": 1.0702455043792725, "learning_rate": 1.9787731061773708e-05, "loss": 0.4133, "step": 2970 }, { "epoch": 0.0658115467003897, "grad_norm": 1.7620147466659546, "learning_rate": 1.9787018299938678e-05, "loss": 0.432, "step": 2975 }, { "epoch": 0.06592215434190296, "grad_norm": 1.556247591972351, "learning_rate": 1.9786304356324238e-05, "loss": 0.4659, "step": 2980 }, { "epoch": 0.06603276198341623, "grad_norm": 1.8582919836044312, "learning_rate": 1.9785589231016604e-05, "loss": 0.3494, "step": 2985 }, { "epoch": 0.06614336962492949, "grad_norm": 1.6865006685256958, "learning_rate": 1.9784872924102125e-05, "loss": 0.4227, "step": 2990 }, { "epoch": 0.06625397726644275, "grad_norm": 2.0150105953216553, "learning_rate": 1.9784155435667286e-05, "loss": 0.4749, "step": 2995 }, { "epoch": 0.066364584907956, "grad_norm": 1.4257475137710571, "learning_rate": 1.9783436765798736e-05, "loss": 0.4015, "step": 3000 }, { "epoch": 0.06647519254946926, "grad_norm": 1.3585933446884155, "learning_rate": 1.978271691458325e-05, "loss": 0.3458, "step": 3005 }, { "epoch": 0.06658580019098252, "grad_norm": 2.0094399452209473, "learning_rate": 1.978199588210775e-05, "loss": 0.3735, "step": 3010 }, { "epoch": 0.0666964078324958, "grad_norm": 1.1959577798843384, "learning_rate": 1.9781273668459296e-05, "loss": 0.4491, "step": 3015 }, { "epoch": 0.06680701547400905, "grad_norm": 1.328962802886963, "learning_rate": 1.97805502737251e-05, "loss": 0.3341, "step": 3020 }, { "epoch": 0.06691762311552231, "grad_norm": 2.3554811477661133, "learning_rate": 1.977982569799251e-05, "loss": 0.3177, "step": 3025 }, { "epoch": 0.06702823075703557, "grad_norm": 1.5430545806884766, "learning_rate": 1.9779099941349015e-05, "loss": 0.3751, "step": 3030 }, { "epoch": 0.06713883839854883, "grad_norm": 1.0051460266113281, "learning_rate": 1.977837300388226e-05, "loss": 0.2993, "step": 3035 }, { "epoch": 0.06724944604006208, "grad_norm": 1.2898821830749512, "learning_rate": 1.977764488568001e-05, "loss": 0.3209, "step": 3040 }, { "epoch": 0.06736005368157534, "grad_norm": 1.848175287246704, "learning_rate": 1.9776915586830194e-05, "loss": 0.3696, "step": 3045 }, { "epoch": 0.06747066132308861, "grad_norm": 1.2522225379943848, "learning_rate": 1.9776185107420872e-05, "loss": 0.3608, "step": 3050 }, { "epoch": 0.06758126896460187, "grad_norm": 1.8258564472198486, "learning_rate": 1.9775453447540248e-05, "loss": 0.4362, "step": 3055 }, { "epoch": 0.06769187660611513, "grad_norm": 1.3944040536880493, "learning_rate": 1.977472060727667e-05, "loss": 0.3739, "step": 3060 }, { "epoch": 0.06780248424762839, "grad_norm": 1.9443227052688599, "learning_rate": 1.9773986586718627e-05, "loss": 0.376, "step": 3065 }, { "epoch": 0.06791309188914164, "grad_norm": 1.8227994441986084, "learning_rate": 1.9773251385954755e-05, "loss": 0.4455, "step": 3070 }, { "epoch": 0.0680236995306549, "grad_norm": 1.472651481628418, "learning_rate": 1.9772515005073828e-05, "loss": 0.4056, "step": 3075 }, { "epoch": 0.06813430717216817, "grad_norm": 1.4738481044769287, "learning_rate": 1.9771777444164765e-05, "loss": 0.3862, "step": 3080 }, { "epoch": 0.06824491481368143, "grad_norm": 1.2373828887939453, "learning_rate": 1.977103870331662e-05, "loss": 0.4705, "step": 3085 }, { "epoch": 0.06835552245519469, "grad_norm": 1.3985180854797363, "learning_rate": 1.9770298782618603e-05, "loss": 0.3218, "step": 3090 }, { "epoch": 0.06846613009670795, "grad_norm": 1.3141562938690186, "learning_rate": 1.9769557682160058e-05, "loss": 0.4383, "step": 3095 }, { "epoch": 0.0685767377382212, "grad_norm": 1.1272765398025513, "learning_rate": 1.976881540203047e-05, "loss": 0.3876, "step": 3100 }, { "epoch": 0.06868734537973446, "grad_norm": 1.2238909006118774, "learning_rate": 1.976807194231947e-05, "loss": 0.4116, "step": 3105 }, { "epoch": 0.06879795302124773, "grad_norm": 1.2489120960235596, "learning_rate": 1.976732730311683e-05, "loss": 0.4423, "step": 3110 }, { "epoch": 0.06890856066276099, "grad_norm": 1.9318491220474243, "learning_rate": 1.9766581484512468e-05, "loss": 0.5115, "step": 3115 }, { "epoch": 0.06901916830427425, "grad_norm": 1.8048739433288574, "learning_rate": 1.9765834486596436e-05, "loss": 0.3874, "step": 3120 }, { "epoch": 0.06912977594578751, "grad_norm": 1.3070504665374756, "learning_rate": 1.9765086309458937e-05, "loss": 0.4644, "step": 3125 }, { "epoch": 0.06924038358730077, "grad_norm": 1.3633251190185547, "learning_rate": 1.9764336953190314e-05, "loss": 0.4674, "step": 3130 }, { "epoch": 0.06935099122881402, "grad_norm": 1.4351229667663574, "learning_rate": 1.976358641788105e-05, "loss": 0.4226, "step": 3135 }, { "epoch": 0.06946159887032728, "grad_norm": 1.4013359546661377, "learning_rate": 1.9762834703621778e-05, "loss": 0.5031, "step": 3140 }, { "epoch": 0.06957220651184055, "grad_norm": 2.035982608795166, "learning_rate": 1.976208181050326e-05, "loss": 0.4586, "step": 3145 }, { "epoch": 0.06968281415335381, "grad_norm": 1.6533170938491821, "learning_rate": 1.9761327738616403e-05, "loss": 0.4721, "step": 3150 }, { "epoch": 0.06979342179486707, "grad_norm": 1.3114326000213623, "learning_rate": 1.976057248805227e-05, "loss": 0.5619, "step": 3155 }, { "epoch": 0.06990402943638033, "grad_norm": 1.5795444250106812, "learning_rate": 1.975981605890206e-05, "loss": 0.4101, "step": 3160 }, { "epoch": 0.07001463707789359, "grad_norm": 1.5608770847320557, "learning_rate": 1.97590584512571e-05, "loss": 0.3704, "step": 3165 }, { "epoch": 0.07012524471940684, "grad_norm": 1.4098740816116333, "learning_rate": 1.975829966520888e-05, "loss": 0.3934, "step": 3170 }, { "epoch": 0.07023585236092011, "grad_norm": 1.026309609413147, "learning_rate": 1.975753970084902e-05, "loss": 0.3403, "step": 3175 }, { "epoch": 0.07034646000243337, "grad_norm": 2.441941738128662, "learning_rate": 1.9756778558269282e-05, "loss": 0.5215, "step": 3180 }, { "epoch": 0.07045706764394663, "grad_norm": 0.8199138045310974, "learning_rate": 1.975601623756158e-05, "loss": 0.2475, "step": 3185 }, { "epoch": 0.07056767528545989, "grad_norm": 1.7079726457595825, "learning_rate": 1.9755252738817962e-05, "loss": 0.3236, "step": 3190 }, { "epoch": 0.07067828292697315, "grad_norm": 1.3536673784255981, "learning_rate": 1.975448806213062e-05, "loss": 0.2497, "step": 3195 }, { "epoch": 0.0707888905684864, "grad_norm": 1.1342817544937134, "learning_rate": 1.975372220759189e-05, "loss": 0.3139, "step": 3200 }, { "epoch": 0.07089949820999966, "grad_norm": 3.204569101333618, "learning_rate": 1.9752955175294243e-05, "loss": 0.3677, "step": 3205 }, { "epoch": 0.07101010585151293, "grad_norm": 0.9972839951515198, "learning_rate": 1.9752186965330306e-05, "loss": 0.4885, "step": 3210 }, { "epoch": 0.07112071349302619, "grad_norm": 1.335110068321228, "learning_rate": 1.975141757779283e-05, "loss": 0.3024, "step": 3215 }, { "epoch": 0.07123132113453945, "grad_norm": 1.5905364751815796, "learning_rate": 1.975064701277473e-05, "loss": 0.3874, "step": 3220 }, { "epoch": 0.07134192877605271, "grad_norm": 1.6035890579223633, "learning_rate": 1.974987527036904e-05, "loss": 0.4744, "step": 3225 }, { "epoch": 0.07145253641756596, "grad_norm": 1.2673557996749878, "learning_rate": 1.9749102350668956e-05, "loss": 0.2191, "step": 3230 }, { "epoch": 0.07156314405907922, "grad_norm": 1.6057692766189575, "learning_rate": 1.9748328253767807e-05, "loss": 0.3712, "step": 3235 }, { "epoch": 0.0716737517005925, "grad_norm": 2.0734095573425293, "learning_rate": 1.974755297975906e-05, "loss": 0.4785, "step": 3240 }, { "epoch": 0.07178435934210575, "grad_norm": 2.3651418685913086, "learning_rate": 1.9746776528736332e-05, "loss": 0.3591, "step": 3245 }, { "epoch": 0.07189496698361901, "grad_norm": 1.4388259649276733, "learning_rate": 1.974599890079338e-05, "loss": 0.5114, "step": 3250 }, { "epoch": 0.07200557462513227, "grad_norm": 1.659005045890808, "learning_rate": 1.9745220096024104e-05, "loss": 0.3119, "step": 3255 }, { "epoch": 0.07211618226664553, "grad_norm": 1.6208690404891968, "learning_rate": 1.974444011452254e-05, "loss": 0.4893, "step": 3260 }, { "epoch": 0.07222678990815878, "grad_norm": 1.1838468313217163, "learning_rate": 1.9743658956382875e-05, "loss": 0.3406, "step": 3265 }, { "epoch": 0.07233739754967204, "grad_norm": 0.8888073563575745, "learning_rate": 1.974287662169943e-05, "loss": 0.2657, "step": 3270 }, { "epoch": 0.07244800519118531, "grad_norm": 1.1551666259765625, "learning_rate": 1.9742093110566673e-05, "loss": 0.3214, "step": 3275 }, { "epoch": 0.07255861283269857, "grad_norm": 1.5875320434570312, "learning_rate": 1.974130842307921e-05, "loss": 0.5083, "step": 3280 }, { "epoch": 0.07266922047421183, "grad_norm": 1.3387553691864014, "learning_rate": 1.9740522559331795e-05, "loss": 0.3379, "step": 3285 }, { "epoch": 0.07277982811572509, "grad_norm": 1.3397377729415894, "learning_rate": 1.9739735519419322e-05, "loss": 0.3485, "step": 3290 }, { "epoch": 0.07289043575723834, "grad_norm": 2.1355717182159424, "learning_rate": 1.9738947303436828e-05, "loss": 0.5015, "step": 3295 }, { "epoch": 0.0730010433987516, "grad_norm": 1.2751418352127075, "learning_rate": 1.973815791147948e-05, "loss": 0.377, "step": 3300 }, { "epoch": 0.07311165104026487, "grad_norm": 2.927727460861206, "learning_rate": 1.9737367343642603e-05, "loss": 0.3666, "step": 3305 }, { "epoch": 0.07322225868177813, "grad_norm": 1.1685341596603394, "learning_rate": 1.9736575600021658e-05, "loss": 0.3931, "step": 3310 }, { "epoch": 0.07333286632329139, "grad_norm": 1.1885298490524292, "learning_rate": 1.973578268071225e-05, "loss": 0.3456, "step": 3315 }, { "epoch": 0.07344347396480465, "grad_norm": 1.9908831119537354, "learning_rate": 1.973498858581012e-05, "loss": 0.3678, "step": 3320 }, { "epoch": 0.0735540816063179, "grad_norm": 2.4925599098205566, "learning_rate": 1.9734193315411155e-05, "loss": 0.4816, "step": 3325 }, { "epoch": 0.07366468924783116, "grad_norm": 1.215168833732605, "learning_rate": 1.9733396869611382e-05, "loss": 0.4422, "step": 3330 }, { "epoch": 0.07377529688934442, "grad_norm": 1.0664640665054321, "learning_rate": 1.9732599248506977e-05, "loss": 0.3497, "step": 3335 }, { "epoch": 0.0738859045308577, "grad_norm": 1.0222536325454712, "learning_rate": 1.973180045219425e-05, "loss": 0.3173, "step": 3340 }, { "epoch": 0.07399651217237095, "grad_norm": 2.1025688648223877, "learning_rate": 1.9731000480769654e-05, "loss": 0.4728, "step": 3345 }, { "epoch": 0.07410711981388421, "grad_norm": 1.536736249923706, "learning_rate": 1.9730199334329786e-05, "loss": 0.3333, "step": 3350 }, { "epoch": 0.07421772745539747, "grad_norm": 1.49917733669281, "learning_rate": 1.9729397012971384e-05, "loss": 0.4048, "step": 3355 }, { "epoch": 0.07432833509691072, "grad_norm": 1.8274383544921875, "learning_rate": 1.972859351679133e-05, "loss": 0.3308, "step": 3360 }, { "epoch": 0.07443894273842398, "grad_norm": 1.3568896055221558, "learning_rate": 1.9727788845886646e-05, "loss": 0.2749, "step": 3365 }, { "epoch": 0.07454955037993725, "grad_norm": 1.2323994636535645, "learning_rate": 1.972698300035449e-05, "loss": 0.2982, "step": 3370 }, { "epoch": 0.07466015802145051, "grad_norm": 1.1817598342895508, "learning_rate": 1.9726175980292175e-05, "loss": 0.4573, "step": 3375 }, { "epoch": 0.07477076566296377, "grad_norm": 0.9943642020225525, "learning_rate": 1.972536778579714e-05, "loss": 0.4908, "step": 3380 }, { "epoch": 0.07488137330447703, "grad_norm": 1.1885597705841064, "learning_rate": 1.9724558416966984e-05, "loss": 0.3828, "step": 3385 }, { "epoch": 0.07499198094599029, "grad_norm": 1.2291350364685059, "learning_rate": 1.9723747873899434e-05, "loss": 0.3545, "step": 3390 }, { "epoch": 0.07510258858750354, "grad_norm": 2.339608907699585, "learning_rate": 1.972293615669236e-05, "loss": 0.4451, "step": 3395 }, { "epoch": 0.07521319622901682, "grad_norm": 1.8871612548828125, "learning_rate": 1.972212326544378e-05, "loss": 0.3826, "step": 3400 }, { "epoch": 0.07532380387053007, "grad_norm": 0.9933223128318787, "learning_rate": 1.972130920025185e-05, "loss": 0.3683, "step": 3405 }, { "epoch": 0.07543441151204333, "grad_norm": 1.5405988693237305, "learning_rate": 1.9720493961214865e-05, "loss": 0.4114, "step": 3410 }, { "epoch": 0.07554501915355659, "grad_norm": 1.337378978729248, "learning_rate": 1.971967754843127e-05, "loss": 0.5373, "step": 3415 }, { "epoch": 0.07565562679506985, "grad_norm": 1.0661780834197998, "learning_rate": 1.9718859961999638e-05, "loss": 0.4465, "step": 3420 }, { "epoch": 0.0757662344365831, "grad_norm": 1.078179955482483, "learning_rate": 1.9718041202018705e-05, "loss": 0.4168, "step": 3425 }, { "epoch": 0.07587684207809636, "grad_norm": 1.2196555137634277, "learning_rate": 1.9717221268587326e-05, "loss": 0.3388, "step": 3430 }, { "epoch": 0.07598744971960963, "grad_norm": 1.639794111251831, "learning_rate": 1.9716400161804513e-05, "loss": 0.3225, "step": 3435 }, { "epoch": 0.07609805736112289, "grad_norm": 1.47315514087677, "learning_rate": 1.971557788176941e-05, "loss": 0.2794, "step": 3440 }, { "epoch": 0.07620866500263615, "grad_norm": 1.5209444761276245, "learning_rate": 1.9714754428581308e-05, "loss": 0.2822, "step": 3445 }, { "epoch": 0.07631927264414941, "grad_norm": 1.3897970914840698, "learning_rate": 1.971392980233964e-05, "loss": 0.4727, "step": 3450 }, { "epoch": 0.07642988028566267, "grad_norm": 1.0924663543701172, "learning_rate": 1.971310400314398e-05, "loss": 0.3264, "step": 3455 }, { "epoch": 0.07654048792717592, "grad_norm": 1.4057537317276, "learning_rate": 1.9712277031094046e-05, "loss": 0.2495, "step": 3460 }, { "epoch": 0.0766510955686892, "grad_norm": 1.6868164539337158, "learning_rate": 1.971144888628969e-05, "loss": 0.4588, "step": 3465 }, { "epoch": 0.07676170321020245, "grad_norm": 1.2642247676849365, "learning_rate": 1.9710619568830908e-05, "loss": 0.4628, "step": 3470 }, { "epoch": 0.07687231085171571, "grad_norm": 1.537423014640808, "learning_rate": 1.970978907881784e-05, "loss": 0.4338, "step": 3475 }, { "epoch": 0.07698291849322897, "grad_norm": 1.3159427642822266, "learning_rate": 1.9708957416350778e-05, "loss": 0.3087, "step": 3480 }, { "epoch": 0.07709352613474223, "grad_norm": 1.7150393724441528, "learning_rate": 1.9708124581530135e-05, "loss": 0.495, "step": 3485 }, { "epoch": 0.07720413377625548, "grad_norm": 1.7457314729690552, "learning_rate": 1.9707290574456477e-05, "loss": 0.4556, "step": 3490 }, { "epoch": 0.07731474141776874, "grad_norm": 2.7890079021453857, "learning_rate": 1.9706455395230512e-05, "loss": 0.4676, "step": 3495 }, { "epoch": 0.07742534905928201, "grad_norm": 1.5194826126098633, "learning_rate": 1.9705619043953087e-05, "loss": 0.357, "step": 3500 }, { "epoch": 0.07753595670079527, "grad_norm": 1.6940536499023438, "learning_rate": 1.9704781520725186e-05, "loss": 0.3475, "step": 3505 }, { "epoch": 0.07764656434230853, "grad_norm": 1.5910186767578125, "learning_rate": 1.970394282564795e-05, "loss": 0.3199, "step": 3510 }, { "epoch": 0.07775717198382179, "grad_norm": 1.3418943881988525, "learning_rate": 1.970310295882264e-05, "loss": 0.3884, "step": 3515 }, { "epoch": 0.07786777962533505, "grad_norm": 1.3471966981887817, "learning_rate": 1.9702261920350682e-05, "loss": 0.3694, "step": 3520 }, { "epoch": 0.0779783872668483, "grad_norm": 1.934319019317627, "learning_rate": 1.9701419710333623e-05, "loss": 0.4165, "step": 3525 }, { "epoch": 0.07808899490836158, "grad_norm": 1.2442736625671387, "learning_rate": 1.970057632887316e-05, "loss": 0.3116, "step": 3530 }, { "epoch": 0.07819960254987483, "grad_norm": 1.4284855127334595, "learning_rate": 1.969973177607113e-05, "loss": 0.4863, "step": 3535 }, { "epoch": 0.07831021019138809, "grad_norm": 1.7294188737869263, "learning_rate": 1.9698886052029514e-05, "loss": 0.1956, "step": 3540 }, { "epoch": 0.07842081783290135, "grad_norm": 0.7206429243087769, "learning_rate": 1.9698039156850437e-05, "loss": 0.3757, "step": 3545 }, { "epoch": 0.0785314254744146, "grad_norm": 0.9185183048248291, "learning_rate": 1.9697191090636158e-05, "loss": 0.2894, "step": 3550 }, { "epoch": 0.07864203311592786, "grad_norm": 2.0428104400634766, "learning_rate": 1.969634185348908e-05, "loss": 0.5257, "step": 3555 }, { "epoch": 0.07875264075744112, "grad_norm": 1.334648847579956, "learning_rate": 1.9695491445511744e-05, "loss": 0.4484, "step": 3560 }, { "epoch": 0.0788632483989544, "grad_norm": 1.3266490697860718, "learning_rate": 1.9694639866806846e-05, "loss": 0.369, "step": 3565 }, { "epoch": 0.07897385604046765, "grad_norm": 1.1876170635223389, "learning_rate": 1.9693787117477207e-05, "loss": 0.364, "step": 3570 }, { "epoch": 0.07908446368198091, "grad_norm": 1.3708484172821045, "learning_rate": 1.96929331976258e-05, "loss": 0.3192, "step": 3575 }, { "epoch": 0.07919507132349417, "grad_norm": 1.6894267797470093, "learning_rate": 1.9692078107355735e-05, "loss": 0.5192, "step": 3580 }, { "epoch": 0.07930567896500743, "grad_norm": 1.3401269912719727, "learning_rate": 1.9691221846770258e-05, "loss": 0.4637, "step": 3585 }, { "epoch": 0.07941628660652068, "grad_norm": 1.2902871370315552, "learning_rate": 1.9690364415972773e-05, "loss": 0.3359, "step": 3590 }, { "epoch": 0.07952689424803396, "grad_norm": 1.4295963048934937, "learning_rate": 1.9689505815066804e-05, "loss": 0.2698, "step": 3595 }, { "epoch": 0.07963750188954721, "grad_norm": 1.4061928987503052, "learning_rate": 1.968864604415603e-05, "loss": 0.3423, "step": 3600 }, { "epoch": 0.07974810953106047, "grad_norm": 0.8028144240379333, "learning_rate": 1.968778510334427e-05, "loss": 0.2972, "step": 3605 }, { "epoch": 0.07985871717257373, "grad_norm": 1.4801546335220337, "learning_rate": 1.9686922992735485e-05, "loss": 0.4433, "step": 3610 }, { "epoch": 0.07996932481408699, "grad_norm": 2.528934955596924, "learning_rate": 1.968605971243377e-05, "loss": 0.4588, "step": 3615 }, { "epoch": 0.08007993245560024, "grad_norm": 1.4444230794906616, "learning_rate": 1.9685195262543363e-05, "loss": 0.2785, "step": 3620 }, { "epoch": 0.0801905400971135, "grad_norm": 1.5351407527923584, "learning_rate": 1.9684329643168658e-05, "loss": 0.3641, "step": 3625 }, { "epoch": 0.08030114773862677, "grad_norm": 2.071122884750366, "learning_rate": 1.9683462854414163e-05, "loss": 0.3154, "step": 3630 }, { "epoch": 0.08041175538014003, "grad_norm": 1.4451757669448853, "learning_rate": 1.9682594896384553e-05, "loss": 0.3603, "step": 3635 }, { "epoch": 0.08052236302165329, "grad_norm": 1.7040361166000366, "learning_rate": 1.968172576918463e-05, "loss": 0.5351, "step": 3640 }, { "epoch": 0.08063297066316655, "grad_norm": 1.9786057472229004, "learning_rate": 1.9680855472919342e-05, "loss": 0.2899, "step": 3645 }, { "epoch": 0.0807435783046798, "grad_norm": 1.7509500980377197, "learning_rate": 1.9679984007693774e-05, "loss": 0.3834, "step": 3650 }, { "epoch": 0.08085418594619306, "grad_norm": 1.3812317848205566, "learning_rate": 1.9679111373613158e-05, "loss": 0.2741, "step": 3655 }, { "epoch": 0.08096479358770634, "grad_norm": 1.1472617387771606, "learning_rate": 1.9678237570782864e-05, "loss": 0.4021, "step": 3660 }, { "epoch": 0.08107540122921959, "grad_norm": 1.2279362678527832, "learning_rate": 1.9677362599308406e-05, "loss": 0.4068, "step": 3665 }, { "epoch": 0.08118600887073285, "grad_norm": 1.1142498254776, "learning_rate": 1.967648645929543e-05, "loss": 0.5675, "step": 3670 }, { "epoch": 0.08129661651224611, "grad_norm": 1.5331223011016846, "learning_rate": 1.9675609150849734e-05, "loss": 0.2663, "step": 3675 }, { "epoch": 0.08140722415375937, "grad_norm": 1.142168641090393, "learning_rate": 1.9674730674077254e-05, "loss": 0.3614, "step": 3680 }, { "epoch": 0.08151783179527262, "grad_norm": 1.3425699472427368, "learning_rate": 1.9673851029084063e-05, "loss": 0.3806, "step": 3685 }, { "epoch": 0.0816284394367859, "grad_norm": 1.2615275382995605, "learning_rate": 1.9672970215976374e-05, "loss": 0.3084, "step": 3690 }, { "epoch": 0.08173904707829915, "grad_norm": 1.3936481475830078, "learning_rate": 1.9672088234860557e-05, "loss": 0.4801, "step": 3695 }, { "epoch": 0.08184965471981241, "grad_norm": 1.4427711963653564, "learning_rate": 1.96712050858431e-05, "loss": 0.3938, "step": 3700 }, { "epoch": 0.08196026236132567, "grad_norm": 1.3403998613357544, "learning_rate": 1.9670320769030647e-05, "loss": 0.4486, "step": 3705 }, { "epoch": 0.08207087000283893, "grad_norm": 0.884281575679779, "learning_rate": 1.9669435284529976e-05, "loss": 0.3271, "step": 3710 }, { "epoch": 0.08218147764435219, "grad_norm": 1.0645185708999634, "learning_rate": 1.9668548632448017e-05, "loss": 0.3204, "step": 3715 }, { "epoch": 0.08229208528586544, "grad_norm": 1.47297203540802, "learning_rate": 1.9667660812891826e-05, "loss": 0.2898, "step": 3720 }, { "epoch": 0.08240269292737872, "grad_norm": 1.5096337795257568, "learning_rate": 1.9666771825968603e-05, "loss": 0.3765, "step": 3725 }, { "epoch": 0.08251330056889197, "grad_norm": 2.1601240634918213, "learning_rate": 1.9665881671785702e-05, "loss": 0.3963, "step": 3730 }, { "epoch": 0.08262390821040523, "grad_norm": 1.6269676685333252, "learning_rate": 1.9664990350450607e-05, "loss": 0.352, "step": 3735 }, { "epoch": 0.08273451585191849, "grad_norm": 1.7862251996994019, "learning_rate": 1.966409786207094e-05, "loss": 0.3817, "step": 3740 }, { "epoch": 0.08284512349343175, "grad_norm": 0.9522467255592346, "learning_rate": 1.966320420675447e-05, "loss": 0.4202, "step": 3745 }, { "epoch": 0.082955731134945, "grad_norm": 2.4931411743164062, "learning_rate": 1.9662309384609114e-05, "loss": 0.4853, "step": 3750 }, { "epoch": 0.08306633877645828, "grad_norm": 1.5186878442764282, "learning_rate": 1.9661413395742907e-05, "loss": 0.4868, "step": 3755 }, { "epoch": 0.08317694641797153, "grad_norm": 2.2158775329589844, "learning_rate": 1.9660516240264053e-05, "loss": 0.4887, "step": 3760 }, { "epoch": 0.08328755405948479, "grad_norm": 1.4178979396820068, "learning_rate": 1.9659617918280873e-05, "loss": 0.4821, "step": 3765 }, { "epoch": 0.08339816170099805, "grad_norm": 1.5021400451660156, "learning_rate": 1.9658718429901843e-05, "loss": 0.3488, "step": 3770 }, { "epoch": 0.08350876934251131, "grad_norm": 1.5557526350021362, "learning_rate": 1.965781777523558e-05, "loss": 0.313, "step": 3775 }, { "epoch": 0.08361937698402457, "grad_norm": 1.8280081748962402, "learning_rate": 1.9656915954390832e-05, "loss": 0.4423, "step": 3780 }, { "epoch": 0.08372998462553782, "grad_norm": 1.4816877841949463, "learning_rate": 1.9656012967476498e-05, "loss": 0.3669, "step": 3785 }, { "epoch": 0.0838405922670511, "grad_norm": 1.860620141029358, "learning_rate": 1.9655108814601607e-05, "loss": 0.5233, "step": 3790 }, { "epoch": 0.08395119990856435, "grad_norm": 1.3878202438354492, "learning_rate": 1.965420349587534e-05, "loss": 0.5487, "step": 3795 }, { "epoch": 0.08406180755007761, "grad_norm": 1.2532196044921875, "learning_rate": 1.965329701140702e-05, "loss": 0.3159, "step": 3800 }, { "epoch": 0.08417241519159087, "grad_norm": 1.2725090980529785, "learning_rate": 1.965238936130609e-05, "loss": 0.3421, "step": 3805 }, { "epoch": 0.08428302283310413, "grad_norm": 2.2078006267547607, "learning_rate": 1.965148054568216e-05, "loss": 0.3394, "step": 3810 }, { "epoch": 0.08439363047461738, "grad_norm": 0.9230765104293823, "learning_rate": 1.9650570564644962e-05, "loss": 0.2869, "step": 3815 }, { "epoch": 0.08450423811613066, "grad_norm": 1.226900577545166, "learning_rate": 1.9649659418304382e-05, "loss": 0.3485, "step": 3820 }, { "epoch": 0.08461484575764391, "grad_norm": 2.2408740520477295, "learning_rate": 1.964874710677044e-05, "loss": 0.4928, "step": 3825 }, { "epoch": 0.08472545339915717, "grad_norm": 1.6669727563858032, "learning_rate": 1.9647833630153294e-05, "loss": 0.6281, "step": 3830 }, { "epoch": 0.08483606104067043, "grad_norm": 1.6923081874847412, "learning_rate": 1.9646918988563248e-05, "loss": 0.4006, "step": 3835 }, { "epoch": 0.08494666868218369, "grad_norm": 1.7452138662338257, "learning_rate": 1.964600318211074e-05, "loss": 0.489, "step": 3840 }, { "epoch": 0.08505727632369695, "grad_norm": 1.8274058103561401, "learning_rate": 1.9645086210906363e-05, "loss": 0.4904, "step": 3845 }, { "epoch": 0.0851678839652102, "grad_norm": 1.1492518186569214, "learning_rate": 1.9644168075060834e-05, "loss": 0.2868, "step": 3850 }, { "epoch": 0.08527849160672347, "grad_norm": 0.9597756862640381, "learning_rate": 1.964324877468502e-05, "loss": 0.3886, "step": 3855 }, { "epoch": 0.08538909924823673, "grad_norm": 1.7558127641677856, "learning_rate": 1.964232830988992e-05, "loss": 0.3915, "step": 3860 }, { "epoch": 0.08549970688974999, "grad_norm": 1.623683214187622, "learning_rate": 1.964140668078669e-05, "loss": 0.3586, "step": 3865 }, { "epoch": 0.08561031453126325, "grad_norm": 1.1671375036239624, "learning_rate": 1.9640483887486614e-05, "loss": 0.3411, "step": 3870 }, { "epoch": 0.0857209221727765, "grad_norm": 1.5694273710250854, "learning_rate": 1.963955993010111e-05, "loss": 0.5301, "step": 3875 }, { "epoch": 0.08583152981428976, "grad_norm": 1.726396918296814, "learning_rate": 1.9638634808741757e-05, "loss": 0.5336, "step": 3880 }, { "epoch": 0.08594213745580304, "grad_norm": 1.731339454650879, "learning_rate": 1.963770852352026e-05, "loss": 0.454, "step": 3885 }, { "epoch": 0.0860527450973163, "grad_norm": 3.019259214401245, "learning_rate": 1.963678107454846e-05, "loss": 0.4708, "step": 3890 }, { "epoch": 0.08616335273882955, "grad_norm": 1.7385001182556152, "learning_rate": 1.9635852461938354e-05, "loss": 0.4186, "step": 3895 }, { "epoch": 0.08627396038034281, "grad_norm": 1.828263759613037, "learning_rate": 1.963492268580207e-05, "loss": 0.3582, "step": 3900 }, { "epoch": 0.08638456802185607, "grad_norm": 1.3344987630844116, "learning_rate": 1.963399174625188e-05, "loss": 0.4162, "step": 3905 }, { "epoch": 0.08649517566336933, "grad_norm": 1.497778058052063, "learning_rate": 1.963305964340019e-05, "loss": 0.3425, "step": 3910 }, { "epoch": 0.08660578330488258, "grad_norm": 1.5689443349838257, "learning_rate": 1.9632126377359557e-05, "loss": 0.3658, "step": 3915 }, { "epoch": 0.08671639094639585, "grad_norm": 1.269541621208191, "learning_rate": 1.963119194824267e-05, "loss": 0.3347, "step": 3920 }, { "epoch": 0.08682699858790911, "grad_norm": 1.4487590789794922, "learning_rate": 1.963025635616236e-05, "loss": 0.3694, "step": 3925 }, { "epoch": 0.08693760622942237, "grad_norm": 1.1329331398010254, "learning_rate": 1.9629319601231596e-05, "loss": 0.3713, "step": 3930 }, { "epoch": 0.08704821387093563, "grad_norm": 1.836942195892334, "learning_rate": 1.9628381683563494e-05, "loss": 0.4512, "step": 3935 }, { "epoch": 0.08715882151244889, "grad_norm": 1.7154439687728882, "learning_rate": 1.962744260327131e-05, "loss": 0.3581, "step": 3940 }, { "epoch": 0.08726942915396214, "grad_norm": 1.5830281972885132, "learning_rate": 1.962650236046844e-05, "loss": 0.3789, "step": 3945 }, { "epoch": 0.08738003679547542, "grad_norm": 3.0967328548431396, "learning_rate": 1.962556095526841e-05, "loss": 0.4064, "step": 3950 }, { "epoch": 0.08749064443698867, "grad_norm": 1.327884316444397, "learning_rate": 1.9624618387784898e-05, "loss": 0.3633, "step": 3955 }, { "epoch": 0.08760125207850193, "grad_norm": 1.1729737520217896, "learning_rate": 1.9623674658131717e-05, "loss": 0.2086, "step": 3960 }, { "epoch": 0.08771185972001519, "grad_norm": 1.4069775342941284, "learning_rate": 1.9622729766422824e-05, "loss": 0.3309, "step": 3965 }, { "epoch": 0.08782246736152845, "grad_norm": 1.625845193862915, "learning_rate": 1.962178371277232e-05, "loss": 0.4272, "step": 3970 }, { "epoch": 0.0879330750030417, "grad_norm": 1.9637670516967773, "learning_rate": 1.9620836497294428e-05, "loss": 0.5227, "step": 3975 }, { "epoch": 0.08804368264455498, "grad_norm": 1.588945984840393, "learning_rate": 1.9619888120103535e-05, "loss": 0.3073, "step": 3980 }, { "epoch": 0.08815429028606823, "grad_norm": 1.7461386919021606, "learning_rate": 1.961893858131415e-05, "loss": 0.561, "step": 3985 }, { "epoch": 0.08826489792758149, "grad_norm": 1.068893313407898, "learning_rate": 1.9617987881040936e-05, "loss": 0.3624, "step": 3990 }, { "epoch": 0.08837550556909475, "grad_norm": 0.9730588793754578, "learning_rate": 1.9617036019398685e-05, "loss": 0.3707, "step": 3995 }, { "epoch": 0.08848611321060801, "grad_norm": 1.8597757816314697, "learning_rate": 1.9616082996502334e-05, "loss": 0.4419, "step": 4000 }, { "epoch": 0.08859672085212127, "grad_norm": 1.3437812328338623, "learning_rate": 1.961512881246696e-05, "loss": 0.5466, "step": 4005 }, { "epoch": 0.08870732849363452, "grad_norm": 1.3370863199234009, "learning_rate": 1.961417346740779e-05, "loss": 0.4153, "step": 4010 }, { "epoch": 0.0888179361351478, "grad_norm": 1.5355076789855957, "learning_rate": 1.9613216961440167e-05, "loss": 0.4273, "step": 4015 }, { "epoch": 0.08892854377666105, "grad_norm": 1.3989381790161133, "learning_rate": 1.9612259294679598e-05, "loss": 0.5074, "step": 4020 }, { "epoch": 0.08903915141817431, "grad_norm": 1.544713020324707, "learning_rate": 1.9611300467241717e-05, "loss": 0.3912, "step": 4025 }, { "epoch": 0.08914975905968757, "grad_norm": 1.2308688163757324, "learning_rate": 1.9610340479242304e-05, "loss": 0.4737, "step": 4030 }, { "epoch": 0.08926036670120083, "grad_norm": 5.219364166259766, "learning_rate": 1.9609379330797274e-05, "loss": 0.3395, "step": 4035 }, { "epoch": 0.08937097434271409, "grad_norm": 1.7565997838974, "learning_rate": 1.960841702202269e-05, "loss": 0.4756, "step": 4040 }, { "epoch": 0.08948158198422736, "grad_norm": 1.4183964729309082, "learning_rate": 1.960745355303475e-05, "loss": 0.3605, "step": 4045 }, { "epoch": 0.08959218962574061, "grad_norm": 1.4195950031280518, "learning_rate": 1.960648892394979e-05, "loss": 0.3691, "step": 4050 }, { "epoch": 0.08970279726725387, "grad_norm": 1.6318808794021606, "learning_rate": 1.960552313488429e-05, "loss": 0.2464, "step": 4055 }, { "epoch": 0.08981340490876713, "grad_norm": 1.0707823038101196, "learning_rate": 1.960455618595487e-05, "loss": 0.4396, "step": 4060 }, { "epoch": 0.08992401255028039, "grad_norm": 1.1653443574905396, "learning_rate": 1.9603588077278286e-05, "loss": 0.3437, "step": 4065 }, { "epoch": 0.09003462019179365, "grad_norm": 1.4535192251205444, "learning_rate": 1.9602618808971437e-05, "loss": 0.3719, "step": 4070 }, { "epoch": 0.0901452278333069, "grad_norm": 1.7346543073654175, "learning_rate": 1.960164838115137e-05, "loss": 0.5425, "step": 4075 }, { "epoch": 0.09025583547482018, "grad_norm": 0.8611789345741272, "learning_rate": 1.9600676793935248e-05, "loss": 0.328, "step": 4080 }, { "epoch": 0.09036644311633343, "grad_norm": 1.2692785263061523, "learning_rate": 1.9599704047440403e-05, "loss": 0.4721, "step": 4085 }, { "epoch": 0.09047705075784669, "grad_norm": 1.2276475429534912, "learning_rate": 1.959873014178429e-05, "loss": 0.4338, "step": 4090 }, { "epoch": 0.09058765839935995, "grad_norm": 1.8148858547210693, "learning_rate": 1.9597755077084505e-05, "loss": 0.5559, "step": 4095 }, { "epoch": 0.09069826604087321, "grad_norm": 1.3467168807983398, "learning_rate": 1.959677885345879e-05, "loss": 0.4652, "step": 4100 }, { "epoch": 0.09080887368238647, "grad_norm": 1.4597879648208618, "learning_rate": 1.9595801471025026e-05, "loss": 0.5865, "step": 4105 }, { "epoch": 0.09091948132389974, "grad_norm": 1.418942928314209, "learning_rate": 1.9594822929901224e-05, "loss": 0.3972, "step": 4110 }, { "epoch": 0.091030088965413, "grad_norm": 1.3826271295547485, "learning_rate": 1.959384323020555e-05, "loss": 0.4186, "step": 4115 }, { "epoch": 0.09114069660692625, "grad_norm": 1.1572391986846924, "learning_rate": 1.9592862372056298e-05, "loss": 0.4333, "step": 4120 }, { "epoch": 0.09125130424843951, "grad_norm": 1.6297577619552612, "learning_rate": 1.959188035557191e-05, "loss": 0.4853, "step": 4125 }, { "epoch": 0.09136191188995277, "grad_norm": 1.5819309949874878, "learning_rate": 1.9590897180870958e-05, "loss": 0.4585, "step": 4130 }, { "epoch": 0.09147251953146603, "grad_norm": 1.0589486360549927, "learning_rate": 1.958991284807217e-05, "loss": 0.5079, "step": 4135 }, { "epoch": 0.09158312717297928, "grad_norm": 1.2278800010681152, "learning_rate": 1.958892735729439e-05, "loss": 0.4941, "step": 4140 }, { "epoch": 0.09169373481449256, "grad_norm": 1.8148773908615112, "learning_rate": 1.958794070865663e-05, "loss": 0.2907, "step": 4145 }, { "epoch": 0.09180434245600581, "grad_norm": 1.4559823274612427, "learning_rate": 1.9586952902278014e-05, "loss": 0.2946, "step": 4150 }, { "epoch": 0.09191495009751907, "grad_norm": 2.2935256958007812, "learning_rate": 1.9585963938277834e-05, "loss": 0.3217, "step": 4155 }, { "epoch": 0.09202555773903233, "grad_norm": 1.4520325660705566, "learning_rate": 1.9584973816775496e-05, "loss": 0.4201, "step": 4160 }, { "epoch": 0.09213616538054559, "grad_norm": 1.8532183170318604, "learning_rate": 1.958398253789056e-05, "loss": 0.4215, "step": 4165 }, { "epoch": 0.09224677302205884, "grad_norm": 1.3256938457489014, "learning_rate": 1.9582990101742726e-05, "loss": 0.3567, "step": 4170 }, { "epoch": 0.09235738066357212, "grad_norm": 2.0483310222625732, "learning_rate": 1.9581996508451824e-05, "loss": 0.4832, "step": 4175 }, { "epoch": 0.09246798830508537, "grad_norm": 2.3817458152770996, "learning_rate": 1.9581001758137833e-05, "loss": 0.4337, "step": 4180 }, { "epoch": 0.09257859594659863, "grad_norm": 1.0804271697998047, "learning_rate": 1.9580005850920872e-05, "loss": 0.3878, "step": 4185 }, { "epoch": 0.09268920358811189, "grad_norm": 1.2211874723434448, "learning_rate": 1.9579008786921193e-05, "loss": 0.3749, "step": 4190 }, { "epoch": 0.09279981122962515, "grad_norm": 1.8516230583190918, "learning_rate": 1.9578010566259193e-05, "loss": 0.4093, "step": 4195 }, { "epoch": 0.0929104188711384, "grad_norm": 1.8642029762268066, "learning_rate": 1.95770111890554e-05, "loss": 0.4928, "step": 4200 }, { "epoch": 0.09302102651265166, "grad_norm": 1.4502556324005127, "learning_rate": 1.95760106554305e-05, "loss": 0.4788, "step": 4205 }, { "epoch": 0.09313163415416494, "grad_norm": 2.124992609024048, "learning_rate": 1.95750089655053e-05, "loss": 0.5324, "step": 4210 }, { "epoch": 0.0932422417956782, "grad_norm": 1.2536089420318604, "learning_rate": 1.9574006119400755e-05, "loss": 0.3951, "step": 4215 }, { "epoch": 0.09335284943719145, "grad_norm": 1.4176779985427856, "learning_rate": 1.9573002117237955e-05, "loss": 0.3865, "step": 4220 }, { "epoch": 0.09346345707870471, "grad_norm": 1.5544732809066772, "learning_rate": 1.957199695913814e-05, "loss": 0.4119, "step": 4225 }, { "epoch": 0.09357406472021797, "grad_norm": 1.4633272886276245, "learning_rate": 1.9570990645222678e-05, "loss": 0.4912, "step": 4230 }, { "epoch": 0.09368467236173122, "grad_norm": 1.3845857381820679, "learning_rate": 1.9569983175613082e-05, "loss": 0.2745, "step": 4235 }, { "epoch": 0.0937952800032445, "grad_norm": 1.7605035305023193, "learning_rate": 1.9568974550431002e-05, "loss": 0.3388, "step": 4240 }, { "epoch": 0.09390588764475775, "grad_norm": 2.136582374572754, "learning_rate": 1.9567964769798235e-05, "loss": 0.3826, "step": 4245 }, { "epoch": 0.09401649528627101, "grad_norm": 1.7197285890579224, "learning_rate": 1.9566953833836705e-05, "loss": 0.3757, "step": 4250 }, { "epoch": 0.09412710292778427, "grad_norm": 4.161820888519287, "learning_rate": 1.956594174266848e-05, "loss": 0.4023, "step": 4255 }, { "epoch": 0.09423771056929753, "grad_norm": 1.0170098543167114, "learning_rate": 1.956492849641578e-05, "loss": 0.3384, "step": 4260 }, { "epoch": 0.09434831821081079, "grad_norm": 1.2917170524597168, "learning_rate": 1.956391409520095e-05, "loss": 0.3737, "step": 4265 }, { "epoch": 0.09445892585232406, "grad_norm": 1.2208441495895386, "learning_rate": 1.9562898539146476e-05, "loss": 0.4012, "step": 4270 }, { "epoch": 0.09456953349383732, "grad_norm": 1.7635269165039062, "learning_rate": 1.956188182837498e-05, "loss": 0.4176, "step": 4275 }, { "epoch": 0.09468014113535057, "grad_norm": 1.5950474739074707, "learning_rate": 1.9560863963009247e-05, "loss": 0.4887, "step": 4280 }, { "epoch": 0.09479074877686383, "grad_norm": 1.4680172204971313, "learning_rate": 1.955984494317217e-05, "loss": 0.3956, "step": 4285 }, { "epoch": 0.09490135641837709, "grad_norm": 2.4385735988616943, "learning_rate": 1.95588247689868e-05, "loss": 0.3269, "step": 4290 }, { "epoch": 0.09501196405989035, "grad_norm": 1.7465449571609497, "learning_rate": 1.9557803440576316e-05, "loss": 0.4799, "step": 4295 }, { "epoch": 0.0951225717014036, "grad_norm": 1.4933891296386719, "learning_rate": 1.9556780958064053e-05, "loss": 0.5467, "step": 4300 }, { "epoch": 0.09523317934291688, "grad_norm": 1.4667818546295166, "learning_rate": 1.955575732157347e-05, "loss": 0.317, "step": 4305 }, { "epoch": 0.09534378698443013, "grad_norm": 0.8181486129760742, "learning_rate": 1.9554732531228177e-05, "loss": 0.3886, "step": 4310 }, { "epoch": 0.09545439462594339, "grad_norm": 1.6208062171936035, "learning_rate": 1.9553706587151904e-05, "loss": 0.4225, "step": 4315 }, { "epoch": 0.09556500226745665, "grad_norm": 1.7065292596817017, "learning_rate": 1.9552679489468545e-05, "loss": 0.4244, "step": 4320 }, { "epoch": 0.09567560990896991, "grad_norm": 1.34455406665802, "learning_rate": 1.9551651238302122e-05, "loss": 0.35, "step": 4325 }, { "epoch": 0.09578621755048317, "grad_norm": 1.272398591041565, "learning_rate": 1.955062183377679e-05, "loss": 0.5036, "step": 4330 }, { "epoch": 0.09589682519199644, "grad_norm": 1.0005327463150024, "learning_rate": 1.9549591276016852e-05, "loss": 0.3973, "step": 4335 }, { "epoch": 0.0960074328335097, "grad_norm": 1.161360263824463, "learning_rate": 1.9548559565146744e-05, "loss": 0.399, "step": 4340 }, { "epoch": 0.09611804047502295, "grad_norm": 1.1983336210250854, "learning_rate": 1.954752670129105e-05, "loss": 0.3525, "step": 4345 }, { "epoch": 0.09622864811653621, "grad_norm": 1.3194074630737305, "learning_rate": 1.9546492684574482e-05, "loss": 0.5158, "step": 4350 }, { "epoch": 0.09633925575804947, "grad_norm": 1.634223461151123, "learning_rate": 1.9545457515121908e-05, "loss": 0.481, "step": 4355 }, { "epoch": 0.09644986339956273, "grad_norm": 1.572007656097412, "learning_rate": 1.9544421193058314e-05, "loss": 0.5078, "step": 4360 }, { "epoch": 0.09656047104107598, "grad_norm": 1.3350406885147095, "learning_rate": 1.954338371850884e-05, "loss": 0.5197, "step": 4365 }, { "epoch": 0.09667107868258926, "grad_norm": 1.392389178276062, "learning_rate": 1.9542345091598757e-05, "loss": 0.4393, "step": 4370 }, { "epoch": 0.09678168632410251, "grad_norm": 1.234641671180725, "learning_rate": 1.9541305312453486e-05, "loss": 0.4572, "step": 4375 }, { "epoch": 0.09689229396561577, "grad_norm": 1.68038809299469, "learning_rate": 1.9540264381198574e-05, "loss": 0.3226, "step": 4380 }, { "epoch": 0.09700290160712903, "grad_norm": 1.1811834573745728, "learning_rate": 1.9539222297959714e-05, "loss": 0.4573, "step": 4385 }, { "epoch": 0.09711350924864229, "grad_norm": 1.533383846282959, "learning_rate": 1.9538179062862738e-05, "loss": 0.2887, "step": 4390 }, { "epoch": 0.09722411689015555, "grad_norm": 1.4122403860092163, "learning_rate": 1.9537134676033613e-05, "loss": 0.4157, "step": 4395 }, { "epoch": 0.09733472453166882, "grad_norm": 2.2901430130004883, "learning_rate": 1.9536089137598457e-05, "loss": 0.3989, "step": 4400 }, { "epoch": 0.09744533217318208, "grad_norm": 1.122094988822937, "learning_rate": 1.953504244768351e-05, "loss": 0.286, "step": 4405 }, { "epoch": 0.09755593981469533, "grad_norm": 0.8675702214241028, "learning_rate": 1.9533994606415163e-05, "loss": 0.4208, "step": 4410 }, { "epoch": 0.09766654745620859, "grad_norm": 1.2922241687774658, "learning_rate": 1.9532945613919943e-05, "loss": 0.2907, "step": 4415 }, { "epoch": 0.09777715509772185, "grad_norm": 1.3707270622253418, "learning_rate": 1.9531895470324515e-05, "loss": 0.3253, "step": 4420 }, { "epoch": 0.0978877627392351, "grad_norm": 2.0705864429473877, "learning_rate": 1.953084417575568e-05, "loss": 0.446, "step": 4425 }, { "epoch": 0.09799837038074836, "grad_norm": 1.0638395547866821, "learning_rate": 1.952979173034039e-05, "loss": 0.4582, "step": 4430 }, { "epoch": 0.09810897802226164, "grad_norm": 1.1832499504089355, "learning_rate": 1.952873813420572e-05, "loss": 0.4339, "step": 4435 }, { "epoch": 0.0982195856637749, "grad_norm": 1.4301100969314575, "learning_rate": 1.952768338747889e-05, "loss": 0.3214, "step": 4440 }, { "epoch": 0.09833019330528815, "grad_norm": 1.7577379941940308, "learning_rate": 1.9526627490287265e-05, "loss": 0.4108, "step": 4445 }, { "epoch": 0.09844080094680141, "grad_norm": 1.0190526247024536, "learning_rate": 1.9525570442758348e-05, "loss": 0.2667, "step": 4450 }, { "epoch": 0.09855140858831467, "grad_norm": 1.8615275621414185, "learning_rate": 1.9524512245019766e-05, "loss": 0.3338, "step": 4455 }, { "epoch": 0.09866201622982793, "grad_norm": 1.7499860525131226, "learning_rate": 1.9523452897199306e-05, "loss": 0.3407, "step": 4460 }, { "epoch": 0.0987726238713412, "grad_norm": 1.3750590085983276, "learning_rate": 1.9522392399424877e-05, "loss": 0.4499, "step": 4465 }, { "epoch": 0.09888323151285446, "grad_norm": 1.4766864776611328, "learning_rate": 1.952133075182454e-05, "loss": 0.3496, "step": 4470 }, { "epoch": 0.09899383915436771, "grad_norm": 1.9199429750442505, "learning_rate": 1.9520267954526487e-05, "loss": 0.4197, "step": 4475 }, { "epoch": 0.09910444679588097, "grad_norm": 1.033484935760498, "learning_rate": 1.9519204007659043e-05, "loss": 0.3317, "step": 4480 }, { "epoch": 0.09921505443739423, "grad_norm": 1.43253493309021, "learning_rate": 1.9518138911350693e-05, "loss": 0.3326, "step": 4485 }, { "epoch": 0.09932566207890749, "grad_norm": 1.004762887954712, "learning_rate": 1.9517072665730034e-05, "loss": 0.2728, "step": 4490 }, { "epoch": 0.09943626972042074, "grad_norm": 1.5712361335754395, "learning_rate": 1.951600527092582e-05, "loss": 0.4119, "step": 4495 }, { "epoch": 0.09954687736193402, "grad_norm": 1.5485153198242188, "learning_rate": 1.9514936727066943e-05, "loss": 0.3805, "step": 4500 }, { "epoch": 0.09965748500344727, "grad_norm": 1.6068989038467407, "learning_rate": 1.9513867034282425e-05, "loss": 0.4252, "step": 4505 }, { "epoch": 0.09976809264496053, "grad_norm": 1.6042925119400024, "learning_rate": 1.9512796192701425e-05, "loss": 0.4205, "step": 4510 }, { "epoch": 0.09987870028647379, "grad_norm": 1.5272160768508911, "learning_rate": 1.951172420245326e-05, "loss": 0.4486, "step": 4515 }, { "epoch": 0.09998930792798705, "grad_norm": 1.520217776298523, "learning_rate": 1.951065106366736e-05, "loss": 0.3311, "step": 4520 }, { "epoch": 0.1000999155695003, "grad_norm": 1.381764531135559, "learning_rate": 1.9509576776473312e-05, "loss": 0.5783, "step": 4525 }, { "epoch": 0.10021052321101358, "grad_norm": 1.559191346168518, "learning_rate": 1.9508501341000835e-05, "loss": 0.4599, "step": 4530 }, { "epoch": 0.10032113085252684, "grad_norm": 1.1431931257247925, "learning_rate": 1.9507424757379793e-05, "loss": 0.3249, "step": 4535 }, { "epoch": 0.1004317384940401, "grad_norm": 1.688443899154663, "learning_rate": 1.9506347025740174e-05, "loss": 0.4655, "step": 4540 }, { "epoch": 0.10054234613555335, "grad_norm": 1.7571477890014648, "learning_rate": 1.950526814621212e-05, "loss": 0.3341, "step": 4545 }, { "epoch": 0.10065295377706661, "grad_norm": 1.411932349205017, "learning_rate": 1.9504188118925902e-05, "loss": 0.4698, "step": 4550 }, { "epoch": 0.10076356141857987, "grad_norm": 1.624600887298584, "learning_rate": 1.9503106944011934e-05, "loss": 0.3406, "step": 4555 }, { "epoch": 0.10087416906009314, "grad_norm": 1.201497197151184, "learning_rate": 1.9502024621600766e-05, "loss": 0.4984, "step": 4560 }, { "epoch": 0.1009847767016064, "grad_norm": 1.804135799407959, "learning_rate": 1.950094115182309e-05, "loss": 0.4391, "step": 4565 }, { "epoch": 0.10109538434311965, "grad_norm": 1.5037630796432495, "learning_rate": 1.9499856534809738e-05, "loss": 0.3187, "step": 4570 }, { "epoch": 0.10120599198463291, "grad_norm": 1.8742636442184448, "learning_rate": 1.9498770770691672e-05, "loss": 0.5733, "step": 4575 }, { "epoch": 0.10131659962614617, "grad_norm": 1.8397403955459595, "learning_rate": 1.9497683859599997e-05, "loss": 0.3862, "step": 4580 }, { "epoch": 0.10142720726765943, "grad_norm": 1.6288434267044067, "learning_rate": 1.9496595801665963e-05, "loss": 0.4091, "step": 4585 }, { "epoch": 0.10153781490917269, "grad_norm": 2.073798656463623, "learning_rate": 1.9495506597020947e-05, "loss": 0.3005, "step": 4590 }, { "epoch": 0.10164842255068596, "grad_norm": 1.5576467514038086, "learning_rate": 1.9494416245796474e-05, "loss": 0.304, "step": 4595 }, { "epoch": 0.10175903019219922, "grad_norm": 1.5484106540679932, "learning_rate": 1.94933247481242e-05, "loss": 0.3503, "step": 4600 }, { "epoch": 0.10186963783371247, "grad_norm": 1.0045183897018433, "learning_rate": 1.9492232104135925e-05, "loss": 0.3221, "step": 4605 }, { "epoch": 0.10198024547522573, "grad_norm": 1.3011510372161865, "learning_rate": 1.9491138313963588e-05, "loss": 0.3331, "step": 4610 }, { "epoch": 0.10209085311673899, "grad_norm": 1.5429834127426147, "learning_rate": 1.9490043377739258e-05, "loss": 0.2311, "step": 4615 }, { "epoch": 0.10220146075825225, "grad_norm": 1.891099452972412, "learning_rate": 1.948894729559515e-05, "loss": 0.3791, "step": 4620 }, { "epoch": 0.10231206839976552, "grad_norm": 1.2466233968734741, "learning_rate": 1.9487850067663618e-05, "loss": 0.2346, "step": 4625 }, { "epoch": 0.10242267604127878, "grad_norm": 2.1488869190216064, "learning_rate": 1.9486751694077154e-05, "loss": 0.444, "step": 4630 }, { "epoch": 0.10253328368279203, "grad_norm": 1.5118799209594727, "learning_rate": 1.9485652174968378e-05, "loss": 0.3153, "step": 4635 }, { "epoch": 0.10264389132430529, "grad_norm": 1.5905600786209106, "learning_rate": 1.9484551510470066e-05, "loss": 0.4169, "step": 4640 }, { "epoch": 0.10275449896581855, "grad_norm": 1.3289669752120972, "learning_rate": 1.9483449700715115e-05, "loss": 0.2562, "step": 4645 }, { "epoch": 0.10286510660733181, "grad_norm": 1.6676779985427856, "learning_rate": 1.9482346745836573e-05, "loss": 0.3906, "step": 4650 }, { "epoch": 0.10297571424884507, "grad_norm": 1.0479322671890259, "learning_rate": 1.948124264596762e-05, "loss": 0.328, "step": 4655 }, { "epoch": 0.10308632189035834, "grad_norm": 1.6068211793899536, "learning_rate": 1.9480137401241575e-05, "loss": 0.2594, "step": 4660 }, { "epoch": 0.1031969295318716, "grad_norm": 1.732882022857666, "learning_rate": 1.94790310117919e-05, "loss": 0.3454, "step": 4665 }, { "epoch": 0.10330753717338485, "grad_norm": 1.1928975582122803, "learning_rate": 1.9477923477752188e-05, "loss": 0.3488, "step": 4670 }, { "epoch": 0.10341814481489811, "grad_norm": 0.9790850281715393, "learning_rate": 1.947681479925617e-05, "loss": 0.4723, "step": 4675 }, { "epoch": 0.10352875245641137, "grad_norm": 2.3305585384368896, "learning_rate": 1.9475704976437723e-05, "loss": 0.3605, "step": 4680 }, { "epoch": 0.10363936009792463, "grad_norm": 1.0146028995513916, "learning_rate": 1.947459400943086e-05, "loss": 0.3738, "step": 4685 }, { "epoch": 0.1037499677394379, "grad_norm": 2.116072177886963, "learning_rate": 1.947348189836972e-05, "loss": 0.3827, "step": 4690 }, { "epoch": 0.10386057538095116, "grad_norm": 1.5193942785263062, "learning_rate": 1.94723686433886e-05, "loss": 0.4372, "step": 4695 }, { "epoch": 0.10397118302246441, "grad_norm": 1.2144771814346313, "learning_rate": 1.9471254244621924e-05, "loss": 0.5349, "step": 4700 }, { "epoch": 0.10408179066397767, "grad_norm": 1.555141806602478, "learning_rate": 1.9470138702204255e-05, "loss": 0.4632, "step": 4705 }, { "epoch": 0.10419239830549093, "grad_norm": 1.3780333995819092, "learning_rate": 1.9469022016270286e-05, "loss": 0.346, "step": 4710 }, { "epoch": 0.10430300594700419, "grad_norm": 1.6435017585754395, "learning_rate": 1.9467904186954864e-05, "loss": 0.3663, "step": 4715 }, { "epoch": 0.10441361358851745, "grad_norm": 1.1211321353912354, "learning_rate": 1.946678521439297e-05, "loss": 0.3467, "step": 4720 }, { "epoch": 0.10452422123003072, "grad_norm": 1.734589695930481, "learning_rate": 1.946566509871971e-05, "loss": 0.354, "step": 4725 }, { "epoch": 0.10463482887154398, "grad_norm": 1.9510903358459473, "learning_rate": 1.946454384007035e-05, "loss": 0.2997, "step": 4730 }, { "epoch": 0.10474543651305723, "grad_norm": 1.3568185567855835, "learning_rate": 1.946342143858027e-05, "loss": 0.5322, "step": 4735 }, { "epoch": 0.10485604415457049, "grad_norm": 1.0281001329421997, "learning_rate": 1.9462297894385003e-05, "loss": 0.4705, "step": 4740 }, { "epoch": 0.10496665179608375, "grad_norm": 1.7972654104232788, "learning_rate": 1.9461173207620222e-05, "loss": 0.5659, "step": 4745 }, { "epoch": 0.105077259437597, "grad_norm": 1.6201773881912231, "learning_rate": 1.9460047378421722e-05, "loss": 0.3016, "step": 4750 }, { "epoch": 0.10518786707911028, "grad_norm": 1.1920804977416992, "learning_rate": 1.945892040692546e-05, "loss": 0.3549, "step": 4755 }, { "epoch": 0.10529847472062354, "grad_norm": 1.6716328859329224, "learning_rate": 1.9457792293267506e-05, "loss": 0.2964, "step": 4760 }, { "epoch": 0.1054090823621368, "grad_norm": 1.9847564697265625, "learning_rate": 1.9456663037584083e-05, "loss": 0.3065, "step": 4765 }, { "epoch": 0.10551969000365005, "grad_norm": 1.727195143699646, "learning_rate": 1.945553264001155e-05, "loss": 0.4814, "step": 4770 }, { "epoch": 0.10563029764516331, "grad_norm": 1.255409598350525, "learning_rate": 1.94544011006864e-05, "loss": 0.3834, "step": 4775 }, { "epoch": 0.10574090528667657, "grad_norm": 1.4639195203781128, "learning_rate": 1.9453268419745268e-05, "loss": 0.5275, "step": 4780 }, { "epoch": 0.10585151292818983, "grad_norm": 1.5494208335876465, "learning_rate": 1.9452134597324927e-05, "loss": 0.4276, "step": 4785 }, { "epoch": 0.1059621205697031, "grad_norm": 1.172483205795288, "learning_rate": 1.945099963356228e-05, "loss": 0.1982, "step": 4790 }, { "epoch": 0.10607272821121635, "grad_norm": 1.7666354179382324, "learning_rate": 1.9449863528594374e-05, "loss": 0.4371, "step": 4795 }, { "epoch": 0.10618333585272961, "grad_norm": 1.55924391746521, "learning_rate": 1.9448726282558397e-05, "loss": 0.4463, "step": 4800 }, { "epoch": 0.10629394349424287, "grad_norm": 1.174971342086792, "learning_rate": 1.9447587895591673e-05, "loss": 0.3402, "step": 4805 }, { "epoch": 0.10640455113575613, "grad_norm": 1.9935580492019653, "learning_rate": 1.9446448367831656e-05, "loss": 0.5451, "step": 4810 }, { "epoch": 0.10651515877726939, "grad_norm": 1.9697192907333374, "learning_rate": 1.9445307699415946e-05, "loss": 0.3587, "step": 4815 }, { "epoch": 0.10662576641878266, "grad_norm": 1.7704665660858154, "learning_rate": 1.9444165890482275e-05, "loss": 0.4871, "step": 4820 }, { "epoch": 0.10673637406029592, "grad_norm": 1.8390065431594849, "learning_rate": 1.9443022941168523e-05, "loss": 0.4628, "step": 4825 }, { "epoch": 0.10684698170180917, "grad_norm": 1.2604849338531494, "learning_rate": 1.9441878851612696e-05, "loss": 0.4937, "step": 4830 }, { "epoch": 0.10695758934332243, "grad_norm": 1.2920094728469849, "learning_rate": 1.9440733621952948e-05, "loss": 0.3751, "step": 4835 }, { "epoch": 0.10706819698483569, "grad_norm": 1.0522348880767822, "learning_rate": 1.9439587252327558e-05, "loss": 0.3861, "step": 4840 }, { "epoch": 0.10717880462634895, "grad_norm": 1.140427589416504, "learning_rate": 1.943843974287495e-05, "loss": 0.4244, "step": 4845 }, { "epoch": 0.10728941226786222, "grad_norm": 1.5384271144866943, "learning_rate": 1.943729109373369e-05, "loss": 0.4304, "step": 4850 }, { "epoch": 0.10740001990937548, "grad_norm": 1.9350779056549072, "learning_rate": 1.9436141305042474e-05, "loss": 0.3925, "step": 4855 }, { "epoch": 0.10751062755088873, "grad_norm": 2.1574745178222656, "learning_rate": 1.9434990376940143e-05, "loss": 0.3159, "step": 4860 }, { "epoch": 0.10762123519240199, "grad_norm": 1.6708999872207642, "learning_rate": 1.9433838309565664e-05, "loss": 0.3935, "step": 4865 }, { "epoch": 0.10773184283391525, "grad_norm": 1.2697739601135254, "learning_rate": 1.9432685103058157e-05, "loss": 0.4305, "step": 4870 }, { "epoch": 0.10784245047542851, "grad_norm": 1.5191632509231567, "learning_rate": 1.9431530757556863e-05, "loss": 0.3661, "step": 4875 }, { "epoch": 0.10795305811694177, "grad_norm": 1.006371021270752, "learning_rate": 1.9430375273201174e-05, "loss": 0.4343, "step": 4880 }, { "epoch": 0.10806366575845504, "grad_norm": 1.560943603515625, "learning_rate": 1.9429218650130614e-05, "loss": 0.4517, "step": 4885 }, { "epoch": 0.1081742733999683, "grad_norm": 1.503429889678955, "learning_rate": 1.9428060888484844e-05, "loss": 0.3276, "step": 4890 }, { "epoch": 0.10828488104148155, "grad_norm": 1.0042012929916382, "learning_rate": 1.9426901988403662e-05, "loss": 0.42, "step": 4895 }, { "epoch": 0.10839548868299481, "grad_norm": 1.6667516231536865, "learning_rate": 1.942574195002701e-05, "loss": 0.3836, "step": 4900 }, { "epoch": 0.10850609632450807, "grad_norm": 1.4623639583587646, "learning_rate": 1.9424580773494955e-05, "loss": 0.2154, "step": 4905 }, { "epoch": 0.10861670396602133, "grad_norm": 1.548520803451538, "learning_rate": 1.9423418458947716e-05, "loss": 0.4662, "step": 4910 }, { "epoch": 0.1087273116075346, "grad_norm": 2.230271100997925, "learning_rate": 1.9422255006525636e-05, "loss": 0.4566, "step": 4915 }, { "epoch": 0.10883791924904786, "grad_norm": 1.8891234397888184, "learning_rate": 1.94210904163692e-05, "loss": 0.5712, "step": 4920 }, { "epoch": 0.10894852689056111, "grad_norm": 1.4963228702545166, "learning_rate": 1.9419924688619042e-05, "loss": 0.4215, "step": 4925 }, { "epoch": 0.10905913453207437, "grad_norm": 1.3629271984100342, "learning_rate": 1.9418757823415914e-05, "loss": 0.344, "step": 4930 }, { "epoch": 0.10916974217358763, "grad_norm": 0.8673114776611328, "learning_rate": 1.941758982090072e-05, "loss": 0.4796, "step": 4935 }, { "epoch": 0.10928034981510089, "grad_norm": 1.4539889097213745, "learning_rate": 1.9416420681214492e-05, "loss": 0.3433, "step": 4940 }, { "epoch": 0.10939095745661415, "grad_norm": 1.5462456941604614, "learning_rate": 1.9415250404498406e-05, "loss": 0.2839, "step": 4945 }, { "epoch": 0.10950156509812742, "grad_norm": 2.0318973064422607, "learning_rate": 1.941407899089377e-05, "loss": 0.5346, "step": 4950 }, { "epoch": 0.10961217273964068, "grad_norm": 1.4785728454589844, "learning_rate": 1.9412906440542034e-05, "loss": 0.3538, "step": 4955 }, { "epoch": 0.10972278038115393, "grad_norm": 1.2012602090835571, "learning_rate": 1.941173275358478e-05, "loss": 0.4142, "step": 4960 }, { "epoch": 0.10983338802266719, "grad_norm": 1.7525750398635864, "learning_rate": 1.9410557930163735e-05, "loss": 0.3594, "step": 4965 }, { "epoch": 0.10994399566418045, "grad_norm": 2.074418067932129, "learning_rate": 1.9409381970420757e-05, "loss": 0.3651, "step": 4970 }, { "epoch": 0.11005460330569371, "grad_norm": 1.4601867198944092, "learning_rate": 1.940820487449784e-05, "loss": 0.3729, "step": 4975 }, { "epoch": 0.11016521094720698, "grad_norm": 1.2697306871414185, "learning_rate": 1.940702664253712e-05, "loss": 0.2556, "step": 4980 }, { "epoch": 0.11027581858872024, "grad_norm": 2.310359001159668, "learning_rate": 1.940584727468087e-05, "loss": 0.4819, "step": 4985 }, { "epoch": 0.1103864262302335, "grad_norm": 1.6535577774047852, "learning_rate": 1.9404666771071494e-05, "loss": 0.5471, "step": 4990 }, { "epoch": 0.11049703387174675, "grad_norm": 1.4714661836624146, "learning_rate": 1.940348513185154e-05, "loss": 0.3836, "step": 4995 }, { "epoch": 0.11060764151326001, "grad_norm": 1.8964512348175049, "learning_rate": 1.9402302357163695e-05, "loss": 0.4504, "step": 5000 }, { "epoch": 0.11071824915477327, "grad_norm": 0.8524267077445984, "learning_rate": 1.9401118447150772e-05, "loss": 0.3589, "step": 5005 }, { "epoch": 0.11082885679628653, "grad_norm": 1.3643252849578857, "learning_rate": 1.9399933401955725e-05, "loss": 0.3427, "step": 5010 }, { "epoch": 0.1109394644377998, "grad_norm": 1.5374906063079834, "learning_rate": 1.9398747221721658e-05, "loss": 0.4942, "step": 5015 }, { "epoch": 0.11105007207931306, "grad_norm": 1.2114739418029785, "learning_rate": 1.9397559906591798e-05, "loss": 0.5309, "step": 5020 }, { "epoch": 0.11116067972082631, "grad_norm": 1.2702308893203735, "learning_rate": 1.9396371456709506e-05, "loss": 0.4435, "step": 5025 }, { "epoch": 0.11127128736233957, "grad_norm": 1.1216119527816772, "learning_rate": 1.9395181872218294e-05, "loss": 0.3537, "step": 5030 }, { "epoch": 0.11138189500385283, "grad_norm": 1.3845821619033813, "learning_rate": 1.9393991153261806e-05, "loss": 0.4389, "step": 5035 }, { "epoch": 0.11149250264536609, "grad_norm": 1.5176879167556763, "learning_rate": 1.9392799299983812e-05, "loss": 0.4504, "step": 5040 }, { "epoch": 0.11160311028687936, "grad_norm": 2.277400255203247, "learning_rate": 1.9391606312528238e-05, "loss": 0.4084, "step": 5045 }, { "epoch": 0.11171371792839262, "grad_norm": 1.394566535949707, "learning_rate": 1.939041219103913e-05, "loss": 0.3907, "step": 5050 }, { "epoch": 0.11182432556990587, "grad_norm": 3.028308153152466, "learning_rate": 1.938921693566068e-05, "loss": 0.5553, "step": 5055 }, { "epoch": 0.11193493321141913, "grad_norm": 1.704977035522461, "learning_rate": 1.9388020546537217e-05, "loss": 0.2644, "step": 5060 }, { "epoch": 0.11204554085293239, "grad_norm": 1.4153468608856201, "learning_rate": 1.9386823023813198e-05, "loss": 0.5325, "step": 5065 }, { "epoch": 0.11215614849444565, "grad_norm": 1.2759255170822144, "learning_rate": 1.9385624367633233e-05, "loss": 0.3291, "step": 5070 }, { "epoch": 0.1122667561359589, "grad_norm": 1.7581592798233032, "learning_rate": 1.9384424578142046e-05, "loss": 0.3532, "step": 5075 }, { "epoch": 0.11237736377747218, "grad_norm": 1.4288694858551025, "learning_rate": 1.9383223655484528e-05, "loss": 0.4994, "step": 5080 }, { "epoch": 0.11248797141898544, "grad_norm": 1.2044689655303955, "learning_rate": 1.9382021599805676e-05, "loss": 0.3608, "step": 5085 }, { "epoch": 0.1125985790604987, "grad_norm": 1.8257474899291992, "learning_rate": 1.9380818411250646e-05, "loss": 0.3444, "step": 5090 }, { "epoch": 0.11270918670201195, "grad_norm": 1.428453803062439, "learning_rate": 1.937961408996472e-05, "loss": 0.4186, "step": 5095 }, { "epoch": 0.11281979434352521, "grad_norm": 1.5671592950820923, "learning_rate": 1.9378408636093316e-05, "loss": 0.4627, "step": 5100 }, { "epoch": 0.11293040198503847, "grad_norm": 1.35671067237854, "learning_rate": 1.9377202049782e-05, "loss": 0.381, "step": 5105 }, { "epoch": 0.11304100962655174, "grad_norm": 1.346993327140808, "learning_rate": 1.937599433117646e-05, "loss": 0.4197, "step": 5110 }, { "epoch": 0.113151617268065, "grad_norm": 1.9246387481689453, "learning_rate": 1.9374785480422527e-05, "loss": 0.4683, "step": 5115 }, { "epoch": 0.11326222490957825, "grad_norm": 1.07107412815094, "learning_rate": 1.9373575497666177e-05, "loss": 0.3854, "step": 5120 }, { "epoch": 0.11337283255109151, "grad_norm": 1.477776288986206, "learning_rate": 1.937236438305351e-05, "loss": 0.4727, "step": 5125 }, { "epoch": 0.11348344019260477, "grad_norm": 1.5762522220611572, "learning_rate": 1.9371152136730766e-05, "loss": 0.3225, "step": 5130 }, { "epoch": 0.11359404783411803, "grad_norm": 1.5473525524139404, "learning_rate": 1.9369938758844325e-05, "loss": 0.4044, "step": 5135 }, { "epoch": 0.1137046554756313, "grad_norm": 1.23297119140625, "learning_rate": 1.9368724249540706e-05, "loss": 0.3988, "step": 5140 }, { "epoch": 0.11381526311714456, "grad_norm": 1.4969267845153809, "learning_rate": 1.9367508608966554e-05, "loss": 0.3537, "step": 5145 }, { "epoch": 0.11392587075865782, "grad_norm": 1.5560193061828613, "learning_rate": 1.9366291837268663e-05, "loss": 0.4369, "step": 5150 }, { "epoch": 0.11403647840017107, "grad_norm": 1.4594780206680298, "learning_rate": 1.9365073934593953e-05, "loss": 0.3949, "step": 5155 }, { "epoch": 0.11414708604168433, "grad_norm": 1.3203016519546509, "learning_rate": 1.9363854901089486e-05, "loss": 0.5046, "step": 5160 }, { "epoch": 0.11425769368319759, "grad_norm": 0.936113178730011, "learning_rate": 1.9362634736902465e-05, "loss": 0.4545, "step": 5165 }, { "epoch": 0.11436830132471085, "grad_norm": 1.5501017570495605, "learning_rate": 1.936141344218022e-05, "loss": 0.5025, "step": 5170 }, { "epoch": 0.11447890896622412, "grad_norm": 1.297181248664856, "learning_rate": 1.9360191017070225e-05, "loss": 0.4102, "step": 5175 }, { "epoch": 0.11458951660773738, "grad_norm": 1.3368213176727295, "learning_rate": 1.935896746172008e-05, "loss": 0.404, "step": 5180 }, { "epoch": 0.11470012424925063, "grad_norm": 2.351327419281006, "learning_rate": 1.935774277627754e-05, "loss": 0.4693, "step": 5185 }, { "epoch": 0.11481073189076389, "grad_norm": 1.2613605260849, "learning_rate": 1.9356516960890476e-05, "loss": 0.3435, "step": 5190 }, { "epoch": 0.11492133953227715, "grad_norm": 1.2706034183502197, "learning_rate": 1.9355290015706908e-05, "loss": 0.4068, "step": 5195 }, { "epoch": 0.11503194717379041, "grad_norm": 1.652693748474121, "learning_rate": 1.9354061940874987e-05, "loss": 0.3059, "step": 5200 }, { "epoch": 0.11514255481530368, "grad_norm": 0.9360271096229553, "learning_rate": 1.935283273654301e-05, "loss": 0.5157, "step": 5205 }, { "epoch": 0.11525316245681694, "grad_norm": 0.7756430506706238, "learning_rate": 1.93516024028594e-05, "loss": 0.3159, "step": 5210 }, { "epoch": 0.1153637700983302, "grad_norm": 1.067289113998413, "learning_rate": 1.9350370939972714e-05, "loss": 0.3794, "step": 5215 }, { "epoch": 0.11547437773984345, "grad_norm": 1.9011505842208862, "learning_rate": 1.9349138348031653e-05, "loss": 0.557, "step": 5220 }, { "epoch": 0.11558498538135671, "grad_norm": 1.3037177324295044, "learning_rate": 1.9347904627185057e-05, "loss": 0.5547, "step": 5225 }, { "epoch": 0.11569559302286997, "grad_norm": 0.8623313903808594, "learning_rate": 1.934666977758189e-05, "loss": 0.4318, "step": 5230 }, { "epoch": 0.11580620066438323, "grad_norm": 1.5027235746383667, "learning_rate": 1.934543379937127e-05, "loss": 0.2936, "step": 5235 }, { "epoch": 0.1159168083058965, "grad_norm": 1.0434077978134155, "learning_rate": 1.934419669270243e-05, "loss": 0.401, "step": 5240 }, { "epoch": 0.11602741594740976, "grad_norm": 1.4597822427749634, "learning_rate": 1.934295845772476e-05, "loss": 0.3927, "step": 5245 }, { "epoch": 0.11613802358892301, "grad_norm": 1.9905503988265991, "learning_rate": 1.9341719094587766e-05, "loss": 0.3087, "step": 5250 }, { "epoch": 0.11624863123043627, "grad_norm": 2.1853835582733154, "learning_rate": 1.9340478603441107e-05, "loss": 0.3938, "step": 5255 }, { "epoch": 0.11635923887194953, "grad_norm": 1.4754146337509155, "learning_rate": 1.9339236984434574e-05, "loss": 0.3073, "step": 5260 }, { "epoch": 0.11646984651346279, "grad_norm": 2.35380482673645, "learning_rate": 1.9337994237718086e-05, "loss": 0.2834, "step": 5265 }, { "epoch": 0.11658045415497606, "grad_norm": 1.902882695198059, "learning_rate": 1.933675036344171e-05, "loss": 0.5478, "step": 5270 }, { "epoch": 0.11669106179648932, "grad_norm": 1.7451447248458862, "learning_rate": 1.933550536175564e-05, "loss": 0.4535, "step": 5275 }, { "epoch": 0.11680166943800258, "grad_norm": 1.636975646018982, "learning_rate": 1.9334259232810207e-05, "loss": 0.3269, "step": 5280 }, { "epoch": 0.11691227707951583, "grad_norm": 1.2043530941009521, "learning_rate": 1.933301197675589e-05, "loss": 0.3456, "step": 5285 }, { "epoch": 0.11702288472102909, "grad_norm": 1.4080227613449097, "learning_rate": 1.9331763593743288e-05, "loss": 0.2301, "step": 5290 }, { "epoch": 0.11713349236254235, "grad_norm": 1.3278400897979736, "learning_rate": 1.933051408392314e-05, "loss": 0.279, "step": 5295 }, { "epoch": 0.11724410000405561, "grad_norm": 1.2320581674575806, "learning_rate": 1.932926344744633e-05, "loss": 0.3868, "step": 5300 }, { "epoch": 0.11735470764556888, "grad_norm": 2.1249537467956543, "learning_rate": 1.9328011684463867e-05, "loss": 0.5688, "step": 5305 }, { "epoch": 0.11746531528708214, "grad_norm": 1.069229006767273, "learning_rate": 1.9326758795126908e-05, "loss": 0.4294, "step": 5310 }, { "epoch": 0.1175759229285954, "grad_norm": 1.414701223373413, "learning_rate": 1.9325504779586733e-05, "loss": 0.3361, "step": 5315 }, { "epoch": 0.11768653057010865, "grad_norm": 1.2374788522720337, "learning_rate": 1.9324249637994765e-05, "loss": 0.4368, "step": 5320 }, { "epoch": 0.11779713821162191, "grad_norm": 1.7674999237060547, "learning_rate": 1.9322993370502564e-05, "loss": 0.1779, "step": 5325 }, { "epoch": 0.11790774585313517, "grad_norm": 1.5018140077590942, "learning_rate": 1.932173597726182e-05, "loss": 0.3854, "step": 5330 }, { "epoch": 0.11801835349464844, "grad_norm": 1.2620837688446045, "learning_rate": 1.9320477458424368e-05, "loss": 0.3411, "step": 5335 }, { "epoch": 0.1181289611361617, "grad_norm": 1.5155102014541626, "learning_rate": 1.931921781414217e-05, "loss": 0.41, "step": 5340 }, { "epoch": 0.11823956877767496, "grad_norm": 0.9262099266052246, "learning_rate": 1.931795704456733e-05, "loss": 0.3825, "step": 5345 }, { "epoch": 0.11835017641918821, "grad_norm": 1.2944021224975586, "learning_rate": 1.931669514985208e-05, "loss": 0.5132, "step": 5350 }, { "epoch": 0.11846078406070147, "grad_norm": 0.9886609315872192, "learning_rate": 1.93154321301488e-05, "loss": 0.4312, "step": 5355 }, { "epoch": 0.11857139170221473, "grad_norm": 1.445898175239563, "learning_rate": 1.9314167985609996e-05, "loss": 0.4675, "step": 5360 }, { "epoch": 0.11868199934372799, "grad_norm": 1.920093059539795, "learning_rate": 1.931290271638832e-05, "loss": 0.5019, "step": 5365 }, { "epoch": 0.11879260698524126, "grad_norm": 0.9636952877044678, "learning_rate": 1.931163632263654e-05, "loss": 0.3836, "step": 5370 }, { "epoch": 0.11890321462675452, "grad_norm": 1.6163080930709839, "learning_rate": 1.9310368804507583e-05, "loss": 0.3689, "step": 5375 }, { "epoch": 0.11901382226826777, "grad_norm": 1.4920958280563354, "learning_rate": 1.9309100162154494e-05, "loss": 0.4464, "step": 5380 }, { "epoch": 0.11912442990978103, "grad_norm": 1.8575915098190308, "learning_rate": 1.930783039573047e-05, "loss": 0.54, "step": 5385 }, { "epoch": 0.11923503755129429, "grad_norm": 1.5758798122406006, "learning_rate": 1.9306559505388825e-05, "loss": 0.4627, "step": 5390 }, { "epoch": 0.11934564519280755, "grad_norm": 1.2514375448226929, "learning_rate": 1.930528749128303e-05, "loss": 0.4013, "step": 5395 }, { "epoch": 0.11945625283432082, "grad_norm": 1.909339427947998, "learning_rate": 1.930401435356667e-05, "loss": 0.3151, "step": 5400 }, { "epoch": 0.11956686047583408, "grad_norm": 1.3865814208984375, "learning_rate": 1.930274009239348e-05, "loss": 0.4036, "step": 5405 }, { "epoch": 0.11967746811734734, "grad_norm": 1.9530361890792847, "learning_rate": 1.9301464707917326e-05, "loss": 0.3547, "step": 5410 }, { "epoch": 0.1197880757588606, "grad_norm": 1.7363640069961548, "learning_rate": 1.930018820029221e-05, "loss": 0.4387, "step": 5415 }, { "epoch": 0.11989868340037385, "grad_norm": 1.7500450611114502, "learning_rate": 1.9298910569672273e-05, "loss": 0.4382, "step": 5420 }, { "epoch": 0.12000929104188711, "grad_norm": 1.1682714223861694, "learning_rate": 1.9297631816211784e-05, "loss": 0.3467, "step": 5425 }, { "epoch": 0.12011989868340038, "grad_norm": 1.3184837102890015, "learning_rate": 1.9296351940065157e-05, "loss": 0.371, "step": 5430 }, { "epoch": 0.12023050632491364, "grad_norm": 1.0455747842788696, "learning_rate": 1.929507094138693e-05, "loss": 0.4335, "step": 5435 }, { "epoch": 0.1203411139664269, "grad_norm": 0.7669450044631958, "learning_rate": 1.929378882033179e-05, "loss": 0.3672, "step": 5440 }, { "epoch": 0.12045172160794015, "grad_norm": 0.9521745443344116, "learning_rate": 1.929250557705455e-05, "loss": 0.3437, "step": 5445 }, { "epoch": 0.12056232924945341, "grad_norm": 2.1798462867736816, "learning_rate": 1.929122121171016e-05, "loss": 0.3948, "step": 5450 }, { "epoch": 0.12067293689096667, "grad_norm": 1.8456690311431885, "learning_rate": 1.928993572445371e-05, "loss": 0.3435, "step": 5455 }, { "epoch": 0.12078354453247993, "grad_norm": 0.9271587133407593, "learning_rate": 1.9288649115440417e-05, "loss": 0.311, "step": 5460 }, { "epoch": 0.1208941521739932, "grad_norm": 0.879239022731781, "learning_rate": 1.928736138482564e-05, "loss": 0.2505, "step": 5465 }, { "epoch": 0.12100475981550646, "grad_norm": 1.4918293952941895, "learning_rate": 1.9286072532764877e-05, "loss": 0.4355, "step": 5470 }, { "epoch": 0.12111536745701972, "grad_norm": 1.074135184288025, "learning_rate": 1.928478255941375e-05, "loss": 0.2741, "step": 5475 }, { "epoch": 0.12122597509853297, "grad_norm": 1.8501172065734863, "learning_rate": 1.928349146492803e-05, "loss": 0.3277, "step": 5480 }, { "epoch": 0.12133658274004623, "grad_norm": 1.0817201137542725, "learning_rate": 1.928219924946361e-05, "loss": 0.4173, "step": 5485 }, { "epoch": 0.12144719038155949, "grad_norm": 1.3760218620300293, "learning_rate": 1.9280905913176528e-05, "loss": 0.376, "step": 5490 }, { "epoch": 0.12155779802307276, "grad_norm": 1.1989308595657349, "learning_rate": 1.9279611456222954e-05, "loss": 0.3928, "step": 5495 }, { "epoch": 0.12166840566458602, "grad_norm": 1.224012851715088, "learning_rate": 1.9278315878759192e-05, "loss": 0.3468, "step": 5500 }, { "epoch": 0.12177901330609928, "grad_norm": 1.8851617574691772, "learning_rate": 1.9277019180941678e-05, "loss": 0.3662, "step": 5505 }, { "epoch": 0.12188962094761253, "grad_norm": 1.368145227432251, "learning_rate": 1.9275721362926997e-05, "loss": 0.43, "step": 5510 }, { "epoch": 0.12200022858912579, "grad_norm": 1.7711236476898193, "learning_rate": 1.927442242487185e-05, "loss": 0.5004, "step": 5515 }, { "epoch": 0.12211083623063905, "grad_norm": 1.2541996240615845, "learning_rate": 1.9273122366933098e-05, "loss": 0.4492, "step": 5520 }, { "epoch": 0.12222144387215231, "grad_norm": 1.013047218322754, "learning_rate": 1.927182118926771e-05, "loss": 0.2766, "step": 5525 }, { "epoch": 0.12233205151366558, "grad_norm": 1.6387193202972412, "learning_rate": 1.9270518892032803e-05, "loss": 0.3711, "step": 5530 }, { "epoch": 0.12244265915517884, "grad_norm": 1.2135796546936035, "learning_rate": 1.9269215475385637e-05, "loss": 0.3926, "step": 5535 }, { "epoch": 0.1225532667966921, "grad_norm": 0.9092623591423035, "learning_rate": 1.926791093948359e-05, "loss": 0.3491, "step": 5540 }, { "epoch": 0.12266387443820535, "grad_norm": 1.058918833732605, "learning_rate": 1.926660528448419e-05, "loss": 0.4545, "step": 5545 }, { "epoch": 0.12277448207971861, "grad_norm": 1.1692990064620972, "learning_rate": 1.9265298510545095e-05, "loss": 0.3705, "step": 5550 }, { "epoch": 0.12288508972123187, "grad_norm": 0.7757040858268738, "learning_rate": 1.9263990617824097e-05, "loss": 0.2616, "step": 5555 }, { "epoch": 0.12299569736274514, "grad_norm": 1.1952452659606934, "learning_rate": 1.926268160647912e-05, "loss": 0.3254, "step": 5560 }, { "epoch": 0.1231063050042584, "grad_norm": 2.792086362838745, "learning_rate": 1.926137147666823e-05, "loss": 0.6074, "step": 5565 }, { "epoch": 0.12321691264577166, "grad_norm": 2.1709251403808594, "learning_rate": 1.9260060228549628e-05, "loss": 0.4116, "step": 5570 }, { "epoch": 0.12332752028728491, "grad_norm": 1.803026556968689, "learning_rate": 1.9258747862281635e-05, "loss": 0.4388, "step": 5575 }, { "epoch": 0.12343812792879817, "grad_norm": 1.01143217086792, "learning_rate": 1.925743437802273e-05, "loss": 0.4682, "step": 5580 }, { "epoch": 0.12354873557031143, "grad_norm": 1.4518909454345703, "learning_rate": 1.9256119775931513e-05, "loss": 0.3917, "step": 5585 }, { "epoch": 0.12365934321182469, "grad_norm": 1.3850587606430054, "learning_rate": 1.9254804056166723e-05, "loss": 0.3651, "step": 5590 }, { "epoch": 0.12376995085333796, "grad_norm": 1.3400176763534546, "learning_rate": 1.9253487218887228e-05, "loss": 0.372, "step": 5595 }, { "epoch": 0.12388055849485122, "grad_norm": 2.035543918609619, "learning_rate": 1.925216926425204e-05, "loss": 0.5941, "step": 5600 }, { "epoch": 0.12399116613636448, "grad_norm": 1.5818687677383423, "learning_rate": 1.9250850192420297e-05, "loss": 0.4292, "step": 5605 }, { "epoch": 0.12410177377787773, "grad_norm": 1.5942202806472778, "learning_rate": 1.9249530003551285e-05, "loss": 0.3099, "step": 5610 }, { "epoch": 0.12421238141939099, "grad_norm": 0.9859552979469299, "learning_rate": 1.9248208697804405e-05, "loss": 0.402, "step": 5615 }, { "epoch": 0.12432298906090425, "grad_norm": 1.675306797027588, "learning_rate": 1.9246886275339215e-05, "loss": 0.4266, "step": 5620 }, { "epoch": 0.12443359670241752, "grad_norm": 1.6239570379257202, "learning_rate": 1.9245562736315394e-05, "loss": 0.3918, "step": 5625 }, { "epoch": 0.12454420434393078, "grad_norm": 1.158168077468872, "learning_rate": 1.9244238080892753e-05, "loss": 0.3808, "step": 5630 }, { "epoch": 0.12465481198544404, "grad_norm": 1.5469648838043213, "learning_rate": 1.924291230923125e-05, "loss": 0.3763, "step": 5635 }, { "epoch": 0.1247654196269573, "grad_norm": 1.6390315294265747, "learning_rate": 1.924158542149097e-05, "loss": 0.4314, "step": 5640 }, { "epoch": 0.12487602726847055, "grad_norm": 1.5618537664413452, "learning_rate": 1.9240257417832132e-05, "loss": 0.522, "step": 5645 }, { "epoch": 0.12498663490998381, "grad_norm": 1.1644129753112793, "learning_rate": 1.9238928298415097e-05, "loss": 0.467, "step": 5650 }, { "epoch": 0.12509724255149707, "grad_norm": 1.460855484008789, "learning_rate": 1.923759806340035e-05, "loss": 0.4759, "step": 5655 }, { "epoch": 0.12520785019301034, "grad_norm": 1.5208232402801514, "learning_rate": 1.923626671294852e-05, "loss": 0.5177, "step": 5660 }, { "epoch": 0.12531845783452358, "grad_norm": 2.225285053253174, "learning_rate": 1.9234934247220366e-05, "loss": 0.4408, "step": 5665 }, { "epoch": 0.12542906547603686, "grad_norm": 1.1227712631225586, "learning_rate": 1.9233600666376783e-05, "loss": 0.4457, "step": 5670 }, { "epoch": 0.12553967311755013, "grad_norm": 1.4990267753601074, "learning_rate": 1.9232265970578804e-05, "loss": 0.3805, "step": 5675 }, { "epoch": 0.12565028075906337, "grad_norm": 1.6374459266662598, "learning_rate": 1.9230930159987588e-05, "loss": 0.3666, "step": 5680 }, { "epoch": 0.12576088840057664, "grad_norm": 1.4058895111083984, "learning_rate": 1.922959323476443e-05, "loss": 0.4182, "step": 5685 }, { "epoch": 0.1258714960420899, "grad_norm": 1.1484246253967285, "learning_rate": 1.9228255195070777e-05, "loss": 0.2325, "step": 5690 }, { "epoch": 0.12598210368360316, "grad_norm": 1.1546169519424438, "learning_rate": 1.9226916041068183e-05, "loss": 0.5674, "step": 5695 }, { "epoch": 0.1260927113251164, "grad_norm": 1.1170897483825684, "learning_rate": 1.922557577291836e-05, "loss": 0.3718, "step": 5700 }, { "epoch": 0.12620331896662967, "grad_norm": 1.8118456602096558, "learning_rate": 1.922423439078314e-05, "loss": 0.4099, "step": 5705 }, { "epoch": 0.12631392660814295, "grad_norm": 1.8985294103622437, "learning_rate": 1.9222891894824495e-05, "loss": 0.3911, "step": 5710 }, { "epoch": 0.1264245342496562, "grad_norm": 1.2128995656967163, "learning_rate": 1.9221548285204528e-05, "loss": 0.4071, "step": 5715 }, { "epoch": 0.12653514189116946, "grad_norm": 1.4736419916152954, "learning_rate": 1.9220203562085488e-05, "loss": 0.3817, "step": 5720 }, { "epoch": 0.1266457495326827, "grad_norm": 0.9104214310646057, "learning_rate": 1.9218857725629743e-05, "loss": 0.3241, "step": 5725 }, { "epoch": 0.12675635717419598, "grad_norm": 1.412368655204773, "learning_rate": 1.9217510775999803e-05, "loss": 0.3809, "step": 5730 }, { "epoch": 0.12686696481570922, "grad_norm": 1.1616326570510864, "learning_rate": 1.9216162713358314e-05, "loss": 0.4106, "step": 5735 }, { "epoch": 0.1269775724572225, "grad_norm": 1.3267185688018799, "learning_rate": 1.9214813537868053e-05, "loss": 0.3859, "step": 5740 }, { "epoch": 0.12708818009873576, "grad_norm": 1.292978286743164, "learning_rate": 1.9213463249691934e-05, "loss": 0.4067, "step": 5745 }, { "epoch": 0.127198787740249, "grad_norm": 1.5887353420257568, "learning_rate": 1.9212111848992997e-05, "loss": 0.4505, "step": 5750 }, { "epoch": 0.12730939538176228, "grad_norm": 1.507908582687378, "learning_rate": 1.9210759335934433e-05, "loss": 0.3828, "step": 5755 }, { "epoch": 0.12742000302327552, "grad_norm": 1.2418265342712402, "learning_rate": 1.920940571067955e-05, "loss": 0.3623, "step": 5760 }, { "epoch": 0.1275306106647888, "grad_norm": 1.4829580783843994, "learning_rate": 1.9208050973391804e-05, "loss": 0.3038, "step": 5765 }, { "epoch": 0.12764121830630207, "grad_norm": 1.4189943075180054, "learning_rate": 1.9206695124234776e-05, "loss": 0.3686, "step": 5770 }, { "epoch": 0.1277518259478153, "grad_norm": 1.4946491718292236, "learning_rate": 1.9205338163372186e-05, "loss": 0.3333, "step": 5775 }, { "epoch": 0.12786243358932858, "grad_norm": 2.004868745803833, "learning_rate": 1.9203980090967886e-05, "loss": 0.3779, "step": 5780 }, { "epoch": 0.12797304123084183, "grad_norm": 1.2140538692474365, "learning_rate": 1.9202620907185855e-05, "loss": 0.5646, "step": 5785 }, { "epoch": 0.1280836488723551, "grad_norm": 1.6214736700057983, "learning_rate": 1.9201260612190226e-05, "loss": 0.4308, "step": 5790 }, { "epoch": 0.12819425651386834, "grad_norm": 1.2595528364181519, "learning_rate": 1.9199899206145247e-05, "loss": 0.4592, "step": 5795 }, { "epoch": 0.12830486415538161, "grad_norm": 0.976921796798706, "learning_rate": 1.919853668921531e-05, "loss": 0.4658, "step": 5800 }, { "epoch": 0.1284154717968949, "grad_norm": 1.8858847618103027, "learning_rate": 1.919717306156494e-05, "loss": 0.48, "step": 5805 }, { "epoch": 0.12852607943840813, "grad_norm": 1.188775658607483, "learning_rate": 1.919580832335879e-05, "loss": 0.3045, "step": 5810 }, { "epoch": 0.1286366870799214, "grad_norm": 1.3489470481872559, "learning_rate": 1.9194442474761654e-05, "loss": 0.3756, "step": 5815 }, { "epoch": 0.12874729472143465, "grad_norm": 0.9965197443962097, "learning_rate": 1.919307551593846e-05, "loss": 0.3707, "step": 5820 }, { "epoch": 0.12885790236294792, "grad_norm": 1.704056978225708, "learning_rate": 1.9191707447054263e-05, "loss": 0.5127, "step": 5825 }, { "epoch": 0.12896851000446116, "grad_norm": 2.277059316635132, "learning_rate": 1.9190338268274266e-05, "loss": 0.6051, "step": 5830 }, { "epoch": 0.12907911764597443, "grad_norm": 1.4642192125320435, "learning_rate": 1.9188967979763784e-05, "loss": 0.3394, "step": 5835 }, { "epoch": 0.1291897252874877, "grad_norm": 1.5628095865249634, "learning_rate": 1.9187596581688287e-05, "loss": 0.5514, "step": 5840 }, { "epoch": 0.12930033292900095, "grad_norm": 1.0219507217407227, "learning_rate": 1.918622407421337e-05, "loss": 0.4177, "step": 5845 }, { "epoch": 0.12941094057051422, "grad_norm": 1.5731453895568848, "learning_rate": 1.9184850457504765e-05, "loss": 0.5134, "step": 5850 }, { "epoch": 0.12952154821202747, "grad_norm": 1.222425103187561, "learning_rate": 1.9183475731728333e-05, "loss": 0.2749, "step": 5855 }, { "epoch": 0.12963215585354074, "grad_norm": 1.3853271007537842, "learning_rate": 1.9182099897050064e-05, "loss": 0.5196, "step": 5860 }, { "epoch": 0.129742763495054, "grad_norm": 1.1019530296325684, "learning_rate": 1.9180722953636107e-05, "loss": 0.3952, "step": 5865 }, { "epoch": 0.12985337113656725, "grad_norm": 1.8652498722076416, "learning_rate": 1.9179344901652713e-05, "loss": 0.4155, "step": 5870 }, { "epoch": 0.12996397877808052, "grad_norm": 1.0620791912078857, "learning_rate": 1.9177965741266287e-05, "loss": 0.3241, "step": 5875 }, { "epoch": 0.13007458641959377, "grad_norm": 1.4964879751205444, "learning_rate": 1.9176585472643362e-05, "loss": 0.4759, "step": 5880 }, { "epoch": 0.13018519406110704, "grad_norm": 1.7718970775604248, "learning_rate": 1.9175204095950605e-05, "loss": 0.5088, "step": 5885 }, { "epoch": 0.13029580170262028, "grad_norm": 1.538352370262146, "learning_rate": 1.9173821611354815e-05, "loss": 0.4057, "step": 5890 }, { "epoch": 0.13040640934413356, "grad_norm": 1.2991300821304321, "learning_rate": 1.917243801902293e-05, "loss": 0.2871, "step": 5895 }, { "epoch": 0.13051701698564683, "grad_norm": 1.6405963897705078, "learning_rate": 1.9171053319122016e-05, "loss": 0.357, "step": 5900 }, { "epoch": 0.13062762462716007, "grad_norm": 2.4321913719177246, "learning_rate": 1.9169667511819276e-05, "loss": 0.4019, "step": 5905 }, { "epoch": 0.13073823226867334, "grad_norm": 1.5153170824050903, "learning_rate": 1.916828059728204e-05, "loss": 0.4237, "step": 5910 }, { "epoch": 0.1308488399101866, "grad_norm": 2.084807872772217, "learning_rate": 1.9166892575677787e-05, "loss": 0.4799, "step": 5915 }, { "epoch": 0.13095944755169986, "grad_norm": 1.546061396598816, "learning_rate": 1.9165503447174114e-05, "loss": 0.4211, "step": 5920 }, { "epoch": 0.1310700551932131, "grad_norm": 0.862808108329773, "learning_rate": 1.916411321193876e-05, "loss": 0.2896, "step": 5925 }, { "epoch": 0.13118066283472637, "grad_norm": 1.1438584327697754, "learning_rate": 1.9162721870139592e-05, "loss": 0.4101, "step": 5930 }, { "epoch": 0.13129127047623965, "grad_norm": 1.023362636566162, "learning_rate": 1.9161329421944623e-05, "loss": 0.5072, "step": 5935 }, { "epoch": 0.1314018781177529, "grad_norm": 3.0532989501953125, "learning_rate": 1.9159935867521985e-05, "loss": 0.489, "step": 5940 }, { "epoch": 0.13151248575926616, "grad_norm": 2.021237850189209, "learning_rate": 1.9158541207039943e-05, "loss": 0.4047, "step": 5945 }, { "epoch": 0.1316230934007794, "grad_norm": 0.9501652121543884, "learning_rate": 1.9157145440666912e-05, "loss": 0.4036, "step": 5950 }, { "epoch": 0.13173370104229268, "grad_norm": 1.636578917503357, "learning_rate": 1.9155748568571426e-05, "loss": 0.4638, "step": 5955 }, { "epoch": 0.13184430868380592, "grad_norm": 1.036970615386963, "learning_rate": 1.915435059092216e-05, "loss": 0.3926, "step": 5960 }, { "epoch": 0.1319549163253192, "grad_norm": 1.1678075790405273, "learning_rate": 1.9152951507887908e-05, "loss": 0.3632, "step": 5965 }, { "epoch": 0.13206552396683247, "grad_norm": 1.0001752376556396, "learning_rate": 1.9151551319637624e-05, "loss": 0.3355, "step": 5970 }, { "epoch": 0.1321761316083457, "grad_norm": 1.140153408050537, "learning_rate": 1.915015002634037e-05, "loss": 0.404, "step": 5975 }, { "epoch": 0.13228673924985898, "grad_norm": 0.6687695980072021, "learning_rate": 1.914874762816536e-05, "loss": 0.2837, "step": 5980 }, { "epoch": 0.13239734689137223, "grad_norm": 1.2832129001617432, "learning_rate": 1.9147344125281925e-05, "loss": 0.356, "step": 5985 }, { "epoch": 0.1325079545328855, "grad_norm": 0.9484536051750183, "learning_rate": 1.914593951785954e-05, "loss": 0.3613, "step": 5990 }, { "epoch": 0.13261856217439877, "grad_norm": 2.7078239917755127, "learning_rate": 1.9144533806067818e-05, "loss": 0.304, "step": 5995 }, { "epoch": 0.132729169815912, "grad_norm": 1.6759693622589111, "learning_rate": 1.9143126990076488e-05, "loss": 0.4722, "step": 6000 }, { "epoch": 0.13283977745742528, "grad_norm": 2.2539312839508057, "learning_rate": 1.9141719070055427e-05, "loss": 0.3449, "step": 6005 }, { "epoch": 0.13295038509893853, "grad_norm": 1.7249102592468262, "learning_rate": 1.914031004617464e-05, "loss": 0.4217, "step": 6010 }, { "epoch": 0.1330609927404518, "grad_norm": 1.8589547872543335, "learning_rate": 1.9138899918604268e-05, "loss": 0.2658, "step": 6015 }, { "epoch": 0.13317160038196504, "grad_norm": 1.4055699110031128, "learning_rate": 1.913748868751458e-05, "loss": 0.44, "step": 6020 }, { "epoch": 0.13328220802347832, "grad_norm": 1.1928904056549072, "learning_rate": 1.913607635307599e-05, "loss": 0.3633, "step": 6025 }, { "epoch": 0.1333928156649916, "grad_norm": 1.0129585266113281, "learning_rate": 1.9134662915459024e-05, "loss": 0.292, "step": 6030 }, { "epoch": 0.13350342330650483, "grad_norm": 1.0502597093582153, "learning_rate": 1.913324837483437e-05, "loss": 0.4193, "step": 6035 }, { "epoch": 0.1336140309480181, "grad_norm": 2.1656219959259033, "learning_rate": 1.9131832731372814e-05, "loss": 0.2893, "step": 6040 }, { "epoch": 0.13372463858953135, "grad_norm": 1.5223177671432495, "learning_rate": 1.9130415985245312e-05, "loss": 0.2981, "step": 6045 }, { "epoch": 0.13383524623104462, "grad_norm": 1.23662531375885, "learning_rate": 1.912899813662293e-05, "loss": 0.5418, "step": 6050 }, { "epoch": 0.13394585387255786, "grad_norm": 2.2205052375793457, "learning_rate": 1.9127579185676865e-05, "loss": 0.3216, "step": 6055 }, { "epoch": 0.13405646151407113, "grad_norm": 2.001621961593628, "learning_rate": 1.9126159132578462e-05, "loss": 0.4498, "step": 6060 }, { "epoch": 0.1341670691555844, "grad_norm": 1.362507939338684, "learning_rate": 1.9124737977499194e-05, "loss": 0.395, "step": 6065 }, { "epoch": 0.13427767679709765, "grad_norm": 1.1696910858154297, "learning_rate": 1.9123315720610662e-05, "loss": 0.4403, "step": 6070 }, { "epoch": 0.13438828443861092, "grad_norm": 1.6033880710601807, "learning_rate": 1.9121892362084605e-05, "loss": 0.3881, "step": 6075 }, { "epoch": 0.13449889208012417, "grad_norm": 1.7013787031173706, "learning_rate": 1.9120467902092888e-05, "loss": 0.4538, "step": 6080 }, { "epoch": 0.13460949972163744, "grad_norm": 1.569021224975586, "learning_rate": 1.911904234080752e-05, "loss": 0.4564, "step": 6085 }, { "epoch": 0.13472010736315068, "grad_norm": 1.2602808475494385, "learning_rate": 1.9117615678400636e-05, "loss": 0.4332, "step": 6090 }, { "epoch": 0.13483071500466395, "grad_norm": 1.818721055984497, "learning_rate": 1.9116187915044502e-05, "loss": 0.3869, "step": 6095 }, { "epoch": 0.13494132264617723, "grad_norm": 1.5945169925689697, "learning_rate": 1.911475905091152e-05, "loss": 0.4974, "step": 6100 }, { "epoch": 0.13505193028769047, "grad_norm": 1.6060203313827515, "learning_rate": 1.911332908617423e-05, "loss": 0.5061, "step": 6105 }, { "epoch": 0.13516253792920374, "grad_norm": 1.2298915386199951, "learning_rate": 1.9111898021005296e-05, "loss": 0.3914, "step": 6110 }, { "epoch": 0.13527314557071698, "grad_norm": 1.2855567932128906, "learning_rate": 1.9110465855577514e-05, "loss": 0.2069, "step": 6115 }, { "epoch": 0.13538375321223026, "grad_norm": 1.8487292528152466, "learning_rate": 1.910903259006383e-05, "loss": 0.3304, "step": 6120 }, { "epoch": 0.13549436085374353, "grad_norm": 1.8021327257156372, "learning_rate": 1.91075982246373e-05, "loss": 0.3707, "step": 6125 }, { "epoch": 0.13560496849525677, "grad_norm": 1.145630955696106, "learning_rate": 1.9106162759471126e-05, "loss": 0.3929, "step": 6130 }, { "epoch": 0.13571557613677004, "grad_norm": 1.484133005142212, "learning_rate": 1.910472619473864e-05, "loss": 0.4066, "step": 6135 }, { "epoch": 0.1358261837782833, "grad_norm": 1.1474268436431885, "learning_rate": 1.9103288530613305e-05, "loss": 0.3733, "step": 6140 }, { "epoch": 0.13593679141979656, "grad_norm": 0.9661514163017273, "learning_rate": 1.910184976726872e-05, "loss": 0.3258, "step": 6145 }, { "epoch": 0.1360473990613098, "grad_norm": 1.2409381866455078, "learning_rate": 1.9100409904878622e-05, "loss": 0.5614, "step": 6150 }, { "epoch": 0.13615800670282308, "grad_norm": 1.8632627725601196, "learning_rate": 1.909896894361686e-05, "loss": 0.4151, "step": 6155 }, { "epoch": 0.13626861434433635, "grad_norm": 1.4967628717422485, "learning_rate": 1.909752688365744e-05, "loss": 0.5063, "step": 6160 }, { "epoch": 0.1363792219858496, "grad_norm": 1.8947832584381104, "learning_rate": 1.909608372517449e-05, "loss": 0.4315, "step": 6165 }, { "epoch": 0.13648982962736286, "grad_norm": 0.8570780158042908, "learning_rate": 1.9094639468342265e-05, "loss": 0.3689, "step": 6170 }, { "epoch": 0.1366004372688761, "grad_norm": 1.3065898418426514, "learning_rate": 1.9093194113335162e-05, "loss": 0.4284, "step": 6175 }, { "epoch": 0.13671104491038938, "grad_norm": 1.5618168115615845, "learning_rate": 1.909174766032771e-05, "loss": 0.615, "step": 6180 }, { "epoch": 0.13682165255190262, "grad_norm": 1.2726731300354004, "learning_rate": 1.9090300109494562e-05, "loss": 0.5007, "step": 6185 }, { "epoch": 0.1369322601934159, "grad_norm": 1.369860291481018, "learning_rate": 1.908885146101051e-05, "loss": 0.3779, "step": 6190 }, { "epoch": 0.13704286783492917, "grad_norm": 1.6806285381317139, "learning_rate": 1.9087401715050485e-05, "loss": 0.5453, "step": 6195 }, { "epoch": 0.1371534754764424, "grad_norm": 1.711031198501587, "learning_rate": 1.9085950871789535e-05, "loss": 0.4108, "step": 6200 }, { "epoch": 0.13726408311795568, "grad_norm": 1.2566081285476685, "learning_rate": 1.9084498931402854e-05, "loss": 0.5721, "step": 6205 }, { "epoch": 0.13737469075946893, "grad_norm": 1.1251777410507202, "learning_rate": 1.9083045894065763e-05, "loss": 0.3852, "step": 6210 }, { "epoch": 0.1374852984009822, "grad_norm": 1.5868202447891235, "learning_rate": 1.9081591759953714e-05, "loss": 0.4292, "step": 6215 }, { "epoch": 0.13759590604249547, "grad_norm": 0.6574198007583618, "learning_rate": 1.908013652924229e-05, "loss": 0.2222, "step": 6220 }, { "epoch": 0.1377065136840087, "grad_norm": 1.463051676750183, "learning_rate": 1.9078680202107217e-05, "loss": 0.4912, "step": 6225 }, { "epoch": 0.13781712132552199, "grad_norm": 1.9758415222167969, "learning_rate": 1.9077222778724342e-05, "loss": 0.3136, "step": 6230 }, { "epoch": 0.13792772896703523, "grad_norm": 1.0948512554168701, "learning_rate": 1.9075764259269646e-05, "loss": 0.4337, "step": 6235 }, { "epoch": 0.1380383366085485, "grad_norm": 1.6486941576004028, "learning_rate": 1.907430464391925e-05, "loss": 0.302, "step": 6240 }, { "epoch": 0.13814894425006174, "grad_norm": 1.4630076885223389, "learning_rate": 1.90728439328494e-05, "loss": 0.3265, "step": 6245 }, { "epoch": 0.13825955189157502, "grad_norm": 2.236219882965088, "learning_rate": 1.9071382126236474e-05, "loss": 0.291, "step": 6250 }, { "epoch": 0.1383701595330883, "grad_norm": 1.1956754922866821, "learning_rate": 1.9069919224256988e-05, "loss": 0.3009, "step": 6255 }, { "epoch": 0.13848076717460153, "grad_norm": 1.6970239877700806, "learning_rate": 1.9068455227087584e-05, "loss": 0.5145, "step": 6260 }, { "epoch": 0.1385913748161148, "grad_norm": 1.584176778793335, "learning_rate": 1.9066990134905045e-05, "loss": 0.4104, "step": 6265 }, { "epoch": 0.13870198245762805, "grad_norm": 1.4512194395065308, "learning_rate": 1.9065523947886272e-05, "loss": 0.3846, "step": 6270 }, { "epoch": 0.13881259009914132, "grad_norm": 1.7829527854919434, "learning_rate": 1.9064056666208312e-05, "loss": 0.2953, "step": 6275 }, { "epoch": 0.13892319774065456, "grad_norm": 1.6096175909042358, "learning_rate": 1.906258829004834e-05, "loss": 0.4463, "step": 6280 }, { "epoch": 0.13903380538216784, "grad_norm": 1.301340103149414, "learning_rate": 1.9061118819583657e-05, "loss": 0.4065, "step": 6285 }, { "epoch": 0.1391444130236811, "grad_norm": 1.6130318641662598, "learning_rate": 1.9059648254991704e-05, "loss": 0.4523, "step": 6290 }, { "epoch": 0.13925502066519435, "grad_norm": 1.1758326292037964, "learning_rate": 1.905817659645005e-05, "loss": 0.514, "step": 6295 }, { "epoch": 0.13936562830670762, "grad_norm": 2.2343978881835938, "learning_rate": 1.9056703844136404e-05, "loss": 0.3264, "step": 6300 }, { "epoch": 0.13947623594822087, "grad_norm": 1.1983753442764282, "learning_rate": 1.9055229998228592e-05, "loss": 0.4436, "step": 6305 }, { "epoch": 0.13958684358973414, "grad_norm": 1.2934294939041138, "learning_rate": 1.905375505890458e-05, "loss": 0.3488, "step": 6310 }, { "epoch": 0.13969745123124738, "grad_norm": 2.108458995819092, "learning_rate": 1.905227902634247e-05, "loss": 0.34, "step": 6315 }, { "epoch": 0.13980805887276065, "grad_norm": 1.2511210441589355, "learning_rate": 1.9050801900720498e-05, "loss": 0.4471, "step": 6320 }, { "epoch": 0.13991866651427393, "grad_norm": 1.7514848709106445, "learning_rate": 1.9049323682217012e-05, "loss": 0.3262, "step": 6325 }, { "epoch": 0.14002927415578717, "grad_norm": 2.0594868659973145, "learning_rate": 1.9047844371010522e-05, "loss": 0.4067, "step": 6330 }, { "epoch": 0.14013988179730044, "grad_norm": 1.793491005897522, "learning_rate": 1.9046363967279645e-05, "loss": 0.4332, "step": 6335 }, { "epoch": 0.14025048943881369, "grad_norm": 1.594115138053894, "learning_rate": 1.904488247120314e-05, "loss": 0.5135, "step": 6340 }, { "epoch": 0.14036109708032696, "grad_norm": 1.3992466926574707, "learning_rate": 1.90433998829599e-05, "loss": 0.4626, "step": 6345 }, { "epoch": 0.14047170472184023, "grad_norm": 1.1632264852523804, "learning_rate": 1.9041916202728945e-05, "loss": 0.404, "step": 6350 }, { "epoch": 0.14058231236335347, "grad_norm": 1.176788091659546, "learning_rate": 1.9040431430689433e-05, "loss": 0.5609, "step": 6355 }, { "epoch": 0.14069292000486674, "grad_norm": 0.9110123515129089, "learning_rate": 1.9038945567020644e-05, "loss": 0.358, "step": 6360 }, { "epoch": 0.14080352764638, "grad_norm": 1.6982550621032715, "learning_rate": 1.9037458611902e-05, "loss": 0.3441, "step": 6365 }, { "epoch": 0.14091413528789326, "grad_norm": 1.2494561672210693, "learning_rate": 1.9035970565513043e-05, "loss": 0.3405, "step": 6370 }, { "epoch": 0.1410247429294065, "grad_norm": 2.0026133060455322, "learning_rate": 1.9034481428033467e-05, "loss": 0.4173, "step": 6375 }, { "epoch": 0.14113535057091978, "grad_norm": 1.767876386642456, "learning_rate": 1.903299119964307e-05, "loss": 0.4136, "step": 6380 }, { "epoch": 0.14124595821243305, "grad_norm": 1.6695027351379395, "learning_rate": 1.903149988052181e-05, "loss": 0.2543, "step": 6385 }, { "epoch": 0.1413565658539463, "grad_norm": 1.3104326725006104, "learning_rate": 1.9030007470849757e-05, "loss": 0.3357, "step": 6390 }, { "epoch": 0.14146717349545956, "grad_norm": 1.2015600204467773, "learning_rate": 1.9028513970807115e-05, "loss": 0.4813, "step": 6395 }, { "epoch": 0.1415777811369728, "grad_norm": 1.4131771326065063, "learning_rate": 1.9027019380574236e-05, "loss": 0.3858, "step": 6400 }, { "epoch": 0.14168838877848608, "grad_norm": 1.6909987926483154, "learning_rate": 1.9025523700331576e-05, "loss": 0.2901, "step": 6405 }, { "epoch": 0.14179899641999932, "grad_norm": 2.0832631587982178, "learning_rate": 1.902402693025975e-05, "loss": 0.3456, "step": 6410 }, { "epoch": 0.1419096040615126, "grad_norm": 1.5406862497329712, "learning_rate": 1.9022529070539483e-05, "loss": 0.556, "step": 6415 }, { "epoch": 0.14202021170302587, "grad_norm": 1.339289903640747, "learning_rate": 1.9021030121351647e-05, "loss": 0.34, "step": 6420 }, { "epoch": 0.1421308193445391, "grad_norm": 1.0878491401672363, "learning_rate": 1.9019530082877246e-05, "loss": 0.2577, "step": 6425 }, { "epoch": 0.14224142698605238, "grad_norm": 1.5594605207443237, "learning_rate": 1.9018028955297395e-05, "loss": 0.3286, "step": 6430 }, { "epoch": 0.14235203462756563, "grad_norm": 1.3988142013549805, "learning_rate": 1.9016526738793363e-05, "loss": 0.3694, "step": 6435 }, { "epoch": 0.1424626422690789, "grad_norm": 1.3957304954528809, "learning_rate": 1.901502343354654e-05, "loss": 0.4363, "step": 6440 }, { "epoch": 0.14257324991059217, "grad_norm": 1.2032930850982666, "learning_rate": 1.901351903973845e-05, "loss": 0.34, "step": 6445 }, { "epoch": 0.14268385755210541, "grad_norm": 1.4661592245101929, "learning_rate": 1.901201355755075e-05, "loss": 0.4305, "step": 6450 }, { "epoch": 0.14279446519361869, "grad_norm": 1.505459189414978, "learning_rate": 1.901050698716522e-05, "loss": 0.6413, "step": 6455 }, { "epoch": 0.14290507283513193, "grad_norm": 1.583253026008606, "learning_rate": 1.9008999328763788e-05, "loss": 0.4727, "step": 6460 }, { "epoch": 0.1430156804766452, "grad_norm": 1.7597501277923584, "learning_rate": 1.90074905825285e-05, "loss": 0.3376, "step": 6465 }, { "epoch": 0.14312628811815845, "grad_norm": 5.588284969329834, "learning_rate": 1.900598074864153e-05, "loss": 0.4586, "step": 6470 }, { "epoch": 0.14323689575967172, "grad_norm": 1.2567272186279297, "learning_rate": 1.9004469827285197e-05, "loss": 0.2497, "step": 6475 }, { "epoch": 0.143347503401185, "grad_norm": 2.1746020317077637, "learning_rate": 1.900295781864194e-05, "loss": 0.4878, "step": 6480 }, { "epoch": 0.14345811104269823, "grad_norm": 1.1878607273101807, "learning_rate": 1.9001444722894337e-05, "loss": 0.3614, "step": 6485 }, { "epoch": 0.1435687186842115, "grad_norm": 1.3141433000564575, "learning_rate": 1.8999930540225094e-05, "loss": 0.3818, "step": 6490 }, { "epoch": 0.14367932632572475, "grad_norm": 1.6854026317596436, "learning_rate": 1.8998415270817042e-05, "loss": 0.2217, "step": 6495 }, { "epoch": 0.14378993396723802, "grad_norm": 0.9932371973991394, "learning_rate": 1.899689891485316e-05, "loss": 0.4348, "step": 6500 }, { "epoch": 0.14390054160875126, "grad_norm": 1.3374714851379395, "learning_rate": 1.8995381472516533e-05, "loss": 0.3518, "step": 6505 }, { "epoch": 0.14401114925026454, "grad_norm": 1.6760892868041992, "learning_rate": 1.8993862943990407e-05, "loss": 0.3994, "step": 6510 }, { "epoch": 0.1441217568917778, "grad_norm": 1.939066767692566, "learning_rate": 1.8992343329458132e-05, "loss": 0.3733, "step": 6515 }, { "epoch": 0.14423236453329105, "grad_norm": 1.7545340061187744, "learning_rate": 1.899082262910321e-05, "loss": 0.3743, "step": 6520 }, { "epoch": 0.14434297217480432, "grad_norm": 1.0378295183181763, "learning_rate": 1.8989300843109258e-05, "loss": 0.4636, "step": 6525 }, { "epoch": 0.14445357981631757, "grad_norm": 1.1857157945632935, "learning_rate": 1.8987777971660034e-05, "loss": 0.5184, "step": 6530 }, { "epoch": 0.14456418745783084, "grad_norm": 2.4193003177642822, "learning_rate": 1.8986254014939427e-05, "loss": 0.4228, "step": 6535 }, { "epoch": 0.14467479509934408, "grad_norm": 1.7387081384658813, "learning_rate": 1.898472897313145e-05, "loss": 0.5124, "step": 6540 }, { "epoch": 0.14478540274085736, "grad_norm": 1.103232741355896, "learning_rate": 1.898320284642025e-05, "loss": 0.3894, "step": 6545 }, { "epoch": 0.14489601038237063, "grad_norm": 1.6658273935317993, "learning_rate": 1.8981675634990114e-05, "loss": 0.2976, "step": 6550 }, { "epoch": 0.14500661802388387, "grad_norm": 0.9892645478248596, "learning_rate": 1.8980147339025445e-05, "loss": 0.3734, "step": 6555 }, { "epoch": 0.14511722566539714, "grad_norm": 1.6624363660812378, "learning_rate": 1.8978617958710787e-05, "loss": 0.4101, "step": 6560 }, { "epoch": 0.1452278333069104, "grad_norm": 1.8015763759613037, "learning_rate": 1.8977087494230815e-05, "loss": 0.4027, "step": 6565 }, { "epoch": 0.14533844094842366, "grad_norm": 1.2401626110076904, "learning_rate": 1.8975555945770326e-05, "loss": 0.2588, "step": 6570 }, { "epoch": 0.14544904858993693, "grad_norm": 1.0222845077514648, "learning_rate": 1.8974023313514258e-05, "loss": 0.4375, "step": 6575 }, { "epoch": 0.14555965623145017, "grad_norm": 1.3147770166397095, "learning_rate": 1.8972489597647676e-05, "loss": 0.4289, "step": 6580 }, { "epoch": 0.14567026387296345, "grad_norm": 1.474872350692749, "learning_rate": 1.8970954798355772e-05, "loss": 0.411, "step": 6585 }, { "epoch": 0.1457808715144767, "grad_norm": 1.039254069328308, "learning_rate": 1.8969418915823875e-05, "loss": 0.3845, "step": 6590 }, { "epoch": 0.14589147915598996, "grad_norm": 1.7793803215026855, "learning_rate": 1.8967881950237447e-05, "loss": 0.3085, "step": 6595 }, { "epoch": 0.1460020867975032, "grad_norm": 0.7509400844573975, "learning_rate": 1.896634390178207e-05, "loss": 0.3056, "step": 6600 }, { "epoch": 0.14611269443901648, "grad_norm": 2.207821846008301, "learning_rate": 1.8964804770643463e-05, "loss": 0.4609, "step": 6605 }, { "epoch": 0.14622330208052975, "grad_norm": 1.513350486755371, "learning_rate": 1.8963264557007477e-05, "loss": 0.3518, "step": 6610 }, { "epoch": 0.146333909722043, "grad_norm": 1.4499850273132324, "learning_rate": 1.8961723261060096e-05, "loss": 0.4711, "step": 6615 }, { "epoch": 0.14644451736355626, "grad_norm": 1.2395687103271484, "learning_rate": 1.8960180882987423e-05, "loss": 0.3352, "step": 6620 }, { "epoch": 0.1465551250050695, "grad_norm": 1.1299294233322144, "learning_rate": 1.8958637422975704e-05, "loss": 0.3127, "step": 6625 }, { "epoch": 0.14666573264658278, "grad_norm": 1.4264247417449951, "learning_rate": 1.8957092881211313e-05, "loss": 0.3744, "step": 6630 }, { "epoch": 0.14677634028809602, "grad_norm": 1.320819616317749, "learning_rate": 1.8955547257880753e-05, "loss": 0.55, "step": 6635 }, { "epoch": 0.1468869479296093, "grad_norm": 1.6293678283691406, "learning_rate": 1.8954000553170653e-05, "loss": 0.3708, "step": 6640 }, { "epoch": 0.14699755557112257, "grad_norm": 1.3193342685699463, "learning_rate": 1.895245276726778e-05, "loss": 0.3128, "step": 6645 }, { "epoch": 0.1471081632126358, "grad_norm": 1.4469798803329468, "learning_rate": 1.8950903900359035e-05, "loss": 0.5427, "step": 6650 }, { "epoch": 0.14721877085414908, "grad_norm": 1.1883511543273926, "learning_rate": 1.894935395263143e-05, "loss": 0.3329, "step": 6655 }, { "epoch": 0.14732937849566233, "grad_norm": 1.8640215396881104, "learning_rate": 1.894780292427213e-05, "loss": 0.4365, "step": 6660 }, { "epoch": 0.1474399861371756, "grad_norm": 1.4760690927505493, "learning_rate": 1.894625081546842e-05, "loss": 0.4949, "step": 6665 }, { "epoch": 0.14755059377868884, "grad_norm": 1.3189891576766968, "learning_rate": 1.8944697626407712e-05, "loss": 0.4639, "step": 6670 }, { "epoch": 0.14766120142020212, "grad_norm": 1.2984635829925537, "learning_rate": 1.894314335727756e-05, "loss": 0.3368, "step": 6675 }, { "epoch": 0.1477718090617154, "grad_norm": 1.1593267917633057, "learning_rate": 1.8941588008265638e-05, "loss": 0.3167, "step": 6680 }, { "epoch": 0.14788241670322863, "grad_norm": 1.4647868871688843, "learning_rate": 1.8940031579559753e-05, "loss": 0.4641, "step": 6685 }, { "epoch": 0.1479930243447419, "grad_norm": 1.1677926778793335, "learning_rate": 1.8938474071347842e-05, "loss": 0.2552, "step": 6690 }, { "epoch": 0.14810363198625515, "grad_norm": 1.3283621072769165, "learning_rate": 1.8936915483817978e-05, "loss": 0.4261, "step": 6695 }, { "epoch": 0.14821423962776842, "grad_norm": 1.5260214805603027, "learning_rate": 1.8935355817158358e-05, "loss": 0.4895, "step": 6700 }, { "epoch": 0.1483248472692817, "grad_norm": 1.1863666772842407, "learning_rate": 1.8933795071557314e-05, "loss": 0.363, "step": 6705 }, { "epoch": 0.14843545491079493, "grad_norm": 1.0363661050796509, "learning_rate": 1.8932233247203298e-05, "loss": 0.4177, "step": 6710 }, { "epoch": 0.1485460625523082, "grad_norm": 1.541280746459961, "learning_rate": 1.8930670344284906e-05, "loss": 0.4619, "step": 6715 }, { "epoch": 0.14865667019382145, "grad_norm": 1.3911850452423096, "learning_rate": 1.8929106362990858e-05, "loss": 0.3211, "step": 6720 }, { "epoch": 0.14876727783533472, "grad_norm": 1.554492473602295, "learning_rate": 1.892754130351e-05, "loss": 0.4075, "step": 6725 }, { "epoch": 0.14887788547684797, "grad_norm": 1.40540611743927, "learning_rate": 1.8925975166031315e-05, "loss": 0.4369, "step": 6730 }, { "epoch": 0.14898849311836124, "grad_norm": 1.3568195104599, "learning_rate": 1.8924407950743916e-05, "loss": 0.4758, "step": 6735 }, { "epoch": 0.1490991007598745, "grad_norm": 1.824575424194336, "learning_rate": 1.8922839657837044e-05, "loss": 0.3659, "step": 6740 }, { "epoch": 0.14920970840138775, "grad_norm": 1.9412847757339478, "learning_rate": 1.892127028750006e-05, "loss": 0.351, "step": 6745 }, { "epoch": 0.14932031604290102, "grad_norm": 1.493661642074585, "learning_rate": 1.891969983992248e-05, "loss": 0.3444, "step": 6750 }, { "epoch": 0.14943092368441427, "grad_norm": 1.27972412109375, "learning_rate": 1.8918128315293924e-05, "loss": 0.4341, "step": 6755 }, { "epoch": 0.14954153132592754, "grad_norm": 1.475452184677124, "learning_rate": 1.8916555713804156e-05, "loss": 0.3902, "step": 6760 }, { "epoch": 0.14965213896744078, "grad_norm": 1.5502803325653076, "learning_rate": 1.8914982035643067e-05, "loss": 0.4656, "step": 6765 }, { "epoch": 0.14976274660895406, "grad_norm": 1.1533275842666626, "learning_rate": 1.8913407281000683e-05, "loss": 0.3727, "step": 6770 }, { "epoch": 0.14987335425046733, "grad_norm": 1.2519168853759766, "learning_rate": 1.8911831450067147e-05, "loss": 0.4839, "step": 6775 }, { "epoch": 0.14998396189198057, "grad_norm": 1.3966728448867798, "learning_rate": 1.8910254543032746e-05, "loss": 0.3319, "step": 6780 }, { "epoch": 0.15009456953349384, "grad_norm": 1.5260014533996582, "learning_rate": 1.8908676560087887e-05, "loss": 0.4788, "step": 6785 }, { "epoch": 0.1502051771750071, "grad_norm": 1.1809232234954834, "learning_rate": 1.8907097501423117e-05, "loss": 0.4762, "step": 6790 }, { "epoch": 0.15031578481652036, "grad_norm": 1.6237883567810059, "learning_rate": 1.8905517367229102e-05, "loss": 0.5193, "step": 6795 }, { "epoch": 0.15042639245803363, "grad_norm": 1.566816806793213, "learning_rate": 1.890393615769664e-05, "loss": 0.4047, "step": 6800 }, { "epoch": 0.15053700009954687, "grad_norm": 2.788219451904297, "learning_rate": 1.8902353873016667e-05, "loss": 0.4668, "step": 6805 }, { "epoch": 0.15064760774106015, "grad_norm": 0.7059590816497803, "learning_rate": 1.8900770513380244e-05, "loss": 0.3458, "step": 6810 }, { "epoch": 0.1507582153825734, "grad_norm": 1.5357117652893066, "learning_rate": 1.8899186078978557e-05, "loss": 0.4077, "step": 6815 }, { "epoch": 0.15086882302408666, "grad_norm": 0.920581042766571, "learning_rate": 1.8897600570002924e-05, "loss": 0.3245, "step": 6820 }, { "epoch": 0.1509794306655999, "grad_norm": 1.2094290256500244, "learning_rate": 1.8896013986644803e-05, "loss": 0.4817, "step": 6825 }, { "epoch": 0.15109003830711318, "grad_norm": 1.5384165048599243, "learning_rate": 1.8894426329095767e-05, "loss": 0.3204, "step": 6830 }, { "epoch": 0.15120064594862645, "grad_norm": 1.1422796249389648, "learning_rate": 1.889283759754753e-05, "loss": 0.4663, "step": 6835 }, { "epoch": 0.1513112535901397, "grad_norm": 1.8978912830352783, "learning_rate": 1.8891247792191926e-05, "loss": 0.493, "step": 6840 }, { "epoch": 0.15142186123165297, "grad_norm": 0.9259054660797119, "learning_rate": 1.8889656913220924e-05, "loss": 0.3536, "step": 6845 }, { "epoch": 0.1515324688731662, "grad_norm": 1.4446499347686768, "learning_rate": 1.8888064960826624e-05, "loss": 0.3547, "step": 6850 }, { "epoch": 0.15164307651467948, "grad_norm": 1.5167491436004639, "learning_rate": 1.8886471935201257e-05, "loss": 0.4349, "step": 6855 }, { "epoch": 0.15175368415619273, "grad_norm": 1.55640709400177, "learning_rate": 1.8884877836537172e-05, "loss": 0.4138, "step": 6860 }, { "epoch": 0.151864291797706, "grad_norm": 2.0526506900787354, "learning_rate": 1.888328266502686e-05, "loss": 0.4447, "step": 6865 }, { "epoch": 0.15197489943921927, "grad_norm": 1.9261051416397095, "learning_rate": 1.888168642086294e-05, "loss": 0.3258, "step": 6870 }, { "epoch": 0.1520855070807325, "grad_norm": 1.8149884939193726, "learning_rate": 1.8880089104238157e-05, "loss": 0.421, "step": 6875 }, { "epoch": 0.15219611472224578, "grad_norm": 1.0715018510818481, "learning_rate": 1.8878490715345387e-05, "loss": 0.3102, "step": 6880 }, { "epoch": 0.15230672236375903, "grad_norm": 1.201961636543274, "learning_rate": 1.887689125437763e-05, "loss": 0.3448, "step": 6885 }, { "epoch": 0.1524173300052723, "grad_norm": 1.1536247730255127, "learning_rate": 1.8875290721528023e-05, "loss": 0.4947, "step": 6890 }, { "epoch": 0.15252793764678554, "grad_norm": 1.2438040971755981, "learning_rate": 1.887368911698983e-05, "loss": 0.2927, "step": 6895 }, { "epoch": 0.15263854528829882, "grad_norm": 0.973251461982727, "learning_rate": 1.887208644095645e-05, "loss": 0.3309, "step": 6900 }, { "epoch": 0.1527491529298121, "grad_norm": 2.151782751083374, "learning_rate": 1.88704826936214e-05, "loss": 0.4537, "step": 6905 }, { "epoch": 0.15285976057132533, "grad_norm": 2.0673837661743164, "learning_rate": 1.8868877875178326e-05, "loss": 0.4892, "step": 6910 }, { "epoch": 0.1529703682128386, "grad_norm": 1.4575204849243164, "learning_rate": 1.8867271985821022e-05, "loss": 0.6147, "step": 6915 }, { "epoch": 0.15308097585435185, "grad_norm": 1.4364303350448608, "learning_rate": 1.886566502574339e-05, "loss": 0.4888, "step": 6920 }, { "epoch": 0.15319158349586512, "grad_norm": 2.0946617126464844, "learning_rate": 1.886405699513947e-05, "loss": 0.4178, "step": 6925 }, { "epoch": 0.1533021911373784, "grad_norm": 1.8822555541992188, "learning_rate": 1.886244789420344e-05, "loss": 0.4221, "step": 6930 }, { "epoch": 0.15341279877889163, "grad_norm": 0.9172224402427673, "learning_rate": 1.886083772312959e-05, "loss": 0.3308, "step": 6935 }, { "epoch": 0.1535234064204049, "grad_norm": 1.6786541938781738, "learning_rate": 1.8859226482112344e-05, "loss": 0.349, "step": 6940 }, { "epoch": 0.15363401406191815, "grad_norm": 1.1701561212539673, "learning_rate": 1.8857614171346273e-05, "loss": 0.4899, "step": 6945 }, { "epoch": 0.15374462170343142, "grad_norm": 1.2971248626708984, "learning_rate": 1.8856000791026052e-05, "loss": 0.5411, "step": 6950 }, { "epoch": 0.15385522934494467, "grad_norm": 1.3721681833267212, "learning_rate": 1.8854386341346497e-05, "loss": 0.4374, "step": 6955 }, { "epoch": 0.15396583698645794, "grad_norm": 1.1324117183685303, "learning_rate": 1.885277082250255e-05, "loss": 0.3396, "step": 6960 }, { "epoch": 0.1540764446279712, "grad_norm": 1.5795958042144775, "learning_rate": 1.8851154234689297e-05, "loss": 0.4132, "step": 6965 }, { "epoch": 0.15418705226948445, "grad_norm": 1.0724815130233765, "learning_rate": 1.8849536578101926e-05, "loss": 0.4099, "step": 6970 }, { "epoch": 0.15429765991099773, "grad_norm": 1.163935899734497, "learning_rate": 1.8847917852935777e-05, "loss": 0.2809, "step": 6975 }, { "epoch": 0.15440826755251097, "grad_norm": 2.16550612449646, "learning_rate": 1.8846298059386304e-05, "loss": 0.4865, "step": 6980 }, { "epoch": 0.15451887519402424, "grad_norm": 2.5563864707946777, "learning_rate": 1.8844677197649107e-05, "loss": 0.3873, "step": 6985 }, { "epoch": 0.15462948283553749, "grad_norm": 0.8434544801712036, "learning_rate": 1.8843055267919895e-05, "loss": 0.3624, "step": 6990 }, { "epoch": 0.15474009047705076, "grad_norm": 1.3501099348068237, "learning_rate": 1.8841432270394516e-05, "loss": 0.4563, "step": 6995 }, { "epoch": 0.15485069811856403, "grad_norm": 1.5824140310287476, "learning_rate": 1.8839808205268952e-05, "loss": 0.4779, "step": 7000 }, { "epoch": 0.15496130576007727, "grad_norm": 1.1683738231658936, "learning_rate": 1.8838183072739306e-05, "loss": 0.4113, "step": 7005 }, { "epoch": 0.15507191340159054, "grad_norm": 1.4356420040130615, "learning_rate": 1.883655687300181e-05, "loss": 0.3554, "step": 7010 }, { "epoch": 0.1551825210431038, "grad_norm": 2.205475330352783, "learning_rate": 1.8834929606252828e-05, "loss": 0.2737, "step": 7015 }, { "epoch": 0.15529312868461706, "grad_norm": 1.3508440256118774, "learning_rate": 1.8833301272688858e-05, "loss": 0.4682, "step": 7020 }, { "epoch": 0.15540373632613033, "grad_norm": 2.110431671142578, "learning_rate": 1.883167187250651e-05, "loss": 0.3252, "step": 7025 }, { "epoch": 0.15551434396764358, "grad_norm": 1.377989411354065, "learning_rate": 1.8830041405902542e-05, "loss": 0.4153, "step": 7030 }, { "epoch": 0.15562495160915685, "grad_norm": 1.8716366291046143, "learning_rate": 1.882840987307383e-05, "loss": 0.4817, "step": 7035 }, { "epoch": 0.1557355592506701, "grad_norm": 2.1737489700317383, "learning_rate": 1.8826777274217377e-05, "loss": 0.4265, "step": 7040 }, { "epoch": 0.15584616689218336, "grad_norm": 1.0410735607147217, "learning_rate": 1.8825143609530324e-05, "loss": 0.3665, "step": 7045 }, { "epoch": 0.1559567745336966, "grad_norm": 1.1508889198303223, "learning_rate": 1.8823508879209935e-05, "loss": 0.4382, "step": 7050 }, { "epoch": 0.15606738217520988, "grad_norm": 2.1191341876983643, "learning_rate": 1.88218730834536e-05, "loss": 0.4306, "step": 7055 }, { "epoch": 0.15617798981672315, "grad_norm": 1.030676007270813, "learning_rate": 1.8820236222458843e-05, "loss": 0.4323, "step": 7060 }, { "epoch": 0.1562885974582364, "grad_norm": 1.7286893129348755, "learning_rate": 1.8818598296423314e-05, "loss": 0.4569, "step": 7065 }, { "epoch": 0.15639920509974967, "grad_norm": 1.960218906402588, "learning_rate": 1.8816959305544795e-05, "loss": 0.3092, "step": 7070 }, { "epoch": 0.1565098127412629, "grad_norm": 1.722731590270996, "learning_rate": 1.881531925002119e-05, "loss": 0.237, "step": 7075 }, { "epoch": 0.15662042038277618, "grad_norm": 0.9747259616851807, "learning_rate": 1.881367813005053e-05, "loss": 0.3156, "step": 7080 }, { "epoch": 0.15673102802428943, "grad_norm": 1.5503073930740356, "learning_rate": 1.8812035945830997e-05, "loss": 0.395, "step": 7085 }, { "epoch": 0.1568416356658027, "grad_norm": 1.4238839149475098, "learning_rate": 1.8810392697560865e-05, "loss": 0.4318, "step": 7090 }, { "epoch": 0.15695224330731597, "grad_norm": 1.3499176502227783, "learning_rate": 1.8808748385438568e-05, "loss": 0.5666, "step": 7095 }, { "epoch": 0.1570628509488292, "grad_norm": 1.6005628108978271, "learning_rate": 1.8807103009662647e-05, "loss": 0.3817, "step": 7100 }, { "epoch": 0.15717345859034249, "grad_norm": 1.3799035549163818, "learning_rate": 1.8805456570431788e-05, "loss": 0.4756, "step": 7105 }, { "epoch": 0.15728406623185573, "grad_norm": 2.653916358947754, "learning_rate": 1.8803809067944798e-05, "loss": 0.3869, "step": 7110 }, { "epoch": 0.157394673873369, "grad_norm": 1.7716879844665527, "learning_rate": 1.880216050240061e-05, "loss": 0.2573, "step": 7115 }, { "epoch": 0.15750528151488224, "grad_norm": 1.6506370306015015, "learning_rate": 1.8800510873998283e-05, "loss": 0.4831, "step": 7120 }, { "epoch": 0.15761588915639552, "grad_norm": 1.246965765953064, "learning_rate": 1.8798860182937017e-05, "loss": 0.3906, "step": 7125 }, { "epoch": 0.1577264967979088, "grad_norm": 2.105010747909546, "learning_rate": 1.8797208429416128e-05, "loss": 0.4496, "step": 7130 }, { "epoch": 0.15783710443942203, "grad_norm": 0.9253627061843872, "learning_rate": 1.879555561363507e-05, "loss": 0.4697, "step": 7135 }, { "epoch": 0.1579477120809353, "grad_norm": 1.4181708097457886, "learning_rate": 1.8793901735793414e-05, "loss": 0.4501, "step": 7140 }, { "epoch": 0.15805831972244855, "grad_norm": 1.462512493133545, "learning_rate": 1.8792246796090866e-05, "loss": 0.3376, "step": 7145 }, { "epoch": 0.15816892736396182, "grad_norm": 1.8517863750457764, "learning_rate": 1.8790590794727262e-05, "loss": 0.3676, "step": 7150 }, { "epoch": 0.1582795350054751, "grad_norm": 1.5999600887298584, "learning_rate": 1.8788933731902565e-05, "loss": 0.3866, "step": 7155 }, { "epoch": 0.15839014264698834, "grad_norm": 1.6431505680084229, "learning_rate": 1.8787275607816857e-05, "loss": 0.3984, "step": 7160 }, { "epoch": 0.1585007502885016, "grad_norm": 1.485962152481079, "learning_rate": 1.8785616422670365e-05, "loss": 0.453, "step": 7165 }, { "epoch": 0.15861135793001485, "grad_norm": 1.1868473291397095, "learning_rate": 1.8783956176663427e-05, "loss": 0.4142, "step": 7170 }, { "epoch": 0.15872196557152812, "grad_norm": 1.145594835281372, "learning_rate": 1.8782294869996524e-05, "loss": 0.413, "step": 7175 }, { "epoch": 0.15883257321304137, "grad_norm": 1.6261214017868042, "learning_rate": 1.8780632502870256e-05, "loss": 0.4087, "step": 7180 }, { "epoch": 0.15894318085455464, "grad_norm": 1.5418369770050049, "learning_rate": 1.8778969075485353e-05, "loss": 0.5383, "step": 7185 }, { "epoch": 0.1590537884960679, "grad_norm": 2.021383762359619, "learning_rate": 1.8777304588042674e-05, "loss": 0.5217, "step": 7190 }, { "epoch": 0.15916439613758115, "grad_norm": 1.3313496112823486, "learning_rate": 1.8775639040743203e-05, "loss": 0.4699, "step": 7195 }, { "epoch": 0.15927500377909443, "grad_norm": 1.4707227945327759, "learning_rate": 1.8773972433788056e-05, "loss": 0.377, "step": 7200 }, { "epoch": 0.15938561142060767, "grad_norm": 1.2150722742080688, "learning_rate": 1.8772304767378478e-05, "loss": 0.5147, "step": 7205 }, { "epoch": 0.15949621906212094, "grad_norm": 1.749758243560791, "learning_rate": 1.8770636041715832e-05, "loss": 0.4718, "step": 7210 }, { "epoch": 0.15960682670363419, "grad_norm": 1.0484381914138794, "learning_rate": 1.8768966257001627e-05, "loss": 0.4358, "step": 7215 }, { "epoch": 0.15971743434514746, "grad_norm": 1.4892733097076416, "learning_rate": 1.8767295413437478e-05, "loss": 0.4723, "step": 7220 }, { "epoch": 0.15982804198666073, "grad_norm": 1.0045056343078613, "learning_rate": 1.8765623511225145e-05, "loss": 0.4236, "step": 7225 }, { "epoch": 0.15993864962817397, "grad_norm": 1.5631675720214844, "learning_rate": 1.8763950550566505e-05, "loss": 0.3983, "step": 7230 }, { "epoch": 0.16004925726968725, "grad_norm": 1.415142297744751, "learning_rate": 1.8762276531663576e-05, "loss": 0.5198, "step": 7235 }, { "epoch": 0.1601598649112005, "grad_norm": 1.9777451753616333, "learning_rate": 1.8760601454718485e-05, "loss": 0.5071, "step": 7240 }, { "epoch": 0.16027047255271376, "grad_norm": 1.551720380783081, "learning_rate": 1.87589253199335e-05, "loss": 0.3274, "step": 7245 }, { "epoch": 0.160381080194227, "grad_norm": 1.4934744834899902, "learning_rate": 1.8757248127511024e-05, "loss": 0.3868, "step": 7250 }, { "epoch": 0.16049168783574028, "grad_norm": 1.5638487339019775, "learning_rate": 1.8755569877653563e-05, "loss": 0.4406, "step": 7255 }, { "epoch": 0.16060229547725355, "grad_norm": 1.5387717485427856, "learning_rate": 1.8753890570563775e-05, "loss": 0.4425, "step": 7260 }, { "epoch": 0.1607129031187668, "grad_norm": 1.844288945198059, "learning_rate": 1.8752210206444427e-05, "loss": 0.3978, "step": 7265 }, { "epoch": 0.16082351076028006, "grad_norm": 1.4657952785491943, "learning_rate": 1.875052878549843e-05, "loss": 0.456, "step": 7270 }, { "epoch": 0.1609341184017933, "grad_norm": 1.6670246124267578, "learning_rate": 1.8748846307928814e-05, "loss": 0.231, "step": 7275 }, { "epoch": 0.16104472604330658, "grad_norm": 1.1252118349075317, "learning_rate": 1.8747162773938735e-05, "loss": 0.3256, "step": 7280 }, { "epoch": 0.16115533368481985, "grad_norm": 1.3518365621566772, "learning_rate": 1.8745478183731477e-05, "loss": 0.5264, "step": 7285 }, { "epoch": 0.1612659413263331, "grad_norm": 1.1151585578918457, "learning_rate": 1.8743792537510462e-05, "loss": 0.4136, "step": 7290 }, { "epoch": 0.16137654896784637, "grad_norm": 4.6722412109375, "learning_rate": 1.8742105835479226e-05, "loss": 0.3702, "step": 7295 }, { "epoch": 0.1614871566093596, "grad_norm": 1.3794221878051758, "learning_rate": 1.8740418077841437e-05, "loss": 0.2637, "step": 7300 }, { "epoch": 0.16159776425087288, "grad_norm": 1.382637858390808, "learning_rate": 1.8738729264800893e-05, "loss": 0.3662, "step": 7305 }, { "epoch": 0.16170837189238613, "grad_norm": 1.413165807723999, "learning_rate": 1.8737039396561514e-05, "loss": 0.3976, "step": 7310 }, { "epoch": 0.1618189795338994, "grad_norm": 2.0964319705963135, "learning_rate": 1.8735348473327355e-05, "loss": 0.4185, "step": 7315 }, { "epoch": 0.16192958717541267, "grad_norm": 1.6047989130020142, "learning_rate": 1.8733656495302597e-05, "loss": 0.3735, "step": 7320 }, { "epoch": 0.16204019481692591, "grad_norm": 1.6694560050964355, "learning_rate": 1.873196346269154e-05, "loss": 0.4954, "step": 7325 }, { "epoch": 0.16215080245843919, "grad_norm": 1.4806987047195435, "learning_rate": 1.873026937569862e-05, "loss": 0.361, "step": 7330 }, { "epoch": 0.16226141009995243, "grad_norm": 1.2070176601409912, "learning_rate": 1.8728574234528394e-05, "loss": 0.3446, "step": 7335 }, { "epoch": 0.1623720177414657, "grad_norm": 1.7378181219100952, "learning_rate": 1.8726878039385553e-05, "loss": 0.3633, "step": 7340 }, { "epoch": 0.16248262538297895, "grad_norm": 1.2415192127227783, "learning_rate": 1.8725180790474914e-05, "loss": 0.4932, "step": 7345 }, { "epoch": 0.16259323302449222, "grad_norm": 1.1045855283737183, "learning_rate": 1.8723482488001416e-05, "loss": 0.3755, "step": 7350 }, { "epoch": 0.1627038406660055, "grad_norm": 1.1752934455871582, "learning_rate": 1.872178313217013e-05, "loss": 0.5194, "step": 7355 }, { "epoch": 0.16281444830751873, "grad_norm": 1.5265499353408813, "learning_rate": 1.8720082723186252e-05, "loss": 0.5175, "step": 7360 }, { "epoch": 0.162925055949032, "grad_norm": 1.2329999208450317, "learning_rate": 1.8718381261255107e-05, "loss": 0.4905, "step": 7365 }, { "epoch": 0.16303566359054525, "grad_norm": 1.035437822341919, "learning_rate": 1.8716678746582144e-05, "loss": 0.3635, "step": 7370 }, { "epoch": 0.16314627123205852, "grad_norm": 1.48918879032135, "learning_rate": 1.8714975179372943e-05, "loss": 0.33, "step": 7375 }, { "epoch": 0.1632568788735718, "grad_norm": 1.6899341344833374, "learning_rate": 1.8713270559833206e-05, "loss": 0.4087, "step": 7380 }, { "epoch": 0.16336748651508504, "grad_norm": 1.0937985181808472, "learning_rate": 1.871156488816877e-05, "loss": 0.3024, "step": 7385 }, { "epoch": 0.1634780941565983, "grad_norm": 1.2399966716766357, "learning_rate": 1.8709858164585593e-05, "loss": 0.3176, "step": 7390 }, { "epoch": 0.16358870179811155, "grad_norm": 1.617626428604126, "learning_rate": 1.8708150389289757e-05, "loss": 0.4013, "step": 7395 }, { "epoch": 0.16369930943962482, "grad_norm": 2.7490668296813965, "learning_rate": 1.8706441562487482e-05, "loss": 0.2253, "step": 7400 }, { "epoch": 0.16380991708113807, "grad_norm": 1.3291431665420532, "learning_rate": 1.8704731684385104e-05, "loss": 0.4433, "step": 7405 }, { "epoch": 0.16392052472265134, "grad_norm": 1.211215853691101, "learning_rate": 1.870302075518909e-05, "loss": 0.5893, "step": 7410 }, { "epoch": 0.1640311323641646, "grad_norm": 1.9345746040344238, "learning_rate": 1.8701308775106035e-05, "loss": 0.3977, "step": 7415 }, { "epoch": 0.16414174000567786, "grad_norm": 1.164852261543274, "learning_rate": 1.8699595744342663e-05, "loss": 0.4176, "step": 7420 }, { "epoch": 0.16425234764719113, "grad_norm": 1.3996143341064453, "learning_rate": 1.869788166310582e-05, "loss": 0.4726, "step": 7425 }, { "epoch": 0.16436295528870437, "grad_norm": 1.6336380243301392, "learning_rate": 1.8696166531602475e-05, "loss": 0.4377, "step": 7430 }, { "epoch": 0.16447356293021764, "grad_norm": 1.1813900470733643, "learning_rate": 1.8694450350039735e-05, "loss": 0.2787, "step": 7435 }, { "epoch": 0.1645841705717309, "grad_norm": 1.6729168891906738, "learning_rate": 1.869273311862483e-05, "loss": 0.5754, "step": 7440 }, { "epoch": 0.16469477821324416, "grad_norm": 1.4368219375610352, "learning_rate": 1.8691014837565117e-05, "loss": 0.3581, "step": 7445 }, { "epoch": 0.16480538585475743, "grad_norm": 0.8685466051101685, "learning_rate": 1.868929550706807e-05, "loss": 0.3478, "step": 7450 }, { "epoch": 0.16491599349627067, "grad_norm": 1.0771723985671997, "learning_rate": 1.8687575127341302e-05, "loss": 0.4904, "step": 7455 }, { "epoch": 0.16502660113778395, "grad_norm": 1.4873822927474976, "learning_rate": 1.868585369859255e-05, "loss": 0.3575, "step": 7460 }, { "epoch": 0.1651372087792972, "grad_norm": 1.8300514221191406, "learning_rate": 1.868413122102967e-05, "loss": 0.5579, "step": 7465 }, { "epoch": 0.16524781642081046, "grad_norm": 2.353144884109497, "learning_rate": 1.8682407694860658e-05, "loss": 0.3093, "step": 7470 }, { "epoch": 0.1653584240623237, "grad_norm": 1.178699254989624, "learning_rate": 1.8680683120293626e-05, "loss": 0.3202, "step": 7475 }, { "epoch": 0.16546903170383698, "grad_norm": 1.5245779752731323, "learning_rate": 1.8678957497536817e-05, "loss": 0.4672, "step": 7480 }, { "epoch": 0.16557963934535025, "grad_norm": 1.1630855798721313, "learning_rate": 1.8677230826798594e-05, "loss": 0.4317, "step": 7485 }, { "epoch": 0.1656902469868635, "grad_norm": 1.4707622528076172, "learning_rate": 1.867550310828746e-05, "loss": 0.2809, "step": 7490 }, { "epoch": 0.16580085462837676, "grad_norm": 1.0620827674865723, "learning_rate": 1.867377434221203e-05, "loss": 0.4266, "step": 7495 }, { "epoch": 0.16591146226989, "grad_norm": 1.6542867422103882, "learning_rate": 1.8672044528781057e-05, "loss": 0.2275, "step": 7500 }, { "epoch": 0.16602206991140328, "grad_norm": 1.3178889751434326, "learning_rate": 1.8670313668203413e-05, "loss": 0.3098, "step": 7505 }, { "epoch": 0.16613267755291655, "grad_norm": 0.9473133087158203, "learning_rate": 1.8668581760688097e-05, "loss": 0.359, "step": 7510 }, { "epoch": 0.1662432851944298, "grad_norm": 1.3210045099258423, "learning_rate": 1.866684880644424e-05, "loss": 0.4341, "step": 7515 }, { "epoch": 0.16635389283594307, "grad_norm": 1.9307429790496826, "learning_rate": 1.866511480568109e-05, "loss": 0.5066, "step": 7520 }, { "epoch": 0.1664645004774563, "grad_norm": 1.6515934467315674, "learning_rate": 1.8663379758608035e-05, "loss": 0.5104, "step": 7525 }, { "epoch": 0.16657510811896958, "grad_norm": 1.308433175086975, "learning_rate": 1.8661643665434578e-05, "loss": 0.3422, "step": 7530 }, { "epoch": 0.16668571576048283, "grad_norm": 1.7011785507202148, "learning_rate": 1.865990652637035e-05, "loss": 0.4167, "step": 7535 }, { "epoch": 0.1667963234019961, "grad_norm": 2.2924108505249023, "learning_rate": 1.8658168341625108e-05, "loss": 0.3791, "step": 7540 }, { "epoch": 0.16690693104350937, "grad_norm": 2.1006078720092773, "learning_rate": 1.8656429111408742e-05, "loss": 0.4058, "step": 7545 }, { "epoch": 0.16701753868502262, "grad_norm": 1.1204835176467896, "learning_rate": 1.8654688835931262e-05, "loss": 0.3579, "step": 7550 }, { "epoch": 0.1671281463265359, "grad_norm": 2.2628591060638428, "learning_rate": 1.8652947515402805e-05, "loss": 0.4131, "step": 7555 }, { "epoch": 0.16723875396804913, "grad_norm": 0.9728931784629822, "learning_rate": 1.8651205150033637e-05, "loss": 0.2973, "step": 7560 }, { "epoch": 0.1673493616095624, "grad_norm": 1.4875763654708862, "learning_rate": 1.8649461740034148e-05, "loss": 0.4051, "step": 7565 }, { "epoch": 0.16745996925107565, "grad_norm": 1.4880952835083008, "learning_rate": 1.864771728561485e-05, "loss": 0.5484, "step": 7570 }, { "epoch": 0.16757057689258892, "grad_norm": 1.3697905540466309, "learning_rate": 1.8645971786986387e-05, "loss": 0.4242, "step": 7575 }, { "epoch": 0.1676811845341022, "grad_norm": 1.5144253969192505, "learning_rate": 1.8644225244359527e-05, "loss": 0.4573, "step": 7580 }, { "epoch": 0.16779179217561543, "grad_norm": 1.3519387245178223, "learning_rate": 1.864247765794517e-05, "loss": 0.4291, "step": 7585 }, { "epoch": 0.1679023998171287, "grad_norm": 1.1368902921676636, "learning_rate": 1.8640729027954336e-05, "loss": 0.311, "step": 7590 }, { "epoch": 0.16801300745864195, "grad_norm": 1.51514732837677, "learning_rate": 1.863897935459816e-05, "loss": 0.4639, "step": 7595 }, { "epoch": 0.16812361510015522, "grad_norm": 1.8248847723007202, "learning_rate": 1.8637228638087925e-05, "loss": 0.3573, "step": 7600 }, { "epoch": 0.1682342227416685, "grad_norm": 1.5367838144302368, "learning_rate": 1.8635476878635027e-05, "loss": 0.4663, "step": 7605 }, { "epoch": 0.16834483038318174, "grad_norm": 0.9879417419433594, "learning_rate": 1.863372407645099e-05, "loss": 0.3755, "step": 7610 }, { "epoch": 0.168455438024695, "grad_norm": 0.8046427369117737, "learning_rate": 1.8631970231747467e-05, "loss": 0.541, "step": 7615 }, { "epoch": 0.16856604566620825, "grad_norm": 1.6074875593185425, "learning_rate": 1.863021534473623e-05, "loss": 0.2535, "step": 7620 }, { "epoch": 0.16867665330772152, "grad_norm": 1.3108879327774048, "learning_rate": 1.8628459415629185e-05, "loss": 0.5134, "step": 7625 }, { "epoch": 0.16878726094923477, "grad_norm": 1.1715528964996338, "learning_rate": 1.8626702444638356e-05, "loss": 0.3752, "step": 7630 }, { "epoch": 0.16889786859074804, "grad_norm": 1.3703782558441162, "learning_rate": 1.86249444319759e-05, "loss": 0.3476, "step": 7635 }, { "epoch": 0.1690084762322613, "grad_norm": 1.4628915786743164, "learning_rate": 1.8623185377854093e-05, "loss": 0.4037, "step": 7640 }, { "epoch": 0.16911908387377456, "grad_norm": 1.456699252128601, "learning_rate": 1.8621425282485345e-05, "loss": 0.3919, "step": 7645 }, { "epoch": 0.16922969151528783, "grad_norm": 1.2147672176361084, "learning_rate": 1.861966414608218e-05, "loss": 0.3683, "step": 7650 }, { "epoch": 0.16934029915680107, "grad_norm": 1.692923665046692, "learning_rate": 1.8617901968857264e-05, "loss": 0.3889, "step": 7655 }, { "epoch": 0.16945090679831434, "grad_norm": 1.215653419494629, "learning_rate": 1.8616138751023372e-05, "loss": 0.4161, "step": 7660 }, { "epoch": 0.1695615144398276, "grad_norm": 1.413521409034729, "learning_rate": 1.8614374492793416e-05, "loss": 0.2704, "step": 7665 }, { "epoch": 0.16967212208134086, "grad_norm": 1.5628852844238281, "learning_rate": 1.861260919438042e-05, "loss": 0.4273, "step": 7670 }, { "epoch": 0.16978272972285413, "grad_norm": 1.0444245338439941, "learning_rate": 1.861084285599756e-05, "loss": 0.4153, "step": 7675 }, { "epoch": 0.16989333736436737, "grad_norm": 1.8573321104049683, "learning_rate": 1.860907547785811e-05, "loss": 0.3515, "step": 7680 }, { "epoch": 0.17000394500588065, "grad_norm": 1.2788262367248535, "learning_rate": 1.860730706017548e-05, "loss": 0.4669, "step": 7685 }, { "epoch": 0.1701145526473939, "grad_norm": 3.562194347381592, "learning_rate": 1.8605537603163208e-05, "loss": 0.4464, "step": 7690 }, { "epoch": 0.17022516028890716, "grad_norm": 0.9765413403511047, "learning_rate": 1.860376710703496e-05, "loss": 0.6358, "step": 7695 }, { "epoch": 0.1703357679304204, "grad_norm": 1.374396800994873, "learning_rate": 1.8601995572004513e-05, "loss": 0.3771, "step": 7700 }, { "epoch": 0.17044637557193368, "grad_norm": 1.3628512620925903, "learning_rate": 1.8600222998285786e-05, "loss": 0.3924, "step": 7705 }, { "epoch": 0.17055698321344695, "grad_norm": 1.0112675428390503, "learning_rate": 1.8598449386092815e-05, "loss": 0.4432, "step": 7710 }, { "epoch": 0.1706675908549602, "grad_norm": 1.3624294996261597, "learning_rate": 1.8596674735639766e-05, "loss": 0.4798, "step": 7715 }, { "epoch": 0.17077819849647347, "grad_norm": 1.8699549436569214, "learning_rate": 1.8594899047140925e-05, "loss": 0.517, "step": 7720 }, { "epoch": 0.1708888061379867, "grad_norm": 1.6584019660949707, "learning_rate": 1.8593122320810703e-05, "loss": 0.4957, "step": 7725 }, { "epoch": 0.17099941377949998, "grad_norm": 0.7982273101806641, "learning_rate": 1.8591344556863644e-05, "loss": 0.3394, "step": 7730 }, { "epoch": 0.17111002142101325, "grad_norm": 1.961687445640564, "learning_rate": 1.858956575551441e-05, "loss": 0.3532, "step": 7735 }, { "epoch": 0.1712206290625265, "grad_norm": 1.3549833297729492, "learning_rate": 1.858778591697779e-05, "loss": 0.4188, "step": 7740 }, { "epoch": 0.17133123670403977, "grad_norm": 1.4380431175231934, "learning_rate": 1.8586005041468702e-05, "loss": 0.2524, "step": 7745 }, { "epoch": 0.171441844345553, "grad_norm": 1.3808265924453735, "learning_rate": 1.8584223129202184e-05, "loss": 0.4661, "step": 7750 }, { "epoch": 0.17155245198706628, "grad_norm": 1.4743515253067017, "learning_rate": 1.8582440180393403e-05, "loss": 0.344, "step": 7755 }, { "epoch": 0.17166305962857953, "grad_norm": 1.4708607196807861, "learning_rate": 1.8580656195257645e-05, "loss": 0.3284, "step": 7760 }, { "epoch": 0.1717736672700928, "grad_norm": 1.1553980112075806, "learning_rate": 1.8578871174010332e-05, "loss": 0.3339, "step": 7765 }, { "epoch": 0.17188427491160607, "grad_norm": 0.9504607915878296, "learning_rate": 1.8577085116867e-05, "loss": 0.2951, "step": 7770 }, { "epoch": 0.17199488255311932, "grad_norm": 1.1548831462860107, "learning_rate": 1.8575298024043314e-05, "loss": 0.4135, "step": 7775 }, { "epoch": 0.1721054901946326, "grad_norm": 1.3247015476226807, "learning_rate": 1.857350989575507e-05, "loss": 0.4406, "step": 7780 }, { "epoch": 0.17221609783614583, "grad_norm": 1.9327797889709473, "learning_rate": 1.8571720732218182e-05, "loss": 0.3191, "step": 7785 }, { "epoch": 0.1723267054776591, "grad_norm": 1.330474615097046, "learning_rate": 1.856993053364869e-05, "loss": 0.4331, "step": 7790 }, { "epoch": 0.17243731311917235, "grad_norm": 1.351824164390564, "learning_rate": 1.8568139300262754e-05, "loss": 0.4249, "step": 7795 }, { "epoch": 0.17254792076068562, "grad_norm": 0.918010950088501, "learning_rate": 1.856634703227668e-05, "loss": 0.4319, "step": 7800 }, { "epoch": 0.1726585284021989, "grad_norm": 0.8457802534103394, "learning_rate": 1.8564553729906868e-05, "loss": 0.2361, "step": 7805 }, { "epoch": 0.17276913604371213, "grad_norm": 1.1923693418502808, "learning_rate": 1.856275939336987e-05, "loss": 0.2914, "step": 7810 }, { "epoch": 0.1728797436852254, "grad_norm": 1.6569679975509644, "learning_rate": 1.8560964022882343e-05, "loss": 0.3746, "step": 7815 }, { "epoch": 0.17299035132673865, "grad_norm": 0.9338164329528809, "learning_rate": 1.8559167618661084e-05, "loss": 0.456, "step": 7820 }, { "epoch": 0.17310095896825192, "grad_norm": 1.3295737504959106, "learning_rate": 1.8557370180923006e-05, "loss": 0.4863, "step": 7825 }, { "epoch": 0.17321156660976517, "grad_norm": 1.761412501335144, "learning_rate": 1.8555571709885146e-05, "loss": 0.5266, "step": 7830 }, { "epoch": 0.17332217425127844, "grad_norm": 0.9887244701385498, "learning_rate": 1.8553772205764674e-05, "loss": 0.2651, "step": 7835 }, { "epoch": 0.1734327818927917, "grad_norm": 1.559511423110962, "learning_rate": 1.8551971668778877e-05, "loss": 0.2755, "step": 7840 }, { "epoch": 0.17354338953430495, "grad_norm": 0.9661475419998169, "learning_rate": 1.855017009914517e-05, "loss": 0.4721, "step": 7845 }, { "epoch": 0.17365399717581823, "grad_norm": 1.350678563117981, "learning_rate": 1.854836749708109e-05, "loss": 0.2534, "step": 7850 }, { "epoch": 0.17376460481733147, "grad_norm": 1.4461195468902588, "learning_rate": 1.8546563862804303e-05, "loss": 0.3772, "step": 7855 }, { "epoch": 0.17387521245884474, "grad_norm": 2.346102237701416, "learning_rate": 1.8544759196532596e-05, "loss": 0.3738, "step": 7860 }, { "epoch": 0.173985820100358, "grad_norm": 2.4317517280578613, "learning_rate": 1.8542953498483883e-05, "loss": 0.3699, "step": 7865 }, { "epoch": 0.17409642774187126, "grad_norm": 2.2187459468841553, "learning_rate": 1.8541146768876198e-05, "loss": 0.4078, "step": 7870 }, { "epoch": 0.17420703538338453, "grad_norm": 2.9168524742126465, "learning_rate": 1.8539339007927707e-05, "loss": 0.3256, "step": 7875 }, { "epoch": 0.17431764302489777, "grad_norm": 1.3825846910476685, "learning_rate": 1.8537530215856696e-05, "loss": 0.3185, "step": 7880 }, { "epoch": 0.17442825066641104, "grad_norm": 1.540684461593628, "learning_rate": 1.8535720392881576e-05, "loss": 0.2727, "step": 7885 }, { "epoch": 0.1745388583079243, "grad_norm": 2.1097497940063477, "learning_rate": 1.8533909539220876e-05, "loss": 0.4141, "step": 7890 }, { "epoch": 0.17464946594943756, "grad_norm": 1.4150944948196411, "learning_rate": 1.853209765509327e-05, "loss": 0.4885, "step": 7895 }, { "epoch": 0.17476007359095083, "grad_norm": 1.2451887130737305, "learning_rate": 1.8530284740717533e-05, "loss": 0.3329, "step": 7900 }, { "epoch": 0.17487068123246408, "grad_norm": 1.4568026065826416, "learning_rate": 1.852847079631257e-05, "loss": 0.3911, "step": 7905 }, { "epoch": 0.17498128887397735, "grad_norm": 1.6719510555267334, "learning_rate": 1.8526655822097422e-05, "loss": 0.3293, "step": 7910 }, { "epoch": 0.1750918965154906, "grad_norm": 1.1856008768081665, "learning_rate": 1.852483981829125e-05, "loss": 0.363, "step": 7915 }, { "epoch": 0.17520250415700386, "grad_norm": 1.4380193948745728, "learning_rate": 1.852302278511332e-05, "loss": 0.3614, "step": 7920 }, { "epoch": 0.1753131117985171, "grad_norm": 1.5123052597045898, "learning_rate": 1.8521204722783052e-05, "loss": 0.4788, "step": 7925 }, { "epoch": 0.17542371944003038, "grad_norm": 1.3554803133010864, "learning_rate": 1.8519385631519973e-05, "loss": 0.387, "step": 7930 }, { "epoch": 0.17553432708154365, "grad_norm": 1.3532334566116333, "learning_rate": 1.8517565511543737e-05, "loss": 0.5483, "step": 7935 }, { "epoch": 0.1756449347230569, "grad_norm": 1.3193596601486206, "learning_rate": 1.8515744363074124e-05, "loss": 0.4488, "step": 7940 }, { "epoch": 0.17575554236457017, "grad_norm": 1.566103219985962, "learning_rate": 1.8513922186331035e-05, "loss": 0.4471, "step": 7945 }, { "epoch": 0.1758661500060834, "grad_norm": 1.2442361116409302, "learning_rate": 1.85120989815345e-05, "loss": 0.2998, "step": 7950 }, { "epoch": 0.17597675764759668, "grad_norm": 1.417935848236084, "learning_rate": 1.851027474890467e-05, "loss": 0.3001, "step": 7955 }, { "epoch": 0.17608736528910995, "grad_norm": 1.1738121509552002, "learning_rate": 1.8508449488661816e-05, "loss": 0.5041, "step": 7960 }, { "epoch": 0.1761979729306232, "grad_norm": 1.5943917036056519, "learning_rate": 1.8506623201026342e-05, "loss": 0.4442, "step": 7965 }, { "epoch": 0.17630858057213647, "grad_norm": 1.373829960823059, "learning_rate": 1.8504795886218774e-05, "loss": 0.4262, "step": 7970 }, { "epoch": 0.1764191882136497, "grad_norm": 1.100296974182129, "learning_rate": 1.8502967544459753e-05, "loss": 0.2889, "step": 7975 }, { "epoch": 0.17652979585516299, "grad_norm": 1.232465386390686, "learning_rate": 1.8501138175970057e-05, "loss": 0.3555, "step": 7980 }, { "epoch": 0.17664040349667623, "grad_norm": 0.8201074600219727, "learning_rate": 1.8499307780970578e-05, "loss": 0.4079, "step": 7985 }, { "epoch": 0.1767510111381895, "grad_norm": 1.267311692237854, "learning_rate": 1.849747635968233e-05, "loss": 0.5284, "step": 7990 }, { "epoch": 0.17686161877970277, "grad_norm": 1.2502765655517578, "learning_rate": 1.8495643912326472e-05, "loss": 0.3245, "step": 7995 }, { "epoch": 0.17697222642121602, "grad_norm": 0.9478585720062256, "learning_rate": 1.8493810439124258e-05, "loss": 0.4661, "step": 8000 }, { "epoch": 0.1770828340627293, "grad_norm": 1.419379472732544, "learning_rate": 1.8491975940297085e-05, "loss": 0.4977, "step": 8005 }, { "epoch": 0.17719344170424253, "grad_norm": 1.2168171405792236, "learning_rate": 1.8490140416066467e-05, "loss": 0.3354, "step": 8010 }, { "epoch": 0.1773040493457558, "grad_norm": 1.2087775468826294, "learning_rate": 1.8488303866654044e-05, "loss": 0.2377, "step": 8015 }, { "epoch": 0.17741465698726905, "grad_norm": 2.694960355758667, "learning_rate": 1.8486466292281573e-05, "loss": 0.4888, "step": 8020 }, { "epoch": 0.17752526462878232, "grad_norm": 1.367037296295166, "learning_rate": 1.8484627693170947e-05, "loss": 0.2872, "step": 8025 }, { "epoch": 0.1776358722702956, "grad_norm": 1.3970658779144287, "learning_rate": 1.8482788069544176e-05, "loss": 0.2829, "step": 8030 }, { "epoch": 0.17774647991180884, "grad_norm": 0.8728079795837402, "learning_rate": 1.8480947421623392e-05, "loss": 0.2892, "step": 8035 }, { "epoch": 0.1778570875533221, "grad_norm": 1.4053874015808105, "learning_rate": 1.8479105749630854e-05, "loss": 0.3461, "step": 8040 }, { "epoch": 0.17796769519483535, "grad_norm": 1.3539291620254517, "learning_rate": 1.8477263053788942e-05, "loss": 0.3669, "step": 8045 }, { "epoch": 0.17807830283634862, "grad_norm": 1.6616941690444946, "learning_rate": 1.847541933432016e-05, "loss": 0.3824, "step": 8050 }, { "epoch": 0.17818891047786187, "grad_norm": 1.2972638607025146, "learning_rate": 1.8473574591447136e-05, "loss": 0.4861, "step": 8055 }, { "epoch": 0.17829951811937514, "grad_norm": 1.555336356163025, "learning_rate": 1.847172882539263e-05, "loss": 0.283, "step": 8060 }, { "epoch": 0.1784101257608884, "grad_norm": 1.0428484678268433, "learning_rate": 1.8469882036379508e-05, "loss": 0.3508, "step": 8065 }, { "epoch": 0.17852073340240165, "grad_norm": 1.194475769996643, "learning_rate": 1.8468034224630772e-05, "loss": 0.37, "step": 8070 }, { "epoch": 0.17863134104391493, "grad_norm": 1.5177520513534546, "learning_rate": 1.8466185390369545e-05, "loss": 0.4348, "step": 8075 }, { "epoch": 0.17874194868542817, "grad_norm": 1.3230408430099487, "learning_rate": 1.8464335533819077e-05, "loss": 0.2663, "step": 8080 }, { "epoch": 0.17885255632694144, "grad_norm": 1.4218257665634155, "learning_rate": 1.8462484655202734e-05, "loss": 0.4384, "step": 8085 }, { "epoch": 0.1789631639684547, "grad_norm": 1.3215700387954712, "learning_rate": 1.8460632754744007e-05, "loss": 0.3251, "step": 8090 }, { "epoch": 0.17907377160996796, "grad_norm": 1.7957661151885986, "learning_rate": 1.8458779832666516e-05, "loss": 0.2745, "step": 8095 }, { "epoch": 0.17918437925148123, "grad_norm": 1.3931922912597656, "learning_rate": 1.8456925889194003e-05, "loss": 0.5633, "step": 8100 }, { "epoch": 0.17929498689299447, "grad_norm": 1.168044090270996, "learning_rate": 1.8455070924550323e-05, "loss": 0.5346, "step": 8105 }, { "epoch": 0.17940559453450775, "grad_norm": 1.8877277374267578, "learning_rate": 1.8453214938959468e-05, "loss": 0.4348, "step": 8110 }, { "epoch": 0.179516202176021, "grad_norm": 1.6983084678649902, "learning_rate": 1.8451357932645545e-05, "loss": 0.458, "step": 8115 }, { "epoch": 0.17962680981753426, "grad_norm": 1.4495512247085571, "learning_rate": 1.8449499905832793e-05, "loss": 0.431, "step": 8120 }, { "epoch": 0.17973741745904753, "grad_norm": 1.2477596998214722, "learning_rate": 1.8447640858745562e-05, "loss": 0.3989, "step": 8125 }, { "epoch": 0.17984802510056078, "grad_norm": 1.816855788230896, "learning_rate": 1.844578079160833e-05, "loss": 0.4665, "step": 8130 }, { "epoch": 0.17995863274207405, "grad_norm": 2.1232149600982666, "learning_rate": 1.8443919704645706e-05, "loss": 0.3916, "step": 8135 }, { "epoch": 0.1800692403835873, "grad_norm": 1.1205378770828247, "learning_rate": 1.8442057598082412e-05, "loss": 0.4054, "step": 8140 }, { "epoch": 0.18017984802510056, "grad_norm": 1.2671648263931274, "learning_rate": 1.8440194472143297e-05, "loss": 0.4883, "step": 8145 }, { "epoch": 0.1802904556666138, "grad_norm": 1.5348201990127563, "learning_rate": 1.8438330327053334e-05, "loss": 0.3087, "step": 8150 }, { "epoch": 0.18040106330812708, "grad_norm": 1.6407159566879272, "learning_rate": 1.8436465163037613e-05, "loss": 0.3471, "step": 8155 }, { "epoch": 0.18051167094964035, "grad_norm": 0.9894051551818848, "learning_rate": 1.843459898032136e-05, "loss": 0.4227, "step": 8160 }, { "epoch": 0.1806222785911536, "grad_norm": 1.7790031433105469, "learning_rate": 1.843273177912991e-05, "loss": 0.4004, "step": 8165 }, { "epoch": 0.18073288623266687, "grad_norm": 1.2231945991516113, "learning_rate": 1.843086355968873e-05, "loss": 0.4318, "step": 8170 }, { "epoch": 0.1808434938741801, "grad_norm": 1.7225573062896729, "learning_rate": 1.8428994322223405e-05, "loss": 0.4506, "step": 8175 }, { "epoch": 0.18095410151569338, "grad_norm": 1.1817522048950195, "learning_rate": 1.8427124066959646e-05, "loss": 0.3365, "step": 8180 }, { "epoch": 0.18106470915720665, "grad_norm": 1.6284066438674927, "learning_rate": 1.8425252794123288e-05, "loss": 0.4969, "step": 8185 }, { "epoch": 0.1811753167987199, "grad_norm": 1.4964481592178345, "learning_rate": 1.842338050394028e-05, "loss": 0.2764, "step": 8190 }, { "epoch": 0.18128592444023317, "grad_norm": 1.528009057044983, "learning_rate": 1.8421507196636705e-05, "loss": 0.3546, "step": 8195 }, { "epoch": 0.18139653208174641, "grad_norm": 1.8109263181686401, "learning_rate": 1.8419632872438765e-05, "loss": 0.5466, "step": 8200 }, { "epoch": 0.1815071397232597, "grad_norm": 1.1159063577651978, "learning_rate": 1.8417757531572783e-05, "loss": 0.5753, "step": 8205 }, { "epoch": 0.18161774736477293, "grad_norm": 1.466718077659607, "learning_rate": 1.8415881174265203e-05, "loss": 0.3634, "step": 8210 }, { "epoch": 0.1817283550062862, "grad_norm": 1.2033895254135132, "learning_rate": 1.84140038007426e-05, "loss": 0.3846, "step": 8215 }, { "epoch": 0.18183896264779947, "grad_norm": 1.6185557842254639, "learning_rate": 1.841212541123166e-05, "loss": 0.4145, "step": 8220 }, { "epoch": 0.18194957028931272, "grad_norm": 1.4484376907348633, "learning_rate": 1.8410246005959207e-05, "loss": 0.4046, "step": 8225 }, { "epoch": 0.182060177930826, "grad_norm": 1.3205915689468384, "learning_rate": 1.840836558515217e-05, "loss": 0.3617, "step": 8230 }, { "epoch": 0.18217078557233923, "grad_norm": 1.7261650562286377, "learning_rate": 1.8406484149037612e-05, "loss": 0.4832, "step": 8235 }, { "epoch": 0.1822813932138525, "grad_norm": 2.054595947265625, "learning_rate": 1.8404601697842716e-05, "loss": 0.3579, "step": 8240 }, { "epoch": 0.18239200085536575, "grad_norm": 1.3597846031188965, "learning_rate": 1.8402718231794786e-05, "loss": 0.343, "step": 8245 }, { "epoch": 0.18250260849687902, "grad_norm": 1.2945337295532227, "learning_rate": 1.840083375112125e-05, "loss": 0.3736, "step": 8250 }, { "epoch": 0.1826132161383923, "grad_norm": 1.6459505558013916, "learning_rate": 1.8398948256049663e-05, "loss": 0.3349, "step": 8255 }, { "epoch": 0.18272382377990554, "grad_norm": 1.1773816347122192, "learning_rate": 1.8397061746807696e-05, "loss": 0.367, "step": 8260 }, { "epoch": 0.1828344314214188, "grad_norm": 1.2478595972061157, "learning_rate": 1.839517422362314e-05, "loss": 0.6111, "step": 8265 }, { "epoch": 0.18294503906293205, "grad_norm": 1.596423864364624, "learning_rate": 1.839328568672392e-05, "loss": 0.5307, "step": 8270 }, { "epoch": 0.18305564670444532, "grad_norm": 2.0111825466156006, "learning_rate": 1.8391396136338068e-05, "loss": 0.4295, "step": 8275 }, { "epoch": 0.18316625434595857, "grad_norm": 0.7969152927398682, "learning_rate": 1.8389505572693752e-05, "loss": 0.3143, "step": 8280 }, { "epoch": 0.18327686198747184, "grad_norm": 1.6199491024017334, "learning_rate": 1.838761399601926e-05, "loss": 0.4132, "step": 8285 }, { "epoch": 0.1833874696289851, "grad_norm": 2.0532724857330322, "learning_rate": 1.8385721406542987e-05, "loss": 0.3065, "step": 8290 }, { "epoch": 0.18349807727049836, "grad_norm": 1.3852691650390625, "learning_rate": 1.8383827804493478e-05, "loss": 0.3657, "step": 8295 }, { "epoch": 0.18360868491201163, "grad_norm": 1.2999809980392456, "learning_rate": 1.8381933190099372e-05, "loss": 0.4048, "step": 8300 }, { "epoch": 0.18371929255352487, "grad_norm": 1.4969582557678223, "learning_rate": 1.8380037563589453e-05, "loss": 0.441, "step": 8305 }, { "epoch": 0.18382990019503814, "grad_norm": 0.9379119277000427, "learning_rate": 1.837814092519261e-05, "loss": 0.3645, "step": 8310 }, { "epoch": 0.18394050783655141, "grad_norm": 1.3202327489852905, "learning_rate": 1.8376243275137866e-05, "loss": 0.3921, "step": 8315 }, { "epoch": 0.18405111547806466, "grad_norm": 1.6715096235275269, "learning_rate": 1.837434461365436e-05, "loss": 0.3626, "step": 8320 }, { "epoch": 0.18416172311957793, "grad_norm": 1.6993913650512695, "learning_rate": 1.8372444940971355e-05, "loss": 0.4712, "step": 8325 }, { "epoch": 0.18427233076109117, "grad_norm": 1.2463864088058472, "learning_rate": 1.8370544257318235e-05, "loss": 0.2059, "step": 8330 }, { "epoch": 0.18438293840260445, "grad_norm": 1.3170231580734253, "learning_rate": 1.8368642562924507e-05, "loss": 0.4255, "step": 8335 }, { "epoch": 0.1844935460441177, "grad_norm": 1.3189975023269653, "learning_rate": 1.8366739858019803e-05, "loss": 0.3797, "step": 8340 }, { "epoch": 0.18460415368563096, "grad_norm": 1.4056121110916138, "learning_rate": 1.836483614283387e-05, "loss": 0.4529, "step": 8345 }, { "epoch": 0.18471476132714423, "grad_norm": 0.4697827100753784, "learning_rate": 1.8362931417596582e-05, "loss": 0.2725, "step": 8350 }, { "epoch": 0.18482536896865748, "grad_norm": 1.53630530834198, "learning_rate": 1.8361025682537937e-05, "loss": 0.2585, "step": 8355 }, { "epoch": 0.18493597661017075, "grad_norm": 1.3230290412902832, "learning_rate": 1.835911893788805e-05, "loss": 0.3177, "step": 8360 }, { "epoch": 0.185046584251684, "grad_norm": 1.2736343145370483, "learning_rate": 1.8357211183877156e-05, "loss": 0.3715, "step": 8365 }, { "epoch": 0.18515719189319726, "grad_norm": 1.2663114070892334, "learning_rate": 1.8355302420735623e-05, "loss": 0.3989, "step": 8370 }, { "epoch": 0.1852677995347105, "grad_norm": 1.0633538961410522, "learning_rate": 1.835339264869393e-05, "loss": 0.3263, "step": 8375 }, { "epoch": 0.18537840717622378, "grad_norm": 1.472044587135315, "learning_rate": 1.8351481867982678e-05, "loss": 0.3707, "step": 8380 }, { "epoch": 0.18548901481773705, "grad_norm": 1.2194719314575195, "learning_rate": 1.8349570078832597e-05, "loss": 0.3223, "step": 8385 }, { "epoch": 0.1855996224592503, "grad_norm": 1.6975904703140259, "learning_rate": 1.8347657281474537e-05, "loss": 0.4184, "step": 8390 }, { "epoch": 0.18571023010076357, "grad_norm": 1.894809365272522, "learning_rate": 1.834574347613946e-05, "loss": 0.4434, "step": 8395 }, { "epoch": 0.1858208377422768, "grad_norm": 2.2146661281585693, "learning_rate": 1.8343828663058464e-05, "loss": 0.4775, "step": 8400 }, { "epoch": 0.18593144538379008, "grad_norm": 1.4205149412155151, "learning_rate": 1.8341912842462765e-05, "loss": 0.4106, "step": 8405 }, { "epoch": 0.18604205302530333, "grad_norm": 0.8795381188392639, "learning_rate": 1.833999601458369e-05, "loss": 0.2668, "step": 8410 }, { "epoch": 0.1861526606668166, "grad_norm": 1.5970063209533691, "learning_rate": 1.8338078179652696e-05, "loss": 0.349, "step": 8415 }, { "epoch": 0.18626326830832987, "grad_norm": 1.2222986221313477, "learning_rate": 1.833615933790137e-05, "loss": 0.2925, "step": 8420 }, { "epoch": 0.18637387594984312, "grad_norm": 1.475507140159607, "learning_rate": 1.83342394895614e-05, "loss": 0.3938, "step": 8425 }, { "epoch": 0.1864844835913564, "grad_norm": 1.1389808654785156, "learning_rate": 1.833231863486461e-05, "loss": 0.3111, "step": 8430 }, { "epoch": 0.18659509123286963, "grad_norm": 1.2350804805755615, "learning_rate": 1.8330396774042948e-05, "loss": 0.4457, "step": 8435 }, { "epoch": 0.1867056988743829, "grad_norm": 1.2726622819900513, "learning_rate": 1.832847390732847e-05, "loss": 0.4951, "step": 8440 }, { "epoch": 0.18681630651589617, "grad_norm": 1.7682368755340576, "learning_rate": 1.8326550034953375e-05, "loss": 0.4321, "step": 8445 }, { "epoch": 0.18692691415740942, "grad_norm": 1.3186314105987549, "learning_rate": 1.8324625157149957e-05, "loss": 0.3629, "step": 8450 }, { "epoch": 0.1870375217989227, "grad_norm": 1.7537000179290771, "learning_rate": 1.8322699274150647e-05, "loss": 0.2044, "step": 8455 }, { "epoch": 0.18714812944043593, "grad_norm": 1.516255259513855, "learning_rate": 1.8320772386188e-05, "loss": 0.3823, "step": 8460 }, { "epoch": 0.1872587370819492, "grad_norm": 1.5072154998779297, "learning_rate": 1.8318844493494682e-05, "loss": 0.3599, "step": 8465 }, { "epoch": 0.18736934472346245, "grad_norm": 1.803846836090088, "learning_rate": 1.8316915596303485e-05, "loss": 0.3885, "step": 8470 }, { "epoch": 0.18747995236497572, "grad_norm": 1.1226986646652222, "learning_rate": 1.8314985694847326e-05, "loss": 0.4109, "step": 8475 }, { "epoch": 0.187590560006489, "grad_norm": 1.4962975978851318, "learning_rate": 1.8313054789359243e-05, "loss": 0.4946, "step": 8480 }, { "epoch": 0.18770116764800224, "grad_norm": 1.6769921779632568, "learning_rate": 1.8311122880072384e-05, "loss": 0.4991, "step": 8485 }, { "epoch": 0.1878117752895155, "grad_norm": 2.1194546222686768, "learning_rate": 1.830918996722003e-05, "loss": 0.4968, "step": 8490 }, { "epoch": 0.18792238293102875, "grad_norm": 2.0137782096862793, "learning_rate": 1.830725605103558e-05, "loss": 0.4295, "step": 8495 }, { "epoch": 0.18803299057254202, "grad_norm": 1.5250543355941772, "learning_rate": 1.830532113175256e-05, "loss": 0.366, "step": 8500 }, { "epoch": 0.18814359821405527, "grad_norm": 1.8159235715866089, "learning_rate": 1.8303385209604595e-05, "loss": 0.3836, "step": 8505 }, { "epoch": 0.18825420585556854, "grad_norm": 1.0712653398513794, "learning_rate": 1.8301448284825462e-05, "loss": 0.2878, "step": 8510 }, { "epoch": 0.1883648134970818, "grad_norm": 1.0055242776870728, "learning_rate": 1.829951035764904e-05, "loss": 0.2376, "step": 8515 }, { "epoch": 0.18847542113859506, "grad_norm": 1.094264030456543, "learning_rate": 1.8297571428309328e-05, "loss": 0.3576, "step": 8520 }, { "epoch": 0.18858602878010833, "grad_norm": 1.3517965078353882, "learning_rate": 1.829563149704046e-05, "loss": 0.3719, "step": 8525 }, { "epoch": 0.18869663642162157, "grad_norm": 1.6482510566711426, "learning_rate": 1.8293690564076675e-05, "loss": 0.4511, "step": 8530 }, { "epoch": 0.18880724406313484, "grad_norm": 1.889306664466858, "learning_rate": 1.829174862965234e-05, "loss": 0.4161, "step": 8535 }, { "epoch": 0.18891785170464812, "grad_norm": 0.8726267218589783, "learning_rate": 1.8289805694001947e-05, "loss": 0.4734, "step": 8540 }, { "epoch": 0.18902845934616136, "grad_norm": 2.0533783435821533, "learning_rate": 1.8287861757360105e-05, "loss": 0.4284, "step": 8545 }, { "epoch": 0.18913906698767463, "grad_norm": 1.129348874092102, "learning_rate": 1.828591681996154e-05, "loss": 0.321, "step": 8550 }, { "epoch": 0.18924967462918788, "grad_norm": 1.7723267078399658, "learning_rate": 1.8283970882041105e-05, "loss": 0.5297, "step": 8555 }, { "epoch": 0.18936028227070115, "grad_norm": 1.75954008102417, "learning_rate": 1.8282023943833768e-05, "loss": 0.3829, "step": 8560 }, { "epoch": 0.1894708899122144, "grad_norm": 1.6208274364471436, "learning_rate": 1.8280076005574628e-05, "loss": 0.5255, "step": 8565 }, { "epoch": 0.18958149755372766, "grad_norm": 1.1897838115692139, "learning_rate": 1.827812706749889e-05, "loss": 0.4682, "step": 8570 }, { "epoch": 0.18969210519524093, "grad_norm": 1.5908808708190918, "learning_rate": 1.8276177129841894e-05, "loss": 0.4908, "step": 8575 }, { "epoch": 0.18980271283675418, "grad_norm": 2.6187031269073486, "learning_rate": 1.827422619283909e-05, "loss": 0.4972, "step": 8580 }, { "epoch": 0.18991332047826745, "grad_norm": 2.0728416442871094, "learning_rate": 1.827227425672606e-05, "loss": 0.5211, "step": 8585 }, { "epoch": 0.1900239281197807, "grad_norm": 1.4763959646224976, "learning_rate": 1.827032132173849e-05, "loss": 0.4368, "step": 8590 }, { "epoch": 0.19013453576129397, "grad_norm": 1.1071557998657227, "learning_rate": 1.82683673881122e-05, "loss": 0.3668, "step": 8595 }, { "epoch": 0.1902451434028072, "grad_norm": 1.4239718914031982, "learning_rate": 1.826641245608313e-05, "loss": 0.488, "step": 8600 }, { "epoch": 0.19035575104432048, "grad_norm": 1.456650972366333, "learning_rate": 1.8264456525887335e-05, "loss": 0.596, "step": 8605 }, { "epoch": 0.19046635868583375, "grad_norm": 1.0633840560913086, "learning_rate": 1.8262499597760994e-05, "loss": 0.5153, "step": 8610 }, { "epoch": 0.190576966327347, "grad_norm": 1.1453192234039307, "learning_rate": 1.8260541671940406e-05, "loss": 0.3526, "step": 8615 }, { "epoch": 0.19068757396886027, "grad_norm": 1.167088508605957, "learning_rate": 1.8258582748661986e-05, "loss": 0.2927, "step": 8620 }, { "epoch": 0.1907981816103735, "grad_norm": 1.7413831949234009, "learning_rate": 1.8256622828162277e-05, "loss": 0.3432, "step": 8625 }, { "epoch": 0.19090878925188678, "grad_norm": 1.8739395141601562, "learning_rate": 1.8254661910677936e-05, "loss": 0.3873, "step": 8630 }, { "epoch": 0.19101939689340003, "grad_norm": 0.9924023747444153, "learning_rate": 1.8252699996445747e-05, "loss": 0.5293, "step": 8635 }, { "epoch": 0.1911300045349133, "grad_norm": 1.1934866905212402, "learning_rate": 1.8250737085702606e-05, "loss": 0.4277, "step": 8640 }, { "epoch": 0.19124061217642657, "grad_norm": 1.4595229625701904, "learning_rate": 1.8248773178685537e-05, "loss": 0.5518, "step": 8645 }, { "epoch": 0.19135121981793982, "grad_norm": 1.554247498512268, "learning_rate": 1.824680827563168e-05, "loss": 0.3818, "step": 8650 }, { "epoch": 0.1914618274594531, "grad_norm": 1.2482880353927612, "learning_rate": 1.82448423767783e-05, "loss": 0.4663, "step": 8655 }, { "epoch": 0.19157243510096633, "grad_norm": 1.0391746759414673, "learning_rate": 1.8242875482362772e-05, "loss": 0.3601, "step": 8660 }, { "epoch": 0.1916830427424796, "grad_norm": 1.0853534936904907, "learning_rate": 1.8240907592622606e-05, "loss": 0.3739, "step": 8665 }, { "epoch": 0.19179365038399288, "grad_norm": 1.441542148590088, "learning_rate": 1.8238938707795418e-05, "loss": 0.4131, "step": 8670 }, { "epoch": 0.19190425802550612, "grad_norm": 1.6793978214263916, "learning_rate": 1.823696882811895e-05, "loss": 0.2015, "step": 8675 }, { "epoch": 0.1920148656670194, "grad_norm": 1.0964012145996094, "learning_rate": 1.8234997953831065e-05, "loss": 0.4143, "step": 8680 }, { "epoch": 0.19212547330853263, "grad_norm": 2.2504186630249023, "learning_rate": 1.8233026085169753e-05, "loss": 0.3521, "step": 8685 }, { "epoch": 0.1922360809500459, "grad_norm": 1.8082151412963867, "learning_rate": 1.82310532223731e-05, "loss": 0.6048, "step": 8690 }, { "epoch": 0.19234668859155915, "grad_norm": 1.759657382965088, "learning_rate": 1.8229079365679347e-05, "loss": 0.4167, "step": 8695 }, { "epoch": 0.19245729623307242, "grad_norm": 1.1983848810195923, "learning_rate": 1.8227104515326823e-05, "loss": 0.3583, "step": 8700 }, { "epoch": 0.1925679038745857, "grad_norm": 1.4052691459655762, "learning_rate": 1.8225128671553998e-05, "loss": 0.5558, "step": 8705 }, { "epoch": 0.19267851151609894, "grad_norm": 2.013258218765259, "learning_rate": 1.8223151834599454e-05, "loss": 0.5168, "step": 8710 }, { "epoch": 0.1927891191576122, "grad_norm": 1.7761456966400146, "learning_rate": 1.822117400470189e-05, "loss": 0.3624, "step": 8715 }, { "epoch": 0.19289972679912545, "grad_norm": 1.314440131187439, "learning_rate": 1.821919518210013e-05, "loss": 0.3207, "step": 8720 }, { "epoch": 0.19301033444063873, "grad_norm": 0.9232757687568665, "learning_rate": 1.821721536703312e-05, "loss": 0.2883, "step": 8725 }, { "epoch": 0.19312094208215197, "grad_norm": 1.356067419052124, "learning_rate": 1.8215234559739915e-05, "loss": 0.3587, "step": 8730 }, { "epoch": 0.19323154972366524, "grad_norm": 1.7251853942871094, "learning_rate": 1.8213252760459698e-05, "loss": 0.4985, "step": 8735 }, { "epoch": 0.1933421573651785, "grad_norm": 1.412247657775879, "learning_rate": 1.8211269969431774e-05, "loss": 0.4232, "step": 8740 }, { "epoch": 0.19345276500669176, "grad_norm": 1.3983365297317505, "learning_rate": 1.8209286186895567e-05, "loss": 0.4856, "step": 8745 }, { "epoch": 0.19356337264820503, "grad_norm": 1.1825673580169678, "learning_rate": 1.8207301413090613e-05, "loss": 0.4426, "step": 8750 }, { "epoch": 0.19367398028971827, "grad_norm": 2.238076686859131, "learning_rate": 1.8205315648256572e-05, "loss": 0.4898, "step": 8755 }, { "epoch": 0.19378458793123154, "grad_norm": 1.419005036354065, "learning_rate": 1.8203328892633226e-05, "loss": 0.4142, "step": 8760 }, { "epoch": 0.19389519557274482, "grad_norm": 1.3608343601226807, "learning_rate": 1.8201341146460477e-05, "loss": 0.2659, "step": 8765 }, { "epoch": 0.19400580321425806, "grad_norm": 1.6219663619995117, "learning_rate": 1.819935240997835e-05, "loss": 0.333, "step": 8770 }, { "epoch": 0.19411641085577133, "grad_norm": 1.2492784261703491, "learning_rate": 1.819736268342697e-05, "loss": 0.313, "step": 8775 }, { "epoch": 0.19422701849728458, "grad_norm": 0.9111348986625671, "learning_rate": 1.8195371967046605e-05, "loss": 0.3308, "step": 8780 }, { "epoch": 0.19433762613879785, "grad_norm": 2.2908663749694824, "learning_rate": 1.8193380261077634e-05, "loss": 0.3033, "step": 8785 }, { "epoch": 0.1944482337803111, "grad_norm": 0.7563189268112183, "learning_rate": 1.819138756576055e-05, "loss": 0.4054, "step": 8790 }, { "epoch": 0.19455884142182436, "grad_norm": 1.5902265310287476, "learning_rate": 1.8189393881335976e-05, "loss": 0.386, "step": 8795 }, { "epoch": 0.19466944906333764, "grad_norm": 1.226371169090271, "learning_rate": 1.8187399208044646e-05, "loss": 0.3559, "step": 8800 }, { "epoch": 0.19478005670485088, "grad_norm": 1.458243489265442, "learning_rate": 1.8185403546127415e-05, "loss": 0.3494, "step": 8805 }, { "epoch": 0.19489066434636415, "grad_norm": 1.4684889316558838, "learning_rate": 1.8183406895825263e-05, "loss": 0.393, "step": 8810 }, { "epoch": 0.1950012719878774, "grad_norm": 1.8572510480880737, "learning_rate": 1.818140925737928e-05, "loss": 0.3388, "step": 8815 }, { "epoch": 0.19511187962939067, "grad_norm": 1.2910754680633545, "learning_rate": 1.817941063103068e-05, "loss": 0.3191, "step": 8820 }, { "epoch": 0.1952224872709039, "grad_norm": 1.3597042560577393, "learning_rate": 1.8177411017020802e-05, "loss": 0.434, "step": 8825 }, { "epoch": 0.19533309491241718, "grad_norm": 1.224297046661377, "learning_rate": 1.817541041559109e-05, "loss": 0.2463, "step": 8830 }, { "epoch": 0.19544370255393045, "grad_norm": 2.1862356662750244, "learning_rate": 1.8173408826983125e-05, "loss": 0.447, "step": 8835 }, { "epoch": 0.1955543101954437, "grad_norm": 1.5341095924377441, "learning_rate": 1.8171406251438593e-05, "loss": 0.4603, "step": 8840 }, { "epoch": 0.19566491783695697, "grad_norm": 1.2474607229232788, "learning_rate": 1.8169402689199306e-05, "loss": 0.4039, "step": 8845 }, { "epoch": 0.1957755254784702, "grad_norm": 1.1963512897491455, "learning_rate": 1.8167398140507197e-05, "loss": 0.3926, "step": 8850 }, { "epoch": 0.19588613311998349, "grad_norm": 0.7879716753959656, "learning_rate": 1.8165392605604307e-05, "loss": 0.4777, "step": 8855 }, { "epoch": 0.19599674076149673, "grad_norm": 1.5303202867507935, "learning_rate": 1.8163386084732807e-05, "loss": 0.4999, "step": 8860 }, { "epoch": 0.19610734840301, "grad_norm": 1.0223127603530884, "learning_rate": 1.8161378578134988e-05, "loss": 0.3767, "step": 8865 }, { "epoch": 0.19621795604452327, "grad_norm": 1.0504778623580933, "learning_rate": 1.815937008605325e-05, "loss": 0.2837, "step": 8870 }, { "epoch": 0.19632856368603652, "grad_norm": 1.4397259950637817, "learning_rate": 1.8157360608730118e-05, "loss": 0.4198, "step": 8875 }, { "epoch": 0.1964391713275498, "grad_norm": 1.4170780181884766, "learning_rate": 1.8155350146408238e-05, "loss": 0.5227, "step": 8880 }, { "epoch": 0.19654977896906303, "grad_norm": 1.399729609489441, "learning_rate": 1.8153338699330376e-05, "loss": 0.4086, "step": 8885 }, { "epoch": 0.1966603866105763, "grad_norm": 1.4493088722229004, "learning_rate": 1.815132626773941e-05, "loss": 0.3957, "step": 8890 }, { "epoch": 0.19677099425208958, "grad_norm": 1.3374069929122925, "learning_rate": 1.814931285187834e-05, "loss": 0.2753, "step": 8895 }, { "epoch": 0.19688160189360282, "grad_norm": 1.2610121965408325, "learning_rate": 1.8147298451990285e-05, "loss": 0.4772, "step": 8900 }, { "epoch": 0.1969922095351161, "grad_norm": 1.4255497455596924, "learning_rate": 1.8145283068318483e-05, "loss": 0.3635, "step": 8905 }, { "epoch": 0.19710281717662934, "grad_norm": 1.600082278251648, "learning_rate": 1.8143266701106294e-05, "loss": 0.5453, "step": 8910 }, { "epoch": 0.1972134248181426, "grad_norm": 1.155190348625183, "learning_rate": 1.814124935059719e-05, "loss": 0.3821, "step": 8915 }, { "epoch": 0.19732403245965585, "grad_norm": 1.0957926511764526, "learning_rate": 1.813923101703477e-05, "loss": 0.4736, "step": 8920 }, { "epoch": 0.19743464010116912, "grad_norm": 1.7592767477035522, "learning_rate": 1.813721170066274e-05, "loss": 0.512, "step": 8925 }, { "epoch": 0.1975452477426824, "grad_norm": 1.4496231079101562, "learning_rate": 1.8135191401724936e-05, "loss": 0.4167, "step": 8930 }, { "epoch": 0.19765585538419564, "grad_norm": 1.2637405395507812, "learning_rate": 1.8133170120465313e-05, "loss": 0.5704, "step": 8935 }, { "epoch": 0.1977664630257089, "grad_norm": 2.145017385482788, "learning_rate": 1.8131147857127932e-05, "loss": 0.3443, "step": 8940 }, { "epoch": 0.19787707066722215, "grad_norm": 1.041879653930664, "learning_rate": 1.812912461195699e-05, "loss": 0.4742, "step": 8945 }, { "epoch": 0.19798767830873543, "grad_norm": 1.5614018440246582, "learning_rate": 1.8127100385196782e-05, "loss": 0.3358, "step": 8950 }, { "epoch": 0.19809828595024867, "grad_norm": 1.3003766536712646, "learning_rate": 1.8125075177091738e-05, "loss": 0.3669, "step": 8955 }, { "epoch": 0.19820889359176194, "grad_norm": 1.8527449369430542, "learning_rate": 1.8123048987886405e-05, "loss": 0.4594, "step": 8960 }, { "epoch": 0.1983195012332752, "grad_norm": 1.4165481328964233, "learning_rate": 1.812102181782544e-05, "loss": 0.5041, "step": 8965 }, { "epoch": 0.19843010887478846, "grad_norm": 1.0351778268814087, "learning_rate": 1.8118993667153623e-05, "loss": 0.3868, "step": 8970 }, { "epoch": 0.19854071651630173, "grad_norm": 1.2649604082107544, "learning_rate": 1.8116964536115854e-05, "loss": 0.3632, "step": 8975 }, { "epoch": 0.19865132415781497, "grad_norm": 1.4215718507766724, "learning_rate": 1.8114934424957153e-05, "loss": 0.5996, "step": 8980 }, { "epoch": 0.19876193179932825, "grad_norm": 1.104856014251709, "learning_rate": 1.811290333392265e-05, "loss": 0.5814, "step": 8985 }, { "epoch": 0.1988725394408415, "grad_norm": 1.1292475461959839, "learning_rate": 1.8110871263257603e-05, "loss": 0.5605, "step": 8990 }, { "epoch": 0.19898314708235476, "grad_norm": 2.0220460891723633, "learning_rate": 1.8108838213207377e-05, "loss": 0.4008, "step": 8995 }, { "epoch": 0.19909375472386803, "grad_norm": 1.315417766571045, "learning_rate": 1.8106804184017473e-05, "loss": 0.2316, "step": 9000 }, { "epoch": 0.19920436236538128, "grad_norm": 1.8103442192077637, "learning_rate": 1.8104769175933488e-05, "loss": 0.4386, "step": 9005 }, { "epoch": 0.19931497000689455, "grad_norm": 1.385876178741455, "learning_rate": 1.810273318920116e-05, "loss": 0.4326, "step": 9010 }, { "epoch": 0.1994255776484078, "grad_norm": 1.1483098268508911, "learning_rate": 1.810069622406632e-05, "loss": 0.3586, "step": 9015 }, { "epoch": 0.19953618528992106, "grad_norm": 1.2591546773910522, "learning_rate": 1.809865828077494e-05, "loss": 0.2398, "step": 9020 }, { "epoch": 0.19964679293143434, "grad_norm": 1.0831377506256104, "learning_rate": 1.8096619359573103e-05, "loss": 0.3961, "step": 9025 }, { "epoch": 0.19975740057294758, "grad_norm": 1.5682902336120605, "learning_rate": 1.8094579460707004e-05, "loss": 0.5363, "step": 9030 }, { "epoch": 0.19986800821446085, "grad_norm": 1.4521732330322266, "learning_rate": 1.809253858442296e-05, "loss": 0.3262, "step": 9035 }, { "epoch": 0.1999786158559741, "grad_norm": 2.298015832901001, "learning_rate": 1.809049673096741e-05, "loss": 0.4213, "step": 9040 }, { "epoch": 0.20008922349748737, "grad_norm": 1.4683533906936646, "learning_rate": 1.80884539005869e-05, "loss": 0.2899, "step": 9045 }, { "epoch": 0.2001998311390006, "grad_norm": 1.2226210832595825, "learning_rate": 1.8086410093528105e-05, "loss": 0.3135, "step": 9050 }, { "epoch": 0.20031043878051388, "grad_norm": 1.0580832958221436, "learning_rate": 1.8084365310037816e-05, "loss": 0.4365, "step": 9055 }, { "epoch": 0.20042104642202715, "grad_norm": 2.3306379318237305, "learning_rate": 1.808231955036294e-05, "loss": 0.4672, "step": 9060 }, { "epoch": 0.2005316540635404, "grad_norm": 2.404820680618286, "learning_rate": 1.80802728147505e-05, "loss": 0.2888, "step": 9065 }, { "epoch": 0.20064226170505367, "grad_norm": 1.668479561805725, "learning_rate": 1.8078225103447637e-05, "loss": 0.4869, "step": 9070 }, { "epoch": 0.20075286934656691, "grad_norm": 0.9986069202423096, "learning_rate": 1.807617641670161e-05, "loss": 0.3936, "step": 9075 }, { "epoch": 0.2008634769880802, "grad_norm": 1.9353246688842773, "learning_rate": 1.8074126754759805e-05, "loss": 0.4118, "step": 9080 }, { "epoch": 0.20097408462959343, "grad_norm": 1.2710680961608887, "learning_rate": 1.8072076117869714e-05, "loss": 0.4662, "step": 9085 }, { "epoch": 0.2010846922711067, "grad_norm": 2.0210044384002686, "learning_rate": 1.807002450627895e-05, "loss": 0.2754, "step": 9090 }, { "epoch": 0.20119529991261997, "grad_norm": 1.2995089292526245, "learning_rate": 1.8067971920235243e-05, "loss": 0.4791, "step": 9095 }, { "epoch": 0.20130590755413322, "grad_norm": 1.8554913997650146, "learning_rate": 1.8065918359986446e-05, "loss": 0.4348, "step": 9100 }, { "epoch": 0.2014165151956465, "grad_norm": 1.9820932149887085, "learning_rate": 1.8063863825780522e-05, "loss": 0.3789, "step": 9105 }, { "epoch": 0.20152712283715973, "grad_norm": 1.9915214776992798, "learning_rate": 1.8061808317865562e-05, "loss": 0.4512, "step": 9110 }, { "epoch": 0.201637730478673, "grad_norm": 1.8129661083221436, "learning_rate": 1.8059751836489757e-05, "loss": 0.5503, "step": 9115 }, { "epoch": 0.20174833812018628, "grad_norm": 2.5188791751861572, "learning_rate": 1.8057694381901435e-05, "loss": 0.3785, "step": 9120 }, { "epoch": 0.20185894576169952, "grad_norm": 1.068416714668274, "learning_rate": 1.8055635954349033e-05, "loss": 0.3067, "step": 9125 }, { "epoch": 0.2019695534032128, "grad_norm": 1.2030487060546875, "learning_rate": 1.80535765540811e-05, "loss": 0.3909, "step": 9130 }, { "epoch": 0.20208016104472604, "grad_norm": 1.0146510601043701, "learning_rate": 1.805151618134631e-05, "loss": 0.2865, "step": 9135 }, { "epoch": 0.2021907686862393, "grad_norm": 0.7833704352378845, "learning_rate": 1.8049454836393456e-05, "loss": 0.2995, "step": 9140 }, { "epoch": 0.20230137632775255, "grad_norm": 0.9950535893440247, "learning_rate": 1.804739251947144e-05, "loss": 0.4426, "step": 9145 }, { "epoch": 0.20241198396926582, "grad_norm": 1.0475330352783203, "learning_rate": 1.804532923082929e-05, "loss": 0.3168, "step": 9150 }, { "epoch": 0.2025225916107791, "grad_norm": 1.8694572448730469, "learning_rate": 1.8043264970716145e-05, "loss": 0.4142, "step": 9155 }, { "epoch": 0.20263319925229234, "grad_norm": 1.3322452306747437, "learning_rate": 1.8041199739381263e-05, "loss": 0.4421, "step": 9160 }, { "epoch": 0.2027438068938056, "grad_norm": 1.4383444786071777, "learning_rate": 1.8039133537074022e-05, "loss": 0.5045, "step": 9165 }, { "epoch": 0.20285441453531886, "grad_norm": 1.65001380443573, "learning_rate": 1.8037066364043916e-05, "loss": 0.4169, "step": 9170 }, { "epoch": 0.20296502217683213, "grad_norm": 1.4664260149002075, "learning_rate": 1.8034998220540556e-05, "loss": 0.3899, "step": 9175 }, { "epoch": 0.20307562981834537, "grad_norm": 1.5339316129684448, "learning_rate": 1.8032929106813664e-05, "loss": 0.4255, "step": 9180 }, { "epoch": 0.20318623745985864, "grad_norm": 1.179207444190979, "learning_rate": 1.803085902311309e-05, "loss": 0.4905, "step": 9185 }, { "epoch": 0.20329684510137191, "grad_norm": 1.3276876211166382, "learning_rate": 1.8028787969688797e-05, "loss": 0.3112, "step": 9190 }, { "epoch": 0.20340745274288516, "grad_norm": 1.241294264793396, "learning_rate": 1.8026715946790863e-05, "loss": 0.4542, "step": 9195 }, { "epoch": 0.20351806038439843, "grad_norm": 1.2940452098846436, "learning_rate": 1.8024642954669478e-05, "loss": 0.5609, "step": 9200 }, { "epoch": 0.20362866802591167, "grad_norm": 1.5391689538955688, "learning_rate": 1.8022568993574965e-05, "loss": 0.5069, "step": 9205 }, { "epoch": 0.20373927566742495, "grad_norm": 1.0838396549224854, "learning_rate": 1.8020494063757744e-05, "loss": 0.2903, "step": 9210 }, { "epoch": 0.2038498833089382, "grad_norm": 1.3726567029953003, "learning_rate": 1.801841816546837e-05, "loss": 0.3541, "step": 9215 }, { "epoch": 0.20396049095045146, "grad_norm": 1.6977953910827637, "learning_rate": 1.8016341298957505e-05, "loss": 0.3165, "step": 9220 }, { "epoch": 0.20407109859196473, "grad_norm": 1.4097518920898438, "learning_rate": 1.8014263464475928e-05, "loss": 0.3845, "step": 9225 }, { "epoch": 0.20418170623347798, "grad_norm": 1.111494541168213, "learning_rate": 1.8012184662274543e-05, "loss": 0.3608, "step": 9230 }, { "epoch": 0.20429231387499125, "grad_norm": 1.6053136587142944, "learning_rate": 1.8010104892604353e-05, "loss": 0.3281, "step": 9235 }, { "epoch": 0.2044029215165045, "grad_norm": 1.4123514890670776, "learning_rate": 1.80080241557165e-05, "loss": 0.3088, "step": 9240 }, { "epoch": 0.20451352915801776, "grad_norm": 1.853369116783142, "learning_rate": 1.800594245186223e-05, "loss": 0.41, "step": 9245 }, { "epoch": 0.20462413679953104, "grad_norm": 2.024594783782959, "learning_rate": 1.8003859781292904e-05, "loss": 0.447, "step": 9250 }, { "epoch": 0.20473474444104428, "grad_norm": 1.9457638263702393, "learning_rate": 1.800177614426001e-05, "loss": 0.3388, "step": 9255 }, { "epoch": 0.20484535208255755, "grad_norm": 1.5434610843658447, "learning_rate": 1.799969154101514e-05, "loss": 0.3155, "step": 9260 }, { "epoch": 0.2049559597240708, "grad_norm": 1.0469727516174316, "learning_rate": 1.7997605971810013e-05, "loss": 0.2352, "step": 9265 }, { "epoch": 0.20506656736558407, "grad_norm": 1.5461556911468506, "learning_rate": 1.7995519436896463e-05, "loss": 0.3584, "step": 9270 }, { "epoch": 0.2051771750070973, "grad_norm": 1.9388670921325684, "learning_rate": 1.7993431936526435e-05, "loss": 0.3052, "step": 9275 }, { "epoch": 0.20528778264861058, "grad_norm": 1.9095373153686523, "learning_rate": 1.7991343470951995e-05, "loss": 0.3844, "step": 9280 }, { "epoch": 0.20539839029012386, "grad_norm": 1.7422373294830322, "learning_rate": 1.798925404042532e-05, "loss": 0.5063, "step": 9285 }, { "epoch": 0.2055089979316371, "grad_norm": 1.0939030647277832, "learning_rate": 1.798716364519872e-05, "loss": 0.1767, "step": 9290 }, { "epoch": 0.20561960557315037, "grad_norm": 1.4205799102783203, "learning_rate": 1.7985072285524596e-05, "loss": 0.4182, "step": 9295 }, { "epoch": 0.20573021321466362, "grad_norm": 1.904466152191162, "learning_rate": 1.798297996165549e-05, "loss": 0.2623, "step": 9300 }, { "epoch": 0.2058408208561769, "grad_norm": 1.2227212190628052, "learning_rate": 1.798088667384404e-05, "loss": 0.4025, "step": 9305 }, { "epoch": 0.20595142849769013, "grad_norm": 1.195278286933899, "learning_rate": 1.7978792422343017e-05, "loss": 0.3719, "step": 9310 }, { "epoch": 0.2060620361392034, "grad_norm": 1.3897852897644043, "learning_rate": 1.7976697207405296e-05, "loss": 0.5155, "step": 9315 }, { "epoch": 0.20617264378071667, "grad_norm": 1.104456901550293, "learning_rate": 1.797460102928388e-05, "loss": 0.3251, "step": 9320 }, { "epoch": 0.20628325142222992, "grad_norm": 1.3259351253509521, "learning_rate": 1.797250388823187e-05, "loss": 0.3294, "step": 9325 }, { "epoch": 0.2063938590637432, "grad_norm": 1.7634742259979248, "learning_rate": 1.7970405784502512e-05, "loss": 0.3326, "step": 9330 }, { "epoch": 0.20650446670525643, "grad_norm": 1.0023839473724365, "learning_rate": 1.796830671834914e-05, "loss": 0.329, "step": 9335 }, { "epoch": 0.2066150743467697, "grad_norm": 1.3715295791625977, "learning_rate": 1.7966206690025217e-05, "loss": 0.4502, "step": 9340 }, { "epoch": 0.20672568198828298, "grad_norm": 1.5624752044677734, "learning_rate": 1.796410569978432e-05, "loss": 0.3417, "step": 9345 }, { "epoch": 0.20683628962979622, "grad_norm": 1.9248814582824707, "learning_rate": 1.7962003747880148e-05, "loss": 0.4814, "step": 9350 }, { "epoch": 0.2069468972713095, "grad_norm": 0.9715316891670227, "learning_rate": 1.7959900834566507e-05, "loss": 0.3212, "step": 9355 }, { "epoch": 0.20705750491282274, "grad_norm": 1.3522871732711792, "learning_rate": 1.7957796960097323e-05, "loss": 0.3783, "step": 9360 }, { "epoch": 0.207168112554336, "grad_norm": 1.3974459171295166, "learning_rate": 1.795569212472664e-05, "loss": 0.4744, "step": 9365 }, { "epoch": 0.20727872019584925, "grad_norm": 2.210444927215576, "learning_rate": 1.7953586328708613e-05, "loss": 0.4797, "step": 9370 }, { "epoch": 0.20738932783736252, "grad_norm": 1.793060064315796, "learning_rate": 1.795147957229752e-05, "loss": 0.3713, "step": 9375 }, { "epoch": 0.2074999354788758, "grad_norm": 1.1840746402740479, "learning_rate": 1.7949371855747753e-05, "loss": 0.3926, "step": 9380 }, { "epoch": 0.20761054312038904, "grad_norm": 1.066495656967163, "learning_rate": 1.794726317931381e-05, "loss": 0.3039, "step": 9385 }, { "epoch": 0.2077211507619023, "grad_norm": 2.1032567024230957, "learning_rate": 1.7945153543250317e-05, "loss": 0.4196, "step": 9390 }, { "epoch": 0.20783175840341556, "grad_norm": 1.7704393863677979, "learning_rate": 1.794304294781202e-05, "loss": 0.436, "step": 9395 }, { "epoch": 0.20794236604492883, "grad_norm": 1.8576886653900146, "learning_rate": 1.794093139325376e-05, "loss": 0.4396, "step": 9400 }, { "epoch": 0.20805297368644207, "grad_norm": 1.0020142793655396, "learning_rate": 1.793881887983051e-05, "loss": 0.4632, "step": 9405 }, { "epoch": 0.20816358132795534, "grad_norm": 1.198484182357788, "learning_rate": 1.7936705407797362e-05, "loss": 0.3859, "step": 9410 }, { "epoch": 0.20827418896946862, "grad_norm": 1.0327547788619995, "learning_rate": 1.793459097740951e-05, "loss": 0.4151, "step": 9415 }, { "epoch": 0.20838479661098186, "grad_norm": 2.083974838256836, "learning_rate": 1.7932475588922272e-05, "loss": 0.3888, "step": 9420 }, { "epoch": 0.20849540425249513, "grad_norm": 1.3600401878356934, "learning_rate": 1.7930359242591088e-05, "loss": 0.3426, "step": 9425 }, { "epoch": 0.20860601189400838, "grad_norm": 1.9753590822219849, "learning_rate": 1.792824193867149e-05, "loss": 0.5101, "step": 9430 }, { "epoch": 0.20871661953552165, "grad_norm": 1.274460792541504, "learning_rate": 1.7926123677419157e-05, "loss": 0.4187, "step": 9435 }, { "epoch": 0.2088272271770349, "grad_norm": 1.6292451620101929, "learning_rate": 1.7924004459089866e-05, "loss": 0.3868, "step": 9440 }, { "epoch": 0.20893783481854816, "grad_norm": 1.1470903158187866, "learning_rate": 1.7921884283939506e-05, "loss": 0.4745, "step": 9445 }, { "epoch": 0.20904844246006143, "grad_norm": 0.8765552043914795, "learning_rate": 1.7919763152224092e-05, "loss": 0.3393, "step": 9450 }, { "epoch": 0.20915905010157468, "grad_norm": 1.5452773571014404, "learning_rate": 1.7917641064199746e-05, "loss": 0.3797, "step": 9455 }, { "epoch": 0.20926965774308795, "grad_norm": 0.9243483543395996, "learning_rate": 1.7915518020122715e-05, "loss": 0.4081, "step": 9460 }, { "epoch": 0.2093802653846012, "grad_norm": 1.3681812286376953, "learning_rate": 1.791339402024935e-05, "loss": 0.414, "step": 9465 }, { "epoch": 0.20949087302611447, "grad_norm": 1.2352062463760376, "learning_rate": 1.791126906483613e-05, "loss": 0.4682, "step": 9470 }, { "epoch": 0.20960148066762774, "grad_norm": 1.4272645711898804, "learning_rate": 1.7909143154139636e-05, "loss": 0.345, "step": 9475 }, { "epoch": 0.20971208830914098, "grad_norm": 1.4278013706207275, "learning_rate": 1.7907016288416572e-05, "loss": 0.3491, "step": 9480 }, { "epoch": 0.20982269595065425, "grad_norm": 1.4649229049682617, "learning_rate": 1.790488846792376e-05, "loss": 0.5177, "step": 9485 }, { "epoch": 0.2099333035921675, "grad_norm": 1.081545352935791, "learning_rate": 1.7902759692918134e-05, "loss": 0.3296, "step": 9490 }, { "epoch": 0.21004391123368077, "grad_norm": 1.9008666276931763, "learning_rate": 1.7900629963656738e-05, "loss": 0.463, "step": 9495 }, { "epoch": 0.210154518875194, "grad_norm": 1.3830004930496216, "learning_rate": 1.789849928039674e-05, "loss": 0.326, "step": 9500 }, { "epoch": 0.21026512651670728, "grad_norm": 1.6188230514526367, "learning_rate": 1.7896367643395422e-05, "loss": 0.3932, "step": 9505 }, { "epoch": 0.21037573415822056, "grad_norm": 1.51378333568573, "learning_rate": 1.7894235052910172e-05, "loss": 0.3962, "step": 9510 }, { "epoch": 0.2104863417997338, "grad_norm": 1.4919954538345337, "learning_rate": 1.7892101509198502e-05, "loss": 0.3684, "step": 9515 }, { "epoch": 0.21059694944124707, "grad_norm": 1.756209373474121, "learning_rate": 1.788996701251804e-05, "loss": 0.3791, "step": 9520 }, { "epoch": 0.21070755708276032, "grad_norm": 1.1807959079742432, "learning_rate": 1.7887831563126517e-05, "loss": 0.438, "step": 9525 }, { "epoch": 0.2108181647242736, "grad_norm": 2.1257290840148926, "learning_rate": 1.78856951612818e-05, "loss": 0.3356, "step": 9530 }, { "epoch": 0.21092877236578683, "grad_norm": 1.1826213598251343, "learning_rate": 1.7883557807241852e-05, "loss": 0.3581, "step": 9535 }, { "epoch": 0.2110393800073001, "grad_norm": 1.663801670074463, "learning_rate": 1.7881419501264754e-05, "loss": 0.4344, "step": 9540 }, { "epoch": 0.21114998764881338, "grad_norm": 1.6361910104751587, "learning_rate": 1.7879280243608714e-05, "loss": 0.3526, "step": 9545 }, { "epoch": 0.21126059529032662, "grad_norm": 0.7702332735061646, "learning_rate": 1.7877140034532044e-05, "loss": 0.309, "step": 9550 }, { "epoch": 0.2113712029318399, "grad_norm": 2.2901432514190674, "learning_rate": 1.787499887429317e-05, "loss": 0.4658, "step": 9555 }, { "epoch": 0.21148181057335314, "grad_norm": 1.337119221687317, "learning_rate": 1.787285676315064e-05, "loss": 0.2624, "step": 9560 }, { "epoch": 0.2115924182148664, "grad_norm": 1.2793349027633667, "learning_rate": 1.7870713701363115e-05, "loss": 0.2954, "step": 9565 }, { "epoch": 0.21170302585637965, "grad_norm": 0.9672450423240662, "learning_rate": 1.7868569689189363e-05, "loss": 0.3258, "step": 9570 }, { "epoch": 0.21181363349789292, "grad_norm": 0.9241244196891785, "learning_rate": 1.7866424726888277e-05, "loss": 0.5247, "step": 9575 }, { "epoch": 0.2119242411394062, "grad_norm": 1.3672188520431519, "learning_rate": 1.786427881471886e-05, "loss": 0.4865, "step": 9580 }, { "epoch": 0.21203484878091944, "grad_norm": 1.4688724279403687, "learning_rate": 1.7862131952940232e-05, "loss": 0.3591, "step": 9585 }, { "epoch": 0.2121454564224327, "grad_norm": 1.2961400747299194, "learning_rate": 1.7859984141811623e-05, "loss": 0.3365, "step": 9590 }, { "epoch": 0.21225606406394595, "grad_norm": 1.6778720617294312, "learning_rate": 1.7857835381592377e-05, "loss": 0.4063, "step": 9595 }, { "epoch": 0.21236667170545923, "grad_norm": 0.9518163204193115, "learning_rate": 1.7855685672541966e-05, "loss": 0.5415, "step": 9600 }, { "epoch": 0.2124772793469725, "grad_norm": 1.163583755493164, "learning_rate": 1.7853535014919957e-05, "loss": 0.4067, "step": 9605 }, { "epoch": 0.21258788698848574, "grad_norm": 1.7011481523513794, "learning_rate": 1.785138340898605e-05, "loss": 0.47, "step": 9610 }, { "epoch": 0.212698494629999, "grad_norm": 1.7860008478164673, "learning_rate": 1.784923085500004e-05, "loss": 0.3804, "step": 9615 }, { "epoch": 0.21280910227151226, "grad_norm": 1.5781924724578857, "learning_rate": 1.784707735322186e-05, "loss": 0.3579, "step": 9620 }, { "epoch": 0.21291970991302553, "grad_norm": 1.6910912990570068, "learning_rate": 1.784492290391153e-05, "loss": 0.2633, "step": 9625 }, { "epoch": 0.21303031755453877, "grad_norm": 1.135952353477478, "learning_rate": 1.7842767507329216e-05, "loss": 0.3757, "step": 9630 }, { "epoch": 0.21314092519605204, "grad_norm": 1.0815389156341553, "learning_rate": 1.7840611163735172e-05, "loss": 0.383, "step": 9635 }, { "epoch": 0.21325153283756532, "grad_norm": 2.001309394836426, "learning_rate": 1.783845387338977e-05, "loss": 0.4409, "step": 9640 }, { "epoch": 0.21336214047907856, "grad_norm": 1.5623769760131836, "learning_rate": 1.7836295636553514e-05, "loss": 0.3124, "step": 9645 }, { "epoch": 0.21347274812059183, "grad_norm": 1.8041331768035889, "learning_rate": 1.7834136453487e-05, "loss": 0.3219, "step": 9650 }, { "epoch": 0.21358335576210508, "grad_norm": 1.6341661214828491, "learning_rate": 1.783197632445096e-05, "loss": 0.4213, "step": 9655 }, { "epoch": 0.21369396340361835, "grad_norm": 1.461215615272522, "learning_rate": 1.782981524970622e-05, "loss": 0.3338, "step": 9660 }, { "epoch": 0.2138045710451316, "grad_norm": 1.0043234825134277, "learning_rate": 1.7827653229513728e-05, "loss": 0.4744, "step": 9665 }, { "epoch": 0.21391517868664486, "grad_norm": 1.0017890930175781, "learning_rate": 1.7825490264134553e-05, "loss": 0.432, "step": 9670 }, { "epoch": 0.21402578632815814, "grad_norm": 1.1280218362808228, "learning_rate": 1.782332635382987e-05, "loss": 0.405, "step": 9675 }, { "epoch": 0.21413639396967138, "grad_norm": 1.407751441001892, "learning_rate": 1.7821161498860974e-05, "loss": 0.2798, "step": 9680 }, { "epoch": 0.21424700161118465, "grad_norm": 0.9426209330558777, "learning_rate": 1.7818995699489264e-05, "loss": 0.3693, "step": 9685 }, { "epoch": 0.2143576092526979, "grad_norm": 2.0216660499572754, "learning_rate": 1.7816828955976263e-05, "loss": 0.4579, "step": 9690 }, { "epoch": 0.21446821689421117, "grad_norm": 1.2463113069534302, "learning_rate": 1.7814661268583608e-05, "loss": 0.3583, "step": 9695 }, { "epoch": 0.21457882453572444, "grad_norm": 1.4072421789169312, "learning_rate": 1.781249263757304e-05, "loss": 0.4337, "step": 9700 }, { "epoch": 0.21468943217723768, "grad_norm": 1.3831170797348022, "learning_rate": 1.7810323063206426e-05, "loss": 0.3888, "step": 9705 }, { "epoch": 0.21480003981875095, "grad_norm": 1.4265437126159668, "learning_rate": 1.7808152545745736e-05, "loss": 0.4706, "step": 9710 }, { "epoch": 0.2149106474602642, "grad_norm": 1.1716703176498413, "learning_rate": 1.7805981085453065e-05, "loss": 0.3682, "step": 9715 }, { "epoch": 0.21502125510177747, "grad_norm": 1.0097942352294922, "learning_rate": 1.7803808682590614e-05, "loss": 0.2987, "step": 9720 }, { "epoch": 0.2151318627432907, "grad_norm": 1.6955162286758423, "learning_rate": 1.78016353374207e-05, "loss": 0.4493, "step": 9725 }, { "epoch": 0.21524247038480399, "grad_norm": 1.067857265472412, "learning_rate": 1.7799461050205748e-05, "loss": 0.4645, "step": 9730 }, { "epoch": 0.21535307802631726, "grad_norm": 1.124284267425537, "learning_rate": 1.779728582120831e-05, "loss": 0.2342, "step": 9735 }, { "epoch": 0.2154636856678305, "grad_norm": 1.0202727317810059, "learning_rate": 1.7795109650691045e-05, "loss": 0.3782, "step": 9740 }, { "epoch": 0.21557429330934377, "grad_norm": 1.6134871244430542, "learning_rate": 1.779293253891672e-05, "loss": 0.481, "step": 9745 }, { "epoch": 0.21568490095085702, "grad_norm": 1.6068413257598877, "learning_rate": 1.7790754486148224e-05, "loss": 0.3542, "step": 9750 }, { "epoch": 0.2157955085923703, "grad_norm": 1.2648273706436157, "learning_rate": 1.7788575492648554e-05, "loss": 0.2772, "step": 9755 }, { "epoch": 0.21590611623388353, "grad_norm": 1.2343422174453735, "learning_rate": 1.7786395558680824e-05, "loss": 0.3234, "step": 9760 }, { "epoch": 0.2160167238753968, "grad_norm": 1.2243711948394775, "learning_rate": 1.7784214684508256e-05, "loss": 0.4029, "step": 9765 }, { "epoch": 0.21612733151691008, "grad_norm": 0.9226768016815186, "learning_rate": 1.77820328703942e-05, "loss": 0.3241, "step": 9770 }, { "epoch": 0.21623793915842332, "grad_norm": 1.8557233810424805, "learning_rate": 1.77798501166021e-05, "loss": 0.3768, "step": 9775 }, { "epoch": 0.2163485467999366, "grad_norm": 1.5671035051345825, "learning_rate": 1.7777666423395522e-05, "loss": 0.3733, "step": 9780 }, { "epoch": 0.21645915444144984, "grad_norm": 1.4180763959884644, "learning_rate": 1.7775481791038155e-05, "loss": 0.3074, "step": 9785 }, { "epoch": 0.2165697620829631, "grad_norm": 1.323614478111267, "learning_rate": 1.7773296219793788e-05, "loss": 0.4133, "step": 9790 }, { "epoch": 0.21668036972447635, "grad_norm": 1.4482744932174683, "learning_rate": 1.7771109709926327e-05, "loss": 0.434, "step": 9795 }, { "epoch": 0.21679097736598962, "grad_norm": 1.059078574180603, "learning_rate": 1.776892226169979e-05, "loss": 0.3633, "step": 9800 }, { "epoch": 0.2169015850075029, "grad_norm": 1.88442862033844, "learning_rate": 1.776673387537832e-05, "loss": 0.4477, "step": 9805 }, { "epoch": 0.21701219264901614, "grad_norm": 1.2856810092926025, "learning_rate": 1.776454455122615e-05, "loss": 0.3468, "step": 9810 }, { "epoch": 0.2171228002905294, "grad_norm": 2.2430243492126465, "learning_rate": 1.7762354289507654e-05, "loss": 0.3638, "step": 9815 }, { "epoch": 0.21723340793204265, "grad_norm": 1.538088321685791, "learning_rate": 1.77601630904873e-05, "loss": 0.4184, "step": 9820 }, { "epoch": 0.21734401557355593, "grad_norm": 1.1065983772277832, "learning_rate": 1.7757970954429674e-05, "loss": 0.456, "step": 9825 }, { "epoch": 0.2174546232150692, "grad_norm": 1.1314496994018555, "learning_rate": 1.7755777881599475e-05, "loss": 0.5194, "step": 9830 }, { "epoch": 0.21756523085658244, "grad_norm": 1.1139181852340698, "learning_rate": 1.7753583872261514e-05, "loss": 0.3871, "step": 9835 }, { "epoch": 0.21767583849809571, "grad_norm": 1.3216278553009033, "learning_rate": 1.775138892668072e-05, "loss": 0.3373, "step": 9840 }, { "epoch": 0.21778644613960896, "grad_norm": 1.1653101444244385, "learning_rate": 1.7749193045122137e-05, "loss": 0.4546, "step": 9845 }, { "epoch": 0.21789705378112223, "grad_norm": 1.9185738563537598, "learning_rate": 1.7746996227850908e-05, "loss": 0.3023, "step": 9850 }, { "epoch": 0.21800766142263547, "grad_norm": 1.5992302894592285, "learning_rate": 1.77447984751323e-05, "loss": 0.3599, "step": 9855 }, { "epoch": 0.21811826906414875, "grad_norm": 1.326263427734375, "learning_rate": 1.7742599787231695e-05, "loss": 0.4136, "step": 9860 }, { "epoch": 0.21822887670566202, "grad_norm": 1.358125925064087, "learning_rate": 1.7740400164414584e-05, "loss": 0.4523, "step": 9865 }, { "epoch": 0.21833948434717526, "grad_norm": 2.499098539352417, "learning_rate": 1.7738199606946564e-05, "loss": 0.3792, "step": 9870 }, { "epoch": 0.21845009198868853, "grad_norm": 1.5233778953552246, "learning_rate": 1.773599811509336e-05, "loss": 0.3467, "step": 9875 }, { "epoch": 0.21856069963020178, "grad_norm": 1.1217293739318848, "learning_rate": 1.773379568912079e-05, "loss": 0.3234, "step": 9880 }, { "epoch": 0.21867130727171505, "grad_norm": 1.5181266069412231, "learning_rate": 1.7731592329294812e-05, "loss": 0.3541, "step": 9885 }, { "epoch": 0.2187819149132283, "grad_norm": 1.1381466388702393, "learning_rate": 1.7729388035881468e-05, "loss": 0.2367, "step": 9890 }, { "epoch": 0.21889252255474156, "grad_norm": 2.2413220405578613, "learning_rate": 1.772718280914693e-05, "loss": 0.3767, "step": 9895 }, { "epoch": 0.21900313019625484, "grad_norm": 1.477082371711731, "learning_rate": 1.7724976649357485e-05, "loss": 0.5718, "step": 9900 }, { "epoch": 0.21911373783776808, "grad_norm": 1.5074187517166138, "learning_rate": 1.7722769556779514e-05, "loss": 0.4478, "step": 9905 }, { "epoch": 0.21922434547928135, "grad_norm": 0.810518741607666, "learning_rate": 1.7720561531679533e-05, "loss": 0.3719, "step": 9910 }, { "epoch": 0.2193349531207946, "grad_norm": 1.5132691860198975, "learning_rate": 1.7718352574324152e-05, "loss": 0.4699, "step": 9915 }, { "epoch": 0.21944556076230787, "grad_norm": 1.5228404998779297, "learning_rate": 1.7716142684980108e-05, "loss": 0.3031, "step": 9920 }, { "epoch": 0.21955616840382114, "grad_norm": 1.3604463338851929, "learning_rate": 1.771393186391424e-05, "loss": 0.4064, "step": 9925 }, { "epoch": 0.21966677604533438, "grad_norm": 1.5151939392089844, "learning_rate": 1.771172011139351e-05, "loss": 0.3561, "step": 9930 }, { "epoch": 0.21977738368684765, "grad_norm": 1.7731273174285889, "learning_rate": 1.770950742768498e-05, "loss": 0.3781, "step": 9935 }, { "epoch": 0.2198879913283609, "grad_norm": 1.2217817306518555, "learning_rate": 1.7707293813055833e-05, "loss": 0.4549, "step": 9940 }, { "epoch": 0.21999859896987417, "grad_norm": 1.5973525047302246, "learning_rate": 1.7705079267773366e-05, "loss": 0.4619, "step": 9945 }, { "epoch": 0.22010920661138741, "grad_norm": 1.494006872177124, "learning_rate": 1.770286379210498e-05, "loss": 0.4938, "step": 9950 }, { "epoch": 0.2202198142529007, "grad_norm": 1.9191972017288208, "learning_rate": 1.7700647386318195e-05, "loss": 0.3511, "step": 9955 }, { "epoch": 0.22033042189441396, "grad_norm": 1.5986005067825317, "learning_rate": 1.769843005068064e-05, "loss": 0.3417, "step": 9960 }, { "epoch": 0.2204410295359272, "grad_norm": 1.8043824434280396, "learning_rate": 1.769621178546006e-05, "loss": 0.4267, "step": 9965 }, { "epoch": 0.22055163717744047, "grad_norm": 1.8786505460739136, "learning_rate": 1.7693992590924308e-05, "loss": 0.3511, "step": 9970 }, { "epoch": 0.22066224481895372, "grad_norm": 1.5055198669433594, "learning_rate": 1.769177246734135e-05, "loss": 0.3065, "step": 9975 }, { "epoch": 0.220772852460467, "grad_norm": 0.7981667518615723, "learning_rate": 1.7689551414979266e-05, "loss": 0.3236, "step": 9980 }, { "epoch": 0.22088346010198023, "grad_norm": 1.3244142532348633, "learning_rate": 1.7687329434106247e-05, "loss": 0.3494, "step": 9985 }, { "epoch": 0.2209940677434935, "grad_norm": 1.0816394090652466, "learning_rate": 1.76851065249906e-05, "loss": 0.3905, "step": 9990 }, { "epoch": 0.22110467538500678, "grad_norm": 2.816422462463379, "learning_rate": 1.768288268790074e-05, "loss": 0.3549, "step": 9995 }, { "epoch": 0.22121528302652002, "grad_norm": 1.0841871500015259, "learning_rate": 1.768065792310519e-05, "loss": 0.3494, "step": 10000 }, { "epoch": 0.2213258906680333, "grad_norm": 1.2636303901672363, "learning_rate": 1.767843223087259e-05, "loss": 0.2631, "step": 10005 }, { "epoch": 0.22143649830954654, "grad_norm": 1.6538304090499878, "learning_rate": 1.7676205611471698e-05, "loss": 0.345, "step": 10010 }, { "epoch": 0.2215471059510598, "grad_norm": 1.8803596496582031, "learning_rate": 1.7673978065171373e-05, "loss": 0.4194, "step": 10015 }, { "epoch": 0.22165771359257305, "grad_norm": 1.5538609027862549, "learning_rate": 1.7671749592240588e-05, "loss": 0.5964, "step": 10020 }, { "epoch": 0.22176832123408632, "grad_norm": 0.9472442865371704, "learning_rate": 1.7669520192948436e-05, "loss": 0.4222, "step": 10025 }, { "epoch": 0.2218789288755996, "grad_norm": 1.3935292959213257, "learning_rate": 1.7667289867564114e-05, "loss": 0.3166, "step": 10030 }, { "epoch": 0.22198953651711284, "grad_norm": 1.1348661184310913, "learning_rate": 1.766505861635693e-05, "loss": 0.3977, "step": 10035 }, { "epoch": 0.2221001441586261, "grad_norm": 1.4207206964492798, "learning_rate": 1.7662826439596314e-05, "loss": 0.5402, "step": 10040 }, { "epoch": 0.22221075180013936, "grad_norm": 1.2880958318710327, "learning_rate": 1.7660593337551793e-05, "loss": 0.4372, "step": 10045 }, { "epoch": 0.22232135944165263, "grad_norm": 1.1345138549804688, "learning_rate": 1.765835931049302e-05, "loss": 0.4293, "step": 10050 }, { "epoch": 0.2224319670831659, "grad_norm": 1.5881539583206177, "learning_rate": 1.7656124358689746e-05, "loss": 0.4637, "step": 10055 }, { "epoch": 0.22254257472467914, "grad_norm": 3.5476315021514893, "learning_rate": 1.765388848241185e-05, "loss": 0.2179, "step": 10060 }, { "epoch": 0.22265318236619241, "grad_norm": 1.5679768323898315, "learning_rate": 1.7651651681929304e-05, "loss": 0.3787, "step": 10065 }, { "epoch": 0.22276379000770566, "grad_norm": 2.0091440677642822, "learning_rate": 1.7649413957512206e-05, "loss": 0.4416, "step": 10070 }, { "epoch": 0.22287439764921893, "grad_norm": 1.7008088827133179, "learning_rate": 1.764717530943076e-05, "loss": 0.3395, "step": 10075 }, { "epoch": 0.22298500529073217, "grad_norm": 1.2185156345367432, "learning_rate": 1.764493573795528e-05, "loss": 0.2157, "step": 10080 }, { "epoch": 0.22309561293224545, "grad_norm": 1.1030876636505127, "learning_rate": 1.7642695243356194e-05, "loss": 0.541, "step": 10085 }, { "epoch": 0.22320622057375872, "grad_norm": 1.2915509939193726, "learning_rate": 1.7640453825904045e-05, "loss": 0.3136, "step": 10090 }, { "epoch": 0.22331682821527196, "grad_norm": 1.2374045848846436, "learning_rate": 1.763821148586948e-05, "loss": 0.3485, "step": 10095 }, { "epoch": 0.22342743585678523, "grad_norm": 2.4123425483703613, "learning_rate": 1.7635968223523258e-05, "loss": 0.6231, "step": 10100 }, { "epoch": 0.22353804349829848, "grad_norm": 1.7569630146026611, "learning_rate": 1.763372403913626e-05, "loss": 0.4675, "step": 10105 }, { "epoch": 0.22364865113981175, "grad_norm": 1.5150114297866821, "learning_rate": 1.763147893297946e-05, "loss": 0.442, "step": 10110 }, { "epoch": 0.223759258781325, "grad_norm": 1.8109616041183472, "learning_rate": 1.7629232905323967e-05, "loss": 0.4355, "step": 10115 }, { "epoch": 0.22386986642283827, "grad_norm": 1.5120307207107544, "learning_rate": 1.7626985956440976e-05, "loss": 0.4448, "step": 10120 }, { "epoch": 0.22398047406435154, "grad_norm": 1.3662647008895874, "learning_rate": 1.7624738086601816e-05, "loss": 0.354, "step": 10125 }, { "epoch": 0.22409108170586478, "grad_norm": 1.1018264293670654, "learning_rate": 1.7622489296077906e-05, "loss": 0.3466, "step": 10130 }, { "epoch": 0.22420168934737805, "grad_norm": 1.4379113912582397, "learning_rate": 1.7620239585140793e-05, "loss": 0.3413, "step": 10135 }, { "epoch": 0.2243122969888913, "grad_norm": 1.744497537612915, "learning_rate": 1.761798895406213e-05, "loss": 0.4429, "step": 10140 }, { "epoch": 0.22442290463040457, "grad_norm": 1.5083794593811035, "learning_rate": 1.761573740311368e-05, "loss": 0.5018, "step": 10145 }, { "epoch": 0.2245335122719178, "grad_norm": 1.875996470451355, "learning_rate": 1.761348493256731e-05, "loss": 0.437, "step": 10150 }, { "epoch": 0.22464411991343108, "grad_norm": 1.4901413917541504, "learning_rate": 1.761123154269501e-05, "loss": 0.3818, "step": 10155 }, { "epoch": 0.22475472755494436, "grad_norm": 1.8574743270874023, "learning_rate": 1.7608977233768877e-05, "loss": 0.4881, "step": 10160 }, { "epoch": 0.2248653351964576, "grad_norm": 1.5818549394607544, "learning_rate": 1.760672200606112e-05, "loss": 0.544, "step": 10165 }, { "epoch": 0.22497594283797087, "grad_norm": 1.0008360147476196, "learning_rate": 1.760446585984405e-05, "loss": 0.4431, "step": 10170 }, { "epoch": 0.22508655047948412, "grad_norm": 1.0828427076339722, "learning_rate": 1.76022087953901e-05, "loss": 0.4629, "step": 10175 }, { "epoch": 0.2251971581209974, "grad_norm": 0.9905200600624084, "learning_rate": 1.7599950812971815e-05, "loss": 0.3161, "step": 10180 }, { "epoch": 0.22530776576251066, "grad_norm": 1.1888501644134521, "learning_rate": 1.7597691912861835e-05, "loss": 0.4152, "step": 10185 }, { "epoch": 0.2254183734040239, "grad_norm": 1.4137482643127441, "learning_rate": 1.7595432095332933e-05, "loss": 0.4559, "step": 10190 }, { "epoch": 0.22552898104553717, "grad_norm": 1.0545746088027954, "learning_rate": 1.759317136065797e-05, "loss": 0.3249, "step": 10195 }, { "epoch": 0.22563958868705042, "grad_norm": 0.8451319336891174, "learning_rate": 1.7590909709109938e-05, "loss": 0.4049, "step": 10200 }, { "epoch": 0.2257501963285637, "grad_norm": 1.3550080060958862, "learning_rate": 1.7588647140961924e-05, "loss": 0.446, "step": 10205 }, { "epoch": 0.22586080397007693, "grad_norm": 1.4622598886489868, "learning_rate": 1.758638365648714e-05, "loss": 0.5063, "step": 10210 }, { "epoch": 0.2259714116115902, "grad_norm": 1.6535378694534302, "learning_rate": 1.7584119255958895e-05, "loss": 0.3566, "step": 10215 }, { "epoch": 0.22608201925310348, "grad_norm": 1.3344388008117676, "learning_rate": 1.7581853939650613e-05, "loss": 0.4533, "step": 10220 }, { "epoch": 0.22619262689461672, "grad_norm": 1.3897053003311157, "learning_rate": 1.7579587707835837e-05, "loss": 0.3331, "step": 10225 }, { "epoch": 0.22630323453613, "grad_norm": 1.7256669998168945, "learning_rate": 1.757732056078821e-05, "loss": 0.3762, "step": 10230 }, { "epoch": 0.22641384217764324, "grad_norm": 1.2370744943618774, "learning_rate": 1.7575052498781486e-05, "loss": 0.34, "step": 10235 }, { "epoch": 0.2265244498191565, "grad_norm": 1.4254302978515625, "learning_rate": 1.757278352208954e-05, "loss": 0.435, "step": 10240 }, { "epoch": 0.22663505746066975, "grad_norm": 1.5017786026000977, "learning_rate": 1.7570513630986345e-05, "loss": 0.2356, "step": 10245 }, { "epoch": 0.22674566510218302, "grad_norm": 1.534530758857727, "learning_rate": 1.7568242825745993e-05, "loss": 0.3675, "step": 10250 }, { "epoch": 0.2268562727436963, "grad_norm": 1.3725621700286865, "learning_rate": 1.756597110664268e-05, "loss": 0.3083, "step": 10255 }, { "epoch": 0.22696688038520954, "grad_norm": 2.0299415588378906, "learning_rate": 1.7563698473950716e-05, "loss": 0.3414, "step": 10260 }, { "epoch": 0.2270774880267228, "grad_norm": 1.2475658655166626, "learning_rate": 1.7561424927944523e-05, "loss": 0.4203, "step": 10265 }, { "epoch": 0.22718809566823606, "grad_norm": 1.5104402303695679, "learning_rate": 1.7559150468898628e-05, "loss": 0.4003, "step": 10270 }, { "epoch": 0.22729870330974933, "grad_norm": 1.7779219150543213, "learning_rate": 1.7556875097087675e-05, "loss": 0.4191, "step": 10275 }, { "epoch": 0.2274093109512626, "grad_norm": 0.9860856533050537, "learning_rate": 1.755459881278641e-05, "loss": 0.3978, "step": 10280 }, { "epoch": 0.22751991859277584, "grad_norm": 1.283320426940918, "learning_rate": 1.7552321616269695e-05, "loss": 0.3511, "step": 10285 }, { "epoch": 0.22763052623428912, "grad_norm": 1.4692749977111816, "learning_rate": 1.75500435078125e-05, "loss": 0.4639, "step": 10290 }, { "epoch": 0.22774113387580236, "grad_norm": 1.0828791856765747, "learning_rate": 1.7547764487689912e-05, "loss": 0.2602, "step": 10295 }, { "epoch": 0.22785174151731563, "grad_norm": 1.5939462184906006, "learning_rate": 1.754548455617711e-05, "loss": 0.3451, "step": 10300 }, { "epoch": 0.22796234915882888, "grad_norm": 1.2408337593078613, "learning_rate": 1.7543203713549408e-05, "loss": 0.4341, "step": 10305 }, { "epoch": 0.22807295680034215, "grad_norm": 1.6569712162017822, "learning_rate": 1.754092196008221e-05, "loss": 0.294, "step": 10310 }, { "epoch": 0.22818356444185542, "grad_norm": 1.8954427242279053, "learning_rate": 1.7538639296051037e-05, "loss": 0.508, "step": 10315 }, { "epoch": 0.22829417208336866, "grad_norm": 1.3153976202011108, "learning_rate": 1.7536355721731523e-05, "loss": 0.3052, "step": 10320 }, { "epoch": 0.22840477972488193, "grad_norm": 2.438448429107666, "learning_rate": 1.7534071237399408e-05, "loss": 0.4158, "step": 10325 }, { "epoch": 0.22851538736639518, "grad_norm": 0.5768797397613525, "learning_rate": 1.753178584333054e-05, "loss": 0.3371, "step": 10330 }, { "epoch": 0.22862599500790845, "grad_norm": 1.6865148544311523, "learning_rate": 1.752949953980088e-05, "loss": 0.4072, "step": 10335 }, { "epoch": 0.2287366026494217, "grad_norm": 1.6379910707473755, "learning_rate": 1.75272123270865e-05, "loss": 0.3684, "step": 10340 }, { "epoch": 0.22884721029093497, "grad_norm": 1.139078140258789, "learning_rate": 1.7524924205463584e-05, "loss": 0.376, "step": 10345 }, { "epoch": 0.22895781793244824, "grad_norm": 1.099472999572754, "learning_rate": 1.7522635175208416e-05, "loss": 0.4563, "step": 10350 }, { "epoch": 0.22906842557396148, "grad_norm": 0.9066113829612732, "learning_rate": 1.75203452365974e-05, "loss": 0.3833, "step": 10355 }, { "epoch": 0.22917903321547475, "grad_norm": 1.7458000183105469, "learning_rate": 1.7518054389907035e-05, "loss": 0.2883, "step": 10360 }, { "epoch": 0.229289640856988, "grad_norm": 1.8183164596557617, "learning_rate": 1.7515762635413954e-05, "loss": 0.4435, "step": 10365 }, { "epoch": 0.22940024849850127, "grad_norm": 1.602959156036377, "learning_rate": 1.7513469973394884e-05, "loss": 0.4311, "step": 10370 }, { "epoch": 0.2295108561400145, "grad_norm": 1.3969175815582275, "learning_rate": 1.7511176404126653e-05, "loss": 0.3113, "step": 10375 }, { "epoch": 0.22962146378152778, "grad_norm": 1.2434093952178955, "learning_rate": 1.750888192788622e-05, "loss": 0.4052, "step": 10380 }, { "epoch": 0.22973207142304106, "grad_norm": 1.0423178672790527, "learning_rate": 1.750658654495063e-05, "loss": 0.3843, "step": 10385 }, { "epoch": 0.2298426790645543, "grad_norm": 1.5855064392089844, "learning_rate": 1.750429025559706e-05, "loss": 0.4055, "step": 10390 }, { "epoch": 0.22995328670606757, "grad_norm": 1.3574830293655396, "learning_rate": 1.7501993060102785e-05, "loss": 0.3271, "step": 10395 }, { "epoch": 0.23006389434758082, "grad_norm": 1.3258748054504395, "learning_rate": 1.7499694958745187e-05, "loss": 0.4001, "step": 10400 }, { "epoch": 0.2301745019890941, "grad_norm": 1.3631223440170288, "learning_rate": 1.7497395951801764e-05, "loss": 0.3932, "step": 10405 }, { "epoch": 0.23028510963060736, "grad_norm": 1.511702299118042, "learning_rate": 1.749509603955012e-05, "loss": 0.3297, "step": 10410 }, { "epoch": 0.2303957172721206, "grad_norm": 1.1352354288101196, "learning_rate": 1.7492795222267967e-05, "loss": 0.4622, "step": 10415 }, { "epoch": 0.23050632491363388, "grad_norm": 1.1668766736984253, "learning_rate": 1.749049350023313e-05, "loss": 0.3752, "step": 10420 }, { "epoch": 0.23061693255514712, "grad_norm": 1.1158032417297363, "learning_rate": 1.7488190873723537e-05, "loss": 0.3881, "step": 10425 }, { "epoch": 0.2307275401966604, "grad_norm": 1.6391711235046387, "learning_rate": 1.7485887343017233e-05, "loss": 0.2836, "step": 10430 }, { "epoch": 0.23083814783817364, "grad_norm": 1.6133570671081543, "learning_rate": 1.748358290839237e-05, "loss": 0.4274, "step": 10435 }, { "epoch": 0.2309487554796869, "grad_norm": 1.4469060897827148, "learning_rate": 1.7481277570127203e-05, "loss": 0.4428, "step": 10440 }, { "epoch": 0.23105936312120018, "grad_norm": 1.295979380607605, "learning_rate": 1.747897132850011e-05, "loss": 0.4071, "step": 10445 }, { "epoch": 0.23116997076271342, "grad_norm": 1.104712963104248, "learning_rate": 1.7476664183789558e-05, "loss": 0.4281, "step": 10450 }, { "epoch": 0.2312805784042267, "grad_norm": 1.39009428024292, "learning_rate": 1.7474356136274136e-05, "loss": 0.5431, "step": 10455 }, { "epoch": 0.23139118604573994, "grad_norm": 1.3048909902572632, "learning_rate": 1.7472047186232547e-05, "loss": 0.5002, "step": 10460 }, { "epoch": 0.2315017936872532, "grad_norm": 1.1296603679656982, "learning_rate": 1.7469737333943594e-05, "loss": 0.379, "step": 10465 }, { "epoch": 0.23161240132876645, "grad_norm": 1.8514255285263062, "learning_rate": 1.746742657968618e-05, "loss": 0.4214, "step": 10470 }, { "epoch": 0.23172300897027973, "grad_norm": 2.110450029373169, "learning_rate": 1.7465114923739344e-05, "loss": 0.5025, "step": 10475 }, { "epoch": 0.231833616611793, "grad_norm": 1.4309475421905518, "learning_rate": 1.7462802366382208e-05, "loss": 0.4367, "step": 10480 }, { "epoch": 0.23194422425330624, "grad_norm": 1.1686954498291016, "learning_rate": 1.7460488907894017e-05, "loss": 0.4838, "step": 10485 }, { "epoch": 0.2320548318948195, "grad_norm": 1.387771725654602, "learning_rate": 1.7458174548554116e-05, "loss": 0.3668, "step": 10490 }, { "epoch": 0.23216543953633276, "grad_norm": 1.997103214263916, "learning_rate": 1.7455859288641965e-05, "loss": 0.4444, "step": 10495 }, { "epoch": 0.23227604717784603, "grad_norm": 1.7381103038787842, "learning_rate": 1.7453543128437133e-05, "loss": 0.4942, "step": 10500 }, { "epoch": 0.23238665481935927, "grad_norm": 2.593492269515991, "learning_rate": 1.7451226068219293e-05, "loss": 0.4803, "step": 10505 }, { "epoch": 0.23249726246087254, "grad_norm": 1.8579232692718506, "learning_rate": 1.744890810826823e-05, "loss": 0.3042, "step": 10510 }, { "epoch": 0.23260787010238582, "grad_norm": 1.258059024810791, "learning_rate": 1.7446589248863833e-05, "loss": 0.4326, "step": 10515 }, { "epoch": 0.23271847774389906, "grad_norm": 1.7894291877746582, "learning_rate": 1.7444269490286113e-05, "loss": 0.3183, "step": 10520 }, { "epoch": 0.23282908538541233, "grad_norm": 0.7208828330039978, "learning_rate": 1.7441948832815172e-05, "loss": 0.3946, "step": 10525 }, { "epoch": 0.23293969302692558, "grad_norm": 1.3851594924926758, "learning_rate": 1.7439627276731236e-05, "loss": 0.3225, "step": 10530 }, { "epoch": 0.23305030066843885, "grad_norm": 1.527173399925232, "learning_rate": 1.7437304822314623e-05, "loss": 0.3729, "step": 10535 }, { "epoch": 0.23316090830995212, "grad_norm": 1.2432119846343994, "learning_rate": 1.7434981469845774e-05, "loss": 0.2762, "step": 10540 }, { "epoch": 0.23327151595146536, "grad_norm": 1.5824341773986816, "learning_rate": 1.7432657219605237e-05, "loss": 0.4088, "step": 10545 }, { "epoch": 0.23338212359297864, "grad_norm": 1.704729676246643, "learning_rate": 1.743033207187365e-05, "loss": 0.4274, "step": 10550 }, { "epoch": 0.23349273123449188, "grad_norm": 1.1487818956375122, "learning_rate": 1.7428006026931795e-05, "loss": 0.4106, "step": 10555 }, { "epoch": 0.23360333887600515, "grad_norm": 1.5381821393966675, "learning_rate": 1.7425679085060527e-05, "loss": 0.3532, "step": 10560 }, { "epoch": 0.2337139465175184, "grad_norm": 1.786310076713562, "learning_rate": 1.742335124654082e-05, "loss": 0.4339, "step": 10565 }, { "epoch": 0.23382455415903167, "grad_norm": 1.1579211950302124, "learning_rate": 1.742102251165377e-05, "loss": 0.3274, "step": 10570 }, { "epoch": 0.23393516180054494, "grad_norm": 1.4606674909591675, "learning_rate": 1.741869288068057e-05, "loss": 0.3933, "step": 10575 }, { "epoch": 0.23404576944205818, "grad_norm": 1.2280464172363281, "learning_rate": 1.7416362353902516e-05, "loss": 0.2812, "step": 10580 }, { "epoch": 0.23415637708357145, "grad_norm": 1.2604116201400757, "learning_rate": 1.7414030931601023e-05, "loss": 0.3337, "step": 10585 }, { "epoch": 0.2342669847250847, "grad_norm": 1.4721554517745972, "learning_rate": 1.7411698614057612e-05, "loss": 0.38, "step": 10590 }, { "epoch": 0.23437759236659797, "grad_norm": 1.021384835243225, "learning_rate": 1.7409365401553903e-05, "loss": 0.3409, "step": 10595 }, { "epoch": 0.23448820000811121, "grad_norm": 1.7940980195999146, "learning_rate": 1.7407031294371633e-05, "loss": 0.2612, "step": 10600 }, { "epoch": 0.23459880764962449, "grad_norm": 1.4590873718261719, "learning_rate": 1.7404696292792644e-05, "loss": 0.2862, "step": 10605 }, { "epoch": 0.23470941529113776, "grad_norm": 0.8254291415214539, "learning_rate": 1.740236039709889e-05, "loss": 0.3814, "step": 10610 }, { "epoch": 0.234820022932651, "grad_norm": 1.342970371246338, "learning_rate": 1.7400023607572426e-05, "loss": 0.4591, "step": 10615 }, { "epoch": 0.23493063057416427, "grad_norm": 1.5399270057678223, "learning_rate": 1.739768592449542e-05, "loss": 0.3116, "step": 10620 }, { "epoch": 0.23504123821567752, "grad_norm": 1.8590092658996582, "learning_rate": 1.739534734815015e-05, "loss": 0.4094, "step": 10625 }, { "epoch": 0.2351518458571908, "grad_norm": 1.1797670125961304, "learning_rate": 1.7393007878818994e-05, "loss": 0.3061, "step": 10630 }, { "epoch": 0.23526245349870406, "grad_norm": 1.3304877281188965, "learning_rate": 1.7390667516784446e-05, "loss": 0.3736, "step": 10635 }, { "epoch": 0.2353730611402173, "grad_norm": 1.8482640981674194, "learning_rate": 1.7388326262329098e-05, "loss": 0.4478, "step": 10640 }, { "epoch": 0.23548366878173058, "grad_norm": 1.7391095161437988, "learning_rate": 1.738598411573566e-05, "loss": 0.4492, "step": 10645 }, { "epoch": 0.23559427642324382, "grad_norm": 1.7592558860778809, "learning_rate": 1.7383641077286945e-05, "loss": 0.3657, "step": 10650 }, { "epoch": 0.2357048840647571, "grad_norm": 1.7533173561096191, "learning_rate": 1.738129714726587e-05, "loss": 0.5021, "step": 10655 }, { "epoch": 0.23581549170627034, "grad_norm": 1.70650053024292, "learning_rate": 1.7378952325955472e-05, "loss": 0.5238, "step": 10660 }, { "epoch": 0.2359260993477836, "grad_norm": 1.6893752813339233, "learning_rate": 1.7376606613638882e-05, "loss": 0.3505, "step": 10665 }, { "epoch": 0.23603670698929688, "grad_norm": 1.507232666015625, "learning_rate": 1.7374260010599348e-05, "loss": 0.4143, "step": 10670 }, { "epoch": 0.23614731463081012, "grad_norm": 1.2834444046020508, "learning_rate": 1.7371912517120213e-05, "loss": 0.3254, "step": 10675 }, { "epoch": 0.2362579222723234, "grad_norm": 1.8349593877792358, "learning_rate": 1.7369564133484944e-05, "loss": 0.3422, "step": 10680 }, { "epoch": 0.23636852991383664, "grad_norm": 0.9516412019729614, "learning_rate": 1.7367214859977105e-05, "loss": 0.3836, "step": 10685 }, { "epoch": 0.2364791375553499, "grad_norm": 1.3593292236328125, "learning_rate": 1.7364864696880374e-05, "loss": 0.3146, "step": 10690 }, { "epoch": 0.23658974519686315, "grad_norm": 1.2046033143997192, "learning_rate": 1.7362513644478524e-05, "loss": 0.3324, "step": 10695 }, { "epoch": 0.23670035283837643, "grad_norm": 1.498342514038086, "learning_rate": 1.7360161703055453e-05, "loss": 0.399, "step": 10700 }, { "epoch": 0.2368109604798897, "grad_norm": 1.5499365329742432, "learning_rate": 1.7357808872895147e-05, "loss": 0.426, "step": 10705 }, { "epoch": 0.23692156812140294, "grad_norm": 1.4726475477218628, "learning_rate": 1.735545515428172e-05, "loss": 0.3798, "step": 10710 }, { "epoch": 0.23703217576291621, "grad_norm": 1.331188440322876, "learning_rate": 1.7353100547499378e-05, "loss": 0.3073, "step": 10715 }, { "epoch": 0.23714278340442946, "grad_norm": 1.0738976001739502, "learning_rate": 1.735074505283244e-05, "loss": 0.3757, "step": 10720 }, { "epoch": 0.23725339104594273, "grad_norm": 1.0402815341949463, "learning_rate": 1.7348388670565327e-05, "loss": 0.3429, "step": 10725 }, { "epoch": 0.23736399868745597, "grad_norm": 1.5380804538726807, "learning_rate": 1.7346031400982576e-05, "loss": 0.3097, "step": 10730 }, { "epoch": 0.23747460632896925, "grad_norm": 1.6395115852355957, "learning_rate": 1.7343673244368827e-05, "loss": 0.4966, "step": 10735 }, { "epoch": 0.23758521397048252, "grad_norm": 1.8737684488296509, "learning_rate": 1.7341314201008824e-05, "loss": 0.5224, "step": 10740 }, { "epoch": 0.23769582161199576, "grad_norm": 0.708790123462677, "learning_rate": 1.7338954271187422e-05, "loss": 0.2862, "step": 10745 }, { "epoch": 0.23780642925350903, "grad_norm": 1.4959138631820679, "learning_rate": 1.7336593455189584e-05, "loss": 0.4853, "step": 10750 }, { "epoch": 0.23791703689502228, "grad_norm": 1.915073275566101, "learning_rate": 1.7334231753300372e-05, "loss": 0.2422, "step": 10755 }, { "epoch": 0.23802764453653555, "grad_norm": 1.6255546808242798, "learning_rate": 1.7331869165804966e-05, "loss": 0.3394, "step": 10760 }, { "epoch": 0.23813825217804882, "grad_norm": 1.2094775438308716, "learning_rate": 1.7329505692988643e-05, "loss": 0.4244, "step": 10765 }, { "epoch": 0.23824885981956206, "grad_norm": 1.2456419467926025, "learning_rate": 1.73271413351368e-05, "loss": 0.4363, "step": 10770 }, { "epoch": 0.23835946746107534, "grad_norm": 1.4201081991195679, "learning_rate": 1.7324776092534922e-05, "loss": 0.3254, "step": 10775 }, { "epoch": 0.23847007510258858, "grad_norm": 1.1731668710708618, "learning_rate": 1.7322409965468618e-05, "loss": 0.3554, "step": 10780 }, { "epoch": 0.23858068274410185, "grad_norm": 1.5002868175506592, "learning_rate": 1.7320042954223596e-05, "loss": 0.3305, "step": 10785 }, { "epoch": 0.2386912903856151, "grad_norm": 1.711388111114502, "learning_rate": 1.731767505908567e-05, "loss": 0.4167, "step": 10790 }, { "epoch": 0.23880189802712837, "grad_norm": 1.2538114786148071, "learning_rate": 1.7315306280340766e-05, "loss": 0.3999, "step": 10795 }, { "epoch": 0.23891250566864164, "grad_norm": 1.6406806707382202, "learning_rate": 1.731293661827491e-05, "loss": 0.4508, "step": 10800 }, { "epoch": 0.23902311331015488, "grad_norm": 1.423883080482483, "learning_rate": 1.731056607317424e-05, "loss": 0.3475, "step": 10805 }, { "epoch": 0.23913372095166816, "grad_norm": 1.883152961730957, "learning_rate": 1.7308194645325e-05, "loss": 0.3938, "step": 10810 }, { "epoch": 0.2392443285931814, "grad_norm": 1.4561195373535156, "learning_rate": 1.730582233501353e-05, "loss": 0.2509, "step": 10815 }, { "epoch": 0.23935493623469467, "grad_norm": 13.324851989746094, "learning_rate": 1.73034491425263e-05, "loss": 0.3922, "step": 10820 }, { "epoch": 0.23946554387620791, "grad_norm": 1.2086148262023926, "learning_rate": 1.730107506814986e-05, "loss": 0.5639, "step": 10825 }, { "epoch": 0.2395761515177212, "grad_norm": 1.8051904439926147, "learning_rate": 1.7298700112170883e-05, "loss": 0.5602, "step": 10830 }, { "epoch": 0.23968675915923446, "grad_norm": 1.5092456340789795, "learning_rate": 1.729632427487615e-05, "loss": 0.4176, "step": 10835 }, { "epoch": 0.2397973668007477, "grad_norm": 1.1815009117126465, "learning_rate": 1.729394755655253e-05, "loss": 0.3455, "step": 10840 }, { "epoch": 0.23990797444226097, "grad_norm": 1.7553725242614746, "learning_rate": 1.7291569957487024e-05, "loss": 0.4189, "step": 10845 }, { "epoch": 0.24001858208377422, "grad_norm": 1.0259357690811157, "learning_rate": 1.7289191477966722e-05, "loss": 0.5275, "step": 10850 }, { "epoch": 0.2401291897252875, "grad_norm": 1.8252907991409302, "learning_rate": 1.7286812118278818e-05, "loss": 0.4412, "step": 10855 }, { "epoch": 0.24023979736680076, "grad_norm": 1.5009883642196655, "learning_rate": 1.728443187871063e-05, "loss": 0.3775, "step": 10860 }, { "epoch": 0.240350405008314, "grad_norm": 0.999273419380188, "learning_rate": 1.7282050759549563e-05, "loss": 0.3382, "step": 10865 }, { "epoch": 0.24046101264982728, "grad_norm": 1.0401713848114014, "learning_rate": 1.727966876108314e-05, "loss": 0.3586, "step": 10870 }, { "epoch": 0.24057162029134052, "grad_norm": 1.555220365524292, "learning_rate": 1.727728588359898e-05, "loss": 0.3737, "step": 10875 }, { "epoch": 0.2406822279328538, "grad_norm": 2.004215955734253, "learning_rate": 1.7274902127384826e-05, "loss": 0.4317, "step": 10880 }, { "epoch": 0.24079283557436704, "grad_norm": 1.6900324821472168, "learning_rate": 1.7272517492728512e-05, "loss": 0.3008, "step": 10885 }, { "epoch": 0.2409034432158803, "grad_norm": 1.365142822265625, "learning_rate": 1.7270131979917977e-05, "loss": 0.4651, "step": 10890 }, { "epoch": 0.24101405085739358, "grad_norm": 1.592530608177185, "learning_rate": 1.7267745589241272e-05, "loss": 0.4977, "step": 10895 }, { "epoch": 0.24112465849890682, "grad_norm": 1.202248215675354, "learning_rate": 1.7265358320986555e-05, "loss": 0.3462, "step": 10900 }, { "epoch": 0.2412352661404201, "grad_norm": 1.5893068313598633, "learning_rate": 1.726297017544209e-05, "loss": 0.3956, "step": 10905 }, { "epoch": 0.24134587378193334, "grad_norm": 1.004711627960205, "learning_rate": 1.7260581152896244e-05, "loss": 0.4063, "step": 10910 }, { "epoch": 0.2414564814234466, "grad_norm": 1.2544007301330566, "learning_rate": 1.7258191253637483e-05, "loss": 0.3493, "step": 10915 }, { "epoch": 0.24156708906495986, "grad_norm": 1.470594048500061, "learning_rate": 1.7255800477954395e-05, "loss": 0.4833, "step": 10920 }, { "epoch": 0.24167769670647313, "grad_norm": 0.8379184603691101, "learning_rate": 1.725340882613567e-05, "loss": 0.4783, "step": 10925 }, { "epoch": 0.2417883043479864, "grad_norm": 1.5801730155944824, "learning_rate": 1.7251016298470086e-05, "loss": 0.38, "step": 10930 }, { "epoch": 0.24189891198949964, "grad_norm": 3.061105728149414, "learning_rate": 1.7248622895246547e-05, "loss": 0.4318, "step": 10935 }, { "epoch": 0.24200951963101291, "grad_norm": 1.7043342590332031, "learning_rate": 1.7246228616754055e-05, "loss": 0.3746, "step": 10940 }, { "epoch": 0.24212012727252616, "grad_norm": 1.2154113054275513, "learning_rate": 1.7243833463281722e-05, "loss": 0.3485, "step": 10945 }, { "epoch": 0.24223073491403943, "grad_norm": 3.5538229942321777, "learning_rate": 1.7241437435118757e-05, "loss": 0.43, "step": 10950 }, { "epoch": 0.24234134255555267, "grad_norm": 1.6067100763320923, "learning_rate": 1.7239040532554478e-05, "loss": 0.3806, "step": 10955 }, { "epoch": 0.24245195019706595, "grad_norm": 1.8538687229156494, "learning_rate": 1.7236642755878316e-05, "loss": 0.339, "step": 10960 }, { "epoch": 0.24256255783857922, "grad_norm": 1.2677949666976929, "learning_rate": 1.7234244105379798e-05, "loss": 0.3687, "step": 10965 }, { "epoch": 0.24267316548009246, "grad_norm": 0.9942331314086914, "learning_rate": 1.723184458134856e-05, "loss": 0.3945, "step": 10970 }, { "epoch": 0.24278377312160573, "grad_norm": 1.05833899974823, "learning_rate": 1.7229444184074352e-05, "loss": 0.2868, "step": 10975 }, { "epoch": 0.24289438076311898, "grad_norm": 1.295449137687683, "learning_rate": 1.722704291384701e-05, "loss": 0.3034, "step": 10980 }, { "epoch": 0.24300498840463225, "grad_norm": 1.5788346529006958, "learning_rate": 1.722464077095649e-05, "loss": 0.3664, "step": 10985 }, { "epoch": 0.24311559604614552, "grad_norm": 1.1620877981185913, "learning_rate": 1.722223775569285e-05, "loss": 0.3256, "step": 10990 }, { "epoch": 0.24322620368765877, "grad_norm": 1.7034953832626343, "learning_rate": 1.721983386834626e-05, "loss": 0.3247, "step": 10995 }, { "epoch": 0.24333681132917204, "grad_norm": 1.3970781564712524, "learning_rate": 1.721742910920698e-05, "loss": 0.2364, "step": 11000 }, { "epoch": 0.24344741897068528, "grad_norm": 1.9838674068450928, "learning_rate": 1.7215023478565385e-05, "loss": 0.5457, "step": 11005 }, { "epoch": 0.24355802661219855, "grad_norm": 1.7516957521438599, "learning_rate": 1.721261697671196e-05, "loss": 0.2268, "step": 11010 }, { "epoch": 0.2436686342537118, "grad_norm": 0.9821227788925171, "learning_rate": 1.7210209603937285e-05, "loss": 0.3304, "step": 11015 }, { "epoch": 0.24377924189522507, "grad_norm": 1.7167576551437378, "learning_rate": 1.720780136053205e-05, "loss": 0.3775, "step": 11020 }, { "epoch": 0.24388984953673834, "grad_norm": 1.887250304222107, "learning_rate": 1.7205392246787048e-05, "loss": 0.3512, "step": 11025 }, { "epoch": 0.24400045717825158, "grad_norm": 1.0533512830734253, "learning_rate": 1.720298226299318e-05, "loss": 0.2061, "step": 11030 }, { "epoch": 0.24411106481976486, "grad_norm": 1.2844773530960083, "learning_rate": 1.7200571409441452e-05, "loss": 0.3761, "step": 11035 }, { "epoch": 0.2442216724612781, "grad_norm": 0.8319059610366821, "learning_rate": 1.7198159686422973e-05, "loss": 0.3648, "step": 11040 }, { "epoch": 0.24433228010279137, "grad_norm": 1.5588091611862183, "learning_rate": 1.7195747094228957e-05, "loss": 0.454, "step": 11045 }, { "epoch": 0.24444288774430462, "grad_norm": 1.4446035623550415, "learning_rate": 1.719333363315073e-05, "loss": 0.3618, "step": 11050 }, { "epoch": 0.2445534953858179, "grad_norm": 1.292542576789856, "learning_rate": 1.7190919303479705e-05, "loss": 0.4517, "step": 11055 }, { "epoch": 0.24466410302733116, "grad_norm": 1.369038701057434, "learning_rate": 1.718850410550742e-05, "loss": 0.3293, "step": 11060 }, { "epoch": 0.2447747106688444, "grad_norm": 1.3755738735198975, "learning_rate": 1.7186088039525507e-05, "loss": 0.439, "step": 11065 }, { "epoch": 0.24488531831035767, "grad_norm": 0.8814907670021057, "learning_rate": 1.7183671105825705e-05, "loss": 0.3511, "step": 11070 }, { "epoch": 0.24499592595187092, "grad_norm": 1.2023955583572388, "learning_rate": 1.718125330469986e-05, "loss": 0.3211, "step": 11075 }, { "epoch": 0.2451065335933842, "grad_norm": 1.8197723627090454, "learning_rate": 1.7178834636439918e-05, "loss": 0.4023, "step": 11080 }, { "epoch": 0.24521714123489743, "grad_norm": 1.0008724927902222, "learning_rate": 1.7176415101337936e-05, "loss": 0.4875, "step": 11085 }, { "epoch": 0.2453277488764107, "grad_norm": 1.805459976196289, "learning_rate": 1.7173994699686066e-05, "loss": 0.4406, "step": 11090 }, { "epoch": 0.24543835651792398, "grad_norm": 1.6855608224868774, "learning_rate": 1.717157343177658e-05, "loss": 0.3199, "step": 11095 }, { "epoch": 0.24554896415943722, "grad_norm": 1.4675109386444092, "learning_rate": 1.7169151297901834e-05, "loss": 0.3095, "step": 11100 }, { "epoch": 0.2456595718009505, "grad_norm": 0.8311145901679993, "learning_rate": 1.7166728298354307e-05, "loss": 0.3371, "step": 11105 }, { "epoch": 0.24577017944246374, "grad_norm": 1.3677676916122437, "learning_rate": 1.716430443342657e-05, "loss": 0.3586, "step": 11110 }, { "epoch": 0.245880787083977, "grad_norm": 1.548305869102478, "learning_rate": 1.7161879703411316e-05, "loss": 0.3742, "step": 11115 }, { "epoch": 0.24599139472549028, "grad_norm": 1.2776329517364502, "learning_rate": 1.7159454108601318e-05, "loss": 0.5315, "step": 11120 }, { "epoch": 0.24610200236700353, "grad_norm": 1.4835766553878784, "learning_rate": 1.7157027649289476e-05, "loss": 0.4557, "step": 11125 }, { "epoch": 0.2462126100085168, "grad_norm": 1.317830204963684, "learning_rate": 1.7154600325768773e-05, "loss": 0.2918, "step": 11130 }, { "epoch": 0.24632321765003004, "grad_norm": 1.7765281200408936, "learning_rate": 1.7152172138332313e-05, "loss": 0.3403, "step": 11135 }, { "epoch": 0.2464338252915433, "grad_norm": 1.7287839651107788, "learning_rate": 1.71497430872733e-05, "loss": 0.4797, "step": 11140 }, { "epoch": 0.24654443293305656, "grad_norm": 1.2840566635131836, "learning_rate": 1.7147313172885042e-05, "loss": 0.5116, "step": 11145 }, { "epoch": 0.24665504057456983, "grad_norm": 0.8728380799293518, "learning_rate": 1.7144882395460943e-05, "loss": 0.4053, "step": 11150 }, { "epoch": 0.2467656482160831, "grad_norm": 1.7964955568313599, "learning_rate": 1.714245075529453e-05, "loss": 0.3395, "step": 11155 }, { "epoch": 0.24687625585759634, "grad_norm": 1.38570237159729, "learning_rate": 1.7140018252679416e-05, "loss": 0.2938, "step": 11160 }, { "epoch": 0.24698686349910962, "grad_norm": 1.2695504426956177, "learning_rate": 1.7137584887909327e-05, "loss": 0.3577, "step": 11165 }, { "epoch": 0.24709747114062286, "grad_norm": 1.4244251251220703, "learning_rate": 1.7135150661278085e-05, "loss": 0.3221, "step": 11170 }, { "epoch": 0.24720807878213613, "grad_norm": 1.7266799211502075, "learning_rate": 1.713271557307963e-05, "loss": 0.4857, "step": 11175 }, { "epoch": 0.24731868642364938, "grad_norm": 2.570462942123413, "learning_rate": 1.7130279623607994e-05, "loss": 0.4005, "step": 11180 }, { "epoch": 0.24742929406516265, "grad_norm": 0.9093016386032104, "learning_rate": 1.712784281315732e-05, "loss": 0.3096, "step": 11185 }, { "epoch": 0.24753990170667592, "grad_norm": 1.2296671867370605, "learning_rate": 1.712540514202185e-05, "loss": 0.4409, "step": 11190 }, { "epoch": 0.24765050934818916, "grad_norm": 2.290253162384033, "learning_rate": 1.712296661049593e-05, "loss": 0.387, "step": 11195 }, { "epoch": 0.24776111698970243, "grad_norm": 1.8475056886672974, "learning_rate": 1.7120527218874015e-05, "loss": 0.3034, "step": 11200 }, { "epoch": 0.24787172463121568, "grad_norm": 1.7779392004013062, "learning_rate": 1.711808696745066e-05, "loss": 0.4681, "step": 11205 }, { "epoch": 0.24798233227272895, "grad_norm": 1.1912637948989868, "learning_rate": 1.711564585652053e-05, "loss": 0.4437, "step": 11210 }, { "epoch": 0.24809293991424222, "grad_norm": 1.4303890466690063, "learning_rate": 1.711320388637838e-05, "loss": 0.3805, "step": 11215 }, { "epoch": 0.24820354755575547, "grad_norm": 1.1162227392196655, "learning_rate": 1.7110761057319078e-05, "loss": 0.5055, "step": 11220 }, { "epoch": 0.24831415519726874, "grad_norm": 0.827201247215271, "learning_rate": 1.7108317369637596e-05, "loss": 0.2161, "step": 11225 }, { "epoch": 0.24842476283878198, "grad_norm": 1.5203938484191895, "learning_rate": 1.7105872823629012e-05, "loss": 0.3428, "step": 11230 }, { "epoch": 0.24853537048029525, "grad_norm": 1.510694146156311, "learning_rate": 1.71034274195885e-05, "loss": 0.3639, "step": 11235 }, { "epoch": 0.2486459781218085, "grad_norm": 2.3154776096343994, "learning_rate": 1.7100981157811344e-05, "loss": 0.4393, "step": 11240 }, { "epoch": 0.24875658576332177, "grad_norm": 1.329904556274414, "learning_rate": 1.709853403859293e-05, "loss": 0.3111, "step": 11245 }, { "epoch": 0.24886719340483504, "grad_norm": 1.218939185142517, "learning_rate": 1.7096086062228745e-05, "loss": 0.511, "step": 11250 }, { "epoch": 0.24897780104634828, "grad_norm": 1.232398271560669, "learning_rate": 1.7093637229014385e-05, "loss": 0.3796, "step": 11255 }, { "epoch": 0.24908840868786156, "grad_norm": 1.4231619834899902, "learning_rate": 1.709118753924554e-05, "loss": 0.4125, "step": 11260 }, { "epoch": 0.2491990163293748, "grad_norm": 1.0766828060150146, "learning_rate": 1.7088736993218013e-05, "loss": 0.5642, "step": 11265 }, { "epoch": 0.24930962397088807, "grad_norm": 1.6024603843688965, "learning_rate": 1.7086285591227705e-05, "loss": 0.3009, "step": 11270 }, { "epoch": 0.24942023161240132, "grad_norm": 1.304322600364685, "learning_rate": 1.7083833333570625e-05, "loss": 0.3346, "step": 11275 }, { "epoch": 0.2495308392539146, "grad_norm": 1.8554730415344238, "learning_rate": 1.7081380220542877e-05, "loss": 0.4013, "step": 11280 }, { "epoch": 0.24964144689542786, "grad_norm": 1.9512277841567993, "learning_rate": 1.7078926252440678e-05, "loss": 0.2219, "step": 11285 }, { "epoch": 0.2497520545369411, "grad_norm": 1.0852433443069458, "learning_rate": 1.7076471429560347e-05, "loss": 0.3564, "step": 11290 }, { "epoch": 0.24986266217845438, "grad_norm": 1.0596299171447754, "learning_rate": 1.7074015752198295e-05, "loss": 0.264, "step": 11295 }, { "epoch": 0.24997326981996762, "grad_norm": 1.4629780054092407, "learning_rate": 1.7071559220651048e-05, "loss": 0.4166, "step": 11300 }, { "epoch": 0.25008387746148086, "grad_norm": 1.4606596231460571, "learning_rate": 1.706910183521523e-05, "loss": 0.3259, "step": 11305 }, { "epoch": 0.25019448510299414, "grad_norm": 0.9285896420478821, "learning_rate": 1.7066643596187577e-05, "loss": 0.2824, "step": 11310 }, { "epoch": 0.2503050927445074, "grad_norm": 0.9059315919876099, "learning_rate": 1.7064184503864912e-05, "loss": 0.4348, "step": 11315 }, { "epoch": 0.2504157003860207, "grad_norm": 1.447719931602478, "learning_rate": 1.706172455854417e-05, "loss": 0.3991, "step": 11320 }, { "epoch": 0.25052630802753395, "grad_norm": 1.1007544994354248, "learning_rate": 1.7059263760522392e-05, "loss": 0.3125, "step": 11325 }, { "epoch": 0.25063691566904717, "grad_norm": 1.539446234703064, "learning_rate": 1.7056802110096717e-05, "loss": 0.4143, "step": 11330 }, { "epoch": 0.25074752331056044, "grad_norm": 1.2516026496887207, "learning_rate": 1.705433960756439e-05, "loss": 0.3955, "step": 11335 }, { "epoch": 0.2508581309520737, "grad_norm": 1.1439672708511353, "learning_rate": 1.705187625322276e-05, "loss": 0.2814, "step": 11340 }, { "epoch": 0.250968738593587, "grad_norm": 1.306843638420105, "learning_rate": 1.704941204736927e-05, "loss": 0.46, "step": 11345 }, { "epoch": 0.25107934623510025, "grad_norm": 1.084540843963623, "learning_rate": 1.7046946990301472e-05, "loss": 0.4288, "step": 11350 }, { "epoch": 0.25118995387661347, "grad_norm": 1.4647408723831177, "learning_rate": 1.7044481082317027e-05, "loss": 0.4047, "step": 11355 }, { "epoch": 0.25130056151812674, "grad_norm": 1.5014525651931763, "learning_rate": 1.7042014323713687e-05, "loss": 0.3282, "step": 11360 }, { "epoch": 0.25141116915964, "grad_norm": 1.6245909929275513, "learning_rate": 1.7039546714789316e-05, "loss": 0.3308, "step": 11365 }, { "epoch": 0.2515217768011533, "grad_norm": 1.3358776569366455, "learning_rate": 1.7037078255841873e-05, "loss": 0.2485, "step": 11370 }, { "epoch": 0.2516323844426665, "grad_norm": 0.9836465120315552, "learning_rate": 1.703460894716943e-05, "loss": 0.3162, "step": 11375 }, { "epoch": 0.2517429920841798, "grad_norm": 1.5199311971664429, "learning_rate": 1.703213878907015e-05, "loss": 0.343, "step": 11380 }, { "epoch": 0.25185359972569304, "grad_norm": 1.4969768524169922, "learning_rate": 1.702966778184231e-05, "loss": 0.3011, "step": 11385 }, { "epoch": 0.2519642073672063, "grad_norm": 1.266241192817688, "learning_rate": 1.7027195925784275e-05, "loss": 0.4903, "step": 11390 }, { "epoch": 0.2520748150087196, "grad_norm": 0.898705244064331, "learning_rate": 1.7024723221194522e-05, "loss": 0.4121, "step": 11395 }, { "epoch": 0.2521854226502328, "grad_norm": 1.8158998489379883, "learning_rate": 1.7022249668371637e-05, "loss": 0.4173, "step": 11400 }, { "epoch": 0.2522960302917461, "grad_norm": 2.1298763751983643, "learning_rate": 1.701977526761429e-05, "loss": 0.3919, "step": 11405 }, { "epoch": 0.25240663793325935, "grad_norm": 0.9210541844367981, "learning_rate": 1.7017300019221275e-05, "loss": 0.4746, "step": 11410 }, { "epoch": 0.2525172455747726, "grad_norm": 1.05018949508667, "learning_rate": 1.701482392349147e-05, "loss": 0.3427, "step": 11415 }, { "epoch": 0.2526278532162859, "grad_norm": 1.3037700653076172, "learning_rate": 1.7012346980723863e-05, "loss": 0.4675, "step": 11420 }, { "epoch": 0.2527384608577991, "grad_norm": 1.0924386978149414, "learning_rate": 1.700986919121755e-05, "loss": 0.4395, "step": 11425 }, { "epoch": 0.2528490684993124, "grad_norm": 1.3864315748214722, "learning_rate": 1.7007390555271715e-05, "loss": 0.4098, "step": 11430 }, { "epoch": 0.25295967614082565, "grad_norm": 1.679884433746338, "learning_rate": 1.7004911073185663e-05, "loss": 0.4592, "step": 11435 }, { "epoch": 0.2530702837823389, "grad_norm": 1.221247673034668, "learning_rate": 1.700243074525878e-05, "loss": 0.4365, "step": 11440 }, { "epoch": 0.2531808914238522, "grad_norm": 1.736573338508606, "learning_rate": 1.6999949571790567e-05, "loss": 0.4071, "step": 11445 }, { "epoch": 0.2532914990653654, "grad_norm": 1.3053803443908691, "learning_rate": 1.6997467553080633e-05, "loss": 0.4118, "step": 11450 }, { "epoch": 0.2534021067068787, "grad_norm": 0.8130152821540833, "learning_rate": 1.6994984689428676e-05, "loss": 0.324, "step": 11455 }, { "epoch": 0.25351271434839195, "grad_norm": 1.009224772453308, "learning_rate": 1.6992500981134496e-05, "loss": 0.4362, "step": 11460 }, { "epoch": 0.2536233219899052, "grad_norm": 1.1340440511703491, "learning_rate": 1.6990016428498005e-05, "loss": 0.3134, "step": 11465 }, { "epoch": 0.25373392963141844, "grad_norm": 0.8196548223495483, "learning_rate": 1.698753103181921e-05, "loss": 0.3534, "step": 11470 }, { "epoch": 0.2538445372729317, "grad_norm": 2.0437068939208984, "learning_rate": 1.698504479139823e-05, "loss": 0.4786, "step": 11475 }, { "epoch": 0.253955144914445, "grad_norm": 1.7337517738342285, "learning_rate": 1.6982557707535262e-05, "loss": 0.2722, "step": 11480 }, { "epoch": 0.25406575255595826, "grad_norm": 1.3911895751953125, "learning_rate": 1.6980069780530634e-05, "loss": 0.2468, "step": 11485 }, { "epoch": 0.25417636019747153, "grad_norm": 1.0660933256149292, "learning_rate": 1.6977581010684758e-05, "loss": 0.4229, "step": 11490 }, { "epoch": 0.25428696783898475, "grad_norm": 0.9131743311882019, "learning_rate": 1.6975091398298153e-05, "loss": 0.3606, "step": 11495 }, { "epoch": 0.254397575480498, "grad_norm": 0.9635950922966003, "learning_rate": 1.6972600943671435e-05, "loss": 0.4492, "step": 11500 }, { "epoch": 0.2545081831220113, "grad_norm": 0.8462517261505127, "learning_rate": 1.697010964710533e-05, "loss": 0.4306, "step": 11505 }, { "epoch": 0.25461879076352456, "grad_norm": 1.061809778213501, "learning_rate": 1.696761750890066e-05, "loss": 0.4726, "step": 11510 }, { "epoch": 0.25472939840503783, "grad_norm": 1.3401072025299072, "learning_rate": 1.6965124529358353e-05, "loss": 0.4031, "step": 11515 }, { "epoch": 0.25484000604655105, "grad_norm": 1.584594488143921, "learning_rate": 1.6962630708779428e-05, "loss": 0.5767, "step": 11520 }, { "epoch": 0.2549506136880643, "grad_norm": 1.1651545763015747, "learning_rate": 1.696013604746502e-05, "loss": 0.3348, "step": 11525 }, { "epoch": 0.2550612213295776, "grad_norm": 1.4351016283035278, "learning_rate": 1.6957640545716356e-05, "loss": 0.3776, "step": 11530 }, { "epoch": 0.25517182897109086, "grad_norm": 0.8119806051254272, "learning_rate": 1.6955144203834762e-05, "loss": 0.2542, "step": 11535 }, { "epoch": 0.25528243661260414, "grad_norm": 1.5857524871826172, "learning_rate": 1.6952647022121685e-05, "loss": 0.449, "step": 11540 }, { "epoch": 0.25539304425411735, "grad_norm": 1.6314853429794312, "learning_rate": 1.6950149000878644e-05, "loss": 0.4105, "step": 11545 }, { "epoch": 0.2555036518956306, "grad_norm": 1.3435419797897339, "learning_rate": 1.694765014040728e-05, "loss": 0.2454, "step": 11550 }, { "epoch": 0.2556142595371439, "grad_norm": 1.5501044988632202, "learning_rate": 1.694515044100933e-05, "loss": 0.3507, "step": 11555 }, { "epoch": 0.25572486717865717, "grad_norm": 1.697275996208191, "learning_rate": 1.694264990298663e-05, "loss": 0.4311, "step": 11560 }, { "epoch": 0.2558354748201704, "grad_norm": 1.3110156059265137, "learning_rate": 1.6940148526641117e-05, "loss": 0.2958, "step": 11565 }, { "epoch": 0.25594608246168365, "grad_norm": 1.415984869003296, "learning_rate": 1.693764631227484e-05, "loss": 0.4222, "step": 11570 }, { "epoch": 0.2560566901031969, "grad_norm": 1.4777064323425293, "learning_rate": 1.693514326018994e-05, "loss": 0.4599, "step": 11575 }, { "epoch": 0.2561672977447102, "grad_norm": 1.4379198551177979, "learning_rate": 1.6932639370688643e-05, "loss": 0.3473, "step": 11580 }, { "epoch": 0.25627790538622347, "grad_norm": 1.20792555809021, "learning_rate": 1.693013464407331e-05, "loss": 0.5114, "step": 11585 }, { "epoch": 0.2563885130277367, "grad_norm": 0.8619514107704163, "learning_rate": 1.692762908064639e-05, "loss": 0.5454, "step": 11590 }, { "epoch": 0.25649912066924996, "grad_norm": 1.1222227811813354, "learning_rate": 1.692512268071041e-05, "loss": 0.3171, "step": 11595 }, { "epoch": 0.25660972831076323, "grad_norm": 1.198190450668335, "learning_rate": 1.6922615444568028e-05, "loss": 0.3827, "step": 11600 }, { "epoch": 0.2567203359522765, "grad_norm": 1.152779459953308, "learning_rate": 1.6920107372521997e-05, "loss": 0.2854, "step": 11605 }, { "epoch": 0.2568309435937898, "grad_norm": 1.3581041097640991, "learning_rate": 1.6917598464875154e-05, "loss": 0.4037, "step": 11610 }, { "epoch": 0.256941551235303, "grad_norm": 1.3139346837997437, "learning_rate": 1.6915088721930457e-05, "loss": 0.3329, "step": 11615 }, { "epoch": 0.25705215887681626, "grad_norm": 1.4618138074874878, "learning_rate": 1.6912578143990954e-05, "loss": 0.2525, "step": 11620 }, { "epoch": 0.25716276651832953, "grad_norm": 1.0407875776290894, "learning_rate": 1.6910066731359797e-05, "loss": 0.4407, "step": 11625 }, { "epoch": 0.2572733741598428, "grad_norm": 1.6411532163619995, "learning_rate": 1.690755448434024e-05, "loss": 0.3809, "step": 11630 }, { "epoch": 0.2573839818013561, "grad_norm": 0.6256580352783203, "learning_rate": 1.6905041403235632e-05, "loss": 0.4328, "step": 11635 }, { "epoch": 0.2574945894428693, "grad_norm": 1.7270426750183105, "learning_rate": 1.6902527488349433e-05, "loss": 0.2951, "step": 11640 }, { "epoch": 0.25760519708438256, "grad_norm": 1.4384145736694336, "learning_rate": 1.6900012739985188e-05, "loss": 0.4821, "step": 11645 }, { "epoch": 0.25771580472589584, "grad_norm": 0.8770902156829834, "learning_rate": 1.689749715844656e-05, "loss": 0.4148, "step": 11650 }, { "epoch": 0.2578264123674091, "grad_norm": 1.1765341758728027, "learning_rate": 1.6894980744037304e-05, "loss": 0.4478, "step": 11655 }, { "epoch": 0.2579370200089223, "grad_norm": 1.529283881187439, "learning_rate": 1.689246349706127e-05, "loss": 0.2998, "step": 11660 }, { "epoch": 0.2580476276504356, "grad_norm": 1.6836549043655396, "learning_rate": 1.6889945417822426e-05, "loss": 0.3489, "step": 11665 }, { "epoch": 0.25815823529194887, "grad_norm": 1.369488000869751, "learning_rate": 1.6887426506624817e-05, "loss": 0.5131, "step": 11670 }, { "epoch": 0.25826884293346214, "grad_norm": 1.6285264492034912, "learning_rate": 1.688490676377261e-05, "loss": 0.4166, "step": 11675 }, { "epoch": 0.2583794505749754, "grad_norm": 1.5196986198425293, "learning_rate": 1.6882386189570058e-05, "loss": 0.4346, "step": 11680 }, { "epoch": 0.2584900582164886, "grad_norm": 1.5314182043075562, "learning_rate": 1.687986478432152e-05, "loss": 0.337, "step": 11685 }, { "epoch": 0.2586006658580019, "grad_norm": 2.344677209854126, "learning_rate": 1.6877342548331457e-05, "loss": 0.3148, "step": 11690 }, { "epoch": 0.25871127349951517, "grad_norm": 0.8219030499458313, "learning_rate": 1.687481948190443e-05, "loss": 0.3471, "step": 11695 }, { "epoch": 0.25882188114102844, "grad_norm": 1.832739233970642, "learning_rate": 1.687229558534509e-05, "loss": 0.4208, "step": 11700 }, { "epoch": 0.2589324887825417, "grad_norm": 2.021754264831543, "learning_rate": 1.6869770858958206e-05, "loss": 0.3992, "step": 11705 }, { "epoch": 0.25904309642405493, "grad_norm": 1.385135293006897, "learning_rate": 1.6867245303048632e-05, "loss": 0.3814, "step": 11710 }, { "epoch": 0.2591537040655682, "grad_norm": 1.144547939300537, "learning_rate": 1.6864718917921332e-05, "loss": 0.4087, "step": 11715 }, { "epoch": 0.2592643117070815, "grad_norm": 1.190960168838501, "learning_rate": 1.6862191703881365e-05, "loss": 0.4873, "step": 11720 }, { "epoch": 0.25937491934859475, "grad_norm": 1.292663812637329, "learning_rate": 1.685966366123389e-05, "loss": 0.4882, "step": 11725 }, { "epoch": 0.259485526990108, "grad_norm": 1.4261518716812134, "learning_rate": 1.6857134790284167e-05, "loss": 0.3198, "step": 11730 }, { "epoch": 0.25959613463162123, "grad_norm": 0.9050889015197754, "learning_rate": 1.685460509133756e-05, "loss": 0.3862, "step": 11735 }, { "epoch": 0.2597067422731345, "grad_norm": 1.598108172416687, "learning_rate": 1.685207456469953e-05, "loss": 0.4741, "step": 11740 }, { "epoch": 0.2598173499146478, "grad_norm": 1.9209157228469849, "learning_rate": 1.684954321067563e-05, "loss": 0.3317, "step": 11745 }, { "epoch": 0.25992795755616105, "grad_norm": 1.1599093675613403, "learning_rate": 1.6847011029571523e-05, "loss": 0.3355, "step": 11750 }, { "epoch": 0.26003856519767427, "grad_norm": 1.6145685911178589, "learning_rate": 1.6844478021692972e-05, "loss": 0.3251, "step": 11755 }, { "epoch": 0.26014917283918754, "grad_norm": 1.7642617225646973, "learning_rate": 1.684194418734584e-05, "loss": 0.3228, "step": 11760 }, { "epoch": 0.2602597804807008, "grad_norm": 1.1125742197036743, "learning_rate": 1.6839409526836073e-05, "loss": 0.2845, "step": 11765 }, { "epoch": 0.2603703881222141, "grad_norm": 1.1513278484344482, "learning_rate": 1.683687404046975e-05, "loss": 0.3108, "step": 11770 }, { "epoch": 0.26048099576372735, "grad_norm": 1.8617937564849854, "learning_rate": 1.6834337728553015e-05, "loss": 0.4411, "step": 11775 }, { "epoch": 0.26059160340524057, "grad_norm": 1.1401565074920654, "learning_rate": 1.683180059139213e-05, "loss": 0.5339, "step": 11780 }, { "epoch": 0.26070221104675384, "grad_norm": 1.142081618309021, "learning_rate": 1.6829262629293456e-05, "loss": 0.3166, "step": 11785 }, { "epoch": 0.2608128186882671, "grad_norm": 1.6381956338882446, "learning_rate": 1.682672384256345e-05, "loss": 0.3917, "step": 11790 }, { "epoch": 0.2609234263297804, "grad_norm": 1.0911864042282104, "learning_rate": 1.6824184231508677e-05, "loss": 0.2799, "step": 11795 }, { "epoch": 0.26103403397129366, "grad_norm": 1.2511638402938843, "learning_rate": 1.682164379643578e-05, "loss": 0.4997, "step": 11800 }, { "epoch": 0.26114464161280687, "grad_norm": 1.3238939046859741, "learning_rate": 1.6819102537651522e-05, "loss": 0.4109, "step": 11805 }, { "epoch": 0.26125524925432014, "grad_norm": 2.0463364124298096, "learning_rate": 1.6816560455462767e-05, "loss": 0.5097, "step": 11810 }, { "epoch": 0.2613658568958334, "grad_norm": 1.4057667255401611, "learning_rate": 1.681401755017646e-05, "loss": 0.5826, "step": 11815 }, { "epoch": 0.2614764645373467, "grad_norm": 1.1194602251052856, "learning_rate": 1.681147382209966e-05, "loss": 0.3717, "step": 11820 }, { "epoch": 0.2615870721788599, "grad_norm": 1.2146425247192383, "learning_rate": 1.680892927153952e-05, "loss": 0.1948, "step": 11825 }, { "epoch": 0.2616976798203732, "grad_norm": 1.0985349416732788, "learning_rate": 1.68063838988033e-05, "loss": 0.4456, "step": 11830 }, { "epoch": 0.26180828746188645, "grad_norm": 1.2826759815216064, "learning_rate": 1.680383770419834e-05, "loss": 0.5035, "step": 11835 }, { "epoch": 0.2619188951033997, "grad_norm": 1.4836276769638062, "learning_rate": 1.6801290688032107e-05, "loss": 0.5973, "step": 11840 }, { "epoch": 0.262029502744913, "grad_norm": 1.389887809753418, "learning_rate": 1.6798742850612143e-05, "loss": 0.3499, "step": 11845 }, { "epoch": 0.2621401103864262, "grad_norm": 1.1097431182861328, "learning_rate": 1.67961941922461e-05, "loss": 0.2708, "step": 11850 }, { "epoch": 0.2622507180279395, "grad_norm": 0.9594298601150513, "learning_rate": 1.6793644713241733e-05, "loss": 0.4086, "step": 11855 }, { "epoch": 0.26236132566945275, "grad_norm": 2.314152240753174, "learning_rate": 1.6791094413906884e-05, "loss": 0.4066, "step": 11860 }, { "epoch": 0.262471933310966, "grad_norm": 1.9741569757461548, "learning_rate": 1.67885432945495e-05, "loss": 0.3507, "step": 11865 }, { "epoch": 0.2625825409524793, "grad_norm": 3.944694757461548, "learning_rate": 1.6785991355477638e-05, "loss": 0.471, "step": 11870 }, { "epoch": 0.2626931485939925, "grad_norm": 1.1748924255371094, "learning_rate": 1.6783438596999433e-05, "loss": 0.3531, "step": 11875 }, { "epoch": 0.2628037562355058, "grad_norm": 0.9944360256195068, "learning_rate": 1.6780885019423136e-05, "loss": 0.3863, "step": 11880 }, { "epoch": 0.26291436387701905, "grad_norm": 1.7315950393676758, "learning_rate": 1.6778330623057087e-05, "loss": 0.4772, "step": 11885 }, { "epoch": 0.2630249715185323, "grad_norm": 2.2272396087646484, "learning_rate": 1.677577540820973e-05, "loss": 0.2844, "step": 11890 }, { "epoch": 0.2631355791600456, "grad_norm": 0.8301576375961304, "learning_rate": 1.6773219375189608e-05, "loss": 0.2148, "step": 11895 }, { "epoch": 0.2632461868015588, "grad_norm": 1.5960990190505981, "learning_rate": 1.677066252430536e-05, "loss": 0.5064, "step": 11900 }, { "epoch": 0.2633567944430721, "grad_norm": 1.4018492698669434, "learning_rate": 1.6768104855865723e-05, "loss": 0.3598, "step": 11905 }, { "epoch": 0.26346740208458536, "grad_norm": 1.3392106294631958, "learning_rate": 1.676554637017954e-05, "loss": 0.3376, "step": 11910 }, { "epoch": 0.2635780097260986, "grad_norm": 1.0014324188232422, "learning_rate": 1.676298706755574e-05, "loss": 0.3174, "step": 11915 }, { "epoch": 0.26368861736761184, "grad_norm": 0.8839554786682129, "learning_rate": 1.6760426948303364e-05, "loss": 0.3159, "step": 11920 }, { "epoch": 0.2637992250091251, "grad_norm": 1.2732068300247192, "learning_rate": 1.675786601273154e-05, "loss": 0.4554, "step": 11925 }, { "epoch": 0.2639098326506384, "grad_norm": 1.4059786796569824, "learning_rate": 1.6755304261149507e-05, "loss": 0.496, "step": 11930 }, { "epoch": 0.26402044029215166, "grad_norm": 1.6508219242095947, "learning_rate": 1.6752741693866594e-05, "loss": 0.5481, "step": 11935 }, { "epoch": 0.26413104793366493, "grad_norm": 1.627691626548767, "learning_rate": 1.6750178311192225e-05, "loss": 0.4976, "step": 11940 }, { "epoch": 0.26424165557517815, "grad_norm": 1.2530179023742676, "learning_rate": 1.674761411343593e-05, "loss": 0.5641, "step": 11945 }, { "epoch": 0.2643522632166914, "grad_norm": 1.7643061876296997, "learning_rate": 1.6745049100907337e-05, "loss": 0.3792, "step": 11950 }, { "epoch": 0.2644628708582047, "grad_norm": 2.7047531604766846, "learning_rate": 1.6742483273916175e-05, "loss": 0.4961, "step": 11955 }, { "epoch": 0.26457347849971796, "grad_norm": 1.4631116390228271, "learning_rate": 1.6739916632772256e-05, "loss": 0.3146, "step": 11960 }, { "epoch": 0.26468408614123123, "grad_norm": 1.4377130270004272, "learning_rate": 1.6737349177785508e-05, "loss": 0.3736, "step": 11965 }, { "epoch": 0.26479469378274445, "grad_norm": 1.0601989030838013, "learning_rate": 1.6734780909265952e-05, "loss": 0.4106, "step": 11970 }, { "epoch": 0.2649053014242577, "grad_norm": 1.1956523656845093, "learning_rate": 1.6732211827523697e-05, "loss": 0.2442, "step": 11975 }, { "epoch": 0.265015909065771, "grad_norm": 1.117456078529358, "learning_rate": 1.6729641932868968e-05, "loss": 0.3818, "step": 11980 }, { "epoch": 0.26512651670728427, "grad_norm": 1.7900655269622803, "learning_rate": 1.6727071225612076e-05, "loss": 0.2584, "step": 11985 }, { "epoch": 0.26523712434879754, "grad_norm": 1.2792799472808838, "learning_rate": 1.672449970606343e-05, "loss": 0.2706, "step": 11990 }, { "epoch": 0.26534773199031075, "grad_norm": 1.4232895374298096, "learning_rate": 1.672192737453354e-05, "loss": 0.3248, "step": 11995 }, { "epoch": 0.265458339631824, "grad_norm": 1.8757033348083496, "learning_rate": 1.671935423133302e-05, "loss": 0.331, "step": 12000 }, { "epoch": 0.2655689472733373, "grad_norm": 1.7579967975616455, "learning_rate": 1.6716780276772577e-05, "loss": 0.3485, "step": 12005 }, { "epoch": 0.26567955491485057, "grad_norm": 1.768161654472351, "learning_rate": 1.6714205511163005e-05, "loss": 0.375, "step": 12010 }, { "epoch": 0.2657901625563638, "grad_norm": 1.214436650276184, "learning_rate": 1.6711629934815217e-05, "loss": 0.5446, "step": 12015 }, { "epoch": 0.26590077019787706, "grad_norm": 1.4798400402069092, "learning_rate": 1.6709053548040207e-05, "loss": 0.3323, "step": 12020 }, { "epoch": 0.26601137783939033, "grad_norm": 1.4973541498184204, "learning_rate": 1.6706476351149076e-05, "loss": 0.4815, "step": 12025 }, { "epoch": 0.2661219854809036, "grad_norm": 1.6332165002822876, "learning_rate": 1.6703898344453015e-05, "loss": 0.3477, "step": 12030 }, { "epoch": 0.26623259312241687, "grad_norm": 0.9668537378311157, "learning_rate": 1.6701319528263323e-05, "loss": 0.2296, "step": 12035 }, { "epoch": 0.2663432007639301, "grad_norm": 2.258230686187744, "learning_rate": 1.669873990289139e-05, "loss": 0.6624, "step": 12040 }, { "epoch": 0.26645380840544336, "grad_norm": 0.948768675327301, "learning_rate": 1.6696159468648702e-05, "loss": 0.3407, "step": 12045 }, { "epoch": 0.26656441604695663, "grad_norm": 1.0825353860855103, "learning_rate": 1.6693578225846846e-05, "loss": 0.2972, "step": 12050 }, { "epoch": 0.2666750236884699, "grad_norm": 1.2149046659469604, "learning_rate": 1.6690996174797513e-05, "loss": 0.3634, "step": 12055 }, { "epoch": 0.2667856313299832, "grad_norm": 1.1391700506210327, "learning_rate": 1.6688413315812473e-05, "loss": 0.3474, "step": 12060 }, { "epoch": 0.2668962389714964, "grad_norm": 2.5341196060180664, "learning_rate": 1.6685829649203616e-05, "loss": 0.4391, "step": 12065 }, { "epoch": 0.26700684661300966, "grad_norm": 2.6344616413116455, "learning_rate": 1.6683245175282917e-05, "loss": 0.2804, "step": 12070 }, { "epoch": 0.26711745425452293, "grad_norm": 1.1701055765151978, "learning_rate": 1.6680659894362447e-05, "loss": 0.4264, "step": 12075 }, { "epoch": 0.2672280618960362, "grad_norm": 1.1928434371948242, "learning_rate": 1.6678073806754374e-05, "loss": 0.3483, "step": 12080 }, { "epoch": 0.2673386695375495, "grad_norm": 1.4106132984161377, "learning_rate": 1.6675486912770976e-05, "loss": 0.2995, "step": 12085 }, { "epoch": 0.2674492771790627, "grad_norm": 1.6948035955429077, "learning_rate": 1.6672899212724618e-05, "loss": 0.3471, "step": 12090 }, { "epoch": 0.26755988482057597, "grad_norm": 1.6883299350738525, "learning_rate": 1.6670310706927758e-05, "loss": 0.5366, "step": 12095 }, { "epoch": 0.26767049246208924, "grad_norm": 1.3877538442611694, "learning_rate": 1.6667721395692966e-05, "loss": 0.4322, "step": 12100 }, { "epoch": 0.2677811001036025, "grad_norm": 1.7686609029769897, "learning_rate": 1.6665131279332895e-05, "loss": 0.4616, "step": 12105 }, { "epoch": 0.2678917077451157, "grad_norm": 2.338963270187378, "learning_rate": 1.66625403581603e-05, "loss": 0.4103, "step": 12110 }, { "epoch": 0.268002315386629, "grad_norm": 2.542827844619751, "learning_rate": 1.6659948632488037e-05, "loss": 0.3722, "step": 12115 }, { "epoch": 0.26811292302814227, "grad_norm": 1.2529398202896118, "learning_rate": 1.6657356102629053e-05, "loss": 0.4632, "step": 12120 }, { "epoch": 0.26822353066965554, "grad_norm": 1.6353216171264648, "learning_rate": 1.6654762768896402e-05, "loss": 0.4184, "step": 12125 }, { "epoch": 0.2683341383111688, "grad_norm": 2.158984899520874, "learning_rate": 1.6652168631603218e-05, "loss": 0.3303, "step": 12130 }, { "epoch": 0.26844474595268203, "grad_norm": 1.2579485177993774, "learning_rate": 1.6649573691062747e-05, "loss": 0.3754, "step": 12135 }, { "epoch": 0.2685553535941953, "grad_norm": 1.3799835443496704, "learning_rate": 1.664697794758833e-05, "loss": 0.2898, "step": 12140 }, { "epoch": 0.2686659612357086, "grad_norm": 1.3711822032928467, "learning_rate": 1.6644381401493398e-05, "loss": 0.3972, "step": 12145 }, { "epoch": 0.26877656887722184, "grad_norm": 1.1085647344589233, "learning_rate": 1.6641784053091486e-05, "loss": 0.3841, "step": 12150 }, { "epoch": 0.2688871765187351, "grad_norm": 1.1342487335205078, "learning_rate": 1.6639185902696223e-05, "loss": 0.3272, "step": 12155 }, { "epoch": 0.26899778416024833, "grad_norm": 1.5694620609283447, "learning_rate": 1.663658695062133e-05, "loss": 0.404, "step": 12160 }, { "epoch": 0.2691083918017616, "grad_norm": 1.5467314720153809, "learning_rate": 1.663398719718064e-05, "loss": 0.3483, "step": 12165 }, { "epoch": 0.2692189994432749, "grad_norm": 1.1240694522857666, "learning_rate": 1.663138664268806e-05, "loss": 0.428, "step": 12170 }, { "epoch": 0.26932960708478815, "grad_norm": 1.294618010520935, "learning_rate": 1.6628785287457617e-05, "loss": 0.4067, "step": 12175 }, { "epoch": 0.26944021472630136, "grad_norm": 1.1171783208847046, "learning_rate": 1.6626183131803416e-05, "loss": 0.3989, "step": 12180 }, { "epoch": 0.26955082236781464, "grad_norm": 1.5213322639465332, "learning_rate": 1.6623580176039667e-05, "loss": 0.3738, "step": 12185 }, { "epoch": 0.2696614300093279, "grad_norm": 1.2031941413879395, "learning_rate": 1.662097642048068e-05, "loss": 0.2688, "step": 12190 }, { "epoch": 0.2697720376508412, "grad_norm": 2.0562520027160645, "learning_rate": 1.6618371865440858e-05, "loss": 0.2859, "step": 12195 }, { "epoch": 0.26988264529235445, "grad_norm": 0.5716249942779541, "learning_rate": 1.6615766511234695e-05, "loss": 0.2744, "step": 12200 }, { "epoch": 0.26999325293386767, "grad_norm": 1.771910309791565, "learning_rate": 1.6613160358176795e-05, "loss": 0.5011, "step": 12205 }, { "epoch": 0.27010386057538094, "grad_norm": 2.9156417846679688, "learning_rate": 1.6610553406581843e-05, "loss": 0.3072, "step": 12210 }, { "epoch": 0.2702144682168942, "grad_norm": 0.9483566284179688, "learning_rate": 1.6607945656764628e-05, "loss": 0.2961, "step": 12215 }, { "epoch": 0.2703250758584075, "grad_norm": 1.028038501739502, "learning_rate": 1.6605337109040038e-05, "loss": 0.4413, "step": 12220 }, { "epoch": 0.27043568349992075, "grad_norm": 1.7818984985351562, "learning_rate": 1.6602727763723056e-05, "loss": 0.3904, "step": 12225 }, { "epoch": 0.27054629114143397, "grad_norm": 1.6165581941604614, "learning_rate": 1.6600117621128752e-05, "loss": 0.367, "step": 12230 }, { "epoch": 0.27065689878294724, "grad_norm": 1.9352715015411377, "learning_rate": 1.659750668157231e-05, "loss": 0.2816, "step": 12235 }, { "epoch": 0.2707675064244605, "grad_norm": 1.1408765316009521, "learning_rate": 1.659489494536899e-05, "loss": 0.3962, "step": 12240 }, { "epoch": 0.2708781140659738, "grad_norm": 1.3353235721588135, "learning_rate": 1.6592282412834163e-05, "loss": 0.3529, "step": 12245 }, { "epoch": 0.27098872170748706, "grad_norm": 0.7789366841316223, "learning_rate": 1.658966908428329e-05, "loss": 0.3746, "step": 12250 }, { "epoch": 0.2710993293490003, "grad_norm": 0.9509039521217346, "learning_rate": 1.6587054960031937e-05, "loss": 0.2813, "step": 12255 }, { "epoch": 0.27120993699051354, "grad_norm": 1.2597452402114868, "learning_rate": 1.658444004039575e-05, "loss": 0.3314, "step": 12260 }, { "epoch": 0.2713205446320268, "grad_norm": 1.8228235244750977, "learning_rate": 1.6581824325690484e-05, "loss": 0.4611, "step": 12265 }, { "epoch": 0.2714311522735401, "grad_norm": 1.9890574216842651, "learning_rate": 1.657920781623198e-05, "loss": 0.3732, "step": 12270 }, { "epoch": 0.2715417599150533, "grad_norm": 1.3969380855560303, "learning_rate": 1.6576590512336192e-05, "loss": 0.2894, "step": 12275 }, { "epoch": 0.2716523675565666, "grad_norm": 1.7569302320480347, "learning_rate": 1.6573972414319147e-05, "loss": 0.3848, "step": 12280 }, { "epoch": 0.27176297519807985, "grad_norm": 1.2571337223052979, "learning_rate": 1.6571353522496983e-05, "loss": 0.3317, "step": 12285 }, { "epoch": 0.2718735828395931, "grad_norm": 2.198032855987549, "learning_rate": 1.6568733837185935e-05, "loss": 0.4348, "step": 12290 }, { "epoch": 0.2719841904811064, "grad_norm": 1.3051369190216064, "learning_rate": 1.656611335870232e-05, "loss": 0.3451, "step": 12295 }, { "epoch": 0.2720947981226196, "grad_norm": 1.2890692949295044, "learning_rate": 1.6563492087362575e-05, "loss": 0.267, "step": 12300 }, { "epoch": 0.2722054057641329, "grad_norm": 1.188299298286438, "learning_rate": 1.6560870023483203e-05, "loss": 0.3717, "step": 12305 }, { "epoch": 0.27231601340564615, "grad_norm": 1.4613622426986694, "learning_rate": 1.6558247167380824e-05, "loss": 0.3679, "step": 12310 }, { "epoch": 0.2724266210471594, "grad_norm": 1.5664833784103394, "learning_rate": 1.6555623519372144e-05, "loss": 0.3422, "step": 12315 }, { "epoch": 0.2725372286886727, "grad_norm": 1.9228041172027588, "learning_rate": 1.655299907977397e-05, "loss": 0.3732, "step": 12320 }, { "epoch": 0.2726478363301859, "grad_norm": 1.1902379989624023, "learning_rate": 1.6550373848903202e-05, "loss": 0.2965, "step": 12325 }, { "epoch": 0.2727584439716992, "grad_norm": 1.4620740413665771, "learning_rate": 1.6547747827076833e-05, "loss": 0.3246, "step": 12330 }, { "epoch": 0.27286905161321245, "grad_norm": 1.3276832103729248, "learning_rate": 1.6545121014611965e-05, "loss": 0.4557, "step": 12335 }, { "epoch": 0.2729796592547257, "grad_norm": 1.4375702142715454, "learning_rate": 1.654249341182577e-05, "loss": 0.5359, "step": 12340 }, { "epoch": 0.273090266896239, "grad_norm": 0.9950371384620667, "learning_rate": 1.6539865019035536e-05, "loss": 0.4493, "step": 12345 }, { "epoch": 0.2732008745377522, "grad_norm": 0.59972083568573, "learning_rate": 1.6537235836558647e-05, "loss": 0.3215, "step": 12350 }, { "epoch": 0.2733114821792655, "grad_norm": 1.4359076023101807, "learning_rate": 1.653460586471257e-05, "loss": 0.2647, "step": 12355 }, { "epoch": 0.27342208982077876, "grad_norm": 1.144339680671692, "learning_rate": 1.653197510381487e-05, "loss": 0.3401, "step": 12360 }, { "epoch": 0.27353269746229203, "grad_norm": 1.3544466495513916, "learning_rate": 1.6529343554183214e-05, "loss": 0.476, "step": 12365 }, { "epoch": 0.27364330510380525, "grad_norm": 2.008676767349243, "learning_rate": 1.6526711216135367e-05, "loss": 0.3874, "step": 12370 }, { "epoch": 0.2737539127453185, "grad_norm": 0.7199488282203674, "learning_rate": 1.652407808998917e-05, "loss": 0.3583, "step": 12375 }, { "epoch": 0.2738645203868318, "grad_norm": 1.2295066118240356, "learning_rate": 1.6521444176062586e-05, "loss": 0.4838, "step": 12380 }, { "epoch": 0.27397512802834506, "grad_norm": 0.9106724262237549, "learning_rate": 1.6518809474673645e-05, "loss": 0.4125, "step": 12385 }, { "epoch": 0.27408573566985833, "grad_norm": 1.1859831809997559, "learning_rate": 1.6516173986140496e-05, "loss": 0.5127, "step": 12390 }, { "epoch": 0.27419634331137155, "grad_norm": 1.2827379703521729, "learning_rate": 1.6513537710781377e-05, "loss": 0.3019, "step": 12395 }, { "epoch": 0.2743069509528848, "grad_norm": 1.4374407529830933, "learning_rate": 1.651090064891461e-05, "loss": 0.4331, "step": 12400 }, { "epoch": 0.2744175585943981, "grad_norm": 1.4300726652145386, "learning_rate": 1.6508262800858615e-05, "loss": 0.4696, "step": 12405 }, { "epoch": 0.27452816623591136, "grad_norm": 1.1230084896087646, "learning_rate": 1.650562416693192e-05, "loss": 0.4698, "step": 12410 }, { "epoch": 0.27463877387742464, "grad_norm": 1.45378839969635, "learning_rate": 1.6502984747453142e-05, "loss": 0.379, "step": 12415 }, { "epoch": 0.27474938151893785, "grad_norm": 0.9704524278640747, "learning_rate": 1.650034454274098e-05, "loss": 0.4332, "step": 12420 }, { "epoch": 0.2748599891604511, "grad_norm": 1.4975131750106812, "learning_rate": 1.6497703553114244e-05, "loss": 0.3738, "step": 12425 }, { "epoch": 0.2749705968019644, "grad_norm": 1.623072624206543, "learning_rate": 1.649506177889183e-05, "loss": 0.2928, "step": 12430 }, { "epoch": 0.27508120444347767, "grad_norm": 0.7265191674232483, "learning_rate": 1.649241922039274e-05, "loss": 0.3364, "step": 12435 }, { "epoch": 0.27519181208499094, "grad_norm": 1.408460021018982, "learning_rate": 1.6489775877936044e-05, "loss": 0.1874, "step": 12440 }, { "epoch": 0.27530241972650416, "grad_norm": 1.379372477531433, "learning_rate": 1.648713175184094e-05, "loss": 0.4783, "step": 12445 }, { "epoch": 0.2754130273680174, "grad_norm": 1.4496046304702759, "learning_rate": 1.6484486842426704e-05, "loss": 0.3646, "step": 12450 }, { "epoch": 0.2755236350095307, "grad_norm": 1.201499342918396, "learning_rate": 1.6481841150012704e-05, "loss": 0.4796, "step": 12455 }, { "epoch": 0.27563424265104397, "grad_norm": 2.0728213787078857, "learning_rate": 1.6479194674918404e-05, "loss": 0.4508, "step": 12460 }, { "epoch": 0.2757448502925572, "grad_norm": 1.7926641702651978, "learning_rate": 1.6476547417463372e-05, "loss": 0.3633, "step": 12465 }, { "epoch": 0.27585545793407046, "grad_norm": 1.4052952527999878, "learning_rate": 1.6473899377967262e-05, "loss": 0.3334, "step": 12470 }, { "epoch": 0.27596606557558373, "grad_norm": 1.0621432065963745, "learning_rate": 1.647125055674982e-05, "loss": 0.2485, "step": 12475 }, { "epoch": 0.276076673217097, "grad_norm": 1.579028606414795, "learning_rate": 1.6468600954130892e-05, "loss": 0.3589, "step": 12480 }, { "epoch": 0.2761872808586103, "grad_norm": 1.1259071826934814, "learning_rate": 1.6465950570430416e-05, "loss": 0.4744, "step": 12485 }, { "epoch": 0.2762978885001235, "grad_norm": 1.4295107126235962, "learning_rate": 1.6463299405968424e-05, "loss": 0.5242, "step": 12490 }, { "epoch": 0.27640849614163676, "grad_norm": 0.9159891605377197, "learning_rate": 1.6460647461065053e-05, "loss": 0.1983, "step": 12495 }, { "epoch": 0.27651910378315003, "grad_norm": 1.205219030380249, "learning_rate": 1.645799473604051e-05, "loss": 0.3826, "step": 12500 }, { "epoch": 0.2766297114246633, "grad_norm": 1.7667462825775146, "learning_rate": 1.6455341231215123e-05, "loss": 0.5422, "step": 12505 }, { "epoch": 0.2767403190661766, "grad_norm": 1.753602147102356, "learning_rate": 1.645268694690929e-05, "loss": 0.3038, "step": 12510 }, { "epoch": 0.2768509267076898, "grad_norm": 1.4761154651641846, "learning_rate": 1.6450031883443527e-05, "loss": 0.4379, "step": 12515 }, { "epoch": 0.27696153434920306, "grad_norm": 1.3354566097259521, "learning_rate": 1.6447376041138427e-05, "loss": 0.4664, "step": 12520 }, { "epoch": 0.27707214199071634, "grad_norm": 1.0468387603759766, "learning_rate": 1.644471942031468e-05, "loss": 0.4433, "step": 12525 }, { "epoch": 0.2771827496322296, "grad_norm": 1.2046949863433838, "learning_rate": 1.6442062021293072e-05, "loss": 0.414, "step": 12530 }, { "epoch": 0.2772933572737428, "grad_norm": 1.1509478092193604, "learning_rate": 1.643940384439449e-05, "loss": 0.3681, "step": 12535 }, { "epoch": 0.2774039649152561, "grad_norm": 1.1286094188690186, "learning_rate": 1.6436744889939898e-05, "loss": 0.5404, "step": 12540 }, { "epoch": 0.27751457255676937, "grad_norm": 0.9815607666969299, "learning_rate": 1.643408515825037e-05, "loss": 0.3684, "step": 12545 }, { "epoch": 0.27762518019828264, "grad_norm": 0.9647710919380188, "learning_rate": 1.6431424649647066e-05, "loss": 0.4044, "step": 12550 }, { "epoch": 0.2777357878397959, "grad_norm": 1.1282621622085571, "learning_rate": 1.6428763364451246e-05, "loss": 0.2935, "step": 12555 }, { "epoch": 0.2778463954813091, "grad_norm": 1.288151741027832, "learning_rate": 1.6426101302984252e-05, "loss": 0.5738, "step": 12560 }, { "epoch": 0.2779570031228224, "grad_norm": 1.5476254224777222, "learning_rate": 1.6423438465567533e-05, "loss": 0.295, "step": 12565 }, { "epoch": 0.27806761076433567, "grad_norm": 1.5168421268463135, "learning_rate": 1.642077485252262e-05, "loss": 0.3289, "step": 12570 }, { "epoch": 0.27817821840584894, "grad_norm": 1.2258158922195435, "learning_rate": 1.6418110464171153e-05, "loss": 0.3966, "step": 12575 }, { "epoch": 0.2782888260473622, "grad_norm": 2.0195159912109375, "learning_rate": 1.6415445300834846e-05, "loss": 0.2583, "step": 12580 }, { "epoch": 0.27839943368887543, "grad_norm": 1.2423831224441528, "learning_rate": 1.6412779362835524e-05, "loss": 0.3908, "step": 12585 }, { "epoch": 0.2785100413303887, "grad_norm": 1.9791585206985474, "learning_rate": 1.641011265049509e-05, "loss": 0.3816, "step": 12590 }, { "epoch": 0.278620648971902, "grad_norm": 1.8421591520309448, "learning_rate": 1.640744516413556e-05, "loss": 0.4641, "step": 12595 }, { "epoch": 0.27873125661341525, "grad_norm": 1.1074154376983643, "learning_rate": 1.6404776904079017e-05, "loss": 0.3948, "step": 12600 }, { "epoch": 0.2788418642549285, "grad_norm": 2.1389451026916504, "learning_rate": 1.6402107870647666e-05, "loss": 0.2592, "step": 12605 }, { "epoch": 0.27895247189644173, "grad_norm": 1.619992733001709, "learning_rate": 1.6399438064163787e-05, "loss": 0.4029, "step": 12610 }, { "epoch": 0.279063079537955, "grad_norm": 1.8456237316131592, "learning_rate": 1.639676748494976e-05, "loss": 0.2991, "step": 12615 }, { "epoch": 0.2791736871794683, "grad_norm": 1.341324806213379, "learning_rate": 1.6394096133328053e-05, "loss": 0.3489, "step": 12620 }, { "epoch": 0.27928429482098155, "grad_norm": 2.045840263366699, "learning_rate": 1.6391424009621233e-05, "loss": 0.4543, "step": 12625 }, { "epoch": 0.27939490246249477, "grad_norm": 0.9537631869316101, "learning_rate": 1.638875111415196e-05, "loss": 0.4147, "step": 12630 }, { "epoch": 0.27950551010400804, "grad_norm": 1.3602588176727295, "learning_rate": 1.638607744724298e-05, "loss": 0.3735, "step": 12635 }, { "epoch": 0.2796161177455213, "grad_norm": 1.2649376392364502, "learning_rate": 1.6383403009217143e-05, "loss": 0.4117, "step": 12640 }, { "epoch": 0.2797267253870346, "grad_norm": 1.4117735624313354, "learning_rate": 1.6380727800397384e-05, "loss": 0.3558, "step": 12645 }, { "epoch": 0.27983733302854785, "grad_norm": 1.1911977529525757, "learning_rate": 1.6378051821106737e-05, "loss": 0.4534, "step": 12650 }, { "epoch": 0.27994794067006107, "grad_norm": 2.370929002761841, "learning_rate": 1.637537507166832e-05, "loss": 0.3877, "step": 12655 }, { "epoch": 0.28005854831157434, "grad_norm": 2.0838136672973633, "learning_rate": 1.637269755240535e-05, "loss": 0.4679, "step": 12660 }, { "epoch": 0.2801691559530876, "grad_norm": 1.7952619791030884, "learning_rate": 1.6370019263641142e-05, "loss": 0.2602, "step": 12665 }, { "epoch": 0.2802797635946009, "grad_norm": 1.655556082725525, "learning_rate": 1.6367340205699096e-05, "loss": 0.4908, "step": 12670 }, { "epoch": 0.28039037123611416, "grad_norm": 1.3994531631469727, "learning_rate": 1.6364660378902706e-05, "loss": 0.415, "step": 12675 }, { "epoch": 0.28050097887762737, "grad_norm": 2.0953125953674316, "learning_rate": 1.6361979783575563e-05, "loss": 0.5066, "step": 12680 }, { "epoch": 0.28061158651914064, "grad_norm": 1.0492982864379883, "learning_rate": 1.6359298420041344e-05, "loss": 0.3025, "step": 12685 }, { "epoch": 0.2807221941606539, "grad_norm": 1.0965495109558105, "learning_rate": 1.6356616288623823e-05, "loss": 0.334, "step": 12690 }, { "epoch": 0.2808328018021672, "grad_norm": 0.7741219997406006, "learning_rate": 1.635393338964687e-05, "loss": 0.3122, "step": 12695 }, { "epoch": 0.28094340944368046, "grad_norm": 1.8544930219650269, "learning_rate": 1.6351249723434446e-05, "loss": 0.3907, "step": 12700 }, { "epoch": 0.2810540170851937, "grad_norm": 1.2993911504745483, "learning_rate": 1.6348565290310597e-05, "loss": 0.4316, "step": 12705 }, { "epoch": 0.28116462472670695, "grad_norm": 2.061002016067505, "learning_rate": 1.634588009059947e-05, "loss": 0.6096, "step": 12710 }, { "epoch": 0.2812752323682202, "grad_norm": 0.8342477679252625, "learning_rate": 1.6343194124625303e-05, "loss": 0.3453, "step": 12715 }, { "epoch": 0.2813858400097335, "grad_norm": 1.7144830226898193, "learning_rate": 1.6340507392712423e-05, "loss": 0.3279, "step": 12720 }, { "epoch": 0.2814964476512467, "grad_norm": 1.3505709171295166, "learning_rate": 1.6337819895185252e-05, "loss": 0.3807, "step": 12725 }, { "epoch": 0.28160705529276, "grad_norm": 0.7470729351043701, "learning_rate": 1.6335131632368314e-05, "loss": 0.2563, "step": 12730 }, { "epoch": 0.28171766293427325, "grad_norm": 1.1468766927719116, "learning_rate": 1.63324426045862e-05, "loss": 0.3774, "step": 12735 }, { "epoch": 0.2818282705757865, "grad_norm": 1.5186278820037842, "learning_rate": 1.6329752812163622e-05, "loss": 0.4274, "step": 12740 }, { "epoch": 0.2819388782172998, "grad_norm": 1.5486512184143066, "learning_rate": 1.6327062255425365e-05, "loss": 0.4728, "step": 12745 }, { "epoch": 0.282049485858813, "grad_norm": 1.6115477085113525, "learning_rate": 1.6324370934696318e-05, "loss": 0.2889, "step": 12750 }, { "epoch": 0.2821600935003263, "grad_norm": 1.0795974731445312, "learning_rate": 1.6321678850301447e-05, "loss": 0.4001, "step": 12755 }, { "epoch": 0.28227070114183955, "grad_norm": 1.3274188041687012, "learning_rate": 1.6318986002565835e-05, "loss": 0.4381, "step": 12760 }, { "epoch": 0.2823813087833528, "grad_norm": 1.557173490524292, "learning_rate": 1.631629239181463e-05, "loss": 0.5777, "step": 12765 }, { "epoch": 0.2824919164248661, "grad_norm": 1.8172717094421387, "learning_rate": 1.6313598018373094e-05, "loss": 0.3957, "step": 12770 }, { "epoch": 0.2826025240663793, "grad_norm": 1.4237651824951172, "learning_rate": 1.631090288256656e-05, "loss": 0.334, "step": 12775 }, { "epoch": 0.2827131317078926, "grad_norm": 1.5020111799240112, "learning_rate": 1.6308206984720478e-05, "loss": 0.3962, "step": 12780 }, { "epoch": 0.28282373934940586, "grad_norm": 1.6770967245101929, "learning_rate": 1.630551032516037e-05, "loss": 0.3774, "step": 12785 }, { "epoch": 0.28293434699091913, "grad_norm": 1.269322395324707, "learning_rate": 1.6302812904211854e-05, "loss": 0.3597, "step": 12790 }, { "epoch": 0.2830449546324324, "grad_norm": 1.1868457794189453, "learning_rate": 1.6300114722200652e-05, "loss": 0.3723, "step": 12795 }, { "epoch": 0.2831555622739456, "grad_norm": 1.1918264627456665, "learning_rate": 1.629741577945256e-05, "loss": 0.2353, "step": 12800 }, { "epoch": 0.2832661699154589, "grad_norm": 1.258118987083435, "learning_rate": 1.629471607629348e-05, "loss": 0.3682, "step": 12805 }, { "epoch": 0.28337677755697216, "grad_norm": 1.5616273880004883, "learning_rate": 1.6292015613049396e-05, "loss": 0.3158, "step": 12810 }, { "epoch": 0.28348738519848543, "grad_norm": 1.3602064847946167, "learning_rate": 1.628931439004639e-05, "loss": 0.5119, "step": 12815 }, { "epoch": 0.28359799283999865, "grad_norm": 4.0353899002075195, "learning_rate": 1.6286612407610635e-05, "loss": 0.3939, "step": 12820 }, { "epoch": 0.2837086004815119, "grad_norm": 1.8243621587753296, "learning_rate": 1.628390966606839e-05, "loss": 0.4037, "step": 12825 }, { "epoch": 0.2838192081230252, "grad_norm": 1.1515438556671143, "learning_rate": 1.6281206165746014e-05, "loss": 0.4325, "step": 12830 }, { "epoch": 0.28392981576453846, "grad_norm": 1.4029847383499146, "learning_rate": 1.6278501906969957e-05, "loss": 0.4692, "step": 12835 }, { "epoch": 0.28404042340605173, "grad_norm": 1.0318748950958252, "learning_rate": 1.6275796890066747e-05, "loss": 0.2976, "step": 12840 }, { "epoch": 0.28415103104756495, "grad_norm": 1.426786184310913, "learning_rate": 1.627309111536303e-05, "loss": 0.3226, "step": 12845 }, { "epoch": 0.2842616386890782, "grad_norm": 1.3010528087615967, "learning_rate": 1.6270384583185514e-05, "loss": 0.4855, "step": 12850 }, { "epoch": 0.2843722463305915, "grad_norm": 1.6565766334533691, "learning_rate": 1.6267677293861016e-05, "loss": 0.349, "step": 12855 }, { "epoch": 0.28448285397210477, "grad_norm": 1.1749029159545898, "learning_rate": 1.626496924771644e-05, "loss": 0.2826, "step": 12860 }, { "epoch": 0.28459346161361804, "grad_norm": 1.1662276983261108, "learning_rate": 1.626226044507878e-05, "loss": 0.3605, "step": 12865 }, { "epoch": 0.28470406925513125, "grad_norm": 1.5639458894729614, "learning_rate": 1.625955088627513e-05, "loss": 0.3943, "step": 12870 }, { "epoch": 0.2848146768966445, "grad_norm": 1.3460657596588135, "learning_rate": 1.625684057163266e-05, "loss": 0.4019, "step": 12875 }, { "epoch": 0.2849252845381578, "grad_norm": 2.197537422180176, "learning_rate": 1.6254129501478643e-05, "loss": 0.3249, "step": 12880 }, { "epoch": 0.28503589217967107, "grad_norm": 1.9374250173568726, "learning_rate": 1.6251417676140443e-05, "loss": 0.4717, "step": 12885 }, { "epoch": 0.28514649982118434, "grad_norm": 0.8017204999923706, "learning_rate": 1.624870509594551e-05, "loss": 0.3142, "step": 12890 }, { "epoch": 0.28525710746269756, "grad_norm": 1.8478858470916748, "learning_rate": 1.6245991761221383e-05, "loss": 0.262, "step": 12895 }, { "epoch": 0.28536771510421083, "grad_norm": 1.4446536302566528, "learning_rate": 1.62432776722957e-05, "loss": 0.3417, "step": 12900 }, { "epoch": 0.2854783227457241, "grad_norm": 0.5424515008926392, "learning_rate": 1.6240562829496185e-05, "loss": 0.3602, "step": 12905 }, { "epoch": 0.28558893038723737, "grad_norm": 1.5978238582611084, "learning_rate": 1.6237847233150658e-05, "loss": 0.3761, "step": 12910 }, { "epoch": 0.2856995380287506, "grad_norm": 1.7590934038162231, "learning_rate": 1.6235130883587022e-05, "loss": 0.5392, "step": 12915 }, { "epoch": 0.28581014567026386, "grad_norm": 2.0705761909484863, "learning_rate": 1.6232413781133282e-05, "loss": 0.363, "step": 12920 }, { "epoch": 0.28592075331177713, "grad_norm": 1.2572466135025024, "learning_rate": 1.6229695926117518e-05, "loss": 0.3291, "step": 12925 }, { "epoch": 0.2860313609532904, "grad_norm": 1.1807706356048584, "learning_rate": 1.6226977318867917e-05, "loss": 0.3323, "step": 12930 }, { "epoch": 0.2861419685948037, "grad_norm": 2.063136577606201, "learning_rate": 1.6224257959712746e-05, "loss": 0.4055, "step": 12935 }, { "epoch": 0.2862525762363169, "grad_norm": 1.5952712297439575, "learning_rate": 1.622153784898037e-05, "loss": 0.4561, "step": 12940 }, { "epoch": 0.28636318387783016, "grad_norm": 1.5414148569107056, "learning_rate": 1.6218816986999238e-05, "loss": 0.381, "step": 12945 }, { "epoch": 0.28647379151934343, "grad_norm": 1.8882670402526855, "learning_rate": 1.6216095374097897e-05, "loss": 0.4203, "step": 12950 }, { "epoch": 0.2865843991608567, "grad_norm": 1.4014567136764526, "learning_rate": 1.6213373010604977e-05, "loss": 0.401, "step": 12955 }, { "epoch": 0.28669500680237, "grad_norm": 0.9155007600784302, "learning_rate": 1.621064989684921e-05, "loss": 0.3056, "step": 12960 }, { "epoch": 0.2868056144438832, "grad_norm": 1.4332201480865479, "learning_rate": 1.6207926033159397e-05, "loss": 0.3823, "step": 12965 }, { "epoch": 0.28691622208539647, "grad_norm": 1.7447164058685303, "learning_rate": 1.620520141986446e-05, "loss": 0.3497, "step": 12970 }, { "epoch": 0.28702682972690974, "grad_norm": 1.6515740156173706, "learning_rate": 1.6202476057293387e-05, "loss": 0.3623, "step": 12975 }, { "epoch": 0.287137437368423, "grad_norm": 1.0412633419036865, "learning_rate": 1.619974994577526e-05, "loss": 0.6009, "step": 12980 }, { "epoch": 0.2872480450099362, "grad_norm": 1.867857575416565, "learning_rate": 1.6197023085639267e-05, "loss": 0.4721, "step": 12985 }, { "epoch": 0.2873586526514495, "grad_norm": 2.1597650051116943, "learning_rate": 1.6194295477214667e-05, "loss": 0.466, "step": 12990 }, { "epoch": 0.28746926029296277, "grad_norm": 1.3434677124023438, "learning_rate": 1.619156712083082e-05, "loss": 0.3934, "step": 12995 }, { "epoch": 0.28757986793447604, "grad_norm": 1.7027945518493652, "learning_rate": 1.6188838016817182e-05, "loss": 0.3268, "step": 13000 }, { "epoch": 0.2876904755759893, "grad_norm": 1.534993052482605, "learning_rate": 1.6186108165503278e-05, "loss": 0.493, "step": 13005 }, { "epoch": 0.28780108321750253, "grad_norm": 1.2758266925811768, "learning_rate": 1.6183377567218746e-05, "loss": 0.4185, "step": 13010 }, { "epoch": 0.2879116908590158, "grad_norm": 1.6781706809997559, "learning_rate": 1.61806462222933e-05, "loss": 0.4745, "step": 13015 }, { "epoch": 0.2880222985005291, "grad_norm": 0.9164647459983826, "learning_rate": 1.617791413105675e-05, "loss": 0.2583, "step": 13020 }, { "epoch": 0.28813290614204234, "grad_norm": 1.211882472038269, "learning_rate": 1.6175181293839e-05, "loss": 0.4014, "step": 13025 }, { "epoch": 0.2882435137835556, "grad_norm": 0.8049250245094299, "learning_rate": 1.6172447710970037e-05, "loss": 0.3764, "step": 13030 }, { "epoch": 0.28835412142506883, "grad_norm": 1.814323902130127, "learning_rate": 1.6169713382779933e-05, "loss": 0.2881, "step": 13035 }, { "epoch": 0.2884647290665821, "grad_norm": 0.9823851585388184, "learning_rate": 1.6166978309598866e-05, "loss": 0.4542, "step": 13040 }, { "epoch": 0.2885753367080954, "grad_norm": 1.3618693351745605, "learning_rate": 1.6164242491757095e-05, "loss": 0.4253, "step": 13045 }, { "epoch": 0.28868594434960865, "grad_norm": 2.3661296367645264, "learning_rate": 1.6161505929584963e-05, "loss": 0.4183, "step": 13050 }, { "epoch": 0.2887965519911219, "grad_norm": 1.7115285396575928, "learning_rate": 1.615876862341291e-05, "loss": 0.3053, "step": 13055 }, { "epoch": 0.28890715963263514, "grad_norm": 1.6666319370269775, "learning_rate": 1.6156030573571472e-05, "loss": 0.4253, "step": 13060 }, { "epoch": 0.2890177672741484, "grad_norm": 0.9588655233383179, "learning_rate": 1.6153291780391263e-05, "loss": 0.3218, "step": 13065 }, { "epoch": 0.2891283749156617, "grad_norm": 2.103027105331421, "learning_rate": 1.615055224420299e-05, "loss": 0.4074, "step": 13070 }, { "epoch": 0.28923898255717495, "grad_norm": 1.7595359086990356, "learning_rate": 1.6147811965337454e-05, "loss": 0.3194, "step": 13075 }, { "epoch": 0.28934959019868817, "grad_norm": 1.1005123853683472, "learning_rate": 1.614507094412554e-05, "loss": 0.2284, "step": 13080 }, { "epoch": 0.28946019784020144, "grad_norm": 0.8298508524894714, "learning_rate": 1.614232918089823e-05, "loss": 0.2688, "step": 13085 }, { "epoch": 0.2895708054817147, "grad_norm": 1.4427990913391113, "learning_rate": 1.6139586675986586e-05, "loss": 0.3319, "step": 13090 }, { "epoch": 0.289681413123228, "grad_norm": 1.3110557794570923, "learning_rate": 1.613684342972176e-05, "loss": 0.2967, "step": 13095 }, { "epoch": 0.28979202076474125, "grad_norm": 1.4042410850524902, "learning_rate": 1.6134099442435015e-05, "loss": 0.4686, "step": 13100 }, { "epoch": 0.28990262840625447, "grad_norm": 1.2479525804519653, "learning_rate": 1.613135471445767e-05, "loss": 0.3057, "step": 13105 }, { "epoch": 0.29001323604776774, "grad_norm": 0.9817124009132385, "learning_rate": 1.612860924612116e-05, "loss": 0.1988, "step": 13110 }, { "epoch": 0.290123843689281, "grad_norm": 0.9504029750823975, "learning_rate": 1.612586303775699e-05, "loss": 0.3199, "step": 13115 }, { "epoch": 0.2902344513307943, "grad_norm": 1.341302752494812, "learning_rate": 1.6123116089696776e-05, "loss": 0.1985, "step": 13120 }, { "epoch": 0.29034505897230756, "grad_norm": 1.23753023147583, "learning_rate": 1.61203684022722e-05, "loss": 0.3002, "step": 13125 }, { "epoch": 0.2904556666138208, "grad_norm": 1.2259204387664795, "learning_rate": 1.6117619975815056e-05, "loss": 0.404, "step": 13130 }, { "epoch": 0.29056627425533404, "grad_norm": 1.018287181854248, "learning_rate": 1.61148708106572e-05, "loss": 0.4512, "step": 13135 }, { "epoch": 0.2906768818968473, "grad_norm": 1.1666197776794434, "learning_rate": 1.6112120907130608e-05, "loss": 0.3502, "step": 13140 }, { "epoch": 0.2907874895383606, "grad_norm": 1.5834124088287354, "learning_rate": 1.6109370265567323e-05, "loss": 0.3047, "step": 13145 }, { "epoch": 0.29089809717987386, "grad_norm": 1.1300997734069824, "learning_rate": 1.6106618886299482e-05, "loss": 0.427, "step": 13150 }, { "epoch": 0.2910087048213871, "grad_norm": 1.6099077463150024, "learning_rate": 1.6103866769659317e-05, "loss": 0.5211, "step": 13155 }, { "epoch": 0.29111931246290035, "grad_norm": 1.2822120189666748, "learning_rate": 1.6101113915979142e-05, "loss": 0.4298, "step": 13160 }, { "epoch": 0.2912299201044136, "grad_norm": 1.5705301761627197, "learning_rate": 1.609836032559137e-05, "loss": 0.4786, "step": 13165 }, { "epoch": 0.2913405277459269, "grad_norm": 0.9181768894195557, "learning_rate": 1.6095605998828484e-05, "loss": 0.3608, "step": 13170 }, { "epoch": 0.2914511353874401, "grad_norm": 1.4287049770355225, "learning_rate": 1.6092850936023085e-05, "loss": 0.4526, "step": 13175 }, { "epoch": 0.2915617430289534, "grad_norm": 1.4047043323516846, "learning_rate": 1.609009513750783e-05, "loss": 0.4152, "step": 13180 }, { "epoch": 0.29167235067046665, "grad_norm": 0.8592086434364319, "learning_rate": 1.608733860361549e-05, "loss": 0.3976, "step": 13185 }, { "epoch": 0.2917829583119799, "grad_norm": 1.6447267532348633, "learning_rate": 1.6084581334678913e-05, "loss": 0.4062, "step": 13190 }, { "epoch": 0.2918935659534932, "grad_norm": 1.5366441011428833, "learning_rate": 1.6081823331031035e-05, "loss": 0.488, "step": 13195 }, { "epoch": 0.2920041735950064, "grad_norm": 0.9749012589454651, "learning_rate": 1.6079064593004895e-05, "loss": 0.336, "step": 13200 }, { "epoch": 0.2921147812365197, "grad_norm": 0.8763280510902405, "learning_rate": 1.6076305120933595e-05, "loss": 0.3276, "step": 13205 }, { "epoch": 0.29222538887803295, "grad_norm": 1.4111416339874268, "learning_rate": 1.607354491515035e-05, "loss": 0.2749, "step": 13210 }, { "epoch": 0.2923359965195462, "grad_norm": 1.4452805519104004, "learning_rate": 1.607078397598845e-05, "loss": 0.3657, "step": 13215 }, { "epoch": 0.2924466041610595, "grad_norm": 1.5930067300796509, "learning_rate": 1.6068022303781285e-05, "loss": 0.3662, "step": 13220 }, { "epoch": 0.2925572118025727, "grad_norm": 1.7040985822677612, "learning_rate": 1.6065259898862312e-05, "loss": 0.5017, "step": 13225 }, { "epoch": 0.292667819444086, "grad_norm": 1.3042157888412476, "learning_rate": 1.6062496761565103e-05, "loss": 0.4382, "step": 13230 }, { "epoch": 0.29277842708559926, "grad_norm": 0.8790606260299683, "learning_rate": 1.6059732892223302e-05, "loss": 0.384, "step": 13235 }, { "epoch": 0.29288903472711253, "grad_norm": 1.7436100244522095, "learning_rate": 1.6056968291170647e-05, "loss": 0.317, "step": 13240 }, { "epoch": 0.2929996423686258, "grad_norm": 0.893426775932312, "learning_rate": 1.6054202958740955e-05, "loss": 0.3301, "step": 13245 }, { "epoch": 0.293110250010139, "grad_norm": 1.4487446546554565, "learning_rate": 1.605143689526815e-05, "loss": 0.4302, "step": 13250 }, { "epoch": 0.2932208576516523, "grad_norm": 1.031592607498169, "learning_rate": 1.6048670101086225e-05, "loss": 0.3154, "step": 13255 }, { "epoch": 0.29333146529316556, "grad_norm": 1.6355217695236206, "learning_rate": 1.6045902576529276e-05, "loss": 0.514, "step": 13260 }, { "epoch": 0.29344207293467883, "grad_norm": 1.0559332370758057, "learning_rate": 1.6043134321931475e-05, "loss": 0.2523, "step": 13265 }, { "epoch": 0.29355268057619205, "grad_norm": 1.330615520477295, "learning_rate": 1.6040365337627086e-05, "loss": 0.315, "step": 13270 }, { "epoch": 0.2936632882177053, "grad_norm": 1.4039422273635864, "learning_rate": 1.6037595623950473e-05, "loss": 0.2714, "step": 13275 }, { "epoch": 0.2937738958592186, "grad_norm": 1.2363156080245972, "learning_rate": 1.6034825181236075e-05, "loss": 0.3215, "step": 13280 }, { "epoch": 0.29388450350073186, "grad_norm": 2.5549044609069824, "learning_rate": 1.6032054009818412e-05, "loss": 0.4804, "step": 13285 }, { "epoch": 0.29399511114224514, "grad_norm": 1.335703730583191, "learning_rate": 1.6029282110032114e-05, "loss": 0.3208, "step": 13290 }, { "epoch": 0.29410571878375835, "grad_norm": 1.4145795106887817, "learning_rate": 1.602650948221188e-05, "loss": 0.406, "step": 13295 }, { "epoch": 0.2942163264252716, "grad_norm": 1.3059700727462769, "learning_rate": 1.6023736126692506e-05, "loss": 0.4239, "step": 13300 }, { "epoch": 0.2943269340667849, "grad_norm": 2.5119028091430664, "learning_rate": 1.602096204380888e-05, "loss": 0.4142, "step": 13305 }, { "epoch": 0.29443754170829817, "grad_norm": 1.5805134773254395, "learning_rate": 1.6018187233895958e-05, "loss": 0.2687, "step": 13310 }, { "epoch": 0.29454814934981144, "grad_norm": 1.3957288265228271, "learning_rate": 1.601541169728881e-05, "loss": 0.2149, "step": 13315 }, { "epoch": 0.29465875699132466, "grad_norm": 1.4822909832000732, "learning_rate": 1.6012635434322573e-05, "loss": 0.3517, "step": 13320 }, { "epoch": 0.2947693646328379, "grad_norm": 1.265356421470642, "learning_rate": 1.6009858445332487e-05, "loss": 0.3823, "step": 13325 }, { "epoch": 0.2948799722743512, "grad_norm": 1.097550630569458, "learning_rate": 1.600708073065387e-05, "loss": 0.4617, "step": 13330 }, { "epoch": 0.29499057991586447, "grad_norm": 1.7253706455230713, "learning_rate": 1.600430229062212e-05, "loss": 0.4599, "step": 13335 }, { "epoch": 0.2951011875573777, "grad_norm": 1.290732502937317, "learning_rate": 1.6001523125572752e-05, "loss": 0.3428, "step": 13340 }, { "epoch": 0.29521179519889096, "grad_norm": 1.1141611337661743, "learning_rate": 1.5998743235841336e-05, "loss": 0.4232, "step": 13345 }, { "epoch": 0.29532240284040423, "grad_norm": 0.8456601500511169, "learning_rate": 1.5995962621763546e-05, "loss": 0.2095, "step": 13350 }, { "epoch": 0.2954330104819175, "grad_norm": 1.111664056777954, "learning_rate": 1.599318128367514e-05, "loss": 0.3323, "step": 13355 }, { "epoch": 0.2955436181234308, "grad_norm": 1.3206524848937988, "learning_rate": 1.5990399221911966e-05, "loss": 0.4333, "step": 13360 }, { "epoch": 0.295654225764944, "grad_norm": 1.3545947074890137, "learning_rate": 1.5987616436809954e-05, "loss": 0.3289, "step": 13365 }, { "epoch": 0.29576483340645726, "grad_norm": 1.003782033920288, "learning_rate": 1.5984832928705122e-05, "loss": 0.459, "step": 13370 }, { "epoch": 0.29587544104797053, "grad_norm": 1.33824622631073, "learning_rate": 1.5982048697933585e-05, "loss": 0.3134, "step": 13375 }, { "epoch": 0.2959860486894838, "grad_norm": 1.868643045425415, "learning_rate": 1.597926374483153e-05, "loss": 0.3398, "step": 13380 }, { "epoch": 0.2960966563309971, "grad_norm": 1.6043552160263062, "learning_rate": 1.597647806973525e-05, "loss": 0.5138, "step": 13385 }, { "epoch": 0.2962072639725103, "grad_norm": 1.2025457620620728, "learning_rate": 1.5973691672981105e-05, "loss": 0.3239, "step": 13390 }, { "epoch": 0.29631787161402356, "grad_norm": 1.3301255702972412, "learning_rate": 1.5970904554905552e-05, "loss": 0.4243, "step": 13395 }, { "epoch": 0.29642847925553684, "grad_norm": 0.8872957229614258, "learning_rate": 1.5968116715845143e-05, "loss": 0.3215, "step": 13400 }, { "epoch": 0.2965390868970501, "grad_norm": 0.5951601266860962, "learning_rate": 1.5965328156136498e-05, "loss": 0.3474, "step": 13405 }, { "epoch": 0.2966496945385634, "grad_norm": 0.9230466485023499, "learning_rate": 1.596253887611634e-05, "loss": 0.4556, "step": 13410 }, { "epoch": 0.2967603021800766, "grad_norm": 1.3679202795028687, "learning_rate": 1.5959748876121475e-05, "loss": 0.3502, "step": 13415 }, { "epoch": 0.29687090982158987, "grad_norm": 1.0928293466567993, "learning_rate": 1.595695815648879e-05, "loss": 0.3566, "step": 13420 }, { "epoch": 0.29698151746310314, "grad_norm": 1.046770453453064, "learning_rate": 1.5954166717555265e-05, "loss": 0.2808, "step": 13425 }, { "epoch": 0.2970921251046164, "grad_norm": 1.0649062395095825, "learning_rate": 1.5951374559657972e-05, "loss": 0.5338, "step": 13430 }, { "epoch": 0.2972027327461296, "grad_norm": 2.1299948692321777, "learning_rate": 1.5948581683134054e-05, "loss": 0.2835, "step": 13435 }, { "epoch": 0.2973133403876429, "grad_norm": 1.123279333114624, "learning_rate": 1.5945788088320755e-05, "loss": 0.4338, "step": 13440 }, { "epoch": 0.29742394802915617, "grad_norm": 0.9890992045402527, "learning_rate": 1.59429937755554e-05, "loss": 0.4081, "step": 13445 }, { "epoch": 0.29753455567066944, "grad_norm": 1.298039197921753, "learning_rate": 1.59401987451754e-05, "loss": 0.4554, "step": 13450 }, { "epoch": 0.2976451633121827, "grad_norm": 1.1568083763122559, "learning_rate": 1.5937402997518257e-05, "loss": 0.3522, "step": 13455 }, { "epoch": 0.29775577095369593, "grad_norm": 1.734718918800354, "learning_rate": 1.5934606532921554e-05, "loss": 0.2631, "step": 13460 }, { "epoch": 0.2978663785952092, "grad_norm": 1.5463184118270874, "learning_rate": 1.593180935172296e-05, "loss": 0.4457, "step": 13465 }, { "epoch": 0.2979769862367225, "grad_norm": 1.2009246349334717, "learning_rate": 1.5929011454260242e-05, "loss": 0.2879, "step": 13470 }, { "epoch": 0.29808759387823575, "grad_norm": 0.9995460510253906, "learning_rate": 1.592621284087124e-05, "loss": 0.3392, "step": 13475 }, { "epoch": 0.298198201519749, "grad_norm": 1.0931867361068726, "learning_rate": 1.5923413511893885e-05, "loss": 0.4265, "step": 13480 }, { "epoch": 0.29830880916126223, "grad_norm": 1.3366519212722778, "learning_rate": 1.5920613467666196e-05, "loss": 0.3776, "step": 13485 }, { "epoch": 0.2984194168027755, "grad_norm": 1.5333905220031738, "learning_rate": 1.5917812708526284e-05, "loss": 0.3333, "step": 13490 }, { "epoch": 0.2985300244442888, "grad_norm": 1.2842952013015747, "learning_rate": 1.5915011234812328e-05, "loss": 0.3822, "step": 13495 }, { "epoch": 0.29864063208580205, "grad_norm": 1.2699230909347534, "learning_rate": 1.5912209046862614e-05, "loss": 0.3304, "step": 13500 }, { "epoch": 0.2987512397273153, "grad_norm": 1.824626088142395, "learning_rate": 1.59094061450155e-05, "loss": 0.4604, "step": 13505 }, { "epoch": 0.29886184736882854, "grad_norm": 1.5539538860321045, "learning_rate": 1.590660252960944e-05, "loss": 0.2706, "step": 13510 }, { "epoch": 0.2989724550103418, "grad_norm": 0.9965506196022034, "learning_rate": 1.5903798200982968e-05, "loss": 0.4767, "step": 13515 }, { "epoch": 0.2990830626518551, "grad_norm": 1.4746485948562622, "learning_rate": 1.5900993159474705e-05, "loss": 0.2849, "step": 13520 }, { "epoch": 0.29919367029336835, "grad_norm": 1.153435468673706, "learning_rate": 1.5898187405423356e-05, "loss": 0.5531, "step": 13525 }, { "epoch": 0.29930427793488157, "grad_norm": 1.0729471445083618, "learning_rate": 1.5895380939167722e-05, "loss": 0.3028, "step": 13530 }, { "epoch": 0.29941488557639484, "grad_norm": 1.125190258026123, "learning_rate": 1.5892573761046676e-05, "loss": 0.4118, "step": 13535 }, { "epoch": 0.2995254932179081, "grad_norm": 1.5194597244262695, "learning_rate": 1.588976587139919e-05, "loss": 0.2464, "step": 13540 }, { "epoch": 0.2996361008594214, "grad_norm": 1.309945821762085, "learning_rate": 1.588695727056431e-05, "loss": 0.3515, "step": 13545 }, { "epoch": 0.29974670850093466, "grad_norm": 2.0455589294433594, "learning_rate": 1.588414795888118e-05, "loss": 0.314, "step": 13550 }, { "epoch": 0.29985731614244787, "grad_norm": 1.7090381383895874, "learning_rate": 1.5881337936689018e-05, "loss": 0.3827, "step": 13555 }, { "epoch": 0.29996792378396114, "grad_norm": 1.559127926826477, "learning_rate": 1.5878527204327133e-05, "loss": 0.4074, "step": 13560 }, { "epoch": 0.3000785314254744, "grad_norm": 1.2933368682861328, "learning_rate": 1.5875715762134922e-05, "loss": 0.4951, "step": 13565 }, { "epoch": 0.3001891390669877, "grad_norm": 0.9689770340919495, "learning_rate": 1.587290361045187e-05, "loss": 0.2877, "step": 13570 }, { "epoch": 0.30029974670850096, "grad_norm": 1.3442738056182861, "learning_rate": 1.587009074961754e-05, "loss": 0.4484, "step": 13575 }, { "epoch": 0.3004103543500142, "grad_norm": 1.2397938966751099, "learning_rate": 1.586727717997158e-05, "loss": 0.3637, "step": 13580 }, { "epoch": 0.30052096199152745, "grad_norm": 1.0163151025772095, "learning_rate": 1.5864462901853732e-05, "loss": 0.2579, "step": 13585 }, { "epoch": 0.3006315696330407, "grad_norm": 1.5625243186950684, "learning_rate": 1.5861647915603818e-05, "loss": 0.347, "step": 13590 }, { "epoch": 0.300742177274554, "grad_norm": 1.4946136474609375, "learning_rate": 1.5858832221561748e-05, "loss": 0.4758, "step": 13595 }, { "epoch": 0.30085278491606726, "grad_norm": 1.5919021368026733, "learning_rate": 1.5856015820067518e-05, "loss": 0.3687, "step": 13600 }, { "epoch": 0.3009633925575805, "grad_norm": 0.9441934823989868, "learning_rate": 1.58531987114612e-05, "loss": 0.3354, "step": 13605 }, { "epoch": 0.30107400019909375, "grad_norm": 1.2779496908187866, "learning_rate": 1.5850380896082967e-05, "loss": 0.3309, "step": 13610 }, { "epoch": 0.301184607840607, "grad_norm": 0.8920063972473145, "learning_rate": 1.5847562374273068e-05, "loss": 0.253, "step": 13615 }, { "epoch": 0.3012952154821203, "grad_norm": 1.6562248468399048, "learning_rate": 1.5844743146371835e-05, "loss": 0.3807, "step": 13620 }, { "epoch": 0.3014058231236335, "grad_norm": 1.9488286972045898, "learning_rate": 1.5841923212719688e-05, "loss": 0.6383, "step": 13625 }, { "epoch": 0.3015164307651468, "grad_norm": 1.2480098009109497, "learning_rate": 1.583910257365714e-05, "loss": 0.3208, "step": 13630 }, { "epoch": 0.30162703840666005, "grad_norm": 0.9076240658760071, "learning_rate": 1.5836281229524777e-05, "loss": 0.2604, "step": 13635 }, { "epoch": 0.3017376460481733, "grad_norm": 1.0211347341537476, "learning_rate": 1.5833459180663276e-05, "loss": 0.2856, "step": 13640 }, { "epoch": 0.3018482536896866, "grad_norm": 1.2786424160003662, "learning_rate": 1.58306364274134e-05, "loss": 0.3858, "step": 13645 }, { "epoch": 0.3019588613311998, "grad_norm": 1.3665480613708496, "learning_rate": 1.5827812970115993e-05, "loss": 0.3697, "step": 13650 }, { "epoch": 0.3020694689727131, "grad_norm": 1.4866605997085571, "learning_rate": 1.5824988809111986e-05, "loss": 0.4147, "step": 13655 }, { "epoch": 0.30218007661422636, "grad_norm": 1.1854581832885742, "learning_rate": 1.5822163944742404e-05, "loss": 0.2876, "step": 13660 }, { "epoch": 0.30229068425573963, "grad_norm": 1.0549753904342651, "learning_rate": 1.5819338377348336e-05, "loss": 0.3443, "step": 13665 }, { "epoch": 0.3024012918972529, "grad_norm": 1.0983809232711792, "learning_rate": 1.581651210727098e-05, "loss": 0.3295, "step": 13670 }, { "epoch": 0.3025118995387661, "grad_norm": 1.698797583580017, "learning_rate": 1.58136851348516e-05, "loss": 0.4453, "step": 13675 }, { "epoch": 0.3026225071802794, "grad_norm": 1.25867760181427, "learning_rate": 1.5810857460431552e-05, "loss": 0.3707, "step": 13680 }, { "epoch": 0.30273311482179266, "grad_norm": 1.2105636596679688, "learning_rate": 1.5808029084352282e-05, "loss": 0.3943, "step": 13685 }, { "epoch": 0.30284372246330593, "grad_norm": 2.105177164077759, "learning_rate": 1.5805200006955312e-05, "loss": 0.3396, "step": 13690 }, { "epoch": 0.30295433010481915, "grad_norm": 1.5066784620285034, "learning_rate": 1.580237022858225e-05, "loss": 0.3413, "step": 13695 }, { "epoch": 0.3030649377463324, "grad_norm": 1.3116040229797363, "learning_rate": 1.5799539749574793e-05, "loss": 0.2976, "step": 13700 }, { "epoch": 0.3031755453878457, "grad_norm": 0.9509299397468567, "learning_rate": 1.5796708570274723e-05, "loss": 0.3488, "step": 13705 }, { "epoch": 0.30328615302935896, "grad_norm": 1.2749329805374146, "learning_rate": 1.57938766910239e-05, "loss": 0.3598, "step": 13710 }, { "epoch": 0.30339676067087223, "grad_norm": 1.019490361213684, "learning_rate": 1.5791044112164278e-05, "loss": 0.3726, "step": 13715 }, { "epoch": 0.30350736831238545, "grad_norm": 1.1977531909942627, "learning_rate": 1.5788210834037886e-05, "loss": 0.4435, "step": 13720 }, { "epoch": 0.3036179759538987, "grad_norm": 1.893874168395996, "learning_rate": 1.5785376856986842e-05, "loss": 0.4932, "step": 13725 }, { "epoch": 0.303728583595412, "grad_norm": 1.3139537572860718, "learning_rate": 1.578254218135334e-05, "loss": 0.3059, "step": 13730 }, { "epoch": 0.30383919123692527, "grad_norm": 0.7223889827728271, "learning_rate": 1.5779706807479684e-05, "loss": 0.3753, "step": 13735 }, { "epoch": 0.30394979887843854, "grad_norm": 1.8055026531219482, "learning_rate": 1.577687073570823e-05, "loss": 0.3696, "step": 13740 }, { "epoch": 0.30406040651995175, "grad_norm": 2.04581618309021, "learning_rate": 1.5774033966381437e-05, "loss": 0.3763, "step": 13745 }, { "epoch": 0.304171014161465, "grad_norm": 1.800614833831787, "learning_rate": 1.577119649984184e-05, "loss": 0.3441, "step": 13750 }, { "epoch": 0.3042816218029783, "grad_norm": 2.703617811203003, "learning_rate": 1.5768358336432076e-05, "loss": 0.3466, "step": 13755 }, { "epoch": 0.30439222944449157, "grad_norm": 1.6824328899383545, "learning_rate": 1.5765519476494836e-05, "loss": 0.3698, "step": 13760 }, { "epoch": 0.30450283708600484, "grad_norm": 1.5366661548614502, "learning_rate": 1.5762679920372915e-05, "loss": 0.3435, "step": 13765 }, { "epoch": 0.30461344472751806, "grad_norm": 1.8790582418441772, "learning_rate": 1.57598396684092e-05, "loss": 0.358, "step": 13770 }, { "epoch": 0.30472405236903133, "grad_norm": 1.7162216901779175, "learning_rate": 1.5756998720946638e-05, "loss": 0.4308, "step": 13775 }, { "epoch": 0.3048346600105446, "grad_norm": 1.7510640621185303, "learning_rate": 1.575415707832828e-05, "loss": 0.4752, "step": 13780 }, { "epoch": 0.30494526765205787, "grad_norm": 1.0631240606307983, "learning_rate": 1.5751314740897247e-05, "loss": 0.2986, "step": 13785 }, { "epoch": 0.3050558752935711, "grad_norm": 1.466241478919983, "learning_rate": 1.5748471708996752e-05, "loss": 0.5034, "step": 13790 }, { "epoch": 0.30516648293508436, "grad_norm": 1.7726432085037231, "learning_rate": 1.5745627982970095e-05, "loss": 0.3682, "step": 13795 }, { "epoch": 0.30527709057659763, "grad_norm": 1.8647189140319824, "learning_rate": 1.5742783563160647e-05, "loss": 0.4077, "step": 13800 }, { "epoch": 0.3053876982181109, "grad_norm": 2.0099596977233887, "learning_rate": 1.5739938449911883e-05, "loss": 0.2845, "step": 13805 }, { "epoch": 0.3054983058596242, "grad_norm": 0.9772801995277405, "learning_rate": 1.573709264356734e-05, "loss": 0.2935, "step": 13810 }, { "epoch": 0.3056089135011374, "grad_norm": 1.806660532951355, "learning_rate": 1.5734246144470646e-05, "loss": 0.5774, "step": 13815 }, { "epoch": 0.30571952114265066, "grad_norm": 1.3525139093399048, "learning_rate": 1.5731398952965525e-05, "loss": 0.3173, "step": 13820 }, { "epoch": 0.30583012878416393, "grad_norm": 1.418395757675171, "learning_rate": 1.5728551069395766e-05, "loss": 0.3106, "step": 13825 }, { "epoch": 0.3059407364256772, "grad_norm": 1.1749343872070312, "learning_rate": 1.572570249410525e-05, "loss": 0.3771, "step": 13830 }, { "epoch": 0.3060513440671905, "grad_norm": 1.4732428789138794, "learning_rate": 1.5722853227437948e-05, "loss": 0.39, "step": 13835 }, { "epoch": 0.3061619517087037, "grad_norm": 1.309828758239746, "learning_rate": 1.5720003269737903e-05, "loss": 0.3242, "step": 13840 }, { "epoch": 0.30627255935021697, "grad_norm": 1.4859336614608765, "learning_rate": 1.571715262134925e-05, "loss": 0.3419, "step": 13845 }, { "epoch": 0.30638316699173024, "grad_norm": 1.1860994100570679, "learning_rate": 1.57143012826162e-05, "loss": 0.5525, "step": 13850 }, { "epoch": 0.3064937746332435, "grad_norm": 1.5618098974227905, "learning_rate": 1.5711449253883056e-05, "loss": 0.3208, "step": 13855 }, { "epoch": 0.3066043822747568, "grad_norm": 1.2759547233581543, "learning_rate": 1.5708596535494192e-05, "loss": 0.3168, "step": 13860 }, { "epoch": 0.30671498991627, "grad_norm": 0.997388482093811, "learning_rate": 1.570574312779408e-05, "loss": 0.3852, "step": 13865 }, { "epoch": 0.30682559755778327, "grad_norm": 1.5211765766143799, "learning_rate": 1.5702889031127266e-05, "loss": 0.5757, "step": 13870 }, { "epoch": 0.30693620519929654, "grad_norm": 1.3026885986328125, "learning_rate": 1.570003424583838e-05, "loss": 0.4069, "step": 13875 }, { "epoch": 0.3070468128408098, "grad_norm": 1.055686116218567, "learning_rate": 1.5697178772272138e-05, "loss": 0.4341, "step": 13880 }, { "epoch": 0.30715742048232303, "grad_norm": 1.2770236730575562, "learning_rate": 1.5694322610773337e-05, "loss": 0.416, "step": 13885 }, { "epoch": 0.3072680281238363, "grad_norm": 1.2789517641067505, "learning_rate": 1.5691465761686858e-05, "loss": 0.4388, "step": 13890 }, { "epoch": 0.3073786357653496, "grad_norm": 1.3071683645248413, "learning_rate": 1.5688608225357662e-05, "loss": 0.3909, "step": 13895 }, { "epoch": 0.30748924340686284, "grad_norm": 1.1663237810134888, "learning_rate": 1.56857500021308e-05, "loss": 0.3049, "step": 13900 }, { "epoch": 0.3075998510483761, "grad_norm": 1.2676199674606323, "learning_rate": 1.5682891092351396e-05, "loss": 0.3555, "step": 13905 }, { "epoch": 0.30771045868988933, "grad_norm": 1.4323320388793945, "learning_rate": 1.568003149636467e-05, "loss": 0.3691, "step": 13910 }, { "epoch": 0.3078210663314026, "grad_norm": 1.5163992643356323, "learning_rate": 1.567717121451591e-05, "loss": 0.3103, "step": 13915 }, { "epoch": 0.3079316739729159, "grad_norm": 1.7127306461334229, "learning_rate": 1.5674310247150496e-05, "loss": 0.4268, "step": 13920 }, { "epoch": 0.30804228161442915, "grad_norm": 1.0306793451309204, "learning_rate": 1.5671448594613895e-05, "loss": 0.4402, "step": 13925 }, { "epoch": 0.3081528892559424, "grad_norm": 1.351030707359314, "learning_rate": 1.5668586257251644e-05, "loss": 0.3424, "step": 13930 }, { "epoch": 0.30826349689745564, "grad_norm": 1.8211941719055176, "learning_rate": 1.5665723235409373e-05, "loss": 0.3526, "step": 13935 }, { "epoch": 0.3083741045389689, "grad_norm": 1.8601239919662476, "learning_rate": 1.5662859529432783e-05, "loss": 0.4817, "step": 13940 }, { "epoch": 0.3084847121804822, "grad_norm": 1.270913004875183, "learning_rate": 1.5659995139667675e-05, "loss": 0.2987, "step": 13945 }, { "epoch": 0.30859531982199545, "grad_norm": 1.1125677824020386, "learning_rate": 1.5657130066459924e-05, "loss": 0.3591, "step": 13950 }, { "epoch": 0.3087059274635087, "grad_norm": 1.4692506790161133, "learning_rate": 1.5654264310155478e-05, "loss": 0.3004, "step": 13955 }, { "epoch": 0.30881653510502194, "grad_norm": 1.3344823122024536, "learning_rate": 1.565139787110038e-05, "loss": 0.3138, "step": 13960 }, { "epoch": 0.3089271427465352, "grad_norm": 1.4968922138214111, "learning_rate": 1.5648530749640753e-05, "loss": 0.3743, "step": 13965 }, { "epoch": 0.3090377503880485, "grad_norm": 1.1912384033203125, "learning_rate": 1.5645662946122806e-05, "loss": 0.2598, "step": 13970 }, { "epoch": 0.30914835802956175, "grad_norm": 2.3116815090179443, "learning_rate": 1.5642794460892814e-05, "loss": 0.4424, "step": 13975 }, { "epoch": 0.30925896567107497, "grad_norm": 1.1150407791137695, "learning_rate": 1.563992529429715e-05, "loss": 0.4618, "step": 13980 }, { "epoch": 0.30936957331258824, "grad_norm": 1.2981743812561035, "learning_rate": 1.5637055446682272e-05, "loss": 0.2969, "step": 13985 }, { "epoch": 0.3094801809541015, "grad_norm": 1.039984941482544, "learning_rate": 1.5634184918394707e-05, "loss": 0.4577, "step": 13990 }, { "epoch": 0.3095907885956148, "grad_norm": 1.1817786693572998, "learning_rate": 1.563131370978107e-05, "loss": 0.2344, "step": 13995 }, { "epoch": 0.30970139623712806, "grad_norm": 1.5232528448104858, "learning_rate": 1.562844182118806e-05, "loss": 0.3368, "step": 14000 }, { "epoch": 0.3098120038786413, "grad_norm": 1.122971773147583, "learning_rate": 1.5625569252962453e-05, "loss": 0.3563, "step": 14005 }, { "epoch": 0.30992261152015455, "grad_norm": 1.3183584213256836, "learning_rate": 1.5622696005451116e-05, "loss": 0.3472, "step": 14010 }, { "epoch": 0.3100332191616678, "grad_norm": 1.186830997467041, "learning_rate": 1.5619822079000994e-05, "loss": 0.4182, "step": 14015 }, { "epoch": 0.3101438268031811, "grad_norm": 1.3538150787353516, "learning_rate": 1.5616947473959107e-05, "loss": 0.4419, "step": 14020 }, { "epoch": 0.31025443444469436, "grad_norm": 1.5714377164840698, "learning_rate": 1.561407219067256e-05, "loss": 0.3838, "step": 14025 }, { "epoch": 0.3103650420862076, "grad_norm": 1.6658155918121338, "learning_rate": 1.5611196229488554e-05, "loss": 0.3495, "step": 14030 }, { "epoch": 0.31047564972772085, "grad_norm": 1.6114146709442139, "learning_rate": 1.5608319590754353e-05, "loss": 0.334, "step": 14035 }, { "epoch": 0.3105862573692341, "grad_norm": 1.588878870010376, "learning_rate": 1.5605442274817312e-05, "loss": 0.4247, "step": 14040 }, { "epoch": 0.3106968650107474, "grad_norm": 1.636023998260498, "learning_rate": 1.5602564282024865e-05, "loss": 0.3787, "step": 14045 }, { "epoch": 0.31080747265226066, "grad_norm": 1.483156681060791, "learning_rate": 1.5599685612724526e-05, "loss": 0.4629, "step": 14050 }, { "epoch": 0.3109180802937739, "grad_norm": 1.3146218061447144, "learning_rate": 1.55968062672639e-05, "loss": 0.429, "step": 14055 }, { "epoch": 0.31102868793528715, "grad_norm": 1.216851830482483, "learning_rate": 1.559392624599066e-05, "loss": 0.4803, "step": 14060 }, { "epoch": 0.3111392955768004, "grad_norm": 1.6199923753738403, "learning_rate": 1.5591045549252575e-05, "loss": 0.5137, "step": 14065 }, { "epoch": 0.3112499032183137, "grad_norm": 1.4441434144973755, "learning_rate": 1.5588164177397484e-05, "loss": 0.2968, "step": 14070 }, { "epoch": 0.3113605108598269, "grad_norm": 1.2282373905181885, "learning_rate": 1.558528213077331e-05, "loss": 0.3849, "step": 14075 }, { "epoch": 0.3114711185013402, "grad_norm": 1.5954248905181885, "learning_rate": 1.5582399409728064e-05, "loss": 0.3427, "step": 14080 }, { "epoch": 0.31158172614285345, "grad_norm": 1.410377025604248, "learning_rate": 1.557951601460983e-05, "loss": 0.4342, "step": 14085 }, { "epoch": 0.3116923337843667, "grad_norm": 0.8015835285186768, "learning_rate": 1.5576631945766778e-05, "loss": 0.268, "step": 14090 }, { "epoch": 0.31180294142588, "grad_norm": 1.2955851554870605, "learning_rate": 1.557374720354716e-05, "loss": 0.4818, "step": 14095 }, { "epoch": 0.3119135490673932, "grad_norm": 1.4562245607376099, "learning_rate": 1.5570861788299303e-05, "loss": 0.4349, "step": 14100 }, { "epoch": 0.3120241567089065, "grad_norm": 1.0485109090805054, "learning_rate": 1.5567975700371624e-05, "loss": 0.3322, "step": 14105 }, { "epoch": 0.31213476435041976, "grad_norm": 1.535290241241455, "learning_rate": 1.556508894011262e-05, "loss": 0.4347, "step": 14110 }, { "epoch": 0.31224537199193303, "grad_norm": 1.2728219032287598, "learning_rate": 1.5562201507870863e-05, "loss": 0.4308, "step": 14115 }, { "epoch": 0.3123559796334463, "grad_norm": 1.1357102394104004, "learning_rate": 1.5559313403995004e-05, "loss": 0.4167, "step": 14120 }, { "epoch": 0.3124665872749595, "grad_norm": 2.0356595516204834, "learning_rate": 1.5556424628833792e-05, "loss": 0.2883, "step": 14125 }, { "epoch": 0.3125771949164728, "grad_norm": 1.2557415962219238, "learning_rate": 1.5553535182736034e-05, "loss": 0.3299, "step": 14130 }, { "epoch": 0.31268780255798606, "grad_norm": 2.281773328781128, "learning_rate": 1.555064506605064e-05, "loss": 0.3566, "step": 14135 }, { "epoch": 0.31279841019949933, "grad_norm": 2.76427960395813, "learning_rate": 1.5547754279126583e-05, "loss": 0.3784, "step": 14140 }, { "epoch": 0.31290901784101255, "grad_norm": 3.282555103302002, "learning_rate": 1.5544862822312928e-05, "loss": 0.385, "step": 14145 }, { "epoch": 0.3130196254825258, "grad_norm": 0.5302538871765137, "learning_rate": 1.554197069595882e-05, "loss": 0.3301, "step": 14150 }, { "epoch": 0.3131302331240391, "grad_norm": 1.3161354064941406, "learning_rate": 1.553907790041348e-05, "loss": 0.3394, "step": 14155 }, { "epoch": 0.31324084076555236, "grad_norm": 1.696223258972168, "learning_rate": 1.553618443602621e-05, "loss": 0.3906, "step": 14160 }, { "epoch": 0.31335144840706564, "grad_norm": 1.1076326370239258, "learning_rate": 1.55332903031464e-05, "loss": 0.254, "step": 14165 }, { "epoch": 0.31346205604857885, "grad_norm": 1.1300218105316162, "learning_rate": 1.5530395502123505e-05, "loss": 0.564, "step": 14170 }, { "epoch": 0.3135726636900921, "grad_norm": 1.1017028093338013, "learning_rate": 1.5527500033307084e-05, "loss": 0.3067, "step": 14175 }, { "epoch": 0.3136832713316054, "grad_norm": 1.5717464685440063, "learning_rate": 1.552460389704676e-05, "loss": 0.46, "step": 14180 }, { "epoch": 0.31379387897311867, "grad_norm": 1.572155237197876, "learning_rate": 1.552170709369224e-05, "loss": 0.417, "step": 14185 }, { "epoch": 0.31390448661463194, "grad_norm": 2.0199227333068848, "learning_rate": 1.5518809623593305e-05, "loss": 0.3965, "step": 14190 }, { "epoch": 0.31401509425614516, "grad_norm": 1.5915358066558838, "learning_rate": 1.5515911487099834e-05, "loss": 0.4452, "step": 14195 }, { "epoch": 0.3141257018976584, "grad_norm": 2.0619289875030518, "learning_rate": 1.551301268456177e-05, "loss": 0.3505, "step": 14200 }, { "epoch": 0.3142363095391717, "grad_norm": 2.3304038047790527, "learning_rate": 1.5510113216329146e-05, "loss": 0.4067, "step": 14205 }, { "epoch": 0.31434691718068497, "grad_norm": 1.0460174083709717, "learning_rate": 1.550721308275207e-05, "loss": 0.2931, "step": 14210 }, { "epoch": 0.31445752482219824, "grad_norm": 1.226164698600769, "learning_rate": 1.5504312284180736e-05, "loss": 0.5607, "step": 14215 }, { "epoch": 0.31456813246371146, "grad_norm": 0.8898358345031738, "learning_rate": 1.5501410820965408e-05, "loss": 0.3131, "step": 14220 }, { "epoch": 0.31467874010522473, "grad_norm": 1.5049315690994263, "learning_rate": 1.5498508693456437e-05, "loss": 0.322, "step": 14225 }, { "epoch": 0.314789347746738, "grad_norm": 1.2298288345336914, "learning_rate": 1.5495605902004262e-05, "loss": 0.4105, "step": 14230 }, { "epoch": 0.3148999553882513, "grad_norm": 1.212738037109375, "learning_rate": 1.5492702446959388e-05, "loss": 0.3089, "step": 14235 }, { "epoch": 0.3150105630297645, "grad_norm": 1.5726613998413086, "learning_rate": 1.5489798328672405e-05, "loss": 0.3966, "step": 14240 }, { "epoch": 0.31512117067127776, "grad_norm": 1.0106443166732788, "learning_rate": 1.548689354749399e-05, "loss": 0.1955, "step": 14245 }, { "epoch": 0.31523177831279103, "grad_norm": 1.1256396770477295, "learning_rate": 1.548398810377489e-05, "loss": 0.3516, "step": 14250 }, { "epoch": 0.3153423859543043, "grad_norm": 1.0127840042114258, "learning_rate": 1.5481081997865935e-05, "loss": 0.4492, "step": 14255 }, { "epoch": 0.3154529935958176, "grad_norm": 1.5958231687545776, "learning_rate": 1.547817523011804e-05, "loss": 0.3641, "step": 14260 }, { "epoch": 0.3155636012373308, "grad_norm": 0.5532341003417969, "learning_rate": 1.54752678008822e-05, "loss": 0.3447, "step": 14265 }, { "epoch": 0.31567420887884406, "grad_norm": 1.339255690574646, "learning_rate": 1.5472359710509473e-05, "loss": 0.4705, "step": 14270 }, { "epoch": 0.31578481652035734, "grad_norm": 1.6437129974365234, "learning_rate": 1.5469450959351025e-05, "loss": 0.3415, "step": 14275 }, { "epoch": 0.3158954241618706, "grad_norm": 0.7050947546958923, "learning_rate": 1.546654154775808e-05, "loss": 0.2831, "step": 14280 }, { "epoch": 0.3160060318033839, "grad_norm": 1.2983628511428833, "learning_rate": 1.5463631476081946e-05, "loss": 0.385, "step": 14285 }, { "epoch": 0.3161166394448971, "grad_norm": 1.4567885398864746, "learning_rate": 1.5460720744674017e-05, "loss": 0.2813, "step": 14290 }, { "epoch": 0.31622724708641037, "grad_norm": 1.0970953702926636, "learning_rate": 1.5457809353885762e-05, "loss": 0.3801, "step": 14295 }, { "epoch": 0.31633785472792364, "grad_norm": 0.7975950837135315, "learning_rate": 1.5454897304068733e-05, "loss": 0.4439, "step": 14300 }, { "epoch": 0.3164484623694369, "grad_norm": 2.5761969089508057, "learning_rate": 1.5451984595574554e-05, "loss": 0.3212, "step": 14305 }, { "epoch": 0.3165590700109502, "grad_norm": 1.393101453781128, "learning_rate": 1.5449071228754934e-05, "loss": 0.4041, "step": 14310 }, { "epoch": 0.3166696776524634, "grad_norm": 1.753588080406189, "learning_rate": 1.5446157203961668e-05, "loss": 0.3448, "step": 14315 }, { "epoch": 0.31678028529397667, "grad_norm": 1.423006296157837, "learning_rate": 1.544324252154662e-05, "loss": 0.354, "step": 14320 }, { "epoch": 0.31689089293548994, "grad_norm": 1.2246172428131104, "learning_rate": 1.544032718186173e-05, "loss": 0.4934, "step": 14325 }, { "epoch": 0.3170015005770032, "grad_norm": 1.8330967426300049, "learning_rate": 1.5437411185259034e-05, "loss": 0.3196, "step": 14330 }, { "epoch": 0.31711210821851643, "grad_norm": 0.9807466864585876, "learning_rate": 1.543449453209063e-05, "loss": 0.419, "step": 14335 }, { "epoch": 0.3172227158600297, "grad_norm": 1.6355793476104736, "learning_rate": 1.5431577222708713e-05, "loss": 0.4041, "step": 14340 }, { "epoch": 0.317333323501543, "grad_norm": 1.7640920877456665, "learning_rate": 1.5428659257465535e-05, "loss": 0.3317, "step": 14345 }, { "epoch": 0.31744393114305625, "grad_norm": 0.9216453433036804, "learning_rate": 1.542574063671345e-05, "loss": 0.3928, "step": 14350 }, { "epoch": 0.3175545387845695, "grad_norm": 1.408515214920044, "learning_rate": 1.5422821360804876e-05, "loss": 0.3584, "step": 14355 }, { "epoch": 0.31766514642608273, "grad_norm": 1.681222915649414, "learning_rate": 1.541990143009231e-05, "loss": 0.5397, "step": 14360 }, { "epoch": 0.317775754067596, "grad_norm": 1.3175808191299438, "learning_rate": 1.5416980844928345e-05, "loss": 0.4358, "step": 14365 }, { "epoch": 0.3178863617091093, "grad_norm": 1.6715810298919678, "learning_rate": 1.5414059605665627e-05, "loss": 0.3667, "step": 14370 }, { "epoch": 0.31799696935062255, "grad_norm": 1.3325188159942627, "learning_rate": 1.5411137712656903e-05, "loss": 0.3478, "step": 14375 }, { "epoch": 0.3181075769921358, "grad_norm": 1.1466457843780518, "learning_rate": 1.540821516625499e-05, "loss": 0.4264, "step": 14380 }, { "epoch": 0.31821818463364904, "grad_norm": 2.015427589416504, "learning_rate": 1.540529196681278e-05, "loss": 0.3834, "step": 14385 }, { "epoch": 0.3183287922751623, "grad_norm": 1.9394084215164185, "learning_rate": 1.5402368114683253e-05, "loss": 0.3508, "step": 14390 }, { "epoch": 0.3184393999166756, "grad_norm": 1.6562700271606445, "learning_rate": 1.5399443610219464e-05, "loss": 0.3002, "step": 14395 }, { "epoch": 0.31855000755818885, "grad_norm": 2.158621311187744, "learning_rate": 1.539651845377454e-05, "loss": 0.4265, "step": 14400 }, { "epoch": 0.3186606151997021, "grad_norm": 1.047662377357483, "learning_rate": 1.53935926457017e-05, "loss": 0.3804, "step": 14405 }, { "epoch": 0.31877122284121534, "grad_norm": 1.8281711339950562, "learning_rate": 1.539066618635423e-05, "loss": 0.4268, "step": 14410 }, { "epoch": 0.3188818304827286, "grad_norm": 2.021919012069702, "learning_rate": 1.53877390760855e-05, "loss": 0.3348, "step": 14415 }, { "epoch": 0.3189924381242419, "grad_norm": 2.1780598163604736, "learning_rate": 1.5384811315248955e-05, "loss": 0.5511, "step": 14420 }, { "epoch": 0.31910304576575516, "grad_norm": 1.2507044076919556, "learning_rate": 1.5381882904198127e-05, "loss": 0.3778, "step": 14425 }, { "epoch": 0.31921365340726837, "grad_norm": 1.3155583143234253, "learning_rate": 1.5378953843286616e-05, "loss": 0.5417, "step": 14430 }, { "epoch": 0.31932426104878164, "grad_norm": 1.4348222017288208, "learning_rate": 1.5376024132868108e-05, "loss": 0.3587, "step": 14435 }, { "epoch": 0.3194348686902949, "grad_norm": 1.211649775505066, "learning_rate": 1.5373093773296362e-05, "loss": 0.2967, "step": 14440 }, { "epoch": 0.3195454763318082, "grad_norm": 0.9934759140014648, "learning_rate": 1.537016276492522e-05, "loss": 0.3749, "step": 14445 }, { "epoch": 0.31965608397332146, "grad_norm": 1.0316680669784546, "learning_rate": 1.53672311081086e-05, "loss": 0.451, "step": 14450 }, { "epoch": 0.3197666916148347, "grad_norm": 0.9643877148628235, "learning_rate": 1.53642988032005e-05, "loss": 0.2686, "step": 14455 }, { "epoch": 0.31987729925634795, "grad_norm": 1.6563833951950073, "learning_rate": 1.536136585055499e-05, "loss": 0.5467, "step": 14460 }, { "epoch": 0.3199879068978612, "grad_norm": 1.5543771982192993, "learning_rate": 1.5358432250526226e-05, "loss": 0.4789, "step": 14465 }, { "epoch": 0.3200985145393745, "grad_norm": 1.513300895690918, "learning_rate": 1.5355498003468444e-05, "loss": 0.2699, "step": 14470 }, { "epoch": 0.32020912218088776, "grad_norm": 0.8414377570152283, "learning_rate": 1.535256310973595e-05, "loss": 0.3243, "step": 14475 }, { "epoch": 0.320319729822401, "grad_norm": 1.5707560777664185, "learning_rate": 1.5349627569683122e-05, "loss": 0.2812, "step": 14480 }, { "epoch": 0.32043033746391425, "grad_norm": 1.4508510828018188, "learning_rate": 1.5346691383664443e-05, "loss": 0.3635, "step": 14485 }, { "epoch": 0.3205409451054275, "grad_norm": 1.2406642436981201, "learning_rate": 1.5343754552034443e-05, "loss": 0.3074, "step": 14490 }, { "epoch": 0.3206515527469408, "grad_norm": 1.176429271697998, "learning_rate": 1.534081707514775e-05, "loss": 0.4403, "step": 14495 }, { "epoch": 0.320762160388454, "grad_norm": 1.3579981327056885, "learning_rate": 1.5337878953359063e-05, "loss": 0.404, "step": 14500 }, { "epoch": 0.3208727680299673, "grad_norm": 1.3602241277694702, "learning_rate": 1.5334940187023154e-05, "loss": 0.43, "step": 14505 }, { "epoch": 0.32098337567148055, "grad_norm": 1.5829514265060425, "learning_rate": 1.5332000776494888e-05, "loss": 0.396, "step": 14510 }, { "epoch": 0.3210939833129938, "grad_norm": 1.761852502822876, "learning_rate": 1.5329060722129187e-05, "loss": 0.4251, "step": 14515 }, { "epoch": 0.3212045909545071, "grad_norm": 1.9784876108169556, "learning_rate": 1.5326120024281067e-05, "loss": 0.3988, "step": 14520 }, { "epoch": 0.3213151985960203, "grad_norm": 1.320387601852417, "learning_rate": 1.5323178683305615e-05, "loss": 0.3791, "step": 14525 }, { "epoch": 0.3214258062375336, "grad_norm": 0.7565842270851135, "learning_rate": 1.5320236699558e-05, "loss": 0.5561, "step": 14530 }, { "epoch": 0.32153641387904686, "grad_norm": 1.1842395067214966, "learning_rate": 1.5317294073393464e-05, "loss": 0.4649, "step": 14535 }, { "epoch": 0.32164702152056013, "grad_norm": 1.6737161874771118, "learning_rate": 1.5314350805167328e-05, "loss": 0.243, "step": 14540 }, { "epoch": 0.3217576291620734, "grad_norm": 1.4825513362884521, "learning_rate": 1.5311406895234987e-05, "loss": 0.2909, "step": 14545 }, { "epoch": 0.3218682368035866, "grad_norm": 1.6713826656341553, "learning_rate": 1.530846234395192e-05, "loss": 0.3956, "step": 14550 }, { "epoch": 0.3219788444450999, "grad_norm": 1.281362771987915, "learning_rate": 1.5305517151673688e-05, "loss": 0.4396, "step": 14555 }, { "epoch": 0.32208945208661316, "grad_norm": 1.1501697301864624, "learning_rate": 1.5302571318755912e-05, "loss": 0.3501, "step": 14560 }, { "epoch": 0.32220005972812643, "grad_norm": 0.9663934111595154, "learning_rate": 1.5299624845554303e-05, "loss": 0.4286, "step": 14565 }, { "epoch": 0.3223106673696397, "grad_norm": 1.831695556640625, "learning_rate": 1.529667773242465e-05, "loss": 0.2928, "step": 14570 }, { "epoch": 0.3224212750111529, "grad_norm": 1.1954333782196045, "learning_rate": 1.5293729979722808e-05, "loss": 0.4422, "step": 14575 }, { "epoch": 0.3225318826526662, "grad_norm": 1.5938913822174072, "learning_rate": 1.5290781587804725e-05, "loss": 0.4061, "step": 14580 }, { "epoch": 0.32264249029417946, "grad_norm": 1.9328535795211792, "learning_rate": 1.528783255702642e-05, "loss": 0.3381, "step": 14585 }, { "epoch": 0.32275309793569273, "grad_norm": 1.2220349311828613, "learning_rate": 1.528488288774398e-05, "loss": 0.3944, "step": 14590 }, { "epoch": 0.32286370557720595, "grad_norm": 0.8086104989051819, "learning_rate": 1.528193258031358e-05, "loss": 0.2465, "step": 14595 }, { "epoch": 0.3229743132187192, "grad_norm": 1.4957777261734009, "learning_rate": 1.5278981635091472e-05, "loss": 0.3763, "step": 14600 }, { "epoch": 0.3230849208602325, "grad_norm": 1.1681627035140991, "learning_rate": 1.5276030052433978e-05, "loss": 0.5411, "step": 14605 }, { "epoch": 0.32319552850174577, "grad_norm": 1.622413992881775, "learning_rate": 1.52730778326975e-05, "loss": 0.5189, "step": 14610 }, { "epoch": 0.32330613614325904, "grad_norm": 0.8401442766189575, "learning_rate": 1.527012497623852e-05, "loss": 0.296, "step": 14615 }, { "epoch": 0.32341674378477225, "grad_norm": 1.4295400381088257, "learning_rate": 1.5267171483413594e-05, "loss": 0.2147, "step": 14620 }, { "epoch": 0.3235273514262855, "grad_norm": 1.8404512405395508, "learning_rate": 1.526421735457935e-05, "loss": 0.3728, "step": 14625 }, { "epoch": 0.3236379590677988, "grad_norm": 1.2711983919143677, "learning_rate": 1.5261262590092508e-05, "loss": 0.405, "step": 14630 }, { "epoch": 0.32374856670931207, "grad_norm": 1.5661524534225464, "learning_rate": 1.5258307190309852e-05, "loss": 0.4243, "step": 14635 }, { "epoch": 0.32385917435082534, "grad_norm": 1.9366601705551147, "learning_rate": 1.5255351155588239e-05, "loss": 0.3372, "step": 14640 }, { "epoch": 0.32396978199233856, "grad_norm": 1.478554368019104, "learning_rate": 1.5252394486284614e-05, "loss": 0.2519, "step": 14645 }, { "epoch": 0.32408038963385183, "grad_norm": 1.5870856046676636, "learning_rate": 1.5249437182755994e-05, "loss": 0.3563, "step": 14650 }, { "epoch": 0.3241909972753651, "grad_norm": 1.9128881692886353, "learning_rate": 1.5246479245359473e-05, "loss": 0.4528, "step": 14655 }, { "epoch": 0.32430160491687837, "grad_norm": 1.1710853576660156, "learning_rate": 1.5243520674452219e-05, "loss": 0.2991, "step": 14660 }, { "epoch": 0.32441221255839164, "grad_norm": 1.7334476709365845, "learning_rate": 1.5240561470391481e-05, "loss": 0.469, "step": 14665 }, { "epoch": 0.32452282019990486, "grad_norm": 1.3288002014160156, "learning_rate": 1.5237601633534582e-05, "loss": 0.2524, "step": 14670 }, { "epoch": 0.32463342784141813, "grad_norm": 1.4144055843353271, "learning_rate": 1.5234641164238916e-05, "loss": 0.2646, "step": 14675 }, { "epoch": 0.3247440354829314, "grad_norm": 1.4253031015396118, "learning_rate": 1.5231680062861966e-05, "loss": 0.4301, "step": 14680 }, { "epoch": 0.3248546431244447, "grad_norm": 1.215436339378357, "learning_rate": 1.5228718329761281e-05, "loss": 0.3303, "step": 14685 }, { "epoch": 0.3249652507659579, "grad_norm": 1.1650102138519287, "learning_rate": 1.5225755965294489e-05, "loss": 0.3626, "step": 14690 }, { "epoch": 0.32507585840747116, "grad_norm": 1.394532561302185, "learning_rate": 1.5222792969819291e-05, "loss": 0.4697, "step": 14695 }, { "epoch": 0.32518646604898443, "grad_norm": 0.857699453830719, "learning_rate": 1.5219829343693475e-05, "loss": 0.3393, "step": 14700 }, { "epoch": 0.3252970736904977, "grad_norm": 0.9695771932601929, "learning_rate": 1.5216865087274894e-05, "loss": 0.328, "step": 14705 }, { "epoch": 0.325407681332011, "grad_norm": 1.1862791776657104, "learning_rate": 1.521390020092148e-05, "loss": 0.4299, "step": 14710 }, { "epoch": 0.3255182889735242, "grad_norm": 1.629356861114502, "learning_rate": 1.5210934684991245e-05, "loss": 0.3838, "step": 14715 }, { "epoch": 0.32562889661503747, "grad_norm": 1.5664920806884766, "learning_rate": 1.5207968539842273e-05, "loss": 0.3367, "step": 14720 }, { "epoch": 0.32573950425655074, "grad_norm": 1.0815625190734863, "learning_rate": 1.5205001765832724e-05, "loss": 0.2512, "step": 14725 }, { "epoch": 0.325850111898064, "grad_norm": 1.5376205444335938, "learning_rate": 1.5202034363320835e-05, "loss": 0.4403, "step": 14730 }, { "epoch": 0.3259607195395773, "grad_norm": 0.81709885597229, "learning_rate": 1.5199066332664923e-05, "loss": 0.4024, "step": 14735 }, { "epoch": 0.3260713271810905, "grad_norm": 1.313712477684021, "learning_rate": 1.519609767422337e-05, "loss": 0.3815, "step": 14740 }, { "epoch": 0.32618193482260377, "grad_norm": 1.363387107849121, "learning_rate": 1.5193128388354645e-05, "loss": 0.3117, "step": 14745 }, { "epoch": 0.32629254246411704, "grad_norm": 1.5907191038131714, "learning_rate": 1.519015847541729e-05, "loss": 0.3084, "step": 14750 }, { "epoch": 0.3264031501056303, "grad_norm": 0.8666683435440063, "learning_rate": 1.5187187935769917e-05, "loss": 0.4263, "step": 14755 }, { "epoch": 0.3265137577471436, "grad_norm": 1.4951146841049194, "learning_rate": 1.5184216769771216e-05, "loss": 0.4175, "step": 14760 }, { "epoch": 0.3266243653886568, "grad_norm": 1.4107425212860107, "learning_rate": 1.518124497777996e-05, "loss": 0.452, "step": 14765 }, { "epoch": 0.3267349730301701, "grad_norm": 1.3244667053222656, "learning_rate": 1.5178272560154992e-05, "loss": 0.4326, "step": 14770 }, { "epoch": 0.32684558067168334, "grad_norm": 1.389815092086792, "learning_rate": 1.5175299517255227e-05, "loss": 0.4999, "step": 14775 }, { "epoch": 0.3269561883131966, "grad_norm": 1.2653944492340088, "learning_rate": 1.5172325849439658e-05, "loss": 0.442, "step": 14780 }, { "epoch": 0.32706679595470983, "grad_norm": 1.4293169975280762, "learning_rate": 1.5169351557067357e-05, "loss": 0.4545, "step": 14785 }, { "epoch": 0.3271774035962231, "grad_norm": 1.3028392791748047, "learning_rate": 1.516637664049747e-05, "loss": 0.3304, "step": 14790 }, { "epoch": 0.3272880112377364, "grad_norm": 1.3288618326187134, "learning_rate": 1.5163401100089217e-05, "loss": 0.2526, "step": 14795 }, { "epoch": 0.32739861887924965, "grad_norm": 1.4042491912841797, "learning_rate": 1.516042493620189e-05, "loss": 0.4463, "step": 14800 }, { "epoch": 0.3275092265207629, "grad_norm": 1.3408629894256592, "learning_rate": 1.515744814919486e-05, "loss": 0.355, "step": 14805 }, { "epoch": 0.32761983416227614, "grad_norm": 0.8316897749900818, "learning_rate": 1.5154470739427577e-05, "loss": 0.3292, "step": 14810 }, { "epoch": 0.3277304418037894, "grad_norm": 0.8816894292831421, "learning_rate": 1.5151492707259564e-05, "loss": 0.3513, "step": 14815 }, { "epoch": 0.3278410494453027, "grad_norm": 1.4437127113342285, "learning_rate": 1.514851405305041e-05, "loss": 0.3151, "step": 14820 }, { "epoch": 0.32795165708681595, "grad_norm": 1.8342268466949463, "learning_rate": 1.5145534777159792e-05, "loss": 0.4281, "step": 14825 }, { "epoch": 0.3280622647283292, "grad_norm": 1.29441237449646, "learning_rate": 1.5142554879947455e-05, "loss": 0.3323, "step": 14830 }, { "epoch": 0.32817287236984244, "grad_norm": 1.2054744958877563, "learning_rate": 1.5139574361773219e-05, "loss": 0.455, "step": 14835 }, { "epoch": 0.3282834800113557, "grad_norm": 1.586668848991394, "learning_rate": 1.5136593222996984e-05, "loss": 0.3232, "step": 14840 }, { "epoch": 0.328394087652869, "grad_norm": 1.260347843170166, "learning_rate": 1.5133611463978721e-05, "loss": 0.4546, "step": 14845 }, { "epoch": 0.32850469529438225, "grad_norm": 1.8130908012390137, "learning_rate": 1.5130629085078474e-05, "loss": 0.2888, "step": 14850 }, { "epoch": 0.32861530293589547, "grad_norm": 2.0248329639434814, "learning_rate": 1.512764608665637e-05, "loss": 0.2682, "step": 14855 }, { "epoch": 0.32872591057740874, "grad_norm": 1.6004464626312256, "learning_rate": 1.5124662469072594e-05, "loss": 0.291, "step": 14860 }, { "epoch": 0.328836518218922, "grad_norm": 1.4384435415267944, "learning_rate": 1.512167823268743e-05, "loss": 0.3702, "step": 14865 }, { "epoch": 0.3289471258604353, "grad_norm": 1.3705341815948486, "learning_rate": 1.5118693377861214e-05, "loss": 0.2901, "step": 14870 }, { "epoch": 0.32905773350194856, "grad_norm": 1.3670448064804077, "learning_rate": 1.511570790495437e-05, "loss": 0.4572, "step": 14875 }, { "epoch": 0.3291683411434618, "grad_norm": 1.6075677871704102, "learning_rate": 1.5112721814327393e-05, "loss": 0.4624, "step": 14880 }, { "epoch": 0.32927894878497505, "grad_norm": 2.081094264984131, "learning_rate": 1.5109735106340852e-05, "loss": 0.2928, "step": 14885 }, { "epoch": 0.3293895564264883, "grad_norm": 1.7985856533050537, "learning_rate": 1.5106747781355392e-05, "loss": 0.3747, "step": 14890 }, { "epoch": 0.3295001640680016, "grad_norm": 1.4107673168182373, "learning_rate": 1.510375983973173e-05, "loss": 0.2806, "step": 14895 }, { "epoch": 0.32961077170951486, "grad_norm": 1.4547263383865356, "learning_rate": 1.5100771281830664e-05, "loss": 0.2762, "step": 14900 }, { "epoch": 0.3297213793510281, "grad_norm": 1.2970472574234009, "learning_rate": 1.5097782108013052e-05, "loss": 0.2316, "step": 14905 }, { "epoch": 0.32983198699254135, "grad_norm": 1.4691580533981323, "learning_rate": 1.5094792318639844e-05, "loss": 0.543, "step": 14910 }, { "epoch": 0.3299425946340546, "grad_norm": 1.0681447982788086, "learning_rate": 1.509180191407205e-05, "loss": 0.1708, "step": 14915 }, { "epoch": 0.3300532022755679, "grad_norm": 1.366597294807434, "learning_rate": 1.5088810894670767e-05, "loss": 0.2134, "step": 14920 }, { "epoch": 0.33016380991708116, "grad_norm": 1.7230712175369263, "learning_rate": 1.5085819260797154e-05, "loss": 0.3452, "step": 14925 }, { "epoch": 0.3302744175585944, "grad_norm": 1.4507865905761719, "learning_rate": 1.5082827012812455e-05, "loss": 0.5546, "step": 14930 }, { "epoch": 0.33038502520010765, "grad_norm": 1.4737211465835571, "learning_rate": 1.507983415107798e-05, "loss": 0.2436, "step": 14935 }, { "epoch": 0.3304956328416209, "grad_norm": 1.1051440238952637, "learning_rate": 1.5076840675955113e-05, "loss": 0.5153, "step": 14940 }, { "epoch": 0.3306062404831342, "grad_norm": 1.0280061960220337, "learning_rate": 1.507384658780532e-05, "loss": 0.3668, "step": 14945 }, { "epoch": 0.3307168481246474, "grad_norm": 0.760373055934906, "learning_rate": 1.5070851886990135e-05, "loss": 0.5248, "step": 14950 }, { "epoch": 0.3308274557661607, "grad_norm": 1.3347617387771606, "learning_rate": 1.5067856573871168e-05, "loss": 0.325, "step": 14955 }, { "epoch": 0.33093806340767395, "grad_norm": 1.8679149150848389, "learning_rate": 1.5064860648810098e-05, "loss": 0.3955, "step": 14960 }, { "epoch": 0.3310486710491872, "grad_norm": 0.9966967701911926, "learning_rate": 1.5061864112168687e-05, "loss": 0.3391, "step": 14965 }, { "epoch": 0.3311592786907005, "grad_norm": 1.8246816396713257, "learning_rate": 1.505886696430876e-05, "loss": 0.3711, "step": 14970 }, { "epoch": 0.3312698863322137, "grad_norm": 1.182856559753418, "learning_rate": 1.5055869205592231e-05, "loss": 0.4282, "step": 14975 }, { "epoch": 0.331380493973727, "grad_norm": 1.0628403425216675, "learning_rate": 1.505287083638107e-05, "loss": 0.2594, "step": 14980 }, { "epoch": 0.33149110161524026, "grad_norm": 1.537427306175232, "learning_rate": 1.5049871857037333e-05, "loss": 0.4483, "step": 14985 }, { "epoch": 0.33160170925675353, "grad_norm": 1.1223772764205933, "learning_rate": 1.5046872267923145e-05, "loss": 0.3949, "step": 14990 }, { "epoch": 0.3317123168982668, "grad_norm": 1.6198161840438843, "learning_rate": 1.5043872069400706e-05, "loss": 0.4798, "step": 14995 }, { "epoch": 0.33182292453978, "grad_norm": 0.7976606488227844, "learning_rate": 1.5040871261832288e-05, "loss": 0.4235, "step": 15000 }, { "epoch": 0.3319335321812933, "grad_norm": 1.4939112663269043, "learning_rate": 1.5037869845580239e-05, "loss": 0.4608, "step": 15005 }, { "epoch": 0.33204413982280656, "grad_norm": 1.5135420560836792, "learning_rate": 1.5034867821006978e-05, "loss": 0.5889, "step": 15010 }, { "epoch": 0.33215474746431983, "grad_norm": 0.9539763927459717, "learning_rate": 1.5031865188474997e-05, "loss": 0.3253, "step": 15015 }, { "epoch": 0.3322653551058331, "grad_norm": 1.0498316287994385, "learning_rate": 1.5028861948346864e-05, "loss": 0.3979, "step": 15020 }, { "epoch": 0.3323759627473463, "grad_norm": 1.4617416858673096, "learning_rate": 1.502585810098522e-05, "loss": 0.4182, "step": 15025 }, { "epoch": 0.3324865703888596, "grad_norm": 1.0921227931976318, "learning_rate": 1.5022853646752779e-05, "loss": 0.39, "step": 15030 }, { "epoch": 0.33259717803037286, "grad_norm": 0.929851233959198, "learning_rate": 1.5019848586012329e-05, "loss": 0.2449, "step": 15035 }, { "epoch": 0.33270778567188614, "grad_norm": 2.229644298553467, "learning_rate": 1.5016842919126728e-05, "loss": 0.3887, "step": 15040 }, { "epoch": 0.33281839331339935, "grad_norm": 1.1272032260894775, "learning_rate": 1.5013836646458908e-05, "loss": 0.4653, "step": 15045 }, { "epoch": 0.3329290009549126, "grad_norm": 1.2571130990982056, "learning_rate": 1.5010829768371879e-05, "loss": 0.4127, "step": 15050 }, { "epoch": 0.3330396085964259, "grad_norm": 1.059865117073059, "learning_rate": 1.500782228522872e-05, "loss": 0.3794, "step": 15055 }, { "epoch": 0.33315021623793917, "grad_norm": 1.0595335960388184, "learning_rate": 1.5004814197392579e-05, "loss": 0.3733, "step": 15060 }, { "epoch": 0.33326082387945244, "grad_norm": 1.0637463331222534, "learning_rate": 1.5001805505226687e-05, "loss": 0.41, "step": 15065 }, { "epoch": 0.33337143152096566, "grad_norm": 1.7714707851409912, "learning_rate": 1.4998796209094342e-05, "loss": 0.2842, "step": 15070 }, { "epoch": 0.3334820391624789, "grad_norm": 1.1733161211013794, "learning_rate": 1.4995786309358916e-05, "loss": 0.3452, "step": 15075 }, { "epoch": 0.3335926468039922, "grad_norm": 0.9889556765556335, "learning_rate": 1.4992775806383848e-05, "loss": 0.2461, "step": 15080 }, { "epoch": 0.33370325444550547, "grad_norm": 1.1160647869110107, "learning_rate": 1.4989764700532661e-05, "loss": 0.4248, "step": 15085 }, { "epoch": 0.33381386208701874, "grad_norm": 1.420602798461914, "learning_rate": 1.4986752992168945e-05, "loss": 0.3449, "step": 15090 }, { "epoch": 0.33392446972853196, "grad_norm": 1.4641212224960327, "learning_rate": 1.4983740681656357e-05, "loss": 0.3294, "step": 15095 }, { "epoch": 0.33403507737004523, "grad_norm": 1.9180065393447876, "learning_rate": 1.4980727769358641e-05, "loss": 0.3756, "step": 15100 }, { "epoch": 0.3341456850115585, "grad_norm": 1.3677024841308594, "learning_rate": 1.4977714255639599e-05, "loss": 0.3498, "step": 15105 }, { "epoch": 0.3342562926530718, "grad_norm": 1.1993868350982666, "learning_rate": 1.4974700140863115e-05, "loss": 0.286, "step": 15110 }, { "epoch": 0.33436690029458505, "grad_norm": 1.3680723905563354, "learning_rate": 1.497168542539314e-05, "loss": 0.4339, "step": 15115 }, { "epoch": 0.33447750793609826, "grad_norm": 1.1248645782470703, "learning_rate": 1.4968670109593701e-05, "loss": 0.4708, "step": 15120 }, { "epoch": 0.33458811557761153, "grad_norm": 1.4038299322128296, "learning_rate": 1.4965654193828898e-05, "loss": 0.353, "step": 15125 }, { "epoch": 0.3346987232191248, "grad_norm": 1.8783737421035767, "learning_rate": 1.4962637678462899e-05, "loss": 0.3656, "step": 15130 }, { "epoch": 0.3348093308606381, "grad_norm": 1.5924773216247559, "learning_rate": 1.495962056385995e-05, "loss": 0.438, "step": 15135 }, { "epoch": 0.3349199385021513, "grad_norm": 1.231215476989746, "learning_rate": 1.4956602850384362e-05, "loss": 0.2513, "step": 15140 }, { "epoch": 0.33503054614366456, "grad_norm": 0.9973697662353516, "learning_rate": 1.4953584538400531e-05, "loss": 0.2455, "step": 15145 }, { "epoch": 0.33514115378517784, "grad_norm": 1.0954347848892212, "learning_rate": 1.495056562827291e-05, "loss": 0.3792, "step": 15150 }, { "epoch": 0.3352517614266911, "grad_norm": 1.9158743619918823, "learning_rate": 1.4947546120366034e-05, "loss": 0.3828, "step": 15155 }, { "epoch": 0.3353623690682044, "grad_norm": 0.8821646571159363, "learning_rate": 1.494452601504451e-05, "loss": 0.3248, "step": 15160 }, { "epoch": 0.3354729767097176, "grad_norm": 1.4237632751464844, "learning_rate": 1.494150531267301e-05, "loss": 0.2607, "step": 15165 }, { "epoch": 0.33558358435123087, "grad_norm": 1.7991888523101807, "learning_rate": 1.4938484013616287e-05, "loss": 0.2993, "step": 15170 }, { "epoch": 0.33569419199274414, "grad_norm": 0.9779825210571289, "learning_rate": 1.493546211823916e-05, "loss": 0.4101, "step": 15175 }, { "epoch": 0.3358047996342574, "grad_norm": 2.5719733238220215, "learning_rate": 1.493243962690652e-05, "loss": 0.3214, "step": 15180 }, { "epoch": 0.3359154072757707, "grad_norm": 1.305903434753418, "learning_rate": 1.4929416539983336e-05, "loss": 0.3463, "step": 15185 }, { "epoch": 0.3360260149172839, "grad_norm": 1.3291103839874268, "learning_rate": 1.4926392857834643e-05, "loss": 0.3857, "step": 15190 }, { "epoch": 0.33613662255879717, "grad_norm": 1.3582051992416382, "learning_rate": 1.492336858082555e-05, "loss": 0.2913, "step": 15195 }, { "epoch": 0.33624723020031044, "grad_norm": 1.7510168552398682, "learning_rate": 1.4920343709321237e-05, "loss": 0.2701, "step": 15200 }, { "epoch": 0.3363578378418237, "grad_norm": 1.2959903478622437, "learning_rate": 1.4917318243686955e-05, "loss": 0.3841, "step": 15205 }, { "epoch": 0.336468445483337, "grad_norm": 1.8178781270980835, "learning_rate": 1.491429218428803e-05, "loss": 0.3441, "step": 15210 }, { "epoch": 0.3365790531248502, "grad_norm": 1.4175045490264893, "learning_rate": 1.4911265531489857e-05, "loss": 0.3607, "step": 15215 }, { "epoch": 0.3366896607663635, "grad_norm": 1.6211113929748535, "learning_rate": 1.4908238285657905e-05, "loss": 0.296, "step": 15220 }, { "epoch": 0.33680026840787675, "grad_norm": 1.2119474411010742, "learning_rate": 1.490521044715771e-05, "loss": 0.4013, "step": 15225 }, { "epoch": 0.33691087604939, "grad_norm": 1.2634992599487305, "learning_rate": 1.4902182016354885e-05, "loss": 0.3466, "step": 15230 }, { "epoch": 0.33702148369090323, "grad_norm": 1.3464421033859253, "learning_rate": 1.489915299361511e-05, "loss": 0.4238, "step": 15235 }, { "epoch": 0.3371320913324165, "grad_norm": 1.6644154787063599, "learning_rate": 1.4896123379304141e-05, "loss": 0.3043, "step": 15240 }, { "epoch": 0.3372426989739298, "grad_norm": 1.3242945671081543, "learning_rate": 1.48930931737878e-05, "loss": 0.3137, "step": 15245 }, { "epoch": 0.33735330661544305, "grad_norm": 1.2048571109771729, "learning_rate": 1.4890062377431987e-05, "loss": 0.4495, "step": 15250 }, { "epoch": 0.3374639142569563, "grad_norm": 1.2554562091827393, "learning_rate": 1.4887030990602664e-05, "loss": 0.377, "step": 15255 }, { "epoch": 0.33757452189846954, "grad_norm": 1.1658450365066528, "learning_rate": 1.4883999013665878e-05, "loss": 0.4337, "step": 15260 }, { "epoch": 0.3376851295399828, "grad_norm": 1.6549761295318604, "learning_rate": 1.4880966446987732e-05, "loss": 0.3069, "step": 15265 }, { "epoch": 0.3377957371814961, "grad_norm": 1.6546498537063599, "learning_rate": 1.4877933290934412e-05, "loss": 0.3333, "step": 15270 }, { "epoch": 0.33790634482300935, "grad_norm": 1.24002206325531, "learning_rate": 1.487489954587217e-05, "loss": 0.4166, "step": 15275 }, { "epoch": 0.3380169524645226, "grad_norm": 1.6477043628692627, "learning_rate": 1.4871865212167327e-05, "loss": 0.5192, "step": 15280 }, { "epoch": 0.33812756010603584, "grad_norm": 0.9144701361656189, "learning_rate": 1.4868830290186282e-05, "loss": 0.3577, "step": 15285 }, { "epoch": 0.3382381677475491, "grad_norm": 0.9549031257629395, "learning_rate": 1.4865794780295497e-05, "loss": 0.4309, "step": 15290 }, { "epoch": 0.3383487753890624, "grad_norm": 1.0215181112289429, "learning_rate": 1.486275868286151e-05, "loss": 0.3993, "step": 15295 }, { "epoch": 0.33845938303057566, "grad_norm": 1.1850395202636719, "learning_rate": 1.4859721998250929e-05, "loss": 0.4452, "step": 15300 }, { "epoch": 0.33856999067208887, "grad_norm": 1.0110211372375488, "learning_rate": 1.4856684726830435e-05, "loss": 0.4455, "step": 15305 }, { "epoch": 0.33868059831360214, "grad_norm": 1.3925127983093262, "learning_rate": 1.4853646868966776e-05, "loss": 0.3008, "step": 15310 }, { "epoch": 0.3387912059551154, "grad_norm": 1.0080195665359497, "learning_rate": 1.4850608425026769e-05, "loss": 0.2508, "step": 15315 }, { "epoch": 0.3389018135966287, "grad_norm": 1.3057414293289185, "learning_rate": 1.484756939537731e-05, "loss": 0.3045, "step": 15320 }, { "epoch": 0.33901242123814196, "grad_norm": 1.446711778640747, "learning_rate": 1.484452978038536e-05, "loss": 0.2998, "step": 15325 }, { "epoch": 0.3391230288796552, "grad_norm": 1.1864386796951294, "learning_rate": 1.484148958041795e-05, "loss": 0.5572, "step": 15330 }, { "epoch": 0.33923363652116845, "grad_norm": 1.3768340349197388, "learning_rate": 1.4838448795842184e-05, "loss": 0.4717, "step": 15335 }, { "epoch": 0.3393442441626817, "grad_norm": 0.6732336282730103, "learning_rate": 1.4835407427025234e-05, "loss": 0.363, "step": 15340 }, { "epoch": 0.339454851804195, "grad_norm": 1.2340855598449707, "learning_rate": 1.4832365474334347e-05, "loss": 0.1986, "step": 15345 }, { "epoch": 0.33956545944570826, "grad_norm": 1.735133171081543, "learning_rate": 1.4829322938136837e-05, "loss": 0.4702, "step": 15350 }, { "epoch": 0.3396760670872215, "grad_norm": 1.4247839450836182, "learning_rate": 1.4826279818800089e-05, "loss": 0.4389, "step": 15355 }, { "epoch": 0.33978667472873475, "grad_norm": 2.114419460296631, "learning_rate": 1.4823236116691559e-05, "loss": 0.3862, "step": 15360 }, { "epoch": 0.339897282370248, "grad_norm": 1.3299777507781982, "learning_rate": 1.4820191832178772e-05, "loss": 0.3799, "step": 15365 }, { "epoch": 0.3400078900117613, "grad_norm": 0.6218876242637634, "learning_rate": 1.4817146965629325e-05, "loss": 0.2842, "step": 15370 }, { "epoch": 0.34011849765327457, "grad_norm": 0.9588462710380554, "learning_rate": 1.4814101517410886e-05, "loss": 0.317, "step": 15375 }, { "epoch": 0.3402291052947878, "grad_norm": 1.265150785446167, "learning_rate": 1.481105548789119e-05, "loss": 0.2603, "step": 15380 }, { "epoch": 0.34033971293630105, "grad_norm": 1.8487403392791748, "learning_rate": 1.4808008877438047e-05, "loss": 0.4108, "step": 15385 }, { "epoch": 0.3404503205778143, "grad_norm": 1.406860589981079, "learning_rate": 1.480496168641933e-05, "loss": 0.4426, "step": 15390 }, { "epoch": 0.3405609282193276, "grad_norm": 1.1847728490829468, "learning_rate": 1.480191391520299e-05, "loss": 0.4376, "step": 15395 }, { "epoch": 0.3406715358608408, "grad_norm": 1.4833639860153198, "learning_rate": 1.4798865564157044e-05, "loss": 0.4022, "step": 15400 }, { "epoch": 0.3407821435023541, "grad_norm": 2.2611305713653564, "learning_rate": 1.4795816633649576e-05, "loss": 0.2847, "step": 15405 }, { "epoch": 0.34089275114386736, "grad_norm": 1.3257174491882324, "learning_rate": 1.4792767124048747e-05, "loss": 0.3116, "step": 15410 }, { "epoch": 0.34100335878538063, "grad_norm": 1.210499882698059, "learning_rate": 1.4789717035722784e-05, "loss": 0.5272, "step": 15415 }, { "epoch": 0.3411139664268939, "grad_norm": 1.5959993600845337, "learning_rate": 1.4786666369039982e-05, "loss": 0.3237, "step": 15420 }, { "epoch": 0.3412245740684071, "grad_norm": 1.4467601776123047, "learning_rate": 1.478361512436871e-05, "loss": 0.4216, "step": 15425 }, { "epoch": 0.3413351817099204, "grad_norm": 0.9134896397590637, "learning_rate": 1.4780563302077405e-05, "loss": 0.258, "step": 15430 }, { "epoch": 0.34144578935143366, "grad_norm": 1.2909010648727417, "learning_rate": 1.4777510902534574e-05, "loss": 0.5591, "step": 15435 }, { "epoch": 0.34155639699294693, "grad_norm": 0.8538870811462402, "learning_rate": 1.477445792610879e-05, "loss": 0.3868, "step": 15440 }, { "epoch": 0.3416670046344602, "grad_norm": 1.2988210916519165, "learning_rate": 1.4771404373168701e-05, "loss": 0.3128, "step": 15445 }, { "epoch": 0.3417776122759734, "grad_norm": 1.0934267044067383, "learning_rate": 1.4768350244083022e-05, "loss": 0.3502, "step": 15450 }, { "epoch": 0.3418882199174867, "grad_norm": 1.779455304145813, "learning_rate": 1.4765295539220541e-05, "loss": 0.4019, "step": 15455 }, { "epoch": 0.34199882755899996, "grad_norm": 1.9005703926086426, "learning_rate": 1.4762240258950108e-05, "loss": 0.4294, "step": 15460 }, { "epoch": 0.34210943520051323, "grad_norm": 1.4981920719146729, "learning_rate": 1.4759184403640655e-05, "loss": 0.307, "step": 15465 }, { "epoch": 0.3422200428420265, "grad_norm": 1.2380292415618896, "learning_rate": 1.4756127973661166e-05, "loss": 0.383, "step": 15470 }, { "epoch": 0.3423306504835397, "grad_norm": 1.4246808290481567, "learning_rate": 1.475307096938071e-05, "loss": 0.3957, "step": 15475 }, { "epoch": 0.342441258125053, "grad_norm": 1.664656639099121, "learning_rate": 1.4750013391168414e-05, "loss": 0.4949, "step": 15480 }, { "epoch": 0.34255186576656627, "grad_norm": 0.7772741317749023, "learning_rate": 1.4746955239393486e-05, "loss": 0.3803, "step": 15485 }, { "epoch": 0.34266247340807954, "grad_norm": 1.2158479690551758, "learning_rate": 1.4743896514425193e-05, "loss": 0.3131, "step": 15490 }, { "epoch": 0.34277308104959275, "grad_norm": 1.601609230041504, "learning_rate": 1.4740837216632877e-05, "loss": 0.4059, "step": 15495 }, { "epoch": 0.342883688691106, "grad_norm": 1.2334411144256592, "learning_rate": 1.4737777346385947e-05, "loss": 0.4587, "step": 15500 }, { "epoch": 0.3429942963326193, "grad_norm": 1.0894927978515625, "learning_rate": 1.4734716904053879e-05, "loss": 0.339, "step": 15505 }, { "epoch": 0.34310490397413257, "grad_norm": 1.694584608078003, "learning_rate": 1.4731655890006222e-05, "loss": 0.3596, "step": 15510 }, { "epoch": 0.34321551161564584, "grad_norm": 1.1078481674194336, "learning_rate": 1.4728594304612594e-05, "loss": 0.2652, "step": 15515 }, { "epoch": 0.34332611925715906, "grad_norm": 0.8228939175605774, "learning_rate": 1.4725532148242678e-05, "loss": 0.248, "step": 15520 }, { "epoch": 0.34343672689867233, "grad_norm": 1.303318738937378, "learning_rate": 1.4722469421266233e-05, "loss": 0.4551, "step": 15525 }, { "epoch": 0.3435473345401856, "grad_norm": 1.3076298236846924, "learning_rate": 1.4719406124053076e-05, "loss": 0.573, "step": 15530 }, { "epoch": 0.34365794218169887, "grad_norm": 0.868610680103302, "learning_rate": 1.4716342256973101e-05, "loss": 0.3294, "step": 15535 }, { "epoch": 0.34376854982321214, "grad_norm": 1.3120391368865967, "learning_rate": 1.471327782039627e-05, "loss": 0.3241, "step": 15540 }, { "epoch": 0.34387915746472536, "grad_norm": 2.19639253616333, "learning_rate": 1.4710212814692614e-05, "loss": 0.3255, "step": 15545 }, { "epoch": 0.34398976510623863, "grad_norm": 2.1666667461395264, "learning_rate": 1.470714724023223e-05, "loss": 0.4126, "step": 15550 }, { "epoch": 0.3441003727477519, "grad_norm": 1.34955894947052, "learning_rate": 1.4704081097385287e-05, "loss": 0.3036, "step": 15555 }, { "epoch": 0.3442109803892652, "grad_norm": 1.2581862211227417, "learning_rate": 1.4701014386522019e-05, "loss": 0.3522, "step": 15560 }, { "epoch": 0.34432158803077845, "grad_norm": 2.108563184738159, "learning_rate": 1.4697947108012729e-05, "loss": 0.257, "step": 15565 }, { "epoch": 0.34443219567229166, "grad_norm": 1.2520655393600464, "learning_rate": 1.4694879262227795e-05, "loss": 0.4433, "step": 15570 }, { "epoch": 0.34454280331380494, "grad_norm": 0.7311168909072876, "learning_rate": 1.469181084953765e-05, "loss": 0.3583, "step": 15575 }, { "epoch": 0.3446534109553182, "grad_norm": 1.2822034358978271, "learning_rate": 1.4688741870312812e-05, "loss": 0.4007, "step": 15580 }, { "epoch": 0.3447640185968315, "grad_norm": 1.085301160812378, "learning_rate": 1.4685672324923857e-05, "loss": 0.4365, "step": 15585 }, { "epoch": 0.3448746262383447, "grad_norm": 1.162745714187622, "learning_rate": 1.4682602213741428e-05, "loss": 0.325, "step": 15590 }, { "epoch": 0.34498523387985797, "grad_norm": 0.7723871469497681, "learning_rate": 1.4679531537136244e-05, "loss": 0.3742, "step": 15595 }, { "epoch": 0.34509584152137124, "grad_norm": 1.270172357559204, "learning_rate": 1.4676460295479087e-05, "loss": 0.3476, "step": 15600 }, { "epoch": 0.3452064491628845, "grad_norm": 1.2146798372268677, "learning_rate": 1.4673388489140807e-05, "loss": 0.5004, "step": 15605 }, { "epoch": 0.3453170568043978, "grad_norm": 3.4490461349487305, "learning_rate": 1.4670316118492326e-05, "loss": 0.2823, "step": 15610 }, { "epoch": 0.345427664445911, "grad_norm": 1.6555202007293701, "learning_rate": 1.4667243183904634e-05, "loss": 0.4001, "step": 15615 }, { "epoch": 0.34553827208742427, "grad_norm": 1.7473666667938232, "learning_rate": 1.4664169685748782e-05, "loss": 0.5224, "step": 15620 }, { "epoch": 0.34564887972893754, "grad_norm": 1.3512554168701172, "learning_rate": 1.4661095624395896e-05, "loss": 0.3467, "step": 15625 }, { "epoch": 0.3457594873704508, "grad_norm": 0.9859955906867981, "learning_rate": 1.4658021000217166e-05, "loss": 0.3976, "step": 15630 }, { "epoch": 0.3458700950119641, "grad_norm": 2.3203487396240234, "learning_rate": 1.4654945813583854e-05, "loss": 0.3388, "step": 15635 }, { "epoch": 0.3459807026534773, "grad_norm": 1.3672804832458496, "learning_rate": 1.4651870064867288e-05, "loss": 0.3125, "step": 15640 }, { "epoch": 0.3460913102949906, "grad_norm": 1.8030999898910522, "learning_rate": 1.4648793754438863e-05, "loss": 0.4784, "step": 15645 }, { "epoch": 0.34620191793650384, "grad_norm": 1.1550408601760864, "learning_rate": 1.4645716882670042e-05, "loss": 0.389, "step": 15650 }, { "epoch": 0.3463125255780171, "grad_norm": 1.3811123371124268, "learning_rate": 1.464263944993236e-05, "loss": 0.4469, "step": 15655 }, { "epoch": 0.34642313321953033, "grad_norm": 0.7715067267417908, "learning_rate": 1.463956145659741e-05, "loss": 0.1818, "step": 15660 }, { "epoch": 0.3465337408610436, "grad_norm": 1.082797646522522, "learning_rate": 1.4636482903036862e-05, "loss": 0.4105, "step": 15665 }, { "epoch": 0.3466443485025569, "grad_norm": 2.1009366512298584, "learning_rate": 1.4633403789622451e-05, "loss": 0.4714, "step": 15670 }, { "epoch": 0.34675495614407015, "grad_norm": 1.778803825378418, "learning_rate": 1.4630324116725979e-05, "loss": 0.3363, "step": 15675 }, { "epoch": 0.3468655637855834, "grad_norm": 1.5426311492919922, "learning_rate": 1.4627243884719314e-05, "loss": 0.3016, "step": 15680 }, { "epoch": 0.34697617142709664, "grad_norm": 1.0453400611877441, "learning_rate": 1.4624163093974392e-05, "loss": 0.206, "step": 15685 }, { "epoch": 0.3470867790686099, "grad_norm": 1.255282998085022, "learning_rate": 1.4621081744863221e-05, "loss": 0.4176, "step": 15690 }, { "epoch": 0.3471973867101232, "grad_norm": 1.4928226470947266, "learning_rate": 1.461799983775787e-05, "loss": 0.5482, "step": 15695 }, { "epoch": 0.34730799435163645, "grad_norm": 1.9019018411636353, "learning_rate": 1.4614917373030482e-05, "loss": 0.4376, "step": 15700 }, { "epoch": 0.3474186019931497, "grad_norm": 1.221766710281372, "learning_rate": 1.4611834351053258e-05, "loss": 0.5558, "step": 15705 }, { "epoch": 0.34752920963466294, "grad_norm": 2.1194820404052734, "learning_rate": 1.4608750772198477e-05, "loss": 0.244, "step": 15710 }, { "epoch": 0.3476398172761762, "grad_norm": 1.1976181268692017, "learning_rate": 1.4605666636838477e-05, "loss": 0.4333, "step": 15715 }, { "epoch": 0.3477504249176895, "grad_norm": 1.0579280853271484, "learning_rate": 1.4602581945345669e-05, "loss": 0.3037, "step": 15720 }, { "epoch": 0.34786103255920275, "grad_norm": 0.9375311732292175, "learning_rate": 1.4599496698092526e-05, "loss": 0.4748, "step": 15725 }, { "epoch": 0.347971640200716, "grad_norm": 1.0117032527923584, "learning_rate": 1.4596410895451594e-05, "loss": 0.4936, "step": 15730 }, { "epoch": 0.34808224784222924, "grad_norm": 1.6188238859176636, "learning_rate": 1.459332453779548e-05, "loss": 0.4271, "step": 15735 }, { "epoch": 0.3481928554837425, "grad_norm": 1.2220782041549683, "learning_rate": 1.459023762549686e-05, "loss": 0.2059, "step": 15740 }, { "epoch": 0.3483034631252558, "grad_norm": 1.283725380897522, "learning_rate": 1.4587150158928478e-05, "loss": 0.3571, "step": 15745 }, { "epoch": 0.34841407076676906, "grad_norm": 1.175169587135315, "learning_rate": 1.4584062138463147e-05, "loss": 0.3019, "step": 15750 }, { "epoch": 0.3485246784082823, "grad_norm": 1.0732375383377075, "learning_rate": 1.4580973564473743e-05, "loss": 0.2509, "step": 15755 }, { "epoch": 0.34863528604979555, "grad_norm": 1.6498178243637085, "learning_rate": 1.4577884437333212e-05, "loss": 0.3088, "step": 15760 }, { "epoch": 0.3487458936913088, "grad_norm": 1.5137803554534912, "learning_rate": 1.4574794757414561e-05, "loss": 0.4527, "step": 15765 }, { "epoch": 0.3488565013328221, "grad_norm": 1.0581198930740356, "learning_rate": 1.4571704525090873e-05, "loss": 0.4735, "step": 15770 }, { "epoch": 0.34896710897433536, "grad_norm": 1.8310890197753906, "learning_rate": 1.4568613740735286e-05, "loss": 0.3717, "step": 15775 }, { "epoch": 0.3490777166158486, "grad_norm": 0.9464430212974548, "learning_rate": 1.4565522404721019e-05, "loss": 0.3108, "step": 15780 }, { "epoch": 0.34918832425736185, "grad_norm": 0.9347255825996399, "learning_rate": 1.4562430517421347e-05, "loss": 0.3693, "step": 15785 }, { "epoch": 0.3492989318988751, "grad_norm": 1.2562732696533203, "learning_rate": 1.455933807920961e-05, "loss": 0.3626, "step": 15790 }, { "epoch": 0.3494095395403884, "grad_norm": 2.092366933822632, "learning_rate": 1.4556245090459225e-05, "loss": 0.293, "step": 15795 }, { "epoch": 0.34952014718190166, "grad_norm": 1.3556854724884033, "learning_rate": 1.4553151551543665e-05, "loss": 0.4028, "step": 15800 }, { "epoch": 0.3496307548234149, "grad_norm": 0.6948928236961365, "learning_rate": 1.4550057462836477e-05, "loss": 0.4013, "step": 15805 }, { "epoch": 0.34974136246492815, "grad_norm": 1.1127327680587769, "learning_rate": 1.4546962824711272e-05, "loss": 0.323, "step": 15810 }, { "epoch": 0.3498519701064414, "grad_norm": 1.195496678352356, "learning_rate": 1.4543867637541722e-05, "loss": 0.3331, "step": 15815 }, { "epoch": 0.3499625777479547, "grad_norm": 1.124766230583191, "learning_rate": 1.4540771901701574e-05, "loss": 0.5089, "step": 15820 }, { "epoch": 0.35007318538946797, "grad_norm": 1.2420132160186768, "learning_rate": 1.4537675617564635e-05, "loss": 0.4285, "step": 15825 }, { "epoch": 0.3501837930309812, "grad_norm": 1.44577157497406, "learning_rate": 1.453457878550478e-05, "loss": 0.451, "step": 15830 }, { "epoch": 0.35029440067249445, "grad_norm": 1.2778071165084839, "learning_rate": 1.4531481405895953e-05, "loss": 0.4621, "step": 15835 }, { "epoch": 0.3504050083140077, "grad_norm": 1.6731332540512085, "learning_rate": 1.4528383479112162e-05, "loss": 0.268, "step": 15840 }, { "epoch": 0.350515615955521, "grad_norm": 1.75295090675354, "learning_rate": 1.4525285005527476e-05, "loss": 0.523, "step": 15845 }, { "epoch": 0.3506262235970342, "grad_norm": 1.5641785860061646, "learning_rate": 1.4522185985516038e-05, "loss": 0.2013, "step": 15850 }, { "epoch": 0.3507368312385475, "grad_norm": 1.6879780292510986, "learning_rate": 1.4519086419452053e-05, "loss": 0.433, "step": 15855 }, { "epoch": 0.35084743888006076, "grad_norm": 1.9827805757522583, "learning_rate": 1.4515986307709795e-05, "loss": 0.5478, "step": 15860 }, { "epoch": 0.35095804652157403, "grad_norm": 1.113707184791565, "learning_rate": 1.45128856506636e-05, "loss": 0.4559, "step": 15865 }, { "epoch": 0.3510686541630873, "grad_norm": 1.323620319366455, "learning_rate": 1.4509784448687868e-05, "loss": 0.3405, "step": 15870 }, { "epoch": 0.3511792618046005, "grad_norm": 1.3555957078933716, "learning_rate": 1.4506682702157071e-05, "loss": 0.4588, "step": 15875 }, { "epoch": 0.3512898694461138, "grad_norm": 1.255086064338684, "learning_rate": 1.4503580411445744e-05, "loss": 0.4345, "step": 15880 }, { "epoch": 0.35140047708762706, "grad_norm": 1.8117483854293823, "learning_rate": 1.4500477576928487e-05, "loss": 0.4602, "step": 15885 }, { "epoch": 0.35151108472914033, "grad_norm": 1.4154585599899292, "learning_rate": 1.4497374198979967e-05, "loss": 0.3665, "step": 15890 }, { "epoch": 0.3516216923706536, "grad_norm": 1.6708000898361206, "learning_rate": 1.4494270277974918e-05, "loss": 0.3798, "step": 15895 }, { "epoch": 0.3517323000121668, "grad_norm": 1.4165312051773071, "learning_rate": 1.4491165814288134e-05, "loss": 0.3107, "step": 15900 }, { "epoch": 0.3518429076536801, "grad_norm": 1.1311854124069214, "learning_rate": 1.448806080829448e-05, "loss": 0.3753, "step": 15905 }, { "epoch": 0.35195351529519336, "grad_norm": 2.2732441425323486, "learning_rate": 1.4484955260368883e-05, "loss": 0.4701, "step": 15910 }, { "epoch": 0.35206412293670664, "grad_norm": 1.2941218614578247, "learning_rate": 1.4481849170886337e-05, "loss": 0.4953, "step": 15915 }, { "epoch": 0.3521747305782199, "grad_norm": 1.816598892211914, "learning_rate": 1.4478742540221904e-05, "loss": 0.2277, "step": 15920 }, { "epoch": 0.3522853382197331, "grad_norm": 2.0059895515441895, "learning_rate": 1.4475635368750704e-05, "loss": 0.4532, "step": 15925 }, { "epoch": 0.3523959458612464, "grad_norm": 1.0831053256988525, "learning_rate": 1.4472527656847933e-05, "loss": 0.3507, "step": 15930 }, { "epoch": 0.35250655350275967, "grad_norm": 1.0116081237792969, "learning_rate": 1.446941940488884e-05, "loss": 0.3385, "step": 15935 }, { "epoch": 0.35261716114427294, "grad_norm": 1.1181020736694336, "learning_rate": 1.446631061324875e-05, "loss": 0.3443, "step": 15940 }, { "epoch": 0.35272776878578616, "grad_norm": 1.4943652153015137, "learning_rate": 1.4463201282303047e-05, "loss": 0.2778, "step": 15945 }, { "epoch": 0.3528383764272994, "grad_norm": 1.2759435176849365, "learning_rate": 1.4460091412427182e-05, "loss": 0.5332, "step": 15950 }, { "epoch": 0.3529489840688127, "grad_norm": 1.1506233215332031, "learning_rate": 1.4456981003996671e-05, "loss": 0.2697, "step": 15955 }, { "epoch": 0.35305959171032597, "grad_norm": 1.081407904624939, "learning_rate": 1.4453870057387092e-05, "loss": 0.3426, "step": 15960 }, { "epoch": 0.35317019935183924, "grad_norm": 2.733189344406128, "learning_rate": 1.4450758572974095e-05, "loss": 0.4677, "step": 15965 }, { "epoch": 0.35328080699335246, "grad_norm": 1.5179164409637451, "learning_rate": 1.444764655113339e-05, "loss": 0.3367, "step": 15970 }, { "epoch": 0.35339141463486573, "grad_norm": 0.8956961631774902, "learning_rate": 1.444453399224075e-05, "loss": 0.3364, "step": 15975 }, { "epoch": 0.353502022276379, "grad_norm": 1.4373195171356201, "learning_rate": 1.4441420896672019e-05, "loss": 0.3148, "step": 15980 }, { "epoch": 0.3536126299178923, "grad_norm": 1.4689610004425049, "learning_rate": 1.4438307264803097e-05, "loss": 0.3366, "step": 15985 }, { "epoch": 0.35372323755940555, "grad_norm": 1.0802708864212036, "learning_rate": 1.4435193097009961e-05, "loss": 0.3321, "step": 15990 }, { "epoch": 0.35383384520091876, "grad_norm": 0.6962181925773621, "learning_rate": 1.4432078393668642e-05, "loss": 0.3387, "step": 15995 }, { "epoch": 0.35394445284243203, "grad_norm": 2.681088924407959, "learning_rate": 1.4428963155155237e-05, "loss": 0.3097, "step": 16000 }, { "epoch": 0.3540550604839453, "grad_norm": 1.0865914821624756, "learning_rate": 1.4425847381845917e-05, "loss": 0.4137, "step": 16005 }, { "epoch": 0.3541656681254586, "grad_norm": 0.5004785060882568, "learning_rate": 1.4422731074116904e-05, "loss": 0.2352, "step": 16010 }, { "epoch": 0.3542762757669718, "grad_norm": 1.3809130191802979, "learning_rate": 1.4419614232344495e-05, "loss": 0.3334, "step": 16015 }, { "epoch": 0.35438688340848506, "grad_norm": 0.9185322523117065, "learning_rate": 1.4416496856905047e-05, "loss": 0.3229, "step": 16020 }, { "epoch": 0.35449749104999834, "grad_norm": 1.146433711051941, "learning_rate": 1.4413378948174981e-05, "loss": 0.3313, "step": 16025 }, { "epoch": 0.3546080986915116, "grad_norm": 1.7361321449279785, "learning_rate": 1.4410260506530785e-05, "loss": 0.4026, "step": 16030 }, { "epoch": 0.3547187063330249, "grad_norm": 1.0912598371505737, "learning_rate": 1.4407141532349005e-05, "loss": 0.4201, "step": 16035 }, { "epoch": 0.3548293139745381, "grad_norm": 1.540871024131775, "learning_rate": 1.4404022026006264e-05, "loss": 0.194, "step": 16040 }, { "epoch": 0.35493992161605137, "grad_norm": 0.7203248739242554, "learning_rate": 1.4400901987879236e-05, "loss": 0.1664, "step": 16045 }, { "epoch": 0.35505052925756464, "grad_norm": 1.2961822748184204, "learning_rate": 1.4397781418344666e-05, "loss": 0.3015, "step": 16050 }, { "epoch": 0.3551611368990779, "grad_norm": 0.8228198885917664, "learning_rate": 1.4394660317779364e-05, "loss": 0.3763, "step": 16055 }, { "epoch": 0.3552717445405912, "grad_norm": 1.1773124933242798, "learning_rate": 1.4391538686560197e-05, "loss": 0.203, "step": 16060 }, { "epoch": 0.3553823521821044, "grad_norm": 1.314704179763794, "learning_rate": 1.4388416525064104e-05, "loss": 0.3737, "step": 16065 }, { "epoch": 0.35549295982361767, "grad_norm": 1.7782974243164062, "learning_rate": 1.4385293833668085e-05, "loss": 0.3965, "step": 16070 }, { "epoch": 0.35560356746513094, "grad_norm": 1.0156383514404297, "learning_rate": 1.4382170612749206e-05, "loss": 0.3652, "step": 16075 }, { "epoch": 0.3557141751066442, "grad_norm": 1.4802296161651611, "learning_rate": 1.4379046862684596e-05, "loss": 0.323, "step": 16080 }, { "epoch": 0.3558247827481575, "grad_norm": 1.5582964420318604, "learning_rate": 1.4375922583851436e-05, "loss": 0.3589, "step": 16085 }, { "epoch": 0.3559353903896707, "grad_norm": 1.2577269077301025, "learning_rate": 1.4372797776626992e-05, "loss": 0.3726, "step": 16090 }, { "epoch": 0.356045998031184, "grad_norm": 1.498484492301941, "learning_rate": 1.4369672441388583e-05, "loss": 0.353, "step": 16095 }, { "epoch": 0.35615660567269725, "grad_norm": 0.7348072528839111, "learning_rate": 1.4366546578513589e-05, "loss": 0.312, "step": 16100 }, { "epoch": 0.3562672133142105, "grad_norm": 1.361512541770935, "learning_rate": 1.4363420188379458e-05, "loss": 0.5647, "step": 16105 }, { "epoch": 0.35637782095572373, "grad_norm": 0.9875860810279846, "learning_rate": 1.4360293271363701e-05, "loss": 0.2131, "step": 16110 }, { "epoch": 0.356488428597237, "grad_norm": 1.6232396364212036, "learning_rate": 1.4357165827843891e-05, "loss": 0.3916, "step": 16115 }, { "epoch": 0.3565990362387503, "grad_norm": 1.4688385725021362, "learning_rate": 1.4354037858197666e-05, "loss": 0.3033, "step": 16120 }, { "epoch": 0.35670964388026355, "grad_norm": 1.383644461631775, "learning_rate": 1.4350909362802732e-05, "loss": 0.3928, "step": 16125 }, { "epoch": 0.3568202515217768, "grad_norm": 0.932992696762085, "learning_rate": 1.4347780342036846e-05, "loss": 0.202, "step": 16130 }, { "epoch": 0.35693085916329004, "grad_norm": 1.639560580253601, "learning_rate": 1.4344650796277844e-05, "loss": 0.2127, "step": 16135 }, { "epoch": 0.3570414668048033, "grad_norm": 1.152140736579895, "learning_rate": 1.434152072590361e-05, "loss": 0.3816, "step": 16140 }, { "epoch": 0.3571520744463166, "grad_norm": 1.3580130338668823, "learning_rate": 1.4338390131292107e-05, "loss": 0.435, "step": 16145 }, { "epoch": 0.35726268208782985, "grad_norm": 1.8572999238967896, "learning_rate": 1.4335259012821347e-05, "loss": 0.3661, "step": 16150 }, { "epoch": 0.3573732897293431, "grad_norm": 0.9382109642028809, "learning_rate": 1.4332127370869414e-05, "loss": 0.4086, "step": 16155 }, { "epoch": 0.35748389737085634, "grad_norm": 1.2135976552963257, "learning_rate": 1.4328995205814452e-05, "loss": 0.4229, "step": 16160 }, { "epoch": 0.3575945050123696, "grad_norm": 0.8684080839157104, "learning_rate": 1.4325862518034672e-05, "loss": 0.3152, "step": 16165 }, { "epoch": 0.3577051126538829, "grad_norm": 1.3175063133239746, "learning_rate": 1.432272930790834e-05, "loss": 0.433, "step": 16170 }, { "epoch": 0.35781572029539616, "grad_norm": 2.0215437412261963, "learning_rate": 1.4319595575813794e-05, "loss": 0.4286, "step": 16175 }, { "epoch": 0.3579263279369094, "grad_norm": 1.5933116674423218, "learning_rate": 1.4316461322129431e-05, "loss": 0.3608, "step": 16180 }, { "epoch": 0.35803693557842264, "grad_norm": 1.5575907230377197, "learning_rate": 1.431332654723371e-05, "loss": 0.4318, "step": 16185 }, { "epoch": 0.3581475432199359, "grad_norm": 1.2646452188491821, "learning_rate": 1.4310191251505152e-05, "loss": 0.3989, "step": 16190 }, { "epoch": 0.3582581508614492, "grad_norm": 1.3483232259750366, "learning_rate": 1.4307055435322349e-05, "loss": 0.5416, "step": 16195 }, { "epoch": 0.35836875850296246, "grad_norm": 1.1798491477966309, "learning_rate": 1.4303919099063943e-05, "loss": 0.2931, "step": 16200 }, { "epoch": 0.3584793661444757, "grad_norm": 1.50019371509552, "learning_rate": 1.4300782243108649e-05, "loss": 0.4661, "step": 16205 }, { "epoch": 0.35858997378598895, "grad_norm": 1.4469177722930908, "learning_rate": 1.4297644867835243e-05, "loss": 0.3862, "step": 16210 }, { "epoch": 0.3587005814275022, "grad_norm": 1.1937155723571777, "learning_rate": 1.4294506973622556e-05, "loss": 0.4637, "step": 16215 }, { "epoch": 0.3588111890690155, "grad_norm": 1.0868419408798218, "learning_rate": 1.4291368560849497e-05, "loss": 0.4937, "step": 16220 }, { "epoch": 0.35892179671052876, "grad_norm": 1.0142728090286255, "learning_rate": 1.428822962989502e-05, "loss": 0.3328, "step": 16225 }, { "epoch": 0.359032404352042, "grad_norm": 1.4133620262145996, "learning_rate": 1.4285090181138154e-05, "loss": 0.4827, "step": 16230 }, { "epoch": 0.35914301199355525, "grad_norm": 1.0323398113250732, "learning_rate": 1.4281950214957986e-05, "loss": 0.2693, "step": 16235 }, { "epoch": 0.3592536196350685, "grad_norm": 0.8588260412216187, "learning_rate": 1.4278809731733666e-05, "loss": 0.4677, "step": 16240 }, { "epoch": 0.3593642272765818, "grad_norm": 0.9577782154083252, "learning_rate": 1.4275668731844407e-05, "loss": 0.4513, "step": 16245 }, { "epoch": 0.35947483491809507, "grad_norm": 0.8703123927116394, "learning_rate": 1.427252721566948e-05, "loss": 0.3314, "step": 16250 }, { "epoch": 0.3595854425596083, "grad_norm": 1.428094744682312, "learning_rate": 1.4269385183588227e-05, "loss": 0.2568, "step": 16255 }, { "epoch": 0.35969605020112155, "grad_norm": 1.3080363273620605, "learning_rate": 1.4266242635980044e-05, "loss": 0.374, "step": 16260 }, { "epoch": 0.3598066578426348, "grad_norm": 1.2795891761779785, "learning_rate": 1.4263099573224395e-05, "loss": 0.3101, "step": 16265 }, { "epoch": 0.3599172654841481, "grad_norm": 1.4733678102493286, "learning_rate": 1.4259955995700803e-05, "loss": 0.4382, "step": 16270 }, { "epoch": 0.36002787312566137, "grad_norm": 0.9909136295318604, "learning_rate": 1.4256811903788851e-05, "loss": 0.4307, "step": 16275 }, { "epoch": 0.3601384807671746, "grad_norm": 1.213320255279541, "learning_rate": 1.4253667297868193e-05, "loss": 0.417, "step": 16280 }, { "epoch": 0.36024908840868786, "grad_norm": 1.3604867458343506, "learning_rate": 1.4250522178318533e-05, "loss": 0.3772, "step": 16285 }, { "epoch": 0.36035969605020113, "grad_norm": 1.32577383518219, "learning_rate": 1.4247376545519648e-05, "loss": 0.3341, "step": 16290 }, { "epoch": 0.3604703036917144, "grad_norm": 1.2703324556350708, "learning_rate": 1.4244230399851373e-05, "loss": 0.3945, "step": 16295 }, { "epoch": 0.3605809113332276, "grad_norm": 1.7799978256225586, "learning_rate": 1.4241083741693596e-05, "loss": 0.3983, "step": 16300 }, { "epoch": 0.3606915189747409, "grad_norm": 1.6439543962478638, "learning_rate": 1.4237936571426283e-05, "loss": 0.2874, "step": 16305 }, { "epoch": 0.36080212661625416, "grad_norm": 1.6858723163604736, "learning_rate": 1.4234788889429452e-05, "loss": 0.3151, "step": 16310 }, { "epoch": 0.36091273425776743, "grad_norm": 1.006286859512329, "learning_rate": 1.4231640696083184e-05, "loss": 0.3993, "step": 16315 }, { "epoch": 0.3610233418992807, "grad_norm": 1.9028401374816895, "learning_rate": 1.4228491991767622e-05, "loss": 0.2901, "step": 16320 }, { "epoch": 0.3611339495407939, "grad_norm": 1.8281505107879639, "learning_rate": 1.422534277686297e-05, "loss": 0.3687, "step": 16325 }, { "epoch": 0.3612445571823072, "grad_norm": 1.4539659023284912, "learning_rate": 1.4222193051749498e-05, "loss": 0.2736, "step": 16330 }, { "epoch": 0.36135516482382046, "grad_norm": 1.6505926847457886, "learning_rate": 1.421904281680753e-05, "loss": 0.3514, "step": 16335 }, { "epoch": 0.36146577246533373, "grad_norm": 1.357602596282959, "learning_rate": 1.4215892072417462e-05, "loss": 0.316, "step": 16340 }, { "epoch": 0.361576380106847, "grad_norm": 1.7016295194625854, "learning_rate": 1.421274081895974e-05, "loss": 0.3278, "step": 16345 }, { "epoch": 0.3616869877483602, "grad_norm": 1.320128083229065, "learning_rate": 1.4209589056814879e-05, "loss": 0.2351, "step": 16350 }, { "epoch": 0.3617975953898735, "grad_norm": 1.2408369779586792, "learning_rate": 1.4206436786363452e-05, "loss": 0.4033, "step": 16355 }, { "epoch": 0.36190820303138677, "grad_norm": 1.2873928546905518, "learning_rate": 1.4203284007986096e-05, "loss": 0.3799, "step": 16360 }, { "epoch": 0.36201881067290004, "grad_norm": 1.3338724374771118, "learning_rate": 1.4200130722063506e-05, "loss": 0.3944, "step": 16365 }, { "epoch": 0.3621294183144133, "grad_norm": 1.3980398178100586, "learning_rate": 1.4196976928976442e-05, "loss": 0.42, "step": 16370 }, { "epoch": 0.3622400259559265, "grad_norm": 1.157224416732788, "learning_rate": 1.4193822629105727e-05, "loss": 0.3164, "step": 16375 }, { "epoch": 0.3623506335974398, "grad_norm": 1.4612871408462524, "learning_rate": 1.4190667822832234e-05, "loss": 0.4775, "step": 16380 }, { "epoch": 0.36246124123895307, "grad_norm": 1.260555386543274, "learning_rate": 1.4187512510536912e-05, "loss": 0.5376, "step": 16385 }, { "epoch": 0.36257184888046634, "grad_norm": 2.1773715019226074, "learning_rate": 1.4184356692600757e-05, "loss": 0.4973, "step": 16390 }, { "epoch": 0.36268245652197956, "grad_norm": 0.9538789987564087, "learning_rate": 1.4181200369404842e-05, "loss": 0.5552, "step": 16395 }, { "epoch": 0.36279306416349283, "grad_norm": 0.8313690423965454, "learning_rate": 1.4178043541330284e-05, "loss": 0.2556, "step": 16400 }, { "epoch": 0.3629036718050061, "grad_norm": 1.5397309064865112, "learning_rate": 1.4174886208758272e-05, "loss": 0.4281, "step": 16405 }, { "epoch": 0.3630142794465194, "grad_norm": 1.0963271856307983, "learning_rate": 1.4171728372070056e-05, "loss": 0.4578, "step": 16410 }, { "epoch": 0.36312488708803264, "grad_norm": 2.07688045501709, "learning_rate": 1.4168570031646936e-05, "loss": 0.3901, "step": 16415 }, { "epoch": 0.36323549472954586, "grad_norm": 1.0887980461120605, "learning_rate": 1.4165411187870288e-05, "loss": 0.3199, "step": 16420 }, { "epoch": 0.36334610237105913, "grad_norm": 1.3880313634872437, "learning_rate": 1.4162251841121539e-05, "loss": 0.2659, "step": 16425 }, { "epoch": 0.3634567100125724, "grad_norm": 0.9391574263572693, "learning_rate": 1.4159091991782177e-05, "loss": 0.2594, "step": 16430 }, { "epoch": 0.3635673176540857, "grad_norm": 1.602553367614746, "learning_rate": 1.4155931640233757e-05, "loss": 0.478, "step": 16435 }, { "epoch": 0.36367792529559895, "grad_norm": 0.9720966815948486, "learning_rate": 1.4152770786857888e-05, "loss": 0.3786, "step": 16440 }, { "epoch": 0.36378853293711216, "grad_norm": 1.2807247638702393, "learning_rate": 1.414960943203624e-05, "loss": 0.2174, "step": 16445 }, { "epoch": 0.36389914057862544, "grad_norm": 1.3891584873199463, "learning_rate": 1.4146447576150551e-05, "loss": 0.323, "step": 16450 }, { "epoch": 0.3640097482201387, "grad_norm": 1.2733328342437744, "learning_rate": 1.4143285219582608e-05, "loss": 0.2529, "step": 16455 }, { "epoch": 0.364120355861652, "grad_norm": 1.7900359630584717, "learning_rate": 1.414012236271427e-05, "loss": 0.3119, "step": 16460 }, { "epoch": 0.3642309635031652, "grad_norm": 0.9354844093322754, "learning_rate": 1.4136959005927447e-05, "loss": 0.2336, "step": 16465 }, { "epoch": 0.36434157114467847, "grad_norm": 1.310886263847351, "learning_rate": 1.4133795149604114e-05, "loss": 0.5233, "step": 16470 }, { "epoch": 0.36445217878619174, "grad_norm": 0.9775168895721436, "learning_rate": 1.4130630794126307e-05, "loss": 0.4712, "step": 16475 }, { "epoch": 0.364562786427705, "grad_norm": 1.4847187995910645, "learning_rate": 1.4127465939876121e-05, "loss": 0.3577, "step": 16480 }, { "epoch": 0.3646733940692183, "grad_norm": 1.3877888917922974, "learning_rate": 1.412430058723571e-05, "loss": 0.2549, "step": 16485 }, { "epoch": 0.3647840017107315, "grad_norm": 1.6265861988067627, "learning_rate": 1.4121134736587294e-05, "loss": 0.3962, "step": 16490 }, { "epoch": 0.36489460935224477, "grad_norm": 1.2890511751174927, "learning_rate": 1.411796838831314e-05, "loss": 0.4499, "step": 16495 }, { "epoch": 0.36500521699375804, "grad_norm": 1.1234405040740967, "learning_rate": 1.4114801542795589e-05, "loss": 0.3181, "step": 16500 }, { "epoch": 0.3651158246352713, "grad_norm": 1.1458648443222046, "learning_rate": 1.4111634200417037e-05, "loss": 0.3685, "step": 16505 }, { "epoch": 0.3652264322767846, "grad_norm": 1.171578049659729, "learning_rate": 1.4108466361559938e-05, "loss": 0.4782, "step": 16510 }, { "epoch": 0.3653370399182978, "grad_norm": 1.7948259115219116, "learning_rate": 1.4105298026606806e-05, "loss": 0.3911, "step": 16515 }, { "epoch": 0.3654476475598111, "grad_norm": 1.3694789409637451, "learning_rate": 1.410212919594022e-05, "loss": 0.4545, "step": 16520 }, { "epoch": 0.36555825520132434, "grad_norm": 1.090944528579712, "learning_rate": 1.4098959869942811e-05, "loss": 0.2376, "step": 16525 }, { "epoch": 0.3656688628428376, "grad_norm": 0.9064347147941589, "learning_rate": 1.4095790048997277e-05, "loss": 0.4215, "step": 16530 }, { "epoch": 0.3657794704843509, "grad_norm": 1.600460171699524, "learning_rate": 1.4092619733486375e-05, "loss": 0.3148, "step": 16535 }, { "epoch": 0.3658900781258641, "grad_norm": 1.2863847017288208, "learning_rate": 1.4089448923792917e-05, "loss": 0.4793, "step": 16540 }, { "epoch": 0.3660006857673774, "grad_norm": 1.9053949117660522, "learning_rate": 1.4086277620299776e-05, "loss": 0.3303, "step": 16545 }, { "epoch": 0.36611129340889065, "grad_norm": 0.828719973564148, "learning_rate": 1.4083105823389889e-05, "loss": 0.2784, "step": 16550 }, { "epoch": 0.3662219010504039, "grad_norm": 1.6253105401992798, "learning_rate": 1.4079933533446247e-05, "loss": 0.4387, "step": 16555 }, { "epoch": 0.36633250869191714, "grad_norm": 1.4468897581100464, "learning_rate": 1.4076760750851901e-05, "loss": 0.4717, "step": 16560 }, { "epoch": 0.3664431163334304, "grad_norm": 0.9879266619682312, "learning_rate": 1.407358747598997e-05, "loss": 0.4091, "step": 16565 }, { "epoch": 0.3665537239749437, "grad_norm": 1.84319007396698, "learning_rate": 1.407041370924362e-05, "loss": 0.4015, "step": 16570 }, { "epoch": 0.36666433161645695, "grad_norm": 1.3443331718444824, "learning_rate": 1.4067239450996089e-05, "loss": 0.3365, "step": 16575 }, { "epoch": 0.3667749392579702, "grad_norm": 1.1049095392227173, "learning_rate": 1.4064064701630658e-05, "loss": 0.2038, "step": 16580 }, { "epoch": 0.36688554689948344, "grad_norm": 1.0115727186203003, "learning_rate": 1.4060889461530685e-05, "loss": 0.406, "step": 16585 }, { "epoch": 0.3669961545409967, "grad_norm": 1.0034576654434204, "learning_rate": 1.4057713731079576e-05, "loss": 0.512, "step": 16590 }, { "epoch": 0.36710676218251, "grad_norm": 1.1291862726211548, "learning_rate": 1.4054537510660802e-05, "loss": 0.2532, "step": 16595 }, { "epoch": 0.36721736982402325, "grad_norm": 1.3610426187515259, "learning_rate": 1.4051360800657885e-05, "loss": 0.3335, "step": 16600 }, { "epoch": 0.3673279774655365, "grad_norm": 1.2667691707611084, "learning_rate": 1.4048183601454417e-05, "loss": 0.3225, "step": 16605 }, { "epoch": 0.36743858510704974, "grad_norm": 1.8441014289855957, "learning_rate": 1.4045005913434044e-05, "loss": 0.3952, "step": 16610 }, { "epoch": 0.367549192748563, "grad_norm": 1.9927635192871094, "learning_rate": 1.4041827736980468e-05, "loss": 0.3704, "step": 16615 }, { "epoch": 0.3676598003900763, "grad_norm": 1.5747119188308716, "learning_rate": 1.4038649072477452e-05, "loss": 0.4546, "step": 16620 }, { "epoch": 0.36777040803158956, "grad_norm": 0.697384238243103, "learning_rate": 1.4035469920308826e-05, "loss": 0.2615, "step": 16625 }, { "epoch": 0.36788101567310283, "grad_norm": 1.9943163394927979, "learning_rate": 1.403229028085846e-05, "loss": 0.4404, "step": 16630 }, { "epoch": 0.36799162331461605, "grad_norm": 1.0709518194198608, "learning_rate": 1.4029110154510304e-05, "loss": 0.4012, "step": 16635 }, { "epoch": 0.3681022309561293, "grad_norm": 0.9120925068855286, "learning_rate": 1.4025929541648354e-05, "loss": 0.4197, "step": 16640 }, { "epoch": 0.3682128385976426, "grad_norm": 1.3235697746276855, "learning_rate": 1.4022748442656665e-05, "loss": 0.269, "step": 16645 }, { "epoch": 0.36832344623915586, "grad_norm": 1.031637191772461, "learning_rate": 1.401956685791936e-05, "loss": 0.3336, "step": 16650 }, { "epoch": 0.3684340538806691, "grad_norm": 0.4922609031200409, "learning_rate": 1.4016384787820612e-05, "loss": 0.1943, "step": 16655 }, { "epoch": 0.36854466152218235, "grad_norm": 1.0547698736190796, "learning_rate": 1.401320223274465e-05, "loss": 0.3487, "step": 16660 }, { "epoch": 0.3686552691636956, "grad_norm": 1.101631999015808, "learning_rate": 1.4010019193075775e-05, "loss": 0.3901, "step": 16665 }, { "epoch": 0.3687658768052089, "grad_norm": 1.6338837146759033, "learning_rate": 1.4006835669198331e-05, "loss": 0.3846, "step": 16670 }, { "epoch": 0.36887648444672216, "grad_norm": 1.215604305267334, "learning_rate": 1.4003651661496733e-05, "loss": 0.4286, "step": 16675 }, { "epoch": 0.3689870920882354, "grad_norm": 1.0935415029525757, "learning_rate": 1.4000467170355441e-05, "loss": 0.3177, "step": 16680 }, { "epoch": 0.36909769972974865, "grad_norm": 1.2510262727737427, "learning_rate": 1.399728219615899e-05, "loss": 0.3918, "step": 16685 }, { "epoch": 0.3692083073712619, "grad_norm": 1.4144606590270996, "learning_rate": 1.399409673929196e-05, "loss": 0.2656, "step": 16690 }, { "epoch": 0.3693189150127752, "grad_norm": 1.366242527961731, "learning_rate": 1.3990910800138995e-05, "loss": 0.4134, "step": 16695 }, { "epoch": 0.36942952265428847, "grad_norm": 1.2154510021209717, "learning_rate": 1.3987724379084797e-05, "loss": 0.4357, "step": 16700 }, { "epoch": 0.3695401302958017, "grad_norm": 1.309082269668579, "learning_rate": 1.3984537476514123e-05, "loss": 0.34, "step": 16705 }, { "epoch": 0.36965073793731495, "grad_norm": 1.4896681308746338, "learning_rate": 1.3981350092811793e-05, "loss": 0.3701, "step": 16710 }, { "epoch": 0.3697613455788282, "grad_norm": 1.254783034324646, "learning_rate": 1.3978162228362683e-05, "loss": 0.4096, "step": 16715 }, { "epoch": 0.3698719532203415, "grad_norm": 1.3278542757034302, "learning_rate": 1.3974973883551723e-05, "loss": 0.3277, "step": 16720 }, { "epoch": 0.36998256086185477, "grad_norm": 1.0733667612075806, "learning_rate": 1.397178505876391e-05, "loss": 0.5163, "step": 16725 }, { "epoch": 0.370093168503368, "grad_norm": 2.079714059829712, "learning_rate": 1.3968595754384287e-05, "loss": 0.3942, "step": 16730 }, { "epoch": 0.37020377614488126, "grad_norm": 1.044350028038025, "learning_rate": 1.3965405970797965e-05, "loss": 0.4611, "step": 16735 }, { "epoch": 0.37031438378639453, "grad_norm": 0.7329850792884827, "learning_rate": 1.396221570839011e-05, "loss": 0.2564, "step": 16740 }, { "epoch": 0.3704249914279078, "grad_norm": 1.4388731718063354, "learning_rate": 1.3959024967545944e-05, "loss": 0.3304, "step": 16745 }, { "epoch": 0.370535599069421, "grad_norm": 1.1631261110305786, "learning_rate": 1.395583374865075e-05, "loss": 0.5063, "step": 16750 }, { "epoch": 0.3706462067109343, "grad_norm": 1.4067457914352417, "learning_rate": 1.3952642052089864e-05, "loss": 0.3114, "step": 16755 }, { "epoch": 0.37075681435244756, "grad_norm": 1.261452317237854, "learning_rate": 1.3949449878248684e-05, "loss": 0.3305, "step": 16760 }, { "epoch": 0.37086742199396083, "grad_norm": 1.1468474864959717, "learning_rate": 1.3946257227512665e-05, "loss": 0.3874, "step": 16765 }, { "epoch": 0.3709780296354741, "grad_norm": 1.8047529458999634, "learning_rate": 1.3943064100267314e-05, "loss": 0.1915, "step": 16770 }, { "epoch": 0.3710886372769873, "grad_norm": 1.506343126296997, "learning_rate": 1.3939870496898205e-05, "loss": 0.4722, "step": 16775 }, { "epoch": 0.3711992449185006, "grad_norm": 1.2784361839294434, "learning_rate": 1.3936676417790964e-05, "loss": 0.2445, "step": 16780 }, { "epoch": 0.37130985256001386, "grad_norm": 1.8109736442565918, "learning_rate": 1.393348186333127e-05, "loss": 0.5172, "step": 16785 }, { "epoch": 0.37142046020152714, "grad_norm": 1.10970938205719, "learning_rate": 1.3930286833904873e-05, "loss": 0.1965, "step": 16790 }, { "epoch": 0.3715310678430404, "grad_norm": 1.3992397785186768, "learning_rate": 1.3927091329897564e-05, "loss": 0.3564, "step": 16795 }, { "epoch": 0.3716416754845536, "grad_norm": 1.3908149003982544, "learning_rate": 1.3923895351695205e-05, "loss": 0.5033, "step": 16800 }, { "epoch": 0.3717522831260669, "grad_norm": 0.9601726531982422, "learning_rate": 1.3920698899683704e-05, "loss": 0.4132, "step": 16805 }, { "epoch": 0.37186289076758017, "grad_norm": 1.1729578971862793, "learning_rate": 1.3917501974249035e-05, "loss": 0.3833, "step": 16810 }, { "epoch": 0.37197349840909344, "grad_norm": 1.3945972919464111, "learning_rate": 1.3914304575777225e-05, "loss": 0.3919, "step": 16815 }, { "epoch": 0.37208410605060666, "grad_norm": 1.5697743892669678, "learning_rate": 1.3911106704654359e-05, "loss": 0.3788, "step": 16820 }, { "epoch": 0.3721947136921199, "grad_norm": 1.4342504739761353, "learning_rate": 1.3907908361266577e-05, "loss": 0.4075, "step": 16825 }, { "epoch": 0.3723053213336332, "grad_norm": 2.1615025997161865, "learning_rate": 1.3904709546000081e-05, "loss": 0.43, "step": 16830 }, { "epoch": 0.37241592897514647, "grad_norm": 1.4153975248336792, "learning_rate": 1.3901510259241126e-05, "loss": 0.3977, "step": 16835 }, { "epoch": 0.37252653661665974, "grad_norm": 1.8385077714920044, "learning_rate": 1.3898310501376026e-05, "loss": 0.3139, "step": 16840 }, { "epoch": 0.37263714425817296, "grad_norm": 2.1419122219085693, "learning_rate": 1.3895110272791148e-05, "loss": 0.4033, "step": 16845 }, { "epoch": 0.37274775189968623, "grad_norm": 1.2269588708877563, "learning_rate": 1.3891909573872919e-05, "loss": 0.3205, "step": 16850 }, { "epoch": 0.3728583595411995, "grad_norm": 1.5334339141845703, "learning_rate": 1.3888708405007824e-05, "loss": 0.3564, "step": 16855 }, { "epoch": 0.3729689671827128, "grad_norm": 1.5194026231765747, "learning_rate": 1.3885506766582403e-05, "loss": 0.4977, "step": 16860 }, { "epoch": 0.37307957482422605, "grad_norm": 1.5619088411331177, "learning_rate": 1.3882304658983253e-05, "loss": 0.309, "step": 16865 }, { "epoch": 0.37319018246573926, "grad_norm": 1.827925682067871, "learning_rate": 1.3879102082597025e-05, "loss": 0.3912, "step": 16870 }, { "epoch": 0.37330079010725253, "grad_norm": 0.8564406633377075, "learning_rate": 1.3875899037810432e-05, "loss": 0.2738, "step": 16875 }, { "epoch": 0.3734113977487658, "grad_norm": 1.8869918584823608, "learning_rate": 1.3872695525010238e-05, "loss": 0.3284, "step": 16880 }, { "epoch": 0.3735220053902791, "grad_norm": 1.7216495275497437, "learning_rate": 1.386949154458327e-05, "loss": 0.3285, "step": 16885 }, { "epoch": 0.37363261303179235, "grad_norm": 1.3854002952575684, "learning_rate": 1.3866287096916406e-05, "loss": 0.3551, "step": 16890 }, { "epoch": 0.37374322067330557, "grad_norm": 1.6454811096191406, "learning_rate": 1.3863082182396577e-05, "loss": 0.2801, "step": 16895 }, { "epoch": 0.37385382831481884, "grad_norm": 1.606501817703247, "learning_rate": 1.3859876801410784e-05, "loss": 0.4179, "step": 16900 }, { "epoch": 0.3739644359563321, "grad_norm": 1.582004427909851, "learning_rate": 1.3856670954346067e-05, "loss": 0.3887, "step": 16905 }, { "epoch": 0.3740750435978454, "grad_norm": 1.8547109365463257, "learning_rate": 1.385346464158954e-05, "loss": 0.3832, "step": 16910 }, { "epoch": 0.3741856512393586, "grad_norm": 0.8373976349830627, "learning_rate": 1.3850257863528361e-05, "loss": 0.3469, "step": 16915 }, { "epoch": 0.37429625888087187, "grad_norm": 1.3414188623428345, "learning_rate": 1.3847050620549742e-05, "loss": 0.358, "step": 16920 }, { "epoch": 0.37440686652238514, "grad_norm": 1.3685529232025146, "learning_rate": 1.3843842913040964e-05, "loss": 0.1933, "step": 16925 }, { "epoch": 0.3745174741638984, "grad_norm": 2.2250192165374756, "learning_rate": 1.3840634741389352e-05, "loss": 0.2864, "step": 16930 }, { "epoch": 0.3746280818054117, "grad_norm": 1.5884166955947876, "learning_rate": 1.3837426105982292e-05, "loss": 0.3079, "step": 16935 }, { "epoch": 0.3747386894469249, "grad_norm": 1.9262841939926147, "learning_rate": 1.3834217007207231e-05, "loss": 0.3242, "step": 16940 }, { "epoch": 0.37484929708843817, "grad_norm": 1.054663896560669, "learning_rate": 1.3831007445451658e-05, "loss": 0.2968, "step": 16945 }, { "epoch": 0.37495990472995144, "grad_norm": 1.3882160186767578, "learning_rate": 1.3827797421103135e-05, "loss": 0.5074, "step": 16950 }, { "epoch": 0.3750705123714647, "grad_norm": 1.451882243156433, "learning_rate": 1.3824586934549269e-05, "loss": 0.3345, "step": 16955 }, { "epoch": 0.375181120012978, "grad_norm": 1.3671977519989014, "learning_rate": 1.382137598617772e-05, "loss": 0.3645, "step": 16960 }, { "epoch": 0.3752917276544912, "grad_norm": 1.5363458395004272, "learning_rate": 1.3818164576376214e-05, "loss": 0.2805, "step": 16965 }, { "epoch": 0.3754023352960045, "grad_norm": 1.5169334411621094, "learning_rate": 1.381495270553253e-05, "loss": 0.3851, "step": 16970 }, { "epoch": 0.37551294293751775, "grad_norm": 1.2480055093765259, "learning_rate": 1.3811740374034494e-05, "loss": 0.4148, "step": 16975 }, { "epoch": 0.375623550579031, "grad_norm": 1.5215781927108765, "learning_rate": 1.3808527582269996e-05, "loss": 0.3771, "step": 16980 }, { "epoch": 0.3757341582205443, "grad_norm": 1.342537522315979, "learning_rate": 1.3805314330626985e-05, "loss": 0.3955, "step": 16985 }, { "epoch": 0.3758447658620575, "grad_norm": 1.8172688484191895, "learning_rate": 1.3802100619493453e-05, "loss": 0.5238, "step": 16990 }, { "epoch": 0.3759553735035708, "grad_norm": 1.0135310888290405, "learning_rate": 1.3798886449257459e-05, "loss": 0.3158, "step": 16995 }, { "epoch": 0.37606598114508405, "grad_norm": 1.174865961074829, "learning_rate": 1.3795671820307108e-05, "loss": 0.3442, "step": 17000 }, { "epoch": 0.3761765887865973, "grad_norm": 1.4499452114105225, "learning_rate": 1.3792456733030575e-05, "loss": 0.3166, "step": 17005 }, { "epoch": 0.37628719642811054, "grad_norm": 1.1410388946533203, "learning_rate": 1.3789241187816068e-05, "loss": 0.5239, "step": 17010 }, { "epoch": 0.3763978040696238, "grad_norm": 1.4501612186431885, "learning_rate": 1.3786025185051874e-05, "loss": 0.3498, "step": 17015 }, { "epoch": 0.3765084117111371, "grad_norm": 1.2887849807739258, "learning_rate": 1.3782808725126322e-05, "loss": 0.2975, "step": 17020 }, { "epoch": 0.37661901935265035, "grad_norm": 1.342012643814087, "learning_rate": 1.3779591808427792e-05, "loss": 0.4164, "step": 17025 }, { "epoch": 0.3767296269941636, "grad_norm": 2.1139707565307617, "learning_rate": 1.3776374435344734e-05, "loss": 0.3941, "step": 17030 }, { "epoch": 0.37684023463567684, "grad_norm": 0.9756672978401184, "learning_rate": 1.3773156606265641e-05, "loss": 0.5239, "step": 17035 }, { "epoch": 0.3769508422771901, "grad_norm": 1.8825801610946655, "learning_rate": 1.3769938321579064e-05, "loss": 0.4531, "step": 17040 }, { "epoch": 0.3770614499187034, "grad_norm": 0.7390503287315369, "learning_rate": 1.376671958167361e-05, "loss": 0.3334, "step": 17045 }, { "epoch": 0.37717205756021666, "grad_norm": 1.4907475709915161, "learning_rate": 1.3763500386937945e-05, "loss": 0.388, "step": 17050 }, { "epoch": 0.3772826652017299, "grad_norm": 1.242935299873352, "learning_rate": 1.376028073776078e-05, "loss": 0.3985, "step": 17055 }, { "epoch": 0.37739327284324314, "grad_norm": 1.4993767738342285, "learning_rate": 1.375706063453089e-05, "loss": 0.3219, "step": 17060 }, { "epoch": 0.3775038804847564, "grad_norm": 0.9991163015365601, "learning_rate": 1.37538400776371e-05, "loss": 0.3416, "step": 17065 }, { "epoch": 0.3776144881262697, "grad_norm": 1.1577922105789185, "learning_rate": 1.3750619067468294e-05, "loss": 0.2314, "step": 17070 }, { "epoch": 0.37772509576778296, "grad_norm": 1.2475796937942505, "learning_rate": 1.3747397604413402e-05, "loss": 0.4151, "step": 17075 }, { "epoch": 0.37783570340929623, "grad_norm": 1.6586112976074219, "learning_rate": 1.3744175688861421e-05, "loss": 0.4246, "step": 17080 }, { "epoch": 0.37794631105080945, "grad_norm": 1.3982104063034058, "learning_rate": 1.3740953321201393e-05, "loss": 0.4326, "step": 17085 }, { "epoch": 0.3780569186923227, "grad_norm": 2.483699321746826, "learning_rate": 1.373773050182242e-05, "loss": 0.3702, "step": 17090 }, { "epoch": 0.378167526333836, "grad_norm": 1.9826749563217163, "learning_rate": 1.3734507231113656e-05, "loss": 0.3643, "step": 17095 }, { "epoch": 0.37827813397534926, "grad_norm": 1.3056939840316772, "learning_rate": 1.3731283509464306e-05, "loss": 0.4284, "step": 17100 }, { "epoch": 0.3783887416168625, "grad_norm": 1.7239898443222046, "learning_rate": 1.3728059337263642e-05, "loss": 0.3878, "step": 17105 }, { "epoch": 0.37849934925837575, "grad_norm": 1.0476188659667969, "learning_rate": 1.372483471490097e-05, "loss": 0.3363, "step": 17110 }, { "epoch": 0.378609956899889, "grad_norm": 1.375280499458313, "learning_rate": 1.372160964276567e-05, "loss": 0.3097, "step": 17115 }, { "epoch": 0.3787205645414023, "grad_norm": 1.5453226566314697, "learning_rate": 1.371838412124717e-05, "loss": 0.4476, "step": 17120 }, { "epoch": 0.37883117218291557, "grad_norm": 1.2598388195037842, "learning_rate": 1.3715158150734941e-05, "loss": 0.5532, "step": 17125 }, { "epoch": 0.3789417798244288, "grad_norm": 0.751206636428833, "learning_rate": 1.3711931731618531e-05, "loss": 0.2648, "step": 17130 }, { "epoch": 0.37905238746594205, "grad_norm": 1.056836485862732, "learning_rate": 1.370870486428752e-05, "loss": 0.315, "step": 17135 }, { "epoch": 0.3791629951074553, "grad_norm": 0.4555726647377014, "learning_rate": 1.3705477549131554e-05, "loss": 0.2889, "step": 17140 }, { "epoch": 0.3792736027489686, "grad_norm": 1.5651746988296509, "learning_rate": 1.370224978654033e-05, "loss": 0.258, "step": 17145 }, { "epoch": 0.37938421039048187, "grad_norm": 1.3235499858856201, "learning_rate": 1.3699021576903601e-05, "loss": 0.4207, "step": 17150 }, { "epoch": 0.3794948180319951, "grad_norm": 1.6426299810409546, "learning_rate": 1.369579292061117e-05, "loss": 0.2124, "step": 17155 }, { "epoch": 0.37960542567350836, "grad_norm": 1.5027176141738892, "learning_rate": 1.3692563818052894e-05, "loss": 0.2964, "step": 17160 }, { "epoch": 0.37971603331502163, "grad_norm": 1.7321274280548096, "learning_rate": 1.3689334269618692e-05, "loss": 0.4463, "step": 17165 }, { "epoch": 0.3798266409565349, "grad_norm": 1.4582972526550293, "learning_rate": 1.368610427569853e-05, "loss": 0.3657, "step": 17170 }, { "epoch": 0.3799372485980481, "grad_norm": 1.5233843326568604, "learning_rate": 1.3682873836682425e-05, "loss": 0.3616, "step": 17175 }, { "epoch": 0.3800478562395614, "grad_norm": 1.0115598440170288, "learning_rate": 1.3679642952960452e-05, "loss": 0.3447, "step": 17180 }, { "epoch": 0.38015846388107466, "grad_norm": 1.560126781463623, "learning_rate": 1.3676411624922743e-05, "loss": 0.3411, "step": 17185 }, { "epoch": 0.38026907152258793, "grad_norm": 1.6986557245254517, "learning_rate": 1.3673179852959475e-05, "loss": 0.3272, "step": 17190 }, { "epoch": 0.3803796791641012, "grad_norm": 1.7718145847320557, "learning_rate": 1.3669947637460887e-05, "loss": 0.3394, "step": 17195 }, { "epoch": 0.3804902868056144, "grad_norm": 1.2732633352279663, "learning_rate": 1.3666714978817266e-05, "loss": 0.3171, "step": 17200 }, { "epoch": 0.3806008944471277, "grad_norm": 1.4201631546020508, "learning_rate": 1.3663481877418955e-05, "loss": 0.3215, "step": 17205 }, { "epoch": 0.38071150208864096, "grad_norm": 1.9411935806274414, "learning_rate": 1.3660248333656352e-05, "loss": 0.4141, "step": 17210 }, { "epoch": 0.38082210973015423, "grad_norm": 1.3359836339950562, "learning_rate": 1.3657014347919906e-05, "loss": 0.4297, "step": 17215 }, { "epoch": 0.3809327173716675, "grad_norm": 1.6150864362716675, "learning_rate": 1.3653779920600119e-05, "loss": 0.2982, "step": 17220 }, { "epoch": 0.3810433250131807, "grad_norm": 0.6365106105804443, "learning_rate": 1.3650545052087543e-05, "loss": 0.3563, "step": 17225 }, { "epoch": 0.381153932654694, "grad_norm": 1.1811097860336304, "learning_rate": 1.3647309742772792e-05, "loss": 0.4642, "step": 17230 }, { "epoch": 0.38126454029620727, "grad_norm": 1.3440755605697632, "learning_rate": 1.3644073993046528e-05, "loss": 0.3366, "step": 17235 }, { "epoch": 0.38137514793772054, "grad_norm": 1.3903611898422241, "learning_rate": 1.3640837803299467e-05, "loss": 0.3764, "step": 17240 }, { "epoch": 0.3814857555792338, "grad_norm": 0.8109383583068848, "learning_rate": 1.3637601173922377e-05, "loss": 0.4966, "step": 17245 }, { "epoch": 0.381596363220747, "grad_norm": 2.0241281986236572, "learning_rate": 1.363436410530608e-05, "loss": 0.3178, "step": 17250 }, { "epoch": 0.3817069708622603, "grad_norm": 1.2002813816070557, "learning_rate": 1.3631126597841451e-05, "loss": 0.4162, "step": 17255 }, { "epoch": 0.38181757850377357, "grad_norm": 1.6945456266403198, "learning_rate": 1.3627888651919419e-05, "loss": 0.3498, "step": 17260 }, { "epoch": 0.38192818614528684, "grad_norm": 1.6109094619750977, "learning_rate": 1.3624650267930963e-05, "loss": 0.2743, "step": 17265 }, { "epoch": 0.38203879378680006, "grad_norm": 1.2755849361419678, "learning_rate": 1.3621411446267121e-05, "loss": 0.5688, "step": 17270 }, { "epoch": 0.38214940142831333, "grad_norm": 2.160414695739746, "learning_rate": 1.3618172187318974e-05, "loss": 0.4316, "step": 17275 }, { "epoch": 0.3822600090698266, "grad_norm": 1.8151007890701294, "learning_rate": 1.3614932491477664e-05, "loss": 0.3216, "step": 17280 }, { "epoch": 0.3823706167113399, "grad_norm": 0.737163782119751, "learning_rate": 1.3611692359134386e-05, "loss": 0.4087, "step": 17285 }, { "epoch": 0.38248122435285314, "grad_norm": 1.1869832277297974, "learning_rate": 1.3608451790680382e-05, "loss": 0.3459, "step": 17290 }, { "epoch": 0.38259183199436636, "grad_norm": 1.2638132572174072, "learning_rate": 1.3605210786506948e-05, "loss": 0.3314, "step": 17295 }, { "epoch": 0.38270243963587963, "grad_norm": 1.7742249965667725, "learning_rate": 1.3601969347005439e-05, "loss": 0.355, "step": 17300 }, { "epoch": 0.3828130472773929, "grad_norm": 1.9107874631881714, "learning_rate": 1.3598727472567252e-05, "loss": 0.316, "step": 17305 }, { "epoch": 0.3829236549189062, "grad_norm": 1.104528784751892, "learning_rate": 1.3595485163583847e-05, "loss": 0.4, "step": 17310 }, { "epoch": 0.38303426256041945, "grad_norm": 1.9505054950714111, "learning_rate": 1.3592242420446733e-05, "loss": 0.4274, "step": 17315 }, { "epoch": 0.38314487020193266, "grad_norm": 1.221445083618164, "learning_rate": 1.3588999243547467e-05, "loss": 0.2188, "step": 17320 }, { "epoch": 0.38325547784344594, "grad_norm": 1.8519173860549927, "learning_rate": 1.3585755633277659e-05, "loss": 0.4209, "step": 17325 }, { "epoch": 0.3833660854849592, "grad_norm": 1.12282133102417, "learning_rate": 1.3582511590028981e-05, "loss": 0.5095, "step": 17330 }, { "epoch": 0.3834766931264725, "grad_norm": 1.0628461837768555, "learning_rate": 1.3579267114193147e-05, "loss": 0.4751, "step": 17335 }, { "epoch": 0.38358730076798575, "grad_norm": 0.8202899098396301, "learning_rate": 1.3576022206161921e-05, "loss": 0.2564, "step": 17340 }, { "epoch": 0.38369790840949897, "grad_norm": 1.5665004253387451, "learning_rate": 1.357277686632713e-05, "loss": 0.4489, "step": 17345 }, { "epoch": 0.38380851605101224, "grad_norm": 0.6982828378677368, "learning_rate": 1.356953109508065e-05, "loss": 0.4042, "step": 17350 }, { "epoch": 0.3839191236925255, "grad_norm": 0.9617422223091125, "learning_rate": 1.3566284892814403e-05, "loss": 0.4122, "step": 17355 }, { "epoch": 0.3840297313340388, "grad_norm": 1.7750884294509888, "learning_rate": 1.3563038259920367e-05, "loss": 0.2729, "step": 17360 }, { "epoch": 0.384140338975552, "grad_norm": 1.1942310333251953, "learning_rate": 1.3559791196790576e-05, "loss": 0.5022, "step": 17365 }, { "epoch": 0.38425094661706527, "grad_norm": 1.3245655298233032, "learning_rate": 1.3556543703817107e-05, "loss": 0.4639, "step": 17370 }, { "epoch": 0.38436155425857854, "grad_norm": 1.3103446960449219, "learning_rate": 1.3553295781392093e-05, "loss": 0.3872, "step": 17375 }, { "epoch": 0.3844721619000918, "grad_norm": 1.7087171077728271, "learning_rate": 1.3550047429907723e-05, "loss": 0.2575, "step": 17380 }, { "epoch": 0.3845827695416051, "grad_norm": 1.6175764799118042, "learning_rate": 1.3546798649756236e-05, "loss": 0.2731, "step": 17385 }, { "epoch": 0.3846933771831183, "grad_norm": 1.3231596946716309, "learning_rate": 1.3543549441329914e-05, "loss": 0.2904, "step": 17390 }, { "epoch": 0.3848039848246316, "grad_norm": 1.4631352424621582, "learning_rate": 1.354029980502111e-05, "loss": 0.4808, "step": 17395 }, { "epoch": 0.38491459246614484, "grad_norm": 1.5462325811386108, "learning_rate": 1.3537049741222203e-05, "loss": 0.2978, "step": 17400 }, { "epoch": 0.3850252001076581, "grad_norm": 1.5847365856170654, "learning_rate": 1.3533799250325646e-05, "loss": 0.3423, "step": 17405 }, { "epoch": 0.3851358077491714, "grad_norm": 0.3913732171058655, "learning_rate": 1.3530548332723932e-05, "loss": 0.2634, "step": 17410 }, { "epoch": 0.3852464153906846, "grad_norm": 0.9794289469718933, "learning_rate": 1.3527296988809606e-05, "loss": 0.5201, "step": 17415 }, { "epoch": 0.3853570230321979, "grad_norm": 1.5946464538574219, "learning_rate": 1.3524045218975272e-05, "loss": 0.4214, "step": 17420 }, { "epoch": 0.38546763067371115, "grad_norm": 1.4412281513214111, "learning_rate": 1.3520793023613576e-05, "loss": 0.3946, "step": 17425 }, { "epoch": 0.3855782383152244, "grad_norm": 1.4672057628631592, "learning_rate": 1.3517540403117222e-05, "loss": 0.4439, "step": 17430 }, { "epoch": 0.3856888459567377, "grad_norm": 1.0747265815734863, "learning_rate": 1.3514287357878964e-05, "loss": 0.1674, "step": 17435 }, { "epoch": 0.3857994535982509, "grad_norm": 1.9652042388916016, "learning_rate": 1.3511033888291599e-05, "loss": 0.2885, "step": 17440 }, { "epoch": 0.3859100612397642, "grad_norm": 1.180333137512207, "learning_rate": 1.3507779994747989e-05, "loss": 0.3232, "step": 17445 }, { "epoch": 0.38602066888127745, "grad_norm": 1.1639528274536133, "learning_rate": 1.3504525677641041e-05, "loss": 0.3568, "step": 17450 }, { "epoch": 0.3861312765227907, "grad_norm": 1.2657207250595093, "learning_rate": 1.350127093736371e-05, "loss": 0.3861, "step": 17455 }, { "epoch": 0.38624188416430394, "grad_norm": 1.3821731805801392, "learning_rate": 1.3498015774309005e-05, "loss": 0.494, "step": 17460 }, { "epoch": 0.3863524918058172, "grad_norm": 1.3750638961791992, "learning_rate": 1.349476018886999e-05, "loss": 0.3409, "step": 17465 }, { "epoch": 0.3864630994473305, "grad_norm": 0.6977415680885315, "learning_rate": 1.349150418143977e-05, "loss": 0.3194, "step": 17470 }, { "epoch": 0.38657370708884375, "grad_norm": 2.224849224090576, "learning_rate": 1.348824775241151e-05, "loss": 0.3527, "step": 17475 }, { "epoch": 0.386684314730357, "grad_norm": 1.1619391441345215, "learning_rate": 1.348499090217842e-05, "loss": 0.4135, "step": 17480 }, { "epoch": 0.38679492237187024, "grad_norm": 0.9538764357566833, "learning_rate": 1.3481733631133772e-05, "loss": 0.216, "step": 17485 }, { "epoch": 0.3869055300133835, "grad_norm": 0.5486892461776733, "learning_rate": 1.3478475939670867e-05, "loss": 0.3716, "step": 17490 }, { "epoch": 0.3870161376548968, "grad_norm": 1.145211935043335, "learning_rate": 1.347521782818308e-05, "loss": 0.382, "step": 17495 }, { "epoch": 0.38712674529641006, "grad_norm": 1.232353687286377, "learning_rate": 1.3471959297063828e-05, "loss": 0.4107, "step": 17500 }, { "epoch": 0.38723735293792333, "grad_norm": 2.0367918014526367, "learning_rate": 1.346870034670657e-05, "loss": 0.3933, "step": 17505 }, { "epoch": 0.38734796057943655, "grad_norm": 1.1835423707962036, "learning_rate": 1.3465440977504831e-05, "loss": 0.488, "step": 17510 }, { "epoch": 0.3874585682209498, "grad_norm": 1.5595834255218506, "learning_rate": 1.3462181189852172e-05, "loss": 0.4113, "step": 17515 }, { "epoch": 0.3875691758624631, "grad_norm": 1.573775053024292, "learning_rate": 1.3458920984142217e-05, "loss": 0.2358, "step": 17520 }, { "epoch": 0.38767978350397636, "grad_norm": 0.9143115878105164, "learning_rate": 1.3455660360768632e-05, "loss": 0.4723, "step": 17525 }, { "epoch": 0.38779039114548963, "grad_norm": 1.288870096206665, "learning_rate": 1.3452399320125136e-05, "loss": 0.2865, "step": 17530 }, { "epoch": 0.38790099878700285, "grad_norm": 1.9412978887557983, "learning_rate": 1.3449137862605502e-05, "loss": 0.3069, "step": 17535 }, { "epoch": 0.3880116064285161, "grad_norm": 1.3291643857955933, "learning_rate": 1.3445875988603543e-05, "loss": 0.4109, "step": 17540 }, { "epoch": 0.3881222140700294, "grad_norm": 1.2215274572372437, "learning_rate": 1.3442613698513136e-05, "loss": 0.3648, "step": 17545 }, { "epoch": 0.38823282171154266, "grad_norm": 1.665555477142334, "learning_rate": 1.34393509927282e-05, "loss": 0.3499, "step": 17550 }, { "epoch": 0.3883434293530559, "grad_norm": 0.8364453911781311, "learning_rate": 1.34360878716427e-05, "loss": 0.3042, "step": 17555 }, { "epoch": 0.38845403699456915, "grad_norm": 1.840399980545044, "learning_rate": 1.3432824335650665e-05, "loss": 0.4322, "step": 17560 }, { "epoch": 0.3885646446360824, "grad_norm": 0.770193338394165, "learning_rate": 1.3429560385146164e-05, "loss": 0.2711, "step": 17565 }, { "epoch": 0.3886752522775957, "grad_norm": 1.0561574697494507, "learning_rate": 1.3426296020523315e-05, "loss": 0.3838, "step": 17570 }, { "epoch": 0.38878585991910897, "grad_norm": 1.3113446235656738, "learning_rate": 1.342303124217629e-05, "loss": 0.3575, "step": 17575 }, { "epoch": 0.3888964675606222, "grad_norm": 1.5086989402770996, "learning_rate": 1.3419766050499312e-05, "loss": 0.3757, "step": 17580 }, { "epoch": 0.38900707520213545, "grad_norm": 1.9565290212631226, "learning_rate": 1.3416500445886654e-05, "loss": 0.4571, "step": 17585 }, { "epoch": 0.3891176828436487, "grad_norm": 0.8332728147506714, "learning_rate": 1.3413234428732625e-05, "loss": 0.3187, "step": 17590 }, { "epoch": 0.389228290485162, "grad_norm": 1.5995464324951172, "learning_rate": 1.3409967999431608e-05, "loss": 0.451, "step": 17595 }, { "epoch": 0.38933889812667527, "grad_norm": 1.6150527000427246, "learning_rate": 1.340670115837802e-05, "loss": 0.3617, "step": 17600 }, { "epoch": 0.3894495057681885, "grad_norm": 1.0943334102630615, "learning_rate": 1.3403433905966328e-05, "loss": 0.3298, "step": 17605 }, { "epoch": 0.38956011340970176, "grad_norm": 1.353270173072815, "learning_rate": 1.3400166242591052e-05, "loss": 0.3633, "step": 17610 }, { "epoch": 0.38967072105121503, "grad_norm": 0.9814561605453491, "learning_rate": 1.3396898168646769e-05, "loss": 0.343, "step": 17615 }, { "epoch": 0.3897813286927283, "grad_norm": 1.5896681547164917, "learning_rate": 1.3393629684528087e-05, "loss": 0.3825, "step": 17620 }, { "epoch": 0.3898919363342415, "grad_norm": 2.476513385772705, "learning_rate": 1.3390360790629681e-05, "loss": 0.3106, "step": 17625 }, { "epoch": 0.3900025439757548, "grad_norm": 1.0447747707366943, "learning_rate": 1.3387091487346269e-05, "loss": 0.3887, "step": 17630 }, { "epoch": 0.39011315161726806, "grad_norm": 1.291680932044983, "learning_rate": 1.3383821775072616e-05, "loss": 0.3007, "step": 17635 }, { "epoch": 0.39022375925878133, "grad_norm": 0.851357102394104, "learning_rate": 1.3380551654203538e-05, "loss": 0.467, "step": 17640 }, { "epoch": 0.3903343669002946, "grad_norm": 0.6222451329231262, "learning_rate": 1.3377281125133903e-05, "loss": 0.4167, "step": 17645 }, { "epoch": 0.3904449745418078, "grad_norm": 1.734081506729126, "learning_rate": 1.337401018825863e-05, "loss": 0.3802, "step": 17650 }, { "epoch": 0.3905555821833211, "grad_norm": 1.2781128883361816, "learning_rate": 1.3370738843972673e-05, "loss": 0.3593, "step": 17655 }, { "epoch": 0.39066618982483436, "grad_norm": 1.4612482786178589, "learning_rate": 1.3367467092671056e-05, "loss": 0.3751, "step": 17660 }, { "epoch": 0.39077679746634764, "grad_norm": 1.3146470785140991, "learning_rate": 1.3364194934748837e-05, "loss": 0.4316, "step": 17665 }, { "epoch": 0.3908874051078609, "grad_norm": 1.3389058113098145, "learning_rate": 1.336092237060113e-05, "loss": 0.2416, "step": 17670 }, { "epoch": 0.3909980127493741, "grad_norm": 1.2810773849487305, "learning_rate": 1.3357649400623096e-05, "loss": 0.5228, "step": 17675 }, { "epoch": 0.3911086203908874, "grad_norm": 1.1383228302001953, "learning_rate": 1.3354376025209943e-05, "loss": 0.3675, "step": 17680 }, { "epoch": 0.39121922803240067, "grad_norm": 1.8453210592269897, "learning_rate": 1.3351102244756933e-05, "loss": 0.3529, "step": 17685 }, { "epoch": 0.39132983567391394, "grad_norm": 1.8485796451568604, "learning_rate": 1.3347828059659374e-05, "loss": 0.2927, "step": 17690 }, { "epoch": 0.3914404433154272, "grad_norm": 1.5802586078643799, "learning_rate": 1.3344553470312617e-05, "loss": 0.4288, "step": 17695 }, { "epoch": 0.3915510509569404, "grad_norm": 1.4564658403396606, "learning_rate": 1.3341278477112075e-05, "loss": 0.4756, "step": 17700 }, { "epoch": 0.3916616585984537, "grad_norm": 1.399537444114685, "learning_rate": 1.3338003080453198e-05, "loss": 0.3226, "step": 17705 }, { "epoch": 0.39177226623996697, "grad_norm": 1.0443191528320312, "learning_rate": 1.3334727280731493e-05, "loss": 0.5407, "step": 17710 }, { "epoch": 0.39188287388148024, "grad_norm": 1.489917516708374, "learning_rate": 1.333145107834251e-05, "loss": 0.379, "step": 17715 }, { "epoch": 0.39199348152299346, "grad_norm": 1.232606291770935, "learning_rate": 1.3328174473681843e-05, "loss": 0.2965, "step": 17720 }, { "epoch": 0.39210408916450673, "grad_norm": 1.5180755853652954, "learning_rate": 1.3324897467145152e-05, "loss": 0.3971, "step": 17725 }, { "epoch": 0.39221469680602, "grad_norm": 1.5339536666870117, "learning_rate": 1.3321620059128129e-05, "loss": 0.447, "step": 17730 }, { "epoch": 0.3923253044475333, "grad_norm": 1.3987118005752563, "learning_rate": 1.3318342250026517e-05, "loss": 0.3353, "step": 17735 }, { "epoch": 0.39243591208904655, "grad_norm": 1.3971365690231323, "learning_rate": 1.3315064040236117e-05, "loss": 0.3827, "step": 17740 }, { "epoch": 0.39254651973055976, "grad_norm": 1.3992886543273926, "learning_rate": 1.3311785430152766e-05, "loss": 0.4614, "step": 17745 }, { "epoch": 0.39265712737207303, "grad_norm": 1.0156078338623047, "learning_rate": 1.3308506420172362e-05, "loss": 0.386, "step": 17750 }, { "epoch": 0.3927677350135863, "grad_norm": 1.6904897689819336, "learning_rate": 1.3305227010690835e-05, "loss": 0.3387, "step": 17755 }, { "epoch": 0.3928783426550996, "grad_norm": 1.5993688106536865, "learning_rate": 1.330194720210418e-05, "loss": 0.3552, "step": 17760 }, { "epoch": 0.39298895029661285, "grad_norm": 1.6893064975738525, "learning_rate": 1.3298666994808432e-05, "loss": 0.3548, "step": 17765 }, { "epoch": 0.39309955793812607, "grad_norm": 1.3843611478805542, "learning_rate": 1.3295386389199669e-05, "loss": 0.3714, "step": 17770 }, { "epoch": 0.39321016557963934, "grad_norm": 0.5518309473991394, "learning_rate": 1.329210538567403e-05, "loss": 0.3926, "step": 17775 }, { "epoch": 0.3933207732211526, "grad_norm": 0.8848504424095154, "learning_rate": 1.3288823984627695e-05, "loss": 0.4467, "step": 17780 }, { "epoch": 0.3934313808626659, "grad_norm": 1.7072222232818604, "learning_rate": 1.3285542186456888e-05, "loss": 0.3686, "step": 17785 }, { "epoch": 0.39354198850417915, "grad_norm": 1.5391390323638916, "learning_rate": 1.3282259991557889e-05, "loss": 0.3441, "step": 17790 }, { "epoch": 0.39365259614569237, "grad_norm": 1.298391342163086, "learning_rate": 1.3278977400327017e-05, "loss": 0.3712, "step": 17795 }, { "epoch": 0.39376320378720564, "grad_norm": 1.0700777769088745, "learning_rate": 1.3275694413160651e-05, "loss": 0.335, "step": 17800 }, { "epoch": 0.3938738114287189, "grad_norm": 1.3100968599319458, "learning_rate": 1.3272411030455205e-05, "loss": 0.3812, "step": 17805 }, { "epoch": 0.3939844190702322, "grad_norm": 1.4400367736816406, "learning_rate": 1.3269127252607148e-05, "loss": 0.2961, "step": 17810 }, { "epoch": 0.3940950267117454, "grad_norm": 1.018481969833374, "learning_rate": 1.3265843080012997e-05, "loss": 0.3733, "step": 17815 }, { "epoch": 0.39420563435325867, "grad_norm": 1.6076576709747314, "learning_rate": 1.3262558513069307e-05, "loss": 0.3725, "step": 17820 }, { "epoch": 0.39431624199477194, "grad_norm": 1.3491970300674438, "learning_rate": 1.32592735521727e-05, "loss": 0.2653, "step": 17825 }, { "epoch": 0.3944268496362852, "grad_norm": 1.3975600004196167, "learning_rate": 1.3255988197719826e-05, "loss": 0.421, "step": 17830 }, { "epoch": 0.3945374572777985, "grad_norm": 1.2838808298110962, "learning_rate": 1.3252702450107396e-05, "loss": 0.4815, "step": 17835 }, { "epoch": 0.3946480649193117, "grad_norm": 1.0117123126983643, "learning_rate": 1.3249416309732159e-05, "loss": 0.2808, "step": 17840 }, { "epoch": 0.394758672560825, "grad_norm": 1.398587942123413, "learning_rate": 1.3246129776990916e-05, "loss": 0.2737, "step": 17845 }, { "epoch": 0.39486928020233825, "grad_norm": 1.4729933738708496, "learning_rate": 1.3242842852280517e-05, "loss": 0.5203, "step": 17850 }, { "epoch": 0.3949798878438515, "grad_norm": 1.5820775032043457, "learning_rate": 1.3239555535997853e-05, "loss": 0.3445, "step": 17855 }, { "epoch": 0.3950904954853648, "grad_norm": 1.8430646657943726, "learning_rate": 1.3236267828539869e-05, "loss": 0.3453, "step": 17860 }, { "epoch": 0.395201103126878, "grad_norm": 1.404716968536377, "learning_rate": 1.3232979730303556e-05, "loss": 0.3765, "step": 17865 }, { "epoch": 0.3953117107683913, "grad_norm": 1.024733304977417, "learning_rate": 1.3229691241685947e-05, "loss": 0.3025, "step": 17870 }, { "epoch": 0.39542231840990455, "grad_norm": 1.585540533065796, "learning_rate": 1.322640236308413e-05, "loss": 0.4579, "step": 17875 }, { "epoch": 0.3955329260514178, "grad_norm": 1.0000542402267456, "learning_rate": 1.3223113094895235e-05, "loss": 0.4009, "step": 17880 }, { "epoch": 0.3956435336929311, "grad_norm": 1.4017250537872314, "learning_rate": 1.3219823437516437e-05, "loss": 0.3219, "step": 17885 }, { "epoch": 0.3957541413344443, "grad_norm": 1.7395271062850952, "learning_rate": 1.3216533391344966e-05, "loss": 0.4333, "step": 17890 }, { "epoch": 0.3958647489759576, "grad_norm": 1.6860827207565308, "learning_rate": 1.3213242956778089e-05, "loss": 0.2123, "step": 17895 }, { "epoch": 0.39597535661747085, "grad_norm": 1.1686208248138428, "learning_rate": 1.320995213421313e-05, "loss": 0.3808, "step": 17900 }, { "epoch": 0.3960859642589841, "grad_norm": 2.3626208305358887, "learning_rate": 1.3206660924047452e-05, "loss": 0.5266, "step": 17905 }, { "epoch": 0.39619657190049734, "grad_norm": 1.0867644548416138, "learning_rate": 1.3203369326678468e-05, "loss": 0.4188, "step": 17910 }, { "epoch": 0.3963071795420106, "grad_norm": 0.8865944147109985, "learning_rate": 1.3200077342503638e-05, "loss": 0.33, "step": 17915 }, { "epoch": 0.3964177871835239, "grad_norm": 1.222335934638977, "learning_rate": 1.3196784971920464e-05, "loss": 0.2515, "step": 17920 }, { "epoch": 0.39652839482503716, "grad_norm": 1.819747805595398, "learning_rate": 1.3193492215326505e-05, "loss": 0.4436, "step": 17925 }, { "epoch": 0.3966390024665504, "grad_norm": 1.840572476387024, "learning_rate": 1.3190199073119359e-05, "loss": 0.3265, "step": 17930 }, { "epoch": 0.39674961010806364, "grad_norm": 2.2330162525177, "learning_rate": 1.3186905545696666e-05, "loss": 0.3603, "step": 17935 }, { "epoch": 0.3968602177495769, "grad_norm": 1.5479164123535156, "learning_rate": 1.3183611633456126e-05, "loss": 0.4337, "step": 17940 }, { "epoch": 0.3969708253910902, "grad_norm": 1.3099830150604248, "learning_rate": 1.3180317336795473e-05, "loss": 0.3921, "step": 17945 }, { "epoch": 0.39708143303260346, "grad_norm": 1.4420686960220337, "learning_rate": 1.3177022656112495e-05, "loss": 0.2773, "step": 17950 }, { "epoch": 0.39719204067411673, "grad_norm": 1.3650634288787842, "learning_rate": 1.3173727591805021e-05, "loss": 0.3487, "step": 17955 }, { "epoch": 0.39730264831562995, "grad_norm": 1.0622591972351074, "learning_rate": 1.3170432144270932e-05, "loss": 0.2648, "step": 17960 }, { "epoch": 0.3974132559571432, "grad_norm": 1.2803040742874146, "learning_rate": 1.3167136313908154e-05, "loss": 0.4302, "step": 17965 }, { "epoch": 0.3975238635986565, "grad_norm": 1.1604719161987305, "learning_rate": 1.3163840101114646e-05, "loss": 0.3551, "step": 17970 }, { "epoch": 0.39763447124016976, "grad_norm": 1.1286431550979614, "learning_rate": 1.316054350628844e-05, "loss": 0.334, "step": 17975 }, { "epoch": 0.397745078881683, "grad_norm": 0.8963461518287659, "learning_rate": 1.315724652982759e-05, "loss": 0.413, "step": 17980 }, { "epoch": 0.39785568652319625, "grad_norm": 1.9953744411468506, "learning_rate": 1.3153949172130206e-05, "loss": 0.3515, "step": 17985 }, { "epoch": 0.3979662941647095, "grad_norm": 1.4054473638534546, "learning_rate": 1.3150651433594441e-05, "loss": 0.3197, "step": 17990 }, { "epoch": 0.3980769018062228, "grad_norm": 1.333256483078003, "learning_rate": 1.3147353314618503e-05, "loss": 0.248, "step": 17995 }, { "epoch": 0.39818750944773607, "grad_norm": 1.3496575355529785, "learning_rate": 1.3144054815600633e-05, "loss": 0.3363, "step": 18000 }, { "epoch": 0.3982981170892493, "grad_norm": 0.7769703269004822, "learning_rate": 1.3140755936939123e-05, "loss": 0.3462, "step": 18005 }, { "epoch": 0.39840872473076255, "grad_norm": 1.3902469873428345, "learning_rate": 1.3137456679032316e-05, "loss": 0.496, "step": 18010 }, { "epoch": 0.3985193323722758, "grad_norm": 1.134339451789856, "learning_rate": 1.3134157042278595e-05, "loss": 0.4043, "step": 18015 }, { "epoch": 0.3986299400137891, "grad_norm": 1.2997781038284302, "learning_rate": 1.3130857027076385e-05, "loss": 0.3013, "step": 18020 }, { "epoch": 0.39874054765530237, "grad_norm": 0.8459084033966064, "learning_rate": 1.3127556633824167e-05, "loss": 0.3404, "step": 18025 }, { "epoch": 0.3988511552968156, "grad_norm": 1.3578466176986694, "learning_rate": 1.3124255862920466e-05, "loss": 0.3753, "step": 18030 }, { "epoch": 0.39896176293832886, "grad_norm": 2.2035257816314697, "learning_rate": 1.3120954714763839e-05, "loss": 0.4092, "step": 18035 }, { "epoch": 0.39907237057984213, "grad_norm": 1.2424914836883545, "learning_rate": 1.3117653189752907e-05, "loss": 0.348, "step": 18040 }, { "epoch": 0.3991829782213554, "grad_norm": 1.0176122188568115, "learning_rate": 1.3114351288286324e-05, "loss": 0.4527, "step": 18045 }, { "epoch": 0.39929358586286867, "grad_norm": 1.3781507015228271, "learning_rate": 1.3111049010762793e-05, "loss": 0.3577, "step": 18050 }, { "epoch": 0.3994041935043819, "grad_norm": 0.6231480836868286, "learning_rate": 1.3107746357581067e-05, "loss": 0.2532, "step": 18055 }, { "epoch": 0.39951480114589516, "grad_norm": 1.3608921766281128, "learning_rate": 1.3104443329139938e-05, "loss": 0.3233, "step": 18060 }, { "epoch": 0.39962540878740843, "grad_norm": 1.5311203002929688, "learning_rate": 1.3101139925838247e-05, "loss": 0.3714, "step": 18065 }, { "epoch": 0.3997360164289217, "grad_norm": 1.31263267993927, "learning_rate": 1.3097836148074871e-05, "loss": 0.4686, "step": 18070 }, { "epoch": 0.3998466240704349, "grad_norm": 1.0727554559707642, "learning_rate": 1.3094531996248754e-05, "loss": 0.2475, "step": 18075 }, { "epoch": 0.3999572317119482, "grad_norm": 0.593370258808136, "learning_rate": 1.3091227470758862e-05, "loss": 0.3858, "step": 18080 }, { "epoch": 0.40006783935346146, "grad_norm": 1.7451239824295044, "learning_rate": 1.3087922572004215e-05, "loss": 0.5655, "step": 18085 }, { "epoch": 0.40017844699497473, "grad_norm": 1.176234245300293, "learning_rate": 1.3084617300383882e-05, "loss": 0.3397, "step": 18090 }, { "epoch": 0.400289054636488, "grad_norm": 1.5197855234146118, "learning_rate": 1.3081311656296975e-05, "loss": 0.2911, "step": 18095 }, { "epoch": 0.4003996622780012, "grad_norm": 1.7933813333511353, "learning_rate": 1.3078005640142644e-05, "loss": 0.3833, "step": 18100 }, { "epoch": 0.4005102699195145, "grad_norm": 1.4898489713668823, "learning_rate": 1.3074699252320093e-05, "loss": 0.3658, "step": 18105 }, { "epoch": 0.40062087756102777, "grad_norm": 1.3188650608062744, "learning_rate": 1.3071392493228567e-05, "loss": 0.2726, "step": 18110 }, { "epoch": 0.40073148520254104, "grad_norm": 1.589134693145752, "learning_rate": 1.3068085363267356e-05, "loss": 0.3567, "step": 18115 }, { "epoch": 0.4008420928440543, "grad_norm": 1.6090834140777588, "learning_rate": 1.3064777862835793e-05, "loss": 0.4133, "step": 18120 }, { "epoch": 0.4009527004855675, "grad_norm": 1.1267577409744263, "learning_rate": 1.3061469992333262e-05, "loss": 0.4048, "step": 18125 }, { "epoch": 0.4010633081270808, "grad_norm": 1.0447856187820435, "learning_rate": 1.3058161752159187e-05, "loss": 0.3692, "step": 18130 }, { "epoch": 0.40117391576859407, "grad_norm": 1.8989267349243164, "learning_rate": 1.3054853142713028e-05, "loss": 0.3809, "step": 18135 }, { "epoch": 0.40128452341010734, "grad_norm": 1.7429016828536987, "learning_rate": 1.3051544164394308e-05, "loss": 0.3958, "step": 18140 }, { "epoch": 0.4013951310516206, "grad_norm": 1.2292790412902832, "learning_rate": 1.3048234817602585e-05, "loss": 0.4835, "step": 18145 }, { "epoch": 0.40150573869313383, "grad_norm": 1.5571237802505493, "learning_rate": 1.3044925102737454e-05, "loss": 0.4225, "step": 18150 }, { "epoch": 0.4016163463346471, "grad_norm": 1.8892349004745483, "learning_rate": 1.304161502019857e-05, "loss": 0.4315, "step": 18155 }, { "epoch": 0.4017269539761604, "grad_norm": 0.8828055262565613, "learning_rate": 1.303830457038562e-05, "loss": 0.3985, "step": 18160 }, { "epoch": 0.40183756161767364, "grad_norm": 1.6469810009002686, "learning_rate": 1.3034993753698343e-05, "loss": 0.3254, "step": 18165 }, { "epoch": 0.40194816925918686, "grad_norm": 1.4263073205947876, "learning_rate": 1.3031682570536518e-05, "loss": 0.3948, "step": 18170 }, { "epoch": 0.40205877690070013, "grad_norm": 1.1874010562896729, "learning_rate": 1.3028371021299964e-05, "loss": 0.3866, "step": 18175 }, { "epoch": 0.4021693845422134, "grad_norm": 1.0038801431655884, "learning_rate": 1.302505910638856e-05, "loss": 0.4581, "step": 18180 }, { "epoch": 0.4022799921837267, "grad_norm": 1.4445792436599731, "learning_rate": 1.3021746826202208e-05, "loss": 0.5623, "step": 18185 }, { "epoch": 0.40239059982523995, "grad_norm": 1.28433358669281, "learning_rate": 1.3018434181140871e-05, "loss": 0.3615, "step": 18190 }, { "epoch": 0.40250120746675316, "grad_norm": 0.8699345588684082, "learning_rate": 1.301512117160455e-05, "loss": 0.3752, "step": 18195 }, { "epoch": 0.40261181510826644, "grad_norm": 1.0523916482925415, "learning_rate": 1.3011807797993286e-05, "loss": 0.4741, "step": 18200 }, { "epoch": 0.4027224227497797, "grad_norm": 1.3190948963165283, "learning_rate": 1.3008494060707171e-05, "loss": 0.3612, "step": 18205 }, { "epoch": 0.402833030391293, "grad_norm": 0.8187752366065979, "learning_rate": 1.3005179960146337e-05, "loss": 0.3559, "step": 18210 }, { "epoch": 0.40294363803280625, "grad_norm": 1.3512029647827148, "learning_rate": 1.3001865496710962e-05, "loss": 0.506, "step": 18215 }, { "epoch": 0.40305424567431947, "grad_norm": 1.2273579835891724, "learning_rate": 1.2998550670801262e-05, "loss": 0.3622, "step": 18220 }, { "epoch": 0.40316485331583274, "grad_norm": 1.2804690599441528, "learning_rate": 1.2995235482817506e-05, "loss": 0.2858, "step": 18225 }, { "epoch": 0.403275460957346, "grad_norm": 1.0551865100860596, "learning_rate": 1.2991919933160001e-05, "loss": 0.51, "step": 18230 }, { "epoch": 0.4033860685988593, "grad_norm": 1.0518723726272583, "learning_rate": 1.2988604022229094e-05, "loss": 0.3954, "step": 18235 }, { "epoch": 0.40349667624037255, "grad_norm": 1.3709379434585571, "learning_rate": 1.2985287750425186e-05, "loss": 0.3209, "step": 18240 }, { "epoch": 0.40360728388188577, "grad_norm": 1.7269933223724365, "learning_rate": 1.2981971118148717e-05, "loss": 0.3871, "step": 18245 }, { "epoch": 0.40371789152339904, "grad_norm": 1.3065171241760254, "learning_rate": 1.297865412580016e-05, "loss": 0.3428, "step": 18250 }, { "epoch": 0.4038284991649123, "grad_norm": 1.4454751014709473, "learning_rate": 1.297533677378005e-05, "loss": 0.2975, "step": 18255 }, { "epoch": 0.4039391068064256, "grad_norm": 1.6832499504089355, "learning_rate": 1.2972019062488954e-05, "loss": 0.4607, "step": 18260 }, { "epoch": 0.4040497144479388, "grad_norm": 1.1573587656021118, "learning_rate": 1.296870099232748e-05, "loss": 0.463, "step": 18265 }, { "epoch": 0.4041603220894521, "grad_norm": 1.0280734300613403, "learning_rate": 1.296538256369629e-05, "loss": 0.3103, "step": 18270 }, { "epoch": 0.40427092973096534, "grad_norm": 0.9322847723960876, "learning_rate": 1.2962063776996078e-05, "loss": 0.2812, "step": 18275 }, { "epoch": 0.4043815373724786, "grad_norm": 1.2522125244140625, "learning_rate": 1.2958744632627594e-05, "loss": 0.3304, "step": 18280 }, { "epoch": 0.4044921450139919, "grad_norm": 1.3003326654434204, "learning_rate": 1.2955425130991614e-05, "loss": 0.3451, "step": 18285 }, { "epoch": 0.4046027526555051, "grad_norm": 1.0835096836090088, "learning_rate": 1.2952105272488976e-05, "loss": 0.2657, "step": 18290 }, { "epoch": 0.4047133602970184, "grad_norm": 1.130008339881897, "learning_rate": 1.2948785057520545e-05, "loss": 0.249, "step": 18295 }, { "epoch": 0.40482396793853165, "grad_norm": 1.145098090171814, "learning_rate": 1.2945464486487235e-05, "loss": 0.5386, "step": 18300 }, { "epoch": 0.4049345755800449, "grad_norm": 1.5777312517166138, "learning_rate": 1.2942143559790013e-05, "loss": 0.3112, "step": 18305 }, { "epoch": 0.4050451832215582, "grad_norm": 1.9652303457260132, "learning_rate": 1.2938822277829872e-05, "loss": 0.3754, "step": 18310 }, { "epoch": 0.4051557908630714, "grad_norm": 1.030818223953247, "learning_rate": 1.2935500641007856e-05, "loss": 0.3846, "step": 18315 }, { "epoch": 0.4052663985045847, "grad_norm": 0.977764904499054, "learning_rate": 1.2932178649725056e-05, "loss": 0.4957, "step": 18320 }, { "epoch": 0.40537700614609795, "grad_norm": 1.1429603099822998, "learning_rate": 1.29288563043826e-05, "loss": 0.3258, "step": 18325 }, { "epoch": 0.4054876137876112, "grad_norm": 1.5140013694763184, "learning_rate": 1.2925533605381654e-05, "loss": 0.3157, "step": 18330 }, { "epoch": 0.40559822142912444, "grad_norm": 1.3464595079421997, "learning_rate": 1.2922210553123445e-05, "loss": 0.3019, "step": 18335 }, { "epoch": 0.4057088290706377, "grad_norm": 1.6154252290725708, "learning_rate": 1.2918887148009218e-05, "loss": 0.3333, "step": 18340 }, { "epoch": 0.405819436712151, "grad_norm": 0.8460050225257874, "learning_rate": 1.2915563390440285e-05, "loss": 0.2522, "step": 18345 }, { "epoch": 0.40593004435366425, "grad_norm": 1.651727557182312, "learning_rate": 1.2912239280817973e-05, "loss": 0.1949, "step": 18350 }, { "epoch": 0.4060406519951775, "grad_norm": 0.7172040939331055, "learning_rate": 1.2908914819543683e-05, "loss": 0.4749, "step": 18355 }, { "epoch": 0.40615125963669074, "grad_norm": 1.086057186126709, "learning_rate": 1.2905590007018835e-05, "loss": 0.5045, "step": 18360 }, { "epoch": 0.406261867278204, "grad_norm": 1.4828394651412964, "learning_rate": 1.2902264843644898e-05, "loss": 0.4193, "step": 18365 }, { "epoch": 0.4063724749197173, "grad_norm": 1.3168781995773315, "learning_rate": 1.289893932982339e-05, "loss": 0.4007, "step": 18370 }, { "epoch": 0.40648308256123056, "grad_norm": 1.3453510999679565, "learning_rate": 1.2895613465955861e-05, "loss": 0.4025, "step": 18375 }, { "epoch": 0.40659369020274383, "grad_norm": 1.2001991271972656, "learning_rate": 1.2892287252443909e-05, "loss": 0.2801, "step": 18380 }, { "epoch": 0.40670429784425705, "grad_norm": 1.5021852254867554, "learning_rate": 1.2888960689689175e-05, "loss": 0.318, "step": 18385 }, { "epoch": 0.4068149054857703, "grad_norm": 1.6153547763824463, "learning_rate": 1.2885633778093338e-05, "loss": 0.3318, "step": 18390 }, { "epoch": 0.4069255131272836, "grad_norm": 1.7411171197891235, "learning_rate": 1.2882306518058126e-05, "loss": 0.4367, "step": 18395 }, { "epoch": 0.40703612076879686, "grad_norm": 2.2184975147247314, "learning_rate": 1.2878978909985296e-05, "loss": 0.3463, "step": 18400 }, { "epoch": 0.40714672841031013, "grad_norm": 1.1731798648834229, "learning_rate": 1.2875650954276662e-05, "loss": 0.3357, "step": 18405 }, { "epoch": 0.40725733605182335, "grad_norm": 1.521837592124939, "learning_rate": 1.2872322651334078e-05, "loss": 0.3452, "step": 18410 }, { "epoch": 0.4073679436933366, "grad_norm": 0.7871682047843933, "learning_rate": 1.2868994001559423e-05, "loss": 0.1891, "step": 18415 }, { "epoch": 0.4074785513348499, "grad_norm": 1.8616217374801636, "learning_rate": 1.286566500535464e-05, "loss": 0.4411, "step": 18420 }, { "epoch": 0.40758915897636316, "grad_norm": 1.7846988439559937, "learning_rate": 1.2862335663121702e-05, "loss": 0.4031, "step": 18425 }, { "epoch": 0.4076997666178764, "grad_norm": 1.229817271232605, "learning_rate": 1.2859005975262626e-05, "loss": 0.4645, "step": 18430 }, { "epoch": 0.40781037425938965, "grad_norm": 1.6952728033065796, "learning_rate": 1.285567594217947e-05, "loss": 0.4837, "step": 18435 }, { "epoch": 0.4079209819009029, "grad_norm": 1.684233546257019, "learning_rate": 1.2852345564274336e-05, "loss": 0.2839, "step": 18440 }, { "epoch": 0.4080315895424162, "grad_norm": 2.2374765872955322, "learning_rate": 1.2849014841949367e-05, "loss": 0.299, "step": 18445 }, { "epoch": 0.40814219718392947, "grad_norm": 1.01211416721344, "learning_rate": 1.2845683775606741e-05, "loss": 0.2578, "step": 18450 }, { "epoch": 0.4082528048254427, "grad_norm": 1.4405497312545776, "learning_rate": 1.2842352365648687e-05, "loss": 0.2736, "step": 18455 }, { "epoch": 0.40836341246695596, "grad_norm": 1.6316269636154175, "learning_rate": 1.2839020612477476e-05, "loss": 0.4261, "step": 18460 }, { "epoch": 0.4084740201084692, "grad_norm": 1.2425153255462646, "learning_rate": 1.2835688516495406e-05, "loss": 0.4389, "step": 18465 }, { "epoch": 0.4085846277499825, "grad_norm": 1.149055004119873, "learning_rate": 1.2832356078104838e-05, "loss": 0.3527, "step": 18470 }, { "epoch": 0.40869523539149577, "grad_norm": 1.6603970527648926, "learning_rate": 1.2829023297708156e-05, "loss": 0.4349, "step": 18475 }, { "epoch": 0.408805843033009, "grad_norm": 1.5415384769439697, "learning_rate": 1.282569017570779e-05, "loss": 0.436, "step": 18480 }, { "epoch": 0.40891645067452226, "grad_norm": 1.440709114074707, "learning_rate": 1.2822356712506218e-05, "loss": 0.4033, "step": 18485 }, { "epoch": 0.40902705831603553, "grad_norm": 1.406375765800476, "learning_rate": 1.2819022908505956e-05, "loss": 0.4488, "step": 18490 }, { "epoch": 0.4091376659575488, "grad_norm": 1.1427333354949951, "learning_rate": 1.2815688764109557e-05, "loss": 0.1942, "step": 18495 }, { "epoch": 0.4092482735990621, "grad_norm": 1.4171897172927856, "learning_rate": 1.2812354279719617e-05, "loss": 0.4558, "step": 18500 }, { "epoch": 0.4093588812405753, "grad_norm": 0.9850272536277771, "learning_rate": 1.2809019455738777e-05, "loss": 0.3372, "step": 18505 }, { "epoch": 0.40946948888208856, "grad_norm": 1.423388957977295, "learning_rate": 1.2805684292569713e-05, "loss": 0.45, "step": 18510 }, { "epoch": 0.40958009652360183, "grad_norm": 2.3381447792053223, "learning_rate": 1.2802348790615145e-05, "loss": 0.4157, "step": 18515 }, { "epoch": 0.4096907041651151, "grad_norm": 1.4053629636764526, "learning_rate": 1.2799012950277836e-05, "loss": 0.3901, "step": 18520 }, { "epoch": 0.4098013118066283, "grad_norm": 1.2006813287734985, "learning_rate": 1.2795676771960588e-05, "loss": 0.31, "step": 18525 }, { "epoch": 0.4099119194481416, "grad_norm": 1.3145464658737183, "learning_rate": 1.2792340256066238e-05, "loss": 0.3636, "step": 18530 }, { "epoch": 0.41002252708965486, "grad_norm": 0.9665094614028931, "learning_rate": 1.2789003402997677e-05, "loss": 0.4337, "step": 18535 }, { "epoch": 0.41013313473116814, "grad_norm": 1.5084675550460815, "learning_rate": 1.2785666213157826e-05, "loss": 0.4569, "step": 18540 }, { "epoch": 0.4102437423726814, "grad_norm": 1.2403274774551392, "learning_rate": 1.2782328686949646e-05, "loss": 0.3325, "step": 18545 }, { "epoch": 0.4103543500141946, "grad_norm": 1.199700117111206, "learning_rate": 1.2778990824776147e-05, "loss": 0.4867, "step": 18550 }, { "epoch": 0.4104649576557079, "grad_norm": 1.448548674583435, "learning_rate": 1.2775652627040374e-05, "loss": 0.4532, "step": 18555 }, { "epoch": 0.41057556529722117, "grad_norm": 1.1168233156204224, "learning_rate": 1.2772314094145411e-05, "loss": 0.4526, "step": 18560 }, { "epoch": 0.41068617293873444, "grad_norm": 1.3293226957321167, "learning_rate": 1.2768975226494385e-05, "loss": 0.3608, "step": 18565 }, { "epoch": 0.4107967805802477, "grad_norm": 1.2468624114990234, "learning_rate": 1.2765636024490467e-05, "loss": 0.384, "step": 18570 }, { "epoch": 0.4109073882217609, "grad_norm": 1.4485055208206177, "learning_rate": 1.2762296488536861e-05, "loss": 0.3063, "step": 18575 }, { "epoch": 0.4110179958632742, "grad_norm": 1.6972111463546753, "learning_rate": 1.2758956619036813e-05, "loss": 0.3322, "step": 18580 }, { "epoch": 0.41112860350478747, "grad_norm": 1.4795546531677246, "learning_rate": 1.2755616416393617e-05, "loss": 0.4197, "step": 18585 }, { "epoch": 0.41123921114630074, "grad_norm": 1.2526651620864868, "learning_rate": 1.2752275881010601e-05, "loss": 0.4425, "step": 18590 }, { "epoch": 0.411349818787814, "grad_norm": 1.3385096788406372, "learning_rate": 1.2748935013291126e-05, "loss": 0.4284, "step": 18595 }, { "epoch": 0.41146042642932723, "grad_norm": 1.6324297189712524, "learning_rate": 1.2745593813638605e-05, "loss": 0.4623, "step": 18600 }, { "epoch": 0.4115710340708405, "grad_norm": 1.4036693572998047, "learning_rate": 1.2742252282456493e-05, "loss": 0.4495, "step": 18605 }, { "epoch": 0.4116816417123538, "grad_norm": 1.1158806085586548, "learning_rate": 1.2738910420148274e-05, "loss": 0.4274, "step": 18610 }, { "epoch": 0.41179224935386705, "grad_norm": 0.9966703653335571, "learning_rate": 1.273556822711747e-05, "loss": 0.2781, "step": 18615 }, { "epoch": 0.41190285699538026, "grad_norm": 1.7747743129730225, "learning_rate": 1.273222570376766e-05, "loss": 0.4155, "step": 18620 }, { "epoch": 0.41201346463689353, "grad_norm": 1.5068837404251099, "learning_rate": 1.272888285050245e-05, "loss": 0.3058, "step": 18625 }, { "epoch": 0.4121240722784068, "grad_norm": 1.491142749786377, "learning_rate": 1.2725539667725487e-05, "loss": 0.2229, "step": 18630 }, { "epoch": 0.4122346799199201, "grad_norm": 1.5186760425567627, "learning_rate": 1.2722196155840456e-05, "loss": 0.4834, "step": 18635 }, { "epoch": 0.41234528756143335, "grad_norm": 1.0815459489822388, "learning_rate": 1.2718852315251094e-05, "loss": 0.4532, "step": 18640 }, { "epoch": 0.41245589520294657, "grad_norm": 2.0185861587524414, "learning_rate": 1.2715508146361158e-05, "loss": 0.3325, "step": 18645 }, { "epoch": 0.41256650284445984, "grad_norm": 1.0086690187454224, "learning_rate": 1.2712163649574463e-05, "loss": 0.3074, "step": 18650 }, { "epoch": 0.4126771104859731, "grad_norm": 0.8687032461166382, "learning_rate": 1.2708818825294853e-05, "loss": 0.4812, "step": 18655 }, { "epoch": 0.4127877181274864, "grad_norm": 1.6428101062774658, "learning_rate": 1.2705473673926218e-05, "loss": 0.6143, "step": 18660 }, { "epoch": 0.41289832576899965, "grad_norm": 1.0449833869934082, "learning_rate": 1.2702128195872476e-05, "loss": 0.2793, "step": 18665 }, { "epoch": 0.41300893341051287, "grad_norm": 1.1046888828277588, "learning_rate": 1.26987823915376e-05, "loss": 0.3169, "step": 18670 }, { "epoch": 0.41311954105202614, "grad_norm": 0.6688475012779236, "learning_rate": 1.2695436261325596e-05, "loss": 0.2909, "step": 18675 }, { "epoch": 0.4132301486935394, "grad_norm": 1.128611445426941, "learning_rate": 1.2692089805640497e-05, "loss": 0.4956, "step": 18680 }, { "epoch": 0.4133407563350527, "grad_norm": 1.170154094696045, "learning_rate": 1.26887430248864e-05, "loss": 0.3418, "step": 18685 }, { "epoch": 0.41345136397656596, "grad_norm": 1.1157879829406738, "learning_rate": 1.268539591946742e-05, "loss": 0.3656, "step": 18690 }, { "epoch": 0.41356197161807917, "grad_norm": 1.7503818273544312, "learning_rate": 1.2682048489787718e-05, "loss": 0.4105, "step": 18695 }, { "epoch": 0.41367257925959244, "grad_norm": 1.9778690338134766, "learning_rate": 1.26787007362515e-05, "loss": 0.4049, "step": 18700 }, { "epoch": 0.4137831869011057, "grad_norm": 1.5645463466644287, "learning_rate": 1.2675352659263003e-05, "loss": 0.3442, "step": 18705 }, { "epoch": 0.413893794542619, "grad_norm": 2.268982410430908, "learning_rate": 1.267200425922651e-05, "loss": 0.3732, "step": 18710 }, { "epoch": 0.4140044021841322, "grad_norm": 1.4971100091934204, "learning_rate": 1.2668655536546332e-05, "loss": 0.1521, "step": 18715 }, { "epoch": 0.4141150098256455, "grad_norm": 1.493670105934143, "learning_rate": 1.2665306491626834e-05, "loss": 0.3867, "step": 18720 }, { "epoch": 0.41422561746715875, "grad_norm": 1.09019136428833, "learning_rate": 1.266195712487241e-05, "loss": 0.2624, "step": 18725 }, { "epoch": 0.414336225108672, "grad_norm": 1.2846965789794922, "learning_rate": 1.2658607436687489e-05, "loss": 0.3817, "step": 18730 }, { "epoch": 0.4144468327501853, "grad_norm": 0.8937426805496216, "learning_rate": 1.2655257427476552e-05, "loss": 0.4317, "step": 18735 }, { "epoch": 0.4145574403916985, "grad_norm": 1.3253860473632812, "learning_rate": 1.2651907097644112e-05, "loss": 0.412, "step": 18740 }, { "epoch": 0.4146680480332118, "grad_norm": 0.9833961725234985, "learning_rate": 1.264855644759471e-05, "loss": 0.2964, "step": 18745 }, { "epoch": 0.41477865567472505, "grad_norm": 1.1357275247573853, "learning_rate": 1.264520547773295e-05, "loss": 0.4022, "step": 18750 }, { "epoch": 0.4148892633162383, "grad_norm": 1.972558856010437, "learning_rate": 1.2641854188463451e-05, "loss": 0.5322, "step": 18755 }, { "epoch": 0.4149998709577516, "grad_norm": 1.4726228713989258, "learning_rate": 1.2638502580190885e-05, "loss": 0.4039, "step": 18760 }, { "epoch": 0.4151104785992648, "grad_norm": 1.2743233442306519, "learning_rate": 1.2635150653319953e-05, "loss": 0.3278, "step": 18765 }, { "epoch": 0.4152210862407781, "grad_norm": 0.8164854645729065, "learning_rate": 1.2631798408255404e-05, "loss": 0.447, "step": 18770 }, { "epoch": 0.41533169388229135, "grad_norm": 3.05696702003479, "learning_rate": 1.262844584540202e-05, "loss": 0.4257, "step": 18775 }, { "epoch": 0.4154423015238046, "grad_norm": 0.9943355917930603, "learning_rate": 1.2625092965164618e-05, "loss": 0.2437, "step": 18780 }, { "epoch": 0.41555290916531784, "grad_norm": 0.8600325584411621, "learning_rate": 1.2621739767948057e-05, "loss": 0.3092, "step": 18785 }, { "epoch": 0.4156635168068311, "grad_norm": 2.0746853351593018, "learning_rate": 1.2618386254157245e-05, "loss": 0.4741, "step": 18790 }, { "epoch": 0.4157741244483444, "grad_norm": 0.9893059730529785, "learning_rate": 1.26150324241971e-05, "loss": 0.46, "step": 18795 }, { "epoch": 0.41588473208985766, "grad_norm": 1.4124071598052979, "learning_rate": 1.2611678278472611e-05, "loss": 0.4995, "step": 18800 }, { "epoch": 0.41599533973137093, "grad_norm": 1.3735002279281616, "learning_rate": 1.2608323817388785e-05, "loss": 0.2876, "step": 18805 }, { "epoch": 0.41610594737288414, "grad_norm": 0.808260440826416, "learning_rate": 1.2604969041350672e-05, "loss": 0.3251, "step": 18810 }, { "epoch": 0.4162165550143974, "grad_norm": 1.4023897647857666, "learning_rate": 1.2601613950763359e-05, "loss": 0.2692, "step": 18815 }, { "epoch": 0.4163271626559107, "grad_norm": 1.109750747680664, "learning_rate": 1.2598258546031972e-05, "loss": 0.2584, "step": 18820 }, { "epoch": 0.41643777029742396, "grad_norm": 1.858726978302002, "learning_rate": 1.259490282756168e-05, "loss": 0.3883, "step": 18825 }, { "epoch": 0.41654837793893723, "grad_norm": 1.1563246250152588, "learning_rate": 1.2591546795757676e-05, "loss": 0.2304, "step": 18830 }, { "epoch": 0.41665898558045045, "grad_norm": 1.4155287742614746, "learning_rate": 1.2588190451025209e-05, "loss": 0.3554, "step": 18835 }, { "epoch": 0.4167695932219637, "grad_norm": 0.9132216572761536, "learning_rate": 1.2584833793769553e-05, "loss": 0.2867, "step": 18840 }, { "epoch": 0.416880200863477, "grad_norm": 1.3778764009475708, "learning_rate": 1.258147682439602e-05, "loss": 0.4706, "step": 18845 }, { "epoch": 0.41699080850499026, "grad_norm": 0.8305585980415344, "learning_rate": 1.2578119543309968e-05, "loss": 0.3101, "step": 18850 }, { "epoch": 0.41710141614650353, "grad_norm": 1.5152899026870728, "learning_rate": 1.2574761950916785e-05, "loss": 0.3394, "step": 18855 }, { "epoch": 0.41721202378801675, "grad_norm": 0.8515959978103638, "learning_rate": 1.2571404047621897e-05, "loss": 0.4756, "step": 18860 }, { "epoch": 0.41732263142953, "grad_norm": 0.7372950911521912, "learning_rate": 1.2568045833830777e-05, "loss": 0.4756, "step": 18865 }, { "epoch": 0.4174332390710433, "grad_norm": 1.3457266092300415, "learning_rate": 1.2564687309948921e-05, "loss": 0.4147, "step": 18870 }, { "epoch": 0.41754384671255657, "grad_norm": 1.5710862874984741, "learning_rate": 1.2561328476381878e-05, "loss": 0.3104, "step": 18875 }, { "epoch": 0.4176544543540698, "grad_norm": 3.165083169937134, "learning_rate": 1.2557969333535214e-05, "loss": 0.2857, "step": 18880 }, { "epoch": 0.41776506199558305, "grad_norm": 1.0135875940322876, "learning_rate": 1.2554609881814552e-05, "loss": 0.2938, "step": 18885 }, { "epoch": 0.4178756696370963, "grad_norm": 1.2150604724884033, "learning_rate": 1.2551250121625547e-05, "loss": 0.2665, "step": 18890 }, { "epoch": 0.4179862772786096, "grad_norm": 1.7510217428207397, "learning_rate": 1.2547890053373884e-05, "loss": 0.3273, "step": 18895 }, { "epoch": 0.41809688492012287, "grad_norm": 2.03354549407959, "learning_rate": 1.2544529677465292e-05, "loss": 0.281, "step": 18900 }, { "epoch": 0.4182074925616361, "grad_norm": 1.2723634243011475, "learning_rate": 1.2541168994305538e-05, "loss": 0.2678, "step": 18905 }, { "epoch": 0.41831810020314936, "grad_norm": 1.33965003490448, "learning_rate": 1.2537808004300416e-05, "loss": 0.4094, "step": 18910 }, { "epoch": 0.41842870784466263, "grad_norm": 0.9456433653831482, "learning_rate": 1.2534446707855772e-05, "loss": 0.2725, "step": 18915 }, { "epoch": 0.4185393154861759, "grad_norm": 1.0491632223129272, "learning_rate": 1.2531085105377478e-05, "loss": 0.5457, "step": 18920 }, { "epoch": 0.41864992312768917, "grad_norm": 1.049850344657898, "learning_rate": 1.2527723197271451e-05, "loss": 0.2534, "step": 18925 }, { "epoch": 0.4187605307692024, "grad_norm": 1.5774039030075073, "learning_rate": 1.2524360983943631e-05, "loss": 0.419, "step": 18930 }, { "epoch": 0.41887113841071566, "grad_norm": 1.4078989028930664, "learning_rate": 1.2520998465800011e-05, "loss": 0.3955, "step": 18935 }, { "epoch": 0.41898174605222893, "grad_norm": 1.2200340032577515, "learning_rate": 1.2517635643246618e-05, "loss": 0.2743, "step": 18940 }, { "epoch": 0.4190923536937422, "grad_norm": 1.6863335371017456, "learning_rate": 1.2514272516689501e-05, "loss": 0.4755, "step": 18945 }, { "epoch": 0.4192029613352555, "grad_norm": 1.5162100791931152, "learning_rate": 1.2510909086534763e-05, "loss": 0.4459, "step": 18950 }, { "epoch": 0.4193135689767687, "grad_norm": 1.3457635641098022, "learning_rate": 1.2507545353188539e-05, "loss": 0.3543, "step": 18955 }, { "epoch": 0.41942417661828196, "grad_norm": 1.7601184844970703, "learning_rate": 1.2504181317056993e-05, "loss": 0.4217, "step": 18960 }, { "epoch": 0.41953478425979523, "grad_norm": 3.8351218700408936, "learning_rate": 1.2500816978546335e-05, "loss": 0.6228, "step": 18965 }, { "epoch": 0.4196453919013085, "grad_norm": 1.1602754592895508, "learning_rate": 1.2497452338062812e-05, "loss": 0.404, "step": 18970 }, { "epoch": 0.4197559995428217, "grad_norm": 1.4140048027038574, "learning_rate": 1.2494087396012691e-05, "loss": 0.3309, "step": 18975 }, { "epoch": 0.419866607184335, "grad_norm": 1.1765713691711426, "learning_rate": 1.24907221528023e-05, "loss": 0.2966, "step": 18980 }, { "epoch": 0.41997721482584827, "grad_norm": 1.6091445684432983, "learning_rate": 1.2487356608837983e-05, "loss": 0.3795, "step": 18985 }, { "epoch": 0.42008782246736154, "grad_norm": 1.2606321573257446, "learning_rate": 1.2483990764526135e-05, "loss": 0.3094, "step": 18990 }, { "epoch": 0.4201984301088748, "grad_norm": 1.4431265592575073, "learning_rate": 1.2480624620273174e-05, "loss": 0.3201, "step": 18995 }, { "epoch": 0.420309037750388, "grad_norm": 1.2196931838989258, "learning_rate": 1.2477258176485566e-05, "loss": 0.2894, "step": 19000 }, { "epoch": 0.4204196453919013, "grad_norm": 1.6252548694610596, "learning_rate": 1.2473891433569807e-05, "loss": 0.4448, "step": 19005 }, { "epoch": 0.42053025303341457, "grad_norm": 1.4098799228668213, "learning_rate": 1.2470524391932425e-05, "loss": 0.3869, "step": 19010 }, { "epoch": 0.42064086067492784, "grad_norm": 1.4186909198760986, "learning_rate": 1.2467157051979998e-05, "loss": 0.4998, "step": 19015 }, { "epoch": 0.4207514683164411, "grad_norm": 1.0570555925369263, "learning_rate": 1.2463789414119127e-05, "loss": 0.3963, "step": 19020 }, { "epoch": 0.42086207595795433, "grad_norm": 0.9089516401290894, "learning_rate": 1.246042147875645e-05, "loss": 0.3574, "step": 19025 }, { "epoch": 0.4209726835994676, "grad_norm": 1.6572214365005493, "learning_rate": 1.2457053246298649e-05, "loss": 0.4509, "step": 19030 }, { "epoch": 0.4210832912409809, "grad_norm": 1.6922945976257324, "learning_rate": 1.2453684717152434e-05, "loss": 0.4864, "step": 19035 }, { "epoch": 0.42119389888249414, "grad_norm": 1.1440201997756958, "learning_rate": 1.2450315891724558e-05, "loss": 0.2349, "step": 19040 }, { "epoch": 0.4213045065240074, "grad_norm": 0.9656710028648376, "learning_rate": 1.2446946770421797e-05, "loss": 0.4401, "step": 19045 }, { "epoch": 0.42141511416552063, "grad_norm": 0.8926275968551636, "learning_rate": 1.2443577353650981e-05, "loss": 0.4732, "step": 19050 }, { "epoch": 0.4215257218070339, "grad_norm": 1.8957284688949585, "learning_rate": 1.2440207641818964e-05, "loss": 0.5276, "step": 19055 }, { "epoch": 0.4216363294485472, "grad_norm": 0.8275842666625977, "learning_rate": 1.2436837635332631e-05, "loss": 0.3315, "step": 19060 }, { "epoch": 0.42174693709006045, "grad_norm": 1.1983168125152588, "learning_rate": 1.2433467334598914e-05, "loss": 0.2762, "step": 19065 }, { "epoch": 0.42185754473157366, "grad_norm": 1.634653925895691, "learning_rate": 1.2430096740024782e-05, "loss": 0.4256, "step": 19070 }, { "epoch": 0.42196815237308694, "grad_norm": 1.142151951789856, "learning_rate": 1.2426725852017219e-05, "loss": 0.4106, "step": 19075 }, { "epoch": 0.4220787600146002, "grad_norm": 0.9869879484176636, "learning_rate": 1.242335467098327e-05, "loss": 0.4038, "step": 19080 }, { "epoch": 0.4221893676561135, "grad_norm": 1.616659164428711, "learning_rate": 1.2419983197330001e-05, "loss": 0.4024, "step": 19085 }, { "epoch": 0.42229997529762675, "grad_norm": 1.2826790809631348, "learning_rate": 1.2416611431464518e-05, "loss": 0.4242, "step": 19090 }, { "epoch": 0.42241058293913997, "grad_norm": 1.472456455230713, "learning_rate": 1.2413239373793952e-05, "loss": 0.3857, "step": 19095 }, { "epoch": 0.42252119058065324, "grad_norm": 1.1473455429077148, "learning_rate": 1.240986702472549e-05, "loss": 0.371, "step": 19100 }, { "epoch": 0.4226317982221665, "grad_norm": 1.1023764610290527, "learning_rate": 1.240649438466634e-05, "loss": 0.3193, "step": 19105 }, { "epoch": 0.4227424058636798, "grad_norm": 0.787316620349884, "learning_rate": 1.2403121454023736e-05, "loss": 0.394, "step": 19110 }, { "epoch": 0.42285301350519305, "grad_norm": 0.9800429344177246, "learning_rate": 1.239974823320497e-05, "loss": 0.4854, "step": 19115 }, { "epoch": 0.42296362114670627, "grad_norm": 0.9112874269485474, "learning_rate": 1.2396374722617354e-05, "loss": 0.3763, "step": 19120 }, { "epoch": 0.42307422878821954, "grad_norm": 1.32712721824646, "learning_rate": 1.2393000922668237e-05, "loss": 0.4375, "step": 19125 }, { "epoch": 0.4231848364297328, "grad_norm": 0.8055389523506165, "learning_rate": 1.2389626833765005e-05, "loss": 0.2035, "step": 19130 }, { "epoch": 0.4232954440712461, "grad_norm": 1.7105320692062378, "learning_rate": 1.2386252456315077e-05, "loss": 0.3509, "step": 19135 }, { "epoch": 0.4234060517127593, "grad_norm": 1.5161718130111694, "learning_rate": 1.2382877790725915e-05, "loss": 0.4067, "step": 19140 }, { "epoch": 0.4235166593542726, "grad_norm": 1.9074066877365112, "learning_rate": 1.2379502837404998e-05, "loss": 0.2958, "step": 19145 }, { "epoch": 0.42362726699578584, "grad_norm": 0.8977624773979187, "learning_rate": 1.2376127596759857e-05, "loss": 0.3343, "step": 19150 }, { "epoch": 0.4237378746372991, "grad_norm": 1.3048419952392578, "learning_rate": 1.2372752069198053e-05, "loss": 0.3317, "step": 19155 }, { "epoch": 0.4238484822788124, "grad_norm": 0.9863548874855042, "learning_rate": 1.2369376255127172e-05, "loss": 0.3321, "step": 19160 }, { "epoch": 0.4239590899203256, "grad_norm": 1.4981496334075928, "learning_rate": 1.2366000154954852e-05, "loss": 0.3678, "step": 19165 }, { "epoch": 0.4240696975618389, "grad_norm": 1.8002538681030273, "learning_rate": 1.236262376908875e-05, "loss": 0.3561, "step": 19170 }, { "epoch": 0.42418030520335215, "grad_norm": 1.4415370225906372, "learning_rate": 1.2359247097936564e-05, "loss": 0.4524, "step": 19175 }, { "epoch": 0.4242909128448654, "grad_norm": 1.3617668151855469, "learning_rate": 1.2355870141906028e-05, "loss": 0.3534, "step": 19180 }, { "epoch": 0.4244015204863787, "grad_norm": 1.4959239959716797, "learning_rate": 1.2352492901404909e-05, "loss": 0.333, "step": 19185 }, { "epoch": 0.4245121281278919, "grad_norm": 0.933582067489624, "learning_rate": 1.2349115376841e-05, "loss": 0.4984, "step": 19190 }, { "epoch": 0.4246227357694052, "grad_norm": 1.1666324138641357, "learning_rate": 1.234573756862215e-05, "loss": 0.2491, "step": 19195 }, { "epoch": 0.42473334341091845, "grad_norm": 1.6674877405166626, "learning_rate": 1.2342359477156216e-05, "loss": 0.3397, "step": 19200 }, { "epoch": 0.4248439510524317, "grad_norm": 1.468881368637085, "learning_rate": 1.2338981102851109e-05, "loss": 0.4686, "step": 19205 }, { "epoch": 0.424954558693945, "grad_norm": 1.2437682151794434, "learning_rate": 1.233560244611476e-05, "loss": 0.2676, "step": 19210 }, { "epoch": 0.4250651663354582, "grad_norm": 1.001696228981018, "learning_rate": 1.2332223507355146e-05, "loss": 0.3145, "step": 19215 }, { "epoch": 0.4251757739769715, "grad_norm": 1.9837594032287598, "learning_rate": 1.2328844286980273e-05, "loss": 0.2886, "step": 19220 }, { "epoch": 0.42528638161848475, "grad_norm": 1.3098678588867188, "learning_rate": 1.2325464785398175e-05, "loss": 0.317, "step": 19225 }, { "epoch": 0.425396989259998, "grad_norm": 1.0661414861679077, "learning_rate": 1.2322085003016932e-05, "loss": 0.3038, "step": 19230 }, { "epoch": 0.42550759690151124, "grad_norm": 0.9699549674987793, "learning_rate": 1.2318704940244653e-05, "loss": 0.3125, "step": 19235 }, { "epoch": 0.4256182045430245, "grad_norm": 1.3189634084701538, "learning_rate": 1.2315324597489471e-05, "loss": 0.3632, "step": 19240 }, { "epoch": 0.4257288121845378, "grad_norm": 0.9147299528121948, "learning_rate": 1.2311943975159568e-05, "loss": 0.282, "step": 19245 }, { "epoch": 0.42583941982605106, "grad_norm": 0.8683350086212158, "learning_rate": 1.2308563073663152e-05, "loss": 0.3069, "step": 19250 }, { "epoch": 0.42595002746756433, "grad_norm": 2.113696336746216, "learning_rate": 1.2305181893408467e-05, "loss": 0.3418, "step": 19255 }, { "epoch": 0.42606063510907755, "grad_norm": 1.6588592529296875, "learning_rate": 1.2301800434803782e-05, "loss": 0.596, "step": 19260 }, { "epoch": 0.4261712427505908, "grad_norm": 0.6021313667297363, "learning_rate": 1.2298418698257416e-05, "loss": 0.2431, "step": 19265 }, { "epoch": 0.4262818503921041, "grad_norm": 1.0686925649642944, "learning_rate": 1.229503668417771e-05, "loss": 0.41, "step": 19270 }, { "epoch": 0.42639245803361736, "grad_norm": 0.8013767004013062, "learning_rate": 1.2291654392973039e-05, "loss": 0.3478, "step": 19275 }, { "epoch": 0.42650306567513063, "grad_norm": 0.9232672452926636, "learning_rate": 1.2288271825051813e-05, "loss": 0.2783, "step": 19280 }, { "epoch": 0.42661367331664385, "grad_norm": 0.9370556473731995, "learning_rate": 1.2284888980822484e-05, "loss": 0.3694, "step": 19285 }, { "epoch": 0.4267242809581571, "grad_norm": 1.4001370668411255, "learning_rate": 1.2281505860693518e-05, "loss": 0.3979, "step": 19290 }, { "epoch": 0.4268348885996704, "grad_norm": 0.5281509160995483, "learning_rate": 1.2278122465073435e-05, "loss": 0.2695, "step": 19295 }, { "epoch": 0.42694549624118366, "grad_norm": 1.8238598108291626, "learning_rate": 1.2274738794370773e-05, "loss": 0.3761, "step": 19300 }, { "epoch": 0.42705610388269694, "grad_norm": 1.4650241136550903, "learning_rate": 1.2271354848994115e-05, "loss": 0.3671, "step": 19305 }, { "epoch": 0.42716671152421015, "grad_norm": 1.2543975114822388, "learning_rate": 1.2267970629352061e-05, "loss": 0.3853, "step": 19310 }, { "epoch": 0.4272773191657234, "grad_norm": 1.9774175882339478, "learning_rate": 1.2264586135853268e-05, "loss": 0.477, "step": 19315 }, { "epoch": 0.4273879268072367, "grad_norm": 1.0970205068588257, "learning_rate": 1.2261201368906404e-05, "loss": 0.4124, "step": 19320 }, { "epoch": 0.42749853444874997, "grad_norm": 1.3504245281219482, "learning_rate": 1.2257816328920176e-05, "loss": 0.3999, "step": 19325 }, { "epoch": 0.4276091420902632, "grad_norm": 1.5344465970993042, "learning_rate": 1.2254431016303335e-05, "loss": 0.4594, "step": 19330 }, { "epoch": 0.42771974973177646, "grad_norm": 1.9749685525894165, "learning_rate": 1.2251045431464653e-05, "loss": 0.4092, "step": 19335 }, { "epoch": 0.4278303573732897, "grad_norm": 1.267175316810608, "learning_rate": 1.2247659574812932e-05, "loss": 0.3348, "step": 19340 }, { "epoch": 0.427940965014803, "grad_norm": 1.8109462261199951, "learning_rate": 1.2244273446757022e-05, "loss": 0.3414, "step": 19345 }, { "epoch": 0.42805157265631627, "grad_norm": 1.1531200408935547, "learning_rate": 1.2240887047705795e-05, "loss": 0.3875, "step": 19350 }, { "epoch": 0.4281621802978295, "grad_norm": 1.9190713167190552, "learning_rate": 1.2237500378068154e-05, "loss": 0.4405, "step": 19355 }, { "epoch": 0.42827278793934276, "grad_norm": 1.1440296173095703, "learning_rate": 1.2234113438253039e-05, "loss": 0.2955, "step": 19360 }, { "epoch": 0.42838339558085603, "grad_norm": 1.2010852098464966, "learning_rate": 1.2230726228669427e-05, "loss": 0.4553, "step": 19365 }, { "epoch": 0.4284940032223693, "grad_norm": 1.080793023109436, "learning_rate": 1.222733874972632e-05, "loss": 0.3466, "step": 19370 }, { "epoch": 0.4286046108638826, "grad_norm": 1.8221256732940674, "learning_rate": 1.222395100183275e-05, "loss": 0.356, "step": 19375 }, { "epoch": 0.4287152185053958, "grad_norm": 1.3026161193847656, "learning_rate": 1.222056298539779e-05, "loss": 0.4584, "step": 19380 }, { "epoch": 0.42882582614690906, "grad_norm": 1.3256208896636963, "learning_rate": 1.2217174700830547e-05, "loss": 0.3917, "step": 19385 }, { "epoch": 0.42893643378842233, "grad_norm": 1.5473146438598633, "learning_rate": 1.2213786148540148e-05, "loss": 0.4655, "step": 19390 }, { "epoch": 0.4290470414299356, "grad_norm": 1.188671588897705, "learning_rate": 1.2210397328935761e-05, "loss": 0.3297, "step": 19395 }, { "epoch": 0.4291576490714489, "grad_norm": 0.8641922473907471, "learning_rate": 1.220700824242659e-05, "loss": 0.2636, "step": 19400 }, { "epoch": 0.4292682567129621, "grad_norm": 0.6644631028175354, "learning_rate": 1.2203618889421858e-05, "loss": 0.3656, "step": 19405 }, { "epoch": 0.42937886435447536, "grad_norm": 1.4132176637649536, "learning_rate": 1.2200229270330834e-05, "loss": 0.289, "step": 19410 }, { "epoch": 0.42948947199598864, "grad_norm": 0.9620028138160706, "learning_rate": 1.2196839385562813e-05, "loss": 0.4097, "step": 19415 }, { "epoch": 0.4296000796375019, "grad_norm": 1.158453106880188, "learning_rate": 1.2193449235527125e-05, "loss": 0.3772, "step": 19420 }, { "epoch": 0.4297106872790151, "grad_norm": 1.0635429620742798, "learning_rate": 1.219005882063312e-05, "loss": 0.3657, "step": 19425 }, { "epoch": 0.4298212949205284, "grad_norm": 0.6452921032905579, "learning_rate": 1.2186668141290199e-05, "loss": 0.3614, "step": 19430 }, { "epoch": 0.42993190256204167, "grad_norm": 0.9643234610557556, "learning_rate": 1.2183277197907784e-05, "loss": 0.2909, "step": 19435 }, { "epoch": 0.43004251020355494, "grad_norm": 0.9755743741989136, "learning_rate": 1.2179885990895324e-05, "loss": 0.4017, "step": 19440 }, { "epoch": 0.4301531178450682, "grad_norm": 1.5867877006530762, "learning_rate": 1.2176494520662313e-05, "loss": 0.4017, "step": 19445 }, { "epoch": 0.4302637254865814, "grad_norm": 0.7605918049812317, "learning_rate": 1.217310278761827e-05, "loss": 0.264, "step": 19450 }, { "epoch": 0.4303743331280947, "grad_norm": 1.2617806196212769, "learning_rate": 1.2169710792172738e-05, "loss": 0.3931, "step": 19455 }, { "epoch": 0.43048494076960797, "grad_norm": 0.8023796677589417, "learning_rate": 1.2166318534735308e-05, "loss": 0.2387, "step": 19460 }, { "epoch": 0.43059554841112124, "grad_norm": 1.2091176509857178, "learning_rate": 1.216292601571559e-05, "loss": 0.3285, "step": 19465 }, { "epoch": 0.4307061560526345, "grad_norm": 1.1084994077682495, "learning_rate": 1.2159533235523235e-05, "loss": 0.3309, "step": 19470 }, { "epoch": 0.43081676369414773, "grad_norm": 0.9044680595397949, "learning_rate": 1.215614019456791e-05, "loss": 0.4115, "step": 19475 }, { "epoch": 0.430927371335661, "grad_norm": 1.8668302297592163, "learning_rate": 1.215274689325933e-05, "loss": 0.2499, "step": 19480 }, { "epoch": 0.4310379789771743, "grad_norm": 1.3131879568099976, "learning_rate": 1.214935333200724e-05, "loss": 0.4047, "step": 19485 }, { "epoch": 0.43114858661868755, "grad_norm": 1.3180547952651978, "learning_rate": 1.2145959511221399e-05, "loss": 0.4551, "step": 19490 }, { "epoch": 0.43125919426020076, "grad_norm": 1.2162986993789673, "learning_rate": 1.2142565431311618e-05, "loss": 0.368, "step": 19495 }, { "epoch": 0.43136980190171403, "grad_norm": 1.2171999216079712, "learning_rate": 1.2139171092687733e-05, "loss": 0.3032, "step": 19500 }, { "epoch": 0.4314804095432273, "grad_norm": 1.2614699602127075, "learning_rate": 1.2135776495759601e-05, "loss": 0.4146, "step": 19505 }, { "epoch": 0.4315910171847406, "grad_norm": 1.8877519369125366, "learning_rate": 1.2132381640937128e-05, "loss": 0.3371, "step": 19510 }, { "epoch": 0.43170162482625385, "grad_norm": 0.9706434011459351, "learning_rate": 1.2128986528630237e-05, "loss": 0.2909, "step": 19515 }, { "epoch": 0.43181223246776707, "grad_norm": 1.0886491537094116, "learning_rate": 1.2125591159248886e-05, "loss": 0.4503, "step": 19520 }, { "epoch": 0.43192284010928034, "grad_norm": 1.3069812059402466, "learning_rate": 1.2122195533203063e-05, "loss": 0.3919, "step": 19525 }, { "epoch": 0.4320334477507936, "grad_norm": 0.9588229656219482, "learning_rate": 1.2118799650902796e-05, "loss": 0.4701, "step": 19530 }, { "epoch": 0.4321440553923069, "grad_norm": 0.9688197374343872, "learning_rate": 1.2115403512758131e-05, "loss": 0.3261, "step": 19535 }, { "epoch": 0.43225466303382015, "grad_norm": 1.296229600906372, "learning_rate": 1.2112007119179148e-05, "loss": 0.4861, "step": 19540 }, { "epoch": 0.43236527067533337, "grad_norm": 1.362186074256897, "learning_rate": 1.2108610470575968e-05, "loss": 0.5141, "step": 19545 }, { "epoch": 0.43247587831684664, "grad_norm": 1.8643804788589478, "learning_rate": 1.2105213567358734e-05, "loss": 0.3289, "step": 19550 }, { "epoch": 0.4325864859583599, "grad_norm": 1.140052080154419, "learning_rate": 1.2101816409937613e-05, "loss": 0.3678, "step": 19555 }, { "epoch": 0.4326970935998732, "grad_norm": 0.8950735330581665, "learning_rate": 1.2098418998722818e-05, "loss": 0.3762, "step": 19560 }, { "epoch": 0.43280770124138646, "grad_norm": 1.8491418361663818, "learning_rate": 1.2095021334124587e-05, "loss": 0.4727, "step": 19565 }, { "epoch": 0.43291830888289967, "grad_norm": 1.2450112104415894, "learning_rate": 1.2091623416553183e-05, "loss": 0.2448, "step": 19570 }, { "epoch": 0.43302891652441294, "grad_norm": 1.4625964164733887, "learning_rate": 1.2088225246418898e-05, "loss": 0.2653, "step": 19575 }, { "epoch": 0.4331395241659262, "grad_norm": 1.1781786680221558, "learning_rate": 1.208482682413207e-05, "loss": 0.2801, "step": 19580 }, { "epoch": 0.4332501318074395, "grad_norm": 1.5372216701507568, "learning_rate": 1.2081428150103057e-05, "loss": 0.4962, "step": 19585 }, { "epoch": 0.4333607394489527, "grad_norm": 1.2171541452407837, "learning_rate": 1.2078029224742238e-05, "loss": 0.3188, "step": 19590 }, { "epoch": 0.433471347090466, "grad_norm": 1.2515627145767212, "learning_rate": 1.2074630048460042e-05, "loss": 0.223, "step": 19595 }, { "epoch": 0.43358195473197925, "grad_norm": 1.2301796674728394, "learning_rate": 1.2071230621666915e-05, "loss": 0.4239, "step": 19600 }, { "epoch": 0.4336925623734925, "grad_norm": 1.3390971422195435, "learning_rate": 1.2067830944773336e-05, "loss": 0.3683, "step": 19605 }, { "epoch": 0.4338031700150058, "grad_norm": 1.4459218978881836, "learning_rate": 1.2064431018189815e-05, "loss": 0.2956, "step": 19610 }, { "epoch": 0.433913777656519, "grad_norm": 1.4516898393630981, "learning_rate": 1.2061030842326897e-05, "loss": 0.321, "step": 19615 }, { "epoch": 0.4340243852980323, "grad_norm": 1.3352744579315186, "learning_rate": 1.2057630417595142e-05, "loss": 0.2699, "step": 19620 }, { "epoch": 0.43413499293954555, "grad_norm": 1.4018462896347046, "learning_rate": 1.205422974440516e-05, "loss": 0.4199, "step": 19625 }, { "epoch": 0.4342456005810588, "grad_norm": 1.3699814081192017, "learning_rate": 1.2050828823167575e-05, "loss": 0.2921, "step": 19630 }, { "epoch": 0.4343562082225721, "grad_norm": 1.32241690158844, "learning_rate": 1.2047427654293053e-05, "loss": 0.366, "step": 19635 }, { "epoch": 0.4344668158640853, "grad_norm": 1.2325310707092285, "learning_rate": 1.2044026238192275e-05, "loss": 0.466, "step": 19640 }, { "epoch": 0.4345774235055986, "grad_norm": 1.3512977361679077, "learning_rate": 1.204062457527597e-05, "loss": 0.3946, "step": 19645 }, { "epoch": 0.43468803114711185, "grad_norm": 1.1634031534194946, "learning_rate": 1.2037222665954887e-05, "loss": 0.2366, "step": 19650 }, { "epoch": 0.4347986387886251, "grad_norm": 1.7221097946166992, "learning_rate": 1.2033820510639797e-05, "loss": 0.4564, "step": 19655 }, { "epoch": 0.4349092464301384, "grad_norm": 1.4660344123840332, "learning_rate": 1.2030418109741518e-05, "loss": 0.376, "step": 19660 }, { "epoch": 0.4350198540716516, "grad_norm": 1.9274533987045288, "learning_rate": 1.2027015463670887e-05, "loss": 0.4045, "step": 19665 }, { "epoch": 0.4351304617131649, "grad_norm": 1.4580262899398804, "learning_rate": 1.2023612572838767e-05, "loss": 0.2682, "step": 19670 }, { "epoch": 0.43524106935467816, "grad_norm": 1.1376934051513672, "learning_rate": 1.2020209437656063e-05, "loss": 0.5248, "step": 19675 }, { "epoch": 0.43535167699619143, "grad_norm": 1.4024049043655396, "learning_rate": 1.2016806058533698e-05, "loss": 0.4766, "step": 19680 }, { "epoch": 0.43546228463770464, "grad_norm": 1.046596646308899, "learning_rate": 1.2013402435882636e-05, "loss": 0.2541, "step": 19685 }, { "epoch": 0.4355728922792179, "grad_norm": 0.9078878164291382, "learning_rate": 1.2009998570113853e-05, "loss": 0.407, "step": 19690 }, { "epoch": 0.4356834999207312, "grad_norm": 0.7157816886901855, "learning_rate": 1.200659446163837e-05, "loss": 0.4059, "step": 19695 }, { "epoch": 0.43579410756224446, "grad_norm": 1.6878513097763062, "learning_rate": 1.2003190110867236e-05, "loss": 0.5019, "step": 19700 }, { "epoch": 0.43590471520375773, "grad_norm": 1.4810974597930908, "learning_rate": 1.1999785518211515e-05, "loss": 0.2971, "step": 19705 }, { "epoch": 0.43601532284527095, "grad_norm": 1.3933351039886475, "learning_rate": 1.199638068408232e-05, "loss": 0.428, "step": 19710 }, { "epoch": 0.4361259304867842, "grad_norm": 1.1432666778564453, "learning_rate": 1.1992975608890783e-05, "loss": 0.3017, "step": 19715 }, { "epoch": 0.4362365381282975, "grad_norm": 0.794632077217102, "learning_rate": 1.1989570293048059e-05, "loss": 0.355, "step": 19720 }, { "epoch": 0.43634714576981076, "grad_norm": 1.4099551439285278, "learning_rate": 1.1986164736965343e-05, "loss": 0.3366, "step": 19725 }, { "epoch": 0.43645775341132403, "grad_norm": 1.0038650035858154, "learning_rate": 1.1982758941053858e-05, "loss": 0.3389, "step": 19730 }, { "epoch": 0.43656836105283725, "grad_norm": 1.9217500686645508, "learning_rate": 1.1979352905724847e-05, "loss": 0.3301, "step": 19735 }, { "epoch": 0.4366789686943505, "grad_norm": 1.1096901893615723, "learning_rate": 1.197594663138959e-05, "loss": 0.4635, "step": 19740 }, { "epoch": 0.4367895763358638, "grad_norm": 1.6608973741531372, "learning_rate": 1.1972540118459393e-05, "loss": 0.4157, "step": 19745 }, { "epoch": 0.43690018397737707, "grad_norm": 1.724755883216858, "learning_rate": 1.1969133367345596e-05, "loss": 0.4334, "step": 19750 }, { "epoch": 0.43701079161889034, "grad_norm": 1.3186641931533813, "learning_rate": 1.1965726378459555e-05, "loss": 0.3579, "step": 19755 }, { "epoch": 0.43712139926040355, "grad_norm": 1.2183761596679688, "learning_rate": 1.1962319152212669e-05, "loss": 0.4992, "step": 19760 }, { "epoch": 0.4372320069019168, "grad_norm": 1.3859293460845947, "learning_rate": 1.1958911689016359e-05, "loss": 0.3771, "step": 19765 }, { "epoch": 0.4373426145434301, "grad_norm": 2.5749258995056152, "learning_rate": 1.195550398928207e-05, "loss": 0.4587, "step": 19770 }, { "epoch": 0.43745322218494337, "grad_norm": 1.367352843284607, "learning_rate": 1.195209605342129e-05, "loss": 0.3368, "step": 19775 }, { "epoch": 0.4375638298264566, "grad_norm": 1.5986994504928589, "learning_rate": 1.1948687881845518e-05, "loss": 0.3502, "step": 19780 }, { "epoch": 0.43767443746796986, "grad_norm": 1.286909818649292, "learning_rate": 1.1945279474966291e-05, "loss": 0.403, "step": 19785 }, { "epoch": 0.43778504510948313, "grad_norm": 1.1296457052230835, "learning_rate": 1.1941870833195175e-05, "loss": 0.3318, "step": 19790 }, { "epoch": 0.4378956527509964, "grad_norm": 1.1922353506088257, "learning_rate": 1.193846195694376e-05, "loss": 0.3187, "step": 19795 }, { "epoch": 0.43800626039250967, "grad_norm": 1.1324005126953125, "learning_rate": 1.1935052846623673e-05, "loss": 0.4136, "step": 19800 }, { "epoch": 0.4381168680340229, "grad_norm": 1.4829680919647217, "learning_rate": 1.1931643502646555e-05, "loss": 0.4965, "step": 19805 }, { "epoch": 0.43822747567553616, "grad_norm": 1.3153407573699951, "learning_rate": 1.1928233925424089e-05, "loss": 0.3497, "step": 19810 }, { "epoch": 0.43833808331704943, "grad_norm": 1.0868781805038452, "learning_rate": 1.1924824115367978e-05, "loss": 0.4916, "step": 19815 }, { "epoch": 0.4384486909585627, "grad_norm": 1.104351282119751, "learning_rate": 1.1921414072889954e-05, "loss": 0.2501, "step": 19820 }, { "epoch": 0.438559298600076, "grad_norm": 0.9816493988037109, "learning_rate": 1.191800379840178e-05, "loss": 0.307, "step": 19825 }, { "epoch": 0.4386699062415892, "grad_norm": 1.4869261980056763, "learning_rate": 1.1914593292315248e-05, "loss": 0.3107, "step": 19830 }, { "epoch": 0.43878051388310246, "grad_norm": 1.2035752534866333, "learning_rate": 1.1911182555042174e-05, "loss": 0.4024, "step": 19835 }, { "epoch": 0.43889112152461573, "grad_norm": 1.6155033111572266, "learning_rate": 1.1907771586994399e-05, "loss": 0.3276, "step": 19840 }, { "epoch": 0.439001729166129, "grad_norm": 1.2942030429840088, "learning_rate": 1.1904360388583803e-05, "loss": 0.1935, "step": 19845 }, { "epoch": 0.4391123368076423, "grad_norm": 1.0510886907577515, "learning_rate": 1.1900948960222283e-05, "loss": 0.3368, "step": 19850 }, { "epoch": 0.4392229444491555, "grad_norm": 1.085273265838623, "learning_rate": 1.1897537302321768e-05, "loss": 0.3683, "step": 19855 }, { "epoch": 0.43933355209066877, "grad_norm": 1.8757655620574951, "learning_rate": 1.1894125415294218e-05, "loss": 0.3691, "step": 19860 }, { "epoch": 0.43944415973218204, "grad_norm": 0.7841835618019104, "learning_rate": 1.1890713299551618e-05, "loss": 0.2927, "step": 19865 }, { "epoch": 0.4395547673736953, "grad_norm": 1.0527445077896118, "learning_rate": 1.1887300955505969e-05, "loss": 0.3172, "step": 19870 }, { "epoch": 0.4396653750152085, "grad_norm": 1.2305959463119507, "learning_rate": 1.1883888383569324e-05, "loss": 0.3168, "step": 19875 }, { "epoch": 0.4397759826567218, "grad_norm": 1.5897501707077026, "learning_rate": 1.1880475584153745e-05, "loss": 0.4011, "step": 19880 }, { "epoch": 0.43988659029823507, "grad_norm": 0.7117530107498169, "learning_rate": 1.1877062557671323e-05, "loss": 0.2271, "step": 19885 }, { "epoch": 0.43999719793974834, "grad_norm": 1.2494069337844849, "learning_rate": 1.1873649304534184e-05, "loss": 0.2958, "step": 19890 }, { "epoch": 0.4401078055812616, "grad_norm": 1.9465571641921997, "learning_rate": 1.1870235825154479e-05, "loss": 0.3949, "step": 19895 }, { "epoch": 0.44021841322277483, "grad_norm": 1.4699082374572754, "learning_rate": 1.1866822119944378e-05, "loss": 0.4087, "step": 19900 }, { "epoch": 0.4403290208642881, "grad_norm": 1.7699018716812134, "learning_rate": 1.1863408189316088e-05, "loss": 0.2475, "step": 19905 }, { "epoch": 0.4404396285058014, "grad_norm": 1.2527519464492798, "learning_rate": 1.1859994033681844e-05, "loss": 0.4747, "step": 19910 }, { "epoch": 0.44055023614731464, "grad_norm": 1.3330793380737305, "learning_rate": 1.18565796534539e-05, "loss": 0.4563, "step": 19915 }, { "epoch": 0.4406608437888279, "grad_norm": 1.0910282135009766, "learning_rate": 1.1853165049044539e-05, "loss": 0.4099, "step": 19920 }, { "epoch": 0.44077145143034113, "grad_norm": 1.651720404624939, "learning_rate": 1.1849750220866078e-05, "loss": 0.3605, "step": 19925 }, { "epoch": 0.4408820590718544, "grad_norm": 1.0378241539001465, "learning_rate": 1.1846335169330857e-05, "loss": 0.3731, "step": 19930 }, { "epoch": 0.4409926667133677, "grad_norm": 1.507830023765564, "learning_rate": 1.1842919894851239e-05, "loss": 0.3016, "step": 19935 }, { "epoch": 0.44110327435488095, "grad_norm": 0.6773275136947632, "learning_rate": 1.1839504397839619e-05, "loss": 0.3816, "step": 19940 }, { "epoch": 0.44121388199639416, "grad_norm": 0.4568295478820801, "learning_rate": 1.1836088678708418e-05, "loss": 0.3766, "step": 19945 }, { "epoch": 0.44132448963790744, "grad_norm": 1.4113775491714478, "learning_rate": 1.1832672737870081e-05, "loss": 0.3851, "step": 19950 }, { "epoch": 0.4414350972794207, "grad_norm": 1.0247646570205688, "learning_rate": 1.182925657573708e-05, "loss": 0.2981, "step": 19955 }, { "epoch": 0.441545704920934, "grad_norm": 1.4460198879241943, "learning_rate": 1.182584019272192e-05, "loss": 0.3727, "step": 19960 }, { "epoch": 0.44165631256244725, "grad_norm": 1.7860431671142578, "learning_rate": 1.1822423589237132e-05, "loss": 0.452, "step": 19965 }, { "epoch": 0.44176692020396047, "grad_norm": 0.9353137612342834, "learning_rate": 1.1819006765695256e-05, "loss": 0.3483, "step": 19970 }, { "epoch": 0.44187752784547374, "grad_norm": 1.5064069032669067, "learning_rate": 1.1815589722508883e-05, "loss": 0.3573, "step": 19975 }, { "epoch": 0.441988135486987, "grad_norm": 0.9424939751625061, "learning_rate": 1.1812172460090621e-05, "loss": 0.2014, "step": 19980 }, { "epoch": 0.4420987431285003, "grad_norm": 0.7788439393043518, "learning_rate": 1.1808754978853094e-05, "loss": 0.3327, "step": 19985 }, { "epoch": 0.44220935077001355, "grad_norm": 2.6661770343780518, "learning_rate": 1.1805337279208973e-05, "loss": 0.3436, "step": 19990 }, { "epoch": 0.44231995841152677, "grad_norm": 1.6384389400482178, "learning_rate": 1.180191936157094e-05, "loss": 0.3924, "step": 19995 }, { "epoch": 0.44243056605304004, "grad_norm": 1.7247498035430908, "learning_rate": 1.1798501226351701e-05, "loss": 0.346, "step": 20000 }, { "epoch": 0.4425411736945533, "grad_norm": 2.2855899333953857, "learning_rate": 1.1795082873964001e-05, "loss": 0.3884, "step": 20005 }, { "epoch": 0.4426517813360666, "grad_norm": 1.1316620111465454, "learning_rate": 1.1791664304820604e-05, "loss": 0.2926, "step": 20010 }, { "epoch": 0.44276238897757986, "grad_norm": 1.954460620880127, "learning_rate": 1.1788245519334304e-05, "loss": 0.4376, "step": 20015 }, { "epoch": 0.4428729966190931, "grad_norm": 2.485849618911743, "learning_rate": 1.1784826517917913e-05, "loss": 0.3128, "step": 20020 }, { "epoch": 0.44298360426060635, "grad_norm": 1.575088620185852, "learning_rate": 1.1781407300984277e-05, "loss": 0.3935, "step": 20025 }, { "epoch": 0.4430942119021196, "grad_norm": 1.1232407093048096, "learning_rate": 1.177798786894627e-05, "loss": 0.5173, "step": 20030 }, { "epoch": 0.4432048195436329, "grad_norm": 1.5293587446212769, "learning_rate": 1.1774568222216778e-05, "loss": 0.42, "step": 20035 }, { "epoch": 0.4433154271851461, "grad_norm": 1.658264398574829, "learning_rate": 1.1771148361208726e-05, "loss": 0.3876, "step": 20040 }, { "epoch": 0.4434260348266594, "grad_norm": 1.322920322418213, "learning_rate": 1.1767728286335067e-05, "loss": 0.4411, "step": 20045 }, { "epoch": 0.44353664246817265, "grad_norm": 1.444276213645935, "learning_rate": 1.1764307998008767e-05, "loss": 0.3649, "step": 20050 }, { "epoch": 0.4436472501096859, "grad_norm": 1.519026279449463, "learning_rate": 1.176088749664282e-05, "loss": 0.4094, "step": 20055 }, { "epoch": 0.4437578577511992, "grad_norm": 0.892779529094696, "learning_rate": 1.1757466782650263e-05, "loss": 0.2503, "step": 20060 }, { "epoch": 0.4438684653927124, "grad_norm": 1.3540724515914917, "learning_rate": 1.1754045856444143e-05, "loss": 0.2226, "step": 20065 }, { "epoch": 0.4439790730342257, "grad_norm": 0.9391475319862366, "learning_rate": 1.175062471843753e-05, "loss": 0.1442, "step": 20070 }, { "epoch": 0.44408968067573895, "grad_norm": 1.2910488843917847, "learning_rate": 1.1747203369043527e-05, "loss": 0.3805, "step": 20075 }, { "epoch": 0.4442002883172522, "grad_norm": 2.3763980865478516, "learning_rate": 1.1743781808675266e-05, "loss": 0.3621, "step": 20080 }, { "epoch": 0.4443108959587655, "grad_norm": 1.2273002862930298, "learning_rate": 1.1740360037745893e-05, "loss": 0.3811, "step": 20085 }, { "epoch": 0.4444215036002787, "grad_norm": 0.7371603846549988, "learning_rate": 1.1736938056668588e-05, "loss": 0.2145, "step": 20090 }, { "epoch": 0.444532111241792, "grad_norm": 1.0815540552139282, "learning_rate": 1.1733515865856557e-05, "loss": 0.2151, "step": 20095 }, { "epoch": 0.44464271888330525, "grad_norm": 1.3160040378570557, "learning_rate": 1.1730093465723025e-05, "loss": 0.4814, "step": 20100 }, { "epoch": 0.4447533265248185, "grad_norm": 1.542919635772705, "learning_rate": 1.1726670856681246e-05, "loss": 0.2868, "step": 20105 }, { "epoch": 0.4448639341663318, "grad_norm": 1.6807786226272583, "learning_rate": 1.17232480391445e-05, "loss": 0.5454, "step": 20110 }, { "epoch": 0.444974541807845, "grad_norm": 0.8359502553939819, "learning_rate": 1.1719825013526094e-05, "loss": 0.3625, "step": 20115 }, { "epoch": 0.4450851494493583, "grad_norm": 1.2554800510406494, "learning_rate": 1.1716401780239349e-05, "loss": 0.271, "step": 20120 }, { "epoch": 0.44519575709087156, "grad_norm": 1.5051734447479248, "learning_rate": 1.1712978339697626e-05, "loss": 0.2643, "step": 20125 }, { "epoch": 0.44530636473238483, "grad_norm": 1.017870306968689, "learning_rate": 1.1709554692314309e-05, "loss": 0.4021, "step": 20130 }, { "epoch": 0.44541697237389805, "grad_norm": 0.8587832450866699, "learning_rate": 1.170613083850279e-05, "loss": 0.2475, "step": 20135 }, { "epoch": 0.4455275800154113, "grad_norm": 1.261328935623169, "learning_rate": 1.1702706778676504e-05, "loss": 0.4262, "step": 20140 }, { "epoch": 0.4456381876569246, "grad_norm": 0.9294367432594299, "learning_rate": 1.169928251324891e-05, "loss": 0.3112, "step": 20145 }, { "epoch": 0.44574879529843786, "grad_norm": 2.0882058143615723, "learning_rate": 1.1695858042633478e-05, "loss": 0.5383, "step": 20150 }, { "epoch": 0.44585940293995113, "grad_norm": 1.283618688583374, "learning_rate": 1.1692433367243722e-05, "loss": 0.3768, "step": 20155 }, { "epoch": 0.44597001058146435, "grad_norm": 1.2865792512893677, "learning_rate": 1.1689008487493163e-05, "loss": 0.3656, "step": 20160 }, { "epoch": 0.4460806182229776, "grad_norm": 1.2560279369354248, "learning_rate": 1.1685583403795355e-05, "loss": 0.3675, "step": 20165 }, { "epoch": 0.4461912258644909, "grad_norm": 1.6932481527328491, "learning_rate": 1.1682158116563875e-05, "loss": 0.4075, "step": 20170 }, { "epoch": 0.44630183350600416, "grad_norm": 1.7070293426513672, "learning_rate": 1.167873262621233e-05, "loss": 0.336, "step": 20175 }, { "epoch": 0.44641244114751744, "grad_norm": 1.322261095046997, "learning_rate": 1.1675306933154348e-05, "loss": 0.3615, "step": 20180 }, { "epoch": 0.44652304878903065, "grad_norm": 0.8189114928245544, "learning_rate": 1.1671881037803569e-05, "loss": 0.2225, "step": 20185 }, { "epoch": 0.4466336564305439, "grad_norm": 1.1639255285263062, "learning_rate": 1.166845494057368e-05, "loss": 0.3629, "step": 20190 }, { "epoch": 0.4467442640720572, "grad_norm": 1.2118276357650757, "learning_rate": 1.1665028641878378e-05, "loss": 0.3637, "step": 20195 }, { "epoch": 0.44685487171357047, "grad_norm": 1.770713448524475, "learning_rate": 1.1661602142131386e-05, "loss": 0.3807, "step": 20200 }, { "epoch": 0.44696547935508374, "grad_norm": 1.101391315460205, "learning_rate": 1.1658175441746453e-05, "loss": 0.2962, "step": 20205 }, { "epoch": 0.44707608699659696, "grad_norm": 1.2867802381515503, "learning_rate": 1.1654748541137356e-05, "loss": 0.3453, "step": 20210 }, { "epoch": 0.4471866946381102, "grad_norm": 1.492274284362793, "learning_rate": 1.1651321440717887e-05, "loss": 0.3536, "step": 20215 }, { "epoch": 0.4472973022796235, "grad_norm": 0.9864513874053955, "learning_rate": 1.1647894140901867e-05, "loss": 0.3934, "step": 20220 }, { "epoch": 0.44740790992113677, "grad_norm": 1.200791358947754, "learning_rate": 1.1644466642103145e-05, "loss": 0.3585, "step": 20225 }, { "epoch": 0.44751851756265, "grad_norm": 1.515763521194458, "learning_rate": 1.1641038944735592e-05, "loss": 0.4024, "step": 20230 }, { "epoch": 0.44762912520416326, "grad_norm": 1.217124104499817, "learning_rate": 1.1637611049213096e-05, "loss": 0.2644, "step": 20235 }, { "epoch": 0.44773973284567653, "grad_norm": 0.7721331119537354, "learning_rate": 1.1634182955949575e-05, "loss": 0.3174, "step": 20240 }, { "epoch": 0.4478503404871898, "grad_norm": 0.9951835870742798, "learning_rate": 1.1630754665358976e-05, "loss": 0.2897, "step": 20245 }, { "epoch": 0.4479609481287031, "grad_norm": 1.0051727294921875, "learning_rate": 1.1627326177855256e-05, "loss": 0.3988, "step": 20250 }, { "epoch": 0.4480715557702163, "grad_norm": 1.8629246950149536, "learning_rate": 1.1623897493852412e-05, "loss": 0.5742, "step": 20255 }, { "epoch": 0.44818216341172956, "grad_norm": 1.0348793268203735, "learning_rate": 1.162046861376445e-05, "loss": 0.3594, "step": 20260 }, { "epoch": 0.44829277105324283, "grad_norm": 1.5118532180786133, "learning_rate": 1.161703953800541e-05, "loss": 0.4968, "step": 20265 }, { "epoch": 0.4484033786947561, "grad_norm": 1.107062578201294, "learning_rate": 1.1613610266989346e-05, "loss": 0.2914, "step": 20270 }, { "epoch": 0.4485139863362694, "grad_norm": 1.5339826345443726, "learning_rate": 1.1610180801130351e-05, "loss": 0.2907, "step": 20275 }, { "epoch": 0.4486245939777826, "grad_norm": 1.0990873575210571, "learning_rate": 1.1606751140842526e-05, "loss": 0.3383, "step": 20280 }, { "epoch": 0.44873520161929586, "grad_norm": 1.4358147382736206, "learning_rate": 1.1603321286539996e-05, "loss": 0.4069, "step": 20285 }, { "epoch": 0.44884580926080914, "grad_norm": 1.575076699256897, "learning_rate": 1.1599891238636926e-05, "loss": 0.4257, "step": 20290 }, { "epoch": 0.4489564169023224, "grad_norm": 0.6808692812919617, "learning_rate": 1.1596460997547489e-05, "loss": 0.2992, "step": 20295 }, { "epoch": 0.4490670245438356, "grad_norm": 1.1532038450241089, "learning_rate": 1.1593030563685884e-05, "loss": 0.5028, "step": 20300 }, { "epoch": 0.4491776321853489, "grad_norm": 1.387302279472351, "learning_rate": 1.1589599937466334e-05, "loss": 0.2802, "step": 20305 }, { "epoch": 0.44928823982686217, "grad_norm": 1.3214210271835327, "learning_rate": 1.1586169119303091e-05, "loss": 0.2656, "step": 20310 }, { "epoch": 0.44939884746837544, "grad_norm": 0.9283217191696167, "learning_rate": 1.1582738109610416e-05, "loss": 0.3428, "step": 20315 }, { "epoch": 0.4495094551098887, "grad_norm": 0.7992318868637085, "learning_rate": 1.157930690880261e-05, "loss": 0.276, "step": 20320 }, { "epoch": 0.4496200627514019, "grad_norm": 1.6554220914840698, "learning_rate": 1.157587551729399e-05, "loss": 0.3236, "step": 20325 }, { "epoch": 0.4497306703929152, "grad_norm": 1.3470587730407715, "learning_rate": 1.1572443935498893e-05, "loss": 0.4794, "step": 20330 }, { "epoch": 0.44984127803442847, "grad_norm": 1.2289427518844604, "learning_rate": 1.156901216383168e-05, "loss": 0.3572, "step": 20335 }, { "epoch": 0.44995188567594174, "grad_norm": 0.852251410484314, "learning_rate": 1.1565580202706736e-05, "loss": 0.5143, "step": 20340 }, { "epoch": 0.450062493317455, "grad_norm": 1.201866865158081, "learning_rate": 1.1562148052538476e-05, "loss": 0.3623, "step": 20345 }, { "epoch": 0.45017310095896823, "grad_norm": 0.9492722749710083, "learning_rate": 1.155871571374132e-05, "loss": 0.2709, "step": 20350 }, { "epoch": 0.4502837086004815, "grad_norm": 1.5869464874267578, "learning_rate": 1.155528318672973e-05, "loss": 0.4167, "step": 20355 }, { "epoch": 0.4503943162419948, "grad_norm": 1.11073637008667, "learning_rate": 1.1551850471918184e-05, "loss": 0.4254, "step": 20360 }, { "epoch": 0.45050492388350805, "grad_norm": 2.1027517318725586, "learning_rate": 1.1548417569721173e-05, "loss": 0.3236, "step": 20365 }, { "epoch": 0.4506155315250213, "grad_norm": 1.2928574085235596, "learning_rate": 1.1544984480553224e-05, "loss": 0.3862, "step": 20370 }, { "epoch": 0.45072613916653453, "grad_norm": 2.5573058128356934, "learning_rate": 1.1541551204828886e-05, "loss": 0.4146, "step": 20375 }, { "epoch": 0.4508367468080478, "grad_norm": 1.3091154098510742, "learning_rate": 1.153811774296272e-05, "loss": 0.358, "step": 20380 }, { "epoch": 0.4509473544495611, "grad_norm": 1.5800482034683228, "learning_rate": 1.1534684095369311e-05, "loss": 0.2354, "step": 20385 }, { "epoch": 0.45105796209107435, "grad_norm": 1.4806175231933594, "learning_rate": 1.153125026246328e-05, "loss": 0.4012, "step": 20390 }, { "epoch": 0.45116856973258757, "grad_norm": 1.474134087562561, "learning_rate": 1.152781624465926e-05, "loss": 0.3491, "step": 20395 }, { "epoch": 0.45127917737410084, "grad_norm": 1.3403135538101196, "learning_rate": 1.15243820423719e-05, "loss": 0.3945, "step": 20400 }, { "epoch": 0.4513897850156141, "grad_norm": 1.6350120306015015, "learning_rate": 1.1520947656015886e-05, "loss": 0.3253, "step": 20405 }, { "epoch": 0.4515003926571274, "grad_norm": 1.5926055908203125, "learning_rate": 1.151751308600592e-05, "loss": 0.3545, "step": 20410 }, { "epoch": 0.45161100029864065, "grad_norm": 1.5719201564788818, "learning_rate": 1.1514078332756717e-05, "loss": 0.268, "step": 20415 }, { "epoch": 0.45172160794015387, "grad_norm": 1.0602433681488037, "learning_rate": 1.151064339668303e-05, "loss": 0.3053, "step": 20420 }, { "epoch": 0.45183221558166714, "grad_norm": 1.0288382768630981, "learning_rate": 1.1507208278199629e-05, "loss": 0.3807, "step": 20425 }, { "epoch": 0.4519428232231804, "grad_norm": 1.6093578338623047, "learning_rate": 1.1503772977721293e-05, "loss": 0.4238, "step": 20430 }, { "epoch": 0.4520534308646937, "grad_norm": 1.3977136611938477, "learning_rate": 1.1500337495662839e-05, "loss": 0.4143, "step": 20435 }, { "epoch": 0.45216403850620696, "grad_norm": 1.015252709388733, "learning_rate": 1.1496901832439104e-05, "loss": 0.2192, "step": 20440 }, { "epoch": 0.45227464614772017, "grad_norm": 1.0935893058776855, "learning_rate": 1.1493465988464937e-05, "loss": 0.2973, "step": 20445 }, { "epoch": 0.45238525378923344, "grad_norm": 0.7642233371734619, "learning_rate": 1.1490029964155218e-05, "loss": 0.3551, "step": 20450 }, { "epoch": 0.4524958614307467, "grad_norm": 1.4293674230575562, "learning_rate": 1.1486593759924847e-05, "loss": 0.3189, "step": 20455 }, { "epoch": 0.45260646907226, "grad_norm": 1.6629277467727661, "learning_rate": 1.1483157376188744e-05, "loss": 0.4409, "step": 20460 }, { "epoch": 0.45271707671377326, "grad_norm": 1.2596487998962402, "learning_rate": 1.1479720813361847e-05, "loss": 0.3055, "step": 20465 }, { "epoch": 0.4528276843552865, "grad_norm": 1.148688793182373, "learning_rate": 1.1476284071859128e-05, "loss": 0.3275, "step": 20470 }, { "epoch": 0.45293829199679975, "grad_norm": 1.8559743165969849, "learning_rate": 1.1472847152095567e-05, "loss": 0.2427, "step": 20475 }, { "epoch": 0.453048899638313, "grad_norm": 1.0791921615600586, "learning_rate": 1.1469410054486172e-05, "loss": 0.35, "step": 20480 }, { "epoch": 0.4531595072798263, "grad_norm": 1.119803547859192, "learning_rate": 1.1465972779445966e-05, "loss": 0.3322, "step": 20485 }, { "epoch": 0.4532701149213395, "grad_norm": 1.2226650714874268, "learning_rate": 1.1462535327390011e-05, "loss": 0.2871, "step": 20490 }, { "epoch": 0.4533807225628528, "grad_norm": 1.1389788389205933, "learning_rate": 1.145909769873337e-05, "loss": 0.3581, "step": 20495 }, { "epoch": 0.45349133020436605, "grad_norm": 1.0364431142807007, "learning_rate": 1.1455659893891136e-05, "loss": 0.3379, "step": 20500 }, { "epoch": 0.4536019378458793, "grad_norm": 1.2649487257003784, "learning_rate": 1.1452221913278423e-05, "loss": 0.3453, "step": 20505 }, { "epoch": 0.4537125454873926, "grad_norm": 0.8406450152397156, "learning_rate": 1.144878375731037e-05, "loss": 0.4072, "step": 20510 }, { "epoch": 0.4538231531289058, "grad_norm": 1.5037500858306885, "learning_rate": 1.1445345426402129e-05, "loss": 0.419, "step": 20515 }, { "epoch": 0.4539337607704191, "grad_norm": 1.2963961362838745, "learning_rate": 1.1441906920968877e-05, "loss": 0.4622, "step": 20520 }, { "epoch": 0.45404436841193235, "grad_norm": 0.9048462510108948, "learning_rate": 1.1438468241425818e-05, "loss": 0.4288, "step": 20525 }, { "epoch": 0.4541549760534456, "grad_norm": 1.0850707292556763, "learning_rate": 1.1435029388188163e-05, "loss": 0.3262, "step": 20530 }, { "epoch": 0.4542655836949589, "grad_norm": 1.1755043268203735, "learning_rate": 1.1431590361671161e-05, "loss": 0.3695, "step": 20535 }, { "epoch": 0.4543761913364721, "grad_norm": 1.1697312593460083, "learning_rate": 1.1428151162290069e-05, "loss": 0.4619, "step": 20540 }, { "epoch": 0.4544867989779854, "grad_norm": 2.0028576850891113, "learning_rate": 1.142471179046017e-05, "loss": 0.5778, "step": 20545 }, { "epoch": 0.45459740661949866, "grad_norm": 1.305633306503296, "learning_rate": 1.1421272246596762e-05, "loss": 0.4229, "step": 20550 }, { "epoch": 0.45470801426101193, "grad_norm": 1.771004557609558, "learning_rate": 1.1417832531115178e-05, "loss": 0.2996, "step": 20555 }, { "epoch": 0.4548186219025252, "grad_norm": 1.5165960788726807, "learning_rate": 1.141439264443076e-05, "loss": 0.4236, "step": 20560 }, { "epoch": 0.4549292295440384, "grad_norm": 1.4999345541000366, "learning_rate": 1.1410952586958867e-05, "loss": 0.4958, "step": 20565 }, { "epoch": 0.4550398371855517, "grad_norm": 1.4829214811325073, "learning_rate": 1.1407512359114892e-05, "loss": 0.6291, "step": 20570 }, { "epoch": 0.45515044482706496, "grad_norm": 0.860802948474884, "learning_rate": 1.140407196131424e-05, "loss": 0.517, "step": 20575 }, { "epoch": 0.45526105246857823, "grad_norm": 1.6503509283065796, "learning_rate": 1.1400631393972334e-05, "loss": 0.458, "step": 20580 }, { "epoch": 0.45537166011009145, "grad_norm": 1.6792314052581787, "learning_rate": 1.1397190657504623e-05, "loss": 0.3531, "step": 20585 }, { "epoch": 0.4554822677516047, "grad_norm": 2.091240644454956, "learning_rate": 1.1393749752326581e-05, "loss": 0.3815, "step": 20590 }, { "epoch": 0.455592875393118, "grad_norm": 2.628059148788452, "learning_rate": 1.139030867885369e-05, "loss": 0.4093, "step": 20595 }, { "epoch": 0.45570348303463126, "grad_norm": 1.1762651205062866, "learning_rate": 1.1386867437501456e-05, "loss": 0.5163, "step": 20600 }, { "epoch": 0.45581409067614453, "grad_norm": 1.6067475080490112, "learning_rate": 1.1383426028685416e-05, "loss": 0.2559, "step": 20605 }, { "epoch": 0.45592469831765775, "grad_norm": 1.2084143161773682, "learning_rate": 1.1379984452821114e-05, "loss": 0.5298, "step": 20610 }, { "epoch": 0.456035305959171, "grad_norm": 1.7350382804870605, "learning_rate": 1.1376542710324117e-05, "loss": 0.4803, "step": 20615 }, { "epoch": 0.4561459136006843, "grad_norm": 1.172811508178711, "learning_rate": 1.1373100801610017e-05, "loss": 0.39, "step": 20620 }, { "epoch": 0.45625652124219757, "grad_norm": 1.1811856031417847, "learning_rate": 1.1369658727094431e-05, "loss": 0.3812, "step": 20625 }, { "epoch": 0.45636712888371084, "grad_norm": 1.0030009746551514, "learning_rate": 1.1366216487192972e-05, "loss": 0.3544, "step": 20630 }, { "epoch": 0.45647773652522405, "grad_norm": 0.8289363980293274, "learning_rate": 1.1362774082321302e-05, "loss": 0.3033, "step": 20635 }, { "epoch": 0.4565883441667373, "grad_norm": 2.620087146759033, "learning_rate": 1.1359331512895092e-05, "loss": 0.3162, "step": 20640 }, { "epoch": 0.4566989518082506, "grad_norm": 1.7577729225158691, "learning_rate": 1.1355888779330018e-05, "loss": 0.4536, "step": 20645 }, { "epoch": 0.45680955944976387, "grad_norm": 1.586221694946289, "learning_rate": 1.1352445882041798e-05, "loss": 0.3145, "step": 20650 }, { "epoch": 0.4569201670912771, "grad_norm": 1.4771896600723267, "learning_rate": 1.1349002821446164e-05, "loss": 0.3848, "step": 20655 }, { "epoch": 0.45703077473279036, "grad_norm": 0.75051349401474, "learning_rate": 1.1345559597958857e-05, "loss": 0.433, "step": 20660 }, { "epoch": 0.45714138237430363, "grad_norm": 1.8643039464950562, "learning_rate": 1.1342116211995648e-05, "loss": 0.3858, "step": 20665 }, { "epoch": 0.4572519900158169, "grad_norm": 0.9719410538673401, "learning_rate": 1.1338672663972328e-05, "loss": 0.3675, "step": 20670 }, { "epoch": 0.45736259765733017, "grad_norm": 1.5651917457580566, "learning_rate": 1.13352289543047e-05, "loss": 0.4642, "step": 20675 }, { "epoch": 0.4574732052988434, "grad_norm": 1.664474606513977, "learning_rate": 1.133178508340859e-05, "loss": 0.4399, "step": 20680 }, { "epoch": 0.45758381294035666, "grad_norm": 1.2243058681488037, "learning_rate": 1.1328341051699852e-05, "loss": 0.5101, "step": 20685 }, { "epoch": 0.45769442058186993, "grad_norm": 1.2065712213516235, "learning_rate": 1.1324896859594344e-05, "loss": 0.3008, "step": 20690 }, { "epoch": 0.4578050282233832, "grad_norm": 1.5615761280059814, "learning_rate": 1.1321452507507953e-05, "loss": 0.2929, "step": 20695 }, { "epoch": 0.4579156358648965, "grad_norm": 1.74229896068573, "learning_rate": 1.1318007995856583e-05, "loss": 0.2902, "step": 20700 }, { "epoch": 0.4580262435064097, "grad_norm": 0.9859062433242798, "learning_rate": 1.131456332505616e-05, "loss": 0.3677, "step": 20705 }, { "epoch": 0.45813685114792296, "grad_norm": 0.8809463381767273, "learning_rate": 1.1311118495522626e-05, "loss": 0.3737, "step": 20710 }, { "epoch": 0.45824745878943624, "grad_norm": 1.3435890674591064, "learning_rate": 1.1307673507671941e-05, "loss": 0.5034, "step": 20715 }, { "epoch": 0.4583580664309495, "grad_norm": 1.1851511001586914, "learning_rate": 1.130422836192009e-05, "loss": 0.3477, "step": 20720 }, { "epoch": 0.4584686740724628, "grad_norm": 1.4060440063476562, "learning_rate": 1.1300783058683073e-05, "loss": 0.3645, "step": 20725 }, { "epoch": 0.458579281713976, "grad_norm": 1.619185447692871, "learning_rate": 1.1297337598376902e-05, "loss": 0.3239, "step": 20730 }, { "epoch": 0.45868988935548927, "grad_norm": 1.1304689645767212, "learning_rate": 1.1293891981417624e-05, "loss": 0.3646, "step": 20735 }, { "epoch": 0.45880049699700254, "grad_norm": 1.38199782371521, "learning_rate": 1.1290446208221295e-05, "loss": 0.4771, "step": 20740 }, { "epoch": 0.4589111046385158, "grad_norm": 1.2268449068069458, "learning_rate": 1.1287000279203986e-05, "loss": 0.3315, "step": 20745 }, { "epoch": 0.459021712280029, "grad_norm": 1.2769156694412231, "learning_rate": 1.1283554194781797e-05, "loss": 0.3076, "step": 20750 }, { "epoch": 0.4591323199215423, "grad_norm": 0.8869127035140991, "learning_rate": 1.1280107955370838e-05, "loss": 0.3308, "step": 20755 }, { "epoch": 0.45924292756305557, "grad_norm": 0.9675303101539612, "learning_rate": 1.1276661561387244e-05, "loss": 0.3795, "step": 20760 }, { "epoch": 0.45935353520456884, "grad_norm": 1.5916236639022827, "learning_rate": 1.1273215013247163e-05, "loss": 0.265, "step": 20765 }, { "epoch": 0.4594641428460821, "grad_norm": 0.82509446144104, "learning_rate": 1.1269768311366766e-05, "loss": 0.3585, "step": 20770 }, { "epoch": 0.45957475048759533, "grad_norm": 1.1544899940490723, "learning_rate": 1.1266321456162247e-05, "loss": 0.4625, "step": 20775 }, { "epoch": 0.4596853581291086, "grad_norm": 0.8736791610717773, "learning_rate": 1.12628744480498e-05, "loss": 0.3133, "step": 20780 }, { "epoch": 0.4597959657706219, "grad_norm": 0.8538786172866821, "learning_rate": 1.1259427287445658e-05, "loss": 0.2547, "step": 20785 }, { "epoch": 0.45990657341213514, "grad_norm": 0.8125515580177307, "learning_rate": 1.1255979974766071e-05, "loss": 0.3892, "step": 20790 }, { "epoch": 0.4600171810536484, "grad_norm": 1.5499783754348755, "learning_rate": 1.1252532510427287e-05, "loss": 0.3307, "step": 20795 }, { "epoch": 0.46012778869516163, "grad_norm": 1.3563756942749023, "learning_rate": 1.1249084894845597e-05, "loss": 0.2653, "step": 20800 }, { "epoch": 0.4602383963366749, "grad_norm": 1.848827600479126, "learning_rate": 1.1245637128437297e-05, "loss": 0.3071, "step": 20805 }, { "epoch": 0.4603490039781882, "grad_norm": 1.0011563301086426, "learning_rate": 1.1242189211618699e-05, "loss": 0.2109, "step": 20810 }, { "epoch": 0.46045961161970145, "grad_norm": 1.1488044261932373, "learning_rate": 1.123874114480614e-05, "loss": 0.3983, "step": 20815 }, { "epoch": 0.4605702192612147, "grad_norm": 1.223855972290039, "learning_rate": 1.1235292928415977e-05, "loss": 0.4039, "step": 20820 }, { "epoch": 0.46068082690272794, "grad_norm": 1.0819953680038452, "learning_rate": 1.123184456286458e-05, "loss": 0.3658, "step": 20825 }, { "epoch": 0.4607914345442412, "grad_norm": 1.336761474609375, "learning_rate": 1.122839604856833e-05, "loss": 0.3664, "step": 20830 }, { "epoch": 0.4609020421857545, "grad_norm": 1.391581654548645, "learning_rate": 1.1224947385943646e-05, "loss": 0.3505, "step": 20835 }, { "epoch": 0.46101264982726775, "grad_norm": 1.024085521697998, "learning_rate": 1.1221498575406949e-05, "loss": 0.2378, "step": 20840 }, { "epoch": 0.46112325746878097, "grad_norm": 1.013016939163208, "learning_rate": 1.1218049617374674e-05, "loss": 0.4095, "step": 20845 }, { "epoch": 0.46123386511029424, "grad_norm": 1.713674545288086, "learning_rate": 1.121460051226329e-05, "loss": 0.3891, "step": 20850 }, { "epoch": 0.4613444727518075, "grad_norm": 1.084812879562378, "learning_rate": 1.1211151260489277e-05, "loss": 0.3346, "step": 20855 }, { "epoch": 0.4614550803933208, "grad_norm": 1.8124972581863403, "learning_rate": 1.1207701862469125e-05, "loss": 0.3945, "step": 20860 }, { "epoch": 0.46156568803483405, "grad_norm": 0.9089289307594299, "learning_rate": 1.1204252318619347e-05, "loss": 0.4188, "step": 20865 }, { "epoch": 0.46167629567634727, "grad_norm": 0.7042198181152344, "learning_rate": 1.120080262935648e-05, "loss": 0.366, "step": 20870 }, { "epoch": 0.46178690331786054, "grad_norm": 1.236413836479187, "learning_rate": 1.1197352795097069e-05, "loss": 0.2806, "step": 20875 }, { "epoch": 0.4618975109593738, "grad_norm": 1.5303196907043457, "learning_rate": 1.1193902816257682e-05, "loss": 0.3394, "step": 20880 }, { "epoch": 0.4620081186008871, "grad_norm": 0.7639788389205933, "learning_rate": 1.1190452693254903e-05, "loss": 0.3688, "step": 20885 }, { "epoch": 0.46211872624240036, "grad_norm": 1.3917498588562012, "learning_rate": 1.1187002426505333e-05, "loss": 0.452, "step": 20890 }, { "epoch": 0.4622293338839136, "grad_norm": 1.4837844371795654, "learning_rate": 1.1183552016425588e-05, "loss": 0.4196, "step": 20895 }, { "epoch": 0.46233994152542685, "grad_norm": 2.1526389122009277, "learning_rate": 1.118010146343231e-05, "loss": 0.2451, "step": 20900 }, { "epoch": 0.4624505491669401, "grad_norm": 1.1124844551086426, "learning_rate": 1.1176650767942148e-05, "loss": 0.3649, "step": 20905 }, { "epoch": 0.4625611568084534, "grad_norm": 0.818028450012207, "learning_rate": 1.1173199930371769e-05, "loss": 0.3091, "step": 20910 }, { "epoch": 0.46267176444996666, "grad_norm": 1.4283114671707153, "learning_rate": 1.1169748951137866e-05, "loss": 0.3642, "step": 20915 }, { "epoch": 0.4627823720914799, "grad_norm": 0.5447675585746765, "learning_rate": 1.1166297830657146e-05, "loss": 0.2045, "step": 20920 }, { "epoch": 0.46289297973299315, "grad_norm": 1.3286418914794922, "learning_rate": 1.1162846569346325e-05, "loss": 0.3856, "step": 20925 }, { "epoch": 0.4630035873745064, "grad_norm": 2.1755011081695557, "learning_rate": 1.115939516762214e-05, "loss": 0.4138, "step": 20930 }, { "epoch": 0.4631141950160197, "grad_norm": 0.9904136657714844, "learning_rate": 1.1155943625901351e-05, "loss": 0.3345, "step": 20935 }, { "epoch": 0.4632248026575329, "grad_norm": 0.8553004264831543, "learning_rate": 1.1152491944600734e-05, "loss": 0.2406, "step": 20940 }, { "epoch": 0.4633354102990462, "grad_norm": 0.8864454030990601, "learning_rate": 1.1149040124137072e-05, "loss": 0.3778, "step": 20945 }, { "epoch": 0.46344601794055945, "grad_norm": 0.9569950699806213, "learning_rate": 1.1145588164927172e-05, "loss": 0.2673, "step": 20950 }, { "epoch": 0.4635566255820727, "grad_norm": 1.023227334022522, "learning_rate": 1.1142136067387862e-05, "loss": 0.3682, "step": 20955 }, { "epoch": 0.463667233223586, "grad_norm": 1.6734954118728638, "learning_rate": 1.1138683831935975e-05, "loss": 0.288, "step": 20960 }, { "epoch": 0.4637778408650992, "grad_norm": 1.090619444847107, "learning_rate": 1.1135231458988372e-05, "loss": 0.2689, "step": 20965 }, { "epoch": 0.4638884485066125, "grad_norm": 1.1837520599365234, "learning_rate": 1.1131778948961926e-05, "loss": 0.2491, "step": 20970 }, { "epoch": 0.46399905614812575, "grad_norm": 1.3796272277832031, "learning_rate": 1.1128326302273524e-05, "loss": 0.5043, "step": 20975 }, { "epoch": 0.464109663789639, "grad_norm": 1.1085485219955444, "learning_rate": 1.112487351934007e-05, "loss": 0.4207, "step": 20980 }, { "epoch": 0.4642202714311523, "grad_norm": 1.5896289348602295, "learning_rate": 1.1121420600578494e-05, "loss": 0.5263, "step": 20985 }, { "epoch": 0.4643308790726655, "grad_norm": 1.4823778867721558, "learning_rate": 1.1117967546405729e-05, "loss": 0.3234, "step": 20990 }, { "epoch": 0.4644414867141788, "grad_norm": 1.3792667388916016, "learning_rate": 1.1114514357238729e-05, "loss": 0.4248, "step": 20995 }, { "epoch": 0.46455209435569206, "grad_norm": 1.2167112827301025, "learning_rate": 1.111106103349447e-05, "loss": 0.2708, "step": 21000 }, { "epoch": 0.46466270199720533, "grad_norm": 1.2067264318466187, "learning_rate": 1.1107607575589941e-05, "loss": 0.3815, "step": 21005 }, { "epoch": 0.46477330963871855, "grad_norm": 1.5734552145004272, "learning_rate": 1.1104153983942136e-05, "loss": 0.4769, "step": 21010 }, { "epoch": 0.4648839172802318, "grad_norm": 1.2870432138442993, "learning_rate": 1.1100700258968086e-05, "loss": 0.3495, "step": 21015 }, { "epoch": 0.4649945249217451, "grad_norm": 1.6505204439163208, "learning_rate": 1.1097246401084827e-05, "loss": 0.3982, "step": 21020 }, { "epoch": 0.46510513256325836, "grad_norm": 1.2078660726547241, "learning_rate": 1.1093792410709403e-05, "loss": 0.3384, "step": 21025 }, { "epoch": 0.46521574020477163, "grad_norm": 1.3966091871261597, "learning_rate": 1.1090338288258883e-05, "loss": 0.2833, "step": 21030 }, { "epoch": 0.46532634784628485, "grad_norm": 1.1180732250213623, "learning_rate": 1.1086884034150363e-05, "loss": 0.3794, "step": 21035 }, { "epoch": 0.4654369554877981, "grad_norm": 1.0351855754852295, "learning_rate": 1.108342964880093e-05, "loss": 0.3353, "step": 21040 }, { "epoch": 0.4655475631293114, "grad_norm": 1.1921617984771729, "learning_rate": 1.1079975132627705e-05, "loss": 0.2712, "step": 21045 }, { "epoch": 0.46565817077082466, "grad_norm": 1.0640487670898438, "learning_rate": 1.107652048604782e-05, "loss": 0.4085, "step": 21050 }, { "epoch": 0.46576877841233794, "grad_norm": 1.0113245248794556, "learning_rate": 1.1073065709478425e-05, "loss": 0.4283, "step": 21055 }, { "epoch": 0.46587938605385115, "grad_norm": 1.3298726081848145, "learning_rate": 1.1069610803336676e-05, "loss": 0.3361, "step": 21060 }, { "epoch": 0.4659899936953644, "grad_norm": 1.5983307361602783, "learning_rate": 1.1066155768039759e-05, "loss": 0.2822, "step": 21065 }, { "epoch": 0.4661006013368777, "grad_norm": 1.608391523361206, "learning_rate": 1.1062700604004868e-05, "loss": 0.3565, "step": 21070 }, { "epoch": 0.46621120897839097, "grad_norm": 0.6570141315460205, "learning_rate": 1.1059245311649207e-05, "loss": 0.4333, "step": 21075 }, { "epoch": 0.46632181661990424, "grad_norm": 1.384387731552124, "learning_rate": 1.1055789891390004e-05, "loss": 0.4012, "step": 21080 }, { "epoch": 0.46643242426141746, "grad_norm": 1.2072312831878662, "learning_rate": 1.1052334343644506e-05, "loss": 0.311, "step": 21085 }, { "epoch": 0.4665430319029307, "grad_norm": 1.537024736404419, "learning_rate": 1.1048878668829961e-05, "loss": 0.4249, "step": 21090 }, { "epoch": 0.466653639544444, "grad_norm": 1.4624428749084473, "learning_rate": 1.1045422867363642e-05, "loss": 0.3753, "step": 21095 }, { "epoch": 0.46676424718595727, "grad_norm": 1.3045048713684082, "learning_rate": 1.104196693966284e-05, "loss": 0.4114, "step": 21100 }, { "epoch": 0.4668748548274705, "grad_norm": 1.1534837484359741, "learning_rate": 1.103851088614486e-05, "loss": 0.4182, "step": 21105 }, { "epoch": 0.46698546246898376, "grad_norm": 1.326043963432312, "learning_rate": 1.1035054707227006e-05, "loss": 0.3793, "step": 21110 }, { "epoch": 0.46709607011049703, "grad_norm": 1.077608346939087, "learning_rate": 1.1031598403326624e-05, "loss": 0.2838, "step": 21115 }, { "epoch": 0.4672066777520103, "grad_norm": 1.4885801076889038, "learning_rate": 1.1028141974861057e-05, "loss": 0.4475, "step": 21120 }, { "epoch": 0.4673172853935236, "grad_norm": 1.3847779035568237, "learning_rate": 1.1024685422247668e-05, "loss": 0.4372, "step": 21125 }, { "epoch": 0.4674278930350368, "grad_norm": 0.848900556564331, "learning_rate": 1.1021228745903829e-05, "loss": 0.3199, "step": 21130 }, { "epoch": 0.46753850067655006, "grad_norm": 1.443092703819275, "learning_rate": 1.1017771946246942e-05, "loss": 0.343, "step": 21135 }, { "epoch": 0.46764910831806333, "grad_norm": 1.7514272928237915, "learning_rate": 1.101431502369441e-05, "loss": 0.3589, "step": 21140 }, { "epoch": 0.4677597159595766, "grad_norm": 1.4047893285751343, "learning_rate": 1.1010857978663652e-05, "loss": 0.3325, "step": 21145 }, { "epoch": 0.4678703236010899, "grad_norm": 1.1307846307754517, "learning_rate": 1.100740081157211e-05, "loss": 0.3887, "step": 21150 }, { "epoch": 0.4679809312426031, "grad_norm": 1.1151899099349976, "learning_rate": 1.1003943522837237e-05, "loss": 0.3077, "step": 21155 }, { "epoch": 0.46809153888411636, "grad_norm": 1.3163033723831177, "learning_rate": 1.1000486112876494e-05, "loss": 0.3961, "step": 21160 }, { "epoch": 0.46820214652562964, "grad_norm": 1.7225984334945679, "learning_rate": 1.0997028582107365e-05, "loss": 0.318, "step": 21165 }, { "epoch": 0.4683127541671429, "grad_norm": 1.1730445623397827, "learning_rate": 1.099357093094735e-05, "loss": 0.2972, "step": 21170 }, { "epoch": 0.4684233618086562, "grad_norm": 1.3722056150436401, "learning_rate": 1.0990113159813949e-05, "loss": 0.2281, "step": 21175 }, { "epoch": 0.4685339694501694, "grad_norm": 1.7057973146438599, "learning_rate": 1.0986655269124695e-05, "loss": 0.299, "step": 21180 }, { "epoch": 0.46864457709168267, "grad_norm": 1.2332202196121216, "learning_rate": 1.098319725929713e-05, "loss": 0.2413, "step": 21185 }, { "epoch": 0.46875518473319594, "grad_norm": 1.183211326599121, "learning_rate": 1.09797391307488e-05, "loss": 0.4362, "step": 21190 }, { "epoch": 0.4688657923747092, "grad_norm": 1.7207289934158325, "learning_rate": 1.097628088389727e-05, "loss": 0.4235, "step": 21195 }, { "epoch": 0.46897640001622243, "grad_norm": 0.7728297710418701, "learning_rate": 1.0972822519160134e-05, "loss": 0.316, "step": 21200 }, { "epoch": 0.4690870076577357, "grad_norm": 1.0162231922149658, "learning_rate": 1.0969364036954982e-05, "loss": 0.5022, "step": 21205 }, { "epoch": 0.46919761529924897, "grad_norm": 1.389289140701294, "learning_rate": 1.096590543769942e-05, "loss": 0.3041, "step": 21210 }, { "epoch": 0.46930822294076224, "grad_norm": 1.1717939376831055, "learning_rate": 1.0962446721811082e-05, "loss": 0.2445, "step": 21215 }, { "epoch": 0.4694188305822755, "grad_norm": 0.9882546067237854, "learning_rate": 1.0958987889707605e-05, "loss": 0.3632, "step": 21220 }, { "epoch": 0.46952943822378873, "grad_norm": 1.065106987953186, "learning_rate": 1.0955528941806632e-05, "loss": 0.4223, "step": 21225 }, { "epoch": 0.469640045865302, "grad_norm": 1.0638480186462402, "learning_rate": 1.0952069878525842e-05, "loss": 0.3785, "step": 21230 }, { "epoch": 0.4697506535068153, "grad_norm": 1.0210802555084229, "learning_rate": 1.0948610700282913e-05, "loss": 0.2545, "step": 21235 }, { "epoch": 0.46986126114832855, "grad_norm": 1.7314939498901367, "learning_rate": 1.0945151407495537e-05, "loss": 0.2721, "step": 21240 }, { "epoch": 0.4699718687898418, "grad_norm": 0.6433147192001343, "learning_rate": 1.094169200058142e-05, "loss": 0.2764, "step": 21245 }, { "epoch": 0.47008247643135503, "grad_norm": 1.8213781118392944, "learning_rate": 1.0938232479958293e-05, "loss": 0.4328, "step": 21250 }, { "epoch": 0.4701930840728683, "grad_norm": 1.329233169555664, "learning_rate": 1.0934772846043885e-05, "loss": 0.3518, "step": 21255 }, { "epoch": 0.4703036917143816, "grad_norm": 2.6271109580993652, "learning_rate": 1.0931313099255943e-05, "loss": 0.4531, "step": 21260 }, { "epoch": 0.47041429935589485, "grad_norm": 1.271625280380249, "learning_rate": 1.0927853240012238e-05, "loss": 0.2635, "step": 21265 }, { "epoch": 0.4705249069974081, "grad_norm": 0.28707221150398254, "learning_rate": 1.0924393268730546e-05, "loss": 0.3158, "step": 21270 }, { "epoch": 0.47063551463892134, "grad_norm": 1.5858479738235474, "learning_rate": 1.0920933185828654e-05, "loss": 0.492, "step": 21275 }, { "epoch": 0.4707461222804346, "grad_norm": 1.1723920106887817, "learning_rate": 1.0917472991724366e-05, "loss": 0.3355, "step": 21280 }, { "epoch": 0.4708567299219479, "grad_norm": 1.3067772388458252, "learning_rate": 1.0914012686835502e-05, "loss": 0.4204, "step": 21285 }, { "epoch": 0.47096733756346115, "grad_norm": 1.5633957386016846, "learning_rate": 1.091055227157989e-05, "loss": 0.2806, "step": 21290 }, { "epoch": 0.47107794520497437, "grad_norm": 1.4381266832351685, "learning_rate": 1.0907091746375373e-05, "loss": 0.3789, "step": 21295 }, { "epoch": 0.47118855284648764, "grad_norm": 4.967668533325195, "learning_rate": 1.0903631111639816e-05, "loss": 0.2995, "step": 21300 }, { "epoch": 0.4712991604880009, "grad_norm": 3.1057605743408203, "learning_rate": 1.0900170367791083e-05, "loss": 0.309, "step": 21305 }, { "epoch": 0.4714097681295142, "grad_norm": 1.5065925121307373, "learning_rate": 1.0896709515247055e-05, "loss": 0.3874, "step": 21310 }, { "epoch": 0.47152037577102746, "grad_norm": 1.0453373193740845, "learning_rate": 1.0893248554425635e-05, "loss": 0.3985, "step": 21315 }, { "epoch": 0.47163098341254067, "grad_norm": 1.1781054735183716, "learning_rate": 1.0889787485744733e-05, "loss": 0.4079, "step": 21320 }, { "epoch": 0.47174159105405394, "grad_norm": 1.2374048233032227, "learning_rate": 1.0886326309622266e-05, "loss": 0.2749, "step": 21325 }, { "epoch": 0.4718521986955672, "grad_norm": 1.8694437742233276, "learning_rate": 1.0882865026476174e-05, "loss": 0.3837, "step": 21330 }, { "epoch": 0.4719628063370805, "grad_norm": 1.3044273853302002, "learning_rate": 1.087940363672441e-05, "loss": 0.2932, "step": 21335 }, { "epoch": 0.47207341397859376, "grad_norm": 0.9814428687095642, "learning_rate": 1.087594214078493e-05, "loss": 0.3698, "step": 21340 }, { "epoch": 0.472184021620107, "grad_norm": 1.0644444227218628, "learning_rate": 1.0872480539075705e-05, "loss": 0.333, "step": 21345 }, { "epoch": 0.47229462926162025, "grad_norm": 1.134513258934021, "learning_rate": 1.0869018832014733e-05, "loss": 0.3002, "step": 21350 }, { "epoch": 0.4724052369031335, "grad_norm": 0.9248881340026855, "learning_rate": 1.0865557020020005e-05, "loss": 0.4439, "step": 21355 }, { "epoch": 0.4725158445446468, "grad_norm": 1.2377229928970337, "learning_rate": 1.0862095103509535e-05, "loss": 0.4269, "step": 21360 }, { "epoch": 0.47262645218616006, "grad_norm": 1.668731451034546, "learning_rate": 1.0858633082901356e-05, "loss": 0.3525, "step": 21365 }, { "epoch": 0.4727370598276733, "grad_norm": 0.7488930821418762, "learning_rate": 1.0855170958613494e-05, "loss": 0.2785, "step": 21370 }, { "epoch": 0.47284766746918655, "grad_norm": 1.3238911628723145, "learning_rate": 1.0851708731064008e-05, "loss": 0.2529, "step": 21375 }, { "epoch": 0.4729582751106998, "grad_norm": 1.7083194255828857, "learning_rate": 1.0848246400670958e-05, "loss": 0.305, "step": 21380 }, { "epoch": 0.4730688827522131, "grad_norm": 0.6801090240478516, "learning_rate": 1.0844783967852424e-05, "loss": 0.2926, "step": 21385 }, { "epoch": 0.4731794903937263, "grad_norm": 1.2107343673706055, "learning_rate": 1.0841321433026483e-05, "loss": 0.4181, "step": 21390 }, { "epoch": 0.4732900980352396, "grad_norm": 1.2161026000976562, "learning_rate": 1.0837858796611244e-05, "loss": 0.3782, "step": 21395 }, { "epoch": 0.47340070567675285, "grad_norm": 1.1003862619400024, "learning_rate": 1.0834396059024822e-05, "loss": 0.3096, "step": 21400 }, { "epoch": 0.4735113133182661, "grad_norm": 1.1046801805496216, "learning_rate": 1.0830933220685329e-05, "loss": 0.4262, "step": 21405 }, { "epoch": 0.4736219209597794, "grad_norm": 1.3392176628112793, "learning_rate": 1.082747028201091e-05, "loss": 0.4415, "step": 21410 }, { "epoch": 0.4737325286012926, "grad_norm": 0.9827756285667419, "learning_rate": 1.0824007243419716e-05, "loss": 0.3218, "step": 21415 }, { "epoch": 0.4738431362428059, "grad_norm": 2.079777956008911, "learning_rate": 1.0820544105329902e-05, "loss": 0.2658, "step": 21420 }, { "epoch": 0.47395374388431916, "grad_norm": 1.5127307176589966, "learning_rate": 1.0817080868159642e-05, "loss": 0.3869, "step": 21425 }, { "epoch": 0.47406435152583243, "grad_norm": 1.2895363569259644, "learning_rate": 1.0813617532327125e-05, "loss": 0.3587, "step": 21430 }, { "epoch": 0.4741749591673457, "grad_norm": 1.6134796142578125, "learning_rate": 1.0810154098250544e-05, "loss": 0.3558, "step": 21435 }, { "epoch": 0.4742855668088589, "grad_norm": 1.4038230180740356, "learning_rate": 1.0806690566348108e-05, "loss": 0.4574, "step": 21440 }, { "epoch": 0.4743961744503722, "grad_norm": 1.4737613201141357, "learning_rate": 1.0803226937038038e-05, "loss": 0.4198, "step": 21445 }, { "epoch": 0.47450678209188546, "grad_norm": 1.1881818771362305, "learning_rate": 1.0799763210738567e-05, "loss": 0.2479, "step": 21450 }, { "epoch": 0.47461738973339873, "grad_norm": 2.928239107131958, "learning_rate": 1.0796299387867938e-05, "loss": 0.4937, "step": 21455 }, { "epoch": 0.47472799737491195, "grad_norm": 1.236106276512146, "learning_rate": 1.0792835468844402e-05, "loss": 0.5341, "step": 21460 }, { "epoch": 0.4748386050164252, "grad_norm": 1.3637698888778687, "learning_rate": 1.0789371454086238e-05, "loss": 0.3396, "step": 21465 }, { "epoch": 0.4749492126579385, "grad_norm": 1.5164378881454468, "learning_rate": 1.0785907344011712e-05, "loss": 0.4011, "step": 21470 }, { "epoch": 0.47505982029945176, "grad_norm": 1.229306936264038, "learning_rate": 1.0782443139039118e-05, "loss": 0.27, "step": 21475 }, { "epoch": 0.47517042794096503, "grad_norm": 0.8432195782661438, "learning_rate": 1.077897883958676e-05, "loss": 0.4715, "step": 21480 }, { "epoch": 0.47528103558247825, "grad_norm": 2.4154281616210938, "learning_rate": 1.0775514446072955e-05, "loss": 0.2296, "step": 21485 }, { "epoch": 0.4753916432239915, "grad_norm": 1.2342877388000488, "learning_rate": 1.0772049958916018e-05, "loss": 0.445, "step": 21490 }, { "epoch": 0.4755022508655048, "grad_norm": 1.6949387788772583, "learning_rate": 1.0768585378534289e-05, "loss": 0.2838, "step": 21495 }, { "epoch": 0.47561285850701807, "grad_norm": 1.0877522230148315, "learning_rate": 1.0765120705346119e-05, "loss": 0.3849, "step": 21500 }, { "epoch": 0.47572346614853134, "grad_norm": 1.4409562349319458, "learning_rate": 1.0761655939769862e-05, "loss": 0.3594, "step": 21505 }, { "epoch": 0.47583407379004455, "grad_norm": 0.9452928900718689, "learning_rate": 1.0758191082223884e-05, "loss": 0.4186, "step": 21510 }, { "epoch": 0.4759446814315578, "grad_norm": 2.0129611492156982, "learning_rate": 1.0754726133126574e-05, "loss": 0.4022, "step": 21515 }, { "epoch": 0.4760552890730711, "grad_norm": 0.9536619782447815, "learning_rate": 1.0751261092896317e-05, "loss": 0.4118, "step": 21520 }, { "epoch": 0.47616589671458437, "grad_norm": 1.0545258522033691, "learning_rate": 1.0747795961951515e-05, "loss": 0.3056, "step": 21525 }, { "epoch": 0.47627650435609764, "grad_norm": 1.2197405099868774, "learning_rate": 1.0744330740710585e-05, "loss": 0.3102, "step": 21530 }, { "epoch": 0.47638711199761086, "grad_norm": 1.0677387714385986, "learning_rate": 1.0740865429591953e-05, "loss": 0.4995, "step": 21535 }, { "epoch": 0.47649771963912413, "grad_norm": 1.0117958784103394, "learning_rate": 1.073740002901405e-05, "loss": 0.4082, "step": 21540 }, { "epoch": 0.4766083272806374, "grad_norm": 1.1808348894119263, "learning_rate": 1.0733934539395323e-05, "loss": 0.3345, "step": 21545 }, { "epoch": 0.47671893492215067, "grad_norm": 0.8583787679672241, "learning_rate": 1.0730468961154232e-05, "loss": 0.3425, "step": 21550 }, { "epoch": 0.4768295425636639, "grad_norm": 1.4967929124832153, "learning_rate": 1.072700329470924e-05, "loss": 0.293, "step": 21555 }, { "epoch": 0.47694015020517716, "grad_norm": 0.8213487267494202, "learning_rate": 1.0723537540478823e-05, "loss": 0.334, "step": 21560 }, { "epoch": 0.47705075784669043, "grad_norm": 1.2957849502563477, "learning_rate": 1.072007169888148e-05, "loss": 0.3893, "step": 21565 }, { "epoch": 0.4771613654882037, "grad_norm": 1.15937077999115, "learning_rate": 1.0716605770335702e-05, "loss": 0.4038, "step": 21570 }, { "epoch": 0.477271973129717, "grad_norm": 1.1460542678833008, "learning_rate": 1.0713139755259998e-05, "loss": 0.3439, "step": 21575 }, { "epoch": 0.4773825807712302, "grad_norm": 0.692026674747467, "learning_rate": 1.0709673654072897e-05, "loss": 0.4125, "step": 21580 }, { "epoch": 0.47749318841274346, "grad_norm": 2.7777812480926514, "learning_rate": 1.070620746719292e-05, "loss": 0.3834, "step": 21585 }, { "epoch": 0.47760379605425674, "grad_norm": 1.0002632141113281, "learning_rate": 1.070274119503861e-05, "loss": 0.4239, "step": 21590 }, { "epoch": 0.47771440369577, "grad_norm": 2.2760815620422363, "learning_rate": 1.0699274838028521e-05, "loss": 0.3628, "step": 21595 }, { "epoch": 0.4778250113372833, "grad_norm": 1.2665481567382812, "learning_rate": 1.0695808396581218e-05, "loss": 0.4108, "step": 21600 }, { "epoch": 0.4779356189787965, "grad_norm": 1.3223849534988403, "learning_rate": 1.0692341871115261e-05, "loss": 0.3736, "step": 21605 }, { "epoch": 0.47804622662030977, "grad_norm": 1.26531982421875, "learning_rate": 1.0688875262049243e-05, "loss": 0.3414, "step": 21610 }, { "epoch": 0.47815683426182304, "grad_norm": 2.42340087890625, "learning_rate": 1.0685408569801753e-05, "loss": 0.5128, "step": 21615 }, { "epoch": 0.4782674419033363, "grad_norm": 1.393159031867981, "learning_rate": 1.0681941794791393e-05, "loss": 0.3676, "step": 21620 }, { "epoch": 0.4783780495448496, "grad_norm": 1.5554287433624268, "learning_rate": 1.0678474937436769e-05, "loss": 0.1852, "step": 21625 }, { "epoch": 0.4784886571863628, "grad_norm": 1.4866054058074951, "learning_rate": 1.0675007998156513e-05, "loss": 0.4324, "step": 21630 }, { "epoch": 0.47859926482787607, "grad_norm": 1.120201587677002, "learning_rate": 1.067154097736925e-05, "loss": 0.409, "step": 21635 }, { "epoch": 0.47870987246938934, "grad_norm": 0.9419758915901184, "learning_rate": 1.066807387549362e-05, "loss": 0.3022, "step": 21640 }, { "epoch": 0.4788204801109026, "grad_norm": 1.717018723487854, "learning_rate": 1.0664606692948284e-05, "loss": 0.3988, "step": 21645 }, { "epoch": 0.47893108775241583, "grad_norm": 1.4541491270065308, "learning_rate": 1.0661139430151895e-05, "loss": 0.3462, "step": 21650 }, { "epoch": 0.4790416953939291, "grad_norm": 1.15022873878479, "learning_rate": 1.0657672087523123e-05, "loss": 0.3449, "step": 21655 }, { "epoch": 0.4791523030354424, "grad_norm": 1.3755199909210205, "learning_rate": 1.0654204665480654e-05, "loss": 0.4097, "step": 21660 }, { "epoch": 0.47926291067695564, "grad_norm": 1.0682154893875122, "learning_rate": 1.0650737164443179e-05, "loss": 0.2435, "step": 21665 }, { "epoch": 0.4793735183184689, "grad_norm": 1.4680681228637695, "learning_rate": 1.064726958482939e-05, "loss": 0.3794, "step": 21670 }, { "epoch": 0.47948412595998213, "grad_norm": 0.44641414284706116, "learning_rate": 1.0643801927058e-05, "loss": 0.3201, "step": 21675 }, { "epoch": 0.4795947336014954, "grad_norm": 1.5272003412246704, "learning_rate": 1.0640334191547733e-05, "loss": 0.3028, "step": 21680 }, { "epoch": 0.4797053412430087, "grad_norm": 0.6887720823287964, "learning_rate": 1.0636866378717308e-05, "loss": 0.2587, "step": 21685 }, { "epoch": 0.47981594888452195, "grad_norm": 1.3069559335708618, "learning_rate": 1.0633398488985466e-05, "loss": 0.3854, "step": 21690 }, { "epoch": 0.4799265565260352, "grad_norm": 1.3796643018722534, "learning_rate": 1.0629930522770958e-05, "loss": 0.4175, "step": 21695 }, { "epoch": 0.48003716416754844, "grad_norm": 1.7605595588684082, "learning_rate": 1.0626462480492534e-05, "loss": 0.4643, "step": 21700 }, { "epoch": 0.4801477718090617, "grad_norm": 1.6046273708343506, "learning_rate": 1.062299436256896e-05, "loss": 0.3626, "step": 21705 }, { "epoch": 0.480258379450575, "grad_norm": 1.3127118349075317, "learning_rate": 1.061952616941901e-05, "loss": 0.3516, "step": 21710 }, { "epoch": 0.48036898709208825, "grad_norm": 1.1167452335357666, "learning_rate": 1.0616057901461474e-05, "loss": 0.3815, "step": 21715 }, { "epoch": 0.4804795947336015, "grad_norm": 1.0872924327850342, "learning_rate": 1.0612589559115134e-05, "loss": 0.2938, "step": 21720 }, { "epoch": 0.48059020237511474, "grad_norm": 1.8533532619476318, "learning_rate": 1.0609121142798795e-05, "loss": 0.3888, "step": 21725 }, { "epoch": 0.480700810016628, "grad_norm": 1.3280020952224731, "learning_rate": 1.0605652652931271e-05, "loss": 0.3988, "step": 21730 }, { "epoch": 0.4808114176581413, "grad_norm": 1.733009934425354, "learning_rate": 1.0602184089931378e-05, "loss": 0.2518, "step": 21735 }, { "epoch": 0.48092202529965455, "grad_norm": 1.5140377283096313, "learning_rate": 1.059871545421794e-05, "loss": 0.2434, "step": 21740 }, { "epoch": 0.48103263294116777, "grad_norm": 1.312817096710205, "learning_rate": 1.0595246746209805e-05, "loss": 0.5043, "step": 21745 }, { "epoch": 0.48114324058268104, "grad_norm": 1.8300983905792236, "learning_rate": 1.0591777966325804e-05, "loss": 0.3429, "step": 21750 }, { "epoch": 0.4812538482241943, "grad_norm": 1.3124204874038696, "learning_rate": 1.05883091149848e-05, "loss": 0.3947, "step": 21755 }, { "epoch": 0.4813644558657076, "grad_norm": 1.3611586093902588, "learning_rate": 1.0584840192605655e-05, "loss": 0.3149, "step": 21760 }, { "epoch": 0.48147506350722086, "grad_norm": 0.7703959941864014, "learning_rate": 1.0581371199607242e-05, "loss": 0.4061, "step": 21765 }, { "epoch": 0.4815856711487341, "grad_norm": 1.8461856842041016, "learning_rate": 1.0577902136408433e-05, "loss": 0.4665, "step": 21770 }, { "epoch": 0.48169627879024735, "grad_norm": 1.1229465007781982, "learning_rate": 1.0574433003428122e-05, "loss": 0.2291, "step": 21775 }, { "epoch": 0.4818068864317606, "grad_norm": 1.2154065370559692, "learning_rate": 1.057096380108521e-05, "loss": 0.3957, "step": 21780 }, { "epoch": 0.4819174940732739, "grad_norm": 0.5542342662811279, "learning_rate": 1.0567494529798594e-05, "loss": 0.3772, "step": 21785 }, { "epoch": 0.48202810171478716, "grad_norm": 1.057908296585083, "learning_rate": 1.0564025189987189e-05, "loss": 0.3371, "step": 21790 }, { "epoch": 0.4821387093563004, "grad_norm": 1.3206312656402588, "learning_rate": 1.0560555782069922e-05, "loss": 0.3094, "step": 21795 }, { "epoch": 0.48224931699781365, "grad_norm": 1.7907593250274658, "learning_rate": 1.0557086306465717e-05, "loss": 0.3914, "step": 21800 }, { "epoch": 0.4823599246393269, "grad_norm": 1.3905491828918457, "learning_rate": 1.0553616763593511e-05, "loss": 0.2854, "step": 21805 }, { "epoch": 0.4824705322808402, "grad_norm": 1.2081398963928223, "learning_rate": 1.0550147153872257e-05, "loss": 0.4173, "step": 21810 }, { "epoch": 0.4825811399223534, "grad_norm": 0.9963319897651672, "learning_rate": 1.0546677477720908e-05, "loss": 0.3651, "step": 21815 }, { "epoch": 0.4826917475638667, "grad_norm": 1.1455458402633667, "learning_rate": 1.054320773555842e-05, "loss": 0.2899, "step": 21820 }, { "epoch": 0.48280235520537995, "grad_norm": 1.409708857536316, "learning_rate": 1.0539737927803767e-05, "loss": 0.3995, "step": 21825 }, { "epoch": 0.4829129628468932, "grad_norm": 1.7363582849502563, "learning_rate": 1.0536268054875933e-05, "loss": 0.3693, "step": 21830 }, { "epoch": 0.4830235704884065, "grad_norm": 0.8047738671302795, "learning_rate": 1.0532798117193896e-05, "loss": 0.2422, "step": 21835 }, { "epoch": 0.4831341781299197, "grad_norm": 0.9387385249137878, "learning_rate": 1.052932811517665e-05, "loss": 0.2504, "step": 21840 }, { "epoch": 0.483244785771433, "grad_norm": 1.2877830266952515, "learning_rate": 1.0525858049243203e-05, "loss": 0.3944, "step": 21845 }, { "epoch": 0.48335539341294625, "grad_norm": 1.114229679107666, "learning_rate": 1.0522387919812558e-05, "loss": 0.6056, "step": 21850 }, { "epoch": 0.4834660010544595, "grad_norm": 1.333931565284729, "learning_rate": 1.0518917727303734e-05, "loss": 0.327, "step": 21855 }, { "epoch": 0.4835766086959728, "grad_norm": 1.8196561336517334, "learning_rate": 1.0515447472135758e-05, "loss": 0.258, "step": 21860 }, { "epoch": 0.483687216337486, "grad_norm": 1.0542505979537964, "learning_rate": 1.0511977154727663e-05, "loss": 0.2948, "step": 21865 }, { "epoch": 0.4837978239789993, "grad_norm": 1.9847259521484375, "learning_rate": 1.0508506775498481e-05, "loss": 0.3384, "step": 21870 }, { "epoch": 0.48390843162051256, "grad_norm": 1.6378731727600098, "learning_rate": 1.050503633486727e-05, "loss": 0.2101, "step": 21875 }, { "epoch": 0.48401903926202583, "grad_norm": 1.4224162101745605, "learning_rate": 1.0501565833253079e-05, "loss": 0.3346, "step": 21880 }, { "epoch": 0.4841296469035391, "grad_norm": 0.9386888742446899, "learning_rate": 1.0498095271074966e-05, "loss": 0.3788, "step": 21885 }, { "epoch": 0.4842402545450523, "grad_norm": 0.8081566095352173, "learning_rate": 1.0494624648752003e-05, "loss": 0.2684, "step": 21890 }, { "epoch": 0.4843508621865656, "grad_norm": 0.830448567867279, "learning_rate": 1.0491153966703273e-05, "loss": 0.3362, "step": 21895 }, { "epoch": 0.48446146982807886, "grad_norm": 1.0851364135742188, "learning_rate": 1.0487683225347854e-05, "loss": 0.4173, "step": 21900 }, { "epoch": 0.48457207746959213, "grad_norm": 0.750586748123169, "learning_rate": 1.0484212425104833e-05, "loss": 0.3855, "step": 21905 }, { "epoch": 0.48468268511110535, "grad_norm": 1.12352454662323, "learning_rate": 1.048074156639332e-05, "loss": 0.3548, "step": 21910 }, { "epoch": 0.4847932927526186, "grad_norm": 1.0890370607376099, "learning_rate": 1.047727064963241e-05, "loss": 0.1559, "step": 21915 }, { "epoch": 0.4849039003941319, "grad_norm": 1.7354967594146729, "learning_rate": 1.0473799675241215e-05, "loss": 0.2292, "step": 21920 }, { "epoch": 0.48501450803564516, "grad_norm": 1.984973430633545, "learning_rate": 1.0470328643638858e-05, "loss": 0.3692, "step": 21925 }, { "epoch": 0.48512511567715844, "grad_norm": 1.2187294960021973, "learning_rate": 1.0466857555244468e-05, "loss": 0.4099, "step": 21930 }, { "epoch": 0.48523572331867165, "grad_norm": 1.740767002105713, "learning_rate": 1.0463386410477173e-05, "loss": 0.3109, "step": 21935 }, { "epoch": 0.4853463309601849, "grad_norm": 1.5955711603164673, "learning_rate": 1.0459915209756109e-05, "loss": 0.355, "step": 21940 }, { "epoch": 0.4854569386016982, "grad_norm": 1.1887205839157104, "learning_rate": 1.045644395350043e-05, "loss": 0.48, "step": 21945 }, { "epoch": 0.48556754624321147, "grad_norm": 1.3914155960083008, "learning_rate": 1.0452972642129287e-05, "loss": 0.3092, "step": 21950 }, { "epoch": 0.48567815388472474, "grad_norm": 1.1871694326400757, "learning_rate": 1.0449501276061836e-05, "loss": 0.3324, "step": 21955 }, { "epoch": 0.48578876152623796, "grad_norm": 0.9218567609786987, "learning_rate": 1.0446029855717248e-05, "loss": 0.3255, "step": 21960 }, { "epoch": 0.4858993691677512, "grad_norm": 1.1008877754211426, "learning_rate": 1.0442558381514694e-05, "loss": 0.3532, "step": 21965 }, { "epoch": 0.4860099768092645, "grad_norm": 1.2670232057571411, "learning_rate": 1.043908685387335e-05, "loss": 0.3734, "step": 21970 }, { "epoch": 0.48612058445077777, "grad_norm": 1.7107304334640503, "learning_rate": 1.043561527321241e-05, "loss": 0.4462, "step": 21975 }, { "epoch": 0.48623119209229104, "grad_norm": 1.3266230821609497, "learning_rate": 1.0432143639951063e-05, "loss": 0.4516, "step": 21980 }, { "epoch": 0.48634179973380426, "grad_norm": 1.5275325775146484, "learning_rate": 1.0428671954508502e-05, "loss": 0.3655, "step": 21985 }, { "epoch": 0.48645240737531753, "grad_norm": 1.143560528755188, "learning_rate": 1.0425200217303937e-05, "loss": 0.3433, "step": 21990 }, { "epoch": 0.4865630150168308, "grad_norm": 1.4132541418075562, "learning_rate": 1.042172842875658e-05, "loss": 0.3392, "step": 21995 }, { "epoch": 0.4866736226583441, "grad_norm": 1.3901429176330566, "learning_rate": 1.0418256589285647e-05, "loss": 0.4187, "step": 22000 }, { "epoch": 0.4867842302998573, "grad_norm": 1.1674948930740356, "learning_rate": 1.0414784699310359e-05, "loss": 0.2743, "step": 22005 }, { "epoch": 0.48689483794137056, "grad_norm": 0.9057583808898926, "learning_rate": 1.0411312759249952e-05, "loss": 0.4182, "step": 22010 }, { "epoch": 0.48700544558288383, "grad_norm": 1.7448649406433105, "learning_rate": 1.0407840769523654e-05, "loss": 0.3247, "step": 22015 }, { "epoch": 0.4871160532243971, "grad_norm": 1.2449311017990112, "learning_rate": 1.0404368730550708e-05, "loss": 0.3405, "step": 22020 }, { "epoch": 0.4872266608659104, "grad_norm": 1.2416828870773315, "learning_rate": 1.0400896642750369e-05, "loss": 0.3629, "step": 22025 }, { "epoch": 0.4873372685074236, "grad_norm": 1.0838919878005981, "learning_rate": 1.0397424506541886e-05, "loss": 0.4275, "step": 22030 }, { "epoch": 0.48744787614893687, "grad_norm": 1.3461370468139648, "learning_rate": 1.0393952322344517e-05, "loss": 0.3427, "step": 22035 }, { "epoch": 0.48755848379045014, "grad_norm": 1.0451624393463135, "learning_rate": 1.0390480090577524e-05, "loss": 0.4739, "step": 22040 }, { "epoch": 0.4876690914319634, "grad_norm": 1.3056286573410034, "learning_rate": 1.0387007811660185e-05, "loss": 0.4135, "step": 22045 }, { "epoch": 0.4877796990734767, "grad_norm": 1.6662139892578125, "learning_rate": 1.0383535486011773e-05, "loss": 0.559, "step": 22050 }, { "epoch": 0.4878903067149899, "grad_norm": 1.7206147909164429, "learning_rate": 1.0380063114051571e-05, "loss": 0.2951, "step": 22055 }, { "epoch": 0.48800091435650317, "grad_norm": 0.4319199025630951, "learning_rate": 1.0376590696198867e-05, "loss": 0.3321, "step": 22060 }, { "epoch": 0.48811152199801644, "grad_norm": 1.605143666267395, "learning_rate": 1.0373118232872954e-05, "loss": 0.2838, "step": 22065 }, { "epoch": 0.4882221296395297, "grad_norm": 1.6403367519378662, "learning_rate": 1.0369645724493127e-05, "loss": 0.446, "step": 22070 }, { "epoch": 0.488332737281043, "grad_norm": 2.189537763595581, "learning_rate": 1.0366173171478697e-05, "loss": 0.3119, "step": 22075 }, { "epoch": 0.4884433449225562, "grad_norm": 0.7463648319244385, "learning_rate": 1.0362700574248973e-05, "loss": 0.3203, "step": 22080 }, { "epoch": 0.48855395256406947, "grad_norm": 0.7158884406089783, "learning_rate": 1.0359227933223265e-05, "loss": 0.3474, "step": 22085 }, { "epoch": 0.48866456020558274, "grad_norm": 1.3227957487106323, "learning_rate": 1.0355755248820897e-05, "loss": 0.3946, "step": 22090 }, { "epoch": 0.488775167847096, "grad_norm": 0.8179544806480408, "learning_rate": 1.0352282521461197e-05, "loss": 0.392, "step": 22095 }, { "epoch": 0.48888577548860923, "grad_norm": 1.1106524467468262, "learning_rate": 1.0348809751563489e-05, "loss": 0.3196, "step": 22100 }, { "epoch": 0.4889963831301225, "grad_norm": 0.9249630570411682, "learning_rate": 1.034533693954711e-05, "loss": 0.4095, "step": 22105 }, { "epoch": 0.4891069907716358, "grad_norm": 1.7706583738327026, "learning_rate": 1.0341864085831409e-05, "loss": 0.4247, "step": 22110 }, { "epoch": 0.48921759841314905, "grad_norm": 1.3211047649383545, "learning_rate": 1.0338391190835721e-05, "loss": 0.3472, "step": 22115 }, { "epoch": 0.4893282060546623, "grad_norm": 1.8101317882537842, "learning_rate": 1.0334918254979403e-05, "loss": 0.5339, "step": 22120 }, { "epoch": 0.48943881369617553, "grad_norm": 1.7911978960037231, "learning_rate": 1.0331445278681812e-05, "loss": 0.3232, "step": 22125 }, { "epoch": 0.4895494213376888, "grad_norm": 1.0401707887649536, "learning_rate": 1.032797226236231e-05, "loss": 0.4833, "step": 22130 }, { "epoch": 0.4896600289792021, "grad_norm": 0.8156430125236511, "learning_rate": 1.0324499206440252e-05, "loss": 0.2503, "step": 22135 }, { "epoch": 0.48977063662071535, "grad_norm": 1.685389757156372, "learning_rate": 1.0321026111335021e-05, "loss": 0.3722, "step": 22140 }, { "epoch": 0.4898812442622286, "grad_norm": 1.611361026763916, "learning_rate": 1.031755297746599e-05, "loss": 0.4647, "step": 22145 }, { "epoch": 0.48999185190374184, "grad_norm": 1.4134411811828613, "learning_rate": 1.0314079805252533e-05, "loss": 0.4101, "step": 22150 }, { "epoch": 0.4901024595452551, "grad_norm": 1.6482434272766113, "learning_rate": 1.0310606595114035e-05, "loss": 0.3427, "step": 22155 }, { "epoch": 0.4902130671867684, "grad_norm": 1.172722339630127, "learning_rate": 1.0307133347469891e-05, "loss": 0.3086, "step": 22160 }, { "epoch": 0.49032367482828165, "grad_norm": 1.1304327249526978, "learning_rate": 1.030366006273949e-05, "loss": 0.3258, "step": 22165 }, { "epoch": 0.49043428246979487, "grad_norm": 1.4564777612686157, "learning_rate": 1.0300186741342228e-05, "loss": 0.4059, "step": 22170 }, { "epoch": 0.49054489011130814, "grad_norm": 1.5205742120742798, "learning_rate": 1.0296713383697517e-05, "loss": 0.3007, "step": 22175 }, { "epoch": 0.4906554977528214, "grad_norm": 1.1451137065887451, "learning_rate": 1.0293239990224755e-05, "loss": 0.375, "step": 22180 }, { "epoch": 0.4907661053943347, "grad_norm": 1.0643244981765747, "learning_rate": 1.028976656134335e-05, "loss": 0.2381, "step": 22185 }, { "epoch": 0.49087671303584796, "grad_norm": 0.8949393630027771, "learning_rate": 1.028629309747273e-05, "loss": 0.3488, "step": 22190 }, { "epoch": 0.49098732067736117, "grad_norm": 0.7772098779678345, "learning_rate": 1.0282819599032308e-05, "loss": 0.1966, "step": 22195 }, { "epoch": 0.49109792831887444, "grad_norm": 1.3996598720550537, "learning_rate": 1.0279346066441504e-05, "loss": 0.3737, "step": 22200 }, { "epoch": 0.4912085359603877, "grad_norm": 1.2154213190078735, "learning_rate": 1.0275872500119751e-05, "loss": 0.4083, "step": 22205 }, { "epoch": 0.491319143601901, "grad_norm": 0.9345799088478088, "learning_rate": 1.0272398900486482e-05, "loss": 0.3589, "step": 22210 }, { "epoch": 0.49142975124341426, "grad_norm": 1.0408248901367188, "learning_rate": 1.0268925267961127e-05, "loss": 0.3724, "step": 22215 }, { "epoch": 0.4915403588849275, "grad_norm": 1.3814668655395508, "learning_rate": 1.026545160296313e-05, "loss": 0.2585, "step": 22220 }, { "epoch": 0.49165096652644075, "grad_norm": 1.059349775314331, "learning_rate": 1.0261977905911937e-05, "loss": 0.2915, "step": 22225 }, { "epoch": 0.491761574167954, "grad_norm": 1.6129719018936157, "learning_rate": 1.0258504177226993e-05, "loss": 0.4566, "step": 22230 }, { "epoch": 0.4918721818094673, "grad_norm": 1.1248645782470703, "learning_rate": 1.0255030417327749e-05, "loss": 0.3014, "step": 22235 }, { "epoch": 0.49198278945098056, "grad_norm": 0.9123164415359497, "learning_rate": 1.0251556626633663e-05, "loss": 0.3481, "step": 22240 }, { "epoch": 0.4920933970924938, "grad_norm": 0.9840605854988098, "learning_rate": 1.0248082805564195e-05, "loss": 0.3686, "step": 22245 }, { "epoch": 0.49220400473400705, "grad_norm": 0.7226476073265076, "learning_rate": 1.0244608954538801e-05, "loss": 0.2273, "step": 22250 }, { "epoch": 0.4923146123755203, "grad_norm": 0.9836705923080444, "learning_rate": 1.0241135073976954e-05, "loss": 0.3191, "step": 22255 }, { "epoch": 0.4924252200170336, "grad_norm": 1.1067525148391724, "learning_rate": 1.0237661164298126e-05, "loss": 0.4825, "step": 22260 }, { "epoch": 0.4925358276585468, "grad_norm": 1.5143758058547974, "learning_rate": 1.0234187225921782e-05, "loss": 0.3707, "step": 22265 }, { "epoch": 0.4926464353000601, "grad_norm": 0.6550595164299011, "learning_rate": 1.0230713259267402e-05, "loss": 0.3063, "step": 22270 }, { "epoch": 0.49275704294157335, "grad_norm": 0.9768131375312805, "learning_rate": 1.0227239264754472e-05, "loss": 0.309, "step": 22275 }, { "epoch": 0.4928676505830866, "grad_norm": 1.0865155458450317, "learning_rate": 1.022376524280247e-05, "loss": 0.3604, "step": 22280 }, { "epoch": 0.4929782582245999, "grad_norm": 1.346847414970398, "learning_rate": 1.0220291193830882e-05, "loss": 0.4067, "step": 22285 }, { "epoch": 0.4930888658661131, "grad_norm": 0.8753420114517212, "learning_rate": 1.0216817118259206e-05, "loss": 0.4074, "step": 22290 }, { "epoch": 0.4931994735076264, "grad_norm": 0.8265246748924255, "learning_rate": 1.021334301650693e-05, "loss": 0.2709, "step": 22295 }, { "epoch": 0.49331008114913966, "grad_norm": 1.1771103143692017, "learning_rate": 1.0209868888993546e-05, "loss": 0.3355, "step": 22300 }, { "epoch": 0.49342068879065293, "grad_norm": 1.5678825378417969, "learning_rate": 1.0206394736138563e-05, "loss": 0.5212, "step": 22305 }, { "epoch": 0.4935312964321662, "grad_norm": 0.9139542579650879, "learning_rate": 1.0202920558361483e-05, "loss": 0.3694, "step": 22310 }, { "epoch": 0.4936419040736794, "grad_norm": 1.2588773965835571, "learning_rate": 1.0199446356081806e-05, "loss": 0.3124, "step": 22315 }, { "epoch": 0.4937525117151927, "grad_norm": 1.2272034883499146, "learning_rate": 1.019597212971904e-05, "loss": 0.3736, "step": 22320 }, { "epoch": 0.49386311935670596, "grad_norm": 1.1760144233703613, "learning_rate": 1.0192497879692706e-05, "loss": 0.3004, "step": 22325 }, { "epoch": 0.49397372699821923, "grad_norm": 1.6186802387237549, "learning_rate": 1.0189023606422312e-05, "loss": 0.4019, "step": 22330 }, { "epoch": 0.4940843346397325, "grad_norm": 1.2554993629455566, "learning_rate": 1.0185549310327375e-05, "loss": 0.3749, "step": 22335 }, { "epoch": 0.4941949422812457, "grad_norm": 0.9331502914428711, "learning_rate": 1.0182074991827418e-05, "loss": 0.469, "step": 22340 }, { "epoch": 0.494305549922759, "grad_norm": 1.2057808637619019, "learning_rate": 1.0178600651341961e-05, "loss": 0.2975, "step": 22345 }, { "epoch": 0.49441615756427226, "grad_norm": 1.1173756122589111, "learning_rate": 1.0175126289290532e-05, "loss": 0.3125, "step": 22350 }, { "epoch": 0.49452676520578553, "grad_norm": 1.1211178302764893, "learning_rate": 1.0171651906092657e-05, "loss": 0.2244, "step": 22355 }, { "epoch": 0.49463737284729875, "grad_norm": 1.1851609945297241, "learning_rate": 1.0168177502167871e-05, "loss": 0.2999, "step": 22360 }, { "epoch": 0.494747980488812, "grad_norm": 1.0897886753082275, "learning_rate": 1.0164703077935702e-05, "loss": 0.3168, "step": 22365 }, { "epoch": 0.4948585881303253, "grad_norm": 1.173570156097412, "learning_rate": 1.0161228633815684e-05, "loss": 0.3338, "step": 22370 }, { "epoch": 0.49496919577183857, "grad_norm": 1.1679580211639404, "learning_rate": 1.0157754170227368e-05, "loss": 0.277, "step": 22375 }, { "epoch": 0.49507980341335184, "grad_norm": 1.3824834823608398, "learning_rate": 1.0154279687590279e-05, "loss": 0.4833, "step": 22380 }, { "epoch": 0.49519041105486505, "grad_norm": 1.8340339660644531, "learning_rate": 1.0150805186323965e-05, "loss": 0.6043, "step": 22385 }, { "epoch": 0.4953010186963783, "grad_norm": 1.3119133710861206, "learning_rate": 1.0147330666847974e-05, "loss": 0.3349, "step": 22390 }, { "epoch": 0.4954116263378916, "grad_norm": 1.3626950979232788, "learning_rate": 1.014385612958185e-05, "loss": 0.3465, "step": 22395 }, { "epoch": 0.49552223397940487, "grad_norm": 1.5777696371078491, "learning_rate": 1.0140381574945141e-05, "loss": 0.2996, "step": 22400 }, { "epoch": 0.49563284162091814, "grad_norm": 1.264872670173645, "learning_rate": 1.0136907003357405e-05, "loss": 0.4437, "step": 22405 }, { "epoch": 0.49574344926243136, "grad_norm": 1.3010419607162476, "learning_rate": 1.0133432415238191e-05, "loss": 0.3828, "step": 22410 }, { "epoch": 0.49585405690394463, "grad_norm": 1.4075735807418823, "learning_rate": 1.0129957811007054e-05, "loss": 0.3912, "step": 22415 }, { "epoch": 0.4959646645454579, "grad_norm": 1.3695602416992188, "learning_rate": 1.0126483191083547e-05, "loss": 0.5379, "step": 22420 }, { "epoch": 0.4960752721869712, "grad_norm": 1.1433053016662598, "learning_rate": 1.0123008555887242e-05, "loss": 0.4269, "step": 22425 }, { "epoch": 0.49618587982848444, "grad_norm": 1.2622222900390625, "learning_rate": 1.0119533905837687e-05, "loss": 0.3441, "step": 22430 }, { "epoch": 0.49629648746999766, "grad_norm": 1.5302486419677734, "learning_rate": 1.0116059241354448e-05, "loss": 0.402, "step": 22435 }, { "epoch": 0.49640709511151093, "grad_norm": 1.6431901454925537, "learning_rate": 1.0112584562857097e-05, "loss": 0.2881, "step": 22440 }, { "epoch": 0.4965177027530242, "grad_norm": 0.3905813694000244, "learning_rate": 1.0109109870765195e-05, "loss": 0.1515, "step": 22445 }, { "epoch": 0.4966283103945375, "grad_norm": 0.977918803691864, "learning_rate": 1.0105635165498306e-05, "loss": 0.4148, "step": 22450 }, { "epoch": 0.4967389180360507, "grad_norm": 0.7943298816680908, "learning_rate": 1.0102160447476009e-05, "loss": 0.3341, "step": 22455 }, { "epoch": 0.49684952567756396, "grad_norm": 1.4819633960723877, "learning_rate": 1.0098685717117867e-05, "loss": 0.3422, "step": 22460 }, { "epoch": 0.49696013331907724, "grad_norm": 1.6674273014068604, "learning_rate": 1.0095210974843455e-05, "loss": 0.3375, "step": 22465 }, { "epoch": 0.4970707409605905, "grad_norm": 1.0950068235397339, "learning_rate": 1.0091736221072345e-05, "loss": 0.3754, "step": 22470 }, { "epoch": 0.4971813486021038, "grad_norm": 1.2249586582183838, "learning_rate": 1.008826145622412e-05, "loss": 0.3434, "step": 22475 }, { "epoch": 0.497291956243617, "grad_norm": 1.8260010480880737, "learning_rate": 1.008478668071835e-05, "loss": 0.3136, "step": 22480 }, { "epoch": 0.49740256388513027, "grad_norm": 1.441975474357605, "learning_rate": 1.0081311894974612e-05, "loss": 0.4294, "step": 22485 }, { "epoch": 0.49751317152664354, "grad_norm": 1.0914393663406372, "learning_rate": 1.0077837099412493e-05, "loss": 0.3104, "step": 22490 }, { "epoch": 0.4976237791681568, "grad_norm": 1.5781360864639282, "learning_rate": 1.0074362294451565e-05, "loss": 0.3092, "step": 22495 }, { "epoch": 0.4977343868096701, "grad_norm": 1.5949690341949463, "learning_rate": 1.0070887480511413e-05, "loss": 0.4767, "step": 22500 }, { "epoch": 0.4978449944511833, "grad_norm": 0.8722736835479736, "learning_rate": 1.0067412658011622e-05, "loss": 0.2959, "step": 22505 }, { "epoch": 0.49795560209269657, "grad_norm": 0.9607784152030945, "learning_rate": 1.0063937827371773e-05, "loss": 0.449, "step": 22510 }, { "epoch": 0.49806620973420984, "grad_norm": 1.8155591487884521, "learning_rate": 1.0060462989011448e-05, "loss": 0.2997, "step": 22515 }, { "epoch": 0.4981768173757231, "grad_norm": 1.1792304515838623, "learning_rate": 1.0056988143350239e-05, "loss": 0.3015, "step": 22520 }, { "epoch": 0.4982874250172364, "grad_norm": 1.0965124368667603, "learning_rate": 1.0053513290807732e-05, "loss": 0.4074, "step": 22525 }, { "epoch": 0.4983980326587496, "grad_norm": 0.9801984429359436, "learning_rate": 1.0050038431803508e-05, "loss": 0.3756, "step": 22530 }, { "epoch": 0.4985086403002629, "grad_norm": 1.5711110830307007, "learning_rate": 1.004656356675716e-05, "loss": 0.4078, "step": 22535 }, { "epoch": 0.49861924794177614, "grad_norm": 2.256542444229126, "learning_rate": 1.0043088696088277e-05, "loss": 0.4441, "step": 22540 }, { "epoch": 0.4987298555832894, "grad_norm": 1.129949927330017, "learning_rate": 1.0039613820216445e-05, "loss": 0.3463, "step": 22545 }, { "epoch": 0.49884046322480263, "grad_norm": 1.3895167112350464, "learning_rate": 1.0036138939561257e-05, "loss": 0.2922, "step": 22550 }, { "epoch": 0.4989510708663159, "grad_norm": 1.1072652339935303, "learning_rate": 1.0032664054542304e-05, "loss": 0.2753, "step": 22555 }, { "epoch": 0.4990616785078292, "grad_norm": 1.0396666526794434, "learning_rate": 1.0029189165579178e-05, "loss": 0.2677, "step": 22560 }, { "epoch": 0.49917228614934245, "grad_norm": 2.058706045150757, "learning_rate": 1.0025714273091467e-05, "loss": 0.2953, "step": 22565 }, { "epoch": 0.4992828937908557, "grad_norm": 1.561500072479248, "learning_rate": 1.0022239377498766e-05, "loss": 0.2872, "step": 22570 }, { "epoch": 0.49939350143236894, "grad_norm": 0.9574991464614868, "learning_rate": 1.001876447922067e-05, "loss": 0.2459, "step": 22575 }, { "epoch": 0.4995041090738822, "grad_norm": 0.8348687887191772, "learning_rate": 1.0015289578676767e-05, "loss": 0.2454, "step": 22580 }, { "epoch": 0.4996147167153955, "grad_norm": 1.7149430513381958, "learning_rate": 1.001181467628665e-05, "loss": 0.4063, "step": 22585 }, { "epoch": 0.49972532435690875, "grad_norm": 0.7827123999595642, "learning_rate": 1.0008339772469916e-05, "loss": 0.4208, "step": 22590 }, { "epoch": 0.499835931998422, "grad_norm": 1.8690916299819946, "learning_rate": 1.000486486764616e-05, "loss": 0.459, "step": 22595 }, { "epoch": 0.49994653963993524, "grad_norm": 0.5097280144691467, "learning_rate": 1.0001389962234968e-05, "loss": 0.2122, "step": 22600 }, { "epoch": 0.5000571472814486, "grad_norm": 1.1261942386627197, "learning_rate": 9.997915056655942e-06, "loss": 0.3464, "step": 22605 }, { "epoch": 0.5001677549229617, "grad_norm": 1.363595962524414, "learning_rate": 9.99444015132867e-06, "loss": 0.3728, "step": 22610 }, { "epoch": 0.500278362564475, "grad_norm": 1.673628330230713, "learning_rate": 9.990965246672748e-06, "loss": 0.2327, "step": 22615 }, { "epoch": 0.5003889702059883, "grad_norm": 1.5964133739471436, "learning_rate": 9.987490343107773e-06, "loss": 0.4482, "step": 22620 }, { "epoch": 0.5004995778475015, "grad_norm": 1.2677202224731445, "learning_rate": 9.984015441053331e-06, "loss": 0.2819, "step": 22625 }, { "epoch": 0.5006101854890148, "grad_norm": 0.8640742897987366, "learning_rate": 9.980540540929023e-06, "loss": 0.3341, "step": 22630 }, { "epoch": 0.5007207931305281, "grad_norm": 1.3294488191604614, "learning_rate": 9.977065643154438e-06, "loss": 0.4557, "step": 22635 }, { "epoch": 0.5008314007720414, "grad_norm": 2.7675130367279053, "learning_rate": 9.973590748149165e-06, "loss": 0.2333, "step": 22640 }, { "epoch": 0.5009420084135546, "grad_norm": 1.611248254776001, "learning_rate": 9.970115856332807e-06, "loss": 0.3251, "step": 22645 }, { "epoch": 0.5010526160550679, "grad_norm": 0.7696058750152588, "learning_rate": 9.966640968124948e-06, "loss": 0.311, "step": 22650 }, { "epoch": 0.5011632236965812, "grad_norm": 1.9480876922607422, "learning_rate": 9.963166083945178e-06, "loss": 0.3332, "step": 22655 }, { "epoch": 0.5012738313380943, "grad_norm": 1.4474760293960571, "learning_rate": 9.959691204213097e-06, "loss": 0.3079, "step": 22660 }, { "epoch": 0.5013844389796076, "grad_norm": 1.0613933801651, "learning_rate": 9.956216329348289e-06, "loss": 0.2797, "step": 22665 }, { "epoch": 0.5014950466211209, "grad_norm": 1.9128857851028442, "learning_rate": 9.952741459770342e-06, "loss": 0.4122, "step": 22670 }, { "epoch": 0.5016056542626341, "grad_norm": 1.2297505140304565, "learning_rate": 9.949266595898854e-06, "loss": 0.3147, "step": 22675 }, { "epoch": 0.5017162619041474, "grad_norm": 1.0156246423721313, "learning_rate": 9.945791738153407e-06, "loss": 0.3283, "step": 22680 }, { "epoch": 0.5018268695456607, "grad_norm": 0.9807356595993042, "learning_rate": 9.942316886953588e-06, "loss": 0.3203, "step": 22685 }, { "epoch": 0.501937477187174, "grad_norm": 1.5343865156173706, "learning_rate": 9.93884204271899e-06, "loss": 0.4696, "step": 22690 }, { "epoch": 0.5020480848286872, "grad_norm": 2.177574872970581, "learning_rate": 9.935367205869197e-06, "loss": 0.4129, "step": 22695 }, { "epoch": 0.5021586924702005, "grad_norm": 1.5671573877334595, "learning_rate": 9.93189237682379e-06, "loss": 0.4227, "step": 22700 }, { "epoch": 0.5022693001117137, "grad_norm": 1.1814347505569458, "learning_rate": 9.928417556002362e-06, "loss": 0.2444, "step": 22705 }, { "epoch": 0.5023799077532269, "grad_norm": 0.8072860240936279, "learning_rate": 9.924942743824487e-06, "loss": 0.279, "step": 22710 }, { "epoch": 0.5024905153947402, "grad_norm": 1.2141622304916382, "learning_rate": 9.921467940709754e-06, "loss": 0.4341, "step": 22715 }, { "epoch": 0.5026011230362535, "grad_norm": 1.2516621351242065, "learning_rate": 9.917993147077745e-06, "loss": 0.2631, "step": 22720 }, { "epoch": 0.5027117306777668, "grad_norm": 1.3314827680587769, "learning_rate": 9.914518363348034e-06, "loss": 0.3024, "step": 22725 }, { "epoch": 0.50282233831928, "grad_norm": 1.091892957687378, "learning_rate": 9.911043589940208e-06, "loss": 0.3426, "step": 22730 }, { "epoch": 0.5029329459607933, "grad_norm": 1.1688681840896606, "learning_rate": 9.907568827273841e-06, "loss": 0.3275, "step": 22735 }, { "epoch": 0.5030435536023066, "grad_norm": 1.4902763366699219, "learning_rate": 9.904094075768504e-06, "loss": 0.4558, "step": 22740 }, { "epoch": 0.5031541612438198, "grad_norm": 2.3128037452697754, "learning_rate": 9.900619335843781e-06, "loss": 0.3528, "step": 22745 }, { "epoch": 0.503264768885333, "grad_norm": 0.5296726822853088, "learning_rate": 9.897144607919242e-06, "loss": 0.2624, "step": 22750 }, { "epoch": 0.5033753765268463, "grad_norm": 0.9666993021965027, "learning_rate": 9.893669892414459e-06, "loss": 0.2621, "step": 22755 }, { "epoch": 0.5034859841683595, "grad_norm": 0.6097541451454163, "learning_rate": 9.890195189749004e-06, "loss": 0.3313, "step": 22760 }, { "epoch": 0.5035965918098728, "grad_norm": 2.1518208980560303, "learning_rate": 9.886720500342446e-06, "loss": 0.1773, "step": 22765 }, { "epoch": 0.5037071994513861, "grad_norm": 1.6629358530044556, "learning_rate": 9.883245824614349e-06, "loss": 0.3128, "step": 22770 }, { "epoch": 0.5038178070928994, "grad_norm": 1.4969432353973389, "learning_rate": 9.879771162984286e-06, "loss": 0.6297, "step": 22775 }, { "epoch": 0.5039284147344126, "grad_norm": 1.3656151294708252, "learning_rate": 9.876296515871816e-06, "loss": 0.3459, "step": 22780 }, { "epoch": 0.5040390223759259, "grad_norm": 1.9095948934555054, "learning_rate": 9.872821883696501e-06, "loss": 0.3839, "step": 22785 }, { "epoch": 0.5041496300174392, "grad_norm": 2.4898407459259033, "learning_rate": 9.869347266877909e-06, "loss": 0.4266, "step": 22790 }, { "epoch": 0.5042602376589524, "grad_norm": 1.250312328338623, "learning_rate": 9.865872665835591e-06, "loss": 0.3471, "step": 22795 }, { "epoch": 0.5043708453004656, "grad_norm": 1.119011402130127, "learning_rate": 9.862398080989105e-06, "loss": 0.3327, "step": 22800 }, { "epoch": 0.5044814529419789, "grad_norm": 0.8345051407814026, "learning_rate": 9.858923512758011e-06, "loss": 0.3218, "step": 22805 }, { "epoch": 0.5045920605834922, "grad_norm": 1.1503050327301025, "learning_rate": 9.855448961561854e-06, "loss": 0.3379, "step": 22810 }, { "epoch": 0.5047026682250054, "grad_norm": 2.8045032024383545, "learning_rate": 9.851974427820194e-06, "loss": 0.2811, "step": 22815 }, { "epoch": 0.5048132758665187, "grad_norm": 1.2378652095794678, "learning_rate": 9.848499911952576e-06, "loss": 0.3731, "step": 22820 }, { "epoch": 0.504923883508032, "grad_norm": 1.0915318727493286, "learning_rate": 9.845025414378543e-06, "loss": 0.4166, "step": 22825 }, { "epoch": 0.5050344911495452, "grad_norm": 1.4326868057250977, "learning_rate": 9.841550935517645e-06, "loss": 0.4298, "step": 22830 }, { "epoch": 0.5051450987910585, "grad_norm": 1.1058868169784546, "learning_rate": 9.838076475789424e-06, "loss": 0.3516, "step": 22835 }, { "epoch": 0.5052557064325718, "grad_norm": 0.8072742819786072, "learning_rate": 9.834602035613415e-06, "loss": 0.3605, "step": 22840 }, { "epoch": 0.5053663140740849, "grad_norm": 2.2367522716522217, "learning_rate": 9.83112761540916e-06, "loss": 0.3149, "step": 22845 }, { "epoch": 0.5054769217155982, "grad_norm": 0.8605563044548035, "learning_rate": 9.827653215596193e-06, "loss": 0.2822, "step": 22850 }, { "epoch": 0.5055875293571115, "grad_norm": 0.9195566177368164, "learning_rate": 9.824178836594043e-06, "loss": 0.3888, "step": 22855 }, { "epoch": 0.5056981369986248, "grad_norm": 1.8908300399780273, "learning_rate": 9.82070447882225e-06, "loss": 0.3502, "step": 22860 }, { "epoch": 0.505808744640138, "grad_norm": 1.4938099384307861, "learning_rate": 9.81723014270033e-06, "loss": 0.3257, "step": 22865 }, { "epoch": 0.5059193522816513, "grad_norm": 0.9005242586135864, "learning_rate": 9.813755828647814e-06, "loss": 0.3913, "step": 22870 }, { "epoch": 0.5060299599231646, "grad_norm": 1.09848153591156, "learning_rate": 9.810281537084227e-06, "loss": 0.3234, "step": 22875 }, { "epoch": 0.5061405675646778, "grad_norm": 1.3519527912139893, "learning_rate": 9.806807268429081e-06, "loss": 0.1843, "step": 22880 }, { "epoch": 0.5062511752061911, "grad_norm": 1.1053030490875244, "learning_rate": 9.803333023101897e-06, "loss": 0.2946, "step": 22885 }, { "epoch": 0.5063617828477044, "grad_norm": 0.7148911952972412, "learning_rate": 9.799858801522193e-06, "loss": 0.2917, "step": 22890 }, { "epoch": 0.5064723904892175, "grad_norm": 1.1918319463729858, "learning_rate": 9.796384604109474e-06, "loss": 0.2943, "step": 22895 }, { "epoch": 0.5065829981307308, "grad_norm": 1.108292818069458, "learning_rate": 9.792910431283247e-06, "loss": 0.3569, "step": 22900 }, { "epoch": 0.5066936057722441, "grad_norm": 0.9827724695205688, "learning_rate": 9.789436283463028e-06, "loss": 0.2065, "step": 22905 }, { "epoch": 0.5068042134137574, "grad_norm": 1.1297328472137451, "learning_rate": 9.78596216106831e-06, "loss": 0.4123, "step": 22910 }, { "epoch": 0.5069148210552706, "grad_norm": 1.1636688709259033, "learning_rate": 9.78248806451859e-06, "loss": 0.3314, "step": 22915 }, { "epoch": 0.5070254286967839, "grad_norm": 1.1745505332946777, "learning_rate": 9.779013994233372e-06, "loss": 0.49, "step": 22920 }, { "epoch": 0.5071360363382972, "grad_norm": 0.7017190456390381, "learning_rate": 9.775539950632143e-06, "loss": 0.3493, "step": 22925 }, { "epoch": 0.5072466439798105, "grad_norm": 1.71627676486969, "learning_rate": 9.772065934134396e-06, "loss": 0.3478, "step": 22930 }, { "epoch": 0.5073572516213237, "grad_norm": 2.3511900901794434, "learning_rate": 9.768591945159617e-06, "loss": 0.5612, "step": 22935 }, { "epoch": 0.5074678592628369, "grad_norm": 1.6520482301712036, "learning_rate": 9.765117984127287e-06, "loss": 0.4489, "step": 22940 }, { "epoch": 0.5075784669043502, "grad_norm": 0.8938272595405579, "learning_rate": 9.761644051456888e-06, "loss": 0.2628, "step": 22945 }, { "epoch": 0.5076890745458634, "grad_norm": 1.0173914432525635, "learning_rate": 9.758170147567899e-06, "loss": 0.3354, "step": 22950 }, { "epoch": 0.5077996821873767, "grad_norm": 0.8601319193840027, "learning_rate": 9.754696272879783e-06, "loss": 0.2709, "step": 22955 }, { "epoch": 0.50791028982889, "grad_norm": 1.147545576095581, "learning_rate": 9.75122242781202e-06, "loss": 0.5436, "step": 22960 }, { "epoch": 0.5080208974704032, "grad_norm": 1.7474232912063599, "learning_rate": 9.74774861278407e-06, "loss": 0.4493, "step": 22965 }, { "epoch": 0.5081315051119165, "grad_norm": 1.1838147640228271, "learning_rate": 9.744274828215393e-06, "loss": 0.5638, "step": 22970 }, { "epoch": 0.5082421127534298, "grad_norm": 0.9387937784194946, "learning_rate": 9.740801074525456e-06, "loss": 0.3811, "step": 22975 }, { "epoch": 0.5083527203949431, "grad_norm": 1.4952547550201416, "learning_rate": 9.737327352133705e-06, "loss": 0.2728, "step": 22980 }, { "epoch": 0.5084633280364563, "grad_norm": 0.9149320125579834, "learning_rate": 9.733853661459594e-06, "loss": 0.2281, "step": 22985 }, { "epoch": 0.5085739356779695, "grad_norm": 1.1999658346176147, "learning_rate": 9.730380002922573e-06, "loss": 0.2411, "step": 22990 }, { "epoch": 0.5086845433194828, "grad_norm": 1.122821569442749, "learning_rate": 9.72690637694208e-06, "loss": 0.2864, "step": 22995 }, { "epoch": 0.508795150960996, "grad_norm": 1.0707029104232788, "learning_rate": 9.723432783937556e-06, "loss": 0.2662, "step": 23000 }, { "epoch": 0.5089057586025093, "grad_norm": 2.037553548812866, "learning_rate": 9.71995922432844e-06, "loss": 0.5232, "step": 23005 }, { "epoch": 0.5090163662440226, "grad_norm": 1.4224565029144287, "learning_rate": 9.71648569853416e-06, "loss": 0.509, "step": 23010 }, { "epoch": 0.5091269738855358, "grad_norm": 2.50844407081604, "learning_rate": 9.71301220697414e-06, "loss": 0.3271, "step": 23015 }, { "epoch": 0.5092375815270491, "grad_norm": 0.8648173213005066, "learning_rate": 9.709538750067812e-06, "loss": 0.3964, "step": 23020 }, { "epoch": 0.5093481891685624, "grad_norm": 1.1949397325515747, "learning_rate": 9.706065328234585e-06, "loss": 0.324, "step": 23025 }, { "epoch": 0.5094587968100757, "grad_norm": 1.0870187282562256, "learning_rate": 9.702591941893878e-06, "loss": 0.4061, "step": 23030 }, { "epoch": 0.5095694044515888, "grad_norm": 1.0340890884399414, "learning_rate": 9.699118591465103e-06, "loss": 0.1587, "step": 23035 }, { "epoch": 0.5096800120931021, "grad_norm": 1.0120457410812378, "learning_rate": 9.695645277367663e-06, "loss": 0.4329, "step": 23040 }, { "epoch": 0.5097906197346154, "grad_norm": 1.248868465423584, "learning_rate": 9.692172000020963e-06, "loss": 0.3219, "step": 23045 }, { "epoch": 0.5099012273761286, "grad_norm": 1.219510555267334, "learning_rate": 9.688698759844397e-06, "loss": 0.3005, "step": 23050 }, { "epoch": 0.5100118350176419, "grad_norm": 1.592979073524475, "learning_rate": 9.685225557257355e-06, "loss": 0.4724, "step": 23055 }, { "epoch": 0.5101224426591552, "grad_norm": 1.013277530670166, "learning_rate": 9.681752392679234e-06, "loss": 0.4425, "step": 23060 }, { "epoch": 0.5102330503006685, "grad_norm": 1.3619053363800049, "learning_rate": 9.67827926652941e-06, "loss": 0.4819, "step": 23065 }, { "epoch": 0.5103436579421817, "grad_norm": 0.8771155476570129, "learning_rate": 9.674806179227261e-06, "loss": 0.3464, "step": 23070 }, { "epoch": 0.510454265583695, "grad_norm": 1.4935380220413208, "learning_rate": 9.671333131192172e-06, "loss": 0.3107, "step": 23075 }, { "epoch": 0.5105648732252083, "grad_norm": 1.4265345335006714, "learning_rate": 9.6678601228435e-06, "loss": 0.4171, "step": 23080 }, { "epoch": 0.5106754808667214, "grad_norm": 1.742863416671753, "learning_rate": 9.664387154600612e-06, "loss": 0.3201, "step": 23085 }, { "epoch": 0.5107860885082347, "grad_norm": 1.2619731426239014, "learning_rate": 9.660914226882877e-06, "loss": 0.3018, "step": 23090 }, { "epoch": 0.510896696149748, "grad_norm": 0.6920039057731628, "learning_rate": 9.657441340109638e-06, "loss": 0.3497, "step": 23095 }, { "epoch": 0.5110073037912612, "grad_norm": 1.1889960765838623, "learning_rate": 9.65396849470025e-06, "loss": 0.372, "step": 23100 }, { "epoch": 0.5111179114327745, "grad_norm": 0.8933259844779968, "learning_rate": 9.650495691074063e-06, "loss": 0.3861, "step": 23105 }, { "epoch": 0.5112285190742878, "grad_norm": 1.2125182151794434, "learning_rate": 9.64702292965041e-06, "loss": 0.2542, "step": 23110 }, { "epoch": 0.5113391267158011, "grad_norm": 1.0140810012817383, "learning_rate": 9.643550210848625e-06, "loss": 0.4811, "step": 23115 }, { "epoch": 0.5114497343573143, "grad_norm": 1.3805879354476929, "learning_rate": 9.640077535088045e-06, "loss": 0.4155, "step": 23120 }, { "epoch": 0.5115603419988276, "grad_norm": 1.1606731414794922, "learning_rate": 9.636604902787988e-06, "loss": 0.4023, "step": 23125 }, { "epoch": 0.5116709496403408, "grad_norm": 1.403053879737854, "learning_rate": 9.633132314367774e-06, "loss": 0.3261, "step": 23130 }, { "epoch": 0.511781557281854, "grad_norm": 2.3247177600860596, "learning_rate": 9.629659770246724e-06, "loss": 0.4527, "step": 23135 }, { "epoch": 0.5118921649233673, "grad_norm": 1.6197497844696045, "learning_rate": 9.626187270844135e-06, "loss": 0.2819, "step": 23140 }, { "epoch": 0.5120027725648806, "grad_norm": 1.0546983480453491, "learning_rate": 9.62271481657932e-06, "loss": 0.2876, "step": 23145 }, { "epoch": 0.5121133802063939, "grad_norm": 1.4605575799942017, "learning_rate": 9.619242407871574e-06, "loss": 0.3806, "step": 23150 }, { "epoch": 0.5122239878479071, "grad_norm": 1.6652517318725586, "learning_rate": 9.615770045140186e-06, "loss": 0.3708, "step": 23155 }, { "epoch": 0.5123345954894204, "grad_norm": 1.2060532569885254, "learning_rate": 9.612297728804445e-06, "loss": 0.2668, "step": 23160 }, { "epoch": 0.5124452031309337, "grad_norm": 1.0502005815505981, "learning_rate": 9.608825459283636e-06, "loss": 0.2752, "step": 23165 }, { "epoch": 0.5125558107724469, "grad_norm": 1.2778209447860718, "learning_rate": 9.605353236997026e-06, "loss": 0.3351, "step": 23170 }, { "epoch": 0.5126664184139602, "grad_norm": 1.4544100761413574, "learning_rate": 9.601881062363895e-06, "loss": 0.5058, "step": 23175 }, { "epoch": 0.5127770260554734, "grad_norm": 1.1921087503433228, "learning_rate": 9.598408935803499e-06, "loss": 0.4644, "step": 23180 }, { "epoch": 0.5128876336969866, "grad_norm": 0.46893852949142456, "learning_rate": 9.594936857735094e-06, "loss": 0.3438, "step": 23185 }, { "epoch": 0.5129982413384999, "grad_norm": 1.4571967124938965, "learning_rate": 9.591464828577944e-06, "loss": 0.3046, "step": 23190 }, { "epoch": 0.5131088489800132, "grad_norm": 2.718930721282959, "learning_rate": 9.587992848751286e-06, "loss": 0.3455, "step": 23195 }, { "epoch": 0.5132194566215265, "grad_norm": 1.8602315187454224, "learning_rate": 9.58452091867436e-06, "loss": 0.3804, "step": 23200 }, { "epoch": 0.5133300642630397, "grad_norm": 1.452720284461975, "learning_rate": 9.581049038766407e-06, "loss": 0.3156, "step": 23205 }, { "epoch": 0.513440671904553, "grad_norm": 0.9552952647209167, "learning_rate": 9.577577209446652e-06, "loss": 0.3959, "step": 23210 }, { "epoch": 0.5135512795460663, "grad_norm": 1.092811942100525, "learning_rate": 9.574105431134313e-06, "loss": 0.1936, "step": 23215 }, { "epoch": 0.5136618871875795, "grad_norm": 1.213388204574585, "learning_rate": 9.570633704248615e-06, "loss": 0.3966, "step": 23220 }, { "epoch": 0.5137724948290927, "grad_norm": 1.142703890800476, "learning_rate": 9.56716202920876e-06, "loss": 0.254, "step": 23225 }, { "epoch": 0.513883102470606, "grad_norm": 1.1886652708053589, "learning_rate": 9.563690406433956e-06, "loss": 0.4979, "step": 23230 }, { "epoch": 0.5139937101121193, "grad_norm": 1.031300663948059, "learning_rate": 9.560218836343401e-06, "loss": 0.3638, "step": 23235 }, { "epoch": 0.5141043177536325, "grad_norm": 1.5942480564117432, "learning_rate": 9.556747319356283e-06, "loss": 0.4595, "step": 23240 }, { "epoch": 0.5142149253951458, "grad_norm": 1.4118871688842773, "learning_rate": 9.553275855891786e-06, "loss": 0.261, "step": 23245 }, { "epoch": 0.5143255330366591, "grad_norm": 0.6327369809150696, "learning_rate": 9.549804446369093e-06, "loss": 0.3402, "step": 23250 }, { "epoch": 0.5144361406781723, "grad_norm": 1.249734878540039, "learning_rate": 9.54633309120737e-06, "loss": 0.4364, "step": 23255 }, { "epoch": 0.5145467483196856, "grad_norm": 1.3805447816848755, "learning_rate": 9.542861790825785e-06, "loss": 0.4374, "step": 23260 }, { "epoch": 0.5146573559611989, "grad_norm": 1.454256296157837, "learning_rate": 9.5393905456435e-06, "loss": 0.4065, "step": 23265 }, { "epoch": 0.5147679636027122, "grad_norm": 1.4977871179580688, "learning_rate": 9.535919356079657e-06, "loss": 0.4642, "step": 23270 }, { "epoch": 0.5148785712442253, "grad_norm": 0.9728492498397827, "learning_rate": 9.532448222553413e-06, "loss": 0.4418, "step": 23275 }, { "epoch": 0.5149891788857386, "grad_norm": 1.2780332565307617, "learning_rate": 9.528977145483897e-06, "loss": 0.3821, "step": 23280 }, { "epoch": 0.5150997865272519, "grad_norm": 1.1441502571105957, "learning_rate": 9.525506125290242e-06, "loss": 0.3794, "step": 23285 }, { "epoch": 0.5152103941687651, "grad_norm": 1.1503808498382568, "learning_rate": 9.522035162391578e-06, "loss": 0.3683, "step": 23290 }, { "epoch": 0.5153210018102784, "grad_norm": 1.32164466381073, "learning_rate": 9.518564257207018e-06, "loss": 0.2875, "step": 23295 }, { "epoch": 0.5154316094517917, "grad_norm": 1.6046173572540283, "learning_rate": 9.515093410155671e-06, "loss": 0.3117, "step": 23300 }, { "epoch": 0.5155422170933049, "grad_norm": 1.2652344703674316, "learning_rate": 9.511622621656647e-06, "loss": 0.3102, "step": 23305 }, { "epoch": 0.5156528247348182, "grad_norm": 1.1771610975265503, "learning_rate": 9.508151892129038e-06, "loss": 0.3789, "step": 23310 }, { "epoch": 0.5157634323763315, "grad_norm": 1.160805106163025, "learning_rate": 9.504681221991933e-06, "loss": 0.3389, "step": 23315 }, { "epoch": 0.5158740400178446, "grad_norm": 1.0950498580932617, "learning_rate": 9.501210611664418e-06, "loss": 0.3734, "step": 23320 }, { "epoch": 0.5159846476593579, "grad_norm": 0.8878926634788513, "learning_rate": 9.497740061565567e-06, "loss": 0.3272, "step": 23325 }, { "epoch": 0.5160952553008712, "grad_norm": 2.042639970779419, "learning_rate": 9.49426957211444e-06, "loss": 0.3529, "step": 23330 }, { "epoch": 0.5162058629423845, "grad_norm": 1.0873289108276367, "learning_rate": 9.490799143730113e-06, "loss": 0.3648, "step": 23335 }, { "epoch": 0.5163164705838977, "grad_norm": 2.0232913494110107, "learning_rate": 9.487328776831625e-06, "loss": 0.4034, "step": 23340 }, { "epoch": 0.516427078225411, "grad_norm": 1.0985760688781738, "learning_rate": 9.483858471838027e-06, "loss": 0.323, "step": 23345 }, { "epoch": 0.5165376858669243, "grad_norm": 1.4427032470703125, "learning_rate": 9.480388229168362e-06, "loss": 0.4874, "step": 23350 }, { "epoch": 0.5166482935084376, "grad_norm": 0.9153605699539185, "learning_rate": 9.476918049241652e-06, "loss": 0.2484, "step": 23355 }, { "epoch": 0.5167589011499508, "grad_norm": 1.218097448348999, "learning_rate": 9.473447932476925e-06, "loss": 0.3965, "step": 23360 }, { "epoch": 0.5168695087914641, "grad_norm": 1.0617194175720215, "learning_rate": 9.4699778792932e-06, "loss": 0.2786, "step": 23365 }, { "epoch": 0.5169801164329773, "grad_norm": 1.6131837368011475, "learning_rate": 9.466507890109476e-06, "loss": 0.3699, "step": 23370 }, { "epoch": 0.5170907240744905, "grad_norm": 1.3091299533843994, "learning_rate": 9.463037965344758e-06, "loss": 0.433, "step": 23375 }, { "epoch": 0.5172013317160038, "grad_norm": 0.8160579204559326, "learning_rate": 9.459568105418042e-06, "loss": 0.3039, "step": 23380 }, { "epoch": 0.5173119393575171, "grad_norm": 1.3712406158447266, "learning_rate": 9.456098310748304e-06, "loss": 0.1648, "step": 23385 }, { "epoch": 0.5174225469990303, "grad_norm": 1.5953900814056396, "learning_rate": 9.452628581754529e-06, "loss": 0.4682, "step": 23390 }, { "epoch": 0.5175331546405436, "grad_norm": 0.7479733824729919, "learning_rate": 9.44915891885568e-06, "loss": 0.2168, "step": 23395 }, { "epoch": 0.5176437622820569, "grad_norm": 1.5912494659423828, "learning_rate": 9.445689322470718e-06, "loss": 0.2713, "step": 23400 }, { "epoch": 0.5177543699235702, "grad_norm": 1.3593127727508545, "learning_rate": 9.442219793018603e-06, "loss": 0.3405, "step": 23405 }, { "epoch": 0.5178649775650834, "grad_norm": 2.0646395683288574, "learning_rate": 9.43875033091827e-06, "loss": 0.4403, "step": 23410 }, { "epoch": 0.5179755852065966, "grad_norm": 0.855380654335022, "learning_rate": 9.435280936588656e-06, "loss": 0.3481, "step": 23415 }, { "epoch": 0.5180861928481099, "grad_norm": 0.8475659489631653, "learning_rate": 9.4318116104487e-06, "loss": 0.3426, "step": 23420 }, { "epoch": 0.5181968004896231, "grad_norm": 1.589357614517212, "learning_rate": 9.428342352917312e-06, "loss": 0.3899, "step": 23425 }, { "epoch": 0.5183074081311364, "grad_norm": 1.3809231519699097, "learning_rate": 9.424873164413403e-06, "loss": 0.3693, "step": 23430 }, { "epoch": 0.5184180157726497, "grad_norm": 0.8801959753036499, "learning_rate": 9.421404045355884e-06, "loss": 0.3635, "step": 23435 }, { "epoch": 0.518528623414163, "grad_norm": 1.6734979152679443, "learning_rate": 9.417934996163645e-06, "loss": 0.2186, "step": 23440 }, { "epoch": 0.5186392310556762, "grad_norm": 0.7921309471130371, "learning_rate": 9.414466017255572e-06, "loss": 0.3279, "step": 23445 }, { "epoch": 0.5187498386971895, "grad_norm": 1.3080555200576782, "learning_rate": 9.410997109050547e-06, "loss": 0.4212, "step": 23450 }, { "epoch": 0.5188604463387028, "grad_norm": 1.2787728309631348, "learning_rate": 9.407528271967437e-06, "loss": 0.367, "step": 23455 }, { "epoch": 0.518971053980216, "grad_norm": 1.4879052639007568, "learning_rate": 9.4040595064251e-06, "loss": 0.4046, "step": 23460 }, { "epoch": 0.5190816616217292, "grad_norm": 2.1061043739318848, "learning_rate": 9.400590812842397e-06, "loss": 0.3346, "step": 23465 }, { "epoch": 0.5191922692632425, "grad_norm": 0.7094554305076599, "learning_rate": 9.39712219163816e-06, "loss": 0.4013, "step": 23470 }, { "epoch": 0.5193028769047557, "grad_norm": 1.174729347229004, "learning_rate": 9.393653643231234e-06, "loss": 0.3247, "step": 23475 }, { "epoch": 0.519413484546269, "grad_norm": 1.226301670074463, "learning_rate": 9.390185168040443e-06, "loss": 0.3611, "step": 23480 }, { "epoch": 0.5195240921877823, "grad_norm": 1.1798640489578247, "learning_rate": 9.3867167664846e-06, "loss": 0.2638, "step": 23485 }, { "epoch": 0.5196346998292956, "grad_norm": 1.1067051887512207, "learning_rate": 9.38324843898252e-06, "loss": 0.1719, "step": 23490 }, { "epoch": 0.5197453074708088, "grad_norm": 1.3731441497802734, "learning_rate": 9.379780185952993e-06, "loss": 0.3194, "step": 23495 }, { "epoch": 0.5198559151123221, "grad_norm": 1.0693804025650024, "learning_rate": 9.376312007814816e-06, "loss": 0.3588, "step": 23500 }, { "epoch": 0.5199665227538354, "grad_norm": 1.440061092376709, "learning_rate": 9.372843904986773e-06, "loss": 0.2583, "step": 23505 }, { "epoch": 0.5200771303953485, "grad_norm": 1.0393414497375488, "learning_rate": 9.36937587788763e-06, "loss": 0.2458, "step": 23510 }, { "epoch": 0.5201877380368618, "grad_norm": 1.740120530128479, "learning_rate": 9.365907926936152e-06, "loss": 0.4772, "step": 23515 }, { "epoch": 0.5202983456783751, "grad_norm": 1.1785989999771118, "learning_rate": 9.362440052551098e-06, "loss": 0.3282, "step": 23520 }, { "epoch": 0.5204089533198883, "grad_norm": 1.4042267799377441, "learning_rate": 9.358972255151206e-06, "loss": 0.4037, "step": 23525 }, { "epoch": 0.5205195609614016, "grad_norm": 1.089576005935669, "learning_rate": 9.355504535155213e-06, "loss": 0.2732, "step": 23530 }, { "epoch": 0.5206301686029149, "grad_norm": 1.4645459651947021, "learning_rate": 9.35203689298185e-06, "loss": 0.4389, "step": 23535 }, { "epoch": 0.5207407762444282, "grad_norm": 1.8897809982299805, "learning_rate": 9.348569329049827e-06, "loss": 0.4403, "step": 23540 }, { "epoch": 0.5208513838859414, "grad_norm": 1.5872936248779297, "learning_rate": 9.345101843777855e-06, "loss": 0.5489, "step": 23545 }, { "epoch": 0.5209619915274547, "grad_norm": 1.9369901418685913, "learning_rate": 9.341634437584632e-06, "loss": 0.4303, "step": 23550 }, { "epoch": 0.5210725991689679, "grad_norm": 1.336618423461914, "learning_rate": 9.338167110888845e-06, "loss": 0.2784, "step": 23555 }, { "epoch": 0.5211832068104811, "grad_norm": 1.0886844396591187, "learning_rate": 9.334699864109168e-06, "loss": 0.3419, "step": 23560 }, { "epoch": 0.5212938144519944, "grad_norm": 1.0841338634490967, "learning_rate": 9.331232697664283e-06, "loss": 0.3885, "step": 23565 }, { "epoch": 0.5214044220935077, "grad_norm": 1.2819252014160156, "learning_rate": 9.327765611972837e-06, "loss": 0.434, "step": 23570 }, { "epoch": 0.521515029735021, "grad_norm": 0.8078551888465881, "learning_rate": 9.32429860745348e-06, "loss": 0.3121, "step": 23575 }, { "epoch": 0.5216256373765342, "grad_norm": 1.2180083990097046, "learning_rate": 9.320831684524862e-06, "loss": 0.37, "step": 23580 }, { "epoch": 0.5217362450180475, "grad_norm": 1.3890801668167114, "learning_rate": 9.317364843605603e-06, "loss": 0.3307, "step": 23585 }, { "epoch": 0.5218468526595608, "grad_norm": 1.4017819166183472, "learning_rate": 9.313898085114324e-06, "loss": 0.2188, "step": 23590 }, { "epoch": 0.521957460301074, "grad_norm": 1.8145675659179688, "learning_rate": 9.310431409469643e-06, "loss": 0.386, "step": 23595 }, { "epoch": 0.5220680679425873, "grad_norm": 1.7982827425003052, "learning_rate": 9.306964817090149e-06, "loss": 0.3594, "step": 23600 }, { "epoch": 0.5221786755841005, "grad_norm": 1.204410433769226, "learning_rate": 9.30349830839444e-06, "loss": 0.3587, "step": 23605 }, { "epoch": 0.5222892832256137, "grad_norm": 1.4108160734176636, "learning_rate": 9.300031883801091e-06, "loss": 0.3608, "step": 23610 }, { "epoch": 0.522399890867127, "grad_norm": 1.8920609951019287, "learning_rate": 9.296565543728673e-06, "loss": 0.3359, "step": 23615 }, { "epoch": 0.5225104985086403, "grad_norm": 0.7405049800872803, "learning_rate": 9.29309928859575e-06, "loss": 0.2526, "step": 23620 }, { "epoch": 0.5226211061501536, "grad_norm": 0.6577838659286499, "learning_rate": 9.289633118820864e-06, "loss": 0.2787, "step": 23625 }, { "epoch": 0.5227317137916668, "grad_norm": 1.2008570432662964, "learning_rate": 9.286167034822557e-06, "loss": 0.4374, "step": 23630 }, { "epoch": 0.5228423214331801, "grad_norm": 1.1496015787124634, "learning_rate": 9.28270103701936e-06, "loss": 0.3374, "step": 23635 }, { "epoch": 0.5229529290746934, "grad_norm": 1.5742137432098389, "learning_rate": 9.279235125829787e-06, "loss": 0.2369, "step": 23640 }, { "epoch": 0.5230635367162066, "grad_norm": 1.2998862266540527, "learning_rate": 9.275769301672346e-06, "loss": 0.3283, "step": 23645 }, { "epoch": 0.5231741443577198, "grad_norm": 1.661854863166809, "learning_rate": 9.272303564965542e-06, "loss": 0.2901, "step": 23650 }, { "epoch": 0.5232847519992331, "grad_norm": 1.3279184103012085, "learning_rate": 9.26883791612785e-06, "loss": 0.4093, "step": 23655 }, { "epoch": 0.5233953596407463, "grad_norm": 1.5487658977508545, "learning_rate": 9.265372355577753e-06, "loss": 0.3864, "step": 23660 }, { "epoch": 0.5235059672822596, "grad_norm": 1.077052116394043, "learning_rate": 9.261906883733716e-06, "loss": 0.3505, "step": 23665 }, { "epoch": 0.5236165749237729, "grad_norm": 1.2538414001464844, "learning_rate": 9.258441501014193e-06, "loss": 0.3797, "step": 23670 }, { "epoch": 0.5237271825652862, "grad_norm": 1.2351078987121582, "learning_rate": 9.254976207837623e-06, "loss": 0.4101, "step": 23675 }, { "epoch": 0.5238377902067994, "grad_norm": 2.406344175338745, "learning_rate": 9.251511004622448e-06, "loss": 0.4361, "step": 23680 }, { "epoch": 0.5239483978483127, "grad_norm": 1.2580044269561768, "learning_rate": 9.248045891787082e-06, "loss": 0.3813, "step": 23685 }, { "epoch": 0.524059005489826, "grad_norm": 0.9724280834197998, "learning_rate": 9.244580869749942e-06, "loss": 0.4658, "step": 23690 }, { "epoch": 0.5241696131313393, "grad_norm": 1.4688429832458496, "learning_rate": 9.241115938929428e-06, "loss": 0.3881, "step": 23695 }, { "epoch": 0.5242802207728524, "grad_norm": 1.6411221027374268, "learning_rate": 9.237651099743924e-06, "loss": 0.2685, "step": 23700 }, { "epoch": 0.5243908284143657, "grad_norm": 0.8239161968231201, "learning_rate": 9.234186352611815e-06, "loss": 0.2185, "step": 23705 }, { "epoch": 0.524501436055879, "grad_norm": 1.014634370803833, "learning_rate": 9.230721697951464e-06, "loss": 0.2367, "step": 23710 }, { "epoch": 0.5246120436973922, "grad_norm": 0.8659315705299377, "learning_rate": 9.227257136181225e-06, "loss": 0.307, "step": 23715 }, { "epoch": 0.5247226513389055, "grad_norm": 1.2983012199401855, "learning_rate": 9.223792667719451e-06, "loss": 0.2615, "step": 23720 }, { "epoch": 0.5248332589804188, "grad_norm": 1.045296549797058, "learning_rate": 9.220328292984468e-06, "loss": 0.2338, "step": 23725 }, { "epoch": 0.524943866621932, "grad_norm": 1.6739369630813599, "learning_rate": 9.216864012394597e-06, "loss": 0.5434, "step": 23730 }, { "epoch": 0.5250544742634453, "grad_norm": 1.0160638093948364, "learning_rate": 9.213399826368158e-06, "loss": 0.4953, "step": 23735 }, { "epoch": 0.5251650819049586, "grad_norm": 1.079775333404541, "learning_rate": 9.20993573532344e-06, "loss": 0.2694, "step": 23740 }, { "epoch": 0.5252756895464717, "grad_norm": 1.3856945037841797, "learning_rate": 9.206471739678736e-06, "loss": 0.3352, "step": 23745 }, { "epoch": 0.525386297187985, "grad_norm": 1.3708069324493408, "learning_rate": 9.203007839852324e-06, "loss": 0.3731, "step": 23750 }, { "epoch": 0.5254969048294983, "grad_norm": 1.682856798171997, "learning_rate": 9.199544036262467e-06, "loss": 0.3043, "step": 23755 }, { "epoch": 0.5256075124710116, "grad_norm": 1.7637286186218262, "learning_rate": 9.196080329327413e-06, "loss": 0.2839, "step": 23760 }, { "epoch": 0.5257181201125248, "grad_norm": 1.048849105834961, "learning_rate": 9.192616719465414e-06, "loss": 0.347, "step": 23765 }, { "epoch": 0.5258287277540381, "grad_norm": 1.2155534029006958, "learning_rate": 9.189153207094692e-06, "loss": 0.3247, "step": 23770 }, { "epoch": 0.5259393353955514, "grad_norm": 1.440477728843689, "learning_rate": 9.185689792633465e-06, "loss": 0.3384, "step": 23775 }, { "epoch": 0.5260499430370646, "grad_norm": 1.9028253555297852, "learning_rate": 9.182226476499946e-06, "loss": 0.4591, "step": 23780 }, { "epoch": 0.5261605506785779, "grad_norm": 1.2619026899337769, "learning_rate": 9.178763259112322e-06, "loss": 0.4297, "step": 23785 }, { "epoch": 0.5262711583200912, "grad_norm": 0.8805214762687683, "learning_rate": 9.175300140888776e-06, "loss": 0.3526, "step": 23790 }, { "epoch": 0.5263817659616044, "grad_norm": 1.3839616775512695, "learning_rate": 9.171837122247485e-06, "loss": 0.2799, "step": 23795 }, { "epoch": 0.5264923736031176, "grad_norm": 0.7192927002906799, "learning_rate": 9.1683742036066e-06, "loss": 0.3264, "step": 23800 }, { "epoch": 0.5266029812446309, "grad_norm": 1.3108298778533936, "learning_rate": 9.164911385384275e-06, "loss": 0.4157, "step": 23805 }, { "epoch": 0.5267135888861442, "grad_norm": 1.0125675201416016, "learning_rate": 9.161448667998635e-06, "loss": 0.2984, "step": 23810 }, { "epoch": 0.5268241965276574, "grad_norm": 1.3503270149230957, "learning_rate": 9.157986051867806e-06, "loss": 0.3295, "step": 23815 }, { "epoch": 0.5269348041691707, "grad_norm": 1.4285755157470703, "learning_rate": 9.154523537409902e-06, "loss": 0.4805, "step": 23820 }, { "epoch": 0.527045411810684, "grad_norm": 1.2747538089752197, "learning_rate": 9.151061125043016e-06, "loss": 0.2095, "step": 23825 }, { "epoch": 0.5271560194521973, "grad_norm": 1.2561801671981812, "learning_rate": 9.147598815185232e-06, "loss": 0.3898, "step": 23830 }, { "epoch": 0.5272666270937105, "grad_norm": 1.371498942375183, "learning_rate": 9.144136608254628e-06, "loss": 0.3373, "step": 23835 }, { "epoch": 0.5273772347352237, "grad_norm": 1.2203830480575562, "learning_rate": 9.140674504669263e-06, "loss": 0.1877, "step": 23840 }, { "epoch": 0.527487842376737, "grad_norm": 0.7625868320465088, "learning_rate": 9.137212504847177e-06, "loss": 0.3035, "step": 23845 }, { "epoch": 0.5275984500182502, "grad_norm": 1.3492306470870972, "learning_rate": 9.133750609206418e-06, "loss": 0.2402, "step": 23850 }, { "epoch": 0.5277090576597635, "grad_norm": 1.6116663217544556, "learning_rate": 9.130288818165e-06, "loss": 0.3476, "step": 23855 }, { "epoch": 0.5278196653012768, "grad_norm": 1.4228655099868774, "learning_rate": 9.126827132140935e-06, "loss": 0.262, "step": 23860 }, { "epoch": 0.52793027294279, "grad_norm": 1.8293029069900513, "learning_rate": 9.123365551552225e-06, "loss": 0.3738, "step": 23865 }, { "epoch": 0.5280408805843033, "grad_norm": 0.41742172837257385, "learning_rate": 9.119904076816849e-06, "loss": 0.2794, "step": 23870 }, { "epoch": 0.5281514882258166, "grad_norm": 1.4126605987548828, "learning_rate": 9.116442708352779e-06, "loss": 0.3681, "step": 23875 }, { "epoch": 0.5282620958673299, "grad_norm": 0.8658580183982849, "learning_rate": 9.11298144657798e-06, "loss": 0.2364, "step": 23880 }, { "epoch": 0.5283727035088431, "grad_norm": 1.704796552658081, "learning_rate": 9.109520291910392e-06, "loss": 0.3438, "step": 23885 }, { "epoch": 0.5284833111503563, "grad_norm": 1.1766847372055054, "learning_rate": 9.10605924476795e-06, "loss": 0.4641, "step": 23890 }, { "epoch": 0.5285939187918696, "grad_norm": 1.5221738815307617, "learning_rate": 9.102598305568578e-06, "loss": 0.4263, "step": 23895 }, { "epoch": 0.5287045264333828, "grad_norm": 1.7972394227981567, "learning_rate": 9.099137474730178e-06, "loss": 0.4492, "step": 23900 }, { "epoch": 0.5288151340748961, "grad_norm": 1.5109316110610962, "learning_rate": 9.095676752670647e-06, "loss": 0.3339, "step": 23905 }, { "epoch": 0.5289257417164094, "grad_norm": 1.2584373950958252, "learning_rate": 9.09221613980787e-06, "loss": 0.5425, "step": 23910 }, { "epoch": 0.5290363493579227, "grad_norm": 1.3012676239013672, "learning_rate": 9.088755636559706e-06, "loss": 0.3594, "step": 23915 }, { "epoch": 0.5291469569994359, "grad_norm": 2.125542640686035, "learning_rate": 9.08529524334402e-06, "loss": 0.4329, "step": 23920 }, { "epoch": 0.5292575646409492, "grad_norm": 1.2379119396209717, "learning_rate": 9.081834960578644e-06, "loss": 0.3346, "step": 23925 }, { "epoch": 0.5293681722824625, "grad_norm": 0.9260175228118896, "learning_rate": 9.078374788681408e-06, "loss": 0.369, "step": 23930 }, { "epoch": 0.5294787799239756, "grad_norm": 1.8259638547897339, "learning_rate": 9.074914728070133e-06, "loss": 0.4538, "step": 23935 }, { "epoch": 0.5295893875654889, "grad_norm": 1.6831151247024536, "learning_rate": 9.071454779162615e-06, "loss": 0.4627, "step": 23940 }, { "epoch": 0.5296999952070022, "grad_norm": 1.2892465591430664, "learning_rate": 9.06799494237664e-06, "loss": 0.4147, "step": 23945 }, { "epoch": 0.5298106028485154, "grad_norm": 1.763149619102478, "learning_rate": 9.064535218129988e-06, "loss": 0.3339, "step": 23950 }, { "epoch": 0.5299212104900287, "grad_norm": 1.6174302101135254, "learning_rate": 9.061075606840416e-06, "loss": 0.4316, "step": 23955 }, { "epoch": 0.530031818131542, "grad_norm": 0.9528664350509644, "learning_rate": 9.057616108925669e-06, "loss": 0.4024, "step": 23960 }, { "epoch": 0.5301424257730553, "grad_norm": 1.0530123710632324, "learning_rate": 9.054156724803486e-06, "loss": 0.4319, "step": 23965 }, { "epoch": 0.5302530334145685, "grad_norm": 1.1649953126907349, "learning_rate": 9.050697454891583e-06, "loss": 0.2422, "step": 23970 }, { "epoch": 0.5303636410560818, "grad_norm": 0.9899040460586548, "learning_rate": 9.047238299607662e-06, "loss": 0.2613, "step": 23975 }, { "epoch": 0.5304742486975951, "grad_norm": 0.8274750113487244, "learning_rate": 9.043779259369426e-06, "loss": 0.4422, "step": 23980 }, { "epoch": 0.5305848563391082, "grad_norm": 0.6118070483207703, "learning_rate": 9.040320334594542e-06, "loss": 0.2701, "step": 23985 }, { "epoch": 0.5306954639806215, "grad_norm": 0.7952582240104675, "learning_rate": 9.036861525700675e-06, "loss": 0.2893, "step": 23990 }, { "epoch": 0.5308060716221348, "grad_norm": 1.0102829933166504, "learning_rate": 9.033402833105484e-06, "loss": 0.3494, "step": 23995 }, { "epoch": 0.530916679263648, "grad_norm": 1.606866717338562, "learning_rate": 9.029944257226598e-06, "loss": 0.3609, "step": 24000 }, { "epoch": 0.5310272869051613, "grad_norm": 1.2186791896820068, "learning_rate": 9.026485798481637e-06, "loss": 0.3828, "step": 24005 }, { "epoch": 0.5311378945466746, "grad_norm": 1.1610828638076782, "learning_rate": 9.023027457288216e-06, "loss": 0.3682, "step": 24010 }, { "epoch": 0.5312485021881879, "grad_norm": 1.3631436824798584, "learning_rate": 9.01956923406392e-06, "loss": 0.3088, "step": 24015 }, { "epoch": 0.5313591098297011, "grad_norm": 1.285906195640564, "learning_rate": 9.01611112922634e-06, "loss": 0.2389, "step": 24020 }, { "epoch": 0.5314697174712144, "grad_norm": 1.205007553100586, "learning_rate": 9.012653143193028e-06, "loss": 0.3462, "step": 24025 }, { "epoch": 0.5315803251127276, "grad_norm": 1.8545442819595337, "learning_rate": 9.009195276381541e-06, "loss": 0.4655, "step": 24030 }, { "epoch": 0.5316909327542408, "grad_norm": 1.924720048904419, "learning_rate": 9.005737529209421e-06, "loss": 0.3174, "step": 24035 }, { "epoch": 0.5318015403957541, "grad_norm": 1.3397421836853027, "learning_rate": 9.00227990209418e-06, "loss": 0.409, "step": 24040 }, { "epoch": 0.5319121480372674, "grad_norm": 1.016600251197815, "learning_rate": 8.998822395453328e-06, "loss": 0.3167, "step": 24045 }, { "epoch": 0.5320227556787807, "grad_norm": 1.7115535736083984, "learning_rate": 8.995365009704363e-06, "loss": 0.2601, "step": 24050 }, { "epoch": 0.5321333633202939, "grad_norm": 1.6821798086166382, "learning_rate": 8.991907745264758e-06, "loss": 0.2705, "step": 24055 }, { "epoch": 0.5322439709618072, "grad_norm": 1.3212449550628662, "learning_rate": 8.988450602551976e-06, "loss": 0.2951, "step": 24060 }, { "epoch": 0.5323545786033205, "grad_norm": 0.9813992977142334, "learning_rate": 8.984993581983472e-06, "loss": 0.3385, "step": 24065 }, { "epoch": 0.5324651862448337, "grad_norm": 0.8868578672409058, "learning_rate": 8.981536683976674e-06, "loss": 0.2819, "step": 24070 }, { "epoch": 0.532575793886347, "grad_norm": 1.3473008871078491, "learning_rate": 8.978079908949003e-06, "loss": 0.4401, "step": 24075 }, { "epoch": 0.5326864015278602, "grad_norm": 1.7804858684539795, "learning_rate": 8.974623257317867e-06, "loss": 0.3634, "step": 24080 }, { "epoch": 0.5327970091693734, "grad_norm": 1.0243473052978516, "learning_rate": 8.971166729500651e-06, "loss": 0.4027, "step": 24085 }, { "epoch": 0.5329076168108867, "grad_norm": 0.6867493987083435, "learning_rate": 8.967710325914728e-06, "loss": 0.4473, "step": 24090 }, { "epoch": 0.5330182244524, "grad_norm": 1.1222143173217773, "learning_rate": 8.964254046977468e-06, "loss": 0.3361, "step": 24095 }, { "epoch": 0.5331288320939133, "grad_norm": 1.5278376340866089, "learning_rate": 8.960797893106206e-06, "loss": 0.3382, "step": 24100 }, { "epoch": 0.5332394397354265, "grad_norm": 1.3644589185714722, "learning_rate": 8.95734186471827e-06, "loss": 0.3645, "step": 24105 }, { "epoch": 0.5333500473769398, "grad_norm": 1.1077044010162354, "learning_rate": 8.953885962230983e-06, "loss": 0.408, "step": 24110 }, { "epoch": 0.5334606550184531, "grad_norm": 1.3532882928848267, "learning_rate": 8.95043018606164e-06, "loss": 0.4452, "step": 24115 }, { "epoch": 0.5335712626599663, "grad_norm": 1.4205046892166138, "learning_rate": 8.94697453662752e-06, "loss": 0.3588, "step": 24120 }, { "epoch": 0.5336818703014795, "grad_norm": 0.9304869174957275, "learning_rate": 8.943519014345901e-06, "loss": 0.3092, "step": 24125 }, { "epoch": 0.5337924779429928, "grad_norm": 0.6927605271339417, "learning_rate": 8.940063619634028e-06, "loss": 0.2946, "step": 24130 }, { "epoch": 0.533903085584506, "grad_norm": 0.793555736541748, "learning_rate": 8.936608352909146e-06, "loss": 0.2508, "step": 24135 }, { "epoch": 0.5340136932260193, "grad_norm": 1.8137435913085938, "learning_rate": 8.933153214588471e-06, "loss": 0.223, "step": 24140 }, { "epoch": 0.5341243008675326, "grad_norm": 1.569393277168274, "learning_rate": 8.92969820508921e-06, "loss": 0.2634, "step": 24145 }, { "epoch": 0.5342349085090459, "grad_norm": 1.8787739276885986, "learning_rate": 8.926243324828564e-06, "loss": 0.4434, "step": 24150 }, { "epoch": 0.5343455161505591, "grad_norm": 1.8565007448196411, "learning_rate": 8.922788574223695e-06, "loss": 0.3913, "step": 24155 }, { "epoch": 0.5344561237920724, "grad_norm": 1.539947271347046, "learning_rate": 8.91933395369177e-06, "loss": 0.5272, "step": 24160 }, { "epoch": 0.5345667314335857, "grad_norm": 0.9059681296348572, "learning_rate": 8.915879463649936e-06, "loss": 0.2299, "step": 24165 }, { "epoch": 0.534677339075099, "grad_norm": 1.0518938302993774, "learning_rate": 8.912425104515317e-06, "loss": 0.3156, "step": 24170 }, { "epoch": 0.5347879467166121, "grad_norm": 0.41721728444099426, "learning_rate": 8.908970876705025e-06, "loss": 0.48, "step": 24175 }, { "epoch": 0.5348985543581254, "grad_norm": 1.1333341598510742, "learning_rate": 8.905516780636164e-06, "loss": 0.3713, "step": 24180 }, { "epoch": 0.5350091619996387, "grad_norm": 2.3229901790618896, "learning_rate": 8.902062816725807e-06, "loss": 0.2632, "step": 24185 }, { "epoch": 0.5351197696411519, "grad_norm": 1.781646966934204, "learning_rate": 8.89860898539102e-06, "loss": 0.4066, "step": 24190 }, { "epoch": 0.5352303772826652, "grad_norm": 1.000868558883667, "learning_rate": 8.89515528704886e-06, "loss": 0.3505, "step": 24195 }, { "epoch": 0.5353409849241785, "grad_norm": 1.5412416458129883, "learning_rate": 8.891701722116353e-06, "loss": 0.4663, "step": 24200 }, { "epoch": 0.5354515925656917, "grad_norm": 1.0238080024719238, "learning_rate": 8.888248291010512e-06, "loss": 0.4206, "step": 24205 }, { "epoch": 0.535562200207205, "grad_norm": 0.8138429522514343, "learning_rate": 8.88479499414835e-06, "loss": 0.3709, "step": 24210 }, { "epoch": 0.5356728078487183, "grad_norm": 1.3212043046951294, "learning_rate": 8.881341831946841e-06, "loss": 0.4116, "step": 24215 }, { "epoch": 0.5357834154902315, "grad_norm": 1.1024106740951538, "learning_rate": 8.877888804822955e-06, "loss": 0.3329, "step": 24220 }, { "epoch": 0.5358940231317447, "grad_norm": 1.1968520879745483, "learning_rate": 8.87443591319365e-06, "loss": 0.3351, "step": 24225 }, { "epoch": 0.536004630773258, "grad_norm": 1.0064270496368408, "learning_rate": 8.870983157475853e-06, "loss": 0.4536, "step": 24230 }, { "epoch": 0.5361152384147713, "grad_norm": 1.461471676826477, "learning_rate": 8.867530538086492e-06, "loss": 0.328, "step": 24235 }, { "epoch": 0.5362258460562845, "grad_norm": 1.223426103591919, "learning_rate": 8.864078055442462e-06, "loss": 0.3622, "step": 24240 }, { "epoch": 0.5363364536977978, "grad_norm": 1.3634215593338013, "learning_rate": 8.860625709960653e-06, "loss": 0.3481, "step": 24245 }, { "epoch": 0.5364470613393111, "grad_norm": 1.349678874015808, "learning_rate": 8.857173502057934e-06, "loss": 0.3224, "step": 24250 }, { "epoch": 0.5365576689808244, "grad_norm": 1.960610270500183, "learning_rate": 8.853721432151158e-06, "loss": 0.4561, "step": 24255 }, { "epoch": 0.5366682766223376, "grad_norm": 1.6616976261138916, "learning_rate": 8.850269500657161e-06, "loss": 0.4308, "step": 24260 }, { "epoch": 0.5367788842638509, "grad_norm": 1.8961209058761597, "learning_rate": 8.846817707992765e-06, "loss": 0.3177, "step": 24265 }, { "epoch": 0.5368894919053641, "grad_norm": 1.5470792055130005, "learning_rate": 8.84336605457477e-06, "loss": 0.4887, "step": 24270 }, { "epoch": 0.5370000995468773, "grad_norm": 1.0836968421936035, "learning_rate": 8.839914540819959e-06, "loss": 0.2526, "step": 24275 }, { "epoch": 0.5371107071883906, "grad_norm": 1.219099760055542, "learning_rate": 8.836463167145111e-06, "loss": 0.3161, "step": 24280 }, { "epoch": 0.5372213148299039, "grad_norm": 1.2988567352294922, "learning_rate": 8.833011933966971e-06, "loss": 0.455, "step": 24285 }, { "epoch": 0.5373319224714171, "grad_norm": 0.8313173651695251, "learning_rate": 8.829560841702272e-06, "loss": 0.359, "step": 24290 }, { "epoch": 0.5374425301129304, "grad_norm": 1.4244258403778076, "learning_rate": 8.826109890767742e-06, "loss": 0.4372, "step": 24295 }, { "epoch": 0.5375531377544437, "grad_norm": 0.8012514710426331, "learning_rate": 8.822659081580073e-06, "loss": 0.2221, "step": 24300 }, { "epoch": 0.537663745395957, "grad_norm": 0.9782665967941284, "learning_rate": 8.819208414555951e-06, "loss": 0.3574, "step": 24305 }, { "epoch": 0.5377743530374702, "grad_norm": 1.3993276357650757, "learning_rate": 8.81575789011205e-06, "loss": 0.3139, "step": 24310 }, { "epoch": 0.5378849606789834, "grad_norm": 1.6720139980316162, "learning_rate": 8.81230750866501e-06, "loss": 0.3746, "step": 24315 }, { "epoch": 0.5379955683204967, "grad_norm": 1.1017400026321411, "learning_rate": 8.808857270631467e-06, "loss": 0.3224, "step": 24320 }, { "epoch": 0.5381061759620099, "grad_norm": 0.9601558446884155, "learning_rate": 8.805407176428042e-06, "loss": 0.3634, "step": 24325 }, { "epoch": 0.5382167836035232, "grad_norm": 1.0305447578430176, "learning_rate": 8.801957226471323e-06, "loss": 0.3756, "step": 24330 }, { "epoch": 0.5383273912450365, "grad_norm": 0.7664341330528259, "learning_rate": 8.798507421177895e-06, "loss": 0.2678, "step": 24335 }, { "epoch": 0.5384379988865498, "grad_norm": 1.051929235458374, "learning_rate": 8.795057760964326e-06, "loss": 0.4392, "step": 24340 }, { "epoch": 0.538548606528063, "grad_norm": 2.3752129077911377, "learning_rate": 8.79160824624715e-06, "loss": 0.2824, "step": 24345 }, { "epoch": 0.5386592141695763, "grad_norm": 1.2375001907348633, "learning_rate": 8.788158877442906e-06, "loss": 0.3645, "step": 24350 }, { "epoch": 0.5387698218110896, "grad_norm": 1.7412059307098389, "learning_rate": 8.784709654968096e-06, "loss": 0.4674, "step": 24355 }, { "epoch": 0.5388804294526027, "grad_norm": 1.3887901306152344, "learning_rate": 8.781260579239216e-06, "loss": 0.4325, "step": 24360 }, { "epoch": 0.538991037094116, "grad_norm": 1.8993185758590698, "learning_rate": 8.777811650672746e-06, "loss": 0.3331, "step": 24365 }, { "epoch": 0.5391016447356293, "grad_norm": 0.9063121676445007, "learning_rate": 8.774362869685132e-06, "loss": 0.3285, "step": 24370 }, { "epoch": 0.5392122523771425, "grad_norm": 1.371351957321167, "learning_rate": 8.770914236692821e-06, "loss": 0.3862, "step": 24375 }, { "epoch": 0.5393228600186558, "grad_norm": 1.3992127180099487, "learning_rate": 8.767465752112232e-06, "loss": 0.3965, "step": 24380 }, { "epoch": 0.5394334676601691, "grad_norm": 1.2887202501296997, "learning_rate": 8.764017416359771e-06, "loss": 0.3084, "step": 24385 }, { "epoch": 0.5395440753016824, "grad_norm": 1.2788097858428955, "learning_rate": 8.760569229851817e-06, "loss": 0.2932, "step": 24390 }, { "epoch": 0.5396546829431956, "grad_norm": 1.0370982885360718, "learning_rate": 8.757121193004746e-06, "loss": 0.3426, "step": 24395 }, { "epoch": 0.5397652905847089, "grad_norm": 1.2485815286636353, "learning_rate": 8.753673306234903e-06, "loss": 0.4259, "step": 24400 }, { "epoch": 0.5398758982262222, "grad_norm": 3.5516226291656494, "learning_rate": 8.750225569958616e-06, "loss": 0.387, "step": 24405 }, { "epoch": 0.5399865058677353, "grad_norm": 2.3084421157836914, "learning_rate": 8.746777984592205e-06, "loss": 0.3727, "step": 24410 }, { "epoch": 0.5400971135092486, "grad_norm": 0.6205614805221558, "learning_rate": 8.74333055055196e-06, "loss": 0.2723, "step": 24415 }, { "epoch": 0.5402077211507619, "grad_norm": 0.798539936542511, "learning_rate": 8.739883268254156e-06, "loss": 0.2791, "step": 24420 }, { "epoch": 0.5403183287922751, "grad_norm": 0.9575374126434326, "learning_rate": 8.73643613811506e-06, "loss": 0.2224, "step": 24425 }, { "epoch": 0.5404289364337884, "grad_norm": 1.0316359996795654, "learning_rate": 8.732989160550901e-06, "loss": 0.3443, "step": 24430 }, { "epoch": 0.5405395440753017, "grad_norm": 1.308739423751831, "learning_rate": 8.729542335977907e-06, "loss": 0.3541, "step": 24435 }, { "epoch": 0.540650151716815, "grad_norm": 1.3755508661270142, "learning_rate": 8.726095664812281e-06, "loss": 0.2916, "step": 24440 }, { "epoch": 0.5407607593583282, "grad_norm": 1.636514663696289, "learning_rate": 8.722649147470204e-06, "loss": 0.4454, "step": 24445 }, { "epoch": 0.5408713669998415, "grad_norm": 2.0227279663085938, "learning_rate": 8.719202784367845e-06, "loss": 0.3094, "step": 24450 }, { "epoch": 0.5409819746413547, "grad_norm": 1.493470311164856, "learning_rate": 8.715756575921352e-06, "loss": 0.3732, "step": 24455 }, { "epoch": 0.5410925822828679, "grad_norm": 1.9708243608474731, "learning_rate": 8.712310522546848e-06, "loss": 0.332, "step": 24460 }, { "epoch": 0.5412031899243812, "grad_norm": 1.0285836458206177, "learning_rate": 8.70886462466045e-06, "loss": 0.2707, "step": 24465 }, { "epoch": 0.5413137975658945, "grad_norm": 1.0294547080993652, "learning_rate": 8.705418882678244e-06, "loss": 0.5046, "step": 24470 }, { "epoch": 0.5414244052074078, "grad_norm": 1.206700086593628, "learning_rate": 8.701973297016301e-06, "loss": 0.2981, "step": 24475 }, { "epoch": 0.541535012848921, "grad_norm": 1.1449922323226929, "learning_rate": 8.698527868090683e-06, "loss": 0.3858, "step": 24480 }, { "epoch": 0.5416456204904343, "grad_norm": 1.1103873252868652, "learning_rate": 8.695082596317417e-06, "loss": 0.3006, "step": 24485 }, { "epoch": 0.5417562281319476, "grad_norm": 1.6903793811798096, "learning_rate": 8.691637482112517e-06, "loss": 0.3744, "step": 24490 }, { "epoch": 0.5418668357734608, "grad_norm": 0.97975754737854, "learning_rate": 8.688192525891987e-06, "loss": 0.3601, "step": 24495 }, { "epoch": 0.5419774434149741, "grad_norm": 1.4394193887710571, "learning_rate": 8.684747728071798e-06, "loss": 0.3453, "step": 24500 }, { "epoch": 0.5420880510564873, "grad_norm": 1.6237475872039795, "learning_rate": 8.681303089067909e-06, "loss": 0.3755, "step": 24505 }, { "epoch": 0.5421986586980005, "grad_norm": 1.6561897993087769, "learning_rate": 8.677858609296266e-06, "loss": 0.2744, "step": 24510 }, { "epoch": 0.5423092663395138, "grad_norm": 1.0630911588668823, "learning_rate": 8.674414289172779e-06, "loss": 0.3279, "step": 24515 }, { "epoch": 0.5424198739810271, "grad_norm": 1.183573603630066, "learning_rate": 8.670970129113353e-06, "loss": 0.3357, "step": 24520 }, { "epoch": 0.5425304816225404, "grad_norm": 1.3006824254989624, "learning_rate": 8.667526129533871e-06, "loss": 0.3728, "step": 24525 }, { "epoch": 0.5426410892640536, "grad_norm": 0.9915875792503357, "learning_rate": 8.664082290850194e-06, "loss": 0.3089, "step": 24530 }, { "epoch": 0.5427516969055669, "grad_norm": 0.8773046135902405, "learning_rate": 8.660638613478158e-06, "loss": 0.3903, "step": 24535 }, { "epoch": 0.5428623045470802, "grad_norm": 1.3627104759216309, "learning_rate": 8.657195097833597e-06, "loss": 0.4196, "step": 24540 }, { "epoch": 0.5429729121885934, "grad_norm": 0.9003704786300659, "learning_rate": 8.653751744332307e-06, "loss": 0.3751, "step": 24545 }, { "epoch": 0.5430835198301066, "grad_norm": 0.9727431535720825, "learning_rate": 8.650308553390073e-06, "loss": 0.319, "step": 24550 }, { "epoch": 0.5431941274716199, "grad_norm": 0.9047697186470032, "learning_rate": 8.646865525422663e-06, "loss": 0.3939, "step": 24555 }, { "epoch": 0.5433047351131332, "grad_norm": 1.6943602561950684, "learning_rate": 8.643422660845815e-06, "loss": 0.4535, "step": 24560 }, { "epoch": 0.5434153427546464, "grad_norm": 1.0965203046798706, "learning_rate": 8.63997996007526e-06, "loss": 0.4357, "step": 24565 }, { "epoch": 0.5435259503961597, "grad_norm": 0.9748310446739197, "learning_rate": 8.636537423526701e-06, "loss": 0.3876, "step": 24570 }, { "epoch": 0.543636558037673, "grad_norm": 1.5440722703933716, "learning_rate": 8.633095051615818e-06, "loss": 0.224, "step": 24575 }, { "epoch": 0.5437471656791862, "grad_norm": 1.962633490562439, "learning_rate": 8.629652844758288e-06, "loss": 0.4189, "step": 24580 }, { "epoch": 0.5438577733206995, "grad_norm": 1.6353614330291748, "learning_rate": 8.626210803369745e-06, "loss": 0.3718, "step": 24585 }, { "epoch": 0.5439683809622128, "grad_norm": 1.387508749961853, "learning_rate": 8.622768927865817e-06, "loss": 0.2998, "step": 24590 }, { "epoch": 0.544078988603726, "grad_norm": 2.256545305252075, "learning_rate": 8.619327218662116e-06, "loss": 0.3294, "step": 24595 }, { "epoch": 0.5441895962452392, "grad_norm": 0.6323999762535095, "learning_rate": 8.61588567617422e-06, "loss": 0.389, "step": 24600 }, { "epoch": 0.5443002038867525, "grad_norm": 1.6492725610733032, "learning_rate": 8.612444300817694e-06, "loss": 0.4558, "step": 24605 }, { "epoch": 0.5444108115282658, "grad_norm": 1.58588445186615, "learning_rate": 8.609003093008092e-06, "loss": 0.3144, "step": 24610 }, { "epoch": 0.544521419169779, "grad_norm": 2.3801143169403076, "learning_rate": 8.60556205316093e-06, "loss": 0.4289, "step": 24615 }, { "epoch": 0.5446320268112923, "grad_norm": 1.3203998804092407, "learning_rate": 8.602121181691712e-06, "loss": 0.352, "step": 24620 }, { "epoch": 0.5447426344528056, "grad_norm": 1.2895649671554565, "learning_rate": 8.598680479015929e-06, "loss": 0.2619, "step": 24625 }, { "epoch": 0.5448532420943188, "grad_norm": 1.240532636642456, "learning_rate": 8.595239945549039e-06, "loss": 0.241, "step": 24630 }, { "epoch": 0.5449638497358321, "grad_norm": 1.8421332836151123, "learning_rate": 8.591799581706485e-06, "loss": 0.4818, "step": 24635 }, { "epoch": 0.5450744573773454, "grad_norm": 1.6726453304290771, "learning_rate": 8.588359387903697e-06, "loss": 0.2757, "step": 24640 }, { "epoch": 0.5451850650188586, "grad_norm": 1.2734150886535645, "learning_rate": 8.58491936455607e-06, "loss": 0.3645, "step": 24645 }, { "epoch": 0.5452956726603718, "grad_norm": 1.3459986448287964, "learning_rate": 8.581479512078988e-06, "loss": 0.2851, "step": 24650 }, { "epoch": 0.5454062803018851, "grad_norm": 1.4788997173309326, "learning_rate": 8.578039830887813e-06, "loss": 0.255, "step": 24655 }, { "epoch": 0.5455168879433984, "grad_norm": 1.5971554517745972, "learning_rate": 8.574600321397883e-06, "loss": 0.3644, "step": 24660 }, { "epoch": 0.5456274955849116, "grad_norm": 0.8272050023078918, "learning_rate": 8.571160984024522e-06, "loss": 0.2117, "step": 24665 }, { "epoch": 0.5457381032264249, "grad_norm": 0.8687194585800171, "learning_rate": 8.567721819183026e-06, "loss": 0.2819, "step": 24670 }, { "epoch": 0.5458487108679382, "grad_norm": 0.6983230113983154, "learning_rate": 8.56428282728867e-06, "loss": 0.4284, "step": 24675 }, { "epoch": 0.5459593185094515, "grad_norm": 1.4246197938919067, "learning_rate": 8.56084400875672e-06, "loss": 0.4546, "step": 24680 }, { "epoch": 0.5460699261509647, "grad_norm": 0.7104344367980957, "learning_rate": 8.557405364002403e-06, "loss": 0.2687, "step": 24685 }, { "epoch": 0.546180533792478, "grad_norm": 0.764265775680542, "learning_rate": 8.553966893440937e-06, "loss": 0.2722, "step": 24690 }, { "epoch": 0.5462911414339912, "grad_norm": 0.9984202980995178, "learning_rate": 8.550528597487523e-06, "loss": 0.379, "step": 24695 }, { "epoch": 0.5464017490755044, "grad_norm": 1.6036720275878906, "learning_rate": 8.547090476557325e-06, "loss": 0.3284, "step": 24700 }, { "epoch": 0.5465123567170177, "grad_norm": 0.7352926135063171, "learning_rate": 8.543652531065497e-06, "loss": 0.453, "step": 24705 }, { "epoch": 0.546622964358531, "grad_norm": 0.9411243796348572, "learning_rate": 8.540214761427174e-06, "loss": 0.4092, "step": 24710 }, { "epoch": 0.5467335720000442, "grad_norm": 1.225460171699524, "learning_rate": 8.536777168057464e-06, "loss": 0.3312, "step": 24715 }, { "epoch": 0.5468441796415575, "grad_norm": 1.3442599773406982, "learning_rate": 8.53333975137145e-06, "loss": 0.2461, "step": 24720 }, { "epoch": 0.5469547872830708, "grad_norm": 2.2635390758514404, "learning_rate": 8.529902511784208e-06, "loss": 0.3882, "step": 24725 }, { "epoch": 0.5470653949245841, "grad_norm": 1.6016833782196045, "learning_rate": 8.526465449710777e-06, "loss": 0.476, "step": 24730 }, { "epoch": 0.5471760025660973, "grad_norm": 1.2116672992706299, "learning_rate": 8.523028565566182e-06, "loss": 0.3843, "step": 24735 }, { "epoch": 0.5472866102076105, "grad_norm": 1.1628550291061401, "learning_rate": 8.51959185976543e-06, "loss": 0.4212, "step": 24740 }, { "epoch": 0.5473972178491238, "grad_norm": 1.7570438385009766, "learning_rate": 8.516155332723497e-06, "loss": 0.566, "step": 24745 }, { "epoch": 0.547507825490637, "grad_norm": 0.4435025155544281, "learning_rate": 8.512718984855343e-06, "loss": 0.2239, "step": 24750 }, { "epoch": 0.5476184331321503, "grad_norm": 1.0491281747817993, "learning_rate": 8.509282816575912e-06, "loss": 0.3358, "step": 24755 }, { "epoch": 0.5477290407736636, "grad_norm": 0.9929397106170654, "learning_rate": 8.505846828300112e-06, "loss": 0.3033, "step": 24760 }, { "epoch": 0.5478396484151768, "grad_norm": 1.6713629961013794, "learning_rate": 8.50241102044284e-06, "loss": 0.3881, "step": 24765 }, { "epoch": 0.5479502560566901, "grad_norm": 1.5083547830581665, "learning_rate": 8.498975393418975e-06, "loss": 0.3066, "step": 24770 }, { "epoch": 0.5480608636982034, "grad_norm": 1.3526766300201416, "learning_rate": 8.495539947643358e-06, "loss": 0.3311, "step": 24775 }, { "epoch": 0.5481714713397167, "grad_norm": 1.2516013383865356, "learning_rate": 8.492104683530828e-06, "loss": 0.3374, "step": 24780 }, { "epoch": 0.5482820789812299, "grad_norm": 1.0233802795410156, "learning_rate": 8.488669601496184e-06, "loss": 0.3243, "step": 24785 }, { "epoch": 0.5483926866227431, "grad_norm": 1.7902966737747192, "learning_rate": 8.485234701954212e-06, "loss": 0.4255, "step": 24790 }, { "epoch": 0.5485032942642564, "grad_norm": 1.3859626054763794, "learning_rate": 8.481799985319681e-06, "loss": 0.4092, "step": 24795 }, { "epoch": 0.5486139019057696, "grad_norm": 1.439314365386963, "learning_rate": 8.478365452007327e-06, "loss": 0.4763, "step": 24800 }, { "epoch": 0.5487245095472829, "grad_norm": 2.196855306625366, "learning_rate": 8.474931102431868e-06, "loss": 0.428, "step": 24805 }, { "epoch": 0.5488351171887962, "grad_norm": 1.1772027015686035, "learning_rate": 8.471496937008005e-06, "loss": 0.2366, "step": 24810 }, { "epoch": 0.5489457248303095, "grad_norm": 0.7705940008163452, "learning_rate": 8.468062956150409e-06, "loss": 0.4284, "step": 24815 }, { "epoch": 0.5490563324718227, "grad_norm": 1.1791057586669922, "learning_rate": 8.464629160273729e-06, "loss": 0.414, "step": 24820 }, { "epoch": 0.549166940113336, "grad_norm": 0.7792350649833679, "learning_rate": 8.461195549792605e-06, "loss": 0.3546, "step": 24825 }, { "epoch": 0.5492775477548493, "grad_norm": 0.990820586681366, "learning_rate": 8.457762125121637e-06, "loss": 0.2931, "step": 24830 }, { "epoch": 0.5493881553963624, "grad_norm": 0.9815987944602966, "learning_rate": 8.454328886675407e-06, "loss": 0.3755, "step": 24835 }, { "epoch": 0.5494987630378757, "grad_norm": 0.7963159680366516, "learning_rate": 8.450895834868487e-06, "loss": 0.3866, "step": 24840 }, { "epoch": 0.549609370679389, "grad_norm": 1.8628522157669067, "learning_rate": 8.44746297011541e-06, "loss": 0.2214, "step": 24845 }, { "epoch": 0.5497199783209022, "grad_norm": 0.701928436756134, "learning_rate": 8.444030292830692e-06, "loss": 0.3022, "step": 24850 }, { "epoch": 0.5498305859624155, "grad_norm": 1.3590070009231567, "learning_rate": 8.440597803428833e-06, "loss": 0.2485, "step": 24855 }, { "epoch": 0.5499411936039288, "grad_norm": 1.1068168878555298, "learning_rate": 8.437165502324304e-06, "loss": 0.2252, "step": 24860 }, { "epoch": 0.5500518012454421, "grad_norm": 1.2505803108215332, "learning_rate": 8.433733389931548e-06, "loss": 0.404, "step": 24865 }, { "epoch": 0.5501624088869553, "grad_norm": 1.868930459022522, "learning_rate": 8.430301466665003e-06, "loss": 0.3914, "step": 24870 }, { "epoch": 0.5502730165284686, "grad_norm": 0.9787955284118652, "learning_rate": 8.426869732939065e-06, "loss": 0.4401, "step": 24875 }, { "epoch": 0.5503836241699819, "grad_norm": 1.4737046957015991, "learning_rate": 8.423438189168112e-06, "loss": 0.3099, "step": 24880 }, { "epoch": 0.550494231811495, "grad_norm": 1.149775505065918, "learning_rate": 8.420006835766508e-06, "loss": 0.2437, "step": 24885 }, { "epoch": 0.5506048394530083, "grad_norm": 0.8848971724510193, "learning_rate": 8.41657567314858e-06, "loss": 0.3794, "step": 24890 }, { "epoch": 0.5507154470945216, "grad_norm": 1.13786780834198, "learning_rate": 8.413144701728654e-06, "loss": 0.3922, "step": 24895 }, { "epoch": 0.5508260547360349, "grad_norm": 0.9995834827423096, "learning_rate": 8.409713921921008e-06, "loss": 0.2625, "step": 24900 }, { "epoch": 0.5509366623775481, "grad_norm": 0.9817618131637573, "learning_rate": 8.406283334139905e-06, "loss": 0.4353, "step": 24905 }, { "epoch": 0.5510472700190614, "grad_norm": 1.1142560243606567, "learning_rate": 8.402852938799598e-06, "loss": 0.3708, "step": 24910 }, { "epoch": 0.5511578776605747, "grad_norm": 1.1999337673187256, "learning_rate": 8.399422736314297e-06, "loss": 0.4019, "step": 24915 }, { "epoch": 0.5512684853020879, "grad_norm": 1.5033395290374756, "learning_rate": 8.395992727098202e-06, "loss": 0.3555, "step": 24920 }, { "epoch": 0.5513790929436012, "grad_norm": 0.9519023895263672, "learning_rate": 8.392562911565487e-06, "loss": 0.3579, "step": 24925 }, { "epoch": 0.5514897005851144, "grad_norm": 0.3888709247112274, "learning_rate": 8.389133290130295e-06, "loss": 0.1754, "step": 24930 }, { "epoch": 0.5516003082266276, "grad_norm": 1.8954445123672485, "learning_rate": 8.385703863206755e-06, "loss": 0.3491, "step": 24935 }, { "epoch": 0.5517109158681409, "grad_norm": 0.9214086532592773, "learning_rate": 8.382274631208976e-06, "loss": 0.4412, "step": 24940 }, { "epoch": 0.5518215235096542, "grad_norm": 0.9612589478492737, "learning_rate": 8.378845594551025e-06, "loss": 0.3019, "step": 24945 }, { "epoch": 0.5519321311511675, "grad_norm": 1.7342525720596313, "learning_rate": 8.375416753646962e-06, "loss": 0.4514, "step": 24950 }, { "epoch": 0.5520427387926807, "grad_norm": 0.6358898282051086, "learning_rate": 8.371988108910823e-06, "loss": 0.2197, "step": 24955 }, { "epoch": 0.552153346434194, "grad_norm": 1.4559730291366577, "learning_rate": 8.368559660756612e-06, "loss": 0.4282, "step": 24960 }, { "epoch": 0.5522639540757073, "grad_norm": 1.0497801303863525, "learning_rate": 8.365131409598308e-06, "loss": 0.3809, "step": 24965 }, { "epoch": 0.5523745617172205, "grad_norm": 0.9998264908790588, "learning_rate": 8.361703355849881e-06, "loss": 0.4298, "step": 24970 }, { "epoch": 0.5524851693587338, "grad_norm": 1.593156099319458, "learning_rate": 8.358275499925262e-06, "loss": 0.439, "step": 24975 }, { "epoch": 0.552595777000247, "grad_norm": 1.8398417234420776, "learning_rate": 8.35484784223836e-06, "loss": 0.3285, "step": 24980 }, { "epoch": 0.5527063846417603, "grad_norm": 1.7461555004119873, "learning_rate": 8.351420383203073e-06, "loss": 0.4327, "step": 24985 }, { "epoch": 0.5528169922832735, "grad_norm": 0.9903586506843567, "learning_rate": 8.347993123233256e-06, "loss": 0.4339, "step": 24990 }, { "epoch": 0.5529275999247868, "grad_norm": 1.5553539991378784, "learning_rate": 8.344566062742758e-06, "loss": 0.4037, "step": 24995 }, { "epoch": 0.5530382075663001, "grad_norm": 0.8647812008857727, "learning_rate": 8.341139202145389e-06, "loss": 0.4559, "step": 25000 }, { "epoch": 0.5531488152078133, "grad_norm": 1.0335423946380615, "learning_rate": 8.337712541854941e-06, "loss": 0.3712, "step": 25005 }, { "epoch": 0.5532594228493266, "grad_norm": 0.9071877598762512, "learning_rate": 8.33428608228519e-06, "loss": 0.4164, "step": 25010 }, { "epoch": 0.5533700304908399, "grad_norm": 1.0208159685134888, "learning_rate": 8.330859823849872e-06, "loss": 0.2307, "step": 25015 }, { "epoch": 0.5534806381323532, "grad_norm": 1.0459496974945068, "learning_rate": 8.327433766962709e-06, "loss": 0.2404, "step": 25020 }, { "epoch": 0.5535912457738663, "grad_norm": 1.011378526687622, "learning_rate": 8.324007912037401e-06, "loss": 0.266, "step": 25025 }, { "epoch": 0.5537018534153796, "grad_norm": 2.3749043941497803, "learning_rate": 8.32058225948761e-06, "loss": 0.4037, "step": 25030 }, { "epoch": 0.5538124610568929, "grad_norm": 1.6683255434036255, "learning_rate": 8.317156809726987e-06, "loss": 0.4476, "step": 25035 }, { "epoch": 0.5539230686984061, "grad_norm": 0.9549310207366943, "learning_rate": 8.313731563169158e-06, "loss": 0.4611, "step": 25040 }, { "epoch": 0.5540336763399194, "grad_norm": 1.0527803897857666, "learning_rate": 8.310306520227716e-06, "loss": 0.4561, "step": 25045 }, { "epoch": 0.5541442839814327, "grad_norm": 1.6235427856445312, "learning_rate": 8.30688168131623e-06, "loss": 0.2591, "step": 25050 }, { "epoch": 0.5542548916229459, "grad_norm": 1.0042117834091187, "learning_rate": 8.303457046848257e-06, "loss": 0.3439, "step": 25055 }, { "epoch": 0.5543654992644592, "grad_norm": 1.3837625980377197, "learning_rate": 8.300032617237316e-06, "loss": 0.3398, "step": 25060 }, { "epoch": 0.5544761069059725, "grad_norm": 1.2936017513275146, "learning_rate": 8.296608392896903e-06, "loss": 0.4254, "step": 25065 }, { "epoch": 0.5545867145474856, "grad_norm": 1.3968124389648438, "learning_rate": 8.2931843742405e-06, "loss": 0.3396, "step": 25070 }, { "epoch": 0.5546973221889989, "grad_norm": 1.2470189332962036, "learning_rate": 8.289760561681547e-06, "loss": 0.2736, "step": 25075 }, { "epoch": 0.5548079298305122, "grad_norm": 2.214484214782715, "learning_rate": 8.286336955633471e-06, "loss": 0.3988, "step": 25080 }, { "epoch": 0.5549185374720255, "grad_norm": 0.5368916392326355, "learning_rate": 8.282913556509678e-06, "loss": 0.3118, "step": 25085 }, { "epoch": 0.5550291451135387, "grad_norm": 1.0815619230270386, "learning_rate": 8.279490364723538e-06, "loss": 0.2099, "step": 25090 }, { "epoch": 0.555139752755052, "grad_norm": 1.1333744525909424, "learning_rate": 8.276067380688394e-06, "loss": 0.2743, "step": 25095 }, { "epoch": 0.5552503603965653, "grad_norm": 0.9932513236999512, "learning_rate": 8.272644604817579e-06, "loss": 0.3077, "step": 25100 }, { "epoch": 0.5553609680380786, "grad_norm": 0.9429784417152405, "learning_rate": 8.269222037524385e-06, "loss": 0.3121, "step": 25105 }, { "epoch": 0.5554715756795918, "grad_norm": 0.9261471629142761, "learning_rate": 8.265799679222097e-06, "loss": 0.1915, "step": 25110 }, { "epoch": 0.5555821833211051, "grad_norm": 1.1388553380966187, "learning_rate": 8.262377530323953e-06, "loss": 0.4364, "step": 25115 }, { "epoch": 0.5556927909626183, "grad_norm": 1.1651133298873901, "learning_rate": 8.258955591243177e-06, "loss": 0.3465, "step": 25120 }, { "epoch": 0.5558033986041315, "grad_norm": 1.5410710573196411, "learning_rate": 8.255533862392976e-06, "loss": 0.4124, "step": 25125 }, { "epoch": 0.5559140062456448, "grad_norm": 0.9766947627067566, "learning_rate": 8.252112344186513e-06, "loss": 0.3455, "step": 25130 }, { "epoch": 0.5560246138871581, "grad_norm": 1.1199291944503784, "learning_rate": 8.248691037036939e-06, "loss": 0.3281, "step": 25135 }, { "epoch": 0.5561352215286713, "grad_norm": 1.1331062316894531, "learning_rate": 8.245269941357378e-06, "loss": 0.2957, "step": 25140 }, { "epoch": 0.5562458291701846, "grad_norm": 0.7883870601654053, "learning_rate": 8.241849057560924e-06, "loss": 0.3208, "step": 25145 }, { "epoch": 0.5563564368116979, "grad_norm": 2.0817673206329346, "learning_rate": 8.238428386060643e-06, "loss": 0.3279, "step": 25150 }, { "epoch": 0.5564670444532112, "grad_norm": 2.1897196769714355, "learning_rate": 8.235007927269592e-06, "loss": 0.3402, "step": 25155 }, { "epoch": 0.5565776520947244, "grad_norm": 0.7983014583587646, "learning_rate": 8.231587681600779e-06, "loss": 0.2254, "step": 25160 }, { "epoch": 0.5566882597362376, "grad_norm": 1.2541654109954834, "learning_rate": 8.2281676494672e-06, "loss": 0.3432, "step": 25165 }, { "epoch": 0.5567988673777509, "grad_norm": 0.8745633959770203, "learning_rate": 8.224747831281829e-06, "loss": 0.4686, "step": 25170 }, { "epoch": 0.5569094750192641, "grad_norm": 1.2540884017944336, "learning_rate": 8.221328227457601e-06, "loss": 0.2851, "step": 25175 }, { "epoch": 0.5570200826607774, "grad_norm": 0.48377525806427, "learning_rate": 8.217908838407433e-06, "loss": 0.3396, "step": 25180 }, { "epoch": 0.5571306903022907, "grad_norm": 1.4431205987930298, "learning_rate": 8.21448966454422e-06, "loss": 0.3441, "step": 25185 }, { "epoch": 0.557241297943804, "grad_norm": 1.3489586114883423, "learning_rate": 8.211070706280823e-06, "loss": 0.2734, "step": 25190 }, { "epoch": 0.5573519055853172, "grad_norm": 1.0392094850540161, "learning_rate": 8.207651964030075e-06, "loss": 0.3226, "step": 25195 }, { "epoch": 0.5574625132268305, "grad_norm": 1.9156605005264282, "learning_rate": 8.204233438204798e-06, "loss": 0.3399, "step": 25200 }, { "epoch": 0.5575731208683438, "grad_norm": 0.9993060827255249, "learning_rate": 8.200815129217768e-06, "loss": 0.3927, "step": 25205 }, { "epoch": 0.557683728509857, "grad_norm": 1.449025273323059, "learning_rate": 8.197397037481756e-06, "loss": 0.3154, "step": 25210 }, { "epoch": 0.5577943361513702, "grad_norm": 0.9137487411499023, "learning_rate": 8.193979163409487e-06, "loss": 0.1943, "step": 25215 }, { "epoch": 0.5579049437928835, "grad_norm": 1.139998435974121, "learning_rate": 8.190561507413667e-06, "loss": 0.3379, "step": 25220 }, { "epoch": 0.5580155514343967, "grad_norm": 1.1319469213485718, "learning_rate": 8.187144069906985e-06, "loss": 0.3777, "step": 25225 }, { "epoch": 0.55812615907591, "grad_norm": 1.1846075057983398, "learning_rate": 8.18372685130209e-06, "loss": 0.2845, "step": 25230 }, { "epoch": 0.5582367667174233, "grad_norm": 0.9386315941810608, "learning_rate": 8.180309852011607e-06, "loss": 0.2638, "step": 25235 }, { "epoch": 0.5583473743589366, "grad_norm": 1.7913885116577148, "learning_rate": 8.176893072448148e-06, "loss": 0.3377, "step": 25240 }, { "epoch": 0.5584579820004498, "grad_norm": 1.1067136526107788, "learning_rate": 8.173476513024278e-06, "loss": 0.2958, "step": 25245 }, { "epoch": 0.5585685896419631, "grad_norm": 1.175897479057312, "learning_rate": 8.170060174152548e-06, "loss": 0.2999, "step": 25250 }, { "epoch": 0.5586791972834764, "grad_norm": 0.5218721628189087, "learning_rate": 8.166644056245484e-06, "loss": 0.3236, "step": 25255 }, { "epoch": 0.5587898049249895, "grad_norm": 0.9576560854911804, "learning_rate": 8.163228159715577e-06, "loss": 0.2982, "step": 25260 }, { "epoch": 0.5589004125665028, "grad_norm": 1.096712350845337, "learning_rate": 8.159812484975294e-06, "loss": 0.342, "step": 25265 }, { "epoch": 0.5590110202080161, "grad_norm": 1.7886834144592285, "learning_rate": 8.156397032437083e-06, "loss": 0.4551, "step": 25270 }, { "epoch": 0.5591216278495293, "grad_norm": 0.8955926299095154, "learning_rate": 8.152981802513354e-06, "loss": 0.3361, "step": 25275 }, { "epoch": 0.5592322354910426, "grad_norm": 1.5144034624099731, "learning_rate": 8.149566795616493e-06, "loss": 0.3127, "step": 25280 }, { "epoch": 0.5593428431325559, "grad_norm": 2.787433624267578, "learning_rate": 8.146152012158869e-06, "loss": 0.3417, "step": 25285 }, { "epoch": 0.5594534507740692, "grad_norm": 0.9506085515022278, "learning_rate": 8.14273745255281e-06, "loss": 0.3761, "step": 25290 }, { "epoch": 0.5595640584155824, "grad_norm": 1.8414489030838013, "learning_rate": 8.13932311721062e-06, "loss": 0.2446, "step": 25295 }, { "epoch": 0.5596746660570957, "grad_norm": 1.237174391746521, "learning_rate": 8.135909006544587e-06, "loss": 0.3652, "step": 25300 }, { "epoch": 0.559785273698609, "grad_norm": 1.0143439769744873, "learning_rate": 8.13249512096696e-06, "loss": 0.3545, "step": 25305 }, { "epoch": 0.5598958813401221, "grad_norm": 1.3219130039215088, "learning_rate": 8.129081460889962e-06, "loss": 0.4206, "step": 25310 }, { "epoch": 0.5600064889816354, "grad_norm": 1.3463822603225708, "learning_rate": 8.125668026725795e-06, "loss": 0.4785, "step": 25315 }, { "epoch": 0.5601170966231487, "grad_norm": 1.2466976642608643, "learning_rate": 8.122254818886628e-06, "loss": 0.2977, "step": 25320 }, { "epoch": 0.560227704264662, "grad_norm": 1.6560250520706177, "learning_rate": 8.118841837784608e-06, "loss": 0.373, "step": 25325 }, { "epoch": 0.5603383119061752, "grad_norm": 0.8458669781684875, "learning_rate": 8.115429083831848e-06, "loss": 0.2278, "step": 25330 }, { "epoch": 0.5604489195476885, "grad_norm": 1.0096412897109985, "learning_rate": 8.112016557440436e-06, "loss": 0.289, "step": 25335 }, { "epoch": 0.5605595271892018, "grad_norm": 1.0722039937973022, "learning_rate": 8.10860425902244e-06, "loss": 0.5067, "step": 25340 }, { "epoch": 0.560670134830715, "grad_norm": 0.6981964111328125, "learning_rate": 8.105192188989886e-06, "loss": 0.181, "step": 25345 }, { "epoch": 0.5607807424722283, "grad_norm": 1.4869279861450195, "learning_rate": 8.101780347754783e-06, "loss": 0.2556, "step": 25350 }, { "epoch": 0.5608913501137415, "grad_norm": 0.9024001955986023, "learning_rate": 8.098368735729116e-06, "loss": 0.252, "step": 25355 }, { "epoch": 0.5610019577552547, "grad_norm": 1.0420411825180054, "learning_rate": 8.094957353324825e-06, "loss": 0.3575, "step": 25360 }, { "epoch": 0.561112565396768, "grad_norm": 1.4906069040298462, "learning_rate": 8.091546200953842e-06, "loss": 0.2943, "step": 25365 }, { "epoch": 0.5612231730382813, "grad_norm": 0.8491356372833252, "learning_rate": 8.08813527902806e-06, "loss": 0.2756, "step": 25370 }, { "epoch": 0.5613337806797946, "grad_norm": 1.0474365949630737, "learning_rate": 8.084724587959348e-06, "loss": 0.4769, "step": 25375 }, { "epoch": 0.5614443883213078, "grad_norm": 0.7196621894836426, "learning_rate": 8.081314128159539e-06, "loss": 0.2726, "step": 25380 }, { "epoch": 0.5615549959628211, "grad_norm": 2.455230712890625, "learning_rate": 8.077903900040456e-06, "loss": 0.3441, "step": 25385 }, { "epoch": 0.5616656036043344, "grad_norm": 1.678383231163025, "learning_rate": 8.074493904013875e-06, "loss": 0.3686, "step": 25390 }, { "epoch": 0.5617762112458476, "grad_norm": 0.8484804034233093, "learning_rate": 8.071084140491552e-06, "loss": 0.3563, "step": 25395 }, { "epoch": 0.5618868188873609, "grad_norm": 0.9408839344978333, "learning_rate": 8.067674609885221e-06, "loss": 0.2394, "step": 25400 }, { "epoch": 0.5619974265288741, "grad_norm": 1.2873724699020386, "learning_rate": 8.06426531260658e-06, "loss": 0.3285, "step": 25405 }, { "epoch": 0.5621080341703873, "grad_norm": 1.4565943479537964, "learning_rate": 8.060856249067291e-06, "loss": 0.408, "step": 25410 }, { "epoch": 0.5622186418119006, "grad_norm": 1.9340167045593262, "learning_rate": 8.05744741967901e-06, "loss": 0.3334, "step": 25415 }, { "epoch": 0.5623292494534139, "grad_norm": 3.364346742630005, "learning_rate": 8.054038824853347e-06, "loss": 0.343, "step": 25420 }, { "epoch": 0.5624398570949272, "grad_norm": 1.0482200384140015, "learning_rate": 8.050630465001887e-06, "loss": 0.2923, "step": 25425 }, { "epoch": 0.5625504647364404, "grad_norm": 1.0121819972991943, "learning_rate": 8.04722234053619e-06, "loss": 0.3957, "step": 25430 }, { "epoch": 0.5626610723779537, "grad_norm": 0.8397381901741028, "learning_rate": 8.043814451867784e-06, "loss": 0.3457, "step": 25435 }, { "epoch": 0.562771680019467, "grad_norm": 1.101967453956604, "learning_rate": 8.040406799408177e-06, "loss": 0.4007, "step": 25440 }, { "epoch": 0.5628822876609803, "grad_norm": 1.4096956253051758, "learning_rate": 8.036999383568836e-06, "loss": 0.4237, "step": 25445 }, { "epoch": 0.5629928953024934, "grad_norm": 1.3028652667999268, "learning_rate": 8.033592204761205e-06, "loss": 0.377, "step": 25450 }, { "epoch": 0.5631035029440067, "grad_norm": 1.8878597021102905, "learning_rate": 8.030185263396705e-06, "loss": 0.2502, "step": 25455 }, { "epoch": 0.56321411058552, "grad_norm": 0.9651315808296204, "learning_rate": 8.026778559886719e-06, "loss": 0.34, "step": 25460 }, { "epoch": 0.5633247182270332, "grad_norm": 0.890400230884552, "learning_rate": 8.023372094642603e-06, "loss": 0.5062, "step": 25465 }, { "epoch": 0.5634353258685465, "grad_norm": 0.5164498686790466, "learning_rate": 8.019965868075694e-06, "loss": 0.344, "step": 25470 }, { "epoch": 0.5635459335100598, "grad_norm": 1.2493772506713867, "learning_rate": 8.016559880597287e-06, "loss": 0.4238, "step": 25475 }, { "epoch": 0.563656541151573, "grad_norm": 1.292244791984558, "learning_rate": 8.013154132618653e-06, "loss": 0.4646, "step": 25480 }, { "epoch": 0.5637671487930863, "grad_norm": 0.9623879790306091, "learning_rate": 8.009748624551043e-06, "loss": 0.3722, "step": 25485 }, { "epoch": 0.5638777564345996, "grad_norm": 2.1860835552215576, "learning_rate": 8.006343356805662e-06, "loss": 0.3004, "step": 25490 }, { "epoch": 0.5639883640761129, "grad_norm": 2.0612778663635254, "learning_rate": 8.002938329793697e-06, "loss": 0.3858, "step": 25495 }, { "epoch": 0.564098971717626, "grad_norm": 1.3165439367294312, "learning_rate": 7.99953354392631e-06, "loss": 0.4325, "step": 25500 }, { "epoch": 0.5642095793591393, "grad_norm": 0.7055957913398743, "learning_rate": 7.996128999614621e-06, "loss": 0.3974, "step": 25505 }, { "epoch": 0.5643201870006526, "grad_norm": 1.1641567945480347, "learning_rate": 7.992724697269727e-06, "loss": 0.2779, "step": 25510 }, { "epoch": 0.5644307946421658, "grad_norm": 1.4573760032653809, "learning_rate": 7.989320637302705e-06, "loss": 0.3233, "step": 25515 }, { "epoch": 0.5645414022836791, "grad_norm": 1.5810638666152954, "learning_rate": 7.985916820124589e-06, "loss": 0.3366, "step": 25520 }, { "epoch": 0.5646520099251924, "grad_norm": 1.086431860923767, "learning_rate": 7.982513246146383e-06, "loss": 0.2705, "step": 25525 }, { "epoch": 0.5647626175667056, "grad_norm": 0.986234724521637, "learning_rate": 7.979109915779076e-06, "loss": 0.2956, "step": 25530 }, { "epoch": 0.5648732252082189, "grad_norm": 1.0161489248275757, "learning_rate": 7.975706829433614e-06, "loss": 0.3539, "step": 25535 }, { "epoch": 0.5649838328497322, "grad_norm": 0.8442850112915039, "learning_rate": 7.972303987520924e-06, "loss": 0.3131, "step": 25540 }, { "epoch": 0.5650944404912454, "grad_norm": 1.217244267463684, "learning_rate": 7.968901390451894e-06, "loss": 0.4115, "step": 25545 }, { "epoch": 0.5652050481327586, "grad_norm": 1.8653535842895508, "learning_rate": 7.965499038637385e-06, "loss": 0.3825, "step": 25550 }, { "epoch": 0.5653156557742719, "grad_norm": 0.8724094033241272, "learning_rate": 7.962096932488236e-06, "loss": 0.3985, "step": 25555 }, { "epoch": 0.5654262634157852, "grad_norm": 1.3203833103179932, "learning_rate": 7.958695072415244e-06, "loss": 0.3748, "step": 25560 }, { "epoch": 0.5655368710572984, "grad_norm": 0.8279010653495789, "learning_rate": 7.955293458829185e-06, "loss": 0.3097, "step": 25565 }, { "epoch": 0.5656474786988117, "grad_norm": 1.580322504043579, "learning_rate": 7.951892092140805e-06, "loss": 0.325, "step": 25570 }, { "epoch": 0.565758086340325, "grad_norm": 1.296364426612854, "learning_rate": 7.948490972760814e-06, "loss": 0.2556, "step": 25575 }, { "epoch": 0.5658686939818383, "grad_norm": 1.626991629600525, "learning_rate": 7.945090101099896e-06, "loss": 0.3588, "step": 25580 }, { "epoch": 0.5659793016233515, "grad_norm": 1.7481521368026733, "learning_rate": 7.941689477568711e-06, "loss": 0.2518, "step": 25585 }, { "epoch": 0.5660899092648648, "grad_norm": 1.2125145196914673, "learning_rate": 7.938289102577878e-06, "loss": 0.3602, "step": 25590 }, { "epoch": 0.566200516906378, "grad_norm": 1.9396299123764038, "learning_rate": 7.93488897653799e-06, "loss": 0.5141, "step": 25595 }, { "epoch": 0.5663111245478912, "grad_norm": 1.558049201965332, "learning_rate": 7.931489099859618e-06, "loss": 0.4068, "step": 25600 }, { "epoch": 0.5664217321894045, "grad_norm": 1.6014087200164795, "learning_rate": 7.92808947295329e-06, "loss": 0.3705, "step": 25605 }, { "epoch": 0.5665323398309178, "grad_norm": 1.506324291229248, "learning_rate": 7.924690096229509e-06, "loss": 0.3759, "step": 25610 }, { "epoch": 0.566642947472431, "grad_norm": 1.289455533027649, "learning_rate": 7.921290970098756e-06, "loss": 0.2911, "step": 25615 }, { "epoch": 0.5667535551139443, "grad_norm": 0.8725370168685913, "learning_rate": 7.91789209497147e-06, "loss": 0.3972, "step": 25620 }, { "epoch": 0.5668641627554576, "grad_norm": 1.0020122528076172, "learning_rate": 7.914493471258057e-06, "loss": 0.1946, "step": 25625 }, { "epoch": 0.5669747703969709, "grad_norm": 1.3077763319015503, "learning_rate": 7.911095099368912e-06, "loss": 0.3087, "step": 25630 }, { "epoch": 0.5670853780384841, "grad_norm": 0.6943287253379822, "learning_rate": 7.907696979714385e-06, "loss": 0.3164, "step": 25635 }, { "epoch": 0.5671959856799973, "grad_norm": 0.7708203792572021, "learning_rate": 7.904299112704788e-06, "loss": 0.4342, "step": 25640 }, { "epoch": 0.5673065933215106, "grad_norm": 1.3509708642959595, "learning_rate": 7.900901498750421e-06, "loss": 0.4583, "step": 25645 }, { "epoch": 0.5674172009630238, "grad_norm": 1.6295381784439087, "learning_rate": 7.897504138261543e-06, "loss": 0.2918, "step": 25650 }, { "epoch": 0.5675278086045371, "grad_norm": 0.6440744996070862, "learning_rate": 7.894107031648389e-06, "loss": 0.3567, "step": 25655 }, { "epoch": 0.5676384162460504, "grad_norm": 1.4368571043014526, "learning_rate": 7.890710179321149e-06, "loss": 0.3816, "step": 25660 }, { "epoch": 0.5677490238875637, "grad_norm": 1.0283771753311157, "learning_rate": 7.887313581689995e-06, "loss": 0.6022, "step": 25665 }, { "epoch": 0.5678596315290769, "grad_norm": 0.957869291305542, "learning_rate": 7.883917239165073e-06, "loss": 0.3223, "step": 25670 }, { "epoch": 0.5679702391705902, "grad_norm": 0.8480997085571289, "learning_rate": 7.88052115215648e-06, "loss": 0.3771, "step": 25675 }, { "epoch": 0.5680808468121035, "grad_norm": 0.9484270215034485, "learning_rate": 7.877125321074296e-06, "loss": 0.3106, "step": 25680 }, { "epoch": 0.5681914544536167, "grad_norm": 1.3135820627212524, "learning_rate": 7.87372974632857e-06, "loss": 0.3609, "step": 25685 }, { "epoch": 0.5683020620951299, "grad_norm": 0.9675602316856384, "learning_rate": 7.870334428329313e-06, "loss": 0.3059, "step": 25690 }, { "epoch": 0.5684126697366432, "grad_norm": 1.7946677207946777, "learning_rate": 7.866939367486505e-06, "loss": 0.3579, "step": 25695 }, { "epoch": 0.5685232773781564, "grad_norm": 0.9205043315887451, "learning_rate": 7.863544564210109e-06, "loss": 0.3432, "step": 25700 }, { "epoch": 0.5686338850196697, "grad_norm": 1.0317959785461426, "learning_rate": 7.860150018910037e-06, "loss": 0.3014, "step": 25705 }, { "epoch": 0.568744492661183, "grad_norm": 1.2980808019638062, "learning_rate": 7.856755731996182e-06, "loss": 0.4381, "step": 25710 }, { "epoch": 0.5688551003026963, "grad_norm": 0.9680275321006775, "learning_rate": 7.853361703878406e-06, "loss": 0.3252, "step": 25715 }, { "epoch": 0.5689657079442095, "grad_norm": 1.8019872903823853, "learning_rate": 7.849967934966533e-06, "loss": 0.2355, "step": 25720 }, { "epoch": 0.5690763155857228, "grad_norm": 0.9488751292228699, "learning_rate": 7.846574425670359e-06, "loss": 0.3013, "step": 25725 }, { "epoch": 0.5691869232272361, "grad_norm": 1.6048754453659058, "learning_rate": 7.843181176399657e-06, "loss": 0.4647, "step": 25730 }, { "epoch": 0.5692975308687492, "grad_norm": 1.8502719402313232, "learning_rate": 7.839788187564153e-06, "loss": 0.4437, "step": 25735 }, { "epoch": 0.5694081385102625, "grad_norm": 1.3951536417007446, "learning_rate": 7.836395459573548e-06, "loss": 0.3526, "step": 25740 }, { "epoch": 0.5695187461517758, "grad_norm": 0.7964100241661072, "learning_rate": 7.833002992837517e-06, "loss": 0.2766, "step": 25745 }, { "epoch": 0.569629353793289, "grad_norm": 1.0255342721939087, "learning_rate": 7.829610787765697e-06, "loss": 0.5097, "step": 25750 }, { "epoch": 0.5697399614348023, "grad_norm": 1.4967635869979858, "learning_rate": 7.8262188447677e-06, "loss": 0.3771, "step": 25755 }, { "epoch": 0.5698505690763156, "grad_norm": 0.6057402491569519, "learning_rate": 7.8228271642531e-06, "loss": 0.4377, "step": 25760 }, { "epoch": 0.5699611767178289, "grad_norm": 1.679445743560791, "learning_rate": 7.819435746631438e-06, "loss": 0.4745, "step": 25765 }, { "epoch": 0.5700717843593421, "grad_norm": 1.157857894897461, "learning_rate": 7.816044592312232e-06, "loss": 0.4325, "step": 25770 }, { "epoch": 0.5701823920008554, "grad_norm": 1.1626803874969482, "learning_rate": 7.812653701704957e-06, "loss": 0.2069, "step": 25775 }, { "epoch": 0.5702929996423687, "grad_norm": 1.8229470252990723, "learning_rate": 7.809263075219066e-06, "loss": 0.2545, "step": 25780 }, { "epoch": 0.5704036072838818, "grad_norm": 1.0439046621322632, "learning_rate": 7.805872713263977e-06, "loss": 0.462, "step": 25785 }, { "epoch": 0.5705142149253951, "grad_norm": 1.564125418663025, "learning_rate": 7.802482616249072e-06, "loss": 0.475, "step": 25790 }, { "epoch": 0.5706248225669084, "grad_norm": 0.8464411497116089, "learning_rate": 7.799092784583703e-06, "loss": 0.2731, "step": 25795 }, { "epoch": 0.5707354302084217, "grad_norm": 1.5215181112289429, "learning_rate": 7.795703218677197e-06, "loss": 0.2793, "step": 25800 }, { "epoch": 0.5708460378499349, "grad_norm": 0.5253804326057434, "learning_rate": 7.79231391893884e-06, "loss": 0.2557, "step": 25805 }, { "epoch": 0.5709566454914482, "grad_norm": 1.7275768518447876, "learning_rate": 7.788924885777884e-06, "loss": 0.3284, "step": 25810 }, { "epoch": 0.5710672531329615, "grad_norm": 0.7475365400314331, "learning_rate": 7.785536119603563e-06, "loss": 0.3515, "step": 25815 }, { "epoch": 0.5711778607744747, "grad_norm": 1.4789986610412598, "learning_rate": 7.782147620825062e-06, "loss": 0.5026, "step": 25820 }, { "epoch": 0.571288468415988, "grad_norm": 1.1067230701446533, "learning_rate": 7.778759389851542e-06, "loss": 0.3599, "step": 25825 }, { "epoch": 0.5713990760575012, "grad_norm": 1.5113378763198853, "learning_rate": 7.775371427092138e-06, "loss": 0.2739, "step": 25830 }, { "epoch": 0.5715096836990144, "grad_norm": 1.645276427268982, "learning_rate": 7.771983732955939e-06, "loss": 0.3774, "step": 25835 }, { "epoch": 0.5716202913405277, "grad_norm": 0.9040472507476807, "learning_rate": 7.768596307852002e-06, "loss": 0.4057, "step": 25840 }, { "epoch": 0.571730898982041, "grad_norm": 0.9782158136367798, "learning_rate": 7.765209152189372e-06, "loss": 0.2181, "step": 25845 }, { "epoch": 0.5718415066235543, "grad_norm": 1.6796963214874268, "learning_rate": 7.76182226637704e-06, "loss": 0.3245, "step": 25850 }, { "epoch": 0.5719521142650675, "grad_norm": 0.8019821643829346, "learning_rate": 7.758435650823966e-06, "loss": 0.3763, "step": 25855 }, { "epoch": 0.5720627219065808, "grad_norm": 2.279747724533081, "learning_rate": 7.755049305939088e-06, "loss": 0.2782, "step": 25860 }, { "epoch": 0.5721733295480941, "grad_norm": 0.8884673118591309, "learning_rate": 7.751663232131306e-06, "loss": 0.2725, "step": 25865 }, { "epoch": 0.5722839371896074, "grad_norm": 1.0522749423980713, "learning_rate": 7.748277429809492e-06, "loss": 0.2068, "step": 25870 }, { "epoch": 0.5723945448311205, "grad_norm": 0.9245744347572327, "learning_rate": 7.744891899382471e-06, "loss": 0.374, "step": 25875 }, { "epoch": 0.5725051524726338, "grad_norm": 1.6931103467941284, "learning_rate": 7.74150664125905e-06, "loss": 0.3808, "step": 25880 }, { "epoch": 0.572615760114147, "grad_norm": 1.010098934173584, "learning_rate": 7.738121655848001e-06, "loss": 0.2842, "step": 25885 }, { "epoch": 0.5727263677556603, "grad_norm": 0.9292153120040894, "learning_rate": 7.734736943558055e-06, "loss": 0.4172, "step": 25890 }, { "epoch": 0.5728369753971736, "grad_norm": 1.159869909286499, "learning_rate": 7.731352504797912e-06, "loss": 0.3386, "step": 25895 }, { "epoch": 0.5729475830386869, "grad_norm": 1.4256865978240967, "learning_rate": 7.72796833997625e-06, "loss": 0.3353, "step": 25900 }, { "epoch": 0.5730581906802001, "grad_norm": 1.5140025615692139, "learning_rate": 7.724584449501701e-06, "loss": 0.318, "step": 25905 }, { "epoch": 0.5731687983217134, "grad_norm": 1.2716355323791504, "learning_rate": 7.72120083378287e-06, "loss": 0.3819, "step": 25910 }, { "epoch": 0.5732794059632267, "grad_norm": 0.6826084852218628, "learning_rate": 7.717817493228328e-06, "loss": 0.321, "step": 25915 }, { "epoch": 0.57339001360474, "grad_norm": 1.3404425382614136, "learning_rate": 7.71443442824661e-06, "loss": 0.263, "step": 25920 }, { "epoch": 0.5735006212462531, "grad_norm": 0.7052589058876038, "learning_rate": 7.711051639246222e-06, "loss": 0.3889, "step": 25925 }, { "epoch": 0.5736112288877664, "grad_norm": 1.4532887935638428, "learning_rate": 7.707669126635636e-06, "loss": 0.2455, "step": 25930 }, { "epoch": 0.5737218365292797, "grad_norm": 0.8736953139305115, "learning_rate": 7.704286890823287e-06, "loss": 0.2915, "step": 25935 }, { "epoch": 0.5738324441707929, "grad_norm": 2.673733711242676, "learning_rate": 7.700904932217577e-06, "loss": 0.3906, "step": 25940 }, { "epoch": 0.5739430518123062, "grad_norm": 1.482399821281433, "learning_rate": 7.697523251226884e-06, "loss": 0.2771, "step": 25945 }, { "epoch": 0.5740536594538195, "grad_norm": 1.208821177482605, "learning_rate": 7.69414184825954e-06, "loss": 0.4062, "step": 25950 }, { "epoch": 0.5741642670953327, "grad_norm": 1.100634217262268, "learning_rate": 7.690760723723842e-06, "loss": 0.3471, "step": 25955 }, { "epoch": 0.574274874736846, "grad_norm": 1.2726978063583374, "learning_rate": 7.68737987802807e-06, "loss": 0.2734, "step": 25960 }, { "epoch": 0.5743854823783593, "grad_norm": 0.9835228323936462, "learning_rate": 7.683999311580458e-06, "loss": 0.4491, "step": 25965 }, { "epoch": 0.5744960900198725, "grad_norm": 1.6264044046401978, "learning_rate": 7.680619024789203e-06, "loss": 0.3672, "step": 25970 }, { "epoch": 0.5746066976613857, "grad_norm": 1.1381083726882935, "learning_rate": 7.677239018062476e-06, "loss": 0.2865, "step": 25975 }, { "epoch": 0.574717305302899, "grad_norm": 0.7207798957824707, "learning_rate": 7.673859291808414e-06, "loss": 0.1794, "step": 25980 }, { "epoch": 0.5748279129444123, "grad_norm": 1.0514999628067017, "learning_rate": 7.670479846435121e-06, "loss": 0.4463, "step": 25985 }, { "epoch": 0.5749385205859255, "grad_norm": 1.0898711681365967, "learning_rate": 7.667100682350655e-06, "loss": 0.3691, "step": 25990 }, { "epoch": 0.5750491282274388, "grad_norm": 0.7785205245018005, "learning_rate": 7.663721799963054e-06, "loss": 0.2393, "step": 25995 }, { "epoch": 0.5751597358689521, "grad_norm": 1.654563307762146, "learning_rate": 7.66034319968032e-06, "loss": 0.4019, "step": 26000 }, { "epoch": 0.5752703435104654, "grad_norm": 1.4445736408233643, "learning_rate": 7.656964881910413e-06, "loss": 0.295, "step": 26005 }, { "epoch": 0.5753809511519786, "grad_norm": 0.5657484531402588, "learning_rate": 7.653586847061263e-06, "loss": 0.3373, "step": 26010 }, { "epoch": 0.5754915587934919, "grad_norm": 1.3921555280685425, "learning_rate": 7.650209095540773e-06, "loss": 0.364, "step": 26015 }, { "epoch": 0.5756021664350051, "grad_norm": 1.7990710735321045, "learning_rate": 7.6468316277568e-06, "loss": 0.4453, "step": 26020 }, { "epoch": 0.5757127740765183, "grad_norm": 1.3175753355026245, "learning_rate": 7.64345444411717e-06, "loss": 0.3424, "step": 26025 }, { "epoch": 0.5758233817180316, "grad_norm": 1.077695608139038, "learning_rate": 7.640077545029686e-06, "loss": 0.3848, "step": 26030 }, { "epoch": 0.5759339893595449, "grad_norm": 1.8740607500076294, "learning_rate": 7.6367009309021e-06, "loss": 0.3716, "step": 26035 }, { "epoch": 0.5760445970010581, "grad_norm": 0.8629442453384399, "learning_rate": 7.633324602142137e-06, "loss": 0.2973, "step": 26040 }, { "epoch": 0.5761552046425714, "grad_norm": 1.024283528327942, "learning_rate": 7.629948559157495e-06, "loss": 0.2276, "step": 26045 }, { "epoch": 0.5762658122840847, "grad_norm": 0.6161295175552368, "learning_rate": 7.626572802355823e-06, "loss": 0.336, "step": 26050 }, { "epoch": 0.576376419925598, "grad_norm": 1.0821622610092163, "learning_rate": 7.6231973321447395e-06, "loss": 0.3529, "step": 26055 }, { "epoch": 0.5764870275671112, "grad_norm": 1.4478814601898193, "learning_rate": 7.619822148931843e-06, "loss": 0.241, "step": 26060 }, { "epoch": 0.5765976352086244, "grad_norm": 1.3735946416854858, "learning_rate": 7.616447253124679e-06, "loss": 0.2136, "step": 26065 }, { "epoch": 0.5767082428501377, "grad_norm": 1.5168274641036987, "learning_rate": 7.613072645130761e-06, "loss": 0.2561, "step": 26070 }, { "epoch": 0.5768188504916509, "grad_norm": 1.3368991613388062, "learning_rate": 7.609698325357578e-06, "loss": 0.3747, "step": 26075 }, { "epoch": 0.5769294581331642, "grad_norm": 1.0932687520980835, "learning_rate": 7.6063242942125745e-06, "loss": 0.381, "step": 26080 }, { "epoch": 0.5770400657746775, "grad_norm": 1.0577361583709717, "learning_rate": 7.602950552103171e-06, "loss": 0.1766, "step": 26085 }, { "epoch": 0.5771506734161908, "grad_norm": 1.5229800939559937, "learning_rate": 7.599577099436737e-06, "loss": 0.2205, "step": 26090 }, { "epoch": 0.577261281057704, "grad_norm": 1.6175657510757446, "learning_rate": 7.596203936620619e-06, "loss": 0.3655, "step": 26095 }, { "epoch": 0.5773718886992173, "grad_norm": 1.1645375490188599, "learning_rate": 7.592831064062127e-06, "loss": 0.3701, "step": 26100 }, { "epoch": 0.5774824963407306, "grad_norm": 1.7113946676254272, "learning_rate": 7.589458482168532e-06, "loss": 0.3249, "step": 26105 }, { "epoch": 0.5775931039822438, "grad_norm": 1.1024349927902222, "learning_rate": 7.586086191347071e-06, "loss": 0.2365, "step": 26110 }, { "epoch": 0.577703711623757, "grad_norm": 0.727011501789093, "learning_rate": 7.582714192004955e-06, "loss": 0.3168, "step": 26115 }, { "epoch": 0.5778143192652703, "grad_norm": 1.2468069791793823, "learning_rate": 7.579342484549342e-06, "loss": 0.2942, "step": 26120 }, { "epoch": 0.5779249269067835, "grad_norm": 0.7105509042739868, "learning_rate": 7.575971069387368e-06, "loss": 0.3572, "step": 26125 }, { "epoch": 0.5780355345482968, "grad_norm": 1.22966468334198, "learning_rate": 7.5725999469261345e-06, "loss": 0.3797, "step": 26130 }, { "epoch": 0.5781461421898101, "grad_norm": 1.0931991338729858, "learning_rate": 7.569229117572698e-06, "loss": 0.24, "step": 26135 }, { "epoch": 0.5782567498313234, "grad_norm": 1.508395791053772, "learning_rate": 7.565858581734085e-06, "loss": 0.3341, "step": 26140 }, { "epoch": 0.5783673574728366, "grad_norm": 0.7557709813117981, "learning_rate": 7.562488339817293e-06, "loss": 0.3705, "step": 26145 }, { "epoch": 0.5784779651143499, "grad_norm": 1.4306650161743164, "learning_rate": 7.559118392229271e-06, "loss": 0.2674, "step": 26150 }, { "epoch": 0.5785885727558632, "grad_norm": 0.5380489230155945, "learning_rate": 7.555748739376939e-06, "loss": 0.1798, "step": 26155 }, { "epoch": 0.5786991803973763, "grad_norm": 1.4843422174453735, "learning_rate": 7.55237938166719e-06, "loss": 0.3697, "step": 26160 }, { "epoch": 0.5788097880388896, "grad_norm": 0.9254232048988342, "learning_rate": 7.549010319506865e-06, "loss": 0.3218, "step": 26165 }, { "epoch": 0.5789203956804029, "grad_norm": 0.9276363849639893, "learning_rate": 7.545641553302776e-06, "loss": 0.256, "step": 26170 }, { "epoch": 0.5790310033219161, "grad_norm": 1.1454998254776, "learning_rate": 7.5422730834617054e-06, "loss": 0.2033, "step": 26175 }, { "epoch": 0.5791416109634294, "grad_norm": 1.7386970520019531, "learning_rate": 7.538904910390395e-06, "loss": 0.3673, "step": 26180 }, { "epoch": 0.5792522186049427, "grad_norm": 0.9784799218177795, "learning_rate": 7.535537034495544e-06, "loss": 0.2217, "step": 26185 }, { "epoch": 0.579362826246456, "grad_norm": 0.7608324289321899, "learning_rate": 7.532169456183831e-06, "loss": 0.278, "step": 26190 }, { "epoch": 0.5794734338879692, "grad_norm": 1.9674432277679443, "learning_rate": 7.528802175861884e-06, "loss": 0.3285, "step": 26195 }, { "epoch": 0.5795840415294825, "grad_norm": 1.4456169605255127, "learning_rate": 7.525435193936306e-06, "loss": 0.3799, "step": 26200 }, { "epoch": 0.5796946491709958, "grad_norm": 1.6314435005187988, "learning_rate": 7.522068510813654e-06, "loss": 0.4792, "step": 26205 }, { "epoch": 0.5798052568125089, "grad_norm": 1.3556782007217407, "learning_rate": 7.5187021269004545e-06, "loss": 0.4046, "step": 26210 }, { "epoch": 0.5799158644540222, "grad_norm": 1.0424742698669434, "learning_rate": 7.515336042603204e-06, "loss": 0.4607, "step": 26215 }, { "epoch": 0.5800264720955355, "grad_norm": 0.9987685680389404, "learning_rate": 7.511970258328349e-06, "loss": 0.3874, "step": 26220 }, { "epoch": 0.5801370797370488, "grad_norm": 0.9974262714385986, "learning_rate": 7.508604774482306e-06, "loss": 0.4906, "step": 26225 }, { "epoch": 0.580247687378562, "grad_norm": 1.2355799674987793, "learning_rate": 7.505239591471463e-06, "loss": 0.2845, "step": 26230 }, { "epoch": 0.5803582950200753, "grad_norm": 0.8021748065948486, "learning_rate": 7.5018747097021596e-06, "loss": 0.2137, "step": 26235 }, { "epoch": 0.5804689026615886, "grad_norm": 1.1507755517959595, "learning_rate": 7.498510129580703e-06, "loss": 0.2558, "step": 26240 }, { "epoch": 0.5805795103031018, "grad_norm": 1.6761549711227417, "learning_rate": 7.495145851513372e-06, "loss": 0.3688, "step": 26245 }, { "epoch": 0.5806901179446151, "grad_norm": 0.6508427262306213, "learning_rate": 7.491781875906395e-06, "loss": 0.3257, "step": 26250 }, { "epoch": 0.5808007255861283, "grad_norm": 1.3093737363815308, "learning_rate": 7.488418203165972e-06, "loss": 0.4743, "step": 26255 }, { "epoch": 0.5809113332276415, "grad_norm": 2.684899091720581, "learning_rate": 7.485054833698273e-06, "loss": 0.4496, "step": 26260 }, { "epoch": 0.5810219408691548, "grad_norm": 1.0303727388381958, "learning_rate": 7.481691767909418e-06, "loss": 0.2981, "step": 26265 }, { "epoch": 0.5811325485106681, "grad_norm": 1.0103760957717896, "learning_rate": 7.478329006205489e-06, "loss": 0.3967, "step": 26270 }, { "epoch": 0.5812431561521814, "grad_norm": 0.8408295512199402, "learning_rate": 7.474966548992551e-06, "loss": 0.4176, "step": 26275 }, { "epoch": 0.5813537637936946, "grad_norm": 1.0991673469543457, "learning_rate": 7.4716043966766176e-06, "loss": 0.307, "step": 26280 }, { "epoch": 0.5814643714352079, "grad_norm": 2.436208963394165, "learning_rate": 7.468242549663657e-06, "loss": 0.3243, "step": 26285 }, { "epoch": 0.5815749790767212, "grad_norm": 1.374452829360962, "learning_rate": 7.464881008359621e-06, "loss": 0.3436, "step": 26290 }, { "epoch": 0.5816855867182344, "grad_norm": 1.0999287366867065, "learning_rate": 7.461519773170411e-06, "loss": 0.3052, "step": 26295 }, { "epoch": 0.5817961943597477, "grad_norm": 1.1146676540374756, "learning_rate": 7.458158844501901e-06, "loss": 0.1953, "step": 26300 }, { "epoch": 0.5819068020012609, "grad_norm": 0.9187739491462708, "learning_rate": 7.454798222759915e-06, "loss": 0.3557, "step": 26305 }, { "epoch": 0.5820174096427742, "grad_norm": 2.675835609436035, "learning_rate": 7.451437908350247e-06, "loss": 0.337, "step": 26310 }, { "epoch": 0.5821280172842874, "grad_norm": 1.0428698062896729, "learning_rate": 7.448077901678661e-06, "loss": 0.3867, "step": 26315 }, { "epoch": 0.5822386249258007, "grad_norm": 1.6026296615600586, "learning_rate": 7.444718203150869e-06, "loss": 0.2891, "step": 26320 }, { "epoch": 0.582349232567314, "grad_norm": 0.9433096051216125, "learning_rate": 7.441358813172554e-06, "loss": 0.2464, "step": 26325 }, { "epoch": 0.5824598402088272, "grad_norm": 1.2519150972366333, "learning_rate": 7.43799973214937e-06, "loss": 0.3629, "step": 26330 }, { "epoch": 0.5825704478503405, "grad_norm": 0.815462052822113, "learning_rate": 7.434640960486916e-06, "loss": 0.5026, "step": 26335 }, { "epoch": 0.5826810554918538, "grad_norm": 1.6502201557159424, "learning_rate": 7.431282498590764e-06, "loss": 0.4155, "step": 26340 }, { "epoch": 0.582791663133367, "grad_norm": 0.745641827583313, "learning_rate": 7.427924346866453e-06, "loss": 0.2592, "step": 26345 }, { "epoch": 0.5829022707748802, "grad_norm": 1.0502383708953857, "learning_rate": 7.4245665057194715e-06, "loss": 0.3119, "step": 26350 }, { "epoch": 0.5830128784163935, "grad_norm": 1.3653409481048584, "learning_rate": 7.4212089755552795e-06, "loss": 0.4573, "step": 26355 }, { "epoch": 0.5831234860579068, "grad_norm": 1.715455412864685, "learning_rate": 7.417851756779304e-06, "loss": 0.3094, "step": 26360 }, { "epoch": 0.58323409369942, "grad_norm": 0.7931005954742432, "learning_rate": 7.414494849796919e-06, "loss": 0.2608, "step": 26365 }, { "epoch": 0.5833447013409333, "grad_norm": 1.3173383474349976, "learning_rate": 7.411138255013473e-06, "loss": 0.391, "step": 26370 }, { "epoch": 0.5834553089824466, "grad_norm": 1.2870436906814575, "learning_rate": 7.407781972834281e-06, "loss": 0.3837, "step": 26375 }, { "epoch": 0.5835659166239598, "grad_norm": 1.5405027866363525, "learning_rate": 7.404426003664604e-06, "loss": 0.3858, "step": 26380 }, { "epoch": 0.5836765242654731, "grad_norm": 1.715132236480713, "learning_rate": 7.401070347909675e-06, "loss": 0.2747, "step": 26385 }, { "epoch": 0.5837871319069864, "grad_norm": 1.1497551202774048, "learning_rate": 7.3977150059746914e-06, "loss": 0.3061, "step": 26390 }, { "epoch": 0.5838977395484997, "grad_norm": 1.4309439659118652, "learning_rate": 7.3943599782648115e-06, "loss": 0.307, "step": 26395 }, { "epoch": 0.5840083471900128, "grad_norm": 1.0915881395339966, "learning_rate": 7.391005265185147e-06, "loss": 0.3545, "step": 26400 }, { "epoch": 0.5841189548315261, "grad_norm": 0.9134471416473389, "learning_rate": 7.3876508671407855e-06, "loss": 0.4306, "step": 26405 }, { "epoch": 0.5842295624730394, "grad_norm": 0.7167361974716187, "learning_rate": 7.384296784536764e-06, "loss": 0.2552, "step": 26410 }, { "epoch": 0.5843401701145526, "grad_norm": 1.1354670524597168, "learning_rate": 7.3809430177780925e-06, "loss": 0.4691, "step": 26415 }, { "epoch": 0.5844507777560659, "grad_norm": 1.4908353090286255, "learning_rate": 7.377589567269734e-06, "loss": 0.3345, "step": 26420 }, { "epoch": 0.5845613853975792, "grad_norm": 1.9780861139297485, "learning_rate": 7.374236433416615e-06, "loss": 0.4705, "step": 26425 }, { "epoch": 0.5846719930390925, "grad_norm": 0.9891182780265808, "learning_rate": 7.37088361662363e-06, "loss": 0.275, "step": 26430 }, { "epoch": 0.5847826006806057, "grad_norm": 1.1754040718078613, "learning_rate": 7.3675311172956275e-06, "loss": 0.2919, "step": 26435 }, { "epoch": 0.584893208322119, "grad_norm": 0.7495017051696777, "learning_rate": 7.364178935837418e-06, "loss": 0.4985, "step": 26440 }, { "epoch": 0.5850038159636322, "grad_norm": 1.0055111646652222, "learning_rate": 7.360827072653785e-06, "loss": 0.3301, "step": 26445 }, { "epoch": 0.5851144236051454, "grad_norm": 1.4785728454589844, "learning_rate": 7.357475528149456e-06, "loss": 0.297, "step": 26450 }, { "epoch": 0.5852250312466587, "grad_norm": 1.1529371738433838, "learning_rate": 7.354124302729131e-06, "loss": 0.2601, "step": 26455 }, { "epoch": 0.585335638888172, "grad_norm": 1.6557087898254395, "learning_rate": 7.350773396797475e-06, "loss": 0.5106, "step": 26460 }, { "epoch": 0.5854462465296852, "grad_norm": 1.0898329019546509, "learning_rate": 7.347422810759101e-06, "loss": 0.2424, "step": 26465 }, { "epoch": 0.5855568541711985, "grad_norm": 2.7238974571228027, "learning_rate": 7.344072545018595e-06, "loss": 0.3571, "step": 26470 }, { "epoch": 0.5856674618127118, "grad_norm": 0.8416616916656494, "learning_rate": 7.340722599980505e-06, "loss": 0.3151, "step": 26475 }, { "epoch": 0.5857780694542251, "grad_norm": 0.3797456920146942, "learning_rate": 7.337372976049331e-06, "loss": 0.4773, "step": 26480 }, { "epoch": 0.5858886770957383, "grad_norm": 1.3975237607955933, "learning_rate": 7.334023673629531e-06, "loss": 0.4018, "step": 26485 }, { "epoch": 0.5859992847372516, "grad_norm": 1.2891210317611694, "learning_rate": 7.330674693125549e-06, "loss": 0.4909, "step": 26490 }, { "epoch": 0.5861098923787648, "grad_norm": 1.790027141571045, "learning_rate": 7.327326034941764e-06, "loss": 0.3775, "step": 26495 }, { "epoch": 0.586220500020278, "grad_norm": 1.412735939025879, "learning_rate": 7.323977699482523e-06, "loss": 0.4518, "step": 26500 }, { "epoch": 0.5863311076617913, "grad_norm": 1.7174813747406006, "learning_rate": 7.320629687152141e-06, "loss": 0.3398, "step": 26505 }, { "epoch": 0.5864417153033046, "grad_norm": 0.8637757897377014, "learning_rate": 7.31728199835489e-06, "loss": 0.2613, "step": 26510 }, { "epoch": 0.5865523229448178, "grad_norm": 1.1112302541732788, "learning_rate": 7.313934633494997e-06, "loss": 0.1928, "step": 26515 }, { "epoch": 0.5866629305863311, "grad_norm": 1.484520435333252, "learning_rate": 7.310587592976661e-06, "loss": 0.3787, "step": 26520 }, { "epoch": 0.5867735382278444, "grad_norm": 1.3247549533843994, "learning_rate": 7.307240877204032e-06, "loss": 0.2468, "step": 26525 }, { "epoch": 0.5868841458693577, "grad_norm": 1.0969818830490112, "learning_rate": 7.303894486581231e-06, "loss": 0.4522, "step": 26530 }, { "epoch": 0.5869947535108709, "grad_norm": 1.2989603281021118, "learning_rate": 7.300548421512328e-06, "loss": 0.3982, "step": 26535 }, { "epoch": 0.5871053611523841, "grad_norm": 1.0030299425125122, "learning_rate": 7.297202682401358e-06, "loss": 0.2931, "step": 26540 }, { "epoch": 0.5872159687938974, "grad_norm": 1.281815528869629, "learning_rate": 7.293857269652324e-06, "loss": 0.2733, "step": 26545 }, { "epoch": 0.5873265764354106, "grad_norm": 0.9956468939781189, "learning_rate": 7.29051218366918e-06, "loss": 0.3326, "step": 26550 }, { "epoch": 0.5874371840769239, "grad_norm": 0.5369442701339722, "learning_rate": 7.287167424855841e-06, "loss": 0.3179, "step": 26555 }, { "epoch": 0.5875477917184372, "grad_norm": 1.211045503616333, "learning_rate": 7.283822993616194e-06, "loss": 0.2876, "step": 26560 }, { "epoch": 0.5876583993599505, "grad_norm": 0.7938262820243835, "learning_rate": 7.280478890354071e-06, "loss": 0.3416, "step": 26565 }, { "epoch": 0.5877690070014637, "grad_norm": 0.5630358457565308, "learning_rate": 7.277135115473271e-06, "loss": 0.2747, "step": 26570 }, { "epoch": 0.587879614642977, "grad_norm": 1.011197805404663, "learning_rate": 7.2737916693775614e-06, "loss": 0.2451, "step": 26575 }, { "epoch": 0.5879902222844903, "grad_norm": 1.0406162738800049, "learning_rate": 7.270448552470654e-06, "loss": 0.3763, "step": 26580 }, { "epoch": 0.5881008299260035, "grad_norm": 1.0775939226150513, "learning_rate": 7.267105765156229e-06, "loss": 0.3384, "step": 26585 }, { "epoch": 0.5882114375675167, "grad_norm": 1.2662125825881958, "learning_rate": 7.2637633078379365e-06, "loss": 0.4572, "step": 26590 }, { "epoch": 0.58832204520903, "grad_norm": 1.3693740367889404, "learning_rate": 7.260421180919369e-06, "loss": 0.4245, "step": 26595 }, { "epoch": 0.5884326528505432, "grad_norm": 1.1201125383377075, "learning_rate": 7.257079384804087e-06, "loss": 0.3657, "step": 26600 }, { "epoch": 0.5885432604920565, "grad_norm": 0.7924851179122925, "learning_rate": 7.253737919895613e-06, "loss": 0.3357, "step": 26605 }, { "epoch": 0.5886538681335698, "grad_norm": 1.2799700498580933, "learning_rate": 7.250396786597431e-06, "loss": 0.3671, "step": 26610 }, { "epoch": 0.5887644757750831, "grad_norm": 1.5117011070251465, "learning_rate": 7.2470559853129765e-06, "loss": 0.4117, "step": 26615 }, { "epoch": 0.5888750834165963, "grad_norm": 1.2035537958145142, "learning_rate": 7.2437155164456526e-06, "loss": 0.3598, "step": 26620 }, { "epoch": 0.5889856910581096, "grad_norm": 1.8079127073287964, "learning_rate": 7.240375380398821e-06, "loss": 0.3354, "step": 26625 }, { "epoch": 0.5890962986996229, "grad_norm": 1.4568604230880737, "learning_rate": 7.237035577575803e-06, "loss": 0.194, "step": 26630 }, { "epoch": 0.589206906341136, "grad_norm": 1.3759982585906982, "learning_rate": 7.233696108379875e-06, "loss": 0.2768, "step": 26635 }, { "epoch": 0.5893175139826493, "grad_norm": 0.9972688555717468, "learning_rate": 7.230356973214276e-06, "loss": 0.366, "step": 26640 }, { "epoch": 0.5894281216241626, "grad_norm": 1.948412537574768, "learning_rate": 7.227018172482214e-06, "loss": 0.3564, "step": 26645 }, { "epoch": 0.5895387292656759, "grad_norm": 1.5050365924835205, "learning_rate": 7.223679706586841e-06, "loss": 0.4357, "step": 26650 }, { "epoch": 0.5896493369071891, "grad_norm": 1.1497383117675781, "learning_rate": 7.220341575931274e-06, "loss": 0.4676, "step": 26655 }, { "epoch": 0.5897599445487024, "grad_norm": 1.424721360206604, "learning_rate": 7.217003780918598e-06, "loss": 0.4857, "step": 26660 }, { "epoch": 0.5898705521902157, "grad_norm": 1.0067975521087646, "learning_rate": 7.213666321951846e-06, "loss": 0.3556, "step": 26665 }, { "epoch": 0.5899811598317289, "grad_norm": 0.9062950015068054, "learning_rate": 7.210329199434013e-06, "loss": 0.3155, "step": 26670 }, { "epoch": 0.5900917674732422, "grad_norm": 0.717110276222229, "learning_rate": 7.206992413768063e-06, "loss": 0.2881, "step": 26675 }, { "epoch": 0.5902023751147554, "grad_norm": 1.0833592414855957, "learning_rate": 7.203655965356906e-06, "loss": 0.2891, "step": 26680 }, { "epoch": 0.5903129827562686, "grad_norm": 1.8575738668441772, "learning_rate": 7.200319854603417e-06, "loss": 0.4122, "step": 26685 }, { "epoch": 0.5904235903977819, "grad_norm": 1.159058690071106, "learning_rate": 7.196984081910437e-06, "loss": 0.298, "step": 26690 }, { "epoch": 0.5905341980392952, "grad_norm": 1.5216842889785767, "learning_rate": 7.193648647680752e-06, "loss": 0.3146, "step": 26695 }, { "epoch": 0.5906448056808085, "grad_norm": 0.9007142782211304, "learning_rate": 7.190313552317112e-06, "loss": 0.2321, "step": 26700 }, { "epoch": 0.5907554133223217, "grad_norm": 0.772156298160553, "learning_rate": 7.186978796222242e-06, "loss": 0.3088, "step": 26705 }, { "epoch": 0.590866020963835, "grad_norm": 1.4200588464736938, "learning_rate": 7.183644379798802e-06, "loss": 0.3253, "step": 26710 }, { "epoch": 0.5909766286053483, "grad_norm": 1.1094930171966553, "learning_rate": 7.180310303449421e-06, "loss": 0.3744, "step": 26715 }, { "epoch": 0.5910872362468615, "grad_norm": 1.3206818103790283, "learning_rate": 7.1769765675766925e-06, "loss": 0.4231, "step": 26720 }, { "epoch": 0.5911978438883748, "grad_norm": 1.0509768724441528, "learning_rate": 7.173643172583167e-06, "loss": 0.4984, "step": 26725 }, { "epoch": 0.591308451529888, "grad_norm": 1.5797677040100098, "learning_rate": 7.17031011887134e-06, "loss": 0.5062, "step": 26730 }, { "epoch": 0.5914190591714013, "grad_norm": 1.499584674835205, "learning_rate": 7.166977406843686e-06, "loss": 0.2113, "step": 26735 }, { "epoch": 0.5915296668129145, "grad_norm": 1.505356788635254, "learning_rate": 7.163645036902624e-06, "loss": 0.3623, "step": 26740 }, { "epoch": 0.5916402744544278, "grad_norm": 0.7673457264900208, "learning_rate": 7.1603130094505435e-06, "loss": 0.2834, "step": 26745 }, { "epoch": 0.5917508820959411, "grad_norm": 1.017520546913147, "learning_rate": 7.156981324889778e-06, "loss": 0.3484, "step": 26750 }, { "epoch": 0.5918614897374543, "grad_norm": 1.4547864198684692, "learning_rate": 7.1536499836226295e-06, "loss": 0.4646, "step": 26755 }, { "epoch": 0.5919720973789676, "grad_norm": 1.2247995138168335, "learning_rate": 7.150318986051361e-06, "loss": 0.4857, "step": 26760 }, { "epoch": 0.5920827050204809, "grad_norm": 1.4876009225845337, "learning_rate": 7.146988332578184e-06, "loss": 0.2511, "step": 26765 }, { "epoch": 0.5921933126619942, "grad_norm": 0.8212443590164185, "learning_rate": 7.143658023605273e-06, "loss": 0.2343, "step": 26770 }, { "epoch": 0.5923039203035073, "grad_norm": 1.4679373502731323, "learning_rate": 7.140328059534769e-06, "loss": 0.4143, "step": 26775 }, { "epoch": 0.5924145279450206, "grad_norm": 1.0805134773254395, "learning_rate": 7.136998440768757e-06, "loss": 0.4127, "step": 26780 }, { "epoch": 0.5925251355865339, "grad_norm": 1.2690606117248535, "learning_rate": 7.133669167709287e-06, "loss": 0.1416, "step": 26785 }, { "epoch": 0.5926357432280471, "grad_norm": 1.244911551475525, "learning_rate": 7.130340240758377e-06, "loss": 0.3614, "step": 26790 }, { "epoch": 0.5927463508695604, "grad_norm": 1.0433951616287231, "learning_rate": 7.127011660317984e-06, "loss": 0.3441, "step": 26795 }, { "epoch": 0.5928569585110737, "grad_norm": 2.3153018951416016, "learning_rate": 7.123683426790036e-06, "loss": 0.4113, "step": 26800 }, { "epoch": 0.592967566152587, "grad_norm": 2.058821678161621, "learning_rate": 7.120355540576418e-06, "loss": 0.4923, "step": 26805 }, { "epoch": 0.5930781737941002, "grad_norm": 1.7016676664352417, "learning_rate": 7.117028002078972e-06, "loss": 0.2957, "step": 26810 }, { "epoch": 0.5931887814356135, "grad_norm": 1.1016616821289062, "learning_rate": 7.113700811699491e-06, "loss": 0.4792, "step": 26815 }, { "epoch": 0.5932993890771268, "grad_norm": 1.2546608448028564, "learning_rate": 7.110373969839739e-06, "loss": 0.2978, "step": 26820 }, { "epoch": 0.5934099967186399, "grad_norm": 1.4683732986450195, "learning_rate": 7.10704747690143e-06, "loss": 0.3486, "step": 26825 }, { "epoch": 0.5935206043601532, "grad_norm": 0.9690577983856201, "learning_rate": 7.103721333286231e-06, "loss": 0.4522, "step": 26830 }, { "epoch": 0.5936312120016665, "grad_norm": 1.163960337638855, "learning_rate": 7.100395539395779e-06, "loss": 0.3162, "step": 26835 }, { "epoch": 0.5937418196431797, "grad_norm": 1.4604874849319458, "learning_rate": 7.097070095631659e-06, "loss": 0.2682, "step": 26840 }, { "epoch": 0.593852427284693, "grad_norm": 0.8852854371070862, "learning_rate": 7.0937450023954246e-06, "loss": 0.2823, "step": 26845 }, { "epoch": 0.5939630349262063, "grad_norm": 0.8832587003707886, "learning_rate": 7.090420260088571e-06, "loss": 0.4187, "step": 26850 }, { "epoch": 0.5940736425677196, "grad_norm": 1.0225024223327637, "learning_rate": 7.087095869112561e-06, "loss": 0.3784, "step": 26855 }, { "epoch": 0.5941842502092328, "grad_norm": 1.3357971906661987, "learning_rate": 7.08377182986882e-06, "loss": 0.3874, "step": 26860 }, { "epoch": 0.5942948578507461, "grad_norm": 2.1070754528045654, "learning_rate": 7.08044814275872e-06, "loss": 0.3711, "step": 26865 }, { "epoch": 0.5944054654922593, "grad_norm": 1.9137296676635742, "learning_rate": 7.077124808183593e-06, "loss": 0.2851, "step": 26870 }, { "epoch": 0.5945160731337725, "grad_norm": 0.8981866240501404, "learning_rate": 7.0738018265447375e-06, "loss": 0.485, "step": 26875 }, { "epoch": 0.5946266807752858, "grad_norm": 1.506633996963501, "learning_rate": 7.070479198243395e-06, "loss": 0.3811, "step": 26880 }, { "epoch": 0.5947372884167991, "grad_norm": 1.739877462387085, "learning_rate": 7.067156923680774e-06, "loss": 0.2836, "step": 26885 }, { "epoch": 0.5948478960583123, "grad_norm": 1.7828913927078247, "learning_rate": 7.0638350032580436e-06, "loss": 0.3569, "step": 26890 }, { "epoch": 0.5949585036998256, "grad_norm": 1.2114813327789307, "learning_rate": 7.06051343737632e-06, "loss": 0.4641, "step": 26895 }, { "epoch": 0.5950691113413389, "grad_norm": 0.9936716556549072, "learning_rate": 7.057192226436678e-06, "loss": 0.3071, "step": 26900 }, { "epoch": 0.5951797189828522, "grad_norm": 1.2068077325820923, "learning_rate": 7.05387137084016e-06, "loss": 0.3523, "step": 26905 }, { "epoch": 0.5952903266243654, "grad_norm": 1.2922370433807373, "learning_rate": 7.050550870987755e-06, "loss": 0.2554, "step": 26910 }, { "epoch": 0.5954009342658787, "grad_norm": 1.0338054895401, "learning_rate": 7.047230727280406e-06, "loss": 0.1971, "step": 26915 }, { "epoch": 0.5955115419073919, "grad_norm": 1.128940463066101, "learning_rate": 7.043910940119032e-06, "loss": 0.3382, "step": 26920 }, { "epoch": 0.5956221495489051, "grad_norm": 1.6386895179748535, "learning_rate": 7.040591509904489e-06, "loss": 0.3607, "step": 26925 }, { "epoch": 0.5957327571904184, "grad_norm": 1.1514850854873657, "learning_rate": 7.037272437037594e-06, "loss": 0.3139, "step": 26930 }, { "epoch": 0.5958433648319317, "grad_norm": 1.0921642780303955, "learning_rate": 7.033953721919131e-06, "loss": 0.3605, "step": 26935 }, { "epoch": 0.595953972473445, "grad_norm": 0.9486144185066223, "learning_rate": 7.0306353649498314e-06, "loss": 0.4358, "step": 26940 }, { "epoch": 0.5960645801149582, "grad_norm": 1.4082295894622803, "learning_rate": 7.027317366530381e-06, "loss": 0.3041, "step": 26945 }, { "epoch": 0.5961751877564715, "grad_norm": 1.1861480474472046, "learning_rate": 7.023999727061432e-06, "loss": 0.3926, "step": 26950 }, { "epoch": 0.5962857953979848, "grad_norm": 0.7745469808578491, "learning_rate": 7.0206824469435875e-06, "loss": 0.3324, "step": 26955 }, { "epoch": 0.596396403039498, "grad_norm": 1.2961653470993042, "learning_rate": 7.017365526577411e-06, "loss": 0.2081, "step": 26960 }, { "epoch": 0.5965070106810112, "grad_norm": 1.4407340288162231, "learning_rate": 7.014048966363414e-06, "loss": 0.3717, "step": 26965 }, { "epoch": 0.5966176183225245, "grad_norm": 1.684614658355713, "learning_rate": 7.010732766702071e-06, "loss": 0.4083, "step": 26970 }, { "epoch": 0.5967282259640377, "grad_norm": 0.9213442206382751, "learning_rate": 7.007416927993818e-06, "loss": 0.2608, "step": 26975 }, { "epoch": 0.596838833605551, "grad_norm": 0.8645250797271729, "learning_rate": 7.004101450639035e-06, "loss": 0.316, "step": 26980 }, { "epoch": 0.5969494412470643, "grad_norm": 1.0837551355361938, "learning_rate": 7.000786335038065e-06, "loss": 0.4556, "step": 26985 }, { "epoch": 0.5970600488885776, "grad_norm": 1.5790997743606567, "learning_rate": 6.997471581591213e-06, "loss": 0.2896, "step": 26990 }, { "epoch": 0.5971706565300908, "grad_norm": 1.1450124979019165, "learning_rate": 6.994157190698729e-06, "loss": 0.2538, "step": 26995 }, { "epoch": 0.5972812641716041, "grad_norm": 0.9293268322944641, "learning_rate": 6.990843162760823e-06, "loss": 0.2061, "step": 27000 }, { "epoch": 0.5973918718131174, "grad_norm": 0.7611274719238281, "learning_rate": 6.987529498177672e-06, "loss": 0.3242, "step": 27005 }, { "epoch": 0.5975024794546306, "grad_norm": 0.8561445474624634, "learning_rate": 6.984216197349392e-06, "loss": 0.2623, "step": 27010 }, { "epoch": 0.5976130870961438, "grad_norm": 1.024128794670105, "learning_rate": 6.980903260676062e-06, "loss": 0.3142, "step": 27015 }, { "epoch": 0.5977236947376571, "grad_norm": 1.1231871843338013, "learning_rate": 6.977590688557727e-06, "loss": 0.2846, "step": 27020 }, { "epoch": 0.5978343023791703, "grad_norm": 1.4964812994003296, "learning_rate": 6.974278481394372e-06, "loss": 0.3216, "step": 27025 }, { "epoch": 0.5979449100206836, "grad_norm": 1.2281216382980347, "learning_rate": 6.970966639585942e-06, "loss": 0.3183, "step": 27030 }, { "epoch": 0.5980555176621969, "grad_norm": 0.9395133852958679, "learning_rate": 6.967655163532348e-06, "loss": 0.2134, "step": 27035 }, { "epoch": 0.5981661253037102, "grad_norm": 1.3123165369033813, "learning_rate": 6.9643440536334485e-06, "loss": 0.2872, "step": 27040 }, { "epoch": 0.5982767329452234, "grad_norm": 1.1206308603286743, "learning_rate": 6.961033310289054e-06, "loss": 0.2117, "step": 27045 }, { "epoch": 0.5983873405867367, "grad_norm": 0.8957098722457886, "learning_rate": 6.9577229338989395e-06, "loss": 0.323, "step": 27050 }, { "epoch": 0.59849794822825, "grad_norm": 0.9369764924049377, "learning_rate": 6.95441292486283e-06, "loss": 0.291, "step": 27055 }, { "epoch": 0.5986085558697631, "grad_norm": 1.4574662446975708, "learning_rate": 6.951103283580414e-06, "loss": 0.418, "step": 27060 }, { "epoch": 0.5987191635112764, "grad_norm": 1.1478601694107056, "learning_rate": 6.947794010451323e-06, "loss": 0.5484, "step": 27065 }, { "epoch": 0.5988297711527897, "grad_norm": 1.5133671760559082, "learning_rate": 6.944485105875152e-06, "loss": 0.4919, "step": 27070 }, { "epoch": 0.598940378794303, "grad_norm": 0.9967584609985352, "learning_rate": 6.941176570251454e-06, "loss": 0.3989, "step": 27075 }, { "epoch": 0.5990509864358162, "grad_norm": 0.7060086131095886, "learning_rate": 6.937868403979728e-06, "loss": 0.3061, "step": 27080 }, { "epoch": 0.5991615940773295, "grad_norm": 1.3414355516433716, "learning_rate": 6.934560607459435e-06, "loss": 0.3789, "step": 27085 }, { "epoch": 0.5992722017188428, "grad_norm": 2.1811764240264893, "learning_rate": 6.931253181089998e-06, "loss": 0.3372, "step": 27090 }, { "epoch": 0.599382809360356, "grad_norm": 1.3159723281860352, "learning_rate": 6.927946125270777e-06, "loss": 0.4248, "step": 27095 }, { "epoch": 0.5994934170018693, "grad_norm": 1.1762381792068481, "learning_rate": 6.924639440401103e-06, "loss": 0.2544, "step": 27100 }, { "epoch": 0.5996040246433826, "grad_norm": 1.0914336442947388, "learning_rate": 6.921333126880261e-06, "loss": 0.2544, "step": 27105 }, { "epoch": 0.5997146322848957, "grad_norm": 1.050559639930725, "learning_rate": 6.9180271851074795e-06, "loss": 0.3533, "step": 27110 }, { "epoch": 0.599825239926409, "grad_norm": 1.0408759117126465, "learning_rate": 6.914721615481954e-06, "loss": 0.2858, "step": 27115 }, { "epoch": 0.5999358475679223, "grad_norm": 1.6133804321289062, "learning_rate": 6.911416418402833e-06, "loss": 0.3533, "step": 27120 }, { "epoch": 0.6000464552094356, "grad_norm": 1.6845154762268066, "learning_rate": 6.908111594269217e-06, "loss": 0.3702, "step": 27125 }, { "epoch": 0.6001570628509488, "grad_norm": 1.113060474395752, "learning_rate": 6.904807143480155e-06, "loss": 0.2693, "step": 27130 }, { "epoch": 0.6002676704924621, "grad_norm": 1.0001884698867798, "learning_rate": 6.901503066434669e-06, "loss": 0.3135, "step": 27135 }, { "epoch": 0.6003782781339754, "grad_norm": 0.8841904401779175, "learning_rate": 6.898199363531724e-06, "loss": 0.2826, "step": 27140 }, { "epoch": 0.6004888857754886, "grad_norm": 0.40823638439178467, "learning_rate": 6.894896035170231e-06, "loss": 0.2487, "step": 27145 }, { "epoch": 0.6005994934170019, "grad_norm": 1.1985905170440674, "learning_rate": 6.891593081749076e-06, "loss": 0.3392, "step": 27150 }, { "epoch": 0.6007101010585151, "grad_norm": 1.619117259979248, "learning_rate": 6.888290503667089e-06, "loss": 0.4657, "step": 27155 }, { "epoch": 0.6008207087000283, "grad_norm": 0.9767947793006897, "learning_rate": 6.884988301323048e-06, "loss": 0.3643, "step": 27160 }, { "epoch": 0.6009313163415416, "grad_norm": 1.0509936809539795, "learning_rate": 6.881686475115701e-06, "loss": 0.3291, "step": 27165 }, { "epoch": 0.6010419239830549, "grad_norm": 1.1533336639404297, "learning_rate": 6.8783850254437354e-06, "loss": 0.3573, "step": 27170 }, { "epoch": 0.6011525316245682, "grad_norm": 1.150682806968689, "learning_rate": 6.875083952705809e-06, "loss": 0.2962, "step": 27175 }, { "epoch": 0.6012631392660814, "grad_norm": 1.0391007661819458, "learning_rate": 6.8717832573005185e-06, "loss": 0.2927, "step": 27180 }, { "epoch": 0.6013737469075947, "grad_norm": 1.048856258392334, "learning_rate": 6.868482939626421e-06, "loss": 0.3637, "step": 27185 }, { "epoch": 0.601484354549108, "grad_norm": 1.650494933128357, "learning_rate": 6.865183000082035e-06, "loss": 0.3719, "step": 27190 }, { "epoch": 0.6015949621906213, "grad_norm": 0.9546661376953125, "learning_rate": 6.861883439065823e-06, "loss": 0.396, "step": 27195 }, { "epoch": 0.6017055698321345, "grad_norm": 2.2475829124450684, "learning_rate": 6.858584256976204e-06, "loss": 0.3629, "step": 27200 }, { "epoch": 0.6018161774736477, "grad_norm": 1.392386794090271, "learning_rate": 6.8552854542115585e-06, "loss": 0.2424, "step": 27205 }, { "epoch": 0.601926785115161, "grad_norm": 0.989754319190979, "learning_rate": 6.851987031170213e-06, "loss": 0.2332, "step": 27210 }, { "epoch": 0.6020373927566742, "grad_norm": 0.6581191420555115, "learning_rate": 6.848688988250448e-06, "loss": 0.314, "step": 27215 }, { "epoch": 0.6021480003981875, "grad_norm": 1.2895047664642334, "learning_rate": 6.845391325850509e-06, "loss": 0.4251, "step": 27220 }, { "epoch": 0.6022586080397008, "grad_norm": 1.0420743227005005, "learning_rate": 6.842094044368581e-06, "loss": 0.2715, "step": 27225 }, { "epoch": 0.602369215681214, "grad_norm": 1.0447388887405396, "learning_rate": 6.838797144202809e-06, "loss": 0.4388, "step": 27230 }, { "epoch": 0.6024798233227273, "grad_norm": 1.402748942375183, "learning_rate": 6.835500625751301e-06, "loss": 0.3659, "step": 27235 }, { "epoch": 0.6025904309642406, "grad_norm": 1.0271151065826416, "learning_rate": 6.8322044894121045e-06, "loss": 0.1763, "step": 27240 }, { "epoch": 0.6027010386057539, "grad_norm": 1.6137585639953613, "learning_rate": 6.828908735583223e-06, "loss": 0.3177, "step": 27245 }, { "epoch": 0.602811646247267, "grad_norm": 1.0883766412734985, "learning_rate": 6.825613364662625e-06, "loss": 0.3047, "step": 27250 }, { "epoch": 0.6029222538887803, "grad_norm": 1.4601318836212158, "learning_rate": 6.822318377048224e-06, "loss": 0.2614, "step": 27255 }, { "epoch": 0.6030328615302936, "grad_norm": 1.591260313987732, "learning_rate": 6.8190237731378826e-06, "loss": 0.3468, "step": 27260 }, { "epoch": 0.6031434691718068, "grad_norm": 1.3732342720031738, "learning_rate": 6.815729553329432e-06, "loss": 0.3927, "step": 27265 }, { "epoch": 0.6032540768133201, "grad_norm": 0.8235198259353638, "learning_rate": 6.812435718020644e-06, "loss": 0.3662, "step": 27270 }, { "epoch": 0.6033646844548334, "grad_norm": 1.3987069129943848, "learning_rate": 6.809142267609248e-06, "loss": 0.4852, "step": 27275 }, { "epoch": 0.6034752920963466, "grad_norm": 1.1368985176086426, "learning_rate": 6.805849202492927e-06, "loss": 0.3908, "step": 27280 }, { "epoch": 0.6035858997378599, "grad_norm": 1.417432188987732, "learning_rate": 6.802556523069317e-06, "loss": 0.2419, "step": 27285 }, { "epoch": 0.6036965073793732, "grad_norm": 1.1847953796386719, "learning_rate": 6.7992642297360115e-06, "loss": 0.4592, "step": 27290 }, { "epoch": 0.6038071150208865, "grad_norm": 0.9217110276222229, "learning_rate": 6.795972322890552e-06, "loss": 0.2259, "step": 27295 }, { "epoch": 0.6039177226623996, "grad_norm": 1.5473698377609253, "learning_rate": 6.792680802930432e-06, "loss": 0.4078, "step": 27300 }, { "epoch": 0.6040283303039129, "grad_norm": 1.6575210094451904, "learning_rate": 6.789389670253106e-06, "loss": 0.3227, "step": 27305 }, { "epoch": 0.6041389379454262, "grad_norm": 1.354051947593689, "learning_rate": 6.786098925255976e-06, "loss": 0.3248, "step": 27310 }, { "epoch": 0.6042495455869394, "grad_norm": 0.6759863495826721, "learning_rate": 6.782808568336395e-06, "loss": 0.3691, "step": 27315 }, { "epoch": 0.6043601532284527, "grad_norm": 1.5388673543930054, "learning_rate": 6.77951859989168e-06, "loss": 0.3956, "step": 27320 }, { "epoch": 0.604470760869966, "grad_norm": 1.1430777311325073, "learning_rate": 6.776229020319087e-06, "loss": 0.3894, "step": 27325 }, { "epoch": 0.6045813685114793, "grad_norm": 0.500103235244751, "learning_rate": 6.772939830015833e-06, "loss": 0.3864, "step": 27330 }, { "epoch": 0.6046919761529925, "grad_norm": 0.7139732241630554, "learning_rate": 6.7696510293790895e-06, "loss": 0.4037, "step": 27335 }, { "epoch": 0.6048025837945058, "grad_norm": 2.3660924434661865, "learning_rate": 6.766362618805978e-06, "loss": 0.5018, "step": 27340 }, { "epoch": 0.604913191436019, "grad_norm": 0.8030710220336914, "learning_rate": 6.763074598693564e-06, "loss": 0.2749, "step": 27345 }, { "epoch": 0.6050237990775322, "grad_norm": 1.4488402605056763, "learning_rate": 6.75978696943889e-06, "loss": 0.3265, "step": 27350 }, { "epoch": 0.6051344067190455, "grad_norm": 1.4229801893234253, "learning_rate": 6.756499731438927e-06, "loss": 0.4391, "step": 27355 }, { "epoch": 0.6052450143605588, "grad_norm": 0.7462282180786133, "learning_rate": 6.7532128850906055e-06, "loss": 0.4073, "step": 27360 }, { "epoch": 0.605355622002072, "grad_norm": 0.9412362575531006, "learning_rate": 6.749926430790818e-06, "loss": 0.2935, "step": 27365 }, { "epoch": 0.6054662296435853, "grad_norm": 0.7752172350883484, "learning_rate": 6.7466403689364015e-06, "loss": 0.2773, "step": 27370 }, { "epoch": 0.6055768372850986, "grad_norm": 1.058964490890503, "learning_rate": 6.743354699924141e-06, "loss": 0.3231, "step": 27375 }, { "epoch": 0.6056874449266119, "grad_norm": 1.0564684867858887, "learning_rate": 6.7400694241507855e-06, "loss": 0.218, "step": 27380 }, { "epoch": 0.6057980525681251, "grad_norm": 1.3022016286849976, "learning_rate": 6.736784542013029e-06, "loss": 0.3238, "step": 27385 }, { "epoch": 0.6059086602096383, "grad_norm": 1.0836176872253418, "learning_rate": 6.733500053907525e-06, "loss": 0.2368, "step": 27390 }, { "epoch": 0.6060192678511516, "grad_norm": 1.0436749458312988, "learning_rate": 6.730215960230867e-06, "loss": 0.2785, "step": 27395 }, { "epoch": 0.6061298754926648, "grad_norm": 1.2514735460281372, "learning_rate": 6.7269322613796105e-06, "loss": 0.24, "step": 27400 }, { "epoch": 0.6062404831341781, "grad_norm": 0.9289553165435791, "learning_rate": 6.723648957750266e-06, "loss": 0.2102, "step": 27405 }, { "epoch": 0.6063510907756914, "grad_norm": 2.1317577362060547, "learning_rate": 6.720366049739285e-06, "loss": 0.3289, "step": 27410 }, { "epoch": 0.6064616984172047, "grad_norm": 1.1054917573928833, "learning_rate": 6.717083537743079e-06, "loss": 0.3458, "step": 27415 }, { "epoch": 0.6065723060587179, "grad_norm": 1.1832911968231201, "learning_rate": 6.713801422158015e-06, "loss": 0.3435, "step": 27420 }, { "epoch": 0.6066829137002312, "grad_norm": 1.3088405132293701, "learning_rate": 6.710519703380403e-06, "loss": 0.3347, "step": 27425 }, { "epoch": 0.6067935213417445, "grad_norm": 1.1725879907608032, "learning_rate": 6.707238381806507e-06, "loss": 0.3787, "step": 27430 }, { "epoch": 0.6069041289832577, "grad_norm": 1.7553730010986328, "learning_rate": 6.703957457832554e-06, "loss": 0.317, "step": 27435 }, { "epoch": 0.6070147366247709, "grad_norm": 1.3514719009399414, "learning_rate": 6.700676931854706e-06, "loss": 0.3736, "step": 27440 }, { "epoch": 0.6071253442662842, "grad_norm": 0.8023115396499634, "learning_rate": 6.697396804269088e-06, "loss": 0.29, "step": 27445 }, { "epoch": 0.6072359519077974, "grad_norm": 0.6377990245819092, "learning_rate": 6.69411707547178e-06, "loss": 0.3336, "step": 27450 }, { "epoch": 0.6073465595493107, "grad_norm": 1.2622126340866089, "learning_rate": 6.690837745858804e-06, "loss": 0.2418, "step": 27455 }, { "epoch": 0.607457167190824, "grad_norm": 1.2608610391616821, "learning_rate": 6.687558815826132e-06, "loss": 0.478, "step": 27460 }, { "epoch": 0.6075677748323373, "grad_norm": 1.356764793395996, "learning_rate": 6.684280285769702e-06, "loss": 0.3338, "step": 27465 }, { "epoch": 0.6076783824738505, "grad_norm": 2.5823915004730225, "learning_rate": 6.681002156085395e-06, "loss": 0.4088, "step": 27470 }, { "epoch": 0.6077889901153638, "grad_norm": 0.7577986717224121, "learning_rate": 6.677724427169039e-06, "loss": 0.3236, "step": 27475 }, { "epoch": 0.6078995977568771, "grad_norm": 1.066488265991211, "learning_rate": 6.6744470994164224e-06, "loss": 0.3906, "step": 27480 }, { "epoch": 0.6080102053983902, "grad_norm": 0.975045919418335, "learning_rate": 6.671170173223284e-06, "loss": 0.4004, "step": 27485 }, { "epoch": 0.6081208130399035, "grad_norm": 0.775200366973877, "learning_rate": 6.667893648985304e-06, "loss": 0.2439, "step": 27490 }, { "epoch": 0.6082314206814168, "grad_norm": 0.9430120587348938, "learning_rate": 6.664617527098127e-06, "loss": 0.2236, "step": 27495 }, { "epoch": 0.60834202832293, "grad_norm": 1.3881973028182983, "learning_rate": 6.661341807957342e-06, "loss": 0.1352, "step": 27500 }, { "epoch": 0.6084526359644433, "grad_norm": 1.5773262977600098, "learning_rate": 6.658066491958497e-06, "loss": 0.4018, "step": 27505 }, { "epoch": 0.6085632436059566, "grad_norm": 1.578239917755127, "learning_rate": 6.654791579497077e-06, "loss": 0.5624, "step": 27510 }, { "epoch": 0.6086738512474699, "grad_norm": 1.1294728517532349, "learning_rate": 6.651517070968528e-06, "loss": 0.2977, "step": 27515 }, { "epoch": 0.6087844588889831, "grad_norm": 0.8706616163253784, "learning_rate": 6.6482429667682525e-06, "loss": 0.333, "step": 27520 }, { "epoch": 0.6088950665304964, "grad_norm": 0.986988365650177, "learning_rate": 6.64496926729159e-06, "loss": 0.52, "step": 27525 }, { "epoch": 0.6090056741720097, "grad_norm": 0.8310043811798096, "learning_rate": 6.641695972933842e-06, "loss": 0.316, "step": 27530 }, { "epoch": 0.6091162818135228, "grad_norm": 1.7085434198379517, "learning_rate": 6.638423084090258e-06, "loss": 0.3405, "step": 27535 }, { "epoch": 0.6092268894550361, "grad_norm": 0.9404575228691101, "learning_rate": 6.635150601156037e-06, "loss": 0.1858, "step": 27540 }, { "epoch": 0.6093374970965494, "grad_norm": 1.3546735048294067, "learning_rate": 6.631878524526331e-06, "loss": 0.3524, "step": 27545 }, { "epoch": 0.6094481047380627, "grad_norm": 1.5344270467758179, "learning_rate": 6.628606854596244e-06, "loss": 0.3629, "step": 27550 }, { "epoch": 0.6095587123795759, "grad_norm": 1.295107126235962, "learning_rate": 6.625335591760829e-06, "loss": 0.3674, "step": 27555 }, { "epoch": 0.6096693200210892, "grad_norm": 0.8587083220481873, "learning_rate": 6.622064736415082e-06, "loss": 0.348, "step": 27560 }, { "epoch": 0.6097799276626025, "grad_norm": 1.5261813402175903, "learning_rate": 6.618794288953968e-06, "loss": 0.28, "step": 27565 }, { "epoch": 0.6098905353041157, "grad_norm": 0.8958653807640076, "learning_rate": 6.61552424977239e-06, "loss": 0.2281, "step": 27570 }, { "epoch": 0.610001142945629, "grad_norm": 0.9495186805725098, "learning_rate": 6.612254619265199e-06, "loss": 0.2544, "step": 27575 }, { "epoch": 0.6101117505871422, "grad_norm": 1.5325895547866821, "learning_rate": 6.608985397827208e-06, "loss": 0.4549, "step": 27580 }, { "epoch": 0.6102223582286554, "grad_norm": 1.6201411485671997, "learning_rate": 6.605716585853173e-06, "loss": 0.3484, "step": 27585 }, { "epoch": 0.6103329658701687, "grad_norm": 1.238390564918518, "learning_rate": 6.6024481837377975e-06, "loss": 0.3047, "step": 27590 }, { "epoch": 0.610443573511682, "grad_norm": 1.0906894207000732, "learning_rate": 6.599180191875746e-06, "loss": 0.3058, "step": 27595 }, { "epoch": 0.6105541811531953, "grad_norm": 1.1122431755065918, "learning_rate": 6.595912610661623e-06, "loss": 0.3332, "step": 27600 }, { "epoch": 0.6106647887947085, "grad_norm": 1.3282710313796997, "learning_rate": 6.592645440489994e-06, "loss": 0.2865, "step": 27605 }, { "epoch": 0.6107753964362218, "grad_norm": 0.840687096118927, "learning_rate": 6.589378681755363e-06, "loss": 0.315, "step": 27610 }, { "epoch": 0.6108860040777351, "grad_norm": 1.2034064531326294, "learning_rate": 6.58611233485219e-06, "loss": 0.4256, "step": 27615 }, { "epoch": 0.6109966117192484, "grad_norm": 1.8909167051315308, "learning_rate": 6.582846400174892e-06, "loss": 0.3277, "step": 27620 }, { "epoch": 0.6111072193607616, "grad_norm": 1.3826580047607422, "learning_rate": 6.579580878117823e-06, "loss": 0.3887, "step": 27625 }, { "epoch": 0.6112178270022748, "grad_norm": 1.153316855430603, "learning_rate": 6.576315769075292e-06, "loss": 0.4119, "step": 27630 }, { "epoch": 0.611328434643788, "grad_norm": 0.6962083578109741, "learning_rate": 6.57305107344157e-06, "loss": 0.2717, "step": 27635 }, { "epoch": 0.6114390422853013, "grad_norm": 2.156353712081909, "learning_rate": 6.569786791610857e-06, "loss": 0.3586, "step": 27640 }, { "epoch": 0.6115496499268146, "grad_norm": 0.73172527551651, "learning_rate": 6.5665229239773184e-06, "loss": 0.2862, "step": 27645 }, { "epoch": 0.6116602575683279, "grad_norm": 1.381304144859314, "learning_rate": 6.563259470935069e-06, "loss": 0.3127, "step": 27650 }, { "epoch": 0.6117708652098411, "grad_norm": 1.3990477323532104, "learning_rate": 6.5599964328781644e-06, "loss": 0.4312, "step": 27655 }, { "epoch": 0.6118814728513544, "grad_norm": 1.1494650840759277, "learning_rate": 6.556733810200615e-06, "loss": 0.4267, "step": 27660 }, { "epoch": 0.6119920804928677, "grad_norm": 1.7322075366973877, "learning_rate": 6.5534716032963874e-06, "loss": 0.4734, "step": 27665 }, { "epoch": 0.612102688134381, "grad_norm": 0.4383908212184906, "learning_rate": 6.550209812559389e-06, "loss": 0.2716, "step": 27670 }, { "epoch": 0.6122132957758941, "grad_norm": 0.5835598707199097, "learning_rate": 6.546948438383474e-06, "loss": 0.3364, "step": 27675 }, { "epoch": 0.6123239034174074, "grad_norm": 1.0907939672470093, "learning_rate": 6.5436874811624604e-06, "loss": 0.4373, "step": 27680 }, { "epoch": 0.6124345110589207, "grad_norm": 1.065066933631897, "learning_rate": 6.540426941290107e-06, "loss": 0.3754, "step": 27685 }, { "epoch": 0.6125451187004339, "grad_norm": 1.395828366279602, "learning_rate": 6.537166819160117e-06, "loss": 0.3066, "step": 27690 }, { "epoch": 0.6126557263419472, "grad_norm": 0.9972161650657654, "learning_rate": 6.5339071151661556e-06, "loss": 0.2339, "step": 27695 }, { "epoch": 0.6127663339834605, "grad_norm": 0.6569359302520752, "learning_rate": 6.530647829701831e-06, "loss": 0.2248, "step": 27700 }, { "epoch": 0.6128769416249737, "grad_norm": 1.3374205827713013, "learning_rate": 6.527388963160693e-06, "loss": 0.4, "step": 27705 }, { "epoch": 0.612987549266487, "grad_norm": 1.41141676902771, "learning_rate": 6.524130515936256e-06, "loss": 0.3902, "step": 27710 }, { "epoch": 0.6130981569080003, "grad_norm": 0.640812337398529, "learning_rate": 6.520872488421973e-06, "loss": 0.1829, "step": 27715 }, { "epoch": 0.6132087645495136, "grad_norm": 1.363751769065857, "learning_rate": 6.517614881011256e-06, "loss": 0.402, "step": 27720 }, { "epoch": 0.6133193721910267, "grad_norm": 0.4835648238658905, "learning_rate": 6.514357694097453e-06, "loss": 0.3598, "step": 27725 }, { "epoch": 0.61342997983254, "grad_norm": 1.028367280960083, "learning_rate": 6.51110092807387e-06, "loss": 0.3969, "step": 27730 }, { "epoch": 0.6135405874740533, "grad_norm": 1.603834629058838, "learning_rate": 6.507844583333764e-06, "loss": 0.4515, "step": 27735 }, { "epoch": 0.6136511951155665, "grad_norm": 1.3496280908584595, "learning_rate": 6.504588660270333e-06, "loss": 0.267, "step": 27740 }, { "epoch": 0.6137618027570798, "grad_norm": 1.796839714050293, "learning_rate": 6.5013331592767295e-06, "loss": 0.2672, "step": 27745 }, { "epoch": 0.6138724103985931, "grad_norm": 0.897010326385498, "learning_rate": 6.49807808074606e-06, "loss": 0.3049, "step": 27750 }, { "epoch": 0.6139830180401064, "grad_norm": 1.9628256559371948, "learning_rate": 6.494823425071366e-06, "loss": 0.3327, "step": 27755 }, { "epoch": 0.6140936256816196, "grad_norm": 1.1355009078979492, "learning_rate": 6.491569192645648e-06, "loss": 0.4226, "step": 27760 }, { "epoch": 0.6142042333231329, "grad_norm": 1.8431564569473267, "learning_rate": 6.488315383861858e-06, "loss": 0.45, "step": 27765 }, { "epoch": 0.6143148409646461, "grad_norm": 1.137624740600586, "learning_rate": 6.4850619991128916e-06, "loss": 0.4533, "step": 27770 }, { "epoch": 0.6144254486061593, "grad_norm": 0.9690749049186707, "learning_rate": 6.481809038791589e-06, "loss": 0.3368, "step": 27775 }, { "epoch": 0.6145360562476726, "grad_norm": 1.3835264444351196, "learning_rate": 6.478556503290747e-06, "loss": 0.3133, "step": 27780 }, { "epoch": 0.6146466638891859, "grad_norm": 0.6687236428260803, "learning_rate": 6.475304393003111e-06, "loss": 0.3965, "step": 27785 }, { "epoch": 0.6147572715306991, "grad_norm": 0.7612988948822021, "learning_rate": 6.472052708321365e-06, "loss": 0.3659, "step": 27790 }, { "epoch": 0.6148678791722124, "grad_norm": 1.338745355606079, "learning_rate": 6.468801449638158e-06, "loss": 0.3786, "step": 27795 }, { "epoch": 0.6149784868137257, "grad_norm": 2.103515625, "learning_rate": 6.465550617346075e-06, "loss": 0.2952, "step": 27800 }, { "epoch": 0.615089094455239, "grad_norm": 1.6174287796020508, "learning_rate": 6.462300211837648e-06, "loss": 0.4214, "step": 27805 }, { "epoch": 0.6151997020967522, "grad_norm": 1.1358734369277954, "learning_rate": 6.4590502335053686e-06, "loss": 0.2659, "step": 27810 }, { "epoch": 0.6153103097382655, "grad_norm": 1.2847250699996948, "learning_rate": 6.455800682741671e-06, "loss": 0.3018, "step": 27815 }, { "epoch": 0.6154209173797787, "grad_norm": 0.4214160740375519, "learning_rate": 6.45255155993893e-06, "loss": 0.17, "step": 27820 }, { "epoch": 0.6155315250212919, "grad_norm": 1.0153006315231323, "learning_rate": 6.449302865489484e-06, "loss": 0.2445, "step": 27825 }, { "epoch": 0.6156421326628052, "grad_norm": 1.1094465255737305, "learning_rate": 6.446054599785607e-06, "loss": 0.359, "step": 27830 }, { "epoch": 0.6157527403043185, "grad_norm": 1.2162011861801147, "learning_rate": 6.442806763219531e-06, "loss": 0.377, "step": 27835 }, { "epoch": 0.6158633479458318, "grad_norm": 0.9581196904182434, "learning_rate": 6.439559356183429e-06, "loss": 0.3389, "step": 27840 }, { "epoch": 0.615973955587345, "grad_norm": 0.9704151153564453, "learning_rate": 6.43631237906942e-06, "loss": 0.3518, "step": 27845 }, { "epoch": 0.6160845632288583, "grad_norm": 2.049131393432617, "learning_rate": 6.433065832269583e-06, "loss": 0.2439, "step": 27850 }, { "epoch": 0.6161951708703716, "grad_norm": 0.9127610325813293, "learning_rate": 6.429819716175934e-06, "loss": 0.3272, "step": 27855 }, { "epoch": 0.6163057785118848, "grad_norm": 1.2594836950302124, "learning_rate": 6.426574031180436e-06, "loss": 0.3452, "step": 27860 }, { "epoch": 0.616416386153398, "grad_norm": 0.9149600267410278, "learning_rate": 6.423328777675013e-06, "loss": 0.28, "step": 27865 }, { "epoch": 0.6165269937949113, "grad_norm": 1.375808596611023, "learning_rate": 6.420083956051523e-06, "loss": 0.2904, "step": 27870 }, { "epoch": 0.6166376014364245, "grad_norm": 1.5821117162704468, "learning_rate": 6.416839566701776e-06, "loss": 0.2325, "step": 27875 }, { "epoch": 0.6167482090779378, "grad_norm": 0.7934233546257019, "learning_rate": 6.413595610017537e-06, "loss": 0.4681, "step": 27880 }, { "epoch": 0.6168588167194511, "grad_norm": 1.1897228956222534, "learning_rate": 6.41035208639051e-06, "loss": 0.3143, "step": 27885 }, { "epoch": 0.6169694243609644, "grad_norm": 1.0329854488372803, "learning_rate": 6.407108996212344e-06, "loss": 0.2998, "step": 27890 }, { "epoch": 0.6170800320024776, "grad_norm": 1.3908621072769165, "learning_rate": 6.403866339874647e-06, "loss": 0.2819, "step": 27895 }, { "epoch": 0.6171906396439909, "grad_norm": 0.5785133838653564, "learning_rate": 6.40062411776897e-06, "loss": 0.3277, "step": 27900 }, { "epoch": 0.6173012472855042, "grad_norm": 2.1654059886932373, "learning_rate": 6.397382330286803e-06, "loss": 0.3415, "step": 27905 }, { "epoch": 0.6174118549270174, "grad_norm": 0.9129645228385925, "learning_rate": 6.394140977819597e-06, "loss": 0.5835, "step": 27910 }, { "epoch": 0.6175224625685306, "grad_norm": 1.0880323648452759, "learning_rate": 6.390900060758747e-06, "loss": 0.2822, "step": 27915 }, { "epoch": 0.6176330702100439, "grad_norm": 1.003812313079834, "learning_rate": 6.387659579495582e-06, "loss": 0.3451, "step": 27920 }, { "epoch": 0.6177436778515571, "grad_norm": 1.7792739868164062, "learning_rate": 6.3844195344214e-06, "loss": 0.4146, "step": 27925 }, { "epoch": 0.6178542854930704, "grad_norm": 0.55767422914505, "learning_rate": 6.381179925927427e-06, "loss": 0.3671, "step": 27930 }, { "epoch": 0.6179648931345837, "grad_norm": 1.6314196586608887, "learning_rate": 6.377940754404854e-06, "loss": 0.3751, "step": 27935 }, { "epoch": 0.618075500776097, "grad_norm": 1.302346110343933, "learning_rate": 6.374702020244803e-06, "loss": 0.2022, "step": 27940 }, { "epoch": 0.6181861084176102, "grad_norm": 1.1462701559066772, "learning_rate": 6.371463723838349e-06, "loss": 0.375, "step": 27945 }, { "epoch": 0.6182967160591235, "grad_norm": 0.9182533621788025, "learning_rate": 6.368225865576522e-06, "loss": 0.318, "step": 27950 }, { "epoch": 0.6184073237006368, "grad_norm": 1.270234227180481, "learning_rate": 6.3649884458502885e-06, "loss": 0.3749, "step": 27955 }, { "epoch": 0.6185179313421499, "grad_norm": 0.8056057095527649, "learning_rate": 6.361751465050563e-06, "loss": 0.2242, "step": 27960 }, { "epoch": 0.6186285389836632, "grad_norm": 0.7234849333763123, "learning_rate": 6.358514923568215e-06, "loss": 0.3396, "step": 27965 }, { "epoch": 0.6187391466251765, "grad_norm": 1.8109129667282104, "learning_rate": 6.3552788217940545e-06, "loss": 0.3994, "step": 27970 }, { "epoch": 0.6188497542666898, "grad_norm": 1.7282211780548096, "learning_rate": 6.352043160118835e-06, "loss": 0.4542, "step": 27975 }, { "epoch": 0.618960361908203, "grad_norm": 0.7620909214019775, "learning_rate": 6.348807938933271e-06, "loss": 0.2905, "step": 27980 }, { "epoch": 0.6190709695497163, "grad_norm": 1.3408596515655518, "learning_rate": 6.345573158628009e-06, "loss": 0.3261, "step": 27985 }, { "epoch": 0.6191815771912296, "grad_norm": 1.223048210144043, "learning_rate": 6.342338819593643e-06, "loss": 0.4892, "step": 27990 }, { "epoch": 0.6192921848327428, "grad_norm": 0.8970188498497009, "learning_rate": 6.339104922220726e-06, "loss": 0.4855, "step": 27995 }, { "epoch": 0.6194027924742561, "grad_norm": 1.161983609199524, "learning_rate": 6.335871466899749e-06, "loss": 0.3319, "step": 28000 }, { "epoch": 0.6195134001157694, "grad_norm": 0.8048517107963562, "learning_rate": 6.332638454021145e-06, "loss": 0.3797, "step": 28005 }, { "epoch": 0.6196240077572825, "grad_norm": 3.271691083908081, "learning_rate": 6.329405883975306e-06, "loss": 0.3448, "step": 28010 }, { "epoch": 0.6197346153987958, "grad_norm": 1.2393476963043213, "learning_rate": 6.3261737571525626e-06, "loss": 0.2734, "step": 28015 }, { "epoch": 0.6198452230403091, "grad_norm": 1.0585482120513916, "learning_rate": 6.322942073943188e-06, "loss": 0.3453, "step": 28020 }, { "epoch": 0.6199558306818224, "grad_norm": 2.3785793781280518, "learning_rate": 6.319710834737412e-06, "loss": 0.3996, "step": 28025 }, { "epoch": 0.6200664383233356, "grad_norm": 1.1815061569213867, "learning_rate": 6.316480039925407e-06, "loss": 0.3563, "step": 28030 }, { "epoch": 0.6201770459648489, "grad_norm": 1.7759780883789062, "learning_rate": 6.3132496898972824e-06, "loss": 0.3741, "step": 28035 }, { "epoch": 0.6202876536063622, "grad_norm": 0.9200423955917358, "learning_rate": 6.310019785043111e-06, "loss": 0.329, "step": 28040 }, { "epoch": 0.6203982612478754, "grad_norm": 0.9720513820648193, "learning_rate": 6.306790325752897e-06, "loss": 0.3705, "step": 28045 }, { "epoch": 0.6205088688893887, "grad_norm": 1.2153267860412598, "learning_rate": 6.303561312416604e-06, "loss": 0.3038, "step": 28050 }, { "epoch": 0.6206194765309019, "grad_norm": 2.2276928424835205, "learning_rate": 6.300332745424125e-06, "loss": 0.397, "step": 28055 }, { "epoch": 0.6207300841724152, "grad_norm": 0.9746618866920471, "learning_rate": 6.297104625165312e-06, "loss": 0.3866, "step": 28060 }, { "epoch": 0.6208406918139284, "grad_norm": 0.9316051602363586, "learning_rate": 6.293876952029962e-06, "loss": 0.258, "step": 28065 }, { "epoch": 0.6209512994554417, "grad_norm": 1.241310954093933, "learning_rate": 6.290649726407814e-06, "loss": 0.2376, "step": 28070 }, { "epoch": 0.621061907096955, "grad_norm": 1.1657761335372925, "learning_rate": 6.2874229486885505e-06, "loss": 0.4084, "step": 28075 }, { "epoch": 0.6211725147384682, "grad_norm": 1.1402828693389893, "learning_rate": 6.284196619261813e-06, "loss": 0.2609, "step": 28080 }, { "epoch": 0.6212831223799815, "grad_norm": 0.9436346292495728, "learning_rate": 6.280970738517171e-06, "loss": 0.2896, "step": 28085 }, { "epoch": 0.6213937300214948, "grad_norm": 1.3954851627349854, "learning_rate": 6.277745306844152e-06, "loss": 0.272, "step": 28090 }, { "epoch": 0.621504337663008, "grad_norm": 0.8900678157806396, "learning_rate": 6.274520324632227e-06, "loss": 0.3897, "step": 28095 }, { "epoch": 0.6216149453045213, "grad_norm": 0.7512552738189697, "learning_rate": 6.271295792270813e-06, "loss": 0.2134, "step": 28100 }, { "epoch": 0.6217255529460345, "grad_norm": 1.1362797021865845, "learning_rate": 6.268071710149263e-06, "loss": 0.3688, "step": 28105 }, { "epoch": 0.6218361605875478, "grad_norm": 0.8949592113494873, "learning_rate": 6.2648480786568925e-06, "loss": 0.3578, "step": 28110 }, { "epoch": 0.621946768229061, "grad_norm": 1.1525521278381348, "learning_rate": 6.261624898182952e-06, "loss": 0.4237, "step": 28115 }, { "epoch": 0.6220573758705743, "grad_norm": 1.6941813230514526, "learning_rate": 6.258402169116635e-06, "loss": 0.3072, "step": 28120 }, { "epoch": 0.6221679835120876, "grad_norm": 1.545035481452942, "learning_rate": 6.25517989184709e-06, "loss": 0.3547, "step": 28125 }, { "epoch": 0.6222785911536008, "grad_norm": 1.1055052280426025, "learning_rate": 6.2519580667634085e-06, "loss": 0.3066, "step": 28130 }, { "epoch": 0.6223891987951141, "grad_norm": 1.2872850894927979, "learning_rate": 6.248736694254616e-06, "loss": 0.3274, "step": 28135 }, { "epoch": 0.6224998064366274, "grad_norm": 1.3311693668365479, "learning_rate": 6.245515774709698e-06, "loss": 0.2626, "step": 28140 }, { "epoch": 0.6226104140781407, "grad_norm": 0.7430053949356079, "learning_rate": 6.242295308517578e-06, "loss": 0.4296, "step": 28145 }, { "epoch": 0.6227210217196538, "grad_norm": 1.8797926902770996, "learning_rate": 6.239075296067129e-06, "loss": 0.3196, "step": 28150 }, { "epoch": 0.6228316293611671, "grad_norm": 1.2462444305419922, "learning_rate": 6.235855737747165e-06, "loss": 0.3695, "step": 28155 }, { "epoch": 0.6229422370026804, "grad_norm": 1.572361946105957, "learning_rate": 6.232636633946444e-06, "loss": 0.2861, "step": 28160 }, { "epoch": 0.6230528446441936, "grad_norm": 1.189514398574829, "learning_rate": 6.229417985053676e-06, "loss": 0.2992, "step": 28165 }, { "epoch": 0.6231634522857069, "grad_norm": 1.9067507982254028, "learning_rate": 6.22619979145751e-06, "loss": 0.4743, "step": 28170 }, { "epoch": 0.6232740599272202, "grad_norm": 0.8976705074310303, "learning_rate": 6.222982053546538e-06, "loss": 0.3564, "step": 28175 }, { "epoch": 0.6233846675687335, "grad_norm": 1.302443265914917, "learning_rate": 6.21976477170931e-06, "loss": 0.2605, "step": 28180 }, { "epoch": 0.6234952752102467, "grad_norm": 1.2166194915771484, "learning_rate": 6.216547946334303e-06, "loss": 0.268, "step": 28185 }, { "epoch": 0.62360588285176, "grad_norm": 2.0787363052368164, "learning_rate": 6.21333157780995e-06, "loss": 0.4352, "step": 28190 }, { "epoch": 0.6237164904932732, "grad_norm": 0.8839397430419922, "learning_rate": 6.210115666524631e-06, "loss": 0.3838, "step": 28195 }, { "epoch": 0.6238270981347864, "grad_norm": 1.2580533027648926, "learning_rate": 6.206900212866662e-06, "loss": 0.312, "step": 28200 }, { "epoch": 0.6239377057762997, "grad_norm": 1.7217715978622437, "learning_rate": 6.203685217224307e-06, "loss": 0.3754, "step": 28205 }, { "epoch": 0.624048313417813, "grad_norm": 1.1696451902389526, "learning_rate": 6.200470679985777e-06, "loss": 0.3249, "step": 28210 }, { "epoch": 0.6241589210593262, "grad_norm": 1.0475701093673706, "learning_rate": 6.19725660153923e-06, "loss": 0.3127, "step": 28215 }, { "epoch": 0.6242695287008395, "grad_norm": 1.0918086767196655, "learning_rate": 6.194042982272758e-06, "loss": 0.417, "step": 28220 }, { "epoch": 0.6243801363423528, "grad_norm": 1.1993408203125, "learning_rate": 6.190829822574409e-06, "loss": 0.319, "step": 28225 }, { "epoch": 0.6244907439838661, "grad_norm": 0.9492326974868774, "learning_rate": 6.187617122832175e-06, "loss": 0.2642, "step": 28230 }, { "epoch": 0.6246013516253793, "grad_norm": 0.8824667930603027, "learning_rate": 6.184404883433976e-06, "loss": 0.3472, "step": 28235 }, { "epoch": 0.6247119592668926, "grad_norm": 0.757735013961792, "learning_rate": 6.181193104767702e-06, "loss": 0.1937, "step": 28240 }, { "epoch": 0.6248225669084058, "grad_norm": 1.0216038227081299, "learning_rate": 6.17798178722117e-06, "loss": 0.3119, "step": 28245 }, { "epoch": 0.624933174549919, "grad_norm": 1.205276608467102, "learning_rate": 6.174770931182141e-06, "loss": 0.3141, "step": 28250 }, { "epoch": 0.6250437821914323, "grad_norm": 1.0224257707595825, "learning_rate": 6.171560537038331e-06, "loss": 0.502, "step": 28255 }, { "epoch": 0.6251543898329456, "grad_norm": 0.8247915506362915, "learning_rate": 6.168350605177387e-06, "loss": 0.2889, "step": 28260 }, { "epoch": 0.6252649974744589, "grad_norm": 0.7062891721725464, "learning_rate": 6.165141135986918e-06, "loss": 0.2926, "step": 28265 }, { "epoch": 0.6253756051159721, "grad_norm": 1.0168933868408203, "learning_rate": 6.161932129854457e-06, "loss": 0.4312, "step": 28270 }, { "epoch": 0.6254862127574854, "grad_norm": 1.0758171081542969, "learning_rate": 6.158723587167494e-06, "loss": 0.3737, "step": 28275 }, { "epoch": 0.6255968203989987, "grad_norm": 1.0165802240371704, "learning_rate": 6.155515508313461e-06, "loss": 0.4467, "step": 28280 }, { "epoch": 0.6257074280405119, "grad_norm": 1.4210846424102783, "learning_rate": 6.15230789367973e-06, "loss": 0.2082, "step": 28285 }, { "epoch": 0.6258180356820251, "grad_norm": 1.1191768646240234, "learning_rate": 6.149100743653618e-06, "loss": 0.3379, "step": 28290 }, { "epoch": 0.6259286433235384, "grad_norm": 0.9455841183662415, "learning_rate": 6.145894058622395e-06, "loss": 0.3471, "step": 28295 }, { "epoch": 0.6260392509650516, "grad_norm": 1.6890617609024048, "learning_rate": 6.1426878389732584e-06, "loss": 0.2883, "step": 28300 }, { "epoch": 0.6261498586065649, "grad_norm": 1.2882193326950073, "learning_rate": 6.139482085093359e-06, "loss": 0.2881, "step": 28305 }, { "epoch": 0.6262604662480782, "grad_norm": 1.3205987215042114, "learning_rate": 6.136276797369798e-06, "loss": 0.3977, "step": 28310 }, { "epoch": 0.6263710738895915, "grad_norm": 1.5254912376403809, "learning_rate": 6.133071976189608e-06, "loss": 0.5109, "step": 28315 }, { "epoch": 0.6264816815311047, "grad_norm": 0.7848552465438843, "learning_rate": 6.129867621939767e-06, "loss": 0.3013, "step": 28320 }, { "epoch": 0.626592289172618, "grad_norm": 1.3865036964416504, "learning_rate": 6.126663735007204e-06, "loss": 0.2356, "step": 28325 }, { "epoch": 0.6267028968141313, "grad_norm": 1.1047009229660034, "learning_rate": 6.123460315778788e-06, "loss": 0.3335, "step": 28330 }, { "epoch": 0.6268135044556445, "grad_norm": 1.0557928085327148, "learning_rate": 6.120257364641326e-06, "loss": 0.3803, "step": 28335 }, { "epoch": 0.6269241120971577, "grad_norm": 2.6673450469970703, "learning_rate": 6.117054881981581e-06, "loss": 0.4366, "step": 28340 }, { "epoch": 0.627034719738671, "grad_norm": 1.2255208492279053, "learning_rate": 6.113852868186248e-06, "loss": 0.3952, "step": 28345 }, { "epoch": 0.6271453273801842, "grad_norm": 1.8719571828842163, "learning_rate": 6.110651323641963e-06, "loss": 0.3895, "step": 28350 }, { "epoch": 0.6272559350216975, "grad_norm": 1.3280891180038452, "learning_rate": 6.107450248735321e-06, "loss": 0.384, "step": 28355 }, { "epoch": 0.6273665426632108, "grad_norm": 0.8928027749061584, "learning_rate": 6.104249643852847e-06, "loss": 0.2034, "step": 28360 }, { "epoch": 0.6274771503047241, "grad_norm": 1.8690069913864136, "learning_rate": 6.1010495093810094e-06, "loss": 0.3956, "step": 28365 }, { "epoch": 0.6275877579462373, "grad_norm": 1.6460187435150146, "learning_rate": 6.0978498457062296e-06, "loss": 0.3837, "step": 28370 }, { "epoch": 0.6276983655877506, "grad_norm": 1.2982134819030762, "learning_rate": 6.094650653214862e-06, "loss": 0.3702, "step": 28375 }, { "epoch": 0.6278089732292639, "grad_norm": 1.767822027206421, "learning_rate": 6.091451932293212e-06, "loss": 0.4439, "step": 28380 }, { "epoch": 0.627919580870777, "grad_norm": 1.1925171613693237, "learning_rate": 6.088253683327521e-06, "loss": 0.3282, "step": 28385 }, { "epoch": 0.6280301885122903, "grad_norm": 1.5146256685256958, "learning_rate": 6.085055906703974e-06, "loss": 0.3593, "step": 28390 }, { "epoch": 0.6281407961538036, "grad_norm": 1.3153235912322998, "learning_rate": 6.08185860280871e-06, "loss": 0.3205, "step": 28395 }, { "epoch": 0.6282514037953169, "grad_norm": 1.4442124366760254, "learning_rate": 6.078661772027794e-06, "loss": 0.314, "step": 28400 }, { "epoch": 0.6283620114368301, "grad_norm": 0.7053605914115906, "learning_rate": 6.075465414747244e-06, "loss": 0.2884, "step": 28405 }, { "epoch": 0.6284726190783434, "grad_norm": 0.6783138513565063, "learning_rate": 6.072269531353025e-06, "loss": 0.3395, "step": 28410 }, { "epoch": 0.6285832267198567, "grad_norm": 1.1752461194992065, "learning_rate": 6.069074122231033e-06, "loss": 0.2347, "step": 28415 }, { "epoch": 0.6286938343613699, "grad_norm": 3.6769163608551025, "learning_rate": 6.065879187767112e-06, "loss": 0.3857, "step": 28420 }, { "epoch": 0.6288044420028832, "grad_norm": 1.208177089691162, "learning_rate": 6.062684728347052e-06, "loss": 0.3684, "step": 28425 }, { "epoch": 0.6289150496443965, "grad_norm": 0.5423233509063721, "learning_rate": 6.059490744356586e-06, "loss": 0.3327, "step": 28430 }, { "epoch": 0.6290256572859096, "grad_norm": 1.8746390342712402, "learning_rate": 6.056297236181379e-06, "loss": 0.4382, "step": 28435 }, { "epoch": 0.6291362649274229, "grad_norm": 1.210109829902649, "learning_rate": 6.0531042042070505e-06, "loss": 0.3483, "step": 28440 }, { "epoch": 0.6292468725689362, "grad_norm": 1.248171329498291, "learning_rate": 6.0499116488191615e-06, "loss": 0.4011, "step": 28445 }, { "epoch": 0.6293574802104495, "grad_norm": 0.7947481870651245, "learning_rate": 6.0467195704032035e-06, "loss": 0.2338, "step": 28450 }, { "epoch": 0.6294680878519627, "grad_norm": 5.855911731719971, "learning_rate": 6.043527969344627e-06, "loss": 0.4573, "step": 28455 }, { "epoch": 0.629578695493476, "grad_norm": 0.5150712132453918, "learning_rate": 6.040336846028814e-06, "loss": 0.2243, "step": 28460 }, { "epoch": 0.6296893031349893, "grad_norm": 0.8628373742103577, "learning_rate": 6.0371462008410885e-06, "loss": 0.2731, "step": 28465 }, { "epoch": 0.6297999107765025, "grad_norm": 1.2889022827148438, "learning_rate": 6.033956034166725e-06, "loss": 0.3764, "step": 28470 }, { "epoch": 0.6299105184180158, "grad_norm": 1.1155511140823364, "learning_rate": 6.030766346390931e-06, "loss": 0.2539, "step": 28475 }, { "epoch": 0.630021126059529, "grad_norm": 2.024477005004883, "learning_rate": 6.027577137898865e-06, "loss": 0.3681, "step": 28480 }, { "epoch": 0.6301317337010423, "grad_norm": 1.0484563112258911, "learning_rate": 6.024388409075619e-06, "loss": 0.3999, "step": 28485 }, { "epoch": 0.6302423413425555, "grad_norm": 1.6239811182022095, "learning_rate": 6.0212001603062285e-06, "loss": 0.2575, "step": 28490 }, { "epoch": 0.6303529489840688, "grad_norm": 1.914304256439209, "learning_rate": 6.018012391975682e-06, "loss": 0.409, "step": 28495 }, { "epoch": 0.6304635566255821, "grad_norm": 1.1726186275482178, "learning_rate": 6.014825104468895e-06, "loss": 0.2969, "step": 28500 }, { "epoch": 0.6305741642670953, "grad_norm": 1.5023373365402222, "learning_rate": 6.011638298170732e-06, "loss": 0.2904, "step": 28505 }, { "epoch": 0.6306847719086086, "grad_norm": 1.06159508228302, "learning_rate": 6.008451973466003e-06, "loss": 0.4195, "step": 28510 }, { "epoch": 0.6307953795501219, "grad_norm": 1.773521065711975, "learning_rate": 6.005266130739451e-06, "loss": 0.3702, "step": 28515 }, { "epoch": 0.6309059871916352, "grad_norm": 1.5760608911514282, "learning_rate": 6.002080770375767e-06, "loss": 0.3878, "step": 28520 }, { "epoch": 0.6310165948331484, "grad_norm": 2.4009459018707275, "learning_rate": 5.998895892759584e-06, "loss": 0.4427, "step": 28525 }, { "epoch": 0.6311272024746616, "grad_norm": 0.833803117275238, "learning_rate": 5.995711498275474e-06, "loss": 0.3688, "step": 28530 }, { "epoch": 0.6312378101161749, "grad_norm": 1.56623375415802, "learning_rate": 5.992527587307948e-06, "loss": 0.2553, "step": 28535 }, { "epoch": 0.6313484177576881, "grad_norm": 1.5167784690856934, "learning_rate": 5.989344160241467e-06, "loss": 0.4563, "step": 28540 }, { "epoch": 0.6314590253992014, "grad_norm": 1.4456473588943481, "learning_rate": 5.986161217460429e-06, "loss": 0.3161, "step": 28545 }, { "epoch": 0.6315696330407147, "grad_norm": 1.973887324333191, "learning_rate": 5.982978759349168e-06, "loss": 0.4395, "step": 28550 }, { "epoch": 0.631680240682228, "grad_norm": 0.9464303255081177, "learning_rate": 5.9797967862919705e-06, "loss": 0.3239, "step": 28555 }, { "epoch": 0.6317908483237412, "grad_norm": 1.3702889680862427, "learning_rate": 5.976615298673058e-06, "loss": 0.2987, "step": 28560 }, { "epoch": 0.6319014559652545, "grad_norm": 1.4379154443740845, "learning_rate": 5.97343429687659e-06, "loss": 0.3261, "step": 28565 }, { "epoch": 0.6320120636067678, "grad_norm": 1.2674262523651123, "learning_rate": 5.970253781286674e-06, "loss": 0.4111, "step": 28570 }, { "epoch": 0.6321226712482809, "grad_norm": 1.1083366870880127, "learning_rate": 5.9670737522873615e-06, "loss": 0.2371, "step": 28575 }, { "epoch": 0.6322332788897942, "grad_norm": 2.3706676959991455, "learning_rate": 5.963894210262628e-06, "loss": 0.3683, "step": 28580 }, { "epoch": 0.6323438865313075, "grad_norm": 2.139099359512329, "learning_rate": 5.960715155596414e-06, "loss": 0.2054, "step": 28585 }, { "epoch": 0.6324544941728207, "grad_norm": 1.481443166732788, "learning_rate": 5.957536588672581e-06, "loss": 0.2739, "step": 28590 }, { "epoch": 0.632565101814334, "grad_norm": 1.587889552116394, "learning_rate": 5.954358509874948e-06, "loss": 0.4087, "step": 28595 }, { "epoch": 0.6326757094558473, "grad_norm": 1.0887906551361084, "learning_rate": 5.9511809195872604e-06, "loss": 0.3658, "step": 28600 }, { "epoch": 0.6327863170973606, "grad_norm": 1.474699854850769, "learning_rate": 5.948003818193212e-06, "loss": 0.3692, "step": 28605 }, { "epoch": 0.6328969247388738, "grad_norm": 1.0854578018188477, "learning_rate": 5.944827206076442e-06, "loss": 0.4278, "step": 28610 }, { "epoch": 0.6330075323803871, "grad_norm": 1.751124620437622, "learning_rate": 5.9416510836205185e-06, "loss": 0.2835, "step": 28615 }, { "epoch": 0.6331181400219004, "grad_norm": 1.170345425605774, "learning_rate": 5.9384754512089605e-06, "loss": 0.3348, "step": 28620 }, { "epoch": 0.6332287476634135, "grad_norm": 1.2350807189941406, "learning_rate": 5.935300309225227e-06, "loss": 0.3104, "step": 28625 }, { "epoch": 0.6333393553049268, "grad_norm": 1.3197300434112549, "learning_rate": 5.9321256580527134e-06, "loss": 0.3402, "step": 28630 }, { "epoch": 0.6334499629464401, "grad_norm": 1.0203405618667603, "learning_rate": 5.928951498074753e-06, "loss": 0.3597, "step": 28635 }, { "epoch": 0.6335605705879533, "grad_norm": 1.4745084047317505, "learning_rate": 5.92577782967463e-06, "loss": 0.3287, "step": 28640 }, { "epoch": 0.6336711782294666, "grad_norm": 1.5556062459945679, "learning_rate": 5.922604653235566e-06, "loss": 0.1966, "step": 28645 }, { "epoch": 0.6337817858709799, "grad_norm": 0.9561015367507935, "learning_rate": 5.919431969140714e-06, "loss": 0.3475, "step": 28650 }, { "epoch": 0.6338923935124932, "grad_norm": 1.7827380895614624, "learning_rate": 5.91625977777318e-06, "loss": 0.3558, "step": 28655 }, { "epoch": 0.6340030011540064, "grad_norm": 1.877925157546997, "learning_rate": 5.913088079516006e-06, "loss": 0.2985, "step": 28660 }, { "epoch": 0.6341136087955197, "grad_norm": 0.9774803519248962, "learning_rate": 5.909916874752166e-06, "loss": 0.3381, "step": 28665 }, { "epoch": 0.6342242164370329, "grad_norm": 1.3032617568969727, "learning_rate": 5.9067461638645915e-06, "loss": 0.2009, "step": 28670 }, { "epoch": 0.6343348240785461, "grad_norm": 1.1569678783416748, "learning_rate": 5.903575947236141e-06, "loss": 0.2916, "step": 28675 }, { "epoch": 0.6344454317200594, "grad_norm": 0.9714440107345581, "learning_rate": 5.900406225249613e-06, "loss": 0.4467, "step": 28680 }, { "epoch": 0.6345560393615727, "grad_norm": 1.1732369661331177, "learning_rate": 5.897236998287756e-06, "loss": 0.2059, "step": 28685 }, { "epoch": 0.634666647003086, "grad_norm": 1.0952179431915283, "learning_rate": 5.894068266733249e-06, "loss": 0.3971, "step": 28690 }, { "epoch": 0.6347772546445992, "grad_norm": 3.9437289237976074, "learning_rate": 5.890900030968723e-06, "loss": 0.3588, "step": 28695 }, { "epoch": 0.6348878622861125, "grad_norm": 1.4249457120895386, "learning_rate": 5.887732291376732e-06, "loss": 0.3904, "step": 28700 }, { "epoch": 0.6349984699276258, "grad_norm": 0.9611791372299194, "learning_rate": 5.884565048339783e-06, "loss": 0.2194, "step": 28705 }, { "epoch": 0.635109077569139, "grad_norm": 1.4527148008346558, "learning_rate": 5.8813983022403245e-06, "loss": 0.302, "step": 28710 }, { "epoch": 0.6352196852106523, "grad_norm": 3.4417407512664795, "learning_rate": 5.878232053460733e-06, "loss": 0.5003, "step": 28715 }, { "epoch": 0.6353302928521655, "grad_norm": 1.6152408123016357, "learning_rate": 5.875066302383334e-06, "loss": 0.3123, "step": 28720 }, { "epoch": 0.6354409004936787, "grad_norm": 0.8226509690284729, "learning_rate": 5.871901049390395e-06, "loss": 0.3904, "step": 28725 }, { "epoch": 0.635551508135192, "grad_norm": 1.5770936012268066, "learning_rate": 5.868736294864115e-06, "loss": 0.3743, "step": 28730 }, { "epoch": 0.6356621157767053, "grad_norm": 1.5331330299377441, "learning_rate": 5.865572039186635e-06, "loss": 0.3447, "step": 28735 }, { "epoch": 0.6357727234182186, "grad_norm": 1.1211342811584473, "learning_rate": 5.862408282740046e-06, "loss": 0.225, "step": 28740 }, { "epoch": 0.6358833310597318, "grad_norm": 1.0389615297317505, "learning_rate": 5.8592450259063675e-06, "loss": 0.4224, "step": 28745 }, { "epoch": 0.6359939387012451, "grad_norm": 2.020404815673828, "learning_rate": 5.856082269067553e-06, "loss": 0.365, "step": 28750 }, { "epoch": 0.6361045463427584, "grad_norm": 1.5155657529830933, "learning_rate": 5.852920012605516e-06, "loss": 0.344, "step": 28755 }, { "epoch": 0.6362151539842716, "grad_norm": 1.0663750171661377, "learning_rate": 5.849758256902095e-06, "loss": 0.3889, "step": 28760 }, { "epoch": 0.6363257616257848, "grad_norm": 0.8930937647819519, "learning_rate": 5.846597002339065e-06, "loss": 0.4821, "step": 28765 }, { "epoch": 0.6364363692672981, "grad_norm": 0.962992250919342, "learning_rate": 5.843436249298153e-06, "loss": 0.4568, "step": 28770 }, { "epoch": 0.6365469769088113, "grad_norm": 1.7167463302612305, "learning_rate": 5.84027599816102e-06, "loss": 0.4364, "step": 28775 }, { "epoch": 0.6366575845503246, "grad_norm": 1.4303070306777954, "learning_rate": 5.837116249309257e-06, "loss": 0.3885, "step": 28780 }, { "epoch": 0.6367681921918379, "grad_norm": 1.08237624168396, "learning_rate": 5.833957003124411e-06, "loss": 0.3825, "step": 28785 }, { "epoch": 0.6368787998333512, "grad_norm": 1.0221049785614014, "learning_rate": 5.830798259987955e-06, "loss": 0.3886, "step": 28790 }, { "epoch": 0.6369894074748644, "grad_norm": 1.284982442855835, "learning_rate": 5.827640020281311e-06, "loss": 0.2678, "step": 28795 }, { "epoch": 0.6371000151163777, "grad_norm": 1.403165340423584, "learning_rate": 5.824482284385831e-06, "loss": 0.2957, "step": 28800 }, { "epoch": 0.637210622757891, "grad_norm": 1.2183181047439575, "learning_rate": 5.821325052682811e-06, "loss": 0.3884, "step": 28805 }, { "epoch": 0.6373212303994042, "grad_norm": 1.8941986560821533, "learning_rate": 5.81816832555349e-06, "loss": 0.235, "step": 28810 }, { "epoch": 0.6374318380409174, "grad_norm": 1.6097445487976074, "learning_rate": 5.815012103379038e-06, "loss": 0.4619, "step": 28815 }, { "epoch": 0.6375424456824307, "grad_norm": 1.1512391567230225, "learning_rate": 5.811856386540567e-06, "loss": 0.314, "step": 28820 }, { "epoch": 0.637653053323944, "grad_norm": 1.3966615200042725, "learning_rate": 5.808701175419131e-06, "loss": 0.3249, "step": 28825 }, { "epoch": 0.6377636609654572, "grad_norm": 1.3189289569854736, "learning_rate": 5.805546470395722e-06, "loss": 0.352, "step": 28830 }, { "epoch": 0.6378742686069705, "grad_norm": 1.489024043083191, "learning_rate": 5.802392271851267e-06, "loss": 0.3642, "step": 28835 }, { "epoch": 0.6379848762484838, "grad_norm": 1.0066838264465332, "learning_rate": 5.799238580166635e-06, "loss": 0.5062, "step": 28840 }, { "epoch": 0.638095483889997, "grad_norm": 0.7655932307243347, "learning_rate": 5.796085395722635e-06, "loss": 0.3204, "step": 28845 }, { "epoch": 0.6382060915315103, "grad_norm": 1.6325989961624146, "learning_rate": 5.792932718900011e-06, "loss": 0.4414, "step": 28850 }, { "epoch": 0.6383166991730236, "grad_norm": 1.9631292819976807, "learning_rate": 5.789780550079448e-06, "loss": 0.4946, "step": 28855 }, { "epoch": 0.6384273068145367, "grad_norm": 1.1591835021972656, "learning_rate": 5.78662888964157e-06, "loss": 0.3335, "step": 28860 }, { "epoch": 0.63853791445605, "grad_norm": 0.8680834770202637, "learning_rate": 5.7834777379669396e-06, "loss": 0.3206, "step": 28865 }, { "epoch": 0.6386485220975633, "grad_norm": 1.4047582149505615, "learning_rate": 5.780327095436056e-06, "loss": 0.383, "step": 28870 }, { "epoch": 0.6387591297390766, "grad_norm": 1.2948075532913208, "learning_rate": 5.777176962429358e-06, "loss": 0.3662, "step": 28875 }, { "epoch": 0.6388697373805898, "grad_norm": 1.205002784729004, "learning_rate": 5.774027339327226e-06, "loss": 0.2996, "step": 28880 }, { "epoch": 0.6389803450221031, "grad_norm": 1.134634256362915, "learning_rate": 5.770878226509974e-06, "loss": 0.3358, "step": 28885 }, { "epoch": 0.6390909526636164, "grad_norm": 1.57118821144104, "learning_rate": 5.767729624357856e-06, "loss": 0.3682, "step": 28890 }, { "epoch": 0.6392015603051296, "grad_norm": 1.4064245223999023, "learning_rate": 5.764581533251066e-06, "loss": 0.418, "step": 28895 }, { "epoch": 0.6393121679466429, "grad_norm": 1.0214416980743408, "learning_rate": 5.761433953569732e-06, "loss": 0.3068, "step": 28900 }, { "epoch": 0.6394227755881562, "grad_norm": 1.4773229360580444, "learning_rate": 5.7582868856939285e-06, "loss": 0.373, "step": 28905 }, { "epoch": 0.6395333832296693, "grad_norm": 1.294674038887024, "learning_rate": 5.755140330003659e-06, "loss": 0.264, "step": 28910 }, { "epoch": 0.6396439908711826, "grad_norm": 1.2363303899765015, "learning_rate": 5.75199428687887e-06, "loss": 0.2774, "step": 28915 }, { "epoch": 0.6397545985126959, "grad_norm": 1.413519024848938, "learning_rate": 5.748848756699445e-06, "loss": 0.4598, "step": 28920 }, { "epoch": 0.6398652061542092, "grad_norm": 1.3335540294647217, "learning_rate": 5.745703739845208e-06, "loss": 0.2036, "step": 28925 }, { "epoch": 0.6399758137957224, "grad_norm": 0.7192028164863586, "learning_rate": 5.7425592366959145e-06, "loss": 0.3106, "step": 28930 }, { "epoch": 0.6400864214372357, "grad_norm": 1.437761902809143, "learning_rate": 5.739415247631266e-06, "loss": 0.3643, "step": 28935 }, { "epoch": 0.640197029078749, "grad_norm": 1.634255290031433, "learning_rate": 5.736271773030897e-06, "loss": 0.3299, "step": 28940 }, { "epoch": 0.6403076367202623, "grad_norm": 1.2833324670791626, "learning_rate": 5.73312881327438e-06, "loss": 0.3562, "step": 28945 }, { "epoch": 0.6404182443617755, "grad_norm": 0.5546873807907104, "learning_rate": 5.729986368741229e-06, "loss": 0.3222, "step": 28950 }, { "epoch": 0.6405288520032887, "grad_norm": 1.0462825298309326, "learning_rate": 5.72684443981089e-06, "loss": 0.4224, "step": 28955 }, { "epoch": 0.640639459644802, "grad_norm": 1.0174996852874756, "learning_rate": 5.723703026862753e-06, "loss": 0.4809, "step": 28960 }, { "epoch": 0.6407500672863152, "grad_norm": 0.841820240020752, "learning_rate": 5.720562130276141e-06, "loss": 0.3895, "step": 28965 }, { "epoch": 0.6408606749278285, "grad_norm": 1.2144287824630737, "learning_rate": 5.7174217504303165e-06, "loss": 0.4273, "step": 28970 }, { "epoch": 0.6409712825693418, "grad_norm": 0.8178695440292358, "learning_rate": 5.714281887704478e-06, "loss": 0.3272, "step": 28975 }, { "epoch": 0.641081890210855, "grad_norm": 1.9163649082183838, "learning_rate": 5.711142542477765e-06, "loss": 0.491, "step": 28980 }, { "epoch": 0.6411924978523683, "grad_norm": 1.1155405044555664, "learning_rate": 5.708003715129253e-06, "loss": 0.4055, "step": 28985 }, { "epoch": 0.6413031054938816, "grad_norm": 2.205116033554077, "learning_rate": 5.704865406037952e-06, "loss": 0.4528, "step": 28990 }, { "epoch": 0.6414137131353949, "grad_norm": 1.153825044631958, "learning_rate": 5.701727615582813e-06, "loss": 0.3437, "step": 28995 }, { "epoch": 0.641524320776908, "grad_norm": 1.0041849613189697, "learning_rate": 5.698590344142724e-06, "loss": 0.2658, "step": 29000 }, { "epoch": 0.6416349284184213, "grad_norm": 1.2168627977371216, "learning_rate": 5.6954535920965085e-06, "loss": 0.3879, "step": 29005 }, { "epoch": 0.6417455360599346, "grad_norm": 1.5137568712234497, "learning_rate": 5.692317359822928e-06, "loss": 0.3194, "step": 29010 }, { "epoch": 0.6418561437014478, "grad_norm": 1.4900476932525635, "learning_rate": 5.689181647700683e-06, "loss": 0.4238, "step": 29015 }, { "epoch": 0.6419667513429611, "grad_norm": 1.2010070085525513, "learning_rate": 5.6860464561084094e-06, "loss": 0.3116, "step": 29020 }, { "epoch": 0.6420773589844744, "grad_norm": 1.0109957456588745, "learning_rate": 5.682911785424679e-06, "loss": 0.2782, "step": 29025 }, { "epoch": 0.6421879666259876, "grad_norm": 0.733288586139679, "learning_rate": 5.679777636028003e-06, "loss": 0.4233, "step": 29030 }, { "epoch": 0.6422985742675009, "grad_norm": 1.125083088874817, "learning_rate": 5.676644008296831e-06, "loss": 0.3367, "step": 29035 }, { "epoch": 0.6424091819090142, "grad_norm": 1.544204831123352, "learning_rate": 5.673510902609546e-06, "loss": 0.3666, "step": 29040 }, { "epoch": 0.6425197895505275, "grad_norm": 1.19594144821167, "learning_rate": 5.670378319344469e-06, "loss": 0.2964, "step": 29045 }, { "epoch": 0.6426303971920406, "grad_norm": 0.9328681230545044, "learning_rate": 5.667246258879859e-06, "loss": 0.3378, "step": 29050 }, { "epoch": 0.6427410048335539, "grad_norm": 0.7645723819732666, "learning_rate": 5.664114721593911e-06, "loss": 0.2753, "step": 29055 }, { "epoch": 0.6428516124750672, "grad_norm": 1.182950496673584, "learning_rate": 5.660983707864758e-06, "loss": 0.2996, "step": 29060 }, { "epoch": 0.6429622201165804, "grad_norm": 1.1828058958053589, "learning_rate": 5.657853218070468e-06, "loss": 0.2886, "step": 29065 }, { "epoch": 0.6430728277580937, "grad_norm": 0.9588051438331604, "learning_rate": 5.654723252589047e-06, "loss": 0.285, "step": 29070 }, { "epoch": 0.643183435399607, "grad_norm": 1.6222834587097168, "learning_rate": 5.651593811798437e-06, "loss": 0.217, "step": 29075 }, { "epoch": 0.6432940430411203, "grad_norm": 0.8821370005607605, "learning_rate": 5.648464896076518e-06, "loss": 0.3129, "step": 29080 }, { "epoch": 0.6434046506826335, "grad_norm": 1.3959784507751465, "learning_rate": 5.645336505801106e-06, "loss": 0.2698, "step": 29085 }, { "epoch": 0.6435152583241468, "grad_norm": 0.7670608758926392, "learning_rate": 5.642208641349951e-06, "loss": 0.3612, "step": 29090 }, { "epoch": 0.64362586596566, "grad_norm": 1.4476966857910156, "learning_rate": 5.639081303100743e-06, "loss": 0.4373, "step": 29095 }, { "epoch": 0.6437364736071732, "grad_norm": 1.3855845928192139, "learning_rate": 5.635954491431107e-06, "loss": 0.3235, "step": 29100 }, { "epoch": 0.6438470812486865, "grad_norm": 0.9490916132926941, "learning_rate": 5.632828206718605e-06, "loss": 0.3913, "step": 29105 }, { "epoch": 0.6439576888901998, "grad_norm": 1.6368144750595093, "learning_rate": 5.629702449340734e-06, "loss": 0.5413, "step": 29110 }, { "epoch": 0.644068296531713, "grad_norm": 0.6081562638282776, "learning_rate": 5.626577219674928e-06, "loss": 0.3964, "step": 29115 }, { "epoch": 0.6441789041732263, "grad_norm": 1.6055632829666138, "learning_rate": 5.623452518098559e-06, "loss": 0.3567, "step": 29120 }, { "epoch": 0.6442895118147396, "grad_norm": 1.3973714113235474, "learning_rate": 5.620328344988933e-06, "loss": 0.288, "step": 29125 }, { "epoch": 0.6444001194562529, "grad_norm": 1.2255111932754517, "learning_rate": 5.617204700723291e-06, "loss": 0.3352, "step": 29130 }, { "epoch": 0.6445107270977661, "grad_norm": 1.8233323097229004, "learning_rate": 5.614081585678815e-06, "loss": 0.3246, "step": 29135 }, { "epoch": 0.6446213347392794, "grad_norm": 0.9977742433547974, "learning_rate": 5.6109590002326195e-06, "loss": 0.203, "step": 29140 }, { "epoch": 0.6447319423807926, "grad_norm": 0.9433032274246216, "learning_rate": 5.6078369447617555e-06, "loss": 0.324, "step": 29145 }, { "epoch": 0.6448425500223058, "grad_norm": 1.2236155271530151, "learning_rate": 5.60471541964321e-06, "loss": 0.233, "step": 29150 }, { "epoch": 0.6449531576638191, "grad_norm": 1.0615326166152954, "learning_rate": 5.601594425253904e-06, "loss": 0.3702, "step": 29155 }, { "epoch": 0.6450637653053324, "grad_norm": 1.374861478805542, "learning_rate": 5.5984739619707e-06, "loss": 0.4007, "step": 29160 }, { "epoch": 0.6451743729468457, "grad_norm": 1.7825018167495728, "learning_rate": 5.595354030170393e-06, "loss": 0.3214, "step": 29165 }, { "epoch": 0.6452849805883589, "grad_norm": 1.0012258291244507, "learning_rate": 5.592234630229711e-06, "loss": 0.4745, "step": 29170 }, { "epoch": 0.6453955882298722, "grad_norm": 1.4464027881622314, "learning_rate": 5.589115762525324e-06, "loss": 0.2587, "step": 29175 }, { "epoch": 0.6455061958713855, "grad_norm": 0.6025654673576355, "learning_rate": 5.5859974274338304e-06, "loss": 0.3684, "step": 29180 }, { "epoch": 0.6456168035128987, "grad_norm": 1.7722424268722534, "learning_rate": 5.58287962533177e-06, "loss": 0.3473, "step": 29185 }, { "epoch": 0.6457274111544119, "grad_norm": 1.0982699394226074, "learning_rate": 5.579762356595617e-06, "loss": 0.2553, "step": 29190 }, { "epoch": 0.6458380187959252, "grad_norm": 1.6334784030914307, "learning_rate": 5.576645621601782e-06, "loss": 0.3792, "step": 29195 }, { "epoch": 0.6459486264374384, "grad_norm": 1.1069464683532715, "learning_rate": 5.573529420726606e-06, "loss": 0.4016, "step": 29200 }, { "epoch": 0.6460592340789517, "grad_norm": 0.8705712556838989, "learning_rate": 5.570413754346371e-06, "loss": 0.2929, "step": 29205 }, { "epoch": 0.646169841720465, "grad_norm": 1.326331377029419, "learning_rate": 5.567298622837294e-06, "loss": 0.3549, "step": 29210 }, { "epoch": 0.6462804493619783, "grad_norm": 0.8056589961051941, "learning_rate": 5.564184026575525e-06, "loss": 0.3936, "step": 29215 }, { "epoch": 0.6463910570034915, "grad_norm": 2.251222848892212, "learning_rate": 5.5610699659371515e-06, "loss": 0.4397, "step": 29220 }, { "epoch": 0.6465016646450048, "grad_norm": 1.0769182443618774, "learning_rate": 5.5579564412981935e-06, "loss": 0.3252, "step": 29225 }, { "epoch": 0.6466122722865181, "grad_norm": 1.057346224784851, "learning_rate": 5.5548434530346095e-06, "loss": 0.3193, "step": 29230 }, { "epoch": 0.6467228799280313, "grad_norm": 1.6431456804275513, "learning_rate": 5.551731001522291e-06, "loss": 0.3966, "step": 29235 }, { "epoch": 0.6468334875695445, "grad_norm": 0.9622028470039368, "learning_rate": 5.5486190871370686e-06, "loss": 0.2852, "step": 29240 }, { "epoch": 0.6469440952110578, "grad_norm": 1.100150227546692, "learning_rate": 5.545507710254699e-06, "loss": 0.4118, "step": 29245 }, { "epoch": 0.647054702852571, "grad_norm": 1.0712151527404785, "learning_rate": 5.542396871250886e-06, "loss": 0.2294, "step": 29250 }, { "epoch": 0.6471653104940843, "grad_norm": 1.0274988412857056, "learning_rate": 5.539286570501259e-06, "loss": 0.3492, "step": 29255 }, { "epoch": 0.6472759181355976, "grad_norm": 1.7349120378494263, "learning_rate": 5.536176808381386e-06, "loss": 0.3229, "step": 29260 }, { "epoch": 0.6473865257771109, "grad_norm": 0.9835044145584106, "learning_rate": 5.533067585266771e-06, "loss": 0.4199, "step": 29265 }, { "epoch": 0.6474971334186241, "grad_norm": 1.1051808595657349, "learning_rate": 5.529958901532853e-06, "loss": 0.3786, "step": 29270 }, { "epoch": 0.6476077410601374, "grad_norm": 1.5249155759811401, "learning_rate": 5.526850757555e-06, "loss": 0.5168, "step": 29275 }, { "epoch": 0.6477183487016507, "grad_norm": 0.8288964033126831, "learning_rate": 5.523743153708525e-06, "loss": 0.1977, "step": 29280 }, { "epoch": 0.6478289563431638, "grad_norm": 1.278314232826233, "learning_rate": 5.520636090368665e-06, "loss": 0.3219, "step": 29285 }, { "epoch": 0.6479395639846771, "grad_norm": 1.24668288230896, "learning_rate": 5.517529567910602e-06, "loss": 0.2931, "step": 29290 }, { "epoch": 0.6480501716261904, "grad_norm": 1.6321223974227905, "learning_rate": 5.514423586709443e-06, "loss": 0.4548, "step": 29295 }, { "epoch": 0.6481607792677037, "grad_norm": 1.2193797826766968, "learning_rate": 5.511318147140238e-06, "loss": 0.3645, "step": 29300 }, { "epoch": 0.6482713869092169, "grad_norm": 1.631563425064087, "learning_rate": 5.508213249577965e-06, "loss": 0.4412, "step": 29305 }, { "epoch": 0.6483819945507302, "grad_norm": 1.2210277318954468, "learning_rate": 5.505108894397541e-06, "loss": 0.341, "step": 29310 }, { "epoch": 0.6484926021922435, "grad_norm": 1.297836184501648, "learning_rate": 5.502005081973816e-06, "loss": 0.3967, "step": 29315 }, { "epoch": 0.6486032098337567, "grad_norm": 1.055092215538025, "learning_rate": 5.498901812681572e-06, "loss": 0.3828, "step": 29320 }, { "epoch": 0.64871381747527, "grad_norm": 1.3182817697525024, "learning_rate": 5.49579908689553e-06, "loss": 0.317, "step": 29325 }, { "epoch": 0.6488244251167833, "grad_norm": 1.4266647100448608, "learning_rate": 5.492696904990345e-06, "loss": 0.3876, "step": 29330 }, { "epoch": 0.6489350327582964, "grad_norm": 1.1389524936676025, "learning_rate": 5.4895952673406e-06, "loss": 0.297, "step": 29335 }, { "epoch": 0.6490456403998097, "grad_norm": 0.5975916981697083, "learning_rate": 5.4864941743208215e-06, "loss": 0.4687, "step": 29340 }, { "epoch": 0.649156248041323, "grad_norm": 1.4433802366256714, "learning_rate": 5.483393626305461e-06, "loss": 0.3672, "step": 29345 }, { "epoch": 0.6492668556828363, "grad_norm": 1.1401807069778442, "learning_rate": 5.480293623668912e-06, "loss": 0.3435, "step": 29350 }, { "epoch": 0.6493774633243495, "grad_norm": 1.3601961135864258, "learning_rate": 5.477194166785498e-06, "loss": 0.2848, "step": 29355 }, { "epoch": 0.6494880709658628, "grad_norm": 1.691678524017334, "learning_rate": 5.4740952560294754e-06, "loss": 0.5913, "step": 29360 }, { "epoch": 0.6495986786073761, "grad_norm": 0.8834462761878967, "learning_rate": 5.47099689177504e-06, "loss": 0.2942, "step": 29365 }, { "epoch": 0.6497092862488894, "grad_norm": 0.6202958822250366, "learning_rate": 5.467899074396317e-06, "loss": 0.307, "step": 29370 }, { "epoch": 0.6498198938904026, "grad_norm": 2.0087337493896484, "learning_rate": 5.464801804267366e-06, "loss": 0.3129, "step": 29375 }, { "epoch": 0.6499305015319158, "grad_norm": 1.6201330423355103, "learning_rate": 5.46170508176218e-06, "loss": 0.2921, "step": 29380 }, { "epoch": 0.650041109173429, "grad_norm": 1.158905029296875, "learning_rate": 5.458608907254693e-06, "loss": 0.3674, "step": 29385 }, { "epoch": 0.6501517168149423, "grad_norm": 1.435987949371338, "learning_rate": 5.455513281118761e-06, "loss": 0.3499, "step": 29390 }, { "epoch": 0.6502623244564556, "grad_norm": 1.4609817266464233, "learning_rate": 5.452418203728183e-06, "loss": 0.2387, "step": 29395 }, { "epoch": 0.6503729320979689, "grad_norm": 0.6774128675460815, "learning_rate": 5.449323675456688e-06, "loss": 0.2676, "step": 29400 }, { "epoch": 0.6504835397394821, "grad_norm": 0.9479442238807678, "learning_rate": 5.44622969667794e-06, "loss": 0.2151, "step": 29405 }, { "epoch": 0.6505941473809954, "grad_norm": 0.8512431383132935, "learning_rate": 5.443136267765534e-06, "loss": 0.342, "step": 29410 }, { "epoch": 0.6507047550225087, "grad_norm": 1.669440746307373, "learning_rate": 5.440043389093003e-06, "loss": 0.2584, "step": 29415 }, { "epoch": 0.650815362664022, "grad_norm": 0.681159496307373, "learning_rate": 5.4369510610338085e-06, "loss": 0.4193, "step": 29420 }, { "epoch": 0.6509259703055352, "grad_norm": 1.5815273523330688, "learning_rate": 5.4338592839613515e-06, "loss": 0.35, "step": 29425 }, { "epoch": 0.6510365779470484, "grad_norm": 0.7375788688659668, "learning_rate": 5.43076805824896e-06, "loss": 0.3862, "step": 29430 }, { "epoch": 0.6511471855885617, "grad_norm": 0.872601330280304, "learning_rate": 5.427677384269899e-06, "loss": 0.4538, "step": 29435 }, { "epoch": 0.6512577932300749, "grad_norm": 1.8122687339782715, "learning_rate": 5.424587262397368e-06, "loss": 0.2634, "step": 29440 }, { "epoch": 0.6513684008715882, "grad_norm": 1.308921456336975, "learning_rate": 5.421497693004498e-06, "loss": 0.3389, "step": 29445 }, { "epoch": 0.6514790085131015, "grad_norm": 0.622261106967926, "learning_rate": 5.418408676464353e-06, "loss": 0.2715, "step": 29450 }, { "epoch": 0.6515896161546147, "grad_norm": 1.0709887742996216, "learning_rate": 5.415320213149931e-06, "loss": 0.4185, "step": 29455 }, { "epoch": 0.651700223796128, "grad_norm": 1.0022436380386353, "learning_rate": 5.4122323034341616e-06, "loss": 0.3272, "step": 29460 }, { "epoch": 0.6518108314376413, "grad_norm": 1.4150781631469727, "learning_rate": 5.40914494768991e-06, "loss": 0.3367, "step": 29465 }, { "epoch": 0.6519214390791546, "grad_norm": 1.291837453842163, "learning_rate": 5.406058146289974e-06, "loss": 0.2967, "step": 29470 }, { "epoch": 0.6520320467206677, "grad_norm": 0.818725049495697, "learning_rate": 5.402971899607083e-06, "loss": 0.269, "step": 29475 }, { "epoch": 0.652142654362181, "grad_norm": 0.9464149475097656, "learning_rate": 5.399886208013901e-06, "loss": 0.2862, "step": 29480 }, { "epoch": 0.6522532620036943, "grad_norm": 0.8355833292007446, "learning_rate": 5.396801071883024e-06, "loss": 0.1152, "step": 29485 }, { "epoch": 0.6523638696452075, "grad_norm": 1.3755394220352173, "learning_rate": 5.393716491586982e-06, "loss": 0.3162, "step": 29490 }, { "epoch": 0.6524744772867208, "grad_norm": 1.2670824527740479, "learning_rate": 5.390632467498236e-06, "loss": 0.2465, "step": 29495 }, { "epoch": 0.6525850849282341, "grad_norm": 1.368735671043396, "learning_rate": 5.38754899998918e-06, "loss": 0.367, "step": 29500 }, { "epoch": 0.6526956925697474, "grad_norm": 1.574342966079712, "learning_rate": 5.384466089432145e-06, "loss": 0.3636, "step": 29505 }, { "epoch": 0.6528063002112606, "grad_norm": 1.212084174156189, "learning_rate": 5.381383736199389e-06, "loss": 0.411, "step": 29510 }, { "epoch": 0.6529169078527739, "grad_norm": 1.6929141283035278, "learning_rate": 5.378301940663105e-06, "loss": 0.1707, "step": 29515 }, { "epoch": 0.6530275154942872, "grad_norm": 1.4219480752944946, "learning_rate": 5.375220703195421e-06, "loss": 0.2531, "step": 29520 }, { "epoch": 0.6531381231358003, "grad_norm": 2.295022964477539, "learning_rate": 5.372140024168393e-06, "loss": 0.306, "step": 29525 }, { "epoch": 0.6532487307773136, "grad_norm": 1.443634033203125, "learning_rate": 5.369059903954012e-06, "loss": 0.2366, "step": 29530 }, { "epoch": 0.6533593384188269, "grad_norm": 1.2353370189666748, "learning_rate": 5.365980342924205e-06, "loss": 0.4654, "step": 29535 }, { "epoch": 0.6534699460603401, "grad_norm": 0.9862974882125854, "learning_rate": 5.3629013414508244e-06, "loss": 0.2646, "step": 29540 }, { "epoch": 0.6535805537018534, "grad_norm": 1.2856895923614502, "learning_rate": 5.35982289990566e-06, "loss": 0.3233, "step": 29545 }, { "epoch": 0.6536911613433667, "grad_norm": 1.1987887620925903, "learning_rate": 5.356745018660433e-06, "loss": 0.2442, "step": 29550 }, { "epoch": 0.65380176898488, "grad_norm": 1.4032036066055298, "learning_rate": 5.353667698086795e-06, "loss": 0.2672, "step": 29555 }, { "epoch": 0.6539123766263932, "grad_norm": 0.8919408917427063, "learning_rate": 5.350590938556335e-06, "loss": 0.3042, "step": 29560 }, { "epoch": 0.6540229842679065, "grad_norm": 0.9525145888328552, "learning_rate": 5.347514740440567e-06, "loss": 0.2957, "step": 29565 }, { "epoch": 0.6541335919094197, "grad_norm": 0.8745918273925781, "learning_rate": 5.344439104110942e-06, "loss": 0.48, "step": 29570 }, { "epoch": 0.6542441995509329, "grad_norm": 0.9928642511367798, "learning_rate": 5.341364029938843e-06, "loss": 0.3151, "step": 29575 }, { "epoch": 0.6543548071924462, "grad_norm": 1.2337924242019653, "learning_rate": 5.338289518295584e-06, "loss": 0.3, "step": 29580 }, { "epoch": 0.6544654148339595, "grad_norm": 1.558724284172058, "learning_rate": 5.33521556955241e-06, "loss": 0.344, "step": 29585 }, { "epoch": 0.6545760224754728, "grad_norm": 1.8786988258361816, "learning_rate": 5.332142184080502e-06, "loss": 0.3993, "step": 29590 }, { "epoch": 0.654686630116986, "grad_norm": 0.8968825340270996, "learning_rate": 5.329069362250968e-06, "loss": 0.348, "step": 29595 }, { "epoch": 0.6547972377584993, "grad_norm": 1.2904644012451172, "learning_rate": 5.3259971044348515e-06, "loss": 0.3251, "step": 29600 }, { "epoch": 0.6549078454000126, "grad_norm": 1.0402460098266602, "learning_rate": 5.322925411003126e-06, "loss": 0.4083, "step": 29605 }, { "epoch": 0.6550184530415258, "grad_norm": 0.8087093234062195, "learning_rate": 5.319854282326698e-06, "loss": 0.5798, "step": 29610 }, { "epoch": 0.6551290606830391, "grad_norm": 1.0628052949905396, "learning_rate": 5.316783718776404e-06, "loss": 0.2934, "step": 29615 }, { "epoch": 0.6552396683245523, "grad_norm": 1.5919442176818848, "learning_rate": 5.313713720723016e-06, "loss": 0.3872, "step": 29620 }, { "epoch": 0.6553502759660655, "grad_norm": 0.954515278339386, "learning_rate": 5.310644288537233e-06, "loss": 0.2418, "step": 29625 }, { "epoch": 0.6554608836075788, "grad_norm": 1.479500412940979, "learning_rate": 5.307575422589691e-06, "loss": 0.4027, "step": 29630 }, { "epoch": 0.6555714912490921, "grad_norm": 1.1357066631317139, "learning_rate": 5.304507123250951e-06, "loss": 0.3974, "step": 29635 }, { "epoch": 0.6556820988906054, "grad_norm": 1.1158372163772583, "learning_rate": 5.3014393908915095e-06, "loss": 0.4676, "step": 29640 }, { "epoch": 0.6557927065321186, "grad_norm": 1.1249032020568848, "learning_rate": 5.298372225881797e-06, "loss": 0.4084, "step": 29645 }, { "epoch": 0.6559033141736319, "grad_norm": 1.5833489894866943, "learning_rate": 5.295305628592171e-06, "loss": 0.4289, "step": 29650 }, { "epoch": 0.6560139218151452, "grad_norm": 1.4874892234802246, "learning_rate": 5.2922395993929234e-06, "loss": 0.1848, "step": 29655 }, { "epoch": 0.6561245294566584, "grad_norm": 1.1536767482757568, "learning_rate": 5.289174138654274e-06, "loss": 0.2191, "step": 29660 }, { "epoch": 0.6562351370981716, "grad_norm": 0.8387190699577332, "learning_rate": 5.2861092467463774e-06, "loss": 0.326, "step": 29665 }, { "epoch": 0.6563457447396849, "grad_norm": 1.4377381801605225, "learning_rate": 5.2830449240393176e-06, "loss": 0.2872, "step": 29670 }, { "epoch": 0.6564563523811981, "grad_norm": 1.2011148929595947, "learning_rate": 5.279981170903114e-06, "loss": 0.3456, "step": 29675 }, { "epoch": 0.6565669600227114, "grad_norm": 0.956305205821991, "learning_rate": 5.276917987707709e-06, "loss": 0.3444, "step": 29680 }, { "epoch": 0.6566775676642247, "grad_norm": 1.2797117233276367, "learning_rate": 5.2738553748229845e-06, "loss": 0.318, "step": 29685 }, { "epoch": 0.656788175305738, "grad_norm": 1.7540147304534912, "learning_rate": 5.270793332618749e-06, "loss": 0.3083, "step": 29690 }, { "epoch": 0.6568987829472512, "grad_norm": 0.8726662993431091, "learning_rate": 5.267731861464742e-06, "loss": 0.3861, "step": 29695 }, { "epoch": 0.6570093905887645, "grad_norm": 1.3685952425003052, "learning_rate": 5.264670961730636e-06, "loss": 0.3862, "step": 29700 }, { "epoch": 0.6571199982302778, "grad_norm": 1.5691312551498413, "learning_rate": 5.261610633786034e-06, "loss": 0.3576, "step": 29705 }, { "epoch": 0.6572306058717909, "grad_norm": 0.9497473835945129, "learning_rate": 5.25855087800047e-06, "loss": 0.2311, "step": 29710 }, { "epoch": 0.6573412135133042, "grad_norm": 1.1006311178207397, "learning_rate": 5.255491694743409e-06, "loss": 0.4427, "step": 29715 }, { "epoch": 0.6574518211548175, "grad_norm": 0.9577587842941284, "learning_rate": 5.252433084384243e-06, "loss": 0.3195, "step": 29720 }, { "epoch": 0.6575624287963308, "grad_norm": 1.626408576965332, "learning_rate": 5.2493750472923025e-06, "loss": 0.2436, "step": 29725 }, { "epoch": 0.657673036437844, "grad_norm": 1.1586555242538452, "learning_rate": 5.2463175838368415e-06, "loss": 0.318, "step": 29730 }, { "epoch": 0.6577836440793573, "grad_norm": 1.984471321105957, "learning_rate": 5.243260694387051e-06, "loss": 0.4344, "step": 29735 }, { "epoch": 0.6578942517208706, "grad_norm": 1.4809876680374146, "learning_rate": 5.240204379312046e-06, "loss": 0.513, "step": 29740 }, { "epoch": 0.6580048593623838, "grad_norm": 1.0185900926589966, "learning_rate": 5.2371486389808775e-06, "loss": 0.2893, "step": 29745 }, { "epoch": 0.6581154670038971, "grad_norm": 1.1646413803100586, "learning_rate": 5.234093473762525e-06, "loss": 0.4001, "step": 29750 }, { "epoch": 0.6582260746454104, "grad_norm": 0.7737995982170105, "learning_rate": 5.231038884025898e-06, "loss": 0.369, "step": 29755 }, { "epoch": 0.6583366822869235, "grad_norm": 1.4431887865066528, "learning_rate": 5.227984870139838e-06, "loss": 0.2997, "step": 29760 }, { "epoch": 0.6584472899284368, "grad_norm": 0.9226342439651489, "learning_rate": 5.224931432473115e-06, "loss": 0.3426, "step": 29765 }, { "epoch": 0.6585578975699501, "grad_norm": 1.397496223449707, "learning_rate": 5.221878571394432e-06, "loss": 0.3288, "step": 29770 }, { "epoch": 0.6586685052114634, "grad_norm": 1.4808512926101685, "learning_rate": 5.21882628727242e-06, "loss": 0.3884, "step": 29775 }, { "epoch": 0.6587791128529766, "grad_norm": 1.4272478818893433, "learning_rate": 5.2157745804756435e-06, "loss": 0.2622, "step": 29780 }, { "epoch": 0.6588897204944899, "grad_norm": 1.2829066514968872, "learning_rate": 5.212723451372593e-06, "loss": 0.3813, "step": 29785 }, { "epoch": 0.6590003281360032, "grad_norm": 1.0759843587875366, "learning_rate": 5.209672900331693e-06, "loss": 0.3545, "step": 29790 }, { "epoch": 0.6591109357775164, "grad_norm": 1.5696910619735718, "learning_rate": 5.206622927721294e-06, "loss": 0.4628, "step": 29795 }, { "epoch": 0.6592215434190297, "grad_norm": 1.59585702419281, "learning_rate": 5.203573533909681e-06, "loss": 0.3658, "step": 29800 }, { "epoch": 0.6593321510605429, "grad_norm": 0.7595665454864502, "learning_rate": 5.200524719265068e-06, "loss": 0.3785, "step": 29805 }, { "epoch": 0.6594427587020562, "grad_norm": 0.7621791958808899, "learning_rate": 5.197476484155597e-06, "loss": 0.3087, "step": 29810 }, { "epoch": 0.6595533663435694, "grad_norm": 1.7842203378677368, "learning_rate": 5.194428828949342e-06, "loss": 0.2258, "step": 29815 }, { "epoch": 0.6596639739850827, "grad_norm": 2.77067494392395, "learning_rate": 5.191381754014308e-06, "loss": 0.4113, "step": 29820 }, { "epoch": 0.659774581626596, "grad_norm": 1.165095329284668, "learning_rate": 5.188335259718425e-06, "loss": 0.3351, "step": 29825 }, { "epoch": 0.6598851892681092, "grad_norm": 0.7100410461425781, "learning_rate": 5.185289346429559e-06, "loss": 0.3727, "step": 29830 }, { "epoch": 0.6599957969096225, "grad_norm": 0.8189888596534729, "learning_rate": 5.182244014515501e-06, "loss": 0.2994, "step": 29835 }, { "epoch": 0.6601064045511358, "grad_norm": 0.8439499735832214, "learning_rate": 5.179199264343976e-06, "loss": 0.3403, "step": 29840 }, { "epoch": 0.660217012192649, "grad_norm": 1.088546872138977, "learning_rate": 5.176155096282636e-06, "loss": 0.3037, "step": 29845 }, { "epoch": 0.6603276198341623, "grad_norm": 1.4173662662506104, "learning_rate": 5.1731115106990615e-06, "loss": 0.1969, "step": 29850 }, { "epoch": 0.6604382274756755, "grad_norm": 1.175028920173645, "learning_rate": 5.170068507960766e-06, "loss": 0.2994, "step": 29855 }, { "epoch": 0.6605488351171888, "grad_norm": 0.8664072751998901, "learning_rate": 5.1670260884351916e-06, "loss": 0.2876, "step": 29860 }, { "epoch": 0.660659442758702, "grad_norm": 1.167786717414856, "learning_rate": 5.163984252489711e-06, "loss": 0.3854, "step": 29865 }, { "epoch": 0.6607700504002153, "grad_norm": 1.2293047904968262, "learning_rate": 5.160943000491621e-06, "loss": 0.3425, "step": 29870 }, { "epoch": 0.6608806580417286, "grad_norm": 1.0743614435195923, "learning_rate": 5.157902332808153e-06, "loss": 0.3388, "step": 29875 }, { "epoch": 0.6609912656832418, "grad_norm": 1.2343385219573975, "learning_rate": 5.1548622498064695e-06, "loss": 0.289, "step": 29880 }, { "epoch": 0.6611018733247551, "grad_norm": 0.4394216239452362, "learning_rate": 5.151822751853657e-06, "loss": 0.4197, "step": 29885 }, { "epoch": 0.6612124809662684, "grad_norm": 1.16578209400177, "learning_rate": 5.148783839316734e-06, "loss": 0.3684, "step": 29890 }, { "epoch": 0.6613230886077817, "grad_norm": 1.2510385513305664, "learning_rate": 5.145745512562648e-06, "loss": 0.3073, "step": 29895 }, { "epoch": 0.6614336962492948, "grad_norm": 0.9783326387405396, "learning_rate": 5.142707771958277e-06, "loss": 0.4626, "step": 29900 }, { "epoch": 0.6615443038908081, "grad_norm": 1.066194772720337, "learning_rate": 5.139670617870428e-06, "loss": 0.2853, "step": 29905 }, { "epoch": 0.6616549115323214, "grad_norm": 2.152658224105835, "learning_rate": 5.136634050665835e-06, "loss": 0.3007, "step": 29910 }, { "epoch": 0.6617655191738346, "grad_norm": 0.8843711018562317, "learning_rate": 5.133598070711163e-06, "loss": 0.3261, "step": 29915 }, { "epoch": 0.6618761268153479, "grad_norm": 2.089574098587036, "learning_rate": 5.130562678373005e-06, "loss": 0.4533, "step": 29920 }, { "epoch": 0.6619867344568612, "grad_norm": 1.0043796300888062, "learning_rate": 5.127527874017885e-06, "loss": 0.3878, "step": 29925 }, { "epoch": 0.6620973420983745, "grad_norm": 0.9570770263671875, "learning_rate": 5.124493658012253e-06, "loss": 0.4718, "step": 29930 }, { "epoch": 0.6622079497398877, "grad_norm": 0.9859487414360046, "learning_rate": 5.12146003072249e-06, "loss": 0.2456, "step": 29935 }, { "epoch": 0.662318557381401, "grad_norm": 0.6165288090705872, "learning_rate": 5.118426992514908e-06, "loss": 0.3597, "step": 29940 }, { "epoch": 0.6624291650229143, "grad_norm": 1.5719988346099854, "learning_rate": 5.115394543755743e-06, "loss": 0.2627, "step": 29945 }, { "epoch": 0.6625397726644274, "grad_norm": 1.126786708831787, "learning_rate": 5.11236268481116e-06, "loss": 0.4009, "step": 29950 }, { "epoch": 0.6626503803059407, "grad_norm": 1.520407795906067, "learning_rate": 5.109331416047261e-06, "loss": 0.4201, "step": 29955 }, { "epoch": 0.662760987947454, "grad_norm": 1.622005820274353, "learning_rate": 5.106300737830067e-06, "loss": 0.2334, "step": 29960 }, { "epoch": 0.6628715955889672, "grad_norm": 1.5281263589859009, "learning_rate": 5.103270650525531e-06, "loss": 0.3862, "step": 29965 }, { "epoch": 0.6629822032304805, "grad_norm": 1.2828552722930908, "learning_rate": 5.100241154499537e-06, "loss": 0.447, "step": 29970 }, { "epoch": 0.6630928108719938, "grad_norm": 1.1979949474334717, "learning_rate": 5.097212250117894e-06, "loss": 0.3669, "step": 29975 }, { "epoch": 0.6632034185135071, "grad_norm": 1.143448829650879, "learning_rate": 5.094183937746342e-06, "loss": 0.1954, "step": 29980 }, { "epoch": 0.6633140261550203, "grad_norm": 1.2606468200683594, "learning_rate": 5.0911562177505495e-06, "loss": 0.4192, "step": 29985 }, { "epoch": 0.6634246337965336, "grad_norm": 1.6296566724777222, "learning_rate": 5.088129090496111e-06, "loss": 0.401, "step": 29990 }, { "epoch": 0.6635352414380468, "grad_norm": 0.7052159309387207, "learning_rate": 5.085102556348553e-06, "loss": 0.2688, "step": 29995 }, { "epoch": 0.66364584907956, "grad_norm": 1.360297441482544, "learning_rate": 5.082076615673328e-06, "loss": 0.3285, "step": 30000 }, { "epoch": 0.6637564567210733, "grad_norm": 1.5587188005447388, "learning_rate": 5.079051268835817e-06, "loss": 0.1655, "step": 30005 }, { "epoch": 0.6638670643625866, "grad_norm": 1.0520213842391968, "learning_rate": 5.076026516201329e-06, "loss": 0.3163, "step": 30010 }, { "epoch": 0.6639776720040999, "grad_norm": 1.2462443113327026, "learning_rate": 5.073002358135103e-06, "loss": 0.1843, "step": 30015 }, { "epoch": 0.6640882796456131, "grad_norm": 1.3921014070510864, "learning_rate": 5.069978795002305e-06, "loss": 0.3262, "step": 30020 }, { "epoch": 0.6641988872871264, "grad_norm": 1.0908998250961304, "learning_rate": 5.06695582716803e-06, "loss": 0.1979, "step": 30025 }, { "epoch": 0.6643094949286397, "grad_norm": 1.1814813613891602, "learning_rate": 5.063933454997299e-06, "loss": 0.3528, "step": 30030 }, { "epoch": 0.6644201025701529, "grad_norm": 1.026558756828308, "learning_rate": 5.060911678855063e-06, "loss": 0.3626, "step": 30035 }, { "epoch": 0.6645307102116662, "grad_norm": 2.029397487640381, "learning_rate": 5.057890499106202e-06, "loss": 0.4109, "step": 30040 }, { "epoch": 0.6646413178531794, "grad_norm": 1.2923814058303833, "learning_rate": 5.0548699161155194e-06, "loss": 0.3062, "step": 30045 }, { "epoch": 0.6647519254946926, "grad_norm": 2.056006908416748, "learning_rate": 5.051849930247752e-06, "loss": 0.3656, "step": 30050 }, { "epoch": 0.6648625331362059, "grad_norm": 0.901739776134491, "learning_rate": 5.048830541867562e-06, "loss": 0.3164, "step": 30055 }, { "epoch": 0.6649731407777192, "grad_norm": 0.9376824498176575, "learning_rate": 5.045811751339537e-06, "loss": 0.4214, "step": 30060 }, { "epoch": 0.6650837484192325, "grad_norm": 1.1318798065185547, "learning_rate": 5.0427935590282005e-06, "loss": 0.3569, "step": 30065 }, { "epoch": 0.6651943560607457, "grad_norm": 0.9116389751434326, "learning_rate": 5.039775965297994e-06, "loss": 0.4425, "step": 30070 }, { "epoch": 0.665304963702259, "grad_norm": 2.2829294204711914, "learning_rate": 5.036758970513292e-06, "loss": 0.2871, "step": 30075 }, { "epoch": 0.6654155713437723, "grad_norm": 1.5771467685699463, "learning_rate": 5.033742575038394e-06, "loss": 0.3535, "step": 30080 }, { "epoch": 0.6655261789852855, "grad_norm": 1.430616021156311, "learning_rate": 5.030726779237532e-06, "loss": 0.3501, "step": 30085 }, { "epoch": 0.6656367866267987, "grad_norm": 1.0560637712478638, "learning_rate": 5.0277115834748614e-06, "loss": 0.317, "step": 30090 }, { "epoch": 0.665747394268312, "grad_norm": 1.1487820148468018, "learning_rate": 5.024696988114464e-06, "loss": 0.3143, "step": 30095 }, { "epoch": 0.6658580019098252, "grad_norm": 0.6336850523948669, "learning_rate": 5.021682993520354e-06, "loss": 0.3647, "step": 30100 }, { "epoch": 0.6659686095513385, "grad_norm": 0.7212808728218079, "learning_rate": 5.01866960005647e-06, "loss": 0.2871, "step": 30105 }, { "epoch": 0.6660792171928518, "grad_norm": 1.1371415853500366, "learning_rate": 5.015656808086675e-06, "loss": 0.3091, "step": 30110 }, { "epoch": 0.6661898248343651, "grad_norm": 1.0679811239242554, "learning_rate": 5.012644617974767e-06, "loss": 0.1937, "step": 30115 }, { "epoch": 0.6663004324758783, "grad_norm": 1.2334462404251099, "learning_rate": 5.0096330300844645e-06, "loss": 0.28, "step": 30120 }, { "epoch": 0.6664110401173916, "grad_norm": 1.5531163215637207, "learning_rate": 5.006622044779416e-06, "loss": 0.375, "step": 30125 }, { "epoch": 0.6665216477589049, "grad_norm": 1.2193859815597534, "learning_rate": 5.0036116624232e-06, "loss": 0.403, "step": 30130 }, { "epoch": 0.6666322554004181, "grad_norm": 2.1499125957489014, "learning_rate": 5.000601883379314e-06, "loss": 0.3996, "step": 30135 }, { "epoch": 0.6667428630419313, "grad_norm": 1.5131486654281616, "learning_rate": 4.997592708011193e-06, "loss": 0.2321, "step": 30140 }, { "epoch": 0.6668534706834446, "grad_norm": 1.8671915531158447, "learning_rate": 4.99458413668219e-06, "loss": 0.2564, "step": 30145 }, { "epoch": 0.6669640783249579, "grad_norm": 2.5816657543182373, "learning_rate": 4.991576169755591e-06, "loss": 0.3658, "step": 30150 }, { "epoch": 0.6670746859664711, "grad_norm": 1.1468408107757568, "learning_rate": 4.988568807594607e-06, "loss": 0.2553, "step": 30155 }, { "epoch": 0.6671852936079844, "grad_norm": 1.7486056089401245, "learning_rate": 4.985562050562376e-06, "loss": 0.2643, "step": 30160 }, { "epoch": 0.6672959012494977, "grad_norm": 0.879423975944519, "learning_rate": 4.9825558990219625e-06, "loss": 0.324, "step": 30165 }, { "epoch": 0.6674065088910109, "grad_norm": 1.0102936029434204, "learning_rate": 4.979550353336359e-06, "loss": 0.2915, "step": 30170 }, { "epoch": 0.6675171165325242, "grad_norm": 1.2128194570541382, "learning_rate": 4.976545413868483e-06, "loss": 0.4416, "step": 30175 }, { "epoch": 0.6676277241740375, "grad_norm": 0.9897715449333191, "learning_rate": 4.973541080981182e-06, "loss": 0.5153, "step": 30180 }, { "epoch": 0.6677383318155506, "grad_norm": 0.8414472341537476, "learning_rate": 4.970537355037226e-06, "loss": 0.281, "step": 30185 }, { "epoch": 0.6678489394570639, "grad_norm": 1.4652291536331177, "learning_rate": 4.9675342363993154e-06, "loss": 0.3058, "step": 30190 }, { "epoch": 0.6679595470985772, "grad_norm": 1.3044395446777344, "learning_rate": 4.964531725430075e-06, "loss": 0.3272, "step": 30195 }, { "epoch": 0.6680701547400905, "grad_norm": 0.7987053990364075, "learning_rate": 4.961529822492058e-06, "loss": 0.3073, "step": 30200 }, { "epoch": 0.6681807623816037, "grad_norm": 1.5263874530792236, "learning_rate": 4.958528527947743e-06, "loss": 0.4302, "step": 30205 }, { "epoch": 0.668291370023117, "grad_norm": 1.1195791959762573, "learning_rate": 4.9555278421595355e-06, "loss": 0.4819, "step": 30210 }, { "epoch": 0.6684019776646303, "grad_norm": 1.480141520500183, "learning_rate": 4.952527765489767e-06, "loss": 0.2248, "step": 30215 }, { "epoch": 0.6685125853061435, "grad_norm": 1.3802231550216675, "learning_rate": 4.949528298300696e-06, "loss": 0.4114, "step": 30220 }, { "epoch": 0.6686231929476568, "grad_norm": 1.5837206840515137, "learning_rate": 4.946529440954506e-06, "loss": 0.3207, "step": 30225 }, { "epoch": 0.6687338005891701, "grad_norm": 0.7514464259147644, "learning_rate": 4.94353119381331e-06, "loss": 0.3309, "step": 30230 }, { "epoch": 0.6688444082306833, "grad_norm": 1.5681084394454956, "learning_rate": 4.940533557239145e-06, "loss": 0.3571, "step": 30235 }, { "epoch": 0.6689550158721965, "grad_norm": 0.9798123240470886, "learning_rate": 4.937536531593974e-06, "loss": 0.4321, "step": 30240 }, { "epoch": 0.6690656235137098, "grad_norm": 1.1013405323028564, "learning_rate": 4.934540117239687e-06, "loss": 0.3888, "step": 30245 }, { "epoch": 0.6691762311552231, "grad_norm": 0.9010697603225708, "learning_rate": 4.9315443145381e-06, "loss": 0.3408, "step": 30250 }, { "epoch": 0.6692868387967363, "grad_norm": 1.7444500923156738, "learning_rate": 4.928549123850956e-06, "loss": 0.322, "step": 30255 }, { "epoch": 0.6693974464382496, "grad_norm": 0.9474672675132751, "learning_rate": 4.925554545539923e-06, "loss": 0.3529, "step": 30260 }, { "epoch": 0.6695080540797629, "grad_norm": 1.2207117080688477, "learning_rate": 4.922560579966595e-06, "loss": 0.2786, "step": 30265 }, { "epoch": 0.6696186617212762, "grad_norm": 1.561257243156433, "learning_rate": 4.919567227492493e-06, "loss": 0.2911, "step": 30270 }, { "epoch": 0.6697292693627894, "grad_norm": 1.594152569770813, "learning_rate": 4.9165744884790635e-06, "loss": 0.3261, "step": 30275 }, { "epoch": 0.6698398770043026, "grad_norm": 1.747525930404663, "learning_rate": 4.913582363287678e-06, "loss": 0.3481, "step": 30280 }, { "epoch": 0.6699504846458159, "grad_norm": 0.9398006200790405, "learning_rate": 4.910590852279634e-06, "loss": 0.3663, "step": 30285 }, { "epoch": 0.6700610922873291, "grad_norm": 1.8356037139892578, "learning_rate": 4.907599955816158e-06, "loss": 0.4606, "step": 30290 }, { "epoch": 0.6701716999288424, "grad_norm": 1.1895380020141602, "learning_rate": 4.904609674258397e-06, "loss": 0.4307, "step": 30295 }, { "epoch": 0.6702823075703557, "grad_norm": 2.0477454662323, "learning_rate": 4.901620007967429e-06, "loss": 0.311, "step": 30300 }, { "epoch": 0.670392915211869, "grad_norm": 1.13949453830719, "learning_rate": 4.898630957304255e-06, "loss": 0.2557, "step": 30305 }, { "epoch": 0.6705035228533822, "grad_norm": 1.9082565307617188, "learning_rate": 4.895642522629799e-06, "loss": 0.4606, "step": 30310 }, { "epoch": 0.6706141304948955, "grad_norm": 0.5313282012939453, "learning_rate": 4.892654704304916e-06, "loss": 0.2212, "step": 30315 }, { "epoch": 0.6707247381364088, "grad_norm": 2.2150864601135254, "learning_rate": 4.889667502690386e-06, "loss": 0.2891, "step": 30320 }, { "epoch": 0.670835345777922, "grad_norm": 1.5611084699630737, "learning_rate": 4.886680918146909e-06, "loss": 0.3387, "step": 30325 }, { "epoch": 0.6709459534194352, "grad_norm": 0.7373268604278564, "learning_rate": 4.883694951035116e-06, "loss": 0.2949, "step": 30330 }, { "epoch": 0.6710565610609485, "grad_norm": 1.1493325233459473, "learning_rate": 4.880709601715561e-06, "loss": 0.3433, "step": 30335 }, { "epoch": 0.6711671687024617, "grad_norm": 1.1579192876815796, "learning_rate": 4.877724870548725e-06, "loss": 0.3446, "step": 30340 }, { "epoch": 0.671277776343975, "grad_norm": 1.1878578662872314, "learning_rate": 4.874740757895011e-06, "loss": 0.4174, "step": 30345 }, { "epoch": 0.6713883839854883, "grad_norm": 1.44257652759552, "learning_rate": 4.871757264114752e-06, "loss": 0.2923, "step": 30350 }, { "epoch": 0.6714989916270016, "grad_norm": 1.394869089126587, "learning_rate": 4.868774389568204e-06, "loss": 0.3827, "step": 30355 }, { "epoch": 0.6716095992685148, "grad_norm": 1.2728110551834106, "learning_rate": 4.865792134615547e-06, "loss": 0.3935, "step": 30360 }, { "epoch": 0.6717202069100281, "grad_norm": 0.9762117266654968, "learning_rate": 4.862810499616888e-06, "loss": 0.3685, "step": 30365 }, { "epoch": 0.6718308145515414, "grad_norm": 0.9318594932556152, "learning_rate": 4.859829484932258e-06, "loss": 0.4898, "step": 30370 }, { "epoch": 0.6719414221930545, "grad_norm": 1.5858899354934692, "learning_rate": 4.856849090921614e-06, "loss": 0.2299, "step": 30375 }, { "epoch": 0.6720520298345678, "grad_norm": 1.6898808479309082, "learning_rate": 4.853869317944837e-06, "loss": 0.3914, "step": 30380 }, { "epoch": 0.6721626374760811, "grad_norm": 0.8400896191596985, "learning_rate": 4.850890166361734e-06, "loss": 0.2675, "step": 30385 }, { "epoch": 0.6722732451175943, "grad_norm": 0.5103498697280884, "learning_rate": 4.847911636532037e-06, "loss": 0.2128, "step": 30390 }, { "epoch": 0.6723838527591076, "grad_norm": 0.6358826160430908, "learning_rate": 4.844933728815403e-06, "loss": 0.3166, "step": 30395 }, { "epoch": 0.6724944604006209, "grad_norm": 0.8142677545547485, "learning_rate": 4.841956443571412e-06, "loss": 0.3057, "step": 30400 }, { "epoch": 0.6726050680421342, "grad_norm": 0.4745847284793854, "learning_rate": 4.838979781159571e-06, "loss": 0.2025, "step": 30405 }, { "epoch": 0.6727156756836474, "grad_norm": 0.9905698299407959, "learning_rate": 4.836003741939311e-06, "loss": 0.4183, "step": 30410 }, { "epoch": 0.6728262833251607, "grad_norm": 1.4201782941818237, "learning_rate": 4.833028326269988e-06, "loss": 0.3356, "step": 30415 }, { "epoch": 0.672936890966674, "grad_norm": 1.480954647064209, "learning_rate": 4.830053534510884e-06, "loss": 0.4417, "step": 30420 }, { "epoch": 0.6730474986081871, "grad_norm": 0.8522579669952393, "learning_rate": 4.8270793670212005e-06, "loss": 0.3917, "step": 30425 }, { "epoch": 0.6731581062497004, "grad_norm": 1.4986870288848877, "learning_rate": 4.82410582416007e-06, "loss": 0.2112, "step": 30430 }, { "epoch": 0.6732687138912137, "grad_norm": 1.3357040882110596, "learning_rate": 4.821132906286546e-06, "loss": 0.2816, "step": 30435 }, { "epoch": 0.673379321532727, "grad_norm": 1.1028449535369873, "learning_rate": 4.818160613759608e-06, "loss": 0.35, "step": 30440 }, { "epoch": 0.6734899291742402, "grad_norm": 1.4554344415664673, "learning_rate": 4.81518894693816e-06, "loss": 0.3477, "step": 30445 }, { "epoch": 0.6736005368157535, "grad_norm": 0.9516786932945251, "learning_rate": 4.812217906181027e-06, "loss": 0.2924, "step": 30450 }, { "epoch": 0.6737111444572668, "grad_norm": 0.8818154335021973, "learning_rate": 4.809247491846964e-06, "loss": 0.1786, "step": 30455 }, { "epoch": 0.67382175209878, "grad_norm": 0.9956861138343811, "learning_rate": 4.806277704294646e-06, "loss": 0.2897, "step": 30460 }, { "epoch": 0.6739323597402933, "grad_norm": 1.4197156429290771, "learning_rate": 4.803308543882676e-06, "loss": 0.2796, "step": 30465 }, { "epoch": 0.6740429673818065, "grad_norm": 0.8826672434806824, "learning_rate": 4.800340010969577e-06, "loss": 0.247, "step": 30470 }, { "epoch": 0.6741535750233197, "grad_norm": 0.9884036183357239, "learning_rate": 4.7973721059138e-06, "loss": 0.3468, "step": 30475 }, { "epoch": 0.674264182664833, "grad_norm": 0.9029163718223572, "learning_rate": 4.794404829073716e-06, "loss": 0.2436, "step": 30480 }, { "epoch": 0.6743747903063463, "grad_norm": 1.5797361135482788, "learning_rate": 4.791438180807627e-06, "loss": 0.3836, "step": 30485 }, { "epoch": 0.6744853979478596, "grad_norm": 0.8453651666641235, "learning_rate": 4.7884721614737516e-06, "loss": 0.3621, "step": 30490 }, { "epoch": 0.6745960055893728, "grad_norm": 1.4799820184707642, "learning_rate": 4.785506771430236e-06, "loss": 0.2189, "step": 30495 }, { "epoch": 0.6747066132308861, "grad_norm": 0.8228428959846497, "learning_rate": 4.782542011035151e-06, "loss": 0.2433, "step": 30500 }, { "epoch": 0.6748172208723994, "grad_norm": 0.6610034108161926, "learning_rate": 4.77957788064649e-06, "loss": 0.368, "step": 30505 }, { "epoch": 0.6749278285139126, "grad_norm": 0.6179741024971008, "learning_rate": 4.776614380622172e-06, "loss": 0.2856, "step": 30510 }, { "epoch": 0.6750384361554258, "grad_norm": 0.9641457796096802, "learning_rate": 4.773651511320037e-06, "loss": 0.2759, "step": 30515 }, { "epoch": 0.6751490437969391, "grad_norm": 0.6657909750938416, "learning_rate": 4.770689273097851e-06, "loss": 0.3234, "step": 30520 }, { "epoch": 0.6752596514384523, "grad_norm": 0.8949538469314575, "learning_rate": 4.767727666313304e-06, "loss": 0.2329, "step": 30525 }, { "epoch": 0.6753702590799656, "grad_norm": 0.22511836886405945, "learning_rate": 4.764766691324009e-06, "loss": 0.3308, "step": 30530 }, { "epoch": 0.6754808667214789, "grad_norm": 2.188692808151245, "learning_rate": 4.761806348487502e-06, "loss": 0.3468, "step": 30535 }, { "epoch": 0.6755914743629922, "grad_norm": 1.0784991979599, "learning_rate": 4.758846638161244e-06, "loss": 0.3051, "step": 30540 }, { "epoch": 0.6757020820045054, "grad_norm": 1.0895739793777466, "learning_rate": 4.755887560702619e-06, "loss": 0.2969, "step": 30545 }, { "epoch": 0.6758126896460187, "grad_norm": 0.9683560132980347, "learning_rate": 4.752929116468935e-06, "loss": 0.1711, "step": 30550 }, { "epoch": 0.675923297287532, "grad_norm": 1.4970817565917969, "learning_rate": 4.749971305817424e-06, "loss": 0.3875, "step": 30555 }, { "epoch": 0.6760339049290452, "grad_norm": 1.2397857904434204, "learning_rate": 4.7470141291052374e-06, "loss": 0.349, "step": 30560 }, { "epoch": 0.6761445125705584, "grad_norm": 1.3324848413467407, "learning_rate": 4.744057586689458e-06, "loss": 0.4217, "step": 30565 }, { "epoch": 0.6762551202120717, "grad_norm": 1.6242388486862183, "learning_rate": 4.741101678927083e-06, "loss": 0.4346, "step": 30570 }, { "epoch": 0.676365727853585, "grad_norm": 0.8196778893470764, "learning_rate": 4.738146406175041e-06, "loss": 0.2771, "step": 30575 }, { "epoch": 0.6764763354950982, "grad_norm": 1.5742672681808472, "learning_rate": 4.735191768790177e-06, "loss": 0.3169, "step": 30580 }, { "epoch": 0.6765869431366115, "grad_norm": 1.687225103378296, "learning_rate": 4.732237767129265e-06, "loss": 0.2448, "step": 30585 }, { "epoch": 0.6766975507781248, "grad_norm": 1.3666291236877441, "learning_rate": 4.729284401549e-06, "loss": 0.3369, "step": 30590 }, { "epoch": 0.676808158419638, "grad_norm": 1.5273116827011108, "learning_rate": 4.726331672405998e-06, "loss": 0.3189, "step": 30595 }, { "epoch": 0.6769187660611513, "grad_norm": 1.2914336919784546, "learning_rate": 4.723379580056801e-06, "loss": 0.398, "step": 30600 }, { "epoch": 0.6770293737026646, "grad_norm": 0.6334279775619507, "learning_rate": 4.720428124857873e-06, "loss": 0.301, "step": 30605 }, { "epoch": 0.6771399813441777, "grad_norm": 2.154337167739868, "learning_rate": 4.717477307165602e-06, "loss": 0.3492, "step": 30610 }, { "epoch": 0.677250588985691, "grad_norm": 0.8609282374382019, "learning_rate": 4.714527127336298e-06, "loss": 0.4386, "step": 30615 }, { "epoch": 0.6773611966272043, "grad_norm": 1.138684868812561, "learning_rate": 4.711577585726194e-06, "loss": 0.3549, "step": 30620 }, { "epoch": 0.6774718042687176, "grad_norm": 1.2860901355743408, "learning_rate": 4.708628682691446e-06, "loss": 0.268, "step": 30625 }, { "epoch": 0.6775824119102308, "grad_norm": 1.387389063835144, "learning_rate": 4.705680418588134e-06, "loss": 0.182, "step": 30630 }, { "epoch": 0.6776930195517441, "grad_norm": 1.1844403743743896, "learning_rate": 4.70273279377226e-06, "loss": 0.2211, "step": 30635 }, { "epoch": 0.6778036271932574, "grad_norm": 1.0340619087219238, "learning_rate": 4.699785808599747e-06, "loss": 0.211, "step": 30640 }, { "epoch": 0.6779142348347706, "grad_norm": 1.8601065874099731, "learning_rate": 4.696839463426446e-06, "loss": 0.3198, "step": 30645 }, { "epoch": 0.6780248424762839, "grad_norm": 0.869627058506012, "learning_rate": 4.693893758608124e-06, "loss": 0.1999, "step": 30650 }, { "epoch": 0.6781354501177972, "grad_norm": 1.2292274236679077, "learning_rate": 4.690948694500474e-06, "loss": 0.3293, "step": 30655 }, { "epoch": 0.6782460577593104, "grad_norm": 1.1127578020095825, "learning_rate": 4.688004271459114e-06, "loss": 0.2825, "step": 30660 }, { "epoch": 0.6783566654008236, "grad_norm": 2.8685619831085205, "learning_rate": 4.6850604898395795e-06, "loss": 0.4202, "step": 30665 }, { "epoch": 0.6784672730423369, "grad_norm": 1.0073198080062866, "learning_rate": 4.682117349997333e-06, "loss": 0.3061, "step": 30670 }, { "epoch": 0.6785778806838502, "grad_norm": 0.48041534423828125, "learning_rate": 4.6791748522877564e-06, "loss": 0.3063, "step": 30675 }, { "epoch": 0.6786884883253634, "grad_norm": 0.775104284286499, "learning_rate": 4.676232997066157e-06, "loss": 0.3075, "step": 30680 }, { "epoch": 0.6787990959668767, "grad_norm": 1.7245935201644897, "learning_rate": 4.6732917846877624e-06, "loss": 0.3319, "step": 30685 }, { "epoch": 0.67890970360839, "grad_norm": 1.2109500169754028, "learning_rate": 4.6703512155077225e-06, "loss": 0.4547, "step": 30690 }, { "epoch": 0.6790203112499033, "grad_norm": 1.0544586181640625, "learning_rate": 4.66741128988111e-06, "loss": 0.2801, "step": 30695 }, { "epoch": 0.6791309188914165, "grad_norm": 0.8114449381828308, "learning_rate": 4.66447200816292e-06, "loss": 0.4102, "step": 30700 }, { "epoch": 0.6792415265329297, "grad_norm": 1.4893931150436401, "learning_rate": 4.661533370708071e-06, "loss": 0.441, "step": 30705 }, { "epoch": 0.679352134174443, "grad_norm": 1.3450019359588623, "learning_rate": 4.658595377871402e-06, "loss": 0.283, "step": 30710 }, { "epoch": 0.6794627418159562, "grad_norm": 0.786726176738739, "learning_rate": 4.655658030007674e-06, "loss": 0.3568, "step": 30715 }, { "epoch": 0.6795733494574695, "grad_norm": 1.8010419607162476, "learning_rate": 4.652721327471572e-06, "loss": 0.2465, "step": 30720 }, { "epoch": 0.6796839570989828, "grad_norm": 0.9977709054946899, "learning_rate": 4.6497852706177005e-06, "loss": 0.2265, "step": 30725 }, { "epoch": 0.679794564740496, "grad_norm": 1.1328421831130981, "learning_rate": 4.646849859800588e-06, "loss": 0.3889, "step": 30730 }, { "epoch": 0.6799051723820093, "grad_norm": 0.9600478410720825, "learning_rate": 4.643915095374685e-06, "loss": 0.3541, "step": 30735 }, { "epoch": 0.6800157800235226, "grad_norm": 0.8244717717170715, "learning_rate": 4.640980977694363e-06, "loss": 0.2614, "step": 30740 }, { "epoch": 0.6801263876650359, "grad_norm": 1.1681206226348877, "learning_rate": 4.638047507113915e-06, "loss": 0.3046, "step": 30745 }, { "epoch": 0.6802369953065491, "grad_norm": 0.8509520292282104, "learning_rate": 4.635114683987559e-06, "loss": 0.3378, "step": 30750 }, { "epoch": 0.6803476029480623, "grad_norm": 1.3412646055221558, "learning_rate": 4.6321825086694304e-06, "loss": 0.43, "step": 30755 }, { "epoch": 0.6804582105895756, "grad_norm": 0.8067130446434021, "learning_rate": 4.629250981513589e-06, "loss": 0.3038, "step": 30760 }, { "epoch": 0.6805688182310888, "grad_norm": 1.6082321405410767, "learning_rate": 4.626320102874015e-06, "loss": 0.3311, "step": 30765 }, { "epoch": 0.6806794258726021, "grad_norm": 1.5037494897842407, "learning_rate": 4.6233898731046125e-06, "loss": 0.3393, "step": 30770 }, { "epoch": 0.6807900335141154, "grad_norm": 1.19599187374115, "learning_rate": 4.620460292559205e-06, "loss": 0.3108, "step": 30775 }, { "epoch": 0.6809006411556286, "grad_norm": 1.2591239213943481, "learning_rate": 4.617531361591538e-06, "loss": 0.3808, "step": 30780 }, { "epoch": 0.6810112487971419, "grad_norm": 1.2664598226547241, "learning_rate": 4.614603080555281e-06, "loss": 0.2128, "step": 30785 }, { "epoch": 0.6811218564386552, "grad_norm": 0.7545126676559448, "learning_rate": 4.61167544980402e-06, "loss": 0.2868, "step": 30790 }, { "epoch": 0.6812324640801685, "grad_norm": 1.0223206281661987, "learning_rate": 4.6087484696912695e-06, "loss": 0.3809, "step": 30795 }, { "epoch": 0.6813430717216816, "grad_norm": 1.4805902242660522, "learning_rate": 4.6058221405704575e-06, "loss": 0.285, "step": 30800 }, { "epoch": 0.6814536793631949, "grad_norm": 1.5615901947021484, "learning_rate": 4.602896462794939e-06, "loss": 0.2861, "step": 30805 }, { "epoch": 0.6815642870047082, "grad_norm": 1.6124155521392822, "learning_rate": 4.599971436717989e-06, "loss": 0.4291, "step": 30810 }, { "epoch": 0.6816748946462214, "grad_norm": 0.8921675682067871, "learning_rate": 4.5970470626928035e-06, "loss": 0.3216, "step": 30815 }, { "epoch": 0.6817855022877347, "grad_norm": 1.717615008354187, "learning_rate": 4.594123341072498e-06, "loss": 0.2374, "step": 30820 }, { "epoch": 0.681896109929248, "grad_norm": 1.3678346872329712, "learning_rate": 4.591200272210114e-06, "loss": 0.2542, "step": 30825 }, { "epoch": 0.6820067175707613, "grad_norm": 1.927201747894287, "learning_rate": 4.588277856458608e-06, "loss": 0.3344, "step": 30830 }, { "epoch": 0.6821173252122745, "grad_norm": 1.4983173608779907, "learning_rate": 4.585356094170863e-06, "loss": 0.3789, "step": 30835 }, { "epoch": 0.6822279328537878, "grad_norm": 1.2903711795806885, "learning_rate": 4.582434985699679e-06, "loss": 0.4323, "step": 30840 }, { "epoch": 0.6823385404953011, "grad_norm": 0.7402361631393433, "learning_rate": 4.579514531397782e-06, "loss": 0.2593, "step": 30845 }, { "epoch": 0.6824491481368142, "grad_norm": 1.2074172496795654, "learning_rate": 4.576594731617813e-06, "loss": 0.3042, "step": 30850 }, { "epoch": 0.6825597557783275, "grad_norm": 1.3758304119110107, "learning_rate": 4.573675586712337e-06, "loss": 0.3537, "step": 30855 }, { "epoch": 0.6826703634198408, "grad_norm": 1.0936894416809082, "learning_rate": 4.570757097033842e-06, "loss": 0.2865, "step": 30860 }, { "epoch": 0.682780971061354, "grad_norm": 1.806044340133667, "learning_rate": 4.567839262934733e-06, "loss": 0.4094, "step": 30865 }, { "epoch": 0.6828915787028673, "grad_norm": 0.6538559794425964, "learning_rate": 4.564922084767338e-06, "loss": 0.2919, "step": 30870 }, { "epoch": 0.6830021863443806, "grad_norm": 1.437546730041504, "learning_rate": 4.562005562883905e-06, "loss": 0.3839, "step": 30875 }, { "epoch": 0.6831127939858939, "grad_norm": 1.1244784593582153, "learning_rate": 4.559089697636605e-06, "loss": 0.5403, "step": 30880 }, { "epoch": 0.6832234016274071, "grad_norm": 1.5177770853042603, "learning_rate": 4.556174489377525e-06, "loss": 0.2704, "step": 30885 }, { "epoch": 0.6833340092689204, "grad_norm": 0.8738405704498291, "learning_rate": 4.553259938458677e-06, "loss": 0.3257, "step": 30890 }, { "epoch": 0.6834446169104336, "grad_norm": 1.0345063209533691, "learning_rate": 4.5503460452319925e-06, "loss": 0.3019, "step": 30895 }, { "epoch": 0.6835552245519468, "grad_norm": 1.8298015594482422, "learning_rate": 4.547432810049323e-06, "loss": 0.5913, "step": 30900 }, { "epoch": 0.6836658321934601, "grad_norm": 1.2227145433425903, "learning_rate": 4.544520233262439e-06, "loss": 0.4666, "step": 30905 }, { "epoch": 0.6837764398349734, "grad_norm": 1.0700279474258423, "learning_rate": 4.541608315223036e-06, "loss": 0.3144, "step": 30910 }, { "epoch": 0.6838870474764867, "grad_norm": 1.1419090032577515, "learning_rate": 4.538697056282725e-06, "loss": 0.1701, "step": 30915 }, { "epoch": 0.6839976551179999, "grad_norm": 1.5932196378707886, "learning_rate": 4.5357864567930395e-06, "loss": 0.2681, "step": 30920 }, { "epoch": 0.6841082627595132, "grad_norm": 0.7006977200508118, "learning_rate": 4.532876517105436e-06, "loss": 0.2039, "step": 30925 }, { "epoch": 0.6842188704010265, "grad_norm": 1.1409536600112915, "learning_rate": 4.529967237571287e-06, "loss": 0.2508, "step": 30930 }, { "epoch": 0.6843294780425397, "grad_norm": 1.2676734924316406, "learning_rate": 4.527058618541887e-06, "loss": 0.4856, "step": 30935 }, { "epoch": 0.684440085684053, "grad_norm": 1.109927773475647, "learning_rate": 4.52415066036845e-06, "loss": 0.3644, "step": 30940 }, { "epoch": 0.6845506933255662, "grad_norm": 0.7303546667098999, "learning_rate": 4.5212433634021135e-06, "loss": 0.2725, "step": 30945 }, { "epoch": 0.6846613009670794, "grad_norm": 0.420353502035141, "learning_rate": 4.518336727993931e-06, "loss": 0.2988, "step": 30950 }, { "epoch": 0.6847719086085927, "grad_norm": 1.155282974243164, "learning_rate": 4.5154307544948785e-06, "loss": 0.3669, "step": 30955 }, { "epoch": 0.684882516250106, "grad_norm": 0.9993428587913513, "learning_rate": 4.512525443255851e-06, "loss": 0.3819, "step": 30960 }, { "epoch": 0.6849931238916193, "grad_norm": 1.8934497833251953, "learning_rate": 4.509620794627664e-06, "loss": 0.3552, "step": 30965 }, { "epoch": 0.6851037315331325, "grad_norm": 0.7017208337783813, "learning_rate": 4.5067168089610524e-06, "loss": 0.3421, "step": 30970 }, { "epoch": 0.6852143391746458, "grad_norm": 0.827869713306427, "learning_rate": 4.503813486606672e-06, "loss": 0.4328, "step": 30975 }, { "epoch": 0.6853249468161591, "grad_norm": 0.9729675054550171, "learning_rate": 4.500910827915098e-06, "loss": 0.3164, "step": 30980 }, { "epoch": 0.6854355544576723, "grad_norm": 1.0192128419876099, "learning_rate": 4.498008833236826e-06, "loss": 0.2916, "step": 30985 }, { "epoch": 0.6855461620991855, "grad_norm": 0.8386598825454712, "learning_rate": 4.495107502922271e-06, "loss": 0.2977, "step": 30990 }, { "epoch": 0.6856567697406988, "grad_norm": 1.6400225162506104, "learning_rate": 4.4922068373217676e-06, "loss": 0.3851, "step": 30995 }, { "epoch": 0.685767377382212, "grad_norm": 1.2403299808502197, "learning_rate": 4.48930683678557e-06, "loss": 0.2238, "step": 31000 }, { "epoch": 0.6858779850237253, "grad_norm": 1.4281771183013916, "learning_rate": 4.486407501663852e-06, "loss": 0.3359, "step": 31005 }, { "epoch": 0.6859885926652386, "grad_norm": 1.1243656873703003, "learning_rate": 4.4835088323067074e-06, "loss": 0.3904, "step": 31010 }, { "epoch": 0.6860992003067519, "grad_norm": 0.8574025630950928, "learning_rate": 4.480610829064152e-06, "loss": 0.3775, "step": 31015 }, { "epoch": 0.6862098079482651, "grad_norm": 1.023727536201477, "learning_rate": 4.477713492286114e-06, "loss": 0.364, "step": 31020 }, { "epoch": 0.6863204155897784, "grad_norm": 0.9627255201339722, "learning_rate": 4.474816822322451e-06, "loss": 0.4293, "step": 31025 }, { "epoch": 0.6864310232312917, "grad_norm": 1.126139760017395, "learning_rate": 4.471920819522932e-06, "loss": 0.3149, "step": 31030 }, { "epoch": 0.686541630872805, "grad_norm": 0.983982264995575, "learning_rate": 4.4690254842372484e-06, "loss": 0.3789, "step": 31035 }, { "epoch": 0.6866522385143181, "grad_norm": 1.3901628255844116, "learning_rate": 4.466130816815012e-06, "loss": 0.465, "step": 31040 }, { "epoch": 0.6867628461558314, "grad_norm": 2.871687650680542, "learning_rate": 4.463236817605754e-06, "loss": 0.2081, "step": 31045 }, { "epoch": 0.6868734537973447, "grad_norm": 1.3119219541549683, "learning_rate": 4.460343486958922e-06, "loss": 0.4816, "step": 31050 }, { "epoch": 0.6869840614388579, "grad_norm": 0.7608881592750549, "learning_rate": 4.457450825223884e-06, "loss": 0.2516, "step": 31055 }, { "epoch": 0.6870946690803712, "grad_norm": 1.025861382484436, "learning_rate": 4.454558832749931e-06, "loss": 0.2434, "step": 31060 }, { "epoch": 0.6872052767218845, "grad_norm": 0.44211825728416443, "learning_rate": 4.451667509886268e-06, "loss": 0.2763, "step": 31065 }, { "epoch": 0.6873158843633977, "grad_norm": 1.1444077491760254, "learning_rate": 4.448776856982023e-06, "loss": 0.3704, "step": 31070 }, { "epoch": 0.687426492004911, "grad_norm": 1.8552953004837036, "learning_rate": 4.445886874386239e-06, "loss": 0.3099, "step": 31075 }, { "epoch": 0.6875370996464243, "grad_norm": 0.7065847516059875, "learning_rate": 4.442997562447884e-06, "loss": 0.2216, "step": 31080 }, { "epoch": 0.6876477072879374, "grad_norm": 0.9212736487388611, "learning_rate": 4.440108921515838e-06, "loss": 0.2979, "step": 31085 }, { "epoch": 0.6877583149294507, "grad_norm": 1.4273865222930908, "learning_rate": 4.437220951938905e-06, "loss": 0.3775, "step": 31090 }, { "epoch": 0.687868922570964, "grad_norm": 2.219745635986328, "learning_rate": 4.4343336540658055e-06, "loss": 0.3875, "step": 31095 }, { "epoch": 0.6879795302124773, "grad_norm": 1.1719613075256348, "learning_rate": 4.431447028245182e-06, "loss": 0.4016, "step": 31100 }, { "epoch": 0.6880901378539905, "grad_norm": 2.171905279159546, "learning_rate": 4.4285610748255914e-06, "loss": 0.3676, "step": 31105 }, { "epoch": 0.6882007454955038, "grad_norm": 0.8265579342842102, "learning_rate": 4.425675794155513e-06, "loss": 0.2251, "step": 31110 }, { "epoch": 0.6883113531370171, "grad_norm": 2.2238521575927734, "learning_rate": 4.4227911865833425e-06, "loss": 0.4141, "step": 31115 }, { "epoch": 0.6884219607785304, "grad_norm": 1.3413059711456299, "learning_rate": 4.419907252457397e-06, "loss": 0.3885, "step": 31120 }, { "epoch": 0.6885325684200436, "grad_norm": 1.0056971311569214, "learning_rate": 4.41702399212591e-06, "loss": 0.3235, "step": 31125 }, { "epoch": 0.6886431760615569, "grad_norm": 1.2522871494293213, "learning_rate": 4.414141405937033e-06, "loss": 0.4546, "step": 31130 }, { "epoch": 0.68875378370307, "grad_norm": 1.4703086614608765, "learning_rate": 4.411259494238839e-06, "loss": 0.3397, "step": 31135 }, { "epoch": 0.6888643913445833, "grad_norm": 1.3903625011444092, "learning_rate": 4.408378257379317e-06, "loss": 0.3165, "step": 31140 }, { "epoch": 0.6889749989860966, "grad_norm": 2.6765644550323486, "learning_rate": 4.405497695706374e-06, "loss": 0.2664, "step": 31145 }, { "epoch": 0.6890856066276099, "grad_norm": 1.3394252061843872, "learning_rate": 4.40261780956784e-06, "loss": 0.289, "step": 31150 }, { "epoch": 0.6891962142691231, "grad_norm": 0.9650225043296814, "learning_rate": 4.39973859931146e-06, "loss": 0.2234, "step": 31155 }, { "epoch": 0.6893068219106364, "grad_norm": 1.6992332935333252, "learning_rate": 4.396860065284896e-06, "loss": 0.4598, "step": 31160 }, { "epoch": 0.6894174295521497, "grad_norm": 0.9924720525741577, "learning_rate": 4.393982207835731e-06, "loss": 0.3996, "step": 31165 }, { "epoch": 0.689528037193663, "grad_norm": 2.4846065044403076, "learning_rate": 4.391105027311464e-06, "loss": 0.3054, "step": 31170 }, { "epoch": 0.6896386448351762, "grad_norm": 2.247206926345825, "learning_rate": 4.388228524059515e-06, "loss": 0.2987, "step": 31175 }, { "epoch": 0.6897492524766894, "grad_norm": 1.0423647165298462, "learning_rate": 4.385352698427222e-06, "loss": 0.2375, "step": 31180 }, { "epoch": 0.6898598601182027, "grad_norm": 2.8176028728485107, "learning_rate": 4.382477550761839e-06, "loss": 0.4417, "step": 31185 }, { "epoch": 0.6899704677597159, "grad_norm": 0.8434734344482422, "learning_rate": 4.379603081410538e-06, "loss": 0.3439, "step": 31190 }, { "epoch": 0.6900810754012292, "grad_norm": 0.8830681443214417, "learning_rate": 4.376729290720412e-06, "loss": 0.2206, "step": 31195 }, { "epoch": 0.6901916830427425, "grad_norm": 1.0220798254013062, "learning_rate": 4.373856179038468e-06, "loss": 0.2566, "step": 31200 }, { "epoch": 0.6903022906842557, "grad_norm": 1.3588703870773315, "learning_rate": 4.370983746711637e-06, "loss": 0.3515, "step": 31205 }, { "epoch": 0.690412898325769, "grad_norm": 1.5787231922149658, "learning_rate": 4.36811199408676e-06, "loss": 0.3439, "step": 31210 }, { "epoch": 0.6905235059672823, "grad_norm": 1.1582049131393433, "learning_rate": 4.365240921510604e-06, "loss": 0.4355, "step": 31215 }, { "epoch": 0.6906341136087956, "grad_norm": 0.9831572771072388, "learning_rate": 4.362370529329848e-06, "loss": 0.368, "step": 31220 }, { "epoch": 0.6907447212503087, "grad_norm": 0.5991696715354919, "learning_rate": 4.359500817891092e-06, "loss": 0.2668, "step": 31225 }, { "epoch": 0.690855328891822, "grad_norm": 1.2475101947784424, "learning_rate": 4.356631787540852e-06, "loss": 0.3263, "step": 31230 }, { "epoch": 0.6909659365333353, "grad_norm": 0.6928271055221558, "learning_rate": 4.353763438625562e-06, "loss": 0.3747, "step": 31235 }, { "epoch": 0.6910765441748485, "grad_norm": 1.265657663345337, "learning_rate": 4.350895771491576e-06, "loss": 0.386, "step": 31240 }, { "epoch": 0.6911871518163618, "grad_norm": 1.1497578620910645, "learning_rate": 4.348028786485162e-06, "loss": 0.3564, "step": 31245 }, { "epoch": 0.6912977594578751, "grad_norm": 0.9800723195075989, "learning_rate": 4.345162483952509e-06, "loss": 0.2406, "step": 31250 }, { "epoch": 0.6914083670993884, "grad_norm": 0.8186519145965576, "learning_rate": 4.342296864239723e-06, "loss": 0.3753, "step": 31255 }, { "epoch": 0.6915189747409016, "grad_norm": 1.5906147956848145, "learning_rate": 4.339431927692823e-06, "loss": 0.3316, "step": 31260 }, { "epoch": 0.6916295823824149, "grad_norm": 0.7143656611442566, "learning_rate": 4.336567674657752e-06, "loss": 0.3397, "step": 31265 }, { "epoch": 0.6917401900239282, "grad_norm": 0.8039603233337402, "learning_rate": 4.333704105480369e-06, "loss": 0.2488, "step": 31270 }, { "epoch": 0.6918507976654413, "grad_norm": 1.0456973314285278, "learning_rate": 4.330841220506445e-06, "loss": 0.2656, "step": 31275 }, { "epoch": 0.6919614053069546, "grad_norm": 0.8693780899047852, "learning_rate": 4.3279790200816764e-06, "loss": 0.2244, "step": 31280 }, { "epoch": 0.6920720129484679, "grad_norm": 0.6376264095306396, "learning_rate": 4.32511750455167e-06, "loss": 0.4521, "step": 31285 }, { "epoch": 0.6921826205899811, "grad_norm": 1.4201921224594116, "learning_rate": 4.3222566742619545e-06, "loss": 0.3867, "step": 31290 }, { "epoch": 0.6922932282314944, "grad_norm": 1.258222222328186, "learning_rate": 4.319396529557975e-06, "loss": 0.3516, "step": 31295 }, { "epoch": 0.6924038358730077, "grad_norm": 0.8910961747169495, "learning_rate": 4.31653707078509e-06, "loss": 0.262, "step": 31300 }, { "epoch": 0.692514443514521, "grad_norm": 1.050258755683899, "learning_rate": 4.313678298288583e-06, "loss": 0.3595, "step": 31305 }, { "epoch": 0.6926250511560342, "grad_norm": 0.9022901654243469, "learning_rate": 4.3108202124136455e-06, "loss": 0.2885, "step": 31310 }, { "epoch": 0.6927356587975475, "grad_norm": 1.0895285606384277, "learning_rate": 4.307962813505393e-06, "loss": 0.2981, "step": 31315 }, { "epoch": 0.6928462664390607, "grad_norm": 1.2832778692245483, "learning_rate": 4.305106101908855e-06, "loss": 0.3212, "step": 31320 }, { "epoch": 0.6929568740805739, "grad_norm": 1.5864722728729248, "learning_rate": 4.302250077968977e-06, "loss": 0.3728, "step": 31325 }, { "epoch": 0.6930674817220872, "grad_norm": 1.2848882675170898, "learning_rate": 4.299394742030625e-06, "loss": 0.1579, "step": 31330 }, { "epoch": 0.6931780893636005, "grad_norm": 0.5523501038551331, "learning_rate": 4.2965400944385795e-06, "loss": 0.3146, "step": 31335 }, { "epoch": 0.6932886970051138, "grad_norm": 0.8160201907157898, "learning_rate": 4.293686135537537e-06, "loss": 0.292, "step": 31340 }, { "epoch": 0.693399304646627, "grad_norm": 1.4987468719482422, "learning_rate": 4.290832865672114e-06, "loss": 0.3534, "step": 31345 }, { "epoch": 0.6935099122881403, "grad_norm": 1.6931726932525635, "learning_rate": 4.28798028518684e-06, "loss": 0.2576, "step": 31350 }, { "epoch": 0.6936205199296536, "grad_norm": 1.82268226146698, "learning_rate": 4.285128394426165e-06, "loss": 0.2072, "step": 31355 }, { "epoch": 0.6937311275711668, "grad_norm": 2.2554638385772705, "learning_rate": 4.282277193734454e-06, "loss": 0.3599, "step": 31360 }, { "epoch": 0.6938417352126801, "grad_norm": 1.6620057821273804, "learning_rate": 4.279426683455987e-06, "loss": 0.4311, "step": 31365 }, { "epoch": 0.6939523428541933, "grad_norm": 1.1187983751296997, "learning_rate": 4.276576863934963e-06, "loss": 0.332, "step": 31370 }, { "epoch": 0.6940629504957065, "grad_norm": 1.3136180639266968, "learning_rate": 4.273727735515497e-06, "loss": 0.3033, "step": 31375 }, { "epoch": 0.6941735581372198, "grad_norm": 0.3379135727882385, "learning_rate": 4.27087929854162e-06, "loss": 0.3829, "step": 31380 }, { "epoch": 0.6942841657787331, "grad_norm": 0.8937351107597351, "learning_rate": 4.26803155335728e-06, "loss": 0.221, "step": 31385 }, { "epoch": 0.6943947734202464, "grad_norm": 0.5280998349189758, "learning_rate": 4.265184500306342e-06, "loss": 0.329, "step": 31390 }, { "epoch": 0.6945053810617596, "grad_norm": 1.4476646184921265, "learning_rate": 4.2623381397325855e-06, "loss": 0.336, "step": 31395 }, { "epoch": 0.6946159887032729, "grad_norm": 1.3704793453216553, "learning_rate": 4.25949247197971e-06, "loss": 0.3299, "step": 31400 }, { "epoch": 0.6947265963447862, "grad_norm": 1.132246971130371, "learning_rate": 4.256647497391325e-06, "loss": 0.2529, "step": 31405 }, { "epoch": 0.6948372039862994, "grad_norm": 1.6609312295913696, "learning_rate": 4.253803216310963e-06, "loss": 0.3095, "step": 31410 }, { "epoch": 0.6949478116278126, "grad_norm": 0.6475817561149597, "learning_rate": 4.250959629082071e-06, "loss": 0.4108, "step": 31415 }, { "epoch": 0.6950584192693259, "grad_norm": 0.8138289451599121, "learning_rate": 4.248116736048009e-06, "loss": 0.3675, "step": 31420 }, { "epoch": 0.6951690269108391, "grad_norm": 1.742000699043274, "learning_rate": 4.245274537552058e-06, "loss": 0.3142, "step": 31425 }, { "epoch": 0.6952796345523524, "grad_norm": 1.7212095260620117, "learning_rate": 4.242433033937409e-06, "loss": 0.3224, "step": 31430 }, { "epoch": 0.6953902421938657, "grad_norm": 0.9780128002166748, "learning_rate": 4.239592225547176e-06, "loss": 0.3628, "step": 31435 }, { "epoch": 0.695500849835379, "grad_norm": 1.2259016036987305, "learning_rate": 4.236752112724382e-06, "loss": 0.3293, "step": 31440 }, { "epoch": 0.6956114574768922, "grad_norm": 1.0028951168060303, "learning_rate": 4.2339126958119745e-06, "loss": 0.5446, "step": 31445 }, { "epoch": 0.6957220651184055, "grad_norm": 1.1634091138839722, "learning_rate": 4.23107397515281e-06, "loss": 0.2799, "step": 31450 }, { "epoch": 0.6958326727599188, "grad_norm": 1.3417470455169678, "learning_rate": 4.228235951089662e-06, "loss": 0.3076, "step": 31455 }, { "epoch": 0.695943280401432, "grad_norm": 1.528418779373169, "learning_rate": 4.225398623965222e-06, "loss": 0.3101, "step": 31460 }, { "epoch": 0.6960538880429452, "grad_norm": 1.686224341392517, "learning_rate": 4.2225619941220964e-06, "loss": 0.4331, "step": 31465 }, { "epoch": 0.6961644956844585, "grad_norm": 0.7350099682807922, "learning_rate": 4.219726061902808e-06, "loss": 0.3098, "step": 31470 }, { "epoch": 0.6962751033259718, "grad_norm": 0.7082011699676514, "learning_rate": 4.216890827649793e-06, "loss": 0.373, "step": 31475 }, { "epoch": 0.696385710967485, "grad_norm": 0.8071118593215942, "learning_rate": 4.2140562917054065e-06, "loss": 0.2919, "step": 31480 }, { "epoch": 0.6964963186089983, "grad_norm": 1.2070738077163696, "learning_rate": 4.211222454411917e-06, "loss": 0.3898, "step": 31485 }, { "epoch": 0.6966069262505116, "grad_norm": 1.3706138134002686, "learning_rate": 4.208389316111511e-06, "loss": 0.2671, "step": 31490 }, { "epoch": 0.6967175338920248, "grad_norm": 0.9173393249511719, "learning_rate": 4.205556877146287e-06, "loss": 0.4761, "step": 31495 }, { "epoch": 0.6968281415335381, "grad_norm": 1.2386868000030518, "learning_rate": 4.2027251378582625e-06, "loss": 0.4682, "step": 31500 }, { "epoch": 0.6969387491750514, "grad_norm": 1.017702579498291, "learning_rate": 4.199894098589369e-06, "loss": 0.4871, "step": 31505 }, { "epoch": 0.6970493568165645, "grad_norm": 1.198968768119812, "learning_rate": 4.197063759681454e-06, "loss": 0.3422, "step": 31510 }, { "epoch": 0.6971599644580778, "grad_norm": 1.816081166267395, "learning_rate": 4.194234121476277e-06, "loss": 0.3187, "step": 31515 }, { "epoch": 0.6972705720995911, "grad_norm": 0.8220762014389038, "learning_rate": 4.19140518431552e-06, "loss": 0.3108, "step": 31520 }, { "epoch": 0.6973811797411044, "grad_norm": 1.3807252645492554, "learning_rate": 4.1885769485407745e-06, "loss": 0.3392, "step": 31525 }, { "epoch": 0.6974917873826176, "grad_norm": 1.1215683221817017, "learning_rate": 4.185749414493549e-06, "loss": 0.466, "step": 31530 }, { "epoch": 0.6976023950241309, "grad_norm": 1.021332025527954, "learning_rate": 4.182922582515267e-06, "loss": 0.3178, "step": 31535 }, { "epoch": 0.6977130026656442, "grad_norm": 1.1238011121749878, "learning_rate": 4.1800964529472686e-06, "loss": 0.2866, "step": 31540 }, { "epoch": 0.6978236103071574, "grad_norm": 1.1897461414337158, "learning_rate": 4.177271026130808e-06, "loss": 0.3757, "step": 31545 }, { "epoch": 0.6979342179486707, "grad_norm": 0.40397506952285767, "learning_rate": 4.174446302407053e-06, "loss": 0.4003, "step": 31550 }, { "epoch": 0.698044825590184, "grad_norm": 1.1760362386703491, "learning_rate": 4.171622282117091e-06, "loss": 0.4305, "step": 31555 }, { "epoch": 0.6981554332316972, "grad_norm": 1.8479968309402466, "learning_rate": 4.168798965601918e-06, "loss": 0.3145, "step": 31560 }, { "epoch": 0.6982660408732104, "grad_norm": 0.7705810070037842, "learning_rate": 4.165976353202451e-06, "loss": 0.2729, "step": 31565 }, { "epoch": 0.6983766485147237, "grad_norm": 1.2407166957855225, "learning_rate": 4.16315444525952e-06, "loss": 0.325, "step": 31570 }, { "epoch": 0.698487256156237, "grad_norm": 2.445525884628296, "learning_rate": 4.160333242113867e-06, "loss": 0.4143, "step": 31575 }, { "epoch": 0.6985978637977502, "grad_norm": 1.2810858488082886, "learning_rate": 4.1575127441061545e-06, "loss": 0.3486, "step": 31580 }, { "epoch": 0.6987084714392635, "grad_norm": 1.0942269563674927, "learning_rate": 4.154692951576954e-06, "loss": 0.1439, "step": 31585 }, { "epoch": 0.6988190790807768, "grad_norm": 1.2515777349472046, "learning_rate": 4.151873864866757e-06, "loss": 0.4503, "step": 31590 }, { "epoch": 0.6989296867222901, "grad_norm": 0.8794146776199341, "learning_rate": 4.149055484315965e-06, "loss": 0.3762, "step": 31595 }, { "epoch": 0.6990402943638033, "grad_norm": 1.1701431274414062, "learning_rate": 4.146237810264897e-06, "loss": 0.2837, "step": 31600 }, { "epoch": 0.6991509020053165, "grad_norm": 1.6778184175491333, "learning_rate": 4.143420843053788e-06, "loss": 0.347, "step": 31605 }, { "epoch": 0.6992615096468298, "grad_norm": 1.033837914466858, "learning_rate": 4.140604583022785e-06, "loss": 0.3776, "step": 31610 }, { "epoch": 0.699372117288343, "grad_norm": 1.2012405395507812, "learning_rate": 4.137789030511949e-06, "loss": 0.3283, "step": 31615 }, { "epoch": 0.6994827249298563, "grad_norm": 1.4547514915466309, "learning_rate": 4.134974185861259e-06, "loss": 0.3408, "step": 31620 }, { "epoch": 0.6995933325713696, "grad_norm": 1.5681836605072021, "learning_rate": 4.132160049410606e-06, "loss": 0.28, "step": 31625 }, { "epoch": 0.6997039402128828, "grad_norm": 0.9443973898887634, "learning_rate": 4.129346621499797e-06, "loss": 0.3596, "step": 31630 }, { "epoch": 0.6998145478543961, "grad_norm": 1.8727082014083862, "learning_rate": 4.126533902468551e-06, "loss": 0.3615, "step": 31635 }, { "epoch": 0.6999251554959094, "grad_norm": 0.6183366179466248, "learning_rate": 4.123721892656503e-06, "loss": 0.3289, "step": 31640 }, { "epoch": 0.7000357631374227, "grad_norm": 1.8140571117401123, "learning_rate": 4.120910592403205e-06, "loss": 0.3111, "step": 31645 }, { "epoch": 0.7001463707789359, "grad_norm": 1.6215434074401855, "learning_rate": 4.118100002048117e-06, "loss": 0.3837, "step": 31650 }, { "epoch": 0.7002569784204491, "grad_norm": 0.9348192811012268, "learning_rate": 4.1152901219306184e-06, "loss": 0.3494, "step": 31655 }, { "epoch": 0.7003675860619624, "grad_norm": 1.8607335090637207, "learning_rate": 4.112480952390002e-06, "loss": 0.25, "step": 31660 }, { "epoch": 0.7004781937034756, "grad_norm": 0.9495384693145752, "learning_rate": 4.109672493765473e-06, "loss": 0.3142, "step": 31665 }, { "epoch": 0.7005888013449889, "grad_norm": 1.3067772388458252, "learning_rate": 4.106864746396153e-06, "loss": 0.3831, "step": 31670 }, { "epoch": 0.7006994089865022, "grad_norm": 0.8906987905502319, "learning_rate": 4.104057710621075e-06, "loss": 0.2596, "step": 31675 }, { "epoch": 0.7008100166280155, "grad_norm": 1.6084527969360352, "learning_rate": 4.10125138677919e-06, "loss": 0.327, "step": 31680 }, { "epoch": 0.7009206242695287, "grad_norm": 1.2006031274795532, "learning_rate": 4.09844577520936e-06, "loss": 0.444, "step": 31685 }, { "epoch": 0.701031231911042, "grad_norm": 0.7207416892051697, "learning_rate": 4.09564087625036e-06, "loss": 0.2755, "step": 31690 }, { "epoch": 0.7011418395525553, "grad_norm": 0.9814532399177551, "learning_rate": 4.0928366902408825e-06, "loss": 0.198, "step": 31695 }, { "epoch": 0.7012524471940684, "grad_norm": 2.4562785625457764, "learning_rate": 4.09003321751953e-06, "loss": 0.3176, "step": 31700 }, { "epoch": 0.7013630548355817, "grad_norm": 1.0655542612075806, "learning_rate": 4.087230458424825e-06, "loss": 0.4213, "step": 31705 }, { "epoch": 0.701473662477095, "grad_norm": 1.2293767929077148, "learning_rate": 4.084428413295195e-06, "loss": 0.51, "step": 31710 }, { "epoch": 0.7015842701186082, "grad_norm": 1.5490543842315674, "learning_rate": 4.081627082468989e-06, "loss": 0.2635, "step": 31715 }, { "epoch": 0.7016948777601215, "grad_norm": 1.150118112564087, "learning_rate": 4.078826466284467e-06, "loss": 0.4209, "step": 31720 }, { "epoch": 0.7018054854016348, "grad_norm": 1.0098261833190918, "learning_rate": 4.0760265650798e-06, "loss": 0.4482, "step": 31725 }, { "epoch": 0.7019160930431481, "grad_norm": 1.1592518091201782, "learning_rate": 4.073227379193082e-06, "loss": 0.2915, "step": 31730 }, { "epoch": 0.7020267006846613, "grad_norm": 0.770505964756012, "learning_rate": 4.070428908962301e-06, "loss": 0.3779, "step": 31735 }, { "epoch": 0.7021373083261746, "grad_norm": 0.5314367413520813, "learning_rate": 4.067631154725382e-06, "loss": 0.3507, "step": 31740 }, { "epoch": 0.7022479159676879, "grad_norm": 1.5498336553573608, "learning_rate": 4.064834116820151e-06, "loss": 0.4195, "step": 31745 }, { "epoch": 0.702358523609201, "grad_norm": 0.8686416149139404, "learning_rate": 4.062037795584348e-06, "loss": 0.2952, "step": 31750 }, { "epoch": 0.7024691312507143, "grad_norm": 1.2339657545089722, "learning_rate": 4.059242191355628e-06, "loss": 0.3404, "step": 31755 }, { "epoch": 0.7025797388922276, "grad_norm": 1.5665457248687744, "learning_rate": 4.0564473044715605e-06, "loss": 0.3129, "step": 31760 }, { "epoch": 0.7026903465337409, "grad_norm": 0.5622530579566956, "learning_rate": 4.0536531352696264e-06, "loss": 0.2333, "step": 31765 }, { "epoch": 0.7028009541752541, "grad_norm": 1.838549256324768, "learning_rate": 4.05085968408722e-06, "loss": 0.4413, "step": 31770 }, { "epoch": 0.7029115618167674, "grad_norm": 0.5460445284843445, "learning_rate": 4.048066951261652e-06, "loss": 0.2799, "step": 31775 }, { "epoch": 0.7030221694582807, "grad_norm": 1.3297802209854126, "learning_rate": 4.045274937130142e-06, "loss": 0.3156, "step": 31780 }, { "epoch": 0.7031327770997939, "grad_norm": 0.7524604797363281, "learning_rate": 4.0424836420298245e-06, "loss": 0.3818, "step": 31785 }, { "epoch": 0.7032433847413072, "grad_norm": 1.4434775114059448, "learning_rate": 4.039693066297749e-06, "loss": 0.4597, "step": 31790 }, { "epoch": 0.7033539923828204, "grad_norm": 1.159125804901123, "learning_rate": 4.036903210270875e-06, "loss": 0.2465, "step": 31795 }, { "epoch": 0.7034646000243336, "grad_norm": 1.8157620429992676, "learning_rate": 4.034114074286079e-06, "loss": 0.4547, "step": 31800 }, { "epoch": 0.7035752076658469, "grad_norm": 1.287784218788147, "learning_rate": 4.031325658680147e-06, "loss": 0.2691, "step": 31805 }, { "epoch": 0.7036858153073602, "grad_norm": 1.5130412578582764, "learning_rate": 4.028537963789778e-06, "loss": 0.3298, "step": 31810 }, { "epoch": 0.7037964229488735, "grad_norm": 1.4908441305160522, "learning_rate": 4.025750989951588e-06, "loss": 0.433, "step": 31815 }, { "epoch": 0.7039070305903867, "grad_norm": 0.6437156796455383, "learning_rate": 4.0229647375021005e-06, "loss": 0.3693, "step": 31820 }, { "epoch": 0.7040176382319, "grad_norm": 1.1918989419937134, "learning_rate": 4.020179206777758e-06, "loss": 0.3913, "step": 31825 }, { "epoch": 0.7041282458734133, "grad_norm": 1.9537614583969116, "learning_rate": 4.0173943981149086e-06, "loss": 0.2189, "step": 31830 }, { "epoch": 0.7042388535149265, "grad_norm": 1.4443563222885132, "learning_rate": 4.014610311849819e-06, "loss": 0.3039, "step": 31835 }, { "epoch": 0.7043494611564398, "grad_norm": 0.9689356684684753, "learning_rate": 4.0118269483186676e-06, "loss": 0.4355, "step": 31840 }, { "epoch": 0.704460068797953, "grad_norm": 1.3693528175354004, "learning_rate": 4.009044307857547e-06, "loss": 0.3225, "step": 31845 }, { "epoch": 0.7045706764394662, "grad_norm": 1.7475589513778687, "learning_rate": 4.0062623908024504e-06, "loss": 0.2733, "step": 31850 }, { "epoch": 0.7046812840809795, "grad_norm": 0.6745430827140808, "learning_rate": 4.003481197489303e-06, "loss": 0.2142, "step": 31855 }, { "epoch": 0.7047918917224928, "grad_norm": 1.6641446352005005, "learning_rate": 4.000700728253929e-06, "loss": 0.3689, "step": 31860 }, { "epoch": 0.7049024993640061, "grad_norm": 1.249586582183838, "learning_rate": 3.997920983432072e-06, "loss": 0.427, "step": 31865 }, { "epoch": 0.7050131070055193, "grad_norm": 1.2588343620300293, "learning_rate": 3.9951419633593825e-06, "loss": 0.4136, "step": 31870 }, { "epoch": 0.7051237146470326, "grad_norm": 1.255889654159546, "learning_rate": 3.9923636683714265e-06, "loss": 0.3634, "step": 31875 }, { "epoch": 0.7052343222885459, "grad_norm": 1.0940989255905151, "learning_rate": 3.9895860988036836e-06, "loss": 0.3019, "step": 31880 }, { "epoch": 0.7053449299300592, "grad_norm": 1.404308795928955, "learning_rate": 3.986809254991544e-06, "loss": 0.1973, "step": 31885 }, { "epoch": 0.7054555375715723, "grad_norm": 1.034071683883667, "learning_rate": 3.984033137270311e-06, "loss": 0.3167, "step": 31890 }, { "epoch": 0.7055661452130856, "grad_norm": 1.0660425424575806, "learning_rate": 3.981257745975199e-06, "loss": 0.3393, "step": 31895 }, { "epoch": 0.7056767528545989, "grad_norm": 1.0965545177459717, "learning_rate": 3.978483081441336e-06, "loss": 0.451, "step": 31900 }, { "epoch": 0.7057873604961121, "grad_norm": 1.1770901679992676, "learning_rate": 3.975709144003763e-06, "loss": 0.2881, "step": 31905 }, { "epoch": 0.7058979681376254, "grad_norm": 1.9282629489898682, "learning_rate": 3.972935933997431e-06, "loss": 0.3632, "step": 31910 }, { "epoch": 0.7060085757791387, "grad_norm": 0.803523063659668, "learning_rate": 3.9701634517572045e-06, "loss": 0.222, "step": 31915 }, { "epoch": 0.7061191834206519, "grad_norm": 1.341321349143982, "learning_rate": 3.967391697617858e-06, "loss": 0.2581, "step": 31920 }, { "epoch": 0.7062297910621652, "grad_norm": 1.6769680976867676, "learning_rate": 3.9646206719140835e-06, "loss": 0.3164, "step": 31925 }, { "epoch": 0.7063403987036785, "grad_norm": 1.4389426708221436, "learning_rate": 3.9618503749804795e-06, "loss": 0.2817, "step": 31930 }, { "epoch": 0.7064510063451918, "grad_norm": 1.7210547924041748, "learning_rate": 3.959080807151557e-06, "loss": 0.3654, "step": 31935 }, { "epoch": 0.7065616139867049, "grad_norm": 1.5747088193893433, "learning_rate": 3.956311968761744e-06, "loss": 0.368, "step": 31940 }, { "epoch": 0.7066722216282182, "grad_norm": 1.6803812980651855, "learning_rate": 3.953543860145378e-06, "loss": 0.4044, "step": 31945 }, { "epoch": 0.7067828292697315, "grad_norm": 0.8556889891624451, "learning_rate": 3.950776481636698e-06, "loss": 0.3614, "step": 31950 }, { "epoch": 0.7068934369112447, "grad_norm": 0.7578348517417908, "learning_rate": 3.9480098335698725e-06, "loss": 0.2959, "step": 31955 }, { "epoch": 0.707004044552758, "grad_norm": 0.62323397397995, "learning_rate": 3.9452439162789716e-06, "loss": 0.3042, "step": 31960 }, { "epoch": 0.7071146521942713, "grad_norm": 1.428185224533081, "learning_rate": 3.9424787300979785e-06, "loss": 0.3301, "step": 31965 }, { "epoch": 0.7072252598357845, "grad_norm": 1.3322243690490723, "learning_rate": 3.939714275360789e-06, "loss": 0.3336, "step": 31970 }, { "epoch": 0.7073358674772978, "grad_norm": 0.6845158338546753, "learning_rate": 3.93695055240121e-06, "loss": 0.3145, "step": 31975 }, { "epoch": 0.7074464751188111, "grad_norm": 1.2822870016098022, "learning_rate": 3.9341875615529605e-06, "loss": 0.4152, "step": 31980 }, { "epoch": 0.7075570827603243, "grad_norm": 1.263893485069275, "learning_rate": 3.931425303149668e-06, "loss": 0.3319, "step": 31985 }, { "epoch": 0.7076676904018375, "grad_norm": 1.1602104902267456, "learning_rate": 3.92866377752488e-06, "loss": 0.3717, "step": 31990 }, { "epoch": 0.7077782980433508, "grad_norm": 1.3702059984207153, "learning_rate": 3.925902985012043e-06, "loss": 0.3658, "step": 31995 }, { "epoch": 0.7078889056848641, "grad_norm": 0.7435858845710754, "learning_rate": 3.923142925944528e-06, "loss": 0.3174, "step": 32000 }, { "epoch": 0.7079995133263773, "grad_norm": 2.801104784011841, "learning_rate": 3.920383600655607e-06, "loss": 0.349, "step": 32005 }, { "epoch": 0.7081101209678906, "grad_norm": 1.4062739610671997, "learning_rate": 3.917625009478469e-06, "loss": 0.2689, "step": 32010 }, { "epoch": 0.7082207286094039, "grad_norm": 1.0640273094177246, "learning_rate": 3.914867152746215e-06, "loss": 0.2135, "step": 32015 }, { "epoch": 0.7083313362509172, "grad_norm": 0.794338583946228, "learning_rate": 3.9121100307918525e-06, "loss": 0.2991, "step": 32020 }, { "epoch": 0.7084419438924304, "grad_norm": 1.031052589416504, "learning_rate": 3.909353643948304e-06, "loss": 0.2871, "step": 32025 }, { "epoch": 0.7085525515339436, "grad_norm": 2.660555601119995, "learning_rate": 3.906597992548403e-06, "loss": 0.3395, "step": 32030 }, { "epoch": 0.7086631591754569, "grad_norm": 0.6532772779464722, "learning_rate": 3.903843076924893e-06, "loss": 0.2365, "step": 32035 }, { "epoch": 0.7087737668169701, "grad_norm": 1.9931825399398804, "learning_rate": 3.901088897410429e-06, "loss": 0.2961, "step": 32040 }, { "epoch": 0.7088843744584834, "grad_norm": 0.7709190249443054, "learning_rate": 3.898335454337579e-06, "loss": 0.1879, "step": 32045 }, { "epoch": 0.7089949820999967, "grad_norm": 1.631733775138855, "learning_rate": 3.895582748038816e-06, "loss": 0.2734, "step": 32050 }, { "epoch": 0.70910558974151, "grad_norm": 1.3650634288787842, "learning_rate": 3.892830778846535e-06, "loss": 0.4085, "step": 32055 }, { "epoch": 0.7092161973830232, "grad_norm": 1.2840571403503418, "learning_rate": 3.890079547093033e-06, "loss": 0.4074, "step": 32060 }, { "epoch": 0.7093268050245365, "grad_norm": 0.6136147379875183, "learning_rate": 3.8873290531105135e-06, "loss": 0.2555, "step": 32065 }, { "epoch": 0.7094374126660498, "grad_norm": 1.3286957740783691, "learning_rate": 3.884579297231107e-06, "loss": 0.4039, "step": 32070 }, { "epoch": 0.709548020307563, "grad_norm": 1.0109577178955078, "learning_rate": 3.881830279786841e-06, "loss": 0.3265, "step": 32075 }, { "epoch": 0.7096586279490762, "grad_norm": 2.4385361671447754, "learning_rate": 3.879082001109661e-06, "loss": 0.4244, "step": 32080 }, { "epoch": 0.7097692355905895, "grad_norm": 1.2822980880737305, "learning_rate": 3.876334461531419e-06, "loss": 0.2765, "step": 32085 }, { "epoch": 0.7098798432321027, "grad_norm": 1.3375054597854614, "learning_rate": 3.873587661383879e-06, "loss": 0.3421, "step": 32090 }, { "epoch": 0.709990450873616, "grad_norm": 1.4341976642608643, "learning_rate": 3.870841600998719e-06, "loss": 0.3096, "step": 32095 }, { "epoch": 0.7101010585151293, "grad_norm": 1.2683032751083374, "learning_rate": 3.868096280707523e-06, "loss": 0.1786, "step": 32100 }, { "epoch": 0.7102116661566426, "grad_norm": 1.0230356454849243, "learning_rate": 3.865351700841787e-06, "loss": 0.4072, "step": 32105 }, { "epoch": 0.7103222737981558, "grad_norm": 2.6351919174194336, "learning_rate": 3.8626078617329195e-06, "loss": 0.388, "step": 32110 }, { "epoch": 0.7104328814396691, "grad_norm": 1.0844758749008179, "learning_rate": 3.859864763712238e-06, "loss": 0.3162, "step": 32115 }, { "epoch": 0.7105434890811824, "grad_norm": 0.6910739541053772, "learning_rate": 3.85712240711097e-06, "loss": 0.2709, "step": 32120 }, { "epoch": 0.7106540967226955, "grad_norm": 1.8926162719726562, "learning_rate": 3.854380792260255e-06, "loss": 0.3419, "step": 32125 }, { "epoch": 0.7107647043642088, "grad_norm": 0.7638702392578125, "learning_rate": 3.851639919491141e-06, "loss": 0.3119, "step": 32130 }, { "epoch": 0.7108753120057221, "grad_norm": 1.0078892707824707, "learning_rate": 3.848899789134589e-06, "loss": 0.3549, "step": 32135 }, { "epoch": 0.7109859196472353, "grad_norm": 1.0023139715194702, "learning_rate": 3.846160401521469e-06, "loss": 0.2834, "step": 32140 }, { "epoch": 0.7110965272887486, "grad_norm": 0.73276287317276, "learning_rate": 3.843421756982558e-06, "loss": 0.2982, "step": 32145 }, { "epoch": 0.7112071349302619, "grad_norm": 1.5422987937927246, "learning_rate": 3.840683855848551e-06, "loss": 0.3573, "step": 32150 }, { "epoch": 0.7113177425717752, "grad_norm": 1.1983201503753662, "learning_rate": 3.837946698450045e-06, "loss": 0.4007, "step": 32155 }, { "epoch": 0.7114283502132884, "grad_norm": 1.8720299005508423, "learning_rate": 3.835210285117556e-06, "loss": 0.2954, "step": 32160 }, { "epoch": 0.7115389578548017, "grad_norm": 1.1289478540420532, "learning_rate": 3.832474616181495e-06, "loss": 0.3184, "step": 32165 }, { "epoch": 0.711649565496315, "grad_norm": 1.1430360078811646, "learning_rate": 3.829739691972204e-06, "loss": 0.364, "step": 32170 }, { "epoch": 0.7117601731378281, "grad_norm": 1.2568438053131104, "learning_rate": 3.827005512819919e-06, "loss": 0.3349, "step": 32175 }, { "epoch": 0.7118707807793414, "grad_norm": 0.9999079704284668, "learning_rate": 3.824272079054793e-06, "loss": 0.3041, "step": 32180 }, { "epoch": 0.7119813884208547, "grad_norm": 1.175649881362915, "learning_rate": 3.8215393910068856e-06, "loss": 0.3145, "step": 32185 }, { "epoch": 0.712091996062368, "grad_norm": 1.2850096225738525, "learning_rate": 3.81880744900617e-06, "loss": 0.3935, "step": 32190 }, { "epoch": 0.7122026037038812, "grad_norm": 1.1466201543807983, "learning_rate": 3.816076253382524e-06, "loss": 0.2327, "step": 32195 }, { "epoch": 0.7123132113453945, "grad_norm": 1.106951117515564, "learning_rate": 3.8133458044657433e-06, "loss": 0.2905, "step": 32200 }, { "epoch": 0.7124238189869078, "grad_norm": 1.2289725542068481, "learning_rate": 3.810616102585526e-06, "loss": 0.3291, "step": 32205 }, { "epoch": 0.712534426628421, "grad_norm": 1.1138912439346313, "learning_rate": 3.807887148071482e-06, "loss": 0.3063, "step": 32210 }, { "epoch": 0.7126450342699343, "grad_norm": 0.9524775743484497, "learning_rate": 3.805158941253134e-06, "loss": 0.3864, "step": 32215 }, { "epoch": 0.7127556419114475, "grad_norm": 1.4193377494812012, "learning_rate": 3.8024314824599095e-06, "loss": 0.4218, "step": 32220 }, { "epoch": 0.7128662495529607, "grad_norm": 1.9966566562652588, "learning_rate": 3.799704772021151e-06, "loss": 0.2993, "step": 32225 }, { "epoch": 0.712976857194474, "grad_norm": 1.6400210857391357, "learning_rate": 3.7969788102661067e-06, "loss": 0.413, "step": 32230 }, { "epoch": 0.7130874648359873, "grad_norm": 1.0051593780517578, "learning_rate": 3.7942535975239347e-06, "loss": 0.2992, "step": 32235 }, { "epoch": 0.7131980724775006, "grad_norm": 1.3145960569381714, "learning_rate": 3.791529134123705e-06, "loss": 0.4827, "step": 32240 }, { "epoch": 0.7133086801190138, "grad_norm": 0.7060980796813965, "learning_rate": 3.788805420394395e-06, "loss": 0.4642, "step": 32245 }, { "epoch": 0.7134192877605271, "grad_norm": 1.0960701704025269, "learning_rate": 3.786082456664891e-06, "loss": 0.3536, "step": 32250 }, { "epoch": 0.7135298954020404, "grad_norm": 0.7515956163406372, "learning_rate": 3.783360243263994e-06, "loss": 0.2403, "step": 32255 }, { "epoch": 0.7136405030435536, "grad_norm": 0.9600052833557129, "learning_rate": 3.780638780520406e-06, "loss": 0.3938, "step": 32260 }, { "epoch": 0.7137511106850669, "grad_norm": 0.8809182643890381, "learning_rate": 3.7779180687627457e-06, "loss": 0.2647, "step": 32265 }, { "epoch": 0.7138617183265801, "grad_norm": 1.1556313037872314, "learning_rate": 3.7751981083195366e-06, "loss": 0.3512, "step": 32270 }, { "epoch": 0.7139723259680933, "grad_norm": 1.0350397825241089, "learning_rate": 3.7724788995192173e-06, "loss": 0.3692, "step": 32275 }, { "epoch": 0.7140829336096066, "grad_norm": 0.8237111568450928, "learning_rate": 3.769760442690121e-06, "loss": 0.3882, "step": 32280 }, { "epoch": 0.7141935412511199, "grad_norm": 1.0024701356887817, "learning_rate": 3.7670427381605114e-06, "loss": 0.3281, "step": 32285 }, { "epoch": 0.7143041488926332, "grad_norm": 1.0077791213989258, "learning_rate": 3.7643257862585456e-06, "loss": 0.3037, "step": 32290 }, { "epoch": 0.7144147565341464, "grad_norm": 0.8543902635574341, "learning_rate": 3.761609587312295e-06, "loss": 0.1924, "step": 32295 }, { "epoch": 0.7145253641756597, "grad_norm": 0.8246257901191711, "learning_rate": 3.7588941416497403e-06, "loss": 0.216, "step": 32300 }, { "epoch": 0.714635971817173, "grad_norm": 0.7650245428085327, "learning_rate": 3.7561794495987714e-06, "loss": 0.2563, "step": 32305 }, { "epoch": 0.7147465794586862, "grad_norm": 0.9459652304649353, "learning_rate": 3.7534655114871855e-06, "loss": 0.4043, "step": 32310 }, { "epoch": 0.7148571871001994, "grad_norm": 1.1067031621932983, "learning_rate": 3.7507523276426906e-06, "loss": 0.4373, "step": 32315 }, { "epoch": 0.7149677947417127, "grad_norm": 1.36698579788208, "learning_rate": 3.7480398983929014e-06, "loss": 0.3074, "step": 32320 }, { "epoch": 0.715078402383226, "grad_norm": 1.3452955484390259, "learning_rate": 3.7453282240653434e-06, "loss": 0.3695, "step": 32325 }, { "epoch": 0.7151890100247392, "grad_norm": 1.1587146520614624, "learning_rate": 3.7426173049874516e-06, "loss": 0.3563, "step": 32330 }, { "epoch": 0.7152996176662525, "grad_norm": 1.23695969581604, "learning_rate": 3.739907141486566e-06, "loss": 0.2672, "step": 32335 }, { "epoch": 0.7154102253077658, "grad_norm": 0.5661383271217346, "learning_rate": 3.7371977338899425e-06, "loss": 0.4035, "step": 32340 }, { "epoch": 0.715520832949279, "grad_norm": 1.2298871278762817, "learning_rate": 3.734489082524736e-06, "loss": 0.344, "step": 32345 }, { "epoch": 0.7156314405907923, "grad_norm": 1.0182942152023315, "learning_rate": 3.7317811877180197e-06, "loss": 0.2836, "step": 32350 }, { "epoch": 0.7157420482323056, "grad_norm": 0.977523684501648, "learning_rate": 3.729074049796768e-06, "loss": 0.2605, "step": 32355 }, { "epoch": 0.7158526558738189, "grad_norm": 1.7613017559051514, "learning_rate": 3.726367669087868e-06, "loss": 0.3385, "step": 32360 }, { "epoch": 0.715963263515332, "grad_norm": 1.2525070905685425, "learning_rate": 3.723662045918115e-06, "loss": 0.3693, "step": 32365 }, { "epoch": 0.7160738711568453, "grad_norm": 1.4987595081329346, "learning_rate": 3.720957180614212e-06, "loss": 0.3949, "step": 32370 }, { "epoch": 0.7161844787983586, "grad_norm": 1.7239257097244263, "learning_rate": 3.7182530735027733e-06, "loss": 0.3807, "step": 32375 }, { "epoch": 0.7162950864398718, "grad_norm": 0.8558711409568787, "learning_rate": 3.715549724910309e-06, "loss": 0.3743, "step": 32380 }, { "epoch": 0.7164056940813851, "grad_norm": 0.5819491147994995, "learning_rate": 3.712847135163258e-06, "loss": 0.2481, "step": 32385 }, { "epoch": 0.7165163017228984, "grad_norm": 1.3238396644592285, "learning_rate": 3.7101453045879534e-06, "loss": 0.2202, "step": 32390 }, { "epoch": 0.7166269093644116, "grad_norm": 0.8884609937667847, "learning_rate": 3.7074442335106407e-06, "loss": 0.3684, "step": 32395 }, { "epoch": 0.7167375170059249, "grad_norm": 1.7029041051864624, "learning_rate": 3.704743922257473e-06, "loss": 0.3299, "step": 32400 }, { "epoch": 0.7168481246474382, "grad_norm": 0.5343486666679382, "learning_rate": 3.702044371154514e-06, "loss": 0.3307, "step": 32405 }, { "epoch": 0.7169587322889514, "grad_norm": 1.7018598318099976, "learning_rate": 3.6993455805277312e-06, "loss": 0.3898, "step": 32410 }, { "epoch": 0.7170693399304646, "grad_norm": 1.812604308128357, "learning_rate": 3.6966475507030033e-06, "loss": 0.3832, "step": 32415 }, { "epoch": 0.7171799475719779, "grad_norm": 0.8239447474479675, "learning_rate": 3.693950282006117e-06, "loss": 0.3697, "step": 32420 }, { "epoch": 0.7172905552134912, "grad_norm": 2.239624500274658, "learning_rate": 3.6912537747627662e-06, "loss": 0.2987, "step": 32425 }, { "epoch": 0.7174011628550044, "grad_norm": 0.4891189634799957, "learning_rate": 3.6885580292985544e-06, "loss": 0.2364, "step": 32430 }, { "epoch": 0.7175117704965177, "grad_norm": 1.6331247091293335, "learning_rate": 3.685863045938991e-06, "loss": 0.4131, "step": 32435 }, { "epoch": 0.717622378138031, "grad_norm": 0.429531455039978, "learning_rate": 3.6831688250094944e-06, "loss": 0.2591, "step": 32440 }, { "epoch": 0.7177329857795443, "grad_norm": 1.454746127128601, "learning_rate": 3.680475366835391e-06, "loss": 0.2618, "step": 32445 }, { "epoch": 0.7178435934210575, "grad_norm": 1.8494064807891846, "learning_rate": 3.677782671741915e-06, "loss": 0.3975, "step": 32450 }, { "epoch": 0.7179542010625708, "grad_norm": 1.1194648742675781, "learning_rate": 3.67509074005421e-06, "loss": 0.3229, "step": 32455 }, { "epoch": 0.718064808704084, "grad_norm": 1.5725700855255127, "learning_rate": 3.6723995720973236e-06, "loss": 0.5931, "step": 32460 }, { "epoch": 0.7181754163455972, "grad_norm": 1.7566272020339966, "learning_rate": 3.6697091681962148e-06, "loss": 0.4137, "step": 32465 }, { "epoch": 0.7182860239871105, "grad_norm": 0.8176954984664917, "learning_rate": 3.6670195286757492e-06, "loss": 0.4398, "step": 32470 }, { "epoch": 0.7183966316286238, "grad_norm": 1.3561185598373413, "learning_rate": 3.664330653860698e-06, "loss": 0.452, "step": 32475 }, { "epoch": 0.718507239270137, "grad_norm": 1.7252490520477295, "learning_rate": 3.6616425440757455e-06, "loss": 0.2766, "step": 32480 }, { "epoch": 0.7186178469116503, "grad_norm": 0.8663360476493835, "learning_rate": 3.658955199645476e-06, "loss": 0.3372, "step": 32485 }, { "epoch": 0.7187284545531636, "grad_norm": 1.1922364234924316, "learning_rate": 3.656268620894392e-06, "loss": 0.3107, "step": 32490 }, { "epoch": 0.7188390621946769, "grad_norm": 1.01238214969635, "learning_rate": 3.653582808146886e-06, "loss": 0.339, "step": 32495 }, { "epoch": 0.7189496698361901, "grad_norm": 1.0403751134872437, "learning_rate": 3.650897761727279e-06, "loss": 0.3697, "step": 32500 }, { "epoch": 0.7190602774777033, "grad_norm": 0.6788943409919739, "learning_rate": 3.648213481959787e-06, "loss": 0.3343, "step": 32505 }, { "epoch": 0.7191708851192166, "grad_norm": 2.0077590942382812, "learning_rate": 3.645529969168534e-06, "loss": 0.3218, "step": 32510 }, { "epoch": 0.7192814927607298, "grad_norm": 1.2162411212921143, "learning_rate": 3.6428472236775548e-06, "loss": 0.3315, "step": 32515 }, { "epoch": 0.7193921004022431, "grad_norm": 1.6490862369537354, "learning_rate": 3.6401652458107896e-06, "loss": 0.3334, "step": 32520 }, { "epoch": 0.7195027080437564, "grad_norm": 2.9800901412963867, "learning_rate": 3.637484035892087e-06, "loss": 0.2713, "step": 32525 }, { "epoch": 0.7196133156852696, "grad_norm": 2.3892199993133545, "learning_rate": 3.634803594245201e-06, "loss": 0.4003, "step": 32530 }, { "epoch": 0.7197239233267829, "grad_norm": 1.1489437818527222, "learning_rate": 3.6321239211937954e-06, "loss": 0.3569, "step": 32535 }, { "epoch": 0.7198345309682962, "grad_norm": 1.06971275806427, "learning_rate": 3.6294450170614404e-06, "loss": 0.2181, "step": 32540 }, { "epoch": 0.7199451386098095, "grad_norm": 1.9254977703094482, "learning_rate": 3.6267668821716107e-06, "loss": 0.5422, "step": 32545 }, { "epoch": 0.7200557462513227, "grad_norm": 0.9457816481590271, "learning_rate": 3.624089516847691e-06, "loss": 0.1665, "step": 32550 }, { "epoch": 0.7201663538928359, "grad_norm": 1.3190242052078247, "learning_rate": 3.6214129214129734e-06, "loss": 0.4361, "step": 32555 }, { "epoch": 0.7202769615343492, "grad_norm": 1.4053877592086792, "learning_rate": 3.6187370961906553e-06, "loss": 0.3031, "step": 32560 }, { "epoch": 0.7203875691758624, "grad_norm": 0.9825094938278198, "learning_rate": 3.6160620415038417e-06, "loss": 0.3037, "step": 32565 }, { "epoch": 0.7204981768173757, "grad_norm": 1.2537853717803955, "learning_rate": 3.613387757675545e-06, "loss": 0.4213, "step": 32570 }, { "epoch": 0.720608784458889, "grad_norm": 1.4952212572097778, "learning_rate": 3.6107142450286826e-06, "loss": 0.3905, "step": 32575 }, { "epoch": 0.7207193921004023, "grad_norm": 0.6781617999076843, "learning_rate": 3.6080415038860828e-06, "loss": 0.3423, "step": 32580 }, { "epoch": 0.7208299997419155, "grad_norm": 1.1359144449234009, "learning_rate": 3.6053695345704763e-06, "loss": 0.2988, "step": 32585 }, { "epoch": 0.7209406073834288, "grad_norm": 1.0123214721679688, "learning_rate": 3.6026983374045066e-06, "loss": 0.22, "step": 32590 }, { "epoch": 0.7210512150249421, "grad_norm": 0.8977499008178711, "learning_rate": 3.6000279127107096e-06, "loss": 0.4278, "step": 32595 }, { "epoch": 0.7211618226664552, "grad_norm": 1.1961400508880615, "learning_rate": 3.597358260811549e-06, "loss": 0.2271, "step": 32600 }, { "epoch": 0.7212724303079685, "grad_norm": 1.5338122844696045, "learning_rate": 3.594689382029384e-06, "loss": 0.3112, "step": 32605 }, { "epoch": 0.7213830379494818, "grad_norm": 1.0130054950714111, "learning_rate": 3.5920212766864704e-06, "loss": 0.2548, "step": 32610 }, { "epoch": 0.721493645590995, "grad_norm": 0.565284252166748, "learning_rate": 3.589353945104992e-06, "loss": 0.2013, "step": 32615 }, { "epoch": 0.7216042532325083, "grad_norm": 0.6276851296424866, "learning_rate": 3.5866873876070253e-06, "loss": 0.3971, "step": 32620 }, { "epoch": 0.7217148608740216, "grad_norm": 1.1847386360168457, "learning_rate": 3.584021604514554e-06, "loss": 0.3507, "step": 32625 }, { "epoch": 0.7218254685155349, "grad_norm": 0.6569443345069885, "learning_rate": 3.5813565961494723e-06, "loss": 0.3406, "step": 32630 }, { "epoch": 0.7219360761570481, "grad_norm": 0.5999816060066223, "learning_rate": 3.578692362833579e-06, "loss": 0.3555, "step": 32635 }, { "epoch": 0.7220466837985614, "grad_norm": 1.3063921928405762, "learning_rate": 3.57602890488858e-06, "loss": 0.4008, "step": 32640 }, { "epoch": 0.7221572914400747, "grad_norm": 0.8451093435287476, "learning_rate": 3.5733662226360854e-06, "loss": 0.4581, "step": 32645 }, { "epoch": 0.7222678990815878, "grad_norm": 0.8524464964866638, "learning_rate": 3.570704316397614e-06, "loss": 0.2729, "step": 32650 }, { "epoch": 0.7223785067231011, "grad_norm": 0.8323779106140137, "learning_rate": 3.5680431864945907e-06, "loss": 0.326, "step": 32655 }, { "epoch": 0.7224891143646144, "grad_norm": 0.8823177218437195, "learning_rate": 3.5653828332483463e-06, "loss": 0.3857, "step": 32660 }, { "epoch": 0.7225997220061277, "grad_norm": 0.8179072141647339, "learning_rate": 3.562723256980116e-06, "loss": 0.4037, "step": 32665 }, { "epoch": 0.7227103296476409, "grad_norm": 0.9453499913215637, "learning_rate": 3.5600644580110445e-06, "loss": 0.4008, "step": 32670 }, { "epoch": 0.7228209372891542, "grad_norm": 1.4324322938919067, "learning_rate": 3.5574064366621795e-06, "loss": 0.3947, "step": 32675 }, { "epoch": 0.7229315449306675, "grad_norm": 2.4503934383392334, "learning_rate": 3.554749193254479e-06, "loss": 0.4778, "step": 32680 }, { "epoch": 0.7230421525721807, "grad_norm": 1.3117600679397583, "learning_rate": 3.5520927281088003e-06, "loss": 0.3671, "step": 32685 }, { "epoch": 0.723152760213694, "grad_norm": 1.3686795234680176, "learning_rate": 3.5494370415459134e-06, "loss": 0.3615, "step": 32690 }, { "epoch": 0.7232633678552072, "grad_norm": 1.1111679077148438, "learning_rate": 3.5467821338864915e-06, "loss": 0.3679, "step": 32695 }, { "epoch": 0.7233739754967204, "grad_norm": 0.969403088092804, "learning_rate": 3.5441280054511127e-06, "loss": 0.3997, "step": 32700 }, { "epoch": 0.7234845831382337, "grad_norm": 0.7214536070823669, "learning_rate": 3.5414746565602664e-06, "loss": 0.2453, "step": 32705 }, { "epoch": 0.723595190779747, "grad_norm": 0.9806076288223267, "learning_rate": 3.538822087534334e-06, "loss": 0.3277, "step": 32710 }, { "epoch": 0.7237057984212603, "grad_norm": 1.4750267267227173, "learning_rate": 3.5361702986936206e-06, "loss": 0.4547, "step": 32715 }, { "epoch": 0.7238164060627735, "grad_norm": 0.4128682613372803, "learning_rate": 3.533519290358328e-06, "loss": 0.1796, "step": 32720 }, { "epoch": 0.7239270137042868, "grad_norm": 1.4405301809310913, "learning_rate": 3.5308690628485633e-06, "loss": 0.4225, "step": 32725 }, { "epoch": 0.7240376213458001, "grad_norm": 1.3665688037872314, "learning_rate": 3.5282196164843397e-06, "loss": 0.3721, "step": 32730 }, { "epoch": 0.7241482289873133, "grad_norm": 1.5478657484054565, "learning_rate": 3.5255709515855784e-06, "loss": 0.2596, "step": 32735 }, { "epoch": 0.7242588366288266, "grad_norm": 1.3792270421981812, "learning_rate": 3.522923068472106e-06, "loss": 0.2908, "step": 32740 }, { "epoch": 0.7243694442703398, "grad_norm": 0.7669662833213806, "learning_rate": 3.52027596746365e-06, "loss": 0.3346, "step": 32745 }, { "epoch": 0.724480051911853, "grad_norm": 0.6930635571479797, "learning_rate": 3.51762964887985e-06, "loss": 0.309, "step": 32750 }, { "epoch": 0.7245906595533663, "grad_norm": 0.8375031352043152, "learning_rate": 3.5149841130402486e-06, "loss": 0.3888, "step": 32755 }, { "epoch": 0.7247012671948796, "grad_norm": 1.1845148801803589, "learning_rate": 3.51233936026429e-06, "loss": 0.3788, "step": 32760 }, { "epoch": 0.7248118748363929, "grad_norm": 1.6328061819076538, "learning_rate": 3.5096953908713306e-06, "loss": 0.4757, "step": 32765 }, { "epoch": 0.7249224824779061, "grad_norm": 0.8084607124328613, "learning_rate": 3.507052205180627e-06, "loss": 0.3617, "step": 32770 }, { "epoch": 0.7250330901194194, "grad_norm": 1.3601657152175903, "learning_rate": 3.504409803511344e-06, "loss": 0.4297, "step": 32775 }, { "epoch": 0.7251436977609327, "grad_norm": 0.45929187536239624, "learning_rate": 3.5017681861825505e-06, "loss": 0.2782, "step": 32780 }, { "epoch": 0.725254305402446, "grad_norm": 0.8953861594200134, "learning_rate": 3.4991273535132208e-06, "loss": 0.3462, "step": 32785 }, { "epoch": 0.7253649130439591, "grad_norm": 1.5205235481262207, "learning_rate": 3.496487305822234e-06, "loss": 0.3644, "step": 32790 }, { "epoch": 0.7254755206854724, "grad_norm": 2.5780224800109863, "learning_rate": 3.493848043428376e-06, "loss": 0.5041, "step": 32795 }, { "epoch": 0.7255861283269857, "grad_norm": 0.8519768118858337, "learning_rate": 3.491209566650338e-06, "loss": 0.2784, "step": 32800 }, { "epoch": 0.7256967359684989, "grad_norm": 1.2170488834381104, "learning_rate": 3.488571875806717e-06, "loss": 0.2845, "step": 32805 }, { "epoch": 0.7258073436100122, "grad_norm": 1.222214937210083, "learning_rate": 3.4859349712160017e-06, "loss": 0.3033, "step": 32810 }, { "epoch": 0.7259179512515255, "grad_norm": 1.2851771116256714, "learning_rate": 3.4832988531966117e-06, "loss": 0.258, "step": 32815 }, { "epoch": 0.7260285588930387, "grad_norm": 1.5209630727767944, "learning_rate": 3.4806635220668538e-06, "loss": 0.332, "step": 32820 }, { "epoch": 0.726139166534552, "grad_norm": 0.6729066371917725, "learning_rate": 3.478028978144935e-06, "loss": 0.2021, "step": 32825 }, { "epoch": 0.7262497741760653, "grad_norm": 2.0701489448547363, "learning_rate": 3.475395221748986e-06, "loss": 0.4997, "step": 32830 }, { "epoch": 0.7263603818175784, "grad_norm": 1.2249354124069214, "learning_rate": 3.4727622531970273e-06, "loss": 0.3931, "step": 32835 }, { "epoch": 0.7264709894590917, "grad_norm": 1.2789827585220337, "learning_rate": 3.470130072806991e-06, "loss": 0.2672, "step": 32840 }, { "epoch": 0.726581597100605, "grad_norm": 0.9798678755760193, "learning_rate": 3.4674986808967103e-06, "loss": 0.2613, "step": 32845 }, { "epoch": 0.7266922047421183, "grad_norm": 1.5948766469955444, "learning_rate": 3.464868077783925e-06, "loss": 0.3176, "step": 32850 }, { "epoch": 0.7268028123836315, "grad_norm": 1.2298479080200195, "learning_rate": 3.4622382637862804e-06, "loss": 0.3607, "step": 32855 }, { "epoch": 0.7269134200251448, "grad_norm": 0.9886939525604248, "learning_rate": 3.4596092392213264e-06, "loss": 0.3111, "step": 32860 }, { "epoch": 0.7270240276666581, "grad_norm": 1.6997488737106323, "learning_rate": 3.4569810044065155e-06, "loss": 0.2848, "step": 32865 }, { "epoch": 0.7271346353081714, "grad_norm": 1.0337488651275635, "learning_rate": 3.454353559659207e-06, "loss": 0.184, "step": 32870 }, { "epoch": 0.7272452429496846, "grad_norm": 1.1733195781707764, "learning_rate": 3.4517269052966627e-06, "loss": 0.3069, "step": 32875 }, { "epoch": 0.7273558505911979, "grad_norm": 1.7039180994033813, "learning_rate": 3.4491010416360527e-06, "loss": 0.3012, "step": 32880 }, { "epoch": 0.727466458232711, "grad_norm": 1.065312147140503, "learning_rate": 3.446475968994447e-06, "loss": 0.394, "step": 32885 }, { "epoch": 0.7275770658742243, "grad_norm": 1.1225683689117432, "learning_rate": 3.443851687688824e-06, "loss": 0.3239, "step": 32890 }, { "epoch": 0.7276876735157376, "grad_norm": 0.849884569644928, "learning_rate": 3.441228198036064e-06, "loss": 0.36, "step": 32895 }, { "epoch": 0.7277982811572509, "grad_norm": 1.3406989574432373, "learning_rate": 3.438605500352952e-06, "loss": 0.4267, "step": 32900 }, { "epoch": 0.7279088887987641, "grad_norm": 1.559859037399292, "learning_rate": 3.4359835949561793e-06, "loss": 0.3357, "step": 32905 }, { "epoch": 0.7280194964402774, "grad_norm": 0.6204971671104431, "learning_rate": 3.4333624821623378e-06, "loss": 0.2674, "step": 32910 }, { "epoch": 0.7281301040817907, "grad_norm": 0.6441110372543335, "learning_rate": 3.430742162287929e-06, "loss": 0.3532, "step": 32915 }, { "epoch": 0.728240711723304, "grad_norm": 0.8522050976753235, "learning_rate": 3.428122635649358e-06, "loss": 0.2354, "step": 32920 }, { "epoch": 0.7283513193648172, "grad_norm": 1.5072919130325317, "learning_rate": 3.42550390256292e-06, "loss": 0.2814, "step": 32925 }, { "epoch": 0.7284619270063304, "grad_norm": 1.561497688293457, "learning_rate": 3.4228859633448384e-06, "loss": 0.3052, "step": 32930 }, { "epoch": 0.7285725346478437, "grad_norm": 0.4959414601325989, "learning_rate": 3.420268818311225e-06, "loss": 0.3267, "step": 32935 }, { "epoch": 0.7286831422893569, "grad_norm": 0.5230057239532471, "learning_rate": 3.417652467778099e-06, "loss": 0.2355, "step": 32940 }, { "epoch": 0.7287937499308702, "grad_norm": 0.9757082462310791, "learning_rate": 3.4150369120613823e-06, "loss": 0.232, "step": 32945 }, { "epoch": 0.7289043575723835, "grad_norm": 0.7398083209991455, "learning_rate": 3.412422151476905e-06, "loss": 0.3282, "step": 32950 }, { "epoch": 0.7290149652138967, "grad_norm": 1.175371527671814, "learning_rate": 3.4098081863403978e-06, "loss": 0.2291, "step": 32955 }, { "epoch": 0.72912557285541, "grad_norm": 0.6341686844825745, "learning_rate": 3.407195016967495e-06, "loss": 0.2216, "step": 32960 }, { "epoch": 0.7292361804969233, "grad_norm": 0.9654957056045532, "learning_rate": 3.404582643673736e-06, "loss": 0.3986, "step": 32965 }, { "epoch": 0.7293467881384366, "grad_norm": 0.707962155342102, "learning_rate": 3.4019710667745664e-06, "loss": 0.2815, "step": 32970 }, { "epoch": 0.7294573957799498, "grad_norm": 1.2760438919067383, "learning_rate": 3.3993602865853313e-06, "loss": 0.4505, "step": 32975 }, { "epoch": 0.729568003421463, "grad_norm": 1.5088510513305664, "learning_rate": 3.3967503034212812e-06, "loss": 0.3639, "step": 32980 }, { "epoch": 0.7296786110629763, "grad_norm": 1.7810368537902832, "learning_rate": 3.3941411175975726e-06, "loss": 0.5233, "step": 32985 }, { "epoch": 0.7297892187044895, "grad_norm": 1.6537026166915894, "learning_rate": 3.391532729429262e-06, "loss": 0.3044, "step": 32990 }, { "epoch": 0.7298998263460028, "grad_norm": 1.7223764657974243, "learning_rate": 3.3889251392313138e-06, "loss": 0.2987, "step": 32995 }, { "epoch": 0.7300104339875161, "grad_norm": 0.8813601136207581, "learning_rate": 3.3863183473185914e-06, "loss": 0.3193, "step": 33000 }, { "epoch": 0.7301210416290294, "grad_norm": 1.2515238523483276, "learning_rate": 3.383712354005865e-06, "loss": 0.2653, "step": 33005 }, { "epoch": 0.7302316492705426, "grad_norm": 1.0567312240600586, "learning_rate": 3.381107159607807e-06, "loss": 0.417, "step": 33010 }, { "epoch": 0.7303422569120559, "grad_norm": 1.2104113101959229, "learning_rate": 3.3785027644389945e-06, "loss": 0.3184, "step": 33015 }, { "epoch": 0.7304528645535692, "grad_norm": 1.458774209022522, "learning_rate": 3.37589916881391e-06, "loss": 0.3053, "step": 33020 }, { "epoch": 0.7305634721950823, "grad_norm": 1.8124854564666748, "learning_rate": 3.373296373046928e-06, "loss": 0.2791, "step": 33025 }, { "epoch": 0.7306740798365956, "grad_norm": 0.7763404846191406, "learning_rate": 3.3706943774523436e-06, "loss": 0.2092, "step": 33030 }, { "epoch": 0.7307846874781089, "grad_norm": 1.6028881072998047, "learning_rate": 3.36809318234435e-06, "loss": 0.2552, "step": 33035 }, { "epoch": 0.7308952951196221, "grad_norm": 1.420248031616211, "learning_rate": 3.365492788037027e-06, "loss": 0.3443, "step": 33040 }, { "epoch": 0.7310059027611354, "grad_norm": 0.8180923461914062, "learning_rate": 3.362893194844383e-06, "loss": 0.3423, "step": 33045 }, { "epoch": 0.7311165104026487, "grad_norm": 1.0136311054229736, "learning_rate": 3.360294403080315e-06, "loss": 0.3676, "step": 33050 }, { "epoch": 0.731227118044162, "grad_norm": 0.5938013195991516, "learning_rate": 3.3576964130586266e-06, "loss": 0.2948, "step": 33055 }, { "epoch": 0.7313377256856752, "grad_norm": 1.1523559093475342, "learning_rate": 3.3550992250930236e-06, "loss": 0.3149, "step": 33060 }, { "epoch": 0.7314483333271885, "grad_norm": 1.0363926887512207, "learning_rate": 3.352502839497116e-06, "loss": 0.2846, "step": 33065 }, { "epoch": 0.7315589409687018, "grad_norm": 1.1935211420059204, "learning_rate": 3.349907256584416e-06, "loss": 0.296, "step": 33070 }, { "epoch": 0.7316695486102149, "grad_norm": 1.565064549446106, "learning_rate": 3.3473124766683396e-06, "loss": 0.4244, "step": 33075 }, { "epoch": 0.7317801562517282, "grad_norm": 1.0072988271713257, "learning_rate": 3.3447185000622062e-06, "loss": 0.41, "step": 33080 }, { "epoch": 0.7318907638932415, "grad_norm": 1.3290550708770752, "learning_rate": 3.3421253270792376e-06, "loss": 0.3367, "step": 33085 }, { "epoch": 0.7320013715347548, "grad_norm": 1.5294723510742188, "learning_rate": 3.3395329580325577e-06, "loss": 0.4235, "step": 33090 }, { "epoch": 0.732111979176268, "grad_norm": 1.2589300870895386, "learning_rate": 3.3369413932351955e-06, "loss": 0.4916, "step": 33095 }, { "epoch": 0.7322225868177813, "grad_norm": 1.3901880979537964, "learning_rate": 3.3343506330000806e-06, "loss": 0.4367, "step": 33100 }, { "epoch": 0.7323331944592946, "grad_norm": 1.0268442630767822, "learning_rate": 3.331760677640047e-06, "loss": 0.2949, "step": 33105 }, { "epoch": 0.7324438021008078, "grad_norm": 0.7290223836898804, "learning_rate": 3.3291715274678315e-06, "loss": 0.3333, "step": 33110 }, { "epoch": 0.7325544097423211, "grad_norm": 1.3119170665740967, "learning_rate": 3.3265831827960715e-06, "loss": 0.2761, "step": 33115 }, { "epoch": 0.7326650173838343, "grad_norm": 1.7088216543197632, "learning_rate": 3.3239956439373112e-06, "loss": 0.3728, "step": 33120 }, { "epoch": 0.7327756250253475, "grad_norm": 0.6166365146636963, "learning_rate": 3.3214089112039925e-06, "loss": 0.398, "step": 33125 }, { "epoch": 0.7328862326668608, "grad_norm": 3.3906543254852295, "learning_rate": 3.318822984908465e-06, "loss": 0.3657, "step": 33130 }, { "epoch": 0.7329968403083741, "grad_norm": 1.2810852527618408, "learning_rate": 3.31623786536298e-06, "loss": 0.4325, "step": 33135 }, { "epoch": 0.7331074479498874, "grad_norm": 1.4522823095321655, "learning_rate": 3.3136535528796808e-06, "loss": 0.3251, "step": 33140 }, { "epoch": 0.7332180555914006, "grad_norm": 1.0512816905975342, "learning_rate": 3.3110700477706325e-06, "loss": 0.2866, "step": 33145 }, { "epoch": 0.7333286632329139, "grad_norm": 1.1206494569778442, "learning_rate": 3.308487350347792e-06, "loss": 0.3507, "step": 33150 }, { "epoch": 0.7334392708744272, "grad_norm": 2.1682956218719482, "learning_rate": 3.305905460923009e-06, "loss": 0.2769, "step": 33155 }, { "epoch": 0.7335498785159404, "grad_norm": 1.5068072080612183, "learning_rate": 3.3033243798080574e-06, "loss": 0.3565, "step": 33160 }, { "epoch": 0.7336604861574537, "grad_norm": 1.3538662195205688, "learning_rate": 3.300744107314596e-06, "loss": 0.3051, "step": 33165 }, { "epoch": 0.7337710937989669, "grad_norm": 0.6977583169937134, "learning_rate": 3.2981646437541937e-06, "loss": 0.3588, "step": 33170 }, { "epoch": 0.7338817014404801, "grad_norm": 1.6606518030166626, "learning_rate": 3.2955859894383202e-06, "loss": 0.3888, "step": 33175 }, { "epoch": 0.7339923090819934, "grad_norm": 1.4077811241149902, "learning_rate": 3.293008144678346e-06, "loss": 0.2523, "step": 33180 }, { "epoch": 0.7341029167235067, "grad_norm": 0.8998649716377258, "learning_rate": 3.2904311097855447e-06, "loss": 0.4471, "step": 33185 }, { "epoch": 0.73421352436502, "grad_norm": 1.1129329204559326, "learning_rate": 3.2878548850710945e-06, "loss": 0.2777, "step": 33190 }, { "epoch": 0.7343241320065332, "grad_norm": 1.0904213190078735, "learning_rate": 3.2852794708460724e-06, "loss": 0.4101, "step": 33195 }, { "epoch": 0.7344347396480465, "grad_norm": 1.157449722290039, "learning_rate": 3.282704867421459e-06, "loss": 0.2669, "step": 33200 }, { "epoch": 0.7345453472895598, "grad_norm": 0.7456679940223694, "learning_rate": 3.2801310751081372e-06, "loss": 0.306, "step": 33205 }, { "epoch": 0.734655954931073, "grad_norm": 2.0214052200317383, "learning_rate": 3.2775580942168904e-06, "loss": 0.2872, "step": 33210 }, { "epoch": 0.7347665625725862, "grad_norm": 0.8490930199623108, "learning_rate": 3.2749859250584072e-06, "loss": 0.3778, "step": 33215 }, { "epoch": 0.7348771702140995, "grad_norm": 1.276064395904541, "learning_rate": 3.272414567943275e-06, "loss": 0.289, "step": 33220 }, { "epoch": 0.7349877778556128, "grad_norm": 0.9723585247993469, "learning_rate": 3.269844023181985e-06, "loss": 0.2504, "step": 33225 }, { "epoch": 0.735098385497126, "grad_norm": 2.354609966278076, "learning_rate": 3.2672742910849274e-06, "loss": 0.3326, "step": 33230 }, { "epoch": 0.7352089931386393, "grad_norm": 0.9305600523948669, "learning_rate": 3.2647053719624034e-06, "loss": 0.2649, "step": 33235 }, { "epoch": 0.7353196007801526, "grad_norm": 1.5424895286560059, "learning_rate": 3.2621372661245976e-06, "loss": 0.4474, "step": 33240 }, { "epoch": 0.7354302084216658, "grad_norm": 0.8612196445465088, "learning_rate": 3.259569973881617e-06, "loss": 0.3858, "step": 33245 }, { "epoch": 0.7355408160631791, "grad_norm": 1.385136365890503, "learning_rate": 3.2570034955434627e-06, "loss": 0.3455, "step": 33250 }, { "epoch": 0.7356514237046924, "grad_norm": 0.8338227868080139, "learning_rate": 3.2544378314200253e-06, "loss": 0.3885, "step": 33255 }, { "epoch": 0.7357620313462057, "grad_norm": 1.2177263498306274, "learning_rate": 3.251872981821119e-06, "loss": 0.3743, "step": 33260 }, { "epoch": 0.7358726389877188, "grad_norm": 1.1426998376846313, "learning_rate": 3.2493089470564445e-06, "loss": 0.3043, "step": 33265 }, { "epoch": 0.7359832466292321, "grad_norm": 0.8745356202125549, "learning_rate": 3.246745727435607e-06, "loss": 0.3916, "step": 33270 }, { "epoch": 0.7360938542707454, "grad_norm": 1.497873306274414, "learning_rate": 3.2441833232681164e-06, "loss": 0.411, "step": 33275 }, { "epoch": 0.7362044619122586, "grad_norm": 1.2731459140777588, "learning_rate": 3.2416217348633827e-06, "loss": 0.25, "step": 33280 }, { "epoch": 0.7363150695537719, "grad_norm": 0.9474956393241882, "learning_rate": 3.2390609625307145e-06, "loss": 0.2327, "step": 33285 }, { "epoch": 0.7364256771952852, "grad_norm": 1.885467529296875, "learning_rate": 3.2365010065793255e-06, "loss": 0.3415, "step": 33290 }, { "epoch": 0.7365362848367984, "grad_norm": 0.9185181260108948, "learning_rate": 3.23394186731833e-06, "loss": 0.2966, "step": 33295 }, { "epoch": 0.7366468924783117, "grad_norm": 1.012673020362854, "learning_rate": 3.231383545056743e-06, "loss": 0.2163, "step": 33300 }, { "epoch": 0.736757500119825, "grad_norm": 1.3880035877227783, "learning_rate": 3.228826040103481e-06, "loss": 0.4846, "step": 33305 }, { "epoch": 0.7368681077613382, "grad_norm": 1.6689207553863525, "learning_rate": 3.2262693527673616e-06, "loss": 0.3154, "step": 33310 }, { "epoch": 0.7369787154028514, "grad_norm": 0.9464114904403687, "learning_rate": 3.2237134833571036e-06, "loss": 0.3308, "step": 33315 }, { "epoch": 0.7370893230443647, "grad_norm": 0.46502596139907837, "learning_rate": 3.2211584321813296e-06, "loss": 0.1451, "step": 33320 }, { "epoch": 0.737199930685878, "grad_norm": 0.8958333134651184, "learning_rate": 3.2186041995485594e-06, "loss": 0.376, "step": 33325 }, { "epoch": 0.7373105383273912, "grad_norm": 1.1720123291015625, "learning_rate": 3.216050785767215e-06, "loss": 0.2858, "step": 33330 }, { "epoch": 0.7374211459689045, "grad_norm": 1.0819352865219116, "learning_rate": 3.2134981911456233e-06, "loss": 0.2445, "step": 33335 }, { "epoch": 0.7375317536104178, "grad_norm": 0.5467990636825562, "learning_rate": 3.2109464159920056e-06, "loss": 0.3054, "step": 33340 }, { "epoch": 0.7376423612519311, "grad_norm": 1.5558534860610962, "learning_rate": 3.2083954606144908e-06, "loss": 0.2522, "step": 33345 }, { "epoch": 0.7377529688934443, "grad_norm": 1.085801124572754, "learning_rate": 3.2058453253211088e-06, "loss": 0.4075, "step": 33350 }, { "epoch": 0.7378635765349576, "grad_norm": 2.0612897872924805, "learning_rate": 3.2032960104197764e-06, "loss": 0.3526, "step": 33355 }, { "epoch": 0.7379741841764708, "grad_norm": 1.5271456241607666, "learning_rate": 3.2007475162183334e-06, "loss": 0.5503, "step": 33360 }, { "epoch": 0.738084791817984, "grad_norm": 0.1725873351097107, "learning_rate": 3.19819984302451e-06, "loss": 0.3279, "step": 33365 }, { "epoch": 0.7381953994594973, "grad_norm": 1.1034539937973022, "learning_rate": 3.195652991145927e-06, "loss": 0.3498, "step": 33370 }, { "epoch": 0.7383060071010106, "grad_norm": 0.7734230160713196, "learning_rate": 3.1931069608901243e-06, "loss": 0.3642, "step": 33375 }, { "epoch": 0.7384166147425238, "grad_norm": 1.0296491384506226, "learning_rate": 3.190561752564533e-06, "loss": 0.2899, "step": 33380 }, { "epoch": 0.7385272223840371, "grad_norm": 1.5201542377471924, "learning_rate": 3.1880173664764847e-06, "loss": 0.293, "step": 33385 }, { "epoch": 0.7386378300255504, "grad_norm": 1.4147757291793823, "learning_rate": 3.1854738029332146e-06, "loss": 0.3528, "step": 33390 }, { "epoch": 0.7387484376670637, "grad_norm": 1.0549345016479492, "learning_rate": 3.182931062241856e-06, "loss": 0.2895, "step": 33395 }, { "epoch": 0.7388590453085769, "grad_norm": 1.205102562904358, "learning_rate": 3.180389144709445e-06, "loss": 0.3877, "step": 33400 }, { "epoch": 0.7389696529500901, "grad_norm": 1.4222841262817383, "learning_rate": 3.1778480506429165e-06, "loss": 0.3225, "step": 33405 }, { "epoch": 0.7390802605916034, "grad_norm": 0.40749701857566833, "learning_rate": 3.1753077803491073e-06, "loss": 0.26, "step": 33410 }, { "epoch": 0.7391908682331166, "grad_norm": 1.081725835800171, "learning_rate": 3.172768334134755e-06, "loss": 0.2579, "step": 33415 }, { "epoch": 0.7393014758746299, "grad_norm": 0.8278363943099976, "learning_rate": 3.170229712306495e-06, "loss": 0.4243, "step": 33420 }, { "epoch": 0.7394120835161432, "grad_norm": 1.393999695777893, "learning_rate": 3.167691915170866e-06, "loss": 0.3588, "step": 33425 }, { "epoch": 0.7395226911576565, "grad_norm": 0.7472971677780151, "learning_rate": 3.1651549430343077e-06, "loss": 0.2276, "step": 33430 }, { "epoch": 0.7396332987991697, "grad_norm": 1.068407654762268, "learning_rate": 3.162618796203156e-06, "loss": 0.3501, "step": 33435 }, { "epoch": 0.739743906440683, "grad_norm": 1.3044408559799194, "learning_rate": 3.1600834749836518e-06, "loss": 0.3816, "step": 33440 }, { "epoch": 0.7398545140821963, "grad_norm": 0.46114927530288696, "learning_rate": 3.157548979681935e-06, "loss": 0.2578, "step": 33445 }, { "epoch": 0.7399651217237095, "grad_norm": 1.1901675462722778, "learning_rate": 3.155015310604046e-06, "loss": 0.2423, "step": 33450 }, { "epoch": 0.7400757293652227, "grad_norm": 0.5516652464866638, "learning_rate": 3.152482468055915e-06, "loss": 0.3296, "step": 33455 }, { "epoch": 0.740186337006736, "grad_norm": 0.7632186412811279, "learning_rate": 3.1499504523433945e-06, "loss": 0.3959, "step": 33460 }, { "epoch": 0.7402969446482492, "grad_norm": 1.2626420259475708, "learning_rate": 3.147419263772221e-06, "loss": 0.286, "step": 33465 }, { "epoch": 0.7404075522897625, "grad_norm": 0.8586310148239136, "learning_rate": 3.144888902648028e-06, "loss": 0.2833, "step": 33470 }, { "epoch": 0.7405181599312758, "grad_norm": 0.94626384973526, "learning_rate": 3.1423593692763645e-06, "loss": 0.328, "step": 33475 }, { "epoch": 0.7406287675727891, "grad_norm": 1.0402873754501343, "learning_rate": 3.1398306639626664e-06, "loss": 0.3257, "step": 33480 }, { "epoch": 0.7407393752143023, "grad_norm": 1.8031483888626099, "learning_rate": 3.137302787012276e-06, "loss": 0.4201, "step": 33485 }, { "epoch": 0.7408499828558156, "grad_norm": 0.7094330787658691, "learning_rate": 3.1347757387304324e-06, "loss": 0.3154, "step": 33490 }, { "epoch": 0.7409605904973289, "grad_norm": 0.8477993011474609, "learning_rate": 3.1322495194222756e-06, "loss": 0.2171, "step": 33495 }, { "epoch": 0.741071198138842, "grad_norm": 1.346130132675171, "learning_rate": 3.129724129392847e-06, "loss": 0.2445, "step": 33500 }, { "epoch": 0.7411818057803553, "grad_norm": 0.6697807908058167, "learning_rate": 3.127199568947087e-06, "loss": 0.3563, "step": 33505 }, { "epoch": 0.7412924134218686, "grad_norm": 1.283226490020752, "learning_rate": 3.1246758383898335e-06, "loss": 0.4116, "step": 33510 }, { "epoch": 0.7414030210633819, "grad_norm": 1.058613657951355, "learning_rate": 3.1221529380258274e-06, "loss": 0.3543, "step": 33515 }, { "epoch": 0.7415136287048951, "grad_norm": 0.9134521484375, "learning_rate": 3.1196308681597075e-06, "loss": 0.2373, "step": 33520 }, { "epoch": 0.7416242363464084, "grad_norm": 1.6221317052841187, "learning_rate": 3.1171096290960144e-06, "loss": 0.4458, "step": 33525 }, { "epoch": 0.7417348439879217, "grad_norm": 0.8495303988456726, "learning_rate": 3.114589221139186e-06, "loss": 0.2969, "step": 33530 }, { "epoch": 0.7418454516294349, "grad_norm": 1.3002151250839233, "learning_rate": 3.1120696445935607e-06, "loss": 0.3893, "step": 33535 }, { "epoch": 0.7419560592709482, "grad_norm": 1.3346424102783203, "learning_rate": 3.1095508997633763e-06, "loss": 0.435, "step": 33540 }, { "epoch": 0.7420666669124614, "grad_norm": 1.2475687265396118, "learning_rate": 3.1070329869527703e-06, "loss": 0.3911, "step": 33545 }, { "epoch": 0.7421772745539746, "grad_norm": 0.5745762586593628, "learning_rate": 3.1045159064657804e-06, "loss": 0.329, "step": 33550 }, { "epoch": 0.7422878821954879, "grad_norm": 1.2205965518951416, "learning_rate": 3.101999658606344e-06, "loss": 0.3598, "step": 33555 }, { "epoch": 0.7423984898370012, "grad_norm": 1.4774088859558105, "learning_rate": 3.0994842436782958e-06, "loss": 0.2998, "step": 33560 }, { "epoch": 0.7425090974785145, "grad_norm": 1.4064669609069824, "learning_rate": 3.096969661985376e-06, "loss": 0.3754, "step": 33565 }, { "epoch": 0.7426197051200277, "grad_norm": 0.9477702379226685, "learning_rate": 3.094455913831208e-06, "loss": 0.2746, "step": 33570 }, { "epoch": 0.742730312761541, "grad_norm": 0.7337713241577148, "learning_rate": 3.0919429995193386e-06, "loss": 0.1718, "step": 33575 }, { "epoch": 0.7428409204030543, "grad_norm": 0.9596565365791321, "learning_rate": 3.089430919353198e-06, "loss": 0.4032, "step": 33580 }, { "epoch": 0.7429515280445675, "grad_norm": 1.1370480060577393, "learning_rate": 3.086919673636113e-06, "loss": 0.3186, "step": 33585 }, { "epoch": 0.7430621356860808, "grad_norm": 1.053529143333435, "learning_rate": 3.084409262671322e-06, "loss": 0.2373, "step": 33590 }, { "epoch": 0.743172743327594, "grad_norm": 1.5080325603485107, "learning_rate": 3.0818996867619557e-06, "loss": 0.3016, "step": 33595 }, { "epoch": 0.7432833509691072, "grad_norm": 0.8269264698028564, "learning_rate": 3.0793909462110426e-06, "loss": 0.3718, "step": 33600 }, { "epoch": 0.7433939586106205, "grad_norm": 1.7411754131317139, "learning_rate": 3.0768830413215146e-06, "loss": 0.3407, "step": 33605 }, { "epoch": 0.7435045662521338, "grad_norm": 1.3725465536117554, "learning_rate": 3.0743759723961986e-06, "loss": 0.411, "step": 33610 }, { "epoch": 0.7436151738936471, "grad_norm": 1.120438575744629, "learning_rate": 3.0718697397378227e-06, "loss": 0.3187, "step": 33615 }, { "epoch": 0.7437257815351603, "grad_norm": 1.4430952072143555, "learning_rate": 3.069364343649015e-06, "loss": 0.3966, "step": 33620 }, { "epoch": 0.7438363891766736, "grad_norm": 1.194082260131836, "learning_rate": 3.066859784432299e-06, "loss": 0.3152, "step": 33625 }, { "epoch": 0.7439469968181869, "grad_norm": 2.5865886211395264, "learning_rate": 3.0643560623901013e-06, "loss": 0.3962, "step": 33630 }, { "epoch": 0.7440576044597002, "grad_norm": 1.5668219327926636, "learning_rate": 3.061853177824745e-06, "loss": 0.3356, "step": 33635 }, { "epoch": 0.7441682121012133, "grad_norm": 1.1523157358169556, "learning_rate": 3.059351131038453e-06, "loss": 0.4315, "step": 33640 }, { "epoch": 0.7442788197427266, "grad_norm": 1.3326908349990845, "learning_rate": 3.0568499223333457e-06, "loss": 0.2599, "step": 33645 }, { "epoch": 0.7443894273842399, "grad_norm": 2.4646503925323486, "learning_rate": 3.054349552011444e-06, "loss": 0.4107, "step": 33650 }, { "epoch": 0.7445000350257531, "grad_norm": 0.6226585507392883, "learning_rate": 3.051850020374667e-06, "loss": 0.1739, "step": 33655 }, { "epoch": 0.7446106426672664, "grad_norm": 0.892366886138916, "learning_rate": 3.049351327724832e-06, "loss": 0.3413, "step": 33660 }, { "epoch": 0.7447212503087797, "grad_norm": 0.937661349773407, "learning_rate": 3.046853474363658e-06, "loss": 0.2757, "step": 33665 }, { "epoch": 0.7448318579502929, "grad_norm": 3.417371988296509, "learning_rate": 3.0443564605927524e-06, "loss": 0.1764, "step": 33670 }, { "epoch": 0.7449424655918062, "grad_norm": 0.7511801719665527, "learning_rate": 3.041860286713636e-06, "loss": 0.3309, "step": 33675 }, { "epoch": 0.7450530732333195, "grad_norm": 1.2035385370254517, "learning_rate": 3.0393649530277216e-06, "loss": 0.2927, "step": 33680 }, { "epoch": 0.7451636808748328, "grad_norm": 1.169154405593872, "learning_rate": 3.036870459836312e-06, "loss": 0.4113, "step": 33685 }, { "epoch": 0.7452742885163459, "grad_norm": 1.1820727586746216, "learning_rate": 3.0343768074406244e-06, "loss": 0.2866, "step": 33690 }, { "epoch": 0.7453848961578592, "grad_norm": 1.4949359893798828, "learning_rate": 3.0318839961417667e-06, "loss": 0.2967, "step": 33695 }, { "epoch": 0.7454955037993725, "grad_norm": 1.0045124292373657, "learning_rate": 3.0293920262407363e-06, "loss": 0.2376, "step": 33700 }, { "epoch": 0.7456061114408857, "grad_norm": 1.6551501750946045, "learning_rate": 3.0269008980384463e-06, "loss": 0.3954, "step": 33705 }, { "epoch": 0.745716719082399, "grad_norm": 1.020013451576233, "learning_rate": 3.0244106118356986e-06, "loss": 0.2724, "step": 33710 }, { "epoch": 0.7458273267239123, "grad_norm": 0.4443033039569855, "learning_rate": 3.0219211679331915e-06, "loss": 0.4283, "step": 33715 }, { "epoch": 0.7459379343654255, "grad_norm": 0.8468268513679504, "learning_rate": 3.019432566631527e-06, "loss": 0.3787, "step": 33720 }, { "epoch": 0.7460485420069388, "grad_norm": 1.3771696090698242, "learning_rate": 3.0169448082312012e-06, "loss": 0.2538, "step": 33725 }, { "epoch": 0.7461591496484521, "grad_norm": 1.0071158409118652, "learning_rate": 3.0144578930326118e-06, "loss": 0.2596, "step": 33730 }, { "epoch": 0.7462697572899653, "grad_norm": 1.05765962600708, "learning_rate": 3.0119718213360517e-06, "loss": 0.3616, "step": 33735 }, { "epoch": 0.7463803649314785, "grad_norm": 1.1072007417678833, "learning_rate": 3.009486593441713e-06, "loss": 0.4754, "step": 33740 }, { "epoch": 0.7464909725729918, "grad_norm": 1.6009211540222168, "learning_rate": 3.0070022096496874e-06, "loss": 0.2698, "step": 33745 }, { "epoch": 0.7466015802145051, "grad_norm": 0.6059082746505737, "learning_rate": 3.0045186702599637e-06, "loss": 0.3103, "step": 33750 }, { "epoch": 0.7467121878560183, "grad_norm": 1.7979038953781128, "learning_rate": 3.002035975572427e-06, "loss": 0.4517, "step": 33755 }, { "epoch": 0.7468227954975316, "grad_norm": 2.5189712047576904, "learning_rate": 2.9995541258868634e-06, "loss": 0.3584, "step": 33760 }, { "epoch": 0.7469334031390449, "grad_norm": 1.384292483329773, "learning_rate": 2.997073121502955e-06, "loss": 0.4555, "step": 33765 }, { "epoch": 0.7470440107805582, "grad_norm": 0.9028882384300232, "learning_rate": 2.994592962720282e-06, "loss": 0.1113, "step": 33770 }, { "epoch": 0.7471546184220714, "grad_norm": 1.7967129945755005, "learning_rate": 2.992113649838322e-06, "loss": 0.3447, "step": 33775 }, { "epoch": 0.7472652260635847, "grad_norm": 1.2890617847442627, "learning_rate": 2.9896351831564563e-06, "loss": 0.2434, "step": 33780 }, { "epoch": 0.7473758337050979, "grad_norm": 1.4307605028152466, "learning_rate": 2.987157562973948e-06, "loss": 0.3628, "step": 33785 }, { "epoch": 0.7474864413466111, "grad_norm": 1.247496247291565, "learning_rate": 2.984680789589979e-06, "loss": 0.3043, "step": 33790 }, { "epoch": 0.7475970489881244, "grad_norm": 1.2729493379592896, "learning_rate": 2.982204863303618e-06, "loss": 0.2781, "step": 33795 }, { "epoch": 0.7477076566296377, "grad_norm": 0.8940299153327942, "learning_rate": 2.9797297844138252e-06, "loss": 0.4874, "step": 33800 }, { "epoch": 0.747818264271151, "grad_norm": 1.2777752876281738, "learning_rate": 2.9772555532194737e-06, "loss": 0.4491, "step": 33805 }, { "epoch": 0.7479288719126642, "grad_norm": 0.7740147709846497, "learning_rate": 2.9747821700193215e-06, "loss": 0.2802, "step": 33810 }, { "epoch": 0.7480394795541775, "grad_norm": 1.0574904680252075, "learning_rate": 2.9723096351120318e-06, "loss": 0.4216, "step": 33815 }, { "epoch": 0.7481500871956908, "grad_norm": 0.3307296633720398, "learning_rate": 2.9698379487961592e-06, "loss": 0.3418, "step": 33820 }, { "epoch": 0.748260694837204, "grad_norm": 1.0227404832839966, "learning_rate": 2.967367111370162e-06, "loss": 0.3645, "step": 33825 }, { "epoch": 0.7483713024787172, "grad_norm": 1.3241090774536133, "learning_rate": 2.964897123132391e-06, "loss": 0.2697, "step": 33830 }, { "epoch": 0.7484819101202305, "grad_norm": 1.1695165634155273, "learning_rate": 2.9624279843810976e-06, "loss": 0.3629, "step": 33835 }, { "epoch": 0.7485925177617437, "grad_norm": 2.087340831756592, "learning_rate": 2.9599596954144296e-06, "loss": 0.2252, "step": 33840 }, { "epoch": 0.748703125403257, "grad_norm": 0.7983971238136292, "learning_rate": 2.957492256530431e-06, "loss": 0.4058, "step": 33845 }, { "epoch": 0.7488137330447703, "grad_norm": 2.0526483058929443, "learning_rate": 2.955025668027046e-06, "loss": 0.3368, "step": 33850 }, { "epoch": 0.7489243406862836, "grad_norm": 1.4266380071640015, "learning_rate": 2.9525599302021123e-06, "loss": 0.3868, "step": 33855 }, { "epoch": 0.7490349483277968, "grad_norm": 1.4667638540267944, "learning_rate": 2.950095043353368e-06, "loss": 0.4852, "step": 33860 }, { "epoch": 0.7491455559693101, "grad_norm": 0.796505868434906, "learning_rate": 2.9476310077784486e-06, "loss": 0.2987, "step": 33865 }, { "epoch": 0.7492561636108234, "grad_norm": 0.7420910596847534, "learning_rate": 2.9451678237748836e-06, "loss": 0.3719, "step": 33870 }, { "epoch": 0.7493667712523366, "grad_norm": 1.9476299285888672, "learning_rate": 2.942705491640103e-06, "loss": 0.335, "step": 33875 }, { "epoch": 0.7494773788938498, "grad_norm": 0.850367546081543, "learning_rate": 2.9402440116714357e-06, "loss": 0.3842, "step": 33880 }, { "epoch": 0.7495879865353631, "grad_norm": 1.410493016242981, "learning_rate": 2.937783384166094e-06, "loss": 0.3975, "step": 33885 }, { "epoch": 0.7496985941768763, "grad_norm": 1.0289372205734253, "learning_rate": 2.935323609421209e-06, "loss": 0.3902, "step": 33890 }, { "epoch": 0.7498092018183896, "grad_norm": 1.0970178842544556, "learning_rate": 2.9328646877337963e-06, "loss": 0.435, "step": 33895 }, { "epoch": 0.7499198094599029, "grad_norm": 1.7517650127410889, "learning_rate": 2.9304066194007617e-06, "loss": 0.37, "step": 33900 }, { "epoch": 0.7500304171014162, "grad_norm": 1.060796856880188, "learning_rate": 2.927949404718925e-06, "loss": 0.2298, "step": 33905 }, { "epoch": 0.7501410247429294, "grad_norm": 1.0272314548492432, "learning_rate": 2.9254930439849948e-06, "loss": 0.2718, "step": 33910 }, { "epoch": 0.7502516323844427, "grad_norm": 1.2188535928726196, "learning_rate": 2.9230375374955646e-06, "loss": 0.4032, "step": 33915 }, { "epoch": 0.750362240025956, "grad_norm": 1.318282127380371, "learning_rate": 2.9205828855471474e-06, "loss": 0.3403, "step": 33920 }, { "epoch": 0.7504728476674691, "grad_norm": 0.9993777871131897, "learning_rate": 2.918129088436138e-06, "loss": 0.3025, "step": 33925 }, { "epoch": 0.7505834553089824, "grad_norm": 0.9986881017684937, "learning_rate": 2.9156761464588314e-06, "loss": 0.2326, "step": 33930 }, { "epoch": 0.7506940629504957, "grad_norm": 1.3732458353042603, "learning_rate": 2.9132240599114194e-06, "loss": 0.4116, "step": 33935 }, { "epoch": 0.750804670592009, "grad_norm": 1.922980785369873, "learning_rate": 2.9107728290899906e-06, "loss": 0.4455, "step": 33940 }, { "epoch": 0.7509152782335222, "grad_norm": 1.096692442893982, "learning_rate": 2.9083224542905307e-06, "loss": 0.2843, "step": 33945 }, { "epoch": 0.7510258858750355, "grad_norm": 1.4723782539367676, "learning_rate": 2.905872935808922e-06, "loss": 0.3479, "step": 33950 }, { "epoch": 0.7511364935165488, "grad_norm": 1.0987650156021118, "learning_rate": 2.9034242739409434e-06, "loss": 0.3167, "step": 33955 }, { "epoch": 0.751247101158062, "grad_norm": 1.070604920387268, "learning_rate": 2.9009764689822686e-06, "loss": 0.2989, "step": 33960 }, { "epoch": 0.7513577087995753, "grad_norm": 1.0988131761550903, "learning_rate": 2.8985295212284703e-06, "loss": 0.3656, "step": 33965 }, { "epoch": 0.7514683164410886, "grad_norm": 1.0689289569854736, "learning_rate": 2.896083430975016e-06, "loss": 0.3079, "step": 33970 }, { "epoch": 0.7515789240826017, "grad_norm": 1.3104403018951416, "learning_rate": 2.8936381985172724e-06, "loss": 0.3043, "step": 33975 }, { "epoch": 0.751689531724115, "grad_norm": 1.971530795097351, "learning_rate": 2.891193824150498e-06, "loss": 0.473, "step": 33980 }, { "epoch": 0.7518001393656283, "grad_norm": 2.2085654735565186, "learning_rate": 2.8887503081698508e-06, "loss": 0.3267, "step": 33985 }, { "epoch": 0.7519107470071416, "grad_norm": 0.900980532169342, "learning_rate": 2.886307650870386e-06, "loss": 0.3423, "step": 33990 }, { "epoch": 0.7520213546486548, "grad_norm": 1.085532307624817, "learning_rate": 2.8838658525470544e-06, "loss": 0.2505, "step": 33995 }, { "epoch": 0.7521319622901681, "grad_norm": 1.0864509344100952, "learning_rate": 2.8814249134946957e-06, "loss": 0.3658, "step": 34000 }, { "epoch": 0.7522425699316814, "grad_norm": 1.6588307619094849, "learning_rate": 2.8789848340080605e-06, "loss": 0.3169, "step": 34005 }, { "epoch": 0.7523531775731946, "grad_norm": 2.0554542541503906, "learning_rate": 2.876545614381787e-06, "loss": 0.5106, "step": 34010 }, { "epoch": 0.7524637852147079, "grad_norm": 1.2877260446548462, "learning_rate": 2.874107254910402e-06, "loss": 0.2573, "step": 34015 }, { "epoch": 0.7525743928562211, "grad_norm": 0.79696124792099, "learning_rate": 2.8716697558883466e-06, "loss": 0.3673, "step": 34020 }, { "epoch": 0.7526850004977343, "grad_norm": 1.056949257850647, "learning_rate": 2.869233117609943e-06, "loss": 0.2557, "step": 34025 }, { "epoch": 0.7527956081392476, "grad_norm": 1.4935466051101685, "learning_rate": 2.866797340369416e-06, "loss": 0.3325, "step": 34030 }, { "epoch": 0.7529062157807609, "grad_norm": 1.483319878578186, "learning_rate": 2.8643624244608835e-06, "loss": 0.3885, "step": 34035 }, { "epoch": 0.7530168234222742, "grad_norm": 1.4567192792892456, "learning_rate": 2.861928370178363e-06, "loss": 0.3016, "step": 34040 }, { "epoch": 0.7531274310637874, "grad_norm": 1.0880964994430542, "learning_rate": 2.8594951778157644e-06, "loss": 0.5447, "step": 34045 }, { "epoch": 0.7532380387053007, "grad_norm": 1.2878859043121338, "learning_rate": 2.8570628476668938e-06, "loss": 0.2854, "step": 34050 }, { "epoch": 0.753348646346814, "grad_norm": 0.6644721627235413, "learning_rate": 2.854631380025457e-06, "loss": 0.3821, "step": 34055 }, { "epoch": 0.7534592539883272, "grad_norm": 1.1862337589263916, "learning_rate": 2.8522007751850512e-06, "loss": 0.2826, "step": 34060 }, { "epoch": 0.7535698616298405, "grad_norm": 0.5804129838943481, "learning_rate": 2.8497710334391706e-06, "loss": 0.3659, "step": 34065 }, { "epoch": 0.7536804692713537, "grad_norm": 0.9893611073493958, "learning_rate": 2.8473421550812076e-06, "loss": 0.3281, "step": 34070 }, { "epoch": 0.753791076912867, "grad_norm": 1.3975796699523926, "learning_rate": 2.8449141404044478e-06, "loss": 0.3014, "step": 34075 }, { "epoch": 0.7539016845543802, "grad_norm": 0.46469706296920776, "learning_rate": 2.842486989702072e-06, "loss": 0.2175, "step": 34080 }, { "epoch": 0.7540122921958935, "grad_norm": 0.7581819295883179, "learning_rate": 2.84006070326716e-06, "loss": 0.1982, "step": 34085 }, { "epoch": 0.7541228998374068, "grad_norm": 1.8817782402038574, "learning_rate": 2.8376352813926833e-06, "loss": 0.3134, "step": 34090 }, { "epoch": 0.75423350747892, "grad_norm": 1.1837754249572754, "learning_rate": 2.8352107243715145e-06, "loss": 0.3009, "step": 34095 }, { "epoch": 0.7543441151204333, "grad_norm": 0.9621619582176208, "learning_rate": 2.832787032496409e-06, "loss": 0.4236, "step": 34100 }, { "epoch": 0.7544547227619466, "grad_norm": 1.10175621509552, "learning_rate": 2.830364206060037e-06, "loss": 0.287, "step": 34105 }, { "epoch": 0.7545653304034599, "grad_norm": 0.9058597087860107, "learning_rate": 2.827942245354952e-06, "loss": 0.363, "step": 34110 }, { "epoch": 0.754675938044973, "grad_norm": 0.9938020706176758, "learning_rate": 2.825521150673598e-06, "loss": 0.3749, "step": 34115 }, { "epoch": 0.7547865456864863, "grad_norm": 1.3514198064804077, "learning_rate": 2.8231009223083283e-06, "loss": 0.4009, "step": 34120 }, { "epoch": 0.7548971533279996, "grad_norm": 1.1536586284637451, "learning_rate": 2.820681560551387e-06, "loss": 0.4769, "step": 34125 }, { "epoch": 0.7550077609695128, "grad_norm": 1.5034266710281372, "learning_rate": 2.8182630656949007e-06, "loss": 0.3079, "step": 34130 }, { "epoch": 0.7551183686110261, "grad_norm": 1.839363932609558, "learning_rate": 2.815845438030911e-06, "loss": 0.4234, "step": 34135 }, { "epoch": 0.7552289762525394, "grad_norm": 0.7719710469245911, "learning_rate": 2.8134286778513433e-06, "loss": 0.3993, "step": 34140 }, { "epoch": 0.7553395838940526, "grad_norm": 1.1539496183395386, "learning_rate": 2.8110127854480207e-06, "loss": 0.2935, "step": 34145 }, { "epoch": 0.7554501915355659, "grad_norm": 1.3432605266571045, "learning_rate": 2.8085977611126604e-06, "loss": 0.3617, "step": 34150 }, { "epoch": 0.7555607991770792, "grad_norm": 1.0008901357650757, "learning_rate": 2.806183605136877e-06, "loss": 0.2922, "step": 34155 }, { "epoch": 0.7556714068185925, "grad_norm": 0.9082362651824951, "learning_rate": 2.8037703178121788e-06, "loss": 0.1663, "step": 34160 }, { "epoch": 0.7557820144601056, "grad_norm": 0.8771294355392456, "learning_rate": 2.8013578994299694e-06, "loss": 0.2595, "step": 34165 }, { "epoch": 0.7558926221016189, "grad_norm": 1.399937629699707, "learning_rate": 2.798946350281547e-06, "loss": 0.4369, "step": 34170 }, { "epoch": 0.7560032297431322, "grad_norm": 1.2001315355300903, "learning_rate": 2.796535670658106e-06, "loss": 0.3416, "step": 34175 }, { "epoch": 0.7561138373846454, "grad_norm": 1.6390303373336792, "learning_rate": 2.794125860850736e-06, "loss": 0.4163, "step": 34180 }, { "epoch": 0.7562244450261587, "grad_norm": 1.2906674146652222, "learning_rate": 2.7917169211504192e-06, "loss": 0.2017, "step": 34185 }, { "epoch": 0.756335052667672, "grad_norm": 1.233567476272583, "learning_rate": 2.7893088518480347e-06, "loss": 0.371, "step": 34190 }, { "epoch": 0.7564456603091853, "grad_norm": 1.3000388145446777, "learning_rate": 2.786901653234356e-06, "loss": 0.3306, "step": 34195 }, { "epoch": 0.7565562679506985, "grad_norm": 0.5487843155860901, "learning_rate": 2.784495325600054e-06, "loss": 0.3178, "step": 34200 }, { "epoch": 0.7566668755922118, "grad_norm": 1.1341872215270996, "learning_rate": 2.7820898692356878e-06, "loss": 0.3845, "step": 34205 }, { "epoch": 0.756777483233725, "grad_norm": 0.8819133639335632, "learning_rate": 2.7796852844317223e-06, "loss": 0.2205, "step": 34210 }, { "epoch": 0.7568880908752382, "grad_norm": 0.9920343160629272, "learning_rate": 2.7772815714784993e-06, "loss": 0.3141, "step": 34215 }, { "epoch": 0.7569986985167515, "grad_norm": 1.4482730627059937, "learning_rate": 2.774878730666276e-06, "loss": 0.3669, "step": 34220 }, { "epoch": 0.7571093061582648, "grad_norm": 1.9092575311660767, "learning_rate": 2.7724767622851946e-06, "loss": 0.3854, "step": 34225 }, { "epoch": 0.757219913799778, "grad_norm": 1.2625564336776733, "learning_rate": 2.7700756666252827e-06, "loss": 0.3446, "step": 34230 }, { "epoch": 0.7573305214412913, "grad_norm": 1.6139885187149048, "learning_rate": 2.767675443976481e-06, "loss": 0.2752, "step": 34235 }, { "epoch": 0.7574411290828046, "grad_norm": 0.7536249160766602, "learning_rate": 2.765276094628616e-06, "loss": 0.2011, "step": 34240 }, { "epoch": 0.7575517367243179, "grad_norm": 0.688996434211731, "learning_rate": 2.762877618871399e-06, "loss": 0.3144, "step": 34245 }, { "epoch": 0.7576623443658311, "grad_norm": 0.7919974327087402, "learning_rate": 2.7604800169944557e-06, "loss": 0.2473, "step": 34250 }, { "epoch": 0.7577729520073444, "grad_norm": 1.3659802675247192, "learning_rate": 2.75808328928729e-06, "loss": 0.1665, "step": 34255 }, { "epoch": 0.7578835596488576, "grad_norm": 1.2948085069656372, "learning_rate": 2.755687436039308e-06, "loss": 0.2881, "step": 34260 }, { "epoch": 0.7579941672903708, "grad_norm": 3.496915578842163, "learning_rate": 2.753292457539807e-06, "loss": 0.3449, "step": 34265 }, { "epoch": 0.7581047749318841, "grad_norm": 0.552612841129303, "learning_rate": 2.750898354077981e-06, "loss": 0.3546, "step": 34270 }, { "epoch": 0.7582153825733974, "grad_norm": 0.6137376427650452, "learning_rate": 2.7485051259429174e-06, "loss": 0.2963, "step": 34275 }, { "epoch": 0.7583259902149107, "grad_norm": 1.6953572034835815, "learning_rate": 2.746112773423597e-06, "loss": 0.3002, "step": 34280 }, { "epoch": 0.7584365978564239, "grad_norm": 0.9647733569145203, "learning_rate": 2.7437212968088956e-06, "loss": 0.4388, "step": 34285 }, { "epoch": 0.7585472054979372, "grad_norm": 1.2210036516189575, "learning_rate": 2.741330696387584e-06, "loss": 0.3179, "step": 34290 }, { "epoch": 0.7586578131394505, "grad_norm": 0.9945254921913147, "learning_rate": 2.738940972448325e-06, "loss": 0.4349, "step": 34295 }, { "epoch": 0.7587684207809637, "grad_norm": 0.8422243595123291, "learning_rate": 2.736552125279679e-06, "loss": 0.3485, "step": 34300 }, { "epoch": 0.7588790284224769, "grad_norm": 1.985801339149475, "learning_rate": 2.734164155170097e-06, "loss": 0.3568, "step": 34305 }, { "epoch": 0.7589896360639902, "grad_norm": 0.7137259244918823, "learning_rate": 2.731777062407929e-06, "loss": 0.2776, "step": 34310 }, { "epoch": 0.7591002437055034, "grad_norm": 1.0446406602859497, "learning_rate": 2.729390847281407e-06, "loss": 0.2957, "step": 34315 }, { "epoch": 0.7592108513470167, "grad_norm": 1.1422556638717651, "learning_rate": 2.727005510078674e-06, "loss": 0.2154, "step": 34320 }, { "epoch": 0.75932145898853, "grad_norm": 0.6994808912277222, "learning_rate": 2.72462105108776e-06, "loss": 0.4622, "step": 34325 }, { "epoch": 0.7594320666300433, "grad_norm": 1.3450980186462402, "learning_rate": 2.722237470596578e-06, "loss": 0.2668, "step": 34330 }, { "epoch": 0.7595426742715565, "grad_norm": 0.40415963530540466, "learning_rate": 2.7198547688929542e-06, "loss": 0.2792, "step": 34335 }, { "epoch": 0.7596532819130698, "grad_norm": 1.9132623672485352, "learning_rate": 2.7174729462645976e-06, "loss": 0.4857, "step": 34340 }, { "epoch": 0.7597638895545831, "grad_norm": 1.0550838708877563, "learning_rate": 2.7150920029991046e-06, "loss": 0.2029, "step": 34345 }, { "epoch": 0.7598744971960962, "grad_norm": 1.564697265625, "learning_rate": 2.7127119393839817e-06, "loss": 0.3914, "step": 34350 }, { "epoch": 0.7599851048376095, "grad_norm": 1.3241626024246216, "learning_rate": 2.7103327557066195e-06, "loss": 0.2871, "step": 34355 }, { "epoch": 0.7600957124791228, "grad_norm": 1.126171588897705, "learning_rate": 2.7079544522543023e-06, "loss": 0.3607, "step": 34360 }, { "epoch": 0.760206320120636, "grad_norm": 2.2496259212493896, "learning_rate": 2.705577029314209e-06, "loss": 0.336, "step": 34365 }, { "epoch": 0.7603169277621493, "grad_norm": 0.9289327263832092, "learning_rate": 2.703200487173413e-06, "loss": 0.2289, "step": 34370 }, { "epoch": 0.7604275354036626, "grad_norm": 1.1864104270935059, "learning_rate": 2.700824826118883e-06, "loss": 0.3462, "step": 34375 }, { "epoch": 0.7605381430451759, "grad_norm": 1.3488432168960571, "learning_rate": 2.6984500464374764e-06, "loss": 0.2758, "step": 34380 }, { "epoch": 0.7606487506866891, "grad_norm": 1.097264051437378, "learning_rate": 2.6960761484159493e-06, "loss": 0.3136, "step": 34385 }, { "epoch": 0.7607593583282024, "grad_norm": 1.3675787448883057, "learning_rate": 2.693703132340947e-06, "loss": 0.3005, "step": 34390 }, { "epoch": 0.7608699659697157, "grad_norm": 0.8718785643577576, "learning_rate": 2.6913309984990133e-06, "loss": 0.3234, "step": 34395 }, { "epoch": 0.7609805736112288, "grad_norm": 1.097428321838379, "learning_rate": 2.6889597471765795e-06, "loss": 0.3943, "step": 34400 }, { "epoch": 0.7610911812527421, "grad_norm": 0.9698855876922607, "learning_rate": 2.6865893786599764e-06, "loss": 0.2361, "step": 34405 }, { "epoch": 0.7612017888942554, "grad_norm": 0.4896920919418335, "learning_rate": 2.6842198932354236e-06, "loss": 0.1627, "step": 34410 }, { "epoch": 0.7613123965357687, "grad_norm": 1.1751741170883179, "learning_rate": 2.681851291189035e-06, "loss": 0.451, "step": 34415 }, { "epoch": 0.7614230041772819, "grad_norm": 1.084951400756836, "learning_rate": 2.67948357280682e-06, "loss": 0.3604, "step": 34420 }, { "epoch": 0.7615336118187952, "grad_norm": 1.5048127174377441, "learning_rate": 2.677116738374683e-06, "loss": 0.6556, "step": 34425 }, { "epoch": 0.7616442194603085, "grad_norm": 1.3603929281234741, "learning_rate": 2.6747507881784084e-06, "loss": 0.3212, "step": 34430 }, { "epoch": 0.7617548271018217, "grad_norm": 0.7847374677658081, "learning_rate": 2.672385722503694e-06, "loss": 0.3486, "step": 34435 }, { "epoch": 0.761865434743335, "grad_norm": 0.80711430311203, "learning_rate": 2.6700215416361196e-06, "loss": 0.2407, "step": 34440 }, { "epoch": 0.7619760423848482, "grad_norm": 1.0038974285125732, "learning_rate": 2.667658245861152e-06, "loss": 0.2744, "step": 34445 }, { "epoch": 0.7620866500263614, "grad_norm": 1.117896556854248, "learning_rate": 2.6652958354641655e-06, "loss": 0.3706, "step": 34450 }, { "epoch": 0.7621972576678747, "grad_norm": 0.9389490485191345, "learning_rate": 2.6629343107304216e-06, "loss": 0.3102, "step": 34455 }, { "epoch": 0.762307865309388, "grad_norm": 1.011786699295044, "learning_rate": 2.6605736719450638e-06, "loss": 0.2152, "step": 34460 }, { "epoch": 0.7624184729509013, "grad_norm": 1.181165099143982, "learning_rate": 2.658213919393149e-06, "loss": 0.328, "step": 34465 }, { "epoch": 0.7625290805924145, "grad_norm": 1.0033085346221924, "learning_rate": 2.655855053359612e-06, "loss": 0.356, "step": 34470 }, { "epoch": 0.7626396882339278, "grad_norm": 1.7715502977371216, "learning_rate": 2.6534970741292855e-06, "loss": 0.3128, "step": 34475 }, { "epoch": 0.7627502958754411, "grad_norm": 2.605525493621826, "learning_rate": 2.6511399819868954e-06, "loss": 0.4037, "step": 34480 }, { "epoch": 0.7628609035169543, "grad_norm": 1.0116846561431885, "learning_rate": 2.648783777217059e-06, "loss": 0.3605, "step": 34485 }, { "epoch": 0.7629715111584676, "grad_norm": 0.8307408094406128, "learning_rate": 2.646428460104289e-06, "loss": 0.3112, "step": 34490 }, { "epoch": 0.7630821187999808, "grad_norm": 1.054020643234253, "learning_rate": 2.644074030932986e-06, "loss": 0.3472, "step": 34495 }, { "epoch": 0.763192726441494, "grad_norm": 1.4485762119293213, "learning_rate": 2.6417204899874504e-06, "loss": 0.2695, "step": 34500 }, { "epoch": 0.7633033340830073, "grad_norm": 2.0296616554260254, "learning_rate": 2.6393678375518684e-06, "loss": 0.4002, "step": 34505 }, { "epoch": 0.7634139417245206, "grad_norm": 1.2280954122543335, "learning_rate": 2.6370160739103246e-06, "loss": 0.3368, "step": 34510 }, { "epoch": 0.7635245493660339, "grad_norm": 0.8979802131652832, "learning_rate": 2.634665199346791e-06, "loss": 0.4292, "step": 34515 }, { "epoch": 0.7636351570075471, "grad_norm": 1.5472592115402222, "learning_rate": 2.632315214145137e-06, "loss": 0.458, "step": 34520 }, { "epoch": 0.7637457646490604, "grad_norm": 0.8601031303405762, "learning_rate": 2.6299661185891257e-06, "loss": 0.4117, "step": 34525 }, { "epoch": 0.7638563722905737, "grad_norm": 1.1569278240203857, "learning_rate": 2.627617912962399e-06, "loss": 0.4448, "step": 34530 }, { "epoch": 0.763966979932087, "grad_norm": 1.0093694925308228, "learning_rate": 2.6252705975485128e-06, "loss": 0.3152, "step": 34535 }, { "epoch": 0.7640775875736001, "grad_norm": 1.042427659034729, "learning_rate": 2.622924172630903e-06, "loss": 0.3545, "step": 34540 }, { "epoch": 0.7641881952151134, "grad_norm": 1.4671697616577148, "learning_rate": 2.620578638492892e-06, "loss": 0.3643, "step": 34545 }, { "epoch": 0.7642988028566267, "grad_norm": 1.1024090051651, "learning_rate": 2.6182339954177117e-06, "loss": 0.35, "step": 34550 }, { "epoch": 0.7644094104981399, "grad_norm": 1.9116400480270386, "learning_rate": 2.615890243688475e-06, "loss": 0.3892, "step": 34555 }, { "epoch": 0.7645200181396532, "grad_norm": 0.9581424593925476, "learning_rate": 2.613547383588182e-06, "loss": 0.4606, "step": 34560 }, { "epoch": 0.7646306257811665, "grad_norm": 1.456150770187378, "learning_rate": 2.61120541539974e-06, "loss": 0.3052, "step": 34565 }, { "epoch": 0.7647412334226797, "grad_norm": 1.081177830696106, "learning_rate": 2.608864339405939e-06, "loss": 0.4053, "step": 34570 }, { "epoch": 0.764851841064193, "grad_norm": 0.7802745699882507, "learning_rate": 2.6065241558894625e-06, "loss": 0.2961, "step": 34575 }, { "epoch": 0.7649624487057063, "grad_norm": 1.4438080787658691, "learning_rate": 2.604184865132888e-06, "loss": 0.3954, "step": 34580 }, { "epoch": 0.7650730563472196, "grad_norm": 0.9102382659912109, "learning_rate": 2.601846467418683e-06, "loss": 0.3334, "step": 34585 }, { "epoch": 0.7651836639887327, "grad_norm": 1.7254763841629028, "learning_rate": 2.599508963029208e-06, "loss": 0.3893, "step": 34590 }, { "epoch": 0.765294271630246, "grad_norm": 2.226759910583496, "learning_rate": 2.5971723522467174e-06, "loss": 0.3459, "step": 34595 }, { "epoch": 0.7654048792717593, "grad_norm": 0.8717790842056274, "learning_rate": 2.5948366353533548e-06, "loss": 0.2735, "step": 34600 }, { "epoch": 0.7655154869132725, "grad_norm": 1.7000209093093872, "learning_rate": 2.5925018126311585e-06, "loss": 0.4038, "step": 34605 }, { "epoch": 0.7656260945547858, "grad_norm": 1.244670033454895, "learning_rate": 2.590167884362057e-06, "loss": 0.3827, "step": 34610 }, { "epoch": 0.7657367021962991, "grad_norm": 0.9495211243629456, "learning_rate": 2.587834850827872e-06, "loss": 0.2266, "step": 34615 }, { "epoch": 0.7658473098378124, "grad_norm": 0.8865153789520264, "learning_rate": 2.585502712310315e-06, "loss": 0.5182, "step": 34620 }, { "epoch": 0.7659579174793256, "grad_norm": 1.7778232097625732, "learning_rate": 2.5831714690909915e-06, "loss": 0.4305, "step": 34625 }, { "epoch": 0.7660685251208389, "grad_norm": 1.1112840175628662, "learning_rate": 2.5808411214513994e-06, "loss": 0.3597, "step": 34630 }, { "epoch": 0.766179132762352, "grad_norm": 1.2249059677124023, "learning_rate": 2.5785116696729273e-06, "loss": 0.4145, "step": 34635 }, { "epoch": 0.7662897404038653, "grad_norm": 1.116105318069458, "learning_rate": 2.5761831140368586e-06, "loss": 0.3844, "step": 34640 }, { "epoch": 0.7664003480453786, "grad_norm": 1.9753038883209229, "learning_rate": 2.5738554548243566e-06, "loss": 0.361, "step": 34645 }, { "epoch": 0.7665109556868919, "grad_norm": 1.658126950263977, "learning_rate": 2.571528692316494e-06, "loss": 0.1076, "step": 34650 }, { "epoch": 0.7666215633284051, "grad_norm": 1.861863374710083, "learning_rate": 2.569202826794226e-06, "loss": 0.2766, "step": 34655 }, { "epoch": 0.7667321709699184, "grad_norm": 1.3982865810394287, "learning_rate": 2.5668778585383935e-06, "loss": 0.3109, "step": 34660 }, { "epoch": 0.7668427786114317, "grad_norm": 0.9352344274520874, "learning_rate": 2.564553787829742e-06, "loss": 0.3782, "step": 34665 }, { "epoch": 0.766953386252945, "grad_norm": 0.9836050271987915, "learning_rate": 2.5622306149489053e-06, "loss": 0.4017, "step": 34670 }, { "epoch": 0.7670639938944582, "grad_norm": 1.7487939596176147, "learning_rate": 2.5599083401763934e-06, "loss": 0.2278, "step": 34675 }, { "epoch": 0.7671746015359715, "grad_norm": 1.6623740196228027, "learning_rate": 2.557586963792632e-06, "loss": 0.2583, "step": 34680 }, { "epoch": 0.7672852091774847, "grad_norm": 1.347387433052063, "learning_rate": 2.555266486077922e-06, "loss": 0.3454, "step": 34685 }, { "epoch": 0.7673958168189979, "grad_norm": 1.6269434690475464, "learning_rate": 2.5529469073124613e-06, "loss": 0.3347, "step": 34690 }, { "epoch": 0.7675064244605112, "grad_norm": 0.8880745768547058, "learning_rate": 2.5506282277763372e-06, "loss": 0.3848, "step": 34695 }, { "epoch": 0.7676170321020245, "grad_norm": 0.3504287898540497, "learning_rate": 2.548310447749531e-06, "loss": 0.2953, "step": 34700 }, { "epoch": 0.7677276397435377, "grad_norm": 1.3862141370773315, "learning_rate": 2.5459935675119127e-06, "loss": 0.3671, "step": 34705 }, { "epoch": 0.767838247385051, "grad_norm": 0.9506332874298096, "learning_rate": 2.5436775873432453e-06, "loss": 0.3442, "step": 34710 }, { "epoch": 0.7679488550265643, "grad_norm": 1.3876047134399414, "learning_rate": 2.541362507523183e-06, "loss": 0.2581, "step": 34715 }, { "epoch": 0.7680594626680776, "grad_norm": 0.8837019205093384, "learning_rate": 2.5390483283312705e-06, "loss": 0.3998, "step": 34720 }, { "epoch": 0.7681700703095908, "grad_norm": 1.5051145553588867, "learning_rate": 2.5367350500469435e-06, "loss": 0.2067, "step": 34725 }, { "epoch": 0.768280677951104, "grad_norm": 1.2038522958755493, "learning_rate": 2.5344226729495315e-06, "loss": 0.2868, "step": 34730 }, { "epoch": 0.7683912855926173, "grad_norm": 1.427027702331543, "learning_rate": 2.532111197318252e-06, "loss": 0.3764, "step": 34735 }, { "epoch": 0.7685018932341305, "grad_norm": 0.8890478610992432, "learning_rate": 2.529800623432219e-06, "loss": 0.3658, "step": 34740 }, { "epoch": 0.7686125008756438, "grad_norm": 1.1641151905059814, "learning_rate": 2.5274909515704227e-06, "loss": 0.4661, "step": 34745 }, { "epoch": 0.7687231085171571, "grad_norm": 1.4725550413131714, "learning_rate": 2.525182182011766e-06, "loss": 0.3477, "step": 34750 }, { "epoch": 0.7688337161586704, "grad_norm": 1.1773675680160522, "learning_rate": 2.522874315035032e-06, "loss": 0.3239, "step": 34755 }, { "epoch": 0.7689443238001836, "grad_norm": 1.453096866607666, "learning_rate": 2.520567350918884e-06, "loss": 0.321, "step": 34760 }, { "epoch": 0.7690549314416969, "grad_norm": 1.3380593061447144, "learning_rate": 2.518261289941899e-06, "loss": 0.3269, "step": 34765 }, { "epoch": 0.7691655390832102, "grad_norm": 1.4841264486312866, "learning_rate": 2.515956132382531e-06, "loss": 0.4938, "step": 34770 }, { "epoch": 0.7692761467247234, "grad_norm": 0.4285747706890106, "learning_rate": 2.513651878519119e-06, "loss": 0.2841, "step": 34775 }, { "epoch": 0.7693867543662366, "grad_norm": 1.2771461009979248, "learning_rate": 2.51134852862991e-06, "loss": 0.3634, "step": 34780 }, { "epoch": 0.7694973620077499, "grad_norm": 1.4043707847595215, "learning_rate": 2.5090460829930298e-06, "loss": 0.3959, "step": 34785 }, { "epoch": 0.7696079696492631, "grad_norm": 2.1233086585998535, "learning_rate": 2.506744541886497e-06, "loss": 0.1148, "step": 34790 }, { "epoch": 0.7697185772907764, "grad_norm": 2.0057408809661865, "learning_rate": 2.504443905588223e-06, "loss": 0.5139, "step": 34795 }, { "epoch": 0.7698291849322897, "grad_norm": 1.8870947360992432, "learning_rate": 2.502144174376009e-06, "loss": 0.2993, "step": 34800 }, { "epoch": 0.769939792573803, "grad_norm": 0.8088183403015137, "learning_rate": 2.499845348527545e-06, "loss": 0.3698, "step": 34805 }, { "epoch": 0.7700504002153162, "grad_norm": 0.9609761834144592, "learning_rate": 2.4975474283204172e-06, "loss": 0.315, "step": 34810 }, { "epoch": 0.7701610078568295, "grad_norm": 0.8589332699775696, "learning_rate": 2.4952504140320953e-06, "loss": 0.2513, "step": 34815 }, { "epoch": 0.7702716154983428, "grad_norm": 1.0255341529846191, "learning_rate": 2.4929543059399442e-06, "loss": 0.2945, "step": 34820 }, { "epoch": 0.7703822231398559, "grad_norm": 2.1319217681884766, "learning_rate": 2.490659104321218e-06, "loss": 0.3541, "step": 34825 }, { "epoch": 0.7704928307813692, "grad_norm": 0.6983044743537903, "learning_rate": 2.488364809453062e-06, "loss": 0.3092, "step": 34830 }, { "epoch": 0.7706034384228825, "grad_norm": 1.1540693044662476, "learning_rate": 2.4860714216125126e-06, "loss": 0.2351, "step": 34835 }, { "epoch": 0.7707140460643958, "grad_norm": 0.9335679411888123, "learning_rate": 2.4837789410764934e-06, "loss": 0.2138, "step": 34840 }, { "epoch": 0.770824653705909, "grad_norm": 1.0337175130844116, "learning_rate": 2.481487368121822e-06, "loss": 0.2268, "step": 34845 }, { "epoch": 0.7709352613474223, "grad_norm": 1.4915344715118408, "learning_rate": 2.479196703025205e-06, "loss": 0.3633, "step": 34850 }, { "epoch": 0.7710458689889356, "grad_norm": 0.947673499584198, "learning_rate": 2.4769069460632424e-06, "loss": 0.2211, "step": 34855 }, { "epoch": 0.7711564766304488, "grad_norm": 1.9089137315750122, "learning_rate": 2.4746180975124125e-06, "loss": 0.3291, "step": 34860 }, { "epoch": 0.7712670842719621, "grad_norm": 1.4424052238464355, "learning_rate": 2.472330157649102e-06, "loss": 0.3053, "step": 34865 }, { "epoch": 0.7713776919134754, "grad_norm": 0.8881655335426331, "learning_rate": 2.4700431267495806e-06, "loss": 0.3321, "step": 34870 }, { "epoch": 0.7714882995549885, "grad_norm": 0.7668821811676025, "learning_rate": 2.4677570050899944e-06, "loss": 0.2454, "step": 34875 }, { "epoch": 0.7715989071965018, "grad_norm": 0.48635080456733704, "learning_rate": 2.4654717929464036e-06, "loss": 0.2156, "step": 34880 }, { "epoch": 0.7717095148380151, "grad_norm": 1.0763225555419922, "learning_rate": 2.463187490594745e-06, "loss": 0.2133, "step": 34885 }, { "epoch": 0.7718201224795284, "grad_norm": 1.0753414630889893, "learning_rate": 2.46090409831084e-06, "loss": 0.2726, "step": 34890 }, { "epoch": 0.7719307301210416, "grad_norm": 1.1803869009017944, "learning_rate": 2.4586216163704157e-06, "loss": 0.2344, "step": 34895 }, { "epoch": 0.7720413377625549, "grad_norm": 1.1874065399169922, "learning_rate": 2.4563400450490783e-06, "loss": 0.2947, "step": 34900 }, { "epoch": 0.7721519454040682, "grad_norm": 0.8053605556488037, "learning_rate": 2.454059384622327e-06, "loss": 0.4498, "step": 34905 }, { "epoch": 0.7722625530455814, "grad_norm": 0.6433166265487671, "learning_rate": 2.4517796353655497e-06, "loss": 0.2597, "step": 34910 }, { "epoch": 0.7723731606870947, "grad_norm": 2.0831217765808105, "learning_rate": 2.4495007975540263e-06, "loss": 0.2997, "step": 34915 }, { "epoch": 0.7724837683286079, "grad_norm": 0.8251069784164429, "learning_rate": 2.4472228714629265e-06, "loss": 0.3116, "step": 34920 }, { "epoch": 0.7725943759701211, "grad_norm": 0.8639699816703796, "learning_rate": 2.444945857367307e-06, "loss": 0.2738, "step": 34925 }, { "epoch": 0.7727049836116344, "grad_norm": 1.2609310150146484, "learning_rate": 2.4426697555421185e-06, "loss": 0.2931, "step": 34930 }, { "epoch": 0.7728155912531477, "grad_norm": 1.138782262802124, "learning_rate": 2.4403945662622e-06, "loss": 0.3466, "step": 34935 }, { "epoch": 0.772926198894661, "grad_norm": 1.3863340616226196, "learning_rate": 2.4381202898022772e-06, "loss": 0.4076, "step": 34940 }, { "epoch": 0.7730368065361742, "grad_norm": 2.540781259536743, "learning_rate": 2.43584692643697e-06, "loss": 0.4316, "step": 34945 }, { "epoch": 0.7731474141776875, "grad_norm": 1.293427586555481, "learning_rate": 2.4335744764407875e-06, "loss": 0.3425, "step": 34950 }, { "epoch": 0.7732580218192008, "grad_norm": 1.1985872983932495, "learning_rate": 2.4313029400881283e-06, "loss": 0.4398, "step": 34955 }, { "epoch": 0.773368629460714, "grad_norm": 0.6285930275917053, "learning_rate": 2.4290323176532714e-06, "loss": 0.257, "step": 34960 }, { "epoch": 0.7734792371022273, "grad_norm": 0.9906758069992065, "learning_rate": 2.4267626094104036e-06, "loss": 0.3651, "step": 34965 }, { "epoch": 0.7735898447437405, "grad_norm": 1.0934256315231323, "learning_rate": 2.4244938156335895e-06, "loss": 0.2863, "step": 34970 }, { "epoch": 0.7737004523852538, "grad_norm": 1.3034155368804932, "learning_rate": 2.4222259365967783e-06, "loss": 0.3698, "step": 34975 }, { "epoch": 0.773811060026767, "grad_norm": 1.36533522605896, "learning_rate": 2.4199589725738237e-06, "loss": 0.4582, "step": 34980 }, { "epoch": 0.7739216676682803, "grad_norm": 2.2137374877929688, "learning_rate": 2.4176929238384606e-06, "loss": 0.3173, "step": 34985 }, { "epoch": 0.7740322753097936, "grad_norm": 1.843651533126831, "learning_rate": 2.4154277906643055e-06, "loss": 0.3622, "step": 34990 }, { "epoch": 0.7741428829513068, "grad_norm": 0.6001081466674805, "learning_rate": 2.4131635733248803e-06, "loss": 0.1828, "step": 34995 }, { "epoch": 0.7742534905928201, "grad_norm": 0.8272764682769775, "learning_rate": 2.41090027209359e-06, "loss": 0.3567, "step": 35000 }, { "epoch": 0.7743640982343334, "grad_norm": 0.7385786771774292, "learning_rate": 2.408637887243719e-06, "loss": 0.3694, "step": 35005 }, { "epoch": 0.7744747058758467, "grad_norm": 0.5577773451805115, "learning_rate": 2.4063764190484563e-06, "loss": 0.2809, "step": 35010 }, { "epoch": 0.7745853135173598, "grad_norm": 1.2650634050369263, "learning_rate": 2.4041158677808717e-06, "loss": 0.2836, "step": 35015 }, { "epoch": 0.7746959211588731, "grad_norm": 0.7456777095794678, "learning_rate": 2.401856233713925e-06, "loss": 0.3723, "step": 35020 }, { "epoch": 0.7748065288003864, "grad_norm": 0.834822416305542, "learning_rate": 2.399597517120469e-06, "loss": 0.224, "step": 35025 }, { "epoch": 0.7749171364418996, "grad_norm": 0.9376032948493958, "learning_rate": 2.3973397182732403e-06, "loss": 0.2239, "step": 35030 }, { "epoch": 0.7750277440834129, "grad_norm": 1.4576457738876343, "learning_rate": 2.3950828374448685e-06, "loss": 0.211, "step": 35035 }, { "epoch": 0.7751383517249262, "grad_norm": 1.519547700881958, "learning_rate": 2.392826874907872e-06, "loss": 0.3564, "step": 35040 }, { "epoch": 0.7752489593664394, "grad_norm": 1.6491830348968506, "learning_rate": 2.390571830934656e-06, "loss": 0.2975, "step": 35045 }, { "epoch": 0.7753595670079527, "grad_norm": 0.4469330310821533, "learning_rate": 2.388317705797517e-06, "loss": 0.2585, "step": 35050 }, { "epoch": 0.775470174649466, "grad_norm": 1.2813136577606201, "learning_rate": 2.38606449976864e-06, "loss": 0.3412, "step": 35055 }, { "epoch": 0.7755807822909793, "grad_norm": 1.0879642963409424, "learning_rate": 2.383812213120099e-06, "loss": 0.2486, "step": 35060 }, { "epoch": 0.7756913899324924, "grad_norm": 1.5908942222595215, "learning_rate": 2.3815608461238572e-06, "loss": 0.4753, "step": 35065 }, { "epoch": 0.7758019975740057, "grad_norm": 1.218898057937622, "learning_rate": 2.3793103990517685e-06, "loss": 0.369, "step": 35070 }, { "epoch": 0.775912605215519, "grad_norm": 1.1669946908950806, "learning_rate": 2.3770608721755662e-06, "loss": 0.3238, "step": 35075 }, { "epoch": 0.7760232128570322, "grad_norm": 0.835780143737793, "learning_rate": 2.374812265766887e-06, "loss": 0.3966, "step": 35080 }, { "epoch": 0.7761338204985455, "grad_norm": 1.6867378950119019, "learning_rate": 2.3725645800972506e-06, "loss": 0.3347, "step": 35085 }, { "epoch": 0.7762444281400588, "grad_norm": 1.0299957990646362, "learning_rate": 2.3703178154380557e-06, "loss": 0.222, "step": 35090 }, { "epoch": 0.7763550357815721, "grad_norm": 1.531358242034912, "learning_rate": 2.368071972060607e-06, "loss": 0.3859, "step": 35095 }, { "epoch": 0.7764656434230853, "grad_norm": 1.232353925704956, "learning_rate": 2.365827050236089e-06, "loss": 0.2449, "step": 35100 }, { "epoch": 0.7765762510645986, "grad_norm": 0.7561743259429932, "learning_rate": 2.3635830502355673e-06, "loss": 0.1558, "step": 35105 }, { "epoch": 0.7766868587061118, "grad_norm": 0.4502047598361969, "learning_rate": 2.3613399723300124e-06, "loss": 0.2047, "step": 35110 }, { "epoch": 0.776797466347625, "grad_norm": 0.9430075287818909, "learning_rate": 2.359097816790272e-06, "loss": 0.2423, "step": 35115 }, { "epoch": 0.7769080739891383, "grad_norm": 1.3719667196273804, "learning_rate": 2.356856583887086e-06, "loss": 0.3581, "step": 35120 }, { "epoch": 0.7770186816306516, "grad_norm": 1.0042195320129395, "learning_rate": 2.354616273891083e-06, "loss": 0.2836, "step": 35125 }, { "epoch": 0.7771292892721648, "grad_norm": 1.3248835802078247, "learning_rate": 2.3523768870727794e-06, "loss": 0.3461, "step": 35130 }, { "epoch": 0.7772398969136781, "grad_norm": 1.7060173749923706, "learning_rate": 2.3501384237025803e-06, "loss": 0.4347, "step": 35135 }, { "epoch": 0.7773505045551914, "grad_norm": 1.0661171674728394, "learning_rate": 2.34790088405078e-06, "loss": 0.2977, "step": 35140 }, { "epoch": 0.7774611121967047, "grad_norm": 3.2903223037719727, "learning_rate": 2.3456642683875597e-06, "loss": 0.3387, "step": 35145 }, { "epoch": 0.7775717198382179, "grad_norm": 1.0651260614395142, "learning_rate": 2.34342857698299e-06, "loss": 0.2889, "step": 35150 }, { "epoch": 0.7776823274797311, "grad_norm": 0.9371194839477539, "learning_rate": 2.341193810107032e-06, "loss": 0.1833, "step": 35155 }, { "epoch": 0.7777929351212444, "grad_norm": 1.227719783782959, "learning_rate": 2.338959968029532e-06, "loss": 0.422, "step": 35160 }, { "epoch": 0.7779035427627576, "grad_norm": 0.4521711766719818, "learning_rate": 2.3367270510202246e-06, "loss": 0.2125, "step": 35165 }, { "epoch": 0.7780141504042709, "grad_norm": 0.9059541821479797, "learning_rate": 2.334495059348737e-06, "loss": 0.3139, "step": 35170 }, { "epoch": 0.7781247580457842, "grad_norm": 0.9266628623008728, "learning_rate": 2.3322639932845737e-06, "loss": 0.2676, "step": 35175 }, { "epoch": 0.7782353656872975, "grad_norm": 1.617491602897644, "learning_rate": 2.3300338530971445e-06, "loss": 0.3485, "step": 35180 }, { "epoch": 0.7783459733288107, "grad_norm": 1.8785096406936646, "learning_rate": 2.3278046390557364e-06, "loss": 0.3289, "step": 35185 }, { "epoch": 0.778456580970324, "grad_norm": 1.4907164573669434, "learning_rate": 2.3255763514295182e-06, "loss": 0.401, "step": 35190 }, { "epoch": 0.7785671886118373, "grad_norm": 1.4595733880996704, "learning_rate": 2.323348990487565e-06, "loss": 0.2335, "step": 35195 }, { "epoch": 0.7786777962533505, "grad_norm": 1.1993846893310547, "learning_rate": 2.321122556498828e-06, "loss": 0.2996, "step": 35200 }, { "epoch": 0.7787884038948637, "grad_norm": 1.1349315643310547, "learning_rate": 2.3188970497321407e-06, "loss": 0.2838, "step": 35205 }, { "epoch": 0.778899011536377, "grad_norm": 1.2794475555419922, "learning_rate": 2.3166724704562404e-06, "loss": 0.4403, "step": 35210 }, { "epoch": 0.7790096191778902, "grad_norm": 1.5582691431045532, "learning_rate": 2.314448818939745e-06, "loss": 0.3062, "step": 35215 }, { "epoch": 0.7791202268194035, "grad_norm": 0.6403608918190002, "learning_rate": 2.312226095451152e-06, "loss": 0.2993, "step": 35220 }, { "epoch": 0.7792308344609168, "grad_norm": 1.2478454113006592, "learning_rate": 2.310004300258861e-06, "loss": 0.2616, "step": 35225 }, { "epoch": 0.7793414421024301, "grad_norm": 1.1017903089523315, "learning_rate": 2.307783433631151e-06, "loss": 0.3889, "step": 35230 }, { "epoch": 0.7794520497439433, "grad_norm": 0.9458504319190979, "learning_rate": 2.305563495836193e-06, "loss": 0.2167, "step": 35235 }, { "epoch": 0.7795626573854566, "grad_norm": 0.9895420074462891, "learning_rate": 2.3033444871420395e-06, "loss": 0.3002, "step": 35240 }, { "epoch": 0.7796732650269699, "grad_norm": 1.2970203161239624, "learning_rate": 2.3011264078166393e-06, "loss": 0.4343, "step": 35245 }, { "epoch": 0.779783872668483, "grad_norm": 1.7668774127960205, "learning_rate": 2.298909258127823e-06, "loss": 0.3419, "step": 35250 }, { "epoch": 0.7798944803099963, "grad_norm": 0.9115409851074219, "learning_rate": 2.2966930383433104e-06, "loss": 0.0993, "step": 35255 }, { "epoch": 0.7800050879515096, "grad_norm": 1.0923945903778076, "learning_rate": 2.2944777487307103e-06, "loss": 0.2837, "step": 35260 }, { "epoch": 0.7801156955930229, "grad_norm": 0.8639640212059021, "learning_rate": 2.2922633895575176e-06, "loss": 0.3288, "step": 35265 }, { "epoch": 0.7802263032345361, "grad_norm": 1.646309494972229, "learning_rate": 2.2900499610911153e-06, "loss": 0.3506, "step": 35270 }, { "epoch": 0.7803369108760494, "grad_norm": 0.8962993025779724, "learning_rate": 2.2878374635987756e-06, "loss": 0.3934, "step": 35275 }, { "epoch": 0.7804475185175627, "grad_norm": 1.8401780128479004, "learning_rate": 2.285625897347654e-06, "loss": 0.2985, "step": 35280 }, { "epoch": 0.7805581261590759, "grad_norm": 1.4188438653945923, "learning_rate": 2.2834152626048024e-06, "loss": 0.3421, "step": 35285 }, { "epoch": 0.7806687338005892, "grad_norm": 0.7108331322669983, "learning_rate": 2.281205559637144e-06, "loss": 0.2906, "step": 35290 }, { "epoch": 0.7807793414421025, "grad_norm": 1.2657583951950073, "learning_rate": 2.2789967887115084e-06, "loss": 0.2627, "step": 35295 }, { "epoch": 0.7808899490836156, "grad_norm": 0.8933263421058655, "learning_rate": 2.276788950094605e-06, "loss": 0.3193, "step": 35300 }, { "epoch": 0.7810005567251289, "grad_norm": 1.2518389225006104, "learning_rate": 2.27458204405302e-06, "loss": 0.2719, "step": 35305 }, { "epoch": 0.7811111643666422, "grad_norm": 1.342299461364746, "learning_rate": 2.272376070853245e-06, "loss": 0.3215, "step": 35310 }, { "epoch": 0.7812217720081555, "grad_norm": 1.255752444267273, "learning_rate": 2.270171030761652e-06, "loss": 0.2923, "step": 35315 }, { "epoch": 0.7813323796496687, "grad_norm": 0.9308016896247864, "learning_rate": 2.267966924044489e-06, "loss": 0.3732, "step": 35320 }, { "epoch": 0.781442987291182, "grad_norm": 0.6976242065429688, "learning_rate": 2.2657637509679096e-06, "loss": 0.3354, "step": 35325 }, { "epoch": 0.7815535949326953, "grad_norm": 0.9795805811882019, "learning_rate": 2.263561511797945e-06, "loss": 0.3913, "step": 35330 }, { "epoch": 0.7816642025742085, "grad_norm": 2.573806047439575, "learning_rate": 2.2613602068005134e-06, "loss": 0.4275, "step": 35335 }, { "epoch": 0.7817748102157218, "grad_norm": 1.0661356449127197, "learning_rate": 2.2591598362414225e-06, "loss": 0.2812, "step": 35340 }, { "epoch": 0.781885417857235, "grad_norm": 1.1341675519943237, "learning_rate": 2.2569604003863665e-06, "loss": 0.2197, "step": 35345 }, { "epoch": 0.7819960254987482, "grad_norm": 1.0931614637374878, "learning_rate": 2.254761899500926e-06, "loss": 0.2393, "step": 35350 }, { "epoch": 0.7821066331402615, "grad_norm": 1.189681887626648, "learning_rate": 2.252564333850569e-06, "loss": 0.3626, "step": 35355 }, { "epoch": 0.7822172407817748, "grad_norm": 1.3568129539489746, "learning_rate": 2.250367703700651e-06, "loss": 0.3115, "step": 35360 }, { "epoch": 0.7823278484232881, "grad_norm": 1.0066173076629639, "learning_rate": 2.248172009316415e-06, "loss": 0.2614, "step": 35365 }, { "epoch": 0.7824384560648013, "grad_norm": 1.2429389953613281, "learning_rate": 2.2459772509629897e-06, "loss": 0.4192, "step": 35370 }, { "epoch": 0.7825490637063146, "grad_norm": 0.8068668842315674, "learning_rate": 2.2437834289053927e-06, "loss": 0.213, "step": 35375 }, { "epoch": 0.7826596713478279, "grad_norm": 0.8396861553192139, "learning_rate": 2.241590543408526e-06, "loss": 0.2665, "step": 35380 }, { "epoch": 0.7827702789893412, "grad_norm": 1.573319435119629, "learning_rate": 2.2393985947371832e-06, "loss": 0.2829, "step": 35385 }, { "epoch": 0.7828808866308544, "grad_norm": 1.7589287757873535, "learning_rate": 2.2372075831560337e-06, "loss": 0.435, "step": 35390 }, { "epoch": 0.7829914942723676, "grad_norm": 1.263524055480957, "learning_rate": 2.235017508929649e-06, "loss": 0.3147, "step": 35395 }, { "epoch": 0.7831021019138809, "grad_norm": 0.4649301767349243, "learning_rate": 2.2328283723224786e-06, "loss": 0.2149, "step": 35400 }, { "epoch": 0.7832127095553941, "grad_norm": 0.7679256200790405, "learning_rate": 2.2306401735988546e-06, "loss": 0.2762, "step": 35405 }, { "epoch": 0.7833233171969074, "grad_norm": 1.7419872283935547, "learning_rate": 2.228452913023008e-06, "loss": 0.3287, "step": 35410 }, { "epoch": 0.7834339248384207, "grad_norm": 0.8960385322570801, "learning_rate": 2.2262665908590497e-06, "loss": 0.3371, "step": 35415 }, { "epoch": 0.7835445324799339, "grad_norm": 1.2910865545272827, "learning_rate": 2.2240812073709697e-06, "loss": 0.4331, "step": 35420 }, { "epoch": 0.7836551401214472, "grad_norm": 0.7594606876373291, "learning_rate": 2.2218967628226606e-06, "loss": 0.2839, "step": 35425 }, { "epoch": 0.7837657477629605, "grad_norm": 1.001189112663269, "learning_rate": 2.2197132574778924e-06, "loss": 0.2787, "step": 35430 }, { "epoch": 0.7838763554044738, "grad_norm": 1.116193413734436, "learning_rate": 2.217530691600316e-06, "loss": 0.437, "step": 35435 }, { "epoch": 0.7839869630459869, "grad_norm": 1.045792818069458, "learning_rate": 2.2153490654534826e-06, "loss": 0.331, "step": 35440 }, { "epoch": 0.7840975706875002, "grad_norm": 1.2363274097442627, "learning_rate": 2.213168379300821e-06, "loss": 0.46, "step": 35445 }, { "epoch": 0.7842081783290135, "grad_norm": 1.2559332847595215, "learning_rate": 2.2109886334056473e-06, "loss": 0.3574, "step": 35450 }, { "epoch": 0.7843187859705267, "grad_norm": 0.6218835711479187, "learning_rate": 2.2088098280311666e-06, "loss": 0.1842, "step": 35455 }, { "epoch": 0.78442939361204, "grad_norm": 0.9620422720909119, "learning_rate": 2.2066319634404675e-06, "loss": 0.3327, "step": 35460 }, { "epoch": 0.7845400012535533, "grad_norm": 1.4673104286193848, "learning_rate": 2.204455039896528e-06, "loss": 0.3181, "step": 35465 }, { "epoch": 0.7846506088950665, "grad_norm": 1.5568357706069946, "learning_rate": 2.202279057662209e-06, "loss": 0.2466, "step": 35470 }, { "epoch": 0.7847612165365798, "grad_norm": 0.9093013405799866, "learning_rate": 2.200104017000261e-06, "loss": 0.2977, "step": 35475 }, { "epoch": 0.7848718241780931, "grad_norm": 0.8230237364768982, "learning_rate": 2.1979299181733195e-06, "loss": 0.3297, "step": 35480 }, { "epoch": 0.7849824318196064, "grad_norm": 1.4643590450286865, "learning_rate": 2.195756761443906e-06, "loss": 0.3327, "step": 35485 }, { "epoch": 0.7850930394611195, "grad_norm": 0.7719398140907288, "learning_rate": 2.1935845470744275e-06, "loss": 0.3908, "step": 35490 }, { "epoch": 0.7852036471026328, "grad_norm": 0.9984272122383118, "learning_rate": 2.19141327532718e-06, "loss": 0.4566, "step": 35495 }, { "epoch": 0.7853142547441461, "grad_norm": 1.556013584136963, "learning_rate": 2.1892429464643463e-06, "loss": 0.3751, "step": 35500 }, { "epoch": 0.7854248623856593, "grad_norm": 0.8390544652938843, "learning_rate": 2.187073560747983e-06, "loss": 0.2268, "step": 35505 }, { "epoch": 0.7855354700271726, "grad_norm": 1.1651936769485474, "learning_rate": 2.184905118440053e-06, "loss": 0.3353, "step": 35510 }, { "epoch": 0.7856460776686859, "grad_norm": 1.3499013185501099, "learning_rate": 2.182737619802393e-06, "loss": 0.3169, "step": 35515 }, { "epoch": 0.7857566853101992, "grad_norm": 1.5052307844161987, "learning_rate": 2.1805710650967217e-06, "loss": 0.386, "step": 35520 }, { "epoch": 0.7858672929517124, "grad_norm": 1.7195613384246826, "learning_rate": 2.178405454584658e-06, "loss": 0.3839, "step": 35525 }, { "epoch": 0.7859779005932257, "grad_norm": 0.9497186541557312, "learning_rate": 2.176240788527697e-06, "loss": 0.2463, "step": 35530 }, { "epoch": 0.7860885082347389, "grad_norm": 1.3049354553222656, "learning_rate": 2.1740770671872138e-06, "loss": 0.4943, "step": 35535 }, { "epoch": 0.7861991158762521, "grad_norm": 0.6466731429100037, "learning_rate": 2.171914290824486e-06, "loss": 0.2785, "step": 35540 }, { "epoch": 0.7863097235177654, "grad_norm": 0.5285360217094421, "learning_rate": 2.169752459700668e-06, "loss": 0.3101, "step": 35545 }, { "epoch": 0.7864203311592787, "grad_norm": 1.2248562574386597, "learning_rate": 2.1675915740767915e-06, "loss": 0.3541, "step": 35550 }, { "epoch": 0.786530938800792, "grad_norm": 0.7828044891357422, "learning_rate": 2.1654316342137914e-06, "loss": 0.2312, "step": 35555 }, { "epoch": 0.7866415464423052, "grad_norm": 1.2779771089553833, "learning_rate": 2.1632726403724768e-06, "loss": 0.3916, "step": 35560 }, { "epoch": 0.7867521540838185, "grad_norm": 1.252539873123169, "learning_rate": 2.161114592813546e-06, "loss": 0.3542, "step": 35565 }, { "epoch": 0.7868627617253318, "grad_norm": 0.8373769521713257, "learning_rate": 2.1589574917975807e-06, "loss": 0.3245, "step": 35570 }, { "epoch": 0.786973369366845, "grad_norm": 0.2857923209667206, "learning_rate": 2.156801337585054e-06, "loss": 0.4485, "step": 35575 }, { "epoch": 0.7870839770083583, "grad_norm": 0.7184885740280151, "learning_rate": 2.1546461304363163e-06, "loss": 0.4072, "step": 35580 }, { "epoch": 0.7871945846498715, "grad_norm": 1.5922722816467285, "learning_rate": 2.1524918706116117e-06, "loss": 0.2468, "step": 35585 }, { "epoch": 0.7873051922913847, "grad_norm": 1.3092366456985474, "learning_rate": 2.1503385583710655e-06, "loss": 0.2106, "step": 35590 }, { "epoch": 0.787415799932898, "grad_norm": 1.0998241901397705, "learning_rate": 2.1481861939746883e-06, "loss": 0.2346, "step": 35595 }, { "epoch": 0.7875264075744113, "grad_norm": 1.0226490497589111, "learning_rate": 2.146034777682381e-06, "loss": 0.3288, "step": 35600 }, { "epoch": 0.7876370152159246, "grad_norm": 1.021026372909546, "learning_rate": 2.1438843097539187e-06, "loss": 0.3373, "step": 35605 }, { "epoch": 0.7877476228574378, "grad_norm": 1.2669882774353027, "learning_rate": 2.141734790448977e-06, "loss": 0.37, "step": 35610 }, { "epoch": 0.7878582304989511, "grad_norm": 0.6038181185722351, "learning_rate": 2.13958622002711e-06, "loss": 0.3947, "step": 35615 }, { "epoch": 0.7879688381404644, "grad_norm": 2.183593273162842, "learning_rate": 2.1374385987477498e-06, "loss": 0.26, "step": 35620 }, { "epoch": 0.7880794457819776, "grad_norm": 0.9516775012016296, "learning_rate": 2.1352919268702275e-06, "loss": 0.368, "step": 35625 }, { "epoch": 0.7881900534234908, "grad_norm": 0.691406786441803, "learning_rate": 2.1331462046537543e-06, "loss": 0.2124, "step": 35630 }, { "epoch": 0.7883006610650041, "grad_norm": 0.9466923475265503, "learning_rate": 2.131001432357416e-06, "loss": 0.3998, "step": 35635 }, { "epoch": 0.7884112687065173, "grad_norm": 0.561081051826477, "learning_rate": 2.1288576102402027e-06, "loss": 0.1705, "step": 35640 }, { "epoch": 0.7885218763480306, "grad_norm": 1.4619882106781006, "learning_rate": 2.1267147385609787e-06, "loss": 0.4626, "step": 35645 }, { "epoch": 0.7886324839895439, "grad_norm": 0.8871880173683167, "learning_rate": 2.1245728175784895e-06, "loss": 0.2658, "step": 35650 }, { "epoch": 0.7887430916310572, "grad_norm": 0.7832179069519043, "learning_rate": 2.122431847551377e-06, "loss": 0.2163, "step": 35655 }, { "epoch": 0.7888536992725704, "grad_norm": 1.6511633396148682, "learning_rate": 2.120291828738161e-06, "loss": 0.3868, "step": 35660 }, { "epoch": 0.7889643069140837, "grad_norm": 0.702101469039917, "learning_rate": 2.1181527613972487e-06, "loss": 0.3128, "step": 35665 }, { "epoch": 0.789074914555597, "grad_norm": 1.2856297492980957, "learning_rate": 2.1160146457869303e-06, "loss": 0.2708, "step": 35670 }, { "epoch": 0.7891855221971102, "grad_norm": 0.8425899147987366, "learning_rate": 2.113877482165384e-06, "loss": 0.3346, "step": 35675 }, { "epoch": 0.7892961298386234, "grad_norm": 1.3152488470077515, "learning_rate": 2.1117412707906706e-06, "loss": 0.5573, "step": 35680 }, { "epoch": 0.7894067374801367, "grad_norm": 1.0319533348083496, "learning_rate": 2.1096060119207384e-06, "loss": 0.3982, "step": 35685 }, { "epoch": 0.78951734512165, "grad_norm": 0.9184229373931885, "learning_rate": 2.1074717058134175e-06, "loss": 0.2658, "step": 35690 }, { "epoch": 0.7896279527631632, "grad_norm": 1.160240888595581, "learning_rate": 2.105338352726425e-06, "loss": 0.4144, "step": 35695 }, { "epoch": 0.7897385604046765, "grad_norm": 0.9808903336524963, "learning_rate": 2.1032059529173664e-06, "loss": 0.3421, "step": 35700 }, { "epoch": 0.7898491680461898, "grad_norm": 1.6243668794631958, "learning_rate": 2.1010745066437198e-06, "loss": 0.5078, "step": 35705 }, { "epoch": 0.789959775687703, "grad_norm": 1.1698012351989746, "learning_rate": 2.0989440141628647e-06, "loss": 0.3428, "step": 35710 }, { "epoch": 0.7900703833292163, "grad_norm": 1.117740273475647, "learning_rate": 2.0968144757320564e-06, "loss": 0.448, "step": 35715 }, { "epoch": 0.7901809909707296, "grad_norm": 0.9802262187004089, "learning_rate": 2.094685891608429e-06, "loss": 0.4096, "step": 35720 }, { "epoch": 0.7902915986122427, "grad_norm": 1.5629796981811523, "learning_rate": 2.092558262049017e-06, "loss": 0.342, "step": 35725 }, { "epoch": 0.790402206253756, "grad_norm": 1.4697296619415283, "learning_rate": 2.0904315873107296e-06, "loss": 0.353, "step": 35730 }, { "epoch": 0.7905128138952693, "grad_norm": 1.938949465751648, "learning_rate": 2.0883058676503555e-06, "loss": 0.4365, "step": 35735 }, { "epoch": 0.7906234215367826, "grad_norm": 1.444346308708191, "learning_rate": 2.0861811033245815e-06, "loss": 0.2224, "step": 35740 }, { "epoch": 0.7907340291782958, "grad_norm": 1.1155579090118408, "learning_rate": 2.0840572945899715e-06, "loss": 0.3018, "step": 35745 }, { "epoch": 0.7908446368198091, "grad_norm": 0.9304134249687195, "learning_rate": 2.08193444170297e-06, "loss": 0.3995, "step": 35750 }, { "epoch": 0.7909552444613224, "grad_norm": 1.176045536994934, "learning_rate": 2.079812544919915e-06, "loss": 0.3096, "step": 35755 }, { "epoch": 0.7910658521028356, "grad_norm": 0.6681042909622192, "learning_rate": 2.0776916044970276e-06, "loss": 0.2463, "step": 35760 }, { "epoch": 0.7911764597443489, "grad_norm": 0.7916162014007568, "learning_rate": 2.075571620690401e-06, "loss": 0.2764, "step": 35765 }, { "epoch": 0.7912870673858622, "grad_norm": 1.3735694885253906, "learning_rate": 2.073452593756031e-06, "loss": 0.376, "step": 35770 }, { "epoch": 0.7913976750273753, "grad_norm": 1.678075909614563, "learning_rate": 2.0713345239497886e-06, "loss": 0.3591, "step": 35775 }, { "epoch": 0.7915082826688886, "grad_norm": 1.3819197416305542, "learning_rate": 2.0692174115274264e-06, "loss": 0.3448, "step": 35780 }, { "epoch": 0.7916188903104019, "grad_norm": 1.2135785818099976, "learning_rate": 2.0671012567445894e-06, "loss": 0.299, "step": 35785 }, { "epoch": 0.7917294979519152, "grad_norm": 1.6985267400741577, "learning_rate": 2.0649860598567985e-06, "loss": 0.3, "step": 35790 }, { "epoch": 0.7918401055934284, "grad_norm": 1.259717345237732, "learning_rate": 2.0628718211194663e-06, "loss": 0.3422, "step": 35795 }, { "epoch": 0.7919507132349417, "grad_norm": 1.4324923753738403, "learning_rate": 2.060758540787884e-06, "loss": 0.2307, "step": 35800 }, { "epoch": 0.792061320876455, "grad_norm": 2.0011937618255615, "learning_rate": 2.058646219117232e-06, "loss": 0.312, "step": 35805 }, { "epoch": 0.7921719285179682, "grad_norm": 0.9827954769134521, "learning_rate": 2.05653485636257e-06, "loss": 0.3602, "step": 35810 }, { "epoch": 0.7922825361594815, "grad_norm": 1.5453717708587646, "learning_rate": 2.054424452778848e-06, "loss": 0.3241, "step": 35815 }, { "epoch": 0.7923931438009947, "grad_norm": 1.043522834777832, "learning_rate": 2.05231500862089e-06, "loss": 0.4468, "step": 35820 }, { "epoch": 0.792503751442508, "grad_norm": 0.8324028849601746, "learning_rate": 2.0502065241434165e-06, "loss": 0.2391, "step": 35825 }, { "epoch": 0.7926143590840212, "grad_norm": 0.9133661389350891, "learning_rate": 2.048098999601028e-06, "loss": 0.2527, "step": 35830 }, { "epoch": 0.7927249667255345, "grad_norm": 1.2363780736923218, "learning_rate": 2.0459924352481986e-06, "loss": 0.3093, "step": 35835 }, { "epoch": 0.7928355743670478, "grad_norm": 1.3292477130889893, "learning_rate": 2.0438868313393034e-06, "loss": 0.297, "step": 35840 }, { "epoch": 0.792946182008561, "grad_norm": 0.9619921445846558, "learning_rate": 2.041782188128594e-06, "loss": 0.3796, "step": 35845 }, { "epoch": 0.7930567896500743, "grad_norm": 0.8667595386505127, "learning_rate": 2.0396785058701963e-06, "loss": 0.3491, "step": 35850 }, { "epoch": 0.7931673972915876, "grad_norm": 3.6403205394744873, "learning_rate": 2.0375757848181377e-06, "loss": 0.3604, "step": 35855 }, { "epoch": 0.7932780049331009, "grad_norm": 0.8526704907417297, "learning_rate": 2.0354740252263216e-06, "loss": 0.3284, "step": 35860 }, { "epoch": 0.793388612574614, "grad_norm": 0.9508678317070007, "learning_rate": 2.0333732273485272e-06, "loss": 0.2202, "step": 35865 }, { "epoch": 0.7934992202161273, "grad_norm": 0.8899331092834473, "learning_rate": 2.031273391438432e-06, "loss": 0.3033, "step": 35870 }, { "epoch": 0.7936098278576406, "grad_norm": 0.8689668774604797, "learning_rate": 2.029174517749588e-06, "loss": 0.2972, "step": 35875 }, { "epoch": 0.7937204354991538, "grad_norm": 1.0709614753723145, "learning_rate": 2.027076606535434e-06, "loss": 0.3499, "step": 35880 }, { "epoch": 0.7938310431406671, "grad_norm": 0.5723768472671509, "learning_rate": 2.024979658049292e-06, "loss": 0.4671, "step": 35885 }, { "epoch": 0.7939416507821804, "grad_norm": 1.1038118600845337, "learning_rate": 2.022883672544368e-06, "loss": 0.3727, "step": 35890 }, { "epoch": 0.7940522584236936, "grad_norm": 1.5923871994018555, "learning_rate": 2.0207886502737516e-06, "loss": 0.3856, "step": 35895 }, { "epoch": 0.7941628660652069, "grad_norm": 0.7789576053619385, "learning_rate": 2.0186945914904166e-06, "loss": 0.1283, "step": 35900 }, { "epoch": 0.7942734737067202, "grad_norm": 1.2911386489868164, "learning_rate": 2.016601496447218e-06, "loss": 0.2571, "step": 35905 }, { "epoch": 0.7943840813482335, "grad_norm": 0.698254406452179, "learning_rate": 2.0145093653968984e-06, "loss": 0.3805, "step": 35910 }, { "epoch": 0.7944946889897466, "grad_norm": 0.7086236476898193, "learning_rate": 2.0124181985920842e-06, "loss": 0.3122, "step": 35915 }, { "epoch": 0.7946052966312599, "grad_norm": 1.6644924879074097, "learning_rate": 2.010327996285275e-06, "loss": 0.3047, "step": 35920 }, { "epoch": 0.7947159042727732, "grad_norm": 0.534925639629364, "learning_rate": 2.0082387587288688e-06, "loss": 0.4088, "step": 35925 }, { "epoch": 0.7948265119142864, "grad_norm": 1.3796855211257935, "learning_rate": 2.0061504861751424e-06, "loss": 0.2057, "step": 35930 }, { "epoch": 0.7949371195557997, "grad_norm": 1.439736008644104, "learning_rate": 2.0040631788762455e-06, "loss": 0.3819, "step": 35935 }, { "epoch": 0.795047727197313, "grad_norm": 2.051797389984131, "learning_rate": 2.0019768370842275e-06, "loss": 0.3062, "step": 35940 }, { "epoch": 0.7951583348388263, "grad_norm": 1.1474614143371582, "learning_rate": 1.999891461051012e-06, "loss": 0.4064, "step": 35945 }, { "epoch": 0.7952689424803395, "grad_norm": 1.2740130424499512, "learning_rate": 1.997807051028403e-06, "loss": 0.2378, "step": 35950 }, { "epoch": 0.7953795501218528, "grad_norm": 1.0591868162155151, "learning_rate": 1.9957236072680974e-06, "loss": 0.3206, "step": 35955 }, { "epoch": 0.795490157763366, "grad_norm": 1.8185561895370483, "learning_rate": 1.993641130021672e-06, "loss": 0.4718, "step": 35960 }, { "epoch": 0.7956007654048792, "grad_norm": 0.9021329283714294, "learning_rate": 1.991559619540575e-06, "loss": 0.3593, "step": 35965 }, { "epoch": 0.7957113730463925, "grad_norm": 0.7378580570220947, "learning_rate": 1.9894790760761594e-06, "loss": 0.2593, "step": 35970 }, { "epoch": 0.7958219806879058, "grad_norm": 1.095841884613037, "learning_rate": 1.9873994998796486e-06, "loss": 0.2574, "step": 35975 }, { "epoch": 0.795932588329419, "grad_norm": 0.89662104845047, "learning_rate": 1.9853208912021414e-06, "loss": 0.2892, "step": 35980 }, { "epoch": 0.7960431959709323, "grad_norm": 1.0087693929672241, "learning_rate": 1.98324325029464e-06, "loss": 0.1562, "step": 35985 }, { "epoch": 0.7961538036124456, "grad_norm": 0.9010312557220459, "learning_rate": 1.9811665774080136e-06, "loss": 0.4432, "step": 35990 }, { "epoch": 0.7962644112539589, "grad_norm": 1.0510119199752808, "learning_rate": 1.9790908727930212e-06, "loss": 0.2409, "step": 35995 }, { "epoch": 0.7963750188954721, "grad_norm": 0.6760706901550293, "learning_rate": 1.977016136700304e-06, "loss": 0.305, "step": 36000 }, { "epoch": 0.7964856265369854, "grad_norm": 1.4893991947174072, "learning_rate": 1.9749423693803836e-06, "loss": 0.3906, "step": 36005 }, { "epoch": 0.7965962341784986, "grad_norm": 3.062582015991211, "learning_rate": 1.97286957108367e-06, "loss": 0.3326, "step": 36010 }, { "epoch": 0.7967068418200118, "grad_norm": 1.4551842212677002, "learning_rate": 1.9707977420604497e-06, "loss": 0.2425, "step": 36015 }, { "epoch": 0.7968174494615251, "grad_norm": 1.1514668464660645, "learning_rate": 1.968726882560896e-06, "loss": 0.2738, "step": 36020 }, { "epoch": 0.7969280571030384, "grad_norm": 0.8953185081481934, "learning_rate": 1.9666569928350665e-06, "loss": 0.379, "step": 36025 }, { "epoch": 0.7970386647445517, "grad_norm": 0.4874640703201294, "learning_rate": 1.9645880731329016e-06, "loss": 0.2319, "step": 36030 }, { "epoch": 0.7971492723860649, "grad_norm": 2.5622408390045166, "learning_rate": 1.9625201237042136e-06, "loss": 0.2797, "step": 36035 }, { "epoch": 0.7972598800275782, "grad_norm": 0.6202597618103027, "learning_rate": 1.960453144798715e-06, "loss": 0.2805, "step": 36040 }, { "epoch": 0.7973704876690915, "grad_norm": 0.7763904333114624, "learning_rate": 1.9583871366659933e-06, "loss": 0.4502, "step": 36045 }, { "epoch": 0.7974810953106047, "grad_norm": 1.0981351137161255, "learning_rate": 1.956322099555512e-06, "loss": 0.4478, "step": 36050 }, { "epoch": 0.7975917029521179, "grad_norm": 1.4653434753417969, "learning_rate": 1.954258033716628e-06, "loss": 0.308, "step": 36055 }, { "epoch": 0.7977023105936312, "grad_norm": 1.1559674739837646, "learning_rate": 1.9521949393985795e-06, "loss": 0.5582, "step": 36060 }, { "epoch": 0.7978129182351444, "grad_norm": 1.3485933542251587, "learning_rate": 1.9501328168504764e-06, "loss": 0.2785, "step": 36065 }, { "epoch": 0.7979235258766577, "grad_norm": 0.8976491093635559, "learning_rate": 1.948071666321325e-06, "loss": 0.3677, "step": 36070 }, { "epoch": 0.798034133518171, "grad_norm": 2.7732183933258057, "learning_rate": 1.9460114880600113e-06, "loss": 0.3539, "step": 36075 }, { "epoch": 0.7981447411596843, "grad_norm": 1.287609577178955, "learning_rate": 1.943952282315292e-06, "loss": 0.4305, "step": 36080 }, { "epoch": 0.7982553488011975, "grad_norm": 2.5940957069396973, "learning_rate": 1.941894049335824e-06, "loss": 0.4512, "step": 36085 }, { "epoch": 0.7983659564427108, "grad_norm": 0.9202138781547546, "learning_rate": 1.9398367893701364e-06, "loss": 0.3774, "step": 36090 }, { "epoch": 0.7984765640842241, "grad_norm": 0.45179444551467896, "learning_rate": 1.9377805026666375e-06, "loss": 0.1555, "step": 36095 }, { "epoch": 0.7985871717257373, "grad_norm": 0.8537367582321167, "learning_rate": 1.9357251894736296e-06, "loss": 0.3999, "step": 36100 }, { "epoch": 0.7986977793672505, "grad_norm": 1.5498861074447632, "learning_rate": 1.9336708500392885e-06, "loss": 0.2579, "step": 36105 }, { "epoch": 0.7988083870087638, "grad_norm": 0.844349205493927, "learning_rate": 1.9316174846116765e-06, "loss": 0.3231, "step": 36110 }, { "epoch": 0.798918994650277, "grad_norm": 1.3517640829086304, "learning_rate": 1.9295650934387343e-06, "loss": 0.3465, "step": 36115 }, { "epoch": 0.7990296022917903, "grad_norm": 1.0882056951522827, "learning_rate": 1.92751367676829e-06, "loss": 0.1928, "step": 36120 }, { "epoch": 0.7991402099333036, "grad_norm": 1.6526319980621338, "learning_rate": 1.9254632348480497e-06, "loss": 0.3761, "step": 36125 }, { "epoch": 0.7992508175748169, "grad_norm": 0.6883251070976257, "learning_rate": 1.9234137679256073e-06, "loss": 0.2769, "step": 36130 }, { "epoch": 0.7993614252163301, "grad_norm": 0.8765877485275269, "learning_rate": 1.921365276248427e-06, "loss": 0.291, "step": 36135 }, { "epoch": 0.7994720328578434, "grad_norm": 1.1483502388000488, "learning_rate": 1.9193177600638714e-06, "loss": 0.4196, "step": 36140 }, { "epoch": 0.7995826404993567, "grad_norm": 1.4199984073638916, "learning_rate": 1.917271219619178e-06, "loss": 0.3965, "step": 36145 }, { "epoch": 0.7996932481408698, "grad_norm": 0.8774084448814392, "learning_rate": 1.9152256551614566e-06, "loss": 0.3347, "step": 36150 }, { "epoch": 0.7998038557823831, "grad_norm": 1.4453095197677612, "learning_rate": 1.9131810669377183e-06, "loss": 0.2799, "step": 36155 }, { "epoch": 0.7999144634238964, "grad_norm": 1.4520684480667114, "learning_rate": 1.911137455194845e-06, "loss": 0.3254, "step": 36160 }, { "epoch": 0.8000250710654097, "grad_norm": 1.0256216526031494, "learning_rate": 1.9090948201795946e-06, "loss": 0.3142, "step": 36165 }, { "epoch": 0.8001356787069229, "grad_norm": 0.6614605188369751, "learning_rate": 1.9070531621386236e-06, "loss": 0.2918, "step": 36170 }, { "epoch": 0.8002462863484362, "grad_norm": 1.3089652061462402, "learning_rate": 1.90501248131846e-06, "loss": 0.2562, "step": 36175 }, { "epoch": 0.8003568939899495, "grad_norm": 1.1596823930740356, "learning_rate": 1.9029727779655084e-06, "loss": 0.3238, "step": 36180 }, { "epoch": 0.8004675016314627, "grad_norm": 1.3809070587158203, "learning_rate": 1.90093405232607e-06, "loss": 0.367, "step": 36185 }, { "epoch": 0.800578109272976, "grad_norm": 1.095514178276062, "learning_rate": 1.8988963046463205e-06, "loss": 0.2857, "step": 36190 }, { "epoch": 0.8006887169144893, "grad_norm": 0.7784490585327148, "learning_rate": 1.8968595351723096e-06, "loss": 0.3934, "step": 36195 }, { "epoch": 0.8007993245560024, "grad_norm": 1.5467044115066528, "learning_rate": 1.894823744149984e-06, "loss": 0.4349, "step": 36200 }, { "epoch": 0.8009099321975157, "grad_norm": 1.375597357749939, "learning_rate": 1.8927889318251614e-06, "loss": 0.2911, "step": 36205 }, { "epoch": 0.801020539839029, "grad_norm": 1.9106675386428833, "learning_rate": 1.8907550984435464e-06, "loss": 0.2627, "step": 36210 }, { "epoch": 0.8011311474805423, "grad_norm": 1.3594774007797241, "learning_rate": 1.8887222442507224e-06, "loss": 0.4772, "step": 36215 }, { "epoch": 0.8012417551220555, "grad_norm": 1.6327229738235474, "learning_rate": 1.8866903694921568e-06, "loss": 0.2766, "step": 36220 }, { "epoch": 0.8013523627635688, "grad_norm": 2.4022815227508545, "learning_rate": 1.8846594744131974e-06, "loss": 0.4013, "step": 36225 }, { "epoch": 0.8014629704050821, "grad_norm": 0.9033165574073792, "learning_rate": 1.882629559259075e-06, "loss": 0.3971, "step": 36230 }, { "epoch": 0.8015735780465953, "grad_norm": 1.3311059474945068, "learning_rate": 1.8806006242748997e-06, "loss": 0.3426, "step": 36235 }, { "epoch": 0.8016841856881086, "grad_norm": 1.637945532798767, "learning_rate": 1.878572669705666e-06, "loss": 0.3425, "step": 36240 }, { "epoch": 0.8017947933296218, "grad_norm": 1.1467639207839966, "learning_rate": 1.8765456957962514e-06, "loss": 0.3768, "step": 36245 }, { "epoch": 0.801905400971135, "grad_norm": 0.9506725668907166, "learning_rate": 1.8745197027914031e-06, "loss": 0.3641, "step": 36250 }, { "epoch": 0.8020160086126483, "grad_norm": 1.4616222381591797, "learning_rate": 1.8724946909357688e-06, "loss": 0.3939, "step": 36255 }, { "epoch": 0.8021266162541616, "grad_norm": 1.2729132175445557, "learning_rate": 1.8704706604738664e-06, "loss": 0.4185, "step": 36260 }, { "epoch": 0.8022372238956749, "grad_norm": 1.5481929779052734, "learning_rate": 1.8684476116500904e-06, "loss": 0.3106, "step": 36265 }, { "epoch": 0.8023478315371881, "grad_norm": 1.8443098068237305, "learning_rate": 1.8664255447087308e-06, "loss": 0.2721, "step": 36270 }, { "epoch": 0.8024584391787014, "grad_norm": 1.3960691690444946, "learning_rate": 1.8644044598939514e-06, "loss": 0.2663, "step": 36275 }, { "epoch": 0.8025690468202147, "grad_norm": 0.5517547726631165, "learning_rate": 1.8623843574497902e-06, "loss": 0.3396, "step": 36280 }, { "epoch": 0.802679654461728, "grad_norm": 0.7732178568840027, "learning_rate": 1.8603652376201808e-06, "loss": 0.3716, "step": 36285 }, { "epoch": 0.8027902621032412, "grad_norm": 0.8493314385414124, "learning_rate": 1.8583471006489317e-06, "loss": 0.2924, "step": 36290 }, { "epoch": 0.8029008697447544, "grad_norm": 1.7482168674468994, "learning_rate": 1.8563299467797247e-06, "loss": 0.3095, "step": 36295 }, { "epoch": 0.8030114773862677, "grad_norm": 1.2729772329330444, "learning_rate": 1.854313776256138e-06, "loss": 0.3179, "step": 36300 }, { "epoch": 0.8031220850277809, "grad_norm": 1.4645589590072632, "learning_rate": 1.8522985893216238e-06, "loss": 0.2282, "step": 36305 }, { "epoch": 0.8032326926692942, "grad_norm": 1.2693201303482056, "learning_rate": 1.8502843862195086e-06, "loss": 0.3297, "step": 36310 }, { "epoch": 0.8033433003108075, "grad_norm": 0.9904146194458008, "learning_rate": 1.848271167193012e-06, "loss": 0.2474, "step": 36315 }, { "epoch": 0.8034539079523207, "grad_norm": 0.8511794209480286, "learning_rate": 1.8462589324852297e-06, "loss": 0.3572, "step": 36320 }, { "epoch": 0.803564515593834, "grad_norm": 0.9059597253799438, "learning_rate": 1.8442476823391363e-06, "loss": 0.2529, "step": 36325 }, { "epoch": 0.8036751232353473, "grad_norm": 0.8651410341262817, "learning_rate": 1.842237416997591e-06, "loss": 0.3197, "step": 36330 }, { "epoch": 0.8037857308768606, "grad_norm": 0.7501698136329651, "learning_rate": 1.8402281367033314e-06, "loss": 0.3364, "step": 36335 }, { "epoch": 0.8038963385183737, "grad_norm": 1.2838897705078125, "learning_rate": 1.8382198416989783e-06, "loss": 0.2676, "step": 36340 }, { "epoch": 0.804006946159887, "grad_norm": 1.1418684720993042, "learning_rate": 1.8362125322270364e-06, "loss": 0.4136, "step": 36345 }, { "epoch": 0.8041175538014003, "grad_norm": 1.6908243894577026, "learning_rate": 1.8342062085298774e-06, "loss": 0.3774, "step": 36350 }, { "epoch": 0.8042281614429135, "grad_norm": 1.2743167877197266, "learning_rate": 1.8322008708497741e-06, "loss": 0.3236, "step": 36355 }, { "epoch": 0.8043387690844268, "grad_norm": 0.9945348501205444, "learning_rate": 1.8301965194288696e-06, "loss": 0.3884, "step": 36360 }, { "epoch": 0.8044493767259401, "grad_norm": 1.9313808679580688, "learning_rate": 1.8281931545091814e-06, "loss": 0.3632, "step": 36365 }, { "epoch": 0.8045599843674534, "grad_norm": 1.3951451778411865, "learning_rate": 1.8261907763326213e-06, "loss": 0.3561, "step": 36370 }, { "epoch": 0.8046705920089666, "grad_norm": 1.3875420093536377, "learning_rate": 1.8241893851409787e-06, "loss": 0.2667, "step": 36375 }, { "epoch": 0.8047811996504799, "grad_norm": 0.8739458322525024, "learning_rate": 1.8221889811759108e-06, "loss": 0.3736, "step": 36380 }, { "epoch": 0.8048918072919932, "grad_norm": 0.6474717259407043, "learning_rate": 1.8201895646789746e-06, "loss": 0.3292, "step": 36385 }, { "epoch": 0.8050024149335063, "grad_norm": 1.3746451139450073, "learning_rate": 1.8181911358915983e-06, "loss": 0.2538, "step": 36390 }, { "epoch": 0.8051130225750196, "grad_norm": 1.029513955116272, "learning_rate": 1.816193695055085e-06, "loss": 0.3873, "step": 36395 }, { "epoch": 0.8052236302165329, "grad_norm": 1.1261651515960693, "learning_rate": 1.8141972424106313e-06, "loss": 0.2005, "step": 36400 }, { "epoch": 0.8053342378580461, "grad_norm": 1.426323652267456, "learning_rate": 1.8122017781993095e-06, "loss": 0.3555, "step": 36405 }, { "epoch": 0.8054448454995594, "grad_norm": 0.7438443899154663, "learning_rate": 1.8102073026620637e-06, "loss": 0.3812, "step": 36410 }, { "epoch": 0.8055554531410727, "grad_norm": 0.6938494443893433, "learning_rate": 1.8082138160397322e-06, "loss": 0.2142, "step": 36415 }, { "epoch": 0.805666060782586, "grad_norm": 1.8262379169464111, "learning_rate": 1.8062213185730271e-06, "loss": 0.2132, "step": 36420 }, { "epoch": 0.8057766684240992, "grad_norm": 0.8335972428321838, "learning_rate": 1.8042298105025402e-06, "loss": 0.3284, "step": 36425 }, { "epoch": 0.8058872760656125, "grad_norm": 0.9962776303291321, "learning_rate": 1.802239292068747e-06, "loss": 0.2909, "step": 36430 }, { "epoch": 0.8059978837071257, "grad_norm": 1.125218391418457, "learning_rate": 1.800249763512002e-06, "loss": 0.3177, "step": 36435 }, { "epoch": 0.8061084913486389, "grad_norm": 1.3463155031204224, "learning_rate": 1.7982612250725385e-06, "loss": 0.3784, "step": 36440 }, { "epoch": 0.8062190989901522, "grad_norm": 1.133934736251831, "learning_rate": 1.7962736769904732e-06, "loss": 0.3249, "step": 36445 }, { "epoch": 0.8063297066316655, "grad_norm": 0.5875454545021057, "learning_rate": 1.794287119505802e-06, "loss": 0.305, "step": 36450 }, { "epoch": 0.8064403142731787, "grad_norm": 1.0977751016616821, "learning_rate": 1.7923015528584009e-06, "loss": 0.3653, "step": 36455 }, { "epoch": 0.806550921914692, "grad_norm": 1.3757123947143555, "learning_rate": 1.7903169772880292e-06, "loss": 0.2782, "step": 36460 }, { "epoch": 0.8066615295562053, "grad_norm": 1.3732744455337524, "learning_rate": 1.788333393034315e-06, "loss": 0.3881, "step": 36465 }, { "epoch": 0.8067721371977186, "grad_norm": 1.3941594362258911, "learning_rate": 1.7863508003367835e-06, "loss": 0.2237, "step": 36470 }, { "epoch": 0.8068827448392318, "grad_norm": 0.6324568390846252, "learning_rate": 1.7843691994348332e-06, "loss": 0.2428, "step": 36475 }, { "epoch": 0.8069933524807451, "grad_norm": 0.5939915180206299, "learning_rate": 1.782388590567733e-06, "loss": 0.3715, "step": 36480 }, { "epoch": 0.8071039601222583, "grad_norm": 1.023573637008667, "learning_rate": 1.7804089739746499e-06, "loss": 0.3629, "step": 36485 }, { "epoch": 0.8072145677637715, "grad_norm": 1.1408734321594238, "learning_rate": 1.778430349894621e-06, "loss": 0.2989, "step": 36490 }, { "epoch": 0.8073251754052848, "grad_norm": 1.4591400623321533, "learning_rate": 1.7764527185665558e-06, "loss": 0.4118, "step": 36495 }, { "epoch": 0.8074357830467981, "grad_norm": 0.7188566327095032, "learning_rate": 1.7744760802292626e-06, "loss": 0.4072, "step": 36500 }, { "epoch": 0.8075463906883114, "grad_norm": 0.8665531873703003, "learning_rate": 1.7725004351214181e-06, "loss": 0.3818, "step": 36505 }, { "epoch": 0.8076569983298246, "grad_norm": 1.187096357345581, "learning_rate": 1.7705257834815737e-06, "loss": 0.3358, "step": 36510 }, { "epoch": 0.8077676059713379, "grad_norm": 1.7606068849563599, "learning_rate": 1.7685521255481764e-06, "loss": 0.1923, "step": 36515 }, { "epoch": 0.8078782136128512, "grad_norm": 2.1930553913116455, "learning_rate": 1.7665794615595433e-06, "loss": 0.2296, "step": 36520 }, { "epoch": 0.8079888212543644, "grad_norm": 1.0190850496292114, "learning_rate": 1.764607791753866e-06, "loss": 0.311, "step": 36525 }, { "epoch": 0.8080994288958776, "grad_norm": 1.275171160697937, "learning_rate": 1.7626371163692312e-06, "loss": 0.3334, "step": 36530 }, { "epoch": 0.8082100365373909, "grad_norm": 1.7711129188537598, "learning_rate": 1.760667435643595e-06, "loss": 0.2521, "step": 36535 }, { "epoch": 0.8083206441789041, "grad_norm": 1.1847856044769287, "learning_rate": 1.7586987498147945e-06, "loss": 0.2954, "step": 36540 }, { "epoch": 0.8084312518204174, "grad_norm": 1.806368112564087, "learning_rate": 1.7567310591205478e-06, "loss": 0.3295, "step": 36545 }, { "epoch": 0.8085418594619307, "grad_norm": 0.9322842955589294, "learning_rate": 1.7547643637984547e-06, "loss": 0.3555, "step": 36550 }, { "epoch": 0.808652467103444, "grad_norm": 1.3087131977081299, "learning_rate": 1.7527986640859918e-06, "loss": 0.3256, "step": 36555 }, { "epoch": 0.8087630747449572, "grad_norm": 1.2587621212005615, "learning_rate": 1.7508339602205182e-06, "loss": 0.2854, "step": 36560 }, { "epoch": 0.8088736823864705, "grad_norm": 1.3634763956069946, "learning_rate": 1.7488702524392664e-06, "loss": 0.3397, "step": 36565 }, { "epoch": 0.8089842900279838, "grad_norm": 0.9698140025138855, "learning_rate": 1.746907540979359e-06, "loss": 0.3878, "step": 36570 }, { "epoch": 0.809094897669497, "grad_norm": 0.4625055491924286, "learning_rate": 1.744945826077793e-06, "loss": 0.1882, "step": 36575 }, { "epoch": 0.8092055053110102, "grad_norm": 1.097153902053833, "learning_rate": 1.7429851079714387e-06, "loss": 0.274, "step": 36580 }, { "epoch": 0.8093161129525235, "grad_norm": 1.3545457124710083, "learning_rate": 1.7410253868970584e-06, "loss": 0.2588, "step": 36585 }, { "epoch": 0.8094267205940368, "grad_norm": 1.2951046228408813, "learning_rate": 1.7390666630912878e-06, "loss": 0.2995, "step": 36590 }, { "epoch": 0.80953732823555, "grad_norm": 0.6812365055084229, "learning_rate": 1.7371089367906346e-06, "loss": 0.366, "step": 36595 }, { "epoch": 0.8096479358770633, "grad_norm": 1.556166172027588, "learning_rate": 1.7351522082315032e-06, "loss": 0.2314, "step": 36600 }, { "epoch": 0.8097585435185766, "grad_norm": 0.6969812512397766, "learning_rate": 1.7331964776501654e-06, "loss": 0.3195, "step": 36605 }, { "epoch": 0.8098691511600898, "grad_norm": 1.0436381101608276, "learning_rate": 1.731241745282768e-06, "loss": 0.4023, "step": 36610 }, { "epoch": 0.8099797588016031, "grad_norm": 1.0947827100753784, "learning_rate": 1.7292880113653521e-06, "loss": 0.3418, "step": 36615 }, { "epoch": 0.8100903664431164, "grad_norm": 1.339685320854187, "learning_rate": 1.7273352761338314e-06, "loss": 0.2686, "step": 36620 }, { "epoch": 0.8102009740846295, "grad_norm": 1.2731664180755615, "learning_rate": 1.7253835398239904e-06, "loss": 0.4787, "step": 36625 }, { "epoch": 0.8103115817261428, "grad_norm": 0.897048830986023, "learning_rate": 1.7234328026715064e-06, "loss": 0.2233, "step": 36630 }, { "epoch": 0.8104221893676561, "grad_norm": 0.6914821267127991, "learning_rate": 1.721483064911932e-06, "loss": 0.266, "step": 36635 }, { "epoch": 0.8105327970091694, "grad_norm": 1.6391164064407349, "learning_rate": 1.7195343267806897e-06, "loss": 0.3848, "step": 36640 }, { "epoch": 0.8106434046506826, "grad_norm": 0.6270391345024109, "learning_rate": 1.717586588513096e-06, "loss": 0.2264, "step": 36645 }, { "epoch": 0.8107540122921959, "grad_norm": 1.3145238161087036, "learning_rate": 1.7156398503443372e-06, "loss": 0.4422, "step": 36650 }, { "epoch": 0.8108646199337092, "grad_norm": 1.2198150157928467, "learning_rate": 1.7136941125094819e-06, "loss": 0.3431, "step": 36655 }, { "epoch": 0.8109752275752224, "grad_norm": 1.12309730052948, "learning_rate": 1.7117493752434776e-06, "loss": 0.3522, "step": 36660 }, { "epoch": 0.8110858352167357, "grad_norm": 1.8557051420211792, "learning_rate": 1.709805638781149e-06, "loss": 0.2852, "step": 36665 }, { "epoch": 0.8111964428582489, "grad_norm": 0.8407782912254333, "learning_rate": 1.7078629033572035e-06, "loss": 0.2957, "step": 36670 }, { "epoch": 0.8113070504997622, "grad_norm": 1.6708011627197266, "learning_rate": 1.7059211692062272e-06, "loss": 0.4033, "step": 36675 }, { "epoch": 0.8114176581412754, "grad_norm": 0.9940638542175293, "learning_rate": 1.7039804365626767e-06, "loss": 0.3826, "step": 36680 }, { "epoch": 0.8115282657827887, "grad_norm": 0.4605700671672821, "learning_rate": 1.7020407056609023e-06, "loss": 0.4533, "step": 36685 }, { "epoch": 0.811638873424302, "grad_norm": 1.6845078468322754, "learning_rate": 1.7001019767351268e-06, "loss": 0.3519, "step": 36690 }, { "epoch": 0.8117494810658152, "grad_norm": 1.3334523439407349, "learning_rate": 1.6981642500194428e-06, "loss": 0.3368, "step": 36695 }, { "epoch": 0.8118600887073285, "grad_norm": 1.2682945728302002, "learning_rate": 1.6962275257478367e-06, "loss": 0.3508, "step": 36700 }, { "epoch": 0.8119706963488418, "grad_norm": 1.3400764465332031, "learning_rate": 1.694291804154169e-06, "loss": 0.2431, "step": 36705 }, { "epoch": 0.812081303990355, "grad_norm": 0.5321619510650635, "learning_rate": 1.6923570854721694e-06, "loss": 0.177, "step": 36710 }, { "epoch": 0.8121919116318683, "grad_norm": 0.8841099739074707, "learning_rate": 1.6904233699354622e-06, "loss": 0.2089, "step": 36715 }, { "epoch": 0.8123025192733815, "grad_norm": 1.9163023233413696, "learning_rate": 1.688490657777543e-06, "loss": 0.4568, "step": 36720 }, { "epoch": 0.8124131269148948, "grad_norm": 1.2285579442977905, "learning_rate": 1.686558949231778e-06, "loss": 0.2187, "step": 36725 }, { "epoch": 0.812523734556408, "grad_norm": 1.013174057006836, "learning_rate": 1.6846282445314288e-06, "loss": 0.3623, "step": 36730 }, { "epoch": 0.8126343421979213, "grad_norm": 1.7946144342422485, "learning_rate": 1.6826985439096267e-06, "loss": 0.3851, "step": 36735 }, { "epoch": 0.8127449498394346, "grad_norm": 0.9246466755867004, "learning_rate": 1.680769847599375e-06, "loss": 0.2161, "step": 36740 }, { "epoch": 0.8128555574809478, "grad_norm": 0.934665322303772, "learning_rate": 1.6788421558335722e-06, "loss": 0.2478, "step": 36745 }, { "epoch": 0.8129661651224611, "grad_norm": 1.2506481409072876, "learning_rate": 1.6769154688449818e-06, "loss": 0.4134, "step": 36750 }, { "epoch": 0.8130767727639744, "grad_norm": 1.0225328207015991, "learning_rate": 1.6749897868662513e-06, "loss": 0.2889, "step": 36755 }, { "epoch": 0.8131873804054877, "grad_norm": 1.0322644710540771, "learning_rate": 1.6730651101299067e-06, "loss": 0.3773, "step": 36760 }, { "epoch": 0.8132979880470008, "grad_norm": 0.9408885836601257, "learning_rate": 1.671141438868351e-06, "loss": 0.2793, "step": 36765 }, { "epoch": 0.8134085956885141, "grad_norm": 1.1971311569213867, "learning_rate": 1.6692187733138688e-06, "loss": 0.5147, "step": 36770 }, { "epoch": 0.8135192033300274, "grad_norm": 1.4517498016357422, "learning_rate": 1.6672971136986227e-06, "loss": 0.2322, "step": 36775 }, { "epoch": 0.8136298109715406, "grad_norm": 0.9351434111595154, "learning_rate": 1.6653764602546451e-06, "loss": 0.306, "step": 36780 }, { "epoch": 0.8137404186130539, "grad_norm": 1.1641823053359985, "learning_rate": 1.6634568132138618e-06, "loss": 0.3806, "step": 36785 }, { "epoch": 0.8138510262545672, "grad_norm": 1.1805866956710815, "learning_rate": 1.6615381728080693e-06, "loss": 0.2355, "step": 36790 }, { "epoch": 0.8139616338960804, "grad_norm": 1.5157771110534668, "learning_rate": 1.6596205392689357e-06, "loss": 0.2789, "step": 36795 }, { "epoch": 0.8140722415375937, "grad_norm": 1.3419547080993652, "learning_rate": 1.6577039128280226e-06, "loss": 0.4775, "step": 36800 }, { "epoch": 0.814182849179107, "grad_norm": 1.1123204231262207, "learning_rate": 1.6557882937167613e-06, "loss": 0.3177, "step": 36805 }, { "epoch": 0.8142934568206203, "grad_norm": 1.2474812269210815, "learning_rate": 1.6538736821664547e-06, "loss": 0.3282, "step": 36810 }, { "epoch": 0.8144040644621334, "grad_norm": 1.6004377603530884, "learning_rate": 1.6519600784083e-06, "loss": 0.5134, "step": 36815 }, { "epoch": 0.8145146721036467, "grad_norm": 1.285545825958252, "learning_rate": 1.6500474826733626e-06, "loss": 0.203, "step": 36820 }, { "epoch": 0.81462527974516, "grad_norm": 1.068978190422058, "learning_rate": 1.6481358951925819e-06, "loss": 0.3479, "step": 36825 }, { "epoch": 0.8147358873866732, "grad_norm": 0.9569427371025085, "learning_rate": 1.646225316196788e-06, "loss": 0.2859, "step": 36830 }, { "epoch": 0.8148464950281865, "grad_norm": 1.5791813135147095, "learning_rate": 1.6443157459166835e-06, "loss": 0.1916, "step": 36835 }, { "epoch": 0.8149571026696998, "grad_norm": 1.4803475141525269, "learning_rate": 1.6424071845828404e-06, "loss": 0.3669, "step": 36840 }, { "epoch": 0.8150677103112131, "grad_norm": 0.8321998119354248, "learning_rate": 1.640499632425725e-06, "loss": 0.39, "step": 36845 }, { "epoch": 0.8151783179527263, "grad_norm": 1.2489871978759766, "learning_rate": 1.638593089675672e-06, "loss": 0.3956, "step": 36850 }, { "epoch": 0.8152889255942396, "grad_norm": 0.9138400554656982, "learning_rate": 1.6366875565628903e-06, "loss": 0.3074, "step": 36855 }, { "epoch": 0.8153995332357528, "grad_norm": 3.358586072921753, "learning_rate": 1.6347830333174785e-06, "loss": 0.2788, "step": 36860 }, { "epoch": 0.815510140877266, "grad_norm": 0.7919889092445374, "learning_rate": 1.6328795201694058e-06, "loss": 0.3748, "step": 36865 }, { "epoch": 0.8156207485187793, "grad_norm": 0.49176540970802307, "learning_rate": 1.6309770173485207e-06, "loss": 0.2808, "step": 36870 }, { "epoch": 0.8157313561602926, "grad_norm": 1.074565052986145, "learning_rate": 1.6290755250845481e-06, "loss": 0.3757, "step": 36875 }, { "epoch": 0.8158419638018058, "grad_norm": 0.7852692008018494, "learning_rate": 1.6271750436070955e-06, "loss": 0.2749, "step": 36880 }, { "epoch": 0.8159525714433191, "grad_norm": 2.5998408794403076, "learning_rate": 1.6252755731456426e-06, "loss": 0.4674, "step": 36885 }, { "epoch": 0.8160631790848324, "grad_norm": 1.1189708709716797, "learning_rate": 1.623377113929554e-06, "loss": 0.5018, "step": 36890 }, { "epoch": 0.8161737867263457, "grad_norm": 1.262642741203308, "learning_rate": 1.6214796661880606e-06, "loss": 0.3187, "step": 36895 }, { "epoch": 0.8162843943678589, "grad_norm": 0.9315586686134338, "learning_rate": 1.6195832301502855e-06, "loss": 0.2925, "step": 36900 }, { "epoch": 0.8163950020093722, "grad_norm": 1.1269171237945557, "learning_rate": 1.6176878060452229e-06, "loss": 0.3249, "step": 36905 }, { "epoch": 0.8165056096508854, "grad_norm": 1.0408470630645752, "learning_rate": 1.6157933941017367e-06, "loss": 0.2921, "step": 36910 }, { "epoch": 0.8166162172923986, "grad_norm": 1.8921618461608887, "learning_rate": 1.6138999945485857e-06, "loss": 0.2824, "step": 36915 }, { "epoch": 0.8167268249339119, "grad_norm": 1.2150325775146484, "learning_rate": 1.6120076076143953e-06, "loss": 0.175, "step": 36920 }, { "epoch": 0.8168374325754252, "grad_norm": 0.6099507212638855, "learning_rate": 1.6101162335276644e-06, "loss": 0.2658, "step": 36925 }, { "epoch": 0.8169480402169385, "grad_norm": 0.832771360874176, "learning_rate": 1.6082258725167832e-06, "loss": 0.2252, "step": 36930 }, { "epoch": 0.8170586478584517, "grad_norm": 2.1596381664276123, "learning_rate": 1.6063365248100116e-06, "loss": 0.3478, "step": 36935 }, { "epoch": 0.817169255499965, "grad_norm": 1.0220543146133423, "learning_rate": 1.6044481906354803e-06, "loss": 0.4634, "step": 36940 }, { "epoch": 0.8172798631414783, "grad_norm": 0.9164916276931763, "learning_rate": 1.6025608702212137e-06, "loss": 0.2206, "step": 36945 }, { "epoch": 0.8173904707829915, "grad_norm": 1.0295686721801758, "learning_rate": 1.6006745637951038e-06, "loss": 0.2769, "step": 36950 }, { "epoch": 0.8175010784245047, "grad_norm": 0.9690259099006653, "learning_rate": 1.5987892715849152e-06, "loss": 0.21, "step": 36955 }, { "epoch": 0.817611686066018, "grad_norm": 0.9153972268104553, "learning_rate": 1.5969049938183023e-06, "loss": 0.3839, "step": 36960 }, { "epoch": 0.8177222937075312, "grad_norm": 1.6377546787261963, "learning_rate": 1.59502173072279e-06, "loss": 0.3495, "step": 36965 }, { "epoch": 0.8178329013490445, "grad_norm": 1.6517924070358276, "learning_rate": 1.5931394825257806e-06, "loss": 0.3884, "step": 36970 }, { "epoch": 0.8179435089905578, "grad_norm": 0.7960008978843689, "learning_rate": 1.591258249454557e-06, "loss": 0.2254, "step": 36975 }, { "epoch": 0.8180541166320711, "grad_norm": 1.3665556907653809, "learning_rate": 1.589378031736275e-06, "loss": 0.3536, "step": 36980 }, { "epoch": 0.8181647242735843, "grad_norm": 1.5504158735275269, "learning_rate": 1.5874988295979709e-06, "loss": 0.3667, "step": 36985 }, { "epoch": 0.8182753319150976, "grad_norm": 1.1344703435897827, "learning_rate": 1.585620643266561e-06, "loss": 0.4733, "step": 36990 }, { "epoch": 0.8183859395566109, "grad_norm": 0.8433324098587036, "learning_rate": 1.5837434729688284e-06, "loss": 0.3008, "step": 36995 }, { "epoch": 0.8184965471981241, "grad_norm": 0.8804543614387512, "learning_rate": 1.5818673189314481e-06, "loss": 0.4336, "step": 37000 }, { "epoch": 0.8186071548396373, "grad_norm": 0.5499826669692993, "learning_rate": 1.5799921813809637e-06, "loss": 0.3303, "step": 37005 }, { "epoch": 0.8187177624811506, "grad_norm": 1.170064091682434, "learning_rate": 1.5781180605437918e-06, "loss": 0.407, "step": 37010 }, { "epoch": 0.8188283701226639, "grad_norm": 0.5542752146720886, "learning_rate": 1.576244956646239e-06, "loss": 0.294, "step": 37015 }, { "epoch": 0.8189389777641771, "grad_norm": 1.2850329875946045, "learning_rate": 1.5743728699144822e-06, "loss": 0.4651, "step": 37020 }, { "epoch": 0.8190495854056904, "grad_norm": 0.9574486613273621, "learning_rate": 1.5725018005745663e-06, "loss": 0.3568, "step": 37025 }, { "epoch": 0.8191601930472037, "grad_norm": 0.8559391498565674, "learning_rate": 1.5706317488524313e-06, "loss": 0.3382, "step": 37030 }, { "epoch": 0.8192708006887169, "grad_norm": 0.8640496134757996, "learning_rate": 1.5687627149738848e-06, "loss": 0.2172, "step": 37035 }, { "epoch": 0.8193814083302302, "grad_norm": 0.9261873960494995, "learning_rate": 1.5668946991646062e-06, "loss": 0.2906, "step": 37040 }, { "epoch": 0.8194920159717435, "grad_norm": 1.1680121421813965, "learning_rate": 1.5650277016501624e-06, "loss": 0.3385, "step": 37045 }, { "epoch": 0.8196026236132566, "grad_norm": 1.22238290309906, "learning_rate": 1.5631617226559948e-06, "loss": 0.392, "step": 37050 }, { "epoch": 0.8197132312547699, "grad_norm": 0.7059459090232849, "learning_rate": 1.5612967624074115e-06, "loss": 0.2396, "step": 37055 }, { "epoch": 0.8198238388962832, "grad_norm": 1.2748816013336182, "learning_rate": 1.5594328211296128e-06, "loss": 0.4237, "step": 37060 }, { "epoch": 0.8199344465377965, "grad_norm": 1.4355300664901733, "learning_rate": 1.5575698990476707e-06, "loss": 0.3699, "step": 37065 }, { "epoch": 0.8200450541793097, "grad_norm": 1.1055264472961426, "learning_rate": 1.555707996386524e-06, "loss": 0.3481, "step": 37070 }, { "epoch": 0.820155661820823, "grad_norm": 1.1955267190933228, "learning_rate": 1.5538471133710042e-06, "loss": 0.3233, "step": 37075 }, { "epoch": 0.8202662694623363, "grad_norm": 1.0501214265823364, "learning_rate": 1.5519872502258103e-06, "loss": 0.395, "step": 37080 }, { "epoch": 0.8203768771038495, "grad_norm": 1.5754998922348022, "learning_rate": 1.5501284071755196e-06, "loss": 0.3952, "step": 37085 }, { "epoch": 0.8204874847453628, "grad_norm": 0.8216390013694763, "learning_rate": 1.5482705844445878e-06, "loss": 0.1625, "step": 37090 }, { "epoch": 0.8205980923868761, "grad_norm": 0.7978630065917969, "learning_rate": 1.5464137822573455e-06, "loss": 0.4288, "step": 37095 }, { "epoch": 0.8207087000283892, "grad_norm": 0.6150808334350586, "learning_rate": 1.5445580008380011e-06, "loss": 0.2567, "step": 37100 }, { "epoch": 0.8208193076699025, "grad_norm": 1.3937824964523315, "learning_rate": 1.5427032404106424e-06, "loss": 0.3721, "step": 37105 }, { "epoch": 0.8209299153114158, "grad_norm": 1.4890795946121216, "learning_rate": 1.540849501199223e-06, "loss": 0.293, "step": 37110 }, { "epoch": 0.8210405229529291, "grad_norm": 1.7053383588790894, "learning_rate": 1.53899678342759e-06, "loss": 0.3069, "step": 37115 }, { "epoch": 0.8211511305944423, "grad_norm": 1.4334365129470825, "learning_rate": 1.5371450873194583e-06, "loss": 0.3473, "step": 37120 }, { "epoch": 0.8212617382359556, "grad_norm": 1.948474645614624, "learning_rate": 1.5352944130984105e-06, "loss": 0.2787, "step": 37125 }, { "epoch": 0.8213723458774689, "grad_norm": 1.2498098611831665, "learning_rate": 1.5334447609879243e-06, "loss": 0.3348, "step": 37130 }, { "epoch": 0.8214829535189822, "grad_norm": 1.8249495029449463, "learning_rate": 1.5315961312113448e-06, "loss": 0.4385, "step": 37135 }, { "epoch": 0.8215935611604954, "grad_norm": 2.2895216941833496, "learning_rate": 1.5297485239918841e-06, "loss": 0.4048, "step": 37140 }, { "epoch": 0.8217041688020086, "grad_norm": 0.9564852714538574, "learning_rate": 1.5279019395526485e-06, "loss": 0.267, "step": 37145 }, { "epoch": 0.8218147764435219, "grad_norm": 0.9959598183631897, "learning_rate": 1.5260563781166138e-06, "loss": 0.2169, "step": 37150 }, { "epoch": 0.8219253840850351, "grad_norm": 0.975394070148468, "learning_rate": 1.524211839906622e-06, "loss": 0.3055, "step": 37155 }, { "epoch": 0.8220359917265484, "grad_norm": 1.4811346530914307, "learning_rate": 1.522368325145408e-06, "loss": 0.3827, "step": 37160 }, { "epoch": 0.8221465993680617, "grad_norm": 0.9865260124206543, "learning_rate": 1.520525834055575e-06, "loss": 0.3516, "step": 37165 }, { "epoch": 0.8222572070095749, "grad_norm": 0.8856674432754517, "learning_rate": 1.5186843668595985e-06, "loss": 0.3229, "step": 37170 }, { "epoch": 0.8223678146510882, "grad_norm": 2.0540027618408203, "learning_rate": 1.5168439237798404e-06, "loss": 0.3235, "step": 37175 }, { "epoch": 0.8224784222926015, "grad_norm": 1.4171706438064575, "learning_rate": 1.515004505038533e-06, "loss": 0.3843, "step": 37180 }, { "epoch": 0.8225890299341148, "grad_norm": 0.7151245474815369, "learning_rate": 1.513166110857781e-06, "loss": 0.2599, "step": 37185 }, { "epoch": 0.822699637575628, "grad_norm": 1.260927677154541, "learning_rate": 1.5113287414595745e-06, "loss": 0.3944, "step": 37190 }, { "epoch": 0.8228102452171412, "grad_norm": 2.2295563220977783, "learning_rate": 1.5094923970657738e-06, "loss": 0.283, "step": 37195 }, { "epoch": 0.8229208528586545, "grad_norm": 1.0780177116394043, "learning_rate": 1.507657077898117e-06, "loss": 0.3546, "step": 37200 }, { "epoch": 0.8230314605001677, "grad_norm": 0.7907904386520386, "learning_rate": 1.5058227841782203e-06, "loss": 0.3393, "step": 37205 }, { "epoch": 0.823142068141681, "grad_norm": 0.5254600048065186, "learning_rate": 1.5039895161275674e-06, "loss": 0.3132, "step": 37210 }, { "epoch": 0.8232526757831943, "grad_norm": 1.5604631900787354, "learning_rate": 1.5021572739675328e-06, "loss": 0.2375, "step": 37215 }, { "epoch": 0.8233632834247075, "grad_norm": 0.7991588115692139, "learning_rate": 1.5003260579193569e-06, "loss": 0.3168, "step": 37220 }, { "epoch": 0.8234738910662208, "grad_norm": 0.9896994829177856, "learning_rate": 1.498495868204154e-06, "loss": 0.2866, "step": 37225 }, { "epoch": 0.8235844987077341, "grad_norm": 1.1484614610671997, "learning_rate": 1.4966667050429228e-06, "loss": 0.23, "step": 37230 }, { "epoch": 0.8236951063492474, "grad_norm": 0.7824456691741943, "learning_rate": 1.494838568656538e-06, "loss": 0.1492, "step": 37235 }, { "epoch": 0.8238057139907605, "grad_norm": 2.0379374027252197, "learning_rate": 1.4930114592657364e-06, "loss": 0.2964, "step": 37240 }, { "epoch": 0.8239163216322738, "grad_norm": 1.2791109085083008, "learning_rate": 1.4911853770911499e-06, "loss": 0.4051, "step": 37245 }, { "epoch": 0.8240269292737871, "grad_norm": 2.4533886909484863, "learning_rate": 1.489360322353276e-06, "loss": 0.4035, "step": 37250 }, { "epoch": 0.8241375369153003, "grad_norm": 2.340471029281616, "learning_rate": 1.4875362952724825e-06, "loss": 0.3509, "step": 37255 }, { "epoch": 0.8242481445568136, "grad_norm": 1.6323729753494263, "learning_rate": 1.4857132960690279e-06, "loss": 0.387, "step": 37260 }, { "epoch": 0.8243587521983269, "grad_norm": 0.8667770028114319, "learning_rate": 1.4838913249630394e-06, "loss": 0.1922, "step": 37265 }, { "epoch": 0.8244693598398402, "grad_norm": 1.2147245407104492, "learning_rate": 1.482070382174511e-06, "loss": 0.2797, "step": 37270 }, { "epoch": 0.8245799674813534, "grad_norm": 0.9881303310394287, "learning_rate": 1.4802504679233276e-06, "loss": 0.3682, "step": 37275 }, { "epoch": 0.8246905751228667, "grad_norm": 0.8301493525505066, "learning_rate": 1.478431582429245e-06, "loss": 0.3259, "step": 37280 }, { "epoch": 0.82480118276438, "grad_norm": 0.7313327789306641, "learning_rate": 1.476613725911885e-06, "loss": 0.3531, "step": 37285 }, { "epoch": 0.8249117904058931, "grad_norm": 1.4199309349060059, "learning_rate": 1.4747968985907612e-06, "loss": 0.4746, "step": 37290 }, { "epoch": 0.8250223980474064, "grad_norm": 1.4287643432617188, "learning_rate": 1.4729811006852513e-06, "loss": 0.2132, "step": 37295 }, { "epoch": 0.8251330056889197, "grad_norm": 1.0723387002944946, "learning_rate": 1.4711663324146131e-06, "loss": 0.2625, "step": 37300 }, { "epoch": 0.825243613330433, "grad_norm": 0.5370166897773743, "learning_rate": 1.469352593997978e-06, "loss": 0.3183, "step": 37305 }, { "epoch": 0.8253542209719462, "grad_norm": 1.2194209098815918, "learning_rate": 1.4675398856543565e-06, "loss": 0.4495, "step": 37310 }, { "epoch": 0.8254648286134595, "grad_norm": 1.635666847229004, "learning_rate": 1.4657282076026313e-06, "loss": 0.4363, "step": 37315 }, { "epoch": 0.8255754362549728, "grad_norm": 1.420648455619812, "learning_rate": 1.463917560061564e-06, "loss": 0.321, "step": 37320 }, { "epoch": 0.825686043896486, "grad_norm": 1.0763375759124756, "learning_rate": 1.4621079432497842e-06, "loss": 0.3102, "step": 37325 }, { "epoch": 0.8257966515379993, "grad_norm": 1.1554843187332153, "learning_rate": 1.4602993573858071e-06, "loss": 0.1957, "step": 37330 }, { "epoch": 0.8259072591795125, "grad_norm": 1.0142401456832886, "learning_rate": 1.4584918026880214e-06, "loss": 0.2985, "step": 37335 }, { "epoch": 0.8260178668210257, "grad_norm": 1.2840828895568848, "learning_rate": 1.456685279374681e-06, "loss": 0.3222, "step": 37340 }, { "epoch": 0.826128474462539, "grad_norm": 0.8771679401397705, "learning_rate": 1.4548797876639287e-06, "loss": 0.3761, "step": 37345 }, { "epoch": 0.8262390821040523, "grad_norm": 1.2665438652038574, "learning_rate": 1.4530753277737787e-06, "loss": 0.4494, "step": 37350 }, { "epoch": 0.8263496897455656, "grad_norm": 1.3903720378875732, "learning_rate": 1.451271899922112e-06, "loss": 0.303, "step": 37355 }, { "epoch": 0.8264602973870788, "grad_norm": 2.589890956878662, "learning_rate": 1.4494695043266972e-06, "loss": 0.4203, "step": 37360 }, { "epoch": 0.8265709050285921, "grad_norm": 0.7256768941879272, "learning_rate": 1.4476681412051741e-06, "loss": 0.2979, "step": 37365 }, { "epoch": 0.8266815126701054, "grad_norm": 1.115893840789795, "learning_rate": 1.4458678107750511e-06, "loss": 0.3503, "step": 37370 }, { "epoch": 0.8267921203116186, "grad_norm": 0.9452210068702698, "learning_rate": 1.4440685132537225e-06, "loss": 0.47, "step": 37375 }, { "epoch": 0.8269027279531319, "grad_norm": 1.3119232654571533, "learning_rate": 1.4422702488584529e-06, "loss": 0.2614, "step": 37380 }, { "epoch": 0.8270133355946451, "grad_norm": 1.1555601358413696, "learning_rate": 1.4404730178063763e-06, "loss": 0.2684, "step": 37385 }, { "epoch": 0.8271239432361583, "grad_norm": 1.386064887046814, "learning_rate": 1.438676820314514e-06, "loss": 0.4686, "step": 37390 }, { "epoch": 0.8272345508776716, "grad_norm": 0.12895473837852478, "learning_rate": 1.4368816565997579e-06, "loss": 0.3311, "step": 37395 }, { "epoch": 0.8273451585191849, "grad_norm": 1.1588759422302246, "learning_rate": 1.4350875268788643e-06, "loss": 0.434, "step": 37400 }, { "epoch": 0.8274557661606982, "grad_norm": 0.7362327575683594, "learning_rate": 1.4332944313684815e-06, "loss": 0.2529, "step": 37405 }, { "epoch": 0.8275663738022114, "grad_norm": 0.9821130633354187, "learning_rate": 1.4315023702851239e-06, "loss": 0.3968, "step": 37410 }, { "epoch": 0.8276769814437247, "grad_norm": 0.2817305028438568, "learning_rate": 1.4297113438451816e-06, "loss": 0.329, "step": 37415 }, { "epoch": 0.827787589085238, "grad_norm": 1.0706433057785034, "learning_rate": 1.4279213522649215e-06, "loss": 0.2474, "step": 37420 }, { "epoch": 0.8278981967267512, "grad_norm": 1.509087085723877, "learning_rate": 1.42613239576048e-06, "loss": 0.347, "step": 37425 }, { "epoch": 0.8280088043682644, "grad_norm": 0.897740364074707, "learning_rate": 1.4243444745478775e-06, "loss": 0.2983, "step": 37430 }, { "epoch": 0.8281194120097777, "grad_norm": 0.8981934189796448, "learning_rate": 1.4225575888430077e-06, "loss": 0.4193, "step": 37435 }, { "epoch": 0.828230019651291, "grad_norm": 1.0805375576019287, "learning_rate": 1.420771738861627e-06, "loss": 0.4806, "step": 37440 }, { "epoch": 0.8283406272928042, "grad_norm": 0.9095411896705627, "learning_rate": 1.4189869248193844e-06, "loss": 0.4429, "step": 37445 }, { "epoch": 0.8284512349343175, "grad_norm": 1.5811697244644165, "learning_rate": 1.4172031469317959e-06, "loss": 0.3298, "step": 37450 }, { "epoch": 0.8285618425758308, "grad_norm": 1.3133596181869507, "learning_rate": 1.4154204054142451e-06, "loss": 0.3379, "step": 37455 }, { "epoch": 0.828672450217344, "grad_norm": 1.3628371953964233, "learning_rate": 1.413638700482003e-06, "loss": 0.2911, "step": 37460 }, { "epoch": 0.8287830578588573, "grad_norm": 0.4255225360393524, "learning_rate": 1.4118580323502117e-06, "loss": 0.2495, "step": 37465 }, { "epoch": 0.8288936655003706, "grad_norm": 1.3859026432037354, "learning_rate": 1.4100784012338787e-06, "loss": 0.4049, "step": 37470 }, { "epoch": 0.8290042731418837, "grad_norm": 0.5988132953643799, "learning_rate": 1.4082998073479003e-06, "loss": 0.2611, "step": 37475 }, { "epoch": 0.829114880783397, "grad_norm": 0.40981969237327576, "learning_rate": 1.4065222509070419e-06, "loss": 0.3505, "step": 37480 }, { "epoch": 0.8292254884249103, "grad_norm": 1.2369980812072754, "learning_rate": 1.4047457321259362e-06, "loss": 0.3117, "step": 37485 }, { "epoch": 0.8293360960664236, "grad_norm": 0.8488494753837585, "learning_rate": 1.4029702512191024e-06, "loss": 0.2994, "step": 37490 }, { "epoch": 0.8294467037079368, "grad_norm": 0.9797278046607971, "learning_rate": 1.4011958084009315e-06, "loss": 0.3148, "step": 37495 }, { "epoch": 0.8295573113494501, "grad_norm": 1.3089193105697632, "learning_rate": 1.3994224038856796e-06, "loss": 0.3693, "step": 37500 }, { "epoch": 0.8296679189909634, "grad_norm": 1.3024388551712036, "learning_rate": 1.3976500378874903e-06, "loss": 0.3467, "step": 37505 }, { "epoch": 0.8297785266324766, "grad_norm": 0.5806398987770081, "learning_rate": 1.3958787106203753e-06, "loss": 0.2759, "step": 37510 }, { "epoch": 0.8298891342739899, "grad_norm": 0.5407693386077881, "learning_rate": 1.3941084222982214e-06, "loss": 0.2688, "step": 37515 }, { "epoch": 0.8299997419155032, "grad_norm": 0.932027280330658, "learning_rate": 1.3923391731347902e-06, "loss": 0.3615, "step": 37520 }, { "epoch": 0.8301103495570163, "grad_norm": 1.0692046880722046, "learning_rate": 1.390570963343718e-06, "loss": 0.432, "step": 37525 }, { "epoch": 0.8302209571985296, "grad_norm": 1.4429740905761719, "learning_rate": 1.3888037931385156e-06, "loss": 0.4247, "step": 37530 }, { "epoch": 0.8303315648400429, "grad_norm": 1.702808141708374, "learning_rate": 1.3870376627325698e-06, "loss": 0.2794, "step": 37535 }, { "epoch": 0.8304421724815562, "grad_norm": 0.6966885924339294, "learning_rate": 1.3852725723391358e-06, "loss": 0.1935, "step": 37540 }, { "epoch": 0.8305527801230694, "grad_norm": 0.9376307129859924, "learning_rate": 1.3835085221713518e-06, "loss": 0.3222, "step": 37545 }, { "epoch": 0.8306633877645827, "grad_norm": 1.823756456375122, "learning_rate": 1.3817455124422274e-06, "loss": 0.3942, "step": 37550 }, { "epoch": 0.830773995406096, "grad_norm": 1.8305089473724365, "learning_rate": 1.379983543364639e-06, "loss": 0.4271, "step": 37555 }, { "epoch": 0.8308846030476092, "grad_norm": 1.0505198240280151, "learning_rate": 1.3782226151513512e-06, "loss": 0.3896, "step": 37560 }, { "epoch": 0.8309952106891225, "grad_norm": 0.8156790733337402, "learning_rate": 1.3764627280149946e-06, "loss": 0.1499, "step": 37565 }, { "epoch": 0.8311058183306357, "grad_norm": 1.5131795406341553, "learning_rate": 1.3747038821680668e-06, "loss": 0.2775, "step": 37570 }, { "epoch": 0.831216425972149, "grad_norm": 0.666869044303894, "learning_rate": 1.3729460778229575e-06, "loss": 0.3546, "step": 37575 }, { "epoch": 0.8313270336136622, "grad_norm": 1.4092556238174438, "learning_rate": 1.3711893151919199e-06, "loss": 0.3689, "step": 37580 }, { "epoch": 0.8314376412551755, "grad_norm": 0.8501023054122925, "learning_rate": 1.3694335944870751e-06, "loss": 0.3582, "step": 37585 }, { "epoch": 0.8315482488966888, "grad_norm": 1.1559765338897705, "learning_rate": 1.367678915920433e-06, "loss": 0.4018, "step": 37590 }, { "epoch": 0.831658856538202, "grad_norm": 0.48376840353012085, "learning_rate": 1.3659252797038702e-06, "loss": 0.2478, "step": 37595 }, { "epoch": 0.8317694641797153, "grad_norm": 0.754156768321991, "learning_rate": 1.3641726860491322e-06, "loss": 0.3559, "step": 37600 }, { "epoch": 0.8318800718212286, "grad_norm": 0.9100866317749023, "learning_rate": 1.3624211351678495e-06, "loss": 0.3175, "step": 37605 }, { "epoch": 0.8319906794627419, "grad_norm": 0.512063205242157, "learning_rate": 1.360670627271522e-06, "loss": 0.3022, "step": 37610 }, { "epoch": 0.8321012871042551, "grad_norm": 1.0239489078521729, "learning_rate": 1.358921162571516e-06, "loss": 0.3359, "step": 37615 }, { "epoch": 0.8322118947457683, "grad_norm": 0.6986147165298462, "learning_rate": 1.3571727412790857e-06, "loss": 0.4029, "step": 37620 }, { "epoch": 0.8323225023872816, "grad_norm": 1.2118239402770996, "learning_rate": 1.3554253636053505e-06, "loss": 0.2454, "step": 37625 }, { "epoch": 0.8324331100287948, "grad_norm": 1.466785192489624, "learning_rate": 1.3536790297613044e-06, "loss": 0.2767, "step": 37630 }, { "epoch": 0.8325437176703081, "grad_norm": 1.6165523529052734, "learning_rate": 1.3519337399578202e-06, "loss": 0.2519, "step": 37635 }, { "epoch": 0.8326543253118214, "grad_norm": 1.2098839282989502, "learning_rate": 1.3501894944056348e-06, "loss": 0.284, "step": 37640 }, { "epoch": 0.8327649329533346, "grad_norm": 1.0995827913284302, "learning_rate": 1.3484462933153709e-06, "loss": 0.4207, "step": 37645 }, { "epoch": 0.8328755405948479, "grad_norm": 1.0317069292068481, "learning_rate": 1.3467041368975198e-06, "loss": 0.3696, "step": 37650 }, { "epoch": 0.8329861482363612, "grad_norm": 1.237473964691162, "learning_rate": 1.3449630253624391e-06, "loss": 0.3345, "step": 37655 }, { "epoch": 0.8330967558778745, "grad_norm": 1.0994993448257446, "learning_rate": 1.343222958920375e-06, "loss": 0.4439, "step": 37660 }, { "epoch": 0.8332073635193876, "grad_norm": 1.6514662504196167, "learning_rate": 1.3414839377814405e-06, "loss": 0.3276, "step": 37665 }, { "epoch": 0.8333179711609009, "grad_norm": 1.3017380237579346, "learning_rate": 1.339745962155613e-06, "loss": 0.2958, "step": 37670 }, { "epoch": 0.8334285788024142, "grad_norm": 1.0496207475662231, "learning_rate": 1.3380090322527617e-06, "loss": 0.4185, "step": 37675 }, { "epoch": 0.8335391864439274, "grad_norm": 0.9173368215560913, "learning_rate": 1.3362731482826185e-06, "loss": 0.325, "step": 37680 }, { "epoch": 0.8336497940854407, "grad_norm": 0.8362923264503479, "learning_rate": 1.334538310454786e-06, "loss": 0.3276, "step": 37685 }, { "epoch": 0.833760401726954, "grad_norm": 1.4144128561019897, "learning_rate": 1.3328045189787509e-06, "loss": 0.377, "step": 37690 }, { "epoch": 0.8338710093684673, "grad_norm": 0.959470808506012, "learning_rate": 1.3310717740638669e-06, "loss": 0.2405, "step": 37695 }, { "epoch": 0.8339816170099805, "grad_norm": 1.310828447341919, "learning_rate": 1.3293400759193576e-06, "loss": 0.4965, "step": 37700 }, { "epoch": 0.8340922246514938, "grad_norm": 0.9797999858856201, "learning_rate": 1.3276094247543304e-06, "loss": 0.4268, "step": 37705 }, { "epoch": 0.8342028322930071, "grad_norm": 0.7229366898536682, "learning_rate": 1.3258798207777613e-06, "loss": 0.2767, "step": 37710 }, { "epoch": 0.8343134399345202, "grad_norm": 1.3697623014450073, "learning_rate": 1.3241512641984933e-06, "loss": 0.2764, "step": 37715 }, { "epoch": 0.8344240475760335, "grad_norm": 0.7804010510444641, "learning_rate": 1.322423755225255e-06, "loss": 0.2702, "step": 37720 }, { "epoch": 0.8345346552175468, "grad_norm": 0.8471924662590027, "learning_rate": 1.3206972940666418e-06, "loss": 0.3997, "step": 37725 }, { "epoch": 0.83464526285906, "grad_norm": 1.192927360534668, "learning_rate": 1.3189718809311203e-06, "loss": 0.4255, "step": 37730 }, { "epoch": 0.8347558705005733, "grad_norm": 1.0419867038726807, "learning_rate": 1.3172475160270371e-06, "loss": 0.2768, "step": 37735 }, { "epoch": 0.8348664781420866, "grad_norm": 0.6907965540885925, "learning_rate": 1.3155241995626066e-06, "loss": 0.3121, "step": 37740 }, { "epoch": 0.8349770857835999, "grad_norm": 0.9236427545547485, "learning_rate": 1.3138019317459194e-06, "loss": 0.4381, "step": 37745 }, { "epoch": 0.8350876934251131, "grad_norm": 1.424660325050354, "learning_rate": 1.31208071278494e-06, "loss": 0.4016, "step": 37750 }, { "epoch": 0.8351983010666264, "grad_norm": 0.7456119060516357, "learning_rate": 1.3103605428875e-06, "loss": 0.3985, "step": 37755 }, { "epoch": 0.8353089087081396, "grad_norm": 0.4759611189365387, "learning_rate": 1.3086414222613153e-06, "loss": 0.2831, "step": 37760 }, { "epoch": 0.8354195163496528, "grad_norm": 0.8426387906074524, "learning_rate": 1.306923351113969e-06, "loss": 0.3065, "step": 37765 }, { "epoch": 0.8355301239911661, "grad_norm": 2.191974639892578, "learning_rate": 1.3052063296529116e-06, "loss": 0.3654, "step": 37770 }, { "epoch": 0.8356407316326794, "grad_norm": 1.4406254291534424, "learning_rate": 1.3034903580854786e-06, "loss": 0.2086, "step": 37775 }, { "epoch": 0.8357513392741927, "grad_norm": 0.8426774144172668, "learning_rate": 1.301775436618874e-06, "loss": 0.15, "step": 37780 }, { "epoch": 0.8358619469157059, "grad_norm": 1.191442608833313, "learning_rate": 1.3000615654601666e-06, "loss": 0.3917, "step": 37785 }, { "epoch": 0.8359725545572192, "grad_norm": 0.5563359260559082, "learning_rate": 1.2983487448163123e-06, "loss": 0.2463, "step": 37790 }, { "epoch": 0.8360831621987325, "grad_norm": 1.2785696983337402, "learning_rate": 1.2966369748941355e-06, "loss": 0.4206, "step": 37795 }, { "epoch": 0.8361937698402457, "grad_norm": 0.5301641821861267, "learning_rate": 1.294926255900323e-06, "loss": 0.2178, "step": 37800 }, { "epoch": 0.836304377481759, "grad_norm": 1.6415220499038696, "learning_rate": 1.2932165880414515e-06, "loss": 0.4651, "step": 37805 }, { "epoch": 0.8364149851232722, "grad_norm": 0.9719944000244141, "learning_rate": 1.291507971523962e-06, "loss": 0.3323, "step": 37810 }, { "epoch": 0.8365255927647854, "grad_norm": 0.976080060005188, "learning_rate": 1.2898004065541636e-06, "loss": 0.1975, "step": 37815 }, { "epoch": 0.8366362004062987, "grad_norm": 0.860754132270813, "learning_rate": 1.2880938933382503e-06, "loss": 0.2731, "step": 37820 }, { "epoch": 0.836746808047812, "grad_norm": 1.0668364763259888, "learning_rate": 1.286388432082284e-06, "loss": 0.3638, "step": 37825 }, { "epoch": 0.8368574156893253, "grad_norm": 0.9045087695121765, "learning_rate": 1.284684022992191e-06, "loss": 0.3664, "step": 37830 }, { "epoch": 0.8369680233308385, "grad_norm": 0.7365512847900391, "learning_rate": 1.2829806662737843e-06, "loss": 0.344, "step": 37835 }, { "epoch": 0.8370786309723518, "grad_norm": 0.7658262848854065, "learning_rate": 1.281278362132743e-06, "loss": 0.2893, "step": 37840 }, { "epoch": 0.8371892386138651, "grad_norm": 1.453442931175232, "learning_rate": 1.279577110774619e-06, "loss": 0.4574, "step": 37845 }, { "epoch": 0.8372998462553783, "grad_norm": 1.005017638206482, "learning_rate": 1.2778769124048396e-06, "loss": 0.2009, "step": 37850 }, { "epoch": 0.8374104538968915, "grad_norm": 0.8012054562568665, "learning_rate": 1.276177767228698e-06, "loss": 0.3902, "step": 37855 }, { "epoch": 0.8375210615384048, "grad_norm": 1.2887071371078491, "learning_rate": 1.27447967545137e-06, "loss": 0.3711, "step": 37860 }, { "epoch": 0.837631669179918, "grad_norm": 1.1570580005645752, "learning_rate": 1.2727826372779017e-06, "loss": 0.3336, "step": 37865 }, { "epoch": 0.8377422768214313, "grad_norm": 0.8433356285095215, "learning_rate": 1.2710866529132026e-06, "loss": 0.4073, "step": 37870 }, { "epoch": 0.8378528844629446, "grad_norm": 1.5107946395874023, "learning_rate": 1.2693917225620677e-06, "loss": 0.3956, "step": 37875 }, { "epoch": 0.8379634921044579, "grad_norm": 1.2045332193374634, "learning_rate": 1.267697846429161e-06, "loss": 0.405, "step": 37880 }, { "epoch": 0.8380740997459711, "grad_norm": 1.5002849102020264, "learning_rate": 1.2660050247190103e-06, "loss": 0.314, "step": 37885 }, { "epoch": 0.8381847073874844, "grad_norm": 0.9010340571403503, "learning_rate": 1.2643132576360307e-06, "loss": 0.3733, "step": 37890 }, { "epoch": 0.8382953150289977, "grad_norm": 0.6342419385910034, "learning_rate": 1.2626225453844997e-06, "loss": 0.1412, "step": 37895 }, { "epoch": 0.838405922670511, "grad_norm": 1.4225128889083862, "learning_rate": 1.2609328881685678e-06, "loss": 0.3765, "step": 37900 }, { "epoch": 0.8385165303120241, "grad_norm": 1.2081164121627808, "learning_rate": 1.2592442861922639e-06, "loss": 0.2961, "step": 37905 }, { "epoch": 0.8386271379535374, "grad_norm": 1.0347920656204224, "learning_rate": 1.2575567396594869e-06, "loss": 0.3287, "step": 37910 }, { "epoch": 0.8387377455950507, "grad_norm": 0.8790860176086426, "learning_rate": 1.2558702487740026e-06, "loss": 0.2941, "step": 37915 }, { "epoch": 0.8388483532365639, "grad_norm": 0.9397566914558411, "learning_rate": 1.2541848137394586e-06, "loss": 0.2449, "step": 37920 }, { "epoch": 0.8389589608780772, "grad_norm": 1.0081673860549927, "learning_rate": 1.2525004347593716e-06, "loss": 0.3194, "step": 37925 }, { "epoch": 0.8390695685195905, "grad_norm": 0.7801207900047302, "learning_rate": 1.2508171120371248e-06, "loss": 0.4091, "step": 37930 }, { "epoch": 0.8391801761611037, "grad_norm": 2.000302791595459, "learning_rate": 1.2491348457759823e-06, "loss": 0.4859, "step": 37935 }, { "epoch": 0.839290783802617, "grad_norm": 1.3098490238189697, "learning_rate": 1.247453636179079e-06, "loss": 0.48, "step": 37940 }, { "epoch": 0.8394013914441303, "grad_norm": 1.732001781463623, "learning_rate": 1.245773483449415e-06, "loss": 0.3235, "step": 37945 }, { "epoch": 0.8395119990856434, "grad_norm": 1.0065943002700806, "learning_rate": 1.2440943877898726e-06, "loss": 0.3315, "step": 37950 }, { "epoch": 0.8396226067271567, "grad_norm": 0.6806138753890991, "learning_rate": 1.2424163494032005e-06, "loss": 0.2869, "step": 37955 }, { "epoch": 0.83973321436867, "grad_norm": 0.6667391657829285, "learning_rate": 1.240739368492022e-06, "loss": 0.43, "step": 37960 }, { "epoch": 0.8398438220101833, "grad_norm": 0.7923169136047363, "learning_rate": 1.2390634452588346e-06, "loss": 0.1578, "step": 37965 }, { "epoch": 0.8399544296516965, "grad_norm": 1.62882661819458, "learning_rate": 1.237388579905998e-06, "loss": 0.3692, "step": 37970 }, { "epoch": 0.8400650372932098, "grad_norm": 0.534967839717865, "learning_rate": 1.2357147726357576e-06, "loss": 0.3108, "step": 37975 }, { "epoch": 0.8401756449347231, "grad_norm": 0.9472229480743408, "learning_rate": 1.234042023650226e-06, "loss": 0.2285, "step": 37980 }, { "epoch": 0.8402862525762363, "grad_norm": 1.8341338634490967, "learning_rate": 1.2323703331513814e-06, "loss": 0.2726, "step": 37985 }, { "epoch": 0.8403968602177496, "grad_norm": 1.2155627012252808, "learning_rate": 1.230699701341085e-06, "loss": 0.2886, "step": 37990 }, { "epoch": 0.8405074678592629, "grad_norm": 0.6618109941482544, "learning_rate": 1.229030128421066e-06, "loss": 0.2438, "step": 37995 }, { "epoch": 0.840618075500776, "grad_norm": 0.8978846669197083, "learning_rate": 1.227361614592917e-06, "loss": 0.2431, "step": 38000 }, { "epoch": 0.8407286831422893, "grad_norm": 0.9433590769767761, "learning_rate": 1.2256941600581185e-06, "loss": 0.3226, "step": 38005 }, { "epoch": 0.8408392907838026, "grad_norm": 0.5628076195716858, "learning_rate": 1.2240277650180144e-06, "loss": 0.2354, "step": 38010 }, { "epoch": 0.8409498984253159, "grad_norm": 2.181107759475708, "learning_rate": 1.2223624296738157e-06, "loss": 0.358, "step": 38015 }, { "epoch": 0.8410605060668291, "grad_norm": 1.3532944917678833, "learning_rate": 1.2206981542266161e-06, "loss": 0.3836, "step": 38020 }, { "epoch": 0.8411711137083424, "grad_norm": 1.5280908346176147, "learning_rate": 1.2190349388773781e-06, "loss": 0.1477, "step": 38025 }, { "epoch": 0.8412817213498557, "grad_norm": 2.354236364364624, "learning_rate": 1.2173727838269255e-06, "loss": 0.3561, "step": 38030 }, { "epoch": 0.841392328991369, "grad_norm": 1.6431019306182861, "learning_rate": 1.2157116892759725e-06, "loss": 0.2501, "step": 38035 }, { "epoch": 0.8415029366328822, "grad_norm": 0.9193219542503357, "learning_rate": 1.2140516554250937e-06, "loss": 0.2726, "step": 38040 }, { "epoch": 0.8416135442743954, "grad_norm": 0.9254633784294128, "learning_rate": 1.2123926824747323e-06, "loss": 0.2949, "step": 38045 }, { "epoch": 0.8417241519159087, "grad_norm": 0.9661634564399719, "learning_rate": 1.2107347706252137e-06, "loss": 0.3905, "step": 38050 }, { "epoch": 0.8418347595574219, "grad_norm": 0.6256411075592041, "learning_rate": 1.20907792007673e-06, "loss": 0.4621, "step": 38055 }, { "epoch": 0.8419453671989352, "grad_norm": 1.512451171875, "learning_rate": 1.2074221310293444e-06, "loss": 0.3427, "step": 38060 }, { "epoch": 0.8420559748404485, "grad_norm": 1.6459989547729492, "learning_rate": 1.2057674036829946e-06, "loss": 0.2108, "step": 38065 }, { "epoch": 0.8421665824819617, "grad_norm": 1.443810224533081, "learning_rate": 1.2041137382374834e-06, "loss": 0.3249, "step": 38070 }, { "epoch": 0.842277190123475, "grad_norm": 1.731126308441162, "learning_rate": 1.2024611348924953e-06, "loss": 0.3674, "step": 38075 }, { "epoch": 0.8423877977649883, "grad_norm": 0.7224918603897095, "learning_rate": 1.2008095938475816e-06, "loss": 0.3552, "step": 38080 }, { "epoch": 0.8424984054065016, "grad_norm": 0.6246736645698547, "learning_rate": 1.199159115302161e-06, "loss": 0.2653, "step": 38085 }, { "epoch": 0.8426090130480148, "grad_norm": 0.9377990961074829, "learning_rate": 1.1975096994555325e-06, "loss": 0.2369, "step": 38090 }, { "epoch": 0.842719620689528, "grad_norm": 0.7029932141304016, "learning_rate": 1.1958613465068635e-06, "loss": 0.3616, "step": 38095 }, { "epoch": 0.8428302283310413, "grad_norm": 1.4917442798614502, "learning_rate": 1.1942140566551862e-06, "loss": 0.3536, "step": 38100 }, { "epoch": 0.8429408359725545, "grad_norm": 1.0546317100524902, "learning_rate": 1.1925678300994148e-06, "loss": 0.2544, "step": 38105 }, { "epoch": 0.8430514436140678, "grad_norm": 1.695128083229065, "learning_rate": 1.1909226670383323e-06, "loss": 0.2553, "step": 38110 }, { "epoch": 0.8431620512555811, "grad_norm": 1.0147682428359985, "learning_rate": 1.1892785676705855e-06, "loss": 0.1541, "step": 38115 }, { "epoch": 0.8432726588970944, "grad_norm": 1.604799509048462, "learning_rate": 1.187635532194704e-06, "loss": 0.2866, "step": 38120 }, { "epoch": 0.8433832665386076, "grad_norm": 1.2027168273925781, "learning_rate": 1.1859935608090855e-06, "loss": 0.3269, "step": 38125 }, { "epoch": 0.8434938741801209, "grad_norm": 1.5589418411254883, "learning_rate": 1.1843526537119887e-06, "loss": 0.2736, "step": 38130 }, { "epoch": 0.8436044818216342, "grad_norm": 0.7046040892601013, "learning_rate": 1.1827128111015617e-06, "loss": 0.3086, "step": 38135 }, { "epoch": 0.8437150894631473, "grad_norm": 1.005470633506775, "learning_rate": 1.181074033175813e-06, "loss": 0.386, "step": 38140 }, { "epoch": 0.8438256971046606, "grad_norm": 1.5274831056594849, "learning_rate": 1.1794363201326186e-06, "loss": 0.2738, "step": 38145 }, { "epoch": 0.8439363047461739, "grad_norm": 0.7993412017822266, "learning_rate": 1.1777996721697393e-06, "loss": 0.2632, "step": 38150 }, { "epoch": 0.8440469123876871, "grad_norm": 1.0823944807052612, "learning_rate": 1.1761640894847982e-06, "loss": 0.3226, "step": 38155 }, { "epoch": 0.8441575200292004, "grad_norm": 0.6152334809303284, "learning_rate": 1.1745295722752858e-06, "loss": 0.3856, "step": 38160 }, { "epoch": 0.8442681276707137, "grad_norm": 0.8490246534347534, "learning_rate": 1.1728961207385758e-06, "loss": 0.5064, "step": 38165 }, { "epoch": 0.844378735312227, "grad_norm": 0.7592059969902039, "learning_rate": 1.1712637350719059e-06, "loss": 0.2822, "step": 38170 }, { "epoch": 0.8444893429537402, "grad_norm": 1.2234715223312378, "learning_rate": 1.1696324154723847e-06, "loss": 0.3137, "step": 38175 }, { "epoch": 0.8445999505952535, "grad_norm": 1.4912220239639282, "learning_rate": 1.1680021621369964e-06, "loss": 0.3552, "step": 38180 }, { "epoch": 0.8447105582367667, "grad_norm": 1.2265630960464478, "learning_rate": 1.1663729752625862e-06, "loss": 0.2714, "step": 38185 }, { "epoch": 0.8448211658782799, "grad_norm": 0.949310302734375, "learning_rate": 1.1647448550458861e-06, "loss": 0.3409, "step": 38190 }, { "epoch": 0.8449317735197932, "grad_norm": 1.6800918579101562, "learning_rate": 1.1631178016834887e-06, "loss": 0.3622, "step": 38195 }, { "epoch": 0.8450423811613065, "grad_norm": 0.4655797481536865, "learning_rate": 1.1614918153718569e-06, "loss": 0.2608, "step": 38200 }, { "epoch": 0.8451529888028197, "grad_norm": 1.298718810081482, "learning_rate": 1.1598668963073312e-06, "loss": 0.2905, "step": 38205 }, { "epoch": 0.845263596444333, "grad_norm": 1.1197125911712646, "learning_rate": 1.1582430446861226e-06, "loss": 0.2765, "step": 38210 }, { "epoch": 0.8453742040858463, "grad_norm": 0.8575302362442017, "learning_rate": 1.1566202607043031e-06, "loss": 0.3151, "step": 38215 }, { "epoch": 0.8454848117273596, "grad_norm": 1.4285908937454224, "learning_rate": 1.1549985445578294e-06, "loss": 0.189, "step": 38220 }, { "epoch": 0.8455954193688728, "grad_norm": 0.5749222636222839, "learning_rate": 1.1533778964425246e-06, "loss": 0.2904, "step": 38225 }, { "epoch": 0.8457060270103861, "grad_norm": 0.8609181046485901, "learning_rate": 1.1517583165540736e-06, "loss": 0.3244, "step": 38230 }, { "epoch": 0.8458166346518993, "grad_norm": 1.0341261625289917, "learning_rate": 1.1501398050880464e-06, "loss": 0.2742, "step": 38235 }, { "epoch": 0.8459272422934125, "grad_norm": 1.154068946838379, "learning_rate": 1.14852236223988e-06, "loss": 0.4552, "step": 38240 }, { "epoch": 0.8460378499349258, "grad_norm": 1.3925788402557373, "learning_rate": 1.1469059882048717e-06, "loss": 0.2586, "step": 38245 }, { "epoch": 0.8461484575764391, "grad_norm": 1.4936860799789429, "learning_rate": 1.1452906831782052e-06, "loss": 0.2737, "step": 38250 }, { "epoch": 0.8462590652179524, "grad_norm": 0.9646159410476685, "learning_rate": 1.1436764473549267e-06, "loss": 0.3087, "step": 38255 }, { "epoch": 0.8463696728594656, "grad_norm": 1.2146304845809937, "learning_rate": 1.1420632809299514e-06, "loss": 0.4946, "step": 38260 }, { "epoch": 0.8464802805009789, "grad_norm": 1.901719331741333, "learning_rate": 1.1404511840980725e-06, "loss": 0.4066, "step": 38265 }, { "epoch": 0.8465908881424922, "grad_norm": 1.079491138458252, "learning_rate": 1.1388401570539476e-06, "loss": 0.4345, "step": 38270 }, { "epoch": 0.8467014957840054, "grad_norm": 0.9815348982810974, "learning_rate": 1.1372301999921098e-06, "loss": 0.2254, "step": 38275 }, { "epoch": 0.8468121034255186, "grad_norm": 0.6722994446754456, "learning_rate": 1.1356213131069626e-06, "loss": 0.2163, "step": 38280 }, { "epoch": 0.8469227110670319, "grad_norm": 1.1111066341400146, "learning_rate": 1.1340134965927718e-06, "loss": 0.2844, "step": 38285 }, { "epoch": 0.8470333187085451, "grad_norm": 0.6622271537780762, "learning_rate": 1.1324067506436865e-06, "loss": 0.1687, "step": 38290 }, { "epoch": 0.8471439263500584, "grad_norm": 1.4185512065887451, "learning_rate": 1.1308010754537202e-06, "loss": 0.3227, "step": 38295 }, { "epoch": 0.8472545339915717, "grad_norm": 0.9687464833259583, "learning_rate": 1.1291964712167535e-06, "loss": 0.3178, "step": 38300 }, { "epoch": 0.847365141633085, "grad_norm": 0.11671901494264603, "learning_rate": 1.1275929381265472e-06, "loss": 0.2914, "step": 38305 }, { "epoch": 0.8474757492745982, "grad_norm": 1.0729323625564575, "learning_rate": 1.125990476376727e-06, "loss": 0.2475, "step": 38310 }, { "epoch": 0.8475863569161115, "grad_norm": 1.417917013168335, "learning_rate": 1.1243890861607842e-06, "loss": 0.309, "step": 38315 }, { "epoch": 0.8476969645576248, "grad_norm": 1.1602250337600708, "learning_rate": 1.1227887676720917e-06, "loss": 0.2422, "step": 38320 }, { "epoch": 0.847807572199138, "grad_norm": 1.3342052698135376, "learning_rate": 1.1211895211038881e-06, "loss": 0.3492, "step": 38325 }, { "epoch": 0.8479181798406512, "grad_norm": 1.4505939483642578, "learning_rate": 1.1195913466492747e-06, "loss": 0.4136, "step": 38330 }, { "epoch": 0.8480287874821645, "grad_norm": 1.2123280763626099, "learning_rate": 1.117994244501237e-06, "loss": 0.3743, "step": 38335 }, { "epoch": 0.8481393951236778, "grad_norm": 1.0348316431045532, "learning_rate": 1.1163982148526264e-06, "loss": 0.3518, "step": 38340 }, { "epoch": 0.848250002765191, "grad_norm": 0.7313585877418518, "learning_rate": 1.1148032578961543e-06, "loss": 0.1445, "step": 38345 }, { "epoch": 0.8483606104067043, "grad_norm": 2.526779890060425, "learning_rate": 1.1132093738244187e-06, "loss": 0.3851, "step": 38350 }, { "epoch": 0.8484712180482176, "grad_norm": 1.3276973962783813, "learning_rate": 1.1116165628298803e-06, "loss": 0.3444, "step": 38355 }, { "epoch": 0.8485818256897308, "grad_norm": 0.944996178150177, "learning_rate": 1.110024825104864e-06, "loss": 0.2747, "step": 38360 }, { "epoch": 0.8486924333312441, "grad_norm": 1.1918593645095825, "learning_rate": 1.1084341608415784e-06, "loss": 0.2786, "step": 38365 }, { "epoch": 0.8488030409727574, "grad_norm": 1.2000691890716553, "learning_rate": 1.1068445702320962e-06, "loss": 0.3317, "step": 38370 }, { "epoch": 0.8489136486142705, "grad_norm": 0.970880389213562, "learning_rate": 1.1052560534683533e-06, "loss": 0.2645, "step": 38375 }, { "epoch": 0.8490242562557838, "grad_norm": 1.0081202983856201, "learning_rate": 1.103668610742168e-06, "loss": 0.3205, "step": 38380 }, { "epoch": 0.8491348638972971, "grad_norm": 1.4251919984817505, "learning_rate": 1.1020822422452215e-06, "loss": 0.3291, "step": 38385 }, { "epoch": 0.8492454715388104, "grad_norm": 1.0390520095825195, "learning_rate": 1.1004969481690687e-06, "loss": 0.3987, "step": 38390 }, { "epoch": 0.8493560791803236, "grad_norm": 1.0890159606933594, "learning_rate": 1.0989127287051348e-06, "loss": 0.4466, "step": 38395 }, { "epoch": 0.8494666868218369, "grad_norm": 1.2207971811294556, "learning_rate": 1.0973295840447073e-06, "loss": 0.3475, "step": 38400 }, { "epoch": 0.8495772944633502, "grad_norm": 1.0834448337554932, "learning_rate": 1.0957475143789563e-06, "loss": 0.3805, "step": 38405 }, { "epoch": 0.8496879021048634, "grad_norm": 0.7580187916755676, "learning_rate": 1.0941665198989171e-06, "loss": 0.2658, "step": 38410 }, { "epoch": 0.8497985097463767, "grad_norm": 0.5551120042800903, "learning_rate": 1.0925866007954876e-06, "loss": 0.2317, "step": 38415 }, { "epoch": 0.84990911738789, "grad_norm": 0.46754300594329834, "learning_rate": 1.091007757259449e-06, "loss": 0.2375, "step": 38420 }, { "epoch": 0.8500197250294032, "grad_norm": 0.8704552054405212, "learning_rate": 1.0894299894814465e-06, "loss": 0.3791, "step": 38425 }, { "epoch": 0.8501303326709164, "grad_norm": 1.2944942712783813, "learning_rate": 1.0878532976519874e-06, "loss": 0.3441, "step": 38430 }, { "epoch": 0.8502409403124297, "grad_norm": 1.135328769683838, "learning_rate": 1.086277681961464e-06, "loss": 0.3061, "step": 38435 }, { "epoch": 0.850351547953943, "grad_norm": 1.7061636447906494, "learning_rate": 1.0847031426001308e-06, "loss": 0.3885, "step": 38440 }, { "epoch": 0.8504621555954562, "grad_norm": 0.1815148890018463, "learning_rate": 1.083129679758108e-06, "loss": 0.2737, "step": 38445 }, { "epoch": 0.8505727632369695, "grad_norm": 0.5034037232398987, "learning_rate": 1.081557293625396e-06, "loss": 0.2141, "step": 38450 }, { "epoch": 0.8506833708784828, "grad_norm": 1.056228756904602, "learning_rate": 1.0799859843918603e-06, "loss": 0.3053, "step": 38455 }, { "epoch": 0.850793978519996, "grad_norm": 0.8823147416114807, "learning_rate": 1.0784157522472293e-06, "loss": 0.3061, "step": 38460 }, { "epoch": 0.8509045861615093, "grad_norm": 2.88033127784729, "learning_rate": 1.0768465973811148e-06, "loss": 0.2881, "step": 38465 }, { "epoch": 0.8510151938030225, "grad_norm": 1.4433271884918213, "learning_rate": 1.0752785199829906e-06, "loss": 0.2546, "step": 38470 }, { "epoch": 0.8511258014445358, "grad_norm": 1.2056711912155151, "learning_rate": 1.0737115202421977e-06, "loss": 0.3428, "step": 38475 }, { "epoch": 0.851236409086049, "grad_norm": 0.754571795463562, "learning_rate": 1.0721455983479545e-06, "loss": 0.3693, "step": 38480 }, { "epoch": 0.8513470167275623, "grad_norm": 0.8781501650810242, "learning_rate": 1.0705807544893477e-06, "loss": 0.3056, "step": 38485 }, { "epoch": 0.8514576243690756, "grad_norm": 0.8666741847991943, "learning_rate": 1.0690169888553238e-06, "loss": 0.2813, "step": 38490 }, { "epoch": 0.8515682320105888, "grad_norm": 1.2805302143096924, "learning_rate": 1.0674543016347162e-06, "loss": 0.4043, "step": 38495 }, { "epoch": 0.8516788396521021, "grad_norm": 0.6209708452224731, "learning_rate": 1.0658926930162094e-06, "loss": 0.3203, "step": 38500 }, { "epoch": 0.8517894472936154, "grad_norm": 0.8009853959083557, "learning_rate": 1.0643321631883753e-06, "loss": 0.2559, "step": 38505 }, { "epoch": 0.8519000549351287, "grad_norm": 1.0893161296844482, "learning_rate": 1.062772712339647e-06, "loss": 0.4552, "step": 38510 }, { "epoch": 0.8520106625766419, "grad_norm": 1.2813453674316406, "learning_rate": 1.0612143406583198e-06, "loss": 0.2906, "step": 38515 }, { "epoch": 0.8521212702181551, "grad_norm": 1.1698710918426514, "learning_rate": 1.0596570483325742e-06, "loss": 0.2955, "step": 38520 }, { "epoch": 0.8522318778596684, "grad_norm": 0.7483323216438293, "learning_rate": 1.0581008355504529e-06, "loss": 0.3763, "step": 38525 }, { "epoch": 0.8523424855011816, "grad_norm": 1.439063549041748, "learning_rate": 1.0565457024998616e-06, "loss": 0.3763, "step": 38530 }, { "epoch": 0.8524530931426949, "grad_norm": 1.4976032972335815, "learning_rate": 1.0549916493685874e-06, "loss": 0.4521, "step": 38535 }, { "epoch": 0.8525637007842082, "grad_norm": 0.8291392922401428, "learning_rate": 1.0534386763442827e-06, "loss": 0.359, "step": 38540 }, { "epoch": 0.8526743084257214, "grad_norm": 1.1712294816970825, "learning_rate": 1.051886783614463e-06, "loss": 0.2656, "step": 38545 }, { "epoch": 0.8527849160672347, "grad_norm": 0.9078739881515503, "learning_rate": 1.050335971366524e-06, "loss": 0.3581, "step": 38550 }, { "epoch": 0.852895523708748, "grad_norm": 0.7919604182243347, "learning_rate": 1.0487862397877258e-06, "loss": 0.2847, "step": 38555 }, { "epoch": 0.8530061313502613, "grad_norm": 1.2567859888076782, "learning_rate": 1.047237589065192e-06, "loss": 0.3926, "step": 38560 }, { "epoch": 0.8531167389917744, "grad_norm": 1.419980764389038, "learning_rate": 1.0456900193859276e-06, "loss": 0.3249, "step": 38565 }, { "epoch": 0.8532273466332877, "grad_norm": 0.6410039067268372, "learning_rate": 1.044143530936802e-06, "loss": 0.3373, "step": 38570 }, { "epoch": 0.853337954274801, "grad_norm": 1.1550325155258179, "learning_rate": 1.0425981239045457e-06, "loss": 0.203, "step": 38575 }, { "epoch": 0.8534485619163142, "grad_norm": 1.1208938360214233, "learning_rate": 1.041053798475774e-06, "loss": 0.2945, "step": 38580 }, { "epoch": 0.8535591695578275, "grad_norm": 0.987612247467041, "learning_rate": 1.0395105548369623e-06, "loss": 0.321, "step": 38585 }, { "epoch": 0.8536697771993408, "grad_norm": 1.0321121215820312, "learning_rate": 1.0379683931744512e-06, "loss": 0.3315, "step": 38590 }, { "epoch": 0.8537803848408541, "grad_norm": 1.1224853992462158, "learning_rate": 1.036427313674463e-06, "loss": 0.3122, "step": 38595 }, { "epoch": 0.8538909924823673, "grad_norm": 0.2931019067764282, "learning_rate": 1.0348873165230788e-06, "loss": 0.202, "step": 38600 }, { "epoch": 0.8540016001238806, "grad_norm": 0.8245185017585754, "learning_rate": 1.0333484019062546e-06, "loss": 0.246, "step": 38605 }, { "epoch": 0.8541122077653939, "grad_norm": 0.503246545791626, "learning_rate": 1.0318105700098146e-06, "loss": 0.3232, "step": 38610 }, { "epoch": 0.854222815406907, "grad_norm": 0.5913194417953491, "learning_rate": 1.0302738210194463e-06, "loss": 0.3157, "step": 38615 }, { "epoch": 0.8543334230484203, "grad_norm": 1.005173683166504, "learning_rate": 1.0287381551207166e-06, "loss": 0.3447, "step": 38620 }, { "epoch": 0.8544440306899336, "grad_norm": 1.2595500946044922, "learning_rate": 1.0272035724990582e-06, "loss": 0.3781, "step": 38625 }, { "epoch": 0.8545546383314468, "grad_norm": 2.1431400775909424, "learning_rate": 1.025670073339764e-06, "loss": 0.2202, "step": 38630 }, { "epoch": 0.8546652459729601, "grad_norm": 0.8147755861282349, "learning_rate": 1.0241376578280115e-06, "loss": 0.4397, "step": 38635 }, { "epoch": 0.8547758536144734, "grad_norm": 0.8964388966560364, "learning_rate": 1.0226063261488373e-06, "loss": 0.3987, "step": 38640 }, { "epoch": 0.8548864612559867, "grad_norm": 0.8753561973571777, "learning_rate": 1.0210760784871442e-06, "loss": 0.2853, "step": 38645 }, { "epoch": 0.8549970688974999, "grad_norm": 1.7778514623641968, "learning_rate": 1.0195469150277149e-06, "loss": 0.4105, "step": 38650 }, { "epoch": 0.8551076765390132, "grad_norm": 1.2013181447982788, "learning_rate": 1.0180188359551957e-06, "loss": 0.3704, "step": 38655 }, { "epoch": 0.8552182841805264, "grad_norm": 0.3059025704860687, "learning_rate": 1.0164918414540947e-06, "loss": 0.3253, "step": 38660 }, { "epoch": 0.8553288918220396, "grad_norm": 0.9875783920288086, "learning_rate": 1.0149659317088035e-06, "loss": 0.2508, "step": 38665 }, { "epoch": 0.8554394994635529, "grad_norm": 1.4277801513671875, "learning_rate": 1.0134411069035742e-06, "loss": 0.2624, "step": 38670 }, { "epoch": 0.8555501071050662, "grad_norm": 1.3336396217346191, "learning_rate": 1.0119173672225236e-06, "loss": 0.3699, "step": 38675 }, { "epoch": 0.8556607147465795, "grad_norm": 1.5855810642242432, "learning_rate": 1.0103947128496484e-06, "loss": 0.3191, "step": 38680 }, { "epoch": 0.8557713223880927, "grad_norm": 0.900917649269104, "learning_rate": 1.0088731439688082e-06, "loss": 0.3625, "step": 38685 }, { "epoch": 0.855881930029606, "grad_norm": 0.9806206226348877, "learning_rate": 1.0073526607637274e-06, "loss": 0.3383, "step": 38690 }, { "epoch": 0.8559925376711193, "grad_norm": 1.2014625072479248, "learning_rate": 1.0058332634180078e-06, "loss": 0.4706, "step": 38695 }, { "epoch": 0.8561031453126325, "grad_norm": 0.5110193490982056, "learning_rate": 1.0043149521151184e-06, "loss": 0.2261, "step": 38700 }, { "epoch": 0.8562137529541458, "grad_norm": 0.749117910861969, "learning_rate": 1.002797727038387e-06, "loss": 0.3318, "step": 38705 }, { "epoch": 0.856324360595659, "grad_norm": 0.8880307078361511, "learning_rate": 1.0012815883710269e-06, "loss": 0.3231, "step": 38710 }, { "epoch": 0.8564349682371722, "grad_norm": 1.1084824800491333, "learning_rate": 9.99766536296103e-07, "loss": 0.3731, "step": 38715 }, { "epoch": 0.8565455758786855, "grad_norm": 1.14706552028656, "learning_rate": 9.98252570996563e-07, "loss": 0.4125, "step": 38720 }, { "epoch": 0.8566561835201988, "grad_norm": 1.1827332973480225, "learning_rate": 9.967396926552186e-07, "loss": 0.3749, "step": 38725 }, { "epoch": 0.8567667911617121, "grad_norm": 0.9739878177642822, "learning_rate": 9.95227901454744e-07, "loss": 0.2397, "step": 38730 }, { "epoch": 0.8568773988032253, "grad_norm": 1.1042393445968628, "learning_rate": 9.937171975776916e-07, "loss": 0.3554, "step": 38735 }, { "epoch": 0.8569880064447386, "grad_norm": 0.7292569875717163, "learning_rate": 9.922075812064802e-07, "loss": 0.2512, "step": 38740 }, { "epoch": 0.8570986140862519, "grad_norm": 0.44818270206451416, "learning_rate": 9.906990525233885e-07, "loss": 0.2082, "step": 38745 }, { "epoch": 0.8572092217277651, "grad_norm": 0.805273175239563, "learning_rate": 9.891916117105772e-07, "loss": 0.2382, "step": 38750 }, { "epoch": 0.8573198293692783, "grad_norm": 1.2403844594955444, "learning_rate": 9.876852589500686e-07, "loss": 0.2885, "step": 38755 }, { "epoch": 0.8574304370107916, "grad_norm": 1.2931625843048096, "learning_rate": 9.861799944237494e-07, "loss": 0.4383, "step": 38760 }, { "epoch": 0.8575410446523049, "grad_norm": 1.1473499536514282, "learning_rate": 9.846758183133853e-07, "loss": 0.3537, "step": 38765 }, { "epoch": 0.8576516522938181, "grad_norm": 1.6902093887329102, "learning_rate": 9.831727308006045e-07, "loss": 0.2635, "step": 38770 }, { "epoch": 0.8577622599353314, "grad_norm": 0.6627709865570068, "learning_rate": 9.816707320669006e-07, "loss": 0.2081, "step": 38775 }, { "epoch": 0.8578728675768447, "grad_norm": 1.4568915367126465, "learning_rate": 9.801698222936428e-07, "loss": 0.406, "step": 38780 }, { "epoch": 0.8579834752183579, "grad_norm": 1.0318299531936646, "learning_rate": 9.786700016620665e-07, "loss": 0.4107, "step": 38785 }, { "epoch": 0.8580940828598712, "grad_norm": 0.7254602909088135, "learning_rate": 9.771712703532688e-07, "loss": 0.401, "step": 38790 }, { "epoch": 0.8582046905013845, "grad_norm": 1.6828927993774414, "learning_rate": 9.756736285482271e-07, "loss": 0.2451, "step": 38795 }, { "epoch": 0.8583152981428978, "grad_norm": 0.9297253489494324, "learning_rate": 9.741770764277802e-07, "loss": 0.3326, "step": 38800 }, { "epoch": 0.8584259057844109, "grad_norm": 0.843186616897583, "learning_rate": 9.726816141726315e-07, "loss": 0.2853, "step": 38805 }, { "epoch": 0.8585365134259242, "grad_norm": 0.7660995125770569, "learning_rate": 9.71187241963364e-07, "loss": 0.2827, "step": 38810 }, { "epoch": 0.8586471210674375, "grad_norm": 1.3793662786483765, "learning_rate": 9.696939599804188e-07, "loss": 0.3574, "step": 38815 }, { "epoch": 0.8587577287089507, "grad_norm": 2.467979907989502, "learning_rate": 9.682017684041111e-07, "loss": 0.2069, "step": 38820 }, { "epoch": 0.858868336350464, "grad_norm": 0.7566235065460205, "learning_rate": 9.66710667414623e-07, "loss": 0.3924, "step": 38825 }, { "epoch": 0.8589789439919773, "grad_norm": 0.6390632390975952, "learning_rate": 9.652206571919997e-07, "loss": 0.3486, "step": 38830 }, { "epoch": 0.8590895516334905, "grad_norm": 0.919097363948822, "learning_rate": 9.637317379161648e-07, "loss": 0.2649, "step": 38835 }, { "epoch": 0.8592001592750038, "grad_norm": 0.5415529608726501, "learning_rate": 9.622439097669045e-07, "loss": 0.3262, "step": 38840 }, { "epoch": 0.8593107669165171, "grad_norm": 1.4456264972686768, "learning_rate": 9.607571729238684e-07, "loss": 0.3148, "step": 38845 }, { "epoch": 0.8594213745580302, "grad_norm": 0.8884249329566956, "learning_rate": 9.592715275665854e-07, "loss": 0.4213, "step": 38850 }, { "epoch": 0.8595319821995435, "grad_norm": 1.3805402517318726, "learning_rate": 9.577869738744461e-07, "loss": 0.3429, "step": 38855 }, { "epoch": 0.8596425898410568, "grad_norm": 1.205978512763977, "learning_rate": 9.56303512026705e-07, "loss": 0.3964, "step": 38860 }, { "epoch": 0.8597531974825701, "grad_norm": 1.2222223281860352, "learning_rate": 9.548211422024955e-07, "loss": 0.311, "step": 38865 }, { "epoch": 0.8598638051240833, "grad_norm": 1.0879791975021362, "learning_rate": 9.533398645808111e-07, "loss": 0.2542, "step": 38870 }, { "epoch": 0.8599744127655966, "grad_norm": 1.6054173707962036, "learning_rate": 9.518596793405121e-07, "loss": 0.3367, "step": 38875 }, { "epoch": 0.8600850204071099, "grad_norm": 1.268158197402954, "learning_rate": 9.503805866603366e-07, "loss": 0.3817, "step": 38880 }, { "epoch": 0.8601956280486232, "grad_norm": 1.640018343925476, "learning_rate": 9.489025867188828e-07, "loss": 0.3314, "step": 38885 }, { "epoch": 0.8603062356901364, "grad_norm": 0.40346774458885193, "learning_rate": 9.474256796946124e-07, "loss": 0.2948, "step": 38890 }, { "epoch": 0.8604168433316497, "grad_norm": 1.5606896877288818, "learning_rate": 9.459498657658705e-07, "loss": 0.4259, "step": 38895 }, { "epoch": 0.8605274509731629, "grad_norm": 0.8125496506690979, "learning_rate": 9.44475145110858e-07, "loss": 0.4071, "step": 38900 }, { "epoch": 0.8606380586146761, "grad_norm": 1.3903517723083496, "learning_rate": 9.430015179076424e-07, "loss": 0.3323, "step": 38905 }, { "epoch": 0.8607486662561894, "grad_norm": 1.0494273900985718, "learning_rate": 9.415289843341702e-07, "loss": 0.2842, "step": 38910 }, { "epoch": 0.8608592738977027, "grad_norm": 2.01436448097229, "learning_rate": 9.400575445682469e-07, "loss": 0.3947, "step": 38915 }, { "epoch": 0.8609698815392159, "grad_norm": 0.588791012763977, "learning_rate": 9.385871987875461e-07, "loss": 0.2469, "step": 38920 }, { "epoch": 0.8610804891807292, "grad_norm": 1.6800273656845093, "learning_rate": 9.371179471696167e-07, "loss": 0.3286, "step": 38925 }, { "epoch": 0.8611910968222425, "grad_norm": 0.5381014347076416, "learning_rate": 9.356497898918637e-07, "loss": 0.3176, "step": 38930 }, { "epoch": 0.8613017044637558, "grad_norm": 0.6753323078155518, "learning_rate": 9.341827271315729e-07, "loss": 0.2871, "step": 38935 }, { "epoch": 0.861412312105269, "grad_norm": 1.2176859378814697, "learning_rate": 9.327167590658892e-07, "loss": 0.2484, "step": 38940 }, { "epoch": 0.8615229197467822, "grad_norm": 0.38442328572273254, "learning_rate": 9.312518858718255e-07, "loss": 0.338, "step": 38945 }, { "epoch": 0.8616335273882955, "grad_norm": 1.5061479806900024, "learning_rate": 9.297881077262683e-07, "loss": 0.3084, "step": 38950 }, { "epoch": 0.8617441350298087, "grad_norm": 1.151477336883545, "learning_rate": 9.283254248059692e-07, "loss": 0.3054, "step": 38955 }, { "epoch": 0.861854742671322, "grad_norm": 0.666083574295044, "learning_rate": 9.268638372875394e-07, "loss": 0.3642, "step": 38960 }, { "epoch": 0.8619653503128353, "grad_norm": 1.069635272026062, "learning_rate": 9.254033453474732e-07, "loss": 0.3181, "step": 38965 }, { "epoch": 0.8620759579543485, "grad_norm": 1.159324049949646, "learning_rate": 9.239439491621238e-07, "loss": 0.3686, "step": 38970 }, { "epoch": 0.8621865655958618, "grad_norm": 1.5790640115737915, "learning_rate": 9.224856489077061e-07, "loss": 0.2997, "step": 38975 }, { "epoch": 0.8622971732373751, "grad_norm": 1.3074733018875122, "learning_rate": 9.210284447603157e-07, "loss": 0.2749, "step": 38980 }, { "epoch": 0.8624077808788884, "grad_norm": 1.4540053606033325, "learning_rate": 9.195723368959108e-07, "loss": 0.2603, "step": 38985 }, { "epoch": 0.8625183885204015, "grad_norm": 2.584914445877075, "learning_rate": 9.181173254903076e-07, "loss": 0.2084, "step": 38990 }, { "epoch": 0.8626289961619148, "grad_norm": 1.3127548694610596, "learning_rate": 9.166634107192062e-07, "loss": 0.3394, "step": 38995 }, { "epoch": 0.8627396038034281, "grad_norm": 1.0661379098892212, "learning_rate": 9.152105927581656e-07, "loss": 0.2511, "step": 39000 }, { "epoch": 0.8628502114449413, "grad_norm": 1.0472097396850586, "learning_rate": 9.137588717826073e-07, "loss": 0.291, "step": 39005 }, { "epoch": 0.8629608190864546, "grad_norm": 0.6581370830535889, "learning_rate": 9.123082479678324e-07, "loss": 0.2635, "step": 39010 }, { "epoch": 0.8630714267279679, "grad_norm": 1.1199805736541748, "learning_rate": 9.10858721489003e-07, "loss": 0.4259, "step": 39015 }, { "epoch": 0.8631820343694812, "grad_norm": 1.0610997676849365, "learning_rate": 9.094102925211423e-07, "loss": 0.2977, "step": 39020 }, { "epoch": 0.8632926420109944, "grad_norm": 0.6034849286079407, "learning_rate": 9.079629612391549e-07, "loss": 0.2272, "step": 39025 }, { "epoch": 0.8634032496525077, "grad_norm": 1.088358759880066, "learning_rate": 9.065167278178056e-07, "loss": 0.2504, "step": 39030 }, { "epoch": 0.863513857294021, "grad_norm": 0.452923983335495, "learning_rate": 9.050715924317188e-07, "loss": 0.2967, "step": 39035 }, { "epoch": 0.8636244649355341, "grad_norm": 1.6209334135055542, "learning_rate": 9.036275552554042e-07, "loss": 0.2491, "step": 39040 }, { "epoch": 0.8637350725770474, "grad_norm": 1.556930422782898, "learning_rate": 9.021846164632198e-07, "loss": 0.3258, "step": 39045 }, { "epoch": 0.8638456802185607, "grad_norm": 0.736476719379425, "learning_rate": 9.007427762294063e-07, "loss": 0.2813, "step": 39050 }, { "epoch": 0.863956287860074, "grad_norm": 0.6411951184272766, "learning_rate": 8.993020347280645e-07, "loss": 0.3677, "step": 39055 }, { "epoch": 0.8640668955015872, "grad_norm": 3.179917573928833, "learning_rate": 8.978623921331597e-07, "loss": 0.3056, "step": 39060 }, { "epoch": 0.8641775031431005, "grad_norm": 0.6452550888061523, "learning_rate": 8.964238486185328e-07, "loss": 0.3006, "step": 39065 }, { "epoch": 0.8642881107846138, "grad_norm": 0.7199798226356506, "learning_rate": 8.949864043578871e-07, "loss": 0.2139, "step": 39070 }, { "epoch": 0.864398718426127, "grad_norm": 0.5164555311203003, "learning_rate": 8.935500595247882e-07, "loss": 0.3337, "step": 39075 }, { "epoch": 0.8645093260676403, "grad_norm": 0.5841599702835083, "learning_rate": 8.921148142926805e-07, "loss": 0.532, "step": 39080 }, { "epoch": 0.8646199337091535, "grad_norm": 1.1207610368728638, "learning_rate": 8.90680668834869e-07, "loss": 0.3868, "step": 39085 }, { "epoch": 0.8647305413506667, "grad_norm": 2.1768369674682617, "learning_rate": 8.892476233245206e-07, "loss": 0.1699, "step": 39090 }, { "epoch": 0.86484114899218, "grad_norm": 1.9281773567199707, "learning_rate": 8.878156779346802e-07, "loss": 0.3769, "step": 39095 }, { "epoch": 0.8649517566336933, "grad_norm": 0.9550526738166809, "learning_rate": 8.863848328382552e-07, "loss": 0.4177, "step": 39100 }, { "epoch": 0.8650623642752066, "grad_norm": 1.7656177282333374, "learning_rate": 8.849550882080149e-07, "loss": 0.3215, "step": 39105 }, { "epoch": 0.8651729719167198, "grad_norm": 2.9204776287078857, "learning_rate": 8.835264442166058e-07, "loss": 0.2736, "step": 39110 }, { "epoch": 0.8652835795582331, "grad_norm": 1.019304871559143, "learning_rate": 8.820989010365355e-07, "loss": 0.344, "step": 39115 }, { "epoch": 0.8653941871997464, "grad_norm": 1.6147663593292236, "learning_rate": 8.806724588401738e-07, "loss": 0.4205, "step": 39120 }, { "epoch": 0.8655047948412596, "grad_norm": 0.48238715529441833, "learning_rate": 8.792471177997697e-07, "loss": 0.3346, "step": 39125 }, { "epoch": 0.8656154024827729, "grad_norm": 0.9829366207122803, "learning_rate": 8.778228780874321e-07, "loss": 0.2823, "step": 39130 }, { "epoch": 0.8657260101242861, "grad_norm": 0.9600426554679871, "learning_rate": 8.763997398751323e-07, "loss": 0.3116, "step": 39135 }, { "epoch": 0.8658366177657993, "grad_norm": 1.0120296478271484, "learning_rate": 8.749777033347196e-07, "loss": 0.3339, "step": 39140 }, { "epoch": 0.8659472254073126, "grad_norm": 0.744792103767395, "learning_rate": 8.735567686378987e-07, "loss": 0.4065, "step": 39145 }, { "epoch": 0.8660578330488259, "grad_norm": 1.4073742628097534, "learning_rate": 8.721369359562526e-07, "loss": 0.4309, "step": 39150 }, { "epoch": 0.8661684406903392, "grad_norm": 1.9332637786865234, "learning_rate": 8.707182054612251e-07, "loss": 0.4112, "step": 39155 }, { "epoch": 0.8662790483318524, "grad_norm": 0.8810939192771912, "learning_rate": 8.693005773241225e-07, "loss": 0.3604, "step": 39160 }, { "epoch": 0.8663896559733657, "grad_norm": 0.729651153087616, "learning_rate": 8.67884051716128e-07, "loss": 0.3202, "step": 39165 }, { "epoch": 0.866500263614879, "grad_norm": 1.0392816066741943, "learning_rate": 8.664686288082857e-07, "loss": 0.2461, "step": 39170 }, { "epoch": 0.8666108712563922, "grad_norm": 0.8293331265449524, "learning_rate": 8.650543087715035e-07, "loss": 0.2727, "step": 39175 }, { "epoch": 0.8667214788979054, "grad_norm": 0.8402110934257507, "learning_rate": 8.636410917765647e-07, "loss": 0.385, "step": 39180 }, { "epoch": 0.8668320865394187, "grad_norm": 1.4033502340316772, "learning_rate": 8.622289779941161e-07, "loss": 0.4041, "step": 39185 }, { "epoch": 0.866942694180932, "grad_norm": 0.927740216255188, "learning_rate": 8.608179675946626e-07, "loss": 0.2611, "step": 39190 }, { "epoch": 0.8670533018224452, "grad_norm": 0.3323821425437927, "learning_rate": 8.594080607485899e-07, "loss": 0.2444, "step": 39195 }, { "epoch": 0.8671639094639585, "grad_norm": 1.105894684791565, "learning_rate": 8.579992576261443e-07, "loss": 0.2126, "step": 39200 }, { "epoch": 0.8672745171054718, "grad_norm": 0.5632103681564331, "learning_rate": 8.565915583974316e-07, "loss": 0.3392, "step": 39205 }, { "epoch": 0.867385124746985, "grad_norm": 1.0315022468566895, "learning_rate": 8.551849632324371e-07, "loss": 0.2935, "step": 39210 }, { "epoch": 0.8674957323884983, "grad_norm": 0.6635228395462036, "learning_rate": 8.537794723010062e-07, "loss": 0.3904, "step": 39215 }, { "epoch": 0.8676063400300116, "grad_norm": 0.9328121542930603, "learning_rate": 8.523750857728474e-07, "loss": 0.2425, "step": 39220 }, { "epoch": 0.8677169476715249, "grad_norm": 1.244211196899414, "learning_rate": 8.509718038175441e-07, "loss": 0.3353, "step": 39225 }, { "epoch": 0.867827555313038, "grad_norm": 1.2418067455291748, "learning_rate": 8.495696266045428e-07, "loss": 0.273, "step": 39230 }, { "epoch": 0.8679381629545513, "grad_norm": 1.1453590393066406, "learning_rate": 8.481685543031504e-07, "loss": 0.4532, "step": 39235 }, { "epoch": 0.8680487705960646, "grad_norm": 1.3533935546875, "learning_rate": 8.467685870825515e-07, "loss": 0.2318, "step": 39240 }, { "epoch": 0.8681593782375778, "grad_norm": 1.164460301399231, "learning_rate": 8.453697251117909e-07, "loss": 0.5218, "step": 39245 }, { "epoch": 0.8682699858790911, "grad_norm": 0.7768779993057251, "learning_rate": 8.439719685597759e-07, "loss": 0.3649, "step": 39250 }, { "epoch": 0.8683805935206044, "grad_norm": 1.2141245603561401, "learning_rate": 8.425753175952933e-07, "loss": 0.2488, "step": 39255 }, { "epoch": 0.8684912011621176, "grad_norm": 0.8672716021537781, "learning_rate": 8.411797723869797e-07, "loss": 0.4166, "step": 39260 }, { "epoch": 0.8686018088036309, "grad_norm": 1.5677835941314697, "learning_rate": 8.397853331033524e-07, "loss": 0.4146, "step": 39265 }, { "epoch": 0.8687124164451442, "grad_norm": 1.3889261484146118, "learning_rate": 8.383919999127899e-07, "loss": 0.3176, "step": 39270 }, { "epoch": 0.8688230240866573, "grad_norm": 0.548263430595398, "learning_rate": 8.36999772983531e-07, "loss": 0.2737, "step": 39275 }, { "epoch": 0.8689336317281706, "grad_norm": 1.0910183191299438, "learning_rate": 8.356086524836926e-07, "loss": 0.4007, "step": 39280 }, { "epoch": 0.8690442393696839, "grad_norm": 2.128380537033081, "learning_rate": 8.342186385812512e-07, "loss": 0.3309, "step": 39285 }, { "epoch": 0.8691548470111972, "grad_norm": 0.8877822756767273, "learning_rate": 8.328297314440459e-07, "loss": 0.2865, "step": 39290 }, { "epoch": 0.8692654546527104, "grad_norm": 1.1008591651916504, "learning_rate": 8.314419312397925e-07, "loss": 0.3357, "step": 39295 }, { "epoch": 0.8693760622942237, "grad_norm": 1.4697904586791992, "learning_rate": 8.30055238136066e-07, "loss": 0.4147, "step": 39300 }, { "epoch": 0.869486669935737, "grad_norm": 1.826761245727539, "learning_rate": 8.286696523003057e-07, "loss": 0.3035, "step": 39305 }, { "epoch": 0.8695972775772502, "grad_norm": 1.11300528049469, "learning_rate": 8.272851738998244e-07, "loss": 0.2102, "step": 39310 }, { "epoch": 0.8697078852187635, "grad_norm": 1.001857876777649, "learning_rate": 8.259018031017974e-07, "loss": 0.3177, "step": 39315 }, { "epoch": 0.8698184928602768, "grad_norm": 1.1529909372329712, "learning_rate": 8.245195400732619e-07, "loss": 0.3303, "step": 39320 }, { "epoch": 0.86992910050179, "grad_norm": 0.8963597416877747, "learning_rate": 8.231383849811314e-07, "loss": 0.3624, "step": 39325 }, { "epoch": 0.8700397081433032, "grad_norm": 0.6369737982749939, "learning_rate": 8.21758337992179e-07, "loss": 0.1924, "step": 39330 }, { "epoch": 0.8701503157848165, "grad_norm": 0.8020033836364746, "learning_rate": 8.203793992730403e-07, "loss": 0.3546, "step": 39335 }, { "epoch": 0.8702609234263298, "grad_norm": 1.2913804054260254, "learning_rate": 8.190015689902264e-07, "loss": 0.4003, "step": 39340 }, { "epoch": 0.870371531067843, "grad_norm": 1.6159619092941284, "learning_rate": 8.1762484731011e-07, "loss": 0.4532, "step": 39345 }, { "epoch": 0.8704821387093563, "grad_norm": 1.135544776916504, "learning_rate": 8.162492343989248e-07, "loss": 0.3178, "step": 39350 }, { "epoch": 0.8705927463508696, "grad_norm": 0.7975497245788574, "learning_rate": 8.148747304227823e-07, "loss": 0.353, "step": 39355 }, { "epoch": 0.8707033539923829, "grad_norm": 1.004529356956482, "learning_rate": 8.135013355476462e-07, "loss": 0.2772, "step": 39360 }, { "epoch": 0.8708139616338961, "grad_norm": 1.0528509616851807, "learning_rate": 8.121290499393597e-07, "loss": 0.2699, "step": 39365 }, { "epoch": 0.8709245692754093, "grad_norm": 1.5277819633483887, "learning_rate": 8.107578737636257e-07, "loss": 0.317, "step": 39370 }, { "epoch": 0.8710351769169226, "grad_norm": 1.0070157051086426, "learning_rate": 8.093878071860073e-07, "loss": 0.316, "step": 39375 }, { "epoch": 0.8711457845584358, "grad_norm": 1.361592173576355, "learning_rate": 8.080188503719455e-07, "loss": 0.2963, "step": 39380 }, { "epoch": 0.8712563921999491, "grad_norm": 1.2561436891555786, "learning_rate": 8.066510034867403e-07, "loss": 0.2258, "step": 39385 }, { "epoch": 0.8713669998414624, "grad_norm": 0.6721475720405579, "learning_rate": 8.052842666955551e-07, "loss": 0.4159, "step": 39390 }, { "epoch": 0.8714776074829756, "grad_norm": 0.9185898303985596, "learning_rate": 8.03918640163428e-07, "loss": 0.2194, "step": 39395 }, { "epoch": 0.8715882151244889, "grad_norm": 1.104715347290039, "learning_rate": 8.02554124055257e-07, "loss": 0.2831, "step": 39400 }, { "epoch": 0.8716988227660022, "grad_norm": 0.8935679793357849, "learning_rate": 8.011907185358014e-07, "loss": 0.3364, "step": 39405 }, { "epoch": 0.8718094304075155, "grad_norm": 1.4565317630767822, "learning_rate": 7.998284237696996e-07, "loss": 0.2346, "step": 39410 }, { "epoch": 0.8719200380490287, "grad_norm": 0.9334602952003479, "learning_rate": 7.984672399214454e-07, "loss": 0.3179, "step": 39415 }, { "epoch": 0.8720306456905419, "grad_norm": 1.2813917398452759, "learning_rate": 7.971071671553976e-07, "loss": 0.3038, "step": 39420 }, { "epoch": 0.8721412533320552, "grad_norm": 0.5372806191444397, "learning_rate": 7.957482056357901e-07, "loss": 0.2056, "step": 39425 }, { "epoch": 0.8722518609735684, "grad_norm": 0.5267437696456909, "learning_rate": 7.943903555267162e-07, "loss": 0.2439, "step": 39430 }, { "epoch": 0.8723624686150817, "grad_norm": 1.0387176275253296, "learning_rate": 7.930336169921315e-07, "loss": 0.3563, "step": 39435 }, { "epoch": 0.872473076256595, "grad_norm": 1.5733919143676758, "learning_rate": 7.91677990195866e-07, "loss": 0.2149, "step": 39440 }, { "epoch": 0.8725836838981083, "grad_norm": 0.5569614768028259, "learning_rate": 7.90323475301612e-07, "loss": 0.3333, "step": 39445 }, { "epoch": 0.8726942915396215, "grad_norm": 1.1812485456466675, "learning_rate": 7.88970072472921e-07, "loss": 0.3218, "step": 39450 }, { "epoch": 0.8728048991811348, "grad_norm": 0.9315392971038818, "learning_rate": 7.876177818732223e-07, "loss": 0.4042, "step": 39455 }, { "epoch": 0.8729155068226481, "grad_norm": 1.094139814376831, "learning_rate": 7.862666036658029e-07, "loss": 0.3735, "step": 39460 }, { "epoch": 0.8730261144641612, "grad_norm": 1.056610107421875, "learning_rate": 7.849165380138124e-07, "loss": 0.2997, "step": 39465 }, { "epoch": 0.8731367221056745, "grad_norm": 0.5941892266273499, "learning_rate": 7.835675850802782e-07, "loss": 0.2365, "step": 39470 }, { "epoch": 0.8732473297471878, "grad_norm": 0.5531185865402222, "learning_rate": 7.822197450280799e-07, "loss": 0.2698, "step": 39475 }, { "epoch": 0.873357937388701, "grad_norm": 1.1928926706314087, "learning_rate": 7.808730180199719e-07, "loss": 0.3519, "step": 39480 }, { "epoch": 0.8734685450302143, "grad_norm": 1.25923752784729, "learning_rate": 7.795274042185729e-07, "loss": 0.3478, "step": 39485 }, { "epoch": 0.8735791526717276, "grad_norm": 1.8336783647537231, "learning_rate": 7.781829037863586e-07, "loss": 0.434, "step": 39490 }, { "epoch": 0.8736897603132409, "grad_norm": 0.8933510184288025, "learning_rate": 7.768395168856835e-07, "loss": 0.373, "step": 39495 }, { "epoch": 0.8738003679547541, "grad_norm": 1.6640963554382324, "learning_rate": 7.754972436787612e-07, "loss": 0.3097, "step": 39500 }, { "epoch": 0.8739109755962674, "grad_norm": 1.1803674697875977, "learning_rate": 7.741560843276641e-07, "loss": 0.2597, "step": 39505 }, { "epoch": 0.8740215832377807, "grad_norm": 1.0499203205108643, "learning_rate": 7.728160389943429e-07, "loss": 0.335, "step": 39510 }, { "epoch": 0.8741321908792938, "grad_norm": 1.6963213682174683, "learning_rate": 7.71477107840608e-07, "loss": 0.2346, "step": 39515 }, { "epoch": 0.8742427985208071, "grad_norm": 1.6056156158447266, "learning_rate": 7.701392910281291e-07, "loss": 0.367, "step": 39520 }, { "epoch": 0.8743534061623204, "grad_norm": 0.9425254464149475, "learning_rate": 7.688025887184525e-07, "loss": 0.3084, "step": 39525 }, { "epoch": 0.8744640138038337, "grad_norm": 1.1562035083770752, "learning_rate": 7.674670010729846e-07, "loss": 0.2864, "step": 39530 }, { "epoch": 0.8745746214453469, "grad_norm": 0.9658022522926331, "learning_rate": 7.66132528252993e-07, "loss": 0.3963, "step": 39535 }, { "epoch": 0.8746852290868602, "grad_norm": 1.1334506273269653, "learning_rate": 7.64799170419619e-07, "loss": 0.2535, "step": 39540 }, { "epoch": 0.8747958367283735, "grad_norm": 1.3926177024841309, "learning_rate": 7.634669277338647e-07, "loss": 0.3123, "step": 39545 }, { "epoch": 0.8749064443698867, "grad_norm": 0.8499661684036255, "learning_rate": 7.621358003565937e-07, "loss": 0.2299, "step": 39550 }, { "epoch": 0.8750170520114, "grad_norm": 0.8228171467781067, "learning_rate": 7.608057884485431e-07, "loss": 0.3045, "step": 39555 }, { "epoch": 0.8751276596529132, "grad_norm": 1.3565748929977417, "learning_rate": 7.594768921703133e-07, "loss": 0.2603, "step": 39560 }, { "epoch": 0.8752382672944264, "grad_norm": 0.8823809027671814, "learning_rate": 7.581491116823625e-07, "loss": 0.4078, "step": 39565 }, { "epoch": 0.8753488749359397, "grad_norm": 0.7208507061004639, "learning_rate": 7.568224471450258e-07, "loss": 0.4174, "step": 39570 }, { "epoch": 0.875459482577453, "grad_norm": 1.9722919464111328, "learning_rate": 7.554968987184929e-07, "loss": 0.2659, "step": 39575 }, { "epoch": 0.8755700902189663, "grad_norm": 0.7871860861778259, "learning_rate": 7.541724665628236e-07, "loss": 0.1895, "step": 39580 }, { "epoch": 0.8756806978604795, "grad_norm": 0.8645642995834351, "learning_rate": 7.528491508379465e-07, "loss": 0.211, "step": 39585 }, { "epoch": 0.8757913055019928, "grad_norm": 1.3946970701217651, "learning_rate": 7.515269517036472e-07, "loss": 0.3787, "step": 39590 }, { "epoch": 0.8759019131435061, "grad_norm": 1.2522618770599365, "learning_rate": 7.502058693195835e-07, "loss": 0.4415, "step": 39595 }, { "epoch": 0.8760125207850193, "grad_norm": 0.7147267460823059, "learning_rate": 7.488859038452767e-07, "loss": 0.3374, "step": 39600 }, { "epoch": 0.8761231284265326, "grad_norm": 0.6822241544723511, "learning_rate": 7.475670554401071e-07, "loss": 0.2002, "step": 39605 }, { "epoch": 0.8762337360680458, "grad_norm": 1.0381382703781128, "learning_rate": 7.462493242633296e-07, "loss": 0.3096, "step": 39610 }, { "epoch": 0.876344343709559, "grad_norm": 0.958562433719635, "learning_rate": 7.449327104740612e-07, "loss": 0.3571, "step": 39615 }, { "epoch": 0.8764549513510723, "grad_norm": 1.0576192140579224, "learning_rate": 7.43617214231277e-07, "loss": 0.3321, "step": 39620 }, { "epoch": 0.8765655589925856, "grad_norm": 0.6790910959243774, "learning_rate": 7.423028356938278e-07, "loss": 0.1696, "step": 39625 }, { "epoch": 0.8766761666340989, "grad_norm": 0.9308285117149353, "learning_rate": 7.409895750204232e-07, "loss": 0.3631, "step": 39630 }, { "epoch": 0.8767867742756121, "grad_norm": 0.77732253074646, "learning_rate": 7.396774323696366e-07, "loss": 0.179, "step": 39635 }, { "epoch": 0.8768973819171254, "grad_norm": 1.8770287036895752, "learning_rate": 7.383664078999109e-07, "loss": 0.3602, "step": 39640 }, { "epoch": 0.8770079895586387, "grad_norm": 1.0779093503952026, "learning_rate": 7.370565017695541e-07, "loss": 0.3878, "step": 39645 }, { "epoch": 0.877118597200152, "grad_norm": 0.6448560953140259, "learning_rate": 7.357477141367309e-07, "loss": 0.3359, "step": 39650 }, { "epoch": 0.8772292048416651, "grad_norm": 1.2732996940612793, "learning_rate": 7.344400451594824e-07, "loss": 0.4205, "step": 39655 }, { "epoch": 0.8773398124831784, "grad_norm": 0.8565564751625061, "learning_rate": 7.331334949957081e-07, "loss": 0.3005, "step": 39660 }, { "epoch": 0.8774504201246917, "grad_norm": 1.294618010520935, "learning_rate": 7.318280638031705e-07, "loss": 0.4756, "step": 39665 }, { "epoch": 0.8775610277662049, "grad_norm": 1.1359738111495972, "learning_rate": 7.305237517395058e-07, "loss": 0.1896, "step": 39670 }, { "epoch": 0.8776716354077182, "grad_norm": 0.49147292971611023, "learning_rate": 7.292205589622037e-07, "loss": 0.3065, "step": 39675 }, { "epoch": 0.8777822430492315, "grad_norm": 1.6661502122879028, "learning_rate": 7.279184856286247e-07, "loss": 0.4244, "step": 39680 }, { "epoch": 0.8778928506907447, "grad_norm": 0.768587589263916, "learning_rate": 7.266175318959989e-07, "loss": 0.3609, "step": 39685 }, { "epoch": 0.878003458332258, "grad_norm": 0.9545561075210571, "learning_rate": 7.253176979214083e-07, "loss": 0.3241, "step": 39690 }, { "epoch": 0.8781140659737713, "grad_norm": 1.3334225416183472, "learning_rate": 7.240189838618151e-07, "loss": 0.3064, "step": 39695 }, { "epoch": 0.8782246736152846, "grad_norm": 1.2866158485412598, "learning_rate": 7.22721389874036e-07, "loss": 0.2928, "step": 39700 }, { "epoch": 0.8783352812567977, "grad_norm": 1.4346522092819214, "learning_rate": 7.214249161147513e-07, "loss": 0.3586, "step": 39705 }, { "epoch": 0.878445888898311, "grad_norm": 1.405729055404663, "learning_rate": 7.201295627405136e-07, "loss": 0.3171, "step": 39710 }, { "epoch": 0.8785564965398243, "grad_norm": 1.0450206995010376, "learning_rate": 7.188353299077389e-07, "loss": 0.3301, "step": 39715 }, { "epoch": 0.8786671041813375, "grad_norm": 0.7944782972335815, "learning_rate": 7.175422177726976e-07, "loss": 0.2335, "step": 39720 }, { "epoch": 0.8787777118228508, "grad_norm": 0.6723170280456543, "learning_rate": 7.162502264915395e-07, "loss": 0.3691, "step": 39725 }, { "epoch": 0.8788883194643641, "grad_norm": 0.9615852236747742, "learning_rate": 7.149593562202716e-07, "loss": 0.254, "step": 39730 }, { "epoch": 0.8789989271058773, "grad_norm": 1.0153576135635376, "learning_rate": 7.136696071147608e-07, "loss": 0.259, "step": 39735 }, { "epoch": 0.8791095347473906, "grad_norm": 1.0119539499282837, "learning_rate": 7.12380979330749e-07, "loss": 0.2717, "step": 39740 }, { "epoch": 0.8792201423889039, "grad_norm": 1.2008686065673828, "learning_rate": 7.110934730238372e-07, "loss": 0.2915, "step": 39745 }, { "epoch": 0.879330750030417, "grad_norm": 1.1538784503936768, "learning_rate": 7.098070883494879e-07, "loss": 0.4057, "step": 39750 }, { "epoch": 0.8794413576719303, "grad_norm": 1.895290732383728, "learning_rate": 7.085218254630344e-07, "loss": 0.4105, "step": 39755 }, { "epoch": 0.8795519653134436, "grad_norm": 0.5539196729660034, "learning_rate": 7.072376845196738e-07, "loss": 0.3315, "step": 39760 }, { "epoch": 0.8796625729549569, "grad_norm": 1.1394997835159302, "learning_rate": 7.059546656744598e-07, "loss": 0.2574, "step": 39765 }, { "epoch": 0.8797731805964701, "grad_norm": 0.8985661864280701, "learning_rate": 7.046727690823207e-07, "loss": 0.3319, "step": 39770 }, { "epoch": 0.8798837882379834, "grad_norm": 0.5382373929023743, "learning_rate": 7.033919948980472e-07, "loss": 0.2943, "step": 39775 }, { "epoch": 0.8799943958794967, "grad_norm": 1.0985107421875, "learning_rate": 7.021123432762855e-07, "loss": 0.357, "step": 39780 }, { "epoch": 0.88010500352101, "grad_norm": 1.0468401908874512, "learning_rate": 7.008338143715609e-07, "loss": 0.3734, "step": 39785 }, { "epoch": 0.8802156111625232, "grad_norm": 0.9031158089637756, "learning_rate": 6.9955640833825e-07, "loss": 0.4425, "step": 39790 }, { "epoch": 0.8803262188040364, "grad_norm": 1.0979381799697876, "learning_rate": 6.982801253305993e-07, "loss": 0.2918, "step": 39795 }, { "epoch": 0.8804368264455497, "grad_norm": 1.3322322368621826, "learning_rate": 6.970049655027245e-07, "loss": 0.2995, "step": 39800 }, { "epoch": 0.8805474340870629, "grad_norm": 0.8656720519065857, "learning_rate": 6.957309290085934e-07, "loss": 0.212, "step": 39805 }, { "epoch": 0.8806580417285762, "grad_norm": 1.7731038331985474, "learning_rate": 6.94458016002052e-07, "loss": 0.2652, "step": 39810 }, { "epoch": 0.8807686493700895, "grad_norm": 1.0240869522094727, "learning_rate": 6.931862266368028e-07, "loss": 0.3568, "step": 39815 }, { "epoch": 0.8808792570116027, "grad_norm": 1.7001978158950806, "learning_rate": 6.919155610664097e-07, "loss": 0.3167, "step": 39820 }, { "epoch": 0.880989864653116, "grad_norm": 1.8053256273269653, "learning_rate": 6.906460194443098e-07, "loss": 0.3312, "step": 39825 }, { "epoch": 0.8811004722946293, "grad_norm": 1.57156503200531, "learning_rate": 6.893776019237996e-07, "loss": 0.347, "step": 39830 }, { "epoch": 0.8812110799361426, "grad_norm": 0.27761098742485046, "learning_rate": 6.881103086580365e-07, "loss": 0.148, "step": 39835 }, { "epoch": 0.8813216875776558, "grad_norm": 1.0044652223587036, "learning_rate": 6.868441398000491e-07, "loss": 0.2869, "step": 39840 }, { "epoch": 0.881432295219169, "grad_norm": 0.566563606262207, "learning_rate": 6.855790955027275e-07, "loss": 0.3756, "step": 39845 }, { "epoch": 0.8815429028606823, "grad_norm": 1.3388036489486694, "learning_rate": 6.843151759188204e-07, "loss": 0.3454, "step": 39850 }, { "epoch": 0.8816535105021955, "grad_norm": 1.760892629623413, "learning_rate": 6.830523812009515e-07, "loss": 0.3131, "step": 39855 }, { "epoch": 0.8817641181437088, "grad_norm": 2.1367578506469727, "learning_rate": 6.817907115016009e-07, "loss": 0.3416, "step": 39860 }, { "epoch": 0.8818747257852221, "grad_norm": 1.511818766593933, "learning_rate": 6.805301669731124e-07, "loss": 0.44, "step": 39865 }, { "epoch": 0.8819853334267354, "grad_norm": 1.1879099607467651, "learning_rate": 6.792707477676996e-07, "loss": 0.2942, "step": 39870 }, { "epoch": 0.8820959410682486, "grad_norm": 0.9627208113670349, "learning_rate": 6.780124540374366e-07, "loss": 0.3087, "step": 39875 }, { "epoch": 0.8822065487097619, "grad_norm": 0.7827447056770325, "learning_rate": 6.767552859342585e-07, "loss": 0.3294, "step": 39880 }, { "epoch": 0.8823171563512752, "grad_norm": 0.9110891819000244, "learning_rate": 6.754992436099739e-07, "loss": 0.33, "step": 39885 }, { "epoch": 0.8824277639927883, "grad_norm": 0.5823552012443542, "learning_rate": 6.742443272162447e-07, "loss": 0.2303, "step": 39890 }, { "epoch": 0.8825383716343016, "grad_norm": 1.171862006187439, "learning_rate": 6.729905369046008e-07, "loss": 0.3181, "step": 39895 }, { "epoch": 0.8826489792758149, "grad_norm": 1.6898449659347534, "learning_rate": 6.717378728264434e-07, "loss": 0.3049, "step": 39900 }, { "epoch": 0.8827595869173281, "grad_norm": 0.5729948878288269, "learning_rate": 6.704863351330227e-07, "loss": 0.3226, "step": 39905 }, { "epoch": 0.8828701945588414, "grad_norm": 0.6433860063552856, "learning_rate": 6.692359239754686e-07, "loss": 0.3159, "step": 39910 }, { "epoch": 0.8829808022003547, "grad_norm": 1.0464450120925903, "learning_rate": 6.679866395047674e-07, "loss": 0.3206, "step": 39915 }, { "epoch": 0.883091409841868, "grad_norm": 1.200269103050232, "learning_rate": 6.667384818717637e-07, "loss": 0.3805, "step": 39920 }, { "epoch": 0.8832020174833812, "grad_norm": 1.1961876153945923, "learning_rate": 6.654914512271782e-07, "loss": 0.2348, "step": 39925 }, { "epoch": 0.8833126251248945, "grad_norm": 1.0710513591766357, "learning_rate": 6.642455477215881e-07, "loss": 0.4051, "step": 39930 }, { "epoch": 0.8834232327664078, "grad_norm": 1.3011460304260254, "learning_rate": 6.630007715054321e-07, "loss": 0.3443, "step": 39935 }, { "epoch": 0.8835338404079209, "grad_norm": 1.1104806661605835, "learning_rate": 6.61757122729022e-07, "loss": 0.324, "step": 39940 }, { "epoch": 0.8836444480494342, "grad_norm": 2.9456355571746826, "learning_rate": 6.605146015425268e-07, "loss": 0.2801, "step": 39945 }, { "epoch": 0.8837550556909475, "grad_norm": 1.1410109996795654, "learning_rate": 6.592732080959762e-07, "loss": 0.3154, "step": 39950 }, { "epoch": 0.8838656633324607, "grad_norm": 1.0137135982513428, "learning_rate": 6.580329425392729e-07, "loss": 0.254, "step": 39955 }, { "epoch": 0.883976270973974, "grad_norm": 0.8237361311912537, "learning_rate": 6.567938050221778e-07, "loss": 0.2907, "step": 39960 }, { "epoch": 0.8840868786154873, "grad_norm": 1.3920223712921143, "learning_rate": 6.555557956943126e-07, "loss": 0.3337, "step": 39965 }, { "epoch": 0.8841974862570006, "grad_norm": 1.0315864086151123, "learning_rate": 6.543189147051709e-07, "loss": 0.3312, "step": 39970 }, { "epoch": 0.8843080938985138, "grad_norm": 0.8827542066574097, "learning_rate": 6.530831622041067e-07, "loss": 0.3599, "step": 39975 }, { "epoch": 0.8844187015400271, "grad_norm": 1.4212298393249512, "learning_rate": 6.518485383403306e-07, "loss": 0.3795, "step": 39980 }, { "epoch": 0.8845293091815403, "grad_norm": 1.161789894104004, "learning_rate": 6.506150432629277e-07, "loss": 0.3526, "step": 39985 }, { "epoch": 0.8846399168230535, "grad_norm": 1.008864164352417, "learning_rate": 6.493826771208434e-07, "loss": 0.3637, "step": 39990 }, { "epoch": 0.8847505244645668, "grad_norm": 1.6560568809509277, "learning_rate": 6.481514400628786e-07, "loss": 0.3868, "step": 39995 }, { "epoch": 0.8848611321060801, "grad_norm": 1.4850927591323853, "learning_rate": 6.469213322377144e-07, "loss": 0.3408, "step": 40000 }, { "epoch": 0.8849717397475934, "grad_norm": 0.9478441476821899, "learning_rate": 6.456923537938786e-07, "loss": 0.2615, "step": 40005 }, { "epoch": 0.8850823473891066, "grad_norm": 2.1644277572631836, "learning_rate": 6.444645048797704e-07, "loss": 0.2527, "step": 40010 }, { "epoch": 0.8851929550306199, "grad_norm": 1.2429271936416626, "learning_rate": 6.432377856436578e-07, "loss": 0.3364, "step": 40015 }, { "epoch": 0.8853035626721332, "grad_norm": 1.1581560373306274, "learning_rate": 6.420121962336601e-07, "loss": 0.2226, "step": 40020 }, { "epoch": 0.8854141703136464, "grad_norm": 0.9503978490829468, "learning_rate": 6.40787736797771e-07, "loss": 0.3159, "step": 40025 }, { "epoch": 0.8855247779551597, "grad_norm": 1.1022083759307861, "learning_rate": 6.395644074838436e-07, "loss": 0.3112, "step": 40030 }, { "epoch": 0.8856353855966729, "grad_norm": 0.8118749856948853, "learning_rate": 6.383422084395907e-07, "loss": 0.2962, "step": 40035 }, { "epoch": 0.8857459932381861, "grad_norm": 0.8988710045814514, "learning_rate": 6.371211398125965e-07, "loss": 0.2431, "step": 40040 }, { "epoch": 0.8858566008796994, "grad_norm": 1.0500386953353882, "learning_rate": 6.359012017503042e-07, "loss": 0.3023, "step": 40045 }, { "epoch": 0.8859672085212127, "grad_norm": 1.8624902963638306, "learning_rate": 6.34682394400018e-07, "loss": 0.4695, "step": 40050 }, { "epoch": 0.886077816162726, "grad_norm": 1.0411882400512695, "learning_rate": 6.334647179089115e-07, "loss": 0.4764, "step": 40055 }, { "epoch": 0.8861884238042392, "grad_norm": 0.9493129849433899, "learning_rate": 6.322481724240193e-07, "loss": 0.3, "step": 40060 }, { "epoch": 0.8862990314457525, "grad_norm": 1.3561615943908691, "learning_rate": 6.310327580922337e-07, "loss": 0.3691, "step": 40065 }, { "epoch": 0.8864096390872658, "grad_norm": 0.48792076110839844, "learning_rate": 6.29818475060322e-07, "loss": 0.3428, "step": 40070 }, { "epoch": 0.886520246728779, "grad_norm": 1.7414613962173462, "learning_rate": 6.286053234749057e-07, "loss": 0.3557, "step": 40075 }, { "epoch": 0.8866308543702922, "grad_norm": 0.6420961618423462, "learning_rate": 6.273933034824709e-07, "loss": 0.2517, "step": 40080 }, { "epoch": 0.8867414620118055, "grad_norm": 0.5702119469642639, "learning_rate": 6.261824152293705e-07, "loss": 0.1958, "step": 40085 }, { "epoch": 0.8868520696533188, "grad_norm": 1.4588733911514282, "learning_rate": 6.249726588618211e-07, "loss": 0.2747, "step": 40090 }, { "epoch": 0.886962677294832, "grad_norm": 0.5482408404350281, "learning_rate": 6.237640345258944e-07, "loss": 0.3942, "step": 40095 }, { "epoch": 0.8870732849363453, "grad_norm": 0.7881300449371338, "learning_rate": 6.225565423675384e-07, "loss": 0.268, "step": 40100 }, { "epoch": 0.8871838925778586, "grad_norm": 1.2172139883041382, "learning_rate": 6.213501825325529e-07, "loss": 0.3537, "step": 40105 }, { "epoch": 0.8872945002193718, "grad_norm": 0.6232389211654663, "learning_rate": 6.201449551666039e-07, "loss": 0.2489, "step": 40110 }, { "epoch": 0.8874051078608851, "grad_norm": 0.9894432425498962, "learning_rate": 6.18940860415228e-07, "loss": 0.4152, "step": 40115 }, { "epoch": 0.8875157155023984, "grad_norm": 1.4305028915405273, "learning_rate": 6.177378984238136e-07, "loss": 0.3805, "step": 40120 }, { "epoch": 0.8876263231439117, "grad_norm": 0.9777807593345642, "learning_rate": 6.16536069337621e-07, "loss": 0.2959, "step": 40125 }, { "epoch": 0.8877369307854248, "grad_norm": 1.0331412553787231, "learning_rate": 6.153353733017731e-07, "loss": 0.3752, "step": 40130 }, { "epoch": 0.8878475384269381, "grad_norm": 0.664314866065979, "learning_rate": 6.141358104612472e-07, "loss": 0.1823, "step": 40135 }, { "epoch": 0.8879581460684514, "grad_norm": 1.371509075164795, "learning_rate": 6.12937380960894e-07, "loss": 0.2245, "step": 40140 }, { "epoch": 0.8880687537099646, "grad_norm": 0.9676991105079651, "learning_rate": 6.117400849454258e-07, "loss": 0.4087, "step": 40145 }, { "epoch": 0.8881793613514779, "grad_norm": 0.7925074100494385, "learning_rate": 6.105439225594101e-07, "loss": 0.2248, "step": 40150 }, { "epoch": 0.8882899689929912, "grad_norm": 1.314848780632019, "learning_rate": 6.093488939472869e-07, "loss": 0.3878, "step": 40155 }, { "epoch": 0.8884005766345044, "grad_norm": 1.1034443378448486, "learning_rate": 6.081549992533553e-07, "loss": 0.3843, "step": 40160 }, { "epoch": 0.8885111842760177, "grad_norm": 0.9291820526123047, "learning_rate": 6.069622386217755e-07, "loss": 0.2407, "step": 40165 }, { "epoch": 0.888621791917531, "grad_norm": 1.4089624881744385, "learning_rate": 6.057706121965756e-07, "loss": 0.2653, "step": 40170 }, { "epoch": 0.8887323995590442, "grad_norm": 0.8917296528816223, "learning_rate": 6.04580120121645e-07, "loss": 0.391, "step": 40175 }, { "epoch": 0.8888430072005574, "grad_norm": 0.7463674545288086, "learning_rate": 6.033907625407298e-07, "loss": 0.2257, "step": 40180 }, { "epoch": 0.8889536148420707, "grad_norm": 1.3917757272720337, "learning_rate": 6.022025395974495e-07, "loss": 0.3163, "step": 40185 }, { "epoch": 0.889064222483584, "grad_norm": 3.5881102085113525, "learning_rate": 6.010154514352817e-07, "loss": 0.3213, "step": 40190 }, { "epoch": 0.8891748301250972, "grad_norm": 1.2788074016571045, "learning_rate": 5.998294981975627e-07, "loss": 0.4454, "step": 40195 }, { "epoch": 0.8892854377666105, "grad_norm": 0.8682320713996887, "learning_rate": 5.986446800275004e-07, "loss": 0.3887, "step": 40200 }, { "epoch": 0.8893960454081238, "grad_norm": 0.802413821220398, "learning_rate": 5.9746099706816e-07, "loss": 0.3277, "step": 40205 }, { "epoch": 0.889506653049637, "grad_norm": 2.1557865142822266, "learning_rate": 5.962784494624685e-07, "loss": 0.3268, "step": 40210 }, { "epoch": 0.8896172606911503, "grad_norm": 1.549560546875, "learning_rate": 5.950970373532228e-07, "loss": 0.4451, "step": 40215 }, { "epoch": 0.8897278683326636, "grad_norm": 1.082761287689209, "learning_rate": 5.939167608830731e-07, "loss": 0.396, "step": 40220 }, { "epoch": 0.8898384759741768, "grad_norm": 1.0680309534072876, "learning_rate": 5.92737620194539e-07, "loss": 0.2809, "step": 40225 }, { "epoch": 0.88994908361569, "grad_norm": 0.998909056186676, "learning_rate": 5.915596154300052e-07, "loss": 0.3121, "step": 40230 }, { "epoch": 0.8900596912572033, "grad_norm": 0.9858130812644958, "learning_rate": 5.90382746731708e-07, "loss": 0.3721, "step": 40235 }, { "epoch": 0.8901702988987166, "grad_norm": 2.488961935043335, "learning_rate": 5.892070142417605e-07, "loss": 0.3132, "step": 40240 }, { "epoch": 0.8902809065402298, "grad_norm": 2.642911911010742, "learning_rate": 5.880324181021313e-07, "loss": 0.2288, "step": 40245 }, { "epoch": 0.8903915141817431, "grad_norm": 1.221541166305542, "learning_rate": 5.86858958454648e-07, "loss": 0.2024, "step": 40250 }, { "epoch": 0.8905021218232564, "grad_norm": 1.4554550647735596, "learning_rate": 5.856866354410084e-07, "loss": 0.3437, "step": 40255 }, { "epoch": 0.8906127294647697, "grad_norm": 0.7799614667892456, "learning_rate": 5.845154492027727e-07, "loss": 0.2279, "step": 40260 }, { "epoch": 0.8907233371062829, "grad_norm": 1.103982925415039, "learning_rate": 5.833453998813566e-07, "loss": 0.3503, "step": 40265 }, { "epoch": 0.8908339447477961, "grad_norm": 0.5861274003982544, "learning_rate": 5.821764876180446e-07, "loss": 0.2639, "step": 40270 }, { "epoch": 0.8909445523893094, "grad_norm": 1.350035548210144, "learning_rate": 5.810087125539865e-07, "loss": 0.392, "step": 40275 }, { "epoch": 0.8910551600308226, "grad_norm": 1.1586081981658936, "learning_rate": 5.798420748301837e-07, "loss": 0.4089, "step": 40280 }, { "epoch": 0.8911657676723359, "grad_norm": 1.3202296495437622, "learning_rate": 5.786765745875122e-07, "loss": 0.3277, "step": 40285 }, { "epoch": 0.8912763753138492, "grad_norm": 0.3965986967086792, "learning_rate": 5.775122119667064e-07, "loss": 0.3472, "step": 40290 }, { "epoch": 0.8913869829553625, "grad_norm": 0.9386733174324036, "learning_rate": 5.763489871083583e-07, "loss": 0.316, "step": 40295 }, { "epoch": 0.8914975905968757, "grad_norm": 1.4905273914337158, "learning_rate": 5.751869001529309e-07, "loss": 0.3051, "step": 40300 }, { "epoch": 0.891608198238389, "grad_norm": 1.534773588180542, "learning_rate": 5.740259512407465e-07, "loss": 0.2444, "step": 40305 }, { "epoch": 0.8917188058799023, "grad_norm": 0.8949975371360779, "learning_rate": 5.728661405119829e-07, "loss": 0.3004, "step": 40310 }, { "epoch": 0.8918294135214155, "grad_norm": 1.0913419723510742, "learning_rate": 5.717074681066958e-07, "loss": 0.2563, "step": 40315 }, { "epoch": 0.8919400211629287, "grad_norm": 0.5333966612815857, "learning_rate": 5.70549934164788e-07, "loss": 0.1994, "step": 40320 }, { "epoch": 0.892050628804442, "grad_norm": 2.2671079635620117, "learning_rate": 5.693935388260319e-07, "loss": 0.2687, "step": 40325 }, { "epoch": 0.8921612364459552, "grad_norm": 1.1376771926879883, "learning_rate": 5.682382822300669e-07, "loss": 0.2514, "step": 40330 }, { "epoch": 0.8922718440874685, "grad_norm": 1.063331127166748, "learning_rate": 5.670841645163849e-07, "loss": 0.4199, "step": 40335 }, { "epoch": 0.8923824517289818, "grad_norm": 1.493370771408081, "learning_rate": 5.659311858243443e-07, "loss": 0.3196, "step": 40340 }, { "epoch": 0.8924930593704951, "grad_norm": 1.3032011985778809, "learning_rate": 5.647793462931739e-07, "loss": 0.2865, "step": 40345 }, { "epoch": 0.8926036670120083, "grad_norm": 0.8139416575431824, "learning_rate": 5.636286460619489e-07, "loss": 0.1527, "step": 40350 }, { "epoch": 0.8927142746535216, "grad_norm": 0.7795614004135132, "learning_rate": 5.624790852696227e-07, "loss": 0.3388, "step": 40355 }, { "epoch": 0.8928248822950349, "grad_norm": 1.42714262008667, "learning_rate": 5.613306640550043e-07, "loss": 0.194, "step": 40360 }, { "epoch": 0.892935489936548, "grad_norm": 0.23209692537784576, "learning_rate": 5.601833825567593e-07, "loss": 0.1521, "step": 40365 }, { "epoch": 0.8930460975780613, "grad_norm": 1.000994086265564, "learning_rate": 5.59037240913427e-07, "loss": 0.3883, "step": 40370 }, { "epoch": 0.8931567052195746, "grad_norm": 0.9473848342895508, "learning_rate": 5.578922392634034e-07, "loss": 0.2715, "step": 40375 }, { "epoch": 0.8932673128610878, "grad_norm": 1.4005675315856934, "learning_rate": 5.567483777449434e-07, "loss": 0.2123, "step": 40380 }, { "epoch": 0.8933779205026011, "grad_norm": 1.0071494579315186, "learning_rate": 5.55605656496172e-07, "loss": 0.3582, "step": 40385 }, { "epoch": 0.8934885281441144, "grad_norm": 1.5425307750701904, "learning_rate": 5.544640756550712e-07, "loss": 0.3229, "step": 40390 }, { "epoch": 0.8935991357856277, "grad_norm": 1.1729066371917725, "learning_rate": 5.53323635359484e-07, "loss": 0.3811, "step": 40395 }, { "epoch": 0.8937097434271409, "grad_norm": 1.8725221157073975, "learning_rate": 5.521843357471213e-07, "loss": 0.4146, "step": 40400 }, { "epoch": 0.8938203510686542, "grad_norm": 1.6933741569519043, "learning_rate": 5.510461769555531e-07, "loss": 0.3311, "step": 40405 }, { "epoch": 0.8939309587101675, "grad_norm": 0.5554590821266174, "learning_rate": 5.499091591222083e-07, "loss": 0.2197, "step": 40410 }, { "epoch": 0.8940415663516806, "grad_norm": 1.0557066202163696, "learning_rate": 5.48773282384385e-07, "loss": 0.348, "step": 40415 }, { "epoch": 0.8941521739931939, "grad_norm": 1.7758808135986328, "learning_rate": 5.476385468792411e-07, "loss": 0.2809, "step": 40420 }, { "epoch": 0.8942627816347072, "grad_norm": 1.0905500650405884, "learning_rate": 5.465049527437893e-07, "loss": 0.2776, "step": 40425 }, { "epoch": 0.8943733892762205, "grad_norm": 0.826238751411438, "learning_rate": 5.453725001149191e-07, "loss": 0.2748, "step": 40430 }, { "epoch": 0.8944839969177337, "grad_norm": 1.02289617061615, "learning_rate": 5.442411891293675e-07, "loss": 0.3744, "step": 40435 }, { "epoch": 0.894594604559247, "grad_norm": 1.0395439863204956, "learning_rate": 5.431110199237399e-07, "loss": 0.4615, "step": 40440 }, { "epoch": 0.8947052122007603, "grad_norm": 1.0560542345046997, "learning_rate": 5.419819926345094e-07, "loss": 0.3166, "step": 40445 }, { "epoch": 0.8948158198422735, "grad_norm": 0.9921424388885498, "learning_rate": 5.408541073979979e-07, "loss": 0.2923, "step": 40450 }, { "epoch": 0.8949264274837868, "grad_norm": 1.7800642251968384, "learning_rate": 5.397273643504041e-07, "loss": 0.5343, "step": 40455 }, { "epoch": 0.8950370351253, "grad_norm": 0.2786145508289337, "learning_rate": 5.386017636277808e-07, "loss": 0.2159, "step": 40460 }, { "epoch": 0.8951476427668132, "grad_norm": 0.734350323677063, "learning_rate": 5.374773053660387e-07, "loss": 0.3906, "step": 40465 }, { "epoch": 0.8952582504083265, "grad_norm": 1.1201461553573608, "learning_rate": 5.363539897009617e-07, "loss": 0.3848, "step": 40470 }, { "epoch": 0.8953688580498398, "grad_norm": 1.0040712356567383, "learning_rate": 5.352318167681891e-07, "loss": 0.4127, "step": 40475 }, { "epoch": 0.8954794656913531, "grad_norm": 0.8641895055770874, "learning_rate": 5.34110786703218e-07, "loss": 0.2988, "step": 40480 }, { "epoch": 0.8955900733328663, "grad_norm": 1.269666075706482, "learning_rate": 5.32990899641418e-07, "loss": 0.2759, "step": 40485 }, { "epoch": 0.8957006809743796, "grad_norm": 1.029882550239563, "learning_rate": 5.318721557180128e-07, "loss": 0.4428, "step": 40490 }, { "epoch": 0.8958112886158929, "grad_norm": 1.5001236200332642, "learning_rate": 5.307545550680892e-07, "loss": 0.4655, "step": 40495 }, { "epoch": 0.8959218962574061, "grad_norm": 1.5104331970214844, "learning_rate": 5.296380978265991e-07, "loss": 0.2849, "step": 40500 }, { "epoch": 0.8960325038989193, "grad_norm": 0.7213101387023926, "learning_rate": 5.285227841283557e-07, "loss": 0.2414, "step": 40505 }, { "epoch": 0.8961431115404326, "grad_norm": 0.47500476241111755, "learning_rate": 5.27408614108027e-07, "loss": 0.2331, "step": 40510 }, { "epoch": 0.8962537191819459, "grad_norm": 2.9414665699005127, "learning_rate": 5.262955879001552e-07, "loss": 0.3988, "step": 40515 }, { "epoch": 0.8963643268234591, "grad_norm": 0.8100789785385132, "learning_rate": 5.251837056391363e-07, "loss": 0.344, "step": 40520 }, { "epoch": 0.8964749344649724, "grad_norm": 1.262945294380188, "learning_rate": 5.240729674592249e-07, "loss": 0.3553, "step": 40525 }, { "epoch": 0.8965855421064857, "grad_norm": 0.9181321263313293, "learning_rate": 5.229633734945494e-07, "loss": 0.3588, "step": 40530 }, { "epoch": 0.8966961497479989, "grad_norm": 1.3864487409591675, "learning_rate": 5.218549238790882e-07, "loss": 0.2244, "step": 40535 }, { "epoch": 0.8968067573895122, "grad_norm": 0.7194846868515015, "learning_rate": 5.207476187466854e-07, "loss": 0.3691, "step": 40540 }, { "epoch": 0.8969173650310255, "grad_norm": 1.3652904033660889, "learning_rate": 5.196414582310527e-07, "loss": 0.2769, "step": 40545 }, { "epoch": 0.8970279726725388, "grad_norm": 1.004054307937622, "learning_rate": 5.185364424657557e-07, "loss": 0.2938, "step": 40550 }, { "epoch": 0.8971385803140519, "grad_norm": 0.9995962977409363, "learning_rate": 5.174325715842221e-07, "loss": 0.3454, "step": 40555 }, { "epoch": 0.8972491879555652, "grad_norm": 1.7079159021377563, "learning_rate": 5.163298457197508e-07, "loss": 0.1722, "step": 40560 }, { "epoch": 0.8973597955970785, "grad_norm": 1.8418707847595215, "learning_rate": 5.152282650054874e-07, "loss": 0.4131, "step": 40565 }, { "epoch": 0.8974704032385917, "grad_norm": 0.5440486669540405, "learning_rate": 5.141278295744534e-07, "loss": 0.2855, "step": 40570 }, { "epoch": 0.897581010880105, "grad_norm": 1.064266562461853, "learning_rate": 5.130285395595258e-07, "loss": 0.3131, "step": 40575 }, { "epoch": 0.8976916185216183, "grad_norm": 1.0195866823196411, "learning_rate": 5.119303950934395e-07, "loss": 0.3753, "step": 40580 }, { "epoch": 0.8978022261631315, "grad_norm": 0.843159556388855, "learning_rate": 5.108333963087986e-07, "loss": 0.346, "step": 40585 }, { "epoch": 0.8979128338046448, "grad_norm": 0.9021183252334595, "learning_rate": 5.097375433380647e-07, "loss": 0.4808, "step": 40590 }, { "epoch": 0.8980234414461581, "grad_norm": 1.5977652072906494, "learning_rate": 5.086428363135598e-07, "loss": 0.4447, "step": 40595 }, { "epoch": 0.8981340490876712, "grad_norm": 0.9294751882553101, "learning_rate": 5.075492753674715e-07, "loss": 0.3811, "step": 40600 }, { "epoch": 0.8982446567291845, "grad_norm": 0.6877821683883667, "learning_rate": 5.064568606318477e-07, "loss": 0.2443, "step": 40605 }, { "epoch": 0.8983552643706978, "grad_norm": 1.2505638599395752, "learning_rate": 5.053655922385936e-07, "loss": 0.2557, "step": 40610 }, { "epoch": 0.8984658720122111, "grad_norm": 1.2283107042312622, "learning_rate": 5.04275470319483e-07, "loss": 0.4214, "step": 40615 }, { "epoch": 0.8985764796537243, "grad_norm": 0.5865704417228699, "learning_rate": 5.031864950061472e-07, "loss": 0.2646, "step": 40620 }, { "epoch": 0.8986870872952376, "grad_norm": 1.892011284828186, "learning_rate": 5.020986664300764e-07, "loss": 0.3626, "step": 40625 }, { "epoch": 0.8987976949367509, "grad_norm": 1.3636449575424194, "learning_rate": 5.010119847226303e-07, "loss": 0.3402, "step": 40630 }, { "epoch": 0.8989083025782642, "grad_norm": 0.5442315340042114, "learning_rate": 4.999264500150236e-07, "loss": 0.2177, "step": 40635 }, { "epoch": 0.8990189102197774, "grad_norm": 0.9121378064155579, "learning_rate": 4.988420624383306e-07, "loss": 0.3025, "step": 40640 }, { "epoch": 0.8991295178612907, "grad_norm": 1.4009236097335815, "learning_rate": 4.977588221234975e-07, "loss": 0.2239, "step": 40645 }, { "epoch": 0.8992401255028039, "grad_norm": 1.157902479171753, "learning_rate": 4.966767292013208e-07, "loss": 0.2913, "step": 40650 }, { "epoch": 0.8993507331443171, "grad_norm": 0.9633356332778931, "learning_rate": 4.955957838024617e-07, "loss": 0.4851, "step": 40655 }, { "epoch": 0.8994613407858304, "grad_norm": 1.1563513278961182, "learning_rate": 4.945159860574489e-07, "loss": 0.4165, "step": 40660 }, { "epoch": 0.8995719484273437, "grad_norm": 0.9254968762397766, "learning_rate": 4.934373360966627e-07, "loss": 0.3355, "step": 40665 }, { "epoch": 0.8996825560688569, "grad_norm": 0.6330854296684265, "learning_rate": 4.923598340503521e-07, "loss": 0.2048, "step": 40670 }, { "epoch": 0.8997931637103702, "grad_norm": 1.0753458738327026, "learning_rate": 4.912834800486277e-07, "loss": 0.1519, "step": 40675 }, { "epoch": 0.8999037713518835, "grad_norm": 0.5646929740905762, "learning_rate": 4.90208274221452e-07, "loss": 0.181, "step": 40680 }, { "epoch": 0.9000143789933968, "grad_norm": 0.8511889576911926, "learning_rate": 4.891342166986623e-07, "loss": 0.2189, "step": 40685 }, { "epoch": 0.90012498663491, "grad_norm": 1.2157007455825806, "learning_rate": 4.880613076099495e-07, "loss": 0.4251, "step": 40690 }, { "epoch": 0.9002355942764232, "grad_norm": 1.4947352409362793, "learning_rate": 4.869895470848629e-07, "loss": 0.4287, "step": 40695 }, { "epoch": 0.9003462019179365, "grad_norm": 0.8627070784568787, "learning_rate": 4.859189352528215e-07, "loss": 0.3729, "step": 40700 }, { "epoch": 0.9004568095594497, "grad_norm": 1.5162097215652466, "learning_rate": 4.848494722431018e-07, "loss": 0.3233, "step": 40705 }, { "epoch": 0.900567417200963, "grad_norm": 1.0236682891845703, "learning_rate": 4.837811581848373e-07, "loss": 0.3323, "step": 40710 }, { "epoch": 0.9006780248424763, "grad_norm": 1.950176477432251, "learning_rate": 4.82713993207029e-07, "loss": 0.2732, "step": 40715 }, { "epoch": 0.9007886324839895, "grad_norm": 0.9728613495826721, "learning_rate": 4.816479774385374e-07, "loss": 0.3193, "step": 40720 }, { "epoch": 0.9008992401255028, "grad_norm": 0.8949021100997925, "learning_rate": 4.805831110080805e-07, "loss": 0.3675, "step": 40725 }, { "epoch": 0.9010098477670161, "grad_norm": 0.6584017276763916, "learning_rate": 4.795193940442444e-07, "loss": 0.1746, "step": 40730 }, { "epoch": 0.9011204554085294, "grad_norm": 0.5628211498260498, "learning_rate": 4.784568266754719e-07, "loss": 0.3697, "step": 40735 }, { "epoch": 0.9012310630500426, "grad_norm": 0.6416778564453125, "learning_rate": 4.773954090300637e-07, "loss": 0.3221, "step": 40740 }, { "epoch": 0.9013416706915558, "grad_norm": 0.9956336617469788, "learning_rate": 4.7633514123619164e-07, "loss": 0.1811, "step": 40745 }, { "epoch": 0.9014522783330691, "grad_norm": 1.160827398300171, "learning_rate": 4.7527602342187787e-07, "loss": 0.4528, "step": 40750 }, { "epoch": 0.9015628859745823, "grad_norm": 2.0285556316375732, "learning_rate": 4.742180557150111e-07, "loss": 0.378, "step": 40755 }, { "epoch": 0.9016734936160956, "grad_norm": 0.9404196739196777, "learning_rate": 4.731612382433448e-07, "loss": 0.3829, "step": 40760 }, { "epoch": 0.9017841012576089, "grad_norm": 0.3608545958995819, "learning_rate": 4.7210557113448574e-07, "loss": 0.2774, "step": 40765 }, { "epoch": 0.9018947088991222, "grad_norm": 0.11878509074449539, "learning_rate": 4.7105105451590414e-07, "loss": 0.2141, "step": 40770 }, { "epoch": 0.9020053165406354, "grad_norm": 0.6461073160171509, "learning_rate": 4.69997688514936e-07, "loss": 0.2519, "step": 40775 }, { "epoch": 0.9021159241821487, "grad_norm": 1.688609004020691, "learning_rate": 4.689454732587717e-07, "loss": 0.3486, "step": 40780 }, { "epoch": 0.902226531823662, "grad_norm": 1.1838536262512207, "learning_rate": 4.6789440887446966e-07, "loss": 0.2041, "step": 40785 }, { "epoch": 0.9023371394651751, "grad_norm": 0.9491080045700073, "learning_rate": 4.6684449548894396e-07, "loss": 0.1894, "step": 40790 }, { "epoch": 0.9024477471066884, "grad_norm": 0.4973229467868805, "learning_rate": 4.6579573322896755e-07, "loss": 0.2347, "step": 40795 }, { "epoch": 0.9025583547482017, "grad_norm": 1.5199848413467407, "learning_rate": 4.647481222211836e-07, "loss": 0.421, "step": 40800 }, { "epoch": 0.902668962389715, "grad_norm": 0.8741992115974426, "learning_rate": 4.637016625920909e-07, "loss": 0.3913, "step": 40805 }, { "epoch": 0.9027795700312282, "grad_norm": 1.4643455743789673, "learning_rate": 4.6265635446804293e-07, "loss": 0.3316, "step": 40810 }, { "epoch": 0.9028901776727415, "grad_norm": 0.8364869356155396, "learning_rate": 4.6161219797526637e-07, "loss": 0.3321, "step": 40815 }, { "epoch": 0.9030007853142548, "grad_norm": 1.6926298141479492, "learning_rate": 4.605691932398415e-07, "loss": 0.4926, "step": 40820 }, { "epoch": 0.903111392955768, "grad_norm": 0.5823477506637573, "learning_rate": 4.5952734038770765e-07, "loss": 0.2834, "step": 40825 }, { "epoch": 0.9032220005972813, "grad_norm": 0.83785480260849, "learning_rate": 4.584866395446719e-07, "loss": 0.4148, "step": 40830 }, { "epoch": 0.9033326082387946, "grad_norm": 0.681821346282959, "learning_rate": 4.5744709083639817e-07, "loss": 0.2092, "step": 40835 }, { "epoch": 0.9034432158803077, "grad_norm": 0.8987097144126892, "learning_rate": 4.564086943884083e-07, "loss": 0.2942, "step": 40840 }, { "epoch": 0.903553823521821, "grad_norm": 0.6557687520980835, "learning_rate": 4.5537145032609196e-07, "loss": 0.4137, "step": 40845 }, { "epoch": 0.9036644311633343, "grad_norm": 0.7199277281761169, "learning_rate": 4.5433535877469684e-07, "loss": 0.1983, "step": 40850 }, { "epoch": 0.9037750388048476, "grad_norm": 1.08755362033844, "learning_rate": 4.5330041985932604e-07, "loss": 0.3391, "step": 40855 }, { "epoch": 0.9038856464463608, "grad_norm": 1.0273336172103882, "learning_rate": 4.5226663370495193e-07, "loss": 0.1897, "step": 40860 }, { "epoch": 0.9039962540878741, "grad_norm": 1.2052626609802246, "learning_rate": 4.5123400043640355e-07, "loss": 0.1994, "step": 40865 }, { "epoch": 0.9041068617293874, "grad_norm": 0.6935741305351257, "learning_rate": 4.5020252017836664e-07, "loss": 0.2801, "step": 40870 }, { "epoch": 0.9042174693709006, "grad_norm": 0.9037285447120667, "learning_rate": 4.491721930554005e-07, "loss": 0.4345, "step": 40875 }, { "epoch": 0.9043280770124139, "grad_norm": 1.059486746788025, "learning_rate": 4.4814301919190895e-07, "loss": 0.3449, "step": 40880 }, { "epoch": 0.9044386846539271, "grad_norm": 1.478493571281433, "learning_rate": 4.47114998712167e-07, "loss": 0.4094, "step": 40885 }, { "epoch": 0.9045492922954403, "grad_norm": 1.0637768507003784, "learning_rate": 4.4608813174030986e-07, "loss": 0.3482, "step": 40890 }, { "epoch": 0.9046598999369536, "grad_norm": 1.203440546989441, "learning_rate": 4.450624184003283e-07, "loss": 0.3458, "step": 40895 }, { "epoch": 0.9047705075784669, "grad_norm": 1.0103988647460938, "learning_rate": 4.440378588160799e-07, "loss": 0.3452, "step": 40900 }, { "epoch": 0.9048811152199802, "grad_norm": 0.6111621856689453, "learning_rate": 4.43014453111279e-07, "loss": 0.235, "step": 40905 }, { "epoch": 0.9049917228614934, "grad_norm": 0.7597635984420776, "learning_rate": 4.4199220140949797e-07, "loss": 0.2778, "step": 40910 }, { "epoch": 0.9051023305030067, "grad_norm": 1.00594961643219, "learning_rate": 4.4097110383417794e-07, "loss": 0.3516, "step": 40915 }, { "epoch": 0.90521293814452, "grad_norm": 1.0197616815567017, "learning_rate": 4.3995116050861596e-07, "loss": 0.3648, "step": 40920 }, { "epoch": 0.9053235457860332, "grad_norm": 0.7247281670570374, "learning_rate": 4.389323715559657e-07, "loss": 0.3206, "step": 40925 }, { "epoch": 0.9054341534275465, "grad_norm": 1.482049822807312, "learning_rate": 4.3791473709924983e-07, "loss": 0.3383, "step": 40930 }, { "epoch": 0.9055447610690597, "grad_norm": 1.2649691104888916, "learning_rate": 4.368982572613467e-07, "loss": 0.1942, "step": 40935 }, { "epoch": 0.905655368710573, "grad_norm": 1.2427544593811035, "learning_rate": 4.3588293216499265e-07, "loss": 0.3387, "step": 40940 }, { "epoch": 0.9057659763520862, "grad_norm": 0.7675336599349976, "learning_rate": 4.3486876193279184e-07, "loss": 0.3766, "step": 40945 }, { "epoch": 0.9058765839935995, "grad_norm": 0.7832142114639282, "learning_rate": 4.33855746687204e-07, "loss": 0.2502, "step": 40950 }, { "epoch": 0.9059871916351128, "grad_norm": 0.9720451831817627, "learning_rate": 4.32843886550548e-07, "loss": 0.3068, "step": 40955 }, { "epoch": 0.906097799276626, "grad_norm": 1.558855414390564, "learning_rate": 4.3183318164501053e-07, "loss": 0.3579, "step": 40960 }, { "epoch": 0.9062084069181393, "grad_norm": 0.9162998199462891, "learning_rate": 4.3082363209262843e-07, "loss": 0.2327, "step": 40965 }, { "epoch": 0.9063190145596526, "grad_norm": 0.8499830365180969, "learning_rate": 4.2981523801530647e-07, "loss": 0.426, "step": 40970 }, { "epoch": 0.9064296222011659, "grad_norm": 1.0382695198059082, "learning_rate": 4.2880799953481044e-07, "loss": 0.377, "step": 40975 }, { "epoch": 0.906540229842679, "grad_norm": 1.4652233123779297, "learning_rate": 4.2780191677276205e-07, "loss": 0.3291, "step": 40980 }, { "epoch": 0.9066508374841923, "grad_norm": 0.5886197090148926, "learning_rate": 4.26796989850643e-07, "loss": 0.2591, "step": 40985 }, { "epoch": 0.9067614451257056, "grad_norm": 1.082015037536621, "learning_rate": 4.2579321888980285e-07, "loss": 0.3398, "step": 40990 }, { "epoch": 0.9068720527672188, "grad_norm": 1.441747784614563, "learning_rate": 4.2479060401144246e-07, "loss": 0.3203, "step": 40995 }, { "epoch": 0.9069826604087321, "grad_norm": 1.4751254320144653, "learning_rate": 4.237891453366294e-07, "loss": 0.4053, "step": 41000 }, { "epoch": 0.9070932680502454, "grad_norm": 0.8976525664329529, "learning_rate": 4.227888429862903e-07, "loss": 0.3008, "step": 41005 }, { "epoch": 0.9072038756917586, "grad_norm": 0.8505257368087769, "learning_rate": 4.217896970812085e-07, "loss": 0.3399, "step": 41010 }, { "epoch": 0.9073144833332719, "grad_norm": 1.582745909690857, "learning_rate": 4.2079170774203315e-07, "loss": 0.3199, "step": 41015 }, { "epoch": 0.9074250909747852, "grad_norm": 1.562455177307129, "learning_rate": 4.1979487508927106e-07, "loss": 0.4754, "step": 41020 }, { "epoch": 0.9075356986162985, "grad_norm": 0.8445956707000732, "learning_rate": 4.1879919924328604e-07, "loss": 0.2265, "step": 41025 }, { "epoch": 0.9076463062578116, "grad_norm": 1.1734281778335571, "learning_rate": 4.1780468032430857e-07, "loss": 0.3254, "step": 41030 }, { "epoch": 0.9077569138993249, "grad_norm": 1.1330463886260986, "learning_rate": 4.1681131845242915e-07, "loss": 0.2264, "step": 41035 }, { "epoch": 0.9078675215408382, "grad_norm": 1.9123331308364868, "learning_rate": 4.1581911374758867e-07, "loss": 0.4338, "step": 41040 }, { "epoch": 0.9079781291823514, "grad_norm": 1.1462239027023315, "learning_rate": 4.1482806632960227e-07, "loss": 0.3179, "step": 41045 }, { "epoch": 0.9080887368238647, "grad_norm": 0.7147899270057678, "learning_rate": 4.138381763181365e-07, "loss": 0.4771, "step": 41050 }, { "epoch": 0.908199344465378, "grad_norm": 1.423324704170227, "learning_rate": 4.1284944383271797e-07, "loss": 0.2702, "step": 41055 }, { "epoch": 0.9083099521068912, "grad_norm": 1.3891764879226685, "learning_rate": 4.1186186899273896e-07, "loss": 0.3201, "step": 41060 }, { "epoch": 0.9084205597484045, "grad_norm": 2.927061080932617, "learning_rate": 4.108754519174485e-07, "loss": 0.3648, "step": 41065 }, { "epoch": 0.9085311673899178, "grad_norm": 1.2873934507369995, "learning_rate": 4.0989019272595243e-07, "loss": 0.3966, "step": 41070 }, { "epoch": 0.908641775031431, "grad_norm": 1.2186603546142578, "learning_rate": 4.089060915372256e-07, "loss": 0.3518, "step": 41075 }, { "epoch": 0.9087523826729442, "grad_norm": 1.1836564540863037, "learning_rate": 4.079231484700952e-07, "loss": 0.3518, "step": 41080 }, { "epoch": 0.9088629903144575, "grad_norm": 1.3523433208465576, "learning_rate": 4.0694136364324956e-07, "loss": 0.3178, "step": 41085 }, { "epoch": 0.9089735979559708, "grad_norm": 1.0266845226287842, "learning_rate": 4.0596073717524385e-07, "loss": 0.3158, "step": 41090 }, { "epoch": 0.909084205597484, "grad_norm": 2.04609751701355, "learning_rate": 4.0498126918448453e-07, "loss": 0.3852, "step": 41095 }, { "epoch": 0.9091948132389973, "grad_norm": 0.6671724319458008, "learning_rate": 4.0400295978924143e-07, "loss": 0.3141, "step": 41100 }, { "epoch": 0.9093054208805106, "grad_norm": 1.0980671644210815, "learning_rate": 4.0302580910764997e-07, "loss": 0.3721, "step": 41105 }, { "epoch": 0.9094160285220239, "grad_norm": 0.9052223563194275, "learning_rate": 4.0204981725769363e-07, "loss": 0.2806, "step": 41110 }, { "epoch": 0.9095266361635371, "grad_norm": 2.664506196975708, "learning_rate": 4.010749843572304e-07, "loss": 0.2679, "step": 41115 }, { "epoch": 0.9096372438050504, "grad_norm": 0.9375569820404053, "learning_rate": 4.0010131052396704e-07, "loss": 0.213, "step": 41120 }, { "epoch": 0.9097478514465636, "grad_norm": 1.282954454421997, "learning_rate": 3.9912879587547417e-07, "loss": 0.3979, "step": 41125 }, { "epoch": 0.9098584590880768, "grad_norm": 1.0520490407943726, "learning_rate": 3.9815744052918324e-07, "loss": 0.2404, "step": 41130 }, { "epoch": 0.9099690667295901, "grad_norm": 1.0060948133468628, "learning_rate": 3.971872446023872e-07, "loss": 0.3981, "step": 41135 }, { "epoch": 0.9100796743711034, "grad_norm": 0.6616767644882202, "learning_rate": 3.9621820821223347e-07, "loss": 0.2569, "step": 41140 }, { "epoch": 0.9101902820126166, "grad_norm": 1.2681711912155151, "learning_rate": 3.95250331475735e-07, "loss": 0.3545, "step": 41145 }, { "epoch": 0.9103008896541299, "grad_norm": 0.8053475618362427, "learning_rate": 3.9428361450976284e-07, "loss": 0.2956, "step": 41150 }, { "epoch": 0.9104114972956432, "grad_norm": 0.9081950783729553, "learning_rate": 3.9331805743104577e-07, "loss": 0.2732, "step": 41155 }, { "epoch": 0.9105221049371565, "grad_norm": 0.9261406660079956, "learning_rate": 3.9235366035617617e-07, "loss": 0.3907, "step": 41160 }, { "epoch": 0.9106327125786697, "grad_norm": 1.2307953834533691, "learning_rate": 3.913904234016053e-07, "loss": 0.2511, "step": 41165 }, { "epoch": 0.9107433202201829, "grad_norm": 2.4570066928863525, "learning_rate": 3.9042834668364116e-07, "loss": 0.1577, "step": 41170 }, { "epoch": 0.9108539278616962, "grad_norm": 0.7512305974960327, "learning_rate": 3.8946743031845755e-07, "loss": 0.2434, "step": 41175 }, { "epoch": 0.9109645355032094, "grad_norm": 0.8808436989784241, "learning_rate": 3.885076744220817e-07, "loss": 0.2937, "step": 41180 }, { "epoch": 0.9110751431447227, "grad_norm": 1.18820321559906, "learning_rate": 3.875490791104042e-07, "loss": 0.4099, "step": 41185 }, { "epoch": 0.911185750786236, "grad_norm": 0.7828856706619263, "learning_rate": 3.8659164449917796e-07, "loss": 0.3325, "step": 41190 }, { "epoch": 0.9112963584277493, "grad_norm": 0.7504787445068359, "learning_rate": 3.856353707040106e-07, "loss": 0.3397, "step": 41195 }, { "epoch": 0.9114069660692625, "grad_norm": 0.9390162229537964, "learning_rate": 3.846802578403708e-07, "loss": 0.3993, "step": 41200 }, { "epoch": 0.9115175737107758, "grad_norm": 1.6038442850112915, "learning_rate": 3.8372630602359183e-07, "loss": 0.4143, "step": 41205 }, { "epoch": 0.9116281813522891, "grad_norm": 1.4378738403320312, "learning_rate": 3.827735153688594e-07, "loss": 0.4266, "step": 41210 }, { "epoch": 0.9117387889938023, "grad_norm": 0.934909999370575, "learning_rate": 3.818218859912248e-07, "loss": 0.3226, "step": 41215 }, { "epoch": 0.9118493966353155, "grad_norm": 1.101787805557251, "learning_rate": 3.8087141800559725e-07, "loss": 0.2834, "step": 41220 }, { "epoch": 0.9119600042768288, "grad_norm": 1.0792362689971924, "learning_rate": 3.7992211152674264e-07, "loss": 0.434, "step": 41225 }, { "epoch": 0.912070611918342, "grad_norm": 1.788314700126648, "learning_rate": 3.7897396666929266e-07, "loss": 0.2992, "step": 41230 }, { "epoch": 0.9121812195598553, "grad_norm": 0.700412929058075, "learning_rate": 3.780269835477346e-07, "loss": 0.228, "step": 41235 }, { "epoch": 0.9122918272013686, "grad_norm": 0.7771845459938049, "learning_rate": 3.7708116227641477e-07, "loss": 0.3904, "step": 41240 }, { "epoch": 0.9124024348428819, "grad_norm": 1.198654294013977, "learning_rate": 3.7613650296954297e-07, "loss": 0.2618, "step": 41245 }, { "epoch": 0.9125130424843951, "grad_norm": 0.8339768648147583, "learning_rate": 3.7519300574118565e-07, "loss": 0.2697, "step": 41250 }, { "epoch": 0.9126236501259084, "grad_norm": 1.381704330444336, "learning_rate": 3.7425067070526843e-07, "loss": 0.3678, "step": 41255 }, { "epoch": 0.9127342577674217, "grad_norm": 0.584433376789093, "learning_rate": 3.7330949797558023e-07, "loss": 0.2707, "step": 41260 }, { "epoch": 0.9128448654089348, "grad_norm": 1.2281696796417236, "learning_rate": 3.72369487665768e-07, "loss": 0.501, "step": 41265 }, { "epoch": 0.9129554730504481, "grad_norm": 1.296688199043274, "learning_rate": 3.7143063988933306e-07, "loss": 0.3655, "step": 41270 }, { "epoch": 0.9130660806919614, "grad_norm": 2.0109269618988037, "learning_rate": 3.704929547596459e-07, "loss": 0.3024, "step": 41275 }, { "epoch": 0.9131766883334747, "grad_norm": 1.236590027809143, "learning_rate": 3.6955643238993036e-07, "loss": 0.3882, "step": 41280 }, { "epoch": 0.9132872959749879, "grad_norm": 0.7802353501319885, "learning_rate": 3.686210728932682e-07, "loss": 0.2106, "step": 41285 }, { "epoch": 0.9133979036165012, "grad_norm": 0.48881375789642334, "learning_rate": 3.6768687638260915e-07, "loss": 0.2248, "step": 41290 }, { "epoch": 0.9135085112580145, "grad_norm": 0.7981859445571899, "learning_rate": 3.667538429707529e-07, "loss": 0.2537, "step": 41295 }, { "epoch": 0.9136191188995277, "grad_norm": 1.5287368297576904, "learning_rate": 3.658219727703627e-07, "loss": 0.3198, "step": 41300 }, { "epoch": 0.913729726541041, "grad_norm": 0.9699603915214539, "learning_rate": 3.6489126589396627e-07, "loss": 0.2646, "step": 41305 }, { "epoch": 0.9138403341825542, "grad_norm": 1.086338996887207, "learning_rate": 3.6396172245394156e-07, "loss": 0.3577, "step": 41310 }, { "epoch": 0.9139509418240674, "grad_norm": 1.1512901782989502, "learning_rate": 3.63033342562531e-07, "loss": 0.3892, "step": 41315 }, { "epoch": 0.9140615494655807, "grad_norm": 0.9237080216407776, "learning_rate": 3.621061263318404e-07, "loss": 0.2338, "step": 41320 }, { "epoch": 0.914172157107094, "grad_norm": 0.7518344521522522, "learning_rate": 3.611800738738247e-07, "loss": 0.3072, "step": 41325 }, { "epoch": 0.9142827647486073, "grad_norm": 1.0537610054016113, "learning_rate": 3.602551853003089e-07, "loss": 0.4441, "step": 41330 }, { "epoch": 0.9143933723901205, "grad_norm": 0.9070621132850647, "learning_rate": 3.593314607229736e-07, "loss": 0.212, "step": 41335 }, { "epoch": 0.9145039800316338, "grad_norm": 1.1837893724441528, "learning_rate": 3.584089002533531e-07, "loss": 0.435, "step": 41340 }, { "epoch": 0.9146145876731471, "grad_norm": 2.1229376792907715, "learning_rate": 3.5748750400285027e-07, "loss": 0.3234, "step": 41345 }, { "epoch": 0.9147251953146603, "grad_norm": 1.1959320306777954, "learning_rate": 3.5656727208272513e-07, "loss": 0.4156, "step": 41350 }, { "epoch": 0.9148358029561736, "grad_norm": 1.3288501501083374, "learning_rate": 3.556482046040888e-07, "loss": 0.3061, "step": 41355 }, { "epoch": 0.9149464105976868, "grad_norm": 0.8830294013023376, "learning_rate": 3.5473030167792465e-07, "loss": 0.3514, "step": 41360 }, { "epoch": 0.9150570182392, "grad_norm": 0.8574225902557373, "learning_rate": 3.5381356341506856e-07, "loss": 0.2398, "step": 41365 }, { "epoch": 0.9151676258807133, "grad_norm": 1.0382907390594482, "learning_rate": 3.5289798992621195e-07, "loss": 0.2684, "step": 41370 }, { "epoch": 0.9152782335222266, "grad_norm": 0.8326656222343445, "learning_rate": 3.519835813219152e-07, "loss": 0.5033, "step": 41375 }, { "epoch": 0.9153888411637399, "grad_norm": 1.1679272651672363, "learning_rate": 3.510703377125901e-07, "loss": 0.3957, "step": 41380 }, { "epoch": 0.9154994488052531, "grad_norm": 1.2001878023147583, "learning_rate": 3.5015825920850954e-07, "loss": 0.3313, "step": 41385 }, { "epoch": 0.9156100564467664, "grad_norm": 1.0890902280807495, "learning_rate": 3.492473459198109e-07, "loss": 0.274, "step": 41390 }, { "epoch": 0.9157206640882797, "grad_norm": 0.9509785175323486, "learning_rate": 3.483375979564829e-07, "loss": 0.3971, "step": 41395 }, { "epoch": 0.915831271729793, "grad_norm": 1.4872878789901733, "learning_rate": 3.474290154283766e-07, "loss": 0.1849, "step": 41400 }, { "epoch": 0.9159418793713061, "grad_norm": 1.0189108848571777, "learning_rate": 3.465215984452075e-07, "loss": 0.2355, "step": 41405 }, { "epoch": 0.9160524870128194, "grad_norm": 1.6178746223449707, "learning_rate": 3.456153471165413e-07, "loss": 0.3548, "step": 41410 }, { "epoch": 0.9161630946543327, "grad_norm": 1.2115981578826904, "learning_rate": 3.447102615518094e-07, "loss": 0.3506, "step": 41415 }, { "epoch": 0.9162737022958459, "grad_norm": 1.1854350566864014, "learning_rate": 3.438063418603033e-07, "loss": 0.2324, "step": 41420 }, { "epoch": 0.9163843099373592, "grad_norm": 0.679132878780365, "learning_rate": 3.4290358815116666e-07, "loss": 0.3544, "step": 41425 }, { "epoch": 0.9164949175788725, "grad_norm": 0.8062170147895813, "learning_rate": 3.4200200053340795e-07, "loss": 0.4406, "step": 41430 }, { "epoch": 0.9166055252203857, "grad_norm": 0.421016126871109, "learning_rate": 3.4110157911589556e-07, "loss": 0.2561, "step": 41435 }, { "epoch": 0.916716132861899, "grad_norm": 1.3531185388565063, "learning_rate": 3.4020232400735156e-07, "loss": 0.3807, "step": 41440 }, { "epoch": 0.9168267405034123, "grad_norm": 1.4382437467575073, "learning_rate": 3.3930423531636446e-07, "loss": 0.3581, "step": 41445 }, { "epoch": 0.9169373481449256, "grad_norm": 1.0524142980575562, "learning_rate": 3.3840731315137767e-07, "loss": 0.2788, "step": 41450 }, { "epoch": 0.9170479557864387, "grad_norm": 1.023908019065857, "learning_rate": 3.375115576206911e-07, "loss": 0.3709, "step": 41455 }, { "epoch": 0.917158563427952, "grad_norm": 1.0291461944580078, "learning_rate": 3.366169688324694e-07, "loss": 0.3091, "step": 41460 }, { "epoch": 0.9172691710694653, "grad_norm": 0.8258234858512878, "learning_rate": 3.357235468947351e-07, "loss": 0.2468, "step": 41465 }, { "epoch": 0.9173797787109785, "grad_norm": 1.6350270509719849, "learning_rate": 3.348312919153651e-07, "loss": 0.3358, "step": 41470 }, { "epoch": 0.9174903863524918, "grad_norm": 1.0912561416625977, "learning_rate": 3.3394020400210115e-07, "loss": 0.3983, "step": 41475 }, { "epoch": 0.9176009939940051, "grad_norm": 0.9220173954963684, "learning_rate": 3.3305028326254374e-07, "loss": 0.4083, "step": 41480 }, { "epoch": 0.9177116016355183, "grad_norm": 1.1265654563903809, "learning_rate": 3.3216152980414583e-07, "loss": 0.4266, "step": 41485 }, { "epoch": 0.9178222092770316, "grad_norm": 1.367092251777649, "learning_rate": 3.3127394373422715e-07, "loss": 0.3104, "step": 41490 }, { "epoch": 0.9179328169185449, "grad_norm": 2.064814567565918, "learning_rate": 3.303875251599642e-07, "loss": 0.3175, "step": 41495 }, { "epoch": 0.918043424560058, "grad_norm": 1.4814496040344238, "learning_rate": 3.29502274188388e-07, "loss": 0.2091, "step": 41500 }, { "epoch": 0.9181540322015713, "grad_norm": 1.1437684297561646, "learning_rate": 3.286181909263975e-07, "loss": 0.4404, "step": 41505 }, { "epoch": 0.9182646398430846, "grad_norm": 0.8768019676208496, "learning_rate": 3.277352754807417e-07, "loss": 0.1694, "step": 41510 }, { "epoch": 0.9183752474845979, "grad_norm": 0.4044598937034607, "learning_rate": 3.2685352795803204e-07, "loss": 0.2468, "step": 41515 }, { "epoch": 0.9184858551261111, "grad_norm": 1.2845650911331177, "learning_rate": 3.2597294846474226e-07, "loss": 0.4749, "step": 41520 }, { "epoch": 0.9185964627676244, "grad_norm": 1.1585150957107544, "learning_rate": 3.250935371072006e-07, "loss": 0.3586, "step": 41525 }, { "epoch": 0.9187070704091377, "grad_norm": 1.1158885955810547, "learning_rate": 3.2421529399159325e-07, "loss": 0.2674, "step": 41530 }, { "epoch": 0.918817678050651, "grad_norm": 1.3046917915344238, "learning_rate": 3.233382192239731e-07, "loss": 0.3055, "step": 41535 }, { "epoch": 0.9189282856921642, "grad_norm": 1.1150754690170288, "learning_rate": 3.224623129102411e-07, "loss": 0.2743, "step": 41540 }, { "epoch": 0.9190388933336775, "grad_norm": 1.6532889604568481, "learning_rate": 3.21587575156167e-07, "loss": 0.2887, "step": 41545 }, { "epoch": 0.9191495009751907, "grad_norm": 1.157412052154541, "learning_rate": 3.2071400606737304e-07, "loss": 0.2993, "step": 41550 }, { "epoch": 0.9192601086167039, "grad_norm": 1.0275039672851562, "learning_rate": 3.1984160574934144e-07, "loss": 0.3071, "step": 41555 }, { "epoch": 0.9193707162582172, "grad_norm": 1.2201685905456543, "learning_rate": 3.1897037430741683e-07, "loss": 0.4876, "step": 41560 }, { "epoch": 0.9194813238997305, "grad_norm": 0.8914898633956909, "learning_rate": 3.181003118467996e-07, "loss": 0.4207, "step": 41565 }, { "epoch": 0.9195919315412437, "grad_norm": 0.5917794108390808, "learning_rate": 3.172314184725456e-07, "loss": 0.204, "step": 41570 }, { "epoch": 0.919702539182757, "grad_norm": 0.6983105540275574, "learning_rate": 3.163636942895787e-07, "loss": 0.2699, "step": 41575 }, { "epoch": 0.9198131468242703, "grad_norm": 1.2462643384933472, "learning_rate": 3.154971394026751e-07, "loss": 0.4303, "step": 41580 }, { "epoch": 0.9199237544657836, "grad_norm": 1.1455514430999756, "learning_rate": 3.1463175391646785e-07, "loss": 0.2923, "step": 41585 }, { "epoch": 0.9200343621072968, "grad_norm": 1.3111141920089722, "learning_rate": 3.1376753793545657e-07, "loss": 0.3249, "step": 41590 }, { "epoch": 0.92014496974881, "grad_norm": 0.7447020411491394, "learning_rate": 3.1290449156399227e-07, "loss": 0.369, "step": 41595 }, { "epoch": 0.9202555773903233, "grad_norm": 1.247045874595642, "learning_rate": 3.1204261490628606e-07, "loss": 0.2792, "step": 41600 }, { "epoch": 0.9203661850318365, "grad_norm": 2.307407855987549, "learning_rate": 3.111819080664136e-07, "loss": 0.2761, "step": 41605 }, { "epoch": 0.9204767926733498, "grad_norm": 0.8077203035354614, "learning_rate": 3.1032237114830167e-07, "loss": 0.2874, "step": 41610 }, { "epoch": 0.9205874003148631, "grad_norm": 0.809786319732666, "learning_rate": 3.094640042557395e-07, "loss": 0.5301, "step": 41615 }, { "epoch": 0.9206980079563764, "grad_norm": 0.7333770394325256, "learning_rate": 3.0860680749237646e-07, "loss": 0.299, "step": 41620 }, { "epoch": 0.9208086155978896, "grad_norm": 1.4131258726119995, "learning_rate": 3.077507809617164e-07, "loss": 0.4161, "step": 41625 }, { "epoch": 0.9209192232394029, "grad_norm": 1.2033125162124634, "learning_rate": 3.068959247671244e-07, "loss": 0.3726, "step": 41630 }, { "epoch": 0.9210298308809162, "grad_norm": 0.6247822642326355, "learning_rate": 3.0604223901182784e-07, "loss": 0.3847, "step": 41635 }, { "epoch": 0.9211404385224294, "grad_norm": 1.1465866565704346, "learning_rate": 3.051897237989043e-07, "loss": 0.2618, "step": 41640 }, { "epoch": 0.9212510461639426, "grad_norm": 1.2746096849441528, "learning_rate": 3.0433837923129593e-07, "loss": 0.4578, "step": 41645 }, { "epoch": 0.9213616538054559, "grad_norm": 0.4193029999732971, "learning_rate": 3.034882054118049e-07, "loss": 0.2696, "step": 41650 }, { "epoch": 0.9214722614469691, "grad_norm": 0.25959452986717224, "learning_rate": 3.0263920244308577e-07, "loss": 0.0964, "step": 41655 }, { "epoch": 0.9215828690884824, "grad_norm": 1.266236662864685, "learning_rate": 3.017913704276576e-07, "loss": 0.2575, "step": 41660 }, { "epoch": 0.9216934767299957, "grad_norm": 1.054120659828186, "learning_rate": 3.009447094678963e-07, "loss": 0.3535, "step": 41665 }, { "epoch": 0.921804084371509, "grad_norm": 0.9322411417961121, "learning_rate": 3.000992196660335e-07, "loss": 0.391, "step": 41670 }, { "epoch": 0.9219146920130222, "grad_norm": 1.1354581117630005, "learning_rate": 2.992549011241641e-07, "loss": 0.6312, "step": 41675 }, { "epoch": 0.9220252996545355, "grad_norm": 1.1191949844360352, "learning_rate": 2.984117539442399e-07, "loss": 0.4408, "step": 41680 }, { "epoch": 0.9221359072960488, "grad_norm": 1.2480303049087524, "learning_rate": 2.975697782280673e-07, "loss": 0.3975, "step": 41685 }, { "epoch": 0.9222465149375619, "grad_norm": 1.2095210552215576, "learning_rate": 2.9672897407731714e-07, "loss": 0.3048, "step": 41690 }, { "epoch": 0.9223571225790752, "grad_norm": 0.7688949704170227, "learning_rate": 2.9588934159351713e-07, "loss": 0.333, "step": 41695 }, { "epoch": 0.9224677302205885, "grad_norm": 1.3592431545257568, "learning_rate": 2.9505088087804944e-07, "loss": 0.4181, "step": 41700 }, { "epoch": 0.9225783378621017, "grad_norm": 0.8039155006408691, "learning_rate": 2.942135920321598e-07, "loss": 0.2754, "step": 41705 }, { "epoch": 0.922688945503615, "grad_norm": 1.137308120727539, "learning_rate": 2.933774751569529e-07, "loss": 0.2346, "step": 41710 }, { "epoch": 0.9227995531451283, "grad_norm": 1.022757887840271, "learning_rate": 2.9254253035338354e-07, "loss": 0.3664, "step": 41715 }, { "epoch": 0.9229101607866416, "grad_norm": 1.0720776319503784, "learning_rate": 2.917087577222777e-07, "loss": 0.3321, "step": 41720 }, { "epoch": 0.9230207684281548, "grad_norm": 1.008287787437439, "learning_rate": 2.908761573643093e-07, "loss": 0.3382, "step": 41725 }, { "epoch": 0.9231313760696681, "grad_norm": 0.6085343360900879, "learning_rate": 2.9004472938001347e-07, "loss": 0.2886, "step": 41730 }, { "epoch": 0.9232419837111814, "grad_norm": 0.9351215362548828, "learning_rate": 2.8921447386979e-07, "loss": 0.5414, "step": 41735 }, { "epoch": 0.9233525913526945, "grad_norm": 0.7143120765686035, "learning_rate": 2.883853909338874e-07, "loss": 0.3429, "step": 41740 }, { "epoch": 0.9234631989942078, "grad_norm": 0.692288875579834, "learning_rate": 2.875574806724168e-07, "loss": 0.318, "step": 41745 }, { "epoch": 0.9235738066357211, "grad_norm": 0.7367690205574036, "learning_rate": 2.8673074318535275e-07, "loss": 0.3094, "step": 41750 }, { "epoch": 0.9236844142772344, "grad_norm": 1.2362786531448364, "learning_rate": 2.8590517857251865e-07, "loss": 0.3008, "step": 41755 }, { "epoch": 0.9237950219187476, "grad_norm": 0.816981852054596, "learning_rate": 2.8508078693360363e-07, "loss": 0.374, "step": 41760 }, { "epoch": 0.9239056295602609, "grad_norm": 1.0829453468322754, "learning_rate": 2.8425756836815477e-07, "loss": 0.3692, "step": 41765 }, { "epoch": 0.9240162372017742, "grad_norm": 3.751542806625366, "learning_rate": 2.834355229755692e-07, "loss": 0.3115, "step": 41770 }, { "epoch": 0.9241268448432874, "grad_norm": 1.5391433238983154, "learning_rate": 2.826146508551142e-07, "loss": 0.4782, "step": 41775 }, { "epoch": 0.9242374524848007, "grad_norm": 1.1489992141723633, "learning_rate": 2.8179495210590935e-07, "loss": 0.2852, "step": 41780 }, { "epoch": 0.9243480601263139, "grad_norm": 0.9262776970863342, "learning_rate": 2.809764268269299e-07, "loss": 0.3522, "step": 41785 }, { "epoch": 0.9244586677678271, "grad_norm": 1.522536277770996, "learning_rate": 2.8015907511701466e-07, "loss": 0.3781, "step": 41790 }, { "epoch": 0.9245692754093404, "grad_norm": 0.7602736353874207, "learning_rate": 2.793428970748602e-07, "loss": 0.2751, "step": 41795 }, { "epoch": 0.9246798830508537, "grad_norm": 1.315879225730896, "learning_rate": 2.7852789279901425e-07, "loss": 0.2577, "step": 41800 }, { "epoch": 0.924790490692367, "grad_norm": 0.9746664762496948, "learning_rate": 2.777140623878949e-07, "loss": 0.2872, "step": 41805 }, { "epoch": 0.9249010983338802, "grad_norm": 1.7469830513000488, "learning_rate": 2.76901405939769e-07, "loss": 0.2588, "step": 41810 }, { "epoch": 0.9250117059753935, "grad_norm": 0.6596074104309082, "learning_rate": 2.7608992355276144e-07, "loss": 0.3514, "step": 41815 }, { "epoch": 0.9251223136169068, "grad_norm": 0.9947226643562317, "learning_rate": 2.752796153248649e-07, "loss": 0.3238, "step": 41820 }, { "epoch": 0.92523292125842, "grad_norm": 0.5721462965011597, "learning_rate": 2.7447048135391895e-07, "loss": 0.3083, "step": 41825 }, { "epoch": 0.9253435288999333, "grad_norm": 0.4156166613101959, "learning_rate": 2.7366252173762765e-07, "loss": 0.4023, "step": 41830 }, { "epoch": 0.9254541365414465, "grad_norm": 1.8318527936935425, "learning_rate": 2.728557365735529e-07, "loss": 0.4175, "step": 41835 }, { "epoch": 0.9255647441829598, "grad_norm": 0.9619015455245972, "learning_rate": 2.720501259591124e-07, "loss": 0.2898, "step": 41840 }, { "epoch": 0.925675351824473, "grad_norm": 0.8466647863388062, "learning_rate": 2.7124568999158275e-07, "loss": 0.259, "step": 41845 }, { "epoch": 0.9257859594659863, "grad_norm": 1.978362798690796, "learning_rate": 2.7044242876810287e-07, "loss": 0.2359, "step": 41850 }, { "epoch": 0.9258965671074996, "grad_norm": 1.0137200355529785, "learning_rate": 2.6964034238566193e-07, "loss": 0.3756, "step": 41855 }, { "epoch": 0.9260071747490128, "grad_norm": 1.2795724868774414, "learning_rate": 2.6883943094111355e-07, "loss": 0.4509, "step": 41860 }, { "epoch": 0.9261177823905261, "grad_norm": 0.791984498500824, "learning_rate": 2.680396945311692e-07, "loss": 0.3114, "step": 41865 }, { "epoch": 0.9262283900320394, "grad_norm": 1.0069798231124878, "learning_rate": 2.6724113325239277e-07, "loss": 0.4083, "step": 41870 }, { "epoch": 0.9263389976735527, "grad_norm": 1.609898567199707, "learning_rate": 2.664437472012138e-07, "loss": 0.2781, "step": 41875 }, { "epoch": 0.9264496053150658, "grad_norm": 0.7637636661529541, "learning_rate": 2.6564753647391637e-07, "loss": 0.4682, "step": 41880 }, { "epoch": 0.9265602129565791, "grad_norm": 0.920247495174408, "learning_rate": 2.648525011666392e-07, "loss": 0.295, "step": 41885 }, { "epoch": 0.9266708205980924, "grad_norm": 1.1404829025268555, "learning_rate": 2.640586413753865e-07, "loss": 0.3015, "step": 41890 }, { "epoch": 0.9267814282396056, "grad_norm": 1.0565396547317505, "learning_rate": 2.6326595719601613e-07, "loss": 0.2682, "step": 41895 }, { "epoch": 0.9268920358811189, "grad_norm": 1.0914427042007446, "learning_rate": 2.624744487242403e-07, "loss": 0.3427, "step": 41900 }, { "epoch": 0.9270026435226322, "grad_norm": 0.891327440738678, "learning_rate": 2.6168411605563826e-07, "loss": 0.4049, "step": 41905 }, { "epoch": 0.9271132511641454, "grad_norm": 1.7160403728485107, "learning_rate": 2.608949592856425e-07, "loss": 0.3226, "step": 41910 }, { "epoch": 0.9272238588056587, "grad_norm": 1.394615888595581, "learning_rate": 2.601069785095378e-07, "loss": 0.3123, "step": 41915 }, { "epoch": 0.927334466447172, "grad_norm": 1.160380244255066, "learning_rate": 2.593201738224782e-07, "loss": 0.3392, "step": 41920 }, { "epoch": 0.9274450740886853, "grad_norm": 1.1008005142211914, "learning_rate": 2.5853454531946985e-07, "loss": 0.3219, "step": 41925 }, { "epoch": 0.9275556817301984, "grad_norm": 0.8823603391647339, "learning_rate": 2.577500930953725e-07, "loss": 0.2907, "step": 41930 }, { "epoch": 0.9276662893717117, "grad_norm": 1.0777233839035034, "learning_rate": 2.569668172449147e-07, "loss": 0.2412, "step": 41935 }, { "epoch": 0.927776897013225, "grad_norm": 1.439064383506775, "learning_rate": 2.5618471786267195e-07, "loss": 0.3534, "step": 41940 }, { "epoch": 0.9278875046547382, "grad_norm": 0.8388243317604065, "learning_rate": 2.554037950430832e-07, "loss": 0.3135, "step": 41945 }, { "epoch": 0.9279981122962515, "grad_norm": 0.5990657210350037, "learning_rate": 2.5462404888044856e-07, "loss": 0.2398, "step": 41950 }, { "epoch": 0.9281087199377648, "grad_norm": 1.1806528568267822, "learning_rate": 2.5384547946891715e-07, "loss": 0.2967, "step": 41955 }, { "epoch": 0.928219327579278, "grad_norm": 1.3757505416870117, "learning_rate": 2.530680869025026e-07, "loss": 0.4882, "step": 41960 }, { "epoch": 0.9283299352207913, "grad_norm": 2.2769339084625244, "learning_rate": 2.522918712750766e-07, "loss": 0.3287, "step": 41965 }, { "epoch": 0.9284405428623046, "grad_norm": 0.8748974204063416, "learning_rate": 2.515168326803652e-07, "loss": 0.3305, "step": 41970 }, { "epoch": 0.9285511505038178, "grad_norm": 0.8958375453948975, "learning_rate": 2.5074297121195354e-07, "loss": 0.4776, "step": 41975 }, { "epoch": 0.928661758145331, "grad_norm": 1.4591914415359497, "learning_rate": 2.4997028696328916e-07, "loss": 0.3764, "step": 41980 }, { "epoch": 0.9287723657868443, "grad_norm": 0.7269682288169861, "learning_rate": 2.4919878002766627e-07, "loss": 0.2941, "step": 41985 }, { "epoch": 0.9288829734283576, "grad_norm": 1.713003396987915, "learning_rate": 2.4842845049825035e-07, "loss": 0.3645, "step": 41990 }, { "epoch": 0.9289935810698708, "grad_norm": 1.719301700592041, "learning_rate": 2.4765929846805815e-07, "loss": 0.2848, "step": 41995 }, { "epoch": 0.9291041887113841, "grad_norm": 0.06624389439821243, "learning_rate": 2.4689132402995976e-07, "loss": 0.253, "step": 42000 }, { "epoch": 0.9292147963528974, "grad_norm": 1.255836009979248, "learning_rate": 2.461245272766921e-07, "loss": 0.5091, "step": 42005 }, { "epoch": 0.9293254039944107, "grad_norm": 1.1298929452896118, "learning_rate": 2.4535890830084453e-07, "loss": 0.4742, "step": 42010 }, { "epoch": 0.9294360116359239, "grad_norm": 0.5737537145614624, "learning_rate": 2.445944671948641e-07, "loss": 0.2733, "step": 42015 }, { "epoch": 0.9295466192774371, "grad_norm": 1.4376351833343506, "learning_rate": 2.438312040510582e-07, "loss": 0.3143, "step": 42020 }, { "epoch": 0.9296572269189504, "grad_norm": 0.6091480851173401, "learning_rate": 2.430691189615919e-07, "loss": 0.3377, "step": 42025 }, { "epoch": 0.9297678345604636, "grad_norm": 1.307261347770691, "learning_rate": 2.4230821201848165e-07, "loss": 0.3009, "step": 42030 }, { "epoch": 0.9298784422019769, "grad_norm": 1.2840797901153564, "learning_rate": 2.415484833136139e-07, "loss": 0.2956, "step": 42035 }, { "epoch": 0.9299890498434902, "grad_norm": 0.8622068166732788, "learning_rate": 2.4078993293871976e-07, "loss": 0.1825, "step": 42040 }, { "epoch": 0.9300996574850035, "grad_norm": 1.1710245609283447, "learning_rate": 2.4003256098539597e-07, "loss": 0.2286, "step": 42045 }, { "epoch": 0.9302102651265167, "grad_norm": 0.8481012582778931, "learning_rate": 2.392763675450971e-07, "loss": 0.3279, "step": 42050 }, { "epoch": 0.93032087276803, "grad_norm": 0.5735148191452026, "learning_rate": 2.3852135270913014e-07, "loss": 0.2117, "step": 42055 }, { "epoch": 0.9304314804095433, "grad_norm": 1.3979763984680176, "learning_rate": 2.3776751656866214e-07, "loss": 0.2951, "step": 42060 }, { "epoch": 0.9305420880510565, "grad_norm": 0.8741624355316162, "learning_rate": 2.3701485921472367e-07, "loss": 0.5207, "step": 42065 }, { "epoch": 0.9306526956925697, "grad_norm": 0.844036340713501, "learning_rate": 2.362633807381931e-07, "loss": 0.217, "step": 42070 }, { "epoch": 0.930763303334083, "grad_norm": 0.6088998317718506, "learning_rate": 2.3551308122981232e-07, "loss": 0.2685, "step": 42075 }, { "epoch": 0.9308739109755962, "grad_norm": 0.6896735429763794, "learning_rate": 2.3476396078018104e-07, "loss": 0.4942, "step": 42080 }, { "epoch": 0.9309845186171095, "grad_norm": 1.2728846073150635, "learning_rate": 2.340160194797536e-07, "loss": 0.3221, "step": 42085 }, { "epoch": 0.9310951262586228, "grad_norm": 0.8116730451583862, "learning_rate": 2.3326925741884554e-07, "loss": 0.4068, "step": 42090 }, { "epoch": 0.9312057339001361, "grad_norm": 0.5999986529350281, "learning_rate": 2.3252367468762805e-07, "loss": 0.3912, "step": 42095 }, { "epoch": 0.9313163415416493, "grad_norm": 0.9033049941062927, "learning_rate": 2.317792713761269e-07, "loss": 0.2602, "step": 42100 }, { "epoch": 0.9314269491831626, "grad_norm": 1.3335899114608765, "learning_rate": 2.3103604757423126e-07, "loss": 0.1484, "step": 42105 }, { "epoch": 0.9315375568246759, "grad_norm": 1.9190446138381958, "learning_rate": 2.3029400337168716e-07, "loss": 0.3831, "step": 42110 }, { "epoch": 0.931648164466189, "grad_norm": 0.6535742282867432, "learning_rate": 2.295531388580907e-07, "loss": 0.3844, "step": 42115 }, { "epoch": 0.9317587721077023, "grad_norm": 0.6511262655258179, "learning_rate": 2.288134541229059e-07, "loss": 0.2023, "step": 42120 }, { "epoch": 0.9318693797492156, "grad_norm": 0.725786566734314, "learning_rate": 2.2807494925544794e-07, "loss": 0.3287, "step": 42125 }, { "epoch": 0.9319799873907288, "grad_norm": 1.0109914541244507, "learning_rate": 2.2733762434488993e-07, "loss": 0.3405, "step": 42130 }, { "epoch": 0.9320905950322421, "grad_norm": 1.0172691345214844, "learning_rate": 2.26601479480264e-07, "loss": 0.2677, "step": 42135 }, { "epoch": 0.9322012026737554, "grad_norm": 0.8640355467796326, "learning_rate": 2.258665147504613e-07, "loss": 0.2935, "step": 42140 }, { "epoch": 0.9323118103152687, "grad_norm": 0.8604151010513306, "learning_rate": 2.2513273024422631e-07, "loss": 0.3757, "step": 42145 }, { "epoch": 0.9324224179567819, "grad_norm": 1.126399278640747, "learning_rate": 2.2440012605016492e-07, "loss": 0.411, "step": 42150 }, { "epoch": 0.9325330255982952, "grad_norm": 0.8829724192619324, "learning_rate": 2.2366870225673743e-07, "loss": 0.3853, "step": 42155 }, { "epoch": 0.9326436332398085, "grad_norm": 1.0930641889572144, "learning_rate": 2.229384589522632e-07, "loss": 0.3413, "step": 42160 }, { "epoch": 0.9327542408813216, "grad_norm": 0.6480528712272644, "learning_rate": 2.2220939622492056e-07, "loss": 0.3273, "step": 42165 }, { "epoch": 0.9328648485228349, "grad_norm": 0.6555204391479492, "learning_rate": 2.2148151416274243e-07, "loss": 0.468, "step": 42170 }, { "epoch": 0.9329754561643482, "grad_norm": 0.8820092082023621, "learning_rate": 2.2075481285361855e-07, "loss": 0.3088, "step": 42175 }, { "epoch": 0.9330860638058615, "grad_norm": 0.9888603687286377, "learning_rate": 2.2002929238530202e-07, "loss": 0.1725, "step": 42180 }, { "epoch": 0.9331966714473747, "grad_norm": 1.0385159254074097, "learning_rate": 2.193049528453961e-07, "loss": 0.2637, "step": 42185 }, { "epoch": 0.933307279088888, "grad_norm": 1.1678768396377563, "learning_rate": 2.1858179432136418e-07, "loss": 0.404, "step": 42190 }, { "epoch": 0.9334178867304013, "grad_norm": 1.8189702033996582, "learning_rate": 2.178598169005297e-07, "loss": 0.2537, "step": 42195 }, { "epoch": 0.9335284943719145, "grad_norm": 0.4450569450855255, "learning_rate": 2.1713902067006854e-07, "loss": 0.2972, "step": 42200 }, { "epoch": 0.9336391020134278, "grad_norm": 1.7706241607666016, "learning_rate": 2.1641940571701992e-07, "loss": 0.4276, "step": 42205 }, { "epoch": 0.933749709654941, "grad_norm": 0.88623046875, "learning_rate": 2.1570097212827545e-07, "loss": 0.3849, "step": 42210 }, { "epoch": 0.9338603172964542, "grad_norm": 1.057832956314087, "learning_rate": 2.1498371999058465e-07, "loss": 0.3833, "step": 42215 }, { "epoch": 0.9339709249379675, "grad_norm": 1.059844970703125, "learning_rate": 2.1426764939055712e-07, "loss": 0.4347, "step": 42220 }, { "epoch": 0.9340815325794808, "grad_norm": 1.3720159530639648, "learning_rate": 2.135527604146581e-07, "loss": 0.3188, "step": 42225 }, { "epoch": 0.9341921402209941, "grad_norm": 0.8966862559318542, "learning_rate": 2.128390531492075e-07, "loss": 0.197, "step": 42230 }, { "epoch": 0.9343027478625073, "grad_norm": 1.1548792123794556, "learning_rate": 2.1212652768038854e-07, "loss": 0.2787, "step": 42235 }, { "epoch": 0.9344133555040206, "grad_norm": 0.4857097268104553, "learning_rate": 2.114151840942391e-07, "loss": 0.4127, "step": 42240 }, { "epoch": 0.9345239631455339, "grad_norm": 1.2727625370025635, "learning_rate": 2.1070502247664937e-07, "loss": 0.2916, "step": 42245 }, { "epoch": 0.9346345707870471, "grad_norm": 1.3660476207733154, "learning_rate": 2.0999604291337515e-07, "loss": 0.4392, "step": 42250 }, { "epoch": 0.9347451784285604, "grad_norm": 0.5787297487258911, "learning_rate": 2.0928824549002468e-07, "loss": 0.2839, "step": 42255 }, { "epoch": 0.9348557860700736, "grad_norm": 0.6035010814666748, "learning_rate": 2.0858163029206068e-07, "loss": 0.2706, "step": 42260 }, { "epoch": 0.9349663937115869, "grad_norm": 1.2849853038787842, "learning_rate": 2.078761974048127e-07, "loss": 0.3151, "step": 42265 }, { "epoch": 0.9350770013531001, "grad_norm": 1.3950034379959106, "learning_rate": 2.071719469134581e-07, "loss": 0.2055, "step": 42270 }, { "epoch": 0.9351876089946134, "grad_norm": 0.9974755048751831, "learning_rate": 2.0646887890303334e-07, "loss": 0.3406, "step": 42275 }, { "epoch": 0.9352982166361267, "grad_norm": 0.7718987464904785, "learning_rate": 2.057669934584383e-07, "loss": 0.2696, "step": 42280 }, { "epoch": 0.9354088242776399, "grad_norm": 1.9183224439620972, "learning_rate": 2.0506629066442075e-07, "loss": 0.2921, "step": 42285 }, { "epoch": 0.9355194319191532, "grad_norm": 0.5880922675132751, "learning_rate": 2.0436677060559296e-07, "loss": 0.3289, "step": 42290 }, { "epoch": 0.9356300395606665, "grad_norm": 1.4127556085586548, "learning_rate": 2.0366843336642182e-07, "loss": 0.4047, "step": 42295 }, { "epoch": 0.9357406472021798, "grad_norm": 0.7647523880004883, "learning_rate": 2.029712790312288e-07, "loss": 0.327, "step": 42300 }, { "epoch": 0.9358512548436929, "grad_norm": 2.458760976791382, "learning_rate": 2.0227530768419878e-07, "loss": 0.4429, "step": 42305 }, { "epoch": 0.9359618624852062, "grad_norm": 1.0127238035202026, "learning_rate": 2.0158051940936894e-07, "loss": 0.4336, "step": 42310 }, { "epoch": 0.9360724701267195, "grad_norm": 0.7032179832458496, "learning_rate": 2.008869142906311e-07, "loss": 0.3292, "step": 42315 }, { "epoch": 0.9361830777682327, "grad_norm": 1.1753268241882324, "learning_rate": 2.0019449241174273e-07, "loss": 0.3443, "step": 42320 }, { "epoch": 0.936293685409746, "grad_norm": 1.3244283199310303, "learning_rate": 1.995032538563124e-07, "loss": 0.3632, "step": 42325 }, { "epoch": 0.9364042930512593, "grad_norm": 1.1082743406295776, "learning_rate": 1.9881319870780458e-07, "loss": 0.2811, "step": 42330 }, { "epoch": 0.9365149006927725, "grad_norm": 0.9696702361106873, "learning_rate": 1.9812432704954587e-07, "loss": 0.5879, "step": 42335 }, { "epoch": 0.9366255083342858, "grad_norm": 1.0760468244552612, "learning_rate": 1.9743663896471643e-07, "loss": 0.2143, "step": 42340 }, { "epoch": 0.9367361159757991, "grad_norm": 1.0076429843902588, "learning_rate": 1.9675013453635317e-07, "loss": 0.266, "step": 42345 }, { "epoch": 0.9368467236173124, "grad_norm": 1.5459024906158447, "learning_rate": 1.96064813847352e-07, "loss": 0.2983, "step": 42350 }, { "epoch": 0.9369573312588255, "grad_norm": 1.68173348903656, "learning_rate": 1.953806769804678e-07, "loss": 0.3543, "step": 42355 }, { "epoch": 0.9370679389003388, "grad_norm": 1.2151628732681274, "learning_rate": 1.9469772401830456e-07, "loss": 0.3206, "step": 42360 }, { "epoch": 0.9371785465418521, "grad_norm": 0.9274026155471802, "learning_rate": 1.9401595504333402e-07, "loss": 0.4086, "step": 42365 }, { "epoch": 0.9372891541833653, "grad_norm": 1.2116543054580688, "learning_rate": 1.9333537013787705e-07, "loss": 0.3338, "step": 42370 }, { "epoch": 0.9373997618248786, "grad_norm": 1.8300846815109253, "learning_rate": 1.9265596938411236e-07, "loss": 0.3636, "step": 42375 }, { "epoch": 0.9375103694663919, "grad_norm": 0.9087935090065002, "learning_rate": 1.9197775286408205e-07, "loss": 0.3168, "step": 42380 }, { "epoch": 0.9376209771079052, "grad_norm": 1.0558747053146362, "learning_rate": 1.9130072065967618e-07, "loss": 0.3521, "step": 42385 }, { "epoch": 0.9377315847494184, "grad_norm": 1.9720304012298584, "learning_rate": 1.906248728526461e-07, "loss": 0.2191, "step": 42390 }, { "epoch": 0.9378421923909317, "grad_norm": 0.7538480758666992, "learning_rate": 1.899502095246053e-07, "loss": 0.2849, "step": 42395 }, { "epoch": 0.9379528000324449, "grad_norm": 0.8047194480895996, "learning_rate": 1.8927673075701313e-07, "loss": 0.3378, "step": 42400 }, { "epoch": 0.9380634076739581, "grad_norm": 1.1758556365966797, "learning_rate": 1.8860443663119453e-07, "loss": 0.185, "step": 42405 }, { "epoch": 0.9381740153154714, "grad_norm": 1.3177114725112915, "learning_rate": 1.8793332722833124e-07, "loss": 0.3426, "step": 42410 }, { "epoch": 0.9382846229569847, "grad_norm": 0.5635718703269958, "learning_rate": 1.872634026294551e-07, "loss": 0.2324, "step": 42415 }, { "epoch": 0.9383952305984979, "grad_norm": 1.1571935415267944, "learning_rate": 1.8659466291546135e-07, "loss": 0.3097, "step": 42420 }, { "epoch": 0.9385058382400112, "grad_norm": 0.9519773125648499, "learning_rate": 1.8592710816710103e-07, "loss": 0.3904, "step": 42425 }, { "epoch": 0.9386164458815245, "grad_norm": 1.1142241954803467, "learning_rate": 1.852607384649785e-07, "loss": 0.2765, "step": 42430 }, { "epoch": 0.9387270535230378, "grad_norm": 1.188162922859192, "learning_rate": 1.8459555388956053e-07, "loss": 0.3039, "step": 42435 }, { "epoch": 0.938837661164551, "grad_norm": 0.8090048432350159, "learning_rate": 1.839315545211684e-07, "loss": 0.3097, "step": 42440 }, { "epoch": 0.9389482688060643, "grad_norm": 1.2699275016784668, "learning_rate": 1.832687404399758e-07, "loss": 0.3772, "step": 42445 }, { "epoch": 0.9390588764475775, "grad_norm": 0.8770390152931213, "learning_rate": 1.8260711172601975e-07, "loss": 0.2844, "step": 42450 }, { "epoch": 0.9391694840890907, "grad_norm": 0.6976553201675415, "learning_rate": 1.8194666845919418e-07, "loss": 0.1735, "step": 42455 }, { "epoch": 0.939280091730604, "grad_norm": 2.0062224864959717, "learning_rate": 1.8128741071924194e-07, "loss": 0.3121, "step": 42460 }, { "epoch": 0.9393906993721173, "grad_norm": 1.4096286296844482, "learning_rate": 1.8062933858577382e-07, "loss": 0.3836, "step": 42465 }, { "epoch": 0.9395013070136305, "grad_norm": 0.9920470118522644, "learning_rate": 1.7997245213824733e-07, "loss": 0.4052, "step": 42470 }, { "epoch": 0.9396119146551438, "grad_norm": 1.0976063013076782, "learning_rate": 1.7931675145598347e-07, "loss": 0.3686, "step": 42475 }, { "epoch": 0.9397225222966571, "grad_norm": 1.1607146263122559, "learning_rate": 1.7866223661815895e-07, "loss": 0.3082, "step": 42480 }, { "epoch": 0.9398331299381704, "grad_norm": 0.8005288243293762, "learning_rate": 1.7800890770380385e-07, "loss": 0.3663, "step": 42485 }, { "epoch": 0.9399437375796836, "grad_norm": 1.8777048587799072, "learning_rate": 1.773567647918073e-07, "loss": 0.2523, "step": 42490 }, { "epoch": 0.9400543452211968, "grad_norm": 0.7247810363769531, "learning_rate": 1.7670580796091852e-07, "loss": 0.3303, "step": 42495 }, { "epoch": 0.9401649528627101, "grad_norm": 0.9262227416038513, "learning_rate": 1.7605603728973686e-07, "loss": 0.1809, "step": 42500 }, { "epoch": 0.9402755605042233, "grad_norm": 0.7685204744338989, "learning_rate": 1.75407452856724e-07, "loss": 0.2494, "step": 42505 }, { "epoch": 0.9403861681457366, "grad_norm": 1.1286152601242065, "learning_rate": 1.747600547401962e-07, "loss": 0.472, "step": 42510 }, { "epoch": 0.9404967757872499, "grad_norm": 1.122395634651184, "learning_rate": 1.7411384301832424e-07, "loss": 0.2411, "step": 42515 }, { "epoch": 0.9406073834287632, "grad_norm": 0.7646514177322388, "learning_rate": 1.7346881776914125e-07, "loss": 0.2021, "step": 42520 }, { "epoch": 0.9407179910702764, "grad_norm": 1.35588538646698, "learning_rate": 1.7282497907053385e-07, "loss": 0.2915, "step": 42525 }, { "epoch": 0.9408285987117897, "grad_norm": 1.4748644828796387, "learning_rate": 1.7218232700024095e-07, "loss": 0.4141, "step": 42530 }, { "epoch": 0.940939206353303, "grad_norm": 0.9910474419593811, "learning_rate": 1.7154086163586714e-07, "loss": 0.3286, "step": 42535 }, { "epoch": 0.9410498139948162, "grad_norm": 1.8250707387924194, "learning_rate": 1.7090058305486823e-07, "loss": 0.3995, "step": 42540 }, { "epoch": 0.9411604216363294, "grad_norm": 0.628852903842926, "learning_rate": 1.7026149133455573e-07, "loss": 0.3417, "step": 42545 }, { "epoch": 0.9412710292778427, "grad_norm": 1.0431606769561768, "learning_rate": 1.696235865521023e-07, "loss": 0.3804, "step": 42550 }, { "epoch": 0.941381636919356, "grad_norm": 1.5185052156448364, "learning_rate": 1.6898686878453309e-07, "loss": 0.3122, "step": 42555 }, { "epoch": 0.9414922445608692, "grad_norm": 0.9460046291351318, "learning_rate": 1.6835133810873095e-07, "loss": 0.2848, "step": 42560 }, { "epoch": 0.9416028522023825, "grad_norm": 0.916386604309082, "learning_rate": 1.6771699460143786e-07, "loss": 0.2858, "step": 42565 }, { "epoch": 0.9417134598438958, "grad_norm": 1.72443425655365, "learning_rate": 1.6708383833925034e-07, "loss": 0.4374, "step": 42570 }, { "epoch": 0.941824067485409, "grad_norm": 1.0064181089401245, "learning_rate": 1.6645186939861946e-07, "loss": 0.2041, "step": 42575 }, { "epoch": 0.9419346751269223, "grad_norm": 1.0538815259933472, "learning_rate": 1.658210878558597e-07, "loss": 0.283, "step": 42580 }, { "epoch": 0.9420452827684356, "grad_norm": 1.129024624824524, "learning_rate": 1.651914937871335e-07, "loss": 0.4359, "step": 42585 }, { "epoch": 0.9421558904099487, "grad_norm": 1.2519950866699219, "learning_rate": 1.6456308726846449e-07, "loss": 0.3753, "step": 42590 }, { "epoch": 0.942266498051462, "grad_norm": 0.8991243839263916, "learning_rate": 1.639358683757364e-07, "loss": 0.3264, "step": 42595 }, { "epoch": 0.9423771056929753, "grad_norm": 1.1265114545822144, "learning_rate": 1.6330983718468197e-07, "loss": 0.2788, "step": 42600 }, { "epoch": 0.9424877133344886, "grad_norm": 2.7047109603881836, "learning_rate": 1.626849937708941e-07, "loss": 0.2479, "step": 42605 }, { "epoch": 0.9425983209760018, "grad_norm": 1.5810143947601318, "learning_rate": 1.6206133820982572e-07, "loss": 0.3278, "step": 42610 }, { "epoch": 0.9427089286175151, "grad_norm": 0.9129135608673096, "learning_rate": 1.614388705767811e-07, "loss": 0.3395, "step": 42615 }, { "epoch": 0.9428195362590284, "grad_norm": 1.1766788959503174, "learning_rate": 1.6081759094692119e-07, "loss": 0.3621, "step": 42620 }, { "epoch": 0.9429301439005416, "grad_norm": 1.03890860080719, "learning_rate": 1.601974993952693e-07, "loss": 0.4503, "step": 42625 }, { "epoch": 0.9430407515420549, "grad_norm": 1.4079698324203491, "learning_rate": 1.5957859599669667e-07, "loss": 0.3837, "step": 42630 }, { "epoch": 0.9431513591835682, "grad_norm": 1.2748618125915527, "learning_rate": 1.589608808259402e-07, "loss": 0.3668, "step": 42635 }, { "epoch": 0.9432619668250813, "grad_norm": 0.7327943444252014, "learning_rate": 1.5834435395758686e-07, "loss": 0.4058, "step": 42640 }, { "epoch": 0.9433725744665946, "grad_norm": 1.5094200372695923, "learning_rate": 1.5772901546608043e-07, "loss": 0.4099, "step": 42645 }, { "epoch": 0.9434831821081079, "grad_norm": 1.711987853050232, "learning_rate": 1.5711486542572485e-07, "loss": 0.2371, "step": 42650 }, { "epoch": 0.9435937897496212, "grad_norm": 0.6936134099960327, "learning_rate": 1.5650190391067855e-07, "loss": 0.2989, "step": 42655 }, { "epoch": 0.9437043973911344, "grad_norm": 0.6114973425865173, "learning_rate": 1.5589013099495565e-07, "loss": 0.3168, "step": 42660 }, { "epoch": 0.9438150050326477, "grad_norm": 1.2584184408187866, "learning_rate": 1.5527954675242706e-07, "loss": 0.3045, "step": 42665 }, { "epoch": 0.943925612674161, "grad_norm": 1.091772198677063, "learning_rate": 1.5467015125682384e-07, "loss": 0.2448, "step": 42670 }, { "epoch": 0.9440362203156742, "grad_norm": 0.9074851274490356, "learning_rate": 1.5406194458172487e-07, "loss": 0.1872, "step": 42675 }, { "epoch": 0.9441468279571875, "grad_norm": 0.7920113205909729, "learning_rate": 1.5345492680057694e-07, "loss": 0.3144, "step": 42680 }, { "epoch": 0.9442574355987007, "grad_norm": 1.539538025856018, "learning_rate": 1.5284909798667258e-07, "loss": 0.3583, "step": 42685 }, { "epoch": 0.944368043240214, "grad_norm": 0.9916984438896179, "learning_rate": 1.5224445821316658e-07, "loss": 0.2416, "step": 42690 }, { "epoch": 0.9444786508817272, "grad_norm": 0.9396360516548157, "learning_rate": 1.5164100755307053e-07, "loss": 0.5025, "step": 42695 }, { "epoch": 0.9445892585232405, "grad_norm": 1.1222435235977173, "learning_rate": 1.510387460792495e-07, "loss": 0.3758, "step": 42700 }, { "epoch": 0.9446998661647538, "grad_norm": 1.4034260511398315, "learning_rate": 1.5043767386442642e-07, "loss": 0.3391, "step": 42705 }, { "epoch": 0.944810473806267, "grad_norm": 2.3654096126556396, "learning_rate": 1.498377909811821e-07, "loss": 0.294, "step": 42710 }, { "epoch": 0.9449210814477803, "grad_norm": 1.125152349472046, "learning_rate": 1.4923909750194975e-07, "loss": 0.3447, "step": 42715 }, { "epoch": 0.9450316890892936, "grad_norm": 1.4256234169006348, "learning_rate": 1.4864159349902263e-07, "loss": 0.4093, "step": 42720 }, { "epoch": 0.9451422967308069, "grad_norm": 0.8718671202659607, "learning_rate": 1.480452790445508e-07, "loss": 0.3801, "step": 42725 }, { "epoch": 0.9452529043723201, "grad_norm": 0.7582515478134155, "learning_rate": 1.4745015421053665e-07, "loss": 0.2808, "step": 42730 }, { "epoch": 0.9453635120138333, "grad_norm": 1.2906490564346313, "learning_rate": 1.4685621906884163e-07, "loss": 0.2993, "step": 42735 }, { "epoch": 0.9454741196553466, "grad_norm": 0.7182313203811646, "learning_rate": 1.4626347369118499e-07, "loss": 0.3119, "step": 42740 }, { "epoch": 0.9455847272968598, "grad_norm": 1.2023261785507202, "learning_rate": 1.4567191814913728e-07, "loss": 0.3193, "step": 42745 }, { "epoch": 0.9456953349383731, "grad_norm": 1.1078674793243408, "learning_rate": 1.4508155251413247e-07, "loss": 0.2721, "step": 42750 }, { "epoch": 0.9458059425798864, "grad_norm": 1.6381984949111938, "learning_rate": 1.4449237685745466e-07, "loss": 0.3591, "step": 42755 }, { "epoch": 0.9459165502213996, "grad_norm": 1.2988231182098389, "learning_rate": 1.4390439125024692e-07, "loss": 0.3596, "step": 42760 }, { "epoch": 0.9460271578629129, "grad_norm": 1.0160706043243408, "learning_rate": 1.4331759576350912e-07, "loss": 0.2447, "step": 42765 }, { "epoch": 0.9461377655044262, "grad_norm": 1.184846043586731, "learning_rate": 1.4273199046809682e-07, "loss": 0.2435, "step": 42770 }, { "epoch": 0.9462483731459395, "grad_norm": 0.8747016191482544, "learning_rate": 1.4214757543471902e-07, "loss": 0.3613, "step": 42775 }, { "epoch": 0.9463589807874526, "grad_norm": 0.9636685848236084, "learning_rate": 1.4156435073394704e-07, "loss": 0.2789, "step": 42780 }, { "epoch": 0.9464695884289659, "grad_norm": 0.9232900142669678, "learning_rate": 1.409823164362034e-07, "loss": 0.2523, "step": 42785 }, { "epoch": 0.9465801960704792, "grad_norm": 0.8142092823982239, "learning_rate": 1.4040147261176863e-07, "loss": 0.2715, "step": 42790 }, { "epoch": 0.9466908037119924, "grad_norm": 0.8442710638046265, "learning_rate": 1.3982181933077986e-07, "loss": 0.3328, "step": 42795 }, { "epoch": 0.9468014113535057, "grad_norm": 0.9399609565734863, "learning_rate": 1.3924335666323007e-07, "loss": 0.1912, "step": 42800 }, { "epoch": 0.946912018995019, "grad_norm": 0.8944154977798462, "learning_rate": 1.386660846789667e-07, "loss": 0.4009, "step": 42805 }, { "epoch": 0.9470226266365322, "grad_norm": 0.9372165203094482, "learning_rate": 1.3809000344769842e-07, "loss": 0.2756, "step": 42810 }, { "epoch": 0.9471332342780455, "grad_norm": 0.749967098236084, "learning_rate": 1.3751511303898513e-07, "loss": 0.3288, "step": 42815 }, { "epoch": 0.9472438419195588, "grad_norm": 1.1429075002670288, "learning_rate": 1.3694141352224245e-07, "loss": 0.3542, "step": 42820 }, { "epoch": 0.947354449561072, "grad_norm": 1.7096689939498901, "learning_rate": 1.3636890496674937e-07, "loss": 0.2801, "step": 42825 }, { "epoch": 0.9474650572025852, "grad_norm": 0.7735417485237122, "learning_rate": 1.3579758744163175e-07, "loss": 0.2243, "step": 42830 }, { "epoch": 0.9475756648440985, "grad_norm": 0.6441946029663086, "learning_rate": 1.3522746101587658e-07, "loss": 0.2308, "step": 42835 }, { "epoch": 0.9476862724856118, "grad_norm": 1.0564336776733398, "learning_rate": 1.3465852575832995e-07, "loss": 0.329, "step": 42840 }, { "epoch": 0.947796880127125, "grad_norm": 0.8310256600379944, "learning_rate": 1.3409078173768574e-07, "loss": 0.3704, "step": 42845 }, { "epoch": 0.9479074877686383, "grad_norm": 0.879423201084137, "learning_rate": 1.335242290225025e-07, "loss": 0.3776, "step": 42850 }, { "epoch": 0.9480180954101516, "grad_norm": 0.9523389935493469, "learning_rate": 1.3295886768119103e-07, "loss": 0.2989, "step": 42855 }, { "epoch": 0.9481287030516649, "grad_norm": 0.776130199432373, "learning_rate": 1.3239469778201563e-07, "loss": 0.3929, "step": 42860 }, { "epoch": 0.9482393106931781, "grad_norm": 1.081250786781311, "learning_rate": 1.318317193931018e-07, "loss": 0.336, "step": 42865 }, { "epoch": 0.9483499183346914, "grad_norm": 1.1070860624313354, "learning_rate": 1.312699325824307e-07, "loss": 0.1592, "step": 42870 }, { "epoch": 0.9484605259762046, "grad_norm": 1.3475745916366577, "learning_rate": 1.3070933741783364e-07, "loss": 0.2474, "step": 42875 }, { "epoch": 0.9485711336177178, "grad_norm": 1.2211583852767944, "learning_rate": 1.3014993396700538e-07, "loss": 0.2384, "step": 42880 }, { "epoch": 0.9486817412592311, "grad_norm": 1.0816066265106201, "learning_rate": 1.2959172229749405e-07, "loss": 0.2456, "step": 42885 }, { "epoch": 0.9487923489007444, "grad_norm": 1.0734388828277588, "learning_rate": 1.2903470247670135e-07, "loss": 0.3465, "step": 42890 }, { "epoch": 0.9489029565422576, "grad_norm": 1.4867702722549438, "learning_rate": 1.28478874571889e-07, "loss": 0.4255, "step": 42895 }, { "epoch": 0.9490135641837709, "grad_norm": 1.3375779390335083, "learning_rate": 1.2792423865017224e-07, "loss": 0.2678, "step": 42900 }, { "epoch": 0.9491241718252842, "grad_norm": 1.5307644605636597, "learning_rate": 1.2737079477852188e-07, "loss": 0.4169, "step": 42905 }, { "epoch": 0.9492347794667975, "grad_norm": 1.584755778312683, "learning_rate": 1.2681854302377005e-07, "loss": 0.4479, "step": 42910 }, { "epoch": 0.9493453871083107, "grad_norm": 1.7790279388427734, "learning_rate": 1.2626748345259676e-07, "loss": 0.357, "step": 42915 }, { "epoch": 0.9494559947498239, "grad_norm": 0.9448535442352295, "learning_rate": 1.2571761613154432e-07, "loss": 0.3337, "step": 42920 }, { "epoch": 0.9495666023913372, "grad_norm": 0.9630046486854553, "learning_rate": 1.2516894112700962e-07, "loss": 0.288, "step": 42925 }, { "epoch": 0.9496772100328504, "grad_norm": 1.1529854536056519, "learning_rate": 1.2462145850524299e-07, "loss": 0.3941, "step": 42930 }, { "epoch": 0.9497878176743637, "grad_norm": 1.6522161960601807, "learning_rate": 1.240751683323538e-07, "loss": 0.3654, "step": 42935 }, { "epoch": 0.949898425315877, "grad_norm": 1.222377896308899, "learning_rate": 1.2353007067430812e-07, "loss": 0.3125, "step": 42940 }, { "epoch": 0.9500090329573903, "grad_norm": 0.8083510994911194, "learning_rate": 1.2298616559692334e-07, "loss": 0.4166, "step": 42945 }, { "epoch": 0.9501196405989035, "grad_norm": 1.4055936336517334, "learning_rate": 1.224434531658769e-07, "loss": 0.4243, "step": 42950 }, { "epoch": 0.9502302482404168, "grad_norm": 1.2466121912002563, "learning_rate": 1.219019334467031e-07, "loss": 0.3647, "step": 42955 }, { "epoch": 0.9503408558819301, "grad_norm": 1.6741048097610474, "learning_rate": 1.2136160650478735e-07, "loss": 0.3619, "step": 42960 }, { "epoch": 0.9504514635234433, "grad_norm": 1.2799497842788696, "learning_rate": 1.2082247240537526e-07, "loss": 0.2994, "step": 42965 }, { "epoch": 0.9505620711649565, "grad_norm": 0.31039679050445557, "learning_rate": 1.202845312135681e-07, "loss": 0.3375, "step": 42970 }, { "epoch": 0.9506726788064698, "grad_norm": 0.8169916868209839, "learning_rate": 1.1974778299431943e-07, "loss": 0.3325, "step": 42975 }, { "epoch": 0.950783286447983, "grad_norm": 2.001596450805664, "learning_rate": 1.1921222781244412e-07, "loss": 0.3899, "step": 42980 }, { "epoch": 0.9508938940894963, "grad_norm": 1.1530077457427979, "learning_rate": 1.1867786573261042e-07, "loss": 0.2525, "step": 42985 }, { "epoch": 0.9510045017310096, "grad_norm": 1.0471433401107788, "learning_rate": 1.1814469681933894e-07, "loss": 0.3388, "step": 42990 }, { "epoch": 0.9511151093725229, "grad_norm": 1.228029727935791, "learning_rate": 1.1761272113701372e-07, "loss": 0.3386, "step": 42995 }, { "epoch": 0.9512257170140361, "grad_norm": 1.2071501016616821, "learning_rate": 1.1708193874986895e-07, "loss": 0.2253, "step": 43000 }, { "epoch": 0.9513363246555494, "grad_norm": 1.0656743049621582, "learning_rate": 1.1655234972199448e-07, "loss": 0.2215, "step": 43005 }, { "epoch": 0.9514469322970627, "grad_norm": 1.6980595588684082, "learning_rate": 1.1602395411734135e-07, "loss": 0.4233, "step": 43010 }, { "epoch": 0.9515575399385758, "grad_norm": 0.8752798438072205, "learning_rate": 1.1549675199971188e-07, "loss": 0.3212, "step": 43015 }, { "epoch": 0.9516681475800891, "grad_norm": 3.250783681869507, "learning_rate": 1.1497074343276405e-07, "loss": 0.2064, "step": 43020 }, { "epoch": 0.9517787552216024, "grad_norm": 1.441205382347107, "learning_rate": 1.1444592848001701e-07, "loss": 0.4633, "step": 43025 }, { "epoch": 0.9518893628631157, "grad_norm": 1.1047072410583496, "learning_rate": 1.1392230720483788e-07, "loss": 0.4149, "step": 43030 }, { "epoch": 0.9519999705046289, "grad_norm": 1.0637433528900146, "learning_rate": 1.1339987967045496e-07, "loss": 0.3715, "step": 43035 }, { "epoch": 0.9521105781461422, "grad_norm": 1.9272586107254028, "learning_rate": 1.1287864593995335e-07, "loss": 0.2409, "step": 43040 }, { "epoch": 0.9522211857876555, "grad_norm": 1.1662540435791016, "learning_rate": 1.1235860607626936e-07, "loss": 0.3741, "step": 43045 }, { "epoch": 0.9523317934291687, "grad_norm": 0.8163707852363586, "learning_rate": 1.1183976014219833e-07, "loss": 0.2292, "step": 43050 }, { "epoch": 0.952442401070682, "grad_norm": 1.315038800239563, "learning_rate": 1.1132210820039124e-07, "loss": 0.3909, "step": 43055 }, { "epoch": 0.9525530087121953, "grad_norm": 1.3872880935668945, "learning_rate": 1.1080565031335367e-07, "loss": 0.2082, "step": 43060 }, { "epoch": 0.9526636163537084, "grad_norm": 1.577455997467041, "learning_rate": 1.1029038654344792e-07, "loss": 0.342, "step": 43065 }, { "epoch": 0.9527742239952217, "grad_norm": 1.3794342279434204, "learning_rate": 1.0977631695289426e-07, "loss": 0.2937, "step": 43070 }, { "epoch": 0.952884831636735, "grad_norm": 1.2837721109390259, "learning_rate": 1.092634416037619e-07, "loss": 0.3345, "step": 43075 }, { "epoch": 0.9529954392782483, "grad_norm": 2.3770968914031982, "learning_rate": 1.0875176055798353e-07, "loss": 0.3159, "step": 43080 }, { "epoch": 0.9531060469197615, "grad_norm": 1.1148220300674438, "learning_rate": 1.0824127387734529e-07, "loss": 0.4222, "step": 43085 }, { "epoch": 0.9532166545612748, "grad_norm": 0.7384015917778015, "learning_rate": 1.0773198162348453e-07, "loss": 0.2779, "step": 43090 }, { "epoch": 0.9533272622027881, "grad_norm": 1.0586259365081787, "learning_rate": 1.0722388385790205e-07, "loss": 0.3764, "step": 43095 }, { "epoch": 0.9534378698443013, "grad_norm": 0.7555502653121948, "learning_rate": 1.067169806419488e-07, "loss": 0.3374, "step": 43100 }, { "epoch": 0.9535484774858146, "grad_norm": 1.8363863229751587, "learning_rate": 1.0621127203683135e-07, "loss": 0.3676, "step": 43105 }, { "epoch": 0.9536590851273278, "grad_norm": 2.613034963607788, "learning_rate": 1.0570675810361752e-07, "loss": 0.2109, "step": 43110 }, { "epoch": 0.953769692768841, "grad_norm": 1.2664960622787476, "learning_rate": 1.0520343890322526e-07, "loss": 0.3524, "step": 43115 }, { "epoch": 0.9538803004103543, "grad_norm": 1.159754991531372, "learning_rate": 1.0470131449642929e-07, "loss": 0.2354, "step": 43120 }, { "epoch": 0.9539909080518676, "grad_norm": 0.9002740383148193, "learning_rate": 1.042003849438633e-07, "loss": 0.23, "step": 43125 }, { "epoch": 0.9541015156933809, "grad_norm": 0.7048442363739014, "learning_rate": 1.0370065030601229e-07, "loss": 0.2355, "step": 43130 }, { "epoch": 0.9542121233348941, "grad_norm": 1.1831703186035156, "learning_rate": 1.0320211064321906e-07, "loss": 0.3206, "step": 43135 }, { "epoch": 0.9543227309764074, "grad_norm": 0.4233165383338928, "learning_rate": 1.0270476601568547e-07, "loss": 0.2373, "step": 43140 }, { "epoch": 0.9544333386179207, "grad_norm": 1.3369245529174805, "learning_rate": 1.0220861648346126e-07, "loss": 0.3103, "step": 43145 }, { "epoch": 0.954543946259434, "grad_norm": 1.0769857168197632, "learning_rate": 1.017136621064585e-07, "loss": 0.3871, "step": 43150 }, { "epoch": 0.9546545539009472, "grad_norm": 0.6195645928382874, "learning_rate": 1.0121990294444273e-07, "loss": 0.395, "step": 43155 }, { "epoch": 0.9547651615424604, "grad_norm": 1.210334062576294, "learning_rate": 1.0072733905703513e-07, "loss": 0.3307, "step": 43160 }, { "epoch": 0.9548757691839737, "grad_norm": 0.8699105381965637, "learning_rate": 1.0023597050371147e-07, "loss": 0.367, "step": 43165 }, { "epoch": 0.9549863768254869, "grad_norm": 1.322170376777649, "learning_rate": 9.974579734380763e-08, "loss": 0.3369, "step": 43170 }, { "epoch": 0.9550969844670002, "grad_norm": 0.49603310227394104, "learning_rate": 9.925681963650735e-08, "loss": 0.4479, "step": 43175 }, { "epoch": 0.9552075921085135, "grad_norm": 0.7238638401031494, "learning_rate": 9.876903744085786e-08, "loss": 0.3997, "step": 43180 }, { "epoch": 0.9553181997500267, "grad_norm": 0.984447717666626, "learning_rate": 9.828245081575871e-08, "loss": 0.2572, "step": 43185 }, { "epoch": 0.95542880739154, "grad_norm": 1.3857005834579468, "learning_rate": 9.77970598199629e-08, "loss": 0.3821, "step": 43190 }, { "epoch": 0.9555394150330533, "grad_norm": 1.373903512954712, "learning_rate": 9.731286451208244e-08, "loss": 0.2449, "step": 43195 }, { "epoch": 0.9556500226745666, "grad_norm": 0.8086928725242615, "learning_rate": 9.6829864950585e-08, "loss": 0.3926, "step": 43200 }, { "epoch": 0.9557606303160797, "grad_norm": 1.095319390296936, "learning_rate": 9.634806119378947e-08, "loss": 0.3564, "step": 43205 }, { "epoch": 0.955871237957593, "grad_norm": 1.064218521118164, "learning_rate": 9.586745329987601e-08, "loss": 0.3705, "step": 43210 }, { "epoch": 0.9559818455991063, "grad_norm": 0.683488667011261, "learning_rate": 9.538804132687818e-08, "loss": 0.2904, "step": 43215 }, { "epoch": 0.9560924532406195, "grad_norm": 1.1895986795425415, "learning_rate": 9.4909825332683e-08, "loss": 0.2987, "step": 43220 }, { "epoch": 0.9562030608821328, "grad_norm": 2.380648374557495, "learning_rate": 9.443280537503652e-08, "loss": 0.4234, "step": 43225 }, { "epoch": 0.9563136685236461, "grad_norm": 0.8643562197685242, "learning_rate": 9.395698151153709e-08, "loss": 0.3193, "step": 43230 }, { "epoch": 0.9564242761651593, "grad_norm": 0.937745213508606, "learning_rate": 9.348235379964099e-08, "loss": 0.4561, "step": 43235 }, { "epoch": 0.9565348838066726, "grad_norm": 1.707581639289856, "learning_rate": 9.300892229666125e-08, "loss": 0.3422, "step": 43240 }, { "epoch": 0.9566454914481859, "grad_norm": 1.568827509880066, "learning_rate": 9.253668705976104e-08, "loss": 0.4304, "step": 43245 }, { "epoch": 0.9567560990896992, "grad_norm": 1.0694279670715332, "learning_rate": 9.206564814596475e-08, "loss": 0.2859, "step": 43250 }, { "epoch": 0.9568667067312123, "grad_norm": 1.0007576942443848, "learning_rate": 9.159580561215131e-08, "loss": 0.4229, "step": 43255 }, { "epoch": 0.9569773143727256, "grad_norm": 1.134358286857605, "learning_rate": 9.112715951505202e-08, "loss": 0.235, "step": 43260 }, { "epoch": 0.9570879220142389, "grad_norm": 1.554956078529358, "learning_rate": 9.065970991125605e-08, "loss": 0.3057, "step": 43265 }, { "epoch": 0.9571985296557521, "grad_norm": 1.0942102670669556, "learning_rate": 9.019345685720937e-08, "loss": 0.3492, "step": 43270 }, { "epoch": 0.9573091372972654, "grad_norm": 1.0037922859191895, "learning_rate": 8.972840040920916e-08, "loss": 0.2608, "step": 43275 }, { "epoch": 0.9574197449387787, "grad_norm": 0.8223391771316528, "learning_rate": 8.926454062341273e-08, "loss": 0.29, "step": 43280 }, { "epoch": 0.957530352580292, "grad_norm": 1.147793173789978, "learning_rate": 8.880187755583192e-08, "loss": 0.3086, "step": 43285 }, { "epoch": 0.9576409602218052, "grad_norm": 1.6377944946289062, "learning_rate": 8.834041126232984e-08, "loss": 0.3081, "step": 43290 }, { "epoch": 0.9577515678633185, "grad_norm": 0.7877917289733887, "learning_rate": 8.788014179863191e-08, "loss": 0.2358, "step": 43295 }, { "epoch": 0.9578621755048317, "grad_norm": 0.6959198713302612, "learning_rate": 8.742106922031368e-08, "loss": 0.2317, "step": 43300 }, { "epoch": 0.9579727831463449, "grad_norm": 0.6963282823562622, "learning_rate": 8.696319358280858e-08, "loss": 0.2528, "step": 43305 }, { "epoch": 0.9580833907878582, "grad_norm": 0.8597419261932373, "learning_rate": 8.650651494140461e-08, "loss": 0.3565, "step": 43310 }, { "epoch": 0.9581939984293715, "grad_norm": 1.042280673980713, "learning_rate": 8.605103335124543e-08, "loss": 0.2446, "step": 43315 }, { "epoch": 0.9583046060708847, "grad_norm": 0.6701626181602478, "learning_rate": 8.559674886733038e-08, "loss": 0.2933, "step": 43320 }, { "epoch": 0.958415213712398, "grad_norm": 1.6013484001159668, "learning_rate": 8.514366154451559e-08, "loss": 0.2649, "step": 43325 }, { "epoch": 0.9585258213539113, "grad_norm": 1.2538914680480957, "learning_rate": 8.46917714375095e-08, "loss": 0.2336, "step": 43330 }, { "epoch": 0.9586364289954246, "grad_norm": 0.7983217239379883, "learning_rate": 8.424107860087738e-08, "loss": 0.3355, "step": 43335 }, { "epoch": 0.9587470366369378, "grad_norm": 0.9974101185798645, "learning_rate": 8.379158308904234e-08, "loss": 0.3353, "step": 43340 }, { "epoch": 0.9588576442784511, "grad_norm": 1.5072726011276245, "learning_rate": 8.334328495627875e-08, "loss": 0.2672, "step": 43345 }, { "epoch": 0.9589682519199643, "grad_norm": 1.3137551546096802, "learning_rate": 8.289618425671886e-08, "loss": 0.3036, "step": 43350 }, { "epoch": 0.9590788595614775, "grad_norm": 1.0633275508880615, "learning_rate": 8.24502810443517e-08, "loss": 0.3519, "step": 43355 }, { "epoch": 0.9591894672029908, "grad_norm": 0.8632997274398804, "learning_rate": 8.200557537301757e-08, "loss": 0.3447, "step": 43360 }, { "epoch": 0.9593000748445041, "grad_norm": 0.9159598350524902, "learning_rate": 8.156206729641458e-08, "loss": 0.4249, "step": 43365 }, { "epoch": 0.9594106824860174, "grad_norm": 1.0872684717178345, "learning_rate": 8.111975686809992e-08, "loss": 0.3119, "step": 43370 }, { "epoch": 0.9595212901275306, "grad_norm": 1.0485632419586182, "learning_rate": 8.067864414147753e-08, "loss": 0.3439, "step": 43375 }, { "epoch": 0.9596318977690439, "grad_norm": 1.0438555479049683, "learning_rate": 8.023872916981367e-08, "loss": 0.3176, "step": 43380 }, { "epoch": 0.9597425054105572, "grad_norm": 1.0988162755966187, "learning_rate": 7.980001200623033e-08, "loss": 0.4191, "step": 43385 }, { "epoch": 0.9598531130520704, "grad_norm": 1.6069175004959106, "learning_rate": 7.936249270369733e-08, "loss": 0.2421, "step": 43390 }, { "epoch": 0.9599637206935836, "grad_norm": 0.8838641047477722, "learning_rate": 7.89261713150502e-08, "loss": 0.2785, "step": 43395 }, { "epoch": 0.9600743283350969, "grad_norm": 1.2416774034500122, "learning_rate": 7.849104789297123e-08, "loss": 0.3721, "step": 43400 }, { "epoch": 0.9601849359766101, "grad_norm": 1.5921387672424316, "learning_rate": 7.805712249000286e-08, "loss": 0.3569, "step": 43405 }, { "epoch": 0.9602955436181234, "grad_norm": 1.4046406745910645, "learning_rate": 7.762439515854093e-08, "loss": 0.2534, "step": 43410 }, { "epoch": 0.9604061512596367, "grad_norm": 1.3209121227264404, "learning_rate": 7.719286595083698e-08, "loss": 0.2956, "step": 43415 }, { "epoch": 0.96051675890115, "grad_norm": 0.7234629392623901, "learning_rate": 7.67625349189982e-08, "loss": 0.3342, "step": 43420 }, { "epoch": 0.9606273665426632, "grad_norm": 1.430939793586731, "learning_rate": 7.633340211498863e-08, "loss": 0.2387, "step": 43425 }, { "epoch": 0.9607379741841765, "grad_norm": 0.5750002861022949, "learning_rate": 7.590546759062234e-08, "loss": 0.3098, "step": 43430 }, { "epoch": 0.9608485818256898, "grad_norm": 0.7027139067649841, "learning_rate": 7.547873139757466e-08, "loss": 0.3059, "step": 43435 }, { "epoch": 0.960959189467203, "grad_norm": 1.7361222505569458, "learning_rate": 7.505319358737439e-08, "loss": 0.3357, "step": 43440 }, { "epoch": 0.9610697971087162, "grad_norm": 1.0631269216537476, "learning_rate": 7.462885421140375e-08, "loss": 0.2623, "step": 43445 }, { "epoch": 0.9611804047502295, "grad_norm": 0.8902190923690796, "learning_rate": 7.420571332090177e-08, "loss": 0.3157, "step": 43450 }, { "epoch": 0.9612910123917427, "grad_norm": 1.6059798002243042, "learning_rate": 7.378377096696309e-08, "loss": 0.3703, "step": 43455 }, { "epoch": 0.961401620033256, "grad_norm": 0.8027824759483337, "learning_rate": 7.336302720053701e-08, "loss": 0.1945, "step": 43460 }, { "epoch": 0.9615122276747693, "grad_norm": 0.8749886155128479, "learning_rate": 7.29434820724273e-08, "loss": 0.2168, "step": 43465 }, { "epoch": 0.9616228353162826, "grad_norm": 1.2712798118591309, "learning_rate": 7.252513563329567e-08, "loss": 0.2428, "step": 43470 }, { "epoch": 0.9617334429577958, "grad_norm": 1.1105276346206665, "learning_rate": 7.210798793365614e-08, "loss": 0.3612, "step": 43475 }, { "epoch": 0.9618440505993091, "grad_norm": 1.2913923263549805, "learning_rate": 7.169203902387844e-08, "loss": 0.3967, "step": 43480 }, { "epoch": 0.9619546582408224, "grad_norm": 1.6381129026412964, "learning_rate": 7.127728895419017e-08, "loss": 0.3332, "step": 43485 }, { "epoch": 0.9620652658823355, "grad_norm": 0.9130541086196899, "learning_rate": 7.086373777467126e-08, "loss": 0.2642, "step": 43490 }, { "epoch": 0.9621758735238488, "grad_norm": 0.741291880607605, "learning_rate": 7.045138553525732e-08, "loss": 0.2193, "step": 43495 }, { "epoch": 0.9622864811653621, "grad_norm": 1.6833274364471436, "learning_rate": 7.004023228574075e-08, "loss": 0.4026, "step": 43500 }, { "epoch": 0.9623970888068754, "grad_norm": 1.1207948923110962, "learning_rate": 6.963027807576739e-08, "loss": 0.3745, "step": 43505 }, { "epoch": 0.9625076964483886, "grad_norm": 0.6552388072013855, "learning_rate": 6.922152295483986e-08, "loss": 0.1854, "step": 43510 }, { "epoch": 0.9626183040899019, "grad_norm": 0.9157015085220337, "learning_rate": 6.881396697231423e-08, "loss": 0.2333, "step": 43515 }, { "epoch": 0.9627289117314152, "grad_norm": 0.6960498094558716, "learning_rate": 6.840761017740339e-08, "loss": 0.2643, "step": 43520 }, { "epoch": 0.9628395193729284, "grad_norm": 2.146000862121582, "learning_rate": 6.800245261917582e-08, "loss": 0.3523, "step": 43525 }, { "epoch": 0.9629501270144417, "grad_norm": 0.894228994846344, "learning_rate": 6.759849434655241e-08, "loss": 0.3209, "step": 43530 }, { "epoch": 0.963060734655955, "grad_norm": 0.6723679900169373, "learning_rate": 6.719573540831081e-08, "loss": 0.3214, "step": 43535 }, { "epoch": 0.9631713422974681, "grad_norm": 0.9829245805740356, "learning_rate": 6.679417585308657e-08, "loss": 0.2464, "step": 43540 }, { "epoch": 0.9632819499389814, "grad_norm": 1.0709248781204224, "learning_rate": 6.639381572936532e-08, "loss": 0.3353, "step": 43545 }, { "epoch": 0.9633925575804947, "grad_norm": 0.7182893753051758, "learning_rate": 6.599465508549063e-08, "loss": 0.3944, "step": 43550 }, { "epoch": 0.963503165222008, "grad_norm": 0.42292460799217224, "learning_rate": 6.559669396966284e-08, "loss": 0.2571, "step": 43555 }, { "epoch": 0.9636137728635212, "grad_norm": 1.234338641166687, "learning_rate": 6.51999324299335e-08, "loss": 0.3151, "step": 43560 }, { "epoch": 0.9637243805050345, "grad_norm": 1.730083703994751, "learning_rate": 6.480437051421318e-08, "loss": 0.3251, "step": 43565 }, { "epoch": 0.9638349881465478, "grad_norm": 1.477449893951416, "learning_rate": 6.441000827026478e-08, "loss": 0.4353, "step": 43570 }, { "epoch": 0.963945595788061, "grad_norm": 1.3712983131408691, "learning_rate": 6.401684574570798e-08, "loss": 0.4298, "step": 43575 }, { "epoch": 0.9640562034295743, "grad_norm": 0.7855100035667419, "learning_rate": 6.362488298801594e-08, "loss": 0.2152, "step": 43580 }, { "epoch": 0.9641668110710875, "grad_norm": 1.290292501449585, "learning_rate": 6.323412004452079e-08, "loss": 0.4071, "step": 43585 }, { "epoch": 0.9642774187126008, "grad_norm": 0.7281972765922546, "learning_rate": 6.284455696240366e-08, "loss": 0.345, "step": 43590 }, { "epoch": 0.964388026354114, "grad_norm": 0.7642232179641724, "learning_rate": 6.245619378870582e-08, "loss": 0.2656, "step": 43595 }, { "epoch": 0.9644986339956273, "grad_norm": 0.8068370819091797, "learning_rate": 6.206903057032199e-08, "loss": 0.2676, "step": 43600 }, { "epoch": 0.9646092416371406, "grad_norm": 0.8956291675567627, "learning_rate": 6.168306735400142e-08, "loss": 0.3304, "step": 43605 }, { "epoch": 0.9647198492786538, "grad_norm": 0.8894555568695068, "learning_rate": 6.129830418635019e-08, "loss": 0.3059, "step": 43610 }, { "epoch": 0.9648304569201671, "grad_norm": 1.1081128120422363, "learning_rate": 6.091474111382778e-08, "loss": 0.3556, "step": 43615 }, { "epoch": 0.9649410645616804, "grad_norm": 0.8812016844749451, "learning_rate": 6.053237818274827e-08, "loss": 0.2975, "step": 43620 }, { "epoch": 0.9650516722031937, "grad_norm": 1.268801212310791, "learning_rate": 6.01512154392836e-08, "loss": 0.3426, "step": 43625 }, { "epoch": 0.9651622798447068, "grad_norm": 0.9856082201004028, "learning_rate": 5.977125292945807e-08, "loss": 0.2115, "step": 43630 }, { "epoch": 0.9652728874862201, "grad_norm": 0.9556242823600769, "learning_rate": 5.939249069915165e-08, "loss": 0.4054, "step": 43635 }, { "epoch": 0.9653834951277334, "grad_norm": 1.3494082689285278, "learning_rate": 5.901492879410109e-08, "loss": 0.35, "step": 43640 }, { "epoch": 0.9654941027692466, "grad_norm": 0.610934317111969, "learning_rate": 5.8638567259895474e-08, "loss": 0.4047, "step": 43645 }, { "epoch": 0.9656047104107599, "grad_norm": 1.4158157110214233, "learning_rate": 5.826340614198067e-08, "loss": 0.3139, "step": 43650 }, { "epoch": 0.9657153180522732, "grad_norm": 1.3118176460266113, "learning_rate": 5.788944548565822e-08, "loss": 0.4026, "step": 43655 }, { "epoch": 0.9658259256937864, "grad_norm": 1.1946582794189453, "learning_rate": 5.751668533608312e-08, "loss": 0.3375, "step": 43660 }, { "epoch": 0.9659365333352997, "grad_norm": 1.3158819675445557, "learning_rate": 5.714512573826492e-08, "loss": 0.3365, "step": 43665 }, { "epoch": 0.966047140976813, "grad_norm": 1.0635216236114502, "learning_rate": 5.6774766737072164e-08, "loss": 0.2467, "step": 43670 }, { "epoch": 0.9661577486183263, "grad_norm": 2.075782060623169, "learning_rate": 5.640560837722242e-08, "loss": 0.3913, "step": 43675 }, { "epoch": 0.9662683562598394, "grad_norm": 1.0921069383621216, "learning_rate": 5.603765070329226e-08, "loss": 0.2513, "step": 43680 }, { "epoch": 0.9663789639013527, "grad_norm": 1.307204246520996, "learning_rate": 5.567089375971502e-08, "loss": 0.3083, "step": 43685 }, { "epoch": 0.966489571542866, "grad_norm": 1.6483756303787231, "learning_rate": 5.530533759077306e-08, "loss": 0.2957, "step": 43690 }, { "epoch": 0.9666001791843792, "grad_norm": 4.403174877166748, "learning_rate": 5.4940982240608844e-08, "loss": 0.2831, "step": 43695 }, { "epoch": 0.9667107868258925, "grad_norm": 1.5188766717910767, "learning_rate": 5.457782775321829e-08, "loss": 0.2875, "step": 43700 }, { "epoch": 0.9668213944674058, "grad_norm": 2.0575919151306152, "learning_rate": 5.4215874172451885e-08, "loss": 0.4116, "step": 43705 }, { "epoch": 0.966932002108919, "grad_norm": 0.9330376982688904, "learning_rate": 5.3855121542014666e-08, "loss": 0.3215, "step": 43710 }, { "epoch": 0.9670426097504323, "grad_norm": 1.505617380142212, "learning_rate": 5.349556990546845e-08, "loss": 0.2711, "step": 43715 }, { "epoch": 0.9671532173919456, "grad_norm": 1.3638933897018433, "learning_rate": 5.3137219306228505e-08, "loss": 0.3622, "step": 43720 }, { "epoch": 0.9672638250334588, "grad_norm": 1.010921835899353, "learning_rate": 5.278006978756578e-08, "loss": 0.3105, "step": 43725 }, { "epoch": 0.967374432674972, "grad_norm": 0.9704737067222595, "learning_rate": 5.242412139260689e-08, "loss": 0.3722, "step": 43730 }, { "epoch": 0.9674850403164853, "grad_norm": 1.4122055768966675, "learning_rate": 5.206937416432967e-08, "loss": 0.5289, "step": 43735 }, { "epoch": 0.9675956479579986, "grad_norm": 0.9239323139190674, "learning_rate": 5.171582814557208e-08, "loss": 0.3769, "step": 43740 }, { "epoch": 0.9677062555995118, "grad_norm": 1.0423548221588135, "learning_rate": 5.1363483379025525e-08, "loss": 0.2969, "step": 43745 }, { "epoch": 0.9678168632410251, "grad_norm": 0.6491488814353943, "learning_rate": 5.1012339907232644e-08, "loss": 0.3586, "step": 43750 }, { "epoch": 0.9679274708825384, "grad_norm": 0.8940595984458923, "learning_rate": 5.0662397772596186e-08, "loss": 0.3257, "step": 43755 }, { "epoch": 0.9680380785240517, "grad_norm": 0.8797730207443237, "learning_rate": 5.031365701737012e-08, "loss": 0.338, "step": 43760 }, { "epoch": 0.9681486861655649, "grad_norm": 1.0326614379882812, "learning_rate": 4.9966117683666325e-08, "loss": 0.2812, "step": 43765 }, { "epoch": 0.9682592938070782, "grad_norm": 0.38056954741477966, "learning_rate": 4.961977981344901e-08, "loss": 0.4354, "step": 43770 }, { "epoch": 0.9683699014485914, "grad_norm": 0.9065060019493103, "learning_rate": 4.927464344853805e-08, "loss": 0.3835, "step": 43775 }, { "epoch": 0.9684805090901046, "grad_norm": 1.2756187915802002, "learning_rate": 4.893070863060789e-08, "loss": 0.2863, "step": 43780 }, { "epoch": 0.9685911167316179, "grad_norm": 0.8419935703277588, "learning_rate": 4.858797540119198e-08, "loss": 0.3605, "step": 43785 }, { "epoch": 0.9687017243731312, "grad_norm": 0.9440147280693054, "learning_rate": 4.824644380167165e-08, "loss": 0.3561, "step": 43790 }, { "epoch": 0.9688123320146445, "grad_norm": 0.7143673300743103, "learning_rate": 4.790611387328836e-08, "loss": 0.2492, "step": 43795 }, { "epoch": 0.9689229396561577, "grad_norm": 0.496543288230896, "learning_rate": 4.756698565713702e-08, "loss": 0.3524, "step": 43800 }, { "epoch": 0.969033547297671, "grad_norm": 0.838784396648407, "learning_rate": 4.7229059194165986e-08, "loss": 0.4432, "step": 43805 }, { "epoch": 0.9691441549391843, "grad_norm": 1.291550636291504, "learning_rate": 4.689233452518038e-08, "loss": 0.4323, "step": 43810 }, { "epoch": 0.9692547625806975, "grad_norm": 1.2735079526901245, "learning_rate": 4.655681169084103e-08, "loss": 0.4207, "step": 43815 }, { "epoch": 0.9693653702222107, "grad_norm": 1.1395496129989624, "learning_rate": 4.6222490731659965e-08, "loss": 0.2817, "step": 43820 }, { "epoch": 0.969475977863724, "grad_norm": 1.3045949935913086, "learning_rate": 4.588937168800822e-08, "loss": 0.3012, "step": 43825 }, { "epoch": 0.9695865855052372, "grad_norm": 1.1814136505126953, "learning_rate": 4.5557454600108074e-08, "loss": 0.2364, "step": 43830 }, { "epoch": 0.9696971931467505, "grad_norm": 1.113559365272522, "learning_rate": 4.5226739508039686e-08, "loss": 0.4873, "step": 43835 }, { "epoch": 0.9698078007882638, "grad_norm": 0.9343705177307129, "learning_rate": 4.4897226451735556e-08, "loss": 0.3626, "step": 43840 }, { "epoch": 0.9699184084297771, "grad_norm": 0.60808926820755, "learning_rate": 4.4568915470986076e-08, "loss": 0.2779, "step": 43845 }, { "epoch": 0.9700290160712903, "grad_norm": 0.863846480846405, "learning_rate": 4.4241806605433976e-08, "loss": 0.2968, "step": 43850 }, { "epoch": 0.9701396237128036, "grad_norm": 0.5442524552345276, "learning_rate": 4.391589989457656e-08, "loss": 0.1823, "step": 43855 }, { "epoch": 0.9702502313543169, "grad_norm": 0.4170854687690735, "learning_rate": 4.35911953777679e-08, "loss": 0.1431, "step": 43860 }, { "epoch": 0.9703608389958301, "grad_norm": 1.3851791620254517, "learning_rate": 4.3267693094215526e-08, "loss": 0.4179, "step": 43865 }, { "epoch": 0.9704714466373433, "grad_norm": 0.9054778814315796, "learning_rate": 4.294539308298373e-08, "loss": 0.3558, "step": 43870 }, { "epoch": 0.9705820542788566, "grad_norm": 1.9544063806533813, "learning_rate": 4.262429538298807e-08, "loss": 0.3265, "step": 43875 }, { "epoch": 0.9706926619203698, "grad_norm": 1.125009536743164, "learning_rate": 4.230440003300085e-08, "loss": 0.2527, "step": 43880 }, { "epoch": 0.9708032695618831, "grad_norm": 1.5303318500518799, "learning_rate": 4.198570707165117e-08, "loss": 0.2035, "step": 43885 }, { "epoch": 0.9709138772033964, "grad_norm": 0.7889043092727661, "learning_rate": 4.1668216537420484e-08, "loss": 0.2965, "step": 43890 }, { "epoch": 0.9710244848449097, "grad_norm": 1.1140737533569336, "learning_rate": 4.1351928468644777e-08, "loss": 0.5207, "step": 43895 }, { "epoch": 0.9711350924864229, "grad_norm": 1.3673279285430908, "learning_rate": 4.103684290351684e-08, "loss": 0.3048, "step": 43900 }, { "epoch": 0.9712457001279362, "grad_norm": 0.8929126262664795, "learning_rate": 4.07229598800829e-08, "loss": 0.2882, "step": 43905 }, { "epoch": 0.9713563077694495, "grad_norm": 0.8313111066818237, "learning_rate": 4.041027943624376e-08, "loss": 0.4133, "step": 43910 }, { "epoch": 0.9714669154109626, "grad_norm": 0.6848447322845459, "learning_rate": 4.009880160975588e-08, "loss": 0.2671, "step": 43915 }, { "epoch": 0.9715775230524759, "grad_norm": 1.5957636833190918, "learning_rate": 3.978852643822917e-08, "loss": 0.3999, "step": 43920 }, { "epoch": 0.9716881306939892, "grad_norm": 1.560433030128479, "learning_rate": 3.947945395913144e-08, "loss": 0.2057, "step": 43925 }, { "epoch": 0.9717987383355025, "grad_norm": 0.8941912055015564, "learning_rate": 3.917158420978062e-08, "loss": 0.2913, "step": 43930 }, { "epoch": 0.9719093459770157, "grad_norm": 1.3726508617401123, "learning_rate": 3.8864917227353636e-08, "loss": 0.4565, "step": 43935 }, { "epoch": 0.972019953618529, "grad_norm": 1.544691801071167, "learning_rate": 3.855945304887976e-08, "loss": 0.2771, "step": 43940 }, { "epoch": 0.9721305612600423, "grad_norm": 1.2875642776489258, "learning_rate": 3.825519171124392e-08, "loss": 0.4063, "step": 43945 }, { "epoch": 0.9722411689015555, "grad_norm": 0.6276775598526001, "learning_rate": 3.795213325118452e-08, "loss": 0.2635, "step": 43950 }, { "epoch": 0.9723517765430688, "grad_norm": 0.9743587970733643, "learning_rate": 3.765027770529672e-08, "loss": 0.3296, "step": 43955 }, { "epoch": 0.9724623841845821, "grad_norm": 0.9140300750732422, "learning_rate": 3.7349625110030265e-08, "loss": 0.3247, "step": 43960 }, { "epoch": 0.9725729918260952, "grad_norm": 0.8447665572166443, "learning_rate": 3.7050175501687216e-08, "loss": 0.3102, "step": 43965 }, { "epoch": 0.9726835994676085, "grad_norm": 1.104649305343628, "learning_rate": 3.675192891642754e-08, "loss": 0.4849, "step": 43970 }, { "epoch": 0.9727942071091218, "grad_norm": 0.7341554760932922, "learning_rate": 3.645488539026354e-08, "loss": 0.2881, "step": 43975 }, { "epoch": 0.9729048147506351, "grad_norm": 1.5241870880126953, "learning_rate": 3.615904495906319e-08, "loss": 0.409, "step": 43980 }, { "epoch": 0.9730154223921483, "grad_norm": 1.0207395553588867, "learning_rate": 3.586440765854904e-08, "loss": 0.2898, "step": 43985 }, { "epoch": 0.9731260300336616, "grad_norm": 1.16932213306427, "learning_rate": 3.557097352429928e-08, "loss": 0.3598, "step": 43990 }, { "epoch": 0.9732366376751749, "grad_norm": 1.280771017074585, "learning_rate": 3.5278742591743356e-08, "loss": 0.4002, "step": 43995 }, { "epoch": 0.9733472453166881, "grad_norm": 0.8860983848571777, "learning_rate": 3.498771489617192e-08, "loss": 0.2641, "step": 44000 }, { "epoch": 0.9734578529582014, "grad_norm": 0.8312844038009644, "learning_rate": 3.469789047272465e-08, "loss": 0.2967, "step": 44005 }, { "epoch": 0.9735684605997146, "grad_norm": 1.3298097848892212, "learning_rate": 3.440926935639688e-08, "loss": 0.3442, "step": 44010 }, { "epoch": 0.9736790682412279, "grad_norm": 0.9424477219581604, "learning_rate": 3.412185158204073e-08, "loss": 0.3698, "step": 44015 }, { "epoch": 0.9737896758827411, "grad_norm": 1.075718641281128, "learning_rate": 3.383563718436178e-08, "loss": 0.3267, "step": 44020 }, { "epoch": 0.9739002835242544, "grad_norm": 0.9466701149940491, "learning_rate": 3.355062619791904e-08, "loss": 0.4501, "step": 44025 }, { "epoch": 0.9740108911657677, "grad_norm": 0.9155141711235046, "learning_rate": 3.326681865712944e-08, "loss": 0.2957, "step": 44030 }, { "epoch": 0.9741214988072809, "grad_norm": 1.0297354459762573, "learning_rate": 3.298421459626111e-08, "loss": 0.3591, "step": 44035 }, { "epoch": 0.9742321064487942, "grad_norm": 1.5054594278335571, "learning_rate": 3.270281404943787e-08, "loss": 0.4171, "step": 44040 }, { "epoch": 0.9743427140903075, "grad_norm": 1.8432294130325317, "learning_rate": 3.2422617050641425e-08, "loss": 0.2613, "step": 44045 }, { "epoch": 0.9744533217318208, "grad_norm": 0.8293846845626831, "learning_rate": 3.2143623633703605e-08, "loss": 0.295, "step": 44050 }, { "epoch": 0.974563929373334, "grad_norm": 0.809073805809021, "learning_rate": 3.18658338323119e-08, "loss": 0.3188, "step": 44055 }, { "epoch": 0.9746745370148472, "grad_norm": 1.27605402469635, "learning_rate": 3.1589247680011704e-08, "loss": 0.4562, "step": 44060 }, { "epoch": 0.9747851446563605, "grad_norm": 0.8783007264137268, "learning_rate": 3.1313865210199635e-08, "loss": 0.2493, "step": 44065 }, { "epoch": 0.9748957522978737, "grad_norm": 0.7802658081054688, "learning_rate": 3.103968645612687e-08, "loss": 0.2944, "step": 44070 }, { "epoch": 0.975006359939387, "grad_norm": 1.6105525493621826, "learning_rate": 3.076671145090249e-08, "loss": 0.3879, "step": 44075 }, { "epoch": 0.9751169675809003, "grad_norm": 0.26902520656585693, "learning_rate": 3.049494022748567e-08, "loss": 0.3248, "step": 44080 }, { "epoch": 0.9752275752224135, "grad_norm": 0.7602634429931641, "learning_rate": 3.0224372818695724e-08, "loss": 0.213, "step": 44085 }, { "epoch": 0.9753381828639268, "grad_norm": 1.2931236028671265, "learning_rate": 2.9955009257200964e-08, "loss": 0.3573, "step": 44090 }, { "epoch": 0.9754487905054401, "grad_norm": 2.015890598297119, "learning_rate": 2.968684957552759e-08, "loss": 0.4447, "step": 44095 }, { "epoch": 0.9755593981469534, "grad_norm": 1.2910219430923462, "learning_rate": 2.9419893806057477e-08, "loss": 0.4475, "step": 44100 }, { "epoch": 0.9756700057884665, "grad_norm": 1.93290376663208, "learning_rate": 2.9154141981023732e-08, "loss": 0.3846, "step": 44105 }, { "epoch": 0.9757806134299798, "grad_norm": 1.0300754308700562, "learning_rate": 2.888959413251513e-08, "loss": 0.3326, "step": 44110 }, { "epoch": 0.9758912210714931, "grad_norm": 0.8932158350944519, "learning_rate": 2.862625029247834e-08, "loss": 0.2911, "step": 44115 }, { "epoch": 0.9760018287130063, "grad_norm": 0.9097659587860107, "learning_rate": 2.836411049270904e-08, "loss": 0.276, "step": 44120 }, { "epoch": 0.9761124363545196, "grad_norm": 2.024054527282715, "learning_rate": 2.810317476486302e-08, "loss": 0.38, "step": 44125 }, { "epoch": 0.9762230439960329, "grad_norm": 1.3228811025619507, "learning_rate": 2.784344314044729e-08, "loss": 0.3164, "step": 44130 }, { "epoch": 0.9763336516375462, "grad_norm": 0.9883993268013, "learning_rate": 2.758491565082344e-08, "loss": 0.3291, "step": 44135 }, { "epoch": 0.9764442592790594, "grad_norm": 1.210230827331543, "learning_rate": 2.7327592327209827e-08, "loss": 0.2821, "step": 44140 }, { "epoch": 0.9765548669205727, "grad_norm": 2.1061625480651855, "learning_rate": 2.7071473200678265e-08, "loss": 0.4017, "step": 44145 }, { "epoch": 0.976665474562086, "grad_norm": 0.9956964254379272, "learning_rate": 2.6816558302154016e-08, "loss": 0.412, "step": 44150 }, { "epoch": 0.9767760822035991, "grad_norm": 0.3307163119316101, "learning_rate": 2.6562847662418012e-08, "loss": 0.2277, "step": 44155 }, { "epoch": 0.9768866898451124, "grad_norm": 0.8326628804206848, "learning_rate": 2.6310341312106857e-08, "loss": 0.4186, "step": 44160 }, { "epoch": 0.9769972974866257, "grad_norm": 1.064047932624817, "learning_rate": 2.6059039281709498e-08, "loss": 0.3117, "step": 44165 }, { "epoch": 0.9771079051281389, "grad_norm": 1.74552321434021, "learning_rate": 2.5808941601571657e-08, "loss": 0.4886, "step": 44170 }, { "epoch": 0.9772185127696522, "grad_norm": 1.0192034244537354, "learning_rate": 2.5560048301892516e-08, "loss": 0.3471, "step": 44175 }, { "epoch": 0.9773291204111655, "grad_norm": 2.3376076221466064, "learning_rate": 2.5312359412723587e-08, "loss": 0.4959, "step": 44180 }, { "epoch": 0.9774397280526788, "grad_norm": 1.1214144229888916, "learning_rate": 2.50658749639765e-08, "loss": 0.3214, "step": 44185 }, { "epoch": 0.977550335694192, "grad_norm": 0.3044200837612152, "learning_rate": 2.4820594985411893e-08, "loss": 0.3002, "step": 44190 }, { "epoch": 0.9776609433357053, "grad_norm": 0.7986836433410645, "learning_rate": 2.457651950664719e-08, "loss": 0.2614, "step": 44195 }, { "epoch": 0.9777715509772185, "grad_norm": 1.0779314041137695, "learning_rate": 2.4333648557156586e-08, "loss": 0.3954, "step": 44200 }, { "epoch": 0.9778821586187317, "grad_norm": 1.163984775543213, "learning_rate": 2.4091982166264404e-08, "loss": 0.4044, "step": 44205 }, { "epoch": 0.977992766260245, "grad_norm": 0.7123786807060242, "learning_rate": 2.385152036315286e-08, "loss": 0.399, "step": 44210 }, { "epoch": 0.9781033739017583, "grad_norm": 1.107764482498169, "learning_rate": 2.361226317685761e-08, "loss": 0.4051, "step": 44215 }, { "epoch": 0.9782139815432715, "grad_norm": 1.035296082496643, "learning_rate": 2.3374210636268878e-08, "loss": 0.2917, "step": 44220 }, { "epoch": 0.9783245891847848, "grad_norm": 1.629630208015442, "learning_rate": 2.3137362770130346e-08, "loss": 0.388, "step": 44225 }, { "epoch": 0.9784351968262981, "grad_norm": 0.7107282280921936, "learning_rate": 2.290171960704357e-08, "loss": 0.2244, "step": 44230 }, { "epoch": 0.9785458044678114, "grad_norm": 1.408760666847229, "learning_rate": 2.2667281175460242e-08, "loss": 0.3438, "step": 44235 }, { "epoch": 0.9786564121093246, "grad_norm": 2.038865089416504, "learning_rate": 2.2434047503689938e-08, "loss": 0.3139, "step": 44240 }, { "epoch": 0.9787670197508379, "grad_norm": 0.4330565333366394, "learning_rate": 2.2202018619894572e-08, "loss": 0.3254, "step": 44245 }, { "epoch": 0.9788776273923511, "grad_norm": 0.6991093754768372, "learning_rate": 2.1971194552092843e-08, "loss": 0.4145, "step": 44250 }, { "epoch": 0.9789882350338643, "grad_norm": 0.9346259236335754, "learning_rate": 2.1741575328155794e-08, "loss": 0.2189, "step": 44255 }, { "epoch": 0.9790988426753776, "grad_norm": 2.657240629196167, "learning_rate": 2.1513160975810133e-08, "loss": 0.3631, "step": 44260 }, { "epoch": 0.9792094503168909, "grad_norm": 1.1674549579620361, "learning_rate": 2.1285951522636018e-08, "loss": 0.2731, "step": 44265 }, { "epoch": 0.9793200579584042, "grad_norm": 1.0256150960922241, "learning_rate": 2.105994699607039e-08, "loss": 0.4577, "step": 44270 }, { "epoch": 0.9794306655999174, "grad_norm": 1.0727412700653076, "learning_rate": 2.0835147423402536e-08, "loss": 0.4215, "step": 44275 }, { "epoch": 0.9795412732414307, "grad_norm": 1.0216151475906372, "learning_rate": 2.0611552831777403e-08, "loss": 0.2975, "step": 44280 }, { "epoch": 0.979651880882944, "grad_norm": 1.4752767086029053, "learning_rate": 2.038916324819229e-08, "loss": 0.3161, "step": 44285 }, { "epoch": 0.9797624885244572, "grad_norm": 1.3177337646484375, "learning_rate": 2.016797869950238e-08, "loss": 0.3027, "step": 44290 }, { "epoch": 0.9798730961659704, "grad_norm": 1.2421481609344482, "learning_rate": 1.994799921241519e-08, "loss": 0.3101, "step": 44295 }, { "epoch": 0.9799837038074837, "grad_norm": 1.2231987714767456, "learning_rate": 1.972922481349393e-08, "loss": 0.4314, "step": 44300 }, { "epoch": 0.980094311448997, "grad_norm": 1.1160156726837158, "learning_rate": 1.951165552915302e-08, "loss": 0.2869, "step": 44305 }, { "epoch": 0.9802049190905102, "grad_norm": 0.8673393726348877, "learning_rate": 1.9295291385666994e-08, "loss": 0.3603, "step": 44310 }, { "epoch": 0.9803155267320235, "grad_norm": 1.084324598312378, "learning_rate": 1.9080132409159403e-08, "loss": 0.4363, "step": 44315 }, { "epoch": 0.9804261343735368, "grad_norm": 1.2145400047302246, "learning_rate": 1.8866178625611688e-08, "loss": 0.4303, "step": 44320 }, { "epoch": 0.98053674201505, "grad_norm": 1.341312050819397, "learning_rate": 1.8653430060858735e-08, "loss": 0.157, "step": 44325 }, { "epoch": 0.9806473496565633, "grad_norm": 1.7596089839935303, "learning_rate": 1.8441886740589997e-08, "loss": 0.3394, "step": 44330 }, { "epoch": 0.9807579572980766, "grad_norm": 0.41809675097465515, "learning_rate": 1.8231548690348378e-08, "loss": 0.2277, "step": 44335 }, { "epoch": 0.9808685649395897, "grad_norm": 0.7741185426712036, "learning_rate": 1.8022415935533554e-08, "loss": 0.3563, "step": 44340 }, { "epoch": 0.980979172581103, "grad_norm": 1.015209436416626, "learning_rate": 1.781448850139644e-08, "loss": 0.3418, "step": 44345 }, { "epoch": 0.9810897802226163, "grad_norm": 1.6903144121170044, "learning_rate": 1.7607766413045846e-08, "loss": 0.3253, "step": 44350 }, { "epoch": 0.9812003878641296, "grad_norm": 1.3812369108200073, "learning_rate": 1.7402249695441796e-08, "loss": 0.3045, "step": 44355 }, { "epoch": 0.9813109955056428, "grad_norm": 1.4027310609817505, "learning_rate": 1.7197938373402222e-08, "loss": 0.2542, "step": 44360 }, { "epoch": 0.9814216031471561, "grad_norm": 0.5277694463729858, "learning_rate": 1.699483247159739e-08, "loss": 0.1675, "step": 44365 }, { "epoch": 0.9815322107886694, "grad_norm": 0.517677366733551, "learning_rate": 1.67929320145499e-08, "loss": 0.2782, "step": 44370 }, { "epoch": 0.9816428184301826, "grad_norm": 1.2465370893478394, "learning_rate": 1.659223702664248e-08, "loss": 0.2432, "step": 44375 }, { "epoch": 0.9817534260716959, "grad_norm": 0.808479905128479, "learning_rate": 1.6392747532105736e-08, "loss": 0.2188, "step": 44380 }, { "epoch": 0.9818640337132092, "grad_norm": 1.2211167812347412, "learning_rate": 1.6194463555030404e-08, "loss": 0.3453, "step": 44385 }, { "epoch": 0.9819746413547223, "grad_norm": 1.6736855506896973, "learning_rate": 1.5997385119358445e-08, "loss": 0.3199, "step": 44390 }, { "epoch": 0.9820852489962356, "grad_norm": 0.6414506435394287, "learning_rate": 1.5801512248887484e-08, "loss": 0.1696, "step": 44395 }, { "epoch": 0.9821958566377489, "grad_norm": 1.5760133266448975, "learning_rate": 1.5606844967268608e-08, "loss": 0.3417, "step": 44400 }, { "epoch": 0.9823064642792622, "grad_norm": 1.4161633253097534, "learning_rate": 1.5413383298007456e-08, "loss": 0.3682, "step": 44405 }, { "epoch": 0.9824170719207754, "grad_norm": 1.0439426898956299, "learning_rate": 1.5221127264464232e-08, "loss": 0.3997, "step": 44410 }, { "epoch": 0.9825276795622887, "grad_norm": 0.7585548758506775, "learning_rate": 1.5030076889854804e-08, "loss": 0.2614, "step": 44415 }, { "epoch": 0.982638287203802, "grad_norm": 1.1366868019104004, "learning_rate": 1.4840232197248505e-08, "loss": 0.3209, "step": 44420 }, { "epoch": 0.9827488948453152, "grad_norm": 1.3889745473861694, "learning_rate": 1.4651593209568104e-08, "loss": 0.437, "step": 44425 }, { "epoch": 0.9828595024868285, "grad_norm": 1.459627628326416, "learning_rate": 1.446415994959316e-08, "loss": 0.2811, "step": 44430 }, { "epoch": 0.9829701101283417, "grad_norm": 1.4238557815551758, "learning_rate": 1.4277932439954455e-08, "loss": 0.3361, "step": 44435 }, { "epoch": 0.983080717769855, "grad_norm": 0.8100314140319824, "learning_rate": 1.4092910703138451e-08, "loss": 0.3617, "step": 44440 }, { "epoch": 0.9831913254113682, "grad_norm": 1.306684970855713, "learning_rate": 1.3909094761489494e-08, "loss": 0.3704, "step": 44445 }, { "epoch": 0.9833019330528815, "grad_norm": 0.5368705987930298, "learning_rate": 1.3726484637200943e-08, "loss": 0.3289, "step": 44450 }, { "epoch": 0.9834125406943948, "grad_norm": 1.0740015506744385, "learning_rate": 1.3545080352322936e-08, "loss": 0.2964, "step": 44455 }, { "epoch": 0.983523148335908, "grad_norm": 2.0925779342651367, "learning_rate": 1.3364881928761286e-08, "loss": 0.3579, "step": 44460 }, { "epoch": 0.9836337559774213, "grad_norm": 0.8676304221153259, "learning_rate": 1.3185889388273032e-08, "loss": 0.4389, "step": 44465 }, { "epoch": 0.9837443636189346, "grad_norm": 1.3627774715423584, "learning_rate": 1.3008102752473106e-08, "loss": 0.2745, "step": 44470 }, { "epoch": 0.9838549712604479, "grad_norm": 0.7408573627471924, "learning_rate": 1.2831522042828782e-08, "loss": 0.3705, "step": 44475 }, { "epoch": 0.9839655789019611, "grad_norm": 1.717667818069458, "learning_rate": 1.265614728066189e-08, "loss": 0.4018, "step": 44480 }, { "epoch": 0.9840761865434743, "grad_norm": 0.699761688709259, "learning_rate": 1.2481978487148826e-08, "loss": 0.2723, "step": 44485 }, { "epoch": 0.9841867941849876, "grad_norm": 0.6322053670883179, "learning_rate": 1.2309015683320546e-08, "loss": 0.2922, "step": 44490 }, { "epoch": 0.9842974018265008, "grad_norm": 0.9672725796699524, "learning_rate": 1.2137258890061454e-08, "loss": 0.369, "step": 44495 }, { "epoch": 0.9844080094680141, "grad_norm": 1.6737964153289795, "learning_rate": 1.1966708128112736e-08, "loss": 0.3692, "step": 44500 }, { "epoch": 0.9845186171095274, "grad_norm": 1.2030048370361328, "learning_rate": 1.179736341806792e-08, "loss": 0.2782, "step": 44505 }, { "epoch": 0.9846292247510406, "grad_norm": 0.7475076913833618, "learning_rate": 1.1629224780373982e-08, "loss": 0.3352, "step": 44510 }, { "epoch": 0.9847398323925539, "grad_norm": 1.0846457481384277, "learning_rate": 1.1462292235335792e-08, "loss": 0.3857, "step": 44515 }, { "epoch": 0.9848504400340672, "grad_norm": 1.375097393989563, "learning_rate": 1.1296565803108339e-08, "loss": 0.2664, "step": 44520 }, { "epoch": 0.9849610476755805, "grad_norm": 0.9589844942092896, "learning_rate": 1.1132045503703393e-08, "loss": 0.3594, "step": 44525 }, { "epoch": 0.9850716553170936, "grad_norm": 0.9291345477104187, "learning_rate": 1.0968731356987284e-08, "loss": 0.2816, "step": 44530 }, { "epoch": 0.9851822629586069, "grad_norm": 1.3328547477722168, "learning_rate": 1.0806623382680904e-08, "loss": 0.2886, "step": 44535 }, { "epoch": 0.9852928706001202, "grad_norm": 1.3005990982055664, "learning_rate": 1.0645721600357483e-08, "loss": 0.3334, "step": 44540 }, { "epoch": 0.9854034782416334, "grad_norm": 0.8189138174057007, "learning_rate": 1.0486026029445928e-08, "loss": 0.3334, "step": 44545 }, { "epoch": 0.9855140858831467, "grad_norm": 0.6173079013824463, "learning_rate": 1.0327536689230811e-08, "loss": 0.3616, "step": 44550 }, { "epoch": 0.98562469352466, "grad_norm": 1.5327001810073853, "learning_rate": 1.0170253598847935e-08, "loss": 0.2989, "step": 44555 }, { "epoch": 0.9857353011661733, "grad_norm": 1.2357803583145142, "learning_rate": 1.0014176777289885e-08, "loss": 0.3922, "step": 44560 }, { "epoch": 0.9858459088076865, "grad_norm": 1.0950720310211182, "learning_rate": 9.8593062434027e-09, "loss": 0.4353, "step": 44565 }, { "epoch": 0.9859565164491998, "grad_norm": 0.8551430106163025, "learning_rate": 9.705642015886974e-09, "loss": 0.2444, "step": 44570 }, { "epoch": 0.9860671240907131, "grad_norm": 1.6071292161941528, "learning_rate": 9.553184113297863e-09, "loss": 0.2949, "step": 44575 }, { "epoch": 0.9861777317322262, "grad_norm": 0.8356910347938538, "learning_rate": 9.40193255404509e-09, "loss": 0.2256, "step": 44580 }, { "epoch": 0.9862883393737395, "grad_norm": 0.7448384165763855, "learning_rate": 9.25188735639182e-09, "loss": 0.2243, "step": 44585 }, { "epoch": 0.9863989470152528, "grad_norm": 1.2559914588928223, "learning_rate": 9.103048538454673e-09, "loss": 0.1919, "step": 44590 }, { "epoch": 0.986509554656766, "grad_norm": 0.9741445779800415, "learning_rate": 8.95541611820816e-09, "loss": 0.3364, "step": 44595 }, { "epoch": 0.9866201622982793, "grad_norm": 1.2922651767730713, "learning_rate": 8.808990113476911e-09, "loss": 0.2589, "step": 44600 }, { "epoch": 0.9867307699397926, "grad_norm": 1.2817283868789673, "learning_rate": 8.66377054194345e-09, "loss": 0.3695, "step": 44605 }, { "epoch": 0.9868413775813059, "grad_norm": 0.9054825901985168, "learning_rate": 8.519757421141529e-09, "loss": 0.313, "step": 44610 }, { "epoch": 0.9869519852228191, "grad_norm": 0.556235134601593, "learning_rate": 8.376950768461678e-09, "loss": 0.1169, "step": 44615 }, { "epoch": 0.9870625928643324, "grad_norm": 1.37453293800354, "learning_rate": 8.235350601146775e-09, "loss": 0.3756, "step": 44620 }, { "epoch": 0.9871732005058456, "grad_norm": 0.991200864315033, "learning_rate": 8.094956936296472e-09, "loss": 0.263, "step": 44625 }, { "epoch": 0.9872838081473588, "grad_norm": 0.8331853747367859, "learning_rate": 7.955769790861656e-09, "loss": 0.2369, "step": 44630 }, { "epoch": 0.9873944157888721, "grad_norm": 1.3220340013504028, "learning_rate": 7.817789181651102e-09, "loss": 0.2753, "step": 44635 }, { "epoch": 0.9875050234303854, "grad_norm": 2.273803949356079, "learning_rate": 7.68101512532371e-09, "loss": 0.3257, "step": 44640 }, { "epoch": 0.9876156310718986, "grad_norm": 1.2871111631393433, "learning_rate": 7.545447638396264e-09, "loss": 0.4903, "step": 44645 }, { "epoch": 0.9877262387134119, "grad_norm": 3.079326629638672, "learning_rate": 7.411086737237893e-09, "loss": 0.2508, "step": 44650 }, { "epoch": 0.9878368463549252, "grad_norm": 0.807977020740509, "learning_rate": 7.277932438072288e-09, "loss": 0.4753, "step": 44655 }, { "epoch": 0.9879474539964385, "grad_norm": 1.1475026607513428, "learning_rate": 7.145984756978808e-09, "loss": 0.246, "step": 44660 }, { "epoch": 0.9880580616379517, "grad_norm": 0.9632619619369507, "learning_rate": 7.015243709890263e-09, "loss": 0.2948, "step": 44665 }, { "epoch": 0.988168669279465, "grad_norm": 3.9410483837127686, "learning_rate": 6.8857093125929145e-09, "loss": 0.3408, "step": 44670 }, { "epoch": 0.9882792769209782, "grad_norm": 1.14405357837677, "learning_rate": 6.757381580727584e-09, "loss": 0.3145, "step": 44675 }, { "epoch": 0.9883898845624914, "grad_norm": 1.4856693744659424, "learning_rate": 6.630260529790767e-09, "loss": 0.4577, "step": 44680 }, { "epoch": 0.9885004922040047, "grad_norm": 1.1995941400527954, "learning_rate": 6.504346175131293e-09, "loss": 0.2842, "step": 44685 }, { "epoch": 0.988611099845518, "grad_norm": 0.8655288815498352, "learning_rate": 6.379638531954779e-09, "loss": 0.3381, "step": 44690 }, { "epoch": 0.9887217074870313, "grad_norm": 0.522936999797821, "learning_rate": 6.25613761531807e-09, "loss": 0.2544, "step": 44695 }, { "epoch": 0.9888323151285445, "grad_norm": 1.330186128616333, "learning_rate": 6.13384344013368e-09, "loss": 0.3745, "step": 44700 }, { "epoch": 0.9889429227700578, "grad_norm": 1.5158779621124268, "learning_rate": 6.012756021170907e-09, "loss": 0.2401, "step": 44705 }, { "epoch": 0.9890535304115711, "grad_norm": 0.6998991966247559, "learning_rate": 5.892875373048057e-09, "loss": 0.1749, "step": 44710 }, { "epoch": 0.9891641380530843, "grad_norm": 0.6624928712844849, "learning_rate": 5.774201510243549e-09, "loss": 0.4083, "step": 44715 }, { "epoch": 0.9892747456945975, "grad_norm": 0.90262371301651, "learning_rate": 5.656734447084811e-09, "loss": 0.3319, "step": 44720 }, { "epoch": 0.9893853533361108, "grad_norm": 1.0910990238189697, "learning_rate": 5.540474197757162e-09, "loss": 0.34, "step": 44725 }, { "epoch": 0.989495960977624, "grad_norm": 0.7527628540992737, "learning_rate": 5.425420776298263e-09, "loss": 0.3454, "step": 44730 }, { "epoch": 0.9896065686191373, "grad_norm": 1.501615047454834, "learning_rate": 5.311574196602554e-09, "loss": 0.2593, "step": 44735 }, { "epoch": 0.9897171762606506, "grad_norm": 0.5434844493865967, "learning_rate": 5.198934472414596e-09, "loss": 0.311, "step": 44740 }, { "epoch": 0.9898277839021639, "grad_norm": 1.725484848022461, "learning_rate": 5.087501617336843e-09, "loss": 0.4385, "step": 44745 }, { "epoch": 0.9899383915436771, "grad_norm": 1.1010972261428833, "learning_rate": 4.977275644825197e-09, "loss": 0.3629, "step": 44750 }, { "epoch": 0.9900489991851904, "grad_norm": 1.0042967796325684, "learning_rate": 4.868256568187901e-09, "loss": 0.1945, "step": 44755 }, { "epoch": 0.9901596068267037, "grad_norm": 1.20514976978302, "learning_rate": 4.760444400591091e-09, "loss": 0.3026, "step": 44760 }, { "epoch": 0.990270214468217, "grad_norm": 0.7164071798324585, "learning_rate": 4.653839155051021e-09, "loss": 0.2969, "step": 44765 }, { "epoch": 0.9903808221097301, "grad_norm": 1.4413563013076782, "learning_rate": 4.548440844440727e-09, "loss": 0.2712, "step": 44770 }, { "epoch": 0.9904914297512434, "grad_norm": 1.5974504947662354, "learning_rate": 4.4442494814889156e-09, "loss": 0.4258, "step": 44775 }, { "epoch": 0.9906020373927567, "grad_norm": 0.819701075553894, "learning_rate": 4.341265078773305e-09, "loss": 0.3482, "step": 44780 }, { "epoch": 0.9907126450342699, "grad_norm": 0.9617906212806702, "learning_rate": 4.239487648731722e-09, "loss": 0.3759, "step": 44785 }, { "epoch": 0.9908232526757832, "grad_norm": 0.6768617033958435, "learning_rate": 4.138917203654336e-09, "loss": 0.4007, "step": 44790 }, { "epoch": 0.9909338603172965, "grad_norm": 1.0307756662368774, "learning_rate": 4.039553755682546e-09, "loss": 0.2918, "step": 44795 }, { "epoch": 0.9910444679588097, "grad_norm": 1.0369569063186646, "learning_rate": 3.941397316815643e-09, "loss": 0.3471, "step": 44800 }, { "epoch": 0.991155075600323, "grad_norm": 1.0301721096038818, "learning_rate": 3.844447898906367e-09, "loss": 0.2859, "step": 44805 }, { "epoch": 0.9912656832418363, "grad_norm": 1.0580987930297852, "learning_rate": 3.748705513660911e-09, "loss": 0.4062, "step": 44810 }, { "epoch": 0.9913762908833494, "grad_norm": 0.9911401867866516, "learning_rate": 3.6541701726400257e-09, "loss": 0.3405, "step": 44815 }, { "epoch": 0.9914868985248627, "grad_norm": 0.9192270040512085, "learning_rate": 3.5608418872590256e-09, "loss": 0.2693, "step": 44820 }, { "epoch": 0.991597506166376, "grad_norm": 2.145486354827881, "learning_rate": 3.468720668787784e-09, "loss": 0.3675, "step": 44825 }, { "epoch": 0.9917081138078893, "grad_norm": 1.6240590810775757, "learning_rate": 3.377806528348515e-09, "loss": 0.2804, "step": 44830 }, { "epoch": 0.9918187214494025, "grad_norm": 1.3521783351898193, "learning_rate": 3.2880994769213248e-09, "loss": 0.3897, "step": 44835 }, { "epoch": 0.9919293290909158, "grad_norm": 0.9259163737297058, "learning_rate": 3.1995995253353285e-09, "loss": 0.2094, "step": 44840 }, { "epoch": 0.9920399367324291, "grad_norm": 1.4135597944259644, "learning_rate": 3.112306684279753e-09, "loss": 0.3523, "step": 44845 }, { "epoch": 0.9921505443739423, "grad_norm": 1.1354868412017822, "learning_rate": 3.0262209642928362e-09, "loss": 0.3118, "step": 44850 }, { "epoch": 0.9922611520154556, "grad_norm": 0.8588415384292603, "learning_rate": 2.941342375770706e-09, "loss": 0.4141, "step": 44855 }, { "epoch": 0.9923717596569689, "grad_norm": 0.9490187168121338, "learning_rate": 2.8576709289629413e-09, "loss": 0.3635, "step": 44860 }, { "epoch": 0.992482367298482, "grad_norm": 0.9203305840492249, "learning_rate": 2.7752066339714613e-09, "loss": 0.387, "step": 44865 }, { "epoch": 0.9925929749399953, "grad_norm": 1.146507978439331, "learning_rate": 2.6939495007538565e-09, "loss": 0.3518, "step": 44870 }, { "epoch": 0.9927035825815086, "grad_norm": 3.110248327255249, "learning_rate": 2.6138995391233878e-09, "loss": 0.3187, "step": 44875 }, { "epoch": 0.9928141902230219, "grad_norm": 1.496558427810669, "learning_rate": 2.5350567587445472e-09, "loss": 0.3178, "step": 44880 }, { "epoch": 0.9929247978645351, "grad_norm": 0.7571333646774292, "learning_rate": 2.4574211691386072e-09, "loss": 0.3298, "step": 44885 }, { "epoch": 0.9930354055060484, "grad_norm": 0.9262034296989441, "learning_rate": 2.380992779679181e-09, "loss": 0.4348, "step": 44890 }, { "epoch": 0.9931460131475617, "grad_norm": 0.6034582853317261, "learning_rate": 2.305771599595552e-09, "loss": 0.1824, "step": 44895 }, { "epoch": 0.993256620789075, "grad_norm": 0.7429075837135315, "learning_rate": 2.2317576379704553e-09, "loss": 0.3022, "step": 44900 }, { "epoch": 0.9933672284305882, "grad_norm": 1.2162818908691406, "learning_rate": 2.158950903741186e-09, "loss": 0.2178, "step": 44905 }, { "epoch": 0.9934778360721014, "grad_norm": 1.2111719846725464, "learning_rate": 2.0873514056996e-09, "loss": 0.3985, "step": 44910 }, { "epoch": 0.9935884437136147, "grad_norm": 1.3748955726623535, "learning_rate": 2.0169591524898945e-09, "loss": 0.2982, "step": 44915 }, { "epoch": 0.9936990513551279, "grad_norm": 0.9158650040626526, "learning_rate": 1.947774152613047e-09, "loss": 0.4225, "step": 44920 }, { "epoch": 0.9938096589966412, "grad_norm": 0.99457848072052, "learning_rate": 1.879796414423485e-09, "loss": 0.4137, "step": 44925 }, { "epoch": 0.9939202666381545, "grad_norm": 1.1148381233215332, "learning_rate": 1.8130259461279775e-09, "loss": 0.4296, "step": 44930 }, { "epoch": 0.9940308742796677, "grad_norm": 1.7547036409378052, "learning_rate": 1.7474627557900748e-09, "loss": 0.3045, "step": 44935 }, { "epoch": 0.994141481921181, "grad_norm": 0.6871848702430725, "learning_rate": 1.6831068513267768e-09, "loss": 0.3359, "step": 44940 }, { "epoch": 0.9942520895626943, "grad_norm": 1.1672884225845337, "learning_rate": 1.6199582405085346e-09, "loss": 0.3063, "step": 44945 }, { "epoch": 0.9943626972042076, "grad_norm": 0.6046397089958191, "learning_rate": 1.55801693096036e-09, "loss": 0.247, "step": 44950 }, { "epoch": 0.9944733048457208, "grad_norm": 0.5264644026756287, "learning_rate": 1.4972829301618253e-09, "loss": 0.2982, "step": 44955 }, { "epoch": 0.994583912487234, "grad_norm": 1.0089828968048096, "learning_rate": 1.437756245447064e-09, "loss": 0.3629, "step": 44960 }, { "epoch": 0.9946945201287473, "grad_norm": 0.8051875233650208, "learning_rate": 1.3794368840036597e-09, "loss": 0.2277, "step": 44965 }, { "epoch": 0.9948051277702605, "grad_norm": 0.8991878628730774, "learning_rate": 1.3223248528726472e-09, "loss": 0.2985, "step": 44970 }, { "epoch": 0.9949157354117738, "grad_norm": 0.9208277463912964, "learning_rate": 1.2664201589518422e-09, "loss": 0.261, "step": 44975 }, { "epoch": 0.9950263430532871, "grad_norm": 0.842139482498169, "learning_rate": 1.21172280899029e-09, "loss": 0.1996, "step": 44980 }, { "epoch": 0.9951369506948003, "grad_norm": 1.2299338579177856, "learning_rate": 1.1582328095938179e-09, "loss": 0.3471, "step": 44985 }, { "epoch": 0.9952475583363136, "grad_norm": 0.8140215873718262, "learning_rate": 1.105950167220593e-09, "loss": 0.1823, "step": 44990 }, { "epoch": 0.9953581659778269, "grad_norm": 1.1594854593276978, "learning_rate": 1.0548748881833436e-09, "loss": 0.4109, "step": 44995 }, { "epoch": 0.9954687736193402, "grad_norm": 0.5586759448051453, "learning_rate": 1.0050069786515792e-09, "loss": 0.2028, "step": 45000 }, { "epoch": 0.9955793812608533, "grad_norm": 0.7748615741729736, "learning_rate": 9.563464446438186e-10, "loss": 0.2574, "step": 45005 }, { "epoch": 0.9956899889023666, "grad_norm": 1.5388298034667969, "learning_rate": 9.088932920386928e-10, "loss": 0.4062, "step": 45010 }, { "epoch": 0.9958005965438799, "grad_norm": 2.342991352081299, "learning_rate": 8.626475265638423e-10, "loss": 0.3714, "step": 45015 }, { "epoch": 0.9959112041853931, "grad_norm": 1.1153980493545532, "learning_rate": 8.176091538047992e-10, "loss": 0.3897, "step": 45020 }, { "epoch": 0.9960218118269064, "grad_norm": 2.177870273590088, "learning_rate": 7.737781791994358e-10, "loss": 0.3754, "step": 45025 }, { "epoch": 0.9961324194684197, "grad_norm": 1.4383739233016968, "learning_rate": 7.311546080401854e-10, "loss": 0.3187, "step": 45030 }, { "epoch": 0.996243027109933, "grad_norm": 0.9463118314743042, "learning_rate": 6.897384454740419e-10, "loss": 0.1865, "step": 45035 }, { "epoch": 0.9963536347514462, "grad_norm": 0.061334796249866486, "learning_rate": 6.495296965014497e-10, "loss": 0.24, "step": 45040 }, { "epoch": 0.9964642423929595, "grad_norm": 2.928356409072876, "learning_rate": 6.105283659785244e-10, "loss": 0.4376, "step": 45045 }, { "epoch": 0.9965748500344728, "grad_norm": 0.4413149356842041, "learning_rate": 5.72734458614832e-10, "loss": 0.2152, "step": 45050 }, { "epoch": 0.9966854576759859, "grad_norm": 0.42425721883773804, "learning_rate": 5.36147978972279e-10, "loss": 0.2397, "step": 45055 }, { "epoch": 0.9967960653174992, "grad_norm": 1.2166426181793213, "learning_rate": 5.007689314706632e-10, "loss": 0.2818, "step": 45060 }, { "epoch": 0.9969066729590125, "grad_norm": 1.5567803382873535, "learning_rate": 4.665973203810126e-10, "loss": 0.5122, "step": 45065 }, { "epoch": 0.9970172806005257, "grad_norm": 1.7467998266220093, "learning_rate": 4.336331498300261e-10, "loss": 0.2032, "step": 45070 }, { "epoch": 0.997127888242039, "grad_norm": 0.6609592437744141, "learning_rate": 4.018764237967432e-10, "loss": 0.2236, "step": 45075 }, { "epoch": 0.9972384958835523, "grad_norm": 1.251699686050415, "learning_rate": 3.713271461180945e-10, "loss": 0.3344, "step": 45080 }, { "epoch": 0.9973491035250656, "grad_norm": 1.0829441547393799, "learning_rate": 3.419853204800205e-10, "loss": 0.3829, "step": 45085 }, { "epoch": 0.9974597111665788, "grad_norm": 1.3028589487075806, "learning_rate": 3.138509504285736e-10, "loss": 0.3224, "step": 45090 }, { "epoch": 0.9975703188080921, "grad_norm": 1.203287959098816, "learning_rate": 2.8692403935881574e-10, "loss": 0.2299, "step": 45095 }, { "epoch": 0.9976809264496053, "grad_norm": 0.9893448948860168, "learning_rate": 2.612045905225902e-10, "loss": 0.1364, "step": 45100 }, { "epoch": 0.9977915340911185, "grad_norm": 0.8504573702812195, "learning_rate": 2.3669260702630093e-10, "loss": 0.3305, "step": 45105 }, { "epoch": 0.9979021417326318, "grad_norm": 1.7462215423583984, "learning_rate": 2.1338809182869235e-10, "loss": 0.3215, "step": 45110 }, { "epoch": 0.9980127493741451, "grad_norm": 0.7786715626716614, "learning_rate": 1.9129104774417985e-10, "loss": 0.3044, "step": 45115 }, { "epoch": 0.9981233570156584, "grad_norm": 0.8417012691497803, "learning_rate": 1.7040147744173952e-10, "loss": 0.347, "step": 45120 }, { "epoch": 0.9982339646571716, "grad_norm": 0.9224909543991089, "learning_rate": 1.507193834426879e-10, "loss": 0.3912, "step": 45125 }, { "epoch": 0.9983445722986849, "grad_norm": 1.0801572799682617, "learning_rate": 1.3224476812401243e-10, "loss": 0.2207, "step": 45130 }, { "epoch": 0.9984551799401982, "grad_norm": 4.631531715393066, "learning_rate": 1.1497763371726145e-10, "loss": 0.4075, "step": 45135 }, { "epoch": 0.9985657875817114, "grad_norm": 0.6097607612609863, "learning_rate": 9.891798230632354e-11, "loss": 0.3633, "step": 45140 }, { "epoch": 0.9986763952232246, "grad_norm": 0.9844467639923096, "learning_rate": 8.406581583075834e-11, "loss": 0.3839, "step": 45145 }, { "epoch": 0.9987870028647379, "grad_norm": 1.2555344104766846, "learning_rate": 7.042113608468626e-11, "loss": 0.2362, "step": 45150 }, { "epoch": 0.9988976105062511, "grad_norm": 1.353078842163086, "learning_rate": 5.798394471456803e-11, "loss": 0.5051, "step": 45155 }, { "epoch": 0.9990082181477644, "grad_norm": 0.8903672099113464, "learning_rate": 4.675424322253541e-11, "loss": 0.2615, "step": 45160 }, { "epoch": 0.9991188257892777, "grad_norm": 0.5975431799888611, "learning_rate": 3.673203296528094e-11, "loss": 0.3058, "step": 45165 }, { "epoch": 0.999229433430791, "grad_norm": 0.6321763396263123, "learning_rate": 2.7917315151837486e-11, "loss": 0.2805, "step": 45170 }, { "epoch": 0.9993400410723042, "grad_norm": 0.9471678733825684, "learning_rate": 2.0310090846908937e-11, "loss": 0.1668, "step": 45175 }, { "epoch": 0.9994506487138175, "grad_norm": 0.7833312153816223, "learning_rate": 1.3910360969759951e-11, "loss": 0.2907, "step": 45180 }, { "epoch": 0.9995612563553308, "grad_norm": 0.6544402241706848, "learning_rate": 8.718126293105756e-12, "loss": 0.3455, "step": 45185 }, { "epoch": 0.999671863996844, "grad_norm": 1.462174892425537, "learning_rate": 4.7333874431121365e-12, "loss": 0.3522, "step": 45190 }, { "epoch": 0.9997824716383572, "grad_norm": 2.6061296463012695, "learning_rate": 1.9561449016158862e-12, "loss": 0.3375, "step": 45195 }, { "epoch": 0.9998930792798705, "grad_norm": 1.4005731344223022, "learning_rate": 3.8639900279413556e-13, "loss": 0.3596, "step": 45200 }, { "epoch": 0.9999815653930811, "step": 45204, "total_flos": 2.1648841527480484e+19, "train_loss": 0.358738521435558, "train_runtime": 99955.0953, "train_samples_per_second": 10.854, "train_steps_per_second": 0.452 } ], "logging_steps": 5, "max_steps": 45204, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.1648841527480484e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }