{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 46153, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010833531948085715, "grad_norm": 2.7235190868377686, "learning_rate": 1.9999999420824903e-05, "loss": 0.7015, "step": 5 }, { "epoch": 0.0002166706389617143, "grad_norm": 2.414857864379883, "learning_rate": 1.999999768329967e-05, "loss": 0.5493, "step": 10 }, { "epoch": 0.00032500595844257147, "grad_norm": 1.698796272277832, "learning_rate": 1.9999994787424503e-05, "loss": 0.4117, "step": 15 }, { "epoch": 0.0004333412779234286, "grad_norm": 1.7569530010223389, "learning_rate": 1.9999990733199743e-05, "loss": 0.5492, "step": 20 }, { "epoch": 0.0005416765974042857, "grad_norm": 1.3689367771148682, "learning_rate": 1.9999985520625857e-05, "loss": 0.601, "step": 25 }, { "epoch": 0.0006500119168851429, "grad_norm": 1.2858846187591553, "learning_rate": 1.9999979149703445e-05, "loss": 0.4681, "step": 30 }, { "epoch": 0.000758347236366, "grad_norm": 1.4113597869873047, "learning_rate": 1.9999971620433252e-05, "loss": 0.5089, "step": 35 }, { "epoch": 0.0008666825558468572, "grad_norm": 1.5467652082443237, "learning_rate": 1.9999962932816142e-05, "loss": 0.4915, "step": 40 }, { "epoch": 0.0009750178753277143, "grad_norm": 2.0565288066864014, "learning_rate": 1.999995308685313e-05, "loss": 0.4644, "step": 45 }, { "epoch": 0.0010833531948085714, "grad_norm": 1.1893069744110107, "learning_rate": 1.999994208254535e-05, "loss": 0.5231, "step": 50 }, { "epoch": 0.0011916885142894287, "grad_norm": 1.3814841508865356, "learning_rate": 1.999992991989408e-05, "loss": 0.4084, "step": 55 }, { "epoch": 0.0013000238337702859, "grad_norm": 1.829251766204834, "learning_rate": 1.9999916598900726e-05, "loss": 0.4854, "step": 60 }, { "epoch": 0.001408359153251143, "grad_norm": 1.7848165035247803, "learning_rate": 1.9999902119566836e-05, "loss": 0.4203, "step": 65 }, { "epoch": 0.001516694472732, "grad_norm": 1.1946731805801392, "learning_rate": 1.999988648189408e-05, "loss": 0.5026, "step": 70 }, { "epoch": 0.0016250297922128572, "grad_norm": 1.4514065980911255, "learning_rate": 1.999986968588427e-05, "loss": 0.4936, "step": 75 }, { "epoch": 0.0017333651116937143, "grad_norm": 1.4242520332336426, "learning_rate": 1.9999851731539365e-05, "loss": 0.4646, "step": 80 }, { "epoch": 0.0018417004311745715, "grad_norm": 1.5421302318572998, "learning_rate": 1.9999832618861426e-05, "loss": 0.4586, "step": 85 }, { "epoch": 0.0019500357506554286, "grad_norm": 1.5881341695785522, "learning_rate": 1.999981234785268e-05, "loss": 0.4295, "step": 90 }, { "epoch": 0.002058371070136286, "grad_norm": 1.6458631753921509, "learning_rate": 1.999979091851547e-05, "loss": 0.4822, "step": 95 }, { "epoch": 0.002166706389617143, "grad_norm": 1.2136765718460083, "learning_rate": 1.999976833085228e-05, "loss": 0.4886, "step": 100 }, { "epoch": 0.002275041709098, "grad_norm": 1.5977977514266968, "learning_rate": 1.9999744584865724e-05, "loss": 0.5317, "step": 105 }, { "epoch": 0.0023833770285788575, "grad_norm": 1.2734445333480835, "learning_rate": 1.9999719680558555e-05, "loss": 0.4239, "step": 110 }, { "epoch": 0.0024917123480597144, "grad_norm": 1.221501350402832, "learning_rate": 1.9999693617933654e-05, "loss": 0.414, "step": 115 }, { "epoch": 0.0026000476675405717, "grad_norm": 1.5670639276504517, "learning_rate": 1.9999666396994044e-05, "loss": 0.4442, "step": 120 }, { "epoch": 0.0027083829870214286, "grad_norm": 1.5892319679260254, "learning_rate": 1.999963801774288e-05, "loss": 0.5494, "step": 125 }, { "epoch": 0.002816718306502286, "grad_norm": 1.3238633871078491, "learning_rate": 1.9999608480183445e-05, "loss": 0.5368, "step": 130 }, { "epoch": 0.002925053625983143, "grad_norm": 1.7416787147521973, "learning_rate": 1.9999577784319158e-05, "loss": 0.4828, "step": 135 }, { "epoch": 0.003033388945464, "grad_norm": 1.6492130756378174, "learning_rate": 1.999954593015358e-05, "loss": 0.5856, "step": 140 }, { "epoch": 0.0031417242649448575, "grad_norm": 0.9742892384529114, "learning_rate": 1.99995129176904e-05, "loss": 0.4424, "step": 145 }, { "epoch": 0.0032500595844257144, "grad_norm": 1.4950246810913086, "learning_rate": 1.9999478746933443e-05, "loss": 0.4296, "step": 150 }, { "epoch": 0.0033583949039065718, "grad_norm": 0.950337827205658, "learning_rate": 1.999944341788666e-05, "loss": 0.4722, "step": 155 }, { "epoch": 0.0034667302233874287, "grad_norm": 1.0192716121673584, "learning_rate": 1.9999406930554153e-05, "loss": 0.5171, "step": 160 }, { "epoch": 0.003575065542868286, "grad_norm": 1.251477837562561, "learning_rate": 1.9999369284940144e-05, "loss": 0.4717, "step": 165 }, { "epoch": 0.003683400862349143, "grad_norm": 1.2961573600769043, "learning_rate": 1.9999330481048993e-05, "loss": 0.4934, "step": 170 }, { "epoch": 0.0037917361818300003, "grad_norm": 1.1114574670791626, "learning_rate": 1.9999290518885197e-05, "loss": 0.4589, "step": 175 }, { "epoch": 0.003900071501310857, "grad_norm": 1.3814114332199097, "learning_rate": 1.9999249398453383e-05, "loss": 0.516, "step": 180 }, { "epoch": 0.004008406820791715, "grad_norm": 1.1585659980773926, "learning_rate": 1.9999207119758315e-05, "loss": 0.4062, "step": 185 }, { "epoch": 0.004116742140272572, "grad_norm": 1.2475154399871826, "learning_rate": 1.999916368280489e-05, "loss": 0.4278, "step": 190 }, { "epoch": 0.004225077459753429, "grad_norm": 1.0515974760055542, "learning_rate": 1.9999119087598137e-05, "loss": 0.4026, "step": 195 }, { "epoch": 0.004333412779234286, "grad_norm": 1.1001428365707397, "learning_rate": 1.9999073334143228e-05, "loss": 0.5893, "step": 200 }, { "epoch": 0.004441748098715143, "grad_norm": 1.3614131212234497, "learning_rate": 1.999902642244546e-05, "loss": 0.4314, "step": 205 }, { "epoch": 0.004550083418196, "grad_norm": 1.5581042766571045, "learning_rate": 1.9998978352510267e-05, "loss": 0.5552, "step": 210 }, { "epoch": 0.004658418737676857, "grad_norm": 1.3716667890548706, "learning_rate": 1.9998929124343212e-05, "loss": 0.4235, "step": 215 }, { "epoch": 0.004766754057157715, "grad_norm": 1.5631877183914185, "learning_rate": 1.9998878737950004e-05, "loss": 0.4994, "step": 220 }, { "epoch": 0.004875089376638572, "grad_norm": 1.3510173559188843, "learning_rate": 1.999882719333648e-05, "loss": 0.4995, "step": 225 }, { "epoch": 0.004983424696119429, "grad_norm": 1.2517638206481934, "learning_rate": 1.9998774490508605e-05, "loss": 0.3349, "step": 230 }, { "epoch": 0.005091760015600286, "grad_norm": 1.2018076181411743, "learning_rate": 1.9998720629472488e-05, "loss": 0.4807, "step": 235 }, { "epoch": 0.0052000953350811435, "grad_norm": 0.8912019729614258, "learning_rate": 1.9998665610234366e-05, "loss": 0.3689, "step": 240 }, { "epoch": 0.005308430654562, "grad_norm": 1.0603381395339966, "learning_rate": 1.9998609432800612e-05, "loss": 0.484, "step": 245 }, { "epoch": 0.005416765974042857, "grad_norm": 1.268182635307312, "learning_rate": 1.999855209717774e-05, "loss": 0.5383, "step": 250 }, { "epoch": 0.005525101293523715, "grad_norm": 1.3475357294082642, "learning_rate": 1.9998493603372383e-05, "loss": 0.6112, "step": 255 }, { "epoch": 0.005633436613004572, "grad_norm": 1.2673261165618896, "learning_rate": 1.9998433951391315e-05, "loss": 0.455, "step": 260 }, { "epoch": 0.005741771932485429, "grad_norm": 0.8517705798149109, "learning_rate": 1.9998373141241454e-05, "loss": 0.4177, "step": 265 }, { "epoch": 0.005850107251966286, "grad_norm": 1.1834776401519775, "learning_rate": 1.999831117292984e-05, "loss": 0.4896, "step": 270 }, { "epoch": 0.0059584425714471435, "grad_norm": 1.4864054918289185, "learning_rate": 1.999824804646365e-05, "loss": 0.4712, "step": 275 }, { "epoch": 0.006066777890928, "grad_norm": 1.3767472505569458, "learning_rate": 1.99981837618502e-05, "loss": 0.3789, "step": 280 }, { "epoch": 0.006175113210408857, "grad_norm": 1.1334308385849, "learning_rate": 1.9998118319096935e-05, "loss": 0.3708, "step": 285 }, { "epoch": 0.006283448529889715, "grad_norm": 1.4560970067977905, "learning_rate": 1.999805171821143e-05, "loss": 0.4404, "step": 290 }, { "epoch": 0.006391783849370572, "grad_norm": 1.2616043090820312, "learning_rate": 1.9997983959201408e-05, "loss": 0.5028, "step": 295 }, { "epoch": 0.006500119168851429, "grad_norm": 0.8953388929367065, "learning_rate": 1.9997915042074713e-05, "loss": 0.4147, "step": 300 }, { "epoch": 0.006608454488332286, "grad_norm": 1.0580995082855225, "learning_rate": 1.9997844966839328e-05, "loss": 0.5172, "step": 305 }, { "epoch": 0.0067167898078131436, "grad_norm": 0.9777295589447021, "learning_rate": 1.9997773733503373e-05, "loss": 0.436, "step": 310 }, { "epoch": 0.0068251251272940005, "grad_norm": 1.1684647798538208, "learning_rate": 1.9997701342075095e-05, "loss": 0.4852, "step": 315 }, { "epoch": 0.006933460446774857, "grad_norm": 0.9552314877510071, "learning_rate": 1.9997627792562885e-05, "loss": 0.3994, "step": 320 }, { "epoch": 0.007041795766255714, "grad_norm": 1.3918323516845703, "learning_rate": 1.9997553084975258e-05, "loss": 0.4931, "step": 325 }, { "epoch": 0.007150131085736572, "grad_norm": 1.4222451448440552, "learning_rate": 1.9997477219320872e-05, "loss": 0.4806, "step": 330 }, { "epoch": 0.007258466405217429, "grad_norm": 0.9957696199417114, "learning_rate": 1.999740019560851e-05, "loss": 0.4717, "step": 335 }, { "epoch": 0.007366801724698286, "grad_norm": 1.6258692741394043, "learning_rate": 1.9997322013847097e-05, "loss": 0.5616, "step": 340 }, { "epoch": 0.007475137044179144, "grad_norm": 1.3005832433700562, "learning_rate": 1.9997242674045686e-05, "loss": 0.5212, "step": 345 }, { "epoch": 0.0075834723636600005, "grad_norm": 1.1311180591583252, "learning_rate": 1.9997162176213473e-05, "loss": 0.3121, "step": 350 }, { "epoch": 0.007691807683140857, "grad_norm": 1.46100914478302, "learning_rate": 1.999708052035978e-05, "loss": 0.3802, "step": 355 }, { "epoch": 0.007800143002621714, "grad_norm": 1.1820048093795776, "learning_rate": 1.9996997706494064e-05, "loss": 0.4358, "step": 360 }, { "epoch": 0.007908478322102572, "grad_norm": 1.481699824333191, "learning_rate": 1.9996913734625916e-05, "loss": 0.5097, "step": 365 }, { "epoch": 0.00801681364158343, "grad_norm": 1.1633845567703247, "learning_rate": 1.999682860476507e-05, "loss": 0.3835, "step": 370 }, { "epoch": 0.008125148961064286, "grad_norm": 1.0373575687408447, "learning_rate": 1.9996742316921378e-05, "loss": 0.5049, "step": 375 }, { "epoch": 0.008233484280545144, "grad_norm": 1.1871370077133179, "learning_rate": 1.9996654871104843e-05, "loss": 0.4953, "step": 380 }, { "epoch": 0.008341819600026, "grad_norm": 0.9362674355506897, "learning_rate": 1.999656626732559e-05, "loss": 0.3296, "step": 385 }, { "epoch": 0.008450154919506857, "grad_norm": 1.0077570676803589, "learning_rate": 1.9996476505593883e-05, "loss": 0.492, "step": 390 }, { "epoch": 0.008558490238987715, "grad_norm": 1.6124992370605469, "learning_rate": 1.9996385585920123e-05, "loss": 0.671, "step": 395 }, { "epoch": 0.008666825558468571, "grad_norm": 0.8607150912284851, "learning_rate": 1.9996293508314837e-05, "loss": 0.3851, "step": 400 }, { "epoch": 0.008775160877949429, "grad_norm": 1.0923070907592773, "learning_rate": 1.9996200272788692e-05, "loss": 0.5403, "step": 405 }, { "epoch": 0.008883496197430287, "grad_norm": 1.523041009902954, "learning_rate": 1.999610587935249e-05, "loss": 0.5151, "step": 410 }, { "epoch": 0.008991831516911143, "grad_norm": 1.138096809387207, "learning_rate": 1.9996010328017158e-05, "loss": 0.4135, "step": 415 }, { "epoch": 0.009100166836392, "grad_norm": 1.22113037109375, "learning_rate": 1.9995913618793775e-05, "loss": 0.466, "step": 420 }, { "epoch": 0.009208502155872858, "grad_norm": 1.1496285200119019, "learning_rate": 1.9995815751693538e-05, "loss": 0.4316, "step": 425 }, { "epoch": 0.009316837475353714, "grad_norm": 1.689724087715149, "learning_rate": 1.999571672672778e-05, "loss": 0.452, "step": 430 }, { "epoch": 0.009425172794834572, "grad_norm": 1.091392993927002, "learning_rate": 1.999561654390798e-05, "loss": 0.4413, "step": 435 }, { "epoch": 0.00953350811431543, "grad_norm": 0.8983684182167053, "learning_rate": 1.999551520324573e-05, "loss": 0.4666, "step": 440 }, { "epoch": 0.009641843433796286, "grad_norm": 1.101024866104126, "learning_rate": 1.9995412704752786e-05, "loss": 0.474, "step": 445 }, { "epoch": 0.009750178753277144, "grad_norm": 1.289973497390747, "learning_rate": 1.9995309048441007e-05, "loss": 0.4306, "step": 450 }, { "epoch": 0.009858514072758, "grad_norm": 1.1211236715316772, "learning_rate": 1.9995204234322404e-05, "loss": 0.4787, "step": 455 }, { "epoch": 0.009966849392238858, "grad_norm": 1.1679269075393677, "learning_rate": 1.999509826240912e-05, "loss": 0.4473, "step": 460 }, { "epoch": 0.010075184711719715, "grad_norm": 1.199578881263733, "learning_rate": 1.9994991132713427e-05, "loss": 0.5052, "step": 465 }, { "epoch": 0.010183520031200571, "grad_norm": 1.2829262018203735, "learning_rate": 1.9994882845247735e-05, "loss": 0.3834, "step": 470 }, { "epoch": 0.01029185535068143, "grad_norm": 1.047071099281311, "learning_rate": 1.999477340002459e-05, "loss": 0.3374, "step": 475 }, { "epoch": 0.010400190670162287, "grad_norm": 0.9112053513526917, "learning_rate": 1.999466279705667e-05, "loss": 0.3943, "step": 480 }, { "epoch": 0.010508525989643143, "grad_norm": 1.5882530212402344, "learning_rate": 1.9994551036356788e-05, "loss": 0.4766, "step": 485 }, { "epoch": 0.010616861309124, "grad_norm": 0.8878957629203796, "learning_rate": 1.9994438117937883e-05, "loss": 0.3006, "step": 490 }, { "epoch": 0.010725196628604858, "grad_norm": 1.066976547241211, "learning_rate": 1.9994324041813038e-05, "loss": 0.3822, "step": 495 }, { "epoch": 0.010833531948085715, "grad_norm": 1.2557276487350464, "learning_rate": 1.9994208807995466e-05, "loss": 0.4096, "step": 500 }, { "epoch": 0.010941867267566572, "grad_norm": 1.0236873626708984, "learning_rate": 1.9994092416498518e-05, "loss": 0.4451, "step": 505 }, { "epoch": 0.01105020258704743, "grad_norm": 1.1740291118621826, "learning_rate": 1.999397486733568e-05, "loss": 0.3333, "step": 510 }, { "epoch": 0.011158537906528286, "grad_norm": 1.5653035640716553, "learning_rate": 1.9993856160520558e-05, "loss": 0.4462, "step": 515 }, { "epoch": 0.011266873226009144, "grad_norm": 1.43739914894104, "learning_rate": 1.999373629606691e-05, "loss": 0.3472, "step": 520 }, { "epoch": 0.01137520854549, "grad_norm": 1.2012832164764404, "learning_rate": 1.9993615273988615e-05, "loss": 0.4375, "step": 525 }, { "epoch": 0.011483543864970858, "grad_norm": 1.1351840496063232, "learning_rate": 1.99934930942997e-05, "loss": 0.4361, "step": 530 }, { "epoch": 0.011591879184451715, "grad_norm": 1.0732275247573853, "learning_rate": 1.999336975701431e-05, "loss": 0.4369, "step": 535 }, { "epoch": 0.011700214503932571, "grad_norm": 1.0221426486968994, "learning_rate": 1.9993245262146733e-05, "loss": 0.4399, "step": 540 }, { "epoch": 0.01180854982341343, "grad_norm": 1.3546396493911743, "learning_rate": 1.9993119609711394e-05, "loss": 0.417, "step": 545 }, { "epoch": 0.011916885142894287, "grad_norm": 0.8066328763961792, "learning_rate": 1.9992992799722845e-05, "loss": 0.4251, "step": 550 }, { "epoch": 0.012025220462375143, "grad_norm": 0.8750069737434387, "learning_rate": 1.999286483219577e-05, "loss": 0.5474, "step": 555 }, { "epoch": 0.012133555781856, "grad_norm": 0.9098784327507019, "learning_rate": 1.9992735707145004e-05, "loss": 0.3958, "step": 560 }, { "epoch": 0.012241891101336859, "grad_norm": 1.1539760828018188, "learning_rate": 1.9992605424585493e-05, "loss": 0.4832, "step": 565 }, { "epoch": 0.012350226420817715, "grad_norm": 1.316842794418335, "learning_rate": 1.9992473984532336e-05, "loss": 0.2908, "step": 570 }, { "epoch": 0.012458561740298572, "grad_norm": 1.2277872562408447, "learning_rate": 1.999234138700075e-05, "loss": 0.3728, "step": 575 }, { "epoch": 0.01256689705977943, "grad_norm": 1.2565290927886963, "learning_rate": 1.9992207632006106e-05, "loss": 0.4053, "step": 580 }, { "epoch": 0.012675232379260286, "grad_norm": 1.5433927774429321, "learning_rate": 1.999207271956389e-05, "loss": 0.5396, "step": 585 }, { "epoch": 0.012783567698741144, "grad_norm": 1.0752031803131104, "learning_rate": 1.9991936649689726e-05, "loss": 0.4265, "step": 590 }, { "epoch": 0.012891903018222, "grad_norm": 1.1353851556777954, "learning_rate": 1.9991799422399384e-05, "loss": 0.4675, "step": 595 }, { "epoch": 0.013000238337702858, "grad_norm": 1.194663643836975, "learning_rate": 1.9991661037708755e-05, "loss": 0.384, "step": 600 }, { "epoch": 0.013108573657183716, "grad_norm": 0.9787612557411194, "learning_rate": 1.9991521495633873e-05, "loss": 0.3513, "step": 605 }, { "epoch": 0.013216908976664572, "grad_norm": 1.2138960361480713, "learning_rate": 1.9991380796190894e-05, "loss": 0.5094, "step": 610 }, { "epoch": 0.01332524429614543, "grad_norm": 1.1149234771728516, "learning_rate": 1.9991238939396123e-05, "loss": 0.4173, "step": 615 }, { "epoch": 0.013433579615626287, "grad_norm": 1.1967897415161133, "learning_rate": 1.999109592526599e-05, "loss": 0.4203, "step": 620 }, { "epoch": 0.013541914935107143, "grad_norm": 1.1632211208343506, "learning_rate": 1.9990951753817058e-05, "loss": 0.5178, "step": 625 }, { "epoch": 0.013650250254588001, "grad_norm": 0.9753016829490662, "learning_rate": 1.999080642506603e-05, "loss": 0.2946, "step": 630 }, { "epoch": 0.013758585574068859, "grad_norm": 1.188796043395996, "learning_rate": 1.9990659939029744e-05, "loss": 0.4654, "step": 635 }, { "epoch": 0.013866920893549715, "grad_norm": 1.7238026857376099, "learning_rate": 1.999051229572516e-05, "loss": 0.4672, "step": 640 }, { "epoch": 0.013975256213030572, "grad_norm": 1.0546398162841797, "learning_rate": 1.9990363495169386e-05, "loss": 0.3583, "step": 645 }, { "epoch": 0.014083591532511429, "grad_norm": 1.219425082206726, "learning_rate": 1.9990213537379656e-05, "loss": 0.4122, "step": 650 }, { "epoch": 0.014191926851992286, "grad_norm": 0.9312684535980225, "learning_rate": 1.9990062422373345e-05, "loss": 0.3811, "step": 655 }, { "epoch": 0.014300262171473144, "grad_norm": 1.1916559934616089, "learning_rate": 1.9989910150167948e-05, "loss": 0.4151, "step": 660 }, { "epoch": 0.014408597490954, "grad_norm": 0.9022374749183655, "learning_rate": 1.9989756720781114e-05, "loss": 0.4422, "step": 665 }, { "epoch": 0.014516932810434858, "grad_norm": 0.9367965459823608, "learning_rate": 1.9989602134230607e-05, "loss": 0.5012, "step": 670 }, { "epoch": 0.014625268129915716, "grad_norm": 1.134117603302002, "learning_rate": 1.9989446390534337e-05, "loss": 0.3942, "step": 675 }, { "epoch": 0.014733603449396572, "grad_norm": 1.3154411315917969, "learning_rate": 1.9989289489710345e-05, "loss": 0.47, "step": 680 }, { "epoch": 0.01484193876887743, "grad_norm": 1.260404348373413, "learning_rate": 1.9989131431776806e-05, "loss": 0.4553, "step": 685 }, { "epoch": 0.014950274088358287, "grad_norm": 1.0467016696929932, "learning_rate": 1.9988972216752026e-05, "loss": 0.2936, "step": 690 }, { "epoch": 0.015058609407839143, "grad_norm": 1.8606239557266235, "learning_rate": 1.9988811844654453e-05, "loss": 0.5305, "step": 695 }, { "epoch": 0.015166944727320001, "grad_norm": 0.9901790022850037, "learning_rate": 1.9988650315502656e-05, "loss": 0.5156, "step": 700 }, { "epoch": 0.015275280046800859, "grad_norm": 0.6778713464736938, "learning_rate": 1.998848762931535e-05, "loss": 0.4065, "step": 705 }, { "epoch": 0.015383615366281715, "grad_norm": 0.9480881690979004, "learning_rate": 1.9988323786111383e-05, "loss": 0.5412, "step": 710 }, { "epoch": 0.015491950685762573, "grad_norm": 1.4341474771499634, "learning_rate": 1.998815878590973e-05, "loss": 0.4317, "step": 715 }, { "epoch": 0.015600286005243429, "grad_norm": 1.4374536275863647, "learning_rate": 1.99879926287295e-05, "loss": 0.5099, "step": 720 }, { "epoch": 0.015708621324724288, "grad_norm": 1.5463976860046387, "learning_rate": 1.998782531458995e-05, "loss": 0.5814, "step": 725 }, { "epoch": 0.015816956644205144, "grad_norm": 1.2605255842208862, "learning_rate": 1.9987656843510454e-05, "loss": 0.3682, "step": 730 }, { "epoch": 0.015925291963686, "grad_norm": 1.114677906036377, "learning_rate": 1.9987487215510524e-05, "loss": 0.3116, "step": 735 }, { "epoch": 0.01603362728316686, "grad_norm": 0.9495889544487, "learning_rate": 1.9987316430609817e-05, "loss": 0.4406, "step": 740 }, { "epoch": 0.016141962602647716, "grad_norm": 1.0558733940124512, "learning_rate": 1.998714448882811e-05, "loss": 0.427, "step": 745 }, { "epoch": 0.016250297922128572, "grad_norm": 1.4326279163360596, "learning_rate": 1.9986971390185318e-05, "loss": 0.3607, "step": 750 }, { "epoch": 0.016358633241609428, "grad_norm": 1.0051994323730469, "learning_rate": 1.99867971347015e-05, "loss": 0.4677, "step": 755 }, { "epoch": 0.016466968561090287, "grad_norm": 1.4996483325958252, "learning_rate": 1.998662172239683e-05, "loss": 0.4228, "step": 760 }, { "epoch": 0.016575303880571143, "grad_norm": 1.015702724456787, "learning_rate": 1.998644515329164e-05, "loss": 0.4082, "step": 765 }, { "epoch": 0.016683639200052, "grad_norm": 1.0381453037261963, "learning_rate": 1.9986267427406373e-05, "loss": 0.4602, "step": 770 }, { "epoch": 0.01679197451953286, "grad_norm": 1.1000330448150635, "learning_rate": 1.998608854476162e-05, "loss": 0.3836, "step": 775 }, { "epoch": 0.016900309839013715, "grad_norm": 1.3170807361602783, "learning_rate": 1.9985908505378098e-05, "loss": 0.4323, "step": 780 }, { "epoch": 0.01700864515849457, "grad_norm": 1.3168234825134277, "learning_rate": 1.9985727309276667e-05, "loss": 0.4306, "step": 785 }, { "epoch": 0.01711698047797543, "grad_norm": 0.7767938375473022, "learning_rate": 1.9985544956478312e-05, "loss": 0.3832, "step": 790 }, { "epoch": 0.017225315797456286, "grad_norm": 1.0829684734344482, "learning_rate": 1.9985361447004155e-05, "loss": 0.3778, "step": 795 }, { "epoch": 0.017333651116937143, "grad_norm": 1.122418999671936, "learning_rate": 1.998517678087546e-05, "loss": 0.5501, "step": 800 }, { "epoch": 0.017441986436418002, "grad_norm": 1.0314364433288574, "learning_rate": 1.998499095811361e-05, "loss": 0.4317, "step": 805 }, { "epoch": 0.017550321755898858, "grad_norm": 2.304234027862549, "learning_rate": 1.9984803978740133e-05, "loss": 0.3939, "step": 810 }, { "epoch": 0.017658657075379714, "grad_norm": 1.3427822589874268, "learning_rate": 1.9984615842776687e-05, "loss": 0.525, "step": 815 }, { "epoch": 0.017766992394860574, "grad_norm": 0.8537143468856812, "learning_rate": 1.9984426550245067e-05, "loss": 0.5633, "step": 820 }, { "epoch": 0.01787532771434143, "grad_norm": 1.269234299659729, "learning_rate": 1.9984236101167195e-05, "loss": 0.4222, "step": 825 }, { "epoch": 0.017983663033822286, "grad_norm": 1.1140320301055908, "learning_rate": 1.9984044495565138e-05, "loss": 0.5542, "step": 830 }, { "epoch": 0.018091998353303145, "grad_norm": 1.3309389352798462, "learning_rate": 1.9983851733461085e-05, "loss": 0.5145, "step": 835 }, { "epoch": 0.018200333672784, "grad_norm": 1.0464973449707031, "learning_rate": 1.9983657814877366e-05, "loss": 0.4276, "step": 840 }, { "epoch": 0.018308668992264857, "grad_norm": 0.8400686383247375, "learning_rate": 1.9983462739836444e-05, "loss": 0.3993, "step": 845 }, { "epoch": 0.018417004311745717, "grad_norm": 1.6072200536727905, "learning_rate": 1.998326650836092e-05, "loss": 0.4173, "step": 850 }, { "epoch": 0.018525339631226573, "grad_norm": 1.1744937896728516, "learning_rate": 1.9983069120473517e-05, "loss": 0.4504, "step": 855 }, { "epoch": 0.01863367495070743, "grad_norm": 0.9858243465423584, "learning_rate": 1.9982870576197103e-05, "loss": 0.3323, "step": 860 }, { "epoch": 0.01874201027018829, "grad_norm": 1.3024373054504395, "learning_rate": 1.9982670875554677e-05, "loss": 0.3702, "step": 865 }, { "epoch": 0.018850345589669144, "grad_norm": 0.8675714135169983, "learning_rate": 1.998247001856937e-05, "loss": 0.4316, "step": 870 }, { "epoch": 0.01895868090915, "grad_norm": 1.0964782238006592, "learning_rate": 1.9982268005264448e-05, "loss": 0.4357, "step": 875 }, { "epoch": 0.01906701622863086, "grad_norm": 1.0899674892425537, "learning_rate": 1.9982064835663313e-05, "loss": 0.3124, "step": 880 }, { "epoch": 0.019175351548111716, "grad_norm": 1.0709079504013062, "learning_rate": 1.99818605097895e-05, "loss": 0.3421, "step": 885 }, { "epoch": 0.019283686867592572, "grad_norm": 1.5749599933624268, "learning_rate": 1.9981655027666673e-05, "loss": 0.3337, "step": 890 }, { "epoch": 0.019392022187073428, "grad_norm": 1.0183417797088623, "learning_rate": 1.9981448389318636e-05, "loss": 0.3218, "step": 895 }, { "epoch": 0.019500357506554288, "grad_norm": 1.275662899017334, "learning_rate": 1.9981240594769324e-05, "loss": 0.5449, "step": 900 }, { "epoch": 0.019608692826035144, "grad_norm": 1.1121793985366821, "learning_rate": 1.998103164404281e-05, "loss": 0.4336, "step": 905 }, { "epoch": 0.019717028145516, "grad_norm": 1.6116878986358643, "learning_rate": 1.9980821537163295e-05, "loss": 0.4722, "step": 910 }, { "epoch": 0.01982536346499686, "grad_norm": 1.0705269575119019, "learning_rate": 1.998061027415512e-05, "loss": 0.4643, "step": 915 }, { "epoch": 0.019933698784477715, "grad_norm": 1.1509321928024292, "learning_rate": 1.998039785504275e-05, "loss": 0.4228, "step": 920 }, { "epoch": 0.02004203410395857, "grad_norm": 1.4319677352905273, "learning_rate": 1.9980184279850797e-05, "loss": 0.4418, "step": 925 }, { "epoch": 0.02015036942343943, "grad_norm": 1.023921251296997, "learning_rate": 1.9979969548604e-05, "loss": 0.367, "step": 930 }, { "epoch": 0.020258704742920287, "grad_norm": 1.7992990016937256, "learning_rate": 1.9979753661327228e-05, "loss": 0.4205, "step": 935 }, { "epoch": 0.020367040062401143, "grad_norm": 1.5278102159500122, "learning_rate": 1.9979536618045494e-05, "loss": 0.3699, "step": 940 }, { "epoch": 0.020475375381882002, "grad_norm": 1.2980040311813354, "learning_rate": 1.9979318418783934e-05, "loss": 0.4611, "step": 945 }, { "epoch": 0.02058371070136286, "grad_norm": 1.0645532608032227, "learning_rate": 1.9979099063567828e-05, "loss": 0.4633, "step": 950 }, { "epoch": 0.020692046020843714, "grad_norm": 1.012487769126892, "learning_rate": 1.9978878552422575e-05, "loss": 0.4863, "step": 955 }, { "epoch": 0.020800381340324574, "grad_norm": 0.9991220831871033, "learning_rate": 1.9978656885373733e-05, "loss": 0.3598, "step": 960 }, { "epoch": 0.02090871665980543, "grad_norm": 1.0614393949508667, "learning_rate": 1.9978434062446967e-05, "loss": 0.3673, "step": 965 }, { "epoch": 0.021017051979286286, "grad_norm": 1.2272288799285889, "learning_rate": 1.9978210083668094e-05, "loss": 0.4564, "step": 970 }, { "epoch": 0.021125387298767145, "grad_norm": 1.0558357238769531, "learning_rate": 1.9977984949063057e-05, "loss": 0.454, "step": 975 }, { "epoch": 0.021233722618248, "grad_norm": 1.1315927505493164, "learning_rate": 1.9977758658657935e-05, "loss": 0.4096, "step": 980 }, { "epoch": 0.021342057937728857, "grad_norm": 1.4288475513458252, "learning_rate": 1.9977531212478934e-05, "loss": 0.502, "step": 985 }, { "epoch": 0.021450393257209717, "grad_norm": 1.3050246238708496, "learning_rate": 1.997730261055241e-05, "loss": 0.3705, "step": 990 }, { "epoch": 0.021558728576690573, "grad_norm": 0.9655411839485168, "learning_rate": 1.9977072852904836e-05, "loss": 0.3675, "step": 995 }, { "epoch": 0.02166706389617143, "grad_norm": 1.385482907295227, "learning_rate": 1.9976841939562827e-05, "loss": 0.4135, "step": 1000 }, { "epoch": 0.02177539921565229, "grad_norm": 1.1751946210861206, "learning_rate": 1.9976609870553137e-05, "loss": 0.452, "step": 1005 }, { "epoch": 0.021883734535133145, "grad_norm": 1.0693477392196655, "learning_rate": 1.997637664590264e-05, "loss": 0.4236, "step": 1010 }, { "epoch": 0.021992069854614, "grad_norm": 1.0520353317260742, "learning_rate": 1.9976142265638357e-05, "loss": 0.4391, "step": 1015 }, { "epoch": 0.02210040517409486, "grad_norm": 0.9417238235473633, "learning_rate": 1.9975906729787432e-05, "loss": 0.5044, "step": 1020 }, { "epoch": 0.022208740493575716, "grad_norm": 1.1064977645874023, "learning_rate": 1.9975670038377152e-05, "loss": 0.4808, "step": 1025 }, { "epoch": 0.022317075813056572, "grad_norm": 1.0218169689178467, "learning_rate": 1.9975432191434934e-05, "loss": 0.3665, "step": 1030 }, { "epoch": 0.022425411132537428, "grad_norm": 1.2397944927215576, "learning_rate": 1.9975193188988333e-05, "loss": 0.4136, "step": 1035 }, { "epoch": 0.022533746452018288, "grad_norm": 1.0804662704467773, "learning_rate": 1.9974953031065024e-05, "loss": 0.4653, "step": 1040 }, { "epoch": 0.022642081771499144, "grad_norm": 1.1134520769119263, "learning_rate": 1.9974711717692833e-05, "loss": 0.4835, "step": 1045 }, { "epoch": 0.02275041709098, "grad_norm": 1.5130475759506226, "learning_rate": 1.9974469248899712e-05, "loss": 0.4524, "step": 1050 }, { "epoch": 0.02285875241046086, "grad_norm": 1.0863401889801025, "learning_rate": 1.9974225624713742e-05, "loss": 0.4264, "step": 1055 }, { "epoch": 0.022967087729941715, "grad_norm": 0.9638212323188782, "learning_rate": 1.997398084516315e-05, "loss": 0.4512, "step": 1060 }, { "epoch": 0.02307542304942257, "grad_norm": 0.968042254447937, "learning_rate": 1.997373491027629e-05, "loss": 0.4408, "step": 1065 }, { "epoch": 0.02318375836890343, "grad_norm": 0.982221245765686, "learning_rate": 1.9973487820081642e-05, "loss": 0.3798, "step": 1070 }, { "epoch": 0.023292093688384287, "grad_norm": 0.7894834876060486, "learning_rate": 1.9973239574607833e-05, "loss": 0.4496, "step": 1075 }, { "epoch": 0.023400429007865143, "grad_norm": 1.1354342699050903, "learning_rate": 1.9972990173883625e-05, "loss": 0.3527, "step": 1080 }, { "epoch": 0.023508764327346002, "grad_norm": 1.3024243116378784, "learning_rate": 1.9972739617937894e-05, "loss": 0.4065, "step": 1085 }, { "epoch": 0.02361709964682686, "grad_norm": 1.231086015701294, "learning_rate": 1.9972487906799673e-05, "loss": 0.3898, "step": 1090 }, { "epoch": 0.023725434966307715, "grad_norm": 0.8983778357505798, "learning_rate": 1.9972235040498116e-05, "loss": 0.408, "step": 1095 }, { "epoch": 0.023833770285788574, "grad_norm": 1.219935417175293, "learning_rate": 1.9971981019062513e-05, "loss": 0.4673, "step": 1100 }, { "epoch": 0.02394210560526943, "grad_norm": 1.4583091735839844, "learning_rate": 1.997172584252229e-05, "loss": 0.4203, "step": 1105 }, { "epoch": 0.024050440924750286, "grad_norm": 1.2980668544769287, "learning_rate": 1.9971469510907003e-05, "loss": 0.434, "step": 1110 }, { "epoch": 0.024158776244231146, "grad_norm": 0.9257343411445618, "learning_rate": 1.9971212024246344e-05, "loss": 0.3736, "step": 1115 }, { "epoch": 0.024267111563712, "grad_norm": 1.0681817531585693, "learning_rate": 1.9970953382570143e-05, "loss": 0.4415, "step": 1120 }, { "epoch": 0.024375446883192858, "grad_norm": 1.149304986000061, "learning_rate": 1.997069358590836e-05, "loss": 0.4698, "step": 1125 }, { "epoch": 0.024483782202673717, "grad_norm": 1.1269619464874268, "learning_rate": 1.997043263429108e-05, "loss": 0.4397, "step": 1130 }, { "epoch": 0.024592117522154573, "grad_norm": 1.1347397565841675, "learning_rate": 1.9970170527748542e-05, "loss": 0.4757, "step": 1135 }, { "epoch": 0.02470045284163543, "grad_norm": 1.184895396232605, "learning_rate": 1.9969907266311098e-05, "loss": 0.4609, "step": 1140 }, { "epoch": 0.02480878816111629, "grad_norm": 0.9468145370483398, "learning_rate": 1.996964285000925e-05, "loss": 0.4684, "step": 1145 }, { "epoch": 0.024917123480597145, "grad_norm": 1.0595066547393799, "learning_rate": 1.996937727887362e-05, "loss": 0.4221, "step": 1150 }, { "epoch": 0.025025458800078, "grad_norm": 0.9468479156494141, "learning_rate": 1.9969110552934975e-05, "loss": 0.289, "step": 1155 }, { "epoch": 0.02513379411955886, "grad_norm": 0.9305704236030579, "learning_rate": 1.9968842672224208e-05, "loss": 0.36, "step": 1160 }, { "epoch": 0.025242129439039716, "grad_norm": 1.1895065307617188, "learning_rate": 1.9968573636772356e-05, "loss": 0.5376, "step": 1165 }, { "epoch": 0.025350464758520572, "grad_norm": 0.9440916776657104, "learning_rate": 1.996830344661057e-05, "loss": 0.3281, "step": 1170 }, { "epoch": 0.02545880007800143, "grad_norm": 1.0400093793869019, "learning_rate": 1.996803210177016e-05, "loss": 0.4201, "step": 1175 }, { "epoch": 0.025567135397482288, "grad_norm": 0.898118793964386, "learning_rate": 1.9967759602282547e-05, "loss": 0.3819, "step": 1180 }, { "epoch": 0.025675470716963144, "grad_norm": 1.4812995195388794, "learning_rate": 1.9967485948179307e-05, "loss": 0.4532, "step": 1185 }, { "epoch": 0.025783806036444, "grad_norm": 1.147014856338501, "learning_rate": 1.9967211139492128e-05, "loss": 0.3677, "step": 1190 }, { "epoch": 0.02589214135592486, "grad_norm": 1.1625664234161377, "learning_rate": 1.996693517625285e-05, "loss": 0.5529, "step": 1195 }, { "epoch": 0.026000476675405716, "grad_norm": 1.1865737438201904, "learning_rate": 1.9966658058493437e-05, "loss": 0.4916, "step": 1200 }, { "epoch": 0.02610881199488657, "grad_norm": 1.0288282632827759, "learning_rate": 1.9966379786245985e-05, "loss": 0.4839, "step": 1205 }, { "epoch": 0.02621714731436743, "grad_norm": 1.0798232555389404, "learning_rate": 1.9966100359542737e-05, "loss": 0.4464, "step": 1210 }, { "epoch": 0.026325482633848287, "grad_norm": 1.2994465827941895, "learning_rate": 1.996581977841605e-05, "loss": 0.4326, "step": 1215 }, { "epoch": 0.026433817953329143, "grad_norm": 0.8968429565429688, "learning_rate": 1.9965538042898432e-05, "loss": 0.4216, "step": 1220 }, { "epoch": 0.026542153272810003, "grad_norm": 1.0967695713043213, "learning_rate": 1.9965255153022513e-05, "loss": 0.4731, "step": 1225 }, { "epoch": 0.02665048859229086, "grad_norm": 0.6993317008018494, "learning_rate": 1.9964971108821064e-05, "loss": 0.3175, "step": 1230 }, { "epoch": 0.026758823911771715, "grad_norm": 1.2557475566864014, "learning_rate": 1.996468591032699e-05, "loss": 0.3313, "step": 1235 }, { "epoch": 0.026867159231252574, "grad_norm": 1.3732300996780396, "learning_rate": 1.996439955757332e-05, "loss": 0.4361, "step": 1240 }, { "epoch": 0.02697549455073343, "grad_norm": 1.115757703781128, "learning_rate": 1.9964112050593234e-05, "loss": 0.463, "step": 1245 }, { "epoch": 0.027083829870214286, "grad_norm": 1.2628432512283325, "learning_rate": 1.9963823389420026e-05, "loss": 0.474, "step": 1250 }, { "epoch": 0.027192165189695146, "grad_norm": 1.2146614789962769, "learning_rate": 1.9963533574087137e-05, "loss": 0.3863, "step": 1255 }, { "epoch": 0.027300500509176002, "grad_norm": 1.3543164730072021, "learning_rate": 1.9963242604628137e-05, "loss": 0.3849, "step": 1260 }, { "epoch": 0.027408835828656858, "grad_norm": 1.1259393692016602, "learning_rate": 1.996295048107673e-05, "loss": 0.4769, "step": 1265 }, { "epoch": 0.027517171148137717, "grad_norm": 1.1550183296203613, "learning_rate": 1.9962657203466757e-05, "loss": 0.4641, "step": 1270 }, { "epoch": 0.027625506467618573, "grad_norm": 1.4144526720046997, "learning_rate": 1.9962362771832183e-05, "loss": 0.3549, "step": 1275 }, { "epoch": 0.02773384178709943, "grad_norm": 1.2631131410598755, "learning_rate": 1.996206718620712e-05, "loss": 0.4882, "step": 1280 }, { "epoch": 0.02784217710658029, "grad_norm": 1.3473711013793945, "learning_rate": 1.9961770446625806e-05, "loss": 0.3662, "step": 1285 }, { "epoch": 0.027950512426061145, "grad_norm": 0.8654823303222656, "learning_rate": 1.9961472553122613e-05, "loss": 0.4384, "step": 1290 }, { "epoch": 0.028058847745542, "grad_norm": 1.0932592153549194, "learning_rate": 1.9961173505732047e-05, "loss": 0.3942, "step": 1295 }, { "epoch": 0.028167183065022857, "grad_norm": 0.9938560724258423, "learning_rate": 1.996087330448875e-05, "loss": 0.4756, "step": 1300 }, { "epoch": 0.028275518384503717, "grad_norm": 0.9196484088897705, "learning_rate": 1.9960571949427495e-05, "loss": 0.4808, "step": 1305 }, { "epoch": 0.028383853703984573, "grad_norm": 1.1203153133392334, "learning_rate": 1.996026944058319e-05, "loss": 0.5105, "step": 1310 }, { "epoch": 0.02849218902346543, "grad_norm": 0.8060592412948608, "learning_rate": 1.995996577799087e-05, "loss": 0.4694, "step": 1315 }, { "epoch": 0.028600524342946288, "grad_norm": 1.021653652191162, "learning_rate": 1.995966096168572e-05, "loss": 0.3551, "step": 1320 }, { "epoch": 0.028708859662427144, "grad_norm": 1.1935774087905884, "learning_rate": 1.9959354991703042e-05, "loss": 0.394, "step": 1325 }, { "epoch": 0.028817194981908, "grad_norm": 0.983348548412323, "learning_rate": 1.9959047868078275e-05, "loss": 0.4537, "step": 1330 }, { "epoch": 0.02892553030138886, "grad_norm": 1.3267273902893066, "learning_rate": 1.9958739590847e-05, "loss": 0.4062, "step": 1335 }, { "epoch": 0.029033865620869716, "grad_norm": 0.9921041131019592, "learning_rate": 1.995843016004493e-05, "loss": 0.3907, "step": 1340 }, { "epoch": 0.029142200940350572, "grad_norm": 0.9055088758468628, "learning_rate": 1.9958119575707904e-05, "loss": 0.35, "step": 1345 }, { "epoch": 0.02925053625983143, "grad_norm": 1.1416926383972168, "learning_rate": 1.9957807837871893e-05, "loss": 0.4631, "step": 1350 }, { "epoch": 0.029358871579312287, "grad_norm": 0.9327611327171326, "learning_rate": 1.9957494946573014e-05, "loss": 0.4587, "step": 1355 }, { "epoch": 0.029467206898793143, "grad_norm": 0.893851101398468, "learning_rate": 1.995718090184751e-05, "loss": 0.3821, "step": 1360 }, { "epoch": 0.029575542218274003, "grad_norm": 1.162205696105957, "learning_rate": 1.9956865703731757e-05, "loss": 0.3485, "step": 1365 }, { "epoch": 0.02968387753775486, "grad_norm": 1.0740337371826172, "learning_rate": 1.9956549352262265e-05, "loss": 0.5327, "step": 1370 }, { "epoch": 0.029792212857235715, "grad_norm": 1.514835238456726, "learning_rate": 1.9956231847475676e-05, "loss": 0.4209, "step": 1375 }, { "epoch": 0.029900548176716574, "grad_norm": 1.0505120754241943, "learning_rate": 1.9955913189408777e-05, "loss": 0.43, "step": 1380 }, { "epoch": 0.03000888349619743, "grad_norm": 0.9178633093833923, "learning_rate": 1.9955593378098472e-05, "loss": 0.3336, "step": 1385 }, { "epoch": 0.030117218815678286, "grad_norm": 0.8407590389251709, "learning_rate": 1.995527241358181e-05, "loss": 0.4236, "step": 1390 }, { "epoch": 0.030225554135159146, "grad_norm": 0.9512525200843811, "learning_rate": 1.995495029589597e-05, "loss": 0.4621, "step": 1395 }, { "epoch": 0.030333889454640002, "grad_norm": 1.0433675050735474, "learning_rate": 1.995462702507826e-05, "loss": 0.3003, "step": 1400 }, { "epoch": 0.030442224774120858, "grad_norm": 1.1571779251098633, "learning_rate": 1.9954302601166132e-05, "loss": 0.4218, "step": 1405 }, { "epoch": 0.030550560093601718, "grad_norm": 0.9107965230941772, "learning_rate": 1.9953977024197163e-05, "loss": 0.3635, "step": 1410 }, { "epoch": 0.030658895413082574, "grad_norm": 1.0310231447219849, "learning_rate": 1.9953650294209063e-05, "loss": 0.3839, "step": 1415 }, { "epoch": 0.03076723073256343, "grad_norm": 1.1820430755615234, "learning_rate": 1.9953322411239688e-05, "loss": 0.3819, "step": 1420 }, { "epoch": 0.03087556605204429, "grad_norm": 1.9753533601760864, "learning_rate": 1.995299337532701e-05, "loss": 0.4796, "step": 1425 }, { "epoch": 0.030983901371525145, "grad_norm": 1.202282190322876, "learning_rate": 1.9952663186509142e-05, "loss": 0.4373, "step": 1430 }, { "epoch": 0.031092236691006, "grad_norm": 1.4368435144424438, "learning_rate": 1.9952331844824336e-05, "loss": 0.3943, "step": 1435 }, { "epoch": 0.031200572010486857, "grad_norm": 1.135152816772461, "learning_rate": 1.9951999350310976e-05, "loss": 0.4939, "step": 1440 }, { "epoch": 0.03130890732996772, "grad_norm": 0.8869400024414062, "learning_rate": 1.9951665703007566e-05, "loss": 0.4853, "step": 1445 }, { "epoch": 0.031417242649448576, "grad_norm": 0.9000290036201477, "learning_rate": 1.9951330902952763e-05, "loss": 0.3566, "step": 1450 }, { "epoch": 0.03152557796892943, "grad_norm": 0.8023254871368408, "learning_rate": 1.995099495018535e-05, "loss": 0.5776, "step": 1455 }, { "epoch": 0.03163391328841029, "grad_norm": 1.3526217937469482, "learning_rate": 1.995065784474423e-05, "loss": 0.4739, "step": 1460 }, { "epoch": 0.03174224860789115, "grad_norm": 1.1238371133804321, "learning_rate": 1.9950319586668466e-05, "loss": 0.3734, "step": 1465 }, { "epoch": 0.031850583927372, "grad_norm": 1.4496561288833618, "learning_rate": 1.994998017599723e-05, "loss": 0.4042, "step": 1470 }, { "epoch": 0.03195891924685286, "grad_norm": 1.586187481880188, "learning_rate": 1.9949639612769844e-05, "loss": 0.4185, "step": 1475 }, { "epoch": 0.03206725456633372, "grad_norm": 0.8981671929359436, "learning_rate": 1.9949297897025754e-05, "loss": 0.3369, "step": 1480 }, { "epoch": 0.03217558988581457, "grad_norm": 1.0952513217926025, "learning_rate": 1.9948955028804543e-05, "loss": 0.4011, "step": 1485 }, { "epoch": 0.03228392520529543, "grad_norm": 1.1206063032150269, "learning_rate": 1.9948611008145927e-05, "loss": 0.4337, "step": 1490 }, { "epoch": 0.03239226052477629, "grad_norm": 0.9773597121238708, "learning_rate": 1.9948265835089753e-05, "loss": 0.4917, "step": 1495 }, { "epoch": 0.032500595844257144, "grad_norm": 1.2511464357376099, "learning_rate": 1.994791950967601e-05, "loss": 0.3896, "step": 1500 }, { "epoch": 0.032608931163738, "grad_norm": 0.9108788371086121, "learning_rate": 1.994757203194481e-05, "loss": 0.4454, "step": 1505 }, { "epoch": 0.032717266483218856, "grad_norm": 0.8698796629905701, "learning_rate": 1.9947223401936406e-05, "loss": 0.3503, "step": 1510 }, { "epoch": 0.032825601802699715, "grad_norm": 1.0829691886901855, "learning_rate": 1.994687361969118e-05, "loss": 0.3208, "step": 1515 }, { "epoch": 0.032933937122180575, "grad_norm": 1.0140756368637085, "learning_rate": 1.994652268524965e-05, "loss": 0.4013, "step": 1520 }, { "epoch": 0.03304227244166143, "grad_norm": 1.0206973552703857, "learning_rate": 1.9946170598652465e-05, "loss": 0.4594, "step": 1525 }, { "epoch": 0.03315060776114229, "grad_norm": 1.1016652584075928, "learning_rate": 1.9945817359940408e-05, "loss": 0.3694, "step": 1530 }, { "epoch": 0.033258943080623146, "grad_norm": 0.8749850988388062, "learning_rate": 1.99454629691544e-05, "loss": 0.5139, "step": 1535 }, { "epoch": 0.033367278400104, "grad_norm": 1.3254839181900024, "learning_rate": 1.994510742633549e-05, "loss": 0.4487, "step": 1540 }, { "epoch": 0.03347561371958486, "grad_norm": 1.0854792594909668, "learning_rate": 1.9944750731524866e-05, "loss": 0.4007, "step": 1545 }, { "epoch": 0.03358394903906572, "grad_norm": 0.8947243690490723, "learning_rate": 1.9944392884763837e-05, "loss": 0.3374, "step": 1550 }, { "epoch": 0.03369228435854657, "grad_norm": 0.8696319460868835, "learning_rate": 1.9944033886093863e-05, "loss": 0.4536, "step": 1555 }, { "epoch": 0.03380061967802743, "grad_norm": 0.930271565914154, "learning_rate": 1.9943673735556523e-05, "loss": 0.4201, "step": 1560 }, { "epoch": 0.03390895499750829, "grad_norm": 0.99686199426651, "learning_rate": 1.9943312433193536e-05, "loss": 0.4681, "step": 1565 }, { "epoch": 0.03401729031698914, "grad_norm": 1.0717865228652954, "learning_rate": 1.9942949979046755e-05, "loss": 0.3583, "step": 1570 }, { "epoch": 0.03412562563647, "grad_norm": 1.3879432678222656, "learning_rate": 1.9942586373158162e-05, "loss": 0.3998, "step": 1575 }, { "epoch": 0.03423396095595086, "grad_norm": 0.7827798128128052, "learning_rate": 1.9942221615569882e-05, "loss": 0.3178, "step": 1580 }, { "epoch": 0.034342296275431713, "grad_norm": 1.0907423496246338, "learning_rate": 1.994185570632416e-05, "loss": 0.4028, "step": 1585 }, { "epoch": 0.03445063159491257, "grad_norm": 1.2317733764648438, "learning_rate": 1.994148864546338e-05, "loss": 0.3709, "step": 1590 }, { "epoch": 0.03455896691439343, "grad_norm": 0.9094792008399963, "learning_rate": 1.994112043303007e-05, "loss": 0.3754, "step": 1595 }, { "epoch": 0.034667302233874285, "grad_norm": 1.1893261671066284, "learning_rate": 1.9940751069066873e-05, "loss": 0.3626, "step": 1600 }, { "epoch": 0.034775637553355145, "grad_norm": 1.0244930982589722, "learning_rate": 1.994038055361658e-05, "loss": 0.5156, "step": 1605 }, { "epoch": 0.034883972872836004, "grad_norm": 1.1178492307662964, "learning_rate": 1.9940008886722103e-05, "loss": 0.4256, "step": 1610 }, { "epoch": 0.03499230819231686, "grad_norm": 0.9077433347702026, "learning_rate": 1.99396360684265e-05, "loss": 0.3871, "step": 1615 }, { "epoch": 0.035100643511797716, "grad_norm": 0.8432390689849854, "learning_rate": 1.9939262098772952e-05, "loss": 0.3527, "step": 1620 }, { "epoch": 0.035208978831278576, "grad_norm": 1.039941430091858, "learning_rate": 1.9938886977804783e-05, "loss": 0.4695, "step": 1625 }, { "epoch": 0.03531731415075943, "grad_norm": 0.9640547037124634, "learning_rate": 1.993851070556544e-05, "loss": 0.4513, "step": 1630 }, { "epoch": 0.03542564947024029, "grad_norm": 0.8631457686424255, "learning_rate": 1.9938133282098514e-05, "loss": 0.4981, "step": 1635 }, { "epoch": 0.03553398478972115, "grad_norm": 0.9832780361175537, "learning_rate": 1.9937754707447716e-05, "loss": 0.4676, "step": 1640 }, { "epoch": 0.035642320109202, "grad_norm": 1.1794817447662354, "learning_rate": 1.9937374981656907e-05, "loss": 0.4308, "step": 1645 }, { "epoch": 0.03575065542868286, "grad_norm": 0.9472767114639282, "learning_rate": 1.9936994104770064e-05, "loss": 0.3598, "step": 1650 }, { "epoch": 0.03585899074816372, "grad_norm": 1.1849220991134644, "learning_rate": 1.993661207683131e-05, "loss": 0.2965, "step": 1655 }, { "epoch": 0.03596732606764457, "grad_norm": 1.0793379545211792, "learning_rate": 1.99362288978849e-05, "loss": 0.4113, "step": 1660 }, { "epoch": 0.03607566138712543, "grad_norm": 1.1519927978515625, "learning_rate": 1.9935844567975215e-05, "loss": 0.452, "step": 1665 }, { "epoch": 0.03618399670660629, "grad_norm": 0.7120981216430664, "learning_rate": 1.9935459087146775e-05, "loss": 0.4043, "step": 1670 }, { "epoch": 0.03629233202608714, "grad_norm": 0.8816445469856262, "learning_rate": 1.9935072455444237e-05, "loss": 0.3394, "step": 1675 }, { "epoch": 0.036400667345568, "grad_norm": 1.3873337507247925, "learning_rate": 1.993468467291238e-05, "loss": 0.3626, "step": 1680 }, { "epoch": 0.03650900266504886, "grad_norm": 1.1270328760147095, "learning_rate": 1.9934295739596125e-05, "loss": 0.478, "step": 1685 }, { "epoch": 0.036617337984529715, "grad_norm": 1.150001883506775, "learning_rate": 1.9933905655540525e-05, "loss": 0.4339, "step": 1690 }, { "epoch": 0.036725673304010574, "grad_norm": 1.0805400609970093, "learning_rate": 1.9933514420790762e-05, "loss": 0.4274, "step": 1695 }, { "epoch": 0.036834008623491434, "grad_norm": 1.4286655187606812, "learning_rate": 1.993312203539216e-05, "loss": 0.4049, "step": 1700 }, { "epoch": 0.036942343942972286, "grad_norm": 1.2814252376556396, "learning_rate": 1.9932728499390168e-05, "loss": 0.3977, "step": 1705 }, { "epoch": 0.037050679262453146, "grad_norm": 1.1724668741226196, "learning_rate": 1.9932333812830373e-05, "loss": 0.4825, "step": 1710 }, { "epoch": 0.037159014581934005, "grad_norm": 1.2857450246810913, "learning_rate": 1.993193797575849e-05, "loss": 0.4071, "step": 1715 }, { "epoch": 0.03726734990141486, "grad_norm": 1.4411669969558716, "learning_rate": 1.9931540988220373e-05, "loss": 0.3944, "step": 1720 }, { "epoch": 0.03737568522089572, "grad_norm": 1.3490899801254272, "learning_rate": 1.9931142850262007e-05, "loss": 0.3536, "step": 1725 }, { "epoch": 0.03748402054037658, "grad_norm": 1.0694905519485474, "learning_rate": 1.9930743561929514e-05, "loss": 0.4648, "step": 1730 }, { "epoch": 0.03759235585985743, "grad_norm": 0.7808147072792053, "learning_rate": 1.9930343123269137e-05, "loss": 0.4042, "step": 1735 }, { "epoch": 0.03770069117933829, "grad_norm": 1.2061059474945068, "learning_rate": 1.9929941534327268e-05, "loss": 0.4363, "step": 1740 }, { "epoch": 0.03780902649881915, "grad_norm": 0.853563129901886, "learning_rate": 1.9929538795150424e-05, "loss": 0.489, "step": 1745 }, { "epoch": 0.0379173618183, "grad_norm": 1.279189944267273, "learning_rate": 1.992913490578525e-05, "loss": 0.4354, "step": 1750 }, { "epoch": 0.03802569713778086, "grad_norm": 1.0473911762237549, "learning_rate": 1.992872986627854e-05, "loss": 0.4914, "step": 1755 }, { "epoch": 0.03813403245726172, "grad_norm": 1.1861108541488647, "learning_rate": 1.992832367667721e-05, "loss": 0.3928, "step": 1760 }, { "epoch": 0.03824236777674257, "grad_norm": 0.853769063949585, "learning_rate": 1.9927916337028304e-05, "loss": 0.3819, "step": 1765 }, { "epoch": 0.03835070309622343, "grad_norm": 1.102841854095459, "learning_rate": 1.9927507847379012e-05, "loss": 0.4726, "step": 1770 }, { "epoch": 0.03845903841570429, "grad_norm": 1.160768985748291, "learning_rate": 1.992709820777665e-05, "loss": 0.4266, "step": 1775 }, { "epoch": 0.038567373735185144, "grad_norm": 1.0815297365188599, "learning_rate": 1.9926687418268665e-05, "loss": 0.4484, "step": 1780 }, { "epoch": 0.038675709054666, "grad_norm": 0.9362607002258301, "learning_rate": 1.9926275478902644e-05, "loss": 0.4592, "step": 1785 }, { "epoch": 0.038784044374146856, "grad_norm": 1.0468387603759766, "learning_rate": 1.9925862389726308e-05, "loss": 0.4185, "step": 1790 }, { "epoch": 0.038892379693627716, "grad_norm": 0.982815682888031, "learning_rate": 1.9925448150787506e-05, "loss": 0.3909, "step": 1795 }, { "epoch": 0.039000715013108575, "grad_norm": 0.8285172581672668, "learning_rate": 1.9925032762134216e-05, "loss": 0.3734, "step": 1800 }, { "epoch": 0.03910905033258943, "grad_norm": 1.0176348686218262, "learning_rate": 1.9924616223814558e-05, "loss": 0.4176, "step": 1805 }, { "epoch": 0.03921738565207029, "grad_norm": 0.7655782103538513, "learning_rate": 1.992419853587678e-05, "loss": 0.3635, "step": 1810 }, { "epoch": 0.03932572097155115, "grad_norm": 1.3702540397644043, "learning_rate": 1.9923779698369266e-05, "loss": 0.3911, "step": 1815 }, { "epoch": 0.039434056291032, "grad_norm": 1.2923216819763184, "learning_rate": 1.9923359711340533e-05, "loss": 0.4544, "step": 1820 }, { "epoch": 0.03954239161051286, "grad_norm": 1.0061962604522705, "learning_rate": 1.9922938574839227e-05, "loss": 0.4997, "step": 1825 }, { "epoch": 0.03965072692999372, "grad_norm": 1.0312678813934326, "learning_rate": 1.9922516288914137e-05, "loss": 0.3801, "step": 1830 }, { "epoch": 0.03975906224947457, "grad_norm": 0.9500500559806824, "learning_rate": 1.992209285361417e-05, "loss": 0.3838, "step": 1835 }, { "epoch": 0.03986739756895543, "grad_norm": 1.2118014097213745, "learning_rate": 1.9921668268988382e-05, "loss": 0.3743, "step": 1840 }, { "epoch": 0.03997573288843629, "grad_norm": 0.939135730266571, "learning_rate": 1.9921242535085952e-05, "loss": 0.3185, "step": 1845 }, { "epoch": 0.04008406820791714, "grad_norm": 0.9313821196556091, "learning_rate": 1.9920815651956193e-05, "loss": 0.4004, "step": 1850 }, { "epoch": 0.040192403527398, "grad_norm": 1.1904656887054443, "learning_rate": 1.9920387619648553e-05, "loss": 0.3515, "step": 1855 }, { "epoch": 0.04030073884687886, "grad_norm": 1.4162958860397339, "learning_rate": 1.9919958438212618e-05, "loss": 0.4189, "step": 1860 }, { "epoch": 0.040409074166359714, "grad_norm": 1.2255293130874634, "learning_rate": 1.9919528107698096e-05, "loss": 0.4732, "step": 1865 }, { "epoch": 0.04051740948584057, "grad_norm": 0.7517035007476807, "learning_rate": 1.991909662815484e-05, "loss": 0.3185, "step": 1870 }, { "epoch": 0.04062574480532143, "grad_norm": 0.8276292681694031, "learning_rate": 1.9918663999632826e-05, "loss": 0.385, "step": 1875 }, { "epoch": 0.040734080124802285, "grad_norm": 1.54827880859375, "learning_rate": 1.991823022218217e-05, "loss": 0.5742, "step": 1880 }, { "epoch": 0.040842415444283145, "grad_norm": 1.067118763923645, "learning_rate": 1.9917795295853115e-05, "loss": 0.3768, "step": 1885 }, { "epoch": 0.040950750763764004, "grad_norm": 0.8015969395637512, "learning_rate": 1.9917359220696045e-05, "loss": 0.4028, "step": 1890 }, { "epoch": 0.04105908608324486, "grad_norm": 1.0034445524215698, "learning_rate": 1.9916921996761474e-05, "loss": 0.229, "step": 1895 }, { "epoch": 0.04116742140272572, "grad_norm": 0.937673032283783, "learning_rate": 1.9916483624100044e-05, "loss": 0.342, "step": 1900 }, { "epoch": 0.041275756722206576, "grad_norm": 1.1561015844345093, "learning_rate": 1.9916044102762533e-05, "loss": 0.4548, "step": 1905 }, { "epoch": 0.04138409204168743, "grad_norm": 1.1486231088638306, "learning_rate": 1.9915603432799858e-05, "loss": 0.5615, "step": 1910 }, { "epoch": 0.04149242736116829, "grad_norm": 0.9741374254226685, "learning_rate": 1.9915161614263057e-05, "loss": 0.4534, "step": 1915 }, { "epoch": 0.04160076268064915, "grad_norm": 1.5958127975463867, "learning_rate": 1.9914718647203316e-05, "loss": 0.4897, "step": 1920 }, { "epoch": 0.04170909800013, "grad_norm": 1.2432211637496948, "learning_rate": 1.991427453167194e-05, "loss": 0.3477, "step": 1925 }, { "epoch": 0.04181743331961086, "grad_norm": 0.97201007604599, "learning_rate": 1.9913829267720377e-05, "loss": 0.4229, "step": 1930 }, { "epoch": 0.04192576863909172, "grad_norm": 1.2913364171981812, "learning_rate": 1.9913382855400202e-05, "loss": 0.3805, "step": 1935 }, { "epoch": 0.04203410395857257, "grad_norm": 1.186529517173767, "learning_rate": 1.9912935294763125e-05, "loss": 0.481, "step": 1940 }, { "epoch": 0.04214243927805343, "grad_norm": 0.9556075930595398, "learning_rate": 1.9912486585860988e-05, "loss": 0.389, "step": 1945 }, { "epoch": 0.04225077459753429, "grad_norm": 0.9400811791419983, "learning_rate": 1.9912036728745772e-05, "loss": 0.3183, "step": 1950 }, { "epoch": 0.04235910991701514, "grad_norm": 1.3954319953918457, "learning_rate": 1.9911585723469583e-05, "loss": 0.4696, "step": 1955 }, { "epoch": 0.042467445236496, "grad_norm": 0.8756564855575562, "learning_rate": 1.9911133570084663e-05, "loss": 0.3388, "step": 1960 }, { "epoch": 0.04257578055597686, "grad_norm": 1.1999200582504272, "learning_rate": 1.9910680268643388e-05, "loss": 0.477, "step": 1965 }, { "epoch": 0.042684115875457715, "grad_norm": 0.9603111147880554, "learning_rate": 1.9910225819198265e-05, "loss": 0.4197, "step": 1970 }, { "epoch": 0.042792451194938574, "grad_norm": 0.7810286283493042, "learning_rate": 1.9909770221801935e-05, "loss": 0.3447, "step": 1975 }, { "epoch": 0.042900786514419434, "grad_norm": 0.863334059715271, "learning_rate": 1.9909313476507175e-05, "loss": 0.3728, "step": 1980 }, { "epoch": 0.043009121833900286, "grad_norm": 1.0504206418991089, "learning_rate": 1.990885558336689e-05, "loss": 0.3561, "step": 1985 }, { "epoch": 0.043117457153381146, "grad_norm": 0.990695059299469, "learning_rate": 1.9908396542434122e-05, "loss": 0.44, "step": 1990 }, { "epoch": 0.043225792472862005, "grad_norm": 1.1435505151748657, "learning_rate": 1.9907936353762038e-05, "loss": 0.4907, "step": 1995 }, { "epoch": 0.04333412779234286, "grad_norm": 1.1921627521514893, "learning_rate": 1.9907475017403952e-05, "loss": 0.4665, "step": 2000 }, { "epoch": 0.04344246311182372, "grad_norm": 1.0187792778015137, "learning_rate": 1.99070125334133e-05, "loss": 0.3086, "step": 2005 }, { "epoch": 0.04355079843130458, "grad_norm": 0.7352256178855896, "learning_rate": 1.9906548901843648e-05, "loss": 0.3832, "step": 2010 }, { "epoch": 0.04365913375078543, "grad_norm": 0.9807059168815613, "learning_rate": 1.990608412274871e-05, "loss": 0.3219, "step": 2015 }, { "epoch": 0.04376746907026629, "grad_norm": 0.8445367813110352, "learning_rate": 1.9905618196182318e-05, "loss": 0.3848, "step": 2020 }, { "epoch": 0.04387580438974715, "grad_norm": 1.1381770372390747, "learning_rate": 1.9905151122198445e-05, "loss": 0.415, "step": 2025 }, { "epoch": 0.043984139709228, "grad_norm": 0.9177400469779968, "learning_rate": 1.9904682900851195e-05, "loss": 0.3823, "step": 2030 }, { "epoch": 0.04409247502870886, "grad_norm": 0.8941115736961365, "learning_rate": 1.99042135321948e-05, "loss": 0.3098, "step": 2035 }, { "epoch": 0.04420081034818972, "grad_norm": 1.0041824579238892, "learning_rate": 1.9903743016283634e-05, "loss": 0.4055, "step": 2040 }, { "epoch": 0.04430914566767057, "grad_norm": 1.0302138328552246, "learning_rate": 1.9903271353172197e-05, "loss": 0.4645, "step": 2045 }, { "epoch": 0.04441748098715143, "grad_norm": 1.1581999063491821, "learning_rate": 1.9902798542915125e-05, "loss": 0.5094, "step": 2050 }, { "epoch": 0.044525816306632285, "grad_norm": 0.7069829702377319, "learning_rate": 1.9902324585567185e-05, "loss": 0.3865, "step": 2055 }, { "epoch": 0.044634151626113144, "grad_norm": 1.110148549079895, "learning_rate": 1.9901849481183277e-05, "loss": 0.4177, "step": 2060 }, { "epoch": 0.044742486945594004, "grad_norm": 0.9363313913345337, "learning_rate": 1.9901373229818438e-05, "loss": 0.4149, "step": 2065 }, { "epoch": 0.044850822265074856, "grad_norm": 1.4804904460906982, "learning_rate": 1.9900895831527837e-05, "loss": 0.4895, "step": 2070 }, { "epoch": 0.044959157584555716, "grad_norm": 1.2684965133666992, "learning_rate": 1.9900417286366766e-05, "loss": 0.3939, "step": 2075 }, { "epoch": 0.045067492904036575, "grad_norm": 1.1547921895980835, "learning_rate": 1.989993759439066e-05, "loss": 0.5144, "step": 2080 }, { "epoch": 0.04517582822351743, "grad_norm": 1.0706701278686523, "learning_rate": 1.9899456755655085e-05, "loss": 0.3771, "step": 2085 }, { "epoch": 0.04528416354299829, "grad_norm": 1.2325979471206665, "learning_rate": 1.989897477021574e-05, "loss": 0.4583, "step": 2090 }, { "epoch": 0.04539249886247915, "grad_norm": 1.5492217540740967, "learning_rate": 1.989849163812845e-05, "loss": 0.3792, "step": 2095 }, { "epoch": 0.04550083418196, "grad_norm": 1.142044186592102, "learning_rate": 1.9898007359449186e-05, "loss": 0.4173, "step": 2100 }, { "epoch": 0.04560916950144086, "grad_norm": 1.301121473312378, "learning_rate": 1.9897521934234042e-05, "loss": 0.4786, "step": 2105 }, { "epoch": 0.04571750482092172, "grad_norm": 1.1171679496765137, "learning_rate": 1.989703536253925e-05, "loss": 0.4302, "step": 2110 }, { "epoch": 0.04582584014040257, "grad_norm": 1.3252239227294922, "learning_rate": 1.9896547644421162e-05, "loss": 0.2974, "step": 2115 }, { "epoch": 0.04593417545988343, "grad_norm": 1.3178342580795288, "learning_rate": 1.9896058779936285e-05, "loss": 0.3855, "step": 2120 }, { "epoch": 0.04604251077936429, "grad_norm": 0.9673906564712524, "learning_rate": 1.989556876914124e-05, "loss": 0.4702, "step": 2125 }, { "epoch": 0.04615084609884514, "grad_norm": 0.8842958807945251, "learning_rate": 1.9895077612092787e-05, "loss": 0.2313, "step": 2130 }, { "epoch": 0.046259181418326, "grad_norm": 0.9337120056152344, "learning_rate": 1.9894585308847826e-05, "loss": 0.484, "step": 2135 }, { "epoch": 0.04636751673780686, "grad_norm": 1.4578274488449097, "learning_rate": 1.989409185946337e-05, "loss": 0.441, "step": 2140 }, { "epoch": 0.046475852057287714, "grad_norm": 1.4788306951522827, "learning_rate": 1.9893597263996592e-05, "loss": 0.3946, "step": 2145 }, { "epoch": 0.046584187376768574, "grad_norm": 1.145688533782959, "learning_rate": 1.9893101522504773e-05, "loss": 0.3574, "step": 2150 }, { "epoch": 0.04669252269624943, "grad_norm": 1.3315943479537964, "learning_rate": 1.989260463504534e-05, "loss": 0.4482, "step": 2155 }, { "epoch": 0.046800858015730286, "grad_norm": 1.1218692064285278, "learning_rate": 1.9892106601675856e-05, "loss": 0.4278, "step": 2160 }, { "epoch": 0.046909193335211145, "grad_norm": 1.2104421854019165, "learning_rate": 1.9891607422454003e-05, "loss": 0.491, "step": 2165 }, { "epoch": 0.047017528654692005, "grad_norm": 1.118722915649414, "learning_rate": 1.989110709743761e-05, "loss": 0.3449, "step": 2170 }, { "epoch": 0.04712586397417286, "grad_norm": 1.2484357357025146, "learning_rate": 1.9890605626684623e-05, "loss": 0.4007, "step": 2175 }, { "epoch": 0.04723419929365372, "grad_norm": 0.6337562203407288, "learning_rate": 1.989010301025314e-05, "loss": 0.3701, "step": 2180 }, { "epoch": 0.047342534613134576, "grad_norm": 1.0836654901504517, "learning_rate": 1.9889599248201376e-05, "loss": 0.4169, "step": 2185 }, { "epoch": 0.04745086993261543, "grad_norm": 1.1162135601043701, "learning_rate": 1.988909434058769e-05, "loss": 0.4377, "step": 2190 }, { "epoch": 0.04755920525209629, "grad_norm": 1.2731908559799194, "learning_rate": 1.9888588287470553e-05, "loss": 0.3411, "step": 2195 }, { "epoch": 0.04766754057157715, "grad_norm": 1.3280541896820068, "learning_rate": 1.9888081088908598e-05, "loss": 0.4879, "step": 2200 }, { "epoch": 0.047775875891058, "grad_norm": 1.2231303453445435, "learning_rate": 1.9887572744960574e-05, "loss": 0.3341, "step": 2205 }, { "epoch": 0.04788421121053886, "grad_norm": 1.3236316442489624, "learning_rate": 1.9887063255685362e-05, "loss": 0.3584, "step": 2210 }, { "epoch": 0.04799254653001972, "grad_norm": 1.049830436706543, "learning_rate": 1.988655262114198e-05, "loss": 0.3923, "step": 2215 }, { "epoch": 0.04810088184950057, "grad_norm": 1.1480505466461182, "learning_rate": 1.988604084138958e-05, "loss": 0.4414, "step": 2220 }, { "epoch": 0.04820921716898143, "grad_norm": 1.7722320556640625, "learning_rate": 1.9885527916487438e-05, "loss": 0.3178, "step": 2225 }, { "epoch": 0.04831755248846229, "grad_norm": 1.0804592370986938, "learning_rate": 1.9885013846494975e-05, "loss": 0.4759, "step": 2230 }, { "epoch": 0.048425887807943144, "grad_norm": 0.8976107835769653, "learning_rate": 1.9884498631471733e-05, "loss": 0.4313, "step": 2235 }, { "epoch": 0.048534223127424, "grad_norm": 1.069974422454834, "learning_rate": 1.9883982271477396e-05, "loss": 0.4343, "step": 2240 }, { "epoch": 0.04864255844690486, "grad_norm": 0.9804556369781494, "learning_rate": 1.9883464766571775e-05, "loss": 0.3769, "step": 2245 }, { "epoch": 0.048750893766385715, "grad_norm": 1.3612321615219116, "learning_rate": 1.9882946116814813e-05, "loss": 0.374, "step": 2250 }, { "epoch": 0.048859229085866575, "grad_norm": 0.8019570112228394, "learning_rate": 1.988242632226659e-05, "loss": 0.4514, "step": 2255 }, { "epoch": 0.048967564405347434, "grad_norm": 0.9042229652404785, "learning_rate": 1.988190538298732e-05, "loss": 0.4793, "step": 2260 }, { "epoch": 0.04907589972482829, "grad_norm": 1.132684350013733, "learning_rate": 1.9881383299037334e-05, "loss": 0.4345, "step": 2265 }, { "epoch": 0.049184235044309146, "grad_norm": 1.434518814086914, "learning_rate": 1.9880860070477124e-05, "loss": 0.4434, "step": 2270 }, { "epoch": 0.049292570363790006, "grad_norm": 0.8192652463912964, "learning_rate": 1.9880335697367286e-05, "loss": 0.2855, "step": 2275 }, { "epoch": 0.04940090568327086, "grad_norm": 1.3570537567138672, "learning_rate": 1.987981017976857e-05, "loss": 0.407, "step": 2280 }, { "epoch": 0.04950924100275172, "grad_norm": 1.3173516988754272, "learning_rate": 1.987928351774184e-05, "loss": 0.3374, "step": 2285 }, { "epoch": 0.04961757632223258, "grad_norm": 0.7716070413589478, "learning_rate": 1.9878755711348105e-05, "loss": 0.3716, "step": 2290 }, { "epoch": 0.04972591164171343, "grad_norm": 1.0920348167419434, "learning_rate": 1.987822676064851e-05, "loss": 0.3892, "step": 2295 }, { "epoch": 0.04983424696119429, "grad_norm": 1.2134182453155518, "learning_rate": 1.9877696665704315e-05, "loss": 0.4561, "step": 2300 }, { "epoch": 0.04994258228067515, "grad_norm": 1.1872649192810059, "learning_rate": 1.987716542657693e-05, "loss": 0.4205, "step": 2305 }, { "epoch": 0.050050917600156, "grad_norm": 0.7267194986343384, "learning_rate": 1.9876633043327892e-05, "loss": 0.3511, "step": 2310 }, { "epoch": 0.05015925291963686, "grad_norm": 0.9629629850387573, "learning_rate": 1.9876099516018866e-05, "loss": 0.4032, "step": 2315 }, { "epoch": 0.05026758823911772, "grad_norm": 1.2471238374710083, "learning_rate": 1.987556484471166e-05, "loss": 0.3279, "step": 2320 }, { "epoch": 0.05037592355859857, "grad_norm": 1.033008337020874, "learning_rate": 1.98750290294682e-05, "loss": 0.3637, "step": 2325 }, { "epoch": 0.05048425887807943, "grad_norm": 1.0606147050857544, "learning_rate": 1.9874492070350554e-05, "loss": 0.2917, "step": 2330 }, { "epoch": 0.050592594197560285, "grad_norm": 1.2238337993621826, "learning_rate": 1.987395396742092e-05, "loss": 0.5002, "step": 2335 }, { "epoch": 0.050700929517041145, "grad_norm": 1.2872203588485718, "learning_rate": 1.9873414720741633e-05, "loss": 0.3709, "step": 2340 }, { "epoch": 0.050809264836522004, "grad_norm": 1.2372853755950928, "learning_rate": 1.987287433037515e-05, "loss": 0.5257, "step": 2345 }, { "epoch": 0.05091760015600286, "grad_norm": 1.0069159269332886, "learning_rate": 1.9872332796384077e-05, "loss": 0.4196, "step": 2350 }, { "epoch": 0.051025935475483716, "grad_norm": 1.1841461658477783, "learning_rate": 1.9871790118831134e-05, "loss": 0.4499, "step": 2355 }, { "epoch": 0.051134270794964576, "grad_norm": 1.1233538389205933, "learning_rate": 1.987124629777919e-05, "loss": 0.4103, "step": 2360 }, { "epoch": 0.05124260611444543, "grad_norm": 1.3052875995635986, "learning_rate": 1.9870701333291228e-05, "loss": 0.443, "step": 2365 }, { "epoch": 0.05135094143392629, "grad_norm": 1.53458571434021, "learning_rate": 1.987015522543038e-05, "loss": 0.4312, "step": 2370 }, { "epoch": 0.05145927675340715, "grad_norm": 0.8942107558250427, "learning_rate": 1.9869607974259904e-05, "loss": 0.3127, "step": 2375 }, { "epoch": 0.051567612072888, "grad_norm": 1.0238046646118164, "learning_rate": 1.9869059579843194e-05, "loss": 0.4301, "step": 2380 }, { "epoch": 0.05167594739236886, "grad_norm": 1.7441941499710083, "learning_rate": 1.9868510042243768e-05, "loss": 0.4081, "step": 2385 }, { "epoch": 0.05178428271184972, "grad_norm": 0.9286105632781982, "learning_rate": 1.9867959361525288e-05, "loss": 0.3913, "step": 2390 }, { "epoch": 0.05189261803133057, "grad_norm": 1.026591420173645, "learning_rate": 1.9867407537751533e-05, "loss": 0.2942, "step": 2395 }, { "epoch": 0.05200095335081143, "grad_norm": 1.2370611429214478, "learning_rate": 1.986685457098643e-05, "loss": 0.4451, "step": 2400 }, { "epoch": 0.05210928867029229, "grad_norm": 1.133952021598816, "learning_rate": 1.986630046129403e-05, "loss": 0.4299, "step": 2405 }, { "epoch": 0.05221762398977314, "grad_norm": 0.9939681887626648, "learning_rate": 1.986574520873852e-05, "loss": 0.3076, "step": 2410 }, { "epoch": 0.052325959309254, "grad_norm": 0.7362215518951416, "learning_rate": 1.9865188813384212e-05, "loss": 0.3852, "step": 2415 }, { "epoch": 0.05243429462873486, "grad_norm": 1.0987054109573364, "learning_rate": 1.986463127529557e-05, "loss": 0.408, "step": 2420 }, { "epoch": 0.052542629948215715, "grad_norm": 1.2026114463806152, "learning_rate": 1.9864072594537157e-05, "loss": 0.3774, "step": 2425 }, { "epoch": 0.052650965267696574, "grad_norm": 0.9589418172836304, "learning_rate": 1.9863512771173703e-05, "loss": 0.3635, "step": 2430 }, { "epoch": 0.052759300587177434, "grad_norm": 1.0852984189987183, "learning_rate": 1.9862951805270047e-05, "loss": 0.5163, "step": 2435 }, { "epoch": 0.052867635906658286, "grad_norm": 1.1594613790512085, "learning_rate": 1.9862389696891175e-05, "loss": 0.427, "step": 2440 }, { "epoch": 0.052975971226139146, "grad_norm": 1.423185110092163, "learning_rate": 1.9861826446102192e-05, "loss": 0.2779, "step": 2445 }, { "epoch": 0.053084306545620005, "grad_norm": 1.2315585613250732, "learning_rate": 1.9861262052968347e-05, "loss": 0.32, "step": 2450 }, { "epoch": 0.05319264186510086, "grad_norm": 1.4762980937957764, "learning_rate": 1.9860696517555013e-05, "loss": 0.3688, "step": 2455 }, { "epoch": 0.05330097718458172, "grad_norm": 1.061032772064209, "learning_rate": 1.9860129839927702e-05, "loss": 0.388, "step": 2460 }, { "epoch": 0.05340931250406258, "grad_norm": 1.0354013442993164, "learning_rate": 1.9859562020152054e-05, "loss": 0.4141, "step": 2465 }, { "epoch": 0.05351764782354343, "grad_norm": 1.1116414070129395, "learning_rate": 1.9858993058293842e-05, "loss": 0.5293, "step": 2470 }, { "epoch": 0.05362598314302429, "grad_norm": 1.2517813444137573, "learning_rate": 1.985842295441897e-05, "loss": 0.3411, "step": 2475 }, { "epoch": 0.05373431846250515, "grad_norm": 1.1563111543655396, "learning_rate": 1.9857851708593484e-05, "loss": 0.42, "step": 2480 }, { "epoch": 0.053842653781986, "grad_norm": 1.6009868383407593, "learning_rate": 1.9857279320883544e-05, "loss": 0.5049, "step": 2485 }, { "epoch": 0.05395098910146686, "grad_norm": 1.4326162338256836, "learning_rate": 1.985670579135546e-05, "loss": 0.4352, "step": 2490 }, { "epoch": 0.05405932442094772, "grad_norm": 1.7926661968231201, "learning_rate": 1.9856131120075654e-05, "loss": 0.3444, "step": 2495 }, { "epoch": 0.05416765974042857, "grad_norm": 1.219686508178711, "learning_rate": 1.985555530711071e-05, "loss": 0.3801, "step": 2500 }, { "epoch": 0.05427599505990943, "grad_norm": 1.2511463165283203, "learning_rate": 1.9854978352527322e-05, "loss": 0.4017, "step": 2505 }, { "epoch": 0.05438433037939029, "grad_norm": 1.3173253536224365, "learning_rate": 1.9854400256392314e-05, "loss": 0.4989, "step": 2510 }, { "epoch": 0.054492665698871144, "grad_norm": 1.534544825553894, "learning_rate": 1.9853821018772656e-05, "loss": 0.5153, "step": 2515 }, { "epoch": 0.054601001018352004, "grad_norm": 1.5213266611099243, "learning_rate": 1.9853240639735448e-05, "loss": 0.511, "step": 2520 }, { "epoch": 0.05470933633783286, "grad_norm": 0.9865068793296814, "learning_rate": 1.9852659119347906e-05, "loss": 0.4395, "step": 2525 }, { "epoch": 0.054817671657313716, "grad_norm": 1.3264821767807007, "learning_rate": 1.9852076457677404e-05, "loss": 0.3911, "step": 2530 }, { "epoch": 0.054926006976794575, "grad_norm": 0.8249490261077881, "learning_rate": 1.9851492654791426e-05, "loss": 0.4091, "step": 2535 }, { "epoch": 0.055034342296275435, "grad_norm": 1.229555606842041, "learning_rate": 1.98509077107576e-05, "loss": 0.3832, "step": 2540 }, { "epoch": 0.05514267761575629, "grad_norm": 1.2026827335357666, "learning_rate": 1.985032162564368e-05, "loss": 0.4467, "step": 2545 }, { "epoch": 0.05525101293523715, "grad_norm": 1.0485275983810425, "learning_rate": 1.984973439951756e-05, "loss": 0.42, "step": 2550 }, { "epoch": 0.055359348254718006, "grad_norm": 1.1888206005096436, "learning_rate": 1.9849146032447256e-05, "loss": 0.4811, "step": 2555 }, { "epoch": 0.05546768357419886, "grad_norm": 1.1652300357818604, "learning_rate": 1.984855652450093e-05, "loss": 0.36, "step": 2560 }, { "epoch": 0.05557601889367972, "grad_norm": 1.0452789068222046, "learning_rate": 1.9847965875746857e-05, "loss": 0.4131, "step": 2565 }, { "epoch": 0.05568435421316058, "grad_norm": 1.1044408082962036, "learning_rate": 1.984737408625346e-05, "loss": 0.3766, "step": 2570 }, { "epoch": 0.05579268953264143, "grad_norm": 0.8990198373794556, "learning_rate": 1.9846781156089293e-05, "loss": 0.4224, "step": 2575 }, { "epoch": 0.05590102485212229, "grad_norm": 1.0015950202941895, "learning_rate": 1.984618708532303e-05, "loss": 0.397, "step": 2580 }, { "epoch": 0.05600936017160315, "grad_norm": 1.062981367111206, "learning_rate": 1.984559187402349e-05, "loss": 0.4376, "step": 2585 }, { "epoch": 0.056117695491084, "grad_norm": 1.4202004671096802, "learning_rate": 1.9844995522259618e-05, "loss": 0.3613, "step": 2590 }, { "epoch": 0.05622603081056486, "grad_norm": 1.3233774900436401, "learning_rate": 1.9844398030100492e-05, "loss": 0.4031, "step": 2595 }, { "epoch": 0.056334366130045714, "grad_norm": 1.158003330230713, "learning_rate": 1.9843799397615323e-05, "loss": 0.5118, "step": 2600 }, { "epoch": 0.056442701449526574, "grad_norm": 1.0624910593032837, "learning_rate": 1.9843199624873458e-05, "loss": 0.3371, "step": 2605 }, { "epoch": 0.05655103676900743, "grad_norm": 1.5259946584701538, "learning_rate": 1.9842598711944365e-05, "loss": 0.4176, "step": 2610 }, { "epoch": 0.056659372088488286, "grad_norm": 0.8725970387458801, "learning_rate": 1.9841996658897653e-05, "loss": 0.3384, "step": 2615 }, { "epoch": 0.056767707407969145, "grad_norm": 1.008112907409668, "learning_rate": 1.9841393465803062e-05, "loss": 0.299, "step": 2620 }, { "epoch": 0.056876042727450005, "grad_norm": 1.5669230222702026, "learning_rate": 1.984078913273046e-05, "loss": 0.4313, "step": 2625 }, { "epoch": 0.05698437804693086, "grad_norm": 0.9091070294380188, "learning_rate": 1.9840183659749853e-05, "loss": 0.4629, "step": 2630 }, { "epoch": 0.05709271336641172, "grad_norm": 1.5798628330230713, "learning_rate": 1.9839577046931374e-05, "loss": 0.4298, "step": 2635 }, { "epoch": 0.057201048685892576, "grad_norm": 1.1212323904037476, "learning_rate": 1.9838969294345297e-05, "loss": 0.4097, "step": 2640 }, { "epoch": 0.05730938400537343, "grad_norm": 1.0981521606445312, "learning_rate": 1.983836040206201e-05, "loss": 0.3949, "step": 2645 }, { "epoch": 0.05741771932485429, "grad_norm": 1.1161490678787231, "learning_rate": 1.9837750370152048e-05, "loss": 0.3976, "step": 2650 }, { "epoch": 0.05752605464433515, "grad_norm": 1.307163119316101, "learning_rate": 1.9837139198686076e-05, "loss": 0.4102, "step": 2655 }, { "epoch": 0.057634389963816, "grad_norm": 1.0558558702468872, "learning_rate": 1.983652688773489e-05, "loss": 0.3859, "step": 2660 }, { "epoch": 0.05774272528329686, "grad_norm": 0.9995655417442322, "learning_rate": 1.9835913437369413e-05, "loss": 0.3724, "step": 2665 }, { "epoch": 0.05785106060277772, "grad_norm": 1.3908106088638306, "learning_rate": 1.9835298847660708e-05, "loss": 0.3826, "step": 2670 }, { "epoch": 0.05795939592225857, "grad_norm": 1.1406176090240479, "learning_rate": 1.9834683118679963e-05, "loss": 0.4741, "step": 2675 }, { "epoch": 0.05806773124173943, "grad_norm": 1.2918334007263184, "learning_rate": 1.9834066250498502e-05, "loss": 0.429, "step": 2680 }, { "epoch": 0.05817606656122029, "grad_norm": 0.9959827065467834, "learning_rate": 1.983344824318778e-05, "loss": 0.4712, "step": 2685 }, { "epoch": 0.058284401880701144, "grad_norm": 1.2101465463638306, "learning_rate": 1.9832829096819386e-05, "loss": 0.3435, "step": 2690 }, { "epoch": 0.058392737200182, "grad_norm": 1.1460593938827515, "learning_rate": 1.9832208811465036e-05, "loss": 0.4725, "step": 2695 }, { "epoch": 0.05850107251966286, "grad_norm": 1.1342058181762695, "learning_rate": 1.983158738719658e-05, "loss": 0.3635, "step": 2700 }, { "epoch": 0.058609407839143715, "grad_norm": 1.464566946029663, "learning_rate": 1.9830964824086007e-05, "loss": 0.3963, "step": 2705 }, { "epoch": 0.058717743158624575, "grad_norm": 0.9168642163276672, "learning_rate": 1.9830341122205422e-05, "loss": 0.3419, "step": 2710 }, { "epoch": 0.058826078478105434, "grad_norm": 1.244091272354126, "learning_rate": 1.982971628162708e-05, "loss": 0.4169, "step": 2715 }, { "epoch": 0.05893441379758629, "grad_norm": 0.6929450631141663, "learning_rate": 1.982909030242335e-05, "loss": 0.3874, "step": 2720 }, { "epoch": 0.059042749117067146, "grad_norm": 1.4105819463729858, "learning_rate": 1.9828463184666756e-05, "loss": 0.5203, "step": 2725 }, { "epoch": 0.059151084436548006, "grad_norm": 0.9425551295280457, "learning_rate": 1.9827834928429927e-05, "loss": 0.3995, "step": 2730 }, { "epoch": 0.05925941975602886, "grad_norm": 1.1435893774032593, "learning_rate": 1.9827205533785644e-05, "loss": 0.3586, "step": 2735 }, { "epoch": 0.05936775507550972, "grad_norm": 1.1035292148590088, "learning_rate": 1.982657500080681e-05, "loss": 0.3257, "step": 2740 }, { "epoch": 0.05947609039499058, "grad_norm": 1.080426573753357, "learning_rate": 1.982594332956647e-05, "loss": 0.3514, "step": 2745 }, { "epoch": 0.05958442571447143, "grad_norm": 0.9115095138549805, "learning_rate": 1.982531052013778e-05, "loss": 0.4026, "step": 2750 }, { "epoch": 0.05969276103395229, "grad_norm": 0.8732407093048096, "learning_rate": 1.9824676572594053e-05, "loss": 0.467, "step": 2755 }, { "epoch": 0.05980109635343315, "grad_norm": 1.1732498407363892, "learning_rate": 1.982404148700872e-05, "loss": 0.3089, "step": 2760 }, { "epoch": 0.059909431672914, "grad_norm": 1.4207959175109863, "learning_rate": 1.9823405263455345e-05, "loss": 0.4431, "step": 2765 }, { "epoch": 0.06001776699239486, "grad_norm": 1.1080808639526367, "learning_rate": 1.9822767902007624e-05, "loss": 0.3437, "step": 2770 }, { "epoch": 0.06012610231187572, "grad_norm": 0.6778680086135864, "learning_rate": 1.9822129402739386e-05, "loss": 0.4335, "step": 2775 }, { "epoch": 0.06023443763135657, "grad_norm": 1.3175880908966064, "learning_rate": 1.9821489765724594e-05, "loss": 0.4523, "step": 2780 }, { "epoch": 0.06034277295083743, "grad_norm": 1.1473150253295898, "learning_rate": 1.9820848991037337e-05, "loss": 0.5229, "step": 2785 }, { "epoch": 0.06045110827031829, "grad_norm": 1.2940391302108765, "learning_rate": 1.982020707875184e-05, "loss": 0.4654, "step": 2790 }, { "epoch": 0.060559443589799145, "grad_norm": 1.0677402019500732, "learning_rate": 1.981956402894246e-05, "loss": 0.4538, "step": 2795 }, { "epoch": 0.060667778909280004, "grad_norm": 1.0347718000411987, "learning_rate": 1.9818919841683686e-05, "loss": 0.4394, "step": 2800 }, { "epoch": 0.060776114228760864, "grad_norm": 1.1407626867294312, "learning_rate": 1.9818274517050136e-05, "loss": 0.3011, "step": 2805 }, { "epoch": 0.060884449548241716, "grad_norm": 1.25590181350708, "learning_rate": 1.9817628055116557e-05, "loss": 0.373, "step": 2810 }, { "epoch": 0.060992784867722576, "grad_norm": 1.2560309171676636, "learning_rate": 1.981698045595784e-05, "loss": 0.4461, "step": 2815 }, { "epoch": 0.061101120187203435, "grad_norm": 1.5139379501342773, "learning_rate": 1.9816331719648994e-05, "loss": 0.4368, "step": 2820 }, { "epoch": 0.06120945550668429, "grad_norm": 0.969521164894104, "learning_rate": 1.9815681846265168e-05, "loss": 0.4393, "step": 2825 }, { "epoch": 0.06131779082616515, "grad_norm": 1.8118661642074585, "learning_rate": 1.9815030835881634e-05, "loss": 0.4196, "step": 2830 }, { "epoch": 0.06142612614564601, "grad_norm": 0.7012310028076172, "learning_rate": 1.981437868857381e-05, "loss": 0.4785, "step": 2835 }, { "epoch": 0.06153446146512686, "grad_norm": 1.0846431255340576, "learning_rate": 1.9813725404417237e-05, "loss": 0.4174, "step": 2840 }, { "epoch": 0.06164279678460772, "grad_norm": 0.9224893450737, "learning_rate": 1.9813070983487583e-05, "loss": 0.3785, "step": 2845 }, { "epoch": 0.06175113210408858, "grad_norm": 0.713914155960083, "learning_rate": 1.9812415425860652e-05, "loss": 0.4047, "step": 2850 }, { "epoch": 0.06185946742356943, "grad_norm": 1.415415644645691, "learning_rate": 1.9811758731612393e-05, "loss": 0.4468, "step": 2855 }, { "epoch": 0.06196780274305029, "grad_norm": 1.1357392072677612, "learning_rate": 1.981110090081886e-05, "loss": 0.4893, "step": 2860 }, { "epoch": 0.06207613806253115, "grad_norm": 1.159040927886963, "learning_rate": 1.9810441933556256e-05, "loss": 0.5245, "step": 2865 }, { "epoch": 0.062184473382012, "grad_norm": 1.1932728290557861, "learning_rate": 1.9809781829900916e-05, "loss": 0.3526, "step": 2870 }, { "epoch": 0.06229280870149286, "grad_norm": 1.2042800188064575, "learning_rate": 1.98091205899293e-05, "loss": 0.3934, "step": 2875 }, { "epoch": 0.062401144020973714, "grad_norm": 1.172789216041565, "learning_rate": 1.9808458213718007e-05, "loss": 0.5309, "step": 2880 }, { "epoch": 0.06250947934045457, "grad_norm": 1.094330072402954, "learning_rate": 1.980779470134376e-05, "loss": 0.4145, "step": 2885 }, { "epoch": 0.06261781465993543, "grad_norm": 0.9580143094062805, "learning_rate": 1.9807130052883415e-05, "loss": 0.275, "step": 2890 }, { "epoch": 0.06272614997941629, "grad_norm": 1.2833927869796753, "learning_rate": 1.9806464268413966e-05, "loss": 0.317, "step": 2895 }, { "epoch": 0.06283448529889715, "grad_norm": 1.2831863164901733, "learning_rate": 1.9805797348012534e-05, "loss": 0.4634, "step": 2900 }, { "epoch": 0.062942820618378, "grad_norm": 1.0026440620422363, "learning_rate": 1.980512929175637e-05, "loss": 0.3984, "step": 2905 }, { "epoch": 0.06305115593785886, "grad_norm": 1.2032533884048462, "learning_rate": 1.9804460099722856e-05, "loss": 0.3388, "step": 2910 }, { "epoch": 0.06315949125733972, "grad_norm": 0.8626621961593628, "learning_rate": 1.9803789771989513e-05, "loss": 0.4371, "step": 2915 }, { "epoch": 0.06326782657682058, "grad_norm": 1.255376935005188, "learning_rate": 1.980311830863398e-05, "loss": 0.3548, "step": 2920 }, { "epoch": 0.06337616189630144, "grad_norm": 1.260528326034546, "learning_rate": 1.9802445709734045e-05, "loss": 0.4725, "step": 2925 }, { "epoch": 0.0634844972157823, "grad_norm": 1.319213628768921, "learning_rate": 1.980177197536762e-05, "loss": 0.4426, "step": 2930 }, { "epoch": 0.06359283253526314, "grad_norm": 0.7994346022605896, "learning_rate": 1.9801097105612738e-05, "loss": 0.424, "step": 2935 }, { "epoch": 0.063701167854744, "grad_norm": 1.001602053642273, "learning_rate": 1.9800421100547576e-05, "loss": 0.4038, "step": 2940 }, { "epoch": 0.06380950317422486, "grad_norm": 1.5483664274215698, "learning_rate": 1.9799743960250438e-05, "loss": 0.5258, "step": 2945 }, { "epoch": 0.06391783849370572, "grad_norm": 1.458622932434082, "learning_rate": 1.9799065684799767e-05, "loss": 0.2897, "step": 2950 }, { "epoch": 0.06402617381318658, "grad_norm": 1.1160708665847778, "learning_rate": 1.9798386274274125e-05, "loss": 0.4477, "step": 2955 }, { "epoch": 0.06413450913266744, "grad_norm": 1.1757616996765137, "learning_rate": 1.979770572875221e-05, "loss": 0.3912, "step": 2960 }, { "epoch": 0.06424284445214828, "grad_norm": 1.1254057884216309, "learning_rate": 1.9797024048312858e-05, "loss": 0.3666, "step": 2965 }, { "epoch": 0.06435117977162914, "grad_norm": 0.871245801448822, "learning_rate": 1.979634123303503e-05, "loss": 0.3266, "step": 2970 }, { "epoch": 0.06445951509111, "grad_norm": 1.069661021232605, "learning_rate": 1.979565728299782e-05, "loss": 0.4416, "step": 2975 }, { "epoch": 0.06456785041059086, "grad_norm": 1.8103491067886353, "learning_rate": 1.979497219828045e-05, "loss": 0.4293, "step": 2980 }, { "epoch": 0.06467618573007172, "grad_norm": 1.2006827592849731, "learning_rate": 1.9794285978962283e-05, "loss": 0.3319, "step": 2985 }, { "epoch": 0.06478452104955258, "grad_norm": 1.2941133975982666, "learning_rate": 1.97935986251228e-05, "loss": 0.4354, "step": 2990 }, { "epoch": 0.06489285636903343, "grad_norm": 0.8972794413566589, "learning_rate": 1.9792910136841627e-05, "loss": 0.3736, "step": 2995 }, { "epoch": 0.06500119168851429, "grad_norm": 1.3215473890304565, "learning_rate": 1.979222051419851e-05, "loss": 0.3981, "step": 3000 }, { "epoch": 0.06510952700799515, "grad_norm": 1.6333682537078857, "learning_rate": 1.9791529757273338e-05, "loss": 0.4487, "step": 3005 }, { "epoch": 0.065217862327476, "grad_norm": 1.0149061679840088, "learning_rate": 1.979083786614612e-05, "loss": 0.389, "step": 3010 }, { "epoch": 0.06532619764695687, "grad_norm": 1.5061441659927368, "learning_rate": 1.9790144840897e-05, "loss": 0.3548, "step": 3015 }, { "epoch": 0.06543453296643771, "grad_norm": 1.2121623754501343, "learning_rate": 1.978945068160625e-05, "loss": 0.3734, "step": 3020 }, { "epoch": 0.06554286828591857, "grad_norm": 0.77189701795578, "learning_rate": 1.9788755388354296e-05, "loss": 0.3777, "step": 3025 }, { "epoch": 0.06565120360539943, "grad_norm": 1.1796475648880005, "learning_rate": 1.978805896122166e-05, "loss": 0.4619, "step": 3030 }, { "epoch": 0.06575953892488029, "grad_norm": 1.1715774536132812, "learning_rate": 1.978736140028902e-05, "loss": 0.3442, "step": 3035 }, { "epoch": 0.06586787424436115, "grad_norm": 1.1708629131317139, "learning_rate": 1.9786662705637172e-05, "loss": 0.473, "step": 3040 }, { "epoch": 0.06597620956384201, "grad_norm": 0.8817400932312012, "learning_rate": 1.978596287734706e-05, "loss": 0.315, "step": 3045 }, { "epoch": 0.06608454488332285, "grad_norm": 1.2264484167099, "learning_rate": 1.9785261915499744e-05, "loss": 0.503, "step": 3050 }, { "epoch": 0.06619288020280371, "grad_norm": 1.3567266464233398, "learning_rate": 1.9784559820176414e-05, "loss": 0.3982, "step": 3055 }, { "epoch": 0.06630121552228457, "grad_norm": 1.4478083848953247, "learning_rate": 1.9783856591458403e-05, "loss": 0.4192, "step": 3060 }, { "epoch": 0.06640955084176543, "grad_norm": 1.1904504299163818, "learning_rate": 1.9783152229427164e-05, "loss": 0.3612, "step": 3065 }, { "epoch": 0.06651788616124629, "grad_norm": 1.1118361949920654, "learning_rate": 1.9782446734164294e-05, "loss": 0.3535, "step": 3070 }, { "epoch": 0.06662622148072715, "grad_norm": 1.2601358890533447, "learning_rate": 1.978174010575151e-05, "loss": 0.4521, "step": 3075 }, { "epoch": 0.066734556800208, "grad_norm": 1.0329488515853882, "learning_rate": 1.9781032344270666e-05, "loss": 0.4549, "step": 3080 }, { "epoch": 0.06684289211968886, "grad_norm": 1.0106709003448486, "learning_rate": 1.9780323449803745e-05, "loss": 0.3709, "step": 3085 }, { "epoch": 0.06695122743916972, "grad_norm": 1.4203647375106812, "learning_rate": 1.977961342243286e-05, "loss": 0.3388, "step": 3090 }, { "epoch": 0.06705956275865058, "grad_norm": 0.9139717221260071, "learning_rate": 1.977890226224026e-05, "loss": 0.4861, "step": 3095 }, { "epoch": 0.06716789807813144, "grad_norm": 1.1429413557052612, "learning_rate": 1.9778189969308323e-05, "loss": 0.5469, "step": 3100 }, { "epoch": 0.0672762333976123, "grad_norm": 1.1332942247390747, "learning_rate": 1.9777476543719552e-05, "loss": 0.4793, "step": 3105 }, { "epoch": 0.06738456871709314, "grad_norm": 1.3617349863052368, "learning_rate": 1.977676198555659e-05, "loss": 0.4177, "step": 3110 }, { "epoch": 0.067492904036574, "grad_norm": 1.6243956089019775, "learning_rate": 1.977604629490221e-05, "loss": 0.4008, "step": 3115 }, { "epoch": 0.06760123935605486, "grad_norm": 0.9184507131576538, "learning_rate": 1.977532947183931e-05, "loss": 0.307, "step": 3120 }, { "epoch": 0.06770957467553572, "grad_norm": 1.143863320350647, "learning_rate": 1.9774611516450925e-05, "loss": 0.4356, "step": 3125 }, { "epoch": 0.06781790999501658, "grad_norm": 1.1687986850738525, "learning_rate": 1.9773892428820223e-05, "loss": 0.3222, "step": 3130 }, { "epoch": 0.06792624531449744, "grad_norm": 1.512439489364624, "learning_rate": 1.977317220903049e-05, "loss": 0.4749, "step": 3135 }, { "epoch": 0.06803458063397828, "grad_norm": 1.0120844841003418, "learning_rate": 1.9772450857165163e-05, "loss": 0.394, "step": 3140 }, { "epoch": 0.06814291595345914, "grad_norm": 0.9175419807434082, "learning_rate": 1.9771728373307795e-05, "loss": 0.3556, "step": 3145 }, { "epoch": 0.06825125127294, "grad_norm": 1.3597557544708252, "learning_rate": 1.9771004757542075e-05, "loss": 0.4753, "step": 3150 }, { "epoch": 0.06835958659242086, "grad_norm": 1.1261433362960815, "learning_rate": 1.9770280009951822e-05, "loss": 0.3967, "step": 3155 }, { "epoch": 0.06846792191190172, "grad_norm": 1.4626237154006958, "learning_rate": 1.976955413062099e-05, "loss": 0.3617, "step": 3160 }, { "epoch": 0.06857625723138258, "grad_norm": 1.2926228046417236, "learning_rate": 1.9768827119633663e-05, "loss": 0.5725, "step": 3165 }, { "epoch": 0.06868459255086343, "grad_norm": 0.9691271781921387, "learning_rate": 1.976809897707405e-05, "loss": 0.4216, "step": 3170 }, { "epoch": 0.06879292787034429, "grad_norm": 0.9988919496536255, "learning_rate": 1.9767369703026492e-05, "loss": 0.3461, "step": 3175 }, { "epoch": 0.06890126318982515, "grad_norm": 1.2709513902664185, "learning_rate": 1.9766639297575473e-05, "loss": 0.4015, "step": 3180 }, { "epoch": 0.069009598509306, "grad_norm": 1.3807140588760376, "learning_rate": 1.9765907760805595e-05, "loss": 0.3406, "step": 3185 }, { "epoch": 0.06911793382878686, "grad_norm": 1.2349826097488403, "learning_rate": 1.9765175092801594e-05, "loss": 0.3287, "step": 3190 }, { "epoch": 0.06922626914826772, "grad_norm": 1.2220007181167603, "learning_rate": 1.9764441293648344e-05, "loss": 0.3801, "step": 3195 }, { "epoch": 0.06933460446774857, "grad_norm": 1.3518987894058228, "learning_rate": 1.9763706363430838e-05, "loss": 0.4489, "step": 3200 }, { "epoch": 0.06944293978722943, "grad_norm": 1.0798059701919556, "learning_rate": 1.9762970302234215e-05, "loss": 0.4332, "step": 3205 }, { "epoch": 0.06955127510671029, "grad_norm": 0.9959261417388916, "learning_rate": 1.9762233110143728e-05, "loss": 0.3912, "step": 3210 }, { "epoch": 0.06965961042619115, "grad_norm": 1.4297744035720825, "learning_rate": 1.9761494787244775e-05, "loss": 0.394, "step": 3215 }, { "epoch": 0.06976794574567201, "grad_norm": 1.1743800640106201, "learning_rate": 1.9760755333622875e-05, "loss": 0.3395, "step": 3220 }, { "epoch": 0.06987628106515287, "grad_norm": 1.1705238819122314, "learning_rate": 1.9760014749363688e-05, "loss": 0.5177, "step": 3225 }, { "epoch": 0.06998461638463371, "grad_norm": 1.2809813022613525, "learning_rate": 1.9759273034552997e-05, "loss": 0.441, "step": 3230 }, { "epoch": 0.07009295170411457, "grad_norm": 1.2466709613800049, "learning_rate": 1.975853018927672e-05, "loss": 0.3378, "step": 3235 }, { "epoch": 0.07020128702359543, "grad_norm": 1.436536192893982, "learning_rate": 1.9757786213620904e-05, "loss": 0.4133, "step": 3240 }, { "epoch": 0.07030962234307629, "grad_norm": 1.520317554473877, "learning_rate": 1.9757041107671724e-05, "loss": 0.342, "step": 3245 }, { "epoch": 0.07041795766255715, "grad_norm": 1.2620463371276855, "learning_rate": 1.9756294871515496e-05, "loss": 0.4978, "step": 3250 }, { "epoch": 0.07052629298203801, "grad_norm": 1.1163952350616455, "learning_rate": 1.9755547505238652e-05, "loss": 0.4556, "step": 3255 }, { "epoch": 0.07063462830151886, "grad_norm": 0.8746482133865356, "learning_rate": 1.975479900892777e-05, "loss": 0.36, "step": 3260 }, { "epoch": 0.07074296362099972, "grad_norm": 1.1988275051116943, "learning_rate": 1.9754049382669548e-05, "loss": 0.3868, "step": 3265 }, { "epoch": 0.07085129894048058, "grad_norm": 1.7185617685317993, "learning_rate": 1.9753298626550824e-05, "loss": 0.3687, "step": 3270 }, { "epoch": 0.07095963425996143, "grad_norm": 1.1802281141281128, "learning_rate": 1.9752546740658555e-05, "loss": 0.4216, "step": 3275 }, { "epoch": 0.0710679695794423, "grad_norm": 2.3718254566192627, "learning_rate": 1.975179372507984e-05, "loss": 0.4496, "step": 3280 }, { "epoch": 0.07117630489892314, "grad_norm": 1.1586743593215942, "learning_rate": 1.9751039579901908e-05, "loss": 0.3326, "step": 3285 }, { "epoch": 0.071284640218404, "grad_norm": 1.0275856256484985, "learning_rate": 1.9750284305212103e-05, "loss": 0.5178, "step": 3290 }, { "epoch": 0.07139297553788486, "grad_norm": 1.1302063465118408, "learning_rate": 1.974952790109793e-05, "loss": 0.3933, "step": 3295 }, { "epoch": 0.07150131085736572, "grad_norm": 1.147288203239441, "learning_rate": 1.974877036764699e-05, "loss": 0.4645, "step": 3300 }, { "epoch": 0.07160964617684658, "grad_norm": 0.9135515689849854, "learning_rate": 1.9748011704947044e-05, "loss": 0.392, "step": 3305 }, { "epoch": 0.07171798149632744, "grad_norm": 0.9430795311927795, "learning_rate": 1.9747251913085965e-05, "loss": 0.4727, "step": 3310 }, { "epoch": 0.07182631681580828, "grad_norm": 1.143859624862671, "learning_rate": 1.9746490992151766e-05, "loss": 0.4043, "step": 3315 }, { "epoch": 0.07193465213528914, "grad_norm": 1.4342327117919922, "learning_rate": 1.974572894223259e-05, "loss": 0.4133, "step": 3320 }, { "epoch": 0.07204298745477, "grad_norm": 1.1156730651855469, "learning_rate": 1.9744965763416703e-05, "loss": 0.4376, "step": 3325 }, { "epoch": 0.07215132277425086, "grad_norm": 1.0766512155532837, "learning_rate": 1.9744201455792514e-05, "loss": 0.437, "step": 3330 }, { "epoch": 0.07225965809373172, "grad_norm": 1.139540433883667, "learning_rate": 1.9743436019448554e-05, "loss": 0.4424, "step": 3335 }, { "epoch": 0.07236799341321258, "grad_norm": 1.1840434074401855, "learning_rate": 1.974266945447349e-05, "loss": 0.3622, "step": 3340 }, { "epoch": 0.07247632873269343, "grad_norm": 1.2086639404296875, "learning_rate": 1.974190176095611e-05, "loss": 0.4541, "step": 3345 }, { "epoch": 0.07258466405217429, "grad_norm": 1.1159608364105225, "learning_rate": 1.974113293898535e-05, "loss": 0.512, "step": 3350 }, { "epoch": 0.07269299937165515, "grad_norm": 1.1010322570800781, "learning_rate": 1.974036298865026e-05, "loss": 0.4284, "step": 3355 }, { "epoch": 0.072801334691136, "grad_norm": 1.5455747842788696, "learning_rate": 1.9739591910040027e-05, "loss": 0.3831, "step": 3360 }, { "epoch": 0.07290967001061686, "grad_norm": 1.0856739282608032, "learning_rate": 1.973881970324397e-05, "loss": 0.4232, "step": 3365 }, { "epoch": 0.07301800533009772, "grad_norm": 1.587934970855713, "learning_rate": 1.973804636835154e-05, "loss": 0.5102, "step": 3370 }, { "epoch": 0.07312634064957857, "grad_norm": 1.2769887447357178, "learning_rate": 1.973727190545231e-05, "loss": 0.3392, "step": 3375 }, { "epoch": 0.07323467596905943, "grad_norm": 1.5210481882095337, "learning_rate": 1.9736496314635998e-05, "loss": 0.3559, "step": 3380 }, { "epoch": 0.07334301128854029, "grad_norm": 1.0424915552139282, "learning_rate": 1.973571959599244e-05, "loss": 0.3619, "step": 3385 }, { "epoch": 0.07345134660802115, "grad_norm": 1.0790736675262451, "learning_rate": 1.973494174961161e-05, "loss": 0.3884, "step": 3390 }, { "epoch": 0.07355968192750201, "grad_norm": 1.0712347030639648, "learning_rate": 1.97341627755836e-05, "loss": 0.4673, "step": 3395 }, { "epoch": 0.07366801724698287, "grad_norm": 0.8339586853981018, "learning_rate": 1.973338267399866e-05, "loss": 0.42, "step": 3400 }, { "epoch": 0.07377635256646371, "grad_norm": 1.3031630516052246, "learning_rate": 1.9732601444947136e-05, "loss": 0.3873, "step": 3405 }, { "epoch": 0.07388468788594457, "grad_norm": 1.1264643669128418, "learning_rate": 1.9731819088519532e-05, "loss": 0.4509, "step": 3410 }, { "epoch": 0.07399302320542543, "grad_norm": 1.668999195098877, "learning_rate": 1.973103560480647e-05, "loss": 0.4993, "step": 3415 }, { "epoch": 0.07410135852490629, "grad_norm": 0.841631293296814, "learning_rate": 1.9730250993898702e-05, "loss": 0.3614, "step": 3420 }, { "epoch": 0.07420969384438715, "grad_norm": 1.0844600200653076, "learning_rate": 1.9729465255887118e-05, "loss": 0.3474, "step": 3425 }, { "epoch": 0.07431802916386801, "grad_norm": 1.0584888458251953, "learning_rate": 1.972867839086273e-05, "loss": 0.3832, "step": 3430 }, { "epoch": 0.07442636448334886, "grad_norm": 1.8828017711639404, "learning_rate": 1.9727890398916686e-05, "loss": 0.4273, "step": 3435 }, { "epoch": 0.07453469980282972, "grad_norm": 1.1300619840621948, "learning_rate": 1.9727101280140263e-05, "loss": 0.3817, "step": 3440 }, { "epoch": 0.07464303512231057, "grad_norm": 0.9114024043083191, "learning_rate": 1.972631103462487e-05, "loss": 0.4496, "step": 3445 }, { "epoch": 0.07475137044179143, "grad_norm": 1.3562575578689575, "learning_rate": 1.972551966246204e-05, "loss": 0.3575, "step": 3450 }, { "epoch": 0.0748597057612723, "grad_norm": 1.2312391996383667, "learning_rate": 1.9724727163743447e-05, "loss": 0.4078, "step": 3455 }, { "epoch": 0.07496804108075315, "grad_norm": 1.0454155206680298, "learning_rate": 1.972393353856089e-05, "loss": 0.3756, "step": 3460 }, { "epoch": 0.075076376400234, "grad_norm": 0.9331857562065125, "learning_rate": 1.97231387870063e-05, "loss": 0.3762, "step": 3465 }, { "epoch": 0.07518471171971486, "grad_norm": 1.3262195587158203, "learning_rate": 1.972234290917173e-05, "loss": 0.3824, "step": 3470 }, { "epoch": 0.07529304703919572, "grad_norm": 1.3099147081375122, "learning_rate": 1.9721545905149373e-05, "loss": 0.3234, "step": 3475 }, { "epoch": 0.07540138235867658, "grad_norm": 1.2116364240646362, "learning_rate": 1.9720747775031553e-05, "loss": 0.3757, "step": 3480 }, { "epoch": 0.07550971767815744, "grad_norm": 1.2530747652053833, "learning_rate": 1.9719948518910722e-05, "loss": 0.398, "step": 3485 }, { "epoch": 0.0756180529976383, "grad_norm": 1.3068552017211914, "learning_rate": 1.9719148136879457e-05, "loss": 0.2859, "step": 3490 }, { "epoch": 0.07572638831711914, "grad_norm": 1.3343865871429443, "learning_rate": 1.9718346629030475e-05, "loss": 0.5033, "step": 3495 }, { "epoch": 0.0758347236366, "grad_norm": 1.2076237201690674, "learning_rate": 1.971754399545662e-05, "loss": 0.4451, "step": 3500 }, { "epoch": 0.07594305895608086, "grad_norm": 1.1481525897979736, "learning_rate": 1.971674023625086e-05, "loss": 0.3947, "step": 3505 }, { "epoch": 0.07605139427556172, "grad_norm": 1.2213941812515259, "learning_rate": 1.9715935351506297e-05, "loss": 0.3707, "step": 3510 }, { "epoch": 0.07615972959504258, "grad_norm": 1.11886727809906, "learning_rate": 1.9715129341316168e-05, "loss": 0.3137, "step": 3515 }, { "epoch": 0.07626806491452344, "grad_norm": 1.2221068143844604, "learning_rate": 1.9714322205773843e-05, "loss": 0.418, "step": 3520 }, { "epoch": 0.07637640023400429, "grad_norm": 1.2156847715377808, "learning_rate": 1.9713513944972808e-05, "loss": 0.3997, "step": 3525 }, { "epoch": 0.07648473555348514, "grad_norm": 1.1012346744537354, "learning_rate": 1.971270455900669e-05, "loss": 0.4155, "step": 3530 }, { "epoch": 0.076593070872966, "grad_norm": 1.1379159688949585, "learning_rate": 1.9711894047969245e-05, "loss": 0.3793, "step": 3535 }, { "epoch": 0.07670140619244686, "grad_norm": 1.3743621110916138, "learning_rate": 1.9711082411954358e-05, "loss": 0.3637, "step": 3540 }, { "epoch": 0.07680974151192772, "grad_norm": 1.0985093116760254, "learning_rate": 1.9710269651056047e-05, "loss": 0.3227, "step": 3545 }, { "epoch": 0.07691807683140858, "grad_norm": 1.8764840364456177, "learning_rate": 1.970945576536846e-05, "loss": 0.3364, "step": 3550 }, { "epoch": 0.07702641215088943, "grad_norm": 0.8986888527870178, "learning_rate": 1.9708640754985862e-05, "loss": 0.2706, "step": 3555 }, { "epoch": 0.07713474747037029, "grad_norm": 1.2737231254577637, "learning_rate": 1.9707824620002676e-05, "loss": 0.3645, "step": 3560 }, { "epoch": 0.07724308278985115, "grad_norm": 0.9719212651252747, "learning_rate": 1.970700736051343e-05, "loss": 0.3813, "step": 3565 }, { "epoch": 0.077351418109332, "grad_norm": 1.217044711112976, "learning_rate": 1.9706188976612788e-05, "loss": 0.402, "step": 3570 }, { "epoch": 0.07745975342881287, "grad_norm": 1.182393193244934, "learning_rate": 1.9705369468395553e-05, "loss": 0.3804, "step": 3575 }, { "epoch": 0.07756808874829371, "grad_norm": 1.1451818943023682, "learning_rate": 1.9704548835956655e-05, "loss": 0.437, "step": 3580 }, { "epoch": 0.07767642406777457, "grad_norm": 0.9547986388206482, "learning_rate": 1.9703727079391144e-05, "loss": 0.4357, "step": 3585 }, { "epoch": 0.07778475938725543, "grad_norm": 1.5197423696517944, "learning_rate": 1.9702904198794216e-05, "loss": 0.3389, "step": 3590 }, { "epoch": 0.07789309470673629, "grad_norm": 0.9130820035934448, "learning_rate": 1.9702080194261187e-05, "loss": 0.3816, "step": 3595 }, { "epoch": 0.07800143002621715, "grad_norm": 1.2845113277435303, "learning_rate": 1.9701255065887502e-05, "loss": 0.357, "step": 3600 }, { "epoch": 0.07810976534569801, "grad_norm": 1.279329538345337, "learning_rate": 1.9700428813768742e-05, "loss": 0.4509, "step": 3605 }, { "epoch": 0.07821810066517886, "grad_norm": 1.1815894842147827, "learning_rate": 1.9699601438000618e-05, "loss": 0.448, "step": 3610 }, { "epoch": 0.07832643598465971, "grad_norm": 1.4317653179168701, "learning_rate": 1.9698772938678966e-05, "loss": 0.4306, "step": 3615 }, { "epoch": 0.07843477130414057, "grad_norm": 1.014473557472229, "learning_rate": 1.9697943315899756e-05, "loss": 0.3882, "step": 3620 }, { "epoch": 0.07854310662362143, "grad_norm": 1.2720977067947388, "learning_rate": 1.9697112569759088e-05, "loss": 0.4034, "step": 3625 }, { "epoch": 0.0786514419431023, "grad_norm": 1.2668312788009644, "learning_rate": 1.9696280700353195e-05, "loss": 0.3396, "step": 3630 }, { "epoch": 0.07875977726258315, "grad_norm": 0.941707193851471, "learning_rate": 1.969544770777843e-05, "loss": 0.3492, "step": 3635 }, { "epoch": 0.078868112582064, "grad_norm": 1.165148138999939, "learning_rate": 1.9694613592131285e-05, "loss": 0.3371, "step": 3640 }, { "epoch": 0.07897644790154486, "grad_norm": 1.1324506998062134, "learning_rate": 1.9693778353508385e-05, "loss": 0.3559, "step": 3645 }, { "epoch": 0.07908478322102572, "grad_norm": 1.2107833623886108, "learning_rate": 1.9692941992006473e-05, "loss": 0.4135, "step": 3650 }, { "epoch": 0.07919311854050658, "grad_norm": 1.7561439275741577, "learning_rate": 1.9692104507722433e-05, "loss": 0.3413, "step": 3655 }, { "epoch": 0.07930145385998744, "grad_norm": 0.9582454562187195, "learning_rate": 1.969126590075327e-05, "loss": 0.3568, "step": 3660 }, { "epoch": 0.0794097891794683, "grad_norm": 1.0101673603057861, "learning_rate": 1.9690426171196128e-05, "loss": 0.3645, "step": 3665 }, { "epoch": 0.07951812449894914, "grad_norm": 1.192751407623291, "learning_rate": 1.968958531914828e-05, "loss": 0.4247, "step": 3670 }, { "epoch": 0.07962645981843, "grad_norm": 1.025607705116272, "learning_rate": 1.9688743344707123e-05, "loss": 0.4962, "step": 3675 }, { "epoch": 0.07973479513791086, "grad_norm": 1.2884777784347534, "learning_rate": 1.968790024797018e-05, "loss": 0.2869, "step": 3680 }, { "epoch": 0.07984313045739172, "grad_norm": 1.4362268447875977, "learning_rate": 1.9687056029035127e-05, "loss": 0.3529, "step": 3685 }, { "epoch": 0.07995146577687258, "grad_norm": 1.5331863164901733, "learning_rate": 1.9686210687999742e-05, "loss": 0.4673, "step": 3690 }, { "epoch": 0.08005980109635344, "grad_norm": 1.1870529651641846, "learning_rate": 1.9685364224961943e-05, "loss": 0.3186, "step": 3695 }, { "epoch": 0.08016813641583428, "grad_norm": 1.4408965110778809, "learning_rate": 1.9684516640019794e-05, "loss": 0.3527, "step": 3700 }, { "epoch": 0.08027647173531514, "grad_norm": 1.1136988401412964, "learning_rate": 1.968366793327146e-05, "loss": 0.336, "step": 3705 }, { "epoch": 0.080384807054796, "grad_norm": 1.2767032384872437, "learning_rate": 1.968281810481526e-05, "loss": 0.4068, "step": 3710 }, { "epoch": 0.08049314237427686, "grad_norm": 1.024316430091858, "learning_rate": 1.9681967154749628e-05, "loss": 0.4063, "step": 3715 }, { "epoch": 0.08060147769375772, "grad_norm": 1.6838806867599487, "learning_rate": 1.968111508317314e-05, "loss": 0.3987, "step": 3720 }, { "epoch": 0.08070981301323858, "grad_norm": 1.1364014148712158, "learning_rate": 1.968026189018449e-05, "loss": 0.3894, "step": 3725 }, { "epoch": 0.08081814833271943, "grad_norm": 0.9654539823532104, "learning_rate": 1.9679407575882514e-05, "loss": 0.3262, "step": 3730 }, { "epoch": 0.08092648365220029, "grad_norm": 1.0515985488891602, "learning_rate": 1.9678552140366167e-05, "loss": 0.3132, "step": 3735 }, { "epoch": 0.08103481897168115, "grad_norm": 1.5097886323928833, "learning_rate": 1.9677695583734537e-05, "loss": 0.3695, "step": 3740 }, { "epoch": 0.081143154291162, "grad_norm": 1.2032216787338257, "learning_rate": 1.9676837906086847e-05, "loss": 0.4949, "step": 3745 }, { "epoch": 0.08125148961064287, "grad_norm": 1.0033072233200073, "learning_rate": 1.9675979107522446e-05, "loss": 0.4211, "step": 3750 }, { "epoch": 0.08135982493012373, "grad_norm": 0.9870221018791199, "learning_rate": 1.967511918814081e-05, "loss": 0.4909, "step": 3755 }, { "epoch": 0.08146816024960457, "grad_norm": 1.2348523139953613, "learning_rate": 1.967425814804155e-05, "loss": 0.3986, "step": 3760 }, { "epoch": 0.08157649556908543, "grad_norm": 1.013107419013977, "learning_rate": 1.9673395987324402e-05, "loss": 0.2897, "step": 3765 }, { "epoch": 0.08168483088856629, "grad_norm": 1.6005858182907104, "learning_rate": 1.967253270608924e-05, "loss": 0.4053, "step": 3770 }, { "epoch": 0.08179316620804715, "grad_norm": 1.0279743671417236, "learning_rate": 1.9671668304436055e-05, "loss": 0.337, "step": 3775 }, { "epoch": 0.08190150152752801, "grad_norm": 1.4522134065628052, "learning_rate": 1.9670802782464978e-05, "loss": 0.3953, "step": 3780 }, { "epoch": 0.08200983684700887, "grad_norm": 1.2721210718154907, "learning_rate": 1.966993614027627e-05, "loss": 0.4226, "step": 3785 }, { "epoch": 0.08211817216648971, "grad_norm": 1.5169260501861572, "learning_rate": 1.9669068377970313e-05, "loss": 0.3949, "step": 3790 }, { "epoch": 0.08222650748597057, "grad_norm": 1.1391340494155884, "learning_rate": 1.9668199495647627e-05, "loss": 0.2992, "step": 3795 }, { "epoch": 0.08233484280545143, "grad_norm": 1.0552788972854614, "learning_rate": 1.9667329493408865e-05, "loss": 0.3729, "step": 3800 }, { "epoch": 0.08244317812493229, "grad_norm": 1.759002447128296, "learning_rate": 1.966645837135479e-05, "loss": 0.4024, "step": 3805 }, { "epoch": 0.08255151344441315, "grad_norm": 1.0496405363082886, "learning_rate": 1.966558612958632e-05, "loss": 0.3699, "step": 3810 }, { "epoch": 0.08265984876389401, "grad_norm": 1.1611920595169067, "learning_rate": 1.9664712768204488e-05, "loss": 0.4292, "step": 3815 }, { "epoch": 0.08276818408337486, "grad_norm": 1.4277451038360596, "learning_rate": 1.9663838287310456e-05, "loss": 0.3899, "step": 3820 }, { "epoch": 0.08287651940285572, "grad_norm": 1.180551528930664, "learning_rate": 1.966296268700553e-05, "loss": 0.4005, "step": 3825 }, { "epoch": 0.08298485472233658, "grad_norm": 1.3949768543243408, "learning_rate": 1.966208596739112e-05, "loss": 0.3604, "step": 3830 }, { "epoch": 0.08309319004181744, "grad_norm": 2.135932683944702, "learning_rate": 1.9661208128568793e-05, "loss": 0.3662, "step": 3835 }, { "epoch": 0.0832015253612983, "grad_norm": 1.257912039756775, "learning_rate": 1.9660329170640227e-05, "loss": 0.4497, "step": 3840 }, { "epoch": 0.08330986068077914, "grad_norm": 1.2616413831710815, "learning_rate": 1.965944909370724e-05, "loss": 0.3929, "step": 3845 }, { "epoch": 0.08341819600026, "grad_norm": 1.4817111492156982, "learning_rate": 1.965856789787177e-05, "loss": 0.4153, "step": 3850 }, { "epoch": 0.08352653131974086, "grad_norm": 1.0647437572479248, "learning_rate": 1.96576855832359e-05, "loss": 0.2905, "step": 3855 }, { "epoch": 0.08363486663922172, "grad_norm": 1.0346893072128296, "learning_rate": 1.9656802149901826e-05, "loss": 0.3493, "step": 3860 }, { "epoch": 0.08374320195870258, "grad_norm": 0.9849586486816406, "learning_rate": 1.965591759797188e-05, "loss": 0.2992, "step": 3865 }, { "epoch": 0.08385153727818344, "grad_norm": 1.5035645961761475, "learning_rate": 1.9655031927548527e-05, "loss": 0.3106, "step": 3870 }, { "epoch": 0.08395987259766428, "grad_norm": 1.3134766817092896, "learning_rate": 1.9654145138734356e-05, "loss": 0.3948, "step": 3875 }, { "epoch": 0.08406820791714514, "grad_norm": 1.1839547157287598, "learning_rate": 1.965325723163209e-05, "loss": 0.3836, "step": 3880 }, { "epoch": 0.084176543236626, "grad_norm": 0.7417203783988953, "learning_rate": 1.9652368206344582e-05, "loss": 0.3472, "step": 3885 }, { "epoch": 0.08428487855610686, "grad_norm": 0.9083147048950195, "learning_rate": 1.9651478062974805e-05, "loss": 0.4491, "step": 3890 }, { "epoch": 0.08439321387558772, "grad_norm": 1.4774885177612305, "learning_rate": 1.965058680162588e-05, "loss": 0.3934, "step": 3895 }, { "epoch": 0.08450154919506858, "grad_norm": 1.1290206909179688, "learning_rate": 1.9649694422401036e-05, "loss": 0.4233, "step": 3900 }, { "epoch": 0.08460988451454943, "grad_norm": 1.10462486743927, "learning_rate": 1.9648800925403645e-05, "loss": 0.4493, "step": 3905 }, { "epoch": 0.08471821983403029, "grad_norm": 1.2459752559661865, "learning_rate": 1.9647906310737206e-05, "loss": 0.3581, "step": 3910 }, { "epoch": 0.08482655515351115, "grad_norm": 1.290757656097412, "learning_rate": 1.964701057850535e-05, "loss": 0.3346, "step": 3915 }, { "epoch": 0.084934890472992, "grad_norm": 1.0892118215560913, "learning_rate": 1.9646113728811827e-05, "loss": 0.3624, "step": 3920 }, { "epoch": 0.08504322579247287, "grad_norm": 1.0563299655914307, "learning_rate": 1.9645215761760528e-05, "loss": 0.416, "step": 3925 }, { "epoch": 0.08515156111195372, "grad_norm": 1.0545005798339844, "learning_rate": 1.964431667745547e-05, "loss": 0.3853, "step": 3930 }, { "epoch": 0.08525989643143457, "grad_norm": 0.7483544945716858, "learning_rate": 1.9643416476000796e-05, "loss": 0.4084, "step": 3935 }, { "epoch": 0.08536823175091543, "grad_norm": 1.1998835802078247, "learning_rate": 1.964251515750078e-05, "loss": 0.4145, "step": 3940 }, { "epoch": 0.08547656707039629, "grad_norm": 0.9179049134254456, "learning_rate": 1.9641612722059827e-05, "loss": 0.3678, "step": 3945 }, { "epoch": 0.08558490238987715, "grad_norm": 1.0901994705200195, "learning_rate": 1.9640709169782473e-05, "loss": 0.3658, "step": 3950 }, { "epoch": 0.08569323770935801, "grad_norm": 1.1676990985870361, "learning_rate": 1.9639804500773382e-05, "loss": 0.4047, "step": 3955 }, { "epoch": 0.08580157302883887, "grad_norm": 1.4177378416061401, "learning_rate": 1.963889871513734e-05, "loss": 0.3647, "step": 3960 }, { "epoch": 0.08590990834831971, "grad_norm": 0.9889872074127197, "learning_rate": 1.9637991812979278e-05, "loss": 0.3701, "step": 3965 }, { "epoch": 0.08601824366780057, "grad_norm": 1.2022820711135864, "learning_rate": 1.9637083794404238e-05, "loss": 0.3564, "step": 3970 }, { "epoch": 0.08612657898728143, "grad_norm": 0.9854957461357117, "learning_rate": 1.9636174659517402e-05, "loss": 0.4337, "step": 3975 }, { "epoch": 0.08623491430676229, "grad_norm": 3.65543270111084, "learning_rate": 1.9635264408424084e-05, "loss": 0.4062, "step": 3980 }, { "epoch": 0.08634324962624315, "grad_norm": 1.2827547788619995, "learning_rate": 1.963435304122972e-05, "loss": 0.4761, "step": 3985 }, { "epoch": 0.08645158494572401, "grad_norm": 1.5806204080581665, "learning_rate": 1.963344055803988e-05, "loss": 0.4475, "step": 3990 }, { "epoch": 0.08655992026520486, "grad_norm": 1.1954008340835571, "learning_rate": 1.9632526958960257e-05, "loss": 0.4273, "step": 3995 }, { "epoch": 0.08666825558468572, "grad_norm": 1.6596771478652954, "learning_rate": 1.9631612244096684e-05, "loss": 0.4092, "step": 4000 }, { "epoch": 0.08677659090416658, "grad_norm": 1.1249966621398926, "learning_rate": 1.9630696413555113e-05, "loss": 0.3593, "step": 4005 }, { "epoch": 0.08688492622364744, "grad_norm": 1.3598008155822754, "learning_rate": 1.9629779467441633e-05, "loss": 0.4784, "step": 4010 }, { "epoch": 0.0869932615431283, "grad_norm": 1.3960472345352173, "learning_rate": 1.962886140586245e-05, "loss": 0.3566, "step": 4015 }, { "epoch": 0.08710159686260915, "grad_norm": 1.5517023801803589, "learning_rate": 1.962794222892392e-05, "loss": 0.4319, "step": 4020 }, { "epoch": 0.08720993218209, "grad_norm": 1.3008824586868286, "learning_rate": 1.96270219367325e-05, "loss": 0.3804, "step": 4025 }, { "epoch": 0.08731826750157086, "grad_norm": 1.3230003118515015, "learning_rate": 1.962610052939481e-05, "loss": 0.3609, "step": 4030 }, { "epoch": 0.08742660282105172, "grad_norm": 1.4045583009719849, "learning_rate": 1.9625178007017573e-05, "loss": 0.394, "step": 4035 }, { "epoch": 0.08753493814053258, "grad_norm": 0.945906937122345, "learning_rate": 1.9624254369707644e-05, "loss": 0.307, "step": 4040 }, { "epoch": 0.08764327346001344, "grad_norm": 1.22568678855896, "learning_rate": 1.962332961757202e-05, "loss": 0.4077, "step": 4045 }, { "epoch": 0.0877516087794943, "grad_norm": 1.3246421813964844, "learning_rate": 1.962240375071782e-05, "loss": 0.4035, "step": 4050 }, { "epoch": 0.08785994409897514, "grad_norm": 1.2881847620010376, "learning_rate": 1.9621476769252283e-05, "loss": 0.4784, "step": 4055 }, { "epoch": 0.087968279418456, "grad_norm": 1.4005892276763916, "learning_rate": 1.9620548673282794e-05, "loss": 0.3855, "step": 4060 }, { "epoch": 0.08807661473793686, "grad_norm": 1.5028802156448364, "learning_rate": 1.9619619462916858e-05, "loss": 0.4556, "step": 4065 }, { "epoch": 0.08818495005741772, "grad_norm": 1.0775099992752075, "learning_rate": 1.9618689138262112e-05, "loss": 0.3582, "step": 4070 }, { "epoch": 0.08829328537689858, "grad_norm": 0.7439400553703308, "learning_rate": 1.9617757699426315e-05, "loss": 0.428, "step": 4075 }, { "epoch": 0.08840162069637944, "grad_norm": 1.1456583738327026, "learning_rate": 1.961682514651736e-05, "loss": 0.4249, "step": 4080 }, { "epoch": 0.08850995601586029, "grad_norm": 1.1970908641815186, "learning_rate": 1.9615891479643274e-05, "loss": 0.3912, "step": 4085 }, { "epoch": 0.08861829133534115, "grad_norm": 1.2043359279632568, "learning_rate": 1.9614956698912205e-05, "loss": 0.3677, "step": 4090 }, { "epoch": 0.088726626654822, "grad_norm": 1.2054431438446045, "learning_rate": 1.9614020804432435e-05, "loss": 0.4114, "step": 4095 }, { "epoch": 0.08883496197430286, "grad_norm": 1.1697070598602295, "learning_rate": 1.9613083796312374e-05, "loss": 0.3958, "step": 4100 }, { "epoch": 0.08894329729378372, "grad_norm": 1.223549485206604, "learning_rate": 1.961214567466056e-05, "loss": 0.3335, "step": 4105 }, { "epoch": 0.08905163261326457, "grad_norm": 1.0770491361618042, "learning_rate": 1.9611206439585657e-05, "loss": 0.3892, "step": 4110 }, { "epoch": 0.08915996793274543, "grad_norm": 1.2206414937973022, "learning_rate": 1.9610266091196464e-05, "loss": 0.3656, "step": 4115 }, { "epoch": 0.08926830325222629, "grad_norm": 0.7941572070121765, "learning_rate": 1.9609324629601908e-05, "loss": 0.4041, "step": 4120 }, { "epoch": 0.08937663857170715, "grad_norm": 0.8616718053817749, "learning_rate": 1.9608382054911043e-05, "loss": 0.4176, "step": 4125 }, { "epoch": 0.08948497389118801, "grad_norm": 1.0947825908660889, "learning_rate": 1.9607438367233044e-05, "loss": 0.3927, "step": 4130 }, { "epoch": 0.08959330921066887, "grad_norm": 1.2013540267944336, "learning_rate": 1.9606493566677236e-05, "loss": 0.4315, "step": 4135 }, { "epoch": 0.08970164453014971, "grad_norm": 1.3193316459655762, "learning_rate": 1.960554765335305e-05, "loss": 0.504, "step": 4140 }, { "epoch": 0.08980997984963057, "grad_norm": 1.387759804725647, "learning_rate": 1.960460062737006e-05, "loss": 0.5352, "step": 4145 }, { "epoch": 0.08991831516911143, "grad_norm": 0.7658320665359497, "learning_rate": 1.9603652488837963e-05, "loss": 0.389, "step": 4150 }, { "epoch": 0.09002665048859229, "grad_norm": 1.2668801546096802, "learning_rate": 1.960270323786659e-05, "loss": 0.3945, "step": 4155 }, { "epoch": 0.09013498580807315, "grad_norm": 0.8084719777107239, "learning_rate": 1.9601752874565897e-05, "loss": 0.3304, "step": 4160 }, { "epoch": 0.09024332112755401, "grad_norm": 0.8878810405731201, "learning_rate": 1.9600801399045966e-05, "loss": 0.3846, "step": 4165 }, { "epoch": 0.09035165644703486, "grad_norm": 0.8977913856506348, "learning_rate": 1.959984881141701e-05, "loss": 0.3406, "step": 4170 }, { "epoch": 0.09045999176651572, "grad_norm": 1.5146002769470215, "learning_rate": 1.9598895111789378e-05, "loss": 0.3933, "step": 4175 }, { "epoch": 0.09056832708599657, "grad_norm": 1.2287758588790894, "learning_rate": 1.9597940300273536e-05, "loss": 0.374, "step": 4180 }, { "epoch": 0.09067666240547743, "grad_norm": 1.3263790607452393, "learning_rate": 1.959698437698009e-05, "loss": 0.3355, "step": 4185 }, { "epoch": 0.0907849977249583, "grad_norm": 1.3896645307540894, "learning_rate": 1.959602734201977e-05, "loss": 0.3952, "step": 4190 }, { "epoch": 0.09089333304443915, "grad_norm": 1.155259609222412, "learning_rate": 1.9595069195503424e-05, "loss": 0.3787, "step": 4195 }, { "epoch": 0.09100166836392, "grad_norm": 1.174607515335083, "learning_rate": 1.959410993754205e-05, "loss": 0.3474, "step": 4200 }, { "epoch": 0.09111000368340086, "grad_norm": 1.2289972305297852, "learning_rate": 1.959314956824676e-05, "loss": 0.3212, "step": 4205 }, { "epoch": 0.09121833900288172, "grad_norm": 1.7381138801574707, "learning_rate": 1.9592188087728794e-05, "loss": 0.4292, "step": 4210 }, { "epoch": 0.09132667432236258, "grad_norm": 1.267330288887024, "learning_rate": 1.959122549609953e-05, "loss": 0.4124, "step": 4215 }, { "epoch": 0.09143500964184344, "grad_norm": 1.0339354276657104, "learning_rate": 1.9590261793470474e-05, "loss": 0.433, "step": 4220 }, { "epoch": 0.0915433449613243, "grad_norm": 1.0153870582580566, "learning_rate": 1.9589296979953248e-05, "loss": 0.4094, "step": 4225 }, { "epoch": 0.09165168028080514, "grad_norm": 1.1968287229537964, "learning_rate": 1.9588331055659614e-05, "loss": 0.3835, "step": 4230 }, { "epoch": 0.091760015600286, "grad_norm": 1.2067933082580566, "learning_rate": 1.9587364020701458e-05, "loss": 0.307, "step": 4235 }, { "epoch": 0.09186835091976686, "grad_norm": 1.3052622079849243, "learning_rate": 1.95863958751908e-05, "loss": 0.4386, "step": 4240 }, { "epoch": 0.09197668623924772, "grad_norm": 1.1747773885726929, "learning_rate": 1.958542661923979e-05, "loss": 0.3915, "step": 4245 }, { "epoch": 0.09208502155872858, "grad_norm": 1.7386921644210815, "learning_rate": 1.958445625296069e-05, "loss": 0.4265, "step": 4250 }, { "epoch": 0.09219335687820944, "grad_norm": 1.1160526275634766, "learning_rate": 1.958348477646591e-05, "loss": 0.3693, "step": 4255 }, { "epoch": 0.09230169219769029, "grad_norm": 1.3982429504394531, "learning_rate": 1.958251218986798e-05, "loss": 0.3211, "step": 4260 }, { "epoch": 0.09241002751717114, "grad_norm": 1.2059887647628784, "learning_rate": 1.9581538493279553e-05, "loss": 0.2549, "step": 4265 }, { "epoch": 0.092518362836652, "grad_norm": 1.2881962060928345, "learning_rate": 1.958056368681343e-05, "loss": 0.3896, "step": 4270 }, { "epoch": 0.09262669815613286, "grad_norm": 1.2872347831726074, "learning_rate": 1.9579587770582517e-05, "loss": 0.3873, "step": 4275 }, { "epoch": 0.09273503347561372, "grad_norm": 1.3229944705963135, "learning_rate": 1.9578610744699868e-05, "loss": 0.4445, "step": 4280 }, { "epoch": 0.09284336879509458, "grad_norm": 0.9801967144012451, "learning_rate": 1.9577632609278644e-05, "loss": 0.4127, "step": 4285 }, { "epoch": 0.09295170411457543, "grad_norm": 0.9314708113670349, "learning_rate": 1.957665336443216e-05, "loss": 0.4458, "step": 4290 }, { "epoch": 0.09306003943405629, "grad_norm": 1.142872929573059, "learning_rate": 1.957567301027384e-05, "loss": 0.266, "step": 4295 }, { "epoch": 0.09316837475353715, "grad_norm": 1.07169508934021, "learning_rate": 1.9574691546917247e-05, "loss": 0.3303, "step": 4300 }, { "epoch": 0.09327671007301801, "grad_norm": 1.2839175462722778, "learning_rate": 1.9573708974476068e-05, "loss": 0.4479, "step": 4305 }, { "epoch": 0.09338504539249887, "grad_norm": 0.9478782415390015, "learning_rate": 1.9572725293064118e-05, "loss": 0.3594, "step": 4310 }, { "epoch": 0.09349338071197973, "grad_norm": 1.4425170421600342, "learning_rate": 1.957174050279534e-05, "loss": 0.4332, "step": 4315 }, { "epoch": 0.09360171603146057, "grad_norm": 1.1201879978179932, "learning_rate": 1.957075460378381e-05, "loss": 0.3821, "step": 4320 }, { "epoch": 0.09371005135094143, "grad_norm": 1.3060563802719116, "learning_rate": 1.956976759614373e-05, "loss": 0.4804, "step": 4325 }, { "epoch": 0.09381838667042229, "grad_norm": 0.8034870028495789, "learning_rate": 1.956877947998943e-05, "loss": 0.2687, "step": 4330 }, { "epoch": 0.09392672198990315, "grad_norm": 1.1342018842697144, "learning_rate": 1.9567790255435367e-05, "loss": 0.3234, "step": 4335 }, { "epoch": 0.09403505730938401, "grad_norm": 1.142920970916748, "learning_rate": 1.9566799922596127e-05, "loss": 0.3787, "step": 4340 }, { "epoch": 0.09414339262886487, "grad_norm": 1.5363693237304688, "learning_rate": 1.9565808481586427e-05, "loss": 0.4236, "step": 4345 }, { "epoch": 0.09425172794834571, "grad_norm": 1.5167816877365112, "learning_rate": 1.9564815932521113e-05, "loss": 0.3233, "step": 4350 }, { "epoch": 0.09436006326782657, "grad_norm": 1.3309780359268188, "learning_rate": 1.9563822275515153e-05, "loss": 0.4317, "step": 4355 }, { "epoch": 0.09446839858730743, "grad_norm": 0.9685962796211243, "learning_rate": 1.9562827510683644e-05, "loss": 0.316, "step": 4360 }, { "epoch": 0.0945767339067883, "grad_norm": 1.2371504306793213, "learning_rate": 1.9561831638141823e-05, "loss": 0.2973, "step": 4365 }, { "epoch": 0.09468506922626915, "grad_norm": 1.034538984298706, "learning_rate": 1.956083465800504e-05, "loss": 0.3244, "step": 4370 }, { "epoch": 0.09479340454575, "grad_norm": 0.9255212545394897, "learning_rate": 1.955983657038879e-05, "loss": 0.2909, "step": 4375 }, { "epoch": 0.09490173986523086, "grad_norm": 1.5400609970092773, "learning_rate": 1.9558837375408673e-05, "loss": 0.4074, "step": 4380 }, { "epoch": 0.09501007518471172, "grad_norm": 1.1553335189819336, "learning_rate": 1.955783707318044e-05, "loss": 0.3261, "step": 4385 }, { "epoch": 0.09511841050419258, "grad_norm": 1.2631937265396118, "learning_rate": 1.9556835663819956e-05, "loss": 0.3729, "step": 4390 }, { "epoch": 0.09522674582367344, "grad_norm": 0.9635251760482788, "learning_rate": 1.9555833147443225e-05, "loss": 0.3695, "step": 4395 }, { "epoch": 0.0953350811431543, "grad_norm": 1.292364239692688, "learning_rate": 1.955482952416637e-05, "loss": 0.3622, "step": 4400 }, { "epoch": 0.09544341646263514, "grad_norm": 1.5330755710601807, "learning_rate": 1.9553824794105644e-05, "loss": 0.5413, "step": 4405 }, { "epoch": 0.095551751782116, "grad_norm": 1.1094893217086792, "learning_rate": 1.9552818957377433e-05, "loss": 0.4226, "step": 4410 }, { "epoch": 0.09566008710159686, "grad_norm": 1.214246392250061, "learning_rate": 1.9551812014098244e-05, "loss": 0.3665, "step": 4415 }, { "epoch": 0.09576842242107772, "grad_norm": 1.0480891466140747, "learning_rate": 1.9550803964384725e-05, "loss": 0.3955, "step": 4420 }, { "epoch": 0.09587675774055858, "grad_norm": 1.3797378540039062, "learning_rate": 1.9549794808353636e-05, "loss": 0.4585, "step": 4425 }, { "epoch": 0.09598509306003944, "grad_norm": 1.202212929725647, "learning_rate": 1.954878454612187e-05, "loss": 0.4604, "step": 4430 }, { "epoch": 0.09609342837952028, "grad_norm": 1.1731823682785034, "learning_rate": 1.9547773177806458e-05, "loss": 0.3623, "step": 4435 }, { "epoch": 0.09620176369900114, "grad_norm": 1.0321940183639526, "learning_rate": 1.954676070352455e-05, "loss": 0.4008, "step": 4440 }, { "epoch": 0.096310099018482, "grad_norm": 1.1535675525665283, "learning_rate": 1.954574712339343e-05, "loss": 0.2937, "step": 4445 }, { "epoch": 0.09641843433796286, "grad_norm": 1.1795470714569092, "learning_rate": 1.9544732437530492e-05, "loss": 0.4693, "step": 4450 }, { "epoch": 0.09652676965744372, "grad_norm": 1.6312172412872314, "learning_rate": 1.9543716646053284e-05, "loss": 0.4086, "step": 4455 }, { "epoch": 0.09663510497692458, "grad_norm": 0.6435692310333252, "learning_rate": 1.954269974907947e-05, "loss": 0.2729, "step": 4460 }, { "epoch": 0.09674344029640543, "grad_norm": 1.038539171218872, "learning_rate": 1.9541681746726836e-05, "loss": 0.411, "step": 4465 }, { "epoch": 0.09685177561588629, "grad_norm": 1.176721453666687, "learning_rate": 1.9540662639113308e-05, "loss": 0.2493, "step": 4470 }, { "epoch": 0.09696011093536715, "grad_norm": 1.3171398639678955, "learning_rate": 1.953964242635693e-05, "loss": 0.4787, "step": 4475 }, { "epoch": 0.097068446254848, "grad_norm": 1.2728848457336426, "learning_rate": 1.9538621108575885e-05, "loss": 0.4211, "step": 4480 }, { "epoch": 0.09717678157432887, "grad_norm": 1.3330787420272827, "learning_rate": 1.953759868588847e-05, "loss": 0.4042, "step": 4485 }, { "epoch": 0.09728511689380973, "grad_norm": 0.7702111005783081, "learning_rate": 1.9536575158413122e-05, "loss": 0.4019, "step": 4490 }, { "epoch": 0.09739345221329057, "grad_norm": 1.378275990486145, "learning_rate": 1.95355505262684e-05, "loss": 0.379, "step": 4495 }, { "epoch": 0.09750178753277143, "grad_norm": 0.7106673121452332, "learning_rate": 1.953452478957299e-05, "loss": 0.3132, "step": 4500 }, { "epoch": 0.09761012285225229, "grad_norm": 0.9397929906845093, "learning_rate": 1.9533497948445712e-05, "loss": 0.3385, "step": 4505 }, { "epoch": 0.09771845817173315, "grad_norm": 1.289496660232544, "learning_rate": 1.9532470003005508e-05, "loss": 0.3964, "step": 4510 }, { "epoch": 0.09782679349121401, "grad_norm": 0.8697097897529602, "learning_rate": 1.953144095337145e-05, "loss": 0.4182, "step": 4515 }, { "epoch": 0.09793512881069487, "grad_norm": 1.1126161813735962, "learning_rate": 1.953041079966274e-05, "loss": 0.4443, "step": 4520 }, { "epoch": 0.09804346413017571, "grad_norm": 1.5025838613510132, "learning_rate": 1.9529379541998703e-05, "loss": 0.3485, "step": 4525 }, { "epoch": 0.09815179944965657, "grad_norm": 1.11224365234375, "learning_rate": 1.95283471804988e-05, "loss": 0.3667, "step": 4530 }, { "epoch": 0.09826013476913743, "grad_norm": 1.2440526485443115, "learning_rate": 1.9527313715282604e-05, "loss": 0.3054, "step": 4535 }, { "epoch": 0.09836847008861829, "grad_norm": 1.616292953491211, "learning_rate": 1.9526279146469837e-05, "loss": 0.3896, "step": 4540 }, { "epoch": 0.09847680540809915, "grad_norm": 1.9213906526565552, "learning_rate": 1.9525243474180335e-05, "loss": 0.391, "step": 4545 }, { "epoch": 0.09858514072758001, "grad_norm": 1.358481764793396, "learning_rate": 1.9524206698534063e-05, "loss": 0.4629, "step": 4550 }, { "epoch": 0.09869347604706086, "grad_norm": 1.1274127960205078, "learning_rate": 1.952316881965112e-05, "loss": 0.4252, "step": 4555 }, { "epoch": 0.09880181136654172, "grad_norm": 1.5168495178222656, "learning_rate": 1.9522129837651725e-05, "loss": 0.364, "step": 4560 }, { "epoch": 0.09891014668602258, "grad_norm": 1.5198248624801636, "learning_rate": 1.952108975265623e-05, "loss": 0.3768, "step": 4565 }, { "epoch": 0.09901848200550344, "grad_norm": 0.833404541015625, "learning_rate": 1.952004856478511e-05, "loss": 0.3239, "step": 4570 }, { "epoch": 0.0991268173249843, "grad_norm": 0.9821022152900696, "learning_rate": 1.9519006274158978e-05, "loss": 0.4348, "step": 4575 }, { "epoch": 0.09923515264446515, "grad_norm": 0.7074096202850342, "learning_rate": 1.9517962880898562e-05, "loss": 0.4434, "step": 4580 }, { "epoch": 0.099343487963946, "grad_norm": 1.1470550298690796, "learning_rate": 1.9516918385124724e-05, "loss": 0.4344, "step": 4585 }, { "epoch": 0.09945182328342686, "grad_norm": 1.2840991020202637, "learning_rate": 1.9515872786958458e-05, "loss": 0.599, "step": 4590 }, { "epoch": 0.09956015860290772, "grad_norm": 1.6077078580856323, "learning_rate": 1.9514826086520874e-05, "loss": 0.3743, "step": 4595 }, { "epoch": 0.09966849392238858, "grad_norm": 1.4577490091323853, "learning_rate": 1.9513778283933226e-05, "loss": 0.3933, "step": 4600 }, { "epoch": 0.09977682924186944, "grad_norm": 1.2111834287643433, "learning_rate": 1.9512729379316875e-05, "loss": 0.3187, "step": 4605 }, { "epoch": 0.0998851645613503, "grad_norm": 0.9801928997039795, "learning_rate": 1.9511679372793326e-05, "loss": 0.367, "step": 4610 }, { "epoch": 0.09999349988083114, "grad_norm": 1.2288010120391846, "learning_rate": 1.9510628264484207e-05, "loss": 0.4225, "step": 4615 }, { "epoch": 0.100101835200312, "grad_norm": 1.0677763223648071, "learning_rate": 1.9509576054511274e-05, "loss": 0.4449, "step": 4620 }, { "epoch": 0.10021017051979286, "grad_norm": 1.1943830251693726, "learning_rate": 1.9508522742996408e-05, "loss": 0.2997, "step": 4625 }, { "epoch": 0.10031850583927372, "grad_norm": 1.334741234779358, "learning_rate": 1.950746833006162e-05, "loss": 0.5095, "step": 4630 }, { "epoch": 0.10042684115875458, "grad_norm": 1.4240647554397583, "learning_rate": 1.950641281582905e-05, "loss": 0.3456, "step": 4635 }, { "epoch": 0.10053517647823544, "grad_norm": 0.9668065309524536, "learning_rate": 1.9505356200420956e-05, "loss": 0.3186, "step": 4640 }, { "epoch": 0.10064351179771629, "grad_norm": 1.1408915519714355, "learning_rate": 1.950429848395974e-05, "loss": 0.3352, "step": 4645 }, { "epoch": 0.10075184711719715, "grad_norm": 1.5712425708770752, "learning_rate": 1.9503239666567916e-05, "loss": 0.3052, "step": 4650 }, { "epoch": 0.100860182436678, "grad_norm": 0.8596181869506836, "learning_rate": 1.950217974836814e-05, "loss": 0.3922, "step": 4655 }, { "epoch": 0.10096851775615887, "grad_norm": 1.2255077362060547, "learning_rate": 1.950111872948318e-05, "loss": 0.4648, "step": 4660 }, { "epoch": 0.10107685307563972, "grad_norm": 1.2689309120178223, "learning_rate": 1.9500056610035944e-05, "loss": 0.3792, "step": 4665 }, { "epoch": 0.10118518839512057, "grad_norm": 1.1986000537872314, "learning_rate": 1.949899339014946e-05, "loss": 0.2883, "step": 4670 }, { "epoch": 0.10129352371460143, "grad_norm": 1.4144368171691895, "learning_rate": 1.9497929069946884e-05, "loss": 0.3125, "step": 4675 }, { "epoch": 0.10140185903408229, "grad_norm": 1.3229330778121948, "learning_rate": 1.9496863649551507e-05, "loss": 0.3312, "step": 4680 }, { "epoch": 0.10151019435356315, "grad_norm": 1.697083592414856, "learning_rate": 1.9495797129086736e-05, "loss": 0.3659, "step": 4685 }, { "epoch": 0.10161852967304401, "grad_norm": 1.1315674781799316, "learning_rate": 1.9494729508676117e-05, "loss": 0.3342, "step": 4690 }, { "epoch": 0.10172686499252487, "grad_norm": 1.6078869104385376, "learning_rate": 1.9493660788443312e-05, "loss": 0.48, "step": 4695 }, { "epoch": 0.10183520031200571, "grad_norm": 1.256417155265808, "learning_rate": 1.9492590968512125e-05, "loss": 0.3633, "step": 4700 }, { "epoch": 0.10194353563148657, "grad_norm": 0.9591279625892639, "learning_rate": 1.949152004900647e-05, "loss": 0.4021, "step": 4705 }, { "epoch": 0.10205187095096743, "grad_norm": 1.3695883750915527, "learning_rate": 1.94904480300504e-05, "loss": 0.3573, "step": 4710 }, { "epoch": 0.10216020627044829, "grad_norm": 1.2765024900436401, "learning_rate": 1.9489374911768097e-05, "loss": 0.4256, "step": 4715 }, { "epoch": 0.10226854158992915, "grad_norm": 0.9663264155387878, "learning_rate": 1.9488300694283858e-05, "loss": 0.4155, "step": 4720 }, { "epoch": 0.10237687690941001, "grad_norm": 1.2139972448349, "learning_rate": 1.948722537772212e-05, "loss": 0.3119, "step": 4725 }, { "epoch": 0.10248521222889086, "grad_norm": 1.3517833948135376, "learning_rate": 1.9486148962207437e-05, "loss": 0.4146, "step": 4730 }, { "epoch": 0.10259354754837172, "grad_norm": 1.6692355871200562, "learning_rate": 1.94850714478645e-05, "loss": 0.4571, "step": 4735 }, { "epoch": 0.10270188286785258, "grad_norm": 1.6209861040115356, "learning_rate": 1.9483992834818126e-05, "loss": 0.4028, "step": 4740 }, { "epoch": 0.10281021818733344, "grad_norm": 0.9997550249099731, "learning_rate": 1.948291312319325e-05, "loss": 0.3995, "step": 4745 }, { "epoch": 0.1029185535068143, "grad_norm": 1.196950912475586, "learning_rate": 1.9481832313114943e-05, "loss": 0.4708, "step": 4750 }, { "epoch": 0.10302688882629515, "grad_norm": 1.1994768381118774, "learning_rate": 1.94807504047084e-05, "loss": 0.2952, "step": 4755 }, { "epoch": 0.103135224145776, "grad_norm": 2.244211196899414, "learning_rate": 1.947966739809894e-05, "loss": 0.3822, "step": 4760 }, { "epoch": 0.10324355946525686, "grad_norm": 1.1490862369537354, "learning_rate": 1.9478583293412024e-05, "loss": 0.4969, "step": 4765 }, { "epoch": 0.10335189478473772, "grad_norm": 0.9491375684738159, "learning_rate": 1.947749809077322e-05, "loss": 0.308, "step": 4770 }, { "epoch": 0.10346023010421858, "grad_norm": 1.5222408771514893, "learning_rate": 1.9476411790308237e-05, "loss": 0.3417, "step": 4775 }, { "epoch": 0.10356856542369944, "grad_norm": 1.4236810207366943, "learning_rate": 1.94753243921429e-05, "loss": 0.3537, "step": 4780 }, { "epoch": 0.1036769007431803, "grad_norm": 1.4345446825027466, "learning_rate": 1.9474235896403177e-05, "loss": 0.3619, "step": 4785 }, { "epoch": 0.10378523606266114, "grad_norm": 1.4420177936553955, "learning_rate": 1.9473146303215146e-05, "loss": 0.3886, "step": 4790 }, { "epoch": 0.103893571382142, "grad_norm": 1.2754989862442017, "learning_rate": 1.9472055612705026e-05, "loss": 0.3901, "step": 4795 }, { "epoch": 0.10400190670162286, "grad_norm": 1.541136622428894, "learning_rate": 1.9470963824999153e-05, "loss": 0.4366, "step": 4800 }, { "epoch": 0.10411024202110372, "grad_norm": 1.2027157545089722, "learning_rate": 1.9469870940224e-05, "loss": 0.3859, "step": 4805 }, { "epoch": 0.10421857734058458, "grad_norm": 1.243912935256958, "learning_rate": 1.9468776958506152e-05, "loss": 0.3331, "step": 4810 }, { "epoch": 0.10432691266006544, "grad_norm": 1.3068673610687256, "learning_rate": 1.946768187997234e-05, "loss": 0.3798, "step": 4815 }, { "epoch": 0.10443524797954629, "grad_norm": 0.7214394807815552, "learning_rate": 1.9466585704749406e-05, "loss": 0.4764, "step": 4820 }, { "epoch": 0.10454358329902715, "grad_norm": 1.1296573877334595, "learning_rate": 1.9465488432964326e-05, "loss": 0.3321, "step": 4825 }, { "epoch": 0.104651918618508, "grad_norm": 1.4310539960861206, "learning_rate": 1.9464390064744206e-05, "loss": 0.4001, "step": 4830 }, { "epoch": 0.10476025393798886, "grad_norm": 1.0659269094467163, "learning_rate": 1.9463290600216274e-05, "loss": 0.4603, "step": 4835 }, { "epoch": 0.10486858925746972, "grad_norm": 1.1671820878982544, "learning_rate": 1.9462190039507884e-05, "loss": 0.4484, "step": 4840 }, { "epoch": 0.10497692457695058, "grad_norm": 1.6691430807113647, "learning_rate": 1.946108838274652e-05, "loss": 0.4156, "step": 4845 }, { "epoch": 0.10508525989643143, "grad_norm": 1.6497533321380615, "learning_rate": 1.9459985630059795e-05, "loss": 0.4223, "step": 4850 }, { "epoch": 0.10519359521591229, "grad_norm": 1.2830666303634644, "learning_rate": 1.945888178157545e-05, "loss": 0.4274, "step": 4855 }, { "epoch": 0.10530193053539315, "grad_norm": 0.8882331252098083, "learning_rate": 1.9457776837421342e-05, "loss": 0.3445, "step": 4860 }, { "epoch": 0.10541026585487401, "grad_norm": 1.1274100542068481, "learning_rate": 1.9456670797725463e-05, "loss": 0.3628, "step": 4865 }, { "epoch": 0.10551860117435487, "grad_norm": 1.1462095975875854, "learning_rate": 1.9455563662615934e-05, "loss": 0.4172, "step": 4870 }, { "epoch": 0.10562693649383573, "grad_norm": 1.1799477338790894, "learning_rate": 1.9454455432221003e-05, "loss": 0.3364, "step": 4875 }, { "epoch": 0.10573527181331657, "grad_norm": 1.5469187498092651, "learning_rate": 1.9453346106669032e-05, "loss": 0.4038, "step": 4880 }, { "epoch": 0.10584360713279743, "grad_norm": 1.1472665071487427, "learning_rate": 1.945223568608853e-05, "loss": 0.5267, "step": 4885 }, { "epoch": 0.10595194245227829, "grad_norm": 1.2314555644989014, "learning_rate": 1.9451124170608112e-05, "loss": 0.4257, "step": 4890 }, { "epoch": 0.10606027777175915, "grad_norm": 1.568852424621582, "learning_rate": 1.9450011560356542e-05, "loss": 0.3429, "step": 4895 }, { "epoch": 0.10616861309124001, "grad_norm": 1.1765899658203125, "learning_rate": 1.9448897855462695e-05, "loss": 0.3271, "step": 4900 }, { "epoch": 0.10627694841072087, "grad_norm": 1.143932819366455, "learning_rate": 1.9447783056055574e-05, "loss": 0.3803, "step": 4905 }, { "epoch": 0.10638528373020172, "grad_norm": 1.318422794342041, "learning_rate": 1.9446667162264314e-05, "loss": 0.3552, "step": 4910 }, { "epoch": 0.10649361904968258, "grad_norm": 0.7440243363380432, "learning_rate": 1.9445550174218172e-05, "loss": 0.3036, "step": 4915 }, { "epoch": 0.10660195436916343, "grad_norm": 1.4195634126663208, "learning_rate": 1.944443209204654e-05, "loss": 0.4139, "step": 4920 }, { "epoch": 0.1067102896886443, "grad_norm": 1.211711049079895, "learning_rate": 1.9443312915878925e-05, "loss": 0.4366, "step": 4925 }, { "epoch": 0.10681862500812515, "grad_norm": 1.1694635152816772, "learning_rate": 1.944219264584497e-05, "loss": 0.3823, "step": 4930 }, { "epoch": 0.106926960327606, "grad_norm": 1.3024725914001465, "learning_rate": 1.9441071282074443e-05, "loss": 0.3575, "step": 4935 }, { "epoch": 0.10703529564708686, "grad_norm": 1.1584150791168213, "learning_rate": 1.9439948824697232e-05, "loss": 0.4684, "step": 4940 }, { "epoch": 0.10714363096656772, "grad_norm": 1.2098791599273682, "learning_rate": 1.9438825273843363e-05, "loss": 0.39, "step": 4945 }, { "epoch": 0.10725196628604858, "grad_norm": 1.3306530714035034, "learning_rate": 1.9437700629642977e-05, "loss": 0.374, "step": 4950 }, { "epoch": 0.10736030160552944, "grad_norm": 1.273998498916626, "learning_rate": 1.9436574892226355e-05, "loss": 0.313, "step": 4955 }, { "epoch": 0.1074686369250103, "grad_norm": 1.3917303085327148, "learning_rate": 1.943544806172389e-05, "loss": 0.4332, "step": 4960 }, { "epoch": 0.10757697224449114, "grad_norm": 0.9747551083564758, "learning_rate": 1.9434320138266103e-05, "loss": 0.4587, "step": 4965 }, { "epoch": 0.107685307563972, "grad_norm": 1.4247477054595947, "learning_rate": 1.9433191121983662e-05, "loss": 0.3441, "step": 4970 }, { "epoch": 0.10779364288345286, "grad_norm": 1.387208104133606, "learning_rate": 1.9432061013007333e-05, "loss": 0.4002, "step": 4975 }, { "epoch": 0.10790197820293372, "grad_norm": 1.1625107526779175, "learning_rate": 1.9430929811468033e-05, "loss": 0.3084, "step": 4980 }, { "epoch": 0.10801031352241458, "grad_norm": 1.5144050121307373, "learning_rate": 1.9429797517496785e-05, "loss": 0.4484, "step": 4985 }, { "epoch": 0.10811864884189544, "grad_norm": 1.2447617053985596, "learning_rate": 1.9428664131224754e-05, "loss": 0.3939, "step": 4990 }, { "epoch": 0.10822698416137629, "grad_norm": 1.0492956638336182, "learning_rate": 1.942752965278323e-05, "loss": 0.5223, "step": 4995 }, { "epoch": 0.10833531948085715, "grad_norm": 1.1721203327178955, "learning_rate": 1.9426394082303614e-05, "loss": 0.3535, "step": 5000 }, { "epoch": 0.108443654800338, "grad_norm": 1.084883213043213, "learning_rate": 1.9425257419917455e-05, "loss": 0.3922, "step": 5005 }, { "epoch": 0.10855199011981886, "grad_norm": 1.8625476360321045, "learning_rate": 1.9424119665756412e-05, "loss": 0.2948, "step": 5010 }, { "epoch": 0.10866032543929972, "grad_norm": 1.38339364528656, "learning_rate": 1.942298081995228e-05, "loss": 0.3731, "step": 5015 }, { "epoch": 0.10876866075878058, "grad_norm": 1.5167311429977417, "learning_rate": 1.9421840882636975e-05, "loss": 0.3914, "step": 5020 }, { "epoch": 0.10887699607826143, "grad_norm": 1.4952630996704102, "learning_rate": 1.9420699853942543e-05, "loss": 0.3075, "step": 5025 }, { "epoch": 0.10898533139774229, "grad_norm": 1.5656189918518066, "learning_rate": 1.9419557734001156e-05, "loss": 0.3435, "step": 5030 }, { "epoch": 0.10909366671722315, "grad_norm": 1.104972243309021, "learning_rate": 1.941841452294511e-05, "loss": 0.4008, "step": 5035 }, { "epoch": 0.10920200203670401, "grad_norm": 1.1634396314620972, "learning_rate": 1.941727022090683e-05, "loss": 0.4437, "step": 5040 }, { "epoch": 0.10931033735618487, "grad_norm": 1.293220043182373, "learning_rate": 1.9416124828018864e-05, "loss": 0.3497, "step": 5045 }, { "epoch": 0.10941867267566573, "grad_norm": 1.2805070877075195, "learning_rate": 1.941497834441389e-05, "loss": 0.3688, "step": 5050 }, { "epoch": 0.10952700799514657, "grad_norm": 1.373594880104065, "learning_rate": 1.9413830770224717e-05, "loss": 0.4272, "step": 5055 }, { "epoch": 0.10963534331462743, "grad_norm": 0.9062872529029846, "learning_rate": 1.941268210558426e-05, "loss": 0.4191, "step": 5060 }, { "epoch": 0.10974367863410829, "grad_norm": 1.3490540981292725, "learning_rate": 1.9411532350625588e-05, "loss": 0.4932, "step": 5065 }, { "epoch": 0.10985201395358915, "grad_norm": 1.101868748664856, "learning_rate": 1.9410381505481878e-05, "loss": 0.3566, "step": 5070 }, { "epoch": 0.10996034927307001, "grad_norm": 0.808931827545166, "learning_rate": 1.9409229570286436e-05, "loss": 0.2956, "step": 5075 }, { "epoch": 0.11006868459255087, "grad_norm": 1.0684130191802979, "learning_rate": 1.94080765451727e-05, "loss": 0.3547, "step": 5080 }, { "epoch": 0.11017701991203172, "grad_norm": 0.8862087726593018, "learning_rate": 1.940692243027423e-05, "loss": 0.3544, "step": 5085 }, { "epoch": 0.11028535523151257, "grad_norm": 1.1188112497329712, "learning_rate": 1.940576722572471e-05, "loss": 0.3717, "step": 5090 }, { "epoch": 0.11039369055099343, "grad_norm": 0.9585795402526855, "learning_rate": 1.9404610931657957e-05, "loss": 0.3193, "step": 5095 }, { "epoch": 0.1105020258704743, "grad_norm": 1.0136128664016724, "learning_rate": 1.9403453548207905e-05, "loss": 0.382, "step": 5100 }, { "epoch": 0.11061036118995515, "grad_norm": 1.055727481842041, "learning_rate": 1.940229507550863e-05, "loss": 0.3741, "step": 5105 }, { "epoch": 0.11071869650943601, "grad_norm": 1.2464016675949097, "learning_rate": 1.940113551369431e-05, "loss": 0.4736, "step": 5110 }, { "epoch": 0.11082703182891686, "grad_norm": 1.0075304508209229, "learning_rate": 1.9399974862899273e-05, "loss": 0.3574, "step": 5115 }, { "epoch": 0.11093536714839772, "grad_norm": 1.607925534248352, "learning_rate": 1.9398813123257958e-05, "loss": 0.4236, "step": 5120 }, { "epoch": 0.11104370246787858, "grad_norm": 1.315664529800415, "learning_rate": 1.9397650294904938e-05, "loss": 0.3588, "step": 5125 }, { "epoch": 0.11115203778735944, "grad_norm": 1.298354148864746, "learning_rate": 1.9396486377974912e-05, "loss": 0.3652, "step": 5130 }, { "epoch": 0.1112603731068403, "grad_norm": 1.187354564666748, "learning_rate": 1.9395321372602693e-05, "loss": 0.482, "step": 5135 }, { "epoch": 0.11136870842632116, "grad_norm": 1.3305225372314453, "learning_rate": 1.939415527892324e-05, "loss": 0.3851, "step": 5140 }, { "epoch": 0.111477043745802, "grad_norm": 1.30003821849823, "learning_rate": 1.9392988097071615e-05, "loss": 0.4043, "step": 5145 }, { "epoch": 0.11158537906528286, "grad_norm": 1.55089271068573, "learning_rate": 1.939181982718303e-05, "loss": 0.4015, "step": 5150 }, { "epoch": 0.11169371438476372, "grad_norm": 1.3365881443023682, "learning_rate": 1.939065046939281e-05, "loss": 0.3446, "step": 5155 }, { "epoch": 0.11180204970424458, "grad_norm": 1.5729119777679443, "learning_rate": 1.93894800238364e-05, "loss": 0.3972, "step": 5160 }, { "epoch": 0.11191038502372544, "grad_norm": 1.406652808189392, "learning_rate": 1.938830849064939e-05, "loss": 0.3402, "step": 5165 }, { "epoch": 0.1120187203432063, "grad_norm": 1.7526947259902954, "learning_rate": 1.938713586996747e-05, "loss": 0.3588, "step": 5170 }, { "epoch": 0.11212705566268714, "grad_norm": 1.055021047592163, "learning_rate": 1.9385962161926485e-05, "loss": 0.3204, "step": 5175 }, { "epoch": 0.112235390982168, "grad_norm": 1.391162395477295, "learning_rate": 1.9384787366662384e-05, "loss": 0.484, "step": 5180 }, { "epoch": 0.11234372630164886, "grad_norm": 1.4114015102386475, "learning_rate": 1.938361148431125e-05, "loss": 0.3856, "step": 5185 }, { "epoch": 0.11245206162112972, "grad_norm": 1.4799163341522217, "learning_rate": 1.9382434515009292e-05, "loss": 0.4505, "step": 5190 }, { "epoch": 0.11256039694061058, "grad_norm": 1.1679420471191406, "learning_rate": 1.9381256458892842e-05, "loss": 0.3664, "step": 5195 }, { "epoch": 0.11266873226009143, "grad_norm": 1.0893003940582275, "learning_rate": 1.9380077316098366e-05, "loss": 0.3172, "step": 5200 }, { "epoch": 0.11277706757957229, "grad_norm": 1.2095463275909424, "learning_rate": 1.9378897086762446e-05, "loss": 0.3383, "step": 5205 }, { "epoch": 0.11288540289905315, "grad_norm": 1.2293697595596313, "learning_rate": 1.9377715771021793e-05, "loss": 0.3793, "step": 5210 }, { "epoch": 0.112993738218534, "grad_norm": 1.4088623523712158, "learning_rate": 1.9376533369013248e-05, "loss": 0.4711, "step": 5215 }, { "epoch": 0.11310207353801487, "grad_norm": 1.5057454109191895, "learning_rate": 1.9375349880873768e-05, "loss": 0.3582, "step": 5220 }, { "epoch": 0.11321040885749573, "grad_norm": 1.3951990604400635, "learning_rate": 1.9374165306740452e-05, "loss": 0.4067, "step": 5225 }, { "epoch": 0.11331874417697657, "grad_norm": 1.3125436305999756, "learning_rate": 1.937297964675051e-05, "loss": 0.3439, "step": 5230 }, { "epoch": 0.11342707949645743, "grad_norm": 1.401720643043518, "learning_rate": 1.937179290104128e-05, "loss": 0.3224, "step": 5235 }, { "epoch": 0.11353541481593829, "grad_norm": 1.715541124343872, "learning_rate": 1.9370605069750233e-05, "loss": 0.3848, "step": 5240 }, { "epoch": 0.11364375013541915, "grad_norm": 3.842730760574341, "learning_rate": 1.9369416153014962e-05, "loss": 0.4159, "step": 5245 }, { "epoch": 0.11375208545490001, "grad_norm": 1.1265101432800293, "learning_rate": 1.9368226150973184e-05, "loss": 0.4082, "step": 5250 }, { "epoch": 0.11386042077438087, "grad_norm": 1.4300426244735718, "learning_rate": 1.936703506376274e-05, "loss": 0.4294, "step": 5255 }, { "epoch": 0.11396875609386171, "grad_norm": 1.4527214765548706, "learning_rate": 1.9365842891521603e-05, "loss": 0.3565, "step": 5260 }, { "epoch": 0.11407709141334257, "grad_norm": 1.5670454502105713, "learning_rate": 1.9364649634387865e-05, "loss": 0.4067, "step": 5265 }, { "epoch": 0.11418542673282343, "grad_norm": 1.133362054824829, "learning_rate": 1.9363455292499754e-05, "loss": 0.3427, "step": 5270 }, { "epoch": 0.11429376205230429, "grad_norm": 1.2197705507278442, "learning_rate": 1.936225986599561e-05, "loss": 0.3667, "step": 5275 }, { "epoch": 0.11440209737178515, "grad_norm": 1.2619127035140991, "learning_rate": 1.9361063355013906e-05, "loss": 0.4969, "step": 5280 }, { "epoch": 0.11451043269126601, "grad_norm": 0.8158186674118042, "learning_rate": 1.935986575969324e-05, "loss": 0.3328, "step": 5285 }, { "epoch": 0.11461876801074686, "grad_norm": 4.941709995269775, "learning_rate": 1.935866708017234e-05, "loss": 0.358, "step": 5290 }, { "epoch": 0.11472710333022772, "grad_norm": 1.4470871686935425, "learning_rate": 1.9357467316590052e-05, "loss": 0.4365, "step": 5295 }, { "epoch": 0.11483543864970858, "grad_norm": 0.9829011559486389, "learning_rate": 1.9356266469085345e-05, "loss": 0.3508, "step": 5300 }, { "epoch": 0.11494377396918944, "grad_norm": 1.1342339515686035, "learning_rate": 1.9355064537797327e-05, "loss": 0.4518, "step": 5305 }, { "epoch": 0.1150521092886703, "grad_norm": 1.3453587293624878, "learning_rate": 1.9353861522865223e-05, "loss": 0.3035, "step": 5310 }, { "epoch": 0.11516044460815116, "grad_norm": 1.4267960786819458, "learning_rate": 1.9352657424428383e-05, "loss": 0.4444, "step": 5315 }, { "epoch": 0.115268779927632, "grad_norm": 1.286023497581482, "learning_rate": 1.935145224262628e-05, "loss": 0.4094, "step": 5320 }, { "epoch": 0.11537711524711286, "grad_norm": 1.2107311487197876, "learning_rate": 1.9350245977598522e-05, "loss": 0.303, "step": 5325 }, { "epoch": 0.11548545056659372, "grad_norm": 1.2550597190856934, "learning_rate": 1.934903862948483e-05, "loss": 0.4423, "step": 5330 }, { "epoch": 0.11559378588607458, "grad_norm": 1.0691847801208496, "learning_rate": 1.9347830198425064e-05, "loss": 0.3728, "step": 5335 }, { "epoch": 0.11570212120555544, "grad_norm": 1.0224707126617432, "learning_rate": 1.93466206845592e-05, "loss": 0.2717, "step": 5340 }, { "epoch": 0.1158104565250363, "grad_norm": 1.293371558189392, "learning_rate": 1.9345410088027344e-05, "loss": 0.3632, "step": 5345 }, { "epoch": 0.11591879184451714, "grad_norm": 1.098051905632019, "learning_rate": 1.934419840896972e-05, "loss": 0.3879, "step": 5350 }, { "epoch": 0.116027127163998, "grad_norm": 1.6038882732391357, "learning_rate": 1.934298564752669e-05, "loss": 0.3646, "step": 5355 }, { "epoch": 0.11613546248347886, "grad_norm": 1.607257604598999, "learning_rate": 1.9341771803838728e-05, "loss": 0.4423, "step": 5360 }, { "epoch": 0.11624379780295972, "grad_norm": 1.2064218521118164, "learning_rate": 1.9340556878046444e-05, "loss": 0.4031, "step": 5365 }, { "epoch": 0.11635213312244058, "grad_norm": 1.450278878211975, "learning_rate": 1.9339340870290567e-05, "loss": 0.355, "step": 5370 }, { "epoch": 0.11646046844192144, "grad_norm": 1.1941559314727783, "learning_rate": 1.933812378071195e-05, "loss": 0.4869, "step": 5375 }, { "epoch": 0.11656880376140229, "grad_norm": 1.6452513933181763, "learning_rate": 1.9336905609451584e-05, "loss": 0.4028, "step": 5380 }, { "epoch": 0.11667713908088315, "grad_norm": 2.124267816543579, "learning_rate": 1.9335686356650565e-05, "loss": 0.3916, "step": 5385 }, { "epoch": 0.116785474400364, "grad_norm": 1.524208903312683, "learning_rate": 1.933446602245013e-05, "loss": 0.3445, "step": 5390 }, { "epoch": 0.11689380971984487, "grad_norm": 1.075977087020874, "learning_rate": 1.933324460699164e-05, "loss": 0.385, "step": 5395 }, { "epoch": 0.11700214503932573, "grad_norm": 1.1795214414596558, "learning_rate": 1.9332022110416573e-05, "loss": 0.3799, "step": 5400 }, { "epoch": 0.11711048035880658, "grad_norm": 1.136246681213379, "learning_rate": 1.933079853286654e-05, "loss": 0.3896, "step": 5405 }, { "epoch": 0.11721881567828743, "grad_norm": 1.4494661092758179, "learning_rate": 1.932957387448327e-05, "loss": 0.4029, "step": 5410 }, { "epoch": 0.11732715099776829, "grad_norm": 1.4341870546340942, "learning_rate": 1.9328348135408626e-05, "loss": 0.3545, "step": 5415 }, { "epoch": 0.11743548631724915, "grad_norm": 1.304880976676941, "learning_rate": 1.9327121315784587e-05, "loss": 0.3557, "step": 5420 }, { "epoch": 0.11754382163673001, "grad_norm": 1.0836076736450195, "learning_rate": 1.932589341575326e-05, "loss": 0.3154, "step": 5425 }, { "epoch": 0.11765215695621087, "grad_norm": 1.2717632055282593, "learning_rate": 1.932466443545689e-05, "loss": 0.3258, "step": 5430 }, { "epoch": 0.11776049227569173, "grad_norm": 1.050511121749878, "learning_rate": 1.9323434375037825e-05, "loss": 0.3642, "step": 5435 }, { "epoch": 0.11786882759517257, "grad_norm": 1.1138664484024048, "learning_rate": 1.9322203234638553e-05, "loss": 0.3607, "step": 5440 }, { "epoch": 0.11797716291465343, "grad_norm": 1.505310297012329, "learning_rate": 1.9320971014401682e-05, "loss": 0.4027, "step": 5445 }, { "epoch": 0.11808549823413429, "grad_norm": 1.4175904989242554, "learning_rate": 1.931973771446995e-05, "loss": 0.4838, "step": 5450 }, { "epoch": 0.11819383355361515, "grad_norm": 1.0164544582366943, "learning_rate": 1.931850333498621e-05, "loss": 0.3976, "step": 5455 }, { "epoch": 0.11830216887309601, "grad_norm": 0.8425665497779846, "learning_rate": 1.931726787609345e-05, "loss": 0.4141, "step": 5460 }, { "epoch": 0.11841050419257686, "grad_norm": 1.536169409751892, "learning_rate": 1.9316031337934783e-05, "loss": 0.3331, "step": 5465 }, { "epoch": 0.11851883951205772, "grad_norm": 1.536963701248169, "learning_rate": 1.9314793720653434e-05, "loss": 0.4234, "step": 5470 }, { "epoch": 0.11862717483153858, "grad_norm": 1.719548225402832, "learning_rate": 1.9313555024392775e-05, "loss": 0.3149, "step": 5475 }, { "epoch": 0.11873551015101944, "grad_norm": 1.384429693222046, "learning_rate": 1.931231524929628e-05, "loss": 0.3584, "step": 5480 }, { "epoch": 0.1188438454705003, "grad_norm": 1.3464243412017822, "learning_rate": 1.9311074395507558e-05, "loss": 0.3846, "step": 5485 }, { "epoch": 0.11895218078998115, "grad_norm": 1.1345666646957397, "learning_rate": 1.9309832463170352e-05, "loss": 0.2782, "step": 5490 }, { "epoch": 0.119060516109462, "grad_norm": 1.1891241073608398, "learning_rate": 1.9308589452428513e-05, "loss": 0.5643, "step": 5495 }, { "epoch": 0.11916885142894286, "grad_norm": 1.220039963722229, "learning_rate": 1.9307345363426032e-05, "loss": 0.2854, "step": 5500 }, { "epoch": 0.11927718674842372, "grad_norm": 1.1934576034545898, "learning_rate": 1.9306100196307012e-05, "loss": 0.41, "step": 5505 }, { "epoch": 0.11938552206790458, "grad_norm": 1.3958042860031128, "learning_rate": 1.9304853951215693e-05, "loss": 0.2963, "step": 5510 }, { "epoch": 0.11949385738738544, "grad_norm": 1.239056944847107, "learning_rate": 1.930360662829643e-05, "loss": 0.4465, "step": 5515 }, { "epoch": 0.1196021927068663, "grad_norm": 0.8546872138977051, "learning_rate": 1.9302358227693703e-05, "loss": 0.3257, "step": 5520 }, { "epoch": 0.11971052802634714, "grad_norm": 1.1661312580108643, "learning_rate": 1.930110874955213e-05, "loss": 0.504, "step": 5525 }, { "epoch": 0.119818863345828, "grad_norm": 0.7984006404876709, "learning_rate": 1.9299858194016434e-05, "loss": 0.3532, "step": 5530 }, { "epoch": 0.11992719866530886, "grad_norm": 1.1712627410888672, "learning_rate": 1.929860656123148e-05, "loss": 0.29, "step": 5535 }, { "epoch": 0.12003553398478972, "grad_norm": 1.2870489358901978, "learning_rate": 1.9297353851342252e-05, "loss": 0.4095, "step": 5540 }, { "epoch": 0.12014386930427058, "grad_norm": 1.0019659996032715, "learning_rate": 1.929610006449385e-05, "loss": 0.3152, "step": 5545 }, { "epoch": 0.12025220462375144, "grad_norm": 1.8674389123916626, "learning_rate": 1.9294845200831512e-05, "loss": 0.3778, "step": 5550 }, { "epoch": 0.12036053994323229, "grad_norm": 1.3012778759002686, "learning_rate": 1.9293589260500596e-05, "loss": 0.3719, "step": 5555 }, { "epoch": 0.12046887526271315, "grad_norm": 1.5107263326644897, "learning_rate": 1.929233224364658e-05, "loss": 0.2777, "step": 5560 }, { "epoch": 0.120577210582194, "grad_norm": 1.2503010034561157, "learning_rate": 1.9291074150415075e-05, "loss": 0.3556, "step": 5565 }, { "epoch": 0.12068554590167486, "grad_norm": 1.648234248161316, "learning_rate": 1.9289814980951812e-05, "loss": 0.4474, "step": 5570 }, { "epoch": 0.12079388122115572, "grad_norm": 1.1863151788711548, "learning_rate": 1.928855473540264e-05, "loss": 0.3646, "step": 5575 }, { "epoch": 0.12090221654063658, "grad_norm": 1.2176815271377563, "learning_rate": 1.9287293413913548e-05, "loss": 0.3998, "step": 5580 }, { "epoch": 0.12101055186011743, "grad_norm": 1.3683847188949585, "learning_rate": 1.9286031016630634e-05, "loss": 0.3733, "step": 5585 }, { "epoch": 0.12111888717959829, "grad_norm": 1.4106706380844116, "learning_rate": 1.9284767543700137e-05, "loss": 0.3523, "step": 5590 }, { "epoch": 0.12122722249907915, "grad_norm": 1.7282942533493042, "learning_rate": 1.92835029952684e-05, "loss": 0.3681, "step": 5595 }, { "epoch": 0.12133555781856001, "grad_norm": 2.0030460357666016, "learning_rate": 1.9282237371481913e-05, "loss": 0.321, "step": 5600 }, { "epoch": 0.12144389313804087, "grad_norm": 1.1269586086273193, "learning_rate": 1.928097067248727e-05, "loss": 0.3566, "step": 5605 }, { "epoch": 0.12155222845752173, "grad_norm": 1.5298105478286743, "learning_rate": 1.9279702898431205e-05, "loss": 0.4255, "step": 5610 }, { "epoch": 0.12166056377700257, "grad_norm": 1.328581690788269, "learning_rate": 1.927843404946057e-05, "loss": 0.5004, "step": 5615 }, { "epoch": 0.12176889909648343, "grad_norm": 1.0626214742660522, "learning_rate": 1.927716412572234e-05, "loss": 0.2684, "step": 5620 }, { "epoch": 0.12187723441596429, "grad_norm": 1.6640797853469849, "learning_rate": 1.9275893127363615e-05, "loss": 0.2516, "step": 5625 }, { "epoch": 0.12198556973544515, "grad_norm": 1.2397880554199219, "learning_rate": 1.927462105453163e-05, "loss": 0.4051, "step": 5630 }, { "epoch": 0.12209390505492601, "grad_norm": 1.0439112186431885, "learning_rate": 1.9273347907373727e-05, "loss": 0.3077, "step": 5635 }, { "epoch": 0.12220224037440687, "grad_norm": 1.2118492126464844, "learning_rate": 1.9272073686037386e-05, "loss": 0.4159, "step": 5640 }, { "epoch": 0.12231057569388772, "grad_norm": 1.223753809928894, "learning_rate": 1.92707983906702e-05, "loss": 0.2977, "step": 5645 }, { "epoch": 0.12241891101336858, "grad_norm": 1.8034964799880981, "learning_rate": 1.92695220214199e-05, "loss": 0.3566, "step": 5650 }, { "epoch": 0.12252724633284943, "grad_norm": 1.3395522832870483, "learning_rate": 1.926824457843433e-05, "loss": 0.455, "step": 5655 }, { "epoch": 0.1226355816523303, "grad_norm": 2.017810344696045, "learning_rate": 1.9266966061861466e-05, "loss": 0.4124, "step": 5660 }, { "epoch": 0.12274391697181115, "grad_norm": 1.497436285018921, "learning_rate": 1.92656864718494e-05, "loss": 0.3917, "step": 5665 }, { "epoch": 0.12285225229129201, "grad_norm": 1.174426555633545, "learning_rate": 1.9264405808546357e-05, "loss": 0.3169, "step": 5670 }, { "epoch": 0.12296058761077286, "grad_norm": 1.4659128189086914, "learning_rate": 1.9263124072100682e-05, "loss": 0.3668, "step": 5675 }, { "epoch": 0.12306892293025372, "grad_norm": 0.988694965839386, "learning_rate": 1.9261841262660846e-05, "loss": 0.2722, "step": 5680 }, { "epoch": 0.12317725824973458, "grad_norm": 1.1211625337600708, "learning_rate": 1.926055738037544e-05, "loss": 0.3711, "step": 5685 }, { "epoch": 0.12328559356921544, "grad_norm": 1.1120331287384033, "learning_rate": 1.9259272425393187e-05, "loss": 0.3817, "step": 5690 }, { "epoch": 0.1233939288886963, "grad_norm": 1.1881513595581055, "learning_rate": 1.9257986397862925e-05, "loss": 0.4072, "step": 5695 }, { "epoch": 0.12350226420817716, "grad_norm": 1.1956967115402222, "learning_rate": 1.9256699297933626e-05, "loss": 0.3396, "step": 5700 }, { "epoch": 0.123610599527658, "grad_norm": 1.4639779329299927, "learning_rate": 1.9255411125754376e-05, "loss": 0.4163, "step": 5705 }, { "epoch": 0.12371893484713886, "grad_norm": 1.3030807971954346, "learning_rate": 1.9254121881474394e-05, "loss": 0.4007, "step": 5710 }, { "epoch": 0.12382727016661972, "grad_norm": 1.1423823833465576, "learning_rate": 1.925283156524302e-05, "loss": 0.4707, "step": 5715 }, { "epoch": 0.12393560548610058, "grad_norm": 1.4233025312423706, "learning_rate": 1.9251540177209714e-05, "loss": 0.3557, "step": 5720 }, { "epoch": 0.12404394080558144, "grad_norm": 1.2238774299621582, "learning_rate": 1.9250247717524072e-05, "loss": 0.4078, "step": 5725 }, { "epoch": 0.1241522761250623, "grad_norm": 1.2464277744293213, "learning_rate": 1.9248954186335796e-05, "loss": 0.3943, "step": 5730 }, { "epoch": 0.12426061144454315, "grad_norm": 1.8427073955535889, "learning_rate": 1.9247659583794728e-05, "loss": 0.3281, "step": 5735 }, { "epoch": 0.124368946764024, "grad_norm": 1.5681782960891724, "learning_rate": 1.9246363910050828e-05, "loss": 0.3341, "step": 5740 }, { "epoch": 0.12447728208350486, "grad_norm": 0.9808512926101685, "learning_rate": 1.9245067165254176e-05, "loss": 0.4769, "step": 5745 }, { "epoch": 0.12458561740298572, "grad_norm": 1.3608478307724, "learning_rate": 1.9243769349554985e-05, "loss": 0.4425, "step": 5750 }, { "epoch": 0.12469395272246658, "grad_norm": 1.2635064125061035, "learning_rate": 1.924247046310359e-05, "loss": 0.3619, "step": 5755 }, { "epoch": 0.12480228804194743, "grad_norm": 2.156212568283081, "learning_rate": 1.9241170506050442e-05, "loss": 0.3081, "step": 5760 }, { "epoch": 0.12491062336142829, "grad_norm": 1.2430481910705566, "learning_rate": 1.9239869478546124e-05, "loss": 0.4809, "step": 5765 }, { "epoch": 0.12501895868090915, "grad_norm": 1.2673237323760986, "learning_rate": 1.923856738074134e-05, "loss": 0.4079, "step": 5770 }, { "epoch": 0.12512729400039, "grad_norm": 1.3768178224563599, "learning_rate": 1.923726421278692e-05, "loss": 0.3697, "step": 5775 }, { "epoch": 0.12523562931987087, "grad_norm": 1.9462485313415527, "learning_rate": 1.923595997483381e-05, "loss": 0.3339, "step": 5780 }, { "epoch": 0.1253439646393517, "grad_norm": 1.2434275150299072, "learning_rate": 1.9234654667033098e-05, "loss": 0.3285, "step": 5785 }, { "epoch": 0.12545229995883259, "grad_norm": 1.408157467842102, "learning_rate": 1.9233348289535972e-05, "loss": 0.4182, "step": 5790 }, { "epoch": 0.12556063527831343, "grad_norm": 1.1106675863265991, "learning_rate": 1.9232040842493766e-05, "loss": 0.4156, "step": 5795 }, { "epoch": 0.1256689705977943, "grad_norm": 1.334194302558899, "learning_rate": 1.9230732326057923e-05, "loss": 0.3367, "step": 5800 }, { "epoch": 0.12577730591727515, "grad_norm": 1.2040232419967651, "learning_rate": 1.9229422740380016e-05, "loss": 0.3518, "step": 5805 }, { "epoch": 0.125885641236756, "grad_norm": 1.5097557306289673, "learning_rate": 1.9228112085611747e-05, "loss": 0.3378, "step": 5810 }, { "epoch": 0.12599397655623687, "grad_norm": 1.409122109413147, "learning_rate": 1.9226800361904924e-05, "loss": 0.4157, "step": 5815 }, { "epoch": 0.12610231187571772, "grad_norm": 1.21300208568573, "learning_rate": 1.9225487569411497e-05, "loss": 0.4234, "step": 5820 }, { "epoch": 0.1262106471951986, "grad_norm": 1.197640299797058, "learning_rate": 1.9224173708283534e-05, "loss": 0.3384, "step": 5825 }, { "epoch": 0.12631898251467943, "grad_norm": 1.0141842365264893, "learning_rate": 1.9222858778673225e-05, "loss": 0.3952, "step": 5830 }, { "epoch": 0.12642731783416028, "grad_norm": 1.1584911346435547, "learning_rate": 1.9221542780732884e-05, "loss": 0.3413, "step": 5835 }, { "epoch": 0.12653565315364115, "grad_norm": 1.0613099336624146, "learning_rate": 1.9220225714614953e-05, "loss": 0.3424, "step": 5840 }, { "epoch": 0.126643988473122, "grad_norm": 1.6533114910125732, "learning_rate": 1.921890758047199e-05, "loss": 0.4534, "step": 5845 }, { "epoch": 0.12675232379260287, "grad_norm": 1.3065744638442993, "learning_rate": 1.9217588378456683e-05, "loss": 0.3733, "step": 5850 }, { "epoch": 0.12686065911208372, "grad_norm": 0.967128574848175, "learning_rate": 1.921626810872184e-05, "loss": 0.3412, "step": 5855 }, { "epoch": 0.1269689944315646, "grad_norm": 0.8706000447273254, "learning_rate": 1.92149467714204e-05, "loss": 0.3728, "step": 5860 }, { "epoch": 0.12707732975104544, "grad_norm": 1.4541302919387817, "learning_rate": 1.9213624366705416e-05, "loss": 0.2329, "step": 5865 }, { "epoch": 0.12718566507052628, "grad_norm": 0.9875240921974182, "learning_rate": 1.9212300894730066e-05, "loss": 0.3342, "step": 5870 }, { "epoch": 0.12729400039000716, "grad_norm": 1.5492712259292603, "learning_rate": 1.9210976355647658e-05, "loss": 0.3926, "step": 5875 }, { "epoch": 0.127402335709488, "grad_norm": 1.2519214153289795, "learning_rate": 1.9209650749611622e-05, "loss": 0.3682, "step": 5880 }, { "epoch": 0.12751067102896887, "grad_norm": 1.7266699075698853, "learning_rate": 1.9208324076775506e-05, "loss": 0.4402, "step": 5885 }, { "epoch": 0.12761900634844972, "grad_norm": 1.3146443367004395, "learning_rate": 1.920699633729299e-05, "loss": 0.3985, "step": 5890 }, { "epoch": 0.12772734166793057, "grad_norm": 1.0657134056091309, "learning_rate": 1.9205667531317863e-05, "loss": 0.4137, "step": 5895 }, { "epoch": 0.12783567698741144, "grad_norm": 1.2700743675231934, "learning_rate": 1.9204337659004057e-05, "loss": 0.364, "step": 5900 }, { "epoch": 0.12794401230689229, "grad_norm": 1.6164170503616333, "learning_rate": 1.9203006720505614e-05, "loss": 0.3443, "step": 5905 }, { "epoch": 0.12805234762637316, "grad_norm": 1.629775047302246, "learning_rate": 1.92016747159767e-05, "loss": 0.3858, "step": 5910 }, { "epoch": 0.128160682945854, "grad_norm": 1.540149211883545, "learning_rate": 1.9200341645571618e-05, "loss": 0.4268, "step": 5915 }, { "epoch": 0.12826901826533488, "grad_norm": 1.2520838975906372, "learning_rate": 1.919900750944477e-05, "loss": 0.403, "step": 5920 }, { "epoch": 0.12837735358481572, "grad_norm": 1.1451207399368286, "learning_rate": 1.919767230775071e-05, "loss": 0.4509, "step": 5925 }, { "epoch": 0.12848568890429657, "grad_norm": 1.4012590646743774, "learning_rate": 1.919633604064409e-05, "loss": 0.2641, "step": 5930 }, { "epoch": 0.12859402422377744, "grad_norm": 1.3494069576263428, "learning_rate": 1.91949987082797e-05, "loss": 0.3024, "step": 5935 }, { "epoch": 0.1287023595432583, "grad_norm": 1.301985740661621, "learning_rate": 1.9193660310812454e-05, "loss": 0.4365, "step": 5940 }, { "epoch": 0.12881069486273916, "grad_norm": 1.4698071479797363, "learning_rate": 1.9192320848397382e-05, "loss": 0.4228, "step": 5945 }, { "epoch": 0.12891903018222, "grad_norm": 0.9681501388549805, "learning_rate": 1.9190980321189637e-05, "loss": 0.3531, "step": 5950 }, { "epoch": 0.12902736550170085, "grad_norm": 1.4048932790756226, "learning_rate": 1.9189638729344506e-05, "loss": 0.4785, "step": 5955 }, { "epoch": 0.12913570082118173, "grad_norm": 1.199576735496521, "learning_rate": 1.918829607301739e-05, "loss": 0.363, "step": 5960 }, { "epoch": 0.12924403614066257, "grad_norm": 1.1854875087738037, "learning_rate": 1.918695235236381e-05, "loss": 0.3669, "step": 5965 }, { "epoch": 0.12935237146014344, "grad_norm": 1.56834077835083, "learning_rate": 1.9185607567539424e-05, "loss": 0.3877, "step": 5970 }, { "epoch": 0.1294607067796243, "grad_norm": 1.772512435913086, "learning_rate": 1.91842617187e-05, "loss": 0.3365, "step": 5975 }, { "epoch": 0.12956904209910516, "grad_norm": 1.409999966621399, "learning_rate": 1.918291480600144e-05, "loss": 0.4177, "step": 5980 }, { "epoch": 0.129677377418586, "grad_norm": 1.2277623414993286, "learning_rate": 1.9181566829599755e-05, "loss": 0.3147, "step": 5985 }, { "epoch": 0.12978571273806686, "grad_norm": 1.4327783584594727, "learning_rate": 1.9180217789651093e-05, "loss": 0.398, "step": 5990 }, { "epoch": 0.12989404805754773, "grad_norm": 1.4012084007263184, "learning_rate": 1.917886768631172e-05, "loss": 0.3771, "step": 5995 }, { "epoch": 0.13000238337702857, "grad_norm": 1.4179729223251343, "learning_rate": 1.9177516519738025e-05, "loss": 0.3524, "step": 6000 }, { "epoch": 0.13011071869650945, "grad_norm": 2.0018184185028076, "learning_rate": 1.9176164290086524e-05, "loss": 0.4027, "step": 6005 }, { "epoch": 0.1302190540159903, "grad_norm": 1.3616142272949219, "learning_rate": 1.9174810997513845e-05, "loss": 0.403, "step": 6010 }, { "epoch": 0.13032738933547114, "grad_norm": 0.9599253535270691, "learning_rate": 1.9173456642176753e-05, "loss": 0.2898, "step": 6015 }, { "epoch": 0.130435724654952, "grad_norm": 1.0059770345687866, "learning_rate": 1.9172101224232123e-05, "loss": 0.2927, "step": 6020 }, { "epoch": 0.13054405997443286, "grad_norm": 1.2966126203536987, "learning_rate": 1.9170744743836968e-05, "loss": 0.4067, "step": 6025 }, { "epoch": 0.13065239529391373, "grad_norm": 1.1126651763916016, "learning_rate": 1.916938720114841e-05, "loss": 0.3735, "step": 6030 }, { "epoch": 0.13076073061339458, "grad_norm": 1.0809152126312256, "learning_rate": 1.9168028596323707e-05, "loss": 0.4185, "step": 6035 }, { "epoch": 0.13086906593287542, "grad_norm": 1.327556848526001, "learning_rate": 1.9166668929520224e-05, "loss": 0.357, "step": 6040 }, { "epoch": 0.1309774012523563, "grad_norm": 1.1777399778366089, "learning_rate": 1.9165308200895466e-05, "loss": 0.3597, "step": 6045 }, { "epoch": 0.13108573657183714, "grad_norm": 1.4108673334121704, "learning_rate": 1.9163946410607045e-05, "loss": 0.37, "step": 6050 }, { "epoch": 0.13119407189131801, "grad_norm": 1.559719204902649, "learning_rate": 1.916258355881271e-05, "loss": 0.4306, "step": 6055 }, { "epoch": 0.13130240721079886, "grad_norm": 0.7224079966545105, "learning_rate": 1.9161219645670326e-05, "loss": 0.3284, "step": 6060 }, { "epoch": 0.13141074253027973, "grad_norm": 0.9724043607711792, "learning_rate": 1.9159854671337877e-05, "loss": 0.283, "step": 6065 }, { "epoch": 0.13151907784976058, "grad_norm": 1.455519199371338, "learning_rate": 1.9158488635973486e-05, "loss": 0.4253, "step": 6070 }, { "epoch": 0.13162741316924143, "grad_norm": 0.8600032329559326, "learning_rate": 1.915712153973537e-05, "loss": 0.3752, "step": 6075 }, { "epoch": 0.1317357484887223, "grad_norm": 1.5644954442977905, "learning_rate": 1.915575338278191e-05, "loss": 0.4182, "step": 6080 }, { "epoch": 0.13184408380820314, "grad_norm": 0.9838908314704895, "learning_rate": 1.9154384165271565e-05, "loss": 0.308, "step": 6085 }, { "epoch": 0.13195241912768402, "grad_norm": 1.581178903579712, "learning_rate": 1.915301388736295e-05, "loss": 0.3302, "step": 6090 }, { "epoch": 0.13206075444716486, "grad_norm": 0.8590443134307861, "learning_rate": 1.9151642549214787e-05, "loss": 0.4109, "step": 6095 }, { "epoch": 0.1321690897666457, "grad_norm": 1.2885172367095947, "learning_rate": 1.915027015098593e-05, "loss": 0.2496, "step": 6100 }, { "epoch": 0.13227742508612658, "grad_norm": 1.2360293865203857, "learning_rate": 1.9148896692835344e-05, "loss": 0.4108, "step": 6105 }, { "epoch": 0.13238576040560743, "grad_norm": 1.4966932535171509, "learning_rate": 1.9147522174922125e-05, "loss": 0.399, "step": 6110 }, { "epoch": 0.1324940957250883, "grad_norm": 1.419127106666565, "learning_rate": 1.9146146597405496e-05, "loss": 0.397, "step": 6115 }, { "epoch": 0.13260243104456915, "grad_norm": 1.28976309299469, "learning_rate": 1.9144769960444793e-05, "loss": 0.3068, "step": 6120 }, { "epoch": 0.13271076636405002, "grad_norm": 1.2973687648773193, "learning_rate": 1.9143392264199475e-05, "loss": 0.3829, "step": 6125 }, { "epoch": 0.13281910168353087, "grad_norm": 2.0056138038635254, "learning_rate": 1.914201350882913e-05, "loss": 0.3226, "step": 6130 }, { "epoch": 0.1329274370030117, "grad_norm": 1.3158537149429321, "learning_rate": 1.9140633694493472e-05, "loss": 0.3334, "step": 6135 }, { "epoch": 0.13303577232249258, "grad_norm": 1.2139369249343872, "learning_rate": 1.9139252821352325e-05, "loss": 0.3958, "step": 6140 }, { "epoch": 0.13314410764197343, "grad_norm": 1.2143312692642212, "learning_rate": 1.9137870889565646e-05, "loss": 0.3166, "step": 6145 }, { "epoch": 0.1332524429614543, "grad_norm": 1.516671061515808, "learning_rate": 1.9136487899293508e-05, "loss": 0.36, "step": 6150 }, { "epoch": 0.13336077828093515, "grad_norm": 1.4401518106460571, "learning_rate": 1.913510385069611e-05, "loss": 0.4049, "step": 6155 }, { "epoch": 0.133469113600416, "grad_norm": 1.5383062362670898, "learning_rate": 1.913371874393378e-05, "loss": 0.3122, "step": 6160 }, { "epoch": 0.13357744891989687, "grad_norm": 0.9908223748207092, "learning_rate": 1.9132332579166954e-05, "loss": 0.3364, "step": 6165 }, { "epoch": 0.13368578423937771, "grad_norm": 1.0159112215042114, "learning_rate": 1.9130945356556198e-05, "loss": 0.4624, "step": 6170 }, { "epoch": 0.1337941195588586, "grad_norm": 1.4922479391098022, "learning_rate": 1.9129557076262208e-05, "loss": 0.3861, "step": 6175 }, { "epoch": 0.13390245487833943, "grad_norm": 1.1992563009262085, "learning_rate": 1.912816773844579e-05, "loss": 0.3248, "step": 6180 }, { "epoch": 0.1340107901978203, "grad_norm": 1.0491442680358887, "learning_rate": 1.912677734326788e-05, "loss": 0.3904, "step": 6185 }, { "epoch": 0.13411912551730115, "grad_norm": 1.454178810119629, "learning_rate": 1.9125385890889536e-05, "loss": 0.3849, "step": 6190 }, { "epoch": 0.134227460836782, "grad_norm": 1.007778286933899, "learning_rate": 1.912399338147193e-05, "loss": 0.3734, "step": 6195 }, { "epoch": 0.13433579615626287, "grad_norm": 1.01105797290802, "learning_rate": 1.912259981517637e-05, "loss": 0.3859, "step": 6200 }, { "epoch": 0.13444413147574372, "grad_norm": 1.1307218074798584, "learning_rate": 1.9121205192164277e-05, "loss": 0.3286, "step": 6205 }, { "epoch": 0.1345524667952246, "grad_norm": 1.579106092453003, "learning_rate": 1.9119809512597198e-05, "loss": 0.4152, "step": 6210 }, { "epoch": 0.13466080211470544, "grad_norm": 0.9060111045837402, "learning_rate": 1.9118412776636804e-05, "loss": 0.2426, "step": 6215 }, { "epoch": 0.13476913743418628, "grad_norm": 1.296872854232788, "learning_rate": 1.9117014984444885e-05, "loss": 0.3001, "step": 6220 }, { "epoch": 0.13487747275366715, "grad_norm": 1.2646992206573486, "learning_rate": 1.911561613618335e-05, "loss": 0.3505, "step": 6225 }, { "epoch": 0.134985808073148, "grad_norm": 1.4339057207107544, "learning_rate": 1.9114216232014238e-05, "loss": 0.3614, "step": 6230 }, { "epoch": 0.13509414339262887, "grad_norm": 0.7169139981269836, "learning_rate": 1.9112815272099707e-05, "loss": 0.4279, "step": 6235 }, { "epoch": 0.13520247871210972, "grad_norm": 1.4753057956695557, "learning_rate": 1.9111413256602035e-05, "loss": 0.38, "step": 6240 }, { "epoch": 0.1353108140315906, "grad_norm": 1.9111789464950562, "learning_rate": 1.9110010185683628e-05, "loss": 0.3674, "step": 6245 }, { "epoch": 0.13541914935107144, "grad_norm": 1.642071008682251, "learning_rate": 1.9108606059507007e-05, "loss": 0.3547, "step": 6250 }, { "epoch": 0.13552748467055228, "grad_norm": 1.5040361881256104, "learning_rate": 1.9107200878234824e-05, "loss": 0.3349, "step": 6255 }, { "epoch": 0.13563581999003316, "grad_norm": 1.1295275688171387, "learning_rate": 1.9105794642029845e-05, "loss": 0.3219, "step": 6260 }, { "epoch": 0.135744155309514, "grad_norm": 1.3201407194137573, "learning_rate": 1.9104387351054957e-05, "loss": 0.3533, "step": 6265 }, { "epoch": 0.13585249062899488, "grad_norm": 1.6010271310806274, "learning_rate": 1.9102979005473184e-05, "loss": 0.4356, "step": 6270 }, { "epoch": 0.13596082594847572, "grad_norm": 1.2886455059051514, "learning_rate": 1.9101569605447653e-05, "loss": 0.4525, "step": 6275 }, { "epoch": 0.13606916126795657, "grad_norm": 1.6005258560180664, "learning_rate": 1.9100159151141623e-05, "loss": 0.4846, "step": 6280 }, { "epoch": 0.13617749658743744, "grad_norm": 1.260379672050476, "learning_rate": 1.909874764271848e-05, "loss": 0.2594, "step": 6285 }, { "epoch": 0.1362858319069183, "grad_norm": 1.3123393058776855, "learning_rate": 1.909733508034172e-05, "loss": 0.4697, "step": 6290 }, { "epoch": 0.13639416722639916, "grad_norm": 1.2151360511779785, "learning_rate": 1.9095921464174965e-05, "loss": 0.5363, "step": 6295 }, { "epoch": 0.13650250254588, "grad_norm": 1.1657084226608276, "learning_rate": 1.909450679438197e-05, "loss": 0.3737, "step": 6300 }, { "epoch": 0.13661083786536085, "grad_norm": 1.4594759941101074, "learning_rate": 1.9093091071126597e-05, "loss": 0.4895, "step": 6305 }, { "epoch": 0.13671917318484172, "grad_norm": 1.8545061349868774, "learning_rate": 1.909167429457284e-05, "loss": 0.3068, "step": 6310 }, { "epoch": 0.13682750850432257, "grad_norm": 1.412540316581726, "learning_rate": 1.9090256464884806e-05, "loss": 0.3808, "step": 6315 }, { "epoch": 0.13693584382380344, "grad_norm": 1.635216236114502, "learning_rate": 1.908883758222673e-05, "loss": 0.4945, "step": 6320 }, { "epoch": 0.1370441791432843, "grad_norm": 1.4889757633209229, "learning_rate": 1.9087417646762977e-05, "loss": 0.3897, "step": 6325 }, { "epoch": 0.13715251446276516, "grad_norm": 1.0233570337295532, "learning_rate": 1.9085996658658016e-05, "loss": 0.507, "step": 6330 }, { "epoch": 0.137260849782246, "grad_norm": 1.4310532808303833, "learning_rate": 1.908457461807645e-05, "loss": 0.4852, "step": 6335 }, { "epoch": 0.13736918510172685, "grad_norm": 1.2632027864456177, "learning_rate": 1.9083151525183002e-05, "loss": 0.4069, "step": 6340 }, { "epoch": 0.13747752042120773, "grad_norm": 1.259133219718933, "learning_rate": 1.908172738014252e-05, "loss": 0.3738, "step": 6345 }, { "epoch": 0.13758585574068857, "grad_norm": 1.3992516994476318, "learning_rate": 1.9080302183119958e-05, "loss": 0.4645, "step": 6350 }, { "epoch": 0.13769419106016945, "grad_norm": 2.072643995285034, "learning_rate": 1.9078875934280417e-05, "loss": 0.2923, "step": 6355 }, { "epoch": 0.1378025263796503, "grad_norm": 1.3896158933639526, "learning_rate": 1.90774486337891e-05, "loss": 0.3946, "step": 6360 }, { "epoch": 0.13791086169913114, "grad_norm": 1.0936386585235596, "learning_rate": 1.9076020281811334e-05, "loss": 0.3145, "step": 6365 }, { "epoch": 0.138019197018612, "grad_norm": 1.4837232828140259, "learning_rate": 1.907459087851258e-05, "loss": 0.3817, "step": 6370 }, { "epoch": 0.13812753233809286, "grad_norm": 1.063533067703247, "learning_rate": 1.907316042405841e-05, "loss": 0.3938, "step": 6375 }, { "epoch": 0.13823586765757373, "grad_norm": 1.4462287425994873, "learning_rate": 1.907172891861452e-05, "loss": 0.3829, "step": 6380 }, { "epoch": 0.13834420297705458, "grad_norm": 1.1764260530471802, "learning_rate": 1.907029636234673e-05, "loss": 0.2793, "step": 6385 }, { "epoch": 0.13845253829653545, "grad_norm": 1.3534530401229858, "learning_rate": 1.906886275542098e-05, "loss": 0.3609, "step": 6390 }, { "epoch": 0.1385608736160163, "grad_norm": 1.3652018308639526, "learning_rate": 1.906742809800333e-05, "loss": 0.2762, "step": 6395 }, { "epoch": 0.13866920893549714, "grad_norm": 1.6282497644424438, "learning_rate": 1.906599239025997e-05, "loss": 0.4693, "step": 6400 }, { "epoch": 0.138777544254978, "grad_norm": 1.1349401473999023, "learning_rate": 1.9064555632357194e-05, "loss": 0.3675, "step": 6405 }, { "epoch": 0.13888587957445886, "grad_norm": 1.209755539894104, "learning_rate": 1.9063117824461437e-05, "loss": 0.3406, "step": 6410 }, { "epoch": 0.13899421489393973, "grad_norm": 1.507958173751831, "learning_rate": 1.9061678966739246e-05, "loss": 0.4059, "step": 6415 }, { "epoch": 0.13910255021342058, "grad_norm": 1.3466824293136597, "learning_rate": 1.9060239059357288e-05, "loss": 0.379, "step": 6420 }, { "epoch": 0.13921088553290142, "grad_norm": 1.444564700126648, "learning_rate": 1.905879810248236e-05, "loss": 0.3792, "step": 6425 }, { "epoch": 0.1393192208523823, "grad_norm": 1.469313383102417, "learning_rate": 1.9057356096281372e-05, "loss": 0.351, "step": 6430 }, { "epoch": 0.13942755617186314, "grad_norm": 1.2717626094818115, "learning_rate": 1.9055913040921358e-05, "loss": 0.4021, "step": 6435 }, { "epoch": 0.13953589149134402, "grad_norm": 1.2161157131195068, "learning_rate": 1.9054468936569477e-05, "loss": 0.3667, "step": 6440 }, { "epoch": 0.13964422681082486, "grad_norm": 1.459471583366394, "learning_rate": 1.905302378339301e-05, "loss": 0.355, "step": 6445 }, { "epoch": 0.13975256213030574, "grad_norm": 1.079583764076233, "learning_rate": 1.9051577581559346e-05, "loss": 0.368, "step": 6450 }, { "epoch": 0.13986089744978658, "grad_norm": 1.381922960281372, "learning_rate": 1.9050130331236013e-05, "loss": 0.3639, "step": 6455 }, { "epoch": 0.13996923276926743, "grad_norm": 0.9892253875732422, "learning_rate": 1.9048682032590653e-05, "loss": 0.4662, "step": 6460 }, { "epoch": 0.1400775680887483, "grad_norm": 1.201278567314148, "learning_rate": 1.9047232685791027e-05, "loss": 0.2921, "step": 6465 }, { "epoch": 0.14018590340822915, "grad_norm": 1.4616762399673462, "learning_rate": 1.9045782291005027e-05, "loss": 0.3648, "step": 6470 }, { "epoch": 0.14029423872771002, "grad_norm": 1.0821475982666016, "learning_rate": 1.904433084840065e-05, "loss": 0.3146, "step": 6475 }, { "epoch": 0.14040257404719086, "grad_norm": 1.0692709684371948, "learning_rate": 1.904287835814603e-05, "loss": 0.3052, "step": 6480 }, { "epoch": 0.1405109093666717, "grad_norm": 1.2065294981002808, "learning_rate": 1.9041424820409414e-05, "loss": 0.4054, "step": 6485 }, { "epoch": 0.14061924468615258, "grad_norm": 1.2928162813186646, "learning_rate": 1.9039970235359172e-05, "loss": 0.4671, "step": 6490 }, { "epoch": 0.14072758000563343, "grad_norm": 1.520200490951538, "learning_rate": 1.90385146031638e-05, "loss": 0.3765, "step": 6495 }, { "epoch": 0.1408359153251143, "grad_norm": 1.2669823169708252, "learning_rate": 1.90370579239919e-05, "loss": 0.4084, "step": 6500 }, { "epoch": 0.14094425064459515, "grad_norm": 0.8996846079826355, "learning_rate": 1.903560019801222e-05, "loss": 0.2916, "step": 6505 }, { "epoch": 0.14105258596407602, "grad_norm": 1.6586581468582153, "learning_rate": 1.9034141425393614e-05, "loss": 0.3419, "step": 6510 }, { "epoch": 0.14116092128355687, "grad_norm": 1.1770375967025757, "learning_rate": 1.903268160630505e-05, "loss": 0.4171, "step": 6515 }, { "epoch": 0.1412692566030377, "grad_norm": 0.965221643447876, "learning_rate": 1.9031220740915636e-05, "loss": 0.2865, "step": 6520 }, { "epoch": 0.1413775919225186, "grad_norm": 1.1588398218154907, "learning_rate": 1.902975882939458e-05, "loss": 0.2979, "step": 6525 }, { "epoch": 0.14148592724199943, "grad_norm": 1.1581944227218628, "learning_rate": 1.902829587191124e-05, "loss": 0.5049, "step": 6530 }, { "epoch": 0.1415942625614803, "grad_norm": 1.1312216520309448, "learning_rate": 1.902683186863506e-05, "loss": 0.4144, "step": 6535 }, { "epoch": 0.14170259788096115, "grad_norm": 1.407315969467163, "learning_rate": 1.902536681973563e-05, "loss": 0.3732, "step": 6540 }, { "epoch": 0.141810933200442, "grad_norm": 1.5824053287506104, "learning_rate": 1.9023900725382654e-05, "loss": 0.4689, "step": 6545 }, { "epoch": 0.14191926851992287, "grad_norm": 1.0754069089889526, "learning_rate": 1.902243358574596e-05, "loss": 0.3765, "step": 6550 }, { "epoch": 0.14202760383940372, "grad_norm": 1.2751274108886719, "learning_rate": 1.9020965400995484e-05, "loss": 0.4186, "step": 6555 }, { "epoch": 0.1421359391588846, "grad_norm": 1.3604060411453247, "learning_rate": 1.901949617130131e-05, "loss": 0.3543, "step": 6560 }, { "epoch": 0.14224427447836543, "grad_norm": 1.4918363094329834, "learning_rate": 1.901802589683361e-05, "loss": 0.4087, "step": 6565 }, { "epoch": 0.14235260979784628, "grad_norm": 1.4860217571258545, "learning_rate": 1.9016554577762702e-05, "loss": 0.3087, "step": 6570 }, { "epoch": 0.14246094511732715, "grad_norm": 1.4476515054702759, "learning_rate": 1.9015082214259015e-05, "loss": 0.472, "step": 6575 }, { "epoch": 0.142569280436808, "grad_norm": 1.1648837327957153, "learning_rate": 1.90136088064931e-05, "loss": 0.4334, "step": 6580 }, { "epoch": 0.14267761575628887, "grad_norm": 1.4068177938461304, "learning_rate": 1.9012134354635626e-05, "loss": 0.3439, "step": 6585 }, { "epoch": 0.14278595107576972, "grad_norm": 1.6207256317138672, "learning_rate": 1.9010658858857387e-05, "loss": 0.3483, "step": 6590 }, { "epoch": 0.1428942863952506, "grad_norm": 0.9276180863380432, "learning_rate": 1.9009182319329302e-05, "loss": 0.3764, "step": 6595 }, { "epoch": 0.14300262171473144, "grad_norm": 1.13140070438385, "learning_rate": 1.90077047362224e-05, "loss": 0.3893, "step": 6600 }, { "epoch": 0.14311095703421228, "grad_norm": 1.172678828239441, "learning_rate": 1.9006226109707845e-05, "loss": 0.3636, "step": 6605 }, { "epoch": 0.14321929235369316, "grad_norm": 0.7065723538398743, "learning_rate": 1.9004746439956906e-05, "loss": 0.4046, "step": 6610 }, { "epoch": 0.143327627673174, "grad_norm": 1.3908060789108276, "learning_rate": 1.9003265727140982e-05, "loss": 0.34, "step": 6615 }, { "epoch": 0.14343596299265488, "grad_norm": 1.6278581619262695, "learning_rate": 1.9001783971431592e-05, "loss": 0.2989, "step": 6620 }, { "epoch": 0.14354429831213572, "grad_norm": 1.264307975769043, "learning_rate": 1.900030117300038e-05, "loss": 0.3699, "step": 6625 }, { "epoch": 0.14365263363161657, "grad_norm": 1.1653257608413696, "learning_rate": 1.89988173320191e-05, "loss": 0.403, "step": 6630 }, { "epoch": 0.14376096895109744, "grad_norm": 1.4841208457946777, "learning_rate": 1.899733244865963e-05, "loss": 0.51, "step": 6635 }, { "epoch": 0.14386930427057829, "grad_norm": 1.1641863584518433, "learning_rate": 1.8995846523093976e-05, "loss": 0.3417, "step": 6640 }, { "epoch": 0.14397763959005916, "grad_norm": 1.113389015197754, "learning_rate": 1.8994359555494263e-05, "loss": 0.4354, "step": 6645 }, { "epoch": 0.14408597490954, "grad_norm": 1.0392506122589111, "learning_rate": 1.899287154603273e-05, "loss": 0.32, "step": 6650 }, { "epoch": 0.14419431022902088, "grad_norm": 2.0961334705352783, "learning_rate": 1.8991382494881744e-05, "loss": 0.4665, "step": 6655 }, { "epoch": 0.14430264554850172, "grad_norm": 1.723097324371338, "learning_rate": 1.8989892402213785e-05, "loss": 0.2624, "step": 6660 }, { "epoch": 0.14441098086798257, "grad_norm": 1.8500701189041138, "learning_rate": 1.898840126820146e-05, "loss": 0.4335, "step": 6665 }, { "epoch": 0.14451931618746344, "grad_norm": 1.3146578073501587, "learning_rate": 1.8986909093017493e-05, "loss": 0.3307, "step": 6670 }, { "epoch": 0.1446276515069443, "grad_norm": 1.5783286094665527, "learning_rate": 1.8985415876834735e-05, "loss": 0.3621, "step": 6675 }, { "epoch": 0.14473598682642516, "grad_norm": 1.4948164224624634, "learning_rate": 1.8983921619826144e-05, "loss": 0.3837, "step": 6680 }, { "epoch": 0.144844322145906, "grad_norm": 0.9223770499229431, "learning_rate": 1.898242632216482e-05, "loss": 0.4411, "step": 6685 }, { "epoch": 0.14495265746538685, "grad_norm": 1.0154120922088623, "learning_rate": 1.8980929984023958e-05, "loss": 0.2862, "step": 6690 }, { "epoch": 0.14506099278486773, "grad_norm": 1.3711621761322021, "learning_rate": 1.897943260557689e-05, "loss": 0.5173, "step": 6695 }, { "epoch": 0.14516932810434857, "grad_norm": 1.3165147304534912, "learning_rate": 1.8977934186997068e-05, "loss": 0.2905, "step": 6700 }, { "epoch": 0.14527766342382945, "grad_norm": 1.5790857076644897, "learning_rate": 1.8976434728458062e-05, "loss": 0.4788, "step": 6705 }, { "epoch": 0.1453859987433103, "grad_norm": 1.2666280269622803, "learning_rate": 1.8974934230133556e-05, "loss": 0.447, "step": 6710 }, { "epoch": 0.14549433406279116, "grad_norm": 0.9714571237564087, "learning_rate": 1.8973432692197365e-05, "loss": 0.4147, "step": 6715 }, { "epoch": 0.145602669382272, "grad_norm": 0.9535754919052124, "learning_rate": 1.8971930114823423e-05, "loss": 0.3867, "step": 6720 }, { "epoch": 0.14571100470175286, "grad_norm": 1.18850839138031, "learning_rate": 1.8970426498185774e-05, "loss": 0.3998, "step": 6725 }, { "epoch": 0.14581934002123373, "grad_norm": 1.1514793634414673, "learning_rate": 1.896892184245859e-05, "loss": 0.3446, "step": 6730 }, { "epoch": 0.14592767534071457, "grad_norm": 2.108440399169922, "learning_rate": 1.8967416147816168e-05, "loss": 0.4071, "step": 6735 }, { "epoch": 0.14603601066019545, "grad_norm": 1.4561045169830322, "learning_rate": 1.8965909414432918e-05, "loss": 0.3804, "step": 6740 }, { "epoch": 0.1461443459796763, "grad_norm": 1.3387609720230103, "learning_rate": 1.8964401642483368e-05, "loss": 0.3452, "step": 6745 }, { "epoch": 0.14625268129915714, "grad_norm": 0.9574897289276123, "learning_rate": 1.8962892832142175e-05, "loss": 0.473, "step": 6750 }, { "epoch": 0.146361016618638, "grad_norm": 1.5595636367797852, "learning_rate": 1.8961382983584113e-05, "loss": 0.378, "step": 6755 }, { "epoch": 0.14646935193811886, "grad_norm": 1.0353013277053833, "learning_rate": 1.8959872096984074e-05, "loss": 0.3472, "step": 6760 }, { "epoch": 0.14657768725759973, "grad_norm": 1.5791305303573608, "learning_rate": 1.895836017251707e-05, "loss": 0.3234, "step": 6765 }, { "epoch": 0.14668602257708058, "grad_norm": 1.9194246530532837, "learning_rate": 1.8956847210358237e-05, "loss": 0.4249, "step": 6770 }, { "epoch": 0.14679435789656145, "grad_norm": 1.167374849319458, "learning_rate": 1.895533321068283e-05, "loss": 0.2987, "step": 6775 }, { "epoch": 0.1469026932160423, "grad_norm": 1.71198308467865, "learning_rate": 1.8953818173666217e-05, "loss": 0.3665, "step": 6780 }, { "epoch": 0.14701102853552314, "grad_norm": 1.3505185842514038, "learning_rate": 1.89523020994839e-05, "loss": 0.3947, "step": 6785 }, { "epoch": 0.14711936385500402, "grad_norm": 1.4410008192062378, "learning_rate": 1.895078498831149e-05, "loss": 0.3924, "step": 6790 }, { "epoch": 0.14722769917448486, "grad_norm": 1.5509445667266846, "learning_rate": 1.8949266840324723e-05, "loss": 0.295, "step": 6795 }, { "epoch": 0.14733603449396573, "grad_norm": 1.5360525846481323, "learning_rate": 1.894774765569945e-05, "loss": 0.3584, "step": 6800 }, { "epoch": 0.14744436981344658, "grad_norm": 1.162327766418457, "learning_rate": 1.8946227434611652e-05, "loss": 0.3383, "step": 6805 }, { "epoch": 0.14755270513292743, "grad_norm": 2.3386011123657227, "learning_rate": 1.8944706177237417e-05, "loss": 0.4132, "step": 6810 }, { "epoch": 0.1476610404524083, "grad_norm": 2.139458179473877, "learning_rate": 1.8943183883752963e-05, "loss": 0.3995, "step": 6815 }, { "epoch": 0.14776937577188914, "grad_norm": 1.5453472137451172, "learning_rate": 1.8941660554334626e-05, "loss": 0.4294, "step": 6820 }, { "epoch": 0.14787771109137002, "grad_norm": 1.8241873979568481, "learning_rate": 1.894013618915886e-05, "loss": 0.3068, "step": 6825 }, { "epoch": 0.14798604641085086, "grad_norm": 1.5460623502731323, "learning_rate": 1.893861078840224e-05, "loss": 0.5162, "step": 6830 }, { "epoch": 0.1480943817303317, "grad_norm": 1.161704421043396, "learning_rate": 1.8937084352241458e-05, "loss": 0.341, "step": 6835 }, { "epoch": 0.14820271704981258, "grad_norm": 1.5938503742218018, "learning_rate": 1.8935556880853335e-05, "loss": 0.3655, "step": 6840 }, { "epoch": 0.14831105236929343, "grad_norm": 1.5848875045776367, "learning_rate": 1.89340283744148e-05, "loss": 0.3628, "step": 6845 }, { "epoch": 0.1484193876887743, "grad_norm": 1.2806782722473145, "learning_rate": 1.8932498833102908e-05, "loss": 0.3691, "step": 6850 }, { "epoch": 0.14852772300825515, "grad_norm": 1.2453125715255737, "learning_rate": 1.8930968257094836e-05, "loss": 0.3489, "step": 6855 }, { "epoch": 0.14863605832773602, "grad_norm": 1.3058862686157227, "learning_rate": 1.892943664656788e-05, "loss": 0.4546, "step": 6860 }, { "epoch": 0.14874439364721687, "grad_norm": 1.112744927406311, "learning_rate": 1.892790400169945e-05, "loss": 0.462, "step": 6865 }, { "epoch": 0.1488527289666977, "grad_norm": 1.2103887796401978, "learning_rate": 1.892637032266708e-05, "loss": 0.2957, "step": 6870 }, { "epoch": 0.14896106428617858, "grad_norm": 1.7272133827209473, "learning_rate": 1.8924835609648428e-05, "loss": 0.3715, "step": 6875 }, { "epoch": 0.14906939960565943, "grad_norm": 1.2153379917144775, "learning_rate": 1.892329986282126e-05, "loss": 0.5009, "step": 6880 }, { "epoch": 0.1491777349251403, "grad_norm": 1.8386452198028564, "learning_rate": 1.892176308236348e-05, "loss": 0.3339, "step": 6885 }, { "epoch": 0.14928607024462115, "grad_norm": 1.4399840831756592, "learning_rate": 1.892022526845309e-05, "loss": 0.2964, "step": 6890 }, { "epoch": 0.149394405564102, "grad_norm": 1.1699984073638916, "learning_rate": 1.891868642126823e-05, "loss": 0.3514, "step": 6895 }, { "epoch": 0.14950274088358287, "grad_norm": 1.6113094091415405, "learning_rate": 1.8917146540987148e-05, "loss": 0.4625, "step": 6900 }, { "epoch": 0.14961107620306371, "grad_norm": 1.419161081314087, "learning_rate": 1.891560562778822e-05, "loss": 0.4569, "step": 6905 }, { "epoch": 0.1497194115225446, "grad_norm": 1.4370859861373901, "learning_rate": 1.8914063681849933e-05, "loss": 0.4026, "step": 6910 }, { "epoch": 0.14982774684202543, "grad_norm": 1.536641240119934, "learning_rate": 1.8912520703350905e-05, "loss": 0.3666, "step": 6915 }, { "epoch": 0.1499360821615063, "grad_norm": 1.723490595817566, "learning_rate": 1.891097669246986e-05, "loss": 0.4845, "step": 6920 }, { "epoch": 0.15004441748098715, "grad_norm": 1.3635952472686768, "learning_rate": 1.8909431649385653e-05, "loss": 0.4052, "step": 6925 }, { "epoch": 0.150152752800468, "grad_norm": 1.1630713939666748, "learning_rate": 1.890788557427725e-05, "loss": 0.3365, "step": 6930 }, { "epoch": 0.15026108811994887, "grad_norm": 0.9789702296257019, "learning_rate": 1.8906338467323747e-05, "loss": 0.3959, "step": 6935 }, { "epoch": 0.15036942343942972, "grad_norm": 1.1961032152175903, "learning_rate": 1.890479032870435e-05, "loss": 0.3188, "step": 6940 }, { "epoch": 0.1504777587589106, "grad_norm": 1.128814697265625, "learning_rate": 1.8903241158598386e-05, "loss": 0.4004, "step": 6945 }, { "epoch": 0.15058609407839144, "grad_norm": 1.643304705619812, "learning_rate": 1.8901690957185304e-05, "loss": 0.3099, "step": 6950 }, { "epoch": 0.15069442939787228, "grad_norm": 1.288944125175476, "learning_rate": 1.890013972464467e-05, "loss": 0.3821, "step": 6955 }, { "epoch": 0.15080276471735315, "grad_norm": 1.1524842977523804, "learning_rate": 1.8898587461156175e-05, "loss": 0.3511, "step": 6960 }, { "epoch": 0.150911100036834, "grad_norm": 1.154252290725708, "learning_rate": 1.8897034166899625e-05, "loss": 0.3551, "step": 6965 }, { "epoch": 0.15101943535631487, "grad_norm": 1.2800425291061401, "learning_rate": 1.889547984205494e-05, "loss": 0.4206, "step": 6970 }, { "epoch": 0.15112777067579572, "grad_norm": 1.4910176992416382, "learning_rate": 1.8893924486802172e-05, "loss": 0.3007, "step": 6975 }, { "epoch": 0.1512361059952766, "grad_norm": 1.5354363918304443, "learning_rate": 1.8892368101321486e-05, "loss": 0.3705, "step": 6980 }, { "epoch": 0.15134444131475744, "grad_norm": 1.261475682258606, "learning_rate": 1.8890810685793158e-05, "loss": 0.3281, "step": 6985 }, { "epoch": 0.15145277663423828, "grad_norm": 1.233335256576538, "learning_rate": 1.88892522403976e-05, "loss": 0.3224, "step": 6990 }, { "epoch": 0.15156111195371916, "grad_norm": 1.1529579162597656, "learning_rate": 1.888769276531533e-05, "loss": 0.3587, "step": 6995 }, { "epoch": 0.1516694472732, "grad_norm": 1.5088893175125122, "learning_rate": 1.888613226072699e-05, "loss": 0.4433, "step": 7000 }, { "epoch": 0.15177778259268088, "grad_norm": 1.330466628074646, "learning_rate": 1.8884570726813344e-05, "loss": 0.3981, "step": 7005 }, { "epoch": 0.15188611791216172, "grad_norm": 1.3771708011627197, "learning_rate": 1.8883008163755264e-05, "loss": 0.4298, "step": 7010 }, { "epoch": 0.15199445323164257, "grad_norm": 1.5933135747909546, "learning_rate": 1.8881444571733764e-05, "loss": 0.3771, "step": 7015 }, { "epoch": 0.15210278855112344, "grad_norm": 1.0013055801391602, "learning_rate": 1.887987995092995e-05, "loss": 0.2896, "step": 7020 }, { "epoch": 0.1522111238706043, "grad_norm": 1.149685025215149, "learning_rate": 1.8878314301525064e-05, "loss": 0.4939, "step": 7025 }, { "epoch": 0.15231945919008516, "grad_norm": 0.8712325096130371, "learning_rate": 1.8876747623700463e-05, "loss": 0.2998, "step": 7030 }, { "epoch": 0.152427794509566, "grad_norm": 1.0204730033874512, "learning_rate": 1.8875179917637627e-05, "loss": 0.3303, "step": 7035 }, { "epoch": 0.15253612982904688, "grad_norm": 1.1129273176193237, "learning_rate": 1.8873611183518143e-05, "loss": 0.3835, "step": 7040 }, { "epoch": 0.15264446514852772, "grad_norm": 0.9466359615325928, "learning_rate": 1.887204142152373e-05, "loss": 0.3806, "step": 7045 }, { "epoch": 0.15275280046800857, "grad_norm": 0.9843347072601318, "learning_rate": 1.8870470631836227e-05, "loss": 0.305, "step": 7050 }, { "epoch": 0.15286113578748944, "grad_norm": 1.5336270332336426, "learning_rate": 1.886889881463758e-05, "loss": 0.3329, "step": 7055 }, { "epoch": 0.1529694711069703, "grad_norm": 1.3999923467636108, "learning_rate": 1.8867325970109857e-05, "loss": 0.4478, "step": 7060 }, { "epoch": 0.15307780642645116, "grad_norm": 1.1937873363494873, "learning_rate": 1.8865752098435254e-05, "loss": 0.4175, "step": 7065 }, { "epoch": 0.153186141745932, "grad_norm": 1.7596664428710938, "learning_rate": 1.8864177199796084e-05, "loss": 0.3803, "step": 7070 }, { "epoch": 0.15329447706541285, "grad_norm": 2.008934259414673, "learning_rate": 1.8862601274374765e-05, "loss": 0.4333, "step": 7075 }, { "epoch": 0.15340281238489373, "grad_norm": 0.7217236161231995, "learning_rate": 1.8861024322353855e-05, "loss": 0.2929, "step": 7080 }, { "epoch": 0.15351114770437457, "grad_norm": 1.7819693088531494, "learning_rate": 1.885944634391601e-05, "loss": 0.3737, "step": 7085 }, { "epoch": 0.15361948302385545, "grad_norm": 1.8334373235702515, "learning_rate": 1.8857867339244025e-05, "loss": 0.279, "step": 7090 }, { "epoch": 0.1537278183433363, "grad_norm": 1.3796172142028809, "learning_rate": 1.8856287308520796e-05, "loss": 0.374, "step": 7095 }, { "epoch": 0.15383615366281717, "grad_norm": 1.1164144277572632, "learning_rate": 1.885470625192935e-05, "loss": 0.2862, "step": 7100 }, { "epoch": 0.153944488982298, "grad_norm": 1.1087634563446045, "learning_rate": 1.8853124169652832e-05, "loss": 0.3434, "step": 7105 }, { "epoch": 0.15405282430177886, "grad_norm": 2.148104667663574, "learning_rate": 1.8851541061874496e-05, "loss": 0.4429, "step": 7110 }, { "epoch": 0.15416115962125973, "grad_norm": 1.104085087776184, "learning_rate": 1.8849956928777726e-05, "loss": 0.4021, "step": 7115 }, { "epoch": 0.15426949494074058, "grad_norm": 1.354296326637268, "learning_rate": 1.8848371770546016e-05, "loss": 0.4745, "step": 7120 }, { "epoch": 0.15437783026022145, "grad_norm": 1.0896731615066528, "learning_rate": 1.8846785587362986e-05, "loss": 0.3381, "step": 7125 }, { "epoch": 0.1544861655797023, "grad_norm": 1.6264183521270752, "learning_rate": 1.884519837941237e-05, "loss": 0.3223, "step": 7130 }, { "epoch": 0.15459450089918314, "grad_norm": 1.3693395853042603, "learning_rate": 1.8843610146878025e-05, "loss": 0.3659, "step": 7135 }, { "epoch": 0.154702836218664, "grad_norm": 1.465677261352539, "learning_rate": 1.8842020889943916e-05, "loss": 0.384, "step": 7140 }, { "epoch": 0.15481117153814486, "grad_norm": 1.5697202682495117, "learning_rate": 1.8840430608794145e-05, "loss": 0.3793, "step": 7145 }, { "epoch": 0.15491950685762573, "grad_norm": 1.5304285287857056, "learning_rate": 1.8838839303612915e-05, "loss": 0.377, "step": 7150 }, { "epoch": 0.15502784217710658, "grad_norm": 1.106516718864441, "learning_rate": 1.883724697458456e-05, "loss": 0.3745, "step": 7155 }, { "epoch": 0.15513617749658742, "grad_norm": 1.1905097961425781, "learning_rate": 1.8835653621893526e-05, "loss": 0.4093, "step": 7160 }, { "epoch": 0.1552445128160683, "grad_norm": 1.554119348526001, "learning_rate": 1.8834059245724378e-05, "loss": 0.4304, "step": 7165 }, { "epoch": 0.15535284813554914, "grad_norm": 0.7984355688095093, "learning_rate": 1.8832463846261797e-05, "loss": 0.4428, "step": 7170 }, { "epoch": 0.15546118345503002, "grad_norm": 1.1544281244277954, "learning_rate": 1.8830867423690592e-05, "loss": 0.4459, "step": 7175 }, { "epoch": 0.15556951877451086, "grad_norm": 1.3050700426101685, "learning_rate": 1.8829269978195682e-05, "loss": 0.3011, "step": 7180 }, { "epoch": 0.15567785409399174, "grad_norm": 1.5948143005371094, "learning_rate": 1.8827671509962105e-05, "loss": 0.3397, "step": 7185 }, { "epoch": 0.15578618941347258, "grad_norm": 1.2791774272918701, "learning_rate": 1.8826072019175026e-05, "loss": 0.2997, "step": 7190 }, { "epoch": 0.15589452473295343, "grad_norm": 0.8689677715301514, "learning_rate": 1.8824471506019715e-05, "loss": 0.3047, "step": 7195 }, { "epoch": 0.1560028600524343, "grad_norm": 1.5835305452346802, "learning_rate": 1.882286997068157e-05, "loss": 0.406, "step": 7200 }, { "epoch": 0.15611119537191515, "grad_norm": 1.0941355228424072, "learning_rate": 1.882126741334611e-05, "loss": 0.3503, "step": 7205 }, { "epoch": 0.15621953069139602, "grad_norm": 1.274441123008728, "learning_rate": 1.881966383419896e-05, "loss": 0.2759, "step": 7210 }, { "epoch": 0.15632786601087686, "grad_norm": 1.934034824371338, "learning_rate": 1.8818059233425875e-05, "loss": 0.4275, "step": 7215 }, { "epoch": 0.1564362013303577, "grad_norm": 1.266743540763855, "learning_rate": 1.881645361121272e-05, "loss": 0.2596, "step": 7220 }, { "epoch": 0.15654453664983858, "grad_norm": 1.5096392631530762, "learning_rate": 1.8814846967745484e-05, "loss": 0.2744, "step": 7225 }, { "epoch": 0.15665287196931943, "grad_norm": 1.5155678987503052, "learning_rate": 1.8813239303210275e-05, "loss": 0.3915, "step": 7230 }, { "epoch": 0.1567612072888003, "grad_norm": 1.3497148752212524, "learning_rate": 1.8811630617793316e-05, "loss": 0.5349, "step": 7235 }, { "epoch": 0.15686954260828115, "grad_norm": 0.8534379601478577, "learning_rate": 1.8810020911680947e-05, "loss": 0.3501, "step": 7240 }, { "epoch": 0.15697787792776202, "grad_norm": 1.11820650100708, "learning_rate": 1.8808410185059628e-05, "loss": 0.2748, "step": 7245 }, { "epoch": 0.15708621324724287, "grad_norm": 1.4191690683364868, "learning_rate": 1.8806798438115942e-05, "loss": 0.4667, "step": 7250 }, { "epoch": 0.1571945485667237, "grad_norm": 1.451587200164795, "learning_rate": 1.880518567103658e-05, "loss": 0.2874, "step": 7255 }, { "epoch": 0.1573028838862046, "grad_norm": 1.098638892173767, "learning_rate": 1.880357188400836e-05, "loss": 0.3364, "step": 7260 }, { "epoch": 0.15741121920568543, "grad_norm": 1.4880638122558594, "learning_rate": 1.8801957077218217e-05, "loss": 0.3036, "step": 7265 }, { "epoch": 0.1575195545251663, "grad_norm": 1.499732255935669, "learning_rate": 1.8800341250853197e-05, "loss": 0.3363, "step": 7270 }, { "epoch": 0.15762788984464715, "grad_norm": 0.9648255705833435, "learning_rate": 1.8798724405100475e-05, "loss": 0.3602, "step": 7275 }, { "epoch": 0.157736225164128, "grad_norm": 1.5634251832962036, "learning_rate": 1.8797106540147333e-05, "loss": 0.3152, "step": 7280 }, { "epoch": 0.15784456048360887, "grad_norm": 1.4468616247177124, "learning_rate": 1.8795487656181182e-05, "loss": 0.421, "step": 7285 }, { "epoch": 0.15795289580308972, "grad_norm": 1.0451550483703613, "learning_rate": 1.8793867753389545e-05, "loss": 0.3727, "step": 7290 }, { "epoch": 0.1580612311225706, "grad_norm": 0.8790073990821838, "learning_rate": 1.8792246831960052e-05, "loss": 0.3704, "step": 7295 }, { "epoch": 0.15816956644205143, "grad_norm": 1.9906169176101685, "learning_rate": 1.879062489208048e-05, "loss": 0.4116, "step": 7300 }, { "epoch": 0.1582779017615323, "grad_norm": 1.1209667921066284, "learning_rate": 1.8789001933938692e-05, "loss": 0.3874, "step": 7305 }, { "epoch": 0.15838623708101315, "grad_norm": 1.026635766029358, "learning_rate": 1.878737795772269e-05, "loss": 0.3558, "step": 7310 }, { "epoch": 0.158494572400494, "grad_norm": 1.3536218404769897, "learning_rate": 1.8785752963620586e-05, "loss": 0.3888, "step": 7315 }, { "epoch": 0.15860290771997487, "grad_norm": 1.2148483991622925, "learning_rate": 1.8784126951820613e-05, "loss": 0.3212, "step": 7320 }, { "epoch": 0.15871124303945572, "grad_norm": 1.147773027420044, "learning_rate": 1.878249992251112e-05, "loss": 0.3113, "step": 7325 }, { "epoch": 0.1588195783589366, "grad_norm": 1.4411603212356567, "learning_rate": 1.878087187588057e-05, "loss": 0.3135, "step": 7330 }, { "epoch": 0.15892791367841744, "grad_norm": 1.3698723316192627, "learning_rate": 1.8779242812117554e-05, "loss": 0.4418, "step": 7335 }, { "epoch": 0.15903624899789828, "grad_norm": 0.988097608089447, "learning_rate": 1.877761273141077e-05, "loss": 0.3191, "step": 7340 }, { "epoch": 0.15914458431737916, "grad_norm": 1.2561558485031128, "learning_rate": 1.877598163394904e-05, "loss": 0.3805, "step": 7345 }, { "epoch": 0.15925291963686, "grad_norm": 1.8328279256820679, "learning_rate": 1.8774349519921303e-05, "loss": 0.3529, "step": 7350 }, { "epoch": 0.15936125495634088, "grad_norm": 2.7077651023864746, "learning_rate": 1.877271638951661e-05, "loss": 0.412, "step": 7355 }, { "epoch": 0.15946959027582172, "grad_norm": 1.3715946674346924, "learning_rate": 1.877108224292414e-05, "loss": 0.4189, "step": 7360 }, { "epoch": 0.1595779255953026, "grad_norm": 1.2308934926986694, "learning_rate": 1.8769447080333186e-05, "loss": 0.3203, "step": 7365 }, { "epoch": 0.15968626091478344, "grad_norm": 1.440679907798767, "learning_rate": 1.876781090193315e-05, "loss": 0.3436, "step": 7370 }, { "epoch": 0.15979459623426429, "grad_norm": 1.3013639450073242, "learning_rate": 1.8766173707913565e-05, "loss": 0.409, "step": 7375 }, { "epoch": 0.15990293155374516, "grad_norm": 1.0808496475219727, "learning_rate": 1.8764535498464075e-05, "loss": 0.3991, "step": 7380 }, { "epoch": 0.160011266873226, "grad_norm": 1.2875381708145142, "learning_rate": 1.8762896273774437e-05, "loss": 0.3356, "step": 7385 }, { "epoch": 0.16011960219270688, "grad_norm": 1.311914324760437, "learning_rate": 1.8761256034034535e-05, "loss": 0.3945, "step": 7390 }, { "epoch": 0.16022793751218772, "grad_norm": 1.2526079416275024, "learning_rate": 1.8759614779434362e-05, "loss": 0.4202, "step": 7395 }, { "epoch": 0.16033627283166857, "grad_norm": 1.1412622928619385, "learning_rate": 1.8757972510164043e-05, "loss": 0.368, "step": 7400 }, { "epoch": 0.16044460815114944, "grad_norm": 0.9558348655700684, "learning_rate": 1.87563292264138e-05, "loss": 0.3524, "step": 7405 }, { "epoch": 0.1605529434706303, "grad_norm": 1.7891288995742798, "learning_rate": 1.8754684928373983e-05, "loss": 0.3682, "step": 7410 }, { "epoch": 0.16066127879011116, "grad_norm": 0.8486018180847168, "learning_rate": 1.8753039616235065e-05, "loss": 0.3195, "step": 7415 }, { "epoch": 0.160769614109592, "grad_norm": 1.678166389465332, "learning_rate": 1.8751393290187625e-05, "loss": 0.4058, "step": 7420 }, { "epoch": 0.16087794942907285, "grad_norm": 1.156661033630371, "learning_rate": 1.8749745950422372e-05, "loss": 0.335, "step": 7425 }, { "epoch": 0.16098628474855373, "grad_norm": 1.580012321472168, "learning_rate": 1.874809759713012e-05, "loss": 0.5039, "step": 7430 }, { "epoch": 0.16109462006803457, "grad_norm": 1.0219495296478271, "learning_rate": 1.8746448230501807e-05, "loss": 0.3426, "step": 7435 }, { "epoch": 0.16120295538751545, "grad_norm": 1.5103178024291992, "learning_rate": 1.874479785072849e-05, "loss": 0.4134, "step": 7440 }, { "epoch": 0.1613112907069963, "grad_norm": 1.30012845993042, "learning_rate": 1.874314645800134e-05, "loss": 0.3663, "step": 7445 }, { "epoch": 0.16141962602647716, "grad_norm": 1.0051063299179077, "learning_rate": 1.874149405251164e-05, "loss": 0.3353, "step": 7450 }, { "epoch": 0.161527961345958, "grad_norm": 1.0468636751174927, "learning_rate": 1.8739840634450804e-05, "loss": 0.3494, "step": 7455 }, { "epoch": 0.16163629666543886, "grad_norm": 1.4675332307815552, "learning_rate": 1.8738186204010357e-05, "loss": 0.3531, "step": 7460 }, { "epoch": 0.16174463198491973, "grad_norm": 0.9849429130554199, "learning_rate": 1.8736530761381932e-05, "loss": 0.3641, "step": 7465 }, { "epoch": 0.16185296730440057, "grad_norm": 1.4163671731948853, "learning_rate": 1.8734874306757293e-05, "loss": 0.443, "step": 7470 }, { "epoch": 0.16196130262388145, "grad_norm": 1.60055673122406, "learning_rate": 1.8733216840328318e-05, "loss": 0.458, "step": 7475 }, { "epoch": 0.1620696379433623, "grad_norm": 1.9306484460830688, "learning_rate": 1.873155836228699e-05, "loss": 0.3072, "step": 7480 }, { "epoch": 0.16217797326284314, "grad_norm": 1.683133602142334, "learning_rate": 1.8729898872825425e-05, "loss": 0.3111, "step": 7485 }, { "epoch": 0.162286308582324, "grad_norm": 0.9472081065177917, "learning_rate": 1.8728238372135853e-05, "loss": 0.2863, "step": 7490 }, { "epoch": 0.16239464390180486, "grad_norm": 1.2358629703521729, "learning_rate": 1.8726576860410612e-05, "loss": 0.45, "step": 7495 }, { "epoch": 0.16250297922128573, "grad_norm": 0.9903904795646667, "learning_rate": 1.8724914337842163e-05, "loss": 0.3737, "step": 7500 }, { "epoch": 0.16261131454076658, "grad_norm": 1.1411104202270508, "learning_rate": 1.872325080462309e-05, "loss": 0.2926, "step": 7505 }, { "epoch": 0.16271964986024745, "grad_norm": 1.0585466623306274, "learning_rate": 1.8721586260946086e-05, "loss": 0.2885, "step": 7510 }, { "epoch": 0.1628279851797283, "grad_norm": 1.4082858562469482, "learning_rate": 1.8719920707003966e-05, "loss": 0.3461, "step": 7515 }, { "epoch": 0.16293632049920914, "grad_norm": 1.073534369468689, "learning_rate": 1.8718254142989652e-05, "loss": 0.407, "step": 7520 }, { "epoch": 0.16304465581869002, "grad_norm": 1.5907275676727295, "learning_rate": 1.8716586569096197e-05, "loss": 0.3636, "step": 7525 }, { "epoch": 0.16315299113817086, "grad_norm": 1.5061688423156738, "learning_rate": 1.8714917985516762e-05, "loss": 0.34, "step": 7530 }, { "epoch": 0.16326132645765173, "grad_norm": 1.7644424438476562, "learning_rate": 1.8713248392444625e-05, "loss": 0.3349, "step": 7535 }, { "epoch": 0.16336966177713258, "grad_norm": 1.3736765384674072, "learning_rate": 1.871157779007319e-05, "loss": 0.315, "step": 7540 }, { "epoch": 0.16347799709661343, "grad_norm": 1.5452483892440796, "learning_rate": 1.8709906178595967e-05, "loss": 0.3568, "step": 7545 }, { "epoch": 0.1635863324160943, "grad_norm": 0.958326518535614, "learning_rate": 1.8708233558206586e-05, "loss": 0.3358, "step": 7550 }, { "epoch": 0.16369466773557514, "grad_norm": 1.5772128105163574, "learning_rate": 1.87065599290988e-05, "loss": 0.2447, "step": 7555 }, { "epoch": 0.16380300305505602, "grad_norm": 1.4207594394683838, "learning_rate": 1.8704885291466467e-05, "loss": 0.2899, "step": 7560 }, { "epoch": 0.16391133837453686, "grad_norm": 1.1140908002853394, "learning_rate": 1.870320964550357e-05, "loss": 0.3814, "step": 7565 }, { "epoch": 0.16401967369401774, "grad_norm": 0.8819750547409058, "learning_rate": 1.8701532991404215e-05, "loss": 0.373, "step": 7570 }, { "epoch": 0.16412800901349858, "grad_norm": 1.4975037574768066, "learning_rate": 1.869985532936261e-05, "loss": 0.4245, "step": 7575 }, { "epoch": 0.16423634433297943, "grad_norm": 1.3587255477905273, "learning_rate": 1.8698176659573088e-05, "loss": 0.3887, "step": 7580 }, { "epoch": 0.1643446796524603, "grad_norm": 1.6803759336471558, "learning_rate": 1.86964969822301e-05, "loss": 0.4091, "step": 7585 }, { "epoch": 0.16445301497194115, "grad_norm": 1.1857572793960571, "learning_rate": 1.869481629752821e-05, "loss": 0.3959, "step": 7590 }, { "epoch": 0.16456135029142202, "grad_norm": 1.0836360454559326, "learning_rate": 1.86931346056621e-05, "loss": 0.3235, "step": 7595 }, { "epoch": 0.16466968561090287, "grad_norm": 1.3591299057006836, "learning_rate": 1.8691451906826566e-05, "loss": 0.3154, "step": 7600 }, { "epoch": 0.1647780209303837, "grad_norm": 2.250730514526367, "learning_rate": 1.868976820121653e-05, "loss": 0.3901, "step": 7605 }, { "epoch": 0.16488635624986459, "grad_norm": 0.9719659686088562, "learning_rate": 1.868808348902702e-05, "loss": 0.2867, "step": 7610 }, { "epoch": 0.16499469156934543, "grad_norm": 1.207702875137329, "learning_rate": 1.8686397770453183e-05, "loss": 0.2339, "step": 7615 }, { "epoch": 0.1651030268888263, "grad_norm": 1.4452913999557495, "learning_rate": 1.8684711045690293e-05, "loss": 0.2692, "step": 7620 }, { "epoch": 0.16521136220830715, "grad_norm": 1.4614715576171875, "learning_rate": 1.8683023314933718e-05, "loss": 0.3393, "step": 7625 }, { "epoch": 0.16531969752778802, "grad_norm": 1.0124934911727905, "learning_rate": 1.868133457837897e-05, "loss": 0.3991, "step": 7630 }, { "epoch": 0.16542803284726887, "grad_norm": 1.4794608354568481, "learning_rate": 1.8679644836221653e-05, "loss": 0.3607, "step": 7635 }, { "epoch": 0.16553636816674971, "grad_norm": 1.3791606426239014, "learning_rate": 1.8677954088657503e-05, "loss": 0.3031, "step": 7640 }, { "epoch": 0.1656447034862306, "grad_norm": 1.2910326719284058, "learning_rate": 1.867626233588237e-05, "loss": 0.4304, "step": 7645 }, { "epoch": 0.16575303880571143, "grad_norm": 1.408707618713379, "learning_rate": 1.867456957809221e-05, "loss": 0.3374, "step": 7650 }, { "epoch": 0.1658613741251923, "grad_norm": 1.1833348274230957, "learning_rate": 1.8672875815483115e-05, "loss": 0.3398, "step": 7655 }, { "epoch": 0.16596970944467315, "grad_norm": 1.1209267377853394, "learning_rate": 1.8671181048251276e-05, "loss": 0.3427, "step": 7660 }, { "epoch": 0.166078044764154, "grad_norm": 0.9528221487998962, "learning_rate": 1.8669485276593005e-05, "loss": 0.4217, "step": 7665 }, { "epoch": 0.16618638008363487, "grad_norm": 1.3674087524414062, "learning_rate": 1.8667788500704734e-05, "loss": 0.38, "step": 7670 }, { "epoch": 0.16629471540311572, "grad_norm": 1.4634863138198853, "learning_rate": 1.8666090720783012e-05, "loss": 0.3979, "step": 7675 }, { "epoch": 0.1664030507225966, "grad_norm": 1.27157461643219, "learning_rate": 1.8664391937024495e-05, "loss": 0.3681, "step": 7680 }, { "epoch": 0.16651138604207744, "grad_norm": 1.6232893466949463, "learning_rate": 1.8662692149625963e-05, "loss": 0.3982, "step": 7685 }, { "epoch": 0.16661972136155828, "grad_norm": 1.4065276384353638, "learning_rate": 1.8660991358784313e-05, "loss": 0.3628, "step": 7690 }, { "epoch": 0.16672805668103916, "grad_norm": 1.1051794290542603, "learning_rate": 1.865928956469656e-05, "loss": 0.3501, "step": 7695 }, { "epoch": 0.16683639200052, "grad_norm": 1.4828095436096191, "learning_rate": 1.8657586767559824e-05, "loss": 0.4046, "step": 7700 }, { "epoch": 0.16694472732000087, "grad_norm": 1.1836919784545898, "learning_rate": 1.8655882967571353e-05, "loss": 0.3857, "step": 7705 }, { "epoch": 0.16705306263948172, "grad_norm": 1.4114363193511963, "learning_rate": 1.8654178164928504e-05, "loss": 0.3454, "step": 7710 }, { "epoch": 0.1671613979589626, "grad_norm": 1.4313745498657227, "learning_rate": 1.8652472359828752e-05, "loss": 0.3743, "step": 7715 }, { "epoch": 0.16726973327844344, "grad_norm": 1.5005033016204834, "learning_rate": 1.8650765552469696e-05, "loss": 0.3529, "step": 7720 }, { "epoch": 0.16737806859792428, "grad_norm": 1.1090806722640991, "learning_rate": 1.8649057743049038e-05, "loss": 0.4832, "step": 7725 }, { "epoch": 0.16748640391740516, "grad_norm": 1.7759745121002197, "learning_rate": 1.86473489317646e-05, "loss": 0.3147, "step": 7730 }, { "epoch": 0.167594739236886, "grad_norm": 1.1410143375396729, "learning_rate": 1.8645639118814328e-05, "loss": 0.242, "step": 7735 }, { "epoch": 0.16770307455636688, "grad_norm": 1.5999833345413208, "learning_rate": 1.8643928304396275e-05, "loss": 0.3772, "step": 7740 }, { "epoch": 0.16781140987584772, "grad_norm": 1.377774715423584, "learning_rate": 1.8642216488708615e-05, "loss": 0.2999, "step": 7745 }, { "epoch": 0.16791974519532857, "grad_norm": 0.9994262456893921, "learning_rate": 1.8640503671949635e-05, "loss": 0.4393, "step": 7750 }, { "epoch": 0.16802808051480944, "grad_norm": 1.627995252609253, "learning_rate": 1.8638789854317737e-05, "loss": 0.363, "step": 7755 }, { "epoch": 0.1681364158342903, "grad_norm": 1.3793175220489502, "learning_rate": 1.8637075036011446e-05, "loss": 0.2986, "step": 7760 }, { "epoch": 0.16824475115377116, "grad_norm": 1.340332269668579, "learning_rate": 1.8635359217229398e-05, "loss": 0.3708, "step": 7765 }, { "epoch": 0.168353086473252, "grad_norm": 1.3806629180908203, "learning_rate": 1.8633642398170336e-05, "loss": 0.3469, "step": 7770 }, { "epoch": 0.16846142179273288, "grad_norm": 0.8492829203605652, "learning_rate": 1.8631924579033138e-05, "loss": 0.3776, "step": 7775 }, { "epoch": 0.16856975711221373, "grad_norm": 1.536311388015747, "learning_rate": 1.8630205760016782e-05, "loss": 0.4571, "step": 7780 }, { "epoch": 0.16867809243169457, "grad_norm": 1.1356284618377686, "learning_rate": 1.862848594132037e-05, "loss": 0.3715, "step": 7785 }, { "epoch": 0.16878642775117544, "grad_norm": 1.2777128219604492, "learning_rate": 1.8626765123143117e-05, "loss": 0.4296, "step": 7790 }, { "epoch": 0.1688947630706563, "grad_norm": 1.3376632928848267, "learning_rate": 1.8625043305684355e-05, "loss": 0.4044, "step": 7795 }, { "epoch": 0.16900309839013716, "grad_norm": 1.279464602470398, "learning_rate": 1.8623320489143526e-05, "loss": 0.516, "step": 7800 }, { "epoch": 0.169111433709618, "grad_norm": 1.489068865776062, "learning_rate": 1.8621596673720198e-05, "loss": 0.371, "step": 7805 }, { "epoch": 0.16921976902909885, "grad_norm": 1.1134436130523682, "learning_rate": 1.8619871859614045e-05, "loss": 0.2883, "step": 7810 }, { "epoch": 0.16932810434857973, "grad_norm": 1.8871172666549683, "learning_rate": 1.8618146047024863e-05, "loss": 0.3899, "step": 7815 }, { "epoch": 0.16943643966806057, "grad_norm": 1.272857666015625, "learning_rate": 1.861641923615256e-05, "loss": 0.2566, "step": 7820 }, { "epoch": 0.16954477498754145, "grad_norm": 1.202963948249817, "learning_rate": 1.8614691427197166e-05, "loss": 0.3891, "step": 7825 }, { "epoch": 0.1696531103070223, "grad_norm": 1.2639201879501343, "learning_rate": 1.8612962620358815e-05, "loss": 0.3525, "step": 7830 }, { "epoch": 0.16976144562650317, "grad_norm": 1.288777232170105, "learning_rate": 1.861123281583777e-05, "loss": 0.4487, "step": 7835 }, { "epoch": 0.169869780945984, "grad_norm": 1.0931893587112427, "learning_rate": 1.8609502013834397e-05, "loss": 0.2854, "step": 7840 }, { "epoch": 0.16997811626546486, "grad_norm": 1.1452170610427856, "learning_rate": 1.8607770214549186e-05, "loss": 0.2492, "step": 7845 }, { "epoch": 0.17008645158494573, "grad_norm": 1.598645806312561, "learning_rate": 1.8606037418182743e-05, "loss": 0.4025, "step": 7850 }, { "epoch": 0.17019478690442658, "grad_norm": 1.0399267673492432, "learning_rate": 1.860430362493578e-05, "loss": 0.3461, "step": 7855 }, { "epoch": 0.17030312222390745, "grad_norm": 1.384197473526001, "learning_rate": 1.8602568835009135e-05, "loss": 0.2789, "step": 7860 }, { "epoch": 0.1704114575433883, "grad_norm": 1.7207343578338623, "learning_rate": 1.8600833048603757e-05, "loss": 0.4139, "step": 7865 }, { "epoch": 0.17051979286286914, "grad_norm": 1.7811285257339478, "learning_rate": 1.8599096265920714e-05, "loss": 0.2967, "step": 7870 }, { "epoch": 0.17062812818235001, "grad_norm": 1.1400798559188843, "learning_rate": 1.859735848716118e-05, "loss": 0.3484, "step": 7875 }, { "epoch": 0.17073646350183086, "grad_norm": 1.1130657196044922, "learning_rate": 1.8595619712526454e-05, "loss": 0.3666, "step": 7880 }, { "epoch": 0.17084479882131173, "grad_norm": 1.2701283693313599, "learning_rate": 1.8593879942217944e-05, "loss": 0.3187, "step": 7885 }, { "epoch": 0.17095313414079258, "grad_norm": 1.4888836145401, "learning_rate": 1.8592139176437184e-05, "loss": 0.3575, "step": 7890 }, { "epoch": 0.17106146946027345, "grad_norm": 1.5068696737289429, "learning_rate": 1.8590397415385807e-05, "loss": 0.4332, "step": 7895 }, { "epoch": 0.1711698047797543, "grad_norm": 1.4667866230010986, "learning_rate": 1.8588654659265578e-05, "loss": 0.2507, "step": 7900 }, { "epoch": 0.17127814009923514, "grad_norm": 1.423768162727356, "learning_rate": 1.858691090827836e-05, "loss": 0.3192, "step": 7905 }, { "epoch": 0.17138647541871602, "grad_norm": 1.0879014730453491, "learning_rate": 1.8585166162626148e-05, "loss": 0.304, "step": 7910 }, { "epoch": 0.17149481073819686, "grad_norm": 1.2266992330551147, "learning_rate": 1.858342042251104e-05, "loss": 0.3834, "step": 7915 }, { "epoch": 0.17160314605767774, "grad_norm": 1.1380791664123535, "learning_rate": 1.8581673688135257e-05, "loss": 0.3012, "step": 7920 }, { "epoch": 0.17171148137715858, "grad_norm": 1.9381593465805054, "learning_rate": 1.857992595970113e-05, "loss": 0.3553, "step": 7925 }, { "epoch": 0.17181981669663943, "grad_norm": 1.3263022899627686, "learning_rate": 1.857817723741111e-05, "loss": 0.3579, "step": 7930 }, { "epoch": 0.1719281520161203, "grad_norm": 1.3574373722076416, "learning_rate": 1.8576427521467754e-05, "loss": 0.4448, "step": 7935 }, { "epoch": 0.17203648733560115, "grad_norm": 1.1481763124465942, "learning_rate": 1.8574676812073747e-05, "loss": 0.3954, "step": 7940 }, { "epoch": 0.17214482265508202, "grad_norm": 1.195592999458313, "learning_rate": 1.8572925109431878e-05, "loss": 0.3038, "step": 7945 }, { "epoch": 0.17225315797456286, "grad_norm": 1.57627272605896, "learning_rate": 1.857117241374506e-05, "loss": 0.4603, "step": 7950 }, { "epoch": 0.1723614932940437, "grad_norm": 1.172018051147461, "learning_rate": 1.8569418725216316e-05, "loss": 0.2983, "step": 7955 }, { "epoch": 0.17246982861352458, "grad_norm": 2.3959338665008545, "learning_rate": 1.8567664044048776e-05, "loss": 0.4281, "step": 7960 }, { "epoch": 0.17257816393300543, "grad_norm": 1.1866809129714966, "learning_rate": 1.8565908370445705e-05, "loss": 0.3496, "step": 7965 }, { "epoch": 0.1726864992524863, "grad_norm": 1.22792387008667, "learning_rate": 1.8564151704610466e-05, "loss": 0.2374, "step": 7970 }, { "epoch": 0.17279483457196715, "grad_norm": 1.840989112854004, "learning_rate": 1.8562394046746543e-05, "loss": 0.3874, "step": 7975 }, { "epoch": 0.17290316989144802, "grad_norm": 1.8226985931396484, "learning_rate": 1.8560635397057535e-05, "loss": 0.4826, "step": 7980 }, { "epoch": 0.17301150521092887, "grad_norm": 1.2681947946548462, "learning_rate": 1.855887575574715e-05, "loss": 0.3755, "step": 7985 }, { "epoch": 0.1731198405304097, "grad_norm": 1.5849785804748535, "learning_rate": 1.8557115123019226e-05, "loss": 0.4595, "step": 7990 }, { "epoch": 0.1732281758498906, "grad_norm": 0.8579666018486023, "learning_rate": 1.8555353499077698e-05, "loss": 0.3177, "step": 7995 }, { "epoch": 0.17333651116937143, "grad_norm": 1.142100214958191, "learning_rate": 1.8553590884126628e-05, "loss": 0.3846, "step": 8000 }, { "epoch": 0.1734448464888523, "grad_norm": 1.5218700170516968, "learning_rate": 1.8551827278370183e-05, "loss": 0.2944, "step": 8005 }, { "epoch": 0.17355318180833315, "grad_norm": 1.624360203742981, "learning_rate": 1.8550062682012657e-05, "loss": 0.4372, "step": 8010 }, { "epoch": 0.173661517127814, "grad_norm": 1.3222421407699585, "learning_rate": 1.854829709525845e-05, "loss": 0.4256, "step": 8015 }, { "epoch": 0.17376985244729487, "grad_norm": 1.4984331130981445, "learning_rate": 1.854653051831208e-05, "loss": 0.3147, "step": 8020 }, { "epoch": 0.17387818776677572, "grad_norm": 2.0460474491119385, "learning_rate": 1.854476295137817e-05, "loss": 0.3435, "step": 8025 }, { "epoch": 0.1739865230862566, "grad_norm": 0.9696934223175049, "learning_rate": 1.854299439466148e-05, "loss": 0.2778, "step": 8030 }, { "epoch": 0.17409485840573743, "grad_norm": 1.8017984628677368, "learning_rate": 1.854122484836686e-05, "loss": 0.3649, "step": 8035 }, { "epoch": 0.1742031937252183, "grad_norm": 1.4660602807998657, "learning_rate": 1.8539454312699287e-05, "loss": 0.3619, "step": 8040 }, { "epoch": 0.17431152904469915, "grad_norm": 0.9687219262123108, "learning_rate": 1.853768278786385e-05, "loss": 0.381, "step": 8045 }, { "epoch": 0.17441986436418, "grad_norm": 1.0085266828536987, "learning_rate": 1.8535910274065764e-05, "loss": 0.3074, "step": 8050 }, { "epoch": 0.17452819968366087, "grad_norm": 1.1131256818771362, "learning_rate": 1.853413677151034e-05, "loss": 0.3792, "step": 8055 }, { "epoch": 0.17463653500314172, "grad_norm": 0.9463642835617065, "learning_rate": 1.8532362280403008e-05, "loss": 0.3777, "step": 8060 }, { "epoch": 0.1747448703226226, "grad_norm": 2.1649277210235596, "learning_rate": 1.853058680094932e-05, "loss": 0.353, "step": 8065 }, { "epoch": 0.17485320564210344, "grad_norm": 1.3000980615615845, "learning_rate": 1.8528810333354945e-05, "loss": 0.3749, "step": 8070 }, { "epoch": 0.17496154096158428, "grad_norm": 1.5200779438018799, "learning_rate": 1.852703287782565e-05, "loss": 0.4808, "step": 8075 }, { "epoch": 0.17506987628106516, "grad_norm": 1.4024689197540283, "learning_rate": 1.852525443456733e-05, "loss": 0.3796, "step": 8080 }, { "epoch": 0.175178211600546, "grad_norm": 1.0638480186462402, "learning_rate": 1.8523475003785995e-05, "loss": 0.2526, "step": 8085 }, { "epoch": 0.17528654692002688, "grad_norm": 1.738370418548584, "learning_rate": 1.8521694585687765e-05, "loss": 0.3846, "step": 8090 }, { "epoch": 0.17539488223950772, "grad_norm": 1.770090103149414, "learning_rate": 1.851991318047887e-05, "loss": 0.3796, "step": 8095 }, { "epoch": 0.1755032175589886, "grad_norm": 1.2969459295272827, "learning_rate": 1.8518130788365657e-05, "loss": 0.3453, "step": 8100 }, { "epoch": 0.17561155287846944, "grad_norm": 1.3089048862457275, "learning_rate": 1.85163474095546e-05, "loss": 0.3013, "step": 8105 }, { "epoch": 0.17571988819795029, "grad_norm": 1.0256285667419434, "learning_rate": 1.8514563044252267e-05, "loss": 0.332, "step": 8110 }, { "epoch": 0.17582822351743116, "grad_norm": 1.2876187562942505, "learning_rate": 1.8512777692665354e-05, "loss": 0.4823, "step": 8115 }, { "epoch": 0.175936558836912, "grad_norm": 1.2348500490188599, "learning_rate": 1.8510991355000664e-05, "loss": 0.3916, "step": 8120 }, { "epoch": 0.17604489415639288, "grad_norm": 1.521374225616455, "learning_rate": 1.8509204031465126e-05, "loss": 0.4298, "step": 8125 }, { "epoch": 0.17615322947587372, "grad_norm": 1.3128317594528198, "learning_rate": 1.8507415722265766e-05, "loss": 0.2906, "step": 8130 }, { "epoch": 0.17626156479535457, "grad_norm": 1.477608323097229, "learning_rate": 1.8505626427609736e-05, "loss": 0.2238, "step": 8135 }, { "epoch": 0.17636990011483544, "grad_norm": 1.1306016445159912, "learning_rate": 1.8503836147704297e-05, "loss": 0.2779, "step": 8140 }, { "epoch": 0.1764782354343163, "grad_norm": 1.1896634101867676, "learning_rate": 1.850204488275683e-05, "loss": 0.3249, "step": 8145 }, { "epoch": 0.17658657075379716, "grad_norm": 0.992177426815033, "learning_rate": 1.850025263297482e-05, "loss": 0.4242, "step": 8150 }, { "epoch": 0.176694906073278, "grad_norm": 1.1493889093399048, "learning_rate": 1.849845939856588e-05, "loss": 0.3971, "step": 8155 }, { "epoch": 0.17680324139275888, "grad_norm": 2.2384626865386963, "learning_rate": 1.8496665179737724e-05, "loss": 0.4038, "step": 8160 }, { "epoch": 0.17691157671223973, "grad_norm": 1.070052146911621, "learning_rate": 1.8494869976698187e-05, "loss": 0.2765, "step": 8165 }, { "epoch": 0.17701991203172057, "grad_norm": 1.306032657623291, "learning_rate": 1.8493073789655217e-05, "loss": 0.3026, "step": 8170 }, { "epoch": 0.17712824735120145, "grad_norm": 1.491178274154663, "learning_rate": 1.8491276618816875e-05, "loss": 0.4191, "step": 8175 }, { "epoch": 0.1772365826706823, "grad_norm": 1.575433373451233, "learning_rate": 1.8489478464391336e-05, "loss": 0.3351, "step": 8180 }, { "epoch": 0.17734491799016316, "grad_norm": 1.4319921731948853, "learning_rate": 1.848767932658689e-05, "loss": 0.4047, "step": 8185 }, { "epoch": 0.177453253309644, "grad_norm": 1.516815185546875, "learning_rate": 1.848587920561194e-05, "loss": 0.4319, "step": 8190 }, { "epoch": 0.17756158862912486, "grad_norm": 1.7050111293792725, "learning_rate": 1.8484078101675e-05, "loss": 0.2665, "step": 8195 }, { "epoch": 0.17766992394860573, "grad_norm": 1.5801626443862915, "learning_rate": 1.8482276014984703e-05, "loss": 0.3081, "step": 8200 }, { "epoch": 0.17777825926808657, "grad_norm": 1.456713080406189, "learning_rate": 1.8480472945749796e-05, "loss": 0.3333, "step": 8205 }, { "epoch": 0.17788659458756745, "grad_norm": 1.3735326528549194, "learning_rate": 1.8478668894179135e-05, "loss": 0.3252, "step": 8210 }, { "epoch": 0.1779949299070483, "grad_norm": 1.554260015487671, "learning_rate": 1.8476863860481694e-05, "loss": 0.3361, "step": 8215 }, { "epoch": 0.17810326522652914, "grad_norm": 1.667463779449463, "learning_rate": 1.8475057844866557e-05, "loss": 0.4253, "step": 8220 }, { "epoch": 0.17821160054601, "grad_norm": 1.6065541505813599, "learning_rate": 1.8473250847542923e-05, "loss": 0.3892, "step": 8225 }, { "epoch": 0.17831993586549086, "grad_norm": 1.0865038633346558, "learning_rate": 1.8471442868720113e-05, "loss": 0.2776, "step": 8230 }, { "epoch": 0.17842827118497173, "grad_norm": 1.7498153448104858, "learning_rate": 1.8469633908607547e-05, "loss": 0.2584, "step": 8235 }, { "epoch": 0.17853660650445258, "grad_norm": 1.011205792427063, "learning_rate": 1.8467823967414763e-05, "loss": 0.399, "step": 8240 }, { "epoch": 0.17864494182393345, "grad_norm": 1.4616096019744873, "learning_rate": 1.8466013045351426e-05, "loss": 0.4251, "step": 8245 }, { "epoch": 0.1787532771434143, "grad_norm": 1.2567108869552612, "learning_rate": 1.84642011426273e-05, "loss": 0.3038, "step": 8250 }, { "epoch": 0.17886161246289514, "grad_norm": 1.147872805595398, "learning_rate": 1.846238825945226e-05, "loss": 0.411, "step": 8255 }, { "epoch": 0.17896994778237602, "grad_norm": 1.004780888557434, "learning_rate": 1.846057439603631e-05, "loss": 0.3797, "step": 8260 }, { "epoch": 0.17907828310185686, "grad_norm": 1.0835098028182983, "learning_rate": 1.845875955258955e-05, "loss": 0.3647, "step": 8265 }, { "epoch": 0.17918661842133773, "grad_norm": 1.4067872762680054, "learning_rate": 1.8456943729322216e-05, "loss": 0.2832, "step": 8270 }, { "epoch": 0.17929495374081858, "grad_norm": 1.2896721363067627, "learning_rate": 1.845512692644463e-05, "loss": 0.3183, "step": 8275 }, { "epoch": 0.17940328906029943, "grad_norm": 1.4297661781311035, "learning_rate": 1.845330914416725e-05, "loss": 0.3252, "step": 8280 }, { "epoch": 0.1795116243797803, "grad_norm": 2.0003180503845215, "learning_rate": 1.8451490382700636e-05, "loss": 0.3414, "step": 8285 }, { "epoch": 0.17961995969926114, "grad_norm": 1.16204035282135, "learning_rate": 1.8449670642255463e-05, "loss": 0.3915, "step": 8290 }, { "epoch": 0.17972829501874202, "grad_norm": 1.1583224534988403, "learning_rate": 1.8447849923042523e-05, "loss": 0.2684, "step": 8295 }, { "epoch": 0.17983663033822286, "grad_norm": 1.2136950492858887, "learning_rate": 1.8446028225272725e-05, "loss": 0.3004, "step": 8300 }, { "epoch": 0.17994496565770374, "grad_norm": 1.0938076972961426, "learning_rate": 1.844420554915707e-05, "loss": 0.3421, "step": 8305 }, { "epoch": 0.18005330097718458, "grad_norm": 1.2138351202011108, "learning_rate": 1.8442381894906697e-05, "loss": 0.3398, "step": 8310 }, { "epoch": 0.18016163629666543, "grad_norm": 1.191360354423523, "learning_rate": 1.8440557262732852e-05, "loss": 0.3142, "step": 8315 }, { "epoch": 0.1802699716161463, "grad_norm": 1.6273826360702515, "learning_rate": 1.8438731652846885e-05, "loss": 0.3483, "step": 8320 }, { "epoch": 0.18037830693562715, "grad_norm": 1.5727401971817017, "learning_rate": 1.8436905065460268e-05, "loss": 0.3697, "step": 8325 }, { "epoch": 0.18048664225510802, "grad_norm": 1.1953037977218628, "learning_rate": 1.8435077500784584e-05, "loss": 0.3525, "step": 8330 }, { "epoch": 0.18059497757458887, "grad_norm": 1.036938190460205, "learning_rate": 1.8433248959031533e-05, "loss": 0.3238, "step": 8335 }, { "epoch": 0.1807033128940697, "grad_norm": 1.518314003944397, "learning_rate": 1.8431419440412917e-05, "loss": 0.402, "step": 8340 }, { "epoch": 0.18081164821355059, "grad_norm": 1.0967057943344116, "learning_rate": 1.8429588945140658e-05, "loss": 0.3616, "step": 8345 }, { "epoch": 0.18091998353303143, "grad_norm": 1.130558967590332, "learning_rate": 1.8427757473426798e-05, "loss": 0.3536, "step": 8350 }, { "epoch": 0.1810283188525123, "grad_norm": 1.5439825057983398, "learning_rate": 1.8425925025483485e-05, "loss": 0.2806, "step": 8355 }, { "epoch": 0.18113665417199315, "grad_norm": 1.0932369232177734, "learning_rate": 1.8424091601522976e-05, "loss": 0.3428, "step": 8360 }, { "epoch": 0.18124498949147402, "grad_norm": 1.3090593814849854, "learning_rate": 1.8422257201757648e-05, "loss": 0.3746, "step": 8365 }, { "epoch": 0.18135332481095487, "grad_norm": 0.7636069655418396, "learning_rate": 1.842042182639999e-05, "loss": 0.2837, "step": 8370 }, { "epoch": 0.18146166013043571, "grad_norm": 1.6064319610595703, "learning_rate": 1.84185854756626e-05, "loss": 0.3488, "step": 8375 }, { "epoch": 0.1815699954499166, "grad_norm": 2.0138792991638184, "learning_rate": 1.8416748149758194e-05, "loss": 0.4519, "step": 8380 }, { "epoch": 0.18167833076939743, "grad_norm": 1.3128708600997925, "learning_rate": 1.8414909848899595e-05, "loss": 0.3229, "step": 8385 }, { "epoch": 0.1817866660888783, "grad_norm": 1.630936622619629, "learning_rate": 1.8413070573299745e-05, "loss": 0.3143, "step": 8390 }, { "epoch": 0.18189500140835915, "grad_norm": 1.3136988878250122, "learning_rate": 1.8411230323171702e-05, "loss": 0.3863, "step": 8395 }, { "epoch": 0.18200333672784, "grad_norm": 1.8015856742858887, "learning_rate": 1.840938909872862e-05, "loss": 0.2458, "step": 8400 }, { "epoch": 0.18211167204732087, "grad_norm": 1.4772480726242065, "learning_rate": 1.8407546900183786e-05, "loss": 0.3588, "step": 8405 }, { "epoch": 0.18222000736680172, "grad_norm": 1.4880211353302002, "learning_rate": 1.840570372775059e-05, "loss": 0.3573, "step": 8410 }, { "epoch": 0.1823283426862826, "grad_norm": 1.184537649154663, "learning_rate": 1.8403859581642532e-05, "loss": 0.3278, "step": 8415 }, { "epoch": 0.18243667800576344, "grad_norm": 1.2981280088424683, "learning_rate": 1.8402014462073236e-05, "loss": 0.303, "step": 8420 }, { "epoch": 0.1825450133252443, "grad_norm": 1.529929280281067, "learning_rate": 1.840016836925642e-05, "loss": 0.3084, "step": 8425 }, { "epoch": 0.18265334864472516, "grad_norm": 1.305190086364746, "learning_rate": 1.839832130340594e-05, "loss": 0.3337, "step": 8430 }, { "epoch": 0.182761683964206, "grad_norm": 1.2146862745285034, "learning_rate": 1.839647326473574e-05, "loss": 0.4183, "step": 8435 }, { "epoch": 0.18287001928368687, "grad_norm": 1.4840514659881592, "learning_rate": 1.8394624253459896e-05, "loss": 0.403, "step": 8440 }, { "epoch": 0.18297835460316772, "grad_norm": 1.3279337882995605, "learning_rate": 1.8392774269792577e-05, "loss": 0.2564, "step": 8445 }, { "epoch": 0.1830866899226486, "grad_norm": 1.2096267938613892, "learning_rate": 1.8390923313948086e-05, "loss": 0.3328, "step": 8450 }, { "epoch": 0.18319502524212944, "grad_norm": 1.292564034461975, "learning_rate": 1.8389071386140823e-05, "loss": 0.295, "step": 8455 }, { "epoch": 0.18330336056161028, "grad_norm": 1.698944091796875, "learning_rate": 1.8387218486585312e-05, "loss": 0.4663, "step": 8460 }, { "epoch": 0.18341169588109116, "grad_norm": 1.4272124767303467, "learning_rate": 1.8385364615496176e-05, "loss": 0.4387, "step": 8465 }, { "epoch": 0.183520031200572, "grad_norm": 1.1291377544403076, "learning_rate": 1.8383509773088163e-05, "loss": 0.377, "step": 8470 }, { "epoch": 0.18362836652005288, "grad_norm": 1.4203673601150513, "learning_rate": 1.838165395957613e-05, "loss": 0.329, "step": 8475 }, { "epoch": 0.18373670183953372, "grad_norm": 1.5062999725341797, "learning_rate": 1.837979717517504e-05, "loss": 0.4839, "step": 8480 }, { "epoch": 0.18384503715901457, "grad_norm": 1.6851328611373901, "learning_rate": 1.837793942009998e-05, "loss": 0.2873, "step": 8485 }, { "epoch": 0.18395337247849544, "grad_norm": 0.9005063772201538, "learning_rate": 1.8376080694566136e-05, "loss": 0.3181, "step": 8490 }, { "epoch": 0.1840617077979763, "grad_norm": 1.4241564273834229, "learning_rate": 1.837422099878882e-05, "loss": 0.3981, "step": 8495 }, { "epoch": 0.18417004311745716, "grad_norm": 1.7467201948165894, "learning_rate": 1.8372360332983445e-05, "loss": 0.371, "step": 8500 }, { "epoch": 0.184278378436938, "grad_norm": 1.3656237125396729, "learning_rate": 1.8370498697365543e-05, "loss": 0.3394, "step": 8505 }, { "epoch": 0.18438671375641888, "grad_norm": 1.2608164548873901, "learning_rate": 1.836863609215076e-05, "loss": 0.4047, "step": 8510 }, { "epoch": 0.18449504907589973, "grad_norm": 1.2431319952011108, "learning_rate": 1.8366772517554846e-05, "loss": 0.3486, "step": 8515 }, { "epoch": 0.18460338439538057, "grad_norm": 1.2714489698410034, "learning_rate": 1.836490797379367e-05, "loss": 0.3025, "step": 8520 }, { "epoch": 0.18471171971486144, "grad_norm": 0.8307031989097595, "learning_rate": 1.836304246108321e-05, "loss": 0.3318, "step": 8525 }, { "epoch": 0.1848200550343423, "grad_norm": 1.3990426063537598, "learning_rate": 1.8361175979639565e-05, "loss": 0.4088, "step": 8530 }, { "epoch": 0.18492839035382316, "grad_norm": 1.0590153932571411, "learning_rate": 1.835930852967893e-05, "loss": 0.39, "step": 8535 }, { "epoch": 0.185036725673304, "grad_norm": 1.5742624998092651, "learning_rate": 1.835744011141762e-05, "loss": 0.4926, "step": 8540 }, { "epoch": 0.18514506099278485, "grad_norm": 1.8589404821395874, "learning_rate": 1.835557072507207e-05, "loss": 0.3416, "step": 8545 }, { "epoch": 0.18525339631226573, "grad_norm": 1.778412938117981, "learning_rate": 1.8353700370858824e-05, "loss": 0.457, "step": 8550 }, { "epoch": 0.18536173163174657, "grad_norm": 1.2947096824645996, "learning_rate": 1.835182904899452e-05, "loss": 0.2366, "step": 8555 }, { "epoch": 0.18547006695122745, "grad_norm": 1.3748422861099243, "learning_rate": 1.8349956759695934e-05, "loss": 0.4262, "step": 8560 }, { "epoch": 0.1855784022707083, "grad_norm": 1.1623703241348267, "learning_rate": 1.834808350317994e-05, "loss": 0.4609, "step": 8565 }, { "epoch": 0.18568673759018917, "grad_norm": 1.1039766073226929, "learning_rate": 1.834620927966353e-05, "loss": 0.2463, "step": 8570 }, { "epoch": 0.18579507290967, "grad_norm": 1.5110113620758057, "learning_rate": 1.8344334089363798e-05, "loss": 0.2987, "step": 8575 }, { "epoch": 0.18590340822915086, "grad_norm": 1.856052041053772, "learning_rate": 1.834245793249796e-05, "loss": 0.4227, "step": 8580 }, { "epoch": 0.18601174354863173, "grad_norm": 1.5997782945632935, "learning_rate": 1.8340580809283335e-05, "loss": 0.4253, "step": 8585 }, { "epoch": 0.18612007886811258, "grad_norm": 1.531233310699463, "learning_rate": 1.8338702719937375e-05, "loss": 0.3308, "step": 8590 }, { "epoch": 0.18622841418759345, "grad_norm": 1.459344506263733, "learning_rate": 1.8336823664677613e-05, "loss": 0.3361, "step": 8595 }, { "epoch": 0.1863367495070743, "grad_norm": 1.6922180652618408, "learning_rate": 1.8334943643721717e-05, "loss": 0.3362, "step": 8600 }, { "epoch": 0.18644508482655514, "grad_norm": 1.0174756050109863, "learning_rate": 1.833306265728746e-05, "loss": 0.3206, "step": 8605 }, { "epoch": 0.18655342014603601, "grad_norm": 1.1378495693206787, "learning_rate": 1.8331180705592716e-05, "loss": 0.3139, "step": 8610 }, { "epoch": 0.18666175546551686, "grad_norm": 1.3872590065002441, "learning_rate": 1.832929778885549e-05, "loss": 0.4148, "step": 8615 }, { "epoch": 0.18677009078499773, "grad_norm": 1.625856876373291, "learning_rate": 1.8327413907293892e-05, "loss": 0.4, "step": 8620 }, { "epoch": 0.18687842610447858, "grad_norm": 1.6161555051803589, "learning_rate": 1.8325529061126138e-05, "loss": 0.3549, "step": 8625 }, { "epoch": 0.18698676142395945, "grad_norm": 1.0955020189285278, "learning_rate": 1.8323643250570553e-05, "loss": 0.2301, "step": 8630 }, { "epoch": 0.1870950967434403, "grad_norm": 1.5258166790008545, "learning_rate": 1.832175647584559e-05, "loss": 0.3826, "step": 8635 }, { "epoch": 0.18720343206292114, "grad_norm": 1.4575304985046387, "learning_rate": 1.83198687371698e-05, "loss": 0.499, "step": 8640 }, { "epoch": 0.18731176738240202, "grad_norm": 1.411298394203186, "learning_rate": 1.8317980034761844e-05, "loss": 0.3109, "step": 8645 }, { "epoch": 0.18742010270188286, "grad_norm": 1.4506624937057495, "learning_rate": 1.831609036884051e-05, "loss": 0.3117, "step": 8650 }, { "epoch": 0.18752843802136374, "grad_norm": 1.0881104469299316, "learning_rate": 1.8314199739624676e-05, "loss": 0.2533, "step": 8655 }, { "epoch": 0.18763677334084458, "grad_norm": 1.1979008913040161, "learning_rate": 1.8312308147333354e-05, "loss": 0.3497, "step": 8660 }, { "epoch": 0.18774510866032543, "grad_norm": 1.4110920429229736, "learning_rate": 1.8310415592185647e-05, "loss": 0.4125, "step": 8665 }, { "epoch": 0.1878534439798063, "grad_norm": 1.1865278482437134, "learning_rate": 1.830852207440079e-05, "loss": 0.3957, "step": 8670 }, { "epoch": 0.18796177929928715, "grad_norm": 1.239516258239746, "learning_rate": 1.8306627594198104e-05, "loss": 0.3353, "step": 8675 }, { "epoch": 0.18807011461876802, "grad_norm": 1.620377540588379, "learning_rate": 1.8304732151797048e-05, "loss": 0.4112, "step": 8680 }, { "epoch": 0.18817844993824887, "grad_norm": 1.917327880859375, "learning_rate": 1.830283574741718e-05, "loss": 0.325, "step": 8685 }, { "epoch": 0.18828678525772974, "grad_norm": 1.125401258468628, "learning_rate": 1.8300938381278163e-05, "loss": 0.2795, "step": 8690 }, { "epoch": 0.18839512057721058, "grad_norm": 1.2618268728256226, "learning_rate": 1.8299040053599786e-05, "loss": 0.3797, "step": 8695 }, { "epoch": 0.18850345589669143, "grad_norm": 1.7813630104064941, "learning_rate": 1.8297140764601934e-05, "loss": 0.3844, "step": 8700 }, { "epoch": 0.1886117912161723, "grad_norm": 1.564283013343811, "learning_rate": 1.829524051450462e-05, "loss": 0.3253, "step": 8705 }, { "epoch": 0.18872012653565315, "grad_norm": 1.5019862651824951, "learning_rate": 1.8293339303527955e-05, "loss": 0.44, "step": 8710 }, { "epoch": 0.18882846185513402, "grad_norm": 1.5603715181350708, "learning_rate": 1.8291437131892165e-05, "loss": 0.3547, "step": 8715 }, { "epoch": 0.18893679717461487, "grad_norm": 0.8317362070083618, "learning_rate": 1.8289533999817588e-05, "loss": 0.3396, "step": 8720 }, { "epoch": 0.1890451324940957, "grad_norm": 1.8316839933395386, "learning_rate": 1.8287629907524673e-05, "loss": 0.4219, "step": 8725 }, { "epoch": 0.1891534678135766, "grad_norm": 1.5226292610168457, "learning_rate": 1.8285724855233984e-05, "loss": 0.3851, "step": 8730 }, { "epoch": 0.18926180313305743, "grad_norm": 1.2370901107788086, "learning_rate": 1.828381884316619e-05, "loss": 0.3456, "step": 8735 }, { "epoch": 0.1893701384525383, "grad_norm": 1.2740970849990845, "learning_rate": 1.8281911871542075e-05, "loss": 0.3932, "step": 8740 }, { "epoch": 0.18947847377201915, "grad_norm": 1.3298165798187256, "learning_rate": 1.828000394058253e-05, "loss": 0.4616, "step": 8745 }, { "epoch": 0.1895868090915, "grad_norm": 1.2906025648117065, "learning_rate": 1.8278095050508568e-05, "loss": 0.3666, "step": 8750 }, { "epoch": 0.18969514441098087, "grad_norm": 1.5751162767410278, "learning_rate": 1.82761852015413e-05, "loss": 0.4516, "step": 8755 }, { "epoch": 0.18980347973046172, "grad_norm": 1.287365436553955, "learning_rate": 1.827427439390195e-05, "loss": 0.3682, "step": 8760 }, { "epoch": 0.1899118150499426, "grad_norm": 1.2792024612426758, "learning_rate": 1.827236262781186e-05, "loss": 0.3796, "step": 8765 }, { "epoch": 0.19002015036942344, "grad_norm": 1.303788423538208, "learning_rate": 1.8270449903492482e-05, "loss": 0.4268, "step": 8770 }, { "epoch": 0.1901284856889043, "grad_norm": 1.310887336730957, "learning_rate": 1.8268536221165373e-05, "loss": 0.4894, "step": 8775 }, { "epoch": 0.19023682100838515, "grad_norm": 1.691768765449524, "learning_rate": 1.8266621581052204e-05, "loss": 0.3837, "step": 8780 }, { "epoch": 0.190345156327866, "grad_norm": 1.4069315195083618, "learning_rate": 1.826470598337476e-05, "loss": 0.4065, "step": 8785 }, { "epoch": 0.19045349164734687, "grad_norm": 1.6430469751358032, "learning_rate": 1.8262789428354937e-05, "loss": 0.2815, "step": 8790 }, { "epoch": 0.19056182696682772, "grad_norm": 1.4866154193878174, "learning_rate": 1.826087191621473e-05, "loss": 0.4513, "step": 8795 }, { "epoch": 0.1906701622863086, "grad_norm": 1.2910865545272827, "learning_rate": 1.8258953447176263e-05, "loss": 0.471, "step": 8800 }, { "epoch": 0.19077849760578944, "grad_norm": 1.4214445352554321, "learning_rate": 1.8257034021461756e-05, "loss": 0.4538, "step": 8805 }, { "epoch": 0.19088683292527028, "grad_norm": 1.2995946407318115, "learning_rate": 1.8255113639293546e-05, "loss": 0.3449, "step": 8810 }, { "epoch": 0.19099516824475116, "grad_norm": 1.383775234222412, "learning_rate": 1.8253192300894084e-05, "loss": 0.458, "step": 8815 }, { "epoch": 0.191103503564232, "grad_norm": 1.3040348291397095, "learning_rate": 1.825127000648593e-05, "loss": 0.4138, "step": 8820 }, { "epoch": 0.19121183888371288, "grad_norm": 1.2593010663986206, "learning_rate": 1.824934675629175e-05, "loss": 0.3569, "step": 8825 }, { "epoch": 0.19132017420319372, "grad_norm": 1.1368883848190308, "learning_rate": 1.8247422550534317e-05, "loss": 0.3246, "step": 8830 }, { "epoch": 0.1914285095226746, "grad_norm": 1.2186447381973267, "learning_rate": 1.8245497389436532e-05, "loss": 0.3767, "step": 8835 }, { "epoch": 0.19153684484215544, "grad_norm": 1.1128630638122559, "learning_rate": 1.8243571273221394e-05, "loss": 0.325, "step": 8840 }, { "epoch": 0.19164518016163629, "grad_norm": 1.2207444906234741, "learning_rate": 1.8241644202112007e-05, "loss": 0.3574, "step": 8845 }, { "epoch": 0.19175351548111716, "grad_norm": 1.5787330865859985, "learning_rate": 1.8239716176331604e-05, "loss": 0.4541, "step": 8850 }, { "epoch": 0.191861850800598, "grad_norm": 1.50628662109375, "learning_rate": 1.823778719610351e-05, "loss": 0.3797, "step": 8855 }, { "epoch": 0.19197018612007888, "grad_norm": 1.5977801084518433, "learning_rate": 1.8235857261651176e-05, "loss": 0.376, "step": 8860 }, { "epoch": 0.19207852143955972, "grad_norm": 1.7429208755493164, "learning_rate": 1.8233926373198145e-05, "loss": 0.4263, "step": 8865 }, { "epoch": 0.19218685675904057, "grad_norm": 1.7578290700912476, "learning_rate": 1.8231994530968093e-05, "loss": 0.3957, "step": 8870 }, { "epoch": 0.19229519207852144, "grad_norm": 1.0809487104415894, "learning_rate": 1.8230061735184788e-05, "loss": 0.3056, "step": 8875 }, { "epoch": 0.1924035273980023, "grad_norm": 1.303123116493225, "learning_rate": 1.8228127986072114e-05, "loss": 0.3972, "step": 8880 }, { "epoch": 0.19251186271748316, "grad_norm": 1.5853570699691772, "learning_rate": 1.8226193283854076e-05, "loss": 0.3581, "step": 8885 }, { "epoch": 0.192620198036964, "grad_norm": 1.5146377086639404, "learning_rate": 1.8224257628754773e-05, "loss": 0.432, "step": 8890 }, { "epoch": 0.19272853335644488, "grad_norm": 1.125623106956482, "learning_rate": 1.8222321020998422e-05, "loss": 0.3479, "step": 8895 }, { "epoch": 0.19283686867592573, "grad_norm": 1.064041018486023, "learning_rate": 1.8220383460809348e-05, "loss": 0.2622, "step": 8900 }, { "epoch": 0.19294520399540657, "grad_norm": 1.7473469972610474, "learning_rate": 1.8218444948411995e-05, "loss": 0.3587, "step": 8905 }, { "epoch": 0.19305353931488745, "grad_norm": 1.3232474327087402, "learning_rate": 1.8216505484030907e-05, "loss": 0.4291, "step": 8910 }, { "epoch": 0.1931618746343683, "grad_norm": 1.1632760763168335, "learning_rate": 1.821456506789074e-05, "loss": 0.3705, "step": 8915 }, { "epoch": 0.19327020995384916, "grad_norm": 0.9911452531814575, "learning_rate": 1.8212623700216264e-05, "loss": 0.4267, "step": 8920 }, { "epoch": 0.19337854527333, "grad_norm": 2.4771299362182617, "learning_rate": 1.821068138123236e-05, "loss": 0.3518, "step": 8925 }, { "epoch": 0.19348688059281086, "grad_norm": 1.0960181951522827, "learning_rate": 1.8208738111164016e-05, "loss": 0.2539, "step": 8930 }, { "epoch": 0.19359521591229173, "grad_norm": 1.2882250547409058, "learning_rate": 1.8206793890236324e-05, "loss": 0.3366, "step": 8935 }, { "epoch": 0.19370355123177257, "grad_norm": 1.7791731357574463, "learning_rate": 1.82048487186745e-05, "loss": 0.3539, "step": 8940 }, { "epoch": 0.19381188655125345, "grad_norm": 1.5817070007324219, "learning_rate": 1.8202902596703856e-05, "loss": 0.4369, "step": 8945 }, { "epoch": 0.1939202218707343, "grad_norm": 1.1270607709884644, "learning_rate": 1.820095552454983e-05, "loss": 0.3393, "step": 8950 }, { "epoch": 0.19402855719021517, "grad_norm": 1.3946881294250488, "learning_rate": 1.8199007502437958e-05, "loss": 0.4193, "step": 8955 }, { "epoch": 0.194136892509696, "grad_norm": 1.496543049812317, "learning_rate": 1.8197058530593884e-05, "loss": 0.3896, "step": 8960 }, { "epoch": 0.19424522782917686, "grad_norm": 1.724069356918335, "learning_rate": 1.8195108609243375e-05, "loss": 0.3042, "step": 8965 }, { "epoch": 0.19435356314865773, "grad_norm": 1.8363571166992188, "learning_rate": 1.8193157738612293e-05, "loss": 0.3355, "step": 8970 }, { "epoch": 0.19446189846813858, "grad_norm": 1.3993406295776367, "learning_rate": 1.8191205918926624e-05, "loss": 0.4418, "step": 8975 }, { "epoch": 0.19457023378761945, "grad_norm": 1.7535232305526733, "learning_rate": 1.818925315041245e-05, "loss": 0.4045, "step": 8980 }, { "epoch": 0.1946785691071003, "grad_norm": 1.6517096757888794, "learning_rate": 1.8187299433295976e-05, "loss": 0.3105, "step": 8985 }, { "epoch": 0.19478690442658114, "grad_norm": 1.2180994749069214, "learning_rate": 1.8185344767803505e-05, "loss": 0.3839, "step": 8990 }, { "epoch": 0.19489523974606202, "grad_norm": 1.462127447128296, "learning_rate": 1.8183389154161463e-05, "loss": 0.3768, "step": 8995 }, { "epoch": 0.19500357506554286, "grad_norm": 1.3809083700180054, "learning_rate": 1.8181432592596372e-05, "loss": 0.3227, "step": 9000 }, { "epoch": 0.19511191038502373, "grad_norm": 2.048654317855835, "learning_rate": 1.8179475083334875e-05, "loss": 0.4555, "step": 9005 }, { "epoch": 0.19522024570450458, "grad_norm": 1.425752878189087, "learning_rate": 1.8177516626603716e-05, "loss": 0.3835, "step": 9010 }, { "epoch": 0.19532858102398543, "grad_norm": 1.6683518886566162, "learning_rate": 1.8175557222629757e-05, "loss": 0.3813, "step": 9015 }, { "epoch": 0.1954369163434663, "grad_norm": 1.6648999452590942, "learning_rate": 1.8173596871639963e-05, "loss": 0.3618, "step": 9020 }, { "epoch": 0.19554525166294714, "grad_norm": 1.5828500986099243, "learning_rate": 1.8171635573861413e-05, "loss": 0.5097, "step": 9025 }, { "epoch": 0.19565358698242802, "grad_norm": 1.2832062244415283, "learning_rate": 1.816967332952129e-05, "loss": 0.3223, "step": 9030 }, { "epoch": 0.19576192230190886, "grad_norm": 0.9702895283699036, "learning_rate": 1.8167710138846897e-05, "loss": 0.404, "step": 9035 }, { "epoch": 0.19587025762138974, "grad_norm": 1.2182244062423706, "learning_rate": 1.8165746002065633e-05, "loss": 0.3351, "step": 9040 }, { "epoch": 0.19597859294087058, "grad_norm": 1.5754585266113281, "learning_rate": 1.816378091940502e-05, "loss": 0.3396, "step": 9045 }, { "epoch": 0.19608692826035143, "grad_norm": 1.2268459796905518, "learning_rate": 1.8161814891092682e-05, "loss": 0.2268, "step": 9050 }, { "epoch": 0.1961952635798323, "grad_norm": 1.2193976640701294, "learning_rate": 1.8159847917356347e-05, "loss": 0.3187, "step": 9055 }, { "epoch": 0.19630359889931315, "grad_norm": 1.371512770652771, "learning_rate": 1.815787999842387e-05, "loss": 0.3919, "step": 9060 }, { "epoch": 0.19641193421879402, "grad_norm": 1.0439364910125732, "learning_rate": 1.81559111345232e-05, "loss": 0.4333, "step": 9065 }, { "epoch": 0.19652026953827487, "grad_norm": 1.9749867916107178, "learning_rate": 1.81539413258824e-05, "loss": 0.3288, "step": 9070 }, { "epoch": 0.1966286048577557, "grad_norm": 0.9704071283340454, "learning_rate": 1.8151970572729645e-05, "loss": 0.3068, "step": 9075 }, { "epoch": 0.19673694017723659, "grad_norm": 1.0871124267578125, "learning_rate": 1.8149998875293214e-05, "loss": 0.3914, "step": 9080 }, { "epoch": 0.19684527549671743, "grad_norm": 1.0327136516571045, "learning_rate": 1.81480262338015e-05, "loss": 0.4369, "step": 9085 }, { "epoch": 0.1969536108161983, "grad_norm": 1.3218765258789062, "learning_rate": 1.8146052648483004e-05, "loss": 0.3654, "step": 9090 }, { "epoch": 0.19706194613567915, "grad_norm": 2.275744915008545, "learning_rate": 1.814407811956634e-05, "loss": 0.3493, "step": 9095 }, { "epoch": 0.19717028145516002, "grad_norm": 1.1549028158187866, "learning_rate": 1.814210264728022e-05, "loss": 0.3773, "step": 9100 }, { "epoch": 0.19727861677464087, "grad_norm": 1.8279668092727661, "learning_rate": 1.8140126231853477e-05, "loss": 0.3096, "step": 9105 }, { "epoch": 0.19738695209412171, "grad_norm": 1.773930549621582, "learning_rate": 1.8138148873515053e-05, "loss": 0.3794, "step": 9110 }, { "epoch": 0.1974952874136026, "grad_norm": 1.6157686710357666, "learning_rate": 1.813617057249399e-05, "loss": 0.2699, "step": 9115 }, { "epoch": 0.19760362273308343, "grad_norm": 1.5722036361694336, "learning_rate": 1.8134191329019444e-05, "loss": 0.2818, "step": 9120 }, { "epoch": 0.1977119580525643, "grad_norm": 1.5883617401123047, "learning_rate": 1.8132211143320684e-05, "loss": 0.363, "step": 9125 }, { "epoch": 0.19782029337204515, "grad_norm": 2.0634818077087402, "learning_rate": 1.813023001562708e-05, "loss": 0.3156, "step": 9130 }, { "epoch": 0.197928628691526, "grad_norm": 1.1296194791793823, "learning_rate": 1.8128247946168124e-05, "loss": 0.315, "step": 9135 }, { "epoch": 0.19803696401100687, "grad_norm": 0.9498031735420227, "learning_rate": 1.8126264935173405e-05, "loss": 0.294, "step": 9140 }, { "epoch": 0.19814529933048772, "grad_norm": 0.8694939017295837, "learning_rate": 1.8124280982872624e-05, "loss": 0.3637, "step": 9145 }, { "epoch": 0.1982536346499686, "grad_norm": 1.360059380531311, "learning_rate": 1.8122296089495594e-05, "loss": 0.2482, "step": 9150 }, { "epoch": 0.19836196996944944, "grad_norm": 1.2735414505004883, "learning_rate": 1.8120310255272227e-05, "loss": 0.4011, "step": 9155 }, { "epoch": 0.1984703052889303, "grad_norm": 1.2249900102615356, "learning_rate": 1.8118323480432566e-05, "loss": 0.3322, "step": 9160 }, { "epoch": 0.19857864060841116, "grad_norm": 1.642085075378418, "learning_rate": 1.811633576520674e-05, "loss": 0.418, "step": 9165 }, { "epoch": 0.198686975927892, "grad_norm": 1.1750739812850952, "learning_rate": 1.8114347109825e-05, "loss": 0.3291, "step": 9170 }, { "epoch": 0.19879531124737287, "grad_norm": 1.3552240133285522, "learning_rate": 1.8112357514517697e-05, "loss": 0.3509, "step": 9175 }, { "epoch": 0.19890364656685372, "grad_norm": 1.3716201782226562, "learning_rate": 1.8110366979515303e-05, "loss": 0.4089, "step": 9180 }, { "epoch": 0.1990119818863346, "grad_norm": 1.203421950340271, "learning_rate": 1.8108375505048385e-05, "loss": 0.3975, "step": 9185 }, { "epoch": 0.19912031720581544, "grad_norm": 1.6104203462600708, "learning_rate": 1.810638309134763e-05, "loss": 0.3405, "step": 9190 }, { "epoch": 0.19922865252529628, "grad_norm": 1.4127919673919678, "learning_rate": 1.8104389738643825e-05, "loss": 0.3282, "step": 9195 }, { "epoch": 0.19933698784477716, "grad_norm": 1.6407698392868042, "learning_rate": 1.8102395447167874e-05, "loss": 0.3672, "step": 9200 }, { "epoch": 0.199445323164258, "grad_norm": 1.0842140913009644, "learning_rate": 1.8100400217150788e-05, "loss": 0.2516, "step": 9205 }, { "epoch": 0.19955365848373888, "grad_norm": 1.9664489030838013, "learning_rate": 1.8098404048823674e-05, "loss": 0.397, "step": 9210 }, { "epoch": 0.19966199380321972, "grad_norm": 1.521257996559143, "learning_rate": 1.809640694241777e-05, "loss": 0.3113, "step": 9215 }, { "epoch": 0.1997703291227006, "grad_norm": 1.1844743490219116, "learning_rate": 1.8094408898164402e-05, "loss": 0.411, "step": 9220 }, { "epoch": 0.19987866444218144, "grad_norm": 1.2793406248092651, "learning_rate": 1.8092409916295022e-05, "loss": 0.3769, "step": 9225 }, { "epoch": 0.1999869997616623, "grad_norm": 1.7931042909622192, "learning_rate": 1.8090409997041174e-05, "loss": 0.4205, "step": 9230 }, { "epoch": 0.20009533508114316, "grad_norm": 1.4265422821044922, "learning_rate": 1.8088409140634523e-05, "loss": 0.4574, "step": 9235 }, { "epoch": 0.200203670400624, "grad_norm": 1.4384337663650513, "learning_rate": 1.808640734730684e-05, "loss": 0.3285, "step": 9240 }, { "epoch": 0.20031200572010488, "grad_norm": 1.4694468975067139, "learning_rate": 1.8084404617289995e-05, "loss": 0.3606, "step": 9245 }, { "epoch": 0.20042034103958573, "grad_norm": 1.407599687576294, "learning_rate": 1.8082400950815983e-05, "loss": 0.3199, "step": 9250 }, { "epoch": 0.20052867635906657, "grad_norm": 1.740141749382019, "learning_rate": 1.8080396348116894e-05, "loss": 0.3921, "step": 9255 }, { "epoch": 0.20063701167854744, "grad_norm": 1.5205103158950806, "learning_rate": 1.8078390809424934e-05, "loss": 0.4029, "step": 9260 }, { "epoch": 0.2007453469980283, "grad_norm": 1.3409225940704346, "learning_rate": 1.807638433497241e-05, "loss": 0.3076, "step": 9265 }, { "epoch": 0.20085368231750916, "grad_norm": 1.0538170337677002, "learning_rate": 1.8074376924991748e-05, "loss": 0.3793, "step": 9270 }, { "epoch": 0.20096201763699, "grad_norm": 1.647498607635498, "learning_rate": 1.807236857971547e-05, "loss": 0.4835, "step": 9275 }, { "epoch": 0.20107035295647088, "grad_norm": 1.9082945585250854, "learning_rate": 1.807035929937622e-05, "loss": 0.3875, "step": 9280 }, { "epoch": 0.20117868827595173, "grad_norm": 1.776515007019043, "learning_rate": 1.806834908420674e-05, "loss": 0.423, "step": 9285 }, { "epoch": 0.20128702359543257, "grad_norm": 1.018633484840393, "learning_rate": 1.8066337934439878e-05, "loss": 0.3337, "step": 9290 }, { "epoch": 0.20139535891491345, "grad_norm": 1.0136425495147705, "learning_rate": 1.80643258503086e-05, "loss": 0.3715, "step": 9295 }, { "epoch": 0.2015036942343943, "grad_norm": 1.5549334287643433, "learning_rate": 1.806231283204598e-05, "loss": 0.3995, "step": 9300 }, { "epoch": 0.20161202955387517, "grad_norm": 1.1586503982543945, "learning_rate": 1.806029887988519e-05, "loss": 0.3528, "step": 9305 }, { "epoch": 0.201720364873356, "grad_norm": 1.1336928606033325, "learning_rate": 1.8058283994059516e-05, "loss": 0.3825, "step": 9310 }, { "epoch": 0.20182870019283686, "grad_norm": 2.0066144466400146, "learning_rate": 1.8056268174802356e-05, "loss": 0.4068, "step": 9315 }, { "epoch": 0.20193703551231773, "grad_norm": 1.3486570119857788, "learning_rate": 1.8054251422347213e-05, "loss": 0.3176, "step": 9320 }, { "epoch": 0.20204537083179858, "grad_norm": 1.1247416734695435, "learning_rate": 1.805223373692769e-05, "loss": 0.3854, "step": 9325 }, { "epoch": 0.20215370615127945, "grad_norm": 1.5984147787094116, "learning_rate": 1.8050215118777516e-05, "loss": 0.3508, "step": 9330 }, { "epoch": 0.2022620414707603, "grad_norm": 1.211398959159851, "learning_rate": 1.804819556813051e-05, "loss": 0.3465, "step": 9335 }, { "epoch": 0.20237037679024114, "grad_norm": 1.369850993156433, "learning_rate": 1.804617508522061e-05, "loss": 0.2621, "step": 9340 }, { "epoch": 0.20247871210972201, "grad_norm": 1.2979991436004639, "learning_rate": 1.8044153670281858e-05, "loss": 0.3457, "step": 9345 }, { "epoch": 0.20258704742920286, "grad_norm": 1.1576082706451416, "learning_rate": 1.8042131323548408e-05, "loss": 0.3951, "step": 9350 }, { "epoch": 0.20269538274868373, "grad_norm": 1.3548227548599243, "learning_rate": 1.8040108045254513e-05, "loss": 0.3925, "step": 9355 }, { "epoch": 0.20280371806816458, "grad_norm": 0.9641069769859314, "learning_rate": 1.803808383563454e-05, "loss": 0.3256, "step": 9360 }, { "epoch": 0.20291205338764545, "grad_norm": 0.9511604309082031, "learning_rate": 1.8036058694922967e-05, "loss": 0.2569, "step": 9365 }, { "epoch": 0.2030203887071263, "grad_norm": 1.6366918087005615, "learning_rate": 1.8034032623354373e-05, "loss": 0.4097, "step": 9370 }, { "epoch": 0.20312872402660714, "grad_norm": 1.5247396230697632, "learning_rate": 1.803200562116345e-05, "loss": 0.318, "step": 9375 }, { "epoch": 0.20323705934608802, "grad_norm": 1.523963212966919, "learning_rate": 1.8029977688584998e-05, "loss": 0.2343, "step": 9380 }, { "epoch": 0.20334539466556886, "grad_norm": 1.4127154350280762, "learning_rate": 1.8027948825853917e-05, "loss": 0.3398, "step": 9385 }, { "epoch": 0.20345372998504974, "grad_norm": 1.1062722206115723, "learning_rate": 1.802591903320522e-05, "loss": 0.3132, "step": 9390 }, { "epoch": 0.20356206530453058, "grad_norm": 1.3897737264633179, "learning_rate": 1.8023888310874037e-05, "loss": 0.3847, "step": 9395 }, { "epoch": 0.20367040062401143, "grad_norm": 1.8612703084945679, "learning_rate": 1.8021856659095588e-05, "loss": 0.3451, "step": 9400 }, { "epoch": 0.2037787359434923, "grad_norm": 1.5163145065307617, "learning_rate": 1.8019824078105212e-05, "loss": 0.2708, "step": 9405 }, { "epoch": 0.20388707126297315, "grad_norm": 1.5509486198425293, "learning_rate": 1.8017790568138352e-05, "loss": 0.3761, "step": 9410 }, { "epoch": 0.20399540658245402, "grad_norm": 1.3508045673370361, "learning_rate": 1.8015756129430565e-05, "loss": 0.3679, "step": 9415 }, { "epoch": 0.20410374190193487, "grad_norm": 1.2869633436203003, "learning_rate": 1.8013720762217507e-05, "loss": 0.2972, "step": 9420 }, { "epoch": 0.20421207722141574, "grad_norm": 3.095237970352173, "learning_rate": 1.8011684466734943e-05, "loss": 0.2737, "step": 9425 }, { "epoch": 0.20432041254089658, "grad_norm": 1.3270845413208008, "learning_rate": 1.8009647243218748e-05, "loss": 0.3211, "step": 9430 }, { "epoch": 0.20442874786037743, "grad_norm": 1.5959534645080566, "learning_rate": 1.8007609091904906e-05, "loss": 0.3476, "step": 9435 }, { "epoch": 0.2045370831798583, "grad_norm": 1.3374178409576416, "learning_rate": 1.8005570013029502e-05, "loss": 0.3503, "step": 9440 }, { "epoch": 0.20464541849933915, "grad_norm": 1.6612153053283691, "learning_rate": 1.8003530006828736e-05, "loss": 0.3963, "step": 9445 }, { "epoch": 0.20475375381882002, "grad_norm": 1.7032060623168945, "learning_rate": 1.8001489073538913e-05, "loss": 0.353, "step": 9450 }, { "epoch": 0.20486208913830087, "grad_norm": 1.6817985773086548, "learning_rate": 1.799944721339644e-05, "loss": 0.285, "step": 9455 }, { "epoch": 0.2049704244577817, "grad_norm": 1.2975215911865234, "learning_rate": 1.7997404426637843e-05, "loss": 0.2987, "step": 9460 }, { "epoch": 0.2050787597772626, "grad_norm": 1.5696574449539185, "learning_rate": 1.7995360713499742e-05, "loss": 0.3436, "step": 9465 }, { "epoch": 0.20518709509674343, "grad_norm": 1.6089608669281006, "learning_rate": 1.7993316074218873e-05, "loss": 0.3128, "step": 9470 }, { "epoch": 0.2052954304162243, "grad_norm": 1.3218001127243042, "learning_rate": 1.7991270509032076e-05, "loss": 0.3192, "step": 9475 }, { "epoch": 0.20540376573570515, "grad_norm": 1.1467128992080688, "learning_rate": 1.79892240181763e-05, "loss": 0.3578, "step": 9480 }, { "epoch": 0.20551210105518603, "grad_norm": 0.7603650093078613, "learning_rate": 1.7987176601888602e-05, "loss": 0.283, "step": 9485 }, { "epoch": 0.20562043637466687, "grad_norm": 1.423450231552124, "learning_rate": 1.7985128260406143e-05, "loss": 0.3199, "step": 9490 }, { "epoch": 0.20572877169414772, "grad_norm": 1.2474733591079712, "learning_rate": 1.798307899396619e-05, "loss": 0.3496, "step": 9495 }, { "epoch": 0.2058371070136286, "grad_norm": 1.7606542110443115, "learning_rate": 1.798102880280612e-05, "loss": 0.3497, "step": 9500 }, { "epoch": 0.20594544233310944, "grad_norm": 1.0408180952072144, "learning_rate": 1.7978977687163426e-05, "loss": 0.3458, "step": 9505 }, { "epoch": 0.2060537776525903, "grad_norm": 2.754063606262207, "learning_rate": 1.7976925647275686e-05, "loss": 0.3279, "step": 9510 }, { "epoch": 0.20616211297207115, "grad_norm": 2.229678153991699, "learning_rate": 1.7974872683380608e-05, "loss": 0.3605, "step": 9515 }, { "epoch": 0.206270448291552, "grad_norm": 2.490037441253662, "learning_rate": 1.7972818795715986e-05, "loss": 0.3439, "step": 9520 }, { "epoch": 0.20637878361103287, "grad_norm": 1.2323416471481323, "learning_rate": 1.7970763984519747e-05, "loss": 0.3347, "step": 9525 }, { "epoch": 0.20648711893051372, "grad_norm": 1.508428692817688, "learning_rate": 1.79687082500299e-05, "loss": 0.4649, "step": 9530 }, { "epoch": 0.2065954542499946, "grad_norm": 1.5089102983474731, "learning_rate": 1.7966651592484572e-05, "loss": 0.4111, "step": 9535 }, { "epoch": 0.20670378956947544, "grad_norm": 1.5792217254638672, "learning_rate": 1.7964594012122e-05, "loss": 0.3802, "step": 9540 }, { "epoch": 0.2068121248889563, "grad_norm": 1.8466887474060059, "learning_rate": 1.796253550918052e-05, "loss": 0.3773, "step": 9545 }, { "epoch": 0.20692046020843716, "grad_norm": 1.2663402557373047, "learning_rate": 1.796047608389858e-05, "loss": 0.3028, "step": 9550 }, { "epoch": 0.207028795527918, "grad_norm": 1.193809151649475, "learning_rate": 1.7958415736514733e-05, "loss": 0.2396, "step": 9555 }, { "epoch": 0.20713713084739888, "grad_norm": 1.481972575187683, "learning_rate": 1.795635446726764e-05, "loss": 0.3838, "step": 9560 }, { "epoch": 0.20724546616687972, "grad_norm": 1.6031981706619263, "learning_rate": 1.7954292276396073e-05, "loss": 0.4093, "step": 9565 }, { "epoch": 0.2073538014863606, "grad_norm": 1.4254652261734009, "learning_rate": 1.7952229164138895e-05, "loss": 0.3348, "step": 9570 }, { "epoch": 0.20746213680584144, "grad_norm": 1.1751089096069336, "learning_rate": 1.7950165130735094e-05, "loss": 0.3668, "step": 9575 }, { "epoch": 0.2075704721253223, "grad_norm": 0.9846240878105164, "learning_rate": 1.7948100176423758e-05, "loss": 0.429, "step": 9580 }, { "epoch": 0.20767880744480316, "grad_norm": 1.3315677642822266, "learning_rate": 1.7946034301444078e-05, "loss": 0.2795, "step": 9585 }, { "epoch": 0.207787142764284, "grad_norm": 1.3344067335128784, "learning_rate": 1.7943967506035354e-05, "loss": 0.3483, "step": 9590 }, { "epoch": 0.20789547808376488, "grad_norm": 1.1384634971618652, "learning_rate": 1.7941899790436997e-05, "loss": 0.2535, "step": 9595 }, { "epoch": 0.20800381340324572, "grad_norm": 1.8867225646972656, "learning_rate": 1.7939831154888518e-05, "loss": 0.3492, "step": 9600 }, { "epoch": 0.20811214872272657, "grad_norm": 1.202655553817749, "learning_rate": 1.793776159962954e-05, "loss": 0.3888, "step": 9605 }, { "epoch": 0.20822048404220744, "grad_norm": 1.5882394313812256, "learning_rate": 1.7935691124899786e-05, "loss": 0.3194, "step": 9610 }, { "epoch": 0.2083288193616883, "grad_norm": 1.71647047996521, "learning_rate": 1.7933619730939095e-05, "loss": 0.4694, "step": 9615 }, { "epoch": 0.20843715468116916, "grad_norm": 2.1741862297058105, "learning_rate": 1.79315474179874e-05, "loss": 0.427, "step": 9620 }, { "epoch": 0.20854549000065, "grad_norm": 1.4154726266860962, "learning_rate": 1.7929474186284755e-05, "loss": 0.4272, "step": 9625 }, { "epoch": 0.20865382532013088, "grad_norm": 1.1000367403030396, "learning_rate": 1.7927400036071305e-05, "loss": 0.4376, "step": 9630 }, { "epoch": 0.20876216063961173, "grad_norm": 1.1388063430786133, "learning_rate": 1.7925324967587316e-05, "loss": 0.2778, "step": 9635 }, { "epoch": 0.20887049595909257, "grad_norm": 1.1154565811157227, "learning_rate": 1.7923248981073153e-05, "loss": 0.3826, "step": 9640 }, { "epoch": 0.20897883127857345, "grad_norm": 1.5929756164550781, "learning_rate": 1.792117207676928e-05, "loss": 0.4809, "step": 9645 }, { "epoch": 0.2090871665980543, "grad_norm": 1.6467769145965576, "learning_rate": 1.7919094254916286e-05, "loss": 0.3062, "step": 9650 }, { "epoch": 0.20919550191753516, "grad_norm": 1.6739546060562134, "learning_rate": 1.7917015515754847e-05, "loss": 0.5051, "step": 9655 }, { "epoch": 0.209303837237016, "grad_norm": 1.2356232404708862, "learning_rate": 1.791493585952576e-05, "loss": 0.2925, "step": 9660 }, { "epoch": 0.20941217255649686, "grad_norm": 1.6323838233947754, "learning_rate": 1.791285528646992e-05, "loss": 0.3936, "step": 9665 }, { "epoch": 0.20952050787597773, "grad_norm": 1.5227359533309937, "learning_rate": 1.7910773796828326e-05, "loss": 0.2658, "step": 9670 }, { "epoch": 0.20962884319545858, "grad_norm": 1.4594416618347168, "learning_rate": 1.7908691390842095e-05, "loss": 0.2551, "step": 9675 }, { "epoch": 0.20973717851493945, "grad_norm": 1.669189214706421, "learning_rate": 1.7906608068752435e-05, "loss": 0.2924, "step": 9680 }, { "epoch": 0.2098455138344203, "grad_norm": 4.714184284210205, "learning_rate": 1.7904523830800673e-05, "loss": 0.394, "step": 9685 }, { "epoch": 0.20995384915390117, "grad_norm": 1.1602168083190918, "learning_rate": 1.7902438677228233e-05, "loss": 0.2885, "step": 9690 }, { "epoch": 0.210062184473382, "grad_norm": 1.381329894065857, "learning_rate": 1.7900352608276654e-05, "loss": 0.2786, "step": 9695 }, { "epoch": 0.21017051979286286, "grad_norm": 1.2763688564300537, "learning_rate": 1.7898265624187573e-05, "loss": 0.3195, "step": 9700 }, { "epoch": 0.21027885511234373, "grad_norm": 1.3012149333953857, "learning_rate": 1.789617772520273e-05, "loss": 0.3273, "step": 9705 }, { "epoch": 0.21038719043182458, "grad_norm": 2.643717050552368, "learning_rate": 1.789408891156399e-05, "loss": 0.3934, "step": 9710 }, { "epoch": 0.21049552575130545, "grad_norm": 1.572353720664978, "learning_rate": 1.7891999183513298e-05, "loss": 0.3233, "step": 9715 }, { "epoch": 0.2106038610707863, "grad_norm": 1.0000989437103271, "learning_rate": 1.7889908541292724e-05, "loss": 0.2887, "step": 9720 }, { "epoch": 0.21071219639026714, "grad_norm": 1.3561515808105469, "learning_rate": 1.7887816985144436e-05, "loss": 0.2389, "step": 9725 }, { "epoch": 0.21082053170974802, "grad_norm": 2.13163685798645, "learning_rate": 1.7885724515310708e-05, "loss": 0.3888, "step": 9730 }, { "epoch": 0.21092886702922886, "grad_norm": 1.618497610092163, "learning_rate": 1.7883631132033925e-05, "loss": 0.2979, "step": 9735 }, { "epoch": 0.21103720234870973, "grad_norm": 1.470320224761963, "learning_rate": 1.7881536835556572e-05, "loss": 0.3414, "step": 9740 }, { "epoch": 0.21114553766819058, "grad_norm": 1.3974504470825195, "learning_rate": 1.7879441626121245e-05, "loss": 0.2947, "step": 9745 }, { "epoch": 0.21125387298767145, "grad_norm": 1.6475507020950317, "learning_rate": 1.7877345503970633e-05, "loss": 0.4416, "step": 9750 }, { "epoch": 0.2113622083071523, "grad_norm": 1.628808617591858, "learning_rate": 1.7875248469347552e-05, "loss": 0.4288, "step": 9755 }, { "epoch": 0.21147054362663315, "grad_norm": 1.5710721015930176, "learning_rate": 1.7873150522494906e-05, "loss": 0.2526, "step": 9760 }, { "epoch": 0.21157887894611402, "grad_norm": 1.592992901802063, "learning_rate": 1.7871051663655713e-05, "loss": 0.3269, "step": 9765 }, { "epoch": 0.21168721426559486, "grad_norm": 1.644484519958496, "learning_rate": 1.786895189307309e-05, "loss": 0.328, "step": 9770 }, { "epoch": 0.21179554958507574, "grad_norm": 1.0207698345184326, "learning_rate": 1.786685121099027e-05, "loss": 0.2997, "step": 9775 }, { "epoch": 0.21190388490455658, "grad_norm": 1.8197851181030273, "learning_rate": 1.786474961765058e-05, "loss": 0.4484, "step": 9780 }, { "epoch": 0.21201222022403743, "grad_norm": 1.351001262664795, "learning_rate": 1.7862647113297463e-05, "loss": 0.2696, "step": 9785 }, { "epoch": 0.2121205555435183, "grad_norm": 1.3160536289215088, "learning_rate": 1.7860543698174456e-05, "loss": 0.3675, "step": 9790 }, { "epoch": 0.21222889086299915, "grad_norm": 1.4023332595825195, "learning_rate": 1.7858439372525217e-05, "loss": 0.34, "step": 9795 }, { "epoch": 0.21233722618248002, "grad_norm": 1.655108094215393, "learning_rate": 1.7856334136593495e-05, "loss": 0.3465, "step": 9800 }, { "epoch": 0.21244556150196087, "grad_norm": 1.3302114009857178, "learning_rate": 1.785422799062315e-05, "loss": 0.3626, "step": 9805 }, { "epoch": 0.21255389682144174, "grad_norm": 1.0766141414642334, "learning_rate": 1.7852120934858154e-05, "loss": 0.3249, "step": 9810 }, { "epoch": 0.21266223214092259, "grad_norm": 1.301738977432251, "learning_rate": 1.7850012969542565e-05, "loss": 0.2729, "step": 9815 }, { "epoch": 0.21277056746040343, "grad_norm": 1.5826016664505005, "learning_rate": 1.784790409492057e-05, "loss": 0.2378, "step": 9820 }, { "epoch": 0.2128789027798843, "grad_norm": 1.4581520557403564, "learning_rate": 1.7845794311236447e-05, "loss": 0.295, "step": 9825 }, { "epoch": 0.21298723809936515, "grad_norm": 2.2152929306030273, "learning_rate": 1.7843683618734583e-05, "loss": 0.4728, "step": 9830 }, { "epoch": 0.21309557341884602, "grad_norm": 1.6033363342285156, "learning_rate": 1.7841572017659474e-05, "loss": 0.3694, "step": 9835 }, { "epoch": 0.21320390873832687, "grad_norm": 1.5413382053375244, "learning_rate": 1.7839459508255705e-05, "loss": 0.4148, "step": 9840 }, { "epoch": 0.21331224405780772, "grad_norm": 1.120091438293457, "learning_rate": 1.783734609076799e-05, "loss": 0.3098, "step": 9845 }, { "epoch": 0.2134205793772886, "grad_norm": 1.4197351932525635, "learning_rate": 1.783523176544114e-05, "loss": 0.3359, "step": 9850 }, { "epoch": 0.21352891469676943, "grad_norm": 1.149409532546997, "learning_rate": 1.7833116532520057e-05, "loss": 0.3502, "step": 9855 }, { "epoch": 0.2136372500162503, "grad_norm": 2.0468688011169434, "learning_rate": 1.7831000392249763e-05, "loss": 0.4587, "step": 9860 }, { "epoch": 0.21374558533573115, "grad_norm": 1.2272993326187134, "learning_rate": 1.7828883344875385e-05, "loss": 0.3257, "step": 9865 }, { "epoch": 0.213853920655212, "grad_norm": 1.0631515979766846, "learning_rate": 1.7826765390642147e-05, "loss": 0.3011, "step": 9870 }, { "epoch": 0.21396225597469287, "grad_norm": 1.3655149936676025, "learning_rate": 1.7824646529795383e-05, "loss": 0.3424, "step": 9875 }, { "epoch": 0.21407059129417372, "grad_norm": 1.18468177318573, "learning_rate": 1.782252676258053e-05, "loss": 0.2349, "step": 9880 }, { "epoch": 0.2141789266136546, "grad_norm": 1.4978837966918945, "learning_rate": 1.7820406089243133e-05, "loss": 0.3233, "step": 9885 }, { "epoch": 0.21428726193313544, "grad_norm": 1.317826747894287, "learning_rate": 1.7818284510028842e-05, "loss": 0.3697, "step": 9890 }, { "epoch": 0.2143955972526163, "grad_norm": 1.4179141521453857, "learning_rate": 1.7816162025183408e-05, "loss": 0.334, "step": 9895 }, { "epoch": 0.21450393257209716, "grad_norm": 1.2692536115646362, "learning_rate": 1.781403863495269e-05, "loss": 0.2452, "step": 9900 }, { "epoch": 0.214612267891578, "grad_norm": 1.4042019844055176, "learning_rate": 1.781191433958265e-05, "loss": 0.2099, "step": 9905 }, { "epoch": 0.21472060321105887, "grad_norm": 1.2667558193206787, "learning_rate": 1.7809789139319356e-05, "loss": 0.3243, "step": 9910 }, { "epoch": 0.21482893853053972, "grad_norm": 1.2327762842178345, "learning_rate": 1.7807663034408983e-05, "loss": 0.3372, "step": 9915 }, { "epoch": 0.2149372738500206, "grad_norm": 1.6816400289535522, "learning_rate": 1.7805536025097802e-05, "loss": 0.5155, "step": 9920 }, { "epoch": 0.21504560916950144, "grad_norm": 1.8228676319122314, "learning_rate": 1.78034081116322e-05, "loss": 0.2638, "step": 9925 }, { "epoch": 0.21515394448898228, "grad_norm": 1.2524698972702026, "learning_rate": 1.7801279294258668e-05, "loss": 0.3992, "step": 9930 }, { "epoch": 0.21526227980846316, "grad_norm": 1.4714090824127197, "learning_rate": 1.779914957322379e-05, "loss": 0.4487, "step": 9935 }, { "epoch": 0.215370615127944, "grad_norm": 1.2564371824264526, "learning_rate": 1.7797018948774264e-05, "loss": 0.3507, "step": 9940 }, { "epoch": 0.21547895044742488, "grad_norm": 1.2179980278015137, "learning_rate": 1.7794887421156893e-05, "loss": 0.4347, "step": 9945 }, { "epoch": 0.21558728576690572, "grad_norm": 1.8086929321289062, "learning_rate": 1.7792754990618585e-05, "loss": 0.3446, "step": 9950 }, { "epoch": 0.2156956210863866, "grad_norm": 1.2618955373764038, "learning_rate": 1.779062165740634e-05, "loss": 0.302, "step": 9955 }, { "epoch": 0.21580395640586744, "grad_norm": 1.3581537008285522, "learning_rate": 1.7788487421767285e-05, "loss": 0.3428, "step": 9960 }, { "epoch": 0.2159122917253483, "grad_norm": 1.2075843811035156, "learning_rate": 1.778635228394863e-05, "loss": 0.3218, "step": 9965 }, { "epoch": 0.21602062704482916, "grad_norm": 1.1618940830230713, "learning_rate": 1.7784216244197707e-05, "loss": 0.2865, "step": 9970 }, { "epoch": 0.21612896236431, "grad_norm": 1.2439323663711548, "learning_rate": 1.7782079302761935e-05, "loss": 0.3113, "step": 9975 }, { "epoch": 0.21623729768379088, "grad_norm": 1.7784899473190308, "learning_rate": 1.7779941459888852e-05, "loss": 0.4847, "step": 9980 }, { "epoch": 0.21634563300327173, "grad_norm": 1.6789460182189941, "learning_rate": 1.7777802715826097e-05, "loss": 0.3619, "step": 9985 }, { "epoch": 0.21645396832275257, "grad_norm": 1.5005308389663696, "learning_rate": 1.777566307082141e-05, "loss": 0.3959, "step": 9990 }, { "epoch": 0.21656230364223344, "grad_norm": 1.5075708627700806, "learning_rate": 1.777352252512263e-05, "loss": 0.3524, "step": 9995 }, { "epoch": 0.2166706389617143, "grad_norm": 1.4210543632507324, "learning_rate": 1.7771381078977717e-05, "loss": 0.2992, "step": 10000 }, { "epoch": 0.21677897428119516, "grad_norm": 1.9359567165374756, "learning_rate": 1.776923873263472e-05, "loss": 0.3965, "step": 10005 }, { "epoch": 0.216887309600676, "grad_norm": 1.4725520610809326, "learning_rate": 1.77670954863418e-05, "loss": 0.2981, "step": 10010 }, { "epoch": 0.21699564492015688, "grad_norm": 1.4100087881088257, "learning_rate": 1.776495134034722e-05, "loss": 0.3788, "step": 10015 }, { "epoch": 0.21710398023963773, "grad_norm": 1.464329719543457, "learning_rate": 1.7762806294899344e-05, "loss": 0.2662, "step": 10020 }, { "epoch": 0.21721231555911857, "grad_norm": 1.533594012260437, "learning_rate": 1.7760660350246645e-05, "loss": 0.419, "step": 10025 }, { "epoch": 0.21732065087859945, "grad_norm": 1.3225185871124268, "learning_rate": 1.77585135066377e-05, "loss": 0.4658, "step": 10030 }, { "epoch": 0.2174289861980803, "grad_norm": 1.1891909837722778, "learning_rate": 1.775636576432119e-05, "loss": 0.3072, "step": 10035 }, { "epoch": 0.21753732151756117, "grad_norm": 1.7973814010620117, "learning_rate": 1.7754217123545895e-05, "loss": 0.3422, "step": 10040 }, { "epoch": 0.217645656837042, "grad_norm": 1.749811053276062, "learning_rate": 1.7752067584560705e-05, "loss": 0.4289, "step": 10045 }, { "epoch": 0.21775399215652286, "grad_norm": 1.7904770374298096, "learning_rate": 1.7749917147614608e-05, "loss": 0.3454, "step": 10050 }, { "epoch": 0.21786232747600373, "grad_norm": 1.2260874509811401, "learning_rate": 1.7747765812956704e-05, "loss": 0.3268, "step": 10055 }, { "epoch": 0.21797066279548458, "grad_norm": 1.6420493125915527, "learning_rate": 1.7745613580836195e-05, "loss": 0.3405, "step": 10060 }, { "epoch": 0.21807899811496545, "grad_norm": 1.627817153930664, "learning_rate": 1.774346045150238e-05, "loss": 0.3829, "step": 10065 }, { "epoch": 0.2181873334344463, "grad_norm": 1.4513124227523804, "learning_rate": 1.774130642520467e-05, "loss": 0.4271, "step": 10070 }, { "epoch": 0.21829566875392717, "grad_norm": 1.3141530752182007, "learning_rate": 1.7739151502192574e-05, "loss": 0.3327, "step": 10075 }, { "epoch": 0.21840400407340801, "grad_norm": 1.5414435863494873, "learning_rate": 1.773699568271571e-05, "loss": 0.4228, "step": 10080 }, { "epoch": 0.21851233939288886, "grad_norm": 1.5572410821914673, "learning_rate": 1.7734838967023796e-05, "loss": 0.3738, "step": 10085 }, { "epoch": 0.21862067471236973, "grad_norm": 1.983725666999817, "learning_rate": 1.7732681355366654e-05, "loss": 0.3687, "step": 10090 }, { "epoch": 0.21872901003185058, "grad_norm": 1.5291714668273926, "learning_rate": 1.7730522847994214e-05, "loss": 0.3095, "step": 10095 }, { "epoch": 0.21883734535133145, "grad_norm": 0.8050408959388733, "learning_rate": 1.7728363445156505e-05, "loss": 0.334, "step": 10100 }, { "epoch": 0.2189456806708123, "grad_norm": 1.286200761795044, "learning_rate": 1.7726203147103664e-05, "loss": 0.2989, "step": 10105 }, { "epoch": 0.21905401599029314, "grad_norm": 2.0804243087768555, "learning_rate": 1.7724041954085925e-05, "loss": 0.4125, "step": 10110 }, { "epoch": 0.21916235130977402, "grad_norm": 1.3365535736083984, "learning_rate": 1.772187986635363e-05, "loss": 0.3718, "step": 10115 }, { "epoch": 0.21927068662925486, "grad_norm": 1.9013943672180176, "learning_rate": 1.771971688415723e-05, "loss": 0.3961, "step": 10120 }, { "epoch": 0.21937902194873574, "grad_norm": 1.3529918193817139, "learning_rate": 1.7717553007747268e-05, "loss": 0.3051, "step": 10125 }, { "epoch": 0.21948735726821658, "grad_norm": 1.6568107604980469, "learning_rate": 1.7715388237374397e-05, "loss": 0.3492, "step": 10130 }, { "epoch": 0.21959569258769743, "grad_norm": 1.5694626569747925, "learning_rate": 1.771322257328938e-05, "loss": 0.4141, "step": 10135 }, { "epoch": 0.2197040279071783, "grad_norm": 1.2358943223953247, "learning_rate": 1.771105601574307e-05, "loss": 0.3627, "step": 10140 }, { "epoch": 0.21981236322665915, "grad_norm": 1.7119452953338623, "learning_rate": 1.7708888564986432e-05, "loss": 0.3228, "step": 10145 }, { "epoch": 0.21992069854614002, "grad_norm": 1.6093275547027588, "learning_rate": 1.770672022127053e-05, "loss": 0.3644, "step": 10150 }, { "epoch": 0.22002903386562087, "grad_norm": 1.572827935218811, "learning_rate": 1.770455098484654e-05, "loss": 0.4253, "step": 10155 }, { "epoch": 0.22013736918510174, "grad_norm": 1.6229299306869507, "learning_rate": 1.7702380855965733e-05, "loss": 0.3617, "step": 10160 }, { "epoch": 0.22024570450458258, "grad_norm": 0.9676244258880615, "learning_rate": 1.7700209834879486e-05, "loss": 0.29, "step": 10165 }, { "epoch": 0.22035403982406343, "grad_norm": 1.7120918035507202, "learning_rate": 1.7698037921839275e-05, "loss": 0.341, "step": 10170 }, { "epoch": 0.2204623751435443, "grad_norm": 1.3234654664993286, "learning_rate": 1.769586511709669e-05, "loss": 0.2886, "step": 10175 }, { "epoch": 0.22057071046302515, "grad_norm": 1.6903460025787354, "learning_rate": 1.7693691420903416e-05, "loss": 0.3076, "step": 10180 }, { "epoch": 0.22067904578250602, "grad_norm": 1.0809097290039062, "learning_rate": 1.769151683351124e-05, "loss": 0.3865, "step": 10185 }, { "epoch": 0.22078738110198687, "grad_norm": 1.5830439329147339, "learning_rate": 1.768934135517206e-05, "loss": 0.377, "step": 10190 }, { "epoch": 0.2208957164214677, "grad_norm": 1.4415953159332275, "learning_rate": 1.768716498613787e-05, "loss": 0.3112, "step": 10195 }, { "epoch": 0.2210040517409486, "grad_norm": 1.113556146621704, "learning_rate": 1.768498772666077e-05, "loss": 0.3365, "step": 10200 }, { "epoch": 0.22111238706042943, "grad_norm": 1.4840644598007202, "learning_rate": 1.7682809576992963e-05, "loss": 0.3102, "step": 10205 }, { "epoch": 0.2212207223799103, "grad_norm": 1.6705069541931152, "learning_rate": 1.7680630537386755e-05, "loss": 0.2396, "step": 10210 }, { "epoch": 0.22132905769939115, "grad_norm": 1.4638428688049316, "learning_rate": 1.767845060809455e-05, "loss": 0.3117, "step": 10215 }, { "epoch": 0.22143739301887203, "grad_norm": 1.4951224327087402, "learning_rate": 1.7676269789368873e-05, "loss": 0.418, "step": 10220 }, { "epoch": 0.22154572833835287, "grad_norm": 1.2554142475128174, "learning_rate": 1.7674088081462328e-05, "loss": 0.3044, "step": 10225 }, { "epoch": 0.22165406365783372, "grad_norm": 1.3036165237426758, "learning_rate": 1.7671905484627637e-05, "loss": 0.3253, "step": 10230 }, { "epoch": 0.2217623989773146, "grad_norm": 1.168125867843628, "learning_rate": 1.766972199911762e-05, "loss": 0.3228, "step": 10235 }, { "epoch": 0.22187073429679544, "grad_norm": 0.94898521900177, "learning_rate": 1.7667537625185204e-05, "loss": 0.3038, "step": 10240 }, { "epoch": 0.2219790696162763, "grad_norm": 2.2627065181732178, "learning_rate": 1.766535236308341e-05, "loss": 0.3202, "step": 10245 }, { "epoch": 0.22208740493575715, "grad_norm": 1.6111249923706055, "learning_rate": 1.7663166213065376e-05, "loss": 0.3936, "step": 10250 }, { "epoch": 0.222195740255238, "grad_norm": 1.2348822355270386, "learning_rate": 1.7660979175384326e-05, "loss": 0.4074, "step": 10255 }, { "epoch": 0.22230407557471887, "grad_norm": 1.7615418434143066, "learning_rate": 1.7658791250293604e-05, "loss": 0.3143, "step": 10260 }, { "epoch": 0.22241241089419972, "grad_norm": 2.2745773792266846, "learning_rate": 1.765660243804664e-05, "loss": 0.3923, "step": 10265 }, { "epoch": 0.2225207462136806, "grad_norm": 1.673394799232483, "learning_rate": 1.7654412738896985e-05, "loss": 0.4058, "step": 10270 }, { "epoch": 0.22262908153316144, "grad_norm": 1.911873459815979, "learning_rate": 1.7652222153098275e-05, "loss": 0.4003, "step": 10275 }, { "epoch": 0.2227374168526423, "grad_norm": 1.539247751235962, "learning_rate": 1.7650030680904254e-05, "loss": 0.2215, "step": 10280 }, { "epoch": 0.22284575217212316, "grad_norm": 1.0157519578933716, "learning_rate": 1.7647838322568786e-05, "loss": 0.2867, "step": 10285 }, { "epoch": 0.222954087491604, "grad_norm": 1.438899278640747, "learning_rate": 1.7645645078345807e-05, "loss": 0.403, "step": 10290 }, { "epoch": 0.22306242281108488, "grad_norm": 1.9628549814224243, "learning_rate": 1.7643450948489376e-05, "loss": 0.4333, "step": 10295 }, { "epoch": 0.22317075813056572, "grad_norm": 1.040802240371704, "learning_rate": 1.7641255933253654e-05, "loss": 0.2667, "step": 10300 }, { "epoch": 0.2232790934500466, "grad_norm": 1.419093132019043, "learning_rate": 1.7639060032892897e-05, "loss": 0.2657, "step": 10305 }, { "epoch": 0.22338742876952744, "grad_norm": 2.2976362705230713, "learning_rate": 1.763686324766147e-05, "loss": 0.4251, "step": 10310 }, { "epoch": 0.2234957640890083, "grad_norm": 1.5780490636825562, "learning_rate": 1.763466557781383e-05, "loss": 0.2988, "step": 10315 }, { "epoch": 0.22360409940848916, "grad_norm": 1.64057195186615, "learning_rate": 1.763246702360456e-05, "loss": 0.4435, "step": 10320 }, { "epoch": 0.22371243472797, "grad_norm": 1.442791223526001, "learning_rate": 1.763026758528831e-05, "loss": 0.4036, "step": 10325 }, { "epoch": 0.22382077004745088, "grad_norm": 1.2050853967666626, "learning_rate": 1.7628067263119866e-05, "loss": 0.2962, "step": 10330 }, { "epoch": 0.22392910536693172, "grad_norm": 2.0866832733154297, "learning_rate": 1.7625866057354102e-05, "loss": 0.3569, "step": 10335 }, { "epoch": 0.2240374406864126, "grad_norm": 1.210807204246521, "learning_rate": 1.7623663968245982e-05, "loss": 0.3195, "step": 10340 }, { "epoch": 0.22414577600589344, "grad_norm": 1.733176589012146, "learning_rate": 1.76214609960506e-05, "loss": 0.399, "step": 10345 }, { "epoch": 0.2242541113253743, "grad_norm": 1.6988035440444946, "learning_rate": 1.761925714102313e-05, "loss": 0.4146, "step": 10350 }, { "epoch": 0.22436244664485516, "grad_norm": 1.4822556972503662, "learning_rate": 1.761705240341886e-05, "loss": 0.4152, "step": 10355 }, { "epoch": 0.224470781964336, "grad_norm": 1.0345982313156128, "learning_rate": 1.7614846783493166e-05, "loss": 0.3426, "step": 10360 }, { "epoch": 0.22457911728381688, "grad_norm": 1.2202496528625488, "learning_rate": 1.7612640281501545e-05, "loss": 0.3896, "step": 10365 }, { "epoch": 0.22468745260329773, "grad_norm": 1.2948485612869263, "learning_rate": 1.7610432897699586e-05, "loss": 0.3214, "step": 10370 }, { "epoch": 0.22479578792277857, "grad_norm": 1.9843567609786987, "learning_rate": 1.7608224632342978e-05, "loss": 0.3721, "step": 10375 }, { "epoch": 0.22490412324225945, "grad_norm": 1.022020697593689, "learning_rate": 1.7606015485687518e-05, "loss": 0.3235, "step": 10380 }, { "epoch": 0.2250124585617403, "grad_norm": 1.3501267433166504, "learning_rate": 1.76038054579891e-05, "loss": 0.3284, "step": 10385 }, { "epoch": 0.22512079388122117, "grad_norm": 1.2349982261657715, "learning_rate": 1.7601594549503732e-05, "loss": 0.325, "step": 10390 }, { "epoch": 0.225229129200702, "grad_norm": 1.760105848312378, "learning_rate": 1.7599382760487502e-05, "loss": 0.3218, "step": 10395 }, { "epoch": 0.22533746452018286, "grad_norm": 1.502200961112976, "learning_rate": 1.7597170091196618e-05, "loss": 0.2748, "step": 10400 }, { "epoch": 0.22544579983966373, "grad_norm": 1.721537709236145, "learning_rate": 1.7594956541887386e-05, "loss": 0.4388, "step": 10405 }, { "epoch": 0.22555413515914458, "grad_norm": 1.5739154815673828, "learning_rate": 1.7592742112816213e-05, "loss": 0.331, "step": 10410 }, { "epoch": 0.22566247047862545, "grad_norm": 1.5114517211914062, "learning_rate": 1.75905268042396e-05, "loss": 0.2958, "step": 10415 }, { "epoch": 0.2257708057981063, "grad_norm": 1.5241791009902954, "learning_rate": 1.7588310616414166e-05, "loss": 0.274, "step": 10420 }, { "epoch": 0.22587914111758717, "grad_norm": 1.4519070386886597, "learning_rate": 1.758609354959662e-05, "loss": 0.3281, "step": 10425 }, { "epoch": 0.225987476437068, "grad_norm": 1.380971908569336, "learning_rate": 1.7583875604043777e-05, "loss": 0.3271, "step": 10430 }, { "epoch": 0.22609581175654886, "grad_norm": 1.830227017402649, "learning_rate": 1.7581656780012547e-05, "loss": 0.4306, "step": 10435 }, { "epoch": 0.22620414707602973, "grad_norm": 1.6857656240463257, "learning_rate": 1.7579437077759957e-05, "loss": 0.3145, "step": 10440 }, { "epoch": 0.22631248239551058, "grad_norm": 1.3906691074371338, "learning_rate": 1.757721649754312e-05, "loss": 0.2441, "step": 10445 }, { "epoch": 0.22642081771499145, "grad_norm": 0.7788088917732239, "learning_rate": 1.7574995039619258e-05, "loss": 0.2873, "step": 10450 }, { "epoch": 0.2265291530344723, "grad_norm": 1.4359341859817505, "learning_rate": 1.7572772704245695e-05, "loss": 0.4233, "step": 10455 }, { "epoch": 0.22663748835395314, "grad_norm": 1.3436955213546753, "learning_rate": 1.7570549491679852e-05, "loss": 0.2345, "step": 10460 }, { "epoch": 0.22674582367343402, "grad_norm": 1.7709088325500488, "learning_rate": 1.7568325402179264e-05, "loss": 0.4578, "step": 10465 }, { "epoch": 0.22685415899291486, "grad_norm": 1.7356135845184326, "learning_rate": 1.7566100436001544e-05, "loss": 0.3573, "step": 10470 }, { "epoch": 0.22696249431239574, "grad_norm": 1.2329379320144653, "learning_rate": 1.7563874593404434e-05, "loss": 0.3531, "step": 10475 }, { "epoch": 0.22707082963187658, "grad_norm": 1.748194694519043, "learning_rate": 1.7561647874645754e-05, "loss": 0.376, "step": 10480 }, { "epoch": 0.22717916495135745, "grad_norm": 1.0227757692337036, "learning_rate": 1.7559420279983442e-05, "loss": 0.2706, "step": 10485 }, { "epoch": 0.2272875002708383, "grad_norm": 1.2897522449493408, "learning_rate": 1.7557191809675536e-05, "loss": 0.4076, "step": 10490 }, { "epoch": 0.22739583559031915, "grad_norm": 1.559122920036316, "learning_rate": 1.7554962463980162e-05, "loss": 0.3562, "step": 10495 }, { "epoch": 0.22750417090980002, "grad_norm": 1.137703776359558, "learning_rate": 1.755273224315556e-05, "loss": 0.3562, "step": 10500 }, { "epoch": 0.22761250622928086, "grad_norm": 1.5278732776641846, "learning_rate": 1.7550501147460068e-05, "loss": 0.2881, "step": 10505 }, { "epoch": 0.22772084154876174, "grad_norm": 0.6661429405212402, "learning_rate": 1.7548269177152124e-05, "loss": 0.2738, "step": 10510 }, { "epoch": 0.22782917686824258, "grad_norm": 1.2340260744094849, "learning_rate": 1.7546036332490267e-05, "loss": 0.3413, "step": 10515 }, { "epoch": 0.22793751218772343, "grad_norm": 1.2275168895721436, "learning_rate": 1.7543802613733143e-05, "loss": 0.369, "step": 10520 }, { "epoch": 0.2280458475072043, "grad_norm": 1.6187467575073242, "learning_rate": 1.7541568021139493e-05, "loss": 0.321, "step": 10525 }, { "epoch": 0.22815418282668515, "grad_norm": 1.5437965393066406, "learning_rate": 1.7539332554968158e-05, "loss": 0.3407, "step": 10530 }, { "epoch": 0.22826251814616602, "grad_norm": 1.4069980382919312, "learning_rate": 1.7537096215478088e-05, "loss": 0.3491, "step": 10535 }, { "epoch": 0.22837085346564687, "grad_norm": 1.3349769115447998, "learning_rate": 1.7534859002928323e-05, "loss": 0.3268, "step": 10540 }, { "epoch": 0.22847918878512774, "grad_norm": 1.602196455001831, "learning_rate": 1.7532620917578022e-05, "loss": 0.4272, "step": 10545 }, { "epoch": 0.22858752410460859, "grad_norm": 1.0642008781433105, "learning_rate": 1.753038195968642e-05, "loss": 0.3687, "step": 10550 }, { "epoch": 0.22869585942408943, "grad_norm": 1.397403359413147, "learning_rate": 1.7528142129512877e-05, "loss": 0.3142, "step": 10555 }, { "epoch": 0.2288041947435703, "grad_norm": 1.7768025398254395, "learning_rate": 1.7525901427316836e-05, "loss": 0.3329, "step": 10560 }, { "epoch": 0.22891253006305115, "grad_norm": 1.5803539752960205, "learning_rate": 1.7523659853357855e-05, "loss": 0.4442, "step": 10565 }, { "epoch": 0.22902086538253202, "grad_norm": 1.428842306137085, "learning_rate": 1.7521417407895585e-05, "loss": 0.3022, "step": 10570 }, { "epoch": 0.22912920070201287, "grad_norm": 1.1659988164901733, "learning_rate": 1.7519174091189777e-05, "loss": 0.3551, "step": 10575 }, { "epoch": 0.22923753602149372, "grad_norm": 1.6541821956634521, "learning_rate": 1.751692990350029e-05, "loss": 0.3197, "step": 10580 }, { "epoch": 0.2293458713409746, "grad_norm": 1.277632236480713, "learning_rate": 1.7514684845087075e-05, "loss": 0.3833, "step": 10585 }, { "epoch": 0.22945420666045543, "grad_norm": 1.235364556312561, "learning_rate": 1.7512438916210195e-05, "loss": 0.3031, "step": 10590 }, { "epoch": 0.2295625419799363, "grad_norm": 1.2284564971923828, "learning_rate": 1.7510192117129798e-05, "loss": 0.3074, "step": 10595 }, { "epoch": 0.22967087729941715, "grad_norm": 3.1181302070617676, "learning_rate": 1.7507944448106148e-05, "loss": 0.4376, "step": 10600 }, { "epoch": 0.22977921261889803, "grad_norm": 1.3150908946990967, "learning_rate": 1.7505695909399604e-05, "loss": 0.3984, "step": 10605 }, { "epoch": 0.22988754793837887, "grad_norm": 2.0225272178649902, "learning_rate": 1.7503446501270624e-05, "loss": 0.249, "step": 10610 }, { "epoch": 0.22999588325785972, "grad_norm": 1.4765443801879883, "learning_rate": 1.7501196223979767e-05, "loss": 0.2662, "step": 10615 }, { "epoch": 0.2301042185773406, "grad_norm": 0.7927231788635254, "learning_rate": 1.7498945077787697e-05, "loss": 0.2806, "step": 10620 }, { "epoch": 0.23021255389682144, "grad_norm": 1.5055681467056274, "learning_rate": 1.7496693062955174e-05, "loss": 0.3486, "step": 10625 }, { "epoch": 0.2303208892163023, "grad_norm": 1.178777813911438, "learning_rate": 1.749444017974306e-05, "loss": 0.3092, "step": 10630 }, { "epoch": 0.23042922453578316, "grad_norm": 1.577877402305603, "learning_rate": 1.7492186428412317e-05, "loss": 0.3406, "step": 10635 }, { "epoch": 0.230537559855264, "grad_norm": 1.2528250217437744, "learning_rate": 1.748993180922401e-05, "loss": 0.286, "step": 10640 }, { "epoch": 0.23064589517474487, "grad_norm": 1.3575682640075684, "learning_rate": 1.7487676322439303e-05, "loss": 0.3753, "step": 10645 }, { "epoch": 0.23075423049422572, "grad_norm": 2.003579616546631, "learning_rate": 1.748541996831946e-05, "loss": 0.3985, "step": 10650 }, { "epoch": 0.2308625658137066, "grad_norm": 1.2306933403015137, "learning_rate": 1.748316274712584e-05, "loss": 0.3704, "step": 10655 }, { "epoch": 0.23097090113318744, "grad_norm": 2.128343105316162, "learning_rate": 1.7480904659119916e-05, "loss": 0.3771, "step": 10660 }, { "epoch": 0.23107923645266829, "grad_norm": 1.9441570043563843, "learning_rate": 1.7478645704563254e-05, "loss": 0.3067, "step": 10665 }, { "epoch": 0.23118757177214916, "grad_norm": 1.422179937362671, "learning_rate": 1.7476385883717514e-05, "loss": 0.4923, "step": 10670 }, { "epoch": 0.23129590709163, "grad_norm": 1.1675961017608643, "learning_rate": 1.7474125196844468e-05, "loss": 0.3319, "step": 10675 }, { "epoch": 0.23140424241111088, "grad_norm": 1.4099128246307373, "learning_rate": 1.7471863644205978e-05, "loss": 0.3803, "step": 10680 }, { "epoch": 0.23151257773059172, "grad_norm": 1.2016172409057617, "learning_rate": 1.7469601226064016e-05, "loss": 0.3943, "step": 10685 }, { "epoch": 0.2316209130500726, "grad_norm": 1.0243077278137207, "learning_rate": 1.7467337942680645e-05, "loss": 0.2822, "step": 10690 }, { "epoch": 0.23172924836955344, "grad_norm": 1.6162571907043457, "learning_rate": 1.7465073794318034e-05, "loss": 0.3357, "step": 10695 }, { "epoch": 0.2318375836890343, "grad_norm": 1.581179141998291, "learning_rate": 1.746280878123845e-05, "loss": 0.4258, "step": 10700 }, { "epoch": 0.23194591900851516, "grad_norm": 1.6860178709030151, "learning_rate": 1.7460542903704264e-05, "loss": 0.1893, "step": 10705 }, { "epoch": 0.232054254327996, "grad_norm": 1.4266127347946167, "learning_rate": 1.745827616197794e-05, "loss": 0.4921, "step": 10710 }, { "epoch": 0.23216258964747688, "grad_norm": 2.001805305480957, "learning_rate": 1.7456008556322047e-05, "loss": 0.3045, "step": 10715 }, { "epoch": 0.23227092496695773, "grad_norm": 1.4363256692886353, "learning_rate": 1.7453740086999253e-05, "loss": 0.3854, "step": 10720 }, { "epoch": 0.23237926028643857, "grad_norm": 1.6565606594085693, "learning_rate": 1.7451470754272328e-05, "loss": 0.4526, "step": 10725 }, { "epoch": 0.23248759560591944, "grad_norm": 1.3363947868347168, "learning_rate": 1.7449200558404142e-05, "loss": 0.3141, "step": 10730 }, { "epoch": 0.2325959309254003, "grad_norm": 1.412692904472351, "learning_rate": 1.7446929499657658e-05, "loss": 0.3167, "step": 10735 }, { "epoch": 0.23270426624488116, "grad_norm": 1.1338438987731934, "learning_rate": 1.7444657578295944e-05, "loss": 0.3333, "step": 10740 }, { "epoch": 0.232812601564362, "grad_norm": 1.7174588441848755, "learning_rate": 1.7442384794582174e-05, "loss": 0.2816, "step": 10745 }, { "epoch": 0.23292093688384288, "grad_norm": 1.1914223432540894, "learning_rate": 1.7440111148779612e-05, "loss": 0.3494, "step": 10750 }, { "epoch": 0.23302927220332373, "grad_norm": 1.1437087059020996, "learning_rate": 1.7437836641151624e-05, "loss": 0.3419, "step": 10755 }, { "epoch": 0.23313760752280457, "grad_norm": 0.6874377727508545, "learning_rate": 1.7435561271961682e-05, "loss": 0.2809, "step": 10760 }, { "epoch": 0.23324594284228545, "grad_norm": 1.3754355907440186, "learning_rate": 1.7433285041473352e-05, "loss": 0.3524, "step": 10765 }, { "epoch": 0.2333542781617663, "grad_norm": 0.9624472260475159, "learning_rate": 1.74310079499503e-05, "loss": 0.3136, "step": 10770 }, { "epoch": 0.23346261348124717, "grad_norm": 2.185076951980591, "learning_rate": 1.742872999765629e-05, "loss": 0.3699, "step": 10775 }, { "epoch": 0.233570948800728, "grad_norm": 1.3612366914749146, "learning_rate": 1.7426451184855196e-05, "loss": 0.3369, "step": 10780 }, { "epoch": 0.23367928412020886, "grad_norm": 2.324382781982422, "learning_rate": 1.742417151181098e-05, "loss": 0.266, "step": 10785 }, { "epoch": 0.23378761943968973, "grad_norm": 1.496703028678894, "learning_rate": 1.742189097878771e-05, "loss": 0.4476, "step": 10790 }, { "epoch": 0.23389595475917058, "grad_norm": 1.5937895774841309, "learning_rate": 1.7419609586049543e-05, "loss": 0.4175, "step": 10795 }, { "epoch": 0.23400429007865145, "grad_norm": 2.050917148590088, "learning_rate": 1.7417327333860757e-05, "loss": 0.3853, "step": 10800 }, { "epoch": 0.2341126253981323, "grad_norm": 1.8484375476837158, "learning_rate": 1.741504422248571e-05, "loss": 0.321, "step": 10805 }, { "epoch": 0.23422096071761317, "grad_norm": 2.115156888961792, "learning_rate": 1.741276025218887e-05, "loss": 0.4003, "step": 10810 }, { "epoch": 0.23432929603709401, "grad_norm": 1.1378636360168457, "learning_rate": 1.7410475423234795e-05, "loss": 0.2988, "step": 10815 }, { "epoch": 0.23443763135657486, "grad_norm": 1.6507700681686401, "learning_rate": 1.740818973588815e-05, "loss": 0.3439, "step": 10820 }, { "epoch": 0.23454596667605573, "grad_norm": 1.3869158029556274, "learning_rate": 1.74059031904137e-05, "loss": 0.3241, "step": 10825 }, { "epoch": 0.23465430199553658, "grad_norm": 1.901099443435669, "learning_rate": 1.7403615787076308e-05, "loss": 0.4219, "step": 10830 }, { "epoch": 0.23476263731501745, "grad_norm": 1.1544069051742554, "learning_rate": 1.740132752614093e-05, "loss": 0.4019, "step": 10835 }, { "epoch": 0.2348709726344983, "grad_norm": 1.7571159601211548, "learning_rate": 1.739903840787263e-05, "loss": 0.2805, "step": 10840 }, { "epoch": 0.23497930795397914, "grad_norm": 1.8347347974777222, "learning_rate": 1.739674843253657e-05, "loss": 0.3042, "step": 10845 }, { "epoch": 0.23508764327346002, "grad_norm": 1.547672986984253, "learning_rate": 1.7394457600398003e-05, "loss": 0.38, "step": 10850 }, { "epoch": 0.23519597859294086, "grad_norm": 1.1436816453933716, "learning_rate": 1.7392165911722297e-05, "loss": 0.3114, "step": 10855 }, { "epoch": 0.23530431391242174, "grad_norm": 1.4196189641952515, "learning_rate": 1.7389873366774902e-05, "loss": 0.4831, "step": 10860 }, { "epoch": 0.23541264923190258, "grad_norm": 1.4379255771636963, "learning_rate": 1.7387579965821378e-05, "loss": 0.3586, "step": 10865 }, { "epoch": 0.23552098455138346, "grad_norm": 1.1036458015441895, "learning_rate": 1.7385285709127388e-05, "loss": 0.2816, "step": 10870 }, { "epoch": 0.2356293198708643, "grad_norm": 1.2083353996276855, "learning_rate": 1.7382990596958672e-05, "loss": 0.2709, "step": 10875 }, { "epoch": 0.23573765519034515, "grad_norm": 1.3241808414459229, "learning_rate": 1.7380694629581095e-05, "loss": 0.2731, "step": 10880 }, { "epoch": 0.23584599050982602, "grad_norm": 1.3120073080062866, "learning_rate": 1.7378397807260605e-05, "loss": 0.3026, "step": 10885 }, { "epoch": 0.23595432582930687, "grad_norm": 1.6442464590072632, "learning_rate": 1.737610013026326e-05, "loss": 0.2837, "step": 10890 }, { "epoch": 0.23606266114878774, "grad_norm": 1.5071454048156738, "learning_rate": 1.737380159885521e-05, "loss": 0.2552, "step": 10895 }, { "epoch": 0.23617099646826858, "grad_norm": 1.1934068202972412, "learning_rate": 1.7371502213302703e-05, "loss": 0.2438, "step": 10900 }, { "epoch": 0.23627933178774943, "grad_norm": 1.7841298580169678, "learning_rate": 1.7369201973872088e-05, "loss": 0.3391, "step": 10905 }, { "epoch": 0.2363876671072303, "grad_norm": 1.1323351860046387, "learning_rate": 1.7366900880829817e-05, "loss": 0.3, "step": 10910 }, { "epoch": 0.23649600242671115, "grad_norm": 1.4079277515411377, "learning_rate": 1.7364598934442435e-05, "loss": 0.2495, "step": 10915 }, { "epoch": 0.23660433774619202, "grad_norm": 1.3634816408157349, "learning_rate": 1.7362296134976588e-05, "loss": 0.3719, "step": 10920 }, { "epoch": 0.23671267306567287, "grad_norm": 2.005703926086426, "learning_rate": 1.7359992482699023e-05, "loss": 0.433, "step": 10925 }, { "epoch": 0.23682100838515371, "grad_norm": 1.334226369857788, "learning_rate": 1.735768797787658e-05, "loss": 0.3249, "step": 10930 }, { "epoch": 0.2369293437046346, "grad_norm": 1.2132344245910645, "learning_rate": 1.73553826207762e-05, "loss": 0.417, "step": 10935 }, { "epoch": 0.23703767902411543, "grad_norm": 1.1894514560699463, "learning_rate": 1.735307641166493e-05, "loss": 0.3002, "step": 10940 }, { "epoch": 0.2371460143435963, "grad_norm": 1.1249879598617554, "learning_rate": 1.7350769350809908e-05, "loss": 0.3009, "step": 10945 }, { "epoch": 0.23725434966307715, "grad_norm": 1.0886279344558716, "learning_rate": 1.734846143847837e-05, "loss": 0.1952, "step": 10950 }, { "epoch": 0.23736268498255803, "grad_norm": 1.0908117294311523, "learning_rate": 1.7346152674937654e-05, "loss": 0.2871, "step": 10955 }, { "epoch": 0.23747102030203887, "grad_norm": 0.8751459717750549, "learning_rate": 1.7343843060455194e-05, "loss": 0.3476, "step": 10960 }, { "epoch": 0.23757935562151972, "grad_norm": 1.286093831062317, "learning_rate": 1.734153259529853e-05, "loss": 0.3504, "step": 10965 }, { "epoch": 0.2376876909410006, "grad_norm": 1.679835557937622, "learning_rate": 1.7339221279735286e-05, "loss": 0.2545, "step": 10970 }, { "epoch": 0.23779602626048144, "grad_norm": 1.8400338888168335, "learning_rate": 1.73369091140332e-05, "loss": 0.3159, "step": 10975 }, { "epoch": 0.2379043615799623, "grad_norm": 1.334099531173706, "learning_rate": 1.7334596098460103e-05, "loss": 0.3261, "step": 10980 }, { "epoch": 0.23801269689944315, "grad_norm": 0.9663282036781311, "learning_rate": 1.733228223328392e-05, "loss": 0.3059, "step": 10985 }, { "epoch": 0.238121032218924, "grad_norm": 1.4531358480453491, "learning_rate": 1.7329967518772676e-05, "loss": 0.3671, "step": 10990 }, { "epoch": 0.23822936753840487, "grad_norm": 1.8457382917404175, "learning_rate": 1.7327651955194497e-05, "loss": 0.4568, "step": 10995 }, { "epoch": 0.23833770285788572, "grad_norm": 1.255879282951355, "learning_rate": 1.732533554281761e-05, "loss": 0.3888, "step": 11000 }, { "epoch": 0.2384460381773666, "grad_norm": 1.4991297721862793, "learning_rate": 1.732301828191033e-05, "loss": 0.3583, "step": 11005 }, { "epoch": 0.23855437349684744, "grad_norm": 1.3154106140136719, "learning_rate": 1.7320700172741082e-05, "loss": 0.3378, "step": 11010 }, { "epoch": 0.2386627088163283, "grad_norm": 1.1702908277511597, "learning_rate": 1.7318381215578387e-05, "loss": 0.2831, "step": 11015 }, { "epoch": 0.23877104413580916, "grad_norm": 1.5102094411849976, "learning_rate": 1.7316061410690855e-05, "loss": 0.35, "step": 11020 }, { "epoch": 0.23887937945529, "grad_norm": 1.463987112045288, "learning_rate": 1.73137407583472e-05, "loss": 0.3296, "step": 11025 }, { "epoch": 0.23898771477477088, "grad_norm": 1.8111448287963867, "learning_rate": 1.7311419258816243e-05, "loss": 0.4275, "step": 11030 }, { "epoch": 0.23909605009425172, "grad_norm": 1.0712158679962158, "learning_rate": 1.7309096912366888e-05, "loss": 0.3955, "step": 11035 }, { "epoch": 0.2392043854137326, "grad_norm": 0.9640330076217651, "learning_rate": 1.7306773719268145e-05, "loss": 0.3747, "step": 11040 }, { "epoch": 0.23931272073321344, "grad_norm": 1.6141356229782104, "learning_rate": 1.7304449679789125e-05, "loss": 0.3434, "step": 11045 }, { "epoch": 0.2394210560526943, "grad_norm": 1.5434777736663818, "learning_rate": 1.7302124794199027e-05, "loss": 0.3557, "step": 11050 }, { "epoch": 0.23952939137217516, "grad_norm": 1.1488319635391235, "learning_rate": 1.7299799062767158e-05, "loss": 0.3098, "step": 11055 }, { "epoch": 0.239637726691656, "grad_norm": 1.759324073791504, "learning_rate": 1.7297472485762918e-05, "loss": 0.3389, "step": 11060 }, { "epoch": 0.23974606201113688, "grad_norm": 1.4221699237823486, "learning_rate": 1.7295145063455808e-05, "loss": 0.3454, "step": 11065 }, { "epoch": 0.23985439733061772, "grad_norm": 1.3711777925491333, "learning_rate": 1.7292816796115427e-05, "loss": 0.2669, "step": 11070 }, { "epoch": 0.2399627326500986, "grad_norm": 1.130617380142212, "learning_rate": 1.7290487684011463e-05, "loss": 0.2476, "step": 11075 }, { "epoch": 0.24007106796957944, "grad_norm": 2.4889209270477295, "learning_rate": 1.728815772741371e-05, "loss": 0.3707, "step": 11080 }, { "epoch": 0.2401794032890603, "grad_norm": 1.7619130611419678, "learning_rate": 1.7285826926592063e-05, "loss": 0.411, "step": 11085 }, { "epoch": 0.24028773860854116, "grad_norm": 1.5900499820709229, "learning_rate": 1.7283495281816506e-05, "loss": 0.3088, "step": 11090 }, { "epoch": 0.240396073928022, "grad_norm": 1.2921875715255737, "learning_rate": 1.7281162793357133e-05, "loss": 0.3163, "step": 11095 }, { "epoch": 0.24050440924750288, "grad_norm": 1.9942065477371216, "learning_rate": 1.727882946148412e-05, "loss": 0.3163, "step": 11100 }, { "epoch": 0.24061274456698373, "grad_norm": 1.2606749534606934, "learning_rate": 1.7276495286467748e-05, "loss": 0.3155, "step": 11105 }, { "epoch": 0.24072107988646457, "grad_norm": 2.015256881713867, "learning_rate": 1.7274160268578398e-05, "loss": 0.4174, "step": 11110 }, { "epoch": 0.24082941520594545, "grad_norm": 0.9479063749313354, "learning_rate": 1.7271824408086554e-05, "loss": 0.3928, "step": 11115 }, { "epoch": 0.2409377505254263, "grad_norm": 1.2963275909423828, "learning_rate": 1.726948770526278e-05, "loss": 0.3074, "step": 11120 }, { "epoch": 0.24104608584490717, "grad_norm": 1.703722357749939, "learning_rate": 1.7267150160377753e-05, "loss": 0.3687, "step": 11125 }, { "epoch": 0.241154421164388, "grad_norm": 1.6759846210479736, "learning_rate": 1.726481177370224e-05, "loss": 0.397, "step": 11130 }, { "epoch": 0.24126275648386888, "grad_norm": 2.1753926277160645, "learning_rate": 1.726247254550711e-05, "loss": 0.3158, "step": 11135 }, { "epoch": 0.24137109180334973, "grad_norm": 1.3476191759109497, "learning_rate": 1.726013247606333e-05, "loss": 0.3436, "step": 11140 }, { "epoch": 0.24147942712283058, "grad_norm": 1.3804199695587158, "learning_rate": 1.7257791565641963e-05, "loss": 0.3176, "step": 11145 }, { "epoch": 0.24158776244231145, "grad_norm": 1.3742245435714722, "learning_rate": 1.7255449814514155e-05, "loss": 0.3033, "step": 11150 }, { "epoch": 0.2416960977617923, "grad_norm": 1.5807909965515137, "learning_rate": 1.725310722295118e-05, "loss": 0.3588, "step": 11155 }, { "epoch": 0.24180443308127317, "grad_norm": 1.4998375177383423, "learning_rate": 1.7250763791224382e-05, "loss": 0.3444, "step": 11160 }, { "epoch": 0.241912768400754, "grad_norm": 1.7035809755325317, "learning_rate": 1.7248419519605217e-05, "loss": 0.3661, "step": 11165 }, { "epoch": 0.24202110372023486, "grad_norm": 1.878976821899414, "learning_rate": 1.7246074408365232e-05, "loss": 0.3825, "step": 11170 }, { "epoch": 0.24212943903971573, "grad_norm": 1.0106123685836792, "learning_rate": 1.724372845777607e-05, "loss": 0.2573, "step": 11175 }, { "epoch": 0.24223777435919658, "grad_norm": 1.8955333232879639, "learning_rate": 1.724138166810948e-05, "loss": 0.2807, "step": 11180 }, { "epoch": 0.24234610967867745, "grad_norm": 1.7611132860183716, "learning_rate": 1.7239034039637298e-05, "loss": 0.3194, "step": 11185 }, { "epoch": 0.2424544449981583, "grad_norm": 1.6137980222702026, "learning_rate": 1.7236685572631463e-05, "loss": 0.39, "step": 11190 }, { "epoch": 0.24256278031763917, "grad_norm": 1.5455527305603027, "learning_rate": 1.7234336267364012e-05, "loss": 0.2681, "step": 11195 }, { "epoch": 0.24267111563712002, "grad_norm": 1.9669342041015625, "learning_rate": 1.7231986124107076e-05, "loss": 0.3129, "step": 11200 }, { "epoch": 0.24277945095660086, "grad_norm": 1.7227421998977661, "learning_rate": 1.722963514313288e-05, "loss": 0.3792, "step": 11205 }, { "epoch": 0.24288778627608174, "grad_norm": 1.3470916748046875, "learning_rate": 1.722728332471375e-05, "loss": 0.3614, "step": 11210 }, { "epoch": 0.24299612159556258, "grad_norm": 1.1005854606628418, "learning_rate": 1.7224930669122117e-05, "loss": 0.2941, "step": 11215 }, { "epoch": 0.24310445691504345, "grad_norm": 1.1609028577804565, "learning_rate": 1.7222577176630493e-05, "loss": 0.4439, "step": 11220 }, { "epoch": 0.2432127922345243, "grad_norm": 1.0281825065612793, "learning_rate": 1.7220222847511496e-05, "loss": 0.3884, "step": 11225 }, { "epoch": 0.24332112755400515, "grad_norm": 2.492798328399658, "learning_rate": 1.7217867682037844e-05, "loss": 0.3537, "step": 11230 }, { "epoch": 0.24342946287348602, "grad_norm": 1.5226935148239136, "learning_rate": 1.721551168048234e-05, "loss": 0.2497, "step": 11235 }, { "epoch": 0.24353779819296686, "grad_norm": 1.4581117630004883, "learning_rate": 1.72131548431179e-05, "loss": 0.3485, "step": 11240 }, { "epoch": 0.24364613351244774, "grad_norm": 1.410740852355957, "learning_rate": 1.7210797170217525e-05, "loss": 0.308, "step": 11245 }, { "epoch": 0.24375446883192858, "grad_norm": 1.4190032482147217, "learning_rate": 1.7208438662054314e-05, "loss": 0.2852, "step": 11250 }, { "epoch": 0.24386280415140943, "grad_norm": 1.545642375946045, "learning_rate": 1.7206079318901468e-05, "loss": 0.3373, "step": 11255 }, { "epoch": 0.2439711394708903, "grad_norm": 1.3250445127487183, "learning_rate": 1.7203719141032275e-05, "loss": 0.3835, "step": 11260 }, { "epoch": 0.24407947479037115, "grad_norm": 1.5394470691680908, "learning_rate": 1.7201358128720137e-05, "loss": 0.3463, "step": 11265 }, { "epoch": 0.24418781010985202, "grad_norm": 1.2782543897628784, "learning_rate": 1.7198996282238535e-05, "loss": 0.2202, "step": 11270 }, { "epoch": 0.24429614542933287, "grad_norm": 1.1606040000915527, "learning_rate": 1.7196633601861054e-05, "loss": 0.3547, "step": 11275 }, { "epoch": 0.24440448074881374, "grad_norm": 1.0766453742980957, "learning_rate": 1.7194270087861373e-05, "loss": 0.2979, "step": 11280 }, { "epoch": 0.2445128160682946, "grad_norm": 1.33353590965271, "learning_rate": 1.7191905740513276e-05, "loss": 0.3887, "step": 11285 }, { "epoch": 0.24462115138777543, "grad_norm": 1.5961321592330933, "learning_rate": 1.718954056009063e-05, "loss": 0.3788, "step": 11290 }, { "epoch": 0.2447294867072563, "grad_norm": 1.1149479150772095, "learning_rate": 1.7187174546867414e-05, "loss": 0.3285, "step": 11295 }, { "epoch": 0.24483782202673715, "grad_norm": 1.159063696861267, "learning_rate": 1.718480770111769e-05, "loss": 0.3407, "step": 11300 }, { "epoch": 0.24494615734621802, "grad_norm": 1.5812796354293823, "learning_rate": 1.718244002311562e-05, "loss": 0.2847, "step": 11305 }, { "epoch": 0.24505449266569887, "grad_norm": 1.5219508409500122, "learning_rate": 1.718007151313547e-05, "loss": 0.2436, "step": 11310 }, { "epoch": 0.24516282798517972, "grad_norm": 1.0828732252120972, "learning_rate": 1.7177702171451587e-05, "loss": 0.3025, "step": 11315 }, { "epoch": 0.2452711633046606, "grad_norm": 1.3769928216934204, "learning_rate": 1.7175331998338432e-05, "loss": 0.3181, "step": 11320 }, { "epoch": 0.24537949862414143, "grad_norm": 1.3165210485458374, "learning_rate": 1.7172960994070552e-05, "loss": 0.3689, "step": 11325 }, { "epoch": 0.2454878339436223, "grad_norm": 1.328086018562317, "learning_rate": 1.717058915892259e-05, "loss": 0.2991, "step": 11330 }, { "epoch": 0.24559616926310315, "grad_norm": 1.486840844154358, "learning_rate": 1.716821649316929e-05, "loss": 0.4581, "step": 11335 }, { "epoch": 0.24570450458258403, "grad_norm": 1.3581780195236206, "learning_rate": 1.716584299708549e-05, "loss": 0.301, "step": 11340 }, { "epoch": 0.24581283990206487, "grad_norm": 1.4091142416000366, "learning_rate": 1.7163468670946122e-05, "loss": 0.3855, "step": 11345 }, { "epoch": 0.24592117522154572, "grad_norm": 1.675466537475586, "learning_rate": 1.7161093515026222e-05, "loss": 0.3393, "step": 11350 }, { "epoch": 0.2460295105410266, "grad_norm": 1.6830353736877441, "learning_rate": 1.7158717529600905e-05, "loss": 0.4263, "step": 11355 }, { "epoch": 0.24613784586050744, "grad_norm": 1.5162461996078491, "learning_rate": 1.7156340714945403e-05, "loss": 0.3201, "step": 11360 }, { "epoch": 0.2462461811799883, "grad_norm": 1.8650493621826172, "learning_rate": 1.715396307133503e-05, "loss": 0.3089, "step": 11365 }, { "epoch": 0.24635451649946916, "grad_norm": 1.3044822216033936, "learning_rate": 1.7151584599045204e-05, "loss": 0.2954, "step": 11370 }, { "epoch": 0.24646285181895, "grad_norm": 1.4107818603515625, "learning_rate": 1.7149205298351434e-05, "loss": 0.3066, "step": 11375 }, { "epoch": 0.24657118713843088, "grad_norm": 1.1434733867645264, "learning_rate": 1.714682516952932e-05, "loss": 0.3281, "step": 11380 }, { "epoch": 0.24667952245791172, "grad_norm": 1.5350090265274048, "learning_rate": 1.714444421285457e-05, "loss": 0.2685, "step": 11385 }, { "epoch": 0.2467878577773926, "grad_norm": 2.3578262329101562, "learning_rate": 1.7142062428602984e-05, "loss": 0.4568, "step": 11390 }, { "epoch": 0.24689619309687344, "grad_norm": 1.9812653064727783, "learning_rate": 1.7139679817050455e-05, "loss": 0.3835, "step": 11395 }, { "epoch": 0.2470045284163543, "grad_norm": 2.3015642166137695, "learning_rate": 1.7137296378472968e-05, "loss": 0.2719, "step": 11400 }, { "epoch": 0.24711286373583516, "grad_norm": 1.5931991338729858, "learning_rate": 1.7134912113146614e-05, "loss": 0.3422, "step": 11405 }, { "epoch": 0.247221199055316, "grad_norm": 1.1016829013824463, "learning_rate": 1.7132527021347574e-05, "loss": 0.4024, "step": 11410 }, { "epoch": 0.24732953437479688, "grad_norm": 1.2285653352737427, "learning_rate": 1.7130141103352124e-05, "loss": 0.3089, "step": 11415 }, { "epoch": 0.24743786969427772, "grad_norm": 0.9147241711616516, "learning_rate": 1.7127754359436632e-05, "loss": 0.3079, "step": 11420 }, { "epoch": 0.2475462050137586, "grad_norm": 1.4824849367141724, "learning_rate": 1.7125366789877577e-05, "loss": 0.3012, "step": 11425 }, { "epoch": 0.24765454033323944, "grad_norm": 1.5919954776763916, "learning_rate": 1.7122978394951512e-05, "loss": 0.4113, "step": 11430 }, { "epoch": 0.2477628756527203, "grad_norm": 1.8252336978912354, "learning_rate": 1.7120589174935106e-05, "loss": 0.3952, "step": 11435 }, { "epoch": 0.24787121097220116, "grad_norm": 1.0732067823410034, "learning_rate": 1.711819913010511e-05, "loss": 0.3416, "step": 11440 }, { "epoch": 0.247979546291682, "grad_norm": 1.5830752849578857, "learning_rate": 1.7115808260738376e-05, "loss": 0.2158, "step": 11445 }, { "epoch": 0.24808788161116288, "grad_norm": 1.0140808820724487, "learning_rate": 1.7113416567111846e-05, "loss": 0.3909, "step": 11450 }, { "epoch": 0.24819621693064373, "grad_norm": 1.356898307800293, "learning_rate": 1.7111024049502572e-05, "loss": 0.3206, "step": 11455 }, { "epoch": 0.2483045522501246, "grad_norm": 1.319606065750122, "learning_rate": 1.7108630708187682e-05, "loss": 0.4245, "step": 11460 }, { "epoch": 0.24841288756960545, "grad_norm": 1.6684986352920532, "learning_rate": 1.710623654344441e-05, "loss": 0.304, "step": 11465 }, { "epoch": 0.2485212228890863, "grad_norm": 1.345115065574646, "learning_rate": 1.7103841555550088e-05, "loss": 0.3066, "step": 11470 }, { "epoch": 0.24862955820856716, "grad_norm": 1.8183969259262085, "learning_rate": 1.7101445744782137e-05, "loss": 0.3141, "step": 11475 }, { "epoch": 0.248737893528048, "grad_norm": 1.447840929031372, "learning_rate": 1.7099049111418074e-05, "loss": 0.3952, "step": 11480 }, { "epoch": 0.24884622884752888, "grad_norm": 1.077765941619873, "learning_rate": 1.7096651655735517e-05, "loss": 0.2899, "step": 11485 }, { "epoch": 0.24895456416700973, "grad_norm": 1.1775764226913452, "learning_rate": 1.7094253378012174e-05, "loss": 0.2659, "step": 11490 }, { "epoch": 0.24906289948649057, "grad_norm": 1.342801809310913, "learning_rate": 1.709185427852585e-05, "loss": 0.329, "step": 11495 }, { "epoch": 0.24917123480597145, "grad_norm": 1.3359462022781372, "learning_rate": 1.7089454357554437e-05, "loss": 0.3751, "step": 11500 }, { "epoch": 0.2492795701254523, "grad_norm": 1.4392603635787964, "learning_rate": 1.7087053615375944e-05, "loss": 0.4429, "step": 11505 }, { "epoch": 0.24938790544493317, "grad_norm": 2.653827428817749, "learning_rate": 1.708465205226845e-05, "loss": 0.3524, "step": 11510 }, { "epoch": 0.249496240764414, "grad_norm": 1.487229824066162, "learning_rate": 1.708224966851015e-05, "loss": 0.3011, "step": 11515 }, { "epoch": 0.24960457608389486, "grad_norm": 1.666218638420105, "learning_rate": 1.707984646437931e-05, "loss": 0.2997, "step": 11520 }, { "epoch": 0.24971291140337573, "grad_norm": 1.7567474842071533, "learning_rate": 1.7077442440154317e-05, "loss": 0.2924, "step": 11525 }, { "epoch": 0.24982124672285658, "grad_norm": 1.4280626773834229, "learning_rate": 1.7075037596113636e-05, "loss": 0.3212, "step": 11530 }, { "epoch": 0.24992958204233745, "grad_norm": 1.7905058860778809, "learning_rate": 1.7072631932535836e-05, "loss": 0.389, "step": 11535 }, { "epoch": 0.2500379173618183, "grad_norm": 1.3991576433181763, "learning_rate": 1.707022544969957e-05, "loss": 0.2772, "step": 11540 }, { "epoch": 0.25014625268129914, "grad_norm": 1.6624958515167236, "learning_rate": 1.7067818147883603e-05, "loss": 0.3253, "step": 11545 }, { "epoch": 0.25025458800078, "grad_norm": 2.24476957321167, "learning_rate": 1.706541002736678e-05, "loss": 0.3645, "step": 11550 }, { "epoch": 0.2503629233202609, "grad_norm": 1.340063214302063, "learning_rate": 1.706300108842804e-05, "loss": 0.3273, "step": 11555 }, { "epoch": 0.25047125863974173, "grad_norm": 1.7525556087493896, "learning_rate": 1.706059133134643e-05, "loss": 0.2836, "step": 11560 }, { "epoch": 0.2505795939592226, "grad_norm": 1.645923137664795, "learning_rate": 1.705818075640108e-05, "loss": 0.4044, "step": 11565 }, { "epoch": 0.2506879292787034, "grad_norm": 1.4601128101348877, "learning_rate": 1.7055769363871224e-05, "loss": 0.2754, "step": 11570 }, { "epoch": 0.2507962645981843, "grad_norm": 1.7460631132125854, "learning_rate": 1.705335715403618e-05, "loss": 0.4669, "step": 11575 }, { "epoch": 0.25090459991766517, "grad_norm": 0.9804571270942688, "learning_rate": 1.7050944127175373e-05, "loss": 0.4021, "step": 11580 }, { "epoch": 0.251012935237146, "grad_norm": 1.737219214439392, "learning_rate": 1.7048530283568308e-05, "loss": 0.2634, "step": 11585 }, { "epoch": 0.25112127055662686, "grad_norm": 1.4154568910598755, "learning_rate": 1.70461156234946e-05, "loss": 0.3646, "step": 11590 }, { "epoch": 0.2512296058761077, "grad_norm": 1.4778461456298828, "learning_rate": 1.7043700147233944e-05, "loss": 0.3341, "step": 11595 }, { "epoch": 0.2513379411955886, "grad_norm": 1.3315887451171875, "learning_rate": 1.7041283855066142e-05, "loss": 0.2689, "step": 11600 }, { "epoch": 0.25144627651506946, "grad_norm": 1.7692033052444458, "learning_rate": 1.7038866747271085e-05, "loss": 0.3665, "step": 11605 }, { "epoch": 0.2515546118345503, "grad_norm": 1.3058041334152222, "learning_rate": 1.7036448824128756e-05, "loss": 0.3012, "step": 11610 }, { "epoch": 0.25166294715403115, "grad_norm": 1.3192014694213867, "learning_rate": 1.703403008591924e-05, "loss": 0.4062, "step": 11615 }, { "epoch": 0.251771282473512, "grad_norm": 1.633970022201538, "learning_rate": 1.7031610532922704e-05, "loss": 0.3868, "step": 11620 }, { "epoch": 0.2518796177929929, "grad_norm": 2.0679268836975098, "learning_rate": 1.702919016541942e-05, "loss": 0.3501, "step": 11625 }, { "epoch": 0.25198795311247374, "grad_norm": 1.6795800924301147, "learning_rate": 1.702676898368976e-05, "loss": 0.2713, "step": 11630 }, { "epoch": 0.2520962884319546, "grad_norm": 1.5663760900497437, "learning_rate": 1.702434698801417e-05, "loss": 0.3143, "step": 11635 }, { "epoch": 0.25220462375143543, "grad_norm": 1.9014465808868408, "learning_rate": 1.7021924178673207e-05, "loss": 0.3583, "step": 11640 }, { "epoch": 0.2523129590709163, "grad_norm": 1.0843735933303833, "learning_rate": 1.7019500555947515e-05, "loss": 0.3676, "step": 11645 }, { "epoch": 0.2524212943903972, "grad_norm": 1.4237596988677979, "learning_rate": 1.7017076120117835e-05, "loss": 0.2608, "step": 11650 }, { "epoch": 0.252529629709878, "grad_norm": 1.3039273023605347, "learning_rate": 1.7014650871465004e-05, "loss": 0.3397, "step": 11655 }, { "epoch": 0.25263796502935887, "grad_norm": 1.4530680179595947, "learning_rate": 1.7012224810269947e-05, "loss": 0.2928, "step": 11660 }, { "epoch": 0.2527463003488397, "grad_norm": 1.2935456037521362, "learning_rate": 1.7009797936813692e-05, "loss": 0.3724, "step": 11665 }, { "epoch": 0.25285463566832056, "grad_norm": 1.2969964742660522, "learning_rate": 1.7007370251377353e-05, "loss": 0.2333, "step": 11670 }, { "epoch": 0.25296297098780146, "grad_norm": 1.2763320207595825, "learning_rate": 1.7004941754242137e-05, "loss": 0.3392, "step": 11675 }, { "epoch": 0.2530713063072823, "grad_norm": 1.7296336889266968, "learning_rate": 1.7002512445689355e-05, "loss": 0.3714, "step": 11680 }, { "epoch": 0.25317964162676315, "grad_norm": 1.1349875926971436, "learning_rate": 1.7000082326000404e-05, "loss": 0.2882, "step": 11685 }, { "epoch": 0.253287976946244, "grad_norm": 1.2751049995422363, "learning_rate": 1.6997651395456777e-05, "loss": 0.239, "step": 11690 }, { "epoch": 0.2533963122657249, "grad_norm": 1.8805674314498901, "learning_rate": 1.6995219654340055e-05, "loss": 0.3936, "step": 11695 }, { "epoch": 0.25350464758520574, "grad_norm": 1.9293640851974487, "learning_rate": 1.6992787102931934e-05, "loss": 0.373, "step": 11700 }, { "epoch": 0.2536129829046866, "grad_norm": 2.172144651412964, "learning_rate": 1.6990353741514174e-05, "loss": 0.4736, "step": 11705 }, { "epoch": 0.25372131822416744, "grad_norm": 1.7982336282730103, "learning_rate": 1.6987919570368648e-05, "loss": 0.4962, "step": 11710 }, { "epoch": 0.2538296535436483, "grad_norm": 1.0679101943969727, "learning_rate": 1.698548458977732e-05, "loss": 0.3247, "step": 11715 }, { "epoch": 0.2539379888631292, "grad_norm": 1.4353426694869995, "learning_rate": 1.6983048800022244e-05, "loss": 0.4017, "step": 11720 }, { "epoch": 0.25404632418261003, "grad_norm": 0.8938042521476746, "learning_rate": 1.698061220138557e-05, "loss": 0.3271, "step": 11725 }, { "epoch": 0.2541546595020909, "grad_norm": 2.0013389587402344, "learning_rate": 1.697817479414954e-05, "loss": 0.3661, "step": 11730 }, { "epoch": 0.2542629948215717, "grad_norm": 1.4542862176895142, "learning_rate": 1.69757365785965e-05, "loss": 0.3065, "step": 11735 }, { "epoch": 0.25437133014105257, "grad_norm": 1.375556230545044, "learning_rate": 1.6973297555008873e-05, "loss": 0.2624, "step": 11740 }, { "epoch": 0.25447966546053347, "grad_norm": 2.2421069145202637, "learning_rate": 1.6970857723669178e-05, "loss": 0.3113, "step": 11745 }, { "epoch": 0.2545880007800143, "grad_norm": 1.6094282865524292, "learning_rate": 1.696841708486004e-05, "loss": 0.3158, "step": 11750 }, { "epoch": 0.25469633609949516, "grad_norm": 1.722725749015808, "learning_rate": 1.6965975638864173e-05, "loss": 0.3385, "step": 11755 }, { "epoch": 0.254804671418976, "grad_norm": 1.1010682582855225, "learning_rate": 1.696353338596438e-05, "loss": 0.3657, "step": 11760 }, { "epoch": 0.25491300673845685, "grad_norm": 1.785766363143921, "learning_rate": 1.6961090326443556e-05, "loss": 0.3393, "step": 11765 }, { "epoch": 0.25502134205793775, "grad_norm": 2.2772533893585205, "learning_rate": 1.6958646460584695e-05, "loss": 0.3483, "step": 11770 }, { "epoch": 0.2551296773774186, "grad_norm": 1.3224585056304932, "learning_rate": 1.695620178867088e-05, "loss": 0.4016, "step": 11775 }, { "epoch": 0.25523801269689944, "grad_norm": 1.3393173217773438, "learning_rate": 1.6953756310985294e-05, "loss": 0.3927, "step": 11780 }, { "epoch": 0.2553463480163803, "grad_norm": 1.3021761178970337, "learning_rate": 1.6951310027811208e-05, "loss": 0.3677, "step": 11785 }, { "epoch": 0.25545468333586113, "grad_norm": 1.8113690614700317, "learning_rate": 1.6948862939431983e-05, "loss": 0.3079, "step": 11790 }, { "epoch": 0.25556301865534203, "grad_norm": 1.471140742301941, "learning_rate": 1.694641504613108e-05, "loss": 0.3518, "step": 11795 }, { "epoch": 0.2556713539748229, "grad_norm": 1.2902750968933105, "learning_rate": 1.6943966348192052e-05, "loss": 0.3005, "step": 11800 }, { "epoch": 0.2557796892943037, "grad_norm": 1.1823135614395142, "learning_rate": 1.6941516845898544e-05, "loss": 0.3898, "step": 11805 }, { "epoch": 0.25588802461378457, "grad_norm": 1.7971501350402832, "learning_rate": 1.6939066539534294e-05, "loss": 0.3556, "step": 11810 }, { "epoch": 0.2559963599332654, "grad_norm": 1.9871236085891724, "learning_rate": 1.6936615429383133e-05, "loss": 0.36, "step": 11815 }, { "epoch": 0.2561046952527463, "grad_norm": 1.535490870475769, "learning_rate": 1.693416351572898e-05, "loss": 0.2776, "step": 11820 }, { "epoch": 0.25621303057222716, "grad_norm": 1.9668303728103638, "learning_rate": 1.693171079885586e-05, "loss": 0.3792, "step": 11825 }, { "epoch": 0.256321365891708, "grad_norm": 1.6419768333435059, "learning_rate": 1.6929257279047882e-05, "loss": 0.3334, "step": 11830 }, { "epoch": 0.25642970121118885, "grad_norm": 0.8516096472740173, "learning_rate": 1.692680295658925e-05, "loss": 0.3391, "step": 11835 }, { "epoch": 0.25653803653066976, "grad_norm": 1.474631905555725, "learning_rate": 1.6924347831764255e-05, "loss": 0.3682, "step": 11840 }, { "epoch": 0.2566463718501506, "grad_norm": 1.6316300630569458, "learning_rate": 1.6921891904857295e-05, "loss": 0.2771, "step": 11845 }, { "epoch": 0.25675470716963145, "grad_norm": 1.4553066492080688, "learning_rate": 1.6919435176152846e-05, "loss": 0.3427, "step": 11850 }, { "epoch": 0.2568630424891123, "grad_norm": 1.1292011737823486, "learning_rate": 1.6916977645935485e-05, "loss": 0.3483, "step": 11855 }, { "epoch": 0.25697137780859314, "grad_norm": 1.327389121055603, "learning_rate": 1.691451931448988e-05, "loss": 0.3634, "step": 11860 }, { "epoch": 0.25707971312807404, "grad_norm": 1.2441933155059814, "learning_rate": 1.6912060182100792e-05, "loss": 0.3858, "step": 11865 }, { "epoch": 0.2571880484475549, "grad_norm": 1.6558177471160889, "learning_rate": 1.6909600249053072e-05, "loss": 0.2726, "step": 11870 }, { "epoch": 0.25729638376703573, "grad_norm": 1.7525883913040161, "learning_rate": 1.6907139515631672e-05, "loss": 0.3883, "step": 11875 }, { "epoch": 0.2574047190865166, "grad_norm": 1.8948798179626465, "learning_rate": 1.6904677982121626e-05, "loss": 0.3339, "step": 11880 }, { "epoch": 0.2575130544059974, "grad_norm": 1.1369613409042358, "learning_rate": 1.6902215648808067e-05, "loss": 0.292, "step": 11885 }, { "epoch": 0.2576213897254783, "grad_norm": 1.5744528770446777, "learning_rate": 1.6899752515976224e-05, "loss": 0.251, "step": 11890 }, { "epoch": 0.25772972504495917, "grad_norm": 1.8888399600982666, "learning_rate": 1.6897288583911407e-05, "loss": 0.2251, "step": 11895 }, { "epoch": 0.25783806036444, "grad_norm": 1.1652544736862183, "learning_rate": 1.6894823852899032e-05, "loss": 0.2929, "step": 11900 }, { "epoch": 0.25794639568392086, "grad_norm": 1.6076029539108276, "learning_rate": 1.6892358323224594e-05, "loss": 0.2979, "step": 11905 }, { "epoch": 0.2580547310034017, "grad_norm": 1.4590789079666138, "learning_rate": 1.6889891995173698e-05, "loss": 0.4749, "step": 11910 }, { "epoch": 0.2581630663228826, "grad_norm": 1.4112577438354492, "learning_rate": 1.6887424869032022e-05, "loss": 0.3986, "step": 11915 }, { "epoch": 0.25827140164236345, "grad_norm": 1.1348861455917358, "learning_rate": 1.6884956945085347e-05, "loss": 0.3491, "step": 11920 }, { "epoch": 0.2583797369618443, "grad_norm": 0.9853880405426025, "learning_rate": 1.6882488223619548e-05, "loss": 0.2363, "step": 11925 }, { "epoch": 0.25848807228132514, "grad_norm": 1.5558717250823975, "learning_rate": 1.6880018704920585e-05, "loss": 0.3792, "step": 11930 }, { "epoch": 0.258596407600806, "grad_norm": 2.458773136138916, "learning_rate": 1.687754838927452e-05, "loss": 0.3073, "step": 11935 }, { "epoch": 0.2587047429202869, "grad_norm": 1.2971906661987305, "learning_rate": 1.68750772769675e-05, "loss": 0.42, "step": 11940 }, { "epoch": 0.25881307823976774, "grad_norm": 1.7183399200439453, "learning_rate": 1.6872605368285767e-05, "loss": 0.3704, "step": 11945 }, { "epoch": 0.2589214135592486, "grad_norm": 1.2166929244995117, "learning_rate": 1.6870132663515653e-05, "loss": 0.4774, "step": 11950 }, { "epoch": 0.2590297488787294, "grad_norm": 1.3183072805404663, "learning_rate": 1.6867659162943585e-05, "loss": 0.3866, "step": 11955 }, { "epoch": 0.25913808419821033, "grad_norm": 1.1916255950927734, "learning_rate": 1.6865184866856078e-05, "loss": 0.3243, "step": 11960 }, { "epoch": 0.2592464195176912, "grad_norm": 1.7472995519638062, "learning_rate": 1.6862709775539746e-05, "loss": 0.3006, "step": 11965 }, { "epoch": 0.259354754837172, "grad_norm": 1.4329115152359009, "learning_rate": 1.686023388928129e-05, "loss": 0.2398, "step": 11970 }, { "epoch": 0.25946309015665286, "grad_norm": 1.6566873788833618, "learning_rate": 1.6857757208367505e-05, "loss": 0.3423, "step": 11975 }, { "epoch": 0.2595714254761337, "grad_norm": 1.8509448766708374, "learning_rate": 1.6855279733085278e-05, "loss": 0.4582, "step": 11980 }, { "epoch": 0.2596797607956146, "grad_norm": 1.4767948389053345, "learning_rate": 1.6852801463721582e-05, "loss": 0.3647, "step": 11985 }, { "epoch": 0.25978809611509546, "grad_norm": 1.4672211408615112, "learning_rate": 1.6850322400563494e-05, "loss": 0.3877, "step": 11990 }, { "epoch": 0.2598964314345763, "grad_norm": 1.836892008781433, "learning_rate": 1.6847842543898168e-05, "loss": 0.4163, "step": 11995 }, { "epoch": 0.26000476675405715, "grad_norm": 1.5274643898010254, "learning_rate": 1.6845361894012874e-05, "loss": 0.3564, "step": 12000 }, { "epoch": 0.260113102073538, "grad_norm": 1.3616138696670532, "learning_rate": 1.6842880451194942e-05, "loss": 0.2745, "step": 12005 }, { "epoch": 0.2602214373930189, "grad_norm": 2.031083106994629, "learning_rate": 1.6840398215731814e-05, "loss": 0.3124, "step": 12010 }, { "epoch": 0.26032977271249974, "grad_norm": 1.1538119316101074, "learning_rate": 1.6837915187911028e-05, "loss": 0.3335, "step": 12015 }, { "epoch": 0.2604381080319806, "grad_norm": 1.428138256072998, "learning_rate": 1.6835431368020196e-05, "loss": 0.41, "step": 12020 }, { "epoch": 0.26054644335146143, "grad_norm": 1.3830084800720215, "learning_rate": 1.6832946756347034e-05, "loss": 0.3987, "step": 12025 }, { "epoch": 0.2606547786709423, "grad_norm": 1.307254672050476, "learning_rate": 1.6830461353179353e-05, "loss": 0.2913, "step": 12030 }, { "epoch": 0.2607631139904232, "grad_norm": 1.7944244146347046, "learning_rate": 1.6827975158805037e-05, "loss": 0.3457, "step": 12035 }, { "epoch": 0.260871449309904, "grad_norm": 1.2094273567199707, "learning_rate": 1.6825488173512088e-05, "loss": 0.2977, "step": 12040 }, { "epoch": 0.26097978462938487, "grad_norm": 1.4247018098831177, "learning_rate": 1.682300039758858e-05, "loss": 0.3362, "step": 12045 }, { "epoch": 0.2610881199488657, "grad_norm": 1.6450539827346802, "learning_rate": 1.6820511831322678e-05, "loss": 0.3586, "step": 12050 }, { "epoch": 0.26119645526834656, "grad_norm": 1.120561957359314, "learning_rate": 1.6818022475002654e-05, "loss": 0.3407, "step": 12055 }, { "epoch": 0.26130479058782746, "grad_norm": 2.029498815536499, "learning_rate": 1.6815532328916862e-05, "loss": 0.2594, "step": 12060 }, { "epoch": 0.2614131259073083, "grad_norm": 1.1361595392227173, "learning_rate": 1.6813041393353748e-05, "loss": 0.4067, "step": 12065 }, { "epoch": 0.26152146122678915, "grad_norm": 1.2783100605010986, "learning_rate": 1.6810549668601847e-05, "loss": 0.3014, "step": 12070 }, { "epoch": 0.26162979654627, "grad_norm": 1.824884295463562, "learning_rate": 1.680805715494979e-05, "loss": 0.3166, "step": 12075 }, { "epoch": 0.26173813186575084, "grad_norm": 1.3094160556793213, "learning_rate": 1.6805563852686292e-05, "loss": 0.3269, "step": 12080 }, { "epoch": 0.26184646718523175, "grad_norm": 1.5035196542739868, "learning_rate": 1.680306976210017e-05, "loss": 0.3316, "step": 12085 }, { "epoch": 0.2619548025047126, "grad_norm": 1.1935569047927856, "learning_rate": 1.6800574883480326e-05, "loss": 0.2505, "step": 12090 }, { "epoch": 0.26206313782419344, "grad_norm": 1.6617878675460815, "learning_rate": 1.6798079217115758e-05, "loss": 0.3416, "step": 12095 }, { "epoch": 0.2621714731436743, "grad_norm": 1.73334801197052, "learning_rate": 1.6795582763295548e-05, "loss": 0.4241, "step": 12100 }, { "epoch": 0.2622798084631552, "grad_norm": 1.4438546895980835, "learning_rate": 1.6793085522308866e-05, "loss": 0.2993, "step": 12105 }, { "epoch": 0.26238814378263603, "grad_norm": 1.7213314771652222, "learning_rate": 1.6790587494444993e-05, "loss": 0.2892, "step": 12110 }, { "epoch": 0.2624964791021169, "grad_norm": 1.3387928009033203, "learning_rate": 1.6788088679993276e-05, "loss": 0.3263, "step": 12115 }, { "epoch": 0.2626048144215977, "grad_norm": 1.6752339601516724, "learning_rate": 1.678558907924318e-05, "loss": 0.4297, "step": 12120 }, { "epoch": 0.26271314974107857, "grad_norm": 1.1010609865188599, "learning_rate": 1.678308869248423e-05, "loss": 0.4231, "step": 12125 }, { "epoch": 0.26282148506055947, "grad_norm": 1.2833011150360107, "learning_rate": 1.6780587520006067e-05, "loss": 0.3386, "step": 12130 }, { "epoch": 0.2629298203800403, "grad_norm": 1.635023593902588, "learning_rate": 1.6778085562098413e-05, "loss": 0.3704, "step": 12135 }, { "epoch": 0.26303815569952116, "grad_norm": 1.5233508348464966, "learning_rate": 1.6775582819051085e-05, "loss": 0.3608, "step": 12140 }, { "epoch": 0.263146491019002, "grad_norm": 1.2179954051971436, "learning_rate": 1.6773079291153985e-05, "loss": 0.3535, "step": 12145 }, { "epoch": 0.26325482633848285, "grad_norm": 1.0395617485046387, "learning_rate": 1.6770574978697104e-05, "loss": 0.2929, "step": 12150 }, { "epoch": 0.26336316165796375, "grad_norm": 1.428914189338684, "learning_rate": 1.676806988197054e-05, "loss": 0.3108, "step": 12155 }, { "epoch": 0.2634714969774446, "grad_norm": 0.8878602981567383, "learning_rate": 1.6765564001264463e-05, "loss": 0.3228, "step": 12160 }, { "epoch": 0.26357983229692544, "grad_norm": 1.4487940073013306, "learning_rate": 1.676305733686915e-05, "loss": 0.2452, "step": 12165 }, { "epoch": 0.2636881676164063, "grad_norm": 1.4224390983581543, "learning_rate": 1.6760549889074954e-05, "loss": 0.3041, "step": 12170 }, { "epoch": 0.26379650293588713, "grad_norm": 1.3403600454330444, "learning_rate": 1.6758041658172325e-05, "loss": 0.3024, "step": 12175 }, { "epoch": 0.26390483825536803, "grad_norm": 2.0201103687286377, "learning_rate": 1.67555326444518e-05, "loss": 0.4043, "step": 12180 }, { "epoch": 0.2640131735748489, "grad_norm": 1.3792741298675537, "learning_rate": 1.6753022848204025e-05, "loss": 0.3282, "step": 12185 }, { "epoch": 0.2641215088943297, "grad_norm": 1.305262565612793, "learning_rate": 1.6750512269719706e-05, "loss": 0.3465, "step": 12190 }, { "epoch": 0.26422984421381057, "grad_norm": 1.6158177852630615, "learning_rate": 1.6748000909289665e-05, "loss": 0.2565, "step": 12195 }, { "epoch": 0.2643381795332914, "grad_norm": 1.045498251914978, "learning_rate": 1.6745488767204806e-05, "loss": 0.2353, "step": 12200 }, { "epoch": 0.2644465148527723, "grad_norm": 1.5513230562210083, "learning_rate": 1.674297584375612e-05, "loss": 0.3998, "step": 12205 }, { "epoch": 0.26455485017225316, "grad_norm": 1.1327968835830688, "learning_rate": 1.6740462139234693e-05, "loss": 0.3711, "step": 12210 }, { "epoch": 0.264663185491734, "grad_norm": 1.91313898563385, "learning_rate": 1.67379476539317e-05, "loss": 0.2845, "step": 12215 }, { "epoch": 0.26477152081121486, "grad_norm": 1.5987367630004883, "learning_rate": 1.67354323881384e-05, "loss": 0.3952, "step": 12220 }, { "epoch": 0.26487985613069576, "grad_norm": 1.7206430435180664, "learning_rate": 1.673291634214616e-05, "loss": 0.353, "step": 12225 }, { "epoch": 0.2649881914501766, "grad_norm": 1.8386048078536987, "learning_rate": 1.673039951624642e-05, "loss": 0.3776, "step": 12230 }, { "epoch": 0.26509652676965745, "grad_norm": 1.4337178468704224, "learning_rate": 1.6727881910730714e-05, "loss": 0.3498, "step": 12235 }, { "epoch": 0.2652048620891383, "grad_norm": 1.0909688472747803, "learning_rate": 1.6725363525890678e-05, "loss": 0.2655, "step": 12240 }, { "epoch": 0.26531319740861914, "grad_norm": 0.9152570962905884, "learning_rate": 1.672284436201802e-05, "loss": 0.3518, "step": 12245 }, { "epoch": 0.26542153272810004, "grad_norm": 1.7608243227005005, "learning_rate": 1.672032441940455e-05, "loss": 0.3875, "step": 12250 }, { "epoch": 0.2655298680475809, "grad_norm": 1.4819334745407104, "learning_rate": 1.6717803698342167e-05, "loss": 0.3074, "step": 12255 }, { "epoch": 0.26563820336706173, "grad_norm": 1.1237120628356934, "learning_rate": 1.6715282199122863e-05, "loss": 0.3414, "step": 12260 }, { "epoch": 0.2657465386865426, "grad_norm": 1.3947975635528564, "learning_rate": 1.6712759922038706e-05, "loss": 0.3653, "step": 12265 }, { "epoch": 0.2658548740060234, "grad_norm": 1.4548940658569336, "learning_rate": 1.671023686738187e-05, "loss": 0.3361, "step": 12270 }, { "epoch": 0.2659632093255043, "grad_norm": 1.2280904054641724, "learning_rate": 1.6707713035444614e-05, "loss": 0.2857, "step": 12275 }, { "epoch": 0.26607154464498517, "grad_norm": 0.9014101624488831, "learning_rate": 1.6705188426519284e-05, "loss": 0.2943, "step": 12280 }, { "epoch": 0.266179879964466, "grad_norm": 1.0333460569381714, "learning_rate": 1.6702663040898316e-05, "loss": 0.2702, "step": 12285 }, { "epoch": 0.26628821528394686, "grad_norm": 1.6298933029174805, "learning_rate": 1.6700136878874242e-05, "loss": 0.3211, "step": 12290 }, { "epoch": 0.2663965506034277, "grad_norm": 1.4405020475387573, "learning_rate": 1.669760994073968e-05, "loss": 0.3179, "step": 12295 }, { "epoch": 0.2665048859229086, "grad_norm": 2.330217123031616, "learning_rate": 1.6695082226787335e-05, "loss": 0.3476, "step": 12300 }, { "epoch": 0.26661322124238945, "grad_norm": 1.0318357944488525, "learning_rate": 1.6692553737310005e-05, "loss": 0.3663, "step": 12305 }, { "epoch": 0.2667215565618703, "grad_norm": 1.5226168632507324, "learning_rate": 1.6690024472600583e-05, "loss": 0.316, "step": 12310 }, { "epoch": 0.26682989188135114, "grad_norm": 1.5620009899139404, "learning_rate": 1.6687494432952037e-05, "loss": 0.3578, "step": 12315 }, { "epoch": 0.266938227200832, "grad_norm": 3.067246675491333, "learning_rate": 1.6684963618657444e-05, "loss": 0.2461, "step": 12320 }, { "epoch": 0.2670465625203129, "grad_norm": 1.4185175895690918, "learning_rate": 1.6682432030009956e-05, "loss": 0.3956, "step": 12325 }, { "epoch": 0.26715489783979374, "grad_norm": 1.1536736488342285, "learning_rate": 1.667989966730282e-05, "loss": 0.3687, "step": 12330 }, { "epoch": 0.2672632331592746, "grad_norm": 1.613114356994629, "learning_rate": 1.6677366530829372e-05, "loss": 0.3892, "step": 12335 }, { "epoch": 0.26737156847875543, "grad_norm": 1.5681110620498657, "learning_rate": 1.667483262088304e-05, "loss": 0.3428, "step": 12340 }, { "epoch": 0.2674799037982363, "grad_norm": 2.176145315170288, "learning_rate": 1.6672297937757336e-05, "loss": 0.2648, "step": 12345 }, { "epoch": 0.2675882391177172, "grad_norm": 1.4377589225769043, "learning_rate": 1.6669762481745862e-05, "loss": 0.3519, "step": 12350 }, { "epoch": 0.267696574437198, "grad_norm": 1.643762230873108, "learning_rate": 1.6667226253142325e-05, "loss": 0.3706, "step": 12355 }, { "epoch": 0.26780490975667887, "grad_norm": 1.078151822090149, "learning_rate": 1.66646892522405e-05, "loss": 0.3921, "step": 12360 }, { "epoch": 0.2679132450761597, "grad_norm": 1.317665934562683, "learning_rate": 1.666215147933426e-05, "loss": 0.2925, "step": 12365 }, { "epoch": 0.2680215803956406, "grad_norm": 1.6286277770996094, "learning_rate": 1.665961293471757e-05, "loss": 0.3604, "step": 12370 }, { "epoch": 0.26812991571512146, "grad_norm": 1.6514723300933838, "learning_rate": 1.6657073618684486e-05, "loss": 0.333, "step": 12375 }, { "epoch": 0.2682382510346023, "grad_norm": 1.425391435623169, "learning_rate": 1.6654533531529144e-05, "loss": 0.3514, "step": 12380 }, { "epoch": 0.26834658635408315, "grad_norm": 1.6020475625991821, "learning_rate": 1.6651992673545777e-05, "loss": 0.3608, "step": 12385 }, { "epoch": 0.268454921673564, "grad_norm": 1.9841927289962769, "learning_rate": 1.6649451045028708e-05, "loss": 0.3484, "step": 12390 }, { "epoch": 0.2685632569930449, "grad_norm": 1.2325379848480225, "learning_rate": 1.6646908646272342e-05, "loss": 0.2051, "step": 12395 }, { "epoch": 0.26867159231252574, "grad_norm": 0.8259221315383911, "learning_rate": 1.664436547757118e-05, "loss": 0.3404, "step": 12400 }, { "epoch": 0.2687799276320066, "grad_norm": 1.6793321371078491, "learning_rate": 1.664182153921981e-05, "loss": 0.2742, "step": 12405 }, { "epoch": 0.26888826295148743, "grad_norm": 1.5311839580535889, "learning_rate": 1.663927683151291e-05, "loss": 0.3763, "step": 12410 }, { "epoch": 0.2689965982709683, "grad_norm": 1.3663889169692993, "learning_rate": 1.6636731354745243e-05, "loss": 0.3744, "step": 12415 }, { "epoch": 0.2691049335904492, "grad_norm": 1.4931237697601318, "learning_rate": 1.6634185109211668e-05, "loss": 0.2845, "step": 12420 }, { "epoch": 0.26921326890993, "grad_norm": 2.0206024646759033, "learning_rate": 1.663163809520713e-05, "loss": 0.3395, "step": 12425 }, { "epoch": 0.26932160422941087, "grad_norm": 1.4539517164230347, "learning_rate": 1.662909031302666e-05, "loss": 0.5011, "step": 12430 }, { "epoch": 0.2694299395488917, "grad_norm": 1.0733400583267212, "learning_rate": 1.662654176296538e-05, "loss": 0.2765, "step": 12435 }, { "epoch": 0.26953827486837256, "grad_norm": 1.6300266981124878, "learning_rate": 1.6623992445318506e-05, "loss": 0.2179, "step": 12440 }, { "epoch": 0.26964661018785346, "grad_norm": 2.442992687225342, "learning_rate": 1.662144236038133e-05, "loss": 0.3257, "step": 12445 }, { "epoch": 0.2697549455073343, "grad_norm": 0.9542927742004395, "learning_rate": 1.6618891508449246e-05, "loss": 0.2985, "step": 12450 }, { "epoch": 0.26986328082681516, "grad_norm": 1.9172559976577759, "learning_rate": 1.6616339889817732e-05, "loss": 0.3907, "step": 12455 }, { "epoch": 0.269971616146296, "grad_norm": 1.7263914346694946, "learning_rate": 1.6613787504782357e-05, "loss": 0.3609, "step": 12460 }, { "epoch": 0.27007995146577685, "grad_norm": 1.4317412376403809, "learning_rate": 1.6611234353638774e-05, "loss": 0.3224, "step": 12465 }, { "epoch": 0.27018828678525775, "grad_norm": 2.1222481727600098, "learning_rate": 1.6608680436682724e-05, "loss": 0.3794, "step": 12470 }, { "epoch": 0.2702966221047386, "grad_norm": 1.9411535263061523, "learning_rate": 1.6606125754210044e-05, "loss": 0.3962, "step": 12475 }, { "epoch": 0.27040495742421944, "grad_norm": 1.3881062269210815, "learning_rate": 1.6603570306516656e-05, "loss": 0.3069, "step": 12480 }, { "epoch": 0.2705132927437003, "grad_norm": 1.2519676685333252, "learning_rate": 1.6601014093898573e-05, "loss": 0.3451, "step": 12485 }, { "epoch": 0.2706216280631812, "grad_norm": 1.4384291172027588, "learning_rate": 1.6598457116651886e-05, "loss": 0.4051, "step": 12490 }, { "epoch": 0.27072996338266203, "grad_norm": 1.360263705253601, "learning_rate": 1.6595899375072788e-05, "loss": 0.3165, "step": 12495 }, { "epoch": 0.2708382987021429, "grad_norm": 1.8004693984985352, "learning_rate": 1.6593340869457554e-05, "loss": 0.3372, "step": 12500 }, { "epoch": 0.2709466340216237, "grad_norm": 1.3227821588516235, "learning_rate": 1.659078160010255e-05, "loss": 0.3526, "step": 12505 }, { "epoch": 0.27105496934110457, "grad_norm": 1.8544989824295044, "learning_rate": 1.658822156730423e-05, "loss": 0.4187, "step": 12510 }, { "epoch": 0.27116330466058547, "grad_norm": 1.6568886041641235, "learning_rate": 1.6585660771359125e-05, "loss": 0.3554, "step": 12515 }, { "epoch": 0.2712716399800663, "grad_norm": 1.152104139328003, "learning_rate": 1.6583099212563878e-05, "loss": 0.4147, "step": 12520 }, { "epoch": 0.27137997529954716, "grad_norm": 1.1660031080245972, "learning_rate": 1.6580536891215202e-05, "loss": 0.2979, "step": 12525 }, { "epoch": 0.271488310619028, "grad_norm": 1.4008361101150513, "learning_rate": 1.6577973807609902e-05, "loss": 0.268, "step": 12530 }, { "epoch": 0.27159664593850885, "grad_norm": 1.4238629341125488, "learning_rate": 1.6575409962044875e-05, "loss": 0.3456, "step": 12535 }, { "epoch": 0.27170498125798975, "grad_norm": 1.3086345195770264, "learning_rate": 1.6572845354817106e-05, "loss": 0.3199, "step": 12540 }, { "epoch": 0.2718133165774706, "grad_norm": 1.3639161586761475, "learning_rate": 1.6570279986223658e-05, "loss": 0.3618, "step": 12545 }, { "epoch": 0.27192165189695144, "grad_norm": 1.1514054536819458, "learning_rate": 1.65677138565617e-05, "loss": 0.3788, "step": 12550 }, { "epoch": 0.2720299872164323, "grad_norm": 1.9770220518112183, "learning_rate": 1.6565146966128477e-05, "loss": 0.398, "step": 12555 }, { "epoch": 0.27213832253591314, "grad_norm": 1.4560059309005737, "learning_rate": 1.6562579315221325e-05, "loss": 0.2852, "step": 12560 }, { "epoch": 0.27224665785539404, "grad_norm": 1.1268678903579712, "learning_rate": 1.6560010904137664e-05, "loss": 0.3186, "step": 12565 }, { "epoch": 0.2723549931748749, "grad_norm": 1.7020817995071411, "learning_rate": 1.655744173317501e-05, "loss": 0.3681, "step": 12570 }, { "epoch": 0.2724633284943557, "grad_norm": 1.7895891666412354, "learning_rate": 1.655487180263096e-05, "loss": 0.3175, "step": 12575 }, { "epoch": 0.2725716638138366, "grad_norm": 0.9030568599700928, "learning_rate": 1.6552301112803203e-05, "loss": 0.339, "step": 12580 }, { "epoch": 0.2726799991333174, "grad_norm": 1.6925102472305298, "learning_rate": 1.6549729663989518e-05, "loss": 0.4508, "step": 12585 }, { "epoch": 0.2727883344527983, "grad_norm": 1.3698540925979614, "learning_rate": 1.6547157456487765e-05, "loss": 0.284, "step": 12590 }, { "epoch": 0.27289666977227917, "grad_norm": 2.016529083251953, "learning_rate": 1.65445844905959e-05, "loss": 0.2863, "step": 12595 }, { "epoch": 0.27300500509176, "grad_norm": 1.6911430358886719, "learning_rate": 1.6542010766611956e-05, "loss": 0.2537, "step": 12600 }, { "epoch": 0.27311334041124086, "grad_norm": 1.2521165609359741, "learning_rate": 1.6539436284834068e-05, "loss": 0.2923, "step": 12605 }, { "epoch": 0.2732216757307217, "grad_norm": 1.6316200494766235, "learning_rate": 1.6536861045560443e-05, "loss": 0.3295, "step": 12610 }, { "epoch": 0.2733300110502026, "grad_norm": 1.9228060245513916, "learning_rate": 1.6534285049089385e-05, "loss": 0.3842, "step": 12615 }, { "epoch": 0.27343834636968345, "grad_norm": 1.3934712409973145, "learning_rate": 1.6531708295719295e-05, "loss": 0.307, "step": 12620 }, { "epoch": 0.2735466816891643, "grad_norm": 1.485435128211975, "learning_rate": 1.6529130785748642e-05, "loss": 0.3273, "step": 12625 }, { "epoch": 0.27365501700864514, "grad_norm": 1.5398075580596924, "learning_rate": 1.6526552519475993e-05, "loss": 0.3516, "step": 12630 }, { "epoch": 0.27376335232812604, "grad_norm": 1.8577461242675781, "learning_rate": 1.65239734972e-05, "loss": 0.2909, "step": 12635 }, { "epoch": 0.2738716876476069, "grad_norm": 1.0197348594665527, "learning_rate": 1.652139371921941e-05, "loss": 0.3538, "step": 12640 }, { "epoch": 0.27398002296708773, "grad_norm": 1.4897931814193726, "learning_rate": 1.6518813185833046e-05, "loss": 0.3569, "step": 12645 }, { "epoch": 0.2740883582865686, "grad_norm": 1.2423725128173828, "learning_rate": 1.651623189733983e-05, "loss": 0.4073, "step": 12650 }, { "epoch": 0.2741966936060494, "grad_norm": 1.444542407989502, "learning_rate": 1.6513649854038758e-05, "loss": 0.4098, "step": 12655 }, { "epoch": 0.2743050289255303, "grad_norm": 1.4962509870529175, "learning_rate": 1.6511067056228925e-05, "loss": 0.3727, "step": 12660 }, { "epoch": 0.27441336424501117, "grad_norm": 1.4633973836898804, "learning_rate": 1.650848350420951e-05, "loss": 0.2788, "step": 12665 }, { "epoch": 0.274521699564492, "grad_norm": 2.0921053886413574, "learning_rate": 1.650589919827978e-05, "loss": 0.3231, "step": 12670 }, { "epoch": 0.27463003488397286, "grad_norm": 1.065471887588501, "learning_rate": 1.6503314138739084e-05, "loss": 0.2885, "step": 12675 }, { "epoch": 0.2747383702034537, "grad_norm": 1.42979097366333, "learning_rate": 1.6500728325886866e-05, "loss": 0.2532, "step": 12680 }, { "epoch": 0.2748467055229346, "grad_norm": 1.8907426595687866, "learning_rate": 1.649814176002265e-05, "loss": 0.3647, "step": 12685 }, { "epoch": 0.27495504084241545, "grad_norm": 1.639302134513855, "learning_rate": 1.6495554441446056e-05, "loss": 0.326, "step": 12690 }, { "epoch": 0.2750633761618963, "grad_norm": 1.102064847946167, "learning_rate": 1.649296637045678e-05, "loss": 0.2788, "step": 12695 }, { "epoch": 0.27517171148137715, "grad_norm": 1.1151096820831299, "learning_rate": 1.6490377547354616e-05, "loss": 0.3437, "step": 12700 }, { "epoch": 0.275280046800858, "grad_norm": 1.3097987174987793, "learning_rate": 1.648778797243944e-05, "loss": 0.2789, "step": 12705 }, { "epoch": 0.2753883821203389, "grad_norm": 1.2480754852294922, "learning_rate": 1.6485197646011217e-05, "loss": 0.3964, "step": 12710 }, { "epoch": 0.27549671743981974, "grad_norm": 2.0318429470062256, "learning_rate": 1.648260656836999e-05, "loss": 0.2955, "step": 12715 }, { "epoch": 0.2756050527593006, "grad_norm": 1.383779525756836, "learning_rate": 1.6480014739815903e-05, "loss": 0.3317, "step": 12720 }, { "epoch": 0.27571338807878143, "grad_norm": 1.470137357711792, "learning_rate": 1.6477422160649183e-05, "loss": 0.3029, "step": 12725 }, { "epoch": 0.2758217233982623, "grad_norm": 1.7517924308776855, "learning_rate": 1.6474828831170134e-05, "loss": 0.2814, "step": 12730 }, { "epoch": 0.2759300587177432, "grad_norm": 1.4603554010391235, "learning_rate": 1.6472234751679156e-05, "loss": 0.3114, "step": 12735 }, { "epoch": 0.276038394037224, "grad_norm": 1.1581686735153198, "learning_rate": 1.6469639922476738e-05, "loss": 0.3219, "step": 12740 }, { "epoch": 0.27614672935670487, "grad_norm": 1.16033136844635, "learning_rate": 1.646704434386345e-05, "loss": 0.4246, "step": 12745 }, { "epoch": 0.2762550646761857, "grad_norm": 1.5301547050476074, "learning_rate": 1.6464448016139954e-05, "loss": 0.3601, "step": 12750 }, { "epoch": 0.2763633999956666, "grad_norm": 1.9479504823684692, "learning_rate": 1.646185093960699e-05, "loss": 0.275, "step": 12755 }, { "epoch": 0.27647173531514746, "grad_norm": 1.0109232664108276, "learning_rate": 1.645925311456539e-05, "loss": 0.4585, "step": 12760 }, { "epoch": 0.2765800706346283, "grad_norm": 1.4460506439208984, "learning_rate": 1.6456654541316082e-05, "loss": 0.3373, "step": 12765 }, { "epoch": 0.27668840595410915, "grad_norm": 1.754495620727539, "learning_rate": 1.6454055220160064e-05, "loss": 0.2311, "step": 12770 }, { "epoch": 0.27679674127359, "grad_norm": 1.4287666082382202, "learning_rate": 1.645145515139843e-05, "loss": 0.3516, "step": 12775 }, { "epoch": 0.2769050765930709, "grad_norm": 1.1659091711044312, "learning_rate": 1.644885433533236e-05, "loss": 0.3435, "step": 12780 }, { "epoch": 0.27701341191255174, "grad_norm": 1.2961349487304688, "learning_rate": 1.6446252772263117e-05, "loss": 0.2831, "step": 12785 }, { "epoch": 0.2771217472320326, "grad_norm": 1.7560404539108276, "learning_rate": 1.6443650462492054e-05, "loss": 0.341, "step": 12790 }, { "epoch": 0.27723008255151343, "grad_norm": 2.964478015899658, "learning_rate": 1.6441047406320616e-05, "loss": 0.3882, "step": 12795 }, { "epoch": 0.2773384178709943, "grad_norm": 2.5371510982513428, "learning_rate": 1.6438443604050324e-05, "loss": 0.3387, "step": 12800 }, { "epoch": 0.2774467531904752, "grad_norm": 1.4885625839233398, "learning_rate": 1.6435839055982782e-05, "loss": 0.3324, "step": 12805 }, { "epoch": 0.277555088509956, "grad_norm": 1.9120506048202515, "learning_rate": 1.64332337624197e-05, "loss": 0.235, "step": 12810 }, { "epoch": 0.2776634238294369, "grad_norm": 1.1139330863952637, "learning_rate": 1.643062772366285e-05, "loss": 0.3934, "step": 12815 }, { "epoch": 0.2777717591489177, "grad_norm": 1.4957424402236938, "learning_rate": 1.6428020940014113e-05, "loss": 0.2512, "step": 12820 }, { "epoch": 0.27788009446839856, "grad_norm": 1.2014284133911133, "learning_rate": 1.642541341177544e-05, "loss": 0.3898, "step": 12825 }, { "epoch": 0.27798842978787947, "grad_norm": 1.0179508924484253, "learning_rate": 1.6422805139248878e-05, "loss": 0.316, "step": 12830 }, { "epoch": 0.2780967651073603, "grad_norm": 1.5537687540054321, "learning_rate": 1.6420196122736553e-05, "loss": 0.3903, "step": 12835 }, { "epoch": 0.27820510042684116, "grad_norm": 1.8018238544464111, "learning_rate": 1.6417586362540684e-05, "loss": 0.3758, "step": 12840 }, { "epoch": 0.278313435746322, "grad_norm": 2.1704275608062744, "learning_rate": 1.641497585896357e-05, "loss": 0.3028, "step": 12845 }, { "epoch": 0.27842177106580285, "grad_norm": 1.0451488494873047, "learning_rate": 1.6412364612307596e-05, "loss": 0.3196, "step": 12850 }, { "epoch": 0.27853010638528375, "grad_norm": 1.2966328859329224, "learning_rate": 1.6409752622875238e-05, "loss": 0.2983, "step": 12855 }, { "epoch": 0.2786384417047646, "grad_norm": 1.3428422212600708, "learning_rate": 1.6407139890969062e-05, "loss": 0.3495, "step": 12860 }, { "epoch": 0.27874677702424544, "grad_norm": 1.7308050394058228, "learning_rate": 1.6404526416891707e-05, "loss": 0.4065, "step": 12865 }, { "epoch": 0.2788551123437263, "grad_norm": 1.5788806676864624, "learning_rate": 1.6401912200945904e-05, "loss": 0.4098, "step": 12870 }, { "epoch": 0.27896344766320713, "grad_norm": 1.454671025276184, "learning_rate": 1.6399297243434476e-05, "loss": 0.4074, "step": 12875 }, { "epoch": 0.27907178298268803, "grad_norm": 1.65697181224823, "learning_rate": 1.639668154466032e-05, "loss": 0.2962, "step": 12880 }, { "epoch": 0.2791801183021689, "grad_norm": 1.7704681158065796, "learning_rate": 1.6394065104926434e-05, "loss": 0.4143, "step": 12885 }, { "epoch": 0.2792884536216497, "grad_norm": 1.2742605209350586, "learning_rate": 1.6391447924535885e-05, "loss": 0.2878, "step": 12890 }, { "epoch": 0.27939678894113057, "grad_norm": 1.2138772010803223, "learning_rate": 1.638883000379184e-05, "loss": 0.3582, "step": 12895 }, { "epoch": 0.27950512426061147, "grad_norm": 2.3789072036743164, "learning_rate": 1.638621134299754e-05, "loss": 0.3189, "step": 12900 }, { "epoch": 0.2796134595800923, "grad_norm": 1.6798388957977295, "learning_rate": 1.6383591942456327e-05, "loss": 0.2371, "step": 12905 }, { "epoch": 0.27972179489957316, "grad_norm": 1.280508279800415, "learning_rate": 1.638097180247161e-05, "loss": 0.2951, "step": 12910 }, { "epoch": 0.279830130219054, "grad_norm": 1.260433554649353, "learning_rate": 1.6378350923346898e-05, "loss": 0.3371, "step": 12915 }, { "epoch": 0.27993846553853485, "grad_norm": 1.2352240085601807, "learning_rate": 1.6375729305385778e-05, "loss": 0.3146, "step": 12920 }, { "epoch": 0.28004680085801575, "grad_norm": 1.5298428535461426, "learning_rate": 1.637310694889193e-05, "loss": 0.4207, "step": 12925 }, { "epoch": 0.2801551361774966, "grad_norm": 1.2315592765808105, "learning_rate": 1.6370483854169107e-05, "loss": 0.3617, "step": 12930 }, { "epoch": 0.28026347149697745, "grad_norm": 1.74832022190094, "learning_rate": 1.636786002152116e-05, "loss": 0.2377, "step": 12935 }, { "epoch": 0.2803718068164583, "grad_norm": 1.2903679609298706, "learning_rate": 1.6365235451252018e-05, "loss": 0.2722, "step": 12940 }, { "epoch": 0.28048014213593914, "grad_norm": 1.5354608297348022, "learning_rate": 1.6362610143665704e-05, "loss": 0.2752, "step": 12945 }, { "epoch": 0.28058847745542004, "grad_norm": 1.5168089866638184, "learning_rate": 1.6359984099066316e-05, "loss": 0.4237, "step": 12950 }, { "epoch": 0.2806968127749009, "grad_norm": 0.8546538352966309, "learning_rate": 1.6357357317758043e-05, "loss": 0.4136, "step": 12955 }, { "epoch": 0.28080514809438173, "grad_norm": 1.8368245363235474, "learning_rate": 1.6354729800045157e-05, "loss": 0.3417, "step": 12960 }, { "epoch": 0.2809134834138626, "grad_norm": 1.5482358932495117, "learning_rate": 1.6352101546232017e-05, "loss": 0.3284, "step": 12965 }, { "epoch": 0.2810218187333434, "grad_norm": 1.4207196235656738, "learning_rate": 1.634947255662307e-05, "loss": 0.3443, "step": 12970 }, { "epoch": 0.2811301540528243, "grad_norm": 1.5349045991897583, "learning_rate": 1.634684283152284e-05, "loss": 0.2858, "step": 12975 }, { "epoch": 0.28123848937230517, "grad_norm": 1.212644100189209, "learning_rate": 1.6344212371235945e-05, "loss": 0.2746, "step": 12980 }, { "epoch": 0.281346824691786, "grad_norm": 1.6706863641738892, "learning_rate": 1.634158117606708e-05, "loss": 0.4008, "step": 12985 }, { "epoch": 0.28145516001126686, "grad_norm": 1.304299235343933, "learning_rate": 1.6338949246321038e-05, "loss": 0.3455, "step": 12990 }, { "epoch": 0.2815634953307477, "grad_norm": 1.3347814083099365, "learning_rate": 1.6336316582302678e-05, "loss": 0.3507, "step": 12995 }, { "epoch": 0.2816718306502286, "grad_norm": 1.6360695362091064, "learning_rate": 1.633368318431696e-05, "loss": 0.2282, "step": 13000 }, { "epoch": 0.28178016596970945, "grad_norm": 1.152800440788269, "learning_rate": 1.633104905266893e-05, "loss": 0.2368, "step": 13005 }, { "epoch": 0.2818885012891903, "grad_norm": 2.359750986099243, "learning_rate": 1.6328414187663703e-05, "loss": 0.3011, "step": 13010 }, { "epoch": 0.28199683660867114, "grad_norm": 1.4113036394119263, "learning_rate": 1.632577858960649e-05, "loss": 0.3487, "step": 13015 }, { "epoch": 0.28210517192815204, "grad_norm": 0.944722056388855, "learning_rate": 1.6323142258802587e-05, "loss": 0.2517, "step": 13020 }, { "epoch": 0.2822135072476329, "grad_norm": 1.7447872161865234, "learning_rate": 1.6320505195557374e-05, "loss": 0.3773, "step": 13025 }, { "epoch": 0.28232184256711373, "grad_norm": 1.5991405248641968, "learning_rate": 1.6317867400176316e-05, "loss": 0.3594, "step": 13030 }, { "epoch": 0.2824301778865946, "grad_norm": 1.5711708068847656, "learning_rate": 1.6315228872964962e-05, "loss": 0.3924, "step": 13035 }, { "epoch": 0.2825385132060754, "grad_norm": 1.7626612186431885, "learning_rate": 1.6312589614228947e-05, "loss": 0.3047, "step": 13040 }, { "epoch": 0.2826468485255563, "grad_norm": 2.281799554824829, "learning_rate": 1.6309949624273987e-05, "loss": 0.3568, "step": 13045 }, { "epoch": 0.2827551838450372, "grad_norm": 1.640634536743164, "learning_rate": 1.6307308903405884e-05, "loss": 0.4047, "step": 13050 }, { "epoch": 0.282863519164518, "grad_norm": 1.3108195066452026, "learning_rate": 1.630466745193053e-05, "loss": 0.304, "step": 13055 }, { "epoch": 0.28297185448399886, "grad_norm": 1.3858261108398438, "learning_rate": 1.6302025270153894e-05, "loss": 0.2917, "step": 13060 }, { "epoch": 0.2830801898034797, "grad_norm": 1.477307915687561, "learning_rate": 1.6299382358382038e-05, "loss": 0.4276, "step": 13065 }, { "epoch": 0.2831885251229606, "grad_norm": 1.2012488842010498, "learning_rate": 1.6296738716921097e-05, "loss": 0.3484, "step": 13070 }, { "epoch": 0.28329686044244146, "grad_norm": 1.912410020828247, "learning_rate": 1.6294094346077304e-05, "loss": 0.2795, "step": 13075 }, { "epoch": 0.2834051957619223, "grad_norm": 1.578093409538269, "learning_rate": 1.629144924615696e-05, "loss": 0.2827, "step": 13080 }, { "epoch": 0.28351353108140315, "grad_norm": 1.6715025901794434, "learning_rate": 1.6288803417466474e-05, "loss": 0.3612, "step": 13085 }, { "epoch": 0.283621866400884, "grad_norm": 1.7376196384429932, "learning_rate": 1.6286156860312316e-05, "loss": 0.2995, "step": 13090 }, { "epoch": 0.2837302017203649, "grad_norm": 1.554911494255066, "learning_rate": 1.628350957500105e-05, "loss": 0.4894, "step": 13095 }, { "epoch": 0.28383853703984574, "grad_norm": 2.0092906951904297, "learning_rate": 1.628086156183933e-05, "loss": 0.4179, "step": 13100 }, { "epoch": 0.2839468723593266, "grad_norm": 1.497218370437622, "learning_rate": 1.6278212821133884e-05, "loss": 0.2857, "step": 13105 }, { "epoch": 0.28405520767880743, "grad_norm": 1.2141188383102417, "learning_rate": 1.6275563353191532e-05, "loss": 0.3158, "step": 13110 }, { "epoch": 0.2841635429982883, "grad_norm": 1.4914137125015259, "learning_rate": 1.627291315831917e-05, "loss": 0.2835, "step": 13115 }, { "epoch": 0.2842718783177692, "grad_norm": 1.6903616189956665, "learning_rate": 1.6270262236823787e-05, "loss": 0.277, "step": 13120 }, { "epoch": 0.28438021363725, "grad_norm": 1.3298099040985107, "learning_rate": 1.6267610589012455e-05, "loss": 0.3726, "step": 13125 }, { "epoch": 0.28448854895673087, "grad_norm": 1.586503028869629, "learning_rate": 1.6264958215192325e-05, "loss": 0.3249, "step": 13130 }, { "epoch": 0.2845968842762117, "grad_norm": 1.1995258331298828, "learning_rate": 1.6262305115670635e-05, "loss": 0.3483, "step": 13135 }, { "epoch": 0.28470521959569256, "grad_norm": 1.263697624206543, "learning_rate": 1.6259651290754703e-05, "loss": 0.2491, "step": 13140 }, { "epoch": 0.28481355491517346, "grad_norm": 1.482848882675171, "learning_rate": 1.625699674075194e-05, "loss": 0.2853, "step": 13145 }, { "epoch": 0.2849218902346543, "grad_norm": 1.7423473596572876, "learning_rate": 1.625434146596984e-05, "loss": 0.3618, "step": 13150 }, { "epoch": 0.28503022555413515, "grad_norm": 1.0145931243896484, "learning_rate": 1.6251685466715965e-05, "loss": 0.2893, "step": 13155 }, { "epoch": 0.285138560873616, "grad_norm": 1.6342865228652954, "learning_rate": 1.624902874329798e-05, "loss": 0.2933, "step": 13160 }, { "epoch": 0.2852468961930969, "grad_norm": 2.0874578952789307, "learning_rate": 1.6246371296023627e-05, "loss": 0.2638, "step": 13165 }, { "epoch": 0.28535523151257774, "grad_norm": 1.5372570753097534, "learning_rate": 1.624371312520073e-05, "loss": 0.2178, "step": 13170 }, { "epoch": 0.2854635668320586, "grad_norm": 1.8171154260635376, "learning_rate": 1.6241054231137196e-05, "loss": 0.3175, "step": 13175 }, { "epoch": 0.28557190215153944, "grad_norm": 1.7279729843139648, "learning_rate": 1.6238394614141022e-05, "loss": 0.3234, "step": 13180 }, { "epoch": 0.2856802374710203, "grad_norm": 1.1464921236038208, "learning_rate": 1.6235734274520282e-05, "loss": 0.3322, "step": 13185 }, { "epoch": 0.2857885727905012, "grad_norm": 1.8393841981887817, "learning_rate": 1.623307321258314e-05, "loss": 0.3013, "step": 13190 }, { "epoch": 0.28589690810998203, "grad_norm": 1.3612854480743408, "learning_rate": 1.6230411428637834e-05, "loss": 0.3469, "step": 13195 }, { "epoch": 0.2860052434294629, "grad_norm": 1.3495360612869263, "learning_rate": 1.62277489229927e-05, "loss": 0.2777, "step": 13200 }, { "epoch": 0.2861135787489437, "grad_norm": 1.549973726272583, "learning_rate": 1.622508569595614e-05, "loss": 0.2697, "step": 13205 }, { "epoch": 0.28622191406842457, "grad_norm": 1.7131447792053223, "learning_rate": 1.6222421747836658e-05, "loss": 0.3302, "step": 13210 }, { "epoch": 0.28633024938790547, "grad_norm": 1.729315161705017, "learning_rate": 1.6219757078942826e-05, "loss": 0.2987, "step": 13215 }, { "epoch": 0.2864385847073863, "grad_norm": 1.0591764450073242, "learning_rate": 1.6217091689583312e-05, "loss": 0.2556, "step": 13220 }, { "epoch": 0.28654692002686716, "grad_norm": 1.162454605102539, "learning_rate": 1.621442558006685e-05, "loss": 0.3835, "step": 13225 }, { "epoch": 0.286655255346348, "grad_norm": 1.2149009704589844, "learning_rate": 1.6211758750702284e-05, "loss": 0.3954, "step": 13230 }, { "epoch": 0.28676359066582885, "grad_norm": 1.2520877122879028, "learning_rate": 1.6209091201798518e-05, "loss": 0.1449, "step": 13235 }, { "epoch": 0.28687192598530975, "grad_norm": 1.543190360069275, "learning_rate": 1.6206422933664544e-05, "loss": 0.3804, "step": 13240 }, { "epoch": 0.2869802613047906, "grad_norm": 1.2643312215805054, "learning_rate": 1.620375394660945e-05, "loss": 0.3306, "step": 13245 }, { "epoch": 0.28708859662427144, "grad_norm": 1.493701457977295, "learning_rate": 1.6201084240942394e-05, "loss": 0.3843, "step": 13250 }, { "epoch": 0.2871969319437523, "grad_norm": 1.234082579612732, "learning_rate": 1.6198413816972618e-05, "loss": 0.4119, "step": 13255 }, { "epoch": 0.28730526726323313, "grad_norm": 0.9710360765457153, "learning_rate": 1.6195742675009456e-05, "loss": 0.2451, "step": 13260 }, { "epoch": 0.28741360258271403, "grad_norm": 1.3056120872497559, "learning_rate": 1.619307081536231e-05, "loss": 0.3531, "step": 13265 }, { "epoch": 0.2875219379021949, "grad_norm": 1.3095186948776245, "learning_rate": 1.6190398238340693e-05, "loss": 0.3349, "step": 13270 }, { "epoch": 0.2876302732216757, "grad_norm": 1.4331997632980347, "learning_rate": 1.6187724944254166e-05, "loss": 0.4478, "step": 13275 }, { "epoch": 0.28773860854115657, "grad_norm": 1.63703453540802, "learning_rate": 1.61850509334124e-05, "loss": 0.2293, "step": 13280 }, { "epoch": 0.28784694386063747, "grad_norm": 1.2566864490509033, "learning_rate": 1.618237620612513e-05, "loss": 0.2313, "step": 13285 }, { "epoch": 0.2879552791801183, "grad_norm": 1.2417309284210205, "learning_rate": 1.617970076270219e-05, "loss": 0.3296, "step": 13290 }, { "epoch": 0.28806361449959916, "grad_norm": 1.9112776517868042, "learning_rate": 1.6177024603453492e-05, "loss": 0.3422, "step": 13295 }, { "epoch": 0.28817194981908, "grad_norm": 1.6013529300689697, "learning_rate": 1.6174347728689025e-05, "loss": 0.3546, "step": 13300 }, { "epoch": 0.28828028513856085, "grad_norm": 1.9358822107315063, "learning_rate": 1.617167013871886e-05, "loss": 0.3048, "step": 13305 }, { "epoch": 0.28838862045804176, "grad_norm": 1.7230286598205566, "learning_rate": 1.6168991833853168e-05, "loss": 0.3295, "step": 13310 }, { "epoch": 0.2884969557775226, "grad_norm": 1.7290083169937134, "learning_rate": 1.616631281440218e-05, "loss": 0.3934, "step": 13315 }, { "epoch": 0.28860529109700345, "grad_norm": 2.0495641231536865, "learning_rate": 1.6163633080676225e-05, "loss": 0.3913, "step": 13320 }, { "epoch": 0.2887136264164843, "grad_norm": 2.292652130126953, "learning_rate": 1.6160952632985708e-05, "loss": 0.347, "step": 13325 }, { "epoch": 0.28882196173596514, "grad_norm": 1.9485598802566528, "learning_rate": 1.615827147164112e-05, "loss": 0.3465, "step": 13330 }, { "epoch": 0.28893029705544604, "grad_norm": 1.371490716934204, "learning_rate": 1.615558959695303e-05, "loss": 0.3852, "step": 13335 }, { "epoch": 0.2890386323749269, "grad_norm": 1.9900381565093994, "learning_rate": 1.61529070092321e-05, "loss": 0.3651, "step": 13340 }, { "epoch": 0.28914696769440773, "grad_norm": 1.891823649406433, "learning_rate": 1.6150223708789062e-05, "loss": 0.3454, "step": 13345 }, { "epoch": 0.2892553030138886, "grad_norm": 1.8066450357437134, "learning_rate": 1.614753969593474e-05, "loss": 0.2751, "step": 13350 }, { "epoch": 0.2893636383333694, "grad_norm": 1.7275522947311401, "learning_rate": 1.614485497098003e-05, "loss": 0.302, "step": 13355 }, { "epoch": 0.2894719736528503, "grad_norm": 1.3501509428024292, "learning_rate": 1.6142169534235922e-05, "loss": 0.461, "step": 13360 }, { "epoch": 0.28958030897233117, "grad_norm": 1.659726619720459, "learning_rate": 1.6139483386013487e-05, "loss": 0.3441, "step": 13365 }, { "epoch": 0.289688644291812, "grad_norm": 1.8146802186965942, "learning_rate": 1.6136796526623867e-05, "loss": 0.3746, "step": 13370 }, { "epoch": 0.28979697961129286, "grad_norm": 1.506384015083313, "learning_rate": 1.61341089563783e-05, "loss": 0.2733, "step": 13375 }, { "epoch": 0.2899053149307737, "grad_norm": 1.6491996049880981, "learning_rate": 1.61314206755881e-05, "loss": 0.3728, "step": 13380 }, { "epoch": 0.2900136502502546, "grad_norm": 1.6270290613174438, "learning_rate": 1.6128731684564664e-05, "loss": 0.3222, "step": 13385 }, { "epoch": 0.29012198556973545, "grad_norm": 1.0617536306381226, "learning_rate": 1.612604198361947e-05, "loss": 0.3143, "step": 13390 }, { "epoch": 0.2902303208892163, "grad_norm": 1.1739592552185059, "learning_rate": 1.612335157306408e-05, "loss": 0.371, "step": 13395 }, { "epoch": 0.29033865620869714, "grad_norm": 0.9858551621437073, "learning_rate": 1.612066045321014e-05, "loss": 0.3377, "step": 13400 }, { "epoch": 0.290446991528178, "grad_norm": 1.959256887435913, "learning_rate": 1.611796862436937e-05, "loss": 0.2668, "step": 13405 }, { "epoch": 0.2905553268476589, "grad_norm": 1.8575801849365234, "learning_rate": 1.6115276086853585e-05, "loss": 0.344, "step": 13410 }, { "epoch": 0.29066366216713974, "grad_norm": 1.3422893285751343, "learning_rate": 1.6112582840974672e-05, "loss": 0.3243, "step": 13415 }, { "epoch": 0.2907719974866206, "grad_norm": 1.0726577043533325, "learning_rate": 1.6109888887044602e-05, "loss": 0.3063, "step": 13420 }, { "epoch": 0.2908803328061014, "grad_norm": 1.8975412845611572, "learning_rate": 1.6107194225375434e-05, "loss": 0.4206, "step": 13425 }, { "epoch": 0.29098866812558233, "grad_norm": 1.4854719638824463, "learning_rate": 1.6104498856279297e-05, "loss": 0.4078, "step": 13430 }, { "epoch": 0.2910970034450632, "grad_norm": 1.7783769369125366, "learning_rate": 1.6101802780068414e-05, "loss": 0.276, "step": 13435 }, { "epoch": 0.291205338764544, "grad_norm": 1.5504601001739502, "learning_rate": 1.6099105997055083e-05, "loss": 0.3298, "step": 13440 }, { "epoch": 0.29131367408402487, "grad_norm": 1.6825021505355835, "learning_rate": 1.609640850755169e-05, "loss": 0.3858, "step": 13445 }, { "epoch": 0.2914220094035057, "grad_norm": 1.7095202207565308, "learning_rate": 1.6093710311870693e-05, "loss": 0.3118, "step": 13450 }, { "epoch": 0.2915303447229866, "grad_norm": 1.1606857776641846, "learning_rate": 1.609101141032464e-05, "loss": 0.4036, "step": 13455 }, { "epoch": 0.29163868004246746, "grad_norm": 2.474874258041382, "learning_rate": 1.6088311803226158e-05, "loss": 0.3674, "step": 13460 }, { "epoch": 0.2917470153619483, "grad_norm": 1.4474800825119019, "learning_rate": 1.6085611490887957e-05, "loss": 0.2728, "step": 13465 }, { "epoch": 0.29185535068142915, "grad_norm": 0.9021762013435364, "learning_rate": 1.608291047362283e-05, "loss": 0.3704, "step": 13470 }, { "epoch": 0.29196368600091, "grad_norm": 1.8383070230484009, "learning_rate": 1.6080208751743646e-05, "loss": 0.3693, "step": 13475 }, { "epoch": 0.2920720213203909, "grad_norm": 1.5249390602111816, "learning_rate": 1.6077506325563354e-05, "loss": 0.2547, "step": 13480 }, { "epoch": 0.29218035663987174, "grad_norm": 1.8408256769180298, "learning_rate": 1.6074803195395e-05, "loss": 0.3353, "step": 13485 }, { "epoch": 0.2922886919593526, "grad_norm": 1.039288878440857, "learning_rate": 1.6072099361551696e-05, "loss": 0.3358, "step": 13490 }, { "epoch": 0.29239702727883343, "grad_norm": 2.0211069583892822, "learning_rate": 1.606939482434664e-05, "loss": 0.3537, "step": 13495 }, { "epoch": 0.2925053625983143, "grad_norm": 1.8483645915985107, "learning_rate": 1.6066689584093117e-05, "loss": 0.3321, "step": 13500 }, { "epoch": 0.2926136979177952, "grad_norm": 1.1897093057632446, "learning_rate": 1.6063983641104477e-05, "loss": 0.1844, "step": 13505 }, { "epoch": 0.292722033237276, "grad_norm": 1.299584150314331, "learning_rate": 1.6061276995694178e-05, "loss": 0.3278, "step": 13510 }, { "epoch": 0.29283036855675687, "grad_norm": 1.29302978515625, "learning_rate": 1.605856964817573e-05, "loss": 0.2675, "step": 13515 }, { "epoch": 0.2929387038762377, "grad_norm": 1.8357330560684204, "learning_rate": 1.6055861598862753e-05, "loss": 0.4523, "step": 13520 }, { "epoch": 0.29304703919571856, "grad_norm": 1.3901219367980957, "learning_rate": 1.6053152848068926e-05, "loss": 0.2762, "step": 13525 }, { "epoch": 0.29315537451519946, "grad_norm": 1.5383070707321167, "learning_rate": 1.6050443396108014e-05, "loss": 0.2624, "step": 13530 }, { "epoch": 0.2932637098346803, "grad_norm": 1.711470127105713, "learning_rate": 1.604773324329387e-05, "loss": 0.3426, "step": 13535 }, { "epoch": 0.29337204515416115, "grad_norm": 1.664194107055664, "learning_rate": 1.6045022389940426e-05, "loss": 0.229, "step": 13540 }, { "epoch": 0.293480380473642, "grad_norm": 1.4728779792785645, "learning_rate": 1.6042310836361692e-05, "loss": 0.353, "step": 13545 }, { "epoch": 0.2935887157931229, "grad_norm": 1.416605830192566, "learning_rate": 1.6039598582871763e-05, "loss": 0.3833, "step": 13550 }, { "epoch": 0.29369705111260375, "grad_norm": 1.5818971395492554, "learning_rate": 1.603688562978481e-05, "loss": 0.3885, "step": 13555 }, { "epoch": 0.2938053864320846, "grad_norm": 1.4999443292617798, "learning_rate": 1.603417197741509e-05, "loss": 0.3278, "step": 13560 }, { "epoch": 0.29391372175156544, "grad_norm": 1.5748004913330078, "learning_rate": 1.6031457626076935e-05, "loss": 0.2822, "step": 13565 }, { "epoch": 0.2940220570710463, "grad_norm": 1.0463306903839111, "learning_rate": 1.602874257608477e-05, "loss": 0.2494, "step": 13570 }, { "epoch": 0.2941303923905272, "grad_norm": 1.6155827045440674, "learning_rate": 1.6026026827753085e-05, "loss": 0.2556, "step": 13575 }, { "epoch": 0.29423872771000803, "grad_norm": 1.518309473991394, "learning_rate": 1.6023310381396463e-05, "loss": 0.2578, "step": 13580 }, { "epoch": 0.2943470630294889, "grad_norm": 1.3194633722305298, "learning_rate": 1.6020593237329563e-05, "loss": 0.3206, "step": 13585 }, { "epoch": 0.2944553983489697, "grad_norm": 1.3518965244293213, "learning_rate": 1.6017875395867126e-05, "loss": 0.2833, "step": 13590 }, { "epoch": 0.29456373366845057, "grad_norm": 1.2715022563934326, "learning_rate": 1.6015156857323972e-05, "loss": 0.3412, "step": 13595 }, { "epoch": 0.29467206898793147, "grad_norm": 1.4181503057479858, "learning_rate": 1.6012437622015e-05, "loss": 0.3642, "step": 13600 }, { "epoch": 0.2947804043074123, "grad_norm": 1.550636887550354, "learning_rate": 1.60097176902552e-05, "loss": 0.3818, "step": 13605 }, { "epoch": 0.29488873962689316, "grad_norm": 1.6887677907943726, "learning_rate": 1.600699706235963e-05, "loss": 0.3583, "step": 13610 }, { "epoch": 0.294997074946374, "grad_norm": 1.5348198413848877, "learning_rate": 1.600427573864343e-05, "loss": 0.2186, "step": 13615 }, { "epoch": 0.29510541026585485, "grad_norm": 1.0933914184570312, "learning_rate": 1.6001553719421837e-05, "loss": 0.2931, "step": 13620 }, { "epoch": 0.29521374558533575, "grad_norm": 1.239607572555542, "learning_rate": 1.5998831005010144e-05, "loss": 0.2858, "step": 13625 }, { "epoch": 0.2953220809048166, "grad_norm": 1.6134425401687622, "learning_rate": 1.5996107595723744e-05, "loss": 0.356, "step": 13630 }, { "epoch": 0.29543041622429744, "grad_norm": 1.808387279510498, "learning_rate": 1.59933834918781e-05, "loss": 0.3632, "step": 13635 }, { "epoch": 0.2955387515437783, "grad_norm": 1.4753845930099487, "learning_rate": 1.5990658693788757e-05, "loss": 0.4079, "step": 13640 }, { "epoch": 0.29564708686325913, "grad_norm": 1.8179206848144531, "learning_rate": 1.598793320177135e-05, "loss": 0.4095, "step": 13645 }, { "epoch": 0.29575542218274004, "grad_norm": 1.6272155046463013, "learning_rate": 1.5985207016141575e-05, "loss": 0.3609, "step": 13650 }, { "epoch": 0.2958637575022209, "grad_norm": 1.4624624252319336, "learning_rate": 1.5982480137215228e-05, "loss": 0.2561, "step": 13655 }, { "epoch": 0.2959720928217017, "grad_norm": 1.242552638053894, "learning_rate": 1.5979752565308174e-05, "loss": 0.4501, "step": 13660 }, { "epoch": 0.29608042814118257, "grad_norm": 1.6367683410644531, "learning_rate": 1.5977024300736363e-05, "loss": 0.2972, "step": 13665 }, { "epoch": 0.2961887634606634, "grad_norm": 2.0039892196655273, "learning_rate": 1.5974295343815823e-05, "loss": 0.2588, "step": 13670 }, { "epoch": 0.2962970987801443, "grad_norm": 1.3204914331436157, "learning_rate": 1.5971565694862664e-05, "loss": 0.246, "step": 13675 }, { "epoch": 0.29640543409962516, "grad_norm": 1.6424264907836914, "learning_rate": 1.596883535419307e-05, "loss": 0.3606, "step": 13680 }, { "epoch": 0.296513769419106, "grad_norm": 1.9653798341751099, "learning_rate": 1.5966104322123313e-05, "loss": 0.4302, "step": 13685 }, { "epoch": 0.29662210473858686, "grad_norm": 1.5361214876174927, "learning_rate": 1.5963372598969744e-05, "loss": 0.3551, "step": 13690 }, { "epoch": 0.29673044005806776, "grad_norm": 1.456540584564209, "learning_rate": 1.596064018504879e-05, "loss": 0.2946, "step": 13695 }, { "epoch": 0.2968387753775486, "grad_norm": 1.686476469039917, "learning_rate": 1.5957907080676962e-05, "loss": 0.3197, "step": 13700 }, { "epoch": 0.29694711069702945, "grad_norm": 1.1420515775680542, "learning_rate": 1.595517328617085e-05, "loss": 0.3341, "step": 13705 }, { "epoch": 0.2970554460165103, "grad_norm": 1.3322856426239014, "learning_rate": 1.5952438801847118e-05, "loss": 0.3696, "step": 13710 }, { "epoch": 0.29716378133599114, "grad_norm": 1.6347002983093262, "learning_rate": 1.594970362802252e-05, "loss": 0.4156, "step": 13715 }, { "epoch": 0.29727211665547204, "grad_norm": 1.5940254926681519, "learning_rate": 1.594696776501388e-05, "loss": 0.2662, "step": 13720 }, { "epoch": 0.2973804519749529, "grad_norm": 1.5922350883483887, "learning_rate": 1.594423121313811e-05, "loss": 0.3475, "step": 13725 }, { "epoch": 0.29748878729443373, "grad_norm": 1.6183782815933228, "learning_rate": 1.5941493972712203e-05, "loss": 0.2528, "step": 13730 }, { "epoch": 0.2975971226139146, "grad_norm": 1.194106101989746, "learning_rate": 1.593875604405322e-05, "loss": 0.2007, "step": 13735 }, { "epoch": 0.2977054579333954, "grad_norm": 1.6226915121078491, "learning_rate": 1.5936017427478315e-05, "loss": 0.2849, "step": 13740 }, { "epoch": 0.2978137932528763, "grad_norm": 1.825257658958435, "learning_rate": 1.593327812330471e-05, "loss": 0.3707, "step": 13745 }, { "epoch": 0.29792212857235717, "grad_norm": 0.9420788884162903, "learning_rate": 1.5930538131849714e-05, "loss": 0.2609, "step": 13750 }, { "epoch": 0.298030463891838, "grad_norm": 1.38413405418396, "learning_rate": 1.5927797453430718e-05, "loss": 0.3027, "step": 13755 }, { "epoch": 0.29813879921131886, "grad_norm": 1.5127328634262085, "learning_rate": 1.592505608836518e-05, "loss": 0.3407, "step": 13760 }, { "epoch": 0.2982471345307997, "grad_norm": 0.9391416907310486, "learning_rate": 1.5922314036970657e-05, "loss": 0.2415, "step": 13765 }, { "epoch": 0.2983554698502806, "grad_norm": 1.6470932960510254, "learning_rate": 1.5919571299564765e-05, "loss": 0.3709, "step": 13770 }, { "epoch": 0.29846380516976145, "grad_norm": 1.7438522577285767, "learning_rate": 1.5916827876465218e-05, "loss": 0.3138, "step": 13775 }, { "epoch": 0.2985721404892423, "grad_norm": 1.0500231981277466, "learning_rate": 1.5914083767989792e-05, "loss": 0.3925, "step": 13780 }, { "epoch": 0.29868047580872314, "grad_norm": 1.5369250774383545, "learning_rate": 1.5911338974456357e-05, "loss": 0.3074, "step": 13785 }, { "epoch": 0.298788811128204, "grad_norm": 1.7381230592727661, "learning_rate": 1.590859349618285e-05, "loss": 0.3654, "step": 13790 }, { "epoch": 0.2988971464476849, "grad_norm": 1.4588080644607544, "learning_rate": 1.59058473334873e-05, "loss": 0.2906, "step": 13795 }, { "epoch": 0.29900548176716574, "grad_norm": 1.2381255626678467, "learning_rate": 1.5903100486687805e-05, "loss": 0.3518, "step": 13800 }, { "epoch": 0.2991138170866466, "grad_norm": 1.2557989358901978, "learning_rate": 1.5900352956102547e-05, "loss": 0.3013, "step": 13805 }, { "epoch": 0.29922215240612743, "grad_norm": 1.3902703523635864, "learning_rate": 1.5897604742049786e-05, "loss": 0.3671, "step": 13810 }, { "epoch": 0.29933048772560833, "grad_norm": 1.3423147201538086, "learning_rate": 1.5894855844847863e-05, "loss": 0.2905, "step": 13815 }, { "epoch": 0.2994388230450892, "grad_norm": 1.2260431051254272, "learning_rate": 1.589210626481519e-05, "loss": 0.2968, "step": 13820 }, { "epoch": 0.29954715836457, "grad_norm": 1.03899347782135, "learning_rate": 1.588935600227028e-05, "loss": 0.3131, "step": 13825 }, { "epoch": 0.29965549368405087, "grad_norm": 1.471488118171692, "learning_rate": 1.5886605057531692e-05, "loss": 0.3887, "step": 13830 }, { "epoch": 0.2997638290035317, "grad_norm": 1.2767741680145264, "learning_rate": 1.5883853430918095e-05, "loss": 0.2813, "step": 13835 }, { "epoch": 0.2998721643230126, "grad_norm": 2.108276844024658, "learning_rate": 1.588110112274821e-05, "loss": 0.2591, "step": 13840 }, { "epoch": 0.29998049964249346, "grad_norm": 1.6783605813980103, "learning_rate": 1.5878348133340863e-05, "loss": 0.4985, "step": 13845 }, { "epoch": 0.3000888349619743, "grad_norm": 1.2595211267471313, "learning_rate": 1.5875594463014946e-05, "loss": 0.2733, "step": 13850 }, { "epoch": 0.30019717028145515, "grad_norm": 0.9857636094093323, "learning_rate": 1.5872840112089423e-05, "loss": 0.3491, "step": 13855 }, { "epoch": 0.300305505600936, "grad_norm": 1.615783929824829, "learning_rate": 1.587008508088335e-05, "loss": 0.4493, "step": 13860 }, { "epoch": 0.3004138409204169, "grad_norm": 1.712027668952942, "learning_rate": 1.586732936971585e-05, "loss": 0.2204, "step": 13865 }, { "epoch": 0.30052217623989774, "grad_norm": 1.404159426689148, "learning_rate": 1.5864572978906142e-05, "loss": 0.2445, "step": 13870 }, { "epoch": 0.3006305115593786, "grad_norm": 1.6699641942977905, "learning_rate": 1.5861815908773503e-05, "loss": 0.3314, "step": 13875 }, { "epoch": 0.30073884687885943, "grad_norm": 1.7160403728485107, "learning_rate": 1.5859058159637298e-05, "loss": 0.3364, "step": 13880 }, { "epoch": 0.3008471821983403, "grad_norm": 1.4760265350341797, "learning_rate": 1.5856299731816974e-05, "loss": 0.3748, "step": 13885 }, { "epoch": 0.3009555175178212, "grad_norm": 1.813386082649231, "learning_rate": 1.5853540625632056e-05, "loss": 0.2319, "step": 13890 }, { "epoch": 0.301063852837302, "grad_norm": 1.96198308467865, "learning_rate": 1.5850780841402143e-05, "loss": 0.3763, "step": 13895 }, { "epoch": 0.30117218815678287, "grad_norm": 1.5917882919311523, "learning_rate": 1.5848020379446914e-05, "loss": 0.3282, "step": 13900 }, { "epoch": 0.3012805234762637, "grad_norm": 1.641971468925476, "learning_rate": 1.5845259240086126e-05, "loss": 0.2832, "step": 13905 }, { "epoch": 0.30138885879574456, "grad_norm": 1.433504581451416, "learning_rate": 1.5842497423639617e-05, "loss": 0.2849, "step": 13910 }, { "epoch": 0.30149719411522546, "grad_norm": 1.272992730140686, "learning_rate": 1.58397349304273e-05, "loss": 0.3466, "step": 13915 }, { "epoch": 0.3016055294347063, "grad_norm": 1.862733244895935, "learning_rate": 1.5836971760769176e-05, "loss": 0.3566, "step": 13920 }, { "epoch": 0.30171386475418716, "grad_norm": 1.769449234008789, "learning_rate": 1.5834207914985306e-05, "loss": 0.3415, "step": 13925 }, { "epoch": 0.301822200073668, "grad_norm": 1.5034351348876953, "learning_rate": 1.583144339339585e-05, "loss": 0.3235, "step": 13930 }, { "epoch": 0.30193053539314885, "grad_norm": 0.6020963191986084, "learning_rate": 1.582867819632103e-05, "loss": 0.27, "step": 13935 }, { "epoch": 0.30203887071262975, "grad_norm": 1.8427633047103882, "learning_rate": 1.5825912324081155e-05, "loss": 0.2124, "step": 13940 }, { "epoch": 0.3021472060321106, "grad_norm": 1.3103529214859009, "learning_rate": 1.5823145776996608e-05, "loss": 0.2886, "step": 13945 }, { "epoch": 0.30225554135159144, "grad_norm": 2.292100191116333, "learning_rate": 1.5820378555387853e-05, "loss": 0.4031, "step": 13950 }, { "epoch": 0.3023638766710723, "grad_norm": 1.5781266689300537, "learning_rate": 1.5817610659575435e-05, "loss": 0.2506, "step": 13955 }, { "epoch": 0.3024722119905532, "grad_norm": 1.6221071481704712, "learning_rate": 1.5814842089879965e-05, "loss": 0.3885, "step": 13960 }, { "epoch": 0.30258054731003403, "grad_norm": 1.1384891271591187, "learning_rate": 1.5812072846622147e-05, "loss": 0.3918, "step": 13965 }, { "epoch": 0.3026888826295149, "grad_norm": 1.2891302108764648, "learning_rate": 1.580930293012276e-05, "loss": 0.2688, "step": 13970 }, { "epoch": 0.3027972179489957, "grad_norm": 1.8433014154434204, "learning_rate": 1.5806532340702645e-05, "loss": 0.3208, "step": 13975 }, { "epoch": 0.30290555326847657, "grad_norm": 1.6305010318756104, "learning_rate": 1.5803761078682743e-05, "loss": 0.2443, "step": 13980 }, { "epoch": 0.30301388858795747, "grad_norm": 1.5948774814605713, "learning_rate": 1.580098914438406e-05, "loss": 0.2623, "step": 13985 }, { "epoch": 0.3031222239074383, "grad_norm": 1.375638484954834, "learning_rate": 1.5798216538127683e-05, "loss": 0.2761, "step": 13990 }, { "epoch": 0.30323055922691916, "grad_norm": 1.179908275604248, "learning_rate": 1.5795443260234778e-05, "loss": 0.275, "step": 13995 }, { "epoch": 0.3033388945464, "grad_norm": 1.5736111402511597, "learning_rate": 1.5792669311026586e-05, "loss": 0.4235, "step": 14000 }, { "epoch": 0.30344722986588085, "grad_norm": 1.551702618598938, "learning_rate": 1.5789894690824432e-05, "loss": 0.2878, "step": 14005 }, { "epoch": 0.30355556518536175, "grad_norm": 1.5427343845367432, "learning_rate": 1.5787119399949705e-05, "loss": 0.3747, "step": 14010 }, { "epoch": 0.3036639005048426, "grad_norm": 2.1646385192871094, "learning_rate": 1.578434343872389e-05, "loss": 0.289, "step": 14015 }, { "epoch": 0.30377223582432344, "grad_norm": 1.5955830812454224, "learning_rate": 1.5781566807468538e-05, "loss": 0.3371, "step": 14020 }, { "epoch": 0.3038805711438043, "grad_norm": 1.58433198928833, "learning_rate": 1.5778789506505277e-05, "loss": 0.3171, "step": 14025 }, { "epoch": 0.30398890646328514, "grad_norm": 1.6299382448196411, "learning_rate": 1.577601153615582e-05, "loss": 0.3001, "step": 14030 }, { "epoch": 0.30409724178276604, "grad_norm": 1.092578411102295, "learning_rate": 1.5773232896741947e-05, "loss": 0.3183, "step": 14035 }, { "epoch": 0.3042055771022469, "grad_norm": 1.121797800064087, "learning_rate": 1.577045358858553e-05, "loss": 0.2987, "step": 14040 }, { "epoch": 0.30431391242172773, "grad_norm": 1.3886842727661133, "learning_rate": 1.5767673612008505e-05, "loss": 0.3301, "step": 14045 }, { "epoch": 0.3044222477412086, "grad_norm": 2.0619094371795654, "learning_rate": 1.5764892967332893e-05, "loss": 0.2782, "step": 14050 }, { "epoch": 0.3045305830606894, "grad_norm": 1.4745014905929565, "learning_rate": 1.576211165488079e-05, "loss": 0.2999, "step": 14055 }, { "epoch": 0.3046389183801703, "grad_norm": 1.2743206024169922, "learning_rate": 1.5759329674974365e-05, "loss": 0.2472, "step": 14060 }, { "epoch": 0.30474725369965117, "grad_norm": 1.9834903478622437, "learning_rate": 1.575654702793587e-05, "loss": 0.3638, "step": 14065 }, { "epoch": 0.304855589019132, "grad_norm": 2.663600206375122, "learning_rate": 1.5753763714087637e-05, "loss": 0.3165, "step": 14070 }, { "epoch": 0.30496392433861286, "grad_norm": 1.7973177433013916, "learning_rate": 1.5750979733752073e-05, "loss": 0.2634, "step": 14075 }, { "epoch": 0.30507225965809376, "grad_norm": 1.8946876525878906, "learning_rate": 1.574819508725165e-05, "loss": 0.3644, "step": 14080 }, { "epoch": 0.3051805949775746, "grad_norm": 1.147887945175171, "learning_rate": 1.574540977490894e-05, "loss": 0.3825, "step": 14085 }, { "epoch": 0.30528893029705545, "grad_norm": 1.3223589658737183, "learning_rate": 1.574262379704657e-05, "loss": 0.3274, "step": 14090 }, { "epoch": 0.3053972656165363, "grad_norm": 2.1197454929351807, "learning_rate": 1.573983715398726e-05, "loss": 0.243, "step": 14095 }, { "epoch": 0.30550560093601714, "grad_norm": 2.1140317916870117, "learning_rate": 1.5737049846053797e-05, "loss": 0.3464, "step": 14100 }, { "epoch": 0.30561393625549804, "grad_norm": 1.6760263442993164, "learning_rate": 1.573426187356905e-05, "loss": 0.3009, "step": 14105 }, { "epoch": 0.3057222715749789, "grad_norm": 1.4643124341964722, "learning_rate": 1.573147323685596e-05, "loss": 0.4155, "step": 14110 }, { "epoch": 0.30583060689445973, "grad_norm": 1.690507173538208, "learning_rate": 1.5728683936237562e-05, "loss": 0.2965, "step": 14115 }, { "epoch": 0.3059389422139406, "grad_norm": 1.5641616582870483, "learning_rate": 1.5725893972036944e-05, "loss": 0.3647, "step": 14120 }, { "epoch": 0.3060472775334214, "grad_norm": 1.1845098733901978, "learning_rate": 1.572310334457728e-05, "loss": 0.3075, "step": 14125 }, { "epoch": 0.3061556128529023, "grad_norm": 1.854921817779541, "learning_rate": 1.5720312054181827e-05, "loss": 0.3739, "step": 14130 }, { "epoch": 0.30626394817238317, "grad_norm": 1.0947531461715698, "learning_rate": 1.571752010117391e-05, "loss": 0.2749, "step": 14135 }, { "epoch": 0.306372283491864, "grad_norm": 1.3211854696273804, "learning_rate": 1.571472748587694e-05, "loss": 0.2648, "step": 14140 }, { "epoch": 0.30648061881134486, "grad_norm": 1.2247083187103271, "learning_rate": 1.5711934208614397e-05, "loss": 0.3278, "step": 14145 }, { "epoch": 0.3065889541308257, "grad_norm": 1.2061439752578735, "learning_rate": 1.570914026970984e-05, "loss": 0.2656, "step": 14150 }, { "epoch": 0.3066972894503066, "grad_norm": 0.6577515006065369, "learning_rate": 1.5706345669486905e-05, "loss": 0.2891, "step": 14155 }, { "epoch": 0.30680562476978746, "grad_norm": 1.8016235828399658, "learning_rate": 1.570355040826931e-05, "loss": 0.342, "step": 14160 }, { "epoch": 0.3069139600892683, "grad_norm": 2.485938310623169, "learning_rate": 1.5700754486380834e-05, "loss": 0.4616, "step": 14165 }, { "epoch": 0.30702229540874915, "grad_norm": 1.5824874639511108, "learning_rate": 1.569795790414535e-05, "loss": 0.364, "step": 14170 }, { "epoch": 0.30713063072823, "grad_norm": 1.1944680213928223, "learning_rate": 1.56951606618868e-05, "loss": 0.3532, "step": 14175 }, { "epoch": 0.3072389660477109, "grad_norm": 1.3329129219055176, "learning_rate": 1.5692362759929197e-05, "loss": 0.3618, "step": 14180 }, { "epoch": 0.30734730136719174, "grad_norm": 1.2309519052505493, "learning_rate": 1.5689564198596644e-05, "loss": 0.4024, "step": 14185 }, { "epoch": 0.3074556366866726, "grad_norm": 1.5451486110687256, "learning_rate": 1.5686764978213304e-05, "loss": 0.3187, "step": 14190 }, { "epoch": 0.30756397200615343, "grad_norm": 1.3339427709579468, "learning_rate": 1.5683965099103433e-05, "loss": 0.425, "step": 14195 }, { "epoch": 0.30767230732563433, "grad_norm": 1.2120065689086914, "learning_rate": 1.5681164561591348e-05, "loss": 0.2015, "step": 14200 }, { "epoch": 0.3077806426451152, "grad_norm": 0.9704610705375671, "learning_rate": 1.5678363366001453e-05, "loss": 0.3052, "step": 14205 }, { "epoch": 0.307888977964596, "grad_norm": 1.9053208827972412, "learning_rate": 1.5675561512658227e-05, "loss": 0.2791, "step": 14210 }, { "epoch": 0.30799731328407687, "grad_norm": 1.4755910634994507, "learning_rate": 1.567275900188622e-05, "loss": 0.3447, "step": 14215 }, { "epoch": 0.3081056486035577, "grad_norm": 1.403795838356018, "learning_rate": 1.5669955834010057e-05, "loss": 0.3233, "step": 14220 }, { "epoch": 0.3082139839230386, "grad_norm": 1.3758621215820312, "learning_rate": 1.5667152009354446e-05, "loss": 0.347, "step": 14225 }, { "epoch": 0.30832231924251946, "grad_norm": 2.1264383792877197, "learning_rate": 1.566434752824417e-05, "loss": 0.3897, "step": 14230 }, { "epoch": 0.3084306545620003, "grad_norm": 1.2482739686965942, "learning_rate": 1.5661542391004087e-05, "loss": 0.2889, "step": 14235 }, { "epoch": 0.30853898988148115, "grad_norm": 1.4720911979675293, "learning_rate": 1.5658736597959126e-05, "loss": 0.2748, "step": 14240 }, { "epoch": 0.308647325200962, "grad_norm": 1.5405199527740479, "learning_rate": 1.5655930149434294e-05, "loss": 0.3137, "step": 14245 }, { "epoch": 0.3087556605204429, "grad_norm": 1.4565303325653076, "learning_rate": 1.5653123045754684e-05, "loss": 0.314, "step": 14250 }, { "epoch": 0.30886399583992374, "grad_norm": 2.2756450176239014, "learning_rate": 1.5650315287245453e-05, "loss": 0.3167, "step": 14255 }, { "epoch": 0.3089723311594046, "grad_norm": 1.2262141704559326, "learning_rate": 1.5647506874231838e-05, "loss": 0.2463, "step": 14260 }, { "epoch": 0.30908066647888544, "grad_norm": 1.4781830310821533, "learning_rate": 1.5644697807039153e-05, "loss": 0.2536, "step": 14265 }, { "epoch": 0.3091890017983663, "grad_norm": 1.4072787761688232, "learning_rate": 1.564188808599278e-05, "loss": 0.3432, "step": 14270 }, { "epoch": 0.3092973371178472, "grad_norm": 1.2474066019058228, "learning_rate": 1.563907771141819e-05, "loss": 0.2726, "step": 14275 }, { "epoch": 0.309405672437328, "grad_norm": 2.6665666103363037, "learning_rate": 1.563626668364092e-05, "loss": 0.3434, "step": 14280 }, { "epoch": 0.3095140077568089, "grad_norm": 1.5844535827636719, "learning_rate": 1.563345500298659e-05, "loss": 0.3154, "step": 14285 }, { "epoch": 0.3096223430762897, "grad_norm": 1.4977693557739258, "learning_rate": 1.563064266978088e-05, "loss": 0.3134, "step": 14290 }, { "epoch": 0.30973067839577056, "grad_norm": 1.665773630142212, "learning_rate": 1.562782968434957e-05, "loss": 0.2279, "step": 14295 }, { "epoch": 0.30983901371525147, "grad_norm": 1.9185645580291748, "learning_rate": 1.5625016047018495e-05, "loss": 0.242, "step": 14300 }, { "epoch": 0.3099473490347323, "grad_norm": 1.1097233295440674, "learning_rate": 1.562220175811357e-05, "loss": 0.209, "step": 14305 }, { "epoch": 0.31005568435421316, "grad_norm": 1.2025136947631836, "learning_rate": 1.5619386817960794e-05, "loss": 0.3196, "step": 14310 }, { "epoch": 0.310164019673694, "grad_norm": 1.9949398040771484, "learning_rate": 1.561657122688623e-05, "loss": 0.3728, "step": 14315 }, { "epoch": 0.31027235499317485, "grad_norm": 1.3049118518829346, "learning_rate": 1.5613754985216032e-05, "loss": 0.2398, "step": 14320 }, { "epoch": 0.31038069031265575, "grad_norm": 1.640109658241272, "learning_rate": 1.5610938093276407e-05, "loss": 0.4016, "step": 14325 }, { "epoch": 0.3104890256321366, "grad_norm": 1.8484810590744019, "learning_rate": 1.560812055139366e-05, "loss": 0.2404, "step": 14330 }, { "epoch": 0.31059736095161744, "grad_norm": 1.685516357421875, "learning_rate": 1.5605302359894155e-05, "loss": 0.2882, "step": 14335 }, { "epoch": 0.3107056962710983, "grad_norm": 2.289438247680664, "learning_rate": 1.5602483519104344e-05, "loss": 0.2496, "step": 14340 }, { "epoch": 0.3108140315905792, "grad_norm": 1.2174031734466553, "learning_rate": 1.5599664029350732e-05, "loss": 0.203, "step": 14345 }, { "epoch": 0.31092236691006003, "grad_norm": 1.028944492340088, "learning_rate": 1.559684389095993e-05, "loss": 0.2935, "step": 14350 }, { "epoch": 0.3110307022295409, "grad_norm": 1.168195128440857, "learning_rate": 1.5594023104258612e-05, "loss": 0.2184, "step": 14355 }, { "epoch": 0.3111390375490217, "grad_norm": 1.225813388824463, "learning_rate": 1.5591201669573507e-05, "loss": 0.3774, "step": 14360 }, { "epoch": 0.31124737286850257, "grad_norm": 1.1318762302398682, "learning_rate": 1.5588379587231446e-05, "loss": 0.3133, "step": 14365 }, { "epoch": 0.31135570818798347, "grad_norm": 1.4749561548233032, "learning_rate": 1.5585556857559322e-05, "loss": 0.3598, "step": 14370 }, { "epoch": 0.3114640435074643, "grad_norm": 1.7703123092651367, "learning_rate": 1.5582733480884114e-05, "loss": 0.3845, "step": 14375 }, { "epoch": 0.31157237882694516, "grad_norm": 1.5122230052947998, "learning_rate": 1.5579909457532857e-05, "loss": 0.4002, "step": 14380 }, { "epoch": 0.311680714146426, "grad_norm": 1.7321946620941162, "learning_rate": 1.5577084787832676e-05, "loss": 0.2861, "step": 14385 }, { "epoch": 0.31178904946590685, "grad_norm": 2.212050199508667, "learning_rate": 1.557425947211077e-05, "loss": 0.36, "step": 14390 }, { "epoch": 0.31189738478538775, "grad_norm": 1.5289099216461182, "learning_rate": 1.5571433510694404e-05, "loss": 0.296, "step": 14395 }, { "epoch": 0.3120057201048686, "grad_norm": 1.8834794759750366, "learning_rate": 1.5568606903910927e-05, "loss": 0.3736, "step": 14400 }, { "epoch": 0.31211405542434945, "grad_norm": 1.3792774677276611, "learning_rate": 1.5565779652087757e-05, "loss": 0.3376, "step": 14405 }, { "epoch": 0.3122223907438303, "grad_norm": 1.4063363075256348, "learning_rate": 1.556295175555239e-05, "loss": 0.3409, "step": 14410 }, { "epoch": 0.31233072606331114, "grad_norm": 1.4135586023330688, "learning_rate": 1.5560123214632396e-05, "loss": 0.3959, "step": 14415 }, { "epoch": 0.31243906138279204, "grad_norm": 1.8190277814865112, "learning_rate": 1.5557294029655418e-05, "loss": 0.3181, "step": 14420 }, { "epoch": 0.3125473967022729, "grad_norm": 1.3760572671890259, "learning_rate": 1.5554464200949175e-05, "loss": 0.2825, "step": 14425 }, { "epoch": 0.31265573202175373, "grad_norm": 1.658515214920044, "learning_rate": 1.555163372884146e-05, "loss": 0.3043, "step": 14430 }, { "epoch": 0.3127640673412346, "grad_norm": 1.692352533340454, "learning_rate": 1.554880261366014e-05, "loss": 0.3576, "step": 14435 }, { "epoch": 0.3128724026607154, "grad_norm": 1.4692208766937256, "learning_rate": 1.554597085573316e-05, "loss": 0.2933, "step": 14440 }, { "epoch": 0.3129807379801963, "grad_norm": 1.3646485805511475, "learning_rate": 1.5543138455388532e-05, "loss": 0.3136, "step": 14445 }, { "epoch": 0.31308907329967717, "grad_norm": 1.5008955001831055, "learning_rate": 1.5540305412954354e-05, "loss": 0.3358, "step": 14450 }, { "epoch": 0.313197408619158, "grad_norm": 1.2617781162261963, "learning_rate": 1.553747172875878e-05, "loss": 0.3789, "step": 14455 }, { "epoch": 0.31330574393863886, "grad_norm": 1.2270581722259521, "learning_rate": 1.5534637403130068e-05, "loss": 0.3751, "step": 14460 }, { "epoch": 0.31341407925811976, "grad_norm": 1.1116011142730713, "learning_rate": 1.5531802436396516e-05, "loss": 0.2228, "step": 14465 }, { "epoch": 0.3135224145776006, "grad_norm": 1.410104751586914, "learning_rate": 1.5528966828886517e-05, "loss": 0.3764, "step": 14470 }, { "epoch": 0.31363074989708145, "grad_norm": 1.564913272857666, "learning_rate": 1.5526130580928537e-05, "loss": 0.2536, "step": 14475 }, { "epoch": 0.3137390852165623, "grad_norm": 2.076190948486328, "learning_rate": 1.5523293692851113e-05, "loss": 0.3592, "step": 14480 }, { "epoch": 0.31384742053604314, "grad_norm": 0.9743932485580444, "learning_rate": 1.5520456164982853e-05, "loss": 0.3424, "step": 14485 }, { "epoch": 0.31395575585552404, "grad_norm": 1.3649184703826904, "learning_rate": 1.551761799765244e-05, "loss": 0.2719, "step": 14490 }, { "epoch": 0.3140640911750049, "grad_norm": 1.3698636293411255, "learning_rate": 1.5514779191188636e-05, "loss": 0.3315, "step": 14495 }, { "epoch": 0.31417242649448573, "grad_norm": 0.9166367053985596, "learning_rate": 1.5511939745920276e-05, "loss": 0.4133, "step": 14500 }, { "epoch": 0.3142807618139666, "grad_norm": 1.3886644840240479, "learning_rate": 1.5509099662176264e-05, "loss": 0.3273, "step": 14505 }, { "epoch": 0.3143890971334474, "grad_norm": 1.4548448324203491, "learning_rate": 1.5506258940285582e-05, "loss": 0.2879, "step": 14510 }, { "epoch": 0.3144974324529283, "grad_norm": 0.8632714748382568, "learning_rate": 1.5503417580577285e-05, "loss": 0.2217, "step": 14515 }, { "epoch": 0.3146057677724092, "grad_norm": 2.182344913482666, "learning_rate": 1.5500575583380505e-05, "loss": 0.3091, "step": 14520 }, { "epoch": 0.31471410309189, "grad_norm": 1.3553035259246826, "learning_rate": 1.549773294902444e-05, "loss": 0.3527, "step": 14525 }, { "epoch": 0.31482243841137086, "grad_norm": 1.1039515733718872, "learning_rate": 1.549488967783837e-05, "loss": 0.2818, "step": 14530 }, { "epoch": 0.3149307737308517, "grad_norm": 1.484944462776184, "learning_rate": 1.5492045770151642e-05, "loss": 0.3303, "step": 14535 }, { "epoch": 0.3150391090503326, "grad_norm": 1.341213345527649, "learning_rate": 1.5489201226293685e-05, "loss": 0.3554, "step": 14540 }, { "epoch": 0.31514744436981346, "grad_norm": 1.651750922203064, "learning_rate": 1.5486356046593996e-05, "loss": 0.2145, "step": 14545 }, { "epoch": 0.3152557796892943, "grad_norm": 1.5499215126037598, "learning_rate": 1.548351023138214e-05, "loss": 0.3175, "step": 14550 }, { "epoch": 0.31536411500877515, "grad_norm": 1.2418773174285889, "learning_rate": 1.5480663780987767e-05, "loss": 0.2452, "step": 14555 }, { "epoch": 0.315472450328256, "grad_norm": 2.0145256519317627, "learning_rate": 1.54778166957406e-05, "loss": 0.3049, "step": 14560 }, { "epoch": 0.3155807856477369, "grad_norm": 1.5538040399551392, "learning_rate": 1.5474968975970423e-05, "loss": 0.3006, "step": 14565 }, { "epoch": 0.31568912096721774, "grad_norm": 2.1028389930725098, "learning_rate": 1.5472120622007107e-05, "loss": 0.3131, "step": 14570 }, { "epoch": 0.3157974562866986, "grad_norm": 1.2963056564331055, "learning_rate": 1.5469271634180586e-05, "loss": 0.3348, "step": 14575 }, { "epoch": 0.31590579160617943, "grad_norm": 1.4595887660980225, "learning_rate": 1.546642201282088e-05, "loss": 0.3108, "step": 14580 }, { "epoch": 0.3160141269256603, "grad_norm": 1.8137415647506714, "learning_rate": 1.546357175825807e-05, "loss": 0.3113, "step": 14585 }, { "epoch": 0.3161224622451412, "grad_norm": 1.3206428289413452, "learning_rate": 1.5460720870822312e-05, "loss": 0.2285, "step": 14590 }, { "epoch": 0.316230797564622, "grad_norm": 1.533532977104187, "learning_rate": 1.5457869350843847e-05, "loss": 0.3518, "step": 14595 }, { "epoch": 0.31633913288410287, "grad_norm": 1.1636852025985718, "learning_rate": 1.5455017198652974e-05, "loss": 0.2798, "step": 14600 }, { "epoch": 0.3164474682035837, "grad_norm": 1.2269387245178223, "learning_rate": 1.545216441458008e-05, "loss": 0.3083, "step": 14605 }, { "epoch": 0.3165558035230646, "grad_norm": 1.2847627401351929, "learning_rate": 1.5449310998955603e-05, "loss": 0.3278, "step": 14610 }, { "epoch": 0.31666413884254546, "grad_norm": 1.991765022277832, "learning_rate": 1.5446456952110086e-05, "loss": 0.3631, "step": 14615 }, { "epoch": 0.3167724741620263, "grad_norm": 1.9336397647857666, "learning_rate": 1.5443602274374115e-05, "loss": 0.3624, "step": 14620 }, { "epoch": 0.31688080948150715, "grad_norm": 0.8086458444595337, "learning_rate": 1.5440746966078365e-05, "loss": 0.2206, "step": 14625 }, { "epoch": 0.316989144800988, "grad_norm": 2.021760940551758, "learning_rate": 1.543789102755358e-05, "loss": 0.394, "step": 14630 }, { "epoch": 0.3170974801204689, "grad_norm": 1.2921432256698608, "learning_rate": 1.5435034459130584e-05, "loss": 0.2292, "step": 14635 }, { "epoch": 0.31720581543994975, "grad_norm": 1.4439743757247925, "learning_rate": 1.543217726114026e-05, "loss": 0.3775, "step": 14640 }, { "epoch": 0.3173141507594306, "grad_norm": 1.6711015701293945, "learning_rate": 1.5429319433913573e-05, "loss": 0.2859, "step": 14645 }, { "epoch": 0.31742248607891144, "grad_norm": 1.1117558479309082, "learning_rate": 1.5426460977781562e-05, "loss": 0.3082, "step": 14650 }, { "epoch": 0.3175308213983923, "grad_norm": 1.368444800376892, "learning_rate": 1.5423601893075336e-05, "loss": 0.3337, "step": 14655 }, { "epoch": 0.3176391567178732, "grad_norm": 1.3206685781478882, "learning_rate": 1.5420742180126077e-05, "loss": 0.3618, "step": 14660 }, { "epoch": 0.31774749203735403, "grad_norm": 1.3923593759536743, "learning_rate": 1.5417881839265037e-05, "loss": 0.3603, "step": 14665 }, { "epoch": 0.3178558273568349, "grad_norm": 1.697396993637085, "learning_rate": 1.5415020870823547e-05, "loss": 0.301, "step": 14670 }, { "epoch": 0.3179641626763157, "grad_norm": 2.2952229976654053, "learning_rate": 1.5412159275133004e-05, "loss": 0.4945, "step": 14675 }, { "epoch": 0.31807249799579657, "grad_norm": 1.2329449653625488, "learning_rate": 1.5409297052524886e-05, "loss": 0.3569, "step": 14680 }, { "epoch": 0.31818083331527747, "grad_norm": 1.8259942531585693, "learning_rate": 1.5406434203330735e-05, "loss": 0.3814, "step": 14685 }, { "epoch": 0.3182891686347583, "grad_norm": 1.695639967918396, "learning_rate": 1.5403570727882168e-05, "loss": 0.3823, "step": 14690 }, { "epoch": 0.31839750395423916, "grad_norm": 1.5494108200073242, "learning_rate": 1.540070662651088e-05, "loss": 0.298, "step": 14695 }, { "epoch": 0.31850583927372, "grad_norm": 2.125521421432495, "learning_rate": 1.539784189954863e-05, "loss": 0.3152, "step": 14700 }, { "epoch": 0.31861417459320085, "grad_norm": 1.063035011291504, "learning_rate": 1.5394976547327258e-05, "loss": 0.37, "step": 14705 }, { "epoch": 0.31872250991268175, "grad_norm": 1.8189839124679565, "learning_rate": 1.5392110570178665e-05, "loss": 0.2702, "step": 14710 }, { "epoch": 0.3188308452321626, "grad_norm": 1.1354632377624512, "learning_rate": 1.538924396843484e-05, "loss": 0.2662, "step": 14715 }, { "epoch": 0.31893918055164344, "grad_norm": 2.324882984161377, "learning_rate": 1.5386376742427834e-05, "loss": 0.3381, "step": 14720 }, { "epoch": 0.3190475158711243, "grad_norm": 1.8068431615829468, "learning_rate": 1.5383508892489768e-05, "loss": 0.2342, "step": 14725 }, { "epoch": 0.3191558511906052, "grad_norm": 1.7140522003173828, "learning_rate": 1.5380640418952842e-05, "loss": 0.3891, "step": 14730 }, { "epoch": 0.31926418651008603, "grad_norm": 1.3466663360595703, "learning_rate": 1.5377771322149328e-05, "loss": 0.3018, "step": 14735 }, { "epoch": 0.3193725218295669, "grad_norm": 1.7126575708389282, "learning_rate": 1.537490160241156e-05, "loss": 0.2288, "step": 14740 }, { "epoch": 0.3194808571490477, "grad_norm": 1.1368699073791504, "learning_rate": 1.537203126007196e-05, "loss": 0.4179, "step": 14745 }, { "epoch": 0.31958919246852857, "grad_norm": 1.7055610418319702, "learning_rate": 1.536916029546301e-05, "loss": 0.215, "step": 14750 }, { "epoch": 0.3196975277880095, "grad_norm": 1.728456735610962, "learning_rate": 1.5366288708917272e-05, "loss": 0.3521, "step": 14755 }, { "epoch": 0.3198058631074903, "grad_norm": 1.498731255531311, "learning_rate": 1.5363416500767372e-05, "loss": 0.3943, "step": 14760 }, { "epoch": 0.31991419842697116, "grad_norm": 1.2002253532409668, "learning_rate": 1.5360543671346016e-05, "loss": 0.2747, "step": 14765 }, { "epoch": 0.320022533746452, "grad_norm": 1.2975341081619263, "learning_rate": 1.5357670220985978e-05, "loss": 0.3468, "step": 14770 }, { "epoch": 0.32013086906593285, "grad_norm": 1.8559160232543945, "learning_rate": 1.5354796150020102e-05, "loss": 0.3683, "step": 14775 }, { "epoch": 0.32023920438541376, "grad_norm": 1.625167965888977, "learning_rate": 1.5351921458781303e-05, "loss": 0.2772, "step": 14780 }, { "epoch": 0.3203475397048946, "grad_norm": 1.50922691822052, "learning_rate": 1.534904614760258e-05, "loss": 0.3848, "step": 14785 }, { "epoch": 0.32045587502437545, "grad_norm": 1.3124805688858032, "learning_rate": 1.5346170216816985e-05, "loss": 0.2924, "step": 14790 }, { "epoch": 0.3205642103438563, "grad_norm": 1.3978776931762695, "learning_rate": 1.5343293666757658e-05, "loss": 0.3831, "step": 14795 }, { "epoch": 0.32067254566333714, "grad_norm": 1.6351661682128906, "learning_rate": 1.5340416497757804e-05, "loss": 0.2952, "step": 14800 }, { "epoch": 0.32078088098281804, "grad_norm": 1.285403847694397, "learning_rate": 1.53375387101507e-05, "loss": 0.408, "step": 14805 }, { "epoch": 0.3208892163022989, "grad_norm": 2.332629680633545, "learning_rate": 1.533466030426969e-05, "loss": 0.3332, "step": 14810 }, { "epoch": 0.32099755162177973, "grad_norm": 1.7544395923614502, "learning_rate": 1.5331781280448193e-05, "loss": 0.3518, "step": 14815 }, { "epoch": 0.3211058869412606, "grad_norm": 1.2663122415542603, "learning_rate": 1.532890163901971e-05, "loss": 0.2807, "step": 14820 }, { "epoch": 0.3212142222607414, "grad_norm": 0.9028120636940002, "learning_rate": 1.5326021380317796e-05, "loss": 0.2937, "step": 14825 }, { "epoch": 0.3213225575802223, "grad_norm": 1.6285605430603027, "learning_rate": 1.532314050467609e-05, "loss": 0.3116, "step": 14830 }, { "epoch": 0.32143089289970317, "grad_norm": 1.1213514804840088, "learning_rate": 1.5320259012428295e-05, "loss": 0.3831, "step": 14835 }, { "epoch": 0.321539228219184, "grad_norm": 1.6718021631240845, "learning_rate": 1.5317376903908195e-05, "loss": 0.2677, "step": 14840 }, { "epoch": 0.32164756353866486, "grad_norm": 1.3503390550613403, "learning_rate": 1.5314494179449633e-05, "loss": 0.2094, "step": 14845 }, { "epoch": 0.3217558988581457, "grad_norm": 1.4973183870315552, "learning_rate": 1.5311610839386532e-05, "loss": 0.3905, "step": 14850 }, { "epoch": 0.3218642341776266, "grad_norm": 2.02679443359375, "learning_rate": 1.5308726884052884e-05, "loss": 0.465, "step": 14855 }, { "epoch": 0.32197256949710745, "grad_norm": 1.3343970775604248, "learning_rate": 1.530584231378275e-05, "loss": 0.2945, "step": 14860 }, { "epoch": 0.3220809048165883, "grad_norm": 1.642755389213562, "learning_rate": 1.5302957128910264e-05, "loss": 0.2301, "step": 14865 }, { "epoch": 0.32218924013606914, "grad_norm": 1.4889096021652222, "learning_rate": 1.5300071329769632e-05, "loss": 0.3981, "step": 14870 }, { "epoch": 0.32229757545555004, "grad_norm": 1.9531959295272827, "learning_rate": 1.5297184916695135e-05, "loss": 0.3345, "step": 14875 }, { "epoch": 0.3224059107750309, "grad_norm": 0.9804950952529907, "learning_rate": 1.5294297890021115e-05, "loss": 0.3058, "step": 14880 }, { "epoch": 0.32251424609451174, "grad_norm": 1.4617704153060913, "learning_rate": 1.5291410250081997e-05, "loss": 0.2622, "step": 14885 }, { "epoch": 0.3226225814139926, "grad_norm": 1.2506877183914185, "learning_rate": 1.5288521997212263e-05, "loss": 0.3337, "step": 14890 }, { "epoch": 0.3227309167334734, "grad_norm": 2.9536149501800537, "learning_rate": 1.5285633131746476e-05, "loss": 0.2144, "step": 14895 }, { "epoch": 0.32283925205295433, "grad_norm": 1.2256993055343628, "learning_rate": 1.528274365401927e-05, "loss": 0.2811, "step": 14900 }, { "epoch": 0.3229475873724352, "grad_norm": 1.4666646718978882, "learning_rate": 1.527985356436535e-05, "loss": 0.2417, "step": 14905 }, { "epoch": 0.323055922691916, "grad_norm": 2.171013355255127, "learning_rate": 1.5276962863119488e-05, "loss": 0.3384, "step": 14910 }, { "epoch": 0.32316425801139687, "grad_norm": 1.0563772916793823, "learning_rate": 1.5274071550616526e-05, "loss": 0.2306, "step": 14915 }, { "epoch": 0.3232725933308777, "grad_norm": 1.5510072708129883, "learning_rate": 1.527117962719138e-05, "loss": 0.3272, "step": 14920 }, { "epoch": 0.3233809286503586, "grad_norm": 1.2702827453613281, "learning_rate": 1.5268287093179034e-05, "loss": 0.2996, "step": 14925 }, { "epoch": 0.32348926396983946, "grad_norm": 1.4937944412231445, "learning_rate": 1.5265393948914553e-05, "loss": 0.2224, "step": 14930 }, { "epoch": 0.3235975992893203, "grad_norm": 1.6257455348968506, "learning_rate": 1.5262500194733056e-05, "loss": 0.335, "step": 14935 }, { "epoch": 0.32370593460880115, "grad_norm": 1.771501064300537, "learning_rate": 1.525960583096974e-05, "loss": 0.3179, "step": 14940 }, { "epoch": 0.323814269928282, "grad_norm": 0.8184332847595215, "learning_rate": 1.5256710857959882e-05, "loss": 0.3613, "step": 14945 }, { "epoch": 0.3239226052477629, "grad_norm": 1.3093185424804688, "learning_rate": 1.525381527603881e-05, "loss": 0.4406, "step": 14950 }, { "epoch": 0.32403094056724374, "grad_norm": 0.8049766421318054, "learning_rate": 1.5250919085541946e-05, "loss": 0.2629, "step": 14955 }, { "epoch": 0.3241392758867246, "grad_norm": 1.5981271266937256, "learning_rate": 1.5248022286804765e-05, "loss": 0.3054, "step": 14960 }, { "epoch": 0.32424761120620543, "grad_norm": 1.0039191246032715, "learning_rate": 1.5245124880162816e-05, "loss": 0.3351, "step": 14965 }, { "epoch": 0.3243559465256863, "grad_norm": 1.787009596824646, "learning_rate": 1.5242226865951724e-05, "loss": 0.2498, "step": 14970 }, { "epoch": 0.3244642818451672, "grad_norm": 1.674938440322876, "learning_rate": 1.5239328244507175e-05, "loss": 0.2912, "step": 14975 }, { "epoch": 0.324572617164648, "grad_norm": 1.266169786453247, "learning_rate": 1.5236429016164932e-05, "loss": 0.2959, "step": 14980 }, { "epoch": 0.32468095248412887, "grad_norm": 1.4198440313339233, "learning_rate": 1.5233529181260833e-05, "loss": 0.2915, "step": 14985 }, { "epoch": 0.3247892878036097, "grad_norm": 1.7759793996810913, "learning_rate": 1.5230628740130777e-05, "loss": 0.2735, "step": 14990 }, { "epoch": 0.3248976231230906, "grad_norm": 1.2963114976882935, "learning_rate": 1.5227727693110734e-05, "loss": 0.2475, "step": 14995 }, { "epoch": 0.32500595844257146, "grad_norm": 1.3465449810028076, "learning_rate": 1.5224826040536749e-05, "loss": 0.3107, "step": 15000 }, { "epoch": 0.3251142937620523, "grad_norm": 1.671061396598816, "learning_rate": 1.5221923782744936e-05, "loss": 0.3729, "step": 15005 }, { "epoch": 0.32522262908153315, "grad_norm": 1.8065201044082642, "learning_rate": 1.521902092007148e-05, "loss": 0.3591, "step": 15010 }, { "epoch": 0.325330964401014, "grad_norm": 1.7556487321853638, "learning_rate": 1.521611745285263e-05, "loss": 0.2918, "step": 15015 }, { "epoch": 0.3254392997204949, "grad_norm": 1.4739363193511963, "learning_rate": 1.5213213381424705e-05, "loss": 0.3343, "step": 15020 }, { "epoch": 0.32554763503997575, "grad_norm": 1.6903396844863892, "learning_rate": 1.5210308706124108e-05, "loss": 0.3569, "step": 15025 }, { "epoch": 0.3256559703594566, "grad_norm": 1.6489497423171997, "learning_rate": 1.52074034272873e-05, "loss": 0.332, "step": 15030 }, { "epoch": 0.32576430567893744, "grad_norm": 1.2573238611221313, "learning_rate": 1.5204497545250809e-05, "loss": 0.2819, "step": 15035 }, { "epoch": 0.3258726409984183, "grad_norm": 1.4910467863082886, "learning_rate": 1.5201591060351242e-05, "loss": 0.3322, "step": 15040 }, { "epoch": 0.3259809763178992, "grad_norm": 1.5009851455688477, "learning_rate": 1.5198683972925268e-05, "loss": 0.3981, "step": 15045 }, { "epoch": 0.32608931163738003, "grad_norm": 1.1637625694274902, "learning_rate": 1.5195776283309636e-05, "loss": 0.3297, "step": 15050 }, { "epoch": 0.3261976469568609, "grad_norm": 1.4875038862228394, "learning_rate": 1.5192867991841152e-05, "loss": 0.3031, "step": 15055 }, { "epoch": 0.3263059822763417, "grad_norm": 1.2331968545913696, "learning_rate": 1.5189959098856698e-05, "loss": 0.2397, "step": 15060 }, { "epoch": 0.32641431759582257, "grad_norm": 1.6720384359359741, "learning_rate": 1.5187049604693234e-05, "loss": 0.2893, "step": 15065 }, { "epoch": 0.32652265291530347, "grad_norm": 1.667604684829712, "learning_rate": 1.518413950968777e-05, "loss": 0.3331, "step": 15070 }, { "epoch": 0.3266309882347843, "grad_norm": 1.2686240673065186, "learning_rate": 1.5181228814177403e-05, "loss": 0.4391, "step": 15075 }, { "epoch": 0.32673932355426516, "grad_norm": 1.4660028219223022, "learning_rate": 1.5178317518499292e-05, "loss": 0.4093, "step": 15080 }, { "epoch": 0.326847658873746, "grad_norm": 1.9652754068374634, "learning_rate": 1.5175405622990672e-05, "loss": 0.281, "step": 15085 }, { "epoch": 0.32695599419322685, "grad_norm": 1.2602407932281494, "learning_rate": 1.5172493127988835e-05, "loss": 0.2602, "step": 15090 }, { "epoch": 0.32706432951270775, "grad_norm": 1.5029940605163574, "learning_rate": 1.5169580033831155e-05, "loss": 0.2637, "step": 15095 }, { "epoch": 0.3271726648321886, "grad_norm": 1.2206672430038452, "learning_rate": 1.5166666340855066e-05, "loss": 0.2093, "step": 15100 }, { "epoch": 0.32728100015166944, "grad_norm": 2.0762135982513428, "learning_rate": 1.516375204939808e-05, "loss": 0.2975, "step": 15105 }, { "epoch": 0.3273893354711503, "grad_norm": 1.122578740119934, "learning_rate": 1.516083715979777e-05, "loss": 0.2847, "step": 15110 }, { "epoch": 0.32749767079063113, "grad_norm": 1.2083380222320557, "learning_rate": 1.5157921672391784e-05, "loss": 0.3985, "step": 15115 }, { "epoch": 0.32760600611011204, "grad_norm": 1.100781798362732, "learning_rate": 1.515500558751784e-05, "loss": 0.2846, "step": 15120 }, { "epoch": 0.3277143414295929, "grad_norm": 2.0017588138580322, "learning_rate": 1.5152088905513717e-05, "loss": 0.4369, "step": 15125 }, { "epoch": 0.3278226767490737, "grad_norm": 1.6462762355804443, "learning_rate": 1.5149171626717278e-05, "loss": 0.421, "step": 15130 }, { "epoch": 0.3279310120685546, "grad_norm": 2.3347699642181396, "learning_rate": 1.514625375146644e-05, "loss": 0.3767, "step": 15135 }, { "epoch": 0.3280393473880355, "grad_norm": 1.2607604265213013, "learning_rate": 1.5143335280099191e-05, "loss": 0.305, "step": 15140 }, { "epoch": 0.3281476827075163, "grad_norm": 1.1963410377502441, "learning_rate": 1.5140416212953602e-05, "loss": 0.3478, "step": 15145 }, { "epoch": 0.32825601802699717, "grad_norm": 1.6331384181976318, "learning_rate": 1.5137496550367793e-05, "loss": 0.2554, "step": 15150 }, { "epoch": 0.328364353346478, "grad_norm": 1.6274584531784058, "learning_rate": 1.5134576292679975e-05, "loss": 0.308, "step": 15155 }, { "epoch": 0.32847268866595886, "grad_norm": 2.184443950653076, "learning_rate": 1.5131655440228406e-05, "loss": 0.3075, "step": 15160 }, { "epoch": 0.32858102398543976, "grad_norm": 1.5561532974243164, "learning_rate": 1.5128733993351423e-05, "loss": 0.2609, "step": 15165 }, { "epoch": 0.3286893593049206, "grad_norm": 1.815406322479248, "learning_rate": 1.512581195238744e-05, "loss": 0.203, "step": 15170 }, { "epoch": 0.32879769462440145, "grad_norm": 1.696324348449707, "learning_rate": 1.5122889317674927e-05, "loss": 0.2216, "step": 15175 }, { "epoch": 0.3289060299438823, "grad_norm": 1.1543887853622437, "learning_rate": 1.5119966089552427e-05, "loss": 0.2965, "step": 15180 }, { "epoch": 0.32901436526336314, "grad_norm": 2.2791101932525635, "learning_rate": 1.511704226835855e-05, "loss": 0.2892, "step": 15185 }, { "epoch": 0.32912270058284404, "grad_norm": 1.466111183166504, "learning_rate": 1.511411785443198e-05, "loss": 0.3063, "step": 15190 }, { "epoch": 0.3292310359023249, "grad_norm": 1.9046506881713867, "learning_rate": 1.5111192848111466e-05, "loss": 0.2266, "step": 15195 }, { "epoch": 0.32933937122180573, "grad_norm": 1.3507682085037231, "learning_rate": 1.5108267249735828e-05, "loss": 0.3227, "step": 15200 }, { "epoch": 0.3294477065412866, "grad_norm": 1.5698288679122925, "learning_rate": 1.5105341059643952e-05, "loss": 0.3242, "step": 15205 }, { "epoch": 0.3295560418607674, "grad_norm": 1.7681576013565063, "learning_rate": 1.5102414278174791e-05, "loss": 0.3938, "step": 15210 }, { "epoch": 0.3296643771802483, "grad_norm": 1.1138585805892944, "learning_rate": 1.5099486905667368e-05, "loss": 0.2939, "step": 15215 }, { "epoch": 0.32977271249972917, "grad_norm": 1.8660520315170288, "learning_rate": 1.5096558942460782e-05, "loss": 0.2403, "step": 15220 }, { "epoch": 0.32988104781921, "grad_norm": 1.7489478588104248, "learning_rate": 1.5093630388894184e-05, "loss": 0.344, "step": 15225 }, { "epoch": 0.32998938313869086, "grad_norm": 1.3556636571884155, "learning_rate": 1.5090701245306808e-05, "loss": 0.2565, "step": 15230 }, { "epoch": 0.3300977184581717, "grad_norm": 1.3730602264404297, "learning_rate": 1.5087771512037956e-05, "loss": 0.2729, "step": 15235 }, { "epoch": 0.3302060537776526, "grad_norm": 1.696654200553894, "learning_rate": 1.5084841189426984e-05, "loss": 0.3009, "step": 15240 }, { "epoch": 0.33031438909713345, "grad_norm": 1.513100266456604, "learning_rate": 1.5081910277813335e-05, "loss": 0.2923, "step": 15245 }, { "epoch": 0.3304227244166143, "grad_norm": 1.039129376411438, "learning_rate": 1.5078978777536507e-05, "loss": 0.2452, "step": 15250 }, { "epoch": 0.33053105973609515, "grad_norm": 1.650849461555481, "learning_rate": 1.507604668893607e-05, "loss": 0.3942, "step": 15255 }, { "epoch": 0.33063939505557605, "grad_norm": 1.86471426486969, "learning_rate": 1.5073114012351661e-05, "loss": 0.3367, "step": 15260 }, { "epoch": 0.3307477303750569, "grad_norm": 1.4417681694030762, "learning_rate": 1.5070180748122991e-05, "loss": 0.3011, "step": 15265 }, { "epoch": 0.33085606569453774, "grad_norm": 1.1357656717300415, "learning_rate": 1.506724689658983e-05, "loss": 0.3544, "step": 15270 }, { "epoch": 0.3309644010140186, "grad_norm": 1.3755820989608765, "learning_rate": 1.506431245809203e-05, "loss": 0.3168, "step": 15275 }, { "epoch": 0.33107273633349943, "grad_norm": 1.9602384567260742, "learning_rate": 1.5061377432969488e-05, "loss": 0.24, "step": 15280 }, { "epoch": 0.33118107165298033, "grad_norm": 1.2535902261734009, "learning_rate": 1.5058441821562192e-05, "loss": 0.382, "step": 15285 }, { "epoch": 0.3312894069724612, "grad_norm": 1.9821399450302124, "learning_rate": 1.5055505624210189e-05, "loss": 0.3204, "step": 15290 }, { "epoch": 0.331397742291942, "grad_norm": 1.1693148612976074, "learning_rate": 1.505256884125359e-05, "loss": 0.3471, "step": 15295 }, { "epoch": 0.33150607761142287, "grad_norm": 1.6673351526260376, "learning_rate": 1.5049631473032577e-05, "loss": 0.4146, "step": 15300 }, { "epoch": 0.3316144129309037, "grad_norm": 1.642525553703308, "learning_rate": 1.5046693519887404e-05, "loss": 0.3938, "step": 15305 }, { "epoch": 0.3317227482503846, "grad_norm": 1.2559300661087036, "learning_rate": 1.5043754982158381e-05, "loss": 0.2833, "step": 15310 }, { "epoch": 0.33183108356986546, "grad_norm": 1.2816181182861328, "learning_rate": 1.50408158601859e-05, "loss": 0.3352, "step": 15315 }, { "epoch": 0.3319394188893463, "grad_norm": 3.1267621517181396, "learning_rate": 1.5037876154310416e-05, "loss": 0.3296, "step": 15320 }, { "epoch": 0.33204775420882715, "grad_norm": 1.5928648710250854, "learning_rate": 1.5034935864872443e-05, "loss": 0.2453, "step": 15325 }, { "epoch": 0.332156089528308, "grad_norm": 1.8402318954467773, "learning_rate": 1.5031994992212578e-05, "loss": 0.4066, "step": 15330 }, { "epoch": 0.3322644248477889, "grad_norm": 2.361945867538452, "learning_rate": 1.5029053536671469e-05, "loss": 0.2745, "step": 15335 }, { "epoch": 0.33237276016726974, "grad_norm": 0.935377299785614, "learning_rate": 1.5026111498589846e-05, "loss": 0.2893, "step": 15340 }, { "epoch": 0.3324810954867506, "grad_norm": 1.4842296838760376, "learning_rate": 1.5023168878308493e-05, "loss": 0.3962, "step": 15345 }, { "epoch": 0.33258943080623143, "grad_norm": 1.3814979791641235, "learning_rate": 1.5020225676168276e-05, "loss": 0.4065, "step": 15350 }, { "epoch": 0.3326977661257123, "grad_norm": 1.440153956413269, "learning_rate": 1.5017281892510118e-05, "loss": 0.2595, "step": 15355 }, { "epoch": 0.3328061014451932, "grad_norm": 1.4482884407043457, "learning_rate": 1.501433752767501e-05, "loss": 0.2733, "step": 15360 }, { "epoch": 0.332914436764674, "grad_norm": 1.5293793678283691, "learning_rate": 1.5011392582004012e-05, "loss": 0.2419, "step": 15365 }, { "epoch": 0.33302277208415487, "grad_norm": 1.3374643325805664, "learning_rate": 1.5008447055838255e-05, "loss": 0.3587, "step": 15370 }, { "epoch": 0.3331311074036357, "grad_norm": 1.3849059343338013, "learning_rate": 1.5005500949518937e-05, "loss": 0.3403, "step": 15375 }, { "epoch": 0.33323944272311656, "grad_norm": 1.24981689453125, "learning_rate": 1.5002554263387314e-05, "loss": 0.347, "step": 15380 }, { "epoch": 0.33334777804259746, "grad_norm": 1.388665795326233, "learning_rate": 1.4999606997784714e-05, "loss": 0.2296, "step": 15385 }, { "epoch": 0.3334561133620783, "grad_norm": 1.3170405626296997, "learning_rate": 1.4996659153052543e-05, "loss": 0.268, "step": 15390 }, { "epoch": 0.33356444868155916, "grad_norm": 1.1107783317565918, "learning_rate": 1.4993710729532258e-05, "loss": 0.184, "step": 15395 }, { "epoch": 0.33367278400104, "grad_norm": 2.0296287536621094, "learning_rate": 1.4990761727565388e-05, "loss": 0.4157, "step": 15400 }, { "epoch": 0.3337811193205209, "grad_norm": 1.5360146760940552, "learning_rate": 1.4987812147493534e-05, "loss": 0.3009, "step": 15405 }, { "epoch": 0.33388945464000175, "grad_norm": 1.240893840789795, "learning_rate": 1.4984861989658362e-05, "loss": 0.2878, "step": 15410 }, { "epoch": 0.3339977899594826, "grad_norm": 1.0742937326431274, "learning_rate": 1.4981911254401604e-05, "loss": 0.2203, "step": 15415 }, { "epoch": 0.33410612527896344, "grad_norm": 1.909632921218872, "learning_rate": 1.4978959942065053e-05, "loss": 0.4048, "step": 15420 }, { "epoch": 0.3342144605984443, "grad_norm": 1.632521629333496, "learning_rate": 1.4976008052990576e-05, "loss": 0.2672, "step": 15425 }, { "epoch": 0.3343227959179252, "grad_norm": 1.8125431537628174, "learning_rate": 1.4973055587520108e-05, "loss": 0.273, "step": 15430 }, { "epoch": 0.33443113123740603, "grad_norm": 1.648758888244629, "learning_rate": 1.4970102545995647e-05, "loss": 0.252, "step": 15435 }, { "epoch": 0.3345394665568869, "grad_norm": 1.2707688808441162, "learning_rate": 1.4967148928759259e-05, "loss": 0.3806, "step": 15440 }, { "epoch": 0.3346478018763677, "grad_norm": 1.501417875289917, "learning_rate": 1.4964194736153075e-05, "loss": 0.3988, "step": 15445 }, { "epoch": 0.33475613719584857, "grad_norm": 1.2091732025146484, "learning_rate": 1.4961239968519295e-05, "loss": 0.3509, "step": 15450 }, { "epoch": 0.33486447251532947, "grad_norm": 1.7789220809936523, "learning_rate": 1.495828462620018e-05, "loss": 0.2142, "step": 15455 }, { "epoch": 0.3349728078348103, "grad_norm": 1.3489923477172852, "learning_rate": 1.495532870953807e-05, "loss": 0.3888, "step": 15460 }, { "epoch": 0.33508114315429116, "grad_norm": 1.6372050046920776, "learning_rate": 1.495237221887536e-05, "loss": 0.3224, "step": 15465 }, { "epoch": 0.335189478473772, "grad_norm": 1.4433786869049072, "learning_rate": 1.4949415154554514e-05, "loss": 0.2644, "step": 15470 }, { "epoch": 0.33529781379325285, "grad_norm": 2.2332983016967773, "learning_rate": 1.4946457516918066e-05, "loss": 0.2452, "step": 15475 }, { "epoch": 0.33540614911273375, "grad_norm": 1.8863991498947144, "learning_rate": 1.4943499306308609e-05, "loss": 0.3359, "step": 15480 }, { "epoch": 0.3355144844322146, "grad_norm": 1.341923713684082, "learning_rate": 1.4940540523068813e-05, "loss": 0.3883, "step": 15485 }, { "epoch": 0.33562281975169544, "grad_norm": 1.4854174852371216, "learning_rate": 1.4937581167541406e-05, "loss": 0.2576, "step": 15490 }, { "epoch": 0.3357311550711763, "grad_norm": 1.767662525177002, "learning_rate": 1.4934621240069187e-05, "loss": 0.3439, "step": 15495 }, { "epoch": 0.33583949039065714, "grad_norm": 1.1416466236114502, "learning_rate": 1.493166074099502e-05, "loss": 0.2241, "step": 15500 }, { "epoch": 0.33594782571013804, "grad_norm": 1.862737774848938, "learning_rate": 1.4928699670661828e-05, "loss": 0.3626, "step": 15505 }, { "epoch": 0.3360561610296189, "grad_norm": 2.326054811477661, "learning_rate": 1.4925738029412613e-05, "loss": 0.4552, "step": 15510 }, { "epoch": 0.33616449634909973, "grad_norm": 1.3577148914337158, "learning_rate": 1.4922775817590437e-05, "loss": 0.3804, "step": 15515 }, { "epoch": 0.3362728316685806, "grad_norm": 1.2894916534423828, "learning_rate": 1.4919813035538422e-05, "loss": 0.1699, "step": 15520 }, { "epoch": 0.3363811669880615, "grad_norm": 1.2866625785827637, "learning_rate": 1.4916849683599766e-05, "loss": 0.1958, "step": 15525 }, { "epoch": 0.3364895023075423, "grad_norm": 1.1021342277526855, "learning_rate": 1.491388576211773e-05, "loss": 0.2635, "step": 15530 }, { "epoch": 0.33659783762702317, "grad_norm": 1.1227188110351562, "learning_rate": 1.491092127143564e-05, "loss": 0.2638, "step": 15535 }, { "epoch": 0.336706172946504, "grad_norm": 1.6145925521850586, "learning_rate": 1.4907956211896886e-05, "loss": 0.2766, "step": 15540 }, { "epoch": 0.33681450826598486, "grad_norm": 1.3308160305023193, "learning_rate": 1.4904990583844923e-05, "loss": 0.3727, "step": 15545 }, { "epoch": 0.33692284358546576, "grad_norm": 1.6878159046173096, "learning_rate": 1.490202438762328e-05, "loss": 0.258, "step": 15550 }, { "epoch": 0.3370311789049466, "grad_norm": 1.239855170249939, "learning_rate": 1.489905762357554e-05, "loss": 0.2913, "step": 15555 }, { "epoch": 0.33713951422442745, "grad_norm": 1.811476707458496, "learning_rate": 1.4896090292045367e-05, "loss": 0.3726, "step": 15560 }, { "epoch": 0.3372478495439083, "grad_norm": 1.3288025856018066, "learning_rate": 1.4893122393376476e-05, "loss": 0.2879, "step": 15565 }, { "epoch": 0.33735618486338914, "grad_norm": 1.606868028640747, "learning_rate": 1.4890153927912654e-05, "loss": 0.2773, "step": 15570 }, { "epoch": 0.33746452018287004, "grad_norm": 1.288468599319458, "learning_rate": 1.4887184895997755e-05, "loss": 0.2961, "step": 15575 }, { "epoch": 0.3375728555023509, "grad_norm": 1.8527579307556152, "learning_rate": 1.4884215297975694e-05, "loss": 0.3775, "step": 15580 }, { "epoch": 0.33768119082183173, "grad_norm": 2.9157800674438477, "learning_rate": 1.4881245134190458e-05, "loss": 0.2853, "step": 15585 }, { "epoch": 0.3377895261413126, "grad_norm": 2.0435969829559326, "learning_rate": 1.4878274404986095e-05, "loss": 0.3298, "step": 15590 }, { "epoch": 0.3378978614607934, "grad_norm": 1.4383116960525513, "learning_rate": 1.4875303110706716e-05, "loss": 0.2536, "step": 15595 }, { "epoch": 0.3380061967802743, "grad_norm": 1.5978320837020874, "learning_rate": 1.4872331251696504e-05, "loss": 0.2965, "step": 15600 }, { "epoch": 0.33811453209975517, "grad_norm": 2.004338502883911, "learning_rate": 1.4869358828299704e-05, "loss": 0.4814, "step": 15605 }, { "epoch": 0.338222867419236, "grad_norm": 1.332451581954956, "learning_rate": 1.486638584086063e-05, "loss": 0.3131, "step": 15610 }, { "epoch": 0.33833120273871686, "grad_norm": 1.5895733833312988, "learning_rate": 1.486341228972365e-05, "loss": 0.3553, "step": 15615 }, { "epoch": 0.3384395380581977, "grad_norm": 1.5864356756210327, "learning_rate": 1.4860438175233215e-05, "loss": 0.3612, "step": 15620 }, { "epoch": 0.3385478733776786, "grad_norm": 1.507850170135498, "learning_rate": 1.4857463497733822e-05, "loss": 0.3056, "step": 15625 }, { "epoch": 0.33865620869715946, "grad_norm": 1.8016380071640015, "learning_rate": 1.485448825757005e-05, "loss": 0.2368, "step": 15630 }, { "epoch": 0.3387645440166403, "grad_norm": 1.0682892799377441, "learning_rate": 1.4851512455086535e-05, "loss": 0.311, "step": 15635 }, { "epoch": 0.33887287933612115, "grad_norm": 1.5561678409576416, "learning_rate": 1.4848536090627975e-05, "loss": 0.3181, "step": 15640 }, { "epoch": 0.338981214655602, "grad_norm": 1.690205454826355, "learning_rate": 1.4845559164539144e-05, "loss": 0.2387, "step": 15645 }, { "epoch": 0.3390895499750829, "grad_norm": 1.371321201324463, "learning_rate": 1.4842581677164864e-05, "loss": 0.3407, "step": 15650 }, { "epoch": 0.33919788529456374, "grad_norm": 1.106028437614441, "learning_rate": 1.4839603628850043e-05, "loss": 0.2124, "step": 15655 }, { "epoch": 0.3393062206140446, "grad_norm": 1.403881311416626, "learning_rate": 1.483662501993964e-05, "loss": 0.2935, "step": 15660 }, { "epoch": 0.33941455593352543, "grad_norm": 1.599487543106079, "learning_rate": 1.4833645850778677e-05, "loss": 0.3653, "step": 15665 }, { "epoch": 0.33952289125300633, "grad_norm": 1.0197904109954834, "learning_rate": 1.4830666121712252e-05, "loss": 0.3989, "step": 15670 }, { "epoch": 0.3396312265724872, "grad_norm": 2.0951004028320312, "learning_rate": 1.4827685833085519e-05, "loss": 0.273, "step": 15675 }, { "epoch": 0.339739561891968, "grad_norm": 1.3872920274734497, "learning_rate": 1.4824704985243703e-05, "loss": 0.3486, "step": 15680 }, { "epoch": 0.33984789721144887, "grad_norm": 1.7853286266326904, "learning_rate": 1.4821723578532087e-05, "loss": 0.3448, "step": 15685 }, { "epoch": 0.3399562325309297, "grad_norm": 1.609347939491272, "learning_rate": 1.4818741613296026e-05, "loss": 0.3341, "step": 15690 }, { "epoch": 0.3400645678504106, "grad_norm": 1.3506369590759277, "learning_rate": 1.4815759089880932e-05, "loss": 0.2728, "step": 15695 }, { "epoch": 0.34017290316989146, "grad_norm": 1.0101536512374878, "learning_rate": 1.481277600863229e-05, "loss": 0.3186, "step": 15700 }, { "epoch": 0.3402812384893723, "grad_norm": 1.8800166845321655, "learning_rate": 1.480979236989564e-05, "loss": 0.2709, "step": 15705 }, { "epoch": 0.34038957380885315, "grad_norm": 0.9672285318374634, "learning_rate": 1.4806808174016596e-05, "loss": 0.2497, "step": 15710 }, { "epoch": 0.340497909128334, "grad_norm": 1.8947997093200684, "learning_rate": 1.480382342134083e-05, "loss": 0.2241, "step": 15715 }, { "epoch": 0.3406062444478149, "grad_norm": 1.82700514793396, "learning_rate": 1.4800838112214079e-05, "loss": 0.4683, "step": 15720 }, { "epoch": 0.34071457976729574, "grad_norm": 1.3741062879562378, "learning_rate": 1.4797852246982154e-05, "loss": 0.2636, "step": 15725 }, { "epoch": 0.3408229150867766, "grad_norm": 2.668898820877075, "learning_rate": 1.4794865825990918e-05, "loss": 0.285, "step": 15730 }, { "epoch": 0.34093125040625744, "grad_norm": 1.508527398109436, "learning_rate": 1.47918788495863e-05, "loss": 0.3317, "step": 15735 }, { "epoch": 0.3410395857257383, "grad_norm": 1.7896318435668945, "learning_rate": 1.4788891318114305e-05, "loss": 0.2938, "step": 15740 }, { "epoch": 0.3411479210452192, "grad_norm": 1.752243995666504, "learning_rate": 1.4785903231920982e-05, "loss": 0.3534, "step": 15745 }, { "epoch": 0.34125625636470003, "grad_norm": 1.2569583654403687, "learning_rate": 1.4782914591352466e-05, "loss": 0.2531, "step": 15750 }, { "epoch": 0.3413645916841809, "grad_norm": 1.1070585250854492, "learning_rate": 1.4779925396754941e-05, "loss": 0.3228, "step": 15755 }, { "epoch": 0.3414729270036617, "grad_norm": 1.324310302734375, "learning_rate": 1.4776935648474663e-05, "loss": 0.3625, "step": 15760 }, { "epoch": 0.34158126232314256, "grad_norm": 1.6112496852874756, "learning_rate": 1.477394534685795e-05, "loss": 0.3921, "step": 15765 }, { "epoch": 0.34168959764262347, "grad_norm": 1.4333655834197998, "learning_rate": 1.477095449225118e-05, "loss": 0.4098, "step": 15770 }, { "epoch": 0.3417979329621043, "grad_norm": 2.2276225090026855, "learning_rate": 1.4767963085000802e-05, "loss": 0.3141, "step": 15775 }, { "epoch": 0.34190626828158516, "grad_norm": 1.6192518472671509, "learning_rate": 1.4764971125453324e-05, "loss": 0.352, "step": 15780 }, { "epoch": 0.342014603601066, "grad_norm": 1.8229734897613525, "learning_rate": 1.4761978613955323e-05, "loss": 0.3299, "step": 15785 }, { "epoch": 0.3421229389205469, "grad_norm": 1.57587730884552, "learning_rate": 1.4758985550853428e-05, "loss": 0.3161, "step": 15790 }, { "epoch": 0.34223127424002775, "grad_norm": 1.0527150630950928, "learning_rate": 1.4755991936494352e-05, "loss": 0.2987, "step": 15795 }, { "epoch": 0.3423396095595086, "grad_norm": 1.3659992218017578, "learning_rate": 1.4752997771224853e-05, "loss": 0.2747, "step": 15800 }, { "epoch": 0.34244794487898944, "grad_norm": 1.4371846914291382, "learning_rate": 1.4750003055391765e-05, "loss": 0.255, "step": 15805 }, { "epoch": 0.3425562801984703, "grad_norm": 1.2735096216201782, "learning_rate": 1.4747007789341974e-05, "loss": 0.3037, "step": 15810 }, { "epoch": 0.3426646155179512, "grad_norm": 1.0812991857528687, "learning_rate": 1.4744011973422441e-05, "loss": 0.2699, "step": 15815 }, { "epoch": 0.34277295083743203, "grad_norm": 1.5595319271087646, "learning_rate": 1.474101560798019e-05, "loss": 0.2495, "step": 15820 }, { "epoch": 0.3428812861569129, "grad_norm": 1.4452269077301025, "learning_rate": 1.4738018693362296e-05, "loss": 0.3644, "step": 15825 }, { "epoch": 0.3429896214763937, "grad_norm": 1.762069821357727, "learning_rate": 1.4735021229915916e-05, "loss": 0.2862, "step": 15830 }, { "epoch": 0.34309795679587457, "grad_norm": 1.4857187271118164, "learning_rate": 1.4732023217988256e-05, "loss": 0.302, "step": 15835 }, { "epoch": 0.34320629211535547, "grad_norm": 1.418006181716919, "learning_rate": 1.4729024657926589e-05, "loss": 0.398, "step": 15840 }, { "epoch": 0.3433146274348363, "grad_norm": 1.5149354934692383, "learning_rate": 1.4726025550078257e-05, "loss": 0.2935, "step": 15845 }, { "epoch": 0.34342296275431716, "grad_norm": 1.4531724452972412, "learning_rate": 1.4723025894790665e-05, "loss": 0.2928, "step": 15850 }, { "epoch": 0.343531298073798, "grad_norm": 1.5213984251022339, "learning_rate": 1.472002569241127e-05, "loss": 0.3643, "step": 15855 }, { "epoch": 0.34363963339327885, "grad_norm": 1.6499375104904175, "learning_rate": 1.471702494328761e-05, "loss": 0.3067, "step": 15860 }, { "epoch": 0.34374796871275975, "grad_norm": 1.526719093322754, "learning_rate": 1.4714023647767265e-05, "loss": 0.228, "step": 15865 }, { "epoch": 0.3438563040322406, "grad_norm": 1.6785777807235718, "learning_rate": 1.47110218061979e-05, "loss": 0.3278, "step": 15870 }, { "epoch": 0.34396463935172145, "grad_norm": 1.2782092094421387, "learning_rate": 1.4708019418927228e-05, "loss": 0.2376, "step": 15875 }, { "epoch": 0.3440729746712023, "grad_norm": 1.2034907341003418, "learning_rate": 1.4705016486303034e-05, "loss": 0.2837, "step": 15880 }, { "epoch": 0.34418130999068314, "grad_norm": 0.9189687371253967, "learning_rate": 1.470201300867316e-05, "loss": 0.3995, "step": 15885 }, { "epoch": 0.34428964531016404, "grad_norm": 1.6446752548217773, "learning_rate": 1.4699008986385515e-05, "loss": 0.4178, "step": 15890 }, { "epoch": 0.3443979806296449, "grad_norm": 1.3443856239318848, "learning_rate": 1.4696004419788072e-05, "loss": 0.2642, "step": 15895 }, { "epoch": 0.34450631594912573, "grad_norm": 2.2821388244628906, "learning_rate": 1.4692999309228861e-05, "loss": 0.2901, "step": 15900 }, { "epoch": 0.3446146512686066, "grad_norm": 2.01495361328125, "learning_rate": 1.4689993655055983e-05, "loss": 0.3667, "step": 15905 }, { "epoch": 0.3447229865880874, "grad_norm": 1.4340075254440308, "learning_rate": 1.4686987457617594e-05, "loss": 0.3383, "step": 15910 }, { "epoch": 0.3448313219075683, "grad_norm": 1.4750196933746338, "learning_rate": 1.4683980717261918e-05, "loss": 0.255, "step": 15915 }, { "epoch": 0.34493965722704917, "grad_norm": 1.2949378490447998, "learning_rate": 1.4680973434337245e-05, "loss": 0.2596, "step": 15920 }, { "epoch": 0.34504799254653, "grad_norm": 1.3222222328186035, "learning_rate": 1.467796560919192e-05, "loss": 0.4219, "step": 15925 }, { "epoch": 0.34515632786601086, "grad_norm": 1.3107852935791016, "learning_rate": 1.4674957242174355e-05, "loss": 0.2596, "step": 15930 }, { "epoch": 0.34526466318549176, "grad_norm": 1.7274830341339111, "learning_rate": 1.4671948333633024e-05, "loss": 0.1633, "step": 15935 }, { "epoch": 0.3453729985049726, "grad_norm": 1.479455590248108, "learning_rate": 1.4668938883916463e-05, "loss": 0.271, "step": 15940 }, { "epoch": 0.34548133382445345, "grad_norm": 1.0451711416244507, "learning_rate": 1.4665928893373276e-05, "loss": 0.3434, "step": 15945 }, { "epoch": 0.3455896691439343, "grad_norm": 1.733400821685791, "learning_rate": 1.466291836235212e-05, "loss": 0.3152, "step": 15950 }, { "epoch": 0.34569800446341514, "grad_norm": 1.3529447317123413, "learning_rate": 1.4659907291201725e-05, "loss": 0.2433, "step": 15955 }, { "epoch": 0.34580633978289604, "grad_norm": 1.1857165098190308, "learning_rate": 1.465689568027087e-05, "loss": 0.4793, "step": 15960 }, { "epoch": 0.3459146751023769, "grad_norm": 1.4557316303253174, "learning_rate": 1.4653883529908415e-05, "loss": 0.2635, "step": 15965 }, { "epoch": 0.34602301042185774, "grad_norm": 1.687828779220581, "learning_rate": 1.465087084046327e-05, "loss": 0.3033, "step": 15970 }, { "epoch": 0.3461313457413386, "grad_norm": 1.282869815826416, "learning_rate": 1.4647857612284405e-05, "loss": 0.3087, "step": 15975 }, { "epoch": 0.3462396810608194, "grad_norm": 1.7437046766281128, "learning_rate": 1.4644843845720861e-05, "loss": 0.2731, "step": 15980 }, { "epoch": 0.3463480163803003, "grad_norm": 1.0111360549926758, "learning_rate": 1.4641829541121739e-05, "loss": 0.3664, "step": 15985 }, { "epoch": 0.3464563516997812, "grad_norm": 1.6380630731582642, "learning_rate": 1.4638814698836196e-05, "loss": 0.3026, "step": 15990 }, { "epoch": 0.346564687019262, "grad_norm": 1.2289161682128906, "learning_rate": 1.4635799319213462e-05, "loss": 0.4091, "step": 15995 }, { "epoch": 0.34667302233874286, "grad_norm": 1.3089250326156616, "learning_rate": 1.4632783402602822e-05, "loss": 0.2634, "step": 16000 }, { "epoch": 0.3467813576582237, "grad_norm": 1.6061142683029175, "learning_rate": 1.4629766949353621e-05, "loss": 0.2638, "step": 16005 }, { "epoch": 0.3468896929777046, "grad_norm": 1.746922254562378, "learning_rate": 1.4626749959815275e-05, "loss": 0.3476, "step": 16010 }, { "epoch": 0.34699802829718546, "grad_norm": 2.368384599685669, "learning_rate": 1.4623732434337253e-05, "loss": 0.286, "step": 16015 }, { "epoch": 0.3471063636166663, "grad_norm": 1.2455389499664307, "learning_rate": 1.4620714373269096e-05, "loss": 0.3126, "step": 16020 }, { "epoch": 0.34721469893614715, "grad_norm": 1.1495577096939087, "learning_rate": 1.4617695776960394e-05, "loss": 0.3728, "step": 16025 }, { "epoch": 0.347323034255628, "grad_norm": 1.6638580560684204, "learning_rate": 1.461467664576081e-05, "loss": 0.3075, "step": 16030 }, { "epoch": 0.3474313695751089, "grad_norm": 1.4342814683914185, "learning_rate": 1.4611656980020062e-05, "loss": 0.2993, "step": 16035 }, { "epoch": 0.34753970489458974, "grad_norm": 1.8951289653778076, "learning_rate": 1.4608636780087937e-05, "loss": 0.3321, "step": 16040 }, { "epoch": 0.3476480402140706, "grad_norm": 1.2529088258743286, "learning_rate": 1.460561604631428e-05, "loss": 0.338, "step": 16045 }, { "epoch": 0.34775637553355143, "grad_norm": 1.3935030698776245, "learning_rate": 1.4602594779048995e-05, "loss": 0.2674, "step": 16050 }, { "epoch": 0.34786471085303233, "grad_norm": 1.5965825319290161, "learning_rate": 1.4599572978642052e-05, "loss": 0.2611, "step": 16055 }, { "epoch": 0.3479730461725132, "grad_norm": 1.0263066291809082, "learning_rate": 1.4596550645443482e-05, "loss": 0.2872, "step": 16060 }, { "epoch": 0.348081381491994, "grad_norm": 1.8981541395187378, "learning_rate": 1.4593527779803374e-05, "loss": 0.3925, "step": 16065 }, { "epoch": 0.34818971681147487, "grad_norm": 1.5049368143081665, "learning_rate": 1.4590504382071885e-05, "loss": 0.3017, "step": 16070 }, { "epoch": 0.3482980521309557, "grad_norm": 2.1671183109283447, "learning_rate": 1.458748045259923e-05, "loss": 0.3785, "step": 16075 }, { "epoch": 0.3484063874504366, "grad_norm": 1.9244273900985718, "learning_rate": 1.458445599173568e-05, "loss": 0.3161, "step": 16080 }, { "epoch": 0.34851472276991746, "grad_norm": 1.39460027217865, "learning_rate": 1.4581430999831583e-05, "loss": 0.2434, "step": 16085 }, { "epoch": 0.3486230580893983, "grad_norm": 2.011112689971924, "learning_rate": 1.4578405477237334e-05, "loss": 0.3334, "step": 16090 }, { "epoch": 0.34873139340887915, "grad_norm": 1.3439879417419434, "learning_rate": 1.4575379424303395e-05, "loss": 0.2961, "step": 16095 }, { "epoch": 0.34883972872836, "grad_norm": 0.9806525707244873, "learning_rate": 1.4572352841380288e-05, "loss": 0.2418, "step": 16100 }, { "epoch": 0.3489480640478409, "grad_norm": 1.6823233366012573, "learning_rate": 1.45693257288186e-05, "loss": 0.3051, "step": 16105 }, { "epoch": 0.34905639936732175, "grad_norm": 1.444746494293213, "learning_rate": 1.4566298086968973e-05, "loss": 0.3759, "step": 16110 }, { "epoch": 0.3491647346868026, "grad_norm": 2.548699140548706, "learning_rate": 1.4563269916182117e-05, "loss": 0.3404, "step": 16115 }, { "epoch": 0.34927307000628344, "grad_norm": 1.6063005924224854, "learning_rate": 1.45602412168088e-05, "loss": 0.2189, "step": 16120 }, { "epoch": 0.3493814053257643, "grad_norm": 1.3473260402679443, "learning_rate": 1.4557211989199847e-05, "loss": 0.3273, "step": 16125 }, { "epoch": 0.3494897406452452, "grad_norm": 1.5749200582504272, "learning_rate": 1.4554182233706154e-05, "loss": 0.2776, "step": 16130 }, { "epoch": 0.34959807596472603, "grad_norm": 1.91506028175354, "learning_rate": 1.455115195067867e-05, "loss": 0.342, "step": 16135 }, { "epoch": 0.3497064112842069, "grad_norm": 1.2138270139694214, "learning_rate": 1.454812114046841e-05, "loss": 0.3434, "step": 16140 }, { "epoch": 0.3498147466036877, "grad_norm": 1.519484281539917, "learning_rate": 1.4545089803426447e-05, "loss": 0.3227, "step": 16145 }, { "epoch": 0.34992308192316857, "grad_norm": 2.2019147872924805, "learning_rate": 1.4542057939903915e-05, "loss": 0.3574, "step": 16150 }, { "epoch": 0.35003141724264947, "grad_norm": 1.6570301055908203, "learning_rate": 1.4539025550252009e-05, "loss": 0.2765, "step": 16155 }, { "epoch": 0.3501397525621303, "grad_norm": 2.2365851402282715, "learning_rate": 1.4535992634821989e-05, "loss": 0.2971, "step": 16160 }, { "epoch": 0.35024808788161116, "grad_norm": 1.8196052312850952, "learning_rate": 1.4532959193965172e-05, "loss": 0.2847, "step": 16165 }, { "epoch": 0.350356423201092, "grad_norm": 1.2251640558242798, "learning_rate": 1.4529925228032938e-05, "loss": 0.3592, "step": 16170 }, { "epoch": 0.35046475852057285, "grad_norm": 1.2554291486740112, "learning_rate": 1.452689073737672e-05, "loss": 0.326, "step": 16175 }, { "epoch": 0.35057309384005375, "grad_norm": 1.712120532989502, "learning_rate": 1.4523855722348026e-05, "loss": 0.2241, "step": 16180 }, { "epoch": 0.3506814291595346, "grad_norm": 1.2699358463287354, "learning_rate": 1.4520820183298414e-05, "loss": 0.2681, "step": 16185 }, { "epoch": 0.35078976447901544, "grad_norm": 1.791079044342041, "learning_rate": 1.4517784120579505e-05, "loss": 0.2888, "step": 16190 }, { "epoch": 0.3508980997984963, "grad_norm": 1.4049631357192993, "learning_rate": 1.4514747534542981e-05, "loss": 0.2697, "step": 16195 }, { "epoch": 0.3510064351179772, "grad_norm": 1.4511914253234863, "learning_rate": 1.4511710425540585e-05, "loss": 0.3646, "step": 16200 }, { "epoch": 0.35111477043745803, "grad_norm": 1.3543505668640137, "learning_rate": 1.4508672793924123e-05, "loss": 0.3554, "step": 16205 }, { "epoch": 0.3512231057569389, "grad_norm": 1.0993884801864624, "learning_rate": 1.4505634640045458e-05, "loss": 0.3697, "step": 16210 }, { "epoch": 0.3513314410764197, "grad_norm": 2.018676280975342, "learning_rate": 1.4502595964256515e-05, "loss": 0.2738, "step": 16215 }, { "epoch": 0.35143977639590057, "grad_norm": 0.9869911670684814, "learning_rate": 1.4499556766909279e-05, "loss": 0.3329, "step": 16220 }, { "epoch": 0.3515481117153815, "grad_norm": 1.2948591709136963, "learning_rate": 1.4496517048355794e-05, "loss": 0.2916, "step": 16225 }, { "epoch": 0.3516564470348623, "grad_norm": 1.399859070777893, "learning_rate": 1.4493476808948168e-05, "loss": 0.3493, "step": 16230 }, { "epoch": 0.35176478235434316, "grad_norm": 1.8097872734069824, "learning_rate": 1.4490436049038565e-05, "loss": 0.3078, "step": 16235 }, { "epoch": 0.351873117673824, "grad_norm": 1.7441753149032593, "learning_rate": 1.448739476897921e-05, "loss": 0.3352, "step": 16240 }, { "epoch": 0.35198145299330486, "grad_norm": 1.4472447633743286, "learning_rate": 1.4484352969122398e-05, "loss": 0.276, "step": 16245 }, { "epoch": 0.35208978831278576, "grad_norm": 1.398617148399353, "learning_rate": 1.4481310649820462e-05, "loss": 0.2844, "step": 16250 }, { "epoch": 0.3521981236322666, "grad_norm": 1.570575475692749, "learning_rate": 1.4478267811425823e-05, "loss": 0.2715, "step": 16255 }, { "epoch": 0.35230645895174745, "grad_norm": 1.7451778650283813, "learning_rate": 1.4475224454290943e-05, "loss": 0.3013, "step": 16260 }, { "epoch": 0.3524147942712283, "grad_norm": 1.6354410648345947, "learning_rate": 1.4472180578768347e-05, "loss": 0.3567, "step": 16265 }, { "epoch": 0.35252312959070914, "grad_norm": 1.6655207872390747, "learning_rate": 1.4469136185210624e-05, "loss": 0.352, "step": 16270 }, { "epoch": 0.35263146491019004, "grad_norm": 1.7015632390975952, "learning_rate": 1.4466091273970419e-05, "loss": 0.3667, "step": 16275 }, { "epoch": 0.3527398002296709, "grad_norm": 1.6095036268234253, "learning_rate": 1.4463045845400445e-05, "loss": 0.2389, "step": 16280 }, { "epoch": 0.35284813554915173, "grad_norm": 1.7589304447174072, "learning_rate": 1.4459999899853467e-05, "loss": 0.2645, "step": 16285 }, { "epoch": 0.3529564708686326, "grad_norm": 1.205202579498291, "learning_rate": 1.445695343768231e-05, "loss": 0.2329, "step": 16290 }, { "epoch": 0.3530648061881134, "grad_norm": 1.3978933095932007, "learning_rate": 1.4453906459239863e-05, "loss": 0.3068, "step": 16295 }, { "epoch": 0.3531731415075943, "grad_norm": 1.6846060752868652, "learning_rate": 1.4450858964879073e-05, "loss": 0.4169, "step": 16300 }, { "epoch": 0.35328147682707517, "grad_norm": 1.281808614730835, "learning_rate": 1.4447810954952946e-05, "loss": 0.322, "step": 16305 }, { "epoch": 0.353389812146556, "grad_norm": 1.184394359588623, "learning_rate": 1.4444762429814545e-05, "loss": 0.4018, "step": 16310 }, { "epoch": 0.35349814746603686, "grad_norm": 1.9008803367614746, "learning_rate": 1.4441713389817002e-05, "loss": 0.2835, "step": 16315 }, { "epoch": 0.35360648278551776, "grad_norm": 1.4145197868347168, "learning_rate": 1.4438663835313498e-05, "loss": 0.3256, "step": 16320 }, { "epoch": 0.3537148181049986, "grad_norm": 1.4944193363189697, "learning_rate": 1.443561376665728e-05, "loss": 0.3677, "step": 16325 }, { "epoch": 0.35382315342447945, "grad_norm": 1.222936749458313, "learning_rate": 1.4432563184201653e-05, "loss": 0.2627, "step": 16330 }, { "epoch": 0.3539314887439603, "grad_norm": 1.2084165811538696, "learning_rate": 1.4429512088299982e-05, "loss": 0.3389, "step": 16335 }, { "epoch": 0.35403982406344114, "grad_norm": 1.4479219913482666, "learning_rate": 1.4426460479305688e-05, "loss": 0.2586, "step": 16340 }, { "epoch": 0.35414815938292205, "grad_norm": 1.6763406991958618, "learning_rate": 1.4423408357572255e-05, "loss": 0.2347, "step": 16345 }, { "epoch": 0.3542564947024029, "grad_norm": 1.2359617948532104, "learning_rate": 1.442035572345323e-05, "loss": 0.2716, "step": 16350 }, { "epoch": 0.35436483002188374, "grad_norm": 1.418459177017212, "learning_rate": 1.4417302577302207e-05, "loss": 0.2298, "step": 16355 }, { "epoch": 0.3544731653413646, "grad_norm": 1.4778733253479004, "learning_rate": 1.4414248919472855e-05, "loss": 0.1789, "step": 16360 }, { "epoch": 0.35458150066084543, "grad_norm": 1.199429988861084, "learning_rate": 1.441119475031889e-05, "loss": 0.2514, "step": 16365 }, { "epoch": 0.35468983598032633, "grad_norm": 0.91290682554245, "learning_rate": 1.4408140070194093e-05, "loss": 0.2641, "step": 16370 }, { "epoch": 0.3547981712998072, "grad_norm": 1.4684516191482544, "learning_rate": 1.44050848794523e-05, "loss": 0.3476, "step": 16375 }, { "epoch": 0.354906506619288, "grad_norm": 1.7480907440185547, "learning_rate": 1.4402029178447419e-05, "loss": 0.2988, "step": 16380 }, { "epoch": 0.35501484193876887, "grad_norm": 1.0134203433990479, "learning_rate": 1.4398972967533395e-05, "loss": 0.3338, "step": 16385 }, { "epoch": 0.3551231772582497, "grad_norm": 1.7439022064208984, "learning_rate": 1.439591624706425e-05, "loss": 0.2847, "step": 16390 }, { "epoch": 0.3552315125777306, "grad_norm": 1.3486833572387695, "learning_rate": 1.4392859017394056e-05, "loss": 0.2734, "step": 16395 }, { "epoch": 0.35533984789721146, "grad_norm": 1.431481122970581, "learning_rate": 1.4389801278876953e-05, "loss": 0.3219, "step": 16400 }, { "epoch": 0.3554481832166923, "grad_norm": 1.7236216068267822, "learning_rate": 1.438674303186713e-05, "loss": 0.3123, "step": 16405 }, { "epoch": 0.35555651853617315, "grad_norm": 1.4720103740692139, "learning_rate": 1.438368427671884e-05, "loss": 0.2675, "step": 16410 }, { "epoch": 0.355664853855654, "grad_norm": 3.1968963146209717, "learning_rate": 1.4380625013786396e-05, "loss": 0.325, "step": 16415 }, { "epoch": 0.3557731891751349, "grad_norm": 1.5779505968093872, "learning_rate": 1.4377565243424166e-05, "loss": 0.3612, "step": 16420 }, { "epoch": 0.35588152449461574, "grad_norm": 1.3958957195281982, "learning_rate": 1.4374504965986575e-05, "loss": 0.3498, "step": 16425 }, { "epoch": 0.3559898598140966, "grad_norm": 1.5475202798843384, "learning_rate": 1.4371444181828117e-05, "loss": 0.3298, "step": 16430 }, { "epoch": 0.35609819513357743, "grad_norm": 1.377938151359558, "learning_rate": 1.4368382891303335e-05, "loss": 0.2468, "step": 16435 }, { "epoch": 0.3562065304530583, "grad_norm": 1.478398084640503, "learning_rate": 1.436532109476683e-05, "loss": 0.2514, "step": 16440 }, { "epoch": 0.3563148657725392, "grad_norm": 1.9275504350662231, "learning_rate": 1.4362258792573269e-05, "loss": 0.2897, "step": 16445 }, { "epoch": 0.35642320109202, "grad_norm": 1.5311716794967651, "learning_rate": 1.4359195985077377e-05, "loss": 0.3018, "step": 16450 }, { "epoch": 0.35653153641150087, "grad_norm": 1.9345667362213135, "learning_rate": 1.435613267263393e-05, "loss": 0.3076, "step": 16455 }, { "epoch": 0.3566398717309817, "grad_norm": 1.2872875928878784, "learning_rate": 1.4353068855597766e-05, "loss": 0.336, "step": 16460 }, { "epoch": 0.3567482070504626, "grad_norm": 1.2455096244812012, "learning_rate": 1.4350004534323785e-05, "loss": 0.2168, "step": 16465 }, { "epoch": 0.35685654236994346, "grad_norm": 1.2391703128814697, "learning_rate": 1.4346939709166942e-05, "loss": 0.2386, "step": 16470 }, { "epoch": 0.3569648776894243, "grad_norm": 2.183898687362671, "learning_rate": 1.434387438048225e-05, "loss": 0.3062, "step": 16475 }, { "epoch": 0.35707321300890515, "grad_norm": 1.0683283805847168, "learning_rate": 1.4340808548624785e-05, "loss": 0.2548, "step": 16480 }, { "epoch": 0.357181548328386, "grad_norm": 1.7588815689086914, "learning_rate": 1.433774221394967e-05, "loss": 0.2284, "step": 16485 }, { "epoch": 0.3572898836478669, "grad_norm": 1.3325997591018677, "learning_rate": 1.4334675376812102e-05, "loss": 0.3557, "step": 16490 }, { "epoch": 0.35739821896734775, "grad_norm": 1.565898060798645, "learning_rate": 1.4331608037567324e-05, "loss": 0.3631, "step": 16495 }, { "epoch": 0.3575065542868286, "grad_norm": 1.6018270254135132, "learning_rate": 1.4328540196570644e-05, "loss": 0.3168, "step": 16500 }, { "epoch": 0.35761488960630944, "grad_norm": 1.0469576120376587, "learning_rate": 1.4325471854177424e-05, "loss": 0.2457, "step": 16505 }, { "epoch": 0.3577232249257903, "grad_norm": 1.4983437061309814, "learning_rate": 1.4322403010743085e-05, "loss": 0.3124, "step": 16510 }, { "epoch": 0.3578315602452712, "grad_norm": 1.44779372215271, "learning_rate": 1.4319333666623104e-05, "loss": 0.2597, "step": 16515 }, { "epoch": 0.35793989556475203, "grad_norm": 1.7677052021026611, "learning_rate": 1.4316263822173022e-05, "loss": 0.3036, "step": 16520 }, { "epoch": 0.3580482308842329, "grad_norm": 1.4430207014083862, "learning_rate": 1.4313193477748435e-05, "loss": 0.2361, "step": 16525 }, { "epoch": 0.3581565662037137, "grad_norm": 1.5613737106323242, "learning_rate": 1.4310122633704996e-05, "loss": 0.3682, "step": 16530 }, { "epoch": 0.35826490152319457, "grad_norm": 1.7019034624099731, "learning_rate": 1.4307051290398415e-05, "loss": 0.3375, "step": 16535 }, { "epoch": 0.35837323684267547, "grad_norm": 1.187227725982666, "learning_rate": 1.4303979448184461e-05, "loss": 0.3114, "step": 16540 }, { "epoch": 0.3584815721621563, "grad_norm": 1.2324657440185547, "learning_rate": 1.4300907107418961e-05, "loss": 0.2976, "step": 16545 }, { "epoch": 0.35858990748163716, "grad_norm": 1.4292551279067993, "learning_rate": 1.4297834268457803e-05, "loss": 0.2965, "step": 16550 }, { "epoch": 0.358698242801118, "grad_norm": 1.6207438707351685, "learning_rate": 1.4294760931656924e-05, "loss": 0.236, "step": 16555 }, { "epoch": 0.35880657812059885, "grad_norm": 1.5972731113433838, "learning_rate": 1.429168709737233e-05, "loss": 0.3048, "step": 16560 }, { "epoch": 0.35891491344007975, "grad_norm": 1.594241738319397, "learning_rate": 1.4288612765960068e-05, "loss": 0.3733, "step": 16565 }, { "epoch": 0.3590232487595606, "grad_norm": 1.9081733226776123, "learning_rate": 1.4285537937776266e-05, "loss": 0.3351, "step": 16570 }, { "epoch": 0.35913158407904144, "grad_norm": 1.7274750471115112, "learning_rate": 1.428246261317709e-05, "loss": 0.2591, "step": 16575 }, { "epoch": 0.3592399193985223, "grad_norm": 1.2917767763137817, "learning_rate": 1.4279386792518772e-05, "loss": 0.394, "step": 16580 }, { "epoch": 0.3593482547180032, "grad_norm": 1.9050544500350952, "learning_rate": 1.42763104761576e-05, "loss": 0.3545, "step": 16585 }, { "epoch": 0.35945659003748404, "grad_norm": 2.0947346687316895, "learning_rate": 1.4273233664449918e-05, "loss": 0.4302, "step": 16590 }, { "epoch": 0.3595649253569649, "grad_norm": 1.8910175561904907, "learning_rate": 1.4270156357752128e-05, "loss": 0.2896, "step": 16595 }, { "epoch": 0.3596732606764457, "grad_norm": 1.6273373365402222, "learning_rate": 1.426707855642069e-05, "loss": 0.274, "step": 16600 }, { "epoch": 0.3597815959959266, "grad_norm": 2.6170878410339355, "learning_rate": 1.4264000260812125e-05, "loss": 0.4204, "step": 16605 }, { "epoch": 0.3598899313154075, "grad_norm": 1.26129150390625, "learning_rate": 1.4260921471283e-05, "loss": 0.1932, "step": 16610 }, { "epoch": 0.3599982666348883, "grad_norm": 2.039090633392334, "learning_rate": 1.4257842188189954e-05, "loss": 0.3206, "step": 16615 }, { "epoch": 0.36010660195436917, "grad_norm": 1.7254048585891724, "learning_rate": 1.4254762411889673e-05, "loss": 0.1986, "step": 16620 }, { "epoch": 0.36021493727385, "grad_norm": 1.3475146293640137, "learning_rate": 1.4251682142738906e-05, "loss": 0.3794, "step": 16625 }, { "epoch": 0.36032327259333086, "grad_norm": 1.6103569269180298, "learning_rate": 1.4248601381094452e-05, "loss": 0.3967, "step": 16630 }, { "epoch": 0.36043160791281176, "grad_norm": 1.5940722227096558, "learning_rate": 1.4245520127313166e-05, "loss": 0.3705, "step": 16635 }, { "epoch": 0.3605399432322926, "grad_norm": 1.6484142541885376, "learning_rate": 1.4242438381751976e-05, "loss": 0.3113, "step": 16640 }, { "epoch": 0.36064827855177345, "grad_norm": 1.3925187587738037, "learning_rate": 1.423935614476785e-05, "loss": 0.2395, "step": 16645 }, { "epoch": 0.3607566138712543, "grad_norm": 1.7935930490493774, "learning_rate": 1.423627341671782e-05, "loss": 0.3419, "step": 16650 }, { "epoch": 0.36086494919073514, "grad_norm": 1.8132566213607788, "learning_rate": 1.4233190197958976e-05, "loss": 0.3618, "step": 16655 }, { "epoch": 0.36097328451021604, "grad_norm": 1.5383638143539429, "learning_rate": 1.4230106488848461e-05, "loss": 0.3151, "step": 16660 }, { "epoch": 0.3610816198296969, "grad_norm": 2.175097703933716, "learning_rate": 1.4227022289743472e-05, "loss": 0.3665, "step": 16665 }, { "epoch": 0.36118995514917773, "grad_norm": 1.2317208051681519, "learning_rate": 1.4223937601001276e-05, "loss": 0.325, "step": 16670 }, { "epoch": 0.3612982904686586, "grad_norm": 1.3188533782958984, "learning_rate": 1.422085242297918e-05, "loss": 0.2896, "step": 16675 }, { "epoch": 0.3614066257881394, "grad_norm": 2.4578919410705566, "learning_rate": 1.4217766756034563e-05, "loss": 0.3258, "step": 16680 }, { "epoch": 0.3615149611076203, "grad_norm": 1.4066040515899658, "learning_rate": 1.4214680600524843e-05, "loss": 0.3386, "step": 16685 }, { "epoch": 0.36162329642710117, "grad_norm": 1.6218211650848389, "learning_rate": 1.4211593956807516e-05, "loss": 0.283, "step": 16690 }, { "epoch": 0.361731631746582, "grad_norm": 1.554127812385559, "learning_rate": 1.420850682524012e-05, "loss": 0.285, "step": 16695 }, { "epoch": 0.36183996706606286, "grad_norm": 1.8990836143493652, "learning_rate": 1.420541920618025e-05, "loss": 0.2648, "step": 16700 }, { "epoch": 0.3619483023855437, "grad_norm": 1.7359602451324463, "learning_rate": 1.4202331099985562e-05, "loss": 0.3819, "step": 16705 }, { "epoch": 0.3620566377050246, "grad_norm": 1.5530048608779907, "learning_rate": 1.4199242507013769e-05, "loss": 0.2731, "step": 16710 }, { "epoch": 0.36216497302450545, "grad_norm": 1.6704820394515991, "learning_rate": 1.4196153427622635e-05, "loss": 0.331, "step": 16715 }, { "epoch": 0.3622733083439863, "grad_norm": 1.4025657176971436, "learning_rate": 1.4193063862169982e-05, "loss": 0.3112, "step": 16720 }, { "epoch": 0.36238164366346715, "grad_norm": 1.8293066024780273, "learning_rate": 1.4189973811013695e-05, "loss": 0.3443, "step": 16725 }, { "epoch": 0.36248997898294805, "grad_norm": 1.7249966859817505, "learning_rate": 1.4186883274511708e-05, "loss": 0.3728, "step": 16730 }, { "epoch": 0.3625983143024289, "grad_norm": 1.698185920715332, "learning_rate": 1.4183792253022012e-05, "loss": 0.2186, "step": 16735 }, { "epoch": 0.36270664962190974, "grad_norm": 1.5772286653518677, "learning_rate": 1.4180700746902658e-05, "loss": 0.3057, "step": 16740 }, { "epoch": 0.3628149849413906, "grad_norm": 2.022212505340576, "learning_rate": 1.4177608756511752e-05, "loss": 0.3433, "step": 16745 }, { "epoch": 0.36292332026087143, "grad_norm": 1.6075458526611328, "learning_rate": 1.4174516282207446e-05, "loss": 0.4061, "step": 16750 }, { "epoch": 0.36303165558035233, "grad_norm": 1.2723588943481445, "learning_rate": 1.4171423324347966e-05, "loss": 0.2973, "step": 16755 }, { "epoch": 0.3631399908998332, "grad_norm": 1.4745216369628906, "learning_rate": 1.416832988329158e-05, "loss": 0.3226, "step": 16760 }, { "epoch": 0.363248326219314, "grad_norm": 1.6754732131958008, "learning_rate": 1.4165235959396622e-05, "loss": 0.4763, "step": 16765 }, { "epoch": 0.36335666153879487, "grad_norm": 1.139835000038147, "learning_rate": 1.4162141553021472e-05, "loss": 0.3422, "step": 16770 }, { "epoch": 0.3634649968582757, "grad_norm": 1.3742769956588745, "learning_rate": 1.4159046664524571e-05, "loss": 0.306, "step": 16775 }, { "epoch": 0.3635733321777566, "grad_norm": 1.2656978368759155, "learning_rate": 1.4155951294264418e-05, "loss": 0.2942, "step": 16780 }, { "epoch": 0.36368166749723746, "grad_norm": 1.878178358078003, "learning_rate": 1.4152855442599562e-05, "loss": 0.3481, "step": 16785 }, { "epoch": 0.3637900028167183, "grad_norm": 1.1150230169296265, "learning_rate": 1.4149759109888614e-05, "loss": 0.2737, "step": 16790 }, { "epoch": 0.36389833813619915, "grad_norm": 1.9961082935333252, "learning_rate": 1.4146662296490236e-05, "loss": 0.2317, "step": 16795 }, { "epoch": 0.36400667345568, "grad_norm": 2.093069314956665, "learning_rate": 1.4143565002763149e-05, "loss": 0.2654, "step": 16800 }, { "epoch": 0.3641150087751609, "grad_norm": 2.3023815155029297, "learning_rate": 1.4140467229066124e-05, "loss": 0.3304, "step": 16805 }, { "epoch": 0.36422334409464174, "grad_norm": 1.3433672189712524, "learning_rate": 1.4137368975757996e-05, "loss": 0.2515, "step": 16810 }, { "epoch": 0.3643316794141226, "grad_norm": 1.5569329261779785, "learning_rate": 1.4134270243197653e-05, "loss": 0.372, "step": 16815 }, { "epoch": 0.36444001473360343, "grad_norm": 1.561952829360962, "learning_rate": 1.413117103174403e-05, "loss": 0.318, "step": 16820 }, { "epoch": 0.3645483500530843, "grad_norm": 1.2840348482131958, "learning_rate": 1.4128071341756129e-05, "loss": 0.2801, "step": 16825 }, { "epoch": 0.3646566853725652, "grad_norm": 1.085257887840271, "learning_rate": 1.4124971173593002e-05, "loss": 0.2899, "step": 16830 }, { "epoch": 0.364765020692046, "grad_norm": 2.189958333969116, "learning_rate": 1.4121870527613757e-05, "loss": 0.2791, "step": 16835 }, { "epoch": 0.3648733560115269, "grad_norm": 1.4728529453277588, "learning_rate": 1.4118769404177557e-05, "loss": 0.336, "step": 16840 }, { "epoch": 0.3649816913310077, "grad_norm": 1.7330466508865356, "learning_rate": 1.411566780364362e-05, "loss": 0.3233, "step": 16845 }, { "epoch": 0.3650900266504886, "grad_norm": 2.130906105041504, "learning_rate": 1.4112565726371219e-05, "loss": 0.2248, "step": 16850 }, { "epoch": 0.36519836196996946, "grad_norm": 1.0387163162231445, "learning_rate": 1.4109463172719686e-05, "loss": 0.2822, "step": 16855 }, { "epoch": 0.3653066972894503, "grad_norm": 1.3544691801071167, "learning_rate": 1.4106360143048405e-05, "loss": 0.3712, "step": 16860 }, { "epoch": 0.36541503260893116, "grad_norm": 1.2401071786880493, "learning_rate": 1.4103256637716817e-05, "loss": 0.4006, "step": 16865 }, { "epoch": 0.365523367928412, "grad_norm": 1.5367302894592285, "learning_rate": 1.4100152657084411e-05, "loss": 0.2926, "step": 16870 }, { "epoch": 0.3656317032478929, "grad_norm": 1.513391375541687, "learning_rate": 1.409704820151074e-05, "loss": 0.3108, "step": 16875 }, { "epoch": 0.36574003856737375, "grad_norm": 1.3953074216842651, "learning_rate": 1.409394327135541e-05, "loss": 0.2595, "step": 16880 }, { "epoch": 0.3658483738868546, "grad_norm": 1.8094022274017334, "learning_rate": 1.4090837866978078e-05, "loss": 0.4021, "step": 16885 }, { "epoch": 0.36595670920633544, "grad_norm": 1.4666285514831543, "learning_rate": 1.408773198873846e-05, "loss": 0.2139, "step": 16890 }, { "epoch": 0.3660650445258163, "grad_norm": 1.4729762077331543, "learning_rate": 1.4084625636996328e-05, "loss": 0.3167, "step": 16895 }, { "epoch": 0.3661733798452972, "grad_norm": 1.6690336465835571, "learning_rate": 1.4081518812111502e-05, "loss": 0.3705, "step": 16900 }, { "epoch": 0.36628171516477803, "grad_norm": 1.4966758489608765, "learning_rate": 1.4078411514443862e-05, "loss": 0.2736, "step": 16905 }, { "epoch": 0.3663900504842589, "grad_norm": 1.485365390777588, "learning_rate": 1.4075303744353344e-05, "loss": 0.2635, "step": 16910 }, { "epoch": 0.3664983858037397, "grad_norm": 1.0480388402938843, "learning_rate": 1.4072195502199933e-05, "loss": 0.2187, "step": 16915 }, { "epoch": 0.36660672112322057, "grad_norm": 1.4582520723342896, "learning_rate": 1.4069086788343675e-05, "loss": 0.3149, "step": 16920 }, { "epoch": 0.36671505644270147, "grad_norm": 1.5864593982696533, "learning_rate": 1.4065977603144666e-05, "loss": 0.4029, "step": 16925 }, { "epoch": 0.3668233917621823, "grad_norm": 1.6158952713012695, "learning_rate": 1.4062867946963064e-05, "loss": 0.3159, "step": 16930 }, { "epoch": 0.36693172708166316, "grad_norm": 1.369005560874939, "learning_rate": 1.405975782015907e-05, "loss": 0.289, "step": 16935 }, { "epoch": 0.367040062401144, "grad_norm": 1.185073733329773, "learning_rate": 1.4056647223092948e-05, "loss": 0.2779, "step": 16940 }, { "epoch": 0.36714839772062485, "grad_norm": 1.3529900312423706, "learning_rate": 1.4053536156125013e-05, "loss": 0.2377, "step": 16945 }, { "epoch": 0.36725673304010575, "grad_norm": 1.1597164869308472, "learning_rate": 1.4050424619615637e-05, "loss": 0.2852, "step": 16950 }, { "epoch": 0.3673650683595866, "grad_norm": 1.3127976655960083, "learning_rate": 1.4047312613925246e-05, "loss": 0.3195, "step": 16955 }, { "epoch": 0.36747340367906745, "grad_norm": 1.4284286499023438, "learning_rate": 1.4044200139414317e-05, "loss": 0.2941, "step": 16960 }, { "epoch": 0.3675817389985483, "grad_norm": 1.6388765573501587, "learning_rate": 1.4041087196443381e-05, "loss": 0.1935, "step": 16965 }, { "epoch": 0.36769007431802914, "grad_norm": 1.3937978744506836, "learning_rate": 1.4037973785373031e-05, "loss": 0.303, "step": 16970 }, { "epoch": 0.36779840963751004, "grad_norm": 1.9772933721542358, "learning_rate": 1.4034859906563905e-05, "loss": 0.3788, "step": 16975 }, { "epoch": 0.3679067449569909, "grad_norm": 1.3700650930404663, "learning_rate": 1.4031745560376701e-05, "loss": 0.2266, "step": 16980 }, { "epoch": 0.36801508027647173, "grad_norm": 1.4182302951812744, "learning_rate": 1.4028630747172171e-05, "loss": 0.3509, "step": 16985 }, { "epoch": 0.3681234155959526, "grad_norm": 1.508299708366394, "learning_rate": 1.4025515467311119e-05, "loss": 0.3354, "step": 16990 }, { "epoch": 0.3682317509154335, "grad_norm": 1.7345094680786133, "learning_rate": 1.40223997211544e-05, "loss": 0.4158, "step": 16995 }, { "epoch": 0.3683400862349143, "grad_norm": 2.0180745124816895, "learning_rate": 1.4019283509062929e-05, "loss": 0.3912, "step": 17000 }, { "epoch": 0.36844842155439517, "grad_norm": 1.9136141538619995, "learning_rate": 1.4016166831397673e-05, "loss": 0.2223, "step": 17005 }, { "epoch": 0.368556756873876, "grad_norm": 1.805358648300171, "learning_rate": 1.4013049688519654e-05, "loss": 0.3946, "step": 17010 }, { "epoch": 0.36866509219335686, "grad_norm": 1.7882112264633179, "learning_rate": 1.400993208078994e-05, "loss": 0.3913, "step": 17015 }, { "epoch": 0.36877342751283776, "grad_norm": 1.2375339269638062, "learning_rate": 1.4006814008569664e-05, "loss": 0.2834, "step": 17020 }, { "epoch": 0.3688817628323186, "grad_norm": 1.5308637619018555, "learning_rate": 1.4003695472220012e-05, "loss": 0.3277, "step": 17025 }, { "epoch": 0.36899009815179945, "grad_norm": 1.9292913675308228, "learning_rate": 1.400057647210221e-05, "loss": 0.2428, "step": 17030 }, { "epoch": 0.3690984334712803, "grad_norm": 1.4609860181808472, "learning_rate": 1.3997457008577554e-05, "loss": 0.4069, "step": 17035 }, { "epoch": 0.36920676879076114, "grad_norm": 1.2322710752487183, "learning_rate": 1.3994337082007382e-05, "loss": 0.3664, "step": 17040 }, { "epoch": 0.36931510411024204, "grad_norm": 1.4461156129837036, "learning_rate": 1.3991216692753099e-05, "loss": 0.3287, "step": 17045 }, { "epoch": 0.3694234394297229, "grad_norm": 1.511629343032837, "learning_rate": 1.3988095841176149e-05, "loss": 0.3921, "step": 17050 }, { "epoch": 0.36953177474920373, "grad_norm": 1.360264539718628, "learning_rate": 1.3984974527638036e-05, "loss": 0.312, "step": 17055 }, { "epoch": 0.3696401100686846, "grad_norm": 1.7232130765914917, "learning_rate": 1.398185275250032e-05, "loss": 0.3972, "step": 17060 }, { "epoch": 0.3697484453881654, "grad_norm": 1.9129830598831177, "learning_rate": 1.397873051612461e-05, "loss": 0.2636, "step": 17065 }, { "epoch": 0.3698567807076463, "grad_norm": 1.9256004095077515, "learning_rate": 1.3975607818872573e-05, "loss": 0.317, "step": 17070 }, { "epoch": 0.36996511602712717, "grad_norm": 1.7156729698181152, "learning_rate": 1.3972484661105922e-05, "loss": 0.3314, "step": 17075 }, { "epoch": 0.370073451346608, "grad_norm": 1.517404317855835, "learning_rate": 1.3969361043186433e-05, "loss": 0.3249, "step": 17080 }, { "epoch": 0.37018178666608886, "grad_norm": 1.5873793363571167, "learning_rate": 1.396623696547593e-05, "loss": 0.2806, "step": 17085 }, { "epoch": 0.3702901219855697, "grad_norm": 2.367926597595215, "learning_rate": 1.3963112428336286e-05, "loss": 0.2502, "step": 17090 }, { "epoch": 0.3703984573050506, "grad_norm": 1.5232579708099365, "learning_rate": 1.3959987432129435e-05, "loss": 0.1874, "step": 17095 }, { "epoch": 0.37050679262453146, "grad_norm": 1.4187010526657104, "learning_rate": 1.395686197721736e-05, "loss": 0.3397, "step": 17100 }, { "epoch": 0.3706151279440123, "grad_norm": 1.2538036108016968, "learning_rate": 1.3953736063962097e-05, "loss": 0.3146, "step": 17105 }, { "epoch": 0.37072346326349315, "grad_norm": 1.4004255533218384, "learning_rate": 1.3950609692725743e-05, "loss": 0.3174, "step": 17110 }, { "epoch": 0.37083179858297405, "grad_norm": 0.6369525790214539, "learning_rate": 1.3947482863870431e-05, "loss": 0.281, "step": 17115 }, { "epoch": 0.3709401339024549, "grad_norm": 1.2252027988433838, "learning_rate": 1.3944355577758366e-05, "loss": 0.2947, "step": 17120 }, { "epoch": 0.37104846922193574, "grad_norm": 2.668935775756836, "learning_rate": 1.3941227834751793e-05, "loss": 0.2697, "step": 17125 }, { "epoch": 0.3711568045414166, "grad_norm": 2.1359524726867676, "learning_rate": 1.3938099635213016e-05, "loss": 0.3432, "step": 17130 }, { "epoch": 0.37126513986089743, "grad_norm": 1.396087408065796, "learning_rate": 1.3934970979504388e-05, "loss": 0.4106, "step": 17135 }, { "epoch": 0.37137347518037833, "grad_norm": 1.0589720010757446, "learning_rate": 1.3931841867988313e-05, "loss": 0.319, "step": 17140 }, { "epoch": 0.3714818104998592, "grad_norm": 1.7754589319229126, "learning_rate": 1.3928712301027265e-05, "loss": 0.3135, "step": 17145 }, { "epoch": 0.37159014581934, "grad_norm": 1.332457184791565, "learning_rate": 1.3925582278983745e-05, "loss": 0.3476, "step": 17150 }, { "epoch": 0.37169848113882087, "grad_norm": 1.345413088798523, "learning_rate": 1.3922451802220326e-05, "loss": 0.2446, "step": 17155 }, { "epoch": 0.3718068164583017, "grad_norm": 1.140554666519165, "learning_rate": 1.391932087109962e-05, "loss": 0.2464, "step": 17160 }, { "epoch": 0.3719151517777826, "grad_norm": 1.46259605884552, "learning_rate": 1.3916189485984302e-05, "loss": 0.2435, "step": 17165 }, { "epoch": 0.37202348709726346, "grad_norm": 1.6687668561935425, "learning_rate": 1.3913057647237099e-05, "loss": 0.3166, "step": 17170 }, { "epoch": 0.3721318224167443, "grad_norm": 1.3687431812286377, "learning_rate": 1.3909925355220784e-05, "loss": 0.276, "step": 17175 }, { "epoch": 0.37224015773622515, "grad_norm": 1.1865718364715576, "learning_rate": 1.3906792610298187e-05, "loss": 0.3556, "step": 17180 }, { "epoch": 0.372348493055706, "grad_norm": 2.1637179851531982, "learning_rate": 1.3903659412832191e-05, "loss": 0.2105, "step": 17185 }, { "epoch": 0.3724568283751869, "grad_norm": 0.8866034746170044, "learning_rate": 1.390052576318573e-05, "loss": 0.3319, "step": 17190 }, { "epoch": 0.37256516369466774, "grad_norm": 1.5665507316589355, "learning_rate": 1.3897391661721788e-05, "loss": 0.3315, "step": 17195 }, { "epoch": 0.3726734990141486, "grad_norm": 1.0581005811691284, "learning_rate": 1.3894257108803406e-05, "loss": 0.348, "step": 17200 }, { "epoch": 0.37278183433362944, "grad_norm": 1.3317643404006958, "learning_rate": 1.3891122104793673e-05, "loss": 0.4227, "step": 17205 }, { "epoch": 0.3728901696531103, "grad_norm": 1.3266080617904663, "learning_rate": 1.3887986650055732e-05, "loss": 0.3332, "step": 17210 }, { "epoch": 0.3729985049725912, "grad_norm": 1.1170032024383545, "learning_rate": 1.3884850744952778e-05, "loss": 0.3941, "step": 17215 }, { "epoch": 0.37310684029207203, "grad_norm": 1.6364696025848389, "learning_rate": 1.3881714389848064e-05, "loss": 0.2303, "step": 17220 }, { "epoch": 0.3732151756115529, "grad_norm": 1.3517241477966309, "learning_rate": 1.3878577585104885e-05, "loss": 0.2927, "step": 17225 }, { "epoch": 0.3733235109310337, "grad_norm": 2.667485237121582, "learning_rate": 1.3875440331086596e-05, "loss": 0.29, "step": 17230 }, { "epoch": 0.37343184625051457, "grad_norm": 1.3230091333389282, "learning_rate": 1.3872302628156596e-05, "loss": 0.2473, "step": 17235 }, { "epoch": 0.37354018156999547, "grad_norm": 1.971595048904419, "learning_rate": 1.3869164476678343e-05, "loss": 0.2883, "step": 17240 }, { "epoch": 0.3736485168894763, "grad_norm": 1.274497628211975, "learning_rate": 1.3866025877015346e-05, "loss": 0.2198, "step": 17245 }, { "epoch": 0.37375685220895716, "grad_norm": 1.916037678718567, "learning_rate": 1.3862886829531164e-05, "loss": 0.2262, "step": 17250 }, { "epoch": 0.373865187528438, "grad_norm": 1.9524691104888916, "learning_rate": 1.3859747334589411e-05, "loss": 0.3506, "step": 17255 }, { "epoch": 0.3739735228479189, "grad_norm": 1.4216443300247192, "learning_rate": 1.3856607392553745e-05, "loss": 0.3673, "step": 17260 }, { "epoch": 0.37408185816739975, "grad_norm": 0.8033127188682556, "learning_rate": 1.3853467003787887e-05, "loss": 0.2631, "step": 17265 }, { "epoch": 0.3741901934868806, "grad_norm": 1.1771148443222046, "learning_rate": 1.3850326168655601e-05, "loss": 0.3249, "step": 17270 }, { "epoch": 0.37429852880636144, "grad_norm": 0.8396017551422119, "learning_rate": 1.3847184887520707e-05, "loss": 0.253, "step": 17275 }, { "epoch": 0.3744068641258423, "grad_norm": 1.4592386484146118, "learning_rate": 1.3844043160747074e-05, "loss": 0.2322, "step": 17280 }, { "epoch": 0.3745151994453232, "grad_norm": 1.2579597234725952, "learning_rate": 1.3840900988698623e-05, "loss": 0.3219, "step": 17285 }, { "epoch": 0.37462353476480403, "grad_norm": 1.5310271978378296, "learning_rate": 1.383775837173933e-05, "loss": 0.2337, "step": 17290 }, { "epoch": 0.3747318700842849, "grad_norm": 1.504982590675354, "learning_rate": 1.3834615310233219e-05, "loss": 0.2927, "step": 17295 }, { "epoch": 0.3748402054037657, "grad_norm": 1.2005733251571655, "learning_rate": 1.3831471804544369e-05, "loss": 0.3019, "step": 17300 }, { "epoch": 0.37494854072324657, "grad_norm": 1.5310693979263306, "learning_rate": 1.3828327855036902e-05, "loss": 0.2311, "step": 17305 }, { "epoch": 0.37505687604272747, "grad_norm": 1.3939297199249268, "learning_rate": 1.3825183462075007e-05, "loss": 0.287, "step": 17310 }, { "epoch": 0.3751652113622083, "grad_norm": 1.7093814611434937, "learning_rate": 1.3822038626022905e-05, "loss": 0.3661, "step": 17315 }, { "epoch": 0.37527354668168916, "grad_norm": 1.555901050567627, "learning_rate": 1.3818893347244884e-05, "loss": 0.3631, "step": 17320 }, { "epoch": 0.37538188200117, "grad_norm": 1.1065573692321777, "learning_rate": 1.3815747626105273e-05, "loss": 0.3406, "step": 17325 }, { "epoch": 0.37549021732065085, "grad_norm": 1.4656118154525757, "learning_rate": 1.3812601462968463e-05, "loss": 0.3022, "step": 17330 }, { "epoch": 0.37559855264013176, "grad_norm": 1.709990382194519, "learning_rate": 1.3809454858198883e-05, "loss": 0.2441, "step": 17335 }, { "epoch": 0.3757068879596126, "grad_norm": 2.2200663089752197, "learning_rate": 1.3806307812161025e-05, "loss": 0.3893, "step": 17340 }, { "epoch": 0.37581522327909345, "grad_norm": 1.6153472661972046, "learning_rate": 1.3803160325219427e-05, "loss": 0.2645, "step": 17345 }, { "epoch": 0.3759235585985743, "grad_norm": 1.5160791873931885, "learning_rate": 1.3800012397738675e-05, "loss": 0.463, "step": 17350 }, { "epoch": 0.37603189391805514, "grad_norm": 1.180928111076355, "learning_rate": 1.3796864030083412e-05, "loss": 0.4144, "step": 17355 }, { "epoch": 0.37614022923753604, "grad_norm": 1.8633495569229126, "learning_rate": 1.3793715222618325e-05, "loss": 0.2098, "step": 17360 }, { "epoch": 0.3762485645570169, "grad_norm": 1.6728148460388184, "learning_rate": 1.3790565975708164e-05, "loss": 0.2699, "step": 17365 }, { "epoch": 0.37635689987649773, "grad_norm": 2.0157768726348877, "learning_rate": 1.3787416289717714e-05, "loss": 0.3126, "step": 17370 }, { "epoch": 0.3764652351959786, "grad_norm": 1.9166370630264282, "learning_rate": 1.3784266165011823e-05, "loss": 0.3186, "step": 17375 }, { "epoch": 0.3765735705154595, "grad_norm": 1.6471933126449585, "learning_rate": 1.3781115601955382e-05, "loss": 0.2407, "step": 17380 }, { "epoch": 0.3766819058349403, "grad_norm": 0.993524432182312, "learning_rate": 1.3777964600913344e-05, "loss": 0.2556, "step": 17385 }, { "epoch": 0.37679024115442117, "grad_norm": 1.417204737663269, "learning_rate": 1.3774813162250702e-05, "loss": 0.3264, "step": 17390 }, { "epoch": 0.376898576473902, "grad_norm": 1.7884132862091064, "learning_rate": 1.3771661286332499e-05, "loss": 0.2115, "step": 17395 }, { "epoch": 0.37700691179338286, "grad_norm": 1.3998503684997559, "learning_rate": 1.3768508973523837e-05, "loss": 0.2944, "step": 17400 }, { "epoch": 0.37711524711286376, "grad_norm": 1.1539431810379028, "learning_rate": 1.3765356224189858e-05, "loss": 0.3232, "step": 17405 }, { "epoch": 0.3772235824323446, "grad_norm": 1.8018403053283691, "learning_rate": 1.3762203038695772e-05, "loss": 0.4133, "step": 17410 }, { "epoch": 0.37733191775182545, "grad_norm": 1.5070924758911133, "learning_rate": 1.3759049417406817e-05, "loss": 0.3165, "step": 17415 }, { "epoch": 0.3774402530713063, "grad_norm": 1.6856961250305176, "learning_rate": 1.37558953606883e-05, "loss": 0.3487, "step": 17420 }, { "epoch": 0.37754858839078714, "grad_norm": 1.8946762084960938, "learning_rate": 1.375274086890557e-05, "loss": 0.2537, "step": 17425 }, { "epoch": 0.37765692371026804, "grad_norm": 1.709586501121521, "learning_rate": 1.3749585942424024e-05, "loss": 0.2658, "step": 17430 }, { "epoch": 0.3777652590297489, "grad_norm": 1.5297163724899292, "learning_rate": 1.3746430581609117e-05, "loss": 0.3364, "step": 17435 }, { "epoch": 0.37787359434922974, "grad_norm": 1.564777135848999, "learning_rate": 1.3743274786826348e-05, "loss": 0.2202, "step": 17440 }, { "epoch": 0.3779819296687106, "grad_norm": 1.311716914176941, "learning_rate": 1.3740118558441268e-05, "loss": 0.33, "step": 17445 }, { "epoch": 0.3780902649881914, "grad_norm": 1.844140648841858, "learning_rate": 1.3736961896819483e-05, "loss": 0.3068, "step": 17450 }, { "epoch": 0.37819860030767233, "grad_norm": 1.6131587028503418, "learning_rate": 1.3733804802326636e-05, "loss": 0.3646, "step": 17455 }, { "epoch": 0.3783069356271532, "grad_norm": 1.6472833156585693, "learning_rate": 1.373064727532844e-05, "loss": 0.2853, "step": 17460 }, { "epoch": 0.378415270946634, "grad_norm": 2.295217275619507, "learning_rate": 1.372748931619064e-05, "loss": 0.409, "step": 17465 }, { "epoch": 0.37852360626611486, "grad_norm": 1.1594706773757935, "learning_rate": 1.3724330925279046e-05, "loss": 0.2793, "step": 17470 }, { "epoch": 0.3786319415855957, "grad_norm": 1.2011419534683228, "learning_rate": 1.37211721029595e-05, "loss": 0.2649, "step": 17475 }, { "epoch": 0.3787402769050766, "grad_norm": 1.3472115993499756, "learning_rate": 1.3718012849597906e-05, "loss": 0.3016, "step": 17480 }, { "epoch": 0.37884861222455746, "grad_norm": 1.1315903663635254, "learning_rate": 1.3714853165560223e-05, "loss": 0.4914, "step": 17485 }, { "epoch": 0.3789569475440383, "grad_norm": 1.4255471229553223, "learning_rate": 1.371169305121245e-05, "loss": 0.2836, "step": 17490 }, { "epoch": 0.37906528286351915, "grad_norm": 1.37417471408844, "learning_rate": 1.3708532506920637e-05, "loss": 0.341, "step": 17495 }, { "epoch": 0.379173618183, "grad_norm": 1.2526929378509521, "learning_rate": 1.3705371533050888e-05, "loss": 0.3, "step": 17500 }, { "epoch": 0.3792819535024809, "grad_norm": 1.5290768146514893, "learning_rate": 1.3702210129969348e-05, "loss": 0.2684, "step": 17505 }, { "epoch": 0.37939028882196174, "grad_norm": 1.6840702295303345, "learning_rate": 1.3699048298042232e-05, "loss": 0.3111, "step": 17510 }, { "epoch": 0.3794986241414426, "grad_norm": 1.3784579038619995, "learning_rate": 1.3695886037635777e-05, "loss": 0.3044, "step": 17515 }, { "epoch": 0.37960695946092343, "grad_norm": 2.747774839401245, "learning_rate": 1.3692723349116293e-05, "loss": 0.2457, "step": 17520 }, { "epoch": 0.37971529478040433, "grad_norm": 1.5211496353149414, "learning_rate": 1.368956023285012e-05, "loss": 0.3643, "step": 17525 }, { "epoch": 0.3798236300998852, "grad_norm": 1.4698312282562256, "learning_rate": 1.3686396689203667e-05, "loss": 0.3112, "step": 17530 }, { "epoch": 0.379931965419366, "grad_norm": 1.5266520977020264, "learning_rate": 1.368323271854338e-05, "loss": 0.3043, "step": 17535 }, { "epoch": 0.38004030073884687, "grad_norm": 2.1427152156829834, "learning_rate": 1.3680068321235758e-05, "loss": 0.2972, "step": 17540 }, { "epoch": 0.3801486360583277, "grad_norm": 1.4354915618896484, "learning_rate": 1.3676903497647346e-05, "loss": 0.4337, "step": 17545 }, { "epoch": 0.3802569713778086, "grad_norm": 1.581641674041748, "learning_rate": 1.3673738248144746e-05, "loss": 0.3977, "step": 17550 }, { "epoch": 0.38036530669728946, "grad_norm": 1.1008282899856567, "learning_rate": 1.36705725730946e-05, "loss": 0.1889, "step": 17555 }, { "epoch": 0.3804736420167703, "grad_norm": 2.174301862716675, "learning_rate": 1.366740647286361e-05, "loss": 0.332, "step": 17560 }, { "epoch": 0.38058197733625115, "grad_norm": 1.8554195165634155, "learning_rate": 1.3664239947818516e-05, "loss": 0.2331, "step": 17565 }, { "epoch": 0.380690312655732, "grad_norm": 1.5722025632858276, "learning_rate": 1.3661072998326115e-05, "loss": 0.2772, "step": 17570 }, { "epoch": 0.3807986479752129, "grad_norm": 1.4918373823165894, "learning_rate": 1.3657905624753247e-05, "loss": 0.3697, "step": 17575 }, { "epoch": 0.38090698329469375, "grad_norm": 1.9154142141342163, "learning_rate": 1.3654737827466811e-05, "loss": 0.2474, "step": 17580 }, { "epoch": 0.3810153186141746, "grad_norm": 1.3338546752929688, "learning_rate": 1.3651569606833746e-05, "loss": 0.3696, "step": 17585 }, { "epoch": 0.38112365393365544, "grad_norm": 1.0235031843185425, "learning_rate": 1.3648400963221042e-05, "loss": 0.226, "step": 17590 }, { "epoch": 0.3812319892531363, "grad_norm": 1.6054646968841553, "learning_rate": 1.364523189699574e-05, "loss": 0.3245, "step": 17595 }, { "epoch": 0.3813403245726172, "grad_norm": 1.8696539402008057, "learning_rate": 1.3642062408524926e-05, "loss": 0.3694, "step": 17600 }, { "epoch": 0.38144865989209803, "grad_norm": 1.7075461149215698, "learning_rate": 1.3638892498175742e-05, "loss": 0.2299, "step": 17605 }, { "epoch": 0.3815569952115789, "grad_norm": 2.017354965209961, "learning_rate": 1.3635722166315372e-05, "loss": 0.2656, "step": 17610 }, { "epoch": 0.3816653305310597, "grad_norm": 1.455859899520874, "learning_rate": 1.3632551413311053e-05, "loss": 0.3743, "step": 17615 }, { "epoch": 0.38177366585054057, "grad_norm": 1.424552321434021, "learning_rate": 1.3629380239530066e-05, "loss": 0.3459, "step": 17620 }, { "epoch": 0.38188200117002147, "grad_norm": 1.381110668182373, "learning_rate": 1.3626208645339747e-05, "loss": 0.3648, "step": 17625 }, { "epoch": 0.3819903364895023, "grad_norm": 1.279004693031311, "learning_rate": 1.362303663110748e-05, "loss": 0.241, "step": 17630 }, { "epoch": 0.38209867180898316, "grad_norm": 1.1983064413070679, "learning_rate": 1.361986419720069e-05, "loss": 0.3184, "step": 17635 }, { "epoch": 0.382207007128464, "grad_norm": 2.1463682651519775, "learning_rate": 1.3616691343986861e-05, "loss": 0.317, "step": 17640 }, { "epoch": 0.3823153424479449, "grad_norm": 1.8100744485855103, "learning_rate": 1.3613518071833512e-05, "loss": 0.2426, "step": 17645 }, { "epoch": 0.38242367776742575, "grad_norm": 1.312042474746704, "learning_rate": 1.3610344381108229e-05, "loss": 0.2602, "step": 17650 }, { "epoch": 0.3825320130869066, "grad_norm": 1.6392841339111328, "learning_rate": 1.3607170272178632e-05, "loss": 0.3061, "step": 17655 }, { "epoch": 0.38264034840638744, "grad_norm": 1.5944679975509644, "learning_rate": 1.3603995745412392e-05, "loss": 0.3525, "step": 17660 }, { "epoch": 0.3827486837258683, "grad_norm": 1.098048210144043, "learning_rate": 1.3600820801177236e-05, "loss": 0.2881, "step": 17665 }, { "epoch": 0.3828570190453492, "grad_norm": 0.8341870307922363, "learning_rate": 1.3597645439840931e-05, "loss": 0.2356, "step": 17670 }, { "epoch": 0.38296535436483004, "grad_norm": 1.803067922592163, "learning_rate": 1.3594469661771292e-05, "loss": 0.3618, "step": 17675 }, { "epoch": 0.3830736896843109, "grad_norm": 1.2064156532287598, "learning_rate": 1.359129346733619e-05, "loss": 0.4061, "step": 17680 }, { "epoch": 0.3831820250037917, "grad_norm": 1.38887619972229, "learning_rate": 1.3588116856903537e-05, "loss": 0.3718, "step": 17685 }, { "epoch": 0.38329036032327257, "grad_norm": 1.4095436334609985, "learning_rate": 1.3584939830841295e-05, "loss": 0.2737, "step": 17690 }, { "epoch": 0.3833986956427535, "grad_norm": 1.6421328783035278, "learning_rate": 1.3581762389517474e-05, "loss": 0.3932, "step": 17695 }, { "epoch": 0.3835070309622343, "grad_norm": 1.3636934757232666, "learning_rate": 1.3578584533300136e-05, "loss": 0.2573, "step": 17700 }, { "epoch": 0.38361536628171516, "grad_norm": 1.3677877187728882, "learning_rate": 1.357540626255739e-05, "loss": 0.3233, "step": 17705 }, { "epoch": 0.383723701601196, "grad_norm": 1.3867741823196411, "learning_rate": 1.3572227577657387e-05, "loss": 0.3036, "step": 17710 }, { "epoch": 0.38383203692067686, "grad_norm": 1.4310177564620972, "learning_rate": 1.3569048478968332e-05, "loss": 0.2118, "step": 17715 }, { "epoch": 0.38394037224015776, "grad_norm": 1.3239983320236206, "learning_rate": 1.356586896685847e-05, "loss": 0.3965, "step": 17720 }, { "epoch": 0.3840487075596386, "grad_norm": 1.4886565208435059, "learning_rate": 1.356268904169611e-05, "loss": 0.3011, "step": 17725 }, { "epoch": 0.38415704287911945, "grad_norm": 1.2810527086257935, "learning_rate": 1.3559508703849593e-05, "loss": 0.3163, "step": 17730 }, { "epoch": 0.3842653781986003, "grad_norm": 1.8926143646240234, "learning_rate": 1.3556327953687312e-05, "loss": 0.3235, "step": 17735 }, { "epoch": 0.38437371351808114, "grad_norm": 1.5291728973388672, "learning_rate": 1.3553146791577714e-05, "loss": 0.2345, "step": 17740 }, { "epoch": 0.38448204883756204, "grad_norm": 1.618668794631958, "learning_rate": 1.354996521788928e-05, "loss": 0.2223, "step": 17745 }, { "epoch": 0.3845903841570429, "grad_norm": 1.5145474672317505, "learning_rate": 1.3546783232990563e-05, "loss": 0.2422, "step": 17750 }, { "epoch": 0.38469871947652373, "grad_norm": 2.0466771125793457, "learning_rate": 1.3543600837250135e-05, "loss": 0.3252, "step": 17755 }, { "epoch": 0.3848070547960046, "grad_norm": 1.3387953042984009, "learning_rate": 1.3540418031036633e-05, "loss": 0.2699, "step": 17760 }, { "epoch": 0.3849153901154854, "grad_norm": 2.143995761871338, "learning_rate": 1.3537234814718738e-05, "loss": 0.3371, "step": 17765 }, { "epoch": 0.3850237254349663, "grad_norm": 1.2539175748825073, "learning_rate": 1.3534051188665177e-05, "loss": 0.3803, "step": 17770 }, { "epoch": 0.38513206075444717, "grad_norm": 1.4408296346664429, "learning_rate": 1.3530867153244724e-05, "loss": 0.3416, "step": 17775 }, { "epoch": 0.385240396073928, "grad_norm": 1.2783480882644653, "learning_rate": 1.3527682708826207e-05, "loss": 0.3169, "step": 17780 }, { "epoch": 0.38534873139340886, "grad_norm": 1.7525666952133179, "learning_rate": 1.352449785577849e-05, "loss": 0.267, "step": 17785 }, { "epoch": 0.38545706671288976, "grad_norm": 1.5831780433654785, "learning_rate": 1.3521312594470497e-05, "loss": 0.3201, "step": 17790 }, { "epoch": 0.3855654020323706, "grad_norm": 1.4009873867034912, "learning_rate": 1.3518126925271186e-05, "loss": 0.3162, "step": 17795 }, { "epoch": 0.38567373735185145, "grad_norm": 1.0808141231536865, "learning_rate": 1.3514940848549574e-05, "loss": 0.2613, "step": 17800 }, { "epoch": 0.3857820726713323, "grad_norm": 1.4588322639465332, "learning_rate": 1.3511754364674719e-05, "loss": 0.3282, "step": 17805 }, { "epoch": 0.38589040799081314, "grad_norm": 1.7488969564437866, "learning_rate": 1.3508567474015725e-05, "loss": 0.3854, "step": 17810 }, { "epoch": 0.38599874331029405, "grad_norm": 1.4964083433151245, "learning_rate": 1.3505380176941747e-05, "loss": 0.195, "step": 17815 }, { "epoch": 0.3861070786297749, "grad_norm": 1.352418303489685, "learning_rate": 1.3502192473821986e-05, "loss": 0.2397, "step": 17820 }, { "epoch": 0.38621541394925574, "grad_norm": 1.5433845520019531, "learning_rate": 1.3499004365025693e-05, "loss": 0.3667, "step": 17825 }, { "epoch": 0.3863237492687366, "grad_norm": 1.123031497001648, "learning_rate": 1.3495815850922158e-05, "loss": 0.2617, "step": 17830 }, { "epoch": 0.38643208458821743, "grad_norm": 1.4841769933700562, "learning_rate": 1.3492626931880725e-05, "loss": 0.3954, "step": 17835 }, { "epoch": 0.38654041990769833, "grad_norm": 1.9134575128555298, "learning_rate": 1.348943760827078e-05, "loss": 0.2529, "step": 17840 }, { "epoch": 0.3866487552271792, "grad_norm": 1.6102774143218994, "learning_rate": 1.3486247880461758e-05, "loss": 0.3788, "step": 17845 }, { "epoch": 0.38675709054666, "grad_norm": 1.4911155700683594, "learning_rate": 1.3483057748823145e-05, "loss": 0.2742, "step": 17850 }, { "epoch": 0.38686542586614087, "grad_norm": 1.8017570972442627, "learning_rate": 1.3479867213724469e-05, "loss": 0.3017, "step": 17855 }, { "epoch": 0.3869737611856217, "grad_norm": 1.2400376796722412, "learning_rate": 1.3476676275535303e-05, "loss": 0.3624, "step": 17860 }, { "epoch": 0.3870820965051026, "grad_norm": 1.3171675205230713, "learning_rate": 1.347348493462527e-05, "loss": 0.326, "step": 17865 }, { "epoch": 0.38719043182458346, "grad_norm": 1.930226445198059, "learning_rate": 1.3470293191364046e-05, "loss": 0.3254, "step": 17870 }, { "epoch": 0.3872987671440643, "grad_norm": 1.4456931352615356, "learning_rate": 1.3467101046121337e-05, "loss": 0.2906, "step": 17875 }, { "epoch": 0.38740710246354515, "grad_norm": 2.074772596359253, "learning_rate": 1.3463908499266906e-05, "loss": 0.269, "step": 17880 }, { "epoch": 0.387515437783026, "grad_norm": 1.6451740264892578, "learning_rate": 1.3460715551170569e-05, "loss": 0.2307, "step": 17885 }, { "epoch": 0.3876237731025069, "grad_norm": 1.4257758855819702, "learning_rate": 1.3457522202202173e-05, "loss": 0.3237, "step": 17890 }, { "epoch": 0.38773210842198774, "grad_norm": 1.6260302066802979, "learning_rate": 1.3454328452731625e-05, "loss": 0.2883, "step": 17895 }, { "epoch": 0.3878404437414686, "grad_norm": 1.9007059335708618, "learning_rate": 1.3451134303128872e-05, "loss": 0.2524, "step": 17900 }, { "epoch": 0.38794877906094943, "grad_norm": 1.1058275699615479, "learning_rate": 1.344793975376391e-05, "loss": 0.1792, "step": 17905 }, { "epoch": 0.38805711438043033, "grad_norm": 1.62169349193573, "learning_rate": 1.3444744805006775e-05, "loss": 0.2515, "step": 17910 }, { "epoch": 0.3881654496999112, "grad_norm": 1.0850436687469482, "learning_rate": 1.3441549457227559e-05, "loss": 0.2878, "step": 17915 }, { "epoch": 0.388273785019392, "grad_norm": 1.5807371139526367, "learning_rate": 1.343835371079639e-05, "loss": 0.3781, "step": 17920 }, { "epoch": 0.38838212033887287, "grad_norm": 1.3621782064437866, "learning_rate": 1.3435157566083452e-05, "loss": 0.3284, "step": 17925 }, { "epoch": 0.3884904556583537, "grad_norm": 1.7735034227371216, "learning_rate": 1.343196102345897e-05, "loss": 0.3828, "step": 17930 }, { "epoch": 0.3885987909778346, "grad_norm": 1.721360206604004, "learning_rate": 1.342876408329321e-05, "loss": 0.2889, "step": 17935 }, { "epoch": 0.38870712629731546, "grad_norm": 1.3666040897369385, "learning_rate": 1.3425566745956495e-05, "loss": 0.2933, "step": 17940 }, { "epoch": 0.3888154616167963, "grad_norm": 1.5530853271484375, "learning_rate": 1.342236901181919e-05, "loss": 0.3532, "step": 17945 }, { "epoch": 0.38892379693627716, "grad_norm": 1.4961416721343994, "learning_rate": 1.3419170881251699e-05, "loss": 0.1858, "step": 17950 }, { "epoch": 0.389032132255758, "grad_norm": 1.5936795473098755, "learning_rate": 1.3415972354624487e-05, "loss": 0.28, "step": 17955 }, { "epoch": 0.3891404675752389, "grad_norm": 1.56813645362854, "learning_rate": 1.3412773432308041e-05, "loss": 0.3104, "step": 17960 }, { "epoch": 0.38924880289471975, "grad_norm": 1.6335890293121338, "learning_rate": 1.3409574114672921e-05, "loss": 0.2865, "step": 17965 }, { "epoch": 0.3893571382142006, "grad_norm": 1.3032793998718262, "learning_rate": 1.3406374402089713e-05, "loss": 0.2762, "step": 17970 }, { "epoch": 0.38946547353368144, "grad_norm": 1.43485689163208, "learning_rate": 1.340317429492906e-05, "loss": 0.3289, "step": 17975 }, { "epoch": 0.3895738088531623, "grad_norm": 2.4797070026397705, "learning_rate": 1.3399973793561643e-05, "loss": 0.3713, "step": 17980 }, { "epoch": 0.3896821441726432, "grad_norm": 1.705967664718628, "learning_rate": 1.339677289835819e-05, "loss": 0.4064, "step": 17985 }, { "epoch": 0.38979047949212403, "grad_norm": 1.2194222211837769, "learning_rate": 1.3393571609689488e-05, "loss": 0.168, "step": 17990 }, { "epoch": 0.3898988148116049, "grad_norm": 1.4149670600891113, "learning_rate": 1.3390369927926347e-05, "loss": 0.3287, "step": 17995 }, { "epoch": 0.3900071501310857, "grad_norm": 1.732499599456787, "learning_rate": 1.3387167853439638e-05, "loss": 0.1609, "step": 18000 }, { "epoch": 0.39011548545056657, "grad_norm": 1.7739589214324951, "learning_rate": 1.3383965386600272e-05, "loss": 0.2676, "step": 18005 }, { "epoch": 0.39022382077004747, "grad_norm": 1.7493929862976074, "learning_rate": 1.3380762527779206e-05, "loss": 0.3291, "step": 18010 }, { "epoch": 0.3903321560895283, "grad_norm": 1.8185728788375854, "learning_rate": 1.3377559277347447e-05, "loss": 0.3031, "step": 18015 }, { "epoch": 0.39044049140900916, "grad_norm": 1.2587207555770874, "learning_rate": 1.3374355635676044e-05, "loss": 0.2879, "step": 18020 }, { "epoch": 0.39054882672849, "grad_norm": 1.3369605541229248, "learning_rate": 1.3371151603136086e-05, "loss": 0.3202, "step": 18025 }, { "epoch": 0.39065716204797085, "grad_norm": 1.155348300933838, "learning_rate": 1.3367947180098715e-05, "loss": 0.2659, "step": 18030 }, { "epoch": 0.39076549736745175, "grad_norm": 1.5153992176055908, "learning_rate": 1.3364742366935116e-05, "loss": 0.2653, "step": 18035 }, { "epoch": 0.3908738326869326, "grad_norm": 1.5554708242416382, "learning_rate": 1.3361537164016514e-05, "loss": 0.3173, "step": 18040 }, { "epoch": 0.39098216800641344, "grad_norm": 1.2223905324935913, "learning_rate": 1.3358331571714192e-05, "loss": 0.2325, "step": 18045 }, { "epoch": 0.3910905033258943, "grad_norm": 1.1266878843307495, "learning_rate": 1.3355125590399463e-05, "loss": 0.3575, "step": 18050 }, { "epoch": 0.3911988386453752, "grad_norm": 1.2879018783569336, "learning_rate": 1.3351919220443693e-05, "loss": 0.3133, "step": 18055 }, { "epoch": 0.39130717396485604, "grad_norm": 1.687505841255188, "learning_rate": 1.3348712462218294e-05, "loss": 0.329, "step": 18060 }, { "epoch": 0.3914155092843369, "grad_norm": 1.014031171798706, "learning_rate": 1.334550531609472e-05, "loss": 0.1995, "step": 18065 }, { "epoch": 0.39152384460381773, "grad_norm": 1.5517441034317017, "learning_rate": 1.334229778244447e-05, "loss": 0.3652, "step": 18070 }, { "epoch": 0.3916321799232986, "grad_norm": 1.4486933946609497, "learning_rate": 1.3339089861639093e-05, "loss": 0.3308, "step": 18075 }, { "epoch": 0.3917405152427795, "grad_norm": 1.6690261363983154, "learning_rate": 1.3335881554050172e-05, "loss": 0.2755, "step": 18080 }, { "epoch": 0.3918488505622603, "grad_norm": 1.7421101331710815, "learning_rate": 1.3332672860049345e-05, "loss": 0.2941, "step": 18085 }, { "epoch": 0.39195718588174117, "grad_norm": 1.2377586364746094, "learning_rate": 1.3329463780008291e-05, "loss": 0.305, "step": 18090 }, { "epoch": 0.392065521201222, "grad_norm": 1.295911192893982, "learning_rate": 1.3326254314298731e-05, "loss": 0.234, "step": 18095 }, { "epoch": 0.39217385652070286, "grad_norm": 1.4008220434188843, "learning_rate": 1.3323044463292441e-05, "loss": 0.3154, "step": 18100 }, { "epoch": 0.39228219184018376, "grad_norm": 1.5199967622756958, "learning_rate": 1.3319834227361223e-05, "loss": 0.2768, "step": 18105 }, { "epoch": 0.3923905271596646, "grad_norm": 1.8867859840393066, "learning_rate": 1.3316623606876943e-05, "loss": 0.2535, "step": 18110 }, { "epoch": 0.39249886247914545, "grad_norm": 1.5122902393341064, "learning_rate": 1.3313412602211506e-05, "loss": 0.2294, "step": 18115 }, { "epoch": 0.3926071977986263, "grad_norm": 1.5923446416854858, "learning_rate": 1.3310201213736851e-05, "loss": 0.3316, "step": 18120 }, { "epoch": 0.39271553311810714, "grad_norm": 1.1228861808776855, "learning_rate": 1.3306989441824972e-05, "loss": 0.3425, "step": 18125 }, { "epoch": 0.39282386843758804, "grad_norm": 1.540806770324707, "learning_rate": 1.33037772868479e-05, "loss": 0.2762, "step": 18130 }, { "epoch": 0.3929322037570689, "grad_norm": 1.1615080833435059, "learning_rate": 1.3300564749177727e-05, "loss": 0.2677, "step": 18135 }, { "epoch": 0.39304053907654973, "grad_norm": 1.2731337547302246, "learning_rate": 1.3297351829186566e-05, "loss": 0.262, "step": 18140 }, { "epoch": 0.3931488743960306, "grad_norm": 1.7225888967514038, "learning_rate": 1.3294138527246592e-05, "loss": 0.3458, "step": 18145 }, { "epoch": 0.3932572097155114, "grad_norm": 1.3596501350402832, "learning_rate": 1.3290924843730016e-05, "loss": 0.251, "step": 18150 }, { "epoch": 0.3933655450349923, "grad_norm": 1.3700255155563354, "learning_rate": 1.3287710779009096e-05, "loss": 0.2166, "step": 18155 }, { "epoch": 0.39347388035447317, "grad_norm": 1.6721723079681396, "learning_rate": 1.328449633345613e-05, "loss": 0.2071, "step": 18160 }, { "epoch": 0.393582215673954, "grad_norm": 1.1977434158325195, "learning_rate": 1.3281281507443463e-05, "loss": 0.2583, "step": 18165 }, { "epoch": 0.39369055099343486, "grad_norm": 0.9915305972099304, "learning_rate": 1.327806630134349e-05, "loss": 0.2708, "step": 18170 }, { "epoch": 0.39379888631291576, "grad_norm": 1.9167455434799194, "learning_rate": 1.327485071552864e-05, "loss": 0.3023, "step": 18175 }, { "epoch": 0.3939072216323966, "grad_norm": 1.9804991483688354, "learning_rate": 1.3271634750371394e-05, "loss": 0.3561, "step": 18180 }, { "epoch": 0.39401555695187745, "grad_norm": 1.8437297344207764, "learning_rate": 1.326841840624427e-05, "loss": 0.348, "step": 18185 }, { "epoch": 0.3941238922713583, "grad_norm": 1.646065354347229, "learning_rate": 1.3265201683519834e-05, "loss": 0.3252, "step": 18190 }, { "epoch": 0.39423222759083915, "grad_norm": 2.3371689319610596, "learning_rate": 1.3261984582570697e-05, "loss": 0.3181, "step": 18195 }, { "epoch": 0.39434056291032005, "grad_norm": 1.6023637056350708, "learning_rate": 1.3258767103769512e-05, "loss": 0.2909, "step": 18200 }, { "epoch": 0.3944488982298009, "grad_norm": 1.1522979736328125, "learning_rate": 1.3255549247488973e-05, "loss": 0.3022, "step": 18205 }, { "epoch": 0.39455723354928174, "grad_norm": 1.4294281005859375, "learning_rate": 1.325233101410182e-05, "loss": 0.2924, "step": 18210 }, { "epoch": 0.3946655688687626, "grad_norm": 1.3250277042388916, "learning_rate": 1.3249112403980842e-05, "loss": 0.3326, "step": 18215 }, { "epoch": 0.39477390418824343, "grad_norm": 1.5533422231674194, "learning_rate": 1.324589341749886e-05, "loss": 0.3814, "step": 18220 }, { "epoch": 0.39488223950772433, "grad_norm": 1.1477237939834595, "learning_rate": 1.324267405502875e-05, "loss": 0.2134, "step": 18225 }, { "epoch": 0.3949905748272052, "grad_norm": 1.6247620582580566, "learning_rate": 1.3239454316943427e-05, "loss": 0.2494, "step": 18230 }, { "epoch": 0.395098910146686, "grad_norm": 2.3643274307250977, "learning_rate": 1.3236234203615851e-05, "loss": 0.276, "step": 18235 }, { "epoch": 0.39520724546616687, "grad_norm": 1.2242575883865356, "learning_rate": 1.3233013715419019e-05, "loss": 0.2749, "step": 18240 }, { "epoch": 0.3953155807856477, "grad_norm": 1.9516714811325073, "learning_rate": 1.3229792852725977e-05, "loss": 0.3253, "step": 18245 }, { "epoch": 0.3954239161051286, "grad_norm": 1.3546608686447144, "learning_rate": 1.3226571615909816e-05, "loss": 0.3549, "step": 18250 }, { "epoch": 0.39553225142460946, "grad_norm": 1.5087363719940186, "learning_rate": 1.3223350005343668e-05, "loss": 0.2974, "step": 18255 }, { "epoch": 0.3956405867440903, "grad_norm": 1.7772948741912842, "learning_rate": 1.3220128021400708e-05, "loss": 0.3282, "step": 18260 }, { "epoch": 0.39574892206357115, "grad_norm": 2.1077256202697754, "learning_rate": 1.3216905664454154e-05, "loss": 0.402, "step": 18265 }, { "epoch": 0.395857257383052, "grad_norm": 1.6464123725891113, "learning_rate": 1.3213682934877268e-05, "loss": 0.3856, "step": 18270 }, { "epoch": 0.3959655927025329, "grad_norm": 1.7624391317367554, "learning_rate": 1.3210459833043354e-05, "loss": 0.4315, "step": 18275 }, { "epoch": 0.39607392802201374, "grad_norm": 1.5234456062316895, "learning_rate": 1.3207236359325764e-05, "loss": 0.286, "step": 18280 }, { "epoch": 0.3961822633414946, "grad_norm": 1.6499853134155273, "learning_rate": 1.3204012514097884e-05, "loss": 0.2798, "step": 18285 }, { "epoch": 0.39629059866097543, "grad_norm": 1.7855957746505737, "learning_rate": 1.320078829773315e-05, "loss": 0.4294, "step": 18290 }, { "epoch": 0.39639893398045634, "grad_norm": 0.551758348941803, "learning_rate": 1.3197563710605038e-05, "loss": 0.2128, "step": 18295 }, { "epoch": 0.3965072692999372, "grad_norm": 1.92522132396698, "learning_rate": 1.319433875308707e-05, "loss": 0.3992, "step": 18300 }, { "epoch": 0.396615604619418, "grad_norm": 1.2725900411605835, "learning_rate": 1.319111342555281e-05, "loss": 0.235, "step": 18305 }, { "epoch": 0.3967239399388989, "grad_norm": 1.9652388095855713, "learning_rate": 1.3187887728375865e-05, "loss": 0.3001, "step": 18310 }, { "epoch": 0.3968322752583797, "grad_norm": 1.6255836486816406, "learning_rate": 1.3184661661929878e-05, "loss": 0.2253, "step": 18315 }, { "epoch": 0.3969406105778606, "grad_norm": 1.4727963209152222, "learning_rate": 1.3181435226588547e-05, "loss": 0.3155, "step": 18320 }, { "epoch": 0.39704894589734147, "grad_norm": 1.2684659957885742, "learning_rate": 1.3178208422725598e-05, "loss": 0.2895, "step": 18325 }, { "epoch": 0.3971572812168223, "grad_norm": 1.3902052640914917, "learning_rate": 1.3174981250714815e-05, "loss": 0.3332, "step": 18330 }, { "epoch": 0.39726561653630316, "grad_norm": 1.4874271154403687, "learning_rate": 1.3171753710930017e-05, "loss": 0.3387, "step": 18335 }, { "epoch": 0.397373951855784, "grad_norm": 1.2502820491790771, "learning_rate": 1.3168525803745062e-05, "loss": 0.2617, "step": 18340 }, { "epoch": 0.3974822871752649, "grad_norm": 2.0196564197540283, "learning_rate": 1.3165297529533858e-05, "loss": 0.2779, "step": 18345 }, { "epoch": 0.39759062249474575, "grad_norm": 1.7315433025360107, "learning_rate": 1.3162068888670352e-05, "loss": 0.3575, "step": 18350 }, { "epoch": 0.3976989578142266, "grad_norm": 0.8222269415855408, "learning_rate": 1.3158839881528534e-05, "loss": 0.3273, "step": 18355 }, { "epoch": 0.39780729313370744, "grad_norm": 1.4042086601257324, "learning_rate": 1.3155610508482436e-05, "loss": 0.3307, "step": 18360 }, { "epoch": 0.3979156284531883, "grad_norm": 1.4699716567993164, "learning_rate": 1.3152380769906128e-05, "loss": 0.2042, "step": 18365 }, { "epoch": 0.3980239637726692, "grad_norm": 1.5368214845657349, "learning_rate": 1.314915066617373e-05, "loss": 0.2891, "step": 18370 }, { "epoch": 0.39813229909215003, "grad_norm": 2.0926952362060547, "learning_rate": 1.3145920197659403e-05, "loss": 0.3149, "step": 18375 }, { "epoch": 0.3982406344116309, "grad_norm": 1.608749270439148, "learning_rate": 1.314268936473735e-05, "loss": 0.2825, "step": 18380 }, { "epoch": 0.3983489697311117, "grad_norm": 1.5437651872634888, "learning_rate": 1.3139458167781806e-05, "loss": 0.3197, "step": 18385 }, { "epoch": 0.39845730505059257, "grad_norm": 1.2459205389022827, "learning_rate": 1.3136226607167066e-05, "loss": 0.3116, "step": 18390 }, { "epoch": 0.39856564037007347, "grad_norm": 1.4470341205596924, "learning_rate": 1.3132994683267455e-05, "loss": 0.2425, "step": 18395 }, { "epoch": 0.3986739756895543, "grad_norm": 0.9304448962211609, "learning_rate": 1.3129762396457339e-05, "loss": 0.2169, "step": 18400 }, { "epoch": 0.39878231100903516, "grad_norm": 1.520329236984253, "learning_rate": 1.3126529747111137e-05, "loss": 0.3492, "step": 18405 }, { "epoch": 0.398890646328516, "grad_norm": 1.985198974609375, "learning_rate": 1.3123296735603297e-05, "loss": 0.2702, "step": 18410 }, { "epoch": 0.39899898164799685, "grad_norm": 1.4122915267944336, "learning_rate": 1.3120063362308318e-05, "loss": 0.2604, "step": 18415 }, { "epoch": 0.39910731696747775, "grad_norm": 1.3081960678100586, "learning_rate": 1.3116829627600733e-05, "loss": 0.2479, "step": 18420 }, { "epoch": 0.3992156522869586, "grad_norm": 1.7329754829406738, "learning_rate": 1.311359553185513e-05, "loss": 0.2586, "step": 18425 }, { "epoch": 0.39932398760643945, "grad_norm": 1.709695816040039, "learning_rate": 1.3110361075446125e-05, "loss": 0.2697, "step": 18430 }, { "epoch": 0.3994323229259203, "grad_norm": 1.9173190593719482, "learning_rate": 1.3107126258748384e-05, "loss": 0.3655, "step": 18435 }, { "epoch": 0.3995406582454012, "grad_norm": 1.3080133199691772, "learning_rate": 1.310389108213661e-05, "loss": 0.3065, "step": 18440 }, { "epoch": 0.39964899356488204, "grad_norm": 1.5078924894332886, "learning_rate": 1.310065554598555e-05, "loss": 0.2992, "step": 18445 }, { "epoch": 0.3997573288843629, "grad_norm": 1.6936898231506348, "learning_rate": 1.3097419650669993e-05, "loss": 0.241, "step": 18450 }, { "epoch": 0.39986566420384373, "grad_norm": 1.8326997756958008, "learning_rate": 1.3094183396564769e-05, "loss": 0.3543, "step": 18455 }, { "epoch": 0.3999739995233246, "grad_norm": 1.9466102123260498, "learning_rate": 1.309094678404475e-05, "loss": 0.3051, "step": 18460 }, { "epoch": 0.4000823348428055, "grad_norm": 1.5433615446090698, "learning_rate": 1.3087709813484843e-05, "loss": 0.2512, "step": 18465 }, { "epoch": 0.4001906701622863, "grad_norm": 1.6708745956420898, "learning_rate": 1.3084472485260014e-05, "loss": 0.325, "step": 18470 }, { "epoch": 0.40029900548176717, "grad_norm": 1.8220000267028809, "learning_rate": 1.3081234799745255e-05, "loss": 0.3401, "step": 18475 }, { "epoch": 0.400407340801248, "grad_norm": 1.5012987852096558, "learning_rate": 1.3077996757315599e-05, "loss": 0.2456, "step": 18480 }, { "epoch": 0.40051567612072886, "grad_norm": 1.753803014755249, "learning_rate": 1.3074758358346125e-05, "loss": 0.3033, "step": 18485 }, { "epoch": 0.40062401144020976, "grad_norm": 1.9094396829605103, "learning_rate": 1.3071519603211952e-05, "loss": 0.2516, "step": 18490 }, { "epoch": 0.4007323467596906, "grad_norm": 1.4810030460357666, "learning_rate": 1.306828049228825e-05, "loss": 0.2914, "step": 18495 }, { "epoch": 0.40084068207917145, "grad_norm": 1.8460978269577026, "learning_rate": 1.3065041025950213e-05, "loss": 0.3286, "step": 18500 }, { "epoch": 0.4009490173986523, "grad_norm": 1.563146710395813, "learning_rate": 1.306180120457309e-05, "loss": 0.2376, "step": 18505 }, { "epoch": 0.40105735271813314, "grad_norm": 0.992411732673645, "learning_rate": 1.3058561028532161e-05, "loss": 0.2203, "step": 18510 }, { "epoch": 0.40116568803761404, "grad_norm": 1.7072323560714722, "learning_rate": 1.3055320498202757e-05, "loss": 0.3187, "step": 18515 }, { "epoch": 0.4012740233570949, "grad_norm": 1.7342778444290161, "learning_rate": 1.305207961396024e-05, "loss": 0.3394, "step": 18520 }, { "epoch": 0.40138235867657573, "grad_norm": 1.3686354160308838, "learning_rate": 1.304883837618002e-05, "loss": 0.2784, "step": 18525 }, { "epoch": 0.4014906939960566, "grad_norm": 1.142838954925537, "learning_rate": 1.3045596785237545e-05, "loss": 0.3122, "step": 18530 }, { "epoch": 0.4015990293155374, "grad_norm": 1.8075727224349976, "learning_rate": 1.3042354841508307e-05, "loss": 0.4008, "step": 18535 }, { "epoch": 0.4017073646350183, "grad_norm": 1.853884220123291, "learning_rate": 1.3039112545367831e-05, "loss": 0.3433, "step": 18540 }, { "epoch": 0.4018156999544992, "grad_norm": 1.8443620204925537, "learning_rate": 1.3035869897191696e-05, "loss": 0.2338, "step": 18545 }, { "epoch": 0.40192403527398, "grad_norm": 1.4210245609283447, "learning_rate": 1.303262689735551e-05, "loss": 0.412, "step": 18550 }, { "epoch": 0.40203237059346086, "grad_norm": 1.2759919166564941, "learning_rate": 1.3029383546234928e-05, "loss": 0.3066, "step": 18555 }, { "epoch": 0.40214070591294176, "grad_norm": 1.0692241191864014, "learning_rate": 1.3026139844205642e-05, "loss": 0.41, "step": 18560 }, { "epoch": 0.4022490412324226, "grad_norm": 2.134047269821167, "learning_rate": 1.3022895791643384e-05, "loss": 0.2602, "step": 18565 }, { "epoch": 0.40235737655190346, "grad_norm": 1.034932255744934, "learning_rate": 1.3019651388923935e-05, "loss": 0.306, "step": 18570 }, { "epoch": 0.4024657118713843, "grad_norm": 1.67258882522583, "learning_rate": 1.3016406636423105e-05, "loss": 0.3041, "step": 18575 }, { "epoch": 0.40257404719086515, "grad_norm": 1.7648417949676514, "learning_rate": 1.3013161534516754e-05, "loss": 0.3099, "step": 18580 }, { "epoch": 0.40268238251034605, "grad_norm": 1.3885325193405151, "learning_rate": 1.3009916083580772e-05, "loss": 0.3226, "step": 18585 }, { "epoch": 0.4027907178298269, "grad_norm": 1.4241054058074951, "learning_rate": 1.3006670283991102e-05, "loss": 0.3693, "step": 18590 }, { "epoch": 0.40289905314930774, "grad_norm": 2.3809549808502197, "learning_rate": 1.3003424136123721e-05, "loss": 0.3961, "step": 18595 }, { "epoch": 0.4030073884687886, "grad_norm": 1.632755994796753, "learning_rate": 1.3000177640354648e-05, "loss": 0.2925, "step": 18600 }, { "epoch": 0.40311572378826943, "grad_norm": 2.0927486419677734, "learning_rate": 1.2996930797059934e-05, "loss": 0.2894, "step": 18605 }, { "epoch": 0.40322405910775033, "grad_norm": 1.8289520740509033, "learning_rate": 1.2993683606615682e-05, "loss": 0.2856, "step": 18610 }, { "epoch": 0.4033323944272312, "grad_norm": 1.2058383226394653, "learning_rate": 1.2990436069398026e-05, "loss": 0.2707, "step": 18615 }, { "epoch": 0.403440729746712, "grad_norm": 1.925879716873169, "learning_rate": 1.2987188185783152e-05, "loss": 0.342, "step": 18620 }, { "epoch": 0.40354906506619287, "grad_norm": 1.4495924711227417, "learning_rate": 1.2983939956147275e-05, "loss": 0.2708, "step": 18625 }, { "epoch": 0.4036574003856737, "grad_norm": 1.190415382385254, "learning_rate": 1.2980691380866655e-05, "loss": 0.2171, "step": 18630 }, { "epoch": 0.4037657357051546, "grad_norm": 1.9957102537155151, "learning_rate": 1.2977442460317586e-05, "loss": 0.3504, "step": 18635 }, { "epoch": 0.40387407102463546, "grad_norm": 1.8722724914550781, "learning_rate": 1.297419319487641e-05, "loss": 0.3036, "step": 18640 }, { "epoch": 0.4039824063441163, "grad_norm": 2.247913122177124, "learning_rate": 1.2970943584919509e-05, "loss": 0.3218, "step": 18645 }, { "epoch": 0.40409074166359715, "grad_norm": 1.1941479444503784, "learning_rate": 1.2967693630823295e-05, "loss": 0.241, "step": 18650 }, { "epoch": 0.404199076983078, "grad_norm": 1.5588806867599487, "learning_rate": 1.2964443332964232e-05, "loss": 0.2121, "step": 18655 }, { "epoch": 0.4043074123025589, "grad_norm": 1.348560094833374, "learning_rate": 1.2961192691718813e-05, "loss": 0.2577, "step": 18660 }, { "epoch": 0.40441574762203975, "grad_norm": 1.2632296085357666, "learning_rate": 1.2957941707463584e-05, "loss": 0.2854, "step": 18665 }, { "epoch": 0.4045240829415206, "grad_norm": 1.5568814277648926, "learning_rate": 1.2954690380575114e-05, "loss": 0.2787, "step": 18670 }, { "epoch": 0.40463241826100144, "grad_norm": 1.5319247245788574, "learning_rate": 1.2951438711430026e-05, "loss": 0.2873, "step": 18675 }, { "epoch": 0.4047407535804823, "grad_norm": 1.5651919841766357, "learning_rate": 1.2948186700404979e-05, "loss": 0.2207, "step": 18680 }, { "epoch": 0.4048490888999632, "grad_norm": 1.5632281303405762, "learning_rate": 1.2944934347876664e-05, "loss": 0.3927, "step": 18685 }, { "epoch": 0.40495742421944403, "grad_norm": 1.0121558904647827, "learning_rate": 1.2941681654221817e-05, "loss": 0.2685, "step": 18690 }, { "epoch": 0.4050657595389249, "grad_norm": 1.0584609508514404, "learning_rate": 1.2938428619817223e-05, "loss": 0.2573, "step": 18695 }, { "epoch": 0.4051740948584057, "grad_norm": 1.4700316190719604, "learning_rate": 1.2935175245039689e-05, "loss": 0.2957, "step": 18700 }, { "epoch": 0.4052824301778866, "grad_norm": 1.2576720714569092, "learning_rate": 1.2931921530266067e-05, "loss": 0.2648, "step": 18705 }, { "epoch": 0.40539076549736747, "grad_norm": 1.9543005228042603, "learning_rate": 1.292866747587326e-05, "loss": 0.3241, "step": 18710 }, { "epoch": 0.4054991008168483, "grad_norm": 1.5150554180145264, "learning_rate": 1.2925413082238196e-05, "loss": 0.2686, "step": 18715 }, { "epoch": 0.40560743613632916, "grad_norm": 1.7386173009872437, "learning_rate": 1.2922158349737852e-05, "loss": 0.2391, "step": 18720 }, { "epoch": 0.40571577145581, "grad_norm": 1.020268201828003, "learning_rate": 1.2918903278749235e-05, "loss": 0.3622, "step": 18725 }, { "epoch": 0.4058241067752909, "grad_norm": 1.879995346069336, "learning_rate": 1.2915647869649397e-05, "loss": 0.2872, "step": 18730 }, { "epoch": 0.40593244209477175, "grad_norm": 1.3048458099365234, "learning_rate": 1.2912392122815432e-05, "loss": 0.3597, "step": 18735 }, { "epoch": 0.4060407774142526, "grad_norm": 1.7615488767623901, "learning_rate": 1.2909136038624465e-05, "loss": 0.355, "step": 18740 }, { "epoch": 0.40614911273373344, "grad_norm": 1.167941689491272, "learning_rate": 1.290587961745367e-05, "loss": 0.2251, "step": 18745 }, { "epoch": 0.4062574480532143, "grad_norm": 1.9146322011947632, "learning_rate": 1.2902622859680248e-05, "loss": 0.1817, "step": 18750 }, { "epoch": 0.4063657833726952, "grad_norm": 2.1491565704345703, "learning_rate": 1.2899365765681451e-05, "loss": 0.3483, "step": 18755 }, { "epoch": 0.40647411869217603, "grad_norm": 1.967627763748169, "learning_rate": 1.2896108335834562e-05, "loss": 0.2231, "step": 18760 }, { "epoch": 0.4065824540116569, "grad_norm": 1.858994722366333, "learning_rate": 1.2892850570516907e-05, "loss": 0.3158, "step": 18765 }, { "epoch": 0.4066907893311377, "grad_norm": 1.901178240776062, "learning_rate": 1.2889592470105848e-05, "loss": 0.3164, "step": 18770 }, { "epoch": 0.40679912465061857, "grad_norm": 2.063621759414673, "learning_rate": 1.2886334034978785e-05, "loss": 0.2422, "step": 18775 }, { "epoch": 0.40690745997009947, "grad_norm": 1.383979320526123, "learning_rate": 1.288307526551316e-05, "loss": 0.31, "step": 18780 }, { "epoch": 0.4070157952895803, "grad_norm": 1.2192057371139526, "learning_rate": 1.2879816162086458e-05, "loss": 0.2463, "step": 18785 }, { "epoch": 0.40712413060906116, "grad_norm": 1.6762112379074097, "learning_rate": 1.2876556725076192e-05, "loss": 0.3193, "step": 18790 }, { "epoch": 0.407232465928542, "grad_norm": 1.7181905508041382, "learning_rate": 1.2873296954859921e-05, "loss": 0.286, "step": 18795 }, { "epoch": 0.40734080124802285, "grad_norm": 1.7435569763183594, "learning_rate": 1.287003685181524e-05, "loss": 0.2726, "step": 18800 }, { "epoch": 0.40744913656750376, "grad_norm": 1.3362987041473389, "learning_rate": 1.2866776416319781e-05, "loss": 0.2817, "step": 18805 }, { "epoch": 0.4075574718869846, "grad_norm": 1.166448712348938, "learning_rate": 1.286351564875122e-05, "loss": 0.3894, "step": 18810 }, { "epoch": 0.40766580720646545, "grad_norm": 1.2880991697311401, "learning_rate": 1.2860254549487268e-05, "loss": 0.2701, "step": 18815 }, { "epoch": 0.4077741425259463, "grad_norm": 1.4539415836334229, "learning_rate": 1.285699311890567e-05, "loss": 0.3669, "step": 18820 }, { "epoch": 0.4078824778454272, "grad_norm": 1.6407396793365479, "learning_rate": 1.2853731357384215e-05, "loss": 0.2875, "step": 18825 }, { "epoch": 0.40799081316490804, "grad_norm": 1.8713409900665283, "learning_rate": 1.2850469265300736e-05, "loss": 0.2414, "step": 18830 }, { "epoch": 0.4080991484843889, "grad_norm": 1.6286253929138184, "learning_rate": 1.2847206843033091e-05, "loss": 0.2518, "step": 18835 }, { "epoch": 0.40820748380386973, "grad_norm": 1.5932471752166748, "learning_rate": 1.2843944090959186e-05, "loss": 0.3062, "step": 18840 }, { "epoch": 0.4083158191233506, "grad_norm": 1.6950207948684692, "learning_rate": 1.2840681009456959e-05, "loss": 0.2416, "step": 18845 }, { "epoch": 0.4084241544428315, "grad_norm": 1.9721537828445435, "learning_rate": 1.2837417598904386e-05, "loss": 0.2765, "step": 18850 }, { "epoch": 0.4085324897623123, "grad_norm": 1.9943102598190308, "learning_rate": 1.2834153859679494e-05, "loss": 0.292, "step": 18855 }, { "epoch": 0.40864082508179317, "grad_norm": 1.4907201528549194, "learning_rate": 1.2830889792160333e-05, "loss": 0.2997, "step": 18860 }, { "epoch": 0.408749160401274, "grad_norm": 1.3070937395095825, "learning_rate": 1.2827625396724995e-05, "loss": 0.2378, "step": 18865 }, { "epoch": 0.40885749572075486, "grad_norm": 2.1294500827789307, "learning_rate": 1.2824360673751612e-05, "loss": 0.2915, "step": 18870 }, { "epoch": 0.40896583104023576, "grad_norm": 0.8070087432861328, "learning_rate": 1.2821095623618356e-05, "loss": 0.2082, "step": 18875 }, { "epoch": 0.4090741663597166, "grad_norm": 1.790548324584961, "learning_rate": 1.2817830246703431e-05, "loss": 0.2266, "step": 18880 }, { "epoch": 0.40918250167919745, "grad_norm": 1.7175452709197998, "learning_rate": 1.2814564543385082e-05, "loss": 0.3542, "step": 18885 }, { "epoch": 0.4092908369986783, "grad_norm": 1.7810711860656738, "learning_rate": 1.2811298514041592e-05, "loss": 0.278, "step": 18890 }, { "epoch": 0.40939917231815914, "grad_norm": 1.7844243049621582, "learning_rate": 1.2808032159051284e-05, "loss": 0.3057, "step": 18895 }, { "epoch": 0.40950750763764004, "grad_norm": 1.700130581855774, "learning_rate": 1.2804765478792513e-05, "loss": 0.3017, "step": 18900 }, { "epoch": 0.4096158429571209, "grad_norm": 1.3267433643341064, "learning_rate": 1.2801498473643679e-05, "loss": 0.3186, "step": 18905 }, { "epoch": 0.40972417827660174, "grad_norm": 1.017645001411438, "learning_rate": 1.2798231143983211e-05, "loss": 0.3711, "step": 18910 }, { "epoch": 0.4098325135960826, "grad_norm": 1.562764048576355, "learning_rate": 1.2794963490189585e-05, "loss": 0.3099, "step": 18915 }, { "epoch": 0.4099408489155634, "grad_norm": 1.2945737838745117, "learning_rate": 1.2791695512641307e-05, "loss": 0.3367, "step": 18920 }, { "epoch": 0.41004918423504433, "grad_norm": 1.6297950744628906, "learning_rate": 1.2788427211716924e-05, "loss": 0.2631, "step": 18925 }, { "epoch": 0.4101575195545252, "grad_norm": 1.7703957557678223, "learning_rate": 1.2785158587795015e-05, "loss": 0.3151, "step": 18930 }, { "epoch": 0.410265854874006, "grad_norm": 1.959741473197937, "learning_rate": 1.2781889641254208e-05, "loss": 0.277, "step": 18935 }, { "epoch": 0.41037419019348687, "grad_norm": 1.800642728805542, "learning_rate": 1.277862037247316e-05, "loss": 0.2843, "step": 18940 }, { "epoch": 0.4104825255129677, "grad_norm": 1.7282280921936035, "learning_rate": 1.2775350781830565e-05, "loss": 0.3127, "step": 18945 }, { "epoch": 0.4105908608324486, "grad_norm": 1.968335509300232, "learning_rate": 1.2772080869705152e-05, "loss": 0.2397, "step": 18950 }, { "epoch": 0.41069919615192946, "grad_norm": 1.6514078378677368, "learning_rate": 1.2768810636475701e-05, "loss": 0.3148, "step": 18955 }, { "epoch": 0.4108075314714103, "grad_norm": 1.4772679805755615, "learning_rate": 1.2765540082521017e-05, "loss": 0.2224, "step": 18960 }, { "epoch": 0.41091586679089115, "grad_norm": 1.4280760288238525, "learning_rate": 1.2762269208219938e-05, "loss": 0.3564, "step": 18965 }, { "epoch": 0.41102420211037205, "grad_norm": 1.1827881336212158, "learning_rate": 1.275899801395135e-05, "loss": 0.2206, "step": 18970 }, { "epoch": 0.4111325374298529, "grad_norm": 2.6724767684936523, "learning_rate": 1.2755726500094173e-05, "loss": 0.2734, "step": 18975 }, { "epoch": 0.41124087274933374, "grad_norm": 2.5726232528686523, "learning_rate": 1.2752454667027361e-05, "loss": 0.3665, "step": 18980 }, { "epoch": 0.4113492080688146, "grad_norm": 1.7353408336639404, "learning_rate": 1.2749182515129908e-05, "loss": 0.3573, "step": 18985 }, { "epoch": 0.41145754338829543, "grad_norm": 1.2795530557632446, "learning_rate": 1.2745910044780843e-05, "loss": 0.2975, "step": 18990 }, { "epoch": 0.41156587870777633, "grad_norm": 1.4205665588378906, "learning_rate": 1.2742637256359235e-05, "loss": 0.378, "step": 18995 }, { "epoch": 0.4116742140272572, "grad_norm": 1.5115545988082886, "learning_rate": 1.2739364150244182e-05, "loss": 0.2557, "step": 19000 }, { "epoch": 0.411782549346738, "grad_norm": 2.0643460750579834, "learning_rate": 1.273609072681483e-05, "loss": 0.2756, "step": 19005 }, { "epoch": 0.41189088466621887, "grad_norm": 1.6315668821334839, "learning_rate": 1.2732816986450354e-05, "loss": 0.2599, "step": 19010 }, { "epoch": 0.4119992199856997, "grad_norm": 1.519851803779602, "learning_rate": 1.2729542929529967e-05, "loss": 0.2872, "step": 19015 }, { "epoch": 0.4121075553051806, "grad_norm": 1.477325677871704, "learning_rate": 1.2726268556432919e-05, "loss": 0.226, "step": 19020 }, { "epoch": 0.41221589062466146, "grad_norm": 1.4316540956497192, "learning_rate": 1.27229938675385e-05, "loss": 0.3205, "step": 19025 }, { "epoch": 0.4123242259441423, "grad_norm": 1.4282875061035156, "learning_rate": 1.2719718863226032e-05, "loss": 0.2216, "step": 19030 }, { "epoch": 0.41243256126362315, "grad_norm": 1.3727227449417114, "learning_rate": 1.2716443543874877e-05, "loss": 0.329, "step": 19035 }, { "epoch": 0.412540896583104, "grad_norm": 1.4266786575317383, "learning_rate": 1.2713167909864425e-05, "loss": 0.3307, "step": 19040 }, { "epoch": 0.4126492319025849, "grad_norm": 1.341897964477539, "learning_rate": 1.270989196157412e-05, "loss": 0.2117, "step": 19045 }, { "epoch": 0.41275756722206575, "grad_norm": 2.291257619857788, "learning_rate": 1.270661569938342e-05, "loss": 0.243, "step": 19050 }, { "epoch": 0.4128659025415466, "grad_norm": 1.5299372673034668, "learning_rate": 1.2703339123671839e-05, "loss": 0.2804, "step": 19055 }, { "epoch": 0.41297423786102744, "grad_norm": 2.285909414291382, "learning_rate": 1.2700062234818915e-05, "loss": 0.2706, "step": 19060 }, { "epoch": 0.4130825731805083, "grad_norm": 1.8875699043273926, "learning_rate": 1.2696785033204231e-05, "loss": 0.3107, "step": 19065 }, { "epoch": 0.4131909084999892, "grad_norm": 1.8570231199264526, "learning_rate": 1.2693507519207394e-05, "loss": 0.2593, "step": 19070 }, { "epoch": 0.41329924381947003, "grad_norm": 1.6800510883331299, "learning_rate": 1.2690229693208061e-05, "loss": 0.3742, "step": 19075 }, { "epoch": 0.4134075791389509, "grad_norm": 1.7729727029800415, "learning_rate": 1.268695155558592e-05, "loss": 0.2603, "step": 19080 }, { "epoch": 0.4135159144584317, "grad_norm": 1.3560080528259277, "learning_rate": 1.2683673106720693e-05, "loss": 0.3564, "step": 19085 }, { "epoch": 0.4136242497779126, "grad_norm": 2.281186819076538, "learning_rate": 1.2680394346992132e-05, "loss": 0.2585, "step": 19090 }, { "epoch": 0.41373258509739347, "grad_norm": 1.882019281387329, "learning_rate": 1.2677115276780044e-05, "loss": 0.4558, "step": 19095 }, { "epoch": 0.4138409204168743, "grad_norm": 1.9430975914001465, "learning_rate": 1.2673835896464253e-05, "loss": 0.3734, "step": 19100 }, { "epoch": 0.41394925573635516, "grad_norm": 1.9747414588928223, "learning_rate": 1.2670556206424624e-05, "loss": 0.2674, "step": 19105 }, { "epoch": 0.414057591055836, "grad_norm": 1.6816623210906982, "learning_rate": 1.2667276207041069e-05, "loss": 0.4052, "step": 19110 }, { "epoch": 0.4141659263753169, "grad_norm": 2.4378514289855957, "learning_rate": 1.2663995898693515e-05, "loss": 0.4706, "step": 19115 }, { "epoch": 0.41427426169479775, "grad_norm": 1.996065378189087, "learning_rate": 1.2660715281761949e-05, "loss": 0.3442, "step": 19120 }, { "epoch": 0.4143825970142786, "grad_norm": 1.7035189867019653, "learning_rate": 1.2657434356626373e-05, "loss": 0.3573, "step": 19125 }, { "epoch": 0.41449093233375944, "grad_norm": 1.4985154867172241, "learning_rate": 1.2654153123666832e-05, "loss": 0.3101, "step": 19130 }, { "epoch": 0.4145992676532403, "grad_norm": 2.135129928588867, "learning_rate": 1.2650871583263415e-05, "loss": 0.3812, "step": 19135 }, { "epoch": 0.4147076029727212, "grad_norm": 1.9243782758712769, "learning_rate": 1.2647589735796233e-05, "loss": 0.3154, "step": 19140 }, { "epoch": 0.41481593829220204, "grad_norm": 1.0300689935684204, "learning_rate": 1.264430758164544e-05, "loss": 0.1943, "step": 19145 }, { "epoch": 0.4149242736116829, "grad_norm": 2.2540700435638428, "learning_rate": 1.2641025121191226e-05, "loss": 0.303, "step": 19150 }, { "epoch": 0.4150326089311637, "grad_norm": 1.351027250289917, "learning_rate": 1.2637742354813815e-05, "loss": 0.3583, "step": 19155 }, { "epoch": 0.4151409442506446, "grad_norm": 1.1376057863235474, "learning_rate": 1.2634459282893468e-05, "loss": 0.3195, "step": 19160 }, { "epoch": 0.4152492795701255, "grad_norm": 1.5873523950576782, "learning_rate": 1.2631175905810476e-05, "loss": 0.2237, "step": 19165 }, { "epoch": 0.4153576148896063, "grad_norm": 2.0224153995513916, "learning_rate": 1.2627892223945169e-05, "loss": 0.3516, "step": 19170 }, { "epoch": 0.41546595020908716, "grad_norm": 1.608340859413147, "learning_rate": 1.2624608237677914e-05, "loss": 0.2482, "step": 19175 }, { "epoch": 0.415574285528568, "grad_norm": 1.1884028911590576, "learning_rate": 1.262132394738911e-05, "loss": 0.3168, "step": 19180 }, { "epoch": 0.41568262084804886, "grad_norm": 1.4188982248306274, "learning_rate": 1.2618039353459195e-05, "loss": 0.2757, "step": 19185 }, { "epoch": 0.41579095616752976, "grad_norm": 1.975359320640564, "learning_rate": 1.2614754456268638e-05, "loss": 0.2949, "step": 19190 }, { "epoch": 0.4158992914870106, "grad_norm": 1.2677204608917236, "learning_rate": 1.2611469256197947e-05, "loss": 0.3248, "step": 19195 }, { "epoch": 0.41600762680649145, "grad_norm": 1.6151645183563232, "learning_rate": 1.2608183753627664e-05, "loss": 0.2503, "step": 19200 }, { "epoch": 0.4161159621259723, "grad_norm": 0.7759866118431091, "learning_rate": 1.2604897948938367e-05, "loss": 0.2659, "step": 19205 }, { "epoch": 0.41622429744545314, "grad_norm": 1.7607827186584473, "learning_rate": 1.2601611842510657e-05, "loss": 0.2548, "step": 19210 }, { "epoch": 0.41633263276493404, "grad_norm": 1.1553770303726196, "learning_rate": 1.2598325434725189e-05, "loss": 0.2518, "step": 19215 }, { "epoch": 0.4164409680844149, "grad_norm": 1.5469844341278076, "learning_rate": 1.2595038725962643e-05, "loss": 0.3319, "step": 19220 }, { "epoch": 0.41654930340389573, "grad_norm": 1.792948842048645, "learning_rate": 1.2591751716603735e-05, "loss": 0.3454, "step": 19225 }, { "epoch": 0.4166576387233766, "grad_norm": 1.3192224502563477, "learning_rate": 1.2588464407029216e-05, "loss": 0.2348, "step": 19230 }, { "epoch": 0.4167659740428575, "grad_norm": 2.1559550762176514, "learning_rate": 1.2585176797619866e-05, "loss": 0.3792, "step": 19235 }, { "epoch": 0.4168743093623383, "grad_norm": 1.445900559425354, "learning_rate": 1.2581888888756517e-05, "loss": 0.2963, "step": 19240 }, { "epoch": 0.41698264468181917, "grad_norm": 1.8165491819381714, "learning_rate": 1.2578600680820014e-05, "loss": 0.3093, "step": 19245 }, { "epoch": 0.4170909800013, "grad_norm": 1.3117737770080566, "learning_rate": 1.2575312174191247e-05, "loss": 0.2603, "step": 19250 }, { "epoch": 0.41719931532078086, "grad_norm": 1.1116502285003662, "learning_rate": 1.2572023369251146e-05, "loss": 0.2547, "step": 19255 }, { "epoch": 0.41730765064026176, "grad_norm": 1.658128261566162, "learning_rate": 1.2568734266380664e-05, "loss": 0.2995, "step": 19260 }, { "epoch": 0.4174159859597426, "grad_norm": 1.2409156560897827, "learning_rate": 1.2565444865960798e-05, "loss": 0.3299, "step": 19265 }, { "epoch": 0.41752432127922345, "grad_norm": 1.5593494176864624, "learning_rate": 1.2562155168372574e-05, "loss": 0.3058, "step": 19270 }, { "epoch": 0.4176326565987043, "grad_norm": 0.9133288264274597, "learning_rate": 1.2558865173997058e-05, "loss": 0.2239, "step": 19275 }, { "epoch": 0.41774099191818514, "grad_norm": 1.2975454330444336, "learning_rate": 1.255557488321534e-05, "loss": 0.2289, "step": 19280 }, { "epoch": 0.41784932723766605, "grad_norm": 1.073542594909668, "learning_rate": 1.2552284296408554e-05, "loss": 0.2854, "step": 19285 }, { "epoch": 0.4179576625571469, "grad_norm": 1.928249478340149, "learning_rate": 1.2548993413957868e-05, "loss": 0.2737, "step": 19290 }, { "epoch": 0.41806599787662774, "grad_norm": 1.1865352392196655, "learning_rate": 1.2545702236244477e-05, "loss": 0.3682, "step": 19295 }, { "epoch": 0.4181743331961086, "grad_norm": 1.775322437286377, "learning_rate": 1.2542410763649617e-05, "loss": 0.3198, "step": 19300 }, { "epoch": 0.41828266851558943, "grad_norm": 1.1684764623641968, "learning_rate": 1.2539118996554555e-05, "loss": 0.356, "step": 19305 }, { "epoch": 0.41839100383507033, "grad_norm": 1.5746617317199707, "learning_rate": 1.2535826935340593e-05, "loss": 0.3316, "step": 19310 }, { "epoch": 0.4184993391545512, "grad_norm": 1.5706515312194824, "learning_rate": 1.2532534580389068e-05, "loss": 0.2979, "step": 19315 }, { "epoch": 0.418607674474032, "grad_norm": 1.708937406539917, "learning_rate": 1.252924193208135e-05, "loss": 0.3548, "step": 19320 }, { "epoch": 0.41871600979351287, "grad_norm": 1.1268706321716309, "learning_rate": 1.2525948990798843e-05, "loss": 0.2246, "step": 19325 }, { "epoch": 0.4188243451129937, "grad_norm": 1.8229331970214844, "learning_rate": 1.2522655756922983e-05, "loss": 0.3775, "step": 19330 }, { "epoch": 0.4189326804324746, "grad_norm": 1.427590012550354, "learning_rate": 1.2519362230835238e-05, "loss": 0.2742, "step": 19335 }, { "epoch": 0.41904101575195546, "grad_norm": 1.6394544839859009, "learning_rate": 1.2516068412917126e-05, "loss": 0.3188, "step": 19340 }, { "epoch": 0.4191493510714363, "grad_norm": 1.32485830783844, "learning_rate": 1.2512774303550175e-05, "loss": 0.2852, "step": 19345 }, { "epoch": 0.41925768639091715, "grad_norm": 2.0681710243225098, "learning_rate": 1.2509479903115965e-05, "loss": 0.3158, "step": 19350 }, { "epoch": 0.41936602171039805, "grad_norm": 2.305443286895752, "learning_rate": 1.2506185211996095e-05, "loss": 0.2346, "step": 19355 }, { "epoch": 0.4194743570298789, "grad_norm": 1.711524248123169, "learning_rate": 1.2502890230572217e-05, "loss": 0.2706, "step": 19360 }, { "epoch": 0.41958269234935974, "grad_norm": 1.2733957767486572, "learning_rate": 1.2499594959225998e-05, "loss": 0.3188, "step": 19365 }, { "epoch": 0.4196910276688406, "grad_norm": 1.2854007482528687, "learning_rate": 1.2496299398339144e-05, "loss": 0.259, "step": 19370 }, { "epoch": 0.41979936298832143, "grad_norm": 1.677061676979065, "learning_rate": 1.24930035482934e-05, "loss": 0.2391, "step": 19375 }, { "epoch": 0.41990769830780234, "grad_norm": 1.2170984745025635, "learning_rate": 1.2489707409470539e-05, "loss": 0.2552, "step": 19380 }, { "epoch": 0.4200160336272832, "grad_norm": 1.7815322875976562, "learning_rate": 1.2486410982252374e-05, "loss": 0.2419, "step": 19385 }, { "epoch": 0.420124368946764, "grad_norm": 2.1257517337799072, "learning_rate": 1.2483114267020742e-05, "loss": 0.2967, "step": 19390 }, { "epoch": 0.42023270426624487, "grad_norm": 1.2934297323226929, "learning_rate": 1.247981726415752e-05, "loss": 0.3116, "step": 19395 }, { "epoch": 0.4203410395857257, "grad_norm": 1.2531627416610718, "learning_rate": 1.2476519974044611e-05, "loss": 0.2312, "step": 19400 }, { "epoch": 0.4204493749052066, "grad_norm": 1.519420862197876, "learning_rate": 1.2473222397063966e-05, "loss": 0.2351, "step": 19405 }, { "epoch": 0.42055771022468746, "grad_norm": 1.478388786315918, "learning_rate": 1.2469924533597554e-05, "loss": 0.2719, "step": 19410 }, { "epoch": 0.4206660455441683, "grad_norm": 1.7572365999221802, "learning_rate": 1.2466626384027386e-05, "loss": 0.2934, "step": 19415 }, { "epoch": 0.42077438086364916, "grad_norm": 1.3622444868087769, "learning_rate": 1.2463327948735497e-05, "loss": 0.3392, "step": 19420 }, { "epoch": 0.42088271618313, "grad_norm": 1.9587879180908203, "learning_rate": 1.2460029228103969e-05, "loss": 0.2894, "step": 19425 }, { "epoch": 0.4209910515026109, "grad_norm": 1.51802396774292, "learning_rate": 1.2456730222514904e-05, "loss": 0.3504, "step": 19430 }, { "epoch": 0.42109938682209175, "grad_norm": 2.036149263381958, "learning_rate": 1.2453430932350445e-05, "loss": 0.2691, "step": 19435 }, { "epoch": 0.4212077221415726, "grad_norm": 1.2805781364440918, "learning_rate": 1.2450131357992767e-05, "loss": 0.2516, "step": 19440 }, { "epoch": 0.42131605746105344, "grad_norm": 0.8596205711364746, "learning_rate": 1.2446831499824075e-05, "loss": 0.2607, "step": 19445 }, { "epoch": 0.4214243927805343, "grad_norm": 1.557625412940979, "learning_rate": 1.2443531358226607e-05, "loss": 0.3101, "step": 19450 }, { "epoch": 0.4215327281000152, "grad_norm": 1.3329180479049683, "learning_rate": 1.2440230933582632e-05, "loss": 0.2723, "step": 19455 }, { "epoch": 0.42164106341949603, "grad_norm": 1.65608549118042, "learning_rate": 1.2436930226274457e-05, "loss": 0.3638, "step": 19460 }, { "epoch": 0.4217493987389769, "grad_norm": 1.0207602977752686, "learning_rate": 1.2433629236684424e-05, "loss": 0.2243, "step": 19465 }, { "epoch": 0.4218577340584577, "grad_norm": 1.4845168590545654, "learning_rate": 1.24303279651949e-05, "loss": 0.3353, "step": 19470 }, { "epoch": 0.42196606937793857, "grad_norm": 1.4922218322753906, "learning_rate": 1.2427026412188285e-05, "loss": 0.2732, "step": 19475 }, { "epoch": 0.42207440469741947, "grad_norm": 1.7279592752456665, "learning_rate": 1.2423724578047018e-05, "loss": 0.2636, "step": 19480 }, { "epoch": 0.4221827400169003, "grad_norm": 1.477716088294983, "learning_rate": 1.2420422463153567e-05, "loss": 0.2738, "step": 19485 }, { "epoch": 0.42229107533638116, "grad_norm": 1.8290600776672363, "learning_rate": 1.241712006789043e-05, "loss": 0.2816, "step": 19490 }, { "epoch": 0.422399410655862, "grad_norm": 3.1087827682495117, "learning_rate": 1.2413817392640143e-05, "loss": 0.3661, "step": 19495 }, { "epoch": 0.4225077459753429, "grad_norm": 1.4154586791992188, "learning_rate": 1.2410514437785268e-05, "loss": 0.3083, "step": 19500 }, { "epoch": 0.42261608129482375, "grad_norm": 0.9201440811157227, "learning_rate": 1.2407211203708407e-05, "loss": 0.3215, "step": 19505 }, { "epoch": 0.4227244166143046, "grad_norm": 1.0717718601226807, "learning_rate": 1.2403907690792188e-05, "loss": 0.2621, "step": 19510 }, { "epoch": 0.42283275193378544, "grad_norm": 1.2713078260421753, "learning_rate": 1.2400603899419273e-05, "loss": 0.3049, "step": 19515 }, { "epoch": 0.4229410872532663, "grad_norm": 1.2602678537368774, "learning_rate": 1.2397299829972357e-05, "loss": 0.3003, "step": 19520 }, { "epoch": 0.4230494225727472, "grad_norm": 2.4853944778442383, "learning_rate": 1.2393995482834167e-05, "loss": 0.2404, "step": 19525 }, { "epoch": 0.42315775789222804, "grad_norm": 1.3080211877822876, "learning_rate": 1.2390690858387462e-05, "loss": 0.3299, "step": 19530 }, { "epoch": 0.4232660932117089, "grad_norm": 1.2591514587402344, "learning_rate": 1.2387385957015034e-05, "loss": 0.3733, "step": 19535 }, { "epoch": 0.42337442853118973, "grad_norm": 2.1374080181121826, "learning_rate": 1.2384080779099708e-05, "loss": 0.3186, "step": 19540 }, { "epoch": 0.4234827638506706, "grad_norm": 1.3193942308425903, "learning_rate": 1.2380775325024336e-05, "loss": 0.2619, "step": 19545 }, { "epoch": 0.4235910991701515, "grad_norm": 2.009127140045166, "learning_rate": 1.2377469595171805e-05, "loss": 0.2736, "step": 19550 }, { "epoch": 0.4236994344896323, "grad_norm": 1.420844554901123, "learning_rate": 1.2374163589925037e-05, "loss": 0.2137, "step": 19555 }, { "epoch": 0.42380776980911317, "grad_norm": 2.0804994106292725, "learning_rate": 1.2370857309666981e-05, "loss": 0.3139, "step": 19560 }, { "epoch": 0.423916105128594, "grad_norm": 1.3525524139404297, "learning_rate": 1.2367550754780625e-05, "loss": 0.2496, "step": 19565 }, { "epoch": 0.42402444044807486, "grad_norm": 1.051523208618164, "learning_rate": 1.2364243925648977e-05, "loss": 0.3083, "step": 19570 }, { "epoch": 0.42413277576755576, "grad_norm": 1.5422004461288452, "learning_rate": 1.2360936822655086e-05, "loss": 0.2723, "step": 19575 }, { "epoch": 0.4242411110870366, "grad_norm": 1.7503447532653809, "learning_rate": 1.2357629446182031e-05, "loss": 0.3284, "step": 19580 }, { "epoch": 0.42434944640651745, "grad_norm": 1.5611625909805298, "learning_rate": 1.2354321796612925e-05, "loss": 0.2452, "step": 19585 }, { "epoch": 0.4244577817259983, "grad_norm": 1.5506441593170166, "learning_rate": 1.2351013874330905e-05, "loss": 0.3463, "step": 19590 }, { "epoch": 0.42456611704547914, "grad_norm": 1.415473461151123, "learning_rate": 1.2347705679719145e-05, "loss": 0.4111, "step": 19595 }, { "epoch": 0.42467445236496004, "grad_norm": 1.7372326850891113, "learning_rate": 1.2344397213160849e-05, "loss": 0.3224, "step": 19600 }, { "epoch": 0.4247827876844409, "grad_norm": 1.5007699728012085, "learning_rate": 1.2341088475039258e-05, "loss": 0.2204, "step": 19605 }, { "epoch": 0.42489112300392173, "grad_norm": 1.1581714153289795, "learning_rate": 1.2337779465737635e-05, "loss": 0.3126, "step": 19610 }, { "epoch": 0.4249994583234026, "grad_norm": 1.9986727237701416, "learning_rate": 1.2334470185639283e-05, "loss": 0.1821, "step": 19615 }, { "epoch": 0.4251077936428835, "grad_norm": 1.517465353012085, "learning_rate": 1.2331160635127525e-05, "loss": 0.2628, "step": 19620 }, { "epoch": 0.4252161289623643, "grad_norm": 2.38285493850708, "learning_rate": 1.2327850814585732e-05, "loss": 0.3075, "step": 19625 }, { "epoch": 0.42532446428184517, "grad_norm": 2.101522922515869, "learning_rate": 1.2324540724397294e-05, "loss": 0.2044, "step": 19630 }, { "epoch": 0.425432799601326, "grad_norm": 1.2815520763397217, "learning_rate": 1.2321230364945633e-05, "loss": 0.2137, "step": 19635 }, { "epoch": 0.42554113492080686, "grad_norm": 1.642133355140686, "learning_rate": 1.2317919736614208e-05, "loss": 0.3172, "step": 19640 }, { "epoch": 0.42564947024028776, "grad_norm": 2.057650566101074, "learning_rate": 1.2314608839786503e-05, "loss": 0.3405, "step": 19645 }, { "epoch": 0.4257578055597686, "grad_norm": 1.2964917421340942, "learning_rate": 1.2311297674846037e-05, "loss": 0.2508, "step": 19650 }, { "epoch": 0.42586614087924946, "grad_norm": 1.6971004009246826, "learning_rate": 1.230798624217636e-05, "loss": 0.3563, "step": 19655 }, { "epoch": 0.4259744761987303, "grad_norm": 1.483390212059021, "learning_rate": 1.2304674542161048e-05, "loss": 0.3352, "step": 19660 }, { "epoch": 0.42608281151821115, "grad_norm": 1.2767850160598755, "learning_rate": 1.2301362575183715e-05, "loss": 0.1936, "step": 19665 }, { "epoch": 0.42619114683769205, "grad_norm": 1.3487924337387085, "learning_rate": 1.2298050341628e-05, "loss": 0.2836, "step": 19670 }, { "epoch": 0.4262994821571729, "grad_norm": 1.299599528312683, "learning_rate": 1.229473784187758e-05, "loss": 0.3645, "step": 19675 }, { "epoch": 0.42640781747665374, "grad_norm": 1.3927654027938843, "learning_rate": 1.2291425076316156e-05, "loss": 0.3053, "step": 19680 }, { "epoch": 0.4265161527961346, "grad_norm": 2.1996371746063232, "learning_rate": 1.2288112045327463e-05, "loss": 0.3313, "step": 19685 }, { "epoch": 0.42662448811561543, "grad_norm": 1.2934768199920654, "learning_rate": 1.2284798749295268e-05, "loss": 0.2691, "step": 19690 }, { "epoch": 0.42673282343509633, "grad_norm": 1.2613588571548462, "learning_rate": 1.228148518860336e-05, "loss": 0.3328, "step": 19695 }, { "epoch": 0.4268411587545772, "grad_norm": 2.029250144958496, "learning_rate": 1.2278171363635569e-05, "loss": 0.2229, "step": 19700 }, { "epoch": 0.426949494074058, "grad_norm": 1.5590260028839111, "learning_rate": 1.2274857274775756e-05, "loss": 0.2975, "step": 19705 }, { "epoch": 0.42705782939353887, "grad_norm": 1.834998607635498, "learning_rate": 1.2271542922407801e-05, "loss": 0.2876, "step": 19710 }, { "epoch": 0.4271661647130197, "grad_norm": 1.603302001953125, "learning_rate": 1.2268228306915631e-05, "loss": 0.2459, "step": 19715 }, { "epoch": 0.4272745000325006, "grad_norm": 1.201431393623352, "learning_rate": 1.2264913428683186e-05, "loss": 0.2274, "step": 19720 }, { "epoch": 0.42738283535198146, "grad_norm": 1.6543240547180176, "learning_rate": 1.2261598288094451e-05, "loss": 0.3179, "step": 19725 }, { "epoch": 0.4274911706714623, "grad_norm": 2.0174248218536377, "learning_rate": 1.2258282885533432e-05, "loss": 0.3539, "step": 19730 }, { "epoch": 0.42759950599094315, "grad_norm": 1.5681791305541992, "learning_rate": 1.225496722138417e-05, "loss": 0.3508, "step": 19735 }, { "epoch": 0.427707841310424, "grad_norm": 1.5835598707199097, "learning_rate": 1.2251651296030735e-05, "loss": 0.296, "step": 19740 }, { "epoch": 0.4278161766299049, "grad_norm": 1.7644084692001343, "learning_rate": 1.2248335109857226e-05, "loss": 0.3311, "step": 19745 }, { "epoch": 0.42792451194938574, "grad_norm": 1.5151138305664062, "learning_rate": 1.2245018663247773e-05, "loss": 0.3101, "step": 19750 }, { "epoch": 0.4280328472688666, "grad_norm": 1.4947596788406372, "learning_rate": 1.2241701956586543e-05, "loss": 0.2701, "step": 19755 }, { "epoch": 0.42814118258834744, "grad_norm": 0.8635053038597107, "learning_rate": 1.2238384990257719e-05, "loss": 0.2049, "step": 19760 }, { "epoch": 0.42824951790782834, "grad_norm": 1.5885695219039917, "learning_rate": 1.2235067764645526e-05, "loss": 0.2568, "step": 19765 }, { "epoch": 0.4283578532273092, "grad_norm": 1.4699028730392456, "learning_rate": 1.2231750280134213e-05, "loss": 0.3296, "step": 19770 }, { "epoch": 0.42846618854679, "grad_norm": 0.8925603032112122, "learning_rate": 1.2228432537108061e-05, "loss": 0.3023, "step": 19775 }, { "epoch": 0.4285745238662709, "grad_norm": 1.4990266561508179, "learning_rate": 1.2225114535951384e-05, "loss": 0.274, "step": 19780 }, { "epoch": 0.4286828591857517, "grad_norm": 1.6422932147979736, "learning_rate": 1.2221796277048518e-05, "loss": 0.3255, "step": 19785 }, { "epoch": 0.4287911945052326, "grad_norm": 1.4685226678848267, "learning_rate": 1.2218477760783836e-05, "loss": 0.3921, "step": 19790 }, { "epoch": 0.42889952982471347, "grad_norm": 1.7450366020202637, "learning_rate": 1.2215158987541737e-05, "loss": 0.392, "step": 19795 }, { "epoch": 0.4290078651441943, "grad_norm": 1.233234167098999, "learning_rate": 1.2211839957706656e-05, "loss": 0.2848, "step": 19800 }, { "epoch": 0.42911620046367516, "grad_norm": 1.011638879776001, "learning_rate": 1.2208520671663047e-05, "loss": 0.2075, "step": 19805 }, { "epoch": 0.429224535783156, "grad_norm": 1.5626814365386963, "learning_rate": 1.2205201129795403e-05, "loss": 0.353, "step": 19810 }, { "epoch": 0.4293328711026369, "grad_norm": 1.591728925704956, "learning_rate": 1.2201881332488239e-05, "loss": 0.2485, "step": 19815 }, { "epoch": 0.42944120642211775, "grad_norm": 1.469622254371643, "learning_rate": 1.2198561280126109e-05, "loss": 0.3112, "step": 19820 }, { "epoch": 0.4295495417415986, "grad_norm": 1.2619521617889404, "learning_rate": 1.2195240973093589e-05, "loss": 0.328, "step": 19825 }, { "epoch": 0.42965787706107944, "grad_norm": 1.4507808685302734, "learning_rate": 1.2191920411775288e-05, "loss": 0.3014, "step": 19830 }, { "epoch": 0.4297662123805603, "grad_norm": 1.4504791498184204, "learning_rate": 1.2188599596555841e-05, "loss": 0.2874, "step": 19835 }, { "epoch": 0.4298745477000412, "grad_norm": 1.6858893632888794, "learning_rate": 1.2185278527819914e-05, "loss": 0.2726, "step": 19840 }, { "epoch": 0.42998288301952203, "grad_norm": 2.188246250152588, "learning_rate": 1.218195720595221e-05, "loss": 0.4433, "step": 19845 }, { "epoch": 0.4300912183390029, "grad_norm": 2.642956018447876, "learning_rate": 1.2178635631337447e-05, "loss": 0.3501, "step": 19850 }, { "epoch": 0.4301995536584837, "grad_norm": 1.0141003131866455, "learning_rate": 1.2175313804360382e-05, "loss": 0.2242, "step": 19855 }, { "epoch": 0.43030788897796457, "grad_norm": 1.3362318277359009, "learning_rate": 1.2171991725405799e-05, "loss": 0.3571, "step": 19860 }, { "epoch": 0.43041622429744547, "grad_norm": 1.1653822660446167, "learning_rate": 1.2168669394858507e-05, "loss": 0.2115, "step": 19865 }, { "epoch": 0.4305245596169263, "grad_norm": 1.3453569412231445, "learning_rate": 1.2165346813103358e-05, "loss": 0.2795, "step": 19870 }, { "epoch": 0.43063289493640716, "grad_norm": 1.3562508821487427, "learning_rate": 1.2162023980525215e-05, "loss": 0.3123, "step": 19875 }, { "epoch": 0.430741230255888, "grad_norm": 1.5774351358413696, "learning_rate": 1.2158700897508982e-05, "loss": 0.2565, "step": 19880 }, { "epoch": 0.4308495655753689, "grad_norm": 1.9353619813919067, "learning_rate": 1.2155377564439587e-05, "loss": 0.3188, "step": 19885 }, { "epoch": 0.43095790089484975, "grad_norm": 1.1666679382324219, "learning_rate": 1.215205398170199e-05, "loss": 0.4151, "step": 19890 }, { "epoch": 0.4310662362143306, "grad_norm": 1.7037465572357178, "learning_rate": 1.2148730149681176e-05, "loss": 0.2474, "step": 19895 }, { "epoch": 0.43117457153381145, "grad_norm": 1.5280734300613403, "learning_rate": 1.214540606876216e-05, "loss": 0.331, "step": 19900 }, { "epoch": 0.4312829068532923, "grad_norm": 1.850381851196289, "learning_rate": 1.2142081739329992e-05, "loss": 0.387, "step": 19905 }, { "epoch": 0.4313912421727732, "grad_norm": 1.5866045951843262, "learning_rate": 1.213875716176974e-05, "loss": 0.2683, "step": 19910 }, { "epoch": 0.43149957749225404, "grad_norm": 1.52667236328125, "learning_rate": 1.2135432336466511e-05, "loss": 0.2433, "step": 19915 }, { "epoch": 0.4316079128117349, "grad_norm": 1.5647236108779907, "learning_rate": 1.2132107263805434e-05, "loss": 0.2474, "step": 19920 }, { "epoch": 0.43171624813121573, "grad_norm": 1.7360472679138184, "learning_rate": 1.2128781944171673e-05, "loss": 0.2757, "step": 19925 }, { "epoch": 0.4318245834506966, "grad_norm": 1.219217300415039, "learning_rate": 1.2125456377950411e-05, "loss": 0.3893, "step": 19930 }, { "epoch": 0.4319329187701775, "grad_norm": 1.2598165273666382, "learning_rate": 1.2122130565526864e-05, "loss": 0.4389, "step": 19935 }, { "epoch": 0.4320412540896583, "grad_norm": 1.512257695198059, "learning_rate": 1.2118804507286283e-05, "loss": 0.2498, "step": 19940 }, { "epoch": 0.43214958940913917, "grad_norm": 1.6869540214538574, "learning_rate": 1.211547820361394e-05, "loss": 0.3345, "step": 19945 }, { "epoch": 0.43225792472862, "grad_norm": 1.7043919563293457, "learning_rate": 1.2112151654895134e-05, "loss": 0.3673, "step": 19950 }, { "epoch": 0.43236626004810086, "grad_norm": 1.408583641052246, "learning_rate": 1.2108824861515202e-05, "loss": 0.3792, "step": 19955 }, { "epoch": 0.43247459536758176, "grad_norm": 1.785232663154602, "learning_rate": 1.2105497823859498e-05, "loss": 0.3034, "step": 19960 }, { "epoch": 0.4325829306870626, "grad_norm": 3.5703155994415283, "learning_rate": 1.2102170542313414e-05, "loss": 0.2428, "step": 19965 }, { "epoch": 0.43269126600654345, "grad_norm": 2.495664596557617, "learning_rate": 1.2098843017262363e-05, "loss": 0.2862, "step": 19970 }, { "epoch": 0.4327996013260243, "grad_norm": 1.4832096099853516, "learning_rate": 1.2095515249091787e-05, "loss": 0.3547, "step": 19975 }, { "epoch": 0.43290793664550514, "grad_norm": 2.0291731357574463, "learning_rate": 1.2092187238187161e-05, "loss": 0.2889, "step": 19980 }, { "epoch": 0.43301627196498604, "grad_norm": 1.6418938636779785, "learning_rate": 1.2088858984933982e-05, "loss": 0.2554, "step": 19985 }, { "epoch": 0.4331246072844669, "grad_norm": 1.8753986358642578, "learning_rate": 1.2085530489717782e-05, "loss": 0.2924, "step": 19990 }, { "epoch": 0.43323294260394773, "grad_norm": 1.2049139738082886, "learning_rate": 1.208220175292412e-05, "loss": 0.3091, "step": 19995 }, { "epoch": 0.4333412779234286, "grad_norm": 1.681816577911377, "learning_rate": 1.2078872774938572e-05, "loss": 0.2731, "step": 20000 }, { "epoch": 0.4334496132429094, "grad_norm": 1.8826571702957153, "learning_rate": 1.2075543556146757e-05, "loss": 0.2633, "step": 20005 }, { "epoch": 0.4335579485623903, "grad_norm": 1.5579376220703125, "learning_rate": 1.207221409693431e-05, "loss": 0.2914, "step": 20010 }, { "epoch": 0.4336662838818712, "grad_norm": 1.6699905395507812, "learning_rate": 1.2068884397686904e-05, "loss": 0.3166, "step": 20015 }, { "epoch": 0.433774619201352, "grad_norm": 1.3462965488433838, "learning_rate": 1.2065554458790232e-05, "loss": 0.2831, "step": 20020 }, { "epoch": 0.43388295452083286, "grad_norm": 2.0016775131225586, "learning_rate": 1.2062224280630017e-05, "loss": 0.3641, "step": 20025 }, { "epoch": 0.43399128984031377, "grad_norm": 1.3547475337982178, "learning_rate": 1.205889386359201e-05, "loss": 0.2597, "step": 20030 }, { "epoch": 0.4340996251597946, "grad_norm": 1.6969799995422363, "learning_rate": 1.2055563208061994e-05, "loss": 0.3473, "step": 20035 }, { "epoch": 0.43420796047927546, "grad_norm": 1.6141881942749023, "learning_rate": 1.205223231442577e-05, "loss": 0.3401, "step": 20040 }, { "epoch": 0.4343162957987563, "grad_norm": 1.3213531970977783, "learning_rate": 1.2048901183069177e-05, "loss": 0.2734, "step": 20045 }, { "epoch": 0.43442463111823715, "grad_norm": 1.814285397529602, "learning_rate": 1.2045569814378076e-05, "loss": 0.2956, "step": 20050 }, { "epoch": 0.43453296643771805, "grad_norm": 1.781899333000183, "learning_rate": 1.2042238208738351e-05, "loss": 0.3224, "step": 20055 }, { "epoch": 0.4346413017571989, "grad_norm": 1.5301177501678467, "learning_rate": 1.2038906366535922e-05, "loss": 0.2358, "step": 20060 }, { "epoch": 0.43474963707667974, "grad_norm": 2.0267696380615234, "learning_rate": 1.2035574288156737e-05, "loss": 0.4237, "step": 20065 }, { "epoch": 0.4348579723961606, "grad_norm": 1.3158103227615356, "learning_rate": 1.203224197398676e-05, "loss": 0.3949, "step": 20070 }, { "epoch": 0.43496630771564143, "grad_norm": 1.4373446702957153, "learning_rate": 1.2028909424411996e-05, "loss": 0.2367, "step": 20075 }, { "epoch": 0.43507464303512233, "grad_norm": 1.4471423625946045, "learning_rate": 1.2025576639818466e-05, "loss": 0.2566, "step": 20080 }, { "epoch": 0.4351829783546032, "grad_norm": 1.796467900276184, "learning_rate": 1.2022243620592225e-05, "loss": 0.2295, "step": 20085 }, { "epoch": 0.435291313674084, "grad_norm": 2.5655016899108887, "learning_rate": 1.2018910367119358e-05, "loss": 0.3677, "step": 20090 }, { "epoch": 0.43539964899356487, "grad_norm": 1.1753073930740356, "learning_rate": 1.2015576879785966e-05, "loss": 0.2335, "step": 20095 }, { "epoch": 0.4355079843130457, "grad_norm": 1.5468775033950806, "learning_rate": 1.2012243158978185e-05, "loss": 0.3081, "step": 20100 }, { "epoch": 0.4356163196325266, "grad_norm": 1.5004163980484009, "learning_rate": 1.2008909205082175e-05, "loss": 0.305, "step": 20105 }, { "epoch": 0.43572465495200746, "grad_norm": 1.0654035806655884, "learning_rate": 1.200557501848413e-05, "loss": 0.2011, "step": 20110 }, { "epoch": 0.4358329902714883, "grad_norm": 1.8699287176132202, "learning_rate": 1.2002240599570264e-05, "loss": 0.3593, "step": 20115 }, { "epoch": 0.43594132559096915, "grad_norm": 1.621206283569336, "learning_rate": 1.1998905948726815e-05, "loss": 0.2545, "step": 20120 }, { "epoch": 0.43604966091045, "grad_norm": 2.1219069957733154, "learning_rate": 1.1995571066340059e-05, "loss": 0.2685, "step": 20125 }, { "epoch": 0.4361579962299309, "grad_norm": 1.7186524868011475, "learning_rate": 1.1992235952796287e-05, "loss": 0.2203, "step": 20130 }, { "epoch": 0.43626633154941175, "grad_norm": 1.3449358940124512, "learning_rate": 1.1988900608481821e-05, "loss": 0.2828, "step": 20135 }, { "epoch": 0.4363746668688926, "grad_norm": 1.1171256303787231, "learning_rate": 1.1985565033783018e-05, "loss": 0.2501, "step": 20140 }, { "epoch": 0.43648300218837344, "grad_norm": 1.3884426355361938, "learning_rate": 1.1982229229086247e-05, "loss": 0.3253, "step": 20145 }, { "epoch": 0.43659133750785434, "grad_norm": 1.757840871810913, "learning_rate": 1.197889319477791e-05, "loss": 0.3016, "step": 20150 }, { "epoch": 0.4366996728273352, "grad_norm": 1.3289847373962402, "learning_rate": 1.1975556931244445e-05, "loss": 0.2172, "step": 20155 }, { "epoch": 0.43680800814681603, "grad_norm": 1.0196315050125122, "learning_rate": 1.1972220438872302e-05, "loss": 0.2696, "step": 20160 }, { "epoch": 0.4369163434662969, "grad_norm": 1.866942048072815, "learning_rate": 1.1968883718047966e-05, "loss": 0.3819, "step": 20165 }, { "epoch": 0.4370246787857777, "grad_norm": 1.4790607690811157, "learning_rate": 1.1965546769157946e-05, "loss": 0.249, "step": 20170 }, { "epoch": 0.4371330141052586, "grad_norm": 1.3270409107208252, "learning_rate": 1.1962209592588773e-05, "loss": 0.2956, "step": 20175 }, { "epoch": 0.43724134942473947, "grad_norm": 1.1314234733581543, "learning_rate": 1.1958872188727015e-05, "loss": 0.3162, "step": 20180 }, { "epoch": 0.4373496847442203, "grad_norm": 1.4446830749511719, "learning_rate": 1.1955534557959258e-05, "loss": 0.2163, "step": 20185 }, { "epoch": 0.43745802006370116, "grad_norm": 1.0088635683059692, "learning_rate": 1.1952196700672116e-05, "loss": 0.2709, "step": 20190 }, { "epoch": 0.437566355383182, "grad_norm": 1.0739939212799072, "learning_rate": 1.194885861725223e-05, "loss": 0.2907, "step": 20195 }, { "epoch": 0.4376746907026629, "grad_norm": 1.4651966094970703, "learning_rate": 1.1945520308086265e-05, "loss": 0.2142, "step": 20200 }, { "epoch": 0.43778302602214375, "grad_norm": 1.3203986883163452, "learning_rate": 1.1942181773560918e-05, "loss": 0.2602, "step": 20205 }, { "epoch": 0.4378913613416246, "grad_norm": 0.909661054611206, "learning_rate": 1.1938843014062908e-05, "loss": 0.2052, "step": 20210 }, { "epoch": 0.43799969666110544, "grad_norm": 1.2020865678787231, "learning_rate": 1.193550402997898e-05, "loss": 0.2574, "step": 20215 }, { "epoch": 0.4381080319805863, "grad_norm": 1.1462056636810303, "learning_rate": 1.19321648216959e-05, "loss": 0.3771, "step": 20220 }, { "epoch": 0.4382163673000672, "grad_norm": 1.143815040588379, "learning_rate": 1.192882538960047e-05, "loss": 0.226, "step": 20225 }, { "epoch": 0.43832470261954803, "grad_norm": 1.566117525100708, "learning_rate": 1.1925485734079512e-05, "loss": 0.3029, "step": 20230 }, { "epoch": 0.4384330379390289, "grad_norm": 1.1881288290023804, "learning_rate": 1.1922145855519876e-05, "loss": 0.3392, "step": 20235 }, { "epoch": 0.4385413732585097, "grad_norm": 1.4101548194885254, "learning_rate": 1.1918805754308437e-05, "loss": 0.2238, "step": 20240 }, { "epoch": 0.43864970857799057, "grad_norm": 1.1756513118743896, "learning_rate": 1.1915465430832096e-05, "loss": 0.2524, "step": 20245 }, { "epoch": 0.4387580438974715, "grad_norm": 2.058903217315674, "learning_rate": 1.1912124885477777e-05, "loss": 0.392, "step": 20250 }, { "epoch": 0.4388663792169523, "grad_norm": 1.4713590145111084, "learning_rate": 1.1908784118632432e-05, "loss": 0.2693, "step": 20255 }, { "epoch": 0.43897471453643316, "grad_norm": 1.563079595565796, "learning_rate": 1.1905443130683046e-05, "loss": 0.2841, "step": 20260 }, { "epoch": 0.439083049855914, "grad_norm": 2.2853469848632812, "learning_rate": 1.1902101922016612e-05, "loss": 0.3003, "step": 20265 }, { "epoch": 0.43919138517539486, "grad_norm": 1.45400869846344, "learning_rate": 1.1898760493020165e-05, "loss": 0.3574, "step": 20270 }, { "epoch": 0.43929972049487576, "grad_norm": 1.9907106161117554, "learning_rate": 1.1895418844080757e-05, "loss": 0.2716, "step": 20275 }, { "epoch": 0.4394080558143566, "grad_norm": 1.790107250213623, "learning_rate": 1.1892076975585471e-05, "loss": 0.2836, "step": 20280 }, { "epoch": 0.43951639113383745, "grad_norm": 1.2517528533935547, "learning_rate": 1.1888734887921411e-05, "loss": 0.2234, "step": 20285 }, { "epoch": 0.4396247264533183, "grad_norm": 0.9370266199111938, "learning_rate": 1.1885392581475705e-05, "loss": 0.1423, "step": 20290 }, { "epoch": 0.4397330617727992, "grad_norm": 1.747635006904602, "learning_rate": 1.1882050056635514e-05, "loss": 0.2632, "step": 20295 }, { "epoch": 0.43984139709228004, "grad_norm": 2.297255277633667, "learning_rate": 1.1878707313788019e-05, "loss": 0.2488, "step": 20300 }, { "epoch": 0.4399497324117609, "grad_norm": 0.987669050693512, "learning_rate": 1.1875364353320422e-05, "loss": 0.201, "step": 20305 }, { "epoch": 0.44005806773124173, "grad_norm": 2.1959424018859863, "learning_rate": 1.1872021175619957e-05, "loss": 0.2849, "step": 20310 }, { "epoch": 0.4401664030507226, "grad_norm": 1.6924248933792114, "learning_rate": 1.1868677781073883e-05, "loss": 0.2606, "step": 20315 }, { "epoch": 0.4402747383702035, "grad_norm": 1.1155195236206055, "learning_rate": 1.1865334170069479e-05, "loss": 0.3533, "step": 20320 }, { "epoch": 0.4403830736896843, "grad_norm": 2.114103317260742, "learning_rate": 1.1861990342994054e-05, "loss": 0.2647, "step": 20325 }, { "epoch": 0.44049140900916517, "grad_norm": 1.4404489994049072, "learning_rate": 1.1858646300234946e-05, "loss": 0.2183, "step": 20330 }, { "epoch": 0.440599744328646, "grad_norm": 1.2426234483718872, "learning_rate": 1.1855302042179502e-05, "loss": 0.3314, "step": 20335 }, { "epoch": 0.44070807964812686, "grad_norm": 1.5552643537521362, "learning_rate": 1.185195756921511e-05, "loss": 0.257, "step": 20340 }, { "epoch": 0.44081641496760776, "grad_norm": 1.5412508249282837, "learning_rate": 1.1848612881729173e-05, "loss": 0.2876, "step": 20345 }, { "epoch": 0.4409247502870886, "grad_norm": 1.141156554222107, "learning_rate": 1.184526798010913e-05, "loss": 0.2708, "step": 20350 }, { "epoch": 0.44103308560656945, "grad_norm": 1.7660690546035767, "learning_rate": 1.184192286474243e-05, "loss": 0.29, "step": 20355 }, { "epoch": 0.4411414209260503, "grad_norm": 1.546645998954773, "learning_rate": 1.183857753601656e-05, "loss": 0.2472, "step": 20360 }, { "epoch": 0.44124975624553114, "grad_norm": 1.977723479270935, "learning_rate": 1.1835231994319025e-05, "loss": 0.3068, "step": 20365 }, { "epoch": 0.44135809156501205, "grad_norm": 1.6483200788497925, "learning_rate": 1.1831886240037356e-05, "loss": 0.3664, "step": 20370 }, { "epoch": 0.4414664268844929, "grad_norm": 1.2552765607833862, "learning_rate": 1.1828540273559105e-05, "loss": 0.2702, "step": 20375 }, { "epoch": 0.44157476220397374, "grad_norm": 1.3373287916183472, "learning_rate": 1.1825194095271857e-05, "loss": 0.1848, "step": 20380 }, { "epoch": 0.4416830975234546, "grad_norm": 2.0834641456604004, "learning_rate": 1.1821847705563215e-05, "loss": 0.4158, "step": 20385 }, { "epoch": 0.4417914328429354, "grad_norm": 1.4736968278884888, "learning_rate": 1.1818501104820805e-05, "loss": 0.2252, "step": 20390 }, { "epoch": 0.44189976816241633, "grad_norm": 1.6176981925964355, "learning_rate": 1.1815154293432283e-05, "loss": 0.3216, "step": 20395 }, { "epoch": 0.4420081034818972, "grad_norm": 1.8761069774627686, "learning_rate": 1.1811807271785327e-05, "loss": 0.3603, "step": 20400 }, { "epoch": 0.442116438801378, "grad_norm": 2.1178250312805176, "learning_rate": 1.1808460040267642e-05, "loss": 0.2661, "step": 20405 }, { "epoch": 0.44222477412085887, "grad_norm": 1.4544910192489624, "learning_rate": 1.180511259926695e-05, "loss": 0.3385, "step": 20410 }, { "epoch": 0.44233310944033977, "grad_norm": 1.7645683288574219, "learning_rate": 1.1801764949171007e-05, "loss": 0.264, "step": 20415 }, { "epoch": 0.4424414447598206, "grad_norm": 1.4699162244796753, "learning_rate": 1.1798417090367582e-05, "loss": 0.2832, "step": 20420 }, { "epoch": 0.44254978007930146, "grad_norm": 1.344251036643982, "learning_rate": 1.1795069023244478e-05, "loss": 0.2853, "step": 20425 }, { "epoch": 0.4426581153987823, "grad_norm": 1.3800241947174072, "learning_rate": 1.1791720748189519e-05, "loss": 0.2596, "step": 20430 }, { "epoch": 0.44276645071826315, "grad_norm": 1.2800229787826538, "learning_rate": 1.178837226559055e-05, "loss": 0.2625, "step": 20435 }, { "epoch": 0.44287478603774405, "grad_norm": 1.6473931074142456, "learning_rate": 1.1785023575835443e-05, "loss": 0.2665, "step": 20440 }, { "epoch": 0.4429831213572249, "grad_norm": 1.496649980545044, "learning_rate": 1.1781674679312096e-05, "loss": 0.3041, "step": 20445 }, { "epoch": 0.44309145667670574, "grad_norm": 1.7679375410079956, "learning_rate": 1.177832557640843e-05, "loss": 0.2423, "step": 20450 }, { "epoch": 0.4431997919961866, "grad_norm": 1.3435618877410889, "learning_rate": 1.177497626751238e-05, "loss": 0.2886, "step": 20455 }, { "epoch": 0.44330812731566743, "grad_norm": 1.3425287008285522, "learning_rate": 1.177162675301192e-05, "loss": 0.2603, "step": 20460 }, { "epoch": 0.44341646263514833, "grad_norm": 1.819329857826233, "learning_rate": 1.176827703329504e-05, "loss": 0.237, "step": 20465 }, { "epoch": 0.4435247979546292, "grad_norm": 2.340681314468384, "learning_rate": 1.1764927108749752e-05, "loss": 0.3178, "step": 20470 }, { "epoch": 0.44363313327411, "grad_norm": 1.9752037525177002, "learning_rate": 1.1761576979764101e-05, "loss": 0.3629, "step": 20475 }, { "epoch": 0.44374146859359087, "grad_norm": 1.5667136907577515, "learning_rate": 1.1758226646726145e-05, "loss": 0.2833, "step": 20480 }, { "epoch": 0.4438498039130717, "grad_norm": 1.8110405206680298, "learning_rate": 1.175487611002397e-05, "loss": 0.304, "step": 20485 }, { "epoch": 0.4439581392325526, "grad_norm": 0.8567072749137878, "learning_rate": 1.1751525370045685e-05, "loss": 0.2311, "step": 20490 }, { "epoch": 0.44406647455203346, "grad_norm": 1.2871569395065308, "learning_rate": 1.1748174427179421e-05, "loss": 0.3633, "step": 20495 }, { "epoch": 0.4441748098715143, "grad_norm": 1.5549529790878296, "learning_rate": 1.1744823281813343e-05, "loss": 0.2713, "step": 20500 }, { "epoch": 0.44428314519099515, "grad_norm": 1.2136868238449097, "learning_rate": 1.174147193433562e-05, "loss": 0.3201, "step": 20505 }, { "epoch": 0.444391480510476, "grad_norm": 1.6284921169281006, "learning_rate": 1.1738120385134463e-05, "loss": 0.2727, "step": 20510 }, { "epoch": 0.4444998158299569, "grad_norm": 1.8998486995697021, "learning_rate": 1.1734768634598094e-05, "loss": 0.2999, "step": 20515 }, { "epoch": 0.44460815114943775, "grad_norm": 2.15146541595459, "learning_rate": 1.1731416683114768e-05, "loss": 0.2849, "step": 20520 }, { "epoch": 0.4447164864689186, "grad_norm": 1.2143279314041138, "learning_rate": 1.1728064531072757e-05, "loss": 0.2243, "step": 20525 }, { "epoch": 0.44482482178839944, "grad_norm": 1.6535619497299194, "learning_rate": 1.1724712178860354e-05, "loss": 0.2358, "step": 20530 }, { "epoch": 0.4449331571078803, "grad_norm": 1.5646787881851196, "learning_rate": 1.1721359626865885e-05, "loss": 0.2326, "step": 20535 }, { "epoch": 0.4450414924273612, "grad_norm": 1.430440068244934, "learning_rate": 1.1718006875477682e-05, "loss": 0.3345, "step": 20540 }, { "epoch": 0.44514982774684203, "grad_norm": 1.415330410003662, "learning_rate": 1.1714653925084126e-05, "loss": 0.269, "step": 20545 }, { "epoch": 0.4452581630663229, "grad_norm": 1.6586127281188965, "learning_rate": 1.1711300776073595e-05, "loss": 0.3931, "step": 20550 }, { "epoch": 0.4453664983858037, "grad_norm": 1.3902291059494019, "learning_rate": 1.1707947428834503e-05, "loss": 0.1845, "step": 20555 }, { "epoch": 0.4454748337052846, "grad_norm": 1.4366352558135986, "learning_rate": 1.1704593883755287e-05, "loss": 0.2492, "step": 20560 }, { "epoch": 0.44558316902476547, "grad_norm": 1.4773569107055664, "learning_rate": 1.1701240141224406e-05, "loss": 0.3694, "step": 20565 }, { "epoch": 0.4456915043442463, "grad_norm": 1.181671142578125, "learning_rate": 1.169788620163034e-05, "loss": 0.2324, "step": 20570 }, { "epoch": 0.44579983966372716, "grad_norm": 1.0967317819595337, "learning_rate": 1.1694532065361591e-05, "loss": 0.2475, "step": 20575 }, { "epoch": 0.445908174983208, "grad_norm": 1.631986141204834, "learning_rate": 1.1691177732806685e-05, "loss": 0.2941, "step": 20580 }, { "epoch": 0.4460165103026889, "grad_norm": 0.7675203680992126, "learning_rate": 1.1687823204354172e-05, "loss": 0.2932, "step": 20585 }, { "epoch": 0.44612484562216975, "grad_norm": 1.656131625175476, "learning_rate": 1.1684468480392624e-05, "loss": 0.2584, "step": 20590 }, { "epoch": 0.4462331809416506, "grad_norm": 1.2165641784667969, "learning_rate": 1.1681113561310638e-05, "loss": 0.3736, "step": 20595 }, { "epoch": 0.44634151626113144, "grad_norm": 1.4093997478485107, "learning_rate": 1.1677758447496827e-05, "loss": 0.3348, "step": 20600 }, { "epoch": 0.4464498515806123, "grad_norm": 1.533488392829895, "learning_rate": 1.1674403139339836e-05, "loss": 0.2774, "step": 20605 }, { "epoch": 0.4465581869000932, "grad_norm": 1.9185127019882202, "learning_rate": 1.1671047637228322e-05, "loss": 0.2373, "step": 20610 }, { "epoch": 0.44666652221957404, "grad_norm": 1.724062442779541, "learning_rate": 1.166769194155097e-05, "loss": 0.2981, "step": 20615 }, { "epoch": 0.4467748575390549, "grad_norm": 2.3148839473724365, "learning_rate": 1.1664336052696489e-05, "loss": 0.2467, "step": 20620 }, { "epoch": 0.4468831928585357, "grad_norm": 1.5027488470077515, "learning_rate": 1.1660979971053612e-05, "loss": 0.2205, "step": 20625 }, { "epoch": 0.4469915281780166, "grad_norm": 1.7988144159317017, "learning_rate": 1.1657623697011082e-05, "loss": 0.4066, "step": 20630 }, { "epoch": 0.4470998634974975, "grad_norm": 1.3186553716659546, "learning_rate": 1.165426723095768e-05, "loss": 0.3148, "step": 20635 }, { "epoch": 0.4472081988169783, "grad_norm": 1.6066468954086304, "learning_rate": 1.16509105732822e-05, "loss": 0.3106, "step": 20640 }, { "epoch": 0.44731653413645917, "grad_norm": 1.318441390991211, "learning_rate": 1.1647553724373459e-05, "loss": 0.3106, "step": 20645 }, { "epoch": 0.44742486945594, "grad_norm": 1.7948834896087646, "learning_rate": 1.1644196684620302e-05, "loss": 0.2487, "step": 20650 }, { "epoch": 0.44753320477542086, "grad_norm": 2.05692195892334, "learning_rate": 1.164083945441159e-05, "loss": 0.2899, "step": 20655 }, { "epoch": 0.44764154009490176, "grad_norm": 1.2247190475463867, "learning_rate": 1.1637482034136203e-05, "loss": 0.1759, "step": 20660 }, { "epoch": 0.4477498754143826, "grad_norm": 1.3218857049942017, "learning_rate": 1.1634124424183055e-05, "loss": 0.258, "step": 20665 }, { "epoch": 0.44785821073386345, "grad_norm": 1.927891492843628, "learning_rate": 1.163076662494107e-05, "loss": 0.3358, "step": 20670 }, { "epoch": 0.4479665460533443, "grad_norm": 2.0799367427825928, "learning_rate": 1.1627408636799202e-05, "loss": 0.3069, "step": 20675 }, { "epoch": 0.4480748813728252, "grad_norm": 1.414373755455017, "learning_rate": 1.162405046014642e-05, "loss": 0.184, "step": 20680 }, { "epoch": 0.44818321669230604, "grad_norm": 2.359635829925537, "learning_rate": 1.1620692095371719e-05, "loss": 0.3726, "step": 20685 }, { "epoch": 0.4482915520117869, "grad_norm": 1.771723747253418, "learning_rate": 1.161733354286412e-05, "loss": 0.2779, "step": 20690 }, { "epoch": 0.44839988733126773, "grad_norm": 1.3699803352355957, "learning_rate": 1.161397480301266e-05, "loss": 0.1851, "step": 20695 }, { "epoch": 0.4485082226507486, "grad_norm": 1.263798713684082, "learning_rate": 1.1610615876206395e-05, "loss": 0.3087, "step": 20700 }, { "epoch": 0.4486165579702295, "grad_norm": 2.5229997634887695, "learning_rate": 1.1607256762834402e-05, "loss": 0.293, "step": 20705 }, { "epoch": 0.4487248932897103, "grad_norm": 1.6163551807403564, "learning_rate": 1.1603897463285793e-05, "loss": 0.3596, "step": 20710 }, { "epoch": 0.44883322860919117, "grad_norm": 2.260303020477295, "learning_rate": 1.1600537977949694e-05, "loss": 0.2735, "step": 20715 }, { "epoch": 0.448941563928672, "grad_norm": 1.8353618383407593, "learning_rate": 1.159717830721524e-05, "loss": 0.2675, "step": 20720 }, { "epoch": 0.44904989924815286, "grad_norm": 1.366123080253601, "learning_rate": 1.1593818451471608e-05, "loss": 0.334, "step": 20725 }, { "epoch": 0.44915823456763376, "grad_norm": 1.5771985054016113, "learning_rate": 1.1590458411107983e-05, "loss": 0.209, "step": 20730 }, { "epoch": 0.4492665698871146, "grad_norm": 1.6373778581619263, "learning_rate": 1.1587098186513576e-05, "loss": 0.223, "step": 20735 }, { "epoch": 0.44937490520659545, "grad_norm": 1.7234923839569092, "learning_rate": 1.158373777807762e-05, "loss": 0.2816, "step": 20740 }, { "epoch": 0.4494832405260763, "grad_norm": 1.6805392503738403, "learning_rate": 1.1580377186189367e-05, "loss": 0.3209, "step": 20745 }, { "epoch": 0.44959157584555715, "grad_norm": 1.584657073020935, "learning_rate": 1.1577016411238089e-05, "loss": 0.2845, "step": 20750 }, { "epoch": 0.44969991116503805, "grad_norm": 1.4880174398422241, "learning_rate": 1.1573655453613082e-05, "loss": 0.3298, "step": 20755 }, { "epoch": 0.4498082464845189, "grad_norm": 1.5569320917129517, "learning_rate": 1.1570294313703667e-05, "loss": 0.179, "step": 20760 }, { "epoch": 0.44991658180399974, "grad_norm": 1.953041434288025, "learning_rate": 1.1566932991899178e-05, "loss": 0.3215, "step": 20765 }, { "epoch": 0.4500249171234806, "grad_norm": 1.0100328922271729, "learning_rate": 1.1563571488588975e-05, "loss": 0.2404, "step": 20770 }, { "epoch": 0.45013325244296143, "grad_norm": 1.4951871633529663, "learning_rate": 1.1560209804162437e-05, "loss": 0.3081, "step": 20775 }, { "epoch": 0.45024158776244233, "grad_norm": 1.5064786672592163, "learning_rate": 1.1556847939008966e-05, "loss": 0.2727, "step": 20780 }, { "epoch": 0.4503499230819232, "grad_norm": 1.168134331703186, "learning_rate": 1.1553485893517981e-05, "loss": 0.1956, "step": 20785 }, { "epoch": 0.450458258401404, "grad_norm": 1.2282365560531616, "learning_rate": 1.1550123668078927e-05, "loss": 0.1932, "step": 20790 }, { "epoch": 0.45056659372088487, "grad_norm": 1.0949254035949707, "learning_rate": 1.1546761263081267e-05, "loss": 0.3323, "step": 20795 }, { "epoch": 0.4506749290403657, "grad_norm": 1.7329987287521362, "learning_rate": 1.1543398678914488e-05, "loss": 0.2251, "step": 20800 }, { "epoch": 0.4507832643598466, "grad_norm": 1.6032657623291016, "learning_rate": 1.1540035915968087e-05, "loss": 0.2746, "step": 20805 }, { "epoch": 0.45089159967932746, "grad_norm": 1.2671748399734497, "learning_rate": 1.1536672974631597e-05, "loss": 0.2366, "step": 20810 }, { "epoch": 0.4509999349988083, "grad_norm": 1.2935731410980225, "learning_rate": 1.1533309855294566e-05, "loss": 0.2353, "step": 20815 }, { "epoch": 0.45110827031828915, "grad_norm": 2.096259117126465, "learning_rate": 1.1529946558346553e-05, "loss": 0.2718, "step": 20820 }, { "epoch": 0.45121660563777005, "grad_norm": 2.1073713302612305, "learning_rate": 1.152658308417715e-05, "loss": 0.2796, "step": 20825 }, { "epoch": 0.4513249409572509, "grad_norm": 1.6374459266662598, "learning_rate": 1.1523219433175965e-05, "loss": 0.3229, "step": 20830 }, { "epoch": 0.45143327627673174, "grad_norm": 1.9794518947601318, "learning_rate": 1.1519855605732629e-05, "loss": 0.436, "step": 20835 }, { "epoch": 0.4515416115962126, "grad_norm": 1.1856474876403809, "learning_rate": 1.1516491602236786e-05, "loss": 0.349, "step": 20840 }, { "epoch": 0.45164994691569343, "grad_norm": 1.9817622900009155, "learning_rate": 1.1513127423078107e-05, "loss": 0.228, "step": 20845 }, { "epoch": 0.45175828223517434, "grad_norm": 1.3337138891220093, "learning_rate": 1.1509763068646288e-05, "loss": 0.3526, "step": 20850 }, { "epoch": 0.4518666175546552, "grad_norm": 2.151193141937256, "learning_rate": 1.150639853933103e-05, "loss": 0.3693, "step": 20855 }, { "epoch": 0.451974952874136, "grad_norm": 0.7772493362426758, "learning_rate": 1.1503033835522066e-05, "loss": 0.3385, "step": 20860 }, { "epoch": 0.4520832881936169, "grad_norm": 1.8474503755569458, "learning_rate": 1.149966895760915e-05, "loss": 0.3527, "step": 20865 }, { "epoch": 0.4521916235130977, "grad_norm": 1.2125946283340454, "learning_rate": 1.1496303905982046e-05, "loss": 0.2097, "step": 20870 }, { "epoch": 0.4522999588325786, "grad_norm": 1.067652702331543, "learning_rate": 1.1492938681030551e-05, "loss": 0.2692, "step": 20875 }, { "epoch": 0.45240829415205946, "grad_norm": 2.0059640407562256, "learning_rate": 1.1489573283144477e-05, "loss": 0.2689, "step": 20880 }, { "epoch": 0.4525166294715403, "grad_norm": 1.5303831100463867, "learning_rate": 1.148620771271365e-05, "loss": 0.2667, "step": 20885 }, { "epoch": 0.45262496479102116, "grad_norm": 0.885542094707489, "learning_rate": 1.1482841970127922e-05, "loss": 0.1949, "step": 20890 }, { "epoch": 0.452733300110502, "grad_norm": 1.128376841545105, "learning_rate": 1.1479476055777167e-05, "loss": 0.2677, "step": 20895 }, { "epoch": 0.4528416354299829, "grad_norm": 1.2796732187271118, "learning_rate": 1.1476109970051272e-05, "loss": 0.268, "step": 20900 }, { "epoch": 0.45294997074946375, "grad_norm": 1.3968491554260254, "learning_rate": 1.147274371334015e-05, "loss": 0.2885, "step": 20905 }, { "epoch": 0.4530583060689446, "grad_norm": 2.989964246749878, "learning_rate": 1.1469377286033729e-05, "loss": 0.3107, "step": 20910 }, { "epoch": 0.45316664138842544, "grad_norm": 1.3700673580169678, "learning_rate": 1.1466010688521962e-05, "loss": 0.2259, "step": 20915 }, { "epoch": 0.4532749767079063, "grad_norm": 1.1285089254379272, "learning_rate": 1.1462643921194819e-05, "loss": 0.3604, "step": 20920 }, { "epoch": 0.4533833120273872, "grad_norm": 2.0835630893707275, "learning_rate": 1.1459276984442283e-05, "loss": 0.3155, "step": 20925 }, { "epoch": 0.45349164734686803, "grad_norm": 1.2538572549819946, "learning_rate": 1.1455909878654372e-05, "loss": 0.2638, "step": 20930 }, { "epoch": 0.4535999826663489, "grad_norm": 1.817663311958313, "learning_rate": 1.1452542604221113e-05, "loss": 0.3266, "step": 20935 }, { "epoch": 0.4537083179858297, "grad_norm": 1.2501437664031982, "learning_rate": 1.1449175161532551e-05, "loss": 0.2292, "step": 20940 }, { "epoch": 0.4538166533053106, "grad_norm": 2.3866448402404785, "learning_rate": 1.1445807550978751e-05, "loss": 0.2794, "step": 20945 }, { "epoch": 0.45392498862479147, "grad_norm": 1.3896849155426025, "learning_rate": 1.144243977294981e-05, "loss": 0.2531, "step": 20950 }, { "epoch": 0.4540333239442723, "grad_norm": 0.9149044156074524, "learning_rate": 1.1439071827835826e-05, "loss": 0.2116, "step": 20955 }, { "epoch": 0.45414165926375316, "grad_norm": 1.2391972541809082, "learning_rate": 1.1435703716026926e-05, "loss": 0.2029, "step": 20960 }, { "epoch": 0.454249994583234, "grad_norm": 1.7485889196395874, "learning_rate": 1.1432335437913262e-05, "loss": 0.2802, "step": 20965 }, { "epoch": 0.4543583299027149, "grad_norm": 1.0132780075073242, "learning_rate": 1.1428966993884991e-05, "loss": 0.2013, "step": 20970 }, { "epoch": 0.45446666522219575, "grad_norm": 0.9417344331741333, "learning_rate": 1.1425598384332302e-05, "loss": 0.2812, "step": 20975 }, { "epoch": 0.4545750005416766, "grad_norm": 1.7612851858139038, "learning_rate": 1.1422229609645394e-05, "loss": 0.2708, "step": 20980 }, { "epoch": 0.45468333586115744, "grad_norm": 1.5501033067703247, "learning_rate": 1.1418860670214492e-05, "loss": 0.32, "step": 20985 }, { "epoch": 0.4547916711806383, "grad_norm": 2.8559188842773438, "learning_rate": 1.1415491566429836e-05, "loss": 0.293, "step": 20990 }, { "epoch": 0.4549000065001192, "grad_norm": 1.358201265335083, "learning_rate": 1.1412122298681683e-05, "loss": 0.2764, "step": 20995 }, { "epoch": 0.45500834181960004, "grad_norm": 1.4163761138916016, "learning_rate": 1.1408752867360315e-05, "loss": 0.2405, "step": 21000 }, { "epoch": 0.4551166771390809, "grad_norm": 1.2913637161254883, "learning_rate": 1.1405383272856034e-05, "loss": 0.2128, "step": 21005 }, { "epoch": 0.45522501245856173, "grad_norm": 1.1802886724472046, "learning_rate": 1.1402013515559154e-05, "loss": 0.2541, "step": 21010 }, { "epoch": 0.4553333477780426, "grad_norm": 1.639206886291504, "learning_rate": 1.1398643595860008e-05, "loss": 0.2918, "step": 21015 }, { "epoch": 0.4554416830975235, "grad_norm": 1.160046100616455, "learning_rate": 1.1395273514148952e-05, "loss": 0.2939, "step": 21020 }, { "epoch": 0.4555500184170043, "grad_norm": 1.6330854892730713, "learning_rate": 1.1391903270816364e-05, "loss": 0.3116, "step": 21025 }, { "epoch": 0.45565835373648517, "grad_norm": 1.3640272617340088, "learning_rate": 1.138853286625263e-05, "loss": 0.2857, "step": 21030 }, { "epoch": 0.455766689055966, "grad_norm": 1.3585312366485596, "learning_rate": 1.1385162300848164e-05, "loss": 0.2628, "step": 21035 }, { "epoch": 0.45587502437544686, "grad_norm": 1.500733494758606, "learning_rate": 1.1381791574993397e-05, "loss": 0.3213, "step": 21040 }, { "epoch": 0.45598335969492776, "grad_norm": 1.5788037776947021, "learning_rate": 1.1378420689078773e-05, "loss": 0.2077, "step": 21045 }, { "epoch": 0.4560916950144086, "grad_norm": 1.473830223083496, "learning_rate": 1.1375049643494761e-05, "loss": 0.2419, "step": 21050 }, { "epoch": 0.45620003033388945, "grad_norm": 1.9005963802337646, "learning_rate": 1.137167843863185e-05, "loss": 0.3089, "step": 21055 }, { "epoch": 0.4563083656533703, "grad_norm": 1.593883752822876, "learning_rate": 1.1368307074880538e-05, "loss": 0.2805, "step": 21060 }, { "epoch": 0.45641670097285114, "grad_norm": 1.2113124132156372, "learning_rate": 1.1364935552631347e-05, "loss": 0.2603, "step": 21065 }, { "epoch": 0.45652503629233204, "grad_norm": 1.9489109516143799, "learning_rate": 1.1361563872274817e-05, "loss": 0.2955, "step": 21070 }, { "epoch": 0.4566333716118129, "grad_norm": 0.9961859583854675, "learning_rate": 1.1358192034201512e-05, "loss": 0.2411, "step": 21075 }, { "epoch": 0.45674170693129373, "grad_norm": 1.2953344583511353, "learning_rate": 1.1354820038802003e-05, "loss": 0.2677, "step": 21080 }, { "epoch": 0.4568500422507746, "grad_norm": 1.1163673400878906, "learning_rate": 1.1351447886466891e-05, "loss": 0.2508, "step": 21085 }, { "epoch": 0.4569583775702555, "grad_norm": 2.05781626701355, "learning_rate": 1.1348075577586783e-05, "loss": 0.2999, "step": 21090 }, { "epoch": 0.4570667128897363, "grad_norm": 1.1181803941726685, "learning_rate": 1.1344703112552315e-05, "loss": 0.3551, "step": 21095 }, { "epoch": 0.45717504820921717, "grad_norm": 0.975010097026825, "learning_rate": 1.1341330491754137e-05, "loss": 0.2455, "step": 21100 }, { "epoch": 0.457283383528698, "grad_norm": 0.9710100889205933, "learning_rate": 1.1337957715582912e-05, "loss": 0.2012, "step": 21105 }, { "epoch": 0.45739171884817886, "grad_norm": 1.2972556352615356, "learning_rate": 1.1334584784429328e-05, "loss": 0.3323, "step": 21110 }, { "epoch": 0.45750005416765976, "grad_norm": 1.0611765384674072, "learning_rate": 1.1331211698684087e-05, "loss": 0.3244, "step": 21115 }, { "epoch": 0.4576083894871406, "grad_norm": 0.9493811726570129, "learning_rate": 1.1327838458737917e-05, "loss": 0.1728, "step": 21120 }, { "epoch": 0.45771672480662146, "grad_norm": 1.5980738401412964, "learning_rate": 1.132446506498155e-05, "loss": 0.274, "step": 21125 }, { "epoch": 0.4578250601261023, "grad_norm": 1.693533182144165, "learning_rate": 1.1321091517805746e-05, "loss": 0.2266, "step": 21130 }, { "epoch": 0.45793339544558315, "grad_norm": 1.4165446758270264, "learning_rate": 1.131771781760128e-05, "loss": 0.3697, "step": 21135 }, { "epoch": 0.45804173076506405, "grad_norm": 1.5020962953567505, "learning_rate": 1.1314343964758945e-05, "loss": 0.2703, "step": 21140 }, { "epoch": 0.4581500660845449, "grad_norm": 1.127817988395691, "learning_rate": 1.1310969959669548e-05, "loss": 0.3266, "step": 21145 }, { "epoch": 0.45825840140402574, "grad_norm": 2.057835817337036, "learning_rate": 1.1307595802723922e-05, "loss": 0.2564, "step": 21150 }, { "epoch": 0.4583667367235066, "grad_norm": 2.129873752593994, "learning_rate": 1.1304221494312909e-05, "loss": 0.2608, "step": 21155 }, { "epoch": 0.45847507204298743, "grad_norm": 1.6885799169540405, "learning_rate": 1.1300847034827373e-05, "loss": 0.3133, "step": 21160 }, { "epoch": 0.45858340736246833, "grad_norm": 0.975649356842041, "learning_rate": 1.1297472424658194e-05, "loss": 0.2278, "step": 21165 }, { "epoch": 0.4586917426819492, "grad_norm": 2.122814416885376, "learning_rate": 1.129409766419627e-05, "loss": 0.2288, "step": 21170 }, { "epoch": 0.45880007800143, "grad_norm": 1.5365674495697021, "learning_rate": 1.129072275383252e-05, "loss": 0.2782, "step": 21175 }, { "epoch": 0.45890841332091087, "grad_norm": 1.2923765182495117, "learning_rate": 1.1287347693957874e-05, "loss": 0.1765, "step": 21180 }, { "epoch": 0.4590167486403917, "grad_norm": 1.3049092292785645, "learning_rate": 1.1283972484963282e-05, "loss": 0.3182, "step": 21185 }, { "epoch": 0.4591250839598726, "grad_norm": 1.246934413909912, "learning_rate": 1.1280597127239707e-05, "loss": 0.4304, "step": 21190 }, { "epoch": 0.45923341927935346, "grad_norm": 1.4446886777877808, "learning_rate": 1.1277221621178143e-05, "loss": 0.3264, "step": 21195 }, { "epoch": 0.4593417545988343, "grad_norm": 1.3790982961654663, "learning_rate": 1.1273845967169585e-05, "loss": 0.2564, "step": 21200 }, { "epoch": 0.45945008991831515, "grad_norm": 2.2596018314361572, "learning_rate": 1.1270470165605054e-05, "loss": 0.3139, "step": 21205 }, { "epoch": 0.45955842523779605, "grad_norm": 1.4085866212844849, "learning_rate": 1.1267094216875584e-05, "loss": 0.2447, "step": 21210 }, { "epoch": 0.4596667605572769, "grad_norm": 2.4341866970062256, "learning_rate": 1.1263718121372236e-05, "loss": 0.3477, "step": 21215 }, { "epoch": 0.45977509587675774, "grad_norm": 1.1109087467193604, "learning_rate": 1.1260341879486071e-05, "loss": 0.2728, "step": 21220 }, { "epoch": 0.4598834311962386, "grad_norm": 1.4939824342727661, "learning_rate": 1.1256965491608178e-05, "loss": 0.2696, "step": 21225 }, { "epoch": 0.45999176651571944, "grad_norm": 1.646562099456787, "learning_rate": 1.1253588958129664e-05, "loss": 0.1959, "step": 21230 }, { "epoch": 0.46010010183520034, "grad_norm": 2.234138011932373, "learning_rate": 1.1250212279441643e-05, "loss": 0.416, "step": 21235 }, { "epoch": 0.4602084371546812, "grad_norm": 1.8611599206924438, "learning_rate": 1.1246835455935263e-05, "loss": 0.2449, "step": 21240 }, { "epoch": 0.46031677247416203, "grad_norm": 1.0695247650146484, "learning_rate": 1.1243458488001673e-05, "loss": 0.3473, "step": 21245 }, { "epoch": 0.4604251077936429, "grad_norm": 1.6102508306503296, "learning_rate": 1.1240081376032041e-05, "loss": 0.3373, "step": 21250 }, { "epoch": 0.4605334431131237, "grad_norm": 2.5103471279144287, "learning_rate": 1.1236704120417561e-05, "loss": 0.3364, "step": 21255 }, { "epoch": 0.4606417784326046, "grad_norm": 1.5991356372833252, "learning_rate": 1.1233326721549433e-05, "loss": 0.1782, "step": 21260 }, { "epoch": 0.46075011375208547, "grad_norm": 1.4443439245224, "learning_rate": 1.122994917981888e-05, "loss": 0.3318, "step": 21265 }, { "epoch": 0.4608584490715663, "grad_norm": 1.0303215980529785, "learning_rate": 1.1226571495617139e-05, "loss": 0.1631, "step": 21270 }, { "epoch": 0.46096678439104716, "grad_norm": 1.023313283920288, "learning_rate": 1.1223193669335464e-05, "loss": 0.1958, "step": 21275 }, { "epoch": 0.461075119710528, "grad_norm": 1.7307326793670654, "learning_rate": 1.1219815701365127e-05, "loss": 0.2228, "step": 21280 }, { "epoch": 0.4611834550300089, "grad_norm": 1.1851491928100586, "learning_rate": 1.121643759209741e-05, "loss": 0.2881, "step": 21285 }, { "epoch": 0.46129179034948975, "grad_norm": 1.4508994817733765, "learning_rate": 1.1213059341923622e-05, "loss": 0.2371, "step": 21290 }, { "epoch": 0.4614001256689706, "grad_norm": 1.4976718425750732, "learning_rate": 1.1209680951235082e-05, "loss": 0.2298, "step": 21295 }, { "epoch": 0.46150846098845144, "grad_norm": 0.914280354976654, "learning_rate": 1.1206302420423128e-05, "loss": 0.2723, "step": 21300 }, { "epoch": 0.4616167963079323, "grad_norm": 1.5614346265792847, "learning_rate": 1.1202923749879107e-05, "loss": 0.3341, "step": 21305 }, { "epoch": 0.4617251316274132, "grad_norm": 1.999060034751892, "learning_rate": 1.1199544939994385e-05, "loss": 0.2887, "step": 21310 }, { "epoch": 0.46183346694689403, "grad_norm": 1.2396622896194458, "learning_rate": 1.1196165991160356e-05, "loss": 0.3165, "step": 21315 }, { "epoch": 0.4619418022663749, "grad_norm": 1.6046044826507568, "learning_rate": 1.119278690376841e-05, "loss": 0.2633, "step": 21320 }, { "epoch": 0.4620501375858557, "grad_norm": 1.5128440856933594, "learning_rate": 1.1189407678209974e-05, "loss": 0.325, "step": 21325 }, { "epoch": 0.46215847290533657, "grad_norm": 1.6025211811065674, "learning_rate": 1.1186028314876476e-05, "loss": 0.2898, "step": 21330 }, { "epoch": 0.46226680822481747, "grad_norm": 1.3952606916427612, "learning_rate": 1.1182648814159363e-05, "loss": 0.2133, "step": 21335 }, { "epoch": 0.4623751435442983, "grad_norm": 1.3741977214813232, "learning_rate": 1.11792691764501e-05, "loss": 0.3375, "step": 21340 }, { "epoch": 0.46248347886377916, "grad_norm": 1.241239309310913, "learning_rate": 1.117588940214017e-05, "loss": 0.321, "step": 21345 }, { "epoch": 0.46259181418326, "grad_norm": 1.4685618877410889, "learning_rate": 1.1172509491621067e-05, "loss": 0.3225, "step": 21350 }, { "epoch": 0.4627001495027409, "grad_norm": 0.9478353261947632, "learning_rate": 1.1169129445284301e-05, "loss": 0.3416, "step": 21355 }, { "epoch": 0.46280848482222176, "grad_norm": 1.480036735534668, "learning_rate": 1.1165749263521404e-05, "loss": 0.2708, "step": 21360 }, { "epoch": 0.4629168201417026, "grad_norm": 1.3044209480285645, "learning_rate": 1.1162368946723918e-05, "loss": 0.2926, "step": 21365 }, { "epoch": 0.46302515546118345, "grad_norm": 1.4434552192687988, "learning_rate": 1.1158988495283403e-05, "loss": 0.256, "step": 21370 }, { "epoch": 0.4631334907806643, "grad_norm": 1.264391303062439, "learning_rate": 1.1155607909591432e-05, "loss": 0.2468, "step": 21375 }, { "epoch": 0.4632418261001452, "grad_norm": 1.4335517883300781, "learning_rate": 1.1152227190039596e-05, "loss": 0.2091, "step": 21380 }, { "epoch": 0.46335016141962604, "grad_norm": 1.9772822856903076, "learning_rate": 1.1148846337019498e-05, "loss": 0.2678, "step": 21385 }, { "epoch": 0.4634584967391069, "grad_norm": 1.057360291481018, "learning_rate": 1.1145465350922765e-05, "loss": 0.1837, "step": 21390 }, { "epoch": 0.46356683205858773, "grad_norm": 1.7562272548675537, "learning_rate": 1.1142084232141028e-05, "loss": 0.2242, "step": 21395 }, { "epoch": 0.4636751673780686, "grad_norm": 0.9808679223060608, "learning_rate": 1.1138702981065941e-05, "loss": 0.3487, "step": 21400 }, { "epoch": 0.4637835026975495, "grad_norm": 1.5894622802734375, "learning_rate": 1.113532159808917e-05, "loss": 0.298, "step": 21405 }, { "epoch": 0.4638918380170303, "grad_norm": 1.6217873096466064, "learning_rate": 1.1131940083602401e-05, "loss": 0.3924, "step": 21410 }, { "epoch": 0.46400017333651117, "grad_norm": 1.4633328914642334, "learning_rate": 1.112855843799733e-05, "loss": 0.2442, "step": 21415 }, { "epoch": 0.464108508655992, "grad_norm": 2.0069022178649902, "learning_rate": 1.1125176661665673e-05, "loss": 0.2541, "step": 21420 }, { "epoch": 0.46421684397547286, "grad_norm": 1.6906293630599976, "learning_rate": 1.1121794754999151e-05, "loss": 0.253, "step": 21425 }, { "epoch": 0.46432517929495376, "grad_norm": 1.1985243558883667, "learning_rate": 1.1118412718389511e-05, "loss": 0.3181, "step": 21430 }, { "epoch": 0.4644335146144346, "grad_norm": 1.1023037433624268, "learning_rate": 1.1115030552228512e-05, "loss": 0.2196, "step": 21435 }, { "epoch": 0.46454184993391545, "grad_norm": 1.39805269241333, "learning_rate": 1.1111648256907925e-05, "loss": 0.361, "step": 21440 }, { "epoch": 0.4646501852533963, "grad_norm": 1.1871337890625, "learning_rate": 1.1108265832819542e-05, "loss": 0.3475, "step": 21445 }, { "epoch": 0.46475852057287714, "grad_norm": 1.8030835390090942, "learning_rate": 1.1104883280355164e-05, "loss": 0.2512, "step": 21450 }, { "epoch": 0.46486685589235804, "grad_norm": 1.407251238822937, "learning_rate": 1.110150059990661e-05, "loss": 0.2867, "step": 21455 }, { "epoch": 0.4649751912118389, "grad_norm": 1.5764896869659424, "learning_rate": 1.109811779186571e-05, "loss": 0.3345, "step": 21460 }, { "epoch": 0.46508352653131974, "grad_norm": 1.6192262172698975, "learning_rate": 1.1094734856624313e-05, "loss": 0.3001, "step": 21465 }, { "epoch": 0.4651918618508006, "grad_norm": 1.630700707435608, "learning_rate": 1.1091351794574285e-05, "loss": 0.1597, "step": 21470 }, { "epoch": 0.4653001971702815, "grad_norm": 0.8644784688949585, "learning_rate": 1.1087968606107495e-05, "loss": 0.1597, "step": 21475 }, { "epoch": 0.4654085324897623, "grad_norm": 1.779465913772583, "learning_rate": 1.1084585291615844e-05, "loss": 0.3378, "step": 21480 }, { "epoch": 0.4655168678092432, "grad_norm": 1.7346127033233643, "learning_rate": 1.1081201851491235e-05, "loss": 0.3364, "step": 21485 }, { "epoch": 0.465625203128724, "grad_norm": 1.8952960968017578, "learning_rate": 1.1077818286125585e-05, "loss": 0.2441, "step": 21490 }, { "epoch": 0.46573353844820486, "grad_norm": 1.6790586709976196, "learning_rate": 1.1074434595910833e-05, "loss": 0.3005, "step": 21495 }, { "epoch": 0.46584187376768577, "grad_norm": 1.4924036264419556, "learning_rate": 1.1071050781238931e-05, "loss": 0.2571, "step": 21500 }, { "epoch": 0.4659502090871666, "grad_norm": 1.088875412940979, "learning_rate": 1.1067666842501837e-05, "loss": 0.2729, "step": 21505 }, { "epoch": 0.46605854440664746, "grad_norm": 1.8621054887771606, "learning_rate": 1.1064282780091533e-05, "loss": 0.291, "step": 21510 }, { "epoch": 0.4661668797261283, "grad_norm": 1.6712291240692139, "learning_rate": 1.1060898594400013e-05, "loss": 0.2603, "step": 21515 }, { "epoch": 0.46627521504560915, "grad_norm": 1.2269396781921387, "learning_rate": 1.1057514285819281e-05, "loss": 0.3254, "step": 21520 }, { "epoch": 0.46638355036509005, "grad_norm": 1.2874369621276855, "learning_rate": 1.1054129854741363e-05, "loss": 0.2143, "step": 21525 }, { "epoch": 0.4664918856845709, "grad_norm": 1.2812416553497314, "learning_rate": 1.105074530155829e-05, "loss": 0.3154, "step": 21530 }, { "epoch": 0.46660022100405174, "grad_norm": 1.1080070734024048, "learning_rate": 1.1047360626662116e-05, "loss": 0.2304, "step": 21535 }, { "epoch": 0.4667085563235326, "grad_norm": 1.693971872329712, "learning_rate": 1.1043975830444904e-05, "loss": 0.4241, "step": 21540 }, { "epoch": 0.46681689164301343, "grad_norm": 1.7338389158248901, "learning_rate": 1.104059091329873e-05, "loss": 0.2508, "step": 21545 }, { "epoch": 0.46692522696249433, "grad_norm": 1.5174287557601929, "learning_rate": 1.1037205875615685e-05, "loss": 0.3814, "step": 21550 }, { "epoch": 0.4670335622819752, "grad_norm": 2.0763437747955322, "learning_rate": 1.1033820717787875e-05, "loss": 0.2604, "step": 21555 }, { "epoch": 0.467141897601456, "grad_norm": 1.5678631067276, "learning_rate": 1.1030435440207425e-05, "loss": 0.3315, "step": 21560 }, { "epoch": 0.46725023292093687, "grad_norm": 2.4862399101257324, "learning_rate": 1.1027050043266465e-05, "loss": 0.2895, "step": 21565 }, { "epoch": 0.4673585682404177, "grad_norm": 1.7036619186401367, "learning_rate": 1.102366452735714e-05, "loss": 0.2865, "step": 21570 }, { "epoch": 0.4674669035598986, "grad_norm": 1.8677027225494385, "learning_rate": 1.1020278892871616e-05, "loss": 0.2733, "step": 21575 }, { "epoch": 0.46757523887937946, "grad_norm": 2.4063923358917236, "learning_rate": 1.1016893140202068e-05, "loss": 0.2778, "step": 21580 }, { "epoch": 0.4676835741988603, "grad_norm": 1.21476149559021, "learning_rate": 1.101350726974068e-05, "loss": 0.2738, "step": 21585 }, { "epoch": 0.46779190951834115, "grad_norm": 1.9601377248764038, "learning_rate": 1.1010121281879658e-05, "loss": 0.3455, "step": 21590 }, { "epoch": 0.467900244837822, "grad_norm": 0.9744799137115479, "learning_rate": 1.1006735177011216e-05, "loss": 0.2862, "step": 21595 }, { "epoch": 0.4680085801573029, "grad_norm": 1.457972764968872, "learning_rate": 1.1003348955527585e-05, "loss": 0.4208, "step": 21600 }, { "epoch": 0.46811691547678375, "grad_norm": 2.048581600189209, "learning_rate": 1.0999962617821008e-05, "loss": 0.2804, "step": 21605 }, { "epoch": 0.4682252507962646, "grad_norm": 1.4093960523605347, "learning_rate": 1.0996576164283741e-05, "loss": 0.2118, "step": 21610 }, { "epoch": 0.46833358611574544, "grad_norm": 1.4106910228729248, "learning_rate": 1.0993189595308055e-05, "loss": 0.2145, "step": 21615 }, { "epoch": 0.46844192143522634, "grad_norm": 1.9329876899719238, "learning_rate": 1.0989802911286232e-05, "loss": 0.3339, "step": 21620 }, { "epoch": 0.4685502567547072, "grad_norm": 1.738379955291748, "learning_rate": 1.098641611261057e-05, "loss": 0.2195, "step": 21625 }, { "epoch": 0.46865859207418803, "grad_norm": 1.633945107460022, "learning_rate": 1.0983029199673376e-05, "loss": 0.2418, "step": 21630 }, { "epoch": 0.4687669273936689, "grad_norm": 2.460773468017578, "learning_rate": 1.0979642172866975e-05, "loss": 0.341, "step": 21635 }, { "epoch": 0.4688752627131497, "grad_norm": 1.1356309652328491, "learning_rate": 1.0976255032583705e-05, "loss": 0.2932, "step": 21640 }, { "epoch": 0.4689835980326306, "grad_norm": 1.7019648551940918, "learning_rate": 1.097286777921591e-05, "loss": 0.3998, "step": 21645 }, { "epoch": 0.46909193335211147, "grad_norm": 1.4527226686477661, "learning_rate": 1.0969480413155959e-05, "loss": 0.3345, "step": 21650 }, { "epoch": 0.4692002686715923, "grad_norm": 1.2883621454238892, "learning_rate": 1.0966092934796226e-05, "loss": 0.3255, "step": 21655 }, { "epoch": 0.46930860399107316, "grad_norm": 1.211188793182373, "learning_rate": 1.0962705344529099e-05, "loss": 0.2208, "step": 21660 }, { "epoch": 0.469416939310554, "grad_norm": 1.366952896118164, "learning_rate": 1.0959317642746978e-05, "loss": 0.3283, "step": 21665 }, { "epoch": 0.4695252746300349, "grad_norm": 1.3167698383331299, "learning_rate": 1.0955929829842275e-05, "loss": 0.2389, "step": 21670 }, { "epoch": 0.46963360994951575, "grad_norm": 2.4275362491607666, "learning_rate": 1.0952541906207426e-05, "loss": 0.2903, "step": 21675 }, { "epoch": 0.4697419452689966, "grad_norm": 1.219658613204956, "learning_rate": 1.0949153872234866e-05, "loss": 0.3124, "step": 21680 }, { "epoch": 0.46985028058847744, "grad_norm": 2.0519704818725586, "learning_rate": 1.0945765728317047e-05, "loss": 0.3227, "step": 21685 }, { "epoch": 0.4699586159079583, "grad_norm": 1.4353002309799194, "learning_rate": 1.0942377474846436e-05, "loss": 0.3127, "step": 21690 }, { "epoch": 0.4700669512274392, "grad_norm": 1.3836493492126465, "learning_rate": 1.0938989112215507e-05, "loss": 0.3495, "step": 21695 }, { "epoch": 0.47017528654692003, "grad_norm": 0.7988618612289429, "learning_rate": 1.0935600640816763e-05, "loss": 0.2593, "step": 21700 }, { "epoch": 0.4702836218664009, "grad_norm": 1.6345713138580322, "learning_rate": 1.0932212061042698e-05, "loss": 0.3972, "step": 21705 }, { "epoch": 0.4703919571858817, "grad_norm": 1.5552207231521606, "learning_rate": 1.0928823373285828e-05, "loss": 0.2581, "step": 21710 }, { "epoch": 0.47050029250536257, "grad_norm": 2.3735363483428955, "learning_rate": 1.0925434577938684e-05, "loss": 0.3553, "step": 21715 }, { "epoch": 0.4706086278248435, "grad_norm": 1.2418217658996582, "learning_rate": 1.0922045675393807e-05, "loss": 0.1962, "step": 21720 }, { "epoch": 0.4707169631443243, "grad_norm": 2.211921453475952, "learning_rate": 1.0918656666043752e-05, "loss": 0.3534, "step": 21725 }, { "epoch": 0.47082529846380516, "grad_norm": 1.6788884401321411, "learning_rate": 1.0915267550281083e-05, "loss": 0.2544, "step": 21730 }, { "epoch": 0.470933633783286, "grad_norm": 1.3833736181259155, "learning_rate": 1.091187832849838e-05, "loss": 0.2897, "step": 21735 }, { "epoch": 0.4710419691027669, "grad_norm": 1.3237824440002441, "learning_rate": 1.0908489001088235e-05, "loss": 0.2055, "step": 21740 }, { "epoch": 0.47115030442224776, "grad_norm": 1.5537189245224, "learning_rate": 1.0905099568443242e-05, "loss": 0.2814, "step": 21745 }, { "epoch": 0.4712586397417286, "grad_norm": 1.636244773864746, "learning_rate": 1.0901710030956027e-05, "loss": 0.3409, "step": 21750 }, { "epoch": 0.47136697506120945, "grad_norm": 2.07462477684021, "learning_rate": 1.0898320389019212e-05, "loss": 0.2551, "step": 21755 }, { "epoch": 0.4714753103806903, "grad_norm": 1.965335488319397, "learning_rate": 1.0894930643025436e-05, "loss": 0.2783, "step": 21760 }, { "epoch": 0.4715836457001712, "grad_norm": 1.2019929885864258, "learning_rate": 1.089154079336735e-05, "loss": 0.289, "step": 21765 }, { "epoch": 0.47169198101965204, "grad_norm": 0.988040030002594, "learning_rate": 1.0888150840437618e-05, "loss": 0.2146, "step": 21770 }, { "epoch": 0.4718003163391329, "grad_norm": 1.7156665325164795, "learning_rate": 1.0884760784628918e-05, "loss": 0.3489, "step": 21775 }, { "epoch": 0.47190865165861373, "grad_norm": 1.8329836130142212, "learning_rate": 1.0881370626333936e-05, "loss": 0.33, "step": 21780 }, { "epoch": 0.4720169869780946, "grad_norm": 1.1398534774780273, "learning_rate": 1.0877980365945369e-05, "loss": 0.2356, "step": 21785 }, { "epoch": 0.4721253222975755, "grad_norm": 1.3444374799728394, "learning_rate": 1.0874590003855925e-05, "loss": 0.2704, "step": 21790 }, { "epoch": 0.4722336576170563, "grad_norm": 1.8943614959716797, "learning_rate": 1.0871199540458334e-05, "loss": 0.2755, "step": 21795 }, { "epoch": 0.47234199293653717, "grad_norm": 1.6522427797317505, "learning_rate": 1.0867808976145327e-05, "loss": 0.3551, "step": 21800 }, { "epoch": 0.472450328256018, "grad_norm": 1.1716523170471191, "learning_rate": 1.0864418311309648e-05, "loss": 0.3253, "step": 21805 }, { "epoch": 0.47255866357549886, "grad_norm": 1.3315420150756836, "learning_rate": 1.0861027546344058e-05, "loss": 0.2555, "step": 21810 }, { "epoch": 0.47266699889497976, "grad_norm": 1.711544156074524, "learning_rate": 1.0857636681641322e-05, "loss": 0.3865, "step": 21815 }, { "epoch": 0.4727753342144606, "grad_norm": 1.7277008295059204, "learning_rate": 1.0854245717594229e-05, "loss": 0.2752, "step": 21820 }, { "epoch": 0.47288366953394145, "grad_norm": 1.4267877340316772, "learning_rate": 1.0850854654595565e-05, "loss": 0.2844, "step": 21825 }, { "epoch": 0.4729920048534223, "grad_norm": 1.7973153591156006, "learning_rate": 1.0847463493038132e-05, "loss": 0.4019, "step": 21830 }, { "epoch": 0.47310034017290314, "grad_norm": 2.3846609592437744, "learning_rate": 1.0844072233314751e-05, "loss": 0.2652, "step": 21835 }, { "epoch": 0.47320867549238405, "grad_norm": 1.4304426908493042, "learning_rate": 1.0840680875818242e-05, "loss": 0.3417, "step": 21840 }, { "epoch": 0.4733170108118649, "grad_norm": 2.100905656814575, "learning_rate": 1.083728942094145e-05, "loss": 0.2968, "step": 21845 }, { "epoch": 0.47342534613134574, "grad_norm": 1.753881812095642, "learning_rate": 1.0833897869077222e-05, "loss": 0.2343, "step": 21850 }, { "epoch": 0.4735336814508266, "grad_norm": 1.4849663972854614, "learning_rate": 1.0830506220618415e-05, "loss": 0.3943, "step": 21855 }, { "epoch": 0.47364201677030743, "grad_norm": 1.600310206413269, "learning_rate": 1.0827114475957905e-05, "loss": 0.2127, "step": 21860 }, { "epoch": 0.47375035208978833, "grad_norm": 1.0456993579864502, "learning_rate": 1.0823722635488574e-05, "loss": 0.2274, "step": 21865 }, { "epoch": 0.4738586874092692, "grad_norm": 0.8171137571334839, "learning_rate": 1.0820330699603315e-05, "loss": 0.2655, "step": 21870 }, { "epoch": 0.47396702272875, "grad_norm": 1.220123529434204, "learning_rate": 1.0816938668695031e-05, "loss": 0.2965, "step": 21875 }, { "epoch": 0.47407535804823087, "grad_norm": 1.3370420932769775, "learning_rate": 1.0813546543156642e-05, "loss": 0.2983, "step": 21880 }, { "epoch": 0.47418369336771177, "grad_norm": 2.0979390144348145, "learning_rate": 1.0810154323381068e-05, "loss": 0.2349, "step": 21885 }, { "epoch": 0.4742920286871926, "grad_norm": 1.801039457321167, "learning_rate": 1.0806762009761256e-05, "loss": 0.2417, "step": 21890 }, { "epoch": 0.47440036400667346, "grad_norm": 1.6676218509674072, "learning_rate": 1.080336960269015e-05, "loss": 0.2694, "step": 21895 }, { "epoch": 0.4745086993261543, "grad_norm": 1.6760172843933105, "learning_rate": 1.079997710256071e-05, "loss": 0.2597, "step": 21900 }, { "epoch": 0.47461703464563515, "grad_norm": 1.7176927328109741, "learning_rate": 1.079658450976591e-05, "loss": 0.369, "step": 21905 }, { "epoch": 0.47472536996511605, "grad_norm": 2.1368186473846436, "learning_rate": 1.079319182469872e-05, "loss": 0.2731, "step": 21910 }, { "epoch": 0.4748337052845969, "grad_norm": 1.759381651878357, "learning_rate": 1.0789799047752145e-05, "loss": 0.2669, "step": 21915 }, { "epoch": 0.47494204060407774, "grad_norm": 1.8442929983139038, "learning_rate": 1.078640617931918e-05, "loss": 0.1788, "step": 21920 }, { "epoch": 0.4750503759235586, "grad_norm": 1.3251475095748901, "learning_rate": 1.078301321979284e-05, "loss": 0.2344, "step": 21925 }, { "epoch": 0.47515871124303943, "grad_norm": 1.9754376411437988, "learning_rate": 1.0779620169566146e-05, "loss": 0.3845, "step": 21930 }, { "epoch": 0.47526704656252033, "grad_norm": 1.1143832206726074, "learning_rate": 1.0776227029032133e-05, "loss": 0.2926, "step": 21935 }, { "epoch": 0.4753753818820012, "grad_norm": 1.4687711000442505, "learning_rate": 1.077283379858385e-05, "loss": 0.2889, "step": 21940 }, { "epoch": 0.475483717201482, "grad_norm": 1.7442164421081543, "learning_rate": 1.0769440478614347e-05, "loss": 0.2479, "step": 21945 }, { "epoch": 0.47559205252096287, "grad_norm": 1.449807047843933, "learning_rate": 1.0766047069516692e-05, "loss": 0.2662, "step": 21950 }, { "epoch": 0.4757003878404437, "grad_norm": 1.6960121393203735, "learning_rate": 1.0762653571683958e-05, "loss": 0.3094, "step": 21955 }, { "epoch": 0.4758087231599246, "grad_norm": 1.9601143598556519, "learning_rate": 1.0759259985509232e-05, "loss": 0.2146, "step": 21960 }, { "epoch": 0.47591705847940546, "grad_norm": 1.583776831626892, "learning_rate": 1.075586631138561e-05, "loss": 0.2732, "step": 21965 }, { "epoch": 0.4760253937988863, "grad_norm": 2.6665596961975098, "learning_rate": 1.0752472549706201e-05, "loss": 0.2767, "step": 21970 }, { "epoch": 0.47613372911836715, "grad_norm": 1.60187566280365, "learning_rate": 1.0749078700864117e-05, "loss": 0.3285, "step": 21975 }, { "epoch": 0.476242064437848, "grad_norm": 1.0031249523162842, "learning_rate": 1.074568476525249e-05, "loss": 0.1495, "step": 21980 }, { "epoch": 0.4763503997573289, "grad_norm": 1.0663856267929077, "learning_rate": 1.074229074326445e-05, "loss": 0.2395, "step": 21985 }, { "epoch": 0.47645873507680975, "grad_norm": 1.1659294366836548, "learning_rate": 1.0738896635293146e-05, "loss": 0.3017, "step": 21990 }, { "epoch": 0.4765670703962906, "grad_norm": 0.7687063217163086, "learning_rate": 1.0735502441731737e-05, "loss": 0.2732, "step": 21995 }, { "epoch": 0.47667540571577144, "grad_norm": 1.217636227607727, "learning_rate": 1.073210816297339e-05, "loss": 0.3034, "step": 22000 }, { "epoch": 0.47678374103525234, "grad_norm": 1.4310625791549683, "learning_rate": 1.0728713799411274e-05, "loss": 0.3016, "step": 22005 }, { "epoch": 0.4768920763547332, "grad_norm": 1.2522014379501343, "learning_rate": 1.0725319351438585e-05, "loss": 0.2393, "step": 22010 }, { "epoch": 0.47700041167421403, "grad_norm": 0.8556925654411316, "learning_rate": 1.0721924819448513e-05, "loss": 0.2516, "step": 22015 }, { "epoch": 0.4771087469936949, "grad_norm": 2.03879451751709, "learning_rate": 1.0718530203834265e-05, "loss": 0.2622, "step": 22020 }, { "epoch": 0.4772170823131757, "grad_norm": 1.7473313808441162, "learning_rate": 1.071513550498906e-05, "loss": 0.2537, "step": 22025 }, { "epoch": 0.4773254176326566, "grad_norm": 1.1406302452087402, "learning_rate": 1.0711740723306115e-05, "loss": 0.2897, "step": 22030 }, { "epoch": 0.47743375295213747, "grad_norm": 1.8214257955551147, "learning_rate": 1.070834585917867e-05, "loss": 0.2819, "step": 22035 }, { "epoch": 0.4775420882716183, "grad_norm": 1.7505066394805908, "learning_rate": 1.070495091299997e-05, "loss": 0.3088, "step": 22040 }, { "epoch": 0.47765042359109916, "grad_norm": 1.487626552581787, "learning_rate": 1.0701555885163263e-05, "loss": 0.2497, "step": 22045 }, { "epoch": 0.47775875891058, "grad_norm": 1.6285216808319092, "learning_rate": 1.069816077606182e-05, "loss": 0.2961, "step": 22050 }, { "epoch": 0.4778670942300609, "grad_norm": 1.9001719951629639, "learning_rate": 1.0694765586088907e-05, "loss": 0.3651, "step": 22055 }, { "epoch": 0.47797542954954175, "grad_norm": 1.5875189304351807, "learning_rate": 1.069137031563781e-05, "loss": 0.3422, "step": 22060 }, { "epoch": 0.4780837648690226, "grad_norm": 1.2433148622512817, "learning_rate": 1.0687974965101819e-05, "loss": 0.2939, "step": 22065 }, { "epoch": 0.47819210018850344, "grad_norm": 2.4523661136627197, "learning_rate": 1.0684579534874234e-05, "loss": 0.2933, "step": 22070 }, { "epoch": 0.4783004355079843, "grad_norm": 1.5443981885910034, "learning_rate": 1.0681184025348364e-05, "loss": 0.3505, "step": 22075 }, { "epoch": 0.4784087708274652, "grad_norm": 1.4859603643417358, "learning_rate": 1.0677788436917526e-05, "loss": 0.2362, "step": 22080 }, { "epoch": 0.47851710614694604, "grad_norm": 1.367669701576233, "learning_rate": 1.0674392769975055e-05, "loss": 0.226, "step": 22085 }, { "epoch": 0.4786254414664269, "grad_norm": 1.9307224750518799, "learning_rate": 1.0670997024914282e-05, "loss": 0.2576, "step": 22090 }, { "epoch": 0.4787337767859077, "grad_norm": 1.2157340049743652, "learning_rate": 1.0667601202128557e-05, "loss": 0.3036, "step": 22095 }, { "epoch": 0.4788421121053886, "grad_norm": 1.7741608619689941, "learning_rate": 1.0664205302011233e-05, "loss": 0.2283, "step": 22100 }, { "epoch": 0.4789504474248695, "grad_norm": 2.3287620544433594, "learning_rate": 1.0660809324955675e-05, "loss": 0.1972, "step": 22105 }, { "epoch": 0.4790587827443503, "grad_norm": 1.3796299695968628, "learning_rate": 1.0657413271355254e-05, "loss": 0.2402, "step": 22110 }, { "epoch": 0.47916711806383117, "grad_norm": 1.8856226205825806, "learning_rate": 1.0654017141603354e-05, "loss": 0.3098, "step": 22115 }, { "epoch": 0.479275453383312, "grad_norm": 1.1168122291564941, "learning_rate": 1.0650620936093365e-05, "loss": 0.2534, "step": 22120 }, { "epoch": 0.47938378870279286, "grad_norm": 1.4917978048324585, "learning_rate": 1.0647224655218688e-05, "loss": 0.331, "step": 22125 }, { "epoch": 0.47949212402227376, "grad_norm": 1.2717487812042236, "learning_rate": 1.0643828299372729e-05, "loss": 0.164, "step": 22130 }, { "epoch": 0.4796004593417546, "grad_norm": 1.0993329286575317, "learning_rate": 1.0640431868948905e-05, "loss": 0.2597, "step": 22135 }, { "epoch": 0.47970879466123545, "grad_norm": 1.5815898180007935, "learning_rate": 1.0637035364340647e-05, "loss": 0.2577, "step": 22140 }, { "epoch": 0.4798171299807163, "grad_norm": 1.614150047302246, "learning_rate": 1.0633638785941382e-05, "loss": 0.3056, "step": 22145 }, { "epoch": 0.4799254653001972, "grad_norm": 1.2395226955413818, "learning_rate": 1.0630242134144556e-05, "loss": 0.3044, "step": 22150 }, { "epoch": 0.48003380061967804, "grad_norm": 0.8799046874046326, "learning_rate": 1.0626845409343618e-05, "loss": 0.3248, "step": 22155 }, { "epoch": 0.4801421359391589, "grad_norm": 1.522985816001892, "learning_rate": 1.0623448611932033e-05, "loss": 0.2169, "step": 22160 }, { "epoch": 0.48025047125863973, "grad_norm": 1.8127140998840332, "learning_rate": 1.062005174230326e-05, "loss": 0.3696, "step": 22165 }, { "epoch": 0.4803588065781206, "grad_norm": 1.2097610235214233, "learning_rate": 1.0616654800850787e-05, "loss": 0.2727, "step": 22170 }, { "epoch": 0.4804671418976015, "grad_norm": 1.547751784324646, "learning_rate": 1.0613257787968087e-05, "loss": 0.258, "step": 22175 }, { "epoch": 0.4805754772170823, "grad_norm": 1.9092810153961182, "learning_rate": 1.0609860704048662e-05, "loss": 0.2735, "step": 22180 }, { "epoch": 0.48068381253656317, "grad_norm": 1.1610102653503418, "learning_rate": 1.0606463549486015e-05, "loss": 0.2828, "step": 22185 }, { "epoch": 0.480792147856044, "grad_norm": 1.3702783584594727, "learning_rate": 1.0603066324673645e-05, "loss": 0.2823, "step": 22190 }, { "epoch": 0.48090048317552486, "grad_norm": 1.631952166557312, "learning_rate": 1.0599669030005078e-05, "loss": 0.2618, "step": 22195 }, { "epoch": 0.48100881849500576, "grad_norm": 0.8145065307617188, "learning_rate": 1.0596271665873831e-05, "loss": 0.2737, "step": 22200 }, { "epoch": 0.4811171538144866, "grad_norm": 1.5230265855789185, "learning_rate": 1.059287423267345e-05, "loss": 0.23, "step": 22205 }, { "epoch": 0.48122548913396745, "grad_norm": 1.344258189201355, "learning_rate": 1.0589476730797467e-05, "loss": 0.3257, "step": 22210 }, { "epoch": 0.4813338244534483, "grad_norm": 1.6687579154968262, "learning_rate": 1.0586079160639435e-05, "loss": 0.2367, "step": 22215 }, { "epoch": 0.48144215977292915, "grad_norm": 1.5286896228790283, "learning_rate": 1.0582681522592915e-05, "loss": 0.3166, "step": 22220 }, { "epoch": 0.48155049509241005, "grad_norm": 1.7839564085006714, "learning_rate": 1.0579283817051466e-05, "loss": 0.31, "step": 22225 }, { "epoch": 0.4816588304118909, "grad_norm": 1.8542507886886597, "learning_rate": 1.0575886044408665e-05, "loss": 0.2799, "step": 22230 }, { "epoch": 0.48176716573137174, "grad_norm": 1.3967456817626953, "learning_rate": 1.0572488205058094e-05, "loss": 0.2839, "step": 22235 }, { "epoch": 0.4818755010508526, "grad_norm": 1.3896856307983398, "learning_rate": 1.056909029939334e-05, "loss": 0.2174, "step": 22240 }, { "epoch": 0.48198383637033343, "grad_norm": 1.3629066944122314, "learning_rate": 1.0565692327808e-05, "loss": 0.2692, "step": 22245 }, { "epoch": 0.48209217168981433, "grad_norm": 1.669278860092163, "learning_rate": 1.0562294290695674e-05, "loss": 0.2536, "step": 22250 }, { "epoch": 0.4822005070092952, "grad_norm": 1.8441638946533203, "learning_rate": 1.0558896188449981e-05, "loss": 0.3168, "step": 22255 }, { "epoch": 0.482308842328776, "grad_norm": 3.611361265182495, "learning_rate": 1.0555498021464537e-05, "loss": 0.2836, "step": 22260 }, { "epoch": 0.48241717764825687, "grad_norm": 0.980129599571228, "learning_rate": 1.0552099790132968e-05, "loss": 0.1748, "step": 22265 }, { "epoch": 0.48252551296773777, "grad_norm": 1.8092091083526611, "learning_rate": 1.054870149484891e-05, "loss": 0.2428, "step": 22270 }, { "epoch": 0.4826338482872186, "grad_norm": 1.6813865900039673, "learning_rate": 1.0545303136006002e-05, "loss": 0.3271, "step": 22275 }, { "epoch": 0.48274218360669946, "grad_norm": 1.319388508796692, "learning_rate": 1.0541904713997894e-05, "loss": 0.2506, "step": 22280 }, { "epoch": 0.4828505189261803, "grad_norm": 1.6362146139144897, "learning_rate": 1.0538506229218244e-05, "loss": 0.293, "step": 22285 }, { "epoch": 0.48295885424566115, "grad_norm": 1.2139835357666016, "learning_rate": 1.0535107682060712e-05, "loss": 0.2473, "step": 22290 }, { "epoch": 0.48306718956514205, "grad_norm": 1.25132155418396, "learning_rate": 1.0531709072918972e-05, "loss": 0.2373, "step": 22295 }, { "epoch": 0.4831755248846229, "grad_norm": 1.9211019277572632, "learning_rate": 1.0528310402186701e-05, "loss": 0.2767, "step": 22300 }, { "epoch": 0.48328386020410374, "grad_norm": 1.79086172580719, "learning_rate": 1.0524911670257588e-05, "loss": 0.2675, "step": 22305 }, { "epoch": 0.4833921955235846, "grad_norm": 1.2223901748657227, "learning_rate": 1.0521512877525315e-05, "loss": 0.2755, "step": 22310 }, { "epoch": 0.48350053084306543, "grad_norm": 1.8715460300445557, "learning_rate": 1.051811402438359e-05, "loss": 0.3104, "step": 22315 }, { "epoch": 0.48360886616254634, "grad_norm": 1.2682522535324097, "learning_rate": 1.0514715111226114e-05, "loss": 0.3813, "step": 22320 }, { "epoch": 0.4837172014820272, "grad_norm": 1.5758527517318726, "learning_rate": 1.0511316138446608e-05, "loss": 0.3617, "step": 22325 }, { "epoch": 0.483825536801508, "grad_norm": 1.5414034128189087, "learning_rate": 1.0507917106438783e-05, "loss": 0.2361, "step": 22330 }, { "epoch": 0.4839338721209889, "grad_norm": 1.81585693359375, "learning_rate": 1.050451801559637e-05, "loss": 0.2678, "step": 22335 }, { "epoch": 0.4840422074404697, "grad_norm": 2.785395622253418, "learning_rate": 1.0501118866313105e-05, "loss": 0.3378, "step": 22340 }, { "epoch": 0.4841505427599506, "grad_norm": 1.7683931589126587, "learning_rate": 1.0497719658982724e-05, "loss": 0.2945, "step": 22345 }, { "epoch": 0.48425887807943147, "grad_norm": 1.3989100456237793, "learning_rate": 1.0494320393998978e-05, "loss": 0.3132, "step": 22350 }, { "epoch": 0.4843672133989123, "grad_norm": 1.6944400072097778, "learning_rate": 1.0490921071755617e-05, "loss": 0.3069, "step": 22355 }, { "epoch": 0.48447554871839316, "grad_norm": 1.6356606483459473, "learning_rate": 1.0487521692646405e-05, "loss": 0.2924, "step": 22360 }, { "epoch": 0.484583884037874, "grad_norm": 1.856943130493164, "learning_rate": 1.0484122257065108e-05, "loss": 0.2814, "step": 22365 }, { "epoch": 0.4846922193573549, "grad_norm": 1.270288348197937, "learning_rate": 1.0480722765405498e-05, "loss": 0.1495, "step": 22370 }, { "epoch": 0.48480055467683575, "grad_norm": 2.168245792388916, "learning_rate": 1.047732321806136e-05, "loss": 0.2163, "step": 22375 }, { "epoch": 0.4849088899963166, "grad_norm": 1.273223638534546, "learning_rate": 1.0473923615426476e-05, "loss": 0.3016, "step": 22380 }, { "epoch": 0.48501722531579744, "grad_norm": 1.9166795015335083, "learning_rate": 1.047052395789464e-05, "loss": 0.2036, "step": 22385 }, { "epoch": 0.48512556063527834, "grad_norm": 1.2577849626541138, "learning_rate": 1.0467124245859652e-05, "loss": 0.3561, "step": 22390 }, { "epoch": 0.4852338959547592, "grad_norm": 1.485622525215149, "learning_rate": 1.0463724479715317e-05, "loss": 0.2921, "step": 22395 }, { "epoch": 0.48534223127424003, "grad_norm": 1.551195502281189, "learning_rate": 1.0460324659855452e-05, "loss": 0.3844, "step": 22400 }, { "epoch": 0.4854505665937209, "grad_norm": 2.0553102493286133, "learning_rate": 1.0456924786673868e-05, "loss": 0.3142, "step": 22405 }, { "epoch": 0.4855589019132017, "grad_norm": 1.0867644548416138, "learning_rate": 1.045352486056439e-05, "loss": 0.2675, "step": 22410 }, { "epoch": 0.4856672372326826, "grad_norm": 1.5180550813674927, "learning_rate": 1.0450124881920851e-05, "loss": 0.2637, "step": 22415 }, { "epoch": 0.48577557255216347, "grad_norm": 1.5227117538452148, "learning_rate": 1.044672485113709e-05, "loss": 0.2891, "step": 22420 }, { "epoch": 0.4858839078716443, "grad_norm": 1.1506528854370117, "learning_rate": 1.0443324768606945e-05, "loss": 0.2574, "step": 22425 }, { "epoch": 0.48599224319112516, "grad_norm": 1.4983099699020386, "learning_rate": 1.043992463472427e-05, "loss": 0.2465, "step": 22430 }, { "epoch": 0.486100578510606, "grad_norm": 2.010021686553955, "learning_rate": 1.0436524449882912e-05, "loss": 0.2391, "step": 22435 }, { "epoch": 0.4862089138300869, "grad_norm": 2.2674038410186768, "learning_rate": 1.0433124214476736e-05, "loss": 0.2698, "step": 22440 }, { "epoch": 0.48631724914956775, "grad_norm": 1.2917373180389404, "learning_rate": 1.0429723928899609e-05, "loss": 0.2365, "step": 22445 }, { "epoch": 0.4864255844690486, "grad_norm": 1.7807196378707886, "learning_rate": 1.0426323593545402e-05, "loss": 0.2539, "step": 22450 }, { "epoch": 0.48653391978852945, "grad_norm": 1.2325611114501953, "learning_rate": 1.0422923208807993e-05, "loss": 0.283, "step": 22455 }, { "epoch": 0.4866422551080103, "grad_norm": 1.4916878938674927, "learning_rate": 1.0419522775081265e-05, "loss": 0.3495, "step": 22460 }, { "epoch": 0.4867505904274912, "grad_norm": 1.3431780338287354, "learning_rate": 1.041612229275911e-05, "loss": 0.2873, "step": 22465 }, { "epoch": 0.48685892574697204, "grad_norm": 1.2984063625335693, "learning_rate": 1.041272176223542e-05, "loss": 0.2295, "step": 22470 }, { "epoch": 0.4869672610664529, "grad_norm": 1.4818248748779297, "learning_rate": 1.0409321183904095e-05, "loss": 0.2469, "step": 22475 }, { "epoch": 0.48707559638593373, "grad_norm": 1.0594156980514526, "learning_rate": 1.0405920558159043e-05, "loss": 0.2259, "step": 22480 }, { "epoch": 0.4871839317054146, "grad_norm": 1.468881607055664, "learning_rate": 1.0402519885394178e-05, "loss": 0.3731, "step": 22485 }, { "epoch": 0.4872922670248955, "grad_norm": 1.4430755376815796, "learning_rate": 1.0399119166003408e-05, "loss": 0.2081, "step": 22490 }, { "epoch": 0.4874006023443763, "grad_norm": 1.3768877983093262, "learning_rate": 1.0395718400380665e-05, "loss": 0.2642, "step": 22495 }, { "epoch": 0.48750893766385717, "grad_norm": 1.6394551992416382, "learning_rate": 1.0392317588919874e-05, "loss": 0.2732, "step": 22500 }, { "epoch": 0.487617272983338, "grad_norm": 1.388418197631836, "learning_rate": 1.0388916732014967e-05, "loss": 0.2601, "step": 22505 }, { "epoch": 0.48772560830281886, "grad_norm": 1.5566850900650024, "learning_rate": 1.0385515830059884e-05, "loss": 0.2766, "step": 22510 }, { "epoch": 0.48783394362229976, "grad_norm": 1.434321403503418, "learning_rate": 1.0382114883448562e-05, "loss": 0.2928, "step": 22515 }, { "epoch": 0.4879422789417806, "grad_norm": 1.5680888891220093, "learning_rate": 1.037871389257496e-05, "loss": 0.2383, "step": 22520 }, { "epoch": 0.48805061426126145, "grad_norm": 1.1889084577560425, "learning_rate": 1.0375312857833024e-05, "loss": 0.2428, "step": 22525 }, { "epoch": 0.4881589495807423, "grad_norm": 1.8399012088775635, "learning_rate": 1.0371911779616717e-05, "loss": 0.2082, "step": 22530 }, { "epoch": 0.4882672849002232, "grad_norm": 1.8553431034088135, "learning_rate": 1.0368510658319997e-05, "loss": 0.2829, "step": 22535 }, { "epoch": 0.48837562021970404, "grad_norm": 2.267936944961548, "learning_rate": 1.0365109494336842e-05, "loss": 0.1957, "step": 22540 }, { "epoch": 0.4884839555391849, "grad_norm": 0.952158510684967, "learning_rate": 1.0361708288061223e-05, "loss": 0.222, "step": 22545 }, { "epoch": 0.48859229085866573, "grad_norm": 1.143113374710083, "learning_rate": 1.0358307039887115e-05, "loss": 0.3657, "step": 22550 }, { "epoch": 0.4887006261781466, "grad_norm": 1.9179497957229614, "learning_rate": 1.0354905750208504e-05, "loss": 0.2366, "step": 22555 }, { "epoch": 0.4888089614976275, "grad_norm": 1.5977915525436401, "learning_rate": 1.0351504419419375e-05, "loss": 0.1766, "step": 22560 }, { "epoch": 0.4889172968171083, "grad_norm": 1.2867902517318726, "learning_rate": 1.034810304791373e-05, "loss": 0.1955, "step": 22565 }, { "epoch": 0.4890256321365892, "grad_norm": 4.601733684539795, "learning_rate": 1.0344701636085559e-05, "loss": 0.208, "step": 22570 }, { "epoch": 0.48913396745607, "grad_norm": 1.3249108791351318, "learning_rate": 1.0341300184328866e-05, "loss": 0.264, "step": 22575 }, { "epoch": 0.48924230277555086, "grad_norm": 1.59172785282135, "learning_rate": 1.033789869303766e-05, "loss": 0.2612, "step": 22580 }, { "epoch": 0.48935063809503176, "grad_norm": 1.662904977798462, "learning_rate": 1.0334497162605954e-05, "loss": 0.2211, "step": 22585 }, { "epoch": 0.4894589734145126, "grad_norm": 1.1290204524993896, "learning_rate": 1.033109559342776e-05, "loss": 0.2376, "step": 22590 }, { "epoch": 0.48956730873399346, "grad_norm": 3.0306026935577393, "learning_rate": 1.0327693985897103e-05, "loss": 0.3539, "step": 22595 }, { "epoch": 0.4896756440534743, "grad_norm": 2.356511116027832, "learning_rate": 1.0324292340408007e-05, "loss": 0.2691, "step": 22600 }, { "epoch": 0.48978397937295515, "grad_norm": 1.621685266494751, "learning_rate": 1.03208906573545e-05, "loss": 0.2403, "step": 22605 }, { "epoch": 0.48989231469243605, "grad_norm": 1.072860598564148, "learning_rate": 1.0317488937130615e-05, "loss": 0.2586, "step": 22610 }, { "epoch": 0.4900006500119169, "grad_norm": 2.6336874961853027, "learning_rate": 1.0314087180130397e-05, "loss": 0.264, "step": 22615 }, { "epoch": 0.49010898533139774, "grad_norm": 1.555716872215271, "learning_rate": 1.0310685386747881e-05, "loss": 0.2932, "step": 22620 }, { "epoch": 0.4902173206508786, "grad_norm": 2.5669357776641846, "learning_rate": 1.030728355737712e-05, "loss": 0.261, "step": 22625 }, { "epoch": 0.49032565597035943, "grad_norm": 1.9610823392868042, "learning_rate": 1.0303881692412161e-05, "loss": 0.2895, "step": 22630 }, { "epoch": 0.49043399128984033, "grad_norm": 1.5812628269195557, "learning_rate": 1.0300479792247058e-05, "loss": 0.3135, "step": 22635 }, { "epoch": 0.4905423266093212, "grad_norm": 1.6864622831344604, "learning_rate": 1.0297077857275875e-05, "loss": 0.2536, "step": 22640 }, { "epoch": 0.490650661928802, "grad_norm": 1.7206014394760132, "learning_rate": 1.029367588789267e-05, "loss": 0.2198, "step": 22645 }, { "epoch": 0.49075899724828287, "grad_norm": 1.0321730375289917, "learning_rate": 1.0290273884491516e-05, "loss": 0.204, "step": 22650 }, { "epoch": 0.49086733256776377, "grad_norm": 1.8990256786346436, "learning_rate": 1.0286871847466476e-05, "loss": 0.1947, "step": 22655 }, { "epoch": 0.4909756678872446, "grad_norm": 2.0980403423309326, "learning_rate": 1.0283469777211633e-05, "loss": 0.3724, "step": 22660 }, { "epoch": 0.49108400320672546, "grad_norm": 1.3987077474594116, "learning_rate": 1.0280067674121064e-05, "loss": 0.3305, "step": 22665 }, { "epoch": 0.4911923385262063, "grad_norm": 1.2341173887252808, "learning_rate": 1.0276665538588847e-05, "loss": 0.2212, "step": 22670 }, { "epoch": 0.49130067384568715, "grad_norm": 1.6127393245697021, "learning_rate": 1.0273263371009075e-05, "loss": 0.2698, "step": 22675 }, { "epoch": 0.49140900916516805, "grad_norm": 2.2906429767608643, "learning_rate": 1.026986117177583e-05, "loss": 0.2392, "step": 22680 }, { "epoch": 0.4915173444846489, "grad_norm": 1.45255446434021, "learning_rate": 1.0266458941283216e-05, "loss": 0.3631, "step": 22685 }, { "epoch": 0.49162567980412974, "grad_norm": 1.2271900177001953, "learning_rate": 1.0263056679925323e-05, "loss": 0.2298, "step": 22690 }, { "epoch": 0.4917340151236106, "grad_norm": 1.6193050146102905, "learning_rate": 1.0259654388096255e-05, "loss": 0.2891, "step": 22695 }, { "epoch": 0.49184235044309144, "grad_norm": 1.8797533512115479, "learning_rate": 1.0256252066190113e-05, "loss": 0.3211, "step": 22700 }, { "epoch": 0.49195068576257234, "grad_norm": 1.5399805307388306, "learning_rate": 1.0252849714601011e-05, "loss": 0.3289, "step": 22705 }, { "epoch": 0.4920590210820532, "grad_norm": 1.2855582237243652, "learning_rate": 1.0249447333723057e-05, "loss": 0.2745, "step": 22710 }, { "epoch": 0.49216735640153403, "grad_norm": 1.0078588724136353, "learning_rate": 1.0246044923950364e-05, "loss": 0.2482, "step": 22715 }, { "epoch": 0.4922756917210149, "grad_norm": 1.7325303554534912, "learning_rate": 1.0242642485677054e-05, "loss": 0.3029, "step": 22720 }, { "epoch": 0.4923840270404957, "grad_norm": 1.466274380683899, "learning_rate": 1.0239240019297248e-05, "loss": 0.3097, "step": 22725 }, { "epoch": 0.4924923623599766, "grad_norm": 1.1490886211395264, "learning_rate": 1.0235837525205065e-05, "loss": 0.22, "step": 22730 }, { "epoch": 0.49260069767945747, "grad_norm": 2.1641833782196045, "learning_rate": 1.0232435003794641e-05, "loss": 0.2771, "step": 22735 }, { "epoch": 0.4927090329989383, "grad_norm": 1.858945608139038, "learning_rate": 1.0229032455460104e-05, "loss": 0.275, "step": 22740 }, { "epoch": 0.49281736831841916, "grad_norm": 1.4677013158798218, "learning_rate": 1.0225629880595586e-05, "loss": 0.2867, "step": 22745 }, { "epoch": 0.4929257036379, "grad_norm": 1.8303033113479614, "learning_rate": 1.0222227279595231e-05, "loss": 0.2291, "step": 22750 }, { "epoch": 0.4930340389573809, "grad_norm": 1.427482008934021, "learning_rate": 1.021882465285317e-05, "loss": 0.3069, "step": 22755 }, { "epoch": 0.49314237427686175, "grad_norm": 1.6945981979370117, "learning_rate": 1.0215422000763553e-05, "loss": 0.309, "step": 22760 }, { "epoch": 0.4932507095963426, "grad_norm": 1.3586276769638062, "learning_rate": 1.0212019323720524e-05, "loss": 0.1767, "step": 22765 }, { "epoch": 0.49335904491582344, "grad_norm": 1.1622471809387207, "learning_rate": 1.020861662211823e-05, "loss": 0.2679, "step": 22770 }, { "epoch": 0.4934673802353043, "grad_norm": 1.5230228900909424, "learning_rate": 1.0205213896350828e-05, "loss": 0.3005, "step": 22775 }, { "epoch": 0.4935757155547852, "grad_norm": 1.0164835453033447, "learning_rate": 1.0201811146812466e-05, "loss": 0.2757, "step": 22780 }, { "epoch": 0.49368405087426603, "grad_norm": 1.0822159051895142, "learning_rate": 1.019840837389731e-05, "loss": 0.2801, "step": 22785 }, { "epoch": 0.4937923861937469, "grad_norm": 1.8084850311279297, "learning_rate": 1.0195005577999517e-05, "loss": 0.3445, "step": 22790 }, { "epoch": 0.4939007215132277, "grad_norm": 1.751882553100586, "learning_rate": 1.0191602759513249e-05, "loss": 0.2779, "step": 22795 }, { "epoch": 0.4940090568327086, "grad_norm": 1.3319603204727173, "learning_rate": 1.0188199918832666e-05, "loss": 0.238, "step": 22800 }, { "epoch": 0.49411739215218947, "grad_norm": 1.4356498718261719, "learning_rate": 1.0184797056351945e-05, "loss": 0.2941, "step": 22805 }, { "epoch": 0.4942257274716703, "grad_norm": 1.6992801427841187, "learning_rate": 1.0181394172465255e-05, "loss": 0.2037, "step": 22810 }, { "epoch": 0.49433406279115116, "grad_norm": 1.5495795011520386, "learning_rate": 1.0177991267566766e-05, "loss": 0.3311, "step": 22815 }, { "epoch": 0.494442398110632, "grad_norm": 1.6517817974090576, "learning_rate": 1.0174588342050655e-05, "loss": 0.2606, "step": 22820 }, { "epoch": 0.4945507334301129, "grad_norm": 1.5063388347625732, "learning_rate": 1.0171185396311101e-05, "loss": 0.315, "step": 22825 }, { "epoch": 0.49465906874959376, "grad_norm": 1.3098379373550415, "learning_rate": 1.0167782430742282e-05, "loss": 0.3964, "step": 22830 }, { "epoch": 0.4947674040690746, "grad_norm": 1.3731242418289185, "learning_rate": 1.0164379445738381e-05, "loss": 0.1975, "step": 22835 }, { "epoch": 0.49487573938855545, "grad_norm": 1.9635676145553589, "learning_rate": 1.0160976441693585e-05, "loss": 0.3209, "step": 22840 }, { "epoch": 0.4949840747080363, "grad_norm": 1.646999478340149, "learning_rate": 1.0157573419002078e-05, "loss": 0.3202, "step": 22845 }, { "epoch": 0.4950924100275172, "grad_norm": 1.2837707996368408, "learning_rate": 1.015417037805805e-05, "loss": 0.2531, "step": 22850 }, { "epoch": 0.49520074534699804, "grad_norm": 1.884046196937561, "learning_rate": 1.0150767319255697e-05, "loss": 0.3678, "step": 22855 }, { "epoch": 0.4953090806664789, "grad_norm": 1.7666652202606201, "learning_rate": 1.0147364242989206e-05, "loss": 0.2688, "step": 22860 }, { "epoch": 0.49541741598595973, "grad_norm": 1.784169316291809, "learning_rate": 1.0143961149652776e-05, "loss": 0.3563, "step": 22865 }, { "epoch": 0.4955257513054406, "grad_norm": 0.9456453323364258, "learning_rate": 1.0140558039640602e-05, "loss": 0.1519, "step": 22870 }, { "epoch": 0.4956340866249215, "grad_norm": 2.1071603298187256, "learning_rate": 1.0137154913346887e-05, "loss": 0.303, "step": 22875 }, { "epoch": 0.4957424219444023, "grad_norm": 1.7927565574645996, "learning_rate": 1.013375177116583e-05, "loss": 0.2298, "step": 22880 }, { "epoch": 0.49585075726388317, "grad_norm": 1.1078513860702515, "learning_rate": 1.0130348613491632e-05, "loss": 0.3772, "step": 22885 }, { "epoch": 0.495959092583364, "grad_norm": 1.3961437940597534, "learning_rate": 1.0126945440718499e-05, "loss": 0.2876, "step": 22890 }, { "epoch": 0.49606742790284486, "grad_norm": 1.582431435585022, "learning_rate": 1.012354225324064e-05, "loss": 0.286, "step": 22895 }, { "epoch": 0.49617576322232576, "grad_norm": 1.3480761051177979, "learning_rate": 1.012013905145226e-05, "loss": 0.3026, "step": 22900 }, { "epoch": 0.4962840985418066, "grad_norm": 1.206386685371399, "learning_rate": 1.0116735835747572e-05, "loss": 0.2646, "step": 22905 }, { "epoch": 0.49639243386128745, "grad_norm": 1.0576448440551758, "learning_rate": 1.0113332606520787e-05, "loss": 0.3036, "step": 22910 }, { "epoch": 0.4965007691807683, "grad_norm": 1.1784874200820923, "learning_rate": 1.0109929364166117e-05, "loss": 0.274, "step": 22915 }, { "epoch": 0.4966091045002492, "grad_norm": 1.5271861553192139, "learning_rate": 1.0106526109077774e-05, "loss": 0.2933, "step": 22920 }, { "epoch": 0.49671743981973004, "grad_norm": 1.0862623453140259, "learning_rate": 1.010312284164998e-05, "loss": 0.3445, "step": 22925 }, { "epoch": 0.4968257751392109, "grad_norm": 1.2636831998825073, "learning_rate": 1.0099719562276948e-05, "loss": 0.3146, "step": 22930 }, { "epoch": 0.49693411045869174, "grad_norm": 1.3106863498687744, "learning_rate": 1.0096316271352899e-05, "loss": 0.3064, "step": 22935 }, { "epoch": 0.4970424457781726, "grad_norm": 1.9666073322296143, "learning_rate": 1.0092912969272052e-05, "loss": 0.3922, "step": 22940 }, { "epoch": 0.4971507810976535, "grad_norm": 1.4294594526290894, "learning_rate": 1.0089509656428632e-05, "loss": 0.2299, "step": 22945 }, { "epoch": 0.49725911641713433, "grad_norm": 1.7249301671981812, "learning_rate": 1.0086106333216856e-05, "loss": 0.3329, "step": 22950 }, { "epoch": 0.4973674517366152, "grad_norm": 2.535641670227051, "learning_rate": 1.0082703000030953e-05, "loss": 0.2508, "step": 22955 }, { "epoch": 0.497475787056096, "grad_norm": 1.1996108293533325, "learning_rate": 1.0079299657265147e-05, "loss": 0.2715, "step": 22960 }, { "epoch": 0.49758412237557686, "grad_norm": 1.3898383378982544, "learning_rate": 1.0075896305313661e-05, "loss": 0.1766, "step": 22965 }, { "epoch": 0.49769245769505777, "grad_norm": 1.613255262374878, "learning_rate": 1.0072492944570726e-05, "loss": 0.2217, "step": 22970 }, { "epoch": 0.4978007930145386, "grad_norm": 1.3666458129882812, "learning_rate": 1.006908957543057e-05, "loss": 0.1804, "step": 22975 }, { "epoch": 0.49790912833401946, "grad_norm": 1.2362943887710571, "learning_rate": 1.006568619828742e-05, "loss": 0.2103, "step": 22980 }, { "epoch": 0.4980174636535003, "grad_norm": 1.7112581729888916, "learning_rate": 1.006228281353551e-05, "loss": 0.283, "step": 22985 }, { "epoch": 0.49812579897298115, "grad_norm": 1.613898515701294, "learning_rate": 1.005887942156907e-05, "loss": 0.2097, "step": 22990 }, { "epoch": 0.49823413429246205, "grad_norm": 1.5014621019363403, "learning_rate": 1.0055476022782328e-05, "loss": 0.2945, "step": 22995 }, { "epoch": 0.4983424696119429, "grad_norm": 1.4821031093597412, "learning_rate": 1.0052072617569525e-05, "loss": 0.2458, "step": 23000 }, { "epoch": 0.49845080493142374, "grad_norm": 1.6392079591751099, "learning_rate": 1.0048669206324884e-05, "loss": 0.2668, "step": 23005 }, { "epoch": 0.4985591402509046, "grad_norm": 1.2653436660766602, "learning_rate": 1.0045265789442646e-05, "loss": 0.2668, "step": 23010 }, { "epoch": 0.49866747557038543, "grad_norm": 2.1492955684661865, "learning_rate": 1.0041862367317043e-05, "loss": 0.151, "step": 23015 }, { "epoch": 0.49877581088986633, "grad_norm": 1.445326805114746, "learning_rate": 1.0038458940342313e-05, "loss": 0.2884, "step": 23020 }, { "epoch": 0.4988841462093472, "grad_norm": 1.0143766403198242, "learning_rate": 1.003505550891269e-05, "loss": 0.2207, "step": 23025 }, { "epoch": 0.498992481528828, "grad_norm": 1.902323842048645, "learning_rate": 1.0031652073422415e-05, "loss": 0.2775, "step": 23030 }, { "epoch": 0.49910081684830887, "grad_norm": 1.3790534734725952, "learning_rate": 1.0028248634265721e-05, "loss": 0.2376, "step": 23035 }, { "epoch": 0.4992091521677897, "grad_norm": 2.558130979537964, "learning_rate": 1.0024845191836843e-05, "loss": 0.2523, "step": 23040 }, { "epoch": 0.4993174874872706, "grad_norm": 1.1192280054092407, "learning_rate": 1.002144174653002e-05, "loss": 0.1956, "step": 23045 }, { "epoch": 0.49942582280675146, "grad_norm": 1.2796040773391724, "learning_rate": 1.0018038298739495e-05, "loss": 0.2282, "step": 23050 }, { "epoch": 0.4995341581262323, "grad_norm": 2.6036200523376465, "learning_rate": 1.0014634848859503e-05, "loss": 0.2804, "step": 23055 }, { "epoch": 0.49964249344571315, "grad_norm": 1.9961957931518555, "learning_rate": 1.0011231397284281e-05, "loss": 0.3089, "step": 23060 }, { "epoch": 0.49975082876519406, "grad_norm": 1.553466796875, "learning_rate": 1.0007827944408073e-05, "loss": 0.3783, "step": 23065 }, { "epoch": 0.4998591640846749, "grad_norm": 1.1613988876342773, "learning_rate": 1.0004424490625115e-05, "loss": 0.3241, "step": 23070 }, { "epoch": 0.49996749940415575, "grad_norm": 1.6840649843215942, "learning_rate": 1.0001021036329643e-05, "loss": 0.265, "step": 23075 }, { "epoch": 0.5000758347236366, "grad_norm": 1.4608811140060425, "learning_rate": 9.997617581915901e-06, "loss": 0.3366, "step": 23080 }, { "epoch": 0.5001841700431174, "grad_norm": 1.1630187034606934, "learning_rate": 9.994214127778129e-06, "loss": 0.3292, "step": 23085 }, { "epoch": 0.5002925053625983, "grad_norm": 1.4948086738586426, "learning_rate": 9.99081067431056e-06, "loss": 0.3368, "step": 23090 }, { "epoch": 0.5004008406820791, "grad_norm": 1.4256713390350342, "learning_rate": 9.987407221907438e-06, "loss": 0.1696, "step": 23095 }, { "epoch": 0.50050917600156, "grad_norm": 1.943464756011963, "learning_rate": 9.984003770963e-06, "loss": 0.3335, "step": 23100 }, { "epoch": 0.5006175113210409, "grad_norm": 1.6234554052352905, "learning_rate": 9.980600321871484e-06, "loss": 0.3123, "step": 23105 }, { "epoch": 0.5007258466405218, "grad_norm": 1.2354815006256104, "learning_rate": 9.977196875027133e-06, "loss": 0.2609, "step": 23110 }, { "epoch": 0.5008341819600026, "grad_norm": 2.1495561599731445, "learning_rate": 9.97379343082418e-06, "loss": 0.2889, "step": 23115 }, { "epoch": 0.5009425172794835, "grad_norm": 1.2008260488510132, "learning_rate": 9.970389989656867e-06, "loss": 0.1762, "step": 23120 }, { "epoch": 0.5010508525989643, "grad_norm": 1.7054985761642456, "learning_rate": 9.96698655191943e-06, "loss": 0.2951, "step": 23125 }, { "epoch": 0.5011591879184452, "grad_norm": 1.6895016431808472, "learning_rate": 9.963583118006107e-06, "loss": 0.2291, "step": 23130 }, { "epoch": 0.501267523237926, "grad_norm": 2.9834506511688232, "learning_rate": 9.960179688311134e-06, "loss": 0.2765, "step": 23135 }, { "epoch": 0.5013758585574069, "grad_norm": 2.1475205421447754, "learning_rate": 9.956776263228746e-06, "loss": 0.3108, "step": 23140 }, { "epoch": 0.5014841938768877, "grad_norm": 1.7305502891540527, "learning_rate": 9.953372843153183e-06, "loss": 0.2438, "step": 23145 }, { "epoch": 0.5015925291963687, "grad_norm": 1.6978875398635864, "learning_rate": 9.949969428478674e-06, "loss": 0.2591, "step": 23150 }, { "epoch": 0.5017008645158495, "grad_norm": 1.7197951078414917, "learning_rate": 9.946566019599459e-06, "loss": 0.2277, "step": 23155 }, { "epoch": 0.5018091998353303, "grad_norm": 1.9694147109985352, "learning_rate": 9.943162616909772e-06, "loss": 0.3468, "step": 23160 }, { "epoch": 0.5019175351548112, "grad_norm": 1.7171683311462402, "learning_rate": 9.939759220803843e-06, "loss": 0.216, "step": 23165 }, { "epoch": 0.502025870474292, "grad_norm": 1.7690404653549194, "learning_rate": 9.936355831675905e-06, "loss": 0.3242, "step": 23170 }, { "epoch": 0.5021342057937729, "grad_norm": 1.586159348487854, "learning_rate": 9.932952449920193e-06, "loss": 0.2691, "step": 23175 }, { "epoch": 0.5022425411132537, "grad_norm": 1.4576616287231445, "learning_rate": 9.929549075930933e-06, "loss": 0.2336, "step": 23180 }, { "epoch": 0.5023508764327346, "grad_norm": 0.689250111579895, "learning_rate": 9.926145710102358e-06, "loss": 0.3092, "step": 23185 }, { "epoch": 0.5024592117522154, "grad_norm": 1.564945936203003, "learning_rate": 9.922742352828697e-06, "loss": 0.2481, "step": 23190 }, { "epoch": 0.5025675470716963, "grad_norm": 2.1212613582611084, "learning_rate": 9.919339004504176e-06, "loss": 0.2692, "step": 23195 }, { "epoch": 0.5026758823911772, "grad_norm": 0.9318692088127136, "learning_rate": 9.915935665523023e-06, "loss": 0.2535, "step": 23200 }, { "epoch": 0.5027842177106581, "grad_norm": 1.0810799598693848, "learning_rate": 9.912532336279464e-06, "loss": 0.2557, "step": 23205 }, { "epoch": 0.5028925530301389, "grad_norm": 1.6165668964385986, "learning_rate": 9.909129017167724e-06, "loss": 0.2737, "step": 23210 }, { "epoch": 0.5030008883496198, "grad_norm": 1.3675154447555542, "learning_rate": 9.905725708582025e-06, "loss": 0.2605, "step": 23215 }, { "epoch": 0.5031092236691006, "grad_norm": 1.0956668853759766, "learning_rate": 9.902322410916591e-06, "loss": 0.2041, "step": 23220 }, { "epoch": 0.5032175589885814, "grad_norm": 1.8141279220581055, "learning_rate": 9.898919124565644e-06, "loss": 0.3085, "step": 23225 }, { "epoch": 0.5033258943080623, "grad_norm": 1.8923035860061646, "learning_rate": 9.895515849923401e-06, "loss": 0.2603, "step": 23230 }, { "epoch": 0.5034342296275431, "grad_norm": 1.297271966934204, "learning_rate": 9.892112587384083e-06, "loss": 0.2425, "step": 23235 }, { "epoch": 0.503542564947024, "grad_norm": 1.7978895902633667, "learning_rate": 9.888709337341905e-06, "loss": 0.3462, "step": 23240 }, { "epoch": 0.5036509002665048, "grad_norm": 1.8725931644439697, "learning_rate": 9.885306100191082e-06, "loss": 0.384, "step": 23245 }, { "epoch": 0.5037592355859858, "grad_norm": 1.2543816566467285, "learning_rate": 9.881902876325825e-06, "loss": 0.2302, "step": 23250 }, { "epoch": 0.5038675709054666, "grad_norm": 1.889076828956604, "learning_rate": 9.878499666140357e-06, "loss": 0.3223, "step": 23255 }, { "epoch": 0.5039759062249475, "grad_norm": 1.4907457828521729, "learning_rate": 9.87509647002888e-06, "loss": 0.182, "step": 23260 }, { "epoch": 0.5040842415444283, "grad_norm": 0.8428869843482971, "learning_rate": 9.871693288385606e-06, "loss": 0.1848, "step": 23265 }, { "epoch": 0.5041925768639092, "grad_norm": 1.453458547592163, "learning_rate": 9.868290121604745e-06, "loss": 0.3122, "step": 23270 }, { "epoch": 0.50430091218339, "grad_norm": 1.681442141532898, "learning_rate": 9.864886970080501e-06, "loss": 0.2572, "step": 23275 }, { "epoch": 0.5044092475028709, "grad_norm": 2.177238702774048, "learning_rate": 9.861483834207073e-06, "loss": 0.2637, "step": 23280 }, { "epoch": 0.5045175828223517, "grad_norm": 0.899417519569397, "learning_rate": 9.858080714378672e-06, "loss": 0.3027, "step": 23285 }, { "epoch": 0.5046259181418326, "grad_norm": 1.6129887104034424, "learning_rate": 9.854677610989491e-06, "loss": 0.2901, "step": 23290 }, { "epoch": 0.5047342534613135, "grad_norm": 1.658077597618103, "learning_rate": 9.85127452443373e-06, "loss": 0.3131, "step": 23295 }, { "epoch": 0.5048425887807944, "grad_norm": 1.6490588188171387, "learning_rate": 9.847871455105592e-06, "loss": 0.331, "step": 23300 }, { "epoch": 0.5049509241002752, "grad_norm": 1.7715306282043457, "learning_rate": 9.844468403399264e-06, "loss": 0.2539, "step": 23305 }, { "epoch": 0.505059259419756, "grad_norm": 1.3015310764312744, "learning_rate": 9.841065369708945e-06, "loss": 0.3091, "step": 23310 }, { "epoch": 0.5051675947392369, "grad_norm": 1.0386106967926025, "learning_rate": 9.837662354428821e-06, "loss": 0.1656, "step": 23315 }, { "epoch": 0.5052759300587177, "grad_norm": 1.585291862487793, "learning_rate": 9.83425935795308e-06, "loss": 0.2546, "step": 23320 }, { "epoch": 0.5053842653781986, "grad_norm": 1.602689504623413, "learning_rate": 9.830856380675911e-06, "loss": 0.2415, "step": 23325 }, { "epoch": 0.5054926006976794, "grad_norm": 1.5302646160125732, "learning_rate": 9.827453422991496e-06, "loss": 0.2766, "step": 23330 }, { "epoch": 0.5056009360171603, "grad_norm": 1.0277751684188843, "learning_rate": 9.824050485294018e-06, "loss": 0.2931, "step": 23335 }, { "epoch": 0.5057092713366411, "grad_norm": 1.3632487058639526, "learning_rate": 9.820647567977655e-06, "loss": 0.3289, "step": 23340 }, { "epoch": 0.5058176066561221, "grad_norm": 1.6897432804107666, "learning_rate": 9.817244671436582e-06, "loss": 0.3329, "step": 23345 }, { "epoch": 0.5059259419756029, "grad_norm": 1.4765543937683105, "learning_rate": 9.813841796064977e-06, "loss": 0.2414, "step": 23350 }, { "epoch": 0.5060342772950838, "grad_norm": 1.587964415550232, "learning_rate": 9.810438942257014e-06, "loss": 0.2692, "step": 23355 }, { "epoch": 0.5061426126145646, "grad_norm": 1.5950815677642822, "learning_rate": 9.807036110406858e-06, "loss": 0.2521, "step": 23360 }, { "epoch": 0.5062509479340455, "grad_norm": 1.5103689432144165, "learning_rate": 9.803633300908679e-06, "loss": 0.2644, "step": 23365 }, { "epoch": 0.5063592832535263, "grad_norm": 1.3050774335861206, "learning_rate": 9.80023051415664e-06, "loss": 0.2827, "step": 23370 }, { "epoch": 0.5064676185730072, "grad_norm": 1.7705345153808594, "learning_rate": 9.796827750544903e-06, "loss": 0.3276, "step": 23375 }, { "epoch": 0.506575953892488, "grad_norm": 1.250044584274292, "learning_rate": 9.793425010467628e-06, "loss": 0.2691, "step": 23380 }, { "epoch": 0.5066842892119688, "grad_norm": 1.1252025365829468, "learning_rate": 9.790022294318971e-06, "loss": 0.2079, "step": 23385 }, { "epoch": 0.5067926245314498, "grad_norm": 0.6798468828201294, "learning_rate": 9.786619602493084e-06, "loss": 0.1935, "step": 23390 }, { "epoch": 0.5069009598509306, "grad_norm": 1.6061536073684692, "learning_rate": 9.783216935384122e-06, "loss": 0.2146, "step": 23395 }, { "epoch": 0.5070092951704115, "grad_norm": 1.444897174835205, "learning_rate": 9.77981429338623e-06, "loss": 0.1886, "step": 23400 }, { "epoch": 0.5071176304898923, "grad_norm": 0.8652638792991638, "learning_rate": 9.776411676893554e-06, "loss": 0.1373, "step": 23405 }, { "epoch": 0.5072259658093732, "grad_norm": 1.5002686977386475, "learning_rate": 9.773009086300235e-06, "loss": 0.264, "step": 23410 }, { "epoch": 0.507334301128854, "grad_norm": 1.0018285512924194, "learning_rate": 9.769606522000414e-06, "loss": 0.327, "step": 23415 }, { "epoch": 0.5074426364483349, "grad_norm": 1.4250630140304565, "learning_rate": 9.766203984388225e-06, "loss": 0.2329, "step": 23420 }, { "epoch": 0.5075509717678157, "grad_norm": 1.5610922574996948, "learning_rate": 9.762801473857803e-06, "loss": 0.3224, "step": 23425 }, { "epoch": 0.5076593070872966, "grad_norm": 2.112874746322632, "learning_rate": 9.759398990803278e-06, "loss": 0.3853, "step": 23430 }, { "epoch": 0.5077676424067774, "grad_norm": 1.4141731262207031, "learning_rate": 9.755996535618775e-06, "loss": 0.3191, "step": 23435 }, { "epoch": 0.5078759777262584, "grad_norm": 1.690651774406433, "learning_rate": 9.752594108698416e-06, "loss": 0.299, "step": 23440 }, { "epoch": 0.5079843130457392, "grad_norm": 1.8912510871887207, "learning_rate": 9.749191710436325e-06, "loss": 0.2891, "step": 23445 }, { "epoch": 0.5080926483652201, "grad_norm": 1.8368514776229858, "learning_rate": 9.745789341226617e-06, "loss": 0.2294, "step": 23450 }, { "epoch": 0.5082009836847009, "grad_norm": 1.4175102710723877, "learning_rate": 9.742387001463406e-06, "loss": 0.2912, "step": 23455 }, { "epoch": 0.5083093190041817, "grad_norm": 1.1227577924728394, "learning_rate": 9.738984691540802e-06, "loss": 0.2425, "step": 23460 }, { "epoch": 0.5084176543236626, "grad_norm": 1.5344326496124268, "learning_rate": 9.735582411852912e-06, "loss": 0.2413, "step": 23465 }, { "epoch": 0.5085259896431434, "grad_norm": 2.345665216445923, "learning_rate": 9.732180162793839e-06, "loss": 0.2347, "step": 23470 }, { "epoch": 0.5086343249626243, "grad_norm": 1.8746440410614014, "learning_rate": 9.72877794475768e-06, "loss": 0.3069, "step": 23475 }, { "epoch": 0.5087426602821051, "grad_norm": 1.091300129890442, "learning_rate": 9.725375758138537e-06, "loss": 0.3474, "step": 23480 }, { "epoch": 0.508850995601586, "grad_norm": 1.8847777843475342, "learning_rate": 9.721973603330496e-06, "loss": 0.2986, "step": 23485 }, { "epoch": 0.5089593309210669, "grad_norm": 1.3417935371398926, "learning_rate": 9.718571480727645e-06, "loss": 0.2267, "step": 23490 }, { "epoch": 0.5090676662405478, "grad_norm": 2.370159864425659, "learning_rate": 9.715169390724076e-06, "loss": 0.3908, "step": 23495 }, { "epoch": 0.5091760015600286, "grad_norm": 1.5803102254867554, "learning_rate": 9.711767333713865e-06, "loss": 0.3909, "step": 23500 }, { "epoch": 0.5092843368795095, "grad_norm": 2.304096221923828, "learning_rate": 9.70836531009109e-06, "loss": 0.2981, "step": 23505 }, { "epoch": 0.5093926721989903, "grad_norm": 1.4434726238250732, "learning_rate": 9.704963320249827e-06, "loss": 0.3067, "step": 23510 }, { "epoch": 0.5095010075184712, "grad_norm": 1.8697866201400757, "learning_rate": 9.701561364584147e-06, "loss": 0.1822, "step": 23515 }, { "epoch": 0.509609342837952, "grad_norm": 2.350677967071533, "learning_rate": 9.698159443488107e-06, "loss": 0.2672, "step": 23520 }, { "epoch": 0.5097176781574329, "grad_norm": 1.1234526634216309, "learning_rate": 9.694757557355777e-06, "loss": 0.3048, "step": 23525 }, { "epoch": 0.5098260134769137, "grad_norm": 1.2328814268112183, "learning_rate": 9.691355706581209e-06, "loss": 0.3096, "step": 23530 }, { "epoch": 0.5099343487963947, "grad_norm": 2.1680970191955566, "learning_rate": 9.687953891558456e-06, "loss": 0.3268, "step": 23535 }, { "epoch": 0.5100426841158755, "grad_norm": 1.2348432540893555, "learning_rate": 9.684552112681573e-06, "loss": 0.2706, "step": 23540 }, { "epoch": 0.5101510194353563, "grad_norm": 1.8671306371688843, "learning_rate": 9.681150370344603e-06, "loss": 0.3101, "step": 23545 }, { "epoch": 0.5102593547548372, "grad_norm": 1.8523777723312378, "learning_rate": 9.67774866494159e-06, "loss": 0.3848, "step": 23550 }, { "epoch": 0.510367690074318, "grad_norm": 1.238904356956482, "learning_rate": 9.674346996866564e-06, "loss": 0.2318, "step": 23555 }, { "epoch": 0.5104760253937989, "grad_norm": 1.782447338104248, "learning_rate": 9.670945366513559e-06, "loss": 0.3025, "step": 23560 }, { "epoch": 0.5105843607132797, "grad_norm": 1.2257741689682007, "learning_rate": 9.667543774276606e-06, "loss": 0.1994, "step": 23565 }, { "epoch": 0.5106926960327606, "grad_norm": 1.4105056524276733, "learning_rate": 9.664142220549723e-06, "loss": 0.2362, "step": 23570 }, { "epoch": 0.5108010313522414, "grad_norm": 2.3657066822052, "learning_rate": 9.660740705726937e-06, "loss": 0.386, "step": 23575 }, { "epoch": 0.5109093666717223, "grad_norm": 1.1507488489151, "learning_rate": 9.657339230202255e-06, "loss": 0.236, "step": 23580 }, { "epoch": 0.5110177019912032, "grad_norm": 1.6165578365325928, "learning_rate": 9.653937794369689e-06, "loss": 0.1997, "step": 23585 }, { "epoch": 0.5111260373106841, "grad_norm": 1.8037501573562622, "learning_rate": 9.650536398623248e-06, "loss": 0.3097, "step": 23590 }, { "epoch": 0.5112343726301649, "grad_norm": 1.8305648565292358, "learning_rate": 9.64713504335693e-06, "loss": 0.3127, "step": 23595 }, { "epoch": 0.5113427079496458, "grad_norm": 0.8747916221618652, "learning_rate": 9.64373372896473e-06, "loss": 0.2285, "step": 23600 }, { "epoch": 0.5114510432691266, "grad_norm": 1.599231243133545, "learning_rate": 9.640332455840642e-06, "loss": 0.2178, "step": 23605 }, { "epoch": 0.5115593785886074, "grad_norm": 1.798103928565979, "learning_rate": 9.636931224378652e-06, "loss": 0.333, "step": 23610 }, { "epoch": 0.5116677139080883, "grad_norm": 1.4560210704803467, "learning_rate": 9.633530034972739e-06, "loss": 0.2874, "step": 23615 }, { "epoch": 0.5117760492275691, "grad_norm": 1.4526739120483398, "learning_rate": 9.630128888016883e-06, "loss": 0.2278, "step": 23620 }, { "epoch": 0.51188438454705, "grad_norm": 1.2776784896850586, "learning_rate": 9.626727783905055e-06, "loss": 0.256, "step": 23625 }, { "epoch": 0.5119927198665308, "grad_norm": 1.1954275369644165, "learning_rate": 9.62332672303122e-06, "loss": 0.2998, "step": 23630 }, { "epoch": 0.5121010551860118, "grad_norm": 1.973629117012024, "learning_rate": 9.61992570578934e-06, "loss": 0.2843, "step": 23635 }, { "epoch": 0.5122093905054926, "grad_norm": 1.2534931898117065, "learning_rate": 9.616524732573376e-06, "loss": 0.2326, "step": 23640 }, { "epoch": 0.5123177258249735, "grad_norm": 1.7800185680389404, "learning_rate": 9.613123803777278e-06, "loss": 0.3279, "step": 23645 }, { "epoch": 0.5124260611444543, "grad_norm": 1.5081470012664795, "learning_rate": 9.60972291979499e-06, "loss": 0.2551, "step": 23650 }, { "epoch": 0.5125343964639352, "grad_norm": 1.1907196044921875, "learning_rate": 9.606322081020456e-06, "loss": 0.2907, "step": 23655 }, { "epoch": 0.512642731783416, "grad_norm": 1.5671617984771729, "learning_rate": 9.602921287847613e-06, "loss": 0.2222, "step": 23660 }, { "epoch": 0.5127510671028969, "grad_norm": 2.3130252361297607, "learning_rate": 9.59952054067039e-06, "loss": 0.2705, "step": 23665 }, { "epoch": 0.5128594024223777, "grad_norm": 1.2588348388671875, "learning_rate": 9.596119839882713e-06, "loss": 0.2156, "step": 23670 }, { "epoch": 0.5129677377418586, "grad_norm": 2.812718152999878, "learning_rate": 9.592719185878501e-06, "loss": 0.338, "step": 23675 }, { "epoch": 0.5130760730613395, "grad_norm": 1.265802025794983, "learning_rate": 9.589318579051671e-06, "loss": 0.2858, "step": 23680 }, { "epoch": 0.5131844083808204, "grad_norm": 1.5232971906661987, "learning_rate": 9.58591801979613e-06, "loss": 0.2804, "step": 23685 }, { "epoch": 0.5132927437003012, "grad_norm": 0.9627498984336853, "learning_rate": 9.582517508505788e-06, "loss": 0.2316, "step": 23690 }, { "epoch": 0.513401079019782, "grad_norm": 1.4509414434432983, "learning_rate": 9.579117045574536e-06, "loss": 0.3235, "step": 23695 }, { "epoch": 0.5135094143392629, "grad_norm": 0.8777430057525635, "learning_rate": 9.57571663139627e-06, "loss": 0.2294, "step": 23700 }, { "epoch": 0.5136177496587437, "grad_norm": 1.5889573097229004, "learning_rate": 9.572316266364876e-06, "loss": 0.2893, "step": 23705 }, { "epoch": 0.5137260849782246, "grad_norm": 1.1908711194992065, "learning_rate": 9.568915950874235e-06, "loss": 0.232, "step": 23710 }, { "epoch": 0.5138344202977054, "grad_norm": 2.0151782035827637, "learning_rate": 9.565515685318225e-06, "loss": 0.2654, "step": 23715 }, { "epoch": 0.5139427556171863, "grad_norm": 1.2205406427383423, "learning_rate": 9.562115470090716e-06, "loss": 0.2444, "step": 23720 }, { "epoch": 0.5140510909366671, "grad_norm": 1.2182401418685913, "learning_rate": 9.558715305585569e-06, "loss": 0.268, "step": 23725 }, { "epoch": 0.5141594262561481, "grad_norm": 1.30672287940979, "learning_rate": 9.555315192196639e-06, "loss": 0.1352, "step": 23730 }, { "epoch": 0.5142677615756289, "grad_norm": 2.0715253353118896, "learning_rate": 9.551915130317784e-06, "loss": 0.3003, "step": 23735 }, { "epoch": 0.5143760968951098, "grad_norm": 1.6747212409973145, "learning_rate": 9.548515120342851e-06, "loss": 0.2133, "step": 23740 }, { "epoch": 0.5144844322145906, "grad_norm": 1.9761172533035278, "learning_rate": 9.54511516266568e-06, "loss": 0.2626, "step": 23745 }, { "epoch": 0.5145927675340715, "grad_norm": 1.3006081581115723, "learning_rate": 9.541715257680099e-06, "loss": 0.1265, "step": 23750 }, { "epoch": 0.5147011028535523, "grad_norm": 1.4737763404846191, "learning_rate": 9.538315405779947e-06, "loss": 0.2671, "step": 23755 }, { "epoch": 0.5148094381730332, "grad_norm": 1.507758378982544, "learning_rate": 9.534915607359034e-06, "loss": 0.3531, "step": 23760 }, { "epoch": 0.514917773492514, "grad_norm": 1.7005430459976196, "learning_rate": 9.531515862811182e-06, "loss": 0.2656, "step": 23765 }, { "epoch": 0.5150261088119948, "grad_norm": 2.09629225730896, "learning_rate": 9.528116172530198e-06, "loss": 0.2649, "step": 23770 }, { "epoch": 0.5151344441314757, "grad_norm": 1.675297498703003, "learning_rate": 9.524716536909888e-06, "loss": 0.3514, "step": 23775 }, { "epoch": 0.5152427794509566, "grad_norm": 1.6216846704483032, "learning_rate": 9.521316956344043e-06, "loss": 0.3235, "step": 23780 }, { "epoch": 0.5153511147704375, "grad_norm": 1.4774417877197266, "learning_rate": 9.517917431226463e-06, "loss": 0.2041, "step": 23785 }, { "epoch": 0.5154594500899183, "grad_norm": 1.4371848106384277, "learning_rate": 9.514517961950925e-06, "loss": 0.264, "step": 23790 }, { "epoch": 0.5155677854093992, "grad_norm": 1.644705057144165, "learning_rate": 9.511118548911213e-06, "loss": 0.2366, "step": 23795 }, { "epoch": 0.51567612072888, "grad_norm": 1.3250706195831299, "learning_rate": 9.50771919250109e-06, "loss": 0.2611, "step": 23800 }, { "epoch": 0.5157844560483609, "grad_norm": 2.0206384658813477, "learning_rate": 9.504319893114325e-06, "loss": 0.3844, "step": 23805 }, { "epoch": 0.5158927913678417, "grad_norm": 1.6855556964874268, "learning_rate": 9.500920651144675e-06, "loss": 0.3165, "step": 23810 }, { "epoch": 0.5160011266873226, "grad_norm": 2.0753941535949707, "learning_rate": 9.49752146698589e-06, "loss": 0.3268, "step": 23815 }, { "epoch": 0.5161094620068034, "grad_norm": 1.4987740516662598, "learning_rate": 9.494122341031717e-06, "loss": 0.264, "step": 23820 }, { "epoch": 0.5162177973262844, "grad_norm": 1.6641353368759155, "learning_rate": 9.490723273675888e-06, "loss": 0.3256, "step": 23825 }, { "epoch": 0.5163261326457652, "grad_norm": 2.1755175590515137, "learning_rate": 9.487324265312146e-06, "loss": 0.2696, "step": 23830 }, { "epoch": 0.5164344679652461, "grad_norm": 1.714569091796875, "learning_rate": 9.483925316334204e-06, "loss": 0.356, "step": 23835 }, { "epoch": 0.5165428032847269, "grad_norm": 1.1741414070129395, "learning_rate": 9.480526427135786e-06, "loss": 0.2787, "step": 23840 }, { "epoch": 0.5166511386042077, "grad_norm": 1.5514317750930786, "learning_rate": 9.477127598110598e-06, "loss": 0.2815, "step": 23845 }, { "epoch": 0.5167594739236886, "grad_norm": 0.9903181195259094, "learning_rate": 9.473728829652345e-06, "loss": 0.2194, "step": 23850 }, { "epoch": 0.5168678092431694, "grad_norm": 1.5767168998718262, "learning_rate": 9.470330122154725e-06, "loss": 0.3066, "step": 23855 }, { "epoch": 0.5169761445626503, "grad_norm": 1.4307140111923218, "learning_rate": 9.466931476011426e-06, "loss": 0.1899, "step": 23860 }, { "epoch": 0.5170844798821311, "grad_norm": 1.530662178993225, "learning_rate": 9.463532891616128e-06, "loss": 0.264, "step": 23865 }, { "epoch": 0.517192815201612, "grad_norm": 2.0891449451446533, "learning_rate": 9.460134369362509e-06, "loss": 0.3395, "step": 23870 }, { "epoch": 0.5173011505210929, "grad_norm": 1.7382256984710693, "learning_rate": 9.456735909644235e-06, "loss": 0.2751, "step": 23875 }, { "epoch": 0.5174094858405738, "grad_norm": 1.300261378288269, "learning_rate": 9.453337512854969e-06, "loss": 0.2308, "step": 23880 }, { "epoch": 0.5175178211600546, "grad_norm": 1.4986063241958618, "learning_rate": 9.449939179388364e-06, "loss": 0.1784, "step": 23885 }, { "epoch": 0.5176261564795355, "grad_norm": 1.7375328540802002, "learning_rate": 9.446540909638063e-06, "loss": 0.268, "step": 23890 }, { "epoch": 0.5177344917990163, "grad_norm": 1.4220352172851562, "learning_rate": 9.443142703997708e-06, "loss": 0.231, "step": 23895 }, { "epoch": 0.5178428271184972, "grad_norm": 1.77134370803833, "learning_rate": 9.439744562860931e-06, "loss": 0.3271, "step": 23900 }, { "epoch": 0.517951162437978, "grad_norm": 2.7683520317077637, "learning_rate": 9.436346486621352e-06, "loss": 0.2522, "step": 23905 }, { "epoch": 0.5180594977574589, "grad_norm": 1.1081115007400513, "learning_rate": 9.432948475672589e-06, "loss": 0.239, "step": 23910 }, { "epoch": 0.5181678330769397, "grad_norm": 1.1958903074264526, "learning_rate": 9.42955053040825e-06, "loss": 0.3288, "step": 23915 }, { "epoch": 0.5182761683964207, "grad_norm": 1.4721299409866333, "learning_rate": 9.426152651221935e-06, "loss": 0.2482, "step": 23920 }, { "epoch": 0.5183845037159015, "grad_norm": 1.7006579637527466, "learning_rate": 9.422754838507242e-06, "loss": 0.2393, "step": 23925 }, { "epoch": 0.5184928390353823, "grad_norm": 1.715650200843811, "learning_rate": 9.419357092657753e-06, "loss": 0.32, "step": 23930 }, { "epoch": 0.5186011743548632, "grad_norm": 1.8361225128173828, "learning_rate": 9.415959414067049e-06, "loss": 0.2794, "step": 23935 }, { "epoch": 0.518709509674344, "grad_norm": 1.174868106842041, "learning_rate": 9.412561803128694e-06, "loss": 0.2456, "step": 23940 }, { "epoch": 0.5188178449938249, "grad_norm": 1.2349194288253784, "learning_rate": 9.409164260236258e-06, "loss": 0.2442, "step": 23945 }, { "epoch": 0.5189261803133057, "grad_norm": 2.2168822288513184, "learning_rate": 9.40576678578329e-06, "loss": 0.2922, "step": 23950 }, { "epoch": 0.5190345156327866, "grad_norm": 2.128835916519165, "learning_rate": 9.402369380163339e-06, "loss": 0.2934, "step": 23955 }, { "epoch": 0.5191428509522674, "grad_norm": 1.6374837160110474, "learning_rate": 9.398972043769942e-06, "loss": 0.2926, "step": 23960 }, { "epoch": 0.5192511862717483, "grad_norm": 1.5961191654205322, "learning_rate": 9.395574776996632e-06, "loss": 0.2632, "step": 23965 }, { "epoch": 0.5193595215912292, "grad_norm": 1.2475149631500244, "learning_rate": 9.392177580236924e-06, "loss": 0.2423, "step": 23970 }, { "epoch": 0.5194678569107101, "grad_norm": 1.7368184328079224, "learning_rate": 9.388780453884344e-06, "loss": 0.196, "step": 23975 }, { "epoch": 0.5195761922301909, "grad_norm": 1.0944271087646484, "learning_rate": 9.38538339833239e-06, "loss": 0.2793, "step": 23980 }, { "epoch": 0.5196845275496718, "grad_norm": 0.9201672673225403, "learning_rate": 9.381986413974564e-06, "loss": 0.2456, "step": 23985 }, { "epoch": 0.5197928628691526, "grad_norm": 1.5912551879882812, "learning_rate": 9.378589501204352e-06, "loss": 0.2857, "step": 23990 }, { "epoch": 0.5199011981886335, "grad_norm": 1.4918979406356812, "learning_rate": 9.37519266041524e-06, "loss": 0.2823, "step": 23995 }, { "epoch": 0.5200095335081143, "grad_norm": 0.35897621512413025, "learning_rate": 9.371795892000699e-06, "loss": 0.2105, "step": 24000 }, { "epoch": 0.5201178688275951, "grad_norm": 2.7516684532165527, "learning_rate": 9.368399196354193e-06, "loss": 0.1959, "step": 24005 }, { "epoch": 0.520226204147076, "grad_norm": 1.2196897268295288, "learning_rate": 9.365002573869177e-06, "loss": 0.2269, "step": 24010 }, { "epoch": 0.5203345394665568, "grad_norm": 1.503996729850769, "learning_rate": 9.361606024939101e-06, "loss": 0.3123, "step": 24015 }, { "epoch": 0.5204428747860378, "grad_norm": 1.13957941532135, "learning_rate": 9.3582095499574e-06, "loss": 0.3234, "step": 24020 }, { "epoch": 0.5205512101055186, "grad_norm": 1.4732295274734497, "learning_rate": 9.354813149317512e-06, "loss": 0.2684, "step": 24025 }, { "epoch": 0.5206595454249995, "grad_norm": 1.8965418338775635, "learning_rate": 9.351416823412856e-06, "loss": 0.2677, "step": 24030 }, { "epoch": 0.5207678807444803, "grad_norm": 1.230147361755371, "learning_rate": 9.348020572636848e-06, "loss": 0.3012, "step": 24035 }, { "epoch": 0.5208762160639612, "grad_norm": 1.4583721160888672, "learning_rate": 9.344624397382889e-06, "loss": 0.2879, "step": 24040 }, { "epoch": 0.520984551383442, "grad_norm": 1.2893263101577759, "learning_rate": 9.341228298044375e-06, "loss": 0.2387, "step": 24045 }, { "epoch": 0.5210928867029229, "grad_norm": 1.3356742858886719, "learning_rate": 9.337832275014693e-06, "loss": 0.2396, "step": 24050 }, { "epoch": 0.5212012220224037, "grad_norm": 2.026986598968506, "learning_rate": 9.334436328687224e-06, "loss": 0.2974, "step": 24055 }, { "epoch": 0.5213095573418846, "grad_norm": 1.9821910858154297, "learning_rate": 9.331040459455337e-06, "loss": 0.2545, "step": 24060 }, { "epoch": 0.5214178926613655, "grad_norm": 1.0803524255752563, "learning_rate": 9.327644667712385e-06, "loss": 0.2807, "step": 24065 }, { "epoch": 0.5215262279808464, "grad_norm": 1.6680619716644287, "learning_rate": 9.324248953851734e-06, "loss": 0.3688, "step": 24070 }, { "epoch": 0.5216345633003272, "grad_norm": 1.092808485031128, "learning_rate": 9.320853318266718e-06, "loss": 0.2674, "step": 24075 }, { "epoch": 0.521742898619808, "grad_norm": 1.3835222721099854, "learning_rate": 9.31745776135067e-06, "loss": 0.318, "step": 24080 }, { "epoch": 0.5218512339392889, "grad_norm": 1.801038384437561, "learning_rate": 9.314062283496917e-06, "loss": 0.3699, "step": 24085 }, { "epoch": 0.5219595692587697, "grad_norm": 2.5767033100128174, "learning_rate": 9.310666885098771e-06, "loss": 0.2692, "step": 24090 }, { "epoch": 0.5220679045782506, "grad_norm": 1.936813473701477, "learning_rate": 9.307271566549542e-06, "loss": 0.2496, "step": 24095 }, { "epoch": 0.5221762398977314, "grad_norm": 1.5186859369277954, "learning_rate": 9.303876328242525e-06, "loss": 0.2491, "step": 24100 }, { "epoch": 0.5222845752172123, "grad_norm": 1.313868522644043, "learning_rate": 9.300481170571007e-06, "loss": 0.2556, "step": 24105 }, { "epoch": 0.5223929105366931, "grad_norm": 0.7672193050384521, "learning_rate": 9.297086093928267e-06, "loss": 0.3187, "step": 24110 }, { "epoch": 0.5225012458561741, "grad_norm": 1.0226795673370361, "learning_rate": 9.29369109870757e-06, "loss": 0.2598, "step": 24115 }, { "epoch": 0.5226095811756549, "grad_norm": 1.658687710762024, "learning_rate": 9.290296185302181e-06, "loss": 0.2707, "step": 24120 }, { "epoch": 0.5227179164951358, "grad_norm": 1.1417347192764282, "learning_rate": 9.28690135410535e-06, "loss": 0.2476, "step": 24125 }, { "epoch": 0.5228262518146166, "grad_norm": 1.0115270614624023, "learning_rate": 9.283506605510311e-06, "loss": 0.1506, "step": 24130 }, { "epoch": 0.5229345871340975, "grad_norm": 1.6592649221420288, "learning_rate": 9.280111939910301e-06, "loss": 0.3216, "step": 24135 }, { "epoch": 0.5230429224535783, "grad_norm": 1.3693137168884277, "learning_rate": 9.27671735769854e-06, "loss": 0.2685, "step": 24140 }, { "epoch": 0.5231512577730592, "grad_norm": 1.303369402885437, "learning_rate": 9.273322859268237e-06, "loss": 0.2635, "step": 24145 }, { "epoch": 0.52325959309254, "grad_norm": 1.824808120727539, "learning_rate": 9.269928445012595e-06, "loss": 0.2377, "step": 24150 }, { "epoch": 0.5233679284120208, "grad_norm": 2.42219877243042, "learning_rate": 9.266534115324806e-06, "loss": 0.2942, "step": 24155 }, { "epoch": 0.5234762637315017, "grad_norm": 1.7528071403503418, "learning_rate": 9.263139870598052e-06, "loss": 0.2215, "step": 24160 }, { "epoch": 0.5235845990509826, "grad_norm": 1.879071831703186, "learning_rate": 9.259745711225506e-06, "loss": 0.2251, "step": 24165 }, { "epoch": 0.5236929343704635, "grad_norm": 1.2720872163772583, "learning_rate": 9.256351637600328e-06, "loss": 0.2453, "step": 24170 }, { "epoch": 0.5238012696899443, "grad_norm": 1.4737074375152588, "learning_rate": 9.252957650115676e-06, "loss": 0.2186, "step": 24175 }, { "epoch": 0.5239096050094252, "grad_norm": 1.535422444343567, "learning_rate": 9.249563749164691e-06, "loss": 0.2943, "step": 24180 }, { "epoch": 0.524017940328906, "grad_norm": 1.6294114589691162, "learning_rate": 9.246169935140503e-06, "loss": 0.1641, "step": 24185 }, { "epoch": 0.5241262756483869, "grad_norm": 1.1275701522827148, "learning_rate": 9.242776208436235e-06, "loss": 0.2032, "step": 24190 }, { "epoch": 0.5242346109678677, "grad_norm": 1.9005862474441528, "learning_rate": 9.239382569445e-06, "loss": 0.2012, "step": 24195 }, { "epoch": 0.5243429462873486, "grad_norm": 1.2641007900238037, "learning_rate": 9.2359890185599e-06, "loss": 0.3172, "step": 24200 }, { "epoch": 0.5244512816068294, "grad_norm": 2.203840732574463, "learning_rate": 9.23259555617403e-06, "loss": 0.2863, "step": 24205 }, { "epoch": 0.5245596169263104, "grad_norm": 1.5493217706680298, "learning_rate": 9.229202182680462e-06, "loss": 0.2663, "step": 24210 }, { "epoch": 0.5246679522457912, "grad_norm": 0.8747116327285767, "learning_rate": 9.22580889847228e-06, "loss": 0.2767, "step": 24215 }, { "epoch": 0.5247762875652721, "grad_norm": 1.536746621131897, "learning_rate": 9.22241570394254e-06, "loss": 0.2007, "step": 24220 }, { "epoch": 0.5248846228847529, "grad_norm": 0.7661157846450806, "learning_rate": 9.219022599484292e-06, "loss": 0.2774, "step": 24225 }, { "epoch": 0.5249929582042338, "grad_norm": 1.5819159746170044, "learning_rate": 9.215629585490576e-06, "loss": 0.2382, "step": 24230 }, { "epoch": 0.5251012935237146, "grad_norm": 0.9858903288841248, "learning_rate": 9.212236662354423e-06, "loss": 0.268, "step": 24235 }, { "epoch": 0.5252096288431954, "grad_norm": 1.89689302444458, "learning_rate": 9.208843830468854e-06, "loss": 0.2789, "step": 24240 }, { "epoch": 0.5253179641626763, "grad_norm": 1.7369924783706665, "learning_rate": 9.205451090226874e-06, "loss": 0.3043, "step": 24245 }, { "epoch": 0.5254262994821571, "grad_norm": 1.540452003479004, "learning_rate": 9.202058442021482e-06, "loss": 0.2576, "step": 24250 }, { "epoch": 0.525534634801638, "grad_norm": 1.3106224536895752, "learning_rate": 9.198665886245666e-06, "loss": 0.275, "step": 24255 }, { "epoch": 0.5256429701211189, "grad_norm": 0.8838403820991516, "learning_rate": 9.1952734232924e-06, "loss": 0.2788, "step": 24260 }, { "epoch": 0.5257513054405998, "grad_norm": 1.4013389348983765, "learning_rate": 9.191881053554658e-06, "loss": 0.3284, "step": 24265 }, { "epoch": 0.5258596407600806, "grad_norm": 0.9406363368034363, "learning_rate": 9.18848877742539e-06, "loss": 0.2336, "step": 24270 }, { "epoch": 0.5259679760795615, "grad_norm": 1.6935776472091675, "learning_rate": 9.185096595297539e-06, "loss": 0.2966, "step": 24275 }, { "epoch": 0.5260763113990423, "grad_norm": 1.1185382604599, "learning_rate": 9.181704507564044e-06, "loss": 0.2667, "step": 24280 }, { "epoch": 0.5261846467185232, "grad_norm": 1.3592890501022339, "learning_rate": 9.178312514617821e-06, "loss": 0.2584, "step": 24285 }, { "epoch": 0.526292982038004, "grad_norm": 1.927544116973877, "learning_rate": 9.174920616851785e-06, "loss": 0.3086, "step": 24290 }, { "epoch": 0.5264013173574849, "grad_norm": 2.061784267425537, "learning_rate": 9.171528814658835e-06, "loss": 0.2932, "step": 24295 }, { "epoch": 0.5265096526769657, "grad_norm": 1.3109098672866821, "learning_rate": 9.168137108431861e-06, "loss": 0.2091, "step": 24300 }, { "epoch": 0.5266179879964467, "grad_norm": 1.8347536325454712, "learning_rate": 9.164745498563739e-06, "loss": 0.274, "step": 24305 }, { "epoch": 0.5267263233159275, "grad_norm": 1.2083044052124023, "learning_rate": 9.161353985447341e-06, "loss": 0.2449, "step": 24310 }, { "epoch": 0.5268346586354083, "grad_norm": 1.2159507274627686, "learning_rate": 9.157962569475525e-06, "loss": 0.2096, "step": 24315 }, { "epoch": 0.5269429939548892, "grad_norm": 2.01516056060791, "learning_rate": 9.154571251041127e-06, "loss": 0.2848, "step": 24320 }, { "epoch": 0.52705132927437, "grad_norm": 1.654893159866333, "learning_rate": 9.151180030536988e-06, "loss": 0.2022, "step": 24325 }, { "epoch": 0.5271596645938509, "grad_norm": 1.3454053401947021, "learning_rate": 9.147788908355927e-06, "loss": 0.2574, "step": 24330 }, { "epoch": 0.5272679999133317, "grad_norm": 1.6900103092193604, "learning_rate": 9.144397884890753e-06, "loss": 0.3107, "step": 24335 }, { "epoch": 0.5273763352328126, "grad_norm": 1.563698649406433, "learning_rate": 9.141006960534267e-06, "loss": 0.303, "step": 24340 }, { "epoch": 0.5274846705522934, "grad_norm": 1.2735575437545776, "learning_rate": 9.137616135679258e-06, "loss": 0.2789, "step": 24345 }, { "epoch": 0.5275930058717743, "grad_norm": 1.9471954107284546, "learning_rate": 9.1342254107185e-06, "loss": 0.1752, "step": 24350 }, { "epoch": 0.5277013411912552, "grad_norm": 1.554021954536438, "learning_rate": 9.130834786044758e-06, "loss": 0.292, "step": 24355 }, { "epoch": 0.5278096765107361, "grad_norm": 1.3603229522705078, "learning_rate": 9.127444262050788e-06, "loss": 0.2804, "step": 24360 }, { "epoch": 0.5279180118302169, "grad_norm": 1.4057066440582275, "learning_rate": 9.124053839129328e-06, "loss": 0.2347, "step": 24365 }, { "epoch": 0.5280263471496978, "grad_norm": 1.0717707872390747, "learning_rate": 9.120663517673111e-06, "loss": 0.243, "step": 24370 }, { "epoch": 0.5281346824691786, "grad_norm": 1.7692255973815918, "learning_rate": 9.117273298074853e-06, "loss": 0.1559, "step": 24375 }, { "epoch": 0.5282430177886595, "grad_norm": 1.2143118381500244, "learning_rate": 9.113883180727259e-06, "loss": 0.2433, "step": 24380 }, { "epoch": 0.5283513531081403, "grad_norm": 1.4230539798736572, "learning_rate": 9.110493166023024e-06, "loss": 0.1811, "step": 24385 }, { "epoch": 0.5284596884276211, "grad_norm": 1.753434181213379, "learning_rate": 9.107103254354832e-06, "loss": 0.338, "step": 24390 }, { "epoch": 0.528568023747102, "grad_norm": 1.6647560596466064, "learning_rate": 9.103713446115353e-06, "loss": 0.3047, "step": 24395 }, { "epoch": 0.5286763590665828, "grad_norm": 2.4591434001922607, "learning_rate": 9.100323741697242e-06, "loss": 0.3179, "step": 24400 }, { "epoch": 0.5287846943860638, "grad_norm": 1.3366349935531616, "learning_rate": 9.09693414149315e-06, "loss": 0.2511, "step": 24405 }, { "epoch": 0.5288930297055446, "grad_norm": 0.9804871082305908, "learning_rate": 9.093544645895708e-06, "loss": 0.2299, "step": 24410 }, { "epoch": 0.5290013650250255, "grad_norm": 1.8380366563796997, "learning_rate": 9.090155255297543e-06, "loss": 0.2914, "step": 24415 }, { "epoch": 0.5291097003445063, "grad_norm": 1.0121506452560425, "learning_rate": 9.086765970091261e-06, "loss": 0.2497, "step": 24420 }, { "epoch": 0.5292180356639872, "grad_norm": 2.6031265258789062, "learning_rate": 9.083376790669462e-06, "loss": 0.1963, "step": 24425 }, { "epoch": 0.529326370983468, "grad_norm": 1.086991786956787, "learning_rate": 9.07998771742473e-06, "loss": 0.293, "step": 24430 }, { "epoch": 0.5294347063029489, "grad_norm": 1.4406461715698242, "learning_rate": 9.07659875074964e-06, "loss": 0.2855, "step": 24435 }, { "epoch": 0.5295430416224297, "grad_norm": 1.3336234092712402, "learning_rate": 9.073209891036752e-06, "loss": 0.2775, "step": 24440 }, { "epoch": 0.5296513769419106, "grad_norm": 1.2899401187896729, "learning_rate": 9.069821138678614e-06, "loss": 0.3391, "step": 24445 }, { "epoch": 0.5297597122613915, "grad_norm": 1.77482008934021, "learning_rate": 9.066432494067761e-06, "loss": 0.2695, "step": 24450 }, { "epoch": 0.5298680475808724, "grad_norm": 1.703997015953064, "learning_rate": 9.06304395759672e-06, "loss": 0.2875, "step": 24455 }, { "epoch": 0.5299763829003532, "grad_norm": 1.6063274145126343, "learning_rate": 9.059655529658004e-06, "loss": 0.3477, "step": 24460 }, { "epoch": 0.530084718219834, "grad_norm": 1.5269211530685425, "learning_rate": 9.056267210644105e-06, "loss": 0.3136, "step": 24465 }, { "epoch": 0.5301930535393149, "grad_norm": 1.5964915752410889, "learning_rate": 9.052879000947515e-06, "loss": 0.4064, "step": 24470 }, { "epoch": 0.5303013888587957, "grad_norm": 1.6253995895385742, "learning_rate": 9.049490900960704e-06, "loss": 0.2769, "step": 24475 }, { "epoch": 0.5304097241782766, "grad_norm": 1.2509623765945435, "learning_rate": 9.046102911076133e-06, "loss": 0.3271, "step": 24480 }, { "epoch": 0.5305180594977574, "grad_norm": 1.4706141948699951, "learning_rate": 9.042715031686254e-06, "loss": 0.2173, "step": 24485 }, { "epoch": 0.5306263948172383, "grad_norm": 1.35916006565094, "learning_rate": 9.039327263183495e-06, "loss": 0.2756, "step": 24490 }, { "epoch": 0.5307347301367191, "grad_norm": 1.02934730052948, "learning_rate": 9.035939605960282e-06, "loss": 0.2631, "step": 24495 }, { "epoch": 0.5308430654562001, "grad_norm": 1.4219050407409668, "learning_rate": 9.032552060409019e-06, "loss": 0.2772, "step": 24500 }, { "epoch": 0.5309514007756809, "grad_norm": 1.5343177318572998, "learning_rate": 9.029164626922113e-06, "loss": 0.2496, "step": 24505 }, { "epoch": 0.5310597360951618, "grad_norm": 2.13594388961792, "learning_rate": 9.02577730589194e-06, "loss": 0.2547, "step": 24510 }, { "epoch": 0.5311680714146426, "grad_norm": 1.378507137298584, "learning_rate": 9.022390097710874e-06, "loss": 0.289, "step": 24515 }, { "epoch": 0.5312764067341235, "grad_norm": 1.3087491989135742, "learning_rate": 9.019003002771273e-06, "loss": 0.2349, "step": 24520 }, { "epoch": 0.5313847420536043, "grad_norm": 1.8173800706863403, "learning_rate": 9.015616021465476e-06, "loss": 0.2137, "step": 24525 }, { "epoch": 0.5314930773730852, "grad_norm": 1.8036322593688965, "learning_rate": 9.012229154185817e-06, "loss": 0.2607, "step": 24530 }, { "epoch": 0.531601412692566, "grad_norm": 1.4795949459075928, "learning_rate": 9.008842401324613e-06, "loss": 0.2158, "step": 24535 }, { "epoch": 0.5317097480120468, "grad_norm": 1.6122751235961914, "learning_rate": 9.00545576327417e-06, "loss": 0.2244, "step": 24540 }, { "epoch": 0.5318180833315277, "grad_norm": 1.8206313848495483, "learning_rate": 9.002069240426778e-06, "loss": 0.2513, "step": 24545 }, { "epoch": 0.5319264186510086, "grad_norm": 1.7094173431396484, "learning_rate": 8.998682833174712e-06, "loss": 0.3101, "step": 24550 }, { "epoch": 0.5320347539704895, "grad_norm": 1.3813419342041016, "learning_rate": 8.995296541910245e-06, "loss": 0.2503, "step": 24555 }, { "epoch": 0.5321430892899703, "grad_norm": 0.9810305237770081, "learning_rate": 8.991910367025622e-06, "loss": 0.292, "step": 24560 }, { "epoch": 0.5322514246094512, "grad_norm": 1.9943300485610962, "learning_rate": 8.988524308913083e-06, "loss": 0.2418, "step": 24565 }, { "epoch": 0.532359759928932, "grad_norm": 1.3360466957092285, "learning_rate": 8.98513836796485e-06, "loss": 0.2933, "step": 24570 }, { "epoch": 0.5324680952484129, "grad_norm": 1.557481050491333, "learning_rate": 8.981752544573133e-06, "loss": 0.2779, "step": 24575 }, { "epoch": 0.5325764305678937, "grad_norm": 1.540914535522461, "learning_rate": 8.978366839130132e-06, "loss": 0.3108, "step": 24580 }, { "epoch": 0.5326847658873746, "grad_norm": 1.0660570859909058, "learning_rate": 8.974981252028027e-06, "loss": 0.1964, "step": 24585 }, { "epoch": 0.5327931012068554, "grad_norm": 1.0312362909317017, "learning_rate": 8.971595783658991e-06, "loss": 0.287, "step": 24590 }, { "epoch": 0.5329014365263364, "grad_norm": 1.0613672733306885, "learning_rate": 8.968210434415176e-06, "loss": 0.2346, "step": 24595 }, { "epoch": 0.5330097718458172, "grad_norm": 1.5648868083953857, "learning_rate": 8.964825204688728e-06, "loss": 0.2235, "step": 24600 }, { "epoch": 0.5331181071652981, "grad_norm": 1.8076536655426025, "learning_rate": 8.961440094871772e-06, "loss": 0.2545, "step": 24605 }, { "epoch": 0.5332264424847789, "grad_norm": 1.4210424423217773, "learning_rate": 8.958055105356423e-06, "loss": 0.4024, "step": 24610 }, { "epoch": 0.5333347778042598, "grad_norm": 1.4890860319137573, "learning_rate": 8.954670236534783e-06, "loss": 0.2314, "step": 24615 }, { "epoch": 0.5334431131237406, "grad_norm": 1.314191222190857, "learning_rate": 8.951285488798935e-06, "loss": 0.303, "step": 24620 }, { "epoch": 0.5335514484432214, "grad_norm": 1.2361823320388794, "learning_rate": 8.947900862540956e-06, "loss": 0.2431, "step": 24625 }, { "epoch": 0.5336597837627023, "grad_norm": 1.0554357767105103, "learning_rate": 8.9445163581529e-06, "loss": 0.2891, "step": 24630 }, { "epoch": 0.5337681190821831, "grad_norm": 1.7626055479049683, "learning_rate": 8.941131976026813e-06, "loss": 0.3249, "step": 24635 }, { "epoch": 0.533876454401664, "grad_norm": 1.8832064867019653, "learning_rate": 8.937747716554726e-06, "loss": 0.2356, "step": 24640 }, { "epoch": 0.5339847897211449, "grad_norm": 1.7955278158187866, "learning_rate": 8.93436358012865e-06, "loss": 0.3324, "step": 24645 }, { "epoch": 0.5340931250406258, "grad_norm": 1.176805853843689, "learning_rate": 8.930979567140594e-06, "loss": 0.2346, "step": 24650 }, { "epoch": 0.5342014603601066, "grad_norm": 1.6666604280471802, "learning_rate": 8.927595677982537e-06, "loss": 0.2632, "step": 24655 }, { "epoch": 0.5343097956795875, "grad_norm": 1.3819115161895752, "learning_rate": 8.92421191304646e-06, "loss": 0.2212, "step": 24660 }, { "epoch": 0.5344181309990683, "grad_norm": 1.3693689107894897, "learning_rate": 8.920828272724317e-06, "loss": 0.228, "step": 24665 }, { "epoch": 0.5345264663185492, "grad_norm": 1.8960258960723877, "learning_rate": 8.917444757408051e-06, "loss": 0.3126, "step": 24670 }, { "epoch": 0.53463480163803, "grad_norm": 2.023186206817627, "learning_rate": 8.914061367489594e-06, "loss": 0.2172, "step": 24675 }, { "epoch": 0.5347431369575109, "grad_norm": 1.874796748161316, "learning_rate": 8.910678103360861e-06, "loss": 0.3059, "step": 24680 }, { "epoch": 0.5348514722769917, "grad_norm": 1.8138856887817383, "learning_rate": 8.90729496541375e-06, "loss": 0.2193, "step": 24685 }, { "epoch": 0.5349598075964725, "grad_norm": 1.8596388101577759, "learning_rate": 8.903911954040152e-06, "loss": 0.2485, "step": 24690 }, { "epoch": 0.5350681429159535, "grad_norm": 1.5194827318191528, "learning_rate": 8.900529069631929e-06, "loss": 0.2076, "step": 24695 }, { "epoch": 0.5351764782354343, "grad_norm": 1.3941007852554321, "learning_rate": 8.897146312580947e-06, "loss": 0.2777, "step": 24700 }, { "epoch": 0.5352848135549152, "grad_norm": 1.0728601217269897, "learning_rate": 8.893763683279042e-06, "loss": 0.294, "step": 24705 }, { "epoch": 0.535393148874396, "grad_norm": 0.9014778137207031, "learning_rate": 8.890381182118045e-06, "loss": 0.3099, "step": 24710 }, { "epoch": 0.5355014841938769, "grad_norm": 1.536391258239746, "learning_rate": 8.886998809489766e-06, "loss": 0.2215, "step": 24715 }, { "epoch": 0.5356098195133577, "grad_norm": 1.3562134504318237, "learning_rate": 8.883616565786002e-06, "loss": 0.1817, "step": 24720 }, { "epoch": 0.5357181548328386, "grad_norm": 2.096883773803711, "learning_rate": 8.880234451398536e-06, "loss": 0.3724, "step": 24725 }, { "epoch": 0.5358264901523194, "grad_norm": 1.1784526109695435, "learning_rate": 8.876852466719135e-06, "loss": 0.2388, "step": 24730 }, { "epoch": 0.5359348254718003, "grad_norm": 1.748392105102539, "learning_rate": 8.873470612139549e-06, "loss": 0.2494, "step": 24735 }, { "epoch": 0.5360431607912812, "grad_norm": 1.8653132915496826, "learning_rate": 8.870088888051513e-06, "loss": 0.2954, "step": 24740 }, { "epoch": 0.5361514961107621, "grad_norm": 1.0039788484573364, "learning_rate": 8.86670729484676e-06, "loss": 0.2118, "step": 24745 }, { "epoch": 0.5362598314302429, "grad_norm": 1.4856717586517334, "learning_rate": 8.863325832916988e-06, "loss": 0.3253, "step": 24750 }, { "epoch": 0.5363681667497238, "grad_norm": 1.6432480812072754, "learning_rate": 8.85994450265389e-06, "loss": 0.3007, "step": 24755 }, { "epoch": 0.5364765020692046, "grad_norm": 1.1362404823303223, "learning_rate": 8.856563304449147e-06, "loss": 0.1951, "step": 24760 }, { "epoch": 0.5365848373886855, "grad_norm": 1.8031740188598633, "learning_rate": 8.853182238694414e-06, "loss": 0.2622, "step": 24765 }, { "epoch": 0.5366931727081663, "grad_norm": 1.7311757802963257, "learning_rate": 8.849801305781339e-06, "loss": 0.2409, "step": 24770 }, { "epoch": 0.5368015080276471, "grad_norm": 1.7140182256698608, "learning_rate": 8.846420506101553e-06, "loss": 0.2916, "step": 24775 }, { "epoch": 0.536909843347128, "grad_norm": 1.8208998441696167, "learning_rate": 8.84303984004667e-06, "loss": 0.1983, "step": 24780 }, { "epoch": 0.5370181786666088, "grad_norm": 1.1382206678390503, "learning_rate": 8.839659308008292e-06, "loss": 0.2767, "step": 24785 }, { "epoch": 0.5371265139860898, "grad_norm": 1.7717760801315308, "learning_rate": 8.836278910377995e-06, "loss": 0.218, "step": 24790 }, { "epoch": 0.5372348493055706, "grad_norm": 1.3220824003219604, "learning_rate": 8.83289864754736e-06, "loss": 0.2827, "step": 24795 }, { "epoch": 0.5373431846250515, "grad_norm": 1.553884744644165, "learning_rate": 8.829518519907935e-06, "loss": 0.2707, "step": 24800 }, { "epoch": 0.5374515199445323, "grad_norm": 1.9033563137054443, "learning_rate": 8.826138527851252e-06, "loss": 0.2714, "step": 24805 }, { "epoch": 0.5375598552640132, "grad_norm": 1.209517002105713, "learning_rate": 8.822758671768837e-06, "loss": 0.2324, "step": 24810 }, { "epoch": 0.537668190583494, "grad_norm": 1.4313626289367676, "learning_rate": 8.819378952052196e-06, "loss": 0.3066, "step": 24815 }, { "epoch": 0.5377765259029749, "grad_norm": 1.4471501111984253, "learning_rate": 8.815999369092817e-06, "loss": 0.2564, "step": 24820 }, { "epoch": 0.5378848612224557, "grad_norm": 2.0079143047332764, "learning_rate": 8.812619923282173e-06, "loss": 0.2796, "step": 24825 }, { "epoch": 0.5379931965419366, "grad_norm": 3.0296483039855957, "learning_rate": 8.809240615011727e-06, "loss": 0.3023, "step": 24830 }, { "epoch": 0.5381015318614175, "grad_norm": 1.3472635746002197, "learning_rate": 8.805861444672914e-06, "loss": 0.2843, "step": 24835 }, { "epoch": 0.5382098671808984, "grad_norm": 1.7034779787063599, "learning_rate": 8.802482412657167e-06, "loss": 0.3238, "step": 24840 }, { "epoch": 0.5383182025003792, "grad_norm": 1.7725999355316162, "learning_rate": 8.799103519355894e-06, "loss": 0.3524, "step": 24845 }, { "epoch": 0.53842653781986, "grad_norm": 1.104416012763977, "learning_rate": 8.795724765160488e-06, "loss": 0.226, "step": 24850 }, { "epoch": 0.5385348731393409, "grad_norm": 1.540833830833435, "learning_rate": 8.79234615046233e-06, "loss": 0.21, "step": 24855 }, { "epoch": 0.5386432084588217, "grad_norm": 1.2990292310714722, "learning_rate": 8.788967675652778e-06, "loss": 0.3491, "step": 24860 }, { "epoch": 0.5387515437783026, "grad_norm": 1.6030415296554565, "learning_rate": 8.78558934112318e-06, "loss": 0.3121, "step": 24865 }, { "epoch": 0.5388598790977834, "grad_norm": 1.092979907989502, "learning_rate": 8.782211147264864e-06, "loss": 0.2387, "step": 24870 }, { "epoch": 0.5389682144172643, "grad_norm": 1.0337042808532715, "learning_rate": 8.778833094469144e-06, "loss": 0.2454, "step": 24875 }, { "epoch": 0.5390765497367451, "grad_norm": 1.1949357986450195, "learning_rate": 8.77545518312732e-06, "loss": 0.1805, "step": 24880 }, { "epoch": 0.5391848850562261, "grad_norm": 1.4344136714935303, "learning_rate": 8.772077413630665e-06, "loss": 0.2791, "step": 24885 }, { "epoch": 0.5392932203757069, "grad_norm": 1.9362802505493164, "learning_rate": 8.768699786370448e-06, "loss": 0.2462, "step": 24890 }, { "epoch": 0.5394015556951878, "grad_norm": 1.0698487758636475, "learning_rate": 8.765322301737919e-06, "loss": 0.2448, "step": 24895 }, { "epoch": 0.5395098910146686, "grad_norm": 1.3763777017593384, "learning_rate": 8.761944960124304e-06, "loss": 0.1194, "step": 24900 }, { "epoch": 0.5396182263341495, "grad_norm": 1.0873218774795532, "learning_rate": 8.75856776192082e-06, "loss": 0.2099, "step": 24905 }, { "epoch": 0.5397265616536303, "grad_norm": 1.261795997619629, "learning_rate": 8.755190707518662e-06, "loss": 0.3928, "step": 24910 }, { "epoch": 0.5398348969731112, "grad_norm": 1.5135732889175415, "learning_rate": 8.751813797309015e-06, "loss": 0.2788, "step": 24915 }, { "epoch": 0.539943232292592, "grad_norm": 1.0305835008621216, "learning_rate": 8.748437031683042e-06, "loss": 0.2092, "step": 24920 }, { "epoch": 0.5400515676120728, "grad_norm": 2.5298402309417725, "learning_rate": 8.745060411031892e-06, "loss": 0.3246, "step": 24925 }, { "epoch": 0.5401599029315537, "grad_norm": 1.03712797164917, "learning_rate": 8.741683935746692e-06, "loss": 0.266, "step": 24930 }, { "epoch": 0.5402682382510346, "grad_norm": 1.9875320196151733, "learning_rate": 8.738307606218556e-06, "loss": 0.2528, "step": 24935 }, { "epoch": 0.5403765735705155, "grad_norm": 1.7744157314300537, "learning_rate": 8.734931422838588e-06, "loss": 0.3353, "step": 24940 }, { "epoch": 0.5404849088899963, "grad_norm": 1.681976079940796, "learning_rate": 8.731555385997862e-06, "loss": 0.2659, "step": 24945 }, { "epoch": 0.5405932442094772, "grad_norm": 1.0438896417617798, "learning_rate": 8.728179496087442e-06, "loss": 0.2112, "step": 24950 }, { "epoch": 0.540701579528958, "grad_norm": 1.746039867401123, "learning_rate": 8.72480375349838e-06, "loss": 0.3263, "step": 24955 }, { "epoch": 0.5408099148484389, "grad_norm": 1.507507562637329, "learning_rate": 8.721428158621698e-06, "loss": 0.2899, "step": 24960 }, { "epoch": 0.5409182501679197, "grad_norm": 2.3516619205474854, "learning_rate": 8.71805271184841e-06, "loss": 0.2012, "step": 24965 }, { "epoch": 0.5410265854874006, "grad_norm": 1.3885809183120728, "learning_rate": 8.714677413569515e-06, "loss": 0.1671, "step": 24970 }, { "epoch": 0.5411349208068814, "grad_norm": 1.5471704006195068, "learning_rate": 8.711302264175986e-06, "loss": 0.2812, "step": 24975 }, { "epoch": 0.5412432561263624, "grad_norm": 1.9046388864517212, "learning_rate": 8.70792726405878e-06, "loss": 0.2673, "step": 24980 }, { "epoch": 0.5413515914458432, "grad_norm": 1.3737964630126953, "learning_rate": 8.70455241360885e-06, "loss": 0.2683, "step": 24985 }, { "epoch": 0.5414599267653241, "grad_norm": 1.3155139684677124, "learning_rate": 8.701177713217116e-06, "loss": 0.2622, "step": 24990 }, { "epoch": 0.5415682620848049, "grad_norm": 1.221031665802002, "learning_rate": 8.697803163274487e-06, "loss": 0.2298, "step": 24995 }, { "epoch": 0.5416765974042858, "grad_norm": 1.824995756149292, "learning_rate": 8.694428764171857e-06, "loss": 0.2774, "step": 25000 }, { "epoch": 0.5417849327237666, "grad_norm": 2.0251872539520264, "learning_rate": 8.691054516300098e-06, "loss": 0.3169, "step": 25005 }, { "epoch": 0.5418932680432474, "grad_norm": 1.2853409051895142, "learning_rate": 8.687680420050063e-06, "loss": 0.2696, "step": 25010 }, { "epoch": 0.5420016033627283, "grad_norm": 2.3692495822906494, "learning_rate": 8.684306475812593e-06, "loss": 0.2777, "step": 25015 }, { "epoch": 0.5421099386822091, "grad_norm": 1.2875559329986572, "learning_rate": 8.68093268397851e-06, "loss": 0.2284, "step": 25020 }, { "epoch": 0.54221827400169, "grad_norm": 1.7451446056365967, "learning_rate": 8.677559044938614e-06, "loss": 0.2591, "step": 25025 }, { "epoch": 0.5423266093211709, "grad_norm": 1.4996845722198486, "learning_rate": 8.674185559083688e-06, "loss": 0.2496, "step": 25030 }, { "epoch": 0.5424349446406518, "grad_norm": 1.5610499382019043, "learning_rate": 8.67081222680451e-06, "loss": 0.2962, "step": 25035 }, { "epoch": 0.5425432799601326, "grad_norm": 1.6462416648864746, "learning_rate": 8.667439048491826e-06, "loss": 0.3413, "step": 25040 }, { "epoch": 0.5426516152796135, "grad_norm": 1.2726527452468872, "learning_rate": 8.664066024536363e-06, "loss": 0.243, "step": 25045 }, { "epoch": 0.5427599505990943, "grad_norm": 1.8128712177276611, "learning_rate": 8.660693155328842e-06, "loss": 0.1654, "step": 25050 }, { "epoch": 0.5428682859185752, "grad_norm": 1.57457435131073, "learning_rate": 8.657320441259953e-06, "loss": 0.1504, "step": 25055 }, { "epoch": 0.542976621238056, "grad_norm": 1.595259666442871, "learning_rate": 8.653947882720379e-06, "loss": 0.2375, "step": 25060 }, { "epoch": 0.5430849565575369, "grad_norm": 1.551345705986023, "learning_rate": 8.650575480100778e-06, "loss": 0.2869, "step": 25065 }, { "epoch": 0.5431932918770177, "grad_norm": 1.9761664867401123, "learning_rate": 8.647203233791794e-06, "loss": 0.2415, "step": 25070 }, { "epoch": 0.5433016271964985, "grad_norm": 0.7436693906784058, "learning_rate": 8.64383114418405e-06, "loss": 0.2147, "step": 25075 }, { "epoch": 0.5434099625159795, "grad_norm": 1.9754668474197388, "learning_rate": 8.64045921166815e-06, "loss": 0.2786, "step": 25080 }, { "epoch": 0.5435182978354604, "grad_norm": 1.9125397205352783, "learning_rate": 8.637087436634688e-06, "loss": 0.2267, "step": 25085 }, { "epoch": 0.5436266331549412, "grad_norm": 2.1232423782348633, "learning_rate": 8.63371581947423e-06, "loss": 0.198, "step": 25090 }, { "epoch": 0.543734968474422, "grad_norm": 1.1186211109161377, "learning_rate": 8.630344360577326e-06, "loss": 0.2133, "step": 25095 }, { "epoch": 0.5438433037939029, "grad_norm": 1.4252245426177979, "learning_rate": 8.626973060334511e-06, "loss": 0.2673, "step": 25100 }, { "epoch": 0.5439516391133837, "grad_norm": 1.433672547340393, "learning_rate": 8.6236019191363e-06, "loss": 0.3239, "step": 25105 }, { "epoch": 0.5440599744328646, "grad_norm": 2.092257022857666, "learning_rate": 8.620230937373187e-06, "loss": 0.1806, "step": 25110 }, { "epoch": 0.5441683097523454, "grad_norm": 1.7662508487701416, "learning_rate": 8.616860115435652e-06, "loss": 0.2668, "step": 25115 }, { "epoch": 0.5442766450718263, "grad_norm": 1.1699764728546143, "learning_rate": 8.613489453714152e-06, "loss": 0.2931, "step": 25120 }, { "epoch": 0.5443849803913072, "grad_norm": 2.093003749847412, "learning_rate": 8.610118952599128e-06, "loss": 0.2385, "step": 25125 }, { "epoch": 0.5444933157107881, "grad_norm": 1.9943170547485352, "learning_rate": 8.606748612481003e-06, "loss": 0.2745, "step": 25130 }, { "epoch": 0.5446016510302689, "grad_norm": 1.264721155166626, "learning_rate": 8.603378433750181e-06, "loss": 0.2479, "step": 25135 }, { "epoch": 0.5447099863497498, "grad_norm": 1.812900185585022, "learning_rate": 8.600008416797047e-06, "loss": 0.2881, "step": 25140 }, { "epoch": 0.5448183216692306, "grad_norm": 2.1581952571868896, "learning_rate": 8.596638562011965e-06, "loss": 0.3035, "step": 25145 }, { "epoch": 0.5449266569887115, "grad_norm": 1.7795313596725464, "learning_rate": 8.593268869785284e-06, "loss": 0.2322, "step": 25150 }, { "epoch": 0.5450349923081923, "grad_norm": 1.7734918594360352, "learning_rate": 8.589899340507332e-06, "loss": 0.3715, "step": 25155 }, { "epoch": 0.5451433276276731, "grad_norm": 1.4398998022079468, "learning_rate": 8.586529974568419e-06, "loss": 0.2408, "step": 25160 }, { "epoch": 0.545251662947154, "grad_norm": 1.8959159851074219, "learning_rate": 8.583160772358831e-06, "loss": 0.3158, "step": 25165 }, { "epoch": 0.5453599982666348, "grad_norm": 1.4910026788711548, "learning_rate": 8.579791734268846e-06, "loss": 0.2512, "step": 25170 }, { "epoch": 0.5454683335861158, "grad_norm": 1.6660130023956299, "learning_rate": 8.576422860688712e-06, "loss": 0.2755, "step": 25175 }, { "epoch": 0.5455766689055966, "grad_norm": 1.4941486120224, "learning_rate": 8.573054152008667e-06, "loss": 0.2887, "step": 25180 }, { "epoch": 0.5456850042250775, "grad_norm": 1.3528988361358643, "learning_rate": 8.56968560861892e-06, "loss": 0.2567, "step": 25185 }, { "epoch": 0.5457933395445583, "grad_norm": 1.6368674039840698, "learning_rate": 8.566317230909672e-06, "loss": 0.2508, "step": 25190 }, { "epoch": 0.5459016748640392, "grad_norm": 1.7771841287612915, "learning_rate": 8.562949019271094e-06, "loss": 0.3086, "step": 25195 }, { "epoch": 0.54601001018352, "grad_norm": 1.2711691856384277, "learning_rate": 8.559580974093346e-06, "loss": 0.3429, "step": 25200 }, { "epoch": 0.5461183455030009, "grad_norm": 1.4893064498901367, "learning_rate": 8.556213095766565e-06, "loss": 0.2313, "step": 25205 }, { "epoch": 0.5462266808224817, "grad_norm": 0.9545601606369019, "learning_rate": 8.55284538468087e-06, "loss": 0.2677, "step": 25210 }, { "epoch": 0.5463350161419626, "grad_norm": 1.608098030090332, "learning_rate": 8.549477841226359e-06, "loss": 0.2561, "step": 25215 }, { "epoch": 0.5464433514614434, "grad_norm": 1.3666774034500122, "learning_rate": 8.54611046579311e-06, "loss": 0.2544, "step": 25220 }, { "epoch": 0.5465516867809244, "grad_norm": 1.2597076892852783, "learning_rate": 8.542743258771179e-06, "loss": 0.2796, "step": 25225 }, { "epoch": 0.5466600221004052, "grad_norm": 1.30660080909729, "learning_rate": 8.539376220550618e-06, "loss": 0.2683, "step": 25230 }, { "epoch": 0.546768357419886, "grad_norm": 1.3641074895858765, "learning_rate": 8.53600935152144e-06, "loss": 0.303, "step": 25235 }, { "epoch": 0.5468766927393669, "grad_norm": 1.342679738998413, "learning_rate": 8.532642652073649e-06, "loss": 0.2223, "step": 25240 }, { "epoch": 0.5469850280588477, "grad_norm": 1.5279313325881958, "learning_rate": 8.529276122597227e-06, "loss": 0.3053, "step": 25245 }, { "epoch": 0.5470933633783286, "grad_norm": 1.5398473739624023, "learning_rate": 8.525909763482133e-06, "loss": 0.2479, "step": 25250 }, { "epoch": 0.5472016986978094, "grad_norm": 1.4229435920715332, "learning_rate": 8.522543575118311e-06, "loss": 0.1715, "step": 25255 }, { "epoch": 0.5473100340172903, "grad_norm": 2.0205023288726807, "learning_rate": 8.519177557895684e-06, "loss": 0.2543, "step": 25260 }, { "epoch": 0.5474183693367711, "grad_norm": 1.6078202724456787, "learning_rate": 8.515811712204154e-06, "loss": 0.2583, "step": 25265 }, { "epoch": 0.5475267046562521, "grad_norm": 1.943499207496643, "learning_rate": 8.512446038433599e-06, "loss": 0.3606, "step": 25270 }, { "epoch": 0.5476350399757329, "grad_norm": 1.4787180423736572, "learning_rate": 8.509080536973892e-06, "loss": 0.27, "step": 25275 }, { "epoch": 0.5477433752952138, "grad_norm": 1.484187364578247, "learning_rate": 8.505715208214871e-06, "loss": 0.2316, "step": 25280 }, { "epoch": 0.5478517106146946, "grad_norm": 2.3824915885925293, "learning_rate": 8.50235005254636e-06, "loss": 0.2669, "step": 25285 }, { "epoch": 0.5479600459341755, "grad_norm": 1.1957905292510986, "learning_rate": 8.498985070358158e-06, "loss": 0.3247, "step": 25290 }, { "epoch": 0.5480683812536563, "grad_norm": 1.4100111722946167, "learning_rate": 8.495620262040052e-06, "loss": 0.241, "step": 25295 }, { "epoch": 0.5481767165731372, "grad_norm": 1.3302170038223267, "learning_rate": 8.492255627981803e-06, "loss": 0.3232, "step": 25300 }, { "epoch": 0.548285051892618, "grad_norm": 1.7609087228775024, "learning_rate": 8.488891168573153e-06, "loss": 0.2141, "step": 25305 }, { "epoch": 0.5483933872120988, "grad_norm": 0.8529482483863831, "learning_rate": 8.485526884203824e-06, "loss": 0.1791, "step": 25310 }, { "epoch": 0.5485017225315797, "grad_norm": 1.348545789718628, "learning_rate": 8.48216277526352e-06, "loss": 0.23, "step": 25315 }, { "epoch": 0.5486100578510607, "grad_norm": 1.387515902519226, "learning_rate": 8.478798842141917e-06, "loss": 0.3057, "step": 25320 }, { "epoch": 0.5487183931705415, "grad_norm": 0.9510493278503418, "learning_rate": 8.475435085228685e-06, "loss": 0.2612, "step": 25325 }, { "epoch": 0.5488267284900223, "grad_norm": 1.9243065118789673, "learning_rate": 8.472071504913459e-06, "loss": 0.2943, "step": 25330 }, { "epoch": 0.5489350638095032, "grad_norm": 1.9307503700256348, "learning_rate": 8.468708101585862e-06, "loss": 0.2752, "step": 25335 }, { "epoch": 0.549043399128984, "grad_norm": 1.9397330284118652, "learning_rate": 8.465344875635492e-06, "loss": 0.2485, "step": 25340 }, { "epoch": 0.5491517344484649, "grad_norm": 1.197788953781128, "learning_rate": 8.461981827451928e-06, "loss": 0.2618, "step": 25345 }, { "epoch": 0.5492600697679457, "grad_norm": 1.4562722444534302, "learning_rate": 8.458618957424732e-06, "loss": 0.2132, "step": 25350 }, { "epoch": 0.5493684050874266, "grad_norm": 1.4283188581466675, "learning_rate": 8.455256265943437e-06, "loss": 0.2738, "step": 25355 }, { "epoch": 0.5494767404069074, "grad_norm": 0.9140832424163818, "learning_rate": 8.451893753397567e-06, "loss": 0.2219, "step": 25360 }, { "epoch": 0.5495850757263884, "grad_norm": 2.4898574352264404, "learning_rate": 8.44853142017661e-06, "loss": 0.2766, "step": 25365 }, { "epoch": 0.5496934110458692, "grad_norm": 2.0503766536712646, "learning_rate": 8.44516926667005e-06, "loss": 0.2025, "step": 25370 }, { "epoch": 0.5498017463653501, "grad_norm": 1.5730443000793457, "learning_rate": 8.441807293267338e-06, "loss": 0.2745, "step": 25375 }, { "epoch": 0.5499100816848309, "grad_norm": 2.0864832401275635, "learning_rate": 8.438445500357912e-06, "loss": 0.2915, "step": 25380 }, { "epoch": 0.5500184170043118, "grad_norm": 1.9236207008361816, "learning_rate": 8.43508388833118e-06, "loss": 0.2893, "step": 25385 }, { "epoch": 0.5501267523237926, "grad_norm": 1.8781377077102661, "learning_rate": 8.431722457576539e-06, "loss": 0.2197, "step": 25390 }, { "epoch": 0.5502350876432734, "grad_norm": 1.5384671688079834, "learning_rate": 8.428361208483357e-06, "loss": 0.2088, "step": 25395 }, { "epoch": 0.5503434229627543, "grad_norm": 1.332349419593811, "learning_rate": 8.425000141440987e-06, "loss": 0.2374, "step": 25400 }, { "epoch": 0.5504517582822351, "grad_norm": 1.0911208391189575, "learning_rate": 8.421639256838756e-06, "loss": 0.2284, "step": 25405 }, { "epoch": 0.550560093601716, "grad_norm": 0.9856230020523071, "learning_rate": 8.418278555065974e-06, "loss": 0.2056, "step": 25410 }, { "epoch": 0.5506684289211969, "grad_norm": 0.8691927194595337, "learning_rate": 8.414918036511925e-06, "loss": 0.206, "step": 25415 }, { "epoch": 0.5507767642406778, "grad_norm": 2.074155569076538, "learning_rate": 8.41155770156588e-06, "loss": 0.2799, "step": 25420 }, { "epoch": 0.5508850995601586, "grad_norm": 1.74275541305542, "learning_rate": 8.408197550617078e-06, "loss": 0.2898, "step": 25425 }, { "epoch": 0.5509934348796395, "grad_norm": 1.4507285356521606, "learning_rate": 8.404837584054747e-06, "loss": 0.3076, "step": 25430 }, { "epoch": 0.5511017701991203, "grad_norm": 1.777169108390808, "learning_rate": 8.401477802268086e-06, "loss": 0.2029, "step": 25435 }, { "epoch": 0.5512101055186012, "grad_norm": 1.8083150386810303, "learning_rate": 8.398118205646275e-06, "loss": 0.2512, "step": 25440 }, { "epoch": 0.551318440838082, "grad_norm": 1.1925359964370728, "learning_rate": 8.394758794578473e-06, "loss": 0.2958, "step": 25445 }, { "epoch": 0.5514267761575629, "grad_norm": 0.8809664249420166, "learning_rate": 8.39139956945382e-06, "loss": 0.2771, "step": 25450 }, { "epoch": 0.5515351114770437, "grad_norm": 1.377335548400879, "learning_rate": 8.38804053066143e-06, "loss": 0.3091, "step": 25455 }, { "epoch": 0.5516434467965246, "grad_norm": 1.2364988327026367, "learning_rate": 8.384681678590397e-06, "loss": 0.2563, "step": 25460 }, { "epoch": 0.5517517821160055, "grad_norm": 1.2110040187835693, "learning_rate": 8.38132301362979e-06, "loss": 0.2246, "step": 25465 }, { "epoch": 0.5518601174354864, "grad_norm": 1.1903765201568604, "learning_rate": 8.377964536168667e-06, "loss": 0.2267, "step": 25470 }, { "epoch": 0.5519684527549672, "grad_norm": 1.9080137014389038, "learning_rate": 8.374606246596054e-06, "loss": 0.2905, "step": 25475 }, { "epoch": 0.552076788074448, "grad_norm": 1.3086023330688477, "learning_rate": 8.37124814530096e-06, "loss": 0.2155, "step": 25480 }, { "epoch": 0.5521851233939289, "grad_norm": 1.6242316961288452, "learning_rate": 8.36789023267237e-06, "loss": 0.3238, "step": 25485 }, { "epoch": 0.5522934587134097, "grad_norm": 1.7897666692733765, "learning_rate": 8.36453250909925e-06, "loss": 0.2738, "step": 25490 }, { "epoch": 0.5524017940328906, "grad_norm": 1.465077519416809, "learning_rate": 8.361174974970536e-06, "loss": 0.2416, "step": 25495 }, { "epoch": 0.5525101293523714, "grad_norm": 1.31321120262146, "learning_rate": 8.357817630675152e-06, "loss": 0.2025, "step": 25500 }, { "epoch": 0.5526184646718523, "grad_norm": 1.4850502014160156, "learning_rate": 8.354460476601995e-06, "loss": 0.3202, "step": 25505 }, { "epoch": 0.5527267999913332, "grad_norm": 1.4395543336868286, "learning_rate": 8.351103513139939e-06, "loss": 0.3519, "step": 25510 }, { "epoch": 0.5528351353108141, "grad_norm": 0.7432330846786499, "learning_rate": 8.347746740677843e-06, "loss": 0.1904, "step": 25515 }, { "epoch": 0.5529434706302949, "grad_norm": 1.6464918851852417, "learning_rate": 8.344390159604538e-06, "loss": 0.2516, "step": 25520 }, { "epoch": 0.5530518059497758, "grad_norm": 1.7832523584365845, "learning_rate": 8.341033770308832e-06, "loss": 0.3166, "step": 25525 }, { "epoch": 0.5531601412692566, "grad_norm": 1.6268925666809082, "learning_rate": 8.337677573179513e-06, "loss": 0.2987, "step": 25530 }, { "epoch": 0.5532684765887375, "grad_norm": 0.901579737663269, "learning_rate": 8.334321568605343e-06, "loss": 0.2643, "step": 25535 }, { "epoch": 0.5533768119082183, "grad_norm": 1.65430748462677, "learning_rate": 8.330965756975069e-06, "loss": 0.2551, "step": 25540 }, { "epoch": 0.5534851472276991, "grad_norm": 1.5813113451004028, "learning_rate": 8.32761013867741e-06, "loss": 0.2379, "step": 25545 }, { "epoch": 0.55359348254718, "grad_norm": 1.5298316478729248, "learning_rate": 8.324254714101064e-06, "loss": 0.2227, "step": 25550 }, { "epoch": 0.5537018178666608, "grad_norm": 1.4525083303451538, "learning_rate": 8.320899483634706e-06, "loss": 0.3066, "step": 25555 }, { "epoch": 0.5538101531861418, "grad_norm": 1.0309737920761108, "learning_rate": 8.317544447666987e-06, "loss": 0.3475, "step": 25560 }, { "epoch": 0.5539184885056226, "grad_norm": 1.2222836017608643, "learning_rate": 8.314189606586544e-06, "loss": 0.2667, "step": 25565 }, { "epoch": 0.5540268238251035, "grad_norm": 2.111126661300659, "learning_rate": 8.310834960781982e-06, "loss": 0.2479, "step": 25570 }, { "epoch": 0.5541351591445843, "grad_norm": 1.5388468503952026, "learning_rate": 8.307480510641886e-06, "loss": 0.2876, "step": 25575 }, { "epoch": 0.5542434944640652, "grad_norm": 1.384040355682373, "learning_rate": 8.304126256554818e-06, "loss": 0.1907, "step": 25580 }, { "epoch": 0.554351829783546, "grad_norm": 1.0557388067245483, "learning_rate": 8.30077219890932e-06, "loss": 0.2717, "step": 25585 }, { "epoch": 0.5544601651030269, "grad_norm": 1.4722282886505127, "learning_rate": 8.297418338093906e-06, "loss": 0.2881, "step": 25590 }, { "epoch": 0.5545685004225077, "grad_norm": 1.3387850522994995, "learning_rate": 8.294064674497075e-06, "loss": 0.2033, "step": 25595 }, { "epoch": 0.5546768357419886, "grad_norm": 1.5720410346984863, "learning_rate": 8.290711208507296e-06, "loss": 0.275, "step": 25600 }, { "epoch": 0.5547851710614694, "grad_norm": 1.7679364681243896, "learning_rate": 8.287357940513018e-06, "loss": 0.2878, "step": 25605 }, { "epoch": 0.5548935063809504, "grad_norm": 1.1964360475540161, "learning_rate": 8.284004870902666e-06, "loss": 0.2746, "step": 25610 }, { "epoch": 0.5550018417004312, "grad_norm": 1.3477346897125244, "learning_rate": 8.280652000064646e-06, "loss": 0.3059, "step": 25615 }, { "epoch": 0.555110177019912, "grad_norm": 1.9604567289352417, "learning_rate": 8.277299328387337e-06, "loss": 0.2115, "step": 25620 }, { "epoch": 0.5552185123393929, "grad_norm": 1.347679615020752, "learning_rate": 8.273946856259092e-06, "loss": 0.2392, "step": 25625 }, { "epoch": 0.5553268476588737, "grad_norm": 1.817610263824463, "learning_rate": 8.270594584068249e-06, "loss": 0.217, "step": 25630 }, { "epoch": 0.5554351829783546, "grad_norm": 1.0105338096618652, "learning_rate": 8.267242512203118e-06, "loss": 0.1615, "step": 25635 }, { "epoch": 0.5555435182978354, "grad_norm": 1.6189630031585693, "learning_rate": 8.263890641051983e-06, "loss": 0.153, "step": 25640 }, { "epoch": 0.5556518536173163, "grad_norm": 2.1618804931640625, "learning_rate": 8.260538971003111e-06, "loss": 0.2995, "step": 25645 }, { "epoch": 0.5557601889367971, "grad_norm": 2.036888599395752, "learning_rate": 8.257187502444744e-06, "loss": 0.2791, "step": 25650 }, { "epoch": 0.5558685242562781, "grad_norm": 1.5025192499160767, "learning_rate": 8.253836235765093e-06, "loss": 0.2578, "step": 25655 }, { "epoch": 0.5559768595757589, "grad_norm": 1.2924247980117798, "learning_rate": 8.250485171352359e-06, "loss": 0.2032, "step": 25660 }, { "epoch": 0.5560851948952398, "grad_norm": 1.2295681238174438, "learning_rate": 8.24713430959471e-06, "loss": 0.2334, "step": 25665 }, { "epoch": 0.5561935302147206, "grad_norm": 1.6353214979171753, "learning_rate": 8.243783650880295e-06, "loss": 0.2941, "step": 25670 }, { "epoch": 0.5563018655342015, "grad_norm": 1.9260261058807373, "learning_rate": 8.240433195597235e-06, "loss": 0.2365, "step": 25675 }, { "epoch": 0.5564102008536823, "grad_norm": 1.556776762008667, "learning_rate": 8.237082944133632e-06, "loss": 0.2166, "step": 25680 }, { "epoch": 0.5565185361731632, "grad_norm": 1.8468170166015625, "learning_rate": 8.23373289687756e-06, "loss": 0.2168, "step": 25685 }, { "epoch": 0.556626871492644, "grad_norm": 1.5006201267242432, "learning_rate": 8.230383054217073e-06, "loss": 0.3094, "step": 25690 }, { "epoch": 0.5567352068121248, "grad_norm": 1.6249264478683472, "learning_rate": 8.227033416540203e-06, "loss": 0.2074, "step": 25695 }, { "epoch": 0.5568435421316057, "grad_norm": 1.5837104320526123, "learning_rate": 8.22368398423495e-06, "loss": 0.2319, "step": 25700 }, { "epoch": 0.5569518774510867, "grad_norm": 1.412653923034668, "learning_rate": 8.220334757689294e-06, "loss": 0.2392, "step": 25705 }, { "epoch": 0.5570602127705675, "grad_norm": 1.6548022031784058, "learning_rate": 8.216985737291203e-06, "loss": 0.2801, "step": 25710 }, { "epoch": 0.5571685480900483, "grad_norm": 1.2385753393173218, "learning_rate": 8.213636923428603e-06, "loss": 0.2989, "step": 25715 }, { "epoch": 0.5572768834095292, "grad_norm": 1.346015214920044, "learning_rate": 8.210288316489406e-06, "loss": 0.2321, "step": 25720 }, { "epoch": 0.55738521872901, "grad_norm": 1.6792323589324951, "learning_rate": 8.206939916861499e-06, "loss": 0.2166, "step": 25725 }, { "epoch": 0.5574935540484909, "grad_norm": 0.7062650322914124, "learning_rate": 8.203591724932742e-06, "loss": 0.2533, "step": 25730 }, { "epoch": 0.5576018893679717, "grad_norm": 1.46735417842865, "learning_rate": 8.200243741090975e-06, "loss": 0.2442, "step": 25735 }, { "epoch": 0.5577102246874526, "grad_norm": 1.1448873281478882, "learning_rate": 8.196895965724007e-06, "loss": 0.2663, "step": 25740 }, { "epoch": 0.5578185600069334, "grad_norm": 1.6382787227630615, "learning_rate": 8.193548399219632e-06, "loss": 0.3214, "step": 25745 }, { "epoch": 0.5579268953264143, "grad_norm": 1.1919103860855103, "learning_rate": 8.190201041965615e-06, "loss": 0.2421, "step": 25750 }, { "epoch": 0.5580352306458952, "grad_norm": 1.701786994934082, "learning_rate": 8.186853894349691e-06, "loss": 0.2599, "step": 25755 }, { "epoch": 0.5581435659653761, "grad_norm": 1.3879563808441162, "learning_rate": 8.183506956759588e-06, "loss": 0.272, "step": 25760 }, { "epoch": 0.5582519012848569, "grad_norm": 0.8294285535812378, "learning_rate": 8.180160229582992e-06, "loss": 0.2739, "step": 25765 }, { "epoch": 0.5583602366043378, "grad_norm": 1.2572767734527588, "learning_rate": 8.176813713207574e-06, "loss": 0.2356, "step": 25770 }, { "epoch": 0.5584685719238186, "grad_norm": 1.7796069383621216, "learning_rate": 8.173467408020973e-06, "loss": 0.3503, "step": 25775 }, { "epoch": 0.5585769072432994, "grad_norm": 1.749648094177246, "learning_rate": 8.170121314410814e-06, "loss": 0.2667, "step": 25780 }, { "epoch": 0.5586852425627803, "grad_norm": 1.7393453121185303, "learning_rate": 8.166775432764687e-06, "loss": 0.2548, "step": 25785 }, { "epoch": 0.5587935778822611, "grad_norm": 1.1458454132080078, "learning_rate": 8.163429763470167e-06, "loss": 0.2403, "step": 25790 }, { "epoch": 0.558901913201742, "grad_norm": 1.944170355796814, "learning_rate": 8.160084306914795e-06, "loss": 0.2939, "step": 25795 }, { "epoch": 0.5590102485212229, "grad_norm": 3.182199001312256, "learning_rate": 8.15673906348609e-06, "loss": 0.2646, "step": 25800 }, { "epoch": 0.5591185838407038, "grad_norm": 1.81195068359375, "learning_rate": 8.153394033571559e-06, "loss": 0.2405, "step": 25805 }, { "epoch": 0.5592269191601846, "grad_norm": 2.104454755783081, "learning_rate": 8.150049217558665e-06, "loss": 0.159, "step": 25810 }, { "epoch": 0.5593352544796655, "grad_norm": 2.0903520584106445, "learning_rate": 8.146704615834857e-06, "loss": 0.2342, "step": 25815 }, { "epoch": 0.5594435897991463, "grad_norm": 1.123655080795288, "learning_rate": 8.143360228787558e-06, "loss": 0.2078, "step": 25820 }, { "epoch": 0.5595519251186272, "grad_norm": 0.7798278331756592, "learning_rate": 8.140016056804161e-06, "loss": 0.2934, "step": 25825 }, { "epoch": 0.559660260438108, "grad_norm": 1.7520532608032227, "learning_rate": 8.136672100272043e-06, "loss": 0.2514, "step": 25830 }, { "epoch": 0.5597685957575889, "grad_norm": 1.2836024761199951, "learning_rate": 8.13332835957855e-06, "loss": 0.2194, "step": 25835 }, { "epoch": 0.5598769310770697, "grad_norm": 1.1960197687149048, "learning_rate": 8.129984835111004e-06, "loss": 0.2702, "step": 25840 }, { "epoch": 0.5599852663965506, "grad_norm": 1.1503201723098755, "learning_rate": 8.126641527256702e-06, "loss": 0.2647, "step": 25845 }, { "epoch": 0.5600936017160315, "grad_norm": 1.9032673835754395, "learning_rate": 8.123298436402913e-06, "loss": 0.2719, "step": 25850 }, { "epoch": 0.5602019370355124, "grad_norm": 1.9192736148834229, "learning_rate": 8.11995556293689e-06, "loss": 0.3616, "step": 25855 }, { "epoch": 0.5603102723549932, "grad_norm": 1.4163622856140137, "learning_rate": 8.11661290724585e-06, "loss": 0.211, "step": 25860 }, { "epoch": 0.560418607674474, "grad_norm": 1.753311038017273, "learning_rate": 8.113270469716993e-06, "loss": 0.2592, "step": 25865 }, { "epoch": 0.5605269429939549, "grad_norm": 1.6047935485839844, "learning_rate": 8.10992825073749e-06, "loss": 0.193, "step": 25870 }, { "epoch": 0.5606352783134357, "grad_norm": 1.3777648210525513, "learning_rate": 8.106586250694486e-06, "loss": 0.223, "step": 25875 }, { "epoch": 0.5607436136329166, "grad_norm": 1.067984700202942, "learning_rate": 8.1032444699751e-06, "loss": 0.2352, "step": 25880 }, { "epoch": 0.5608519489523974, "grad_norm": 1.0678832530975342, "learning_rate": 8.09990290896643e-06, "loss": 0.1868, "step": 25885 }, { "epoch": 0.5609602842718783, "grad_norm": 1.2707265615463257, "learning_rate": 8.096561568055543e-06, "loss": 0.2151, "step": 25890 }, { "epoch": 0.5610686195913592, "grad_norm": 1.2328486442565918, "learning_rate": 8.093220447629484e-06, "loss": 0.187, "step": 25895 }, { "epoch": 0.5611769549108401, "grad_norm": 1.7169932126998901, "learning_rate": 8.089879548075275e-06, "loss": 0.3082, "step": 25900 }, { "epoch": 0.5612852902303209, "grad_norm": 1.1917260885238647, "learning_rate": 8.086538869779905e-06, "loss": 0.2331, "step": 25905 }, { "epoch": 0.5613936255498018, "grad_norm": 1.4336594343185425, "learning_rate": 8.083198413130344e-06, "loss": 0.2988, "step": 25910 }, { "epoch": 0.5615019608692826, "grad_norm": 1.7483268976211548, "learning_rate": 8.079858178513534e-06, "loss": 0.2848, "step": 25915 }, { "epoch": 0.5616102961887635, "grad_norm": 1.354485034942627, "learning_rate": 8.07651816631639e-06, "loss": 0.3125, "step": 25920 }, { "epoch": 0.5617186315082443, "grad_norm": 1.1100966930389404, "learning_rate": 8.073178376925801e-06, "loss": 0.2184, "step": 25925 }, { "epoch": 0.5618269668277251, "grad_norm": 1.6753175258636475, "learning_rate": 8.069838810728637e-06, "loss": 0.2372, "step": 25930 }, { "epoch": 0.561935302147206, "grad_norm": 1.7221026420593262, "learning_rate": 8.066499468111729e-06, "loss": 0.2756, "step": 25935 }, { "epoch": 0.5620436374666868, "grad_norm": 1.4660793542861938, "learning_rate": 8.063160349461897e-06, "loss": 0.1957, "step": 25940 }, { "epoch": 0.5621519727861678, "grad_norm": 1.2457078695297241, "learning_rate": 8.05982145516592e-06, "loss": 0.2378, "step": 25945 }, { "epoch": 0.5622603081056486, "grad_norm": 1.2798501253128052, "learning_rate": 8.056482785610567e-06, "loss": 0.23, "step": 25950 }, { "epoch": 0.5623686434251295, "grad_norm": 1.133162498474121, "learning_rate": 8.053144341182568e-06, "loss": 0.2831, "step": 25955 }, { "epoch": 0.5624769787446103, "grad_norm": 1.3525094985961914, "learning_rate": 8.049806122268635e-06, "loss": 0.2132, "step": 25960 }, { "epoch": 0.5625853140640912, "grad_norm": 1.62522554397583, "learning_rate": 8.046468129255448e-06, "loss": 0.2965, "step": 25965 }, { "epoch": 0.562693649383572, "grad_norm": 1.4207810163497925, "learning_rate": 8.043130362529664e-06, "loss": 0.3234, "step": 25970 }, { "epoch": 0.5628019847030529, "grad_norm": 1.7858736515045166, "learning_rate": 8.039792822477916e-06, "loss": 0.2878, "step": 25975 }, { "epoch": 0.5629103200225337, "grad_norm": 1.9125570058822632, "learning_rate": 8.036455509486805e-06, "loss": 0.197, "step": 25980 }, { "epoch": 0.5630186553420146, "grad_norm": 1.9667346477508545, "learning_rate": 8.033118423942908e-06, "loss": 0.2591, "step": 25985 }, { "epoch": 0.5631269906614954, "grad_norm": 1.777750849723816, "learning_rate": 8.029781566232777e-06, "loss": 0.303, "step": 25990 }, { "epoch": 0.5632353259809764, "grad_norm": 1.6196098327636719, "learning_rate": 8.026444936742934e-06, "loss": 0.1909, "step": 25995 }, { "epoch": 0.5633436613004572, "grad_norm": 1.8601701259613037, "learning_rate": 8.023108535859885e-06, "loss": 0.3227, "step": 26000 }, { "epoch": 0.5634519966199381, "grad_norm": 2.181901454925537, "learning_rate": 8.019772363970099e-06, "loss": 0.2344, "step": 26005 }, { "epoch": 0.5635603319394189, "grad_norm": 1.3570170402526855, "learning_rate": 8.016436421460023e-06, "loss": 0.2385, "step": 26010 }, { "epoch": 0.5636686672588997, "grad_norm": 1.1529747247695923, "learning_rate": 8.01310070871607e-06, "loss": 0.2424, "step": 26015 }, { "epoch": 0.5637770025783806, "grad_norm": 1.4675348997116089, "learning_rate": 8.009765226124639e-06, "loss": 0.2523, "step": 26020 }, { "epoch": 0.5638853378978614, "grad_norm": 1.729817271232605, "learning_rate": 8.006429974072092e-06, "loss": 0.2569, "step": 26025 }, { "epoch": 0.5639936732173423, "grad_norm": 0.9098167419433594, "learning_rate": 8.003094952944768e-06, "loss": 0.2234, "step": 26030 }, { "epoch": 0.5641020085368231, "grad_norm": 1.7764222621917725, "learning_rate": 7.99976016312898e-06, "loss": 0.2036, "step": 26035 }, { "epoch": 0.5642103438563041, "grad_norm": 1.5184770822525024, "learning_rate": 7.99642560501101e-06, "loss": 0.2743, "step": 26040 }, { "epoch": 0.5643186791757849, "grad_norm": 1.8756299018859863, "learning_rate": 7.993091278977125e-06, "loss": 0.3278, "step": 26045 }, { "epoch": 0.5644270144952658, "grad_norm": 1.2264065742492676, "learning_rate": 7.989757185413552e-06, "loss": 0.2086, "step": 26050 }, { "epoch": 0.5645353498147466, "grad_norm": 1.2695305347442627, "learning_rate": 7.986423324706494e-06, "loss": 0.2521, "step": 26055 }, { "epoch": 0.5646436851342275, "grad_norm": 2.166034698486328, "learning_rate": 7.98308969724213e-06, "loss": 0.3451, "step": 26060 }, { "epoch": 0.5647520204537083, "grad_norm": 2.1660077571868896, "learning_rate": 7.979756303406613e-06, "loss": 0.3004, "step": 26065 }, { "epoch": 0.5648603557731892, "grad_norm": 1.9378105401992798, "learning_rate": 7.976423143586064e-06, "loss": 0.2576, "step": 26070 }, { "epoch": 0.56496869109267, "grad_norm": 1.323341965675354, "learning_rate": 7.97309021816658e-06, "loss": 0.1991, "step": 26075 }, { "epoch": 0.5650770264121509, "grad_norm": 1.1303731203079224, "learning_rate": 7.969757527534232e-06, "loss": 0.2099, "step": 26080 }, { "epoch": 0.5651853617316317, "grad_norm": 1.5702650547027588, "learning_rate": 7.96642507207506e-06, "loss": 0.2936, "step": 26085 }, { "epoch": 0.5652936970511127, "grad_norm": 1.8883341550827026, "learning_rate": 7.96309285217508e-06, "loss": 0.2274, "step": 26090 }, { "epoch": 0.5654020323705935, "grad_norm": 2.2169857025146484, "learning_rate": 7.959760868220284e-06, "loss": 0.2446, "step": 26095 }, { "epoch": 0.5655103676900743, "grad_norm": 1.6909621953964233, "learning_rate": 7.956429120596626e-06, "loss": 0.3464, "step": 26100 }, { "epoch": 0.5656187030095552, "grad_norm": 1.1194008588790894, "learning_rate": 7.953097609690043e-06, "loss": 0.2175, "step": 26105 }, { "epoch": 0.565727038329036, "grad_norm": 0.9369567036628723, "learning_rate": 7.949766335886438e-06, "loss": 0.2828, "step": 26110 }, { "epoch": 0.5658353736485169, "grad_norm": 1.2733267545700073, "learning_rate": 7.94643529957169e-06, "loss": 0.1716, "step": 26115 }, { "epoch": 0.5659437089679977, "grad_norm": 1.034584403038025, "learning_rate": 7.943104501131652e-06, "loss": 0.3275, "step": 26120 }, { "epoch": 0.5660520442874786, "grad_norm": 2.1033363342285156, "learning_rate": 7.939773940952144e-06, "loss": 0.3675, "step": 26125 }, { "epoch": 0.5661603796069594, "grad_norm": 1.467263102531433, "learning_rate": 7.936443619418964e-06, "loss": 0.2377, "step": 26130 }, { "epoch": 0.5662687149264403, "grad_norm": 1.1756497621536255, "learning_rate": 7.933113536917877e-06, "loss": 0.2688, "step": 26135 }, { "epoch": 0.5663770502459212, "grad_norm": 1.6334277391433716, "learning_rate": 7.929783693834625e-06, "loss": 0.2068, "step": 26140 }, { "epoch": 0.5664853855654021, "grad_norm": 1.7510076761245728, "learning_rate": 7.92645409055492e-06, "loss": 0.341, "step": 26145 }, { "epoch": 0.5665937208848829, "grad_norm": 1.7432900667190552, "learning_rate": 7.923124727464448e-06, "loss": 0.3049, "step": 26150 }, { "epoch": 0.5667020562043638, "grad_norm": 1.105189561843872, "learning_rate": 7.919795604948864e-06, "loss": 0.2138, "step": 26155 }, { "epoch": 0.5668103915238446, "grad_norm": 1.7934850454330444, "learning_rate": 7.9164667233938e-06, "loss": 0.2171, "step": 26160 }, { "epoch": 0.5669187268433254, "grad_norm": 1.387416958808899, "learning_rate": 7.913138083184852e-06, "loss": 0.2306, "step": 26165 }, { "epoch": 0.5670270621628063, "grad_norm": 1.488661289215088, "learning_rate": 7.909809684707597e-06, "loss": 0.2486, "step": 26170 }, { "epoch": 0.5671353974822871, "grad_norm": 1.2461543083190918, "learning_rate": 7.906481528347578e-06, "loss": 0.2465, "step": 26175 }, { "epoch": 0.567243732801768, "grad_norm": 1.3959031105041504, "learning_rate": 7.903153614490317e-06, "loss": 0.3215, "step": 26180 }, { "epoch": 0.5673520681212489, "grad_norm": 1.5314428806304932, "learning_rate": 7.899825943521291e-06, "loss": 0.2827, "step": 26185 }, { "epoch": 0.5674604034407298, "grad_norm": 1.8983848094940186, "learning_rate": 7.896498515825974e-06, "loss": 0.2952, "step": 26190 }, { "epoch": 0.5675687387602106, "grad_norm": 1.3877969980239868, "learning_rate": 7.893171331789793e-06, "loss": 0.2693, "step": 26195 }, { "epoch": 0.5676770740796915, "grad_norm": 1.7293452024459839, "learning_rate": 7.889844391798153e-06, "loss": 0.2067, "step": 26200 }, { "epoch": 0.5677854093991723, "grad_norm": 1.0436060428619385, "learning_rate": 7.88651769623643e-06, "loss": 0.2317, "step": 26205 }, { "epoch": 0.5678937447186532, "grad_norm": 1.3568127155303955, "learning_rate": 7.883191245489973e-06, "loss": 0.1998, "step": 26210 }, { "epoch": 0.568002080038134, "grad_norm": 1.2936955690383911, "learning_rate": 7.879865039944102e-06, "loss": 0.175, "step": 26215 }, { "epoch": 0.5681104153576149, "grad_norm": 2.2780110836029053, "learning_rate": 7.876539079984103e-06, "loss": 0.2538, "step": 26220 }, { "epoch": 0.5682187506770957, "grad_norm": 1.5615999698638916, "learning_rate": 7.873213365995244e-06, "loss": 0.3441, "step": 26225 }, { "epoch": 0.5683270859965766, "grad_norm": 1.500685691833496, "learning_rate": 7.869887898362756e-06, "loss": 0.2702, "step": 26230 }, { "epoch": 0.5684354213160575, "grad_norm": 2.1439452171325684, "learning_rate": 7.866562677471842e-06, "loss": 0.1928, "step": 26235 }, { "epoch": 0.5685437566355384, "grad_norm": 0.7694470882415771, "learning_rate": 7.863237703707687e-06, "loss": 0.2581, "step": 26240 }, { "epoch": 0.5686520919550192, "grad_norm": 0.8834710717201233, "learning_rate": 7.859912977455437e-06, "loss": 0.2565, "step": 26245 }, { "epoch": 0.5687604272745, "grad_norm": 1.4266682863235474, "learning_rate": 7.85658849910021e-06, "loss": 0.3271, "step": 26250 }, { "epoch": 0.5688687625939809, "grad_norm": 1.2162888050079346, "learning_rate": 7.853264269027096e-06, "loss": 0.2278, "step": 26255 }, { "epoch": 0.5689770979134617, "grad_norm": 1.5083770751953125, "learning_rate": 7.849940287621159e-06, "loss": 0.1694, "step": 26260 }, { "epoch": 0.5690854332329426, "grad_norm": 1.5139371156692505, "learning_rate": 7.84661655526743e-06, "loss": 0.2021, "step": 26265 }, { "epoch": 0.5691937685524234, "grad_norm": 1.8490877151489258, "learning_rate": 7.843293072350916e-06, "loss": 0.2282, "step": 26270 }, { "epoch": 0.5693021038719043, "grad_norm": 0.9935671091079712, "learning_rate": 7.839969839256593e-06, "loss": 0.2026, "step": 26275 }, { "epoch": 0.5694104391913851, "grad_norm": 1.3781524896621704, "learning_rate": 7.836646856369405e-06, "loss": 0.2634, "step": 26280 }, { "epoch": 0.5695187745108661, "grad_norm": 1.9288424253463745, "learning_rate": 7.833324124074268e-06, "loss": 0.288, "step": 26285 }, { "epoch": 0.5696271098303469, "grad_norm": 1.5418096780776978, "learning_rate": 7.830001642756082e-06, "loss": 0.1808, "step": 26290 }, { "epoch": 0.5697354451498278, "grad_norm": 1.2513000965118408, "learning_rate": 7.826679412799698e-06, "loss": 0.2434, "step": 26295 }, { "epoch": 0.5698437804693086, "grad_norm": 1.6386293172836304, "learning_rate": 7.823357434589945e-06, "loss": 0.2037, "step": 26300 }, { "epoch": 0.5699521157887895, "grad_norm": 1.28794527053833, "learning_rate": 7.820035708511629e-06, "loss": 0.3034, "step": 26305 }, { "epoch": 0.5700604511082703, "grad_norm": 1.4516595602035522, "learning_rate": 7.81671423494952e-06, "loss": 0.213, "step": 26310 }, { "epoch": 0.5701687864277512, "grad_norm": 0.8470987677574158, "learning_rate": 7.81339301428836e-06, "loss": 0.2954, "step": 26315 }, { "epoch": 0.570277121747232, "grad_norm": 1.2146999835968018, "learning_rate": 7.810072046912864e-06, "loss": 0.2257, "step": 26320 }, { "epoch": 0.5703854570667128, "grad_norm": 0.6023609042167664, "learning_rate": 7.806751333207719e-06, "loss": 0.2207, "step": 26325 }, { "epoch": 0.5704937923861938, "grad_norm": 1.390121340751648, "learning_rate": 7.803430873557571e-06, "loss": 0.2895, "step": 26330 }, { "epoch": 0.5706021277056746, "grad_norm": 1.8921988010406494, "learning_rate": 7.800110668347057e-06, "loss": 0.3117, "step": 26335 }, { "epoch": 0.5707104630251555, "grad_norm": 2.077299118041992, "learning_rate": 7.796790717960766e-06, "loss": 0.252, "step": 26340 }, { "epoch": 0.5708187983446363, "grad_norm": 1.8631491661071777, "learning_rate": 7.793471022783267e-06, "loss": 0.2237, "step": 26345 }, { "epoch": 0.5709271336641172, "grad_norm": 1.5104395151138306, "learning_rate": 7.790151583199096e-06, "loss": 0.267, "step": 26350 }, { "epoch": 0.571035468983598, "grad_norm": 1.375435709953308, "learning_rate": 7.78683239959276e-06, "loss": 0.2153, "step": 26355 }, { "epoch": 0.5711438043030789, "grad_norm": 1.3993165493011475, "learning_rate": 7.783513472348738e-06, "loss": 0.2911, "step": 26360 }, { "epoch": 0.5712521396225597, "grad_norm": 1.5821049213409424, "learning_rate": 7.780194801851477e-06, "loss": 0.2244, "step": 26365 }, { "epoch": 0.5713604749420406, "grad_norm": 1.7101716995239258, "learning_rate": 7.776876388485398e-06, "loss": 0.2097, "step": 26370 }, { "epoch": 0.5714688102615214, "grad_norm": 1.7839665412902832, "learning_rate": 7.773558232634883e-06, "loss": 0.2573, "step": 26375 }, { "epoch": 0.5715771455810024, "grad_norm": 1.8226203918457031, "learning_rate": 7.770240334684293e-06, "loss": 0.3036, "step": 26380 }, { "epoch": 0.5716854809004832, "grad_norm": 1.7110683917999268, "learning_rate": 7.766922695017964e-06, "loss": 0.1979, "step": 26385 }, { "epoch": 0.5717938162199641, "grad_norm": 2.266794443130493, "learning_rate": 7.763605314020186e-06, "loss": 0.2779, "step": 26390 }, { "epoch": 0.5719021515394449, "grad_norm": 1.8563427925109863, "learning_rate": 7.760288192075232e-06, "loss": 0.2846, "step": 26395 }, { "epoch": 0.5720104868589257, "grad_norm": 1.8801512718200684, "learning_rate": 7.75697132956734e-06, "loss": 0.2373, "step": 26400 }, { "epoch": 0.5721188221784066, "grad_norm": 1.636215329170227, "learning_rate": 7.75365472688072e-06, "loss": 0.2705, "step": 26405 }, { "epoch": 0.5722271574978874, "grad_norm": 1.2544150352478027, "learning_rate": 7.750338384399548e-06, "loss": 0.3744, "step": 26410 }, { "epoch": 0.5723354928173683, "grad_norm": 1.7126623392105103, "learning_rate": 7.747022302507975e-06, "loss": 0.2489, "step": 26415 }, { "epoch": 0.5724438281368491, "grad_norm": 1.4158916473388672, "learning_rate": 7.743706481590121e-06, "loss": 0.2868, "step": 26420 }, { "epoch": 0.5725521634563301, "grad_norm": 1.5772342681884766, "learning_rate": 7.740390922030065e-06, "loss": 0.2533, "step": 26425 }, { "epoch": 0.5726604987758109, "grad_norm": 1.357688546180725, "learning_rate": 7.737075624211876e-06, "loss": 0.2959, "step": 26430 }, { "epoch": 0.5727688340952918, "grad_norm": 2.2157158851623535, "learning_rate": 7.733760588519579e-06, "loss": 0.4195, "step": 26435 }, { "epoch": 0.5728771694147726, "grad_norm": 0.7316827178001404, "learning_rate": 7.730445815337167e-06, "loss": 0.194, "step": 26440 }, { "epoch": 0.5729855047342535, "grad_norm": 1.4212157726287842, "learning_rate": 7.727131305048612e-06, "loss": 0.1966, "step": 26445 }, { "epoch": 0.5730938400537343, "grad_norm": 1.727028727531433, "learning_rate": 7.723817058037846e-06, "loss": 0.2876, "step": 26450 }, { "epoch": 0.5732021753732152, "grad_norm": 1.1030597686767578, "learning_rate": 7.720503074688777e-06, "loss": 0.2143, "step": 26455 }, { "epoch": 0.573310510692696, "grad_norm": 1.6579961776733398, "learning_rate": 7.717189355385281e-06, "loss": 0.3018, "step": 26460 }, { "epoch": 0.5734188460121769, "grad_norm": 0.9842896461486816, "learning_rate": 7.713875900511202e-06, "loss": 0.3374, "step": 26465 }, { "epoch": 0.5735271813316577, "grad_norm": 1.1077018976211548, "learning_rate": 7.710562710450351e-06, "loss": 0.2233, "step": 26470 }, { "epoch": 0.5736355166511387, "grad_norm": 1.5448631048202515, "learning_rate": 7.707249785586511e-06, "loss": 0.2814, "step": 26475 }, { "epoch": 0.5737438519706195, "grad_norm": 1.5601801872253418, "learning_rate": 7.703937126303443e-06, "loss": 0.306, "step": 26480 }, { "epoch": 0.5738521872901003, "grad_norm": 1.5959453582763672, "learning_rate": 7.700624732984863e-06, "loss": 0.2014, "step": 26485 }, { "epoch": 0.5739605226095812, "grad_norm": 1.9041602611541748, "learning_rate": 7.697312606014465e-06, "loss": 0.3699, "step": 26490 }, { "epoch": 0.574068857929062, "grad_norm": 1.582427978515625, "learning_rate": 7.694000745775908e-06, "loss": 0.2891, "step": 26495 }, { "epoch": 0.5741771932485429, "grad_norm": 1.124528169631958, "learning_rate": 7.690689152652817e-06, "loss": 0.2781, "step": 26500 }, { "epoch": 0.5742855285680237, "grad_norm": 1.2908695936203003, "learning_rate": 7.687377827028796e-06, "loss": 0.2654, "step": 26505 }, { "epoch": 0.5743938638875046, "grad_norm": 1.7499114274978638, "learning_rate": 7.68406676928741e-06, "loss": 0.4491, "step": 26510 }, { "epoch": 0.5745021992069854, "grad_norm": 2.080549478530884, "learning_rate": 7.680755979812197e-06, "loss": 0.2169, "step": 26515 }, { "epoch": 0.5746105345264663, "grad_norm": 1.7013424634933472, "learning_rate": 7.677445458986661e-06, "loss": 0.2586, "step": 26520 }, { "epoch": 0.5747188698459472, "grad_norm": 1.7048388719558716, "learning_rate": 7.674135207194272e-06, "loss": 0.2122, "step": 26525 }, { "epoch": 0.5748272051654281, "grad_norm": 1.377793312072754, "learning_rate": 7.670825224818485e-06, "loss": 0.2333, "step": 26530 }, { "epoch": 0.5749355404849089, "grad_norm": 2.0231456756591797, "learning_rate": 7.6675155122427e-06, "loss": 0.2516, "step": 26535 }, { "epoch": 0.5750438758043898, "grad_norm": 1.5508488416671753, "learning_rate": 7.664206069850306e-06, "loss": 0.3831, "step": 26540 }, { "epoch": 0.5751522111238706, "grad_norm": 1.9047586917877197, "learning_rate": 7.660896898024646e-06, "loss": 0.2225, "step": 26545 }, { "epoch": 0.5752605464433515, "grad_norm": 1.2877461910247803, "learning_rate": 7.657587997149043e-06, "loss": 0.2611, "step": 26550 }, { "epoch": 0.5753688817628323, "grad_norm": 1.599979043006897, "learning_rate": 7.65427936760678e-06, "loss": 0.2924, "step": 26555 }, { "epoch": 0.5754772170823131, "grad_norm": 1.4075884819030762, "learning_rate": 7.650971009781112e-06, "loss": 0.2504, "step": 26560 }, { "epoch": 0.575585552401794, "grad_norm": 1.6380540132522583, "learning_rate": 7.647662924055267e-06, "loss": 0.1819, "step": 26565 }, { "epoch": 0.5756938877212749, "grad_norm": 0.985146164894104, "learning_rate": 7.644355110812429e-06, "loss": 0.2321, "step": 26570 }, { "epoch": 0.5758022230407558, "grad_norm": 2.015495777130127, "learning_rate": 7.641047570435767e-06, "loss": 0.352, "step": 26575 }, { "epoch": 0.5759105583602366, "grad_norm": 1.7180633544921875, "learning_rate": 7.637740303308408e-06, "loss": 0.2589, "step": 26580 }, { "epoch": 0.5760188936797175, "grad_norm": 1.6106947660446167, "learning_rate": 7.634433309813447e-06, "loss": 0.2281, "step": 26585 }, { "epoch": 0.5761272289991983, "grad_norm": 1.6441476345062256, "learning_rate": 7.631126590333951e-06, "loss": 0.3552, "step": 26590 }, { "epoch": 0.5762355643186792, "grad_norm": 1.2170166969299316, "learning_rate": 7.627820145252954e-06, "loss": 0.1812, "step": 26595 }, { "epoch": 0.57634389963816, "grad_norm": 2.6750905513763428, "learning_rate": 7.624513974953458e-06, "loss": 0.334, "step": 26600 }, { "epoch": 0.5764522349576409, "grad_norm": 1.426727056503296, "learning_rate": 7.621208079818433e-06, "loss": 0.302, "step": 26605 }, { "epoch": 0.5765605702771217, "grad_norm": 2.0927958488464355, "learning_rate": 7.6179024602308186e-06, "loss": 0.2375, "step": 26610 }, { "epoch": 0.5766689055966026, "grad_norm": 1.6622734069824219, "learning_rate": 7.614597116573519e-06, "loss": 0.3797, "step": 26615 }, { "epoch": 0.5767772409160835, "grad_norm": 1.324040174484253, "learning_rate": 7.6112920492294084e-06, "loss": 0.3202, "step": 26620 }, { "epoch": 0.5768855762355644, "grad_norm": 1.1651184558868408, "learning_rate": 7.607987258581333e-06, "loss": 0.2457, "step": 26625 }, { "epoch": 0.5769939115550452, "grad_norm": 1.3187657594680786, "learning_rate": 7.604682745012102e-06, "loss": 0.2871, "step": 26630 }, { "epoch": 0.577102246874526, "grad_norm": 1.5411620140075684, "learning_rate": 7.601378508904493e-06, "loss": 0.3085, "step": 26635 }, { "epoch": 0.5772105821940069, "grad_norm": 1.6055810451507568, "learning_rate": 7.598074550641253e-06, "loss": 0.3179, "step": 26640 }, { "epoch": 0.5773189175134877, "grad_norm": 2.108715057373047, "learning_rate": 7.594770870605094e-06, "loss": 0.2616, "step": 26645 }, { "epoch": 0.5774272528329686, "grad_norm": 1.0051473379135132, "learning_rate": 7.591467469178701e-06, "loss": 0.2164, "step": 26650 }, { "epoch": 0.5775355881524494, "grad_norm": 1.6288210153579712, "learning_rate": 7.5881643467447195e-06, "loss": 0.2469, "step": 26655 }, { "epoch": 0.5776439234719303, "grad_norm": 1.5658822059631348, "learning_rate": 7.584861503685771e-06, "loss": 0.2893, "step": 26660 }, { "epoch": 0.5777522587914111, "grad_norm": 1.585316777229309, "learning_rate": 7.58155894038444e-06, "loss": 0.283, "step": 26665 }, { "epoch": 0.5778605941108921, "grad_norm": 2.070258140563965, "learning_rate": 7.578256657223272e-06, "loss": 0.2581, "step": 26670 }, { "epoch": 0.5779689294303729, "grad_norm": 1.8128771781921387, "learning_rate": 7.574954654584796e-06, "loss": 0.2476, "step": 26675 }, { "epoch": 0.5780772647498538, "grad_norm": 1.2215750217437744, "learning_rate": 7.571652932851497e-06, "loss": 0.2902, "step": 26680 }, { "epoch": 0.5781856000693346, "grad_norm": 1.5632927417755127, "learning_rate": 7.568351492405828e-06, "loss": 0.2732, "step": 26685 }, { "epoch": 0.5782939353888155, "grad_norm": 1.6491583585739136, "learning_rate": 7.565050333630212e-06, "loss": 0.1528, "step": 26690 }, { "epoch": 0.5784022707082963, "grad_norm": 1.7102391719818115, "learning_rate": 7.561749456907041e-06, "loss": 0.3114, "step": 26695 }, { "epoch": 0.5785106060277772, "grad_norm": 1.69014573097229, "learning_rate": 7.55844886261867e-06, "loss": 0.1795, "step": 26700 }, { "epoch": 0.578618941347258, "grad_norm": 2.2007062435150146, "learning_rate": 7.555148551147424e-06, "loss": 0.2664, "step": 26705 }, { "epoch": 0.5787272766667388, "grad_norm": 1.4485609531402588, "learning_rate": 7.551848522875595e-06, "loss": 0.2663, "step": 26710 }, { "epoch": 0.5788356119862198, "grad_norm": 1.0601840019226074, "learning_rate": 7.548548778185435e-06, "loss": 0.26, "step": 26715 }, { "epoch": 0.5789439473057006, "grad_norm": 1.8176606893539429, "learning_rate": 7.545249317459181e-06, "loss": 0.2365, "step": 26720 }, { "epoch": 0.5790522826251815, "grad_norm": 1.820601463317871, "learning_rate": 7.541950141079022e-06, "loss": 0.2749, "step": 26725 }, { "epoch": 0.5791606179446623, "grad_norm": 1.2401891946792603, "learning_rate": 7.538651249427118e-06, "loss": 0.257, "step": 26730 }, { "epoch": 0.5792689532641432, "grad_norm": 1.4306808710098267, "learning_rate": 7.535352642885597e-06, "loss": 0.227, "step": 26735 }, { "epoch": 0.579377288583624, "grad_norm": 1.7485098838806152, "learning_rate": 7.53205432183655e-06, "loss": 0.2128, "step": 26740 }, { "epoch": 0.5794856239031049, "grad_norm": 1.1291290521621704, "learning_rate": 7.52875628666204e-06, "loss": 0.2224, "step": 26745 }, { "epoch": 0.5795939592225857, "grad_norm": 1.1569223403930664, "learning_rate": 7.525458537744096e-06, "loss": 0.2096, "step": 26750 }, { "epoch": 0.5797022945420666, "grad_norm": 1.1103287935256958, "learning_rate": 7.52216107546471e-06, "loss": 0.2299, "step": 26755 }, { "epoch": 0.5798106298615474, "grad_norm": 2.3583874702453613, "learning_rate": 7.518863900205846e-06, "loss": 0.2778, "step": 26760 }, { "epoch": 0.5799189651810284, "grad_norm": 1.0679067373275757, "learning_rate": 7.515567012349428e-06, "loss": 0.2127, "step": 26765 }, { "epoch": 0.5800273005005092, "grad_norm": 1.6120696067810059, "learning_rate": 7.512270412277362e-06, "loss": 0.2101, "step": 26770 }, { "epoch": 0.5801356358199901, "grad_norm": 1.605334758758545, "learning_rate": 7.5089741003714986e-06, "loss": 0.2213, "step": 26775 }, { "epoch": 0.5802439711394709, "grad_norm": 1.4452519416809082, "learning_rate": 7.50567807701367e-06, "loss": 0.2616, "step": 26780 }, { "epoch": 0.5803523064589517, "grad_norm": 2.1184802055358887, "learning_rate": 7.502382342585673e-06, "loss": 0.3483, "step": 26785 }, { "epoch": 0.5804606417784326, "grad_norm": 1.2033169269561768, "learning_rate": 7.4990868974692665e-06, "loss": 0.2881, "step": 26790 }, { "epoch": 0.5805689770979134, "grad_norm": 1.547584891319275, "learning_rate": 7.495791742046179e-06, "loss": 0.3115, "step": 26795 }, { "epoch": 0.5806773124173943, "grad_norm": 1.0827805995941162, "learning_rate": 7.492496876698105e-06, "loss": 0.1368, "step": 26800 }, { "epoch": 0.5807856477368751, "grad_norm": 1.2557666301727295, "learning_rate": 7.4892023018067066e-06, "loss": 0.223, "step": 26805 }, { "epoch": 0.580893983056356, "grad_norm": 1.4925086498260498, "learning_rate": 7.485908017753609e-06, "loss": 0.2623, "step": 26810 }, { "epoch": 0.5810023183758369, "grad_norm": 1.2110284566879272, "learning_rate": 7.482614024920404e-06, "loss": 0.2232, "step": 26815 }, { "epoch": 0.5811106536953178, "grad_norm": 1.2361390590667725, "learning_rate": 7.479320323688655e-06, "loss": 0.1846, "step": 26820 }, { "epoch": 0.5812189890147986, "grad_norm": 1.6619104146957397, "learning_rate": 7.476026914439889e-06, "loss": 0.23, "step": 26825 }, { "epoch": 0.5813273243342795, "grad_norm": 2.2505195140838623, "learning_rate": 7.4727337975555945e-06, "loss": 0.305, "step": 26830 }, { "epoch": 0.5814356596537603, "grad_norm": 1.5263293981552124, "learning_rate": 7.469440973417231e-06, "loss": 0.2604, "step": 26835 }, { "epoch": 0.5815439949732412, "grad_norm": 1.3924914598464966, "learning_rate": 7.466148442406224e-06, "loss": 0.2437, "step": 26840 }, { "epoch": 0.581652330292722, "grad_norm": 1.2091224193572998, "learning_rate": 7.462856204903961e-06, "loss": 0.2447, "step": 26845 }, { "epoch": 0.5817606656122029, "grad_norm": 1.120322823524475, "learning_rate": 7.4595642612918015e-06, "loss": 0.2801, "step": 26850 }, { "epoch": 0.5818690009316837, "grad_norm": 1.4159536361694336, "learning_rate": 7.456272611951067e-06, "loss": 0.2919, "step": 26855 }, { "epoch": 0.5819773362511647, "grad_norm": 1.3513048887252808, "learning_rate": 7.452981257263043e-06, "loss": 0.2851, "step": 26860 }, { "epoch": 0.5820856715706455, "grad_norm": 1.5443239212036133, "learning_rate": 7.449690197608988e-06, "loss": 0.2485, "step": 26865 }, { "epoch": 0.5821940068901263, "grad_norm": 1.1776834726333618, "learning_rate": 7.44639943337012e-06, "loss": 0.1623, "step": 26870 }, { "epoch": 0.5823023422096072, "grad_norm": 1.3691978454589844, "learning_rate": 7.443108964927624e-06, "loss": 0.2653, "step": 26875 }, { "epoch": 0.582410677529088, "grad_norm": 1.350550651550293, "learning_rate": 7.4398187926626515e-06, "loss": 0.2694, "step": 26880 }, { "epoch": 0.5825190128485689, "grad_norm": 1.305063009262085, "learning_rate": 7.436528916956322e-06, "loss": 0.2774, "step": 26885 }, { "epoch": 0.5826273481680497, "grad_norm": 1.7762786149978638, "learning_rate": 7.433239338189717e-06, "loss": 0.223, "step": 26890 }, { "epoch": 0.5827356834875306, "grad_norm": 1.3437503576278687, "learning_rate": 7.429950056743884e-06, "loss": 0.3481, "step": 26895 }, { "epoch": 0.5828440188070114, "grad_norm": 1.6576554775238037, "learning_rate": 7.426661072999837e-06, "loss": 0.3048, "step": 26900 }, { "epoch": 0.5829523541264923, "grad_norm": 1.334030032157898, "learning_rate": 7.423372387338559e-06, "loss": 0.23, "step": 26905 }, { "epoch": 0.5830606894459732, "grad_norm": 1.8006638288497925, "learning_rate": 7.420084000140986e-06, "loss": 0.3334, "step": 26910 }, { "epoch": 0.5831690247654541, "grad_norm": 1.3515934944152832, "learning_rate": 7.416795911788037e-06, "loss": 0.2876, "step": 26915 }, { "epoch": 0.5832773600849349, "grad_norm": 0.9971072673797607, "learning_rate": 7.4135081226605865e-06, "loss": 0.222, "step": 26920 }, { "epoch": 0.5833856954044158, "grad_norm": 1.3521767854690552, "learning_rate": 7.4102206331394746e-06, "loss": 0.2109, "step": 26925 }, { "epoch": 0.5834940307238966, "grad_norm": 0.7770697474479675, "learning_rate": 7.406933443605506e-06, "loss": 0.2659, "step": 26930 }, { "epoch": 0.5836023660433775, "grad_norm": 1.31333589553833, "learning_rate": 7.403646554439456e-06, "loss": 0.2837, "step": 26935 }, { "epoch": 0.5837107013628583, "grad_norm": 1.063367486000061, "learning_rate": 7.400359966022057e-06, "loss": 0.2342, "step": 26940 }, { "epoch": 0.5838190366823391, "grad_norm": 1.2401751279830933, "learning_rate": 7.397073678734017e-06, "loss": 0.2401, "step": 26945 }, { "epoch": 0.58392737200182, "grad_norm": 0.6175356507301331, "learning_rate": 7.393787692955996e-06, "loss": 0.2853, "step": 26950 }, { "epoch": 0.584035707321301, "grad_norm": 0.7838605642318726, "learning_rate": 7.390502009068627e-06, "loss": 0.1932, "step": 26955 }, { "epoch": 0.5841440426407818, "grad_norm": 1.5098953247070312, "learning_rate": 7.387216627452513e-06, "loss": 0.1898, "step": 26960 }, { "epoch": 0.5842523779602626, "grad_norm": 1.52659010887146, "learning_rate": 7.383931548488215e-06, "loss": 0.2666, "step": 26965 }, { "epoch": 0.5843607132797435, "grad_norm": 1.0278455018997192, "learning_rate": 7.380646772556257e-06, "loss": 0.2542, "step": 26970 }, { "epoch": 0.5844690485992243, "grad_norm": 1.4585466384887695, "learning_rate": 7.377362300037132e-06, "loss": 0.2717, "step": 26975 }, { "epoch": 0.5845773839187052, "grad_norm": 2.013782024383545, "learning_rate": 7.3740781313113005e-06, "loss": 0.3031, "step": 26980 }, { "epoch": 0.584685719238186, "grad_norm": 2.1916799545288086, "learning_rate": 7.370794266759178e-06, "loss": 0.3042, "step": 26985 }, { "epoch": 0.5847940545576669, "grad_norm": 1.7914056777954102, "learning_rate": 7.367510706761156e-06, "loss": 0.2177, "step": 26990 }, { "epoch": 0.5849023898771477, "grad_norm": 1.455307960510254, "learning_rate": 7.364227451697583e-06, "loss": 0.2602, "step": 26995 }, { "epoch": 0.5850107251966286, "grad_norm": 0.7385848164558411, "learning_rate": 7.360944501948776e-06, "loss": 0.2648, "step": 27000 }, { "epoch": 0.5851190605161095, "grad_norm": 1.6832202672958374, "learning_rate": 7.357661857895011e-06, "loss": 0.3044, "step": 27005 }, { "epoch": 0.5852273958355904, "grad_norm": 1.7728831768035889, "learning_rate": 7.3543795199165415e-06, "loss": 0.2942, "step": 27010 }, { "epoch": 0.5853357311550712, "grad_norm": 2.040221691131592, "learning_rate": 7.351097488393575e-06, "loss": 0.1641, "step": 27015 }, { "epoch": 0.585444066474552, "grad_norm": 1.4735597372055054, "learning_rate": 7.347815763706283e-06, "loss": 0.1437, "step": 27020 }, { "epoch": 0.5855524017940329, "grad_norm": 1.4736331701278687, "learning_rate": 7.344534346234804e-06, "loss": 0.209, "step": 27025 }, { "epoch": 0.5856607371135137, "grad_norm": 1.350782036781311, "learning_rate": 7.3412532363592425e-06, "loss": 0.278, "step": 27030 }, { "epoch": 0.5857690724329946, "grad_norm": 1.5936944484710693, "learning_rate": 7.337972434459666e-06, "loss": 0.2559, "step": 27035 }, { "epoch": 0.5858774077524754, "grad_norm": 1.506817102432251, "learning_rate": 7.334691940916105e-06, "loss": 0.2135, "step": 27040 }, { "epoch": 0.5859857430719563, "grad_norm": 1.2653019428253174, "learning_rate": 7.331411756108556e-06, "loss": 0.2413, "step": 27045 }, { "epoch": 0.5860940783914371, "grad_norm": 1.5141968727111816, "learning_rate": 7.3281318804169806e-06, "loss": 0.2078, "step": 27050 }, { "epoch": 0.5862024137109181, "grad_norm": 1.5778354406356812, "learning_rate": 7.3248523142213005e-06, "loss": 0.3102, "step": 27055 }, { "epoch": 0.5863107490303989, "grad_norm": 1.3429735898971558, "learning_rate": 7.321573057901408e-06, "loss": 0.295, "step": 27060 }, { "epoch": 0.5864190843498798, "grad_norm": 1.7512516975402832, "learning_rate": 7.318294111837152e-06, "loss": 0.1977, "step": 27065 }, { "epoch": 0.5865274196693606, "grad_norm": 1.8794761896133423, "learning_rate": 7.315015476408354e-06, "loss": 0.2374, "step": 27070 }, { "epoch": 0.5866357549888415, "grad_norm": 0.5534735918045044, "learning_rate": 7.31173715199479e-06, "loss": 0.2389, "step": 27075 }, { "epoch": 0.5867440903083223, "grad_norm": 1.2135647535324097, "learning_rate": 7.308459138976207e-06, "loss": 0.2369, "step": 27080 }, { "epoch": 0.5868524256278032, "grad_norm": 1.4163583517074585, "learning_rate": 7.305181437732314e-06, "loss": 0.2774, "step": 27085 }, { "epoch": 0.586960760947284, "grad_norm": 2.223446846008301, "learning_rate": 7.301904048642783e-06, "loss": 0.2614, "step": 27090 }, { "epoch": 0.5870690962667648, "grad_norm": 1.0273535251617432, "learning_rate": 7.29862697208725e-06, "loss": 0.2496, "step": 27095 }, { "epoch": 0.5871774315862458, "grad_norm": 1.3320802450180054, "learning_rate": 7.295350208445315e-06, "loss": 0.2184, "step": 27100 }, { "epoch": 0.5872857669057266, "grad_norm": 2.007338762283325, "learning_rate": 7.292073758096543e-06, "loss": 0.3814, "step": 27105 }, { "epoch": 0.5873941022252075, "grad_norm": 1.9009742736816406, "learning_rate": 7.288797621420462e-06, "loss": 0.2818, "step": 27110 }, { "epoch": 0.5875024375446883, "grad_norm": 1.6610194444656372, "learning_rate": 7.285521798796565e-06, "loss": 0.2744, "step": 27115 }, { "epoch": 0.5876107728641692, "grad_norm": 1.669012188911438, "learning_rate": 7.282246290604302e-06, "loss": 0.2678, "step": 27120 }, { "epoch": 0.58771910818365, "grad_norm": 1.4971809387207031, "learning_rate": 7.278971097223094e-06, "loss": 0.4002, "step": 27125 }, { "epoch": 0.5878274435031309, "grad_norm": 2.460599422454834, "learning_rate": 7.275696219032324e-06, "loss": 0.1914, "step": 27130 }, { "epoch": 0.5879357788226117, "grad_norm": 1.1779358386993408, "learning_rate": 7.2724216564113385e-06, "loss": 0.2469, "step": 27135 }, { "epoch": 0.5880441141420926, "grad_norm": 1.9577306509017944, "learning_rate": 7.269147409739444e-06, "loss": 0.2297, "step": 27140 }, { "epoch": 0.5881524494615734, "grad_norm": 1.5063172578811646, "learning_rate": 7.265873479395913e-06, "loss": 0.2911, "step": 27145 }, { "epoch": 0.5882607847810544, "grad_norm": 1.2632982730865479, "learning_rate": 7.26259986575998e-06, "loss": 0.2803, "step": 27150 }, { "epoch": 0.5883691201005352, "grad_norm": 2.190866470336914, "learning_rate": 7.25932656921085e-06, "loss": 0.2021, "step": 27155 }, { "epoch": 0.5884774554200161, "grad_norm": 1.9967659711837769, "learning_rate": 7.25605359012768e-06, "loss": 0.2513, "step": 27160 }, { "epoch": 0.5885857907394969, "grad_norm": 2.1196508407592773, "learning_rate": 7.2527809288896e-06, "loss": 0.3056, "step": 27165 }, { "epoch": 0.5886941260589778, "grad_norm": 2.0512747764587402, "learning_rate": 7.249508585875693e-06, "loss": 0.1741, "step": 27170 }, { "epoch": 0.5888024613784586, "grad_norm": 1.2172502279281616, "learning_rate": 7.246236561465016e-06, "loss": 0.1169, "step": 27175 }, { "epoch": 0.5889107966979394, "grad_norm": 1.185411810874939, "learning_rate": 7.24296485603658e-06, "loss": 0.2616, "step": 27180 }, { "epoch": 0.5890191320174203, "grad_norm": 1.5099122524261475, "learning_rate": 7.2396934699693685e-06, "loss": 0.2563, "step": 27185 }, { "epoch": 0.5891274673369011, "grad_norm": 1.7273569107055664, "learning_rate": 7.236422403642318e-06, "loss": 0.3251, "step": 27190 }, { "epoch": 0.589235802656382, "grad_norm": 1.0421825647354126, "learning_rate": 7.233151657434332e-06, "loss": 0.21, "step": 27195 }, { "epoch": 0.5893441379758629, "grad_norm": 1.679644227027893, "learning_rate": 7.229881231724276e-06, "loss": 0.2625, "step": 27200 }, { "epoch": 0.5894524732953438, "grad_norm": 1.7686117887496948, "learning_rate": 7.226611126890986e-06, "loss": 0.2158, "step": 27205 }, { "epoch": 0.5895608086148246, "grad_norm": 1.4765726327896118, "learning_rate": 7.223341343313253e-06, "loss": 0.3207, "step": 27210 }, { "epoch": 0.5896691439343055, "grad_norm": 0.9830209016799927, "learning_rate": 7.220071881369829e-06, "loss": 0.242, "step": 27215 }, { "epoch": 0.5897774792537863, "grad_norm": 1.4979768991470337, "learning_rate": 7.216802741439437e-06, "loss": 0.3382, "step": 27220 }, { "epoch": 0.5898858145732672, "grad_norm": 1.4029306173324585, "learning_rate": 7.213533923900754e-06, "loss": 0.288, "step": 27225 }, { "epoch": 0.589994149892748, "grad_norm": 1.3928536176681519, "learning_rate": 7.2102654291324235e-06, "loss": 0.2461, "step": 27230 }, { "epoch": 0.5901024852122289, "grad_norm": 1.5964869260787964, "learning_rate": 7.206997257513054e-06, "loss": 0.2461, "step": 27235 }, { "epoch": 0.5902108205317097, "grad_norm": 1.0650157928466797, "learning_rate": 7.203729409421213e-06, "loss": 0.246, "step": 27240 }, { "epoch": 0.5903191558511907, "grad_norm": 1.757054328918457, "learning_rate": 7.2004618852354265e-06, "loss": 0.2734, "step": 27245 }, { "epoch": 0.5904274911706715, "grad_norm": 1.6715714931488037, "learning_rate": 7.197194685334199e-06, "loss": 0.236, "step": 27250 }, { "epoch": 0.5905358264901523, "grad_norm": 1.4659370183944702, "learning_rate": 7.193927810095982e-06, "loss": 0.3259, "step": 27255 }, { "epoch": 0.5906441618096332, "grad_norm": 1.9431143999099731, "learning_rate": 7.190661259899192e-06, "loss": 0.2681, "step": 27260 }, { "epoch": 0.590752497129114, "grad_norm": 1.6647168397903442, "learning_rate": 7.187395035122211e-06, "loss": 0.2693, "step": 27265 }, { "epoch": 0.5908608324485949, "grad_norm": 1.4040077924728394, "learning_rate": 7.184129136143382e-06, "loss": 0.2326, "step": 27270 }, { "epoch": 0.5909691677680757, "grad_norm": 1.7026312351226807, "learning_rate": 7.180863563341011e-06, "loss": 0.253, "step": 27275 }, { "epoch": 0.5910775030875566, "grad_norm": 1.9188166856765747, "learning_rate": 7.177598317093366e-06, "loss": 0.2677, "step": 27280 }, { "epoch": 0.5911858384070374, "grad_norm": 1.585612177848816, "learning_rate": 7.174333397778676e-06, "loss": 0.3666, "step": 27285 }, { "epoch": 0.5912941737265183, "grad_norm": 1.3475109338760376, "learning_rate": 7.171068805775132e-06, "loss": 0.2641, "step": 27290 }, { "epoch": 0.5914025090459992, "grad_norm": 0.9604895114898682, "learning_rate": 7.16780454146089e-06, "loss": 0.2632, "step": 27295 }, { "epoch": 0.5915108443654801, "grad_norm": 1.842659592628479, "learning_rate": 7.164540605214065e-06, "loss": 0.2534, "step": 27300 }, { "epoch": 0.5916191796849609, "grad_norm": 1.3800595998764038, "learning_rate": 7.161276997412739e-06, "loss": 0.2262, "step": 27305 }, { "epoch": 0.5917275150044418, "grad_norm": 1.5503426790237427, "learning_rate": 7.158013718434946e-06, "loss": 0.3254, "step": 27310 }, { "epoch": 0.5918358503239226, "grad_norm": 1.1819626092910767, "learning_rate": 7.154750768658692e-06, "loss": 0.1098, "step": 27315 }, { "epoch": 0.5919441856434035, "grad_norm": 1.6234767436981201, "learning_rate": 7.151488148461938e-06, "loss": 0.25, "step": 27320 }, { "epoch": 0.5920525209628843, "grad_norm": 1.437256932258606, "learning_rate": 7.148225858222613e-06, "loss": 0.2331, "step": 27325 }, { "epoch": 0.5921608562823651, "grad_norm": 1.4697132110595703, "learning_rate": 7.1449638983186024e-06, "loss": 0.2947, "step": 27330 }, { "epoch": 0.592269191601846, "grad_norm": 1.1371225118637085, "learning_rate": 7.141702269127756e-06, "loss": 0.2805, "step": 27335 }, { "epoch": 0.5923775269213268, "grad_norm": 1.1129794120788574, "learning_rate": 7.138440971027885e-06, "loss": 0.172, "step": 27340 }, { "epoch": 0.5924858622408078, "grad_norm": 1.0949015617370605, "learning_rate": 7.135180004396758e-06, "loss": 0.1817, "step": 27345 }, { "epoch": 0.5925941975602886, "grad_norm": 1.1460614204406738, "learning_rate": 7.131919369612115e-06, "loss": 0.3018, "step": 27350 }, { "epoch": 0.5927025328797695, "grad_norm": 1.5888787508010864, "learning_rate": 7.1286590670516486e-06, "loss": 0.2201, "step": 27355 }, { "epoch": 0.5928108681992503, "grad_norm": 1.8843120336532593, "learning_rate": 7.125399097093018e-06, "loss": 0.2651, "step": 27360 }, { "epoch": 0.5929192035187312, "grad_norm": 0.6801751852035522, "learning_rate": 7.122139460113838e-06, "loss": 0.2574, "step": 27365 }, { "epoch": 0.593027538838212, "grad_norm": 1.2763457298278809, "learning_rate": 7.1188801564916935e-06, "loss": 0.2606, "step": 27370 }, { "epoch": 0.5931358741576929, "grad_norm": 1.7722392082214355, "learning_rate": 7.115621186604122e-06, "loss": 0.2305, "step": 27375 }, { "epoch": 0.5932442094771737, "grad_norm": 1.158620834350586, "learning_rate": 7.11236255082863e-06, "loss": 0.2179, "step": 27380 }, { "epoch": 0.5933525447966546, "grad_norm": 1.6456983089447021, "learning_rate": 7.109104249542679e-06, "loss": 0.2769, "step": 27385 }, { "epoch": 0.5934608801161355, "grad_norm": 1.3697707653045654, "learning_rate": 7.1058462831236915e-06, "loss": 0.223, "step": 27390 }, { "epoch": 0.5935692154356164, "grad_norm": 1.8314048051834106, "learning_rate": 7.1025886519490615e-06, "loss": 0.1905, "step": 27395 }, { "epoch": 0.5936775507550972, "grad_norm": 1.4615862369537354, "learning_rate": 7.099331356396133e-06, "loss": 0.2696, "step": 27400 }, { "epoch": 0.593785886074578, "grad_norm": 1.8312323093414307, "learning_rate": 7.096074396842214e-06, "loss": 0.2078, "step": 27405 }, { "epoch": 0.5938942213940589, "grad_norm": 1.1229579448699951, "learning_rate": 7.092817773664575e-06, "loss": 0.2469, "step": 27410 }, { "epoch": 0.5940025567135397, "grad_norm": 1.3106294870376587, "learning_rate": 7.0895614872404485e-06, "loss": 0.2607, "step": 27415 }, { "epoch": 0.5941108920330206, "grad_norm": 2.4572153091430664, "learning_rate": 7.086305537947025e-06, "loss": 0.324, "step": 27420 }, { "epoch": 0.5942192273525014, "grad_norm": 1.6267035007476807, "learning_rate": 7.0830499261614596e-06, "loss": 0.2235, "step": 27425 }, { "epoch": 0.5943275626719823, "grad_norm": 1.4317070245742798, "learning_rate": 7.079794652260862e-06, "loss": 0.1693, "step": 27430 }, { "epoch": 0.5944358979914631, "grad_norm": 1.1760804653167725, "learning_rate": 7.07653971662231e-06, "loss": 0.3262, "step": 27435 }, { "epoch": 0.5945442333109441, "grad_norm": 1.7710973024368286, "learning_rate": 7.073285119622832e-06, "loss": 0.3691, "step": 27440 }, { "epoch": 0.5946525686304249, "grad_norm": 1.219985008239746, "learning_rate": 7.070030861639439e-06, "loss": 0.2901, "step": 27445 }, { "epoch": 0.5947609039499058, "grad_norm": 1.4364311695098877, "learning_rate": 7.066776943049076e-06, "loss": 0.2085, "step": 27450 }, { "epoch": 0.5948692392693866, "grad_norm": 1.3746628761291504, "learning_rate": 7.063523364228666e-06, "loss": 0.3237, "step": 27455 }, { "epoch": 0.5949775745888675, "grad_norm": 1.4297822713851929, "learning_rate": 7.060270125555087e-06, "loss": 0.1777, "step": 27460 }, { "epoch": 0.5950859099083483, "grad_norm": 1.1834354400634766, "learning_rate": 7.057017227405176e-06, "loss": 0.3307, "step": 27465 }, { "epoch": 0.5951942452278292, "grad_norm": 1.6581557989120483, "learning_rate": 7.053764670155734e-06, "loss": 0.1623, "step": 27470 }, { "epoch": 0.59530258054731, "grad_norm": 1.1017976999282837, "learning_rate": 7.050512454183518e-06, "loss": 0.175, "step": 27475 }, { "epoch": 0.5954109158667908, "grad_norm": 1.2824304103851318, "learning_rate": 7.047260579865252e-06, "loss": 0.251, "step": 27480 }, { "epoch": 0.5955192511862718, "grad_norm": 1.3421430587768555, "learning_rate": 7.044009047577612e-06, "loss": 0.3054, "step": 27485 }, { "epoch": 0.5956275865057526, "grad_norm": 1.275962471961975, "learning_rate": 7.0407578576972465e-06, "loss": 0.1821, "step": 27490 }, { "epoch": 0.5957359218252335, "grad_norm": 1.0601726770401, "learning_rate": 7.0375070106007545e-06, "loss": 0.3036, "step": 27495 }, { "epoch": 0.5958442571447143, "grad_norm": 1.2685364484786987, "learning_rate": 7.0342565066647e-06, "loss": 0.1309, "step": 27500 }, { "epoch": 0.5959525924641952, "grad_norm": 1.6596745252609253, "learning_rate": 7.031006346265598e-06, "loss": 0.1844, "step": 27505 }, { "epoch": 0.596060927783676, "grad_norm": 1.944130301475525, "learning_rate": 7.027756529779937e-06, "loss": 0.225, "step": 27510 }, { "epoch": 0.5961692631031569, "grad_norm": 1.1591084003448486, "learning_rate": 7.024507057584158e-06, "loss": 0.2079, "step": 27515 }, { "epoch": 0.5962775984226377, "grad_norm": 1.6460171937942505, "learning_rate": 7.021257930054662e-06, "loss": 0.1911, "step": 27520 }, { "epoch": 0.5963859337421186, "grad_norm": 1.4944769144058228, "learning_rate": 7.018009147567815e-06, "loss": 0.2143, "step": 27525 }, { "epoch": 0.5964942690615994, "grad_norm": 1.979189157485962, "learning_rate": 7.014760710499937e-06, "loss": 0.2264, "step": 27530 }, { "epoch": 0.5966026043810804, "grad_norm": 0.9296234250068665, "learning_rate": 7.01151261922731e-06, "loss": 0.2479, "step": 27535 }, { "epoch": 0.5967109397005612, "grad_norm": 1.2468276023864746, "learning_rate": 7.00826487412618e-06, "loss": 0.2179, "step": 27540 }, { "epoch": 0.5968192750200421, "grad_norm": 1.6678615808486938, "learning_rate": 7.005017475572748e-06, "loss": 0.2319, "step": 27545 }, { "epoch": 0.5969276103395229, "grad_norm": 1.360984444618225, "learning_rate": 7.0017704239431775e-06, "loss": 0.3014, "step": 27550 }, { "epoch": 0.5970359456590038, "grad_norm": 1.5361825227737427, "learning_rate": 6.99852371961359e-06, "loss": 0.1403, "step": 27555 }, { "epoch": 0.5971442809784846, "grad_norm": 2.1849617958068848, "learning_rate": 6.9952773629600665e-06, "loss": 0.3059, "step": 27560 }, { "epoch": 0.5972526162979654, "grad_norm": 1.158259630203247, "learning_rate": 6.992031354358651e-06, "loss": 0.2375, "step": 27565 }, { "epoch": 0.5973609516174463, "grad_norm": 1.5829216241836548, "learning_rate": 6.9887856941853426e-06, "loss": 0.2531, "step": 27570 }, { "epoch": 0.5974692869369271, "grad_norm": 1.184104561805725, "learning_rate": 6.985540382816104e-06, "loss": 0.1824, "step": 27575 }, { "epoch": 0.597577622256408, "grad_norm": 1.4920048713684082, "learning_rate": 6.982295420626854e-06, "loss": 0.2597, "step": 27580 }, { "epoch": 0.5976859575758889, "grad_norm": 1.9680014848709106, "learning_rate": 6.9790508079934745e-06, "loss": 0.3298, "step": 27585 }, { "epoch": 0.5977942928953698, "grad_norm": 1.941383719444275, "learning_rate": 6.975806545291807e-06, "loss": 0.2502, "step": 27590 }, { "epoch": 0.5979026282148506, "grad_norm": 1.289632797241211, "learning_rate": 6.972562632897648e-06, "loss": 0.2354, "step": 27595 }, { "epoch": 0.5980109635343315, "grad_norm": 1.794002890586853, "learning_rate": 6.969319071186758e-06, "loss": 0.2243, "step": 27600 }, { "epoch": 0.5981192988538123, "grad_norm": 1.1304699182510376, "learning_rate": 6.966075860534852e-06, "loss": 0.2407, "step": 27605 }, { "epoch": 0.5982276341732932, "grad_norm": 1.9013718366622925, "learning_rate": 6.9628330013176105e-06, "loss": 0.2646, "step": 27610 }, { "epoch": 0.598335969492774, "grad_norm": 1.5718514919281006, "learning_rate": 6.959590493910668e-06, "loss": 0.2502, "step": 27615 }, { "epoch": 0.5984443048122549, "grad_norm": 2.100975275039673, "learning_rate": 6.956348338689623e-06, "loss": 0.35, "step": 27620 }, { "epoch": 0.5985526401317357, "grad_norm": 1.54911208152771, "learning_rate": 6.95310653603003e-06, "loss": 0.2077, "step": 27625 }, { "epoch": 0.5986609754512167, "grad_norm": 1.1062507629394531, "learning_rate": 6.949865086307398e-06, "loss": 0.269, "step": 27630 }, { "epoch": 0.5987693107706975, "grad_norm": 1.9627431631088257, "learning_rate": 6.946623989897208e-06, "loss": 0.2162, "step": 27635 }, { "epoch": 0.5988776460901784, "grad_norm": 1.2332977056503296, "learning_rate": 6.943383247174889e-06, "loss": 0.198, "step": 27640 }, { "epoch": 0.5989859814096592, "grad_norm": 1.1624290943145752, "learning_rate": 6.940142858515833e-06, "loss": 0.1424, "step": 27645 }, { "epoch": 0.59909431672914, "grad_norm": 1.0081852674484253, "learning_rate": 6.936902824295388e-06, "loss": 0.2201, "step": 27650 }, { "epoch": 0.5992026520486209, "grad_norm": 1.895805835723877, "learning_rate": 6.933663144888868e-06, "loss": 0.226, "step": 27655 }, { "epoch": 0.5993109873681017, "grad_norm": 1.605886459350586, "learning_rate": 6.930423820671539e-06, "loss": 0.2734, "step": 27660 }, { "epoch": 0.5994193226875826, "grad_norm": 1.5426995754241943, "learning_rate": 6.927184852018627e-06, "loss": 0.2214, "step": 27665 }, { "epoch": 0.5995276580070634, "grad_norm": 2.3451786041259766, "learning_rate": 6.923946239305321e-06, "loss": 0.3436, "step": 27670 }, { "epoch": 0.5996359933265443, "grad_norm": 1.175567865371704, "learning_rate": 6.920707982906762e-06, "loss": 0.2225, "step": 27675 }, { "epoch": 0.5997443286460252, "grad_norm": 1.7338197231292725, "learning_rate": 6.917470083198053e-06, "loss": 0.2648, "step": 27680 }, { "epoch": 0.5998526639655061, "grad_norm": 1.4912757873535156, "learning_rate": 6.91423254055426e-06, "loss": 0.2641, "step": 27685 }, { "epoch": 0.5999609992849869, "grad_norm": 1.1613874435424805, "learning_rate": 6.910995355350405e-06, "loss": 0.2487, "step": 27690 }, { "epoch": 0.6000693346044678, "grad_norm": 1.0788905620574951, "learning_rate": 6.907758527961463e-06, "loss": 0.2646, "step": 27695 }, { "epoch": 0.6001776699239486, "grad_norm": 1.4081593751907349, "learning_rate": 6.9045220587623744e-06, "loss": 0.2566, "step": 27700 }, { "epoch": 0.6002860052434295, "grad_norm": 1.450881004333496, "learning_rate": 6.901285948128037e-06, "loss": 0.2013, "step": 27705 }, { "epoch": 0.6003943405629103, "grad_norm": 1.3818222284317017, "learning_rate": 6.898050196433302e-06, "loss": 0.2425, "step": 27710 }, { "epoch": 0.6005026758823911, "grad_norm": 1.365767002105713, "learning_rate": 6.894814804052984e-06, "loss": 0.3115, "step": 27715 }, { "epoch": 0.600611011201872, "grad_norm": 1.5217995643615723, "learning_rate": 6.891579771361856e-06, "loss": 0.2142, "step": 27720 }, { "epoch": 0.6007193465213528, "grad_norm": 1.0971089601516724, "learning_rate": 6.888345098734646e-06, "loss": 0.3689, "step": 27725 }, { "epoch": 0.6008276818408338, "grad_norm": 1.2690167427062988, "learning_rate": 6.885110786546041e-06, "loss": 0.2177, "step": 27730 }, { "epoch": 0.6009360171603146, "grad_norm": 1.1683979034423828, "learning_rate": 6.881876835170693e-06, "loss": 0.195, "step": 27735 }, { "epoch": 0.6010443524797955, "grad_norm": 1.591282606124878, "learning_rate": 6.878643244983207e-06, "loss": 0.22, "step": 27740 }, { "epoch": 0.6011526877992763, "grad_norm": 1.2287678718566895, "learning_rate": 6.875410016358142e-06, "loss": 0.3006, "step": 27745 }, { "epoch": 0.6012610231187572, "grad_norm": 1.2110103368759155, "learning_rate": 6.872177149670019e-06, "loss": 0.34, "step": 27750 }, { "epoch": 0.601369358438238, "grad_norm": 1.0923947095870972, "learning_rate": 6.8689446452933195e-06, "loss": 0.2564, "step": 27755 }, { "epoch": 0.6014776937577189, "grad_norm": 2.022845506668091, "learning_rate": 6.86571250360248e-06, "loss": 0.3106, "step": 27760 }, { "epoch": 0.6015860290771997, "grad_norm": 1.1183863878250122, "learning_rate": 6.862480724971894e-06, "loss": 0.2273, "step": 27765 }, { "epoch": 0.6016943643966806, "grad_norm": 2.1699821949005127, "learning_rate": 6.8592493097759164e-06, "loss": 0.3068, "step": 27770 }, { "epoch": 0.6018026997161615, "grad_norm": 1.4240096807479858, "learning_rate": 6.856018258388854e-06, "loss": 0.4331, "step": 27775 }, { "epoch": 0.6019110350356424, "grad_norm": 1.479998230934143, "learning_rate": 6.852787571184984e-06, "loss": 0.1947, "step": 27780 }, { "epoch": 0.6020193703551232, "grad_norm": 1.0921481847763062, "learning_rate": 6.849557248538529e-06, "loss": 0.3011, "step": 27785 }, { "epoch": 0.602127705674604, "grad_norm": 1.4591495990753174, "learning_rate": 6.8463272908236715e-06, "loss": 0.2532, "step": 27790 }, { "epoch": 0.6022360409940849, "grad_norm": 1.8482060432434082, "learning_rate": 6.843097698414556e-06, "loss": 0.2817, "step": 27795 }, { "epoch": 0.6023443763135657, "grad_norm": 1.1057932376861572, "learning_rate": 6.839868471685283e-06, "loss": 0.301, "step": 27800 }, { "epoch": 0.6024527116330466, "grad_norm": 1.2903977632522583, "learning_rate": 6.836639611009907e-06, "loss": 0.3803, "step": 27805 }, { "epoch": 0.6025610469525274, "grad_norm": 2.251981735229492, "learning_rate": 6.8334111167624454e-06, "loss": 0.3128, "step": 27810 }, { "epoch": 0.6026693822720083, "grad_norm": 1.24441659450531, "learning_rate": 6.830182989316869e-06, "loss": 0.1667, "step": 27815 }, { "epoch": 0.6027777175914891, "grad_norm": 2.070012331008911, "learning_rate": 6.826955229047112e-06, "loss": 0.2384, "step": 27820 }, { "epoch": 0.6028860529109701, "grad_norm": 1.4788166284561157, "learning_rate": 6.8237278363270565e-06, "loss": 0.2402, "step": 27825 }, { "epoch": 0.6029943882304509, "grad_norm": 2.1036665439605713, "learning_rate": 6.820500811530552e-06, "loss": 0.2789, "step": 27830 }, { "epoch": 0.6031027235499318, "grad_norm": 2.0045218467712402, "learning_rate": 6.817274155031399e-06, "loss": 0.2225, "step": 27835 }, { "epoch": 0.6032110588694126, "grad_norm": 1.1197073459625244, "learning_rate": 6.8140478672033595e-06, "loss": 0.2845, "step": 27840 }, { "epoch": 0.6033193941888935, "grad_norm": 1.3152008056640625, "learning_rate": 6.810821948420149e-06, "loss": 0.24, "step": 27845 }, { "epoch": 0.6034277295083743, "grad_norm": 2.038970470428467, "learning_rate": 6.807596399055441e-06, "loss": 0.2799, "step": 27850 }, { "epoch": 0.6035360648278552, "grad_norm": 1.310949683189392, "learning_rate": 6.804371219482868e-06, "loss": 0.2448, "step": 27855 }, { "epoch": 0.603644400147336, "grad_norm": 1.4392989873886108, "learning_rate": 6.801146410076019e-06, "loss": 0.3468, "step": 27860 }, { "epoch": 0.6037527354668168, "grad_norm": 1.8254334926605225, "learning_rate": 6.797921971208441e-06, "loss": 0.3238, "step": 27865 }, { "epoch": 0.6038610707862977, "grad_norm": 1.318731427192688, "learning_rate": 6.794697903253633e-06, "loss": 0.2475, "step": 27870 }, { "epoch": 0.6039694061057787, "grad_norm": 1.7138786315917969, "learning_rate": 6.791474206585057e-06, "loss": 0.1917, "step": 27875 }, { "epoch": 0.6040777414252595, "grad_norm": 1.3661680221557617, "learning_rate": 6.788250881576133e-06, "loss": 0.2315, "step": 27880 }, { "epoch": 0.6041860767447403, "grad_norm": 1.3661775588989258, "learning_rate": 6.78502792860023e-06, "loss": 0.2701, "step": 27885 }, { "epoch": 0.6042944120642212, "grad_norm": 1.2693672180175781, "learning_rate": 6.781805348030683e-06, "loss": 0.2899, "step": 27890 }, { "epoch": 0.604402747383702, "grad_norm": 1.9428526163101196, "learning_rate": 6.778583140240778e-06, "loss": 0.2414, "step": 27895 }, { "epoch": 0.6045110827031829, "grad_norm": 1.3002490997314453, "learning_rate": 6.775361305603758e-06, "loss": 0.195, "step": 27900 }, { "epoch": 0.6046194180226637, "grad_norm": 1.4184614419937134, "learning_rate": 6.772139844492827e-06, "loss": 0.3987, "step": 27905 }, { "epoch": 0.6047277533421446, "grad_norm": 1.1297776699066162, "learning_rate": 6.768918757281144e-06, "loss": 0.2507, "step": 27910 }, { "epoch": 0.6048360886616254, "grad_norm": 0.8903091549873352, "learning_rate": 6.765698044341817e-06, "loss": 0.1676, "step": 27915 }, { "epoch": 0.6049444239811064, "grad_norm": 1.574036717414856, "learning_rate": 6.762477706047921e-06, "loss": 0.3003, "step": 27920 }, { "epoch": 0.6050527593005872, "grad_norm": 1.5633679628372192, "learning_rate": 6.759257742772486e-06, "loss": 0.2213, "step": 27925 }, { "epoch": 0.6051610946200681, "grad_norm": 2.323225259780884, "learning_rate": 6.7560381548884955e-06, "loss": 0.2785, "step": 27930 }, { "epoch": 0.6052694299395489, "grad_norm": 1.7703864574432373, "learning_rate": 6.752818942768892e-06, "loss": 0.1838, "step": 27935 }, { "epoch": 0.6053777652590298, "grad_norm": 1.360794186592102, "learning_rate": 6.749600106786569e-06, "loss": 0.3223, "step": 27940 }, { "epoch": 0.6054861005785106, "grad_norm": 1.4275485277175903, "learning_rate": 6.746381647314384e-06, "loss": 0.2964, "step": 27945 }, { "epoch": 0.6055944358979914, "grad_norm": 1.5220251083374023, "learning_rate": 6.743163564725148e-06, "loss": 0.1622, "step": 27950 }, { "epoch": 0.6057027712174723, "grad_norm": 2.148191213607788, "learning_rate": 6.739945859391623e-06, "loss": 0.1892, "step": 27955 }, { "epoch": 0.6058111065369531, "grad_norm": 1.8622044324874878, "learning_rate": 6.736728531686536e-06, "loss": 0.2375, "step": 27960 }, { "epoch": 0.605919441856434, "grad_norm": 2.171617031097412, "learning_rate": 6.733511581982564e-06, "loss": 0.2716, "step": 27965 }, { "epoch": 0.6060277771759149, "grad_norm": 2.3413374423980713, "learning_rate": 6.730295010652338e-06, "loss": 0.3529, "step": 27970 }, { "epoch": 0.6061361124953958, "grad_norm": 1.1350207328796387, "learning_rate": 6.72707881806846e-06, "loss": 0.2341, "step": 27975 }, { "epoch": 0.6062444478148766, "grad_norm": 1.0754833221435547, "learning_rate": 6.723863004603472e-06, "loss": 0.2215, "step": 27980 }, { "epoch": 0.6063527831343575, "grad_norm": 1.1805953979492188, "learning_rate": 6.720647570629881e-06, "loss": 0.1872, "step": 27985 }, { "epoch": 0.6064611184538383, "grad_norm": 2.1384634971618652, "learning_rate": 6.717432516520142e-06, "loss": 0.1926, "step": 27990 }, { "epoch": 0.6065694537733192, "grad_norm": 1.2300665378570557, "learning_rate": 6.714217842646673e-06, "loss": 0.2065, "step": 27995 }, { "epoch": 0.6066777890928, "grad_norm": 1.7343194484710693, "learning_rate": 6.711003549381847e-06, "loss": 0.1989, "step": 28000 }, { "epoch": 0.6067861244122809, "grad_norm": 1.1314152479171753, "learning_rate": 6.707789637097989e-06, "loss": 0.2522, "step": 28005 }, { "epoch": 0.6068944597317617, "grad_norm": 1.174033284187317, "learning_rate": 6.704576106167384e-06, "loss": 0.2909, "step": 28010 }, { "epoch": 0.6070027950512427, "grad_norm": 2.03458571434021, "learning_rate": 6.70136295696227e-06, "loss": 0.3564, "step": 28015 }, { "epoch": 0.6071111303707235, "grad_norm": 1.4620435237884521, "learning_rate": 6.698150189854849e-06, "loss": 0.2949, "step": 28020 }, { "epoch": 0.6072194656902044, "grad_norm": 1.5887467861175537, "learning_rate": 6.694937805217263e-06, "loss": 0.2538, "step": 28025 }, { "epoch": 0.6073278010096852, "grad_norm": 1.2304095029830933, "learning_rate": 6.691725803421624e-06, "loss": 0.2439, "step": 28030 }, { "epoch": 0.607436136329166, "grad_norm": 2.1587488651275635, "learning_rate": 6.688514184839992e-06, "loss": 0.1675, "step": 28035 }, { "epoch": 0.6075444716486469, "grad_norm": 1.4515414237976074, "learning_rate": 6.685302949844386e-06, "loss": 0.2063, "step": 28040 }, { "epoch": 0.6076528069681277, "grad_norm": 1.3984224796295166, "learning_rate": 6.682092098806778e-06, "loss": 0.198, "step": 28045 }, { "epoch": 0.6077611422876086, "grad_norm": 1.7335405349731445, "learning_rate": 6.6788816320991e-06, "loss": 0.2018, "step": 28050 }, { "epoch": 0.6078694776070894, "grad_norm": 1.3890544176101685, "learning_rate": 6.675671550093234e-06, "loss": 0.306, "step": 28055 }, { "epoch": 0.6079778129265703, "grad_norm": 1.9523282051086426, "learning_rate": 6.672461853161021e-06, "loss": 0.2139, "step": 28060 }, { "epoch": 0.6080861482460512, "grad_norm": 1.4898505210876465, "learning_rate": 6.669252541674251e-06, "loss": 0.1252, "step": 28065 }, { "epoch": 0.6081944835655321, "grad_norm": 1.63486647605896, "learning_rate": 6.666043616004685e-06, "loss": 0.1919, "step": 28070 }, { "epoch": 0.6083028188850129, "grad_norm": 1.8481632471084595, "learning_rate": 6.662835076524021e-06, "loss": 0.3223, "step": 28075 }, { "epoch": 0.6084111542044938, "grad_norm": 1.8375827074050903, "learning_rate": 6.6596269236039236e-06, "loss": 0.221, "step": 28080 }, { "epoch": 0.6085194895239746, "grad_norm": 1.6358602046966553, "learning_rate": 6.656419157616008e-06, "loss": 0.2904, "step": 28085 }, { "epoch": 0.6086278248434555, "grad_norm": 2.2451348304748535, "learning_rate": 6.6532117789318475e-06, "loss": 0.2824, "step": 28090 }, { "epoch": 0.6087361601629363, "grad_norm": 2.064676523208618, "learning_rate": 6.650004787922966e-06, "loss": 0.2449, "step": 28095 }, { "epoch": 0.6088444954824171, "grad_norm": 1.3618731498718262, "learning_rate": 6.646798184960848e-06, "loss": 0.241, "step": 28100 }, { "epoch": 0.608952830801898, "grad_norm": 1.3672667741775513, "learning_rate": 6.643591970416928e-06, "loss": 0.167, "step": 28105 }, { "epoch": 0.6090611661213788, "grad_norm": 0.9726381897926331, "learning_rate": 6.6403861446626005e-06, "loss": 0.2169, "step": 28110 }, { "epoch": 0.6091695014408598, "grad_norm": 1.7987505197525024, "learning_rate": 6.637180708069208e-06, "loss": 0.1894, "step": 28115 }, { "epoch": 0.6092778367603406, "grad_norm": 2.023344039916992, "learning_rate": 6.633975661008059e-06, "loss": 0.29, "step": 28120 }, { "epoch": 0.6093861720798215, "grad_norm": 1.5288904905319214, "learning_rate": 6.630771003850405e-06, "loss": 0.2798, "step": 28125 }, { "epoch": 0.6094945073993023, "grad_norm": 1.6318763494491577, "learning_rate": 6.627566736967459e-06, "loss": 0.2173, "step": 28130 }, { "epoch": 0.6096028427187832, "grad_norm": 1.5756691694259644, "learning_rate": 6.624362860730389e-06, "loss": 0.2449, "step": 28135 }, { "epoch": 0.609711178038264, "grad_norm": 2.1323671340942383, "learning_rate": 6.6211593755103136e-06, "loss": 0.2566, "step": 28140 }, { "epoch": 0.6098195133577449, "grad_norm": 1.0059996843338013, "learning_rate": 6.617956281678309e-06, "loss": 0.268, "step": 28145 }, { "epoch": 0.6099278486772257, "grad_norm": 1.3843120336532593, "learning_rate": 6.6147535796054075e-06, "loss": 0.2564, "step": 28150 }, { "epoch": 0.6100361839967066, "grad_norm": 2.1239047050476074, "learning_rate": 6.611551269662594e-06, "loss": 0.2972, "step": 28155 }, { "epoch": 0.6101445193161875, "grad_norm": 1.5047270059585571, "learning_rate": 6.6083493522208005e-06, "loss": 0.1519, "step": 28160 }, { "epoch": 0.6102528546356684, "grad_norm": 3.081490993499756, "learning_rate": 6.605147827650933e-06, "loss": 0.3534, "step": 28165 }, { "epoch": 0.6103611899551492, "grad_norm": 1.9909560680389404, "learning_rate": 6.601946696323833e-06, "loss": 0.2028, "step": 28170 }, { "epoch": 0.61046952527463, "grad_norm": 1.6543759107589722, "learning_rate": 6.598745958610307e-06, "loss": 0.314, "step": 28175 }, { "epoch": 0.6105778605941109, "grad_norm": 0.8681895136833191, "learning_rate": 6.59554561488111e-06, "loss": 0.2489, "step": 28180 }, { "epoch": 0.6106861959135917, "grad_norm": 1.5082247257232666, "learning_rate": 6.592345665506956e-06, "loss": 0.2742, "step": 28185 }, { "epoch": 0.6107945312330726, "grad_norm": 2.6087498664855957, "learning_rate": 6.589146110858511e-06, "loss": 0.2527, "step": 28190 }, { "epoch": 0.6109028665525534, "grad_norm": 1.6550390720367432, "learning_rate": 6.585946951306393e-06, "loss": 0.2227, "step": 28195 }, { "epoch": 0.6110112018720343, "grad_norm": 1.3820552825927734, "learning_rate": 6.582748187221177e-06, "loss": 0.2951, "step": 28200 }, { "epoch": 0.6111195371915151, "grad_norm": 1.5867340564727783, "learning_rate": 6.5795498189733945e-06, "loss": 0.216, "step": 28205 }, { "epoch": 0.6112278725109961, "grad_norm": 1.802616000175476, "learning_rate": 6.576351846933522e-06, "loss": 0.3043, "step": 28210 }, { "epoch": 0.6113362078304769, "grad_norm": 1.5027740001678467, "learning_rate": 6.573154271472006e-06, "loss": 0.2994, "step": 28215 }, { "epoch": 0.6114445431499578, "grad_norm": 1.4130641222000122, "learning_rate": 6.569957092959234e-06, "loss": 0.2952, "step": 28220 }, { "epoch": 0.6115528784694386, "grad_norm": 1.0742394924163818, "learning_rate": 6.566760311765552e-06, "loss": 0.2262, "step": 28225 }, { "epoch": 0.6116612137889195, "grad_norm": 1.530881643295288, "learning_rate": 6.563563928261256e-06, "loss": 0.2245, "step": 28230 }, { "epoch": 0.6117695491084003, "grad_norm": 1.8398054838180542, "learning_rate": 6.560367942816602e-06, "loss": 0.3449, "step": 28235 }, { "epoch": 0.6118778844278812, "grad_norm": 1.3856533765792847, "learning_rate": 6.557172355801797e-06, "loss": 0.2748, "step": 28240 }, { "epoch": 0.611986219747362, "grad_norm": 1.5930908918380737, "learning_rate": 6.553977167586999e-06, "loss": 0.2338, "step": 28245 }, { "epoch": 0.6120945550668428, "grad_norm": 1.0908105373382568, "learning_rate": 6.550782378542325e-06, "loss": 0.2857, "step": 28250 }, { "epoch": 0.6122028903863237, "grad_norm": 2.0071334838867188, "learning_rate": 6.547587989037845e-06, "loss": 0.2365, "step": 28255 }, { "epoch": 0.6123112257058047, "grad_norm": 1.5901046991348267, "learning_rate": 6.5443939994435745e-06, "loss": 0.2428, "step": 28260 }, { "epoch": 0.6124195610252855, "grad_norm": 2.070517063140869, "learning_rate": 6.5412004101294975e-06, "loss": 0.2532, "step": 28265 }, { "epoch": 0.6125278963447663, "grad_norm": 1.0163244009017944, "learning_rate": 6.538007221465541e-06, "loss": 0.15, "step": 28270 }, { "epoch": 0.6126362316642472, "grad_norm": 1.2452661991119385, "learning_rate": 6.534814433821585e-06, "loss": 0.1792, "step": 28275 }, { "epoch": 0.612744566983728, "grad_norm": 1.869430422782898, "learning_rate": 6.531622047567472e-06, "loss": 0.2907, "step": 28280 }, { "epoch": 0.6128529023032089, "grad_norm": 1.9846009016036987, "learning_rate": 6.528430063072986e-06, "loss": 0.2777, "step": 28285 }, { "epoch": 0.6129612376226897, "grad_norm": 0.8414430618286133, "learning_rate": 6.5252384807078735e-06, "loss": 0.1955, "step": 28290 }, { "epoch": 0.6130695729421706, "grad_norm": 1.621403694152832, "learning_rate": 6.522047300841831e-06, "loss": 0.2848, "step": 28295 }, { "epoch": 0.6131779082616514, "grad_norm": 2.3056159019470215, "learning_rate": 6.518856523844509e-06, "loss": 0.2481, "step": 28300 }, { "epoch": 0.6132862435811324, "grad_norm": 2.2961690425872803, "learning_rate": 6.515666150085509e-06, "loss": 0.2617, "step": 28305 }, { "epoch": 0.6133945789006132, "grad_norm": 1.3214175701141357, "learning_rate": 6.5124761799343925e-06, "loss": 0.2339, "step": 28310 }, { "epoch": 0.6135029142200941, "grad_norm": 1.3228448629379272, "learning_rate": 6.509286613760668e-06, "loss": 0.2744, "step": 28315 }, { "epoch": 0.6136112495395749, "grad_norm": 1.8499835729599, "learning_rate": 6.5060974519337975e-06, "loss": 0.1685, "step": 28320 }, { "epoch": 0.6137195848590558, "grad_norm": 1.8196825981140137, "learning_rate": 6.502908694823198e-06, "loss": 0.2065, "step": 28325 }, { "epoch": 0.6138279201785366, "grad_norm": 1.221461534500122, "learning_rate": 6.499720342798241e-06, "loss": 0.2872, "step": 28330 }, { "epoch": 0.6139362554980174, "grad_norm": 1.3334708213806152, "learning_rate": 6.496532396228248e-06, "loss": 0.2647, "step": 28335 }, { "epoch": 0.6140445908174983, "grad_norm": 2.2570011615753174, "learning_rate": 6.493344855482495e-06, "loss": 0.3538, "step": 28340 }, { "epoch": 0.6141529261369791, "grad_norm": 1.466137409210205, "learning_rate": 6.49015772093021e-06, "loss": 0.3107, "step": 28345 }, { "epoch": 0.61426126145646, "grad_norm": 1.2961236238479614, "learning_rate": 6.486970992940576e-06, "loss": 0.1622, "step": 28350 }, { "epoch": 0.6143695967759409, "grad_norm": 2.1206867694854736, "learning_rate": 6.483784671882725e-06, "loss": 0.2742, "step": 28355 }, { "epoch": 0.6144779320954218, "grad_norm": 1.0742452144622803, "learning_rate": 6.480598758125749e-06, "loss": 0.2249, "step": 28360 }, { "epoch": 0.6145862674149026, "grad_norm": 1.434490442276001, "learning_rate": 6.477413252038687e-06, "loss": 0.2595, "step": 28365 }, { "epoch": 0.6146946027343835, "grad_norm": 2.040269374847412, "learning_rate": 6.474228153990532e-06, "loss": 0.2973, "step": 28370 }, { "epoch": 0.6148029380538643, "grad_norm": 1.9760783910751343, "learning_rate": 6.471043464350227e-06, "loss": 0.3378, "step": 28375 }, { "epoch": 0.6149112733733452, "grad_norm": 1.6650910377502441, "learning_rate": 6.467859183486673e-06, "loss": 0.2261, "step": 28380 }, { "epoch": 0.615019608692826, "grad_norm": 1.6999332904815674, "learning_rate": 6.464675311768723e-06, "loss": 0.1728, "step": 28385 }, { "epoch": 0.6151279440123069, "grad_norm": 1.3655898571014404, "learning_rate": 6.461491849565178e-06, "loss": 0.2562, "step": 28390 }, { "epoch": 0.6152362793317877, "grad_norm": 2.093400239944458, "learning_rate": 6.4583087972447986e-06, "loss": 0.2322, "step": 28395 }, { "epoch": 0.6153446146512687, "grad_norm": 1.1451265811920166, "learning_rate": 6.455126155176283e-06, "loss": 0.2285, "step": 28400 }, { "epoch": 0.6154529499707495, "grad_norm": 1.404942274093628, "learning_rate": 6.4519439237283045e-06, "loss": 0.1558, "step": 28405 }, { "epoch": 0.6155612852902304, "grad_norm": 1.2961201667785645, "learning_rate": 6.448762103269473e-06, "loss": 0.2781, "step": 28410 }, { "epoch": 0.6156696206097112, "grad_norm": 1.102197289466858, "learning_rate": 6.445580694168354e-06, "loss": 0.1511, "step": 28415 }, { "epoch": 0.615777955929192, "grad_norm": 1.426211953163147, "learning_rate": 6.442399696793466e-06, "loss": 0.2181, "step": 28420 }, { "epoch": 0.6158862912486729, "grad_norm": 1.5041775703430176, "learning_rate": 6.43921911151328e-06, "loss": 0.2177, "step": 28425 }, { "epoch": 0.6159946265681537, "grad_norm": 1.6986643075942993, "learning_rate": 6.436038938696223e-06, "loss": 0.1926, "step": 28430 }, { "epoch": 0.6161029618876346, "grad_norm": 1.431904673576355, "learning_rate": 6.432859178710663e-06, "loss": 0.3135, "step": 28435 }, { "epoch": 0.6162112972071154, "grad_norm": 1.197483777999878, "learning_rate": 6.4296798319249335e-06, "loss": 0.2452, "step": 28440 }, { "epoch": 0.6163196325265963, "grad_norm": 1.776692509651184, "learning_rate": 6.42650089870731e-06, "loss": 0.2984, "step": 28445 }, { "epoch": 0.6164279678460772, "grad_norm": 1.6415938138961792, "learning_rate": 6.423322379426023e-06, "loss": 0.2996, "step": 28450 }, { "epoch": 0.6165363031655581, "grad_norm": 2.323981523513794, "learning_rate": 6.420144274449264e-06, "loss": 0.3122, "step": 28455 }, { "epoch": 0.6166446384850389, "grad_norm": 2.0489628314971924, "learning_rate": 6.416966584145163e-06, "loss": 0.2436, "step": 28460 }, { "epoch": 0.6167529738045198, "grad_norm": 1.6918326616287231, "learning_rate": 6.413789308881812e-06, "loss": 0.3039, "step": 28465 }, { "epoch": 0.6168613091240006, "grad_norm": 1.064011812210083, "learning_rate": 6.410612449027249e-06, "loss": 0.3286, "step": 28470 }, { "epoch": 0.6169696444434815, "grad_norm": 1.4142533540725708, "learning_rate": 6.407436004949463e-06, "loss": 0.241, "step": 28475 }, { "epoch": 0.6170779797629623, "grad_norm": 1.506492018699646, "learning_rate": 6.404259977016398e-06, "loss": 0.239, "step": 28480 }, { "epoch": 0.6171863150824431, "grad_norm": 1.7347346544265747, "learning_rate": 6.401084365595952e-06, "loss": 0.2894, "step": 28485 }, { "epoch": 0.617294650401924, "grad_norm": 0.9594079256057739, "learning_rate": 6.397909171055969e-06, "loss": 0.262, "step": 28490 }, { "epoch": 0.6174029857214048, "grad_norm": 1.2858755588531494, "learning_rate": 6.39473439376425e-06, "loss": 0.2612, "step": 28495 }, { "epoch": 0.6175113210408858, "grad_norm": 1.580445408821106, "learning_rate": 6.391560034088542e-06, "loss": 0.2912, "step": 28500 }, { "epoch": 0.6176196563603666, "grad_norm": 1.568261981010437, "learning_rate": 6.388386092396554e-06, "loss": 0.2505, "step": 28505 }, { "epoch": 0.6177279916798475, "grad_norm": 1.355650544166565, "learning_rate": 6.385212569055934e-06, "loss": 0.2166, "step": 28510 }, { "epoch": 0.6178363269993283, "grad_norm": 1.258622407913208, "learning_rate": 6.382039464434287e-06, "loss": 0.2463, "step": 28515 }, { "epoch": 0.6179446623188092, "grad_norm": 1.4308973550796509, "learning_rate": 6.378866778899173e-06, "loss": 0.2913, "step": 28520 }, { "epoch": 0.61805299763829, "grad_norm": 1.3678172826766968, "learning_rate": 6.375694512818097e-06, "loss": 0.2142, "step": 28525 }, { "epoch": 0.6181613329577709, "grad_norm": 1.9413206577301025, "learning_rate": 6.372522666558519e-06, "loss": 0.3901, "step": 28530 }, { "epoch": 0.6182696682772517, "grad_norm": 1.5562744140625, "learning_rate": 6.369351240487851e-06, "loss": 0.2997, "step": 28535 }, { "epoch": 0.6183780035967326, "grad_norm": 1.4158532619476318, "learning_rate": 6.366180234973456e-06, "loss": 0.2477, "step": 28540 }, { "epoch": 0.6184863389162135, "grad_norm": 1.5727362632751465, "learning_rate": 6.363009650382642e-06, "loss": 0.1911, "step": 28545 }, { "epoch": 0.6185946742356944, "grad_norm": 1.147372841835022, "learning_rate": 6.359839487082682e-06, "loss": 0.2601, "step": 28550 }, { "epoch": 0.6187030095551752, "grad_norm": 1.8534812927246094, "learning_rate": 6.356669745440785e-06, "loss": 0.1867, "step": 28555 }, { "epoch": 0.618811344874656, "grad_norm": 1.5220588445663452, "learning_rate": 6.353500425824124e-06, "loss": 0.229, "step": 28560 }, { "epoch": 0.6189196801941369, "grad_norm": 1.2565608024597168, "learning_rate": 6.350331528599811e-06, "loss": 0.213, "step": 28565 }, { "epoch": 0.6190280155136177, "grad_norm": 1.4480347633361816, "learning_rate": 6.347163054134921e-06, "loss": 0.2757, "step": 28570 }, { "epoch": 0.6191363508330986, "grad_norm": 2.213529586791992, "learning_rate": 6.34399500279647e-06, "loss": 0.2254, "step": 28575 }, { "epoch": 0.6192446861525794, "grad_norm": 1.472571611404419, "learning_rate": 6.340827374951433e-06, "loss": 0.1366, "step": 28580 }, { "epoch": 0.6193530214720603, "grad_norm": 1.464855432510376, "learning_rate": 6.3376601709667285e-06, "loss": 0.2285, "step": 28585 }, { "epoch": 0.6194613567915411, "grad_norm": 1.6181347370147705, "learning_rate": 6.334493391209231e-06, "loss": 0.2467, "step": 28590 }, { "epoch": 0.6195696921110221, "grad_norm": 1.231478214263916, "learning_rate": 6.3313270360457644e-06, "loss": 0.246, "step": 28595 }, { "epoch": 0.6196780274305029, "grad_norm": 1.1940879821777344, "learning_rate": 6.328161105843105e-06, "loss": 0.1657, "step": 28600 }, { "epoch": 0.6197863627499838, "grad_norm": 1.7863918542861938, "learning_rate": 6.324995600967978e-06, "loss": 0.3366, "step": 28605 }, { "epoch": 0.6198946980694646, "grad_norm": 1.3794074058532715, "learning_rate": 6.321830521787058e-06, "loss": 0.2652, "step": 28610 }, { "epoch": 0.6200030333889455, "grad_norm": 2.1651906967163086, "learning_rate": 6.318665868666974e-06, "loss": 0.1902, "step": 28615 }, { "epoch": 0.6201113687084263, "grad_norm": 2.039372444152832, "learning_rate": 6.3155016419743025e-06, "loss": 0.3402, "step": 28620 }, { "epoch": 0.6202197040279072, "grad_norm": 2.041883945465088, "learning_rate": 6.312337842075571e-06, "loss": 0.2827, "step": 28625 }, { "epoch": 0.620328039347388, "grad_norm": 1.940909504890442, "learning_rate": 6.309174469337261e-06, "loss": 0.2425, "step": 28630 }, { "epoch": 0.6204363746668689, "grad_norm": 1.4953519105911255, "learning_rate": 6.306011524125799e-06, "loss": 0.1801, "step": 28635 }, { "epoch": 0.6205447099863497, "grad_norm": 1.2009245157241821, "learning_rate": 6.302849006807569e-06, "loss": 0.1965, "step": 28640 }, { "epoch": 0.6206530453058307, "grad_norm": 1.3517578840255737, "learning_rate": 6.2996869177488905e-06, "loss": 0.2221, "step": 28645 }, { "epoch": 0.6207613806253115, "grad_norm": 1.5734769105911255, "learning_rate": 6.296525257316057e-06, "loss": 0.1801, "step": 28650 }, { "epoch": 0.6208697159447923, "grad_norm": 1.638298511505127, "learning_rate": 6.293364025875295e-06, "loss": 0.1941, "step": 28655 }, { "epoch": 0.6209780512642732, "grad_norm": 1.4169127941131592, "learning_rate": 6.290203223792785e-06, "loss": 0.2652, "step": 28660 }, { "epoch": 0.621086386583754, "grad_norm": 1.5727213621139526, "learning_rate": 6.287042851434657e-06, "loss": 0.2722, "step": 28665 }, { "epoch": 0.6211947219032349, "grad_norm": 1.4886525869369507, "learning_rate": 6.283882909166996e-06, "loss": 0.1745, "step": 28670 }, { "epoch": 0.6213030572227157, "grad_norm": 1.7571864128112793, "learning_rate": 6.280723397355834e-06, "loss": 0.2554, "step": 28675 }, { "epoch": 0.6214113925421966, "grad_norm": 1.831283688545227, "learning_rate": 6.277564316367151e-06, "loss": 0.2846, "step": 28680 }, { "epoch": 0.6215197278616774, "grad_norm": 1.5062103271484375, "learning_rate": 6.27440566656688e-06, "loss": 0.2846, "step": 28685 }, { "epoch": 0.6216280631811584, "grad_norm": 1.4671986103057861, "learning_rate": 6.271247448320897e-06, "loss": 0.2714, "step": 28690 }, { "epoch": 0.6217363985006392, "grad_norm": 1.2440983057022095, "learning_rate": 6.268089661995046e-06, "loss": 0.263, "step": 28695 }, { "epoch": 0.6218447338201201, "grad_norm": 1.919029951095581, "learning_rate": 6.264932307955103e-06, "loss": 0.2148, "step": 28700 }, { "epoch": 0.6219530691396009, "grad_norm": 1.2078638076782227, "learning_rate": 6.261775386566802e-06, "loss": 0.1919, "step": 28705 }, { "epoch": 0.6220614044590818, "grad_norm": 1.5116585493087769, "learning_rate": 6.258618898195824e-06, "loss": 0.2628, "step": 28710 }, { "epoch": 0.6221697397785626, "grad_norm": 1.1737223863601685, "learning_rate": 6.2554628432078006e-06, "loss": 0.296, "step": 28715 }, { "epoch": 0.6222780750980434, "grad_norm": 1.479063868522644, "learning_rate": 6.252307221968312e-06, "loss": 0.2112, "step": 28720 }, { "epoch": 0.6223864104175243, "grad_norm": 1.597185492515564, "learning_rate": 6.2491520348428915e-06, "loss": 0.2828, "step": 28725 }, { "epoch": 0.6224947457370051, "grad_norm": 1.51027250289917, "learning_rate": 6.245997282197021e-06, "loss": 0.198, "step": 28730 }, { "epoch": 0.622603081056486, "grad_norm": 0.8552813529968262, "learning_rate": 6.2428429643961295e-06, "loss": 0.2538, "step": 28735 }, { "epoch": 0.6227114163759669, "grad_norm": 1.3368046283721924, "learning_rate": 6.2396890818055955e-06, "loss": 0.1826, "step": 28740 }, { "epoch": 0.6228197516954478, "grad_norm": 0.8736948370933533, "learning_rate": 6.236535634790757e-06, "loss": 0.3565, "step": 28745 }, { "epoch": 0.6229280870149286, "grad_norm": 1.6877520084381104, "learning_rate": 6.233382623716886e-06, "loss": 0.2738, "step": 28750 }, { "epoch": 0.6230364223344095, "grad_norm": 1.4134578704833984, "learning_rate": 6.2302300489492154e-06, "loss": 0.3039, "step": 28755 }, { "epoch": 0.6231447576538903, "grad_norm": 1.7579914331436157, "learning_rate": 6.227077910852921e-06, "loss": 0.3737, "step": 28760 }, { "epoch": 0.6232530929733712, "grad_norm": 1.6213409900665283, "learning_rate": 6.223926209793134e-06, "loss": 0.2947, "step": 28765 }, { "epoch": 0.623361428292852, "grad_norm": 1.2771046161651611, "learning_rate": 6.220774946134928e-06, "loss": 0.2218, "step": 28770 }, { "epoch": 0.6234697636123329, "grad_norm": 1.017351746559143, "learning_rate": 6.217624120243334e-06, "loss": 0.1676, "step": 28775 }, { "epoch": 0.6235780989318137, "grad_norm": 1.608508586883545, "learning_rate": 6.214473732483324e-06, "loss": 0.1545, "step": 28780 }, { "epoch": 0.6236864342512946, "grad_norm": 1.0946911573410034, "learning_rate": 6.211323783219826e-06, "loss": 0.1727, "step": 28785 }, { "epoch": 0.6237947695707755, "grad_norm": 1.6157488822937012, "learning_rate": 6.208174272817711e-06, "loss": 0.2929, "step": 28790 }, { "epoch": 0.6239031048902564, "grad_norm": 1.7367908954620361, "learning_rate": 6.205025201641806e-06, "loss": 0.2133, "step": 28795 }, { "epoch": 0.6240114402097372, "grad_norm": 1.565476417541504, "learning_rate": 6.2018765700568836e-06, "loss": 0.3361, "step": 28800 }, { "epoch": 0.624119775529218, "grad_norm": 1.476630449295044, "learning_rate": 6.198728378427665e-06, "loss": 0.2018, "step": 28805 }, { "epoch": 0.6242281108486989, "grad_norm": 1.4616594314575195, "learning_rate": 6.19558062711882e-06, "loss": 0.2251, "step": 28810 }, { "epoch": 0.6243364461681797, "grad_norm": 1.7141770124435425, "learning_rate": 6.19243331649497e-06, "loss": 0.316, "step": 28815 }, { "epoch": 0.6244447814876606, "grad_norm": 1.7405225038528442, "learning_rate": 6.189286446920681e-06, "loss": 0.1559, "step": 28820 }, { "epoch": 0.6245531168071414, "grad_norm": 1.9163202047348022, "learning_rate": 6.186140018760475e-06, "loss": 0.2089, "step": 28825 }, { "epoch": 0.6246614521266223, "grad_norm": 1.0616836547851562, "learning_rate": 6.1829940323788155e-06, "loss": 0.2827, "step": 28830 }, { "epoch": 0.6247697874461032, "grad_norm": 1.5695677995681763, "learning_rate": 6.179848488140116e-06, "loss": 0.2084, "step": 28835 }, { "epoch": 0.6248781227655841, "grad_norm": 1.9910202026367188, "learning_rate": 6.176703386408744e-06, "loss": 0.2982, "step": 28840 }, { "epoch": 0.6249864580850649, "grad_norm": 1.1845457553863525, "learning_rate": 6.173558727549014e-06, "loss": 0.2606, "step": 28845 }, { "epoch": 0.6250947934045458, "grad_norm": 1.3641401529312134, "learning_rate": 6.1704145119251845e-06, "loss": 0.3035, "step": 28850 }, { "epoch": 0.6252031287240266, "grad_norm": 1.2462066411972046, "learning_rate": 6.167270739901468e-06, "loss": 0.2296, "step": 28855 }, { "epoch": 0.6253114640435075, "grad_norm": 1.1272704601287842, "learning_rate": 6.164127411842021e-06, "loss": 0.2489, "step": 28860 }, { "epoch": 0.6254197993629883, "grad_norm": 0.9346657991409302, "learning_rate": 6.16098452811095e-06, "loss": 0.2495, "step": 28865 }, { "epoch": 0.6255281346824692, "grad_norm": 2.0920298099517822, "learning_rate": 6.157842089072315e-06, "loss": 0.2915, "step": 28870 }, { "epoch": 0.62563647000195, "grad_norm": 1.4395687580108643, "learning_rate": 6.154700095090118e-06, "loss": 0.2595, "step": 28875 }, { "epoch": 0.6257448053214308, "grad_norm": 2.0438249111175537, "learning_rate": 6.1515585465283155e-06, "loss": 0.3565, "step": 28880 }, { "epoch": 0.6258531406409118, "grad_norm": 1.5815553665161133, "learning_rate": 6.148417443750798e-06, "loss": 0.2234, "step": 28885 }, { "epoch": 0.6259614759603926, "grad_norm": 1.301677942276001, "learning_rate": 6.145276787121428e-06, "loss": 0.3279, "step": 28890 }, { "epoch": 0.6260698112798735, "grad_norm": 1.6167595386505127, "learning_rate": 6.142136577003997e-06, "loss": 0.2484, "step": 28895 }, { "epoch": 0.6261781465993543, "grad_norm": 1.603577971458435, "learning_rate": 6.1389968137622545e-06, "loss": 0.2207, "step": 28900 }, { "epoch": 0.6262864819188352, "grad_norm": 0.9941624402999878, "learning_rate": 6.135857497759893e-06, "loss": 0.3271, "step": 28905 }, { "epoch": 0.626394817238316, "grad_norm": 1.5173678398132324, "learning_rate": 6.132718629360554e-06, "loss": 0.2046, "step": 28910 }, { "epoch": 0.6265031525577969, "grad_norm": 1.6373393535614014, "learning_rate": 6.129580208927835e-06, "loss": 0.3004, "step": 28915 }, { "epoch": 0.6266114878772777, "grad_norm": 1.7161146402359009, "learning_rate": 6.126442236825265e-06, "loss": 0.2272, "step": 28920 }, { "epoch": 0.6267198231967586, "grad_norm": 0.7520221471786499, "learning_rate": 6.123304713416338e-06, "loss": 0.2307, "step": 28925 }, { "epoch": 0.6268281585162395, "grad_norm": 1.3608371019363403, "learning_rate": 6.1201676390644815e-06, "loss": 0.2563, "step": 28930 }, { "epoch": 0.6269364938357204, "grad_norm": 1.4840452671051025, "learning_rate": 6.117031014133088e-06, "loss": 0.217, "step": 28935 }, { "epoch": 0.6270448291552012, "grad_norm": 1.8409385681152344, "learning_rate": 6.113894838985486e-06, "loss": 0.2473, "step": 28940 }, { "epoch": 0.6271531644746821, "grad_norm": 1.7810254096984863, "learning_rate": 6.110759113984952e-06, "loss": 0.234, "step": 28945 }, { "epoch": 0.6272614997941629, "grad_norm": 1.2521535158157349, "learning_rate": 6.107623839494716e-06, "loss": 0.182, "step": 28950 }, { "epoch": 0.6273698351136437, "grad_norm": 2.036756992340088, "learning_rate": 6.104489015877949e-06, "loss": 0.2595, "step": 28955 }, { "epoch": 0.6274781704331246, "grad_norm": 1.5750772953033447, "learning_rate": 6.101354643497775e-06, "loss": 0.2634, "step": 28960 }, { "epoch": 0.6275865057526054, "grad_norm": 1.2377899885177612, "learning_rate": 6.098220722717263e-06, "loss": 0.2073, "step": 28965 }, { "epoch": 0.6276948410720863, "grad_norm": 1.3678869009017944, "learning_rate": 6.095087253899432e-06, "loss": 0.2807, "step": 28970 }, { "epoch": 0.6278031763915671, "grad_norm": 1.1258676052093506, "learning_rate": 6.091954237407247e-06, "loss": 0.3589, "step": 28975 }, { "epoch": 0.6279115117110481, "grad_norm": 1.5226227045059204, "learning_rate": 6.088821673603616e-06, "loss": 0.1543, "step": 28980 }, { "epoch": 0.6280198470305289, "grad_norm": 1.783119797706604, "learning_rate": 6.0856895628514105e-06, "loss": 0.2592, "step": 28985 }, { "epoch": 0.6281281823500098, "grad_norm": 1.3614667654037476, "learning_rate": 6.082557905513433e-06, "loss": 0.3354, "step": 28990 }, { "epoch": 0.6282365176694906, "grad_norm": 1.5556564331054688, "learning_rate": 6.079426701952438e-06, "loss": 0.2329, "step": 28995 }, { "epoch": 0.6283448529889715, "grad_norm": 1.164333462715149, "learning_rate": 6.07629595253113e-06, "loss": 0.1826, "step": 29000 }, { "epoch": 0.6284531883084523, "grad_norm": 1.7625508308410645, "learning_rate": 6.0731656576121575e-06, "loss": 0.177, "step": 29005 }, { "epoch": 0.6285615236279332, "grad_norm": 1.8126810789108276, "learning_rate": 6.0700358175581195e-06, "loss": 0.2545, "step": 29010 }, { "epoch": 0.628669858947414, "grad_norm": 1.4335423707962036, "learning_rate": 6.066906432731563e-06, "loss": 0.1926, "step": 29015 }, { "epoch": 0.6287781942668949, "grad_norm": 1.6805596351623535, "learning_rate": 6.063777503494976e-06, "loss": 0.2605, "step": 29020 }, { "epoch": 0.6288865295863757, "grad_norm": 1.6426053047180176, "learning_rate": 6.060649030210803e-06, "loss": 0.2852, "step": 29025 }, { "epoch": 0.6289948649058567, "grad_norm": 1.1960862874984741, "learning_rate": 6.0575210132414255e-06, "loss": 0.3167, "step": 29030 }, { "epoch": 0.6291032002253375, "grad_norm": 1.6252144575119019, "learning_rate": 6.054393452949181e-06, "loss": 0.2333, "step": 29035 }, { "epoch": 0.6292115355448183, "grad_norm": 1.3830554485321045, "learning_rate": 6.051266349696352e-06, "loss": 0.2577, "step": 29040 }, { "epoch": 0.6293198708642992, "grad_norm": 1.0623154640197754, "learning_rate": 6.048139703845161e-06, "loss": 0.1742, "step": 29045 }, { "epoch": 0.62942820618378, "grad_norm": 1.3070800304412842, "learning_rate": 6.045013515757789e-06, "loss": 0.2332, "step": 29050 }, { "epoch": 0.6295365415032609, "grad_norm": 1.4518851041793823, "learning_rate": 6.041887785796356e-06, "loss": 0.2266, "step": 29055 }, { "epoch": 0.6296448768227417, "grad_norm": 0.5003889203071594, "learning_rate": 6.03876251432293e-06, "loss": 0.2078, "step": 29060 }, { "epoch": 0.6297532121422226, "grad_norm": 1.5571231842041016, "learning_rate": 6.035637701699527e-06, "loss": 0.2279, "step": 29065 }, { "epoch": 0.6298615474617034, "grad_norm": 0.8486309051513672, "learning_rate": 6.032513348288111e-06, "loss": 0.1594, "step": 29070 }, { "epoch": 0.6299698827811844, "grad_norm": 1.5363632440567017, "learning_rate": 6.029389454450587e-06, "loss": 0.1743, "step": 29075 }, { "epoch": 0.6300782181006652, "grad_norm": 0.966700553894043, "learning_rate": 6.026266020548817e-06, "loss": 0.161, "step": 29080 }, { "epoch": 0.6301865534201461, "grad_norm": 1.786474347114563, "learning_rate": 6.023143046944604e-06, "loss": 0.224, "step": 29085 }, { "epoch": 0.6302948887396269, "grad_norm": 2.0021419525146484, "learning_rate": 6.020020533999694e-06, "loss": 0.261, "step": 29090 }, { "epoch": 0.6304032240591078, "grad_norm": 1.697759985923767, "learning_rate": 6.016898482075786e-06, "loss": 0.3036, "step": 29095 }, { "epoch": 0.6305115593785886, "grad_norm": 0.8664216995239258, "learning_rate": 6.013776891534522e-06, "loss": 0.2208, "step": 29100 }, { "epoch": 0.6306198946980694, "grad_norm": 1.4860374927520752, "learning_rate": 6.01065576273749e-06, "loss": 0.2196, "step": 29105 }, { "epoch": 0.6307282300175503, "grad_norm": 1.265153408050537, "learning_rate": 6.0075350960462284e-06, "loss": 0.2421, "step": 29110 }, { "epoch": 0.6308365653370311, "grad_norm": 1.8268394470214844, "learning_rate": 6.004414891822219e-06, "loss": 0.3181, "step": 29115 }, { "epoch": 0.630944900656512, "grad_norm": 1.8495879173278809, "learning_rate": 6.001295150426891e-06, "loss": 0.2262, "step": 29120 }, { "epoch": 0.6310532359759929, "grad_norm": 1.2490441799163818, "learning_rate": 5.998175872221616e-06, "loss": 0.2642, "step": 29125 }, { "epoch": 0.6311615712954738, "grad_norm": 2.0151093006134033, "learning_rate": 5.995057057567721e-06, "loss": 0.2748, "step": 29130 }, { "epoch": 0.6312699066149546, "grad_norm": 1.6446812152862549, "learning_rate": 5.9919387068264735e-06, "loss": 0.3382, "step": 29135 }, { "epoch": 0.6313782419344355, "grad_norm": 1.2238386869430542, "learning_rate": 5.988820820359085e-06, "loss": 0.1994, "step": 29140 }, { "epoch": 0.6314865772539163, "grad_norm": 1.1114341020584106, "learning_rate": 5.985703398526716e-06, "loss": 0.2101, "step": 29145 }, { "epoch": 0.6315949125733972, "grad_norm": 1.789461374282837, "learning_rate": 5.982586441690475e-06, "loss": 0.2422, "step": 29150 }, { "epoch": 0.631703247892878, "grad_norm": 0.9749012589454651, "learning_rate": 5.979469950211414e-06, "loss": 0.232, "step": 29155 }, { "epoch": 0.6318115832123589, "grad_norm": 1.8357460498809814, "learning_rate": 5.9763539244505345e-06, "loss": 0.1652, "step": 29160 }, { "epoch": 0.6319199185318397, "grad_norm": 1.5643134117126465, "learning_rate": 5.973238364768776e-06, "loss": 0.2645, "step": 29165 }, { "epoch": 0.6320282538513206, "grad_norm": 1.4106011390686035, "learning_rate": 5.970123271527031e-06, "loss": 0.2741, "step": 29170 }, { "epoch": 0.6321365891708015, "grad_norm": 1.1930736303329468, "learning_rate": 5.9670086450861345e-06, "loss": 0.2726, "step": 29175 }, { "epoch": 0.6322449244902824, "grad_norm": 1.2215287685394287, "learning_rate": 5.963894485806876e-06, "loss": 0.22, "step": 29180 }, { "epoch": 0.6323532598097632, "grad_norm": 1.6292593479156494, "learning_rate": 5.960780794049981e-06, "loss": 0.2952, "step": 29185 }, { "epoch": 0.632461595129244, "grad_norm": 1.4328088760375977, "learning_rate": 5.957667570176122e-06, "loss": 0.2977, "step": 29190 }, { "epoch": 0.6325699304487249, "grad_norm": 1.4597687721252441, "learning_rate": 5.954554814545925e-06, "loss": 0.3058, "step": 29195 }, { "epoch": 0.6326782657682057, "grad_norm": 1.1710929870605469, "learning_rate": 5.951442527519949e-06, "loss": 0.1797, "step": 29200 }, { "epoch": 0.6327866010876866, "grad_norm": 1.4556267261505127, "learning_rate": 5.948330709458709e-06, "loss": 0.1361, "step": 29205 }, { "epoch": 0.6328949364071674, "grad_norm": 1.3086541891098022, "learning_rate": 5.945219360722662e-06, "loss": 0.2273, "step": 29210 }, { "epoch": 0.6330032717266483, "grad_norm": 1.267539620399475, "learning_rate": 5.94210848167221e-06, "loss": 0.2107, "step": 29215 }, { "epoch": 0.6331116070461292, "grad_norm": 1.5989689826965332, "learning_rate": 5.938998072667701e-06, "loss": 0.3061, "step": 29220 }, { "epoch": 0.6332199423656101, "grad_norm": 1.6031744480133057, "learning_rate": 5.935888134069436e-06, "loss": 0.3436, "step": 29225 }, { "epoch": 0.6333282776850909, "grad_norm": 1.1212849617004395, "learning_rate": 5.932778666237651e-06, "loss": 0.2123, "step": 29230 }, { "epoch": 0.6334366130045718, "grad_norm": 0.8585649728775024, "learning_rate": 5.92966966953253e-06, "loss": 0.3155, "step": 29235 }, { "epoch": 0.6335449483240526, "grad_norm": 1.196063756942749, "learning_rate": 5.9265611443142024e-06, "loss": 0.1861, "step": 29240 }, { "epoch": 0.6336532836435335, "grad_norm": 1.598299264907837, "learning_rate": 5.923453090942747e-06, "loss": 0.329, "step": 29245 }, { "epoch": 0.6337616189630143, "grad_norm": 1.417505145072937, "learning_rate": 5.920345509778183e-06, "loss": 0.3188, "step": 29250 }, { "epoch": 0.6338699542824952, "grad_norm": 2.0054056644439697, "learning_rate": 5.91723840118048e-06, "loss": 0.2573, "step": 29255 }, { "epoch": 0.633978289601976, "grad_norm": 1.5790691375732422, "learning_rate": 5.9141317655095485e-06, "loss": 0.2004, "step": 29260 }, { "epoch": 0.6340866249214568, "grad_norm": 2.063328266143799, "learning_rate": 5.911025603125245e-06, "loss": 0.3325, "step": 29265 }, { "epoch": 0.6341949602409378, "grad_norm": 1.4460439682006836, "learning_rate": 5.907919914387371e-06, "loss": 0.1689, "step": 29270 }, { "epoch": 0.6343032955604186, "grad_norm": 1.8892695903778076, "learning_rate": 5.904814699655678e-06, "loss": 0.2674, "step": 29275 }, { "epoch": 0.6344116308798995, "grad_norm": 2.884472131729126, "learning_rate": 5.9017099592898555e-06, "loss": 0.2125, "step": 29280 }, { "epoch": 0.6345199661993803, "grad_norm": 0.8586745858192444, "learning_rate": 5.898605693649542e-06, "loss": 0.2404, "step": 29285 }, { "epoch": 0.6346283015188612, "grad_norm": 1.6699239015579224, "learning_rate": 5.895501903094319e-06, "loss": 0.2517, "step": 29290 }, { "epoch": 0.634736636838342, "grad_norm": 1.7336194515228271, "learning_rate": 5.892398587983717e-06, "loss": 0.2682, "step": 29295 }, { "epoch": 0.6348449721578229, "grad_norm": 2.4215853214263916, "learning_rate": 5.889295748677206e-06, "loss": 0.2261, "step": 29300 }, { "epoch": 0.6349533074773037, "grad_norm": 1.2674635648727417, "learning_rate": 5.886193385534204e-06, "loss": 0.2512, "step": 29305 }, { "epoch": 0.6350616427967846, "grad_norm": 1.1445802450180054, "learning_rate": 5.883091498914073e-06, "loss": 0.2039, "step": 29310 }, { "epoch": 0.6351699781162654, "grad_norm": 1.3553197383880615, "learning_rate": 5.8799900891761206e-06, "loss": 0.17, "step": 29315 }, { "epoch": 0.6352783134357464, "grad_norm": 1.7592816352844238, "learning_rate": 5.876889156679597e-06, "loss": 0.3037, "step": 29320 }, { "epoch": 0.6353866487552272, "grad_norm": 2.161968469619751, "learning_rate": 5.873788701783703e-06, "loss": 0.134, "step": 29325 }, { "epoch": 0.6354949840747081, "grad_norm": 1.9285346269607544, "learning_rate": 5.8706887248475765e-06, "loss": 0.2501, "step": 29330 }, { "epoch": 0.6356033193941889, "grad_norm": 2.4335951805114746, "learning_rate": 5.867589226230305e-06, "loss": 0.2397, "step": 29335 }, { "epoch": 0.6357116547136697, "grad_norm": 2.374086856842041, "learning_rate": 5.864490206290917e-06, "loss": 0.2327, "step": 29340 }, { "epoch": 0.6358199900331506, "grad_norm": 2.345083713531494, "learning_rate": 5.861391665388389e-06, "loss": 0.3054, "step": 29345 }, { "epoch": 0.6359283253526314, "grad_norm": 1.535819411277771, "learning_rate": 5.858293603881641e-06, "loss": 0.3051, "step": 29350 }, { "epoch": 0.6360366606721123, "grad_norm": 2.273582696914673, "learning_rate": 5.855196022129535e-06, "loss": 0.2684, "step": 29355 }, { "epoch": 0.6361449959915931, "grad_norm": 1.0173231363296509, "learning_rate": 5.852098920490881e-06, "loss": 0.2286, "step": 29360 }, { "epoch": 0.6362533313110741, "grad_norm": 1.3436987400054932, "learning_rate": 5.84900229932443e-06, "loss": 0.2392, "step": 29365 }, { "epoch": 0.6363616666305549, "grad_norm": 1.4641019105911255, "learning_rate": 5.845906158988883e-06, "loss": 0.29, "step": 29370 }, { "epoch": 0.6364700019500358, "grad_norm": 1.3411997556686401, "learning_rate": 5.842810499842878e-06, "loss": 0.2763, "step": 29375 }, { "epoch": 0.6365783372695166, "grad_norm": 1.2217854261398315, "learning_rate": 5.839715322245003e-06, "loss": 0.1718, "step": 29380 }, { "epoch": 0.6366866725889975, "grad_norm": 1.3378294706344604, "learning_rate": 5.836620626553786e-06, "loss": 0.2511, "step": 29385 }, { "epoch": 0.6367950079084783, "grad_norm": 1.2280604839324951, "learning_rate": 5.833526413127704e-06, "loss": 0.1845, "step": 29390 }, { "epoch": 0.6369033432279592, "grad_norm": 1.6225063800811768, "learning_rate": 5.83043268232517e-06, "loss": 0.2562, "step": 29395 }, { "epoch": 0.63701167854744, "grad_norm": 1.4770506620407104, "learning_rate": 5.827339434504553e-06, "loss": 0.272, "step": 29400 }, { "epoch": 0.6371200138669209, "grad_norm": 1.7803633213043213, "learning_rate": 5.824246670024152e-06, "loss": 0.258, "step": 29405 }, { "epoch": 0.6372283491864017, "grad_norm": 1.289625883102417, "learning_rate": 5.821154389242226e-06, "loss": 0.2515, "step": 29410 }, { "epoch": 0.6373366845058827, "grad_norm": 1.0695457458496094, "learning_rate": 5.818062592516961e-06, "loss": 0.1981, "step": 29415 }, { "epoch": 0.6374450198253635, "grad_norm": 1.232901692390442, "learning_rate": 5.814971280206498e-06, "loss": 0.1691, "step": 29420 }, { "epoch": 0.6375533551448443, "grad_norm": 1.3664624691009521, "learning_rate": 5.8118804526689265e-06, "loss": 0.1725, "step": 29425 }, { "epoch": 0.6376616904643252, "grad_norm": 1.3428642749786377, "learning_rate": 5.808790110262261e-06, "loss": 0.2107, "step": 29430 }, { "epoch": 0.637770025783806, "grad_norm": 1.2038493156433105, "learning_rate": 5.805700253344482e-06, "loss": 0.1938, "step": 29435 }, { "epoch": 0.6378783611032869, "grad_norm": 1.3901058435440063, "learning_rate": 5.802610882273494e-06, "loss": 0.2391, "step": 29440 }, { "epoch": 0.6379866964227677, "grad_norm": 1.8975262641906738, "learning_rate": 5.7995219974071605e-06, "loss": 0.2848, "step": 29445 }, { "epoch": 0.6380950317422486, "grad_norm": 1.1255675554275513, "learning_rate": 5.796433599103278e-06, "loss": 0.2736, "step": 29450 }, { "epoch": 0.6382033670617294, "grad_norm": 1.5238838195800781, "learning_rate": 5.793345687719598e-06, "loss": 0.1832, "step": 29455 }, { "epoch": 0.6383117023812104, "grad_norm": 1.513026475906372, "learning_rate": 5.790258263613799e-06, "loss": 0.3286, "step": 29460 }, { "epoch": 0.6384200377006912, "grad_norm": 1.062589406967163, "learning_rate": 5.787171327143519e-06, "loss": 0.2695, "step": 29465 }, { "epoch": 0.6385283730201721, "grad_norm": 1.237923502922058, "learning_rate": 5.784084878666337e-06, "loss": 0.2281, "step": 29470 }, { "epoch": 0.6386367083396529, "grad_norm": 1.695899486541748, "learning_rate": 5.780998918539763e-06, "loss": 0.325, "step": 29475 }, { "epoch": 0.6387450436591338, "grad_norm": 1.723618745803833, "learning_rate": 5.777913447121268e-06, "loss": 0.2412, "step": 29480 }, { "epoch": 0.6388533789786146, "grad_norm": 1.176019549369812, "learning_rate": 5.77482846476825e-06, "loss": 0.2472, "step": 29485 }, { "epoch": 0.6389617142980955, "grad_norm": 1.086012363433838, "learning_rate": 5.771743971838063e-06, "loss": 0.2135, "step": 29490 }, { "epoch": 0.6390700496175763, "grad_norm": 1.5565954446792603, "learning_rate": 5.768659968687995e-06, "loss": 0.2413, "step": 29495 }, { "epoch": 0.6391783849370571, "grad_norm": 1.2853277921676636, "learning_rate": 5.765576455675288e-06, "loss": 0.224, "step": 29500 }, { "epoch": 0.639286720256538, "grad_norm": 2.31817364692688, "learning_rate": 5.762493433157117e-06, "loss": 0.2456, "step": 29505 }, { "epoch": 0.639395055576019, "grad_norm": 1.5396236181259155, "learning_rate": 5.759410901490596e-06, "loss": 0.2501, "step": 29510 }, { "epoch": 0.6395033908954998, "grad_norm": 1.5019972324371338, "learning_rate": 5.7563288610328046e-06, "loss": 0.2828, "step": 29515 }, { "epoch": 0.6396117262149806, "grad_norm": 1.2519749402999878, "learning_rate": 5.753247312140743e-06, "loss": 0.2276, "step": 29520 }, { "epoch": 0.6397200615344615, "grad_norm": 1.633832573890686, "learning_rate": 5.750166255171366e-06, "loss": 0.2976, "step": 29525 }, { "epoch": 0.6398283968539423, "grad_norm": 1.5263750553131104, "learning_rate": 5.747085690481563e-06, "loss": 0.2732, "step": 29530 }, { "epoch": 0.6399367321734232, "grad_norm": 0.8207868933677673, "learning_rate": 5.744005618428178e-06, "loss": 0.2486, "step": 29535 }, { "epoch": 0.640045067492904, "grad_norm": 1.491325855255127, "learning_rate": 5.740926039367986e-06, "loss": 0.2602, "step": 29540 }, { "epoch": 0.6401534028123849, "grad_norm": 1.7571983337402344, "learning_rate": 5.737846953657708e-06, "loss": 0.2506, "step": 29545 }, { "epoch": 0.6402617381318657, "grad_norm": 1.3332823514938354, "learning_rate": 5.734768361654019e-06, "loss": 0.2577, "step": 29550 }, { "epoch": 0.6403700734513466, "grad_norm": 1.81779944896698, "learning_rate": 5.7316902637135165e-06, "loss": 0.2767, "step": 29555 }, { "epoch": 0.6404784087708275, "grad_norm": 1.084287166595459, "learning_rate": 5.728612660192759e-06, "loss": 0.2746, "step": 29560 }, { "epoch": 0.6405867440903084, "grad_norm": 1.6025021076202393, "learning_rate": 5.725535551448238e-06, "loss": 0.2611, "step": 29565 }, { "epoch": 0.6406950794097892, "grad_norm": 1.541390061378479, "learning_rate": 5.722458937836396e-06, "loss": 0.2181, "step": 29570 }, { "epoch": 0.64080341472927, "grad_norm": 1.7693675756454468, "learning_rate": 5.719382819713608e-06, "loss": 0.4228, "step": 29575 }, { "epoch": 0.6409117500487509, "grad_norm": 1.4892288446426392, "learning_rate": 5.716307197436195e-06, "loss": 0.2581, "step": 29580 }, { "epoch": 0.6410200853682317, "grad_norm": 0.8943405747413635, "learning_rate": 5.713232071360426e-06, "loss": 0.1804, "step": 29585 }, { "epoch": 0.6411284206877126, "grad_norm": 0.9724084138870239, "learning_rate": 5.710157441842501e-06, "loss": 0.1566, "step": 29590 }, { "epoch": 0.6412367560071934, "grad_norm": 1.0678598880767822, "learning_rate": 5.707083309238579e-06, "loss": 0.2217, "step": 29595 }, { "epoch": 0.6413450913266743, "grad_norm": 0.9942871928215027, "learning_rate": 5.7040096739047445e-06, "loss": 0.1863, "step": 29600 }, { "epoch": 0.6414534266461552, "grad_norm": 1.7890459299087524, "learning_rate": 5.7009365361970346e-06, "loss": 0.2986, "step": 29605 }, { "epoch": 0.6415617619656361, "grad_norm": 1.2835747003555298, "learning_rate": 5.697863896471432e-06, "loss": 0.2213, "step": 29610 }, { "epoch": 0.6416700972851169, "grad_norm": 2.087292432785034, "learning_rate": 5.694791755083846e-06, "loss": 0.2017, "step": 29615 }, { "epoch": 0.6417784326045978, "grad_norm": 1.501060962677002, "learning_rate": 5.691720112390147e-06, "loss": 0.269, "step": 29620 }, { "epoch": 0.6418867679240786, "grad_norm": 1.2566708326339722, "learning_rate": 5.688648968746131e-06, "loss": 0.322, "step": 29625 }, { "epoch": 0.6419951032435595, "grad_norm": 1.700946569442749, "learning_rate": 5.685578324507552e-06, "loss": 0.2683, "step": 29630 }, { "epoch": 0.6421034385630403, "grad_norm": 1.41099214553833, "learning_rate": 5.6825081800300915e-06, "loss": 0.2742, "step": 29635 }, { "epoch": 0.6422117738825212, "grad_norm": 1.7648273706436157, "learning_rate": 5.679438535669386e-06, "loss": 0.2886, "step": 29640 }, { "epoch": 0.642320109202002, "grad_norm": 1.3826240301132202, "learning_rate": 5.676369391780998e-06, "loss": 0.222, "step": 29645 }, { "epoch": 0.6424284445214828, "grad_norm": 1.4236072301864624, "learning_rate": 5.673300748720454e-06, "loss": 0.298, "step": 29650 }, { "epoch": 0.6425367798409638, "grad_norm": 1.6316120624542236, "learning_rate": 5.6702326068432e-06, "loss": 0.2935, "step": 29655 }, { "epoch": 0.6426451151604446, "grad_norm": 1.3297885656356812, "learning_rate": 5.667164966504638e-06, "loss": 0.2338, "step": 29660 }, { "epoch": 0.6427534504799255, "grad_norm": 0.740847647190094, "learning_rate": 5.664097828060113e-06, "loss": 0.2434, "step": 29665 }, { "epoch": 0.6428617857994063, "grad_norm": 1.6161423921585083, "learning_rate": 5.661031191864898e-06, "loss": 0.2245, "step": 29670 }, { "epoch": 0.6429701211188872, "grad_norm": 1.1252082586288452, "learning_rate": 5.657965058274228e-06, "loss": 0.2964, "step": 29675 }, { "epoch": 0.643078456438368, "grad_norm": 1.719814658164978, "learning_rate": 5.654899427643257e-06, "loss": 0.2178, "step": 29680 }, { "epoch": 0.6431867917578489, "grad_norm": 1.998942255973816, "learning_rate": 5.651834300327101e-06, "loss": 0.1659, "step": 29685 }, { "epoch": 0.6432951270773297, "grad_norm": 1.8241156339645386, "learning_rate": 5.648769676680802e-06, "loss": 0.2876, "step": 29690 }, { "epoch": 0.6434034623968106, "grad_norm": 0.8793107271194458, "learning_rate": 5.645705557059359e-06, "loss": 0.2502, "step": 29695 }, { "epoch": 0.6435117977162914, "grad_norm": 1.4884486198425293, "learning_rate": 5.6426419418176955e-06, "loss": 0.1672, "step": 29700 }, { "epoch": 0.6436201330357724, "grad_norm": 1.5021644830703735, "learning_rate": 5.63957883131069e-06, "loss": 0.3124, "step": 29705 }, { "epoch": 0.6437284683552532, "grad_norm": 1.4898850917816162, "learning_rate": 5.636516225893162e-06, "loss": 0.3477, "step": 29710 }, { "epoch": 0.6438368036747341, "grad_norm": 0.9472200870513916, "learning_rate": 5.633454125919859e-06, "loss": 0.1488, "step": 29715 }, { "epoch": 0.6439451389942149, "grad_norm": 1.499872088432312, "learning_rate": 5.630392531745491e-06, "loss": 0.3194, "step": 29720 }, { "epoch": 0.6440534743136958, "grad_norm": 2.0402626991271973, "learning_rate": 5.627331443724686e-06, "loss": 0.3062, "step": 29725 }, { "epoch": 0.6441618096331766, "grad_norm": 1.2075462341308594, "learning_rate": 5.624270862212035e-06, "loss": 0.3154, "step": 29730 }, { "epoch": 0.6442701449526574, "grad_norm": 1.741146206855774, "learning_rate": 5.621210787562053e-06, "loss": 0.1772, "step": 29735 }, { "epoch": 0.6443784802721383, "grad_norm": 1.6197046041488647, "learning_rate": 5.6181512201292084e-06, "loss": 0.2261, "step": 29740 }, { "epoch": 0.6444868155916191, "grad_norm": 1.0628855228424072, "learning_rate": 5.615092160267907e-06, "loss": 0.2302, "step": 29745 }, { "epoch": 0.6445951509111001, "grad_norm": 1.5797910690307617, "learning_rate": 5.612033608332486e-06, "loss": 0.2832, "step": 29750 }, { "epoch": 0.6447034862305809, "grad_norm": 1.176661729812622, "learning_rate": 5.608975564677245e-06, "loss": 0.2196, "step": 29755 }, { "epoch": 0.6448118215500618, "grad_norm": 1.5033749341964722, "learning_rate": 5.605918029656406e-06, "loss": 0.2369, "step": 29760 }, { "epoch": 0.6449201568695426, "grad_norm": 1.4332878589630127, "learning_rate": 5.602861003624141e-06, "loss": 0.2639, "step": 29765 }, { "epoch": 0.6450284921890235, "grad_norm": 1.4134725332260132, "learning_rate": 5.5998044869345566e-06, "loss": 0.2184, "step": 29770 }, { "epoch": 0.6451368275085043, "grad_norm": 0.8784380555152893, "learning_rate": 5.596748479941711e-06, "loss": 0.1777, "step": 29775 }, { "epoch": 0.6452451628279852, "grad_norm": 1.0582349300384521, "learning_rate": 5.593692982999596e-06, "loss": 0.2591, "step": 29780 }, { "epoch": 0.645353498147466, "grad_norm": 1.1864268779754639, "learning_rate": 5.590637996462136e-06, "loss": 0.1911, "step": 29785 }, { "epoch": 0.6454618334669469, "grad_norm": 1.3407304286956787, "learning_rate": 5.587583520683216e-06, "loss": 0.286, "step": 29790 }, { "epoch": 0.6455701687864277, "grad_norm": 0.7408902645111084, "learning_rate": 5.584529556016645e-06, "loss": 0.1564, "step": 29795 }, { "epoch": 0.6456785041059087, "grad_norm": 1.4302620887756348, "learning_rate": 5.581476102816179e-06, "loss": 0.2065, "step": 29800 }, { "epoch": 0.6457868394253895, "grad_norm": 0.8751159906387329, "learning_rate": 5.5784231614355175e-06, "loss": 0.2392, "step": 29805 }, { "epoch": 0.6458951747448703, "grad_norm": 1.5499296188354492, "learning_rate": 5.575370732228303e-06, "loss": 0.2864, "step": 29810 }, { "epoch": 0.6460035100643512, "grad_norm": 0.7335732579231262, "learning_rate": 5.5723188155481025e-06, "loss": 0.2795, "step": 29815 }, { "epoch": 0.646111845383832, "grad_norm": 1.9042309522628784, "learning_rate": 5.569267411748445e-06, "loss": 0.2694, "step": 29820 }, { "epoch": 0.6462201807033129, "grad_norm": 1.2343195676803589, "learning_rate": 5.566216521182786e-06, "loss": 0.2587, "step": 29825 }, { "epoch": 0.6463285160227937, "grad_norm": 1.5588798522949219, "learning_rate": 5.563166144204519e-06, "loss": 0.2729, "step": 29830 }, { "epoch": 0.6464368513422746, "grad_norm": 1.308794379234314, "learning_rate": 5.560116281166997e-06, "loss": 0.3065, "step": 29835 }, { "epoch": 0.6465451866617554, "grad_norm": 1.0254822969436646, "learning_rate": 5.557066932423489e-06, "loss": 0.2801, "step": 29840 }, { "epoch": 0.6466535219812363, "grad_norm": 2.7225611209869385, "learning_rate": 5.5540180983272265e-06, "loss": 0.3264, "step": 29845 }, { "epoch": 0.6467618573007172, "grad_norm": 1.3142982721328735, "learning_rate": 5.550969779231359e-06, "loss": 0.2837, "step": 29850 }, { "epoch": 0.6468701926201981, "grad_norm": 1.2398830652236938, "learning_rate": 5.547921975489003e-06, "loss": 0.2295, "step": 29855 }, { "epoch": 0.6469785279396789, "grad_norm": 1.1470354795455933, "learning_rate": 5.544874687453194e-06, "loss": 0.2917, "step": 29860 }, { "epoch": 0.6470868632591598, "grad_norm": 1.7368438243865967, "learning_rate": 5.541827915476911e-06, "loss": 0.2363, "step": 29865 }, { "epoch": 0.6471951985786406, "grad_norm": 1.578904390335083, "learning_rate": 5.538781659913084e-06, "loss": 0.2923, "step": 29870 }, { "epoch": 0.6473035338981215, "grad_norm": 1.053762435913086, "learning_rate": 5.535735921114569e-06, "loss": 0.2418, "step": 29875 }, { "epoch": 0.6474118692176023, "grad_norm": 1.9902958869934082, "learning_rate": 5.532690699434178e-06, "loss": 0.2623, "step": 29880 }, { "epoch": 0.6475202045370831, "grad_norm": 2.184232234954834, "learning_rate": 5.529645995224644e-06, "loss": 0.3204, "step": 29885 }, { "epoch": 0.647628539856564, "grad_norm": 1.4902493953704834, "learning_rate": 5.526601808838659e-06, "loss": 0.2153, "step": 29890 }, { "epoch": 0.647736875176045, "grad_norm": 1.4567662477493286, "learning_rate": 5.52355814062884e-06, "loss": 0.3191, "step": 29895 }, { "epoch": 0.6478452104955258, "grad_norm": 1.7974426746368408, "learning_rate": 5.520514990947753e-06, "loss": 0.2379, "step": 29900 }, { "epoch": 0.6479535458150066, "grad_norm": 0.6292114853858948, "learning_rate": 5.517472360147906e-06, "loss": 0.1397, "step": 29905 }, { "epoch": 0.6480618811344875, "grad_norm": 1.1706953048706055, "learning_rate": 5.514430248581733e-06, "loss": 0.3148, "step": 29910 }, { "epoch": 0.6481702164539683, "grad_norm": 1.6093369722366333, "learning_rate": 5.511388656601626e-06, "loss": 0.2108, "step": 29915 }, { "epoch": 0.6482785517734492, "grad_norm": 1.9017040729522705, "learning_rate": 5.508347584559901e-06, "loss": 0.2162, "step": 29920 }, { "epoch": 0.64838688709293, "grad_norm": 1.9521304368972778, "learning_rate": 5.505307032808826e-06, "loss": 0.1409, "step": 29925 }, { "epoch": 0.6484952224124109, "grad_norm": 2.0209338665008545, "learning_rate": 5.502267001700596e-06, "loss": 0.2833, "step": 29930 }, { "epoch": 0.6486035577318917, "grad_norm": 1.0967751741409302, "learning_rate": 5.4992274915873645e-06, "loss": 0.1905, "step": 29935 }, { "epoch": 0.6487118930513726, "grad_norm": 2.2477917671203613, "learning_rate": 5.4961885028212e-06, "loss": 0.2744, "step": 29940 }, { "epoch": 0.6488202283708535, "grad_norm": 1.332607626914978, "learning_rate": 5.493150035754132e-06, "loss": 0.2211, "step": 29945 }, { "epoch": 0.6489285636903344, "grad_norm": 1.2854604721069336, "learning_rate": 5.490112090738124e-06, "loss": 0.3518, "step": 29950 }, { "epoch": 0.6490368990098152, "grad_norm": 2.0494585037231445, "learning_rate": 5.4870746681250665e-06, "loss": 0.2577, "step": 29955 }, { "epoch": 0.649145234329296, "grad_norm": 1.0491750240325928, "learning_rate": 5.48403776826681e-06, "loss": 0.1988, "step": 29960 }, { "epoch": 0.6492535696487769, "grad_norm": 1.1673893928527832, "learning_rate": 5.481001391515125e-06, "loss": 0.1599, "step": 29965 }, { "epoch": 0.6493619049682577, "grad_norm": 1.4447126388549805, "learning_rate": 5.477965538221738e-06, "loss": 0.2955, "step": 29970 }, { "epoch": 0.6494702402877386, "grad_norm": 1.7098417282104492, "learning_rate": 5.4749302087382995e-06, "loss": 0.2826, "step": 29975 }, { "epoch": 0.6495785756072194, "grad_norm": 1.5637974739074707, "learning_rate": 5.471895403416414e-06, "loss": 0.4201, "step": 29980 }, { "epoch": 0.6496869109267003, "grad_norm": 1.0137187242507935, "learning_rate": 5.468861122607612e-06, "loss": 0.2634, "step": 29985 }, { "epoch": 0.6497952462461812, "grad_norm": 1.4342913627624512, "learning_rate": 5.465827366663372e-06, "loss": 0.1735, "step": 29990 }, { "epoch": 0.6499035815656621, "grad_norm": 1.0292609930038452, "learning_rate": 5.462794135935114e-06, "loss": 0.2365, "step": 29995 }, { "epoch": 0.6500119168851429, "grad_norm": 1.2640135288238525, "learning_rate": 5.459761430774185e-06, "loss": 0.1659, "step": 30000 }, { "epoch": 0.6501202522046238, "grad_norm": 1.9079535007476807, "learning_rate": 5.456729251531883e-06, "loss": 0.2775, "step": 30005 }, { "epoch": 0.6502285875241046, "grad_norm": 1.59404456615448, "learning_rate": 5.453697598559436e-06, "loss": 0.2379, "step": 30010 }, { "epoch": 0.6503369228435855, "grad_norm": 1.6659787893295288, "learning_rate": 5.450666472208024e-06, "loss": 0.2407, "step": 30015 }, { "epoch": 0.6504452581630663, "grad_norm": 1.2149688005447388, "learning_rate": 5.447635872828747e-06, "loss": 0.2626, "step": 30020 }, { "epoch": 0.6505535934825472, "grad_norm": 2.8233642578125, "learning_rate": 5.444605800772663e-06, "loss": 0.258, "step": 30025 }, { "epoch": 0.650661928802028, "grad_norm": 1.3585777282714844, "learning_rate": 5.441576256390758e-06, "loss": 0.2369, "step": 30030 }, { "epoch": 0.6507702641215088, "grad_norm": 1.0749738216400146, "learning_rate": 5.438547240033954e-06, "loss": 0.2771, "step": 30035 }, { "epoch": 0.6508785994409898, "grad_norm": 0.9923851490020752, "learning_rate": 5.435518752053123e-06, "loss": 0.2636, "step": 30040 }, { "epoch": 0.6509869347604706, "grad_norm": 1.92714524269104, "learning_rate": 5.432490792799068e-06, "loss": 0.2226, "step": 30045 }, { "epoch": 0.6510952700799515, "grad_norm": 1.628166675567627, "learning_rate": 5.429463362622537e-06, "loss": 0.1841, "step": 30050 }, { "epoch": 0.6512036053994323, "grad_norm": 1.3500653505325317, "learning_rate": 5.426436461874205e-06, "loss": 0.267, "step": 30055 }, { "epoch": 0.6513119407189132, "grad_norm": 0.7796057462692261, "learning_rate": 5.423410090904702e-06, "loss": 0.2084, "step": 30060 }, { "epoch": 0.651420276038394, "grad_norm": 1.8042635917663574, "learning_rate": 5.420384250064581e-06, "loss": 0.2285, "step": 30065 }, { "epoch": 0.6515286113578749, "grad_norm": 1.6242015361785889, "learning_rate": 5.417358939704338e-06, "loss": 0.1877, "step": 30070 }, { "epoch": 0.6516369466773557, "grad_norm": 1.1480467319488525, "learning_rate": 5.4143341601744196e-06, "loss": 0.2368, "step": 30075 }, { "epoch": 0.6517452819968366, "grad_norm": 1.447575330734253, "learning_rate": 5.411309911825189e-06, "loss": 0.2605, "step": 30080 }, { "epoch": 0.6518536173163174, "grad_norm": 1.6522643566131592, "learning_rate": 5.408286195006972e-06, "loss": 0.2065, "step": 30085 }, { "epoch": 0.6519619526357984, "grad_norm": 1.2186341285705566, "learning_rate": 5.405263010070007e-06, "loss": 0.2213, "step": 30090 }, { "epoch": 0.6520702879552792, "grad_norm": 1.8705804347991943, "learning_rate": 5.402240357364502e-06, "loss": 0.3225, "step": 30095 }, { "epoch": 0.6521786232747601, "grad_norm": 1.820815920829773, "learning_rate": 5.399218237240577e-06, "loss": 0.241, "step": 30100 }, { "epoch": 0.6522869585942409, "grad_norm": 1.2926385402679443, "learning_rate": 5.396196650048296e-06, "loss": 0.1942, "step": 30105 }, { "epoch": 0.6523952939137218, "grad_norm": 0.5840024352073669, "learning_rate": 5.393175596137669e-06, "loss": 0.1953, "step": 30110 }, { "epoch": 0.6525036292332026, "grad_norm": 1.1964048147201538, "learning_rate": 5.390155075858638e-06, "loss": 0.27, "step": 30115 }, { "epoch": 0.6526119645526834, "grad_norm": 1.523937463760376, "learning_rate": 5.387135089561088e-06, "loss": 0.2485, "step": 30120 }, { "epoch": 0.6527202998721643, "grad_norm": 1.4266057014465332, "learning_rate": 5.384115637594835e-06, "loss": 0.2086, "step": 30125 }, { "epoch": 0.6528286351916451, "grad_norm": 1.834365725517273, "learning_rate": 5.381096720309643e-06, "loss": 0.1993, "step": 30130 }, { "epoch": 0.6529369705111261, "grad_norm": 0.7687347531318665, "learning_rate": 5.378078338055201e-06, "loss": 0.1747, "step": 30135 }, { "epoch": 0.6530453058306069, "grad_norm": 1.4467805624008179, "learning_rate": 5.375060491181147e-06, "loss": 0.2861, "step": 30140 }, { "epoch": 0.6531536411500878, "grad_norm": 1.1127737760543823, "learning_rate": 5.372043180037057e-06, "loss": 0.2139, "step": 30145 }, { "epoch": 0.6532619764695686, "grad_norm": 2.22917103767395, "learning_rate": 5.369026404972434e-06, "loss": 0.2222, "step": 30150 }, { "epoch": 0.6533703117890495, "grad_norm": 1.7602243423461914, "learning_rate": 5.366010166336735e-06, "loss": 0.2324, "step": 30155 }, { "epoch": 0.6534786471085303, "grad_norm": 2.02299165725708, "learning_rate": 5.3629944644793355e-06, "loss": 0.1434, "step": 30160 }, { "epoch": 0.6535869824280112, "grad_norm": 1.473548173904419, "learning_rate": 5.359979299749569e-06, "loss": 0.3433, "step": 30165 }, { "epoch": 0.653695317747492, "grad_norm": 0.48686790466308594, "learning_rate": 5.356964672496689e-06, "loss": 0.2252, "step": 30170 }, { "epoch": 0.6538036530669729, "grad_norm": 1.1770448684692383, "learning_rate": 5.353950583069905e-06, "loss": 0.3933, "step": 30175 }, { "epoch": 0.6539119883864537, "grad_norm": 1.743741512298584, "learning_rate": 5.3509370318183415e-06, "loss": 0.2735, "step": 30180 }, { "epoch": 0.6540203237059347, "grad_norm": 1.1692142486572266, "learning_rate": 5.3479240190910815e-06, "loss": 0.1227, "step": 30185 }, { "epoch": 0.6541286590254155, "grad_norm": 1.1861878633499146, "learning_rate": 5.3449115452371405e-06, "loss": 0.2597, "step": 30190 }, { "epoch": 0.6542369943448963, "grad_norm": 1.5915251970291138, "learning_rate": 5.341899610605459e-06, "loss": 0.2232, "step": 30195 }, { "epoch": 0.6543453296643772, "grad_norm": 1.8114441633224487, "learning_rate": 5.338888215544933e-06, "loss": 0.2166, "step": 30200 }, { "epoch": 0.654453664983858, "grad_norm": 1.8978537321090698, "learning_rate": 5.33587736040438e-06, "loss": 0.3178, "step": 30205 }, { "epoch": 0.6545620003033389, "grad_norm": 1.839709997177124, "learning_rate": 5.33286704553257e-06, "loss": 0.2599, "step": 30210 }, { "epoch": 0.6546703356228197, "grad_norm": 1.7938098907470703, "learning_rate": 5.3298572712781945e-06, "loss": 0.2526, "step": 30215 }, { "epoch": 0.6547786709423006, "grad_norm": 1.3006259202957153, "learning_rate": 5.3268480379899e-06, "loss": 0.2376, "step": 30220 }, { "epoch": 0.6548870062617814, "grad_norm": 1.4971749782562256, "learning_rate": 5.323839346016253e-06, "loss": 0.2948, "step": 30225 }, { "epoch": 0.6549953415812623, "grad_norm": 1.7910372018814087, "learning_rate": 5.320831195705772e-06, "loss": 0.2074, "step": 30230 }, { "epoch": 0.6551036769007432, "grad_norm": 1.3691140413284302, "learning_rate": 5.3178235874069005e-06, "loss": 0.2744, "step": 30235 }, { "epoch": 0.6552120122202241, "grad_norm": 0.36875441670417786, "learning_rate": 5.314816521468026e-06, "loss": 0.2199, "step": 30240 }, { "epoch": 0.6553203475397049, "grad_norm": 0.9877104163169861, "learning_rate": 5.311809998237478e-06, "loss": 0.101, "step": 30245 }, { "epoch": 0.6554286828591858, "grad_norm": 1.9549790620803833, "learning_rate": 5.3088040180635095e-06, "loss": 0.3046, "step": 30250 }, { "epoch": 0.6555370181786666, "grad_norm": 1.4724204540252686, "learning_rate": 5.3057985812943235e-06, "loss": 0.2603, "step": 30255 }, { "epoch": 0.6556453534981475, "grad_norm": 1.2773220539093018, "learning_rate": 5.30279368827805e-06, "loss": 0.2705, "step": 30260 }, { "epoch": 0.6557536888176283, "grad_norm": 1.8486748933792114, "learning_rate": 5.2997893393627665e-06, "loss": 0.3048, "step": 30265 }, { "epoch": 0.6558620241371091, "grad_norm": 2.3026492595672607, "learning_rate": 5.29678553489648e-06, "loss": 0.2547, "step": 30270 }, { "epoch": 0.65597035945659, "grad_norm": 0.8273379802703857, "learning_rate": 5.29378227522713e-06, "loss": 0.2741, "step": 30275 }, { "epoch": 0.656078694776071, "grad_norm": 1.7657915353775024, "learning_rate": 5.290779560702606e-06, "loss": 0.2165, "step": 30280 }, { "epoch": 0.6561870300955518, "grad_norm": 1.3815702199935913, "learning_rate": 5.287777391670724e-06, "loss": 0.2428, "step": 30285 }, { "epoch": 0.6562953654150326, "grad_norm": 1.2991207838058472, "learning_rate": 5.284775768479247e-06, "loss": 0.1635, "step": 30290 }, { "epoch": 0.6564037007345135, "grad_norm": 1.810516357421875, "learning_rate": 5.281774691475859e-06, "loss": 0.2449, "step": 30295 }, { "epoch": 0.6565120360539943, "grad_norm": 1.3454562425613403, "learning_rate": 5.2787741610081975e-06, "loss": 0.2654, "step": 30300 }, { "epoch": 0.6566203713734752, "grad_norm": 1.0577353239059448, "learning_rate": 5.275774177423827e-06, "loss": 0.2175, "step": 30305 }, { "epoch": 0.656728706692956, "grad_norm": 1.6001112461090088, "learning_rate": 5.272774741070243e-06, "loss": 0.2539, "step": 30310 }, { "epoch": 0.6568370420124369, "grad_norm": 1.0160599946975708, "learning_rate": 5.269775852294896e-06, "loss": 0.3078, "step": 30315 }, { "epoch": 0.6569453773319177, "grad_norm": 1.6171879768371582, "learning_rate": 5.266777511445156e-06, "loss": 0.2176, "step": 30320 }, { "epoch": 0.6570537126513986, "grad_norm": 1.4253724813461304, "learning_rate": 5.263779718868339e-06, "loss": 0.309, "step": 30325 }, { "epoch": 0.6571620479708795, "grad_norm": 2.1381468772888184, "learning_rate": 5.2607824749116855e-06, "loss": 0.3235, "step": 30330 }, { "epoch": 0.6572703832903604, "grad_norm": 0.9328407049179077, "learning_rate": 5.257785779922398e-06, "loss": 0.2305, "step": 30335 }, { "epoch": 0.6573787186098412, "grad_norm": 1.3577271699905396, "learning_rate": 5.254789634247588e-06, "loss": 0.197, "step": 30340 }, { "epoch": 0.657487053929322, "grad_norm": 1.5226306915283203, "learning_rate": 5.251794038234312e-06, "loss": 0.259, "step": 30345 }, { "epoch": 0.6575953892488029, "grad_norm": 1.5729516744613647, "learning_rate": 5.248798992229573e-06, "loss": 0.2536, "step": 30350 }, { "epoch": 0.6577037245682837, "grad_norm": 1.8128069639205933, "learning_rate": 5.245804496580295e-06, "loss": 0.1524, "step": 30355 }, { "epoch": 0.6578120598877646, "grad_norm": 1.0579688549041748, "learning_rate": 5.24281055163335e-06, "loss": 0.2149, "step": 30360 }, { "epoch": 0.6579203952072454, "grad_norm": 1.3445796966552734, "learning_rate": 5.239817157735537e-06, "loss": 0.1707, "step": 30365 }, { "epoch": 0.6580287305267263, "grad_norm": 1.6007410287857056, "learning_rate": 5.236824315233603e-06, "loss": 0.2207, "step": 30370 }, { "epoch": 0.6581370658462071, "grad_norm": 1.3403605222702026, "learning_rate": 5.233832024474215e-06, "loss": 0.2129, "step": 30375 }, { "epoch": 0.6582454011656881, "grad_norm": 1.7638946771621704, "learning_rate": 5.23084028580399e-06, "loss": 0.2098, "step": 30380 }, { "epoch": 0.6583537364851689, "grad_norm": 1.2711893320083618, "learning_rate": 5.227849099569479e-06, "loss": 0.2988, "step": 30385 }, { "epoch": 0.6584620718046498, "grad_norm": 1.6617934703826904, "learning_rate": 5.224858466117161e-06, "loss": 0.1973, "step": 30390 }, { "epoch": 0.6585704071241306, "grad_norm": 3.219980478286743, "learning_rate": 5.2218683857934606e-06, "loss": 0.2359, "step": 30395 }, { "epoch": 0.6586787424436115, "grad_norm": 2.6240007877349854, "learning_rate": 5.218878858944728e-06, "loss": 0.1329, "step": 30400 }, { "epoch": 0.6587870777630923, "grad_norm": 1.6062440872192383, "learning_rate": 5.215889885917262e-06, "loss": 0.2146, "step": 30405 }, { "epoch": 0.6588954130825732, "grad_norm": 1.933051347732544, "learning_rate": 5.212901467057283e-06, "loss": 0.3275, "step": 30410 }, { "epoch": 0.659003748402054, "grad_norm": 1.8716814517974854, "learning_rate": 5.209913602710962e-06, "loss": 0.2538, "step": 30415 }, { "epoch": 0.6591120837215348, "grad_norm": 1.3718619346618652, "learning_rate": 5.2069262932243905e-06, "loss": 0.1584, "step": 30420 }, { "epoch": 0.6592204190410158, "grad_norm": 1.3792202472686768, "learning_rate": 5.203939538943609e-06, "loss": 0.1868, "step": 30425 }, { "epoch": 0.6593287543604966, "grad_norm": 1.410428524017334, "learning_rate": 5.2009533402145894e-06, "loss": 0.2336, "step": 30430 }, { "epoch": 0.6594370896799775, "grad_norm": 1.1527669429779053, "learning_rate": 5.1979676973832325e-06, "loss": 0.2103, "step": 30435 }, { "epoch": 0.6595454249994583, "grad_norm": 1.8962775468826294, "learning_rate": 5.194982610795387e-06, "loss": 0.2776, "step": 30440 }, { "epoch": 0.6596537603189392, "grad_norm": 1.4301129579544067, "learning_rate": 5.191998080796823e-06, "loss": 0.261, "step": 30445 }, { "epoch": 0.65976209563842, "grad_norm": 1.398471713066101, "learning_rate": 5.189014107733261e-06, "loss": 0.201, "step": 30450 }, { "epoch": 0.6598704309579009, "grad_norm": 1.7194066047668457, "learning_rate": 5.1860306919503435e-06, "loss": 0.2084, "step": 30455 }, { "epoch": 0.6599787662773817, "grad_norm": 1.1518837213516235, "learning_rate": 5.183047833793659e-06, "loss": 0.2248, "step": 30460 }, { "epoch": 0.6600871015968626, "grad_norm": 1.9565263986587524, "learning_rate": 5.180065533608723e-06, "loss": 0.2287, "step": 30465 }, { "epoch": 0.6601954369163434, "grad_norm": 1.373032808303833, "learning_rate": 5.177083791740995e-06, "loss": 0.2286, "step": 30470 }, { "epoch": 0.6603037722358244, "grad_norm": 0.9905805587768555, "learning_rate": 5.174102608535858e-06, "loss": 0.2323, "step": 30475 }, { "epoch": 0.6604121075553052, "grad_norm": 1.859406590461731, "learning_rate": 5.1711219843386426e-06, "loss": 0.1614, "step": 30480 }, { "epoch": 0.6605204428747861, "grad_norm": 1.2595267295837402, "learning_rate": 5.168141919494614e-06, "loss": 0.1994, "step": 30485 }, { "epoch": 0.6606287781942669, "grad_norm": 1.0851609706878662, "learning_rate": 5.165162414348957e-06, "loss": 0.2753, "step": 30490 }, { "epoch": 0.6607371135137478, "grad_norm": 1.7093682289123535, "learning_rate": 5.162183469246813e-06, "loss": 0.1293, "step": 30495 }, { "epoch": 0.6608454488332286, "grad_norm": 1.2154550552368164, "learning_rate": 5.15920508453324e-06, "loss": 0.1313, "step": 30500 }, { "epoch": 0.6609537841527094, "grad_norm": 1.5723105669021606, "learning_rate": 5.1562272605532456e-06, "loss": 0.2475, "step": 30505 }, { "epoch": 0.6610621194721903, "grad_norm": 1.294330358505249, "learning_rate": 5.153249997651765e-06, "loss": 0.2106, "step": 30510 }, { "epoch": 0.6611704547916711, "grad_norm": 1.4818682670593262, "learning_rate": 5.150273296173662e-06, "loss": 0.201, "step": 30515 }, { "epoch": 0.6612787901111521, "grad_norm": 0.9520467519760132, "learning_rate": 5.147297156463751e-06, "loss": 0.2366, "step": 30520 }, { "epoch": 0.6613871254306329, "grad_norm": 1.428200125694275, "learning_rate": 5.14432157886677e-06, "loss": 0.2596, "step": 30525 }, { "epoch": 0.6614954607501138, "grad_norm": 1.5422875881195068, "learning_rate": 5.1413465637273995e-06, "loss": 0.2011, "step": 30530 }, { "epoch": 0.6616037960695946, "grad_norm": 1.2484900951385498, "learning_rate": 5.138372111390244e-06, "loss": 0.154, "step": 30535 }, { "epoch": 0.6617121313890755, "grad_norm": 1.3364920616149902, "learning_rate": 5.1353982221998546e-06, "loss": 0.2793, "step": 30540 }, { "epoch": 0.6618204667085563, "grad_norm": 1.78102707862854, "learning_rate": 5.132424896500711e-06, "loss": 0.3048, "step": 30545 }, { "epoch": 0.6619288020280372, "grad_norm": 1.9175124168395996, "learning_rate": 5.129452134637223e-06, "loss": 0.226, "step": 30550 }, { "epoch": 0.662037137347518, "grad_norm": 1.5320426225662231, "learning_rate": 5.126479936953746e-06, "loss": 0.1811, "step": 30555 }, { "epoch": 0.6621454726669989, "grad_norm": 1.1534156799316406, "learning_rate": 5.123508303794561e-06, "loss": 0.2376, "step": 30560 }, { "epoch": 0.6622538079864797, "grad_norm": 1.211439847946167, "learning_rate": 5.120537235503893e-06, "loss": 0.1581, "step": 30565 }, { "epoch": 0.6623621433059607, "grad_norm": 1.5671181678771973, "learning_rate": 5.117566732425884e-06, "loss": 0.1958, "step": 30570 }, { "epoch": 0.6624704786254415, "grad_norm": 1.0549345016479492, "learning_rate": 5.114596794904638e-06, "loss": 0.2091, "step": 30575 }, { "epoch": 0.6625788139449224, "grad_norm": 1.5637130737304688, "learning_rate": 5.111627423284169e-06, "loss": 0.1769, "step": 30580 }, { "epoch": 0.6626871492644032, "grad_norm": 1.3828779458999634, "learning_rate": 5.108658617908433e-06, "loss": 0.226, "step": 30585 }, { "epoch": 0.662795484583884, "grad_norm": 1.4918544292449951, "learning_rate": 5.1056903791213265e-06, "loss": 0.2319, "step": 30590 }, { "epoch": 0.6629038199033649, "grad_norm": 1.4556900262832642, "learning_rate": 5.1027227072666694e-06, "loss": 0.2266, "step": 30595 }, { "epoch": 0.6630121552228457, "grad_norm": 1.1418366432189941, "learning_rate": 5.099755602688229e-06, "loss": 0.2653, "step": 30600 }, { "epoch": 0.6631204905423266, "grad_norm": 1.4652512073516846, "learning_rate": 5.096789065729692e-06, "loss": 0.34, "step": 30605 }, { "epoch": 0.6632288258618074, "grad_norm": 1.6135857105255127, "learning_rate": 5.093823096734697e-06, "loss": 0.2453, "step": 30610 }, { "epoch": 0.6633371611812883, "grad_norm": 1.0288256406784058, "learning_rate": 5.090857696046797e-06, "loss": 0.2085, "step": 30615 }, { "epoch": 0.6634454965007692, "grad_norm": 1.2471226453781128, "learning_rate": 5.087892864009494e-06, "loss": 0.2351, "step": 30620 }, { "epoch": 0.6635538318202501, "grad_norm": 2.240682363510132, "learning_rate": 5.084928600966224e-06, "loss": 0.2725, "step": 30625 }, { "epoch": 0.6636621671397309, "grad_norm": 2.0405890941619873, "learning_rate": 5.081964907260342e-06, "loss": 0.1874, "step": 30630 }, { "epoch": 0.6637705024592118, "grad_norm": 1.576244831085205, "learning_rate": 5.079001783235158e-06, "loss": 0.3311, "step": 30635 }, { "epoch": 0.6638788377786926, "grad_norm": 0.8745210766792297, "learning_rate": 5.076039229233898e-06, "loss": 0.2752, "step": 30640 }, { "epoch": 0.6639871730981735, "grad_norm": 1.412632703781128, "learning_rate": 5.073077245599736e-06, "loss": 0.1876, "step": 30645 }, { "epoch": 0.6640955084176543, "grad_norm": 1.6812527179718018, "learning_rate": 5.070115832675765e-06, "loss": 0.2471, "step": 30650 }, { "epoch": 0.6642038437371351, "grad_norm": 1.1207516193389893, "learning_rate": 5.06715499080503e-06, "loss": 0.1352, "step": 30655 }, { "epoch": 0.664312179056616, "grad_norm": 1.1589579582214355, "learning_rate": 5.064194720330491e-06, "loss": 0.2364, "step": 30660 }, { "epoch": 0.664420514376097, "grad_norm": 1.62881338596344, "learning_rate": 5.061235021595055e-06, "loss": 0.2237, "step": 30665 }, { "epoch": 0.6645288496955778, "grad_norm": 1.2545695304870605, "learning_rate": 5.058275894941562e-06, "loss": 0.2095, "step": 30670 }, { "epoch": 0.6646371850150586, "grad_norm": 1.6720303297042847, "learning_rate": 5.055317340712778e-06, "loss": 0.2669, "step": 30675 }, { "epoch": 0.6647455203345395, "grad_norm": 1.3716037273406982, "learning_rate": 5.052359359251411e-06, "loss": 0.2253, "step": 30680 }, { "epoch": 0.6648538556540203, "grad_norm": 1.9252715110778809, "learning_rate": 5.049401950900094e-06, "loss": 0.2837, "step": 30685 }, { "epoch": 0.6649621909735012, "grad_norm": 1.4214892387390137, "learning_rate": 5.046445116001404e-06, "loss": 0.2953, "step": 30690 }, { "epoch": 0.665070526292982, "grad_norm": 0.7119281888008118, "learning_rate": 5.0434888548978385e-06, "loss": 0.1679, "step": 30695 }, { "epoch": 0.6651788616124629, "grad_norm": 0.6009576916694641, "learning_rate": 5.0405331679318455e-06, "loss": 0.1757, "step": 30700 }, { "epoch": 0.6652871969319437, "grad_norm": 1.2248761653900146, "learning_rate": 5.037578055445789e-06, "loss": 0.2118, "step": 30705 }, { "epoch": 0.6653955322514246, "grad_norm": 1.774669885635376, "learning_rate": 5.0346235177819805e-06, "loss": 0.2346, "step": 30710 }, { "epoch": 0.6655038675709055, "grad_norm": 1.1588789224624634, "learning_rate": 5.031669555282653e-06, "loss": 0.2306, "step": 30715 }, { "epoch": 0.6656122028903864, "grad_norm": 1.343691349029541, "learning_rate": 5.02871616828998e-06, "loss": 0.2668, "step": 30720 }, { "epoch": 0.6657205382098672, "grad_norm": 1.6329742670059204, "learning_rate": 5.025763357146074e-06, "loss": 0.3375, "step": 30725 }, { "epoch": 0.665828873529348, "grad_norm": 1.3540172576904297, "learning_rate": 5.022811122192965e-06, "loss": 0.1595, "step": 30730 }, { "epoch": 0.6659372088488289, "grad_norm": 2.276838779449463, "learning_rate": 5.019859463772634e-06, "loss": 0.3805, "step": 30735 }, { "epoch": 0.6660455441683097, "grad_norm": 1.1136317253112793, "learning_rate": 5.016908382226977e-06, "loss": 0.2231, "step": 30740 }, { "epoch": 0.6661538794877906, "grad_norm": 1.4472503662109375, "learning_rate": 5.0139578778978415e-06, "loss": 0.2495, "step": 30745 }, { "epoch": 0.6662622148072714, "grad_norm": 2.27065110206604, "learning_rate": 5.011007951126996e-06, "loss": 0.1842, "step": 30750 }, { "epoch": 0.6663705501267523, "grad_norm": 1.6143516302108765, "learning_rate": 5.0080586022561385e-06, "loss": 0.2267, "step": 30755 }, { "epoch": 0.6664788854462331, "grad_norm": 1.6387884616851807, "learning_rate": 5.005109831626917e-06, "loss": 0.2377, "step": 30760 }, { "epoch": 0.6665872207657141, "grad_norm": 1.1222807168960571, "learning_rate": 5.002161639580891e-06, "loss": 0.239, "step": 30765 }, { "epoch": 0.6666955560851949, "grad_norm": 1.0147138833999634, "learning_rate": 4.999214026459578e-06, "loss": 0.3179, "step": 30770 }, { "epoch": 0.6668038914046758, "grad_norm": 1.1831995248794556, "learning_rate": 4.996266992604405e-06, "loss": 0.2522, "step": 30775 }, { "epoch": 0.6669122267241566, "grad_norm": 1.6013907194137573, "learning_rate": 4.99332053835675e-06, "loss": 0.1627, "step": 30780 }, { "epoch": 0.6670205620436375, "grad_norm": 1.9746447801589966, "learning_rate": 4.990374664057908e-06, "loss": 0.3087, "step": 30785 }, { "epoch": 0.6671288973631183, "grad_norm": 1.590648889541626, "learning_rate": 4.987429370049116e-06, "loss": 0.2622, "step": 30790 }, { "epoch": 0.6672372326825992, "grad_norm": 1.2297474145889282, "learning_rate": 4.984484656671545e-06, "loss": 0.2812, "step": 30795 }, { "epoch": 0.66734556800208, "grad_norm": 1.5027985572814941, "learning_rate": 4.981540524266292e-06, "loss": 0.2399, "step": 30800 }, { "epoch": 0.6674539033215608, "grad_norm": 0.7162780165672302, "learning_rate": 4.978596973174395e-06, "loss": 0.2275, "step": 30805 }, { "epoch": 0.6675622386410418, "grad_norm": 1.6544134616851807, "learning_rate": 4.975654003736811e-06, "loss": 0.3207, "step": 30810 }, { "epoch": 0.6676705739605227, "grad_norm": 1.5942833423614502, "learning_rate": 4.972711616294454e-06, "loss": 0.2646, "step": 30815 }, { "epoch": 0.6677789092800035, "grad_norm": 1.6833724975585938, "learning_rate": 4.969769811188142e-06, "loss": 0.2978, "step": 30820 }, { "epoch": 0.6678872445994843, "grad_norm": 0.929298460483551, "learning_rate": 4.96682858875865e-06, "loss": 0.223, "step": 30825 }, { "epoch": 0.6679955799189652, "grad_norm": 1.2875986099243164, "learning_rate": 4.963887949346669e-06, "loss": 0.205, "step": 30830 }, { "epoch": 0.668103915238446, "grad_norm": 1.0832436084747314, "learning_rate": 4.960947893292824e-06, "loss": 0.1937, "step": 30835 }, { "epoch": 0.6682122505579269, "grad_norm": 1.2096889019012451, "learning_rate": 4.9580084209376835e-06, "loss": 0.2011, "step": 30840 }, { "epoch": 0.6683205858774077, "grad_norm": 1.4051977396011353, "learning_rate": 4.955069532621736e-06, "loss": 0.2736, "step": 30845 }, { "epoch": 0.6684289211968886, "grad_norm": 1.3944870233535767, "learning_rate": 4.952131228685413e-06, "loss": 0.2369, "step": 30850 }, { "epoch": 0.6685372565163694, "grad_norm": 1.3221160173416138, "learning_rate": 4.9491935094690666e-06, "loss": 0.1572, "step": 30855 }, { "epoch": 0.6686455918358504, "grad_norm": 1.4050461053848267, "learning_rate": 4.946256375312991e-06, "loss": 0.2368, "step": 30860 }, { "epoch": 0.6687539271553312, "grad_norm": 1.1293739080429077, "learning_rate": 4.943319826557413e-06, "loss": 0.2725, "step": 30865 }, { "epoch": 0.6688622624748121, "grad_norm": 1.5659592151641846, "learning_rate": 4.94038386354248e-06, "loss": 0.2629, "step": 30870 }, { "epoch": 0.6689705977942929, "grad_norm": 1.1804325580596924, "learning_rate": 4.9374484866082885e-06, "loss": 0.2298, "step": 30875 }, { "epoch": 0.6690789331137738, "grad_norm": 0.8842514753341675, "learning_rate": 4.934513696094847e-06, "loss": 0.2547, "step": 30880 }, { "epoch": 0.6691872684332546, "grad_norm": 1.8523287773132324, "learning_rate": 4.9315794923421175e-06, "loss": 0.264, "step": 30885 }, { "epoch": 0.6692956037527354, "grad_norm": 1.1382862329483032, "learning_rate": 4.9286458756899755e-06, "loss": 0.2437, "step": 30890 }, { "epoch": 0.6694039390722163, "grad_norm": 1.3588296175003052, "learning_rate": 4.925712846478244e-06, "loss": 0.2851, "step": 30895 }, { "epoch": 0.6695122743916971, "grad_norm": 2.544201135635376, "learning_rate": 4.922780405046662e-06, "loss": 0.3079, "step": 30900 }, { "epoch": 0.669620609711178, "grad_norm": 1.9016263484954834, "learning_rate": 4.919848551734917e-06, "loss": 0.2713, "step": 30905 }, { "epoch": 0.6697289450306589, "grad_norm": 2.1821556091308594, "learning_rate": 4.916917286882613e-06, "loss": 0.2631, "step": 30910 }, { "epoch": 0.6698372803501398, "grad_norm": 1.614311933517456, "learning_rate": 4.913986610829296e-06, "loss": 0.1913, "step": 30915 }, { "epoch": 0.6699456156696206, "grad_norm": 1.0632139444351196, "learning_rate": 4.911056523914447e-06, "loss": 0.2176, "step": 30920 }, { "epoch": 0.6700539509891015, "grad_norm": 2.2902472019195557, "learning_rate": 4.908127026477462e-06, "loss": 0.2396, "step": 30925 }, { "epoch": 0.6701622863085823, "grad_norm": 3.15822696685791, "learning_rate": 4.905198118857689e-06, "loss": 0.2499, "step": 30930 }, { "epoch": 0.6702706216280632, "grad_norm": 0.9075609445571899, "learning_rate": 4.902269801394392e-06, "loss": 0.1953, "step": 30935 }, { "epoch": 0.670378956947544, "grad_norm": 1.5634596347808838, "learning_rate": 4.899342074426775e-06, "loss": 0.2316, "step": 30940 }, { "epoch": 0.6704872922670249, "grad_norm": 1.3907582759857178, "learning_rate": 4.8964149382939696e-06, "loss": 0.2867, "step": 30945 }, { "epoch": 0.6705956275865057, "grad_norm": 2.6450629234313965, "learning_rate": 4.893488393335044e-06, "loss": 0.1035, "step": 30950 }, { "epoch": 0.6707039629059867, "grad_norm": 1.16592276096344, "learning_rate": 4.890562439888989e-06, "loss": 0.3144, "step": 30955 }, { "epoch": 0.6708122982254675, "grad_norm": 1.4317002296447754, "learning_rate": 4.887637078294737e-06, "loss": 0.1593, "step": 30960 }, { "epoch": 0.6709206335449484, "grad_norm": 1.8026460409164429, "learning_rate": 4.88471230889115e-06, "loss": 0.2659, "step": 30965 }, { "epoch": 0.6710289688644292, "grad_norm": 1.533931851387024, "learning_rate": 4.88178813201701e-06, "loss": 0.1497, "step": 30970 }, { "epoch": 0.67113730418391, "grad_norm": 1.8361310958862305, "learning_rate": 4.878864548011048e-06, "loss": 0.2603, "step": 30975 }, { "epoch": 0.6712456395033909, "grad_norm": 1.790257215499878, "learning_rate": 4.875941557211911e-06, "loss": 0.1728, "step": 30980 }, { "epoch": 0.6713539748228717, "grad_norm": 1.964898943901062, "learning_rate": 4.8730191599581886e-06, "loss": 0.2666, "step": 30985 }, { "epoch": 0.6714623101423526, "grad_norm": 1.6416486501693726, "learning_rate": 4.870097356588391e-06, "loss": 0.2369, "step": 30990 }, { "epoch": 0.6715706454618334, "grad_norm": 1.9040228128433228, "learning_rate": 4.867176147440973e-06, "loss": 0.2673, "step": 30995 }, { "epoch": 0.6716789807813143, "grad_norm": 1.4921613931655884, "learning_rate": 4.864255532854308e-06, "loss": 0.297, "step": 31000 }, { "epoch": 0.6717873161007952, "grad_norm": 1.7766153812408447, "learning_rate": 4.861335513166699e-06, "loss": 0.2605, "step": 31005 }, { "epoch": 0.6718956514202761, "grad_norm": 1.0325734615325928, "learning_rate": 4.858416088716401e-06, "loss": 0.2007, "step": 31010 }, { "epoch": 0.6720039867397569, "grad_norm": 1.706673264503479, "learning_rate": 4.855497259841575e-06, "loss": 0.2403, "step": 31015 }, { "epoch": 0.6721123220592378, "grad_norm": 0.9922043681144714, "learning_rate": 4.85257902688033e-06, "loss": 0.1516, "step": 31020 }, { "epoch": 0.6722206573787186, "grad_norm": 1.4056257009506226, "learning_rate": 4.849661390170693e-06, "loss": 0.254, "step": 31025 }, { "epoch": 0.6723289926981995, "grad_norm": 1.3408697843551636, "learning_rate": 4.846744350050635e-06, "loss": 0.2727, "step": 31030 }, { "epoch": 0.6724373280176803, "grad_norm": 1.2113535404205322, "learning_rate": 4.843827906858048e-06, "loss": 0.2982, "step": 31035 }, { "epoch": 0.6725456633371611, "grad_norm": 0.8548913598060608, "learning_rate": 4.840912060930756e-06, "loss": 0.2505, "step": 31040 }, { "epoch": 0.672653998656642, "grad_norm": 1.733850359916687, "learning_rate": 4.837996812606521e-06, "loss": 0.3388, "step": 31045 }, { "epoch": 0.672762333976123, "grad_norm": 1.3432207107543945, "learning_rate": 4.835082162223021e-06, "loss": 0.228, "step": 31050 }, { "epoch": 0.6728706692956038, "grad_norm": 1.3137803077697754, "learning_rate": 4.832168110117891e-06, "loss": 0.2372, "step": 31055 }, { "epoch": 0.6729790046150846, "grad_norm": 1.3998702764511108, "learning_rate": 4.8292546566286665e-06, "loss": 0.2421, "step": 31060 }, { "epoch": 0.6730873399345655, "grad_norm": 2.0624921321868896, "learning_rate": 4.826341802092836e-06, "loss": 0.3013, "step": 31065 }, { "epoch": 0.6731956752540463, "grad_norm": 1.3221291303634644, "learning_rate": 4.823429546847808e-06, "loss": 0.2239, "step": 31070 }, { "epoch": 0.6733040105735272, "grad_norm": 1.845424771308899, "learning_rate": 4.820517891230916e-06, "loss": 0.2469, "step": 31075 }, { "epoch": 0.673412345893008, "grad_norm": 2.17106032371521, "learning_rate": 4.817606835579442e-06, "loss": 0.3096, "step": 31080 }, { "epoch": 0.6735206812124889, "grad_norm": 1.576368808746338, "learning_rate": 4.814696380230582e-06, "loss": 0.2558, "step": 31085 }, { "epoch": 0.6736290165319697, "grad_norm": 1.5305567979812622, "learning_rate": 4.811786525521471e-06, "loss": 0.2216, "step": 31090 }, { "epoch": 0.6737373518514506, "grad_norm": 1.3459919691085815, "learning_rate": 4.80887727178917e-06, "loss": 0.3434, "step": 31095 }, { "epoch": 0.6738456871709315, "grad_norm": 1.2433099746704102, "learning_rate": 4.8059686193706735e-06, "loss": 0.2845, "step": 31100 }, { "epoch": 0.6739540224904124, "grad_norm": 1.6864550113677979, "learning_rate": 4.8030605686029095e-06, "loss": 0.2343, "step": 31105 }, { "epoch": 0.6740623578098932, "grad_norm": 1.4509981870651245, "learning_rate": 4.800153119822725e-06, "loss": 0.1841, "step": 31110 }, { "epoch": 0.674170693129374, "grad_norm": 1.8219285011291504, "learning_rate": 4.797246273366911e-06, "loss": 0.2174, "step": 31115 }, { "epoch": 0.6742790284488549, "grad_norm": 1.5634162425994873, "learning_rate": 4.794340029572175e-06, "loss": 0.334, "step": 31120 }, { "epoch": 0.6743873637683357, "grad_norm": 0.6716442108154297, "learning_rate": 4.791434388775172e-06, "loss": 0.2107, "step": 31125 }, { "epoch": 0.6744956990878166, "grad_norm": 1.5014147758483887, "learning_rate": 4.788529351312464e-06, "loss": 0.2015, "step": 31130 }, { "epoch": 0.6746040344072974, "grad_norm": 1.688733696937561, "learning_rate": 4.785624917520568e-06, "loss": 0.2438, "step": 31135 }, { "epoch": 0.6747123697267783, "grad_norm": 2.908069610595703, "learning_rate": 4.78272108773591e-06, "loss": 0.2943, "step": 31140 }, { "epoch": 0.6748207050462591, "grad_norm": 1.280476689338684, "learning_rate": 4.779817862294863e-06, "loss": 0.242, "step": 31145 }, { "epoch": 0.6749290403657401, "grad_norm": 1.603572964668274, "learning_rate": 4.776915241533715e-06, "loss": 0.2453, "step": 31150 }, { "epoch": 0.6750373756852209, "grad_norm": 1.673244833946228, "learning_rate": 4.774013225788694e-06, "loss": 0.2477, "step": 31155 }, { "epoch": 0.6751457110047018, "grad_norm": 1.6793454885482788, "learning_rate": 4.771111815395959e-06, "loss": 0.229, "step": 31160 }, { "epoch": 0.6752540463241826, "grad_norm": 1.3147705793380737, "learning_rate": 4.768211010691588e-06, "loss": 0.3179, "step": 31165 }, { "epoch": 0.6753623816436635, "grad_norm": 1.799277901649475, "learning_rate": 4.765310812011602e-06, "loss": 0.1883, "step": 31170 }, { "epoch": 0.6754707169631443, "grad_norm": 2.0387213230133057, "learning_rate": 4.76241121969194e-06, "loss": 0.2435, "step": 31175 }, { "epoch": 0.6755790522826252, "grad_norm": 1.2733635902404785, "learning_rate": 4.759512234068483e-06, "loss": 0.1828, "step": 31180 }, { "epoch": 0.675687387602106, "grad_norm": 1.341472864151001, "learning_rate": 4.756613855477028e-06, "loss": 0.2572, "step": 31185 }, { "epoch": 0.6757957229215868, "grad_norm": 0.6649749279022217, "learning_rate": 4.753716084253315e-06, "loss": 0.1664, "step": 31190 }, { "epoch": 0.6759040582410678, "grad_norm": 1.0963882207870483, "learning_rate": 4.750818920733001e-06, "loss": 0.1982, "step": 31195 }, { "epoch": 0.6760123935605487, "grad_norm": 1.7184802293777466, "learning_rate": 4.74792236525168e-06, "loss": 0.2926, "step": 31200 }, { "epoch": 0.6761207288800295, "grad_norm": 1.084182620048523, "learning_rate": 4.7450264181448835e-06, "loss": 0.2086, "step": 31205 }, { "epoch": 0.6762290641995103, "grad_norm": 1.4555507898330688, "learning_rate": 4.742131079748052e-06, "loss": 0.2169, "step": 31210 }, { "epoch": 0.6763373995189912, "grad_norm": 1.3098334074020386, "learning_rate": 4.739236350396575e-06, "loss": 0.1857, "step": 31215 }, { "epoch": 0.676445734838472, "grad_norm": 1.4604192972183228, "learning_rate": 4.736342230425758e-06, "loss": 0.2198, "step": 31220 }, { "epoch": 0.6765540701579529, "grad_norm": 1.2337133884429932, "learning_rate": 4.733448720170848e-06, "loss": 0.2987, "step": 31225 }, { "epoch": 0.6766624054774337, "grad_norm": 1.263890266418457, "learning_rate": 4.730555819967007e-06, "loss": 0.2182, "step": 31230 }, { "epoch": 0.6767707407969146, "grad_norm": 1.9427528381347656, "learning_rate": 4.7276635301493405e-06, "loss": 0.2516, "step": 31235 }, { "epoch": 0.6768790761163954, "grad_norm": 1.2164278030395508, "learning_rate": 4.724771851052875e-06, "loss": 0.2674, "step": 31240 }, { "epoch": 0.6769874114358764, "grad_norm": 1.818631649017334, "learning_rate": 4.721880783012561e-06, "loss": 0.2345, "step": 31245 }, { "epoch": 0.6770957467553572, "grad_norm": 1.9658688306808472, "learning_rate": 4.718990326363298e-06, "loss": 0.1768, "step": 31250 }, { "epoch": 0.6772040820748381, "grad_norm": 1.339158296585083, "learning_rate": 4.716100481439894e-06, "loss": 0.2679, "step": 31255 }, { "epoch": 0.6773124173943189, "grad_norm": 2.341747283935547, "learning_rate": 4.7132112485771e-06, "loss": 0.214, "step": 31260 }, { "epoch": 0.6774207527137998, "grad_norm": 0.9990419745445251, "learning_rate": 4.710322628109582e-06, "loss": 0.2978, "step": 31265 }, { "epoch": 0.6775290880332806, "grad_norm": 1.4975638389587402, "learning_rate": 4.707434620371952e-06, "loss": 0.1989, "step": 31270 }, { "epoch": 0.6776374233527614, "grad_norm": 1.455000877380371, "learning_rate": 4.7045472256987405e-06, "loss": 0.3035, "step": 31275 }, { "epoch": 0.6777457586722423, "grad_norm": 1.5377769470214844, "learning_rate": 4.701660444424401e-06, "loss": 0.1545, "step": 31280 }, { "epoch": 0.6778540939917231, "grad_norm": 1.2788525819778442, "learning_rate": 4.698774276883334e-06, "loss": 0.2637, "step": 31285 }, { "epoch": 0.677962429311204, "grad_norm": 1.5245736837387085, "learning_rate": 4.695888723409851e-06, "loss": 0.2432, "step": 31290 }, { "epoch": 0.6780707646306849, "grad_norm": 1.818173885345459, "learning_rate": 4.693003784338205e-06, "loss": 0.2917, "step": 31295 }, { "epoch": 0.6781790999501658, "grad_norm": 1.2494704723358154, "learning_rate": 4.690119460002571e-06, "loss": 0.1928, "step": 31300 }, { "epoch": 0.6782874352696466, "grad_norm": 1.5821795463562012, "learning_rate": 4.687235750737059e-06, "loss": 0.2451, "step": 31305 }, { "epoch": 0.6783957705891275, "grad_norm": 1.6527718305587769, "learning_rate": 4.684352656875701e-06, "loss": 0.2992, "step": 31310 }, { "epoch": 0.6785041059086083, "grad_norm": 0.765148937702179, "learning_rate": 4.681470178752455e-06, "loss": 0.2058, "step": 31315 }, { "epoch": 0.6786124412280892, "grad_norm": 1.0887140035629272, "learning_rate": 4.67858831670122e-06, "loss": 0.1351, "step": 31320 }, { "epoch": 0.67872077654757, "grad_norm": 0.8627936840057373, "learning_rate": 4.675707071055812e-06, "loss": 0.2058, "step": 31325 }, { "epoch": 0.6788291118670509, "grad_norm": 0.9858233332633972, "learning_rate": 4.6728264421499845e-06, "loss": 0.2244, "step": 31330 }, { "epoch": 0.6789374471865317, "grad_norm": 1.2333818674087524, "learning_rate": 4.6699464303174095e-06, "loss": 0.2002, "step": 31335 }, { "epoch": 0.6790457825060127, "grad_norm": 1.3970564603805542, "learning_rate": 4.667067035891695e-06, "loss": 0.3847, "step": 31340 }, { "epoch": 0.6791541178254935, "grad_norm": 1.708756685256958, "learning_rate": 4.664188259206381e-06, "loss": 0.2933, "step": 31345 }, { "epoch": 0.6792624531449744, "grad_norm": 0.9346111416816711, "learning_rate": 4.661310100594925e-06, "loss": 0.3312, "step": 31350 }, { "epoch": 0.6793707884644552, "grad_norm": 1.148869276046753, "learning_rate": 4.6584325603907224e-06, "loss": 0.2198, "step": 31355 }, { "epoch": 0.679479123783936, "grad_norm": 1.4386757612228394, "learning_rate": 4.655555638927087e-06, "loss": 0.1793, "step": 31360 }, { "epoch": 0.6795874591034169, "grad_norm": 1.5112942457199097, "learning_rate": 4.6526793365372755e-06, "loss": 0.2243, "step": 31365 }, { "epoch": 0.6796957944228977, "grad_norm": 1.2445180416107178, "learning_rate": 4.6498036535544554e-06, "loss": 0.1646, "step": 31370 }, { "epoch": 0.6798041297423786, "grad_norm": 0.904621958732605, "learning_rate": 4.646928590311741e-06, "loss": 0.2787, "step": 31375 }, { "epoch": 0.6799124650618594, "grad_norm": 1.432712435722351, "learning_rate": 4.644054147142157e-06, "loss": 0.2863, "step": 31380 }, { "epoch": 0.6800208003813403, "grad_norm": 1.7012578248977661, "learning_rate": 4.64118032437867e-06, "loss": 0.2459, "step": 31385 }, { "epoch": 0.6801291357008212, "grad_norm": 1.6039156913757324, "learning_rate": 4.638307122354164e-06, "loss": 0.239, "step": 31390 }, { "epoch": 0.6802374710203021, "grad_norm": 1.1383893489837646, "learning_rate": 4.63543454140146e-06, "loss": 0.2436, "step": 31395 }, { "epoch": 0.6803458063397829, "grad_norm": 2.997454881668091, "learning_rate": 4.632562581853307e-06, "loss": 0.2464, "step": 31400 }, { "epoch": 0.6804541416592638, "grad_norm": 1.6218032836914062, "learning_rate": 4.629691244042371e-06, "loss": 0.2129, "step": 31405 }, { "epoch": 0.6805624769787446, "grad_norm": 1.2626177072525024, "learning_rate": 4.626820528301261e-06, "loss": 0.286, "step": 31410 }, { "epoch": 0.6806708122982255, "grad_norm": 1.6518464088439941, "learning_rate": 4.6239504349625e-06, "loss": 0.2257, "step": 31415 }, { "epoch": 0.6807791476177063, "grad_norm": 1.247946858406067, "learning_rate": 4.6210809643585496e-06, "loss": 0.1268, "step": 31420 }, { "epoch": 0.6808874829371871, "grad_norm": 2.4025723934173584, "learning_rate": 4.618212116821791e-06, "loss": 0.2631, "step": 31425 }, { "epoch": 0.680995818256668, "grad_norm": 1.6497071981430054, "learning_rate": 4.615343892684542e-06, "loss": 0.1692, "step": 31430 }, { "epoch": 0.6811041535761488, "grad_norm": 1.607444167137146, "learning_rate": 4.6124762922790375e-06, "loss": 0.3912, "step": 31435 }, { "epoch": 0.6812124888956298, "grad_norm": 2.0315704345703125, "learning_rate": 4.60960931593745e-06, "loss": 0.2696, "step": 31440 }, { "epoch": 0.6813208242151106, "grad_norm": 1.539250373840332, "learning_rate": 4.6067429639918785e-06, "loss": 0.2705, "step": 31445 }, { "epoch": 0.6814291595345915, "grad_norm": 0.8539365530014038, "learning_rate": 4.60387723677434e-06, "loss": 0.1982, "step": 31450 }, { "epoch": 0.6815374948540723, "grad_norm": 1.2261326313018799, "learning_rate": 4.601012134616793e-06, "loss": 0.2642, "step": 31455 }, { "epoch": 0.6816458301735532, "grad_norm": 1.3972349166870117, "learning_rate": 4.598147657851111e-06, "loss": 0.276, "step": 31460 }, { "epoch": 0.681754165493034, "grad_norm": 1.3333567380905151, "learning_rate": 4.595283806809105e-06, "loss": 0.2383, "step": 31465 }, { "epoch": 0.6818625008125149, "grad_norm": 1.1915760040283203, "learning_rate": 4.592420581822506e-06, "loss": 0.2455, "step": 31470 }, { "epoch": 0.6819708361319957, "grad_norm": 0.9299991130828857, "learning_rate": 4.589557983222979e-06, "loss": 0.1666, "step": 31475 }, { "epoch": 0.6820791714514766, "grad_norm": 1.2930103540420532, "learning_rate": 4.586696011342111e-06, "loss": 0.1696, "step": 31480 }, { "epoch": 0.6821875067709575, "grad_norm": 0.7523574829101562, "learning_rate": 4.583834666511412e-06, "loss": 0.3266, "step": 31485 }, { "epoch": 0.6822958420904384, "grad_norm": 1.0511236190795898, "learning_rate": 4.580973949062339e-06, "loss": 0.246, "step": 31490 }, { "epoch": 0.6824041774099192, "grad_norm": 2.170562982559204, "learning_rate": 4.578113859326255e-06, "loss": 0.2882, "step": 31495 }, { "epoch": 0.6825125127294001, "grad_norm": 1.4370561838150024, "learning_rate": 4.575254397634463e-06, "loss": 0.1368, "step": 31500 }, { "epoch": 0.6826208480488809, "grad_norm": 1.9196103811264038, "learning_rate": 4.572395564318184e-06, "loss": 0.1969, "step": 31505 }, { "epoch": 0.6827291833683617, "grad_norm": 0.8866597414016724, "learning_rate": 4.569537359708576e-06, "loss": 0.1444, "step": 31510 }, { "epoch": 0.6828375186878426, "grad_norm": 1.3233247995376587, "learning_rate": 4.566679784136717e-06, "loss": 0.1976, "step": 31515 }, { "epoch": 0.6829458540073234, "grad_norm": 1.1791430711746216, "learning_rate": 4.56382283793361e-06, "loss": 0.1728, "step": 31520 }, { "epoch": 0.6830541893268043, "grad_norm": 2.114253520965576, "learning_rate": 4.560966521430197e-06, "loss": 0.2336, "step": 31525 }, { "epoch": 0.6831625246462851, "grad_norm": 1.0249439477920532, "learning_rate": 4.5581108349573325e-06, "loss": 0.174, "step": 31530 }, { "epoch": 0.6832708599657661, "grad_norm": 1.6558966636657715, "learning_rate": 4.555255778845807e-06, "loss": 0.3161, "step": 31535 }, { "epoch": 0.6833791952852469, "grad_norm": 1.1738401651382446, "learning_rate": 4.552401353426339e-06, "loss": 0.2425, "step": 31540 }, { "epoch": 0.6834875306047278, "grad_norm": 2.5411274433135986, "learning_rate": 4.549547559029571e-06, "loss": 0.2816, "step": 31545 }, { "epoch": 0.6835958659242086, "grad_norm": 1.5294345617294312, "learning_rate": 4.546694395986072e-06, "loss": 0.2859, "step": 31550 }, { "epoch": 0.6837042012436895, "grad_norm": 1.1770199537277222, "learning_rate": 4.543841864626332e-06, "loss": 0.1952, "step": 31555 }, { "epoch": 0.6838125365631703, "grad_norm": 1.5529398918151855, "learning_rate": 4.540989965280784e-06, "loss": 0.1677, "step": 31560 }, { "epoch": 0.6839208718826512, "grad_norm": 1.7861158847808838, "learning_rate": 4.538138698279767e-06, "loss": 0.2266, "step": 31565 }, { "epoch": 0.684029207202132, "grad_norm": 1.2715282440185547, "learning_rate": 4.535288063953568e-06, "loss": 0.2384, "step": 31570 }, { "epoch": 0.6841375425216129, "grad_norm": 1.6379156112670898, "learning_rate": 4.5324380626323815e-06, "loss": 0.1469, "step": 31575 }, { "epoch": 0.6842458778410938, "grad_norm": 1.6500931978225708, "learning_rate": 4.529588694646342e-06, "loss": 0.2644, "step": 31580 }, { "epoch": 0.6843542131605747, "grad_norm": 1.8589454889297485, "learning_rate": 4.526739960325508e-06, "loss": 0.2879, "step": 31585 }, { "epoch": 0.6844625484800555, "grad_norm": 2.3124146461486816, "learning_rate": 4.523891859999857e-06, "loss": 0.3149, "step": 31590 }, { "epoch": 0.6845708837995363, "grad_norm": 1.3222829103469849, "learning_rate": 4.521044393999306e-06, "loss": 0.2742, "step": 31595 }, { "epoch": 0.6846792191190172, "grad_norm": 1.1304702758789062, "learning_rate": 4.518197562653682e-06, "loss": 0.17, "step": 31600 }, { "epoch": 0.684787554438498, "grad_norm": 1.3000859022140503, "learning_rate": 4.515351366292758e-06, "loss": 0.4259, "step": 31605 }, { "epoch": 0.6848958897579789, "grad_norm": 1.761191487312317, "learning_rate": 4.512505805246215e-06, "loss": 0.2693, "step": 31610 }, { "epoch": 0.6850042250774597, "grad_norm": 2.056468963623047, "learning_rate": 4.509660879843674e-06, "loss": 0.3264, "step": 31615 }, { "epoch": 0.6851125603969406, "grad_norm": 1.9391040802001953, "learning_rate": 4.506816590414671e-06, "loss": 0.2548, "step": 31620 }, { "epoch": 0.6852208957164214, "grad_norm": 1.1104910373687744, "learning_rate": 4.503972937288683e-06, "loss": 0.2189, "step": 31625 }, { "epoch": 0.6853292310359024, "grad_norm": 1.5750072002410889, "learning_rate": 4.5011299207950955e-06, "loss": 0.2057, "step": 31630 }, { "epoch": 0.6854375663553832, "grad_norm": 0.7635299563407898, "learning_rate": 4.498287541263234e-06, "loss": 0.2965, "step": 31635 }, { "epoch": 0.6855459016748641, "grad_norm": 1.041042447090149, "learning_rate": 4.495445799022349e-06, "loss": 0.1602, "step": 31640 }, { "epoch": 0.6856542369943449, "grad_norm": 1.6629860401153564, "learning_rate": 4.492604694401606e-06, "loss": 0.183, "step": 31645 }, { "epoch": 0.6857625723138258, "grad_norm": 1.518666386604309, "learning_rate": 4.489764227730112e-06, "loss": 0.2334, "step": 31650 }, { "epoch": 0.6858709076333066, "grad_norm": 1.206847906112671, "learning_rate": 4.486924399336885e-06, "loss": 0.2737, "step": 31655 }, { "epoch": 0.6859792429527874, "grad_norm": 1.7811224460601807, "learning_rate": 4.484085209550884e-06, "loss": 0.205, "step": 31660 }, { "epoch": 0.6860875782722683, "grad_norm": 1.48939847946167, "learning_rate": 4.48124665870098e-06, "loss": 0.1895, "step": 31665 }, { "epoch": 0.6861959135917491, "grad_norm": 1.8367258310317993, "learning_rate": 4.478408747115983e-06, "loss": 0.2678, "step": 31670 }, { "epoch": 0.68630424891123, "grad_norm": 1.734328031539917, "learning_rate": 4.475571475124615e-06, "loss": 0.2646, "step": 31675 }, { "epoch": 0.6864125842307109, "grad_norm": 1.2695655822753906, "learning_rate": 4.472734843055536e-06, "loss": 0.2488, "step": 31680 }, { "epoch": 0.6865209195501918, "grad_norm": 1.631389856338501, "learning_rate": 4.469898851237332e-06, "loss": 0.2287, "step": 31685 }, { "epoch": 0.6866292548696726, "grad_norm": 1.232037901878357, "learning_rate": 4.4670634999985e-06, "loss": 0.2275, "step": 31690 }, { "epoch": 0.6867375901891535, "grad_norm": 1.6829462051391602, "learning_rate": 4.464228789667482e-06, "loss": 0.1941, "step": 31695 }, { "epoch": 0.6868459255086343, "grad_norm": 1.9197509288787842, "learning_rate": 4.4613947205726295e-06, "loss": 0.2385, "step": 31700 }, { "epoch": 0.6869542608281152, "grad_norm": 1.5168468952178955, "learning_rate": 4.458561293042234e-06, "loss": 0.2733, "step": 31705 }, { "epoch": 0.687062596147596, "grad_norm": 1.6693732738494873, "learning_rate": 4.455728507404499e-06, "loss": 0.3447, "step": 31710 }, { "epoch": 0.6871709314670769, "grad_norm": 0.955771803855896, "learning_rate": 4.452896363987566e-06, "loss": 0.1929, "step": 31715 }, { "epoch": 0.6872792667865577, "grad_norm": 2.4277701377868652, "learning_rate": 4.4500648631194936e-06, "loss": 0.2003, "step": 31720 }, { "epoch": 0.6873876021060387, "grad_norm": 2.0145795345306396, "learning_rate": 4.447234005128261e-06, "loss": 0.2977, "step": 31725 }, { "epoch": 0.6874959374255195, "grad_norm": 1.636685848236084, "learning_rate": 4.444403790341797e-06, "loss": 0.2274, "step": 31730 }, { "epoch": 0.6876042727450004, "grad_norm": 1.5304690599441528, "learning_rate": 4.441574219087926e-06, "loss": 0.2239, "step": 31735 }, { "epoch": 0.6877126080644812, "grad_norm": 1.585350751876831, "learning_rate": 4.438745291694422e-06, "loss": 0.2477, "step": 31740 }, { "epoch": 0.687820943383962, "grad_norm": 1.831785798072815, "learning_rate": 4.435917008488963e-06, "loss": 0.3038, "step": 31745 }, { "epoch": 0.6879292787034429, "grad_norm": 1.7181516885757446, "learning_rate": 4.433089369799173e-06, "loss": 0.2122, "step": 31750 }, { "epoch": 0.6880376140229237, "grad_norm": 2.0101799964904785, "learning_rate": 4.430262375952588e-06, "loss": 0.2322, "step": 31755 }, { "epoch": 0.6881459493424046, "grad_norm": 1.093419075012207, "learning_rate": 4.427436027276667e-06, "loss": 0.2818, "step": 31760 }, { "epoch": 0.6882542846618854, "grad_norm": 1.463078260421753, "learning_rate": 4.4246103240988095e-06, "loss": 0.2532, "step": 31765 }, { "epoch": 0.6883626199813663, "grad_norm": 1.7133963108062744, "learning_rate": 4.421785266746323e-06, "loss": 0.2114, "step": 31770 }, { "epoch": 0.6884709553008472, "grad_norm": 1.5569798946380615, "learning_rate": 4.418960855546451e-06, "loss": 0.3089, "step": 31775 }, { "epoch": 0.6885792906203281, "grad_norm": 2.2292158603668213, "learning_rate": 4.4161370908263616e-06, "loss": 0.2271, "step": 31780 }, { "epoch": 0.6886876259398089, "grad_norm": 0.7346052527427673, "learning_rate": 4.413313972913146e-06, "loss": 0.2524, "step": 31785 }, { "epoch": 0.6887959612592898, "grad_norm": 2.0554094314575195, "learning_rate": 4.410491502133819e-06, "loss": 0.2363, "step": 31790 }, { "epoch": 0.6889042965787706, "grad_norm": 1.0800713300704956, "learning_rate": 4.407669678815316e-06, "loss": 0.2909, "step": 31795 }, { "epoch": 0.6890126318982515, "grad_norm": 2.823925018310547, "learning_rate": 4.4048485032845125e-06, "loss": 0.1923, "step": 31800 }, { "epoch": 0.6891209672177323, "grad_norm": 2.076921224594116, "learning_rate": 4.402027975868191e-06, "loss": 0.2348, "step": 31805 }, { "epoch": 0.6892293025372132, "grad_norm": 1.819234013557434, "learning_rate": 4.3992080968930725e-06, "loss": 0.2804, "step": 31810 }, { "epoch": 0.689337637856694, "grad_norm": 1.307892084121704, "learning_rate": 4.396388866685794e-06, "loss": 0.2833, "step": 31815 }, { "epoch": 0.6894459731761748, "grad_norm": 1.9935420751571655, "learning_rate": 4.3935702855729266e-06, "loss": 0.2491, "step": 31820 }, { "epoch": 0.6895543084956558, "grad_norm": 1.0651830434799194, "learning_rate": 4.3907523538809505e-06, "loss": 0.2058, "step": 31825 }, { "epoch": 0.6896626438151366, "grad_norm": 1.7001234292984009, "learning_rate": 4.387935071936295e-06, "loss": 0.2264, "step": 31830 }, { "epoch": 0.6897709791346175, "grad_norm": 1.6740455627441406, "learning_rate": 4.3851184400652916e-06, "loss": 0.2236, "step": 31835 }, { "epoch": 0.6898793144540983, "grad_norm": 1.2949533462524414, "learning_rate": 4.382302458594203e-06, "loss": 0.2098, "step": 31840 }, { "epoch": 0.6899876497735792, "grad_norm": 1.8798606395721436, "learning_rate": 4.379487127849225e-06, "loss": 0.1974, "step": 31845 }, { "epoch": 0.69009598509306, "grad_norm": 2.0662038326263428, "learning_rate": 4.376672448156465e-06, "loss": 0.2697, "step": 31850 }, { "epoch": 0.6902043204125409, "grad_norm": 1.8428481817245483, "learning_rate": 4.373858419841967e-06, "loss": 0.3114, "step": 31855 }, { "epoch": 0.6903126557320217, "grad_norm": 1.3725701570510864, "learning_rate": 4.371045043231688e-06, "loss": 0.2584, "step": 31860 }, { "epoch": 0.6904209910515026, "grad_norm": 1.6478512287139893, "learning_rate": 4.368232318651523e-06, "loss": 0.2755, "step": 31865 }, { "epoch": 0.6905293263709835, "grad_norm": 0.8180378079414368, "learning_rate": 4.365420246427276e-06, "loss": 0.1812, "step": 31870 }, { "epoch": 0.6906376616904644, "grad_norm": 1.4100596904754639, "learning_rate": 4.362608826884688e-06, "loss": 0.2102, "step": 31875 }, { "epoch": 0.6907459970099452, "grad_norm": 0.624957263469696, "learning_rate": 4.359798060349423e-06, "loss": 0.1909, "step": 31880 }, { "epoch": 0.6908543323294261, "grad_norm": 1.5278087854385376, "learning_rate": 4.356987947147059e-06, "loss": 0.2249, "step": 31885 }, { "epoch": 0.6909626676489069, "grad_norm": 1.3938531875610352, "learning_rate": 4.354178487603111e-06, "loss": 0.3752, "step": 31890 }, { "epoch": 0.6910710029683877, "grad_norm": 1.156359076499939, "learning_rate": 4.351369682043009e-06, "loss": 0.2721, "step": 31895 }, { "epoch": 0.6911793382878686, "grad_norm": 1.7159658670425415, "learning_rate": 4.348561530792116e-06, "loss": 0.2236, "step": 31900 }, { "epoch": 0.6912876736073494, "grad_norm": 1.187605619430542, "learning_rate": 4.3457540341757075e-06, "loss": 0.2216, "step": 31905 }, { "epoch": 0.6913960089268303, "grad_norm": 1.9037384986877441, "learning_rate": 4.342947192518997e-06, "loss": 0.2346, "step": 31910 }, { "epoch": 0.6915043442463111, "grad_norm": 2.366568088531494, "learning_rate": 4.3401410061471085e-06, "loss": 0.1494, "step": 31915 }, { "epoch": 0.6916126795657921, "grad_norm": 1.011107087135315, "learning_rate": 4.3373354753850985e-06, "loss": 0.2215, "step": 31920 }, { "epoch": 0.6917210148852729, "grad_norm": 1.7033190727233887, "learning_rate": 4.334530600557951e-06, "loss": 0.2586, "step": 31925 }, { "epoch": 0.6918293502047538, "grad_norm": 1.6549997329711914, "learning_rate": 4.331726381990562e-06, "loss": 0.2022, "step": 31930 }, { "epoch": 0.6919376855242346, "grad_norm": 1.527032494544983, "learning_rate": 4.3289228200077634e-06, "loss": 0.2802, "step": 31935 }, { "epoch": 0.6920460208437155, "grad_norm": 2.075118064880371, "learning_rate": 4.3261199149343e-06, "loss": 0.2931, "step": 31940 }, { "epoch": 0.6921543561631963, "grad_norm": 1.6816952228546143, "learning_rate": 4.323317667094854e-06, "loss": 0.321, "step": 31945 }, { "epoch": 0.6922626914826772, "grad_norm": 1.4971034526824951, "learning_rate": 4.320516076814016e-06, "loss": 0.312, "step": 31950 }, { "epoch": 0.692371026802158, "grad_norm": 1.7386494874954224, "learning_rate": 4.317715144416314e-06, "loss": 0.2392, "step": 31955 }, { "epoch": 0.6924793621216389, "grad_norm": 1.3770071268081665, "learning_rate": 4.31491487022619e-06, "loss": 0.3137, "step": 31960 }, { "epoch": 0.6925876974411197, "grad_norm": 1.1252518892288208, "learning_rate": 4.312115254568019e-06, "loss": 0.2125, "step": 31965 }, { "epoch": 0.6926960327606007, "grad_norm": 1.6942763328552246, "learning_rate": 4.309316297766088e-06, "loss": 0.2827, "step": 31970 }, { "epoch": 0.6928043680800815, "grad_norm": 1.137378454208374, "learning_rate": 4.306518000144616e-06, "loss": 0.2804, "step": 31975 }, { "epoch": 0.6929127033995623, "grad_norm": 1.8814555406570435, "learning_rate": 4.3037203620277504e-06, "loss": 0.2598, "step": 31980 }, { "epoch": 0.6930210387190432, "grad_norm": 0.9755052328109741, "learning_rate": 4.300923383739546e-06, "loss": 0.2371, "step": 31985 }, { "epoch": 0.693129374038524, "grad_norm": 1.5264334678649902, "learning_rate": 4.298127065603999e-06, "loss": 0.2069, "step": 31990 }, { "epoch": 0.6932377093580049, "grad_norm": 1.6489429473876953, "learning_rate": 4.295331407945014e-06, "loss": 0.2157, "step": 31995 }, { "epoch": 0.6933460446774857, "grad_norm": 1.41226327419281, "learning_rate": 4.292536411086433e-06, "loss": 0.2245, "step": 32000 }, { "epoch": 0.6934543799969666, "grad_norm": 1.8285331726074219, "learning_rate": 4.289742075352012e-06, "loss": 0.269, "step": 32005 }, { "epoch": 0.6935627153164474, "grad_norm": 1.7636511325836182, "learning_rate": 4.2869484010654275e-06, "loss": 0.2447, "step": 32010 }, { "epoch": 0.6936710506359284, "grad_norm": 1.3948639631271362, "learning_rate": 4.2841553885502885e-06, "loss": 0.1966, "step": 32015 }, { "epoch": 0.6937793859554092, "grad_norm": 0.8250443935394287, "learning_rate": 4.281363038130126e-06, "loss": 0.2509, "step": 32020 }, { "epoch": 0.6938877212748901, "grad_norm": 0.927738606929779, "learning_rate": 4.278571350128392e-06, "loss": 0.2537, "step": 32025 }, { "epoch": 0.6939960565943709, "grad_norm": 1.4040441513061523, "learning_rate": 4.275780324868458e-06, "loss": 0.2598, "step": 32030 }, { "epoch": 0.6941043919138518, "grad_norm": 1.0914863348007202, "learning_rate": 4.272989962673627e-06, "loss": 0.3057, "step": 32035 }, { "epoch": 0.6942127272333326, "grad_norm": 1.0236859321594238, "learning_rate": 4.2702002638671195e-06, "loss": 0.2049, "step": 32040 }, { "epoch": 0.6943210625528135, "grad_norm": 1.5631349086761475, "learning_rate": 4.267411228772074e-06, "loss": 0.3346, "step": 32045 }, { "epoch": 0.6944293978722943, "grad_norm": 1.1770225763320923, "learning_rate": 4.264622857711569e-06, "loss": 0.2187, "step": 32050 }, { "epoch": 0.6945377331917751, "grad_norm": 1.972417950630188, "learning_rate": 4.261835151008585e-06, "loss": 0.3046, "step": 32055 }, { "epoch": 0.694646068511256, "grad_norm": 1.320526123046875, "learning_rate": 4.2590481089860444e-06, "loss": 0.1777, "step": 32060 }, { "epoch": 0.6947544038307369, "grad_norm": 1.4017333984375, "learning_rate": 4.256261731966775e-06, "loss": 0.256, "step": 32065 }, { "epoch": 0.6948627391502178, "grad_norm": 2.236304759979248, "learning_rate": 4.253476020273549e-06, "loss": 0.3227, "step": 32070 }, { "epoch": 0.6949710744696986, "grad_norm": 1.3404364585876465, "learning_rate": 4.250690974229044e-06, "loss": 0.2575, "step": 32075 }, { "epoch": 0.6950794097891795, "grad_norm": 1.4039064645767212, "learning_rate": 4.2479065941558604e-06, "loss": 0.232, "step": 32080 }, { "epoch": 0.6951877451086603, "grad_norm": 1.3423010110855103, "learning_rate": 4.245122880376535e-06, "loss": 0.2353, "step": 32085 }, { "epoch": 0.6952960804281412, "grad_norm": 1.1847503185272217, "learning_rate": 4.242339833213513e-06, "loss": 0.2076, "step": 32090 }, { "epoch": 0.695404415747622, "grad_norm": 1.313087821006775, "learning_rate": 4.239557452989175e-06, "loss": 0.2315, "step": 32095 }, { "epoch": 0.6955127510671029, "grad_norm": 1.1370009183883667, "learning_rate": 4.236775740025811e-06, "loss": 0.1985, "step": 32100 }, { "epoch": 0.6956210863865837, "grad_norm": 0.958013653755188, "learning_rate": 4.2339946946456475e-06, "loss": 0.2253, "step": 32105 }, { "epoch": 0.6957294217060647, "grad_norm": 2.320476770401001, "learning_rate": 4.231214317170821e-06, "loss": 0.1933, "step": 32110 }, { "epoch": 0.6958377570255455, "grad_norm": 1.8723095655441284, "learning_rate": 4.2284346079234e-06, "loss": 0.3168, "step": 32115 }, { "epoch": 0.6959460923450264, "grad_norm": 1.2262191772460938, "learning_rate": 4.225655567225374e-06, "loss": 0.2122, "step": 32120 }, { "epoch": 0.6960544276645072, "grad_norm": 2.006730556488037, "learning_rate": 4.222877195398648e-06, "loss": 0.2704, "step": 32125 }, { "epoch": 0.696162762983988, "grad_norm": 1.0285104513168335, "learning_rate": 4.22009949276506e-06, "loss": 0.1677, "step": 32130 }, { "epoch": 0.6962710983034689, "grad_norm": 1.6260309219360352, "learning_rate": 4.217322459646361e-06, "loss": 0.1905, "step": 32135 }, { "epoch": 0.6963794336229497, "grad_norm": 1.8996697664260864, "learning_rate": 4.214546096364234e-06, "loss": 0.3275, "step": 32140 }, { "epoch": 0.6964877689424306, "grad_norm": 3.7658066749572754, "learning_rate": 4.211770403240271e-06, "loss": 0.2578, "step": 32145 }, { "epoch": 0.6965961042619114, "grad_norm": 1.4738038778305054, "learning_rate": 4.2089953805960025e-06, "loss": 0.2212, "step": 32150 }, { "epoch": 0.6967044395813923, "grad_norm": 0.45457807183265686, "learning_rate": 4.206221028752867e-06, "loss": 0.2505, "step": 32155 }, { "epoch": 0.6968127749008732, "grad_norm": 1.7208404541015625, "learning_rate": 4.203447348032234e-06, "loss": 0.2458, "step": 32160 }, { "epoch": 0.6969211102203541, "grad_norm": 2.1016242504119873, "learning_rate": 4.2006743387553985e-06, "loss": 0.2079, "step": 32165 }, { "epoch": 0.6970294455398349, "grad_norm": 1.428452968597412, "learning_rate": 4.197902001243561e-06, "loss": 0.2572, "step": 32170 }, { "epoch": 0.6971377808593158, "grad_norm": 1.5683649778366089, "learning_rate": 4.1951303358178665e-06, "loss": 0.1655, "step": 32175 }, { "epoch": 0.6972461161787966, "grad_norm": 1.9912409782409668, "learning_rate": 4.192359342799361e-06, "loss": 0.3358, "step": 32180 }, { "epoch": 0.6973544514982775, "grad_norm": 1.4947282075881958, "learning_rate": 4.189589022509032e-06, "loss": 0.2871, "step": 32185 }, { "epoch": 0.6974627868177583, "grad_norm": 1.0764062404632568, "learning_rate": 4.186819375267771e-06, "loss": 0.2434, "step": 32190 }, { "epoch": 0.6975711221372392, "grad_norm": 1.8096129894256592, "learning_rate": 4.184050401396407e-06, "loss": 0.3078, "step": 32195 }, { "epoch": 0.69767945745672, "grad_norm": 1.5442126989364624, "learning_rate": 4.181282101215678e-06, "loss": 0.1353, "step": 32200 }, { "epoch": 0.6977877927762008, "grad_norm": 1.8780046701431274, "learning_rate": 4.178514475046256e-06, "loss": 0.2029, "step": 32205 }, { "epoch": 0.6978961280956818, "grad_norm": 1.1968042850494385, "learning_rate": 4.175747523208723e-06, "loss": 0.2976, "step": 32210 }, { "epoch": 0.6980044634151626, "grad_norm": 0.8299019932746887, "learning_rate": 4.172981246023592e-06, "loss": 0.2174, "step": 32215 }, { "epoch": 0.6981127987346435, "grad_norm": 1.2883594036102295, "learning_rate": 4.170215643811299e-06, "loss": 0.2558, "step": 32220 }, { "epoch": 0.6982211340541243, "grad_norm": 0.14868655800819397, "learning_rate": 4.16745071689219e-06, "loss": 0.2361, "step": 32225 }, { "epoch": 0.6983294693736052, "grad_norm": 1.4376949071884155, "learning_rate": 4.164686465586546e-06, "loss": 0.2858, "step": 32230 }, { "epoch": 0.698437804693086, "grad_norm": 2.0646920204162598, "learning_rate": 4.16192289021456e-06, "loss": 0.2365, "step": 32235 }, { "epoch": 0.6985461400125669, "grad_norm": 1.4991703033447266, "learning_rate": 4.159159991096355e-06, "loss": 0.2716, "step": 32240 }, { "epoch": 0.6986544753320477, "grad_norm": 1.4473557472229004, "learning_rate": 4.156397768551971e-06, "loss": 0.1859, "step": 32245 }, { "epoch": 0.6987628106515286, "grad_norm": 1.0830503702163696, "learning_rate": 4.153636222901364e-06, "loss": 0.2398, "step": 32250 }, { "epoch": 0.6988711459710095, "grad_norm": 1.3010209798812866, "learning_rate": 4.150875354464421e-06, "loss": 0.1728, "step": 32255 }, { "epoch": 0.6989794812904904, "grad_norm": 1.1985191106796265, "learning_rate": 4.1481151635609495e-06, "loss": 0.21, "step": 32260 }, { "epoch": 0.6990878166099712, "grad_norm": 1.9428786039352417, "learning_rate": 4.145355650510679e-06, "loss": 0.2053, "step": 32265 }, { "epoch": 0.6991961519294521, "grad_norm": 1.272315263748169, "learning_rate": 4.1425968156332485e-06, "loss": 0.1632, "step": 32270 }, { "epoch": 0.6993044872489329, "grad_norm": 1.0924185514450073, "learning_rate": 4.1398386592482386e-06, "loss": 0.274, "step": 32275 }, { "epoch": 0.6994128225684138, "grad_norm": 1.3311012983322144, "learning_rate": 4.1370811816751335e-06, "loss": 0.3319, "step": 32280 }, { "epoch": 0.6995211578878946, "grad_norm": 1.1376316547393799, "learning_rate": 4.134324383233344e-06, "loss": 0.2411, "step": 32285 }, { "epoch": 0.6996294932073754, "grad_norm": 1.5123491287231445, "learning_rate": 4.1315682642422095e-06, "loss": 0.2467, "step": 32290 }, { "epoch": 0.6997378285268563, "grad_norm": 1.3738032579421997, "learning_rate": 4.128812825020981e-06, "loss": 0.236, "step": 32295 }, { "epoch": 0.6998461638463371, "grad_norm": 2.043184280395508, "learning_rate": 4.126058065888837e-06, "loss": 0.3498, "step": 32300 }, { "epoch": 0.6999544991658181, "grad_norm": 1.8114968538284302, "learning_rate": 4.123303987164869e-06, "loss": 0.2368, "step": 32305 }, { "epoch": 0.7000628344852989, "grad_norm": 1.3093560934066772, "learning_rate": 4.120550589168108e-06, "loss": 0.2523, "step": 32310 }, { "epoch": 0.7001711698047798, "grad_norm": 1.3793439865112305, "learning_rate": 4.117797872217488e-06, "loss": 0.2888, "step": 32315 }, { "epoch": 0.7002795051242606, "grad_norm": 1.5829983949661255, "learning_rate": 4.115045836631865e-06, "loss": 0.1891, "step": 32320 }, { "epoch": 0.7003878404437415, "grad_norm": 1.7471634149551392, "learning_rate": 4.1122944827300286e-06, "loss": 0.1532, "step": 32325 }, { "epoch": 0.7004961757632223, "grad_norm": 1.4210535287857056, "learning_rate": 4.109543810830675e-06, "loss": 0.3122, "step": 32330 }, { "epoch": 0.7006045110827032, "grad_norm": 1.7088650465011597, "learning_rate": 4.106793821252435e-06, "loss": 0.2731, "step": 32335 }, { "epoch": 0.700712846402184, "grad_norm": 1.5383028984069824, "learning_rate": 4.104044514313847e-06, "loss": 0.2867, "step": 32340 }, { "epoch": 0.7008211817216649, "grad_norm": 1.593345284461975, "learning_rate": 4.1012958903333855e-06, "loss": 0.1893, "step": 32345 }, { "epoch": 0.7009295170411457, "grad_norm": 1.094819188117981, "learning_rate": 4.098547949629428e-06, "loss": 0.1674, "step": 32350 }, { "epoch": 0.7010378523606267, "grad_norm": 1.9960222244262695, "learning_rate": 4.095800692520287e-06, "loss": 0.2996, "step": 32355 }, { "epoch": 0.7011461876801075, "grad_norm": 2.262162685394287, "learning_rate": 4.093054119324195e-06, "loss": 0.282, "step": 32360 }, { "epoch": 0.7012545229995883, "grad_norm": 1.8114511966705322, "learning_rate": 4.090308230359292e-06, "loss": 0.1626, "step": 32365 }, { "epoch": 0.7013628583190692, "grad_norm": 0.7878893613815308, "learning_rate": 4.087563025943658e-06, "loss": 0.2074, "step": 32370 }, { "epoch": 0.70147119363855, "grad_norm": 1.704612135887146, "learning_rate": 4.084818506395276e-06, "loss": 0.209, "step": 32375 }, { "epoch": 0.7015795289580309, "grad_norm": 1.3598836660385132, "learning_rate": 4.0820746720320635e-06, "loss": 0.2245, "step": 32380 }, { "epoch": 0.7016878642775117, "grad_norm": 1.9456690549850464, "learning_rate": 4.079331523171845e-06, "loss": 0.2626, "step": 32385 }, { "epoch": 0.7017961995969926, "grad_norm": 1.0952228307724, "learning_rate": 4.076589060132384e-06, "loss": 0.3015, "step": 32390 }, { "epoch": 0.7019045349164734, "grad_norm": 1.7882487773895264, "learning_rate": 4.073847283231343e-06, "loss": 0.2314, "step": 32395 }, { "epoch": 0.7020128702359544, "grad_norm": 1.5925095081329346, "learning_rate": 4.0711061927863205e-06, "loss": 0.2332, "step": 32400 }, { "epoch": 0.7021212055554352, "grad_norm": 1.2908929586410522, "learning_rate": 4.068365789114834e-06, "loss": 0.2148, "step": 32405 }, { "epoch": 0.7022295408749161, "grad_norm": 1.300991415977478, "learning_rate": 4.065626072534311e-06, "loss": 0.2299, "step": 32410 }, { "epoch": 0.7023378761943969, "grad_norm": 1.1896989345550537, "learning_rate": 4.062887043362116e-06, "loss": 0.2946, "step": 32415 }, { "epoch": 0.7024462115138778, "grad_norm": 1.0259708166122437, "learning_rate": 4.060148701915514e-06, "loss": 0.1826, "step": 32420 }, { "epoch": 0.7025545468333586, "grad_norm": 1.3015061616897583, "learning_rate": 4.057411048511709e-06, "loss": 0.1725, "step": 32425 }, { "epoch": 0.7026628821528395, "grad_norm": 1.5652498006820679, "learning_rate": 4.0546740834678125e-06, "loss": 0.1125, "step": 32430 }, { "epoch": 0.7027712174723203, "grad_norm": 1.136394739151001, "learning_rate": 4.051937807100864e-06, "loss": 0.2618, "step": 32435 }, { "epoch": 0.7028795527918011, "grad_norm": 1.7085932493209839, "learning_rate": 4.049202219727815e-06, "loss": 0.143, "step": 32440 }, { "epoch": 0.702987888111282, "grad_norm": 1.0306082963943481, "learning_rate": 4.0464673216655516e-06, "loss": 0.2565, "step": 32445 }, { "epoch": 0.703096223430763, "grad_norm": 1.9310942888259888, "learning_rate": 4.04373311323086e-06, "loss": 0.318, "step": 32450 }, { "epoch": 0.7032045587502438, "grad_norm": 1.5315052270889282, "learning_rate": 4.040999594740463e-06, "loss": 0.2683, "step": 32455 }, { "epoch": 0.7033128940697246, "grad_norm": 1.6319773197174072, "learning_rate": 4.038266766511e-06, "loss": 0.2208, "step": 32460 }, { "epoch": 0.7034212293892055, "grad_norm": 0.5930204391479492, "learning_rate": 4.035534628859021e-06, "loss": 0.1862, "step": 32465 }, { "epoch": 0.7035295647086863, "grad_norm": 1.8852993249893188, "learning_rate": 4.0328031821010126e-06, "loss": 0.297, "step": 32470 }, { "epoch": 0.7036379000281672, "grad_norm": 1.940508246421814, "learning_rate": 4.030072426553363e-06, "loss": 0.2421, "step": 32475 }, { "epoch": 0.703746235347648, "grad_norm": 1.310131311416626, "learning_rate": 4.027342362532396e-06, "loss": 0.2259, "step": 32480 }, { "epoch": 0.7038545706671289, "grad_norm": 1.471665382385254, "learning_rate": 4.024612990354347e-06, "loss": 0.2622, "step": 32485 }, { "epoch": 0.7039629059866097, "grad_norm": 1.4191797971725464, "learning_rate": 4.021884310335368e-06, "loss": 0.2057, "step": 32490 }, { "epoch": 0.7040712413060907, "grad_norm": 1.551859736442566, "learning_rate": 4.01915632279154e-06, "loss": 0.1718, "step": 32495 }, { "epoch": 0.7041795766255715, "grad_norm": 1.4693994522094727, "learning_rate": 4.016429028038858e-06, "loss": 0.246, "step": 32500 }, { "epoch": 0.7042879119450524, "grad_norm": 1.7600572109222412, "learning_rate": 4.013702426393242e-06, "loss": 0.1785, "step": 32505 }, { "epoch": 0.7043962472645332, "grad_norm": 1.626952886581421, "learning_rate": 4.010976518170523e-06, "loss": 0.1931, "step": 32510 }, { "epoch": 0.704504582584014, "grad_norm": 1.1606754064559937, "learning_rate": 4.008251303686462e-06, "loss": 0.2043, "step": 32515 }, { "epoch": 0.7046129179034949, "grad_norm": 1.9316539764404297, "learning_rate": 4.005526783256731e-06, "loss": 0.1781, "step": 32520 }, { "epoch": 0.7047212532229757, "grad_norm": 1.6530368328094482, "learning_rate": 4.002802957196922e-06, "loss": 0.2767, "step": 32525 }, { "epoch": 0.7048295885424566, "grad_norm": 1.6191197633743286, "learning_rate": 4.000079825822556e-06, "loss": 0.2151, "step": 32530 }, { "epoch": 0.7049379238619374, "grad_norm": 1.1945923566818237, "learning_rate": 3.997357389449059e-06, "loss": 0.2261, "step": 32535 }, { "epoch": 0.7050462591814183, "grad_norm": 1.0435994863510132, "learning_rate": 3.994635648391792e-06, "loss": 0.3403, "step": 32540 }, { "epoch": 0.7051545945008992, "grad_norm": 1.4820661544799805, "learning_rate": 3.991914602966019e-06, "loss": 0.2863, "step": 32545 }, { "epoch": 0.7052629298203801, "grad_norm": 1.049131155014038, "learning_rate": 3.989194253486944e-06, "loss": 0.2531, "step": 32550 }, { "epoch": 0.7053712651398609, "grad_norm": 1.8766525983810425, "learning_rate": 3.986474600269674e-06, "loss": 0.3285, "step": 32555 }, { "epoch": 0.7054796004593418, "grad_norm": 1.5337674617767334, "learning_rate": 3.9837556436292345e-06, "loss": 0.3077, "step": 32560 }, { "epoch": 0.7055879357788226, "grad_norm": 1.5196319818496704, "learning_rate": 3.981037383880585e-06, "loss": 0.226, "step": 32565 }, { "epoch": 0.7056962710983035, "grad_norm": 1.0809870958328247, "learning_rate": 3.978319821338586e-06, "loss": 0.2264, "step": 32570 }, { "epoch": 0.7058046064177843, "grad_norm": 1.6931352615356445, "learning_rate": 3.9756029563180344e-06, "loss": 0.2013, "step": 32575 }, { "epoch": 0.7059129417372652, "grad_norm": 1.9373215436935425, "learning_rate": 3.972886789133632e-06, "loss": 0.2226, "step": 32580 }, { "epoch": 0.706021277056746, "grad_norm": 0.9151611328125, "learning_rate": 3.970171320100012e-06, "loss": 0.1682, "step": 32585 }, { "epoch": 0.7061296123762268, "grad_norm": 1.9465593099594116, "learning_rate": 3.967456549531714e-06, "loss": 0.207, "step": 32590 }, { "epoch": 0.7062379476957078, "grad_norm": 1.73361074924469, "learning_rate": 3.964742477743207e-06, "loss": 0.1741, "step": 32595 }, { "epoch": 0.7063462830151886, "grad_norm": 2.110515832901001, "learning_rate": 3.962029105048881e-06, "loss": 0.1867, "step": 32600 }, { "epoch": 0.7064546183346695, "grad_norm": 1.285367727279663, "learning_rate": 3.95931643176303e-06, "loss": 0.2805, "step": 32605 }, { "epoch": 0.7065629536541503, "grad_norm": 1.343434453010559, "learning_rate": 3.956604458199884e-06, "loss": 0.17, "step": 32610 }, { "epoch": 0.7066712889736312, "grad_norm": 1.829085350036621, "learning_rate": 3.953893184673579e-06, "loss": 0.2754, "step": 32615 }, { "epoch": 0.706779624293112, "grad_norm": 1.8072922229766846, "learning_rate": 3.95118261149818e-06, "loss": 0.2658, "step": 32620 }, { "epoch": 0.7068879596125929, "grad_norm": 1.210477590560913, "learning_rate": 3.948472738987661e-06, "loss": 0.126, "step": 32625 }, { "epoch": 0.7069962949320737, "grad_norm": 2.3871777057647705, "learning_rate": 3.9457635674559266e-06, "loss": 0.2365, "step": 32630 }, { "epoch": 0.7071046302515546, "grad_norm": 1.400229573249817, "learning_rate": 3.943055097216788e-06, "loss": 0.2268, "step": 32635 }, { "epoch": 0.7072129655710355, "grad_norm": 0.6280673146247864, "learning_rate": 3.9403473285839826e-06, "loss": 0.2123, "step": 32640 }, { "epoch": 0.7073213008905164, "grad_norm": 1.326465368270874, "learning_rate": 3.93764026187117e-06, "loss": 0.1439, "step": 32645 }, { "epoch": 0.7074296362099972, "grad_norm": 1.7872737646102905, "learning_rate": 3.9349338973919135e-06, "loss": 0.3319, "step": 32650 }, { "epoch": 0.7075379715294781, "grad_norm": 2.296229362487793, "learning_rate": 3.9322282354597155e-06, "loss": 0.2969, "step": 32655 }, { "epoch": 0.7076463068489589, "grad_norm": 1.2124724388122559, "learning_rate": 3.929523276387976e-06, "loss": 0.2493, "step": 32660 }, { "epoch": 0.7077546421684398, "grad_norm": 1.4569852352142334, "learning_rate": 3.926819020490035e-06, "loss": 0.2145, "step": 32665 }, { "epoch": 0.7078629774879206, "grad_norm": 1.0132942199707031, "learning_rate": 3.924115468079129e-06, "loss": 0.2778, "step": 32670 }, { "epoch": 0.7079713128074014, "grad_norm": 1.6505491733551025, "learning_rate": 3.921412619468434e-06, "loss": 0.1635, "step": 32675 }, { "epoch": 0.7080796481268823, "grad_norm": 1.9462132453918457, "learning_rate": 3.918710474971026e-06, "loss": 0.2799, "step": 32680 }, { "epoch": 0.7081879834463631, "grad_norm": 1.2593002319335938, "learning_rate": 3.916009034899915e-06, "loss": 0.2435, "step": 32685 }, { "epoch": 0.7082963187658441, "grad_norm": 1.6624393463134766, "learning_rate": 3.913308299568015e-06, "loss": 0.2563, "step": 32690 }, { "epoch": 0.7084046540853249, "grad_norm": 0.9858646392822266, "learning_rate": 3.9106082692881705e-06, "loss": 0.1875, "step": 32695 }, { "epoch": 0.7085129894048058, "grad_norm": 1.112999439239502, "learning_rate": 3.907908944373142e-06, "loss": 0.1305, "step": 32700 }, { "epoch": 0.7086213247242866, "grad_norm": 1.5933187007904053, "learning_rate": 3.9052103251356e-06, "loss": 0.2494, "step": 32705 }, { "epoch": 0.7087296600437675, "grad_norm": 1.4504656791687012, "learning_rate": 3.902512411888145e-06, "loss": 0.2315, "step": 32710 }, { "epoch": 0.7088379953632483, "grad_norm": 1.3843859434127808, "learning_rate": 3.899815204943284e-06, "loss": 0.1759, "step": 32715 }, { "epoch": 0.7089463306827292, "grad_norm": 1.6236572265625, "learning_rate": 3.897118704613453e-06, "loss": 0.2889, "step": 32720 }, { "epoch": 0.70905466600221, "grad_norm": 1.949787974357605, "learning_rate": 3.894422911210999e-06, "loss": 0.2295, "step": 32725 }, { "epoch": 0.7091630013216909, "grad_norm": 1.370322585105896, "learning_rate": 3.891727825048186e-06, "loss": 0.2442, "step": 32730 }, { "epoch": 0.7092713366411717, "grad_norm": 1.2353841066360474, "learning_rate": 3.889033446437206e-06, "loss": 0.3108, "step": 32735 }, { "epoch": 0.7093796719606527, "grad_norm": 1.4656122922897339, "learning_rate": 3.886339775690152e-06, "loss": 0.2797, "step": 32740 }, { "epoch": 0.7094880072801335, "grad_norm": 1.8654288053512573, "learning_rate": 3.88364681311906e-06, "loss": 0.1994, "step": 32745 }, { "epoch": 0.7095963425996143, "grad_norm": 1.2574138641357422, "learning_rate": 3.880954559035858e-06, "loss": 0.1448, "step": 32750 }, { "epoch": 0.7097046779190952, "grad_norm": 1.3290377855300903, "learning_rate": 3.87826301375241e-06, "loss": 0.1543, "step": 32755 }, { "epoch": 0.709813013238576, "grad_norm": 0.9880591630935669, "learning_rate": 3.875572177580489e-06, "loss": 0.212, "step": 32760 }, { "epoch": 0.7099213485580569, "grad_norm": 1.718342900276184, "learning_rate": 3.872882050831782e-06, "loss": 0.2386, "step": 32765 }, { "epoch": 0.7100296838775377, "grad_norm": 1.7067369222640991, "learning_rate": 3.870192633817911e-06, "loss": 0.2898, "step": 32770 }, { "epoch": 0.7101380191970186, "grad_norm": 1.6783127784729004, "learning_rate": 3.867503926850395e-06, "loss": 0.3022, "step": 32775 }, { "epoch": 0.7102463545164994, "grad_norm": 1.3121761083602905, "learning_rate": 3.864815930240686e-06, "loss": 0.2154, "step": 32780 }, { "epoch": 0.7103546898359804, "grad_norm": 1.0875020027160645, "learning_rate": 3.86212864430014e-06, "loss": 0.2162, "step": 32785 }, { "epoch": 0.7104630251554612, "grad_norm": 1.295305848121643, "learning_rate": 3.859442069340054e-06, "loss": 0.1867, "step": 32790 }, { "epoch": 0.7105713604749421, "grad_norm": 1.6062852144241333, "learning_rate": 3.856756205671617e-06, "loss": 0.1802, "step": 32795 }, { "epoch": 0.7106796957944229, "grad_norm": 1.679443359375, "learning_rate": 3.8540710536059445e-06, "loss": 0.2133, "step": 32800 }, { "epoch": 0.7107880311139038, "grad_norm": 1.4216535091400146, "learning_rate": 3.851386613454079e-06, "loss": 0.2055, "step": 32805 }, { "epoch": 0.7108963664333846, "grad_norm": 1.3090118169784546, "learning_rate": 3.848702885526964e-06, "loss": 0.2822, "step": 32810 }, { "epoch": 0.7110047017528655, "grad_norm": 1.661919355392456, "learning_rate": 3.8460198701354765e-06, "loss": 0.3073, "step": 32815 }, { "epoch": 0.7111130370723463, "grad_norm": 1.5184849500656128, "learning_rate": 3.843337567590397e-06, "loss": 0.2091, "step": 32820 }, { "epoch": 0.7112213723918271, "grad_norm": 1.0026801824569702, "learning_rate": 3.840655978202436e-06, "loss": 0.199, "step": 32825 }, { "epoch": 0.711329707711308, "grad_norm": 1.3408933877944946, "learning_rate": 3.837975102282211e-06, "loss": 0.2418, "step": 32830 }, { "epoch": 0.711438043030789, "grad_norm": 1.6063203811645508, "learning_rate": 3.835294940140263e-06, "loss": 0.2418, "step": 32835 }, { "epoch": 0.7115463783502698, "grad_norm": 0.9630728363990784, "learning_rate": 3.832615492087053e-06, "loss": 0.2164, "step": 32840 }, { "epoch": 0.7116547136697506, "grad_norm": 1.2655014991760254, "learning_rate": 3.829936758432946e-06, "loss": 0.2188, "step": 32845 }, { "epoch": 0.7117630489892315, "grad_norm": 1.4899444580078125, "learning_rate": 3.827258739488242e-06, "loss": 0.1792, "step": 32850 }, { "epoch": 0.7118713843087123, "grad_norm": 1.0572502613067627, "learning_rate": 3.824581435563142e-06, "loss": 0.1963, "step": 32855 }, { "epoch": 0.7119797196281932, "grad_norm": 1.3922137022018433, "learning_rate": 3.821904846967778e-06, "loss": 0.1793, "step": 32860 }, { "epoch": 0.712088054947674, "grad_norm": 1.9695240259170532, "learning_rate": 3.819228974012187e-06, "loss": 0.2452, "step": 32865 }, { "epoch": 0.7121963902671549, "grad_norm": 0.8573660850524902, "learning_rate": 3.816553817006335e-06, "loss": 0.2708, "step": 32870 }, { "epoch": 0.7123047255866357, "grad_norm": 1.5343166589736938, "learning_rate": 3.81387937626009e-06, "loss": 0.2029, "step": 32875 }, { "epoch": 0.7124130609061166, "grad_norm": 1.138569951057434, "learning_rate": 3.8112056520832563e-06, "loss": 0.3131, "step": 32880 }, { "epoch": 0.7125213962255975, "grad_norm": 2.0841493606567383, "learning_rate": 3.8085326447855353e-06, "loss": 0.2546, "step": 32885 }, { "epoch": 0.7126297315450784, "grad_norm": 1.6852097511291504, "learning_rate": 3.8058603546765593e-06, "loss": 0.1454, "step": 32890 }, { "epoch": 0.7127380668645592, "grad_norm": 1.9054628610610962, "learning_rate": 3.8031887820658776e-06, "loss": 0.2589, "step": 32895 }, { "epoch": 0.71284640218404, "grad_norm": 1.8592365980148315, "learning_rate": 3.800517927262942e-06, "loss": 0.2401, "step": 32900 }, { "epoch": 0.7129547375035209, "grad_norm": 1.3314216136932373, "learning_rate": 3.797847790577142e-06, "loss": 0.2383, "step": 32905 }, { "epoch": 0.7130630728230017, "grad_norm": 1.686238169670105, "learning_rate": 3.7951783723177614e-06, "loss": 0.2547, "step": 32910 }, { "epoch": 0.7131714081424826, "grad_norm": 1.8046084642410278, "learning_rate": 3.7925096727940236e-06, "loss": 0.2224, "step": 32915 }, { "epoch": 0.7132797434619634, "grad_norm": 1.4659614562988281, "learning_rate": 3.7898416923150473e-06, "loss": 0.2003, "step": 32920 }, { "epoch": 0.7133880787814443, "grad_norm": 1.7653000354766846, "learning_rate": 3.7871744311898875e-06, "loss": 0.3447, "step": 32925 }, { "epoch": 0.7134964141009252, "grad_norm": 1.6191095113754272, "learning_rate": 3.7845078897274968e-06, "loss": 0.2149, "step": 32930 }, { "epoch": 0.7136047494204061, "grad_norm": 1.9424875974655151, "learning_rate": 3.7818420682367598e-06, "loss": 0.236, "step": 32935 }, { "epoch": 0.7137130847398869, "grad_norm": 1.6161638498306274, "learning_rate": 3.7791769670264746e-06, "loss": 0.209, "step": 32940 }, { "epoch": 0.7138214200593678, "grad_norm": 1.737412452697754, "learning_rate": 3.7765125864053454e-06, "loss": 0.2913, "step": 32945 }, { "epoch": 0.7139297553788486, "grad_norm": 1.573883295059204, "learning_rate": 3.77384892668201e-06, "loss": 0.2869, "step": 32950 }, { "epoch": 0.7140380906983295, "grad_norm": 1.3131483793258667, "learning_rate": 3.771185988165005e-06, "loss": 0.274, "step": 32955 }, { "epoch": 0.7141464260178103, "grad_norm": 1.5509929656982422, "learning_rate": 3.768523771162799e-06, "loss": 0.2152, "step": 32960 }, { "epoch": 0.7142547613372912, "grad_norm": 1.6491219997406006, "learning_rate": 3.7658622759837626e-06, "loss": 0.2875, "step": 32965 }, { "epoch": 0.714363096656772, "grad_norm": 1.6016161441802979, "learning_rate": 3.763201502936198e-06, "loss": 0.2669, "step": 32970 }, { "epoch": 0.7144714319762528, "grad_norm": 0.6888905167579651, "learning_rate": 3.7605414523283124e-06, "loss": 0.2502, "step": 32975 }, { "epoch": 0.7145797672957338, "grad_norm": 1.4446170330047607, "learning_rate": 3.757882124468225e-06, "loss": 0.2643, "step": 32980 }, { "epoch": 0.7146881026152146, "grad_norm": 1.3447110652923584, "learning_rate": 3.755223519663994e-06, "loss": 0.2214, "step": 32985 }, { "epoch": 0.7147964379346955, "grad_norm": 1.5436547994613647, "learning_rate": 3.7525656382235675e-06, "loss": 0.3132, "step": 32990 }, { "epoch": 0.7149047732541763, "grad_norm": 1.6348052024841309, "learning_rate": 3.7499084804548293e-06, "loss": 0.2219, "step": 32995 }, { "epoch": 0.7150131085736572, "grad_norm": 1.3072949647903442, "learning_rate": 3.7472520466655625e-06, "loss": 0.3098, "step": 33000 }, { "epoch": 0.715121443893138, "grad_norm": 0.9536283612251282, "learning_rate": 3.7445963371634853e-06, "loss": 0.2416, "step": 33005 }, { "epoch": 0.7152297792126189, "grad_norm": 1.3256930112838745, "learning_rate": 3.7419413522562154e-06, "loss": 0.2018, "step": 33010 }, { "epoch": 0.7153381145320997, "grad_norm": 1.360015630722046, "learning_rate": 3.7392870922512902e-06, "loss": 0.2636, "step": 33015 }, { "epoch": 0.7154464498515806, "grad_norm": 1.4564825296401978, "learning_rate": 3.7366335574561743e-06, "loss": 0.2068, "step": 33020 }, { "epoch": 0.7155547851710615, "grad_norm": 1.7972829341888428, "learning_rate": 3.7339807481782286e-06, "loss": 0.182, "step": 33025 }, { "epoch": 0.7156631204905424, "grad_norm": 1.1066845655441284, "learning_rate": 3.731328664724755e-06, "loss": 0.2681, "step": 33030 }, { "epoch": 0.7157714558100232, "grad_norm": 2.1634700298309326, "learning_rate": 3.728677307402947e-06, "loss": 0.2712, "step": 33035 }, { "epoch": 0.7158797911295041, "grad_norm": 1.582298994064331, "learning_rate": 3.7260266765199327e-06, "loss": 0.1953, "step": 33040 }, { "epoch": 0.7159881264489849, "grad_norm": 1.737075924873352, "learning_rate": 3.723376772382743e-06, "loss": 0.22, "step": 33045 }, { "epoch": 0.7160964617684658, "grad_norm": 2.1857340335845947, "learning_rate": 3.720727595298328e-06, "loss": 0.2873, "step": 33050 }, { "epoch": 0.7162047970879466, "grad_norm": 1.4632419347763062, "learning_rate": 3.71807914557356e-06, "loss": 0.2036, "step": 33055 }, { "epoch": 0.7163131324074274, "grad_norm": 1.6415979862213135, "learning_rate": 3.715431423515217e-06, "loss": 0.2188, "step": 33060 }, { "epoch": 0.7164214677269083, "grad_norm": 1.3872548341751099, "learning_rate": 3.7127844294300043e-06, "loss": 0.2463, "step": 33065 }, { "epoch": 0.7165298030463891, "grad_norm": 1.7177218198776245, "learning_rate": 3.7101381636245283e-06, "loss": 0.242, "step": 33070 }, { "epoch": 0.7166381383658701, "grad_norm": 1.2973510026931763, "learning_rate": 3.707492626405326e-06, "loss": 0.1436, "step": 33075 }, { "epoch": 0.7167464736853509, "grad_norm": 0.9173075556755066, "learning_rate": 3.704847818078843e-06, "loss": 0.2475, "step": 33080 }, { "epoch": 0.7168548090048318, "grad_norm": 1.2277723550796509, "learning_rate": 3.7022037389514354e-06, "loss": 0.3267, "step": 33085 }, { "epoch": 0.7169631443243126, "grad_norm": 1.4273842573165894, "learning_rate": 3.6995603893293873e-06, "loss": 0.2887, "step": 33090 }, { "epoch": 0.7170714796437935, "grad_norm": 2.739745616912842, "learning_rate": 3.6969177695188853e-06, "loss": 0.2542, "step": 33095 }, { "epoch": 0.7171798149632743, "grad_norm": 1.7834949493408203, "learning_rate": 3.6942758798260416e-06, "loss": 0.238, "step": 33100 }, { "epoch": 0.7172881502827552, "grad_norm": 1.2212451696395874, "learning_rate": 3.6916347205568747e-06, "loss": 0.1656, "step": 33105 }, { "epoch": 0.717396485602236, "grad_norm": 1.6442527770996094, "learning_rate": 3.6889942920173292e-06, "loss": 0.2322, "step": 33110 }, { "epoch": 0.7175048209217169, "grad_norm": 1.0742897987365723, "learning_rate": 3.6863545945132526e-06, "loss": 0.1954, "step": 33115 }, { "epoch": 0.7176131562411977, "grad_norm": 0.8482387661933899, "learning_rate": 3.6837156283504217e-06, "loss": 0.2071, "step": 33120 }, { "epoch": 0.7177214915606787, "grad_norm": 1.138070821762085, "learning_rate": 3.6810773938345136e-06, "loss": 0.2521, "step": 33125 }, { "epoch": 0.7178298268801595, "grad_norm": 0.7745825052261353, "learning_rate": 3.678439891271133e-06, "loss": 0.1696, "step": 33130 }, { "epoch": 0.7179381621996404, "grad_norm": 1.2558728456497192, "learning_rate": 3.675803120965796e-06, "loss": 0.3072, "step": 33135 }, { "epoch": 0.7180464975191212, "grad_norm": 1.5176708698272705, "learning_rate": 3.6731670832239275e-06, "loss": 0.2677, "step": 33140 }, { "epoch": 0.718154832838602, "grad_norm": 2.6630501747131348, "learning_rate": 3.670531778350881e-06, "loss": 0.286, "step": 33145 }, { "epoch": 0.7182631681580829, "grad_norm": 1.2490957975387573, "learning_rate": 3.667897206651908e-06, "loss": 0.1918, "step": 33150 }, { "epoch": 0.7183715034775637, "grad_norm": 1.7471153736114502, "learning_rate": 3.6652633684321926e-06, "loss": 0.2315, "step": 33155 }, { "epoch": 0.7184798387970446, "grad_norm": 1.0597492456436157, "learning_rate": 3.6626302639968194e-06, "loss": 0.2645, "step": 33160 }, { "epoch": 0.7185881741165254, "grad_norm": 1.7654389142990112, "learning_rate": 3.6599978936507987e-06, "loss": 0.3331, "step": 33165 }, { "epoch": 0.7186965094360064, "grad_norm": 2.0070502758026123, "learning_rate": 3.6573662576990442e-06, "loss": 0.1613, "step": 33170 }, { "epoch": 0.7188048447554872, "grad_norm": 1.3902455568313599, "learning_rate": 3.6547353564463972e-06, "loss": 0.1558, "step": 33175 }, { "epoch": 0.7189131800749681, "grad_norm": 1.3089884519577026, "learning_rate": 3.6521051901976112e-06, "loss": 0.2005, "step": 33180 }, { "epoch": 0.7190215153944489, "grad_norm": 1.2541801929473877, "learning_rate": 3.649475759257343e-06, "loss": 0.1641, "step": 33185 }, { "epoch": 0.7191298507139298, "grad_norm": 1.9935312271118164, "learning_rate": 3.646847063930181e-06, "loss": 0.2369, "step": 33190 }, { "epoch": 0.7192381860334106, "grad_norm": 1.5234309434890747, "learning_rate": 3.644219104520613e-06, "loss": 0.3057, "step": 33195 }, { "epoch": 0.7193465213528915, "grad_norm": 1.2354400157928467, "learning_rate": 3.6415918813330564e-06, "loss": 0.1982, "step": 33200 }, { "epoch": 0.7194548566723723, "grad_norm": 1.3134759664535522, "learning_rate": 3.638965394671826e-06, "loss": 0.2597, "step": 33205 }, { "epoch": 0.7195631919918531, "grad_norm": 2.162731647491455, "learning_rate": 3.6363396448411715e-06, "loss": 0.2397, "step": 33210 }, { "epoch": 0.719671527311334, "grad_norm": 1.459768295288086, "learning_rate": 3.633714632145241e-06, "loss": 0.2537, "step": 33215 }, { "epoch": 0.719779862630815, "grad_norm": 2.3411104679107666, "learning_rate": 3.6310903568880963e-06, "loss": 0.2709, "step": 33220 }, { "epoch": 0.7198881979502958, "grad_norm": 1.8857847452163696, "learning_rate": 3.6284668193737347e-06, "loss": 0.2135, "step": 33225 }, { "epoch": 0.7199965332697766, "grad_norm": 2.1785833835601807, "learning_rate": 3.625844019906043e-06, "loss": 0.284, "step": 33230 }, { "epoch": 0.7201048685892575, "grad_norm": 1.3487842082977295, "learning_rate": 3.6232219587888406e-06, "loss": 0.2013, "step": 33235 }, { "epoch": 0.7202132039087383, "grad_norm": 1.6820275783538818, "learning_rate": 3.6206006363258463e-06, "loss": 0.1784, "step": 33240 }, { "epoch": 0.7203215392282192, "grad_norm": 1.1948342323303223, "learning_rate": 3.6179800528207076e-06, "loss": 0.284, "step": 33245 }, { "epoch": 0.7204298745477, "grad_norm": 2.0834267139434814, "learning_rate": 3.615360208576978e-06, "loss": 0.2012, "step": 33250 }, { "epoch": 0.7205382098671809, "grad_norm": 1.8114930391311646, "learning_rate": 3.612741103898123e-06, "loss": 0.2092, "step": 33255 }, { "epoch": 0.7206465451866617, "grad_norm": 1.2832669019699097, "learning_rate": 3.6101227390875328e-06, "loss": 0.1644, "step": 33260 }, { "epoch": 0.7207548805061426, "grad_norm": 1.3964625597000122, "learning_rate": 3.6075051144484996e-06, "loss": 0.3218, "step": 33265 }, { "epoch": 0.7208632158256235, "grad_norm": 1.820168375968933, "learning_rate": 3.6048882302842404e-06, "loss": 0.1974, "step": 33270 }, { "epoch": 0.7209715511451044, "grad_norm": 2.2691516876220703, "learning_rate": 3.60227208689788e-06, "loss": 0.2431, "step": 33275 }, { "epoch": 0.7210798864645852, "grad_norm": 1.600515365600586, "learning_rate": 3.599656684592463e-06, "loss": 0.2147, "step": 33280 }, { "epoch": 0.721188221784066, "grad_norm": 1.3005732297897339, "learning_rate": 3.5970420236709434e-06, "loss": 0.2107, "step": 33285 }, { "epoch": 0.7212965571035469, "grad_norm": 1.273777723312378, "learning_rate": 3.5944281044361853e-06, "loss": 0.2847, "step": 33290 }, { "epoch": 0.7214048924230277, "grad_norm": 1.3298070430755615, "learning_rate": 3.5918149271909785e-06, "loss": 0.2341, "step": 33295 }, { "epoch": 0.7215132277425086, "grad_norm": 0.8789453506469727, "learning_rate": 3.5892024922380154e-06, "loss": 0.2301, "step": 33300 }, { "epoch": 0.7216215630619894, "grad_norm": 1.7380772829055786, "learning_rate": 3.586590799879912e-06, "loss": 0.2542, "step": 33305 }, { "epoch": 0.7217298983814703, "grad_norm": 1.257049560546875, "learning_rate": 3.5839798504191893e-06, "loss": 0.2306, "step": 33310 }, { "epoch": 0.7218382337009512, "grad_norm": 1.277728796005249, "learning_rate": 3.581369644158289e-06, "loss": 0.1757, "step": 33315 }, { "epoch": 0.7219465690204321, "grad_norm": 1.4309967756271362, "learning_rate": 3.5787601813995664e-06, "loss": 0.2376, "step": 33320 }, { "epoch": 0.7220549043399129, "grad_norm": 1.806978702545166, "learning_rate": 3.576151462445284e-06, "loss": 0.227, "step": 33325 }, { "epoch": 0.7221632396593938, "grad_norm": 1.3769314289093018, "learning_rate": 3.5735434875976292e-06, "loss": 0.2315, "step": 33330 }, { "epoch": 0.7222715749788746, "grad_norm": 1.221388816833496, "learning_rate": 3.570936257158689e-06, "loss": 0.1945, "step": 33335 }, { "epoch": 0.7223799102983555, "grad_norm": 2.394376277923584, "learning_rate": 3.568329771430481e-06, "loss": 0.2308, "step": 33340 }, { "epoch": 0.7224882456178363, "grad_norm": 1.194717526435852, "learning_rate": 3.5657240307149176e-06, "loss": 0.208, "step": 33345 }, { "epoch": 0.7225965809373172, "grad_norm": 1.3831346035003662, "learning_rate": 3.5631190353138434e-06, "loss": 0.1807, "step": 33350 }, { "epoch": 0.722704916256798, "grad_norm": 1.7643544673919678, "learning_rate": 3.560514785529001e-06, "loss": 0.224, "step": 33355 }, { "epoch": 0.7228132515762788, "grad_norm": 1.126312255859375, "learning_rate": 3.557911281662061e-06, "loss": 0.2486, "step": 33360 }, { "epoch": 0.7229215868957598, "grad_norm": 1.283664584159851, "learning_rate": 3.5553085240145936e-06, "loss": 0.2042, "step": 33365 }, { "epoch": 0.7230299222152407, "grad_norm": 0.889875590801239, "learning_rate": 3.552706512888091e-06, "loss": 0.1979, "step": 33370 }, { "epoch": 0.7231382575347215, "grad_norm": 1.6510008573532104, "learning_rate": 3.550105248583963e-06, "loss": 0.1745, "step": 33375 }, { "epoch": 0.7232465928542023, "grad_norm": 1.2973439693450928, "learning_rate": 3.5475047314035183e-06, "loss": 0.1932, "step": 33380 }, { "epoch": 0.7233549281736832, "grad_norm": 1.0335240364074707, "learning_rate": 3.5449049616479957e-06, "loss": 0.3034, "step": 33385 }, { "epoch": 0.723463263493164, "grad_norm": 1.1697602272033691, "learning_rate": 3.542305939618533e-06, "loss": 0.2308, "step": 33390 }, { "epoch": 0.7235715988126449, "grad_norm": 1.719089388847351, "learning_rate": 3.5397076656161944e-06, "loss": 0.2764, "step": 33395 }, { "epoch": 0.7236799341321257, "grad_norm": 1.7562345266342163, "learning_rate": 3.537110139941944e-06, "loss": 0.2401, "step": 33400 }, { "epoch": 0.7237882694516066, "grad_norm": 1.0470775365829468, "learning_rate": 3.5345133628966722e-06, "loss": 0.2315, "step": 33405 }, { "epoch": 0.7238966047710874, "grad_norm": 1.157529592514038, "learning_rate": 3.531917334781172e-06, "loss": 0.2772, "step": 33410 }, { "epoch": 0.7240049400905684, "grad_norm": 0.846537172794342, "learning_rate": 3.529322055896156e-06, "loss": 0.2153, "step": 33415 }, { "epoch": 0.7241132754100492, "grad_norm": 1.738317847251892, "learning_rate": 3.526727526542253e-06, "loss": 0.2854, "step": 33420 }, { "epoch": 0.7242216107295301, "grad_norm": 2.1784286499023438, "learning_rate": 3.5241337470199933e-06, "loss": 0.3498, "step": 33425 }, { "epoch": 0.7243299460490109, "grad_norm": 1.7350928783416748, "learning_rate": 3.5215407176298332e-06, "loss": 0.1743, "step": 33430 }, { "epoch": 0.7244382813684918, "grad_norm": 1.5095876455307007, "learning_rate": 3.518948438672131e-06, "loss": 0.2063, "step": 33435 }, { "epoch": 0.7245466166879726, "grad_norm": 2.022624969482422, "learning_rate": 3.5163569104471695e-06, "loss": 0.2254, "step": 33440 }, { "epoch": 0.7246549520074534, "grad_norm": 0.9727820754051208, "learning_rate": 3.513766133255131e-06, "loss": 0.1891, "step": 33445 }, { "epoch": 0.7247632873269343, "grad_norm": 1.2400418519973755, "learning_rate": 3.511176107396125e-06, "loss": 0.2326, "step": 33450 }, { "epoch": 0.7248716226464151, "grad_norm": 2.1846797466278076, "learning_rate": 3.5085868331701657e-06, "loss": 0.2481, "step": 33455 }, { "epoch": 0.7249799579658961, "grad_norm": 1.8904608488082886, "learning_rate": 3.505998310877172e-06, "loss": 0.2503, "step": 33460 }, { "epoch": 0.7250882932853769, "grad_norm": 1.9309903383255005, "learning_rate": 3.5034105408170026e-06, "loss": 0.2746, "step": 33465 }, { "epoch": 0.7251966286048578, "grad_norm": 1.7245689630508423, "learning_rate": 3.5008235232893984e-06, "loss": 0.232, "step": 33470 }, { "epoch": 0.7253049639243386, "grad_norm": 1.0931050777435303, "learning_rate": 3.498237258594035e-06, "loss": 0.1872, "step": 33475 }, { "epoch": 0.7254132992438195, "grad_norm": 2.307882785797119, "learning_rate": 3.4956517470304862e-06, "loss": 0.2679, "step": 33480 }, { "epoch": 0.7255216345633003, "grad_norm": 2.2433247566223145, "learning_rate": 3.4930669888982494e-06, "loss": 0.2764, "step": 33485 }, { "epoch": 0.7256299698827812, "grad_norm": 1.006781816482544, "learning_rate": 3.4904829844967293e-06, "loss": 0.1964, "step": 33490 }, { "epoch": 0.725738305202262, "grad_norm": 1.9054248332977295, "learning_rate": 3.4878997341252387e-06, "loss": 0.2038, "step": 33495 }, { "epoch": 0.7258466405217429, "grad_norm": 2.2389516830444336, "learning_rate": 3.485317238083017e-06, "loss": 0.2724, "step": 33500 }, { "epoch": 0.7259549758412237, "grad_norm": 1.202925205230713, "learning_rate": 3.4827354966691985e-06, "loss": 0.1956, "step": 33505 }, { "epoch": 0.7260633111607047, "grad_norm": 1.465765357017517, "learning_rate": 3.480154510182845e-06, "loss": 0.1466, "step": 33510 }, { "epoch": 0.7261716464801855, "grad_norm": 1.2451850175857544, "learning_rate": 3.4775742789229237e-06, "loss": 0.2562, "step": 33515 }, { "epoch": 0.7262799817996664, "grad_norm": 2.2591440677642822, "learning_rate": 3.474994803188321e-06, "loss": 0.2051, "step": 33520 }, { "epoch": 0.7263883171191472, "grad_norm": 1.5487760305404663, "learning_rate": 3.4724160832778243e-06, "loss": 0.2673, "step": 33525 }, { "epoch": 0.726496652438628, "grad_norm": 1.7202991247177124, "learning_rate": 3.4698381194901374e-06, "loss": 0.1708, "step": 33530 }, { "epoch": 0.7266049877581089, "grad_norm": 0.7635290026664734, "learning_rate": 3.467260912123888e-06, "loss": 0.2278, "step": 33535 }, { "epoch": 0.7267133230775897, "grad_norm": 1.4280554056167603, "learning_rate": 3.4646844614775965e-06, "loss": 0.2261, "step": 33540 }, { "epoch": 0.7268216583970706, "grad_norm": 0.987735390663147, "learning_rate": 3.4621087678497147e-06, "loss": 0.2825, "step": 33545 }, { "epoch": 0.7269299937165514, "grad_norm": 1.8146653175354004, "learning_rate": 3.459533831538592e-06, "loss": 0.2603, "step": 33550 }, { "epoch": 0.7270383290360324, "grad_norm": 1.19858717918396, "learning_rate": 3.4569596528424974e-06, "loss": 0.1387, "step": 33555 }, { "epoch": 0.7271466643555132, "grad_norm": 1.2819981575012207, "learning_rate": 3.4543862320596167e-06, "loss": 0.2081, "step": 33560 }, { "epoch": 0.7272549996749941, "grad_norm": 1.036643147468567, "learning_rate": 3.451813569488034e-06, "loss": 0.1999, "step": 33565 }, { "epoch": 0.7273633349944749, "grad_norm": 0.3161188066005707, "learning_rate": 3.449241665425761e-06, "loss": 0.2121, "step": 33570 }, { "epoch": 0.7274716703139558, "grad_norm": 1.861964464187622, "learning_rate": 3.4466705201707074e-06, "loss": 0.2258, "step": 33575 }, { "epoch": 0.7275800056334366, "grad_norm": 1.312355399131775, "learning_rate": 3.444100134020708e-06, "loss": 0.2604, "step": 33580 }, { "epoch": 0.7276883409529175, "grad_norm": 1.3286391496658325, "learning_rate": 3.4415305072734974e-06, "loss": 0.1463, "step": 33585 }, { "epoch": 0.7277966762723983, "grad_norm": 1.2034509181976318, "learning_rate": 3.4389616402267357e-06, "loss": 0.2257, "step": 33590 }, { "epoch": 0.7279050115918791, "grad_norm": 1.1655396223068237, "learning_rate": 3.43639353317798e-06, "loss": 0.2197, "step": 33595 }, { "epoch": 0.72801334691136, "grad_norm": 1.2485419511795044, "learning_rate": 3.4338261864247137e-06, "loss": 0.2651, "step": 33600 }, { "epoch": 0.728121682230841, "grad_norm": 2.5248632431030273, "learning_rate": 3.4312596002643184e-06, "loss": 0.201, "step": 33605 }, { "epoch": 0.7282300175503218, "grad_norm": 1.1552799940109253, "learning_rate": 3.4286937749940994e-06, "loss": 0.2128, "step": 33610 }, { "epoch": 0.7283383528698026, "grad_norm": 1.4997243881225586, "learning_rate": 3.426128710911273e-06, "loss": 0.153, "step": 33615 }, { "epoch": 0.7284466881892835, "grad_norm": 1.0205893516540527, "learning_rate": 3.423564408312954e-06, "loss": 0.2734, "step": 33620 }, { "epoch": 0.7285550235087643, "grad_norm": 1.7145835161209106, "learning_rate": 3.421000867496187e-06, "loss": 0.2695, "step": 33625 }, { "epoch": 0.7286633588282452, "grad_norm": 2.6737375259399414, "learning_rate": 3.4184380887579126e-06, "loss": 0.4185, "step": 33630 }, { "epoch": 0.728771694147726, "grad_norm": 1.3811501264572144, "learning_rate": 3.4158760723949967e-06, "loss": 0.2018, "step": 33635 }, { "epoch": 0.7288800294672069, "grad_norm": 1.6006879806518555, "learning_rate": 3.413314818704205e-06, "loss": 0.2431, "step": 33640 }, { "epoch": 0.7289883647866877, "grad_norm": 1.2395859956741333, "learning_rate": 3.4107543279822262e-06, "loss": 0.2144, "step": 33645 }, { "epoch": 0.7290967001061686, "grad_norm": 1.5555381774902344, "learning_rate": 3.4081946005256493e-06, "loss": 0.1816, "step": 33650 }, { "epoch": 0.7292050354256495, "grad_norm": 0.42684388160705566, "learning_rate": 3.4056356366309817e-06, "loss": 0.2441, "step": 33655 }, { "epoch": 0.7293133707451304, "grad_norm": 1.5264592170715332, "learning_rate": 3.403077436594645e-06, "loss": 0.23, "step": 33660 }, { "epoch": 0.7294217060646112, "grad_norm": 1.8889498710632324, "learning_rate": 3.4005200007129625e-06, "loss": 0.1982, "step": 33665 }, { "epoch": 0.729530041384092, "grad_norm": 1.5524019002914429, "learning_rate": 3.397963329282181e-06, "loss": 0.2828, "step": 33670 }, { "epoch": 0.7296383767035729, "grad_norm": 1.5242111682891846, "learning_rate": 3.395407422598446e-06, "loss": 0.1965, "step": 33675 }, { "epoch": 0.7297467120230537, "grad_norm": 1.0040583610534668, "learning_rate": 3.392852280957828e-06, "loss": 0.2278, "step": 33680 }, { "epoch": 0.7298550473425346, "grad_norm": 1.6098227500915527, "learning_rate": 3.3902979046562947e-06, "loss": 0.2041, "step": 33685 }, { "epoch": 0.7299633826620154, "grad_norm": 1.1836251020431519, "learning_rate": 3.3877442939897388e-06, "loss": 0.2071, "step": 33690 }, { "epoch": 0.7300717179814963, "grad_norm": 1.8729450702667236, "learning_rate": 3.385191449253955e-06, "loss": 0.1861, "step": 33695 }, { "epoch": 0.7301800533009772, "grad_norm": 1.0324450731277466, "learning_rate": 3.3826393707446448e-06, "loss": 0.1691, "step": 33700 }, { "epoch": 0.7302883886204581, "grad_norm": 1.2306379079818726, "learning_rate": 3.3800880587574424e-06, "loss": 0.1995, "step": 33705 }, { "epoch": 0.7303967239399389, "grad_norm": 1.3671883344650269, "learning_rate": 3.3775375135878695e-06, "loss": 0.2176, "step": 33710 }, { "epoch": 0.7305050592594198, "grad_norm": 1.6114914417266846, "learning_rate": 3.3749877355313742e-06, "loss": 0.26, "step": 33715 }, { "epoch": 0.7306133945789006, "grad_norm": 1.001937747001648, "learning_rate": 3.3724387248833033e-06, "loss": 0.1938, "step": 33720 }, { "epoch": 0.7307217298983815, "grad_norm": 1.1432549953460693, "learning_rate": 3.369890481938929e-06, "loss": 0.2292, "step": 33725 }, { "epoch": 0.7308300652178623, "grad_norm": 1.392704725265503, "learning_rate": 3.3673430069934234e-06, "loss": 0.2124, "step": 33730 }, { "epoch": 0.7309384005373432, "grad_norm": 1.2967562675476074, "learning_rate": 3.3647963003418694e-06, "loss": 0.2017, "step": 33735 }, { "epoch": 0.731046735856824, "grad_norm": 1.1039488315582275, "learning_rate": 3.362250362279271e-06, "loss": 0.157, "step": 33740 }, { "epoch": 0.7311550711763048, "grad_norm": 1.491495966911316, "learning_rate": 3.359705193100533e-06, "loss": 0.2215, "step": 33745 }, { "epoch": 0.7312634064957858, "grad_norm": 1.7305197715759277, "learning_rate": 3.357160793100477e-06, "loss": 0.1979, "step": 33750 }, { "epoch": 0.7313717418152667, "grad_norm": 1.3177236318588257, "learning_rate": 3.354617162573832e-06, "loss": 0.1535, "step": 33755 }, { "epoch": 0.7314800771347475, "grad_norm": 0.6726060509681702, "learning_rate": 3.352074301815246e-06, "loss": 0.2322, "step": 33760 }, { "epoch": 0.7315884124542283, "grad_norm": 0.6826272010803223, "learning_rate": 3.3495322111192643e-06, "loss": 0.197, "step": 33765 }, { "epoch": 0.7316967477737092, "grad_norm": 1.9661318063735962, "learning_rate": 3.3469908907803495e-06, "loss": 0.4034, "step": 33770 }, { "epoch": 0.73180508309319, "grad_norm": 2.1918327808380127, "learning_rate": 3.3444503410928806e-06, "loss": 0.2583, "step": 33775 }, { "epoch": 0.7319134184126709, "grad_norm": 1.0468165874481201, "learning_rate": 3.341910562351137e-06, "loss": 0.2713, "step": 33780 }, { "epoch": 0.7320217537321517, "grad_norm": 1.8905527591705322, "learning_rate": 3.33937155484932e-06, "loss": 0.1806, "step": 33785 }, { "epoch": 0.7321300890516326, "grad_norm": 1.4862191677093506, "learning_rate": 3.3368333188815295e-06, "loss": 0.1887, "step": 33790 }, { "epoch": 0.7322384243711134, "grad_norm": 1.7298779487609863, "learning_rate": 3.334295854741787e-06, "loss": 0.2182, "step": 33795 }, { "epoch": 0.7323467596905944, "grad_norm": 1.495500922203064, "learning_rate": 3.3317591627240144e-06, "loss": 0.267, "step": 33800 }, { "epoch": 0.7324550950100752, "grad_norm": 2.2339494228363037, "learning_rate": 3.329223243122052e-06, "loss": 0.1918, "step": 33805 }, { "epoch": 0.7325634303295561, "grad_norm": 1.6670175790786743, "learning_rate": 3.326688096229652e-06, "loss": 0.2877, "step": 33810 }, { "epoch": 0.7326717656490369, "grad_norm": 1.0646734237670898, "learning_rate": 3.3241537223404674e-06, "loss": 0.239, "step": 33815 }, { "epoch": 0.7327801009685178, "grad_norm": 1.314488172531128, "learning_rate": 3.3216201217480725e-06, "loss": 0.2641, "step": 33820 }, { "epoch": 0.7328884362879986, "grad_norm": 1.25095796585083, "learning_rate": 3.3190872947459417e-06, "loss": 0.2364, "step": 33825 }, { "epoch": 0.7329967716074794, "grad_norm": 1.2743390798568726, "learning_rate": 3.3165552416274705e-06, "loss": 0.2266, "step": 33830 }, { "epoch": 0.7331051069269603, "grad_norm": 1.4236598014831543, "learning_rate": 3.314023962685954e-06, "loss": 0.1554, "step": 33835 }, { "epoch": 0.7332134422464411, "grad_norm": 1.1957271099090576, "learning_rate": 3.3114934582146087e-06, "loss": 0.2682, "step": 33840 }, { "epoch": 0.7333217775659221, "grad_norm": 1.962446928024292, "learning_rate": 3.308963728506548e-06, "loss": 0.2867, "step": 33845 }, { "epoch": 0.7334301128854029, "grad_norm": 1.4239975214004517, "learning_rate": 3.3064347738548088e-06, "loss": 0.3314, "step": 33850 }, { "epoch": 0.7335384482048838, "grad_norm": 1.12669038772583, "learning_rate": 3.303906594552334e-06, "loss": 0.1678, "step": 33855 }, { "epoch": 0.7336467835243646, "grad_norm": 1.4196627140045166, "learning_rate": 3.30137919089197e-06, "loss": 0.1769, "step": 33860 }, { "epoch": 0.7337551188438455, "grad_norm": 1.928697943687439, "learning_rate": 3.2988525631664846e-06, "loss": 0.2325, "step": 33865 }, { "epoch": 0.7338634541633263, "grad_norm": 1.689149260520935, "learning_rate": 3.2963267116685425e-06, "loss": 0.279, "step": 33870 }, { "epoch": 0.7339717894828072, "grad_norm": 1.8724559545516968, "learning_rate": 3.2938016366907343e-06, "loss": 0.2213, "step": 33875 }, { "epoch": 0.734080124802288, "grad_norm": 2.125601291656494, "learning_rate": 3.2912773385255436e-06, "loss": 0.1981, "step": 33880 }, { "epoch": 0.7341884601217689, "grad_norm": 1.15645170211792, "learning_rate": 3.28875381746538e-06, "loss": 0.2822, "step": 33885 }, { "epoch": 0.7342967954412497, "grad_norm": 0.9543782472610474, "learning_rate": 3.2862310738025493e-06, "loss": 0.2422, "step": 33890 }, { "epoch": 0.7344051307607307, "grad_norm": 2.284153938293457, "learning_rate": 3.283709107829276e-06, "loss": 0.3437, "step": 33895 }, { "epoch": 0.7345134660802115, "grad_norm": 1.4877843856811523, "learning_rate": 3.281187919837696e-06, "loss": 0.2634, "step": 33900 }, { "epoch": 0.7346218013996924, "grad_norm": 1.4833016395568848, "learning_rate": 3.278667510119844e-06, "loss": 0.1969, "step": 33905 }, { "epoch": 0.7347301367191732, "grad_norm": 1.4307793378829956, "learning_rate": 3.2761478789676793e-06, "loss": 0.3391, "step": 33910 }, { "epoch": 0.734838472038654, "grad_norm": 1.3995561599731445, "learning_rate": 3.2736290266730565e-06, "loss": 0.2152, "step": 33915 }, { "epoch": 0.7349468073581349, "grad_norm": 1.7217553853988647, "learning_rate": 3.271110953527752e-06, "loss": 0.2018, "step": 33920 }, { "epoch": 0.7350551426776157, "grad_norm": 1.7032990455627441, "learning_rate": 3.2685936598234426e-06, "loss": 0.2822, "step": 33925 }, { "epoch": 0.7351634779970966, "grad_norm": 1.1623059511184692, "learning_rate": 3.2660771458517238e-06, "loss": 0.1631, "step": 33930 }, { "epoch": 0.7352718133165774, "grad_norm": 1.5230520963668823, "learning_rate": 3.2635614119040936e-06, "loss": 0.2779, "step": 33935 }, { "epoch": 0.7353801486360583, "grad_norm": 1.7936830520629883, "learning_rate": 3.2610464582719594e-06, "loss": 0.2779, "step": 33940 }, { "epoch": 0.7354884839555392, "grad_norm": 2.136352062225342, "learning_rate": 3.2585322852466428e-06, "loss": 0.2117, "step": 33945 }, { "epoch": 0.7355968192750201, "grad_norm": 1.426897406578064, "learning_rate": 3.2560188931193737e-06, "loss": 0.2224, "step": 33950 }, { "epoch": 0.7357051545945009, "grad_norm": 1.9006506204605103, "learning_rate": 3.2535062821812947e-06, "loss": 0.2561, "step": 33955 }, { "epoch": 0.7358134899139818, "grad_norm": 1.691364049911499, "learning_rate": 3.250994452723447e-06, "loss": 0.2727, "step": 33960 }, { "epoch": 0.7359218252334626, "grad_norm": 1.2747983932495117, "learning_rate": 3.2484834050367953e-06, "loss": 0.2469, "step": 33965 }, { "epoch": 0.7360301605529435, "grad_norm": 0.7984288930892944, "learning_rate": 3.2459731394121997e-06, "loss": 0.2257, "step": 33970 }, { "epoch": 0.7361384958724243, "grad_norm": 2.1239547729492188, "learning_rate": 3.2434636561404442e-06, "loss": 0.2682, "step": 33975 }, { "epoch": 0.7362468311919051, "grad_norm": 1.2492411136627197, "learning_rate": 3.240954955512211e-06, "loss": 0.186, "step": 33980 }, { "epoch": 0.736355166511386, "grad_norm": 0.859689474105835, "learning_rate": 3.2384470378180933e-06, "loss": 0.1502, "step": 33985 }, { "epoch": 0.736463501830867, "grad_norm": 1.674302339553833, "learning_rate": 3.235939903348597e-06, "loss": 0.3976, "step": 33990 }, { "epoch": 0.7365718371503478, "grad_norm": 1.199167013168335, "learning_rate": 3.2334335523941384e-06, "loss": 0.3315, "step": 33995 }, { "epoch": 0.7366801724698286, "grad_norm": 1.8319441080093384, "learning_rate": 3.2309279852450416e-06, "loss": 0.2173, "step": 34000 }, { "epoch": 0.7367885077893095, "grad_norm": 2.5289533138275146, "learning_rate": 3.2284232021915353e-06, "loss": 0.2846, "step": 34005 }, { "epoch": 0.7368968431087903, "grad_norm": 1.6672576665878296, "learning_rate": 3.225919203523765e-06, "loss": 0.2147, "step": 34010 }, { "epoch": 0.7370051784282712, "grad_norm": 1.4496195316314697, "learning_rate": 3.2234159895317798e-06, "loss": 0.268, "step": 34015 }, { "epoch": 0.737113513747752, "grad_norm": 1.4451000690460205, "learning_rate": 3.2209135605055343e-06, "loss": 0.1719, "step": 34020 }, { "epoch": 0.7372218490672329, "grad_norm": 1.9367305040359497, "learning_rate": 3.218411916734907e-06, "loss": 0.2395, "step": 34025 }, { "epoch": 0.7373301843867137, "grad_norm": 1.3127707242965698, "learning_rate": 3.2159110585096666e-06, "loss": 0.2703, "step": 34030 }, { "epoch": 0.7374385197061946, "grad_norm": 1.5983095169067383, "learning_rate": 3.2134109861195086e-06, "loss": 0.2322, "step": 34035 }, { "epoch": 0.7375468550256755, "grad_norm": 1.1572049856185913, "learning_rate": 3.210911699854018e-06, "loss": 0.2497, "step": 34040 }, { "epoch": 0.7376551903451564, "grad_norm": 1.2045257091522217, "learning_rate": 3.2084132000027123e-06, "loss": 0.2074, "step": 34045 }, { "epoch": 0.7377635256646372, "grad_norm": 1.456623911857605, "learning_rate": 3.2059154868550003e-06, "loss": 0.2223, "step": 34050 }, { "epoch": 0.737871860984118, "grad_norm": 1.3746317625045776, "learning_rate": 3.2034185607002e-06, "loss": 0.2163, "step": 34055 }, { "epoch": 0.7379801963035989, "grad_norm": 2.1133193969726562, "learning_rate": 3.2009224218275504e-06, "loss": 0.275, "step": 34060 }, { "epoch": 0.7380885316230797, "grad_norm": 1.7183986902236938, "learning_rate": 3.1984270705261844e-06, "loss": 0.2995, "step": 34065 }, { "epoch": 0.7381968669425606, "grad_norm": 0.8247794508934021, "learning_rate": 3.1959325070851578e-06, "loss": 0.1609, "step": 34070 }, { "epoch": 0.7383052022620414, "grad_norm": 1.3728505373001099, "learning_rate": 3.1934387317934223e-06, "loss": 0.2662, "step": 34075 }, { "epoch": 0.7384135375815223, "grad_norm": 1.6231884956359863, "learning_rate": 3.1909457449398505e-06, "loss": 0.2407, "step": 34080 }, { "epoch": 0.7385218729010032, "grad_norm": 1.5045363903045654, "learning_rate": 3.1884535468132117e-06, "loss": 0.2583, "step": 34085 }, { "epoch": 0.7386302082204841, "grad_norm": 1.4239349365234375, "learning_rate": 3.1859621377021923e-06, "loss": 0.2437, "step": 34090 }, { "epoch": 0.7387385435399649, "grad_norm": 0.9621121883392334, "learning_rate": 3.183471517895389e-06, "loss": 0.1605, "step": 34095 }, { "epoch": 0.7388468788594458, "grad_norm": 1.696214199066162, "learning_rate": 3.1809816876812947e-06, "loss": 0.2165, "step": 34100 }, { "epoch": 0.7389552141789266, "grad_norm": 0.7051489353179932, "learning_rate": 3.1784926473483256e-06, "loss": 0.229, "step": 34105 }, { "epoch": 0.7390635494984075, "grad_norm": 2.2798192501068115, "learning_rate": 3.1760043971847954e-06, "loss": 0.263, "step": 34110 }, { "epoch": 0.7391718848178883, "grad_norm": 1.1435978412628174, "learning_rate": 3.173516937478934e-06, "loss": 0.1415, "step": 34115 }, { "epoch": 0.7392802201373692, "grad_norm": 2.1495048999786377, "learning_rate": 3.171030268518872e-06, "loss": 0.2999, "step": 34120 }, { "epoch": 0.73938855545685, "grad_norm": 2.269940137863159, "learning_rate": 3.1685443905926593e-06, "loss": 0.3445, "step": 34125 }, { "epoch": 0.7394968907763309, "grad_norm": 1.6869438886642456, "learning_rate": 3.1660593039882405e-06, "loss": 0.3033, "step": 34130 }, { "epoch": 0.7396052260958118, "grad_norm": 2.032085418701172, "learning_rate": 3.1635750089934782e-06, "loss": 0.258, "step": 34135 }, { "epoch": 0.7397135614152927, "grad_norm": 1.4580599069595337, "learning_rate": 3.1610915058961457e-06, "loss": 0.2805, "step": 34140 }, { "epoch": 0.7398218967347735, "grad_norm": 1.2711461782455444, "learning_rate": 3.1586087949839106e-06, "loss": 0.2621, "step": 34145 }, { "epoch": 0.7399302320542543, "grad_norm": 1.4233688116073608, "learning_rate": 3.1561268765443663e-06, "loss": 0.2189, "step": 34150 }, { "epoch": 0.7400385673737352, "grad_norm": 1.9625760316848755, "learning_rate": 3.1536457508649997e-06, "loss": 0.2881, "step": 34155 }, { "epoch": 0.740146902693216, "grad_norm": 1.1434417963027954, "learning_rate": 3.1511654182332175e-06, "loss": 0.1857, "step": 34160 }, { "epoch": 0.7402552380126969, "grad_norm": 1.8388760089874268, "learning_rate": 3.1486858789363228e-06, "loss": 0.1742, "step": 34165 }, { "epoch": 0.7403635733321777, "grad_norm": 1.5243829488754272, "learning_rate": 3.1462071332615396e-06, "loss": 0.191, "step": 34170 }, { "epoch": 0.7404719086516586, "grad_norm": 1.0704184770584106, "learning_rate": 3.143729181495986e-06, "loss": 0.3041, "step": 34175 }, { "epoch": 0.7405802439711394, "grad_norm": 1.268729329109192, "learning_rate": 3.141252023926704e-06, "loss": 0.2151, "step": 34180 }, { "epoch": 0.7406885792906204, "grad_norm": 1.4926542043685913, "learning_rate": 3.1387756608406274e-06, "loss": 0.2154, "step": 34185 }, { "epoch": 0.7407969146101012, "grad_norm": 1.61951744556427, "learning_rate": 3.13630009252461e-06, "loss": 0.1707, "step": 34190 }, { "epoch": 0.7409052499295821, "grad_norm": 1.5092053413391113, "learning_rate": 3.133825319265411e-06, "loss": 0.246, "step": 34195 }, { "epoch": 0.7410135852490629, "grad_norm": 0.9296833872795105, "learning_rate": 3.131351341349691e-06, "loss": 0.2041, "step": 34200 }, { "epoch": 0.7411219205685438, "grad_norm": 1.8535361289978027, "learning_rate": 3.1288781590640284e-06, "loss": 0.2797, "step": 34205 }, { "epoch": 0.7412302558880246, "grad_norm": 1.4543373584747314, "learning_rate": 3.1264057726948995e-06, "loss": 0.3644, "step": 34210 }, { "epoch": 0.7413385912075054, "grad_norm": 0.8445451855659485, "learning_rate": 3.123934182528697e-06, "loss": 0.1482, "step": 34215 }, { "epoch": 0.7414469265269863, "grad_norm": 1.7739942073822021, "learning_rate": 3.1214633888517165e-06, "loss": 0.3054, "step": 34220 }, { "epoch": 0.7415552618464671, "grad_norm": 1.954693078994751, "learning_rate": 3.118993391950159e-06, "loss": 0.1869, "step": 34225 }, { "epoch": 0.7416635971659481, "grad_norm": 2.1305830478668213, "learning_rate": 3.1165241921101395e-06, "loss": 0.3017, "step": 34230 }, { "epoch": 0.7417719324854289, "grad_norm": 1.9448899030685425, "learning_rate": 3.114055789617678e-06, "loss": 0.284, "step": 34235 }, { "epoch": 0.7418802678049098, "grad_norm": 1.1163746118545532, "learning_rate": 3.111588184758706e-06, "loss": 0.2129, "step": 34240 }, { "epoch": 0.7419886031243906, "grad_norm": 1.3825470209121704, "learning_rate": 3.10912137781905e-06, "loss": 0.1643, "step": 34245 }, { "epoch": 0.7420969384438715, "grad_norm": 0.8548109531402588, "learning_rate": 3.1066553690844602e-06, "loss": 0.2291, "step": 34250 }, { "epoch": 0.7422052737633523, "grad_norm": 1.3782081604003906, "learning_rate": 3.104190158840583e-06, "loss": 0.2371, "step": 34255 }, { "epoch": 0.7423136090828332, "grad_norm": 1.5529710054397583, "learning_rate": 3.1017257473729747e-06, "loss": 0.2761, "step": 34260 }, { "epoch": 0.742421944402314, "grad_norm": 1.6934343576431274, "learning_rate": 3.099262134967106e-06, "loss": 0.2945, "step": 34265 }, { "epoch": 0.7425302797217949, "grad_norm": 1.595000982284546, "learning_rate": 3.0967993219083413e-06, "loss": 0.2549, "step": 34270 }, { "epoch": 0.7426386150412757, "grad_norm": 1.3082314729690552, "learning_rate": 3.0943373084819694e-06, "loss": 0.2097, "step": 34275 }, { "epoch": 0.7427469503607567, "grad_norm": 1.833513617515564, "learning_rate": 3.091876094973166e-06, "loss": 0.2121, "step": 34280 }, { "epoch": 0.7428552856802375, "grad_norm": 1.3048380613327026, "learning_rate": 3.0894156816670406e-06, "loss": 0.2242, "step": 34285 }, { "epoch": 0.7429636209997184, "grad_norm": 1.770397663116455, "learning_rate": 3.086956068848588e-06, "loss": 0.3056, "step": 34290 }, { "epoch": 0.7430719563191992, "grad_norm": 0.7803713083267212, "learning_rate": 3.084497256802714e-06, "loss": 0.2006, "step": 34295 }, { "epoch": 0.74318029163868, "grad_norm": 2.1346232891082764, "learning_rate": 3.0820392458142424e-06, "loss": 0.1996, "step": 34300 }, { "epoch": 0.7432886269581609, "grad_norm": 2.777639627456665, "learning_rate": 3.0795820361678885e-06, "loss": 0.2694, "step": 34305 }, { "epoch": 0.7433969622776417, "grad_norm": 1.0034856796264648, "learning_rate": 3.077125628148292e-06, "loss": 0.1654, "step": 34310 }, { "epoch": 0.7435052975971226, "grad_norm": 1.2832645177841187, "learning_rate": 3.074670022039984e-06, "loss": 0.2124, "step": 34315 }, { "epoch": 0.7436136329166034, "grad_norm": 1.5316154956817627, "learning_rate": 3.0722152181274144e-06, "loss": 0.2229, "step": 34320 }, { "epoch": 0.7437219682360843, "grad_norm": 1.5687520503997803, "learning_rate": 3.069761216694932e-06, "loss": 0.3045, "step": 34325 }, { "epoch": 0.7438303035555652, "grad_norm": 1.1071279048919678, "learning_rate": 3.0673080180267966e-06, "loss": 0.2797, "step": 34330 }, { "epoch": 0.7439386388750461, "grad_norm": 1.4203816652297974, "learning_rate": 3.064855622407179e-06, "loss": 0.2563, "step": 34335 }, { "epoch": 0.7440469741945269, "grad_norm": 1.0363363027572632, "learning_rate": 3.0624040301201462e-06, "loss": 0.2731, "step": 34340 }, { "epoch": 0.7441553095140078, "grad_norm": 1.1813169717788696, "learning_rate": 3.0599532414496835e-06, "loss": 0.1489, "step": 34345 }, { "epoch": 0.7442636448334886, "grad_norm": 1.1460402011871338, "learning_rate": 3.0575032566796735e-06, "loss": 0.2342, "step": 34350 }, { "epoch": 0.7443719801529695, "grad_norm": 1.7356665134429932, "learning_rate": 3.055054076093916e-06, "loss": 0.2414, "step": 34355 }, { "epoch": 0.7444803154724503, "grad_norm": 1.6732736825942993, "learning_rate": 3.0526056999761058e-06, "loss": 0.2332, "step": 34360 }, { "epoch": 0.7445886507919312, "grad_norm": 1.107714295387268, "learning_rate": 3.0501581286098546e-06, "loss": 0.2053, "step": 34365 }, { "epoch": 0.744696986111412, "grad_norm": 1.3839211463928223, "learning_rate": 3.0477113622786734e-06, "loss": 0.1747, "step": 34370 }, { "epoch": 0.744805321430893, "grad_norm": 0.9880338907241821, "learning_rate": 3.0452654012659866e-06, "loss": 0.1589, "step": 34375 }, { "epoch": 0.7449136567503738, "grad_norm": 0.7865043878555298, "learning_rate": 3.0428202458551238e-06, "loss": 0.2807, "step": 34380 }, { "epoch": 0.7450219920698546, "grad_norm": 1.0362727642059326, "learning_rate": 3.040375896329313e-06, "loss": 0.1988, "step": 34385 }, { "epoch": 0.7451303273893355, "grad_norm": 2.040227174758911, "learning_rate": 3.0379323529717033e-06, "loss": 0.2377, "step": 34390 }, { "epoch": 0.7452386627088163, "grad_norm": 1.553255558013916, "learning_rate": 3.0354896160653346e-06, "loss": 0.1924, "step": 34395 }, { "epoch": 0.7453469980282972, "grad_norm": 1.4644726514816284, "learning_rate": 3.03304768589317e-06, "loss": 0.1572, "step": 34400 }, { "epoch": 0.745455333347778, "grad_norm": 1.4321569204330444, "learning_rate": 3.0306065627380623e-06, "loss": 0.2118, "step": 34405 }, { "epoch": 0.7455636686672589, "grad_norm": 1.267614483833313, "learning_rate": 3.0281662468827856e-06, "loss": 0.2596, "step": 34410 }, { "epoch": 0.7456720039867397, "grad_norm": 0.640340268611908, "learning_rate": 3.0257267386100085e-06, "loss": 0.2147, "step": 34415 }, { "epoch": 0.7457803393062206, "grad_norm": 1.4524345397949219, "learning_rate": 3.0232880382023176e-06, "loss": 0.1781, "step": 34420 }, { "epoch": 0.7458886746257015, "grad_norm": 0.8685858845710754, "learning_rate": 3.0208501459421925e-06, "loss": 0.1411, "step": 34425 }, { "epoch": 0.7459970099451824, "grad_norm": 1.2444589138031006, "learning_rate": 3.018413062112031e-06, "loss": 0.1862, "step": 34430 }, { "epoch": 0.7461053452646632, "grad_norm": 1.9912408590316772, "learning_rate": 3.015976786994135e-06, "loss": 0.2586, "step": 34435 }, { "epoch": 0.7462136805841441, "grad_norm": 1.348592758178711, "learning_rate": 3.0135413208707033e-06, "loss": 0.1621, "step": 34440 }, { "epoch": 0.7463220159036249, "grad_norm": 1.8860430717468262, "learning_rate": 3.0111066640238574e-06, "loss": 0.2182, "step": 34445 }, { "epoch": 0.7464303512231057, "grad_norm": 1.0586230754852295, "learning_rate": 3.008672816735606e-06, "loss": 0.241, "step": 34450 }, { "epoch": 0.7465386865425866, "grad_norm": 1.8581740856170654, "learning_rate": 3.006239779287883e-06, "loss": 0.261, "step": 34455 }, { "epoch": 0.7466470218620674, "grad_norm": 1.1387927532196045, "learning_rate": 3.0038075519625144e-06, "loss": 0.2824, "step": 34460 }, { "epoch": 0.7467553571815483, "grad_norm": 1.8424683809280396, "learning_rate": 3.001376135041235e-06, "loss": 0.1791, "step": 34465 }, { "epoch": 0.7468636925010291, "grad_norm": 1.715736746788025, "learning_rate": 2.9989455288056945e-06, "loss": 0.2626, "step": 34470 }, { "epoch": 0.7469720278205101, "grad_norm": 1.9227585792541504, "learning_rate": 2.9965157335374316e-06, "loss": 0.2449, "step": 34475 }, { "epoch": 0.7470803631399909, "grad_norm": 1.8870079517364502, "learning_rate": 2.994086749517916e-06, "loss": 0.2559, "step": 34480 }, { "epoch": 0.7471886984594718, "grad_norm": 0.9939573407173157, "learning_rate": 2.991658577028499e-06, "loss": 0.2071, "step": 34485 }, { "epoch": 0.7472970337789526, "grad_norm": 1.523311972618103, "learning_rate": 2.9892312163504534e-06, "loss": 0.1767, "step": 34490 }, { "epoch": 0.7474053690984335, "grad_norm": 2.194612503051758, "learning_rate": 2.98680466776495e-06, "loss": 0.2423, "step": 34495 }, { "epoch": 0.7475137044179143, "grad_norm": 1.0385956764221191, "learning_rate": 2.9843789315530647e-06, "loss": 0.2485, "step": 34500 }, { "epoch": 0.7476220397373952, "grad_norm": 2.1657257080078125, "learning_rate": 2.98195400799579e-06, "loss": 0.2116, "step": 34505 }, { "epoch": 0.747730375056876, "grad_norm": 1.6779030561447144, "learning_rate": 2.9795298973740095e-06, "loss": 0.2388, "step": 34510 }, { "epoch": 0.7478387103763569, "grad_norm": 1.3947653770446777, "learning_rate": 2.9771065999685277e-06, "loss": 0.3303, "step": 34515 }, { "epoch": 0.7479470456958378, "grad_norm": 1.5749993324279785, "learning_rate": 2.974684116060036e-06, "loss": 0.245, "step": 34520 }, { "epoch": 0.7480553810153187, "grad_norm": 1.174713134765625, "learning_rate": 2.972262445929157e-06, "loss": 0.2831, "step": 34525 }, { "epoch": 0.7481637163347995, "grad_norm": 1.5527845621109009, "learning_rate": 2.969841589856398e-06, "loss": 0.2554, "step": 34530 }, { "epoch": 0.7482720516542803, "grad_norm": 1.817574143409729, "learning_rate": 2.967421548122177e-06, "loss": 0.1645, "step": 34535 }, { "epoch": 0.7483803869737612, "grad_norm": 1.9547228813171387, "learning_rate": 2.9650023210068235e-06, "loss": 0.2062, "step": 34540 }, { "epoch": 0.748488722293242, "grad_norm": 1.6246261596679688, "learning_rate": 2.962583908790564e-06, "loss": 0.3144, "step": 34545 }, { "epoch": 0.7485970576127229, "grad_norm": 1.141182541847229, "learning_rate": 2.9601663117535416e-06, "loss": 0.2259, "step": 34550 }, { "epoch": 0.7487053929322037, "grad_norm": 1.535625696182251, "learning_rate": 2.957749530175792e-06, "loss": 0.3481, "step": 34555 }, { "epoch": 0.7488137282516846, "grad_norm": 2.004467725753784, "learning_rate": 2.9553335643372696e-06, "loss": 0.279, "step": 34560 }, { "epoch": 0.7489220635711654, "grad_norm": 1.405377984046936, "learning_rate": 2.9529184145178215e-06, "loss": 0.2215, "step": 34565 }, { "epoch": 0.7490303988906464, "grad_norm": 1.1523139476776123, "learning_rate": 2.9505040809972097e-06, "loss": 0.1947, "step": 34570 }, { "epoch": 0.7491387342101272, "grad_norm": 1.0508530139923096, "learning_rate": 2.9480905640551015e-06, "loss": 0.2175, "step": 34575 }, { "epoch": 0.7492470695296081, "grad_norm": 1.4080902338027954, "learning_rate": 2.9456778639710605e-06, "loss": 0.2314, "step": 34580 }, { "epoch": 0.7493554048490889, "grad_norm": 1.4380548000335693, "learning_rate": 2.943265981024569e-06, "loss": 0.2712, "step": 34585 }, { "epoch": 0.7494637401685698, "grad_norm": 1.1588386297225952, "learning_rate": 2.9408549154950007e-06, "loss": 0.226, "step": 34590 }, { "epoch": 0.7495720754880506, "grad_norm": 1.5491458177566528, "learning_rate": 2.9384446676616475e-06, "loss": 0.2286, "step": 34595 }, { "epoch": 0.7496804108075314, "grad_norm": 1.4895685911178589, "learning_rate": 2.936035237803694e-06, "loss": 0.2456, "step": 34600 }, { "epoch": 0.7497887461270123, "grad_norm": 1.9178961515426636, "learning_rate": 2.9336266262002432e-06, "loss": 0.2358, "step": 34605 }, { "epoch": 0.7498970814464931, "grad_norm": 1.389503836631775, "learning_rate": 2.9312188331302906e-06, "loss": 0.1889, "step": 34610 }, { "epoch": 0.7500054167659741, "grad_norm": 2.0714030265808105, "learning_rate": 2.9288118588727466e-06, "loss": 0.2906, "step": 34615 }, { "epoch": 0.7501137520854549, "grad_norm": 1.6667178869247437, "learning_rate": 2.9264057037064243e-06, "loss": 0.2844, "step": 34620 }, { "epoch": 0.7502220874049358, "grad_norm": 2.5343194007873535, "learning_rate": 2.924000367910036e-06, "loss": 0.2795, "step": 34625 }, { "epoch": 0.7503304227244166, "grad_norm": 2.422880172729492, "learning_rate": 2.9215958517622102e-06, "loss": 0.1587, "step": 34630 }, { "epoch": 0.7504387580438975, "grad_norm": 1.1028462648391724, "learning_rate": 2.9191921555414658e-06, "loss": 0.1335, "step": 34635 }, { "epoch": 0.7505470933633783, "grad_norm": 0.830711305141449, "learning_rate": 2.916789279526244e-06, "loss": 0.1472, "step": 34640 }, { "epoch": 0.7506554286828592, "grad_norm": 1.4782203435897827, "learning_rate": 2.9143872239948744e-06, "loss": 0.1918, "step": 34645 }, { "epoch": 0.75076376400234, "grad_norm": 1.6534667015075684, "learning_rate": 2.9119859892256065e-06, "loss": 0.2214, "step": 34650 }, { "epoch": 0.7508720993218209, "grad_norm": 2.2540993690490723, "learning_rate": 2.9095855754965785e-06, "loss": 0.2207, "step": 34655 }, { "epoch": 0.7509804346413017, "grad_norm": 1.236873984336853, "learning_rate": 2.907185983085852e-06, "loss": 0.2222, "step": 34660 }, { "epoch": 0.7510887699607827, "grad_norm": 2.361227512359619, "learning_rate": 2.904787212271375e-06, "loss": 0.2411, "step": 34665 }, { "epoch": 0.7511971052802635, "grad_norm": 1.4388315677642822, "learning_rate": 2.9023892633310125e-06, "loss": 0.2474, "step": 34670 }, { "epoch": 0.7513054405997444, "grad_norm": 1.8543739318847656, "learning_rate": 2.8999921365425352e-06, "loss": 0.2833, "step": 34675 }, { "epoch": 0.7514137759192252, "grad_norm": 1.804679036140442, "learning_rate": 2.8975958321836085e-06, "loss": 0.3158, "step": 34680 }, { "epoch": 0.751522111238706, "grad_norm": 0.5341275334358215, "learning_rate": 2.8952003505318126e-06, "loss": 0.2403, "step": 34685 }, { "epoch": 0.7516304465581869, "grad_norm": 1.4546902179718018, "learning_rate": 2.892805691864624e-06, "loss": 0.249, "step": 34690 }, { "epoch": 0.7517387818776677, "grad_norm": 1.1771457195281982, "learning_rate": 2.890411856459433e-06, "loss": 0.2688, "step": 34695 }, { "epoch": 0.7518471171971486, "grad_norm": 2.1076526641845703, "learning_rate": 2.8880188445935265e-06, "loss": 0.1845, "step": 34700 }, { "epoch": 0.7519554525166294, "grad_norm": 1.44956374168396, "learning_rate": 2.885626656544097e-06, "loss": 0.2156, "step": 34705 }, { "epoch": 0.7520637878361103, "grad_norm": 0.9196727275848389, "learning_rate": 2.883235292588249e-06, "loss": 0.2169, "step": 34710 }, { "epoch": 0.7521721231555912, "grad_norm": 1.4745218753814697, "learning_rate": 2.880844753002976e-06, "loss": 0.173, "step": 34715 }, { "epoch": 0.7522804584750721, "grad_norm": 1.2175891399383545, "learning_rate": 2.878455038065201e-06, "loss": 0.1802, "step": 34720 }, { "epoch": 0.7523887937945529, "grad_norm": 1.7237274646759033, "learning_rate": 2.876066148051725e-06, "loss": 0.2445, "step": 34725 }, { "epoch": 0.7524971291140338, "grad_norm": 1.0084253549575806, "learning_rate": 2.873678083239273e-06, "loss": 0.1881, "step": 34730 }, { "epoch": 0.7526054644335146, "grad_norm": 1.7522389888763428, "learning_rate": 2.8712908439044616e-06, "loss": 0.1846, "step": 34735 }, { "epoch": 0.7527137997529955, "grad_norm": 2.0012998580932617, "learning_rate": 2.868904430323817e-06, "loss": 0.1864, "step": 34740 }, { "epoch": 0.7528221350724763, "grad_norm": 1.3133059740066528, "learning_rate": 2.8665188427737713e-06, "loss": 0.1761, "step": 34745 }, { "epoch": 0.7529304703919572, "grad_norm": 1.3901937007904053, "learning_rate": 2.864134081530656e-06, "loss": 0.2252, "step": 34750 }, { "epoch": 0.753038805711438, "grad_norm": 1.066583514213562, "learning_rate": 2.8617501468707144e-06, "loss": 0.1921, "step": 34755 }, { "epoch": 0.753147141030919, "grad_norm": 1.0346325635910034, "learning_rate": 2.8593670390700823e-06, "loss": 0.213, "step": 34760 }, { "epoch": 0.7532554763503998, "grad_norm": 1.4932284355163574, "learning_rate": 2.8569847584048173e-06, "loss": 0.2412, "step": 34765 }, { "epoch": 0.7533638116698806, "grad_norm": 1.2781767845153809, "learning_rate": 2.854603305150866e-06, "loss": 0.2529, "step": 34770 }, { "epoch": 0.7534721469893615, "grad_norm": 1.221955418586731, "learning_rate": 2.8522226795840802e-06, "loss": 0.1989, "step": 34775 }, { "epoch": 0.7535804823088423, "grad_norm": 1.9510493278503418, "learning_rate": 2.849842881980227e-06, "loss": 0.2079, "step": 34780 }, { "epoch": 0.7536888176283232, "grad_norm": 1.9445478916168213, "learning_rate": 2.847463912614964e-06, "loss": 0.2707, "step": 34785 }, { "epoch": 0.753797152947804, "grad_norm": 1.7703399658203125, "learning_rate": 2.8450857717638635e-06, "loss": 0.2269, "step": 34790 }, { "epoch": 0.7539054882672849, "grad_norm": 1.5615196228027344, "learning_rate": 2.8427084597023934e-06, "loss": 0.2267, "step": 34795 }, { "epoch": 0.7540138235867657, "grad_norm": 1.4182493686676025, "learning_rate": 2.8403319767059356e-06, "loss": 0.286, "step": 34800 }, { "epoch": 0.7541221589062466, "grad_norm": 0.9868385791778564, "learning_rate": 2.837956323049762e-06, "loss": 0.1656, "step": 34805 }, { "epoch": 0.7542304942257275, "grad_norm": 1.546949863433838, "learning_rate": 2.8355814990090623e-06, "loss": 0.251, "step": 34810 }, { "epoch": 0.7543388295452084, "grad_norm": 1.0922118425369263, "learning_rate": 2.8332075048589257e-06, "loss": 0.2045, "step": 34815 }, { "epoch": 0.7544471648646892, "grad_norm": 2.762718915939331, "learning_rate": 2.8308343408743376e-06, "loss": 0.3102, "step": 34820 }, { "epoch": 0.7545555001841701, "grad_norm": 1.4087510108947754, "learning_rate": 2.8284620073302006e-06, "loss": 0.2175, "step": 34825 }, { "epoch": 0.7546638355036509, "grad_norm": 0.6939383745193481, "learning_rate": 2.8260905045013065e-06, "loss": 0.129, "step": 34830 }, { "epoch": 0.7547721708231317, "grad_norm": 1.5691132545471191, "learning_rate": 2.823719832662366e-06, "loss": 0.2298, "step": 34835 }, { "epoch": 0.7548805061426126, "grad_norm": 0.8817400932312012, "learning_rate": 2.8213499920879793e-06, "loss": 0.2072, "step": 34840 }, { "epoch": 0.7549888414620934, "grad_norm": 1.870969533920288, "learning_rate": 2.8189809830526628e-06, "loss": 0.1602, "step": 34845 }, { "epoch": 0.7550971767815743, "grad_norm": 1.217115879058838, "learning_rate": 2.816612805830824e-06, "loss": 0.2065, "step": 34850 }, { "epoch": 0.7552055121010551, "grad_norm": 1.632683277130127, "learning_rate": 2.814245460696788e-06, "loss": 0.1959, "step": 34855 }, { "epoch": 0.7553138474205361, "grad_norm": 1.0183734893798828, "learning_rate": 2.81187894792477e-06, "loss": 0.2154, "step": 34860 }, { "epoch": 0.7554221827400169, "grad_norm": 1.1825529336929321, "learning_rate": 2.809513267788898e-06, "loss": 0.1763, "step": 34865 }, { "epoch": 0.7555305180594978, "grad_norm": 1.138978362083435, "learning_rate": 2.8071484205632037e-06, "loss": 0.2191, "step": 34870 }, { "epoch": 0.7556388533789786, "grad_norm": 0.928173840045929, "learning_rate": 2.8047844065216124e-06, "loss": 0.2528, "step": 34875 }, { "epoch": 0.7557471886984595, "grad_norm": 1.252681851387024, "learning_rate": 2.8024212259379656e-06, "loss": 0.2982, "step": 34880 }, { "epoch": 0.7558555240179403, "grad_norm": 1.4287433624267578, "learning_rate": 2.8000588790859985e-06, "loss": 0.2246, "step": 34885 }, { "epoch": 0.7559638593374212, "grad_norm": 1.3384819030761719, "learning_rate": 2.797697366239357e-06, "loss": 0.3045, "step": 34890 }, { "epoch": 0.756072194656902, "grad_norm": 2.064262628555298, "learning_rate": 2.7953366876715827e-06, "loss": 0.2698, "step": 34895 }, { "epoch": 0.7561805299763829, "grad_norm": 1.367433786392212, "learning_rate": 2.79297684365613e-06, "loss": 0.257, "step": 34900 }, { "epoch": 0.7562888652958638, "grad_norm": 1.8518612384796143, "learning_rate": 2.790617834466346e-06, "loss": 0.167, "step": 34905 }, { "epoch": 0.7563972006153447, "grad_norm": 1.6456372737884521, "learning_rate": 2.7882596603754895e-06, "loss": 0.2937, "step": 34910 }, { "epoch": 0.7565055359348255, "grad_norm": 1.5410053730010986, "learning_rate": 2.7859023216567217e-06, "loss": 0.2352, "step": 34915 }, { "epoch": 0.7566138712543063, "grad_norm": 1.1248849630355835, "learning_rate": 2.783545818583101e-06, "loss": 0.3132, "step": 34920 }, { "epoch": 0.7567222065737872, "grad_norm": 1.8583241701126099, "learning_rate": 2.7811901514275963e-06, "loss": 0.2591, "step": 34925 }, { "epoch": 0.756830541893268, "grad_norm": 1.6566481590270996, "learning_rate": 2.7788353204630723e-06, "loss": 0.2001, "step": 34930 }, { "epoch": 0.7569388772127489, "grad_norm": 1.1453187465667725, "learning_rate": 2.776481325962307e-06, "loss": 0.1885, "step": 34935 }, { "epoch": 0.7570472125322297, "grad_norm": 1.6776901483535767, "learning_rate": 2.7741281681979715e-06, "loss": 0.1753, "step": 34940 }, { "epoch": 0.7571555478517106, "grad_norm": 0.5419267416000366, "learning_rate": 2.7717758474426417e-06, "loss": 0.1745, "step": 34945 }, { "epoch": 0.7572638831711914, "grad_norm": 1.4434210062026978, "learning_rate": 2.7694243639688033e-06, "loss": 0.2423, "step": 34950 }, { "epoch": 0.7573722184906724, "grad_norm": 2.472975730895996, "learning_rate": 2.7670737180488326e-06, "loss": 0.2318, "step": 34955 }, { "epoch": 0.7574805538101532, "grad_norm": 0.9109546542167664, "learning_rate": 2.764723909955028e-06, "loss": 0.245, "step": 34960 }, { "epoch": 0.7575888891296341, "grad_norm": 0.8023596405982971, "learning_rate": 2.7623749399595713e-06, "loss": 0.1877, "step": 34965 }, { "epoch": 0.7576972244491149, "grad_norm": 1.2798999547958374, "learning_rate": 2.7600268083345616e-06, "loss": 0.1567, "step": 34970 }, { "epoch": 0.7578055597685958, "grad_norm": 1.894696593284607, "learning_rate": 2.7576795153519907e-06, "loss": 0.2557, "step": 34975 }, { "epoch": 0.7579138950880766, "grad_norm": 1.1192541122436523, "learning_rate": 2.7553330612837557e-06, "loss": 0.2673, "step": 34980 }, { "epoch": 0.7580222304075575, "grad_norm": 1.6958197355270386, "learning_rate": 2.7529874464016627e-06, "loss": 0.1678, "step": 34985 }, { "epoch": 0.7581305657270383, "grad_norm": 1.320056676864624, "learning_rate": 2.7506426709774116e-06, "loss": 0.1378, "step": 34990 }, { "epoch": 0.7582389010465191, "grad_norm": 1.1422456502914429, "learning_rate": 2.748298735282614e-06, "loss": 0.16, "step": 34995 }, { "epoch": 0.758347236366, "grad_norm": 1.3076695203781128, "learning_rate": 2.7459556395887753e-06, "loss": 0.1985, "step": 35000 }, { "epoch": 0.758455571685481, "grad_norm": 1.042204737663269, "learning_rate": 2.7436133841673095e-06, "loss": 0.1517, "step": 35005 }, { "epoch": 0.7585639070049618, "grad_norm": 2.927663803100586, "learning_rate": 2.7412719692895317e-06, "loss": 0.2261, "step": 35010 }, { "epoch": 0.7586722423244426, "grad_norm": 1.5628488063812256, "learning_rate": 2.738931395226665e-06, "loss": 0.208, "step": 35015 }, { "epoch": 0.7587805776439235, "grad_norm": 1.415469765663147, "learning_rate": 2.7365916622498245e-06, "loss": 0.1718, "step": 35020 }, { "epoch": 0.7588889129634043, "grad_norm": 1.5706117153167725, "learning_rate": 2.7342527706300314e-06, "loss": 0.2078, "step": 35025 }, { "epoch": 0.7589972482828852, "grad_norm": 0.946333110332489, "learning_rate": 2.731914720638217e-06, "loss": 0.209, "step": 35030 }, { "epoch": 0.759105583602366, "grad_norm": 1.4915827512741089, "learning_rate": 2.7295775125452028e-06, "loss": 0.1854, "step": 35035 }, { "epoch": 0.7592139189218469, "grad_norm": 1.393223762512207, "learning_rate": 2.7272411466217263e-06, "loss": 0.2012, "step": 35040 }, { "epoch": 0.7593222542413277, "grad_norm": 1.7038657665252686, "learning_rate": 2.724905623138414e-06, "loss": 0.1887, "step": 35045 }, { "epoch": 0.7594305895608087, "grad_norm": 1.118094801902771, "learning_rate": 2.722570942365804e-06, "loss": 0.2154, "step": 35050 }, { "epoch": 0.7595389248802895, "grad_norm": 2.1374943256378174, "learning_rate": 2.720237104574338e-06, "loss": 0.1855, "step": 35055 }, { "epoch": 0.7596472601997704, "grad_norm": 1.222257375717163, "learning_rate": 2.7179041100343494e-06, "loss": 0.2653, "step": 35060 }, { "epoch": 0.7597555955192512, "grad_norm": 1.2275217771530151, "learning_rate": 2.7155719590160868e-06, "loss": 0.216, "step": 35065 }, { "epoch": 0.759863930838732, "grad_norm": 1.7750767469406128, "learning_rate": 2.713240651789689e-06, "loss": 0.1961, "step": 35070 }, { "epoch": 0.7599722661582129, "grad_norm": 2.1685233116149902, "learning_rate": 2.7109101886252097e-06, "loss": 0.1868, "step": 35075 }, { "epoch": 0.7600806014776937, "grad_norm": 1.793250322341919, "learning_rate": 2.7085805697925902e-06, "loss": 0.2311, "step": 35080 }, { "epoch": 0.7601889367971746, "grad_norm": 1.7889413833618164, "learning_rate": 2.706251795561691e-06, "loss": 0.2071, "step": 35085 }, { "epoch": 0.7602972721166554, "grad_norm": 1.3406062126159668, "learning_rate": 2.703923866202256e-06, "loss": 0.2191, "step": 35090 }, { "epoch": 0.7604056074361363, "grad_norm": 1.2286880016326904, "learning_rate": 2.7015967819839497e-06, "loss": 0.2367, "step": 35095 }, { "epoch": 0.7605139427556172, "grad_norm": 1.5195400714874268, "learning_rate": 2.699270543176323e-06, "loss": 0.2359, "step": 35100 }, { "epoch": 0.7606222780750981, "grad_norm": 1.840878963470459, "learning_rate": 2.6969451500488396e-06, "loss": 0.1176, "step": 35105 }, { "epoch": 0.7607306133945789, "grad_norm": 1.6585999727249146, "learning_rate": 2.6946206028708634e-06, "loss": 0.2083, "step": 35110 }, { "epoch": 0.7608389487140598, "grad_norm": 2.0198557376861572, "learning_rate": 2.692296901911653e-06, "loss": 0.2439, "step": 35115 }, { "epoch": 0.7609472840335406, "grad_norm": 1.762876033782959, "learning_rate": 2.689974047440379e-06, "loss": 0.2372, "step": 35120 }, { "epoch": 0.7610556193530215, "grad_norm": 1.5749168395996094, "learning_rate": 2.6876520397261053e-06, "loss": 0.2315, "step": 35125 }, { "epoch": 0.7611639546725023, "grad_norm": 0.8827248811721802, "learning_rate": 2.6853308790378076e-06, "loss": 0.1602, "step": 35130 }, { "epoch": 0.7612722899919832, "grad_norm": 1.2523096799850464, "learning_rate": 2.6830105656443495e-06, "loss": 0.247, "step": 35135 }, { "epoch": 0.761380625311464, "grad_norm": 1.9526963233947754, "learning_rate": 2.680691099814513e-06, "loss": 0.2395, "step": 35140 }, { "epoch": 0.761488960630945, "grad_norm": 1.7188255786895752, "learning_rate": 2.6783724818169655e-06, "loss": 0.3106, "step": 35145 }, { "epoch": 0.7615972959504258, "grad_norm": 2.2605485916137695, "learning_rate": 2.6760547119202884e-06, "loss": 0.2312, "step": 35150 }, { "epoch": 0.7617056312699066, "grad_norm": 1.2017823457717896, "learning_rate": 2.6737377903929627e-06, "loss": 0.2621, "step": 35155 }, { "epoch": 0.7618139665893875, "grad_norm": 1.8273974657058716, "learning_rate": 2.671421717503364e-06, "loss": 0.1966, "step": 35160 }, { "epoch": 0.7619223019088683, "grad_norm": 1.236337661743164, "learning_rate": 2.6691064935197806e-06, "loss": 0.3469, "step": 35165 }, { "epoch": 0.7620306372283492, "grad_norm": 2.201608180999756, "learning_rate": 2.6667921187103896e-06, "loss": 0.2596, "step": 35170 }, { "epoch": 0.76213897254783, "grad_norm": 1.243823766708374, "learning_rate": 2.6644785933432828e-06, "loss": 0.2221, "step": 35175 }, { "epoch": 0.7622473078673109, "grad_norm": 1.2013459205627441, "learning_rate": 2.6621659176864423e-06, "loss": 0.2422, "step": 35180 }, { "epoch": 0.7623556431867917, "grad_norm": 1.0400499105453491, "learning_rate": 2.659854092007763e-06, "loss": 0.2004, "step": 35185 }, { "epoch": 0.7624639785062726, "grad_norm": 1.0540894269943237, "learning_rate": 2.657543116575031e-06, "loss": 0.2047, "step": 35190 }, { "epoch": 0.7625723138257535, "grad_norm": 1.3878912925720215, "learning_rate": 2.6552329916559338e-06, "loss": 0.1555, "step": 35195 }, { "epoch": 0.7626806491452344, "grad_norm": 1.529726505279541, "learning_rate": 2.6529237175180754e-06, "loss": 0.1726, "step": 35200 }, { "epoch": 0.7627889844647152, "grad_norm": 1.6345410346984863, "learning_rate": 2.650615294428942e-06, "loss": 0.232, "step": 35205 }, { "epoch": 0.7628973197841961, "grad_norm": 1.1981724500656128, "learning_rate": 2.6483077226559364e-06, "loss": 0.1663, "step": 35210 }, { "epoch": 0.7630056551036769, "grad_norm": 1.857906699180603, "learning_rate": 2.6460010024663494e-06, "loss": 0.264, "step": 35215 }, { "epoch": 0.7631139904231578, "grad_norm": 1.3484735488891602, "learning_rate": 2.6436951341273863e-06, "loss": 0.2137, "step": 35220 }, { "epoch": 0.7632223257426386, "grad_norm": 1.6689848899841309, "learning_rate": 2.641390117906145e-06, "loss": 0.1868, "step": 35225 }, { "epoch": 0.7633306610621194, "grad_norm": 1.6251472234725952, "learning_rate": 2.6390859540696222e-06, "loss": 0.2786, "step": 35230 }, { "epoch": 0.7634389963816003, "grad_norm": 1.4371695518493652, "learning_rate": 2.63678264288473e-06, "loss": 0.2784, "step": 35235 }, { "epoch": 0.7635473317010811, "grad_norm": 1.9032424688339233, "learning_rate": 2.6344801846182634e-06, "loss": 0.151, "step": 35240 }, { "epoch": 0.7636556670205621, "grad_norm": 1.4919800758361816, "learning_rate": 2.6321785795369324e-06, "loss": 0.2115, "step": 35245 }, { "epoch": 0.7637640023400429, "grad_norm": 1.8812246322631836, "learning_rate": 2.6298778279073435e-06, "loss": 0.2629, "step": 35250 }, { "epoch": 0.7638723376595238, "grad_norm": 1.2826485633850098, "learning_rate": 2.6275779299960056e-06, "loss": 0.1664, "step": 35255 }, { "epoch": 0.7639806729790046, "grad_norm": 1.9651154279708862, "learning_rate": 2.6252788860693266e-06, "loss": 0.2421, "step": 35260 }, { "epoch": 0.7640890082984855, "grad_norm": 0.7962360978126526, "learning_rate": 2.6229806963936124e-06, "loss": 0.1951, "step": 35265 }, { "epoch": 0.7641973436179663, "grad_norm": 1.1568807363510132, "learning_rate": 2.620683361235079e-06, "loss": 0.2109, "step": 35270 }, { "epoch": 0.7643056789374472, "grad_norm": 1.3525818586349487, "learning_rate": 2.6183868808598334e-06, "loss": 0.2546, "step": 35275 }, { "epoch": 0.764414014256928, "grad_norm": 2.3286209106445312, "learning_rate": 2.6160912555338937e-06, "loss": 0.1767, "step": 35280 }, { "epoch": 0.7645223495764089, "grad_norm": 0.680813193321228, "learning_rate": 2.613796485523169e-06, "loss": 0.1209, "step": 35285 }, { "epoch": 0.7646306848958898, "grad_norm": 1.387523889541626, "learning_rate": 2.6115025710934748e-06, "loss": 0.2318, "step": 35290 }, { "epoch": 0.7647390202153707, "grad_norm": 1.6273325681686401, "learning_rate": 2.6092095125105323e-06, "loss": 0.2774, "step": 35295 }, { "epoch": 0.7648473555348515, "grad_norm": 1.5001589059829712, "learning_rate": 2.60691731003995e-06, "loss": 0.2263, "step": 35300 }, { "epoch": 0.7649556908543323, "grad_norm": 1.8213340044021606, "learning_rate": 2.6046259639472525e-06, "loss": 0.2293, "step": 35305 }, { "epoch": 0.7650640261738132, "grad_norm": 2.085390090942383, "learning_rate": 2.6023354744978514e-06, "loss": 0.1903, "step": 35310 }, { "epoch": 0.765172361493294, "grad_norm": 1.4532945156097412, "learning_rate": 2.6000458419570717e-06, "loss": 0.1394, "step": 35315 }, { "epoch": 0.7652806968127749, "grad_norm": 1.4477219581604004, "learning_rate": 2.5977570665901264e-06, "loss": 0.2811, "step": 35320 }, { "epoch": 0.7653890321322557, "grad_norm": 1.8547167778015137, "learning_rate": 2.595469148662142e-06, "loss": 0.2305, "step": 35325 }, { "epoch": 0.7654973674517366, "grad_norm": 2.2004480361938477, "learning_rate": 2.5931820884381344e-06, "loss": 0.2511, "step": 35330 }, { "epoch": 0.7656057027712174, "grad_norm": 0.9466878771781921, "learning_rate": 2.5908958861830313e-06, "loss": 0.2311, "step": 35335 }, { "epoch": 0.7657140380906984, "grad_norm": 2.0634994506835938, "learning_rate": 2.588610542161647e-06, "loss": 0.2431, "step": 35340 }, { "epoch": 0.7658223734101792, "grad_norm": 1.6368091106414795, "learning_rate": 2.5863260566387105e-06, "loss": 0.2732, "step": 35345 }, { "epoch": 0.7659307087296601, "grad_norm": 1.0243017673492432, "learning_rate": 2.5840424298788448e-06, "loss": 0.2906, "step": 35350 }, { "epoch": 0.7660390440491409, "grad_norm": 1.8603549003601074, "learning_rate": 2.58175966214657e-06, "loss": 0.2742, "step": 35355 }, { "epoch": 0.7661473793686218, "grad_norm": 1.3865289688110352, "learning_rate": 2.5794777537063176e-06, "loss": 0.3053, "step": 35360 }, { "epoch": 0.7662557146881026, "grad_norm": 1.0831763744354248, "learning_rate": 2.5771967048224033e-06, "loss": 0.2401, "step": 35365 }, { "epoch": 0.7663640500075835, "grad_norm": 1.1887620687484741, "learning_rate": 2.5749165157590605e-06, "loss": 0.1972, "step": 35370 }, { "epoch": 0.7664723853270643, "grad_norm": 1.432744026184082, "learning_rate": 2.572637186780409e-06, "loss": 0.2023, "step": 35375 }, { "epoch": 0.7665807206465451, "grad_norm": 1.9547119140625, "learning_rate": 2.570358718150481e-06, "loss": 0.2547, "step": 35380 }, { "epoch": 0.766689055966026, "grad_norm": 0.8986815810203552, "learning_rate": 2.568081110133195e-06, "loss": 0.2141, "step": 35385 }, { "epoch": 0.766797391285507, "grad_norm": 1.0828633308410645, "learning_rate": 2.5658043629923834e-06, "loss": 0.2198, "step": 35390 }, { "epoch": 0.7669057266049878, "grad_norm": 1.2933320999145508, "learning_rate": 2.5635284769917744e-06, "loss": 0.1489, "step": 35395 }, { "epoch": 0.7670140619244686, "grad_norm": 1.5250861644744873, "learning_rate": 2.5612534523949906e-06, "loss": 0.164, "step": 35400 }, { "epoch": 0.7671223972439495, "grad_norm": 1.2567863464355469, "learning_rate": 2.558979289465565e-06, "loss": 0.2752, "step": 35405 }, { "epoch": 0.7672307325634303, "grad_norm": 1.2285627126693726, "learning_rate": 2.5567059884669186e-06, "loss": 0.2503, "step": 35410 }, { "epoch": 0.7673390678829112, "grad_norm": 1.9269438982009888, "learning_rate": 2.5544335496623862e-06, "loss": 0.235, "step": 35415 }, { "epoch": 0.767447403202392, "grad_norm": 1.3068654537200928, "learning_rate": 2.55216197331519e-06, "loss": 0.2186, "step": 35420 }, { "epoch": 0.7675557385218729, "grad_norm": 1.2803186178207397, "learning_rate": 2.549891259688463e-06, "loss": 0.2267, "step": 35425 }, { "epoch": 0.7676640738413537, "grad_norm": 1.6044130325317383, "learning_rate": 2.547621409045231e-06, "loss": 0.2046, "step": 35430 }, { "epoch": 0.7677724091608347, "grad_norm": 2.0709216594696045, "learning_rate": 2.545352421648416e-06, "loss": 0.1651, "step": 35435 }, { "epoch": 0.7678807444803155, "grad_norm": 1.6099894046783447, "learning_rate": 2.5430842977608596e-06, "loss": 0.2305, "step": 35440 }, { "epoch": 0.7679890797997964, "grad_norm": 1.4154976606369019, "learning_rate": 2.540817037645278e-06, "loss": 0.2236, "step": 35445 }, { "epoch": 0.7680974151192772, "grad_norm": 1.260063886642456, "learning_rate": 2.538550641564308e-06, "loss": 0.1672, "step": 35450 }, { "epoch": 0.768205750438758, "grad_norm": 1.6506272554397583, "learning_rate": 2.5362851097804696e-06, "loss": 0.2385, "step": 35455 }, { "epoch": 0.7683140857582389, "grad_norm": 1.4286150932312012, "learning_rate": 2.5340204425561987e-06, "loss": 0.3075, "step": 35460 }, { "epoch": 0.7684224210777197, "grad_norm": 1.27877676486969, "learning_rate": 2.53175664015382e-06, "loss": 0.2319, "step": 35465 }, { "epoch": 0.7685307563972006, "grad_norm": 1.9208505153656006, "learning_rate": 2.529493702835556e-06, "loss": 0.205, "step": 35470 }, { "epoch": 0.7686390917166814, "grad_norm": 1.4377682209014893, "learning_rate": 2.5272316308635415e-06, "loss": 0.2053, "step": 35475 }, { "epoch": 0.7687474270361623, "grad_norm": 1.7205095291137695, "learning_rate": 2.5249704244997975e-06, "loss": 0.2255, "step": 35480 }, { "epoch": 0.7688557623556432, "grad_norm": 0.8439889550209045, "learning_rate": 2.5227100840062534e-06, "loss": 0.1922, "step": 35485 }, { "epoch": 0.7689640976751241, "grad_norm": 1.3127524852752686, "learning_rate": 2.520450609644738e-06, "loss": 0.2139, "step": 35490 }, { "epoch": 0.7690724329946049, "grad_norm": 2.0852608680725098, "learning_rate": 2.5181920016769767e-06, "loss": 0.1802, "step": 35495 }, { "epoch": 0.7691807683140858, "grad_norm": 1.6641044616699219, "learning_rate": 2.5159342603645965e-06, "loss": 0.2336, "step": 35500 }, { "epoch": 0.7692891036335666, "grad_norm": 1.8340495824813843, "learning_rate": 2.5136773859691164e-06, "loss": 0.2425, "step": 35505 }, { "epoch": 0.7693974389530475, "grad_norm": 1.3559348583221436, "learning_rate": 2.5114213787519692e-06, "loss": 0.2044, "step": 35510 }, { "epoch": 0.7695057742725283, "grad_norm": 1.1172654628753662, "learning_rate": 2.5091662389744743e-06, "loss": 0.1918, "step": 35515 }, { "epoch": 0.7696141095920092, "grad_norm": 1.4359633922576904, "learning_rate": 2.5069119668978605e-06, "loss": 0.1815, "step": 35520 }, { "epoch": 0.76972244491149, "grad_norm": 1.3818504810333252, "learning_rate": 2.504658562783245e-06, "loss": 0.1945, "step": 35525 }, { "epoch": 0.7698307802309708, "grad_norm": 1.272478699684143, "learning_rate": 2.5024060268916593e-06, "loss": 0.202, "step": 35530 }, { "epoch": 0.7699391155504518, "grad_norm": 1.2947998046875, "learning_rate": 2.5001543594840183e-06, "loss": 0.2559, "step": 35535 }, { "epoch": 0.7700474508699326, "grad_norm": 1.9677634239196777, "learning_rate": 2.4979035608211464e-06, "loss": 0.1821, "step": 35540 }, { "epoch": 0.7701557861894135, "grad_norm": 1.9435712099075317, "learning_rate": 2.495653631163768e-06, "loss": 0.2121, "step": 35545 }, { "epoch": 0.7702641215088943, "grad_norm": 0.9742671251296997, "learning_rate": 2.4934045707724995e-06, "loss": 0.3273, "step": 35550 }, { "epoch": 0.7703724568283752, "grad_norm": 1.5962296724319458, "learning_rate": 2.4911563799078654e-06, "loss": 0.2401, "step": 35555 }, { "epoch": 0.770480792147856, "grad_norm": 1.0036576986312866, "learning_rate": 2.488909058830279e-06, "loss": 0.2147, "step": 35560 }, { "epoch": 0.7705891274673369, "grad_norm": 1.237149953842163, "learning_rate": 2.4866626078000644e-06, "loss": 0.1952, "step": 35565 }, { "epoch": 0.7706974627868177, "grad_norm": 1.476428747177124, "learning_rate": 2.484417027077435e-06, "loss": 0.2121, "step": 35570 }, { "epoch": 0.7708057981062986, "grad_norm": 1.895906925201416, "learning_rate": 2.482172316922512e-06, "loss": 0.2345, "step": 35575 }, { "epoch": 0.7709141334257795, "grad_norm": 1.4142009019851685, "learning_rate": 2.479928477595306e-06, "loss": 0.2734, "step": 35580 }, { "epoch": 0.7710224687452604, "grad_norm": 1.801499366760254, "learning_rate": 2.477685509355735e-06, "loss": 0.2639, "step": 35585 }, { "epoch": 0.7711308040647412, "grad_norm": 1.0334864854812622, "learning_rate": 2.475443412463617e-06, "loss": 0.2712, "step": 35590 }, { "epoch": 0.7712391393842221, "grad_norm": 1.4605956077575684, "learning_rate": 2.4732021871786595e-06, "loss": 0.3272, "step": 35595 }, { "epoch": 0.7713474747037029, "grad_norm": 1.4838793277740479, "learning_rate": 2.470961833760479e-06, "loss": 0.2409, "step": 35600 }, { "epoch": 0.7714558100231838, "grad_norm": 1.1406196355819702, "learning_rate": 2.468722352468582e-06, "loss": 0.1561, "step": 35605 }, { "epoch": 0.7715641453426646, "grad_norm": 1.0477774143218994, "learning_rate": 2.4664837435623854e-06, "loss": 0.2229, "step": 35610 }, { "epoch": 0.7716724806621454, "grad_norm": 1.0516701936721802, "learning_rate": 2.464246007301192e-06, "loss": 0.2444, "step": 35615 }, { "epoch": 0.7717808159816263, "grad_norm": 0.8519303798675537, "learning_rate": 2.462009143944216e-06, "loss": 0.3228, "step": 35620 }, { "epoch": 0.7718891513011071, "grad_norm": 1.4004422426223755, "learning_rate": 2.4597731537505585e-06, "loss": 0.2168, "step": 35625 }, { "epoch": 0.7719974866205881, "grad_norm": 0.7493157982826233, "learning_rate": 2.457538036979229e-06, "loss": 0.1286, "step": 35630 }, { "epoch": 0.7721058219400689, "grad_norm": 1.3961323499679565, "learning_rate": 2.4553037938891344e-06, "loss": 0.2661, "step": 35635 }, { "epoch": 0.7722141572595498, "grad_norm": 1.8257503509521484, "learning_rate": 2.4530704247390724e-06, "loss": 0.2459, "step": 35640 }, { "epoch": 0.7723224925790306, "grad_norm": 2.035957098007202, "learning_rate": 2.4508379297877527e-06, "loss": 0.2558, "step": 35645 }, { "epoch": 0.7724308278985115, "grad_norm": 1.2826297283172607, "learning_rate": 2.4486063092937685e-06, "loss": 0.2195, "step": 35650 }, { "epoch": 0.7725391632179923, "grad_norm": 0.9586272239685059, "learning_rate": 2.446375563515627e-06, "loss": 0.2197, "step": 35655 }, { "epoch": 0.7726474985374732, "grad_norm": 1.4833855628967285, "learning_rate": 2.44414569271172e-06, "loss": 0.4338, "step": 35660 }, { "epoch": 0.772755833856954, "grad_norm": 1.6251839399337769, "learning_rate": 2.4419166971403506e-06, "loss": 0.2339, "step": 35665 }, { "epoch": 0.7728641691764349, "grad_norm": 1.594059944152832, "learning_rate": 2.439688577059712e-06, "loss": 0.2898, "step": 35670 }, { "epoch": 0.7729725044959158, "grad_norm": 1.6332437992095947, "learning_rate": 2.437461332727893e-06, "loss": 0.1986, "step": 35675 }, { "epoch": 0.7730808398153967, "grad_norm": 1.6094392538070679, "learning_rate": 2.435234964402896e-06, "loss": 0.1874, "step": 35680 }, { "epoch": 0.7731891751348775, "grad_norm": 1.3639607429504395, "learning_rate": 2.433009472342607e-06, "loss": 0.333, "step": 35685 }, { "epoch": 0.7732975104543583, "grad_norm": 1.355735421180725, "learning_rate": 2.4307848568048187e-06, "loss": 0.3085, "step": 35690 }, { "epoch": 0.7734058457738392, "grad_norm": 1.1579903364181519, "learning_rate": 2.428561118047216e-06, "loss": 0.2345, "step": 35695 }, { "epoch": 0.77351418109332, "grad_norm": 2.02541184425354, "learning_rate": 2.4263382563273908e-06, "loss": 0.2108, "step": 35700 }, { "epoch": 0.7736225164128009, "grad_norm": 1.5589699745178223, "learning_rate": 2.4241162719028245e-06, "loss": 0.2257, "step": 35705 }, { "epoch": 0.7737308517322817, "grad_norm": 1.7421631813049316, "learning_rate": 2.4218951650308974e-06, "loss": 0.1726, "step": 35710 }, { "epoch": 0.7738391870517626, "grad_norm": 1.4530731439590454, "learning_rate": 2.4196749359689e-06, "loss": 0.2166, "step": 35715 }, { "epoch": 0.7739475223712434, "grad_norm": 1.7098300457000732, "learning_rate": 2.4174555849740044e-06, "loss": 0.2539, "step": 35720 }, { "epoch": 0.7740558576907244, "grad_norm": 1.0021456480026245, "learning_rate": 2.4152371123032926e-06, "loss": 0.1915, "step": 35725 }, { "epoch": 0.7741641930102052, "grad_norm": 1.757913589477539, "learning_rate": 2.413019518213742e-06, "loss": 0.2698, "step": 35730 }, { "epoch": 0.7742725283296861, "grad_norm": 1.3717890977859497, "learning_rate": 2.41080280296223e-06, "loss": 0.2588, "step": 35735 }, { "epoch": 0.7743808636491669, "grad_norm": 1.4393339157104492, "learning_rate": 2.408586966805527e-06, "loss": 0.1201, "step": 35740 }, { "epoch": 0.7744891989686478, "grad_norm": 1.1908421516418457, "learning_rate": 2.406372010000302e-06, "loss": 0.2126, "step": 35745 }, { "epoch": 0.7745975342881286, "grad_norm": 1.6235758066177368, "learning_rate": 2.4041579328031296e-06, "loss": 0.2556, "step": 35750 }, { "epoch": 0.7747058696076095, "grad_norm": 1.6811457872390747, "learning_rate": 2.4019447354704726e-06, "loss": 0.2573, "step": 35755 }, { "epoch": 0.7748142049270903, "grad_norm": 2.1681265830993652, "learning_rate": 2.3997324182587014e-06, "loss": 0.2755, "step": 35760 }, { "epoch": 0.7749225402465711, "grad_norm": 1.7381939888000488, "learning_rate": 2.397520981424075e-06, "loss": 0.2712, "step": 35765 }, { "epoch": 0.775030875566052, "grad_norm": 1.3468049764633179, "learning_rate": 2.395310425222761e-06, "loss": 0.2835, "step": 35770 }, { "epoch": 0.775139210885533, "grad_norm": 1.2205616235733032, "learning_rate": 2.393100749910813e-06, "loss": 0.1674, "step": 35775 }, { "epoch": 0.7752475462050138, "grad_norm": 1.479773998260498, "learning_rate": 2.3908919557441913e-06, "loss": 0.2437, "step": 35780 }, { "epoch": 0.7753558815244946, "grad_norm": 1.9358291625976562, "learning_rate": 2.388684042978755e-06, "loss": 0.1978, "step": 35785 }, { "epoch": 0.7754642168439755, "grad_norm": 1.0283056497573853, "learning_rate": 2.386477011870252e-06, "loss": 0.2079, "step": 35790 }, { "epoch": 0.7755725521634563, "grad_norm": 1.1759897470474243, "learning_rate": 2.384270862674339e-06, "loss": 0.2035, "step": 35795 }, { "epoch": 0.7756808874829372, "grad_norm": 1.6479369401931763, "learning_rate": 2.382065595646561e-06, "loss": 0.2342, "step": 35800 }, { "epoch": 0.775789222802418, "grad_norm": 1.3353968858718872, "learning_rate": 2.37986121104237e-06, "loss": 0.3289, "step": 35805 }, { "epoch": 0.7758975581218989, "grad_norm": 1.1192805767059326, "learning_rate": 2.3776577091171048e-06, "loss": 0.2265, "step": 35810 }, { "epoch": 0.7760058934413797, "grad_norm": 1.3634393215179443, "learning_rate": 2.3754550901260143e-06, "loss": 0.2977, "step": 35815 }, { "epoch": 0.7761142287608607, "grad_norm": 2.969839096069336, "learning_rate": 2.373253354324232e-06, "loss": 0.3141, "step": 35820 }, { "epoch": 0.7762225640803415, "grad_norm": 1.6283433437347412, "learning_rate": 2.3710525019668017e-06, "loss": 0.1832, "step": 35825 }, { "epoch": 0.7763308993998224, "grad_norm": 1.0813660621643066, "learning_rate": 2.3688525333086588e-06, "loss": 0.2852, "step": 35830 }, { "epoch": 0.7764392347193032, "grad_norm": 1.6328898668289185, "learning_rate": 2.366653448604633e-06, "loss": 0.2739, "step": 35835 }, { "epoch": 0.776547570038784, "grad_norm": 1.4202911853790283, "learning_rate": 2.3644552481094606e-06, "loss": 0.1842, "step": 35840 }, { "epoch": 0.7766559053582649, "grad_norm": 1.5648733377456665, "learning_rate": 2.362257932077765e-06, "loss": 0.3312, "step": 35845 }, { "epoch": 0.7767642406777457, "grad_norm": 1.6207624673843384, "learning_rate": 2.360061500764077e-06, "loss": 0.1446, "step": 35850 }, { "epoch": 0.7768725759972266, "grad_norm": 0.8867859840393066, "learning_rate": 2.3578659544228146e-06, "loss": 0.2407, "step": 35855 }, { "epoch": 0.7769809113167074, "grad_norm": 0.8332605361938477, "learning_rate": 2.3556712933083057e-06, "loss": 0.1578, "step": 35860 }, { "epoch": 0.7770892466361883, "grad_norm": 1.9924241304397583, "learning_rate": 2.3534775176747626e-06, "loss": 0.192, "step": 35865 }, { "epoch": 0.7771975819556692, "grad_norm": 0.6115329265594482, "learning_rate": 2.3512846277763037e-06, "loss": 0.2761, "step": 35870 }, { "epoch": 0.7773059172751501, "grad_norm": 0.9016456007957458, "learning_rate": 2.3490926238669455e-06, "loss": 0.2561, "step": 35875 }, { "epoch": 0.7774142525946309, "grad_norm": 0.9649950265884399, "learning_rate": 2.346901506200594e-06, "loss": 0.178, "step": 35880 }, { "epoch": 0.7775225879141118, "grad_norm": 1.4223532676696777, "learning_rate": 2.344711275031062e-06, "loss": 0.1884, "step": 35885 }, { "epoch": 0.7776309232335926, "grad_norm": 1.1285828351974487, "learning_rate": 2.3425219306120505e-06, "loss": 0.2863, "step": 35890 }, { "epoch": 0.7777392585530735, "grad_norm": 0.31394925713539124, "learning_rate": 2.340333473197166e-06, "loss": 0.1966, "step": 35895 }, { "epoch": 0.7778475938725543, "grad_norm": 1.5572502613067627, "learning_rate": 2.3381459030399044e-06, "loss": 0.2536, "step": 35900 }, { "epoch": 0.7779559291920352, "grad_norm": 1.4098470211029053, "learning_rate": 2.3359592203936688e-06, "loss": 0.3123, "step": 35905 }, { "epoch": 0.778064264511516, "grad_norm": 1.6550240516662598, "learning_rate": 2.3337734255117494e-06, "loss": 0.2022, "step": 35910 }, { "epoch": 0.7781725998309968, "grad_norm": 1.8245891332626343, "learning_rate": 2.3315885186473364e-06, "loss": 0.154, "step": 35915 }, { "epoch": 0.7782809351504778, "grad_norm": 1.047271728515625, "learning_rate": 2.3294045000535205e-06, "loss": 0.2466, "step": 35920 }, { "epoch": 0.7783892704699586, "grad_norm": 1.0916191339492798, "learning_rate": 2.3272213699832878e-06, "loss": 0.2253, "step": 35925 }, { "epoch": 0.7784976057894395, "grad_norm": 1.2782566547393799, "learning_rate": 2.3250391286895245e-06, "loss": 0.2063, "step": 35930 }, { "epoch": 0.7786059411089203, "grad_norm": 1.777658462524414, "learning_rate": 2.322857776425004e-06, "loss": 0.2447, "step": 35935 }, { "epoch": 0.7787142764284012, "grad_norm": 1.7203319072723389, "learning_rate": 2.320677313442409e-06, "loss": 0.2322, "step": 35940 }, { "epoch": 0.778822611747882, "grad_norm": 1.679153561592102, "learning_rate": 2.318497739994311e-06, "loss": 0.1784, "step": 35945 }, { "epoch": 0.7789309470673629, "grad_norm": 0.9954844117164612, "learning_rate": 2.316319056333178e-06, "loss": 0.1881, "step": 35950 }, { "epoch": 0.7790392823868437, "grad_norm": 1.367056965827942, "learning_rate": 2.314141262711385e-06, "loss": 0.156, "step": 35955 }, { "epoch": 0.7791476177063246, "grad_norm": 1.5057202577590942, "learning_rate": 2.311964359381188e-06, "loss": 0.2171, "step": 35960 }, { "epoch": 0.7792559530258055, "grad_norm": 1.00935959815979, "learning_rate": 2.3097883465947537e-06, "loss": 0.2686, "step": 35965 }, { "epoch": 0.7793642883452864, "grad_norm": 2.3933777809143066, "learning_rate": 2.3076132246041406e-06, "loss": 0.2392, "step": 35970 }, { "epoch": 0.7794726236647672, "grad_norm": 1.394768238067627, "learning_rate": 2.305438993661305e-06, "loss": 0.2859, "step": 35975 }, { "epoch": 0.7795809589842481, "grad_norm": 1.7818714380264282, "learning_rate": 2.3032656540180987e-06, "loss": 0.2055, "step": 35980 }, { "epoch": 0.7796892943037289, "grad_norm": 1.531355619430542, "learning_rate": 2.301093205926267e-06, "loss": 0.2662, "step": 35985 }, { "epoch": 0.7797976296232098, "grad_norm": 1.5000466108322144, "learning_rate": 2.298921649637459e-06, "loss": 0.2284, "step": 35990 }, { "epoch": 0.7799059649426906, "grad_norm": 0.9684063792228699, "learning_rate": 2.2967509854032145e-06, "loss": 0.251, "step": 35995 }, { "epoch": 0.7800143002621714, "grad_norm": 2.1994857788085938, "learning_rate": 2.294581213474976e-06, "loss": 0.1679, "step": 36000 }, { "epoch": 0.7801226355816523, "grad_norm": 1.609145164489746, "learning_rate": 2.2924123341040727e-06, "loss": 0.2673, "step": 36005 }, { "epoch": 0.7802309709011331, "grad_norm": 1.8508598804473877, "learning_rate": 2.2902443475417446e-06, "loss": 0.3084, "step": 36010 }, { "epoch": 0.7803393062206141, "grad_norm": 0.8779188394546509, "learning_rate": 2.2880772540391118e-06, "loss": 0.1948, "step": 36015 }, { "epoch": 0.7804476415400949, "grad_norm": 1.0334079265594482, "learning_rate": 2.2859110538472085e-06, "loss": 0.2809, "step": 36020 }, { "epoch": 0.7805559768595758, "grad_norm": 1.9438432455062866, "learning_rate": 2.2837457472169534e-06, "loss": 0.2802, "step": 36025 }, { "epoch": 0.7806643121790566, "grad_norm": 0.7745115160942078, "learning_rate": 2.2815813343991623e-06, "loss": 0.2034, "step": 36030 }, { "epoch": 0.7807726474985375, "grad_norm": 1.5119901895523071, "learning_rate": 2.2794178156445523e-06, "loss": 0.2282, "step": 36035 }, { "epoch": 0.7808809828180183, "grad_norm": 2.1360116004943848, "learning_rate": 2.2772551912037334e-06, "loss": 0.1827, "step": 36040 }, { "epoch": 0.7809893181374992, "grad_norm": 0.9478182196617126, "learning_rate": 2.275093461327216e-06, "loss": 0.2119, "step": 36045 }, { "epoch": 0.78109765345698, "grad_norm": 1.8268413543701172, "learning_rate": 2.272932626265398e-06, "loss": 0.2034, "step": 36050 }, { "epoch": 0.7812059887764609, "grad_norm": 1.428157091140747, "learning_rate": 2.2707726862685875e-06, "loss": 0.3198, "step": 36055 }, { "epoch": 0.7813143240959417, "grad_norm": 1.4952070713043213, "learning_rate": 2.268613641586975e-06, "loss": 0.2102, "step": 36060 }, { "epoch": 0.7814226594154227, "grad_norm": 1.2949259281158447, "learning_rate": 2.266455492470656e-06, "loss": 0.2858, "step": 36065 }, { "epoch": 0.7815309947349035, "grad_norm": 1.3308008909225464, "learning_rate": 2.2642982391696223e-06, "loss": 0.2078, "step": 36070 }, { "epoch": 0.7816393300543844, "grad_norm": 1.6434736251831055, "learning_rate": 2.2621418819337536e-06, "loss": 0.1967, "step": 36075 }, { "epoch": 0.7817476653738652, "grad_norm": 1.6545251607894897, "learning_rate": 2.2599864210128374e-06, "loss": 0.2474, "step": 36080 }, { "epoch": 0.781856000693346, "grad_norm": 1.5957059860229492, "learning_rate": 2.2578318566565473e-06, "loss": 0.1764, "step": 36085 }, { "epoch": 0.7819643360128269, "grad_norm": 2.074122190475464, "learning_rate": 2.2556781891144607e-06, "loss": 0.1926, "step": 36090 }, { "epoch": 0.7820726713323077, "grad_norm": 1.30413818359375, "learning_rate": 2.253525418636043e-06, "loss": 0.2, "step": 36095 }, { "epoch": 0.7821810066517886, "grad_norm": 1.0098721981048584, "learning_rate": 2.2513735454706664e-06, "loss": 0.2172, "step": 36100 }, { "epoch": 0.7822893419712694, "grad_norm": 1.5350556373596191, "learning_rate": 2.2492225698675875e-06, "loss": 0.1993, "step": 36105 }, { "epoch": 0.7823976772907504, "grad_norm": 1.3968783617019653, "learning_rate": 2.247072492075968e-06, "loss": 0.1571, "step": 36110 }, { "epoch": 0.7825060126102312, "grad_norm": 1.3914557695388794, "learning_rate": 2.2449233123448633e-06, "loss": 0.2355, "step": 36115 }, { "epoch": 0.7826143479297121, "grad_norm": 1.8005603551864624, "learning_rate": 2.2427750309232187e-06, "loss": 0.3263, "step": 36120 }, { "epoch": 0.7827226832491929, "grad_norm": 1.9510136842727661, "learning_rate": 2.2406276480598875e-06, "loss": 0.2841, "step": 36125 }, { "epoch": 0.7828310185686738, "grad_norm": 1.8544646501541138, "learning_rate": 2.2384811640036042e-06, "loss": 0.2356, "step": 36130 }, { "epoch": 0.7829393538881546, "grad_norm": 1.7031333446502686, "learning_rate": 2.236335579003014e-06, "loss": 0.2518, "step": 36135 }, { "epoch": 0.7830476892076355, "grad_norm": 1.3810365200042725, "learning_rate": 2.2341908933066437e-06, "loss": 0.2252, "step": 36140 }, { "epoch": 0.7831560245271163, "grad_norm": 1.8917360305786133, "learning_rate": 2.23204710716293e-06, "loss": 0.2824, "step": 36145 }, { "epoch": 0.7832643598465971, "grad_norm": 1.814288854598999, "learning_rate": 2.2299042208201914e-06, "loss": 0.1844, "step": 36150 }, { "epoch": 0.783372695166078, "grad_norm": 1.5723146200180054, "learning_rate": 2.227762234526657e-06, "loss": 0.1662, "step": 36155 }, { "epoch": 0.783481030485559, "grad_norm": 1.4460784196853638, "learning_rate": 2.2256211485304357e-06, "loss": 0.2202, "step": 36160 }, { "epoch": 0.7835893658050398, "grad_norm": 1.1275783777236938, "learning_rate": 2.223480963079544e-06, "loss": 0.1392, "step": 36165 }, { "epoch": 0.7836977011245206, "grad_norm": 1.5202877521514893, "learning_rate": 2.2213416784218944e-06, "loss": 0.214, "step": 36170 }, { "epoch": 0.7838060364440015, "grad_norm": 1.1311637163162231, "learning_rate": 2.2192032948052833e-06, "loss": 0.1741, "step": 36175 }, { "epoch": 0.7839143717634823, "grad_norm": 1.1383264064788818, "learning_rate": 2.217065812477417e-06, "loss": 0.2762, "step": 36180 }, { "epoch": 0.7840227070829632, "grad_norm": 0.7756058573722839, "learning_rate": 2.214929231685886e-06, "loss": 0.2074, "step": 36185 }, { "epoch": 0.784131042402444, "grad_norm": 1.8345516920089722, "learning_rate": 2.212793552678185e-06, "loss": 0.2226, "step": 36190 }, { "epoch": 0.7842393777219249, "grad_norm": 1.1518045663833618, "learning_rate": 2.2106587757016994e-06, "loss": 0.2614, "step": 36195 }, { "epoch": 0.7843477130414057, "grad_norm": 0.8351070284843445, "learning_rate": 2.2085249010037067e-06, "loss": 0.2338, "step": 36200 }, { "epoch": 0.7844560483608867, "grad_norm": 1.3738807439804077, "learning_rate": 2.2063919288313884e-06, "loss": 0.1805, "step": 36205 }, { "epoch": 0.7845643836803675, "grad_norm": 0.5577611327171326, "learning_rate": 2.2042598594318165e-06, "loss": 0.2682, "step": 36210 }, { "epoch": 0.7846727189998484, "grad_norm": 1.2887787818908691, "learning_rate": 2.2021286930519636e-06, "loss": 0.258, "step": 36215 }, { "epoch": 0.7847810543193292, "grad_norm": 1.5871285200119019, "learning_rate": 2.1999984299386855e-06, "loss": 0.2194, "step": 36220 }, { "epoch": 0.78488938963881, "grad_norm": 0.6991038918495178, "learning_rate": 2.1978690703387484e-06, "loss": 0.145, "step": 36225 }, { "epoch": 0.7849977249582909, "grad_norm": 0.6791013479232788, "learning_rate": 2.1957406144988035e-06, "loss": 0.2581, "step": 36230 }, { "epoch": 0.7851060602777717, "grad_norm": 0.8966258764266968, "learning_rate": 2.1936130626653984e-06, "loss": 0.2032, "step": 36235 }, { "epoch": 0.7852143955972526, "grad_norm": 1.3506553173065186, "learning_rate": 2.1914864150849825e-06, "loss": 0.2245, "step": 36240 }, { "epoch": 0.7853227309167334, "grad_norm": 0.9734497666358948, "learning_rate": 2.189360672003892e-06, "loss": 0.2208, "step": 36245 }, { "epoch": 0.7854310662362143, "grad_norm": 1.2602033615112305, "learning_rate": 2.1872358336683664e-06, "loss": 0.1886, "step": 36250 }, { "epoch": 0.7855394015556952, "grad_norm": 1.1574152708053589, "learning_rate": 2.185111900324528e-06, "loss": 0.1816, "step": 36255 }, { "epoch": 0.7856477368751761, "grad_norm": 1.0461323261260986, "learning_rate": 2.1829888722184155e-06, "loss": 0.2733, "step": 36260 }, { "epoch": 0.7857560721946569, "grad_norm": 1.2113274335861206, "learning_rate": 2.1808667495959425e-06, "loss": 0.1955, "step": 36265 }, { "epoch": 0.7858644075141378, "grad_norm": 1.2123695611953735, "learning_rate": 2.178745532702924e-06, "loss": 0.2993, "step": 36270 }, { "epoch": 0.7859727428336186, "grad_norm": 1.837969422340393, "learning_rate": 2.176625221785076e-06, "loss": 0.2628, "step": 36275 }, { "epoch": 0.7860810781530995, "grad_norm": 1.1938972473144531, "learning_rate": 2.1745058170879986e-06, "loss": 0.2245, "step": 36280 }, { "epoch": 0.7861894134725803, "grad_norm": 1.2878214120864868, "learning_rate": 2.1723873188571987e-06, "loss": 0.2694, "step": 36285 }, { "epoch": 0.7862977487920612, "grad_norm": 1.7792340517044067, "learning_rate": 2.170269727338068e-06, "loss": 0.2835, "step": 36290 }, { "epoch": 0.786406084111542, "grad_norm": 1.1990846395492554, "learning_rate": 2.1681530427759033e-06, "loss": 0.1711, "step": 36295 }, { "epoch": 0.7865144194310228, "grad_norm": 1.7192832231521606, "learning_rate": 2.1660372654158844e-06, "loss": 0.2404, "step": 36300 }, { "epoch": 0.7866227547505038, "grad_norm": 1.3709299564361572, "learning_rate": 2.1639223955030954e-06, "loss": 0.1784, "step": 36305 }, { "epoch": 0.7867310900699847, "grad_norm": 1.6268999576568604, "learning_rate": 2.161808433282515e-06, "loss": 0.1707, "step": 36310 }, { "epoch": 0.7868394253894655, "grad_norm": 1.6354022026062012, "learning_rate": 2.1596953789990095e-06, "loss": 0.2288, "step": 36315 }, { "epoch": 0.7869477607089463, "grad_norm": 2.563215494155884, "learning_rate": 2.157583232897349e-06, "loss": 0.2662, "step": 36320 }, { "epoch": 0.7870560960284272, "grad_norm": 1.712620735168457, "learning_rate": 2.155471995222189e-06, "loss": 0.2898, "step": 36325 }, { "epoch": 0.787164431347908, "grad_norm": 1.960006833076477, "learning_rate": 2.15336166621809e-06, "loss": 0.1504, "step": 36330 }, { "epoch": 0.7872727666673889, "grad_norm": 1.5494223833084106, "learning_rate": 2.151252246129496e-06, "loss": 0.2121, "step": 36335 }, { "epoch": 0.7873811019868697, "grad_norm": 1.1000686883926392, "learning_rate": 2.149143735200758e-06, "loss": 0.1445, "step": 36340 }, { "epoch": 0.7874894373063506, "grad_norm": 1.1753573417663574, "learning_rate": 2.14703613367611e-06, "loss": 0.2821, "step": 36345 }, { "epoch": 0.7875977726258315, "grad_norm": 1.8043999671936035, "learning_rate": 2.1449294417996893e-06, "loss": 0.2203, "step": 36350 }, { "epoch": 0.7877061079453124, "grad_norm": 1.8980662822723389, "learning_rate": 2.142823659815525e-06, "loss": 0.2155, "step": 36355 }, { "epoch": 0.7878144432647932, "grad_norm": 1.7102980613708496, "learning_rate": 2.140718787967537e-06, "loss": 0.2505, "step": 36360 }, { "epoch": 0.7879227785842741, "grad_norm": 0.7510504722595215, "learning_rate": 2.1386148264995486e-06, "loss": 0.1809, "step": 36365 }, { "epoch": 0.7880311139037549, "grad_norm": 1.3064838647842407, "learning_rate": 2.1365117756552654e-06, "loss": 0.1631, "step": 36370 }, { "epoch": 0.7881394492232358, "grad_norm": 2.171809196472168, "learning_rate": 2.1344096356783016e-06, "loss": 0.2668, "step": 36375 }, { "epoch": 0.7882477845427166, "grad_norm": 0.9690737724304199, "learning_rate": 2.1323084068121527e-06, "loss": 0.2071, "step": 36380 }, { "epoch": 0.7883561198621974, "grad_norm": 1.76498281955719, "learning_rate": 2.1302080893002185e-06, "loss": 0.2393, "step": 36385 }, { "epoch": 0.7884644551816783, "grad_norm": 0.5928475260734558, "learning_rate": 2.1281086833857844e-06, "loss": 0.2383, "step": 36390 }, { "epoch": 0.7885727905011591, "grad_norm": 1.0543453693389893, "learning_rate": 2.1260101893120423e-06, "loss": 0.184, "step": 36395 }, { "epoch": 0.7886811258206401, "grad_norm": 1.4724195003509521, "learning_rate": 2.1239126073220637e-06, "loss": 0.2363, "step": 36400 }, { "epoch": 0.7887894611401209, "grad_norm": 1.4928441047668457, "learning_rate": 2.1218159376588276e-06, "loss": 0.2891, "step": 36405 }, { "epoch": 0.7888977964596018, "grad_norm": 1.0248321294784546, "learning_rate": 2.119720180565201e-06, "loss": 0.2737, "step": 36410 }, { "epoch": 0.7890061317790826, "grad_norm": 1.4008328914642334, "learning_rate": 2.1176253362839427e-06, "loss": 0.2055, "step": 36415 }, { "epoch": 0.7891144670985635, "grad_norm": 1.1637535095214844, "learning_rate": 2.1155314050577137e-06, "loss": 0.2318, "step": 36420 }, { "epoch": 0.7892228024180443, "grad_norm": 1.5723663568496704, "learning_rate": 2.11343838712906e-06, "loss": 0.2418, "step": 36425 }, { "epoch": 0.7893311377375252, "grad_norm": 1.3930634260177612, "learning_rate": 2.111346282740431e-06, "loss": 0.1975, "step": 36430 }, { "epoch": 0.789439473057006, "grad_norm": 1.4496948719024658, "learning_rate": 2.1092550921341624e-06, "loss": 0.2066, "step": 36435 }, { "epoch": 0.7895478083764869, "grad_norm": 1.7826565504074097, "learning_rate": 2.107164815552486e-06, "loss": 0.221, "step": 36440 }, { "epoch": 0.7896561436959677, "grad_norm": 1.1414594650268555, "learning_rate": 2.1050754532375328e-06, "loss": 0.207, "step": 36445 }, { "epoch": 0.7897644790154487, "grad_norm": 1.4369159936904907, "learning_rate": 2.1029870054313183e-06, "loss": 0.2024, "step": 36450 }, { "epoch": 0.7898728143349295, "grad_norm": 1.5546268224716187, "learning_rate": 2.100899472375767e-06, "loss": 0.2722, "step": 36455 }, { "epoch": 0.7899811496544104, "grad_norm": 1.6465438604354858, "learning_rate": 2.0988128543126796e-06, "loss": 0.2588, "step": 36460 }, { "epoch": 0.7900894849738912, "grad_norm": 1.1944032907485962, "learning_rate": 2.0967271514837673e-06, "loss": 0.1877, "step": 36465 }, { "epoch": 0.790197820293372, "grad_norm": 1.951865315437317, "learning_rate": 2.0946423641306224e-06, "loss": 0.2811, "step": 36470 }, { "epoch": 0.7903061556128529, "grad_norm": 1.1568124294281006, "learning_rate": 2.092558492494736e-06, "loss": 0.1947, "step": 36475 }, { "epoch": 0.7904144909323337, "grad_norm": 2.1151461601257324, "learning_rate": 2.090475536817497e-06, "loss": 0.3166, "step": 36480 }, { "epoch": 0.7905228262518146, "grad_norm": 2.3987953662872314, "learning_rate": 2.088393497340179e-06, "loss": 0.2825, "step": 36485 }, { "epoch": 0.7906311615712954, "grad_norm": 1.2197133302688599, "learning_rate": 2.0863123743039616e-06, "loss": 0.1838, "step": 36490 }, { "epoch": 0.7907394968907764, "grad_norm": 1.3160980939865112, "learning_rate": 2.0842321679499023e-06, "loss": 0.2612, "step": 36495 }, { "epoch": 0.7908478322102572, "grad_norm": 2.2422051429748535, "learning_rate": 2.0821528785189747e-06, "loss": 0.2241, "step": 36500 }, { "epoch": 0.7909561675297381, "grad_norm": 1.9698668718338013, "learning_rate": 2.080074506252027e-06, "loss": 0.2685, "step": 36505 }, { "epoch": 0.7910645028492189, "grad_norm": 1.515618085861206, "learning_rate": 2.077997051389804e-06, "loss": 0.2168, "step": 36510 }, { "epoch": 0.7911728381686998, "grad_norm": 1.6955444812774658, "learning_rate": 2.075920514172953e-06, "loss": 0.1087, "step": 36515 }, { "epoch": 0.7912811734881806, "grad_norm": 1.4450984001159668, "learning_rate": 2.0738448948420066e-06, "loss": 0.2549, "step": 36520 }, { "epoch": 0.7913895088076615, "grad_norm": 1.8954147100448608, "learning_rate": 2.071770193637397e-06, "loss": 0.1797, "step": 36525 }, { "epoch": 0.7914978441271423, "grad_norm": 1.9037295579910278, "learning_rate": 2.0696964107994434e-06, "loss": 0.2794, "step": 36530 }, { "epoch": 0.7916061794466231, "grad_norm": 0.9861748218536377, "learning_rate": 2.067623546568367e-06, "loss": 0.1376, "step": 36535 }, { "epoch": 0.791714514766104, "grad_norm": 1.5063753128051758, "learning_rate": 2.0655516011842723e-06, "loss": 0.2629, "step": 36540 }, { "epoch": 0.791822850085585, "grad_norm": 1.315528154373169, "learning_rate": 2.0634805748871678e-06, "loss": 0.2913, "step": 36545 }, { "epoch": 0.7919311854050658, "grad_norm": 2.0408477783203125, "learning_rate": 2.0614104679169524e-06, "loss": 0.3168, "step": 36550 }, { "epoch": 0.7920395207245466, "grad_norm": 1.38787841796875, "learning_rate": 2.0593412805134105e-06, "loss": 0.2063, "step": 36555 }, { "epoch": 0.7921478560440275, "grad_norm": 2.147353410720825, "learning_rate": 2.057273012916233e-06, "loss": 0.2657, "step": 36560 }, { "epoch": 0.7922561913635083, "grad_norm": 1.4432754516601562, "learning_rate": 2.055205665364993e-06, "loss": 0.218, "step": 36565 }, { "epoch": 0.7923645266829892, "grad_norm": 2.554710626602173, "learning_rate": 2.0531392380991665e-06, "loss": 0.2008, "step": 36570 }, { "epoch": 0.79247286200247, "grad_norm": 1.6186660528182983, "learning_rate": 2.051073731358112e-06, "loss": 0.1841, "step": 36575 }, { "epoch": 0.7925811973219509, "grad_norm": 1.2218384742736816, "learning_rate": 2.0490091453810935e-06, "loss": 0.189, "step": 36580 }, { "epoch": 0.7926895326414317, "grad_norm": 1.7613861560821533, "learning_rate": 2.0469454804072573e-06, "loss": 0.2906, "step": 36585 }, { "epoch": 0.7927978679609127, "grad_norm": 1.6248323917388916, "learning_rate": 2.0448827366756495e-06, "loss": 0.2604, "step": 36590 }, { "epoch": 0.7929062032803935, "grad_norm": 1.286239504814148, "learning_rate": 2.042820914425213e-06, "loss": 0.1951, "step": 36595 }, { "epoch": 0.7930145385998744, "grad_norm": 2.3964052200317383, "learning_rate": 2.0407600138947714e-06, "loss": 0.1891, "step": 36600 }, { "epoch": 0.7931228739193552, "grad_norm": 1.1096116304397583, "learning_rate": 2.0387000353230557e-06, "loss": 0.2555, "step": 36605 }, { "epoch": 0.793231209238836, "grad_norm": 1.9086856842041016, "learning_rate": 2.0366409789486785e-06, "loss": 0.1791, "step": 36610 }, { "epoch": 0.7933395445583169, "grad_norm": 0.9345535039901733, "learning_rate": 2.0345828450101545e-06, "loss": 0.1726, "step": 36615 }, { "epoch": 0.7934478798777977, "grad_norm": 0.8805817365646362, "learning_rate": 2.0325256337458842e-06, "loss": 0.1973, "step": 36620 }, { "epoch": 0.7935562151972786, "grad_norm": 1.5275158882141113, "learning_rate": 2.0304693453941693e-06, "loss": 0.2247, "step": 36625 }, { "epoch": 0.7936645505167594, "grad_norm": 1.5986154079437256, "learning_rate": 2.0284139801931936e-06, "loss": 0.2364, "step": 36630 }, { "epoch": 0.7937728858362403, "grad_norm": 2.0121939182281494, "learning_rate": 2.0263595383810477e-06, "loss": 0.1746, "step": 36635 }, { "epoch": 0.7938812211557212, "grad_norm": 1.2527399063110352, "learning_rate": 2.0243060201957e-06, "loss": 0.1937, "step": 36640 }, { "epoch": 0.7939895564752021, "grad_norm": 1.1370129585266113, "learning_rate": 2.022253425875025e-06, "loss": 0.211, "step": 36645 }, { "epoch": 0.7940978917946829, "grad_norm": 1.307516098022461, "learning_rate": 2.0202017556567867e-06, "loss": 0.162, "step": 36650 }, { "epoch": 0.7942062271141638, "grad_norm": 1.587575078010559, "learning_rate": 2.018151009778635e-06, "loss": 0.2397, "step": 36655 }, { "epoch": 0.7943145624336446, "grad_norm": 1.0820891857147217, "learning_rate": 2.0161011884781223e-06, "loss": 0.2485, "step": 36660 }, { "epoch": 0.7944228977531255, "grad_norm": 1.00352144241333, "learning_rate": 2.0140522919926864e-06, "loss": 0.2126, "step": 36665 }, { "epoch": 0.7945312330726063, "grad_norm": 2.1194963455200195, "learning_rate": 2.012004320559665e-06, "loss": 0.2248, "step": 36670 }, { "epoch": 0.7946395683920872, "grad_norm": 1.0721474885940552, "learning_rate": 2.009957274416284e-06, "loss": 0.2701, "step": 36675 }, { "epoch": 0.794747903711568, "grad_norm": 0.9876551032066345, "learning_rate": 2.007911153799659e-06, "loss": 0.2283, "step": 36680 }, { "epoch": 0.7948562390310489, "grad_norm": 2.3203742504119873, "learning_rate": 2.0058659589468076e-06, "loss": 0.1957, "step": 36685 }, { "epoch": 0.7949645743505298, "grad_norm": 1.0483278036117554, "learning_rate": 2.0038216900946274e-06, "loss": 0.2652, "step": 36690 }, { "epoch": 0.7950729096700107, "grad_norm": 1.9476426839828491, "learning_rate": 2.0017783474799267e-06, "loss": 0.2726, "step": 36695 }, { "epoch": 0.7951812449894915, "grad_norm": 1.7359304428100586, "learning_rate": 1.9997359313393884e-06, "loss": 0.2305, "step": 36700 }, { "epoch": 0.7952895803089723, "grad_norm": 1.1658146381378174, "learning_rate": 1.9976944419096024e-06, "loss": 0.2548, "step": 36705 }, { "epoch": 0.7953979156284532, "grad_norm": 1.630184292793274, "learning_rate": 1.9956538794270396e-06, "loss": 0.2826, "step": 36710 }, { "epoch": 0.795506250947934, "grad_norm": 1.549930453300476, "learning_rate": 1.993614244128067e-06, "loss": 0.2357, "step": 36715 }, { "epoch": 0.7956145862674149, "grad_norm": 1.1017940044403076, "learning_rate": 1.9915755362489518e-06, "loss": 0.2566, "step": 36720 }, { "epoch": 0.7957229215868957, "grad_norm": 1.1719945669174194, "learning_rate": 1.989537756025842e-06, "loss": 0.2394, "step": 36725 }, { "epoch": 0.7958312569063766, "grad_norm": 1.2559114694595337, "learning_rate": 1.987500903694789e-06, "loss": 0.1975, "step": 36730 }, { "epoch": 0.7959395922258575, "grad_norm": 1.1125564575195312, "learning_rate": 1.9854649794917234e-06, "loss": 0.2416, "step": 36735 }, { "epoch": 0.7960479275453384, "grad_norm": 1.042306900024414, "learning_rate": 1.9834299836524872e-06, "loss": 0.1942, "step": 36740 }, { "epoch": 0.7961562628648192, "grad_norm": 1.0766063928604126, "learning_rate": 1.9813959164128005e-06, "loss": 0.2018, "step": 36745 }, { "epoch": 0.7962645981843001, "grad_norm": 1.1095539331436157, "learning_rate": 1.9793627780082746e-06, "loss": 0.1852, "step": 36750 }, { "epoch": 0.7963729335037809, "grad_norm": 1.4987702369689941, "learning_rate": 1.9773305686744238e-06, "loss": 0.2264, "step": 36755 }, { "epoch": 0.7964812688232618, "grad_norm": 1.610720157623291, "learning_rate": 1.975299288646646e-06, "loss": 0.1654, "step": 36760 }, { "epoch": 0.7965896041427426, "grad_norm": 1.6850758790969849, "learning_rate": 1.9732689381602366e-06, "loss": 0.3349, "step": 36765 }, { "epoch": 0.7966979394622234, "grad_norm": 1.1204873323440552, "learning_rate": 1.9712395174503784e-06, "loss": 0.172, "step": 36770 }, { "epoch": 0.7968062747817043, "grad_norm": 1.572670340538025, "learning_rate": 1.9692110267521548e-06, "loss": 0.1453, "step": 36775 }, { "epoch": 0.7969146101011851, "grad_norm": 1.1933577060699463, "learning_rate": 1.9671834663005285e-06, "loss": 0.1666, "step": 36780 }, { "epoch": 0.7970229454206661, "grad_norm": 1.1300286054611206, "learning_rate": 1.9651568363303665e-06, "loss": 0.2557, "step": 36785 }, { "epoch": 0.7971312807401469, "grad_norm": 1.5067474842071533, "learning_rate": 1.963131137076426e-06, "loss": 0.2377, "step": 36790 }, { "epoch": 0.7972396160596278, "grad_norm": 1.2225899696350098, "learning_rate": 1.961106368773348e-06, "loss": 0.19, "step": 36795 }, { "epoch": 0.7973479513791086, "grad_norm": 1.1125766038894653, "learning_rate": 1.959082531655677e-06, "loss": 0.1936, "step": 36800 }, { "epoch": 0.7974562866985895, "grad_norm": 1.7395535707473755, "learning_rate": 1.95705962595784e-06, "loss": 0.2646, "step": 36805 }, { "epoch": 0.7975646220180703, "grad_norm": 1.7466496229171753, "learning_rate": 1.9550376519141635e-06, "loss": 0.2534, "step": 36810 }, { "epoch": 0.7976729573375512, "grad_norm": 0.9392306804656982, "learning_rate": 1.95301660975886e-06, "loss": 0.2156, "step": 36815 }, { "epoch": 0.797781292657032, "grad_norm": 1.3794358968734741, "learning_rate": 1.95099649972604e-06, "loss": 0.2672, "step": 36820 }, { "epoch": 0.7978896279765129, "grad_norm": 1.0972219705581665, "learning_rate": 1.9489773220497e-06, "loss": 0.2868, "step": 36825 }, { "epoch": 0.7979979632959937, "grad_norm": 1.1435317993164062, "learning_rate": 1.946959076963735e-06, "loss": 0.1829, "step": 36830 }, { "epoch": 0.7981062986154747, "grad_norm": 1.218997836112976, "learning_rate": 1.944941764701924e-06, "loss": 0.234, "step": 36835 }, { "epoch": 0.7982146339349555, "grad_norm": 0.8939063549041748, "learning_rate": 1.9429253854979446e-06, "loss": 0.2243, "step": 36840 }, { "epoch": 0.7983229692544364, "grad_norm": 0.7796903252601624, "learning_rate": 1.940909939585367e-06, "loss": 0.1829, "step": 36845 }, { "epoch": 0.7984313045739172, "grad_norm": 1.2379029989242554, "learning_rate": 1.9388954271976448e-06, "loss": 0.232, "step": 36850 }, { "epoch": 0.798539639893398, "grad_norm": 1.0773330926895142, "learning_rate": 1.936881848568135e-06, "loss": 0.2472, "step": 36855 }, { "epoch": 0.7986479752128789, "grad_norm": 1.4748419523239136, "learning_rate": 1.9348692039300745e-06, "loss": 0.2438, "step": 36860 }, { "epoch": 0.7987563105323597, "grad_norm": 0.8711227178573608, "learning_rate": 1.9328574935166033e-06, "loss": 0.1999, "step": 36865 }, { "epoch": 0.7988646458518406, "grad_norm": 2.016181468963623, "learning_rate": 1.930846717560745e-06, "loss": 0.2777, "step": 36870 }, { "epoch": 0.7989729811713214, "grad_norm": 1.5694643259048462, "learning_rate": 1.928836876295419e-06, "loss": 0.2666, "step": 36875 }, { "epoch": 0.7990813164908024, "grad_norm": 1.5453952550888062, "learning_rate": 1.926827969953433e-06, "loss": 0.2502, "step": 36880 }, { "epoch": 0.7991896518102832, "grad_norm": 1.2823724746704102, "learning_rate": 1.9248199987674907e-06, "loss": 0.2845, "step": 36885 }, { "epoch": 0.7992979871297641, "grad_norm": 1.3163548707962036, "learning_rate": 1.9228129629701884e-06, "loss": 0.2738, "step": 36890 }, { "epoch": 0.7994063224492449, "grad_norm": 1.51101815700531, "learning_rate": 1.9208068627940045e-06, "loss": 0.2705, "step": 36895 }, { "epoch": 0.7995146577687258, "grad_norm": 1.5728216171264648, "learning_rate": 1.918801698471323e-06, "loss": 0.2379, "step": 36900 }, { "epoch": 0.7996229930882066, "grad_norm": 1.6121822595596313, "learning_rate": 1.916797470234405e-06, "loss": 0.2428, "step": 36905 }, { "epoch": 0.7997313284076875, "grad_norm": 0.9611690640449524, "learning_rate": 1.9147941783154166e-06, "loss": 0.2577, "step": 36910 }, { "epoch": 0.7998396637271683, "grad_norm": 1.5923455953598022, "learning_rate": 1.912791822946406e-06, "loss": 0.3171, "step": 36915 }, { "epoch": 0.7999479990466491, "grad_norm": 0.8968745470046997, "learning_rate": 1.910790404359314e-06, "loss": 0.198, "step": 36920 }, { "epoch": 0.80005633436613, "grad_norm": 1.7318495512008667, "learning_rate": 1.9087899227859796e-06, "loss": 0.3177, "step": 36925 }, { "epoch": 0.800164669685611, "grad_norm": 1.199885606765747, "learning_rate": 1.9067903784581221e-06, "loss": 0.1928, "step": 36930 }, { "epoch": 0.8002730050050918, "grad_norm": 1.5034855604171753, "learning_rate": 1.9047917716073672e-06, "loss": 0.1964, "step": 36935 }, { "epoch": 0.8003813403245726, "grad_norm": 1.0795940160751343, "learning_rate": 1.9027941024652174e-06, "loss": 0.2065, "step": 36940 }, { "epoch": 0.8004896756440535, "grad_norm": 1.449366807937622, "learning_rate": 1.9007973712630768e-06, "loss": 0.207, "step": 36945 }, { "epoch": 0.8005980109635343, "grad_norm": 1.3339108228683472, "learning_rate": 1.8988015782322343e-06, "loss": 0.2041, "step": 36950 }, { "epoch": 0.8007063462830152, "grad_norm": 1.169738531112671, "learning_rate": 1.896806723603871e-06, "loss": 0.2425, "step": 36955 }, { "epoch": 0.800814681602496, "grad_norm": 1.2841919660568237, "learning_rate": 1.8948128076090654e-06, "loss": 0.187, "step": 36960 }, { "epoch": 0.8009230169219769, "grad_norm": 0.6562467813491821, "learning_rate": 1.8928198304787782e-06, "loss": 0.182, "step": 36965 }, { "epoch": 0.8010313522414577, "grad_norm": 1.5139365196228027, "learning_rate": 1.89082779244387e-06, "loss": 0.1913, "step": 36970 }, { "epoch": 0.8011396875609386, "grad_norm": 1.551203727722168, "learning_rate": 1.8888366937350845e-06, "loss": 0.1757, "step": 36975 }, { "epoch": 0.8012480228804195, "grad_norm": 0.9651422500610352, "learning_rate": 1.8868465345830622e-06, "loss": 0.1784, "step": 36980 }, { "epoch": 0.8013563581999004, "grad_norm": 1.338767647743225, "learning_rate": 1.884857315218337e-06, "loss": 0.2303, "step": 36985 }, { "epoch": 0.8014646935193812, "grad_norm": 2.7216005325317383, "learning_rate": 1.8828690358713242e-06, "loss": 0.2008, "step": 36990 }, { "epoch": 0.8015730288388621, "grad_norm": 1.5688378810882568, "learning_rate": 1.880881696772342e-06, "loss": 0.2899, "step": 36995 }, { "epoch": 0.8016813641583429, "grad_norm": 2.4837594032287598, "learning_rate": 1.8788952981515874e-06, "loss": 0.1663, "step": 37000 }, { "epoch": 0.8017896994778237, "grad_norm": 1.8138936758041382, "learning_rate": 1.8769098402391618e-06, "loss": 0.279, "step": 37005 }, { "epoch": 0.8018980347973046, "grad_norm": 0.8897067904472351, "learning_rate": 1.8749253232650443e-06, "loss": 0.2346, "step": 37010 }, { "epoch": 0.8020063701167854, "grad_norm": 1.7801787853240967, "learning_rate": 1.872941747459117e-06, "loss": 0.1744, "step": 37015 }, { "epoch": 0.8021147054362663, "grad_norm": 1.7362786531448364, "learning_rate": 1.8709591130511428e-06, "loss": 0.2294, "step": 37020 }, { "epoch": 0.8022230407557472, "grad_norm": 1.5961782932281494, "learning_rate": 1.8689774202707823e-06, "loss": 0.3024, "step": 37025 }, { "epoch": 0.8023313760752281, "grad_norm": 1.5226496458053589, "learning_rate": 1.8669966693475871e-06, "loss": 0.1511, "step": 37030 }, { "epoch": 0.8024397113947089, "grad_norm": 1.4925856590270996, "learning_rate": 1.865016860510993e-06, "loss": 0.1677, "step": 37035 }, { "epoch": 0.8025480467141898, "grad_norm": 2.237065315246582, "learning_rate": 1.8630379939903353e-06, "loss": 0.1736, "step": 37040 }, { "epoch": 0.8026563820336706, "grad_norm": 1.7927244901657104, "learning_rate": 1.8610600700148329e-06, "loss": 0.2194, "step": 37045 }, { "epoch": 0.8027647173531515, "grad_norm": 1.8431998491287231, "learning_rate": 1.8590830888136024e-06, "loss": 0.2837, "step": 37050 }, { "epoch": 0.8028730526726323, "grad_norm": 1.000235676765442, "learning_rate": 1.8571070506156419e-06, "loss": 0.1259, "step": 37055 }, { "epoch": 0.8029813879921132, "grad_norm": 0.9143610000610352, "learning_rate": 1.8551319556498526e-06, "loss": 0.2068, "step": 37060 }, { "epoch": 0.803089723311594, "grad_norm": 1.5749157667160034, "learning_rate": 1.8531578041450126e-06, "loss": 0.1988, "step": 37065 }, { "epoch": 0.8031980586310749, "grad_norm": 1.132824182510376, "learning_rate": 1.8511845963298048e-06, "loss": 0.2939, "step": 37070 }, { "epoch": 0.8033063939505558, "grad_norm": 1.6826082468032837, "learning_rate": 1.8492123324327892e-06, "loss": 0.233, "step": 37075 }, { "epoch": 0.8034147292700367, "grad_norm": 1.6786322593688965, "learning_rate": 1.847241012682427e-06, "loss": 0.2261, "step": 37080 }, { "epoch": 0.8035230645895175, "grad_norm": 1.2028025388717651, "learning_rate": 1.8452706373070662e-06, "loss": 0.1663, "step": 37085 }, { "epoch": 0.8036313999089983, "grad_norm": 1.0006940364837646, "learning_rate": 1.843301206534942e-06, "loss": 0.2547, "step": 37090 }, { "epoch": 0.8037397352284792, "grad_norm": 2.2983858585357666, "learning_rate": 1.841332720594189e-06, "loss": 0.2505, "step": 37095 }, { "epoch": 0.80384807054796, "grad_norm": 2.24163818359375, "learning_rate": 1.839365179712821e-06, "loss": 0.3097, "step": 37100 }, { "epoch": 0.8039564058674409, "grad_norm": 1.4084445238113403, "learning_rate": 1.8373985841187525e-06, "loss": 0.2117, "step": 37105 }, { "epoch": 0.8040647411869217, "grad_norm": 1.2982133626937866, "learning_rate": 1.8354329340397803e-06, "loss": 0.3262, "step": 37110 }, { "epoch": 0.8041730765064026, "grad_norm": 1.946358561515808, "learning_rate": 1.8334682297036e-06, "loss": 0.2285, "step": 37115 }, { "epoch": 0.8042814118258835, "grad_norm": 1.0638494491577148, "learning_rate": 1.8315044713377872e-06, "loss": 0.2167, "step": 37120 }, { "epoch": 0.8043897471453644, "grad_norm": 1.0732715129852295, "learning_rate": 1.8295416591698179e-06, "loss": 0.222, "step": 37125 }, { "epoch": 0.8044980824648452, "grad_norm": 1.9213314056396484, "learning_rate": 1.8275797934270568e-06, "loss": 0.4031, "step": 37130 }, { "epoch": 0.8046064177843261, "grad_norm": 1.2213815450668335, "learning_rate": 1.8256188743367509e-06, "loss": 0.1945, "step": 37135 }, { "epoch": 0.8047147531038069, "grad_norm": 1.2812843322753906, "learning_rate": 1.8236589021260487e-06, "loss": 0.2112, "step": 37140 }, { "epoch": 0.8048230884232878, "grad_norm": 2.4677388668060303, "learning_rate": 1.8216998770219785e-06, "loss": 0.2724, "step": 37145 }, { "epoch": 0.8049314237427686, "grad_norm": 2.9184398651123047, "learning_rate": 1.81974179925147e-06, "loss": 0.1405, "step": 37150 }, { "epoch": 0.8050397590622494, "grad_norm": 1.2654651403427124, "learning_rate": 1.8177846690413303e-06, "loss": 0.1899, "step": 37155 }, { "epoch": 0.8051480943817303, "grad_norm": 2.119236707687378, "learning_rate": 1.8158284866182695e-06, "loss": 0.3077, "step": 37160 }, { "epoch": 0.8052564297012111, "grad_norm": 1.4439095258712769, "learning_rate": 1.8138732522088799e-06, "loss": 0.2223, "step": 37165 }, { "epoch": 0.8053647650206921, "grad_norm": 1.9664565324783325, "learning_rate": 1.8119189660396409e-06, "loss": 0.4201, "step": 37170 }, { "epoch": 0.8054731003401729, "grad_norm": 3.1039342880249023, "learning_rate": 1.809965628336937e-06, "loss": 0.2268, "step": 37175 }, { "epoch": 0.8055814356596538, "grad_norm": 1.3919131755828857, "learning_rate": 1.8080132393270267e-06, "loss": 0.2182, "step": 37180 }, { "epoch": 0.8056897709791346, "grad_norm": 1.6822127103805542, "learning_rate": 1.8060617992360685e-06, "loss": 0.2385, "step": 37185 }, { "epoch": 0.8057981062986155, "grad_norm": 1.8243961334228516, "learning_rate": 1.804111308290104e-06, "loss": 0.196, "step": 37190 }, { "epoch": 0.8059064416180963, "grad_norm": 0.8855387568473816, "learning_rate": 1.8021617667150714e-06, "loss": 0.1933, "step": 37195 }, { "epoch": 0.8060147769375772, "grad_norm": 1.5938022136688232, "learning_rate": 1.8002131747367947e-06, "loss": 0.141, "step": 37200 }, { "epoch": 0.806123112257058, "grad_norm": 1.5764141082763672, "learning_rate": 1.7982655325809872e-06, "loss": 0.2161, "step": 37205 }, { "epoch": 0.8062314475765389, "grad_norm": 1.3244829177856445, "learning_rate": 1.7963188404732579e-06, "loss": 0.1816, "step": 37210 }, { "epoch": 0.8063397828960197, "grad_norm": 0.955318808555603, "learning_rate": 1.7943730986390973e-06, "loss": 0.1955, "step": 37215 }, { "epoch": 0.8064481182155007, "grad_norm": 1.7990877628326416, "learning_rate": 1.7924283073038928e-06, "loss": 0.2662, "step": 37220 }, { "epoch": 0.8065564535349815, "grad_norm": 0.8373862504959106, "learning_rate": 1.790484466692919e-06, "loss": 0.1731, "step": 37225 }, { "epoch": 0.8066647888544624, "grad_norm": 1.556878924369812, "learning_rate": 1.788541577031344e-06, "loss": 0.1213, "step": 37230 }, { "epoch": 0.8067731241739432, "grad_norm": 1.625661015510559, "learning_rate": 1.7865996385442197e-06, "loss": 0.2316, "step": 37235 }, { "epoch": 0.806881459493424, "grad_norm": 0.6809019446372986, "learning_rate": 1.784658651456489e-06, "loss": 0.1118, "step": 37240 }, { "epoch": 0.8069897948129049, "grad_norm": 0.9854766726493835, "learning_rate": 1.7827186159929888e-06, "loss": 0.2649, "step": 37245 }, { "epoch": 0.8070981301323857, "grad_norm": 1.461651086807251, "learning_rate": 1.7807795323784404e-06, "loss": 0.1148, "step": 37250 }, { "epoch": 0.8072064654518666, "grad_norm": 1.0989503860473633, "learning_rate": 1.7788414008374611e-06, "loss": 0.181, "step": 37255 }, { "epoch": 0.8073148007713474, "grad_norm": 0.9662601351737976, "learning_rate": 1.776904221594551e-06, "loss": 0.2001, "step": 37260 }, { "epoch": 0.8074231360908284, "grad_norm": 1.5844053030014038, "learning_rate": 1.7749679948741038e-06, "loss": 0.2634, "step": 37265 }, { "epoch": 0.8075314714103092, "grad_norm": 1.54373300075531, "learning_rate": 1.7730327209004071e-06, "loss": 0.2544, "step": 37270 }, { "epoch": 0.8076398067297901, "grad_norm": 1.282842993736267, "learning_rate": 1.771098399897626e-06, "loss": 0.1453, "step": 37275 }, { "epoch": 0.8077481420492709, "grad_norm": 1.7774509191513062, "learning_rate": 1.76916503208983e-06, "loss": 0.3326, "step": 37280 }, { "epoch": 0.8078564773687518, "grad_norm": 1.6126095056533813, "learning_rate": 1.7672326177009636e-06, "loss": 0.1584, "step": 37285 }, { "epoch": 0.8079648126882326, "grad_norm": 0.6982601284980774, "learning_rate": 1.765301156954875e-06, "loss": 0.2299, "step": 37290 }, { "epoch": 0.8080731480077135, "grad_norm": 1.361656904220581, "learning_rate": 1.763370650075289e-06, "loss": 0.2396, "step": 37295 }, { "epoch": 0.8081814833271943, "grad_norm": 2.010255813598633, "learning_rate": 1.7614410972858299e-06, "loss": 0.1565, "step": 37300 }, { "epoch": 0.8082898186466752, "grad_norm": 1.271873116493225, "learning_rate": 1.7595124988100043e-06, "loss": 0.2596, "step": 37305 }, { "epoch": 0.808398153966156, "grad_norm": 0.5756308436393738, "learning_rate": 1.757584854871216e-06, "loss": 0.2143, "step": 37310 }, { "epoch": 0.808506489285637, "grad_norm": 1.466478943824768, "learning_rate": 1.7556581656927486e-06, "loss": 0.2398, "step": 37315 }, { "epoch": 0.8086148246051178, "grad_norm": 1.2836138010025024, "learning_rate": 1.753732431497781e-06, "loss": 0.1439, "step": 37320 }, { "epoch": 0.8087231599245986, "grad_norm": 1.3723753690719604, "learning_rate": 1.7518076525093863e-06, "loss": 0.2418, "step": 37325 }, { "epoch": 0.8088314952440795, "grad_norm": 1.4593946933746338, "learning_rate": 1.7498838289505139e-06, "loss": 0.2329, "step": 37330 }, { "epoch": 0.8089398305635603, "grad_norm": 1.3024110794067383, "learning_rate": 1.7479609610440152e-06, "loss": 0.2728, "step": 37335 }, { "epoch": 0.8090481658830412, "grad_norm": 1.739546775817871, "learning_rate": 1.7460390490126222e-06, "loss": 0.3156, "step": 37340 }, { "epoch": 0.809156501202522, "grad_norm": 1.9916372299194336, "learning_rate": 1.7441180930789626e-06, "loss": 0.2038, "step": 37345 }, { "epoch": 0.8092648365220029, "grad_norm": 2.1304233074188232, "learning_rate": 1.7421980934655469e-06, "loss": 0.2182, "step": 37350 }, { "epoch": 0.8093731718414837, "grad_norm": 1.2256449460983276, "learning_rate": 1.7402790503947831e-06, "loss": 0.2911, "step": 37355 }, { "epoch": 0.8094815071609646, "grad_norm": 2.6170551776885986, "learning_rate": 1.7383609640889575e-06, "loss": 0.268, "step": 37360 }, { "epoch": 0.8095898424804455, "grad_norm": 1.8263179063796997, "learning_rate": 1.7364438347702551e-06, "loss": 0.2445, "step": 37365 }, { "epoch": 0.8096981777999264, "grad_norm": 2.0195508003234863, "learning_rate": 1.7345276626607489e-06, "loss": 0.2234, "step": 37370 }, { "epoch": 0.8098065131194072, "grad_norm": 1.1779595613479614, "learning_rate": 1.7326124479823936e-06, "loss": 0.253, "step": 37375 }, { "epoch": 0.8099148484388881, "grad_norm": 1.844243049621582, "learning_rate": 1.730698190957043e-06, "loss": 0.1949, "step": 37380 }, { "epoch": 0.8100231837583689, "grad_norm": 1.9905282258987427, "learning_rate": 1.7287848918064298e-06, "loss": 0.1912, "step": 37385 }, { "epoch": 0.8101315190778497, "grad_norm": 1.5805692672729492, "learning_rate": 1.7268725507521878e-06, "loss": 0.2915, "step": 37390 }, { "epoch": 0.8102398543973306, "grad_norm": 1.2434637546539307, "learning_rate": 1.7249611680158263e-06, "loss": 0.1452, "step": 37395 }, { "epoch": 0.8103481897168114, "grad_norm": 1.8270018100738525, "learning_rate": 1.723050743818756e-06, "loss": 0.2197, "step": 37400 }, { "epoch": 0.8104565250362923, "grad_norm": 0.6321781277656555, "learning_rate": 1.7211412783822689e-06, "loss": 0.1833, "step": 37405 }, { "epoch": 0.8105648603557732, "grad_norm": 0.7460438013076782, "learning_rate": 1.7192327719275415e-06, "loss": 0.1653, "step": 37410 }, { "epoch": 0.8106731956752541, "grad_norm": 2.1096603870391846, "learning_rate": 1.7173252246756567e-06, "loss": 0.3137, "step": 37415 }, { "epoch": 0.8107815309947349, "grad_norm": 1.3006428480148315, "learning_rate": 1.7154186368475678e-06, "loss": 0.1617, "step": 37420 }, { "epoch": 0.8108898663142158, "grad_norm": 1.2103033065795898, "learning_rate": 1.7135130086641295e-06, "loss": 0.3135, "step": 37425 }, { "epoch": 0.8109982016336966, "grad_norm": 1.817086935043335, "learning_rate": 1.7116083403460759e-06, "loss": 0.216, "step": 37430 }, { "epoch": 0.8111065369531775, "grad_norm": 1.3872328996658325, "learning_rate": 1.7097046321140375e-06, "loss": 0.2851, "step": 37435 }, { "epoch": 0.8112148722726583, "grad_norm": 1.421458125114441, "learning_rate": 1.707801884188528e-06, "loss": 0.3148, "step": 37440 }, { "epoch": 0.8113232075921392, "grad_norm": 0.8890901207923889, "learning_rate": 1.705900096789952e-06, "loss": 0.1775, "step": 37445 }, { "epoch": 0.81143154291162, "grad_norm": 1.3286914825439453, "learning_rate": 1.7039992701386066e-06, "loss": 0.2245, "step": 37450 }, { "epoch": 0.8115398782311009, "grad_norm": 2.2936816215515137, "learning_rate": 1.702099404454668e-06, "loss": 0.2824, "step": 37455 }, { "epoch": 0.8116482135505818, "grad_norm": 1.8919769525527954, "learning_rate": 1.7002004999582122e-06, "loss": 0.1769, "step": 37460 }, { "epoch": 0.8117565488700627, "grad_norm": 1.629791259765625, "learning_rate": 1.698302556869197e-06, "loss": 0.3252, "step": 37465 }, { "epoch": 0.8118648841895435, "grad_norm": 1.487509846687317, "learning_rate": 1.6964055754074739e-06, "loss": 0.2341, "step": 37470 }, { "epoch": 0.8119732195090243, "grad_norm": 1.3993873596191406, "learning_rate": 1.694509555792776e-06, "loss": 0.2292, "step": 37475 }, { "epoch": 0.8120815548285052, "grad_norm": 1.5695558786392212, "learning_rate": 1.692614498244728e-06, "loss": 0.1949, "step": 37480 }, { "epoch": 0.812189890147986, "grad_norm": 1.458574891090393, "learning_rate": 1.6907204029828484e-06, "loss": 0.1754, "step": 37485 }, { "epoch": 0.8122982254674669, "grad_norm": 1.5430155992507935, "learning_rate": 1.6888272702265352e-06, "loss": 0.2124, "step": 37490 }, { "epoch": 0.8124065607869477, "grad_norm": 1.0668851137161255, "learning_rate": 1.6869351001950829e-06, "loss": 0.1713, "step": 37495 }, { "epoch": 0.8125148961064286, "grad_norm": 1.6735949516296387, "learning_rate": 1.6850438931076675e-06, "loss": 0.1807, "step": 37500 }, { "epoch": 0.8126232314259094, "grad_norm": 1.9878687858581543, "learning_rate": 1.6831536491833612e-06, "loss": 0.2621, "step": 37505 }, { "epoch": 0.8127315667453904, "grad_norm": 1.8756026029586792, "learning_rate": 1.6812643686411157e-06, "loss": 0.268, "step": 37510 }, { "epoch": 0.8128399020648712, "grad_norm": 0.9464672803878784, "learning_rate": 1.6793760516997792e-06, "loss": 0.1971, "step": 37515 }, { "epoch": 0.8129482373843521, "grad_norm": 1.4712944030761719, "learning_rate": 1.677488698578086e-06, "loss": 0.2018, "step": 37520 }, { "epoch": 0.8130565727038329, "grad_norm": 0.8790415525436401, "learning_rate": 1.6756023094946538e-06, "loss": 0.1905, "step": 37525 }, { "epoch": 0.8131649080233138, "grad_norm": 1.4036235809326172, "learning_rate": 1.6737168846679962e-06, "loss": 0.2401, "step": 37530 }, { "epoch": 0.8132732433427946, "grad_norm": 2.3174326419830322, "learning_rate": 1.6718324243165086e-06, "loss": 0.2556, "step": 37535 }, { "epoch": 0.8133815786622755, "grad_norm": 0.8194109797477722, "learning_rate": 1.6699489286584802e-06, "loss": 0.218, "step": 37540 }, { "epoch": 0.8134899139817563, "grad_norm": 2.076279640197754, "learning_rate": 1.6680663979120814e-06, "loss": 0.281, "step": 37545 }, { "epoch": 0.8135982493012371, "grad_norm": 1.5661219358444214, "learning_rate": 1.6661848322953823e-06, "loss": 0.2163, "step": 37550 }, { "epoch": 0.8137065846207181, "grad_norm": 1.3118072748184204, "learning_rate": 1.6643042320263258e-06, "loss": 0.2452, "step": 37555 }, { "epoch": 0.8138149199401989, "grad_norm": 1.2935911417007446, "learning_rate": 1.662424597322756e-06, "loss": 0.154, "step": 37560 }, { "epoch": 0.8139232552596798, "grad_norm": 1.2525776624679565, "learning_rate": 1.6605459284024016e-06, "loss": 0.2542, "step": 37565 }, { "epoch": 0.8140315905791606, "grad_norm": 0.8519570231437683, "learning_rate": 1.6586682254828746e-06, "loss": 0.2666, "step": 37570 }, { "epoch": 0.8141399258986415, "grad_norm": 1.3435202836990356, "learning_rate": 1.6567914887816827e-06, "loss": 0.1819, "step": 37575 }, { "epoch": 0.8142482612181223, "grad_norm": 1.2139259576797485, "learning_rate": 1.6549157185162125e-06, "loss": 0.25, "step": 37580 }, { "epoch": 0.8143565965376032, "grad_norm": 2.0050911903381348, "learning_rate": 1.6530409149037508e-06, "loss": 0.2032, "step": 37585 }, { "epoch": 0.814464931857084, "grad_norm": 1.3842729330062866, "learning_rate": 1.6511670781614576e-06, "loss": 0.3028, "step": 37590 }, { "epoch": 0.8145732671765649, "grad_norm": 2.140707015991211, "learning_rate": 1.6492942085063967e-06, "loss": 0.1458, "step": 37595 }, { "epoch": 0.8146816024960457, "grad_norm": 1.199562430381775, "learning_rate": 1.647422306155505e-06, "loss": 0.1458, "step": 37600 }, { "epoch": 0.8147899378155267, "grad_norm": 0.5944446921348572, "learning_rate": 1.6455513713256177e-06, "loss": 0.2181, "step": 37605 }, { "epoch": 0.8148982731350075, "grad_norm": 1.6932621002197266, "learning_rate": 1.643681404233457e-06, "loss": 0.1648, "step": 37610 }, { "epoch": 0.8150066084544884, "grad_norm": 0.9081850051879883, "learning_rate": 1.6418124050956253e-06, "loss": 0.173, "step": 37615 }, { "epoch": 0.8151149437739692, "grad_norm": 1.709246039390564, "learning_rate": 1.6399443741286236e-06, "loss": 0.1545, "step": 37620 }, { "epoch": 0.81522327909345, "grad_norm": 1.3044674396514893, "learning_rate": 1.6380773115488301e-06, "loss": 0.2881, "step": 37625 }, { "epoch": 0.8153316144129309, "grad_norm": 1.387924313545227, "learning_rate": 1.6362112175725198e-06, "loss": 0.1697, "step": 37630 }, { "epoch": 0.8154399497324117, "grad_norm": 1.8599634170532227, "learning_rate": 1.6343460924158494e-06, "loss": 0.2363, "step": 37635 }, { "epoch": 0.8155482850518926, "grad_norm": 1.3807127475738525, "learning_rate": 1.6324819362948684e-06, "loss": 0.1391, "step": 37640 }, { "epoch": 0.8156566203713734, "grad_norm": 1.583827018737793, "learning_rate": 1.63061874942551e-06, "loss": 0.2414, "step": 37645 }, { "epoch": 0.8157649556908544, "grad_norm": 1.2559362649917603, "learning_rate": 1.628756532023591e-06, "loss": 0.2227, "step": 37650 }, { "epoch": 0.8158732910103352, "grad_norm": 1.932583212852478, "learning_rate": 1.6268952843048314e-06, "loss": 0.2249, "step": 37655 }, { "epoch": 0.8159816263298161, "grad_norm": 0.8197731971740723, "learning_rate": 1.625035006484822e-06, "loss": 0.1176, "step": 37660 }, { "epoch": 0.8160899616492969, "grad_norm": 1.4724924564361572, "learning_rate": 1.6231756987790525e-06, "loss": 0.1831, "step": 37665 }, { "epoch": 0.8161982969687778, "grad_norm": 1.2658957242965698, "learning_rate": 1.6213173614028898e-06, "loss": 0.1979, "step": 37670 }, { "epoch": 0.8163066322882586, "grad_norm": 1.6154423952102661, "learning_rate": 1.6194599945716016e-06, "loss": 0.2626, "step": 37675 }, { "epoch": 0.8164149676077395, "grad_norm": 1.217477560043335, "learning_rate": 1.6176035985003336e-06, "loss": 0.2099, "step": 37680 }, { "epoch": 0.8165233029272203, "grad_norm": 1.0918229818344116, "learning_rate": 1.6157481734041169e-06, "loss": 0.1693, "step": 37685 }, { "epoch": 0.8166316382467012, "grad_norm": 1.1007037162780762, "learning_rate": 1.61389371949788e-06, "loss": 0.1794, "step": 37690 }, { "epoch": 0.816739973566182, "grad_norm": 1.6808322668075562, "learning_rate": 1.6120402369964305e-06, "loss": 0.2092, "step": 37695 }, { "epoch": 0.816848308885663, "grad_norm": 1.3145827054977417, "learning_rate": 1.6101877261144672e-06, "loss": 0.2092, "step": 37700 }, { "epoch": 0.8169566442051438, "grad_norm": 1.0629785060882568, "learning_rate": 1.608336187066577e-06, "loss": 0.247, "step": 37705 }, { "epoch": 0.8170649795246246, "grad_norm": 1.7949984073638916, "learning_rate": 1.606485620067234e-06, "loss": 0.1681, "step": 37710 }, { "epoch": 0.8171733148441055, "grad_norm": 1.2540700435638428, "learning_rate": 1.604636025330798e-06, "loss": 0.2211, "step": 37715 }, { "epoch": 0.8172816501635863, "grad_norm": 1.4062410593032837, "learning_rate": 1.602787403071513e-06, "loss": 0.2205, "step": 37720 }, { "epoch": 0.8173899854830672, "grad_norm": 1.0058900117874146, "learning_rate": 1.6009397535035199e-06, "loss": 0.213, "step": 37725 }, { "epoch": 0.817498320802548, "grad_norm": 1.293859601020813, "learning_rate": 1.5990930768408354e-06, "loss": 0.2571, "step": 37730 }, { "epoch": 0.8176066561220289, "grad_norm": 1.1338993310928345, "learning_rate": 1.5972473732973758e-06, "loss": 0.1585, "step": 37735 }, { "epoch": 0.8177149914415097, "grad_norm": 1.133565068244934, "learning_rate": 1.5954026430869318e-06, "loss": 0.1943, "step": 37740 }, { "epoch": 0.8178233267609906, "grad_norm": 1.1383819580078125, "learning_rate": 1.5935588864231931e-06, "loss": 0.2303, "step": 37745 }, { "epoch": 0.8179316620804715, "grad_norm": 1.9687871932983398, "learning_rate": 1.5917161035197259e-06, "loss": 0.1561, "step": 37750 }, { "epoch": 0.8180399973999524, "grad_norm": 1.0862007141113281, "learning_rate": 1.5898742945899925e-06, "loss": 0.2362, "step": 37755 }, { "epoch": 0.8181483327194332, "grad_norm": 1.0117883682250977, "learning_rate": 1.5880334598473413e-06, "loss": 0.2359, "step": 37760 }, { "epoch": 0.8182566680389141, "grad_norm": 1.190192461013794, "learning_rate": 1.5861935995049993e-06, "loss": 0.2495, "step": 37765 }, { "epoch": 0.8183650033583949, "grad_norm": 1.3693565130233765, "learning_rate": 1.5843547137760928e-06, "loss": 0.2333, "step": 37770 }, { "epoch": 0.8184733386778758, "grad_norm": 1.0079714059829712, "learning_rate": 1.5825168028736248e-06, "loss": 0.2225, "step": 37775 }, { "epoch": 0.8185816739973566, "grad_norm": 2.271977424621582, "learning_rate": 1.5806798670104927e-06, "loss": 0.2326, "step": 37780 }, { "epoch": 0.8186900093168374, "grad_norm": 2.0459020137786865, "learning_rate": 1.5788439063994743e-06, "loss": 0.1477, "step": 37785 }, { "epoch": 0.8187983446363183, "grad_norm": 2.202907085418701, "learning_rate": 1.5770089212532435e-06, "loss": 0.2109, "step": 37790 }, { "epoch": 0.8189066799557992, "grad_norm": 1.9352532625198364, "learning_rate": 1.5751749117843495e-06, "loss": 0.2112, "step": 37795 }, { "epoch": 0.8190150152752801, "grad_norm": 1.8987901210784912, "learning_rate": 1.573341878205238e-06, "loss": 0.2993, "step": 37800 }, { "epoch": 0.8191233505947609, "grad_norm": 1.1188207864761353, "learning_rate": 1.571509820728242e-06, "loss": 0.2236, "step": 37805 }, { "epoch": 0.8192316859142418, "grad_norm": 2.16245436668396, "learning_rate": 1.5696787395655711e-06, "loss": 0.3238, "step": 37810 }, { "epoch": 0.8193400212337226, "grad_norm": 1.4492905139923096, "learning_rate": 1.5678486349293354e-06, "loss": 0.182, "step": 37815 }, { "epoch": 0.8194483565532035, "grad_norm": 1.2670036554336548, "learning_rate": 1.5660195070315175e-06, "loss": 0.2032, "step": 37820 }, { "epoch": 0.8195566918726843, "grad_norm": 1.9315156936645508, "learning_rate": 1.5641913560840028e-06, "loss": 0.2752, "step": 37825 }, { "epoch": 0.8196650271921652, "grad_norm": 1.9089527130126953, "learning_rate": 1.562364182298548e-06, "loss": 0.2535, "step": 37830 }, { "epoch": 0.819773362511646, "grad_norm": 1.3459073305130005, "learning_rate": 1.5605379858868098e-06, "loss": 0.274, "step": 37835 }, { "epoch": 0.8198816978311269, "grad_norm": 1.0541348457336426, "learning_rate": 1.5587127670603198e-06, "loss": 0.1911, "step": 37840 }, { "epoch": 0.8199900331506078, "grad_norm": 0.9108521342277527, "learning_rate": 1.5568885260305056e-06, "loss": 0.1833, "step": 37845 }, { "epoch": 0.8200983684700887, "grad_norm": 1.4653114080429077, "learning_rate": 1.5550652630086804e-06, "loss": 0.2499, "step": 37850 }, { "epoch": 0.8202067037895695, "grad_norm": 2.0561606884002686, "learning_rate": 1.5532429782060366e-06, "loss": 0.1906, "step": 37855 }, { "epoch": 0.8203150391090503, "grad_norm": 1.6611442565917969, "learning_rate": 1.5514216718336638e-06, "loss": 0.1795, "step": 37860 }, { "epoch": 0.8204233744285312, "grad_norm": 1.1727899312973022, "learning_rate": 1.5496013441025293e-06, "loss": 0.1635, "step": 37865 }, { "epoch": 0.820531709748012, "grad_norm": 1.3532849550247192, "learning_rate": 1.5477819952234929e-06, "loss": 0.2318, "step": 37870 }, { "epoch": 0.8206400450674929, "grad_norm": 1.1065564155578613, "learning_rate": 1.5459636254072962e-06, "loss": 0.296, "step": 37875 }, { "epoch": 0.8207483803869737, "grad_norm": 0.6161576509475708, "learning_rate": 1.5441462348645752e-06, "loss": 0.2579, "step": 37880 }, { "epoch": 0.8208567157064546, "grad_norm": 1.688058853149414, "learning_rate": 1.5423298238058438e-06, "loss": 0.1847, "step": 37885 }, { "epoch": 0.8209650510259354, "grad_norm": 1.3235564231872559, "learning_rate": 1.5405143924415034e-06, "loss": 0.1863, "step": 37890 }, { "epoch": 0.8210733863454164, "grad_norm": 1.474777340888977, "learning_rate": 1.5386999409818482e-06, "loss": 0.2079, "step": 37895 }, { "epoch": 0.8211817216648972, "grad_norm": 1.3483061790466309, "learning_rate": 1.5368864696370545e-06, "loss": 0.1505, "step": 37900 }, { "epoch": 0.8212900569843781, "grad_norm": 1.5896639823913574, "learning_rate": 1.5350739786171886e-06, "loss": 0.1974, "step": 37905 }, { "epoch": 0.8213983923038589, "grad_norm": 0.7594262361526489, "learning_rate": 1.533262468132195e-06, "loss": 0.1582, "step": 37910 }, { "epoch": 0.8215067276233398, "grad_norm": 0.6029388308525085, "learning_rate": 1.5314519383919147e-06, "loss": 0.2544, "step": 37915 }, { "epoch": 0.8216150629428206, "grad_norm": 2.084506034851074, "learning_rate": 1.5296423896060687e-06, "loss": 0.1743, "step": 37920 }, { "epoch": 0.8217233982623015, "grad_norm": 1.1649762392044067, "learning_rate": 1.5278338219842637e-06, "loss": 0.2656, "step": 37925 }, { "epoch": 0.8218317335817823, "grad_norm": 1.3874024152755737, "learning_rate": 1.5260262357360001e-06, "loss": 0.2907, "step": 37930 }, { "epoch": 0.8219400689012631, "grad_norm": 1.0759931802749634, "learning_rate": 1.5242196310706537e-06, "loss": 0.2442, "step": 37935 }, { "epoch": 0.8220484042207441, "grad_norm": 1.059296727180481, "learning_rate": 1.522414008197497e-06, "loss": 0.3232, "step": 37940 }, { "epoch": 0.822156739540225, "grad_norm": 1.3959944248199463, "learning_rate": 1.5206093673256817e-06, "loss": 0.2072, "step": 37945 }, { "epoch": 0.8222650748597058, "grad_norm": 1.5405373573303223, "learning_rate": 1.5188057086642537e-06, "loss": 0.2487, "step": 37950 }, { "epoch": 0.8223734101791866, "grad_norm": 1.7154873609542847, "learning_rate": 1.5170030324221352e-06, "loss": 0.2312, "step": 37955 }, { "epoch": 0.8224817454986675, "grad_norm": 1.5210121870040894, "learning_rate": 1.5152013388081388e-06, "loss": 0.2685, "step": 37960 }, { "epoch": 0.8225900808181483, "grad_norm": 0.752849817276001, "learning_rate": 1.5134006280309666e-06, "loss": 0.2193, "step": 37965 }, { "epoch": 0.8226984161376292, "grad_norm": 1.37613046169281, "learning_rate": 1.5116009002991993e-06, "loss": 0.2847, "step": 37970 }, { "epoch": 0.82280675145711, "grad_norm": 2.550013303756714, "learning_rate": 1.509802155821315e-06, "loss": 0.2856, "step": 37975 }, { "epoch": 0.8229150867765909, "grad_norm": 0.9585152864456177, "learning_rate": 1.5080043948056656e-06, "loss": 0.2691, "step": 37980 }, { "epoch": 0.8230234220960717, "grad_norm": 1.537852168083191, "learning_rate": 1.5062076174604978e-06, "loss": 0.2259, "step": 37985 }, { "epoch": 0.8231317574155527, "grad_norm": 1.1606578826904297, "learning_rate": 1.5044118239939398e-06, "loss": 0.2688, "step": 37990 }, { "epoch": 0.8232400927350335, "grad_norm": 1.6356313228607178, "learning_rate": 1.5026170146140073e-06, "loss": 0.1563, "step": 37995 }, { "epoch": 0.8233484280545144, "grad_norm": 1.242979884147644, "learning_rate": 1.5008231895286051e-06, "loss": 0.2204, "step": 38000 }, { "epoch": 0.8234567633739952, "grad_norm": 0.6563218235969543, "learning_rate": 1.4990303489455172e-06, "loss": 0.2538, "step": 38005 }, { "epoch": 0.823565098693476, "grad_norm": 1.5702917575836182, "learning_rate": 1.4972384930724205e-06, "loss": 0.1959, "step": 38010 }, { "epoch": 0.8236734340129569, "grad_norm": 1.7527621984481812, "learning_rate": 1.4954476221168711e-06, "loss": 0.2331, "step": 38015 }, { "epoch": 0.8237817693324377, "grad_norm": 1.1752312183380127, "learning_rate": 1.493657736286318e-06, "loss": 0.2021, "step": 38020 }, { "epoch": 0.8238901046519186, "grad_norm": 1.7578134536743164, "learning_rate": 1.4918688357880894e-06, "loss": 0.1669, "step": 38025 }, { "epoch": 0.8239984399713994, "grad_norm": 1.7335127592086792, "learning_rate": 1.4900809208294066e-06, "loss": 0.2229, "step": 38030 }, { "epoch": 0.8241067752908803, "grad_norm": 1.3860200643539429, "learning_rate": 1.4882939916173688e-06, "loss": 0.2298, "step": 38035 }, { "epoch": 0.8242151106103612, "grad_norm": 1.5575779676437378, "learning_rate": 1.4865080483589667e-06, "loss": 0.2053, "step": 38040 }, { "epoch": 0.8243234459298421, "grad_norm": 1.3817658424377441, "learning_rate": 1.4847230912610767e-06, "loss": 0.2222, "step": 38045 }, { "epoch": 0.8244317812493229, "grad_norm": 0.8281635642051697, "learning_rate": 1.4829391205304555e-06, "loss": 0.2718, "step": 38050 }, { "epoch": 0.8245401165688038, "grad_norm": 1.0501151084899902, "learning_rate": 1.4811561363737537e-06, "loss": 0.2507, "step": 38055 }, { "epoch": 0.8246484518882846, "grad_norm": 1.8641189336776733, "learning_rate": 1.4793741389974992e-06, "loss": 0.2276, "step": 38060 }, { "epoch": 0.8247567872077655, "grad_norm": 1.1142247915267944, "learning_rate": 1.4775931286081147e-06, "loss": 0.2585, "step": 38065 }, { "epoch": 0.8248651225272463, "grad_norm": 1.1247237920761108, "learning_rate": 1.4758131054118974e-06, "loss": 0.2056, "step": 38070 }, { "epoch": 0.8249734578467272, "grad_norm": 1.399867296218872, "learning_rate": 1.4740340696150423e-06, "loss": 0.2754, "step": 38075 }, { "epoch": 0.825081793166208, "grad_norm": 1.5708223581314087, "learning_rate": 1.4722560214236193e-06, "loss": 0.2684, "step": 38080 }, { "epoch": 0.825190128485689, "grad_norm": 1.4647969007492065, "learning_rate": 1.47047896104359e-06, "loss": 0.1402, "step": 38085 }, { "epoch": 0.8252984638051698, "grad_norm": 1.7870625257492065, "learning_rate": 1.468702888680803e-06, "loss": 0.1203, "step": 38090 }, { "epoch": 0.8254067991246506, "grad_norm": 2.5265395641326904, "learning_rate": 1.4669278045409863e-06, "loss": 0.2574, "step": 38095 }, { "epoch": 0.8255151344441315, "grad_norm": 0.7683199048042297, "learning_rate": 1.4651537088297596e-06, "loss": 0.2471, "step": 38100 }, { "epoch": 0.8256234697636123, "grad_norm": 2.4896352291107178, "learning_rate": 1.4633806017526208e-06, "loss": 0.224, "step": 38105 }, { "epoch": 0.8257318050830932, "grad_norm": 1.2385375499725342, "learning_rate": 1.4616084835149635e-06, "loss": 0.1839, "step": 38110 }, { "epoch": 0.825840140402574, "grad_norm": 1.2512052059173584, "learning_rate": 1.4598373543220567e-06, "loss": 0.2128, "step": 38115 }, { "epoch": 0.8259484757220549, "grad_norm": 1.7323981523513794, "learning_rate": 1.458067214379062e-06, "loss": 0.1759, "step": 38120 }, { "epoch": 0.8260568110415357, "grad_norm": 1.4970828294754028, "learning_rate": 1.456298063891023e-06, "loss": 0.1975, "step": 38125 }, { "epoch": 0.8261651463610166, "grad_norm": 1.5193135738372803, "learning_rate": 1.4545299030628667e-06, "loss": 0.1164, "step": 38130 }, { "epoch": 0.8262734816804975, "grad_norm": 1.7454023361206055, "learning_rate": 1.4527627320994097e-06, "loss": 0.2826, "step": 38135 }, { "epoch": 0.8263818169999784, "grad_norm": 1.2078847885131836, "learning_rate": 1.4509965512053526e-06, "loss": 0.2049, "step": 38140 }, { "epoch": 0.8264901523194592, "grad_norm": 1.5887237787246704, "learning_rate": 1.4492313605852825e-06, "loss": 0.2729, "step": 38145 }, { "epoch": 0.8265984876389401, "grad_norm": 2.7635436058044434, "learning_rate": 1.4474671604436674e-06, "loss": 0.3383, "step": 38150 }, { "epoch": 0.8267068229584209, "grad_norm": 1.0222944021224976, "learning_rate": 1.445703950984867e-06, "loss": 0.2077, "step": 38155 }, { "epoch": 0.8268151582779018, "grad_norm": 1.1051915884017944, "learning_rate": 1.4439417324131177e-06, "loss": 0.266, "step": 38160 }, { "epoch": 0.8269234935973826, "grad_norm": 1.7102705240249634, "learning_rate": 1.442180504932551e-06, "loss": 0.1855, "step": 38165 }, { "epoch": 0.8270318289168634, "grad_norm": 0.8506316542625427, "learning_rate": 1.440420268747178e-06, "loss": 0.1994, "step": 38170 }, { "epoch": 0.8271401642363443, "grad_norm": 1.1136999130249023, "learning_rate": 1.4386610240608912e-06, "loss": 0.188, "step": 38175 }, { "epoch": 0.8272484995558252, "grad_norm": 1.035420298576355, "learning_rate": 1.4369027710774764e-06, "loss": 0.2269, "step": 38180 }, { "epoch": 0.8273568348753061, "grad_norm": 0.5064231753349304, "learning_rate": 1.4351455100005994e-06, "loss": 0.1411, "step": 38185 }, { "epoch": 0.8274651701947869, "grad_norm": 0.81637042760849, "learning_rate": 1.4333892410338169e-06, "loss": 0.2056, "step": 38190 }, { "epoch": 0.8275735055142678, "grad_norm": 1.1860367059707642, "learning_rate": 1.43163396438056e-06, "loss": 0.1798, "step": 38195 }, { "epoch": 0.8276818408337486, "grad_norm": 1.8656139373779297, "learning_rate": 1.4298796802441573e-06, "loss": 0.2822, "step": 38200 }, { "epoch": 0.8277901761532295, "grad_norm": 1.6486859321594238, "learning_rate": 1.4281263888278118e-06, "loss": 0.2199, "step": 38205 }, { "epoch": 0.8278985114727103, "grad_norm": 1.0979640483856201, "learning_rate": 1.4263740903346168e-06, "loss": 0.1523, "step": 38210 }, { "epoch": 0.8280068467921912, "grad_norm": 0.6809828877449036, "learning_rate": 1.4246227849675521e-06, "loss": 0.173, "step": 38215 }, { "epoch": 0.828115182111672, "grad_norm": 1.42020845413208, "learning_rate": 1.4228724729294762e-06, "loss": 0.257, "step": 38220 }, { "epoch": 0.8282235174311529, "grad_norm": 1.7442150115966797, "learning_rate": 1.4211231544231418e-06, "loss": 0.2618, "step": 38225 }, { "epoch": 0.8283318527506338, "grad_norm": 0.8509999513626099, "learning_rate": 1.4193748296511733e-06, "loss": 0.1964, "step": 38230 }, { "epoch": 0.8284401880701147, "grad_norm": 0.9511086940765381, "learning_rate": 1.4176274988160976e-06, "loss": 0.2624, "step": 38235 }, { "epoch": 0.8285485233895955, "grad_norm": 0.8944128155708313, "learning_rate": 1.4158811621203127e-06, "loss": 0.2024, "step": 38240 }, { "epoch": 0.8286568587090763, "grad_norm": 1.3662720918655396, "learning_rate": 1.4141358197661025e-06, "loss": 0.3167, "step": 38245 }, { "epoch": 0.8287651940285572, "grad_norm": 2.014343500137329, "learning_rate": 1.412391471955643e-06, "loss": 0.2339, "step": 38250 }, { "epoch": 0.828873529348038, "grad_norm": 1.6005343198776245, "learning_rate": 1.4106481188909882e-06, "loss": 0.223, "step": 38255 }, { "epoch": 0.8289818646675189, "grad_norm": 1.454370379447937, "learning_rate": 1.4089057607740809e-06, "loss": 0.1749, "step": 38260 }, { "epoch": 0.8290901999869997, "grad_norm": 0.8943676948547363, "learning_rate": 1.4071643978067461e-06, "loss": 0.1871, "step": 38265 }, { "epoch": 0.8291985353064806, "grad_norm": 1.7388817071914673, "learning_rate": 1.4054240301906962e-06, "loss": 0.1347, "step": 38270 }, { "epoch": 0.8293068706259614, "grad_norm": 0.7736380100250244, "learning_rate": 1.4036846581275242e-06, "loss": 0.1832, "step": 38275 }, { "epoch": 0.8294152059454424, "grad_norm": 1.857782244682312, "learning_rate": 1.4019462818187113e-06, "loss": 0.2852, "step": 38280 }, { "epoch": 0.8295235412649232, "grad_norm": 0.9056721329689026, "learning_rate": 1.400208901465625e-06, "loss": 0.2902, "step": 38285 }, { "epoch": 0.8296318765844041, "grad_norm": 1.2047898769378662, "learning_rate": 1.3984725172695102e-06, "loss": 0.1996, "step": 38290 }, { "epoch": 0.8297402119038849, "grad_norm": 0.5545894503593445, "learning_rate": 1.3967371294315057e-06, "loss": 0.231, "step": 38295 }, { "epoch": 0.8298485472233658, "grad_norm": 1.4694268703460693, "learning_rate": 1.3950027381526266e-06, "loss": 0.2275, "step": 38300 }, { "epoch": 0.8299568825428466, "grad_norm": 1.4555524587631226, "learning_rate": 1.3932693436337786e-06, "loss": 0.1412, "step": 38305 }, { "epoch": 0.8300652178623275, "grad_norm": 1.7064846754074097, "learning_rate": 1.3915369460757467e-06, "loss": 0.2474, "step": 38310 }, { "epoch": 0.8301735531818083, "grad_norm": 1.764285683631897, "learning_rate": 1.389805545679207e-06, "loss": 0.246, "step": 38315 }, { "epoch": 0.8302818885012891, "grad_norm": 1.1713560819625854, "learning_rate": 1.3880751426447114e-06, "loss": 0.3364, "step": 38320 }, { "epoch": 0.8303902238207701, "grad_norm": 1.0825207233428955, "learning_rate": 1.386345737172704e-06, "loss": 0.2011, "step": 38325 }, { "epoch": 0.830498559140251, "grad_norm": 1.19381844997406, "learning_rate": 1.3846173294635135e-06, "loss": 0.1862, "step": 38330 }, { "epoch": 0.8306068944597318, "grad_norm": 1.4831266403198242, "learning_rate": 1.382889919717344e-06, "loss": 0.2887, "step": 38335 }, { "epoch": 0.8307152297792126, "grad_norm": 1.537832498550415, "learning_rate": 1.3811635081342945e-06, "loss": 0.2803, "step": 38340 }, { "epoch": 0.8308235650986935, "grad_norm": 1.418906807899475, "learning_rate": 1.3794380949143416e-06, "loss": 0.1973, "step": 38345 }, { "epoch": 0.8309319004181743, "grad_norm": 1.7693381309509277, "learning_rate": 1.3777136802573509e-06, "loss": 0.1986, "step": 38350 }, { "epoch": 0.8310402357376552, "grad_norm": 1.000041127204895, "learning_rate": 1.3759902643630664e-06, "loss": 0.2051, "step": 38355 }, { "epoch": 0.831148571057136, "grad_norm": 1.7417526245117188, "learning_rate": 1.3742678474311244e-06, "loss": 0.1695, "step": 38360 }, { "epoch": 0.8312569063766169, "grad_norm": 1.4097671508789062, "learning_rate": 1.3725464296610357e-06, "loss": 0.312, "step": 38365 }, { "epoch": 0.8313652416960977, "grad_norm": 0.7871866226196289, "learning_rate": 1.3708260112522075e-06, "loss": 0.177, "step": 38370 }, { "epoch": 0.8314735770155787, "grad_norm": 1.6579197645187378, "learning_rate": 1.3691065924039182e-06, "loss": 0.2628, "step": 38375 }, { "epoch": 0.8315819123350595, "grad_norm": 1.3048175573349, "learning_rate": 1.3673881733153394e-06, "loss": 0.2992, "step": 38380 }, { "epoch": 0.8316902476545404, "grad_norm": 1.266511082649231, "learning_rate": 1.3656707541855264e-06, "loss": 0.2223, "step": 38385 }, { "epoch": 0.8317985829740212, "grad_norm": 1.4898885488510132, "learning_rate": 1.363954335213411e-06, "loss": 0.2264, "step": 38390 }, { "epoch": 0.831906918293502, "grad_norm": 1.2030068635940552, "learning_rate": 1.3622389165978212e-06, "loss": 0.2619, "step": 38395 }, { "epoch": 0.8320152536129829, "grad_norm": 0.9271427392959595, "learning_rate": 1.3605244985374577e-06, "loss": 0.1987, "step": 38400 }, { "epoch": 0.8321235889324637, "grad_norm": 1.6757113933563232, "learning_rate": 1.358811081230913e-06, "loss": 0.2642, "step": 38405 }, { "epoch": 0.8322319242519446, "grad_norm": 1.8132860660552979, "learning_rate": 1.3570986648766593e-06, "loss": 0.1825, "step": 38410 }, { "epoch": 0.8323402595714254, "grad_norm": 0.9349760413169861, "learning_rate": 1.3553872496730536e-06, "loss": 0.2059, "step": 38415 }, { "epoch": 0.8324485948909063, "grad_norm": 1.0090516805648804, "learning_rate": 1.3536768358183395e-06, "loss": 0.1829, "step": 38420 }, { "epoch": 0.8325569302103872, "grad_norm": 1.2084225416183472, "learning_rate": 1.3519674235106383e-06, "loss": 0.2446, "step": 38425 }, { "epoch": 0.8326652655298681, "grad_norm": 1.7260421514511108, "learning_rate": 1.3502590129479675e-06, "loss": 0.2157, "step": 38430 }, { "epoch": 0.8327736008493489, "grad_norm": 1.0523675680160522, "learning_rate": 1.3485516043282154e-06, "loss": 0.1624, "step": 38435 }, { "epoch": 0.8328819361688298, "grad_norm": 1.964633822441101, "learning_rate": 1.3468451978491638e-06, "loss": 0.1777, "step": 38440 }, { "epoch": 0.8329902714883106, "grad_norm": 0.8143784403800964, "learning_rate": 1.345139793708471e-06, "loss": 0.1833, "step": 38445 }, { "epoch": 0.8330986068077915, "grad_norm": 0.9938148856163025, "learning_rate": 1.3434353921036813e-06, "loss": 0.1584, "step": 38450 }, { "epoch": 0.8332069421272723, "grad_norm": 1.4411553144454956, "learning_rate": 1.3417319932322282e-06, "loss": 0.2988, "step": 38455 }, { "epoch": 0.8333152774467532, "grad_norm": 1.9267852306365967, "learning_rate": 1.3400295972914212e-06, "loss": 0.181, "step": 38460 }, { "epoch": 0.833423612766234, "grad_norm": 1.530492901802063, "learning_rate": 1.3383282044784607e-06, "loss": 0.2887, "step": 38465 }, { "epoch": 0.833531948085715, "grad_norm": 1.4758435487747192, "learning_rate": 1.336627814990421e-06, "loss": 0.2248, "step": 38470 }, { "epoch": 0.8336402834051958, "grad_norm": 1.5591635704040527, "learning_rate": 1.334928429024277e-06, "loss": 0.3226, "step": 38475 }, { "epoch": 0.8337486187246766, "grad_norm": 1.4690260887145996, "learning_rate": 1.333230046776871e-06, "loss": 0.3277, "step": 38480 }, { "epoch": 0.8338569540441575, "grad_norm": 1.7654322385787964, "learning_rate": 1.3315326684449348e-06, "loss": 0.1836, "step": 38485 }, { "epoch": 0.8339652893636383, "grad_norm": 0.7022150158882141, "learning_rate": 1.3298362942250864e-06, "loss": 0.1745, "step": 38490 }, { "epoch": 0.8340736246831192, "grad_norm": 1.7435650825500488, "learning_rate": 1.3281409243138222e-06, "loss": 0.1465, "step": 38495 }, { "epoch": 0.8341819600026, "grad_norm": 1.617714524269104, "learning_rate": 1.3264465589075293e-06, "loss": 0.2186, "step": 38500 }, { "epoch": 0.8342902953220809, "grad_norm": 0.8503140807151794, "learning_rate": 1.3247531982024719e-06, "loss": 0.126, "step": 38505 }, { "epoch": 0.8343986306415617, "grad_norm": 0.32669612765312195, "learning_rate": 1.3230608423948022e-06, "loss": 0.1928, "step": 38510 }, { "epoch": 0.8345069659610426, "grad_norm": 1.6161998510360718, "learning_rate": 1.321369491680552e-06, "loss": 0.1977, "step": 38515 }, { "epoch": 0.8346153012805235, "grad_norm": 1.7392476797103882, "learning_rate": 1.3196791462556403e-06, "loss": 0.2551, "step": 38520 }, { "epoch": 0.8347236366000044, "grad_norm": 1.50690495967865, "learning_rate": 1.3179898063158692e-06, "loss": 0.2613, "step": 38525 }, { "epoch": 0.8348319719194852, "grad_norm": 1.4301947355270386, "learning_rate": 1.3163014720569224e-06, "loss": 0.2294, "step": 38530 }, { "epoch": 0.8349403072389661, "grad_norm": 1.2137573957443237, "learning_rate": 1.3146141436743687e-06, "loss": 0.18, "step": 38535 }, { "epoch": 0.8350486425584469, "grad_norm": 1.7971566915512085, "learning_rate": 1.3129278213636577e-06, "loss": 0.1839, "step": 38540 }, { "epoch": 0.8351569778779278, "grad_norm": 3.1858267784118652, "learning_rate": 1.3112425053201282e-06, "loss": 0.2415, "step": 38545 }, { "epoch": 0.8352653131974086, "grad_norm": 1.8648738861083984, "learning_rate": 1.3095581957389958e-06, "loss": 0.221, "step": 38550 }, { "epoch": 0.8353736485168894, "grad_norm": 1.3109228610992432, "learning_rate": 1.3078748928153651e-06, "loss": 0.2162, "step": 38555 }, { "epoch": 0.8354819838363703, "grad_norm": 1.1027568578720093, "learning_rate": 1.3061925967442179e-06, "loss": 0.1608, "step": 38560 }, { "epoch": 0.8355903191558511, "grad_norm": 1.0167864561080933, "learning_rate": 1.3045113077204274e-06, "loss": 0.1832, "step": 38565 }, { "epoch": 0.8356986544753321, "grad_norm": 1.6143536567687988, "learning_rate": 1.302831025938741e-06, "loss": 0.2528, "step": 38570 }, { "epoch": 0.8358069897948129, "grad_norm": 1.4287002086639404, "learning_rate": 1.3011517515937965e-06, "loss": 0.304, "step": 38575 }, { "epoch": 0.8359153251142938, "grad_norm": 1.493878960609436, "learning_rate": 1.2994734848801161e-06, "loss": 0.2657, "step": 38580 }, { "epoch": 0.8360236604337746, "grad_norm": 1.4392777681350708, "learning_rate": 1.297796225992095e-06, "loss": 0.1637, "step": 38585 }, { "epoch": 0.8361319957532555, "grad_norm": 0.7822135090827942, "learning_rate": 1.296119975124025e-06, "loss": 0.215, "step": 38590 }, { "epoch": 0.8362403310727363, "grad_norm": 1.4180841445922852, "learning_rate": 1.2944447324700693e-06, "loss": 0.217, "step": 38595 }, { "epoch": 0.8363486663922172, "grad_norm": 1.3906441926956177, "learning_rate": 1.292770498224285e-06, "loss": 0.211, "step": 38600 }, { "epoch": 0.836457001711698, "grad_norm": 1.4719387292861938, "learning_rate": 1.2910972725806016e-06, "loss": 0.2204, "step": 38605 }, { "epoch": 0.8365653370311789, "grad_norm": 1.5090830326080322, "learning_rate": 1.2894250557328426e-06, "loss": 0.2797, "step": 38610 }, { "epoch": 0.8366736723506598, "grad_norm": 2.4409358501434326, "learning_rate": 1.2877538478747031e-06, "loss": 0.1415, "step": 38615 }, { "epoch": 0.8367820076701407, "grad_norm": 1.5139002799987793, "learning_rate": 1.2860836491997718e-06, "loss": 0.1764, "step": 38620 }, { "epoch": 0.8368903429896215, "grad_norm": 1.3581756353378296, "learning_rate": 1.2844144599015175e-06, "loss": 0.1501, "step": 38625 }, { "epoch": 0.8369986783091024, "grad_norm": 0.7594563364982605, "learning_rate": 1.282746280173285e-06, "loss": 0.3147, "step": 38630 }, { "epoch": 0.8371070136285832, "grad_norm": 1.1460402011871338, "learning_rate": 1.2810791102083154e-06, "loss": 0.3189, "step": 38635 }, { "epoch": 0.837215348948064, "grad_norm": 1.1458139419555664, "learning_rate": 1.2794129501997176e-06, "loss": 0.2546, "step": 38640 }, { "epoch": 0.8373236842675449, "grad_norm": 1.389259934425354, "learning_rate": 1.2777478003404986e-06, "loss": 0.1897, "step": 38645 }, { "epoch": 0.8374320195870257, "grad_norm": 1.2876100540161133, "learning_rate": 1.2760836608235373e-06, "loss": 0.16, "step": 38650 }, { "epoch": 0.8375403549065066, "grad_norm": 1.2381970882415771, "learning_rate": 1.274420531841598e-06, "loss": 0.2556, "step": 38655 }, { "epoch": 0.8376486902259874, "grad_norm": 1.3657959699630737, "learning_rate": 1.2727584135873317e-06, "loss": 0.2276, "step": 38660 }, { "epoch": 0.8377570255454684, "grad_norm": 1.1117867231369019, "learning_rate": 1.2710973062532662e-06, "loss": 0.2301, "step": 38665 }, { "epoch": 0.8378653608649492, "grad_norm": 0.9308428168296814, "learning_rate": 1.269437210031822e-06, "loss": 0.2353, "step": 38670 }, { "epoch": 0.8379736961844301, "grad_norm": 0.9682656526565552, "learning_rate": 1.267778125115292e-06, "loss": 0.2655, "step": 38675 }, { "epoch": 0.8380820315039109, "grad_norm": 1.4969186782836914, "learning_rate": 1.2661200516958594e-06, "loss": 0.1646, "step": 38680 }, { "epoch": 0.8381903668233918, "grad_norm": 1.6583693027496338, "learning_rate": 1.2644629899655848e-06, "loss": 0.22, "step": 38685 }, { "epoch": 0.8382987021428726, "grad_norm": 1.0833117961883545, "learning_rate": 1.2628069401164134e-06, "loss": 0.1369, "step": 38690 }, { "epoch": 0.8384070374623535, "grad_norm": 1.108628273010254, "learning_rate": 1.2611519023401764e-06, "loss": 0.2832, "step": 38695 }, { "epoch": 0.8385153727818343, "grad_norm": 0.7971040606498718, "learning_rate": 1.2594978768285804e-06, "loss": 0.1989, "step": 38700 }, { "epoch": 0.8386237081013151, "grad_norm": 0.6772046685218811, "learning_rate": 1.2578448637732266e-06, "loss": 0.1787, "step": 38705 }, { "epoch": 0.8387320434207961, "grad_norm": 1.4580013751983643, "learning_rate": 1.2561928633655817e-06, "loss": 0.1962, "step": 38710 }, { "epoch": 0.838840378740277, "grad_norm": 1.9232367277145386, "learning_rate": 1.2545418757970173e-06, "loss": 0.147, "step": 38715 }, { "epoch": 0.8389487140597578, "grad_norm": 1.6860262155532837, "learning_rate": 1.2528919012587693e-06, "loss": 0.1713, "step": 38720 }, { "epoch": 0.8390570493792386, "grad_norm": 1.1743583679199219, "learning_rate": 1.2512429399419601e-06, "loss": 0.0989, "step": 38725 }, { "epoch": 0.8391653846987195, "grad_norm": 0.9282735586166382, "learning_rate": 1.2495949920376016e-06, "loss": 0.2042, "step": 38730 }, { "epoch": 0.8392737200182003, "grad_norm": 1.0857614278793335, "learning_rate": 1.247948057736581e-06, "loss": 0.1549, "step": 38735 }, { "epoch": 0.8393820553376812, "grad_norm": 0.7142701745033264, "learning_rate": 1.2463021372296747e-06, "loss": 0.1845, "step": 38740 }, { "epoch": 0.839490390657162, "grad_norm": 1.6484272480010986, "learning_rate": 1.2446572307075321e-06, "loss": 0.1979, "step": 38745 }, { "epoch": 0.8395987259766429, "grad_norm": 1.1218258142471313, "learning_rate": 1.2430133383606979e-06, "loss": 0.1992, "step": 38750 }, { "epoch": 0.8397070612961237, "grad_norm": 1.1333210468292236, "learning_rate": 1.241370460379585e-06, "loss": 0.2582, "step": 38755 }, { "epoch": 0.8398153966156047, "grad_norm": 1.628484845161438, "learning_rate": 1.2397285969545015e-06, "loss": 0.2286, "step": 38760 }, { "epoch": 0.8399237319350855, "grad_norm": 1.7107895612716675, "learning_rate": 1.2380877482756326e-06, "loss": 0.1632, "step": 38765 }, { "epoch": 0.8400320672545664, "grad_norm": 1.141296625137329, "learning_rate": 1.2364479145330422e-06, "loss": 0.2158, "step": 38770 }, { "epoch": 0.8401404025740472, "grad_norm": 2.727799415588379, "learning_rate": 1.2348090959166858e-06, "loss": 0.2266, "step": 38775 }, { "epoch": 0.840248737893528, "grad_norm": 1.5594583749771118, "learning_rate": 1.2331712926163896e-06, "loss": 0.2228, "step": 38780 }, { "epoch": 0.8403570732130089, "grad_norm": 1.139503002166748, "learning_rate": 1.2315345048218763e-06, "loss": 0.2204, "step": 38785 }, { "epoch": 0.8404654085324897, "grad_norm": 1.9756778478622437, "learning_rate": 1.2298987327227352e-06, "loss": 0.2259, "step": 38790 }, { "epoch": 0.8405737438519706, "grad_norm": 1.2517762184143066, "learning_rate": 1.2282639765084524e-06, "loss": 0.1988, "step": 38795 }, { "epoch": 0.8406820791714514, "grad_norm": 1.6068130731582642, "learning_rate": 1.2266302363683857e-06, "loss": 0.2039, "step": 38800 }, { "epoch": 0.8407904144909323, "grad_norm": 0.49945512413978577, "learning_rate": 1.2249975124917822e-06, "loss": 0.2118, "step": 38805 }, { "epoch": 0.8408987498104132, "grad_norm": 1.6879552602767944, "learning_rate": 1.2233658050677656e-06, "loss": 0.1687, "step": 38810 }, { "epoch": 0.8410070851298941, "grad_norm": 1.0114758014678955, "learning_rate": 1.2217351142853474e-06, "loss": 0.2535, "step": 38815 }, { "epoch": 0.8411154204493749, "grad_norm": 1.5436862707138062, "learning_rate": 1.2201054403334189e-06, "loss": 0.2254, "step": 38820 }, { "epoch": 0.8412237557688558, "grad_norm": 2.0056498050689697, "learning_rate": 1.2184767834007506e-06, "loss": 0.1684, "step": 38825 }, { "epoch": 0.8413320910883366, "grad_norm": 1.766735553741455, "learning_rate": 1.2168491436760011e-06, "loss": 0.3278, "step": 38830 }, { "epoch": 0.8414404264078175, "grad_norm": 1.8029899597167969, "learning_rate": 1.2152225213477054e-06, "loss": 0.1216, "step": 38835 }, { "epoch": 0.8415487617272983, "grad_norm": 1.1854244470596313, "learning_rate": 1.2135969166042872e-06, "loss": 0.2077, "step": 38840 }, { "epoch": 0.8416570970467792, "grad_norm": 1.087823510169983, "learning_rate": 1.2119723296340424e-06, "loss": 0.2311, "step": 38845 }, { "epoch": 0.84176543236626, "grad_norm": 1.4924273490905762, "learning_rate": 1.210348760625162e-06, "loss": 0.2737, "step": 38850 }, { "epoch": 0.841873767685741, "grad_norm": 0.6151227355003357, "learning_rate": 1.208726209765706e-06, "loss": 0.1807, "step": 38855 }, { "epoch": 0.8419821030052218, "grad_norm": 1.5705305337905884, "learning_rate": 1.2071046772436246e-06, "loss": 0.351, "step": 38860 }, { "epoch": 0.8420904383247027, "grad_norm": 1.1735864877700806, "learning_rate": 1.2054841632467517e-06, "loss": 0.2167, "step": 38865 }, { "epoch": 0.8421987736441835, "grad_norm": 1.008236050605774, "learning_rate": 1.2038646679627953e-06, "loss": 0.2267, "step": 38870 }, { "epoch": 0.8423071089636643, "grad_norm": 1.2210129499435425, "learning_rate": 1.2022461915793515e-06, "loss": 0.2145, "step": 38875 }, { "epoch": 0.8424154442831452, "grad_norm": 1.163540005683899, "learning_rate": 1.2006287342838952e-06, "loss": 0.1839, "step": 38880 }, { "epoch": 0.842523779602626, "grad_norm": 1.6024236679077148, "learning_rate": 1.1990122962637872e-06, "loss": 0.2192, "step": 38885 }, { "epoch": 0.8426321149221069, "grad_norm": 1.8182042837142944, "learning_rate": 1.1973968777062662e-06, "loss": 0.1773, "step": 38890 }, { "epoch": 0.8427404502415877, "grad_norm": 1.3860135078430176, "learning_rate": 1.1957824787984508e-06, "loss": 0.2715, "step": 38895 }, { "epoch": 0.8428487855610686, "grad_norm": 1.393539547920227, "learning_rate": 1.1941690997273514e-06, "loss": 0.2759, "step": 38900 }, { "epoch": 0.8429571208805495, "grad_norm": 2.7330009937286377, "learning_rate": 1.1925567406798456e-06, "loss": 0.2841, "step": 38905 }, { "epoch": 0.8430654562000304, "grad_norm": 1.6293104887008667, "learning_rate": 1.1909454018427103e-06, "loss": 0.2971, "step": 38910 }, { "epoch": 0.8431737915195112, "grad_norm": 1.5824085474014282, "learning_rate": 1.1893350834025885e-06, "loss": 0.2389, "step": 38915 }, { "epoch": 0.8432821268389921, "grad_norm": 1.9522405862808228, "learning_rate": 1.1877257855460156e-06, "loss": 0.1153, "step": 38920 }, { "epoch": 0.8433904621584729, "grad_norm": 1.893144130706787, "learning_rate": 1.1861175084594022e-06, "loss": 0.1767, "step": 38925 }, { "epoch": 0.8434987974779538, "grad_norm": 1.2550956010818481, "learning_rate": 1.184510252329042e-06, "loss": 0.1735, "step": 38930 }, { "epoch": 0.8436071327974346, "grad_norm": 1.1269160509109497, "learning_rate": 1.1829040173411144e-06, "loss": 0.1714, "step": 38935 }, { "epoch": 0.8437154681169154, "grad_norm": 1.3690928220748901, "learning_rate": 1.1812988036816741e-06, "loss": 0.2472, "step": 38940 }, { "epoch": 0.8438238034363963, "grad_norm": 1.1332350969314575, "learning_rate": 1.1796946115366658e-06, "loss": 0.239, "step": 38945 }, { "epoch": 0.8439321387558771, "grad_norm": 1.0788075923919678, "learning_rate": 1.1780914410919075e-06, "loss": 0.2543, "step": 38950 }, { "epoch": 0.8440404740753581, "grad_norm": 1.1125872135162354, "learning_rate": 1.1764892925331018e-06, "loss": 0.264, "step": 38955 }, { "epoch": 0.8441488093948389, "grad_norm": 1.3202565908432007, "learning_rate": 1.174888166045839e-06, "loss": 0.2356, "step": 38960 }, { "epoch": 0.8442571447143198, "grad_norm": 1.1842249631881714, "learning_rate": 1.1732880618155784e-06, "loss": 0.2226, "step": 38965 }, { "epoch": 0.8443654800338006, "grad_norm": 1.504569411277771, "learning_rate": 1.1716889800276753e-06, "loss": 0.2154, "step": 38970 }, { "epoch": 0.8444738153532815, "grad_norm": 1.0843174457550049, "learning_rate": 1.1700909208673528e-06, "loss": 0.187, "step": 38975 }, { "epoch": 0.8445821506727623, "grad_norm": 0.999523937702179, "learning_rate": 1.1684938845197269e-06, "loss": 0.1353, "step": 38980 }, { "epoch": 0.8446904859922432, "grad_norm": 1.6005014181137085, "learning_rate": 1.1668978711697875e-06, "loss": 0.1692, "step": 38985 }, { "epoch": 0.844798821311724, "grad_norm": 1.6589562892913818, "learning_rate": 1.1653028810024114e-06, "loss": 0.1967, "step": 38990 }, { "epoch": 0.8449071566312049, "grad_norm": 1.3573600053787231, "learning_rate": 1.1637089142023506e-06, "loss": 0.2188, "step": 38995 }, { "epoch": 0.8450154919506858, "grad_norm": 0.8820226788520813, "learning_rate": 1.162115970954244e-06, "loss": 0.2613, "step": 39000 }, { "epoch": 0.8451238272701667, "grad_norm": 1.237420678138733, "learning_rate": 1.160524051442613e-06, "loss": 0.2522, "step": 39005 }, { "epoch": 0.8452321625896475, "grad_norm": 1.5340771675109863, "learning_rate": 1.1589331558518535e-06, "loss": 0.2016, "step": 39010 }, { "epoch": 0.8453404979091284, "grad_norm": 1.0916390419006348, "learning_rate": 1.1573432843662513e-06, "loss": 0.1811, "step": 39015 }, { "epoch": 0.8454488332286092, "grad_norm": 1.6977825164794922, "learning_rate": 1.1557544371699635e-06, "loss": 0.3817, "step": 39020 }, { "epoch": 0.84555716854809, "grad_norm": 1.7791121006011963, "learning_rate": 1.1541666144470398e-06, "loss": 0.3323, "step": 39025 }, { "epoch": 0.8456655038675709, "grad_norm": 1.942840814590454, "learning_rate": 1.1525798163814016e-06, "loss": 0.2972, "step": 39030 }, { "epoch": 0.8457738391870517, "grad_norm": 1.47977614402771, "learning_rate": 1.1509940431568588e-06, "loss": 0.183, "step": 39035 }, { "epoch": 0.8458821745065326, "grad_norm": 2.2488181591033936, "learning_rate": 1.1494092949570968e-06, "loss": 0.2496, "step": 39040 }, { "epoch": 0.8459905098260134, "grad_norm": 1.5151426792144775, "learning_rate": 1.1478255719656872e-06, "loss": 0.2441, "step": 39045 }, { "epoch": 0.8460988451454944, "grad_norm": 1.2773674726486206, "learning_rate": 1.1462428743660781e-06, "loss": 0.13, "step": 39050 }, { "epoch": 0.8462071804649752, "grad_norm": 1.897765874862671, "learning_rate": 1.1446612023416026e-06, "loss": 0.1936, "step": 39055 }, { "epoch": 0.8463155157844561, "grad_norm": 1.36177659034729, "learning_rate": 1.1430805560754765e-06, "loss": 0.2649, "step": 39060 }, { "epoch": 0.8464238511039369, "grad_norm": 1.083108901977539, "learning_rate": 1.1415009357507879e-06, "loss": 0.225, "step": 39065 }, { "epoch": 0.8465321864234178, "grad_norm": 1.0808743238449097, "learning_rate": 1.139922341550519e-06, "loss": 0.265, "step": 39070 }, { "epoch": 0.8466405217428986, "grad_norm": 1.6003522872924805, "learning_rate": 1.1383447736575193e-06, "loss": 0.2135, "step": 39075 }, { "epoch": 0.8467488570623795, "grad_norm": 2.45504093170166, "learning_rate": 1.1367682322545336e-06, "loss": 0.189, "step": 39080 }, { "epoch": 0.8468571923818603, "grad_norm": 1.0250630378723145, "learning_rate": 1.135192717524174e-06, "loss": 0.2378, "step": 39085 }, { "epoch": 0.8469655277013411, "grad_norm": 1.1061632633209229, "learning_rate": 1.1336182296489452e-06, "loss": 0.2026, "step": 39090 }, { "epoch": 0.847073863020822, "grad_norm": 1.2688496112823486, "learning_rate": 1.1320447688112269e-06, "loss": 0.3482, "step": 39095 }, { "epoch": 0.847182198340303, "grad_norm": 0.8028013706207275, "learning_rate": 1.1304723351932757e-06, "loss": 0.156, "step": 39100 }, { "epoch": 0.8472905336597838, "grad_norm": 1.697461485862732, "learning_rate": 1.1289009289772434e-06, "loss": 0.1698, "step": 39105 }, { "epoch": 0.8473988689792646, "grad_norm": 1.5394366979599, "learning_rate": 1.1273305503451471e-06, "loss": 0.1588, "step": 39110 }, { "epoch": 0.8475072042987455, "grad_norm": 1.792493462562561, "learning_rate": 1.1257611994788953e-06, "loss": 0.3025, "step": 39115 }, { "epoch": 0.8476155396182263, "grad_norm": 0.8203088641166687, "learning_rate": 1.1241928765602705e-06, "loss": 0.2593, "step": 39120 }, { "epoch": 0.8477238749377072, "grad_norm": 1.5662018060684204, "learning_rate": 1.1226255817709442e-06, "loss": 0.1817, "step": 39125 }, { "epoch": 0.847832210257188, "grad_norm": 2.1517579555511475, "learning_rate": 1.1210593152924608e-06, "loss": 0.259, "step": 39130 }, { "epoch": 0.8479405455766689, "grad_norm": 1.0582799911499023, "learning_rate": 1.119494077306248e-06, "loss": 0.1654, "step": 39135 }, { "epoch": 0.8480488808961497, "grad_norm": 1.1835038661956787, "learning_rate": 1.1179298679936168e-06, "loss": 0.1769, "step": 39140 }, { "epoch": 0.8481572162156307, "grad_norm": 1.4284487962722778, "learning_rate": 1.1163666875357538e-06, "loss": 0.2674, "step": 39145 }, { "epoch": 0.8482655515351115, "grad_norm": 1.3415262699127197, "learning_rate": 1.114804536113737e-06, "loss": 0.2613, "step": 39150 }, { "epoch": 0.8483738868545924, "grad_norm": 2.124976873397827, "learning_rate": 1.1132434139085136e-06, "loss": 0.1409, "step": 39155 }, { "epoch": 0.8484822221740732, "grad_norm": 0.8857773542404175, "learning_rate": 1.111683321100918e-06, "loss": 0.2532, "step": 39160 }, { "epoch": 0.848590557493554, "grad_norm": 1.7143200635910034, "learning_rate": 1.1101242578716608e-06, "loss": 0.2668, "step": 39165 }, { "epoch": 0.8486988928130349, "grad_norm": 0.7913036346435547, "learning_rate": 1.1085662244013407e-06, "loss": 0.1962, "step": 39170 }, { "epoch": 0.8488072281325157, "grad_norm": 1.0486669540405273, "learning_rate": 1.1070092208704286e-06, "loss": 0.2433, "step": 39175 }, { "epoch": 0.8489155634519966, "grad_norm": 1.1276975870132446, "learning_rate": 1.1054532474592805e-06, "loss": 0.2304, "step": 39180 }, { "epoch": 0.8490238987714774, "grad_norm": 1.0685901641845703, "learning_rate": 1.1038983043481345e-06, "loss": 0.2043, "step": 39185 }, { "epoch": 0.8491322340909583, "grad_norm": 1.6839795112609863, "learning_rate": 1.102344391717104e-06, "loss": 0.215, "step": 39190 }, { "epoch": 0.8492405694104392, "grad_norm": 1.3300907611846924, "learning_rate": 1.1007915097461896e-06, "loss": 0.2863, "step": 39195 }, { "epoch": 0.8493489047299201, "grad_norm": 1.3084800243377686, "learning_rate": 1.0992396586152687e-06, "loss": 0.1246, "step": 39200 }, { "epoch": 0.8494572400494009, "grad_norm": 1.2601122856140137, "learning_rate": 1.0976888385041018e-06, "loss": 0.184, "step": 39205 }, { "epoch": 0.8495655753688818, "grad_norm": 0.9064792990684509, "learning_rate": 1.0961390495923264e-06, "loss": 0.2485, "step": 39210 }, { "epoch": 0.8496739106883626, "grad_norm": 1.4660484790802002, "learning_rate": 1.0945902920594598e-06, "loss": 0.2484, "step": 39215 }, { "epoch": 0.8497822460078435, "grad_norm": 1.8092634677886963, "learning_rate": 1.0930425660849076e-06, "loss": 0.1762, "step": 39220 }, { "epoch": 0.8498905813273243, "grad_norm": 1.833999752998352, "learning_rate": 1.0914958718479452e-06, "loss": 0.2308, "step": 39225 }, { "epoch": 0.8499989166468052, "grad_norm": 1.2748284339904785, "learning_rate": 1.0899502095277393e-06, "loss": 0.2453, "step": 39230 }, { "epoch": 0.850107251966286, "grad_norm": 0.9062174558639526, "learning_rate": 1.0884055793033266e-06, "loss": 0.1431, "step": 39235 }, { "epoch": 0.850215587285767, "grad_norm": 1.7134480476379395, "learning_rate": 1.086861981353633e-06, "loss": 0.159, "step": 39240 }, { "epoch": 0.8503239226052478, "grad_norm": 1.412901759147644, "learning_rate": 1.085319415857461e-06, "loss": 0.2819, "step": 39245 }, { "epoch": 0.8504322579247287, "grad_norm": 1.3601380586624146, "learning_rate": 1.0837778829934908e-06, "loss": 0.1737, "step": 39250 }, { "epoch": 0.8505405932442095, "grad_norm": 1.016413927078247, "learning_rate": 1.0822373829402899e-06, "loss": 0.154, "step": 39255 }, { "epoch": 0.8506489285636903, "grad_norm": 1.9027199745178223, "learning_rate": 1.0806979158762976e-06, "loss": 0.2261, "step": 39260 }, { "epoch": 0.8507572638831712, "grad_norm": 0.8240925073623657, "learning_rate": 1.0791594819798435e-06, "loss": 0.1545, "step": 39265 }, { "epoch": 0.850865599202652, "grad_norm": 1.3310139179229736, "learning_rate": 1.0776220814291272e-06, "loss": 0.2232, "step": 39270 }, { "epoch": 0.8509739345221329, "grad_norm": 1.6839474439620972, "learning_rate": 1.0760857144022373e-06, "loss": 0.2349, "step": 39275 }, { "epoch": 0.8510822698416137, "grad_norm": 1.452662706375122, "learning_rate": 1.0745503810771352e-06, "loss": 0.2626, "step": 39280 }, { "epoch": 0.8511906051610946, "grad_norm": 1.5837090015411377, "learning_rate": 1.0730160816316692e-06, "loss": 0.219, "step": 39285 }, { "epoch": 0.8512989404805755, "grad_norm": 1.13394033908844, "learning_rate": 1.071482816243563e-06, "loss": 0.2814, "step": 39290 }, { "epoch": 0.8514072758000564, "grad_norm": 1.7575557231903076, "learning_rate": 1.0699505850904234e-06, "loss": 0.1799, "step": 39295 }, { "epoch": 0.8515156111195372, "grad_norm": 1.7564445734024048, "learning_rate": 1.0684193883497385e-06, "loss": 0.2002, "step": 39300 }, { "epoch": 0.8516239464390181, "grad_norm": 1.4802526235580444, "learning_rate": 1.0668892261988706e-06, "loss": 0.2367, "step": 39305 }, { "epoch": 0.8517322817584989, "grad_norm": 1.756943702697754, "learning_rate": 1.0653600988150692e-06, "loss": 0.2021, "step": 39310 }, { "epoch": 0.8518406170779798, "grad_norm": 1.1686675548553467, "learning_rate": 1.063832006375457e-06, "loss": 0.1725, "step": 39315 }, { "epoch": 0.8519489523974606, "grad_norm": 1.3640233278274536, "learning_rate": 1.0623049490570458e-06, "loss": 0.2556, "step": 39320 }, { "epoch": 0.8520572877169414, "grad_norm": 1.721057415008545, "learning_rate": 1.0607789270367176e-06, "loss": 0.282, "step": 39325 }, { "epoch": 0.8521656230364223, "grad_norm": 1.1353644132614136, "learning_rate": 1.0592539404912426e-06, "loss": 0.1993, "step": 39330 }, { "epoch": 0.8522739583559031, "grad_norm": 1.3503668308258057, "learning_rate": 1.0577299895972648e-06, "loss": 0.1927, "step": 39335 }, { "epoch": 0.8523822936753841, "grad_norm": 1.0752902030944824, "learning_rate": 1.0562070745313124e-06, "loss": 0.1529, "step": 39340 }, { "epoch": 0.8524906289948649, "grad_norm": 1.5716451406478882, "learning_rate": 1.0546851954697946e-06, "loss": 0.1572, "step": 39345 }, { "epoch": 0.8525989643143458, "grad_norm": 0.9596091508865356, "learning_rate": 1.0531643525889945e-06, "loss": 0.1665, "step": 39350 }, { "epoch": 0.8527072996338266, "grad_norm": 1.8782328367233276, "learning_rate": 1.0516445460650814e-06, "loss": 0.1447, "step": 39355 }, { "epoch": 0.8528156349533075, "grad_norm": 1.2484049797058105, "learning_rate": 1.0501257760741002e-06, "loss": 0.2973, "step": 39360 }, { "epoch": 0.8529239702727883, "grad_norm": 1.3559889793395996, "learning_rate": 1.0486080427919798e-06, "loss": 0.2737, "step": 39365 }, { "epoch": 0.8530323055922692, "grad_norm": 1.597240686416626, "learning_rate": 1.0470913463945243e-06, "loss": 0.2162, "step": 39370 }, { "epoch": 0.85314064091175, "grad_norm": 1.4205068349838257, "learning_rate": 1.0455756870574242e-06, "loss": 0.2527, "step": 39375 }, { "epoch": 0.8532489762312309, "grad_norm": 1.3749167919158936, "learning_rate": 1.0440610649562433e-06, "loss": 0.2006, "step": 39380 }, { "epoch": 0.8533573115507118, "grad_norm": 1.5535379648208618, "learning_rate": 1.0425474802664237e-06, "loss": 0.1553, "step": 39385 }, { "epoch": 0.8534656468701927, "grad_norm": 1.1129777431488037, "learning_rate": 1.0410349331633008e-06, "loss": 0.2722, "step": 39390 }, { "epoch": 0.8535739821896735, "grad_norm": 1.2272884845733643, "learning_rate": 1.0395234238220718e-06, "loss": 0.1482, "step": 39395 }, { "epoch": 0.8536823175091544, "grad_norm": 1.6035836935043335, "learning_rate": 1.0380129524178295e-06, "loss": 0.2265, "step": 39400 }, { "epoch": 0.8537906528286352, "grad_norm": 1.7024180889129639, "learning_rate": 1.0365035191255346e-06, "loss": 0.2882, "step": 39405 }, { "epoch": 0.853898988148116, "grad_norm": 1.0487672090530396, "learning_rate": 1.034995124120035e-06, "loss": 0.1865, "step": 39410 }, { "epoch": 0.8540073234675969, "grad_norm": 0.8482322096824646, "learning_rate": 1.0334877675760545e-06, "loss": 0.197, "step": 39415 }, { "epoch": 0.8541156587870777, "grad_norm": 1.2660808563232422, "learning_rate": 1.0319814496681957e-06, "loss": 0.1884, "step": 39420 }, { "epoch": 0.8542239941065586, "grad_norm": 1.5904021263122559, "learning_rate": 1.0304761705709477e-06, "loss": 0.2104, "step": 39425 }, { "epoch": 0.8543323294260394, "grad_norm": 1.3822540044784546, "learning_rate": 1.0289719304586688e-06, "loss": 0.2286, "step": 39430 }, { "epoch": 0.8544406647455204, "grad_norm": 0.6044918894767761, "learning_rate": 1.0274687295056063e-06, "loss": 0.2079, "step": 39435 }, { "epoch": 0.8545490000650012, "grad_norm": 1.7929375171661377, "learning_rate": 1.0259665678858831e-06, "loss": 0.2208, "step": 39440 }, { "epoch": 0.8546573353844821, "grad_norm": 1.2939860820770264, "learning_rate": 1.0244654457735048e-06, "loss": 0.2149, "step": 39445 }, { "epoch": 0.8547656707039629, "grad_norm": 1.2009142637252808, "learning_rate": 1.0229653633423498e-06, "loss": 0.2168, "step": 39450 }, { "epoch": 0.8548740060234438, "grad_norm": 0.8083229660987854, "learning_rate": 1.0214663207661802e-06, "loss": 0.127, "step": 39455 }, { "epoch": 0.8549823413429246, "grad_norm": 0.9526703357696533, "learning_rate": 1.0199683182186404e-06, "loss": 0.1926, "step": 39460 }, { "epoch": 0.8550906766624055, "grad_norm": 1.1004680395126343, "learning_rate": 1.0184713558732484e-06, "loss": 0.3465, "step": 39465 }, { "epoch": 0.8551990119818863, "grad_norm": 1.2432104349136353, "learning_rate": 1.0169754339034088e-06, "loss": 0.2163, "step": 39470 }, { "epoch": 0.8553073473013671, "grad_norm": 1.6972709894180298, "learning_rate": 1.0154805524823974e-06, "loss": 0.2081, "step": 39475 }, { "epoch": 0.855415682620848, "grad_norm": 1.3151540756225586, "learning_rate": 1.0139867117833769e-06, "loss": 0.201, "step": 39480 }, { "epoch": 0.855524017940329, "grad_norm": 1.0463201999664307, "learning_rate": 1.0124939119793843e-06, "loss": 0.1732, "step": 39485 }, { "epoch": 0.8556323532598098, "grad_norm": 1.3466525077819824, "learning_rate": 1.011002153243339e-06, "loss": 0.2089, "step": 39490 }, { "epoch": 0.8557406885792906, "grad_norm": 1.2046951055526733, "learning_rate": 1.0095114357480418e-06, "loss": 0.2224, "step": 39495 }, { "epoch": 0.8558490238987715, "grad_norm": 1.1063814163208008, "learning_rate": 1.0080217596661645e-06, "loss": 0.1769, "step": 39500 }, { "epoch": 0.8559573592182523, "grad_norm": 0.961571991443634, "learning_rate": 1.006533125170268e-06, "loss": 0.2113, "step": 39505 }, { "epoch": 0.8560656945377332, "grad_norm": 0.8625065684318542, "learning_rate": 1.0050455324327857e-06, "loss": 0.2316, "step": 39510 }, { "epoch": 0.856174029857214, "grad_norm": 1.5943683385849, "learning_rate": 1.003558981626036e-06, "loss": 0.2473, "step": 39515 }, { "epoch": 0.8562823651766949, "grad_norm": 1.1798491477966309, "learning_rate": 1.0020734729222093e-06, "loss": 0.1738, "step": 39520 }, { "epoch": 0.8563907004961757, "grad_norm": 1.2329351902008057, "learning_rate": 1.0005890064933833e-06, "loss": 0.2468, "step": 39525 }, { "epoch": 0.8564990358156567, "grad_norm": 1.467771053314209, "learning_rate": 9.991055825115082e-07, "loss": 0.2589, "step": 39530 }, { "epoch": 0.8566073711351375, "grad_norm": 0.9980409741401672, "learning_rate": 9.976232011484188e-07, "loss": 0.2432, "step": 39535 }, { "epoch": 0.8567157064546184, "grad_norm": 1.8991119861602783, "learning_rate": 9.961418625758269e-07, "loss": 0.1763, "step": 39540 }, { "epoch": 0.8568240417740992, "grad_norm": 1.9028706550598145, "learning_rate": 9.946615669653204e-07, "loss": 0.2586, "step": 39545 }, { "epoch": 0.85693237709358, "grad_norm": 1.3933022022247314, "learning_rate": 9.931823144883745e-07, "loss": 0.1436, "step": 39550 }, { "epoch": 0.8570407124130609, "grad_norm": 1.4408483505249023, "learning_rate": 9.917041053163322e-07, "loss": 0.2498, "step": 39555 }, { "epoch": 0.8571490477325417, "grad_norm": 2.4894559383392334, "learning_rate": 9.902269396204278e-07, "loss": 0.1993, "step": 39560 }, { "epoch": 0.8572573830520226, "grad_norm": 1.427499532699585, "learning_rate": 9.88750817571763e-07, "loss": 0.142, "step": 39565 }, { "epoch": 0.8573657183715034, "grad_norm": 0.8079016804695129, "learning_rate": 9.872757393413302e-07, "loss": 0.2114, "step": 39570 }, { "epoch": 0.8574740536909843, "grad_norm": 1.2302604913711548, "learning_rate": 9.858017050999902e-07, "loss": 0.2147, "step": 39575 }, { "epoch": 0.8575823890104652, "grad_norm": 0.9720746874809265, "learning_rate": 9.84328715018491e-07, "loss": 0.132, "step": 39580 }, { "epoch": 0.8576907243299461, "grad_norm": 1.560807228088379, "learning_rate": 9.828567692674563e-07, "loss": 0.1908, "step": 39585 }, { "epoch": 0.8577990596494269, "grad_norm": 1.0807996988296509, "learning_rate": 9.81385868017386e-07, "loss": 0.2381, "step": 39590 }, { "epoch": 0.8579073949689078, "grad_norm": 1.25816810131073, "learning_rate": 9.799160114386664e-07, "loss": 0.2504, "step": 39595 }, { "epoch": 0.8580157302883886, "grad_norm": 0.8871785402297974, "learning_rate": 9.784471997015542e-07, "loss": 0.1389, "step": 39600 }, { "epoch": 0.8581240656078695, "grad_norm": 1.5166168212890625, "learning_rate": 9.769794329761928e-07, "loss": 0.2104, "step": 39605 }, { "epoch": 0.8582324009273503, "grad_norm": 1.1806350946426392, "learning_rate": 9.75512711432598e-07, "loss": 0.2685, "step": 39610 }, { "epoch": 0.8583407362468312, "grad_norm": 1.7206209897994995, "learning_rate": 9.740470352406695e-07, "loss": 0.2642, "step": 39615 }, { "epoch": 0.858449071566312, "grad_norm": 1.4271721839904785, "learning_rate": 9.725824045701838e-07, "loss": 0.216, "step": 39620 }, { "epoch": 0.8585574068857929, "grad_norm": 1.3472269773483276, "learning_rate": 9.71118819590794e-07, "loss": 0.19, "step": 39625 }, { "epoch": 0.8586657422052738, "grad_norm": 1.6707175970077515, "learning_rate": 9.696562804720367e-07, "loss": 0.1701, "step": 39630 }, { "epoch": 0.8587740775247547, "grad_norm": 1.3326178789138794, "learning_rate": 9.681947873833243e-07, "loss": 0.2153, "step": 39635 }, { "epoch": 0.8588824128442355, "grad_norm": 2.2141289710998535, "learning_rate": 9.667343404939511e-07, "loss": 0.2752, "step": 39640 }, { "epoch": 0.8589907481637163, "grad_norm": 1.5336703062057495, "learning_rate": 9.652749399730843e-07, "loss": 0.217, "step": 39645 }, { "epoch": 0.8590990834831972, "grad_norm": 0.7434677481651306, "learning_rate": 9.638165859897774e-07, "loss": 0.1982, "step": 39650 }, { "epoch": 0.859207418802678, "grad_norm": 2.383875608444214, "learning_rate": 9.623592787129576e-07, "loss": 0.2279, "step": 39655 }, { "epoch": 0.8593157541221589, "grad_norm": 1.4388465881347656, "learning_rate": 9.609030183114287e-07, "loss": 0.2656, "step": 39660 }, { "epoch": 0.8594240894416397, "grad_norm": 1.1504262685775757, "learning_rate": 9.594478049538824e-07, "loss": 0.2161, "step": 39665 }, { "epoch": 0.8595324247611206, "grad_norm": 1.7808568477630615, "learning_rate": 9.579936388088773e-07, "loss": 0.1668, "step": 39670 }, { "epoch": 0.8596407600806015, "grad_norm": 1.649075984954834, "learning_rate": 9.565405200448607e-07, "loss": 0.3153, "step": 39675 }, { "epoch": 0.8597490954000824, "grad_norm": 1.3056718111038208, "learning_rate": 9.550884488301537e-07, "loss": 0.2576, "step": 39680 }, { "epoch": 0.8598574307195632, "grad_norm": 1.1008347272872925, "learning_rate": 9.536374253329594e-07, "loss": 0.2677, "step": 39685 }, { "epoch": 0.8599657660390441, "grad_norm": 1.6663457155227661, "learning_rate": 9.521874497213546e-07, "loss": 0.2667, "step": 39690 }, { "epoch": 0.8600741013585249, "grad_norm": 1.090635061264038, "learning_rate": 9.507385221632959e-07, "loss": 0.2357, "step": 39695 }, { "epoch": 0.8601824366780058, "grad_norm": 1.6779423952102661, "learning_rate": 9.492906428266224e-07, "loss": 0.2445, "step": 39700 }, { "epoch": 0.8602907719974866, "grad_norm": 0.6211152076721191, "learning_rate": 9.478438118790467e-07, "loss": 0.1976, "step": 39705 }, { "epoch": 0.8603991073169674, "grad_norm": 1.441970705986023, "learning_rate": 9.463980294881669e-07, "loss": 0.3091, "step": 39710 }, { "epoch": 0.8605074426364483, "grad_norm": 1.670896291732788, "learning_rate": 9.449532958214503e-07, "loss": 0.3328, "step": 39715 }, { "epoch": 0.8606157779559291, "grad_norm": 2.6255269050598145, "learning_rate": 9.435096110462516e-07, "loss": 0.2956, "step": 39720 }, { "epoch": 0.8607241132754101, "grad_norm": 1.2083781957626343, "learning_rate": 9.42066975329795e-07, "loss": 0.225, "step": 39725 }, { "epoch": 0.8608324485948909, "grad_norm": 1.0666087865829468, "learning_rate": 9.406253888391937e-07, "loss": 0.1964, "step": 39730 }, { "epoch": 0.8609407839143718, "grad_norm": 0.6534349322319031, "learning_rate": 9.391848517414315e-07, "loss": 0.2107, "step": 39735 }, { "epoch": 0.8610491192338526, "grad_norm": 1.4619977474212646, "learning_rate": 9.377453642033729e-07, "loss": 0.1479, "step": 39740 }, { "epoch": 0.8611574545533335, "grad_norm": 1.3975286483764648, "learning_rate": 9.363069263917623e-07, "loss": 0.2019, "step": 39745 }, { "epoch": 0.8612657898728143, "grad_norm": 1.9949959516525269, "learning_rate": 9.348695384732188e-07, "loss": 0.248, "step": 39750 }, { "epoch": 0.8613741251922952, "grad_norm": 0.6937990188598633, "learning_rate": 9.334332006142455e-07, "loss": 0.2333, "step": 39755 }, { "epoch": 0.861482460511776, "grad_norm": 1.4264631271362305, "learning_rate": 9.319979129812162e-07, "loss": 0.2029, "step": 39760 }, { "epoch": 0.8615907958312569, "grad_norm": 1.1472505331039429, "learning_rate": 9.305636757403924e-07, "loss": 0.1927, "step": 39765 }, { "epoch": 0.8616991311507378, "grad_norm": 0.97357177734375, "learning_rate": 9.291304890579045e-07, "loss": 0.254, "step": 39770 }, { "epoch": 0.8618074664702187, "grad_norm": 1.6446257829666138, "learning_rate": 9.276983530997685e-07, "loss": 0.2166, "step": 39775 }, { "epoch": 0.8619158017896995, "grad_norm": 1.9861843585968018, "learning_rate": 9.262672680318763e-07, "loss": 0.2694, "step": 39780 }, { "epoch": 0.8620241371091804, "grad_norm": 1.2778377532958984, "learning_rate": 9.248372340199952e-07, "loss": 0.2126, "step": 39785 }, { "epoch": 0.8621324724286612, "grad_norm": 1.1643952131271362, "learning_rate": 9.23408251229776e-07, "loss": 0.1777, "step": 39790 }, { "epoch": 0.862240807748142, "grad_norm": 1.1859256029129028, "learning_rate": 9.219803198267418e-07, "loss": 0.2232, "step": 39795 }, { "epoch": 0.8623491430676229, "grad_norm": 1.2834532260894775, "learning_rate": 9.205534399763016e-07, "loss": 0.203, "step": 39800 }, { "epoch": 0.8624574783871037, "grad_norm": 1.74850594997406, "learning_rate": 9.19127611843732e-07, "loss": 0.1726, "step": 39805 }, { "epoch": 0.8625658137065846, "grad_norm": 2.743964910507202, "learning_rate": 9.177028355941997e-07, "loss": 0.2366, "step": 39810 }, { "epoch": 0.8626741490260654, "grad_norm": 1.1660264730453491, "learning_rate": 9.162791113927394e-07, "loss": 0.2436, "step": 39815 }, { "epoch": 0.8627824843455464, "grad_norm": 1.6207914352416992, "learning_rate": 9.148564394042703e-07, "loss": 0.2397, "step": 39820 }, { "epoch": 0.8628908196650272, "grad_norm": 1.3527623414993286, "learning_rate": 9.134348197935882e-07, "loss": 0.224, "step": 39825 }, { "epoch": 0.8629991549845081, "grad_norm": 1.6549710035324097, "learning_rate": 9.120142527253639e-07, "loss": 0.2066, "step": 39830 }, { "epoch": 0.8631074903039889, "grad_norm": 1.361920714378357, "learning_rate": 9.105947383641523e-07, "loss": 0.2453, "step": 39835 }, { "epoch": 0.8632158256234698, "grad_norm": 1.080069661140442, "learning_rate": 9.091762768743794e-07, "loss": 0.2218, "step": 39840 }, { "epoch": 0.8633241609429506, "grad_norm": 1.3967485427856445, "learning_rate": 9.077588684203553e-07, "loss": 0.2587, "step": 39845 }, { "epoch": 0.8634324962624315, "grad_norm": 1.58976411819458, "learning_rate": 9.06342513166264e-07, "loss": 0.2775, "step": 39850 }, { "epoch": 0.8635408315819123, "grad_norm": 1.3089525699615479, "learning_rate": 9.049272112761697e-07, "loss": 0.1569, "step": 39855 }, { "epoch": 0.8636491669013932, "grad_norm": 2.784487724304199, "learning_rate": 9.03512962914015e-07, "loss": 0.2086, "step": 39860 }, { "epoch": 0.863757502220874, "grad_norm": 1.2097277641296387, "learning_rate": 9.020997682436161e-07, "loss": 0.1452, "step": 39865 }, { "epoch": 0.863865837540355, "grad_norm": 1.413551688194275, "learning_rate": 9.006876274286725e-07, "loss": 0.257, "step": 39870 }, { "epoch": 0.8639741728598358, "grad_norm": 1.8329622745513916, "learning_rate": 8.992765406327597e-07, "loss": 0.236, "step": 39875 }, { "epoch": 0.8640825081793166, "grad_norm": 1.1992665529251099, "learning_rate": 8.978665080193328e-07, "loss": 0.1844, "step": 39880 }, { "epoch": 0.8641908434987975, "grad_norm": 1.3572912216186523, "learning_rate": 8.964575297517187e-07, "loss": 0.2263, "step": 39885 }, { "epoch": 0.8642991788182783, "grad_norm": 1.4983582496643066, "learning_rate": 8.950496059931313e-07, "loss": 0.2492, "step": 39890 }, { "epoch": 0.8644075141377592, "grad_norm": 1.7999846935272217, "learning_rate": 8.936427369066536e-07, "loss": 0.1896, "step": 39895 }, { "epoch": 0.86451584945724, "grad_norm": 1.0496708154678345, "learning_rate": 8.922369226552507e-07, "loss": 0.1659, "step": 39900 }, { "epoch": 0.8646241847767209, "grad_norm": 1.482011318206787, "learning_rate": 8.908321634017681e-07, "loss": 0.1569, "step": 39905 }, { "epoch": 0.8647325200962017, "grad_norm": 0.6057774424552917, "learning_rate": 8.894284593089219e-07, "loss": 0.2502, "step": 39910 }, { "epoch": 0.8648408554156827, "grad_norm": 0.585780143737793, "learning_rate": 8.880258105393125e-07, "loss": 0.1755, "step": 39915 }, { "epoch": 0.8649491907351635, "grad_norm": 1.7848533391952515, "learning_rate": 8.866242172554151e-07, "loss": 0.2236, "step": 39920 }, { "epoch": 0.8650575260546444, "grad_norm": 1.1320792436599731, "learning_rate": 8.852236796195857e-07, "loss": 0.1634, "step": 39925 }, { "epoch": 0.8651658613741252, "grad_norm": 1.224444031715393, "learning_rate": 8.838241977940542e-07, "loss": 0.2007, "step": 39930 }, { "epoch": 0.8652741966936061, "grad_norm": 1.5580476522445679, "learning_rate": 8.824257719409269e-07, "loss": 0.2469, "step": 39935 }, { "epoch": 0.8653825320130869, "grad_norm": 1.3142989873886108, "learning_rate": 8.810284022221938e-07, "loss": 0.1857, "step": 39940 }, { "epoch": 0.8654908673325677, "grad_norm": 1.1828713417053223, "learning_rate": 8.796320887997167e-07, "loss": 0.2175, "step": 39945 }, { "epoch": 0.8655992026520486, "grad_norm": 1.2434468269348145, "learning_rate": 8.782368318352419e-07, "loss": 0.1588, "step": 39950 }, { "epoch": 0.8657075379715294, "grad_norm": 1.6234461069107056, "learning_rate": 8.768426314903832e-07, "loss": 0.2593, "step": 39955 }, { "epoch": 0.8658158732910103, "grad_norm": 0.707768976688385, "learning_rate": 8.754494879266429e-07, "loss": 0.1373, "step": 39960 }, { "epoch": 0.8659242086104912, "grad_norm": 1.4693794250488281, "learning_rate": 8.740574013053916e-07, "loss": 0.3457, "step": 39965 }, { "epoch": 0.8660325439299721, "grad_norm": 1.352269172668457, "learning_rate": 8.726663717878848e-07, "loss": 0.2528, "step": 39970 }, { "epoch": 0.8661408792494529, "grad_norm": 1.1784740686416626, "learning_rate": 8.712763995352513e-07, "loss": 0.2194, "step": 39975 }, { "epoch": 0.8662492145689338, "grad_norm": 0.909954845905304, "learning_rate": 8.698874847084981e-07, "loss": 0.171, "step": 39980 }, { "epoch": 0.8663575498884146, "grad_norm": 1.1597346067428589, "learning_rate": 8.68499627468512e-07, "loss": 0.2326, "step": 39985 }, { "epoch": 0.8664658852078955, "grad_norm": 0.36621806025505066, "learning_rate": 8.671128279760532e-07, "loss": 0.1317, "step": 39990 }, { "epoch": 0.8665742205273763, "grad_norm": 1.394519567489624, "learning_rate": 8.657270863917644e-07, "loss": 0.2494, "step": 39995 }, { "epoch": 0.8666825558468572, "grad_norm": 1.491127848625183, "learning_rate": 8.643424028761582e-07, "loss": 0.2773, "step": 40000 }, { "epoch": 0.866790891166338, "grad_norm": 1.50186026096344, "learning_rate": 8.629587775896353e-07, "loss": 0.1898, "step": 40005 }, { "epoch": 0.8668992264858189, "grad_norm": 1.2659657001495361, "learning_rate": 8.61576210692463e-07, "loss": 0.2214, "step": 40010 }, { "epoch": 0.8670075618052998, "grad_norm": 1.5629082918167114, "learning_rate": 8.601947023447932e-07, "loss": 0.1597, "step": 40015 }, { "epoch": 0.8671158971247807, "grad_norm": 1.048008680343628, "learning_rate": 8.588142527066546e-07, "loss": 0.145, "step": 40020 }, { "epoch": 0.8672242324442615, "grad_norm": 1.4199419021606445, "learning_rate": 8.57434861937948e-07, "loss": 0.238, "step": 40025 }, { "epoch": 0.8673325677637423, "grad_norm": 0.6166799068450928, "learning_rate": 8.560565301984591e-07, "loss": 0.1041, "step": 40030 }, { "epoch": 0.8674409030832232, "grad_norm": 1.3377585411071777, "learning_rate": 8.546792576478435e-07, "loss": 0.3404, "step": 40035 }, { "epoch": 0.867549238402704, "grad_norm": 0.907978355884552, "learning_rate": 8.533030444456403e-07, "loss": 0.2947, "step": 40040 }, { "epoch": 0.8676575737221849, "grad_norm": 1.0495576858520508, "learning_rate": 8.519278907512596e-07, "loss": 0.2374, "step": 40045 }, { "epoch": 0.8677659090416657, "grad_norm": 1.8447575569152832, "learning_rate": 8.505537967239974e-07, "loss": 0.2014, "step": 40050 }, { "epoch": 0.8678742443611466, "grad_norm": 1.6006580591201782, "learning_rate": 8.491807625230164e-07, "loss": 0.1962, "step": 40055 }, { "epoch": 0.8679825796806275, "grad_norm": 2.0792551040649414, "learning_rate": 8.47808788307366e-07, "loss": 0.2064, "step": 40060 }, { "epoch": 0.8680909150001084, "grad_norm": 2.6081554889678955, "learning_rate": 8.46437874235968e-07, "loss": 0.2743, "step": 40065 }, { "epoch": 0.8681992503195892, "grad_norm": 0.9813966751098633, "learning_rate": 8.45068020467621e-07, "loss": 0.1719, "step": 40070 }, { "epoch": 0.8683075856390701, "grad_norm": 1.4451606273651123, "learning_rate": 8.436992271610045e-07, "loss": 0.2803, "step": 40075 }, { "epoch": 0.8684159209585509, "grad_norm": 1.6791915893554688, "learning_rate": 8.423314944746697e-07, "loss": 0.1871, "step": 40080 }, { "epoch": 0.8685242562780318, "grad_norm": 1.167358160018921, "learning_rate": 8.409648225670508e-07, "loss": 0.2271, "step": 40085 }, { "epoch": 0.8686325915975126, "grad_norm": 1.7530124187469482, "learning_rate": 8.395992115964536e-07, "loss": 0.2852, "step": 40090 }, { "epoch": 0.8687409269169935, "grad_norm": 1.4092953205108643, "learning_rate": 8.382346617210668e-07, "loss": 0.1924, "step": 40095 }, { "epoch": 0.8688492622364743, "grad_norm": 1.5187137126922607, "learning_rate": 8.368711730989499e-07, "loss": 0.3006, "step": 40100 }, { "epoch": 0.8689575975559551, "grad_norm": 1.1907587051391602, "learning_rate": 8.35508745888044e-07, "loss": 0.2345, "step": 40105 }, { "epoch": 0.8690659328754361, "grad_norm": 1.2839680910110474, "learning_rate": 8.341473802461642e-07, "loss": 0.2334, "step": 40110 }, { "epoch": 0.8691742681949169, "grad_norm": 1.403978705406189, "learning_rate": 8.327870763310064e-07, "loss": 0.2762, "step": 40115 }, { "epoch": 0.8692826035143978, "grad_norm": 0.647908627986908, "learning_rate": 8.314278343001436e-07, "loss": 0.1663, "step": 40120 }, { "epoch": 0.8693909388338786, "grad_norm": 1.9843037128448486, "learning_rate": 8.300696543110176e-07, "loss": 0.1805, "step": 40125 }, { "epoch": 0.8694992741533595, "grad_norm": 1.8993072509765625, "learning_rate": 8.287125365209603e-07, "loss": 0.2561, "step": 40130 }, { "epoch": 0.8696076094728403, "grad_norm": 1.0252083539962769, "learning_rate": 8.273564810871682e-07, "loss": 0.1799, "step": 40135 }, { "epoch": 0.8697159447923212, "grad_norm": 1.13852059841156, "learning_rate": 8.2600148816672e-07, "loss": 0.2515, "step": 40140 }, { "epoch": 0.869824280111802, "grad_norm": 1.0725691318511963, "learning_rate": 8.246475579165758e-07, "loss": 0.2939, "step": 40145 }, { "epoch": 0.8699326154312829, "grad_norm": 1.4447388648986816, "learning_rate": 8.232946904935623e-07, "loss": 0.1759, "step": 40150 }, { "epoch": 0.8700409507507637, "grad_norm": 1.6553778648376465, "learning_rate": 8.219428860543943e-07, "loss": 0.2166, "step": 40155 }, { "epoch": 0.8701492860702447, "grad_norm": 2.202697515487671, "learning_rate": 8.20592144755652e-07, "loss": 0.2265, "step": 40160 }, { "epoch": 0.8702576213897255, "grad_norm": 1.2336242198944092, "learning_rate": 8.192424667538057e-07, "loss": 0.1834, "step": 40165 }, { "epoch": 0.8703659567092064, "grad_norm": 1.742789387702942, "learning_rate": 8.178938522051904e-07, "loss": 0.2812, "step": 40170 }, { "epoch": 0.8704742920286872, "grad_norm": 1.8706847429275513, "learning_rate": 8.165463012660257e-07, "loss": 0.3081, "step": 40175 }, { "epoch": 0.870582627348168, "grad_norm": 1.3151758909225464, "learning_rate": 8.151998140924034e-07, "loss": 0.2783, "step": 40180 }, { "epoch": 0.8706909626676489, "grad_norm": 1.336273431777954, "learning_rate": 8.13854390840293e-07, "loss": 0.2479, "step": 40185 }, { "epoch": 0.8707992979871297, "grad_norm": 0.9584859609603882, "learning_rate": 8.125100316655455e-07, "loss": 0.2577, "step": 40190 }, { "epoch": 0.8709076333066106, "grad_norm": 1.0893831253051758, "learning_rate": 8.111667367238795e-07, "loss": 0.2205, "step": 40195 }, { "epoch": 0.8710159686260914, "grad_norm": 1.4224110841751099, "learning_rate": 8.098245061709009e-07, "loss": 0.1419, "step": 40200 }, { "epoch": 0.8711243039455724, "grad_norm": 1.7578363418579102, "learning_rate": 8.084833401620806e-07, "loss": 0.2073, "step": 40205 }, { "epoch": 0.8712326392650532, "grad_norm": 1.7187963724136353, "learning_rate": 8.071432388527789e-07, "loss": 0.2659, "step": 40210 }, { "epoch": 0.8713409745845341, "grad_norm": 1.2838492393493652, "learning_rate": 8.05804202398226e-07, "loss": 0.1955, "step": 40215 }, { "epoch": 0.8714493099040149, "grad_norm": 2.491318464279175, "learning_rate": 8.044662309535234e-07, "loss": 0.272, "step": 40220 }, { "epoch": 0.8715576452234958, "grad_norm": 1.145033597946167, "learning_rate": 8.031293246736616e-07, "loss": 0.3021, "step": 40225 }, { "epoch": 0.8716659805429766, "grad_norm": 1.564808964729309, "learning_rate": 8.017934837134967e-07, "loss": 0.2344, "step": 40230 }, { "epoch": 0.8717743158624575, "grad_norm": 0.8993708491325378, "learning_rate": 8.004587082277693e-07, "loss": 0.2403, "step": 40235 }, { "epoch": 0.8718826511819383, "grad_norm": 1.0613917112350464, "learning_rate": 7.991249983710903e-07, "loss": 0.1446, "step": 40240 }, { "epoch": 0.8719909865014192, "grad_norm": 1.2339435815811157, "learning_rate": 7.977923542979516e-07, "loss": 0.2213, "step": 40245 }, { "epoch": 0.8720993218209, "grad_norm": 1.9117990732192993, "learning_rate": 7.964607761627186e-07, "loss": 0.2382, "step": 40250 }, { "epoch": 0.872207657140381, "grad_norm": 1.7957961559295654, "learning_rate": 7.951302641196357e-07, "loss": 0.2717, "step": 40255 }, { "epoch": 0.8723159924598618, "grad_norm": 1.4124382734298706, "learning_rate": 7.938008183228241e-07, "loss": 0.191, "step": 40260 }, { "epoch": 0.8724243277793426, "grad_norm": 1.956977128982544, "learning_rate": 7.924724389262784e-07, "loss": 0.2778, "step": 40265 }, { "epoch": 0.8725326630988235, "grad_norm": 2.0297632217407227, "learning_rate": 7.911451260838721e-07, "loss": 0.2504, "step": 40270 }, { "epoch": 0.8726409984183043, "grad_norm": 1.0175257921218872, "learning_rate": 7.898188799493534e-07, "loss": 0.1919, "step": 40275 }, { "epoch": 0.8727493337377852, "grad_norm": 2.294792652130127, "learning_rate": 7.884937006763505e-07, "loss": 0.1844, "step": 40280 }, { "epoch": 0.872857669057266, "grad_norm": 2.3968498706817627, "learning_rate": 7.871695884183617e-07, "loss": 0.2314, "step": 40285 }, { "epoch": 0.8729660043767469, "grad_norm": 1.398788332939148, "learning_rate": 7.858465433287698e-07, "loss": 0.2466, "step": 40290 }, { "epoch": 0.8730743396962277, "grad_norm": 0.8477714657783508, "learning_rate": 7.845245655608269e-07, "loss": 0.2033, "step": 40295 }, { "epoch": 0.8731826750157087, "grad_norm": 1.3724291324615479, "learning_rate": 7.832036552676647e-07, "loss": 0.2305, "step": 40300 }, { "epoch": 0.8732910103351895, "grad_norm": 1.0076112747192383, "learning_rate": 7.818838126022932e-07, "loss": 0.2265, "step": 40305 }, { "epoch": 0.8733993456546704, "grad_norm": 0.9116782546043396, "learning_rate": 7.805650377175933e-07, "loss": 0.1751, "step": 40310 }, { "epoch": 0.8735076809741512, "grad_norm": 1.5268919467926025, "learning_rate": 7.792473307663273e-07, "loss": 0.289, "step": 40315 }, { "epoch": 0.8736160162936321, "grad_norm": 2.6700069904327393, "learning_rate": 7.779306919011309e-07, "loss": 0.2145, "step": 40320 }, { "epoch": 0.8737243516131129, "grad_norm": 2.113666296005249, "learning_rate": 7.766151212745177e-07, "loss": 0.1908, "step": 40325 }, { "epoch": 0.8738326869325937, "grad_norm": 1.8732589483261108, "learning_rate": 7.753006190388757e-07, "loss": 0.1809, "step": 40330 }, { "epoch": 0.8739410222520746, "grad_norm": 1.3830020427703857, "learning_rate": 7.73987185346472e-07, "loss": 0.2093, "step": 40335 }, { "epoch": 0.8740493575715554, "grad_norm": 2.709035873413086, "learning_rate": 7.72674820349446e-07, "loss": 0.2105, "step": 40340 }, { "epoch": 0.8741576928910363, "grad_norm": 1.4892598390579224, "learning_rate": 7.713635241998174e-07, "loss": 0.191, "step": 40345 }, { "epoch": 0.8742660282105172, "grad_norm": 1.427977204322815, "learning_rate": 7.700532970494789e-07, "loss": 0.1607, "step": 40350 }, { "epoch": 0.8743743635299981, "grad_norm": 2.1152231693267822, "learning_rate": 7.687441390502015e-07, "loss": 0.2006, "step": 40355 }, { "epoch": 0.8744826988494789, "grad_norm": 1.4503344297409058, "learning_rate": 7.674360503536326e-07, "loss": 0.2253, "step": 40360 }, { "epoch": 0.8745910341689598, "grad_norm": 1.1550120115280151, "learning_rate": 7.661290311112913e-07, "loss": 0.2311, "step": 40365 }, { "epoch": 0.8746993694884406, "grad_norm": 1.8023850917816162, "learning_rate": 7.648230814745805e-07, "loss": 0.1851, "step": 40370 }, { "epoch": 0.8748077048079215, "grad_norm": 1.7320829629898071, "learning_rate": 7.635182015947717e-07, "loss": 0.2447, "step": 40375 }, { "epoch": 0.8749160401274023, "grad_norm": 0.6433514952659607, "learning_rate": 7.622143916230184e-07, "loss": 0.1667, "step": 40380 }, { "epoch": 0.8750243754468832, "grad_norm": 2.0542869567871094, "learning_rate": 7.609116517103454e-07, "loss": 0.3188, "step": 40385 }, { "epoch": 0.875132710766364, "grad_norm": 1.5975831747055054, "learning_rate": 7.596099820076541e-07, "loss": 0.154, "step": 40390 }, { "epoch": 0.8752410460858449, "grad_norm": 1.322337031364441, "learning_rate": 7.583093826657273e-07, "loss": 0.2611, "step": 40395 }, { "epoch": 0.8753493814053258, "grad_norm": 1.494227409362793, "learning_rate": 7.570098538352144e-07, "loss": 0.3278, "step": 40400 }, { "epoch": 0.8754577167248067, "grad_norm": 1.4855256080627441, "learning_rate": 7.557113956666529e-07, "loss": 0.1828, "step": 40405 }, { "epoch": 0.8755660520442875, "grad_norm": 1.7527728080749512, "learning_rate": 7.544140083104456e-07, "loss": 0.2133, "step": 40410 }, { "epoch": 0.8756743873637683, "grad_norm": 1.6128660440444946, "learning_rate": 7.531176919168781e-07, "loss": 0.2784, "step": 40415 }, { "epoch": 0.8757827226832492, "grad_norm": 1.379825472831726, "learning_rate": 7.518224466361079e-07, "loss": 0.1773, "step": 40420 }, { "epoch": 0.87589105800273, "grad_norm": 1.2657591104507446, "learning_rate": 7.505282726181684e-07, "loss": 0.2436, "step": 40425 }, { "epoch": 0.8759993933222109, "grad_norm": 2.1795434951782227, "learning_rate": 7.49235170012973e-07, "loss": 0.2215, "step": 40430 }, { "epoch": 0.8761077286416917, "grad_norm": 1.7489063739776611, "learning_rate": 7.47943138970304e-07, "loss": 0.1758, "step": 40435 }, { "epoch": 0.8762160639611726, "grad_norm": 0.9245489835739136, "learning_rate": 7.466521796398285e-07, "loss": 0.1769, "step": 40440 }, { "epoch": 0.8763243992806535, "grad_norm": 1.4947593212127686, "learning_rate": 7.453622921710801e-07, "loss": 0.1634, "step": 40445 }, { "epoch": 0.8764327346001344, "grad_norm": 1.540312647819519, "learning_rate": 7.440734767134794e-07, "loss": 0.2109, "step": 40450 }, { "epoch": 0.8765410699196152, "grad_norm": 1.4291021823883057, "learning_rate": 7.427857334163113e-07, "loss": 0.2417, "step": 40455 }, { "epoch": 0.8766494052390961, "grad_norm": 1.4027420282363892, "learning_rate": 7.414990624287421e-07, "loss": 0.2321, "step": 40460 }, { "epoch": 0.8767577405585769, "grad_norm": 1.2245248556137085, "learning_rate": 7.40213463899816e-07, "loss": 0.2353, "step": 40465 }, { "epoch": 0.8768660758780578, "grad_norm": 1.609028697013855, "learning_rate": 7.38928937978447e-07, "loss": 0.2466, "step": 40470 }, { "epoch": 0.8769744111975386, "grad_norm": 1.281392216682434, "learning_rate": 7.376454848134307e-07, "loss": 0.1917, "step": 40475 }, { "epoch": 0.8770827465170195, "grad_norm": 1.323258876800537, "learning_rate": 7.363631045534336e-07, "loss": 0.1996, "step": 40480 }, { "epoch": 0.8771910818365003, "grad_norm": 1.5718629360198975, "learning_rate": 7.350817973470026e-07, "loss": 0.3291, "step": 40485 }, { "epoch": 0.8772994171559811, "grad_norm": 1.1316479444503784, "learning_rate": 7.338015633425566e-07, "loss": 0.3092, "step": 40490 }, { "epoch": 0.8774077524754621, "grad_norm": 0.7016321420669556, "learning_rate": 7.325224026883904e-07, "loss": 0.2107, "step": 40495 }, { "epoch": 0.877516087794943, "grad_norm": 1.5503063201904297, "learning_rate": 7.312443155326799e-07, "loss": 0.2728, "step": 40500 }, { "epoch": 0.8776244231144238, "grad_norm": 0.9676215648651123, "learning_rate": 7.299673020234666e-07, "loss": 0.2105, "step": 40505 }, { "epoch": 0.8777327584339046, "grad_norm": 0.9571859836578369, "learning_rate": 7.286913623086788e-07, "loss": 0.3213, "step": 40510 }, { "epoch": 0.8778410937533855, "grad_norm": 0.9711827039718628, "learning_rate": 7.274164965361108e-07, "loss": 0.1867, "step": 40515 }, { "epoch": 0.8779494290728663, "grad_norm": 1.9862492084503174, "learning_rate": 7.261427048534397e-07, "loss": 0.2611, "step": 40520 }, { "epoch": 0.8780577643923472, "grad_norm": 1.0661267042160034, "learning_rate": 7.248699874082121e-07, "loss": 0.2033, "step": 40525 }, { "epoch": 0.878166099711828, "grad_norm": 1.655089020729065, "learning_rate": 7.235983443478578e-07, "loss": 0.3238, "step": 40530 }, { "epoch": 0.8782744350313089, "grad_norm": 1.3714081048965454, "learning_rate": 7.223277758196723e-07, "loss": 0.2709, "step": 40535 }, { "epoch": 0.8783827703507897, "grad_norm": 0.9402740597724915, "learning_rate": 7.210582819708356e-07, "loss": 0.2369, "step": 40540 }, { "epoch": 0.8784911056702707, "grad_norm": 1.2580114603042603, "learning_rate": 7.197898629483968e-07, "loss": 0.2227, "step": 40545 }, { "epoch": 0.8785994409897515, "grad_norm": 1.4697811603546143, "learning_rate": 7.185225188992861e-07, "loss": 0.1226, "step": 40550 }, { "epoch": 0.8787077763092324, "grad_norm": 1.6207865476608276, "learning_rate": 7.17256249970305e-07, "loss": 0.2225, "step": 40555 }, { "epoch": 0.8788161116287132, "grad_norm": 1.3291692733764648, "learning_rate": 7.159910563081318e-07, "loss": 0.2529, "step": 40560 }, { "epoch": 0.878924446948194, "grad_norm": 1.0733225345611572, "learning_rate": 7.147269380593213e-07, "loss": 0.1726, "step": 40565 }, { "epoch": 0.8790327822676749, "grad_norm": 0.8715769052505493, "learning_rate": 7.134638953703e-07, "loss": 0.1458, "step": 40570 }, { "epoch": 0.8791411175871557, "grad_norm": 1.6907330751419067, "learning_rate": 7.122019283873761e-07, "loss": 0.1968, "step": 40575 }, { "epoch": 0.8792494529066366, "grad_norm": 1.3468730449676514, "learning_rate": 7.109410372567249e-07, "loss": 0.2248, "step": 40580 }, { "epoch": 0.8793577882261174, "grad_norm": 0.8044525384902954, "learning_rate": 7.096812221244065e-07, "loss": 0.176, "step": 40585 }, { "epoch": 0.8794661235455984, "grad_norm": 0.7320740818977356, "learning_rate": 7.084224831363485e-07, "loss": 0.2498, "step": 40590 }, { "epoch": 0.8795744588650792, "grad_norm": 1.0110023021697998, "learning_rate": 7.071648204383574e-07, "loss": 0.1769, "step": 40595 }, { "epoch": 0.8796827941845601, "grad_norm": 1.519088625907898, "learning_rate": 7.05908234176117e-07, "loss": 0.2159, "step": 40600 }, { "epoch": 0.8797911295040409, "grad_norm": 1.3403085470199585, "learning_rate": 7.046527244951806e-07, "loss": 0.2433, "step": 40605 }, { "epoch": 0.8798994648235218, "grad_norm": 1.3066442012786865, "learning_rate": 7.033982915409842e-07, "loss": 0.2546, "step": 40610 }, { "epoch": 0.8800078001430026, "grad_norm": 1.2697947025299072, "learning_rate": 7.021449354588295e-07, "loss": 0.2268, "step": 40615 }, { "epoch": 0.8801161354624835, "grad_norm": 1.6609399318695068, "learning_rate": 7.008926563939045e-07, "loss": 0.1887, "step": 40620 }, { "epoch": 0.8802244707819643, "grad_norm": 1.400762915611267, "learning_rate": 6.996414544912655e-07, "loss": 0.1805, "step": 40625 }, { "epoch": 0.8803328061014452, "grad_norm": 1.4899497032165527, "learning_rate": 6.983913298958412e-07, "loss": 0.2485, "step": 40630 }, { "epoch": 0.880441141420926, "grad_norm": 0.8379281759262085, "learning_rate": 6.971422827524466e-07, "loss": 0.1531, "step": 40635 }, { "epoch": 0.880549476740407, "grad_norm": 1.3704196214675903, "learning_rate": 6.958943132057572e-07, "loss": 0.2798, "step": 40640 }, { "epoch": 0.8806578120598878, "grad_norm": 0.7260115742683411, "learning_rate": 6.946474214003407e-07, "loss": 0.1885, "step": 40645 }, { "epoch": 0.8807661473793686, "grad_norm": 2.543769598007202, "learning_rate": 6.934016074806238e-07, "loss": 0.2495, "step": 40650 }, { "epoch": 0.8808744826988495, "grad_norm": 0.8352882862091064, "learning_rate": 6.921568715909188e-07, "loss": 0.1683, "step": 40655 }, { "epoch": 0.8809828180183303, "grad_norm": 1.1364161968231201, "learning_rate": 6.909132138754093e-07, "loss": 0.2141, "step": 40660 }, { "epoch": 0.8810911533378112, "grad_norm": 1.2173243761062622, "learning_rate": 6.896706344781524e-07, "loss": 0.271, "step": 40665 }, { "epoch": 0.881199488657292, "grad_norm": 1.7051304578781128, "learning_rate": 6.884291335430859e-07, "loss": 0.3664, "step": 40670 }, { "epoch": 0.8813078239767729, "grad_norm": 0.8822298645973206, "learning_rate": 6.871887112140152e-07, "loss": 0.2454, "step": 40675 }, { "epoch": 0.8814161592962537, "grad_norm": 1.2713309526443481, "learning_rate": 6.859493676346274e-07, "loss": 0.2037, "step": 40680 }, { "epoch": 0.8815244946157347, "grad_norm": 1.055446982383728, "learning_rate": 6.847111029484777e-07, "loss": 0.2222, "step": 40685 }, { "epoch": 0.8816328299352155, "grad_norm": 1.531897783279419, "learning_rate": 6.834739172990068e-07, "loss": 0.1859, "step": 40690 }, { "epoch": 0.8817411652546964, "grad_norm": 1.3945631980895996, "learning_rate": 6.822378108295213e-07, "loss": 0.2172, "step": 40695 }, { "epoch": 0.8818495005741772, "grad_norm": 1.5589280128479004, "learning_rate": 6.810027836832044e-07, "loss": 0.2114, "step": 40700 }, { "epoch": 0.8819578358936581, "grad_norm": 0.6627679467201233, "learning_rate": 6.797688360031174e-07, "loss": 0.2, "step": 40705 }, { "epoch": 0.8820661712131389, "grad_norm": 2.122852087020874, "learning_rate": 6.785359679321923e-07, "loss": 0.1843, "step": 40710 }, { "epoch": 0.8821745065326198, "grad_norm": 1.976865291595459, "learning_rate": 6.773041796132407e-07, "loss": 0.1372, "step": 40715 }, { "epoch": 0.8822828418521006, "grad_norm": 1.8322572708129883, "learning_rate": 6.76073471188945e-07, "loss": 0.2392, "step": 40720 }, { "epoch": 0.8823911771715814, "grad_norm": 1.9045614004135132, "learning_rate": 6.748438428018667e-07, "loss": 0.2465, "step": 40725 }, { "epoch": 0.8824995124910623, "grad_norm": 1.7418620586395264, "learning_rate": 6.736152945944363e-07, "loss": 0.2439, "step": 40730 }, { "epoch": 0.8826078478105432, "grad_norm": 1.3690598011016846, "learning_rate": 6.723878267089656e-07, "loss": 0.201, "step": 40735 }, { "epoch": 0.8827161831300241, "grad_norm": 1.2425271272659302, "learning_rate": 6.711614392876386e-07, "loss": 0.2075, "step": 40740 }, { "epoch": 0.8828245184495049, "grad_norm": 1.585067868232727, "learning_rate": 6.699361324725117e-07, "loss": 0.177, "step": 40745 }, { "epoch": 0.8829328537689858, "grad_norm": 2.0625836849212646, "learning_rate": 6.687119064055214e-07, "loss": 0.2266, "step": 40750 }, { "epoch": 0.8830411890884666, "grad_norm": 1.2553043365478516, "learning_rate": 6.67488761228472e-07, "loss": 0.2558, "step": 40755 }, { "epoch": 0.8831495244079475, "grad_norm": 0.8816577792167664, "learning_rate": 6.662666970830501e-07, "loss": 0.2282, "step": 40760 }, { "epoch": 0.8832578597274283, "grad_norm": 1.1935734748840332, "learning_rate": 6.650457141108102e-07, "loss": 0.1375, "step": 40765 }, { "epoch": 0.8833661950469092, "grad_norm": 1.978035569190979, "learning_rate": 6.638258124531882e-07, "loss": 0.1633, "step": 40770 }, { "epoch": 0.88347453036639, "grad_norm": 1.0145994424819946, "learning_rate": 6.626069922514888e-07, "loss": 0.1958, "step": 40775 }, { "epoch": 0.8835828656858709, "grad_norm": 1.1151009798049927, "learning_rate": 6.613892536468969e-07, "loss": 0.2025, "step": 40780 }, { "epoch": 0.8836912010053518, "grad_norm": 0.9401816725730896, "learning_rate": 6.601725967804662e-07, "loss": 0.1075, "step": 40785 }, { "epoch": 0.8837995363248327, "grad_norm": 1.3776975870132446, "learning_rate": 6.589570217931285e-07, "loss": 0.2217, "step": 40790 }, { "epoch": 0.8839078716443135, "grad_norm": 1.179668664932251, "learning_rate": 6.577425288256933e-07, "loss": 0.2155, "step": 40795 }, { "epoch": 0.8840162069637943, "grad_norm": 1.18218994140625, "learning_rate": 6.565291180188383e-07, "loss": 0.1457, "step": 40800 }, { "epoch": 0.8841245422832752, "grad_norm": 1.3347225189208984, "learning_rate": 6.553167895131207e-07, "loss": 0.204, "step": 40805 }, { "epoch": 0.884232877602756, "grad_norm": 1.4168204069137573, "learning_rate": 6.541055434489674e-07, "loss": 0.2278, "step": 40810 }, { "epoch": 0.8843412129222369, "grad_norm": 2.2281129360198975, "learning_rate": 6.528953799666882e-07, "loss": 0.1762, "step": 40815 }, { "epoch": 0.8844495482417177, "grad_norm": 1.1797209978103638, "learning_rate": 6.516862992064566e-07, "loss": 0.1796, "step": 40820 }, { "epoch": 0.8845578835611986, "grad_norm": 1.1105526685714722, "learning_rate": 6.504783013083327e-07, "loss": 0.2435, "step": 40825 }, { "epoch": 0.8846662188806795, "grad_norm": 1.0250892639160156, "learning_rate": 6.492713864122391e-07, "loss": 0.2145, "step": 40830 }, { "epoch": 0.8847745542001604, "grad_norm": 2.523822069168091, "learning_rate": 6.480655546579817e-07, "loss": 0.1761, "step": 40835 }, { "epoch": 0.8848828895196412, "grad_norm": 1.3424404859542847, "learning_rate": 6.468608061852388e-07, "loss": 0.2458, "step": 40840 }, { "epoch": 0.8849912248391221, "grad_norm": 1.01679265499115, "learning_rate": 6.456571411335611e-07, "loss": 0.1697, "step": 40845 }, { "epoch": 0.8850995601586029, "grad_norm": 1.470918893814087, "learning_rate": 6.444545596423768e-07, "loss": 0.2387, "step": 40850 }, { "epoch": 0.8852078954780838, "grad_norm": 1.6370829343795776, "learning_rate": 6.432530618509835e-07, "loss": 0.1953, "step": 40855 }, { "epoch": 0.8853162307975646, "grad_norm": 0.9854326844215393, "learning_rate": 6.420526478985612e-07, "loss": 0.2283, "step": 40860 }, { "epoch": 0.8854245661170455, "grad_norm": 1.9143624305725098, "learning_rate": 6.408533179241571e-07, "loss": 0.1746, "step": 40865 }, { "epoch": 0.8855329014365263, "grad_norm": 1.0189435482025146, "learning_rate": 6.39655072066695e-07, "loss": 0.2649, "step": 40870 }, { "epoch": 0.8856412367560071, "grad_norm": 1.5573896169662476, "learning_rate": 6.384579104649758e-07, "loss": 0.2763, "step": 40875 }, { "epoch": 0.8857495720754881, "grad_norm": 2.8760406970977783, "learning_rate": 6.372618332576697e-07, "loss": 0.2612, "step": 40880 }, { "epoch": 0.885857907394969, "grad_norm": 1.227030634880066, "learning_rate": 6.360668405833293e-07, "loss": 0.217, "step": 40885 }, { "epoch": 0.8859662427144498, "grad_norm": 1.0710872411727905, "learning_rate": 6.348729325803715e-07, "loss": 0.2853, "step": 40890 }, { "epoch": 0.8860745780339306, "grad_norm": 1.3666446208953857, "learning_rate": 6.336801093870959e-07, "loss": 0.2813, "step": 40895 }, { "epoch": 0.8861829133534115, "grad_norm": 1.314224362373352, "learning_rate": 6.32488371141673e-07, "loss": 0.2481, "step": 40900 }, { "epoch": 0.8862912486728923, "grad_norm": 2.246385335922241, "learning_rate": 6.312977179821456e-07, "loss": 0.2358, "step": 40905 }, { "epoch": 0.8863995839923732, "grad_norm": 1.893602728843689, "learning_rate": 6.301081500464357e-07, "loss": 0.2933, "step": 40910 }, { "epoch": 0.886507919311854, "grad_norm": 1.266365885734558, "learning_rate": 6.289196674723342e-07, "loss": 0.2612, "step": 40915 }, { "epoch": 0.8866162546313349, "grad_norm": 0.9233821630477905, "learning_rate": 6.277322703975119e-07, "loss": 0.1559, "step": 40920 }, { "epoch": 0.8867245899508157, "grad_norm": 1.36818528175354, "learning_rate": 6.26545958959508e-07, "loss": 0.2279, "step": 40925 }, { "epoch": 0.8868329252702967, "grad_norm": 1.883030652999878, "learning_rate": 6.253607332957401e-07, "loss": 0.1816, "step": 40930 }, { "epoch": 0.8869412605897775, "grad_norm": 2.2603278160095215, "learning_rate": 6.241765935435018e-07, "loss": 0.17, "step": 40935 }, { "epoch": 0.8870495959092584, "grad_norm": 2.3373589515686035, "learning_rate": 6.229935398399523e-07, "loss": 0.1476, "step": 40940 }, { "epoch": 0.8871579312287392, "grad_norm": 1.641098141670227, "learning_rate": 6.218115723221363e-07, "loss": 0.2331, "step": 40945 }, { "epoch": 0.88726626654822, "grad_norm": 1.7336770296096802, "learning_rate": 6.206306911269622e-07, "loss": 0.2483, "step": 40950 }, { "epoch": 0.8873746018677009, "grad_norm": 1.022517204284668, "learning_rate": 6.194508963912216e-07, "loss": 0.2063, "step": 40955 }, { "epoch": 0.8874829371871817, "grad_norm": 1.9694257974624634, "learning_rate": 6.182721882515718e-07, "loss": 0.2038, "step": 40960 }, { "epoch": 0.8875912725066626, "grad_norm": 1.473827600479126, "learning_rate": 6.170945668445527e-07, "loss": 0.2146, "step": 40965 }, { "epoch": 0.8876996078261434, "grad_norm": 0.8645578622817993, "learning_rate": 6.159180323065705e-07, "loss": 0.2466, "step": 40970 }, { "epoch": 0.8878079431456244, "grad_norm": 1.6373093128204346, "learning_rate": 6.147425847739108e-07, "loss": 0.2074, "step": 40975 }, { "epoch": 0.8879162784651052, "grad_norm": 1.3678398132324219, "learning_rate": 6.135682243827334e-07, "loss": 0.2955, "step": 40980 }, { "epoch": 0.8880246137845861, "grad_norm": 1.0278764963150024, "learning_rate": 6.123949512690663e-07, "loss": 0.1658, "step": 40985 }, { "epoch": 0.8881329491040669, "grad_norm": 1.7985795736312866, "learning_rate": 6.112227655688196e-07, "loss": 0.291, "step": 40990 }, { "epoch": 0.8882412844235478, "grad_norm": 1.552938461303711, "learning_rate": 6.100516674177703e-07, "loss": 0.306, "step": 40995 }, { "epoch": 0.8883496197430286, "grad_norm": 1.4119318723678589, "learning_rate": 6.088816569515754e-07, "loss": 0.2298, "step": 41000 }, { "epoch": 0.8884579550625095, "grad_norm": 1.1339811086654663, "learning_rate": 6.077127343057598e-07, "loss": 0.2355, "step": 41005 }, { "epoch": 0.8885662903819903, "grad_norm": 1.3896129131317139, "learning_rate": 6.065448996157286e-07, "loss": 0.2317, "step": 41010 }, { "epoch": 0.8886746257014712, "grad_norm": 1.4930195808410645, "learning_rate": 6.053781530167557e-07, "loss": 0.2128, "step": 41015 }, { "epoch": 0.888782961020952, "grad_norm": 0.9872957468032837, "learning_rate": 6.042124946439943e-07, "loss": 0.2336, "step": 41020 }, { "epoch": 0.888891296340433, "grad_norm": 1.8110295534133911, "learning_rate": 6.03047924632465e-07, "loss": 0.2719, "step": 41025 }, { "epoch": 0.8889996316599138, "grad_norm": 0.776563823223114, "learning_rate": 6.018844431170667e-07, "loss": 0.2057, "step": 41030 }, { "epoch": 0.8891079669793946, "grad_norm": 1.5151996612548828, "learning_rate": 6.007220502325739e-07, "loss": 0.2055, "step": 41035 }, { "epoch": 0.8892163022988755, "grad_norm": 1.2163554430007935, "learning_rate": 5.995607461136288e-07, "loss": 0.2442, "step": 41040 }, { "epoch": 0.8893246376183563, "grad_norm": 1.8016085624694824, "learning_rate": 5.984005308947538e-07, "loss": 0.2156, "step": 41045 }, { "epoch": 0.8894329729378372, "grad_norm": 1.5341358184814453, "learning_rate": 5.972414047103403e-07, "loss": 0.2329, "step": 41050 }, { "epoch": 0.889541308257318, "grad_norm": 1.9002201557159424, "learning_rate": 5.960833676946576e-07, "loss": 0.227, "step": 41055 }, { "epoch": 0.8896496435767989, "grad_norm": 1.675660490989685, "learning_rate": 5.949264199818449e-07, "loss": 0.1732, "step": 41060 }, { "epoch": 0.8897579788962797, "grad_norm": 0.7732641100883484, "learning_rate": 5.937705617059208e-07, "loss": 0.2442, "step": 41065 }, { "epoch": 0.8898663142157606, "grad_norm": 1.1364233493804932, "learning_rate": 5.926157930007714e-07, "loss": 0.246, "step": 41070 }, { "epoch": 0.8899746495352415, "grad_norm": 1.1911184787750244, "learning_rate": 5.914621140001565e-07, "loss": 0.2611, "step": 41075 }, { "epoch": 0.8900829848547224, "grad_norm": 1.4390379190444946, "learning_rate": 5.903095248377189e-07, "loss": 0.1864, "step": 41080 }, { "epoch": 0.8901913201742032, "grad_norm": 0.8773348331451416, "learning_rate": 5.891580256469631e-07, "loss": 0.2346, "step": 41085 }, { "epoch": 0.8902996554936841, "grad_norm": 1.3176907300949097, "learning_rate": 5.880076165612791e-07, "loss": 0.2363, "step": 41090 }, { "epoch": 0.8904079908131649, "grad_norm": 0.8085648417472839, "learning_rate": 5.868582977139181e-07, "loss": 0.1719, "step": 41095 }, { "epoch": 0.8905163261326458, "grad_norm": 1.9297356605529785, "learning_rate": 5.857100692380168e-07, "loss": 0.219, "step": 41100 }, { "epoch": 0.8906246614521266, "grad_norm": 1.6292927265167236, "learning_rate": 5.845629312665768e-07, "loss": 0.2346, "step": 41105 }, { "epoch": 0.8907329967716074, "grad_norm": 1.6390091180801392, "learning_rate": 5.834168839324783e-07, "loss": 0.2461, "step": 41110 }, { "epoch": 0.8908413320910883, "grad_norm": 0.9037926197052002, "learning_rate": 5.822719273684729e-07, "loss": 0.2231, "step": 41115 }, { "epoch": 0.8909496674105692, "grad_norm": 1.3767790794372559, "learning_rate": 5.811280617071858e-07, "loss": 0.2693, "step": 41120 }, { "epoch": 0.8910580027300501, "grad_norm": 1.323116660118103, "learning_rate": 5.799852870811207e-07, "loss": 0.1682, "step": 41125 }, { "epoch": 0.8911663380495309, "grad_norm": 1.3965983390808105, "learning_rate": 5.788436036226464e-07, "loss": 0.285, "step": 41130 }, { "epoch": 0.8912746733690118, "grad_norm": 1.0942164659500122, "learning_rate": 5.777030114640125e-07, "loss": 0.2256, "step": 41135 }, { "epoch": 0.8913830086884926, "grad_norm": 1.5077147483825684, "learning_rate": 5.765635107373402e-07, "loss": 0.1767, "step": 41140 }, { "epoch": 0.8914913440079735, "grad_norm": 1.932706356048584, "learning_rate": 5.754251015746192e-07, "loss": 0.256, "step": 41145 }, { "epoch": 0.8915996793274543, "grad_norm": 1.742520809173584, "learning_rate": 5.742877841077222e-07, "loss": 0.262, "step": 41150 }, { "epoch": 0.8917080146469352, "grad_norm": 1.5986965894699097, "learning_rate": 5.731515584683867e-07, "loss": 0.1867, "step": 41155 }, { "epoch": 0.891816349966416, "grad_norm": 1.6650183200836182, "learning_rate": 5.720164247882309e-07, "loss": 0.2878, "step": 41160 }, { "epoch": 0.8919246852858969, "grad_norm": 1.5652786493301392, "learning_rate": 5.708823831987398e-07, "loss": 0.2316, "step": 41165 }, { "epoch": 0.8920330206053778, "grad_norm": 1.162994146347046, "learning_rate": 5.697494338312759e-07, "loss": 0.2383, "step": 41170 }, { "epoch": 0.8921413559248587, "grad_norm": 1.8330631256103516, "learning_rate": 5.686175768170743e-07, "loss": 0.2627, "step": 41175 }, { "epoch": 0.8922496912443395, "grad_norm": 1.3585765361785889, "learning_rate": 5.674868122872468e-07, "loss": 0.1206, "step": 41180 }, { "epoch": 0.8923580265638204, "grad_norm": 1.489342451095581, "learning_rate": 5.66357140372773e-07, "loss": 0.2089, "step": 41185 }, { "epoch": 0.8924663618833012, "grad_norm": 1.672471523284912, "learning_rate": 5.652285612045061e-07, "loss": 0.2748, "step": 41190 }, { "epoch": 0.892574697202782, "grad_norm": 1.2741247415542603, "learning_rate": 5.641010749131803e-07, "loss": 0.2295, "step": 41195 }, { "epoch": 0.8926830325222629, "grad_norm": 1.4660934209823608, "learning_rate": 5.629746816293924e-07, "loss": 0.2633, "step": 41200 }, { "epoch": 0.8927913678417437, "grad_norm": 1.1098899841308594, "learning_rate": 5.618493814836235e-07, "loss": 0.1948, "step": 41205 }, { "epoch": 0.8928997031612246, "grad_norm": 1.3555421829223633, "learning_rate": 5.607251746062181e-07, "loss": 0.3045, "step": 41210 }, { "epoch": 0.8930080384807055, "grad_norm": 1.2843211889266968, "learning_rate": 5.596020611274011e-07, "loss": 0.254, "step": 41215 }, { "epoch": 0.8931163738001864, "grad_norm": 0.8997394442558289, "learning_rate": 5.584800411772706e-07, "loss": 0.2538, "step": 41220 }, { "epoch": 0.8932247091196672, "grad_norm": 1.0813143253326416, "learning_rate": 5.573591148857904e-07, "loss": 0.1514, "step": 41225 }, { "epoch": 0.8933330444391481, "grad_norm": 0.8938834071159363, "learning_rate": 5.562392823828089e-07, "loss": 0.2314, "step": 41230 }, { "epoch": 0.8934413797586289, "grad_norm": 1.4564836025238037, "learning_rate": 5.551205437980367e-07, "loss": 0.2266, "step": 41235 }, { "epoch": 0.8935497150781098, "grad_norm": 1.7294896841049194, "learning_rate": 5.540028992610668e-07, "loss": 0.2434, "step": 41240 }, { "epoch": 0.8936580503975906, "grad_norm": 1.5492231845855713, "learning_rate": 5.528863489013581e-07, "loss": 0.2866, "step": 41245 }, { "epoch": 0.8937663857170715, "grad_norm": 1.6986271142959595, "learning_rate": 5.517708928482502e-07, "loss": 0.2312, "step": 41250 }, { "epoch": 0.8938747210365523, "grad_norm": 1.1448471546173096, "learning_rate": 5.506565312309487e-07, "loss": 0.3125, "step": 41255 }, { "epoch": 0.8939830563560331, "grad_norm": 1.2409394979476929, "learning_rate": 5.49543264178537e-07, "loss": 0.358, "step": 41260 }, { "epoch": 0.8940913916755141, "grad_norm": 2.1300432682037354, "learning_rate": 5.484310918199698e-07, "loss": 0.3082, "step": 41265 }, { "epoch": 0.894199726994995, "grad_norm": 1.8492056131362915, "learning_rate": 5.473200142840762e-07, "loss": 0.2293, "step": 41270 }, { "epoch": 0.8943080623144758, "grad_norm": 1.1747586727142334, "learning_rate": 5.4621003169956e-07, "loss": 0.1923, "step": 41275 }, { "epoch": 0.8944163976339566, "grad_norm": 1.4803924560546875, "learning_rate": 5.451011441949905e-07, "loss": 0.2896, "step": 41280 }, { "epoch": 0.8945247329534375, "grad_norm": 1.0198413133621216, "learning_rate": 5.439933518988216e-07, "loss": 0.1093, "step": 41285 }, { "epoch": 0.8946330682729183, "grad_norm": 1.4283725023269653, "learning_rate": 5.428866549393708e-07, "loss": 0.1702, "step": 41290 }, { "epoch": 0.8947414035923992, "grad_norm": 1.4168087244033813, "learning_rate": 5.417810534448342e-07, "loss": 0.1797, "step": 41295 }, { "epoch": 0.89484973891188, "grad_norm": 2.0765328407287598, "learning_rate": 5.406765475432773e-07, "loss": 0.1876, "step": 41300 }, { "epoch": 0.8949580742313609, "grad_norm": 1.2768758535385132, "learning_rate": 5.395731373626433e-07, "loss": 0.1889, "step": 41305 }, { "epoch": 0.8950664095508417, "grad_norm": 1.422746181488037, "learning_rate": 5.384708230307445e-07, "loss": 0.2498, "step": 41310 }, { "epoch": 0.8951747448703227, "grad_norm": 1.6519619226455688, "learning_rate": 5.373696046752641e-07, "loss": 0.2066, "step": 41315 }, { "epoch": 0.8952830801898035, "grad_norm": 1.0689629316329956, "learning_rate": 5.362694824237669e-07, "loss": 0.2128, "step": 41320 }, { "epoch": 0.8953914155092844, "grad_norm": 0.922521710395813, "learning_rate": 5.35170456403683e-07, "loss": 0.3581, "step": 41325 }, { "epoch": 0.8954997508287652, "grad_norm": 1.278135061264038, "learning_rate": 5.340725267423197e-07, "loss": 0.2261, "step": 41330 }, { "epoch": 0.895608086148246, "grad_norm": 1.3421798944473267, "learning_rate": 5.329756935668528e-07, "loss": 0.2935, "step": 41335 }, { "epoch": 0.8957164214677269, "grad_norm": 1.5956276655197144, "learning_rate": 5.318799570043376e-07, "loss": 0.2522, "step": 41340 }, { "epoch": 0.8958247567872077, "grad_norm": 1.555668592453003, "learning_rate": 5.307853171816957e-07, "loss": 0.2813, "step": 41345 }, { "epoch": 0.8959330921066886, "grad_norm": 1.1232749223709106, "learning_rate": 5.296917742257268e-07, "loss": 0.1927, "step": 41350 }, { "epoch": 0.8960414274261694, "grad_norm": 1.6090770959854126, "learning_rate": 5.285993282631008e-07, "loss": 0.2043, "step": 41355 }, { "epoch": 0.8961497627456504, "grad_norm": 2.6784451007843018, "learning_rate": 5.275079794203586e-07, "loss": 0.195, "step": 41360 }, { "epoch": 0.8962580980651312, "grad_norm": 1.4480112791061401, "learning_rate": 5.264177278239213e-07, "loss": 0.2166, "step": 41365 }, { "epoch": 0.8963664333846121, "grad_norm": 1.0646846294403076, "learning_rate": 5.253285736000746e-07, "loss": 0.1978, "step": 41370 }, { "epoch": 0.8964747687040929, "grad_norm": 1.4736354351043701, "learning_rate": 5.242405168749842e-07, "loss": 0.2316, "step": 41375 }, { "epoch": 0.8965831040235738, "grad_norm": 1.7723840475082397, "learning_rate": 5.231535577746816e-07, "loss": 0.1554, "step": 41380 }, { "epoch": 0.8966914393430546, "grad_norm": 1.4544942378997803, "learning_rate": 5.22067696425077e-07, "loss": 0.305, "step": 41385 }, { "epoch": 0.8967997746625355, "grad_norm": 0.7375266551971436, "learning_rate": 5.2098293295195e-07, "loss": 0.1784, "step": 41390 }, { "epoch": 0.8969081099820163, "grad_norm": 1.3030050992965698, "learning_rate": 5.198992674809544e-07, "loss": 0.1761, "step": 41395 }, { "epoch": 0.8970164453014972, "grad_norm": 1.6705576181411743, "learning_rate": 5.188167001376177e-07, "loss": 0.312, "step": 41400 }, { "epoch": 0.897124780620978, "grad_norm": 1.9516714811325073, "learning_rate": 5.177352310473361e-07, "loss": 0.3465, "step": 41405 }, { "epoch": 0.897233115940459, "grad_norm": 1.8488730192184448, "learning_rate": 5.166548603353838e-07, "loss": 0.1878, "step": 41410 }, { "epoch": 0.8973414512599398, "grad_norm": 0.7783259153366089, "learning_rate": 5.155755881269064e-07, "loss": 0.1662, "step": 41415 }, { "epoch": 0.8974497865794206, "grad_norm": 1.680212140083313, "learning_rate": 5.144974145469205e-07, "loss": 0.195, "step": 41420 }, { "epoch": 0.8975581218989015, "grad_norm": 1.053942322731018, "learning_rate": 5.134203397203175e-07, "loss": 0.1909, "step": 41425 }, { "epoch": 0.8976664572183823, "grad_norm": 1.625205159187317, "learning_rate": 5.123443637718572e-07, "loss": 0.2758, "step": 41430 }, { "epoch": 0.8977747925378632, "grad_norm": 1.3838399648666382, "learning_rate": 5.112694868261791e-07, "loss": 0.1979, "step": 41435 }, { "epoch": 0.897883127857344, "grad_norm": 1.5384329557418823, "learning_rate": 5.10195709007788e-07, "loss": 0.2409, "step": 41440 }, { "epoch": 0.8979914631768249, "grad_norm": 1.2007322311401367, "learning_rate": 5.091230304410677e-07, "loss": 0.2087, "step": 41445 }, { "epoch": 0.8980997984963057, "grad_norm": 1.8280264139175415, "learning_rate": 5.080514512502699e-07, "loss": 0.1611, "step": 41450 }, { "epoch": 0.8982081338157866, "grad_norm": 0.7888853549957275, "learning_rate": 5.069809715595242e-07, "loss": 0.0754, "step": 41455 }, { "epoch": 0.8983164691352675, "grad_norm": 1.1997069120407104, "learning_rate": 5.059115914928259e-07, "loss": 0.212, "step": 41460 }, { "epoch": 0.8984248044547484, "grad_norm": 1.5999946594238281, "learning_rate": 5.04843311174048e-07, "loss": 0.1461, "step": 41465 }, { "epoch": 0.8985331397742292, "grad_norm": 1.667270302772522, "learning_rate": 5.037761307269362e-07, "loss": 0.2391, "step": 41470 }, { "epoch": 0.8986414750937101, "grad_norm": 2.5769760608673096, "learning_rate": 5.027100502751048e-07, "loss": 0.2063, "step": 41475 }, { "epoch": 0.8987498104131909, "grad_norm": 1.2916029691696167, "learning_rate": 5.016450699420461e-07, "loss": 0.155, "step": 41480 }, { "epoch": 0.8988581457326718, "grad_norm": 2.0119335651397705, "learning_rate": 5.005811898511204e-07, "loss": 0.2692, "step": 41485 }, { "epoch": 0.8989664810521526, "grad_norm": 0.808755099773407, "learning_rate": 4.995184101255624e-07, "loss": 0.1746, "step": 41490 }, { "epoch": 0.8990748163716334, "grad_norm": 1.469576358795166, "learning_rate": 4.984567308884791e-07, "loss": 0.2623, "step": 41495 }, { "epoch": 0.8991831516911143, "grad_norm": 2.2157516479492188, "learning_rate": 4.973961522628512e-07, "loss": 0.2564, "step": 41500 }, { "epoch": 0.8992914870105952, "grad_norm": 1.3720526695251465, "learning_rate": 4.963366743715293e-07, "loss": 0.2367, "step": 41505 }, { "epoch": 0.8993998223300761, "grad_norm": 1.6669175624847412, "learning_rate": 4.952782973372383e-07, "loss": 0.1973, "step": 41510 }, { "epoch": 0.8995081576495569, "grad_norm": 1.1183781623840332, "learning_rate": 4.942210212825771e-07, "loss": 0.2677, "step": 41515 }, { "epoch": 0.8996164929690378, "grad_norm": 1.802924394607544, "learning_rate": 4.931648463300132e-07, "loss": 0.2482, "step": 41520 }, { "epoch": 0.8997248282885186, "grad_norm": 1.114925742149353, "learning_rate": 4.921097726018908e-07, "loss": 0.3666, "step": 41525 }, { "epoch": 0.8998331636079995, "grad_norm": 1.090883731842041, "learning_rate": 4.910558002204213e-07, "loss": 0.2178, "step": 41530 }, { "epoch": 0.8999414989274803, "grad_norm": 1.298587679862976, "learning_rate": 4.900029293076947e-07, "loss": 0.1033, "step": 41535 }, { "epoch": 0.9000498342469612, "grad_norm": 1.4646376371383667, "learning_rate": 4.889511599856677e-07, "loss": 0.1886, "step": 41540 }, { "epoch": 0.900158169566442, "grad_norm": 0.8658797144889832, "learning_rate": 4.879004923761743e-07, "loss": 0.1692, "step": 41545 }, { "epoch": 0.9002665048859229, "grad_norm": 1.7034896612167358, "learning_rate": 4.86850926600917e-07, "loss": 0.2717, "step": 41550 }, { "epoch": 0.9003748402054038, "grad_norm": 1.57170569896698, "learning_rate": 4.858024627814728e-07, "loss": 0.1656, "step": 41555 }, { "epoch": 0.9004831755248847, "grad_norm": 1.8079856634140015, "learning_rate": 4.847551010392915e-07, "loss": 0.2847, "step": 41560 }, { "epoch": 0.9005915108443655, "grad_norm": 1.485975742340088, "learning_rate": 4.837088414956925e-07, "loss": 0.2418, "step": 41565 }, { "epoch": 0.9006998461638464, "grad_norm": 0.9260821342468262, "learning_rate": 4.826636842718712e-07, "loss": 0.2238, "step": 41570 }, { "epoch": 0.9008081814833272, "grad_norm": 1.0478678941726685, "learning_rate": 4.81619629488892e-07, "loss": 0.1795, "step": 41575 }, { "epoch": 0.900916516802808, "grad_norm": 1.8734782934188843, "learning_rate": 4.805766772676934e-07, "loss": 0.1788, "step": 41580 }, { "epoch": 0.9010248521222889, "grad_norm": 2.6570241451263428, "learning_rate": 4.795348277290845e-07, "loss": 0.2308, "step": 41585 }, { "epoch": 0.9011331874417697, "grad_norm": 1.4382473230361938, "learning_rate": 4.784940809937511e-07, "loss": 0.2074, "step": 41590 }, { "epoch": 0.9012415227612506, "grad_norm": 2.1329543590545654, "learning_rate": 4.774544371822465e-07, "loss": 0.2125, "step": 41595 }, { "epoch": 0.9013498580807314, "grad_norm": 1.2700128555297852, "learning_rate": 4.7641589641499566e-07, "loss": 0.2051, "step": 41600 }, { "epoch": 0.9014581934002124, "grad_norm": 1.2755404710769653, "learning_rate": 4.7537845881230113e-07, "loss": 0.1895, "step": 41605 }, { "epoch": 0.9015665287196932, "grad_norm": 1.6188241243362427, "learning_rate": 4.743421244943325e-07, "loss": 0.3182, "step": 41610 }, { "epoch": 0.9016748640391741, "grad_norm": 1.2779910564422607, "learning_rate": 4.733068935811358e-07, "loss": 0.1872, "step": 41615 }, { "epoch": 0.9017831993586549, "grad_norm": 1.6941778659820557, "learning_rate": 4.722727661926241e-07, "loss": 0.2331, "step": 41620 }, { "epoch": 0.9018915346781358, "grad_norm": 1.176984429359436, "learning_rate": 4.7123974244858817e-07, "loss": 0.1134, "step": 41625 }, { "epoch": 0.9019998699976166, "grad_norm": 0.9946633577346802, "learning_rate": 4.702078224686879e-07, "loss": 0.2164, "step": 41630 }, { "epoch": 0.9021082053170975, "grad_norm": 1.8960274457931519, "learning_rate": 4.691770063724532e-07, "loss": 0.2706, "step": 41635 }, { "epoch": 0.9022165406365783, "grad_norm": 1.7097511291503906, "learning_rate": 4.6814729427929196e-07, "loss": 0.2458, "step": 41640 }, { "epoch": 0.9023248759560591, "grad_norm": 1.6573659181594849, "learning_rate": 4.6711868630847867e-07, "loss": 0.235, "step": 41645 }, { "epoch": 0.9024332112755401, "grad_norm": 0.9735870957374573, "learning_rate": 4.6609118257916255e-07, "loss": 0.2195, "step": 41650 }, { "epoch": 0.902541546595021, "grad_norm": 3.183347225189209, "learning_rate": 4.650647832103661e-07, "loss": 0.1736, "step": 41655 }, { "epoch": 0.9026498819145018, "grad_norm": 1.779443621635437, "learning_rate": 4.6403948832098087e-07, "loss": 0.2764, "step": 41660 }, { "epoch": 0.9027582172339826, "grad_norm": 1.1847608089447021, "learning_rate": 4.6301529802977307e-07, "loss": 0.1871, "step": 41665 }, { "epoch": 0.9028665525534635, "grad_norm": 0.869021475315094, "learning_rate": 4.619922124553777e-07, "loss": 0.2161, "step": 41670 }, { "epoch": 0.9029748878729443, "grad_norm": 1.1024909019470215, "learning_rate": 4.609702317163067e-07, "loss": 0.2343, "step": 41675 }, { "epoch": 0.9030832231924252, "grad_norm": 1.7032182216644287, "learning_rate": 4.599493559309387e-07, "loss": 0.2245, "step": 41680 }, { "epoch": 0.903191558511906, "grad_norm": 1.0032376050949097, "learning_rate": 4.5892958521752793e-07, "loss": 0.2313, "step": 41685 }, { "epoch": 0.9032998938313869, "grad_norm": 1.4812121391296387, "learning_rate": 4.579109196941989e-07, "loss": 0.3425, "step": 41690 }, { "epoch": 0.9034082291508677, "grad_norm": 1.3421237468719482, "learning_rate": 4.568933594789504e-07, "loss": 0.2094, "step": 41695 }, { "epoch": 0.9035165644703487, "grad_norm": 1.80011785030365, "learning_rate": 4.558769046896494e-07, "loss": 0.2176, "step": 41700 }, { "epoch": 0.9036248997898295, "grad_norm": 2.691215991973877, "learning_rate": 4.548615554440383e-07, "loss": 0.248, "step": 41705 }, { "epoch": 0.9037332351093104, "grad_norm": 1.6390843391418457, "learning_rate": 4.5384731185973086e-07, "loss": 0.2789, "step": 41710 }, { "epoch": 0.9038415704287912, "grad_norm": 1.2778615951538086, "learning_rate": 4.528341740542097e-07, "loss": 0.1699, "step": 41715 }, { "epoch": 0.903949905748272, "grad_norm": 1.6305729150772095, "learning_rate": 4.5182214214483434e-07, "loss": 0.2587, "step": 41720 }, { "epoch": 0.9040582410677529, "grad_norm": 1.813869833946228, "learning_rate": 4.5081121624882987e-07, "loss": 0.2461, "step": 41725 }, { "epoch": 0.9041665763872337, "grad_norm": 1.569248080253601, "learning_rate": 4.4980139648330143e-07, "loss": 0.1296, "step": 41730 }, { "epoch": 0.9042749117067146, "grad_norm": 1.4608614444732666, "learning_rate": 4.4879268296521784e-07, "loss": 0.2529, "step": 41735 }, { "epoch": 0.9043832470261954, "grad_norm": 1.6217074394226074, "learning_rate": 4.477850758114266e-07, "loss": 0.1947, "step": 41740 }, { "epoch": 0.9044915823456764, "grad_norm": 1.077468752861023, "learning_rate": 4.4677857513863997e-07, "loss": 0.2369, "step": 41745 }, { "epoch": 0.9045999176651572, "grad_norm": 1.8947479724884033, "learning_rate": 4.45773181063448e-07, "loss": 0.2331, "step": 41750 }, { "epoch": 0.9047082529846381, "grad_norm": 1.6262562274932861, "learning_rate": 4.447688937023131e-07, "loss": 0.1909, "step": 41755 }, { "epoch": 0.9048165883041189, "grad_norm": 1.1050126552581787, "learning_rate": 4.4376571317156226e-07, "loss": 0.1985, "step": 41760 }, { "epoch": 0.9049249236235998, "grad_norm": 1.4116042852401733, "learning_rate": 4.427636395874024e-07, "loss": 0.2462, "step": 41765 }, { "epoch": 0.9050332589430806, "grad_norm": 1.277261734008789, "learning_rate": 4.417626730659075e-07, "loss": 0.2739, "step": 41770 }, { "epoch": 0.9051415942625615, "grad_norm": 1.9956490993499756, "learning_rate": 4.4076281372302465e-07, "loss": 0.2856, "step": 41775 }, { "epoch": 0.9052499295820423, "grad_norm": 3.0221359729766846, "learning_rate": 4.397640616745713e-07, "loss": 0.1537, "step": 41780 }, { "epoch": 0.9053582649015232, "grad_norm": 1.1669285297393799, "learning_rate": 4.3876641703624044e-07, "loss": 0.1705, "step": 41785 }, { "epoch": 0.905466600221004, "grad_norm": 1.8901423215866089, "learning_rate": 4.377698799235919e-07, "loss": 0.2348, "step": 41790 }, { "epoch": 0.905574935540485, "grad_norm": 1.5515795946121216, "learning_rate": 4.36774450452061e-07, "loss": 0.2435, "step": 41795 }, { "epoch": 0.9056832708599658, "grad_norm": 0.8118362426757812, "learning_rate": 4.3578012873695344e-07, "loss": 0.1743, "step": 41800 }, { "epoch": 0.9057916061794467, "grad_norm": 1.3405276536941528, "learning_rate": 4.347869148934447e-07, "loss": 0.3439, "step": 41805 }, { "epoch": 0.9058999414989275, "grad_norm": 1.3115006685256958, "learning_rate": 4.337948090365862e-07, "loss": 0.1905, "step": 41810 }, { "epoch": 0.9060082768184083, "grad_norm": 1.1792793273925781, "learning_rate": 4.32803811281296e-07, "loss": 0.226, "step": 41815 }, { "epoch": 0.9061166121378892, "grad_norm": 1.9943856000900269, "learning_rate": 4.3181392174236893e-07, "loss": 0.1872, "step": 41820 }, { "epoch": 0.90622494745737, "grad_norm": 2.0515899658203125, "learning_rate": 4.3082514053446657e-07, "loss": 0.1202, "step": 41825 }, { "epoch": 0.9063332827768509, "grad_norm": 1.5554943084716797, "learning_rate": 4.298374677721273e-07, "loss": 0.2741, "step": 41830 }, { "epoch": 0.9064416180963317, "grad_norm": 1.6687967777252197, "learning_rate": 4.288509035697563e-07, "loss": 0.2421, "step": 41835 }, { "epoch": 0.9065499534158126, "grad_norm": 1.2458575963974, "learning_rate": 4.2786544804163e-07, "loss": 0.1945, "step": 41840 }, { "epoch": 0.9066582887352935, "grad_norm": 2.0928614139556885, "learning_rate": 4.2688110130190143e-07, "loss": 0.1325, "step": 41845 }, { "epoch": 0.9067666240547744, "grad_norm": 1.5959241390228271, "learning_rate": 4.258978634645927e-07, "loss": 0.1989, "step": 41850 }, { "epoch": 0.9068749593742552, "grad_norm": 1.2587441205978394, "learning_rate": 4.2491573464359613e-07, "loss": 0.2545, "step": 41855 }, { "epoch": 0.9069832946937361, "grad_norm": 1.9534071683883667, "learning_rate": 4.2393471495267734e-07, "loss": 0.2387, "step": 41860 }, { "epoch": 0.9070916300132169, "grad_norm": 1.063637137413025, "learning_rate": 4.2295480450547323e-07, "loss": 0.1764, "step": 41865 }, { "epoch": 0.9071999653326978, "grad_norm": 1.316787838935852, "learning_rate": 4.219760034154896e-07, "loss": 0.2615, "step": 41870 }, { "epoch": 0.9073083006521786, "grad_norm": 0.9311412572860718, "learning_rate": 4.2099831179610693e-07, "loss": 0.1973, "step": 41875 }, { "epoch": 0.9074166359716594, "grad_norm": 1.3753465414047241, "learning_rate": 4.20021729760578e-07, "loss": 0.2963, "step": 41880 }, { "epoch": 0.9075249712911403, "grad_norm": 1.2935476303100586, "learning_rate": 4.190462574220222e-07, "loss": 0.2172, "step": 41885 }, { "epoch": 0.9076333066106212, "grad_norm": 1.0340242385864258, "learning_rate": 4.180718948934348e-07, "loss": 0.2418, "step": 41890 }, { "epoch": 0.9077416419301021, "grad_norm": 0.9927297830581665, "learning_rate": 4.17098642287681e-07, "loss": 0.2413, "step": 41895 }, { "epoch": 0.9078499772495829, "grad_norm": 1.475840449333191, "learning_rate": 4.1612649971749854e-07, "loss": 0.2799, "step": 41900 }, { "epoch": 0.9079583125690638, "grad_norm": 1.1744247674942017, "learning_rate": 4.1515546729549495e-07, "loss": 0.2648, "step": 41905 }, { "epoch": 0.9080666478885446, "grad_norm": 2.0619118213653564, "learning_rate": 4.141855451341481e-07, "loss": 0.2476, "step": 41910 }, { "epoch": 0.9081749832080255, "grad_norm": 1.3881845474243164, "learning_rate": 4.1321673334581147e-07, "loss": 0.273, "step": 41915 }, { "epoch": 0.9082833185275063, "grad_norm": 1.2410097122192383, "learning_rate": 4.1224903204270527e-07, "loss": 0.2842, "step": 41920 }, { "epoch": 0.9083916538469872, "grad_norm": 1.586987018585205, "learning_rate": 4.1128244133692544e-07, "loss": 0.261, "step": 41925 }, { "epoch": 0.908499989166468, "grad_norm": 0.7967771887779236, "learning_rate": 4.1031696134043453e-07, "loss": 0.1016, "step": 41930 }, { "epoch": 0.9086083244859489, "grad_norm": 2.2686169147491455, "learning_rate": 4.093525921650721e-07, "loss": 0.2894, "step": 41935 }, { "epoch": 0.9087166598054298, "grad_norm": 0.7754508852958679, "learning_rate": 4.08389333922542e-07, "loss": 0.1995, "step": 41940 }, { "epoch": 0.9088249951249107, "grad_norm": 1.0197627544403076, "learning_rate": 4.0742718672442503e-07, "loss": 0.1465, "step": 41945 }, { "epoch": 0.9089333304443915, "grad_norm": 1.4823200702667236, "learning_rate": 4.064661506821732e-07, "loss": 0.2904, "step": 41950 }, { "epoch": 0.9090416657638724, "grad_norm": 0.6496924757957458, "learning_rate": 4.055062259071063e-07, "loss": 0.2427, "step": 41955 }, { "epoch": 0.9091500010833532, "grad_norm": 1.892848253250122, "learning_rate": 4.0454741251041765e-07, "loss": 0.1823, "step": 41960 }, { "epoch": 0.909258336402834, "grad_norm": 1.353202223777771, "learning_rate": 4.0358971060317056e-07, "loss": 0.1776, "step": 41965 }, { "epoch": 0.9093666717223149, "grad_norm": 1.7464994192123413, "learning_rate": 4.0263312029630297e-07, "loss": 0.2612, "step": 41970 }, { "epoch": 0.9094750070417957, "grad_norm": 0.8673453330993652, "learning_rate": 4.0167764170061854e-07, "loss": 0.2371, "step": 41975 }, { "epoch": 0.9095833423612766, "grad_norm": 1.6014314889907837, "learning_rate": 4.0072327492679753e-07, "loss": 0.1619, "step": 41980 }, { "epoch": 0.9096916776807574, "grad_norm": 1.1486762762069702, "learning_rate": 3.99770020085386e-07, "loss": 0.1585, "step": 41985 }, { "epoch": 0.9098000130002384, "grad_norm": 1.0743374824523926, "learning_rate": 3.988178772868068e-07, "loss": 0.2024, "step": 41990 }, { "epoch": 0.9099083483197192, "grad_norm": 1.3443740606307983, "learning_rate": 3.9786684664135264e-07, "loss": 0.1893, "step": 41995 }, { "epoch": 0.9100166836392001, "grad_norm": 1.5174986124038696, "learning_rate": 3.9691692825918225e-07, "loss": 0.2315, "step": 42000 }, { "epoch": 0.9101250189586809, "grad_norm": 1.1902861595153809, "learning_rate": 3.959681222503331e-07, "loss": 0.2573, "step": 42005 }, { "epoch": 0.9102333542781618, "grad_norm": 1.2511273622512817, "learning_rate": 3.9502042872470727e-07, "loss": 0.264, "step": 42010 }, { "epoch": 0.9103416895976426, "grad_norm": 1.4133408069610596, "learning_rate": 3.9407384779208355e-07, "loss": 0.3383, "step": 42015 }, { "epoch": 0.9104500249171235, "grad_norm": 0.8772237300872803, "learning_rate": 3.931283795621066e-07, "loss": 0.1557, "step": 42020 }, { "epoch": 0.9105583602366043, "grad_norm": 0.9089181423187256, "learning_rate": 3.9218402414429645e-07, "loss": 0.1548, "step": 42025 }, { "epoch": 0.9106666955560851, "grad_norm": 1.5394132137298584, "learning_rate": 3.9124078164804233e-07, "loss": 0.1688, "step": 42030 }, { "epoch": 0.9107750308755661, "grad_norm": 1.5480976104736328, "learning_rate": 3.9029865218260355e-07, "loss": 0.2419, "step": 42035 }, { "epoch": 0.910883366195047, "grad_norm": 1.4045906066894531, "learning_rate": 3.8935763585711384e-07, "loss": 0.2766, "step": 42040 }, { "epoch": 0.9109917015145278, "grad_norm": 1.5484460592269897, "learning_rate": 3.884177327805727e-07, "loss": 0.2419, "step": 42045 }, { "epoch": 0.9111000368340086, "grad_norm": 2.2253146171569824, "learning_rate": 3.874789430618575e-07, "loss": 0.3579, "step": 42050 }, { "epoch": 0.9112083721534895, "grad_norm": 1.596530556678772, "learning_rate": 3.8654126680971014e-07, "loss": 0.1856, "step": 42055 }, { "epoch": 0.9113167074729703, "grad_norm": 1.3646703958511353, "learning_rate": 3.856047041327493e-07, "loss": 0.2589, "step": 42060 }, { "epoch": 0.9114250427924512, "grad_norm": 0.7527316808700562, "learning_rate": 3.8466925513945705e-07, "loss": 0.2172, "step": 42065 }, { "epoch": 0.911533378111932, "grad_norm": 1.3238543272018433, "learning_rate": 3.837349199381968e-07, "loss": 0.2746, "step": 42070 }, { "epoch": 0.9116417134314129, "grad_norm": 0.8103119134902954, "learning_rate": 3.8280169863719295e-07, "loss": 0.2905, "step": 42075 }, { "epoch": 0.9117500487508937, "grad_norm": 1.2147741317749023, "learning_rate": 3.818695913445469e-07, "loss": 0.1828, "step": 42080 }, { "epoch": 0.9118583840703747, "grad_norm": 1.0558335781097412, "learning_rate": 3.8093859816822786e-07, "loss": 0.209, "step": 42085 }, { "epoch": 0.9119667193898555, "grad_norm": 1.2683074474334717, "learning_rate": 3.800087192160795e-07, "loss": 0.2135, "step": 42090 }, { "epoch": 0.9120750547093364, "grad_norm": 0.691246509552002, "learning_rate": 3.790799545958146e-07, "loss": 0.2427, "step": 42095 }, { "epoch": 0.9121833900288172, "grad_norm": 1.6901588439941406, "learning_rate": 3.781523044150137e-07, "loss": 0.1986, "step": 42100 }, { "epoch": 0.912291725348298, "grad_norm": 0.6070297956466675, "learning_rate": 3.7722576878113537e-07, "loss": 0.2309, "step": 42105 }, { "epoch": 0.9124000606677789, "grad_norm": 1.7425838708877563, "learning_rate": 3.763003478015015e-07, "loss": 0.2827, "step": 42110 }, { "epoch": 0.9125083959872597, "grad_norm": 1.876462697982788, "learning_rate": 3.7537604158330855e-07, "loss": 0.2013, "step": 42115 }, { "epoch": 0.9126167313067406, "grad_norm": 1.4612733125686646, "learning_rate": 3.7445285023362644e-07, "loss": 0.2755, "step": 42120 }, { "epoch": 0.9127250666262214, "grad_norm": 1.714198350906372, "learning_rate": 3.735307738593885e-07, "loss": 0.2404, "step": 42125 }, { "epoch": 0.9128334019457023, "grad_norm": 1.9707934856414795, "learning_rate": 3.7260981256740827e-07, "loss": 0.2424, "step": 42130 }, { "epoch": 0.9129417372651832, "grad_norm": 1.5811450481414795, "learning_rate": 3.716899664643592e-07, "loss": 0.262, "step": 42135 }, { "epoch": 0.9130500725846641, "grad_norm": 0.9105997085571289, "learning_rate": 3.707712356567983e-07, "loss": 0.2345, "step": 42140 }, { "epoch": 0.9131584079041449, "grad_norm": 1.229629397392273, "learning_rate": 3.698536202511449e-07, "loss": 0.1479, "step": 42145 }, { "epoch": 0.9132667432236258, "grad_norm": 1.2778103351593018, "learning_rate": 3.6893712035368734e-07, "loss": 0.2375, "step": 42150 }, { "epoch": 0.9133750785431066, "grad_norm": 1.1757937669754028, "learning_rate": 3.6802173607059287e-07, "loss": 0.176, "step": 42155 }, { "epoch": 0.9134834138625875, "grad_norm": 1.465557336807251, "learning_rate": 3.6710746750789226e-07, "loss": 0.218, "step": 42160 }, { "epoch": 0.9135917491820683, "grad_norm": 1.3593683242797852, "learning_rate": 3.661943147714919e-07, "loss": 0.2355, "step": 42165 }, { "epoch": 0.9137000845015492, "grad_norm": 1.64992094039917, "learning_rate": 3.65282277967165e-07, "loss": 0.228, "step": 42170 }, { "epoch": 0.91380841982103, "grad_norm": 1.2892295122146606, "learning_rate": 3.6437135720055805e-07, "loss": 0.236, "step": 42175 }, { "epoch": 0.913916755140511, "grad_norm": 1.6481404304504395, "learning_rate": 3.6346155257718786e-07, "loss": 0.257, "step": 42180 }, { "epoch": 0.9140250904599918, "grad_norm": 1.86586594581604, "learning_rate": 3.625528642024412e-07, "loss": 0.2153, "step": 42185 }, { "epoch": 0.9141334257794727, "grad_norm": 0.9609149694442749, "learning_rate": 3.6164529218157716e-07, "loss": 0.2365, "step": 42190 }, { "epoch": 0.9142417610989535, "grad_norm": 1.4070088863372803, "learning_rate": 3.6073883661972176e-07, "loss": 0.2284, "step": 42195 }, { "epoch": 0.9143500964184343, "grad_norm": 1.5468459129333496, "learning_rate": 3.598334976218776e-07, "loss": 0.1966, "step": 42200 }, { "epoch": 0.9144584317379152, "grad_norm": 1.1250265836715698, "learning_rate": 3.589292752929119e-07, "loss": 0.1594, "step": 42205 }, { "epoch": 0.914566767057396, "grad_norm": 0.9357972145080566, "learning_rate": 3.5802616973756867e-07, "loss": 0.2025, "step": 42210 }, { "epoch": 0.9146751023768769, "grad_norm": 1.4419385194778442, "learning_rate": 3.571241810604542e-07, "loss": 0.2268, "step": 42215 }, { "epoch": 0.9147834376963577, "grad_norm": 1.1132208108901978, "learning_rate": 3.5622330936605496e-07, "loss": 0.2045, "step": 42220 }, { "epoch": 0.9148917730158386, "grad_norm": 1.745644211769104, "learning_rate": 3.553235547587197e-07, "loss": 0.2323, "step": 42225 }, { "epoch": 0.9150001083353195, "grad_norm": 2.1875507831573486, "learning_rate": 3.5442491734267503e-07, "loss": 0.1936, "step": 42230 }, { "epoch": 0.9151084436548004, "grad_norm": 1.6351615190505981, "learning_rate": 3.5352739722201215e-07, "loss": 0.1523, "step": 42235 }, { "epoch": 0.9152167789742812, "grad_norm": 1.0338244438171387, "learning_rate": 3.526309945006967e-07, "loss": 0.2188, "step": 42240 }, { "epoch": 0.9153251142937621, "grad_norm": 1.3553129434585571, "learning_rate": 3.517357092825635e-07, "loss": 0.1402, "step": 42245 }, { "epoch": 0.9154334496132429, "grad_norm": 2.2438554763793945, "learning_rate": 3.5084154167131513e-07, "loss": 0.1418, "step": 42250 }, { "epoch": 0.9155417849327238, "grad_norm": 1.6485724449157715, "learning_rate": 3.49948491770532e-07, "loss": 0.1852, "step": 42255 }, { "epoch": 0.9156501202522046, "grad_norm": 1.7310773134231567, "learning_rate": 3.4905655968365706e-07, "loss": 0.1551, "step": 42260 }, { "epoch": 0.9157584555716854, "grad_norm": 0.9289772510528564, "learning_rate": 3.481657455140097e-07, "loss": 0.1863, "step": 42265 }, { "epoch": 0.9158667908911663, "grad_norm": 2.1289944648742676, "learning_rate": 3.472760493647753e-07, "loss": 0.1704, "step": 42270 }, { "epoch": 0.9159751262106473, "grad_norm": 0.887260377407074, "learning_rate": 3.463874713390125e-07, "loss": 0.2315, "step": 42275 }, { "epoch": 0.9160834615301281, "grad_norm": 1.7432034015655518, "learning_rate": 3.455000115396512e-07, "loss": 0.2486, "step": 42280 }, { "epoch": 0.9161917968496089, "grad_norm": 2.2375035285949707, "learning_rate": 3.44613670069488e-07, "loss": 0.2994, "step": 42285 }, { "epoch": 0.9163001321690898, "grad_norm": 2.22866153717041, "learning_rate": 3.437284470311952e-07, "loss": 0.1643, "step": 42290 }, { "epoch": 0.9164084674885706, "grad_norm": 1.8880095481872559, "learning_rate": 3.428443425273087e-07, "loss": 0.1776, "step": 42295 }, { "epoch": 0.9165168028080515, "grad_norm": 1.7765675783157349, "learning_rate": 3.419613566602431e-07, "loss": 0.2575, "step": 42300 }, { "epoch": 0.9166251381275323, "grad_norm": 1.8435308933258057, "learning_rate": 3.410794895322744e-07, "loss": 0.1376, "step": 42305 }, { "epoch": 0.9167334734470132, "grad_norm": 2.110595226287842, "learning_rate": 3.4019874124555874e-07, "loss": 0.2197, "step": 42310 }, { "epoch": 0.916841808766494, "grad_norm": 1.5181161165237427, "learning_rate": 3.3931911190211444e-07, "loss": 0.2556, "step": 42315 }, { "epoch": 0.9169501440859749, "grad_norm": 1.286435604095459, "learning_rate": 3.384406016038333e-07, "loss": 0.2292, "step": 42320 }, { "epoch": 0.9170584794054558, "grad_norm": 1.029340386390686, "learning_rate": 3.375632104524784e-07, "loss": 0.2936, "step": 42325 }, { "epoch": 0.9171668147249367, "grad_norm": 1.6286768913269043, "learning_rate": 3.366869385496818e-07, "loss": 0.2063, "step": 42330 }, { "epoch": 0.9172751500444175, "grad_norm": 2.2328836917877197, "learning_rate": 3.358117859969479e-07, "loss": 0.2725, "step": 42335 }, { "epoch": 0.9173834853638984, "grad_norm": 1.5630792379379272, "learning_rate": 3.3493775289564767e-07, "loss": 0.1669, "step": 42340 }, { "epoch": 0.9174918206833792, "grad_norm": 0.9577457308769226, "learning_rate": 3.3406483934702807e-07, "loss": 0.1172, "step": 42345 }, { "epoch": 0.91760015600286, "grad_norm": 0.8654007911682129, "learning_rate": 3.331930454522003e-07, "loss": 0.1834, "step": 42350 }, { "epoch": 0.9177084913223409, "grad_norm": 2.539400815963745, "learning_rate": 3.323223713121493e-07, "loss": 0.2782, "step": 42355 }, { "epoch": 0.9178168266418217, "grad_norm": 1.6353174448013306, "learning_rate": 3.3145281702773093e-07, "loss": 0.2928, "step": 42360 }, { "epoch": 0.9179251619613026, "grad_norm": 1.919654130935669, "learning_rate": 3.3058438269966796e-07, "loss": 0.2535, "step": 42365 }, { "epoch": 0.9180334972807834, "grad_norm": 1.2557872533798218, "learning_rate": 3.2971706842855666e-07, "loss": 0.1679, "step": 42370 }, { "epoch": 0.9181418326002644, "grad_norm": 1.2990474700927734, "learning_rate": 3.2885087431485996e-07, "loss": 0.1713, "step": 42375 }, { "epoch": 0.9182501679197452, "grad_norm": 1.2672652006149292, "learning_rate": 3.279858004589176e-07, "loss": 0.2092, "step": 42380 }, { "epoch": 0.9183585032392261, "grad_norm": 1.140884280204773, "learning_rate": 3.2712184696093384e-07, "loss": 0.2256, "step": 42385 }, { "epoch": 0.9184668385587069, "grad_norm": 1.0428657531738281, "learning_rate": 3.2625901392098425e-07, "loss": 0.1797, "step": 42390 }, { "epoch": 0.9185751738781878, "grad_norm": 1.2553937435150146, "learning_rate": 3.253973014390155e-07, "loss": 0.2586, "step": 42395 }, { "epoch": 0.9186835091976686, "grad_norm": 0.9509873390197754, "learning_rate": 3.245367096148433e-07, "loss": 0.2033, "step": 42400 }, { "epoch": 0.9187918445171495, "grad_norm": 1.8632725477218628, "learning_rate": 3.236772385481546e-07, "loss": 0.2011, "step": 42405 }, { "epoch": 0.9189001798366303, "grad_norm": 0.8913033604621887, "learning_rate": 3.228188883385064e-07, "loss": 0.1664, "step": 42410 }, { "epoch": 0.9190085151561111, "grad_norm": 2.206851005554199, "learning_rate": 3.2196165908532696e-07, "loss": 0.2529, "step": 42415 }, { "epoch": 0.9191168504755921, "grad_norm": 0.9139492511749268, "learning_rate": 3.211055508879102e-07, "loss": 0.3087, "step": 42420 }, { "epoch": 0.919225185795073, "grad_norm": 1.581809163093567, "learning_rate": 3.202505638454279e-07, "loss": 0.2861, "step": 42425 }, { "epoch": 0.9193335211145538, "grad_norm": 1.7853901386260986, "learning_rate": 3.193966980569141e-07, "loss": 0.2696, "step": 42430 }, { "epoch": 0.9194418564340346, "grad_norm": 1.2079508304595947, "learning_rate": 3.1854395362127753e-07, "loss": 0.2613, "step": 42435 }, { "epoch": 0.9195501917535155, "grad_norm": 2.148399591445923, "learning_rate": 3.1769233063729585e-07, "loss": 0.2331, "step": 42440 }, { "epoch": 0.9196585270729963, "grad_norm": 1.2297347784042358, "learning_rate": 3.1684182920361575e-07, "loss": 0.2213, "step": 42445 }, { "epoch": 0.9197668623924772, "grad_norm": 1.4945613145828247, "learning_rate": 3.1599244941875716e-07, "loss": 0.2413, "step": 42450 }, { "epoch": 0.919875197711958, "grad_norm": 1.3848042488098145, "learning_rate": 3.151441913811071e-07, "loss": 0.2064, "step": 42455 }, { "epoch": 0.9199835330314389, "grad_norm": 1.8274685144424438, "learning_rate": 3.142970551889224e-07, "loss": 0.1575, "step": 42460 }, { "epoch": 0.9200918683509197, "grad_norm": 1.7930397987365723, "learning_rate": 3.134510409403324e-07, "loss": 0.2379, "step": 42465 }, { "epoch": 0.9202002036704007, "grad_norm": 0.9430245757102966, "learning_rate": 3.126061487333343e-07, "loss": 0.2367, "step": 42470 }, { "epoch": 0.9203085389898815, "grad_norm": 1.012376308441162, "learning_rate": 3.117623786657975e-07, "loss": 0.2504, "step": 42475 }, { "epoch": 0.9204168743093624, "grad_norm": 1.3684122562408447, "learning_rate": 3.1091973083545836e-07, "loss": 0.2883, "step": 42480 }, { "epoch": 0.9205252096288432, "grad_norm": 2.773090362548828, "learning_rate": 3.100782053399276e-07, "loss": 0.1799, "step": 42485 }, { "epoch": 0.9206335449483241, "grad_norm": 1.222044587135315, "learning_rate": 3.0923780227668067e-07, "loss": 0.2018, "step": 42490 }, { "epoch": 0.9207418802678049, "grad_norm": 1.2239012718200684, "learning_rate": 3.083985217430674e-07, "loss": 0.2213, "step": 42495 }, { "epoch": 0.9208502155872857, "grad_norm": 1.2207973003387451, "learning_rate": 3.075603638363045e-07, "loss": 0.1755, "step": 42500 }, { "epoch": 0.9209585509067666, "grad_norm": 1.2335578203201294, "learning_rate": 3.06723328653481e-07, "loss": 0.2176, "step": 42505 }, { "epoch": 0.9210668862262474, "grad_norm": 1.3828595876693726, "learning_rate": 3.058874162915537e-07, "loss": 0.1993, "step": 42510 }, { "epoch": 0.9211752215457283, "grad_norm": 1.7136753797531128, "learning_rate": 3.0505262684735306e-07, "loss": 0.2294, "step": 42515 }, { "epoch": 0.9212835568652092, "grad_norm": 1.5227762460708618, "learning_rate": 3.0421896041757384e-07, "loss": 0.1451, "step": 42520 }, { "epoch": 0.9213918921846901, "grad_norm": 1.8335334062576294, "learning_rate": 3.033864170987855e-07, "loss": 0.244, "step": 42525 }, { "epoch": 0.9215002275041709, "grad_norm": 1.7447211742401123, "learning_rate": 3.025549969874264e-07, "loss": 0.2304, "step": 42530 }, { "epoch": 0.9216085628236518, "grad_norm": 1.6920026540756226, "learning_rate": 3.0172470017980295e-07, "loss": 0.1968, "step": 42535 }, { "epoch": 0.9217168981431326, "grad_norm": 1.9945985078811646, "learning_rate": 3.008955267720925e-07, "loss": 0.2034, "step": 42540 }, { "epoch": 0.9218252334626135, "grad_norm": 1.0720235109329224, "learning_rate": 3.0006747686034176e-07, "loss": 0.1863, "step": 42545 }, { "epoch": 0.9219335687820943, "grad_norm": 1.425982117652893, "learning_rate": 2.992405505404705e-07, "loss": 0.2185, "step": 42550 }, { "epoch": 0.9220419041015752, "grad_norm": 1.6013236045837402, "learning_rate": 2.984147479082633e-07, "loss": 0.2149, "step": 42555 }, { "epoch": 0.922150239421056, "grad_norm": 0.8183702826499939, "learning_rate": 2.9759006905937806e-07, "loss": 0.2335, "step": 42560 }, { "epoch": 0.922258574740537, "grad_norm": 1.7001982927322388, "learning_rate": 2.9676651408934163e-07, "loss": 0.187, "step": 42565 }, { "epoch": 0.9223669100600178, "grad_norm": 1.0259885787963867, "learning_rate": 2.9594408309354894e-07, "loss": 0.236, "step": 42570 }, { "epoch": 0.9224752453794987, "grad_norm": 1.4213460683822632, "learning_rate": 2.951227761672681e-07, "loss": 0.2584, "step": 42575 }, { "epoch": 0.9225835806989795, "grad_norm": 1.2594640254974365, "learning_rate": 2.9430259340563425e-07, "loss": 0.2006, "step": 42580 }, { "epoch": 0.9226919160184603, "grad_norm": 1.6149072647094727, "learning_rate": 2.934835349036547e-07, "loss": 0.2299, "step": 42585 }, { "epoch": 0.9228002513379412, "grad_norm": 1.310909628868103, "learning_rate": 2.9266560075620234e-07, "loss": 0.2478, "step": 42590 }, { "epoch": 0.922908586657422, "grad_norm": 1.3985835313796997, "learning_rate": 2.918487910580259e-07, "loss": 0.1696, "step": 42595 }, { "epoch": 0.9230169219769029, "grad_norm": 1.5221409797668457, "learning_rate": 2.9103310590373965e-07, "loss": 0.1501, "step": 42600 }, { "epoch": 0.9231252572963837, "grad_norm": 1.0218502283096313, "learning_rate": 2.902185453878259e-07, "loss": 0.2866, "step": 42605 }, { "epoch": 0.9232335926158646, "grad_norm": 1.1345784664154053, "learning_rate": 2.894051096046413e-07, "loss": 0.198, "step": 42610 }, { "epoch": 0.9233419279353455, "grad_norm": 0.9763696193695068, "learning_rate": 2.885927986484094e-07, "loss": 0.2216, "step": 42615 }, { "epoch": 0.9234502632548264, "grad_norm": 1.0901380777359009, "learning_rate": 2.87781612613226e-07, "loss": 0.287, "step": 42620 }, { "epoch": 0.9235585985743072, "grad_norm": 1.638511061668396, "learning_rate": 2.8697155159305377e-07, "loss": 0.1713, "step": 42625 }, { "epoch": 0.9236669338937881, "grad_norm": 1.767345666885376, "learning_rate": 2.8616261568172655e-07, "loss": 0.2957, "step": 42630 }, { "epoch": 0.9237752692132689, "grad_norm": 1.3138487339019775, "learning_rate": 2.853548049729471e-07, "loss": 0.2091, "step": 42635 }, { "epoch": 0.9238836045327498, "grad_norm": 2.3607420921325684, "learning_rate": 2.8454811956028614e-07, "loss": 0.2437, "step": 42640 }, { "epoch": 0.9239919398522306, "grad_norm": 1.8073554039001465, "learning_rate": 2.837425595371901e-07, "loss": 0.2348, "step": 42645 }, { "epoch": 0.9241002751717114, "grad_norm": 1.1989437341690063, "learning_rate": 2.829381249969676e-07, "loss": 0.128, "step": 42650 }, { "epoch": 0.9242086104911923, "grad_norm": 0.6960209608078003, "learning_rate": 2.82134816032803e-07, "loss": 0.1513, "step": 42655 }, { "epoch": 0.9243169458106731, "grad_norm": 1.2122867107391357, "learning_rate": 2.813326327377464e-07, "loss": 0.2863, "step": 42660 }, { "epoch": 0.9244252811301541, "grad_norm": 1.0273703336715698, "learning_rate": 2.805315752047166e-07, "loss": 0.1952, "step": 42665 }, { "epoch": 0.9245336164496349, "grad_norm": 1.351528286933899, "learning_rate": 2.797316435265085e-07, "loss": 0.2687, "step": 42670 }, { "epoch": 0.9246419517691158, "grad_norm": 1.2831308841705322, "learning_rate": 2.78932837795779e-07, "loss": 0.1573, "step": 42675 }, { "epoch": 0.9247502870885966, "grad_norm": 1.642819881439209, "learning_rate": 2.7813515810505955e-07, "loss": 0.2495, "step": 42680 }, { "epoch": 0.9248586224080775, "grad_norm": 1.3925262689590454, "learning_rate": 2.773386045467463e-07, "loss": 0.1601, "step": 42685 }, { "epoch": 0.9249669577275583, "grad_norm": 1.2426961660385132, "learning_rate": 2.765431772131122e-07, "loss": 0.1704, "step": 42690 }, { "epoch": 0.9250752930470392, "grad_norm": 0.734944224357605, "learning_rate": 2.7574887619629344e-07, "loss": 0.3117, "step": 42695 }, { "epoch": 0.92518362836652, "grad_norm": 1.6218377351760864, "learning_rate": 2.7495570158829757e-07, "loss": 0.2822, "step": 42700 }, { "epoch": 0.9252919636860009, "grad_norm": 1.794594645500183, "learning_rate": 2.7416365348100214e-07, "loss": 0.2329, "step": 42705 }, { "epoch": 0.9254002990054818, "grad_norm": 1.244174838066101, "learning_rate": 2.73372731966155e-07, "loss": 0.2109, "step": 42710 }, { "epoch": 0.9255086343249627, "grad_norm": 1.6175618171691895, "learning_rate": 2.7258293713537274e-07, "loss": 0.2419, "step": 42715 }, { "epoch": 0.9256169696444435, "grad_norm": 0.9400803446769714, "learning_rate": 2.7179426908013895e-07, "loss": 0.2288, "step": 42720 }, { "epoch": 0.9257253049639244, "grad_norm": 1.7207220792770386, "learning_rate": 2.710067278918116e-07, "loss": 0.3301, "step": 42725 }, { "epoch": 0.9258336402834052, "grad_norm": 1.1191961765289307, "learning_rate": 2.702203136616144e-07, "loss": 0.214, "step": 42730 }, { "epoch": 0.925941975602886, "grad_norm": 1.441218376159668, "learning_rate": 2.6943502648064225e-07, "loss": 0.2368, "step": 42735 }, { "epoch": 0.9260503109223669, "grad_norm": 2.5618772506713867, "learning_rate": 2.6865086643985683e-07, "loss": 0.1951, "step": 42740 }, { "epoch": 0.9261586462418477, "grad_norm": 1.1899865865707397, "learning_rate": 2.6786783363009437e-07, "loss": 0.3074, "step": 42745 }, { "epoch": 0.9262669815613286, "grad_norm": 1.2784661054611206, "learning_rate": 2.670859281420557e-07, "loss": 0.148, "step": 42750 }, { "epoch": 0.9263753168808094, "grad_norm": 1.2244043350219727, "learning_rate": 2.663051500663139e-07, "loss": 0.1921, "step": 42755 }, { "epoch": 0.9264836522002904, "grad_norm": 1.755218505859375, "learning_rate": 2.6552549949330765e-07, "loss": 0.2123, "step": 42760 }, { "epoch": 0.9265919875197712, "grad_norm": 0.6429932713508606, "learning_rate": 2.6474697651335144e-07, "loss": 0.2151, "step": 42765 }, { "epoch": 0.9267003228392521, "grad_norm": 0.9898696541786194, "learning_rate": 2.639695812166232e-07, "loss": 0.2866, "step": 42770 }, { "epoch": 0.9268086581587329, "grad_norm": 1.1180824041366577, "learning_rate": 2.6319331369317414e-07, "loss": 0.2649, "step": 42775 }, { "epoch": 0.9269169934782138, "grad_norm": 1.4714668989181519, "learning_rate": 2.6241817403292235e-07, "loss": 0.183, "step": 42780 }, { "epoch": 0.9270253287976946, "grad_norm": 1.359143853187561, "learning_rate": 2.616441623256549e-07, "loss": 0.2836, "step": 42785 }, { "epoch": 0.9271336641171755, "grad_norm": 0.9589196443557739, "learning_rate": 2.6087127866103236e-07, "loss": 0.171, "step": 42790 }, { "epoch": 0.9272419994366563, "grad_norm": 1.8206183910369873, "learning_rate": 2.6009952312857857e-07, "loss": 0.2795, "step": 42795 }, { "epoch": 0.9273503347561372, "grad_norm": 1.562516689300537, "learning_rate": 2.5932889581769204e-07, "loss": 0.1846, "step": 42800 }, { "epoch": 0.9274586700756181, "grad_norm": 1.216942310333252, "learning_rate": 2.5855939681763697e-07, "loss": 0.2152, "step": 42805 }, { "epoch": 0.927567005395099, "grad_norm": 1.2089755535125732, "learning_rate": 2.577910262175498e-07, "loss": 0.1487, "step": 42810 }, { "epoch": 0.9276753407145798, "grad_norm": 1.556382417678833, "learning_rate": 2.570237841064338e-07, "loss": 0.2243, "step": 42815 }, { "epoch": 0.9277836760340606, "grad_norm": 1.5172905921936035, "learning_rate": 2.562576705731623e-07, "loss": 0.1577, "step": 42820 }, { "epoch": 0.9278920113535415, "grad_norm": 0.8457920551300049, "learning_rate": 2.554926857064788e-07, "loss": 0.3158, "step": 42825 }, { "epoch": 0.9280003466730223, "grad_norm": 1.111484169960022, "learning_rate": 2.547288295949946e-07, "loss": 0.2202, "step": 42830 }, { "epoch": 0.9281086819925032, "grad_norm": 1.0055480003356934, "learning_rate": 2.539661023271922e-07, "loss": 0.1292, "step": 42835 }, { "epoch": 0.928217017311984, "grad_norm": 0.8427673578262329, "learning_rate": 2.5320450399142104e-07, "loss": 0.205, "step": 42840 }, { "epoch": 0.9283253526314649, "grad_norm": 1.1618103981018066, "learning_rate": 2.524440346759005e-07, "loss": 0.2973, "step": 42845 }, { "epoch": 0.9284336879509457, "grad_norm": 0.9449501633644104, "learning_rate": 2.516846944687212e-07, "loss": 0.1742, "step": 42850 }, { "epoch": 0.9285420232704267, "grad_norm": 1.2174702882766724, "learning_rate": 2.5092648345783733e-07, "loss": 0.2081, "step": 42855 }, { "epoch": 0.9286503585899075, "grad_norm": 2.080463171005249, "learning_rate": 2.5016940173108183e-07, "loss": 0.2604, "step": 42860 }, { "epoch": 0.9287586939093884, "grad_norm": 1.4883979558944702, "learning_rate": 2.49413449376148e-07, "loss": 0.2197, "step": 42865 }, { "epoch": 0.9288670292288692, "grad_norm": 1.6233744621276855, "learning_rate": 2.4865862648060347e-07, "loss": 0.2306, "step": 42870 }, { "epoch": 0.9289753645483501, "grad_norm": 1.3719598054885864, "learning_rate": 2.479049331318817e-07, "loss": 0.2341, "step": 42875 }, { "epoch": 0.9290836998678309, "grad_norm": 1.5724847316741943, "learning_rate": 2.4715236941728507e-07, "loss": 0.2225, "step": 42880 }, { "epoch": 0.9291920351873117, "grad_norm": 0.9627926349639893, "learning_rate": 2.464009354239916e-07, "loss": 0.1369, "step": 42885 }, { "epoch": 0.9293003705067926, "grad_norm": 1.7614434957504272, "learning_rate": 2.456506312390383e-07, "loss": 0.2218, "step": 42890 }, { "epoch": 0.9294087058262734, "grad_norm": 1.4194064140319824, "learning_rate": 2.449014569493413e-07, "loss": 0.1681, "step": 42895 }, { "epoch": 0.9295170411457543, "grad_norm": 1.4977461099624634, "learning_rate": 2.441534126416767e-07, "loss": 0.227, "step": 42900 }, { "epoch": 0.9296253764652352, "grad_norm": 1.4891489744186401, "learning_rate": 2.4340649840269845e-07, "loss": 0.1444, "step": 42905 }, { "epoch": 0.9297337117847161, "grad_norm": 0.8288758397102356, "learning_rate": 2.42660714318923e-07, "loss": 0.1561, "step": 42910 }, { "epoch": 0.9298420471041969, "grad_norm": 1.2064439058303833, "learning_rate": 2.41916060476739e-07, "loss": 0.2585, "step": 42915 }, { "epoch": 0.9299503824236778, "grad_norm": 2.0313799381256104, "learning_rate": 2.4117253696240405e-07, "loss": 0.2598, "step": 42920 }, { "epoch": 0.9300587177431586, "grad_norm": 0.7069805264472961, "learning_rate": 2.4043014386204267e-07, "loss": 0.2147, "step": 42925 }, { "epoch": 0.9301670530626395, "grad_norm": 0.40729963779449463, "learning_rate": 2.3968888126165156e-07, "loss": 0.2136, "step": 42930 }, { "epoch": 0.9302753883821203, "grad_norm": 1.5716980695724487, "learning_rate": 2.389487492470932e-07, "loss": 0.2069, "step": 42935 }, { "epoch": 0.9303837237016012, "grad_norm": 0.933940589427948, "learning_rate": 2.3820974790410234e-07, "loss": 0.2691, "step": 42940 }, { "epoch": 0.930492059021082, "grad_norm": 1.7125730514526367, "learning_rate": 2.3747187731827935e-07, "loss": 0.2234, "step": 42945 }, { "epoch": 0.930600394340563, "grad_norm": 1.0266454219818115, "learning_rate": 2.3673513757509702e-07, "loss": 0.1472, "step": 42950 }, { "epoch": 0.9307087296600438, "grad_norm": 0.9218014478683472, "learning_rate": 2.3599952875989706e-07, "loss": 0.1436, "step": 42955 }, { "epoch": 0.9308170649795247, "grad_norm": 0.9140111804008484, "learning_rate": 2.3526505095788466e-07, "loss": 0.187, "step": 42960 }, { "epoch": 0.9309254002990055, "grad_norm": 2.3311960697174072, "learning_rate": 2.3453170425414174e-07, "loss": 0.1667, "step": 42965 }, { "epoch": 0.9310337356184863, "grad_norm": 0.9474839568138123, "learning_rate": 2.3379948873361369e-07, "loss": 0.1805, "step": 42970 }, { "epoch": 0.9311420709379672, "grad_norm": 1.503029227256775, "learning_rate": 2.3306840448111822e-07, "loss": 0.3049, "step": 42975 }, { "epoch": 0.931250406257448, "grad_norm": 1.0164283514022827, "learning_rate": 2.3233845158133872e-07, "loss": 0.1362, "step": 42980 }, { "epoch": 0.9313587415769289, "grad_norm": 1.3447808027267456, "learning_rate": 2.3160963011882975e-07, "loss": 0.2045, "step": 42985 }, { "epoch": 0.9314670768964097, "grad_norm": 1.3190546035766602, "learning_rate": 2.3088194017801492e-07, "loss": 0.179, "step": 42990 }, { "epoch": 0.9315754122158906, "grad_norm": 1.1227234601974487, "learning_rate": 2.3015538184318675e-07, "loss": 0.216, "step": 42995 }, { "epoch": 0.9316837475353715, "grad_norm": 1.165423035621643, "learning_rate": 2.2942995519850353e-07, "loss": 0.3037, "step": 43000 }, { "epoch": 0.9317920828548524, "grad_norm": 1.0470227003097534, "learning_rate": 2.2870566032799802e-07, "loss": 0.236, "step": 43005 }, { "epoch": 0.9319004181743332, "grad_norm": 1.743207573890686, "learning_rate": 2.2798249731556754e-07, "loss": 0.2385, "step": 43010 }, { "epoch": 0.9320087534938141, "grad_norm": 1.4526984691619873, "learning_rate": 2.2726046624497954e-07, "loss": 0.2548, "step": 43015 }, { "epoch": 0.9321170888132949, "grad_norm": 1.291400671005249, "learning_rate": 2.265395671998716e-07, "loss": 0.1983, "step": 43020 }, { "epoch": 0.9322254241327758, "grad_norm": 2.431785821914673, "learning_rate": 2.2581980026374794e-07, "loss": 0.3194, "step": 43025 }, { "epoch": 0.9323337594522566, "grad_norm": 1.1809062957763672, "learning_rate": 2.2510116551998417e-07, "loss": 0.2995, "step": 43030 }, { "epoch": 0.9324420947717375, "grad_norm": 1.4016797542572021, "learning_rate": 2.2438366305182148e-07, "loss": 0.3042, "step": 43035 }, { "epoch": 0.9325504300912183, "grad_norm": 1.2563623189926147, "learning_rate": 2.2366729294237332e-07, "loss": 0.1353, "step": 43040 }, { "epoch": 0.9326587654106991, "grad_norm": 1.404861569404602, "learning_rate": 2.2295205527462006e-07, "loss": 0.2087, "step": 43045 }, { "epoch": 0.9327671007301801, "grad_norm": 1.89167058467865, "learning_rate": 2.222379501314087e-07, "loss": 0.3321, "step": 43050 }, { "epoch": 0.9328754360496609, "grad_norm": 1.2619425058364868, "learning_rate": 2.2152497759546198e-07, "loss": 0.1881, "step": 43055 }, { "epoch": 0.9329837713691418, "grad_norm": 1.8670763969421387, "learning_rate": 2.2081313774936386e-07, "loss": 0.3351, "step": 43060 }, { "epoch": 0.9330921066886226, "grad_norm": 1.8443971872329712, "learning_rate": 2.201024306755728e-07, "loss": 0.2388, "step": 43065 }, { "epoch": 0.9332004420081035, "grad_norm": 0.974068820476532, "learning_rate": 2.1939285645641184e-07, "loss": 0.1665, "step": 43070 }, { "epoch": 0.9333087773275843, "grad_norm": 0.9044931530952454, "learning_rate": 2.1868441517407525e-07, "loss": 0.1859, "step": 43075 }, { "epoch": 0.9334171126470652, "grad_norm": 1.4577629566192627, "learning_rate": 2.1797710691062512e-07, "loss": 0.2795, "step": 43080 }, { "epoch": 0.933525447966546, "grad_norm": 1.694532036781311, "learning_rate": 2.1727093174799153e-07, "loss": 0.2185, "step": 43085 }, { "epoch": 0.9336337832860269, "grad_norm": 1.5811458826065063, "learning_rate": 2.1656588976797677e-07, "loss": 0.2506, "step": 43090 }, { "epoch": 0.9337421186055078, "grad_norm": 1.8937901258468628, "learning_rate": 2.1586198105224554e-07, "loss": 0.2518, "step": 43095 }, { "epoch": 0.9338504539249887, "grad_norm": 2.871244430541992, "learning_rate": 2.1515920568233928e-07, "loss": 0.3711, "step": 43100 }, { "epoch": 0.9339587892444695, "grad_norm": 1.3019438982009888, "learning_rate": 2.1445756373966065e-07, "loss": 0.2211, "step": 43105 }, { "epoch": 0.9340671245639504, "grad_norm": 1.2082436084747314, "learning_rate": 2.1375705530548795e-07, "loss": 0.2229, "step": 43110 }, { "epoch": 0.9341754598834312, "grad_norm": 1.4239413738250732, "learning_rate": 2.1305768046096187e-07, "loss": 0.2063, "step": 43115 }, { "epoch": 0.934283795202912, "grad_norm": 1.5332155227661133, "learning_rate": 2.1235943928709312e-07, "loss": 0.1962, "step": 43120 }, { "epoch": 0.9343921305223929, "grad_norm": 1.3884339332580566, "learning_rate": 2.1166233186476703e-07, "loss": 0.2206, "step": 43125 }, { "epoch": 0.9345004658418737, "grad_norm": 0.9046311974525452, "learning_rate": 2.10966358274729e-07, "loss": 0.2021, "step": 43130 }, { "epoch": 0.9346088011613546, "grad_norm": 0.9835720062255859, "learning_rate": 2.1027151859759897e-07, "loss": 0.1291, "step": 43135 }, { "epoch": 0.9347171364808354, "grad_norm": 2.0679073333740234, "learning_rate": 2.095778129138637e-07, "loss": 0.1655, "step": 43140 }, { "epoch": 0.9348254718003164, "grad_norm": 0.8215717673301697, "learning_rate": 2.0888524130387776e-07, "loss": 0.1639, "step": 43145 }, { "epoch": 0.9349338071197972, "grad_norm": 1.905959129333496, "learning_rate": 2.0819380384786592e-07, "loss": 0.1355, "step": 43150 }, { "epoch": 0.9350421424392781, "grad_norm": 1.421755313873291, "learning_rate": 2.0750350062592073e-07, "loss": 0.183, "step": 43155 }, { "epoch": 0.9351504777587589, "grad_norm": 2.1427853107452393, "learning_rate": 2.068143317180038e-07, "loss": 0.1688, "step": 43160 }, { "epoch": 0.9352588130782398, "grad_norm": 2.0504963397979736, "learning_rate": 2.061262972039435e-07, "loss": 0.2053, "step": 43165 }, { "epoch": 0.9353671483977206, "grad_norm": 1.0151989459991455, "learning_rate": 2.0543939716344163e-07, "loss": 0.1811, "step": 43170 }, { "epoch": 0.9354754837172015, "grad_norm": 1.1294260025024414, "learning_rate": 2.0475363167606122e-07, "loss": 0.1576, "step": 43175 }, { "epoch": 0.9355838190366823, "grad_norm": 1.1852060556411743, "learning_rate": 2.0406900082123981e-07, "loss": 0.2578, "step": 43180 }, { "epoch": 0.9356921543561632, "grad_norm": 1.224095344543457, "learning_rate": 2.0338550467828177e-07, "loss": 0.166, "step": 43185 }, { "epoch": 0.935800489675644, "grad_norm": 2.5149409770965576, "learning_rate": 2.0270314332636042e-07, "loss": 0.2365, "step": 43190 }, { "epoch": 0.935908824995125, "grad_norm": 1.9254628419876099, "learning_rate": 2.0202191684451588e-07, "loss": 0.1674, "step": 43195 }, { "epoch": 0.9360171603146058, "grad_norm": 1.9869742393493652, "learning_rate": 2.0134182531165837e-07, "loss": 0.2237, "step": 43200 }, { "epoch": 0.9361254956340866, "grad_norm": 1.3486459255218506, "learning_rate": 2.0066286880656706e-07, "loss": 0.2483, "step": 43205 }, { "epoch": 0.9362338309535675, "grad_norm": 1.4441479444503784, "learning_rate": 1.9998504740788793e-07, "loss": 0.1521, "step": 43210 }, { "epoch": 0.9363421662730483, "grad_norm": 2.291044235229492, "learning_rate": 1.9930836119413822e-07, "loss": 0.1676, "step": 43215 }, { "epoch": 0.9364505015925292, "grad_norm": 0.5637838244438171, "learning_rate": 1.9863281024369853e-07, "loss": 0.116, "step": 43220 }, { "epoch": 0.93655883691201, "grad_norm": 0.8893889784812927, "learning_rate": 1.9795839463482513e-07, "loss": 0.1671, "step": 43225 }, { "epoch": 0.9366671722314909, "grad_norm": 1.0694924592971802, "learning_rate": 1.9728511444563557e-07, "loss": 0.2659, "step": 43230 }, { "epoch": 0.9367755075509717, "grad_norm": 2.245044469833374, "learning_rate": 1.966129697541219e-07, "loss": 0.233, "step": 43235 }, { "epoch": 0.9368838428704527, "grad_norm": 0.9038106799125671, "learning_rate": 1.9594196063813965e-07, "loss": 0.3019, "step": 43240 }, { "epoch": 0.9369921781899335, "grad_norm": 0.7453679442405701, "learning_rate": 1.9527208717541658e-07, "loss": 0.1685, "step": 43245 }, { "epoch": 0.9371005135094144, "grad_norm": 1.1967369318008423, "learning_rate": 1.9460334944354841e-07, "loss": 0.2256, "step": 43250 }, { "epoch": 0.9372088488288952, "grad_norm": 1.3859459161758423, "learning_rate": 1.9393574751999766e-07, "loss": 0.307, "step": 43255 }, { "epoch": 0.9373171841483761, "grad_norm": 1.3363935947418213, "learning_rate": 1.9326928148209467e-07, "loss": 0.1917, "step": 43260 }, { "epoch": 0.9374255194678569, "grad_norm": 1.3340418338775635, "learning_rate": 1.9260395140704102e-07, "loss": 0.2264, "step": 43265 }, { "epoch": 0.9375338547873378, "grad_norm": 1.087362289428711, "learning_rate": 1.9193975737190506e-07, "loss": 0.1773, "step": 43270 }, { "epoch": 0.9376421901068186, "grad_norm": 1.4644594192504883, "learning_rate": 1.9127669945362303e-07, "loss": 0.2155, "step": 43275 }, { "epoch": 0.9377505254262994, "grad_norm": 1.5119655132293701, "learning_rate": 1.906147777290013e-07, "loss": 0.2406, "step": 43280 }, { "epoch": 0.9378588607457803, "grad_norm": 1.1292603015899658, "learning_rate": 1.899539922747129e-07, "loss": 0.3006, "step": 43285 }, { "epoch": 0.9379671960652612, "grad_norm": 1.7492448091506958, "learning_rate": 1.8929434316729888e-07, "loss": 0.2305, "step": 43290 }, { "epoch": 0.9380755313847421, "grad_norm": 1.2861026525497437, "learning_rate": 1.8863583048317257e-07, "loss": 0.1616, "step": 43295 }, { "epoch": 0.9381838667042229, "grad_norm": 1.5927159786224365, "learning_rate": 1.879784542986096e-07, "loss": 0.1403, "step": 43300 }, { "epoch": 0.9382922020237038, "grad_norm": 1.522679328918457, "learning_rate": 1.873222146897602e-07, "loss": 0.2836, "step": 43305 }, { "epoch": 0.9384005373431846, "grad_norm": 1.3637303113937378, "learning_rate": 1.8666711173263685e-07, "loss": 0.2409, "step": 43310 }, { "epoch": 0.9385088726626655, "grad_norm": 0.9050498008728027, "learning_rate": 1.8601314550312554e-07, "loss": 0.25, "step": 43315 }, { "epoch": 0.9386172079821463, "grad_norm": 1.4701639413833618, "learning_rate": 1.853603160769779e-07, "loss": 0.2009, "step": 43320 }, { "epoch": 0.9387255433016272, "grad_norm": 1.1522315740585327, "learning_rate": 1.8470862352981344e-07, "loss": 0.1545, "step": 43325 }, { "epoch": 0.938833878621108, "grad_norm": 2.126805305480957, "learning_rate": 1.84058067937124e-07, "loss": 0.115, "step": 43330 }, { "epoch": 0.938942213940589, "grad_norm": 1.5685454607009888, "learning_rate": 1.834086493742615e-07, "loss": 0.1141, "step": 43335 }, { "epoch": 0.9390505492600698, "grad_norm": 1.0162471532821655, "learning_rate": 1.8276036791645692e-07, "loss": 0.1947, "step": 43340 }, { "epoch": 0.9391588845795507, "grad_norm": 1.299074411392212, "learning_rate": 1.8211322363880014e-07, "loss": 0.2056, "step": 43345 }, { "epoch": 0.9392672198990315, "grad_norm": 0.9756198525428772, "learning_rate": 1.8146721661625454e-07, "loss": 0.1618, "step": 43350 }, { "epoch": 0.9393755552185123, "grad_norm": 0.9582821130752563, "learning_rate": 1.808223469236514e-07, "loss": 0.2634, "step": 43355 }, { "epoch": 0.9394838905379932, "grad_norm": 1.2717440128326416, "learning_rate": 1.8017861463568763e-07, "loss": 0.2055, "step": 43360 }, { "epoch": 0.939592225857474, "grad_norm": 1.668710470199585, "learning_rate": 1.7953601982693025e-07, "loss": 0.193, "step": 43365 }, { "epoch": 0.9397005611769549, "grad_norm": 1.3088490962982178, "learning_rate": 1.7889456257181414e-07, "loss": 0.2328, "step": 43370 }, { "epoch": 0.9398088964964357, "grad_norm": 1.1824873685836792, "learning_rate": 1.7825424294464323e-07, "loss": 0.247, "step": 43375 }, { "epoch": 0.9399172318159166, "grad_norm": 1.8531826734542847, "learning_rate": 1.7761506101958814e-07, "loss": 0.2463, "step": 43380 }, { "epoch": 0.9400255671353975, "grad_norm": 0.9299270510673523, "learning_rate": 1.7697701687068858e-07, "loss": 0.2925, "step": 43385 }, { "epoch": 0.9401339024548784, "grad_norm": 1.3009339570999146, "learning_rate": 1.7634011057185318e-07, "loss": 0.2316, "step": 43390 }, { "epoch": 0.9402422377743592, "grad_norm": 1.443316102027893, "learning_rate": 1.757043421968585e-07, "loss": 0.3074, "step": 43395 }, { "epoch": 0.9403505730938401, "grad_norm": 1.5081400871276855, "learning_rate": 1.7506971181934672e-07, "loss": 0.2211, "step": 43400 }, { "epoch": 0.9404589084133209, "grad_norm": 1.436295509338379, "learning_rate": 1.744362195128313e-07, "loss": 0.2249, "step": 43405 }, { "epoch": 0.9405672437328018, "grad_norm": 2.3613483905792236, "learning_rate": 1.738038653506946e-07, "loss": 0.1794, "step": 43410 }, { "epoch": 0.9406755790522826, "grad_norm": 1.4955466985702515, "learning_rate": 1.731726494061814e-07, "loss": 0.1311, "step": 43415 }, { "epoch": 0.9407839143717635, "grad_norm": 0.9612016677856445, "learning_rate": 1.725425717524132e-07, "loss": 0.1808, "step": 43420 }, { "epoch": 0.9408922496912443, "grad_norm": 1.6410400867462158, "learning_rate": 1.7191363246237046e-07, "loss": 0.2633, "step": 43425 }, { "epoch": 0.9410005850107251, "grad_norm": 1.0264503955841064, "learning_rate": 1.7128583160891056e-07, "loss": 0.2258, "step": 43430 }, { "epoch": 0.9411089203302061, "grad_norm": 0.8795071244239807, "learning_rate": 1.7065916926475079e-07, "loss": 0.2528, "step": 43435 }, { "epoch": 0.941217255649687, "grad_norm": 1.8533774614334106, "learning_rate": 1.7003364550248425e-07, "loss": 0.1432, "step": 43440 }, { "epoch": 0.9413255909691678, "grad_norm": 1.7255669832229614, "learning_rate": 1.694092603945674e-07, "loss": 0.2507, "step": 43445 }, { "epoch": 0.9414339262886486, "grad_norm": 1.3279950618743896, "learning_rate": 1.687860140133246e-07, "loss": 0.1785, "step": 43450 }, { "epoch": 0.9415422616081295, "grad_norm": 0.7440613508224487, "learning_rate": 1.6816390643095038e-07, "loss": 0.1842, "step": 43455 }, { "epoch": 0.9416505969276103, "grad_norm": 1.3056639432907104, "learning_rate": 1.6754293771950702e-07, "loss": 0.1179, "step": 43460 }, { "epoch": 0.9417589322470912, "grad_norm": 1.8122047185897827, "learning_rate": 1.669231079509248e-07, "loss": 0.2349, "step": 43465 }, { "epoch": 0.941867267566572, "grad_norm": 1.2728550434112549, "learning_rate": 1.6630441719699962e-07, "loss": 0.2651, "step": 43470 }, { "epoch": 0.9419756028860529, "grad_norm": 1.2974051237106323, "learning_rate": 1.6568686552940084e-07, "loss": 0.1608, "step": 43475 }, { "epoch": 0.9420839382055338, "grad_norm": 1.1970324516296387, "learning_rate": 1.6507045301965896e-07, "loss": 0.2701, "step": 43480 }, { "epoch": 0.9421922735250147, "grad_norm": 1.0027813911437988, "learning_rate": 1.64455179739178e-07, "loss": 0.2633, "step": 43485 }, { "epoch": 0.9423006088444955, "grad_norm": 1.1912050247192383, "learning_rate": 1.6384104575922877e-07, "loss": 0.2156, "step": 43490 }, { "epoch": 0.9424089441639764, "grad_norm": 1.4797005653381348, "learning_rate": 1.6322805115094763e-07, "loss": 0.2528, "step": 43495 }, { "epoch": 0.9425172794834572, "grad_norm": 0.7710201740264893, "learning_rate": 1.626161959853434e-07, "loss": 0.1847, "step": 43500 }, { "epoch": 0.942625614802938, "grad_norm": 1.5920491218566895, "learning_rate": 1.6200548033328822e-07, "loss": 0.2295, "step": 43505 }, { "epoch": 0.9427339501224189, "grad_norm": 1.2413092851638794, "learning_rate": 1.6139590426552553e-07, "loss": 0.2307, "step": 43510 }, { "epoch": 0.9428422854418997, "grad_norm": 0.949762225151062, "learning_rate": 1.6078746785266442e-07, "loss": 0.1741, "step": 43515 }, { "epoch": 0.9429506207613806, "grad_norm": 1.0331003665924072, "learning_rate": 1.6018017116518403e-07, "loss": 0.1495, "step": 43520 }, { "epoch": 0.9430589560808614, "grad_norm": 1.4255179166793823, "learning_rate": 1.5957401427342922e-07, "loss": 0.2371, "step": 43525 }, { "epoch": 0.9431672914003424, "grad_norm": 1.8628569841384888, "learning_rate": 1.5896899724761604e-07, "loss": 0.3267, "step": 43530 }, { "epoch": 0.9432756267198232, "grad_norm": 1.9400761127471924, "learning_rate": 1.5836512015782623e-07, "loss": 0.1887, "step": 43535 }, { "epoch": 0.9433839620393041, "grad_norm": 0.9969860911369324, "learning_rate": 1.5776238307400936e-07, "loss": 0.1651, "step": 43540 }, { "epoch": 0.9434922973587849, "grad_norm": 1.5423336029052734, "learning_rate": 1.5716078606598406e-07, "loss": 0.2117, "step": 43545 }, { "epoch": 0.9436006326782658, "grad_norm": 1.0310335159301758, "learning_rate": 1.5656032920343566e-07, "loss": 0.2302, "step": 43550 }, { "epoch": 0.9437089679977466, "grad_norm": 1.8345271348953247, "learning_rate": 1.5596101255591857e-07, "loss": 0.3438, "step": 43555 }, { "epoch": 0.9438173033172275, "grad_norm": 1.921586036682129, "learning_rate": 1.5536283619285386e-07, "loss": 0.1662, "step": 43560 }, { "epoch": 0.9439256386367083, "grad_norm": 2.0264170169830322, "learning_rate": 1.547658001835328e-07, "loss": 0.2561, "step": 43565 }, { "epoch": 0.9440339739561892, "grad_norm": 1.431107759475708, "learning_rate": 1.5416990459711234e-07, "loss": 0.1791, "step": 43570 }, { "epoch": 0.94414230927567, "grad_norm": 1.3067346811294556, "learning_rate": 1.535751495026172e-07, "loss": 0.2951, "step": 43575 }, { "epoch": 0.944250644595151, "grad_norm": 1.183868169784546, "learning_rate": 1.529815349689412e-07, "loss": 0.3139, "step": 43580 }, { "epoch": 0.9443589799146318, "grad_norm": 1.527626633644104, "learning_rate": 1.5238906106484597e-07, "loss": 0.2553, "step": 43585 }, { "epoch": 0.9444673152341126, "grad_norm": 1.2068687677383423, "learning_rate": 1.517977278589622e-07, "loss": 0.1825, "step": 43590 }, { "epoch": 0.9445756505535935, "grad_norm": 1.6781026124954224, "learning_rate": 1.5120753541978394e-07, "loss": 0.2885, "step": 43595 }, { "epoch": 0.9446839858730743, "grad_norm": 1.3263602256774902, "learning_rate": 1.506184838156799e-07, "loss": 0.2289, "step": 43600 }, { "epoch": 0.9447923211925552, "grad_norm": 1.6156420707702637, "learning_rate": 1.500305731148799e-07, "loss": 0.2887, "step": 43605 }, { "epoch": 0.944900656512036, "grad_norm": 1.419899582862854, "learning_rate": 1.4944380338548504e-07, "loss": 0.2385, "step": 43610 }, { "epoch": 0.9450089918315169, "grad_norm": 1.471508502960205, "learning_rate": 1.4885817469546425e-07, "loss": 0.252, "step": 43615 }, { "epoch": 0.9451173271509977, "grad_norm": 1.169525384902954, "learning_rate": 1.4827368711265334e-07, "loss": 0.3696, "step": 43620 }, { "epoch": 0.9452256624704787, "grad_norm": 1.8155995607376099, "learning_rate": 1.4769034070475697e-07, "loss": 0.2256, "step": 43625 }, { "epoch": 0.9453339977899595, "grad_norm": 1.6592230796813965, "learning_rate": 1.4710813553934776e-07, "loss": 0.2539, "step": 43630 }, { "epoch": 0.9454423331094404, "grad_norm": 0.7641301155090332, "learning_rate": 1.4652707168386403e-07, "loss": 0.3034, "step": 43635 }, { "epoch": 0.9455506684289212, "grad_norm": 1.6458749771118164, "learning_rate": 1.4594714920561525e-07, "loss": 0.212, "step": 43640 }, { "epoch": 0.9456590037484021, "grad_norm": 1.5466960668563843, "learning_rate": 1.4536836817177436e-07, "loss": 0.2223, "step": 43645 }, { "epoch": 0.9457673390678829, "grad_norm": 1.782531499862671, "learning_rate": 1.4479072864938658e-07, "loss": 0.2369, "step": 43650 }, { "epoch": 0.9458756743873638, "grad_norm": 3.089904308319092, "learning_rate": 1.4421423070536066e-07, "loss": 0.1949, "step": 43655 }, { "epoch": 0.9459840097068446, "grad_norm": 1.2572062015533447, "learning_rate": 1.436388744064776e-07, "loss": 0.2501, "step": 43660 }, { "epoch": 0.9460923450263254, "grad_norm": 1.4963665008544922, "learning_rate": 1.430646598193819e-07, "loss": 0.1868, "step": 43665 }, { "epoch": 0.9462006803458063, "grad_norm": 1.5791614055633545, "learning_rate": 1.424915870105892e-07, "loss": 0.257, "step": 43670 }, { "epoch": 0.9463090156652872, "grad_norm": 1.5145002603530884, "learning_rate": 1.4191965604648084e-07, "loss": 0.1981, "step": 43675 }, { "epoch": 0.9464173509847681, "grad_norm": 1.7379138469696045, "learning_rate": 1.4134886699330497e-07, "loss": 0.2242, "step": 43680 }, { "epoch": 0.9465256863042489, "grad_norm": 1.1495603322982788, "learning_rate": 1.407792199171809e-07, "loss": 0.1991, "step": 43685 }, { "epoch": 0.9466340216237298, "grad_norm": 0.8897133469581604, "learning_rate": 1.4021071488409366e-07, "loss": 0.2509, "step": 43690 }, { "epoch": 0.9467423569432106, "grad_norm": 1.3052935600280762, "learning_rate": 1.3964335195989498e-07, "loss": 0.2202, "step": 43695 }, { "epoch": 0.9468506922626915, "grad_norm": 0.9725906848907471, "learning_rate": 1.3907713121030565e-07, "loss": 0.3164, "step": 43700 }, { "epoch": 0.9469590275821723, "grad_norm": 1.8171573877334595, "learning_rate": 1.3851205270091428e-07, "loss": 0.202, "step": 43705 }, { "epoch": 0.9470673629016532, "grad_norm": 1.4837524890899658, "learning_rate": 1.3794811649717632e-07, "loss": 0.2487, "step": 43710 }, { "epoch": 0.947175698221134, "grad_norm": 1.9328430891036987, "learning_rate": 1.3738532266441618e-07, "loss": 0.2643, "step": 43715 }, { "epoch": 0.9472840335406149, "grad_norm": 2.008901357650757, "learning_rate": 1.368236712678239e-07, "loss": 0.1577, "step": 43720 }, { "epoch": 0.9473923688600958, "grad_norm": 1.4336516857147217, "learning_rate": 1.362631623724586e-07, "loss": 0.2567, "step": 43725 }, { "epoch": 0.9475007041795767, "grad_norm": 0.5466527342796326, "learning_rate": 1.3570379604324835e-07, "loss": 0.2463, "step": 43730 }, { "epoch": 0.9476090394990575, "grad_norm": 1.3396843671798706, "learning_rate": 1.351455723449846e-07, "loss": 0.1812, "step": 43735 }, { "epoch": 0.9477173748185383, "grad_norm": 1.8070790767669678, "learning_rate": 1.345884913423323e-07, "loss": 0.2061, "step": 43740 }, { "epoch": 0.9478257101380192, "grad_norm": 1.2183676958084106, "learning_rate": 1.3403255309981877e-07, "loss": 0.1627, "step": 43745 }, { "epoch": 0.9479340454575, "grad_norm": 0.990070641040802, "learning_rate": 1.3347775768184134e-07, "loss": 0.3019, "step": 43750 }, { "epoch": 0.9480423807769809, "grad_norm": 1.277632474899292, "learning_rate": 1.329241051526653e-07, "loss": 0.2822, "step": 43755 }, { "epoch": 0.9481507160964617, "grad_norm": 1.679421305656433, "learning_rate": 1.3237159557642376e-07, "loss": 0.2514, "step": 43760 }, { "epoch": 0.9482590514159426, "grad_norm": 1.7499853372573853, "learning_rate": 1.3182022901711444e-07, "loss": 0.2023, "step": 43765 }, { "epoch": 0.9483673867354235, "grad_norm": 1.0740853548049927, "learning_rate": 1.3127000553860737e-07, "loss": 0.2194, "step": 43770 }, { "epoch": 0.9484757220549044, "grad_norm": 0.9370332360267639, "learning_rate": 1.307209252046371e-07, "loss": 0.1599, "step": 43775 }, { "epoch": 0.9485840573743852, "grad_norm": 1.199711561203003, "learning_rate": 1.3017298807880386e-07, "loss": 0.168, "step": 43780 }, { "epoch": 0.9486923926938661, "grad_norm": 1.2311570644378662, "learning_rate": 1.2962619422458132e-07, "loss": 0.2068, "step": 43785 }, { "epoch": 0.9488007280133469, "grad_norm": 0.7418756484985352, "learning_rate": 1.2908054370530554e-07, "loss": 0.2288, "step": 43790 }, { "epoch": 0.9489090633328278, "grad_norm": 1.2062692642211914, "learning_rate": 1.2853603658418367e-07, "loss": 0.2387, "step": 43795 }, { "epoch": 0.9490173986523086, "grad_norm": 1.5776419639587402, "learning_rate": 1.279926729242864e-07, "loss": 0.2314, "step": 43800 }, { "epoch": 0.9491257339717895, "grad_norm": 1.2795473337173462, "learning_rate": 1.274504527885556e-07, "loss": 0.2015, "step": 43805 }, { "epoch": 0.9492340692912703, "grad_norm": 1.1466325521469116, "learning_rate": 1.2690937623979882e-07, "loss": 0.2315, "step": 43810 }, { "epoch": 0.9493424046107511, "grad_norm": 1.6459852457046509, "learning_rate": 1.2636944334069145e-07, "loss": 0.1733, "step": 43815 }, { "epoch": 0.9494507399302321, "grad_norm": 1.3767673969268799, "learning_rate": 1.2583065415377682e-07, "loss": 0.2049, "step": 43820 }, { "epoch": 0.949559075249713, "grad_norm": 1.749889612197876, "learning_rate": 1.2529300874146722e-07, "loss": 0.1849, "step": 43825 }, { "epoch": 0.9496674105691938, "grad_norm": 1.481012225151062, "learning_rate": 1.2475650716603838e-07, "loss": 0.2476, "step": 43830 }, { "epoch": 0.9497757458886746, "grad_norm": 0.9687017798423767, "learning_rate": 1.2422114948963726e-07, "loss": 0.2517, "step": 43835 }, { "epoch": 0.9498840812081555, "grad_norm": 1.3332194089889526, "learning_rate": 1.236869357742776e-07, "loss": 0.3488, "step": 43840 }, { "epoch": 0.9499924165276363, "grad_norm": 1.5999966859817505, "learning_rate": 1.2315386608183877e-07, "loss": 0.2568, "step": 43845 }, { "epoch": 0.9501007518471172, "grad_norm": 1.67322838306427, "learning_rate": 1.2262194047406917e-07, "loss": 0.2037, "step": 43850 }, { "epoch": 0.950209087166598, "grad_norm": 1.6396971940994263, "learning_rate": 1.2209115901258506e-07, "loss": 0.2884, "step": 43855 }, { "epoch": 0.9503174224860789, "grad_norm": 1.699341893196106, "learning_rate": 1.215615217588695e-07, "loss": 0.1723, "step": 43860 }, { "epoch": 0.9504257578055598, "grad_norm": 1.2577810287475586, "learning_rate": 1.2103302877427114e-07, "loss": 0.184, "step": 43865 }, { "epoch": 0.9505340931250407, "grad_norm": 1.2909990549087524, "learning_rate": 1.2050568012000997e-07, "loss": 0.2215, "step": 43870 }, { "epoch": 0.9506424284445215, "grad_norm": 1.900970458984375, "learning_rate": 1.199794758571715e-07, "loss": 0.2569, "step": 43875 }, { "epoch": 0.9507507637640024, "grad_norm": 0.7921380400657654, "learning_rate": 1.1945441604670816e-07, "loss": 0.1919, "step": 43880 }, { "epoch": 0.9508590990834832, "grad_norm": 1.6764166355133057, "learning_rate": 1.1893050074944012e-07, "loss": 0.1887, "step": 43885 }, { "epoch": 0.950967434402964, "grad_norm": 1.3154277801513672, "learning_rate": 1.1840773002605555e-07, "loss": 0.1643, "step": 43890 }, { "epoch": 0.9510757697224449, "grad_norm": 1.1859244108200073, "learning_rate": 1.178861039371082e-07, "loss": 0.2197, "step": 43895 }, { "epoch": 0.9511841050419257, "grad_norm": 1.4740465879440308, "learning_rate": 1.1736562254302197e-07, "loss": 0.229, "step": 43900 }, { "epoch": 0.9512924403614066, "grad_norm": 1.0084396600723267, "learning_rate": 1.1684628590408641e-07, "loss": 0.2312, "step": 43905 }, { "epoch": 0.9514007756808874, "grad_norm": 2.378504753112793, "learning_rate": 1.1632809408046009e-07, "loss": 0.2458, "step": 43910 }, { "epoch": 0.9515091110003684, "grad_norm": 1.56035315990448, "learning_rate": 1.1581104713216496e-07, "loss": 0.2094, "step": 43915 }, { "epoch": 0.9516174463198492, "grad_norm": 1.4270448684692383, "learning_rate": 1.1529514511909534e-07, "loss": 0.2591, "step": 43920 }, { "epoch": 0.9517257816393301, "grad_norm": 1.331950306892395, "learning_rate": 1.1478038810101122e-07, "loss": 0.1996, "step": 43925 }, { "epoch": 0.9518341169588109, "grad_norm": 1.5167571306228638, "learning_rate": 1.1426677613753712e-07, "loss": 0.1799, "step": 43930 }, { "epoch": 0.9519424522782918, "grad_norm": 1.6616148948669434, "learning_rate": 1.1375430928816988e-07, "loss": 0.3048, "step": 43935 }, { "epoch": 0.9520507875977726, "grad_norm": 1.0714808702468872, "learning_rate": 1.1324298761226982e-07, "loss": 0.255, "step": 43940 }, { "epoch": 0.9521591229172535, "grad_norm": 1.1682839393615723, "learning_rate": 1.127328111690662e-07, "loss": 0.164, "step": 43945 }, { "epoch": 0.9522674582367343, "grad_norm": 1.4617927074432373, "learning_rate": 1.1222378001765399e-07, "loss": 0.2385, "step": 43950 }, { "epoch": 0.9523757935562152, "grad_norm": 1.7158235311508179, "learning_rate": 1.1171589421699935e-07, "loss": 0.2272, "step": 43955 }, { "epoch": 0.952484128875696, "grad_norm": 1.7274937629699707, "learning_rate": 1.1120915382593079e-07, "loss": 0.167, "step": 43960 }, { "epoch": 0.952592464195177, "grad_norm": 1.6847820281982422, "learning_rate": 1.1070355890314799e-07, "loss": 0.2315, "step": 43965 }, { "epoch": 0.9527007995146578, "grad_norm": 0.6705902814865112, "learning_rate": 1.1019910950721746e-07, "loss": 0.1987, "step": 43970 }, { "epoch": 0.9528091348341386, "grad_norm": 0.8543872237205505, "learning_rate": 1.0969580569656912e-07, "loss": 0.1808, "step": 43975 }, { "epoch": 0.9529174701536195, "grad_norm": 1.4425640106201172, "learning_rate": 1.0919364752950634e-07, "loss": 0.2061, "step": 43980 }, { "epoch": 0.9530258054731003, "grad_norm": 0.8584553599357605, "learning_rate": 1.086926350641948e-07, "loss": 0.2186, "step": 43985 }, { "epoch": 0.9531341407925812, "grad_norm": 0.9567883014678955, "learning_rate": 1.081927683586692e-07, "loss": 0.2272, "step": 43990 }, { "epoch": 0.953242476112062, "grad_norm": 1.2370504140853882, "learning_rate": 1.0769404747083211e-07, "loss": 0.2656, "step": 43995 }, { "epoch": 0.9533508114315429, "grad_norm": 2.1442949771881104, "learning_rate": 1.07196472458454e-07, "loss": 0.2189, "step": 44000 }, { "epoch": 0.9534591467510237, "grad_norm": 0.8894227147102356, "learning_rate": 1.0670004337916873e-07, "loss": 0.2053, "step": 44005 }, { "epoch": 0.9535674820705047, "grad_norm": 1.6117026805877686, "learning_rate": 1.0620476029048254e-07, "loss": 0.1269, "step": 44010 }, { "epoch": 0.9536758173899855, "grad_norm": 0.49922215938568115, "learning_rate": 1.0571062324976621e-07, "loss": 0.1501, "step": 44015 }, { "epoch": 0.9537841527094664, "grad_norm": 1.6574448347091675, "learning_rate": 1.052176323142573e-07, "loss": 0.1746, "step": 44020 }, { "epoch": 0.9538924880289472, "grad_norm": 1.2546160221099854, "learning_rate": 1.0472578754106234e-07, "loss": 0.2566, "step": 44025 }, { "epoch": 0.9540008233484281, "grad_norm": 1.7465580701828003, "learning_rate": 1.0423508898715351e-07, "loss": 0.2394, "step": 44030 }, { "epoch": 0.9541091586679089, "grad_norm": 1.427195429801941, "learning_rate": 1.0374553670937093e-07, "loss": 0.2052, "step": 44035 }, { "epoch": 0.9542174939873898, "grad_norm": 2.2571070194244385, "learning_rate": 1.0325713076442257e-07, "loss": 0.1999, "step": 44040 }, { "epoch": 0.9543258293068706, "grad_norm": 1.6872373819351196, "learning_rate": 1.0276987120888204e-07, "loss": 0.2179, "step": 44045 }, { "epoch": 0.9544341646263514, "grad_norm": 1.64475679397583, "learning_rate": 1.022837580991909e-07, "loss": 0.2515, "step": 44050 }, { "epoch": 0.9545424999458323, "grad_norm": 0.5999545454978943, "learning_rate": 1.017987914916596e-07, "loss": 0.2337, "step": 44055 }, { "epoch": 0.9546508352653132, "grad_norm": 1.3255175352096558, "learning_rate": 1.0131497144246216e-07, "loss": 0.1825, "step": 44060 }, { "epoch": 0.9547591705847941, "grad_norm": 1.2371753454208374, "learning_rate": 1.008322980076426e-07, "loss": 0.2091, "step": 44065 }, { "epoch": 0.9548675059042749, "grad_norm": 1.2458827495574951, "learning_rate": 1.0035077124311288e-07, "loss": 0.1906, "step": 44070 }, { "epoch": 0.9549758412237558, "grad_norm": 1.4469037055969238, "learning_rate": 9.987039120464947e-08, "loss": 0.2788, "step": 44075 }, { "epoch": 0.9550841765432366, "grad_norm": 1.063774824142456, "learning_rate": 9.939115794789789e-08, "loss": 0.2975, "step": 44080 }, { "epoch": 0.9551925118627175, "grad_norm": 1.9380407333374023, "learning_rate": 9.891307152836927e-08, "loss": 0.1916, "step": 44085 }, { "epoch": 0.9553008471821983, "grad_norm": 0.838022768497467, "learning_rate": 9.843613200144153e-08, "loss": 0.1744, "step": 44090 }, { "epoch": 0.9554091825016792, "grad_norm": 1.4531152248382568, "learning_rate": 9.796033942236493e-08, "loss": 0.2778, "step": 44095 }, { "epoch": 0.95551751782116, "grad_norm": 1.9408549070358276, "learning_rate": 9.748569384624873e-08, "loss": 0.2213, "step": 44100 }, { "epoch": 0.9556258531406409, "grad_norm": 1.8480321168899536, "learning_rate": 9.70121953280756e-08, "loss": 0.2121, "step": 44105 }, { "epoch": 0.9557341884601218, "grad_norm": 2.2787628173828125, "learning_rate": 9.653984392269167e-08, "loss": 0.1866, "step": 44110 }, { "epoch": 0.9558425237796027, "grad_norm": 1.0299218893051147, "learning_rate": 9.606863968481539e-08, "loss": 0.1357, "step": 44115 }, { "epoch": 0.9559508590990835, "grad_norm": 0.854932963848114, "learning_rate": 9.559858266902533e-08, "loss": 0.2759, "step": 44120 }, { "epoch": 0.9560591944185644, "grad_norm": 1.7108787298202515, "learning_rate": 9.512967292977126e-08, "loss": 0.2117, "step": 44125 }, { "epoch": 0.9561675297380452, "grad_norm": 1.1731802225112915, "learning_rate": 9.466191052136975e-08, "loss": 0.2169, "step": 44130 }, { "epoch": 0.956275865057526, "grad_norm": 1.336819052696228, "learning_rate": 9.41952954980041e-08, "loss": 0.2932, "step": 44135 }, { "epoch": 0.9563842003770069, "grad_norm": 1.111628532409668, "learning_rate": 9.372982791372443e-08, "loss": 0.2326, "step": 44140 }, { "epoch": 0.9564925356964877, "grad_norm": 1.0210994482040405, "learning_rate": 9.326550782244759e-08, "loss": 0.2145, "step": 44145 }, { "epoch": 0.9566008710159686, "grad_norm": 1.019813895225525, "learning_rate": 9.280233527795946e-08, "loss": 0.2602, "step": 44150 }, { "epoch": 0.9567092063354495, "grad_norm": 1.88810396194458, "learning_rate": 9.234031033391155e-08, "loss": 0.2519, "step": 44155 }, { "epoch": 0.9568175416549304, "grad_norm": 1.1030352115631104, "learning_rate": 9.187943304382107e-08, "loss": 0.2137, "step": 44160 }, { "epoch": 0.9569258769744112, "grad_norm": 1.6018991470336914, "learning_rate": 9.141970346107532e-08, "loss": 0.1608, "step": 44165 }, { "epoch": 0.9570342122938921, "grad_norm": 0.8403728008270264, "learning_rate": 9.0961121638925e-08, "loss": 0.1435, "step": 44170 }, { "epoch": 0.9571425476133729, "grad_norm": 1.2846242189407349, "learning_rate": 9.050368763049323e-08, "loss": 0.2467, "step": 44175 }, { "epoch": 0.9572508829328538, "grad_norm": 1.1459753513336182, "learning_rate": 9.004740148876311e-08, "loss": 0.2526, "step": 44180 }, { "epoch": 0.9573592182523346, "grad_norm": 1.2847458124160767, "learning_rate": 8.959226326659242e-08, "loss": 0.2368, "step": 44185 }, { "epoch": 0.9574675535718155, "grad_norm": 2.1028640270233154, "learning_rate": 8.913827301669897e-08, "loss": 0.2289, "step": 44190 }, { "epoch": 0.9575758888912963, "grad_norm": 1.1113756895065308, "learning_rate": 8.86854307916718e-08, "loss": 0.1857, "step": 44195 }, { "epoch": 0.9576842242107771, "grad_norm": 1.3627303838729858, "learning_rate": 8.823373664396673e-08, "loss": 0.215, "step": 44200 }, { "epoch": 0.9577925595302581, "grad_norm": 1.566563606262207, "learning_rate": 8.778319062590413e-08, "loss": 0.2304, "step": 44205 }, { "epoch": 0.957900894849739, "grad_norm": 2.0119128227233887, "learning_rate": 8.733379278967446e-08, "loss": 0.2393, "step": 44210 }, { "epoch": 0.9580092301692198, "grad_norm": 1.5118359327316284, "learning_rate": 8.688554318733278e-08, "loss": 0.2198, "step": 44215 }, { "epoch": 0.9581175654887006, "grad_norm": 1.1151797771453857, "learning_rate": 8.643844187080308e-08, "loss": 0.2472, "step": 44220 }, { "epoch": 0.9582259008081815, "grad_norm": 1.1049869060516357, "learning_rate": 8.599248889187395e-08, "loss": 0.2242, "step": 44225 }, { "epoch": 0.9583342361276623, "grad_norm": 1.7645286321640015, "learning_rate": 8.554768430220406e-08, "loss": 0.24, "step": 44230 }, { "epoch": 0.9584425714471432, "grad_norm": 2.1099660396575928, "learning_rate": 8.510402815331553e-08, "loss": 0.2509, "step": 44235 }, { "epoch": 0.958550906766624, "grad_norm": 1.2264962196350098, "learning_rate": 8.46615204966017e-08, "loss": 0.1474, "step": 44240 }, { "epoch": 0.9586592420861049, "grad_norm": 1.0172464847564697, "learning_rate": 8.422016138331712e-08, "loss": 0.1876, "step": 44245 }, { "epoch": 0.9587675774055857, "grad_norm": 2.2704715728759766, "learning_rate": 8.377995086458979e-08, "loss": 0.2143, "step": 44250 }, { "epoch": 0.9588759127250667, "grad_norm": 1.1049327850341797, "learning_rate": 8.334088899141001e-08, "loss": 0.2527, "step": 44255 }, { "epoch": 0.9589842480445475, "grad_norm": 1.178877830505371, "learning_rate": 8.290297581463603e-08, "loss": 0.1641, "step": 44260 }, { "epoch": 0.9590925833640284, "grad_norm": 1.2847130298614502, "learning_rate": 8.246621138499499e-08, "loss": 0.2651, "step": 44265 }, { "epoch": 0.9592009186835092, "grad_norm": 1.5471224784851074, "learning_rate": 8.203059575307759e-08, "loss": 0.2407, "step": 44270 }, { "epoch": 0.95930925400299, "grad_norm": 1.2456380128860474, "learning_rate": 8.159612896934566e-08, "loss": 0.1973, "step": 44275 }, { "epoch": 0.9594175893224709, "grad_norm": 0.8531249165534973, "learning_rate": 8.116281108412338e-08, "loss": 0.2373, "step": 44280 }, { "epoch": 0.9595259246419517, "grad_norm": 1.7143923044204712, "learning_rate": 8.073064214760618e-08, "loss": 0.1956, "step": 44285 }, { "epoch": 0.9596342599614326, "grad_norm": 0.9521524906158447, "learning_rate": 8.029962220985399e-08, "loss": 0.19, "step": 44290 }, { "epoch": 0.9597425952809134, "grad_norm": 1.547978401184082, "learning_rate": 7.986975132079244e-08, "loss": 0.1549, "step": 44295 }, { "epoch": 0.9598509306003944, "grad_norm": 1.2669563293457031, "learning_rate": 7.944102953021616e-08, "loss": 0.2752, "step": 44300 }, { "epoch": 0.9599592659198752, "grad_norm": 0.6695440411567688, "learning_rate": 7.901345688778761e-08, "loss": 0.193, "step": 44305 }, { "epoch": 0.9600676012393561, "grad_norm": 1.0224336385726929, "learning_rate": 7.858703344303386e-08, "loss": 0.2551, "step": 44310 }, { "epoch": 0.9601759365588369, "grad_norm": 1.5927270650863647, "learning_rate": 7.816175924534874e-08, "loss": 0.1859, "step": 44315 }, { "epoch": 0.9602842718783178, "grad_norm": 1.4583420753479004, "learning_rate": 7.773763434399506e-08, "loss": 0.2832, "step": 44320 }, { "epoch": 0.9603926071977986, "grad_norm": 1.4817650318145752, "learning_rate": 7.73146587881013e-08, "loss": 0.2222, "step": 44325 }, { "epoch": 0.9605009425172795, "grad_norm": 1.8223280906677246, "learning_rate": 7.689283262666159e-08, "loss": 0.1775, "step": 44330 }, { "epoch": 0.9606092778367603, "grad_norm": 1.7643147706985474, "learning_rate": 7.647215590854018e-08, "loss": 0.2987, "step": 44335 }, { "epoch": 0.9607176131562412, "grad_norm": 2.5190207958221436, "learning_rate": 7.605262868246477e-08, "loss": 0.3086, "step": 44340 }, { "epoch": 0.960825948475722, "grad_norm": 1.3041682243347168, "learning_rate": 7.563425099703092e-08, "loss": 0.159, "step": 44345 }, { "epoch": 0.960934283795203, "grad_norm": 2.61574649810791, "learning_rate": 7.52170229007021e-08, "loss": 0.2708, "step": 44350 }, { "epoch": 0.9610426191146838, "grad_norm": 1.2516627311706543, "learning_rate": 7.480094444180853e-08, "loss": 0.2939, "step": 44355 }, { "epoch": 0.9611509544341647, "grad_norm": 1.627946138381958, "learning_rate": 7.438601566854609e-08, "loss": 0.2745, "step": 44360 }, { "epoch": 0.9612592897536455, "grad_norm": 1.333847999572754, "learning_rate": 7.397223662897856e-08, "loss": 0.1956, "step": 44365 }, { "epoch": 0.9613676250731263, "grad_norm": 1.2049221992492676, "learning_rate": 7.35596073710343e-08, "loss": 0.1454, "step": 44370 }, { "epoch": 0.9614759603926072, "grad_norm": 1.6246076822280884, "learning_rate": 7.31481279425128e-08, "loss": 0.2196, "step": 44375 }, { "epoch": 0.961584295712088, "grad_norm": 1.4769045114517212, "learning_rate": 7.273779839107598e-08, "loss": 0.2767, "step": 44380 }, { "epoch": 0.9616926310315689, "grad_norm": 0.7266498804092407, "learning_rate": 7.232861876425357e-08, "loss": 0.1636, "step": 44385 }, { "epoch": 0.9618009663510497, "grad_norm": 1.3076221942901611, "learning_rate": 7.192058910944544e-08, "loss": 0.143, "step": 44390 }, { "epoch": 0.9619093016705307, "grad_norm": 1.876136302947998, "learning_rate": 7.151370947391379e-08, "loss": 0.1764, "step": 44395 }, { "epoch": 0.9620176369900115, "grad_norm": 1.805768370628357, "learning_rate": 7.11079799047909e-08, "loss": 0.2229, "step": 44400 }, { "epoch": 0.9621259723094924, "grad_norm": 1.121151328086853, "learning_rate": 7.070340044907364e-08, "loss": 0.1962, "step": 44405 }, { "epoch": 0.9622343076289732, "grad_norm": 1.80580472946167, "learning_rate": 7.029997115362564e-08, "loss": 0.2025, "step": 44410 }, { "epoch": 0.9623426429484541, "grad_norm": 1.6232651472091675, "learning_rate": 6.98976920651806e-08, "loss": 0.2026, "step": 44415 }, { "epoch": 0.9624509782679349, "grad_norm": 1.089759111404419, "learning_rate": 6.949656323033349e-08, "loss": 0.224, "step": 44420 }, { "epoch": 0.9625593135874158, "grad_norm": 1.5403679609298706, "learning_rate": 6.909658469555159e-08, "loss": 0.2254, "step": 44425 }, { "epoch": 0.9626676489068966, "grad_norm": 1.211065649986267, "learning_rate": 6.869775650716448e-08, "loss": 0.226, "step": 44430 }, { "epoch": 0.9627759842263774, "grad_norm": 0.8904886245727539, "learning_rate": 6.830007871137301e-08, "loss": 0.1607, "step": 44435 }, { "epoch": 0.9628843195458583, "grad_norm": 1.0672160387039185, "learning_rate": 6.790355135423921e-08, "loss": 0.1536, "step": 44440 }, { "epoch": 0.9629926548653392, "grad_norm": 1.740075945854187, "learning_rate": 6.750817448169633e-08, "loss": 0.329, "step": 44445 }, { "epoch": 0.9631009901848201, "grad_norm": 1.8303483724594116, "learning_rate": 6.71139481395433e-08, "loss": 0.2163, "step": 44450 }, { "epoch": 0.9632093255043009, "grad_norm": 1.2183977365493774, "learning_rate": 6.67208723734436e-08, "loss": 0.1985, "step": 44455 }, { "epoch": 0.9633176608237818, "grad_norm": 1.4519813060760498, "learning_rate": 6.63289472289319e-08, "loss": 0.2405, "step": 44460 }, { "epoch": 0.9634259961432626, "grad_norm": 1.2153302431106567, "learning_rate": 6.593817275140413e-08, "loss": 0.171, "step": 44465 }, { "epoch": 0.9635343314627435, "grad_norm": 0.9654567241668701, "learning_rate": 6.554854898612739e-08, "loss": 0.1985, "step": 44470 }, { "epoch": 0.9636426667822243, "grad_norm": 1.5359199047088623, "learning_rate": 6.516007597823338e-08, "loss": 0.1863, "step": 44475 }, { "epoch": 0.9637510021017052, "grad_norm": 1.4966379404067993, "learning_rate": 6.477275377272052e-08, "loss": 0.2746, "step": 44480 }, { "epoch": 0.963859337421186, "grad_norm": 0.8903793096542358, "learning_rate": 6.438658241445405e-08, "loss": 0.2299, "step": 44485 }, { "epoch": 0.9639676727406669, "grad_norm": 2.308326482772827, "learning_rate": 6.400156194816598e-08, "loss": 0.234, "step": 44490 }, { "epoch": 0.9640760080601478, "grad_norm": 1.7601664066314697, "learning_rate": 6.361769241845617e-08, "loss": 0.2527, "step": 44495 }, { "epoch": 0.9641843433796287, "grad_norm": 1.799222707748413, "learning_rate": 6.323497386979016e-08, "loss": 0.1499, "step": 44500 }, { "epoch": 0.9642926786991095, "grad_norm": 0.6645825505256653, "learning_rate": 6.285340634650028e-08, "loss": 0.195, "step": 44505 }, { "epoch": 0.9644010140185904, "grad_norm": 1.590179681777954, "learning_rate": 6.247298989278339e-08, "loss": 0.1908, "step": 44510 }, { "epoch": 0.9645093493380712, "grad_norm": 1.556501865386963, "learning_rate": 6.209372455270757e-08, "loss": 0.2518, "step": 44515 }, { "epoch": 0.964617684657552, "grad_norm": 0.9395818114280701, "learning_rate": 6.171561037020324e-08, "loss": 0.1786, "step": 44520 }, { "epoch": 0.9647260199770329, "grad_norm": 1.4433512687683105, "learning_rate": 6.133864738906981e-08, "loss": 0.2888, "step": 44525 }, { "epoch": 0.9648343552965137, "grad_norm": 1.6313401460647583, "learning_rate": 6.096283565297345e-08, "loss": 0.1971, "step": 44530 }, { "epoch": 0.9649426906159946, "grad_norm": 1.4961347579956055, "learning_rate": 6.058817520544601e-08, "loss": 0.1776, "step": 44535 }, { "epoch": 0.9650510259354755, "grad_norm": 0.5524575710296631, "learning_rate": 6.021466608988503e-08, "loss": 0.1589, "step": 44540 }, { "epoch": 0.9651593612549564, "grad_norm": 0.7971532940864563, "learning_rate": 5.984230834955806e-08, "loss": 0.138, "step": 44545 }, { "epoch": 0.9652676965744372, "grad_norm": 0.4715363383293152, "learning_rate": 5.9471102027596204e-08, "loss": 0.1886, "step": 44550 }, { "epoch": 0.9653760318939181, "grad_norm": 1.7406504154205322, "learning_rate": 5.910104716699727e-08, "loss": 0.3089, "step": 44555 }, { "epoch": 0.9654843672133989, "grad_norm": 1.434558629989624, "learning_rate": 5.873214381062808e-08, "loss": 0.272, "step": 44560 }, { "epoch": 0.9655927025328798, "grad_norm": 1.4274940490722656, "learning_rate": 5.8364392001218905e-08, "loss": 0.2153, "step": 44565 }, { "epoch": 0.9657010378523606, "grad_norm": 2.3031184673309326, "learning_rate": 5.7997791781370103e-08, "loss": 0.1557, "step": 44570 }, { "epoch": 0.9658093731718415, "grad_norm": 1.0169129371643066, "learning_rate": 5.763234319354549e-08, "loss": 0.2028, "step": 44575 }, { "epoch": 0.9659177084913223, "grad_norm": 0.9640418887138367, "learning_rate": 5.726804628007787e-08, "loss": 0.2468, "step": 44580 }, { "epoch": 0.9660260438108031, "grad_norm": 1.6022602319717407, "learning_rate": 5.69049010831646e-08, "loss": 0.2595, "step": 44585 }, { "epoch": 0.9661343791302841, "grad_norm": 1.4709141254425049, "learning_rate": 5.6542907644869806e-08, "loss": 0.1842, "step": 44590 }, { "epoch": 0.966242714449765, "grad_norm": 1.339958906173706, "learning_rate": 5.6182066007127724e-08, "loss": 0.2902, "step": 44595 }, { "epoch": 0.9663510497692458, "grad_norm": 1.3137978315353394, "learning_rate": 5.582237621173492e-08, "loss": 0.2713, "step": 44600 }, { "epoch": 0.9664593850887266, "grad_norm": 1.860376238822937, "learning_rate": 5.5463838300355846e-08, "loss": 0.1941, "step": 44605 }, { "epoch": 0.9665677204082075, "grad_norm": 1.498091697692871, "learning_rate": 5.510645231452172e-08, "loss": 0.2458, "step": 44610 }, { "epoch": 0.9666760557276883, "grad_norm": 1.7609153985977173, "learning_rate": 5.475021829563054e-08, "loss": 0.22, "step": 44615 }, { "epoch": 0.9667843910471692, "grad_norm": 1.1600093841552734, "learning_rate": 5.439513628494708e-08, "loss": 0.2643, "step": 44620 }, { "epoch": 0.96689272636665, "grad_norm": 1.2347511053085327, "learning_rate": 5.404120632360177e-08, "loss": 0.1471, "step": 44625 }, { "epoch": 0.9670010616861309, "grad_norm": 2.029334783554077, "learning_rate": 5.3688428452591814e-08, "loss": 0.3398, "step": 44630 }, { "epoch": 0.9671093970056117, "grad_norm": 1.0779179334640503, "learning_rate": 5.33368027127823e-08, "loss": 0.2065, "step": 44635 }, { "epoch": 0.9672177323250927, "grad_norm": 1.6108381748199463, "learning_rate": 5.298632914490176e-08, "loss": 0.2162, "step": 44640 }, { "epoch": 0.9673260676445735, "grad_norm": 3.058647394180298, "learning_rate": 5.263700778955105e-08, "loss": 0.2744, "step": 44645 }, { "epoch": 0.9674344029640544, "grad_norm": 1.6894854307174683, "learning_rate": 5.2288838687190035e-08, "loss": 0.2905, "step": 44650 }, { "epoch": 0.9675427382835352, "grad_norm": 1.630898356437683, "learning_rate": 5.194182187815089e-08, "loss": 0.1564, "step": 44655 }, { "epoch": 0.967651073603016, "grad_norm": 0.9334551692008972, "learning_rate": 5.159595740262924e-08, "loss": 0.1747, "step": 44660 }, { "epoch": 0.9677594089224969, "grad_norm": 1.3084238767623901, "learning_rate": 5.12512453006897e-08, "loss": 0.2263, "step": 44665 }, { "epoch": 0.9678677442419777, "grad_norm": 1.4960070848464966, "learning_rate": 5.090768561226034e-08, "loss": 0.2507, "step": 44670 }, { "epoch": 0.9679760795614586, "grad_norm": 1.377649188041687, "learning_rate": 5.05652783771382e-08, "loss": 0.1773, "step": 44675 }, { "epoch": 0.9680844148809394, "grad_norm": 0.7415347695350647, "learning_rate": 5.022402363498602e-08, "loss": 0.1659, "step": 44680 }, { "epoch": 0.9681927502004204, "grad_norm": 0.9587848782539368, "learning_rate": 4.988392142533327e-08, "loss": 0.2173, "step": 44685 }, { "epoch": 0.9683010855199012, "grad_norm": 1.2912787199020386, "learning_rate": 4.954497178757622e-08, "loss": 0.271, "step": 44690 }, { "epoch": 0.9684094208393821, "grad_norm": 1.052193522453308, "learning_rate": 4.9207174760974586e-08, "loss": 0.2602, "step": 44695 }, { "epoch": 0.9685177561588629, "grad_norm": 1.7843509912490845, "learning_rate": 4.887053038466039e-08, "loss": 0.2831, "step": 44700 }, { "epoch": 0.9686260914783438, "grad_norm": 1.0456784963607788, "learning_rate": 4.853503869762688e-08, "loss": 0.2703, "step": 44705 }, { "epoch": 0.9687344267978246, "grad_norm": 1.5607784986495972, "learning_rate": 4.8200699738736313e-08, "loss": 0.2064, "step": 44710 }, { "epoch": 0.9688427621173055, "grad_norm": 1.3285229206085205, "learning_rate": 4.786751354671659e-08, "loss": 0.228, "step": 44715 }, { "epoch": 0.9689510974367863, "grad_norm": 1.6557236909866333, "learning_rate": 4.753548016016352e-08, "loss": 0.2018, "step": 44720 }, { "epoch": 0.9690594327562672, "grad_norm": 1.1646134853363037, "learning_rate": 4.7204599617535206e-08, "loss": 0.2184, "step": 44725 }, { "epoch": 0.969167768075748, "grad_norm": 1.4984230995178223, "learning_rate": 4.687487195716323e-08, "loss": 0.2043, "step": 44730 }, { "epoch": 0.969276103395229, "grad_norm": 1.6823357343673706, "learning_rate": 4.6546297217238134e-08, "loss": 0.1777, "step": 44735 }, { "epoch": 0.9693844387147098, "grad_norm": 1.2947216033935547, "learning_rate": 4.621887543582171e-08, "loss": 0.2408, "step": 44740 }, { "epoch": 0.9694927740341907, "grad_norm": 1.5413480997085571, "learning_rate": 4.589260665084139e-08, "loss": 0.1845, "step": 44745 }, { "epoch": 0.9696011093536715, "grad_norm": 1.9933260679244995, "learning_rate": 4.5567490900090275e-08, "loss": 0.2339, "step": 44750 }, { "epoch": 0.9697094446731523, "grad_norm": 1.6078622341156006, "learning_rate": 4.524352822122824e-08, "loss": 0.2601, "step": 44755 }, { "epoch": 0.9698177799926332, "grad_norm": 1.4072273969650269, "learning_rate": 4.4920718651779715e-08, "loss": 0.2934, "step": 44760 }, { "epoch": 0.969926115312114, "grad_norm": 1.101863980293274, "learning_rate": 4.4599062229140347e-08, "loss": 0.2166, "step": 44765 }, { "epoch": 0.9700344506315949, "grad_norm": 1.2994377613067627, "learning_rate": 4.427855899056699e-08, "loss": 0.324, "step": 44770 }, { "epoch": 0.9701427859510757, "grad_norm": 1.2731620073318481, "learning_rate": 4.395920897318662e-08, "loss": 0.2453, "step": 44775 }, { "epoch": 0.9702511212705567, "grad_norm": 1.2530461549758911, "learning_rate": 4.364101221398964e-08, "loss": 0.1768, "step": 44780 }, { "epoch": 0.9703594565900375, "grad_norm": 0.982438325881958, "learning_rate": 4.332396874983547e-08, "loss": 0.2296, "step": 44785 }, { "epoch": 0.9704677919095184, "grad_norm": 1.0730913877487183, "learning_rate": 4.3008078617448044e-08, "loss": 0.1855, "step": 44790 }, { "epoch": 0.9705761272289992, "grad_norm": 2.027111291885376, "learning_rate": 4.269334185341922e-08, "loss": 0.1709, "step": 44795 }, { "epoch": 0.9706844625484801, "grad_norm": 0.9276526570320129, "learning_rate": 4.2379758494207615e-08, "loss": 0.2374, "step": 44800 }, { "epoch": 0.9707927978679609, "grad_norm": 1.7691082954406738, "learning_rate": 4.2067328576134156e-08, "loss": 0.2376, "step": 44805 }, { "epoch": 0.9709011331874418, "grad_norm": 1.1689499616622925, "learning_rate": 4.175605213539102e-08, "loss": 0.2419, "step": 44810 }, { "epoch": 0.9710094685069226, "grad_norm": 1.302064299583435, "learning_rate": 4.144592920803603e-08, "loss": 0.1772, "step": 44815 }, { "epoch": 0.9711178038264034, "grad_norm": 1.9012690782546997, "learning_rate": 4.113695982998933e-08, "loss": 0.2125, "step": 44820 }, { "epoch": 0.9712261391458843, "grad_norm": 1.6140450239181519, "learning_rate": 4.082914403704341e-08, "loss": 0.3063, "step": 44825 }, { "epoch": 0.9713344744653652, "grad_norm": 1.0120935440063477, "learning_rate": 4.052248186485197e-08, "loss": 0.1561, "step": 44830 }, { "epoch": 0.9714428097848461, "grad_norm": 1.4592844247817993, "learning_rate": 4.021697334893881e-08, "loss": 0.2052, "step": 44835 }, { "epoch": 0.9715511451043269, "grad_norm": 1.3501302003860474, "learning_rate": 3.9912618524691196e-08, "loss": 0.1486, "step": 44840 }, { "epoch": 0.9716594804238078, "grad_norm": 1.1623393297195435, "learning_rate": 3.960941742736424e-08, "loss": 0.2116, "step": 44845 }, { "epoch": 0.9717678157432886, "grad_norm": 1.7281523942947388, "learning_rate": 3.9307370092080966e-08, "loss": 0.2754, "step": 44850 }, { "epoch": 0.9718761510627695, "grad_norm": 1.7567999362945557, "learning_rate": 3.900647655382561e-08, "loss": 0.224, "step": 44855 }, { "epoch": 0.9719844863822503, "grad_norm": 1.5160114765167236, "learning_rate": 3.870673684745585e-08, "loss": 0.2768, "step": 44860 }, { "epoch": 0.9720928217017312, "grad_norm": 1.7527713775634766, "learning_rate": 3.840815100769058e-08, "loss": 0.2223, "step": 44865 }, { "epoch": 0.972201157021212, "grad_norm": 0.7118023037910461, "learning_rate": 3.8110719069115456e-08, "loss": 0.1907, "step": 44870 }, { "epoch": 0.9723094923406929, "grad_norm": 0.5930436849594116, "learning_rate": 3.7814441066185145e-08, "loss": 0.1926, "step": 44875 }, { "epoch": 0.9724178276601738, "grad_norm": 1.2533140182495117, "learning_rate": 3.751931703321776e-08, "loss": 0.2395, "step": 44880 }, { "epoch": 0.9725261629796547, "grad_norm": 1.4509140253067017, "learning_rate": 3.7225347004400395e-08, "loss": 0.1685, "step": 44885 }, { "epoch": 0.9726344982991355, "grad_norm": 1.0472948551177979, "learning_rate": 3.693253101378358e-08, "loss": 0.2027, "step": 44890 }, { "epoch": 0.9727428336186164, "grad_norm": 0.9590471386909485, "learning_rate": 3.6640869095285745e-08, "loss": 0.2023, "step": 44895 }, { "epoch": 0.9728511689380972, "grad_norm": 1.4355548620224, "learning_rate": 3.63503612826932e-08, "loss": 0.2847, "step": 44900 }, { "epoch": 0.972959504257578, "grad_norm": 1.669304370880127, "learning_rate": 3.6061007609655696e-08, "loss": 0.2656, "step": 44905 }, { "epoch": 0.9730678395770589, "grad_norm": 0.9108738899230957, "learning_rate": 3.577280810968975e-08, "loss": 0.1879, "step": 44910 }, { "epoch": 0.9731761748965397, "grad_norm": 2.1939878463745117, "learning_rate": 3.548576281618088e-08, "loss": 0.3624, "step": 44915 }, { "epoch": 0.9732845102160206, "grad_norm": 0.9974406361579895, "learning_rate": 3.5199871762376934e-08, "loss": 0.2054, "step": 44920 }, { "epoch": 0.9733928455355015, "grad_norm": 1.0147185325622559, "learning_rate": 3.491513498139587e-08, "loss": 0.1803, "step": 44925 }, { "epoch": 0.9735011808549824, "grad_norm": 1.8863025903701782, "learning_rate": 3.463155250621908e-08, "loss": 0.159, "step": 44930 }, { "epoch": 0.9736095161744632, "grad_norm": 1.2348638772964478, "learning_rate": 3.434912436969584e-08, "loss": 0.2664, "step": 44935 }, { "epoch": 0.9737178514939441, "grad_norm": 1.3852134943008423, "learning_rate": 3.406785060454221e-08, "loss": 0.1386, "step": 44940 }, { "epoch": 0.9738261868134249, "grad_norm": 1.4480928182601929, "learning_rate": 3.3787731243336566e-08, "loss": 0.2142, "step": 44945 }, { "epoch": 0.9739345221329058, "grad_norm": 1.4392889738082886, "learning_rate": 3.3508766318529616e-08, "loss": 0.172, "step": 44950 }, { "epoch": 0.9740428574523866, "grad_norm": 1.3692651987075806, "learning_rate": 3.323095586243441e-08, "loss": 0.2131, "step": 44955 }, { "epoch": 0.9741511927718675, "grad_norm": 0.9577040672302246, "learning_rate": 3.2954299907229644e-08, "loss": 0.2474, "step": 44960 }, { "epoch": 0.9742595280913483, "grad_norm": 1.3291572332382202, "learning_rate": 3.267879848496303e-08, "loss": 0.1327, "step": 44965 }, { "epoch": 0.9743678634108291, "grad_norm": 1.7716989517211914, "learning_rate": 3.240445162754791e-08, "loss": 0.1462, "step": 44970 }, { "epoch": 0.9744761987303101, "grad_norm": 1.2002277374267578, "learning_rate": 3.21312593667622e-08, "loss": 0.2694, "step": 44975 }, { "epoch": 0.974584534049791, "grad_norm": 1.390627145767212, "learning_rate": 3.18592217342506e-08, "loss": 0.1679, "step": 44980 }, { "epoch": 0.9746928693692718, "grad_norm": 1.612674355506897, "learning_rate": 3.1588338761526784e-08, "loss": 0.1991, "step": 44985 }, { "epoch": 0.9748012046887526, "grad_norm": 0.4351104497909546, "learning_rate": 3.131861047996676e-08, "loss": 0.2132, "step": 44990 }, { "epoch": 0.9749095400082335, "grad_norm": 1.4158021211624146, "learning_rate": 3.105003692081443e-08, "loss": 0.2154, "step": 44995 }, { "epoch": 0.9750178753277143, "grad_norm": 2.3336269855499268, "learning_rate": 3.078261811518046e-08, "loss": 0.1812, "step": 45000 }, { "epoch": 0.9751262106471952, "grad_norm": 3.6860103607177734, "learning_rate": 3.051635409404119e-08, "loss": 0.274, "step": 45005 }, { "epoch": 0.975234545966676, "grad_norm": 1.4642060995101929, "learning_rate": 3.025124488823972e-08, "loss": 0.2541, "step": 45010 }, { "epoch": 0.9753428812861569, "grad_norm": 1.17422354221344, "learning_rate": 2.9987290528484815e-08, "loss": 0.1586, "step": 45015 }, { "epoch": 0.9754512166056377, "grad_norm": 1.1621614694595337, "learning_rate": 2.9724491045352023e-08, "loss": 0.1452, "step": 45020 }, { "epoch": 0.9755595519251187, "grad_norm": 0.8434295058250427, "learning_rate": 2.9462846469281437e-08, "loss": 0.1694, "step": 45025 }, { "epoch": 0.9756678872445995, "grad_norm": 1.927372694015503, "learning_rate": 2.9202356830582145e-08, "loss": 0.1633, "step": 45030 }, { "epoch": 0.9757762225640804, "grad_norm": 1.6244837045669556, "learning_rate": 2.894302215942557e-08, "loss": 0.2254, "step": 45035 }, { "epoch": 0.9758845578835612, "grad_norm": 1.1371666193008423, "learning_rate": 2.8684842485855456e-08, "loss": 0.181, "step": 45040 }, { "epoch": 0.9759928932030421, "grad_norm": 1.632996916770935, "learning_rate": 2.8427817839774553e-08, "loss": 0.2305, "step": 45045 }, { "epoch": 0.9761012285225229, "grad_norm": 0.8338512778282166, "learning_rate": 2.8171948250957925e-08, "loss": 0.2097, "step": 45050 }, { "epoch": 0.9762095638420037, "grad_norm": 1.5351735353469849, "learning_rate": 2.7917233749042984e-08, "loss": 0.2272, "step": 45055 }, { "epoch": 0.9763178991614846, "grad_norm": 1.4077574014663696, "learning_rate": 2.76636743635339e-08, "loss": 0.1841, "step": 45060 }, { "epoch": 0.9764262344809654, "grad_norm": 1.109689474105835, "learning_rate": 2.7411270123803847e-08, "loss": 0.2592, "step": 45065 }, { "epoch": 0.9765345698004464, "grad_norm": 1.45572030544281, "learning_rate": 2.7160021059087215e-08, "loss": 0.2925, "step": 45070 }, { "epoch": 0.9766429051199272, "grad_norm": 1.8437707424163818, "learning_rate": 2.6909927198490727e-08, "loss": 0.2714, "step": 45075 }, { "epoch": 0.9767512404394081, "grad_norm": 1.375505805015564, "learning_rate": 2.6660988570981206e-08, "loss": 0.212, "step": 45080 }, { "epoch": 0.9768595757588889, "grad_norm": 1.521552562713623, "learning_rate": 2.6413205205395586e-08, "loss": 0.2289, "step": 45085 }, { "epoch": 0.9769679110783698, "grad_norm": 1.7024385929107666, "learning_rate": 2.6166577130436466e-08, "loss": 0.3333, "step": 45090 }, { "epoch": 0.9770762463978506, "grad_norm": 1.3405566215515137, "learning_rate": 2.592110437466988e-08, "loss": 0.3508, "step": 45095 }, { "epoch": 0.9771845817173315, "grad_norm": 1.5131981372833252, "learning_rate": 2.5676786966533084e-08, "loss": 0.1931, "step": 45100 }, { "epoch": 0.9772929170368123, "grad_norm": 1.0862390995025635, "learning_rate": 2.5433624934324554e-08, "loss": 0.1725, "step": 45105 }, { "epoch": 0.9774012523562932, "grad_norm": 1.0875300168991089, "learning_rate": 2.519161830621175e-08, "loss": 0.2784, "step": 45110 }, { "epoch": 0.977509587675774, "grad_norm": 1.984606385231018, "learning_rate": 2.4950767110227813e-08, "loss": 0.3264, "step": 45115 }, { "epoch": 0.977617922995255, "grad_norm": 0.5002038478851318, "learning_rate": 2.4711071374270423e-08, "loss": 0.1678, "step": 45120 }, { "epoch": 0.9777262583147358, "grad_norm": 1.7277394533157349, "learning_rate": 2.4472531126106258e-08, "loss": 0.2875, "step": 45125 }, { "epoch": 0.9778345936342167, "grad_norm": 1.6097867488861084, "learning_rate": 2.4235146393365438e-08, "loss": 0.2043, "step": 45130 }, { "epoch": 0.9779429289536975, "grad_norm": 1.3395907878875732, "learning_rate": 2.399891720354597e-08, "loss": 0.2593, "step": 45135 }, { "epoch": 0.9780512642731783, "grad_norm": 1.0417582988739014, "learning_rate": 2.3763843584011515e-08, "loss": 0.1492, "step": 45140 }, { "epoch": 0.9781595995926592, "grad_norm": 1.8048323392868042, "learning_rate": 2.3529925561992516e-08, "loss": 0.1997, "step": 45145 }, { "epoch": 0.97826793491214, "grad_norm": 1.3356884717941284, "learning_rate": 2.329716316458397e-08, "loss": 0.2781, "step": 45150 }, { "epoch": 0.9783762702316209, "grad_norm": 1.1708173751831055, "learning_rate": 2.3065556418747635e-08, "loss": 0.2912, "step": 45155 }, { "epoch": 0.9784846055511017, "grad_norm": 1.2175639867782593, "learning_rate": 2.283510535131206e-08, "loss": 0.2337, "step": 45160 }, { "epoch": 0.9785929408705826, "grad_norm": 1.3951319456100464, "learning_rate": 2.260580998897144e-08, "loss": 0.2188, "step": 45165 }, { "epoch": 0.9787012761900635, "grad_norm": 1.3771493434906006, "learning_rate": 2.2377670358286752e-08, "loss": 0.1664, "step": 45170 }, { "epoch": 0.9788096115095444, "grad_norm": 1.0968724489212036, "learning_rate": 2.2150686485683527e-08, "loss": 0.2078, "step": 45175 }, { "epoch": 0.9789179468290252, "grad_norm": 1.2280231714248657, "learning_rate": 2.1924858397456285e-08, "loss": 0.2113, "step": 45180 }, { "epoch": 0.9790262821485061, "grad_norm": 1.43962824344635, "learning_rate": 2.1700186119761878e-08, "loss": 0.2283, "step": 45185 }, { "epoch": 0.9791346174679869, "grad_norm": 1.2828043699264526, "learning_rate": 2.1476669678626162e-08, "loss": 0.266, "step": 45190 }, { "epoch": 0.9792429527874678, "grad_norm": 2.091651201248169, "learning_rate": 2.125430909993953e-08, "loss": 0.2747, "step": 45195 }, { "epoch": 0.9793512881069486, "grad_norm": 1.8798521757125854, "learning_rate": 2.103310440946027e-08, "loss": 0.262, "step": 45200 }, { "epoch": 0.9794596234264294, "grad_norm": 1.4208229780197144, "learning_rate": 2.0813055632811218e-08, "loss": 0.1907, "step": 45205 }, { "epoch": 0.9795679587459103, "grad_norm": 1.8804086446762085, "learning_rate": 2.0594162795480875e-08, "loss": 0.2021, "step": 45210 }, { "epoch": 0.9796762940653913, "grad_norm": 1.5021512508392334, "learning_rate": 2.0376425922824515e-08, "loss": 0.2375, "step": 45215 }, { "epoch": 0.9797846293848721, "grad_norm": 0.9616076350212097, "learning_rate": 2.0159845040065297e-08, "loss": 0.1544, "step": 45220 }, { "epoch": 0.9798929647043529, "grad_norm": 1.4867527484893799, "learning_rate": 1.9944420172290923e-08, "loss": 0.2097, "step": 45225 }, { "epoch": 0.9800013000238338, "grad_norm": 1.398067593574524, "learning_rate": 1.9730151344452553e-08, "loss": 0.1796, "step": 45230 }, { "epoch": 0.9801096353433146, "grad_norm": 1.3706114292144775, "learning_rate": 1.951703858137255e-08, "loss": 0.2566, "step": 45235 }, { "epoch": 0.9802179706627955, "grad_norm": 1.543527603149414, "learning_rate": 1.9305081907735612e-08, "loss": 0.1587, "step": 45240 }, { "epoch": 0.9803263059822763, "grad_norm": 1.4972401857376099, "learning_rate": 1.9094281348095435e-08, "loss": 0.2284, "step": 45245 }, { "epoch": 0.9804346413017572, "grad_norm": 1.893064022064209, "learning_rate": 1.8884636926866927e-08, "loss": 0.2422, "step": 45250 }, { "epoch": 0.980542976621238, "grad_norm": 1.8529834747314453, "learning_rate": 1.8676148668337333e-08, "loss": 0.1922, "step": 45255 }, { "epoch": 0.9806513119407189, "grad_norm": 1.0888338088989258, "learning_rate": 1.8468816596656224e-08, "loss": 0.2193, "step": 45260 }, { "epoch": 0.9807596472601998, "grad_norm": 0.7366431355476379, "learning_rate": 1.826264073583772e-08, "loss": 0.2321, "step": 45265 }, { "epoch": 0.9808679825796807, "grad_norm": 1.3993239402770996, "learning_rate": 1.8057621109767165e-08, "loss": 0.2568, "step": 45270 }, { "epoch": 0.9809763178991615, "grad_norm": 1.2677206993103027, "learning_rate": 1.7853757742191114e-08, "loss": 0.1965, "step": 45275 }, { "epoch": 0.9810846532186424, "grad_norm": 1.5503184795379639, "learning_rate": 1.765105065672512e-08, "loss": 0.2199, "step": 45280 }, { "epoch": 0.9811929885381232, "grad_norm": 1.0194469690322876, "learning_rate": 1.7449499876848186e-08, "loss": 0.2714, "step": 45285 }, { "epoch": 0.981301323857604, "grad_norm": 1.6203603744506836, "learning_rate": 1.7249105425909406e-08, "loss": 0.2367, "step": 45290 }, { "epoch": 0.9814096591770849, "grad_norm": 2.172438859939575, "learning_rate": 1.7049867327120218e-08, "loss": 0.1641, "step": 45295 }, { "epoch": 0.9815179944965657, "grad_norm": 1.6065202951431274, "learning_rate": 1.6851785603558824e-08, "loss": 0.2228, "step": 45300 }, { "epoch": 0.9816263298160466, "grad_norm": 1.3887102603912354, "learning_rate": 1.6654860278170203e-08, "loss": 0.1855, "step": 45305 }, { "epoch": 0.9817346651355275, "grad_norm": 1.7688676118850708, "learning_rate": 1.645909137376611e-08, "loss": 0.2922, "step": 45310 }, { "epoch": 0.9818430004550084, "grad_norm": 1.178482174873352, "learning_rate": 1.6264478913021743e-08, "loss": 0.2276, "step": 45315 }, { "epoch": 0.9819513357744892, "grad_norm": 2.216535806655884, "learning_rate": 1.6071022918482392e-08, "loss": 0.3126, "step": 45320 }, { "epoch": 0.9820596710939701, "grad_norm": 1.6138921976089478, "learning_rate": 1.5878723412555696e-08, "loss": 0.1778, "step": 45325 }, { "epoch": 0.9821680064134509, "grad_norm": 1.1920549869537354, "learning_rate": 1.568758041751606e-08, "loss": 0.3425, "step": 45330 }, { "epoch": 0.9822763417329318, "grad_norm": 1.3570258617401123, "learning_rate": 1.5497593955505763e-08, "loss": 0.264, "step": 45335 }, { "epoch": 0.9823846770524126, "grad_norm": 0.8097262382507324, "learning_rate": 1.5308764048531655e-08, "loss": 0.2063, "step": 45340 }, { "epoch": 0.9824930123718935, "grad_norm": 1.6534470319747925, "learning_rate": 1.5121090718466236e-08, "loss": 0.1855, "step": 45345 }, { "epoch": 0.9826013476913743, "grad_norm": 2.150114059448242, "learning_rate": 1.4934573987048784e-08, "loss": 0.3212, "step": 45350 }, { "epoch": 0.9827096830108552, "grad_norm": 1.2860639095306396, "learning_rate": 1.4749213875884238e-08, "loss": 0.1232, "step": 45355 }, { "epoch": 0.9828180183303361, "grad_norm": 2.007425308227539, "learning_rate": 1.4565010406444313e-08, "loss": 0.1834, "step": 45360 }, { "epoch": 0.982926353649817, "grad_norm": 0.8051698803901672, "learning_rate": 1.4381963600066384e-08, "loss": 0.218, "step": 45365 }, { "epoch": 0.9830346889692978, "grad_norm": 1.4228217601776123, "learning_rate": 1.4200073477954601e-08, "loss": 0.1208, "step": 45370 }, { "epoch": 0.9831430242887786, "grad_norm": 1.1234081983566284, "learning_rate": 1.4019340061175446e-08, "loss": 0.2534, "step": 45375 }, { "epoch": 0.9832513596082595, "grad_norm": 1.165818691253662, "learning_rate": 1.3839763370666615e-08, "loss": 0.2063, "step": 45380 }, { "epoch": 0.9833596949277403, "grad_norm": 1.6362476348876953, "learning_rate": 1.3661343427228135e-08, "loss": 0.2189, "step": 45385 }, { "epoch": 0.9834680302472212, "grad_norm": 1.3432672023773193, "learning_rate": 1.348408025152792e-08, "loss": 0.157, "step": 45390 }, { "epoch": 0.983576365566702, "grad_norm": 1.6383308172225952, "learning_rate": 1.3307973864099543e-08, "loss": 0.1904, "step": 45395 }, { "epoch": 0.9836847008861829, "grad_norm": 1.6520475149154663, "learning_rate": 1.3133024285340024e-08, "loss": 0.2238, "step": 45400 }, { "epoch": 0.9837930362056637, "grad_norm": 1.5399360656738281, "learning_rate": 1.2959231535517592e-08, "loss": 0.1977, "step": 45405 }, { "epoch": 0.9839013715251447, "grad_norm": 1.2559893131256104, "learning_rate": 1.278659563476281e-08, "loss": 0.2383, "step": 45410 }, { "epoch": 0.9840097068446255, "grad_norm": 1.4554336071014404, "learning_rate": 1.2615116603071909e-08, "loss": 0.2292, "step": 45415 }, { "epoch": 0.9841180421641064, "grad_norm": 1.1102107763290405, "learning_rate": 1.2444794460307884e-08, "loss": 0.2465, "step": 45420 }, { "epoch": 0.9842263774835872, "grad_norm": 1.1896299123764038, "learning_rate": 1.2275629226201624e-08, "loss": 0.1854, "step": 45425 }, { "epoch": 0.9843347128030681, "grad_norm": 2.098146677017212, "learning_rate": 1.2107620920348562e-08, "loss": 0.256, "step": 45430 }, { "epoch": 0.9844430481225489, "grad_norm": 1.121016263961792, "learning_rate": 1.1940769562207577e-08, "loss": 0.1997, "step": 45435 }, { "epoch": 0.9845513834420297, "grad_norm": 1.3962522745132446, "learning_rate": 1.1775075171107652e-08, "loss": 0.2127, "step": 45440 }, { "epoch": 0.9846597187615106, "grad_norm": 1.4471409320831299, "learning_rate": 1.1610537766242325e-08, "loss": 0.2243, "step": 45445 }, { "epoch": 0.9847680540809914, "grad_norm": 1.554612636566162, "learning_rate": 1.144715736666968e-08, "loss": 0.1332, "step": 45450 }, { "epoch": 0.9848763894004724, "grad_norm": 1.6041709184646606, "learning_rate": 1.1284933991314584e-08, "loss": 0.286, "step": 45455 }, { "epoch": 0.9849847247199532, "grad_norm": 1.1202701330184937, "learning_rate": 1.1123867658969779e-08, "loss": 0.2696, "step": 45460 }, { "epoch": 0.9850930600394341, "grad_norm": 1.385934829711914, "learning_rate": 1.0963958388291452e-08, "loss": 0.2024, "step": 45465 }, { "epoch": 0.9852013953589149, "grad_norm": 0.7445408701896667, "learning_rate": 1.0805206197802564e-08, "loss": 0.2287, "step": 45470 }, { "epoch": 0.9853097306783958, "grad_norm": 0.9167163372039795, "learning_rate": 1.0647611105892852e-08, "loss": 0.1671, "step": 45475 }, { "epoch": 0.9854180659978766, "grad_norm": 2.3028762340545654, "learning_rate": 1.04911731308166e-08, "loss": 0.1966, "step": 45480 }, { "epoch": 0.9855264013173575, "grad_norm": 1.107704997062683, "learning_rate": 1.0335892290695981e-08, "loss": 0.2486, "step": 45485 }, { "epoch": 0.9856347366368383, "grad_norm": 1.2531319856643677, "learning_rate": 1.0181768603515497e-08, "loss": 0.1449, "step": 45490 }, { "epoch": 0.9857430719563192, "grad_norm": 1.3859481811523438, "learning_rate": 1.0028802087130863e-08, "loss": 0.2245, "step": 45495 }, { "epoch": 0.9858514072758, "grad_norm": 1.7271965742111206, "learning_rate": 9.876992759259018e-09, "loss": 0.3331, "step": 45500 }, { "epoch": 0.985959742595281, "grad_norm": 0.9460235834121704, "learning_rate": 9.726340637485898e-09, "loss": 0.1809, "step": 45505 }, { "epoch": 0.9860680779147618, "grad_norm": 1.2908066511154175, "learning_rate": 9.576845739261986e-09, "loss": 0.2093, "step": 45510 }, { "epoch": 0.9861764132342427, "grad_norm": 1.737354040145874, "learning_rate": 9.42850808190343e-09, "loss": 0.2026, "step": 45515 }, { "epoch": 0.9862847485537235, "grad_norm": 1.0573875904083252, "learning_rate": 9.281327682594266e-09, "loss": 0.2377, "step": 45520 }, { "epoch": 0.9863930838732043, "grad_norm": 1.6827138662338257, "learning_rate": 9.135304558381962e-09, "loss": 0.3061, "step": 45525 }, { "epoch": 0.9865014191926852, "grad_norm": 0.6512291431427002, "learning_rate": 8.990438726181883e-09, "loss": 0.2023, "step": 45530 }, { "epoch": 0.986609754512166, "grad_norm": 1.2884312868118286, "learning_rate": 8.846730202772823e-09, "loss": 0.1859, "step": 45535 }, { "epoch": 0.9867180898316469, "grad_norm": 0.9533786773681641, "learning_rate": 8.704179004803691e-09, "loss": 0.1646, "step": 45540 }, { "epoch": 0.9868264251511277, "grad_norm": 0.9188399314880371, "learning_rate": 8.562785148785724e-09, "loss": 0.296, "step": 45545 }, { "epoch": 0.9869347604706086, "grad_norm": 1.2296253442764282, "learning_rate": 8.422548651098039e-09, "loss": 0.1901, "step": 45550 }, { "epoch": 0.9870430957900895, "grad_norm": 0.9839481711387634, "learning_rate": 8.2834695279832e-09, "loss": 0.1357, "step": 45555 }, { "epoch": 0.9871514311095704, "grad_norm": 1.5977394580841064, "learning_rate": 8.145547795552766e-09, "loss": 0.2566, "step": 45560 }, { "epoch": 0.9872597664290512, "grad_norm": 0.6160488724708557, "learning_rate": 8.008783469782844e-09, "loss": 0.19, "step": 45565 }, { "epoch": 0.9873681017485321, "grad_norm": 1.4046883583068848, "learning_rate": 7.873176566515206e-09, "loss": 0.3051, "step": 45570 }, { "epoch": 0.9874764370680129, "grad_norm": 1.7855334281921387, "learning_rate": 7.738727101457288e-09, "loss": 0.2065, "step": 45575 }, { "epoch": 0.9875847723874938, "grad_norm": 1.5824205875396729, "learning_rate": 7.60543509018552e-09, "loss": 0.2192, "step": 45580 }, { "epoch": 0.9876931077069746, "grad_norm": 1.459255576133728, "learning_rate": 7.473300548136442e-09, "loss": 0.2367, "step": 45585 }, { "epoch": 0.9878014430264555, "grad_norm": 1.1488395929336548, "learning_rate": 7.342323490617808e-09, "loss": 0.2246, "step": 45590 }, { "epoch": 0.9879097783459363, "grad_norm": 0.875965416431427, "learning_rate": 7.212503932801929e-09, "loss": 0.2874, "step": 45595 }, { "epoch": 0.9880181136654173, "grad_norm": 1.7967857122421265, "learning_rate": 7.083841889724552e-09, "loss": 0.2167, "step": 45600 }, { "epoch": 0.9881264489848981, "grad_norm": 2.309623956680298, "learning_rate": 6.956337376290423e-09, "loss": 0.3154, "step": 45605 }, { "epoch": 0.9882347843043789, "grad_norm": 0.8944942951202393, "learning_rate": 6.829990407268838e-09, "loss": 0.1841, "step": 45610 }, { "epoch": 0.9883431196238598, "grad_norm": 1.6030638217926025, "learning_rate": 6.7048009972947585e-09, "loss": 0.1667, "step": 45615 }, { "epoch": 0.9884514549433406, "grad_norm": 1.3076956272125244, "learning_rate": 6.5807691608710255e-09, "loss": 0.1734, "step": 45620 }, { "epoch": 0.9885597902628215, "grad_norm": 1.285643219947815, "learning_rate": 6.457894912362817e-09, "loss": 0.1716, "step": 45625 }, { "epoch": 0.9886681255823023, "grad_norm": 1.670290231704712, "learning_rate": 6.336178266004301e-09, "loss": 0.2174, "step": 45630 }, { "epoch": 0.9887764609017832, "grad_norm": 1.5312683582305908, "learning_rate": 6.215619235895309e-09, "loss": 0.2298, "step": 45635 }, { "epoch": 0.988884796221264, "grad_norm": 1.8492343425750732, "learning_rate": 6.096217835999119e-09, "loss": 0.2058, "step": 45640 }, { "epoch": 0.9889931315407449, "grad_norm": 1.0191782712936401, "learning_rate": 5.977974080147997e-09, "loss": 0.2552, "step": 45645 }, { "epoch": 0.9891014668602258, "grad_norm": 1.174689769744873, "learning_rate": 5.860887982037655e-09, "loss": 0.1999, "step": 45650 }, { "epoch": 0.9892098021797067, "grad_norm": 0.9247663021087646, "learning_rate": 5.744959555231688e-09, "loss": 0.2038, "step": 45655 }, { "epoch": 0.9893181374991875, "grad_norm": 1.6744626760482788, "learning_rate": 5.6301888131571336e-09, "loss": 0.2147, "step": 45660 }, { "epoch": 0.9894264728186684, "grad_norm": 1.732948899269104, "learning_rate": 5.516575769111132e-09, "loss": 0.2611, "step": 45665 }, { "epoch": 0.9895348081381492, "grad_norm": 1.1644155979156494, "learning_rate": 5.404120436250937e-09, "loss": 0.2916, "step": 45670 }, { "epoch": 0.98964314345763, "grad_norm": 1.1642056703567505, "learning_rate": 5.292822827605015e-09, "loss": 0.2339, "step": 45675 }, { "epoch": 0.9897514787771109, "grad_norm": 1.2346727848052979, "learning_rate": 5.182682956065277e-09, "loss": 0.2672, "step": 45680 }, { "epoch": 0.9898598140965917, "grad_norm": 1.1715890169143677, "learning_rate": 5.073700834389295e-09, "loss": 0.1679, "step": 45685 }, { "epoch": 0.9899681494160726, "grad_norm": 1.8580323457717896, "learning_rate": 4.965876475201415e-09, "loss": 0.1761, "step": 45690 }, { "epoch": 0.9900764847355534, "grad_norm": 2.1310198307037354, "learning_rate": 4.859209890990535e-09, "loss": 0.2262, "step": 45695 }, { "epoch": 0.9901848200550344, "grad_norm": 1.417580246925354, "learning_rate": 4.753701094112328e-09, "loss": 0.2474, "step": 45700 }, { "epoch": 0.9902931553745152, "grad_norm": 1.6925530433654785, "learning_rate": 4.6493500967903505e-09, "loss": 0.2606, "step": 45705 }, { "epoch": 0.9904014906939961, "grad_norm": 0.986320972442627, "learning_rate": 4.546156911109378e-09, "loss": 0.2041, "step": 45710 }, { "epoch": 0.9905098260134769, "grad_norm": 1.2937912940979004, "learning_rate": 4.444121549025404e-09, "loss": 0.2285, "step": 45715 }, { "epoch": 0.9906181613329578, "grad_norm": 1.555869698524475, "learning_rate": 4.343244022356752e-09, "loss": 0.3585, "step": 45720 }, { "epoch": 0.9907264966524386, "grad_norm": 1.2144408226013184, "learning_rate": 4.243524342787408e-09, "loss": 0.2108, "step": 45725 }, { "epoch": 0.9908348319719195, "grad_norm": 0.7202710509300232, "learning_rate": 4.144962521869245e-09, "loss": 0.2516, "step": 45730 }, { "epoch": 0.9909431672914003, "grad_norm": 2.3583872318267822, "learning_rate": 4.047558571020904e-09, "loss": 0.2058, "step": 45735 }, { "epoch": 0.9910515026108812, "grad_norm": 1.3158155679702759, "learning_rate": 3.9513125015222535e-09, "loss": 0.1856, "step": 45740 }, { "epoch": 0.9911598379303621, "grad_norm": 1.002907633781433, "learning_rate": 3.856224324523261e-09, "loss": 0.1124, "step": 45745 }, { "epoch": 0.991268173249843, "grad_norm": 1.7858566045761108, "learning_rate": 3.762294051038451e-09, "loss": 0.1818, "step": 45750 }, { "epoch": 0.9913765085693238, "grad_norm": 1.2130054235458374, "learning_rate": 3.669521691948008e-09, "loss": 0.1409, "step": 45755 }, { "epoch": 0.9914848438888046, "grad_norm": 0.5909806489944458, "learning_rate": 3.5779072579988916e-09, "loss": 0.2748, "step": 45760 }, { "epoch": 0.9915931792082855, "grad_norm": 1.2998523712158203, "learning_rate": 3.487450759802613e-09, "loss": 0.1779, "step": 45765 }, { "epoch": 0.9917015145277663, "grad_norm": 1.3978965282440186, "learning_rate": 3.3981522078385676e-09, "loss": 0.1873, "step": 45770 }, { "epoch": 0.9918098498472472, "grad_norm": 1.8996409177780151, "learning_rate": 3.3100116124484825e-09, "loss": 0.2094, "step": 45775 }, { "epoch": 0.991918185166728, "grad_norm": 2.152578353881836, "learning_rate": 3.2230289838430797e-09, "loss": 0.3174, "step": 45780 }, { "epoch": 0.9920265204862089, "grad_norm": 1.3234155178070068, "learning_rate": 3.137204332097632e-09, "loss": 0.2215, "step": 45785 }, { "epoch": 0.9921348558056897, "grad_norm": 2.1195943355560303, "learning_rate": 3.0525376671552974e-09, "loss": 0.1996, "step": 45790 }, { "epoch": 0.9922431911251707, "grad_norm": 1.5684727430343628, "learning_rate": 2.969028998821566e-09, "loss": 0.2435, "step": 45795 }, { "epoch": 0.9923515264446515, "grad_norm": 1.151090145111084, "learning_rate": 2.8866783367698103e-09, "loss": 0.1408, "step": 45800 }, { "epoch": 0.9924598617641324, "grad_norm": 1.2516694068908691, "learning_rate": 2.805485690540177e-09, "loss": 0.235, "step": 45805 }, { "epoch": 0.9925681970836132, "grad_norm": 2.1145315170288086, "learning_rate": 2.7254510695362556e-09, "loss": 0.2378, "step": 45810 }, { "epoch": 0.9926765324030941, "grad_norm": 1.2147233486175537, "learning_rate": 2.6465744830306285e-09, "loss": 0.195, "step": 45815 }, { "epoch": 0.9927848677225749, "grad_norm": 1.3403856754302979, "learning_rate": 2.5688559401593206e-09, "loss": 0.1546, "step": 45820 }, { "epoch": 0.9928932030420557, "grad_norm": 2.1614878177642822, "learning_rate": 2.4922954499240205e-09, "loss": 0.2258, "step": 45825 }, { "epoch": 0.9930015383615366, "grad_norm": 1.6809661388397217, "learning_rate": 2.4168930211931896e-09, "loss": 0.1757, "step": 45830 }, { "epoch": 0.9931098736810174, "grad_norm": 1.8576570749282837, "learning_rate": 2.342648662702063e-09, "loss": 0.1721, "step": 45835 }, { "epoch": 0.9932182090004984, "grad_norm": 1.7692710161209106, "learning_rate": 2.2695623830504275e-09, "loss": 0.2147, "step": 45840 }, { "epoch": 0.9933265443199792, "grad_norm": 0.8664883971214294, "learning_rate": 2.1976341907026244e-09, "loss": 0.183, "step": 45845 }, { "epoch": 0.9934348796394601, "grad_norm": 0.8299692869186401, "learning_rate": 2.1268640939930974e-09, "loss": 0.214, "step": 45850 }, { "epoch": 0.9935432149589409, "grad_norm": 1.1634637117385864, "learning_rate": 2.057252101118623e-09, "loss": 0.227, "step": 45855 }, { "epoch": 0.9936515502784218, "grad_norm": 0.8784791827201843, "learning_rate": 1.988798220141641e-09, "loss": 0.2891, "step": 45860 }, { "epoch": 0.9937598855979026, "grad_norm": 1.2088518142700195, "learning_rate": 1.9215024589913643e-09, "loss": 0.1104, "step": 45865 }, { "epoch": 0.9938682209173835, "grad_norm": 1.458077311515808, "learning_rate": 1.8553648254648893e-09, "loss": 0.1853, "step": 45870 }, { "epoch": 0.9939765562368643, "grad_norm": 0.7842757105827332, "learning_rate": 1.7903853272216442e-09, "loss": 0.2432, "step": 45875 }, { "epoch": 0.9940848915563452, "grad_norm": 1.6117695569992065, "learning_rate": 1.7265639717900517e-09, "loss": 0.1987, "step": 45880 }, { "epoch": 0.994193226875826, "grad_norm": 1.790738582611084, "learning_rate": 1.6639007665608665e-09, "loss": 0.2146, "step": 45885 }, { "epoch": 0.994301562195307, "grad_norm": 1.912838339805603, "learning_rate": 1.6023957187938366e-09, "loss": 0.2092, "step": 45890 }, { "epoch": 0.9944098975147878, "grad_norm": 0.3590981960296631, "learning_rate": 1.5420488356143737e-09, "loss": 0.1698, "step": 45895 }, { "epoch": 0.9945182328342687, "grad_norm": 2.6616082191467285, "learning_rate": 1.4828601240102213e-09, "loss": 0.2504, "step": 45900 }, { "epoch": 0.9946265681537495, "grad_norm": 0.9964662194252014, "learning_rate": 1.4248295908392274e-09, "loss": 0.238, "step": 45905 }, { "epoch": 0.9947349034732303, "grad_norm": 1.7008814811706543, "learning_rate": 1.367957242823792e-09, "loss": 0.2471, "step": 45910 }, { "epoch": 0.9948432387927112, "grad_norm": 1.515289068222046, "learning_rate": 1.3122430865508683e-09, "loss": 0.2006, "step": 45915 }, { "epoch": 0.994951574112192, "grad_norm": 1.2257957458496094, "learning_rate": 1.2576871284741831e-09, "loss": 0.2926, "step": 45920 }, { "epoch": 0.9950599094316729, "grad_norm": 1.3933396339416504, "learning_rate": 1.2042893749131258e-09, "loss": 0.235, "step": 45925 }, { "epoch": 0.9951682447511537, "grad_norm": 1.3439602851867676, "learning_rate": 1.1520498320527484e-09, "loss": 0.245, "step": 45930 }, { "epoch": 0.9952765800706346, "grad_norm": 1.1394712924957275, "learning_rate": 1.100968505944877e-09, "loss": 0.0943, "step": 45935 }, { "epoch": 0.9953849153901155, "grad_norm": 1.325989007949829, "learning_rate": 1.0510454025070006e-09, "loss": 0.1814, "step": 45940 }, { "epoch": 0.9954932507095964, "grad_norm": 1.8333858251571655, "learning_rate": 1.00228052752116e-09, "loss": 0.2868, "step": 45945 }, { "epoch": 0.9956015860290772, "grad_norm": 1.656977891921997, "learning_rate": 9.546738866350602e-10, "loss": 0.3063, "step": 45950 }, { "epoch": 0.9957099213485581, "grad_norm": 1.490906834602356, "learning_rate": 9.082254853653993e-10, "loss": 0.2491, "step": 45955 }, { "epoch": 0.9958182566680389, "grad_norm": 1.0023179054260254, "learning_rate": 8.629353290912079e-10, "loss": 0.2459, "step": 45960 }, { "epoch": 0.9959265919875198, "grad_norm": 1.5325068235397339, "learning_rate": 8.188034230582897e-10, "loss": 0.1917, "step": 45965 }, { "epoch": 0.9960349273070006, "grad_norm": 1.8970452547073364, "learning_rate": 7.758297723803321e-10, "loss": 0.2336, "step": 45970 }, { "epoch": 0.9961432626264815, "grad_norm": 2.007992744445801, "learning_rate": 7.340143820333545e-10, "loss": 0.1923, "step": 45975 }, { "epoch": 0.9962515979459623, "grad_norm": 1.8528732061386108, "learning_rate": 6.933572568612601e-10, "loss": 0.2825, "step": 45980 }, { "epoch": 0.9963599332654433, "grad_norm": 2.0639572143554688, "learning_rate": 6.538584015747251e-10, "loss": 0.2792, "step": 45985 }, { "epoch": 0.9964682685849241, "grad_norm": 1.2649884223937988, "learning_rate": 6.155178207489787e-10, "loss": 0.2519, "step": 45990 }, { "epoch": 0.996576603904405, "grad_norm": 0.7843980193138123, "learning_rate": 5.783355188249129e-10, "loss": 0.2099, "step": 45995 }, { "epoch": 0.9966849392238858, "grad_norm": 1.2107594013214111, "learning_rate": 5.423115001079726e-10, "loss": 0.3082, "step": 46000 }, { "epoch": 0.9967932745433666, "grad_norm": 0.5143965482711792, "learning_rate": 5.074457687737066e-10, "loss": 0.1206, "step": 46005 }, { "epoch": 0.9969016098628475, "grad_norm": 1.4925873279571533, "learning_rate": 4.737383288588859e-10, "loss": 0.1261, "step": 46010 }, { "epoch": 0.9970099451823283, "grad_norm": 0.7836426496505737, "learning_rate": 4.411891842681648e-10, "loss": 0.2289, "step": 46015 }, { "epoch": 0.9971182805018092, "grad_norm": 1.943342685699463, "learning_rate": 4.0979833877297095e-10, "loss": 0.3832, "step": 46020 }, { "epoch": 0.99722661582129, "grad_norm": 1.3677321672439575, "learning_rate": 3.795657960092847e-10, "loss": 0.2179, "step": 46025 }, { "epoch": 0.9973349511407709, "grad_norm": 1.6442766189575195, "learning_rate": 3.5049155947763924e-10, "loss": 0.2491, "step": 46030 }, { "epoch": 0.9974432864602518, "grad_norm": 1.9845207929611206, "learning_rate": 3.225756325464513e-10, "loss": 0.2146, "step": 46035 }, { "epoch": 0.9975516217797327, "grad_norm": 1.3889206647872925, "learning_rate": 2.9581801845091073e-10, "loss": 0.1869, "step": 46040 }, { "epoch": 0.9976599570992135, "grad_norm": 1.2882345914840698, "learning_rate": 2.702187202885398e-10, "loss": 0.2683, "step": 46045 }, { "epoch": 0.9977682924186944, "grad_norm": 1.7147579193115234, "learning_rate": 2.4577774102585437e-10, "loss": 0.2851, "step": 46050 }, { "epoch": 0.9978766277381752, "grad_norm": 1.6480119228363037, "learning_rate": 2.22495083492813e-10, "loss": 0.2659, "step": 46055 }, { "epoch": 0.997984963057656, "grad_norm": 1.0033056735992432, "learning_rate": 2.0037075038725761e-10, "loss": 0.2603, "step": 46060 }, { "epoch": 0.9980932983771369, "grad_norm": 0.6248530745506287, "learning_rate": 1.7940474427269316e-10, "loss": 0.1914, "step": 46065 }, { "epoch": 0.9982016336966177, "grad_norm": 1.3904796838760376, "learning_rate": 1.5959706757606718e-10, "loss": 0.2173, "step": 46070 }, { "epoch": 0.9983099690160986, "grad_norm": 1.3345955610275269, "learning_rate": 1.4094772259221068e-10, "loss": 0.3105, "step": 46075 }, { "epoch": 0.9984183043355794, "grad_norm": 1.2733432054519653, "learning_rate": 1.2345671148161764e-10, "loss": 0.2073, "step": 46080 }, { "epoch": 0.9985266396550604, "grad_norm": 1.612874150276184, "learning_rate": 1.0712403627155532e-10, "loss": 0.2309, "step": 46085 }, { "epoch": 0.9986349749745412, "grad_norm": 0.9090170860290527, "learning_rate": 9.19496988516233e-11, "loss": 0.2163, "step": 46090 }, { "epoch": 0.9987433102940221, "grad_norm": 0.5192211866378784, "learning_rate": 7.793370098152508e-11, "loss": 0.1688, "step": 46095 }, { "epoch": 0.9988516456135029, "grad_norm": 1.505074381828308, "learning_rate": 6.50760442832965e-11, "loss": 0.1654, "step": 46100 }, { "epoch": 0.9989599809329838, "grad_norm": 1.463861107826233, "learning_rate": 5.3376730246856854e-11, "loss": 0.243, "step": 46105 }, { "epoch": 0.9990683162524646, "grad_norm": 1.0240237712860107, "learning_rate": 4.283576022778846e-11, "loss": 0.2643, "step": 46110 }, { "epoch": 0.9991766515719455, "grad_norm": 2.040769577026367, "learning_rate": 3.345313544733664e-11, "loss": 0.2112, "step": 46115 }, { "epoch": 0.9992849868914263, "grad_norm": 1.3797962665557861, "learning_rate": 2.522885699129951e-11, "loss": 0.2273, "step": 46120 }, { "epoch": 0.9993933222109072, "grad_norm": 1.9284459352493286, "learning_rate": 1.8162925813358658e-11, "loss": 0.2039, "step": 46125 }, { "epoch": 0.9995016575303881, "grad_norm": 1.7539584636688232, "learning_rate": 1.225534273063822e-11, "loss": 0.2044, "step": 46130 }, { "epoch": 0.999609992849869, "grad_norm": 2.005608558654785, "learning_rate": 7.506108429256032e-12, "loss": 0.253, "step": 46135 }, { "epoch": 0.9997183281693498, "grad_norm": 1.7132806777954102, "learning_rate": 3.915223458772488e-12, "loss": 0.212, "step": 46140 }, { "epoch": 0.9998266634888306, "grad_norm": 2.2819485664367676, "learning_rate": 1.4826882333007774e-12, "loss": 0.2378, "step": 46145 }, { "epoch": 0.9999349988083115, "grad_norm": 1.0312246084213257, "learning_rate": 2.0850303705799435e-13, "loss": 0.2645, "step": 46150 }, { "epoch": 1.0, "step": 46153, "total_flos": 1.0913185643289536e+20, "train_loss": 0.2896117240476564, "train_runtime": 611264.0895, "train_samples_per_second": 1.812, "train_steps_per_second": 0.076 } ], "logging_steps": 5, "max_steps": 46153, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0913185643289536e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }