{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 55064, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.8160685747493826e-05, "grad_norm": 22.072545002026025, "learning_rate": 3.629764065335753e-08, "loss": 1.1696, "step": 1 }, { "epoch": 0.00018160685747493826, "grad_norm": 13.554373578863913, "learning_rate": 3.6297640653357535e-07, "loss": 1.1932, "step": 10 }, { "epoch": 0.0003632137149498765, "grad_norm": 5.1373895013397926, "learning_rate": 7.259528130671507e-07, "loss": 1.0597, "step": 20 }, { "epoch": 0.0005448205724248148, "grad_norm": 3.1669731232953153, "learning_rate": 1.088929219600726e-06, "loss": 0.9655, "step": 30 }, { "epoch": 0.000726427429899753, "grad_norm": 4.601780642068655, "learning_rate": 1.4519056261343014e-06, "loss": 0.9216, "step": 40 }, { "epoch": 0.0009080342873746913, "grad_norm": 2.6405384883758565, "learning_rate": 1.8148820326678768e-06, "loss": 0.9046, "step": 50 }, { "epoch": 0.0010896411448496296, "grad_norm": 2.4680300759569564, "learning_rate": 2.177858439201452e-06, "loss": 0.8693, "step": 60 }, { "epoch": 0.0012712480023245678, "grad_norm": 2.53116103203694, "learning_rate": 2.540834845735027e-06, "loss": 0.8537, "step": 70 }, { "epoch": 0.001452854859799506, "grad_norm": 2.5180723189978007, "learning_rate": 2.903811252268603e-06, "loss": 0.8693, "step": 80 }, { "epoch": 0.0016344617172744443, "grad_norm": 2.4045480066533496, "learning_rate": 3.266787658802178e-06, "loss": 0.8525, "step": 90 }, { "epoch": 0.0018160685747493825, "grad_norm": 2.5537909784118598, "learning_rate": 3.6297640653357536e-06, "loss": 0.8406, "step": 100 }, { "epoch": 0.001997675432224321, "grad_norm": 2.587773402122514, "learning_rate": 3.992740471869328e-06, "loss": 0.8188, "step": 110 }, { "epoch": 0.002179282289699259, "grad_norm": 2.4149061074148617, "learning_rate": 4.355716878402904e-06, "loss": 0.8231, "step": 120 }, { "epoch": 0.0023608891471741974, "grad_norm": 2.6547380518446295, "learning_rate": 4.71869328493648e-06, "loss": 0.8373, "step": 130 }, { "epoch": 0.0025424960046491357, "grad_norm": 2.3549109086428026, "learning_rate": 5.081669691470054e-06, "loss": 0.8181, "step": 140 }, { "epoch": 0.002724102862124074, "grad_norm": 2.5828724223535895, "learning_rate": 5.44464609800363e-06, "loss": 0.833, "step": 150 }, { "epoch": 0.002905709719599012, "grad_norm": 2.5095073028423656, "learning_rate": 5.807622504537206e-06, "loss": 0.8321, "step": 160 }, { "epoch": 0.0030873165770739504, "grad_norm": 2.366346625741285, "learning_rate": 6.170598911070781e-06, "loss": 0.8377, "step": 170 }, { "epoch": 0.0032689234345488886, "grad_norm": 2.3104798123888437, "learning_rate": 6.533575317604356e-06, "loss": 0.7983, "step": 180 }, { "epoch": 0.003450530292023827, "grad_norm": 2.507990095797411, "learning_rate": 6.896551724137932e-06, "loss": 0.8204, "step": 190 }, { "epoch": 0.003632137149498765, "grad_norm": 2.4380505437724045, "learning_rate": 7.259528130671507e-06, "loss": 0.8099, "step": 200 }, { "epoch": 0.0038137440069737033, "grad_norm": 2.4257500830089316, "learning_rate": 7.622504537205082e-06, "loss": 0.8456, "step": 210 }, { "epoch": 0.003995350864448642, "grad_norm": 2.47457322148341, "learning_rate": 7.985480943738657e-06, "loss": 0.8287, "step": 220 }, { "epoch": 0.00417695772192358, "grad_norm": 2.591309338618785, "learning_rate": 8.348457350272232e-06, "loss": 0.8264, "step": 230 }, { "epoch": 0.004358564579398518, "grad_norm": 2.2538515422282126, "learning_rate": 8.711433756805808e-06, "loss": 0.8144, "step": 240 }, { "epoch": 0.004540171436873457, "grad_norm": 2.3432936203919565, "learning_rate": 9.074410163339384e-06, "loss": 0.8325, "step": 250 }, { "epoch": 0.004721778294348395, "grad_norm": 2.3443138908970678, "learning_rate": 9.43738656987296e-06, "loss": 0.82, "step": 260 }, { "epoch": 0.004903385151823333, "grad_norm": 2.5744741706593097, "learning_rate": 9.800362976406535e-06, "loss": 0.8218, "step": 270 }, { "epoch": 0.005084992009298271, "grad_norm": 2.2436485091197085, "learning_rate": 1.0163339382940109e-05, "loss": 0.8272, "step": 280 }, { "epoch": 0.00526659886677321, "grad_norm": 2.395981815918401, "learning_rate": 1.0526315789473684e-05, "loss": 0.8224, "step": 290 }, { "epoch": 0.005448205724248148, "grad_norm": 2.3647507935779117, "learning_rate": 1.088929219600726e-05, "loss": 0.8235, "step": 300 }, { "epoch": 0.005629812581723086, "grad_norm": 2.3009852174596057, "learning_rate": 1.1252268602540837e-05, "loss": 0.849, "step": 310 }, { "epoch": 0.005811419439198024, "grad_norm": 2.2707934571526236, "learning_rate": 1.1615245009074411e-05, "loss": 0.8402, "step": 320 }, { "epoch": 0.0059930262966729625, "grad_norm": 2.3578961911794414, "learning_rate": 1.1978221415607987e-05, "loss": 0.8396, "step": 330 }, { "epoch": 0.006174633154147901, "grad_norm": 2.106262237839789, "learning_rate": 1.2341197822141563e-05, "loss": 0.8383, "step": 340 }, { "epoch": 0.006356240011622839, "grad_norm": 2.2736789725635056, "learning_rate": 1.2704174228675136e-05, "loss": 0.8325, "step": 350 }, { "epoch": 0.006537846869097777, "grad_norm": 2.5253717675531324, "learning_rate": 1.3067150635208712e-05, "loss": 0.8461, "step": 360 }, { "epoch": 0.006719453726572715, "grad_norm": 2.263415140280458, "learning_rate": 1.3430127041742288e-05, "loss": 0.842, "step": 370 }, { "epoch": 0.006901060584047654, "grad_norm": 2.1227499523070907, "learning_rate": 1.3793103448275863e-05, "loss": 0.8372, "step": 380 }, { "epoch": 0.007082667441522592, "grad_norm": 2.2475921028786185, "learning_rate": 1.4156079854809439e-05, "loss": 0.8601, "step": 390 }, { "epoch": 0.00726427429899753, "grad_norm": 2.6037510157051758, "learning_rate": 1.4519056261343015e-05, "loss": 0.8364, "step": 400 }, { "epoch": 0.007445881156472468, "grad_norm": 2.1462620226573543, "learning_rate": 1.4882032667876588e-05, "loss": 0.8474, "step": 410 }, { "epoch": 0.007627488013947407, "grad_norm": 2.174749511100977, "learning_rate": 1.5245009074410164e-05, "loss": 0.8591, "step": 420 }, { "epoch": 0.007809094871422345, "grad_norm": 2.408871755103711, "learning_rate": 1.5607985480943738e-05, "loss": 0.8477, "step": 430 }, { "epoch": 0.007990701728897284, "grad_norm": 2.1487240212362897, "learning_rate": 1.5970961887477314e-05, "loss": 0.8497, "step": 440 }, { "epoch": 0.008172308586372221, "grad_norm": 2.0408941415184816, "learning_rate": 1.6333938294010893e-05, "loss": 0.8458, "step": 450 }, { "epoch": 0.00835391544384716, "grad_norm": 2.224406988582459, "learning_rate": 1.6696914700544465e-05, "loss": 0.8511, "step": 460 }, { "epoch": 0.008535522301322098, "grad_norm": 2.3911018541289932, "learning_rate": 1.705989110707804e-05, "loss": 0.8558, "step": 470 }, { "epoch": 0.008717129158797037, "grad_norm": 2.1255046435349607, "learning_rate": 1.7422867513611616e-05, "loss": 0.8599, "step": 480 }, { "epoch": 0.008898736016271974, "grad_norm": 2.468706385647628, "learning_rate": 1.7785843920145192e-05, "loss": 0.8794, "step": 490 }, { "epoch": 0.009080342873746913, "grad_norm": 2.2350280131817657, "learning_rate": 1.8148820326678767e-05, "loss": 0.8576, "step": 500 }, { "epoch": 0.00926194973122185, "grad_norm": 2.230616344181099, "learning_rate": 1.8511796733212343e-05, "loss": 0.8727, "step": 510 }, { "epoch": 0.00944355658869679, "grad_norm": 2.211250963952609, "learning_rate": 1.887477313974592e-05, "loss": 0.857, "step": 520 }, { "epoch": 0.009625163446171727, "grad_norm": 2.090049683208436, "learning_rate": 1.9237749546279494e-05, "loss": 0.8895, "step": 530 }, { "epoch": 0.009806770303646666, "grad_norm": 1.981567104584761, "learning_rate": 1.960072595281307e-05, "loss": 0.8685, "step": 540 }, { "epoch": 0.009988377161121604, "grad_norm": 2.0158405415514746, "learning_rate": 1.9963702359346645e-05, "loss": 0.879, "step": 550 }, { "epoch": 0.010169984018596543, "grad_norm": 2.0424751819945217, "learning_rate": 1.9999998654899938e-05, "loss": 0.8773, "step": 560 }, { "epoch": 0.01035159087607148, "grad_norm": 2.180736585343267, "learning_rate": 1.9999994005171795e-05, "loss": 0.8721, "step": 570 }, { "epoch": 0.01053319773354642, "grad_norm": 1.8938647620420423, "learning_rate": 1.999998603421094e-05, "loss": 0.8646, "step": 580 }, { "epoch": 0.010714804591021357, "grad_norm": 1.9270398376478397, "learning_rate": 1.999997474202002e-05, "loss": 0.8932, "step": 590 }, { "epoch": 0.010896411448496296, "grad_norm": 1.9914256330284952, "learning_rate": 1.9999960128602782e-05, "loss": 0.8789, "step": 600 }, { "epoch": 0.011078018305971233, "grad_norm": 2.0506137184085724, "learning_rate": 1.9999942193964087e-05, "loss": 0.8762, "step": 610 }, { "epoch": 0.011259625163446172, "grad_norm": 1.9230881434808045, "learning_rate": 1.9999920938109883e-05, "loss": 0.8739, "step": 620 }, { "epoch": 0.01144123202092111, "grad_norm": 2.0354858465720858, "learning_rate": 1.999989636104724e-05, "loss": 0.874, "step": 630 }, { "epoch": 0.011622838878396049, "grad_norm": 2.0223773689624873, "learning_rate": 1.9999868462784312e-05, "loss": 0.8769, "step": 640 }, { "epoch": 0.011804445735870986, "grad_norm": 2.0798296905344063, "learning_rate": 1.9999837243330366e-05, "loss": 0.8809, "step": 650 }, { "epoch": 0.011986052593345925, "grad_norm": 2.0810040429159367, "learning_rate": 1.9999802702695775e-05, "loss": 0.8701, "step": 660 }, { "epoch": 0.012167659450820862, "grad_norm": 3.3385764458281764, "learning_rate": 1.9999764840892005e-05, "loss": 0.8813, "step": 670 }, { "epoch": 0.012349266308295801, "grad_norm": 2.2218018361447185, "learning_rate": 1.9999723657931636e-05, "loss": 0.8855, "step": 680 }, { "epoch": 0.012530873165770739, "grad_norm": 2.1613809213785538, "learning_rate": 1.9999679153828343e-05, "loss": 0.8603, "step": 690 }, { "epoch": 0.012712480023245678, "grad_norm": 2.018109752185538, "learning_rate": 1.9999631328596907e-05, "loss": 0.8923, "step": 700 }, { "epoch": 0.012894086880720615, "grad_norm": 2.079189973183792, "learning_rate": 1.9999580182253212e-05, "loss": 0.8733, "step": 710 }, { "epoch": 0.013075693738195554, "grad_norm": 2.0581071283105254, "learning_rate": 1.9999525714814244e-05, "loss": 0.8875, "step": 720 }, { "epoch": 0.013257300595670492, "grad_norm": 2.2901668951587313, "learning_rate": 1.9999467926298094e-05, "loss": 0.8583, "step": 730 }, { "epoch": 0.01343890745314543, "grad_norm": 1.8895000558822943, "learning_rate": 1.9999406816723957e-05, "loss": 0.8814, "step": 740 }, { "epoch": 0.013620514310620368, "grad_norm": 2.401396596574631, "learning_rate": 1.9999342386112127e-05, "loss": 0.8681, "step": 750 }, { "epoch": 0.013802121168095307, "grad_norm": 1.8856069147433971, "learning_rate": 1.9999274634484004e-05, "loss": 0.8973, "step": 760 }, { "epoch": 0.013983728025570246, "grad_norm": 1.801964875686738, "learning_rate": 1.9999203561862085e-05, "loss": 0.8589, "step": 770 }, { "epoch": 0.014165334883045184, "grad_norm": 1.7991391038444289, "learning_rate": 1.9999129168269982e-05, "loss": 0.8793, "step": 780 }, { "epoch": 0.014346941740520123, "grad_norm": 1.860802824693034, "learning_rate": 1.9999051453732398e-05, "loss": 0.8854, "step": 790 }, { "epoch": 0.01452854859799506, "grad_norm": 1.9557756292158959, "learning_rate": 1.9998970418275146e-05, "loss": 0.8883, "step": 800 }, { "epoch": 0.01471015545547, "grad_norm": 2.387305441074328, "learning_rate": 1.9998886061925136e-05, "loss": 0.8731, "step": 810 }, { "epoch": 0.014891762312944937, "grad_norm": 1.8483102718281448, "learning_rate": 1.9998798384710395e-05, "loss": 0.8891, "step": 820 }, { "epoch": 0.015073369170419876, "grad_norm": 1.7902803180053561, "learning_rate": 1.999870738666003e-05, "loss": 0.8716, "step": 830 }, { "epoch": 0.015254976027894813, "grad_norm": 1.7769967908399542, "learning_rate": 1.999861306780427e-05, "loss": 0.86, "step": 840 }, { "epoch": 0.015436582885369752, "grad_norm": 1.7884022183511306, "learning_rate": 1.9998515428174436e-05, "loss": 0.869, "step": 850 }, { "epoch": 0.01561818974284469, "grad_norm": 1.7006309025790298, "learning_rate": 1.9998414467802964e-05, "loss": 0.8718, "step": 860 }, { "epoch": 0.01579979660031963, "grad_norm": 1.9490134070187046, "learning_rate": 1.999831018672338e-05, "loss": 0.8733, "step": 870 }, { "epoch": 0.015981403457794568, "grad_norm": 1.8314874436748572, "learning_rate": 1.9998202584970325e-05, "loss": 0.869, "step": 880 }, { "epoch": 0.016163010315269503, "grad_norm": 1.7979185222149934, "learning_rate": 1.9998091662579525e-05, "loss": 0.8758, "step": 890 }, { "epoch": 0.016344617172744443, "grad_norm": 1.7238813987226698, "learning_rate": 1.9997977419587827e-05, "loss": 0.8663, "step": 900 }, { "epoch": 0.01652622403021938, "grad_norm": 1.7125962292453185, "learning_rate": 1.9997859856033172e-05, "loss": 0.8776, "step": 910 }, { "epoch": 0.01670783088769432, "grad_norm": 1.7407472838737568, "learning_rate": 1.9997738971954604e-05, "loss": 0.8724, "step": 920 }, { "epoch": 0.016889437745169256, "grad_norm": 1.8067768591194995, "learning_rate": 1.999761476739227e-05, "loss": 0.8768, "step": 930 }, { "epoch": 0.017071044602644195, "grad_norm": 1.8143416007770585, "learning_rate": 1.9997487242387433e-05, "loss": 0.8676, "step": 940 }, { "epoch": 0.017252651460119135, "grad_norm": 1.8721111457225121, "learning_rate": 1.9997356396982434e-05, "loss": 0.8746, "step": 950 }, { "epoch": 0.017434258317594074, "grad_norm": 1.7566508532842318, "learning_rate": 1.9997222231220736e-05, "loss": 0.8637, "step": 960 }, { "epoch": 0.01761586517506901, "grad_norm": 1.84078197563772, "learning_rate": 1.9997084745146896e-05, "loss": 0.8559, "step": 970 }, { "epoch": 0.01779747203254395, "grad_norm": 1.7161636779399536, "learning_rate": 1.9996943938806578e-05, "loss": 0.8715, "step": 980 }, { "epoch": 0.017979078890018887, "grad_norm": 1.7234221701422847, "learning_rate": 1.999679981224655e-05, "loss": 0.8699, "step": 990 }, { "epoch": 0.018160685747493827, "grad_norm": 1.7324473349539034, "learning_rate": 1.999665236551467e-05, "loss": 0.8774, "step": 1000 }, { "epoch": 0.018342292604968762, "grad_norm": 1.7309382424877815, "learning_rate": 1.9996501598659916e-05, "loss": 0.874, "step": 1010 }, { "epoch": 0.0185238994624437, "grad_norm": 1.7584137830545667, "learning_rate": 1.9996347511732362e-05, "loss": 0.869, "step": 1020 }, { "epoch": 0.01870550631991864, "grad_norm": 1.7966705431127983, "learning_rate": 1.9996190104783183e-05, "loss": 0.8747, "step": 1030 }, { "epoch": 0.01888711317739358, "grad_norm": 1.7440726211537043, "learning_rate": 1.9996029377864653e-05, "loss": 0.8641, "step": 1040 }, { "epoch": 0.019068720034868515, "grad_norm": 1.790098197227981, "learning_rate": 1.999586533103016e-05, "loss": 0.8755, "step": 1050 }, { "epoch": 0.019250326892343454, "grad_norm": 1.892065752472464, "learning_rate": 1.999569796433418e-05, "loss": 0.8742, "step": 1060 }, { "epoch": 0.019431933749818393, "grad_norm": 1.6628752959125366, "learning_rate": 1.9995527277832308e-05, "loss": 0.8471, "step": 1070 }, { "epoch": 0.019613540607293332, "grad_norm": 1.7207437589179413, "learning_rate": 1.999535327158123e-05, "loss": 0.8749, "step": 1080 }, { "epoch": 0.019795147464768268, "grad_norm": 1.6325060334098291, "learning_rate": 1.9995175945638736e-05, "loss": 0.8683, "step": 1090 }, { "epoch": 0.019976754322243207, "grad_norm": 1.639810342412919, "learning_rate": 1.9994995300063716e-05, "loss": 0.8574, "step": 1100 }, { "epoch": 0.020158361179718146, "grad_norm": 1.7550089999752214, "learning_rate": 1.9994811334916174e-05, "loss": 0.8784, "step": 1110 }, { "epoch": 0.020339968037193085, "grad_norm": 1.802584397981231, "learning_rate": 1.9994624050257205e-05, "loss": 0.8592, "step": 1120 }, { "epoch": 0.02052157489466802, "grad_norm": 2.184874252708985, "learning_rate": 1.9994433446149014e-05, "loss": 0.8779, "step": 1130 }, { "epoch": 0.02070318175214296, "grad_norm": 1.7411937500297876, "learning_rate": 1.9994239522654903e-05, "loss": 0.8703, "step": 1140 }, { "epoch": 0.0208847886096179, "grad_norm": 1.6570184356513136, "learning_rate": 1.9994042279839276e-05, "loss": 0.8635, "step": 1150 }, { "epoch": 0.02106639546709284, "grad_norm": 1.7061748105122787, "learning_rate": 1.9993841717767646e-05, "loss": 0.8593, "step": 1160 }, { "epoch": 0.021248002324567777, "grad_norm": 1.664024235351797, "learning_rate": 1.9993637836506624e-05, "loss": 0.8594, "step": 1170 }, { "epoch": 0.021429609182042713, "grad_norm": 1.5872768208965355, "learning_rate": 1.999343063612392e-05, "loss": 0.8533, "step": 1180 }, { "epoch": 0.021611216039517652, "grad_norm": 1.8406121682958212, "learning_rate": 1.9993220116688358e-05, "loss": 0.8731, "step": 1190 }, { "epoch": 0.02179282289699259, "grad_norm": 1.6990786648328935, "learning_rate": 1.9993006278269845e-05, "loss": 0.8627, "step": 1200 }, { "epoch": 0.02197442975446753, "grad_norm": 1.645224703827949, "learning_rate": 1.999278912093941e-05, "loss": 0.851, "step": 1210 }, { "epoch": 0.022156036611942466, "grad_norm": 1.649943688372278, "learning_rate": 1.9992568644769175e-05, "loss": 0.867, "step": 1220 }, { "epoch": 0.022337643469417405, "grad_norm": 1.5529114700016295, "learning_rate": 1.9992344849832363e-05, "loss": 0.8422, "step": 1230 }, { "epoch": 0.022519250326892344, "grad_norm": 1.6332838226045259, "learning_rate": 1.9992117736203305e-05, "loss": 0.8697, "step": 1240 }, { "epoch": 0.022700857184367283, "grad_norm": 1.6003502879774076, "learning_rate": 1.999188730395743e-05, "loss": 0.8665, "step": 1250 }, { "epoch": 0.02288246404184222, "grad_norm": 1.6147350470418778, "learning_rate": 1.9991653553171263e-05, "loss": 0.8681, "step": 1260 }, { "epoch": 0.023064070899317158, "grad_norm": 1.748291872599324, "learning_rate": 1.999141648392245e-05, "loss": 0.8604, "step": 1270 }, { "epoch": 0.023245677756792097, "grad_norm": 1.6985146818857326, "learning_rate": 1.9991176096289716e-05, "loss": 0.8706, "step": 1280 }, { "epoch": 0.023427284614267036, "grad_norm": 1.6115358511101807, "learning_rate": 1.9990932390352907e-05, "loss": 0.8522, "step": 1290 }, { "epoch": 0.023608891471741972, "grad_norm": 1.6948101373371465, "learning_rate": 1.9990685366192957e-05, "loss": 0.8555, "step": 1300 }, { "epoch": 0.02379049832921691, "grad_norm": 1.6003854508113202, "learning_rate": 1.9990435023891916e-05, "loss": 0.8534, "step": 1310 }, { "epoch": 0.02397210518669185, "grad_norm": 1.7173927657934387, "learning_rate": 1.9990181363532923e-05, "loss": 0.8573, "step": 1320 }, { "epoch": 0.02415371204416679, "grad_norm": 1.6029446531563996, "learning_rate": 1.998992438520023e-05, "loss": 0.8749, "step": 1330 }, { "epoch": 0.024335318901641725, "grad_norm": 1.6600337616614003, "learning_rate": 1.998966408897918e-05, "loss": 0.8528, "step": 1340 }, { "epoch": 0.024516925759116664, "grad_norm": 1.6479578822316647, "learning_rate": 1.9989400474956224e-05, "loss": 0.8519, "step": 1350 }, { "epoch": 0.024698532616591603, "grad_norm": 2.3910863581486868, "learning_rate": 1.9989133543218917e-05, "loss": 0.8472, "step": 1360 }, { "epoch": 0.024880139474066542, "grad_norm": 1.565373401670308, "learning_rate": 1.9988863293855912e-05, "loss": 0.8529, "step": 1370 }, { "epoch": 0.025061746331541478, "grad_norm": 1.5830826389352255, "learning_rate": 1.9988589726956967e-05, "loss": 0.8513, "step": 1380 }, { "epoch": 0.025243353189016417, "grad_norm": 1.6238977162886012, "learning_rate": 1.9988312842612935e-05, "loss": 0.8731, "step": 1390 }, { "epoch": 0.025424960046491356, "grad_norm": 1.6540023367553163, "learning_rate": 1.9988032640915784e-05, "loss": 0.8638, "step": 1400 }, { "epoch": 0.025606566903966295, "grad_norm": 1.5826508654808467, "learning_rate": 1.9987749121958564e-05, "loss": 0.854, "step": 1410 }, { "epoch": 0.02578817376144123, "grad_norm": 1.600397436777816, "learning_rate": 1.998746228583545e-05, "loss": 0.8614, "step": 1420 }, { "epoch": 0.02596978061891617, "grad_norm": 1.6399654756312576, "learning_rate": 1.99871721326417e-05, "loss": 0.8572, "step": 1430 }, { "epoch": 0.02615138747639111, "grad_norm": 1.5969872229847344, "learning_rate": 1.9986878662473684e-05, "loss": 0.8554, "step": 1440 }, { "epoch": 0.026332994333866048, "grad_norm": 1.5231081386740226, "learning_rate": 1.9986581875428867e-05, "loss": 0.8419, "step": 1450 }, { "epoch": 0.026514601191340983, "grad_norm": 1.5904156368582956, "learning_rate": 1.9986281771605826e-05, "loss": 0.8706, "step": 1460 }, { "epoch": 0.026696208048815923, "grad_norm": 1.6711533956025761, "learning_rate": 1.998597835110422e-05, "loss": 0.8783, "step": 1470 }, { "epoch": 0.02687781490629086, "grad_norm": 1.4964793814355264, "learning_rate": 1.9985671614024834e-05, "loss": 0.8459, "step": 1480 }, { "epoch": 0.0270594217637658, "grad_norm": 4.194425501133323, "learning_rate": 1.9985361560469538e-05, "loss": 0.852, "step": 1490 }, { "epoch": 0.027241028621240736, "grad_norm": 2.3010680458770367, "learning_rate": 1.9985048190541305e-05, "loss": 0.8499, "step": 1500 }, { "epoch": 0.027422635478715675, "grad_norm": 1.6092587361536366, "learning_rate": 1.998473150434422e-05, "loss": 0.8681, "step": 1510 }, { "epoch": 0.027604242336190615, "grad_norm": 1.4890838524222756, "learning_rate": 1.9984411501983456e-05, "loss": 0.8487, "step": 1520 }, { "epoch": 0.027785849193665554, "grad_norm": 1.5825896156895873, "learning_rate": 1.9984088183565292e-05, "loss": 0.8487, "step": 1530 }, { "epoch": 0.027967456051140493, "grad_norm": 1.5912023741194057, "learning_rate": 1.9983761549197118e-05, "loss": 0.8442, "step": 1540 }, { "epoch": 0.02814906290861543, "grad_norm": 1.6146342214205378, "learning_rate": 1.998343159898741e-05, "loss": 0.867, "step": 1550 }, { "epoch": 0.028330669766090368, "grad_norm": 1.5111846764229768, "learning_rate": 1.9983098333045748e-05, "loss": 0.8326, "step": 1560 }, { "epoch": 0.028512276623565307, "grad_norm": 1.6436259073934727, "learning_rate": 1.9982761751482828e-05, "loss": 0.8527, "step": 1570 }, { "epoch": 0.028693883481040246, "grad_norm": 1.6928978247210946, "learning_rate": 1.9982421854410433e-05, "loss": 0.8621, "step": 1580 }, { "epoch": 0.02887549033851518, "grad_norm": 1.6139426001808408, "learning_rate": 1.9982078641941445e-05, "loss": 0.8435, "step": 1590 }, { "epoch": 0.02905709719599012, "grad_norm": 1.5457291693874822, "learning_rate": 1.9981732114189863e-05, "loss": 0.8524, "step": 1600 }, { "epoch": 0.02923870405346506, "grad_norm": 1.6701375128483353, "learning_rate": 1.9981382271270767e-05, "loss": 0.8394, "step": 1610 }, { "epoch": 0.02942031091094, "grad_norm": 1.4991499961943313, "learning_rate": 1.9981029113300353e-05, "loss": 0.8702, "step": 1620 }, { "epoch": 0.029601917768414934, "grad_norm": 1.6159778795048056, "learning_rate": 1.9980672640395916e-05, "loss": 0.8587, "step": 1630 }, { "epoch": 0.029783524625889873, "grad_norm": 1.5216235047909379, "learning_rate": 1.9980312852675842e-05, "loss": 0.8694, "step": 1640 }, { "epoch": 0.029965131483364812, "grad_norm": 1.599738483495354, "learning_rate": 1.9979949750259632e-05, "loss": 0.8193, "step": 1650 }, { "epoch": 0.03014673834083975, "grad_norm": 1.5385063852515763, "learning_rate": 1.9979583333267872e-05, "loss": 0.8417, "step": 1660 }, { "epoch": 0.030328345198314687, "grad_norm": 1.572625526718443, "learning_rate": 1.9979213601822268e-05, "loss": 0.8445, "step": 1670 }, { "epoch": 0.030509952055789626, "grad_norm": 1.557343780554663, "learning_rate": 1.9978840556045612e-05, "loss": 0.8517, "step": 1680 }, { "epoch": 0.030691558913264565, "grad_norm": 1.4983835210490142, "learning_rate": 1.9978464196061798e-05, "loss": 0.8418, "step": 1690 }, { "epoch": 0.030873165770739504, "grad_norm": 1.4884646792001692, "learning_rate": 1.9978084521995825e-05, "loss": 0.8496, "step": 1700 }, { "epoch": 0.03105477262821444, "grad_norm": 1.4434372977886822, "learning_rate": 1.9977701533973798e-05, "loss": 0.8277, "step": 1710 }, { "epoch": 0.03123637948568938, "grad_norm": 1.527612480444595, "learning_rate": 1.9977315232122908e-05, "loss": 0.8613, "step": 1720 }, { "epoch": 0.03141798634316432, "grad_norm": 1.5368530695984508, "learning_rate": 1.997692561657146e-05, "loss": 0.8552, "step": 1730 }, { "epoch": 0.03159959320063926, "grad_norm": 1.5250849546883603, "learning_rate": 1.9976532687448852e-05, "loss": 0.851, "step": 1740 }, { "epoch": 0.031781200058114197, "grad_norm": 1.4790040071917725, "learning_rate": 1.997613644488559e-05, "loss": 0.8156, "step": 1750 }, { "epoch": 0.031962806915589136, "grad_norm": 3.0703199589102432, "learning_rate": 1.997573688901327e-05, "loss": 0.8475, "step": 1760 }, { "epoch": 0.03214441377306407, "grad_norm": 1.558626606506934, "learning_rate": 1.997533401996459e-05, "loss": 0.8362, "step": 1770 }, { "epoch": 0.03232602063053901, "grad_norm": 1.528679924800097, "learning_rate": 1.9974927837873365e-05, "loss": 0.8459, "step": 1780 }, { "epoch": 0.032507627488013946, "grad_norm": 1.468257776224465, "learning_rate": 1.9974518342874488e-05, "loss": 0.85, "step": 1790 }, { "epoch": 0.032689234345488885, "grad_norm": 1.5245863911378854, "learning_rate": 1.9974105535103963e-05, "loss": 0.8511, "step": 1800 }, { "epoch": 0.032870841202963824, "grad_norm": 1.453764113068433, "learning_rate": 1.9973689414698896e-05, "loss": 0.8538, "step": 1810 }, { "epoch": 0.03305244806043876, "grad_norm": 1.50443749137656, "learning_rate": 1.9973269981797488e-05, "loss": 0.857, "step": 1820 }, { "epoch": 0.0332340549179137, "grad_norm": 1.6142113299307748, "learning_rate": 1.997284723653904e-05, "loss": 0.8444, "step": 1830 }, { "epoch": 0.03341566177538864, "grad_norm": 1.4818242376404904, "learning_rate": 1.9972421179063964e-05, "loss": 0.8484, "step": 1840 }, { "epoch": 0.033597268632863574, "grad_norm": 1.5292492639165942, "learning_rate": 1.9971991809513758e-05, "loss": 0.8476, "step": 1850 }, { "epoch": 0.03377887549033851, "grad_norm": 1.4755500798384362, "learning_rate": 1.9971559128031024e-05, "loss": 0.8559, "step": 1860 }, { "epoch": 0.03396048234781345, "grad_norm": 1.4928248510046438, "learning_rate": 1.9971123134759468e-05, "loss": 0.8506, "step": 1870 }, { "epoch": 0.03414208920528839, "grad_norm": 1.5251493146108082, "learning_rate": 1.9970683829843896e-05, "loss": 0.8486, "step": 1880 }, { "epoch": 0.03432369606276333, "grad_norm": 1.5250678818257455, "learning_rate": 1.9970241213430208e-05, "loss": 0.8484, "step": 1890 }, { "epoch": 0.03450530292023827, "grad_norm": 1.4631144497739006, "learning_rate": 1.9969795285665405e-05, "loss": 0.8311, "step": 1900 }, { "epoch": 0.03468690977771321, "grad_norm": 1.4911793532613264, "learning_rate": 1.99693460466976e-05, "loss": 0.835, "step": 1910 }, { "epoch": 0.03486851663518815, "grad_norm": 1.4839256870340733, "learning_rate": 1.9968893496675982e-05, "loss": 0.8475, "step": 1920 }, { "epoch": 0.035050123492663086, "grad_norm": 1.4692170100711681, "learning_rate": 1.9968437635750866e-05, "loss": 0.8318, "step": 1930 }, { "epoch": 0.03523173035013802, "grad_norm": 1.4988449826160994, "learning_rate": 1.9967978464073643e-05, "loss": 0.8548, "step": 1940 }, { "epoch": 0.03541333720761296, "grad_norm": 1.4797659202092048, "learning_rate": 1.9967515981796827e-05, "loss": 0.8192, "step": 1950 }, { "epoch": 0.0355949440650879, "grad_norm": 1.5459429848289925, "learning_rate": 1.9967050189074007e-05, "loss": 0.8535, "step": 1960 }, { "epoch": 0.035776550922562836, "grad_norm": 1.5220607066355418, "learning_rate": 1.9966581086059893e-05, "loss": 0.8508, "step": 1970 }, { "epoch": 0.035958157780037775, "grad_norm": 1.445682714989805, "learning_rate": 1.9966108672910282e-05, "loss": 0.8391, "step": 1980 }, { "epoch": 0.036139764637512714, "grad_norm": 1.6430945887016166, "learning_rate": 1.9965632949782074e-05, "loss": 0.8518, "step": 1990 }, { "epoch": 0.03632137149498765, "grad_norm": 1.4343148217928037, "learning_rate": 1.996515391683326e-05, "loss": 0.8456, "step": 2000 }, { "epoch": 0.03650297835246259, "grad_norm": 1.4872819106809596, "learning_rate": 1.996467157422295e-05, "loss": 0.834, "step": 2010 }, { "epoch": 0.036684585209937524, "grad_norm": 1.5365620888745386, "learning_rate": 1.996418592211134e-05, "loss": 0.8303, "step": 2020 }, { "epoch": 0.036866192067412464, "grad_norm": 1.5373902358252922, "learning_rate": 1.996369696065972e-05, "loss": 0.859, "step": 2030 }, { "epoch": 0.0370477989248874, "grad_norm": 1.4636687202420098, "learning_rate": 1.996320469003048e-05, "loss": 0.8527, "step": 2040 }, { "epoch": 0.03722940578236234, "grad_norm": 1.4739151147943055, "learning_rate": 1.9962709110387132e-05, "loss": 0.8309, "step": 2050 }, { "epoch": 0.03741101263983728, "grad_norm": 1.4764809028274197, "learning_rate": 1.9962210221894257e-05, "loss": 0.8405, "step": 2060 }, { "epoch": 0.03759261949731222, "grad_norm": 1.500785621330756, "learning_rate": 1.996170802471755e-05, "loss": 0.8429, "step": 2070 }, { "epoch": 0.03777422635478716, "grad_norm": 1.4854808833329198, "learning_rate": 1.996120251902381e-05, "loss": 0.8543, "step": 2080 }, { "epoch": 0.0379558332122621, "grad_norm": 1.4137335907303532, "learning_rate": 1.9960693704980915e-05, "loss": 0.8307, "step": 2090 }, { "epoch": 0.03813744006973703, "grad_norm": 1.4794247098761264, "learning_rate": 1.9960181582757862e-05, "loss": 0.8273, "step": 2100 }, { "epoch": 0.03831904692721197, "grad_norm": 1.5019578592872713, "learning_rate": 1.9959666152524737e-05, "loss": 0.8351, "step": 2110 }, { "epoch": 0.03850065378468691, "grad_norm": 1.4555705844867364, "learning_rate": 1.9959147414452725e-05, "loss": 0.8305, "step": 2120 }, { "epoch": 0.03868226064216185, "grad_norm": 1.464789906090683, "learning_rate": 1.9958625368714113e-05, "loss": 0.8269, "step": 2130 }, { "epoch": 0.03886386749963679, "grad_norm": 1.8710947017113215, "learning_rate": 1.9958100015482283e-05, "loss": 0.8361, "step": 2140 }, { "epoch": 0.039045474357111726, "grad_norm": 1.438661673692853, "learning_rate": 1.995757135493172e-05, "loss": 0.8382, "step": 2150 }, { "epoch": 0.039227081214586665, "grad_norm": 1.4487294739181196, "learning_rate": 1.9957039387238e-05, "loss": 0.8224, "step": 2160 }, { "epoch": 0.039408688072061604, "grad_norm": 1.4802061143067597, "learning_rate": 1.9956504112577806e-05, "loss": 0.8388, "step": 2170 }, { "epoch": 0.039590294929536536, "grad_norm": 1.4690665454451353, "learning_rate": 1.9955965531128914e-05, "loss": 0.8358, "step": 2180 }, { "epoch": 0.039771901787011475, "grad_norm": 1.4757395111331706, "learning_rate": 1.99554236430702e-05, "loss": 0.8495, "step": 2190 }, { "epoch": 0.039953508644486414, "grad_norm": 1.4275510699082479, "learning_rate": 1.9954878448581636e-05, "loss": 0.8439, "step": 2200 }, { "epoch": 0.04013511550196135, "grad_norm": 1.426673329876081, "learning_rate": 1.9954329947844297e-05, "loss": 0.838, "step": 2210 }, { "epoch": 0.04031672235943629, "grad_norm": 1.3926462701685771, "learning_rate": 1.995377814104035e-05, "loss": 0.8432, "step": 2220 }, { "epoch": 0.04049832921691123, "grad_norm": 1.4820787063490697, "learning_rate": 1.9953223028353063e-05, "loss": 0.8375, "step": 2230 }, { "epoch": 0.04067993607438617, "grad_norm": 1.4179061595330453, "learning_rate": 1.9952664609966804e-05, "loss": 0.8255, "step": 2240 }, { "epoch": 0.04086154293186111, "grad_norm": 1.3936429806998236, "learning_rate": 1.9952102886067035e-05, "loss": 0.8273, "step": 2250 }, { "epoch": 0.04104314978933604, "grad_norm": 1.4407316507678585, "learning_rate": 1.995153785684032e-05, "loss": 0.8269, "step": 2260 }, { "epoch": 0.04122475664681098, "grad_norm": 1.5010707823756162, "learning_rate": 1.9950969522474314e-05, "loss": 0.8392, "step": 2270 }, { "epoch": 0.04140636350428592, "grad_norm": 1.3799847040627307, "learning_rate": 1.995039788315778e-05, "loss": 0.8336, "step": 2280 }, { "epoch": 0.04158797036176086, "grad_norm": 1.455751598696671, "learning_rate": 1.994982293908057e-05, "loss": 0.8461, "step": 2290 }, { "epoch": 0.0417695772192358, "grad_norm": 1.4309526789109583, "learning_rate": 1.9949244690433637e-05, "loss": 0.8314, "step": 2300 }, { "epoch": 0.04195118407671074, "grad_norm": 1.4071534543452069, "learning_rate": 1.994866313740903e-05, "loss": 0.8279, "step": 2310 }, { "epoch": 0.04213279093418568, "grad_norm": 1.50313377917687, "learning_rate": 1.9948078280199894e-05, "loss": 0.846, "step": 2320 }, { "epoch": 0.042314397791660616, "grad_norm": 1.3983246821582376, "learning_rate": 1.994749011900048e-05, "loss": 0.8405, "step": 2330 }, { "epoch": 0.042496004649135555, "grad_norm": 1.4438941835367285, "learning_rate": 1.9946898654006124e-05, "loss": 0.8352, "step": 2340 }, { "epoch": 0.04267761150661049, "grad_norm": 1.5290216459805304, "learning_rate": 1.994630388541327e-05, "loss": 0.8283, "step": 2350 }, { "epoch": 0.042859218364085426, "grad_norm": 1.4049003969784917, "learning_rate": 1.9945705813419453e-05, "loss": 0.826, "step": 2360 }, { "epoch": 0.043040825221560365, "grad_norm": 1.4280663618258846, "learning_rate": 1.9945104438223308e-05, "loss": 0.8316, "step": 2370 }, { "epoch": 0.043222432079035304, "grad_norm": 1.412887890345131, "learning_rate": 1.994449976002456e-05, "loss": 0.8178, "step": 2380 }, { "epoch": 0.04340403893651024, "grad_norm": 1.4187818979671112, "learning_rate": 1.9943891779024044e-05, "loss": 0.8273, "step": 2390 }, { "epoch": 0.04358564579398518, "grad_norm": 1.4610281026919552, "learning_rate": 1.994328049542368e-05, "loss": 0.8427, "step": 2400 }, { "epoch": 0.04376725265146012, "grad_norm": 1.37973239091475, "learning_rate": 1.9942665909426494e-05, "loss": 0.8472, "step": 2410 }, { "epoch": 0.04394885950893506, "grad_norm": 1.4179892421399742, "learning_rate": 1.99420480212366e-05, "loss": 0.8171, "step": 2420 }, { "epoch": 0.04413046636640999, "grad_norm": 1.415250858832967, "learning_rate": 1.9941426831059213e-05, "loss": 0.8219, "step": 2430 }, { "epoch": 0.04431207322388493, "grad_norm": 1.3728151140563505, "learning_rate": 1.994080233910065e-05, "loss": 0.8301, "step": 2440 }, { "epoch": 0.04449368008135987, "grad_norm": 1.4299172220287941, "learning_rate": 1.994017454556832e-05, "loss": 0.8211, "step": 2450 }, { "epoch": 0.04467528693883481, "grad_norm": 1.6217971714988966, "learning_rate": 1.993954345067072e-05, "loss": 0.8291, "step": 2460 }, { "epoch": 0.04485689379630975, "grad_norm": 1.399477454971511, "learning_rate": 1.9938909054617458e-05, "loss": 0.8213, "step": 2470 }, { "epoch": 0.04503850065378469, "grad_norm": 1.4763909877866872, "learning_rate": 1.9938271357619227e-05, "loss": 0.8423, "step": 2480 }, { "epoch": 0.04522010751125963, "grad_norm": 1.379003354947015, "learning_rate": 1.9937630359887822e-05, "loss": 0.8182, "step": 2490 }, { "epoch": 0.045401714368734566, "grad_norm": 1.5727057484367917, "learning_rate": 1.993698606163614e-05, "loss": 0.8261, "step": 2500 }, { "epoch": 0.0455833212262095, "grad_norm": 1.4128236938547003, "learning_rate": 1.993633846307816e-05, "loss": 0.8225, "step": 2510 }, { "epoch": 0.04576492808368444, "grad_norm": 1.3684432935885176, "learning_rate": 1.9935687564428967e-05, "loss": 0.8213, "step": 2520 }, { "epoch": 0.04594653494115938, "grad_norm": 1.3897579815592735, "learning_rate": 1.9935033365904743e-05, "loss": 0.8344, "step": 2530 }, { "epoch": 0.046128141798634316, "grad_norm": 1.413359153513314, "learning_rate": 1.993437586772276e-05, "loss": 0.8434, "step": 2540 }, { "epoch": 0.046309748656109255, "grad_norm": 1.378503250435036, "learning_rate": 1.993371507010138e-05, "loss": 0.8171, "step": 2550 }, { "epoch": 0.046491355513584194, "grad_norm": 1.3627502686040638, "learning_rate": 1.9933050973260088e-05, "loss": 0.8483, "step": 2560 }, { "epoch": 0.04667296237105913, "grad_norm": 1.310195817287414, "learning_rate": 1.9932383577419432e-05, "loss": 0.8225, "step": 2570 }, { "epoch": 0.04685456922853407, "grad_norm": 1.3700263083037667, "learning_rate": 1.9931712882801072e-05, "loss": 0.8268, "step": 2580 }, { "epoch": 0.047036176086009004, "grad_norm": 1.4677876777915355, "learning_rate": 1.9931038889627764e-05, "loss": 0.8361, "step": 2590 }, { "epoch": 0.047217782943483944, "grad_norm": 1.389116264874626, "learning_rate": 1.993036159812336e-05, "loss": 0.8339, "step": 2600 }, { "epoch": 0.04739938980095888, "grad_norm": 1.3501876600285039, "learning_rate": 1.9929681008512798e-05, "loss": 0.8348, "step": 2610 }, { "epoch": 0.04758099665843382, "grad_norm": 1.569306591746755, "learning_rate": 1.992899712102212e-05, "loss": 0.8307, "step": 2620 }, { "epoch": 0.04776260351590876, "grad_norm": 1.35055640468862, "learning_rate": 1.992830993587846e-05, "loss": 0.8371, "step": 2630 }, { "epoch": 0.0479442103733837, "grad_norm": 1.3094270794228269, "learning_rate": 1.9927619453310052e-05, "loss": 0.8244, "step": 2640 }, { "epoch": 0.04812581723085864, "grad_norm": 1.5701267619938148, "learning_rate": 1.992692567354622e-05, "loss": 0.8279, "step": 2650 }, { "epoch": 0.04830742408833358, "grad_norm": 1.4237258987903412, "learning_rate": 1.992622859681738e-05, "loss": 0.8145, "step": 2660 }, { "epoch": 0.04848903094580851, "grad_norm": 1.501782271365487, "learning_rate": 1.9925528223355054e-05, "loss": 0.8193, "step": 2670 }, { "epoch": 0.04867063780328345, "grad_norm": 1.327337217658768, "learning_rate": 1.9924824553391847e-05, "loss": 0.8289, "step": 2680 }, { "epoch": 0.04885224466075839, "grad_norm": 1.339105089389385, "learning_rate": 1.992411758716147e-05, "loss": 0.8266, "step": 2690 }, { "epoch": 0.04903385151823333, "grad_norm": 1.4266486865394086, "learning_rate": 1.992340732489872e-05, "loss": 0.8216, "step": 2700 }, { "epoch": 0.04921545837570827, "grad_norm": 1.3204877811782798, "learning_rate": 1.9922693766839492e-05, "loss": 0.8339, "step": 2710 }, { "epoch": 0.049397065233183206, "grad_norm": 1.4088504943998983, "learning_rate": 1.9921976913220774e-05, "loss": 0.8147, "step": 2720 }, { "epoch": 0.049578672090658145, "grad_norm": 1.3894140397991237, "learning_rate": 1.992125676428065e-05, "loss": 0.8319, "step": 2730 }, { "epoch": 0.049760278948133084, "grad_norm": 1.3642559441225492, "learning_rate": 1.9920533320258303e-05, "loss": 0.8114, "step": 2740 }, { "epoch": 0.04994188580560802, "grad_norm": 1.3350529474583028, "learning_rate": 1.9919806581393998e-05, "loss": 0.8327, "step": 2750 }, { "epoch": 0.050123492663082955, "grad_norm": 1.454980921435489, "learning_rate": 1.991907654792911e-05, "loss": 0.8334, "step": 2760 }, { "epoch": 0.050305099520557894, "grad_norm": 1.3812282392000885, "learning_rate": 1.99183432201061e-05, "loss": 0.8378, "step": 2770 }, { "epoch": 0.05048670637803283, "grad_norm": 1.4693423400524936, "learning_rate": 1.991760659816851e-05, "loss": 0.8314, "step": 2780 }, { "epoch": 0.05066831323550777, "grad_norm": 1.9891857863910396, "learning_rate": 1.9916866682361004e-05, "loss": 0.8116, "step": 2790 }, { "epoch": 0.05084992009298271, "grad_norm": 1.2909218178804032, "learning_rate": 1.9916123472929323e-05, "loss": 0.8233, "step": 2800 }, { "epoch": 0.05103152695045765, "grad_norm": 1.3749461341311975, "learning_rate": 1.9915376970120302e-05, "loss": 0.8232, "step": 2810 }, { "epoch": 0.05121313380793259, "grad_norm": 1.325618578281526, "learning_rate": 1.991462717418187e-05, "loss": 0.8208, "step": 2820 }, { "epoch": 0.05139474066540753, "grad_norm": 1.3178482479343303, "learning_rate": 1.9913874085363054e-05, "loss": 0.8212, "step": 2830 }, { "epoch": 0.05157634752288246, "grad_norm": 1.4077199494225696, "learning_rate": 1.9913117703913975e-05, "loss": 0.822, "step": 2840 }, { "epoch": 0.0517579543803574, "grad_norm": 1.3045274897665595, "learning_rate": 1.991235803008584e-05, "loss": 0.8174, "step": 2850 }, { "epoch": 0.05193956123783234, "grad_norm": 1.3441649908687252, "learning_rate": 1.991159506413096e-05, "loss": 0.8117, "step": 2860 }, { "epoch": 0.05212116809530728, "grad_norm": 1.3548948083856747, "learning_rate": 1.9910828806302727e-05, "loss": 0.8094, "step": 2870 }, { "epoch": 0.05230277495278222, "grad_norm": 1.315874836366516, "learning_rate": 1.991005925685564e-05, "loss": 0.8276, "step": 2880 }, { "epoch": 0.05248438181025716, "grad_norm": 1.4007670389866353, "learning_rate": 1.9909286416045278e-05, "loss": 0.8166, "step": 2890 }, { "epoch": 0.052665988667732096, "grad_norm": 1.3403679000438806, "learning_rate": 1.9908510284128327e-05, "loss": 0.8204, "step": 2900 }, { "epoch": 0.052847595525207035, "grad_norm": 1.3472461840708037, "learning_rate": 1.9907730861362554e-05, "loss": 0.8072, "step": 2910 }, { "epoch": 0.05302920238268197, "grad_norm": 1.3975717245739994, "learning_rate": 1.9906948148006823e-05, "loss": 0.8305, "step": 2920 }, { "epoch": 0.053210809240156906, "grad_norm": 1.3535406972743962, "learning_rate": 1.9906162144321094e-05, "loss": 0.8105, "step": 2930 }, { "epoch": 0.053392416097631845, "grad_norm": 1.2963098227177106, "learning_rate": 1.9905372850566414e-05, "loss": 0.8177, "step": 2940 }, { "epoch": 0.053574022955106784, "grad_norm": 1.3777463012334363, "learning_rate": 1.990458026700493e-05, "loss": 0.8229, "step": 2950 }, { "epoch": 0.05375562981258172, "grad_norm": 1.3756437129315924, "learning_rate": 1.9903784393899875e-05, "loss": 0.8116, "step": 2960 }, { "epoch": 0.05393723667005666, "grad_norm": 2.063100645780123, "learning_rate": 1.990298523151558e-05, "loss": 0.8271, "step": 2970 }, { "epoch": 0.0541188435275316, "grad_norm": 1.3195235778398415, "learning_rate": 1.9902182780117464e-05, "loss": 0.8161, "step": 2980 }, { "epoch": 0.05430045038500654, "grad_norm": 1.342122272302943, "learning_rate": 1.9901377039972033e-05, "loss": 0.8143, "step": 2990 }, { "epoch": 0.05448205724248147, "grad_norm": 1.3308533115050398, "learning_rate": 1.9900568011346904e-05, "loss": 0.8222, "step": 3000 }, { "epoch": 0.05466366409995641, "grad_norm": 1.3897510249834, "learning_rate": 1.9899755694510766e-05, "loss": 0.8136, "step": 3010 }, { "epoch": 0.05484527095743135, "grad_norm": 1.3174933282512993, "learning_rate": 1.9898940089733413e-05, "loss": 0.8183, "step": 3020 }, { "epoch": 0.05502687781490629, "grad_norm": 1.3741998071831858, "learning_rate": 1.9898121197285726e-05, "loss": 0.8136, "step": 3030 }, { "epoch": 0.05520848467238123, "grad_norm": 1.3503959147892732, "learning_rate": 1.9897299017439677e-05, "loss": 0.8207, "step": 3040 }, { "epoch": 0.05539009152985617, "grad_norm": 1.3313318547263748, "learning_rate": 1.9896473550468333e-05, "loss": 0.8163, "step": 3050 }, { "epoch": 0.05557169838733111, "grad_norm": 1.3418921632639036, "learning_rate": 1.9895644796645844e-05, "loss": 0.8045, "step": 3060 }, { "epoch": 0.055753305244806046, "grad_norm": 1.2985045684826666, "learning_rate": 1.989481275624747e-05, "loss": 0.8081, "step": 3070 }, { "epoch": 0.055934912102280986, "grad_norm": 1.4020979333903405, "learning_rate": 1.9893977429549544e-05, "loss": 0.8373, "step": 3080 }, { "epoch": 0.05611651895975592, "grad_norm": 1.362117167968302, "learning_rate": 1.9893138816829495e-05, "loss": 0.826, "step": 3090 }, { "epoch": 0.05629812581723086, "grad_norm": 1.3215540094971285, "learning_rate": 1.9892296918365856e-05, "loss": 0.8165, "step": 3100 }, { "epoch": 0.056479732674705796, "grad_norm": 1.3402202895730493, "learning_rate": 1.989145173443823e-05, "loss": 0.8083, "step": 3110 }, { "epoch": 0.056661339532180735, "grad_norm": 1.3621165777161737, "learning_rate": 1.989060326532733e-05, "loss": 0.8153, "step": 3120 }, { "epoch": 0.056842946389655674, "grad_norm": 1.339912414035358, "learning_rate": 1.988975151131495e-05, "loss": 0.8156, "step": 3130 }, { "epoch": 0.05702455324713061, "grad_norm": 1.3043964931830303, "learning_rate": 1.9888896472683973e-05, "loss": 0.8106, "step": 3140 }, { "epoch": 0.05720616010460555, "grad_norm": 1.3697905383174243, "learning_rate": 1.9888038149718387e-05, "loss": 0.8093, "step": 3150 }, { "epoch": 0.05738776696208049, "grad_norm": 1.3702080496782343, "learning_rate": 1.9887176542703255e-05, "loss": 0.8223, "step": 3160 }, { "epoch": 0.057569373819555424, "grad_norm": 1.327735543277802, "learning_rate": 1.9886311651924735e-05, "loss": 0.8294, "step": 3170 }, { "epoch": 0.05775098067703036, "grad_norm": 1.3046228219283396, "learning_rate": 1.9885443477670087e-05, "loss": 0.8127, "step": 3180 }, { "epoch": 0.0579325875345053, "grad_norm": 1.4087015961543268, "learning_rate": 1.988457202022764e-05, "loss": 0.8142, "step": 3190 }, { "epoch": 0.05811419439198024, "grad_norm": 1.3232562455138464, "learning_rate": 1.9883697279886834e-05, "loss": 0.8247, "step": 3200 }, { "epoch": 0.05829580124945518, "grad_norm": 1.315014174273878, "learning_rate": 1.9882819256938186e-05, "loss": 0.8182, "step": 3210 }, { "epoch": 0.05847740810693012, "grad_norm": 1.5332870764869069, "learning_rate": 1.9881937951673312e-05, "loss": 0.8382, "step": 3220 }, { "epoch": 0.05865901496440506, "grad_norm": 1.3317570206533536, "learning_rate": 1.988105336438491e-05, "loss": 0.8278, "step": 3230 }, { "epoch": 0.05884062182188, "grad_norm": 1.3087569869389304, "learning_rate": 1.9880165495366773e-05, "loss": 0.7954, "step": 3240 }, { "epoch": 0.05902222867935493, "grad_norm": 1.3326775441298873, "learning_rate": 1.9879274344913785e-05, "loss": 0.8149, "step": 3250 }, { "epoch": 0.05920383553682987, "grad_norm": 1.3269988420257042, "learning_rate": 1.987837991332192e-05, "loss": 0.806, "step": 3260 }, { "epoch": 0.05938544239430481, "grad_norm": 1.379967929864518, "learning_rate": 1.9877482200888237e-05, "loss": 0.8175, "step": 3270 }, { "epoch": 0.05956704925177975, "grad_norm": 1.2972483509961443, "learning_rate": 1.9876581207910885e-05, "loss": 0.8203, "step": 3280 }, { "epoch": 0.059748656109254686, "grad_norm": 1.3146122045738877, "learning_rate": 1.987567693468911e-05, "loss": 0.8213, "step": 3290 }, { "epoch": 0.059930262966729625, "grad_norm": 1.4814528965303644, "learning_rate": 1.9874769381523236e-05, "loss": 0.8297, "step": 3300 }, { "epoch": 0.060111869824204564, "grad_norm": 1.3431093684829198, "learning_rate": 1.987385854871469e-05, "loss": 0.8151, "step": 3310 }, { "epoch": 0.0602934766816795, "grad_norm": 1.3761816409771812, "learning_rate": 1.9872944436565976e-05, "loss": 0.8264, "step": 3320 }, { "epoch": 0.060475083539154435, "grad_norm": 1.3245725299634739, "learning_rate": 1.9872027045380694e-05, "loss": 0.8209, "step": 3330 }, { "epoch": 0.060656690396629374, "grad_norm": 1.3494004713037524, "learning_rate": 1.987110637546353e-05, "loss": 0.807, "step": 3340 }, { "epoch": 0.060838297254104313, "grad_norm": 1.2937563406142667, "learning_rate": 1.9870182427120262e-05, "loss": 0.8242, "step": 3350 }, { "epoch": 0.06101990411157925, "grad_norm": 1.2749883694219544, "learning_rate": 1.986925520065775e-05, "loss": 0.8278, "step": 3360 }, { "epoch": 0.06120151096905419, "grad_norm": 1.6345231857732474, "learning_rate": 1.9868324696383956e-05, "loss": 0.8255, "step": 3370 }, { "epoch": 0.06138311782652913, "grad_norm": 1.211488924069118, "learning_rate": 1.9867390914607916e-05, "loss": 0.8085, "step": 3380 }, { "epoch": 0.06156472468400407, "grad_norm": 1.290148861089466, "learning_rate": 1.986645385563976e-05, "loss": 0.809, "step": 3390 }, { "epoch": 0.06174633154147901, "grad_norm": 1.3258879199675437, "learning_rate": 1.9865513519790713e-05, "loss": 0.8101, "step": 3400 }, { "epoch": 0.06192793839895394, "grad_norm": 1.3188233802593785, "learning_rate": 1.9864569907373075e-05, "loss": 0.8147, "step": 3410 }, { "epoch": 0.06210954525642888, "grad_norm": 1.3019620247532646, "learning_rate": 1.986362301870025e-05, "loss": 0.818, "step": 3420 }, { "epoch": 0.06229115211390382, "grad_norm": 1.3618657922065889, "learning_rate": 1.9862672854086718e-05, "loss": 0.8095, "step": 3430 }, { "epoch": 0.06247275897137876, "grad_norm": 1.3103190481064029, "learning_rate": 1.9861719413848052e-05, "loss": 0.8045, "step": 3440 }, { "epoch": 0.0626543658288537, "grad_norm": 1.2674320019906062, "learning_rate": 1.9860762698300908e-05, "loss": 0.8024, "step": 3450 }, { "epoch": 0.06283597268632864, "grad_norm": 1.3019235617874017, "learning_rate": 1.985980270776304e-05, "loss": 0.8062, "step": 3460 }, { "epoch": 0.06301757954380358, "grad_norm": 1.2863251281528392, "learning_rate": 1.985883944255328e-05, "loss": 0.8079, "step": 3470 }, { "epoch": 0.06319918640127851, "grad_norm": 1.3968836292821774, "learning_rate": 1.9857872902991547e-05, "loss": 0.8049, "step": 3480 }, { "epoch": 0.06338079325875345, "grad_norm": 1.351535799693524, "learning_rate": 1.9856903089398857e-05, "loss": 0.8073, "step": 3490 }, { "epoch": 0.06356240011622839, "grad_norm": 1.4934431783451982, "learning_rate": 1.9855930002097307e-05, "loss": 0.801, "step": 3500 }, { "epoch": 0.06374400697370333, "grad_norm": 1.308574573155386, "learning_rate": 1.9854953641410082e-05, "loss": 0.8182, "step": 3510 }, { "epoch": 0.06392561383117827, "grad_norm": 1.332117549802633, "learning_rate": 1.9853974007661453e-05, "loss": 0.8121, "step": 3520 }, { "epoch": 0.06410722068865321, "grad_norm": 1.3435630446234408, "learning_rate": 1.985299110117678e-05, "loss": 0.8057, "step": 3530 }, { "epoch": 0.06428882754612814, "grad_norm": 1.3147120824046585, "learning_rate": 1.985200492228251e-05, "loss": 0.8104, "step": 3540 }, { "epoch": 0.06447043440360307, "grad_norm": 1.2997767612147093, "learning_rate": 1.9851015471306175e-05, "loss": 0.8141, "step": 3550 }, { "epoch": 0.06465204126107801, "grad_norm": 1.4126586107986479, "learning_rate": 1.9850022748576397e-05, "loss": 0.8157, "step": 3560 }, { "epoch": 0.06483364811855295, "grad_norm": 1.394449718961483, "learning_rate": 1.984902675442288e-05, "loss": 0.8206, "step": 3570 }, { "epoch": 0.06501525497602789, "grad_norm": 1.2716509268835485, "learning_rate": 1.984802748917642e-05, "loss": 0.8125, "step": 3580 }, { "epoch": 0.06519686183350283, "grad_norm": 2.8036512795475046, "learning_rate": 1.9847024953168893e-05, "loss": 0.8143, "step": 3590 }, { "epoch": 0.06537846869097777, "grad_norm": 1.3576585985425087, "learning_rate": 1.9846019146733267e-05, "loss": 0.8229, "step": 3600 }, { "epoch": 0.06556007554845271, "grad_norm": 1.292777744562159, "learning_rate": 1.9845010070203593e-05, "loss": 0.8162, "step": 3610 }, { "epoch": 0.06574168240592765, "grad_norm": 1.2411264727587517, "learning_rate": 1.984399772391501e-05, "loss": 0.8071, "step": 3620 }, { "epoch": 0.06592328926340259, "grad_norm": 1.2901861805648462, "learning_rate": 1.9842982108203736e-05, "loss": 0.8081, "step": 3630 }, { "epoch": 0.06610489612087753, "grad_norm": 1.2755895095743797, "learning_rate": 1.984196322340709e-05, "loss": 0.8108, "step": 3640 }, { "epoch": 0.06628650297835247, "grad_norm": 1.2538260101771803, "learning_rate": 1.9840941069863464e-05, "loss": 0.8091, "step": 3650 }, { "epoch": 0.0664681098358274, "grad_norm": 1.2573275756416877, "learning_rate": 1.983991564791234e-05, "loss": 0.7966, "step": 3660 }, { "epoch": 0.06664971669330234, "grad_norm": 1.2753675664214739, "learning_rate": 1.983888695789428e-05, "loss": 0.7978, "step": 3670 }, { "epoch": 0.06683132355077728, "grad_norm": 2.522259504364877, "learning_rate": 1.983785500015094e-05, "loss": 0.7981, "step": 3680 }, { "epoch": 0.06701293040825222, "grad_norm": 1.3338469454143673, "learning_rate": 1.9836819775025062e-05, "loss": 0.7933, "step": 3690 }, { "epoch": 0.06719453726572715, "grad_norm": 1.7990838255987391, "learning_rate": 1.9835781282860455e-05, "loss": 0.8143, "step": 3700 }, { "epoch": 0.06737614412320209, "grad_norm": 1.2847840187435215, "learning_rate": 1.983473952400204e-05, "loss": 0.8019, "step": 3710 }, { "epoch": 0.06755775098067703, "grad_norm": 1.2761327996421097, "learning_rate": 1.9833694498795805e-05, "loss": 0.8113, "step": 3720 }, { "epoch": 0.06773935783815196, "grad_norm": 1.2882177609624588, "learning_rate": 1.9832646207588825e-05, "loss": 0.808, "step": 3730 }, { "epoch": 0.0679209646956269, "grad_norm": 1.2932911009183876, "learning_rate": 1.9831594650729266e-05, "loss": 0.8118, "step": 3740 }, { "epoch": 0.06810257155310184, "grad_norm": 1.2347606613803124, "learning_rate": 1.983053982856637e-05, "loss": 0.7979, "step": 3750 }, { "epoch": 0.06828417841057678, "grad_norm": 1.2719187789885622, "learning_rate": 1.9829481741450475e-05, "loss": 0.8029, "step": 3760 }, { "epoch": 0.06846578526805172, "grad_norm": 1.2552572002107218, "learning_rate": 1.982842038973299e-05, "loss": 0.8122, "step": 3770 }, { "epoch": 0.06864739212552666, "grad_norm": 1.264188068618, "learning_rate": 1.9827355773766416e-05, "loss": 0.8154, "step": 3780 }, { "epoch": 0.0688289989830016, "grad_norm": 1.2124441981208005, "learning_rate": 1.982628789390434e-05, "loss": 0.8028, "step": 3790 }, { "epoch": 0.06901060584047654, "grad_norm": 1.3351577067907685, "learning_rate": 1.9825216750501432e-05, "loss": 0.8138, "step": 3800 }, { "epoch": 0.06919221269795148, "grad_norm": 1.302327703150681, "learning_rate": 1.9824142343913436e-05, "loss": 0.8152, "step": 3810 }, { "epoch": 0.06937381955542642, "grad_norm": 2.927047850258561, "learning_rate": 1.9823064674497193e-05, "loss": 0.8098, "step": 3820 }, { "epoch": 0.06955542641290136, "grad_norm": 1.2516805746408277, "learning_rate": 1.982198374261062e-05, "loss": 0.8069, "step": 3830 }, { "epoch": 0.0697370332703763, "grad_norm": 1.250033248175745, "learning_rate": 1.9820899548612722e-05, "loss": 0.8022, "step": 3840 }, { "epoch": 0.06991864012785123, "grad_norm": 1.228843496407892, "learning_rate": 1.9819812092863585e-05, "loss": 0.7891, "step": 3850 }, { "epoch": 0.07010024698532617, "grad_norm": 1.2675477321287645, "learning_rate": 1.9818721375724376e-05, "loss": 0.8086, "step": 3860 }, { "epoch": 0.0702818538428011, "grad_norm": 1.245052364341022, "learning_rate": 1.9817627397557352e-05, "loss": 0.7982, "step": 3870 }, { "epoch": 0.07046346070027604, "grad_norm": 1.3207118315682589, "learning_rate": 1.9816530158725844e-05, "loss": 0.799, "step": 3880 }, { "epoch": 0.07064506755775098, "grad_norm": 1.2611711868061426, "learning_rate": 1.9815429659594276e-05, "loss": 0.8133, "step": 3890 }, { "epoch": 0.07082667441522592, "grad_norm": 1.3068161122699464, "learning_rate": 1.9814325900528146e-05, "loss": 0.8, "step": 3900 }, { "epoch": 0.07100828127270085, "grad_norm": 1.2556080728146137, "learning_rate": 1.981321888189404e-05, "loss": 0.8117, "step": 3910 }, { "epoch": 0.0711898881301758, "grad_norm": 1.2551863126653307, "learning_rate": 1.981210860405962e-05, "loss": 0.8056, "step": 3920 }, { "epoch": 0.07137149498765073, "grad_norm": 1.2964352494204727, "learning_rate": 1.981099506739364e-05, "loss": 0.7983, "step": 3930 }, { "epoch": 0.07155310184512567, "grad_norm": 1.2572710665801434, "learning_rate": 1.9809878272265935e-05, "loss": 0.8068, "step": 3940 }, { "epoch": 0.07173470870260061, "grad_norm": 1.2296844917035, "learning_rate": 1.980875821904741e-05, "loss": 0.8053, "step": 3950 }, { "epoch": 0.07191631556007555, "grad_norm": 1.1972348725340858, "learning_rate": 1.980763490811007e-05, "loss": 0.7985, "step": 3960 }, { "epoch": 0.07209792241755049, "grad_norm": 1.2782785918879287, "learning_rate": 1.9806508339826985e-05, "loss": 0.8061, "step": 3970 }, { "epoch": 0.07227952927502543, "grad_norm": 1.2743158210533867, "learning_rate": 1.980537851457232e-05, "loss": 0.8156, "step": 3980 }, { "epoch": 0.07246113613250037, "grad_norm": 1.2683984513940456, "learning_rate": 1.9804245432721315e-05, "loss": 0.8034, "step": 3990 }, { "epoch": 0.0726427429899753, "grad_norm": 1.3129028291641784, "learning_rate": 1.9803109094650295e-05, "loss": 0.814, "step": 4000 }, { "epoch": 0.07282434984745025, "grad_norm": 1.2251092663986702, "learning_rate": 1.9801969500736662e-05, "loss": 0.7975, "step": 4010 }, { "epoch": 0.07300595670492518, "grad_norm": 1.2529155900416837, "learning_rate": 1.9800826651358904e-05, "loss": 0.7877, "step": 4020 }, { "epoch": 0.07318756356240011, "grad_norm": 1.1884310408133256, "learning_rate": 1.9799680546896582e-05, "loss": 0.8154, "step": 4030 }, { "epoch": 0.07336917041987505, "grad_norm": 1.2890473685872619, "learning_rate": 1.979853118773035e-05, "loss": 0.7945, "step": 4040 }, { "epoch": 0.07355077727734999, "grad_norm": 1.3219472548613653, "learning_rate": 1.9797378574241935e-05, "loss": 0.8021, "step": 4050 }, { "epoch": 0.07373238413482493, "grad_norm": 1.340098129928525, "learning_rate": 1.9796222706814153e-05, "loss": 0.8178, "step": 4060 }, { "epoch": 0.07391399099229987, "grad_norm": 1.3072998618329947, "learning_rate": 1.9795063585830885e-05, "loss": 0.8079, "step": 4070 }, { "epoch": 0.0740955978497748, "grad_norm": 1.2227271555911632, "learning_rate": 1.979390121167711e-05, "loss": 0.8105, "step": 4080 }, { "epoch": 0.07427720470724974, "grad_norm": 1.2581898122096138, "learning_rate": 1.9792735584738875e-05, "loss": 0.8158, "step": 4090 }, { "epoch": 0.07445881156472468, "grad_norm": 1.2410604366036435, "learning_rate": 1.9791566705403315e-05, "loss": 0.7935, "step": 4100 }, { "epoch": 0.07464041842219962, "grad_norm": 1.2578870363652284, "learning_rate": 1.9790394574058636e-05, "loss": 0.7912, "step": 4110 }, { "epoch": 0.07482202527967456, "grad_norm": 1.325735695341401, "learning_rate": 1.9789219191094142e-05, "loss": 0.7925, "step": 4120 }, { "epoch": 0.0750036321371495, "grad_norm": 1.2585285610170043, "learning_rate": 1.9788040556900196e-05, "loss": 0.8087, "step": 4130 }, { "epoch": 0.07518523899462444, "grad_norm": 1.2592708337586886, "learning_rate": 1.9786858671868252e-05, "loss": 0.7957, "step": 4140 }, { "epoch": 0.07536684585209938, "grad_norm": 1.2466483807370663, "learning_rate": 1.9785673536390843e-05, "loss": 0.784, "step": 4150 }, { "epoch": 0.07554845270957432, "grad_norm": 1.3378991764039792, "learning_rate": 1.9784485150861578e-05, "loss": 0.8165, "step": 4160 }, { "epoch": 0.07573005956704926, "grad_norm": 1.2691979327438587, "learning_rate": 1.978329351567515e-05, "loss": 0.8057, "step": 4170 }, { "epoch": 0.0759116664245242, "grad_norm": 1.729556141124203, "learning_rate": 1.978209863122733e-05, "loss": 0.7961, "step": 4180 }, { "epoch": 0.07609327328199912, "grad_norm": 1.242292450870014, "learning_rate": 1.9780900497914964e-05, "loss": 0.801, "step": 4190 }, { "epoch": 0.07627488013947406, "grad_norm": 1.7232746110167625, "learning_rate": 1.9779699116135983e-05, "loss": 0.8268, "step": 4200 }, { "epoch": 0.076456486996949, "grad_norm": 1.2576016873590412, "learning_rate": 1.977849448628939e-05, "loss": 0.7976, "step": 4210 }, { "epoch": 0.07663809385442394, "grad_norm": 1.296895902572056, "learning_rate": 1.9777286608775277e-05, "loss": 0.8039, "step": 4220 }, { "epoch": 0.07681970071189888, "grad_norm": 1.2432804947634837, "learning_rate": 1.9776075483994806e-05, "loss": 0.8137, "step": 4230 }, { "epoch": 0.07700130756937382, "grad_norm": 1.2229330051247802, "learning_rate": 1.9774861112350216e-05, "loss": 0.7939, "step": 4240 }, { "epoch": 0.07718291442684876, "grad_norm": 1.2688397758768817, "learning_rate": 1.9773643494244833e-05, "loss": 0.8158, "step": 4250 }, { "epoch": 0.0773645212843237, "grad_norm": 1.2029035234453582, "learning_rate": 1.9772422630083057e-05, "loss": 0.814, "step": 4260 }, { "epoch": 0.07754612814179863, "grad_norm": 1.1782827967525993, "learning_rate": 1.9771198520270363e-05, "loss": 0.7949, "step": 4270 }, { "epoch": 0.07772773499927357, "grad_norm": 1.3083255369351166, "learning_rate": 1.9769971165213305e-05, "loss": 0.8037, "step": 4280 }, { "epoch": 0.07790934185674851, "grad_norm": 1.2978643704752628, "learning_rate": 1.976874056531952e-05, "loss": 0.8098, "step": 4290 }, { "epoch": 0.07809094871422345, "grad_norm": 1.3300672878839288, "learning_rate": 1.9767506720997717e-05, "loss": 0.7983, "step": 4300 }, { "epoch": 0.07827255557169839, "grad_norm": 1.2897688472950992, "learning_rate": 1.9766269632657686e-05, "loss": 0.7881, "step": 4310 }, { "epoch": 0.07845416242917333, "grad_norm": 1.245843180605057, "learning_rate": 1.9765029300710294e-05, "loss": 0.7999, "step": 4320 }, { "epoch": 0.07863576928664827, "grad_norm": 1.2006938319667952, "learning_rate": 1.9763785725567482e-05, "loss": 0.7972, "step": 4330 }, { "epoch": 0.07881737614412321, "grad_norm": 1.3478729830168716, "learning_rate": 1.976253890764227e-05, "loss": 0.786, "step": 4340 }, { "epoch": 0.07899898300159815, "grad_norm": 1.2812721524537019, "learning_rate": 1.976128884734876e-05, "loss": 0.8184, "step": 4350 }, { "epoch": 0.07918058985907307, "grad_norm": 1.299163462101211, "learning_rate": 1.976003554510212e-05, "loss": 0.7931, "step": 4360 }, { "epoch": 0.07936219671654801, "grad_norm": 1.2437858004758058, "learning_rate": 1.9758779001318604e-05, "loss": 0.8052, "step": 4370 }, { "epoch": 0.07954380357402295, "grad_norm": 1.2657769038100009, "learning_rate": 1.9757519216415543e-05, "loss": 0.7943, "step": 4380 }, { "epoch": 0.07972541043149789, "grad_norm": 1.2943295722506936, "learning_rate": 1.9756256190811334e-05, "loss": 0.7994, "step": 4390 }, { "epoch": 0.07990701728897283, "grad_norm": 1.2045452893374686, "learning_rate": 1.975498992492547e-05, "loss": 0.7851, "step": 4400 }, { "epoch": 0.08008862414644777, "grad_norm": 1.2324801992811567, "learning_rate": 1.975372041917849e-05, "loss": 0.8129, "step": 4410 }, { "epoch": 0.0802702310039227, "grad_norm": 1.2697767129751139, "learning_rate": 1.975244767399204e-05, "loss": 0.806, "step": 4420 }, { "epoch": 0.08045183786139765, "grad_norm": 1.2871566366449052, "learning_rate": 1.9751171689788825e-05, "loss": 0.8145, "step": 4430 }, { "epoch": 0.08063344471887259, "grad_norm": 1.246059096572125, "learning_rate": 1.974989246699263e-05, "loss": 0.8124, "step": 4440 }, { "epoch": 0.08081505157634752, "grad_norm": 1.3576496494645556, "learning_rate": 1.974861000602831e-05, "loss": 0.8142, "step": 4450 }, { "epoch": 0.08099665843382246, "grad_norm": 1.2326819368512067, "learning_rate": 1.9747324307321804e-05, "loss": 0.8078, "step": 4460 }, { "epoch": 0.0811782652912974, "grad_norm": 3.2364124637965013, "learning_rate": 1.9746035371300124e-05, "loss": 0.7839, "step": 4470 }, { "epoch": 0.08135987214877234, "grad_norm": 1.2866865981762736, "learning_rate": 1.9744743198391357e-05, "loss": 0.797, "step": 4480 }, { "epoch": 0.08154147900624728, "grad_norm": 1.3159537578036133, "learning_rate": 1.9743447789024658e-05, "loss": 0.8221, "step": 4490 }, { "epoch": 0.08172308586372222, "grad_norm": 1.2420609379217917, "learning_rate": 1.9742149143630268e-05, "loss": 0.8083, "step": 4500 }, { "epoch": 0.08190469272119716, "grad_norm": 1.3277078443908958, "learning_rate": 1.9740847262639497e-05, "loss": 0.798, "step": 4510 }, { "epoch": 0.08208629957867208, "grad_norm": 1.2155727920648591, "learning_rate": 1.9739542146484727e-05, "loss": 0.8094, "step": 4520 }, { "epoch": 0.08226790643614702, "grad_norm": 1.2506233318873248, "learning_rate": 1.9738233795599423e-05, "loss": 0.7833, "step": 4530 }, { "epoch": 0.08244951329362196, "grad_norm": 1.293535977588664, "learning_rate": 1.9736922210418113e-05, "loss": 0.7997, "step": 4540 }, { "epoch": 0.0826311201510969, "grad_norm": 1.2622377268591665, "learning_rate": 1.973560739137641e-05, "loss": 0.8026, "step": 4550 }, { "epoch": 0.08281272700857184, "grad_norm": 1.192387632854376, "learning_rate": 1.9734289338910996e-05, "loss": 0.7865, "step": 4560 }, { "epoch": 0.08299433386604678, "grad_norm": 1.254452887815232, "learning_rate": 1.9732968053459622e-05, "loss": 0.8012, "step": 4570 }, { "epoch": 0.08317594072352172, "grad_norm": 1.2219483054978773, "learning_rate": 1.9731643535461124e-05, "loss": 0.7963, "step": 4580 }, { "epoch": 0.08335754758099666, "grad_norm": 1.2432893817637825, "learning_rate": 1.9730315785355404e-05, "loss": 0.8189, "step": 4590 }, { "epoch": 0.0835391544384716, "grad_norm": 1.2229832341488476, "learning_rate": 1.9728984803583437e-05, "loss": 0.7982, "step": 4600 }, { "epoch": 0.08372076129594654, "grad_norm": 1.251941743150748, "learning_rate": 1.9727650590587272e-05, "loss": 0.8023, "step": 4610 }, { "epoch": 0.08390236815342147, "grad_norm": 1.267432199315672, "learning_rate": 1.9726313146810037e-05, "loss": 0.8052, "step": 4620 }, { "epoch": 0.08408397501089641, "grad_norm": 1.1902685038977514, "learning_rate": 1.9724972472695927e-05, "loss": 0.7896, "step": 4630 }, { "epoch": 0.08426558186837135, "grad_norm": 1.287979604343563, "learning_rate": 1.9723628568690208e-05, "loss": 0.7954, "step": 4640 }, { "epoch": 0.08444718872584629, "grad_norm": 1.327016852109532, "learning_rate": 1.9722281435239227e-05, "loss": 0.7981, "step": 4650 }, { "epoch": 0.08462879558332123, "grad_norm": 1.21323959355291, "learning_rate": 1.9720931072790397e-05, "loss": 0.7815, "step": 4660 }, { "epoch": 0.08481040244079617, "grad_norm": 1.2272151663494615, "learning_rate": 1.9719577481792202e-05, "loss": 0.7975, "step": 4670 }, { "epoch": 0.08499200929827111, "grad_norm": 1.2107010944772931, "learning_rate": 1.971822066269421e-05, "loss": 0.8053, "step": 4680 }, { "epoch": 0.08517361615574603, "grad_norm": 1.2088364122808015, "learning_rate": 1.9716860615947036e-05, "loss": 0.7988, "step": 4690 }, { "epoch": 0.08535522301322097, "grad_norm": 1.2816023844173468, "learning_rate": 1.97154973420024e-05, "loss": 0.7908, "step": 4700 }, { "epoch": 0.08553682987069591, "grad_norm": 1.2324539978004767, "learning_rate": 1.971413084131307e-05, "loss": 0.7919, "step": 4710 }, { "epoch": 0.08571843672817085, "grad_norm": 1.2327114855385526, "learning_rate": 1.9712761114332896e-05, "loss": 0.8073, "step": 4720 }, { "epoch": 0.08590004358564579, "grad_norm": 1.2153048724550655, "learning_rate": 1.9711388161516792e-05, "loss": 0.8069, "step": 4730 }, { "epoch": 0.08608165044312073, "grad_norm": 1.2194085688878247, "learning_rate": 1.9710011983320748e-05, "loss": 0.7757, "step": 4740 }, { "epoch": 0.08626325730059567, "grad_norm": 1.2662239486315712, "learning_rate": 1.9708632580201828e-05, "loss": 0.7822, "step": 4750 }, { "epoch": 0.08644486415807061, "grad_norm": 1.3649290742584737, "learning_rate": 1.970724995261817e-05, "loss": 0.8148, "step": 4760 }, { "epoch": 0.08662647101554555, "grad_norm": 1.181142742411894, "learning_rate": 1.9705864101028962e-05, "loss": 0.7965, "step": 4770 }, { "epoch": 0.08680807787302049, "grad_norm": 1.265335479910089, "learning_rate": 1.970447502589449e-05, "loss": 0.8034, "step": 4780 }, { "epoch": 0.08698968473049543, "grad_norm": 1.2215500926553906, "learning_rate": 1.9703082727676097e-05, "loss": 0.7829, "step": 4790 }, { "epoch": 0.08717129158797036, "grad_norm": 1.209960561806768, "learning_rate": 1.9701687206836192e-05, "loss": 0.7957, "step": 4800 }, { "epoch": 0.0873528984454453, "grad_norm": 1.2135647442068223, "learning_rate": 1.9700288463838263e-05, "loss": 0.787, "step": 4810 }, { "epoch": 0.08753450530292024, "grad_norm": 1.3107735602981518, "learning_rate": 1.9698886499146874e-05, "loss": 0.7862, "step": 4820 }, { "epoch": 0.08771611216039518, "grad_norm": 1.2897952946445188, "learning_rate": 1.9697481313227634e-05, "loss": 0.8112, "step": 4830 }, { "epoch": 0.08789771901787012, "grad_norm": 1.1645832570626748, "learning_rate": 1.969607290654725e-05, "loss": 0.7671, "step": 4840 }, { "epoch": 0.08807932587534505, "grad_norm": 1.1822566572026663, "learning_rate": 1.9694661279573487e-05, "loss": 0.8028, "step": 4850 }, { "epoch": 0.08826093273281999, "grad_norm": 1.2278306938944379, "learning_rate": 1.969324643277517e-05, "loss": 0.7768, "step": 4860 }, { "epoch": 0.08844253959029492, "grad_norm": 1.2371366445486578, "learning_rate": 1.9691828366622216e-05, "loss": 0.7783, "step": 4870 }, { "epoch": 0.08862414644776986, "grad_norm": 1.1686772490895778, "learning_rate": 1.9690407081585586e-05, "loss": 0.7969, "step": 4880 }, { "epoch": 0.0888057533052448, "grad_norm": 1.2666257699506371, "learning_rate": 1.968898257813733e-05, "loss": 0.8073, "step": 4890 }, { "epoch": 0.08898736016271974, "grad_norm": 1.1892302424946275, "learning_rate": 1.968755485675055e-05, "loss": 0.7866, "step": 4900 }, { "epoch": 0.08916896702019468, "grad_norm": 1.2028755913422948, "learning_rate": 1.968612391789944e-05, "loss": 0.7883, "step": 4910 }, { "epoch": 0.08935057387766962, "grad_norm": 1.2630793531661229, "learning_rate": 1.968468976205924e-05, "loss": 0.7827, "step": 4920 }, { "epoch": 0.08953218073514456, "grad_norm": 1.2423403282873915, "learning_rate": 1.9683252389706263e-05, "loss": 0.791, "step": 4930 }, { "epoch": 0.0897137875926195, "grad_norm": 1.210734644018147, "learning_rate": 1.96818118013179e-05, "loss": 0.7876, "step": 4940 }, { "epoch": 0.08989539445009444, "grad_norm": 1.2390340899360044, "learning_rate": 1.9680367997372603e-05, "loss": 0.7895, "step": 4950 }, { "epoch": 0.09007700130756938, "grad_norm": 1.236984172699923, "learning_rate": 1.9678920978349895e-05, "loss": 0.7961, "step": 4960 }, { "epoch": 0.09025860816504432, "grad_norm": 1.1999844103844073, "learning_rate": 1.967747074473036e-05, "loss": 0.7976, "step": 4970 }, { "epoch": 0.09044021502251925, "grad_norm": 1.1760208799769698, "learning_rate": 1.967601729699566e-05, "loss": 0.7768, "step": 4980 }, { "epoch": 0.0906218218799942, "grad_norm": 1.2203462714580948, "learning_rate": 1.9674560635628513e-05, "loss": 0.8022, "step": 4990 }, { "epoch": 0.09080342873746913, "grad_norm": 1.2728839539336938, "learning_rate": 1.9673100761112717e-05, "loss": 0.808, "step": 5000 }, { "epoch": 0.09098503559494407, "grad_norm": 1.2236246441112406, "learning_rate": 1.9671637673933122e-05, "loss": 0.7836, "step": 5010 }, { "epoch": 0.091166642452419, "grad_norm": 1.2041625745866211, "learning_rate": 1.9670171374575666e-05, "loss": 0.8017, "step": 5020 }, { "epoch": 0.09134824930989394, "grad_norm": 1.238260971195624, "learning_rate": 1.966870186352733e-05, "loss": 0.7781, "step": 5030 }, { "epoch": 0.09152985616736888, "grad_norm": 1.1488025659335221, "learning_rate": 1.966722914127618e-05, "loss": 0.7924, "step": 5040 }, { "epoch": 0.09171146302484381, "grad_norm": 1.3027427709690755, "learning_rate": 1.966575320831134e-05, "loss": 0.8078, "step": 5050 }, { "epoch": 0.09189306988231875, "grad_norm": 1.1699469776523657, "learning_rate": 1.9664274065123e-05, "loss": 0.788, "step": 5060 }, { "epoch": 0.09207467673979369, "grad_norm": 1.237709335139122, "learning_rate": 1.966279171220242e-05, "loss": 0.7998, "step": 5070 }, { "epoch": 0.09225628359726863, "grad_norm": 1.205897809193581, "learning_rate": 1.966130615004192e-05, "loss": 0.7871, "step": 5080 }, { "epoch": 0.09243789045474357, "grad_norm": 1.2347466034612251, "learning_rate": 1.96598173791349e-05, "loss": 0.7958, "step": 5090 }, { "epoch": 0.09261949731221851, "grad_norm": 1.2408207919792045, "learning_rate": 1.9658325399975805e-05, "loss": 0.801, "step": 5100 }, { "epoch": 0.09280110416969345, "grad_norm": 1.1792431160360068, "learning_rate": 1.965683021306016e-05, "loss": 0.7944, "step": 5110 }, { "epoch": 0.09298271102716839, "grad_norm": 1.218367305326118, "learning_rate": 1.9655331818884554e-05, "loss": 0.7919, "step": 5120 }, { "epoch": 0.09316431788464333, "grad_norm": 1.1811861060680722, "learning_rate": 1.9653830217946636e-05, "loss": 0.7908, "step": 5130 }, { "epoch": 0.09334592474211827, "grad_norm": 1.2353696667384504, "learning_rate": 1.9652325410745124e-05, "loss": 0.798, "step": 5140 }, { "epoch": 0.0935275315995932, "grad_norm": 1.2257347515668096, "learning_rate": 1.9650817397779806e-05, "loss": 0.7935, "step": 5150 }, { "epoch": 0.09370913845706814, "grad_norm": 1.2123015174968175, "learning_rate": 1.9649306179551515e-05, "loss": 0.8023, "step": 5160 }, { "epoch": 0.09389074531454308, "grad_norm": 1.1857605794213641, "learning_rate": 1.9647791756562172e-05, "loss": 0.794, "step": 5170 }, { "epoch": 0.09407235217201801, "grad_norm": 1.23464582191243, "learning_rate": 1.9646274129314752e-05, "loss": 0.7881, "step": 5180 }, { "epoch": 0.09425395902949295, "grad_norm": 1.2160773395590583, "learning_rate": 1.9644753298313288e-05, "loss": 0.7763, "step": 5190 }, { "epoch": 0.09443556588696789, "grad_norm": 1.3008634740197418, "learning_rate": 1.9643229264062892e-05, "loss": 0.782, "step": 5200 }, { "epoch": 0.09461717274444283, "grad_norm": 1.1587449823557359, "learning_rate": 1.9641702027069727e-05, "loss": 0.7802, "step": 5210 }, { "epoch": 0.09479877960191777, "grad_norm": 1.1876680687146797, "learning_rate": 1.9640171587841027e-05, "loss": 0.8026, "step": 5220 }, { "epoch": 0.0949803864593927, "grad_norm": 1.2274835439313954, "learning_rate": 1.963863794688508e-05, "loss": 0.7904, "step": 5230 }, { "epoch": 0.09516199331686764, "grad_norm": 1.221281286553527, "learning_rate": 1.963710110471125e-05, "loss": 0.795, "step": 5240 }, { "epoch": 0.09534360017434258, "grad_norm": 1.3161888399108783, "learning_rate": 1.9635561061829958e-05, "loss": 0.7853, "step": 5250 }, { "epoch": 0.09552520703181752, "grad_norm": 1.177661788114672, "learning_rate": 1.963401781875269e-05, "loss": 0.7862, "step": 5260 }, { "epoch": 0.09570681388929246, "grad_norm": 1.2333850590116988, "learning_rate": 1.9632471375991992e-05, "loss": 0.804, "step": 5270 }, { "epoch": 0.0958884207467674, "grad_norm": 1.1687962830296246, "learning_rate": 1.963092173406147e-05, "loss": 0.8002, "step": 5280 }, { "epoch": 0.09607002760424234, "grad_norm": 1.2044247968452104, "learning_rate": 1.96293688934758e-05, "loss": 0.7909, "step": 5290 }, { "epoch": 0.09625163446171728, "grad_norm": 1.2477547761225918, "learning_rate": 1.9627812854750713e-05, "loss": 0.7898, "step": 5300 }, { "epoch": 0.09643324131919222, "grad_norm": 1.1807462524213226, "learning_rate": 1.9626253618403014e-05, "loss": 0.7898, "step": 5310 }, { "epoch": 0.09661484817666716, "grad_norm": 1.2225694559258558, "learning_rate": 1.9624691184950553e-05, "loss": 0.7779, "step": 5320 }, { "epoch": 0.0967964550341421, "grad_norm": 1.1690150377310322, "learning_rate": 1.962312555491226e-05, "loss": 0.7762, "step": 5330 }, { "epoch": 0.09697806189161702, "grad_norm": 1.1591423585962861, "learning_rate": 1.962155672880811e-05, "loss": 0.7893, "step": 5340 }, { "epoch": 0.09715966874909196, "grad_norm": 1.173932235028733, "learning_rate": 1.961998470715915e-05, "loss": 0.7877, "step": 5350 }, { "epoch": 0.0973412756065669, "grad_norm": 1.2015009203796734, "learning_rate": 1.961840949048748e-05, "loss": 0.7855, "step": 5360 }, { "epoch": 0.09752288246404184, "grad_norm": 1.1906878919410877, "learning_rate": 1.9616831079316273e-05, "loss": 0.7993, "step": 5370 }, { "epoch": 0.09770448932151678, "grad_norm": 1.1677886771837527, "learning_rate": 1.9615249474169758e-05, "loss": 0.7919, "step": 5380 }, { "epoch": 0.09788609617899172, "grad_norm": 1.108616778226608, "learning_rate": 1.9613664675573217e-05, "loss": 0.7903, "step": 5390 }, { "epoch": 0.09806770303646666, "grad_norm": 1.0898344134240148, "learning_rate": 1.9612076684053003e-05, "loss": 0.7845, "step": 5400 }, { "epoch": 0.0982493098939416, "grad_norm": 1.2381006979361022, "learning_rate": 1.9610485500136523e-05, "loss": 0.7945, "step": 5410 }, { "epoch": 0.09843091675141653, "grad_norm": 1.1756659291179445, "learning_rate": 1.9608891124352246e-05, "loss": 0.7934, "step": 5420 }, { "epoch": 0.09861252360889147, "grad_norm": 1.1570713113479374, "learning_rate": 1.9607293557229707e-05, "loss": 0.7923, "step": 5430 }, { "epoch": 0.09879413046636641, "grad_norm": 1.1130481556520972, "learning_rate": 1.960569279929949e-05, "loss": 0.7853, "step": 5440 }, { "epoch": 0.09897573732384135, "grad_norm": 1.2527980297801384, "learning_rate": 1.960408885109324e-05, "loss": 0.7948, "step": 5450 }, { "epoch": 0.09915734418131629, "grad_norm": 1.2207979021761142, "learning_rate": 1.9602481713143678e-05, "loss": 0.7901, "step": 5460 }, { "epoch": 0.09933895103879123, "grad_norm": 1.2844700372400568, "learning_rate": 1.960087138598456e-05, "loss": 0.8012, "step": 5470 }, { "epoch": 0.09952055789626617, "grad_norm": 1.2102385492577046, "learning_rate": 1.9599257870150726e-05, "loss": 0.7879, "step": 5480 }, { "epoch": 0.09970216475374111, "grad_norm": 1.1546733082027911, "learning_rate": 1.959764116617805e-05, "loss": 0.7994, "step": 5490 }, { "epoch": 0.09988377161121605, "grad_norm": 1.2007601926821916, "learning_rate": 1.9596021274603488e-05, "loss": 0.7735, "step": 5500 }, { "epoch": 0.10006537846869097, "grad_norm": 1.217832796585649, "learning_rate": 1.9594398195965038e-05, "loss": 0.7881, "step": 5510 }, { "epoch": 0.10024698532616591, "grad_norm": 1.2049151880771094, "learning_rate": 1.9592771930801762e-05, "loss": 0.7941, "step": 5520 }, { "epoch": 0.10042859218364085, "grad_norm": 1.1588549587099457, "learning_rate": 1.9591142479653783e-05, "loss": 0.7815, "step": 5530 }, { "epoch": 0.10061019904111579, "grad_norm": 1.1799925673458411, "learning_rate": 1.9589509843062274e-05, "loss": 0.7953, "step": 5540 }, { "epoch": 0.10079180589859073, "grad_norm": 1.1332630454771153, "learning_rate": 1.9587874021569485e-05, "loss": 0.7755, "step": 5550 }, { "epoch": 0.10097341275606567, "grad_norm": 1.2011206802587162, "learning_rate": 1.95862350157187e-05, "loss": 0.7888, "step": 5560 }, { "epoch": 0.1011550196135406, "grad_norm": 1.187489026886572, "learning_rate": 1.9584592826054276e-05, "loss": 0.7912, "step": 5570 }, { "epoch": 0.10133662647101555, "grad_norm": 1.1840841535606708, "learning_rate": 1.958294745312162e-05, "loss": 0.7749, "step": 5580 }, { "epoch": 0.10151823332849048, "grad_norm": 1.1821672026162666, "learning_rate": 1.95812988974672e-05, "loss": 0.7928, "step": 5590 }, { "epoch": 0.10169984018596542, "grad_norm": 1.2045924002715442, "learning_rate": 1.9579647159638538e-05, "loss": 0.78, "step": 5600 }, { "epoch": 0.10188144704344036, "grad_norm": 1.168527561085609, "learning_rate": 1.9577992240184218e-05, "loss": 0.7867, "step": 5610 }, { "epoch": 0.1020630539009153, "grad_norm": 1.2295122290627276, "learning_rate": 1.957633413965388e-05, "loss": 0.8011, "step": 5620 }, { "epoch": 0.10224466075839024, "grad_norm": 1.1856539695728805, "learning_rate": 1.957467285859821e-05, "loss": 0.7871, "step": 5630 }, { "epoch": 0.10242626761586518, "grad_norm": 1.1616055905641305, "learning_rate": 1.9573008397568963e-05, "loss": 0.7872, "step": 5640 }, { "epoch": 0.10260787447334012, "grad_norm": 1.180405990461748, "learning_rate": 1.957134075711895e-05, "loss": 0.7941, "step": 5650 }, { "epoch": 0.10278948133081506, "grad_norm": 1.1940359035481387, "learning_rate": 1.9569669937802026e-05, "loss": 0.7936, "step": 5660 }, { "epoch": 0.10297108818828998, "grad_norm": 1.1544851822852815, "learning_rate": 1.9567995940173113e-05, "loss": 0.782, "step": 5670 }, { "epoch": 0.10315269504576492, "grad_norm": 1.1640546779422407, "learning_rate": 1.9566318764788185e-05, "loss": 0.7817, "step": 5680 }, { "epoch": 0.10333430190323986, "grad_norm": 1.1350215498365428, "learning_rate": 1.956463841220427e-05, "loss": 0.7831, "step": 5690 }, { "epoch": 0.1035159087607148, "grad_norm": 1.156272829537814, "learning_rate": 1.9562954882979453e-05, "loss": 0.7966, "step": 5700 }, { "epoch": 0.10369751561818974, "grad_norm": 1.1732661912300806, "learning_rate": 1.9561268177672873e-05, "loss": 0.7802, "step": 5710 }, { "epoch": 0.10387912247566468, "grad_norm": 1.152512105393919, "learning_rate": 1.9559578296844727e-05, "loss": 0.7822, "step": 5720 }, { "epoch": 0.10406072933313962, "grad_norm": 1.1663205494837898, "learning_rate": 1.9557885241056263e-05, "loss": 0.7765, "step": 5730 }, { "epoch": 0.10424233619061456, "grad_norm": 1.1282787673434111, "learning_rate": 1.9556189010869778e-05, "loss": 0.7908, "step": 5740 }, { "epoch": 0.1044239430480895, "grad_norm": 1.1806747193845473, "learning_rate": 1.9554489606848644e-05, "loss": 0.7912, "step": 5750 }, { "epoch": 0.10460554990556443, "grad_norm": 1.193542964354476, "learning_rate": 1.955278702955726e-05, "loss": 0.7914, "step": 5760 }, { "epoch": 0.10478715676303937, "grad_norm": 1.1808379607550932, "learning_rate": 1.9551081279561098e-05, "loss": 0.792, "step": 5770 }, { "epoch": 0.10496876362051431, "grad_norm": 1.1131785609204068, "learning_rate": 1.9549372357426673e-05, "loss": 0.7921, "step": 5780 }, { "epoch": 0.10515037047798925, "grad_norm": 1.246933321594397, "learning_rate": 1.9547660263721567e-05, "loss": 0.7948, "step": 5790 }, { "epoch": 0.10533197733546419, "grad_norm": 1.1815499036569121, "learning_rate": 1.9545944999014398e-05, "loss": 0.7895, "step": 5800 }, { "epoch": 0.10551358419293913, "grad_norm": 1.1605707223907402, "learning_rate": 1.9544226563874847e-05, "loss": 0.7779, "step": 5810 }, { "epoch": 0.10569519105041407, "grad_norm": 1.136179667170127, "learning_rate": 1.954250495887365e-05, "loss": 0.7896, "step": 5820 }, { "epoch": 0.10587679790788901, "grad_norm": 1.1588330478603626, "learning_rate": 1.954078018458259e-05, "loss": 0.7997, "step": 5830 }, { "epoch": 0.10605840476536393, "grad_norm": 1.248534557795413, "learning_rate": 1.9539052241574504e-05, "loss": 0.7904, "step": 5840 }, { "epoch": 0.10624001162283887, "grad_norm": 1.1353262982075303, "learning_rate": 1.9537321130423285e-05, "loss": 0.7736, "step": 5850 }, { "epoch": 0.10642161848031381, "grad_norm": 1.1837902965585942, "learning_rate": 1.9535586851703875e-05, "loss": 0.7732, "step": 5860 }, { "epoch": 0.10660322533778875, "grad_norm": 1.1083305306883866, "learning_rate": 1.953384940599227e-05, "loss": 0.7822, "step": 5870 }, { "epoch": 0.10678483219526369, "grad_norm": 1.175228018375797, "learning_rate": 1.9532108793865513e-05, "loss": 0.7762, "step": 5880 }, { "epoch": 0.10696643905273863, "grad_norm": 1.1933832421639488, "learning_rate": 1.9530365015901705e-05, "loss": 0.7705, "step": 5890 }, { "epoch": 0.10714804591021357, "grad_norm": 1.1901295031753032, "learning_rate": 1.952861807267999e-05, "loss": 0.7774, "step": 5900 }, { "epoch": 0.10732965276768851, "grad_norm": 1.1533299586900172, "learning_rate": 1.952686796478058e-05, "loss": 0.7947, "step": 5910 }, { "epoch": 0.10751125962516345, "grad_norm": 1.1608314712046366, "learning_rate": 1.9525114692784716e-05, "loss": 0.7728, "step": 5920 }, { "epoch": 0.10769286648263839, "grad_norm": 1.1871824520368848, "learning_rate": 1.9523358257274706e-05, "loss": 0.7939, "step": 5930 }, { "epoch": 0.10787447334011332, "grad_norm": 1.15677888890327, "learning_rate": 1.9521598658833906e-05, "loss": 0.8024, "step": 5940 }, { "epoch": 0.10805608019758826, "grad_norm": 1.1719980688249387, "learning_rate": 1.9519835898046714e-05, "loss": 0.786, "step": 5950 }, { "epoch": 0.1082376870550632, "grad_norm": 1.1257567561971555, "learning_rate": 1.9518069975498583e-05, "loss": 0.788, "step": 5960 }, { "epoch": 0.10841929391253814, "grad_norm": 1.1938218139117167, "learning_rate": 1.9516300891776024e-05, "loss": 0.7868, "step": 5970 }, { "epoch": 0.10860090077001308, "grad_norm": 1.1635204979511586, "learning_rate": 1.9514528647466592e-05, "loss": 0.774, "step": 5980 }, { "epoch": 0.10878250762748802, "grad_norm": 1.1984074699950162, "learning_rate": 1.9512753243158882e-05, "loss": 0.7861, "step": 5990 }, { "epoch": 0.10896411448496295, "grad_norm": 1.18640717839212, "learning_rate": 1.9510974679442555e-05, "loss": 0.7971, "step": 6000 }, { "epoch": 0.10914572134243788, "grad_norm": 1.1791518675119734, "learning_rate": 1.950919295690831e-05, "loss": 0.7904, "step": 6010 }, { "epoch": 0.10932732819991282, "grad_norm": 1.1629594416920894, "learning_rate": 1.95074080761479e-05, "loss": 0.7811, "step": 6020 }, { "epoch": 0.10950893505738776, "grad_norm": 1.1433189678835902, "learning_rate": 1.950562003775413e-05, "loss": 0.7924, "step": 6030 }, { "epoch": 0.1096905419148627, "grad_norm": 1.1582749674085224, "learning_rate": 1.950382884232084e-05, "loss": 0.7884, "step": 6040 }, { "epoch": 0.10987214877233764, "grad_norm": 1.2447802741013578, "learning_rate": 1.9502034490442936e-05, "loss": 0.7947, "step": 6050 }, { "epoch": 0.11005375562981258, "grad_norm": 1.1694147968069004, "learning_rate": 1.9500236982716362e-05, "loss": 0.7695, "step": 6060 }, { "epoch": 0.11023536248728752, "grad_norm": 1.1727958270329795, "learning_rate": 1.9498436319738113e-05, "loss": 0.7813, "step": 6070 }, { "epoch": 0.11041696934476246, "grad_norm": 1.1424740113211533, "learning_rate": 1.949663250210623e-05, "loss": 0.7877, "step": 6080 }, { "epoch": 0.1105985762022374, "grad_norm": 1.213202715213241, "learning_rate": 1.9494825530419804e-05, "loss": 0.7858, "step": 6090 }, { "epoch": 0.11078018305971234, "grad_norm": 1.1289408685152686, "learning_rate": 1.9493015405278976e-05, "loss": 0.7692, "step": 6100 }, { "epoch": 0.11096178991718728, "grad_norm": 1.1829224893074166, "learning_rate": 1.949120212728492e-05, "loss": 0.777, "step": 6110 }, { "epoch": 0.11114339677466221, "grad_norm": 1.1631357910673807, "learning_rate": 1.9489385697039884e-05, "loss": 0.7908, "step": 6120 }, { "epoch": 0.11132500363213715, "grad_norm": 1.125836692738868, "learning_rate": 1.9487566115147136e-05, "loss": 0.7769, "step": 6130 }, { "epoch": 0.11150661048961209, "grad_norm": 1.1619052845816906, "learning_rate": 1.9485743382211003e-05, "loss": 0.773, "step": 6140 }, { "epoch": 0.11168821734708703, "grad_norm": 1.0992807694479771, "learning_rate": 1.9483917498836864e-05, "loss": 0.7713, "step": 6150 }, { "epoch": 0.11186982420456197, "grad_norm": 1.2023173257149438, "learning_rate": 1.9482088465631133e-05, "loss": 0.7799, "step": 6160 }, { "epoch": 0.1120514310620369, "grad_norm": 1.1799719697967908, "learning_rate": 1.948025628320127e-05, "loss": 0.7945, "step": 6170 }, { "epoch": 0.11223303791951184, "grad_norm": 1.1637175401214817, "learning_rate": 1.9478420952155795e-05, "loss": 0.7672, "step": 6180 }, { "epoch": 0.11241464477698677, "grad_norm": 1.304224477496466, "learning_rate": 1.947658247310426e-05, "loss": 0.7787, "step": 6190 }, { "epoch": 0.11259625163446171, "grad_norm": 1.1852873083495656, "learning_rate": 1.9474740846657262e-05, "loss": 0.784, "step": 6200 }, { "epoch": 0.11277785849193665, "grad_norm": 1.1253014272285444, "learning_rate": 1.947289607342646e-05, "loss": 0.7869, "step": 6210 }, { "epoch": 0.11295946534941159, "grad_norm": 1.1789670106852865, "learning_rate": 1.9471048154024537e-05, "loss": 0.7832, "step": 6220 }, { "epoch": 0.11314107220688653, "grad_norm": 1.1418885848174551, "learning_rate": 1.946919708906523e-05, "loss": 0.8065, "step": 6230 }, { "epoch": 0.11332267906436147, "grad_norm": 1.1417004798485515, "learning_rate": 1.9467342879163334e-05, "loss": 0.7834, "step": 6240 }, { "epoch": 0.11350428592183641, "grad_norm": 1.11303880789898, "learning_rate": 1.9465485524934663e-05, "loss": 0.769, "step": 6250 }, { "epoch": 0.11368589277931135, "grad_norm": 1.1176369315447623, "learning_rate": 1.9463625026996088e-05, "loss": 0.7787, "step": 6260 }, { "epoch": 0.11386749963678629, "grad_norm": 1.174072447802223, "learning_rate": 1.946176138596553e-05, "loss": 0.7737, "step": 6270 }, { "epoch": 0.11404910649426123, "grad_norm": 1.1488873152219157, "learning_rate": 1.945989460246194e-05, "loss": 0.7685, "step": 6280 }, { "epoch": 0.11423071335173617, "grad_norm": 1.195827792134475, "learning_rate": 1.9458024677105332e-05, "loss": 0.7704, "step": 6290 }, { "epoch": 0.1144123202092111, "grad_norm": 1.1574846290211032, "learning_rate": 1.9456151610516745e-05, "loss": 0.7746, "step": 6300 }, { "epoch": 0.11459392706668604, "grad_norm": 1.1341448614982004, "learning_rate": 1.9454275403318264e-05, "loss": 0.7826, "step": 6310 }, { "epoch": 0.11477553392416098, "grad_norm": 1.1550959841284827, "learning_rate": 1.9452396056133032e-05, "loss": 0.7741, "step": 6320 }, { "epoch": 0.11495714078163591, "grad_norm": 1.1719423452702122, "learning_rate": 1.945051356958522e-05, "loss": 0.772, "step": 6330 }, { "epoch": 0.11513874763911085, "grad_norm": 1.2371236892206205, "learning_rate": 1.9448627944300044e-05, "loss": 0.7843, "step": 6340 }, { "epoch": 0.11532035449658579, "grad_norm": 1.1940446728904321, "learning_rate": 1.9446739180903762e-05, "loss": 0.7726, "step": 6350 }, { "epoch": 0.11550196135406073, "grad_norm": 1.1165639460323096, "learning_rate": 1.944484728002368e-05, "loss": 0.7923, "step": 6360 }, { "epoch": 0.11568356821153566, "grad_norm": 1.1316237628068744, "learning_rate": 1.9442952242288146e-05, "loss": 0.7658, "step": 6370 }, { "epoch": 0.1158651750690106, "grad_norm": 1.1311675049640963, "learning_rate": 1.9441054068326543e-05, "loss": 0.7707, "step": 6380 }, { "epoch": 0.11604678192648554, "grad_norm": 1.1065753327554873, "learning_rate": 1.94391527587693e-05, "loss": 0.766, "step": 6390 }, { "epoch": 0.11622838878396048, "grad_norm": 1.1777584867486506, "learning_rate": 1.9437248314247884e-05, "loss": 0.7741, "step": 6400 }, { "epoch": 0.11640999564143542, "grad_norm": 1.1350734930847692, "learning_rate": 1.9435340735394807e-05, "loss": 0.763, "step": 6410 }, { "epoch": 0.11659160249891036, "grad_norm": 1.1786667349761775, "learning_rate": 1.9433430022843623e-05, "loss": 0.7827, "step": 6420 }, { "epoch": 0.1167732093563853, "grad_norm": 1.1364381409816142, "learning_rate": 1.943151617722892e-05, "loss": 0.7913, "step": 6430 }, { "epoch": 0.11695481621386024, "grad_norm": 1.1089952624853534, "learning_rate": 1.942959919918634e-05, "loss": 0.7676, "step": 6440 }, { "epoch": 0.11713642307133518, "grad_norm": 1.1121544414503628, "learning_rate": 1.9427679089352546e-05, "loss": 0.7706, "step": 6450 }, { "epoch": 0.11731802992881012, "grad_norm": 1.1444120993803504, "learning_rate": 1.9425755848365255e-05, "loss": 0.7852, "step": 6460 }, { "epoch": 0.11749963678628506, "grad_norm": 1.1525130842760993, "learning_rate": 1.9423829476863223e-05, "loss": 0.7723, "step": 6470 }, { "epoch": 0.11768124364376, "grad_norm": 1.1055467384163749, "learning_rate": 1.942189997548624e-05, "loss": 0.7792, "step": 6480 }, { "epoch": 0.11786285050123492, "grad_norm": 1.1345359734154588, "learning_rate": 1.941996734487514e-05, "loss": 0.779, "step": 6490 }, { "epoch": 0.11804445735870986, "grad_norm": 1.1969308685791364, "learning_rate": 1.9418031585671798e-05, "loss": 0.779, "step": 6500 }, { "epoch": 0.1182260642161848, "grad_norm": 1.1996556726815624, "learning_rate": 1.9416092698519124e-05, "loss": 0.7704, "step": 6510 }, { "epoch": 0.11840767107365974, "grad_norm": 1.1945131268213316, "learning_rate": 1.9414150684061058e-05, "loss": 0.7746, "step": 6520 }, { "epoch": 0.11858927793113468, "grad_norm": 1.1515272658912958, "learning_rate": 1.9412205542942604e-05, "loss": 0.7833, "step": 6530 }, { "epoch": 0.11877088478860962, "grad_norm": 1.11417782385927, "learning_rate": 1.941025727580978e-05, "loss": 0.7886, "step": 6540 }, { "epoch": 0.11895249164608455, "grad_norm": 1.0975260553275255, "learning_rate": 1.940830588330965e-05, "loss": 0.7819, "step": 6550 }, { "epoch": 0.1191340985035595, "grad_norm": 1.109213478095343, "learning_rate": 1.9406351366090324e-05, "loss": 0.7763, "step": 6560 }, { "epoch": 0.11931570536103443, "grad_norm": 1.1664959060250464, "learning_rate": 1.9404393724800937e-05, "loss": 0.7661, "step": 6570 }, { "epoch": 0.11949731221850937, "grad_norm": 1.1518331326822435, "learning_rate": 1.940243296009167e-05, "loss": 0.7768, "step": 6580 }, { "epoch": 0.11967891907598431, "grad_norm": 1.1441904333811954, "learning_rate": 1.940046907261374e-05, "loss": 0.7717, "step": 6590 }, { "epoch": 0.11986052593345925, "grad_norm": 1.09473248765146, "learning_rate": 1.93985020630194e-05, "loss": 0.7592, "step": 6600 }, { "epoch": 0.12004213279093419, "grad_norm": 1.1606607138526785, "learning_rate": 1.9396531931961937e-05, "loss": 0.7763, "step": 6610 }, { "epoch": 0.12022373964840913, "grad_norm": 1.1473475632663044, "learning_rate": 1.939455868009568e-05, "loss": 0.7911, "step": 6620 }, { "epoch": 0.12040534650588407, "grad_norm": 1.1410854601287033, "learning_rate": 1.9392582308075992e-05, "loss": 0.784, "step": 6630 }, { "epoch": 0.120586953363359, "grad_norm": 1.1314645268287378, "learning_rate": 1.9390602816559273e-05, "loss": 0.7825, "step": 6640 }, { "epoch": 0.12076856022083395, "grad_norm": 1.1785931677270873, "learning_rate": 1.938862020620296e-05, "loss": 0.7867, "step": 6650 }, { "epoch": 0.12095016707830887, "grad_norm": 1.1975152203812647, "learning_rate": 1.938663447766552e-05, "loss": 0.7639, "step": 6660 }, { "epoch": 0.12113177393578381, "grad_norm": 1.1539478602923814, "learning_rate": 1.9384645631606462e-05, "loss": 0.7748, "step": 6670 }, { "epoch": 0.12131338079325875, "grad_norm": 1.1484508213437452, "learning_rate": 1.9382653668686333e-05, "loss": 0.7755, "step": 6680 }, { "epoch": 0.12149498765073369, "grad_norm": 1.1786624803670573, "learning_rate": 1.9380658589566703e-05, "loss": 0.788, "step": 6690 }, { "epoch": 0.12167659450820863, "grad_norm": 1.1846808565419782, "learning_rate": 1.937866039491019e-05, "loss": 0.782, "step": 6700 }, { "epoch": 0.12185820136568357, "grad_norm": 1.175278684389065, "learning_rate": 1.937665908538044e-05, "loss": 0.7894, "step": 6710 }, { "epoch": 0.1220398082231585, "grad_norm": 1.056856298963324, "learning_rate": 1.9374654661642135e-05, "loss": 0.7692, "step": 6720 }, { "epoch": 0.12222141508063344, "grad_norm": 1.159618451441827, "learning_rate": 1.937264712436099e-05, "loss": 0.7822, "step": 6730 }, { "epoch": 0.12240302193810838, "grad_norm": 1.1451069738686752, "learning_rate": 1.9370636474203754e-05, "loss": 0.7819, "step": 6740 }, { "epoch": 0.12258462879558332, "grad_norm": 1.1027009426390735, "learning_rate": 1.9368622711838215e-05, "loss": 0.7715, "step": 6750 }, { "epoch": 0.12276623565305826, "grad_norm": 1.1220032674426106, "learning_rate": 1.936660583793319e-05, "loss": 0.7689, "step": 6760 }, { "epoch": 0.1229478425105332, "grad_norm": 1.0840046064583495, "learning_rate": 1.936458585315853e-05, "loss": 0.7655, "step": 6770 }, { "epoch": 0.12312944936800814, "grad_norm": 1.229166537847661, "learning_rate": 1.9362562758185116e-05, "loss": 0.8011, "step": 6780 }, { "epoch": 0.12331105622548308, "grad_norm": 1.0693504750540974, "learning_rate": 1.9360536553684865e-05, "loss": 0.7699, "step": 6790 }, { "epoch": 0.12349266308295802, "grad_norm": 1.0914328147856824, "learning_rate": 1.9358507240330735e-05, "loss": 0.7649, "step": 6800 }, { "epoch": 0.12367426994043296, "grad_norm": 1.1588921497270077, "learning_rate": 1.93564748187967e-05, "loss": 0.7693, "step": 6810 }, { "epoch": 0.12385587679790788, "grad_norm": 1.2167775156559988, "learning_rate": 1.935443928975778e-05, "loss": 0.7962, "step": 6820 }, { "epoch": 0.12403748365538282, "grad_norm": 1.1301760882823582, "learning_rate": 1.935240065389002e-05, "loss": 0.775, "step": 6830 }, { "epoch": 0.12421909051285776, "grad_norm": 7.316255380699001, "learning_rate": 1.9350358911870496e-05, "loss": 0.7806, "step": 6840 }, { "epoch": 0.1244006973703327, "grad_norm": 1.2084779130544845, "learning_rate": 1.9348314064377327e-05, "loss": 0.7847, "step": 6850 }, { "epoch": 0.12458230422780764, "grad_norm": 1.145590184136509, "learning_rate": 1.9346266112089647e-05, "loss": 0.7694, "step": 6860 }, { "epoch": 0.12476391108528258, "grad_norm": 1.1106032923957547, "learning_rate": 1.934421505568763e-05, "loss": 0.7859, "step": 6870 }, { "epoch": 0.12494551794275752, "grad_norm": 1.162498550183311, "learning_rate": 1.934216089585248e-05, "loss": 0.775, "step": 6880 }, { "epoch": 0.12512712480023247, "grad_norm": 1.1982406643098802, "learning_rate": 1.934010363326644e-05, "loss": 0.7721, "step": 6890 }, { "epoch": 0.1253087316577074, "grad_norm": 1.200648867684232, "learning_rate": 1.9338043268612763e-05, "loss": 0.7586, "step": 6900 }, { "epoch": 0.12549033851518232, "grad_norm": 1.1149905227271866, "learning_rate": 1.9335979802575747e-05, "loss": 0.7713, "step": 6910 }, { "epoch": 0.12567194537265727, "grad_norm": 1.105215772468994, "learning_rate": 1.9333913235840725e-05, "loss": 0.7725, "step": 6920 }, { "epoch": 0.1258535522301322, "grad_norm": 1.1263189151076125, "learning_rate": 1.9331843569094045e-05, "loss": 0.7774, "step": 6930 }, { "epoch": 0.12603515908760715, "grad_norm": 1.1903311676107995, "learning_rate": 1.9329770803023094e-05, "loss": 0.7649, "step": 6940 }, { "epoch": 0.12621676594508208, "grad_norm": 1.2027541674822553, "learning_rate": 1.932769493831629e-05, "loss": 0.7839, "step": 6950 }, { "epoch": 0.12639837280255703, "grad_norm": 1.1646977496863924, "learning_rate": 1.932561597566307e-05, "loss": 0.7981, "step": 6960 }, { "epoch": 0.12657997966003195, "grad_norm": 1.1158072979392144, "learning_rate": 1.932353391575391e-05, "loss": 0.7698, "step": 6970 }, { "epoch": 0.1267615865175069, "grad_norm": 1.2373095002499546, "learning_rate": 1.9321448759280313e-05, "loss": 0.7703, "step": 6980 }, { "epoch": 0.12694319337498183, "grad_norm": 1.131176685230506, "learning_rate": 1.93193605069348e-05, "loss": 0.7635, "step": 6990 }, { "epoch": 0.12712480023245679, "grad_norm": 1.0990051445959168, "learning_rate": 1.9317269159410937e-05, "loss": 0.7722, "step": 7000 }, { "epoch": 0.1273064070899317, "grad_norm": 1.0790951438579952, "learning_rate": 1.931517471740331e-05, "loss": 0.7567, "step": 7010 }, { "epoch": 0.12748801394740666, "grad_norm": 1.1517556926899628, "learning_rate": 1.9313077181607527e-05, "loss": 0.7797, "step": 7020 }, { "epoch": 0.1276696208048816, "grad_norm": 1.1134223269215098, "learning_rate": 1.931097655272023e-05, "loss": 0.7823, "step": 7030 }, { "epoch": 0.12785122766235654, "grad_norm": 1.1023694301149018, "learning_rate": 1.930887283143909e-05, "loss": 0.7823, "step": 7040 }, { "epoch": 0.12803283451983147, "grad_norm": 1.1060714770625573, "learning_rate": 1.9306766018462797e-05, "loss": 0.7801, "step": 7050 }, { "epoch": 0.12821444137730642, "grad_norm": 1.0732996685398648, "learning_rate": 1.930465611449108e-05, "loss": 0.7628, "step": 7060 }, { "epoch": 0.12839604823478135, "grad_norm": 1.160284815281924, "learning_rate": 1.9302543120224683e-05, "loss": 0.7816, "step": 7070 }, { "epoch": 0.12857765509225627, "grad_norm": 1.1177152770243526, "learning_rate": 1.9300427036365384e-05, "loss": 0.7632, "step": 7080 }, { "epoch": 0.12875926194973122, "grad_norm": 1.0977308667262744, "learning_rate": 1.929830786361598e-05, "loss": 0.7701, "step": 7090 }, { "epoch": 0.12894086880720615, "grad_norm": 1.0840919017538646, "learning_rate": 1.92961856026803e-05, "loss": 0.7799, "step": 7100 }, { "epoch": 0.1291224756646811, "grad_norm": 1.1756271299220096, "learning_rate": 1.92940602542632e-05, "loss": 0.7861, "step": 7110 }, { "epoch": 0.12930408252215603, "grad_norm": 1.05232854798997, "learning_rate": 1.9291931819070552e-05, "loss": 0.7786, "step": 7120 }, { "epoch": 0.12948568937963098, "grad_norm": 1.0961995977384624, "learning_rate": 1.928980029780926e-05, "loss": 0.76, "step": 7130 }, { "epoch": 0.1296672962371059, "grad_norm": 1.0854509854267715, "learning_rate": 1.928766569118726e-05, "loss": 0.7651, "step": 7140 }, { "epoch": 0.12984890309458086, "grad_norm": 1.1163452420687137, "learning_rate": 1.9285527999913494e-05, "loss": 0.7658, "step": 7150 }, { "epoch": 0.13003050995205578, "grad_norm": 2.5620701784185624, "learning_rate": 1.928338722469795e-05, "loss": 0.7607, "step": 7160 }, { "epoch": 0.13021211680953074, "grad_norm": 1.0715162703514867, "learning_rate": 1.9281243366251624e-05, "loss": 0.7769, "step": 7170 }, { "epoch": 0.13039372366700566, "grad_norm": 1.1592681787612882, "learning_rate": 1.927909642528654e-05, "loss": 0.7758, "step": 7180 }, { "epoch": 0.13057533052448062, "grad_norm": 1.1279542756552583, "learning_rate": 1.9276946402515753e-05, "loss": 0.7764, "step": 7190 }, { "epoch": 0.13075693738195554, "grad_norm": 1.0902465414651723, "learning_rate": 1.927479329865333e-05, "loss": 0.7616, "step": 7200 }, { "epoch": 0.1309385442394305, "grad_norm": 1.1146463313675832, "learning_rate": 1.9272637114414368e-05, "loss": 0.7724, "step": 7210 }, { "epoch": 0.13112015109690542, "grad_norm": 1.0680336851404437, "learning_rate": 1.9270477850514996e-05, "loss": 0.7784, "step": 7220 }, { "epoch": 0.13130175795438034, "grad_norm": 1.1419288682830508, "learning_rate": 1.9268315507672342e-05, "loss": 0.7821, "step": 7230 }, { "epoch": 0.1314833648118553, "grad_norm": 1.1068497780395312, "learning_rate": 1.926615008660458e-05, "loss": 0.7687, "step": 7240 }, { "epoch": 0.13166497166933022, "grad_norm": 1.089922424739312, "learning_rate": 1.9263981588030894e-05, "loss": 0.7672, "step": 7250 }, { "epoch": 0.13184657852680517, "grad_norm": 1.1292245492892787, "learning_rate": 1.9261810012671494e-05, "loss": 0.778, "step": 7260 }, { "epoch": 0.1320281853842801, "grad_norm": 1.1738538521415152, "learning_rate": 1.9259635361247616e-05, "loss": 0.7717, "step": 7270 }, { "epoch": 0.13220979224175505, "grad_norm": 1.087754135852555, "learning_rate": 1.9257457634481504e-05, "loss": 0.7802, "step": 7280 }, { "epoch": 0.13239139909922998, "grad_norm": 1.0804507505885668, "learning_rate": 1.9255276833096436e-05, "loss": 0.769, "step": 7290 }, { "epoch": 0.13257300595670493, "grad_norm": 1.0636807205721863, "learning_rate": 1.9253092957816707e-05, "loss": 0.7681, "step": 7300 }, { "epoch": 0.13275461281417986, "grad_norm": 1.0736582571044604, "learning_rate": 1.925090600936763e-05, "loss": 0.7619, "step": 7310 }, { "epoch": 0.1329362196716548, "grad_norm": 1.101912580639657, "learning_rate": 1.9248715988475552e-05, "loss": 0.7625, "step": 7320 }, { "epoch": 0.13311782652912973, "grad_norm": 1.128071406053464, "learning_rate": 1.9246522895867822e-05, "loss": 0.7783, "step": 7330 }, { "epoch": 0.1332994333866047, "grad_norm": 1.1399578874574712, "learning_rate": 1.924432673227282e-05, "loss": 0.786, "step": 7340 }, { "epoch": 0.1334810402440796, "grad_norm": 1.1360411740990286, "learning_rate": 1.9242127498419943e-05, "loss": 0.7638, "step": 7350 }, { "epoch": 0.13366264710155457, "grad_norm": 1.0524443458001478, "learning_rate": 1.9239925195039606e-05, "loss": 0.7713, "step": 7360 }, { "epoch": 0.1338442539590295, "grad_norm": 1.1309157695104473, "learning_rate": 1.9237719822863247e-05, "loss": 0.7914, "step": 7370 }, { "epoch": 0.13402586081650444, "grad_norm": 1.1302272253302623, "learning_rate": 1.9235511382623328e-05, "loss": 0.7712, "step": 7380 }, { "epoch": 0.13420746767397937, "grad_norm": 1.0586584737595535, "learning_rate": 1.9233299875053314e-05, "loss": 0.7699, "step": 7390 }, { "epoch": 0.1343890745314543, "grad_norm": 1.0818850790401968, "learning_rate": 1.9231085300887704e-05, "loss": 0.7758, "step": 7400 }, { "epoch": 0.13457068138892925, "grad_norm": 1.0778685026809307, "learning_rate": 1.922886766086201e-05, "loss": 0.7589, "step": 7410 }, { "epoch": 0.13475228824640417, "grad_norm": 1.0782888397888415, "learning_rate": 1.922664695571276e-05, "loss": 0.7639, "step": 7420 }, { "epoch": 0.13493389510387913, "grad_norm": 1.0977776093545608, "learning_rate": 1.9224423186177504e-05, "loss": 0.7612, "step": 7430 }, { "epoch": 0.13511550196135405, "grad_norm": 1.0731000279111742, "learning_rate": 1.9222196352994807e-05, "loss": 0.7834, "step": 7440 }, { "epoch": 0.135297108818829, "grad_norm": 1.1339066290956366, "learning_rate": 1.921996645690426e-05, "loss": 0.7844, "step": 7450 }, { "epoch": 0.13547871567630393, "grad_norm": 1.1217371114165546, "learning_rate": 1.921773349864645e-05, "loss": 0.767, "step": 7460 }, { "epoch": 0.13566032253377888, "grad_norm": 1.1261912903209181, "learning_rate": 1.9215497478963e-05, "loss": 0.7739, "step": 7470 }, { "epoch": 0.1358419293912538, "grad_norm": 1.1212949475168172, "learning_rate": 1.9213258398596554e-05, "loss": 0.7848, "step": 7480 }, { "epoch": 0.13602353624872876, "grad_norm": 1.044832171092984, "learning_rate": 1.9211016258290747e-05, "loss": 0.7736, "step": 7490 }, { "epoch": 0.13620514310620369, "grad_norm": 1.0661073818232412, "learning_rate": 1.9208771058790262e-05, "loss": 0.7712, "step": 7500 }, { "epoch": 0.13638674996367864, "grad_norm": 1.1733449818376025, "learning_rate": 1.9206522800840775e-05, "loss": 0.7694, "step": 7510 }, { "epoch": 0.13656835682115356, "grad_norm": 1.146724319197424, "learning_rate": 1.9204271485188982e-05, "loss": 0.7821, "step": 7520 }, { "epoch": 0.13674996367862852, "grad_norm": 1.1206709786158942, "learning_rate": 1.9202017112582605e-05, "loss": 0.7772, "step": 7530 }, { "epoch": 0.13693157053610344, "grad_norm": 1.0819348414872785, "learning_rate": 1.919975968377037e-05, "loss": 0.7872, "step": 7540 }, { "epoch": 0.1371131773935784, "grad_norm": 1.1334190062731997, "learning_rate": 1.9197499199502023e-05, "loss": 0.7585, "step": 7550 }, { "epoch": 0.13729478425105332, "grad_norm": 1.120147438336049, "learning_rate": 1.919523566052832e-05, "loss": 0.7715, "step": 7560 }, { "epoch": 0.13747639110852825, "grad_norm": 1.1217549030653884, "learning_rate": 1.9192969067601045e-05, "loss": 0.7901, "step": 7570 }, { "epoch": 0.1376579979660032, "grad_norm": 1.0943498245444616, "learning_rate": 1.9190699421472976e-05, "loss": 0.7626, "step": 7580 }, { "epoch": 0.13783960482347812, "grad_norm": 1.1561855062051134, "learning_rate": 1.9188426722897923e-05, "loss": 0.7805, "step": 7590 }, { "epoch": 0.13802121168095308, "grad_norm": 1.0828939638072776, "learning_rate": 1.91861509726307e-05, "loss": 0.765, "step": 7600 }, { "epoch": 0.138202818538428, "grad_norm": 1.111750520437127, "learning_rate": 1.9183872171427137e-05, "loss": 0.7598, "step": 7610 }, { "epoch": 0.13838442539590295, "grad_norm": 1.113155872612474, "learning_rate": 1.9181590320044076e-05, "loss": 0.7737, "step": 7620 }, { "epoch": 0.13856603225337788, "grad_norm": 1.1416553036969606, "learning_rate": 1.9179305419239376e-05, "loss": 0.7883, "step": 7630 }, { "epoch": 0.13874763911085283, "grad_norm": 1.07264785130457, "learning_rate": 1.9177017469771905e-05, "loss": 0.7645, "step": 7640 }, { "epoch": 0.13892924596832776, "grad_norm": 1.0875652287987525, "learning_rate": 1.9174726472401546e-05, "loss": 0.7783, "step": 7650 }, { "epoch": 0.1391108528258027, "grad_norm": 1.1000988123455302, "learning_rate": 1.917243242788919e-05, "loss": 0.7647, "step": 7660 }, { "epoch": 0.13929245968327764, "grad_norm": 1.0869176636014681, "learning_rate": 1.9170135336996748e-05, "loss": 0.7812, "step": 7670 }, { "epoch": 0.1394740665407526, "grad_norm": 1.1246253968944484, "learning_rate": 1.9167835200487127e-05, "loss": 0.7706, "step": 7680 }, { "epoch": 0.13965567339822751, "grad_norm": 1.0828905997180298, "learning_rate": 1.9165532019124275e-05, "loss": 0.7713, "step": 7690 }, { "epoch": 0.13983728025570247, "grad_norm": 1.1177079828706062, "learning_rate": 1.9163225793673112e-05, "loss": 0.7732, "step": 7700 }, { "epoch": 0.1400188871131774, "grad_norm": 1.092042919734347, "learning_rate": 1.91609165248996e-05, "loss": 0.7833, "step": 7710 }, { "epoch": 0.14020049397065235, "grad_norm": 1.130257268685211, "learning_rate": 1.9158604213570705e-05, "loss": 0.7669, "step": 7720 }, { "epoch": 0.14038210082812727, "grad_norm": 1.138519275206929, "learning_rate": 1.9156288860454393e-05, "loss": 0.7764, "step": 7730 }, { "epoch": 0.1405637076856022, "grad_norm": 1.105939953270807, "learning_rate": 1.9153970466319652e-05, "loss": 0.7635, "step": 7740 }, { "epoch": 0.14074531454307715, "grad_norm": 1.1667571927723586, "learning_rate": 1.915164903193647e-05, "loss": 0.7694, "step": 7750 }, { "epoch": 0.14092692140055207, "grad_norm": 1.1076722657344091, "learning_rate": 1.9149324558075852e-05, "loss": 0.7664, "step": 7760 }, { "epoch": 0.14110852825802703, "grad_norm": 1.0812361288651908, "learning_rate": 1.9146997045509813e-05, "loss": 0.7716, "step": 7770 }, { "epoch": 0.14129013511550195, "grad_norm": 1.1365033456066527, "learning_rate": 1.914466649501137e-05, "loss": 0.7683, "step": 7780 }, { "epoch": 0.1414717419729769, "grad_norm": 1.06360604654795, "learning_rate": 1.9142332907354557e-05, "loss": 0.7708, "step": 7790 }, { "epoch": 0.14165334883045183, "grad_norm": 1.1371346110744507, "learning_rate": 1.913999628331441e-05, "loss": 0.773, "step": 7800 }, { "epoch": 0.14183495568792678, "grad_norm": 1.125548541369095, "learning_rate": 1.913765662366698e-05, "loss": 0.77, "step": 7810 }, { "epoch": 0.1420165625454017, "grad_norm": 1.0711937405023377, "learning_rate": 1.9135313929189324e-05, "loss": 0.7818, "step": 7820 }, { "epoch": 0.14219816940287666, "grad_norm": 1.095391750343934, "learning_rate": 1.91329682006595e-05, "loss": 0.7739, "step": 7830 }, { "epoch": 0.1423797762603516, "grad_norm": 1.1190510057063643, "learning_rate": 1.9130619438856587e-05, "loss": 0.7799, "step": 7840 }, { "epoch": 0.14256138311782654, "grad_norm": 1.0560421740740067, "learning_rate": 1.9128267644560656e-05, "loss": 0.7779, "step": 7850 }, { "epoch": 0.14274298997530147, "grad_norm": 1.1151401999322688, "learning_rate": 1.9125912818552798e-05, "loss": 0.7656, "step": 7860 }, { "epoch": 0.14292459683277642, "grad_norm": 1.1291230892603201, "learning_rate": 1.9123554961615104e-05, "loss": 0.7582, "step": 7870 }, { "epoch": 0.14310620369025134, "grad_norm": 1.0572319340169785, "learning_rate": 1.9121194074530676e-05, "loss": 0.7723, "step": 7880 }, { "epoch": 0.14328781054772627, "grad_norm": 1.1153618609460345, "learning_rate": 1.9118830158083615e-05, "loss": 0.7532, "step": 7890 }, { "epoch": 0.14346941740520122, "grad_norm": 1.0418927591822897, "learning_rate": 1.911646321305904e-05, "loss": 0.742, "step": 7900 }, { "epoch": 0.14365102426267615, "grad_norm": 1.0973500301281407, "learning_rate": 1.9114093240243065e-05, "loss": 0.7684, "step": 7910 }, { "epoch": 0.1438326311201511, "grad_norm": 1.1435132999831643, "learning_rate": 1.9111720240422814e-05, "loss": 0.7702, "step": 7920 }, { "epoch": 0.14401423797762603, "grad_norm": 1.0580741561915372, "learning_rate": 1.9109344214386418e-05, "loss": 0.7626, "step": 7930 }, { "epoch": 0.14419584483510098, "grad_norm": 1.0667731399265736, "learning_rate": 1.910696516292301e-05, "loss": 0.7701, "step": 7940 }, { "epoch": 0.1443774516925759, "grad_norm": 1.201613097722299, "learning_rate": 1.9104583086822727e-05, "loss": 0.7545, "step": 7950 }, { "epoch": 0.14455905855005086, "grad_norm": 1.0926602557020835, "learning_rate": 1.9102197986876708e-05, "loss": 0.7705, "step": 7960 }, { "epoch": 0.14474066540752578, "grad_norm": 1.0949980726321153, "learning_rate": 1.9099809863877113e-05, "loss": 0.7705, "step": 7970 }, { "epoch": 0.14492227226500073, "grad_norm": 1.0789955168138237, "learning_rate": 1.909741871861708e-05, "loss": 0.7651, "step": 7980 }, { "epoch": 0.14510387912247566, "grad_norm": 1.095630269399159, "learning_rate": 1.909502455189078e-05, "loss": 0.7678, "step": 7990 }, { "epoch": 0.1452854859799506, "grad_norm": 1.1626981194933366, "learning_rate": 1.909262736449336e-05, "loss": 0.7775, "step": 8000 }, { "epoch": 0.14546709283742554, "grad_norm": 1.115357055938732, "learning_rate": 1.9090227157220983e-05, "loss": 0.7705, "step": 8010 }, { "epoch": 0.1456486996949005, "grad_norm": 1.0931700713000312, "learning_rate": 1.9087823930870818e-05, "loss": 0.7465, "step": 8020 }, { "epoch": 0.14583030655237542, "grad_norm": 1.209459871436711, "learning_rate": 1.9085417686241027e-05, "loss": 0.7864, "step": 8030 }, { "epoch": 0.14601191340985037, "grad_norm": 1.0542133191520793, "learning_rate": 1.908300842413079e-05, "loss": 0.7578, "step": 8040 }, { "epoch": 0.1461935202673253, "grad_norm": 1.0630599995423284, "learning_rate": 1.9080596145340277e-05, "loss": 0.7658, "step": 8050 }, { "epoch": 0.14637512712480022, "grad_norm": 1.0878086410972714, "learning_rate": 1.9078180850670654e-05, "loss": 0.7692, "step": 8060 }, { "epoch": 0.14655673398227517, "grad_norm": 1.0682096750601477, "learning_rate": 1.9075762540924104e-05, "loss": 0.7636, "step": 8070 }, { "epoch": 0.1467383408397501, "grad_norm": 1.139344422348896, "learning_rate": 1.9073341216903804e-05, "loss": 0.7688, "step": 8080 }, { "epoch": 0.14691994769722505, "grad_norm": 1.1507302308634006, "learning_rate": 1.9070916879413934e-05, "loss": 0.7744, "step": 8090 }, { "epoch": 0.14710155455469998, "grad_norm": 1.0441663084303872, "learning_rate": 1.9068489529259666e-05, "loss": 0.7554, "step": 8100 }, { "epoch": 0.14728316141217493, "grad_norm": 1.1894365913694147, "learning_rate": 1.906605916724719e-05, "loss": 0.7713, "step": 8110 }, { "epoch": 0.14746476826964985, "grad_norm": 1.1212049561295803, "learning_rate": 1.906362579418368e-05, "loss": 0.7763, "step": 8120 }, { "epoch": 0.1476463751271248, "grad_norm": 1.1890139561876223, "learning_rate": 1.9061189410877312e-05, "loss": 0.7624, "step": 8130 }, { "epoch": 0.14782798198459973, "grad_norm": 1.1166443176889762, "learning_rate": 1.9058750018137277e-05, "loss": 0.7668, "step": 8140 }, { "epoch": 0.14800958884207469, "grad_norm": 1.1216095078846577, "learning_rate": 1.9056307616773747e-05, "loss": 0.7484, "step": 8150 }, { "epoch": 0.1481911956995496, "grad_norm": 1.0288163590828197, "learning_rate": 1.9053862207597906e-05, "loss": 0.7573, "step": 8160 }, { "epoch": 0.14837280255702456, "grad_norm": 1.0523923823788528, "learning_rate": 1.9051413791421928e-05, "loss": 0.7572, "step": 8170 }, { "epoch": 0.1485544094144995, "grad_norm": 1.1672236275543413, "learning_rate": 1.904896236905899e-05, "loss": 0.7692, "step": 8180 }, { "epoch": 0.14873601627197444, "grad_norm": 1.0844533413771837, "learning_rate": 1.9046507941323263e-05, "loss": 0.749, "step": 8190 }, { "epoch": 0.14891762312944937, "grad_norm": 1.0538203302766247, "learning_rate": 1.904405050902993e-05, "loss": 0.7496, "step": 8200 }, { "epoch": 0.14909922998692432, "grad_norm": 1.0585956745594756, "learning_rate": 1.9041590072995155e-05, "loss": 0.7723, "step": 8210 }, { "epoch": 0.14928083684439925, "grad_norm": 1.0947128949984413, "learning_rate": 1.9039126634036108e-05, "loss": 0.7786, "step": 8220 }, { "epoch": 0.14946244370187417, "grad_norm": 1.0526101308844982, "learning_rate": 1.9036660192970952e-05, "loss": 0.7578, "step": 8230 }, { "epoch": 0.14964405055934912, "grad_norm": 1.1386631928427584, "learning_rate": 1.9034190750618854e-05, "loss": 0.7722, "step": 8240 }, { "epoch": 0.14982565741682405, "grad_norm": 1.067692061157273, "learning_rate": 1.9031718307799974e-05, "loss": 0.7669, "step": 8250 }, { "epoch": 0.150007264274299, "grad_norm": 1.15500311457003, "learning_rate": 1.9029242865335465e-05, "loss": 0.7775, "step": 8260 }, { "epoch": 0.15018887113177393, "grad_norm": 1.0457380122071624, "learning_rate": 1.9026764424047482e-05, "loss": 0.7574, "step": 8270 }, { "epoch": 0.15037047798924888, "grad_norm": 1.085429387221421, "learning_rate": 1.9024282984759174e-05, "loss": 0.7545, "step": 8280 }, { "epoch": 0.1505520848467238, "grad_norm": 1.157834535609163, "learning_rate": 1.9021798548294682e-05, "loss": 0.7836, "step": 8290 }, { "epoch": 0.15073369170419876, "grad_norm": 1.0838442553066483, "learning_rate": 1.901931111547915e-05, "loss": 0.7683, "step": 8300 }, { "epoch": 0.15091529856167368, "grad_norm": 1.0913537870080463, "learning_rate": 1.9016820687138706e-05, "loss": 0.7671, "step": 8310 }, { "epoch": 0.15109690541914864, "grad_norm": 1.063610699026615, "learning_rate": 1.9014327264100484e-05, "loss": 0.7582, "step": 8320 }, { "epoch": 0.15127851227662356, "grad_norm": 1.0955862104509237, "learning_rate": 1.9011830847192615e-05, "loss": 0.7699, "step": 8330 }, { "epoch": 0.15146011913409851, "grad_norm": 1.1519039618238311, "learning_rate": 1.9009331437244207e-05, "loss": 0.7842, "step": 8340 }, { "epoch": 0.15164172599157344, "grad_norm": 1.0670634011877536, "learning_rate": 1.9006829035085377e-05, "loss": 0.7584, "step": 8350 }, { "epoch": 0.1518233328490484, "grad_norm": 1.0491640585311703, "learning_rate": 1.900432364154723e-05, "loss": 0.7616, "step": 8360 }, { "epoch": 0.15200493970652332, "grad_norm": 1.0616212875799447, "learning_rate": 1.900181525746187e-05, "loss": 0.7471, "step": 8370 }, { "epoch": 0.15218654656399824, "grad_norm": 1.0136845126892562, "learning_rate": 1.8999303883662387e-05, "loss": 0.7644, "step": 8380 }, { "epoch": 0.1523681534214732, "grad_norm": 1.0610021273192638, "learning_rate": 1.8996789520982868e-05, "loss": 0.7669, "step": 8390 }, { "epoch": 0.15254976027894812, "grad_norm": 1.0228499629781105, "learning_rate": 1.8994272170258388e-05, "loss": 0.7608, "step": 8400 }, { "epoch": 0.15273136713642307, "grad_norm": 1.0350065515125688, "learning_rate": 1.8991751832325026e-05, "loss": 0.7553, "step": 8410 }, { "epoch": 0.152912973993898, "grad_norm": 1.1405072441667305, "learning_rate": 1.898922850801984e-05, "loss": 0.7523, "step": 8420 }, { "epoch": 0.15309458085137295, "grad_norm": 1.047831078887167, "learning_rate": 1.8986702198180883e-05, "loss": 0.7847, "step": 8430 }, { "epoch": 0.15327618770884788, "grad_norm": 1.0904467577114056, "learning_rate": 1.898417290364721e-05, "loss": 0.7616, "step": 8440 }, { "epoch": 0.15345779456632283, "grad_norm": 1.1795401785193447, "learning_rate": 1.8981640625258855e-05, "loss": 0.756, "step": 8450 }, { "epoch": 0.15363940142379776, "grad_norm": 1.0395243536232146, "learning_rate": 1.8979105363856846e-05, "loss": 0.7659, "step": 8460 }, { "epoch": 0.1538210082812727, "grad_norm": 1.0765577192326914, "learning_rate": 1.89765671202832e-05, "loss": 0.764, "step": 8470 }, { "epoch": 0.15400261513874763, "grad_norm": 1.0301366800315346, "learning_rate": 1.897402589538093e-05, "loss": 0.7706, "step": 8480 }, { "epoch": 0.1541842219962226, "grad_norm": 1.0774341963239609, "learning_rate": 1.897148168999404e-05, "loss": 0.7544, "step": 8490 }, { "epoch": 0.1543658288536975, "grad_norm": 1.0431021910275333, "learning_rate": 1.8968934504967514e-05, "loss": 0.7638, "step": 8500 }, { "epoch": 0.15454743571117247, "grad_norm": 1.0747461589688447, "learning_rate": 1.896638434114734e-05, "loss": 0.764, "step": 8510 }, { "epoch": 0.1547290425686474, "grad_norm": 1.046291857128864, "learning_rate": 1.8963831199380478e-05, "loss": 0.7618, "step": 8520 }, { "epoch": 0.15491064942612234, "grad_norm": 1.0480526556723244, "learning_rate": 1.8961275080514892e-05, "loss": 0.7614, "step": 8530 }, { "epoch": 0.15509225628359727, "grad_norm": 1.101773908225338, "learning_rate": 1.895871598539953e-05, "loss": 0.7683, "step": 8540 }, { "epoch": 0.1552738631410722, "grad_norm": 1.0780272187606643, "learning_rate": 1.8956153914884323e-05, "loss": 0.774, "step": 8550 }, { "epoch": 0.15545546999854715, "grad_norm": 1.2139558944079687, "learning_rate": 1.89535888698202e-05, "loss": 0.761, "step": 8560 }, { "epoch": 0.15563707685602207, "grad_norm": 1.0389334215172426, "learning_rate": 1.8951020851059064e-05, "loss": 0.7641, "step": 8570 }, { "epoch": 0.15581868371349702, "grad_norm": 1.0808830464753332, "learning_rate": 1.8948449859453822e-05, "loss": 0.7682, "step": 8580 }, { "epoch": 0.15600029057097195, "grad_norm": 1.1096763462993946, "learning_rate": 1.894587589585836e-05, "loss": 0.7402, "step": 8590 }, { "epoch": 0.1561818974284469, "grad_norm": 1.0240441194726662, "learning_rate": 1.8943298961127553e-05, "loss": 0.7532, "step": 8600 }, { "epoch": 0.15636350428592183, "grad_norm": 1.0380508091614555, "learning_rate": 1.8940719056117256e-05, "loss": 0.7459, "step": 8610 }, { "epoch": 0.15654511114339678, "grad_norm": 1.1264975896496117, "learning_rate": 1.8938136181684318e-05, "loss": 0.7658, "step": 8620 }, { "epoch": 0.1567267180008717, "grad_norm": 1.02832050076413, "learning_rate": 1.8935550338686577e-05, "loss": 0.7644, "step": 8630 }, { "epoch": 0.15690832485834666, "grad_norm": 1.0803359017853562, "learning_rate": 1.8932961527982846e-05, "loss": 0.7734, "step": 8640 }, { "epoch": 0.15708993171582158, "grad_norm": 1.0217648113449367, "learning_rate": 1.8930369750432932e-05, "loss": 0.7543, "step": 8650 }, { "epoch": 0.15727153857329654, "grad_norm": 1.0249404660739925, "learning_rate": 1.8927775006897627e-05, "loss": 0.7757, "step": 8660 }, { "epoch": 0.15745314543077146, "grad_norm": 1.0715598518934235, "learning_rate": 1.89251772982387e-05, "loss": 0.7711, "step": 8670 }, { "epoch": 0.15763475228824642, "grad_norm": 1.0650524290133885, "learning_rate": 1.892257662531892e-05, "loss": 0.77, "step": 8680 }, { "epoch": 0.15781635914572134, "grad_norm": 1.0706535360310399, "learning_rate": 1.8919972989002027e-05, "loss": 0.7726, "step": 8690 }, { "epoch": 0.1579979660031963, "grad_norm": 1.018171657340659, "learning_rate": 1.8917366390152747e-05, "loss": 0.7545, "step": 8700 }, { "epoch": 0.15817957286067122, "grad_norm": 1.0447480126039936, "learning_rate": 1.8914756829636798e-05, "loss": 0.7573, "step": 8710 }, { "epoch": 0.15836117971814614, "grad_norm": 1.079924416769921, "learning_rate": 1.8912144308320872e-05, "loss": 0.7507, "step": 8720 }, { "epoch": 0.1585427865756211, "grad_norm": 1.0891328931344757, "learning_rate": 1.8909528827072652e-05, "loss": 0.749, "step": 8730 }, { "epoch": 0.15872439343309602, "grad_norm": 1.1981180455821288, "learning_rate": 1.8906910386760798e-05, "loss": 0.7707, "step": 8740 }, { "epoch": 0.15890600029057098, "grad_norm": 3.37463722258239, "learning_rate": 1.8904288988254954e-05, "loss": 0.765, "step": 8750 }, { "epoch": 0.1590876071480459, "grad_norm": 1.2886940725183123, "learning_rate": 1.8901664632425753e-05, "loss": 0.7767, "step": 8760 }, { "epoch": 0.15926921400552085, "grad_norm": 1.0635032037506298, "learning_rate": 1.8899037320144804e-05, "loss": 0.7619, "step": 8770 }, { "epoch": 0.15945082086299578, "grad_norm": 1.065729830855646, "learning_rate": 1.8896407052284694e-05, "loss": 0.7508, "step": 8780 }, { "epoch": 0.15963242772047073, "grad_norm": 1.0175755590992197, "learning_rate": 1.8893773829719006e-05, "loss": 0.7632, "step": 8790 }, { "epoch": 0.15981403457794566, "grad_norm": 1.0882825312551214, "learning_rate": 1.8891137653322283e-05, "loss": 0.757, "step": 8800 }, { "epoch": 0.1599956414354206, "grad_norm": 1.0413635727174901, "learning_rate": 1.8888498523970073e-05, "loss": 0.7748, "step": 8810 }, { "epoch": 0.16017724829289554, "grad_norm": 1.2085157728742608, "learning_rate": 1.8885856442538887e-05, "loss": 0.7634, "step": 8820 }, { "epoch": 0.1603588551503705, "grad_norm": 1.038236772255739, "learning_rate": 1.888321140990622e-05, "loss": 0.7687, "step": 8830 }, { "epoch": 0.1605404620078454, "grad_norm": 1.0613831978926587, "learning_rate": 1.8880563426950554e-05, "loss": 0.7633, "step": 8840 }, { "epoch": 0.16072206886532037, "grad_norm": 1.097966496748563, "learning_rate": 1.887791249455134e-05, "loss": 0.76, "step": 8850 }, { "epoch": 0.1609036757227953, "grad_norm": 1.0702567254239226, "learning_rate": 1.8875258613589024e-05, "loss": 0.7713, "step": 8860 }, { "epoch": 0.16108528258027024, "grad_norm": 1.049004400959287, "learning_rate": 1.8872601784945014e-05, "loss": 0.751, "step": 8870 }, { "epoch": 0.16126688943774517, "grad_norm": 1.1363976619113603, "learning_rate": 1.886994200950171e-05, "loss": 0.7722, "step": 8880 }, { "epoch": 0.1614484962952201, "grad_norm": 1.095718211743535, "learning_rate": 1.8867279288142483e-05, "loss": 0.7573, "step": 8890 }, { "epoch": 0.16163010315269505, "grad_norm": 1.038467757220399, "learning_rate": 1.886461362175169e-05, "loss": 0.7505, "step": 8900 }, { "epoch": 0.16181171001016997, "grad_norm": 1.0421642082404496, "learning_rate": 1.8861945011214655e-05, "loss": 0.7662, "step": 8910 }, { "epoch": 0.16199331686764493, "grad_norm": 1.0271580686357125, "learning_rate": 1.885927345741769e-05, "loss": 0.7622, "step": 8920 }, { "epoch": 0.16217492372511985, "grad_norm": 1.0127329787603256, "learning_rate": 1.885659896124808e-05, "loss": 0.7693, "step": 8930 }, { "epoch": 0.1623565305825948, "grad_norm": 1.0283320976085593, "learning_rate": 1.8853921523594087e-05, "loss": 0.7491, "step": 8940 }, { "epoch": 0.16253813744006973, "grad_norm": 1.044617690054215, "learning_rate": 1.885124114534495e-05, "loss": 0.7755, "step": 8950 }, { "epoch": 0.16271974429754468, "grad_norm": 1.1486125756309582, "learning_rate": 1.8848557827390894e-05, "loss": 0.7552, "step": 8960 }, { "epoch": 0.1629013511550196, "grad_norm": 1.0598158185989774, "learning_rate": 1.8845871570623097e-05, "loss": 0.7639, "step": 8970 }, { "epoch": 0.16308295801249456, "grad_norm": 1.0970598134269147, "learning_rate": 1.884318237593374e-05, "loss": 0.7594, "step": 8980 }, { "epoch": 0.1632645648699695, "grad_norm": 1.084854440114401, "learning_rate": 1.8840490244215966e-05, "loss": 0.7625, "step": 8990 }, { "epoch": 0.16344617172744444, "grad_norm": 1.058979749257257, "learning_rate": 1.883779517636389e-05, "loss": 0.7585, "step": 9000 }, { "epoch": 0.16362777858491936, "grad_norm": 1.0665273975922336, "learning_rate": 1.883509717327261e-05, "loss": 0.7583, "step": 9010 }, { "epoch": 0.16380938544239432, "grad_norm": 1.042022937418176, "learning_rate": 1.8832396235838196e-05, "loss": 0.7494, "step": 9020 }, { "epoch": 0.16399099229986924, "grad_norm": 1.0526259630237704, "learning_rate": 1.882969236495769e-05, "loss": 0.7558, "step": 9030 }, { "epoch": 0.16417259915734417, "grad_norm": 1.052852607838611, "learning_rate": 1.882698556152912e-05, "loss": 0.768, "step": 9040 }, { "epoch": 0.16435420601481912, "grad_norm": 1.0466232481044626, "learning_rate": 1.8824275826451467e-05, "loss": 0.7655, "step": 9050 }, { "epoch": 0.16453581287229405, "grad_norm": 1.1083420956741352, "learning_rate": 1.8821563160624706e-05, "loss": 0.7716, "step": 9060 }, { "epoch": 0.164717419729769, "grad_norm": 1.0389040270996155, "learning_rate": 1.881884756494978e-05, "loss": 0.7566, "step": 9070 }, { "epoch": 0.16489902658724392, "grad_norm": 1.10205434374719, "learning_rate": 1.8816129040328587e-05, "loss": 0.7533, "step": 9080 }, { "epoch": 0.16508063344471888, "grad_norm": 1.0504420080991903, "learning_rate": 1.881340758766403e-05, "loss": 0.7689, "step": 9090 }, { "epoch": 0.1652622403021938, "grad_norm": 1.0267748749025818, "learning_rate": 1.8810683207859957e-05, "loss": 0.7584, "step": 9100 }, { "epoch": 0.16544384715966876, "grad_norm": 1.047228319874724, "learning_rate": 1.8807955901821197e-05, "loss": 0.7611, "step": 9110 }, { "epoch": 0.16562545401714368, "grad_norm": 1.0359232984045328, "learning_rate": 1.8805225670453563e-05, "loss": 0.7543, "step": 9120 }, { "epoch": 0.16580706087461863, "grad_norm": 1.0525866627333282, "learning_rate": 1.880249251466382e-05, "loss": 0.7589, "step": 9130 }, { "epoch": 0.16598866773209356, "grad_norm": 1.0510388420050425, "learning_rate": 1.8799756435359714e-05, "loss": 0.7599, "step": 9140 }, { "epoch": 0.1661702745895685, "grad_norm": 1.0558652773302484, "learning_rate": 1.879701743344996e-05, "loss": 0.7509, "step": 9150 }, { "epoch": 0.16635188144704344, "grad_norm": 1.043904478847335, "learning_rate": 1.8794275509844254e-05, "loss": 0.7571, "step": 9160 }, { "epoch": 0.1665334883045184, "grad_norm": 0.9912943009994001, "learning_rate": 1.8791530665453247e-05, "loss": 0.7612, "step": 9170 }, { "epoch": 0.16671509516199332, "grad_norm": 1.0639056165472722, "learning_rate": 1.878878290118856e-05, "loss": 0.7489, "step": 9180 }, { "epoch": 0.16689670201946827, "grad_norm": 1.0779757307683169, "learning_rate": 1.87860322179628e-05, "loss": 0.77, "step": 9190 }, { "epoch": 0.1670783088769432, "grad_norm": 1.0397710900932542, "learning_rate": 1.878327861668953e-05, "loss": 0.7659, "step": 9200 }, { "epoch": 0.16725991573441812, "grad_norm": 1.048819963424626, "learning_rate": 1.8780522098283284e-05, "loss": 0.7476, "step": 9210 }, { "epoch": 0.16744152259189307, "grad_norm": 1.0989404514907641, "learning_rate": 1.8777762663659568e-05, "loss": 0.7541, "step": 9220 }, { "epoch": 0.167623129449368, "grad_norm": 1.0603043908149916, "learning_rate": 1.8775000313734853e-05, "loss": 0.7537, "step": 9230 }, { "epoch": 0.16780473630684295, "grad_norm": 1.0226687487158073, "learning_rate": 1.877223504942658e-05, "loss": 0.7664, "step": 9240 }, { "epoch": 0.16798634316431788, "grad_norm": 1.118843095598683, "learning_rate": 1.8769466871653167e-05, "loss": 0.7533, "step": 9250 }, { "epoch": 0.16816795002179283, "grad_norm": 1.0060292132357225, "learning_rate": 1.8766695781333976e-05, "loss": 0.7522, "step": 9260 }, { "epoch": 0.16834955687926775, "grad_norm": 1.016369042910311, "learning_rate": 1.8763921779389363e-05, "loss": 0.7676, "step": 9270 }, { "epoch": 0.1685311637367427, "grad_norm": 1.032976789898295, "learning_rate": 1.8761144866740632e-05, "loss": 0.7717, "step": 9280 }, { "epoch": 0.16871277059421763, "grad_norm": 1.043092360068977, "learning_rate": 1.875836504431007e-05, "loss": 0.7577, "step": 9290 }, { "epoch": 0.16889437745169258, "grad_norm": 1.048139654429619, "learning_rate": 1.8755582313020912e-05, "loss": 0.7531, "step": 9300 }, { "epoch": 0.1690759843091675, "grad_norm": 1.0017627826481266, "learning_rate": 1.875279667379737e-05, "loss": 0.7513, "step": 9310 }, { "epoch": 0.16925759116664246, "grad_norm": 2.8943194897327134, "learning_rate": 1.8750008127564622e-05, "loss": 0.758, "step": 9320 }, { "epoch": 0.1694391980241174, "grad_norm": 1.084204988404051, "learning_rate": 1.874721667524881e-05, "loss": 0.7554, "step": 9330 }, { "epoch": 0.16962080488159234, "grad_norm": 1.0505217698378713, "learning_rate": 1.874442231777704e-05, "loss": 0.7487, "step": 9340 }, { "epoch": 0.16980241173906727, "grad_norm": 1.0261221816667403, "learning_rate": 1.8741625056077385e-05, "loss": 0.7578, "step": 9350 }, { "epoch": 0.16998401859654222, "grad_norm": 1.0481410320398723, "learning_rate": 1.8738824891078877e-05, "loss": 0.7728, "step": 9360 }, { "epoch": 0.17016562545401714, "grad_norm": 1.0266110821985632, "learning_rate": 1.8736021823711524e-05, "loss": 0.7482, "step": 9370 }, { "epoch": 0.17034723231149207, "grad_norm": 1.0670256604439907, "learning_rate": 1.8733215854906284e-05, "loss": 0.7731, "step": 9380 }, { "epoch": 0.17052883916896702, "grad_norm": 1.1375083372233525, "learning_rate": 1.8730406985595085e-05, "loss": 0.7491, "step": 9390 }, { "epoch": 0.17071044602644195, "grad_norm": 1.0273008820906562, "learning_rate": 1.8727595216710825e-05, "loss": 0.7545, "step": 9400 }, { "epoch": 0.1708920528839169, "grad_norm": 1.0451698838782253, "learning_rate": 1.8724780549187353e-05, "loss": 0.7573, "step": 9410 }, { "epoch": 0.17107365974139183, "grad_norm": 1.065060824454178, "learning_rate": 1.8721962983959486e-05, "loss": 0.7667, "step": 9420 }, { "epoch": 0.17125526659886678, "grad_norm": 1.0622250049870523, "learning_rate": 1.8719142521963007e-05, "loss": 0.7728, "step": 9430 }, { "epoch": 0.1714368734563417, "grad_norm": 1.065042801369323, "learning_rate": 1.871631916413465e-05, "loss": 0.7669, "step": 9440 }, { "epoch": 0.17161848031381666, "grad_norm": 1.0763309923971285, "learning_rate": 1.871349291141213e-05, "loss": 0.7698, "step": 9450 }, { "epoch": 0.17180008717129158, "grad_norm": 1.0056456442841522, "learning_rate": 1.8710663764734104e-05, "loss": 0.7506, "step": 9460 }, { "epoch": 0.17198169402876654, "grad_norm": 1.1159129414594107, "learning_rate": 1.8707831725040198e-05, "loss": 0.7647, "step": 9470 }, { "epoch": 0.17216330088624146, "grad_norm": 1.1306711047463442, "learning_rate": 1.8704996793271e-05, "loss": 0.7695, "step": 9480 }, { "epoch": 0.1723449077437164, "grad_norm": 1.0061953834279402, "learning_rate": 1.870215897036806e-05, "loss": 0.7446, "step": 9490 }, { "epoch": 0.17252651460119134, "grad_norm": 1.0100963436451758, "learning_rate": 1.8699318257273882e-05, "loss": 0.7468, "step": 9500 }, { "epoch": 0.1727081214586663, "grad_norm": 1.6415094843709346, "learning_rate": 1.8696474654931938e-05, "loss": 0.7688, "step": 9510 }, { "epoch": 0.17288972831614122, "grad_norm": 1.0527922312177738, "learning_rate": 1.869362816428665e-05, "loss": 0.7607, "step": 9520 }, { "epoch": 0.17307133517361614, "grad_norm": 1.020520162489585, "learning_rate": 1.8690778786283406e-05, "loss": 0.7707, "step": 9530 }, { "epoch": 0.1732529420310911, "grad_norm": 1.0236426700461334, "learning_rate": 1.8687926521868555e-05, "loss": 0.753, "step": 9540 }, { "epoch": 0.17343454888856602, "grad_norm": 1.002408136707146, "learning_rate": 1.8685071371989392e-05, "loss": 0.7506, "step": 9550 }, { "epoch": 0.17361615574604097, "grad_norm": 1.3225811755378987, "learning_rate": 1.8682213337594188e-05, "loss": 0.7425, "step": 9560 }, { "epoch": 0.1737977626035159, "grad_norm": 1.0956593001891481, "learning_rate": 1.8679352419632158e-05, "loss": 0.7598, "step": 9570 }, { "epoch": 0.17397936946099085, "grad_norm": 1.0904604497150054, "learning_rate": 1.8676488619053484e-05, "loss": 0.7713, "step": 9580 }, { "epoch": 0.17416097631846578, "grad_norm": 1.0349252276297767, "learning_rate": 1.8673621936809303e-05, "loss": 0.7563, "step": 9590 }, { "epoch": 0.17434258317594073, "grad_norm": 1.0214699499830366, "learning_rate": 1.8670752373851703e-05, "loss": 0.7483, "step": 9600 }, { "epoch": 0.17452419003341565, "grad_norm": 1.1076098330471542, "learning_rate": 1.866787993113373e-05, "loss": 0.7545, "step": 9610 }, { "epoch": 0.1747057968908906, "grad_norm": 1.0520624805653775, "learning_rate": 1.8665004609609395e-05, "loss": 0.7543, "step": 9620 }, { "epoch": 0.17488740374836553, "grad_norm": 1.0627974563142626, "learning_rate": 1.8662126410233662e-05, "loss": 0.7688, "step": 9630 }, { "epoch": 0.17506901060584049, "grad_norm": 1.0155744634468113, "learning_rate": 1.8659245333962444e-05, "loss": 0.7526, "step": 9640 }, { "epoch": 0.1752506174633154, "grad_norm": 1.0043150911271246, "learning_rate": 1.8656361381752616e-05, "loss": 0.7559, "step": 9650 }, { "epoch": 0.17543222432079036, "grad_norm": 1.0337210895190054, "learning_rate": 1.8653474554562004e-05, "loss": 0.7486, "step": 9660 }, { "epoch": 0.1756138311782653, "grad_norm": 1.0206216437655182, "learning_rate": 1.8650584853349394e-05, "loss": 0.7497, "step": 9670 }, { "epoch": 0.17579543803574024, "grad_norm": 1.0373852898037244, "learning_rate": 1.864769227907452e-05, "loss": 0.7674, "step": 9680 }, { "epoch": 0.17597704489321517, "grad_norm": 1.0616610314902368, "learning_rate": 1.8644796832698077e-05, "loss": 0.7619, "step": 9690 }, { "epoch": 0.1761586517506901, "grad_norm": 1.1180747608361266, "learning_rate": 1.8641898515181715e-05, "loss": 0.7662, "step": 9700 }, { "epoch": 0.17634025860816505, "grad_norm": 0.9702444363564319, "learning_rate": 1.863899732748802e-05, "loss": 0.7488, "step": 9710 }, { "epoch": 0.17652186546563997, "grad_norm": 1.053505164357678, "learning_rate": 1.863609327058056e-05, "loss": 0.7446, "step": 9720 }, { "epoch": 0.17670347232311492, "grad_norm": 1.0633928225665108, "learning_rate": 1.8633186345423825e-05, "loss": 0.761, "step": 9730 }, { "epoch": 0.17688507918058985, "grad_norm": 0.996020199017573, "learning_rate": 1.8630276552983286e-05, "loss": 0.7476, "step": 9740 }, { "epoch": 0.1770666860380648, "grad_norm": 1.030964778672433, "learning_rate": 1.8627363894225347e-05, "loss": 0.7506, "step": 9750 }, { "epoch": 0.17724829289553973, "grad_norm": 1.1776281615891702, "learning_rate": 1.862444837011737e-05, "loss": 0.7773, "step": 9760 }, { "epoch": 0.17742989975301468, "grad_norm": 1.0088688455515749, "learning_rate": 1.8621529981627672e-05, "loss": 0.7539, "step": 9770 }, { "epoch": 0.1776115066104896, "grad_norm": 1.0698248806472563, "learning_rate": 1.861860872972552e-05, "loss": 0.7634, "step": 9780 }, { "epoch": 0.17779311346796456, "grad_norm": 1.0482577439538583, "learning_rate": 1.8615684615381123e-05, "loss": 0.7708, "step": 9790 }, { "epoch": 0.17797472032543948, "grad_norm": 0.9982247528707722, "learning_rate": 1.861275763956566e-05, "loss": 0.7606, "step": 9800 }, { "epoch": 0.17815632718291444, "grad_norm": 1.0225753996788463, "learning_rate": 1.8609827803251234e-05, "loss": 0.756, "step": 9810 }, { "epoch": 0.17833793404038936, "grad_norm": 1.0254876362263163, "learning_rate": 1.860689510741092e-05, "loss": 0.7376, "step": 9820 }, { "epoch": 0.17851954089786432, "grad_norm": 1.0292385682205605, "learning_rate": 1.8603959553018736e-05, "loss": 0.7617, "step": 9830 }, { "epoch": 0.17870114775533924, "grad_norm": 1.065544291570148, "learning_rate": 1.8601021141049645e-05, "loss": 0.7365, "step": 9840 }, { "epoch": 0.1788827546128142, "grad_norm": 1.0270519399550713, "learning_rate": 1.8598079872479565e-05, "loss": 0.7504, "step": 9850 }, { "epoch": 0.17906436147028912, "grad_norm": 1.0084252674286083, "learning_rate": 1.8595135748285362e-05, "loss": 0.7617, "step": 9860 }, { "epoch": 0.17924596832776404, "grad_norm": 0.9996077157837727, "learning_rate": 1.859218876944484e-05, "loss": 0.7636, "step": 9870 }, { "epoch": 0.179427575185239, "grad_norm": 1.0372379220677403, "learning_rate": 1.8589238936936772e-05, "loss": 0.7536, "step": 9880 }, { "epoch": 0.17960918204271392, "grad_norm": 1.0074550684658368, "learning_rate": 1.858628625174086e-05, "loss": 0.7448, "step": 9890 }, { "epoch": 0.17979078890018887, "grad_norm": 1.0479765297699764, "learning_rate": 1.858333071483776e-05, "loss": 0.7605, "step": 9900 }, { "epoch": 0.1799723957576638, "grad_norm": 1.0546417111192041, "learning_rate": 1.8580372327209077e-05, "loss": 0.7538, "step": 9910 }, { "epoch": 0.18015400261513875, "grad_norm": 1.0099561622303082, "learning_rate": 1.8577411089837357e-05, "loss": 0.7351, "step": 9920 }, { "epoch": 0.18033560947261368, "grad_norm": 1.0071118446380327, "learning_rate": 1.8574447003706103e-05, "loss": 0.7567, "step": 9930 }, { "epoch": 0.18051721633008863, "grad_norm": 1.0540304103885607, "learning_rate": 1.8571480069799755e-05, "loss": 0.7511, "step": 9940 }, { "epoch": 0.18069882318756356, "grad_norm": 1.0718621124166279, "learning_rate": 1.85685102891037e-05, "loss": 0.756, "step": 9950 }, { "epoch": 0.1808804300450385, "grad_norm": 1.0351249480534905, "learning_rate": 1.856553766260427e-05, "loss": 0.7611, "step": 9960 }, { "epoch": 0.18106203690251343, "grad_norm": 1.0477566097928448, "learning_rate": 1.8562562191288747e-05, "loss": 0.752, "step": 9970 }, { "epoch": 0.1812436437599884, "grad_norm": 1.0204469380919543, "learning_rate": 1.8559583876145356e-05, "loss": 0.7393, "step": 9980 }, { "epoch": 0.1814252506174633, "grad_norm": 1.01940285730271, "learning_rate": 1.855660271816326e-05, "loss": 0.7435, "step": 9990 }, { "epoch": 0.18160685747493827, "grad_norm": 1.1117499836609406, "learning_rate": 1.8553618718332577e-05, "loss": 0.7475, "step": 10000 }, { "epoch": 0.1817884643324132, "grad_norm": 1.0105463122719816, "learning_rate": 1.8550631877644362e-05, "loss": 0.7641, "step": 10010 }, { "epoch": 0.18197007118988814, "grad_norm": 1.0820003305693913, "learning_rate": 1.8547642197090618e-05, "loss": 0.7457, "step": 10020 }, { "epoch": 0.18215167804736307, "grad_norm": 1.047683026517944, "learning_rate": 1.8544649677664277e-05, "loss": 0.7449, "step": 10030 }, { "epoch": 0.182333284904838, "grad_norm": 1.0470484635801078, "learning_rate": 1.8541654320359238e-05, "loss": 0.7342, "step": 10040 }, { "epoch": 0.18251489176231295, "grad_norm": 1.0069078576554529, "learning_rate": 1.853865612617032e-05, "loss": 0.7446, "step": 10050 }, { "epoch": 0.18269649861978787, "grad_norm": 1.0307840951226048, "learning_rate": 1.8535655096093302e-05, "loss": 0.7359, "step": 10060 }, { "epoch": 0.18287810547726283, "grad_norm": 1.0582670525236901, "learning_rate": 1.853265123112489e-05, "loss": 0.7625, "step": 10070 }, { "epoch": 0.18305971233473775, "grad_norm": 1.013502784141716, "learning_rate": 1.8529644532262738e-05, "loss": 0.7693, "step": 10080 }, { "epoch": 0.1832413191922127, "grad_norm": 1.086766673989656, "learning_rate": 1.8526635000505444e-05, "loss": 0.7568, "step": 10090 }, { "epoch": 0.18342292604968763, "grad_norm": 0.9972832771350579, "learning_rate": 1.8523622636852547e-05, "loss": 0.7573, "step": 10100 }, { "epoch": 0.18360453290716258, "grad_norm": 1.0064292828137342, "learning_rate": 1.8520607442304518e-05, "loss": 0.7524, "step": 10110 }, { "epoch": 0.1837861397646375, "grad_norm": 0.9786042809784081, "learning_rate": 1.8517589417862773e-05, "loss": 0.7395, "step": 10120 }, { "epoch": 0.18396774662211246, "grad_norm": 0.9931678342157998, "learning_rate": 1.8514568564529675e-05, "loss": 0.7577, "step": 10130 }, { "epoch": 0.18414935347958739, "grad_norm": 1.0218516086645169, "learning_rate": 1.8511544883308517e-05, "loss": 0.7502, "step": 10140 }, { "epoch": 0.18433096033706234, "grad_norm": 1.0588207142381274, "learning_rate": 1.8508518375203534e-05, "loss": 0.753, "step": 10150 }, { "epoch": 0.18451256719453726, "grad_norm": 1.0506436371633463, "learning_rate": 1.85054890412199e-05, "loss": 0.7423, "step": 10160 }, { "epoch": 0.18469417405201222, "grad_norm": 1.2414885220289023, "learning_rate": 1.850245688236373e-05, "loss": 0.7579, "step": 10170 }, { "epoch": 0.18487578090948714, "grad_norm": 1.0377075078059386, "learning_rate": 1.8499421899642077e-05, "loss": 0.745, "step": 10180 }, { "epoch": 0.18505738776696207, "grad_norm": 1.2299678691476217, "learning_rate": 1.849638409406292e-05, "loss": 0.7424, "step": 10190 }, { "epoch": 0.18523899462443702, "grad_norm": 1.0611101188213015, "learning_rate": 1.84933434666352e-05, "loss": 0.759, "step": 10200 }, { "epoch": 0.18542060148191195, "grad_norm": 1.0027524481747956, "learning_rate": 1.8490300018368768e-05, "loss": 0.7386, "step": 10210 }, { "epoch": 0.1856022083393869, "grad_norm": 1.0558469017498509, "learning_rate": 1.8487253750274435e-05, "loss": 0.7469, "step": 10220 }, { "epoch": 0.18578381519686182, "grad_norm": 1.0445394561656476, "learning_rate": 1.848420466336393e-05, "loss": 0.7542, "step": 10230 }, { "epoch": 0.18596542205433678, "grad_norm": 1.0256679899204593, "learning_rate": 1.848115275864993e-05, "loss": 0.747, "step": 10240 }, { "epoch": 0.1861470289118117, "grad_norm": 1.3080743468231661, "learning_rate": 1.847809803714604e-05, "loss": 0.7662, "step": 10250 }, { "epoch": 0.18632863576928665, "grad_norm": 1.2125891679882626, "learning_rate": 1.8475040499866815e-05, "loss": 0.7477, "step": 10260 }, { "epoch": 0.18651024262676158, "grad_norm": 1.0215011711956579, "learning_rate": 1.847198014782772e-05, "loss": 0.7457, "step": 10270 }, { "epoch": 0.18669184948423653, "grad_norm": 0.9887639783258041, "learning_rate": 1.8468916982045184e-05, "loss": 0.7482, "step": 10280 }, { "epoch": 0.18687345634171146, "grad_norm": 1.0681935282084545, "learning_rate": 1.8465851003536547e-05, "loss": 0.7435, "step": 10290 }, { "epoch": 0.1870550631991864, "grad_norm": 1.0401648956265637, "learning_rate": 1.8462782213320097e-05, "loss": 0.7611, "step": 10300 }, { "epoch": 0.18723667005666134, "grad_norm": 1.0099984411193421, "learning_rate": 1.845971061241505e-05, "loss": 0.7518, "step": 10310 }, { "epoch": 0.1874182769141363, "grad_norm": 1.0056951839040666, "learning_rate": 1.845663620184156e-05, "loss": 0.7424, "step": 10320 }, { "epoch": 0.18759988377161121, "grad_norm": 1.0588504726391355, "learning_rate": 1.84535589826207e-05, "loss": 0.758, "step": 10330 }, { "epoch": 0.18778149062908617, "grad_norm": 1.0061297389076298, "learning_rate": 1.8450478955774497e-05, "loss": 0.7467, "step": 10340 }, { "epoch": 0.1879630974865611, "grad_norm": 1.0013170819622383, "learning_rate": 1.8447396122325898e-05, "loss": 0.7601, "step": 10350 }, { "epoch": 0.18814470434403602, "grad_norm": 1.0716704613165544, "learning_rate": 1.8444310483298785e-05, "loss": 0.7632, "step": 10360 }, { "epoch": 0.18832631120151097, "grad_norm": 1.0000953915369166, "learning_rate": 1.8441222039717967e-05, "loss": 0.7481, "step": 10370 }, { "epoch": 0.1885079180589859, "grad_norm": 1.0638034606666789, "learning_rate": 1.843813079260919e-05, "loss": 0.7466, "step": 10380 }, { "epoch": 0.18868952491646085, "grad_norm": 1.0906419663923, "learning_rate": 1.8435036742999138e-05, "loss": 0.7559, "step": 10390 }, { "epoch": 0.18887113177393577, "grad_norm": 1.0375157269204571, "learning_rate": 1.8431939891915406e-05, "loss": 0.7557, "step": 10400 }, { "epoch": 0.18905273863141073, "grad_norm": 1.0049172870166245, "learning_rate": 1.8428840240386536e-05, "loss": 0.7513, "step": 10410 }, { "epoch": 0.18923434548888565, "grad_norm": 1.0908787790815104, "learning_rate": 1.8425737789441998e-05, "loss": 0.7516, "step": 10420 }, { "epoch": 0.1894159523463606, "grad_norm": 1.0468516202600453, "learning_rate": 1.842263254011218e-05, "loss": 0.7528, "step": 10430 }, { "epoch": 0.18959755920383553, "grad_norm": 0.9978024895191083, "learning_rate": 1.841952449342842e-05, "loss": 0.7507, "step": 10440 }, { "epoch": 0.18977916606131048, "grad_norm": 1.0229498330996887, "learning_rate": 1.841641365042296e-05, "loss": 0.754, "step": 10450 }, { "epoch": 0.1899607729187854, "grad_norm": 1.0278671366651198, "learning_rate": 1.8413300012128995e-05, "loss": 0.7539, "step": 10460 }, { "epoch": 0.19014237977626036, "grad_norm": 1.0510517572142375, "learning_rate": 1.841018357958063e-05, "loss": 0.7505, "step": 10470 }, { "epoch": 0.1903239866337353, "grad_norm": 1.0417691332284562, "learning_rate": 1.8407064353812913e-05, "loss": 0.7491, "step": 10480 }, { "epoch": 0.19050559349121024, "grad_norm": 0.9548945912711079, "learning_rate": 1.84039423358618e-05, "loss": 0.7559, "step": 10490 }, { "epoch": 0.19068720034868517, "grad_norm": 1.0790863317100508, "learning_rate": 1.84008175267642e-05, "loss": 0.7577, "step": 10500 }, { "epoch": 0.19086880720616012, "grad_norm": 0.9732978754171392, "learning_rate": 1.8397689927557932e-05, "loss": 0.7577, "step": 10510 }, { "epoch": 0.19105041406363504, "grad_norm": 1.004305576267522, "learning_rate": 1.8394559539281737e-05, "loss": 0.7449, "step": 10520 }, { "epoch": 0.19123202092110997, "grad_norm": 1.009900916709798, "learning_rate": 1.8391426362975298e-05, "loss": 0.7362, "step": 10530 }, { "epoch": 0.19141362777858492, "grad_norm": 1.0163535910116144, "learning_rate": 1.8388290399679216e-05, "loss": 0.7494, "step": 10540 }, { "epoch": 0.19159523463605985, "grad_norm": 1.0265579052285605, "learning_rate": 1.8385151650435015e-05, "loss": 0.7393, "step": 10550 }, { "epoch": 0.1917768414935348, "grad_norm": 0.9801696657545973, "learning_rate": 1.838201011628515e-05, "loss": 0.7422, "step": 10560 }, { "epoch": 0.19195844835100973, "grad_norm": 0.9749840202694823, "learning_rate": 1.8378865798272993e-05, "loss": 0.741, "step": 10570 }, { "epoch": 0.19214005520848468, "grad_norm": 0.9862706412077429, "learning_rate": 1.8375718697442853e-05, "loss": 0.7467, "step": 10580 }, { "epoch": 0.1923216620659596, "grad_norm": 1.0339730832750083, "learning_rate": 1.8372568814839952e-05, "loss": 0.752, "step": 10590 }, { "epoch": 0.19250326892343456, "grad_norm": 0.9637995337667785, "learning_rate": 1.8369416151510445e-05, "loss": 0.7581, "step": 10600 }, { "epoch": 0.19268487578090948, "grad_norm": 1.0204396542765028, "learning_rate": 1.8366260708501394e-05, "loss": 0.7397, "step": 10610 }, { "epoch": 0.19286648263838443, "grad_norm": 0.9759324958555442, "learning_rate": 1.836310248686081e-05, "loss": 0.7482, "step": 10620 }, { "epoch": 0.19304808949585936, "grad_norm": 1.0879237662694665, "learning_rate": 1.8359941487637602e-05, "loss": 0.7473, "step": 10630 }, { "epoch": 0.1932296963533343, "grad_norm": 1.0195502785604615, "learning_rate": 1.8356777711881614e-05, "loss": 0.7392, "step": 10640 }, { "epoch": 0.19341130321080924, "grad_norm": 0.9885499215236955, "learning_rate": 1.8353611160643613e-05, "loss": 0.75, "step": 10650 }, { "epoch": 0.1935929100682842, "grad_norm": 1.0020533400283904, "learning_rate": 1.8350441834975283e-05, "loss": 0.758, "step": 10660 }, { "epoch": 0.19377451692575912, "grad_norm": 0.9977633510865329, "learning_rate": 1.8347269735929233e-05, "loss": 0.7454, "step": 10670 }, { "epoch": 0.19395612378323404, "grad_norm": 0.9714675263813037, "learning_rate": 1.8344094864558987e-05, "loss": 0.7379, "step": 10680 }, { "epoch": 0.194137730640709, "grad_norm": 1.0519271692877947, "learning_rate": 1.8340917221919002e-05, "loss": 0.7482, "step": 10690 }, { "epoch": 0.19431933749818392, "grad_norm": 0.9863689458376568, "learning_rate": 1.8337736809064642e-05, "loss": 0.7477, "step": 10700 }, { "epoch": 0.19450094435565887, "grad_norm": 1.0041899515316337, "learning_rate": 1.83345536270522e-05, "loss": 0.7482, "step": 10710 }, { "epoch": 0.1946825512131338, "grad_norm": 0.9782567594209565, "learning_rate": 1.8331367676938884e-05, "loss": 0.744, "step": 10720 }, { "epoch": 0.19486415807060875, "grad_norm": 0.9993841145505755, "learning_rate": 1.8328178959782822e-05, "loss": 0.7434, "step": 10730 }, { "epoch": 0.19504576492808368, "grad_norm": 0.9860906545872884, "learning_rate": 1.8324987476643064e-05, "loss": 0.7524, "step": 10740 }, { "epoch": 0.19522737178555863, "grad_norm": 1.0121681161741092, "learning_rate": 1.832179322857957e-05, "loss": 0.7446, "step": 10750 }, { "epoch": 0.19540897864303355, "grad_norm": 1.0098826612839762, "learning_rate": 1.8318596216653234e-05, "loss": 0.7698, "step": 10760 }, { "epoch": 0.1955905855005085, "grad_norm": 1.040633526267089, "learning_rate": 1.8315396441925854e-05, "loss": 0.7498, "step": 10770 }, { "epoch": 0.19577219235798343, "grad_norm": 1.0060279360682192, "learning_rate": 1.831219390546015e-05, "loss": 0.7469, "step": 10780 }, { "epoch": 0.19595379921545839, "grad_norm": 1.01532827153719, "learning_rate": 1.830898860831976e-05, "loss": 0.7392, "step": 10790 }, { "epoch": 0.1961354060729333, "grad_norm": 1.0091043499969814, "learning_rate": 1.8305780551569236e-05, "loss": 0.7458, "step": 10800 }, { "epoch": 0.19631701293040826, "grad_norm": 1.1177630633189788, "learning_rate": 1.8302569736274057e-05, "loss": 0.7451, "step": 10810 }, { "epoch": 0.1964986197878832, "grad_norm": 1.005351736775105, "learning_rate": 1.82993561635006e-05, "loss": 0.7412, "step": 10820 }, { "epoch": 0.19668022664535814, "grad_norm": 0.9971087110187757, "learning_rate": 1.8296139834316177e-05, "loss": 0.7613, "step": 10830 }, { "epoch": 0.19686183350283307, "grad_norm": 1.0154989110185386, "learning_rate": 1.8292920749788998e-05, "loss": 0.752, "step": 10840 }, { "epoch": 0.197043440360308, "grad_norm": 1.047240931123451, "learning_rate": 1.82896989109882e-05, "loss": 0.7687, "step": 10850 }, { "epoch": 0.19722504721778295, "grad_norm": 0.9850901630158435, "learning_rate": 1.8286474318983837e-05, "loss": 0.7481, "step": 10860 }, { "epoch": 0.19740665407525787, "grad_norm": 0.9916742740696144, "learning_rate": 1.828324697484686e-05, "loss": 0.7455, "step": 10870 }, { "epoch": 0.19758826093273282, "grad_norm": 1.1901736838832124, "learning_rate": 1.8280016879649155e-05, "loss": 0.7408, "step": 10880 }, { "epoch": 0.19776986779020775, "grad_norm": 1.0518418345644698, "learning_rate": 1.827678403446351e-05, "loss": 0.7531, "step": 10890 }, { "epoch": 0.1979514746476827, "grad_norm": 1.0417344263740462, "learning_rate": 1.827354844036363e-05, "loss": 0.755, "step": 10900 }, { "epoch": 0.19813308150515763, "grad_norm": 1.0189777655193657, "learning_rate": 1.8270310098424128e-05, "loss": 0.746, "step": 10910 }, { "epoch": 0.19831468836263258, "grad_norm": 1.0193127593397924, "learning_rate": 1.826706900972054e-05, "loss": 0.7611, "step": 10920 }, { "epoch": 0.1984962952201075, "grad_norm": 1.0083419553979716, "learning_rate": 1.8263825175329296e-05, "loss": 0.7333, "step": 10930 }, { "epoch": 0.19867790207758246, "grad_norm": 1.0100879363888817, "learning_rate": 1.8260578596327762e-05, "loss": 0.7418, "step": 10940 }, { "epoch": 0.19885950893505738, "grad_norm": 1.0067179501283257, "learning_rate": 1.8257329273794196e-05, "loss": 0.7445, "step": 10950 }, { "epoch": 0.19904111579253234, "grad_norm": 1.025702991933531, "learning_rate": 1.8254077208807776e-05, "loss": 0.7464, "step": 10960 }, { "epoch": 0.19922272265000726, "grad_norm": 0.9705325678304705, "learning_rate": 1.8250822402448587e-05, "loss": 0.7473, "step": 10970 }, { "epoch": 0.19940432950748221, "grad_norm": 1.0641555571704517, "learning_rate": 1.824756485579763e-05, "loss": 0.7478, "step": 10980 }, { "epoch": 0.19958593636495714, "grad_norm": 1.0203425124158825, "learning_rate": 1.8244304569936813e-05, "loss": 0.7258, "step": 10990 }, { "epoch": 0.1997675432224321, "grad_norm": 0.9961728880844775, "learning_rate": 1.824104154594895e-05, "loss": 0.7469, "step": 11000 }, { "epoch": 0.19994915007990702, "grad_norm": 0.9634166148946457, "learning_rate": 1.823777578491777e-05, "loss": 0.7509, "step": 11010 }, { "epoch": 0.20013075693738194, "grad_norm": 1.0288125482358366, "learning_rate": 1.823450728792791e-05, "loss": 0.7628, "step": 11020 }, { "epoch": 0.2003123637948569, "grad_norm": 0.9823611701764198, "learning_rate": 1.8231236056064915e-05, "loss": 0.7382, "step": 11030 }, { "epoch": 0.20049397065233182, "grad_norm": 1.009141944870812, "learning_rate": 1.8227962090415233e-05, "loss": 0.7398, "step": 11040 }, { "epoch": 0.20067557750980677, "grad_norm": 0.9957720993122982, "learning_rate": 1.822468539206623e-05, "loss": 0.7603, "step": 11050 }, { "epoch": 0.2008571843672817, "grad_norm": 0.9871719530724702, "learning_rate": 1.822140596210617e-05, "loss": 0.7429, "step": 11060 }, { "epoch": 0.20103879122475665, "grad_norm": 1.0042722826120285, "learning_rate": 1.821812380162423e-05, "loss": 0.7312, "step": 11070 }, { "epoch": 0.20122039808223158, "grad_norm": 1.063081325188519, "learning_rate": 1.8214838911710497e-05, "loss": 0.7468, "step": 11080 }, { "epoch": 0.20140200493970653, "grad_norm": 1.0206919533838001, "learning_rate": 1.8211551293455953e-05, "loss": 0.7579, "step": 11090 }, { "epoch": 0.20158361179718146, "grad_norm": 1.0528605987769406, "learning_rate": 1.82082609479525e-05, "loss": 0.7472, "step": 11100 }, { "epoch": 0.2017652186546564, "grad_norm": 1.0012277878276723, "learning_rate": 1.8204967876292934e-05, "loss": 0.7297, "step": 11110 }, { "epoch": 0.20194682551213133, "grad_norm": 1.0041766642354173, "learning_rate": 1.8201672079570965e-05, "loss": 0.7403, "step": 11120 }, { "epoch": 0.2021284323696063, "grad_norm": 1.0191136448048637, "learning_rate": 1.8198373558881203e-05, "loss": 0.7538, "step": 11130 }, { "epoch": 0.2023100392270812, "grad_norm": 1.0428315034531608, "learning_rate": 1.8195072315319156e-05, "loss": 0.7664, "step": 11140 }, { "epoch": 0.20249164608455616, "grad_norm": 1.0005252762184806, "learning_rate": 1.819176834998126e-05, "loss": 0.7342, "step": 11150 }, { "epoch": 0.2026732529420311, "grad_norm": 1.0469546274825918, "learning_rate": 1.818846166396483e-05, "loss": 0.7544, "step": 11160 }, { "epoch": 0.20285485979950604, "grad_norm": 0.9415535880293063, "learning_rate": 1.8185152258368094e-05, "loss": 0.7347, "step": 11170 }, { "epoch": 0.20303646665698097, "grad_norm": 0.9960722261050456, "learning_rate": 1.8181840134290184e-05, "loss": 0.7587, "step": 11180 }, { "epoch": 0.2032180735144559, "grad_norm": 0.9778573513396849, "learning_rate": 1.817852529283114e-05, "loss": 0.7394, "step": 11190 }, { "epoch": 0.20339968037193085, "grad_norm": 0.9740131144234779, "learning_rate": 1.817520773509189e-05, "loss": 0.7471, "step": 11200 }, { "epoch": 0.20358128722940577, "grad_norm": 0.9933593793731396, "learning_rate": 1.8171887462174277e-05, "loss": 0.747, "step": 11210 }, { "epoch": 0.20376289408688072, "grad_norm": 0.9774101858664334, "learning_rate": 1.816856447518104e-05, "loss": 0.7417, "step": 11220 }, { "epoch": 0.20394450094435565, "grad_norm": 1.0050868544142104, "learning_rate": 1.8165238775215826e-05, "loss": 0.7465, "step": 11230 }, { "epoch": 0.2041261078018306, "grad_norm": 1.016121260958824, "learning_rate": 1.816191036338317e-05, "loss": 0.7434, "step": 11240 }, { "epoch": 0.20430771465930553, "grad_norm": 0.9717088877518872, "learning_rate": 1.815857924078852e-05, "loss": 0.7447, "step": 11250 }, { "epoch": 0.20448932151678048, "grad_norm": 1.0002562301894413, "learning_rate": 1.8155245408538222e-05, "loss": 0.7475, "step": 11260 }, { "epoch": 0.2046709283742554, "grad_norm": 1.0234936306818554, "learning_rate": 1.8151908867739515e-05, "loss": 0.7399, "step": 11270 }, { "epoch": 0.20485253523173036, "grad_norm": 1.0319644016971756, "learning_rate": 1.8148569619500548e-05, "loss": 0.7497, "step": 11280 }, { "epoch": 0.20503414208920528, "grad_norm": 0.9969747169945622, "learning_rate": 1.8145227664930358e-05, "loss": 0.7408, "step": 11290 }, { "epoch": 0.20521574894668024, "grad_norm": 1.0388701818743595, "learning_rate": 1.814188300513889e-05, "loss": 0.7493, "step": 11300 }, { "epoch": 0.20539735580415516, "grad_norm": 1.009562851119616, "learning_rate": 1.8138535641236984e-05, "loss": 0.7434, "step": 11310 }, { "epoch": 0.20557896266163012, "grad_norm": 0.9883387108812828, "learning_rate": 1.813518557433638e-05, "loss": 0.7593, "step": 11320 }, { "epoch": 0.20576056951910504, "grad_norm": 0.9929646774155636, "learning_rate": 1.8131832805549708e-05, "loss": 0.7444, "step": 11330 }, { "epoch": 0.20594217637657997, "grad_norm": 1.0239829907732731, "learning_rate": 1.8128477335990507e-05, "loss": 0.7482, "step": 11340 }, { "epoch": 0.20612378323405492, "grad_norm": 1.0030164661441938, "learning_rate": 1.81251191667732e-05, "loss": 0.7411, "step": 11350 }, { "epoch": 0.20630539009152984, "grad_norm": 0.9814688017703691, "learning_rate": 1.8121758299013122e-05, "loss": 0.7474, "step": 11360 }, { "epoch": 0.2064869969490048, "grad_norm": 1.039871379309786, "learning_rate": 1.8118394733826492e-05, "loss": 0.7325, "step": 11370 }, { "epoch": 0.20666860380647972, "grad_norm": 0.9772350183341051, "learning_rate": 1.8115028472330432e-05, "loss": 0.7275, "step": 11380 }, { "epoch": 0.20685021066395468, "grad_norm": 0.9941964728600324, "learning_rate": 1.811165951564295e-05, "loss": 0.7251, "step": 11390 }, { "epoch": 0.2070318175214296, "grad_norm": 0.9739369369568949, "learning_rate": 1.8108287864882962e-05, "loss": 0.7544, "step": 11400 }, { "epoch": 0.20721342437890455, "grad_norm": 0.983974141336293, "learning_rate": 1.810491352117027e-05, "loss": 0.7399, "step": 11410 }, { "epoch": 0.20739503123637948, "grad_norm": 1.027302734392156, "learning_rate": 1.8101536485625576e-05, "loss": 0.7391, "step": 11420 }, { "epoch": 0.20757663809385443, "grad_norm": 1.008148910540082, "learning_rate": 1.8098156759370464e-05, "loss": 0.7559, "step": 11430 }, { "epoch": 0.20775824495132936, "grad_norm": 1.002884695119504, "learning_rate": 1.809477434352743e-05, "loss": 0.7466, "step": 11440 }, { "epoch": 0.2079398518088043, "grad_norm": 1.0102502300067304, "learning_rate": 1.809138923921985e-05, "loss": 0.7426, "step": 11450 }, { "epoch": 0.20812145866627924, "grad_norm": 0.9923264749834528, "learning_rate": 1.808800144757199e-05, "loss": 0.7533, "step": 11460 }, { "epoch": 0.2083030655237542, "grad_norm": 1.0307901352469642, "learning_rate": 1.8084610969709024e-05, "loss": 0.7636, "step": 11470 }, { "epoch": 0.2084846723812291, "grad_norm": 0.915188784605276, "learning_rate": 1.808121780675701e-05, "loss": 0.7302, "step": 11480 }, { "epoch": 0.20866627923870407, "grad_norm": 1.0101765874717943, "learning_rate": 1.8077821959842888e-05, "loss": 0.7422, "step": 11490 }, { "epoch": 0.208847886096179, "grad_norm": 0.9803214499112723, "learning_rate": 1.807442343009451e-05, "loss": 0.7471, "step": 11500 }, { "epoch": 0.20902949295365392, "grad_norm": 2.340939591144724, "learning_rate": 1.80710222186406e-05, "loss": 0.7453, "step": 11510 }, { "epoch": 0.20921109981112887, "grad_norm": 1.0226496531991114, "learning_rate": 1.8067618326610777e-05, "loss": 0.7407, "step": 11520 }, { "epoch": 0.2093927066686038, "grad_norm": 1.0300487255536164, "learning_rate": 1.8064211755135567e-05, "loss": 0.7286, "step": 11530 }, { "epoch": 0.20957431352607875, "grad_norm": 0.947643352401751, "learning_rate": 1.8060802505346355e-05, "loss": 0.7495, "step": 11540 }, { "epoch": 0.20975592038355367, "grad_norm": 1.2512488157715806, "learning_rate": 1.8057390578375445e-05, "loss": 0.7483, "step": 11550 }, { "epoch": 0.20993752724102863, "grad_norm": 0.9992239237792963, "learning_rate": 1.8053975975356015e-05, "loss": 0.7475, "step": 11560 }, { "epoch": 0.21011913409850355, "grad_norm": 1.0291233879918589, "learning_rate": 1.8050558697422132e-05, "loss": 0.7351, "step": 11570 }, { "epoch": 0.2103007409559785, "grad_norm": 1.0616158223049157, "learning_rate": 1.8047138745708758e-05, "loss": 0.7263, "step": 11580 }, { "epoch": 0.21048234781345343, "grad_norm": 0.9782235415425596, "learning_rate": 1.804371612135174e-05, "loss": 0.7411, "step": 11590 }, { "epoch": 0.21066395467092838, "grad_norm": 0.9571063118624981, "learning_rate": 1.8040290825487807e-05, "loss": 0.7368, "step": 11600 }, { "epoch": 0.2108455615284033, "grad_norm": 1.04451810377295, "learning_rate": 1.8036862859254582e-05, "loss": 0.7419, "step": 11610 }, { "epoch": 0.21102716838587826, "grad_norm": 0.9867469271995285, "learning_rate": 1.8033432223790574e-05, "loss": 0.7399, "step": 11620 }, { "epoch": 0.2112087752433532, "grad_norm": 0.954637196387081, "learning_rate": 1.8029998920235177e-05, "loss": 0.7275, "step": 11630 }, { "epoch": 0.21139038210082814, "grad_norm": 0.9540511702744541, "learning_rate": 1.8026562949728676e-05, "loss": 0.7379, "step": 11640 }, { "epoch": 0.21157198895830306, "grad_norm": 1.0168844810020086, "learning_rate": 1.802312431341223e-05, "loss": 0.7414, "step": 11650 }, { "epoch": 0.21175359581577802, "grad_norm": 1.001300399280615, "learning_rate": 1.8019683012427894e-05, "loss": 0.7469, "step": 11660 }, { "epoch": 0.21193520267325294, "grad_norm": 1.0121959043742152, "learning_rate": 1.8016239047918604e-05, "loss": 0.7293, "step": 11670 }, { "epoch": 0.21211680953072787, "grad_norm": 0.9784382310649361, "learning_rate": 1.8012792421028185e-05, "loss": 0.7346, "step": 11680 }, { "epoch": 0.21229841638820282, "grad_norm": 0.9974489044771465, "learning_rate": 1.8009343132901338e-05, "loss": 0.7387, "step": 11690 }, { "epoch": 0.21248002324567775, "grad_norm": 0.97830003533035, "learning_rate": 1.8005891184683657e-05, "loss": 0.7446, "step": 11700 }, { "epoch": 0.2126616301031527, "grad_norm": 0.9910212539978481, "learning_rate": 1.8002436577521613e-05, "loss": 0.7393, "step": 11710 }, { "epoch": 0.21284323696062762, "grad_norm": 0.9725856321366916, "learning_rate": 1.7998979312562558e-05, "loss": 0.7333, "step": 11720 }, { "epoch": 0.21302484381810258, "grad_norm": 0.9730205125883448, "learning_rate": 1.799551939095474e-05, "loss": 0.7381, "step": 11730 }, { "epoch": 0.2132064506755775, "grad_norm": 0.9967003628191956, "learning_rate": 1.799205681384727e-05, "loss": 0.7349, "step": 11740 }, { "epoch": 0.21338805753305246, "grad_norm": 0.9878530222962445, "learning_rate": 1.798859158239016e-05, "loss": 0.736, "step": 11750 }, { "epoch": 0.21356966439052738, "grad_norm": 0.9928702623548519, "learning_rate": 1.798512369773429e-05, "loss": 0.7458, "step": 11760 }, { "epoch": 0.21375127124800233, "grad_norm": 0.9897685217396583, "learning_rate": 1.7981653161031425e-05, "loss": 0.7499, "step": 11770 }, { "epoch": 0.21393287810547726, "grad_norm": 0.9751609171468089, "learning_rate": 1.7978179973434213e-05, "loss": 0.7291, "step": 11780 }, { "epoch": 0.2141144849629522, "grad_norm": 0.9515618871271774, "learning_rate": 1.7974704136096185e-05, "loss": 0.7298, "step": 11790 }, { "epoch": 0.21429609182042714, "grad_norm": 0.9913217179465736, "learning_rate": 1.797122565017174e-05, "loss": 0.7281, "step": 11800 }, { "epoch": 0.2144776986779021, "grad_norm": 0.985245345080443, "learning_rate": 1.796774451681617e-05, "loss": 0.7353, "step": 11810 }, { "epoch": 0.21465930553537702, "grad_norm": 0.9627940913051314, "learning_rate": 1.7964260737185643e-05, "loss": 0.7481, "step": 11820 }, { "epoch": 0.21484091239285194, "grad_norm": 1.0412747294747828, "learning_rate": 1.7960774312437198e-05, "loss": 0.7349, "step": 11830 }, { "epoch": 0.2150225192503269, "grad_norm": 1.034650269372385, "learning_rate": 1.7957285243728768e-05, "loss": 0.7476, "step": 11840 }, { "epoch": 0.21520412610780182, "grad_norm": 0.9715848758094864, "learning_rate": 1.7953793532219144e-05, "loss": 0.7476, "step": 11850 }, { "epoch": 0.21538573296527677, "grad_norm": 1.049754101694285, "learning_rate": 1.795029917906801e-05, "loss": 0.7251, "step": 11860 }, { "epoch": 0.2155673398227517, "grad_norm": 1.0005839564420844, "learning_rate": 1.7946802185435923e-05, "loss": 0.7414, "step": 11870 }, { "epoch": 0.21574894668022665, "grad_norm": 0.9949608487415084, "learning_rate": 1.7943302552484318e-05, "loss": 0.737, "step": 11880 }, { "epoch": 0.21593055353770158, "grad_norm": 0.9995903274116031, "learning_rate": 1.7939800281375503e-05, "loss": 0.7462, "step": 11890 }, { "epoch": 0.21611216039517653, "grad_norm": 1.00284506495395, "learning_rate": 1.793629537327266e-05, "loss": 0.7415, "step": 11900 }, { "epoch": 0.21629376725265145, "grad_norm": 1.0117496352103392, "learning_rate": 1.7932787829339862e-05, "loss": 0.7498, "step": 11910 }, { "epoch": 0.2164753741101264, "grad_norm": 1.05324397752225, "learning_rate": 1.792927765074204e-05, "loss": 0.742, "step": 11920 }, { "epoch": 0.21665698096760133, "grad_norm": 0.994746827610896, "learning_rate": 1.7925764838645006e-05, "loss": 0.742, "step": 11930 }, { "epoch": 0.21683858782507628, "grad_norm": 1.1338134113715295, "learning_rate": 1.792224939421545e-05, "loss": 0.7452, "step": 11940 }, { "epoch": 0.2170201946825512, "grad_norm": 1.0147226643495406, "learning_rate": 1.7918731318620933e-05, "loss": 0.7228, "step": 11950 }, { "epoch": 0.21720180154002616, "grad_norm": 0.9629800850434304, "learning_rate": 1.791521061302989e-05, "loss": 0.7399, "step": 11960 }, { "epoch": 0.2173834083975011, "grad_norm": 1.0372811699383575, "learning_rate": 1.7911687278611624e-05, "loss": 0.7365, "step": 11970 }, { "epoch": 0.21756501525497604, "grad_norm": 0.9803285018449727, "learning_rate": 1.790816131653633e-05, "loss": 0.737, "step": 11980 }, { "epoch": 0.21774662211245097, "grad_norm": 1.0412484462616434, "learning_rate": 1.7904632727975052e-05, "loss": 0.7456, "step": 11990 }, { "epoch": 0.2179282289699259, "grad_norm": 1.0176122664481497, "learning_rate": 1.7901101514099725e-05, "loss": 0.7486, "step": 12000 }, { "epoch": 0.21810983582740084, "grad_norm": 0.9937061858560856, "learning_rate": 1.789756767608314e-05, "loss": 0.753, "step": 12010 }, { "epoch": 0.21829144268487577, "grad_norm": 1.0574647310579577, "learning_rate": 1.7894031215098972e-05, "loss": 0.737, "step": 12020 }, { "epoch": 0.21847304954235072, "grad_norm": 1.016138840119923, "learning_rate": 1.7890492132321765e-05, "loss": 0.7401, "step": 12030 }, { "epoch": 0.21865465639982565, "grad_norm": 0.9643926427579693, "learning_rate": 1.7886950428926924e-05, "loss": 0.7335, "step": 12040 }, { "epoch": 0.2188362632573006, "grad_norm": 0.9853146109501515, "learning_rate": 1.788340610609074e-05, "loss": 0.7261, "step": 12050 }, { "epoch": 0.21901787011477553, "grad_norm": 0.9756843289400804, "learning_rate": 1.787985916499036e-05, "loss": 0.7427, "step": 12060 }, { "epoch": 0.21919947697225048, "grad_norm": 1.0539336934378167, "learning_rate": 1.7876309606803807e-05, "loss": 0.7489, "step": 12070 }, { "epoch": 0.2193810838297254, "grad_norm": 0.992054056432129, "learning_rate": 1.7872757432709975e-05, "loss": 0.7316, "step": 12080 }, { "epoch": 0.21956269068720036, "grad_norm": 1.0275295616969422, "learning_rate": 1.786920264388863e-05, "loss": 0.7196, "step": 12090 }, { "epoch": 0.21974429754467528, "grad_norm": 0.9989324768644946, "learning_rate": 1.7865645241520386e-05, "loss": 0.7281, "step": 12100 }, { "epoch": 0.21992590440215024, "grad_norm": 0.9671677218721662, "learning_rate": 1.7862085226786748e-05, "loss": 0.752, "step": 12110 }, { "epoch": 0.22010751125962516, "grad_norm": 0.9584965380660114, "learning_rate": 1.7858522600870088e-05, "loss": 0.7213, "step": 12120 }, { "epoch": 0.2202891181171001, "grad_norm": 0.9815472199083913, "learning_rate": 1.7854957364953625e-05, "loss": 0.7406, "step": 12130 }, { "epoch": 0.22047072497457504, "grad_norm": 1.0781901916227388, "learning_rate": 1.7851389520221463e-05, "loss": 0.7341, "step": 12140 }, { "epoch": 0.22065233183205, "grad_norm": 0.9741371463861682, "learning_rate": 1.7847819067858568e-05, "loss": 0.7458, "step": 12150 }, { "epoch": 0.22083393868952492, "grad_norm": 0.9540485713603553, "learning_rate": 1.7844246009050767e-05, "loss": 0.7411, "step": 12160 }, { "epoch": 0.22101554554699984, "grad_norm": 0.9850432997300029, "learning_rate": 1.7840670344984764e-05, "loss": 0.7577, "step": 12170 }, { "epoch": 0.2211971524044748, "grad_norm": 0.989404989443182, "learning_rate": 1.783709207684811e-05, "loss": 0.7418, "step": 12180 }, { "epoch": 0.22137875926194972, "grad_norm": 1.0557098428302896, "learning_rate": 1.7833511205829245e-05, "loss": 0.7485, "step": 12190 }, { "epoch": 0.22156036611942467, "grad_norm": 0.9835623742275263, "learning_rate": 1.7829927733117452e-05, "loss": 0.7435, "step": 12200 }, { "epoch": 0.2217419729768996, "grad_norm": 0.9908988182504246, "learning_rate": 1.7826341659902886e-05, "loss": 0.7198, "step": 12210 }, { "epoch": 0.22192357983437455, "grad_norm": 1.0160979361666105, "learning_rate": 1.782275298737657e-05, "loss": 0.7419, "step": 12220 }, { "epoch": 0.22210518669184948, "grad_norm": 0.9823792859562889, "learning_rate": 1.781916171673038e-05, "loss": 0.7416, "step": 12230 }, { "epoch": 0.22228679354932443, "grad_norm": 1.0180191978901545, "learning_rate": 1.7815567849157068e-05, "loss": 0.7437, "step": 12240 }, { "epoch": 0.22246840040679935, "grad_norm": 1.0058718122898818, "learning_rate": 1.7811971385850242e-05, "loss": 0.733, "step": 12250 }, { "epoch": 0.2226500072642743, "grad_norm": 1.1154191534631992, "learning_rate": 1.7808372328004368e-05, "loss": 0.7401, "step": 12260 }, { "epoch": 0.22283161412174923, "grad_norm": 1.0111510110242707, "learning_rate": 1.780477067681478e-05, "loss": 0.7274, "step": 12270 }, { "epoch": 0.22301322097922419, "grad_norm": 1.0293305106332138, "learning_rate": 1.7801166433477668e-05, "loss": 0.7543, "step": 12280 }, { "epoch": 0.2231948278366991, "grad_norm": 1.0135855869752057, "learning_rate": 1.7797559599190085e-05, "loss": 0.7322, "step": 12290 }, { "epoch": 0.22337643469417406, "grad_norm": 0.9697897190852408, "learning_rate": 1.7793950175149953e-05, "loss": 0.7432, "step": 12300 }, { "epoch": 0.223558041551649, "grad_norm": 1.00501453277791, "learning_rate": 1.7790338162556036e-05, "loss": 0.7456, "step": 12310 }, { "epoch": 0.22373964840912394, "grad_norm": 0.9801798874798592, "learning_rate": 1.778672356260798e-05, "loss": 0.7231, "step": 12320 }, { "epoch": 0.22392125526659887, "grad_norm": 0.9418805075022272, "learning_rate": 1.778310637650627e-05, "loss": 0.7292, "step": 12330 }, { "epoch": 0.2241028621240738, "grad_norm": 0.9770739377574243, "learning_rate": 1.7779486605452256e-05, "loss": 0.7436, "step": 12340 }, { "epoch": 0.22428446898154875, "grad_norm": 0.9525854963463402, "learning_rate": 1.7775864250648157e-05, "loss": 0.7426, "step": 12350 }, { "epoch": 0.22446607583902367, "grad_norm": 0.9643419318473635, "learning_rate": 1.777223931329704e-05, "loss": 0.7441, "step": 12360 }, { "epoch": 0.22464768269649862, "grad_norm": 0.9513416904203116, "learning_rate": 1.7768611794602826e-05, "loss": 0.7361, "step": 12370 }, { "epoch": 0.22482928955397355, "grad_norm": 0.976906472552897, "learning_rate": 1.7764981695770303e-05, "loss": 0.743, "step": 12380 }, { "epoch": 0.2250108964114485, "grad_norm": 0.9825424829385065, "learning_rate": 1.7761349018005115e-05, "loss": 0.7356, "step": 12390 }, { "epoch": 0.22519250326892343, "grad_norm": 0.9464964796236836, "learning_rate": 1.7757713762513757e-05, "loss": 0.7404, "step": 12400 }, { "epoch": 0.22537411012639838, "grad_norm": 1.0253264828429027, "learning_rate": 1.7754075930503584e-05, "loss": 0.7432, "step": 12410 }, { "epoch": 0.2255557169838733, "grad_norm": 0.9460250319484224, "learning_rate": 1.77504355231828e-05, "loss": 0.7315, "step": 12420 }, { "epoch": 0.22573732384134826, "grad_norm": 1.093502585339729, "learning_rate": 1.7746792541760476e-05, "loss": 0.7423, "step": 12430 }, { "epoch": 0.22591893069882318, "grad_norm": 0.9869407536233331, "learning_rate": 1.774314698744653e-05, "loss": 0.7466, "step": 12440 }, { "epoch": 0.22610053755629814, "grad_norm": 1.0095460800542155, "learning_rate": 1.773949886145173e-05, "loss": 0.72, "step": 12450 }, { "epoch": 0.22628214441377306, "grad_norm": 0.9912132585549998, "learning_rate": 1.7735848164987715e-05, "loss": 0.719, "step": 12460 }, { "epoch": 0.22646375127124801, "grad_norm": 0.9573532913051831, "learning_rate": 1.7732194899266958e-05, "loss": 0.7278, "step": 12470 }, { "epoch": 0.22664535812872294, "grad_norm": 0.9974742818829733, "learning_rate": 1.7728539065502804e-05, "loss": 0.7389, "step": 12480 }, { "epoch": 0.22682696498619787, "grad_norm": 1.1450712366656384, "learning_rate": 1.7724880664909428e-05, "loss": 0.7472, "step": 12490 }, { "epoch": 0.22700857184367282, "grad_norm": 0.9385552688102312, "learning_rate": 1.772121969870188e-05, "loss": 0.7467, "step": 12500 }, { "epoch": 0.22719017870114774, "grad_norm": 0.9480012582199079, "learning_rate": 1.7717556168096054e-05, "loss": 0.7354, "step": 12510 }, { "epoch": 0.2273717855586227, "grad_norm": 0.9851474010023146, "learning_rate": 1.771389007430869e-05, "loss": 0.7479, "step": 12520 }, { "epoch": 0.22755339241609762, "grad_norm": 0.990392831964923, "learning_rate": 1.771022141855738e-05, "loss": 0.7312, "step": 12530 }, { "epoch": 0.22773499927357257, "grad_norm": 0.9705009750692976, "learning_rate": 1.770655020206058e-05, "loss": 0.7329, "step": 12540 }, { "epoch": 0.2279166061310475, "grad_norm": 1.012531136449708, "learning_rate": 1.770287642603758e-05, "loss": 0.7564, "step": 12550 }, { "epoch": 0.22809821298852245, "grad_norm": 0.9363910281893277, "learning_rate": 1.7699200091708533e-05, "loss": 0.7296, "step": 12560 }, { "epoch": 0.22827981984599738, "grad_norm": 0.9472261950785907, "learning_rate": 1.769552120029443e-05, "loss": 0.7336, "step": 12570 }, { "epoch": 0.22846142670347233, "grad_norm": 0.9414876141173063, "learning_rate": 1.7691839753017124e-05, "loss": 0.7321, "step": 12580 }, { "epoch": 0.22864303356094726, "grad_norm": 0.964124624924022, "learning_rate": 1.76881557510993e-05, "loss": 0.7305, "step": 12590 }, { "epoch": 0.2288246404184222, "grad_norm": 1.029366690536077, "learning_rate": 1.768446919576451e-05, "loss": 0.7329, "step": 12600 }, { "epoch": 0.22900624727589713, "grad_norm": 0.9888463098658361, "learning_rate": 1.7680780088237147e-05, "loss": 0.7313, "step": 12610 }, { "epoch": 0.2291878541333721, "grad_norm": 1.0036932488680157, "learning_rate": 1.7677088429742445e-05, "loss": 0.7304, "step": 12620 }, { "epoch": 0.229369460990847, "grad_norm": 0.9652087203212871, "learning_rate": 1.767339422150649e-05, "loss": 0.73, "step": 12630 }, { "epoch": 0.22955106784832197, "grad_norm": 0.9617411582364336, "learning_rate": 1.766969746475622e-05, "loss": 0.7434, "step": 12640 }, { "epoch": 0.2297326747057969, "grad_norm": 0.9811370123583206, "learning_rate": 1.7665998160719407e-05, "loss": 0.7343, "step": 12650 }, { "epoch": 0.22991428156327182, "grad_norm": 0.9809837365599559, "learning_rate": 1.7662296310624682e-05, "loss": 0.7504, "step": 12660 }, { "epoch": 0.23009588842074677, "grad_norm": 0.9536811583432702, "learning_rate": 1.7658591915701518e-05, "loss": 0.7294, "step": 12670 }, { "epoch": 0.2302774952782217, "grad_norm": 0.9835677095049841, "learning_rate": 1.765488497718023e-05, "loss": 0.7311, "step": 12680 }, { "epoch": 0.23045910213569665, "grad_norm": 0.982865052949262, "learning_rate": 1.7651175496291975e-05, "loss": 0.7243, "step": 12690 }, { "epoch": 0.23064070899317157, "grad_norm": 0.9427039523130626, "learning_rate": 1.7647463474268766e-05, "loss": 0.738, "step": 12700 }, { "epoch": 0.23082231585064653, "grad_norm": 1.000644736329769, "learning_rate": 1.764374891234345e-05, "loss": 0.7589, "step": 12710 }, { "epoch": 0.23100392270812145, "grad_norm": 0.9453519269843419, "learning_rate": 1.7640031811749714e-05, "loss": 0.7244, "step": 12720 }, { "epoch": 0.2311855295655964, "grad_norm": 0.9835534753156948, "learning_rate": 1.7636312173722102e-05, "loss": 0.7371, "step": 12730 }, { "epoch": 0.23136713642307133, "grad_norm": 1.0235468897058668, "learning_rate": 1.763258999949599e-05, "loss": 0.7352, "step": 12740 }, { "epoch": 0.23154874328054628, "grad_norm": 1.0124070078727005, "learning_rate": 1.76288652903076e-05, "loss": 0.7454, "step": 12750 }, { "epoch": 0.2317303501380212, "grad_norm": 1.9830634815515915, "learning_rate": 1.7625138047393996e-05, "loss": 0.737, "step": 12760 }, { "epoch": 0.23191195699549616, "grad_norm": 0.9553428895870097, "learning_rate": 1.762140827199308e-05, "loss": 0.7337, "step": 12770 }, { "epoch": 0.23209356385297109, "grad_norm": 1.0353857391315648, "learning_rate": 1.7617675965343603e-05, "loss": 0.7237, "step": 12780 }, { "epoch": 0.23227517071044604, "grad_norm": 1.027348815386949, "learning_rate": 1.7613941128685145e-05, "loss": 0.7342, "step": 12790 }, { "epoch": 0.23245677756792096, "grad_norm": 0.9869938433197495, "learning_rate": 1.7610203763258137e-05, "loss": 0.727, "step": 12800 }, { "epoch": 0.23263838442539592, "grad_norm": 1.0082561446807128, "learning_rate": 1.7606463870303846e-05, "loss": 0.739, "step": 12810 }, { "epoch": 0.23281999128287084, "grad_norm": 1.0016125556157454, "learning_rate": 1.760272145106438e-05, "loss": 0.7314, "step": 12820 }, { "epoch": 0.23300159814034577, "grad_norm": 0.9937162462057212, "learning_rate": 1.759897650678268e-05, "loss": 0.7319, "step": 12830 }, { "epoch": 0.23318320499782072, "grad_norm": 0.9592243673526217, "learning_rate": 1.759522903870253e-05, "loss": 0.7366, "step": 12840 }, { "epoch": 0.23336481185529565, "grad_norm": 0.9579670877027412, "learning_rate": 1.759147904806856e-05, "loss": 0.7289, "step": 12850 }, { "epoch": 0.2335464187127706, "grad_norm": 1.020985595330596, "learning_rate": 1.7587726536126216e-05, "loss": 0.7487, "step": 12860 }, { "epoch": 0.23372802557024552, "grad_norm": 0.95702618913124, "learning_rate": 1.7583971504121806e-05, "loss": 0.7277, "step": 12870 }, { "epoch": 0.23390963242772048, "grad_norm": 1.0118770913841602, "learning_rate": 1.7580213953302464e-05, "loss": 0.739, "step": 12880 }, { "epoch": 0.2340912392851954, "grad_norm": 0.9780983627403484, "learning_rate": 1.7576453884916155e-05, "loss": 0.7315, "step": 12890 }, { "epoch": 0.23427284614267035, "grad_norm": 0.9854921363265302, "learning_rate": 1.757269130021169e-05, "loss": 0.7418, "step": 12900 }, { "epoch": 0.23445445300014528, "grad_norm": 1.014811677149665, "learning_rate": 1.756892620043871e-05, "loss": 0.7403, "step": 12910 }, { "epoch": 0.23463605985762023, "grad_norm": 0.9687560074602714, "learning_rate": 1.7565158586847696e-05, "loss": 0.7404, "step": 12920 }, { "epoch": 0.23481766671509516, "grad_norm": 0.9223475320620852, "learning_rate": 1.7561388460689956e-05, "loss": 0.743, "step": 12930 }, { "epoch": 0.2349992735725701, "grad_norm": 0.9817171981709704, "learning_rate": 1.755761582321764e-05, "loss": 0.7502, "step": 12940 }, { "epoch": 0.23518088043004504, "grad_norm": 0.9634767419820544, "learning_rate": 1.7553840675683732e-05, "loss": 0.7261, "step": 12950 }, { "epoch": 0.23536248728752, "grad_norm": 0.9703930541441171, "learning_rate": 1.755006301934204e-05, "loss": 0.7379, "step": 12960 }, { "epoch": 0.23554409414499491, "grad_norm": 0.9937177764149326, "learning_rate": 1.754628285544722e-05, "loss": 0.7329, "step": 12970 }, { "epoch": 0.23572570100246984, "grad_norm": 0.996952949412566, "learning_rate": 1.7542500185254752e-05, "loss": 0.7342, "step": 12980 }, { "epoch": 0.2359073078599448, "grad_norm": 1.0143171048251387, "learning_rate": 1.7538715010020945e-05, "loss": 0.7294, "step": 12990 }, { "epoch": 0.23608891471741972, "grad_norm": 0.9463551045585474, "learning_rate": 1.7534927331002947e-05, "loss": 0.7248, "step": 13000 }, { "epoch": 0.23627052157489467, "grad_norm": 0.9504887501990664, "learning_rate": 1.7531137149458737e-05, "loss": 0.7349, "step": 13010 }, { "epoch": 0.2364521284323696, "grad_norm": 0.9901012862177343, "learning_rate": 1.752734446664712e-05, "loss": 0.7418, "step": 13020 }, { "epoch": 0.23663373528984455, "grad_norm": 0.9545081349839905, "learning_rate": 1.7523549283827737e-05, "loss": 0.7416, "step": 13030 }, { "epoch": 0.23681534214731947, "grad_norm": 0.9625347719308123, "learning_rate": 1.7519751602261056e-05, "loss": 0.7325, "step": 13040 }, { "epoch": 0.23699694900479443, "grad_norm": 1.0157846314371843, "learning_rate": 1.751595142320838e-05, "loss": 0.7319, "step": 13050 }, { "epoch": 0.23717855586226935, "grad_norm": 2.334998762403898, "learning_rate": 1.751214874793183e-05, "loss": 0.7291, "step": 13060 }, { "epoch": 0.2373601627197443, "grad_norm": 0.960948402581985, "learning_rate": 1.750834357769437e-05, "loss": 0.7436, "step": 13070 }, { "epoch": 0.23754176957721923, "grad_norm": 1.0314431751113897, "learning_rate": 1.7504535913759784e-05, "loss": 0.7456, "step": 13080 }, { "epoch": 0.23772337643469418, "grad_norm": 0.9847002429362512, "learning_rate": 1.7500725757392692e-05, "loss": 0.7576, "step": 13090 }, { "epoch": 0.2379049832921691, "grad_norm": 0.9929026580978461, "learning_rate": 1.7496913109858527e-05, "loss": 0.7547, "step": 13100 }, { "epoch": 0.23808659014964406, "grad_norm": 0.9848425589616785, "learning_rate": 1.7493097972423563e-05, "loss": 0.7312, "step": 13110 }, { "epoch": 0.238268197007119, "grad_norm": 0.9797433388564785, "learning_rate": 1.74892803463549e-05, "loss": 0.7368, "step": 13120 }, { "epoch": 0.23844980386459394, "grad_norm": 1.0203353453799515, "learning_rate": 1.7485460232920455e-05, "loss": 0.7346, "step": 13130 }, { "epoch": 0.23863141072206887, "grad_norm": 0.9321562143227574, "learning_rate": 1.7481637633388985e-05, "loss": 0.7347, "step": 13140 }, { "epoch": 0.2388130175795438, "grad_norm": 0.9341288412956101, "learning_rate": 1.7477812549030053e-05, "loss": 0.7119, "step": 13150 }, { "epoch": 0.23899462443701874, "grad_norm": 0.9422357624462051, "learning_rate": 1.7473984981114074e-05, "loss": 0.7404, "step": 13160 }, { "epoch": 0.23917623129449367, "grad_norm": 1.0010780283433942, "learning_rate": 1.7470154930912264e-05, "loss": 0.7451, "step": 13170 }, { "epoch": 0.23935783815196862, "grad_norm": 0.9978008041658513, "learning_rate": 1.7466322399696673e-05, "loss": 0.7353, "step": 13180 }, { "epoch": 0.23953944500944355, "grad_norm": 0.9864050007919908, "learning_rate": 1.7462487388740176e-05, "loss": 0.7369, "step": 13190 }, { "epoch": 0.2397210518669185, "grad_norm": 0.9651141582931655, "learning_rate": 1.7458649899316473e-05, "loss": 0.7276, "step": 13200 }, { "epoch": 0.23990265872439342, "grad_norm": 0.9812662504575331, "learning_rate": 1.745480993270008e-05, "loss": 0.7332, "step": 13210 }, { "epoch": 0.24008426558186838, "grad_norm": 0.9609598211555421, "learning_rate": 1.745096749016634e-05, "loss": 0.7297, "step": 13220 }, { "epoch": 0.2402658724393433, "grad_norm": 0.9106741355149598, "learning_rate": 1.744712257299142e-05, "loss": 0.7329, "step": 13230 }, { "epoch": 0.24044747929681826, "grad_norm": 0.9746963655520531, "learning_rate": 1.744327518245231e-05, "loss": 0.7221, "step": 13240 }, { "epoch": 0.24062908615429318, "grad_norm": 0.9229985956533514, "learning_rate": 1.7439425319826813e-05, "loss": 0.7421, "step": 13250 }, { "epoch": 0.24081069301176813, "grad_norm": 0.9674743377417775, "learning_rate": 1.7435572986393563e-05, "loss": 0.7427, "step": 13260 }, { "epoch": 0.24099229986924306, "grad_norm": 0.991154418991932, "learning_rate": 1.7431718183432012e-05, "loss": 0.7398, "step": 13270 }, { "epoch": 0.241173906726718, "grad_norm": 0.9293979521083162, "learning_rate": 1.7427860912222423e-05, "loss": 0.7259, "step": 13280 }, { "epoch": 0.24135551358419294, "grad_norm": 0.9775488949774043, "learning_rate": 1.742400117404589e-05, "loss": 0.7159, "step": 13290 }, { "epoch": 0.2415371204416679, "grad_norm": 0.9873121500950294, "learning_rate": 1.7420138970184325e-05, "loss": 0.73, "step": 13300 }, { "epoch": 0.24171872729914282, "grad_norm": 1.0058293755497174, "learning_rate": 1.741627430192046e-05, "loss": 0.7327, "step": 13310 }, { "epoch": 0.24190033415661774, "grad_norm": 0.9884187947479979, "learning_rate": 1.741240717053783e-05, "loss": 0.739, "step": 13320 }, { "epoch": 0.2420819410140927, "grad_norm": 1.0041640158776821, "learning_rate": 1.7408537577320813e-05, "loss": 0.7367, "step": 13330 }, { "epoch": 0.24226354787156762, "grad_norm": 1.0129073430953834, "learning_rate": 1.740466552355458e-05, "loss": 0.7241, "step": 13340 }, { "epoch": 0.24244515472904257, "grad_norm": 0.95177025030414, "learning_rate": 1.7400791010525143e-05, "loss": 0.732, "step": 13350 }, { "epoch": 0.2426267615865175, "grad_norm": 0.9886541172889803, "learning_rate": 1.7396914039519306e-05, "loss": 0.736, "step": 13360 }, { "epoch": 0.24280836844399245, "grad_norm": 0.9945617155160847, "learning_rate": 1.739303461182471e-05, "loss": 0.7364, "step": 13370 }, { "epoch": 0.24298997530146738, "grad_norm": 1.0391904646288401, "learning_rate": 1.738915272872981e-05, "loss": 0.7365, "step": 13380 }, { "epoch": 0.24317158215894233, "grad_norm": 0.9882873471616409, "learning_rate": 1.7385268391523853e-05, "loss": 0.7296, "step": 13390 }, { "epoch": 0.24335318901641725, "grad_norm": 0.9838007011368282, "learning_rate": 1.7381381601496935e-05, "loss": 0.7176, "step": 13400 }, { "epoch": 0.2435347958738922, "grad_norm": 0.9785882757762291, "learning_rate": 1.7377492359939938e-05, "loss": 0.7384, "step": 13410 }, { "epoch": 0.24371640273136713, "grad_norm": 0.959296945849088, "learning_rate": 1.7373600668144582e-05, "loss": 0.733, "step": 13420 }, { "epoch": 0.24389800958884209, "grad_norm": 0.9905182999115162, "learning_rate": 1.736970652740338e-05, "loss": 0.7288, "step": 13430 }, { "epoch": 0.244079616446317, "grad_norm": 0.9471523929199593, "learning_rate": 1.7365809939009674e-05, "loss": 0.7321, "step": 13440 }, { "epoch": 0.24426122330379196, "grad_norm": 0.9998112064542433, "learning_rate": 1.7361910904257607e-05, "loss": 0.7471, "step": 13450 }, { "epoch": 0.2444428301612669, "grad_norm": 1.008577032535627, "learning_rate": 1.7358009424442142e-05, "loss": 0.7476, "step": 13460 }, { "epoch": 0.24462443701874184, "grad_norm": 0.993198497136496, "learning_rate": 1.7354105500859053e-05, "loss": 0.7404, "step": 13470 }, { "epoch": 0.24480604387621677, "grad_norm": 2.791109286588761, "learning_rate": 1.7350199134804927e-05, "loss": 0.7409, "step": 13480 }, { "epoch": 0.2449876507336917, "grad_norm": 0.9430259430341734, "learning_rate": 1.734629032757715e-05, "loss": 0.7273, "step": 13490 }, { "epoch": 0.24516925759116664, "grad_norm": 0.9729202440562644, "learning_rate": 1.7342379080473942e-05, "loss": 0.7424, "step": 13500 }, { "epoch": 0.24535086444864157, "grad_norm": 0.974055756341499, "learning_rate": 1.733846539479431e-05, "loss": 0.7244, "step": 13510 }, { "epoch": 0.24553247130611652, "grad_norm": 0.9465515856768484, "learning_rate": 1.7334549271838086e-05, "loss": 0.7202, "step": 13520 }, { "epoch": 0.24571407816359145, "grad_norm": 0.9447928426307591, "learning_rate": 1.7330630712905902e-05, "loss": 0.7197, "step": 13530 }, { "epoch": 0.2458956850210664, "grad_norm": 0.9532928927740207, "learning_rate": 1.732670971929921e-05, "loss": 0.7333, "step": 13540 }, { "epoch": 0.24607729187854133, "grad_norm": 1.00858465565278, "learning_rate": 1.7322786292320257e-05, "loss": 0.7361, "step": 13550 }, { "epoch": 0.24625889873601628, "grad_norm": 0.9824162139144581, "learning_rate": 1.7318860433272106e-05, "loss": 0.7361, "step": 13560 }, { "epoch": 0.2464405055934912, "grad_norm": 0.9300908730143345, "learning_rate": 1.7314932143458633e-05, "loss": 0.7473, "step": 13570 }, { "epoch": 0.24662211245096616, "grad_norm": 0.9685989792104063, "learning_rate": 1.7311001424184512e-05, "loss": 0.7275, "step": 13580 }, { "epoch": 0.24680371930844108, "grad_norm": 0.9794258979957698, "learning_rate": 1.7307068276755227e-05, "loss": 0.7412, "step": 13590 }, { "epoch": 0.24698532616591604, "grad_norm": 0.961404230505895, "learning_rate": 1.7303132702477062e-05, "loss": 0.7247, "step": 13600 }, { "epoch": 0.24716693302339096, "grad_norm": 0.9880610992457797, "learning_rate": 1.7299194702657126e-05, "loss": 0.7274, "step": 13610 }, { "epoch": 0.24734853988086591, "grad_norm": 0.9823923716739931, "learning_rate": 1.729525427860331e-05, "loss": 0.7274, "step": 13620 }, { "epoch": 0.24753014673834084, "grad_norm": 0.9717187888836687, "learning_rate": 1.729131143162433e-05, "loss": 0.7263, "step": 13630 }, { "epoch": 0.24771175359581576, "grad_norm": 0.9633011275252774, "learning_rate": 1.7287366163029692e-05, "loss": 0.7373, "step": 13640 }, { "epoch": 0.24789336045329072, "grad_norm": 0.9141638042292958, "learning_rate": 1.7283418474129718e-05, "loss": 0.7198, "step": 13650 }, { "epoch": 0.24807496731076564, "grad_norm": 0.956971487222847, "learning_rate": 1.727946836623552e-05, "loss": 0.7148, "step": 13660 }, { "epoch": 0.2482565741682406, "grad_norm": 0.9305743892776538, "learning_rate": 1.727551584065903e-05, "loss": 0.745, "step": 13670 }, { "epoch": 0.24843818102571552, "grad_norm": 0.9924196447422476, "learning_rate": 1.7271560898712968e-05, "loss": 0.7276, "step": 13680 }, { "epoch": 0.24861978788319047, "grad_norm": 0.9565438416270607, "learning_rate": 1.7267603541710864e-05, "loss": 0.7318, "step": 13690 }, { "epoch": 0.2488013947406654, "grad_norm": 0.969314028006916, "learning_rate": 1.7263643770967057e-05, "loss": 0.7369, "step": 13700 }, { "epoch": 0.24898300159814035, "grad_norm": 0.9830856936123841, "learning_rate": 1.725968158779667e-05, "loss": 0.7358, "step": 13710 }, { "epoch": 0.24916460845561528, "grad_norm": 0.9612195470282978, "learning_rate": 1.725571699351564e-05, "loss": 0.7184, "step": 13720 }, { "epoch": 0.24934621531309023, "grad_norm": 0.9287229325046709, "learning_rate": 1.7251749989440704e-05, "loss": 0.7188, "step": 13730 }, { "epoch": 0.24952782217056516, "grad_norm": 1.0127274815171066, "learning_rate": 1.7247780576889393e-05, "loss": 0.7293, "step": 13740 }, { "epoch": 0.2497094290280401, "grad_norm": 0.9821298434659489, "learning_rate": 1.7243808757180046e-05, "loss": 0.7361, "step": 13750 }, { "epoch": 0.24989103588551503, "grad_norm": 0.9644858671167663, "learning_rate": 1.7239834531631796e-05, "loss": 0.7354, "step": 13760 }, { "epoch": 0.25007264274298996, "grad_norm": 0.9437196856380647, "learning_rate": 1.7235857901564577e-05, "loss": 0.7257, "step": 13770 }, { "epoch": 0.25025424960046494, "grad_norm": 0.914266238191087, "learning_rate": 1.7231878868299122e-05, "loss": 0.7137, "step": 13780 }, { "epoch": 0.25043585645793986, "grad_norm": 0.9192855244577898, "learning_rate": 1.7227897433156962e-05, "loss": 0.7297, "step": 13790 }, { "epoch": 0.2506174633154148, "grad_norm": 1.0002411775382185, "learning_rate": 1.722391359746042e-05, "loss": 0.7297, "step": 13800 }, { "epoch": 0.2507990701728897, "grad_norm": 0.9353880883068891, "learning_rate": 1.7219927362532627e-05, "loss": 0.7137, "step": 13810 }, { "epoch": 0.25098067703036464, "grad_norm": 0.9409221134027347, "learning_rate": 1.7215938729697505e-05, "loss": 0.7343, "step": 13820 }, { "epoch": 0.2511622838878396, "grad_norm": 0.9537536456170125, "learning_rate": 1.7211947700279765e-05, "loss": 0.7259, "step": 13830 }, { "epoch": 0.25134389074531455, "grad_norm": 0.9611743223389607, "learning_rate": 1.720795427560493e-05, "loss": 0.733, "step": 13840 }, { "epoch": 0.25152549760278947, "grad_norm": 0.9548713582165319, "learning_rate": 1.7203958456999305e-05, "loss": 0.7267, "step": 13850 }, { "epoch": 0.2517071044602644, "grad_norm": 0.9591344348701722, "learning_rate": 1.719996024579e-05, "loss": 0.7111, "step": 13860 }, { "epoch": 0.2518887113177394, "grad_norm": 0.9458875673242886, "learning_rate": 1.7195959643304912e-05, "loss": 0.7237, "step": 13870 }, { "epoch": 0.2520703181752143, "grad_norm": 0.9457300021548943, "learning_rate": 1.7191956650872734e-05, "loss": 0.7295, "step": 13880 }, { "epoch": 0.25225192503268923, "grad_norm": 0.928800318341411, "learning_rate": 1.7187951269822953e-05, "loss": 0.7153, "step": 13890 }, { "epoch": 0.25243353189016415, "grad_norm": 0.9528505598301222, "learning_rate": 1.7183943501485854e-05, "loss": 0.7422, "step": 13900 }, { "epoch": 0.25261513874763913, "grad_norm": 0.9098757871886578, "learning_rate": 1.717993334719251e-05, "loss": 0.72, "step": 13910 }, { "epoch": 0.25279674560511406, "grad_norm": 0.9935371858776035, "learning_rate": 1.7175920808274784e-05, "loss": 0.7272, "step": 13920 }, { "epoch": 0.252978352462589, "grad_norm": 0.9446994531614046, "learning_rate": 1.7171905886065338e-05, "loss": 0.7445, "step": 13930 }, { "epoch": 0.2531599593200639, "grad_norm": 0.9220169905719835, "learning_rate": 1.7167888581897617e-05, "loss": 0.7432, "step": 13940 }, { "epoch": 0.2533415661775389, "grad_norm": 0.9502546056918927, "learning_rate": 1.7163868897105865e-05, "loss": 0.7258, "step": 13950 }, { "epoch": 0.2535231730350138, "grad_norm": 0.9871330975024335, "learning_rate": 1.7159846833025117e-05, "loss": 0.7197, "step": 13960 }, { "epoch": 0.25370477989248874, "grad_norm": 0.9787629595805954, "learning_rate": 1.7155822390991194e-05, "loss": 0.7194, "step": 13970 }, { "epoch": 0.25388638674996367, "grad_norm": 0.9324236326033798, "learning_rate": 1.71517955723407e-05, "loss": 0.7261, "step": 13980 }, { "epoch": 0.2540679936074386, "grad_norm": 0.9711562311106217, "learning_rate": 1.7147766378411047e-05, "loss": 0.7219, "step": 13990 }, { "epoch": 0.25424960046491357, "grad_norm": 1.0073619578858335, "learning_rate": 1.714373481054042e-05, "loss": 0.7507, "step": 14000 }, { "epoch": 0.2544312073223885, "grad_norm": 0.9867921586770336, "learning_rate": 1.713970087006779e-05, "loss": 0.7207, "step": 14010 }, { "epoch": 0.2546128141798634, "grad_norm": 0.9367198099840657, "learning_rate": 1.7135664558332935e-05, "loss": 0.7095, "step": 14020 }, { "epoch": 0.25479442103733835, "grad_norm": 0.9693459254201358, "learning_rate": 1.7131625876676402e-05, "loss": 0.7449, "step": 14030 }, { "epoch": 0.25497602789481333, "grad_norm": 0.9359376553552992, "learning_rate": 1.7127584826439535e-05, "loss": 0.7306, "step": 14040 }, { "epoch": 0.25515763475228825, "grad_norm": 0.9339592443548564, "learning_rate": 1.712354140896446e-05, "loss": 0.7222, "step": 14050 }, { "epoch": 0.2553392416097632, "grad_norm": 0.9604336330180576, "learning_rate": 1.711949562559409e-05, "loss": 0.7333, "step": 14060 }, { "epoch": 0.2555208484672381, "grad_norm": 0.940819396127569, "learning_rate": 1.7115447477672126e-05, "loss": 0.7301, "step": 14070 }, { "epoch": 0.2557024553247131, "grad_norm": 0.9501332777660098, "learning_rate": 1.7111396966543054e-05, "loss": 0.7332, "step": 14080 }, { "epoch": 0.255884062182188, "grad_norm": 0.9926340846400532, "learning_rate": 1.710734409355214e-05, "loss": 0.7251, "step": 14090 }, { "epoch": 0.25606566903966294, "grad_norm": 0.9751217717013764, "learning_rate": 1.7103288860045447e-05, "loss": 0.7282, "step": 14100 }, { "epoch": 0.25624727589713786, "grad_norm": 0.96163126643028, "learning_rate": 1.70992312673698e-05, "loss": 0.7358, "step": 14110 }, { "epoch": 0.25642888275461284, "grad_norm": 0.9548346173851414, "learning_rate": 1.7095171316872833e-05, "loss": 0.7366, "step": 14120 }, { "epoch": 0.25661048961208777, "grad_norm": 0.9566514444422022, "learning_rate": 1.7091109009902948e-05, "loss": 0.7351, "step": 14130 }, { "epoch": 0.2567920964695627, "grad_norm": 0.9695366178407807, "learning_rate": 1.7087044347809324e-05, "loss": 0.7341, "step": 14140 }, { "epoch": 0.2569737033270376, "grad_norm": 0.9278242577799284, "learning_rate": 1.708297733194194e-05, "loss": 0.7357, "step": 14150 }, { "epoch": 0.25715531018451254, "grad_norm": 0.9271452950128175, "learning_rate": 1.7078907963651545e-05, "loss": 0.7233, "step": 14160 }, { "epoch": 0.2573369170419875, "grad_norm": 1.016673510649092, "learning_rate": 1.707483624428967e-05, "loss": 0.7257, "step": 14170 }, { "epoch": 0.25751852389946245, "grad_norm": 0.9722717599078741, "learning_rate": 1.7070762175208625e-05, "loss": 0.7454, "step": 14180 }, { "epoch": 0.2577001307569374, "grad_norm": 0.9540243164833967, "learning_rate": 1.7066685757761515e-05, "loss": 0.7273, "step": 14190 }, { "epoch": 0.2578817376144123, "grad_norm": 0.9679633258411353, "learning_rate": 1.7062606993302206e-05, "loss": 0.7187, "step": 14200 }, { "epoch": 0.2580633444718873, "grad_norm": 0.9366173518291028, "learning_rate": 1.705852588318535e-05, "loss": 0.7316, "step": 14210 }, { "epoch": 0.2582449513293622, "grad_norm": 0.9115092534424708, "learning_rate": 1.705444242876639e-05, "loss": 0.7214, "step": 14220 }, { "epoch": 0.25842655818683713, "grad_norm": 0.930189762372476, "learning_rate": 1.7050356631401522e-05, "loss": 0.7292, "step": 14230 }, { "epoch": 0.25860816504431205, "grad_norm": 0.9417909555252825, "learning_rate": 1.7046268492447743e-05, "loss": 0.7237, "step": 14240 }, { "epoch": 0.25878977190178704, "grad_norm": 0.9406614039890295, "learning_rate": 1.7042178013262822e-05, "loss": 0.714, "step": 14250 }, { "epoch": 0.25897137875926196, "grad_norm": 0.9523942469591611, "learning_rate": 1.70380851952053e-05, "loss": 0.7187, "step": 14260 }, { "epoch": 0.2591529856167369, "grad_norm": 0.9054437118468274, "learning_rate": 1.7033990039634497e-05, "loss": 0.7094, "step": 14270 }, { "epoch": 0.2593345924742118, "grad_norm": 0.984510150368639, "learning_rate": 1.7029892547910515e-05, "loss": 0.7302, "step": 14280 }, { "epoch": 0.2595161993316868, "grad_norm": 1.004736361458267, "learning_rate": 1.7025792721394224e-05, "loss": 0.7258, "step": 14290 }, { "epoch": 0.2596978061891617, "grad_norm": 0.9242475428337155, "learning_rate": 1.7021690561447274e-05, "loss": 0.7216, "step": 14300 }, { "epoch": 0.25987941304663664, "grad_norm": 0.9793408101848208, "learning_rate": 1.7017586069432083e-05, "loss": 0.725, "step": 14310 }, { "epoch": 0.26006101990411157, "grad_norm": 0.9655010478900691, "learning_rate": 1.7013479246711853e-05, "loss": 0.7257, "step": 14320 }, { "epoch": 0.2602426267615865, "grad_norm": 0.9789945466603146, "learning_rate": 1.7009370094650556e-05, "loss": 0.7232, "step": 14330 }, { "epoch": 0.2604242336190615, "grad_norm": 0.9491648309685525, "learning_rate": 1.7005258614612944e-05, "loss": 0.7234, "step": 14340 }, { "epoch": 0.2606058404765364, "grad_norm": 0.9667564591204476, "learning_rate": 1.7001144807964528e-05, "loss": 0.7262, "step": 14350 }, { "epoch": 0.2607874473340113, "grad_norm": 0.9689171199949449, "learning_rate": 1.69970286760716e-05, "loss": 0.719, "step": 14360 }, { "epoch": 0.26096905419148625, "grad_norm": 0.966865828486863, "learning_rate": 1.6992910220301227e-05, "loss": 0.728, "step": 14370 }, { "epoch": 0.26115066104896123, "grad_norm": 0.9740044190265833, "learning_rate": 1.6988789442021242e-05, "loss": 0.722, "step": 14380 }, { "epoch": 0.26133226790643616, "grad_norm": 0.9576264568864028, "learning_rate": 1.6984666342600257e-05, "loss": 0.7192, "step": 14390 }, { "epoch": 0.2615138747639111, "grad_norm": 0.9181155598177182, "learning_rate": 1.698054092340765e-05, "loss": 0.7309, "step": 14400 }, { "epoch": 0.261695481621386, "grad_norm": 0.922010872593215, "learning_rate": 1.6976413185813565e-05, "loss": 0.719, "step": 14410 }, { "epoch": 0.261877088478861, "grad_norm": 0.9371469104848869, "learning_rate": 1.697228313118892e-05, "loss": 0.7269, "step": 14420 }, { "epoch": 0.2620586953363359, "grad_norm": 0.9951548557189857, "learning_rate": 1.6968150760905405e-05, "loss": 0.7359, "step": 14430 }, { "epoch": 0.26224030219381084, "grad_norm": 0.9565302533362464, "learning_rate": 1.6964016076335483e-05, "loss": 0.7231, "step": 14440 }, { "epoch": 0.26242190905128576, "grad_norm": 0.9776784524027256, "learning_rate": 1.695987907885237e-05, "loss": 0.7305, "step": 14450 }, { "epoch": 0.2626035159087607, "grad_norm": 0.9356770301082713, "learning_rate": 1.6955739769830063e-05, "loss": 0.7358, "step": 14460 }, { "epoch": 0.26278512276623567, "grad_norm": 0.9363008211960608, "learning_rate": 1.695159815064333e-05, "loss": 0.7241, "step": 14470 }, { "epoch": 0.2629667296237106, "grad_norm": 0.9838254290984378, "learning_rate": 1.694745422266769e-05, "loss": 0.7256, "step": 14480 }, { "epoch": 0.2631483364811855, "grad_norm": 0.975221425072034, "learning_rate": 1.6943307987279445e-05, "loss": 0.7368, "step": 14490 }, { "epoch": 0.26332994333866044, "grad_norm": 0.9476602782838979, "learning_rate": 1.693915944585566e-05, "loss": 0.7179, "step": 14500 }, { "epoch": 0.2635115501961354, "grad_norm": 0.9307433502074595, "learning_rate": 1.6935008599774155e-05, "loss": 0.7352, "step": 14510 }, { "epoch": 0.26369315705361035, "grad_norm": 0.9735390544210659, "learning_rate": 1.6930855450413525e-05, "loss": 0.7249, "step": 14520 }, { "epoch": 0.2638747639110853, "grad_norm": 0.9461059984615225, "learning_rate": 1.6926699999153135e-05, "loss": 0.7142, "step": 14530 }, { "epoch": 0.2640563707685602, "grad_norm": 0.9570131629038686, "learning_rate": 1.69225422473731e-05, "loss": 0.7245, "step": 14540 }, { "epoch": 0.2642379776260352, "grad_norm": 0.94256749779863, "learning_rate": 1.691838219645431e-05, "loss": 0.7091, "step": 14550 }, { "epoch": 0.2644195844835101, "grad_norm": 0.9649547368270028, "learning_rate": 1.6914219847778418e-05, "loss": 0.7194, "step": 14560 }, { "epoch": 0.26460119134098503, "grad_norm": 0.960587793369349, "learning_rate": 1.691005520272784e-05, "loss": 0.7363, "step": 14570 }, { "epoch": 0.26478279819845996, "grad_norm": 0.9517962215780084, "learning_rate": 1.690588826268574e-05, "loss": 0.7335, "step": 14580 }, { "epoch": 0.26496440505593494, "grad_norm": 0.9840823323054896, "learning_rate": 1.690171902903607e-05, "loss": 0.7167, "step": 14590 }, { "epoch": 0.26514601191340986, "grad_norm": 0.9845243887489543, "learning_rate": 1.6897547503163524e-05, "loss": 0.7356, "step": 14600 }, { "epoch": 0.2653276187708848, "grad_norm": 0.9393833127383354, "learning_rate": 1.6893373686453562e-05, "loss": 0.7275, "step": 14610 }, { "epoch": 0.2655092256283597, "grad_norm": 2.250647541478012, "learning_rate": 1.6889197580292412e-05, "loss": 0.7255, "step": 14620 }, { "epoch": 0.26569083248583464, "grad_norm": 0.9957100909530434, "learning_rate": 1.688501918606705e-05, "loss": 0.7147, "step": 14630 }, { "epoch": 0.2658724393433096, "grad_norm": 0.9260470074541453, "learning_rate": 1.6880838505165225e-05, "loss": 0.728, "step": 14640 }, { "epoch": 0.26605404620078454, "grad_norm": 0.9291011844387437, "learning_rate": 1.687665553897544e-05, "loss": 0.727, "step": 14650 }, { "epoch": 0.26623565305825947, "grad_norm": 0.9503241735117622, "learning_rate": 1.687247028888695e-05, "loss": 0.7141, "step": 14660 }, { "epoch": 0.2664172599157344, "grad_norm": 0.9385435051615988, "learning_rate": 1.686828275628978e-05, "loss": 0.7333, "step": 14670 }, { "epoch": 0.2665988667732094, "grad_norm": 0.9342272556789583, "learning_rate": 1.68640929425747e-05, "loss": 0.71, "step": 14680 }, { "epoch": 0.2667804736306843, "grad_norm": 0.9223064964798963, "learning_rate": 1.6859900849133258e-05, "loss": 0.7348, "step": 14690 }, { "epoch": 0.2669620804881592, "grad_norm": 0.917498307895298, "learning_rate": 1.685570647735774e-05, "loss": 0.7173, "step": 14700 }, { "epoch": 0.26714368734563415, "grad_norm": 0.9264474790317646, "learning_rate": 1.6851509828641193e-05, "loss": 0.7242, "step": 14710 }, { "epoch": 0.26732529420310913, "grad_norm": 0.9185649718324315, "learning_rate": 1.6847310904377425e-05, "loss": 0.7248, "step": 14720 }, { "epoch": 0.26750690106058406, "grad_norm": 0.967834000691897, "learning_rate": 1.6843109705960995e-05, "loss": 0.7193, "step": 14730 }, { "epoch": 0.267688507918059, "grad_norm": 1.0077496201915965, "learning_rate": 1.6838906234787225e-05, "loss": 0.7319, "step": 14740 }, { "epoch": 0.2678701147755339, "grad_norm": 0.9028553120971714, "learning_rate": 1.683470049225218e-05, "loss": 0.725, "step": 14750 }, { "epoch": 0.2680517216330089, "grad_norm": 0.9753787623569117, "learning_rate": 1.683049247975269e-05, "loss": 0.7365, "step": 14760 }, { "epoch": 0.2682333284904838, "grad_norm": 0.9520766372765909, "learning_rate": 1.6826282198686336e-05, "loss": 0.7115, "step": 14770 }, { "epoch": 0.26841493534795874, "grad_norm": 0.937830172293231, "learning_rate": 1.6822069650451448e-05, "loss": 0.7339, "step": 14780 }, { "epoch": 0.26859654220543366, "grad_norm": 0.9514796552270798, "learning_rate": 1.681785483644711e-05, "loss": 0.7266, "step": 14790 }, { "epoch": 0.2687781490629086, "grad_norm": 0.9909774141594768, "learning_rate": 1.6813637758073165e-05, "loss": 0.7165, "step": 14800 }, { "epoch": 0.26895975592038357, "grad_norm": 0.9663740309207507, "learning_rate": 1.6809418416730206e-05, "loss": 0.7328, "step": 14810 }, { "epoch": 0.2691413627778585, "grad_norm": 0.9514214598386705, "learning_rate": 1.6805196813819568e-05, "loss": 0.7279, "step": 14820 }, { "epoch": 0.2693229696353334, "grad_norm": 0.9602292459351448, "learning_rate": 1.6800972950743347e-05, "loss": 0.7122, "step": 14830 }, { "epoch": 0.26950457649280835, "grad_norm": 1.01866535297733, "learning_rate": 1.679674682890439e-05, "loss": 0.7295, "step": 14840 }, { "epoch": 0.2696861833502833, "grad_norm": 0.922588942304609, "learning_rate": 1.679251844970629e-05, "loss": 0.72, "step": 14850 }, { "epoch": 0.26986779020775825, "grad_norm": 0.8976874766081538, "learning_rate": 1.678828781455339e-05, "loss": 0.7053, "step": 14860 }, { "epoch": 0.2700493970652332, "grad_norm": 0.9842570325553741, "learning_rate": 1.678405492485078e-05, "loss": 0.7275, "step": 14870 }, { "epoch": 0.2702310039227081, "grad_norm": 0.9552658258167067, "learning_rate": 1.677981978200431e-05, "loss": 0.7179, "step": 14880 }, { "epoch": 0.2704126107801831, "grad_norm": 0.9679334572315713, "learning_rate": 1.6775582387420564e-05, "loss": 0.7045, "step": 14890 }, { "epoch": 0.270594217637658, "grad_norm": 0.9667730974063896, "learning_rate": 1.677134274250688e-05, "loss": 0.7168, "step": 14900 }, { "epoch": 0.27077582449513293, "grad_norm": 1.2769507804059572, "learning_rate": 1.676710084867135e-05, "loss": 0.7261, "step": 14910 }, { "epoch": 0.27095743135260786, "grad_norm": 0.9614587630240276, "learning_rate": 1.6762856707322802e-05, "loss": 0.7088, "step": 14920 }, { "epoch": 0.27113903821008284, "grad_norm": 0.9549887275047572, "learning_rate": 1.675861031987081e-05, "loss": 0.7141, "step": 14930 }, { "epoch": 0.27132064506755776, "grad_norm": 0.9580966858017047, "learning_rate": 1.675436168772571e-05, "loss": 0.7259, "step": 14940 }, { "epoch": 0.2715022519250327, "grad_norm": 0.9299482817173442, "learning_rate": 1.6750110812298564e-05, "loss": 0.7275, "step": 14950 }, { "epoch": 0.2716838587825076, "grad_norm": 0.9403618350152856, "learning_rate": 1.674585769500119e-05, "loss": 0.71, "step": 14960 }, { "epoch": 0.27186546563998254, "grad_norm": 0.9318902787447547, "learning_rate": 1.6741602337246145e-05, "loss": 0.7273, "step": 14970 }, { "epoch": 0.2720470724974575, "grad_norm": 0.9819627655025333, "learning_rate": 1.673734474044674e-05, "loss": 0.7078, "step": 14980 }, { "epoch": 0.27222867935493245, "grad_norm": 0.9657897265643008, "learning_rate": 1.673308490601702e-05, "loss": 0.7216, "step": 14990 }, { "epoch": 0.27241028621240737, "grad_norm": 0.9738215911387631, "learning_rate": 1.6728822835371772e-05, "loss": 0.7245, "step": 15000 }, { "epoch": 0.2725918930698823, "grad_norm": 0.9267962533608148, "learning_rate": 1.6724558529926537e-05, "loss": 0.724, "step": 15010 }, { "epoch": 0.2727734999273573, "grad_norm": 0.9505710907092091, "learning_rate": 1.6720291991097583e-05, "loss": 0.7142, "step": 15020 }, { "epoch": 0.2729551067848322, "grad_norm": 0.9376041342248618, "learning_rate": 1.671602322030193e-05, "loss": 0.7307, "step": 15030 }, { "epoch": 0.2731367136423071, "grad_norm": 0.9076110874195454, "learning_rate": 1.6711752218957343e-05, "loss": 0.7068, "step": 15040 }, { "epoch": 0.27331832049978205, "grad_norm": 0.9586456944246788, "learning_rate": 1.6707478988482312e-05, "loss": 0.7136, "step": 15050 }, { "epoch": 0.27349992735725703, "grad_norm": 5.120092017910588, "learning_rate": 1.6703203530296087e-05, "loss": 0.7159, "step": 15060 }, { "epoch": 0.27368153421473196, "grad_norm": 1.137805034513637, "learning_rate": 1.669892584581864e-05, "loss": 0.7278, "step": 15070 }, { "epoch": 0.2738631410722069, "grad_norm": 0.9441485612991537, "learning_rate": 1.6694645936470696e-05, "loss": 0.7174, "step": 15080 }, { "epoch": 0.2740447479296818, "grad_norm": 0.9723126194733992, "learning_rate": 1.6690363803673708e-05, "loss": 0.7258, "step": 15090 }, { "epoch": 0.2742263547871568, "grad_norm": 0.9241185073731523, "learning_rate": 1.668607944884988e-05, "loss": 0.7226, "step": 15100 }, { "epoch": 0.2744079616446317, "grad_norm": 0.93843067069363, "learning_rate": 1.6681792873422142e-05, "loss": 0.7092, "step": 15110 }, { "epoch": 0.27458956850210664, "grad_norm": 0.9998607397077344, "learning_rate": 1.667750407881416e-05, "loss": 0.7234, "step": 15120 }, { "epoch": 0.27477117535958157, "grad_norm": 0.9911594247905005, "learning_rate": 1.667321306645036e-05, "loss": 0.7338, "step": 15130 }, { "epoch": 0.2749527822170565, "grad_norm": 0.9472297237577018, "learning_rate": 1.6668919837755874e-05, "loss": 0.7269, "step": 15140 }, { "epoch": 0.27513438907453147, "grad_norm": 0.914326653386911, "learning_rate": 1.6664624394156586e-05, "loss": 0.7123, "step": 15150 }, { "epoch": 0.2753159959320064, "grad_norm": 0.9662572862082659, "learning_rate": 1.6660326737079117e-05, "loss": 0.7287, "step": 15160 }, { "epoch": 0.2754976027894813, "grad_norm": 0.9168157010171388, "learning_rate": 1.665602686795082e-05, "loss": 0.73, "step": 15170 }, { "epoch": 0.27567920964695625, "grad_norm": 0.937169548399357, "learning_rate": 1.665172478819978e-05, "loss": 0.7232, "step": 15180 }, { "epoch": 0.2758608165044312, "grad_norm": 8.294525009172467, "learning_rate": 1.664742049925482e-05, "loss": 0.7237, "step": 15190 }, { "epoch": 0.27604242336190615, "grad_norm": 1.286776553153498, "learning_rate": 1.6643114002545498e-05, "loss": 0.7205, "step": 15200 }, { "epoch": 0.2762240302193811, "grad_norm": 0.9791417976146942, "learning_rate": 1.66388052995021e-05, "loss": 0.7417, "step": 15210 }, { "epoch": 0.276405637076856, "grad_norm": 0.9433548477697983, "learning_rate": 1.6634494391555642e-05, "loss": 0.7083, "step": 15220 }, { "epoch": 0.276587243934331, "grad_norm": 0.9338859963183987, "learning_rate": 1.6630181280137885e-05, "loss": 0.7092, "step": 15230 }, { "epoch": 0.2767688507918059, "grad_norm": 0.9826548281261994, "learning_rate": 1.6625865966681315e-05, "loss": 0.7234, "step": 15240 }, { "epoch": 0.27695045764928083, "grad_norm": 0.9555761226542155, "learning_rate": 1.662154845261914e-05, "loss": 0.7077, "step": 15250 }, { "epoch": 0.27713206450675576, "grad_norm": 0.9196037534348885, "learning_rate": 1.661722873938532e-05, "loss": 0.7218, "step": 15260 }, { "epoch": 0.27731367136423074, "grad_norm": 0.9194182613053203, "learning_rate": 1.6612906828414524e-05, "loss": 0.7159, "step": 15270 }, { "epoch": 0.27749527822170567, "grad_norm": 0.926841532558177, "learning_rate": 1.6608582721142167e-05, "loss": 0.7155, "step": 15280 }, { "epoch": 0.2776768850791806, "grad_norm": 0.9785126294243025, "learning_rate": 1.660425641900438e-05, "loss": 0.7144, "step": 15290 }, { "epoch": 0.2778584919366555, "grad_norm": 0.984220979013479, "learning_rate": 1.659992792343803e-05, "loss": 0.7188, "step": 15300 }, { "epoch": 0.27804009879413044, "grad_norm": 0.9256459560887782, "learning_rate": 1.659559723588072e-05, "loss": 0.7151, "step": 15310 }, { "epoch": 0.2782217056516054, "grad_norm": 0.951501044901622, "learning_rate": 1.6591264357770765e-05, "loss": 0.7184, "step": 15320 }, { "epoch": 0.27840331250908035, "grad_norm": 0.923256125938395, "learning_rate": 1.6586929290547217e-05, "loss": 0.7231, "step": 15330 }, { "epoch": 0.2785849193665553, "grad_norm": 0.9317437168491468, "learning_rate": 1.6582592035649852e-05, "loss": 0.7106, "step": 15340 }, { "epoch": 0.2787665262240302, "grad_norm": 0.9432308798524103, "learning_rate": 1.657825259451918e-05, "loss": 0.7145, "step": 15350 }, { "epoch": 0.2789481330815052, "grad_norm": 0.9505166068364012, "learning_rate": 1.6573910968596424e-05, "loss": 0.7259, "step": 15360 }, { "epoch": 0.2791297399389801, "grad_norm": 0.940362594176386, "learning_rate": 1.6569567159323548e-05, "loss": 0.7072, "step": 15370 }, { "epoch": 0.27931134679645503, "grad_norm": 0.9410904834880752, "learning_rate": 1.6565221168143226e-05, "loss": 0.7216, "step": 15380 }, { "epoch": 0.27949295365392995, "grad_norm": 0.9694538268180312, "learning_rate": 1.6560872996498866e-05, "loss": 0.7253, "step": 15390 }, { "epoch": 0.27967456051140493, "grad_norm": 0.9814468950257963, "learning_rate": 1.65565226458346e-05, "loss": 0.7218, "step": 15400 }, { "epoch": 0.27985616736887986, "grad_norm": 0.9450095086168478, "learning_rate": 1.655217011759528e-05, "loss": 0.7156, "step": 15410 }, { "epoch": 0.2800377742263548, "grad_norm": 0.9252265301174026, "learning_rate": 1.654781541322648e-05, "loss": 0.719, "step": 15420 }, { "epoch": 0.2802193810838297, "grad_norm": 0.9419339105556852, "learning_rate": 1.6543458534174505e-05, "loss": 0.7166, "step": 15430 }, { "epoch": 0.2804009879413047, "grad_norm": 0.9121488263311897, "learning_rate": 1.653909948188637e-05, "loss": 0.7156, "step": 15440 }, { "epoch": 0.2805825947987796, "grad_norm": 0.9446365504318198, "learning_rate": 1.6534738257809822e-05, "loss": 0.7272, "step": 15450 }, { "epoch": 0.28076420165625454, "grad_norm": 0.972092839818452, "learning_rate": 1.6530374863393327e-05, "loss": 0.7118, "step": 15460 }, { "epoch": 0.28094580851372947, "grad_norm": 0.9142432092982892, "learning_rate": 1.6526009300086072e-05, "loss": 0.7115, "step": 15470 }, { "epoch": 0.2811274153712044, "grad_norm": 0.9242428526040251, "learning_rate": 1.652164156933796e-05, "loss": 0.7309, "step": 15480 }, { "epoch": 0.2813090222286794, "grad_norm": 1.0111485708468309, "learning_rate": 1.6517271672599616e-05, "loss": 0.7269, "step": 15490 }, { "epoch": 0.2814906290861543, "grad_norm": 0.923262062321123, "learning_rate": 1.651289961132239e-05, "loss": 0.7267, "step": 15500 }, { "epoch": 0.2816722359436292, "grad_norm": 0.9652562544021831, "learning_rate": 1.6508525386958347e-05, "loss": 0.7201, "step": 15510 }, { "epoch": 0.28185384280110415, "grad_norm": 0.901295881851045, "learning_rate": 1.650414900096026e-05, "loss": 0.7003, "step": 15520 }, { "epoch": 0.28203544965857913, "grad_norm": 0.9485752461067256, "learning_rate": 1.6499770454781634e-05, "loss": 0.7251, "step": 15530 }, { "epoch": 0.28221705651605405, "grad_norm": 0.928801400709856, "learning_rate": 1.649538974987669e-05, "loss": 0.7136, "step": 15540 }, { "epoch": 0.282398663373529, "grad_norm": 1.014602450180785, "learning_rate": 1.6491006887700363e-05, "loss": 0.7197, "step": 15550 }, { "epoch": 0.2825802702310039, "grad_norm": 0.9162253819712722, "learning_rate": 1.64866218697083e-05, "loss": 0.7187, "step": 15560 }, { "epoch": 0.2827618770884789, "grad_norm": 0.9150142860491338, "learning_rate": 1.6482234697356875e-05, "loss": 0.7198, "step": 15570 }, { "epoch": 0.2829434839459538, "grad_norm": 0.9235853929607161, "learning_rate": 1.6477845372103163e-05, "loss": 0.7173, "step": 15580 }, { "epoch": 0.28312509080342874, "grad_norm": 0.9814525515350616, "learning_rate": 1.6473453895404968e-05, "loss": 0.7205, "step": 15590 }, { "epoch": 0.28330669766090366, "grad_norm": 0.8986419046709907, "learning_rate": 1.6469060268720798e-05, "loss": 0.72, "step": 15600 }, { "epoch": 0.2834883045183786, "grad_norm": 0.9503326859095289, "learning_rate": 1.646466449350989e-05, "loss": 0.7296, "step": 15610 }, { "epoch": 0.28366991137585357, "grad_norm": 0.9711259671237705, "learning_rate": 1.6460266571232165e-05, "loss": 0.7156, "step": 15620 }, { "epoch": 0.2838515182333285, "grad_norm": 1.042955553096823, "learning_rate": 1.6455866503348292e-05, "loss": 0.7188, "step": 15630 }, { "epoch": 0.2840331250908034, "grad_norm": 0.9456588078992352, "learning_rate": 1.6451464291319633e-05, "loss": 0.7284, "step": 15640 }, { "epoch": 0.28421473194827834, "grad_norm": 0.9114621180361133, "learning_rate": 1.6447059936608262e-05, "loss": 0.7088, "step": 15650 }, { "epoch": 0.2843963388057533, "grad_norm": 0.9375278351441267, "learning_rate": 1.644265344067697e-05, "loss": 0.7075, "step": 15660 }, { "epoch": 0.28457794566322825, "grad_norm": 0.9242992216565469, "learning_rate": 1.6438244804989266e-05, "loss": 0.703, "step": 15670 }, { "epoch": 0.2847595525207032, "grad_norm": 1.15663541356545, "learning_rate": 1.643383403100935e-05, "loss": 0.7154, "step": 15680 }, { "epoch": 0.2849411593781781, "grad_norm": 0.9157600934102007, "learning_rate": 1.642942112020215e-05, "loss": 0.7326, "step": 15690 }, { "epoch": 0.2851227662356531, "grad_norm": 0.9500341450660178, "learning_rate": 1.642500607403329e-05, "loss": 0.7111, "step": 15700 }, { "epoch": 0.285304373093128, "grad_norm": 0.9261203410294787, "learning_rate": 1.6420588893969124e-05, "loss": 0.7289, "step": 15710 }, { "epoch": 0.28548597995060293, "grad_norm": 0.9175219777712168, "learning_rate": 1.6416169581476692e-05, "loss": 0.7334, "step": 15720 }, { "epoch": 0.28566758680807786, "grad_norm": 0.9351250565860864, "learning_rate": 1.641174813802375e-05, "loss": 0.7135, "step": 15730 }, { "epoch": 0.28584919366555284, "grad_norm": 0.938356120472658, "learning_rate": 1.6407324565078763e-05, "loss": 0.7432, "step": 15740 }, { "epoch": 0.28603080052302776, "grad_norm": 0.9244984408724572, "learning_rate": 1.640289886411091e-05, "loss": 0.7135, "step": 15750 }, { "epoch": 0.2862124073805027, "grad_norm": 0.9155176669004148, "learning_rate": 1.6398471036590064e-05, "loss": 0.7172, "step": 15760 }, { "epoch": 0.2863940142379776, "grad_norm": 0.955116647262125, "learning_rate": 1.6394041083986815e-05, "loss": 0.7097, "step": 15770 }, { "epoch": 0.28657562109545254, "grad_norm": 0.9362750878787642, "learning_rate": 1.638960900777245e-05, "loss": 0.7095, "step": 15780 }, { "epoch": 0.2867572279529275, "grad_norm": 0.9244413818081632, "learning_rate": 1.6385174809418964e-05, "loss": 0.7142, "step": 15790 }, { "epoch": 0.28693883481040244, "grad_norm": 1.0003769819677781, "learning_rate": 1.6380738490399067e-05, "loss": 0.7188, "step": 15800 }, { "epoch": 0.28712044166787737, "grad_norm": 0.9648428446594752, "learning_rate": 1.6376300052186162e-05, "loss": 0.7131, "step": 15810 }, { "epoch": 0.2873020485253523, "grad_norm": 0.9396250049495056, "learning_rate": 1.637185949625435e-05, "loss": 0.7253, "step": 15820 }, { "epoch": 0.2874836553828273, "grad_norm": 0.9584760637125693, "learning_rate": 1.636741682407845e-05, "loss": 0.7175, "step": 15830 }, { "epoch": 0.2876652622403022, "grad_norm": 0.9418627242505595, "learning_rate": 1.636297203713398e-05, "loss": 0.7249, "step": 15840 }, { "epoch": 0.2878468690977771, "grad_norm": 0.9278779001536289, "learning_rate": 1.635852513689715e-05, "loss": 0.7256, "step": 15850 }, { "epoch": 0.28802847595525205, "grad_norm": 0.9601921513142673, "learning_rate": 1.635407612484489e-05, "loss": 0.7191, "step": 15860 }, { "epoch": 0.28821008281272703, "grad_norm": 0.9468597287658328, "learning_rate": 1.634962500245482e-05, "loss": 0.7068, "step": 15870 }, { "epoch": 0.28839168967020196, "grad_norm": 0.9162527097754954, "learning_rate": 1.634517177120525e-05, "loss": 0.7083, "step": 15880 }, { "epoch": 0.2885732965276769, "grad_norm": 0.9344562507414093, "learning_rate": 1.634071643257522e-05, "loss": 0.733, "step": 15890 }, { "epoch": 0.2887549033851518, "grad_norm": 0.9421388333293655, "learning_rate": 1.633625898804444e-05, "loss": 0.7178, "step": 15900 }, { "epoch": 0.2889365102426268, "grad_norm": 0.9101915675396177, "learning_rate": 1.633179943909334e-05, "loss": 0.7108, "step": 15910 }, { "epoch": 0.2891181171001017, "grad_norm": 0.9269879902678276, "learning_rate": 1.6327337787203036e-05, "loss": 0.726, "step": 15920 }, { "epoch": 0.28929972395757664, "grad_norm": 0.9055083919004443, "learning_rate": 1.6322874033855346e-05, "loss": 0.7289, "step": 15930 }, { "epoch": 0.28948133081505156, "grad_norm": 0.9719177767092597, "learning_rate": 1.631840818053279e-05, "loss": 0.7239, "step": 15940 }, { "epoch": 0.2896629376725265, "grad_norm": 0.9156298983690693, "learning_rate": 1.631394022871858e-05, "loss": 0.7242, "step": 15950 }, { "epoch": 0.28984454453000147, "grad_norm": 0.9308460870868931, "learning_rate": 1.630947017989663e-05, "loss": 0.717, "step": 15960 }, { "epoch": 0.2900261513874764, "grad_norm": 0.9221177430095241, "learning_rate": 1.6304998035551553e-05, "loss": 0.7239, "step": 15970 }, { "epoch": 0.2902077582449513, "grad_norm": 0.9773636929999485, "learning_rate": 1.6300523797168643e-05, "loss": 0.7245, "step": 15980 }, { "epoch": 0.29038936510242624, "grad_norm": 0.9460013165259404, "learning_rate": 1.629604746623391e-05, "loss": 0.7179, "step": 15990 }, { "epoch": 0.2905709719599012, "grad_norm": 0.9101185246105522, "learning_rate": 1.629156904423404e-05, "loss": 0.7158, "step": 16000 }, { "epoch": 0.29075257881737615, "grad_norm": 1.0059071524702328, "learning_rate": 1.6287088532656424e-05, "loss": 0.7171, "step": 16010 }, { "epoch": 0.2909341856748511, "grad_norm": 0.9460784807253206, "learning_rate": 1.6282605932989152e-05, "loss": 0.7145, "step": 16020 }, { "epoch": 0.291115792532326, "grad_norm": 0.948665347934511, "learning_rate": 1.627812124672099e-05, "loss": 0.7231, "step": 16030 }, { "epoch": 0.291297399389801, "grad_norm": 0.9094926864701749, "learning_rate": 1.627363447534141e-05, "loss": 0.7288, "step": 16040 }, { "epoch": 0.2914790062472759, "grad_norm": 0.9469658343957553, "learning_rate": 1.6269145620340577e-05, "loss": 0.7148, "step": 16050 }, { "epoch": 0.29166061310475083, "grad_norm": 0.984978341311309, "learning_rate": 1.626465468320935e-05, "loss": 0.7103, "step": 16060 }, { "epoch": 0.29184221996222576, "grad_norm": 0.9066962508750107, "learning_rate": 1.6260161665439265e-05, "loss": 0.7183, "step": 16070 }, { "epoch": 0.29202382681970074, "grad_norm": 0.9238615306315366, "learning_rate": 1.6255666568522566e-05, "loss": 0.6946, "step": 16080 }, { "epoch": 0.29220543367717566, "grad_norm": 0.9373521803045387, "learning_rate": 1.6251169393952174e-05, "loss": 0.7341, "step": 16090 }, { "epoch": 0.2923870405346506, "grad_norm": 0.9332822421738144, "learning_rate": 1.6246670143221714e-05, "loss": 0.7076, "step": 16100 }, { "epoch": 0.2925686473921255, "grad_norm": 0.9116308621865307, "learning_rate": 1.6242168817825487e-05, "loss": 0.7219, "step": 16110 }, { "epoch": 0.29275025424960044, "grad_norm": 0.887775747346037, "learning_rate": 1.6237665419258486e-05, "loss": 0.7182, "step": 16120 }, { "epoch": 0.2929318611070754, "grad_norm": 0.933576314350388, "learning_rate": 1.6233159949016402e-05, "loss": 0.6949, "step": 16130 }, { "epoch": 0.29311346796455034, "grad_norm": 0.9179468805336483, "learning_rate": 1.6228652408595603e-05, "loss": 0.7127, "step": 16140 }, { "epoch": 0.29329507482202527, "grad_norm": 0.9299519917270671, "learning_rate": 1.622414279949315e-05, "loss": 0.7142, "step": 16150 }, { "epoch": 0.2934766816795002, "grad_norm": 0.9125474728345916, "learning_rate": 1.621963112320679e-05, "loss": 0.7204, "step": 16160 }, { "epoch": 0.2936582885369752, "grad_norm": 0.9274620592864419, "learning_rate": 1.621511738123496e-05, "loss": 0.7174, "step": 16170 }, { "epoch": 0.2938398953944501, "grad_norm": 0.9249797887837365, "learning_rate": 1.621060157507678e-05, "loss": 0.7263, "step": 16180 }, { "epoch": 0.294021502251925, "grad_norm": 0.9208635412916364, "learning_rate": 1.6206083706232047e-05, "loss": 0.7148, "step": 16190 }, { "epoch": 0.29420310910939995, "grad_norm": 0.9019412194002505, "learning_rate": 1.620156377620126e-05, "loss": 0.7135, "step": 16200 }, { "epoch": 0.29438471596687493, "grad_norm": 0.9553958505805924, "learning_rate": 1.619704178648559e-05, "loss": 0.722, "step": 16210 }, { "epoch": 0.29456632282434986, "grad_norm": 0.9171266210446061, "learning_rate": 1.6192517738586893e-05, "loss": 0.7286, "step": 16220 }, { "epoch": 0.2947479296818248, "grad_norm": 1.0047642983472507, "learning_rate": 1.618799163400772e-05, "loss": 0.721, "step": 16230 }, { "epoch": 0.2949295365392997, "grad_norm": 0.9513787616157464, "learning_rate": 1.6183463474251285e-05, "loss": 0.7098, "step": 16240 }, { "epoch": 0.2951111433967747, "grad_norm": 0.9371288643714684, "learning_rate": 1.6178933260821504e-05, "loss": 0.7153, "step": 16250 }, { "epoch": 0.2952927502542496, "grad_norm": 0.9347227228001408, "learning_rate": 1.6174400995222966e-05, "loss": 0.7015, "step": 16260 }, { "epoch": 0.29547435711172454, "grad_norm": 0.8872315621730879, "learning_rate": 1.6169866678960942e-05, "loss": 0.7095, "step": 16270 }, { "epoch": 0.29565596396919946, "grad_norm": 0.9039398662661625, "learning_rate": 1.6165330313541386e-05, "loss": 0.7222, "step": 16280 }, { "epoch": 0.2958375708266744, "grad_norm": 1.0010417856584826, "learning_rate": 1.616079190047093e-05, "loss": 0.7135, "step": 16290 }, { "epoch": 0.29601917768414937, "grad_norm": 0.9208781923015021, "learning_rate": 1.615625144125689e-05, "loss": 0.696, "step": 16300 }, { "epoch": 0.2962007845416243, "grad_norm": 0.9482485004408467, "learning_rate": 1.6151708937407255e-05, "loss": 0.7373, "step": 16310 }, { "epoch": 0.2963823913990992, "grad_norm": 0.9322309698042123, "learning_rate": 1.61471643904307e-05, "loss": 0.7203, "step": 16320 }, { "epoch": 0.29656399825657415, "grad_norm": 0.9265822132984124, "learning_rate": 1.6142617801836577e-05, "loss": 0.7287, "step": 16330 }, { "epoch": 0.2967456051140491, "grad_norm": 1.06311362636089, "learning_rate": 1.613806917313491e-05, "loss": 0.7172, "step": 16340 }, { "epoch": 0.29692721197152405, "grad_norm": 0.9242131547877808, "learning_rate": 1.6133518505836407e-05, "loss": 0.6974, "step": 16350 }, { "epoch": 0.297108818828999, "grad_norm": 3.2612226830412294, "learning_rate": 1.6128965801452456e-05, "loss": 0.7134, "step": 16360 }, { "epoch": 0.2972904256864739, "grad_norm": 0.9425081212035588, "learning_rate": 1.612441106149511e-05, "loss": 0.7112, "step": 16370 }, { "epoch": 0.2974720325439489, "grad_norm": 0.9815497720920264, "learning_rate": 1.6119854287477116e-05, "loss": 0.7042, "step": 16380 }, { "epoch": 0.2976536394014238, "grad_norm": 0.9612069412120777, "learning_rate": 1.611529548091187e-05, "loss": 0.702, "step": 16390 }, { "epoch": 0.29783524625889873, "grad_norm": 0.9109469234054456, "learning_rate": 1.611073464331347e-05, "loss": 0.7196, "step": 16400 }, { "epoch": 0.29801685311637366, "grad_norm": 0.9264944836257065, "learning_rate": 1.6106171776196675e-05, "loss": 0.7125, "step": 16410 }, { "epoch": 0.29819845997384864, "grad_norm": 1.000022961429914, "learning_rate": 1.6101606881076917e-05, "loss": 0.7016, "step": 16420 }, { "epoch": 0.29838006683132356, "grad_norm": 0.9592943855397548, "learning_rate": 1.609703995947031e-05, "loss": 0.7164, "step": 16430 }, { "epoch": 0.2985616736887985, "grad_norm": 0.9091114609281423, "learning_rate": 1.609247101289363e-05, "loss": 0.7192, "step": 16440 }, { "epoch": 0.2987432805462734, "grad_norm": 0.9049841392924023, "learning_rate": 1.608790004286434e-05, "loss": 0.7218, "step": 16450 }, { "epoch": 0.29892488740374834, "grad_norm": 0.9534840258623181, "learning_rate": 1.6083327050900554e-05, "loss": 0.7137, "step": 16460 }, { "epoch": 0.2991064942612233, "grad_norm": 0.9346499329294755, "learning_rate": 1.607875203852108e-05, "loss": 0.7186, "step": 16470 }, { "epoch": 0.29928810111869825, "grad_norm": 0.9228668414211456, "learning_rate": 1.6074175007245382e-05, "loss": 0.7269, "step": 16480 }, { "epoch": 0.29946970797617317, "grad_norm": 0.9253580860730508, "learning_rate": 1.6069595958593606e-05, "loss": 0.7274, "step": 16490 }, { "epoch": 0.2996513148336481, "grad_norm": 0.9178506113272531, "learning_rate": 1.6065014894086554e-05, "loss": 0.7075, "step": 16500 }, { "epoch": 0.2998329216911231, "grad_norm": 0.9161228530023742, "learning_rate": 1.6060431815245706e-05, "loss": 0.719, "step": 16510 }, { "epoch": 0.300014528548598, "grad_norm": 0.8848488459469669, "learning_rate": 1.6055846723593215e-05, "loss": 0.7114, "step": 16520 }, { "epoch": 0.30019613540607293, "grad_norm": 0.9124767016438148, "learning_rate": 1.605125962065189e-05, "loss": 0.7168, "step": 16530 }, { "epoch": 0.30037774226354785, "grad_norm": 1.0858044910242186, "learning_rate": 1.6046670507945226e-05, "loss": 0.7178, "step": 16540 }, { "epoch": 0.30055934912102283, "grad_norm": 0.9315638112709378, "learning_rate": 1.6042079386997366e-05, "loss": 0.7133, "step": 16550 }, { "epoch": 0.30074095597849776, "grad_norm": 1.0216336538831168, "learning_rate": 1.603748625933313e-05, "loss": 0.7121, "step": 16560 }, { "epoch": 0.3009225628359727, "grad_norm": 0.9445700498541404, "learning_rate": 1.6032891126478007e-05, "loss": 0.7156, "step": 16570 }, { "epoch": 0.3011041696934476, "grad_norm": 0.9182298795902752, "learning_rate": 1.6028293989958147e-05, "loss": 0.7143, "step": 16580 }, { "epoch": 0.3012857765509226, "grad_norm": 0.8943256528659285, "learning_rate": 1.6023694851300368e-05, "loss": 0.7025, "step": 16590 }, { "epoch": 0.3014673834083975, "grad_norm": 0.8987700272518039, "learning_rate": 1.601909371203215e-05, "loss": 0.703, "step": 16600 }, { "epoch": 0.30164899026587244, "grad_norm": 0.972366886635742, "learning_rate": 1.601449057368164e-05, "loss": 0.7127, "step": 16610 }, { "epoch": 0.30183059712334737, "grad_norm": 0.9420632338323939, "learning_rate": 1.6009885437777652e-05, "loss": 0.7085, "step": 16620 }, { "epoch": 0.3020122039808223, "grad_norm": 0.9466570664516806, "learning_rate": 1.6005278305849652e-05, "loss": 0.7185, "step": 16630 }, { "epoch": 0.30219381083829727, "grad_norm": 0.9109137487369283, "learning_rate": 1.600066917942778e-05, "loss": 0.7141, "step": 16640 }, { "epoch": 0.3023754176957722, "grad_norm": 0.9265663480709588, "learning_rate": 1.5996058060042838e-05, "loss": 0.7353, "step": 16650 }, { "epoch": 0.3025570245532471, "grad_norm": 0.9062352650276191, "learning_rate": 1.5991444949226288e-05, "loss": 0.7175, "step": 16660 }, { "epoch": 0.30273863141072205, "grad_norm": 0.9315947859849526, "learning_rate": 1.5986829848510244e-05, "loss": 0.7071, "step": 16670 }, { "epoch": 0.30292023826819703, "grad_norm": 0.9117140859009748, "learning_rate": 1.5982212759427494e-05, "loss": 0.7073, "step": 16680 }, { "epoch": 0.30310184512567195, "grad_norm": 0.921314295704415, "learning_rate": 1.5977593683511487e-05, "loss": 0.7079, "step": 16690 }, { "epoch": 0.3032834519831469, "grad_norm": 0.9274037056059139, "learning_rate": 1.597297262229632e-05, "loss": 0.718, "step": 16700 }, { "epoch": 0.3034650588406218, "grad_norm": 0.9418404221917475, "learning_rate": 1.5968349577316757e-05, "loss": 0.7246, "step": 16710 }, { "epoch": 0.3036466656980968, "grad_norm": 0.9394887485660319, "learning_rate": 1.596372455010822e-05, "loss": 0.7097, "step": 16720 }, { "epoch": 0.3038282725555717, "grad_norm": 0.8942768985874779, "learning_rate": 1.5959097542206792e-05, "loss": 0.7296, "step": 16730 }, { "epoch": 0.30400987941304664, "grad_norm": 0.9093136038124301, "learning_rate": 1.5954468555149206e-05, "loss": 0.6987, "step": 16740 }, { "epoch": 0.30419148627052156, "grad_norm": 0.9461692800653194, "learning_rate": 1.5949837590472857e-05, "loss": 0.7212, "step": 16750 }, { "epoch": 0.3043730931279965, "grad_norm": 0.9015778747535512, "learning_rate": 1.59452046497158e-05, "loss": 0.697, "step": 16760 }, { "epoch": 0.30455469998547147, "grad_norm": 0.9587615384220982, "learning_rate": 1.5940569734416745e-05, "loss": 0.7066, "step": 16770 }, { "epoch": 0.3047363068429464, "grad_norm": 0.8975924075912083, "learning_rate": 1.593593284611506e-05, "loss": 0.7185, "step": 16780 }, { "epoch": 0.3049179137004213, "grad_norm": 0.9051737200793298, "learning_rate": 1.593129398635075e-05, "loss": 0.7077, "step": 16790 }, { "epoch": 0.30509952055789624, "grad_norm": 0.9125623117228762, "learning_rate": 1.5926653156664503e-05, "loss": 0.7248, "step": 16800 }, { "epoch": 0.3052811274153712, "grad_norm": 0.924796955001692, "learning_rate": 1.592201035859764e-05, "loss": 0.7012, "step": 16810 }, { "epoch": 0.30546273427284615, "grad_norm": 0.9821514243816228, "learning_rate": 1.5917365593692147e-05, "loss": 0.7055, "step": 16820 }, { "epoch": 0.3056443411303211, "grad_norm": 0.9133210876089667, "learning_rate": 1.5912718863490655e-05, "loss": 0.7165, "step": 16830 }, { "epoch": 0.305825947987796, "grad_norm": 0.9629609395050128, "learning_rate": 1.5908070169536455e-05, "loss": 0.7164, "step": 16840 }, { "epoch": 0.306007554845271, "grad_norm": 0.9778122108102871, "learning_rate": 1.590341951337349e-05, "loss": 0.7106, "step": 16850 }, { "epoch": 0.3061891617027459, "grad_norm": 0.9513143267663039, "learning_rate": 1.5898766896546348e-05, "loss": 0.6991, "step": 16860 }, { "epoch": 0.30637076856022083, "grad_norm": 1.0086246816421127, "learning_rate": 1.5894112320600273e-05, "loss": 0.7003, "step": 16870 }, { "epoch": 0.30655237541769575, "grad_norm": 0.926189028836909, "learning_rate": 1.588945578708116e-05, "loss": 0.7079, "step": 16880 }, { "epoch": 0.30673398227517074, "grad_norm": 0.9719896754394165, "learning_rate": 1.5884797297535555e-05, "loss": 0.7214, "step": 16890 }, { "epoch": 0.30691558913264566, "grad_norm": 0.8980737256672376, "learning_rate": 1.5880136853510644e-05, "loss": 0.7203, "step": 16900 }, { "epoch": 0.3070971959901206, "grad_norm": 0.9254218249696621, "learning_rate": 1.5875474456554282e-05, "loss": 0.7139, "step": 16910 }, { "epoch": 0.3072788028475955, "grad_norm": 0.9277078861324963, "learning_rate": 1.5870810108214952e-05, "loss": 0.7048, "step": 16920 }, { "epoch": 0.30746040970507044, "grad_norm": 0.9045664278388708, "learning_rate": 1.5866143810041793e-05, "loss": 0.705, "step": 16930 }, { "epoch": 0.3076420165625454, "grad_norm": 0.9775635185579015, "learning_rate": 1.58614755635846e-05, "loss": 0.7268, "step": 16940 }, { "epoch": 0.30782362342002034, "grad_norm": 0.9544010468298767, "learning_rate": 1.5856805370393793e-05, "loss": 0.7119, "step": 16950 }, { "epoch": 0.30800523027749527, "grad_norm": 0.937382910018187, "learning_rate": 1.5852133232020465e-05, "loss": 0.712, "step": 16960 }, { "epoch": 0.3081868371349702, "grad_norm": 0.8813765216589015, "learning_rate": 1.5847459150016343e-05, "loss": 0.7112, "step": 16970 }, { "epoch": 0.3083684439924452, "grad_norm": 0.9380223925342683, "learning_rate": 1.5842783125933793e-05, "loss": 0.7111, "step": 16980 }, { "epoch": 0.3085500508499201, "grad_norm": 0.9058772088933801, "learning_rate": 1.583810516132583e-05, "loss": 0.7125, "step": 16990 }, { "epoch": 0.308731657707395, "grad_norm": 0.9165574502998796, "learning_rate": 1.583342525774613e-05, "loss": 0.7103, "step": 17000 }, { "epoch": 0.30891326456486995, "grad_norm": 0.949124080364805, "learning_rate": 1.5828743416748983e-05, "loss": 0.7213, "step": 17010 }, { "epoch": 0.30909487142234493, "grad_norm": 0.9188339462525144, "learning_rate": 1.5824059639889347e-05, "loss": 0.728, "step": 17020 }, { "epoch": 0.30927647827981986, "grad_norm": 0.9398705632848317, "learning_rate": 1.5819373928722813e-05, "loss": 0.7076, "step": 17030 }, { "epoch": 0.3094580851372948, "grad_norm": 0.9055688099537653, "learning_rate": 1.581468628480561e-05, "loss": 0.6862, "step": 17040 }, { "epoch": 0.3096396919947697, "grad_norm": 0.9098239437663823, "learning_rate": 1.5809996709694624e-05, "loss": 0.7002, "step": 17050 }, { "epoch": 0.3098212988522447, "grad_norm": 0.9091318143436199, "learning_rate": 1.5805305204947366e-05, "loss": 0.7032, "step": 17060 }, { "epoch": 0.3100029057097196, "grad_norm": 0.9446684958883915, "learning_rate": 1.5800611772122e-05, "loss": 0.7174, "step": 17070 }, { "epoch": 0.31018451256719454, "grad_norm": 0.8925626329169566, "learning_rate": 1.579591641277732e-05, "loss": 0.7086, "step": 17080 }, { "epoch": 0.31036611942466946, "grad_norm": 0.9823311271947416, "learning_rate": 1.5791219128472768e-05, "loss": 0.7241, "step": 17090 }, { "epoch": 0.3105477262821444, "grad_norm": 0.9260707835667825, "learning_rate": 1.578651992076842e-05, "loss": 0.7164, "step": 17100 }, { "epoch": 0.31072933313961937, "grad_norm": 0.9166169630568447, "learning_rate": 1.5781818791224997e-05, "loss": 0.7077, "step": 17110 }, { "epoch": 0.3109109399970943, "grad_norm": 0.908634685739896, "learning_rate": 1.5777115741403852e-05, "loss": 0.7011, "step": 17120 }, { "epoch": 0.3110925468545692, "grad_norm": 0.8888377033631195, "learning_rate": 1.577241077286698e-05, "loss": 0.7141, "step": 17130 }, { "epoch": 0.31127415371204414, "grad_norm": 0.8955280610331938, "learning_rate": 1.5767703887177006e-05, "loss": 0.7024, "step": 17140 }, { "epoch": 0.3114557605695191, "grad_norm": 1.0067661547170927, "learning_rate": 1.5762995085897202e-05, "loss": 0.6966, "step": 17150 }, { "epoch": 0.31163736742699405, "grad_norm": 0.9345152863163135, "learning_rate": 1.575828437059147e-05, "loss": 0.7083, "step": 17160 }, { "epoch": 0.311818974284469, "grad_norm": 0.9128734278644499, "learning_rate": 1.5753571742824352e-05, "loss": 0.7207, "step": 17170 }, { "epoch": 0.3120005811419439, "grad_norm": 0.9239335328463933, "learning_rate": 1.5748857204161022e-05, "loss": 0.703, "step": 17180 }, { "epoch": 0.3121821879994189, "grad_norm": 0.9243937241693607, "learning_rate": 1.5744140756167285e-05, "loss": 0.7142, "step": 17190 }, { "epoch": 0.3123637948568938, "grad_norm": 0.9084061397938157, "learning_rate": 1.5739422400409585e-05, "loss": 0.7031, "step": 17200 }, { "epoch": 0.31254540171436873, "grad_norm": 0.8766750259857273, "learning_rate": 1.5734702138455002e-05, "loss": 0.7202, "step": 17210 }, { "epoch": 0.31272700857184366, "grad_norm": 0.9593168054385449, "learning_rate": 1.5729979971871244e-05, "loss": 0.7193, "step": 17220 }, { "epoch": 0.31290861542931864, "grad_norm": 0.9155571570518839, "learning_rate": 1.572525590222665e-05, "loss": 0.7027, "step": 17230 }, { "epoch": 0.31309022228679356, "grad_norm": 0.8685694316994911, "learning_rate": 1.57205299310902e-05, "loss": 0.7113, "step": 17240 }, { "epoch": 0.3132718291442685, "grad_norm": 0.9067647541551068, "learning_rate": 1.5715802060031493e-05, "loss": 0.7065, "step": 17250 }, { "epoch": 0.3134534360017434, "grad_norm": 0.8832149973234334, "learning_rate": 1.571107229062077e-05, "loss": 0.7039, "step": 17260 }, { "epoch": 0.31363504285921834, "grad_norm": 0.9546683210118958, "learning_rate": 1.5706340624428903e-05, "loss": 0.7098, "step": 17270 }, { "epoch": 0.3138166497166933, "grad_norm": 0.8834101166698847, "learning_rate": 1.5701607063027384e-05, "loss": 0.6931, "step": 17280 }, { "epoch": 0.31399825657416824, "grad_norm": 0.9452073585792509, "learning_rate": 1.569687160798834e-05, "loss": 0.7075, "step": 17290 }, { "epoch": 0.31417986343164317, "grad_norm": 0.9386343938897994, "learning_rate": 1.569213426088453e-05, "loss": 0.7044, "step": 17300 }, { "epoch": 0.3143614702891181, "grad_norm": 0.9314971334137477, "learning_rate": 1.568739502328933e-05, "loss": 0.7248, "step": 17310 }, { "epoch": 0.3145430771465931, "grad_norm": 0.9374864674668919, "learning_rate": 1.5682653896776766e-05, "loss": 0.7098, "step": 17320 }, { "epoch": 0.314724684004068, "grad_norm": 0.9340952644895758, "learning_rate": 1.5677910882921464e-05, "loss": 0.7038, "step": 17330 }, { "epoch": 0.3149062908615429, "grad_norm": 0.9275324878473857, "learning_rate": 1.5673165983298694e-05, "loss": 0.709, "step": 17340 }, { "epoch": 0.31508789771901785, "grad_norm": 0.9107986501529293, "learning_rate": 1.5668419199484353e-05, "loss": 0.7031, "step": 17350 }, { "epoch": 0.31526950457649283, "grad_norm": 0.884410752174316, "learning_rate": 1.5663670533054956e-05, "loss": 0.6954, "step": 17360 }, { "epoch": 0.31545111143396776, "grad_norm": 0.9468680335049027, "learning_rate": 1.5658919985587644e-05, "loss": 0.7139, "step": 17370 }, { "epoch": 0.3156327182914427, "grad_norm": 0.983006046559413, "learning_rate": 1.565416755866019e-05, "loss": 0.7119, "step": 17380 }, { "epoch": 0.3158143251489176, "grad_norm": 0.9304901282128774, "learning_rate": 1.5649413253850985e-05, "loss": 0.7312, "step": 17390 }, { "epoch": 0.3159959320063926, "grad_norm": 0.9074669681539792, "learning_rate": 1.5644657072739048e-05, "loss": 0.7093, "step": 17400 }, { "epoch": 0.3161775388638675, "grad_norm": 1.7766059004121044, "learning_rate": 1.563989901690401e-05, "loss": 0.7224, "step": 17410 }, { "epoch": 0.31635914572134244, "grad_norm": 0.885778677088372, "learning_rate": 1.5635139087926142e-05, "loss": 0.7089, "step": 17420 }, { "epoch": 0.31654075257881736, "grad_norm": 0.9266007940805903, "learning_rate": 1.563037728738632e-05, "loss": 0.7159, "step": 17430 }, { "epoch": 0.3167223594362923, "grad_norm": 0.9197227331820884, "learning_rate": 1.562561361686606e-05, "loss": 0.7171, "step": 17440 }, { "epoch": 0.31690396629376727, "grad_norm": 0.8793349144730782, "learning_rate": 1.5620848077947478e-05, "loss": 0.7054, "step": 17450 }, { "epoch": 0.3170855731512422, "grad_norm": 0.8724371838440971, "learning_rate": 1.5616080672213326e-05, "loss": 0.6985, "step": 17460 }, { "epoch": 0.3172671800087171, "grad_norm": 0.9446057199255302, "learning_rate": 1.5611311401246975e-05, "loss": 0.728, "step": 17470 }, { "epoch": 0.31744878686619205, "grad_norm": 0.922534228031742, "learning_rate": 1.560654026663241e-05, "loss": 0.7162, "step": 17480 }, { "epoch": 0.317630393723667, "grad_norm": 0.8959430101629545, "learning_rate": 1.560176726995423e-05, "loss": 0.7062, "step": 17490 }, { "epoch": 0.31781200058114195, "grad_norm": 0.960947781757605, "learning_rate": 1.5596992412797666e-05, "loss": 0.7108, "step": 17500 }, { "epoch": 0.3179936074386169, "grad_norm": 0.8771108109470337, "learning_rate": 1.5592215696748557e-05, "loss": 0.6941, "step": 17510 }, { "epoch": 0.3181752142960918, "grad_norm": 0.9129294355949222, "learning_rate": 1.558743712339337e-05, "loss": 0.7069, "step": 17520 }, { "epoch": 0.3183568211535668, "grad_norm": 0.9329869421867819, "learning_rate": 1.558265669431917e-05, "loss": 0.728, "step": 17530 }, { "epoch": 0.3185384280110417, "grad_norm": 0.9210944162178275, "learning_rate": 1.557787441111366e-05, "loss": 0.708, "step": 17540 }, { "epoch": 0.31872003486851663, "grad_norm": 0.9086793699519191, "learning_rate": 1.5573090275365137e-05, "loss": 0.7159, "step": 17550 }, { "epoch": 0.31890164172599156, "grad_norm": 0.9078095234635042, "learning_rate": 1.5568304288662536e-05, "loss": 0.7054, "step": 17560 }, { "epoch": 0.31908324858346654, "grad_norm": 0.9552429435574511, "learning_rate": 1.5563516452595387e-05, "loss": 0.712, "step": 17570 }, { "epoch": 0.31926485544094146, "grad_norm": 0.8921198902924428, "learning_rate": 1.555872676875385e-05, "loss": 0.7136, "step": 17580 }, { "epoch": 0.3194464622984164, "grad_norm": 0.9714751335734646, "learning_rate": 1.5553935238728687e-05, "loss": 0.7135, "step": 17590 }, { "epoch": 0.3196280691558913, "grad_norm": 0.9185528475969882, "learning_rate": 1.5549141864111278e-05, "loss": 0.7069, "step": 17600 }, { "epoch": 0.31980967601336624, "grad_norm": 0.9566012300058455, "learning_rate": 1.5544346646493616e-05, "loss": 0.7103, "step": 17610 }, { "epoch": 0.3199912828708412, "grad_norm": 0.9414032267961874, "learning_rate": 1.553954958746831e-05, "loss": 0.6995, "step": 17620 }, { "epoch": 0.32017288972831615, "grad_norm": 0.9072166546975705, "learning_rate": 1.5534750688628566e-05, "loss": 0.702, "step": 17630 }, { "epoch": 0.32035449658579107, "grad_norm": 0.9427450137115096, "learning_rate": 1.5529949951568217e-05, "loss": 0.7294, "step": 17640 }, { "epoch": 0.320536103443266, "grad_norm": 0.9349109983867261, "learning_rate": 1.5525147377881696e-05, "loss": 0.7104, "step": 17650 }, { "epoch": 0.320717710300741, "grad_norm": 0.9289177345539453, "learning_rate": 1.5520342969164056e-05, "loss": 0.7132, "step": 17660 }, { "epoch": 0.3208993171582159, "grad_norm": 0.8908679342501178, "learning_rate": 1.5515536727010956e-05, "loss": 0.7229, "step": 17670 }, { "epoch": 0.3210809240156908, "grad_norm": 0.9316830288181144, "learning_rate": 1.5510728653018655e-05, "loss": 0.7218, "step": 17680 }, { "epoch": 0.32126253087316575, "grad_norm": 0.9024395110915471, "learning_rate": 1.5505918748784026e-05, "loss": 0.701, "step": 17690 }, { "epoch": 0.32144413773064073, "grad_norm": 0.857429264961675, "learning_rate": 1.5501107015904557e-05, "loss": 0.7032, "step": 17700 }, { "epoch": 0.32162574458811566, "grad_norm": 0.9142050765087832, "learning_rate": 1.5496293455978337e-05, "loss": 0.6989, "step": 17710 }, { "epoch": 0.3218073514455906, "grad_norm": 0.9125424399899326, "learning_rate": 1.549147807060406e-05, "loss": 0.7083, "step": 17720 }, { "epoch": 0.3219889583030655, "grad_norm": 0.9115715415668415, "learning_rate": 1.548666086138103e-05, "loss": 0.7107, "step": 17730 }, { "epoch": 0.3221705651605405, "grad_norm": 0.8796737946562643, "learning_rate": 1.5481841829909153e-05, "loss": 0.7034, "step": 17740 }, { "epoch": 0.3223521720180154, "grad_norm": 0.8800535530328655, "learning_rate": 1.5477020977788945e-05, "loss": 0.7104, "step": 17750 }, { "epoch": 0.32253377887549034, "grad_norm": 0.8837244372967322, "learning_rate": 1.547219830662152e-05, "loss": 0.7072, "step": 17760 }, { "epoch": 0.32271538573296527, "grad_norm": 0.8968783187638115, "learning_rate": 1.5467373818008604e-05, "loss": 0.7, "step": 17770 }, { "epoch": 0.3228969925904402, "grad_norm": 0.8778329890777672, "learning_rate": 1.546254751355252e-05, "loss": 0.7001, "step": 17780 }, { "epoch": 0.32307859944791517, "grad_norm": 0.8945661900395402, "learning_rate": 1.5457719394856204e-05, "loss": 0.7162, "step": 17790 }, { "epoch": 0.3232602063053901, "grad_norm": 0.8801835710920707, "learning_rate": 1.545288946352318e-05, "loss": 0.7186, "step": 17800 }, { "epoch": 0.323441813162865, "grad_norm": 0.90224605586633, "learning_rate": 1.5448057721157584e-05, "loss": 0.7021, "step": 17810 }, { "epoch": 0.32362342002033995, "grad_norm": 0.9480404418275851, "learning_rate": 1.5443224169364154e-05, "loss": 0.7151, "step": 17820 }, { "epoch": 0.3238050268778149, "grad_norm": 0.9162566772955169, "learning_rate": 1.5438388809748222e-05, "loss": 0.7092, "step": 17830 }, { "epoch": 0.32398663373528985, "grad_norm": 0.8916187080214001, "learning_rate": 1.5433551643915724e-05, "loss": 0.7118, "step": 17840 }, { "epoch": 0.3241682405927648, "grad_norm": 0.9397502466667482, "learning_rate": 1.54287126734732e-05, "loss": 0.705, "step": 17850 }, { "epoch": 0.3243498474502397, "grad_norm": 0.9105658964576254, "learning_rate": 1.5423871900027778e-05, "loss": 0.6923, "step": 17860 }, { "epoch": 0.3245314543077147, "grad_norm": 1.0618793268957607, "learning_rate": 1.5419029325187202e-05, "loss": 0.681, "step": 17870 }, { "epoch": 0.3247130611651896, "grad_norm": 0.9404388119327081, "learning_rate": 1.54141849505598e-05, "loss": 0.7166, "step": 17880 }, { "epoch": 0.32489466802266453, "grad_norm": 0.8773932299603836, "learning_rate": 1.5409338777754504e-05, "loss": 0.7069, "step": 17890 }, { "epoch": 0.32507627488013946, "grad_norm": 0.9005981986280942, "learning_rate": 1.5404490808380842e-05, "loss": 0.6963, "step": 17900 }, { "epoch": 0.3252578817376144, "grad_norm": 0.9166575655197743, "learning_rate": 1.5399641044048935e-05, "loss": 0.711, "step": 17910 }, { "epoch": 0.32543948859508937, "grad_norm": 0.8950797566166928, "learning_rate": 1.5394789486369507e-05, "loss": 0.7002, "step": 17920 }, { "epoch": 0.3256210954525643, "grad_norm": 0.9302922813178438, "learning_rate": 1.5389936136953876e-05, "loss": 0.7111, "step": 17930 }, { "epoch": 0.3258027023100392, "grad_norm": 0.9325888400409026, "learning_rate": 1.5385080997413948e-05, "loss": 0.7042, "step": 17940 }, { "epoch": 0.32598430916751414, "grad_norm": 0.9279972412551596, "learning_rate": 1.538022406936223e-05, "loss": 0.7156, "step": 17950 }, { "epoch": 0.3261659160249891, "grad_norm": 0.9373100063124827, "learning_rate": 1.5375365354411825e-05, "loss": 0.719, "step": 17960 }, { "epoch": 0.32634752288246405, "grad_norm": 0.9304871165948916, "learning_rate": 1.5370504854176426e-05, "loss": 0.7269, "step": 17970 }, { "epoch": 0.326529129739939, "grad_norm": 0.8776806547132692, "learning_rate": 1.536564257027031e-05, "loss": 0.7223, "step": 17980 }, { "epoch": 0.3267107365974139, "grad_norm": 0.9366460226265837, "learning_rate": 1.536077850430837e-05, "loss": 0.6962, "step": 17990 }, { "epoch": 0.3268923434548889, "grad_norm": 0.9240120959267047, "learning_rate": 1.5355912657906068e-05, "loss": 0.7013, "step": 18000 }, { "epoch": 0.3270739503123638, "grad_norm": 0.8878549501765338, "learning_rate": 1.5351045032679465e-05, "loss": 0.7099, "step": 18010 }, { "epoch": 0.32725555716983873, "grad_norm": 0.9467097054847635, "learning_rate": 1.5346175630245216e-05, "loss": 0.717, "step": 18020 }, { "epoch": 0.32743716402731365, "grad_norm": 0.8843252181371212, "learning_rate": 1.534130445222057e-05, "loss": 0.7063, "step": 18030 }, { "epoch": 0.32761877088478863, "grad_norm": 0.9892133356338016, "learning_rate": 1.5336431500223346e-05, "loss": 0.7028, "step": 18040 }, { "epoch": 0.32780037774226356, "grad_norm": 0.8943408817429143, "learning_rate": 1.533155677587198e-05, "loss": 0.7176, "step": 18050 }, { "epoch": 0.3279819845997385, "grad_norm": 0.9253082956033282, "learning_rate": 1.5326680280785468e-05, "loss": 0.7311, "step": 18060 }, { "epoch": 0.3281635914572134, "grad_norm": 0.8826249078126005, "learning_rate": 1.532180201658342e-05, "loss": 0.6994, "step": 18070 }, { "epoch": 0.32834519831468834, "grad_norm": 0.879758444670623, "learning_rate": 1.5316921984886018e-05, "loss": 0.7032, "step": 18080 }, { "epoch": 0.3285268051721633, "grad_norm": 0.9355990226339281, "learning_rate": 1.5312040187314036e-05, "loss": 0.7189, "step": 18090 }, { "epoch": 0.32870841202963824, "grad_norm": 0.9140710409320092, "learning_rate": 1.5307156625488828e-05, "loss": 0.7035, "step": 18100 }, { "epoch": 0.32889001888711317, "grad_norm": 0.9808970683308282, "learning_rate": 1.5302271301032346e-05, "loss": 0.7119, "step": 18110 }, { "epoch": 0.3290716257445881, "grad_norm": 0.922139785485455, "learning_rate": 1.529738421556712e-05, "loss": 0.7012, "step": 18120 }, { "epoch": 0.3292532326020631, "grad_norm": 0.8781637468749394, "learning_rate": 1.5292495370716264e-05, "loss": 0.6946, "step": 18130 }, { "epoch": 0.329434839459538, "grad_norm": 0.9534375252144114, "learning_rate": 1.528760476810348e-05, "loss": 0.7016, "step": 18140 }, { "epoch": 0.3296164463170129, "grad_norm": 0.8796590427400457, "learning_rate": 1.5282712409353047e-05, "loss": 0.7225, "step": 18150 }, { "epoch": 0.32979805317448785, "grad_norm": 0.8461516120223458, "learning_rate": 1.527781829608984e-05, "loss": 0.6977, "step": 18160 }, { "epoch": 0.32997966003196283, "grad_norm": 0.9444794388281712, "learning_rate": 1.5272922429939305e-05, "loss": 0.7025, "step": 18170 }, { "epoch": 0.33016126688943775, "grad_norm": 1.2188903719766089, "learning_rate": 1.526802481252747e-05, "loss": 0.6951, "step": 18180 }, { "epoch": 0.3303428737469127, "grad_norm": 0.9369850825242686, "learning_rate": 1.5263125445480957e-05, "loss": 0.6955, "step": 18190 }, { "epoch": 0.3305244806043876, "grad_norm": 0.9293146232951291, "learning_rate": 1.5258224330426954e-05, "loss": 0.7051, "step": 18200 }, { "epoch": 0.3307060874618626, "grad_norm": 0.8781981859696498, "learning_rate": 1.5253321468993242e-05, "loss": 0.7115, "step": 18210 }, { "epoch": 0.3308876943193375, "grad_norm": 0.8903381893616078, "learning_rate": 1.5248416862808167e-05, "loss": 0.7095, "step": 18220 }, { "epoch": 0.33106930117681244, "grad_norm": 0.8767203730182003, "learning_rate": 1.5243510513500676e-05, "loss": 0.7079, "step": 18230 }, { "epoch": 0.33125090803428736, "grad_norm": 0.9083404563414827, "learning_rate": 1.5238602422700273e-05, "loss": 0.7098, "step": 18240 }, { "epoch": 0.3314325148917623, "grad_norm": 0.928773459428894, "learning_rate": 1.5233692592037054e-05, "loss": 0.7243, "step": 18250 }, { "epoch": 0.33161412174923727, "grad_norm": 0.9423088420361071, "learning_rate": 1.5228781023141688e-05, "loss": 0.7153, "step": 18260 }, { "epoch": 0.3317957286067122, "grad_norm": 0.9087945554053084, "learning_rate": 1.5223867717645427e-05, "loss": 0.7087, "step": 18270 }, { "epoch": 0.3319773354641871, "grad_norm": 5.926453467620992, "learning_rate": 1.5218952677180085e-05, "loss": 0.7053, "step": 18280 }, { "epoch": 0.33215894232166204, "grad_norm": 0.9060648086585995, "learning_rate": 1.5214035903378073e-05, "loss": 0.7004, "step": 18290 }, { "epoch": 0.332340549179137, "grad_norm": 0.8584687159839329, "learning_rate": 1.520911739787236e-05, "loss": 0.6901, "step": 18300 }, { "epoch": 0.33252215603661195, "grad_norm": 0.9184618319464403, "learning_rate": 1.5204197162296501e-05, "loss": 0.7075, "step": 18310 }, { "epoch": 0.3327037628940869, "grad_norm": 0.8751662450899831, "learning_rate": 1.5199275198284623e-05, "loss": 0.6879, "step": 18320 }, { "epoch": 0.3328853697515618, "grad_norm": 0.9577404254427536, "learning_rate": 1.5194351507471417e-05, "loss": 0.6996, "step": 18330 }, { "epoch": 0.3330669766090368, "grad_norm": 0.9148762871633378, "learning_rate": 1.5189426091492166e-05, "loss": 0.7027, "step": 18340 }, { "epoch": 0.3332485834665117, "grad_norm": 0.8857476715282376, "learning_rate": 1.5184498951982711e-05, "loss": 0.7087, "step": 18350 }, { "epoch": 0.33343019032398663, "grad_norm": 0.9127766894327483, "learning_rate": 1.5179570090579475e-05, "loss": 0.6897, "step": 18360 }, { "epoch": 0.33361179718146156, "grad_norm": 0.8959422141415937, "learning_rate": 1.5174639508919442e-05, "loss": 0.686, "step": 18370 }, { "epoch": 0.33379340403893654, "grad_norm": 0.8930897019643464, "learning_rate": 1.516970720864018e-05, "loss": 0.709, "step": 18380 }, { "epoch": 0.33397501089641146, "grad_norm": 0.8881490646480427, "learning_rate": 1.5164773191379819e-05, "loss": 0.6978, "step": 18390 }, { "epoch": 0.3341566177538864, "grad_norm": 0.9274952193394614, "learning_rate": 1.5159837458777062e-05, "loss": 0.709, "step": 18400 }, { "epoch": 0.3343382246113613, "grad_norm": 0.9366473346543497, "learning_rate": 1.515490001247118e-05, "loss": 0.6968, "step": 18410 }, { "epoch": 0.33451983146883624, "grad_norm": 0.8918647363135108, "learning_rate": 1.514996085410202e-05, "loss": 0.7011, "step": 18420 }, { "epoch": 0.3347014383263112, "grad_norm": 0.9226350526907883, "learning_rate": 1.514501998530999e-05, "loss": 0.7036, "step": 18430 }, { "epoch": 0.33488304518378614, "grad_norm": 0.8956654338602794, "learning_rate": 1.5140077407736065e-05, "loss": 0.7076, "step": 18440 }, { "epoch": 0.33506465204126107, "grad_norm": 0.9377494096649608, "learning_rate": 1.5135133123021795e-05, "loss": 0.7086, "step": 18450 }, { "epoch": 0.335246258898736, "grad_norm": 0.9035740048455296, "learning_rate": 1.5130187132809288e-05, "loss": 0.6876, "step": 18460 }, { "epoch": 0.335427865756211, "grad_norm": 0.9310579170487959, "learning_rate": 1.5125239438741232e-05, "loss": 0.7, "step": 18470 }, { "epoch": 0.3356094726136859, "grad_norm": 0.8999959524887744, "learning_rate": 1.5120290042460863e-05, "loss": 0.7107, "step": 18480 }, { "epoch": 0.3357910794711608, "grad_norm": 0.8883085608374318, "learning_rate": 1.5115338945612e-05, "loss": 0.6983, "step": 18490 }, { "epoch": 0.33597268632863575, "grad_norm": 0.9052721432335941, "learning_rate": 1.5110386149839014e-05, "loss": 0.7019, "step": 18500 }, { "epoch": 0.33615429318611073, "grad_norm": 0.8566027251839767, "learning_rate": 1.5105431656786844e-05, "loss": 0.7017, "step": 18510 }, { "epoch": 0.33633590004358566, "grad_norm": 0.8759117734661204, "learning_rate": 1.5100475468100992e-05, "loss": 0.7125, "step": 18520 }, { "epoch": 0.3365175069010606, "grad_norm": 0.8567305262028274, "learning_rate": 1.5095517585427533e-05, "loss": 0.699, "step": 18530 }, { "epoch": 0.3366991137585355, "grad_norm": 0.9144097540318662, "learning_rate": 1.5090558010413085e-05, "loss": 0.695, "step": 18540 }, { "epoch": 0.3368807206160105, "grad_norm": 0.8835994345221517, "learning_rate": 1.508559674470485e-05, "loss": 0.6901, "step": 18550 }, { "epoch": 0.3370623274734854, "grad_norm": 0.973244414566249, "learning_rate": 1.5080633789950571e-05, "loss": 0.7065, "step": 18560 }, { "epoch": 0.33724393433096034, "grad_norm": 0.9013442129384119, "learning_rate": 1.5075669147798568e-05, "loss": 0.7074, "step": 18570 }, { "epoch": 0.33742554118843526, "grad_norm": 0.9139106364532741, "learning_rate": 1.5070702819897713e-05, "loss": 0.7065, "step": 18580 }, { "epoch": 0.3376071480459102, "grad_norm": 0.8994669983909697, "learning_rate": 1.506573480789744e-05, "loss": 0.7005, "step": 18590 }, { "epoch": 0.33778875490338517, "grad_norm": 0.8885475733062863, "learning_rate": 1.5060765113447747e-05, "loss": 0.7098, "step": 18600 }, { "epoch": 0.3379703617608601, "grad_norm": 0.9170954927102967, "learning_rate": 1.5055793738199178e-05, "loss": 0.6963, "step": 18610 }, { "epoch": 0.338151968618335, "grad_norm": 0.8751456437255836, "learning_rate": 1.5050820683802849e-05, "loss": 0.6978, "step": 18620 }, { "epoch": 0.33833357547580994, "grad_norm": 0.8874633496334576, "learning_rate": 1.5045845951910428e-05, "loss": 0.7156, "step": 18630 }, { "epoch": 0.3385151823332849, "grad_norm": 0.877894809734989, "learning_rate": 1.5040869544174137e-05, "loss": 0.6898, "step": 18640 }, { "epoch": 0.33869678919075985, "grad_norm": 0.911380100017737, "learning_rate": 1.5035891462246759e-05, "loss": 0.7048, "step": 18650 }, { "epoch": 0.3388783960482348, "grad_norm": 0.8805448344943133, "learning_rate": 1.5030911707781632e-05, "loss": 0.7043, "step": 18660 }, { "epoch": 0.3390600029057097, "grad_norm": 0.9327666325110349, "learning_rate": 1.502593028243265e-05, "loss": 0.721, "step": 18670 }, { "epoch": 0.3392416097631847, "grad_norm": 0.9245208181033441, "learning_rate": 1.5020947187854263e-05, "loss": 0.7094, "step": 18680 }, { "epoch": 0.3394232166206596, "grad_norm": 0.9226163290299733, "learning_rate": 1.5015962425701469e-05, "loss": 0.7194, "step": 18690 }, { "epoch": 0.33960482347813453, "grad_norm": 0.929737590057542, "learning_rate": 1.5010975997629829e-05, "loss": 0.7011, "step": 18700 }, { "epoch": 0.33978643033560946, "grad_norm": 0.8873914912036543, "learning_rate": 1.5005987905295452e-05, "loss": 0.7183, "step": 18710 }, { "epoch": 0.33996803719308444, "grad_norm": 0.8905446286382788, "learning_rate": 1.5000998150354998e-05, "loss": 0.6993, "step": 18720 }, { "epoch": 0.34014964405055936, "grad_norm": 0.917016330640876, "learning_rate": 1.4996006734465682e-05, "loss": 0.7051, "step": 18730 }, { "epoch": 0.3403312509080343, "grad_norm": 0.9422959138476362, "learning_rate": 1.4991013659285272e-05, "loss": 0.702, "step": 18740 }, { "epoch": 0.3405128577655092, "grad_norm": 0.90379491085173, "learning_rate": 1.4986018926472086e-05, "loss": 0.7066, "step": 18750 }, { "epoch": 0.34069446462298414, "grad_norm": 0.9150082838526967, "learning_rate": 1.498102253768499e-05, "loss": 0.7013, "step": 18760 }, { "epoch": 0.3408760714804591, "grad_norm": 0.8572323714348439, "learning_rate": 1.4976024494583406e-05, "loss": 0.6899, "step": 18770 }, { "epoch": 0.34105767833793404, "grad_norm": 0.9233145714427113, "learning_rate": 1.4971024798827296e-05, "loss": 0.7158, "step": 18780 }, { "epoch": 0.34123928519540897, "grad_norm": 0.8997793438373385, "learning_rate": 1.4966023452077178e-05, "loss": 0.7111, "step": 18790 }, { "epoch": 0.3414208920528839, "grad_norm": 0.8846517151954715, "learning_rate": 1.496102045599412e-05, "loss": 0.7185, "step": 18800 }, { "epoch": 0.3416024989103589, "grad_norm": 0.8751775404411573, "learning_rate": 1.495601581223973e-05, "loss": 0.695, "step": 18810 }, { "epoch": 0.3417841057678338, "grad_norm": 0.870177344578466, "learning_rate": 1.4951009522476172e-05, "loss": 0.7054, "step": 18820 }, { "epoch": 0.3419657126253087, "grad_norm": 0.9188887354872516, "learning_rate": 1.4946001588366148e-05, "loss": 0.7021, "step": 18830 }, { "epoch": 0.34214731948278365, "grad_norm": 0.9014057242552297, "learning_rate": 1.4940992011572912e-05, "loss": 0.7016, "step": 18840 }, { "epoch": 0.34232892634025863, "grad_norm": 0.9561016779605817, "learning_rate": 1.4935980793760263e-05, "loss": 0.7254, "step": 18850 }, { "epoch": 0.34251053319773356, "grad_norm": 0.8534209902364278, "learning_rate": 1.4930967936592544e-05, "loss": 0.702, "step": 18860 }, { "epoch": 0.3426921400552085, "grad_norm": 0.8677543027880482, "learning_rate": 1.4925953441734642e-05, "loss": 0.6992, "step": 18870 }, { "epoch": 0.3428737469126834, "grad_norm": 0.8884787326442045, "learning_rate": 1.4920937310851987e-05, "loss": 0.6991, "step": 18880 }, { "epoch": 0.3430553537701584, "grad_norm": 0.912155082658895, "learning_rate": 1.4915919545610553e-05, "loss": 0.7076, "step": 18890 }, { "epoch": 0.3432369606276333, "grad_norm": 0.9401405690718091, "learning_rate": 1.4910900147676864e-05, "loss": 0.7008, "step": 18900 }, { "epoch": 0.34341856748510824, "grad_norm": 0.896026577579178, "learning_rate": 1.4905879118717971e-05, "loss": 0.6921, "step": 18910 }, { "epoch": 0.34360017434258316, "grad_norm": 0.8708525303909046, "learning_rate": 1.4900856460401485e-05, "loss": 0.6852, "step": 18920 }, { "epoch": 0.3437817812000581, "grad_norm": 0.8708128657578758, "learning_rate": 1.4895832174395542e-05, "loss": 0.6899, "step": 18930 }, { "epoch": 0.34396338805753307, "grad_norm": 0.9283365284754602, "learning_rate": 1.4890806262368824e-05, "loss": 0.7001, "step": 18940 }, { "epoch": 0.344144994915008, "grad_norm": 0.8769567453939769, "learning_rate": 1.488577872599056e-05, "loss": 0.6893, "step": 18950 }, { "epoch": 0.3443266017724829, "grad_norm": 0.94296003683109, "learning_rate": 1.4880749566930512e-05, "loss": 0.7047, "step": 18960 }, { "epoch": 0.34450820862995785, "grad_norm": 0.9112111437024255, "learning_rate": 1.487571878685898e-05, "loss": 0.7181, "step": 18970 }, { "epoch": 0.3446898154874328, "grad_norm": 0.897880619805044, "learning_rate": 1.4870686387446802e-05, "loss": 0.7101, "step": 18980 }, { "epoch": 0.34487142234490775, "grad_norm": 0.9430209812218607, "learning_rate": 1.4865652370365357e-05, "loss": 0.7018, "step": 18990 }, { "epoch": 0.3450530292023827, "grad_norm": 0.8890735181594753, "learning_rate": 1.4860616737286564e-05, "loss": 0.7192, "step": 19000 }, { "epoch": 0.3452346360598576, "grad_norm": 0.9107037322243963, "learning_rate": 1.4855579489882872e-05, "loss": 0.7036, "step": 19010 }, { "epoch": 0.3454162429173326, "grad_norm": 0.8626352154169222, "learning_rate": 1.4850540629827271e-05, "loss": 0.6943, "step": 19020 }, { "epoch": 0.3455978497748075, "grad_norm": 0.9264044329814625, "learning_rate": 1.4845500158793282e-05, "loss": 0.7005, "step": 19030 }, { "epoch": 0.34577945663228243, "grad_norm": 0.9676218192970273, "learning_rate": 1.4840458078454964e-05, "loss": 0.6955, "step": 19040 }, { "epoch": 0.34596106348975736, "grad_norm": 0.9876666199807795, "learning_rate": 1.483541439048691e-05, "loss": 0.7047, "step": 19050 }, { "epoch": 0.3461426703472323, "grad_norm": 0.9048251847307512, "learning_rate": 1.483036909656425e-05, "loss": 0.7002, "step": 19060 }, { "epoch": 0.34632427720470726, "grad_norm": 0.9203571261312538, "learning_rate": 1.4825322198362643e-05, "loss": 0.7016, "step": 19070 }, { "epoch": 0.3465058840621822, "grad_norm": 0.9143826516732294, "learning_rate": 1.482027369755828e-05, "loss": 0.6955, "step": 19080 }, { "epoch": 0.3466874909196571, "grad_norm": 0.942633937007917, "learning_rate": 1.481522359582789e-05, "loss": 0.7161, "step": 19090 }, { "epoch": 0.34686909777713204, "grad_norm": 0.9300222577010202, "learning_rate": 1.4810171894848728e-05, "loss": 0.6844, "step": 19100 }, { "epoch": 0.347050704634607, "grad_norm": 0.8731417340810176, "learning_rate": 1.4805118596298587e-05, "loss": 0.7062, "step": 19110 }, { "epoch": 0.34723231149208195, "grad_norm": 0.9039206660929114, "learning_rate": 1.4800063701855777e-05, "loss": 0.6997, "step": 19120 }, { "epoch": 0.34741391834955687, "grad_norm": 0.9057922587564885, "learning_rate": 1.4795007213199157e-05, "loss": 0.7033, "step": 19130 }, { "epoch": 0.3475955252070318, "grad_norm": 0.9457457670213906, "learning_rate": 1.4789949132008099e-05, "loss": 0.6887, "step": 19140 }, { "epoch": 0.3477771320645068, "grad_norm": 0.8803056319979922, "learning_rate": 1.4784889459962515e-05, "loss": 0.7031, "step": 19150 }, { "epoch": 0.3479587389219817, "grad_norm": 1.3836721388369442, "learning_rate": 1.4779828198742836e-05, "loss": 0.7135, "step": 19160 }, { "epoch": 0.34814034577945663, "grad_norm": 0.8703074628876352, "learning_rate": 1.4774765350030028e-05, "loss": 0.6843, "step": 19170 }, { "epoch": 0.34832195263693155, "grad_norm": 0.909906708847952, "learning_rate": 1.4769700915505585e-05, "loss": 0.6884, "step": 19180 }, { "epoch": 0.34850355949440653, "grad_norm": 0.8650438511623251, "learning_rate": 1.4764634896851518e-05, "loss": 0.6865, "step": 19190 }, { "epoch": 0.34868516635188146, "grad_norm": 0.9137274339079956, "learning_rate": 1.4759567295750376e-05, "loss": 0.7049, "step": 19200 }, { "epoch": 0.3488667732093564, "grad_norm": 0.8834265840459177, "learning_rate": 1.4754498113885225e-05, "loss": 0.6962, "step": 19210 }, { "epoch": 0.3490483800668313, "grad_norm": 0.8738619791840353, "learning_rate": 1.4749427352939663e-05, "loss": 0.7034, "step": 19220 }, { "epoch": 0.34922998692430623, "grad_norm": 0.8797117424369572, "learning_rate": 1.4744355014597806e-05, "loss": 0.696, "step": 19230 }, { "epoch": 0.3494115937817812, "grad_norm": 0.8843228028059947, "learning_rate": 1.4739281100544298e-05, "loss": 0.7007, "step": 19240 }, { "epoch": 0.34959320063925614, "grad_norm": 0.8803833067694229, "learning_rate": 1.4734205612464304e-05, "loss": 0.6968, "step": 19250 }, { "epoch": 0.34977480749673107, "grad_norm": 0.8701058572227737, "learning_rate": 1.472912855204351e-05, "loss": 0.6933, "step": 19260 }, { "epoch": 0.349956414354206, "grad_norm": 0.8603345366586242, "learning_rate": 1.4724049920968133e-05, "loss": 0.6959, "step": 19270 }, { "epoch": 0.35013802121168097, "grad_norm": 0.8880799823733687, "learning_rate": 1.4718969720924901e-05, "loss": 0.7042, "step": 19280 }, { "epoch": 0.3503196280691559, "grad_norm": 0.9158396221032403, "learning_rate": 1.4713887953601067e-05, "loss": 0.7022, "step": 19290 }, { "epoch": 0.3505012349266308, "grad_norm": 0.9148206419376745, "learning_rate": 1.4708804620684408e-05, "loss": 0.6963, "step": 19300 }, { "epoch": 0.35068284178410575, "grad_norm": 0.8797514671634417, "learning_rate": 1.470371972386322e-05, "loss": 0.6886, "step": 19310 }, { "epoch": 0.35086444864158073, "grad_norm": 0.8924817913185517, "learning_rate": 1.469863326482631e-05, "loss": 0.7047, "step": 19320 }, { "epoch": 0.35104605549905565, "grad_norm": 0.8941879872705926, "learning_rate": 1.4693545245263017e-05, "loss": 0.6995, "step": 19330 }, { "epoch": 0.3512276623565306, "grad_norm": 0.916036166001759, "learning_rate": 1.4688455666863188e-05, "loss": 0.6826, "step": 19340 }, { "epoch": 0.3514092692140055, "grad_norm": 0.9044328374863327, "learning_rate": 1.4683364531317195e-05, "loss": 0.7058, "step": 19350 }, { "epoch": 0.3515908760714805, "grad_norm": 0.865507392899269, "learning_rate": 1.4678271840315919e-05, "loss": 0.6982, "step": 19360 }, { "epoch": 0.3517724829289554, "grad_norm": 0.9133075375102648, "learning_rate": 1.4673177595550763e-05, "loss": 0.7057, "step": 19370 }, { "epoch": 0.35195408978643034, "grad_norm": 0.9047915612405554, "learning_rate": 1.4668081798713649e-05, "loss": 0.6871, "step": 19380 }, { "epoch": 0.35213569664390526, "grad_norm": 0.8897528969311442, "learning_rate": 1.4662984451497007e-05, "loss": 0.6967, "step": 19390 }, { "epoch": 0.3523173035013802, "grad_norm": 0.8695305660044379, "learning_rate": 1.4657885555593787e-05, "loss": 0.7021, "step": 19400 }, { "epoch": 0.35249891035885517, "grad_norm": 0.9095529034865742, "learning_rate": 1.4652785112697451e-05, "loss": 0.7013, "step": 19410 }, { "epoch": 0.3526805172163301, "grad_norm": 0.8933780673641606, "learning_rate": 1.4647683124501979e-05, "loss": 0.6865, "step": 19420 }, { "epoch": 0.352862124073805, "grad_norm": 0.8658718696472973, "learning_rate": 1.464257959270186e-05, "loss": 0.6984, "step": 19430 }, { "epoch": 0.35304373093127994, "grad_norm": 0.8950099628193036, "learning_rate": 1.4637474518992092e-05, "loss": 0.7079, "step": 19440 }, { "epoch": 0.3532253377887549, "grad_norm": 0.9384373784699833, "learning_rate": 1.4632367905068194e-05, "loss": 0.7048, "step": 19450 }, { "epoch": 0.35340694464622985, "grad_norm": 0.9302584105088996, "learning_rate": 1.4627259752626193e-05, "loss": 0.6912, "step": 19460 }, { "epoch": 0.3535885515037048, "grad_norm": 0.9123559615152815, "learning_rate": 1.4622150063362623e-05, "loss": 0.7067, "step": 19470 }, { "epoch": 0.3537701583611797, "grad_norm": 0.8867247216405527, "learning_rate": 1.4617038838974535e-05, "loss": 0.6997, "step": 19480 }, { "epoch": 0.3539517652186547, "grad_norm": 0.8704262995534483, "learning_rate": 1.4611926081159484e-05, "loss": 0.6774, "step": 19490 }, { "epoch": 0.3541333720761296, "grad_norm": 0.9019120003186317, "learning_rate": 1.4606811791615538e-05, "loss": 0.7243, "step": 19500 }, { "epoch": 0.35431497893360453, "grad_norm": 0.8848770618119429, "learning_rate": 1.4601695972041274e-05, "loss": 0.7043, "step": 19510 }, { "epoch": 0.35449658579107945, "grad_norm": 0.8825068742652087, "learning_rate": 1.4596578624135771e-05, "loss": 0.699, "step": 19520 }, { "epoch": 0.35467819264855444, "grad_norm": 0.9090687503540137, "learning_rate": 1.4591459749598627e-05, "loss": 0.7032, "step": 19530 }, { "epoch": 0.35485979950602936, "grad_norm": 0.8982228048278943, "learning_rate": 1.4586339350129938e-05, "loss": 0.6975, "step": 19540 }, { "epoch": 0.3550414063635043, "grad_norm": 0.861576003589533, "learning_rate": 1.4581217427430308e-05, "loss": 0.6988, "step": 19550 }, { "epoch": 0.3552230132209792, "grad_norm": 0.883914473288753, "learning_rate": 1.4576093983200848e-05, "loss": 0.7004, "step": 19560 }, { "epoch": 0.35540462007845414, "grad_norm": 0.9041766714699021, "learning_rate": 1.457096901914317e-05, "loss": 0.7046, "step": 19570 }, { "epoch": 0.3555862269359291, "grad_norm": 0.8982747160266341, "learning_rate": 1.4565842536959402e-05, "loss": 0.6957, "step": 19580 }, { "epoch": 0.35576783379340404, "grad_norm": 0.9017037801060229, "learning_rate": 1.4560714538352163e-05, "loss": 0.7179, "step": 19590 }, { "epoch": 0.35594944065087897, "grad_norm": 0.8864607737204093, "learning_rate": 1.4555585025024588e-05, "loss": 0.6948, "step": 19600 }, { "epoch": 0.3561310475083539, "grad_norm": 0.8770414791481408, "learning_rate": 1.4550453998680302e-05, "loss": 0.7096, "step": 19610 }, { "epoch": 0.3563126543658289, "grad_norm": 0.9361590382240733, "learning_rate": 1.4545321461023445e-05, "loss": 0.7083, "step": 19620 }, { "epoch": 0.3564942612233038, "grad_norm": 0.8666821376607527, "learning_rate": 1.4540187413758649e-05, "loss": 0.7162, "step": 19630 }, { "epoch": 0.3566758680807787, "grad_norm": 0.8855167398434001, "learning_rate": 1.4535051858591054e-05, "loss": 0.7153, "step": 19640 }, { "epoch": 0.35685747493825365, "grad_norm": 0.8968136757135557, "learning_rate": 1.4529914797226296e-05, "loss": 0.6849, "step": 19650 }, { "epoch": 0.35703908179572863, "grad_norm": 0.92164232059218, "learning_rate": 1.4524776231370519e-05, "loss": 0.6947, "step": 19660 }, { "epoch": 0.35722068865320356, "grad_norm": 0.9066829728104804, "learning_rate": 1.4519636162730355e-05, "loss": 0.6928, "step": 19670 }, { "epoch": 0.3574022955106785, "grad_norm": 0.8849683508983426, "learning_rate": 1.4514494593012945e-05, "loss": 0.716, "step": 19680 }, { "epoch": 0.3575839023681534, "grad_norm": 0.8610131460463074, "learning_rate": 1.4509351523925924e-05, "loss": 0.6881, "step": 19690 }, { "epoch": 0.3577655092256284, "grad_norm": 0.8960390896472187, "learning_rate": 1.4504206957177423e-05, "loss": 0.6924, "step": 19700 }, { "epoch": 0.3579471160831033, "grad_norm": 0.8578914117107462, "learning_rate": 1.4499060894476078e-05, "loss": 0.6841, "step": 19710 }, { "epoch": 0.35812872294057824, "grad_norm": 0.905193395724537, "learning_rate": 1.4493913337531016e-05, "loss": 0.7039, "step": 19720 }, { "epoch": 0.35831032979805316, "grad_norm": 0.9067323584869641, "learning_rate": 1.448876428805186e-05, "loss": 0.6793, "step": 19730 }, { "epoch": 0.3584919366555281, "grad_norm": 0.8802253734464981, "learning_rate": 1.4483613747748734e-05, "loss": 0.7089, "step": 19740 }, { "epoch": 0.35867354351300307, "grad_norm": 0.9107384993809002, "learning_rate": 1.4478461718332248e-05, "loss": 0.7001, "step": 19750 }, { "epoch": 0.358855150370478, "grad_norm": 0.9161169257650448, "learning_rate": 1.4473308201513517e-05, "loss": 0.6938, "step": 19760 }, { "epoch": 0.3590367572279529, "grad_norm": 0.9094424774344158, "learning_rate": 1.446815319900414e-05, "loss": 0.7055, "step": 19770 }, { "epoch": 0.35921836408542784, "grad_norm": 0.873314011731106, "learning_rate": 1.4462996712516217e-05, "loss": 0.7039, "step": 19780 }, { "epoch": 0.3593999709429028, "grad_norm": 0.8791599326742279, "learning_rate": 1.4457838743762338e-05, "loss": 0.6944, "step": 19790 }, { "epoch": 0.35958157780037775, "grad_norm": 0.8886265193273791, "learning_rate": 1.4452679294455587e-05, "loss": 0.6952, "step": 19800 }, { "epoch": 0.3597631846578527, "grad_norm": 0.8861157706216493, "learning_rate": 1.4447518366309535e-05, "loss": 0.7016, "step": 19810 }, { "epoch": 0.3599447915153276, "grad_norm": 0.8780304503091995, "learning_rate": 1.4442355961038248e-05, "loss": 0.6984, "step": 19820 }, { "epoch": 0.3601263983728026, "grad_norm": 0.9015700864024481, "learning_rate": 1.4437192080356284e-05, "loss": 0.6886, "step": 19830 }, { "epoch": 0.3603080052302775, "grad_norm": 0.9296054798900509, "learning_rate": 1.443202672597869e-05, "loss": 0.6967, "step": 19840 }, { "epoch": 0.36048961208775243, "grad_norm": 0.9523059021507944, "learning_rate": 1.4426859899620998e-05, "loss": 0.6975, "step": 19850 }, { "epoch": 0.36067121894522736, "grad_norm": 0.8710461770655722, "learning_rate": 1.442169160299923e-05, "loss": 0.7018, "step": 19860 }, { "epoch": 0.36085282580270234, "grad_norm": 0.9430492796331147, "learning_rate": 1.4416521837829906e-05, "loss": 0.6893, "step": 19870 }, { "epoch": 0.36103443266017726, "grad_norm": 0.8560678260436688, "learning_rate": 1.441135060583002e-05, "loss": 0.6813, "step": 19880 }, { "epoch": 0.3612160395176522, "grad_norm": 0.8549666155407908, "learning_rate": 1.4406177908717063e-05, "loss": 0.7004, "step": 19890 }, { "epoch": 0.3613976463751271, "grad_norm": 0.8796631205368368, "learning_rate": 1.4401003748209008e-05, "loss": 0.6856, "step": 19900 }, { "epoch": 0.36157925323260204, "grad_norm": 0.9043413295051861, "learning_rate": 1.4395828126024317e-05, "loss": 0.6933, "step": 19910 }, { "epoch": 0.361760860090077, "grad_norm": 0.9073556019427989, "learning_rate": 1.439065104388193e-05, "loss": 0.6948, "step": 19920 }, { "epoch": 0.36194246694755194, "grad_norm": 0.9076223191164585, "learning_rate": 1.4385472503501283e-05, "loss": 0.7122, "step": 19930 }, { "epoch": 0.36212407380502687, "grad_norm": 0.8895600956858569, "learning_rate": 1.4380292506602291e-05, "loss": 0.6926, "step": 19940 }, { "epoch": 0.3623056806625018, "grad_norm": 0.904891134029863, "learning_rate": 1.4375111054905349e-05, "loss": 0.6865, "step": 19950 }, { "epoch": 0.3624872875199768, "grad_norm": 0.8700398168221318, "learning_rate": 1.4369928150131343e-05, "loss": 0.6902, "step": 19960 }, { "epoch": 0.3626688943774517, "grad_norm": 0.8688569850812257, "learning_rate": 1.4364743794001632e-05, "loss": 0.7041, "step": 19970 }, { "epoch": 0.3628505012349266, "grad_norm": 0.8658985055848577, "learning_rate": 1.4359557988238067e-05, "loss": 0.681, "step": 19980 }, { "epoch": 0.36303210809240155, "grad_norm": 0.8885983306367419, "learning_rate": 1.435437073456297e-05, "loss": 0.6815, "step": 19990 }, { "epoch": 0.36321371494987653, "grad_norm": 0.9419721965420482, "learning_rate": 1.4349182034699158e-05, "loss": 0.7113, "step": 20000 }, { "epoch": 0.36339532180735146, "grad_norm": 0.8950339997087701, "learning_rate": 1.4343991890369913e-05, "loss": 0.698, "step": 20010 }, { "epoch": 0.3635769286648264, "grad_norm": 0.8826039544203723, "learning_rate": 1.4338800303299007e-05, "loss": 0.691, "step": 20020 }, { "epoch": 0.3637585355223013, "grad_norm": 0.8804077814102227, "learning_rate": 1.4333607275210688e-05, "loss": 0.6995, "step": 20030 }, { "epoch": 0.3639401423797763, "grad_norm": 0.8927125105743179, "learning_rate": 1.4328412807829683e-05, "loss": 0.6995, "step": 20040 }, { "epoch": 0.3641217492372512, "grad_norm": 0.8772147239308498, "learning_rate": 1.4323216902881195e-05, "loss": 0.7042, "step": 20050 }, { "epoch": 0.36430335609472614, "grad_norm": 0.8895562164605667, "learning_rate": 1.4318019562090904e-05, "loss": 0.6949, "step": 20060 }, { "epoch": 0.36448496295220106, "grad_norm": 0.872723994101361, "learning_rate": 1.4312820787184971e-05, "loss": 0.696, "step": 20070 }, { "epoch": 0.364666569809676, "grad_norm": 0.8980068834620745, "learning_rate": 1.4307620579890031e-05, "loss": 0.6957, "step": 20080 }, { "epoch": 0.36484817666715097, "grad_norm": 0.889753032618202, "learning_rate": 1.4302418941933199e-05, "loss": 0.7046, "step": 20090 }, { "epoch": 0.3650297835246259, "grad_norm": 0.8506183957660441, "learning_rate": 1.4297215875042052e-05, "loss": 0.6902, "step": 20100 }, { "epoch": 0.3652113903821008, "grad_norm": 0.9361467866016022, "learning_rate": 1.4292011380944658e-05, "loss": 0.7139, "step": 20110 }, { "epoch": 0.36539299723957575, "grad_norm": 0.8897878607539397, "learning_rate": 1.428680546136955e-05, "loss": 0.6962, "step": 20120 }, { "epoch": 0.3655746040970507, "grad_norm": 0.8653487305476482, "learning_rate": 1.4281598118045737e-05, "loss": 0.6807, "step": 20130 }, { "epoch": 0.36575621095452565, "grad_norm": 0.9464142413892336, "learning_rate": 1.4276389352702697e-05, "loss": 0.7002, "step": 20140 }, { "epoch": 0.3659378178120006, "grad_norm": 0.8797045295150294, "learning_rate": 1.4271179167070385e-05, "loss": 0.7058, "step": 20150 }, { "epoch": 0.3661194246694755, "grad_norm": 0.8588010291464889, "learning_rate": 1.4265967562879226e-05, "loss": 0.6903, "step": 20160 }, { "epoch": 0.3663010315269505, "grad_norm": 0.9032027464348282, "learning_rate": 1.4260754541860115e-05, "loss": 0.7006, "step": 20170 }, { "epoch": 0.3664826383844254, "grad_norm": 0.9212618865924616, "learning_rate": 1.425554010574442e-05, "loss": 0.6991, "step": 20180 }, { "epoch": 0.36666424524190033, "grad_norm": 0.8770810329066694, "learning_rate": 1.4250324256263976e-05, "loss": 0.7014, "step": 20190 }, { "epoch": 0.36684585209937526, "grad_norm": 0.8954327406692167, "learning_rate": 1.4245106995151091e-05, "loss": 0.71, "step": 20200 }, { "epoch": 0.3670274589568502, "grad_norm": 0.8828958910113751, "learning_rate": 1.423988832413854e-05, "loss": 0.6999, "step": 20210 }, { "epoch": 0.36720906581432516, "grad_norm": 0.9035506489711634, "learning_rate": 1.4234668244959564e-05, "loss": 0.6974, "step": 20220 }, { "epoch": 0.3673906726718001, "grad_norm": 0.9097378086908089, "learning_rate": 1.4229446759347875e-05, "loss": 0.6952, "step": 20230 }, { "epoch": 0.367572279529275, "grad_norm": 0.9010790556598134, "learning_rate": 1.4224223869037652e-05, "loss": 0.7073, "step": 20240 }, { "epoch": 0.36775388638674994, "grad_norm": 0.8572666617646982, "learning_rate": 1.4218999575763537e-05, "loss": 0.6918, "step": 20250 }, { "epoch": 0.3679354932442249, "grad_norm": 0.9361653300485064, "learning_rate": 1.4213773881260641e-05, "loss": 0.6806, "step": 20260 }, { "epoch": 0.36811710010169985, "grad_norm": 0.85736239321795, "learning_rate": 1.4208546787264541e-05, "loss": 0.7129, "step": 20270 }, { "epoch": 0.36829870695917477, "grad_norm": 0.8807453468782929, "learning_rate": 1.4203318295511277e-05, "loss": 0.6853, "step": 20280 }, { "epoch": 0.3684803138166497, "grad_norm": 0.9211752731502358, "learning_rate": 1.4198088407737354e-05, "loss": 0.6887, "step": 20290 }, { "epoch": 0.3686619206741247, "grad_norm": 0.8686000586296269, "learning_rate": 1.419285712567974e-05, "loss": 0.6896, "step": 20300 }, { "epoch": 0.3688435275315996, "grad_norm": 0.8868098593556146, "learning_rate": 1.4187624451075865e-05, "loss": 0.6916, "step": 20310 }, { "epoch": 0.3690251343890745, "grad_norm": 0.9107495754792683, "learning_rate": 1.4182390385663628e-05, "loss": 0.6902, "step": 20320 }, { "epoch": 0.36920674124654945, "grad_norm": 0.909641090743303, "learning_rate": 1.4177154931181379e-05, "loss": 0.7085, "step": 20330 }, { "epoch": 0.36938834810402443, "grad_norm": 0.8804214982732581, "learning_rate": 1.4171918089367942e-05, "loss": 0.7011, "step": 20340 }, { "epoch": 0.36956995496149936, "grad_norm": 0.8872594790653872, "learning_rate": 1.416667986196259e-05, "loss": 0.7059, "step": 20350 }, { "epoch": 0.3697515618189743, "grad_norm": 0.9044335048701582, "learning_rate": 1.4161440250705061e-05, "loss": 0.6846, "step": 20360 }, { "epoch": 0.3699331686764492, "grad_norm": 0.9221234331983846, "learning_rate": 1.4156199257335554e-05, "loss": 0.6941, "step": 20370 }, { "epoch": 0.37011477553392413, "grad_norm": 0.8916063957276956, "learning_rate": 1.4150956883594728e-05, "loss": 0.6882, "step": 20380 }, { "epoch": 0.3702963823913991, "grad_norm": 0.9236113858903912, "learning_rate": 1.4145713131223696e-05, "loss": 0.694, "step": 20390 }, { "epoch": 0.37047798924887404, "grad_norm": 0.8799783199102378, "learning_rate": 1.4140468001964031e-05, "loss": 0.7084, "step": 20400 }, { "epoch": 0.37065959610634897, "grad_norm": 0.8859195361563634, "learning_rate": 1.4135221497557767e-05, "loss": 0.6922, "step": 20410 }, { "epoch": 0.3708412029638239, "grad_norm": 0.8923172032148603, "learning_rate": 1.4129973619747385e-05, "loss": 0.7034, "step": 20420 }, { "epoch": 0.37102280982129887, "grad_norm": 0.9361332896407534, "learning_rate": 1.4124724370275838e-05, "loss": 0.6992, "step": 20430 }, { "epoch": 0.3712044166787738, "grad_norm": 0.912442912946211, "learning_rate": 1.4119473750886514e-05, "loss": 0.6877, "step": 20440 }, { "epoch": 0.3713860235362487, "grad_norm": 0.910761219803232, "learning_rate": 1.4114221763323275e-05, "loss": 0.6984, "step": 20450 }, { "epoch": 0.37156763039372365, "grad_norm": 0.8624873516818544, "learning_rate": 1.4108968409330426e-05, "loss": 0.7115, "step": 20460 }, { "epoch": 0.3717492372511986, "grad_norm": 0.9339147183529165, "learning_rate": 1.4103713690652727e-05, "loss": 0.6931, "step": 20470 }, { "epoch": 0.37193084410867355, "grad_norm": 0.8887698442615414, "learning_rate": 1.4098457609035396e-05, "loss": 0.6876, "step": 20480 }, { "epoch": 0.3721124509661485, "grad_norm": 0.8691224880309936, "learning_rate": 1.4093200166224103e-05, "loss": 0.6951, "step": 20490 }, { "epoch": 0.3722940578236234, "grad_norm": 0.9200415086786425, "learning_rate": 1.4087941363964965e-05, "loss": 0.6853, "step": 20500 }, { "epoch": 0.3724756646810984, "grad_norm": 0.9422453268419506, "learning_rate": 1.4082681204004556e-05, "loss": 0.7106, "step": 20510 }, { "epoch": 0.3726572715385733, "grad_norm": 0.8855728488917739, "learning_rate": 1.4077419688089893e-05, "loss": 0.6844, "step": 20520 }, { "epoch": 0.37283887839604823, "grad_norm": 0.8834966139358932, "learning_rate": 1.4072156817968457e-05, "loss": 0.6951, "step": 20530 }, { "epoch": 0.37302048525352316, "grad_norm": 0.888649089408443, "learning_rate": 1.4066892595388168e-05, "loss": 0.6954, "step": 20540 }, { "epoch": 0.3732020921109981, "grad_norm": 0.894478472316927, "learning_rate": 1.4061627022097395e-05, "loss": 0.6987, "step": 20550 }, { "epoch": 0.37338369896847307, "grad_norm": 0.8838399315894668, "learning_rate": 1.405636009984496e-05, "loss": 0.6891, "step": 20560 }, { "epoch": 0.373565305825948, "grad_norm": 0.9260863402624984, "learning_rate": 1.4051091830380133e-05, "loss": 0.6832, "step": 20570 }, { "epoch": 0.3737469126834229, "grad_norm": 0.86554320039935, "learning_rate": 1.4045822215452627e-05, "loss": 0.6889, "step": 20580 }, { "epoch": 0.37392851954089784, "grad_norm": 0.8596934615979821, "learning_rate": 1.4040551256812607e-05, "loss": 0.6874, "step": 20590 }, { "epoch": 0.3741101263983728, "grad_norm": 0.8859305621321356, "learning_rate": 1.4035278956210682e-05, "loss": 0.7, "step": 20600 }, { "epoch": 0.37429173325584775, "grad_norm": 0.8909186221083121, "learning_rate": 1.4030005315397908e-05, "loss": 0.6971, "step": 20610 }, { "epoch": 0.3744733401133227, "grad_norm": 0.9567839365184977, "learning_rate": 1.402473033612578e-05, "loss": 0.6801, "step": 20620 }, { "epoch": 0.3746549469707976, "grad_norm": 0.9118985470415211, "learning_rate": 1.4019454020146248e-05, "loss": 0.71, "step": 20630 }, { "epoch": 0.3748365538282726, "grad_norm": 0.8879963164166405, "learning_rate": 1.40141763692117e-05, "loss": 0.6965, "step": 20640 }, { "epoch": 0.3750181606857475, "grad_norm": 0.8823108117026277, "learning_rate": 1.4008897385074962e-05, "loss": 0.6998, "step": 20650 }, { "epoch": 0.37519976754322243, "grad_norm": 0.8885637219434824, "learning_rate": 1.4003617069489315e-05, "loss": 0.6939, "step": 20660 }, { "epoch": 0.37538137440069735, "grad_norm": 0.8707765589224813, "learning_rate": 1.3998335424208471e-05, "loss": 0.6988, "step": 20670 }, { "epoch": 0.37556298125817233, "grad_norm": 0.8767481264080195, "learning_rate": 1.3993052450986591e-05, "loss": 0.6908, "step": 20680 }, { "epoch": 0.37574458811564726, "grad_norm": 0.9033736192345707, "learning_rate": 1.3987768151578273e-05, "loss": 0.6973, "step": 20690 }, { "epoch": 0.3759261949731222, "grad_norm": 0.8857781858081981, "learning_rate": 1.398248252773856e-05, "loss": 0.6982, "step": 20700 }, { "epoch": 0.3761078018305971, "grad_norm": 0.9075774484079883, "learning_rate": 1.3977195581222926e-05, "loss": 0.7006, "step": 20710 }, { "epoch": 0.37628940868807204, "grad_norm": 0.8630299734014656, "learning_rate": 1.3971907313787294e-05, "loss": 0.7012, "step": 20720 }, { "epoch": 0.376471015545547, "grad_norm": 0.9036244313341836, "learning_rate": 1.3966617727188023e-05, "loss": 0.6912, "step": 20730 }, { "epoch": 0.37665262240302194, "grad_norm": 0.9134116452919089, "learning_rate": 1.3961326823181908e-05, "loss": 0.6944, "step": 20740 }, { "epoch": 0.37683422926049687, "grad_norm": 0.9022290986808211, "learning_rate": 1.395603460352618e-05, "loss": 0.7022, "step": 20750 }, { "epoch": 0.3770158361179718, "grad_norm": 0.9458411336874668, "learning_rate": 1.395074106997851e-05, "loss": 0.6978, "step": 20760 }, { "epoch": 0.3771974429754468, "grad_norm": 0.8673629604057538, "learning_rate": 1.3945446224297007e-05, "loss": 0.6929, "step": 20770 }, { "epoch": 0.3773790498329217, "grad_norm": 0.8651262414187724, "learning_rate": 1.3940150068240212e-05, "loss": 0.6879, "step": 20780 }, { "epoch": 0.3775606566903966, "grad_norm": 0.9068114854464344, "learning_rate": 1.3934852603567102e-05, "loss": 0.6986, "step": 20790 }, { "epoch": 0.37774226354787155, "grad_norm": 0.9174944107649258, "learning_rate": 1.392955383203709e-05, "loss": 0.6949, "step": 20800 }, { "epoch": 0.37792387040534653, "grad_norm": 0.899828666933848, "learning_rate": 1.3924253755410021e-05, "loss": 0.6965, "step": 20810 }, { "epoch": 0.37810547726282145, "grad_norm": 0.9048283018024944, "learning_rate": 1.391895237544618e-05, "loss": 0.6902, "step": 20820 }, { "epoch": 0.3782870841202964, "grad_norm": 0.8932508116925603, "learning_rate": 1.3913649693906274e-05, "loss": 0.6833, "step": 20830 }, { "epoch": 0.3784686909777713, "grad_norm": 0.8916763116895345, "learning_rate": 1.3908345712551452e-05, "loss": 0.6961, "step": 20840 }, { "epoch": 0.3786502978352463, "grad_norm": 0.8546444252104827, "learning_rate": 1.3903040433143286e-05, "loss": 0.6729, "step": 20850 }, { "epoch": 0.3788319046927212, "grad_norm": 0.8694550299422351, "learning_rate": 1.389773385744379e-05, "loss": 0.6806, "step": 20860 }, { "epoch": 0.37901351155019614, "grad_norm": 1.716392534247026, "learning_rate": 1.3892425987215395e-05, "loss": 0.6979, "step": 20870 }, { "epoch": 0.37919511840767106, "grad_norm": 0.9408796881540091, "learning_rate": 1.3887116824220974e-05, "loss": 0.6765, "step": 20880 }, { "epoch": 0.379376725265146, "grad_norm": 0.8738935737737157, "learning_rate": 1.3881806370223827e-05, "loss": 0.6856, "step": 20890 }, { "epoch": 0.37955833212262097, "grad_norm": 0.8737506722390781, "learning_rate": 1.3876494626987672e-05, "loss": 0.6821, "step": 20900 }, { "epoch": 0.3797399389800959, "grad_norm": 0.9545466814299786, "learning_rate": 1.3871181596276673e-05, "loss": 0.7008, "step": 20910 }, { "epoch": 0.3799215458375708, "grad_norm": 0.9054650390238012, "learning_rate": 1.3865867279855405e-05, "loss": 0.6955, "step": 20920 }, { "epoch": 0.38010315269504574, "grad_norm": 0.9009802400491838, "learning_rate": 1.386055167948888e-05, "loss": 0.6977, "step": 20930 }, { "epoch": 0.3802847595525207, "grad_norm": 0.8884708137622271, "learning_rate": 1.3855234796942538e-05, "loss": 0.6813, "step": 20940 }, { "epoch": 0.38046636640999565, "grad_norm": 0.8470074009280415, "learning_rate": 1.3849916633982234e-05, "loss": 0.6885, "step": 20950 }, { "epoch": 0.3806479732674706, "grad_norm": 0.895461076597338, "learning_rate": 1.3844597192374258e-05, "loss": 0.6925, "step": 20960 }, { "epoch": 0.3808295801249455, "grad_norm": 0.8592588735080536, "learning_rate": 1.3839276473885319e-05, "loss": 0.6875, "step": 20970 }, { "epoch": 0.3810111869824205, "grad_norm": 0.8846892898760228, "learning_rate": 1.3833954480282554e-05, "loss": 0.6875, "step": 20980 }, { "epoch": 0.3811927938398954, "grad_norm": 0.8724698326159707, "learning_rate": 1.3828631213333523e-05, "loss": 0.6847, "step": 20990 }, { "epoch": 0.38137440069737033, "grad_norm": 0.8981663116753648, "learning_rate": 1.3823306674806207e-05, "loss": 0.6879, "step": 21000 }, { "epoch": 0.38155600755484526, "grad_norm": 0.9150089792844045, "learning_rate": 1.381798086646901e-05, "loss": 0.6726, "step": 21010 }, { "epoch": 0.38173761441232024, "grad_norm": 0.8752981474738286, "learning_rate": 1.3812653790090758e-05, "loss": 0.6954, "step": 21020 }, { "epoch": 0.38191922126979516, "grad_norm": 0.8946598781498593, "learning_rate": 1.3807325447440696e-05, "loss": 0.6856, "step": 21030 }, { "epoch": 0.3821008281272701, "grad_norm": 0.8613540963850705, "learning_rate": 1.3801995840288497e-05, "loss": 0.6973, "step": 21040 }, { "epoch": 0.382282434984745, "grad_norm": 0.9100408272839522, "learning_rate": 1.3796664970404242e-05, "loss": 0.6834, "step": 21050 }, { "epoch": 0.38246404184221994, "grad_norm": 0.8819447480041314, "learning_rate": 1.3791332839558446e-05, "loss": 0.6998, "step": 21060 }, { "epoch": 0.3826456486996949, "grad_norm": 0.8811719870697275, "learning_rate": 1.3785999449522027e-05, "loss": 0.6901, "step": 21070 }, { "epoch": 0.38282725555716984, "grad_norm": 0.8891064421232072, "learning_rate": 1.3780664802066333e-05, "loss": 0.6842, "step": 21080 }, { "epoch": 0.38300886241464477, "grad_norm": 0.8594500282050255, "learning_rate": 1.3775328898963123e-05, "loss": 0.6903, "step": 21090 }, { "epoch": 0.3831904692721197, "grad_norm": 0.8988124112764612, "learning_rate": 1.376999174198458e-05, "loss": 0.6847, "step": 21100 }, { "epoch": 0.3833720761295947, "grad_norm": 0.8502411425194443, "learning_rate": 1.3764653332903295e-05, "loss": 0.6779, "step": 21110 }, { "epoch": 0.3835536829870696, "grad_norm": 0.9336322267078436, "learning_rate": 1.375931367349228e-05, "loss": 0.6872, "step": 21120 }, { "epoch": 0.3837352898445445, "grad_norm": 0.8594344879735107, "learning_rate": 1.3753972765524962e-05, "loss": 0.6837, "step": 21130 }, { "epoch": 0.38391689670201945, "grad_norm": 0.8996979145625025, "learning_rate": 1.3748630610775182e-05, "loss": 0.6892, "step": 21140 }, { "epoch": 0.38409850355949443, "grad_norm": 0.8499434654307345, "learning_rate": 1.3743287211017197e-05, "loss": 0.6956, "step": 21150 }, { "epoch": 0.38428011041696936, "grad_norm": 0.9666589040008318, "learning_rate": 1.3737942568025672e-05, "loss": 0.6896, "step": 21160 }, { "epoch": 0.3844617172744443, "grad_norm": 0.8718453774338969, "learning_rate": 1.3732596683575689e-05, "loss": 0.6948, "step": 21170 }, { "epoch": 0.3846433241319192, "grad_norm": 0.8809763027892289, "learning_rate": 1.3727249559442741e-05, "loss": 0.6866, "step": 21180 }, { "epoch": 0.3848249309893942, "grad_norm": 0.8580767158414827, "learning_rate": 1.3721901197402735e-05, "loss": 0.681, "step": 21190 }, { "epoch": 0.3850065378468691, "grad_norm": 0.8620524823891071, "learning_rate": 1.371655159923199e-05, "loss": 0.6926, "step": 21200 }, { "epoch": 0.38518814470434404, "grad_norm": 0.9027752573518459, "learning_rate": 1.3711200766707227e-05, "loss": 0.6937, "step": 21210 }, { "epoch": 0.38536975156181896, "grad_norm": 0.9067479682298697, "learning_rate": 1.3705848701605586e-05, "loss": 0.6924, "step": 21220 }, { "epoch": 0.3855513584192939, "grad_norm": 0.9015729054109706, "learning_rate": 1.3700495405704614e-05, "loss": 0.6766, "step": 21230 }, { "epoch": 0.38573296527676887, "grad_norm": 0.8899402325662111, "learning_rate": 1.3695140880782267e-05, "loss": 0.6794, "step": 21240 }, { "epoch": 0.3859145721342438, "grad_norm": 0.8647918668197021, "learning_rate": 1.3689785128616911e-05, "loss": 0.6964, "step": 21250 }, { "epoch": 0.3860961789917187, "grad_norm": 0.8663362733825896, "learning_rate": 1.3684428150987308e-05, "loss": 0.6897, "step": 21260 }, { "epoch": 0.38627778584919364, "grad_norm": 0.8962415208550191, "learning_rate": 1.3679069949672643e-05, "loss": 0.6924, "step": 21270 }, { "epoch": 0.3864593927066686, "grad_norm": 0.8814373043788243, "learning_rate": 1.36737105264525e-05, "loss": 0.687, "step": 21280 }, { "epoch": 0.38664099956414355, "grad_norm": 0.8407230551409498, "learning_rate": 1.3668349883106866e-05, "loss": 0.695, "step": 21290 }, { "epoch": 0.3868226064216185, "grad_norm": 0.8653917797108309, "learning_rate": 1.3662988021416139e-05, "loss": 0.6917, "step": 21300 }, { "epoch": 0.3870042132790934, "grad_norm": 0.9296608431507187, "learning_rate": 1.3657624943161119e-05, "loss": 0.6972, "step": 21310 }, { "epoch": 0.3871858201365684, "grad_norm": 0.8965078819287042, "learning_rate": 1.3652260650123009e-05, "loss": 0.6899, "step": 21320 }, { "epoch": 0.3873674269940433, "grad_norm": 0.8395071179641687, "learning_rate": 1.3646895144083416e-05, "loss": 0.6784, "step": 21330 }, { "epoch": 0.38754903385151823, "grad_norm": 0.8675932765808668, "learning_rate": 1.3641528426824354e-05, "loss": 0.6993, "step": 21340 }, { "epoch": 0.38773064070899316, "grad_norm": 0.882958105550213, "learning_rate": 1.3636160500128234e-05, "loss": 0.6875, "step": 21350 }, { "epoch": 0.3879122475664681, "grad_norm": 0.8840696521022369, "learning_rate": 1.3630791365777872e-05, "loss": 0.6939, "step": 21360 }, { "epoch": 0.38809385442394306, "grad_norm": 0.9032404602471805, "learning_rate": 1.3625421025556477e-05, "loss": 0.6919, "step": 21370 }, { "epoch": 0.388275461281418, "grad_norm": 0.9084034652384319, "learning_rate": 1.3620049481247672e-05, "loss": 0.6886, "step": 21380 }, { "epoch": 0.3884570681388929, "grad_norm": 1.0221427527113314, "learning_rate": 1.361467673463547e-05, "loss": 0.6912, "step": 21390 }, { "epoch": 0.38863867499636784, "grad_norm": 0.9377246072833064, "learning_rate": 1.3609302787504289e-05, "loss": 0.6983, "step": 21400 }, { "epoch": 0.3888202818538428, "grad_norm": 0.8896918465617792, "learning_rate": 1.3603927641638939e-05, "loss": 0.7087, "step": 21410 }, { "epoch": 0.38900188871131774, "grad_norm": 0.873123307631195, "learning_rate": 1.3598551298824631e-05, "loss": 0.6903, "step": 21420 }, { "epoch": 0.38918349556879267, "grad_norm": 0.9312041671286989, "learning_rate": 1.3593173760846982e-05, "loss": 0.6965, "step": 21430 }, { "epoch": 0.3893651024262676, "grad_norm": 0.8799452733722161, "learning_rate": 1.3587795029491992e-05, "loss": 0.6926, "step": 21440 }, { "epoch": 0.3895467092837426, "grad_norm": 0.8863656167543523, "learning_rate": 1.358241510654607e-05, "loss": 0.6796, "step": 21450 }, { "epoch": 0.3897283161412175, "grad_norm": 0.8595968041425098, "learning_rate": 1.3577033993796006e-05, "loss": 0.6808, "step": 21460 }, { "epoch": 0.3899099229986924, "grad_norm": 0.86004702958011, "learning_rate": 1.3571651693029e-05, "loss": 0.6963, "step": 21470 }, { "epoch": 0.39009152985616735, "grad_norm": 0.8812970481417386, "learning_rate": 1.356626820603264e-05, "loss": 0.6816, "step": 21480 }, { "epoch": 0.39027313671364233, "grad_norm": 0.8952054100150756, "learning_rate": 1.3560883534594905e-05, "loss": 0.6874, "step": 21490 }, { "epoch": 0.39045474357111726, "grad_norm": 0.8417228040927086, "learning_rate": 1.3555497680504175e-05, "loss": 0.6781, "step": 21500 }, { "epoch": 0.3906363504285922, "grad_norm": 0.8717207272885588, "learning_rate": 1.3550110645549215e-05, "loss": 0.6715, "step": 21510 }, { "epoch": 0.3908179572860671, "grad_norm": 0.8895344570150528, "learning_rate": 1.3544722431519186e-05, "loss": 0.6855, "step": 21520 }, { "epoch": 0.39099956414354203, "grad_norm": 0.8572000857868555, "learning_rate": 1.3539333040203644e-05, "loss": 0.6967, "step": 21530 }, { "epoch": 0.391181171001017, "grad_norm": 0.8891758023553354, "learning_rate": 1.3533942473392529e-05, "loss": 0.6749, "step": 21540 }, { "epoch": 0.39136277785849194, "grad_norm": 0.8560285243394995, "learning_rate": 1.3528550732876178e-05, "loss": 0.6856, "step": 21550 }, { "epoch": 0.39154438471596686, "grad_norm": 0.8543022056475391, "learning_rate": 1.3523157820445312e-05, "loss": 0.6772, "step": 21560 }, { "epoch": 0.3917259915734418, "grad_norm": 0.8491311523424591, "learning_rate": 1.3517763737891043e-05, "loss": 0.6886, "step": 21570 }, { "epoch": 0.39190759843091677, "grad_norm": 0.8759354606797278, "learning_rate": 1.3512368487004875e-05, "loss": 0.6868, "step": 21580 }, { "epoch": 0.3920892052883917, "grad_norm": 0.9003824027817343, "learning_rate": 1.3506972069578694e-05, "loss": 0.6752, "step": 21590 }, { "epoch": 0.3922708121458666, "grad_norm": 0.8704139501120358, "learning_rate": 1.3501574487404781e-05, "loss": 0.6684, "step": 21600 }, { "epoch": 0.39245241900334155, "grad_norm": 0.8541748155097324, "learning_rate": 1.3496175742275796e-05, "loss": 0.6726, "step": 21610 }, { "epoch": 0.3926340258608165, "grad_norm": 0.9081709006245529, "learning_rate": 1.3490775835984793e-05, "loss": 0.6802, "step": 21620 }, { "epoch": 0.39281563271829145, "grad_norm": 0.891529234537397, "learning_rate": 1.3485374770325202e-05, "loss": 0.701, "step": 21630 }, { "epoch": 0.3929972395757664, "grad_norm": 0.840655684656273, "learning_rate": 1.3479972547090849e-05, "loss": 0.6712, "step": 21640 }, { "epoch": 0.3931788464332413, "grad_norm": 0.8385444612941525, "learning_rate": 1.347456916807594e-05, "loss": 0.677, "step": 21650 }, { "epoch": 0.3933604532907163, "grad_norm": 0.870871809647154, "learning_rate": 1.3469164635075056e-05, "loss": 0.6855, "step": 21660 }, { "epoch": 0.3935420601481912, "grad_norm": 0.8868414299238548, "learning_rate": 1.3463758949883177e-05, "loss": 0.6764, "step": 21670 }, { "epoch": 0.39372366700566613, "grad_norm": 0.8774854610893774, "learning_rate": 1.3458352114295655e-05, "loss": 0.6753, "step": 21680 }, { "epoch": 0.39390527386314106, "grad_norm": 0.898103709694794, "learning_rate": 1.3452944130108225e-05, "loss": 0.6797, "step": 21690 }, { "epoch": 0.394086880720616, "grad_norm": 0.8636862389432102, "learning_rate": 1.3447534999117009e-05, "loss": 0.6963, "step": 21700 }, { "epoch": 0.39426848757809096, "grad_norm": 0.860409071432174, "learning_rate": 1.3442124723118504e-05, "loss": 0.6709, "step": 21710 }, { "epoch": 0.3944500944355659, "grad_norm": 0.8988962590756923, "learning_rate": 1.3436713303909592e-05, "loss": 0.6905, "step": 21720 }, { "epoch": 0.3946317012930408, "grad_norm": 0.8550790013231454, "learning_rate": 1.343130074328753e-05, "loss": 0.6842, "step": 21730 }, { "epoch": 0.39481330815051574, "grad_norm": 0.8700654928906815, "learning_rate": 1.3425887043049959e-05, "loss": 0.6938, "step": 21740 }, { "epoch": 0.3949949150079907, "grad_norm": 0.907061735721477, "learning_rate": 1.3420472204994894e-05, "loss": 0.668, "step": 21750 }, { "epoch": 0.39517652186546565, "grad_norm": 0.918346241396769, "learning_rate": 1.341505623092073e-05, "loss": 0.6883, "step": 21760 }, { "epoch": 0.39535812872294057, "grad_norm": 0.8821557683742209, "learning_rate": 1.3409639122626238e-05, "loss": 0.6826, "step": 21770 }, { "epoch": 0.3955397355804155, "grad_norm": 1.0224567137285878, "learning_rate": 1.340422088191057e-05, "loss": 0.6802, "step": 21780 }, { "epoch": 0.3957213424378905, "grad_norm": 0.8704976936240038, "learning_rate": 1.339880151057325e-05, "loss": 0.6945, "step": 21790 }, { "epoch": 0.3959029492953654, "grad_norm": 0.8870718385539103, "learning_rate": 1.3393381010414174e-05, "loss": 0.6976, "step": 21800 }, { "epoch": 0.39608455615284033, "grad_norm": 0.8642054291359935, "learning_rate": 1.3387959383233622e-05, "loss": 0.6748, "step": 21810 }, { "epoch": 0.39626616301031525, "grad_norm": 0.8696918019820019, "learning_rate": 1.3382536630832244e-05, "loss": 0.6897, "step": 21820 }, { "epoch": 0.39644776986779023, "grad_norm": 0.8770648764916628, "learning_rate": 1.337711275501106e-05, "loss": 0.6919, "step": 21830 }, { "epoch": 0.39662937672526516, "grad_norm": 0.8826156181581264, "learning_rate": 1.3371687757571467e-05, "loss": 0.7009, "step": 21840 }, { "epoch": 0.3968109835827401, "grad_norm": 0.8854291860802537, "learning_rate": 1.3366261640315239e-05, "loss": 0.6851, "step": 21850 }, { "epoch": 0.396992590440215, "grad_norm": 0.8789767820024901, "learning_rate": 1.3360834405044512e-05, "loss": 0.6792, "step": 21860 }, { "epoch": 0.39717419729768993, "grad_norm": 0.8964315412799748, "learning_rate": 1.3355406053561797e-05, "loss": 0.6788, "step": 21870 }, { "epoch": 0.3973558041551649, "grad_norm": 0.8985723596202548, "learning_rate": 1.334997658766998e-05, "loss": 0.6938, "step": 21880 }, { "epoch": 0.39753741101263984, "grad_norm": 0.9152850713213867, "learning_rate": 1.3344546009172313e-05, "loss": 0.6837, "step": 21890 }, { "epoch": 0.39771901787011477, "grad_norm": 0.8588099882813556, "learning_rate": 1.333911431987242e-05, "loss": 0.6837, "step": 21900 }, { "epoch": 0.3979006247275897, "grad_norm": 0.8683843174939682, "learning_rate": 1.3333681521574288e-05, "loss": 0.6827, "step": 21910 }, { "epoch": 0.39808223158506467, "grad_norm": 0.8885892933793871, "learning_rate": 1.3328247616082281e-05, "loss": 0.6768, "step": 21920 }, { "epoch": 0.3982638384425396, "grad_norm": 0.8604945244733888, "learning_rate": 1.3322812605201128e-05, "loss": 0.6706, "step": 21930 }, { "epoch": 0.3984454453000145, "grad_norm": 0.8355790987582502, "learning_rate": 1.331737649073592e-05, "loss": 0.6806, "step": 21940 }, { "epoch": 0.39862705215748945, "grad_norm": 0.858607374165582, "learning_rate": 1.3311939274492123e-05, "loss": 0.6697, "step": 21950 }, { "epoch": 0.39880865901496443, "grad_norm": 0.8478400419621348, "learning_rate": 1.3306500958275557e-05, "loss": 0.6797, "step": 21960 }, { "epoch": 0.39899026587243935, "grad_norm": 0.8967871823698471, "learning_rate": 1.3301061543892419e-05, "loss": 0.6772, "step": 21970 }, { "epoch": 0.3991718727299143, "grad_norm": 0.8923525434746729, "learning_rate": 1.3295621033149266e-05, "loss": 0.6853, "step": 21980 }, { "epoch": 0.3993534795873892, "grad_norm": 0.8366048502284069, "learning_rate": 1.3290179427853016e-05, "loss": 0.6671, "step": 21990 }, { "epoch": 0.3995350864448642, "grad_norm": 0.8711429444431418, "learning_rate": 1.3284736729810958e-05, "loss": 0.6962, "step": 22000 }, { "epoch": 0.3997166933023391, "grad_norm": 0.8826066262132445, "learning_rate": 1.327929294083074e-05, "loss": 0.6944, "step": 22010 }, { "epoch": 0.39989830015981404, "grad_norm": 0.8714590404420856, "learning_rate": 1.3273848062720367e-05, "loss": 0.6964, "step": 22020 }, { "epoch": 0.40007990701728896, "grad_norm": 0.888728123697888, "learning_rate": 1.3268402097288218e-05, "loss": 0.6946, "step": 22030 }, { "epoch": 0.4002615138747639, "grad_norm": 0.8660612392294491, "learning_rate": 1.3262955046343021e-05, "loss": 0.6812, "step": 22040 }, { "epoch": 0.40044312073223887, "grad_norm": 0.8470641820820065, "learning_rate": 1.325750691169387e-05, "loss": 0.681, "step": 22050 }, { "epoch": 0.4006247275897138, "grad_norm": 0.8615349946305957, "learning_rate": 1.3252057695150218e-05, "loss": 0.6884, "step": 22060 }, { "epoch": 0.4008063344471887, "grad_norm": 0.8907519835006787, "learning_rate": 1.3246607398521878e-05, "loss": 0.6926, "step": 22070 }, { "epoch": 0.40098794130466364, "grad_norm": 0.9069328357352396, "learning_rate": 1.3241156023619023e-05, "loss": 0.6915, "step": 22080 }, { "epoch": 0.4011695481621386, "grad_norm": 0.8734811435854936, "learning_rate": 1.3235703572252184e-05, "loss": 0.6757, "step": 22090 }, { "epoch": 0.40135115501961355, "grad_norm": 0.8750242139543394, "learning_rate": 1.3230250046232243e-05, "loss": 0.6874, "step": 22100 }, { "epoch": 0.4015327618770885, "grad_norm": 0.8804561791656565, "learning_rate": 1.3224795447370445e-05, "loss": 0.6724, "step": 22110 }, { "epoch": 0.4017143687345634, "grad_norm": 0.831227831841514, "learning_rate": 1.3219339777478392e-05, "loss": 0.685, "step": 22120 }, { "epoch": 0.4018959755920384, "grad_norm": 0.868565476695902, "learning_rate": 1.3213883038368042e-05, "loss": 0.6816, "step": 22130 }, { "epoch": 0.4020775824495133, "grad_norm": 0.873881162185792, "learning_rate": 1.3208425231851705e-05, "loss": 0.6915, "step": 22140 }, { "epoch": 0.40225918930698823, "grad_norm": 0.8745121354608111, "learning_rate": 1.3202966359742046e-05, "loss": 0.686, "step": 22150 }, { "epoch": 0.40244079616446315, "grad_norm": 0.8895860555212215, "learning_rate": 1.3197506423852085e-05, "loss": 0.6775, "step": 22160 }, { "epoch": 0.40262240302193814, "grad_norm": 0.8669259504025708, "learning_rate": 1.319204542599519e-05, "loss": 0.6957, "step": 22170 }, { "epoch": 0.40280400987941306, "grad_norm": 0.924203150266211, "learning_rate": 1.3186583367985094e-05, "loss": 0.7068, "step": 22180 }, { "epoch": 0.402985616736888, "grad_norm": 0.8719449841487883, "learning_rate": 1.318112025163587e-05, "loss": 0.6886, "step": 22190 }, { "epoch": 0.4031672235943629, "grad_norm": 0.9235265057531709, "learning_rate": 1.317565607876195e-05, "loss": 0.6896, "step": 22200 }, { "epoch": 0.40334883045183784, "grad_norm": 0.8747926050880546, "learning_rate": 1.317019085117811e-05, "loss": 0.7053, "step": 22210 }, { "epoch": 0.4035304373093128, "grad_norm": 0.8667429236674613, "learning_rate": 1.3164724570699485e-05, "loss": 0.6714, "step": 22220 }, { "epoch": 0.40371204416678774, "grad_norm": 0.8414258539038306, "learning_rate": 1.3159257239141552e-05, "loss": 0.6761, "step": 22230 }, { "epoch": 0.40389365102426267, "grad_norm": 0.8831217640329011, "learning_rate": 1.3153788858320142e-05, "loss": 0.679, "step": 22240 }, { "epoch": 0.4040752578817376, "grad_norm": 0.8962578513575354, "learning_rate": 1.3148319430051432e-05, "loss": 0.6851, "step": 22250 }, { "epoch": 0.4042568647392126, "grad_norm": 0.895207380961699, "learning_rate": 1.3142848956151948e-05, "loss": 0.6772, "step": 22260 }, { "epoch": 0.4044384715966875, "grad_norm": 0.8561154533042808, "learning_rate": 1.313737743843856e-05, "loss": 0.6852, "step": 22270 }, { "epoch": 0.4046200784541624, "grad_norm": 0.8820608682459975, "learning_rate": 1.313190487872849e-05, "loss": 0.6824, "step": 22280 }, { "epoch": 0.40480168531163735, "grad_norm": 0.9500525014592583, "learning_rate": 1.3126431278839302e-05, "loss": 0.6818, "step": 22290 }, { "epoch": 0.40498329216911233, "grad_norm": 0.8591544879839935, "learning_rate": 1.3120956640588909e-05, "loss": 0.6859, "step": 22300 }, { "epoch": 0.40516489902658726, "grad_norm": 0.8804960587063317, "learning_rate": 1.3115480965795564e-05, "loss": 0.6877, "step": 22310 }, { "epoch": 0.4053465058840622, "grad_norm": 0.864983214377168, "learning_rate": 1.3110004256277871e-05, "loss": 0.6678, "step": 22320 }, { "epoch": 0.4055281127415371, "grad_norm": 0.8513045408547216, "learning_rate": 1.310452651385477e-05, "loss": 0.6852, "step": 22330 }, { "epoch": 0.4057097195990121, "grad_norm": 0.8713544627564072, "learning_rate": 1.3099047740345552e-05, "loss": 0.6778, "step": 22340 }, { "epoch": 0.405891326456487, "grad_norm": 0.8771035207154798, "learning_rate": 1.3093567937569845e-05, "loss": 0.6822, "step": 22350 }, { "epoch": 0.40607293331396194, "grad_norm": 0.8578104871405964, "learning_rate": 1.3088087107347616e-05, "loss": 0.6841, "step": 22360 }, { "epoch": 0.40625454017143686, "grad_norm": 0.8743368222839246, "learning_rate": 1.308260525149918e-05, "loss": 0.6794, "step": 22370 }, { "epoch": 0.4064361470289118, "grad_norm": 0.8993015208242309, "learning_rate": 1.3077122371845193e-05, "loss": 0.6912, "step": 22380 }, { "epoch": 0.40661775388638677, "grad_norm": 0.8518500492035496, "learning_rate": 1.3071638470206642e-05, "loss": 0.6833, "step": 22390 }, { "epoch": 0.4067993607438617, "grad_norm": 0.8678605921015824, "learning_rate": 1.3066153548404863e-05, "loss": 0.6859, "step": 22400 }, { "epoch": 0.4069809676013366, "grad_norm": 0.8636791986228648, "learning_rate": 1.3060667608261529e-05, "loss": 0.6853, "step": 22410 }, { "epoch": 0.40716257445881154, "grad_norm": 0.8870213278004005, "learning_rate": 1.3055180651598647e-05, "loss": 0.6839, "step": 22420 }, { "epoch": 0.4073441813162865, "grad_norm": 0.8489232544936782, "learning_rate": 1.3049692680238563e-05, "loss": 0.6904, "step": 22430 }, { "epoch": 0.40752578817376145, "grad_norm": 0.8884888160658975, "learning_rate": 1.3044203696003965e-05, "loss": 0.6779, "step": 22440 }, { "epoch": 0.4077073950312364, "grad_norm": 0.8591274544577754, "learning_rate": 1.3038713700717872e-05, "loss": 0.6779, "step": 22450 }, { "epoch": 0.4078890018887113, "grad_norm": 0.8929204716938948, "learning_rate": 1.3033222696203636e-05, "loss": 0.6686, "step": 22460 }, { "epoch": 0.4080706087461863, "grad_norm": 0.9052120399356932, "learning_rate": 1.3027730684284954e-05, "loss": 0.6944, "step": 22470 }, { "epoch": 0.4082522156036612, "grad_norm": 0.8537984602744831, "learning_rate": 1.3022237666785849e-05, "loss": 0.6745, "step": 22480 }, { "epoch": 0.40843382246113613, "grad_norm": 0.9056038184973874, "learning_rate": 1.301674364553068e-05, "loss": 0.6818, "step": 22490 }, { "epoch": 0.40861542931861106, "grad_norm": 0.8754610011228721, "learning_rate": 1.3011248622344146e-05, "loss": 0.6871, "step": 22500 }, { "epoch": 0.408797036176086, "grad_norm": 0.9046496946813933, "learning_rate": 1.3005752599051267e-05, "loss": 0.6921, "step": 22510 }, { "epoch": 0.40897864303356096, "grad_norm": 0.875849050878878, "learning_rate": 1.3000255577477404e-05, "loss": 0.6855, "step": 22520 }, { "epoch": 0.4091602498910359, "grad_norm": 0.8842574305672367, "learning_rate": 1.2994757559448249e-05, "loss": 0.6788, "step": 22530 }, { "epoch": 0.4093418567485108, "grad_norm": 0.8525885278055123, "learning_rate": 1.2989258546789818e-05, "loss": 0.6837, "step": 22540 }, { "epoch": 0.40952346360598574, "grad_norm": 0.8886929379905436, "learning_rate": 1.2983758541328468e-05, "loss": 0.6872, "step": 22550 }, { "epoch": 0.4097050704634607, "grad_norm": 0.9235424468543451, "learning_rate": 1.2978257544890875e-05, "loss": 0.687, "step": 22560 }, { "epoch": 0.40988667732093564, "grad_norm": 0.9225548302641426, "learning_rate": 1.2972755559304048e-05, "loss": 0.6841, "step": 22570 }, { "epoch": 0.41006828417841057, "grad_norm": 0.8741686968193388, "learning_rate": 1.2967252586395333e-05, "loss": 0.6871, "step": 22580 }, { "epoch": 0.4102498910358855, "grad_norm": 0.9423865678611831, "learning_rate": 1.296174862799239e-05, "loss": 0.6895, "step": 22590 }, { "epoch": 0.4104314978933605, "grad_norm": 0.8745732028575944, "learning_rate": 1.2956243685923214e-05, "loss": 0.6949, "step": 22600 }, { "epoch": 0.4106131047508354, "grad_norm": 0.8889998889227935, "learning_rate": 1.2950737762016124e-05, "loss": 0.6774, "step": 22610 }, { "epoch": 0.4107947116083103, "grad_norm": 0.8926722217867543, "learning_rate": 1.2945230858099771e-05, "loss": 0.6811, "step": 22620 }, { "epoch": 0.41097631846578525, "grad_norm": 0.8802782760264347, "learning_rate": 1.2939722976003123e-05, "loss": 0.6743, "step": 22630 }, { "epoch": 0.41115792532326023, "grad_norm": 0.8674021560858667, "learning_rate": 1.293421411755548e-05, "loss": 0.6793, "step": 22640 }, { "epoch": 0.41133953218073516, "grad_norm": 0.9141377636480799, "learning_rate": 1.2928704284586461e-05, "loss": 0.6961, "step": 22650 }, { "epoch": 0.4115211390382101, "grad_norm": 0.885130988672142, "learning_rate": 1.2923193478926012e-05, "loss": 0.6707, "step": 22660 }, { "epoch": 0.411702745895685, "grad_norm": 0.8686498668492546, "learning_rate": 1.29176817024044e-05, "loss": 0.6824, "step": 22670 }, { "epoch": 0.41188435275315993, "grad_norm": 0.9107598534363941, "learning_rate": 1.2912168956852216e-05, "loss": 0.7052, "step": 22680 }, { "epoch": 0.4120659596106349, "grad_norm": 0.8699598594880356, "learning_rate": 1.2906655244100372e-05, "loss": 0.6815, "step": 22690 }, { "epoch": 0.41224756646810984, "grad_norm": 0.861787086794451, "learning_rate": 1.2901140565980099e-05, "loss": 0.6766, "step": 22700 }, { "epoch": 0.41242917332558476, "grad_norm": 0.8677050534300719, "learning_rate": 1.2895624924322953e-05, "loss": 0.6881, "step": 22710 }, { "epoch": 0.4126107801830597, "grad_norm": 0.8951783113458742, "learning_rate": 1.2890108320960812e-05, "loss": 0.6873, "step": 22720 }, { "epoch": 0.41279238704053467, "grad_norm": 0.8973096055064753, "learning_rate": 1.2884590757725864e-05, "loss": 0.6804, "step": 22730 }, { "epoch": 0.4129739938980096, "grad_norm": 0.8973825040756296, "learning_rate": 1.2879072236450623e-05, "loss": 0.6903, "step": 22740 }, { "epoch": 0.4131556007554845, "grad_norm": 0.8592635840739656, "learning_rate": 1.287355275896792e-05, "loss": 0.6887, "step": 22750 }, { "epoch": 0.41333720761295945, "grad_norm": 0.868684739063534, "learning_rate": 1.2868032327110904e-05, "loss": 0.6821, "step": 22760 }, { "epoch": 0.4135188144704344, "grad_norm": 0.8508104532991784, "learning_rate": 1.2862510942713038e-05, "loss": 0.6775, "step": 22770 }, { "epoch": 0.41370042132790935, "grad_norm": 0.8630823132507881, "learning_rate": 1.2856988607608103e-05, "loss": 0.6811, "step": 22780 }, { "epoch": 0.4138820281853843, "grad_norm": 0.8654520520110035, "learning_rate": 1.2851465323630196e-05, "loss": 0.6764, "step": 22790 }, { "epoch": 0.4140636350428592, "grad_norm": 0.8681315637207401, "learning_rate": 1.2845941092613731e-05, "loss": 0.6987, "step": 22800 }, { "epoch": 0.4142452419003342, "grad_norm": 0.8677396658042009, "learning_rate": 1.2840415916393434e-05, "loss": 0.6702, "step": 22810 }, { "epoch": 0.4144268487578091, "grad_norm": 0.9200654592796644, "learning_rate": 1.2834889796804345e-05, "loss": 0.6779, "step": 22820 }, { "epoch": 0.41460845561528403, "grad_norm": 0.919958064768274, "learning_rate": 1.282936273568182e-05, "loss": 0.6945, "step": 22830 }, { "epoch": 0.41479006247275896, "grad_norm": 0.8187109625711552, "learning_rate": 1.2823834734861525e-05, "loss": 0.6814, "step": 22840 }, { "epoch": 0.4149716693302339, "grad_norm": 0.8986341282000442, "learning_rate": 1.2818305796179438e-05, "loss": 0.6931, "step": 22850 }, { "epoch": 0.41515327618770886, "grad_norm": 0.8659863739332383, "learning_rate": 1.281277592147185e-05, "loss": 0.6596, "step": 22860 }, { "epoch": 0.4153348830451838, "grad_norm": 0.8809974797163049, "learning_rate": 1.2807245112575359e-05, "loss": 0.6727, "step": 22870 }, { "epoch": 0.4155164899026587, "grad_norm": 0.8826342766640277, "learning_rate": 1.2801713371326883e-05, "loss": 0.6732, "step": 22880 }, { "epoch": 0.41569809676013364, "grad_norm": 0.8539578168966234, "learning_rate": 1.2796180699563637e-05, "loss": 0.673, "step": 22890 }, { "epoch": 0.4158797036176086, "grad_norm": 0.8397978453209439, "learning_rate": 1.2790647099123153e-05, "loss": 0.6866, "step": 22900 }, { "epoch": 0.41606131047508355, "grad_norm": 0.8660733413730304, "learning_rate": 1.2785112571843268e-05, "loss": 0.6843, "step": 22910 }, { "epoch": 0.41624291733255847, "grad_norm": 0.900424644755407, "learning_rate": 1.2779577119562132e-05, "loss": 0.6853, "step": 22920 }, { "epoch": 0.4164245241900334, "grad_norm": 0.991707045997552, "learning_rate": 1.2774040744118196e-05, "loss": 0.6963, "step": 22930 }, { "epoch": 0.4166061310475084, "grad_norm": 0.8834397585205057, "learning_rate": 1.2768503447350224e-05, "loss": 0.6862, "step": 22940 }, { "epoch": 0.4167877379049833, "grad_norm": 0.8968429556262943, "learning_rate": 1.276296523109728e-05, "loss": 0.6734, "step": 22950 }, { "epoch": 0.4169693447624582, "grad_norm": 0.9003458183737856, "learning_rate": 1.2757426097198735e-05, "loss": 0.6751, "step": 22960 }, { "epoch": 0.41715095161993315, "grad_norm": 0.8382065093858797, "learning_rate": 1.2751886047494266e-05, "loss": 0.7017, "step": 22970 }, { "epoch": 0.41733255847740813, "grad_norm": 0.8839516529702944, "learning_rate": 1.274634508382385e-05, "loss": 0.6965, "step": 22980 }, { "epoch": 0.41751416533488306, "grad_norm": 0.8964860845984767, "learning_rate": 1.2740803208027775e-05, "loss": 0.6754, "step": 22990 }, { "epoch": 0.417695772192358, "grad_norm": 0.8479704448574812, "learning_rate": 1.2735260421946627e-05, "loss": 0.6842, "step": 23000 }, { "epoch": 0.4178773790498329, "grad_norm": 0.8514583879961084, "learning_rate": 1.2729716727421297e-05, "loss": 0.6733, "step": 23010 }, { "epoch": 0.41805898590730783, "grad_norm": 0.8822489753441267, "learning_rate": 1.2724172126292973e-05, "loss": 0.6853, "step": 23020 }, { "epoch": 0.4182405927647828, "grad_norm": 0.8570841109532422, "learning_rate": 1.271862662040315e-05, "loss": 0.6776, "step": 23030 }, { "epoch": 0.41842219962225774, "grad_norm": 0.8992100070228833, "learning_rate": 1.2713080211593618e-05, "loss": 0.6919, "step": 23040 }, { "epoch": 0.41860380647973267, "grad_norm": 0.9168918572480292, "learning_rate": 1.270753290170647e-05, "loss": 0.6988, "step": 23050 }, { "epoch": 0.4187854133372076, "grad_norm": 0.8522459316260586, "learning_rate": 1.2701984692584102e-05, "loss": 0.6857, "step": 23060 }, { "epoch": 0.41896702019468257, "grad_norm": 0.8670474641381487, "learning_rate": 1.2696435586069198e-05, "loss": 0.6801, "step": 23070 }, { "epoch": 0.4191486270521575, "grad_norm": 0.869312044354886, "learning_rate": 1.2690885584004753e-05, "loss": 0.678, "step": 23080 }, { "epoch": 0.4193302339096324, "grad_norm": 0.8542766148955654, "learning_rate": 1.2685334688234046e-05, "loss": 0.6873, "step": 23090 }, { "epoch": 0.41951184076710735, "grad_norm": 0.8437157900034225, "learning_rate": 1.2679782900600665e-05, "loss": 0.6791, "step": 23100 }, { "epoch": 0.4196934476245823, "grad_norm": 0.8898541600398939, "learning_rate": 1.2674230222948484e-05, "loss": 0.681, "step": 23110 }, { "epoch": 0.41987505448205725, "grad_norm": 0.8871802087913485, "learning_rate": 1.2668676657121686e-05, "loss": 0.6872, "step": 23120 }, { "epoch": 0.4200566613395322, "grad_norm": 0.8863036494517929, "learning_rate": 1.2663122204964734e-05, "loss": 0.6853, "step": 23130 }, { "epoch": 0.4202382681970071, "grad_norm": 0.8479659841031219, "learning_rate": 1.2657566868322391e-05, "loss": 0.6556, "step": 23140 }, { "epoch": 0.4204198750544821, "grad_norm": 0.8534000510719413, "learning_rate": 1.2652010649039718e-05, "loss": 0.6695, "step": 23150 }, { "epoch": 0.420601481911957, "grad_norm": 0.9103916389045764, "learning_rate": 1.264645354896207e-05, "loss": 0.6814, "step": 23160 }, { "epoch": 0.42078308876943193, "grad_norm": 0.8473504325372816, "learning_rate": 1.2640895569935081e-05, "loss": 0.6708, "step": 23170 }, { "epoch": 0.42096469562690686, "grad_norm": 0.862428400613628, "learning_rate": 1.263533671380469e-05, "loss": 0.6703, "step": 23180 }, { "epoch": 0.4211463024843818, "grad_norm": 0.9024102934077403, "learning_rate": 1.2629776982417125e-05, "loss": 0.6829, "step": 23190 }, { "epoch": 0.42132790934185677, "grad_norm": 0.8588895739299292, "learning_rate": 1.2624216377618903e-05, "loss": 0.6788, "step": 23200 }, { "epoch": 0.4215095161993317, "grad_norm": 0.8739684249592584, "learning_rate": 1.2618654901256831e-05, "loss": 0.6766, "step": 23210 }, { "epoch": 0.4216911230568066, "grad_norm": 0.8721625190299104, "learning_rate": 1.2613092555178004e-05, "loss": 0.6853, "step": 23220 }, { "epoch": 0.42187272991428154, "grad_norm": 0.8671431804416726, "learning_rate": 1.260752934122981e-05, "loss": 0.7028, "step": 23230 }, { "epoch": 0.4220543367717565, "grad_norm": 0.8734848256676855, "learning_rate": 1.2601965261259923e-05, "loss": 0.6752, "step": 23240 }, { "epoch": 0.42223594362923145, "grad_norm": 0.8278360317534222, "learning_rate": 1.2596400317116301e-05, "loss": 0.6909, "step": 23250 }, { "epoch": 0.4224175504867064, "grad_norm": 0.8581048429108902, "learning_rate": 1.25908345106472e-05, "loss": 0.6789, "step": 23260 }, { "epoch": 0.4225991573441813, "grad_norm": 0.8497553929695907, "learning_rate": 1.2585267843701148e-05, "loss": 0.6717, "step": 23270 }, { "epoch": 0.4227807642016563, "grad_norm": 0.8668529460078384, "learning_rate": 1.2579700318126968e-05, "loss": 0.6798, "step": 23280 }, { "epoch": 0.4229623710591312, "grad_norm": 0.8756789939934168, "learning_rate": 1.2574131935773765e-05, "loss": 0.6885, "step": 23290 }, { "epoch": 0.42314397791660613, "grad_norm": 0.8743165426779149, "learning_rate": 1.2568562698490934e-05, "loss": 0.6841, "step": 23300 }, { "epoch": 0.42332558477408105, "grad_norm": 0.8482818495368647, "learning_rate": 1.256299260812814e-05, "loss": 0.688, "step": 23310 }, { "epoch": 0.42350719163155603, "grad_norm": 0.8906067421887021, "learning_rate": 1.2557421666535348e-05, "loss": 0.6936, "step": 23320 }, { "epoch": 0.42368879848903096, "grad_norm": 0.8482602578417533, "learning_rate": 1.25518498755628e-05, "loss": 0.6918, "step": 23330 }, { "epoch": 0.4238704053465059, "grad_norm": 0.8959277101164895, "learning_rate": 1.2546277237061016e-05, "loss": 0.6892, "step": 23340 }, { "epoch": 0.4240520122039808, "grad_norm": 0.870397150414147, "learning_rate": 1.2540703752880797e-05, "loss": 0.6667, "step": 23350 }, { "epoch": 0.42423361906145574, "grad_norm": 0.844327717737066, "learning_rate": 1.2535129424873233e-05, "loss": 0.6757, "step": 23360 }, { "epoch": 0.4244152259189307, "grad_norm": 0.8540805633446148, "learning_rate": 1.2529554254889687e-05, "loss": 0.6802, "step": 23370 }, { "epoch": 0.42459683277640564, "grad_norm": 0.8877849097601166, "learning_rate": 1.25239782447818e-05, "loss": 0.6891, "step": 23380 }, { "epoch": 0.42477843963388057, "grad_norm": 0.9042935967884171, "learning_rate": 1.25184013964015e-05, "loss": 0.6793, "step": 23390 }, { "epoch": 0.4249600464913555, "grad_norm": 0.859493445380741, "learning_rate": 1.251282371160099e-05, "loss": 0.6663, "step": 23400 }, { "epoch": 0.4251416533488305, "grad_norm": 0.8701894674742636, "learning_rate": 1.250724519223275e-05, "loss": 0.6736, "step": 23410 }, { "epoch": 0.4253232602063054, "grad_norm": 0.8781743758088699, "learning_rate": 1.2501665840149532e-05, "loss": 0.6921, "step": 23420 }, { "epoch": 0.4255048670637803, "grad_norm": 0.8845747307256648, "learning_rate": 1.2496085657204378e-05, "loss": 0.6756, "step": 23430 }, { "epoch": 0.42568647392125525, "grad_norm": 0.8494289633207055, "learning_rate": 1.249050464525059e-05, "loss": 0.6845, "step": 23440 }, { "epoch": 0.42586808077873023, "grad_norm": 0.8755137203074164, "learning_rate": 1.2484922806141757e-05, "loss": 0.6766, "step": 23450 }, { "epoch": 0.42604968763620515, "grad_norm": 0.8885127632495412, "learning_rate": 1.2479340141731742e-05, "loss": 0.6701, "step": 23460 }, { "epoch": 0.4262312944936801, "grad_norm": 0.940253072005436, "learning_rate": 1.2473756653874672e-05, "loss": 0.6783, "step": 23470 }, { "epoch": 0.426412901351155, "grad_norm": 0.8924930603772194, "learning_rate": 1.2468172344424956e-05, "loss": 0.679, "step": 23480 }, { "epoch": 0.42659450820863, "grad_norm": 0.924903664264625, "learning_rate": 1.2462587215237277e-05, "loss": 0.6872, "step": 23490 }, { "epoch": 0.4267761150661049, "grad_norm": 0.8707066553279397, "learning_rate": 1.2457001268166585e-05, "loss": 0.6792, "step": 23500 }, { "epoch": 0.42695772192357984, "grad_norm": 0.8616651881138948, "learning_rate": 1.2451414505068105e-05, "loss": 0.6706, "step": 23510 }, { "epoch": 0.42713932878105476, "grad_norm": 0.8902416737874334, "learning_rate": 1.2445826927797334e-05, "loss": 0.6714, "step": 23520 }, { "epoch": 0.4273209356385297, "grad_norm": 0.8575441117425292, "learning_rate": 1.2440238538210034e-05, "loss": 0.6704, "step": 23530 }, { "epoch": 0.42750254249600467, "grad_norm": 0.8562045009068374, "learning_rate": 1.2434649338162243e-05, "loss": 0.6753, "step": 23540 }, { "epoch": 0.4276841493534796, "grad_norm": 0.880798858497051, "learning_rate": 1.2429059329510267e-05, "loss": 0.6783, "step": 23550 }, { "epoch": 0.4278657562109545, "grad_norm": 0.8577778024293018, "learning_rate": 1.2423468514110677e-05, "loss": 0.6906, "step": 23560 }, { "epoch": 0.42804736306842944, "grad_norm": 0.9561327934516267, "learning_rate": 1.241787689382031e-05, "loss": 0.6746, "step": 23570 }, { "epoch": 0.4282289699259044, "grad_norm": 0.876369161943813, "learning_rate": 1.2412284470496281e-05, "loss": 0.6859, "step": 23580 }, { "epoch": 0.42841057678337935, "grad_norm": 0.8466246587776609, "learning_rate": 1.2406691245995964e-05, "loss": 0.6819, "step": 23590 }, { "epoch": 0.4285921836408543, "grad_norm": 0.8739952532206711, "learning_rate": 1.2401097222176993e-05, "loss": 0.6804, "step": 23600 }, { "epoch": 0.4287737904983292, "grad_norm": 0.8807009920213188, "learning_rate": 1.2395502400897286e-05, "loss": 0.6791, "step": 23610 }, { "epoch": 0.4289553973558042, "grad_norm": 0.8696852469090088, "learning_rate": 1.2389906784015005e-05, "loss": 0.6757, "step": 23620 }, { "epoch": 0.4291370042132791, "grad_norm": 0.8640449085288535, "learning_rate": 1.2384310373388588e-05, "loss": 0.6752, "step": 23630 }, { "epoch": 0.42931861107075403, "grad_norm": 0.8879054912558956, "learning_rate": 1.2378713170876737e-05, "loss": 0.6751, "step": 23640 }, { "epoch": 0.42950021792822896, "grad_norm": 0.8389990649734481, "learning_rate": 1.2373115178338416e-05, "loss": 0.6863, "step": 23650 }, { "epoch": 0.4296818247857039, "grad_norm": 0.8509608795100261, "learning_rate": 1.2367516397632844e-05, "loss": 0.675, "step": 23660 }, { "epoch": 0.42986343164317886, "grad_norm": 0.8796508892418023, "learning_rate": 1.2361916830619508e-05, "loss": 0.6781, "step": 23670 }, { "epoch": 0.4300450385006538, "grad_norm": 0.8951751953597544, "learning_rate": 1.2356316479158162e-05, "loss": 0.6774, "step": 23680 }, { "epoch": 0.4302266453581287, "grad_norm": 0.8784623342611148, "learning_rate": 1.2350715345108805e-05, "loss": 0.6775, "step": 23690 }, { "epoch": 0.43040825221560364, "grad_norm": 0.8631698424814078, "learning_rate": 1.2345113430331713e-05, "loss": 0.6737, "step": 23700 }, { "epoch": 0.4305898590730786, "grad_norm": 0.9683618868784113, "learning_rate": 1.2339510736687406e-05, "loss": 0.6745, "step": 23710 }, { "epoch": 0.43077146593055354, "grad_norm": 0.8380404490325741, "learning_rate": 1.2333907266036677e-05, "loss": 0.6783, "step": 23720 }, { "epoch": 0.43095307278802847, "grad_norm": 0.8713570340508253, "learning_rate": 1.2328303020240569e-05, "loss": 0.6693, "step": 23730 }, { "epoch": 0.4311346796455034, "grad_norm": 0.8558667101130418, "learning_rate": 1.232269800116038e-05, "loss": 0.6706, "step": 23740 }, { "epoch": 0.4313162865029784, "grad_norm": 0.8433370089127208, "learning_rate": 1.231709221065767e-05, "loss": 0.6748, "step": 23750 }, { "epoch": 0.4314978933604533, "grad_norm": 0.8502330479955094, "learning_rate": 1.2311485650594256e-05, "loss": 0.6774, "step": 23760 }, { "epoch": 0.4316795002179282, "grad_norm": 0.9198130368353319, "learning_rate": 1.2305878322832208e-05, "loss": 0.6793, "step": 23770 }, { "epoch": 0.43186110707540315, "grad_norm": 0.8595788711287242, "learning_rate": 1.2300270229233846e-05, "loss": 0.676, "step": 23780 }, { "epoch": 0.43204271393287813, "grad_norm": 0.8755223692697811, "learning_rate": 1.2294661371661753e-05, "loss": 0.6833, "step": 23790 }, { "epoch": 0.43222432079035306, "grad_norm": 0.8704114605676913, "learning_rate": 1.2289051751978762e-05, "loss": 0.6674, "step": 23800 }, { "epoch": 0.432405927647828, "grad_norm": 0.8982379772237886, "learning_rate": 1.2283441372047959e-05, "loss": 0.6895, "step": 23810 }, { "epoch": 0.4325875345053029, "grad_norm": 0.8628280100853573, "learning_rate": 1.2277830233732684e-05, "loss": 0.6802, "step": 23820 }, { "epoch": 0.43276914136277783, "grad_norm": 0.9051814587886248, "learning_rate": 1.2272218338896527e-05, "loss": 0.6777, "step": 23830 }, { "epoch": 0.4329507482202528, "grad_norm": 0.8771815674683867, "learning_rate": 1.226660568940333e-05, "loss": 0.6765, "step": 23840 }, { "epoch": 0.43313235507772774, "grad_norm": 0.8657985546576357, "learning_rate": 1.2260992287117186e-05, "loss": 0.6982, "step": 23850 }, { "epoch": 0.43331396193520266, "grad_norm": 0.8776435468779914, "learning_rate": 1.2255378133902439e-05, "loss": 0.6859, "step": 23860 }, { "epoch": 0.4334955687926776, "grad_norm": 0.8662353277578841, "learning_rate": 1.2249763231623676e-05, "loss": 0.6872, "step": 23870 }, { "epoch": 0.43367717565015257, "grad_norm": 0.8735602066169863, "learning_rate": 1.2244147582145742e-05, "loss": 0.6822, "step": 23880 }, { "epoch": 0.4338587825076275, "grad_norm": 0.8397233112477597, "learning_rate": 1.2238531187333726e-05, "loss": 0.6852, "step": 23890 }, { "epoch": 0.4340403893651024, "grad_norm": 0.880545231288349, "learning_rate": 1.2232914049052961e-05, "loss": 0.6923, "step": 23900 }, { "epoch": 0.43422199622257734, "grad_norm": 0.8888704749534484, "learning_rate": 1.2227296169169034e-05, "loss": 0.681, "step": 23910 }, { "epoch": 0.4344036030800523, "grad_norm": 0.8921999225261427, "learning_rate": 1.2221677549547777e-05, "loss": 0.6623, "step": 23920 }, { "epoch": 0.43458520993752725, "grad_norm": 0.8697579966350388, "learning_rate": 1.221605819205526e-05, "loss": 0.6969, "step": 23930 }, { "epoch": 0.4347668167950022, "grad_norm": 0.8761915611607939, "learning_rate": 1.2210438098557804e-05, "loss": 0.6824, "step": 23940 }, { "epoch": 0.4349484236524771, "grad_norm": 0.8485229211094667, "learning_rate": 1.2204817270921977e-05, "loss": 0.6589, "step": 23950 }, { "epoch": 0.4351300305099521, "grad_norm": 0.8394621779169092, "learning_rate": 1.219919571101459e-05, "loss": 0.6801, "step": 23960 }, { "epoch": 0.435311637367427, "grad_norm": 0.8725607794025414, "learning_rate": 1.2193573420702692e-05, "loss": 0.6841, "step": 23970 }, { "epoch": 0.43549324422490193, "grad_norm": 0.8569686077300744, "learning_rate": 1.2187950401853573e-05, "loss": 0.691, "step": 23980 }, { "epoch": 0.43567485108237686, "grad_norm": 0.8575694337145092, "learning_rate": 1.2182326656334773e-05, "loss": 0.6733, "step": 23990 }, { "epoch": 0.4358564579398518, "grad_norm": 0.8363812406268111, "learning_rate": 1.2176702186014073e-05, "loss": 0.6617, "step": 24000 }, { "epoch": 0.43603806479732676, "grad_norm": 0.8886476218004121, "learning_rate": 1.217107699275949e-05, "loss": 0.6746, "step": 24010 }, { "epoch": 0.4362196716548017, "grad_norm": 0.8294760609526212, "learning_rate": 1.216545107843928e-05, "loss": 0.6789, "step": 24020 }, { "epoch": 0.4364012785122766, "grad_norm": 0.8619712907769609, "learning_rate": 1.2159824444921942e-05, "loss": 0.6796, "step": 24030 }, { "epoch": 0.43658288536975154, "grad_norm": 0.8811147349482513, "learning_rate": 1.2154197094076214e-05, "loss": 0.6978, "step": 24040 }, { "epoch": 0.4367644922272265, "grad_norm": 0.8655827508021289, "learning_rate": 1.2148569027771074e-05, "loss": 0.6886, "step": 24050 }, { "epoch": 0.43694609908470144, "grad_norm": 0.9054919818989721, "learning_rate": 1.2142940247875735e-05, "loss": 0.6674, "step": 24060 }, { "epoch": 0.43712770594217637, "grad_norm": 0.8534798188459694, "learning_rate": 1.2137310756259639e-05, "loss": 0.6877, "step": 24070 }, { "epoch": 0.4373093127996513, "grad_norm": 0.8793905155541382, "learning_rate": 1.2131680554792477e-05, "loss": 0.6803, "step": 24080 }, { "epoch": 0.4374909196571263, "grad_norm": 0.8585407223234551, "learning_rate": 1.2126049645344175e-05, "loss": 0.6797, "step": 24090 }, { "epoch": 0.4376725265146012, "grad_norm": 0.902210688076536, "learning_rate": 1.2120418029784885e-05, "loss": 0.6839, "step": 24100 }, { "epoch": 0.4378541333720761, "grad_norm": 0.8943787987830891, "learning_rate": 1.2114785709984997e-05, "loss": 0.6865, "step": 24110 }, { "epoch": 0.43803574022955105, "grad_norm": 0.8730882990170972, "learning_rate": 1.2109152687815144e-05, "loss": 0.6696, "step": 24120 }, { "epoch": 0.43821734708702603, "grad_norm": 0.8724750336829818, "learning_rate": 1.2103518965146179e-05, "loss": 0.6763, "step": 24130 }, { "epoch": 0.43839895394450096, "grad_norm": 0.898075806146259, "learning_rate": 1.2097884543849199e-05, "loss": 0.6744, "step": 24140 }, { "epoch": 0.4385805608019759, "grad_norm": 0.8536379237500871, "learning_rate": 1.209224942579552e-05, "loss": 0.6729, "step": 24150 }, { "epoch": 0.4387621676594508, "grad_norm": 0.8883788344482711, "learning_rate": 1.2086613612856705e-05, "loss": 0.6818, "step": 24160 }, { "epoch": 0.43894377451692573, "grad_norm": 0.8803705146675812, "learning_rate": 1.2080977106904535e-05, "loss": 0.6739, "step": 24170 }, { "epoch": 0.4391253813744007, "grad_norm": 0.8621729525795352, "learning_rate": 1.2075339909811024e-05, "loss": 0.6744, "step": 24180 }, { "epoch": 0.43930698823187564, "grad_norm": 0.846517888476851, "learning_rate": 1.206970202344842e-05, "loss": 0.6698, "step": 24190 }, { "epoch": 0.43948859508935056, "grad_norm": 0.8846099870034871, "learning_rate": 1.2064063449689198e-05, "loss": 0.6821, "step": 24200 }, { "epoch": 0.4396702019468255, "grad_norm": 0.8584888883713137, "learning_rate": 1.2058424190406061e-05, "loss": 0.6827, "step": 24210 }, { "epoch": 0.43985180880430047, "grad_norm": 0.8658005158740079, "learning_rate": 1.2052784247471938e-05, "loss": 0.6703, "step": 24220 }, { "epoch": 0.4400334156617754, "grad_norm": 0.8729134869628032, "learning_rate": 1.2047143622759986e-05, "loss": 0.6811, "step": 24230 }, { "epoch": 0.4402150225192503, "grad_norm": 0.8816590933929749, "learning_rate": 1.2041502318143592e-05, "loss": 0.6917, "step": 24240 }, { "epoch": 0.44039662937672525, "grad_norm": 0.8823562697012752, "learning_rate": 1.2035860335496364e-05, "loss": 0.6801, "step": 24250 }, { "epoch": 0.4405782362342002, "grad_norm": 0.8911928262363357, "learning_rate": 1.2030217676692133e-05, "loss": 0.6965, "step": 24260 }, { "epoch": 0.44075984309167515, "grad_norm": 0.8839573964810383, "learning_rate": 1.2024574343604965e-05, "loss": 0.6754, "step": 24270 }, { "epoch": 0.4409414499491501, "grad_norm": 0.8638008328530649, "learning_rate": 1.2018930338109138e-05, "loss": 0.6683, "step": 24280 }, { "epoch": 0.441123056806625, "grad_norm": 0.8424169511887601, "learning_rate": 1.2013285662079161e-05, "loss": 0.6708, "step": 24290 }, { "epoch": 0.4413046636641, "grad_norm": 0.8369342984263682, "learning_rate": 1.2007640317389764e-05, "loss": 0.685, "step": 24300 }, { "epoch": 0.4414862705215749, "grad_norm": 0.8939871997919927, "learning_rate": 1.2001994305915895e-05, "loss": 0.6794, "step": 24310 }, { "epoch": 0.44166787737904983, "grad_norm": 0.8171491966800378, "learning_rate": 1.1996347629532733e-05, "loss": 0.6591, "step": 24320 }, { "epoch": 0.44184948423652476, "grad_norm": 0.8724382493440554, "learning_rate": 1.1990700290115664e-05, "loss": 0.6796, "step": 24330 }, { "epoch": 0.4420310910939997, "grad_norm": 0.8625140716544031, "learning_rate": 1.198505228954031e-05, "loss": 0.6816, "step": 24340 }, { "epoch": 0.44221269795147466, "grad_norm": 0.8886448172490321, "learning_rate": 1.19794036296825e-05, "loss": 0.6712, "step": 24350 }, { "epoch": 0.4423943048089496, "grad_norm": 0.8508629889109692, "learning_rate": 1.1973754312418287e-05, "loss": 0.6701, "step": 24360 }, { "epoch": 0.4425759116664245, "grad_norm": 0.8742396733415831, "learning_rate": 1.1968104339623944e-05, "loss": 0.6768, "step": 24370 }, { "epoch": 0.44275751852389944, "grad_norm": 0.9029040564080029, "learning_rate": 1.1962453713175957e-05, "loss": 0.6739, "step": 24380 }, { "epoch": 0.4429391253813744, "grad_norm": 0.8612456518754475, "learning_rate": 1.1956802434951031e-05, "loss": 0.6754, "step": 24390 }, { "epoch": 0.44312073223884935, "grad_norm": 0.8265367371788324, "learning_rate": 1.195115050682609e-05, "loss": 0.6563, "step": 24400 }, { "epoch": 0.44330233909632427, "grad_norm": 0.8581401271265305, "learning_rate": 1.1945497930678274e-05, "loss": 0.6753, "step": 24410 }, { "epoch": 0.4434839459537992, "grad_norm": 0.8871578568067351, "learning_rate": 1.1939844708384932e-05, "loss": 0.6784, "step": 24420 }, { "epoch": 0.4436655528112742, "grad_norm": 0.883027220344624, "learning_rate": 1.1934190841823631e-05, "loss": 0.6739, "step": 24430 }, { "epoch": 0.4438471596687491, "grad_norm": 0.8707552979131943, "learning_rate": 1.1928536332872159e-05, "loss": 0.6697, "step": 24440 }, { "epoch": 0.44402876652622403, "grad_norm": 0.8747388269624823, "learning_rate": 1.1922881183408505e-05, "loss": 0.6592, "step": 24450 }, { "epoch": 0.44421037338369895, "grad_norm": 0.857352935652868, "learning_rate": 1.1917225395310881e-05, "loss": 0.6655, "step": 24460 }, { "epoch": 0.44439198024117393, "grad_norm": 0.8535906698427104, "learning_rate": 1.1911568970457708e-05, "loss": 0.6853, "step": 24470 }, { "epoch": 0.44457358709864886, "grad_norm": 0.8849700229707073, "learning_rate": 1.190591191072761e-05, "loss": 0.6841, "step": 24480 }, { "epoch": 0.4447551939561238, "grad_norm": 0.8569784621486138, "learning_rate": 1.1900254217999436e-05, "loss": 0.6719, "step": 24490 }, { "epoch": 0.4449368008135987, "grad_norm": 0.8577708122860304, "learning_rate": 1.1894595894152237e-05, "loss": 0.6711, "step": 24500 }, { "epoch": 0.44511840767107363, "grad_norm": 0.8397205523412791, "learning_rate": 1.1888936941065272e-05, "loss": 0.6832, "step": 24510 }, { "epoch": 0.4453000145285486, "grad_norm": 0.877764347275993, "learning_rate": 1.1883277360618015e-05, "loss": 0.6646, "step": 24520 }, { "epoch": 0.44548162138602354, "grad_norm": 0.8656818480325091, "learning_rate": 1.1877617154690149e-05, "loss": 0.6749, "step": 24530 }, { "epoch": 0.44566322824349847, "grad_norm": 0.8668843034261737, "learning_rate": 1.1871956325161556e-05, "loss": 0.695, "step": 24540 }, { "epoch": 0.4458448351009734, "grad_norm": 0.853486140976976, "learning_rate": 1.1866294873912332e-05, "loss": 0.679, "step": 24550 }, { "epoch": 0.44602644195844837, "grad_norm": 0.8298505419549597, "learning_rate": 1.1860632802822778e-05, "loss": 0.6593, "step": 24560 }, { "epoch": 0.4462080488159233, "grad_norm": 0.8633419154508685, "learning_rate": 1.1854970113773403e-05, "loss": 0.6841, "step": 24570 }, { "epoch": 0.4463896556733982, "grad_norm": 0.8756933183085144, "learning_rate": 1.1849306808644914e-05, "loss": 0.6745, "step": 24580 }, { "epoch": 0.44657126253087315, "grad_norm": 0.8438064890700355, "learning_rate": 1.1843642889318229e-05, "loss": 0.6851, "step": 24590 }, { "epoch": 0.44675286938834813, "grad_norm": 0.8608690364770296, "learning_rate": 1.183797835767447e-05, "loss": 0.6827, "step": 24600 }, { "epoch": 0.44693447624582305, "grad_norm": 0.8940051051124935, "learning_rate": 1.1832313215594961e-05, "loss": 0.6694, "step": 24610 }, { "epoch": 0.447116083103298, "grad_norm": 0.8781789929495495, "learning_rate": 1.1826647464961228e-05, "loss": 0.6676, "step": 24620 }, { "epoch": 0.4472976899607729, "grad_norm": 0.8579445170561025, "learning_rate": 1.1820981107655001e-05, "loss": 0.6746, "step": 24630 }, { "epoch": 0.4474792968182479, "grad_norm": 0.842057565389612, "learning_rate": 1.1815314145558208e-05, "loss": 0.6946, "step": 24640 }, { "epoch": 0.4476609036757228, "grad_norm": 0.885353615772634, "learning_rate": 1.180964658055298e-05, "loss": 0.6765, "step": 24650 }, { "epoch": 0.44784251053319774, "grad_norm": 0.8996308106575429, "learning_rate": 1.1803978414521654e-05, "loss": 0.6798, "step": 24660 }, { "epoch": 0.44802411739067266, "grad_norm": 0.8592751205515791, "learning_rate": 1.1798309649346753e-05, "loss": 0.6627, "step": 24670 }, { "epoch": 0.4482057242481476, "grad_norm": 0.8473453127917265, "learning_rate": 1.1792640286911012e-05, "loss": 0.674, "step": 24680 }, { "epoch": 0.44838733110562257, "grad_norm": 0.8389805309582302, "learning_rate": 1.1786970329097357e-05, "loss": 0.6687, "step": 24690 }, { "epoch": 0.4485689379630975, "grad_norm": 0.9050573300538165, "learning_rate": 1.1781299777788914e-05, "loss": 0.6717, "step": 24700 }, { "epoch": 0.4487505448205724, "grad_norm": 0.8579689721808178, "learning_rate": 1.1775628634869008e-05, "loss": 0.6744, "step": 24710 }, { "epoch": 0.44893215167804734, "grad_norm": 0.9223022385271686, "learning_rate": 1.1769956902221157e-05, "loss": 0.6773, "step": 24720 }, { "epoch": 0.4491137585355223, "grad_norm": 0.8839264738125289, "learning_rate": 1.1764284581729077e-05, "loss": 0.6796, "step": 24730 }, { "epoch": 0.44929536539299725, "grad_norm": 0.8508865373982449, "learning_rate": 1.1758611675276681e-05, "loss": 0.6788, "step": 24740 }, { "epoch": 0.4494769722504722, "grad_norm": 0.8645795834293936, "learning_rate": 1.1752938184748073e-05, "loss": 0.6796, "step": 24750 }, { "epoch": 0.4496585791079471, "grad_norm": 0.8771634029439149, "learning_rate": 1.174726411202755e-05, "loss": 0.6801, "step": 24760 }, { "epoch": 0.4498401859654221, "grad_norm": 0.8680574148633318, "learning_rate": 1.174158945899961e-05, "loss": 0.6744, "step": 24770 }, { "epoch": 0.450021792822897, "grad_norm": 0.8145155378704173, "learning_rate": 1.1735914227548932e-05, "loss": 0.6863, "step": 24780 }, { "epoch": 0.45020339968037193, "grad_norm": 0.8504126008283778, "learning_rate": 1.1730238419560398e-05, "loss": 0.6591, "step": 24790 }, { "epoch": 0.45038500653784685, "grad_norm": 0.8658993373855404, "learning_rate": 1.1724562036919073e-05, "loss": 0.6564, "step": 24800 }, { "epoch": 0.4505666133953218, "grad_norm": 0.8917892155285028, "learning_rate": 1.171888508151022e-05, "loss": 0.6739, "step": 24810 }, { "epoch": 0.45074822025279676, "grad_norm": 0.8905836162842959, "learning_rate": 1.171320755521929e-05, "loss": 0.6841, "step": 24820 }, { "epoch": 0.4509298271102717, "grad_norm": 0.8625017413631921, "learning_rate": 1.1707529459931921e-05, "loss": 0.6828, "step": 24830 }, { "epoch": 0.4511114339677466, "grad_norm": 0.853604563127342, "learning_rate": 1.1701850797533943e-05, "loss": 0.6649, "step": 24840 }, { "epoch": 0.45129304082522154, "grad_norm": 0.8503462070765759, "learning_rate": 1.1696171569911372e-05, "loss": 0.6726, "step": 24850 }, { "epoch": 0.4514746476826965, "grad_norm": 0.844671188383432, "learning_rate": 1.1690491778950412e-05, "loss": 0.672, "step": 24860 }, { "epoch": 0.45165625454017144, "grad_norm": 0.867455280001339, "learning_rate": 1.1684811426537456e-05, "loss": 0.6721, "step": 24870 }, { "epoch": 0.45183786139764637, "grad_norm": 0.8531096786653761, "learning_rate": 1.1679130514559085e-05, "loss": 0.666, "step": 24880 }, { "epoch": 0.4520194682551213, "grad_norm": 0.84980451722729, "learning_rate": 1.1673449044902057e-05, "loss": 0.6871, "step": 24890 }, { "epoch": 0.4522010751125963, "grad_norm": 0.9391337954529451, "learning_rate": 1.1667767019453323e-05, "loss": 0.6817, "step": 24900 }, { "epoch": 0.4523826819700712, "grad_norm": 0.8672504206789745, "learning_rate": 1.1662084440100021e-05, "loss": 0.6692, "step": 24910 }, { "epoch": 0.4525642888275461, "grad_norm": 0.8568196985842786, "learning_rate": 1.1656401308729467e-05, "loss": 0.6792, "step": 24920 }, { "epoch": 0.45274589568502105, "grad_norm": 0.8528365221050141, "learning_rate": 1.1650717627229163e-05, "loss": 0.6663, "step": 24930 }, { "epoch": 0.45292750254249603, "grad_norm": 0.8797634262431986, "learning_rate": 1.1645033397486792e-05, "loss": 0.6823, "step": 24940 }, { "epoch": 0.45310910939997096, "grad_norm": 0.8529494214726157, "learning_rate": 1.1639348621390218e-05, "loss": 0.6754, "step": 24950 }, { "epoch": 0.4532907162574459, "grad_norm": 0.8467948364629657, "learning_rate": 1.1633663300827494e-05, "loss": 0.6695, "step": 24960 }, { "epoch": 0.4534723231149208, "grad_norm": 0.8310169260430048, "learning_rate": 1.1627977437686843e-05, "loss": 0.6729, "step": 24970 }, { "epoch": 0.45365392997239573, "grad_norm": 0.8471290570636754, "learning_rate": 1.1622291033856679e-05, "loss": 0.6794, "step": 24980 }, { "epoch": 0.4538355368298707, "grad_norm": 0.8426972330599061, "learning_rate": 1.1616604091225583e-05, "loss": 0.678, "step": 24990 }, { "epoch": 0.45401714368734564, "grad_norm": 0.8998855239842508, "learning_rate": 1.1610916611682329e-05, "loss": 0.6808, "step": 25000 }, { "epoch": 0.45419875054482056, "grad_norm": 0.8419728379210767, "learning_rate": 1.1605228597115856e-05, "loss": 0.6757, "step": 25010 }, { "epoch": 0.4543803574022955, "grad_norm": 0.8697769684890039, "learning_rate": 1.1599540049415291e-05, "loss": 0.6777, "step": 25020 }, { "epoch": 0.45456196425977047, "grad_norm": 0.8243800056578302, "learning_rate": 1.1593850970469938e-05, "loss": 0.6653, "step": 25030 }, { "epoch": 0.4547435711172454, "grad_norm": 0.8284896940934715, "learning_rate": 1.1588161362169266e-05, "loss": 0.6846, "step": 25040 }, { "epoch": 0.4549251779747203, "grad_norm": 0.8647606911612706, "learning_rate": 1.1582471226402934e-05, "loss": 0.6672, "step": 25050 }, { "epoch": 0.45510678483219524, "grad_norm": 0.8451601929390273, "learning_rate": 1.1576780565060764e-05, "loss": 0.6733, "step": 25060 }, { "epoch": 0.4552883916896702, "grad_norm": 0.8317613125544939, "learning_rate": 1.1571089380032763e-05, "loss": 0.6639, "step": 25070 }, { "epoch": 0.45546999854714515, "grad_norm": 0.8677939305597127, "learning_rate": 1.1565397673209103e-05, "loss": 0.6807, "step": 25080 }, { "epoch": 0.4556516054046201, "grad_norm": 0.9142934301265845, "learning_rate": 1.1559705446480136e-05, "loss": 0.6653, "step": 25090 }, { "epoch": 0.455833212262095, "grad_norm": 0.8625325062865172, "learning_rate": 1.1554012701736381e-05, "loss": 0.6756, "step": 25100 }, { "epoch": 0.45601481911957, "grad_norm": 0.8714901726640367, "learning_rate": 1.1548319440868536e-05, "loss": 0.6724, "step": 25110 }, { "epoch": 0.4561964259770449, "grad_norm": 0.8477272626537127, "learning_rate": 1.1542625665767467e-05, "loss": 0.6898, "step": 25120 }, { "epoch": 0.45637803283451983, "grad_norm": 0.8405566819226856, "learning_rate": 1.1536931378324208e-05, "loss": 0.6722, "step": 25130 }, { "epoch": 0.45655963969199476, "grad_norm": 0.8777558944771932, "learning_rate": 1.1531236580429963e-05, "loss": 0.6703, "step": 25140 }, { "epoch": 0.4567412465494697, "grad_norm": 0.8819098900814692, "learning_rate": 1.1525541273976116e-05, "loss": 0.6737, "step": 25150 }, { "epoch": 0.45692285340694466, "grad_norm": 0.8457031449328949, "learning_rate": 1.1519845460854202e-05, "loss": 0.6683, "step": 25160 }, { "epoch": 0.4571044602644196, "grad_norm": 0.8609620453948447, "learning_rate": 1.1514149142955944e-05, "loss": 0.6814, "step": 25170 }, { "epoch": 0.4572860671218945, "grad_norm": 0.9162821242468454, "learning_rate": 1.1508452322173215e-05, "loss": 0.6639, "step": 25180 }, { "epoch": 0.45746767397936944, "grad_norm": 0.8491222403392743, "learning_rate": 1.1502755000398068e-05, "loss": 0.675, "step": 25190 }, { "epoch": 0.4576492808368444, "grad_norm": 0.8535219066145614, "learning_rate": 1.1497057179522712e-05, "loss": 0.6689, "step": 25200 }, { "epoch": 0.45783088769431934, "grad_norm": 0.90530486301441, "learning_rate": 1.1491358861439534e-05, "loss": 0.6582, "step": 25210 }, { "epoch": 0.45801249455179427, "grad_norm": 0.907420114987976, "learning_rate": 1.1485660048041073e-05, "loss": 0.6574, "step": 25220 }, { "epoch": 0.4581941014092692, "grad_norm": 0.8450316023246844, "learning_rate": 1.1479960741220042e-05, "loss": 0.6692, "step": 25230 }, { "epoch": 0.4583757082667442, "grad_norm": 0.8406320067289276, "learning_rate": 1.1474260942869313e-05, "loss": 0.6595, "step": 25240 }, { "epoch": 0.4585573151242191, "grad_norm": 0.8364549912279635, "learning_rate": 1.1468560654881923e-05, "loss": 0.6651, "step": 25250 }, { "epoch": 0.458738921981694, "grad_norm": 0.8475731985472799, "learning_rate": 1.1462859879151074e-05, "loss": 0.6614, "step": 25260 }, { "epoch": 0.45892052883916895, "grad_norm": 0.8337453439334309, "learning_rate": 1.1457158617570125e-05, "loss": 0.6668, "step": 25270 }, { "epoch": 0.45910213569664393, "grad_norm": 0.9072267139735155, "learning_rate": 1.14514568720326e-05, "loss": 0.6736, "step": 25280 }, { "epoch": 0.45928374255411886, "grad_norm": 0.8867970970270944, "learning_rate": 1.1445754644432179e-05, "loss": 0.6645, "step": 25290 }, { "epoch": 0.4594653494115938, "grad_norm": 0.8630493595626089, "learning_rate": 1.1440051936662706e-05, "loss": 0.6582, "step": 25300 }, { "epoch": 0.4596469562690687, "grad_norm": 0.8633415860517117, "learning_rate": 1.1434348750618192e-05, "loss": 0.6726, "step": 25310 }, { "epoch": 0.45982856312654363, "grad_norm": 0.8582935637095334, "learning_rate": 1.1428645088192788e-05, "loss": 0.6789, "step": 25320 }, { "epoch": 0.4600101699840186, "grad_norm": 0.8830419645168516, "learning_rate": 1.1422940951280824e-05, "loss": 0.6539, "step": 25330 }, { "epoch": 0.46019177684149354, "grad_norm": 0.8407673795319572, "learning_rate": 1.1417236341776773e-05, "loss": 0.6671, "step": 25340 }, { "epoch": 0.46037338369896846, "grad_norm": 0.8958603388516493, "learning_rate": 1.1411531261575266e-05, "loss": 0.6695, "step": 25350 }, { "epoch": 0.4605549905564434, "grad_norm": 0.8961820620577656, "learning_rate": 1.1405825712571102e-05, "loss": 0.6762, "step": 25360 }, { "epoch": 0.46073659741391837, "grad_norm": 0.8918205979130908, "learning_rate": 1.1400119696659224e-05, "loss": 0.6733, "step": 25370 }, { "epoch": 0.4609182042713933, "grad_norm": 0.8712319845123234, "learning_rate": 1.1394413215734729e-05, "loss": 0.6869, "step": 25380 }, { "epoch": 0.4610998111288682, "grad_norm": 0.8826060246358768, "learning_rate": 1.1388706271692882e-05, "loss": 0.6651, "step": 25390 }, { "epoch": 0.46128141798634315, "grad_norm": 0.8790378654377974, "learning_rate": 1.1382998866429087e-05, "loss": 0.6731, "step": 25400 }, { "epoch": 0.4614630248438181, "grad_norm": 0.8423234739391636, "learning_rate": 1.1377291001838908e-05, "loss": 0.6717, "step": 25410 }, { "epoch": 0.46164463170129305, "grad_norm": 0.840362886499724, "learning_rate": 1.1371582679818064e-05, "loss": 0.6675, "step": 25420 }, { "epoch": 0.461826238558768, "grad_norm": 0.8884073871743169, "learning_rate": 1.1365873902262421e-05, "loss": 0.672, "step": 25430 }, { "epoch": 0.4620078454162429, "grad_norm": 0.8488769205035298, "learning_rate": 1.1360164671067995e-05, "loss": 0.6774, "step": 25440 }, { "epoch": 0.4621894522737179, "grad_norm": 0.9096506123606212, "learning_rate": 1.1354454988130959e-05, "loss": 0.6704, "step": 25450 }, { "epoch": 0.4623710591311928, "grad_norm": 0.8939885070481616, "learning_rate": 1.1348744855347633e-05, "loss": 0.6654, "step": 25460 }, { "epoch": 0.46255266598866773, "grad_norm": 0.8758702433617136, "learning_rate": 1.1343034274614486e-05, "loss": 0.6716, "step": 25470 }, { "epoch": 0.46273427284614266, "grad_norm": 0.8531773357025657, "learning_rate": 1.1337323247828134e-05, "loss": 0.6722, "step": 25480 }, { "epoch": 0.4629158797036176, "grad_norm": 0.8490664479979303, "learning_rate": 1.1331611776885343e-05, "loss": 0.6623, "step": 25490 }, { "epoch": 0.46309748656109256, "grad_norm": 0.8702919204426114, "learning_rate": 1.1325899863683025e-05, "loss": 0.6783, "step": 25500 }, { "epoch": 0.4632790934185675, "grad_norm": 0.9037304224186631, "learning_rate": 1.1320187510118245e-05, "loss": 0.6753, "step": 25510 }, { "epoch": 0.4634607002760424, "grad_norm": 0.8513001025388359, "learning_rate": 1.1314474718088209e-05, "loss": 0.6714, "step": 25520 }, { "epoch": 0.46364230713351734, "grad_norm": 0.8551637300938045, "learning_rate": 1.1308761489490268e-05, "loss": 0.6535, "step": 25530 }, { "epoch": 0.4638239139909923, "grad_norm": 0.8646818594016453, "learning_rate": 1.1303047826221917e-05, "loss": 0.6684, "step": 25540 }, { "epoch": 0.46400552084846725, "grad_norm": 0.8746571195331272, "learning_rate": 1.1297333730180805e-05, "loss": 0.6844, "step": 25550 }, { "epoch": 0.46418712770594217, "grad_norm": 0.8832801301159394, "learning_rate": 1.1291619203264708e-05, "loss": 0.679, "step": 25560 }, { "epoch": 0.4643687345634171, "grad_norm": 0.874357595344917, "learning_rate": 1.1285904247371562e-05, "loss": 0.6636, "step": 25570 }, { "epoch": 0.4645503414208921, "grad_norm": 0.8809033775329663, "learning_rate": 1.1280188864399433e-05, "loss": 0.6704, "step": 25580 }, { "epoch": 0.464731948278367, "grad_norm": 0.839687459001714, "learning_rate": 1.1274473056246535e-05, "loss": 0.6751, "step": 25590 }, { "epoch": 0.4649135551358419, "grad_norm": 0.8833959024897554, "learning_rate": 1.1268756824811225e-05, "loss": 0.662, "step": 25600 }, { "epoch": 0.46509516199331685, "grad_norm": 0.8616332904416667, "learning_rate": 1.1263040171991993e-05, "loss": 0.6664, "step": 25610 }, { "epoch": 0.46527676885079183, "grad_norm": 0.8523138383154446, "learning_rate": 1.1257323099687476e-05, "loss": 0.6697, "step": 25620 }, { "epoch": 0.46545837570826676, "grad_norm": 0.8576167234707436, "learning_rate": 1.1251605609796448e-05, "loss": 0.6739, "step": 25630 }, { "epoch": 0.4656399825657417, "grad_norm": 0.9142974182065354, "learning_rate": 1.1245887704217822e-05, "loss": 0.6653, "step": 25640 }, { "epoch": 0.4658215894232166, "grad_norm": 0.8553911553966719, "learning_rate": 1.1240169384850648e-05, "loss": 0.6661, "step": 25650 }, { "epoch": 0.46600319628069153, "grad_norm": 0.8377541816802017, "learning_rate": 1.1234450653594113e-05, "loss": 0.6563, "step": 25660 }, { "epoch": 0.4661848031381665, "grad_norm": 0.8683056407482689, "learning_rate": 1.1228731512347546e-05, "loss": 0.6644, "step": 25670 }, { "epoch": 0.46636640999564144, "grad_norm": 0.8525146914923732, "learning_rate": 1.1223011963010404e-05, "loss": 0.6664, "step": 25680 }, { "epoch": 0.46654801685311637, "grad_norm": 0.8592301683327952, "learning_rate": 1.1217292007482284e-05, "loss": 0.6651, "step": 25690 }, { "epoch": 0.4667296237105913, "grad_norm": 0.9150410959915513, "learning_rate": 1.1211571647662915e-05, "loss": 0.6573, "step": 25700 }, { "epoch": 0.46691123056806627, "grad_norm": 0.8681948502057997, "learning_rate": 1.1205850885452167e-05, "loss": 0.6628, "step": 25710 }, { "epoch": 0.4670928374255412, "grad_norm": 0.8726678669437665, "learning_rate": 1.1200129722750041e-05, "loss": 0.6577, "step": 25720 }, { "epoch": 0.4672744442830161, "grad_norm": 0.8599975799323263, "learning_rate": 1.1194408161456664e-05, "loss": 0.6884, "step": 25730 }, { "epoch": 0.46745605114049105, "grad_norm": 0.8674722587001132, "learning_rate": 1.1188686203472305e-05, "loss": 0.6675, "step": 25740 }, { "epoch": 0.467637657997966, "grad_norm": 0.8782751387013322, "learning_rate": 1.118296385069736e-05, "loss": 0.6789, "step": 25750 }, { "epoch": 0.46781926485544095, "grad_norm": 0.8380511100360013, "learning_rate": 1.1177241105032356e-05, "loss": 0.6601, "step": 25760 }, { "epoch": 0.4680008717129159, "grad_norm": 0.8476664172063034, "learning_rate": 1.117151796837795e-05, "loss": 0.6704, "step": 25770 }, { "epoch": 0.4681824785703908, "grad_norm": 0.8446582946249825, "learning_rate": 1.116579444263493e-05, "loss": 0.6636, "step": 25780 }, { "epoch": 0.4683640854278658, "grad_norm": 0.8390812183207887, "learning_rate": 1.1160070529704215e-05, "loss": 0.6695, "step": 25790 }, { "epoch": 0.4685456922853407, "grad_norm": 0.8672743158645204, "learning_rate": 1.1154346231486847e-05, "loss": 0.6664, "step": 25800 }, { "epoch": 0.46872729914281563, "grad_norm": 0.9073706879507679, "learning_rate": 1.1148621549884007e-05, "loss": 0.6839, "step": 25810 }, { "epoch": 0.46890890600029056, "grad_norm": 0.8659557279755734, "learning_rate": 1.1142896486796988e-05, "loss": 0.6843, "step": 25820 }, { "epoch": 0.4690905128577655, "grad_norm": 0.8808909419941711, "learning_rate": 1.1137171044127225e-05, "loss": 0.6705, "step": 25830 }, { "epoch": 0.46927211971524047, "grad_norm": 0.8876420666511182, "learning_rate": 1.1131445223776265e-05, "loss": 0.6686, "step": 25840 }, { "epoch": 0.4694537265727154, "grad_norm": 0.8867029881441775, "learning_rate": 1.1125719027645791e-05, "loss": 0.6664, "step": 25850 }, { "epoch": 0.4696353334301903, "grad_norm": 0.8858141774429218, "learning_rate": 1.111999245763761e-05, "loss": 0.6681, "step": 25860 }, { "epoch": 0.46981694028766524, "grad_norm": 0.8421123962093232, "learning_rate": 1.1114265515653647e-05, "loss": 0.6625, "step": 25870 }, { "epoch": 0.4699985471451402, "grad_norm": 0.9181971621339795, "learning_rate": 1.1108538203595954e-05, "loss": 0.6717, "step": 25880 }, { "epoch": 0.47018015400261515, "grad_norm": 0.8627222981488096, "learning_rate": 1.1102810523366703e-05, "loss": 0.6792, "step": 25890 }, { "epoch": 0.4703617608600901, "grad_norm": 0.9047085898241262, "learning_rate": 1.1097082476868192e-05, "loss": 0.6853, "step": 25900 }, { "epoch": 0.470543367717565, "grad_norm": 0.8458995464363959, "learning_rate": 1.1091354066002846e-05, "loss": 0.6666, "step": 25910 }, { "epoch": 0.47072497457504, "grad_norm": 0.8550846583260716, "learning_rate": 1.1085625292673198e-05, "loss": 0.6748, "step": 25920 }, { "epoch": 0.4709065814325149, "grad_norm": 0.8885494381930291, "learning_rate": 1.1079896158781912e-05, "loss": 0.6797, "step": 25930 }, { "epoch": 0.47108818828998983, "grad_norm": 0.8613478080884502, "learning_rate": 1.1074166666231767e-05, "loss": 0.6597, "step": 25940 }, { "epoch": 0.47126979514746475, "grad_norm": 0.8415152045551715, "learning_rate": 1.106843681692566e-05, "loss": 0.6688, "step": 25950 }, { "epoch": 0.4714514020049397, "grad_norm": 0.8586254256278593, "learning_rate": 1.1062706612766609e-05, "loss": 0.6612, "step": 25960 }, { "epoch": 0.47163300886241466, "grad_norm": 0.8338996270041751, "learning_rate": 1.1056976055657751e-05, "loss": 0.6581, "step": 25970 }, { "epoch": 0.4718146157198896, "grad_norm": 0.8339021951232871, "learning_rate": 1.105124514750234e-05, "loss": 0.6821, "step": 25980 }, { "epoch": 0.4719962225773645, "grad_norm": 0.8619743162825203, "learning_rate": 1.1045513890203737e-05, "loss": 0.6611, "step": 25990 }, { "epoch": 0.47217782943483944, "grad_norm": 0.8368659436106147, "learning_rate": 1.1039782285665434e-05, "loss": 0.6785, "step": 26000 }, { "epoch": 0.4723594362923144, "grad_norm": 0.8810357419340501, "learning_rate": 1.1034050335791031e-05, "loss": 0.675, "step": 26010 }, { "epoch": 0.47254104314978934, "grad_norm": 0.8403778616054223, "learning_rate": 1.102831804248424e-05, "loss": 0.6788, "step": 26020 }, { "epoch": 0.47272265000726427, "grad_norm": 0.8644770845039245, "learning_rate": 1.1022585407648894e-05, "loss": 0.6819, "step": 26030 }, { "epoch": 0.4729042568647392, "grad_norm": 0.8845419522690852, "learning_rate": 1.1016852433188934e-05, "loss": 0.6778, "step": 26040 }, { "epoch": 0.4730858637222142, "grad_norm": 0.8923751610231206, "learning_rate": 1.1011119121008413e-05, "loss": 0.663, "step": 26050 }, { "epoch": 0.4732674705796891, "grad_norm": 0.8434092653521376, "learning_rate": 1.1005385473011503e-05, "loss": 0.6766, "step": 26060 }, { "epoch": 0.473449077437164, "grad_norm": 0.8818207538237237, "learning_rate": 1.099965149110248e-05, "loss": 0.6566, "step": 26070 }, { "epoch": 0.47363068429463895, "grad_norm": 0.8529574575225346, "learning_rate": 1.0993917177185736e-05, "loss": 0.6677, "step": 26080 }, { "epoch": 0.47381229115211393, "grad_norm": 0.8208047762098322, "learning_rate": 1.0988182533165768e-05, "loss": 0.6792, "step": 26090 }, { "epoch": 0.47399389800958885, "grad_norm": 0.8326667324841738, "learning_rate": 1.0982447560947185e-05, "loss": 0.6626, "step": 26100 }, { "epoch": 0.4741755048670638, "grad_norm": 0.9121750007377513, "learning_rate": 1.0976712262434713e-05, "loss": 0.6727, "step": 26110 }, { "epoch": 0.4743571117245387, "grad_norm": 0.8022340690451077, "learning_rate": 1.0970976639533174e-05, "loss": 0.6641, "step": 26120 }, { "epoch": 0.47453871858201363, "grad_norm": 0.8628419626166484, "learning_rate": 1.0965240694147502e-05, "loss": 0.6727, "step": 26130 }, { "epoch": 0.4747203254394886, "grad_norm": 0.8548753011353255, "learning_rate": 1.0959504428182744e-05, "loss": 0.6584, "step": 26140 }, { "epoch": 0.47490193229696354, "grad_norm": 0.8588632666366867, "learning_rate": 1.0953767843544044e-05, "loss": 0.6866, "step": 26150 }, { "epoch": 0.47508353915443846, "grad_norm": 0.8691589664499153, "learning_rate": 1.0948030942136656e-05, "loss": 0.6739, "step": 26160 }, { "epoch": 0.4752651460119134, "grad_norm": 0.8616906192568046, "learning_rate": 1.0942293725865944e-05, "loss": 0.6769, "step": 26170 }, { "epoch": 0.47544675286938837, "grad_norm": 0.8533065053832714, "learning_rate": 1.0936556196637371e-05, "loss": 0.6641, "step": 26180 }, { "epoch": 0.4756283597268633, "grad_norm": 0.8410414397968401, "learning_rate": 1.0930818356356503e-05, "loss": 0.6724, "step": 26190 }, { "epoch": 0.4758099665843382, "grad_norm": 0.856934654912025, "learning_rate": 1.092508020692901e-05, "loss": 0.6746, "step": 26200 }, { "epoch": 0.47599157344181314, "grad_norm": 0.8562804550001817, "learning_rate": 1.091934175026067e-05, "loss": 0.67, "step": 26210 }, { "epoch": 0.4761731802992881, "grad_norm": 0.8768150373147181, "learning_rate": 1.0913602988257357e-05, "loss": 0.6548, "step": 26220 }, { "epoch": 0.47635478715676305, "grad_norm": 0.9232639792067603, "learning_rate": 1.0907863922825049e-05, "loss": 0.679, "step": 26230 }, { "epoch": 0.476536394014238, "grad_norm": 0.8328263557936686, "learning_rate": 1.0902124555869824e-05, "loss": 0.6693, "step": 26240 }, { "epoch": 0.4767180008717129, "grad_norm": 0.8411511095911759, "learning_rate": 1.0896384889297863e-05, "loss": 0.6796, "step": 26250 }, { "epoch": 0.4768996077291879, "grad_norm": 0.8381283334156374, "learning_rate": 1.089064492501544e-05, "loss": 0.6579, "step": 26260 }, { "epoch": 0.4770812145866628, "grad_norm": 0.8497984934018257, "learning_rate": 1.0884904664928933e-05, "loss": 0.6735, "step": 26270 }, { "epoch": 0.47726282144413773, "grad_norm": 0.8791616454240847, "learning_rate": 1.087916411094482e-05, "loss": 0.6852, "step": 26280 }, { "epoch": 0.47744442830161266, "grad_norm": 0.9046695068329073, "learning_rate": 1.0873423264969671e-05, "loss": 0.6787, "step": 26290 }, { "epoch": 0.4776260351590876, "grad_norm": 0.8445336884118816, "learning_rate": 1.0867682128910153e-05, "loss": 0.654, "step": 26300 }, { "epoch": 0.47780764201656256, "grad_norm": 0.8311058288550103, "learning_rate": 1.0861940704673038e-05, "loss": 0.6652, "step": 26310 }, { "epoch": 0.4779892488740375, "grad_norm": 0.8453811155904875, "learning_rate": 1.0856198994165183e-05, "loss": 0.664, "step": 26320 }, { "epoch": 0.4781708557315124, "grad_norm": 0.8503962328752341, "learning_rate": 1.0850456999293549e-05, "loss": 0.6718, "step": 26330 }, { "epoch": 0.47835246258898734, "grad_norm": 0.8355643617159053, "learning_rate": 1.0844714721965183e-05, "loss": 0.6545, "step": 26340 }, { "epoch": 0.4785340694464623, "grad_norm": 0.8816045082958158, "learning_rate": 1.0838972164087234e-05, "loss": 0.6564, "step": 26350 }, { "epoch": 0.47871567630393724, "grad_norm": 0.9000155330019457, "learning_rate": 1.0833229327566939e-05, "loss": 0.6645, "step": 26360 }, { "epoch": 0.47889728316141217, "grad_norm": 0.8610410959962087, "learning_rate": 1.0827486214311627e-05, "loss": 0.6828, "step": 26370 }, { "epoch": 0.4790788900188871, "grad_norm": 0.8613289398325907, "learning_rate": 1.0821742826228722e-05, "loss": 0.6519, "step": 26380 }, { "epoch": 0.4792604968763621, "grad_norm": 0.8737276476487119, "learning_rate": 1.0815999165225738e-05, "loss": 0.6741, "step": 26390 }, { "epoch": 0.479442103733837, "grad_norm": 0.8686100113427878, "learning_rate": 1.081025523321028e-05, "loss": 0.6593, "step": 26400 }, { "epoch": 0.4796237105913119, "grad_norm": 0.8661960189094977, "learning_rate": 1.0804511032090041e-05, "loss": 0.659, "step": 26410 }, { "epoch": 0.47980531744878685, "grad_norm": 0.8232580103791902, "learning_rate": 1.0798766563772806e-05, "loss": 0.6709, "step": 26420 }, { "epoch": 0.47998692430626183, "grad_norm": 0.8497734045432006, "learning_rate": 1.0793021830166448e-05, "loss": 0.6567, "step": 26430 }, { "epoch": 0.48016853116373676, "grad_norm": 0.848927222303062, "learning_rate": 1.0787276833178927e-05, "loss": 0.6559, "step": 26440 }, { "epoch": 0.4803501380212117, "grad_norm": 0.8493311084966412, "learning_rate": 1.0781531574718294e-05, "loss": 0.6586, "step": 26450 }, { "epoch": 0.4805317448786866, "grad_norm": 0.8531686020310428, "learning_rate": 1.077578605669268e-05, "loss": 0.6682, "step": 26460 }, { "epoch": 0.48071335173616153, "grad_norm": 0.8774094060888364, "learning_rate": 1.077004028101031e-05, "loss": 0.6661, "step": 26470 }, { "epoch": 0.4808949585936365, "grad_norm": 0.928237765717057, "learning_rate": 1.0764294249579493e-05, "loss": 0.6656, "step": 26480 }, { "epoch": 0.48107656545111144, "grad_norm": 0.8784652008291403, "learning_rate": 1.0758547964308615e-05, "loss": 0.6686, "step": 26490 }, { "epoch": 0.48125817230858636, "grad_norm": 0.8445130774157455, "learning_rate": 1.075280142710615e-05, "loss": 0.6684, "step": 26500 }, { "epoch": 0.4814397791660613, "grad_norm": 0.8606795473131897, "learning_rate": 1.0747054639880666e-05, "loss": 0.6703, "step": 26510 }, { "epoch": 0.48162138602353627, "grad_norm": 0.8398495530325435, "learning_rate": 1.0741307604540803e-05, "loss": 0.6655, "step": 26520 }, { "epoch": 0.4818029928810112, "grad_norm": 0.8300381606883808, "learning_rate": 1.0735560322995284e-05, "loss": 0.6803, "step": 26530 }, { "epoch": 0.4819845997384861, "grad_norm": 0.8256601593961408, "learning_rate": 1.0729812797152922e-05, "loss": 0.6572, "step": 26540 }, { "epoch": 0.48216620659596104, "grad_norm": 0.827817056670684, "learning_rate": 1.0724065028922596e-05, "loss": 0.6585, "step": 26550 }, { "epoch": 0.482347813453436, "grad_norm": 0.8516895029241022, "learning_rate": 1.0718317020213283e-05, "loss": 0.6661, "step": 26560 }, { "epoch": 0.48252942031091095, "grad_norm": 0.8592347451469415, "learning_rate": 1.071256877293403e-05, "loss": 0.6626, "step": 26570 }, { "epoch": 0.4827110271683859, "grad_norm": 0.8384191466119066, "learning_rate": 1.0706820288993962e-05, "loss": 0.6664, "step": 26580 }, { "epoch": 0.4828926340258608, "grad_norm": 0.8225575893365904, "learning_rate": 1.0701071570302286e-05, "loss": 0.6563, "step": 26590 }, { "epoch": 0.4830742408833358, "grad_norm": 0.8570118738054503, "learning_rate": 1.0695322618768287e-05, "loss": 0.6648, "step": 26600 }, { "epoch": 0.4832558477408107, "grad_norm": 0.8651331141999598, "learning_rate": 1.0689573436301326e-05, "loss": 0.6463, "step": 26610 }, { "epoch": 0.48343745459828563, "grad_norm": 0.8810559332138295, "learning_rate": 1.0683824024810843e-05, "loss": 0.6697, "step": 26620 }, { "epoch": 0.48361906145576056, "grad_norm": 0.8167184588005215, "learning_rate": 1.0678074386206352e-05, "loss": 0.6568, "step": 26630 }, { "epoch": 0.4838006683132355, "grad_norm": 0.854143650543636, "learning_rate": 1.0672324522397443e-05, "loss": 0.656, "step": 26640 }, { "epoch": 0.48398227517071046, "grad_norm": 0.8752414910865425, "learning_rate": 1.066657443529378e-05, "loss": 0.6697, "step": 26650 }, { "epoch": 0.4841638820281854, "grad_norm": 0.8876585059286898, "learning_rate": 1.0660824126805103e-05, "loss": 0.6698, "step": 26660 }, { "epoch": 0.4843454888856603, "grad_norm": 0.8678082873783189, "learning_rate": 1.0655073598841223e-05, "loss": 0.6673, "step": 26670 }, { "epoch": 0.48452709574313524, "grad_norm": 0.8170341298562334, "learning_rate": 1.0649322853312028e-05, "loss": 0.6747, "step": 26680 }, { "epoch": 0.4847087026006102, "grad_norm": 0.8527169839025349, "learning_rate": 1.0643571892127472e-05, "loss": 0.6706, "step": 26690 }, { "epoch": 0.48489030945808514, "grad_norm": 0.8457179284625146, "learning_rate": 1.0637820717197582e-05, "loss": 0.6651, "step": 26700 }, { "epoch": 0.48507191631556007, "grad_norm": 0.8501688616652437, "learning_rate": 1.0632069330432467e-05, "loss": 0.6562, "step": 26710 }, { "epoch": 0.485253523173035, "grad_norm": 0.8949267546838215, "learning_rate": 1.062631773374229e-05, "loss": 0.665, "step": 26720 }, { "epoch": 0.48543513003051, "grad_norm": 0.8755173166775859, "learning_rate": 1.0620565929037294e-05, "loss": 0.6837, "step": 26730 }, { "epoch": 0.4856167368879849, "grad_norm": 0.8634163230506796, "learning_rate": 1.061481391822779e-05, "loss": 0.6608, "step": 26740 }, { "epoch": 0.4857983437454598, "grad_norm": 0.8643123424220657, "learning_rate": 1.0609061703224152e-05, "loss": 0.6436, "step": 26750 }, { "epoch": 0.48597995060293475, "grad_norm": 0.855395289231336, "learning_rate": 1.0603309285936829e-05, "loss": 0.6604, "step": 26760 }, { "epoch": 0.48616155746040973, "grad_norm": 0.8691473376142278, "learning_rate": 1.0597556668276332e-05, "loss": 0.6622, "step": 26770 }, { "epoch": 0.48634316431788466, "grad_norm": 0.8709799889894542, "learning_rate": 1.0591803852153242e-05, "loss": 0.6587, "step": 26780 }, { "epoch": 0.4865247711753596, "grad_norm": 0.9170057068083501, "learning_rate": 1.0586050839478201e-05, "loss": 0.6682, "step": 26790 }, { "epoch": 0.4867063780328345, "grad_norm": 0.8832097424649683, "learning_rate": 1.0580297632161921e-05, "loss": 0.6622, "step": 26800 }, { "epoch": 0.48688798489030943, "grad_norm": 0.8848476515575074, "learning_rate": 1.0574544232115178e-05, "loss": 0.6602, "step": 26810 }, { "epoch": 0.4870695917477844, "grad_norm": 0.8449377539196127, "learning_rate": 1.0568790641248812e-05, "loss": 0.6757, "step": 26820 }, { "epoch": 0.48725119860525934, "grad_norm": 0.8564744543887417, "learning_rate": 1.0563036861473723e-05, "loss": 0.6464, "step": 26830 }, { "epoch": 0.48743280546273426, "grad_norm": 0.8722565681832627, "learning_rate": 1.0557282894700877e-05, "loss": 0.6676, "step": 26840 }, { "epoch": 0.4876144123202092, "grad_norm": 0.8756954644163444, "learning_rate": 1.0551528742841304e-05, "loss": 0.6626, "step": 26850 }, { "epoch": 0.48779601917768417, "grad_norm": 0.833710360474107, "learning_rate": 1.0545774407806088e-05, "loss": 0.6652, "step": 26860 }, { "epoch": 0.4879776260351591, "grad_norm": 0.8353360565419463, "learning_rate": 1.0540019891506383e-05, "loss": 0.6543, "step": 26870 }, { "epoch": 0.488159232892634, "grad_norm": 0.863837415590219, "learning_rate": 1.0534265195853402e-05, "loss": 0.6603, "step": 26880 }, { "epoch": 0.48834083975010895, "grad_norm": 0.8489080679562885, "learning_rate": 1.0528510322758405e-05, "loss": 0.6472, "step": 26890 }, { "epoch": 0.4885224466075839, "grad_norm": 0.9455617018781546, "learning_rate": 1.0522755274132725e-05, "loss": 0.6818, "step": 26900 }, { "epoch": 0.48870405346505885, "grad_norm": 0.8927492181970108, "learning_rate": 1.0517000051887752e-05, "loss": 0.6648, "step": 26910 }, { "epoch": 0.4888856603225338, "grad_norm": 0.8619124647038022, "learning_rate": 1.0511244657934924e-05, "loss": 0.6713, "step": 26920 }, { "epoch": 0.4890672671800087, "grad_norm": 0.9207430698882207, "learning_rate": 1.050548909418575e-05, "loss": 0.6899, "step": 26930 }, { "epoch": 0.4892488740374837, "grad_norm": 0.8783656321598123, "learning_rate": 1.0499733362551783e-05, "loss": 0.6647, "step": 26940 }, { "epoch": 0.4894304808949586, "grad_norm": 0.8348988677853701, "learning_rate": 1.0493977464944635e-05, "loss": 0.6599, "step": 26950 }, { "epoch": 0.48961208775243353, "grad_norm": 0.8300408652057842, "learning_rate": 1.048822140327598e-05, "loss": 0.6544, "step": 26960 }, { "epoch": 0.48979369460990846, "grad_norm": 0.8520357744286472, "learning_rate": 1.0482465179457538e-05, "loss": 0.6725, "step": 26970 }, { "epoch": 0.4899753014673834, "grad_norm": 0.8289420956546882, "learning_rate": 1.0476708795401088e-05, "loss": 0.6581, "step": 26980 }, { "epoch": 0.49015690832485836, "grad_norm": 0.8750697168296354, "learning_rate": 1.0470952253018456e-05, "loss": 0.6572, "step": 26990 }, { "epoch": 0.4903385151823333, "grad_norm": 0.8765466902709713, "learning_rate": 1.0465195554221525e-05, "loss": 0.6542, "step": 27000 }, { "epoch": 0.4905201220398082, "grad_norm": 0.8212797164991765, "learning_rate": 1.0459438700922235e-05, "loss": 0.6626, "step": 27010 }, { "epoch": 0.49070172889728314, "grad_norm": 0.8697055135553075, "learning_rate": 1.0453681695032569e-05, "loss": 0.6605, "step": 27020 }, { "epoch": 0.4908833357547581, "grad_norm": 0.8422898510522102, "learning_rate": 1.0447924538464565e-05, "loss": 0.6481, "step": 27030 }, { "epoch": 0.49106494261223305, "grad_norm": 0.854523631187104, "learning_rate": 1.0442167233130307e-05, "loss": 0.6515, "step": 27040 }, { "epoch": 0.49124654946970797, "grad_norm": 0.8502165773769155, "learning_rate": 1.0436409780941935e-05, "loss": 0.6622, "step": 27050 }, { "epoch": 0.4914281563271829, "grad_norm": 0.8606947952842262, "learning_rate": 1.0430652183811628e-05, "loss": 0.6725, "step": 27060 }, { "epoch": 0.4916097631846579, "grad_norm": 0.8786974862440199, "learning_rate": 1.0424894443651627e-05, "loss": 0.6711, "step": 27070 }, { "epoch": 0.4917913700421328, "grad_norm": 0.8322856894642433, "learning_rate": 1.041913656237421e-05, "loss": 0.6462, "step": 27080 }, { "epoch": 0.49197297689960773, "grad_norm": 0.8326681824433462, "learning_rate": 1.04133785418917e-05, "loss": 0.6653, "step": 27090 }, { "epoch": 0.49215458375708265, "grad_norm": 0.8840974595718608, "learning_rate": 1.0407620384116475e-05, "loss": 0.6634, "step": 27100 }, { "epoch": 0.4923361906145576, "grad_norm": 0.8705172291567449, "learning_rate": 1.0401862090960953e-05, "loss": 0.6643, "step": 27110 }, { "epoch": 0.49251779747203256, "grad_norm": 0.8913208245179127, "learning_rate": 1.03961036643376e-05, "loss": 0.672, "step": 27120 }, { "epoch": 0.4926994043295075, "grad_norm": 0.8803203161253099, "learning_rate": 1.0390345106158923e-05, "loss": 0.6577, "step": 27130 }, { "epoch": 0.4928810111869824, "grad_norm": 0.9315530959681134, "learning_rate": 1.0384586418337474e-05, "loss": 0.6853, "step": 27140 }, { "epoch": 0.49306261804445733, "grad_norm": 0.9214302248177361, "learning_rate": 1.037882760278585e-05, "loss": 0.6683, "step": 27150 }, { "epoch": 0.4932442249019323, "grad_norm": 0.857794955571708, "learning_rate": 1.0373068661416688e-05, "loss": 0.6718, "step": 27160 }, { "epoch": 0.49342583175940724, "grad_norm": 0.8785061711644895, "learning_rate": 1.0367309596142667e-05, "loss": 0.6509, "step": 27170 }, { "epoch": 0.49360743861688217, "grad_norm": 0.8564815372565462, "learning_rate": 1.0361550408876511e-05, "loss": 0.6609, "step": 27180 }, { "epoch": 0.4937890454743571, "grad_norm": 0.8983785183026683, "learning_rate": 1.0355791101530973e-05, "loss": 0.6566, "step": 27190 }, { "epoch": 0.49397065233183207, "grad_norm": 0.8853670031175771, "learning_rate": 1.0350031676018866e-05, "loss": 0.6492, "step": 27200 }, { "epoch": 0.494152259189307, "grad_norm": 0.8170578255072702, "learning_rate": 1.034427213425302e-05, "loss": 0.647, "step": 27210 }, { "epoch": 0.4943338660467819, "grad_norm": 0.8269751093925691, "learning_rate": 1.0338512478146318e-05, "loss": 0.6649, "step": 27220 }, { "epoch": 0.49451547290425685, "grad_norm": 0.8339396268876169, "learning_rate": 1.0332752709611678e-05, "loss": 0.6538, "step": 27230 }, { "epoch": 0.49469707976173183, "grad_norm": 0.8462616966192763, "learning_rate": 1.0326992830562052e-05, "loss": 0.6639, "step": 27240 }, { "epoch": 0.49487868661920675, "grad_norm": 0.8184437053923248, "learning_rate": 1.0321232842910434e-05, "loss": 0.6562, "step": 27250 }, { "epoch": 0.4950602934766817, "grad_norm": 0.8889081776323131, "learning_rate": 1.0315472748569848e-05, "loss": 0.6578, "step": 27260 }, { "epoch": 0.4952419003341566, "grad_norm": 0.8422985557521749, "learning_rate": 1.030971254945336e-05, "loss": 0.6604, "step": 27270 }, { "epoch": 0.49542350719163153, "grad_norm": 0.8552554336136793, "learning_rate": 1.0303952247474066e-05, "loss": 0.6558, "step": 27280 }, { "epoch": 0.4956051140491065, "grad_norm": 0.829177083496428, "learning_rate": 1.0298191844545094e-05, "loss": 0.666, "step": 27290 }, { "epoch": 0.49578672090658144, "grad_norm": 0.819790220051922, "learning_rate": 1.0292431342579609e-05, "loss": 0.6617, "step": 27300 }, { "epoch": 0.49596832776405636, "grad_norm": 0.8300912519029191, "learning_rate": 1.0286670743490817e-05, "loss": 0.6438, "step": 27310 }, { "epoch": 0.4961499346215313, "grad_norm": 0.8767098636072295, "learning_rate": 1.028091004919194e-05, "loss": 0.6687, "step": 27320 }, { "epoch": 0.49633154147900627, "grad_norm": 0.8473537475606784, "learning_rate": 1.0275149261596244e-05, "loss": 0.6737, "step": 27330 }, { "epoch": 0.4965131483364812, "grad_norm": 0.8613801429099711, "learning_rate": 1.0269388382617021e-05, "loss": 0.6806, "step": 27340 }, { "epoch": 0.4966947551939561, "grad_norm": 0.8324904353263399, "learning_rate": 1.0263627414167591e-05, "loss": 0.6567, "step": 27350 }, { "epoch": 0.49687636205143104, "grad_norm": 0.845919932372706, "learning_rate": 1.0257866358161313e-05, "loss": 0.6493, "step": 27360 }, { "epoch": 0.497057968908906, "grad_norm": 0.8589986231192005, "learning_rate": 1.025210521651156e-05, "loss": 0.6568, "step": 27370 }, { "epoch": 0.49723957576638095, "grad_norm": 0.9237694252711921, "learning_rate": 1.0246343991131756e-05, "loss": 0.6587, "step": 27380 }, { "epoch": 0.4974211826238559, "grad_norm": 0.8316751312090453, "learning_rate": 1.0240582683935326e-05, "loss": 0.6627, "step": 27390 }, { "epoch": 0.4976027894813308, "grad_norm": 0.8470337363689374, "learning_rate": 1.0234821296835737e-05, "loss": 0.673, "step": 27400 }, { "epoch": 0.4977843963388058, "grad_norm": 0.8361215114020382, "learning_rate": 1.0229059831746489e-05, "loss": 0.6689, "step": 27410 }, { "epoch": 0.4979660031962807, "grad_norm": 0.8491108181868336, "learning_rate": 1.0223298290581092e-05, "loss": 0.6539, "step": 27420 }, { "epoch": 0.49814761005375563, "grad_norm": 0.8550168888251897, "learning_rate": 1.021753667525309e-05, "loss": 0.6726, "step": 27430 }, { "epoch": 0.49832921691123055, "grad_norm": 0.8431100204023718, "learning_rate": 1.0211774987676054e-05, "loss": 0.6608, "step": 27440 }, { "epoch": 0.4985108237687055, "grad_norm": 0.8163456082992381, "learning_rate": 1.0206013229763576e-05, "loss": 0.6558, "step": 27450 }, { "epoch": 0.49869243062618046, "grad_norm": 0.8069218957952062, "learning_rate": 1.0200251403429269e-05, "loss": 0.6576, "step": 27460 }, { "epoch": 0.4988740374836554, "grad_norm": 0.8405117336358046, "learning_rate": 1.0194489510586768e-05, "loss": 0.6481, "step": 27470 }, { "epoch": 0.4990556443411303, "grad_norm": 0.8888323696223339, "learning_rate": 1.018872755314974e-05, "loss": 0.6603, "step": 27480 }, { "epoch": 0.49923725119860524, "grad_norm": 0.8600302480001052, "learning_rate": 1.0182965533031859e-05, "loss": 0.6581, "step": 27490 }, { "epoch": 0.4994188580560802, "grad_norm": 0.8765005881236164, "learning_rate": 1.017720345214683e-05, "loss": 0.6738, "step": 27500 }, { "epoch": 0.49960046491355514, "grad_norm": 0.8755619986644232, "learning_rate": 1.017144131240838e-05, "loss": 0.6577, "step": 27510 }, { "epoch": 0.49978207177103007, "grad_norm": 0.893134202444756, "learning_rate": 1.0165679115730241e-05, "loss": 0.6609, "step": 27520 }, { "epoch": 0.499963678628505, "grad_norm": 0.8469625263822984, "learning_rate": 1.0159916864026181e-05, "loss": 0.6733, "step": 27530 }, { "epoch": 0.5001452854859799, "grad_norm": 0.8538737858336092, "learning_rate": 1.0154154559209977e-05, "loss": 0.6551, "step": 27540 }, { "epoch": 0.5003268923434548, "grad_norm": 0.8606428966515954, "learning_rate": 1.0148392203195427e-05, "loss": 0.654, "step": 27550 }, { "epoch": 0.5005084992009299, "grad_norm": 0.8972001333660036, "learning_rate": 1.0142629797896342e-05, "loss": 0.6663, "step": 27560 }, { "epoch": 0.5006901060584048, "grad_norm": 0.8469945583751611, "learning_rate": 1.0136867345226556e-05, "loss": 0.6618, "step": 27570 }, { "epoch": 0.5008717129158797, "grad_norm": 0.8499195496619821, "learning_rate": 1.0131104847099908e-05, "loss": 0.6711, "step": 27580 }, { "epoch": 0.5010533197733547, "grad_norm": 0.8457654869371701, "learning_rate": 1.0125342305430268e-05, "loss": 0.6653, "step": 27590 }, { "epoch": 0.5012349266308296, "grad_norm": 0.8633930077389057, "learning_rate": 1.0119579722131505e-05, "loss": 0.6638, "step": 27600 }, { "epoch": 0.5014165334883045, "grad_norm": 0.837700686932908, "learning_rate": 1.011381709911751e-05, "loss": 0.6499, "step": 27610 }, { "epoch": 0.5015981403457794, "grad_norm": 0.9169070399485515, "learning_rate": 1.0108054438302184e-05, "loss": 0.674, "step": 27620 }, { "epoch": 0.5017797472032544, "grad_norm": 0.8209269067468057, "learning_rate": 1.0102291741599441e-05, "loss": 0.6624, "step": 27630 }, { "epoch": 0.5019613540607293, "grad_norm": 0.875476674153006, "learning_rate": 1.0096529010923213e-05, "loss": 0.6738, "step": 27640 }, { "epoch": 0.5021429609182043, "grad_norm": 0.8747935045103862, "learning_rate": 1.0090766248187434e-05, "loss": 0.6562, "step": 27650 }, { "epoch": 0.5023245677756792, "grad_norm": 0.8239932341157423, "learning_rate": 1.0085003455306053e-05, "loss": 0.6608, "step": 27660 }, { "epoch": 0.5025061746331542, "grad_norm": 0.8613644177414647, "learning_rate": 1.007924063419303e-05, "loss": 0.6635, "step": 27670 }, { "epoch": 0.5026877814906291, "grad_norm": 0.8704083814364011, "learning_rate": 1.0073477786762331e-05, "loss": 0.6574, "step": 27680 }, { "epoch": 0.502869388348104, "grad_norm": 0.8483217961595182, "learning_rate": 1.0067714914927937e-05, "loss": 0.648, "step": 27690 }, { "epoch": 0.5030509952055789, "grad_norm": 0.8783420391994239, "learning_rate": 1.0061952020603829e-05, "loss": 0.6614, "step": 27700 }, { "epoch": 0.5032326020630539, "grad_norm": 0.8565389046178064, "learning_rate": 1.0056189105704001e-05, "loss": 0.6723, "step": 27710 }, { "epoch": 0.5034142089205288, "grad_norm": 1.3353685850881107, "learning_rate": 1.0050426172142454e-05, "loss": 0.6562, "step": 27720 }, { "epoch": 0.5035958157780038, "grad_norm": 0.846322088351231, "learning_rate": 1.0044663221833188e-05, "loss": 0.657, "step": 27730 }, { "epoch": 0.5037774226354788, "grad_norm": 0.8271031984606976, "learning_rate": 1.003890025669022e-05, "loss": 0.6683, "step": 27740 }, { "epoch": 0.5039590294929537, "grad_norm": 0.8422244404139216, "learning_rate": 1.0033137278627565e-05, "loss": 0.6691, "step": 27750 }, { "epoch": 0.5041406363504286, "grad_norm": 0.8421543828270428, "learning_rate": 1.002737428955924e-05, "loss": 0.665, "step": 27760 }, { "epoch": 0.5043222432079035, "grad_norm": 0.8502371939278102, "learning_rate": 1.0021611291399272e-05, "loss": 0.6576, "step": 27770 }, { "epoch": 0.5045038500653785, "grad_norm": 0.9089403521524223, "learning_rate": 1.0015848286061688e-05, "loss": 0.6625, "step": 27780 }, { "epoch": 0.5046854569228534, "grad_norm": 0.9056400882655314, "learning_rate": 1.0010085275460515e-05, "loss": 0.6716, "step": 27790 }, { "epoch": 0.5048670637803283, "grad_norm": 0.8303543424944657, "learning_rate": 1.0004322261509786e-05, "loss": 0.6506, "step": 27800 }, { "epoch": 0.5050486706378032, "grad_norm": 0.8776220735086726, "learning_rate": 9.998559246123531e-06, "loss": 0.6619, "step": 27810 }, { "epoch": 0.5052302774952783, "grad_norm": 0.8716677160548294, "learning_rate": 9.992796231215784e-06, "loss": 0.657, "step": 27820 }, { "epoch": 0.5054118843527532, "grad_norm": 0.8400722321959099, "learning_rate": 9.98703321870058e-06, "loss": 0.6631, "step": 27830 }, { "epoch": 0.5055934912102281, "grad_norm": 0.8707589847805998, "learning_rate": 9.98127021049195e-06, "loss": 0.6625, "step": 27840 }, { "epoch": 0.505775098067703, "grad_norm": 0.8033320428969651, "learning_rate": 9.975507208503916e-06, "loss": 0.6635, "step": 27850 }, { "epoch": 0.505956704925178, "grad_norm": 0.867232057010577, "learning_rate": 9.96974421465052e-06, "loss": 0.6719, "step": 27860 }, { "epoch": 0.5061383117826529, "grad_norm": 0.8952636707226651, "learning_rate": 9.963981230845775e-06, "loss": 0.6513, "step": 27870 }, { "epoch": 0.5063199186401278, "grad_norm": 0.8233752174147304, "learning_rate": 9.958218259003712e-06, "loss": 0.6531, "step": 27880 }, { "epoch": 0.5065015254976027, "grad_norm": 0.8536306631656994, "learning_rate": 9.952455301038342e-06, "loss": 0.6559, "step": 27890 }, { "epoch": 0.5066831323550778, "grad_norm": 0.8249223456763712, "learning_rate": 9.946692358863684e-06, "loss": 0.6423, "step": 27900 }, { "epoch": 0.5068647392125527, "grad_norm": 0.870337862380932, "learning_rate": 9.940929434393746e-06, "loss": 0.6463, "step": 27910 }, { "epoch": 0.5070463460700276, "grad_norm": 0.8876290105738954, "learning_rate": 9.93516652954253e-06, "loss": 0.6521, "step": 27920 }, { "epoch": 0.5072279529275026, "grad_norm": 0.8323407757944834, "learning_rate": 9.92940364622403e-06, "loss": 0.6627, "step": 27930 }, { "epoch": 0.5074095597849775, "grad_norm": 0.8372728916340834, "learning_rate": 9.923640786352234e-06, "loss": 0.6498, "step": 27940 }, { "epoch": 0.5075911666424524, "grad_norm": 0.8809968061165453, "learning_rate": 9.917877951841129e-06, "loss": 0.6568, "step": 27950 }, { "epoch": 0.5077727734999273, "grad_norm": 0.8741997156137938, "learning_rate": 9.912115144604681e-06, "loss": 0.6615, "step": 27960 }, { "epoch": 0.5079543803574023, "grad_norm": 0.8554803163079702, "learning_rate": 9.906352366556858e-06, "loss": 0.655, "step": 27970 }, { "epoch": 0.5081359872148772, "grad_norm": 0.847670206616698, "learning_rate": 9.900589619611609e-06, "loss": 0.6621, "step": 27980 }, { "epoch": 0.5083175940723522, "grad_norm": 0.82587570668328, "learning_rate": 9.894826905682886e-06, "loss": 0.6448, "step": 27990 }, { "epoch": 0.5084992009298271, "grad_norm": 0.8455173716047799, "learning_rate": 9.889064226684609e-06, "loss": 0.658, "step": 28000 }, { "epoch": 0.5086808077873021, "grad_norm": 0.8717255212565095, "learning_rate": 9.883301584530712e-06, "loss": 0.6553, "step": 28010 }, { "epoch": 0.508862414644777, "grad_norm": 0.8208680860875271, "learning_rate": 9.877538981135092e-06, "loss": 0.6583, "step": 28020 }, { "epoch": 0.5090440215022519, "grad_norm": 0.8114872104813842, "learning_rate": 9.871776418411655e-06, "loss": 0.6529, "step": 28030 }, { "epoch": 0.5092256283597268, "grad_norm": 0.8730814187482974, "learning_rate": 9.866013898274282e-06, "loss": 0.6548, "step": 28040 }, { "epoch": 0.5094072352172018, "grad_norm": 0.8460464816139008, "learning_rate": 9.86025142263683e-06, "loss": 0.6546, "step": 28050 }, { "epoch": 0.5095888420746767, "grad_norm": 0.8368485807441848, "learning_rate": 9.854488993413167e-06, "loss": 0.6559, "step": 28060 }, { "epoch": 0.5097704489321517, "grad_norm": 0.8624727614394793, "learning_rate": 9.84872661251712e-06, "loss": 0.6531, "step": 28070 }, { "epoch": 0.5099520557896267, "grad_norm": 0.8656131887634277, "learning_rate": 9.842964281862518e-06, "loss": 0.6514, "step": 28080 }, { "epoch": 0.5101336626471016, "grad_norm": 0.851819584974812, "learning_rate": 9.837202003363161e-06, "loss": 0.6567, "step": 28090 }, { "epoch": 0.5103152695045765, "grad_norm": 0.8699311940770407, "learning_rate": 9.83143977893284e-06, "loss": 0.6525, "step": 28100 }, { "epoch": 0.5104968763620514, "grad_norm": 0.8759732031205751, "learning_rate": 9.825677610485328e-06, "loss": 0.6523, "step": 28110 }, { "epoch": 0.5106784832195264, "grad_norm": 0.8671556461633779, "learning_rate": 9.81991549993437e-06, "loss": 0.6517, "step": 28120 }, { "epoch": 0.5108600900770013, "grad_norm": 0.8377532862786787, "learning_rate": 9.814153449193701e-06, "loss": 0.6648, "step": 28130 }, { "epoch": 0.5110416969344762, "grad_norm": 0.8616554703030801, "learning_rate": 9.808391460177037e-06, "loss": 0.6595, "step": 28140 }, { "epoch": 0.5112233037919511, "grad_norm": 0.8201375001502852, "learning_rate": 9.802629534798064e-06, "loss": 0.6483, "step": 28150 }, { "epoch": 0.5114049106494262, "grad_norm": 2.302581590475402, "learning_rate": 9.796867674970453e-06, "loss": 0.6536, "step": 28160 }, { "epoch": 0.5115865175069011, "grad_norm": 0.8454575630302711, "learning_rate": 9.791105882607857e-06, "loss": 0.6607, "step": 28170 }, { "epoch": 0.511768124364376, "grad_norm": 0.8371337911915616, "learning_rate": 9.785344159623897e-06, "loss": 0.667, "step": 28180 }, { "epoch": 0.511949731221851, "grad_norm": 0.8485901140764388, "learning_rate": 9.779582507932185e-06, "loss": 0.6554, "step": 28190 }, { "epoch": 0.5121313380793259, "grad_norm": 0.8586382150914321, "learning_rate": 9.773820929446291e-06, "loss": 0.6515, "step": 28200 }, { "epoch": 0.5123129449368008, "grad_norm": 0.8423047958047014, "learning_rate": 9.768059426079778e-06, "loss": 0.6697, "step": 28210 }, { "epoch": 0.5124945517942757, "grad_norm": 0.8604571482602373, "learning_rate": 9.762297999746169e-06, "loss": 0.6484, "step": 28220 }, { "epoch": 0.5126761586517506, "grad_norm": 0.871857542863211, "learning_rate": 9.756536652358978e-06, "loss": 0.6541, "step": 28230 }, { "epoch": 0.5128577655092257, "grad_norm": 0.8294952065510979, "learning_rate": 9.75077538583168e-06, "loss": 0.6535, "step": 28240 }, { "epoch": 0.5130393723667006, "grad_norm": 0.8327480051677995, "learning_rate": 9.745014202077718e-06, "loss": 0.6495, "step": 28250 }, { "epoch": 0.5132209792241755, "grad_norm": 0.8478401583995422, "learning_rate": 9.739253103010531e-06, "loss": 0.6585, "step": 28260 }, { "epoch": 0.5134025860816505, "grad_norm": 0.8607481006304663, "learning_rate": 9.733492090543505e-06, "loss": 0.6519, "step": 28270 }, { "epoch": 0.5135841929391254, "grad_norm": 0.8593717907301698, "learning_rate": 9.727731166590012e-06, "loss": 0.6564, "step": 28280 }, { "epoch": 0.5137657997966003, "grad_norm": 0.8690072770127588, "learning_rate": 9.721970333063386e-06, "loss": 0.653, "step": 28290 }, { "epoch": 0.5139474066540752, "grad_norm": 0.9122756959116436, "learning_rate": 9.71620959187694e-06, "loss": 0.6738, "step": 28300 }, { "epoch": 0.5141290135115502, "grad_norm": 0.8215034487591028, "learning_rate": 9.71044894494395e-06, "loss": 0.6497, "step": 28310 }, { "epoch": 0.5143106203690251, "grad_norm": 0.8221448780020262, "learning_rate": 9.70468839417766e-06, "loss": 0.6623, "step": 28320 }, { "epoch": 0.5144922272265001, "grad_norm": 0.859722265903805, "learning_rate": 9.698927941491287e-06, "loss": 0.6647, "step": 28330 }, { "epoch": 0.514673834083975, "grad_norm": 0.8541384297889504, "learning_rate": 9.69316758879801e-06, "loss": 0.6486, "step": 28340 }, { "epoch": 0.51485544094145, "grad_norm": 0.8279613103643799, "learning_rate": 9.68740733801098e-06, "loss": 0.6539, "step": 28350 }, { "epoch": 0.5150370477989249, "grad_norm": 0.9172596211171035, "learning_rate": 9.681647191043305e-06, "loss": 0.6656, "step": 28360 }, { "epoch": 0.5152186546563998, "grad_norm": 0.8419852054030685, "learning_rate": 9.675887149808076e-06, "loss": 0.6716, "step": 28370 }, { "epoch": 0.5154002615138747, "grad_norm": 0.8243856988386103, "learning_rate": 9.670127216218324e-06, "loss": 0.6606, "step": 28380 }, { "epoch": 0.5155818683713497, "grad_norm": 0.8577463173576876, "learning_rate": 9.664367392187073e-06, "loss": 0.6619, "step": 28390 }, { "epoch": 0.5157634752288246, "grad_norm": 0.8375858072041775, "learning_rate": 9.658607679627282e-06, "loss": 0.6547, "step": 28400 }, { "epoch": 0.5159450820862996, "grad_norm": 0.8519105844821875, "learning_rate": 9.652848080451899e-06, "loss": 0.654, "step": 28410 }, { "epoch": 0.5161266889437746, "grad_norm": 0.8671952238156488, "learning_rate": 9.64708859657381e-06, "loss": 0.6521, "step": 28420 }, { "epoch": 0.5163082958012495, "grad_norm": 0.8855631813073904, "learning_rate": 9.641329229905889e-06, "loss": 0.6421, "step": 28430 }, { "epoch": 0.5164899026587244, "grad_norm": 0.8410650379818544, "learning_rate": 9.635569982360946e-06, "loss": 0.6421, "step": 28440 }, { "epoch": 0.5166715095161993, "grad_norm": 0.8546395159770531, "learning_rate": 9.629810855851761e-06, "loss": 0.6552, "step": 28450 }, { "epoch": 0.5168531163736743, "grad_norm": 0.8757102720812107, "learning_rate": 9.624051852291085e-06, "loss": 0.6653, "step": 28460 }, { "epoch": 0.5170347232311492, "grad_norm": 0.8353447261122146, "learning_rate": 9.618292973591606e-06, "loss": 0.6508, "step": 28470 }, { "epoch": 0.5172163300886241, "grad_norm": 0.8834281265085371, "learning_rate": 9.612534221665996e-06, "loss": 0.6533, "step": 28480 }, { "epoch": 0.517397936946099, "grad_norm": 0.8560273030232478, "learning_rate": 9.606775598426859e-06, "loss": 0.6616, "step": 28490 }, { "epoch": 0.5175795438035741, "grad_norm": 0.8475605114530257, "learning_rate": 9.601017105786778e-06, "loss": 0.6665, "step": 28500 }, { "epoch": 0.517761150661049, "grad_norm": 0.8413442477665704, "learning_rate": 9.595258745658278e-06, "loss": 0.659, "step": 28510 }, { "epoch": 0.5179427575185239, "grad_norm": 0.8487351922242518, "learning_rate": 9.58950051995385e-06, "loss": 0.6653, "step": 28520 }, { "epoch": 0.5181243643759988, "grad_norm": 0.8546935412249996, "learning_rate": 9.583742430585934e-06, "loss": 0.6666, "step": 28530 }, { "epoch": 0.5183059712334738, "grad_norm": 0.851146140157754, "learning_rate": 9.577984479466927e-06, "loss": 0.6612, "step": 28540 }, { "epoch": 0.5184875780909487, "grad_norm": 0.8653459317215133, "learning_rate": 9.57222666850918e-06, "loss": 0.6476, "step": 28550 }, { "epoch": 0.5186691849484236, "grad_norm": 0.8479609217947106, "learning_rate": 9.566468999624992e-06, "loss": 0.6592, "step": 28560 }, { "epoch": 0.5188507918058985, "grad_norm": 0.8570033229836554, "learning_rate": 9.560711474726629e-06, "loss": 0.6496, "step": 28570 }, { "epoch": 0.5190323986633736, "grad_norm": 0.8555352326462424, "learning_rate": 9.554954095726292e-06, "loss": 0.6696, "step": 28580 }, { "epoch": 0.5192140055208485, "grad_norm": 0.8420655161749276, "learning_rate": 9.549196864536149e-06, "loss": 0.658, "step": 28590 }, { "epoch": 0.5193956123783234, "grad_norm": 0.8614771809295928, "learning_rate": 9.543439783068303e-06, "loss": 0.6422, "step": 28600 }, { "epoch": 0.5195772192357984, "grad_norm": 0.8548819000628406, "learning_rate": 9.537682853234825e-06, "loss": 0.6545, "step": 28610 }, { "epoch": 0.5197588260932733, "grad_norm": 0.834584265334159, "learning_rate": 9.53192607694772e-06, "loss": 0.6666, "step": 28620 }, { "epoch": 0.5199404329507482, "grad_norm": 0.813425812637014, "learning_rate": 9.526169456118952e-06, "loss": 0.6508, "step": 28630 }, { "epoch": 0.5201220398082231, "grad_norm": 0.8440465856377287, "learning_rate": 9.520412992660429e-06, "loss": 0.6615, "step": 28640 }, { "epoch": 0.5203036466656981, "grad_norm": 0.8627878156555933, "learning_rate": 9.514656688484004e-06, "loss": 0.6572, "step": 28650 }, { "epoch": 0.520485253523173, "grad_norm": 0.8334659072388717, "learning_rate": 9.508900545501488e-06, "loss": 0.6549, "step": 28660 }, { "epoch": 0.520666860380648, "grad_norm": 0.8862916226599608, "learning_rate": 9.503144565624622e-06, "loss": 0.6651, "step": 28670 }, { "epoch": 0.520848467238123, "grad_norm": 0.8885293341062788, "learning_rate": 9.497388750765114e-06, "loss": 0.6572, "step": 28680 }, { "epoch": 0.5210300740955979, "grad_norm": 0.9092634203730078, "learning_rate": 9.491633102834592e-06, "loss": 0.6627, "step": 28690 }, { "epoch": 0.5212116809530728, "grad_norm": 0.8570829023516952, "learning_rate": 9.48587762374465e-06, "loss": 0.6673, "step": 28700 }, { "epoch": 0.5213932878105477, "grad_norm": 0.8267525024053717, "learning_rate": 9.480122315406815e-06, "loss": 0.6549, "step": 28710 }, { "epoch": 0.5215748946680226, "grad_norm": 0.8496467383712853, "learning_rate": 9.474367179732561e-06, "loss": 0.6618, "step": 28720 }, { "epoch": 0.5217565015254976, "grad_norm": 0.8437776667260976, "learning_rate": 9.468612218633306e-06, "loss": 0.6546, "step": 28730 }, { "epoch": 0.5219381083829725, "grad_norm": 0.8813752235493523, "learning_rate": 9.462857434020403e-06, "loss": 0.6486, "step": 28740 }, { "epoch": 0.5221197152404474, "grad_norm": 0.8506568230813346, "learning_rate": 9.457102827805155e-06, "loss": 0.6664, "step": 28750 }, { "epoch": 0.5223013220979225, "grad_norm": 0.8345053232339511, "learning_rate": 9.451348401898797e-06, "loss": 0.6542, "step": 28760 }, { "epoch": 0.5224829289553974, "grad_norm": 0.8194139145762966, "learning_rate": 9.445594158212514e-06, "loss": 0.6546, "step": 28770 }, { "epoch": 0.5226645358128723, "grad_norm": 0.8442969131025471, "learning_rate": 9.43984009865742e-06, "loss": 0.6659, "step": 28780 }, { "epoch": 0.5228461426703472, "grad_norm": 0.8539052588654312, "learning_rate": 9.434086225144581e-06, "loss": 0.6566, "step": 28790 }, { "epoch": 0.5230277495278222, "grad_norm": 0.8382040045123724, "learning_rate": 9.428332539584983e-06, "loss": 0.6447, "step": 28800 }, { "epoch": 0.5232093563852971, "grad_norm": 0.8434579264668274, "learning_rate": 9.42257904388957e-06, "loss": 0.6603, "step": 28810 }, { "epoch": 0.523390963242772, "grad_norm": 0.8547441994597677, "learning_rate": 9.416825739969207e-06, "loss": 0.6468, "step": 28820 }, { "epoch": 0.5235725701002469, "grad_norm": 0.8393965018457795, "learning_rate": 9.411072629734707e-06, "loss": 0.6309, "step": 28830 }, { "epoch": 0.523754176957722, "grad_norm": 0.9062694217767799, "learning_rate": 9.405319715096808e-06, "loss": 0.6627, "step": 28840 }, { "epoch": 0.5239357838151969, "grad_norm": 0.8341300612874661, "learning_rate": 9.399566997966184e-06, "loss": 0.6673, "step": 28850 }, { "epoch": 0.5241173906726718, "grad_norm": 0.8479478604099794, "learning_rate": 9.393814480253457e-06, "loss": 0.6501, "step": 28860 }, { "epoch": 0.5242989975301467, "grad_norm": 0.8531450724126218, "learning_rate": 9.388062163869164e-06, "loss": 0.6562, "step": 28870 }, { "epoch": 0.5244806043876217, "grad_norm": 0.793569374224786, "learning_rate": 9.382310050723794e-06, "loss": 0.6396, "step": 28880 }, { "epoch": 0.5246622112450966, "grad_norm": 0.9424101270634966, "learning_rate": 9.37655814272775e-06, "loss": 0.6747, "step": 28890 }, { "epoch": 0.5248438181025715, "grad_norm": 0.8705221030498128, "learning_rate": 9.370806441791379e-06, "loss": 0.6529, "step": 28900 }, { "epoch": 0.5250254249600464, "grad_norm": 0.8790288450772725, "learning_rate": 9.365054949824958e-06, "loss": 0.6657, "step": 28910 }, { "epoch": 0.5252070318175214, "grad_norm": 0.8342728461720088, "learning_rate": 9.359303668738689e-06, "loss": 0.643, "step": 28920 }, { "epoch": 0.5253886386749964, "grad_norm": 0.8633644549268229, "learning_rate": 9.353552600442708e-06, "loss": 0.6516, "step": 28930 }, { "epoch": 0.5255702455324713, "grad_norm": 0.8895921749519137, "learning_rate": 9.347801746847084e-06, "loss": 0.6669, "step": 28940 }, { "epoch": 0.5257518523899463, "grad_norm": 0.8419579967463712, "learning_rate": 9.342051109861807e-06, "loss": 0.6706, "step": 28950 }, { "epoch": 0.5259334592474212, "grad_norm": 0.8418994186852556, "learning_rate": 9.336300691396795e-06, "loss": 0.65, "step": 28960 }, { "epoch": 0.5261150661048961, "grad_norm": 0.8232849756111317, "learning_rate": 9.330550493361906e-06, "loss": 0.6453, "step": 28970 }, { "epoch": 0.526296672962371, "grad_norm": 0.8317724405804786, "learning_rate": 9.324800517666904e-06, "loss": 0.6627, "step": 28980 }, { "epoch": 0.526478279819846, "grad_norm": 0.8578427437726781, "learning_rate": 9.319050766221503e-06, "loss": 0.6475, "step": 28990 }, { "epoch": 0.5266598866773209, "grad_norm": 0.8388132601845293, "learning_rate": 9.313301240935319e-06, "loss": 0.6543, "step": 29000 }, { "epoch": 0.5268414935347959, "grad_norm": 0.8459115605853554, "learning_rate": 9.307551943717916e-06, "loss": 0.6572, "step": 29010 }, { "epoch": 0.5270231003922708, "grad_norm": 0.8726001238940801, "learning_rate": 9.301802876478759e-06, "loss": 0.6642, "step": 29020 }, { "epoch": 0.5272047072497458, "grad_norm": 0.8544725467676137, "learning_rate": 9.296054041127258e-06, "loss": 0.6439, "step": 29030 }, { "epoch": 0.5273863141072207, "grad_norm": 0.8649092338981421, "learning_rate": 9.290305439572727e-06, "loss": 0.6482, "step": 29040 }, { "epoch": 0.5275679209646956, "grad_norm": 0.8667773261391994, "learning_rate": 9.284557073724421e-06, "loss": 0.6561, "step": 29050 }, { "epoch": 0.5277495278221705, "grad_norm": 0.8218010214631162, "learning_rate": 9.278808945491505e-06, "loss": 0.6405, "step": 29060 }, { "epoch": 0.5279311346796455, "grad_norm": 0.8158531214287843, "learning_rate": 9.273061056783057e-06, "loss": 0.6473, "step": 29070 }, { "epoch": 0.5281127415371204, "grad_norm": 0.7840694912613927, "learning_rate": 9.267313409508098e-06, "loss": 0.6471, "step": 29080 }, { "epoch": 0.5282943483945953, "grad_norm": 0.8285267534597406, "learning_rate": 9.26156600557555e-06, "loss": 0.6498, "step": 29090 }, { "epoch": 0.5284759552520704, "grad_norm": 0.8754029276173318, "learning_rate": 9.255818846894264e-06, "loss": 0.6353, "step": 29100 }, { "epoch": 0.5286575621095453, "grad_norm": 0.8400620670345518, "learning_rate": 9.250071935373004e-06, "loss": 0.6682, "step": 29110 }, { "epoch": 0.5288391689670202, "grad_norm": 0.8520567792191218, "learning_rate": 9.244325272920455e-06, "loss": 0.6665, "step": 29120 }, { "epoch": 0.5290207758244951, "grad_norm": 0.8235546634496601, "learning_rate": 9.238578861445221e-06, "loss": 0.6467, "step": 29130 }, { "epoch": 0.5292023826819701, "grad_norm": 0.8332359346121165, "learning_rate": 9.232832702855817e-06, "loss": 0.6591, "step": 29140 }, { "epoch": 0.529383989539445, "grad_norm": 0.8501176664204213, "learning_rate": 9.22708679906068e-06, "loss": 0.6567, "step": 29150 }, { "epoch": 0.5295655963969199, "grad_norm": 0.8520581152329532, "learning_rate": 9.221341151968154e-06, "loss": 0.6576, "step": 29160 }, { "epoch": 0.5297472032543948, "grad_norm": 0.8535291819220935, "learning_rate": 9.21559576348651e-06, "loss": 0.6496, "step": 29170 }, { "epoch": 0.5299288101118699, "grad_norm": 0.8657116427406709, "learning_rate": 9.209850635523919e-06, "loss": 0.652, "step": 29180 }, { "epoch": 0.5301104169693448, "grad_norm": 0.8209640473405481, "learning_rate": 9.204105769988481e-06, "loss": 0.6502, "step": 29190 }, { "epoch": 0.5302920238268197, "grad_norm": 0.8283306760966727, "learning_rate": 9.198361168788194e-06, "loss": 0.6494, "step": 29200 }, { "epoch": 0.5304736306842947, "grad_norm": 0.8020249215356068, "learning_rate": 9.192616833830981e-06, "loss": 0.6464, "step": 29210 }, { "epoch": 0.5306552375417696, "grad_norm": 0.8584712024059377, "learning_rate": 9.186872767024663e-06, "loss": 0.6649, "step": 29220 }, { "epoch": 0.5308368443992445, "grad_norm": 0.8678102414170004, "learning_rate": 9.181128970276987e-06, "loss": 0.6496, "step": 29230 }, { "epoch": 0.5310184512567194, "grad_norm": 0.8582929766377019, "learning_rate": 9.175385445495595e-06, "loss": 0.6549, "step": 29240 }, { "epoch": 0.5312000581141944, "grad_norm": 0.8590741895194381, "learning_rate": 9.169642194588055e-06, "loss": 0.6577, "step": 29250 }, { "epoch": 0.5313816649716693, "grad_norm": 0.8793954945006504, "learning_rate": 9.16389921946183e-06, "loss": 0.648, "step": 29260 }, { "epoch": 0.5315632718291443, "grad_norm": 0.8440911373924725, "learning_rate": 9.158156522024296e-06, "loss": 0.6489, "step": 29270 }, { "epoch": 0.5317448786866192, "grad_norm": 0.846932323585815, "learning_rate": 9.15241410418274e-06, "loss": 0.649, "step": 29280 }, { "epoch": 0.5319264855440942, "grad_norm": 0.8169164592537013, "learning_rate": 9.146671967844351e-06, "loss": 0.6573, "step": 29290 }, { "epoch": 0.5321080924015691, "grad_norm": 0.8648605460818848, "learning_rate": 9.140930114916233e-06, "loss": 0.6587, "step": 29300 }, { "epoch": 0.532289699259044, "grad_norm": 0.8175447658834001, "learning_rate": 9.135188547305384e-06, "loss": 0.6498, "step": 29310 }, { "epoch": 0.5324713061165189, "grad_norm": 0.8139830094085039, "learning_rate": 9.129447266918716e-06, "loss": 0.6509, "step": 29320 }, { "epoch": 0.5326529129739939, "grad_norm": 0.8504503856353903, "learning_rate": 9.123706275663044e-06, "loss": 0.6576, "step": 29330 }, { "epoch": 0.5328345198314688, "grad_norm": 0.8610676505815271, "learning_rate": 9.117965575445083e-06, "loss": 0.6672, "step": 29340 }, { "epoch": 0.5330161266889438, "grad_norm": 0.873234553301594, "learning_rate": 9.11222516817146e-06, "loss": 0.6365, "step": 29350 }, { "epoch": 0.5331977335464188, "grad_norm": 0.8677389911154811, "learning_rate": 9.10648505574869e-06, "loss": 0.648, "step": 29360 }, { "epoch": 0.5333793404038937, "grad_norm": 0.8350619126205391, "learning_rate": 9.100745240083209e-06, "loss": 0.6551, "step": 29370 }, { "epoch": 0.5335609472613686, "grad_norm": 0.8223009566113033, "learning_rate": 9.095005723081335e-06, "loss": 0.6519, "step": 29380 }, { "epoch": 0.5337425541188435, "grad_norm": 0.8393998799432367, "learning_rate": 9.089266506649305e-06, "loss": 0.6669, "step": 29390 }, { "epoch": 0.5339241609763185, "grad_norm": 0.887017836294868, "learning_rate": 9.083527592693237e-06, "loss": 0.6597, "step": 29400 }, { "epoch": 0.5341057678337934, "grad_norm": 0.9072619738754432, "learning_rate": 9.077788983119172e-06, "loss": 0.6462, "step": 29410 }, { "epoch": 0.5342873746912683, "grad_norm": 0.8708722830907613, "learning_rate": 9.072050679833027e-06, "loss": 0.652, "step": 29420 }, { "epoch": 0.5344689815487432, "grad_norm": 0.8515769079921377, "learning_rate": 9.066312684740633e-06, "loss": 0.6569, "step": 29430 }, { "epoch": 0.5346505884062183, "grad_norm": 0.8462529317268508, "learning_rate": 9.060574999747705e-06, "loss": 0.6636, "step": 29440 }, { "epoch": 0.5348321952636932, "grad_norm": 0.8591710222813309, "learning_rate": 9.054837626759874e-06, "loss": 0.6519, "step": 29450 }, { "epoch": 0.5350138021211681, "grad_norm": 0.8570699504178131, "learning_rate": 9.049100567682651e-06, "loss": 0.6506, "step": 29460 }, { "epoch": 0.535195408978643, "grad_norm": 0.834751553022102, "learning_rate": 9.043363824421443e-06, "loss": 0.6637, "step": 29470 }, { "epoch": 0.535377015836118, "grad_norm": 0.8348869812006687, "learning_rate": 9.037627398881564e-06, "loss": 0.662, "step": 29480 }, { "epoch": 0.5355586226935929, "grad_norm": 0.8418224849517669, "learning_rate": 9.03189129296821e-06, "loss": 0.6556, "step": 29490 }, { "epoch": 0.5357402295510678, "grad_norm": 0.8655856170914447, "learning_rate": 9.026155508586481e-06, "loss": 0.6448, "step": 29500 }, { "epoch": 0.5359218364085427, "grad_norm": 0.8611709883611236, "learning_rate": 9.020420047641365e-06, "loss": 0.6464, "step": 29510 }, { "epoch": 0.5361034432660178, "grad_norm": 0.8486952418121094, "learning_rate": 9.014684912037742e-06, "loss": 0.6858, "step": 29520 }, { "epoch": 0.5362850501234927, "grad_norm": 0.8261492646381682, "learning_rate": 9.008950103680385e-06, "loss": 0.6417, "step": 29530 }, { "epoch": 0.5364666569809676, "grad_norm": 0.8514697028715321, "learning_rate": 9.00321562447396e-06, "loss": 0.6512, "step": 29540 }, { "epoch": 0.5366482638384426, "grad_norm": 0.8417996190494796, "learning_rate": 8.997481476323021e-06, "loss": 0.6513, "step": 29550 }, { "epoch": 0.5368298706959175, "grad_norm": 0.8571914889587542, "learning_rate": 8.99174766113201e-06, "loss": 0.6549, "step": 29560 }, { "epoch": 0.5370114775533924, "grad_norm": 0.8313846385548582, "learning_rate": 8.986014180805268e-06, "loss": 0.6584, "step": 29570 }, { "epoch": 0.5371930844108673, "grad_norm": 0.859929064293273, "learning_rate": 8.980281037247013e-06, "loss": 0.649, "step": 29580 }, { "epoch": 0.5373746912683423, "grad_norm": 0.8465917044079485, "learning_rate": 8.974548232361361e-06, "loss": 0.6531, "step": 29590 }, { "epoch": 0.5375562981258172, "grad_norm": 0.8152762140349337, "learning_rate": 8.968815768052304e-06, "loss": 0.6488, "step": 29600 }, { "epoch": 0.5377379049832922, "grad_norm": 0.8405412722655273, "learning_rate": 8.963083646223739e-06, "loss": 0.6401, "step": 29610 }, { "epoch": 0.5379195118407671, "grad_norm": 0.8370455833645565, "learning_rate": 8.957351868779426e-06, "loss": 0.6452, "step": 29620 }, { "epoch": 0.5381011186982421, "grad_norm": 0.827007080014426, "learning_rate": 8.951620437623034e-06, "loss": 0.6465, "step": 29630 }, { "epoch": 0.538282725555717, "grad_norm": 0.8764054182263684, "learning_rate": 8.945889354658094e-06, "loss": 0.6756, "step": 29640 }, { "epoch": 0.5384643324131919, "grad_norm": 0.8605756573611694, "learning_rate": 8.940158621788047e-06, "loss": 0.6504, "step": 29650 }, { "epoch": 0.5386459392706668, "grad_norm": 0.8443384357272958, "learning_rate": 8.934428240916196e-06, "loss": 0.6503, "step": 29660 }, { "epoch": 0.5388275461281418, "grad_norm": 0.8288723508023601, "learning_rate": 8.92869821394573e-06, "loss": 0.6553, "step": 29670 }, { "epoch": 0.5390091529856167, "grad_norm": 0.8966177700920401, "learning_rate": 8.922968542779736e-06, "loss": 0.6623, "step": 29680 }, { "epoch": 0.5391907598430917, "grad_norm": 0.8496714832568449, "learning_rate": 8.917239229321162e-06, "loss": 0.6469, "step": 29690 }, { "epoch": 0.5393723667005667, "grad_norm": 0.8451944733727635, "learning_rate": 8.911510275472855e-06, "loss": 0.6599, "step": 29700 }, { "epoch": 0.5395539735580416, "grad_norm": 0.830863728192825, "learning_rate": 8.905781683137532e-06, "loss": 0.6496, "step": 29710 }, { "epoch": 0.5397355804155165, "grad_norm": 0.8560882363637807, "learning_rate": 8.900053454217796e-06, "loss": 0.6569, "step": 29720 }, { "epoch": 0.5399171872729914, "grad_norm": 0.8562212760755835, "learning_rate": 8.894325590616122e-06, "loss": 0.6591, "step": 29730 }, { "epoch": 0.5400987941304664, "grad_norm": 0.8552224500214309, "learning_rate": 8.888598094234871e-06, "loss": 0.6535, "step": 29740 }, { "epoch": 0.5402804009879413, "grad_norm": 0.8488132803655929, "learning_rate": 8.88287096697628e-06, "loss": 0.6419, "step": 29750 }, { "epoch": 0.5404620078454162, "grad_norm": 0.8756029123033285, "learning_rate": 8.877144210742455e-06, "loss": 0.6572, "step": 29760 }, { "epoch": 0.5406436147028911, "grad_norm": 0.850018292612589, "learning_rate": 8.871417827435396e-06, "loss": 0.6624, "step": 29770 }, { "epoch": 0.5408252215603662, "grad_norm": 0.883943125032319, "learning_rate": 8.86569181895696e-06, "loss": 0.6491, "step": 29780 }, { "epoch": 0.5410068284178411, "grad_norm": 0.8527804484512967, "learning_rate": 8.859966187208898e-06, "loss": 0.6515, "step": 29790 }, { "epoch": 0.541188435275316, "grad_norm": 0.8597352904928593, "learning_rate": 8.854240934092818e-06, "loss": 0.6453, "step": 29800 }, { "epoch": 0.5413700421327909, "grad_norm": 0.837550192384145, "learning_rate": 8.84851606151022e-06, "loss": 0.6529, "step": 29810 }, { "epoch": 0.5415516489902659, "grad_norm": 0.8675513030722876, "learning_rate": 8.84279157136246e-06, "loss": 0.6531, "step": 29820 }, { "epoch": 0.5417332558477408, "grad_norm": 0.8284699913389595, "learning_rate": 8.837067465550782e-06, "loss": 0.6457, "step": 29830 }, { "epoch": 0.5419148627052157, "grad_norm": 0.8457239865054678, "learning_rate": 8.831343745976288e-06, "loss": 0.6465, "step": 29840 }, { "epoch": 0.5420964695626906, "grad_norm": 0.8740779800517885, "learning_rate": 8.825620414539971e-06, "loss": 0.6528, "step": 29850 }, { "epoch": 0.5422780764201657, "grad_norm": 0.8348840608560031, "learning_rate": 8.819897473142677e-06, "loss": 0.6561, "step": 29860 }, { "epoch": 0.5424596832776406, "grad_norm": 0.8348003356399677, "learning_rate": 8.814174923685124e-06, "loss": 0.6486, "step": 29870 }, { "epoch": 0.5426412901351155, "grad_norm": 0.8254334749675174, "learning_rate": 8.808452768067917e-06, "loss": 0.6484, "step": 29880 }, { "epoch": 0.5428228969925905, "grad_norm": 0.8625995092416175, "learning_rate": 8.802731008191506e-06, "loss": 0.6617, "step": 29890 }, { "epoch": 0.5430045038500654, "grad_norm": 0.8436280079907698, "learning_rate": 8.79700964595623e-06, "loss": 0.6578, "step": 29900 }, { "epoch": 0.5431861107075403, "grad_norm": 0.8443745810188868, "learning_rate": 8.791288683262285e-06, "loss": 0.6523, "step": 29910 }, { "epoch": 0.5433677175650152, "grad_norm": 0.8528584459814345, "learning_rate": 8.785568122009736e-06, "loss": 0.6618, "step": 29920 }, { "epoch": 0.5435493244224902, "grad_norm": 0.8215263971149055, "learning_rate": 8.779847964098519e-06, "loss": 0.6492, "step": 29930 }, { "epoch": 0.5437309312799651, "grad_norm": 0.8180514454441914, "learning_rate": 8.774128211428429e-06, "loss": 0.6617, "step": 29940 }, { "epoch": 0.5439125381374401, "grad_norm": 0.834089684528039, "learning_rate": 8.768408865899133e-06, "loss": 0.6506, "step": 29950 }, { "epoch": 0.544094144994915, "grad_norm": 0.8546526593691999, "learning_rate": 8.762689929410156e-06, "loss": 0.6493, "step": 29960 }, { "epoch": 0.54427575185239, "grad_norm": 0.8618757215070892, "learning_rate": 8.756971403860896e-06, "loss": 0.6493, "step": 29970 }, { "epoch": 0.5444573587098649, "grad_norm": 0.8402223226103871, "learning_rate": 8.751253291150605e-06, "loss": 0.6574, "step": 29980 }, { "epoch": 0.5446389655673398, "grad_norm": 0.8672511354884093, "learning_rate": 8.745535593178407e-06, "loss": 0.6502, "step": 29990 }, { "epoch": 0.5448205724248147, "grad_norm": 0.8385248259970891, "learning_rate": 8.739818311843277e-06, "loss": 0.641, "step": 30000 }, { "epoch": 0.5450021792822897, "grad_norm": 0.8308304389143736, "learning_rate": 8.734101449044067e-06, "loss": 0.6466, "step": 30010 }, { "epoch": 0.5451837861397646, "grad_norm": 0.8297321864576889, "learning_rate": 8.728385006679475e-06, "loss": 0.6438, "step": 30020 }, { "epoch": 0.5453653929972396, "grad_norm": 0.8637836826551119, "learning_rate": 8.722668986648068e-06, "loss": 0.6468, "step": 30030 }, { "epoch": 0.5455469998547146, "grad_norm": 0.8535397918396599, "learning_rate": 8.716953390848267e-06, "loss": 0.6526, "step": 30040 }, { "epoch": 0.5457286067121895, "grad_norm": 0.8875124190084469, "learning_rate": 8.711238221178362e-06, "loss": 0.6538, "step": 30050 }, { "epoch": 0.5459102135696644, "grad_norm": 1.5426847984686434, "learning_rate": 8.705523479536493e-06, "loss": 0.6542, "step": 30060 }, { "epoch": 0.5460918204271393, "grad_norm": 0.8778659569184531, "learning_rate": 8.699809167820653e-06, "loss": 0.6617, "step": 30070 }, { "epoch": 0.5462734272846143, "grad_norm": 0.8672158474018119, "learning_rate": 8.69409528792871e-06, "loss": 0.6498, "step": 30080 }, { "epoch": 0.5464550341420892, "grad_norm": 0.8665567171524591, "learning_rate": 8.688381841758366e-06, "loss": 0.6536, "step": 30090 }, { "epoch": 0.5466366409995641, "grad_norm": 0.8423877651921028, "learning_rate": 8.682668831207199e-06, "loss": 0.648, "step": 30100 }, { "epoch": 0.546818247857039, "grad_norm": 0.8136823860818828, "learning_rate": 8.67695625817263e-06, "loss": 0.6522, "step": 30110 }, { "epoch": 0.5469998547145141, "grad_norm": 0.8646028532088088, "learning_rate": 8.67124412455194e-06, "loss": 0.6519, "step": 30120 }, { "epoch": 0.547181461571989, "grad_norm": 0.8328672045497275, "learning_rate": 8.665532432242264e-06, "loss": 0.6516, "step": 30130 }, { "epoch": 0.5473630684294639, "grad_norm": 0.8818122708217226, "learning_rate": 8.659821183140589e-06, "loss": 0.6529, "step": 30140 }, { "epoch": 0.5475446752869388, "grad_norm": 0.8401139271013365, "learning_rate": 8.654110379143753e-06, "loss": 0.6416, "step": 30150 }, { "epoch": 0.5477262821444138, "grad_norm": 0.8509845448346325, "learning_rate": 8.648400022148446e-06, "loss": 0.6451, "step": 30160 }, { "epoch": 0.5479078890018887, "grad_norm": 0.88372799028288, "learning_rate": 8.642690114051218e-06, "loss": 0.6571, "step": 30170 }, { "epoch": 0.5480894958593636, "grad_norm": 0.8569158145510499, "learning_rate": 8.636980656748453e-06, "loss": 0.6315, "step": 30180 }, { "epoch": 0.5482711027168385, "grad_norm": 0.8728181804871337, "learning_rate": 8.63127165213641e-06, "loss": 0.6547, "step": 30190 }, { "epoch": 0.5484527095743136, "grad_norm": 0.8611897318480214, "learning_rate": 8.62556310211117e-06, "loss": 0.6563, "step": 30200 }, { "epoch": 0.5486343164317885, "grad_norm": 0.8805176890204819, "learning_rate": 8.619855008568686e-06, "loss": 0.6567, "step": 30210 }, { "epoch": 0.5488159232892634, "grad_norm": 0.8311841365440712, "learning_rate": 8.614147373404744e-06, "loss": 0.6493, "step": 30220 }, { "epoch": 0.5489975301467384, "grad_norm": 0.8334810312769988, "learning_rate": 8.608440198514987e-06, "loss": 0.6336, "step": 30230 }, { "epoch": 0.5491791370042133, "grad_norm": 0.8330185639276847, "learning_rate": 8.602733485794898e-06, "loss": 0.6618, "step": 30240 }, { "epoch": 0.5493607438616882, "grad_norm": 0.8425419955984483, "learning_rate": 8.597027237139816e-06, "loss": 0.6516, "step": 30250 }, { "epoch": 0.5495423507191631, "grad_norm": 0.860157987090793, "learning_rate": 8.591321454444917e-06, "loss": 0.6392, "step": 30260 }, { "epoch": 0.5497239575766381, "grad_norm": 0.8109872562393602, "learning_rate": 8.585616139605223e-06, "loss": 0.6321, "step": 30270 }, { "epoch": 0.549905564434113, "grad_norm": 0.8601363317038787, "learning_rate": 8.579911294515605e-06, "loss": 0.6582, "step": 30280 }, { "epoch": 0.550087171291588, "grad_norm": 0.8379465059356278, "learning_rate": 8.574206921070776e-06, "loss": 0.6483, "step": 30290 }, { "epoch": 0.5502687781490629, "grad_norm": 0.8409694094292273, "learning_rate": 8.568503021165293e-06, "loss": 0.6533, "step": 30300 }, { "epoch": 0.5504503850065379, "grad_norm": 0.8390016997954982, "learning_rate": 8.562799596693553e-06, "loss": 0.6472, "step": 30310 }, { "epoch": 0.5506319918640128, "grad_norm": 0.8164676127753079, "learning_rate": 8.5570966495498e-06, "loss": 0.6515, "step": 30320 }, { "epoch": 0.5508135987214877, "grad_norm": 0.8263961228768137, "learning_rate": 8.551394181628114e-06, "loss": 0.6531, "step": 30330 }, { "epoch": 0.5509952055789626, "grad_norm": 0.8074318111711909, "learning_rate": 8.54569219482242e-06, "loss": 0.6423, "step": 30340 }, { "epoch": 0.5511768124364376, "grad_norm": 0.8573416492332152, "learning_rate": 8.539990691026484e-06, "loss": 0.6398, "step": 30350 }, { "epoch": 0.5513584192939125, "grad_norm": 0.8021870811390368, "learning_rate": 8.534289672133902e-06, "loss": 0.6554, "step": 30360 }, { "epoch": 0.5515400261513875, "grad_norm": 0.8320985150367881, "learning_rate": 8.528589140038124e-06, "loss": 0.6422, "step": 30370 }, { "epoch": 0.5517216330088625, "grad_norm": 0.8502654442519025, "learning_rate": 8.522889096632423e-06, "loss": 0.6507, "step": 30380 }, { "epoch": 0.5519032398663374, "grad_norm": 0.8561916004982387, "learning_rate": 8.517189543809926e-06, "loss": 0.646, "step": 30390 }, { "epoch": 0.5520848467238123, "grad_norm": 0.8197608443731659, "learning_rate": 8.51149048346358e-06, "loss": 0.6457, "step": 30400 }, { "epoch": 0.5522664535812872, "grad_norm": 0.8486549335386557, "learning_rate": 8.505791917486183e-06, "loss": 0.6588, "step": 30410 }, { "epoch": 0.5524480604387622, "grad_norm": 0.8617947085811849, "learning_rate": 8.500093847770355e-06, "loss": 0.6486, "step": 30420 }, { "epoch": 0.5526296672962371, "grad_norm": 0.8337683046991695, "learning_rate": 8.494396276208569e-06, "loss": 0.6576, "step": 30430 }, { "epoch": 0.552811274153712, "grad_norm": 0.8664759748808915, "learning_rate": 8.48869920469311e-06, "loss": 0.6507, "step": 30440 }, { "epoch": 0.5529928810111869, "grad_norm": 0.8824371215260358, "learning_rate": 8.483002635116124e-06, "loss": 0.6612, "step": 30450 }, { "epoch": 0.553174487868662, "grad_norm": 0.839845811264502, "learning_rate": 8.477306569369566e-06, "loss": 0.6527, "step": 30460 }, { "epoch": 0.5533560947261369, "grad_norm": 0.850973682610567, "learning_rate": 8.47161100934523e-06, "loss": 0.6586, "step": 30470 }, { "epoch": 0.5535377015836118, "grad_norm": 0.8367849203798147, "learning_rate": 8.465915956934751e-06, "loss": 0.6348, "step": 30480 }, { "epoch": 0.5537193084410867, "grad_norm": 0.843537540808632, "learning_rate": 8.460221414029588e-06, "loss": 0.6412, "step": 30490 }, { "epoch": 0.5539009152985617, "grad_norm": 0.7913252479033641, "learning_rate": 8.454527382521033e-06, "loss": 0.639, "step": 30500 }, { "epoch": 0.5540825221560366, "grad_norm": 0.8497326936166668, "learning_rate": 8.448833864300207e-06, "loss": 0.6432, "step": 30510 }, { "epoch": 0.5542641290135115, "grad_norm": 0.850507640218624, "learning_rate": 8.443140861258061e-06, "loss": 0.6399, "step": 30520 }, { "epoch": 0.5544457358709864, "grad_norm": 0.9058937423048397, "learning_rate": 8.437448375285375e-06, "loss": 0.6605, "step": 30530 }, { "epoch": 0.5546273427284615, "grad_norm": 0.833299740178487, "learning_rate": 8.431756408272756e-06, "loss": 0.642, "step": 30540 }, { "epoch": 0.5548089495859364, "grad_norm": 0.8525278785037362, "learning_rate": 8.426064962110646e-06, "loss": 0.6479, "step": 30550 }, { "epoch": 0.5549905564434113, "grad_norm": 0.8463974457996479, "learning_rate": 8.420374038689296e-06, "loss": 0.6472, "step": 30560 }, { "epoch": 0.5551721633008863, "grad_norm": 0.8192278262210185, "learning_rate": 8.414683639898807e-06, "loss": 0.6531, "step": 30570 }, { "epoch": 0.5553537701583612, "grad_norm": 0.8617490756728585, "learning_rate": 8.408993767629085e-06, "loss": 0.6515, "step": 30580 }, { "epoch": 0.5555353770158361, "grad_norm": 0.8752250377721765, "learning_rate": 8.403304423769878e-06, "loss": 0.6415, "step": 30590 }, { "epoch": 0.555716983873311, "grad_norm": 0.8905495762196455, "learning_rate": 8.397615610210743e-06, "loss": 0.6386, "step": 30600 }, { "epoch": 0.555898590730786, "grad_norm": 0.7958704846612067, "learning_rate": 8.391927328841076e-06, "loss": 0.6618, "step": 30610 }, { "epoch": 0.5560801975882609, "grad_norm": 0.8568948483590844, "learning_rate": 8.386239581550082e-06, "loss": 0.6476, "step": 30620 }, { "epoch": 0.5562618044457359, "grad_norm": 0.8281004556223482, "learning_rate": 8.380552370226801e-06, "loss": 0.6381, "step": 30630 }, { "epoch": 0.5564434113032108, "grad_norm": 0.865060408591288, "learning_rate": 8.374865696760084e-06, "loss": 0.6468, "step": 30640 }, { "epoch": 0.5566250181606858, "grad_norm": 0.8536964468796548, "learning_rate": 8.369179563038614e-06, "loss": 0.6447, "step": 30650 }, { "epoch": 0.5568066250181607, "grad_norm": 0.8230627918156862, "learning_rate": 8.363493970950889e-06, "loss": 0.6462, "step": 30660 }, { "epoch": 0.5569882318756356, "grad_norm": 0.8973774871803009, "learning_rate": 8.35780892238522e-06, "loss": 0.6369, "step": 30670 }, { "epoch": 0.5571698387331105, "grad_norm": 0.7941387879515366, "learning_rate": 8.352124419229755e-06, "loss": 0.6418, "step": 30680 }, { "epoch": 0.5573514455905855, "grad_norm": 0.8476183206013683, "learning_rate": 8.346440463372443e-06, "loss": 0.6432, "step": 30690 }, { "epoch": 0.5575330524480604, "grad_norm": 0.8121780956071207, "learning_rate": 8.340757056701065e-06, "loss": 0.6417, "step": 30700 }, { "epoch": 0.5577146593055354, "grad_norm": 0.82953445673299, "learning_rate": 8.335074201103211e-06, "loss": 0.6457, "step": 30710 }, { "epoch": 0.5578962661630104, "grad_norm": 0.8728903902731555, "learning_rate": 8.329391898466291e-06, "loss": 0.6508, "step": 30720 }, { "epoch": 0.5580778730204853, "grad_norm": 0.8956178231362455, "learning_rate": 8.323710150677533e-06, "loss": 0.6468, "step": 30730 }, { "epoch": 0.5582594798779602, "grad_norm": 0.7981993864847023, "learning_rate": 8.318028959623974e-06, "loss": 0.6346, "step": 30740 }, { "epoch": 0.5584410867354351, "grad_norm": 0.8813143264824919, "learning_rate": 8.312348327192476e-06, "loss": 0.6533, "step": 30750 }, { "epoch": 0.5586226935929101, "grad_norm": 0.82445532528814, "learning_rate": 8.306668255269708e-06, "loss": 0.6427, "step": 30760 }, { "epoch": 0.558804300450385, "grad_norm": 0.8474217388794221, "learning_rate": 8.300988745742155e-06, "loss": 0.6519, "step": 30770 }, { "epoch": 0.5589859073078599, "grad_norm": 0.8335093964304868, "learning_rate": 8.295309800496115e-06, "loss": 0.6466, "step": 30780 }, { "epoch": 0.5591675141653348, "grad_norm": 0.871584844259719, "learning_rate": 8.289631421417703e-06, "loss": 0.641, "step": 30790 }, { "epoch": 0.5593491210228099, "grad_norm": 0.8665375998104053, "learning_rate": 8.283953610392833e-06, "loss": 0.6426, "step": 30800 }, { "epoch": 0.5595307278802848, "grad_norm": 0.8340764912771041, "learning_rate": 8.278276369307252e-06, "loss": 0.6403, "step": 30810 }, { "epoch": 0.5597123347377597, "grad_norm": 0.8631979085601462, "learning_rate": 8.272599700046491e-06, "loss": 0.6526, "step": 30820 }, { "epoch": 0.5598939415952346, "grad_norm": 0.8794002476003263, "learning_rate": 8.26692360449592e-06, "loss": 0.6482, "step": 30830 }, { "epoch": 0.5600755484527096, "grad_norm": 0.8562033630915924, "learning_rate": 8.26124808454069e-06, "loss": 0.648, "step": 30840 }, { "epoch": 0.5602571553101845, "grad_norm": 0.847686913979511, "learning_rate": 8.255573142065784e-06, "loss": 0.6388, "step": 30850 }, { "epoch": 0.5604387621676594, "grad_norm": 0.865262410970545, "learning_rate": 8.24989877895598e-06, "loss": 0.6572, "step": 30860 }, { "epoch": 0.5606203690251343, "grad_norm": 0.856623343355275, "learning_rate": 8.244224997095863e-06, "loss": 0.6349, "step": 30870 }, { "epoch": 0.5608019758826094, "grad_norm": 0.8859451764196505, "learning_rate": 8.238551798369834e-06, "loss": 0.667, "step": 30880 }, { "epoch": 0.5609835827400843, "grad_norm": 1.2903548525592938, "learning_rate": 8.232879184662095e-06, "loss": 0.6332, "step": 30890 }, { "epoch": 0.5611651895975592, "grad_norm": 0.8458633023967659, "learning_rate": 8.227207157856654e-06, "loss": 0.6495, "step": 30900 }, { "epoch": 0.5613467964550342, "grad_norm": 0.8311984586427228, "learning_rate": 8.221535719837323e-06, "loss": 0.6488, "step": 30910 }, { "epoch": 0.5615284033125091, "grad_norm": 0.837422428195659, "learning_rate": 8.215864872487722e-06, "loss": 0.6587, "step": 30920 }, { "epoch": 0.561710010169984, "grad_norm": 0.8948349282039028, "learning_rate": 8.21019461769127e-06, "loss": 0.6527, "step": 30930 }, { "epoch": 0.5618916170274589, "grad_norm": 0.8530767257796396, "learning_rate": 8.204524957331194e-06, "loss": 0.6539, "step": 30940 }, { "epoch": 0.5620732238849339, "grad_norm": 0.8632588871435596, "learning_rate": 8.19885589329052e-06, "loss": 0.6331, "step": 30950 }, { "epoch": 0.5622548307424088, "grad_norm": 0.8807326990557722, "learning_rate": 8.193187427452076e-06, "loss": 0.6541, "step": 30960 }, { "epoch": 0.5624364375998838, "grad_norm": 0.841701243444699, "learning_rate": 8.187519561698496e-06, "loss": 0.6462, "step": 30970 }, { "epoch": 0.5626180444573587, "grad_norm": 0.8148145311113224, "learning_rate": 8.181852297912205e-06, "loss": 0.6445, "step": 30980 }, { "epoch": 0.5627996513148337, "grad_norm": 0.8617959204130021, "learning_rate": 8.176185637975443e-06, "loss": 0.6419, "step": 30990 }, { "epoch": 0.5629812581723086, "grad_norm": 0.8505716351268563, "learning_rate": 8.170519583770233e-06, "loss": 0.65, "step": 31000 }, { "epoch": 0.5631628650297835, "grad_norm": 0.862988016051139, "learning_rate": 8.16485413717841e-06, "loss": 0.6408, "step": 31010 }, { "epoch": 0.5633444718872584, "grad_norm": 0.8370939573790092, "learning_rate": 8.159189300081596e-06, "loss": 0.6331, "step": 31020 }, { "epoch": 0.5635260787447334, "grad_norm": 0.8385408338242354, "learning_rate": 8.153525074361223e-06, "loss": 0.6475, "step": 31030 }, { "epoch": 0.5637076856022083, "grad_norm": 0.8451640343123592, "learning_rate": 8.147861461898508e-06, "loss": 0.6428, "step": 31040 }, { "epoch": 0.5638892924596832, "grad_norm": 0.836185343975196, "learning_rate": 8.142198464574476e-06, "loss": 0.6434, "step": 31050 }, { "epoch": 0.5640708993171583, "grad_norm": 0.8624924158367012, "learning_rate": 8.136536084269935e-06, "loss": 0.6473, "step": 31060 }, { "epoch": 0.5642525061746332, "grad_norm": 0.8566077594451043, "learning_rate": 8.130874322865494e-06, "loss": 0.6469, "step": 31070 }, { "epoch": 0.5644341130321081, "grad_norm": 0.8792726854991813, "learning_rate": 8.12521318224156e-06, "loss": 0.6506, "step": 31080 }, { "epoch": 0.564615719889583, "grad_norm": 0.8643611850794416, "learning_rate": 8.119552664278331e-06, "loss": 0.6526, "step": 31090 }, { "epoch": 0.564797326747058, "grad_norm": 0.9199724751451358, "learning_rate": 8.113892770855797e-06, "loss": 0.6591, "step": 31100 }, { "epoch": 0.5649789336045329, "grad_norm": 0.8595023594672765, "learning_rate": 8.10823350385374e-06, "loss": 0.6456, "step": 31110 }, { "epoch": 0.5651605404620078, "grad_norm": 0.8463449854745947, "learning_rate": 8.102574865151739e-06, "loss": 0.6485, "step": 31120 }, { "epoch": 0.5653421473194827, "grad_norm": 0.8437703946505787, "learning_rate": 8.096916856629157e-06, "loss": 0.6463, "step": 31130 }, { "epoch": 0.5655237541769578, "grad_norm": 0.8326973428896814, "learning_rate": 8.091259480165154e-06, "loss": 0.6306, "step": 31140 }, { "epoch": 0.5657053610344327, "grad_norm": 0.8913754650796853, "learning_rate": 8.085602737638674e-06, "loss": 0.6337, "step": 31150 }, { "epoch": 0.5658869678919076, "grad_norm": 0.8418670938321849, "learning_rate": 8.079946630928459e-06, "loss": 0.646, "step": 31160 }, { "epoch": 0.5660685747493825, "grad_norm": 0.8340322941570987, "learning_rate": 8.074291161913033e-06, "loss": 0.6574, "step": 31170 }, { "epoch": 0.5662501816068575, "grad_norm": 0.8406757094947941, "learning_rate": 8.068636332470706e-06, "loss": 0.6439, "step": 31180 }, { "epoch": 0.5664317884643324, "grad_norm": 0.8493397642979648, "learning_rate": 8.062982144479583e-06, "loss": 0.6433, "step": 31190 }, { "epoch": 0.5666133953218073, "grad_norm": 0.822428424546309, "learning_rate": 8.057328599817552e-06, "loss": 0.6453, "step": 31200 }, { "epoch": 0.5667950021792822, "grad_norm": 0.8436541097879072, "learning_rate": 8.051675700362292e-06, "loss": 0.6499, "step": 31210 }, { "epoch": 0.5669766090367572, "grad_norm": 0.8166357502116265, "learning_rate": 8.046023447991255e-06, "loss": 0.634, "step": 31220 }, { "epoch": 0.5671582158942322, "grad_norm": 0.8575993351641451, "learning_rate": 8.040371844581694e-06, "loss": 0.6408, "step": 31230 }, { "epoch": 0.5673398227517071, "grad_norm": 0.8488797003976598, "learning_rate": 8.034720892010635e-06, "loss": 0.6503, "step": 31240 }, { "epoch": 0.5675214296091821, "grad_norm": 0.8544366076622425, "learning_rate": 8.029070592154894e-06, "loss": 0.6479, "step": 31250 }, { "epoch": 0.567703036466657, "grad_norm": 0.8767765111636249, "learning_rate": 8.023420946891072e-06, "loss": 0.6357, "step": 31260 }, { "epoch": 0.5678846433241319, "grad_norm": 0.8088569875467988, "learning_rate": 8.017771958095538e-06, "loss": 0.6541, "step": 31270 }, { "epoch": 0.5680662501816068, "grad_norm": 0.8319769109116415, "learning_rate": 8.012123627644462e-06, "loss": 0.6584, "step": 31280 }, { "epoch": 0.5682478570390818, "grad_norm": 0.8265525652594482, "learning_rate": 8.006475957413787e-06, "loss": 0.6495, "step": 31290 }, { "epoch": 0.5684294638965567, "grad_norm": 0.8304255471296567, "learning_rate": 8.000828949279234e-06, "loss": 0.6605, "step": 31300 }, { "epoch": 0.5686110707540317, "grad_norm": 0.8374918895359033, "learning_rate": 7.995182605116307e-06, "loss": 0.6375, "step": 31310 }, { "epoch": 0.5687926776115066, "grad_norm": 0.8143410843368619, "learning_rate": 7.989536926800292e-06, "loss": 0.6415, "step": 31320 }, { "epoch": 0.5689742844689816, "grad_norm": 0.8953141695709325, "learning_rate": 7.98389191620625e-06, "loss": 0.6393, "step": 31330 }, { "epoch": 0.5691558913264565, "grad_norm": 0.820440224231847, "learning_rate": 7.978247575209022e-06, "loss": 0.6434, "step": 31340 }, { "epoch": 0.5693374981839314, "grad_norm": 0.8486995327036095, "learning_rate": 7.972603905683223e-06, "loss": 0.6385, "step": 31350 }, { "epoch": 0.5695191050414063, "grad_norm": 0.8612830434093474, "learning_rate": 7.966960909503252e-06, "loss": 0.624, "step": 31360 }, { "epoch": 0.5697007118988813, "grad_norm": 0.8086168115214916, "learning_rate": 7.961318588543281e-06, "loss": 0.6528, "step": 31370 }, { "epoch": 0.5698823187563562, "grad_norm": 0.824984133776689, "learning_rate": 7.955676944677252e-06, "loss": 0.6553, "step": 31380 }, { "epoch": 0.5700639256138311, "grad_norm": 0.8209549796228185, "learning_rate": 7.950035979778895e-06, "loss": 0.6532, "step": 31390 }, { "epoch": 0.5702455324713062, "grad_norm": 0.8242026235558024, "learning_rate": 7.944395695721696e-06, "loss": 0.6391, "step": 31400 }, { "epoch": 0.5704271393287811, "grad_norm": 0.840906243478386, "learning_rate": 7.938756094378936e-06, "loss": 0.6447, "step": 31410 }, { "epoch": 0.570608746186256, "grad_norm": 0.7993630363450575, "learning_rate": 7.933117177623653e-06, "loss": 0.6463, "step": 31420 }, { "epoch": 0.5707903530437309, "grad_norm": 0.8605237711900385, "learning_rate": 7.92747894732867e-06, "loss": 0.6513, "step": 31430 }, { "epoch": 0.5709719599012059, "grad_norm": 0.8271633898585895, "learning_rate": 7.921841405366566e-06, "loss": 0.6394, "step": 31440 }, { "epoch": 0.5711535667586808, "grad_norm": 0.9165367936834351, "learning_rate": 7.91620455360971e-06, "loss": 0.6397, "step": 31450 }, { "epoch": 0.5713351736161557, "grad_norm": 0.9077701171577675, "learning_rate": 7.910568393930232e-06, "loss": 0.6384, "step": 31460 }, { "epoch": 0.5715167804736306, "grad_norm": 0.8066918681746827, "learning_rate": 7.904932928200022e-06, "loss": 0.6546, "step": 31470 }, { "epoch": 0.5716983873311057, "grad_norm": 0.8258681804191546, "learning_rate": 7.899298158290763e-06, "loss": 0.6355, "step": 31480 }, { "epoch": 0.5718799941885806, "grad_norm": 0.8524828609604103, "learning_rate": 7.893664086073889e-06, "loss": 0.6568, "step": 31490 }, { "epoch": 0.5720616010460555, "grad_norm": 0.8285117195014622, "learning_rate": 7.888030713420608e-06, "loss": 0.6415, "step": 31500 }, { "epoch": 0.5722432079035304, "grad_norm": 0.8748573796775407, "learning_rate": 7.882398042201896e-06, "loss": 0.6467, "step": 31510 }, { "epoch": 0.5724248147610054, "grad_norm": 0.8374537269652476, "learning_rate": 7.876766074288495e-06, "loss": 0.6446, "step": 31520 }, { "epoch": 0.5726064216184803, "grad_norm": 0.8699152567892486, "learning_rate": 7.871134811550913e-06, "loss": 0.6426, "step": 31530 }, { "epoch": 0.5727880284759552, "grad_norm": 0.882060039586545, "learning_rate": 7.865504255859426e-06, "loss": 0.6432, "step": 31540 }, { "epoch": 0.5729696353334301, "grad_norm": 0.8482146791935404, "learning_rate": 7.859874409084074e-06, "loss": 0.6554, "step": 31550 }, { "epoch": 0.5731512421909051, "grad_norm": 0.8709677283502374, "learning_rate": 7.854245273094659e-06, "loss": 0.6361, "step": 31560 }, { "epoch": 0.5733328490483801, "grad_norm": 0.8144827490036789, "learning_rate": 7.848616849760753e-06, "loss": 0.6405, "step": 31570 }, { "epoch": 0.573514455905855, "grad_norm": 0.8585379819722814, "learning_rate": 7.84298914095168e-06, "loss": 0.6336, "step": 31580 }, { "epoch": 0.57369606276333, "grad_norm": 0.8696973402560678, "learning_rate": 7.837362148536543e-06, "loss": 0.6484, "step": 31590 }, { "epoch": 0.5738776696208049, "grad_norm": 0.827669719568359, "learning_rate": 7.831735874384189e-06, "loss": 0.6494, "step": 31600 }, { "epoch": 0.5740592764782798, "grad_norm": 0.8113058423436554, "learning_rate": 7.826110320363248e-06, "loss": 0.6326, "step": 31610 }, { "epoch": 0.5742408833357547, "grad_norm": 0.8353233169912337, "learning_rate": 7.820485488342085e-06, "loss": 0.6364, "step": 31620 }, { "epoch": 0.5744224901932297, "grad_norm": 0.8545525990918782, "learning_rate": 7.814861380188852e-06, "loss": 0.6441, "step": 31630 }, { "epoch": 0.5746040970507046, "grad_norm": 0.84783310185811, "learning_rate": 7.809237997771435e-06, "loss": 0.6302, "step": 31640 }, { "epoch": 0.5747857039081796, "grad_norm": 0.8321772185160206, "learning_rate": 7.8036153429575e-06, "loss": 0.6367, "step": 31650 }, { "epoch": 0.5749673107656545, "grad_norm": 0.8459270952355861, "learning_rate": 7.79799341761446e-06, "loss": 0.6446, "step": 31660 }, { "epoch": 0.5751489176231295, "grad_norm": 0.8451679159922879, "learning_rate": 7.792372223609483e-06, "loss": 0.6384, "step": 31670 }, { "epoch": 0.5753305244806044, "grad_norm": 0.8581237721917928, "learning_rate": 7.786751762809508e-06, "loss": 0.638, "step": 31680 }, { "epoch": 0.5755121313380793, "grad_norm": 0.8726834106358653, "learning_rate": 7.781132037081215e-06, "loss": 0.6308, "step": 31690 }, { "epoch": 0.5756937381955542, "grad_norm": 0.8353584450144477, "learning_rate": 7.775513048291053e-06, "loss": 0.6554, "step": 31700 }, { "epoch": 0.5758753450530292, "grad_norm": 0.800637463435183, "learning_rate": 7.769894798305217e-06, "loss": 0.6469, "step": 31710 }, { "epoch": 0.5760569519105041, "grad_norm": 0.8182040599369697, "learning_rate": 7.764277288989659e-06, "loss": 0.6445, "step": 31720 }, { "epoch": 0.576238558767979, "grad_norm": 0.8918181478319592, "learning_rate": 7.758660522210086e-06, "loss": 0.6487, "step": 31730 }, { "epoch": 0.5764201656254541, "grad_norm": 0.8435301189527725, "learning_rate": 7.753044499831959e-06, "loss": 0.6458, "step": 31740 }, { "epoch": 0.576601772482929, "grad_norm": 0.9119445261819947, "learning_rate": 7.747429223720489e-06, "loss": 0.6435, "step": 31750 }, { "epoch": 0.5767833793404039, "grad_norm": 0.8582127093571468, "learning_rate": 7.741814695740646e-06, "loss": 0.6245, "step": 31760 }, { "epoch": 0.5769649861978788, "grad_norm": 0.8454831001286606, "learning_rate": 7.736200917757143e-06, "loss": 0.6242, "step": 31770 }, { "epoch": 0.5771465930553538, "grad_norm": 0.8414469344890503, "learning_rate": 7.730587891634442e-06, "loss": 0.6321, "step": 31780 }, { "epoch": 0.5773281999128287, "grad_norm": 0.8579993140551905, "learning_rate": 7.724975619236771e-06, "loss": 0.6354, "step": 31790 }, { "epoch": 0.5775098067703036, "grad_norm": 0.8518744144530054, "learning_rate": 7.719364102428089e-06, "loss": 0.6384, "step": 31800 }, { "epoch": 0.5776914136277785, "grad_norm": 0.8575227006509785, "learning_rate": 7.713753343072119e-06, "loss": 0.643, "step": 31810 }, { "epoch": 0.5778730204852536, "grad_norm": 0.8383920552069799, "learning_rate": 7.708143343032319e-06, "loss": 0.6433, "step": 31820 }, { "epoch": 0.5780546273427285, "grad_norm": 0.8405847260999055, "learning_rate": 7.702534104171909e-06, "loss": 0.6462, "step": 31830 }, { "epoch": 0.5782362342002034, "grad_norm": 0.8508331961509922, "learning_rate": 7.696925628353842e-06, "loss": 0.6277, "step": 31840 }, { "epoch": 0.5784178410576784, "grad_norm": 0.852154553736648, "learning_rate": 7.691317917440832e-06, "loss": 0.6388, "step": 31850 }, { "epoch": 0.5785994479151533, "grad_norm": 0.8486976079301096, "learning_rate": 7.685710973295326e-06, "loss": 0.6463, "step": 31860 }, { "epoch": 0.5787810547726282, "grad_norm": 0.8542299066176084, "learning_rate": 7.680104797779518e-06, "loss": 0.6426, "step": 31870 }, { "epoch": 0.5789626616301031, "grad_norm": 0.840225390731575, "learning_rate": 7.674499392755359e-06, "loss": 0.6351, "step": 31880 }, { "epoch": 0.579144268487578, "grad_norm": 0.8401637360746537, "learning_rate": 7.668894760084531e-06, "loss": 0.639, "step": 31890 }, { "epoch": 0.579325875345053, "grad_norm": 0.8318859975273233, "learning_rate": 7.663290901628467e-06, "loss": 0.6348, "step": 31900 }, { "epoch": 0.579507482202528, "grad_norm": 0.8474764857432622, "learning_rate": 7.657687819248334e-06, "loss": 0.6331, "step": 31910 }, { "epoch": 0.5796890890600029, "grad_norm": 0.81635998327351, "learning_rate": 7.652085514805055e-06, "loss": 0.6319, "step": 31920 }, { "epoch": 0.5798706959174779, "grad_norm": 0.8346370965462553, "learning_rate": 7.646483990159281e-06, "loss": 0.6479, "step": 31930 }, { "epoch": 0.5800523027749528, "grad_norm": 0.8316512841718547, "learning_rate": 7.64088324717141e-06, "loss": 0.6444, "step": 31940 }, { "epoch": 0.5802339096324277, "grad_norm": 0.8084843328308038, "learning_rate": 7.635283287701583e-06, "loss": 0.6364, "step": 31950 }, { "epoch": 0.5804155164899026, "grad_norm": 0.8630600281141199, "learning_rate": 7.629684113609678e-06, "loss": 0.6295, "step": 31960 }, { "epoch": 0.5805971233473776, "grad_norm": 0.8270627224254676, "learning_rate": 7.624085726755313e-06, "loss": 0.6503, "step": 31970 }, { "epoch": 0.5807787302048525, "grad_norm": 0.8488480707249308, "learning_rate": 7.618488128997837e-06, "loss": 0.6438, "step": 31980 }, { "epoch": 0.5809603370623275, "grad_norm": 0.8325482725969914, "learning_rate": 7.612891322196353e-06, "loss": 0.6424, "step": 31990 }, { "epoch": 0.5811419439198025, "grad_norm": 0.8922365574882258, "learning_rate": 7.607295308209681e-06, "loss": 0.6488, "step": 32000 }, { "epoch": 0.5813235507772774, "grad_norm": 0.8369466941907101, "learning_rate": 7.601700088896401e-06, "loss": 0.6384, "step": 32010 }, { "epoch": 0.5815051576347523, "grad_norm": 0.8458149115787611, "learning_rate": 7.596105666114804e-06, "loss": 0.6361, "step": 32020 }, { "epoch": 0.5816867644922272, "grad_norm": 0.8811306836270971, "learning_rate": 7.590512041722941e-06, "loss": 0.6383, "step": 32030 }, { "epoch": 0.5818683713497022, "grad_norm": 0.8265271571796561, "learning_rate": 7.584919217578577e-06, "loss": 0.649, "step": 32040 }, { "epoch": 0.5820499782071771, "grad_norm": 0.8796282223926192, "learning_rate": 7.579327195539226e-06, "loss": 0.6472, "step": 32050 }, { "epoch": 0.582231585064652, "grad_norm": 0.8341648869703455, "learning_rate": 7.573735977462129e-06, "loss": 0.6402, "step": 32060 }, { "epoch": 0.5824131919221269, "grad_norm": 0.8355785774707093, "learning_rate": 7.568145565204253e-06, "loss": 0.6623, "step": 32070 }, { "epoch": 0.582594798779602, "grad_norm": 0.846055246685675, "learning_rate": 7.562555960622314e-06, "loss": 0.6527, "step": 32080 }, { "epoch": 0.5827764056370769, "grad_norm": 0.8986686240583607, "learning_rate": 7.5569671655727485e-06, "loss": 0.6526, "step": 32090 }, { "epoch": 0.5829580124945518, "grad_norm": 0.8207947855568866, "learning_rate": 7.551379181911727e-06, "loss": 0.6452, "step": 32100 }, { "epoch": 0.5831396193520267, "grad_norm": 0.8706515889826381, "learning_rate": 7.545792011495146e-06, "loss": 0.6442, "step": 32110 }, { "epoch": 0.5833212262095017, "grad_norm": 0.8724820977621384, "learning_rate": 7.540205656178642e-06, "loss": 0.6371, "step": 32120 }, { "epoch": 0.5835028330669766, "grad_norm": 0.8687705994642684, "learning_rate": 7.5346201178175704e-06, "loss": 0.6351, "step": 32130 }, { "epoch": 0.5836844399244515, "grad_norm": 0.8506276554063633, "learning_rate": 7.529035398267021e-06, "loss": 0.6308, "step": 32140 }, { "epoch": 0.5838660467819264, "grad_norm": 0.8368349963521523, "learning_rate": 7.523451499381809e-06, "loss": 0.6341, "step": 32150 }, { "epoch": 0.5840476536394015, "grad_norm": 0.852974495537807, "learning_rate": 7.517868423016482e-06, "loss": 0.6484, "step": 32160 }, { "epoch": 0.5842292604968764, "grad_norm": 0.8677817263117432, "learning_rate": 7.512286171025309e-06, "loss": 0.6329, "step": 32170 }, { "epoch": 0.5844108673543513, "grad_norm": 0.8542396743152207, "learning_rate": 7.506704745262282e-06, "loss": 0.6424, "step": 32180 }, { "epoch": 0.5845924742118263, "grad_norm": 1.175404297032125, "learning_rate": 7.501124147581131e-06, "loss": 0.6296, "step": 32190 }, { "epoch": 0.5847740810693012, "grad_norm": 0.8224185391185626, "learning_rate": 7.495544379835298e-06, "loss": 0.6332, "step": 32200 }, { "epoch": 0.5849556879267761, "grad_norm": 0.878653424979099, "learning_rate": 7.489965443877958e-06, "loss": 0.6501, "step": 32210 }, { "epoch": 0.585137294784251, "grad_norm": 0.8588822381805697, "learning_rate": 7.4843873415620026e-06, "loss": 0.6238, "step": 32220 }, { "epoch": 0.585318901641726, "grad_norm": 0.8661643419683828, "learning_rate": 7.478810074740057e-06, "loss": 0.639, "step": 32230 }, { "epoch": 0.5855005084992009, "grad_norm": 0.8903705126971402, "learning_rate": 7.473233645264456e-06, "loss": 0.6417, "step": 32240 }, { "epoch": 0.5856821153566759, "grad_norm": 0.8718240956025999, "learning_rate": 7.467658054987268e-06, "loss": 0.6309, "step": 32250 }, { "epoch": 0.5858637222141508, "grad_norm": 0.8487603693723403, "learning_rate": 7.462083305760271e-06, "loss": 0.6385, "step": 32260 }, { "epoch": 0.5860453290716258, "grad_norm": 0.8869418559505152, "learning_rate": 7.456509399434979e-06, "loss": 0.6493, "step": 32270 }, { "epoch": 0.5862269359291007, "grad_norm": 0.8475493892485374, "learning_rate": 7.450936337862609e-06, "loss": 0.6432, "step": 32280 }, { "epoch": 0.5864085427865756, "grad_norm": 0.8405657207106663, "learning_rate": 7.4453641228941085e-06, "loss": 0.6435, "step": 32290 }, { "epoch": 0.5865901496440505, "grad_norm": 0.8340265208637336, "learning_rate": 7.439792756380141e-06, "loss": 0.6373, "step": 32300 }, { "epoch": 0.5867717565015255, "grad_norm": 0.8692833420692747, "learning_rate": 7.434222240171087e-06, "loss": 0.6394, "step": 32310 }, { "epoch": 0.5869533633590004, "grad_norm": 0.9278325657298034, "learning_rate": 7.428652576117048e-06, "loss": 0.645, "step": 32320 }, { "epoch": 0.5871349702164754, "grad_norm": 0.8220060800945568, "learning_rate": 7.423083766067839e-06, "loss": 0.6356, "step": 32330 }, { "epoch": 0.5873165770739504, "grad_norm": 0.8325945618848681, "learning_rate": 7.4175158118729915e-06, "loss": 0.6479, "step": 32340 }, { "epoch": 0.5874981839314253, "grad_norm": 0.8771721937320441, "learning_rate": 7.4119487153817534e-06, "loss": 0.6348, "step": 32350 }, { "epoch": 0.5876797907889002, "grad_norm": 0.8073639605261139, "learning_rate": 7.406382478443092e-06, "loss": 0.6343, "step": 32360 }, { "epoch": 0.5878613976463751, "grad_norm": 0.8253674977327345, "learning_rate": 7.400817102905684e-06, "loss": 0.65, "step": 32370 }, { "epoch": 0.58804300450385, "grad_norm": 0.8591248414079647, "learning_rate": 7.395252590617915e-06, "loss": 0.6416, "step": 32380 }, { "epoch": 0.588224611361325, "grad_norm": 0.8625773227648632, "learning_rate": 7.3896889434279e-06, "loss": 0.6367, "step": 32390 }, { "epoch": 0.5884062182187999, "grad_norm": 0.852694942827016, "learning_rate": 7.384126163183446e-06, "loss": 0.6385, "step": 32400 }, { "epoch": 0.5885878250762748, "grad_norm": 0.8535009587583104, "learning_rate": 7.378564251732093e-06, "loss": 0.6359, "step": 32410 }, { "epoch": 0.5887694319337499, "grad_norm": 0.8418204207589772, "learning_rate": 7.373003210921075e-06, "loss": 0.6223, "step": 32420 }, { "epoch": 0.5889510387912248, "grad_norm": 0.8517201335832616, "learning_rate": 7.36744304259735e-06, "loss": 0.637, "step": 32430 }, { "epoch": 0.5891326456486997, "grad_norm": 0.8284112939906092, "learning_rate": 7.361883748607575e-06, "loss": 0.6304, "step": 32440 }, { "epoch": 0.5893142525061746, "grad_norm": 0.8770506004599179, "learning_rate": 7.3563253307981265e-06, "loss": 0.6391, "step": 32450 }, { "epoch": 0.5894958593636496, "grad_norm": 0.7958943581255843, "learning_rate": 7.3507677910150795e-06, "loss": 0.6448, "step": 32460 }, { "epoch": 0.5896774662211245, "grad_norm": 0.837602307206358, "learning_rate": 7.345211131104233e-06, "loss": 0.6364, "step": 32470 }, { "epoch": 0.5898590730785994, "grad_norm": 0.7925717934747544, "learning_rate": 7.339655352911076e-06, "loss": 0.6315, "step": 32480 }, { "epoch": 0.5900406799360743, "grad_norm": 0.8843204130503073, "learning_rate": 7.334100458280815e-06, "loss": 0.6489, "step": 32490 }, { "epoch": 0.5902222867935494, "grad_norm": 0.8334407279047542, "learning_rate": 7.328546449058363e-06, "loss": 0.6407, "step": 32500 }, { "epoch": 0.5904038936510243, "grad_norm": 0.8353040004200886, "learning_rate": 7.322993327088332e-06, "loss": 0.6288, "step": 32510 }, { "epoch": 0.5905855005084992, "grad_norm": 0.8447366745496514, "learning_rate": 7.31744109421505e-06, "loss": 0.6476, "step": 32520 }, { "epoch": 0.5907671073659742, "grad_norm": 0.8899760133390738, "learning_rate": 7.311889752282539e-06, "loss": 0.645, "step": 32530 }, { "epoch": 0.5909487142234491, "grad_norm": 0.8928488455099077, "learning_rate": 7.306339303134533e-06, "loss": 0.6497, "step": 32540 }, { "epoch": 0.591130321080924, "grad_norm": 0.8511278871394039, "learning_rate": 7.300789748614464e-06, "loss": 0.6449, "step": 32550 }, { "epoch": 0.5913119279383989, "grad_norm": 0.8244489229092927, "learning_rate": 7.295241090565475e-06, "loss": 0.6363, "step": 32560 }, { "epoch": 0.5914935347958739, "grad_norm": 0.8702773846558569, "learning_rate": 7.289693330830401e-06, "loss": 0.6291, "step": 32570 }, { "epoch": 0.5916751416533488, "grad_norm": 0.8609290178910962, "learning_rate": 7.284146471251779e-06, "loss": 0.6345, "step": 32580 }, { "epoch": 0.5918567485108238, "grad_norm": 0.8724444417438542, "learning_rate": 7.27860051367186e-06, "loss": 0.6496, "step": 32590 }, { "epoch": 0.5920383553682987, "grad_norm": 0.8250339705178706, "learning_rate": 7.273055459932578e-06, "loss": 0.6386, "step": 32600 }, { "epoch": 0.5922199622257737, "grad_norm": 0.8226064323347551, "learning_rate": 7.267511311875586e-06, "loss": 0.6221, "step": 32610 }, { "epoch": 0.5924015690832486, "grad_norm": 0.8325630348262553, "learning_rate": 7.261968071342215e-06, "loss": 0.6292, "step": 32620 }, { "epoch": 0.5925831759407235, "grad_norm": 0.8542654572923487, "learning_rate": 7.256425740173514e-06, "loss": 0.646, "step": 32630 }, { "epoch": 0.5927647827981984, "grad_norm": 0.8309021614512101, "learning_rate": 7.250884320210212e-06, "loss": 0.6416, "step": 32640 }, { "epoch": 0.5929463896556734, "grad_norm": 0.8634935973093597, "learning_rate": 7.245343813292754e-06, "loss": 0.6381, "step": 32650 }, { "epoch": 0.5931279965131483, "grad_norm": 0.8465921104702899, "learning_rate": 7.239804221261265e-06, "loss": 0.6464, "step": 32660 }, { "epoch": 0.5933096033706233, "grad_norm": 0.8484144937263104, "learning_rate": 7.234265545955581e-06, "loss": 0.647, "step": 32670 }, { "epoch": 0.5934912102280983, "grad_norm": 0.836727769095675, "learning_rate": 7.22872778921522e-06, "loss": 0.6342, "step": 32680 }, { "epoch": 0.5936728170855732, "grad_norm": 0.8169370294784662, "learning_rate": 7.223190952879402e-06, "loss": 0.6316, "step": 32690 }, { "epoch": 0.5938544239430481, "grad_norm": 0.8506088671351096, "learning_rate": 7.217655038787041e-06, "loss": 0.6361, "step": 32700 }, { "epoch": 0.594036030800523, "grad_norm": 0.8494398817975298, "learning_rate": 7.212120048776745e-06, "loss": 0.6328, "step": 32710 }, { "epoch": 0.594217637657998, "grad_norm": 0.8586747982076082, "learning_rate": 7.206585984686814e-06, "loss": 0.6421, "step": 32720 }, { "epoch": 0.5943992445154729, "grad_norm": 0.8166383317536124, "learning_rate": 7.201052848355238e-06, "loss": 0.6249, "step": 32730 }, { "epoch": 0.5945808513729478, "grad_norm": 0.8075559048662059, "learning_rate": 7.1955206416197035e-06, "loss": 0.6272, "step": 32740 }, { "epoch": 0.5947624582304227, "grad_norm": 0.8106113405908301, "learning_rate": 7.189989366317584e-06, "loss": 0.6263, "step": 32750 }, { "epoch": 0.5949440650878978, "grad_norm": 0.8270484200842532, "learning_rate": 7.184459024285952e-06, "loss": 0.6252, "step": 32760 }, { "epoch": 0.5951256719453727, "grad_norm": 0.8686330600484914, "learning_rate": 7.17892961736156e-06, "loss": 0.6305, "step": 32770 }, { "epoch": 0.5953072788028476, "grad_norm": 0.8308078816130787, "learning_rate": 7.1734011473808496e-06, "loss": 0.6274, "step": 32780 }, { "epoch": 0.5954888856603225, "grad_norm": 0.8599385478755085, "learning_rate": 7.167873616179964e-06, "loss": 0.647, "step": 32790 }, { "epoch": 0.5956704925177975, "grad_norm": 0.8513341722043362, "learning_rate": 7.162347025594714e-06, "loss": 0.6379, "step": 32800 }, { "epoch": 0.5958520993752724, "grad_norm": 0.8186410981541111, "learning_rate": 7.156821377460626e-06, "loss": 0.6519, "step": 32810 }, { "epoch": 0.5960337062327473, "grad_norm": 0.8434887533018002, "learning_rate": 7.151296673612882e-06, "loss": 0.648, "step": 32820 }, { "epoch": 0.5962153130902222, "grad_norm": 0.8607189533957991, "learning_rate": 7.145772915886376e-06, "loss": 0.6409, "step": 32830 }, { "epoch": 0.5963969199476973, "grad_norm": 0.8509343127073711, "learning_rate": 7.140250106115672e-06, "loss": 0.6417, "step": 32840 }, { "epoch": 0.5965785268051722, "grad_norm": 0.8374140725362673, "learning_rate": 7.134728246135031e-06, "loss": 0.6298, "step": 32850 }, { "epoch": 0.5967601336626471, "grad_norm": 0.8381939940547029, "learning_rate": 7.129207337778384e-06, "loss": 0.6325, "step": 32860 }, { "epoch": 0.596941740520122, "grad_norm": 0.8452049855493902, "learning_rate": 7.123687382879362e-06, "loss": 0.6417, "step": 32870 }, { "epoch": 0.597123347377597, "grad_norm": 0.8378596705336684, "learning_rate": 7.118168383271267e-06, "loss": 0.6455, "step": 32880 }, { "epoch": 0.5973049542350719, "grad_norm": 0.8714954278546976, "learning_rate": 7.112650340787088e-06, "loss": 0.6458, "step": 32890 }, { "epoch": 0.5974865610925468, "grad_norm": 0.8464249332572505, "learning_rate": 7.107133257259499e-06, "loss": 0.6384, "step": 32900 }, { "epoch": 0.5976681679500218, "grad_norm": 0.8061911097564579, "learning_rate": 7.10161713452085e-06, "loss": 0.6381, "step": 32910 }, { "epoch": 0.5978497748074967, "grad_norm": 0.8134506722338689, "learning_rate": 7.09610197440318e-06, "loss": 0.6331, "step": 32920 }, { "epoch": 0.5980313816649717, "grad_norm": 0.8632746173210588, "learning_rate": 7.0905877787381975e-06, "loss": 0.6406, "step": 32930 }, { "epoch": 0.5982129885224466, "grad_norm": 0.8567536653489007, "learning_rate": 7.085074549357298e-06, "loss": 0.6238, "step": 32940 }, { "epoch": 0.5983945953799216, "grad_norm": 0.9442192073482805, "learning_rate": 7.079562288091554e-06, "loss": 0.63, "step": 32950 }, { "epoch": 0.5985762022373965, "grad_norm": 0.8638125414015048, "learning_rate": 7.074050996771722e-06, "loss": 0.6477, "step": 32960 }, { "epoch": 0.5987578090948714, "grad_norm": 0.8447015901876059, "learning_rate": 7.0685406772282265e-06, "loss": 0.6317, "step": 32970 }, { "epoch": 0.5989394159523463, "grad_norm": 0.855664397405295, "learning_rate": 7.0630313312911695e-06, "loss": 0.6351, "step": 32980 }, { "epoch": 0.5991210228098213, "grad_norm": 0.9183446132743163, "learning_rate": 7.057522960790344e-06, "loss": 0.647, "step": 32990 }, { "epoch": 0.5993026296672962, "grad_norm": 0.8533755075020157, "learning_rate": 7.052015567555199e-06, "loss": 0.6478, "step": 33000 }, { "epoch": 0.5994842365247712, "grad_norm": 0.8884476793397516, "learning_rate": 7.046509153414879e-06, "loss": 0.6285, "step": 33010 }, { "epoch": 0.5996658433822462, "grad_norm": 0.8746675005448472, "learning_rate": 7.041003720198184e-06, "loss": 0.6478, "step": 33020 }, { "epoch": 0.5998474502397211, "grad_norm": 0.816437095756706, "learning_rate": 7.035499269733606e-06, "loss": 0.6423, "step": 33030 }, { "epoch": 0.600029057097196, "grad_norm": 0.8218035608033902, "learning_rate": 7.029995803849295e-06, "loss": 0.6286, "step": 33040 }, { "epoch": 0.6002106639546709, "grad_norm": 0.7916827477142876, "learning_rate": 7.0244933243730885e-06, "loss": 0.6444, "step": 33050 }, { "epoch": 0.6003922708121459, "grad_norm": 0.8796936513384749, "learning_rate": 7.018991833132481e-06, "loss": 0.6328, "step": 33060 }, { "epoch": 0.6005738776696208, "grad_norm": 0.8207195860925813, "learning_rate": 7.013491331954653e-06, "loss": 0.6352, "step": 33070 }, { "epoch": 0.6007554845270957, "grad_norm": 0.8826533675473992, "learning_rate": 7.007991822666449e-06, "loss": 0.6387, "step": 33080 }, { "epoch": 0.6009370913845706, "grad_norm": 0.8209974880339687, "learning_rate": 7.002493307094382e-06, "loss": 0.6456, "step": 33090 }, { "epoch": 0.6011186982420457, "grad_norm": 0.8156601667825651, "learning_rate": 6.996995787064642e-06, "loss": 0.6322, "step": 33100 }, { "epoch": 0.6013003050995206, "grad_norm": 0.877879731573687, "learning_rate": 6.991499264403081e-06, "loss": 0.6474, "step": 33110 }, { "epoch": 0.6014819119569955, "grad_norm": 0.8474562148946004, "learning_rate": 6.986003740935225e-06, "loss": 0.6342, "step": 33120 }, { "epoch": 0.6016635188144704, "grad_norm": 0.8933979034411401, "learning_rate": 6.980509218486267e-06, "loss": 0.6328, "step": 33130 }, { "epoch": 0.6018451256719454, "grad_norm": 0.836598811951554, "learning_rate": 6.975015698881064e-06, "loss": 0.6387, "step": 33140 }, { "epoch": 0.6020267325294203, "grad_norm": 0.8027446970498053, "learning_rate": 6.969523183944144e-06, "loss": 0.643, "step": 33150 }, { "epoch": 0.6022083393868952, "grad_norm": 0.8586303217254925, "learning_rate": 6.964031675499705e-06, "loss": 0.6343, "step": 33160 }, { "epoch": 0.6023899462443701, "grad_norm": 0.8678138074380022, "learning_rate": 6.958541175371602e-06, "loss": 0.6468, "step": 33170 }, { "epoch": 0.6025715531018452, "grad_norm": 0.8380804587803418, "learning_rate": 6.953051685383354e-06, "loss": 0.6277, "step": 33180 }, { "epoch": 0.6027531599593201, "grad_norm": 0.8356977206814348, "learning_rate": 6.947563207358159e-06, "loss": 0.6451, "step": 33190 }, { "epoch": 0.602934766816795, "grad_norm": 0.8250366570217119, "learning_rate": 6.942075743118859e-06, "loss": 0.6395, "step": 33200 }, { "epoch": 0.60311637367427, "grad_norm": 0.8514111637710426, "learning_rate": 6.936589294487981e-06, "loss": 0.6345, "step": 33210 }, { "epoch": 0.6032979805317449, "grad_norm": 0.8436252678927582, "learning_rate": 6.931103863287691e-06, "loss": 0.6478, "step": 33220 }, { "epoch": 0.6034795873892198, "grad_norm": 0.821663924528776, "learning_rate": 6.925619451339842e-06, "loss": 0.6267, "step": 33230 }, { "epoch": 0.6036611942466947, "grad_norm": 0.8320781733621901, "learning_rate": 6.920136060465925e-06, "loss": 0.6299, "step": 33240 }, { "epoch": 0.6038428011041697, "grad_norm": 0.8680479721836384, "learning_rate": 6.914653692487112e-06, "loss": 0.6405, "step": 33250 }, { "epoch": 0.6040244079616446, "grad_norm": 0.8466454662920413, "learning_rate": 6.909172349224218e-06, "loss": 0.6341, "step": 33260 }, { "epoch": 0.6042060148191196, "grad_norm": 0.7952031648175606, "learning_rate": 6.903692032497734e-06, "loss": 0.6423, "step": 33270 }, { "epoch": 0.6043876216765945, "grad_norm": 0.8495618924752638, "learning_rate": 6.898212744127794e-06, "loss": 0.6415, "step": 33280 }, { "epoch": 0.6045692285340695, "grad_norm": 0.8409273191637475, "learning_rate": 6.8927344859342025e-06, "loss": 0.6447, "step": 33290 }, { "epoch": 0.6047508353915444, "grad_norm": 0.8386442943138774, "learning_rate": 6.887257259736417e-06, "loss": 0.6413, "step": 33300 }, { "epoch": 0.6049324422490193, "grad_norm": 0.8453651248824063, "learning_rate": 6.881781067353552e-06, "loss": 0.6537, "step": 33310 }, { "epoch": 0.6051140491064942, "grad_norm": 0.8337471130514147, "learning_rate": 6.876305910604379e-06, "loss": 0.6331, "step": 33320 }, { "epoch": 0.6052956559639692, "grad_norm": 0.8555567516512379, "learning_rate": 6.87083179130733e-06, "loss": 0.6422, "step": 33330 }, { "epoch": 0.6054772628214441, "grad_norm": 0.8301047405018014, "learning_rate": 6.865358711280483e-06, "loss": 0.636, "step": 33340 }, { "epoch": 0.605658869678919, "grad_norm": 0.863078312192106, "learning_rate": 6.859886672341578e-06, "loss": 0.6371, "step": 33350 }, { "epoch": 0.6058404765363941, "grad_norm": 0.8239325435456653, "learning_rate": 6.854415676308013e-06, "loss": 0.6316, "step": 33360 }, { "epoch": 0.606022083393869, "grad_norm": 0.8580722345339574, "learning_rate": 6.848945724996829e-06, "loss": 0.641, "step": 33370 }, { "epoch": 0.6062036902513439, "grad_norm": 0.8794142165460701, "learning_rate": 6.843476820224724e-06, "loss": 0.6263, "step": 33380 }, { "epoch": 0.6063852971088188, "grad_norm": 0.8251294191564517, "learning_rate": 6.838008963808054e-06, "loss": 0.6363, "step": 33390 }, { "epoch": 0.6065669039662938, "grad_norm": 0.8374410094938335, "learning_rate": 6.832542157562816e-06, "loss": 0.6389, "step": 33400 }, { "epoch": 0.6067485108237687, "grad_norm": 0.8145948015677946, "learning_rate": 6.827076403304672e-06, "loss": 0.6226, "step": 33410 }, { "epoch": 0.6069301176812436, "grad_norm": 0.8571734085024743, "learning_rate": 6.8216117028489205e-06, "loss": 0.638, "step": 33420 }, { "epoch": 0.6071117245387185, "grad_norm": 0.8456254478856711, "learning_rate": 6.8161480580105256e-06, "loss": 0.6458, "step": 33430 }, { "epoch": 0.6072933313961936, "grad_norm": 0.8756051626394289, "learning_rate": 6.81068547060408e-06, "loss": 0.631, "step": 33440 }, { "epoch": 0.6074749382536685, "grad_norm": 0.8307502892997937, "learning_rate": 6.805223942443851e-06, "loss": 0.6181, "step": 33450 }, { "epoch": 0.6076565451111434, "grad_norm": 0.8383702617464361, "learning_rate": 6.799763475343726e-06, "loss": 0.6392, "step": 33460 }, { "epoch": 0.6078381519686183, "grad_norm": 0.8611767308407196, "learning_rate": 6.794304071117268e-06, "loss": 0.636, "step": 33470 }, { "epoch": 0.6080197588260933, "grad_norm": 0.8258952131841804, "learning_rate": 6.788845731577662e-06, "loss": 0.64, "step": 33480 }, { "epoch": 0.6082013656835682, "grad_norm": 0.8391658948388427, "learning_rate": 6.783388458537759e-06, "loss": 0.641, "step": 33490 }, { "epoch": 0.6083829725410431, "grad_norm": 0.8577255615624042, "learning_rate": 6.77793225381004e-06, "loss": 0.646, "step": 33500 }, { "epoch": 0.608564579398518, "grad_norm": 0.8355669258713094, "learning_rate": 6.772477119206644e-06, "loss": 0.6284, "step": 33510 }, { "epoch": 0.608746186255993, "grad_norm": 0.8481072131314458, "learning_rate": 6.767023056539348e-06, "loss": 0.6501, "step": 33520 }, { "epoch": 0.608927793113468, "grad_norm": 0.8291338002563479, "learning_rate": 6.761570067619574e-06, "loss": 0.6367, "step": 33530 }, { "epoch": 0.6091093999709429, "grad_norm": 0.8640895434060857, "learning_rate": 6.756118154258387e-06, "loss": 0.646, "step": 33540 }, { "epoch": 0.6092910068284179, "grad_norm": 0.8091028885310068, "learning_rate": 6.750667318266495e-06, "loss": 0.6583, "step": 33550 }, { "epoch": 0.6094726136858928, "grad_norm": 0.8642949105614474, "learning_rate": 6.745217561454253e-06, "loss": 0.6505, "step": 33560 }, { "epoch": 0.6096542205433677, "grad_norm": 0.8348546352660396, "learning_rate": 6.739768885631649e-06, "loss": 0.641, "step": 33570 }, { "epoch": 0.6098358274008426, "grad_norm": 0.855548420327058, "learning_rate": 6.734321292608313e-06, "loss": 0.6403, "step": 33580 }, { "epoch": 0.6100174342583176, "grad_norm": 0.8291374282983759, "learning_rate": 6.728874784193527e-06, "loss": 0.6349, "step": 33590 }, { "epoch": 0.6101990411157925, "grad_norm": 0.8430692235067269, "learning_rate": 6.723429362196194e-06, "loss": 0.6221, "step": 33600 }, { "epoch": 0.6103806479732675, "grad_norm": 0.8809409802098012, "learning_rate": 6.717985028424876e-06, "loss": 0.6433, "step": 33610 }, { "epoch": 0.6105622548307424, "grad_norm": 0.8703125353296013, "learning_rate": 6.712541784687757e-06, "loss": 0.642, "step": 33620 }, { "epoch": 0.6107438616882174, "grad_norm": 0.8141472303178264, "learning_rate": 6.707099632792673e-06, "loss": 0.6466, "step": 33630 }, { "epoch": 0.6109254685456923, "grad_norm": 0.8318580105909353, "learning_rate": 6.701658574547082e-06, "loss": 0.6358, "step": 33640 }, { "epoch": 0.6111070754031672, "grad_norm": 0.8115009170341848, "learning_rate": 6.696218611758096e-06, "loss": 0.6361, "step": 33650 }, { "epoch": 0.6112886822606421, "grad_norm": 0.8420347782655407, "learning_rate": 6.690779746232446e-06, "loss": 0.643, "step": 33660 }, { "epoch": 0.6114702891181171, "grad_norm": 0.86930908627084, "learning_rate": 6.6853419797765125e-06, "loss": 0.63, "step": 33670 }, { "epoch": 0.611651895975592, "grad_norm": 0.7951243775959331, "learning_rate": 6.679905314196305e-06, "loss": 0.6349, "step": 33680 }, { "epoch": 0.6118335028330669, "grad_norm": 0.8353740846536764, "learning_rate": 6.674469751297463e-06, "loss": 0.64, "step": 33690 }, { "epoch": 0.612015109690542, "grad_norm": 0.8653363507336734, "learning_rate": 6.669035292885269e-06, "loss": 0.6409, "step": 33700 }, { "epoch": 0.6121967165480169, "grad_norm": 0.8116702898608705, "learning_rate": 6.663601940764634e-06, "loss": 0.6296, "step": 33710 }, { "epoch": 0.6123783234054918, "grad_norm": 0.8541740421109237, "learning_rate": 6.6581696967401e-06, "loss": 0.6311, "step": 33720 }, { "epoch": 0.6125599302629667, "grad_norm": 0.829067987015382, "learning_rate": 6.6527385626158435e-06, "loss": 0.6318, "step": 33730 }, { "epoch": 0.6127415371204417, "grad_norm": 0.8691999288412597, "learning_rate": 6.647308540195671e-06, "loss": 0.6321, "step": 33740 }, { "epoch": 0.6129231439779166, "grad_norm": 0.8352960430996074, "learning_rate": 6.641879631283021e-06, "loss": 0.6374, "step": 33750 }, { "epoch": 0.6131047508353915, "grad_norm": 0.8451395658537253, "learning_rate": 6.636451837680965e-06, "loss": 0.6418, "step": 33760 }, { "epoch": 0.6132863576928664, "grad_norm": 0.8213105281121196, "learning_rate": 6.6310251611921924e-06, "loss": 0.643, "step": 33770 }, { "epoch": 0.6134679645503415, "grad_norm": 0.8496464387479965, "learning_rate": 6.62559960361904e-06, "loss": 0.6442, "step": 33780 }, { "epoch": 0.6136495714078164, "grad_norm": 0.8491649198729488, "learning_rate": 6.620175166763456e-06, "loss": 0.6373, "step": 33790 }, { "epoch": 0.6138311782652913, "grad_norm": 0.856475496429166, "learning_rate": 6.6147518524270215e-06, "loss": 0.6417, "step": 33800 }, { "epoch": 0.6140127851227662, "grad_norm": 0.8298632644630121, "learning_rate": 6.609329662410952e-06, "loss": 0.6239, "step": 33810 }, { "epoch": 0.6141943919802412, "grad_norm": 0.8516657213142762, "learning_rate": 6.60390859851608e-06, "loss": 0.6263, "step": 33820 }, { "epoch": 0.6143759988377161, "grad_norm": 0.8157670581962143, "learning_rate": 6.598488662542872e-06, "loss": 0.6344, "step": 33830 }, { "epoch": 0.614557605695191, "grad_norm": 0.8763632343001533, "learning_rate": 6.593069856291411e-06, "loss": 0.6466, "step": 33840 }, { "epoch": 0.614739212552666, "grad_norm": 0.8899512086508639, "learning_rate": 6.587652181561416e-06, "loss": 0.6359, "step": 33850 }, { "epoch": 0.6149208194101409, "grad_norm": 0.8606393074698573, "learning_rate": 6.582235640152217e-06, "loss": 0.6417, "step": 33860 }, { "epoch": 0.6151024262676159, "grad_norm": 0.8382232904913869, "learning_rate": 6.5768202338627804e-06, "loss": 0.6437, "step": 33870 }, { "epoch": 0.6152840331250908, "grad_norm": 0.8525064843905451, "learning_rate": 6.571405964491686e-06, "loss": 0.6436, "step": 33880 }, { "epoch": 0.6154656399825658, "grad_norm": 0.8321695231012516, "learning_rate": 6.565992833837141e-06, "loss": 0.6465, "step": 33890 }, { "epoch": 0.6156472468400407, "grad_norm": 0.8328037098401045, "learning_rate": 6.560580843696973e-06, "loss": 0.6516, "step": 33900 }, { "epoch": 0.6158288536975156, "grad_norm": 0.8352222332872486, "learning_rate": 6.555169995868632e-06, "loss": 0.639, "step": 33910 }, { "epoch": 0.6160104605549905, "grad_norm": 0.8319118949580617, "learning_rate": 6.549760292149185e-06, "loss": 0.6422, "step": 33920 }, { "epoch": 0.6161920674124655, "grad_norm": 0.8299979773494842, "learning_rate": 6.544351734335326e-06, "loss": 0.6206, "step": 33930 }, { "epoch": 0.6163736742699404, "grad_norm": 0.8204577496642022, "learning_rate": 6.538944324223357e-06, "loss": 0.6284, "step": 33940 }, { "epoch": 0.6165552811274154, "grad_norm": 0.8209957788513027, "learning_rate": 6.533538063609211e-06, "loss": 0.6307, "step": 33950 }, { "epoch": 0.6167368879848903, "grad_norm": 0.841996150544287, "learning_rate": 6.528132954288438e-06, "loss": 0.6323, "step": 33960 }, { "epoch": 0.6169184948423653, "grad_norm": 0.8300376508425525, "learning_rate": 6.522728998056189e-06, "loss": 0.6376, "step": 33970 }, { "epoch": 0.6171001016998402, "grad_norm": 0.8266746586052875, "learning_rate": 6.517326196707259e-06, "loss": 0.6503, "step": 33980 }, { "epoch": 0.6172817085573151, "grad_norm": 0.8939156222835136, "learning_rate": 6.511924552036038e-06, "loss": 0.6379, "step": 33990 }, { "epoch": 0.61746331541479, "grad_norm": 0.8431182221588492, "learning_rate": 6.5065240658365355e-06, "loss": 0.6448, "step": 34000 }, { "epoch": 0.617644922272265, "grad_norm": 0.8496097501793516, "learning_rate": 6.501124739902388e-06, "loss": 0.6353, "step": 34010 }, { "epoch": 0.6178265291297399, "grad_norm": 0.8463537498107555, "learning_rate": 6.49572657602683e-06, "loss": 0.6348, "step": 34020 }, { "epoch": 0.6180081359872148, "grad_norm": 0.8893879276631854, "learning_rate": 6.490329576002725e-06, "loss": 0.6392, "step": 34030 }, { "epoch": 0.6181897428446899, "grad_norm": 0.8299933411751962, "learning_rate": 6.4849337416225386e-06, "loss": 0.6212, "step": 34040 }, { "epoch": 0.6183713497021648, "grad_norm": 0.8431936794780852, "learning_rate": 6.47953907467836e-06, "loss": 0.621, "step": 34050 }, { "epoch": 0.6185529565596397, "grad_norm": 0.8393777696781786, "learning_rate": 6.474145576961877e-06, "loss": 0.6279, "step": 34060 }, { "epoch": 0.6187345634171146, "grad_norm": 0.8989715390248865, "learning_rate": 6.468753250264406e-06, "loss": 0.6415, "step": 34070 }, { "epoch": 0.6189161702745896, "grad_norm": 0.8156357351622093, "learning_rate": 6.463362096376855e-06, "loss": 0.6213, "step": 34080 }, { "epoch": 0.6190977771320645, "grad_norm": 0.8657161892155076, "learning_rate": 6.45797211708976e-06, "loss": 0.6328, "step": 34090 }, { "epoch": 0.6192793839895394, "grad_norm": 0.843752851547774, "learning_rate": 6.452583314193257e-06, "loss": 0.6487, "step": 34100 }, { "epoch": 0.6194609908470143, "grad_norm": 0.8524816272278599, "learning_rate": 6.4471956894770945e-06, "loss": 0.6415, "step": 34110 }, { "epoch": 0.6196425977044894, "grad_norm": 0.8612632803461426, "learning_rate": 6.441809244730629e-06, "loss": 0.6455, "step": 34120 }, { "epoch": 0.6198242045619643, "grad_norm": 0.8356223142815454, "learning_rate": 6.436423981742825e-06, "loss": 0.6294, "step": 34130 }, { "epoch": 0.6200058114194392, "grad_norm": 0.8356858640311825, "learning_rate": 6.4310399023022544e-06, "loss": 0.6242, "step": 34140 }, { "epoch": 0.6201874182769141, "grad_norm": 0.8501644161302805, "learning_rate": 6.425657008197096e-06, "loss": 0.6464, "step": 34150 }, { "epoch": 0.6203690251343891, "grad_norm": 0.8621125077876629, "learning_rate": 6.42027530121514e-06, "loss": 0.6405, "step": 34160 }, { "epoch": 0.620550631991864, "grad_norm": 0.8306477670003001, "learning_rate": 6.414894783143768e-06, "loss": 0.631, "step": 34170 }, { "epoch": 0.6207322388493389, "grad_norm": 0.8748771973055244, "learning_rate": 6.409515455769988e-06, "loss": 0.657, "step": 34180 }, { "epoch": 0.6209138457068138, "grad_norm": 0.8509377686224295, "learning_rate": 6.404137320880395e-06, "loss": 0.6254, "step": 34190 }, { "epoch": 0.6210954525642888, "grad_norm": 0.8299068628770707, "learning_rate": 6.398760380261189e-06, "loss": 0.627, "step": 34200 }, { "epoch": 0.6212770594217638, "grad_norm": 0.8420763184816369, "learning_rate": 6.3933846356981855e-06, "loss": 0.6329, "step": 34210 }, { "epoch": 0.6214586662792387, "grad_norm": 0.8494897176065576, "learning_rate": 6.388010088976791e-06, "loss": 0.6406, "step": 34220 }, { "epoch": 0.6216402731367137, "grad_norm": 0.8585904166569371, "learning_rate": 6.382636741882025e-06, "loss": 0.6393, "step": 34230 }, { "epoch": 0.6218218799941886, "grad_norm": 0.8297265123709707, "learning_rate": 6.377264596198491e-06, "loss": 0.6434, "step": 34240 }, { "epoch": 0.6220034868516635, "grad_norm": 0.8207138916519541, "learning_rate": 6.371893653710417e-06, "loss": 0.6221, "step": 34250 }, { "epoch": 0.6221850937091384, "grad_norm": 0.8778953707437266, "learning_rate": 6.36652391620161e-06, "loss": 0.6282, "step": 34260 }, { "epoch": 0.6223667005666134, "grad_norm": 0.879636726256241, "learning_rate": 6.361155385455493e-06, "loss": 0.6308, "step": 34270 }, { "epoch": 0.6225483074240883, "grad_norm": 0.8627635151106325, "learning_rate": 6.355788063255075e-06, "loss": 0.634, "step": 34280 }, { "epoch": 0.6227299142815633, "grad_norm": 0.864083823013836, "learning_rate": 6.3504219513829715e-06, "loss": 0.6367, "step": 34290 }, { "epoch": 0.6229115211390382, "grad_norm": 0.8613035114699773, "learning_rate": 6.345057051621395e-06, "loss": 0.6358, "step": 34300 }, { "epoch": 0.6230931279965132, "grad_norm": 0.8906699937916642, "learning_rate": 6.339693365752154e-06, "loss": 0.629, "step": 34310 }, { "epoch": 0.6232747348539881, "grad_norm": 0.834646960196325, "learning_rate": 6.334330895556655e-06, "loss": 0.6449, "step": 34320 }, { "epoch": 0.623456341711463, "grad_norm": 0.8381950896477708, "learning_rate": 6.328969642815901e-06, "loss": 0.6245, "step": 34330 }, { "epoch": 0.623637948568938, "grad_norm": 0.855427202516877, "learning_rate": 6.323609609310488e-06, "loss": 0.6272, "step": 34340 }, { "epoch": 0.6238195554264129, "grad_norm": 0.83640132536848, "learning_rate": 6.318250796820607e-06, "loss": 0.627, "step": 34350 }, { "epoch": 0.6240011622838878, "grad_norm": 0.8424052888192066, "learning_rate": 6.312893207126054e-06, "loss": 0.6366, "step": 34360 }, { "epoch": 0.6241827691413627, "grad_norm": 0.8616628996168221, "learning_rate": 6.307536842006199e-06, "loss": 0.6311, "step": 34370 }, { "epoch": 0.6243643759988378, "grad_norm": 0.8497505835223979, "learning_rate": 6.302181703240027e-06, "loss": 0.6244, "step": 34380 }, { "epoch": 0.6245459828563127, "grad_norm": 0.8541689990325727, "learning_rate": 6.296827792606099e-06, "loss": 0.6375, "step": 34390 }, { "epoch": 0.6247275897137876, "grad_norm": 0.8326362767181271, "learning_rate": 6.291475111882573e-06, "loss": 0.6341, "step": 34400 }, { "epoch": 0.6249091965712625, "grad_norm": 0.8720061421575653, "learning_rate": 6.286123662847205e-06, "loss": 0.6369, "step": 34410 }, { "epoch": 0.6250908034287375, "grad_norm": 0.8524261138074941, "learning_rate": 6.280773447277333e-06, "loss": 0.6197, "step": 34420 }, { "epoch": 0.6252724102862124, "grad_norm": 0.829341939926407, "learning_rate": 6.275424466949893e-06, "loss": 0.6315, "step": 34430 }, { "epoch": 0.6254540171436873, "grad_norm": 0.877880836371955, "learning_rate": 6.2700767236414e-06, "loss": 0.6331, "step": 34440 }, { "epoch": 0.6256356240011622, "grad_norm": 0.8509347889458523, "learning_rate": 6.264730219127975e-06, "loss": 0.6428, "step": 34450 }, { "epoch": 0.6258172308586373, "grad_norm": 0.817542154847931, "learning_rate": 6.259384955185308e-06, "loss": 0.629, "step": 34460 }, { "epoch": 0.6259988377161122, "grad_norm": 0.8227302620865092, "learning_rate": 6.2540409335886946e-06, "loss": 0.6261, "step": 34470 }, { "epoch": 0.6261804445735871, "grad_norm": 0.8304825021017749, "learning_rate": 6.248698156113002e-06, "loss": 0.6354, "step": 34480 }, { "epoch": 0.626362051431062, "grad_norm": 0.8530100688040283, "learning_rate": 6.243356624532699e-06, "loss": 0.6285, "step": 34490 }, { "epoch": 0.626543658288537, "grad_norm": 0.8547373117256475, "learning_rate": 6.238016340621829e-06, "loss": 0.6481, "step": 34500 }, { "epoch": 0.6267252651460119, "grad_norm": 0.8530834331618815, "learning_rate": 6.232677306154027e-06, "loss": 0.6339, "step": 34510 }, { "epoch": 0.6269068720034868, "grad_norm": 0.842430647842547, "learning_rate": 6.227339522902512e-06, "loss": 0.6265, "step": 34520 }, { "epoch": 0.6270884788609618, "grad_norm": 0.8174437586686627, "learning_rate": 6.222002992640088e-06, "loss": 0.6374, "step": 34530 }, { "epoch": 0.6272700857184367, "grad_norm": 0.8615953544704803, "learning_rate": 6.21666771713914e-06, "loss": 0.6362, "step": 34540 }, { "epoch": 0.6274516925759117, "grad_norm": 0.844807426749733, "learning_rate": 6.211333698171638e-06, "loss": 0.636, "step": 34550 }, { "epoch": 0.6276332994333866, "grad_norm": 0.8312346866336888, "learning_rate": 6.206000937509138e-06, "loss": 0.6275, "step": 34560 }, { "epoch": 0.6278149062908616, "grad_norm": 0.8686596256893305, "learning_rate": 6.200669436922771e-06, "loss": 0.6365, "step": 34570 }, { "epoch": 0.6279965131483365, "grad_norm": 0.8233908350022703, "learning_rate": 6.195339198183259e-06, "loss": 0.6298, "step": 34580 }, { "epoch": 0.6281781200058114, "grad_norm": 0.8655817478748, "learning_rate": 6.190010223060896e-06, "loss": 0.6438, "step": 34590 }, { "epoch": 0.6283597268632863, "grad_norm": 0.8284035537967255, "learning_rate": 6.184682513325555e-06, "loss": 0.6408, "step": 34600 }, { "epoch": 0.6285413337207613, "grad_norm": 0.792683305815567, "learning_rate": 6.1793560707467025e-06, "loss": 0.6286, "step": 34610 }, { "epoch": 0.6287229405782362, "grad_norm": 0.827366812290653, "learning_rate": 6.174030897093366e-06, "loss": 0.6288, "step": 34620 }, { "epoch": 0.6289045474357112, "grad_norm": 0.8506026955646177, "learning_rate": 6.1687069941341705e-06, "loss": 0.6247, "step": 34630 }, { "epoch": 0.6290861542931862, "grad_norm": 0.8629340907840987, "learning_rate": 6.163384363637299e-06, "loss": 0.6302, "step": 34640 }, { "epoch": 0.6292677611506611, "grad_norm": 0.8703096549869752, "learning_rate": 6.158063007370532e-06, "loss": 0.6403, "step": 34650 }, { "epoch": 0.629449368008136, "grad_norm": 0.8677051526761557, "learning_rate": 6.152742927101206e-06, "loss": 0.6277, "step": 34660 }, { "epoch": 0.6296309748656109, "grad_norm": 0.9201042875595755, "learning_rate": 6.147424124596255e-06, "loss": 0.6363, "step": 34670 }, { "epoch": 0.6298125817230859, "grad_norm": 0.8162963638207492, "learning_rate": 6.142106601622171e-06, "loss": 0.6207, "step": 34680 }, { "epoch": 0.6299941885805608, "grad_norm": 0.8241661301731603, "learning_rate": 6.136790359945032e-06, "loss": 0.6286, "step": 34690 }, { "epoch": 0.6301757954380357, "grad_norm": 0.842082934291916, "learning_rate": 6.131475401330485e-06, "loss": 0.638, "step": 34700 }, { "epoch": 0.6303574022955106, "grad_norm": 0.859726299326323, "learning_rate": 6.126161727543752e-06, "loss": 0.6362, "step": 34710 }, { "epoch": 0.6305390091529857, "grad_norm": 0.833106075976795, "learning_rate": 6.120849340349629e-06, "loss": 0.6321, "step": 34720 }, { "epoch": 0.6307206160104606, "grad_norm": 0.844111752988075, "learning_rate": 6.115538241512484e-06, "loss": 0.637, "step": 34730 }, { "epoch": 0.6309022228679355, "grad_norm": 0.8218685897576327, "learning_rate": 6.110228432796261e-06, "loss": 0.6292, "step": 34740 }, { "epoch": 0.6310838297254104, "grad_norm": 0.8338272442989004, "learning_rate": 6.1049199159644666e-06, "loss": 0.6253, "step": 34750 }, { "epoch": 0.6312654365828854, "grad_norm": 0.823907215780491, "learning_rate": 6.09961269278019e-06, "loss": 0.6243, "step": 34760 }, { "epoch": 0.6314470434403603, "grad_norm": 0.8643014775076046, "learning_rate": 6.094306765006079e-06, "loss": 0.6404, "step": 34770 }, { "epoch": 0.6316286502978352, "grad_norm": 0.8356179155308545, "learning_rate": 6.0890021344043625e-06, "loss": 0.6314, "step": 34780 }, { "epoch": 0.6318102571553101, "grad_norm": 0.8339083730028802, "learning_rate": 6.08369880273683e-06, "loss": 0.6369, "step": 34790 }, { "epoch": 0.6319918640127852, "grad_norm": 0.8376407794618402, "learning_rate": 6.078396771764837e-06, "loss": 0.6328, "step": 34800 }, { "epoch": 0.6321734708702601, "grad_norm": 0.8658661788080337, "learning_rate": 6.073096043249322e-06, "loss": 0.6275, "step": 34810 }, { "epoch": 0.632355077727735, "grad_norm": 0.8626092721696791, "learning_rate": 6.067796618950773e-06, "loss": 0.6344, "step": 34820 }, { "epoch": 0.63253668458521, "grad_norm": 0.8647514505892787, "learning_rate": 6.06249850062926e-06, "loss": 0.6282, "step": 34830 }, { "epoch": 0.6327182914426849, "grad_norm": 0.824547587145925, "learning_rate": 6.057201690044407e-06, "loss": 0.6294, "step": 34840 }, { "epoch": 0.6328998983001598, "grad_norm": 0.84023741990415, "learning_rate": 6.051906188955415e-06, "loss": 0.6168, "step": 34850 }, { "epoch": 0.6330815051576347, "grad_norm": 0.8920559878850616, "learning_rate": 6.046611999121035e-06, "loss": 0.6366, "step": 34860 }, { "epoch": 0.6332631120151097, "grad_norm": 0.8880706515245267, "learning_rate": 6.041319122299603e-06, "loss": 0.6375, "step": 34870 }, { "epoch": 0.6334447188725846, "grad_norm": 0.850110598009395, "learning_rate": 6.036027560248998e-06, "loss": 0.6181, "step": 34880 }, { "epoch": 0.6336263257300596, "grad_norm": 0.8262406785680577, "learning_rate": 6.030737314726678e-06, "loss": 0.6225, "step": 34890 }, { "epoch": 0.6338079325875345, "grad_norm": 0.8233587233958485, "learning_rate": 6.025448387489654e-06, "loss": 0.6262, "step": 34900 }, { "epoch": 0.6339895394450095, "grad_norm": 0.8705508058134749, "learning_rate": 6.020160780294506e-06, "loss": 0.6321, "step": 34910 }, { "epoch": 0.6341711463024844, "grad_norm": 0.8529241004034533, "learning_rate": 6.014874494897369e-06, "loss": 0.6304, "step": 34920 }, { "epoch": 0.6343527531599593, "grad_norm": 0.9830503042457697, "learning_rate": 6.009589533053947e-06, "loss": 0.625, "step": 34930 }, { "epoch": 0.6345343600174342, "grad_norm": 0.861032335316042, "learning_rate": 6.004305896519496e-06, "loss": 0.6247, "step": 34940 }, { "epoch": 0.6347159668749092, "grad_norm": 0.8843052351790738, "learning_rate": 5.999023587048835e-06, "loss": 0.6454, "step": 34950 }, { "epoch": 0.6348975737323841, "grad_norm": 0.8943567766054826, "learning_rate": 5.993742606396349e-06, "loss": 0.6398, "step": 34960 }, { "epoch": 0.6350791805898591, "grad_norm": 0.8215293243041639, "learning_rate": 5.988462956315967e-06, "loss": 0.6363, "step": 34970 }, { "epoch": 0.635260787447334, "grad_norm": 0.8487732391988704, "learning_rate": 5.983184638561193e-06, "loss": 0.64, "step": 34980 }, { "epoch": 0.635442394304809, "grad_norm": 0.8534894609545938, "learning_rate": 5.9779076548850774e-06, "loss": 0.6478, "step": 34990 }, { "epoch": 0.6356240011622839, "grad_norm": 0.8405409333949846, "learning_rate": 5.9726320070402255e-06, "loss": 0.6291, "step": 35000 }, { "epoch": 0.6358056080197588, "grad_norm": 0.8754363427409508, "learning_rate": 5.967357696778811e-06, "loss": 0.6332, "step": 35010 }, { "epoch": 0.6359872148772338, "grad_norm": 0.8111500655467236, "learning_rate": 5.962084725852549e-06, "loss": 0.6167, "step": 35020 }, { "epoch": 0.6361688217347087, "grad_norm": 0.83858951701705, "learning_rate": 5.956813096012725e-06, "loss": 0.6397, "step": 35030 }, { "epoch": 0.6363504285921836, "grad_norm": 0.8187292137503415, "learning_rate": 5.951542809010162e-06, "loss": 0.6296, "step": 35040 }, { "epoch": 0.6365320354496585, "grad_norm": 0.8305096847498739, "learning_rate": 5.946273866595256e-06, "loss": 0.6232, "step": 35050 }, { "epoch": 0.6367136423071336, "grad_norm": 0.7971323019169524, "learning_rate": 5.941006270517935e-06, "loss": 0.6232, "step": 35060 }, { "epoch": 0.6368952491646085, "grad_norm": 0.8474077079463859, "learning_rate": 5.935740022527703e-06, "loss": 0.6291, "step": 35070 }, { "epoch": 0.6370768560220834, "grad_norm": 0.8720330250227659, "learning_rate": 5.930475124373597e-06, "loss": 0.6288, "step": 35080 }, { "epoch": 0.6372584628795583, "grad_norm": 0.8200936005350359, "learning_rate": 5.925211577804215e-06, "loss": 0.6194, "step": 35090 }, { "epoch": 0.6374400697370333, "grad_norm": 0.8655898517859035, "learning_rate": 5.919949384567705e-06, "loss": 0.6267, "step": 35100 }, { "epoch": 0.6376216765945082, "grad_norm": 0.8446403026153048, "learning_rate": 5.914688546411764e-06, "loss": 0.6295, "step": 35110 }, { "epoch": 0.6378032834519831, "grad_norm": 0.8821989192021211, "learning_rate": 5.909429065083641e-06, "loss": 0.6364, "step": 35120 }, { "epoch": 0.637984890309458, "grad_norm": 0.8204239189171727, "learning_rate": 5.904170942330131e-06, "loss": 0.6345, "step": 35130 }, { "epoch": 0.6381664971669331, "grad_norm": 0.8251490145261494, "learning_rate": 5.898914179897582e-06, "loss": 0.6216, "step": 35140 }, { "epoch": 0.638348104024408, "grad_norm": 0.8417195807280092, "learning_rate": 5.8936587795318855e-06, "loss": 0.6339, "step": 35150 }, { "epoch": 0.6385297108818829, "grad_norm": 0.8646475291514586, "learning_rate": 5.88840474297849e-06, "loss": 0.6325, "step": 35160 }, { "epoch": 0.6387113177393579, "grad_norm": 0.8480554285615924, "learning_rate": 5.883152071982375e-06, "loss": 0.639, "step": 35170 }, { "epoch": 0.6388929245968328, "grad_norm": 0.8259490759055063, "learning_rate": 5.877900768288085e-06, "loss": 0.6236, "step": 35180 }, { "epoch": 0.6390745314543077, "grad_norm": 0.815814198099649, "learning_rate": 5.872650833639697e-06, "loss": 0.618, "step": 35190 }, { "epoch": 0.6392561383117826, "grad_norm": 0.8057886438227367, "learning_rate": 5.867402269780834e-06, "loss": 0.6374, "step": 35200 }, { "epoch": 0.6394377451692576, "grad_norm": 0.8176243331524018, "learning_rate": 5.862155078454674e-06, "loss": 0.6119, "step": 35210 }, { "epoch": 0.6396193520267325, "grad_norm": 0.8338766928997359, "learning_rate": 5.856909261403925e-06, "loss": 0.6273, "step": 35220 }, { "epoch": 0.6398009588842075, "grad_norm": 0.8007414218702392, "learning_rate": 5.851664820370854e-06, "loss": 0.6365, "step": 35230 }, { "epoch": 0.6399825657416824, "grad_norm": 0.848010579934585, "learning_rate": 5.8464217570972534e-06, "loss": 0.6328, "step": 35240 }, { "epoch": 0.6401641725991574, "grad_norm": 0.8291063787622766, "learning_rate": 5.84118007332448e-06, "loss": 0.6452, "step": 35250 }, { "epoch": 0.6403457794566323, "grad_norm": 0.8281017150809096, "learning_rate": 5.835939770793406e-06, "loss": 0.6334, "step": 35260 }, { "epoch": 0.6405273863141072, "grad_norm": 0.9140306987031869, "learning_rate": 5.830700851244473e-06, "loss": 0.6372, "step": 35270 }, { "epoch": 0.6407089931715821, "grad_norm": 0.8667852237265992, "learning_rate": 5.825463316417639e-06, "loss": 0.6204, "step": 35280 }, { "epoch": 0.6408906000290571, "grad_norm": 0.872915878605813, "learning_rate": 5.820227168052414e-06, "loss": 0.6203, "step": 35290 }, { "epoch": 0.641072206886532, "grad_norm": 0.8077856391945194, "learning_rate": 5.814992407887849e-06, "loss": 0.6255, "step": 35300 }, { "epoch": 0.641253813744007, "grad_norm": 0.881959142774884, "learning_rate": 5.809759037662526e-06, "loss": 0.6394, "step": 35310 }, { "epoch": 0.641435420601482, "grad_norm": 0.8507284581664982, "learning_rate": 5.8045270591145765e-06, "loss": 0.6307, "step": 35320 }, { "epoch": 0.6416170274589569, "grad_norm": 0.8460639178074236, "learning_rate": 5.799296473981656e-06, "loss": 0.6347, "step": 35330 }, { "epoch": 0.6417986343164318, "grad_norm": 0.8227785477624506, "learning_rate": 5.794067284000973e-06, "loss": 0.6274, "step": 35340 }, { "epoch": 0.6419802411739067, "grad_norm": 0.8466164743910565, "learning_rate": 5.788839490909253e-06, "loss": 0.6197, "step": 35350 }, { "epoch": 0.6421618480313817, "grad_norm": 0.831746990070178, "learning_rate": 5.7836130964427815e-06, "loss": 0.6416, "step": 35360 }, { "epoch": 0.6423434548888566, "grad_norm": 0.8414471190790097, "learning_rate": 5.778388102337355e-06, "loss": 0.6333, "step": 35370 }, { "epoch": 0.6425250617463315, "grad_norm": 0.83881231539793, "learning_rate": 5.773164510328329e-06, "loss": 0.6228, "step": 35380 }, { "epoch": 0.6427066686038064, "grad_norm": 0.8638053916519827, "learning_rate": 5.767942322150568e-06, "loss": 0.6395, "step": 35390 }, { "epoch": 0.6428882754612815, "grad_norm": 0.8193961337897555, "learning_rate": 5.762721539538494e-06, "loss": 0.6235, "step": 35400 }, { "epoch": 0.6430698823187564, "grad_norm": 0.8126426811168416, "learning_rate": 5.757502164226043e-06, "loss": 0.6386, "step": 35410 }, { "epoch": 0.6432514891762313, "grad_norm": 1.3111680777389734, "learning_rate": 5.7522841979467e-06, "loss": 0.6324, "step": 35420 }, { "epoch": 0.6434330960337062, "grad_norm": 0.8468569479960365, "learning_rate": 5.747067642433467e-06, "loss": 0.6307, "step": 35430 }, { "epoch": 0.6436147028911812, "grad_norm": 0.8478089832205361, "learning_rate": 5.741852499418887e-06, "loss": 0.6356, "step": 35440 }, { "epoch": 0.6437963097486561, "grad_norm": 0.864554457702198, "learning_rate": 5.736638770635036e-06, "loss": 0.6266, "step": 35450 }, { "epoch": 0.643977916606131, "grad_norm": 0.8394937888655274, "learning_rate": 5.731426457813507e-06, "loss": 0.6367, "step": 35460 }, { "epoch": 0.6441595234636059, "grad_norm": 0.8803326565837121, "learning_rate": 5.726215562685441e-06, "loss": 0.6244, "step": 35470 }, { "epoch": 0.644341130321081, "grad_norm": 0.8537665455893733, "learning_rate": 5.7210060869814895e-06, "loss": 0.6273, "step": 35480 }, { "epoch": 0.6445227371785559, "grad_norm": 0.8248262769817135, "learning_rate": 5.71579803243185e-06, "loss": 0.6304, "step": 35490 }, { "epoch": 0.6447043440360308, "grad_norm": 0.8119190009165099, "learning_rate": 5.7105914007662355e-06, "loss": 0.636, "step": 35500 }, { "epoch": 0.6448859508935058, "grad_norm": 0.8235716473383909, "learning_rate": 5.705386193713887e-06, "loss": 0.6154, "step": 35510 }, { "epoch": 0.6450675577509807, "grad_norm": 0.8577211591861067, "learning_rate": 5.700182413003582e-06, "loss": 0.6214, "step": 35520 }, { "epoch": 0.6452491646084556, "grad_norm": 0.8256315636084487, "learning_rate": 5.694980060363613e-06, "loss": 0.6153, "step": 35530 }, { "epoch": 0.6454307714659305, "grad_norm": 0.8441001702237049, "learning_rate": 5.689779137521809e-06, "loss": 0.6228, "step": 35540 }, { "epoch": 0.6456123783234055, "grad_norm": 0.8500079309083322, "learning_rate": 5.684579646205513e-06, "loss": 0.6328, "step": 35550 }, { "epoch": 0.6457939851808804, "grad_norm": 0.8335033443497782, "learning_rate": 5.6793815881416035e-06, "loss": 0.6231, "step": 35560 }, { "epoch": 0.6459755920383554, "grad_norm": 0.8133607593258604, "learning_rate": 5.674184965056473e-06, "loss": 0.6349, "step": 35570 }, { "epoch": 0.6461571988958303, "grad_norm": 0.852335301862707, "learning_rate": 5.668989778676046e-06, "loss": 0.6237, "step": 35580 }, { "epoch": 0.6463388057533053, "grad_norm": 0.8471352571598513, "learning_rate": 5.663796030725763e-06, "loss": 0.6332, "step": 35590 }, { "epoch": 0.6465204126107802, "grad_norm": 0.8024472925590392, "learning_rate": 5.6586037229305894e-06, "loss": 0.6364, "step": 35600 }, { "epoch": 0.6467020194682551, "grad_norm": 0.8509969529820504, "learning_rate": 5.653412857015015e-06, "loss": 0.6314, "step": 35610 }, { "epoch": 0.64688362632573, "grad_norm": 0.8471060191103645, "learning_rate": 5.648223434703042e-06, "loss": 0.6194, "step": 35620 }, { "epoch": 0.647065233183205, "grad_norm": 0.8690643406489617, "learning_rate": 5.643035457718209e-06, "loss": 0.6247, "step": 35630 }, { "epoch": 0.6472468400406799, "grad_norm": 0.8571384820337603, "learning_rate": 5.637848927783556e-06, "loss": 0.629, "step": 35640 }, { "epoch": 0.6474284468981548, "grad_norm": 0.8675719491324319, "learning_rate": 5.632663846621658e-06, "loss": 0.6314, "step": 35650 }, { "epoch": 0.6476100537556299, "grad_norm": 0.8477622618134735, "learning_rate": 5.6274802159545975e-06, "loss": 0.6158, "step": 35660 }, { "epoch": 0.6477916606131048, "grad_norm": 0.8510046713675231, "learning_rate": 5.622298037503984e-06, "loss": 0.638, "step": 35670 }, { "epoch": 0.6479732674705797, "grad_norm": 0.8681198259880245, "learning_rate": 5.617117312990934e-06, "loss": 0.627, "step": 35680 }, { "epoch": 0.6481548743280546, "grad_norm": 0.8598885722141305, "learning_rate": 5.611938044136096e-06, "loss": 0.6321, "step": 35690 }, { "epoch": 0.6483364811855296, "grad_norm": 0.8189728229424239, "learning_rate": 5.6067602326596235e-06, "loss": 0.6225, "step": 35700 }, { "epoch": 0.6485180880430045, "grad_norm": 0.8321991432553895, "learning_rate": 5.601583880281185e-06, "loss": 0.6301, "step": 35710 }, { "epoch": 0.6486996949004794, "grad_norm": 0.8080067176544223, "learning_rate": 5.596408988719975e-06, "loss": 0.6445, "step": 35720 }, { "epoch": 0.6488813017579543, "grad_norm": 0.8225232071836455, "learning_rate": 5.59123555969469e-06, "loss": 0.617, "step": 35730 }, { "epoch": 0.6490629086154294, "grad_norm": 0.8493228467876017, "learning_rate": 5.586063594923554e-06, "loss": 0.6209, "step": 35740 }, { "epoch": 0.6492445154729043, "grad_norm": 0.8137911695833919, "learning_rate": 5.580893096124292e-06, "loss": 0.6259, "step": 35750 }, { "epoch": 0.6494261223303792, "grad_norm": 0.895724039986987, "learning_rate": 5.575724065014154e-06, "loss": 0.6177, "step": 35760 }, { "epoch": 0.6496077291878541, "grad_norm": 0.8612696005570806, "learning_rate": 5.570556503309889e-06, "loss": 0.6173, "step": 35770 }, { "epoch": 0.6497893360453291, "grad_norm": 0.8536981033940712, "learning_rate": 5.565390412727774e-06, "loss": 0.6283, "step": 35780 }, { "epoch": 0.649970942902804, "grad_norm": 0.807444634904795, "learning_rate": 5.5602257949835805e-06, "loss": 0.637, "step": 35790 }, { "epoch": 0.6501525497602789, "grad_norm": 0.8612433408246696, "learning_rate": 5.5550626517926065e-06, "loss": 0.6276, "step": 35800 }, { "epoch": 0.6503341566177538, "grad_norm": 0.850389776843336, "learning_rate": 5.549900984869646e-06, "loss": 0.6282, "step": 35810 }, { "epoch": 0.6505157634752288, "grad_norm": 0.8574224083999128, "learning_rate": 5.544740795929018e-06, "loss": 0.6333, "step": 35820 }, { "epoch": 0.6506973703327038, "grad_norm": 0.8481962366447685, "learning_rate": 5.5395820866845355e-06, "loss": 0.6249, "step": 35830 }, { "epoch": 0.6508789771901787, "grad_norm": 0.8833109111516771, "learning_rate": 5.5344248588495275e-06, "loss": 0.6272, "step": 35840 }, { "epoch": 0.6510605840476537, "grad_norm": 0.8102893746920092, "learning_rate": 5.5292691141368375e-06, "loss": 0.6209, "step": 35850 }, { "epoch": 0.6512421909051286, "grad_norm": 0.8699578132826407, "learning_rate": 5.5241148542588e-06, "loss": 0.6316, "step": 35860 }, { "epoch": 0.6514237977626035, "grad_norm": 0.8556686573309308, "learning_rate": 5.518962080927273e-06, "loss": 0.6244, "step": 35870 }, { "epoch": 0.6516054046200784, "grad_norm": 0.8086131694948465, "learning_rate": 5.513810795853607e-06, "loss": 0.6249, "step": 35880 }, { "epoch": 0.6517870114775534, "grad_norm": 0.8167865228559116, "learning_rate": 5.5086610007486715e-06, "loss": 0.6348, "step": 35890 }, { "epoch": 0.6519686183350283, "grad_norm": 0.8561291095051868, "learning_rate": 5.503512697322831e-06, "loss": 0.6261, "step": 35900 }, { "epoch": 0.6521502251925033, "grad_norm": 0.8184704698459085, "learning_rate": 5.498365887285954e-06, "loss": 0.6225, "step": 35910 }, { "epoch": 0.6523318320499782, "grad_norm": 0.8759003056321826, "learning_rate": 5.493220572347424e-06, "loss": 0.6321, "step": 35920 }, { "epoch": 0.6525134389074532, "grad_norm": 0.8071773591351271, "learning_rate": 5.488076754216114e-06, "loss": 0.6425, "step": 35930 }, { "epoch": 0.6526950457649281, "grad_norm": 0.8804141569109465, "learning_rate": 5.482934434600414e-06, "loss": 0.6389, "step": 35940 }, { "epoch": 0.652876652622403, "grad_norm": 0.8686515374324285, "learning_rate": 5.477793615208202e-06, "loss": 0.6394, "step": 35950 }, { "epoch": 0.653058259479878, "grad_norm": 0.8299373143232284, "learning_rate": 5.472654297746871e-06, "loss": 0.6148, "step": 35960 }, { "epoch": 0.6532398663373529, "grad_norm": 0.8868792350834014, "learning_rate": 5.467516483923303e-06, "loss": 0.6262, "step": 35970 }, { "epoch": 0.6534214731948278, "grad_norm": 0.8351892447686624, "learning_rate": 5.462380175443892e-06, "loss": 0.6228, "step": 35980 }, { "epoch": 0.6536030800523027, "grad_norm": 0.8662255347759483, "learning_rate": 5.457245374014525e-06, "loss": 0.6197, "step": 35990 }, { "epoch": 0.6537846869097778, "grad_norm": 0.8278101614321478, "learning_rate": 5.452112081340586e-06, "loss": 0.6252, "step": 36000 }, { "epoch": 0.6539662937672527, "grad_norm": 0.8417476854455612, "learning_rate": 5.446980299126967e-06, "loss": 0.6287, "step": 36010 }, { "epoch": 0.6541479006247276, "grad_norm": 0.8594635509538173, "learning_rate": 5.441850029078048e-06, "loss": 0.6427, "step": 36020 }, { "epoch": 0.6543295074822025, "grad_norm": 0.8276795592769137, "learning_rate": 5.436721272897721e-06, "loss": 0.6314, "step": 36030 }, { "epoch": 0.6545111143396775, "grad_norm": 0.8477778128603561, "learning_rate": 5.431594032289356e-06, "loss": 0.6225, "step": 36040 }, { "epoch": 0.6546927211971524, "grad_norm": 0.8564671564269215, "learning_rate": 5.426468308955839e-06, "loss": 0.6286, "step": 36050 }, { "epoch": 0.6548743280546273, "grad_norm": 0.8989625947684313, "learning_rate": 5.421344104599535e-06, "loss": 0.6362, "step": 36060 }, { "epoch": 0.6550559349121022, "grad_norm": 0.8721095571416922, "learning_rate": 5.416221420922321e-06, "loss": 0.6376, "step": 36070 }, { "epoch": 0.6552375417695773, "grad_norm": 0.8561834572382635, "learning_rate": 5.41110025962555e-06, "loss": 0.6244, "step": 36080 }, { "epoch": 0.6554191486270522, "grad_norm": 0.8354244479282925, "learning_rate": 5.405980622410091e-06, "loss": 0.634, "step": 36090 }, { "epoch": 0.6556007554845271, "grad_norm": 0.8234945285881785, "learning_rate": 5.40086251097629e-06, "loss": 0.6262, "step": 36100 }, { "epoch": 0.655782362342002, "grad_norm": 0.8691384545060241, "learning_rate": 5.395745927023987e-06, "loss": 0.6242, "step": 36110 }, { "epoch": 0.655963969199477, "grad_norm": 0.8480157304801796, "learning_rate": 5.390630872252532e-06, "loss": 0.6368, "step": 36120 }, { "epoch": 0.6561455760569519, "grad_norm": 0.8681803468075506, "learning_rate": 5.385517348360741e-06, "loss": 0.6232, "step": 36130 }, { "epoch": 0.6563271829144268, "grad_norm": 0.8524558217727157, "learning_rate": 5.380405357046947e-06, "loss": 0.6154, "step": 36140 }, { "epoch": 0.6565087897719017, "grad_norm": 0.8592200066803073, "learning_rate": 5.375294900008954e-06, "loss": 0.6063, "step": 36150 }, { "epoch": 0.6566903966293767, "grad_norm": 0.8212740603491352, "learning_rate": 5.370185978944071e-06, "loss": 0.6281, "step": 36160 }, { "epoch": 0.6568720034868517, "grad_norm": 0.8366608827191069, "learning_rate": 5.365078595549083e-06, "loss": 0.6213, "step": 36170 }, { "epoch": 0.6570536103443266, "grad_norm": 0.8374209398525162, "learning_rate": 5.359972751520282e-06, "loss": 0.6336, "step": 36180 }, { "epoch": 0.6572352172018016, "grad_norm": 0.813048224334491, "learning_rate": 5.3548684485534285e-06, "loss": 0.6434, "step": 36190 }, { "epoch": 0.6574168240592765, "grad_norm": 0.8874180824778312, "learning_rate": 5.3497656883437896e-06, "loss": 0.6277, "step": 36200 }, { "epoch": 0.6575984309167514, "grad_norm": 0.8121462784750237, "learning_rate": 5.344664472586105e-06, "loss": 0.6162, "step": 36210 }, { "epoch": 0.6577800377742263, "grad_norm": 0.833822879484174, "learning_rate": 5.339564802974615e-06, "loss": 0.6093, "step": 36220 }, { "epoch": 0.6579616446317013, "grad_norm": 0.8723094372206193, "learning_rate": 5.33446668120303e-06, "loss": 0.6348, "step": 36230 }, { "epoch": 0.6581432514891762, "grad_norm": 0.8523164148111819, "learning_rate": 5.3293701089645644e-06, "loss": 0.6222, "step": 36240 }, { "epoch": 0.6583248583466512, "grad_norm": 0.8192599953893637, "learning_rate": 5.324275087951909e-06, "loss": 0.6107, "step": 36250 }, { "epoch": 0.6585064652041261, "grad_norm": 0.8431131669685173, "learning_rate": 5.319181619857234e-06, "loss": 0.636, "step": 36260 }, { "epoch": 0.6586880720616011, "grad_norm": 0.8244670817759053, "learning_rate": 5.314089706372208e-06, "loss": 0.6293, "step": 36270 }, { "epoch": 0.658869678919076, "grad_norm": 0.83344838975655, "learning_rate": 5.3089993491879655e-06, "loss": 0.6331, "step": 36280 }, { "epoch": 0.6590512857765509, "grad_norm": 0.840803546482746, "learning_rate": 5.303910549995143e-06, "loss": 0.6378, "step": 36290 }, { "epoch": 0.6592328926340258, "grad_norm": 0.8347171125226035, "learning_rate": 5.298823310483845e-06, "loss": 0.6202, "step": 36300 }, { "epoch": 0.6594144994915008, "grad_norm": 0.8506577528414565, "learning_rate": 5.29373763234366e-06, "loss": 0.6233, "step": 36310 }, { "epoch": 0.6595961063489757, "grad_norm": 0.7908077936558857, "learning_rate": 5.288653517263669e-06, "loss": 0.6252, "step": 36320 }, { "epoch": 0.6597777132064506, "grad_norm": 0.8521985033552414, "learning_rate": 5.283570966932416e-06, "loss": 0.625, "step": 36330 }, { "epoch": 0.6599593200639257, "grad_norm": 0.8233909712033758, "learning_rate": 5.278489983037946e-06, "loss": 0.613, "step": 36340 }, { "epoch": 0.6601409269214006, "grad_norm": 0.8146585554617694, "learning_rate": 5.273410567267765e-06, "loss": 0.6072, "step": 36350 }, { "epoch": 0.6603225337788755, "grad_norm": 0.8495201026284105, "learning_rate": 5.268332721308873e-06, "loss": 0.6209, "step": 36360 }, { "epoch": 0.6605041406363504, "grad_norm": 0.816903335691545, "learning_rate": 5.263256446847733e-06, "loss": 0.6351, "step": 36370 }, { "epoch": 0.6606857474938254, "grad_norm": 0.7993477020319186, "learning_rate": 5.258181745570306e-06, "loss": 0.6039, "step": 36380 }, { "epoch": 0.6608673543513003, "grad_norm": 0.813324186286961, "learning_rate": 5.253108619162011e-06, "loss": 0.6301, "step": 36390 }, { "epoch": 0.6610489612087752, "grad_norm": 0.8482919525065036, "learning_rate": 5.24803706930775e-06, "loss": 0.626, "step": 36400 }, { "epoch": 0.6612305680662501, "grad_norm": 0.849669100857397, "learning_rate": 5.242967097691913e-06, "loss": 0.6272, "step": 36410 }, { "epoch": 0.6614121749237252, "grad_norm": 0.8528030168453855, "learning_rate": 5.23789870599835e-06, "loss": 0.6066, "step": 36420 }, { "epoch": 0.6615937817812001, "grad_norm": 0.8351411017705201, "learning_rate": 5.232831895910397e-06, "loss": 0.6245, "step": 36430 }, { "epoch": 0.661775388638675, "grad_norm": 0.8181355596633217, "learning_rate": 5.227766669110854e-06, "loss": 0.6161, "step": 36440 }, { "epoch": 0.66195699549615, "grad_norm": 0.8462961283519091, "learning_rate": 5.222703027282011e-06, "loss": 0.6206, "step": 36450 }, { "epoch": 0.6621386023536249, "grad_norm": 0.8158000588618022, "learning_rate": 5.217640972105613e-06, "loss": 0.6343, "step": 36460 }, { "epoch": 0.6623202092110998, "grad_norm": 0.8550736553317647, "learning_rate": 5.212580505262895e-06, "loss": 0.638, "step": 36470 }, { "epoch": 0.6625018160685747, "grad_norm": 0.852363262797476, "learning_rate": 5.2075216284345506e-06, "loss": 0.6306, "step": 36480 }, { "epoch": 0.6626834229260496, "grad_norm": 0.8427865452215147, "learning_rate": 5.202464343300757e-06, "loss": 0.6139, "step": 36490 }, { "epoch": 0.6628650297835246, "grad_norm": 0.8890781616646009, "learning_rate": 5.1974086515411555e-06, "loss": 0.6295, "step": 36500 }, { "epoch": 0.6630466366409996, "grad_norm": 0.82450849466668, "learning_rate": 5.192354554834855e-06, "loss": 0.635, "step": 36510 }, { "epoch": 0.6632282434984745, "grad_norm": 0.8764005449039489, "learning_rate": 5.187302054860448e-06, "loss": 0.6202, "step": 36520 }, { "epoch": 0.6634098503559495, "grad_norm": 0.8170599190237178, "learning_rate": 5.182251153295981e-06, "loss": 0.6326, "step": 36530 }, { "epoch": 0.6635914572134244, "grad_norm": 0.8292119057300289, "learning_rate": 5.177201851818983e-06, "loss": 0.6224, "step": 36540 }, { "epoch": 0.6637730640708993, "grad_norm": 0.8690518647766922, "learning_rate": 5.172154152106439e-06, "loss": 0.6258, "step": 36550 }, { "epoch": 0.6639546709283742, "grad_norm": 0.8375613270722986, "learning_rate": 5.1671080558348155e-06, "loss": 0.6337, "step": 36560 }, { "epoch": 0.6641362777858492, "grad_norm": 0.8302330462057086, "learning_rate": 5.162063564680032e-06, "loss": 0.6238, "step": 36570 }, { "epoch": 0.6643178846433241, "grad_norm": 0.8246481691586105, "learning_rate": 5.157020680317491e-06, "loss": 0.6357, "step": 36580 }, { "epoch": 0.6644994915007991, "grad_norm": 0.8620058567863979, "learning_rate": 5.151979404422045e-06, "loss": 0.637, "step": 36590 }, { "epoch": 0.664681098358274, "grad_norm": 0.8352164424017058, "learning_rate": 5.146939738668027e-06, "loss": 0.6264, "step": 36600 }, { "epoch": 0.664862705215749, "grad_norm": 0.8179383604804191, "learning_rate": 5.1419016847292204e-06, "loss": 0.6277, "step": 36610 }, { "epoch": 0.6650443120732239, "grad_norm": 0.8610025599261278, "learning_rate": 5.1368652442788894e-06, "loss": 0.6334, "step": 36620 }, { "epoch": 0.6652259189306988, "grad_norm": 0.8399955758225027, "learning_rate": 5.131830418989745e-06, "loss": 0.6244, "step": 36630 }, { "epoch": 0.6654075257881737, "grad_norm": 0.8385450765389377, "learning_rate": 5.126797210533978e-06, "loss": 0.6188, "step": 36640 }, { "epoch": 0.6655891326456487, "grad_norm": 0.8085756031957406, "learning_rate": 5.1217656205832364e-06, "loss": 0.6144, "step": 36650 }, { "epoch": 0.6657707395031236, "grad_norm": 0.8761352464608829, "learning_rate": 5.116735650808622e-06, "loss": 0.6301, "step": 36660 }, { "epoch": 0.6659523463605985, "grad_norm": 0.8386942677124445, "learning_rate": 5.111707302880713e-06, "loss": 0.616, "step": 36670 }, { "epoch": 0.6661339532180736, "grad_norm": 0.8532214129477642, "learning_rate": 5.106680578469534e-06, "loss": 0.62, "step": 36680 }, { "epoch": 0.6663155600755485, "grad_norm": 0.9039980045836095, "learning_rate": 5.101655479244587e-06, "loss": 0.6274, "step": 36690 }, { "epoch": 0.6664971669330234, "grad_norm": 0.840800852769867, "learning_rate": 5.096632006874822e-06, "loss": 0.635, "step": 36700 }, { "epoch": 0.6666787737904983, "grad_norm": 0.8316311886279235, "learning_rate": 5.091610163028646e-06, "loss": 0.6178, "step": 36710 }, { "epoch": 0.6668603806479733, "grad_norm": 0.8517049204693357, "learning_rate": 5.086589949373941e-06, "loss": 0.6171, "step": 36720 }, { "epoch": 0.6670419875054482, "grad_norm": 0.8541003564258982, "learning_rate": 5.081571367578029e-06, "loss": 0.6216, "step": 36730 }, { "epoch": 0.6672235943629231, "grad_norm": 0.8440193158410917, "learning_rate": 5.0765544193077065e-06, "loss": 0.6266, "step": 36740 }, { "epoch": 0.667405201220398, "grad_norm": 0.8935392081995767, "learning_rate": 5.071539106229213e-06, "loss": 0.6268, "step": 36750 }, { "epoch": 0.6675868080778731, "grad_norm": 0.8291998504306731, "learning_rate": 5.066525430008259e-06, "loss": 0.6206, "step": 36760 }, { "epoch": 0.667768414935348, "grad_norm": 0.8484054996128726, "learning_rate": 5.0615133923099955e-06, "loss": 0.6378, "step": 36770 }, { "epoch": 0.6679500217928229, "grad_norm": 0.8571169780337689, "learning_rate": 5.056502994799046e-06, "loss": 0.6002, "step": 36780 }, { "epoch": 0.6681316286502978, "grad_norm": 0.8876147787394743, "learning_rate": 5.0514942391394765e-06, "loss": 0.6302, "step": 36790 }, { "epoch": 0.6683132355077728, "grad_norm": 0.840732778055047, "learning_rate": 5.0464871269948105e-06, "loss": 0.6266, "step": 36800 }, { "epoch": 0.6684948423652477, "grad_norm": 0.8416664152508514, "learning_rate": 5.041481660028033e-06, "loss": 0.6301, "step": 36810 }, { "epoch": 0.6686764492227226, "grad_norm": 0.926044432141928, "learning_rate": 5.036477839901572e-06, "loss": 0.6339, "step": 36820 }, { "epoch": 0.6688580560801975, "grad_norm": 0.8755481837368266, "learning_rate": 5.031475668277319e-06, "loss": 0.6289, "step": 36830 }, { "epoch": 0.6690396629376725, "grad_norm": 0.8700804802397847, "learning_rate": 5.026475146816605e-06, "loss": 0.6267, "step": 36840 }, { "epoch": 0.6692212697951475, "grad_norm": 0.8579232506517176, "learning_rate": 5.021476277180229e-06, "loss": 0.6353, "step": 36850 }, { "epoch": 0.6694028766526224, "grad_norm": 0.8367439145883359, "learning_rate": 5.016479061028425e-06, "loss": 0.6149, "step": 36860 }, { "epoch": 0.6695844835100974, "grad_norm": 0.8972620708863432, "learning_rate": 5.011483500020893e-06, "loss": 0.6325, "step": 36870 }, { "epoch": 0.6697660903675723, "grad_norm": 0.8066559913488772, "learning_rate": 5.00648959581677e-06, "loss": 0.6206, "step": 36880 }, { "epoch": 0.6699476972250472, "grad_norm": 0.8620161841420635, "learning_rate": 5.001497350074654e-06, "loss": 0.6145, "step": 36890 }, { "epoch": 0.6701293040825221, "grad_norm": 0.850413104958048, "learning_rate": 4.996506764452586e-06, "loss": 0.6202, "step": 36900 }, { "epoch": 0.6703109109399971, "grad_norm": 0.8716478775245171, "learning_rate": 4.99151784060805e-06, "loss": 0.6087, "step": 36910 }, { "epoch": 0.670492517797472, "grad_norm": 0.8440377012834562, "learning_rate": 4.986530580197995e-06, "loss": 0.6256, "step": 36920 }, { "epoch": 0.670674124654947, "grad_norm": 0.809804157750951, "learning_rate": 4.981544984878797e-06, "loss": 0.6136, "step": 36930 }, { "epoch": 0.670855731512422, "grad_norm": 0.8183264382025623, "learning_rate": 4.976561056306298e-06, "loss": 0.6121, "step": 36940 }, { "epoch": 0.6710373383698969, "grad_norm": 0.8933778809046399, "learning_rate": 4.97157879613577e-06, "loss": 0.6335, "step": 36950 }, { "epoch": 0.6712189452273718, "grad_norm": 0.8709512968592535, "learning_rate": 4.966598206021947e-06, "loss": 0.6146, "step": 36960 }, { "epoch": 0.6714005520848467, "grad_norm": 0.8548645597397233, "learning_rate": 4.961619287618992e-06, "loss": 0.6366, "step": 36970 }, { "epoch": 0.6715821589423216, "grad_norm": 0.8438348516970322, "learning_rate": 4.956642042580526e-06, "loss": 0.6234, "step": 36980 }, { "epoch": 0.6717637657997966, "grad_norm": 0.883147891732856, "learning_rate": 4.951666472559604e-06, "loss": 0.6302, "step": 36990 }, { "epoch": 0.6719453726572715, "grad_norm": 0.8482665273809107, "learning_rate": 4.946692579208736e-06, "loss": 0.6299, "step": 37000 }, { "epoch": 0.6721269795147464, "grad_norm": 0.8670511880085653, "learning_rate": 4.941720364179862e-06, "loss": 0.6265, "step": 37010 }, { "epoch": 0.6723085863722215, "grad_norm": 0.8771532353450994, "learning_rate": 4.936749829124377e-06, "loss": 0.6278, "step": 37020 }, { "epoch": 0.6724901932296964, "grad_norm": 0.8502974713138038, "learning_rate": 4.931780975693108e-06, "loss": 0.6201, "step": 37030 }, { "epoch": 0.6726718000871713, "grad_norm": 0.8058480321882535, "learning_rate": 4.926813805536329e-06, "loss": 0.6139, "step": 37040 }, { "epoch": 0.6728534069446462, "grad_norm": 0.876380084936701, "learning_rate": 4.921848320303757e-06, "loss": 0.6122, "step": 37050 }, { "epoch": 0.6730350138021212, "grad_norm": 0.8568270284768985, "learning_rate": 4.916884521644542e-06, "loss": 0.6202, "step": 37060 }, { "epoch": 0.6732166206595961, "grad_norm": 0.8534915978232402, "learning_rate": 4.911922411207281e-06, "loss": 0.6256, "step": 37070 }, { "epoch": 0.673398227517071, "grad_norm": 0.8308072443120947, "learning_rate": 4.906961990640005e-06, "loss": 0.6228, "step": 37080 }, { "epoch": 0.6735798343745459, "grad_norm": 0.8452862365871725, "learning_rate": 4.902003261590188e-06, "loss": 0.6421, "step": 37090 }, { "epoch": 0.673761441232021, "grad_norm": 0.8224065793907642, "learning_rate": 4.897046225704741e-06, "loss": 0.6259, "step": 37100 }, { "epoch": 0.6739430480894959, "grad_norm": 0.8690019795164247, "learning_rate": 4.892090884630007e-06, "loss": 0.6181, "step": 37110 }, { "epoch": 0.6741246549469708, "grad_norm": 0.8353308043763675, "learning_rate": 4.887137240011778e-06, "loss": 0.6203, "step": 37120 }, { "epoch": 0.6743062618044458, "grad_norm": 0.8577212016397382, "learning_rate": 4.882185293495267e-06, "loss": 0.628, "step": 37130 }, { "epoch": 0.6744878686619207, "grad_norm": 0.8701490573933879, "learning_rate": 4.8772350467251415e-06, "loss": 0.6174, "step": 37140 }, { "epoch": 0.6746694755193956, "grad_norm": 0.8594755115863838, "learning_rate": 4.872286501345487e-06, "loss": 0.6117, "step": 37150 }, { "epoch": 0.6748510823768705, "grad_norm": 0.8261270744842824, "learning_rate": 4.8673396589998365e-06, "loss": 0.6174, "step": 37160 }, { "epoch": 0.6750326892343455, "grad_norm": 0.8692567453701131, "learning_rate": 4.862394521331148e-06, "loss": 0.6215, "step": 37170 }, { "epoch": 0.6752142960918204, "grad_norm": 0.8628681810224664, "learning_rate": 4.8574510899818226e-06, "loss": 0.6215, "step": 37180 }, { "epoch": 0.6753959029492954, "grad_norm": 0.8426920621201294, "learning_rate": 4.852509366593685e-06, "loss": 0.6243, "step": 37190 }, { "epoch": 0.6755775098067703, "grad_norm": 0.8127453411815354, "learning_rate": 4.847569352808004e-06, "loss": 0.6187, "step": 37200 }, { "epoch": 0.6757591166642453, "grad_norm": 0.8454828912688801, "learning_rate": 4.842631050265468e-06, "loss": 0.6139, "step": 37210 }, { "epoch": 0.6759407235217202, "grad_norm": 0.8677126750062444, "learning_rate": 4.837694460606204e-06, "loss": 0.6196, "step": 37220 }, { "epoch": 0.6761223303791951, "grad_norm": 0.8321633167729918, "learning_rate": 4.832759585469773e-06, "loss": 0.6255, "step": 37230 }, { "epoch": 0.67630393723667, "grad_norm": 0.8318336994735329, "learning_rate": 4.8278264264951565e-06, "loss": 0.6237, "step": 37240 }, { "epoch": 0.676485544094145, "grad_norm": 0.8457767246878163, "learning_rate": 4.822894985320781e-06, "loss": 0.6115, "step": 37250 }, { "epoch": 0.6766671509516199, "grad_norm": 0.7895144287242405, "learning_rate": 4.817965263584485e-06, "loss": 0.6239, "step": 37260 }, { "epoch": 0.6768487578090949, "grad_norm": 0.8757304401866407, "learning_rate": 4.813037262923552e-06, "loss": 0.6204, "step": 37270 }, { "epoch": 0.6770303646665699, "grad_norm": 0.8688524244583969, "learning_rate": 4.808110984974681e-06, "loss": 0.6253, "step": 37280 }, { "epoch": 0.6772119715240448, "grad_norm": 0.8778468986019626, "learning_rate": 4.80318643137401e-06, "loss": 0.6105, "step": 37290 }, { "epoch": 0.6773935783815197, "grad_norm": 0.8243795993967014, "learning_rate": 4.798263603757097e-06, "loss": 0.6391, "step": 37300 }, { "epoch": 0.6775751852389946, "grad_norm": 0.8278327540792775, "learning_rate": 4.793342503758923e-06, "loss": 0.6207, "step": 37310 }, { "epoch": 0.6777567920964696, "grad_norm": 0.8277486682080716, "learning_rate": 4.78842313301391e-06, "loss": 0.6234, "step": 37320 }, { "epoch": 0.6779383989539445, "grad_norm": 0.8294790011394659, "learning_rate": 4.783505493155887e-06, "loss": 0.6206, "step": 37330 }, { "epoch": 0.6781200058114194, "grad_norm": 0.82356566872251, "learning_rate": 4.778589585818127e-06, "loss": 0.6251, "step": 37340 }, { "epoch": 0.6783016126688943, "grad_norm": 0.8491466730045721, "learning_rate": 4.7736754126333095e-06, "loss": 0.6329, "step": 37350 }, { "epoch": 0.6784832195263694, "grad_norm": 0.8447090247713572, "learning_rate": 4.768762975233555e-06, "loss": 0.6298, "step": 37360 }, { "epoch": 0.6786648263838443, "grad_norm": 0.8072619634001686, "learning_rate": 4.7638522752503914e-06, "loss": 0.6164, "step": 37370 }, { "epoch": 0.6788464332413192, "grad_norm": 0.8827869092360943, "learning_rate": 4.758943314314786e-06, "loss": 0.6181, "step": 37380 }, { "epoch": 0.6790280400987941, "grad_norm": 0.7953144499483442, "learning_rate": 4.75403609405711e-06, "loss": 0.6202, "step": 37390 }, { "epoch": 0.6792096469562691, "grad_norm": 0.8382781715414722, "learning_rate": 4.749130616107176e-06, "loss": 0.609, "step": 37400 }, { "epoch": 0.679391253813744, "grad_norm": 0.8263910380721823, "learning_rate": 4.7442268820941995e-06, "loss": 0.6326, "step": 37410 }, { "epoch": 0.6795728606712189, "grad_norm": 0.8555130455916926, "learning_rate": 4.739324893646834e-06, "loss": 0.6306, "step": 37420 }, { "epoch": 0.6797544675286938, "grad_norm": 0.8203141943844026, "learning_rate": 4.7344246523931385e-06, "loss": 0.6404, "step": 37430 }, { "epoch": 0.6799360743861689, "grad_norm": 0.8339913675751005, "learning_rate": 4.729526159960599e-06, "loss": 0.6207, "step": 37440 }, { "epoch": 0.6801176812436438, "grad_norm": 0.8404096456334227, "learning_rate": 4.724629417976127e-06, "loss": 0.6222, "step": 37450 }, { "epoch": 0.6802992881011187, "grad_norm": 0.869831363748081, "learning_rate": 4.719734428066034e-06, "loss": 0.6223, "step": 37460 }, { "epoch": 0.6804808949585937, "grad_norm": 0.8137400797681719, "learning_rate": 4.714841191856072e-06, "loss": 0.6185, "step": 37470 }, { "epoch": 0.6806625018160686, "grad_norm": 0.8382247482075388, "learning_rate": 4.709949710971391e-06, "loss": 0.6108, "step": 37480 }, { "epoch": 0.6808441086735435, "grad_norm": 0.8300924935027371, "learning_rate": 4.705059987036573e-06, "loss": 0.6323, "step": 37490 }, { "epoch": 0.6810257155310184, "grad_norm": 0.8234281849068302, "learning_rate": 4.700172021675607e-06, "loss": 0.6091, "step": 37500 }, { "epoch": 0.6812073223884934, "grad_norm": 0.8430244737666508, "learning_rate": 4.6952858165119e-06, "loss": 0.6338, "step": 37510 }, { "epoch": 0.6813889292459683, "grad_norm": 0.8886923768034737, "learning_rate": 4.690401373168277e-06, "loss": 0.6393, "step": 37520 }, { "epoch": 0.6815705361034433, "grad_norm": 0.843885259749205, "learning_rate": 4.685518693266975e-06, "loss": 0.6196, "step": 37530 }, { "epoch": 0.6817521429609182, "grad_norm": 0.8405962220260437, "learning_rate": 4.68063777842965e-06, "loss": 0.6176, "step": 37540 }, { "epoch": 0.6819337498183932, "grad_norm": 0.887282744644454, "learning_rate": 4.675758630277362e-06, "loss": 0.6256, "step": 37550 }, { "epoch": 0.6821153566758681, "grad_norm": 0.8502110745284791, "learning_rate": 4.6708812504305985e-06, "loss": 0.6407, "step": 37560 }, { "epoch": 0.682296963533343, "grad_norm": 0.8312640472198369, "learning_rate": 4.666005640509244e-06, "loss": 0.6109, "step": 37570 }, { "epoch": 0.6824785703908179, "grad_norm": 0.8617773288303273, "learning_rate": 4.66113180213261e-06, "loss": 0.6259, "step": 37580 }, { "epoch": 0.6826601772482929, "grad_norm": 0.8336562125738028, "learning_rate": 4.656259736919407e-06, "loss": 0.6264, "step": 37590 }, { "epoch": 0.6828417841057678, "grad_norm": 0.8478134410716852, "learning_rate": 4.651389446487767e-06, "loss": 0.6158, "step": 37600 }, { "epoch": 0.6830233909632428, "grad_norm": 0.8430085214544741, "learning_rate": 4.646520932455227e-06, "loss": 0.6227, "step": 37610 }, { "epoch": 0.6832049978207178, "grad_norm": 0.878155929358324, "learning_rate": 4.641654196438729e-06, "loss": 0.6172, "step": 37620 }, { "epoch": 0.6833866046781927, "grad_norm": 0.8616823040831642, "learning_rate": 4.636789240054636e-06, "loss": 0.6218, "step": 37630 }, { "epoch": 0.6835682115356676, "grad_norm": 0.8528526976313248, "learning_rate": 4.631926064918712e-06, "loss": 0.6214, "step": 37640 }, { "epoch": 0.6837498183931425, "grad_norm": 0.8409974370653193, "learning_rate": 4.627064672646134e-06, "loss": 0.6216, "step": 37650 }, { "epoch": 0.6839314252506175, "grad_norm": 0.8250103547214386, "learning_rate": 4.622205064851481e-06, "loss": 0.6077, "step": 37660 }, { "epoch": 0.6841130321080924, "grad_norm": 0.8636838939507109, "learning_rate": 4.617347243148745e-06, "loss": 0.6254, "step": 37670 }, { "epoch": 0.6842946389655673, "grad_norm": 0.8392368494546653, "learning_rate": 4.612491209151321e-06, "loss": 0.618, "step": 37680 }, { "epoch": 0.6844762458230422, "grad_norm": 0.8480781915460931, "learning_rate": 4.6076369644720154e-06, "loss": 0.6122, "step": 37690 }, { "epoch": 0.6846578526805173, "grad_norm": 0.8494063534595391, "learning_rate": 4.602784510723035e-06, "loss": 0.6112, "step": 37700 }, { "epoch": 0.6848394595379922, "grad_norm": 0.8856269144825457, "learning_rate": 4.5979338495159895e-06, "loss": 0.6414, "step": 37710 }, { "epoch": 0.6850210663954671, "grad_norm": 0.8830979670217353, "learning_rate": 4.593084982461904e-06, "loss": 0.6209, "step": 37720 }, { "epoch": 0.685202673252942, "grad_norm": 0.823595571805988, "learning_rate": 4.588237911171194e-06, "loss": 0.6319, "step": 37730 }, { "epoch": 0.685384280110417, "grad_norm": 0.8815686231231236, "learning_rate": 4.583392637253693e-06, "loss": 0.6225, "step": 37740 }, { "epoch": 0.6855658869678919, "grad_norm": 0.8637753757872374, "learning_rate": 4.578549162318624e-06, "loss": 0.6121, "step": 37750 }, { "epoch": 0.6857474938253668, "grad_norm": 0.8523339406524281, "learning_rate": 4.573707487974625e-06, "loss": 0.62, "step": 37760 }, { "epoch": 0.6859291006828417, "grad_norm": 0.8152174626552695, "learning_rate": 4.568867615829721e-06, "loss": 0.6125, "step": 37770 }, { "epoch": 0.6861107075403168, "grad_norm": 0.8697294859038959, "learning_rate": 4.564029547491357e-06, "loss": 0.6159, "step": 37780 }, { "epoch": 0.6862923143977917, "grad_norm": 0.842090102647567, "learning_rate": 4.55919328456636e-06, "loss": 0.6106, "step": 37790 }, { "epoch": 0.6864739212552666, "grad_norm": 0.8296630322652763, "learning_rate": 4.554358828660974e-06, "loss": 0.6182, "step": 37800 }, { "epoch": 0.6866555281127416, "grad_norm": 0.8318399762834668, "learning_rate": 4.549526181380829e-06, "loss": 0.6116, "step": 37810 }, { "epoch": 0.6868371349702165, "grad_norm": 0.8386329900558442, "learning_rate": 4.544695344330967e-06, "loss": 0.6096, "step": 37820 }, { "epoch": 0.6870187418276914, "grad_norm": 0.8274290958963965, "learning_rate": 4.539866319115815e-06, "loss": 0.624, "step": 37830 }, { "epoch": 0.6872003486851663, "grad_norm": 0.8614436750077112, "learning_rate": 4.53503910733921e-06, "loss": 0.6248, "step": 37840 }, { "epoch": 0.6873819555426413, "grad_norm": 0.8418126250580322, "learning_rate": 4.5302137106043845e-06, "loss": 0.6252, "step": 37850 }, { "epoch": 0.6875635624001162, "grad_norm": 0.9293481207713413, "learning_rate": 4.52539013051396e-06, "loss": 0.6275, "step": 37860 }, { "epoch": 0.6877451692575912, "grad_norm": 0.8015436337108919, "learning_rate": 4.5205683686699675e-06, "loss": 0.6164, "step": 37870 }, { "epoch": 0.6879267761150661, "grad_norm": 0.8049849036464126, "learning_rate": 4.5157484266738206e-06, "loss": 0.6183, "step": 37880 }, { "epoch": 0.6881083829725411, "grad_norm": 0.9396901016793141, "learning_rate": 4.5109303061263425e-06, "loss": 0.6328, "step": 37890 }, { "epoch": 0.688289989830016, "grad_norm": 0.8091845938637736, "learning_rate": 4.506114008627739e-06, "loss": 0.6086, "step": 37900 }, { "epoch": 0.6884715966874909, "grad_norm": 0.8338312809895678, "learning_rate": 4.501299535777613e-06, "loss": 0.6212, "step": 37910 }, { "epoch": 0.6886532035449658, "grad_norm": 0.8256474304156243, "learning_rate": 4.496486889174971e-06, "loss": 0.6259, "step": 37920 }, { "epoch": 0.6888348104024408, "grad_norm": 0.9180941611929505, "learning_rate": 4.491676070418198e-06, "loss": 0.6367, "step": 37930 }, { "epoch": 0.6890164172599157, "grad_norm": 0.8485781182255517, "learning_rate": 4.486867081105089e-06, "loss": 0.62, "step": 37940 }, { "epoch": 0.6891980241173906, "grad_norm": 0.8532499145100568, "learning_rate": 4.482059922832813e-06, "loss": 0.6162, "step": 37950 }, { "epoch": 0.6893796309748657, "grad_norm": 0.8342415933416008, "learning_rate": 4.477254597197949e-06, "loss": 0.6178, "step": 37960 }, { "epoch": 0.6895612378323406, "grad_norm": 0.8326797210182451, "learning_rate": 4.472451105796449e-06, "loss": 0.6174, "step": 37970 }, { "epoch": 0.6897428446898155, "grad_norm": 0.8316033568376029, "learning_rate": 4.467649450223674e-06, "loss": 0.6357, "step": 37980 }, { "epoch": 0.6899244515472904, "grad_norm": 0.8494063398489059, "learning_rate": 4.46284963207436e-06, "loss": 0.6198, "step": 37990 }, { "epoch": 0.6901060584047654, "grad_norm": 0.8486970461115562, "learning_rate": 4.4580516529426444e-06, "loss": 0.6273, "step": 38000 }, { "epoch": 0.6902876652622403, "grad_norm": 0.8241688260372168, "learning_rate": 4.4532555144220464e-06, "loss": 0.6198, "step": 38010 }, { "epoch": 0.6904692721197152, "grad_norm": 0.8613819127595548, "learning_rate": 4.448461218105472e-06, "loss": 0.6265, "step": 38020 }, { "epoch": 0.6906508789771901, "grad_norm": 0.8419759548152379, "learning_rate": 4.443668765585228e-06, "loss": 0.6263, "step": 38030 }, { "epoch": 0.6908324858346652, "grad_norm": 0.8399475427807899, "learning_rate": 4.438878158452991e-06, "loss": 0.6239, "step": 38040 }, { "epoch": 0.6910140926921401, "grad_norm": 0.8690111988633179, "learning_rate": 4.434089398299843e-06, "loss": 0.6194, "step": 38050 }, { "epoch": 0.691195699549615, "grad_norm": 0.7712875097679727, "learning_rate": 4.429302486716236e-06, "loss": 0.6253, "step": 38060 }, { "epoch": 0.6913773064070899, "grad_norm": 0.8645399952559907, "learning_rate": 4.424517425292023e-06, "loss": 0.6138, "step": 38070 }, { "epoch": 0.6915589132645649, "grad_norm": 0.8464440269117403, "learning_rate": 4.419734215616428e-06, "loss": 0.62, "step": 38080 }, { "epoch": 0.6917405201220398, "grad_norm": 0.8878609725796359, "learning_rate": 4.414952859278074e-06, "loss": 0.6382, "step": 38090 }, { "epoch": 0.6919221269795147, "grad_norm": 0.9391986855785239, "learning_rate": 4.410173357864957e-06, "loss": 0.6233, "step": 38100 }, { "epoch": 0.6921037338369896, "grad_norm": 0.8637221703369946, "learning_rate": 4.405395712964461e-06, "loss": 0.6258, "step": 38110 }, { "epoch": 0.6922853406944646, "grad_norm": 0.8470776772627235, "learning_rate": 4.400619926163358e-06, "loss": 0.6294, "step": 38120 }, { "epoch": 0.6924669475519396, "grad_norm": 0.8567947339469345, "learning_rate": 4.395845999047794e-06, "loss": 0.6246, "step": 38130 }, { "epoch": 0.6926485544094145, "grad_norm": 0.8486152437645956, "learning_rate": 4.3910739332033095e-06, "loss": 0.6202, "step": 38140 }, { "epoch": 0.6928301612668895, "grad_norm": 0.8774209265570302, "learning_rate": 4.386303730214809e-06, "loss": 0.6179, "step": 38150 }, { "epoch": 0.6930117681243644, "grad_norm": 0.8681972232150568, "learning_rate": 4.3815353916666e-06, "loss": 0.6182, "step": 38160 }, { "epoch": 0.6931933749818393, "grad_norm": 0.8473653672087519, "learning_rate": 4.376768919142351e-06, "loss": 0.6197, "step": 38170 }, { "epoch": 0.6933749818393142, "grad_norm": 0.8677228761612509, "learning_rate": 4.372004314225127e-06, "loss": 0.6191, "step": 38180 }, { "epoch": 0.6935565886967892, "grad_norm": 0.8475225321321763, "learning_rate": 4.367241578497357e-06, "loss": 0.6149, "step": 38190 }, { "epoch": 0.6937381955542641, "grad_norm": 0.8376047085730743, "learning_rate": 4.362480713540864e-06, "loss": 0.6086, "step": 38200 }, { "epoch": 0.6939198024117391, "grad_norm": 0.8688124144724868, "learning_rate": 4.357721720936839e-06, "loss": 0.625, "step": 38210 }, { "epoch": 0.694101409269214, "grad_norm": 0.8484210615957936, "learning_rate": 4.352964602265858e-06, "loss": 0.6175, "step": 38220 }, { "epoch": 0.694283016126689, "grad_norm": 0.8754932187498031, "learning_rate": 4.348209359107868e-06, "loss": 0.6279, "step": 38230 }, { "epoch": 0.6944646229841639, "grad_norm": 0.8516990365928717, "learning_rate": 4.343455993042198e-06, "loss": 0.6214, "step": 38240 }, { "epoch": 0.6946462298416388, "grad_norm": 0.8454221072941819, "learning_rate": 4.338704505647559e-06, "loss": 0.6152, "step": 38250 }, { "epoch": 0.6948278366991137, "grad_norm": 0.8484832214306021, "learning_rate": 4.333954898502021e-06, "loss": 0.6167, "step": 38260 }, { "epoch": 0.6950094435565887, "grad_norm": 0.8481535673882794, "learning_rate": 4.3292071731830485e-06, "loss": 0.6185, "step": 38270 }, { "epoch": 0.6951910504140636, "grad_norm": 0.833231433677322, "learning_rate": 4.324461331267465e-06, "loss": 0.6176, "step": 38280 }, { "epoch": 0.6953726572715385, "grad_norm": 0.8486630113747662, "learning_rate": 4.3197173743314855e-06, "loss": 0.6222, "step": 38290 }, { "epoch": 0.6955542641290136, "grad_norm": 0.8020091181844762, "learning_rate": 4.314975303950684e-06, "loss": 0.6044, "step": 38300 }, { "epoch": 0.6957358709864885, "grad_norm": 0.8552914787595305, "learning_rate": 4.310235121700008e-06, "loss": 0.6207, "step": 38310 }, { "epoch": 0.6959174778439634, "grad_norm": 0.9384330218012509, "learning_rate": 4.305496829153793e-06, "loss": 0.6214, "step": 38320 }, { "epoch": 0.6960990847014383, "grad_norm": 0.8497316500233343, "learning_rate": 4.30076042788573e-06, "loss": 0.6215, "step": 38330 }, { "epoch": 0.6962806915589133, "grad_norm": 0.8429288476649579, "learning_rate": 4.296025919468894e-06, "loss": 0.6241, "step": 38340 }, { "epoch": 0.6964622984163882, "grad_norm": 0.9398413418595579, "learning_rate": 4.291293305475722e-06, "loss": 0.6273, "step": 38350 }, { "epoch": 0.6966439052738631, "grad_norm": 0.80419656430302, "learning_rate": 4.286562587478033e-06, "loss": 0.6097, "step": 38360 }, { "epoch": 0.696825512131338, "grad_norm": 0.8902010118943419, "learning_rate": 4.281833767046999e-06, "loss": 0.6343, "step": 38370 }, { "epoch": 0.6970071189888131, "grad_norm": 0.8673610357956497, "learning_rate": 4.277106845753183e-06, "loss": 0.6193, "step": 38380 }, { "epoch": 0.697188725846288, "grad_norm": 0.8343534006583176, "learning_rate": 4.2723818251664974e-06, "loss": 0.6169, "step": 38390 }, { "epoch": 0.6973703327037629, "grad_norm": 0.8629212300437131, "learning_rate": 4.26765870685624e-06, "loss": 0.621, "step": 38400 }, { "epoch": 0.6975519395612378, "grad_norm": 0.8880574495754887, "learning_rate": 4.262937492391066e-06, "loss": 0.6235, "step": 38410 }, { "epoch": 0.6977335464187128, "grad_norm": 0.8111377319098905, "learning_rate": 4.258218183338999e-06, "loss": 0.6173, "step": 38420 }, { "epoch": 0.6979151532761877, "grad_norm": 0.8346516699161606, "learning_rate": 4.253500781267438e-06, "loss": 0.6124, "step": 38430 }, { "epoch": 0.6980967601336626, "grad_norm": 0.8556993843888597, "learning_rate": 4.248785287743135e-06, "loss": 0.6141, "step": 38440 }, { "epoch": 0.6982783669911375, "grad_norm": 0.8530960628362293, "learning_rate": 4.244071704332225e-06, "loss": 0.6196, "step": 38450 }, { "epoch": 0.6984599738486125, "grad_norm": 0.8381623141725751, "learning_rate": 4.239360032600192e-06, "loss": 0.633, "step": 38460 }, { "epoch": 0.6986415807060875, "grad_norm": 0.8254500279474152, "learning_rate": 4.2346502741119e-06, "loss": 0.6057, "step": 38470 }, { "epoch": 0.6988231875635624, "grad_norm": 0.8609470847480967, "learning_rate": 4.2299424304315615e-06, "loss": 0.6209, "step": 38480 }, { "epoch": 0.6990047944210374, "grad_norm": 0.8357330943116889, "learning_rate": 4.225236503122773e-06, "loss": 0.6087, "step": 38490 }, { "epoch": 0.6991864012785123, "grad_norm": 0.8950784045038832, "learning_rate": 4.220532493748476e-06, "loss": 0.6284, "step": 38500 }, { "epoch": 0.6993680081359872, "grad_norm": 0.8198920015371928, "learning_rate": 4.21583040387098e-06, "loss": 0.6141, "step": 38510 }, { "epoch": 0.6995496149934621, "grad_norm": 0.863974866618276, "learning_rate": 4.211130235051967e-06, "loss": 0.6083, "step": 38520 }, { "epoch": 0.6997312218509371, "grad_norm": 0.9056916292205662, "learning_rate": 4.2064319888524655e-06, "loss": 0.6219, "step": 38530 }, { "epoch": 0.699912828708412, "grad_norm": 0.8242831422549964, "learning_rate": 4.201735666832881e-06, "loss": 0.6043, "step": 38540 }, { "epoch": 0.700094435565887, "grad_norm": 0.8514177236338888, "learning_rate": 4.197041270552966e-06, "loss": 0.619, "step": 38550 }, { "epoch": 0.7002760424233619, "grad_norm": 0.867038915420781, "learning_rate": 4.192348801571845e-06, "loss": 0.6057, "step": 38560 }, { "epoch": 0.7004576492808369, "grad_norm": 0.8354030436700199, "learning_rate": 4.187658261447991e-06, "loss": 0.6179, "step": 38570 }, { "epoch": 0.7006392561383118, "grad_norm": 0.830389271915437, "learning_rate": 4.182969651739249e-06, "loss": 0.6058, "step": 38580 }, { "epoch": 0.7008208629957867, "grad_norm": 0.8095897025687534, "learning_rate": 4.178282974002811e-06, "loss": 0.6195, "step": 38590 }, { "epoch": 0.7010024698532616, "grad_norm": 0.855798257956027, "learning_rate": 4.173598229795237e-06, "loss": 0.6056, "step": 38600 }, { "epoch": 0.7011840767107366, "grad_norm": 0.829628206931894, "learning_rate": 4.168915420672436e-06, "loss": 0.6381, "step": 38610 }, { "epoch": 0.7013656835682115, "grad_norm": 0.8494992576048984, "learning_rate": 4.164234548189684e-06, "loss": 0.6216, "step": 38620 }, { "epoch": 0.7015472904256864, "grad_norm": 0.8415342781883903, "learning_rate": 4.159555613901603e-06, "loss": 0.6236, "step": 38630 }, { "epoch": 0.7017288972831615, "grad_norm": 0.8236063265769302, "learning_rate": 4.154878619362179e-06, "loss": 0.6022, "step": 38640 }, { "epoch": 0.7019105041406364, "grad_norm": 0.8666198559852286, "learning_rate": 4.150203566124756e-06, "loss": 0.6119, "step": 38650 }, { "epoch": 0.7020921109981113, "grad_norm": 0.839352163341539, "learning_rate": 4.145530455742022e-06, "loss": 0.6371, "step": 38660 }, { "epoch": 0.7022737178555862, "grad_norm": 0.8129479757430741, "learning_rate": 4.1408592897660325e-06, "loss": 0.6085, "step": 38670 }, { "epoch": 0.7024553247130612, "grad_norm": 0.8530598131726863, "learning_rate": 4.136190069748186e-06, "loss": 0.635, "step": 38680 }, { "epoch": 0.7026369315705361, "grad_norm": 0.8556293668164779, "learning_rate": 4.131522797239247e-06, "loss": 0.6202, "step": 38690 }, { "epoch": 0.702818538428011, "grad_norm": 0.8374294028570985, "learning_rate": 4.1268574737893174e-06, "loss": 0.618, "step": 38700 }, { "epoch": 0.7030001452854859, "grad_norm": 0.8480824307607266, "learning_rate": 4.122194100947869e-06, "loss": 0.6169, "step": 38710 }, { "epoch": 0.703181752142961, "grad_norm": 0.8341425252261577, "learning_rate": 4.1175326802637135e-06, "loss": 0.6139, "step": 38720 }, { "epoch": 0.7033633590004359, "grad_norm": 0.847389338842459, "learning_rate": 4.112873213285016e-06, "loss": 0.6234, "step": 38730 }, { "epoch": 0.7035449658579108, "grad_norm": 0.8421842868027447, "learning_rate": 4.108215701559299e-06, "loss": 0.6092, "step": 38740 }, { "epoch": 0.7037265727153857, "grad_norm": 0.8595838568497892, "learning_rate": 4.103560146633427e-06, "loss": 0.6225, "step": 38750 }, { "epoch": 0.7039081795728607, "grad_norm": 0.8416043509955394, "learning_rate": 4.098906550053625e-06, "loss": 0.6228, "step": 38760 }, { "epoch": 0.7040897864303356, "grad_norm": 0.8322787133074374, "learning_rate": 4.094254913365454e-06, "loss": 0.6268, "step": 38770 }, { "epoch": 0.7042713932878105, "grad_norm": 0.8634613525066805, "learning_rate": 4.089605238113841e-06, "loss": 0.6105, "step": 38780 }, { "epoch": 0.7044530001452854, "grad_norm": 0.8124827446494053, "learning_rate": 4.084957525843043e-06, "loss": 0.6242, "step": 38790 }, { "epoch": 0.7046346070027604, "grad_norm": 0.8389475134175478, "learning_rate": 4.080311778096682e-06, "loss": 0.6175, "step": 38800 }, { "epoch": 0.7048162138602354, "grad_norm": 0.8828223161371525, "learning_rate": 4.075667996417716e-06, "loss": 0.624, "step": 38810 }, { "epoch": 0.7049978207177103, "grad_norm": 0.8379076810517229, "learning_rate": 4.071026182348451e-06, "loss": 0.6236, "step": 38820 }, { "epoch": 0.7051794275751853, "grad_norm": 0.8257533371691275, "learning_rate": 4.06638633743055e-06, "loss": 0.6004, "step": 38830 }, { "epoch": 0.7053610344326602, "grad_norm": 0.8447359917528463, "learning_rate": 4.061748463205005e-06, "loss": 0.6117, "step": 38840 }, { "epoch": 0.7055426412901351, "grad_norm": 0.8528590921440847, "learning_rate": 4.057112561212173e-06, "loss": 0.6158, "step": 38850 }, { "epoch": 0.70572424814761, "grad_norm": 0.8468025631186384, "learning_rate": 4.052478632991738e-06, "loss": 0.6299, "step": 38860 }, { "epoch": 0.705905855005085, "grad_norm": 0.8610457775820445, "learning_rate": 4.047846680082741e-06, "loss": 0.6236, "step": 38870 }, { "epoch": 0.7060874618625599, "grad_norm": 0.8408742360482715, "learning_rate": 4.043216704023557e-06, "loss": 0.6218, "step": 38880 }, { "epoch": 0.7062690687200349, "grad_norm": 0.8621250922014589, "learning_rate": 4.038588706351918e-06, "loss": 0.6224, "step": 38890 }, { "epoch": 0.7064506755775098, "grad_norm": 0.8297736815016256, "learning_rate": 4.033962688604881e-06, "loss": 0.6152, "step": 38900 }, { "epoch": 0.7066322824349848, "grad_norm": 0.8252220020585354, "learning_rate": 4.0293386523188636e-06, "loss": 0.6166, "step": 38910 }, { "epoch": 0.7068138892924597, "grad_norm": 0.8403791420663635, "learning_rate": 4.024716599029614e-06, "loss": 0.6072, "step": 38920 }, { "epoch": 0.7069954961499346, "grad_norm": 0.8085158864082637, "learning_rate": 4.020096530272219e-06, "loss": 0.6172, "step": 38930 }, { "epoch": 0.7071771030074095, "grad_norm": 0.8715015646734356, "learning_rate": 4.0154784475811216e-06, "loss": 0.6149, "step": 38940 }, { "epoch": 0.7073587098648845, "grad_norm": 0.8489760915309181, "learning_rate": 4.0108623524900854e-06, "loss": 0.6167, "step": 38950 }, { "epoch": 0.7075403167223594, "grad_norm": 0.8390951385191925, "learning_rate": 4.006248246532233e-06, "loss": 0.6128, "step": 38960 }, { "epoch": 0.7077219235798343, "grad_norm": 0.8475576689444536, "learning_rate": 4.00163613124001e-06, "loss": 0.622, "step": 38970 }, { "epoch": 0.7079035304373094, "grad_norm": 0.8467657749029488, "learning_rate": 3.997026008145214e-06, "loss": 0.6177, "step": 38980 }, { "epoch": 0.7080851372947843, "grad_norm": 0.811362997337248, "learning_rate": 3.9924178787789696e-06, "loss": 0.6214, "step": 38990 }, { "epoch": 0.7082667441522592, "grad_norm": 0.8238417628963929, "learning_rate": 3.98781174467175e-06, "loss": 0.607, "step": 39000 }, { "epoch": 0.7084483510097341, "grad_norm": 0.8725003279565271, "learning_rate": 3.9832076073533555e-06, "loss": 0.6174, "step": 39010 }, { "epoch": 0.7086299578672091, "grad_norm": 0.8574861974904675, "learning_rate": 3.978605468352934e-06, "loss": 0.6111, "step": 39020 }, { "epoch": 0.708811564724684, "grad_norm": 0.825220515398795, "learning_rate": 3.974005329198957e-06, "loss": 0.6093, "step": 39030 }, { "epoch": 0.7089931715821589, "grad_norm": 0.8737887098505827, "learning_rate": 3.969407191419242e-06, "loss": 0.6159, "step": 39040 }, { "epoch": 0.7091747784396338, "grad_norm": 0.859033799321125, "learning_rate": 3.9648110565409415e-06, "loss": 0.6297, "step": 39050 }, { "epoch": 0.7093563852971089, "grad_norm": 0.8283635116184063, "learning_rate": 3.960216926090535e-06, "loss": 0.6109, "step": 39060 }, { "epoch": 0.7095379921545838, "grad_norm": 0.8169144039931084, "learning_rate": 3.955624801593845e-06, "loss": 0.6189, "step": 39070 }, { "epoch": 0.7097195990120587, "grad_norm": 0.8714583979254652, "learning_rate": 3.951034684576018e-06, "loss": 0.6174, "step": 39080 }, { "epoch": 0.7099012058695336, "grad_norm": 0.8256357056672637, "learning_rate": 3.946446576561548e-06, "loss": 0.6161, "step": 39090 }, { "epoch": 0.7100828127270086, "grad_norm": 0.8651934240909269, "learning_rate": 3.941860479074246e-06, "loss": 0.6187, "step": 39100 }, { "epoch": 0.7102644195844835, "grad_norm": 0.8560847738716423, "learning_rate": 3.937276393637267e-06, "loss": 0.6206, "step": 39110 }, { "epoch": 0.7104460264419584, "grad_norm": 0.8267539970051853, "learning_rate": 3.932694321773092e-06, "loss": 0.6083, "step": 39120 }, { "epoch": 0.7106276332994333, "grad_norm": 0.8451353283394638, "learning_rate": 3.928114265003532e-06, "loss": 0.6266, "step": 39130 }, { "epoch": 0.7108092401569083, "grad_norm": 0.8346073838816221, "learning_rate": 3.923536224849736e-06, "loss": 0.6304, "step": 39140 }, { "epoch": 0.7109908470143833, "grad_norm": 0.8232924992440386, "learning_rate": 3.918960202832173e-06, "loss": 0.6279, "step": 39150 }, { "epoch": 0.7111724538718582, "grad_norm": 0.8257488577448531, "learning_rate": 3.914386200470655e-06, "loss": 0.6068, "step": 39160 }, { "epoch": 0.7113540607293332, "grad_norm": 0.8647015135164715, "learning_rate": 3.909814219284306e-06, "loss": 0.606, "step": 39170 }, { "epoch": 0.7115356675868081, "grad_norm": 0.8440583524658224, "learning_rate": 3.9052442607915975e-06, "loss": 0.6278, "step": 39180 }, { "epoch": 0.711717274444283, "grad_norm": 0.892358729887323, "learning_rate": 3.900676326510313e-06, "loss": 0.6196, "step": 39190 }, { "epoch": 0.7118988813017579, "grad_norm": 0.8509593319784033, "learning_rate": 3.896110417957577e-06, "loss": 0.6077, "step": 39200 }, { "epoch": 0.7120804881592329, "grad_norm": 0.839078735481771, "learning_rate": 3.89154653664983e-06, "loss": 0.6096, "step": 39210 }, { "epoch": 0.7122620950167078, "grad_norm": 0.8643039541199858, "learning_rate": 3.8869846841028435e-06, "loss": 0.6172, "step": 39220 }, { "epoch": 0.7124437018741828, "grad_norm": 0.8264135354132193, "learning_rate": 3.882424861831721e-06, "loss": 0.6208, "step": 39230 }, { "epoch": 0.7126253087316577, "grad_norm": 0.8283347289762522, "learning_rate": 3.8778670713508794e-06, "loss": 0.6344, "step": 39240 }, { "epoch": 0.7128069155891327, "grad_norm": 0.8308424344287948, "learning_rate": 3.8733113141740754e-06, "loss": 0.6131, "step": 39250 }, { "epoch": 0.7129885224466076, "grad_norm": 0.8402499578039416, "learning_rate": 3.868757591814376e-06, "loss": 0.6161, "step": 39260 }, { "epoch": 0.7131701293040825, "grad_norm": 0.9021743430825362, "learning_rate": 3.864205905784186e-06, "loss": 0.6125, "step": 39270 }, { "epoch": 0.7133517361615574, "grad_norm": 0.8777284198592773, "learning_rate": 3.85965625759522e-06, "loss": 0.6242, "step": 39280 }, { "epoch": 0.7135333430190324, "grad_norm": 0.8574136893087952, "learning_rate": 3.855108648758531e-06, "loss": 0.6174, "step": 39290 }, { "epoch": 0.7137149498765073, "grad_norm": 0.821901036887175, "learning_rate": 3.850563080784478e-06, "loss": 0.6162, "step": 39300 }, { "epoch": 0.7138965567339822, "grad_norm": 0.8515050912170972, "learning_rate": 3.846019555182758e-06, "loss": 0.617, "step": 39310 }, { "epoch": 0.7140781635914573, "grad_norm": 0.834074859839743, "learning_rate": 3.84147807346238e-06, "loss": 0.6284, "step": 39320 }, { "epoch": 0.7142597704489322, "grad_norm": 0.8775392801318368, "learning_rate": 3.836938637131674e-06, "loss": 0.6183, "step": 39330 }, { "epoch": 0.7144413773064071, "grad_norm": 0.8725719053468193, "learning_rate": 3.832401247698297e-06, "loss": 0.6202, "step": 39340 }, { "epoch": 0.714622984163882, "grad_norm": 0.8616237546841923, "learning_rate": 3.8278659066692196e-06, "loss": 0.6163, "step": 39350 }, { "epoch": 0.714804591021357, "grad_norm": 0.8628606079588046, "learning_rate": 3.823332615550739e-06, "loss": 0.6235, "step": 39360 }, { "epoch": 0.7149861978788319, "grad_norm": 0.8665129189770934, "learning_rate": 3.818801375848462e-06, "loss": 0.6146, "step": 39370 }, { "epoch": 0.7151678047363068, "grad_norm": 0.8459781890976823, "learning_rate": 3.8142721890673263e-06, "loss": 0.6063, "step": 39380 }, { "epoch": 0.7153494115937817, "grad_norm": 0.872913518644516, "learning_rate": 3.8097450567115734e-06, "loss": 0.6158, "step": 39390 }, { "epoch": 0.7155310184512568, "grad_norm": 0.8855810291848443, "learning_rate": 3.805219980284779e-06, "loss": 0.633, "step": 39400 }, { "epoch": 0.7157126253087317, "grad_norm": 0.8580628459678449, "learning_rate": 3.800696961289818e-06, "loss": 0.6168, "step": 39410 }, { "epoch": 0.7158942321662066, "grad_norm": 0.8466079534614732, "learning_rate": 3.7961760012289007e-06, "loss": 0.6147, "step": 39420 }, { "epoch": 0.7160758390236815, "grad_norm": 0.8322913737673728, "learning_rate": 3.791657101603534e-06, "loss": 0.6227, "step": 39430 }, { "epoch": 0.7162574458811565, "grad_norm": 0.8561184263431847, "learning_rate": 3.7871402639145573e-06, "loss": 0.6144, "step": 39440 }, { "epoch": 0.7164390527386314, "grad_norm": 0.8448224124900832, "learning_rate": 3.7826254896621185e-06, "loss": 0.625, "step": 39450 }, { "epoch": 0.7166206595961063, "grad_norm": 0.8438017031378614, "learning_rate": 3.778112780345675e-06, "loss": 0.5967, "step": 39460 }, { "epoch": 0.7168022664535812, "grad_norm": 0.820506286634203, "learning_rate": 3.773602137464012e-06, "loss": 0.6139, "step": 39470 }, { "epoch": 0.7169838733110562, "grad_norm": 0.8398694115785386, "learning_rate": 3.7690935625152093e-06, "loss": 0.6081, "step": 39480 }, { "epoch": 0.7171654801685312, "grad_norm": 0.8301370004627981, "learning_rate": 3.7645870569966804e-06, "loss": 0.6129, "step": 39490 }, { "epoch": 0.7173470870260061, "grad_norm": 0.8589036401025936, "learning_rate": 3.7600826224051334e-06, "loss": 0.613, "step": 39500 }, { "epoch": 0.7175286938834811, "grad_norm": 0.8874579442777072, "learning_rate": 3.755580260236603e-06, "loss": 0.6162, "step": 39510 }, { "epoch": 0.717710300740956, "grad_norm": 0.8490128827233214, "learning_rate": 3.7510799719864277e-06, "loss": 0.6147, "step": 39520 }, { "epoch": 0.7178919075984309, "grad_norm": 0.8236077518612915, "learning_rate": 3.746581759149254e-06, "loss": 0.6098, "step": 39530 }, { "epoch": 0.7180735144559058, "grad_norm": 0.8480315023170005, "learning_rate": 3.7420856232190505e-06, "loss": 0.6065, "step": 39540 }, { "epoch": 0.7182551213133808, "grad_norm": 0.8255934017010574, "learning_rate": 3.737591565689085e-06, "loss": 0.6092, "step": 39550 }, { "epoch": 0.7184367281708557, "grad_norm": 0.8762278396848936, "learning_rate": 3.7330995880519427e-06, "loss": 0.6004, "step": 39560 }, { "epoch": 0.7186183350283307, "grad_norm": 0.8394200784492374, "learning_rate": 3.7286096917995117e-06, "loss": 0.6114, "step": 39570 }, { "epoch": 0.7187999418858056, "grad_norm": 0.8276163185407318, "learning_rate": 3.724121878422996e-06, "loss": 0.6144, "step": 39580 }, { "epoch": 0.7189815487432806, "grad_norm": 0.815998270683967, "learning_rate": 3.7196361494128986e-06, "loss": 0.6133, "step": 39590 }, { "epoch": 0.7191631556007555, "grad_norm": 0.9151824338650586, "learning_rate": 3.71515250625904e-06, "loss": 0.6259, "step": 39600 }, { "epoch": 0.7193447624582304, "grad_norm": 0.8504408879399998, "learning_rate": 3.7106709504505434e-06, "loss": 0.6244, "step": 39610 }, { "epoch": 0.7195263693157053, "grad_norm": 0.8812479895247689, "learning_rate": 3.7061914834758317e-06, "loss": 0.6213, "step": 39620 }, { "epoch": 0.7197079761731803, "grad_norm": 0.8468612348939377, "learning_rate": 3.7017141068226505e-06, "loss": 0.6273, "step": 39630 }, { "epoch": 0.7198895830306552, "grad_norm": 0.8867519136958556, "learning_rate": 3.697238821978033e-06, "loss": 0.6206, "step": 39640 }, { "epoch": 0.7200711898881301, "grad_norm": 0.8285649210516371, "learning_rate": 3.6927656304283345e-06, "loss": 0.6174, "step": 39650 }, { "epoch": 0.7202527967456052, "grad_norm": 0.8521304658935435, "learning_rate": 3.688294533659199e-06, "loss": 0.6193, "step": 39660 }, { "epoch": 0.7204344036030801, "grad_norm": 0.8116706023915033, "learning_rate": 3.6838255331555906e-06, "loss": 0.6117, "step": 39670 }, { "epoch": 0.720616010460555, "grad_norm": 0.8242267338553404, "learning_rate": 3.6793586304017616e-06, "loss": 0.6168, "step": 39680 }, { "epoch": 0.7207976173180299, "grad_norm": 0.8583333411982063, "learning_rate": 3.674893826881282e-06, "loss": 0.6098, "step": 39690 }, { "epoch": 0.7209792241755049, "grad_norm": 0.8536642881423838, "learning_rate": 3.670431124077011e-06, "loss": 0.6255, "step": 39700 }, { "epoch": 0.7211608310329798, "grad_norm": 0.8759482488647229, "learning_rate": 3.6659705234711253e-06, "loss": 0.6196, "step": 39710 }, { "epoch": 0.7213424378904547, "grad_norm": 0.8953299169308059, "learning_rate": 3.661512026545089e-06, "loss": 0.6213, "step": 39720 }, { "epoch": 0.7215240447479296, "grad_norm": 0.8378121294522253, "learning_rate": 3.6570556347796725e-06, "loss": 0.6091, "step": 39730 }, { "epoch": 0.7217056516054047, "grad_norm": 0.8430709885218486, "learning_rate": 3.652601349654954e-06, "loss": 0.6224, "step": 39740 }, { "epoch": 0.7218872584628796, "grad_norm": 0.8331703178668413, "learning_rate": 3.6481491726502993e-06, "loss": 0.5993, "step": 39750 }, { "epoch": 0.7220688653203545, "grad_norm": 0.8691447130568414, "learning_rate": 3.643699105244387e-06, "loss": 0.6222, "step": 39760 }, { "epoch": 0.7222504721778295, "grad_norm": 0.8474788453856545, "learning_rate": 3.639251148915184e-06, "loss": 0.6176, "step": 39770 }, { "epoch": 0.7224320790353044, "grad_norm": 0.8449743951437649, "learning_rate": 3.634805305139966e-06, "loss": 0.608, "step": 39780 }, { "epoch": 0.7226136858927793, "grad_norm": 0.8306417825485993, "learning_rate": 3.630361575395296e-06, "loss": 0.6198, "step": 39790 }, { "epoch": 0.7227952927502542, "grad_norm": 0.8570690062949597, "learning_rate": 3.6259199611570473e-06, "loss": 0.6125, "step": 39800 }, { "epoch": 0.7229768996077292, "grad_norm": 0.8127787037978212, "learning_rate": 3.6214804639003786e-06, "loss": 0.6131, "step": 39810 }, { "epoch": 0.7231585064652041, "grad_norm": 0.8782227176075806, "learning_rate": 3.6170430850997527e-06, "loss": 0.6096, "step": 39820 }, { "epoch": 0.7233401133226791, "grad_norm": 0.83588470115888, "learning_rate": 3.612607826228932e-06, "loss": 0.6172, "step": 39830 }, { "epoch": 0.723521720180154, "grad_norm": 0.8405850329305171, "learning_rate": 3.6081746887609635e-06, "loss": 0.6115, "step": 39840 }, { "epoch": 0.723703327037629, "grad_norm": 0.8356409380949252, "learning_rate": 3.603743674168202e-06, "loss": 0.6072, "step": 39850 }, { "epoch": 0.7238849338951039, "grad_norm": 0.8211478123598267, "learning_rate": 3.599314783922284e-06, "loss": 0.6061, "step": 39860 }, { "epoch": 0.7240665407525788, "grad_norm": 0.8280941730494249, "learning_rate": 3.5948880194941573e-06, "loss": 0.6221, "step": 39870 }, { "epoch": 0.7242481476100537, "grad_norm": 0.8653306351758894, "learning_rate": 3.590463382354046e-06, "loss": 0.6141, "step": 39880 }, { "epoch": 0.7244297544675287, "grad_norm": 0.8338804313803531, "learning_rate": 3.5860408739714816e-06, "loss": 0.618, "step": 39890 }, { "epoch": 0.7246113613250036, "grad_norm": 0.8886021970539756, "learning_rate": 3.5816204958152777e-06, "loss": 0.6299, "step": 39900 }, { "epoch": 0.7247929681824786, "grad_norm": 0.8600761942778543, "learning_rate": 3.577202249353552e-06, "loss": 0.6199, "step": 39910 }, { "epoch": 0.7249745750399536, "grad_norm": 0.8957703434409576, "learning_rate": 3.572786136053704e-06, "loss": 0.6135, "step": 39920 }, { "epoch": 0.7251561818974285, "grad_norm": 0.8466877494108946, "learning_rate": 3.5683721573824258e-06, "loss": 0.6105, "step": 39930 }, { "epoch": 0.7253377887549034, "grad_norm": 0.8687863735871747, "learning_rate": 3.56396031480571e-06, "loss": 0.6068, "step": 39940 }, { "epoch": 0.7255193956123783, "grad_norm": 0.8321419937109654, "learning_rate": 3.5595506097888266e-06, "loss": 0.6266, "step": 39950 }, { "epoch": 0.7257010024698533, "grad_norm": 0.8505607253379484, "learning_rate": 3.5551430437963474e-06, "loss": 0.6125, "step": 39960 }, { "epoch": 0.7258826093273282, "grad_norm": 0.8288599958810616, "learning_rate": 3.550737618292124e-06, "loss": 0.6004, "step": 39970 }, { "epoch": 0.7260642161848031, "grad_norm": 0.833259212594749, "learning_rate": 3.5463343347393065e-06, "loss": 0.6059, "step": 39980 }, { "epoch": 0.726245823042278, "grad_norm": 0.8084547631310322, "learning_rate": 3.541933194600322e-06, "loss": 0.6117, "step": 39990 }, { "epoch": 0.7264274298997531, "grad_norm": 0.8108762627441657, "learning_rate": 3.5375341993369007e-06, "loss": 0.617, "step": 40000 }, { "epoch": 0.726609036757228, "grad_norm": 0.8452472906705619, "learning_rate": 3.5331373504100485e-06, "loss": 0.604, "step": 40010 }, { "epoch": 0.7267906436147029, "grad_norm": 0.8189142164165659, "learning_rate": 3.528742649280057e-06, "loss": 0.5965, "step": 40020 }, { "epoch": 0.7269722504721778, "grad_norm": 0.8528373856685174, "learning_rate": 3.524350097406518e-06, "loss": 0.6139, "step": 40030 }, { "epoch": 0.7271538573296528, "grad_norm": 0.8803961456838826, "learning_rate": 3.519959696248294e-06, "loss": 0.636, "step": 40040 }, { "epoch": 0.7273354641871277, "grad_norm": 0.8521760045992919, "learning_rate": 3.515571447263546e-06, "loss": 0.6028, "step": 40050 }, { "epoch": 0.7275170710446026, "grad_norm": 0.868449010269505, "learning_rate": 3.5111853519097094e-06, "loss": 0.5945, "step": 40060 }, { "epoch": 0.7276986779020775, "grad_norm": 0.8143735404381972, "learning_rate": 3.5068014116435144e-06, "loss": 0.62, "step": 40070 }, { "epoch": 0.7278802847595526, "grad_norm": 0.8238602599678863, "learning_rate": 3.5024196279209657e-06, "loss": 0.6237, "step": 40080 }, { "epoch": 0.7280618916170275, "grad_norm": 0.8431507550109768, "learning_rate": 3.4980400021973615e-06, "loss": 0.6061, "step": 40090 }, { "epoch": 0.7282434984745024, "grad_norm": 0.8903908414351556, "learning_rate": 3.4936625359272726e-06, "loss": 0.6093, "step": 40100 }, { "epoch": 0.7284251053319774, "grad_norm": 0.850461002320581, "learning_rate": 3.4892872305645655e-06, "loss": 0.6095, "step": 40110 }, { "epoch": 0.7286067121894523, "grad_norm": 0.8458735839154522, "learning_rate": 3.4849140875623768e-06, "loss": 0.6266, "step": 40120 }, { "epoch": 0.7287883190469272, "grad_norm": 0.857292233281437, "learning_rate": 3.4805431083731265e-06, "loss": 0.6143, "step": 40130 }, { "epoch": 0.7289699259044021, "grad_norm": 0.922390485769235, "learning_rate": 3.4761742944485277e-06, "loss": 0.6108, "step": 40140 }, { "epoch": 0.729151532761877, "grad_norm": 0.8493948697875312, "learning_rate": 3.471807647239559e-06, "loss": 0.6042, "step": 40150 }, { "epoch": 0.729333139619352, "grad_norm": 0.8480431371370576, "learning_rate": 3.4674431681964925e-06, "loss": 0.6262, "step": 40160 }, { "epoch": 0.729514746476827, "grad_norm": 0.8373512360736111, "learning_rate": 3.463080858768868e-06, "loss": 0.6105, "step": 40170 }, { "epoch": 0.7296963533343019, "grad_norm": 0.9020317224162451, "learning_rate": 3.4587207204055164e-06, "loss": 0.6354, "step": 40180 }, { "epoch": 0.7298779601917769, "grad_norm": 0.8668674010635156, "learning_rate": 3.4543627545545365e-06, "loss": 0.6083, "step": 40190 }, { "epoch": 0.7300595670492518, "grad_norm": 0.8679115093948735, "learning_rate": 3.4500069626633183e-06, "loss": 0.6195, "step": 40200 }, { "epoch": 0.7302411739067267, "grad_norm": 0.8319593201399134, "learning_rate": 3.4456533461785137e-06, "loss": 0.6149, "step": 40210 }, { "epoch": 0.7304227807642016, "grad_norm": 0.834723092483455, "learning_rate": 3.441301906546065e-06, "loss": 0.6163, "step": 40220 }, { "epoch": 0.7306043876216766, "grad_norm": 0.8209237794247752, "learning_rate": 3.4369526452111924e-06, "loss": 0.6056, "step": 40230 }, { "epoch": 0.7307859944791515, "grad_norm": 0.8604082179127286, "learning_rate": 3.4326055636183784e-06, "loss": 0.6146, "step": 40240 }, { "epoch": 0.7309676013366264, "grad_norm": 0.8465568033943189, "learning_rate": 3.4282606632114004e-06, "loss": 0.6074, "step": 40250 }, { "epoch": 0.7311492081941015, "grad_norm": 0.855146077098048, "learning_rate": 3.4239179454332926e-06, "loss": 0.6053, "step": 40260 }, { "epoch": 0.7313308150515764, "grad_norm": 0.8727949881899629, "learning_rate": 3.4195774117263813e-06, "loss": 0.6094, "step": 40270 }, { "epoch": 0.7315124219090513, "grad_norm": 0.8450042036038062, "learning_rate": 3.415239063532253e-06, "loss": 0.6263, "step": 40280 }, { "epoch": 0.7316940287665262, "grad_norm": 0.861076871272253, "learning_rate": 3.4109029022917816e-06, "loss": 0.6101, "step": 40290 }, { "epoch": 0.7318756356240012, "grad_norm": 0.8557006550702182, "learning_rate": 3.4065689294451022e-06, "loss": 0.6222, "step": 40300 }, { "epoch": 0.7320572424814761, "grad_norm": 0.8122236265481408, "learning_rate": 3.402237146431633e-06, "loss": 0.5995, "step": 40310 }, { "epoch": 0.732238849338951, "grad_norm": 0.9333835391476447, "learning_rate": 3.3979075546900597e-06, "loss": 0.6369, "step": 40320 }, { "epoch": 0.7324204561964259, "grad_norm": 0.8574924763663878, "learning_rate": 3.393580155658337e-06, "loss": 0.619, "step": 40330 }, { "epoch": 0.732602063053901, "grad_norm": 0.8545649474777821, "learning_rate": 3.3892549507737025e-06, "loss": 0.6082, "step": 40340 }, { "epoch": 0.7327836699113759, "grad_norm": 0.7982702526487327, "learning_rate": 3.384931941472652e-06, "loss": 0.6069, "step": 40350 }, { "epoch": 0.7329652767688508, "grad_norm": 0.865057894757705, "learning_rate": 3.380611129190966e-06, "loss": 0.6214, "step": 40360 }, { "epoch": 0.7331468836263257, "grad_norm": 0.8489049002435408, "learning_rate": 3.3762925153636785e-06, "loss": 0.6098, "step": 40370 }, { "epoch": 0.7333284904838007, "grad_norm": 0.8288060558562985, "learning_rate": 3.3719761014251107e-06, "loss": 0.621, "step": 40380 }, { "epoch": 0.7335100973412756, "grad_norm": 0.867062206006629, "learning_rate": 3.3676618888088387e-06, "loss": 0.6244, "step": 40390 }, { "epoch": 0.7336917041987505, "grad_norm": 0.8378856856946334, "learning_rate": 3.3633498789477193e-06, "loss": 0.6144, "step": 40400 }, { "epoch": 0.7338733110562254, "grad_norm": 0.8657517997590792, "learning_rate": 3.359040073273866e-06, "loss": 0.6021, "step": 40410 }, { "epoch": 0.7340549179137004, "grad_norm": 0.8403679567505481, "learning_rate": 3.3547324732186728e-06, "loss": 0.6123, "step": 40420 }, { "epoch": 0.7342365247711754, "grad_norm": 0.828085120480767, "learning_rate": 3.3504270802127926e-06, "loss": 0.6108, "step": 40430 }, { "epoch": 0.7344181316286503, "grad_norm": 0.8250842849572894, "learning_rate": 3.346123895686142e-06, "loss": 0.6023, "step": 40440 }, { "epoch": 0.7345997384861253, "grad_norm": 0.836707707247803, "learning_rate": 3.341822921067919e-06, "loss": 0.6188, "step": 40450 }, { "epoch": 0.7347813453436002, "grad_norm": 0.8468734282202167, "learning_rate": 3.3375241577865693e-06, "loss": 0.6138, "step": 40460 }, { "epoch": 0.7349629522010751, "grad_norm": 0.8633597895060273, "learning_rate": 3.333227607269821e-06, "loss": 0.6112, "step": 40470 }, { "epoch": 0.73514455905855, "grad_norm": 0.8300404291154315, "learning_rate": 3.328933270944652e-06, "loss": 0.6094, "step": 40480 }, { "epoch": 0.735326165916025, "grad_norm": 0.8451129050182534, "learning_rate": 3.3246411502373186e-06, "loss": 0.6019, "step": 40490 }, { "epoch": 0.7355077727734999, "grad_norm": 0.8439580349251274, "learning_rate": 3.32035124657333e-06, "loss": 0.6071, "step": 40500 }, { "epoch": 0.7356893796309749, "grad_norm": 0.8510883174718725, "learning_rate": 3.316063561377468e-06, "loss": 0.6171, "step": 40510 }, { "epoch": 0.7358709864884498, "grad_norm": 0.8684899816116373, "learning_rate": 3.3117780960737723e-06, "loss": 0.6112, "step": 40520 }, { "epoch": 0.7360525933459248, "grad_norm": 0.8723125768884491, "learning_rate": 3.307494852085541e-06, "loss": 0.6196, "step": 40530 }, { "epoch": 0.7362342002033997, "grad_norm": 0.8631584262050446, "learning_rate": 3.303213830835349e-06, "loss": 0.6125, "step": 40540 }, { "epoch": 0.7364158070608746, "grad_norm": 0.8601021000508949, "learning_rate": 3.2989350337450152e-06, "loss": 0.6011, "step": 40550 }, { "epoch": 0.7365974139183495, "grad_norm": 0.8358517734944942, "learning_rate": 3.294658462235637e-06, "loss": 0.6177, "step": 40560 }, { "epoch": 0.7367790207758245, "grad_norm": 0.8556938731412452, "learning_rate": 3.2903841177275566e-06, "loss": 0.6139, "step": 40570 }, { "epoch": 0.7369606276332994, "grad_norm": 0.8440497455105791, "learning_rate": 3.2861120016403904e-06, "loss": 0.6228, "step": 40580 }, { "epoch": 0.7371422344907743, "grad_norm": 0.8616399708198046, "learning_rate": 3.2818421153930026e-06, "loss": 0.6069, "step": 40590 }, { "epoch": 0.7373238413482494, "grad_norm": 0.8205148901463023, "learning_rate": 3.2775744604035285e-06, "loss": 0.6036, "step": 40600 }, { "epoch": 0.7375054482057243, "grad_norm": 0.8194998896012502, "learning_rate": 3.2733090380893506e-06, "loss": 0.6085, "step": 40610 }, { "epoch": 0.7376870550631992, "grad_norm": 0.8620604481140002, "learning_rate": 3.2690458498671184e-06, "loss": 0.6221, "step": 40620 }, { "epoch": 0.7378686619206741, "grad_norm": 0.8762344025361098, "learning_rate": 3.2647848971527398e-06, "loss": 0.6248, "step": 40630 }, { "epoch": 0.738050268778149, "grad_norm": 0.8274058869621563, "learning_rate": 3.260526181361372e-06, "loss": 0.6084, "step": 40640 }, { "epoch": 0.738231875635624, "grad_norm": 0.8759653137387892, "learning_rate": 3.25626970390744e-06, "loss": 0.5995, "step": 40650 }, { "epoch": 0.7384134824930989, "grad_norm": 0.8333129691457158, "learning_rate": 3.2520154662046143e-06, "loss": 0.5978, "step": 40660 }, { "epoch": 0.7385950893505738, "grad_norm": 0.8196756225183116, "learning_rate": 3.2477634696658323e-06, "loss": 0.6167, "step": 40670 }, { "epoch": 0.7387766962080489, "grad_norm": 0.824654867872205, "learning_rate": 3.2435137157032748e-06, "loss": 0.6131, "step": 40680 }, { "epoch": 0.7389583030655238, "grad_norm": 0.897565776176338, "learning_rate": 3.2392662057283943e-06, "loss": 0.614, "step": 40690 }, { "epoch": 0.7391399099229987, "grad_norm": 0.8415924314305749, "learning_rate": 3.2350209411518785e-06, "loss": 0.5993, "step": 40700 }, { "epoch": 0.7393215167804736, "grad_norm": 0.8988383936378946, "learning_rate": 3.230777923383689e-06, "loss": 0.6253, "step": 40710 }, { "epoch": 0.7395031236379486, "grad_norm": 0.8757164344791512, "learning_rate": 3.226537153833026e-06, "loss": 0.6059, "step": 40720 }, { "epoch": 0.7396847304954235, "grad_norm": 0.8402414442538345, "learning_rate": 3.222298633908347e-06, "loss": 0.6113, "step": 40730 }, { "epoch": 0.7398663373528984, "grad_norm": 0.8680032085917974, "learning_rate": 3.218062365017369e-06, "loss": 0.6051, "step": 40740 }, { "epoch": 0.7400479442103733, "grad_norm": 0.8827034602077682, "learning_rate": 3.213828348567051e-06, "loss": 0.6143, "step": 40750 }, { "epoch": 0.7402295510678483, "grad_norm": 0.835173185209447, "learning_rate": 3.2095965859636147e-06, "loss": 0.5977, "step": 40760 }, { "epoch": 0.7404111579253233, "grad_norm": 0.8839875925651635, "learning_rate": 3.205367078612522e-06, "loss": 0.6098, "step": 40770 }, { "epoch": 0.7405927647827982, "grad_norm": 0.8470902053434205, "learning_rate": 3.201139827918499e-06, "loss": 0.6148, "step": 40780 }, { "epoch": 0.7407743716402732, "grad_norm": 0.8550086957516041, "learning_rate": 3.196914835285506e-06, "loss": 0.6125, "step": 40790 }, { "epoch": 0.7409559784977481, "grad_norm": 0.8759891235893378, "learning_rate": 3.192692102116771e-06, "loss": 0.6172, "step": 40800 }, { "epoch": 0.741137585355223, "grad_norm": 0.9264721470704042, "learning_rate": 3.1884716298147554e-06, "loss": 0.6142, "step": 40810 }, { "epoch": 0.7413191922126979, "grad_norm": 0.8408314668494319, "learning_rate": 3.1842534197811823e-06, "loss": 0.6086, "step": 40820 }, { "epoch": 0.7415007990701729, "grad_norm": 0.8284069473869802, "learning_rate": 3.180037473417017e-06, "loss": 0.6199, "step": 40830 }, { "epoch": 0.7416824059276478, "grad_norm": 0.822872231755064, "learning_rate": 3.17582379212247e-06, "loss": 0.6069, "step": 40840 }, { "epoch": 0.7418640127851228, "grad_norm": 0.8327740449390255, "learning_rate": 3.171612377297011e-06, "loss": 0.6047, "step": 40850 }, { "epoch": 0.7420456196425977, "grad_norm": 0.8441888114111641, "learning_rate": 3.167403230339342e-06, "loss": 0.6159, "step": 40860 }, { "epoch": 0.7422272265000727, "grad_norm": 0.884666007658653, "learning_rate": 3.1631963526474275e-06, "loss": 0.6078, "step": 40870 }, { "epoch": 0.7424088333575476, "grad_norm": 0.8770171727284733, "learning_rate": 3.1589917456184617e-06, "loss": 0.6071, "step": 40880 }, { "epoch": 0.7425904402150225, "grad_norm": 0.8295954854451945, "learning_rate": 3.154789410648902e-06, "loss": 0.609, "step": 40890 }, { "epoch": 0.7427720470724974, "grad_norm": 0.8321855622228265, "learning_rate": 3.1505893491344353e-06, "loss": 0.5984, "step": 40900 }, { "epoch": 0.7429536539299724, "grad_norm": 0.8698745641185182, "learning_rate": 3.146391562470006e-06, "loss": 0.6159, "step": 40910 }, { "epoch": 0.7431352607874473, "grad_norm": 0.8630182630332268, "learning_rate": 3.142196052049795e-06, "loss": 0.5996, "step": 40920 }, { "epoch": 0.7433168676449222, "grad_norm": 0.9016899744086525, "learning_rate": 3.1380028192672275e-06, "loss": 0.6211, "step": 40930 }, { "epoch": 0.7434984745023973, "grad_norm": 0.8718045268898112, "learning_rate": 3.1338118655149796e-06, "loss": 0.6192, "step": 40940 }, { "epoch": 0.7436800813598722, "grad_norm": 0.9001522585779803, "learning_rate": 3.1296231921849597e-06, "loss": 0.6113, "step": 40950 }, { "epoch": 0.7438616882173471, "grad_norm": 0.8431968313934554, "learning_rate": 3.1254368006683313e-06, "loss": 0.6194, "step": 40960 }, { "epoch": 0.744043295074822, "grad_norm": 0.8427566553400266, "learning_rate": 3.1212526923554853e-06, "loss": 0.6167, "step": 40970 }, { "epoch": 0.744224901932297, "grad_norm": 0.8204720219541308, "learning_rate": 3.1170708686360705e-06, "loss": 0.6023, "step": 40980 }, { "epoch": 0.7444065087897719, "grad_norm": 0.8429413587171719, "learning_rate": 3.1128913308989616e-06, "loss": 0.614, "step": 40990 }, { "epoch": 0.7445881156472468, "grad_norm": 0.8576693216617602, "learning_rate": 3.108714080532288e-06, "loss": 0.6079, "step": 41000 }, { "epoch": 0.7447697225047217, "grad_norm": 0.8439000033758094, "learning_rate": 3.104539118923405e-06, "loss": 0.6177, "step": 41010 }, { "epoch": 0.7449513293621968, "grad_norm": 0.8442528742820762, "learning_rate": 3.100366447458919e-06, "loss": 0.6241, "step": 41020 }, { "epoch": 0.7451329362196717, "grad_norm": 0.8517317616354337, "learning_rate": 3.096196067524676e-06, "loss": 0.6138, "step": 41030 }, { "epoch": 0.7453145430771466, "grad_norm": 0.81966156026847, "learning_rate": 3.0920279805057507e-06, "loss": 0.6162, "step": 41040 }, { "epoch": 0.7454961499346215, "grad_norm": 0.8576563384599413, "learning_rate": 3.087862187786468e-06, "loss": 0.6164, "step": 41050 }, { "epoch": 0.7456777567920965, "grad_norm": 0.8522546240873444, "learning_rate": 3.0836986907503796e-06, "loss": 0.6134, "step": 41060 }, { "epoch": 0.7458593636495714, "grad_norm": 0.8633130466921244, "learning_rate": 3.0795374907802865e-06, "loss": 0.6245, "step": 41070 }, { "epoch": 0.7460409705070463, "grad_norm": 0.8331431085685307, "learning_rate": 3.0753785892582154e-06, "loss": 0.6173, "step": 41080 }, { "epoch": 0.7462225773645212, "grad_norm": 0.8566960356206081, "learning_rate": 3.0712219875654412e-06, "loss": 0.6156, "step": 41090 }, { "epoch": 0.7464041842219962, "grad_norm": 0.8245711281254545, "learning_rate": 3.067067687082462e-06, "loss": 0.6014, "step": 41100 }, { "epoch": 0.7465857910794712, "grad_norm": 0.8726888534844933, "learning_rate": 3.0629156891890256e-06, "loss": 0.6166, "step": 41110 }, { "epoch": 0.7467673979369461, "grad_norm": 0.8363599577323112, "learning_rate": 3.0587659952641047e-06, "loss": 0.6237, "step": 41120 }, { "epoch": 0.7469490047944211, "grad_norm": 0.8200387259167795, "learning_rate": 3.0546186066859064e-06, "loss": 0.6061, "step": 41130 }, { "epoch": 0.747130611651896, "grad_norm": 0.8292234804577456, "learning_rate": 3.0504735248318816e-06, "loss": 0.602, "step": 41140 }, { "epoch": 0.7473122185093709, "grad_norm": 0.8883040400262642, "learning_rate": 3.0463307510787044e-06, "loss": 0.6118, "step": 41150 }, { "epoch": 0.7474938253668458, "grad_norm": 0.8638641596835259, "learning_rate": 3.0421902868022922e-06, "loss": 0.6092, "step": 41160 }, { "epoch": 0.7476754322243208, "grad_norm": 0.8788765386361546, "learning_rate": 3.0380521333777844e-06, "loss": 0.6153, "step": 41170 }, { "epoch": 0.7478570390817957, "grad_norm": 0.8447368907582822, "learning_rate": 3.033916292179566e-06, "loss": 0.6032, "step": 41180 }, { "epoch": 0.7480386459392707, "grad_norm": 0.8404709701323969, "learning_rate": 3.0297827645812394e-06, "loss": 0.606, "step": 41190 }, { "epoch": 0.7482202527967456, "grad_norm": 0.8635736545327864, "learning_rate": 3.025651551955654e-06, "loss": 0.6034, "step": 41200 }, { "epoch": 0.7484018596542206, "grad_norm": 0.887353394947505, "learning_rate": 3.0215226556748743e-06, "loss": 0.6274, "step": 41210 }, { "epoch": 0.7485834665116955, "grad_norm": 0.8908189270893377, "learning_rate": 3.017396077110212e-06, "loss": 0.6225, "step": 41220 }, { "epoch": 0.7487650733691704, "grad_norm": 0.8702619452328307, "learning_rate": 3.013271817632195e-06, "loss": 0.6151, "step": 41230 }, { "epoch": 0.7489466802266453, "grad_norm": 0.8326226003701725, "learning_rate": 3.0091498786105854e-06, "loss": 0.6195, "step": 41240 }, { "epoch": 0.7491282870841203, "grad_norm": 0.8520546843655319, "learning_rate": 3.005030261414382e-06, "loss": 0.6065, "step": 41250 }, { "epoch": 0.7493098939415952, "grad_norm": 0.8660859545958187, "learning_rate": 3.0009129674118e-06, "loss": 0.6283, "step": 41260 }, { "epoch": 0.7494915007990701, "grad_norm": 0.8176032962192333, "learning_rate": 2.9967979979702945e-06, "loss": 0.6131, "step": 41270 }, { "epoch": 0.7496731076565452, "grad_norm": 0.8614771347745558, "learning_rate": 2.9926853544565382e-06, "loss": 0.6261, "step": 41280 }, { "epoch": 0.7498547145140201, "grad_norm": 0.8651205031076232, "learning_rate": 2.9885750382364433e-06, "loss": 0.6289, "step": 41290 }, { "epoch": 0.750036321371495, "grad_norm": 0.8614289641087639, "learning_rate": 2.984467050675136e-06, "loss": 0.6163, "step": 41300 }, { "epoch": 0.7502179282289699, "grad_norm": 0.8394076075895556, "learning_rate": 2.98036139313698e-06, "loss": 0.6031, "step": 41310 }, { "epoch": 0.7503995350864449, "grad_norm": 0.8585461793779071, "learning_rate": 2.976258066985559e-06, "loss": 0.6049, "step": 41320 }, { "epoch": 0.7505811419439198, "grad_norm": 0.8629909243317805, "learning_rate": 2.9721570735836804e-06, "loss": 0.6155, "step": 41330 }, { "epoch": 0.7507627488013947, "grad_norm": 0.8431583281817255, "learning_rate": 2.9680584142933857e-06, "loss": 0.6201, "step": 41340 }, { "epoch": 0.7509443556588696, "grad_norm": 0.8544446355177603, "learning_rate": 2.9639620904759315e-06, "loss": 0.6137, "step": 41350 }, { "epoch": 0.7511259625163447, "grad_norm": 0.8715619366941179, "learning_rate": 2.9598681034918075e-06, "loss": 0.6247, "step": 41360 }, { "epoch": 0.7513075693738196, "grad_norm": 0.8540607379267016, "learning_rate": 2.9557764547007174e-06, "loss": 0.615, "step": 41370 }, { "epoch": 0.7514891762312945, "grad_norm": 0.831699591127592, "learning_rate": 2.951687145461599e-06, "loss": 0.6083, "step": 41380 }, { "epoch": 0.7516707830887694, "grad_norm": 0.7993892781566457, "learning_rate": 2.9476001771326048e-06, "loss": 0.5971, "step": 41390 }, { "epoch": 0.7518523899462444, "grad_norm": 0.8487282863787702, "learning_rate": 2.9435155510711157e-06, "loss": 0.5996, "step": 41400 }, { "epoch": 0.7520339968037193, "grad_norm": 0.8339931556121227, "learning_rate": 2.939433268633727e-06, "loss": 0.61, "step": 41410 }, { "epoch": 0.7522156036611942, "grad_norm": 0.8601790125853136, "learning_rate": 2.9353533311762626e-06, "loss": 0.6073, "step": 41420 }, { "epoch": 0.7523972105186691, "grad_norm": 0.8404875897296348, "learning_rate": 2.9312757400537704e-06, "loss": 0.6153, "step": 41430 }, { "epoch": 0.7525788173761441, "grad_norm": 0.8467041007923215, "learning_rate": 2.927200496620507e-06, "loss": 0.6024, "step": 41440 }, { "epoch": 0.7527604242336191, "grad_norm": 0.8416127729478489, "learning_rate": 2.9231276022299626e-06, "loss": 0.6197, "step": 41450 }, { "epoch": 0.752942031091094, "grad_norm": 0.8321057213098895, "learning_rate": 2.9190570582348352e-06, "loss": 0.6082, "step": 41460 }, { "epoch": 0.753123637948569, "grad_norm": 0.8531431024039675, "learning_rate": 2.914988865987054e-06, "loss": 0.6079, "step": 41470 }, { "epoch": 0.7533052448060439, "grad_norm": 0.8452613283381708, "learning_rate": 2.9109230268377544e-06, "loss": 0.6192, "step": 41480 }, { "epoch": 0.7534868516635188, "grad_norm": 0.8654128676556594, "learning_rate": 2.9068595421373024e-06, "loss": 0.6193, "step": 41490 }, { "epoch": 0.7536684585209937, "grad_norm": 0.8544535973043049, "learning_rate": 2.9027984132352728e-06, "loss": 0.6018, "step": 41500 }, { "epoch": 0.7538500653784687, "grad_norm": 0.8407678185366516, "learning_rate": 2.8987396414804667e-06, "loss": 0.6219, "step": 41510 }, { "epoch": 0.7540316722359436, "grad_norm": 0.8866414621426268, "learning_rate": 2.894683228220895e-06, "loss": 0.6157, "step": 41520 }, { "epoch": 0.7542132790934186, "grad_norm": 0.8196061009638257, "learning_rate": 2.890629174803783e-06, "loss": 0.633, "step": 41530 }, { "epoch": 0.7543948859508935, "grad_norm": 0.8429744458961826, "learning_rate": 2.8865774825755854e-06, "loss": 0.607, "step": 41540 }, { "epoch": 0.7545764928083685, "grad_norm": 0.8914299834239152, "learning_rate": 2.882528152881956e-06, "loss": 0.6029, "step": 41550 }, { "epoch": 0.7547580996658434, "grad_norm": 0.8486689468892001, "learning_rate": 2.8784811870677797e-06, "loss": 0.6058, "step": 41560 }, { "epoch": 0.7549397065233183, "grad_norm": 0.8538990035061602, "learning_rate": 2.874436586477143e-06, "loss": 0.6144, "step": 41570 }, { "epoch": 0.7551213133807932, "grad_norm": 0.8805102247378195, "learning_rate": 2.8703943524533582e-06, "loss": 0.6122, "step": 41580 }, { "epoch": 0.7553029202382682, "grad_norm": 0.8468884525536698, "learning_rate": 2.866354486338939e-06, "loss": 0.5923, "step": 41590 }, { "epoch": 0.7554845270957431, "grad_norm": 0.8466568716460677, "learning_rate": 2.8623169894756274e-06, "loss": 0.5987, "step": 41600 }, { "epoch": 0.755666133953218, "grad_norm": 0.8118319999945756, "learning_rate": 2.858281863204365e-06, "loss": 0.6135, "step": 41610 }, { "epoch": 0.7558477408106931, "grad_norm": 0.9043422120754896, "learning_rate": 2.854249108865317e-06, "loss": 0.6113, "step": 41620 }, { "epoch": 0.756029347668168, "grad_norm": 0.8564851335065465, "learning_rate": 2.850218727797852e-06, "loss": 0.614, "step": 41630 }, { "epoch": 0.7562109545256429, "grad_norm": 0.8604967030135766, "learning_rate": 2.8461907213405526e-06, "loss": 0.6118, "step": 41640 }, { "epoch": 0.7563925613831178, "grad_norm": 0.8377107536723686, "learning_rate": 2.8421650908312204e-06, "loss": 0.6024, "step": 41650 }, { "epoch": 0.7565741682405928, "grad_norm": 0.8421498647070417, "learning_rate": 2.8381418376068547e-06, "loss": 0.6011, "step": 41660 }, { "epoch": 0.7567557750980677, "grad_norm": 0.8453314260455536, "learning_rate": 2.8341209630036783e-06, "loss": 0.6383, "step": 41670 }, { "epoch": 0.7569373819555426, "grad_norm": 0.8660694661310104, "learning_rate": 2.830102468357113e-06, "loss": 0.6134, "step": 41680 }, { "epoch": 0.7571189888130175, "grad_norm": 0.8550807600977846, "learning_rate": 2.8260863550018013e-06, "loss": 0.6206, "step": 41690 }, { "epoch": 0.7573005956704926, "grad_norm": 0.8569958098451079, "learning_rate": 2.822072624271581e-06, "loss": 0.6145, "step": 41700 }, { "epoch": 0.7574822025279675, "grad_norm": 0.8389860312757451, "learning_rate": 2.818061277499514e-06, "loss": 0.611, "step": 41710 }, { "epoch": 0.7576638093854424, "grad_norm": 0.8388394436028659, "learning_rate": 2.8140523160178556e-06, "loss": 0.6125, "step": 41720 }, { "epoch": 0.7578454162429173, "grad_norm": 0.832121198071436, "learning_rate": 2.810045741158084e-06, "loss": 0.6044, "step": 41730 }, { "epoch": 0.7580270231003923, "grad_norm": 0.8631282316521659, "learning_rate": 2.8060415542508713e-06, "loss": 0.6111, "step": 41740 }, { "epoch": 0.7582086299578672, "grad_norm": 0.8487567253849236, "learning_rate": 2.8020397566260995e-06, "loss": 0.6101, "step": 41750 }, { "epoch": 0.7583902368153421, "grad_norm": 0.858047820193116, "learning_rate": 2.798040349612866e-06, "loss": 0.612, "step": 41760 }, { "epoch": 0.758571843672817, "grad_norm": 0.8620011558961335, "learning_rate": 2.7940433345394623e-06, "loss": 0.5973, "step": 41770 }, { "epoch": 0.758753450530292, "grad_norm": 0.8357513197857993, "learning_rate": 2.790048712733395e-06, "loss": 0.6121, "step": 41780 }, { "epoch": 0.758935057387767, "grad_norm": 0.8941680221080859, "learning_rate": 2.786056485521369e-06, "loss": 0.6311, "step": 41790 }, { "epoch": 0.7591166642452419, "grad_norm": 0.8623627175180844, "learning_rate": 2.7820666542292994e-06, "loss": 0.602, "step": 41800 }, { "epoch": 0.7592982711027169, "grad_norm": 0.8578121974456001, "learning_rate": 2.778079220182298e-06, "loss": 0.6045, "step": 41810 }, { "epoch": 0.7594798779601918, "grad_norm": 0.8424772390026786, "learning_rate": 2.7740941847046885e-06, "loss": 0.6128, "step": 41820 }, { "epoch": 0.7596614848176667, "grad_norm": 0.8751216482472739, "learning_rate": 2.770111549119998e-06, "loss": 0.6116, "step": 41830 }, { "epoch": 0.7598430916751416, "grad_norm": 0.8417634332449628, "learning_rate": 2.766131314750947e-06, "loss": 0.6106, "step": 41840 }, { "epoch": 0.7600246985326166, "grad_norm": 0.8236829801387969, "learning_rate": 2.7621534829194695e-06, "loss": 0.5997, "step": 41850 }, { "epoch": 0.7602063053900915, "grad_norm": 0.8195705929885048, "learning_rate": 2.758178054946693e-06, "loss": 0.6145, "step": 41860 }, { "epoch": 0.7603879122475665, "grad_norm": 0.849324055051107, "learning_rate": 2.7542050321529545e-06, "loss": 0.6072, "step": 41870 }, { "epoch": 0.7605695191050414, "grad_norm": 0.8330347041628838, "learning_rate": 2.750234415857783e-06, "loss": 0.6199, "step": 41880 }, { "epoch": 0.7607511259625164, "grad_norm": 0.8732597020021138, "learning_rate": 2.7462662073799195e-06, "loss": 0.6037, "step": 41890 }, { "epoch": 0.7609327328199913, "grad_norm": 0.8693822699303784, "learning_rate": 2.7423004080372938e-06, "loss": 0.6116, "step": 41900 }, { "epoch": 0.7611143396774662, "grad_norm": 0.8588174067290277, "learning_rate": 2.7383370191470447e-06, "loss": 0.6122, "step": 41910 }, { "epoch": 0.7612959465349411, "grad_norm": 0.8905376325945332, "learning_rate": 2.7343760420255026e-06, "loss": 0.6128, "step": 41920 }, { "epoch": 0.7614775533924161, "grad_norm": 0.8391282552290213, "learning_rate": 2.7304174779882066e-06, "loss": 0.6014, "step": 41930 }, { "epoch": 0.761659160249891, "grad_norm": 0.8819597637832177, "learning_rate": 2.726461328349885e-06, "loss": 0.6218, "step": 41940 }, { "epoch": 0.7618407671073659, "grad_norm": 0.8687742636052329, "learning_rate": 2.722507594424466e-06, "loss": 0.6008, "step": 41950 }, { "epoch": 0.762022373964841, "grad_norm": 0.8731632726094326, "learning_rate": 2.7185562775250827e-06, "loss": 0.6029, "step": 41960 }, { "epoch": 0.7622039808223159, "grad_norm": 0.8532667910082731, "learning_rate": 2.714607378964055e-06, "loss": 0.6047, "step": 41970 }, { "epoch": 0.7623855876797908, "grad_norm": 0.8465477067949603, "learning_rate": 2.7106609000529105e-06, "loss": 0.6178, "step": 41980 }, { "epoch": 0.7625671945372657, "grad_norm": 0.8786096987453504, "learning_rate": 2.7067168421023603e-06, "loss": 0.6103, "step": 41990 }, { "epoch": 0.7627488013947407, "grad_norm": 0.8535811089603498, "learning_rate": 2.702775206422326e-06, "loss": 0.6093, "step": 42000 }, { "epoch": 0.7629304082522156, "grad_norm": 0.8329076461805953, "learning_rate": 2.69883599432191e-06, "loss": 0.5976, "step": 42010 }, { "epoch": 0.7631120151096905, "grad_norm": 0.8797252157701042, "learning_rate": 2.694899207109424e-06, "loss": 0.6136, "step": 42020 }, { "epoch": 0.7632936219671654, "grad_norm": 0.841799320560487, "learning_rate": 2.6909648460923655e-06, "loss": 0.6146, "step": 42030 }, { "epoch": 0.7634752288246405, "grad_norm": 0.8041659431144416, "learning_rate": 2.687032912577423e-06, "loss": 0.6044, "step": 42040 }, { "epoch": 0.7636568356821154, "grad_norm": 0.8249960392229445, "learning_rate": 2.683103407870491e-06, "loss": 0.5992, "step": 42050 }, { "epoch": 0.7638384425395903, "grad_norm": 0.8372227038883011, "learning_rate": 2.679176333276644e-06, "loss": 0.5966, "step": 42060 }, { "epoch": 0.7640200493970652, "grad_norm": 0.8509446722238306, "learning_rate": 2.6752516901001624e-06, "loss": 0.6183, "step": 42070 }, { "epoch": 0.7642016562545402, "grad_norm": 0.8392964605348509, "learning_rate": 2.6713294796445053e-06, "loss": 0.6096, "step": 42080 }, { "epoch": 0.7643832631120151, "grad_norm": 0.7973193291400639, "learning_rate": 2.667409703212337e-06, "loss": 0.6101, "step": 42090 }, { "epoch": 0.76456486996949, "grad_norm": 0.8341462297221808, "learning_rate": 2.663492362105502e-06, "loss": 0.5964, "step": 42100 }, { "epoch": 0.764746476826965, "grad_norm": 0.9107859885083724, "learning_rate": 2.6595774576250466e-06, "loss": 0.6209, "step": 42110 }, { "epoch": 0.7649280836844399, "grad_norm": 0.8965179888181094, "learning_rate": 2.655664991071196e-06, "loss": 0.6377, "step": 42120 }, { "epoch": 0.7651096905419149, "grad_norm": 0.8394112664611643, "learning_rate": 2.6517549637433794e-06, "loss": 0.6169, "step": 42130 }, { "epoch": 0.7652912973993898, "grad_norm": 0.8370676579170172, "learning_rate": 2.6478473769402047e-06, "loss": 0.6049, "step": 42140 }, { "epoch": 0.7654729042568648, "grad_norm": 0.8461495996272517, "learning_rate": 2.64394223195947e-06, "loss": 0.603, "step": 42150 }, { "epoch": 0.7656545111143397, "grad_norm": 0.8521150787080343, "learning_rate": 2.6400395300981717e-06, "loss": 0.5981, "step": 42160 }, { "epoch": 0.7658361179718146, "grad_norm": 0.8521179717383704, "learning_rate": 2.636139272652485e-06, "loss": 0.5976, "step": 42170 }, { "epoch": 0.7660177248292895, "grad_norm": 0.9079349783615169, "learning_rate": 2.6322414609177793e-06, "loss": 0.607, "step": 42180 }, { "epoch": 0.7661993316867645, "grad_norm": 0.8762839952746156, "learning_rate": 2.6283460961886053e-06, "loss": 0.619, "step": 42190 }, { "epoch": 0.7663809385442394, "grad_norm": 0.8698077215227665, "learning_rate": 2.6244531797587112e-06, "loss": 0.6144, "step": 42200 }, { "epoch": 0.7665625454017144, "grad_norm": 0.8489052765828811, "learning_rate": 2.62056271292102e-06, "loss": 0.6097, "step": 42210 }, { "epoch": 0.7667441522591893, "grad_norm": 0.842635982819226, "learning_rate": 2.6166746969676483e-06, "loss": 0.6135, "step": 42220 }, { "epoch": 0.7669257591166643, "grad_norm": 0.8435706707264917, "learning_rate": 2.6127891331899023e-06, "loss": 0.6125, "step": 42230 }, { "epoch": 0.7671073659741392, "grad_norm": 0.8939633164252948, "learning_rate": 2.6089060228782624e-06, "loss": 0.5995, "step": 42240 }, { "epoch": 0.7672889728316141, "grad_norm": 0.8790349621935174, "learning_rate": 2.605025367322407e-06, "loss": 0.6074, "step": 42250 }, { "epoch": 0.767470579689089, "grad_norm": 0.8776188298336416, "learning_rate": 2.6011471678111866e-06, "loss": 0.5944, "step": 42260 }, { "epoch": 0.767652186546564, "grad_norm": 0.8530062486578288, "learning_rate": 2.597271425632647e-06, "loss": 0.611, "step": 42270 }, { "epoch": 0.7678337934040389, "grad_norm": 0.8289572212501608, "learning_rate": 2.5933981420740094e-06, "loss": 0.6074, "step": 42280 }, { "epoch": 0.7680154002615138, "grad_norm": 0.8909279667572056, "learning_rate": 2.589527318421686e-06, "loss": 0.6188, "step": 42290 }, { "epoch": 0.7681970071189889, "grad_norm": 0.83539493689141, "learning_rate": 2.585658955961264e-06, "loss": 0.6066, "step": 42300 }, { "epoch": 0.7683786139764638, "grad_norm": 0.8464759991838909, "learning_rate": 2.581793055977523e-06, "loss": 0.607, "step": 42310 }, { "epoch": 0.7685602208339387, "grad_norm": 0.843264928097793, "learning_rate": 2.5779296197544123e-06, "loss": 0.6058, "step": 42320 }, { "epoch": 0.7687418276914136, "grad_norm": 0.8712394149282097, "learning_rate": 2.5740686485750768e-06, "loss": 0.6096, "step": 42330 }, { "epoch": 0.7689234345488886, "grad_norm": 0.8511386949178242, "learning_rate": 2.5702101437218318e-06, "loss": 0.5978, "step": 42340 }, { "epoch": 0.7691050414063635, "grad_norm": 0.8922747424454264, "learning_rate": 2.5663541064761743e-06, "loss": 0.6049, "step": 42350 }, { "epoch": 0.7692866482638384, "grad_norm": 0.8554803578081345, "learning_rate": 2.5625005381187918e-06, "loss": 0.6076, "step": 42360 }, { "epoch": 0.7694682551213133, "grad_norm": 0.8441651309701441, "learning_rate": 2.558649439929537e-06, "loss": 0.604, "step": 42370 }, { "epoch": 0.7696498619787884, "grad_norm": 0.84406064893888, "learning_rate": 2.554800813187458e-06, "loss": 0.6171, "step": 42380 }, { "epoch": 0.7698314688362633, "grad_norm": 0.8697854159985043, "learning_rate": 2.550954659170766e-06, "loss": 0.6107, "step": 42390 }, { "epoch": 0.7700130756937382, "grad_norm": 0.8621250333685754, "learning_rate": 2.547110979156867e-06, "loss": 0.5911, "step": 42400 }, { "epoch": 0.7701946825512132, "grad_norm": 0.891770044223293, "learning_rate": 2.54326977442233e-06, "loss": 0.6044, "step": 42410 }, { "epoch": 0.7703762894086881, "grad_norm": 0.8711493065883501, "learning_rate": 2.5394310462429163e-06, "loss": 0.607, "step": 42420 }, { "epoch": 0.770557896266163, "grad_norm": 0.9484218955242201, "learning_rate": 2.5355947958935535e-06, "loss": 0.6148, "step": 42430 }, { "epoch": 0.7707395031236379, "grad_norm": 0.8695999004842905, "learning_rate": 2.5317610246483484e-06, "loss": 0.6128, "step": 42440 }, { "epoch": 0.7709211099811129, "grad_norm": 0.8911400936523396, "learning_rate": 2.527929733780592e-06, "loss": 0.6046, "step": 42450 }, { "epoch": 0.7711027168385878, "grad_norm": 0.8313553408482462, "learning_rate": 2.524100924562739e-06, "loss": 0.603, "step": 42460 }, { "epoch": 0.7712843236960628, "grad_norm": 0.8195187547097768, "learning_rate": 2.5202745982664344e-06, "loss": 0.5967, "step": 42470 }, { "epoch": 0.7714659305535377, "grad_norm": 0.8576610055879809, "learning_rate": 2.516450756162484e-06, "loss": 0.616, "step": 42480 }, { "epoch": 0.7716475374110127, "grad_norm": 0.8232639386829654, "learning_rate": 2.5126293995208815e-06, "loss": 0.6093, "step": 42490 }, { "epoch": 0.7718291442684876, "grad_norm": 0.8620055815436615, "learning_rate": 2.5088105296107844e-06, "loss": 0.6094, "step": 42500 }, { "epoch": 0.7720107511259625, "grad_norm": 0.8198392160965798, "learning_rate": 2.5049941477005325e-06, "loss": 0.6011, "step": 42510 }, { "epoch": 0.7721923579834374, "grad_norm": 0.8537643327114123, "learning_rate": 2.5011802550576325e-06, "loss": 0.6009, "step": 42520 }, { "epoch": 0.7723739648409124, "grad_norm": 0.8245085020302921, "learning_rate": 2.4973688529487714e-06, "loss": 0.5951, "step": 42530 }, { "epoch": 0.7725555716983873, "grad_norm": 0.8820018955519385, "learning_rate": 2.493559942639805e-06, "loss": 0.6229, "step": 42540 }, { "epoch": 0.7727371785558622, "grad_norm": 0.8067436352861355, "learning_rate": 2.489753525395756e-06, "loss": 0.6043, "step": 42550 }, { "epoch": 0.7729187854133373, "grad_norm": 0.9025801819591409, "learning_rate": 2.4859496024808326e-06, "loss": 0.6124, "step": 42560 }, { "epoch": 0.7731003922708122, "grad_norm": 0.9116045278840991, "learning_rate": 2.4821481751583997e-06, "loss": 0.615, "step": 42570 }, { "epoch": 0.7732819991282871, "grad_norm": 0.8432300600533235, "learning_rate": 2.4783492446910073e-06, "loss": 0.5982, "step": 42580 }, { "epoch": 0.773463605985762, "grad_norm": 0.8516558957581132, "learning_rate": 2.474552812340364e-06, "loss": 0.6089, "step": 42590 }, { "epoch": 0.773645212843237, "grad_norm": 0.8560372310907105, "learning_rate": 2.4707588793673588e-06, "loss": 0.6115, "step": 42600 }, { "epoch": 0.7738268197007119, "grad_norm": 0.8535455357364313, "learning_rate": 2.4669674470320403e-06, "loss": 0.6112, "step": 42610 }, { "epoch": 0.7740084265581868, "grad_norm": 0.891565257028944, "learning_rate": 2.463178516593635e-06, "loss": 0.6035, "step": 42620 }, { "epoch": 0.7741900334156617, "grad_norm": 0.8097565558615856, "learning_rate": 2.4593920893105393e-06, "loss": 0.6064, "step": 42630 }, { "epoch": 0.7743716402731368, "grad_norm": 0.9042115119272081, "learning_rate": 2.4556081664403085e-06, "loss": 0.608, "step": 42640 }, { "epoch": 0.7745532471306117, "grad_norm": 0.8330073529464165, "learning_rate": 2.4518267492396776e-06, "loss": 0.5926, "step": 42650 }, { "epoch": 0.7747348539880866, "grad_norm": 0.8403431391433172, "learning_rate": 2.448047838964539e-06, "loss": 0.6013, "step": 42660 }, { "epoch": 0.7749164608455615, "grad_norm": 0.8548044162836359, "learning_rate": 2.4442714368699615e-06, "loss": 0.6167, "step": 42670 }, { "epoch": 0.7750980677030365, "grad_norm": 0.8942378236292908, "learning_rate": 2.440497544210173e-06, "loss": 0.6024, "step": 42680 }, { "epoch": 0.7752796745605114, "grad_norm": 0.8767720711727579, "learning_rate": 2.4367261622385763e-06, "loss": 0.61, "step": 42690 }, { "epoch": 0.7754612814179863, "grad_norm": 0.8837876016688238, "learning_rate": 2.4329572922077318e-06, "loss": 0.6237, "step": 42700 }, { "epoch": 0.7756428882754612, "grad_norm": 0.9019499116449099, "learning_rate": 2.429190935369373e-06, "loss": 0.6035, "step": 42710 }, { "epoch": 0.7758244951329362, "grad_norm": 0.8749971350230255, "learning_rate": 2.4254270929743917e-06, "loss": 0.6118, "step": 42720 }, { "epoch": 0.7760061019904112, "grad_norm": 0.8385641626276965, "learning_rate": 2.4216657662728536e-06, "loss": 0.6074, "step": 42730 }, { "epoch": 0.7761877088478861, "grad_norm": 0.8648835540318451, "learning_rate": 2.41790695651398e-06, "loss": 0.6184, "step": 42740 }, { "epoch": 0.776369315705361, "grad_norm": 0.8879049393500914, "learning_rate": 2.4141506649461577e-06, "loss": 0.608, "step": 42750 }, { "epoch": 0.776550922562836, "grad_norm": 0.865472623442833, "learning_rate": 2.410396892816944e-06, "loss": 0.6173, "step": 42760 }, { "epoch": 0.7767325294203109, "grad_norm": 0.8748829754888832, "learning_rate": 2.40664564137305e-06, "loss": 0.6078, "step": 42770 }, { "epoch": 0.7769141362777858, "grad_norm": 0.8788163810667616, "learning_rate": 2.4028969118603595e-06, "loss": 0.6259, "step": 42780 }, { "epoch": 0.7770957431352608, "grad_norm": 0.8442314581660579, "learning_rate": 2.3991507055239094e-06, "loss": 0.6068, "step": 42790 }, { "epoch": 0.7772773499927357, "grad_norm": 0.8524547702137948, "learning_rate": 2.3954070236079064e-06, "loss": 0.6057, "step": 42800 }, { "epoch": 0.7774589568502107, "grad_norm": 0.8210273310725962, "learning_rate": 2.3916658673557092e-06, "loss": 0.6057, "step": 42810 }, { "epoch": 0.7776405637076856, "grad_norm": 0.8522212650450861, "learning_rate": 2.387927238009852e-06, "loss": 0.6069, "step": 42820 }, { "epoch": 0.7778221705651606, "grad_norm": 0.8200241801920872, "learning_rate": 2.384191136812016e-06, "loss": 0.6068, "step": 42830 }, { "epoch": 0.7780037774226355, "grad_norm": 0.8304788145921111, "learning_rate": 2.380457565003046e-06, "loss": 0.6153, "step": 42840 }, { "epoch": 0.7781853842801104, "grad_norm": 0.8597065497458305, "learning_rate": 2.376726523822954e-06, "loss": 0.6063, "step": 42850 }, { "epoch": 0.7783669911375853, "grad_norm": 0.8172841942235521, "learning_rate": 2.372998014510902e-06, "loss": 0.6087, "step": 42860 }, { "epoch": 0.7785485979950603, "grad_norm": 0.886786641324876, "learning_rate": 2.36927203830522e-06, "loss": 0.6137, "step": 42870 }, { "epoch": 0.7787302048525352, "grad_norm": 0.8192068482408121, "learning_rate": 2.3655485964433876e-06, "loss": 0.5983, "step": 42880 }, { "epoch": 0.7789118117100101, "grad_norm": 0.8533543336121987, "learning_rate": 2.3618276901620516e-06, "loss": 0.6075, "step": 42890 }, { "epoch": 0.7790934185674852, "grad_norm": 0.8783646919603804, "learning_rate": 2.358109320697007e-06, "loss": 0.6138, "step": 42900 }, { "epoch": 0.7792750254249601, "grad_norm": 0.878617055691604, "learning_rate": 2.354393489283219e-06, "loss": 0.6001, "step": 42910 }, { "epoch": 0.779456632282435, "grad_norm": 0.8695758347670324, "learning_rate": 2.3506801971547934e-06, "loss": 0.6014, "step": 42920 }, { "epoch": 0.7796382391399099, "grad_norm": 0.8853997055993276, "learning_rate": 2.34696944554501e-06, "loss": 0.6042, "step": 42930 }, { "epoch": 0.7798198459973849, "grad_norm": 0.8825140609247758, "learning_rate": 2.3432612356862917e-06, "loss": 0.6109, "step": 42940 }, { "epoch": 0.7800014528548598, "grad_norm": 0.8361755664766649, "learning_rate": 2.339555568810221e-06, "loss": 0.588, "step": 42950 }, { "epoch": 0.7801830597123347, "grad_norm": 0.8830956017461614, "learning_rate": 2.3358524461475417e-06, "loss": 0.6125, "step": 42960 }, { "epoch": 0.7803646665698096, "grad_norm": 0.8344704837390223, "learning_rate": 2.332151868928142e-06, "loss": 0.6143, "step": 42970 }, { "epoch": 0.7805462734272847, "grad_norm": 0.8502185547332424, "learning_rate": 2.3284538383810774e-06, "loss": 0.6091, "step": 42980 }, { "epoch": 0.7807278802847596, "grad_norm": 0.8366452012394872, "learning_rate": 2.3247583557345423e-06, "loss": 0.6081, "step": 42990 }, { "epoch": 0.7809094871422345, "grad_norm": 0.8645257181623275, "learning_rate": 2.3210654222159013e-06, "loss": 0.6085, "step": 43000 }, { "epoch": 0.7810910939997094, "grad_norm": 0.8722043241773308, "learning_rate": 2.317375039051657e-06, "loss": 0.6037, "step": 43010 }, { "epoch": 0.7812727008571844, "grad_norm": 0.8377829441519745, "learning_rate": 2.3136872074674765e-06, "loss": 0.5958, "step": 43020 }, { "epoch": 0.7814543077146593, "grad_norm": 0.8526478064208896, "learning_rate": 2.310001928688177e-06, "loss": 0.6127, "step": 43030 }, { "epoch": 0.7816359145721342, "grad_norm": 0.8320637593214786, "learning_rate": 2.3063192039377215e-06, "loss": 0.6158, "step": 43040 }, { "epoch": 0.7818175214296091, "grad_norm": 0.8279530778413918, "learning_rate": 2.3026390344392346e-06, "loss": 0.5973, "step": 43050 }, { "epoch": 0.7819991282870841, "grad_norm": 0.8639394995065489, "learning_rate": 2.29896142141498e-06, "loss": 0.603, "step": 43060 }, { "epoch": 0.7821807351445591, "grad_norm": 0.864094766809671, "learning_rate": 2.2952863660863865e-06, "loss": 0.6143, "step": 43070 }, { "epoch": 0.782362342002034, "grad_norm": 0.8581765592816405, "learning_rate": 2.2916138696740197e-06, "loss": 0.6049, "step": 43080 }, { "epoch": 0.782543948859509, "grad_norm": 0.832904899180358, "learning_rate": 2.2879439333976084e-06, "loss": 0.6115, "step": 43090 }, { "epoch": 0.7827255557169839, "grad_norm": 0.8335635677376747, "learning_rate": 2.2842765584760183e-06, "loss": 0.6016, "step": 43100 }, { "epoch": 0.7829071625744588, "grad_norm": 0.8547023662480621, "learning_rate": 2.280611746127276e-06, "loss": 0.603, "step": 43110 }, { "epoch": 0.7830887694319337, "grad_norm": 0.8669887450677867, "learning_rate": 2.2769494975685468e-06, "loss": 0.6193, "step": 43120 }, { "epoch": 0.7832703762894087, "grad_norm": 0.9641007376366927, "learning_rate": 2.273289814016154e-06, "loss": 0.6072, "step": 43130 }, { "epoch": 0.7834519831468836, "grad_norm": 0.8376822822747028, "learning_rate": 2.269632696685563e-06, "loss": 0.6045, "step": 43140 }, { "epoch": 0.7836335900043586, "grad_norm": 0.831804773539668, "learning_rate": 2.2659781467913855e-06, "loss": 0.6099, "step": 43150 }, { "epoch": 0.7838151968618335, "grad_norm": 0.912039160145697, "learning_rate": 2.2623261655473873e-06, "loss": 0.6155, "step": 43160 }, { "epoch": 0.7839968037193085, "grad_norm": 0.8479991831141638, "learning_rate": 2.2586767541664723e-06, "loss": 0.6009, "step": 43170 }, { "epoch": 0.7841784105767834, "grad_norm": 0.8265042978214484, "learning_rate": 2.255029913860701e-06, "loss": 0.6049, "step": 43180 }, { "epoch": 0.7843600174342583, "grad_norm": 0.8333032933930744, "learning_rate": 2.2513856458412707e-06, "loss": 0.6026, "step": 43190 }, { "epoch": 0.7845416242917332, "grad_norm": 0.8817343647446039, "learning_rate": 2.247743951318532e-06, "loss": 0.6171, "step": 43200 }, { "epoch": 0.7847232311492082, "grad_norm": 0.837770435226927, "learning_rate": 2.244104831501972e-06, "loss": 0.6039, "step": 43210 }, { "epoch": 0.7849048380066831, "grad_norm": 0.8563209007102893, "learning_rate": 2.2404682876002328e-06, "loss": 0.6003, "step": 43220 }, { "epoch": 0.785086444864158, "grad_norm": 0.863693760687141, "learning_rate": 2.236834320821095e-06, "loss": 0.6047, "step": 43230 }, { "epoch": 0.785268051721633, "grad_norm": 0.851893826536159, "learning_rate": 2.23320293237148e-06, "loss": 0.6162, "step": 43240 }, { "epoch": 0.785449658579108, "grad_norm": 0.8192762427825648, "learning_rate": 2.229574123457463e-06, "loss": 0.6156, "step": 43250 }, { "epoch": 0.7856312654365829, "grad_norm": 0.8390563039164545, "learning_rate": 2.225947895284252e-06, "loss": 0.6095, "step": 43260 }, { "epoch": 0.7858128722940578, "grad_norm": 0.868726878089366, "learning_rate": 2.222324249056207e-06, "loss": 0.6021, "step": 43270 }, { "epoch": 0.7859944791515328, "grad_norm": 0.864592601873997, "learning_rate": 2.2187031859768205e-06, "loss": 0.6166, "step": 43280 }, { "epoch": 0.7861760860090077, "grad_norm": 0.8881235563398516, "learning_rate": 2.215084707248738e-06, "loss": 0.6084, "step": 43290 }, { "epoch": 0.7863576928664826, "grad_norm": 0.8958295457330488, "learning_rate": 2.211468814073735e-06, "loss": 0.6121, "step": 43300 }, { "epoch": 0.7865392997239575, "grad_norm": 0.8659648120381263, "learning_rate": 2.207855507652742e-06, "loss": 0.6047, "step": 43310 }, { "epoch": 0.7867209065814326, "grad_norm": 0.8422168518141947, "learning_rate": 2.2042447891858163e-06, "loss": 0.6035, "step": 43320 }, { "epoch": 0.7869025134389075, "grad_norm": 0.8665364723555464, "learning_rate": 2.2006366598721672e-06, "loss": 0.6098, "step": 43330 }, { "epoch": 0.7870841202963824, "grad_norm": 0.8690836591871302, "learning_rate": 2.197031120910137e-06, "loss": 0.6086, "step": 43340 }, { "epoch": 0.7872657271538573, "grad_norm": 0.8427031668191127, "learning_rate": 2.1934281734972076e-06, "loss": 0.6062, "step": 43350 }, { "epoch": 0.7874473340113323, "grad_norm": 0.8243125034271312, "learning_rate": 2.189827818830006e-06, "loss": 0.6013, "step": 43360 }, { "epoch": 0.7876289408688072, "grad_norm": 0.8513424085515238, "learning_rate": 2.1862300581042917e-06, "loss": 0.6126, "step": 43370 }, { "epoch": 0.7878105477262821, "grad_norm": 0.8467489159077585, "learning_rate": 2.182634892514969e-06, "loss": 0.5994, "step": 43380 }, { "epoch": 0.787992154583757, "grad_norm": 0.8291985866735357, "learning_rate": 2.179042323256071e-06, "loss": 0.6051, "step": 43390 }, { "epoch": 0.788173761441232, "grad_norm": 0.894364279811461, "learning_rate": 2.1754523515207815e-06, "loss": 0.6105, "step": 43400 }, { "epoch": 0.788355368298707, "grad_norm": 0.8639821041647684, "learning_rate": 2.171864978501407e-06, "loss": 0.6011, "step": 43410 }, { "epoch": 0.7885369751561819, "grad_norm": 0.8338333969097916, "learning_rate": 2.1682802053894024e-06, "loss": 0.6013, "step": 43420 }, { "epoch": 0.7887185820136569, "grad_norm": 0.8591464012604034, "learning_rate": 2.1646980333753577e-06, "loss": 0.5997, "step": 43430 }, { "epoch": 0.7889001888711318, "grad_norm": 0.8562231612706687, "learning_rate": 2.161118463648989e-06, "loss": 0.6006, "step": 43440 }, { "epoch": 0.7890817957286067, "grad_norm": 0.8537465684062812, "learning_rate": 2.1575414973991636e-06, "loss": 0.6064, "step": 43450 }, { "epoch": 0.7892634025860816, "grad_norm": 0.8589565039724398, "learning_rate": 2.153967135813869e-06, "loss": 0.6037, "step": 43460 }, { "epoch": 0.7894450094435566, "grad_norm": 0.8579653585808215, "learning_rate": 2.1503953800802402e-06, "loss": 0.6113, "step": 43470 }, { "epoch": 0.7896266163010315, "grad_norm": 0.8486336764861334, "learning_rate": 2.1468262313845355e-06, "loss": 0.6133, "step": 43480 }, { "epoch": 0.7898082231585065, "grad_norm": 0.8829942328910387, "learning_rate": 2.1432596909121583e-06, "loss": 0.6061, "step": 43490 }, { "epoch": 0.7899898300159814, "grad_norm": 0.872495026183541, "learning_rate": 2.139695759847635e-06, "loss": 0.6265, "step": 43500 }, { "epoch": 0.7901714368734564, "grad_norm": 0.8703693048929927, "learning_rate": 2.1361344393746374e-06, "loss": 0.6045, "step": 43510 }, { "epoch": 0.7903530437309313, "grad_norm": 0.814087515492702, "learning_rate": 2.1325757306759564e-06, "loss": 0.6128, "step": 43520 }, { "epoch": 0.7905346505884062, "grad_norm": 0.8259751752658437, "learning_rate": 2.129019634933529e-06, "loss": 0.5968, "step": 43530 }, { "epoch": 0.7907162574458811, "grad_norm": 0.8651281457456644, "learning_rate": 2.125466153328416e-06, "loss": 0.6098, "step": 43540 }, { "epoch": 0.7908978643033561, "grad_norm": 0.8738480920105226, "learning_rate": 2.1219152870408075e-06, "loss": 0.5973, "step": 43550 }, { "epoch": 0.791079471160831, "grad_norm": 0.8480664711504162, "learning_rate": 2.1183670372500366e-06, "loss": 0.6048, "step": 43560 }, { "epoch": 0.7912610780183059, "grad_norm": 0.8488488010308362, "learning_rate": 2.114821405134554e-06, "loss": 0.6068, "step": 43570 }, { "epoch": 0.791442684875781, "grad_norm": 0.8711627994859225, "learning_rate": 2.1112783918719536e-06, "loss": 0.5989, "step": 43580 }, { "epoch": 0.7916242917332559, "grad_norm": 0.8397783554428567, "learning_rate": 2.1077379986389467e-06, "loss": 0.5981, "step": 43590 }, { "epoch": 0.7918058985907308, "grad_norm": 0.852686145366961, "learning_rate": 2.104200226611387e-06, "loss": 0.6164, "step": 43600 }, { "epoch": 0.7919875054482057, "grad_norm": 0.8468496612611238, "learning_rate": 2.1006650769642466e-06, "loss": 0.5889, "step": 43610 }, { "epoch": 0.7921691123056807, "grad_norm": 0.8446029311412032, "learning_rate": 2.0971325508716366e-06, "loss": 0.6038, "step": 43620 }, { "epoch": 0.7923507191631556, "grad_norm": 0.8581035654225628, "learning_rate": 2.0936026495067874e-06, "loss": 0.6008, "step": 43630 }, { "epoch": 0.7925323260206305, "grad_norm": 0.8519081673204999, "learning_rate": 2.0900753740420653e-06, "loss": 0.5993, "step": 43640 }, { "epoch": 0.7927139328781054, "grad_norm": 0.8747173199963969, "learning_rate": 2.0865507256489614e-06, "loss": 0.5976, "step": 43650 }, { "epoch": 0.7928955397355805, "grad_norm": 0.8723066687482305, "learning_rate": 2.0830287054980893e-06, "loss": 0.6081, "step": 43660 }, { "epoch": 0.7930771465930554, "grad_norm": 0.8836503484844601, "learning_rate": 2.0795093147591993e-06, "loss": 0.6101, "step": 43670 }, { "epoch": 0.7932587534505303, "grad_norm": 0.840364755746385, "learning_rate": 2.0759925546011617e-06, "loss": 0.5975, "step": 43680 }, { "epoch": 0.7934403603080052, "grad_norm": 0.8861601437715018, "learning_rate": 2.0724784261919774e-06, "loss": 0.5967, "step": 43690 }, { "epoch": 0.7936219671654802, "grad_norm": 0.8702984446969434, "learning_rate": 2.068966930698766e-06, "loss": 0.5995, "step": 43700 }, { "epoch": 0.7938035740229551, "grad_norm": 0.8322997928644538, "learning_rate": 2.0654580692877835e-06, "loss": 0.6069, "step": 43710 }, { "epoch": 0.79398518088043, "grad_norm": 0.8354783173116767, "learning_rate": 2.0619518431244e-06, "loss": 0.6189, "step": 43720 }, { "epoch": 0.794166787737905, "grad_norm": 0.8611352778898375, "learning_rate": 2.058448253373121e-06, "loss": 0.6097, "step": 43730 }, { "epoch": 0.7943483945953799, "grad_norm": 0.8813144972866586, "learning_rate": 2.0549473011975683e-06, "loss": 0.6024, "step": 43740 }, { "epoch": 0.7945300014528549, "grad_norm": 0.8319444529806935, "learning_rate": 2.051448987760486e-06, "loss": 0.6077, "step": 43750 }, { "epoch": 0.7947116083103298, "grad_norm": 0.89791983057922, "learning_rate": 2.047953314223753e-06, "loss": 0.608, "step": 43760 }, { "epoch": 0.7948932151678048, "grad_norm": 0.8895397473145504, "learning_rate": 2.044460281748358e-06, "loss": 0.6176, "step": 43770 }, { "epoch": 0.7950748220252797, "grad_norm": 0.8306794437123827, "learning_rate": 2.0409698914944264e-06, "loss": 0.5935, "step": 43780 }, { "epoch": 0.7952564288827546, "grad_norm": 0.8657875152462903, "learning_rate": 2.037482144621191e-06, "loss": 0.6032, "step": 43790 }, { "epoch": 0.7954380357402295, "grad_norm": 0.8605850542283526, "learning_rate": 2.0339970422870213e-06, "loss": 0.6029, "step": 43800 }, { "epoch": 0.7956196425977045, "grad_norm": 0.8834885263955158, "learning_rate": 2.0305145856493958e-06, "loss": 0.5995, "step": 43810 }, { "epoch": 0.7958012494551794, "grad_norm": 0.894327974602787, "learning_rate": 2.0270347758649234e-06, "loss": 0.6051, "step": 43820 }, { "epoch": 0.7959828563126544, "grad_norm": 0.8449678116225356, "learning_rate": 2.023557614089332e-06, "loss": 0.6195, "step": 43830 }, { "epoch": 0.7961644631701293, "grad_norm": 0.8598010954698959, "learning_rate": 2.0200831014774635e-06, "loss": 0.5989, "step": 43840 }, { "epoch": 0.7963460700276043, "grad_norm": 0.8893907892036754, "learning_rate": 2.0166112391832917e-06, "loss": 0.6099, "step": 43850 }, { "epoch": 0.7965276768850792, "grad_norm": 0.8487502473331228, "learning_rate": 2.013142028359897e-06, "loss": 0.611, "step": 43860 }, { "epoch": 0.7967092837425541, "grad_norm": 0.8434440741253535, "learning_rate": 2.009675470159491e-06, "loss": 0.6025, "step": 43870 }, { "epoch": 0.796890890600029, "grad_norm": 0.8649034262995654, "learning_rate": 2.0062115657333936e-06, "loss": 0.6032, "step": 43880 }, { "epoch": 0.797072497457504, "grad_norm": 0.8654975265936942, "learning_rate": 2.002750316232056e-06, "loss": 0.6074, "step": 43890 }, { "epoch": 0.7972541043149789, "grad_norm": 0.8598460399279205, "learning_rate": 1.9992917228050336e-06, "loss": 0.6125, "step": 43900 }, { "epoch": 0.7974357111724538, "grad_norm": 0.8845679513724055, "learning_rate": 1.9958357866010114e-06, "loss": 0.5893, "step": 43910 }, { "epoch": 0.7976173180299289, "grad_norm": 0.8337254731818113, "learning_rate": 1.992382508767782e-06, "loss": 0.6138, "step": 43920 }, { "epoch": 0.7977989248874038, "grad_norm": 0.8794986333153472, "learning_rate": 1.9889318904522648e-06, "loss": 0.5968, "step": 43930 }, { "epoch": 0.7979805317448787, "grad_norm": 0.8626232802246582, "learning_rate": 1.9854839328004906e-06, "loss": 0.6015, "step": 43940 }, { "epoch": 0.7981621386023536, "grad_norm": 0.8517655022082026, "learning_rate": 1.982038636957602e-06, "loss": 0.5941, "step": 43950 }, { "epoch": 0.7983437454598286, "grad_norm": 0.879277082346829, "learning_rate": 1.978596004067869e-06, "loss": 0.6021, "step": 43960 }, { "epoch": 0.7985253523173035, "grad_norm": 0.8224250540589024, "learning_rate": 1.9751560352746644e-06, "loss": 0.6027, "step": 43970 }, { "epoch": 0.7987069591747784, "grad_norm": 0.8513715966658408, "learning_rate": 1.97171873172049e-06, "loss": 0.6017, "step": 43980 }, { "epoch": 0.7988885660322533, "grad_norm": 0.8424862721225396, "learning_rate": 1.968284094546948e-06, "loss": 0.5908, "step": 43990 }, { "epoch": 0.7990701728897284, "grad_norm": 0.8542303979551494, "learning_rate": 1.9648521248947683e-06, "loss": 0.5987, "step": 44000 }, { "epoch": 0.7992517797472033, "grad_norm": 0.8544702977032433, "learning_rate": 1.9614228239037815e-06, "loss": 0.6179, "step": 44010 }, { "epoch": 0.7994333866046782, "grad_norm": 0.8573119676357216, "learning_rate": 1.9579961927129467e-06, "loss": 0.6154, "step": 44020 }, { "epoch": 0.7996149934621531, "grad_norm": 0.8660826702583518, "learning_rate": 1.9545722324603213e-06, "loss": 0.6046, "step": 44030 }, { "epoch": 0.7997966003196281, "grad_norm": 0.8354890087597284, "learning_rate": 1.951150944283089e-06, "loss": 0.6201, "step": 44040 }, { "epoch": 0.799978207177103, "grad_norm": 0.8456887486626038, "learning_rate": 1.9477323293175377e-06, "loss": 0.5925, "step": 44050 }, { "epoch": 0.8001598140345779, "grad_norm": 0.8804202935533453, "learning_rate": 1.9443163886990655e-06, "loss": 0.5903, "step": 44060 }, { "epoch": 0.8003414208920528, "grad_norm": 0.8525928604197825, "learning_rate": 1.9409031235621935e-06, "loss": 0.5959, "step": 44070 }, { "epoch": 0.8005230277495278, "grad_norm": 0.8411143133271266, "learning_rate": 1.9374925350405405e-06, "loss": 0.5968, "step": 44080 }, { "epoch": 0.8007046346070028, "grad_norm": 0.8563078203258772, "learning_rate": 1.934084624266849e-06, "loss": 0.6095, "step": 44090 }, { "epoch": 0.8008862414644777, "grad_norm": 0.8491874177405555, "learning_rate": 1.9306793923729607e-06, "loss": 0.5933, "step": 44100 }, { "epoch": 0.8010678483219527, "grad_norm": 0.8144449803260824, "learning_rate": 1.927276840489839e-06, "loss": 0.5889, "step": 44110 }, { "epoch": 0.8012494551794276, "grad_norm": 0.8509852093371383, "learning_rate": 1.9238769697475444e-06, "loss": 0.5931, "step": 44120 }, { "epoch": 0.8014310620369025, "grad_norm": 0.8464549230985641, "learning_rate": 1.9204797812752598e-06, "loss": 0.5962, "step": 44130 }, { "epoch": 0.8016126688943774, "grad_norm": 0.8356455885686942, "learning_rate": 1.917085276201269e-06, "loss": 0.5966, "step": 44140 }, { "epoch": 0.8017942757518524, "grad_norm": 0.8830176519817108, "learning_rate": 1.913693455652965e-06, "loss": 0.5931, "step": 44150 }, { "epoch": 0.8019758826093273, "grad_norm": 0.8582873589225192, "learning_rate": 1.9103043207568537e-06, "loss": 0.6192, "step": 44160 }, { "epoch": 0.8021574894668023, "grad_norm": 0.8554357473293756, "learning_rate": 1.906917872638544e-06, "loss": 0.6077, "step": 44170 }, { "epoch": 0.8023390963242772, "grad_norm": 0.8885776375955949, "learning_rate": 1.9035341124227568e-06, "loss": 0.6028, "step": 44180 }, { "epoch": 0.8025207031817522, "grad_norm": 0.8582057695272651, "learning_rate": 1.9001530412333157e-06, "loss": 0.6028, "step": 44190 }, { "epoch": 0.8027023100392271, "grad_norm": 0.810730676170535, "learning_rate": 1.896774660193159e-06, "loss": 0.608, "step": 44200 }, { "epoch": 0.802883916896702, "grad_norm": 0.8955561007243295, "learning_rate": 1.8933989704243195e-06, "loss": 0.5895, "step": 44210 }, { "epoch": 0.803065523754177, "grad_norm": 0.8496896337557116, "learning_rate": 1.8900259730479465e-06, "loss": 0.6035, "step": 44220 }, { "epoch": 0.8032471306116519, "grad_norm": 0.8626227509156433, "learning_rate": 1.8866556691842941e-06, "loss": 0.6053, "step": 44230 }, { "epoch": 0.8034287374691268, "grad_norm": 0.8366920114023934, "learning_rate": 1.8832880599527147e-06, "loss": 0.6019, "step": 44240 }, { "epoch": 0.8036103443266017, "grad_norm": 0.8675181279579736, "learning_rate": 1.8799231464716738e-06, "loss": 0.5985, "step": 44250 }, { "epoch": 0.8037919511840768, "grad_norm": 0.9076753724813785, "learning_rate": 1.8765609298587351e-06, "loss": 0.6129, "step": 44260 }, { "epoch": 0.8039735580415517, "grad_norm": 0.8474172817536522, "learning_rate": 1.873201411230574e-06, "loss": 0.6012, "step": 44270 }, { "epoch": 0.8041551648990266, "grad_norm": 0.8539941967110605, "learning_rate": 1.8698445917029596e-06, "loss": 0.6068, "step": 44280 }, { "epoch": 0.8043367717565015, "grad_norm": 0.814108686645102, "learning_rate": 1.8664904723907761e-06, "loss": 0.5964, "step": 44290 }, { "epoch": 0.8045183786139765, "grad_norm": 0.8758865835189731, "learning_rate": 1.8631390544080007e-06, "loss": 0.6196, "step": 44300 }, { "epoch": 0.8046999854714514, "grad_norm": 0.8657383675308072, "learning_rate": 1.8597903388677218e-06, "loss": 0.6067, "step": 44310 }, { "epoch": 0.8048815923289263, "grad_norm": 0.8447656755557273, "learning_rate": 1.856444326882123e-06, "loss": 0.5967, "step": 44320 }, { "epoch": 0.8050631991864012, "grad_norm": 0.8710183851205596, "learning_rate": 1.8531010195624977e-06, "loss": 0.5942, "step": 44330 }, { "epoch": 0.8052448060438763, "grad_norm": 0.8680880160209085, "learning_rate": 1.849760418019233e-06, "loss": 0.5934, "step": 44340 }, { "epoch": 0.8054264129013512, "grad_norm": 0.8567858819562676, "learning_rate": 1.8464225233618206e-06, "loss": 0.6026, "step": 44350 }, { "epoch": 0.8056080197588261, "grad_norm": 0.8506301399215901, "learning_rate": 1.8430873366988577e-06, "loss": 0.6052, "step": 44360 }, { "epoch": 0.805789626616301, "grad_norm": 0.8469981149769875, "learning_rate": 1.839754859138032e-06, "loss": 0.6043, "step": 44370 }, { "epoch": 0.805971233473776, "grad_norm": 0.854354106464105, "learning_rate": 1.8364250917861448e-06, "loss": 0.6053, "step": 44380 }, { "epoch": 0.8061528403312509, "grad_norm": 0.8390812750597944, "learning_rate": 1.8330980357490836e-06, "loss": 0.5937, "step": 44390 }, { "epoch": 0.8063344471887258, "grad_norm": 0.8586735277073825, "learning_rate": 1.8297736921318465e-06, "loss": 0.6008, "step": 44400 }, { "epoch": 0.8065160540462007, "grad_norm": 0.8595177512248917, "learning_rate": 1.8264520620385218e-06, "loss": 0.589, "step": 44410 }, { "epoch": 0.8066976609036757, "grad_norm": 0.875372162571343, "learning_rate": 1.8231331465723056e-06, "loss": 0.5978, "step": 44420 }, { "epoch": 0.8068792677611507, "grad_norm": 0.8440448646136574, "learning_rate": 1.8198169468354832e-06, "loss": 0.598, "step": 44430 }, { "epoch": 0.8070608746186256, "grad_norm": 0.8885419146189085, "learning_rate": 1.8165034639294455e-06, "loss": 0.6088, "step": 44440 }, { "epoch": 0.8072424814761006, "grad_norm": 0.8570322002190501, "learning_rate": 1.8131926989546778e-06, "loss": 0.6176, "step": 44450 }, { "epoch": 0.8074240883335755, "grad_norm": 0.8636436985939417, "learning_rate": 1.8098846530107583e-06, "loss": 0.6048, "step": 44460 }, { "epoch": 0.8076056951910504, "grad_norm": 0.8433494284033362, "learning_rate": 1.8065793271963739e-06, "loss": 0.6094, "step": 44470 }, { "epoch": 0.8077873020485253, "grad_norm": 0.8705690951875317, "learning_rate": 1.8032767226092928e-06, "loss": 0.5911, "step": 44480 }, { "epoch": 0.8079689089060003, "grad_norm": 0.8710083055229725, "learning_rate": 1.7999768403463958e-06, "loss": 0.5989, "step": 44490 }, { "epoch": 0.8081505157634752, "grad_norm": 0.8794022299219546, "learning_rate": 1.7966796815036447e-06, "loss": 0.5932, "step": 44500 }, { "epoch": 0.8083321226209502, "grad_norm": 0.8253515860445974, "learning_rate": 1.793385247176107e-06, "loss": 0.6, "step": 44510 }, { "epoch": 0.8085137294784251, "grad_norm": 0.8577752672470568, "learning_rate": 1.7900935384579398e-06, "loss": 0.6066, "step": 44520 }, { "epoch": 0.8086953363359001, "grad_norm": 0.8473123542420893, "learning_rate": 1.7868045564423985e-06, "loss": 0.6196, "step": 44530 }, { "epoch": 0.808876943193375, "grad_norm": 0.8527755253306798, "learning_rate": 1.7835183022218316e-06, "loss": 0.6056, "step": 44540 }, { "epoch": 0.8090585500508499, "grad_norm": 0.8476237408643085, "learning_rate": 1.780234776887677e-06, "loss": 0.6122, "step": 44550 }, { "epoch": 0.8092401569083248, "grad_norm": 0.8263137959456591, "learning_rate": 1.776953981530476e-06, "loss": 0.6092, "step": 44560 }, { "epoch": 0.8094217637657998, "grad_norm": 0.8386708610775336, "learning_rate": 1.773675917239852e-06, "loss": 0.5778, "step": 44570 }, { "epoch": 0.8096033706232747, "grad_norm": 0.8799304129284629, "learning_rate": 1.7704005851045335e-06, "loss": 0.6098, "step": 44580 }, { "epoch": 0.8097849774807496, "grad_norm": 0.8603237259622809, "learning_rate": 1.7671279862123303e-06, "loss": 0.5962, "step": 44590 }, { "epoch": 0.8099665843382247, "grad_norm": 0.8685236609185536, "learning_rate": 1.7638581216501526e-06, "loss": 0.6149, "step": 44600 }, { "epoch": 0.8101481911956996, "grad_norm": 0.8611359454930183, "learning_rate": 1.7605909925039955e-06, "loss": 0.5901, "step": 44610 }, { "epoch": 0.8103297980531745, "grad_norm": 0.8866720704297322, "learning_rate": 1.7573265998589506e-06, "loss": 0.6035, "step": 44620 }, { "epoch": 0.8105114049106494, "grad_norm": 0.8264917276526118, "learning_rate": 1.754064944799203e-06, "loss": 0.6033, "step": 44630 }, { "epoch": 0.8106930117681244, "grad_norm": 0.902324250377664, "learning_rate": 1.7508060284080186e-06, "loss": 0.6049, "step": 44640 }, { "epoch": 0.8108746186255993, "grad_norm": 0.8613911607900664, "learning_rate": 1.7475498517677669e-06, "loss": 0.6005, "step": 44650 }, { "epoch": 0.8110562254830742, "grad_norm": 0.8873821946502407, "learning_rate": 1.7442964159598941e-06, "loss": 0.5965, "step": 44660 }, { "epoch": 0.8112378323405491, "grad_norm": 0.88759268265606, "learning_rate": 1.741045722064948e-06, "loss": 0.6152, "step": 44670 }, { "epoch": 0.8114194391980242, "grad_norm": 0.846020421734156, "learning_rate": 1.737797771162556e-06, "loss": 0.6105, "step": 44680 }, { "epoch": 0.8116010460554991, "grad_norm": 0.8602889426595124, "learning_rate": 1.734552564331442e-06, "loss": 0.6115, "step": 44690 }, { "epoch": 0.811782652912974, "grad_norm": 0.8869768909099425, "learning_rate": 1.7313101026494128e-06, "loss": 0.5916, "step": 44700 }, { "epoch": 0.811964259770449, "grad_norm": 0.8669718439809853, "learning_rate": 1.728070387193369e-06, "loss": 0.5954, "step": 44710 }, { "epoch": 0.8121458666279239, "grad_norm": 0.8775195460705966, "learning_rate": 1.7248334190392924e-06, "loss": 0.6095, "step": 44720 }, { "epoch": 0.8123274734853988, "grad_norm": 0.8788090153871546, "learning_rate": 1.7215991992622606e-06, "loss": 0.5983, "step": 44730 }, { "epoch": 0.8125090803428737, "grad_norm": 0.852205904479723, "learning_rate": 1.7183677289364309e-06, "loss": 0.6111, "step": 44740 }, { "epoch": 0.8126906872003486, "grad_norm": 0.8584148762978108, "learning_rate": 1.7151390091350484e-06, "loss": 0.6061, "step": 44750 }, { "epoch": 0.8128722940578236, "grad_norm": 0.8733303250512255, "learning_rate": 1.7119130409304519e-06, "loss": 0.6072, "step": 44760 }, { "epoch": 0.8130539009152986, "grad_norm": 0.8492919597632284, "learning_rate": 1.7086898253940565e-06, "loss": 0.6063, "step": 44770 }, { "epoch": 0.8132355077727735, "grad_norm": 0.8571957803517314, "learning_rate": 1.7054693635963715e-06, "loss": 0.6046, "step": 44780 }, { "epoch": 0.8134171146302485, "grad_norm": 0.8697351889411505, "learning_rate": 1.7022516566069846e-06, "loss": 0.6109, "step": 44790 }, { "epoch": 0.8135987214877234, "grad_norm": 0.8576930045159717, "learning_rate": 1.6990367054945756e-06, "loss": 0.6067, "step": 44800 }, { "epoch": 0.8137803283451983, "grad_norm": 0.8625173658513435, "learning_rate": 1.6958245113269e-06, "loss": 0.5881, "step": 44810 }, { "epoch": 0.8139619352026732, "grad_norm": 0.8773937137859354, "learning_rate": 1.692615075170808e-06, "loss": 0.603, "step": 44820 }, { "epoch": 0.8141435420601482, "grad_norm": 0.898099525928773, "learning_rate": 1.689408398092225e-06, "loss": 0.6014, "step": 44830 }, { "epoch": 0.8143251489176231, "grad_norm": 0.8848878463828462, "learning_rate": 1.6862044811561663e-06, "loss": 0.6072, "step": 44840 }, { "epoch": 0.814506755775098, "grad_norm": 0.8557722626604024, "learning_rate": 1.6830033254267275e-06, "loss": 0.607, "step": 44850 }, { "epoch": 0.814688362632573, "grad_norm": 0.8561093546500459, "learning_rate": 1.679804931967085e-06, "loss": 0.5945, "step": 44860 }, { "epoch": 0.814869969490048, "grad_norm": 0.8664227007707271, "learning_rate": 1.6766093018395047e-06, "loss": 0.6007, "step": 44870 }, { "epoch": 0.8150515763475229, "grad_norm": 0.8646027134832669, "learning_rate": 1.673416436105324e-06, "loss": 0.6023, "step": 44880 }, { "epoch": 0.8152331832049978, "grad_norm": 0.8710656224626131, "learning_rate": 1.670226335824976e-06, "loss": 0.6002, "step": 44890 }, { "epoch": 0.8154147900624727, "grad_norm": 0.8308473022946652, "learning_rate": 1.667039002057962e-06, "loss": 0.6049, "step": 44900 }, { "epoch": 0.8155963969199477, "grad_norm": 0.8598462920915908, "learning_rate": 1.663854435862875e-06, "loss": 0.6086, "step": 44910 }, { "epoch": 0.8157780037774226, "grad_norm": 0.8670645237947784, "learning_rate": 1.6606726382973781e-06, "loss": 0.6054, "step": 44920 }, { "epoch": 0.8159596106348975, "grad_norm": 0.8902748915707794, "learning_rate": 1.6574936104182281e-06, "loss": 0.6049, "step": 44930 }, { "epoch": 0.8161412174923726, "grad_norm": 0.8701817299133681, "learning_rate": 1.6543173532812496e-06, "loss": 0.6065, "step": 44940 }, { "epoch": 0.8163228243498475, "grad_norm": 0.9444413681225228, "learning_rate": 1.6511438679413549e-06, "loss": 0.6141, "step": 44950 }, { "epoch": 0.8165044312073224, "grad_norm": 0.857297236454464, "learning_rate": 1.6479731554525336e-06, "loss": 0.6079, "step": 44960 }, { "epoch": 0.8166860380647973, "grad_norm": 0.9077150305828088, "learning_rate": 1.6448052168678485e-06, "loss": 0.6109, "step": 44970 }, { "epoch": 0.8168676449222723, "grad_norm": 0.8677932967270477, "learning_rate": 1.6416400532394528e-06, "loss": 0.6076, "step": 44980 }, { "epoch": 0.8170492517797472, "grad_norm": 0.8753326468462612, "learning_rate": 1.6384776656185664e-06, "loss": 0.613, "step": 44990 }, { "epoch": 0.8172308586372221, "grad_norm": 0.8598722116528789, "learning_rate": 1.635318055055496e-06, "loss": 0.59, "step": 45000 }, { "epoch": 0.817412465494697, "grad_norm": 0.9987765940876807, "learning_rate": 1.6321612225996197e-06, "loss": 0.6095, "step": 45010 }, { "epoch": 0.817594072352172, "grad_norm": 0.8502029964155442, "learning_rate": 1.629007169299398e-06, "loss": 0.5998, "step": 45020 }, { "epoch": 0.817775679209647, "grad_norm": 0.8741342629604499, "learning_rate": 1.6258558962023662e-06, "loss": 0.5882, "step": 45030 }, { "epoch": 0.8179572860671219, "grad_norm": 0.8596998818334383, "learning_rate": 1.622707404355134e-06, "loss": 0.6061, "step": 45040 }, { "epoch": 0.8181388929245968, "grad_norm": 0.8530048903636814, "learning_rate": 1.619561694803392e-06, "loss": 0.6085, "step": 45050 }, { "epoch": 0.8183204997820718, "grad_norm": 0.8652241978765414, "learning_rate": 1.616418768591901e-06, "loss": 0.6031, "step": 45060 }, { "epoch": 0.8185021066395467, "grad_norm": 0.8394059035134985, "learning_rate": 1.613278626764504e-06, "loss": 0.6005, "step": 45070 }, { "epoch": 0.8186837134970216, "grad_norm": 0.8890168763609839, "learning_rate": 1.6101412703641128e-06, "loss": 0.5991, "step": 45080 }, { "epoch": 0.8188653203544966, "grad_norm": 0.8819617198615837, "learning_rate": 1.6070067004327195e-06, "loss": 0.5982, "step": 45090 }, { "epoch": 0.8190469272119715, "grad_norm": 0.8238136092853895, "learning_rate": 1.6038749180113855e-06, "loss": 0.6012, "step": 45100 }, { "epoch": 0.8192285340694465, "grad_norm": 0.8549640302080631, "learning_rate": 1.6007459241402523e-06, "loss": 0.6049, "step": 45110 }, { "epoch": 0.8194101409269214, "grad_norm": 0.887322548070846, "learning_rate": 1.5976197198585297e-06, "loss": 0.6082, "step": 45120 }, { "epoch": 0.8195917477843964, "grad_norm": 0.9015091518876012, "learning_rate": 1.594496306204506e-06, "loss": 0.6125, "step": 45130 }, { "epoch": 0.8197733546418713, "grad_norm": 0.8473394835880743, "learning_rate": 1.591375684215537e-06, "loss": 0.5964, "step": 45140 }, { "epoch": 0.8199549614993462, "grad_norm": 0.8431894353344306, "learning_rate": 1.5882578549280592e-06, "loss": 0.5951, "step": 45150 }, { "epoch": 0.8201365683568211, "grad_norm": 0.8610904271317871, "learning_rate": 1.585142819377573e-06, "loss": 0.5993, "step": 45160 }, { "epoch": 0.8203181752142961, "grad_norm": 0.8630986379939579, "learning_rate": 1.5820305785986545e-06, "loss": 0.595, "step": 45170 }, { "epoch": 0.820499782071771, "grad_norm": 0.8657571900348754, "learning_rate": 1.5789211336249555e-06, "loss": 0.6047, "step": 45180 }, { "epoch": 0.8206813889292459, "grad_norm": 0.8315284202843846, "learning_rate": 1.5758144854891898e-06, "loss": 0.594, "step": 45190 }, { "epoch": 0.820862995786721, "grad_norm": 0.8656411665494314, "learning_rate": 1.5727106352231558e-06, "loss": 0.6049, "step": 45200 }, { "epoch": 0.8210446026441959, "grad_norm": 0.8998895917610886, "learning_rate": 1.5696095838577074e-06, "loss": 0.5974, "step": 45210 }, { "epoch": 0.8212262095016708, "grad_norm": 0.8396671582302384, "learning_rate": 1.5665113324227831e-06, "loss": 0.5968, "step": 45220 }, { "epoch": 0.8214078163591457, "grad_norm": 0.9197351302680844, "learning_rate": 1.5634158819473789e-06, "loss": 0.6048, "step": 45230 }, { "epoch": 0.8215894232166207, "grad_norm": 0.8333866727366144, "learning_rate": 1.560323233459573e-06, "loss": 0.6167, "step": 45240 }, { "epoch": 0.8217710300740956, "grad_norm": 0.8737129867730464, "learning_rate": 1.557233387986502e-06, "loss": 0.5987, "step": 45250 }, { "epoch": 0.8219526369315705, "grad_norm": 0.85242361818716, "learning_rate": 1.554146346554376e-06, "loss": 0.6015, "step": 45260 }, { "epoch": 0.8221342437890454, "grad_norm": 0.8404383484919682, "learning_rate": 1.5510621101884772e-06, "loss": 0.6034, "step": 45270 }, { "epoch": 0.8223158506465205, "grad_norm": 0.90468233667325, "learning_rate": 1.547980679913148e-06, "loss": 0.5944, "step": 45280 }, { "epoch": 0.8224974575039954, "grad_norm": 0.9412192037598682, "learning_rate": 1.5449020567518091e-06, "loss": 0.6044, "step": 45290 }, { "epoch": 0.8226790643614703, "grad_norm": 0.8671449949658154, "learning_rate": 1.5418262417269391e-06, "loss": 0.608, "step": 45300 }, { "epoch": 0.8228606712189452, "grad_norm": 0.8420319581937085, "learning_rate": 1.5387532358600922e-06, "loss": 0.6074, "step": 45310 }, { "epoch": 0.8230422780764202, "grad_norm": 0.8734715025151552, "learning_rate": 1.5356830401718815e-06, "loss": 0.5954, "step": 45320 }, { "epoch": 0.8232238849338951, "grad_norm": 0.8958405185941405, "learning_rate": 1.5326156556819948e-06, "loss": 0.6034, "step": 45330 }, { "epoch": 0.82340549179137, "grad_norm": 0.8327391301736407, "learning_rate": 1.5295510834091799e-06, "loss": 0.6017, "step": 45340 }, { "epoch": 0.8235870986488449, "grad_norm": 0.8406318922278758, "learning_rate": 1.5264893243712564e-06, "loss": 0.594, "step": 45350 }, { "epoch": 0.8237687055063199, "grad_norm": 0.8647665592311322, "learning_rate": 1.5234303795851046e-06, "loss": 0.6049, "step": 45360 }, { "epoch": 0.8239503123637949, "grad_norm": 0.8663015702707152, "learning_rate": 1.5203742500666685e-06, "loss": 0.6072, "step": 45370 }, { "epoch": 0.8241319192212698, "grad_norm": 0.865602525122811, "learning_rate": 1.517320936830966e-06, "loss": 0.6074, "step": 45380 }, { "epoch": 0.8243135260787448, "grad_norm": 0.9192883423828383, "learning_rate": 1.5142704408920695e-06, "loss": 0.6147, "step": 45390 }, { "epoch": 0.8244951329362197, "grad_norm": 0.8477742123193611, "learning_rate": 1.5112227632631216e-06, "loss": 0.5906, "step": 45400 }, { "epoch": 0.8246767397936946, "grad_norm": 0.8806779712744935, "learning_rate": 1.5081779049563317e-06, "loss": 0.6152, "step": 45410 }, { "epoch": 0.8248583466511695, "grad_norm": 0.8560746480875416, "learning_rate": 1.505135866982962e-06, "loss": 0.6008, "step": 45420 }, { "epoch": 0.8250399535086445, "grad_norm": 0.8691707663871758, "learning_rate": 1.5020966503533507e-06, "loss": 0.5882, "step": 45430 }, { "epoch": 0.8252215603661194, "grad_norm": 0.8626163973958165, "learning_rate": 1.4990602560768886e-06, "loss": 0.6024, "step": 45440 }, { "epoch": 0.8254031672235944, "grad_norm": 0.8906084742684595, "learning_rate": 1.4960266851620364e-06, "loss": 0.6071, "step": 45450 }, { "epoch": 0.8255847740810693, "grad_norm": 0.8587381549140637, "learning_rate": 1.4929959386163118e-06, "loss": 0.6066, "step": 45460 }, { "epoch": 0.8257663809385443, "grad_norm": 0.8749206277600805, "learning_rate": 1.4899680174462994e-06, "loss": 0.601, "step": 45470 }, { "epoch": 0.8259479877960192, "grad_norm": 0.8415663009036187, "learning_rate": 1.4869429226576393e-06, "loss": 0.6064, "step": 45480 }, { "epoch": 0.8261295946534941, "grad_norm": 0.8831268046875347, "learning_rate": 1.4839206552550422e-06, "loss": 0.6005, "step": 45490 }, { "epoch": 0.826311201510969, "grad_norm": 0.8477309529635816, "learning_rate": 1.4809012162422676e-06, "loss": 0.6096, "step": 45500 }, { "epoch": 0.826492808368444, "grad_norm": 0.8883391874035966, "learning_rate": 1.4778846066221465e-06, "loss": 0.6016, "step": 45510 }, { "epoch": 0.8266744152259189, "grad_norm": 0.922723911815292, "learning_rate": 1.4748708273965628e-06, "loss": 0.6101, "step": 45520 }, { "epoch": 0.8268560220833938, "grad_norm": 0.862299149073197, "learning_rate": 1.4718598795664673e-06, "loss": 0.6166, "step": 45530 }, { "epoch": 0.8270376289408689, "grad_norm": 0.9047732497992385, "learning_rate": 1.4688517641318612e-06, "loss": 0.612, "step": 45540 }, { "epoch": 0.8272192357983438, "grad_norm": 0.8505859964410364, "learning_rate": 1.465846482091815e-06, "loss": 0.6021, "step": 45550 }, { "epoch": 0.8274008426558187, "grad_norm": 0.8403492991244449, "learning_rate": 1.4628440344444516e-06, "loss": 0.5916, "step": 45560 }, { "epoch": 0.8275824495132936, "grad_norm": 0.8595364336359496, "learning_rate": 1.4598444221869524e-06, "loss": 0.6054, "step": 45570 }, { "epoch": 0.8277640563707686, "grad_norm": 0.8551582461174357, "learning_rate": 1.4568476463155623e-06, "loss": 0.6066, "step": 45580 }, { "epoch": 0.8279456632282435, "grad_norm": 0.9085495303882741, "learning_rate": 1.4538537078255777e-06, "loss": 0.6046, "step": 45590 }, { "epoch": 0.8281272700857184, "grad_norm": 0.8963162568656978, "learning_rate": 1.4508626077113596e-06, "loss": 0.6065, "step": 45600 }, { "epoch": 0.8283088769431933, "grad_norm": 0.8869005756497816, "learning_rate": 1.447874346966318e-06, "loss": 0.5884, "step": 45610 }, { "epoch": 0.8284904838006684, "grad_norm": 0.8264320642502506, "learning_rate": 1.4448889265829291e-06, "loss": 0.6067, "step": 45620 }, { "epoch": 0.8286720906581433, "grad_norm": 0.8610516274487011, "learning_rate": 1.4419063475527163e-06, "loss": 0.6061, "step": 45630 }, { "epoch": 0.8288536975156182, "grad_norm": 0.8579859849075646, "learning_rate": 1.4389266108662691e-06, "loss": 0.6054, "step": 45640 }, { "epoch": 0.8290353043730931, "grad_norm": 0.8478004541908323, "learning_rate": 1.4359497175132252e-06, "loss": 0.6067, "step": 45650 }, { "epoch": 0.8292169112305681, "grad_norm": 0.8502159365377118, "learning_rate": 1.4329756684822793e-06, "loss": 0.5974, "step": 45660 }, { "epoch": 0.829398518088043, "grad_norm": 0.8344855363724668, "learning_rate": 1.4300044647611876e-06, "loss": 0.6121, "step": 45670 }, { "epoch": 0.8295801249455179, "grad_norm": 0.8759164658185322, "learning_rate": 1.42703610733675e-06, "loss": 0.5991, "step": 45680 }, { "epoch": 0.8297617318029928, "grad_norm": 0.8833718803304148, "learning_rate": 1.4240705971948343e-06, "loss": 0.6028, "step": 45690 }, { "epoch": 0.8299433386604678, "grad_norm": 0.8480663965480905, "learning_rate": 1.4211079353203516e-06, "loss": 0.5973, "step": 45700 }, { "epoch": 0.8301249455179428, "grad_norm": 0.8616797213493602, "learning_rate": 1.4181481226972738e-06, "loss": 0.5972, "step": 45710 }, { "epoch": 0.8303065523754177, "grad_norm": 0.8659882320593719, "learning_rate": 1.415191160308621e-06, "loss": 0.6091, "step": 45720 }, { "epoch": 0.8304881592328927, "grad_norm": 0.8773225745923884, "learning_rate": 1.4122370491364757e-06, "loss": 0.6044, "step": 45730 }, { "epoch": 0.8306697660903676, "grad_norm": 0.8648955795320634, "learning_rate": 1.4092857901619604e-06, "loss": 0.5973, "step": 45740 }, { "epoch": 0.8308513729478425, "grad_norm": 0.8713938451322709, "learning_rate": 1.406337384365264e-06, "loss": 0.6096, "step": 45750 }, { "epoch": 0.8310329798053174, "grad_norm": 0.8515266929959731, "learning_rate": 1.403391832725617e-06, "loss": 0.5946, "step": 45760 }, { "epoch": 0.8312145866627924, "grad_norm": 0.8538010768927851, "learning_rate": 1.4004491362213057e-06, "loss": 0.6003, "step": 45770 }, { "epoch": 0.8313961935202673, "grad_norm": 0.878989481807282, "learning_rate": 1.3975092958296731e-06, "loss": 0.6254, "step": 45780 }, { "epoch": 0.8315778003777423, "grad_norm": 0.9133195371610543, "learning_rate": 1.3945723125271049e-06, "loss": 0.599, "step": 45790 }, { "epoch": 0.8317594072352172, "grad_norm": 0.899916292776006, "learning_rate": 1.3916381872890427e-06, "loss": 0.6094, "step": 45800 }, { "epoch": 0.8319410140926922, "grad_norm": 0.8501452956195386, "learning_rate": 1.3887069210899817e-06, "loss": 0.603, "step": 45810 }, { "epoch": 0.8321226209501671, "grad_norm": 0.8654595108113693, "learning_rate": 1.385778514903461e-06, "loss": 0.6008, "step": 45820 }, { "epoch": 0.832304227807642, "grad_norm": 0.8982473103428457, "learning_rate": 1.3828529697020744e-06, "loss": 0.603, "step": 45830 }, { "epoch": 0.8324858346651169, "grad_norm": 0.9033437485644491, "learning_rate": 1.3799302864574627e-06, "loss": 0.6018, "step": 45840 }, { "epoch": 0.8326674415225919, "grad_norm": 0.8996989875984244, "learning_rate": 1.3770104661403205e-06, "loss": 0.6131, "step": 45850 }, { "epoch": 0.8328490483800668, "grad_norm": 0.8287369759772134, "learning_rate": 1.3740935097203845e-06, "loss": 0.5973, "step": 45860 }, { "epoch": 0.8330306552375417, "grad_norm": 0.8747159006129591, "learning_rate": 1.3711794181664496e-06, "loss": 0.6131, "step": 45870 }, { "epoch": 0.8332122620950168, "grad_norm": 0.8500055702175131, "learning_rate": 1.3682681924463482e-06, "loss": 0.6006, "step": 45880 }, { "epoch": 0.8333938689524917, "grad_norm": 0.8612680436765049, "learning_rate": 1.3653598335269714e-06, "loss": 0.5922, "step": 45890 }, { "epoch": 0.8335754758099666, "grad_norm": 0.8713173711097613, "learning_rate": 1.3624543423742497e-06, "loss": 0.6051, "step": 45900 }, { "epoch": 0.8337570826674415, "grad_norm": 0.834161983566108, "learning_rate": 1.3595517199531693e-06, "loss": 0.5999, "step": 45910 }, { "epoch": 0.8339386895249165, "grad_norm": 0.8862746568736752, "learning_rate": 1.3566519672277545e-06, "loss": 0.6106, "step": 45920 }, { "epoch": 0.8341202963823914, "grad_norm": 0.876793240315309, "learning_rate": 1.3537550851610858e-06, "loss": 0.5923, "step": 45930 }, { "epoch": 0.8343019032398663, "grad_norm": 0.8607866102076015, "learning_rate": 1.3508610747152817e-06, "loss": 0.599, "step": 45940 }, { "epoch": 0.8344835100973412, "grad_norm": 0.8744761248791839, "learning_rate": 1.3479699368515142e-06, "loss": 0.5998, "step": 45950 }, { "epoch": 0.8346651169548163, "grad_norm": 0.9220261122070144, "learning_rate": 1.3450816725299964e-06, "loss": 0.6196, "step": 45960 }, { "epoch": 0.8348467238122912, "grad_norm": 0.8686547821373722, "learning_rate": 1.3421962827099865e-06, "loss": 0.606, "step": 45970 }, { "epoch": 0.8350283306697661, "grad_norm": 0.8311057831050102, "learning_rate": 1.3393137683497958e-06, "loss": 0.5838, "step": 45980 }, { "epoch": 0.835209937527241, "grad_norm": 0.8396139556774135, "learning_rate": 1.3364341304067685e-06, "loss": 0.6117, "step": 45990 }, { "epoch": 0.835391544384716, "grad_norm": 0.8506715948794079, "learning_rate": 1.3335573698373061e-06, "loss": 0.5882, "step": 46000 }, { "epoch": 0.8355731512421909, "grad_norm": 0.8397123558713954, "learning_rate": 1.3306834875968422e-06, "loss": 0.5971, "step": 46010 }, { "epoch": 0.8357547580996658, "grad_norm": 0.8625958297902735, "learning_rate": 1.3278124846398665e-06, "loss": 0.5984, "step": 46020 }, { "epoch": 0.8359363649571407, "grad_norm": 0.9203688071393961, "learning_rate": 1.324944361919901e-06, "loss": 0.6168, "step": 46030 }, { "epoch": 0.8361179718146157, "grad_norm": 0.8960122898940854, "learning_rate": 1.3220791203895222e-06, "loss": 0.6073, "step": 46040 }, { "epoch": 0.8362995786720907, "grad_norm": 0.8986531584621975, "learning_rate": 1.3192167610003404e-06, "loss": 0.6012, "step": 46050 }, { "epoch": 0.8364811855295656, "grad_norm": 0.9027118750352348, "learning_rate": 1.3163572847030103e-06, "loss": 0.6082, "step": 46060 }, { "epoch": 0.8366627923870406, "grad_norm": 0.8721754185013749, "learning_rate": 1.3135006924472372e-06, "loss": 0.602, "step": 46070 }, { "epoch": 0.8368443992445155, "grad_norm": 0.8703051534809686, "learning_rate": 1.310646985181756e-06, "loss": 0.6011, "step": 46080 }, { "epoch": 0.8370260061019904, "grad_norm": 0.8302993505168526, "learning_rate": 1.3077961638543546e-06, "loss": 0.5959, "step": 46090 }, { "epoch": 0.8372076129594653, "grad_norm": 0.8226147735925456, "learning_rate": 1.3049482294118553e-06, "loss": 0.5991, "step": 46100 }, { "epoch": 0.8373892198169403, "grad_norm": 0.882221372106664, "learning_rate": 1.3021031828001252e-06, "loss": 0.6037, "step": 46110 }, { "epoch": 0.8375708266744152, "grad_norm": 0.8553157353354963, "learning_rate": 1.299261024964069e-06, "loss": 0.6236, "step": 46120 }, { "epoch": 0.8377524335318902, "grad_norm": 0.872930775429202, "learning_rate": 1.2964217568476379e-06, "loss": 0.5985, "step": 46130 }, { "epoch": 0.8379340403893651, "grad_norm": 0.860037229003563, "learning_rate": 1.2935853793938146e-06, "loss": 0.6098, "step": 46140 }, { "epoch": 0.8381156472468401, "grad_norm": 0.8524220504553781, "learning_rate": 1.2907518935446317e-06, "loss": 0.5899, "step": 46150 }, { "epoch": 0.838297254104315, "grad_norm": 0.8571357601423735, "learning_rate": 1.2879213002411538e-06, "loss": 0.5947, "step": 46160 }, { "epoch": 0.8384788609617899, "grad_norm": 0.8606086614901798, "learning_rate": 1.2850936004234848e-06, "loss": 0.6006, "step": 46170 }, { "epoch": 0.8386604678192648, "grad_norm": 0.8609723479275099, "learning_rate": 1.2822687950307744e-06, "loss": 0.5942, "step": 46180 }, { "epoch": 0.8388420746767398, "grad_norm": 0.8401840975955125, "learning_rate": 1.2794468850012044e-06, "loss": 0.6021, "step": 46190 }, { "epoch": 0.8390236815342147, "grad_norm": 0.8865604700273889, "learning_rate": 1.276627871271997e-06, "loss": 0.6005, "step": 46200 }, { "epoch": 0.8392052883916896, "grad_norm": 0.866850735598615, "learning_rate": 1.273811754779416e-06, "loss": 0.6005, "step": 46210 }, { "epoch": 0.8393868952491647, "grad_norm": 0.8396722919336044, "learning_rate": 1.2709985364587551e-06, "loss": 0.594, "step": 46220 }, { "epoch": 0.8395685021066396, "grad_norm": 0.8717875096863295, "learning_rate": 1.268188217244355e-06, "loss": 0.6143, "step": 46230 }, { "epoch": 0.8397501089641145, "grad_norm": 0.854484748765503, "learning_rate": 1.2653807980695855e-06, "loss": 0.6037, "step": 46240 }, { "epoch": 0.8399317158215894, "grad_norm": 0.8831391517747983, "learning_rate": 1.2625762798668574e-06, "loss": 0.5942, "step": 46250 }, { "epoch": 0.8401133226790644, "grad_norm": 0.8653253431195822, "learning_rate": 1.2597746635676155e-06, "loss": 0.5946, "step": 46260 }, { "epoch": 0.8402949295365393, "grad_norm": 0.8553954201623988, "learning_rate": 1.256975950102346e-06, "loss": 0.6062, "step": 46270 }, { "epoch": 0.8404765363940142, "grad_norm": 0.8516237445025358, "learning_rate": 1.2541801404005637e-06, "loss": 0.5905, "step": 46280 }, { "epoch": 0.8406581432514891, "grad_norm": 0.9040817113978237, "learning_rate": 1.2513872353908252e-06, "loss": 0.5988, "step": 46290 }, { "epoch": 0.8408397501089642, "grad_norm": 0.8808247499883554, "learning_rate": 1.2485972360007159e-06, "loss": 0.5933, "step": 46300 }, { "epoch": 0.8410213569664391, "grad_norm": 0.9185368075982157, "learning_rate": 1.245810143156866e-06, "loss": 0.5892, "step": 46310 }, { "epoch": 0.841202963823914, "grad_norm": 0.8330515629403241, "learning_rate": 1.2430259577849279e-06, "loss": 0.5938, "step": 46320 }, { "epoch": 0.8413845706813889, "grad_norm": 0.8666576778860026, "learning_rate": 1.2402446808096014e-06, "loss": 0.6119, "step": 46330 }, { "epoch": 0.8415661775388639, "grad_norm": 0.8721929190389731, "learning_rate": 1.2374663131546071e-06, "loss": 0.5973, "step": 46340 }, { "epoch": 0.8417477843963388, "grad_norm": 0.862700031067227, "learning_rate": 1.2346908557427128e-06, "loss": 0.5961, "step": 46350 }, { "epoch": 0.8419293912538137, "grad_norm": 0.8564810601808245, "learning_rate": 1.23191830949571e-06, "loss": 0.6044, "step": 46360 }, { "epoch": 0.8421109981112886, "grad_norm": 0.8501975723115519, "learning_rate": 1.229148675334424e-06, "loss": 0.5916, "step": 46370 }, { "epoch": 0.8422926049687636, "grad_norm": 0.8266171591068032, "learning_rate": 1.2263819541787193e-06, "loss": 0.5885, "step": 46380 }, { "epoch": 0.8424742118262386, "grad_norm": 0.855469990954473, "learning_rate": 1.2236181469474851e-06, "loss": 0.5996, "step": 46390 }, { "epoch": 0.8426558186837135, "grad_norm": 0.8859207971262949, "learning_rate": 1.2208572545586506e-06, "loss": 0.6052, "step": 46400 }, { "epoch": 0.8428374255411885, "grad_norm": 0.8626016957504766, "learning_rate": 1.21809927792917e-06, "loss": 0.6038, "step": 46410 }, { "epoch": 0.8430190323986634, "grad_norm": 0.8700698324003211, "learning_rate": 1.215344217975034e-06, "loss": 0.6003, "step": 46420 }, { "epoch": 0.8432006392561383, "grad_norm": 0.8966497991430546, "learning_rate": 1.2125920756112609e-06, "loss": 0.6137, "step": 46430 }, { "epoch": 0.8433822461136132, "grad_norm": 0.8155004524537682, "learning_rate": 1.2098428517519045e-06, "loss": 0.5992, "step": 46440 }, { "epoch": 0.8435638529710882, "grad_norm": 0.8461069350403977, "learning_rate": 1.2070965473100449e-06, "loss": 0.5992, "step": 46450 }, { "epoch": 0.8437454598285631, "grad_norm": 0.9065025595765757, "learning_rate": 1.2043531631977912e-06, "loss": 0.608, "step": 46460 }, { "epoch": 0.8439270666860381, "grad_norm": 0.8454989771316492, "learning_rate": 1.2016127003262923e-06, "loss": 0.5935, "step": 46470 }, { "epoch": 0.844108673543513, "grad_norm": 0.8479452220861289, "learning_rate": 1.1988751596057135e-06, "loss": 0.6074, "step": 46480 }, { "epoch": 0.844290280400988, "grad_norm": 0.8465224924787634, "learning_rate": 1.1961405419452609e-06, "loss": 0.5834, "step": 46490 }, { "epoch": 0.8444718872584629, "grad_norm": 0.8908749993556859, "learning_rate": 1.1934088482531625e-06, "loss": 0.598, "step": 46500 }, { "epoch": 0.8446534941159378, "grad_norm": 0.8789666617224761, "learning_rate": 1.1906800794366812e-06, "loss": 0.6156, "step": 46510 }, { "epoch": 0.8448351009734127, "grad_norm": 0.8202013929040674, "learning_rate": 1.1879542364021002e-06, "loss": 0.595, "step": 46520 }, { "epoch": 0.8450167078308877, "grad_norm": 0.8679781260921204, "learning_rate": 1.1852313200547416e-06, "loss": 0.5961, "step": 46530 }, { "epoch": 0.8451983146883626, "grad_norm": 0.8706471837750277, "learning_rate": 1.1825113312989444e-06, "loss": 0.5989, "step": 46540 }, { "epoch": 0.8453799215458375, "grad_norm": 0.8241851415738585, "learning_rate": 1.179794271038086e-06, "loss": 0.5912, "step": 46550 }, { "epoch": 0.8455615284033126, "grad_norm": 0.8956682027644555, "learning_rate": 1.1770801401745624e-06, "loss": 0.601, "step": 46560 }, { "epoch": 0.8457431352607875, "grad_norm": 0.8523192383959743, "learning_rate": 1.1743689396098002e-06, "loss": 0.5861, "step": 46570 }, { "epoch": 0.8459247421182624, "grad_norm": 0.8878993505905625, "learning_rate": 1.1716606702442546e-06, "loss": 0.5961, "step": 46580 }, { "epoch": 0.8461063489757373, "grad_norm": 0.8364726053402051, "learning_rate": 1.1689553329774018e-06, "loss": 0.6047, "step": 46590 }, { "epoch": 0.8462879558332123, "grad_norm": 0.8433685128709747, "learning_rate": 1.1662529287077505e-06, "loss": 0.59, "step": 46600 }, { "epoch": 0.8464695626906872, "grad_norm": 0.8468893128430626, "learning_rate": 1.1635534583328356e-06, "loss": 0.6002, "step": 46610 }, { "epoch": 0.8466511695481621, "grad_norm": 0.912439481287316, "learning_rate": 1.1608569227492085e-06, "loss": 0.6048, "step": 46620 }, { "epoch": 0.846832776405637, "grad_norm": 0.88832875984011, "learning_rate": 1.1581633228524568e-06, "loss": 0.5888, "step": 46630 }, { "epoch": 0.8470143832631121, "grad_norm": 0.8547330892372889, "learning_rate": 1.1554726595371845e-06, "loss": 0.5944, "step": 46640 }, { "epoch": 0.847195990120587, "grad_norm": 0.8896829649775448, "learning_rate": 1.1527849336970275e-06, "loss": 0.5983, "step": 46650 }, { "epoch": 0.8473775969780619, "grad_norm": 0.8507487407559147, "learning_rate": 1.1501001462246398e-06, "loss": 0.5881, "step": 46660 }, { "epoch": 0.8475592038355368, "grad_norm": 0.8563823475389724, "learning_rate": 1.1474182980117044e-06, "loss": 0.6034, "step": 46670 }, { "epoch": 0.8477408106930118, "grad_norm": 0.8537429270080871, "learning_rate": 1.1447393899489245e-06, "loss": 0.5872, "step": 46680 }, { "epoch": 0.8479224175504867, "grad_norm": 0.9294464948594678, "learning_rate": 1.1420634229260297e-06, "loss": 0.6013, "step": 46690 }, { "epoch": 0.8481040244079616, "grad_norm": 0.883689831348684, "learning_rate": 1.1393903978317688e-06, "loss": 0.604, "step": 46700 }, { "epoch": 0.8482856312654365, "grad_norm": 0.8579606841518475, "learning_rate": 1.1367203155539208e-06, "loss": 0.5762, "step": 46710 }, { "epoch": 0.8484672381229115, "grad_norm": 0.8416345275028, "learning_rate": 1.1340531769792773e-06, "loss": 0.5962, "step": 46720 }, { "epoch": 0.8486488449803865, "grad_norm": 0.8586627180605035, "learning_rate": 1.131388982993663e-06, "loss": 0.608, "step": 46730 }, { "epoch": 0.8488304518378614, "grad_norm": 0.872095978579262, "learning_rate": 1.1287277344819136e-06, "loss": 0.5968, "step": 46740 }, { "epoch": 0.8490120586953364, "grad_norm": 0.8680331865472245, "learning_rate": 1.1260694323278987e-06, "loss": 0.6001, "step": 46750 }, { "epoch": 0.8491936655528113, "grad_norm": 0.8949873476638595, "learning_rate": 1.1234140774144975e-06, "loss": 0.595, "step": 46760 }, { "epoch": 0.8493752724102862, "grad_norm": 0.8875154770279269, "learning_rate": 1.1207616706236168e-06, "loss": 0.6025, "step": 46770 }, { "epoch": 0.8495568792677611, "grad_norm": 0.9066470871845401, "learning_rate": 1.1181122128361854e-06, "loss": 0.6075, "step": 46780 }, { "epoch": 0.8497384861252361, "grad_norm": 0.8465894381114777, "learning_rate": 1.115465704932146e-06, "loss": 0.6119, "step": 46790 }, { "epoch": 0.849920092982711, "grad_norm": 0.8725641532636189, "learning_rate": 1.1128221477904723e-06, "loss": 0.6026, "step": 46800 }, { "epoch": 0.850101699840186, "grad_norm": 0.8314577075178833, "learning_rate": 1.1101815422891448e-06, "loss": 0.5991, "step": 46810 }, { "epoch": 0.850283306697661, "grad_norm": 0.8334367032206483, "learning_rate": 1.1075438893051771e-06, "loss": 0.6072, "step": 46820 }, { "epoch": 0.8504649135551359, "grad_norm": 0.8730225363136855, "learning_rate": 1.1049091897145892e-06, "loss": 0.6029, "step": 46830 }, { "epoch": 0.8506465204126108, "grad_norm": 0.8501907146358624, "learning_rate": 1.1022774443924333e-06, "loss": 0.6005, "step": 46840 }, { "epoch": 0.8508281272700857, "grad_norm": 0.871021392497116, "learning_rate": 1.0996486542127682e-06, "loss": 0.5934, "step": 46850 }, { "epoch": 0.8510097341275606, "grad_norm": 0.85229148500286, "learning_rate": 1.097022820048681e-06, "loss": 0.6056, "step": 46860 }, { "epoch": 0.8511913409850356, "grad_norm": 0.9041498091287693, "learning_rate": 1.0943999427722707e-06, "loss": 0.5984, "step": 46870 }, { "epoch": 0.8513729478425105, "grad_norm": 0.8895749837355736, "learning_rate": 1.0917800232546538e-06, "loss": 0.6011, "step": 46880 }, { "epoch": 0.8515545546999854, "grad_norm": 0.8495486404501118, "learning_rate": 1.0891630623659732e-06, "loss": 0.6116, "step": 46890 }, { "epoch": 0.8517361615574605, "grad_norm": 0.8639106171592305, "learning_rate": 1.086549060975377e-06, "loss": 0.5896, "step": 46900 }, { "epoch": 0.8519177684149354, "grad_norm": 0.8532935286385722, "learning_rate": 1.0839380199510419e-06, "loss": 0.5958, "step": 46910 }, { "epoch": 0.8520993752724103, "grad_norm": 0.8895100905940506, "learning_rate": 1.0813299401601507e-06, "loss": 0.5935, "step": 46920 }, { "epoch": 0.8522809821298852, "grad_norm": 0.8854734709593907, "learning_rate": 1.0787248224689128e-06, "loss": 0.6032, "step": 46930 }, { "epoch": 0.8524625889873602, "grad_norm": 0.8280143747593435, "learning_rate": 1.076122667742544e-06, "loss": 0.6018, "step": 46940 }, { "epoch": 0.8526441958448351, "grad_norm": 0.8504195080455325, "learning_rate": 1.0735234768452862e-06, "loss": 0.5988, "step": 46950 }, { "epoch": 0.85282580270231, "grad_norm": 0.8771442517804628, "learning_rate": 1.0709272506403878e-06, "loss": 0.5995, "step": 46960 }, { "epoch": 0.8530074095597849, "grad_norm": 0.8904083240455762, "learning_rate": 1.0683339899901169e-06, "loss": 0.5931, "step": 46970 }, { "epoch": 0.85318901641726, "grad_norm": 0.9019855992948297, "learning_rate": 1.0657436957557588e-06, "loss": 0.6037, "step": 46980 }, { "epoch": 0.8533706232747349, "grad_norm": 0.9258031606011875, "learning_rate": 1.0631563687976066e-06, "loss": 0.5925, "step": 46990 }, { "epoch": 0.8535522301322098, "grad_norm": 0.8708456017672909, "learning_rate": 1.060572009974975e-06, "loss": 0.6093, "step": 47000 }, { "epoch": 0.8537338369896847, "grad_norm": 0.8510681856047049, "learning_rate": 1.057990620146192e-06, "loss": 0.5948, "step": 47010 }, { "epoch": 0.8539154438471597, "grad_norm": 0.8847601006909879, "learning_rate": 1.055412200168594e-06, "loss": 0.6058, "step": 47020 }, { "epoch": 0.8540970507046346, "grad_norm": 0.9084752624160626, "learning_rate": 1.0528367508985383e-06, "loss": 0.5983, "step": 47030 }, { "epoch": 0.8542786575621095, "grad_norm": 0.8727713926769785, "learning_rate": 1.0502642731913882e-06, "loss": 0.602, "step": 47040 }, { "epoch": 0.8544602644195844, "grad_norm": 0.8668653991432731, "learning_rate": 1.0476947679015282e-06, "loss": 0.6029, "step": 47050 }, { "epoch": 0.8546418712770594, "grad_norm": 0.9045098414596999, "learning_rate": 1.0451282358823479e-06, "loss": 0.6123, "step": 47060 }, { "epoch": 0.8548234781345344, "grad_norm": 0.8894895294761795, "learning_rate": 1.0425646779862554e-06, "loss": 0.5974, "step": 47070 }, { "epoch": 0.8550050849920093, "grad_norm": 0.8706828773417613, "learning_rate": 1.0400040950646662e-06, "loss": 0.6028, "step": 47080 }, { "epoch": 0.8551866918494843, "grad_norm": 0.856452954103875, "learning_rate": 1.0374464879680123e-06, "loss": 0.6038, "step": 47090 }, { "epoch": 0.8553682987069592, "grad_norm": 0.8533526856211407, "learning_rate": 1.034891857545731e-06, "loss": 0.6097, "step": 47100 }, { "epoch": 0.8555499055644341, "grad_norm": 0.8965092011470323, "learning_rate": 1.0323402046462804e-06, "loss": 0.6056, "step": 47110 }, { "epoch": 0.855731512421909, "grad_norm": 0.8451168107601166, "learning_rate": 1.0297915301171201e-06, "loss": 0.5976, "step": 47120 }, { "epoch": 0.855913119279384, "grad_norm": 0.9026797126937346, "learning_rate": 1.0272458348047276e-06, "loss": 0.613, "step": 47130 }, { "epoch": 0.8560947261368589, "grad_norm": 0.8923041190019765, "learning_rate": 1.0247031195545854e-06, "loss": 0.6124, "step": 47140 }, { "epoch": 0.8562763329943338, "grad_norm": 0.8425062685267335, "learning_rate": 1.0221633852111911e-06, "loss": 0.6093, "step": 47150 }, { "epoch": 0.8564579398518088, "grad_norm": 0.8580480312906766, "learning_rate": 1.0196266326180505e-06, "loss": 0.5905, "step": 47160 }, { "epoch": 0.8566395467092838, "grad_norm": 0.842593217268758, "learning_rate": 1.0170928626176745e-06, "loss": 0.6025, "step": 47170 }, { "epoch": 0.8568211535667587, "grad_norm": 0.9045643082693633, "learning_rate": 1.0145620760515928e-06, "loss": 0.5885, "step": 47180 }, { "epoch": 0.8570027604242336, "grad_norm": 0.875151645793613, "learning_rate": 1.012034273760335e-06, "loss": 0.596, "step": 47190 }, { "epoch": 0.8571843672817085, "grad_norm": 0.8662765532896533, "learning_rate": 1.0095094565834473e-06, "loss": 0.5951, "step": 47200 }, { "epoch": 0.8573659741391835, "grad_norm": 0.8833410047429912, "learning_rate": 1.0069876253594767e-06, "loss": 0.5926, "step": 47210 }, { "epoch": 0.8575475809966584, "grad_norm": 0.8693078405987114, "learning_rate": 1.0044687809259867e-06, "loss": 0.6072, "step": 47220 }, { "epoch": 0.8577291878541333, "grad_norm": 0.8772447311906961, "learning_rate": 1.0019529241195403e-06, "loss": 0.599, "step": 47230 }, { "epoch": 0.8579107947116084, "grad_norm": 0.8761700984940345, "learning_rate": 9.994400557757166e-07, "loss": 0.6062, "step": 47240 }, { "epoch": 0.8580924015690833, "grad_norm": 0.8544604178703166, "learning_rate": 9.96930176729094e-07, "loss": 0.5944, "step": 47250 }, { "epoch": 0.8582740084265582, "grad_norm": 0.8141917772773399, "learning_rate": 9.944232878132675e-07, "loss": 0.5916, "step": 47260 }, { "epoch": 0.8584556152840331, "grad_norm": 0.8652573918777073, "learning_rate": 9.919193898608304e-07, "loss": 0.5978, "step": 47270 }, { "epoch": 0.8586372221415081, "grad_norm": 0.9479815264059844, "learning_rate": 9.894184837033838e-07, "loss": 0.6013, "step": 47280 }, { "epoch": 0.858818828998983, "grad_norm": 0.8607235422783743, "learning_rate": 9.869205701715423e-07, "loss": 0.5993, "step": 47290 }, { "epoch": 0.8590004358564579, "grad_norm": 0.8522642772325432, "learning_rate": 9.844256500949178e-07, "loss": 0.6018, "step": 47300 }, { "epoch": 0.8591820427139328, "grad_norm": 0.8680458437649509, "learning_rate": 9.819337243021354e-07, "loss": 0.5868, "step": 47310 }, { "epoch": 0.8593636495714078, "grad_norm": 0.8884044913736427, "learning_rate": 9.794447936208174e-07, "loss": 0.5946, "step": 47320 }, { "epoch": 0.8595452564288828, "grad_norm": 0.8910592115710317, "learning_rate": 9.769588588776013e-07, "loss": 0.6046, "step": 47330 }, { "epoch": 0.8597268632863577, "grad_norm": 0.834313475983997, "learning_rate": 9.744759208981192e-07, "loss": 0.5963, "step": 47340 }, { "epoch": 0.8599084701438326, "grad_norm": 0.8491710943355983, "learning_rate": 9.719959805070178e-07, "loss": 0.6154, "step": 47350 }, { "epoch": 0.8600900770013076, "grad_norm": 0.9033223739434785, "learning_rate": 9.695190385279419e-07, "loss": 0.6204, "step": 47360 }, { "epoch": 0.8602716838587825, "grad_norm": 0.9001209773561235, "learning_rate": 9.670450957835398e-07, "loss": 0.5992, "step": 47370 }, { "epoch": 0.8604532907162574, "grad_norm": 0.8498699060671682, "learning_rate": 9.645741530954689e-07, "loss": 0.6069, "step": 47380 }, { "epoch": 0.8606348975737323, "grad_norm": 0.8877517734893431, "learning_rate": 9.621062112843837e-07, "loss": 0.5947, "step": 47390 }, { "epoch": 0.8608165044312073, "grad_norm": 0.9054228550414747, "learning_rate": 9.596412711699487e-07, "loss": 0.5982, "step": 47400 }, { "epoch": 0.8609981112886823, "grad_norm": 0.8748696497644928, "learning_rate": 9.57179333570829e-07, "loss": 0.6082, "step": 47410 }, { "epoch": 0.8611797181461572, "grad_norm": 0.8676773400988043, "learning_rate": 9.547203993046893e-07, "loss": 0.6022, "step": 47420 }, { "epoch": 0.8613613250036322, "grad_norm": 0.8572971835291692, "learning_rate": 9.522644691882022e-07, "loss": 0.5954, "step": 47430 }, { "epoch": 0.8615429318611071, "grad_norm": 0.8177681024657628, "learning_rate": 9.498115440370359e-07, "loss": 0.5857, "step": 47440 }, { "epoch": 0.861724538718582, "grad_norm": 0.8675075294777516, "learning_rate": 9.47361624665869e-07, "loss": 0.5898, "step": 47450 }, { "epoch": 0.8619061455760569, "grad_norm": 0.8592115442706907, "learning_rate": 9.449147118883728e-07, "loss": 0.5921, "step": 47460 }, { "epoch": 0.8620877524335319, "grad_norm": 0.9183370539334584, "learning_rate": 9.424708065172283e-07, "loss": 0.6199, "step": 47470 }, { "epoch": 0.8622693592910068, "grad_norm": 0.8901198399424608, "learning_rate": 9.400299093641097e-07, "loss": 0.5995, "step": 47480 }, { "epoch": 0.8624509661484817, "grad_norm": 0.8887469897144583, "learning_rate": 9.375920212397016e-07, "loss": 0.6058, "step": 47490 }, { "epoch": 0.8626325730059567, "grad_norm": 0.8265596544063056, "learning_rate": 9.351571429536777e-07, "loss": 0.6055, "step": 47500 }, { "epoch": 0.8628141798634317, "grad_norm": 0.8822491412927213, "learning_rate": 9.327252753147243e-07, "loss": 0.5954, "step": 47510 }, { "epoch": 0.8629957867209066, "grad_norm": 0.861959793854123, "learning_rate": 9.302964191305152e-07, "loss": 0.6017, "step": 47520 }, { "epoch": 0.8631773935783815, "grad_norm": 0.8240106050821187, "learning_rate": 9.278705752077377e-07, "loss": 0.5884, "step": 47530 }, { "epoch": 0.8633590004358564, "grad_norm": 0.9127880205718779, "learning_rate": 9.254477443520648e-07, "loss": 0.6004, "step": 47540 }, { "epoch": 0.8635406072933314, "grad_norm": 0.8884924853538929, "learning_rate": 9.230279273681819e-07, "loss": 0.5958, "step": 47550 }, { "epoch": 0.8637222141508063, "grad_norm": 0.8369747364665775, "learning_rate": 9.206111250597627e-07, "loss": 0.5896, "step": 47560 }, { "epoch": 0.8639038210082812, "grad_norm": 0.8637199835568574, "learning_rate": 9.18197338229484e-07, "loss": 0.6038, "step": 47570 }, { "epoch": 0.8640854278657563, "grad_norm": 0.9090113068856072, "learning_rate": 9.157865676790223e-07, "loss": 0.5818, "step": 47580 }, { "epoch": 0.8642670347232312, "grad_norm": 0.8669878878946503, "learning_rate": 9.133788142090505e-07, "loss": 0.6057, "step": 47590 }, { "epoch": 0.8644486415807061, "grad_norm": 0.8493768668857901, "learning_rate": 9.109740786192422e-07, "loss": 0.5977, "step": 47600 }, { "epoch": 0.864630248438181, "grad_norm": 0.887000721783672, "learning_rate": 9.08572361708262e-07, "loss": 0.5875, "step": 47610 }, { "epoch": 0.864811855295656, "grad_norm": 0.8282756660374593, "learning_rate": 9.061736642737818e-07, "loss": 0.5888, "step": 47620 }, { "epoch": 0.8649934621531309, "grad_norm": 0.8663006132339538, "learning_rate": 9.037779871124596e-07, "loss": 0.6018, "step": 47630 }, { "epoch": 0.8651750690106058, "grad_norm": 0.8577612263415126, "learning_rate": 9.013853310199616e-07, "loss": 0.5962, "step": 47640 }, { "epoch": 0.8653566758680807, "grad_norm": 0.8756312112154284, "learning_rate": 8.989956967909408e-07, "loss": 0.602, "step": 47650 }, { "epoch": 0.8655382827255557, "grad_norm": 0.8914082935110343, "learning_rate": 8.966090852190545e-07, "loss": 0.5992, "step": 47660 }, { "epoch": 0.8657198895830307, "grad_norm": 0.8564101564990682, "learning_rate": 8.942254970969511e-07, "loss": 0.5844, "step": 47670 }, { "epoch": 0.8659014964405056, "grad_norm": 0.8513608890194276, "learning_rate": 8.918449332162726e-07, "loss": 0.6043, "step": 47680 }, { "epoch": 0.8660831032979805, "grad_norm": 0.9025163033957292, "learning_rate": 8.894673943676646e-07, "loss": 0.6125, "step": 47690 }, { "epoch": 0.8662647101554555, "grad_norm": 0.8613824133777466, "learning_rate": 8.870928813407609e-07, "loss": 0.5881, "step": 47700 }, { "epoch": 0.8664463170129304, "grad_norm": 0.8423419645108787, "learning_rate": 8.847213949241961e-07, "loss": 0.6069, "step": 47710 }, { "epoch": 0.8666279238704053, "grad_norm": 0.8596647429346456, "learning_rate": 8.823529359055926e-07, "loss": 0.6086, "step": 47720 }, { "epoch": 0.8668095307278803, "grad_norm": 0.8711557382869819, "learning_rate": 8.799875050715745e-07, "loss": 0.5786, "step": 47730 }, { "epoch": 0.8669911375853552, "grad_norm": 0.8470340916911467, "learning_rate": 8.776251032077543e-07, "loss": 0.6056, "step": 47740 }, { "epoch": 0.8671727444428302, "grad_norm": 0.8574737818959206, "learning_rate": 8.752657310987445e-07, "loss": 0.6062, "step": 47750 }, { "epoch": 0.8673543513003051, "grad_norm": 0.8647797191406891, "learning_rate": 8.729093895281448e-07, "loss": 0.6025, "step": 47760 }, { "epoch": 0.8675359581577801, "grad_norm": 0.8509283458243346, "learning_rate": 8.705560792785517e-07, "loss": 0.5809, "step": 47770 }, { "epoch": 0.867717565015255, "grad_norm": 0.8516925368617417, "learning_rate": 8.682058011315564e-07, "loss": 0.5896, "step": 47780 }, { "epoch": 0.8678991718727299, "grad_norm": 0.8869283831000107, "learning_rate": 8.65858555867739e-07, "loss": 0.6109, "step": 47790 }, { "epoch": 0.8680807787302048, "grad_norm": 0.822585799749719, "learning_rate": 8.635143442666771e-07, "loss": 0.6033, "step": 47800 }, { "epoch": 0.8682623855876798, "grad_norm": 0.8400503642756214, "learning_rate": 8.611731671069389e-07, "loss": 0.6039, "step": 47810 }, { "epoch": 0.8684439924451547, "grad_norm": 0.8455650159849828, "learning_rate": 8.588350251660815e-07, "loss": 0.6088, "step": 47820 }, { "epoch": 0.8686255993026296, "grad_norm": 0.8555387774389228, "learning_rate": 8.564999192206602e-07, "loss": 0.5994, "step": 47830 }, { "epoch": 0.8688072061601047, "grad_norm": 0.8449001069931517, "learning_rate": 8.541678500462147e-07, "loss": 0.5888, "step": 47840 }, { "epoch": 0.8689888130175796, "grad_norm": 0.8949095702525697, "learning_rate": 8.518388184172832e-07, "loss": 0.5934, "step": 47850 }, { "epoch": 0.8691704198750545, "grad_norm": 0.8717891179229331, "learning_rate": 8.495128251073881e-07, "loss": 0.6034, "step": 47860 }, { "epoch": 0.8693520267325294, "grad_norm": 0.8482090943518746, "learning_rate": 8.471898708890514e-07, "loss": 0.5846, "step": 47870 }, { "epoch": 0.8695336335900044, "grad_norm": 0.8562189829926222, "learning_rate": 8.448699565337759e-07, "loss": 0.5897, "step": 47880 }, { "epoch": 0.8697152404474793, "grad_norm": 0.8588755556983856, "learning_rate": 8.425530828120631e-07, "loss": 0.5891, "step": 47890 }, { "epoch": 0.8698968473049542, "grad_norm": 0.8924801880502731, "learning_rate": 8.402392504933987e-07, "loss": 0.6026, "step": 47900 }, { "epoch": 0.8700784541624291, "grad_norm": 0.9061477137452213, "learning_rate": 8.379284603462623e-07, "loss": 0.6124, "step": 47910 }, { "epoch": 0.8702600610199042, "grad_norm": 0.8653655409060025, "learning_rate": 8.3562071313812e-07, "loss": 0.5903, "step": 47920 }, { "epoch": 0.8704416678773791, "grad_norm": 0.852428473195559, "learning_rate": 8.33316009635431e-07, "loss": 0.6004, "step": 47930 }, { "epoch": 0.870623274734854, "grad_norm": 0.8334655620907059, "learning_rate": 8.310143506036384e-07, "loss": 0.5986, "step": 47940 }, { "epoch": 0.8708048815923289, "grad_norm": 0.8595251870921189, "learning_rate": 8.287157368071819e-07, "loss": 0.596, "step": 47950 }, { "epoch": 0.8709864884498039, "grad_norm": 0.9036649549913218, "learning_rate": 8.264201690094809e-07, "loss": 0.5974, "step": 47960 }, { "epoch": 0.8711680953072788, "grad_norm": 0.8268912496362656, "learning_rate": 8.241276479729465e-07, "loss": 0.5881, "step": 47970 }, { "epoch": 0.8713497021647537, "grad_norm": 0.9012357216997822, "learning_rate": 8.218381744589843e-07, "loss": 0.5978, "step": 47980 }, { "epoch": 0.8715313090222286, "grad_norm": 0.8506723412684789, "learning_rate": 8.195517492279759e-07, "loss": 0.5993, "step": 47990 }, { "epoch": 0.8717129158797036, "grad_norm": 0.8858569723503164, "learning_rate": 8.172683730393005e-07, "loss": 0.6054, "step": 48000 }, { "epoch": 0.8718945227371786, "grad_norm": 0.8741596293055189, "learning_rate": 8.149880466513194e-07, "loss": 0.6075, "step": 48010 }, { "epoch": 0.8720761295946535, "grad_norm": 0.9100186931875747, "learning_rate": 8.127107708213844e-07, "loss": 0.5943, "step": 48020 }, { "epoch": 0.8722577364521285, "grad_norm": 0.8242086520818007, "learning_rate": 8.104365463058294e-07, "loss": 0.6, "step": 48030 }, { "epoch": 0.8724393433096034, "grad_norm": 0.8700024801181239, "learning_rate": 8.081653738599816e-07, "loss": 0.581, "step": 48040 }, { "epoch": 0.8726209501670783, "grad_norm": 0.8351526739568065, "learning_rate": 8.05897254238146e-07, "loss": 0.5981, "step": 48050 }, { "epoch": 0.8728025570245532, "grad_norm": 0.8218753945505751, "learning_rate": 8.036321881936226e-07, "loss": 0.5797, "step": 48060 }, { "epoch": 0.8729841638820282, "grad_norm": 0.9126939525678889, "learning_rate": 8.013701764786919e-07, "loss": 0.6069, "step": 48070 }, { "epoch": 0.8731657707395031, "grad_norm": 0.8578410675673415, "learning_rate": 7.991112198446183e-07, "loss": 0.6089, "step": 48080 }, { "epoch": 0.8733473775969781, "grad_norm": 0.849467471457829, "learning_rate": 7.968553190416573e-07, "loss": 0.5792, "step": 48090 }, { "epoch": 0.873528984454453, "grad_norm": 0.8532493311083265, "learning_rate": 7.94602474819044e-07, "loss": 0.585, "step": 48100 }, { "epoch": 0.873710591311928, "grad_norm": 0.8596006965262986, "learning_rate": 7.923526879250043e-07, "loss": 0.5832, "step": 48110 }, { "epoch": 0.8738921981694029, "grad_norm": 0.903176191277346, "learning_rate": 7.901059591067417e-07, "loss": 0.6168, "step": 48120 }, { "epoch": 0.8740738050268778, "grad_norm": 1.004390957152776, "learning_rate": 7.878622891104515e-07, "loss": 0.6087, "step": 48130 }, { "epoch": 0.8742554118843527, "grad_norm": 0.8359722455311622, "learning_rate": 7.856216786813042e-07, "loss": 0.6023, "step": 48140 }, { "epoch": 0.8744370187418277, "grad_norm": 0.8644380779315071, "learning_rate": 7.833841285634646e-07, "loss": 0.5971, "step": 48150 }, { "epoch": 0.8746186255993026, "grad_norm": 0.912909595171605, "learning_rate": 7.811496395000706e-07, "loss": 0.5975, "step": 48160 }, { "epoch": 0.8748002324567775, "grad_norm": 0.8674579195415699, "learning_rate": 7.789182122332517e-07, "loss": 0.5868, "step": 48170 }, { "epoch": 0.8749818393142526, "grad_norm": 0.897738540883802, "learning_rate": 7.766898475041174e-07, "loss": 0.6183, "step": 48180 }, { "epoch": 0.8751634461717275, "grad_norm": 0.8560821903521444, "learning_rate": 7.744645460527555e-07, "loss": 0.5946, "step": 48190 }, { "epoch": 0.8753450530292024, "grad_norm": 0.8660593894619131, "learning_rate": 7.722423086182452e-07, "loss": 0.6083, "step": 48200 }, { "epoch": 0.8755266598866773, "grad_norm": 0.8796932478298565, "learning_rate": 7.700231359386445e-07, "loss": 0.5916, "step": 48210 }, { "epoch": 0.8757082667441523, "grad_norm": 0.8478947553620612, "learning_rate": 7.678070287509887e-07, "loss": 0.5891, "step": 48220 }, { "epoch": 0.8758898736016272, "grad_norm": 0.9439295084371172, "learning_rate": 7.655939877913021e-07, "loss": 0.604, "step": 48230 }, { "epoch": 0.8760714804591021, "grad_norm": 0.862387889077448, "learning_rate": 7.633840137945858e-07, "loss": 0.6001, "step": 48240 }, { "epoch": 0.876253087316577, "grad_norm": 0.8839769903894014, "learning_rate": 7.611771074948259e-07, "loss": 0.5978, "step": 48250 }, { "epoch": 0.8764346941740521, "grad_norm": 0.8771768559960088, "learning_rate": 7.589732696249863e-07, "loss": 0.6025, "step": 48260 }, { "epoch": 0.876616301031527, "grad_norm": 0.8650449848068351, "learning_rate": 7.567725009170135e-07, "loss": 0.6092, "step": 48270 }, { "epoch": 0.8767979078890019, "grad_norm": 0.8845434886525716, "learning_rate": 7.54574802101834e-07, "loss": 0.621, "step": 48280 }, { "epoch": 0.8769795147464768, "grad_norm": 0.8853507752800345, "learning_rate": 7.523801739093573e-07, "loss": 0.602, "step": 48290 }, { "epoch": 0.8771611216039518, "grad_norm": 0.8872330367125111, "learning_rate": 7.50188617068468e-07, "loss": 0.601, "step": 48300 }, { "epoch": 0.8773427284614267, "grad_norm": 0.8601453635809506, "learning_rate": 7.480001323070362e-07, "loss": 0.5999, "step": 48310 }, { "epoch": 0.8775243353189016, "grad_norm": 0.8995870061637418, "learning_rate": 7.458147203519073e-07, "loss": 0.5974, "step": 48320 }, { "epoch": 0.8777059421763765, "grad_norm": 0.8737140379349363, "learning_rate": 7.436323819289093e-07, "loss": 0.6017, "step": 48330 }, { "epoch": 0.8778875490338515, "grad_norm": 0.8685883457937955, "learning_rate": 7.414531177628447e-07, "loss": 0.5852, "step": 48340 }, { "epoch": 0.8780691558913265, "grad_norm": 0.8799565714452185, "learning_rate": 7.392769285775037e-07, "loss": 0.6057, "step": 48350 }, { "epoch": 0.8782507627488014, "grad_norm": 0.9024064774560664, "learning_rate": 7.371038150956444e-07, "loss": 0.595, "step": 48360 }, { "epoch": 0.8784323696062764, "grad_norm": 0.8937913276706054, "learning_rate": 7.34933778039012e-07, "loss": 0.596, "step": 48370 }, { "epoch": 0.8786139764637513, "grad_norm": 0.9100133954509, "learning_rate": 7.327668181283276e-07, "loss": 0.6088, "step": 48380 }, { "epoch": 0.8787955833212262, "grad_norm": 0.8845225134431819, "learning_rate": 7.306029360832856e-07, "loss": 0.5995, "step": 48390 }, { "epoch": 0.8789771901787011, "grad_norm": 0.8985790907755153, "learning_rate": 7.284421326225654e-07, "loss": 0.5892, "step": 48400 }, { "epoch": 0.879158797036176, "grad_norm": 0.8749501912706572, "learning_rate": 7.262844084638177e-07, "loss": 0.5991, "step": 48410 }, { "epoch": 0.879340403893651, "grad_norm": 0.8384517259777029, "learning_rate": 7.241297643236767e-07, "loss": 0.5863, "step": 48420 }, { "epoch": 0.879522010751126, "grad_norm": 0.8856922521719502, "learning_rate": 7.219782009177479e-07, "loss": 0.6013, "step": 48430 }, { "epoch": 0.8797036176086009, "grad_norm": 0.8627465680817636, "learning_rate": 7.198297189606174e-07, "loss": 0.5855, "step": 48440 }, { "epoch": 0.8798852244660759, "grad_norm": 0.855319349991772, "learning_rate": 7.176843191658445e-07, "loss": 0.6068, "step": 48450 }, { "epoch": 0.8800668313235508, "grad_norm": 0.8879905670307994, "learning_rate": 7.155420022459691e-07, "loss": 0.6138, "step": 48460 }, { "epoch": 0.8802484381810257, "grad_norm": 0.8754307422471985, "learning_rate": 7.134027689125045e-07, "loss": 0.6106, "step": 48470 }, { "epoch": 0.8804300450385006, "grad_norm": 0.8816597491586761, "learning_rate": 7.112666198759377e-07, "loss": 0.6069, "step": 48480 }, { "epoch": 0.8806116518959756, "grad_norm": 0.8384475892629966, "learning_rate": 7.091335558457379e-07, "loss": 0.5953, "step": 48490 }, { "epoch": 0.8807932587534505, "grad_norm": 0.8487818192773088, "learning_rate": 7.070035775303419e-07, "loss": 0.5905, "step": 48500 }, { "epoch": 0.8809748656109254, "grad_norm": 0.8232756369188416, "learning_rate": 7.048766856371691e-07, "loss": 0.6005, "step": 48510 }, { "epoch": 0.8811564724684005, "grad_norm": 0.8377594770264032, "learning_rate": 7.027528808726069e-07, "loss": 0.5926, "step": 48520 }, { "epoch": 0.8813380793258754, "grad_norm": 0.8380123676731939, "learning_rate": 7.006321639420244e-07, "loss": 0.597, "step": 48530 }, { "epoch": 0.8815196861833503, "grad_norm": 0.851822944007648, "learning_rate": 6.98514535549757e-07, "loss": 0.6017, "step": 48540 }, { "epoch": 0.8817012930408252, "grad_norm": 0.8441266724116263, "learning_rate": 6.96399996399123e-07, "loss": 0.5878, "step": 48550 }, { "epoch": 0.8818828998983002, "grad_norm": 0.8578482562600722, "learning_rate": 6.942885471924077e-07, "loss": 0.5998, "step": 48560 }, { "epoch": 0.8820645067557751, "grad_norm": 0.8266706168800058, "learning_rate": 6.921801886308743e-07, "loss": 0.6003, "step": 48570 }, { "epoch": 0.88224611361325, "grad_norm": 0.89019231036372, "learning_rate": 6.900749214147584e-07, "loss": 0.5959, "step": 48580 }, { "epoch": 0.8824277204707249, "grad_norm": 0.8696163620052026, "learning_rate": 6.879727462432661e-07, "loss": 0.5887, "step": 48590 }, { "epoch": 0.8826093273282, "grad_norm": 0.8481296350975449, "learning_rate": 6.858736638145814e-07, "loss": 0.5823, "step": 48600 }, { "epoch": 0.8827909341856749, "grad_norm": 0.8544429414840097, "learning_rate": 6.837776748258596e-07, "loss": 0.5897, "step": 48610 }, { "epoch": 0.8829725410431498, "grad_norm": 0.8663965555054796, "learning_rate": 6.816847799732251e-07, "loss": 0.5941, "step": 48620 }, { "epoch": 0.8831541479006247, "grad_norm": 0.8897089791260445, "learning_rate": 6.795949799517809e-07, "loss": 0.6067, "step": 48630 }, { "epoch": 0.8833357547580997, "grad_norm": 0.8393336208824981, "learning_rate": 6.77508275455595e-07, "loss": 0.5994, "step": 48640 }, { "epoch": 0.8835173616155746, "grad_norm": 0.8874010542242553, "learning_rate": 6.754246671777142e-07, "loss": 0.5965, "step": 48650 }, { "epoch": 0.8836989684730495, "grad_norm": 0.8386489158006175, "learning_rate": 6.733441558101505e-07, "loss": 0.5902, "step": 48660 }, { "epoch": 0.8838805753305244, "grad_norm": 0.8529287649477119, "learning_rate": 6.712667420438934e-07, "loss": 0.5972, "step": 48670 }, { "epoch": 0.8840621821879994, "grad_norm": 0.8662550381232271, "learning_rate": 6.691924265688987e-07, "loss": 0.6089, "step": 48680 }, { "epoch": 0.8842437890454744, "grad_norm": 0.8559891725051779, "learning_rate": 6.671212100740976e-07, "loss": 0.6106, "step": 48690 }, { "epoch": 0.8844253959029493, "grad_norm": 0.8781649553577249, "learning_rate": 6.650530932473864e-07, "loss": 0.5905, "step": 48700 }, { "epoch": 0.8846070027604243, "grad_norm": 0.8859387885001382, "learning_rate": 6.629880767756392e-07, "loss": 0.5938, "step": 48710 }, { "epoch": 0.8847886096178992, "grad_norm": 0.8471943050634817, "learning_rate": 6.609261613446916e-07, "loss": 0.6014, "step": 48720 }, { "epoch": 0.8849702164753741, "grad_norm": 0.8916794525126146, "learning_rate": 6.588673476393592e-07, "loss": 0.5945, "step": 48730 }, { "epoch": 0.885151823332849, "grad_norm": 0.8873389606773315, "learning_rate": 6.568116363434185e-07, "loss": 0.6115, "step": 48740 }, { "epoch": 0.885333430190324, "grad_norm": 0.8905819327374204, "learning_rate": 6.547590281396232e-07, "loss": 0.6012, "step": 48750 }, { "epoch": 0.8855150370477989, "grad_norm": 0.8523854550576601, "learning_rate": 6.52709523709687e-07, "loss": 0.6043, "step": 48760 }, { "epoch": 0.8856966439052739, "grad_norm": 0.8185012319103859, "learning_rate": 6.506631237343053e-07, "loss": 0.5891, "step": 48770 }, { "epoch": 0.8858782507627488, "grad_norm": 0.8405961713890624, "learning_rate": 6.486198288931311e-07, "loss": 0.5971, "step": 48780 }, { "epoch": 0.8860598576202238, "grad_norm": 0.845068546371756, "learning_rate": 6.465796398647894e-07, "loss": 0.5976, "step": 48790 }, { "epoch": 0.8862414644776987, "grad_norm": 0.8831519107610668, "learning_rate": 6.445425573268804e-07, "loss": 0.6044, "step": 48800 }, { "epoch": 0.8864230713351736, "grad_norm": 0.8456436237776169, "learning_rate": 6.425085819559606e-07, "loss": 0.5958, "step": 48810 }, { "epoch": 0.8866046781926485, "grad_norm": 0.8498945090278135, "learning_rate": 6.404777144275665e-07, "loss": 0.6018, "step": 48820 }, { "epoch": 0.8867862850501235, "grad_norm": 0.8687773167992325, "learning_rate": 6.384499554161916e-07, "loss": 0.5983, "step": 48830 }, { "epoch": 0.8869678919075984, "grad_norm": 0.8656053701366279, "learning_rate": 6.364253055953073e-07, "loss": 0.5917, "step": 48840 }, { "epoch": 0.8871494987650733, "grad_norm": 0.8931359908597116, "learning_rate": 6.344037656373436e-07, "loss": 0.5941, "step": 48850 }, { "epoch": 0.8873311056225484, "grad_norm": 0.8681212242406577, "learning_rate": 6.323853362137044e-07, "loss": 0.5958, "step": 48860 }, { "epoch": 0.8875127124800233, "grad_norm": 0.8560130504432547, "learning_rate": 6.303700179947558e-07, "loss": 0.6138, "step": 48870 }, { "epoch": 0.8876943193374982, "grad_norm": 0.8867029302210888, "learning_rate": 6.283578116498312e-07, "loss": 0.6053, "step": 48880 }, { "epoch": 0.8878759261949731, "grad_norm": 0.873441838066996, "learning_rate": 6.263487178472339e-07, "loss": 0.5854, "step": 48890 }, { "epoch": 0.8880575330524481, "grad_norm": 0.8851255812063625, "learning_rate": 6.243427372542288e-07, "loss": 0.6053, "step": 48900 }, { "epoch": 0.888239139909923, "grad_norm": 0.8547792019508186, "learning_rate": 6.22339870537052e-07, "loss": 0.5945, "step": 48910 }, { "epoch": 0.8884207467673979, "grad_norm": 0.8490349069424856, "learning_rate": 6.203401183608981e-07, "loss": 0.5899, "step": 48920 }, { "epoch": 0.8886023536248728, "grad_norm": 0.8944651072269885, "learning_rate": 6.18343481389937e-07, "loss": 0.599, "step": 48930 }, { "epoch": 0.8887839604823479, "grad_norm": 0.8362477040270041, "learning_rate": 6.163499602872946e-07, "loss": 0.6105, "step": 48940 }, { "epoch": 0.8889655673398228, "grad_norm": 0.8768134460641149, "learning_rate": 6.143595557150695e-07, "loss": 0.6018, "step": 48950 }, { "epoch": 0.8891471741972977, "grad_norm": 0.8928540610147663, "learning_rate": 6.12372268334317e-07, "loss": 0.6047, "step": 48960 }, { "epoch": 0.8893287810547726, "grad_norm": 0.9016710836772189, "learning_rate": 6.103880988050681e-07, "loss": 0.6065, "step": 48970 }, { "epoch": 0.8895103879122476, "grad_norm": 0.8524500083597308, "learning_rate": 6.084070477863068e-07, "loss": 0.591, "step": 48980 }, { "epoch": 0.8896919947697225, "grad_norm": 0.8747195444859946, "learning_rate": 6.064291159359881e-07, "loss": 0.5904, "step": 48990 }, { "epoch": 0.8898736016271974, "grad_norm": 0.8501278697815686, "learning_rate": 6.044543039110307e-07, "loss": 0.6073, "step": 49000 }, { "epoch": 0.8900552084846723, "grad_norm": 0.8728463149984044, "learning_rate": 6.024826123673167e-07, "loss": 0.6029, "step": 49010 }, { "epoch": 0.8902368153421473, "grad_norm": 0.8419381089961487, "learning_rate": 6.005140419596878e-07, "loss": 0.5974, "step": 49020 }, { "epoch": 0.8904184221996223, "grad_norm": 0.8360002713297747, "learning_rate": 5.985485933419555e-07, "loss": 0.5936, "step": 49030 }, { "epoch": 0.8906000290570972, "grad_norm": 0.8345756842570928, "learning_rate": 5.965862671668898e-07, "loss": 0.5876, "step": 49040 }, { "epoch": 0.8907816359145722, "grad_norm": 0.8725678744425809, "learning_rate": 5.946270640862272e-07, "loss": 0.5809, "step": 49050 }, { "epoch": 0.8909632427720471, "grad_norm": 0.8345143280310326, "learning_rate": 5.926709847506617e-07, "loss": 0.6001, "step": 49060 }, { "epoch": 0.891144849629522, "grad_norm": 0.8916317169258589, "learning_rate": 5.90718029809857e-07, "loss": 0.6034, "step": 49070 }, { "epoch": 0.8913264564869969, "grad_norm": 0.8667702743521114, "learning_rate": 5.887681999124318e-07, "loss": 0.5935, "step": 49080 }, { "epoch": 0.8915080633444719, "grad_norm": 0.8557456461270003, "learning_rate": 5.868214957059737e-07, "loss": 0.59, "step": 49090 }, { "epoch": 0.8916896702019468, "grad_norm": 0.8597199000594616, "learning_rate": 5.848779178370256e-07, "loss": 0.5922, "step": 49100 }, { "epoch": 0.8918712770594218, "grad_norm": 0.8716790714813815, "learning_rate": 5.829374669510979e-07, "loss": 0.6005, "step": 49110 }, { "epoch": 0.8920528839168967, "grad_norm": 0.8873765889299161, "learning_rate": 5.810001436926582e-07, "loss": 0.6018, "step": 49120 }, { "epoch": 0.8922344907743717, "grad_norm": 0.8932132914193459, "learning_rate": 5.790659487051387e-07, "loss": 0.6012, "step": 49130 }, { "epoch": 0.8924160976318466, "grad_norm": 0.8489687555205003, "learning_rate": 5.771348826309286e-07, "loss": 0.6019, "step": 49140 }, { "epoch": 0.8925977044893215, "grad_norm": 0.9021906139633143, "learning_rate": 5.752069461113829e-07, "loss": 0.6037, "step": 49150 }, { "epoch": 0.8927793113467964, "grad_norm": 0.8796148456282977, "learning_rate": 5.732821397868115e-07, "loss": 0.6058, "step": 49160 }, { "epoch": 0.8929609182042714, "grad_norm": 0.8505629701207095, "learning_rate": 5.71360464296492e-07, "loss": 0.5877, "step": 49170 }, { "epoch": 0.8931425250617463, "grad_norm": 0.859840305387715, "learning_rate": 5.69441920278655e-07, "loss": 0.5896, "step": 49180 }, { "epoch": 0.8933241319192212, "grad_norm": 0.8596323725922342, "learning_rate": 5.675265083704928e-07, "loss": 0.5953, "step": 49190 }, { "epoch": 0.8935057387766963, "grad_norm": 0.8713616653818966, "learning_rate": 5.656142292081612e-07, "loss": 0.6016, "step": 49200 }, { "epoch": 0.8936873456341712, "grad_norm": 0.8560704730564721, "learning_rate": 5.637050834267721e-07, "loss": 0.5932, "step": 49210 }, { "epoch": 0.8938689524916461, "grad_norm": 0.8695093374453096, "learning_rate": 5.617990716603972e-07, "loss": 0.5872, "step": 49220 }, { "epoch": 0.894050559349121, "grad_norm": 0.893670306259777, "learning_rate": 5.598961945420678e-07, "loss": 0.6007, "step": 49230 }, { "epoch": 0.894232166206596, "grad_norm": 0.8569349130543878, "learning_rate": 5.579964527037762e-07, "loss": 0.5976, "step": 49240 }, { "epoch": 0.8944137730640709, "grad_norm": 0.8356041293902308, "learning_rate": 5.560998467764677e-07, "loss": 0.6078, "step": 49250 }, { "epoch": 0.8945953799215458, "grad_norm": 0.8782912494575024, "learning_rate": 5.54206377390053e-07, "loss": 0.606, "step": 49260 }, { "epoch": 0.8947769867790207, "grad_norm": 0.8702681307330178, "learning_rate": 5.523160451733955e-07, "loss": 0.593, "step": 49270 }, { "epoch": 0.8949585936364958, "grad_norm": 0.8599440979424381, "learning_rate": 5.504288507543187e-07, "loss": 0.5933, "step": 49280 }, { "epoch": 0.8951402004939707, "grad_norm": 0.8874148601967794, "learning_rate": 5.485447947596057e-07, "loss": 0.5996, "step": 49290 }, { "epoch": 0.8953218073514456, "grad_norm": 0.8850160595432615, "learning_rate": 5.466638778149946e-07, "loss": 0.5817, "step": 49300 }, { "epoch": 0.8955034142089205, "grad_norm": 0.8511159900569595, "learning_rate": 5.447861005451826e-07, "loss": 0.5864, "step": 49310 }, { "epoch": 0.8956850210663955, "grad_norm": 0.9005897834625864, "learning_rate": 5.429114635738231e-07, "loss": 0.598, "step": 49320 }, { "epoch": 0.8958666279238704, "grad_norm": 0.8576042509748208, "learning_rate": 5.41039967523529e-07, "loss": 0.6001, "step": 49330 }, { "epoch": 0.8960482347813453, "grad_norm": 0.8593755955206698, "learning_rate": 5.391716130158642e-07, "loss": 0.6171, "step": 49340 }, { "epoch": 0.8962298416388202, "grad_norm": 0.8556949586842982, "learning_rate": 5.373064006713569e-07, "loss": 0.5929, "step": 49350 }, { "epoch": 0.8964114484962952, "grad_norm": 0.8382076949502387, "learning_rate": 5.354443311094859e-07, "loss": 0.5852, "step": 49360 }, { "epoch": 0.8965930553537702, "grad_norm": 0.8838194382433646, "learning_rate": 5.335854049486888e-07, "loss": 0.588, "step": 49370 }, { "epoch": 0.8967746622112451, "grad_norm": 0.8316383010157812, "learning_rate": 5.317296228063595e-07, "loss": 0.5994, "step": 49380 }, { "epoch": 0.8969562690687201, "grad_norm": 0.8826968013929197, "learning_rate": 5.298769852988439e-07, "loss": 0.5966, "step": 49390 }, { "epoch": 0.897137875926195, "grad_norm": 0.8613428003962492, "learning_rate": 5.280274930414475e-07, "loss": 0.5904, "step": 49400 }, { "epoch": 0.8973194827836699, "grad_norm": 0.890741993569051, "learning_rate": 5.261811466484334e-07, "loss": 0.6045, "step": 49410 }, { "epoch": 0.8975010896411448, "grad_norm": 0.8454364335234351, "learning_rate": 5.243379467330123e-07, "loss": 0.581, "step": 49420 }, { "epoch": 0.8976826964986198, "grad_norm": 0.8704424945641861, "learning_rate": 5.224978939073577e-07, "loss": 0.6097, "step": 49430 }, { "epoch": 0.8978643033560947, "grad_norm": 0.8919909160790932, "learning_rate": 5.206609887825897e-07, "loss": 0.6045, "step": 49440 }, { "epoch": 0.8980459102135696, "grad_norm": 0.8611047443712345, "learning_rate": 5.188272319687926e-07, "loss": 0.5906, "step": 49450 }, { "epoch": 0.8982275170710446, "grad_norm": 0.8774570000565063, "learning_rate": 5.169966240749968e-07, "loss": 0.5792, "step": 49460 }, { "epoch": 0.8984091239285196, "grad_norm": 0.8591641405346248, "learning_rate": 5.151691657091929e-07, "loss": 0.5947, "step": 49470 }, { "epoch": 0.8985907307859945, "grad_norm": 0.8548178076264192, "learning_rate": 5.133448574783185e-07, "loss": 0.599, "step": 49480 }, { "epoch": 0.8987723376434694, "grad_norm": 0.8800917656474572, "learning_rate": 5.115236999882744e-07, "loss": 0.5978, "step": 49490 }, { "epoch": 0.8989539445009443, "grad_norm": 0.8416849870994334, "learning_rate": 5.097056938439049e-07, "loss": 0.5935, "step": 49500 }, { "epoch": 0.8991355513584193, "grad_norm": 0.8567770537497492, "learning_rate": 5.078908396490157e-07, "loss": 0.5955, "step": 49510 }, { "epoch": 0.8993171582158942, "grad_norm": 0.8245832761149959, "learning_rate": 5.060791380063613e-07, "loss": 0.594, "step": 49520 }, { "epoch": 0.8994987650733691, "grad_norm": 0.8699522985561774, "learning_rate": 5.042705895176514e-07, "loss": 0.5889, "step": 49530 }, { "epoch": 0.8996803719308442, "grad_norm": 0.8547220671081273, "learning_rate": 5.024651947835447e-07, "loss": 0.6028, "step": 49540 }, { "epoch": 0.8998619787883191, "grad_norm": 0.8746315744106218, "learning_rate": 5.006629544036579e-07, "loss": 0.5871, "step": 49550 }, { "epoch": 0.900043585645794, "grad_norm": 0.917067679294178, "learning_rate": 4.988638689765557e-07, "loss": 0.6129, "step": 49560 }, { "epoch": 0.9002251925032689, "grad_norm": 0.8611181507641166, "learning_rate": 4.97067939099759e-07, "loss": 0.5957, "step": 49570 }, { "epoch": 0.9004067993607439, "grad_norm": 0.8563286200489588, "learning_rate": 4.952751653697363e-07, "loss": 0.591, "step": 49580 }, { "epoch": 0.9005884062182188, "grad_norm": 0.8754076441865962, "learning_rate": 4.934855483819079e-07, "loss": 0.6012, "step": 49590 }, { "epoch": 0.9007700130756937, "grad_norm": 0.8564298198471553, "learning_rate": 4.916990887306516e-07, "loss": 0.5963, "step": 49600 }, { "epoch": 0.9009516199331686, "grad_norm": 0.8646595412731777, "learning_rate": 4.899157870092897e-07, "loss": 0.6058, "step": 49610 }, { "epoch": 0.9011332267906436, "grad_norm": 0.8549215595717202, "learning_rate": 4.881356438101015e-07, "loss": 0.595, "step": 49620 }, { "epoch": 0.9013148336481186, "grad_norm": 0.8061455267497787, "learning_rate": 4.863586597243109e-07, "loss": 0.5855, "step": 49630 }, { "epoch": 0.9014964405055935, "grad_norm": 0.8505267615326806, "learning_rate": 4.845848353420979e-07, "loss": 0.5933, "step": 49640 }, { "epoch": 0.9016780473630684, "grad_norm": 0.8682634037837921, "learning_rate": 4.828141712525902e-07, "loss": 0.604, "step": 49650 }, { "epoch": 0.9018596542205434, "grad_norm": 0.907138045217309, "learning_rate": 4.810466680438686e-07, "loss": 0.594, "step": 49660 }, { "epoch": 0.9020412610780183, "grad_norm": 0.8528853671655294, "learning_rate": 4.7928232630296e-07, "loss": 0.5954, "step": 49670 }, { "epoch": 0.9022228679354932, "grad_norm": 0.8379054872105376, "learning_rate": 4.775211466158469e-07, "loss": 0.5996, "step": 49680 }, { "epoch": 0.9024044747929681, "grad_norm": 0.8565364791110035, "learning_rate": 4.7576312956745695e-07, "loss": 0.6007, "step": 49690 }, { "epoch": 0.9025860816504431, "grad_norm": 0.8465353969328743, "learning_rate": 4.740082757416664e-07, "loss": 0.5925, "step": 49700 }, { "epoch": 0.9027676885079181, "grad_norm": 0.8697657506041588, "learning_rate": 4.7225658572130686e-07, "loss": 0.5941, "step": 49710 }, { "epoch": 0.902949295365393, "grad_norm": 0.898234328702232, "learning_rate": 4.7050806008815395e-07, "loss": 0.603, "step": 49720 }, { "epoch": 0.903130902222868, "grad_norm": 0.8631845446793404, "learning_rate": 4.687626994229344e-07, "loss": 0.5902, "step": 49730 }, { "epoch": 0.9033125090803429, "grad_norm": 0.8830228073344192, "learning_rate": 4.6702050430532333e-07, "loss": 0.608, "step": 49740 }, { "epoch": 0.9034941159378178, "grad_norm": 0.8881234553924879, "learning_rate": 4.652814753139456e-07, "loss": 0.598, "step": 49750 }, { "epoch": 0.9036757227952927, "grad_norm": 0.84102710951171, "learning_rate": 4.635456130263716e-07, "loss": 0.6, "step": 49760 }, { "epoch": 0.9038573296527677, "grad_norm": 0.8414947061310425, "learning_rate": 4.6181291801912444e-07, "loss": 0.6056, "step": 49770 }, { "epoch": 0.9040389365102426, "grad_norm": 0.8752857286079782, "learning_rate": 4.6008339086767164e-07, "loss": 0.6134, "step": 49780 }, { "epoch": 0.9042205433677175, "grad_norm": 0.8263723533771977, "learning_rate": 4.5835703214642814e-07, "loss": 0.5951, "step": 49790 }, { "epoch": 0.9044021502251925, "grad_norm": 0.8540103142025702, "learning_rate": 4.5663384242875976e-07, "loss": 0.5779, "step": 49800 }, { "epoch": 0.9045837570826675, "grad_norm": 0.8759199988722788, "learning_rate": 4.5491382228697866e-07, "loss": 0.5793, "step": 49810 }, { "epoch": 0.9047653639401424, "grad_norm": 0.8516057975654514, "learning_rate": 4.531969722923435e-07, "loss": 0.5866, "step": 49820 }, { "epoch": 0.9049469707976173, "grad_norm": 0.848631842080609, "learning_rate": 4.5148329301506035e-07, "loss": 0.6014, "step": 49830 }, { "epoch": 0.9051285776550922, "grad_norm": 0.8421326478204944, "learning_rate": 4.4977278502428166e-07, "loss": 0.5956, "step": 49840 }, { "epoch": 0.9053101845125672, "grad_norm": 0.88171088777752, "learning_rate": 4.4806544888810866e-07, "loss": 0.6055, "step": 49850 }, { "epoch": 0.9054917913700421, "grad_norm": 0.8993046123019667, "learning_rate": 4.463612851735866e-07, "loss": 0.5909, "step": 49860 }, { "epoch": 0.905673398227517, "grad_norm": 0.8657314923850885, "learning_rate": 4.446602944467082e-07, "loss": 0.599, "step": 49870 }, { "epoch": 0.9058550050849921, "grad_norm": 0.9271005860881671, "learning_rate": 4.429624772724139e-07, "loss": 0.5929, "step": 49880 }, { "epoch": 0.906036611942467, "grad_norm": 0.8778035937589084, "learning_rate": 4.4126783421458707e-07, "loss": 0.607, "step": 49890 }, { "epoch": 0.9062182187999419, "grad_norm": 0.9109689431687809, "learning_rate": 4.3957636583605854e-07, "loss": 0.6034, "step": 49900 }, { "epoch": 0.9063998256574168, "grad_norm": 0.8417079454466863, "learning_rate": 4.378880726986068e-07, "loss": 0.5938, "step": 49910 }, { "epoch": 0.9065814325148918, "grad_norm": 0.8622897900189543, "learning_rate": 4.362029553629499e-07, "loss": 0.6105, "step": 49920 }, { "epoch": 0.9067630393723667, "grad_norm": 0.8621770878435258, "learning_rate": 4.345210143887579e-07, "loss": 0.5908, "step": 49930 }, { "epoch": 0.9069446462298416, "grad_norm": 0.8477184916486901, "learning_rate": 4.3284225033464076e-07, "loss": 0.5988, "step": 49940 }, { "epoch": 0.9071262530873165, "grad_norm": 0.8612132609580473, "learning_rate": 4.3116666375815794e-07, "loss": 0.5949, "step": 49950 }, { "epoch": 0.9073078599447915, "grad_norm": 0.8881675298925598, "learning_rate": 4.294942552158088e-07, "loss": 0.5976, "step": 49960 }, { "epoch": 0.9074894668022665, "grad_norm": 0.8246127361049165, "learning_rate": 4.2782502526304115e-07, "loss": 0.5833, "step": 49970 }, { "epoch": 0.9076710736597414, "grad_norm": 0.8507646514189957, "learning_rate": 4.261589744542449e-07, "loss": 0.6063, "step": 49980 }, { "epoch": 0.9078526805172163, "grad_norm": 0.8425326083334697, "learning_rate": 4.2449610334275196e-07, "loss": 0.6055, "step": 49990 }, { "epoch": 0.9080342873746913, "grad_norm": 0.8414296618064051, "learning_rate": 4.2283641248084594e-07, "loss": 0.6013, "step": 50000 }, { "epoch": 0.9082158942321662, "grad_norm": 0.8277062438675368, "learning_rate": 4.211799024197438e-07, "loss": 0.5874, "step": 50010 }, { "epoch": 0.9083975010896411, "grad_norm": 0.897888338815637, "learning_rate": 4.1952657370961547e-07, "loss": 0.5925, "step": 50020 }, { "epoch": 0.908579107947116, "grad_norm": 0.8591889026964592, "learning_rate": 4.178764268995683e-07, "loss": 0.5994, "step": 50030 }, { "epoch": 0.908760714804591, "grad_norm": 0.8611793329777231, "learning_rate": 4.1622946253765614e-07, "loss": 0.596, "step": 50040 }, { "epoch": 0.908942321662066, "grad_norm": 0.8613471557067406, "learning_rate": 4.145856811708726e-07, "loss": 0.6044, "step": 50050 }, { "epoch": 0.9091239285195409, "grad_norm": 0.9099553515936636, "learning_rate": 4.1294508334515757e-07, "loss": 0.6042, "step": 50060 }, { "epoch": 0.9093055353770159, "grad_norm": 0.8673121217556687, "learning_rate": 4.113076696053908e-07, "loss": 0.5895, "step": 50070 }, { "epoch": 0.9094871422344908, "grad_norm": 0.8809739985250585, "learning_rate": 4.0967344049539725e-07, "loss": 0.5908, "step": 50080 }, { "epoch": 0.9096687490919657, "grad_norm": 0.8371875706863662, "learning_rate": 4.080423965579428e-07, "loss": 0.5971, "step": 50090 }, { "epoch": 0.9098503559494406, "grad_norm": 0.8617342953622659, "learning_rate": 4.0641453833473397e-07, "loss": 0.5966, "step": 50100 }, { "epoch": 0.9100319628069156, "grad_norm": 0.9265487840831813, "learning_rate": 4.047898663664218e-07, "loss": 0.5942, "step": 50110 }, { "epoch": 0.9102135696643905, "grad_norm": 0.8289637793498795, "learning_rate": 4.0316838119259685e-07, "loss": 0.5956, "step": 50120 }, { "epoch": 0.9103951765218654, "grad_norm": 0.8818204917072401, "learning_rate": 4.015500833517938e-07, "loss": 0.6102, "step": 50130 }, { "epoch": 0.9105767833793404, "grad_norm": 0.8850658998878963, "learning_rate": 3.999349733814861e-07, "loss": 0.5907, "step": 50140 }, { "epoch": 0.9107583902368154, "grad_norm": 0.846862941302184, "learning_rate": 3.983230518180914e-07, "loss": 0.5935, "step": 50150 }, { "epoch": 0.9109399970942903, "grad_norm": 0.8645724060679402, "learning_rate": 3.967143191969636e-07, "loss": 0.6082, "step": 50160 }, { "epoch": 0.9111216039517652, "grad_norm": 0.8893165064187816, "learning_rate": 3.951087760524053e-07, "loss": 0.6053, "step": 50170 }, { "epoch": 0.9113032108092401, "grad_norm": 0.8484386936519397, "learning_rate": 3.9350642291765106e-07, "loss": 0.6022, "step": 50180 }, { "epoch": 0.9114848176667151, "grad_norm": 0.8720214196352152, "learning_rate": 3.919072603248797e-07, "loss": 0.5995, "step": 50190 }, { "epoch": 0.91166642452419, "grad_norm": 0.884933465601828, "learning_rate": 3.9031128880521294e-07, "loss": 0.5923, "step": 50200 }, { "epoch": 0.9118480313816649, "grad_norm": 0.8824374532304173, "learning_rate": 3.887185088887113e-07, "loss": 0.5997, "step": 50210 }, { "epoch": 0.91202963823914, "grad_norm": 0.856606384852916, "learning_rate": 3.871289211043705e-07, "loss": 0.6026, "step": 50220 }, { "epoch": 0.9122112450966149, "grad_norm": 0.8582962711810932, "learning_rate": 3.8554252598013374e-07, "loss": 0.6093, "step": 50230 }, { "epoch": 0.9123928519540898, "grad_norm": 0.9388675930682939, "learning_rate": 3.839593240428774e-07, "loss": 0.5981, "step": 50240 }, { "epoch": 0.9125744588115647, "grad_norm": 0.8996819557785437, "learning_rate": 3.8237931581842194e-07, "loss": 0.5896, "step": 50250 }, { "epoch": 0.9127560656690397, "grad_norm": 0.9335187135019785, "learning_rate": 3.8080250183152313e-07, "loss": 0.5933, "step": 50260 }, { "epoch": 0.9129376725265146, "grad_norm": 0.8548347633829474, "learning_rate": 3.7922888260587876e-07, "loss": 0.5976, "step": 50270 }, { "epoch": 0.9131192793839895, "grad_norm": 0.8543607497756259, "learning_rate": 3.7765845866412743e-07, "loss": 0.5859, "step": 50280 }, { "epoch": 0.9133008862414644, "grad_norm": 0.8916312624819968, "learning_rate": 3.760912305278408e-07, "loss": 0.5917, "step": 50290 }, { "epoch": 0.9134824930989394, "grad_norm": 0.857702672823652, "learning_rate": 3.745271987175303e-07, "loss": 0.5929, "step": 50300 }, { "epoch": 0.9136640999564144, "grad_norm": 0.842370932065699, "learning_rate": 3.729663637526526e-07, "loss": 0.5922, "step": 50310 }, { "epoch": 0.9138457068138893, "grad_norm": 0.9305823828900535, "learning_rate": 3.7140872615159306e-07, "loss": 0.598, "step": 50320 }, { "epoch": 0.9140273136713642, "grad_norm": 0.8669194804922351, "learning_rate": 3.6985428643168344e-07, "loss": 0.5997, "step": 50330 }, { "epoch": 0.9142089205288392, "grad_norm": 0.8646526530508082, "learning_rate": 3.683030451091862e-07, "loss": 0.589, "step": 50340 }, { "epoch": 0.9143905273863141, "grad_norm": 0.8889037442747355, "learning_rate": 3.667550026993083e-07, "loss": 0.6011, "step": 50350 }, { "epoch": 0.914572134243789, "grad_norm": 0.8719903686125164, "learning_rate": 3.6521015971618724e-07, "loss": 0.5986, "step": 50360 }, { "epoch": 0.914753741101264, "grad_norm": 0.8405558062248374, "learning_rate": 3.6366851667290594e-07, "loss": 0.5933, "step": 50370 }, { "epoch": 0.9149353479587389, "grad_norm": 0.8750760555645528, "learning_rate": 3.6213007408147814e-07, "loss": 0.6026, "step": 50380 }, { "epoch": 0.9151169548162139, "grad_norm": 0.8611704464425828, "learning_rate": 3.605948324528541e-07, "loss": 0.5991, "step": 50390 }, { "epoch": 0.9152985616736888, "grad_norm": 0.8322638798068037, "learning_rate": 3.590627922969281e-07, "loss": 0.6028, "step": 50400 }, { "epoch": 0.9154801685311638, "grad_norm": 0.8207556049094731, "learning_rate": 3.575339541225231e-07, "loss": 0.6007, "step": 50410 }, { "epoch": 0.9156617753886387, "grad_norm": 0.8031296440982394, "learning_rate": 3.5600831843740523e-07, "loss": 0.5916, "step": 50420 }, { "epoch": 0.9158433822461136, "grad_norm": 0.8443728862662301, "learning_rate": 3.544858857482714e-07, "loss": 0.6059, "step": 50430 }, { "epoch": 0.9160249891035885, "grad_norm": 0.8759006935766281, "learning_rate": 3.529666565607592e-07, "loss": 0.6048, "step": 50440 }, { "epoch": 0.9162065959610635, "grad_norm": 0.8860608375625809, "learning_rate": 3.5145063137943746e-07, "loss": 0.5941, "step": 50450 }, { "epoch": 0.9163882028185384, "grad_norm": 0.8753749411340448, "learning_rate": 3.4993781070781664e-07, "loss": 0.5971, "step": 50460 }, { "epoch": 0.9165698096760133, "grad_norm": 0.8748475618238113, "learning_rate": 3.484281950483381e-07, "loss": 0.6032, "step": 50470 }, { "epoch": 0.9167514165334884, "grad_norm": 0.8918151741156254, "learning_rate": 3.4692178490238316e-07, "loss": 0.593, "step": 50480 }, { "epoch": 0.9169330233909633, "grad_norm": 0.8709297400952711, "learning_rate": 3.4541858077026367e-07, "loss": 0.6099, "step": 50490 }, { "epoch": 0.9171146302484382, "grad_norm": 0.876114566134278, "learning_rate": 3.439185831512293e-07, "loss": 0.5968, "step": 50500 }, { "epoch": 0.9172962371059131, "grad_norm": 0.8472147677716985, "learning_rate": 3.4242179254346476e-07, "loss": 0.5916, "step": 50510 }, { "epoch": 0.917477843963388, "grad_norm": 0.9266767799376868, "learning_rate": 3.40928209444088e-07, "loss": 0.6013, "step": 50520 }, { "epoch": 0.917659450820863, "grad_norm": 0.8952352291307268, "learning_rate": 3.394378343491567e-07, "loss": 0.5931, "step": 50530 }, { "epoch": 0.9178410576783379, "grad_norm": 0.8846601227739327, "learning_rate": 3.3795066775365483e-07, "loss": 0.6063, "step": 50540 }, { "epoch": 0.9180226645358128, "grad_norm": 0.9566050553335897, "learning_rate": 3.3646671015150955e-07, "loss": 0.6031, "step": 50550 }, { "epoch": 0.9182042713932879, "grad_norm": 0.840429969831412, "learning_rate": 3.349859620355744e-07, "loss": 0.5887, "step": 50560 }, { "epoch": 0.9183858782507628, "grad_norm": 0.8567865191874051, "learning_rate": 3.335084238976438e-07, "loss": 0.5893, "step": 50570 }, { "epoch": 0.9185674851082377, "grad_norm": 0.8681272059244632, "learning_rate": 3.3203409622844073e-07, "loss": 0.5821, "step": 50580 }, { "epoch": 0.9187490919657126, "grad_norm": 0.8830782026093863, "learning_rate": 3.305629795176235e-07, "loss": 0.5963, "step": 50590 }, { "epoch": 0.9189306988231876, "grad_norm": 0.8766735131368025, "learning_rate": 3.290950742537846e-07, "loss": 0.576, "step": 50600 }, { "epoch": 0.9191123056806625, "grad_norm": 0.9020263282578124, "learning_rate": 3.27630380924453e-07, "loss": 0.5986, "step": 50610 }, { "epoch": 0.9192939125381374, "grad_norm": 0.8780885527914815, "learning_rate": 3.2616890001608393e-07, "loss": 0.6021, "step": 50620 }, { "epoch": 0.9194755193956123, "grad_norm": 0.9083922939369813, "learning_rate": 3.247106320140725e-07, "loss": 0.583, "step": 50630 }, { "epoch": 0.9196571262530873, "grad_norm": 0.8602510563507046, "learning_rate": 3.232555774027413e-07, "loss": 0.6048, "step": 50640 }, { "epoch": 0.9198387331105623, "grad_norm": 0.8218635689396526, "learning_rate": 3.2180373666535035e-07, "loss": 0.5966, "step": 50650 }, { "epoch": 0.9200203399680372, "grad_norm": 0.9222511048438472, "learning_rate": 3.2035511028408626e-07, "loss": 0.5858, "step": 50660 }, { "epoch": 0.9202019468255122, "grad_norm": 0.8858800339483058, "learning_rate": 3.1890969874007635e-07, "loss": 0.5989, "step": 50670 }, { "epoch": 0.9203835536829871, "grad_norm": 0.9264310880991748, "learning_rate": 3.174675025133733e-07, "loss": 0.5934, "step": 50680 }, { "epoch": 0.920565160540462, "grad_norm": 0.9137378155492704, "learning_rate": 3.1602852208296507e-07, "loss": 0.6036, "step": 50690 }, { "epoch": 0.9207467673979369, "grad_norm": 0.8608174004804484, "learning_rate": 3.1459275792677047e-07, "loss": 0.5981, "step": 50700 }, { "epoch": 0.9209283742554119, "grad_norm": 0.879011650499874, "learning_rate": 3.1316021052164026e-07, "loss": 0.5898, "step": 50710 }, { "epoch": 0.9211099811128868, "grad_norm": 0.877381251806271, "learning_rate": 3.117308803433572e-07, "loss": 0.5771, "step": 50720 }, { "epoch": 0.9212915879703618, "grad_norm": 0.840571779170788, "learning_rate": 3.10304767866636e-07, "loss": 0.5921, "step": 50730 }, { "epoch": 0.9214731948278367, "grad_norm": 0.8737805299249259, "learning_rate": 3.0888187356512113e-07, "loss": 0.5939, "step": 50740 }, { "epoch": 0.9216548016853117, "grad_norm": 0.8687425220640963, "learning_rate": 3.074621979113901e-07, "loss": 0.5943, "step": 50750 }, { "epoch": 0.9218364085427866, "grad_norm": 0.8446737037466201, "learning_rate": 3.060457413769491e-07, "loss": 0.591, "step": 50760 }, { "epoch": 0.9220180154002615, "grad_norm": 0.9083204233990143, "learning_rate": 3.0463250443223845e-07, "loss": 0.5967, "step": 50770 }, { "epoch": 0.9221996222577364, "grad_norm": 0.8446701689008853, "learning_rate": 3.032224875466272e-07, "loss": 0.6, "step": 50780 }, { "epoch": 0.9223812291152114, "grad_norm": 0.86608037061224, "learning_rate": 3.018156911884129e-07, "loss": 0.5941, "step": 50790 }, { "epoch": 0.9225628359726863, "grad_norm": 0.893322594101028, "learning_rate": 3.004121158248274e-07, "loss": 0.5934, "step": 50800 }, { "epoch": 0.9227444428301612, "grad_norm": 0.8824066131884734, "learning_rate": 2.9901176192203005e-07, "loss": 0.5931, "step": 50810 }, { "epoch": 0.9229260496876363, "grad_norm": 0.8771538138472658, "learning_rate": 2.9761462994511326e-07, "loss": 0.5924, "step": 50820 }, { "epoch": 0.9231076565451112, "grad_norm": 0.8455971734204834, "learning_rate": 2.9622072035809467e-07, "loss": 0.5993, "step": 50830 }, { "epoch": 0.9232892634025861, "grad_norm": 0.8663182995363549, "learning_rate": 2.948300336239263e-07, "loss": 0.6031, "step": 50840 }, { "epoch": 0.923470870260061, "grad_norm": 0.8721249360723994, "learning_rate": 2.934425702044863e-07, "loss": 0.5932, "step": 50850 }, { "epoch": 0.923652477117536, "grad_norm": 0.8604076532290866, "learning_rate": 2.920583305605851e-07, "loss": 0.5877, "step": 50860 }, { "epoch": 0.9238340839750109, "grad_norm": 0.8824045294970384, "learning_rate": 2.906773151519604e-07, "loss": 0.5837, "step": 50870 }, { "epoch": 0.9240156908324858, "grad_norm": 0.853734444503298, "learning_rate": 2.8929952443728203e-07, "loss": 0.6008, "step": 50880 }, { "epoch": 0.9241972976899607, "grad_norm": 0.8619926363015991, "learning_rate": 2.8792495887414397e-07, "loss": 0.5942, "step": 50890 }, { "epoch": 0.9243789045474358, "grad_norm": 0.873427441756198, "learning_rate": 2.8655361891907096e-07, "loss": 0.5784, "step": 50900 }, { "epoch": 0.9245605114049107, "grad_norm": 0.887595562974456, "learning_rate": 2.85185505027521e-07, "loss": 0.5927, "step": 50910 }, { "epoch": 0.9247421182623856, "grad_norm": 0.8628874730771574, "learning_rate": 2.838206176538727e-07, "loss": 0.5899, "step": 50920 }, { "epoch": 0.9249237251198605, "grad_norm": 0.8819338111023521, "learning_rate": 2.824589572514402e-07, "loss": 0.6138, "step": 50930 }, { "epoch": 0.9251053319773355, "grad_norm": 0.8472204924339864, "learning_rate": 2.811005242724607e-07, "loss": 0.5995, "step": 50940 }, { "epoch": 0.9252869388348104, "grad_norm": 0.8626713198380991, "learning_rate": 2.7974531916810434e-07, "loss": 0.6147, "step": 50950 }, { "epoch": 0.9254685456922853, "grad_norm": 0.860271890691684, "learning_rate": 2.783933423884633e-07, "loss": 0.5991, "step": 50960 }, { "epoch": 0.9256501525497602, "grad_norm": 0.8687946476229572, "learning_rate": 2.770445943825639e-07, "loss": 0.5983, "step": 50970 }, { "epoch": 0.9258317594072352, "grad_norm": 0.8737998453961988, "learning_rate": 2.756990755983546e-07, "loss": 0.5891, "step": 50980 }, { "epoch": 0.9260133662647102, "grad_norm": 0.8488763077651376, "learning_rate": 2.743567864827157e-07, "loss": 0.5885, "step": 50990 }, { "epoch": 0.9261949731221851, "grad_norm": 0.8020789996961979, "learning_rate": 2.730177274814505e-07, "loss": 0.5937, "step": 51000 }, { "epoch": 0.92637657997966, "grad_norm": 0.8281175839469548, "learning_rate": 2.716818990392944e-07, "loss": 0.5933, "step": 51010 }, { "epoch": 0.926558186837135, "grad_norm": 0.8643199466364525, "learning_rate": 2.703493015999048e-07, "loss": 0.6015, "step": 51020 }, { "epoch": 0.9267397936946099, "grad_norm": 0.9084543351822277, "learning_rate": 2.69019935605872e-07, "loss": 0.5999, "step": 51030 }, { "epoch": 0.9269214005520848, "grad_norm": 0.8465338547852197, "learning_rate": 2.6769380149870627e-07, "loss": 0.5885, "step": 51040 }, { "epoch": 0.9271030074095598, "grad_norm": 0.878255844363884, "learning_rate": 2.6637089971885076e-07, "loss": 0.6063, "step": 51050 }, { "epoch": 0.9272846142670347, "grad_norm": 0.8878509825361752, "learning_rate": 2.6505123070566963e-07, "loss": 0.5913, "step": 51060 }, { "epoch": 0.9274662211245097, "grad_norm": 0.8606641884694044, "learning_rate": 2.637347948974578e-07, "loss": 0.5933, "step": 51070 }, { "epoch": 0.9276478279819846, "grad_norm": 0.8989909169971887, "learning_rate": 2.6242159273143444e-07, "loss": 0.5923, "step": 51080 }, { "epoch": 0.9278294348394596, "grad_norm": 0.9047831271250143, "learning_rate": 2.6111162464374393e-07, "loss": 0.5891, "step": 51090 }, { "epoch": 0.9280110416969345, "grad_norm": 0.8363976446597889, "learning_rate": 2.5980489106945616e-07, "loss": 0.5851, "step": 51100 }, { "epoch": 0.9281926485544094, "grad_norm": 0.858137585872937, "learning_rate": 2.5850139244257054e-07, "loss": 0.6035, "step": 51110 }, { "epoch": 0.9283742554118843, "grad_norm": 0.9399107613246691, "learning_rate": 2.572011291960086e-07, "loss": 0.6007, "step": 51120 }, { "epoch": 0.9285558622693593, "grad_norm": 1.0112848082795531, "learning_rate": 2.5590410176161815e-07, "loss": 0.594, "step": 51130 }, { "epoch": 0.9287374691268342, "grad_norm": 0.8995344871558018, "learning_rate": 2.546103105701725e-07, "loss": 0.5973, "step": 51140 }, { "epoch": 0.9289190759843091, "grad_norm": 0.8563966867984826, "learning_rate": 2.5331975605137007e-07, "loss": 0.5858, "step": 51150 }, { "epoch": 0.9291006828417842, "grad_norm": 0.9184695518811011, "learning_rate": 2.5203243863383467e-07, "loss": 0.5999, "step": 51160 }, { "epoch": 0.9292822896992591, "grad_norm": 0.8295246794528354, "learning_rate": 2.5074835874511426e-07, "loss": 0.5863, "step": 51170 }, { "epoch": 0.929463896556734, "grad_norm": 0.8795557206095811, "learning_rate": 2.4946751681168225e-07, "loss": 0.5957, "step": 51180 }, { "epoch": 0.9296455034142089, "grad_norm": 0.8788105302931798, "learning_rate": 2.4818991325893385e-07, "loss": 0.5991, "step": 51190 }, { "epoch": 0.9298271102716839, "grad_norm": 0.9154407035016755, "learning_rate": 2.469155485111963e-07, "loss": 0.5896, "step": 51200 }, { "epoch": 0.9300087171291588, "grad_norm": 0.8277963334580732, "learning_rate": 2.4564442299171096e-07, "loss": 0.5834, "step": 51210 }, { "epoch": 0.9301903239866337, "grad_norm": 0.8785763106381973, "learning_rate": 2.4437653712265144e-07, "loss": 0.6009, "step": 51220 }, { "epoch": 0.9303719308441086, "grad_norm": 0.8678834553475003, "learning_rate": 2.431118913251107e-07, "loss": 0.5768, "step": 51230 }, { "epoch": 0.9305535377015837, "grad_norm": 0.877208076926464, "learning_rate": 2.4185048601910957e-07, "loss": 0.6147, "step": 51240 }, { "epoch": 0.9307351445590586, "grad_norm": 0.886260392712357, "learning_rate": 2.4059232162358726e-07, "loss": 0.5966, "step": 51250 }, { "epoch": 0.9309167514165335, "grad_norm": 0.8811733004002027, "learning_rate": 2.393373985564118e-07, "loss": 0.5987, "step": 51260 }, { "epoch": 0.9310983582740084, "grad_norm": 0.8753943611860133, "learning_rate": 2.3808571723437202e-07, "loss": 0.5962, "step": 51270 }, { "epoch": 0.9312799651314834, "grad_norm": 0.8566145704627197, "learning_rate": 2.3683727807318203e-07, "loss": 0.5955, "step": 51280 }, { "epoch": 0.9314615719889583, "grad_norm": 0.8553394052963805, "learning_rate": 2.355920814874757e-07, "loss": 0.6038, "step": 51290 }, { "epoch": 0.9316431788464332, "grad_norm": 0.8365732251849357, "learning_rate": 2.3435012789081223e-07, "loss": 0.5903, "step": 51300 }, { "epoch": 0.9318247857039081, "grad_norm": 0.8543380980841866, "learning_rate": 2.3311141769567502e-07, "loss": 0.5975, "step": 51310 }, { "epoch": 0.9320063925613831, "grad_norm": 0.8690650259497952, "learning_rate": 2.3187595131346719e-07, "loss": 0.607, "step": 51320 }, { "epoch": 0.9321879994188581, "grad_norm": 0.8229404269553914, "learning_rate": 2.3064372915451827e-07, "loss": 0.5901, "step": 51330 }, { "epoch": 0.932369606276333, "grad_norm": 0.8193225304406748, "learning_rate": 2.2941475162807537e-07, "loss": 0.5887, "step": 51340 }, { "epoch": 0.932551213133808, "grad_norm": 0.8321418622615243, "learning_rate": 2.2818901914231307e-07, "loss": 0.6046, "step": 51350 }, { "epoch": 0.9327328199912829, "grad_norm": 0.8794891057161165, "learning_rate": 2.269665321043235e-07, "loss": 0.5821, "step": 51360 }, { "epoch": 0.9329144268487578, "grad_norm": 0.8950843188473638, "learning_rate": 2.2574729092012637e-07, "loss": 0.5912, "step": 51370 }, { "epoch": 0.9330960337062327, "grad_norm": 0.8646342299541784, "learning_rate": 2.2453129599465774e-07, "loss": 0.5877, "step": 51380 }, { "epoch": 0.9332776405637077, "grad_norm": 0.8590345712796955, "learning_rate": 2.2331854773178007e-07, "loss": 0.5938, "step": 51390 }, { "epoch": 0.9334592474211826, "grad_norm": 0.8792901127545714, "learning_rate": 2.2210904653427344e-07, "loss": 0.5977, "step": 51400 }, { "epoch": 0.9336408542786576, "grad_norm": 0.874398883420994, "learning_rate": 2.209027928038432e-07, "loss": 0.5987, "step": 51410 }, { "epoch": 0.9338224611361325, "grad_norm": 0.8724683162050675, "learning_rate": 2.1969978694111439e-07, "loss": 0.5913, "step": 51420 }, { "epoch": 0.9340040679936075, "grad_norm": 0.8762841871854883, "learning_rate": 2.1850002934563296e-07, "loss": 0.5936, "step": 51430 }, { "epoch": 0.9341856748510824, "grad_norm": 0.8722682016485308, "learning_rate": 2.1730352041586578e-07, "loss": 0.5873, "step": 51440 }, { "epoch": 0.9343672817085573, "grad_norm": 0.8420068880883677, "learning_rate": 2.161102605492027e-07, "loss": 0.59, "step": 51450 }, { "epoch": 0.9345488885660322, "grad_norm": 0.8893919125349775, "learning_rate": 2.1492025014195338e-07, "loss": 0.5906, "step": 51460 }, { "epoch": 0.9347304954235072, "grad_norm": 0.8526607219333723, "learning_rate": 2.1373348958934726e-07, "loss": 0.5901, "step": 51470 }, { "epoch": 0.9349121022809821, "grad_norm": 0.8590329270480589, "learning_rate": 2.125499792855379e-07, "loss": 0.6132, "step": 51480 }, { "epoch": 0.935093709138457, "grad_norm": 0.889972235235948, "learning_rate": 2.113697196235931e-07, "loss": 0.608, "step": 51490 }, { "epoch": 0.935275315995932, "grad_norm": 0.8639581298425212, "learning_rate": 2.101927109955071e-07, "loss": 0.5945, "step": 51500 }, { "epoch": 0.935456922853407, "grad_norm": 0.8408256183093957, "learning_rate": 2.0901895379219271e-07, "loss": 0.5865, "step": 51510 }, { "epoch": 0.9356385297108819, "grad_norm": 0.8190420696815546, "learning_rate": 2.0784844840347928e-07, "loss": 0.585, "step": 51520 }, { "epoch": 0.9358201365683568, "grad_norm": 0.8944449835480178, "learning_rate": 2.0668119521812246e-07, "loss": 0.6027, "step": 51530 }, { "epoch": 0.9360017434258318, "grad_norm": 0.8431145646740723, "learning_rate": 2.0551719462379216e-07, "loss": 0.5992, "step": 51540 }, { "epoch": 0.9361833502833067, "grad_norm": 0.842762677165916, "learning_rate": 2.0435644700708245e-07, "loss": 0.5895, "step": 51550 }, { "epoch": 0.9363649571407816, "grad_norm": 0.8446480552597863, "learning_rate": 2.0319895275350275e-07, "loss": 0.5834, "step": 51560 }, { "epoch": 0.9365465639982565, "grad_norm": 0.8723273190738072, "learning_rate": 2.0204471224748447e-07, "loss": 0.5894, "step": 51570 }, { "epoch": 0.9367281708557316, "grad_norm": 0.8949474988982455, "learning_rate": 2.0089372587237864e-07, "loss": 0.5967, "step": 51580 }, { "epoch": 0.9369097777132065, "grad_norm": 0.9020792071787256, "learning_rate": 1.9974599401045514e-07, "loss": 0.6087, "step": 51590 }, { "epoch": 0.9370913845706814, "grad_norm": 0.9001764749420382, "learning_rate": 1.986015170429012e-07, "loss": 0.5896, "step": 51600 }, { "epoch": 0.9372729914281563, "grad_norm": 0.8592652750967021, "learning_rate": 1.9746029534982615e-07, "loss": 0.5992, "step": 51610 }, { "epoch": 0.9374545982856313, "grad_norm": 0.8469351922497008, "learning_rate": 1.9632232931025563e-07, "loss": 0.5843, "step": 51620 }, { "epoch": 0.9376362051431062, "grad_norm": 0.8756345813098668, "learning_rate": 1.9518761930213404e-07, "loss": 0.5914, "step": 51630 }, { "epoch": 0.9378178120005811, "grad_norm": 0.9140846019671761, "learning_rate": 1.9405616570232654e-07, "loss": 0.5973, "step": 51640 }, { "epoch": 0.937999418858056, "grad_norm": 0.8879234803206766, "learning_rate": 1.929279688866148e-07, "loss": 0.5964, "step": 51650 }, { "epoch": 0.938181025715531, "grad_norm": 0.8401562581639347, "learning_rate": 1.918030292297013e-07, "loss": 0.5893, "step": 51660 }, { "epoch": 0.938362632573006, "grad_norm": 0.8764233343473938, "learning_rate": 1.906813471052016e-07, "loss": 0.5876, "step": 51670 }, { "epoch": 0.9385442394304809, "grad_norm": 0.8076826969459957, "learning_rate": 1.8956292288565436e-07, "loss": 0.601, "step": 51680 }, { "epoch": 0.9387258462879559, "grad_norm": 0.8812185067493794, "learning_rate": 1.8844775694251583e-07, "loss": 0.6069, "step": 51690 }, { "epoch": 0.9389074531454308, "grad_norm": 0.8763412822685768, "learning_rate": 1.8733584964615637e-07, "loss": 0.593, "step": 51700 }, { "epoch": 0.9390890600029057, "grad_norm": 0.8556614656074828, "learning_rate": 1.8622720136586726e-07, "loss": 0.5985, "step": 51710 }, { "epoch": 0.9392706668603806, "grad_norm": 0.8650280815467363, "learning_rate": 1.8512181246985726e-07, "loss": 0.592, "step": 51720 }, { "epoch": 0.9394522737178556, "grad_norm": 0.8578302371099193, "learning_rate": 1.8401968332525278e-07, "loss": 0.5955, "step": 51730 }, { "epoch": 0.9396338805753305, "grad_norm": 0.869716164469088, "learning_rate": 1.8292081429809428e-07, "loss": 0.5916, "step": 51740 }, { "epoch": 0.9398154874328054, "grad_norm": 0.831098228940813, "learning_rate": 1.8182520575334317e-07, "loss": 0.6134, "step": 51750 }, { "epoch": 0.9399970942902804, "grad_norm": 0.8382327738428708, "learning_rate": 1.8073285805487727e-07, "loss": 0.6023, "step": 51760 }, { "epoch": 0.9401787011477554, "grad_norm": 0.8624715108198086, "learning_rate": 1.7964377156549085e-07, "loss": 0.6004, "step": 51770 }, { "epoch": 0.9403603080052303, "grad_norm": 0.85941033103368, "learning_rate": 1.785579466468945e-07, "loss": 0.5912, "step": 51780 }, { "epoch": 0.9405419148627052, "grad_norm": 0.8646368071458568, "learning_rate": 1.7747538365971651e-07, "loss": 0.605, "step": 51790 }, { "epoch": 0.9407235217201801, "grad_norm": 0.8573023113885729, "learning_rate": 1.7639608296350143e-07, "loss": 0.5918, "step": 51800 }, { "epoch": 0.9409051285776551, "grad_norm": 0.9062197852108298, "learning_rate": 1.7532004491671029e-07, "loss": 0.5968, "step": 51810 }, { "epoch": 0.94108673543513, "grad_norm": 0.8740278248041746, "learning_rate": 1.7424726987671947e-07, "loss": 0.5869, "step": 51820 }, { "epoch": 0.9412683422926049, "grad_norm": 0.8791863061908582, "learning_rate": 1.7317775819982506e-07, "loss": 0.5908, "step": 51830 }, { "epoch": 0.94144994915008, "grad_norm": 0.8847472016854879, "learning_rate": 1.721115102412352e-07, "loss": 0.5884, "step": 51840 }, { "epoch": 0.9416315560075549, "grad_norm": 0.8481358755311758, "learning_rate": 1.7104852635507763e-07, "loss": 0.5966, "step": 51850 }, { "epoch": 0.9418131628650298, "grad_norm": 0.8513023591025759, "learning_rate": 1.6998880689439224e-07, "loss": 0.585, "step": 51860 }, { "epoch": 0.9419947697225047, "grad_norm": 0.8281787258900423, "learning_rate": 1.6893235221113636e-07, "loss": 0.5837, "step": 51870 }, { "epoch": 0.9421763765799797, "grad_norm": 0.8849522760378886, "learning_rate": 1.67879162656186e-07, "loss": 0.5964, "step": 51880 }, { "epoch": 0.9423579834374546, "grad_norm": 0.833714324052718, "learning_rate": 1.668292385793291e-07, "loss": 0.5917, "step": 51890 }, { "epoch": 0.9425395902949295, "grad_norm": 0.866542812336923, "learning_rate": 1.6578258032926898e-07, "loss": 0.5958, "step": 51900 }, { "epoch": 0.9427211971524044, "grad_norm": 0.8930920295200071, "learning_rate": 1.6473918825362644e-07, "loss": 0.6182, "step": 51910 }, { "epoch": 0.9429028040098794, "grad_norm": 0.8785131501598981, "learning_rate": 1.6369906269893543e-07, "loss": 0.6035, "step": 51920 }, { "epoch": 0.9430844108673544, "grad_norm": 0.8846413962096159, "learning_rate": 1.6266220401064737e-07, "loss": 0.5951, "step": 51930 }, { "epoch": 0.9432660177248293, "grad_norm": 0.8513575360430516, "learning_rate": 1.616286125331268e-07, "loss": 0.5941, "step": 51940 }, { "epoch": 0.9434476245823042, "grad_norm": 0.8888021001354748, "learning_rate": 1.6059828860965353e-07, "loss": 0.6057, "step": 51950 }, { "epoch": 0.9436292314397792, "grad_norm": 0.8725416342626169, "learning_rate": 1.595712325824217e-07, "loss": 0.6102, "step": 51960 }, { "epoch": 0.9438108382972541, "grad_norm": 0.856503619260753, "learning_rate": 1.585474447925428e-07, "loss": 0.6067, "step": 51970 }, { "epoch": 0.943992445154729, "grad_norm": 0.8953527569358019, "learning_rate": 1.5752692558003935e-07, "loss": 0.5934, "step": 51980 }, { "epoch": 0.944174052012204, "grad_norm": 0.8674238272962447, "learning_rate": 1.5650967528384908e-07, "loss": 0.6008, "step": 51990 }, { "epoch": 0.9443556588696789, "grad_norm": 0.8804609234323635, "learning_rate": 1.554956942418262e-07, "loss": 0.6061, "step": 52000 }, { "epoch": 0.9445372657271539, "grad_norm": 0.8658661986842091, "learning_rate": 1.5448498279073687e-07, "loss": 0.5875, "step": 52010 }, { "epoch": 0.9447188725846288, "grad_norm": 0.8685256609120359, "learning_rate": 1.5347754126626258e-07, "loss": 0.5927, "step": 52020 }, { "epoch": 0.9449004794421038, "grad_norm": 0.8729566988492828, "learning_rate": 1.5247337000299567e-07, "loss": 0.6041, "step": 52030 }, { "epoch": 0.9450820862995787, "grad_norm": 0.8782713112315934, "learning_rate": 1.5147246933444937e-07, "loss": 0.6065, "step": 52040 }, { "epoch": 0.9452636931570536, "grad_norm": 0.8404627124852093, "learning_rate": 1.5047483959304332e-07, "loss": 0.587, "step": 52050 }, { "epoch": 0.9454453000145285, "grad_norm": 0.8345189627467672, "learning_rate": 1.4948048111011472e-07, "loss": 0.5935, "step": 52060 }, { "epoch": 0.9456269068720035, "grad_norm": 0.9020033718019311, "learning_rate": 1.484893942159127e-07, "loss": 0.5884, "step": 52070 }, { "epoch": 0.9458085137294784, "grad_norm": 0.869347339989739, "learning_rate": 1.4750157923960063e-07, "loss": 0.5911, "step": 52080 }, { "epoch": 0.9459901205869533, "grad_norm": 0.8407539145289069, "learning_rate": 1.4651703650925607e-07, "loss": 0.5977, "step": 52090 }, { "epoch": 0.9461717274444283, "grad_norm": 0.8629277649977263, "learning_rate": 1.455357663518675e-07, "loss": 0.595, "step": 52100 }, { "epoch": 0.9463533343019033, "grad_norm": 0.8483028434324432, "learning_rate": 1.4455776909333975e-07, "loss": 0.5916, "step": 52110 }, { "epoch": 0.9465349411593782, "grad_norm": 0.8255915557641337, "learning_rate": 1.435830450584852e-07, "loss": 0.5996, "step": 52120 }, { "epoch": 0.9467165480168531, "grad_norm": 0.8726524094468346, "learning_rate": 1.42611594571036e-07, "loss": 0.6031, "step": 52130 }, { "epoch": 0.946898154874328, "grad_norm": 0.8664842497084191, "learning_rate": 1.416434179536319e-07, "loss": 0.5864, "step": 52140 }, { "epoch": 0.947079761731803, "grad_norm": 0.8381030055535702, "learning_rate": 1.4067851552782675e-07, "loss": 0.5803, "step": 52150 }, { "epoch": 0.9472613685892779, "grad_norm": 0.8632565523256682, "learning_rate": 1.3971688761408864e-07, "loss": 0.589, "step": 52160 }, { "epoch": 0.9474429754467528, "grad_norm": 0.888111934459714, "learning_rate": 1.3875853453179654e-07, "loss": 0.6097, "step": 52170 }, { "epoch": 0.9476245823042279, "grad_norm": 0.8597838790065485, "learning_rate": 1.3780345659924033e-07, "loss": 0.5857, "step": 52180 }, { "epoch": 0.9478061891617028, "grad_norm": 0.8873171331900688, "learning_rate": 1.3685165413362623e-07, "loss": 0.595, "step": 52190 }, { "epoch": 0.9479877960191777, "grad_norm": 0.8723435002555151, "learning_rate": 1.3590312745106803e-07, "loss": 0.5945, "step": 52200 }, { "epoch": 0.9481694028766526, "grad_norm": 0.8735396515825697, "learning_rate": 1.34957876866596e-07, "loss": 0.6003, "step": 52210 }, { "epoch": 0.9483510097341276, "grad_norm": 0.8781995109352528, "learning_rate": 1.3401590269414788e-07, "loss": 0.5986, "step": 52220 }, { "epoch": 0.9485326165916025, "grad_norm": 0.8477008222825838, "learning_rate": 1.3307720524657674e-07, "loss": 0.5909, "step": 52230 }, { "epoch": 0.9487142234490774, "grad_norm": 0.9192122260590783, "learning_rate": 1.3214178483564433e-07, "loss": 0.5982, "step": 52240 }, { "epoch": 0.9488958303065523, "grad_norm": 0.849697847656441, "learning_rate": 1.3120964177202767e-07, "loss": 0.6101, "step": 52250 }, { "epoch": 0.9490774371640273, "grad_norm": 0.8433053463330608, "learning_rate": 1.3028077636531245e-07, "loss": 0.5875, "step": 52260 }, { "epoch": 0.9492590440215023, "grad_norm": 0.8556137471450983, "learning_rate": 1.2935518892399635e-07, "loss": 0.5983, "step": 52270 }, { "epoch": 0.9494406508789772, "grad_norm": 0.8801375782328184, "learning_rate": 1.2843287975549013e-07, "loss": 0.6002, "step": 52280 }, { "epoch": 0.9496222577364521, "grad_norm": 0.8422974156727318, "learning_rate": 1.2751384916611321e-07, "loss": 0.5942, "step": 52290 }, { "epoch": 0.9498038645939271, "grad_norm": 0.8728777797827446, "learning_rate": 1.2659809746109696e-07, "loss": 0.5969, "step": 52300 }, { "epoch": 0.949985471451402, "grad_norm": 0.8367705084943892, "learning_rate": 1.256856249445848e-07, "loss": 0.5958, "step": 52310 }, { "epoch": 0.9501670783088769, "grad_norm": 0.8429617487176763, "learning_rate": 1.2477643191962985e-07, "loss": 0.5926, "step": 52320 }, { "epoch": 0.9503486851663518, "grad_norm": 0.9069333565209209, "learning_rate": 1.238705186881972e-07, "loss": 0.6032, "step": 52330 }, { "epoch": 0.9505302920238268, "grad_norm": 0.8732495469498871, "learning_rate": 1.2296788555115957e-07, "loss": 0.5954, "step": 52340 }, { "epoch": 0.9507118988813018, "grad_norm": 0.856353244243155, "learning_rate": 1.2206853280830488e-07, "loss": 0.5879, "step": 52350 }, { "epoch": 0.9508935057387767, "grad_norm": 0.8651988256485353, "learning_rate": 1.2117246075832866e-07, "loss": 0.6055, "step": 52360 }, { "epoch": 0.9510751125962517, "grad_norm": 0.8539340713799073, "learning_rate": 1.202796696988373e-07, "loss": 0.6016, "step": 52370 }, { "epoch": 0.9512567194537266, "grad_norm": 0.8900988936984203, "learning_rate": 1.1939015992634806e-07, "loss": 0.5974, "step": 52380 }, { "epoch": 0.9514383263112015, "grad_norm": 0.8452915549465114, "learning_rate": 1.1850393173628793e-07, "loss": 0.591, "step": 52390 }, { "epoch": 0.9516199331686764, "grad_norm": 0.896905905214212, "learning_rate": 1.1762098542299371e-07, "loss": 0.6095, "step": 52400 }, { "epoch": 0.9518015400261514, "grad_norm": 0.877765632631629, "learning_rate": 1.1674132127971304e-07, "loss": 0.5955, "step": 52410 }, { "epoch": 0.9519831468836263, "grad_norm": 0.8688275434640791, "learning_rate": 1.1586493959860224e-07, "loss": 0.6046, "step": 52420 }, { "epoch": 0.9521647537411012, "grad_norm": 0.9247665099483532, "learning_rate": 1.1499184067072844e-07, "loss": 0.5928, "step": 52430 }, { "epoch": 0.9523463605985762, "grad_norm": 0.9253843603404664, "learning_rate": 1.1412202478606971e-07, "loss": 0.6097, "step": 52440 }, { "epoch": 0.9525279674560512, "grad_norm": 0.8573480588352781, "learning_rate": 1.1325549223351051e-07, "loss": 0.6124, "step": 52450 }, { "epoch": 0.9527095743135261, "grad_norm": 0.9297139651739912, "learning_rate": 1.1239224330084731e-07, "loss": 0.5955, "step": 52460 }, { "epoch": 0.952891181171001, "grad_norm": 0.8819501712933133, "learning_rate": 1.1153227827478519e-07, "loss": 0.5908, "step": 52470 }, { "epoch": 0.953072788028476, "grad_norm": 0.864990527975343, "learning_rate": 1.1067559744094014e-07, "loss": 0.6055, "step": 52480 }, { "epoch": 0.9532543948859509, "grad_norm": 0.8670589305989466, "learning_rate": 1.0982220108383457e-07, "loss": 0.59, "step": 52490 }, { "epoch": 0.9534360017434258, "grad_norm": 0.837765598879201, "learning_rate": 1.0897208948690063e-07, "loss": 0.5945, "step": 52500 }, { "epoch": 0.9536176086009007, "grad_norm": 0.8490893812149646, "learning_rate": 1.0812526293248137e-07, "loss": 0.5877, "step": 52510 }, { "epoch": 0.9537992154583758, "grad_norm": 0.888494688763744, "learning_rate": 1.0728172170182737e-07, "loss": 0.5957, "step": 52520 }, { "epoch": 0.9539808223158507, "grad_norm": 0.8730666306055993, "learning_rate": 1.0644146607510009e-07, "loss": 0.61, "step": 52530 }, { "epoch": 0.9541624291733256, "grad_norm": 0.8268694773461884, "learning_rate": 1.0560449633136627e-07, "loss": 0.591, "step": 52540 }, { "epoch": 0.9543440360308005, "grad_norm": 0.8691101528397294, "learning_rate": 1.0477081274860356e-07, "loss": 0.5925, "step": 52550 }, { "epoch": 0.9545256428882755, "grad_norm": 0.845917620150673, "learning_rate": 1.0394041560369827e-07, "loss": 0.5821, "step": 52560 }, { "epoch": 0.9547072497457504, "grad_norm": 0.8456743410890145, "learning_rate": 1.0311330517244533e-07, "loss": 0.588, "step": 52570 }, { "epoch": 0.9548888566032253, "grad_norm": 0.8778660731221429, "learning_rate": 1.0228948172954611e-07, "loss": 0.5976, "step": 52580 }, { "epoch": 0.9550704634607002, "grad_norm": 0.8662087288642922, "learning_rate": 1.0146894554861176e-07, "loss": 0.5962, "step": 52590 }, { "epoch": 0.9552520703181752, "grad_norm": 0.8796005852812678, "learning_rate": 1.0065169690216315e-07, "loss": 0.604, "step": 52600 }, { "epoch": 0.9554336771756502, "grad_norm": 0.8415109830611509, "learning_rate": 9.98377360616265e-08, "loss": 0.6009, "step": 52610 }, { "epoch": 0.9556152840331251, "grad_norm": 0.8348183833856577, "learning_rate": 9.90270632973378e-08, "loss": 0.599, "step": 52620 }, { "epoch": 0.9557968908906, "grad_norm": 0.9196037976932663, "learning_rate": 9.821967887854056e-08, "loss": 0.5954, "step": 52630 }, { "epoch": 0.955978497748075, "grad_norm": 0.8920967752152172, "learning_rate": 9.741558307338472e-08, "loss": 0.5957, "step": 52640 }, { "epoch": 0.9561601046055499, "grad_norm": 0.8663171388294977, "learning_rate": 9.66147761489311e-08, "loss": 0.5917, "step": 52650 }, { "epoch": 0.9563417114630248, "grad_norm": 0.873079128219129, "learning_rate": 9.581725837114697e-08, "loss": 0.5966, "step": 52660 }, { "epoch": 0.9565233183204997, "grad_norm": 0.8972484656019352, "learning_rate": 9.502303000490487e-08, "loss": 0.6065, "step": 52670 }, { "epoch": 0.9567049251779747, "grad_norm": 0.863196972065571, "learning_rate": 9.423209131398714e-08, "loss": 0.5922, "step": 52680 }, { "epoch": 0.9568865320354497, "grad_norm": 0.8539276601281304, "learning_rate": 9.344444256108476e-08, "loss": 0.5759, "step": 52690 }, { "epoch": 0.9570681388929246, "grad_norm": 0.8626680024677964, "learning_rate": 9.26600840077907e-08, "loss": 0.5895, "step": 52700 }, { "epoch": 0.9572497457503996, "grad_norm": 0.8170511353074152, "learning_rate": 9.187901591461323e-08, "loss": 0.5898, "step": 52710 }, { "epoch": 0.9574313526078745, "grad_norm": 0.8384103957723483, "learning_rate": 9.110123854096043e-08, "loss": 0.5948, "step": 52720 }, { "epoch": 0.9576129594653494, "grad_norm": 0.878535482957658, "learning_rate": 9.032675214515119e-08, "loss": 0.5902, "step": 52730 }, { "epoch": 0.9577945663228243, "grad_norm": 0.8790725295538505, "learning_rate": 8.955555698441088e-08, "loss": 0.5898, "step": 52740 }, { "epoch": 0.9579761731802993, "grad_norm": 0.8239373470340325, "learning_rate": 8.878765331487128e-08, "loss": 0.6006, "step": 52750 }, { "epoch": 0.9581577800377742, "grad_norm": 0.8330233039158392, "learning_rate": 8.80230413915717e-08, "loss": 0.5893, "step": 52760 }, { "epoch": 0.9583393868952491, "grad_norm": 0.8509150410696812, "learning_rate": 8.72617214684568e-08, "loss": 0.5929, "step": 52770 }, { "epoch": 0.9585209937527241, "grad_norm": 0.899212228916608, "learning_rate": 8.650369379837876e-08, "loss": 0.5875, "step": 52780 }, { "epoch": 0.9587026006101991, "grad_norm": 0.8460037426916095, "learning_rate": 8.57489586330984e-08, "loss": 0.591, "step": 52790 }, { "epoch": 0.958884207467674, "grad_norm": 0.8593255968929334, "learning_rate": 8.499751622327857e-08, "loss": 0.5973, "step": 52800 }, { "epoch": 0.9590658143251489, "grad_norm": 0.8666767805193639, "learning_rate": 8.424936681849183e-08, "loss": 0.5881, "step": 52810 }, { "epoch": 0.9592474211826238, "grad_norm": 0.8731341243864905, "learning_rate": 8.350451066721721e-08, "loss": 0.6061, "step": 52820 }, { "epoch": 0.9594290280400988, "grad_norm": 0.8864428966652674, "learning_rate": 8.276294801683571e-08, "loss": 0.5947, "step": 52830 }, { "epoch": 0.9596106348975737, "grad_norm": 0.8593663386959366, "learning_rate": 8.202467911364143e-08, "loss": 0.5884, "step": 52840 }, { "epoch": 0.9597922417550486, "grad_norm": 0.8383900610403802, "learning_rate": 8.128970420282934e-08, "loss": 0.5967, "step": 52850 }, { "epoch": 0.9599738486125237, "grad_norm": 0.9032212968964252, "learning_rate": 8.055802352850084e-08, "loss": 0.5945, "step": 52860 }, { "epoch": 0.9601554554699986, "grad_norm": 0.8622671753612952, "learning_rate": 7.9829637333666e-08, "loss": 0.6051, "step": 52870 }, { "epoch": 0.9603370623274735, "grad_norm": 0.8325853509918204, "learning_rate": 7.910454586023796e-08, "loss": 0.6067, "step": 52880 }, { "epoch": 0.9605186691849484, "grad_norm": 0.8510514251918865, "learning_rate": 7.838274934903633e-08, "loss": 0.5855, "step": 52890 }, { "epoch": 0.9607002760424234, "grad_norm": 0.8874773899540456, "learning_rate": 7.766424803978822e-08, "loss": 0.578, "step": 52900 }, { "epoch": 0.9608818828998983, "grad_norm": 0.8536950547022293, "learning_rate": 7.694904217112276e-08, "loss": 0.59, "step": 52910 }, { "epoch": 0.9610634897573732, "grad_norm": 0.8540948921273364, "learning_rate": 7.623713198057769e-08, "loss": 0.5868, "step": 52920 }, { "epoch": 0.9612450966148481, "grad_norm": 0.8298397867803592, "learning_rate": 7.552851770459502e-08, "loss": 0.5912, "step": 52930 }, { "epoch": 0.9614267034723231, "grad_norm": 0.8904381962538316, "learning_rate": 7.482319957852202e-08, "loss": 0.5933, "step": 52940 }, { "epoch": 0.9616083103297981, "grad_norm": 0.8678949789357089, "learning_rate": 7.412117783661132e-08, "loss": 0.5983, "step": 52950 }, { "epoch": 0.961789917187273, "grad_norm": 0.8572826311811109, "learning_rate": 7.342245271202086e-08, "loss": 0.5938, "step": 52960 }, { "epoch": 0.961971524044748, "grad_norm": 0.9197585748760035, "learning_rate": 7.272702443681389e-08, "loss": 0.5949, "step": 52970 }, { "epoch": 0.9621531309022229, "grad_norm": 0.8578044357671474, "learning_rate": 7.2034893241959e-08, "loss": 0.5794, "step": 52980 }, { "epoch": 0.9623347377596978, "grad_norm": 0.8268719747413459, "learning_rate": 7.134605935732786e-08, "loss": 0.598, "step": 52990 }, { "epoch": 0.9625163446171727, "grad_norm": 0.8689525185644131, "learning_rate": 7.066052301169967e-08, "loss": 0.5973, "step": 53000 }, { "epoch": 0.9626979514746477, "grad_norm": 0.8291638579417216, "learning_rate": 6.997828443275567e-08, "loss": 0.5924, "step": 53010 }, { "epoch": 0.9628795583321226, "grad_norm": 0.9018594160765258, "learning_rate": 6.92993438470846e-08, "loss": 0.5995, "step": 53020 }, { "epoch": 0.9630611651895976, "grad_norm": 0.8570125176263519, "learning_rate": 6.862370148017827e-08, "loss": 0.595, "step": 53030 }, { "epoch": 0.9632427720470725, "grad_norm": 0.8386180474911841, "learning_rate": 6.795135755643279e-08, "loss": 0.5811, "step": 53040 }, { "epoch": 0.9634243789045475, "grad_norm": 0.8528801311267621, "learning_rate": 6.72823122991495e-08, "loss": 0.5908, "step": 53050 }, { "epoch": 0.9636059857620224, "grad_norm": 0.8518013083692991, "learning_rate": 6.661656593053511e-08, "loss": 0.6119, "step": 53060 }, { "epoch": 0.9637875926194973, "grad_norm": 0.8731650458887422, "learning_rate": 6.595411867169832e-08, "loss": 0.6079, "step": 53070 }, { "epoch": 0.9639691994769722, "grad_norm": 0.844003769365836, "learning_rate": 6.529497074265534e-08, "loss": 0.5839, "step": 53080 }, { "epoch": 0.9641508063344472, "grad_norm": 0.8533217803851552, "learning_rate": 6.463912236232106e-08, "loss": 0.5867, "step": 53090 }, { "epoch": 0.9643324131919221, "grad_norm": 0.8798840856547633, "learning_rate": 6.398657374852236e-08, "loss": 0.5988, "step": 53100 }, { "epoch": 0.964514020049397, "grad_norm": 0.8512642138696633, "learning_rate": 6.333732511798251e-08, "loss": 0.6081, "step": 53110 }, { "epoch": 0.964695626906872, "grad_norm": 0.8908181237925302, "learning_rate": 6.269137668633352e-08, "loss": 0.6084, "step": 53120 }, { "epoch": 0.964877233764347, "grad_norm": 0.8903221396011963, "learning_rate": 6.204872866811151e-08, "loss": 0.6041, "step": 53130 }, { "epoch": 0.9650588406218219, "grad_norm": 0.8914734705808502, "learning_rate": 6.140938127675245e-08, "loss": 0.5938, "step": 53140 }, { "epoch": 0.9652404474792968, "grad_norm": 0.8762724471646992, "learning_rate": 6.077333472459979e-08, "loss": 0.5944, "step": 53150 }, { "epoch": 0.9654220543367718, "grad_norm": 0.8538762083655096, "learning_rate": 6.014058922289901e-08, "loss": 0.5914, "step": 53160 }, { "epoch": 0.9656036611942467, "grad_norm": 0.8542714382072848, "learning_rate": 5.951114498180088e-08, "loss": 0.5938, "step": 53170 }, { "epoch": 0.9657852680517216, "grad_norm": 0.8640491333371312, "learning_rate": 5.88850022103582e-08, "loss": 0.595, "step": 53180 }, { "epoch": 0.9659668749091965, "grad_norm": 0.879521123711123, "learning_rate": 5.826216111652683e-08, "loss": 0.5983, "step": 53190 }, { "epoch": 0.9661484817666716, "grad_norm": 0.88846959464421, "learning_rate": 5.764262190716796e-08, "loss": 0.5825, "step": 53200 }, { "epoch": 0.9663300886241465, "grad_norm": 0.8765277328142164, "learning_rate": 5.7026384788043674e-08, "loss": 0.6037, "step": 53210 }, { "epoch": 0.9665116954816214, "grad_norm": 0.8528704566078401, "learning_rate": 5.6413449963822477e-08, "loss": 0.5916, "step": 53220 }, { "epoch": 0.9666933023390963, "grad_norm": 0.8882995175282221, "learning_rate": 5.580381763807374e-08, "loss": 0.6081, "step": 53230 }, { "epoch": 0.9668749091965713, "grad_norm": 0.8831661920365338, "learning_rate": 5.519748801327107e-08, "loss": 0.5947, "step": 53240 }, { "epoch": 0.9670565160540462, "grad_norm": 0.8481213595456396, "learning_rate": 5.4594461290788936e-08, "loss": 0.5884, "step": 53250 }, { "epoch": 0.9672381229115211, "grad_norm": 0.8685321839892973, "learning_rate": 5.399473767090935e-08, "loss": 0.5861, "step": 53260 }, { "epoch": 0.967419729768996, "grad_norm": 0.8646552493952468, "learning_rate": 5.3398317352811865e-08, "loss": 0.5947, "step": 53270 }, { "epoch": 0.967601336626471, "grad_norm": 0.8816065305172502, "learning_rate": 5.28052005345836e-08, "loss": 0.5927, "step": 53280 }, { "epoch": 0.967782943483946, "grad_norm": 0.8859642104639333, "learning_rate": 5.2215387413212525e-08, "loss": 0.6066, "step": 53290 }, { "epoch": 0.9679645503414209, "grad_norm": 0.8530637163837703, "learning_rate": 5.1628878184588616e-08, "loss": 0.602, "step": 53300 }, { "epoch": 0.9681461571988959, "grad_norm": 0.8592707417149589, "learning_rate": 5.104567304350605e-08, "loss": 0.593, "step": 53310 }, { "epoch": 0.9683277640563708, "grad_norm": 0.8983064737688047, "learning_rate": 5.046577218365989e-08, "loss": 0.5818, "step": 53320 }, { "epoch": 0.9685093709138457, "grad_norm": 0.8561460635156796, "learning_rate": 4.988917579765051e-08, "loss": 0.5928, "step": 53330 }, { "epoch": 0.9686909777713206, "grad_norm": 0.8105827630735216, "learning_rate": 4.9315884076976964e-08, "loss": 0.5821, "step": 53340 }, { "epoch": 0.9688725846287956, "grad_norm": 0.8443881233443882, "learning_rate": 4.87458972120447e-08, "loss": 0.6022, "step": 53350 }, { "epoch": 0.9690541914862705, "grad_norm": 0.8158999421426363, "learning_rate": 4.817921539215897e-08, "loss": 0.5915, "step": 53360 }, { "epoch": 0.9692357983437455, "grad_norm": 0.8620746512096059, "learning_rate": 4.761583880552812e-08, "loss": 0.5953, "step": 53370 }, { "epoch": 0.9694174052012204, "grad_norm": 0.8792498344058237, "learning_rate": 4.705576763926245e-08, "loss": 0.6051, "step": 53380 }, { "epoch": 0.9695990120586954, "grad_norm": 0.9303265346422535, "learning_rate": 4.649900207937652e-08, "loss": 0.6025, "step": 53390 }, { "epoch": 0.9697806189161703, "grad_norm": 0.8793132212846184, "learning_rate": 4.594554231078241e-08, "loss": 0.5909, "step": 53400 }, { "epoch": 0.9699622257736452, "grad_norm": 0.8715810843500538, "learning_rate": 4.539538851729863e-08, "loss": 0.5955, "step": 53410 }, { "epoch": 0.9701438326311201, "grad_norm": 0.8796089148794018, "learning_rate": 4.4848540881643474e-08, "loss": 0.6101, "step": 53420 }, { "epoch": 0.9703254394885951, "grad_norm": 0.8759608716460213, "learning_rate": 4.4304999585439436e-08, "loss": 0.6011, "step": 53430 }, { "epoch": 0.97050704634607, "grad_norm": 0.8628352764184982, "learning_rate": 4.376476480920877e-08, "loss": 0.5878, "step": 53440 }, { "epoch": 0.9706886532035449, "grad_norm": 0.8552288682272762, "learning_rate": 4.3227836732374626e-08, "loss": 0.5939, "step": 53450 }, { "epoch": 0.97087026006102, "grad_norm": 0.8675487755581581, "learning_rate": 4.269421553326547e-08, "loss": 0.5957, "step": 53460 }, { "epoch": 0.9710518669184949, "grad_norm": 0.8902557229299494, "learning_rate": 4.2163901389107306e-08, "loss": 0.5802, "step": 53470 }, { "epoch": 0.9712334737759698, "grad_norm": 0.8912917871089421, "learning_rate": 4.1636894476031474e-08, "loss": 0.5979, "step": 53480 }, { "epoch": 0.9714150806334447, "grad_norm": 0.8767991520990754, "learning_rate": 4.111319496906907e-08, "loss": 0.5909, "step": 53490 }, { "epoch": 0.9715966874909197, "grad_norm": 0.8775512032275283, "learning_rate": 4.059280304215318e-08, "loss": 0.5989, "step": 53500 }, { "epoch": 0.9717782943483946, "grad_norm": 0.8781159626276076, "learning_rate": 4.0075718868117785e-08, "loss": 0.6078, "step": 53510 }, { "epoch": 0.9719599012058695, "grad_norm": 0.9055067252177087, "learning_rate": 3.956194261869772e-08, "loss": 0.5905, "step": 53520 }, { "epoch": 0.9721415080633444, "grad_norm": 0.8713404266186556, "learning_rate": 3.9051474464532055e-08, "loss": 0.5941, "step": 53530 }, { "epoch": 0.9723231149208195, "grad_norm": 0.8657828442019402, "learning_rate": 3.8544314575158504e-08, "loss": 0.5848, "step": 53540 }, { "epoch": 0.9725047217782944, "grad_norm": 0.8372950555381268, "learning_rate": 3.804046311901566e-08, "loss": 0.5782, "step": 53550 }, { "epoch": 0.9726863286357693, "grad_norm": 0.835712024094173, "learning_rate": 3.753992026344633e-08, "loss": 0.6045, "step": 53560 }, { "epoch": 0.9728679354932442, "grad_norm": 0.8712431830021893, "learning_rate": 3.7042686174690866e-08, "loss": 0.5923, "step": 53570 }, { "epoch": 0.9730495423507192, "grad_norm": 0.843276927639538, "learning_rate": 3.654876101789384e-08, "loss": 0.6003, "step": 53580 }, { "epoch": 0.9732311492081941, "grad_norm": 0.8465680007177889, "learning_rate": 3.605814495709847e-08, "loss": 0.5797, "step": 53590 }, { "epoch": 0.973412756065669, "grad_norm": 0.9041717594349922, "learning_rate": 3.557083815524998e-08, "loss": 0.5848, "step": 53600 }, { "epoch": 0.9735943629231439, "grad_norm": 0.879347380196726, "learning_rate": 3.508684077419333e-08, "loss": 0.6006, "step": 53610 }, { "epoch": 0.9737759697806189, "grad_norm": 0.874016591149911, "learning_rate": 3.460615297467773e-08, "loss": 0.5898, "step": 53620 }, { "epoch": 0.9739575766380939, "grad_norm": 0.882164635587089, "learning_rate": 3.4128774916348805e-08, "loss": 0.6074, "step": 53630 }, { "epoch": 0.9741391834955688, "grad_norm": 0.8512574068941916, "learning_rate": 3.3654706757756394e-08, "loss": 0.5889, "step": 53640 }, { "epoch": 0.9743207903530438, "grad_norm": 0.847828103154157, "learning_rate": 3.3183948656348994e-08, "loss": 0.5989, "step": 53650 }, { "epoch": 0.9745023972105187, "grad_norm": 0.8802890844222977, "learning_rate": 3.27165007684771e-08, "loss": 0.5913, "step": 53660 }, { "epoch": 0.9746840040679936, "grad_norm": 0.8645960832165691, "learning_rate": 3.225236324938985e-08, "loss": 0.5916, "step": 53670 }, { "epoch": 0.9748656109254685, "grad_norm": 0.8627691235757138, "learning_rate": 3.1791536253239494e-08, "loss": 0.5762, "step": 53680 }, { "epoch": 0.9750472177829435, "grad_norm": 0.8598529764017953, "learning_rate": 3.133401993307694e-08, "loss": 0.6048, "step": 53690 }, { "epoch": 0.9752288246404184, "grad_norm": 0.896773251192066, "learning_rate": 3.087981444085397e-08, "loss": 0.5883, "step": 53700 }, { "epoch": 0.9754104314978934, "grad_norm": 0.8503372837464445, "learning_rate": 3.042891992742325e-08, "loss": 0.5962, "step": 53710 }, { "epoch": 0.9755920383553683, "grad_norm": 0.8715918557528508, "learning_rate": 2.998133654253721e-08, "loss": 0.6048, "step": 53720 }, { "epoch": 0.9757736452128433, "grad_norm": 0.8261724946915796, "learning_rate": 2.953706443484805e-08, "loss": 0.5854, "step": 53730 }, { "epoch": 0.9759552520703182, "grad_norm": 0.8861666734916737, "learning_rate": 2.9096103751909964e-08, "loss": 0.6053, "step": 53740 }, { "epoch": 0.9761368589277931, "grad_norm": 0.8518455375304865, "learning_rate": 2.8658454640176914e-08, "loss": 0.5893, "step": 53750 }, { "epoch": 0.976318465785268, "grad_norm": 0.8621222779695469, "learning_rate": 2.8224117245000405e-08, "loss": 0.5969, "step": 53760 }, { "epoch": 0.976500072642743, "grad_norm": 0.9092944413478393, "learning_rate": 2.7793091710636157e-08, "loss": 0.5961, "step": 53770 }, { "epoch": 0.9766816795002179, "grad_norm": 0.8594699133013977, "learning_rate": 2.7365378180237433e-08, "loss": 0.5952, "step": 53780 }, { "epoch": 0.9768632863576928, "grad_norm": 0.8417034501506011, "learning_rate": 2.6940976795856168e-08, "loss": 0.5832, "step": 53790 }, { "epoch": 0.9770448932151679, "grad_norm": 0.8544419925976171, "learning_rate": 2.6519887698448488e-08, "loss": 0.6033, "step": 53800 }, { "epoch": 0.9772265000726428, "grad_norm": 0.8465808046330779, "learning_rate": 2.6102111027865862e-08, "loss": 0.5953, "step": 53810 }, { "epoch": 0.9774081069301177, "grad_norm": 0.8887621506669818, "learning_rate": 2.5687646922863963e-08, "loss": 0.6006, "step": 53820 }, { "epoch": 0.9775897137875926, "grad_norm": 0.8355696403869952, "learning_rate": 2.5276495521093792e-08, "loss": 0.5906, "step": 53830 }, { "epoch": 0.9777713206450676, "grad_norm": 0.8389859276613628, "learning_rate": 2.486865695910945e-08, "loss": 0.5859, "step": 53840 }, { "epoch": 0.9779529275025425, "grad_norm": 0.8683508971227176, "learning_rate": 2.4464131372363696e-08, "loss": 0.5936, "step": 53850 }, { "epoch": 0.9781345343600174, "grad_norm": 0.8630688134690558, "learning_rate": 2.4062918895209063e-08, "loss": 0.6017, "step": 53860 }, { "epoch": 0.9783161412174923, "grad_norm": 0.8564926684654371, "learning_rate": 2.366501966089674e-08, "loss": 0.584, "step": 53870 }, { "epoch": 0.9784977480749674, "grad_norm": 0.84494169746421, "learning_rate": 2.3270433801579894e-08, "loss": 0.6093, "step": 53880 }, { "epoch": 0.9786793549324423, "grad_norm": 0.8553200334093463, "learning_rate": 2.287916144830926e-08, "loss": 0.5878, "step": 53890 }, { "epoch": 0.9788609617899172, "grad_norm": 0.836529736752854, "learning_rate": 2.2491202731035333e-08, "loss": 0.584, "step": 53900 }, { "epoch": 0.9790425686473921, "grad_norm": 0.8635443181825969, "learning_rate": 2.2106557778607262e-08, "loss": 0.6106, "step": 53910 }, { "epoch": 0.9792241755048671, "grad_norm": 0.966291952132925, "learning_rate": 2.1725226718776195e-08, "loss": 0.6055, "step": 53920 }, { "epoch": 0.979405782362342, "grad_norm": 0.8524328844296769, "learning_rate": 2.1347209678190816e-08, "loss": 0.5903, "step": 53930 }, { "epoch": 0.9795873892198169, "grad_norm": 0.8647287060816682, "learning_rate": 2.0972506782398484e-08, "loss": 0.5956, "step": 53940 }, { "epoch": 0.9797689960772918, "grad_norm": 0.8372437352175742, "learning_rate": 2.060111815584853e-08, "loss": 0.5957, "step": 53950 }, { "epoch": 0.9799506029347668, "grad_norm": 0.8615018787441775, "learning_rate": 2.023304392188674e-08, "loss": 0.5859, "step": 53960 }, { "epoch": 0.9801322097922418, "grad_norm": 0.8496906592807295, "learning_rate": 1.9868284202758658e-08, "loss": 0.6007, "step": 53970 }, { "epoch": 0.9803138166497167, "grad_norm": 0.854511575025498, "learning_rate": 1.950683911961071e-08, "loss": 0.5912, "step": 53980 }, { "epoch": 0.9804954235071917, "grad_norm": 0.8956435749335896, "learning_rate": 1.914870879248687e-08, "loss": 0.5966, "step": 53990 }, { "epoch": 0.9806770303646666, "grad_norm": 0.8559062434729502, "learning_rate": 1.8793893340330882e-08, "loss": 0.5956, "step": 54000 }, { "epoch": 0.9808586372221415, "grad_norm": 0.8790197461625948, "learning_rate": 1.844239288098404e-08, "loss": 0.5951, "step": 54010 }, { "epoch": 0.9810402440796164, "grad_norm": 0.8520199314901685, "learning_rate": 1.8094207531189624e-08, "loss": 0.6056, "step": 54020 }, { "epoch": 0.9812218509370914, "grad_norm": 0.9000751853325503, "learning_rate": 1.7749337406587353e-08, "loss": 0.6013, "step": 54030 }, { "epoch": 0.9814034577945663, "grad_norm": 0.8793221168260689, "learning_rate": 1.7407782621716717e-08, "loss": 0.5907, "step": 54040 }, { "epoch": 0.9815850646520412, "grad_norm": 0.8696028517706945, "learning_rate": 1.7069543290015865e-08, "loss": 0.5888, "step": 54050 }, { "epoch": 0.9817666715095162, "grad_norm": 0.9243985359055094, "learning_rate": 1.6734619523821604e-08, "loss": 0.5957, "step": 54060 }, { "epoch": 0.9819482783669912, "grad_norm": 0.8273730030107519, "learning_rate": 1.640301143437162e-08, "loss": 0.5952, "step": 54070 }, { "epoch": 0.9821298852244661, "grad_norm": 0.8585794094880426, "learning_rate": 1.6074719131798922e-08, "loss": 0.5922, "step": 54080 }, { "epoch": 0.982311492081941, "grad_norm": 0.8497148526210959, "learning_rate": 1.5749742725138516e-08, "loss": 0.5935, "step": 54090 }, { "epoch": 0.9824930989394159, "grad_norm": 0.8538560325779585, "learning_rate": 1.542808232232185e-08, "loss": 0.5835, "step": 54100 }, { "epoch": 0.9826747057968909, "grad_norm": 0.8765610644480578, "learning_rate": 1.510973803018012e-08, "loss": 0.6021, "step": 54110 }, { "epoch": 0.9828563126543658, "grad_norm": 0.8399567623494082, "learning_rate": 1.47947099544421e-08, "loss": 0.5903, "step": 54120 }, { "epoch": 0.9830379195118407, "grad_norm": 0.8545957182958229, "learning_rate": 1.4482998199738529e-08, "loss": 0.5888, "step": 54130 }, { "epoch": 0.9832195263693158, "grad_norm": 0.8521384352283479, "learning_rate": 1.4174602869593268e-08, "loss": 0.5882, "step": 54140 }, { "epoch": 0.9834011332267907, "grad_norm": 0.8720488235413876, "learning_rate": 1.386952406643327e-08, "loss": 0.5966, "step": 54150 }, { "epoch": 0.9835827400842656, "grad_norm": 0.823100852403981, "learning_rate": 1.3567761891581931e-08, "loss": 0.5932, "step": 54160 }, { "epoch": 0.9837643469417405, "grad_norm": 0.867369403538679, "learning_rate": 1.3269316445261304e-08, "loss": 0.5897, "step": 54170 }, { "epoch": 0.9839459537992155, "grad_norm": 0.8872900729182596, "learning_rate": 1.29741878265921e-08, "loss": 0.5971, "step": 54180 }, { "epoch": 0.9841275606566904, "grad_norm": 0.8460642246726914, "learning_rate": 1.2682376133594799e-08, "loss": 0.5865, "step": 54190 }, { "epoch": 0.9843091675141653, "grad_norm": 0.8853578759853634, "learning_rate": 1.2393881463185209e-08, "loss": 0.6056, "step": 54200 }, { "epoch": 0.9844907743716402, "grad_norm": 0.8749527124495318, "learning_rate": 1.210870391118002e-08, "loss": 0.5992, "step": 54210 }, { "epoch": 0.9846723812291152, "grad_norm": 0.8736332929405615, "learning_rate": 1.1826843572293467e-08, "loss": 0.5945, "step": 54220 }, { "epoch": 0.9848539880865902, "grad_norm": 0.8356949164538674, "learning_rate": 1.1548300540137336e-08, "loss": 0.5907, "step": 54230 }, { "epoch": 0.9850355949440651, "grad_norm": 0.8425225764199361, "learning_rate": 1.1273074907223181e-08, "loss": 0.6104, "step": 54240 }, { "epoch": 0.98521720180154, "grad_norm": 0.8795072028323975, "learning_rate": 1.1001166764958992e-08, "loss": 0.5983, "step": 54250 }, { "epoch": 0.985398808659015, "grad_norm": 0.8609486581375027, "learning_rate": 1.0732576203652533e-08, "loss": 0.5934, "step": 54260 }, { "epoch": 0.9855804155164899, "grad_norm": 0.8682419280079304, "learning_rate": 1.0467303312508004e-08, "loss": 0.5922, "step": 54270 }, { "epoch": 0.9857620223739648, "grad_norm": 0.8982772127761336, "learning_rate": 1.0205348179630482e-08, "loss": 0.5862, "step": 54280 }, { "epoch": 0.9859436292314397, "grad_norm": 0.8444790615866458, "learning_rate": 9.946710892020372e-09, "loss": 0.5917, "step": 54290 }, { "epoch": 0.9861252360889147, "grad_norm": 0.8783469548487329, "learning_rate": 9.691391535576743e-09, "loss": 0.5994, "step": 54300 }, { "epoch": 0.9863068429463897, "grad_norm": 0.8845296645231888, "learning_rate": 9.43939019509843e-09, "loss": 0.5961, "step": 54310 }, { "epoch": 0.9864884498038646, "grad_norm": 0.8140349791188718, "learning_rate": 9.190706954279594e-09, "loss": 0.5853, "step": 54320 }, { "epoch": 0.9866700566613396, "grad_norm": 0.8966889007793764, "learning_rate": 8.945341895715275e-09, "loss": 0.6005, "step": 54330 }, { "epoch": 0.9868516635188145, "grad_norm": 0.8535454798710439, "learning_rate": 8.703295100894738e-09, "loss": 0.5906, "step": 54340 }, { "epoch": 0.9870332703762894, "grad_norm": 0.8454840275335931, "learning_rate": 8.464566650210337e-09, "loss": 0.5842, "step": 54350 }, { "epoch": 0.9872148772337643, "grad_norm": 0.894646795333976, "learning_rate": 8.229156622946433e-09, "loss": 0.5977, "step": 54360 }, { "epoch": 0.9873964840912393, "grad_norm": 0.8509109694146508, "learning_rate": 7.99706509729048e-09, "loss": 0.5922, "step": 54370 }, { "epoch": 0.9875780909487142, "grad_norm": 0.8691369047693082, "learning_rate": 7.768292150324152e-09, "loss": 0.5925, "step": 54380 }, { "epoch": 0.9877596978061891, "grad_norm": 0.8791009404982307, "learning_rate": 7.542837858030005e-09, "loss": 0.5998, "step": 54390 }, { "epoch": 0.9879413046636641, "grad_norm": 0.8494432162108069, "learning_rate": 7.320702295284809e-09, "loss": 0.5981, "step": 54400 }, { "epoch": 0.9881229115211391, "grad_norm": 0.8307988158645144, "learning_rate": 7.101885535865105e-09, "loss": 0.5906, "step": 54410 }, { "epoch": 0.988304518378614, "grad_norm": 0.853683624173788, "learning_rate": 6.886387652446091e-09, "loss": 0.604, "step": 54420 }, { "epoch": 0.9884861252360889, "grad_norm": 0.8869999370622156, "learning_rate": 6.674208716599407e-09, "loss": 0.5864, "step": 54430 }, { "epoch": 0.9886677320935638, "grad_norm": 0.8960893067687807, "learning_rate": 6.465348798794235e-09, "loss": 0.5849, "step": 54440 }, { "epoch": 0.9888493389510388, "grad_norm": 0.8771603232439336, "learning_rate": 6.259807968398424e-09, "loss": 0.5957, "step": 54450 }, { "epoch": 0.9890309458085137, "grad_norm": 0.86268743959926, "learning_rate": 6.057586293677365e-09, "loss": 0.5891, "step": 54460 }, { "epoch": 0.9892125526659886, "grad_norm": 0.8693760524049422, "learning_rate": 5.858683841791779e-09, "loss": 0.5851, "step": 54470 }, { "epoch": 0.9893941595234637, "grad_norm": 0.8649088858827152, "learning_rate": 5.663100678803268e-09, "loss": 0.5922, "step": 54480 }, { "epoch": 0.9895757663809386, "grad_norm": 0.8380019969267126, "learning_rate": 5.470836869669871e-09, "loss": 0.5891, "step": 54490 }, { "epoch": 0.9897573732384135, "grad_norm": 0.8736725891008295, "learning_rate": 5.281892478244954e-09, "loss": 0.602, "step": 54500 }, { "epoch": 0.9899389800958884, "grad_norm": 0.9209062402522572, "learning_rate": 5.096267567283874e-09, "loss": 0.6116, "step": 54510 }, { "epoch": 0.9901205869533634, "grad_norm": 0.8582252622414543, "learning_rate": 4.9139621984362065e-09, "loss": 0.5946, "step": 54520 }, { "epoch": 0.9903021938108383, "grad_norm": 1.2231644516442224, "learning_rate": 4.734976432247962e-09, "loss": 0.6024, "step": 54530 }, { "epoch": 0.9904838006683132, "grad_norm": 0.8382640965794697, "learning_rate": 4.559310328167144e-09, "loss": 0.5937, "step": 54540 }, { "epoch": 0.9906654075257881, "grad_norm": 0.867077728298558, "learning_rate": 4.3869639445359715e-09, "loss": 0.593, "step": 54550 }, { "epoch": 0.9908470143832631, "grad_norm": 0.8648525732521521, "learning_rate": 4.217937338593103e-09, "loss": 0.6071, "step": 54560 }, { "epoch": 0.9910286212407381, "grad_norm": 0.8484885453763799, "learning_rate": 4.052230566478077e-09, "loss": 0.5878, "step": 54570 }, { "epoch": 0.991210228098213, "grad_norm": 0.8544050172049583, "learning_rate": 3.889843683225758e-09, "loss": 0.5934, "step": 54580 }, { "epoch": 0.991391834955688, "grad_norm": 0.8788280507280098, "learning_rate": 3.73077674276745e-09, "loss": 0.6044, "step": 54590 }, { "epoch": 0.9915734418131629, "grad_norm": 0.871942825204647, "learning_rate": 3.575029797934226e-09, "loss": 0.5937, "step": 54600 }, { "epoch": 0.9917550486706378, "grad_norm": 0.8515296081445489, "learning_rate": 3.4226029004524875e-09, "loss": 0.598, "step": 54610 }, { "epoch": 0.9919366555281127, "grad_norm": 0.8193641024135151, "learning_rate": 3.2734961009472934e-09, "loss": 0.5973, "step": 54620 }, { "epoch": 0.9921182623855876, "grad_norm": 0.8467786336082367, "learning_rate": 3.1277094489401417e-09, "loss": 0.6044, "step": 54630 }, { "epoch": 0.9922998692430626, "grad_norm": 0.8978361276087164, "learning_rate": 2.9852429928511896e-09, "loss": 0.5855, "step": 54640 }, { "epoch": 0.9924814761005376, "grad_norm": 0.8776834248902239, "learning_rate": 2.8460967799948116e-09, "loss": 0.6091, "step": 54650 }, { "epoch": 0.9926630829580125, "grad_norm": 0.841752049473905, "learning_rate": 2.7102708565873714e-09, "loss": 0.5912, "step": 54660 }, { "epoch": 0.9928446898154875, "grad_norm": 0.880071602493263, "learning_rate": 2.5777652677383415e-09, "loss": 0.5989, "step": 54670 }, { "epoch": 0.9930262966729624, "grad_norm": 0.8662855607013112, "learning_rate": 2.4485800574558514e-09, "loss": 0.5827, "step": 54680 }, { "epoch": 0.9932079035304373, "grad_norm": 0.8773752269534217, "learning_rate": 2.3227152686455813e-09, "loss": 0.5956, "step": 54690 }, { "epoch": 0.9933895103879122, "grad_norm": 0.8796705895915726, "learning_rate": 2.2001709431096473e-09, "loss": 0.5899, "step": 54700 }, { "epoch": 0.9935711172453872, "grad_norm": 0.8206417799877211, "learning_rate": 2.0809471215488266e-09, "loss": 0.5935, "step": 54710 }, { "epoch": 0.9937527241028621, "grad_norm": 0.8629664931446862, "learning_rate": 1.9650438435603324e-09, "loss": 0.594, "step": 54720 }, { "epoch": 0.993934330960337, "grad_norm": 0.8894558961056174, "learning_rate": 1.8524611476378184e-09, "loss": 0.5879, "step": 54730 }, { "epoch": 0.994115937817812, "grad_norm": 0.8728146183728166, "learning_rate": 1.7431990711724856e-09, "loss": 0.6105, "step": 54740 }, { "epoch": 0.994297544675287, "grad_norm": 0.8271239131293383, "learning_rate": 1.6372576504530835e-09, "loss": 0.6064, "step": 54750 }, { "epoch": 0.9944791515327619, "grad_norm": 0.9297535605074798, "learning_rate": 1.5346369206648004e-09, "loss": 0.6005, "step": 54760 }, { "epoch": 0.9946607583902368, "grad_norm": 0.8432506896680422, "learning_rate": 1.435336915890373e-09, "loss": 0.5833, "step": 54770 }, { "epoch": 0.9948423652477117, "grad_norm": 0.8779127012420765, "learning_rate": 1.3393576691100862e-09, "loss": 0.5906, "step": 54780 }, { "epoch": 0.9950239721051867, "grad_norm": 0.891143366628808, "learning_rate": 1.246699212201774e-09, "loss": 0.5952, "step": 54790 }, { "epoch": 0.9952055789626616, "grad_norm": 0.8764565589137608, "learning_rate": 1.157361575937488e-09, "loss": 0.609, "step": 54800 }, { "epoch": 0.9953871858201365, "grad_norm": 0.8542332606440278, "learning_rate": 1.0713447899901586e-09, "loss": 0.5992, "step": 54810 }, { "epoch": 0.9955687926776116, "grad_norm": 0.8439479562028334, "learning_rate": 9.886488829269348e-10, "loss": 0.5984, "step": 54820 }, { "epoch": 0.9957503995350865, "grad_norm": 0.8731672218631258, "learning_rate": 9.092738822136237e-10, "loss": 0.5934, "step": 54830 }, { "epoch": 0.9959320063925614, "grad_norm": 0.8559371572718035, "learning_rate": 8.332198142124715e-10, "loss": 0.5878, "step": 54840 }, { "epoch": 0.9961136132500363, "grad_norm": 0.9016164462630953, "learning_rate": 7.604867041821617e-10, "loss": 0.602, "step": 54850 }, { "epoch": 0.9962952201075113, "grad_norm": 0.8627505096455501, "learning_rate": 6.910745762800375e-10, "loss": 0.5945, "step": 54860 }, { "epoch": 0.9964768269649862, "grad_norm": 0.8748639634983195, "learning_rate": 6.249834535587695e-10, "loss": 0.5958, "step": 54870 }, { "epoch": 0.9966584338224611, "grad_norm": 0.8679422344496758, "learning_rate": 5.622133579685773e-10, "loss": 0.5965, "step": 54880 }, { "epoch": 0.996840040679936, "grad_norm": 0.8862523698224981, "learning_rate": 5.027643103572288e-10, "loss": 0.5906, "step": 54890 }, { "epoch": 0.997021647537411, "grad_norm": 0.8357513648907153, "learning_rate": 4.4663633047004053e-10, "loss": 0.6007, "step": 54900 }, { "epoch": 0.997203254394886, "grad_norm": 0.885935093377573, "learning_rate": 3.9382943694654674e-10, "loss": 0.5964, "step": 54910 }, { "epoch": 0.9973848612523609, "grad_norm": 0.8312217094595846, "learning_rate": 3.4434364732716106e-10, "loss": 0.5933, "step": 54920 }, { "epoch": 0.9975664681098358, "grad_norm": 0.8733992409612876, "learning_rate": 2.9817897804540473e-10, "loss": 0.5899, "step": 54930 }, { "epoch": 0.9977480749673108, "grad_norm": 0.9056550926255091, "learning_rate": 2.5533544443567815e-10, "loss": 0.594, "step": 54940 }, { "epoch": 0.9979296818247857, "grad_norm": 0.9195064507404123, "learning_rate": 2.1581306072548937e-10, "loss": 0.6019, "step": 54950 }, { "epoch": 0.9981112886822606, "grad_norm": 0.8716000741352232, "learning_rate": 1.796118400421154e-10, "loss": 0.5946, "step": 54960 }, { "epoch": 0.9982928955397355, "grad_norm": 0.8809445154394203, "learning_rate": 1.4673179440816142e-10, "loss": 0.5938, "step": 54970 }, { "epoch": 0.9984745023972105, "grad_norm": 0.8691985282652118, "learning_rate": 1.1717293474489134e-10, "loss": 0.5908, "step": 54980 }, { "epoch": 0.9986561092546855, "grad_norm": 0.8374513810232335, "learning_rate": 9.093527086889708e-11, "loss": 0.5896, "step": 54990 }, { "epoch": 0.9988377161121604, "grad_norm": 0.9319321900012691, "learning_rate": 6.801881149431921e-11, "loss": 0.6019, "step": 55000 }, { "epoch": 0.9990193229696354, "grad_norm": 0.8546911416673354, "learning_rate": 4.842356423173655e-11, "loss": 0.5938, "step": 55010 }, { "epoch": 0.9992009298271103, "grad_norm": 0.8805995845024784, "learning_rate": 3.214953559038669e-11, "loss": 0.601, "step": 55020 }, { "epoch": 0.9993825366845852, "grad_norm": 0.9029111362706427, "learning_rate": 1.9196730973725098e-11, "loss": 0.5917, "step": 55030 }, { "epoch": 0.9995641435420601, "grad_norm": 0.897927639518583, "learning_rate": 9.565154684976208e-12, "loss": 0.5969, "step": 55040 }, { "epoch": 0.9997457503995351, "grad_norm": 0.8848631342662207, "learning_rate": 3.254809922692559e-12, "loss": 0.5898, "step": 55050 }, { "epoch": 0.99992735725701, "grad_norm": 0.8623276730073063, "learning_rate": 2.656987829752211e-13, "loss": 0.6021, "step": 55060 }, { "epoch": 1.0, "eval_loss": 0.5660967230796814, "eval_runtime": 20.3703, "eval_samples_per_second": 42.022, "eval_steps_per_second": 0.687, "step": 55064 }, { "epoch": 1.0, "step": 55064, "total_flos": 7.680275121596006e+16, "train_loss": 0.6780993922750357, "train_runtime": 284116.9477, "train_samples_per_second": 12.403, "train_steps_per_second": 0.194 } ], "logging_steps": 10, "max_steps": 55064, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.680275121596006e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }