{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 55064, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.8160685747493826e-05, "grad_norm": 35.58306201075179, "learning_rate": 3.629764065335753e-08, "loss": 3.3186, "step": 1 }, { "epoch": 0.00018160685747493826, "grad_norm": 32.314222950246865, "learning_rate": 3.6297640653357535e-07, "loss": 3.3115, "step": 10 }, { "epoch": 0.0003632137149498765, "grad_norm": 14.722599893814063, "learning_rate": 7.259528130671507e-07, "loss": 2.9456, "step": 20 }, { "epoch": 0.0005448205724248148, "grad_norm": 6.204759196872827, "learning_rate": 1.088929219600726e-06, "loss": 2.215, "step": 30 }, { "epoch": 0.000726427429899753, "grad_norm": 3.806609870756069, "learning_rate": 1.4519056261343014e-06, "loss": 1.7803, "step": 40 }, { "epoch": 0.0009080342873746913, "grad_norm": 2.9250660669739266, "learning_rate": 1.8148820326678768e-06, "loss": 1.552, "step": 50 }, { "epoch": 0.0010896411448496296, "grad_norm": 2.4027678612973316, "learning_rate": 2.177858439201452e-06, "loss": 1.3403, "step": 60 }, { "epoch": 0.0012712480023245678, "grad_norm": 2.1170265560430854, "learning_rate": 2.540834845735027e-06, "loss": 1.2144, "step": 70 }, { "epoch": 0.001452854859799506, "grad_norm": 2.0587246452427523, "learning_rate": 2.903811252268603e-06, "loss": 1.1237, "step": 80 }, { "epoch": 0.0016344617172744443, "grad_norm": 1.8576359538384335, "learning_rate": 3.266787658802178e-06, "loss": 1.0562, "step": 90 }, { "epoch": 0.0018160685747493825, "grad_norm": 1.973046493740211, "learning_rate": 3.6297640653357536e-06, "loss": 1.0122, "step": 100 }, { "epoch": 0.001997675432224321, "grad_norm": 1.82715115651137, "learning_rate": 3.992740471869328e-06, "loss": 0.9497, "step": 110 }, { "epoch": 0.002179282289699259, "grad_norm": 1.7957620581082474, "learning_rate": 4.355716878402904e-06, "loss": 0.9341, "step": 120 }, { "epoch": 0.0023608891471741974, "grad_norm": 2.2798258298401315, "learning_rate": 4.71869328493648e-06, "loss": 0.9302, "step": 130 }, { "epoch": 0.0025424960046491357, "grad_norm": 1.7602442828701297, "learning_rate": 5.081669691470054e-06, "loss": 0.8899, "step": 140 }, { "epoch": 0.002724102862124074, "grad_norm": 1.880080655252155, "learning_rate": 5.44464609800363e-06, "loss": 0.8965, "step": 150 }, { "epoch": 0.002905709719599012, "grad_norm": 1.8723206724131927, "learning_rate": 5.807622504537206e-06, "loss": 0.8856, "step": 160 }, { "epoch": 0.0030873165770739504, "grad_norm": 1.7310275231530132, "learning_rate": 6.170598911070781e-06, "loss": 0.8746, "step": 170 }, { "epoch": 0.0032689234345488886, "grad_norm": 1.657693554042452, "learning_rate": 6.533575317604356e-06, "loss": 0.8265, "step": 180 }, { "epoch": 0.003450530292023827, "grad_norm": 1.6784374837366303, "learning_rate": 6.896551724137932e-06, "loss": 0.8383, "step": 190 }, { "epoch": 0.003632137149498765, "grad_norm": 1.6942336511003442, "learning_rate": 7.259528130671507e-06, "loss": 0.8239, "step": 200 }, { "epoch": 0.0038137440069737033, "grad_norm": 1.689595897559344, "learning_rate": 7.622504537205082e-06, "loss": 0.8591, "step": 210 }, { "epoch": 0.003995350864448642, "grad_norm": 1.5735187060967828, "learning_rate": 7.985480943738657e-06, "loss": 0.8258, "step": 220 }, { "epoch": 0.00417695772192358, "grad_norm": 1.8503171794971773, "learning_rate": 8.348457350272232e-06, "loss": 0.8161, "step": 230 }, { "epoch": 0.004358564579398518, "grad_norm": 1.6894127217198855, "learning_rate": 8.711433756805808e-06, "loss": 0.7955, "step": 240 }, { "epoch": 0.004540171436873457, "grad_norm": 1.5686386444465337, "learning_rate": 9.074410163339384e-06, "loss": 0.8043, "step": 250 }, { "epoch": 0.004721778294348395, "grad_norm": 1.6886585900142073, "learning_rate": 9.43738656987296e-06, "loss": 0.7898, "step": 260 }, { "epoch": 0.004903385151823333, "grad_norm": 1.7700360861530586, "learning_rate": 9.800362976406535e-06, "loss": 0.7948, "step": 270 }, { "epoch": 0.005084992009298271, "grad_norm": 1.5871473741606152, "learning_rate": 1.0163339382940109e-05, "loss": 0.7854, "step": 280 }, { "epoch": 0.00526659886677321, "grad_norm": 1.7113786855111273, "learning_rate": 1.0526315789473684e-05, "loss": 0.7781, "step": 290 }, { "epoch": 0.005448205724248148, "grad_norm": 1.722296253060849, "learning_rate": 1.088929219600726e-05, "loss": 0.7783, "step": 300 }, { "epoch": 0.005629812581723086, "grad_norm": 1.7059403416569718, "learning_rate": 1.1252268602540837e-05, "loss": 0.7948, "step": 310 }, { "epoch": 0.005811419439198024, "grad_norm": 1.6933341958331007, "learning_rate": 1.1615245009074411e-05, "loss": 0.7821, "step": 320 }, { "epoch": 0.0059930262966729625, "grad_norm": 1.640807356608071, "learning_rate": 1.1978221415607987e-05, "loss": 0.7796, "step": 330 }, { "epoch": 0.006174633154147901, "grad_norm": 1.5915875609745933, "learning_rate": 1.2341197822141563e-05, "loss": 0.7707, "step": 340 }, { "epoch": 0.006356240011622839, "grad_norm": 1.557725806118495, "learning_rate": 1.2704174228675136e-05, "loss": 0.7588, "step": 350 }, { "epoch": 0.006537846869097777, "grad_norm": 1.615837585119804, "learning_rate": 1.3067150635208712e-05, "loss": 0.7722, "step": 360 }, { "epoch": 0.006719453726572715, "grad_norm": 1.6004423285066895, "learning_rate": 1.3430127041742288e-05, "loss": 0.7678, "step": 370 }, { "epoch": 0.006901060584047654, "grad_norm": 1.6914518549886544, "learning_rate": 1.3793103448275863e-05, "loss": 0.7612, "step": 380 }, { "epoch": 0.007082667441522592, "grad_norm": 1.6475934743364105, "learning_rate": 1.4156079854809439e-05, "loss": 0.7747, "step": 390 }, { "epoch": 0.00726427429899753, "grad_norm": 1.6081612612697154, "learning_rate": 1.4519056261343015e-05, "loss": 0.7558, "step": 400 }, { "epoch": 0.007445881156472468, "grad_norm": 1.6178702182302604, "learning_rate": 1.4882032667876588e-05, "loss": 0.7545, "step": 410 }, { "epoch": 0.007627488013947407, "grad_norm": 7.171314668842457, "learning_rate": 1.5245009074410164e-05, "loss": 0.7632, "step": 420 }, { "epoch": 0.007809094871422345, "grad_norm": 1.5478895745209114, "learning_rate": 1.5607985480943738e-05, "loss": 0.7444, "step": 430 }, { "epoch": 0.007990701728897284, "grad_norm": 1.5335354603218465, "learning_rate": 1.5970961887477314e-05, "loss": 0.7454, "step": 440 }, { "epoch": 0.008172308586372221, "grad_norm": 1.5627729772161036, "learning_rate": 1.6333938294010893e-05, "loss": 0.7426, "step": 450 }, { "epoch": 0.00835391544384716, "grad_norm": 1.5006182823543548, "learning_rate": 1.6696914700544465e-05, "loss": 0.7435, "step": 460 }, { "epoch": 0.008535522301322098, "grad_norm": 1.6127178345676547, "learning_rate": 1.705989110707804e-05, "loss": 0.7408, "step": 470 }, { "epoch": 0.008717129158797037, "grad_norm": 1.6126726486861374, "learning_rate": 1.7422867513611616e-05, "loss": 0.7429, "step": 480 }, { "epoch": 0.008898736016271974, "grad_norm": 1.76199239003064, "learning_rate": 1.7785843920145192e-05, "loss": 0.7644, "step": 490 }, { "epoch": 0.009080342873746913, "grad_norm": 1.632043395530153, "learning_rate": 1.8148820326678767e-05, "loss": 0.74, "step": 500 }, { "epoch": 0.00926194973122185, "grad_norm": 1.6040596841178802, "learning_rate": 1.8511796733212343e-05, "loss": 0.7548, "step": 510 }, { "epoch": 0.00944355658869679, "grad_norm": 1.548374608054754, "learning_rate": 1.887477313974592e-05, "loss": 0.7378, "step": 520 }, { "epoch": 0.009625163446171727, "grad_norm": 1.6822562818715954, "learning_rate": 1.9237749546279494e-05, "loss": 0.7636, "step": 530 }, { "epoch": 0.009806770303646666, "grad_norm": 1.4609647131082497, "learning_rate": 1.960072595281307e-05, "loss": 0.7337, "step": 540 }, { "epoch": 0.009988377161121604, "grad_norm": 1.5738231746859028, "learning_rate": 1.9963702359346645e-05, "loss": 0.7435, "step": 550 }, { "epoch": 0.010169984018596543, "grad_norm": 1.4594155557151298, "learning_rate": 1.9999998654899938e-05, "loss": 0.7407, "step": 560 }, { "epoch": 0.01035159087607148, "grad_norm": 1.6636102463031641, "learning_rate": 1.9999994005171795e-05, "loss": 0.73, "step": 570 }, { "epoch": 0.01053319773354642, "grad_norm": 1.4741805418097549, "learning_rate": 1.999998603421094e-05, "loss": 0.7262, "step": 580 }, { "epoch": 0.010714804591021357, "grad_norm": 1.5019480833239043, "learning_rate": 1.999997474202002e-05, "loss": 0.7535, "step": 590 }, { "epoch": 0.010896411448496296, "grad_norm": 1.401486080945441, "learning_rate": 1.9999960128602782e-05, "loss": 0.7428, "step": 600 }, { "epoch": 0.011078018305971233, "grad_norm": 1.4754191236257699, "learning_rate": 1.9999942193964087e-05, "loss": 0.7387, "step": 610 }, { "epoch": 0.011259625163446172, "grad_norm": 1.3892065618984015, "learning_rate": 1.9999920938109883e-05, "loss": 0.7336, "step": 620 }, { "epoch": 0.01144123202092111, "grad_norm": 1.5604689444364241, "learning_rate": 1.999989636104724e-05, "loss": 0.7278, "step": 630 }, { "epoch": 0.011622838878396049, "grad_norm": 1.4134138592216359, "learning_rate": 1.9999868462784312e-05, "loss": 0.7398, "step": 640 }, { "epoch": 0.011804445735870986, "grad_norm": 1.5321852879902833, "learning_rate": 1.9999837243330366e-05, "loss": 0.7388, "step": 650 }, { "epoch": 0.011986052593345925, "grad_norm": 1.4115052452143433, "learning_rate": 1.9999802702695775e-05, "loss": 0.7316, "step": 660 }, { "epoch": 0.012167659450820862, "grad_norm": 1.6593067493812976, "learning_rate": 1.9999764840892005e-05, "loss": 0.7397, "step": 670 }, { "epoch": 0.012349266308295801, "grad_norm": 1.531888054161507, "learning_rate": 1.9999723657931636e-05, "loss": 0.7416, "step": 680 }, { "epoch": 0.012530873165770739, "grad_norm": 1.4699749342151411, "learning_rate": 1.9999679153828343e-05, "loss": 0.7217, "step": 690 }, { "epoch": 0.012712480023245678, "grad_norm": 1.507718606375514, "learning_rate": 1.9999631328596907e-05, "loss": 0.7491, "step": 700 }, { "epoch": 0.012894086880720615, "grad_norm": 1.4031369729951937, "learning_rate": 1.9999580182253212e-05, "loss": 0.7324, "step": 710 }, { "epoch": 0.013075693738195554, "grad_norm": 1.540129425477107, "learning_rate": 1.9999525714814244e-05, "loss": 0.7487, "step": 720 }, { "epoch": 0.013257300595670492, "grad_norm": 1.425248722073867, "learning_rate": 1.9999467926298094e-05, "loss": 0.7176, "step": 730 }, { "epoch": 0.01343890745314543, "grad_norm": 1.3735310501797833, "learning_rate": 1.9999406816723957e-05, "loss": 0.7388, "step": 740 }, { "epoch": 0.013620514310620368, "grad_norm": 1.360531444041733, "learning_rate": 1.9999342386112127e-05, "loss": 0.7262, "step": 750 }, { "epoch": 0.013802121168095307, "grad_norm": 1.4155376468208372, "learning_rate": 1.9999274634484004e-05, "loss": 0.7529, "step": 760 }, { "epoch": 0.013983728025570246, "grad_norm": 1.317838194128412, "learning_rate": 1.9999203561862085e-05, "loss": 0.7132, "step": 770 }, { "epoch": 0.014165334883045184, "grad_norm": 1.4008944860207384, "learning_rate": 1.9999129168269982e-05, "loss": 0.7385, "step": 780 }, { "epoch": 0.014346941740520123, "grad_norm": 1.4389757010563218, "learning_rate": 1.9999051453732398e-05, "loss": 0.7388, "step": 790 }, { "epoch": 0.01452854859799506, "grad_norm": 1.361222381888974, "learning_rate": 1.9998970418275146e-05, "loss": 0.7414, "step": 800 }, { "epoch": 0.01471015545547, "grad_norm": 1.538055608671703, "learning_rate": 1.9998886061925136e-05, "loss": 0.727, "step": 810 }, { "epoch": 0.014891762312944937, "grad_norm": 1.4197143481518406, "learning_rate": 1.9998798384710395e-05, "loss": 0.7494, "step": 820 }, { "epoch": 0.015073369170419876, "grad_norm": 1.3590451040985319, "learning_rate": 1.999870738666003e-05, "loss": 0.728, "step": 830 }, { "epoch": 0.015254976027894813, "grad_norm": 1.421733503805545, "learning_rate": 1.999861306780427e-05, "loss": 0.7208, "step": 840 }, { "epoch": 0.015436582885369752, "grad_norm": 1.379533510222099, "learning_rate": 1.9998515428174436e-05, "loss": 0.7221, "step": 850 }, { "epoch": 0.01561818974284469, "grad_norm": 1.3019001727471642, "learning_rate": 1.9998414467802964e-05, "loss": 0.7325, "step": 860 }, { "epoch": 0.01579979660031963, "grad_norm": 1.3807315899325099, "learning_rate": 1.999831018672338e-05, "loss": 0.726, "step": 870 }, { "epoch": 0.015981403457794568, "grad_norm": 1.4022079932686367, "learning_rate": 1.9998202584970325e-05, "loss": 0.721, "step": 880 }, { "epoch": 0.016163010315269503, "grad_norm": 1.3427385549888018, "learning_rate": 1.9998091662579525e-05, "loss": 0.7232, "step": 890 }, { "epoch": 0.016344617172744443, "grad_norm": 1.333362030387319, "learning_rate": 1.9997977419587827e-05, "loss": 0.723, "step": 900 }, { "epoch": 0.01652622403021938, "grad_norm": 1.309292416046727, "learning_rate": 1.9997859856033172e-05, "loss": 0.7316, "step": 910 }, { "epoch": 0.01670783088769432, "grad_norm": 1.3480475492604924, "learning_rate": 1.9997738971954604e-05, "loss": 0.7263, "step": 920 }, { "epoch": 0.016889437745169256, "grad_norm": 1.393478199474698, "learning_rate": 1.999761476739227e-05, "loss": 0.7297, "step": 930 }, { "epoch": 0.017071044602644195, "grad_norm": 1.3847668249053566, "learning_rate": 1.9997487242387433e-05, "loss": 0.7234, "step": 940 }, { "epoch": 0.017252651460119135, "grad_norm": 1.4280939930900411, "learning_rate": 1.9997356396982434e-05, "loss": 0.7315, "step": 950 }, { "epoch": 0.017434258317594074, "grad_norm": 1.3812712504409148, "learning_rate": 1.9997222231220736e-05, "loss": 0.7197, "step": 960 }, { "epoch": 0.01761586517506901, "grad_norm": 1.4281336225268915, "learning_rate": 1.9997084745146896e-05, "loss": 0.7114, "step": 970 }, { "epoch": 0.01779747203254395, "grad_norm": 1.3465786460077795, "learning_rate": 1.9996943938806578e-05, "loss": 0.7287, "step": 980 }, { "epoch": 0.017979078890018887, "grad_norm": 1.3881022203396753, "learning_rate": 1.999679981224655e-05, "loss": 0.7264, "step": 990 }, { "epoch": 0.018160685747493827, "grad_norm": 1.3250776139185327, "learning_rate": 1.999665236551467e-05, "loss": 0.7382, "step": 1000 }, { "epoch": 0.018342292604968762, "grad_norm": 1.2696200632353383, "learning_rate": 1.9996501598659916e-05, "loss": 0.7223, "step": 1010 }, { "epoch": 0.0185238994624437, "grad_norm": 1.3371590251653696, "learning_rate": 1.9996347511732362e-05, "loss": 0.713, "step": 1020 }, { "epoch": 0.01870550631991864, "grad_norm": 1.3756660743839118, "learning_rate": 1.9996190104783183e-05, "loss": 0.731, "step": 1030 }, { "epoch": 0.01888711317739358, "grad_norm": 1.3271836952504283, "learning_rate": 1.9996029377864653e-05, "loss": 0.7161, "step": 1040 }, { "epoch": 0.019068720034868515, "grad_norm": 1.363450500837317, "learning_rate": 1.999586533103016e-05, "loss": 0.7337, "step": 1050 }, { "epoch": 0.019250326892343454, "grad_norm": 1.257463163260844, "learning_rate": 1.999569796433418e-05, "loss": 0.7305, "step": 1060 }, { "epoch": 0.019431933749818393, "grad_norm": 1.2613264746489032, "learning_rate": 1.9995527277832308e-05, "loss": 0.7045, "step": 1070 }, { "epoch": 0.019613540607293332, "grad_norm": 1.2985286812093397, "learning_rate": 1.999535327158123e-05, "loss": 0.7357, "step": 1080 }, { "epoch": 0.019795147464768268, "grad_norm": 1.24010577174114, "learning_rate": 1.9995175945638736e-05, "loss": 0.7287, "step": 1090 }, { "epoch": 0.019976754322243207, "grad_norm": 1.2586741699985196, "learning_rate": 1.9994995300063716e-05, "loss": 0.7184, "step": 1100 }, { "epoch": 0.020158361179718146, "grad_norm": 1.3257331543008646, "learning_rate": 1.9994811334916174e-05, "loss": 0.7379, "step": 1110 }, { "epoch": 0.020339968037193085, "grad_norm": 1.3513172012486494, "learning_rate": 1.9994624050257205e-05, "loss": 0.7154, "step": 1120 }, { "epoch": 0.02052157489466802, "grad_norm": 1.3753161198086747, "learning_rate": 1.9994433446149014e-05, "loss": 0.7359, "step": 1130 }, { "epoch": 0.02070318175214296, "grad_norm": 1.4218208754437058, "learning_rate": 1.9994239522654903e-05, "loss": 0.7316, "step": 1140 }, { "epoch": 0.0208847886096179, "grad_norm": 1.2896751868269427, "learning_rate": 1.9994042279839276e-05, "loss": 0.7163, "step": 1150 }, { "epoch": 0.02106639546709284, "grad_norm": 1.2971127641613838, "learning_rate": 1.9993841717767646e-05, "loss": 0.7199, "step": 1160 }, { "epoch": 0.021248002324567777, "grad_norm": 1.3128169892333883, "learning_rate": 1.9993637836506624e-05, "loss": 0.7203, "step": 1170 }, { "epoch": 0.021429609182042713, "grad_norm": 1.2257708208549396, "learning_rate": 1.999343063612392e-05, "loss": 0.7084, "step": 1180 }, { "epoch": 0.021611216039517652, "grad_norm": 1.368573984483903, "learning_rate": 1.9993220116688358e-05, "loss": 0.7274, "step": 1190 }, { "epoch": 0.02179282289699259, "grad_norm": 1.3434229842180485, "learning_rate": 1.9993006278269845e-05, "loss": 0.7168, "step": 1200 }, { "epoch": 0.02197442975446753, "grad_norm": 1.2573351818964529, "learning_rate": 1.999278912093941e-05, "loss": 0.7046, "step": 1210 }, { "epoch": 0.022156036611942466, "grad_norm": 1.3121338060896917, "learning_rate": 1.9992568644769175e-05, "loss": 0.7228, "step": 1220 }, { "epoch": 0.022337643469417405, "grad_norm": 1.1814921558381386, "learning_rate": 1.9992344849832363e-05, "loss": 0.7029, "step": 1230 }, { "epoch": 0.022519250326892344, "grad_norm": 1.2674261596780754, "learning_rate": 1.9992117736203305e-05, "loss": 0.7237, "step": 1240 }, { "epoch": 0.022700857184367283, "grad_norm": 1.2820669451987052, "learning_rate": 1.999188730395743e-05, "loss": 0.7243, "step": 1250 }, { "epoch": 0.02288246404184222, "grad_norm": 1.267567090100438, "learning_rate": 1.9991653553171263e-05, "loss": 0.7227, "step": 1260 }, { "epoch": 0.023064070899317158, "grad_norm": 1.362088844687599, "learning_rate": 1.999141648392245e-05, "loss": 0.7209, "step": 1270 }, { "epoch": 0.023245677756792097, "grad_norm": 1.3846425537004146, "learning_rate": 1.9991176096289716e-05, "loss": 0.7303, "step": 1280 }, { "epoch": 0.023427284614267036, "grad_norm": 1.2637918469951082, "learning_rate": 1.9990932390352907e-05, "loss": 0.7076, "step": 1290 }, { "epoch": 0.023608891471741972, "grad_norm": 1.3027461464570869, "learning_rate": 1.9990685366192957e-05, "loss": 0.7131, "step": 1300 }, { "epoch": 0.02379049832921691, "grad_norm": 1.2453934373780549, "learning_rate": 1.9990435023891916e-05, "loss": 0.7086, "step": 1310 }, { "epoch": 0.02397210518669185, "grad_norm": 1.2617514359106152, "learning_rate": 1.9990181363532923e-05, "loss": 0.7148, "step": 1320 }, { "epoch": 0.02415371204416679, "grad_norm": 1.1873638578269845, "learning_rate": 1.998992438520023e-05, "loss": 0.7373, "step": 1330 }, { "epoch": 0.024335318901641725, "grad_norm": 1.2445581612352847, "learning_rate": 1.998966408897918e-05, "loss": 0.7155, "step": 1340 }, { "epoch": 0.024516925759116664, "grad_norm": 1.2819588270317, "learning_rate": 1.9989400474956224e-05, "loss": 0.7121, "step": 1350 }, { "epoch": 0.024698532616591603, "grad_norm": 1.2751492060461822, "learning_rate": 1.9989133543218917e-05, "loss": 0.709, "step": 1360 }, { "epoch": 0.024880139474066542, "grad_norm": 1.2191290537916566, "learning_rate": 1.9988863293855912e-05, "loss": 0.7161, "step": 1370 }, { "epoch": 0.025061746331541478, "grad_norm": 1.273827844373462, "learning_rate": 1.9988589726956967e-05, "loss": 0.7079, "step": 1380 }, { "epoch": 0.025243353189016417, "grad_norm": 1.3156355002270028, "learning_rate": 1.9988312842612935e-05, "loss": 0.7337, "step": 1390 }, { "epoch": 0.025424960046491356, "grad_norm": 1.281814789842884, "learning_rate": 1.9988032640915784e-05, "loss": 0.7282, "step": 1400 }, { "epoch": 0.025606566903966295, "grad_norm": 1.222429114055018, "learning_rate": 1.9987749121958564e-05, "loss": 0.7163, "step": 1410 }, { "epoch": 0.02578817376144123, "grad_norm": 1.2325885027290775, "learning_rate": 1.998746228583545e-05, "loss": 0.7227, "step": 1420 }, { "epoch": 0.02596978061891617, "grad_norm": 1.383450395635756, "learning_rate": 1.99871721326417e-05, "loss": 0.7117, "step": 1430 }, { "epoch": 0.02615138747639111, "grad_norm": 1.224796282954928, "learning_rate": 1.9986878662473684e-05, "loss": 0.7157, "step": 1440 }, { "epoch": 0.026332994333866048, "grad_norm": 1.258087374218085, "learning_rate": 1.9986581875428867e-05, "loss": 0.7019, "step": 1450 }, { "epoch": 0.026514601191340983, "grad_norm": 1.2508379241875989, "learning_rate": 1.9986281771605826e-05, "loss": 0.7306, "step": 1460 }, { "epoch": 0.026696208048815923, "grad_norm": 1.220551476163685, "learning_rate": 1.998597835110422e-05, "loss": 0.7367, "step": 1470 }, { "epoch": 0.02687781490629086, "grad_norm": 1.2258675919502977, "learning_rate": 1.9985671614024834e-05, "loss": 0.7129, "step": 1480 }, { "epoch": 0.0270594217637658, "grad_norm": 1.4126075770947764, "learning_rate": 1.9985361560469538e-05, "loss": 0.7114, "step": 1490 }, { "epoch": 0.027241028621240736, "grad_norm": 1.316448682318712, "learning_rate": 1.9985048190541305e-05, "loss": 0.7097, "step": 1500 }, { "epoch": 0.027422635478715675, "grad_norm": 1.300465839374954, "learning_rate": 1.998473150434422e-05, "loss": 0.7272, "step": 1510 }, { "epoch": 0.027604242336190615, "grad_norm": 1.1733626223035665, "learning_rate": 1.9984411501983456e-05, "loss": 0.7098, "step": 1520 }, { "epoch": 0.027785849193665554, "grad_norm": 1.2360016808606011, "learning_rate": 1.9984088183565292e-05, "loss": 0.7103, "step": 1530 }, { "epoch": 0.027967456051140493, "grad_norm": 1.2640880063696043, "learning_rate": 1.9983761549197118e-05, "loss": 0.7015, "step": 1540 }, { "epoch": 0.02814906290861543, "grad_norm": 1.2327287365166182, "learning_rate": 1.998343159898741e-05, "loss": 0.7247, "step": 1550 }, { "epoch": 0.028330669766090368, "grad_norm": 1.2370495946925866, "learning_rate": 1.9983098333045748e-05, "loss": 0.6917, "step": 1560 }, { "epoch": 0.028512276623565307, "grad_norm": 1.230156768116289, "learning_rate": 1.9982761751482828e-05, "loss": 0.7113, "step": 1570 }, { "epoch": 0.028693883481040246, "grad_norm": 1.2954931613972192, "learning_rate": 1.9982421854410433e-05, "loss": 0.7211, "step": 1580 }, { "epoch": 0.02887549033851518, "grad_norm": 1.2276710824268964, "learning_rate": 1.9982078641941445e-05, "loss": 0.7041, "step": 1590 }, { "epoch": 0.02905709719599012, "grad_norm": 1.2423614271372267, "learning_rate": 1.9981732114189863e-05, "loss": 0.713, "step": 1600 }, { "epoch": 0.02923870405346506, "grad_norm": 1.2479335735395145, "learning_rate": 1.9981382271270767e-05, "loss": 0.7002, "step": 1610 }, { "epoch": 0.02942031091094, "grad_norm": 1.1796766378103154, "learning_rate": 1.9981029113300353e-05, "loss": 0.7279, "step": 1620 }, { "epoch": 0.029601917768414934, "grad_norm": 1.169896270061747, "learning_rate": 1.9980672640395916e-05, "loss": 0.7096, "step": 1630 }, { "epoch": 0.029783524625889873, "grad_norm": 1.1860566953405904, "learning_rate": 1.9980312852675842e-05, "loss": 0.7297, "step": 1640 }, { "epoch": 0.029965131483364812, "grad_norm": 1.2759371263206971, "learning_rate": 1.9979949750259632e-05, "loss": 0.6862, "step": 1650 }, { "epoch": 0.03014673834083975, "grad_norm": 1.2257946318572905, "learning_rate": 1.9979583333267872e-05, "loss": 0.7061, "step": 1660 }, { "epoch": 0.030328345198314687, "grad_norm": 1.1926905398593544, "learning_rate": 1.9979213601822268e-05, "loss": 0.7055, "step": 1670 }, { "epoch": 0.030509952055789626, "grad_norm": 1.1950016002527915, "learning_rate": 1.9978840556045612e-05, "loss": 0.7147, "step": 1680 }, { "epoch": 0.030691558913264565, "grad_norm": 1.1748950187030254, "learning_rate": 1.9978464196061798e-05, "loss": 0.7097, "step": 1690 }, { "epoch": 0.030873165770739504, "grad_norm": 1.2096549106849688, "learning_rate": 1.9978084521995825e-05, "loss": 0.7189, "step": 1700 }, { "epoch": 0.03105477262821444, "grad_norm": 1.1666733956941795, "learning_rate": 1.9977701533973798e-05, "loss": 0.694, "step": 1710 }, { "epoch": 0.03123637948568938, "grad_norm": 1.2056507257562152, "learning_rate": 1.9977315232122908e-05, "loss": 0.7252, "step": 1720 }, { "epoch": 0.03141798634316432, "grad_norm": 1.2551576412051737, "learning_rate": 1.997692561657146e-05, "loss": 0.7116, "step": 1730 }, { "epoch": 0.03159959320063926, "grad_norm": 1.2229854544737764, "learning_rate": 1.9976532687448852e-05, "loss": 0.7154, "step": 1740 }, { "epoch": 0.031781200058114197, "grad_norm": 1.1684214152994468, "learning_rate": 1.997613644488559e-05, "loss": 0.6744, "step": 1750 }, { "epoch": 0.031962806915589136, "grad_norm": 1.1942962329653009, "learning_rate": 1.997573688901327e-05, "loss": 0.7127, "step": 1760 }, { "epoch": 0.03214441377306407, "grad_norm": 1.2325238071096356, "learning_rate": 1.997533401996459e-05, "loss": 0.7006, "step": 1770 }, { "epoch": 0.03232602063053901, "grad_norm": 1.2090292490968093, "learning_rate": 1.9974927837873365e-05, "loss": 0.7052, "step": 1780 }, { "epoch": 0.032507627488013946, "grad_norm": 1.1760920267225203, "learning_rate": 1.9974518342874488e-05, "loss": 0.7147, "step": 1790 }, { "epoch": 0.032689234345488885, "grad_norm": 1.2341502211195021, "learning_rate": 1.9974105535103963e-05, "loss": 0.7153, "step": 1800 }, { "epoch": 0.032870841202963824, "grad_norm": 1.1498476940790676, "learning_rate": 1.9973689414698896e-05, "loss": 0.7156, "step": 1810 }, { "epoch": 0.03305244806043876, "grad_norm": 1.1846325544102296, "learning_rate": 1.9973269981797488e-05, "loss": 0.721, "step": 1820 }, { "epoch": 0.0332340549179137, "grad_norm": 1.2327978984426147, "learning_rate": 1.997284723653904e-05, "loss": 0.7062, "step": 1830 }, { "epoch": 0.03341566177538864, "grad_norm": 1.160747751423665, "learning_rate": 1.9972421179063964e-05, "loss": 0.7166, "step": 1840 }, { "epoch": 0.033597268632863574, "grad_norm": 1.1743706955106437, "learning_rate": 1.9971991809513758e-05, "loss": 0.7126, "step": 1850 }, { "epoch": 0.03377887549033851, "grad_norm": 1.19019689797559, "learning_rate": 1.9971559128031024e-05, "loss": 0.7156, "step": 1860 }, { "epoch": 0.03396048234781345, "grad_norm": 1.1557371079176342, "learning_rate": 1.9971123134759468e-05, "loss": 0.7109, "step": 1870 }, { "epoch": 0.03414208920528839, "grad_norm": 1.1989114135650003, "learning_rate": 1.9970683829843896e-05, "loss": 0.7104, "step": 1880 }, { "epoch": 0.03432369606276333, "grad_norm": 1.2567190597086217, "learning_rate": 1.9970241213430208e-05, "loss": 0.7123, "step": 1890 }, { "epoch": 0.03450530292023827, "grad_norm": 1.179180885876566, "learning_rate": 1.9969795285665405e-05, "loss": 0.6986, "step": 1900 }, { "epoch": 0.03468690977771321, "grad_norm": 1.150105920109094, "learning_rate": 1.99693460466976e-05, "loss": 0.7017, "step": 1910 }, { "epoch": 0.03486851663518815, "grad_norm": 1.1670486698269398, "learning_rate": 1.9968893496675982e-05, "loss": 0.714, "step": 1920 }, { "epoch": 0.035050123492663086, "grad_norm": 1.1858573334595328, "learning_rate": 1.9968437635750866e-05, "loss": 0.6976, "step": 1930 }, { "epoch": 0.03523173035013802, "grad_norm": 1.1385215503575647, "learning_rate": 1.9967978464073643e-05, "loss": 0.7185, "step": 1940 }, { "epoch": 0.03541333720761296, "grad_norm": 1.140636918347082, "learning_rate": 1.9967515981796827e-05, "loss": 0.6849, "step": 1950 }, { "epoch": 0.0355949440650879, "grad_norm": 1.2345015546746085, "learning_rate": 1.9967050189074007e-05, "loss": 0.7167, "step": 1960 }, { "epoch": 0.035776550922562836, "grad_norm": 1.162298541165482, "learning_rate": 1.9966581086059893e-05, "loss": 0.7147, "step": 1970 }, { "epoch": 0.035958157780037775, "grad_norm": 1.18447144037433, "learning_rate": 1.9966108672910282e-05, "loss": 0.7062, "step": 1980 }, { "epoch": 0.036139764637512714, "grad_norm": 1.2572073380373792, "learning_rate": 1.9965632949782074e-05, "loss": 0.7196, "step": 1990 }, { "epoch": 0.03632137149498765, "grad_norm": 1.1462677149498575, "learning_rate": 1.996515391683326e-05, "loss": 0.7079, "step": 2000 }, { "epoch": 0.03650297835246259, "grad_norm": 1.1921695600232802, "learning_rate": 1.996467157422295e-05, "loss": 0.7011, "step": 2010 }, { "epoch": 0.036684585209937524, "grad_norm": 1.1980847363023452, "learning_rate": 1.996418592211134e-05, "loss": 0.7033, "step": 2020 }, { "epoch": 0.036866192067412464, "grad_norm": 1.210243440826018, "learning_rate": 1.996369696065972e-05, "loss": 0.7079, "step": 2030 }, { "epoch": 0.0370477989248874, "grad_norm": 1.133287315861273, "learning_rate": 1.996320469003048e-05, "loss": 0.718, "step": 2040 }, { "epoch": 0.03722940578236234, "grad_norm": 1.2101806611685881, "learning_rate": 1.9962709110387132e-05, "loss": 0.6991, "step": 2050 }, { "epoch": 0.03741101263983728, "grad_norm": 1.1581507535002316, "learning_rate": 1.9962210221894257e-05, "loss": 0.71, "step": 2060 }, { "epoch": 0.03759261949731222, "grad_norm": 1.1823336432273128, "learning_rate": 1.996170802471755e-05, "loss": 0.7102, "step": 2070 }, { "epoch": 0.03777422635478716, "grad_norm": 1.1561543384309936, "learning_rate": 1.996120251902381e-05, "loss": 0.7183, "step": 2080 }, { "epoch": 0.0379558332122621, "grad_norm": 1.1300420492859011, "learning_rate": 1.9960693704980915e-05, "loss": 0.702, "step": 2090 }, { "epoch": 0.03813744006973703, "grad_norm": 1.1567942380313356, "learning_rate": 1.9960181582757862e-05, "loss": 0.6973, "step": 2100 }, { "epoch": 0.03831904692721197, "grad_norm": 1.1858545750085094, "learning_rate": 1.9959666152524737e-05, "loss": 0.7035, "step": 2110 }, { "epoch": 0.03850065378468691, "grad_norm": 1.19257265056155, "learning_rate": 1.9959147414452725e-05, "loss": 0.6997, "step": 2120 }, { "epoch": 0.03868226064216185, "grad_norm": 1.1656805945877342, "learning_rate": 1.9958625368714113e-05, "loss": 0.6968, "step": 2130 }, { "epoch": 0.03886386749963679, "grad_norm": 1.2616076885094545, "learning_rate": 1.9958100015482283e-05, "loss": 0.7014, "step": 2140 }, { "epoch": 0.039045474357111726, "grad_norm": 1.1847403684421551, "learning_rate": 1.995757135493172e-05, "loss": 0.7104, "step": 2150 }, { "epoch": 0.039227081214586665, "grad_norm": 1.1775415841552035, "learning_rate": 1.9957039387238e-05, "loss": 0.6898, "step": 2160 }, { "epoch": 0.039408688072061604, "grad_norm": 1.1604519500783226, "learning_rate": 1.9956504112577806e-05, "loss": 0.6973, "step": 2170 }, { "epoch": 0.039590294929536536, "grad_norm": 1.1949028652147777, "learning_rate": 1.9955965531128914e-05, "loss": 0.703, "step": 2180 }, { "epoch": 0.039771901787011475, "grad_norm": 1.1899753295389859, "learning_rate": 1.99554236430702e-05, "loss": 0.7132, "step": 2190 }, { "epoch": 0.039953508644486414, "grad_norm": 1.115492494078099, "learning_rate": 1.9954878448581636e-05, "loss": 0.7126, "step": 2200 }, { "epoch": 0.04013511550196135, "grad_norm": 1.1413687442682057, "learning_rate": 1.9954329947844297e-05, "loss": 0.7069, "step": 2210 }, { "epoch": 0.04031672235943629, "grad_norm": 1.1786918075207427, "learning_rate": 1.995377814104035e-05, "loss": 0.711, "step": 2220 }, { "epoch": 0.04049832921691123, "grad_norm": 1.1833702019999943, "learning_rate": 1.9953223028353063e-05, "loss": 0.7062, "step": 2230 }, { "epoch": 0.04067993607438617, "grad_norm": 1.127528793498676, "learning_rate": 1.9952664609966804e-05, "loss": 0.6958, "step": 2240 }, { "epoch": 0.04086154293186111, "grad_norm": 1.1420689408688856, "learning_rate": 1.9952102886067035e-05, "loss": 0.6933, "step": 2250 }, { "epoch": 0.04104314978933604, "grad_norm": 1.155751413642556, "learning_rate": 1.995153785684032e-05, "loss": 0.6945, "step": 2260 }, { "epoch": 0.04122475664681098, "grad_norm": 1.136660714537844, "learning_rate": 1.9950969522474314e-05, "loss": 0.7097, "step": 2270 }, { "epoch": 0.04140636350428592, "grad_norm": 1.0668556258109378, "learning_rate": 1.995039788315778e-05, "loss": 0.7037, "step": 2280 }, { "epoch": 0.04158797036176086, "grad_norm": 1.1605947296048262, "learning_rate": 1.994982293908057e-05, "loss": 0.7112, "step": 2290 }, { "epoch": 0.0417695772192358, "grad_norm": 1.1596310232940767, "learning_rate": 1.9949244690433637e-05, "loss": 0.707, "step": 2300 }, { "epoch": 0.04195118407671074, "grad_norm": 1.119259082018855, "learning_rate": 1.994866313740903e-05, "loss": 0.6973, "step": 2310 }, { "epoch": 0.04213279093418568, "grad_norm": 1.2571358379069384, "learning_rate": 1.9948078280199894e-05, "loss": 0.7123, "step": 2320 }, { "epoch": 0.042314397791660616, "grad_norm": 1.1786955944810058, "learning_rate": 1.994749011900048e-05, "loss": 0.7098, "step": 2330 }, { "epoch": 0.042496004649135555, "grad_norm": 1.1238902068178258, "learning_rate": 1.9946898654006124e-05, "loss": 0.7057, "step": 2340 }, { "epoch": 0.04267761150661049, "grad_norm": 1.2225514676877027, "learning_rate": 1.994630388541327e-05, "loss": 0.6946, "step": 2350 }, { "epoch": 0.042859218364085426, "grad_norm": 1.1538910510848497, "learning_rate": 1.9945705813419453e-05, "loss": 0.6973, "step": 2360 }, { "epoch": 0.043040825221560365, "grad_norm": 1.1181372564706644, "learning_rate": 1.9945104438223308e-05, "loss": 0.7012, "step": 2370 }, { "epoch": 0.043222432079035304, "grad_norm": 1.1270529192065464, "learning_rate": 1.994449976002456e-05, "loss": 0.6879, "step": 2380 }, { "epoch": 0.04340403893651024, "grad_norm": 1.15683711282227, "learning_rate": 1.9943891779024044e-05, "loss": 0.6943, "step": 2390 }, { "epoch": 0.04358564579398518, "grad_norm": 1.1171943396292519, "learning_rate": 1.994328049542368e-05, "loss": 0.7171, "step": 2400 }, { "epoch": 0.04376725265146012, "grad_norm": 1.0945978589149439, "learning_rate": 1.9942665909426494e-05, "loss": 0.7165, "step": 2410 }, { "epoch": 0.04394885950893506, "grad_norm": 1.1119874267997178, "learning_rate": 1.99420480212366e-05, "loss": 0.6895, "step": 2420 }, { "epoch": 0.04413046636640999, "grad_norm": 1.1272936774579825, "learning_rate": 1.9941426831059213e-05, "loss": 0.6939, "step": 2430 }, { "epoch": 0.04431207322388493, "grad_norm": 1.1583729458683778, "learning_rate": 1.994080233910065e-05, "loss": 0.6973, "step": 2440 }, { "epoch": 0.04449368008135987, "grad_norm": 1.1428341236687598, "learning_rate": 1.994017454556832e-05, "loss": 0.6903, "step": 2450 }, { "epoch": 0.04467528693883481, "grad_norm": 1.1531810378185188, "learning_rate": 1.993954345067072e-05, "loss": 0.6969, "step": 2460 }, { "epoch": 0.04485689379630975, "grad_norm": 1.0923729636315007, "learning_rate": 1.9938909054617458e-05, "loss": 0.69, "step": 2470 }, { "epoch": 0.04503850065378469, "grad_norm": 1.1291089114635608, "learning_rate": 1.9938271357619227e-05, "loss": 0.7259, "step": 2480 }, { "epoch": 0.04522010751125963, "grad_norm": 1.123549252571166, "learning_rate": 1.9937630359887822e-05, "loss": 0.6909, "step": 2490 }, { "epoch": 0.045401714368734566, "grad_norm": 1.142247370615794, "learning_rate": 1.993698606163614e-05, "loss": 0.6981, "step": 2500 }, { "epoch": 0.0455833212262095, "grad_norm": 1.1366830174607025, "learning_rate": 1.993633846307816e-05, "loss": 0.6946, "step": 2510 }, { "epoch": 0.04576492808368444, "grad_norm": 1.113742201519613, "learning_rate": 1.9935687564428967e-05, "loss": 0.6903, "step": 2520 }, { "epoch": 0.04594653494115938, "grad_norm": 1.121543335393683, "learning_rate": 1.9935033365904743e-05, "loss": 0.7068, "step": 2530 }, { "epoch": 0.046128141798634316, "grad_norm": 1.1446752660080146, "learning_rate": 1.993437586772276e-05, "loss": 0.7162, "step": 2540 }, { "epoch": 0.046309748656109255, "grad_norm": 1.1177328709354453, "learning_rate": 1.993371507010138e-05, "loss": 0.6881, "step": 2550 }, { "epoch": 0.046491355513584194, "grad_norm": 1.1032850779210095, "learning_rate": 1.9933050973260088e-05, "loss": 0.7188, "step": 2560 }, { "epoch": 0.04667296237105913, "grad_norm": 1.0628595510770726, "learning_rate": 1.9932383577419432e-05, "loss": 0.6932, "step": 2570 }, { "epoch": 0.04685456922853407, "grad_norm": 1.1043026233358415, "learning_rate": 1.9931712882801072e-05, "loss": 0.7015, "step": 2580 }, { "epoch": 0.047036176086009004, "grad_norm": 1.1647618158152748, "learning_rate": 1.9931038889627764e-05, "loss": 0.7086, "step": 2590 }, { "epoch": 0.047217782943483944, "grad_norm": 1.1072513568275142, "learning_rate": 1.993036159812336e-05, "loss": 0.7021, "step": 2600 }, { "epoch": 0.04739938980095888, "grad_norm": 1.0753547108659096, "learning_rate": 1.9929681008512798e-05, "loss": 0.7102, "step": 2610 }, { "epoch": 0.04758099665843382, "grad_norm": 1.1187609029950876, "learning_rate": 1.992899712102212e-05, "loss": 0.7046, "step": 2620 }, { "epoch": 0.04776260351590876, "grad_norm": 1.1462830748311925, "learning_rate": 1.992830993587846e-05, "loss": 0.7131, "step": 2630 }, { "epoch": 0.0479442103733837, "grad_norm": 1.085959342266387, "learning_rate": 1.9927619453310052e-05, "loss": 0.6989, "step": 2640 }, { "epoch": 0.04812581723085864, "grad_norm": 1.2328879505328019, "learning_rate": 1.992692567354622e-05, "loss": 0.7006, "step": 2650 }, { "epoch": 0.04830742408833358, "grad_norm": 1.1412427708725192, "learning_rate": 1.992622859681738e-05, "loss": 0.689, "step": 2660 }, { "epoch": 0.04848903094580851, "grad_norm": 1.2118198941198852, "learning_rate": 1.9925528223355054e-05, "loss": 0.694, "step": 2670 }, { "epoch": 0.04867063780328345, "grad_norm": 1.06732057415864, "learning_rate": 1.9924824553391847e-05, "loss": 0.7057, "step": 2680 }, { "epoch": 0.04885224466075839, "grad_norm": 1.1228052434262081, "learning_rate": 1.992411758716147e-05, "loss": 0.6995, "step": 2690 }, { "epoch": 0.04903385151823333, "grad_norm": 1.1055621861628362, "learning_rate": 1.992340732489872e-05, "loss": 0.695, "step": 2700 }, { "epoch": 0.04921545837570827, "grad_norm": 1.095724076256996, "learning_rate": 1.9922693766839492e-05, "loss": 0.7137, "step": 2710 }, { "epoch": 0.049397065233183206, "grad_norm": 1.0750810748818693, "learning_rate": 1.9921976913220774e-05, "loss": 0.6876, "step": 2720 }, { "epoch": 0.049578672090658145, "grad_norm": 1.0988335501409865, "learning_rate": 1.992125676428065e-05, "loss": 0.7075, "step": 2730 }, { "epoch": 0.049760278948133084, "grad_norm": 1.0758007256760838, "learning_rate": 1.9920533320258303e-05, "loss": 0.6881, "step": 2740 }, { "epoch": 0.04994188580560802, "grad_norm": 1.0705378302950304, "learning_rate": 1.9919806581393998e-05, "loss": 0.7031, "step": 2750 }, { "epoch": 0.050123492663082955, "grad_norm": 1.0953223089582391, "learning_rate": 1.991907654792911e-05, "loss": 0.7051, "step": 2760 }, { "epoch": 0.050305099520557894, "grad_norm": 1.1091288568061906, "learning_rate": 1.99183432201061e-05, "loss": 0.7133, "step": 2770 }, { "epoch": 0.05048670637803283, "grad_norm": 1.1358668870944633, "learning_rate": 1.991760659816851e-05, "loss": 0.7094, "step": 2780 }, { "epoch": 0.05066831323550777, "grad_norm": 1.0991448743293213, "learning_rate": 1.9916866682361004e-05, "loss": 0.6876, "step": 2790 }, { "epoch": 0.05084992009298271, "grad_norm": 1.0483885297489275, "learning_rate": 1.9916123472929323e-05, "loss": 0.7002, "step": 2800 }, { "epoch": 0.05103152695045765, "grad_norm": 1.132609481849367, "learning_rate": 1.9915376970120302e-05, "loss": 0.6913, "step": 2810 }, { "epoch": 0.05121313380793259, "grad_norm": 1.0569141089393732, "learning_rate": 1.991462717418187e-05, "loss": 0.6939, "step": 2820 }, { "epoch": 0.05139474066540753, "grad_norm": 1.0670214791868866, "learning_rate": 1.9913874085363054e-05, "loss": 0.6935, "step": 2830 }, { "epoch": 0.05157634752288246, "grad_norm": 1.1473160521006904, "learning_rate": 1.9913117703913975e-05, "loss": 0.7006, "step": 2840 }, { "epoch": 0.0517579543803574, "grad_norm": 1.0920358708759028, "learning_rate": 1.991235803008584e-05, "loss": 0.694, "step": 2850 }, { "epoch": 0.05193956123783234, "grad_norm": 1.0697816926851498, "learning_rate": 1.991159506413096e-05, "loss": 0.6885, "step": 2860 }, { "epoch": 0.05212116809530728, "grad_norm": 1.0334373464973277, "learning_rate": 1.9910828806302727e-05, "loss": 0.6848, "step": 2870 }, { "epoch": 0.05230277495278222, "grad_norm": 1.0432386116325385, "learning_rate": 1.991005925685564e-05, "loss": 0.7041, "step": 2880 }, { "epoch": 0.05248438181025716, "grad_norm": 1.0959756130383045, "learning_rate": 1.9909286416045278e-05, "loss": 0.695, "step": 2890 }, { "epoch": 0.052665988667732096, "grad_norm": 1.0666794571630802, "learning_rate": 1.9908510284128327e-05, "loss": 0.6986, "step": 2900 }, { "epoch": 0.052847595525207035, "grad_norm": 1.035573534360297, "learning_rate": 1.9907730861362554e-05, "loss": 0.6889, "step": 2910 }, { "epoch": 0.05302920238268197, "grad_norm": 1.1202092109418462, "learning_rate": 1.9906948148006823e-05, "loss": 0.7077, "step": 2920 }, { "epoch": 0.053210809240156906, "grad_norm": 1.0681095280438961, "learning_rate": 1.9906162144321094e-05, "loss": 0.6916, "step": 2930 }, { "epoch": 0.053392416097631845, "grad_norm": 1.058855129055549, "learning_rate": 1.9905372850566414e-05, "loss": 0.6927, "step": 2940 }, { "epoch": 0.053574022955106784, "grad_norm": 1.0863185878858126, "learning_rate": 1.990458026700493e-05, "loss": 0.6978, "step": 2950 }, { "epoch": 0.05375562981258172, "grad_norm": 1.124761097142119, "learning_rate": 1.9903784393899875e-05, "loss": 0.686, "step": 2960 }, { "epoch": 0.05393723667005666, "grad_norm": 1.0715905168500617, "learning_rate": 1.990298523151558e-05, "loss": 0.7013, "step": 2970 }, { "epoch": 0.0541188435275316, "grad_norm": 1.051817704709787, "learning_rate": 1.9902182780117464e-05, "loss": 0.6924, "step": 2980 }, { "epoch": 0.05430045038500654, "grad_norm": 1.0822313353420285, "learning_rate": 1.9901377039972033e-05, "loss": 0.6878, "step": 2990 }, { "epoch": 0.05448205724248147, "grad_norm": 1.1160667854005435, "learning_rate": 1.9900568011346904e-05, "loss": 0.6991, "step": 3000 }, { "epoch": 0.05466366409995641, "grad_norm": 1.1234219473053844, "learning_rate": 1.9899755694510766e-05, "loss": 0.701, "step": 3010 }, { "epoch": 0.05484527095743135, "grad_norm": 1.0860380353259427, "learning_rate": 1.9898940089733413e-05, "loss": 0.6963, "step": 3020 }, { "epoch": 0.05502687781490629, "grad_norm": 1.1198466583372377, "learning_rate": 1.9898121197285726e-05, "loss": 0.6956, "step": 3030 }, { "epoch": 0.05520848467238123, "grad_norm": 1.1233162891879094, "learning_rate": 1.9897299017439677e-05, "loss": 0.7027, "step": 3040 }, { "epoch": 0.05539009152985617, "grad_norm": 1.0646246454774781, "learning_rate": 1.9896473550468333e-05, "loss": 0.6921, "step": 3050 }, { "epoch": 0.05557169838733111, "grad_norm": 1.1045480285687241, "learning_rate": 1.9895644796645844e-05, "loss": 0.6837, "step": 3060 }, { "epoch": 0.055753305244806046, "grad_norm": 1.048208313967483, "learning_rate": 1.989481275624747e-05, "loss": 0.6887, "step": 3070 }, { "epoch": 0.055934912102280986, "grad_norm": 1.1440556618233975, "learning_rate": 1.9893977429549544e-05, "loss": 0.7159, "step": 3080 }, { "epoch": 0.05611651895975592, "grad_norm": 1.084410719572318, "learning_rate": 1.9893138816829495e-05, "loss": 0.7056, "step": 3090 }, { "epoch": 0.05629812581723086, "grad_norm": 1.0850231281336602, "learning_rate": 1.9892296918365856e-05, "loss": 0.6918, "step": 3100 }, { "epoch": 0.056479732674705796, "grad_norm": 1.1096845630204144, "learning_rate": 1.989145173443823e-05, "loss": 0.6908, "step": 3110 }, { "epoch": 0.056661339532180735, "grad_norm": 1.1097056225967248, "learning_rate": 1.989060326532733e-05, "loss": 0.6927, "step": 3120 }, { "epoch": 0.056842946389655674, "grad_norm": 1.065402887515684, "learning_rate": 1.988975151131495e-05, "loss": 0.6933, "step": 3130 }, { "epoch": 0.05702455324713061, "grad_norm": 1.0742476143113417, "learning_rate": 1.9888896472683973e-05, "loss": 0.689, "step": 3140 }, { "epoch": 0.05720616010460555, "grad_norm": 1.1372974732384689, "learning_rate": 1.9888038149718387e-05, "loss": 0.69, "step": 3150 }, { "epoch": 0.05738776696208049, "grad_norm": 1.0944743561172006, "learning_rate": 1.9887176542703255e-05, "loss": 0.7046, "step": 3160 }, { "epoch": 0.057569373819555424, "grad_norm": 1.0598056297349372, "learning_rate": 1.9886311651924735e-05, "loss": 0.7024, "step": 3170 }, { "epoch": 0.05775098067703036, "grad_norm": 1.049014167604148, "learning_rate": 1.9885443477670087e-05, "loss": 0.6918, "step": 3180 }, { "epoch": 0.0579325875345053, "grad_norm": 1.0990129998348268, "learning_rate": 1.988457202022764e-05, "loss": 0.6934, "step": 3190 }, { "epoch": 0.05811419439198024, "grad_norm": 1.0699420454547568, "learning_rate": 1.9883697279886834e-05, "loss": 0.7053, "step": 3200 }, { "epoch": 0.05829580124945518, "grad_norm": 1.0613223194031178, "learning_rate": 1.9882819256938186e-05, "loss": 0.697, "step": 3210 }, { "epoch": 0.05847740810693012, "grad_norm": 1.13703091395775, "learning_rate": 1.9881937951673312e-05, "loss": 0.7135, "step": 3220 }, { "epoch": 0.05865901496440506, "grad_norm": 1.0725501678172715, "learning_rate": 1.988105336438491e-05, "loss": 0.7064, "step": 3230 }, { "epoch": 0.05884062182188, "grad_norm": 1.0963745376288025, "learning_rate": 1.9880165495366773e-05, "loss": 0.6776, "step": 3240 }, { "epoch": 0.05902222867935493, "grad_norm": 1.0897529527028207, "learning_rate": 1.9879274344913785e-05, "loss": 0.6956, "step": 3250 }, { "epoch": 0.05920383553682987, "grad_norm": 1.086250413344943, "learning_rate": 1.987837991332192e-05, "loss": 0.6862, "step": 3260 }, { "epoch": 0.05938544239430481, "grad_norm": 1.1472019722928042, "learning_rate": 1.9877482200888237e-05, "loss": 0.6911, "step": 3270 }, { "epoch": 0.05956704925177975, "grad_norm": 1.0763224638895217, "learning_rate": 1.9876581207910885e-05, "loss": 0.7008, "step": 3280 }, { "epoch": 0.059748656109254686, "grad_norm": 1.0552162690693334, "learning_rate": 1.987567693468911e-05, "loss": 0.695, "step": 3290 }, { "epoch": 0.059930262966729625, "grad_norm": 1.1128620036633339, "learning_rate": 1.9874769381523236e-05, "loss": 0.7078, "step": 3300 }, { "epoch": 0.060111869824204564, "grad_norm": 1.0931185778221595, "learning_rate": 1.987385854871469e-05, "loss": 0.6934, "step": 3310 }, { "epoch": 0.0602934766816795, "grad_norm": 1.1135688611312746, "learning_rate": 1.9872944436565976e-05, "loss": 0.7045, "step": 3320 }, { "epoch": 0.060475083539154435, "grad_norm": 1.0396265590648215, "learning_rate": 1.9872027045380694e-05, "loss": 0.698, "step": 3330 }, { "epoch": 0.060656690396629374, "grad_norm": 1.0851632183333382, "learning_rate": 1.987110637546353e-05, "loss": 0.6876, "step": 3340 }, { "epoch": 0.060838297254104313, "grad_norm": 1.0382474677551456, "learning_rate": 1.9870182427120262e-05, "loss": 0.7013, "step": 3350 }, { "epoch": 0.06101990411157925, "grad_norm": 1.036629507813956, "learning_rate": 1.986925520065775e-05, "loss": 0.7046, "step": 3360 }, { "epoch": 0.06120151096905419, "grad_norm": 1.192772535314856, "learning_rate": 1.9868324696383956e-05, "loss": 0.7066, "step": 3370 }, { "epoch": 0.06138311782652913, "grad_norm": 1.0011406535413274, "learning_rate": 1.9867390914607916e-05, "loss": 0.6855, "step": 3380 }, { "epoch": 0.06156472468400407, "grad_norm": 1.0621317891511333, "learning_rate": 1.986645385563976e-05, "loss": 0.6906, "step": 3390 }, { "epoch": 0.06174633154147901, "grad_norm": 1.1493212944831228, "learning_rate": 1.9865513519790713e-05, "loss": 0.6922, "step": 3400 }, { "epoch": 0.06192793839895394, "grad_norm": 1.0694415557314771, "learning_rate": 1.9864569907373075e-05, "loss": 0.6945, "step": 3410 }, { "epoch": 0.06210954525642888, "grad_norm": 1.0735121030013248, "learning_rate": 1.986362301870025e-05, "loss": 0.7008, "step": 3420 }, { "epoch": 0.06229115211390382, "grad_norm": 1.1673314754890902, "learning_rate": 1.9862672854086718e-05, "loss": 0.6875, "step": 3430 }, { "epoch": 0.06247275897137876, "grad_norm": 1.0266360703127724, "learning_rate": 1.9861719413848052e-05, "loss": 0.6827, "step": 3440 }, { "epoch": 0.0626543658288537, "grad_norm": 1.079184658684054, "learning_rate": 1.9860762698300908e-05, "loss": 0.6781, "step": 3450 }, { "epoch": 0.06283597268632864, "grad_norm": 1.047054679090549, "learning_rate": 1.985980270776304e-05, "loss": 0.6872, "step": 3460 }, { "epoch": 0.06301757954380358, "grad_norm": 1.011274492310757, "learning_rate": 1.985883944255328e-05, "loss": 0.69, "step": 3470 }, { "epoch": 0.06319918640127851, "grad_norm": 1.0412156799968348, "learning_rate": 1.9857872902991547e-05, "loss": 0.6868, "step": 3480 }, { "epoch": 0.06338079325875345, "grad_norm": 1.0617097968549174, "learning_rate": 1.9856903089398857e-05, "loss": 0.6893, "step": 3490 }, { "epoch": 0.06356240011622839, "grad_norm": 1.09118557407771, "learning_rate": 1.9855930002097307e-05, "loss": 0.6841, "step": 3500 }, { "epoch": 0.06374400697370333, "grad_norm": 1.0879170705738375, "learning_rate": 1.9854953641410082e-05, "loss": 0.699, "step": 3510 }, { "epoch": 0.06392561383117827, "grad_norm": 1.0849300847789103, "learning_rate": 1.9853974007661453e-05, "loss": 0.6918, "step": 3520 }, { "epoch": 0.06410722068865321, "grad_norm": 1.0590652716384985, "learning_rate": 1.985299110117678e-05, "loss": 0.6909, "step": 3530 }, { "epoch": 0.06428882754612814, "grad_norm": 1.0786236508978573, "learning_rate": 1.985200492228251e-05, "loss": 0.6853, "step": 3540 }, { "epoch": 0.06447043440360307, "grad_norm": 1.0899236590464954, "learning_rate": 1.9851015471306175e-05, "loss": 0.6959, "step": 3550 }, { "epoch": 0.06465204126107801, "grad_norm": 1.0900990825352355, "learning_rate": 1.9850022748576397e-05, "loss": 0.698, "step": 3560 }, { "epoch": 0.06483364811855295, "grad_norm": 1.0554273784983748, "learning_rate": 1.984902675442288e-05, "loss": 0.6999, "step": 3570 }, { "epoch": 0.06501525497602789, "grad_norm": 1.0456429865031893, "learning_rate": 1.984802748917642e-05, "loss": 0.695, "step": 3580 }, { "epoch": 0.06519686183350283, "grad_norm": 1.0808991909854073, "learning_rate": 1.9847024953168893e-05, "loss": 0.697, "step": 3590 }, { "epoch": 0.06537846869097777, "grad_norm": 1.0961602756786972, "learning_rate": 1.9846019146733267e-05, "loss": 0.7046, "step": 3600 }, { "epoch": 0.06556007554845271, "grad_norm": 1.0837672111010397, "learning_rate": 1.9845010070203593e-05, "loss": 0.7004, "step": 3610 }, { "epoch": 0.06574168240592765, "grad_norm": 1.015514424977221, "learning_rate": 1.984399772391501e-05, "loss": 0.6916, "step": 3620 }, { "epoch": 0.06592328926340259, "grad_norm": 1.0913409935675717, "learning_rate": 1.9842982108203736e-05, "loss": 0.6875, "step": 3630 }, { "epoch": 0.06610489612087753, "grad_norm": 1.0373472166011892, "learning_rate": 1.984196322340709e-05, "loss": 0.6941, "step": 3640 }, { "epoch": 0.06628650297835247, "grad_norm": 1.0432735100248738, "learning_rate": 1.9840941069863464e-05, "loss": 0.6945, "step": 3650 }, { "epoch": 0.0664681098358274, "grad_norm": 1.016223780024091, "learning_rate": 1.983991564791234e-05, "loss": 0.6776, "step": 3660 }, { "epoch": 0.06664971669330234, "grad_norm": 1.0407293140672123, "learning_rate": 1.983888695789428e-05, "loss": 0.6824, "step": 3670 }, { "epoch": 0.06683132355077728, "grad_norm": 3.0767986689492424, "learning_rate": 1.983785500015094e-05, "loss": 0.6906, "step": 3680 }, { "epoch": 0.06701293040825222, "grad_norm": 1.0791815202291548, "learning_rate": 1.9836819775025062e-05, "loss": 0.6746, "step": 3690 }, { "epoch": 0.06719453726572715, "grad_norm": 1.1079684797439437, "learning_rate": 1.9835781282860455e-05, "loss": 0.7055, "step": 3700 }, { "epoch": 0.06737614412320209, "grad_norm": 1.0755390658111812, "learning_rate": 1.983473952400204e-05, "loss": 0.6877, "step": 3710 }, { "epoch": 0.06755775098067703, "grad_norm": 1.0232201654653486, "learning_rate": 1.9833694498795805e-05, "loss": 0.6943, "step": 3720 }, { "epoch": 0.06773935783815196, "grad_norm": 1.0701076986918894, "learning_rate": 1.9832646207588825e-05, "loss": 0.6874, "step": 3730 }, { "epoch": 0.0679209646956269, "grad_norm": 1.077669217012869, "learning_rate": 1.9831594650729266e-05, "loss": 0.6897, "step": 3740 }, { "epoch": 0.06810257155310184, "grad_norm": 1.0683586513353842, "learning_rate": 1.983053982856637e-05, "loss": 0.68, "step": 3750 }, { "epoch": 0.06828417841057678, "grad_norm": 1.021935778022038, "learning_rate": 1.9829481741450475e-05, "loss": 0.688, "step": 3760 }, { "epoch": 0.06846578526805172, "grad_norm": 1.0245248575187533, "learning_rate": 1.982842038973299e-05, "loss": 0.6976, "step": 3770 }, { "epoch": 0.06864739212552666, "grad_norm": 1.0445998524101814, "learning_rate": 1.9827355773766416e-05, "loss": 0.6963, "step": 3780 }, { "epoch": 0.0688289989830016, "grad_norm": 0.994574164416656, "learning_rate": 1.982628789390434e-05, "loss": 0.6872, "step": 3790 }, { "epoch": 0.06901060584047654, "grad_norm": 1.0706178181649688, "learning_rate": 1.9825216750501432e-05, "loss": 0.6913, "step": 3800 }, { "epoch": 0.06919221269795148, "grad_norm": 1.0497389922049876, "learning_rate": 1.9824142343913436e-05, "loss": 0.6975, "step": 3810 }, { "epoch": 0.06937381955542642, "grad_norm": 1.0717193247814871, "learning_rate": 1.9823064674497193e-05, "loss": 0.6923, "step": 3820 }, { "epoch": 0.06955542641290136, "grad_norm": 0.9996659969783792, "learning_rate": 1.982198374261062e-05, "loss": 0.6919, "step": 3830 }, { "epoch": 0.0697370332703763, "grad_norm": 1.0760587168202158, "learning_rate": 1.9820899548612722e-05, "loss": 0.6873, "step": 3840 }, { "epoch": 0.06991864012785123, "grad_norm": 1.0540598990819239, "learning_rate": 1.9819812092863585e-05, "loss": 0.6744, "step": 3850 }, { "epoch": 0.07010024698532617, "grad_norm": 1.0650119340005642, "learning_rate": 1.9818721375724376e-05, "loss": 0.6912, "step": 3860 }, { "epoch": 0.0702818538428011, "grad_norm": 1.0084190390798249, "learning_rate": 1.9817627397557352e-05, "loss": 0.6816, "step": 3870 }, { "epoch": 0.07046346070027604, "grad_norm": 1.0555127864061609, "learning_rate": 1.9816530158725844e-05, "loss": 0.68, "step": 3880 }, { "epoch": 0.07064506755775098, "grad_norm": 0.9936826653544755, "learning_rate": 1.9815429659594276e-05, "loss": 0.6972, "step": 3890 }, { "epoch": 0.07082667441522592, "grad_norm": 1.0590005018146655, "learning_rate": 1.9814325900528146e-05, "loss": 0.6791, "step": 3900 }, { "epoch": 0.07100828127270085, "grad_norm": 1.0710863532163486, "learning_rate": 1.981321888189404e-05, "loss": 0.6928, "step": 3910 }, { "epoch": 0.0711898881301758, "grad_norm": 1.0466736198214859, "learning_rate": 1.981210860405962e-05, "loss": 0.6911, "step": 3920 }, { "epoch": 0.07137149498765073, "grad_norm": 1.0403768048965076, "learning_rate": 1.981099506739364e-05, "loss": 0.6846, "step": 3930 }, { "epoch": 0.07155310184512567, "grad_norm": 1.0459689851686527, "learning_rate": 1.9809878272265935e-05, "loss": 0.6925, "step": 3940 }, { "epoch": 0.07173470870260061, "grad_norm": 1.0262794313120003, "learning_rate": 1.980875821904741e-05, "loss": 0.6901, "step": 3950 }, { "epoch": 0.07191631556007555, "grad_norm": 0.9923361250568221, "learning_rate": 1.980763490811007e-05, "loss": 0.6832, "step": 3960 }, { "epoch": 0.07209792241755049, "grad_norm": 1.0530960946555337, "learning_rate": 1.9806508339826985e-05, "loss": 0.6931, "step": 3970 }, { "epoch": 0.07227952927502543, "grad_norm": 1.0628298885692398, "learning_rate": 1.980537851457232e-05, "loss": 0.7011, "step": 3980 }, { "epoch": 0.07246113613250037, "grad_norm": 1.0273075095899376, "learning_rate": 1.9804245432721315e-05, "loss": 0.6916, "step": 3990 }, { "epoch": 0.0726427429899753, "grad_norm": 1.048379546966258, "learning_rate": 1.9803109094650295e-05, "loss": 0.6997, "step": 4000 }, { "epoch": 0.07282434984745025, "grad_norm": 1.0671916211375194, "learning_rate": 1.9801969500736662e-05, "loss": 0.6781, "step": 4010 }, { "epoch": 0.07300595670492518, "grad_norm": 1.0246652076100535, "learning_rate": 1.9800826651358904e-05, "loss": 0.6786, "step": 4020 }, { "epoch": 0.07318756356240011, "grad_norm": 1.003265770675486, "learning_rate": 1.9799680546896582e-05, "loss": 0.6962, "step": 4030 }, { "epoch": 0.07336917041987505, "grad_norm": 1.06930062983289, "learning_rate": 1.979853118773035e-05, "loss": 0.6756, "step": 4040 }, { "epoch": 0.07355077727734999, "grad_norm": 1.0486113446376542, "learning_rate": 1.9797378574241935e-05, "loss": 0.6803, "step": 4050 }, { "epoch": 0.07373238413482493, "grad_norm": 1.1626621595645588, "learning_rate": 1.9796222706814153e-05, "loss": 0.6977, "step": 4060 }, { "epoch": 0.07391399099229987, "grad_norm": 1.0962160771035085, "learning_rate": 1.9795063585830885e-05, "loss": 0.6872, "step": 4070 }, { "epoch": 0.0740955978497748, "grad_norm": 1.0154130010186817, "learning_rate": 1.979390121167711e-05, "loss": 0.6895, "step": 4080 }, { "epoch": 0.07427720470724974, "grad_norm": 1.1017928200506983, "learning_rate": 1.9792735584738875e-05, "loss": 0.7009, "step": 4090 }, { "epoch": 0.07445881156472468, "grad_norm": 1.0444353823506936, "learning_rate": 1.9791566705403315e-05, "loss": 0.6789, "step": 4100 }, { "epoch": 0.07464041842219962, "grad_norm": 1.0518724902419516, "learning_rate": 1.9790394574058636e-05, "loss": 0.6785, "step": 4110 }, { "epoch": 0.07482202527967456, "grad_norm": 1.0504518790516657, "learning_rate": 1.9789219191094142e-05, "loss": 0.6803, "step": 4120 }, { "epoch": 0.0750036321371495, "grad_norm": 1.0101227997086095, "learning_rate": 1.9788040556900196e-05, "loss": 0.7003, "step": 4130 }, { "epoch": 0.07518523899462444, "grad_norm": 1.0272978842636722, "learning_rate": 1.9786858671868252e-05, "loss": 0.6853, "step": 4140 }, { "epoch": 0.07536684585209938, "grad_norm": 1.025813213064945, "learning_rate": 1.9785673536390843e-05, "loss": 0.6714, "step": 4150 }, { "epoch": 0.07554845270957432, "grad_norm": 1.0205076141487104, "learning_rate": 1.9784485150861578e-05, "loss": 0.7039, "step": 4160 }, { "epoch": 0.07573005956704926, "grad_norm": 1.0508053258337628, "learning_rate": 1.978329351567515e-05, "loss": 0.6947, "step": 4170 }, { "epoch": 0.0759116664245242, "grad_norm": 1.028248329622418, "learning_rate": 1.978209863122733e-05, "loss": 0.6862, "step": 4180 }, { "epoch": 0.07609327328199912, "grad_norm": 1.0107877634808573, "learning_rate": 1.9780900497914964e-05, "loss": 0.6874, "step": 4190 }, { "epoch": 0.07627488013947406, "grad_norm": 1.070769117069988, "learning_rate": 1.9779699116135983e-05, "loss": 0.7086, "step": 4200 }, { "epoch": 0.076456486996949, "grad_norm": 0.9898276587660672, "learning_rate": 1.977849448628939e-05, "loss": 0.6846, "step": 4210 }, { "epoch": 0.07663809385442394, "grad_norm": 1.048515619325957, "learning_rate": 1.9777286608775277e-05, "loss": 0.6845, "step": 4220 }, { "epoch": 0.07681970071189888, "grad_norm": 1.0008083632307054, "learning_rate": 1.9776075483994806e-05, "loss": 0.6989, "step": 4230 }, { "epoch": 0.07700130756937382, "grad_norm": 1.0051893921327542, "learning_rate": 1.9774861112350216e-05, "loss": 0.6856, "step": 4240 }, { "epoch": 0.07718291442684876, "grad_norm": 1.0797575140581368, "learning_rate": 1.9773643494244833e-05, "loss": 0.7032, "step": 4250 }, { "epoch": 0.0773645212843237, "grad_norm": 0.9942112015866905, "learning_rate": 1.9772422630083057e-05, "loss": 0.6965, "step": 4260 }, { "epoch": 0.07754612814179863, "grad_norm": 0.97990701177344, "learning_rate": 1.9771198520270363e-05, "loss": 0.6817, "step": 4270 }, { "epoch": 0.07772773499927357, "grad_norm": 1.0759459630248687, "learning_rate": 1.9769971165213305e-05, "loss": 0.6913, "step": 4280 }, { "epoch": 0.07790934185674851, "grad_norm": 1.0622207937232393, "learning_rate": 1.976874056531952e-05, "loss": 0.6998, "step": 4290 }, { "epoch": 0.07809094871422345, "grad_norm": 1.064081815339844, "learning_rate": 1.9767506720997717e-05, "loss": 0.6855, "step": 4300 }, { "epoch": 0.07827255557169839, "grad_norm": 1.033774569221807, "learning_rate": 1.9766269632657686e-05, "loss": 0.6721, "step": 4310 }, { "epoch": 0.07845416242917333, "grad_norm": 1.2704428630380995, "learning_rate": 1.9765029300710294e-05, "loss": 0.6891, "step": 4320 }, { "epoch": 0.07863576928664827, "grad_norm": 1.020073177130914, "learning_rate": 1.9763785725567482e-05, "loss": 0.6874, "step": 4330 }, { "epoch": 0.07881737614412321, "grad_norm": 1.1105352473679793, "learning_rate": 1.976253890764227e-05, "loss": 0.6706, "step": 4340 }, { "epoch": 0.07899898300159815, "grad_norm": 1.061504512874691, "learning_rate": 1.976128884734876e-05, "loss": 0.7028, "step": 4350 }, { "epoch": 0.07918058985907307, "grad_norm": 1.0482442796999107, "learning_rate": 1.976003554510212e-05, "loss": 0.684, "step": 4360 }, { "epoch": 0.07936219671654801, "grad_norm": 1.030020111124085, "learning_rate": 1.9758779001318604e-05, "loss": 0.6936, "step": 4370 }, { "epoch": 0.07954380357402295, "grad_norm": 1.0333998725523472, "learning_rate": 1.9757519216415543e-05, "loss": 0.6872, "step": 4380 }, { "epoch": 0.07972541043149789, "grad_norm": 1.0790993029219327, "learning_rate": 1.9756256190811334e-05, "loss": 0.6898, "step": 4390 }, { "epoch": 0.07990701728897283, "grad_norm": 1.0333527528098783, "learning_rate": 1.975498992492547e-05, "loss": 0.672, "step": 4400 }, { "epoch": 0.08008862414644777, "grad_norm": 1.027079705677428, "learning_rate": 1.975372041917849e-05, "loss": 0.6982, "step": 4410 }, { "epoch": 0.0802702310039227, "grad_norm": 1.0624595780419142, "learning_rate": 1.975244767399204e-05, "loss": 0.6919, "step": 4420 }, { "epoch": 0.08045183786139765, "grad_norm": 1.0493090649046495, "learning_rate": 1.9751171689788825e-05, "loss": 0.6967, "step": 4430 }, { "epoch": 0.08063344471887259, "grad_norm": 1.0307987920507873, "learning_rate": 1.974989246699263e-05, "loss": 0.6989, "step": 4440 }, { "epoch": 0.08081505157634752, "grad_norm": 1.0206365264600143, "learning_rate": 1.974861000602831e-05, "loss": 0.6952, "step": 4450 }, { "epoch": 0.08099665843382246, "grad_norm": 0.9822118348391959, "learning_rate": 1.9747324307321804e-05, "loss": 0.6946, "step": 4460 }, { "epoch": 0.0811782652912974, "grad_norm": 1.1085885656732255, "learning_rate": 1.9746035371300124e-05, "loss": 0.6767, "step": 4470 }, { "epoch": 0.08135987214877234, "grad_norm": 1.0297900278320204, "learning_rate": 1.9744743198391357e-05, "loss": 0.6832, "step": 4480 }, { "epoch": 0.08154147900624728, "grad_norm": 1.0730351765906343, "learning_rate": 1.9743447789024658e-05, "loss": 0.7099, "step": 4490 }, { "epoch": 0.08172308586372222, "grad_norm": 1.0318890767323554, "learning_rate": 1.9742149143630268e-05, "loss": 0.7006, "step": 4500 }, { "epoch": 0.08190469272119716, "grad_norm": 1.0332861599088785, "learning_rate": 1.9740847262639497e-05, "loss": 0.6856, "step": 4510 }, { "epoch": 0.08208629957867208, "grad_norm": 0.9913475631391987, "learning_rate": 1.9739542146484727e-05, "loss": 0.6977, "step": 4520 }, { "epoch": 0.08226790643614702, "grad_norm": 1.0405782463724174, "learning_rate": 1.9738233795599423e-05, "loss": 0.673, "step": 4530 }, { "epoch": 0.08244951329362196, "grad_norm": 1.0184109300071793, "learning_rate": 1.9736922210418113e-05, "loss": 0.6898, "step": 4540 }, { "epoch": 0.0826311201510969, "grad_norm": 0.9837316340392186, "learning_rate": 1.973560739137641e-05, "loss": 0.6933, "step": 4550 }, { "epoch": 0.08281272700857184, "grad_norm": 1.0245623768699463, "learning_rate": 1.9734289338910996e-05, "loss": 0.6727, "step": 4560 }, { "epoch": 0.08299433386604678, "grad_norm": 1.0531553170390409, "learning_rate": 1.9732968053459622e-05, "loss": 0.6913, "step": 4570 }, { "epoch": 0.08317594072352172, "grad_norm": 0.9738884577655609, "learning_rate": 1.9731643535461124e-05, "loss": 0.6825, "step": 4580 }, { "epoch": 0.08335754758099666, "grad_norm": 1.0134683448069604, "learning_rate": 1.9730315785355404e-05, "loss": 0.7055, "step": 4590 }, { "epoch": 0.0835391544384716, "grad_norm": 1.019333104974168, "learning_rate": 1.9728984803583437e-05, "loss": 0.6885, "step": 4600 }, { "epoch": 0.08372076129594654, "grad_norm": 1.049989982533376, "learning_rate": 1.9727650590587272e-05, "loss": 0.6956, "step": 4610 }, { "epoch": 0.08390236815342147, "grad_norm": 1.0392003250026018, "learning_rate": 1.9726313146810037e-05, "loss": 0.6921, "step": 4620 }, { "epoch": 0.08408397501089641, "grad_norm": 1.009379322408453, "learning_rate": 1.9724972472695927e-05, "loss": 0.6825, "step": 4630 }, { "epoch": 0.08426558186837135, "grad_norm": 1.0060321625866655, "learning_rate": 1.9723628568690208e-05, "loss": 0.6868, "step": 4640 }, { "epoch": 0.08444718872584629, "grad_norm": 1.0356247228257456, "learning_rate": 1.9722281435239227e-05, "loss": 0.6843, "step": 4650 }, { "epoch": 0.08462879558332123, "grad_norm": 1.0324439305852506, "learning_rate": 1.9720931072790397e-05, "loss": 0.6719, "step": 4660 }, { "epoch": 0.08481040244079617, "grad_norm": 0.9927096391895635, "learning_rate": 1.9719577481792202e-05, "loss": 0.6876, "step": 4670 }, { "epoch": 0.08499200929827111, "grad_norm": 0.986759194239674, "learning_rate": 1.971822066269421e-05, "loss": 0.7003, "step": 4680 }, { "epoch": 0.08517361615574603, "grad_norm": 1.0296543073039681, "learning_rate": 1.9716860615947036e-05, "loss": 0.6868, "step": 4690 }, { "epoch": 0.08535522301322097, "grad_norm": 1.0546768765345567, "learning_rate": 1.97154973420024e-05, "loss": 0.681, "step": 4700 }, { "epoch": 0.08553682987069591, "grad_norm": 1.0492071866324542, "learning_rate": 1.971413084131307e-05, "loss": 0.6797, "step": 4710 }, { "epoch": 0.08571843672817085, "grad_norm": 1.0500817546723966, "learning_rate": 1.9712761114332896e-05, "loss": 0.6978, "step": 4720 }, { "epoch": 0.08590004358564579, "grad_norm": 1.0248307177610507, "learning_rate": 1.9711388161516792e-05, "loss": 0.6951, "step": 4730 }, { "epoch": 0.08608165044312073, "grad_norm": 0.9955962679733306, "learning_rate": 1.9710011983320748e-05, "loss": 0.6651, "step": 4740 }, { "epoch": 0.08626325730059567, "grad_norm": 1.0182815976313808, "learning_rate": 1.9708632580201828e-05, "loss": 0.6759, "step": 4750 }, { "epoch": 0.08644486415807061, "grad_norm": 1.0484971037219264, "learning_rate": 1.970724995261817e-05, "loss": 0.6987, "step": 4760 }, { "epoch": 0.08662647101554555, "grad_norm": 0.9891836148773081, "learning_rate": 1.9705864101028962e-05, "loss": 0.6878, "step": 4770 }, { "epoch": 0.08680807787302049, "grad_norm": 1.098182945923468, "learning_rate": 1.970447502589449e-05, "loss": 0.6926, "step": 4780 }, { "epoch": 0.08698968473049543, "grad_norm": 1.012714484684794, "learning_rate": 1.9703082727676097e-05, "loss": 0.6744, "step": 4790 }, { "epoch": 0.08717129158797036, "grad_norm": 0.9884723693117493, "learning_rate": 1.9701687206836192e-05, "loss": 0.684, "step": 4800 }, { "epoch": 0.0873528984454453, "grad_norm": 0.9993686904010239, "learning_rate": 1.9700288463838263e-05, "loss": 0.6774, "step": 4810 }, { "epoch": 0.08753450530292024, "grad_norm": 1.0479640337116134, "learning_rate": 1.9698886499146874e-05, "loss": 0.6772, "step": 4820 }, { "epoch": 0.08771611216039518, "grad_norm": 1.0749114740468597, "learning_rate": 1.9697481313227634e-05, "loss": 0.7015, "step": 4830 }, { "epoch": 0.08789771901787012, "grad_norm": 0.9686249467560644, "learning_rate": 1.969607290654725e-05, "loss": 0.6625, "step": 4840 }, { "epoch": 0.08807932587534505, "grad_norm": 1.0008180561418378, "learning_rate": 1.9694661279573487e-05, "loss": 0.6919, "step": 4850 }, { "epoch": 0.08826093273281999, "grad_norm": 1.032052235106613, "learning_rate": 1.969324643277517e-05, "loss": 0.6651, "step": 4860 }, { "epoch": 0.08844253959029492, "grad_norm": 1.014224104468032, "learning_rate": 1.9691828366622216e-05, "loss": 0.6682, "step": 4870 }, { "epoch": 0.08862414644776986, "grad_norm": 0.9789206181355945, "learning_rate": 1.9690407081585586e-05, "loss": 0.6871, "step": 4880 }, { "epoch": 0.0888057533052448, "grad_norm": 1.0492168096695358, "learning_rate": 1.968898257813733e-05, "loss": 0.6907, "step": 4890 }, { "epoch": 0.08898736016271974, "grad_norm": 0.9928360369665252, "learning_rate": 1.968755485675055e-05, "loss": 0.675, "step": 4900 }, { "epoch": 0.08916896702019468, "grad_norm": 0.9846873595194777, "learning_rate": 1.968612391789944e-05, "loss": 0.6792, "step": 4910 }, { "epoch": 0.08935057387766962, "grad_norm": 1.0419750827497143, "learning_rate": 1.968468976205924e-05, "loss": 0.674, "step": 4920 }, { "epoch": 0.08953218073514456, "grad_norm": 1.034695606468906, "learning_rate": 1.9683252389706263e-05, "loss": 0.6812, "step": 4930 }, { "epoch": 0.0897137875926195, "grad_norm": 0.9792002258663296, "learning_rate": 1.96818118013179e-05, "loss": 0.6779, "step": 4940 }, { "epoch": 0.08989539445009444, "grad_norm": 1.0177913601884427, "learning_rate": 1.9680367997372603e-05, "loss": 0.681, "step": 4950 }, { "epoch": 0.09007700130756938, "grad_norm": 1.028503724663756, "learning_rate": 1.9678920978349895e-05, "loss": 0.686, "step": 4960 }, { "epoch": 0.09025860816504432, "grad_norm": 1.012816069935854, "learning_rate": 1.967747074473036e-05, "loss": 0.688, "step": 4970 }, { "epoch": 0.09044021502251925, "grad_norm": 0.950776677746086, "learning_rate": 1.967601729699566e-05, "loss": 0.6677, "step": 4980 }, { "epoch": 0.0906218218799942, "grad_norm": 0.9928423979746249, "learning_rate": 1.9674560635628513e-05, "loss": 0.6896, "step": 4990 }, { "epoch": 0.09080342873746913, "grad_norm": 1.0143197938674215, "learning_rate": 1.9673100761112717e-05, "loss": 0.6961, "step": 5000 }, { "epoch": 0.09098503559494407, "grad_norm": 1.035970201664861, "learning_rate": 1.9671637673933122e-05, "loss": 0.6779, "step": 5010 }, { "epoch": 0.091166642452419, "grad_norm": 1.022627842777633, "learning_rate": 1.9670171374575666e-05, "loss": 0.6935, "step": 5020 }, { "epoch": 0.09134824930989394, "grad_norm": 1.0153940980194034, "learning_rate": 1.966870186352733e-05, "loss": 0.6708, "step": 5030 }, { "epoch": 0.09152985616736888, "grad_norm": 0.9622554710671188, "learning_rate": 1.966722914127618e-05, "loss": 0.6848, "step": 5040 }, { "epoch": 0.09171146302484381, "grad_norm": 1.0904096070691136, "learning_rate": 1.966575320831134e-05, "loss": 0.6927, "step": 5050 }, { "epoch": 0.09189306988231875, "grad_norm": 0.9654089606466388, "learning_rate": 1.9664274065123e-05, "loss": 0.6797, "step": 5060 }, { "epoch": 0.09207467673979369, "grad_norm": 1.0265488961996272, "learning_rate": 1.966279171220242e-05, "loss": 0.6912, "step": 5070 }, { "epoch": 0.09225628359726863, "grad_norm": 1.0173233502357775, "learning_rate": 1.966130615004192e-05, "loss": 0.6804, "step": 5080 }, { "epoch": 0.09243789045474357, "grad_norm": 1.011519310127714, "learning_rate": 1.96598173791349e-05, "loss": 0.687, "step": 5090 }, { "epoch": 0.09261949731221851, "grad_norm": 1.0410563993194057, "learning_rate": 1.9658325399975805e-05, "loss": 0.6903, "step": 5100 }, { "epoch": 0.09280110416969345, "grad_norm": 0.9864148781394764, "learning_rate": 1.965683021306016e-05, "loss": 0.6892, "step": 5110 }, { "epoch": 0.09298271102716839, "grad_norm": 0.9713213425711966, "learning_rate": 1.9655331818884554e-05, "loss": 0.6826, "step": 5120 }, { "epoch": 0.09316431788464333, "grad_norm": 0.9812478434990345, "learning_rate": 1.9653830217946636e-05, "loss": 0.6823, "step": 5130 }, { "epoch": 0.09334592474211827, "grad_norm": 1.0464491803159974, "learning_rate": 1.9652325410745124e-05, "loss": 0.6872, "step": 5140 }, { "epoch": 0.0935275315995932, "grad_norm": 0.9835994692702739, "learning_rate": 1.9650817397779806e-05, "loss": 0.6846, "step": 5150 }, { "epoch": 0.09370913845706814, "grad_norm": 0.9668781909914851, "learning_rate": 1.9649306179551515e-05, "loss": 0.6918, "step": 5160 }, { "epoch": 0.09389074531454308, "grad_norm": 0.9723408031839981, "learning_rate": 1.9647791756562172e-05, "loss": 0.6876, "step": 5170 }, { "epoch": 0.09407235217201801, "grad_norm": 1.0032686351091826, "learning_rate": 1.9646274129314752e-05, "loss": 0.6806, "step": 5180 }, { "epoch": 0.09425395902949295, "grad_norm": 0.9958225630742548, "learning_rate": 1.9644753298313288e-05, "loss": 0.6706, "step": 5190 }, { "epoch": 0.09443556588696789, "grad_norm": 1.0278502202945794, "learning_rate": 1.9643229264062892e-05, "loss": 0.6728, "step": 5200 }, { "epoch": 0.09461717274444283, "grad_norm": 0.9705973341636792, "learning_rate": 1.9641702027069727e-05, "loss": 0.6713, "step": 5210 }, { "epoch": 0.09479877960191777, "grad_norm": 1.0281595333893083, "learning_rate": 1.9640171587841027e-05, "loss": 0.6949, "step": 5220 }, { "epoch": 0.0949803864593927, "grad_norm": 1.0072733916234213, "learning_rate": 1.963863794688508e-05, "loss": 0.6816, "step": 5230 }, { "epoch": 0.09516199331686764, "grad_norm": 1.0383737340385428, "learning_rate": 1.963710110471125e-05, "loss": 0.6842, "step": 5240 }, { "epoch": 0.09534360017434258, "grad_norm": 1.1194245984392244, "learning_rate": 1.9635561061829958e-05, "loss": 0.6736, "step": 5250 }, { "epoch": 0.09552520703181752, "grad_norm": 0.9569288417966133, "learning_rate": 1.963401781875269e-05, "loss": 0.6778, "step": 5260 }, { "epoch": 0.09570681388929246, "grad_norm": 1.0264133365144843, "learning_rate": 1.9632471375991992e-05, "loss": 0.6929, "step": 5270 }, { "epoch": 0.0958884207467674, "grad_norm": 0.9654879024034014, "learning_rate": 1.963092173406147e-05, "loss": 0.6887, "step": 5280 }, { "epoch": 0.09607002760424234, "grad_norm": 1.0330834795177908, "learning_rate": 1.96293688934758e-05, "loss": 0.6802, "step": 5290 }, { "epoch": 0.09625163446171728, "grad_norm": 1.033255728671044, "learning_rate": 1.9627812854750713e-05, "loss": 0.6834, "step": 5300 }, { "epoch": 0.09643324131919222, "grad_norm": 0.9804098824061368, "learning_rate": 1.9626253618403014e-05, "loss": 0.685, "step": 5310 }, { "epoch": 0.09661484817666716, "grad_norm": 1.026836615451246, "learning_rate": 1.9624691184950553e-05, "loss": 0.6733, "step": 5320 }, { "epoch": 0.0967964550341421, "grad_norm": 1.0029722947072592, "learning_rate": 1.962312555491226e-05, "loss": 0.6711, "step": 5330 }, { "epoch": 0.09697806189161702, "grad_norm": 0.9970386343917251, "learning_rate": 1.962155672880811e-05, "loss": 0.6807, "step": 5340 }, { "epoch": 0.09715966874909196, "grad_norm": 0.9918460743030859, "learning_rate": 1.961998470715915e-05, "loss": 0.683, "step": 5350 }, { "epoch": 0.0973412756065669, "grad_norm": 1.0099332444757323, "learning_rate": 1.961840949048748e-05, "loss": 0.6764, "step": 5360 }, { "epoch": 0.09752288246404184, "grad_norm": 0.9721953940425762, "learning_rate": 1.9616831079316273e-05, "loss": 0.6894, "step": 5370 }, { "epoch": 0.09770448932151678, "grad_norm": 0.9680292131275711, "learning_rate": 1.9615249474169758e-05, "loss": 0.6845, "step": 5380 }, { "epoch": 0.09788609617899172, "grad_norm": 0.9213318259643737, "learning_rate": 1.9613664675573217e-05, "loss": 0.6877, "step": 5390 }, { "epoch": 0.09806770303646666, "grad_norm": 0.9585792003941472, "learning_rate": 1.9612076684053003e-05, "loss": 0.677, "step": 5400 }, { "epoch": 0.0982493098939416, "grad_norm": 1.0242580170954496, "learning_rate": 1.9610485500136523e-05, "loss": 0.6817, "step": 5410 }, { "epoch": 0.09843091675141653, "grad_norm": 1.0006587065896633, "learning_rate": 1.9608891124352246e-05, "loss": 0.6841, "step": 5420 }, { "epoch": 0.09861252360889147, "grad_norm": 0.9841869585322501, "learning_rate": 1.9607293557229707e-05, "loss": 0.681, "step": 5430 }, { "epoch": 0.09879413046636641, "grad_norm": 0.9040185375629803, "learning_rate": 1.960569279929949e-05, "loss": 0.677, "step": 5440 }, { "epoch": 0.09897573732384135, "grad_norm": 0.9975063452811581, "learning_rate": 1.960408885109324e-05, "loss": 0.6862, "step": 5450 }, { "epoch": 0.09915734418131629, "grad_norm": 0.9922792350399808, "learning_rate": 1.9602481713143678e-05, "loss": 0.682, "step": 5460 }, { "epoch": 0.09933895103879123, "grad_norm": 1.0304987389015763, "learning_rate": 1.960087138598456e-05, "loss": 0.6926, "step": 5470 }, { "epoch": 0.09952055789626617, "grad_norm": 1.0130497662060916, "learning_rate": 1.9599257870150726e-05, "loss": 0.6835, "step": 5480 }, { "epoch": 0.09970216475374111, "grad_norm": 0.9739598200523554, "learning_rate": 1.959764116617805e-05, "loss": 0.6893, "step": 5490 }, { "epoch": 0.09988377161121605, "grad_norm": 0.9886236139275231, "learning_rate": 1.9596021274603488e-05, "loss": 0.6675, "step": 5500 }, { "epoch": 0.10006537846869097, "grad_norm": 1.0063381111916454, "learning_rate": 1.9594398195965038e-05, "loss": 0.68, "step": 5510 }, { "epoch": 0.10024698532616591, "grad_norm": 1.0100858049820274, "learning_rate": 1.9592771930801762e-05, "loss": 0.6863, "step": 5520 }, { "epoch": 0.10042859218364085, "grad_norm": 0.9766285696323538, "learning_rate": 1.9591142479653783e-05, "loss": 0.6778, "step": 5530 }, { "epoch": 0.10061019904111579, "grad_norm": 0.9697756924999182, "learning_rate": 1.9589509843062274e-05, "loss": 0.6875, "step": 5540 }, { "epoch": 0.10079180589859073, "grad_norm": 0.9617891064540283, "learning_rate": 1.9587874021569485e-05, "loss": 0.6711, "step": 5550 }, { "epoch": 0.10097341275606567, "grad_norm": 0.9844971828913424, "learning_rate": 1.95862350157187e-05, "loss": 0.6771, "step": 5560 }, { "epoch": 0.1011550196135406, "grad_norm": 0.9947963035948991, "learning_rate": 1.9584592826054276e-05, "loss": 0.6843, "step": 5570 }, { "epoch": 0.10133662647101555, "grad_norm": 0.9911898633291814, "learning_rate": 1.958294745312162e-05, "loss": 0.6749, "step": 5580 }, { "epoch": 0.10151823332849048, "grad_norm": 1.1923604819621771, "learning_rate": 1.95812988974672e-05, "loss": 0.684, "step": 5590 }, { "epoch": 0.10169984018596542, "grad_norm": 0.9941670778108078, "learning_rate": 1.9579647159638538e-05, "loss": 0.6745, "step": 5600 }, { "epoch": 0.10188144704344036, "grad_norm": 0.9871325993971709, "learning_rate": 1.9577992240184218e-05, "loss": 0.6834, "step": 5610 }, { "epoch": 0.1020630539009153, "grad_norm": 1.0192375606081805, "learning_rate": 1.957633413965388e-05, "loss": 0.6963, "step": 5620 }, { "epoch": 0.10224466075839024, "grad_norm": 1.0336737764483612, "learning_rate": 1.957467285859821e-05, "loss": 0.6763, "step": 5630 }, { "epoch": 0.10242626761586518, "grad_norm": 0.9706840976496031, "learning_rate": 1.9573008397568963e-05, "loss": 0.6794, "step": 5640 }, { "epoch": 0.10260787447334012, "grad_norm": 0.9742547099733623, "learning_rate": 1.957134075711895e-05, "loss": 0.6819, "step": 5650 }, { "epoch": 0.10278948133081506, "grad_norm": 1.00924068300285, "learning_rate": 1.9569669937802026e-05, "loss": 0.6882, "step": 5660 }, { "epoch": 0.10297108818828998, "grad_norm": 0.9424793690371418, "learning_rate": 1.9567995940173113e-05, "loss": 0.6812, "step": 5670 }, { "epoch": 0.10315269504576492, "grad_norm": 0.9533957529717435, "learning_rate": 1.9566318764788185e-05, "loss": 0.6772, "step": 5680 }, { "epoch": 0.10333430190323986, "grad_norm": 0.9605857305149167, "learning_rate": 1.956463841220427e-05, "loss": 0.678, "step": 5690 }, { "epoch": 0.1035159087607148, "grad_norm": 0.9779276958657896, "learning_rate": 1.9562954882979453e-05, "loss": 0.6911, "step": 5700 }, { "epoch": 0.10369751561818974, "grad_norm": 1.005178790147599, "learning_rate": 1.9561268177672873e-05, "loss": 0.6753, "step": 5710 }, { "epoch": 0.10387912247566468, "grad_norm": 0.9496205564797482, "learning_rate": 1.9559578296844727e-05, "loss": 0.6737, "step": 5720 }, { "epoch": 0.10406072933313962, "grad_norm": 0.9641272649591275, "learning_rate": 1.9557885241056263e-05, "loss": 0.6715, "step": 5730 }, { "epoch": 0.10424233619061456, "grad_norm": 0.9531844154923612, "learning_rate": 1.9556189010869778e-05, "loss": 0.6841, "step": 5740 }, { "epoch": 0.1044239430480895, "grad_norm": 1.0321529877714708, "learning_rate": 1.9554489606848644e-05, "loss": 0.687, "step": 5750 }, { "epoch": 0.10460554990556443, "grad_norm": 0.9931931061442154, "learning_rate": 1.955278702955726e-05, "loss": 0.6857, "step": 5760 }, { "epoch": 0.10478715676303937, "grad_norm": 0.9858931564539258, "learning_rate": 1.9551081279561098e-05, "loss": 0.6858, "step": 5770 }, { "epoch": 0.10496876362051431, "grad_norm": 0.9328698046749504, "learning_rate": 1.9549372357426673e-05, "loss": 0.6847, "step": 5780 }, { "epoch": 0.10515037047798925, "grad_norm": 0.983606429781359, "learning_rate": 1.9547660263721567e-05, "loss": 0.6905, "step": 5790 }, { "epoch": 0.10533197733546419, "grad_norm": 1.0064725716986476, "learning_rate": 1.9545944999014398e-05, "loss": 0.6829, "step": 5800 }, { "epoch": 0.10551358419293913, "grad_norm": 0.9884360685579324, "learning_rate": 1.9544226563874847e-05, "loss": 0.6753, "step": 5810 }, { "epoch": 0.10569519105041407, "grad_norm": 0.9911562223346119, "learning_rate": 1.954250495887365e-05, "loss": 0.6842, "step": 5820 }, { "epoch": 0.10587679790788901, "grad_norm": 0.9821638289677145, "learning_rate": 1.954078018458259e-05, "loss": 0.6932, "step": 5830 }, { "epoch": 0.10605840476536393, "grad_norm": 1.0386627098751497, "learning_rate": 1.9539052241574504e-05, "loss": 0.6892, "step": 5840 }, { "epoch": 0.10624001162283887, "grad_norm": 0.989136772433665, "learning_rate": 1.9537321130423285e-05, "loss": 0.6694, "step": 5850 }, { "epoch": 0.10642161848031381, "grad_norm": 1.0002271404756051, "learning_rate": 1.9535586851703875e-05, "loss": 0.6705, "step": 5860 }, { "epoch": 0.10660322533778875, "grad_norm": 0.9272785042319822, "learning_rate": 1.953384940599227e-05, "loss": 0.6784, "step": 5870 }, { "epoch": 0.10678483219526369, "grad_norm": 1.0347436707267008, "learning_rate": 1.9532108793865513e-05, "loss": 0.6722, "step": 5880 }, { "epoch": 0.10696643905273863, "grad_norm": 0.9637349060414896, "learning_rate": 1.9530365015901705e-05, "loss": 0.6682, "step": 5890 }, { "epoch": 0.10714804591021357, "grad_norm": 0.973910818683368, "learning_rate": 1.952861807267999e-05, "loss": 0.6718, "step": 5900 }, { "epoch": 0.10732965276768851, "grad_norm": 1.006872218052995, "learning_rate": 1.952686796478058e-05, "loss": 0.6868, "step": 5910 }, { "epoch": 0.10751125962516345, "grad_norm": 0.9507056081864714, "learning_rate": 1.9525114692784716e-05, "loss": 0.6681, "step": 5920 }, { "epoch": 0.10769286648263839, "grad_norm": 0.9736144313069676, "learning_rate": 1.9523358257274706e-05, "loss": 0.6841, "step": 5930 }, { "epoch": 0.10787447334011332, "grad_norm": 0.9970626360377258, "learning_rate": 1.9521598658833906e-05, "loss": 0.6943, "step": 5940 }, { "epoch": 0.10805608019758826, "grad_norm": 0.9844795685819223, "learning_rate": 1.9519835898046714e-05, "loss": 0.6785, "step": 5950 }, { "epoch": 0.1082376870550632, "grad_norm": 0.9584258688966787, "learning_rate": 1.9518069975498583e-05, "loss": 0.6823, "step": 5960 }, { "epoch": 0.10841929391253814, "grad_norm": 1.002838017750606, "learning_rate": 1.9516300891776024e-05, "loss": 0.68, "step": 5970 }, { "epoch": 0.10860090077001308, "grad_norm": 0.9718874228050478, "learning_rate": 1.9514528647466592e-05, "loss": 0.6682, "step": 5980 }, { "epoch": 0.10878250762748802, "grad_norm": 0.9744990785108785, "learning_rate": 1.9512753243158882e-05, "loss": 0.6807, "step": 5990 }, { "epoch": 0.10896411448496295, "grad_norm": 1.00428210612053, "learning_rate": 1.9510974679442555e-05, "loss": 0.69, "step": 6000 }, { "epoch": 0.10914572134243788, "grad_norm": 0.9858677175961252, "learning_rate": 1.950919295690831e-05, "loss": 0.6865, "step": 6010 }, { "epoch": 0.10932732819991282, "grad_norm": 0.9813229057694569, "learning_rate": 1.95074080761479e-05, "loss": 0.6745, "step": 6020 }, { "epoch": 0.10950893505738776, "grad_norm": 0.9501919555299003, "learning_rate": 1.950562003775413e-05, "loss": 0.6874, "step": 6030 }, { "epoch": 0.1096905419148627, "grad_norm": 0.9537042031912005, "learning_rate": 1.950382884232084e-05, "loss": 0.6846, "step": 6040 }, { "epoch": 0.10987214877233764, "grad_norm": 1.0138854385292502, "learning_rate": 1.9502034490442936e-05, "loss": 0.6923, "step": 6050 }, { "epoch": 0.11005375562981258, "grad_norm": 0.9684737042556253, "learning_rate": 1.9500236982716362e-05, "loss": 0.6644, "step": 6060 }, { "epoch": 0.11023536248728752, "grad_norm": 0.988009633715568, "learning_rate": 1.9498436319738113e-05, "loss": 0.6775, "step": 6070 }, { "epoch": 0.11041696934476246, "grad_norm": 0.9528623404258506, "learning_rate": 1.949663250210623e-05, "loss": 0.6856, "step": 6080 }, { "epoch": 0.1105985762022374, "grad_norm": 1.0056642553691868, "learning_rate": 1.9494825530419804e-05, "loss": 0.6812, "step": 6090 }, { "epoch": 0.11078018305971234, "grad_norm": 0.9481920524620984, "learning_rate": 1.9493015405278976e-05, "loss": 0.6652, "step": 6100 }, { "epoch": 0.11096178991718728, "grad_norm": 0.9661592742067979, "learning_rate": 1.949120212728492e-05, "loss": 0.6726, "step": 6110 }, { "epoch": 0.11114339677466221, "grad_norm": 0.990244848712884, "learning_rate": 1.9489385697039884e-05, "loss": 0.6875, "step": 6120 }, { "epoch": 0.11132500363213715, "grad_norm": 0.968597609675184, "learning_rate": 1.9487566115147136e-05, "loss": 0.6779, "step": 6130 }, { "epoch": 0.11150661048961209, "grad_norm": 0.9436992241286043, "learning_rate": 1.9485743382211003e-05, "loss": 0.6687, "step": 6140 }, { "epoch": 0.11168821734708703, "grad_norm": 0.947429992707116, "learning_rate": 1.9483917498836864e-05, "loss": 0.6692, "step": 6150 }, { "epoch": 0.11186982420456197, "grad_norm": 0.9617695912959141, "learning_rate": 1.9482088465631133e-05, "loss": 0.6773, "step": 6160 }, { "epoch": 0.1120514310620369, "grad_norm": 0.9577667790077407, "learning_rate": 1.948025628320127e-05, "loss": 0.6862, "step": 6170 }, { "epoch": 0.11223303791951184, "grad_norm": 0.9600565533883848, "learning_rate": 1.9478420952155795e-05, "loss": 0.663, "step": 6180 }, { "epoch": 0.11241464477698677, "grad_norm": 0.9926268473522155, "learning_rate": 1.947658247310426e-05, "loss": 0.6772, "step": 6190 }, { "epoch": 0.11259625163446171, "grad_norm": 0.9836487857624935, "learning_rate": 1.9474740846657262e-05, "loss": 0.684, "step": 6200 }, { "epoch": 0.11277785849193665, "grad_norm": 0.9659448238976095, "learning_rate": 1.947289607342646e-05, "loss": 0.6816, "step": 6210 }, { "epoch": 0.11295946534941159, "grad_norm": 0.9787706730440235, "learning_rate": 1.9471048154024537e-05, "loss": 0.6841, "step": 6220 }, { "epoch": 0.11314107220688653, "grad_norm": 0.9556112329759329, "learning_rate": 1.946919708906523e-05, "loss": 0.6978, "step": 6230 }, { "epoch": 0.11332267906436147, "grad_norm": 0.9780093370747914, "learning_rate": 1.9467342879163334e-05, "loss": 0.6781, "step": 6240 }, { "epoch": 0.11350428592183641, "grad_norm": 0.934300481141388, "learning_rate": 1.9465485524934663e-05, "loss": 0.663, "step": 6250 }, { "epoch": 0.11368589277931135, "grad_norm": 0.9176558640731577, "learning_rate": 1.9463625026996088e-05, "loss": 0.6758, "step": 6260 }, { "epoch": 0.11386749963678629, "grad_norm": 0.956350493792973, "learning_rate": 1.946176138596553e-05, "loss": 0.6719, "step": 6270 }, { "epoch": 0.11404910649426123, "grad_norm": 0.9701299891295175, "learning_rate": 1.945989460246194e-05, "loss": 0.6656, "step": 6280 }, { "epoch": 0.11423071335173617, "grad_norm": 0.9952068040146557, "learning_rate": 1.9458024677105332e-05, "loss": 0.6649, "step": 6290 }, { "epoch": 0.1144123202092111, "grad_norm": 0.9850196800788763, "learning_rate": 1.9456151610516745e-05, "loss": 0.673, "step": 6300 }, { "epoch": 0.11459392706668604, "grad_norm": 0.9725141098637025, "learning_rate": 1.9454275403318264e-05, "loss": 0.6782, "step": 6310 }, { "epoch": 0.11477553392416098, "grad_norm": 0.9426137113940767, "learning_rate": 1.9452396056133032e-05, "loss": 0.6722, "step": 6320 }, { "epoch": 0.11495714078163591, "grad_norm": 1.0012694435185965, "learning_rate": 1.945051356958522e-05, "loss": 0.6666, "step": 6330 }, { "epoch": 0.11513874763911085, "grad_norm": 0.9891229673628431, "learning_rate": 1.9448627944300044e-05, "loss": 0.6844, "step": 6340 }, { "epoch": 0.11532035449658579, "grad_norm": 1.0072514568716826, "learning_rate": 1.9446739180903762e-05, "loss": 0.6684, "step": 6350 }, { "epoch": 0.11550196135406073, "grad_norm": 0.9570532953004515, "learning_rate": 1.944484728002368e-05, "loss": 0.6908, "step": 6360 }, { "epoch": 0.11568356821153566, "grad_norm": 0.974893582998029, "learning_rate": 1.9442952242288146e-05, "loss": 0.6643, "step": 6370 }, { "epoch": 0.1158651750690106, "grad_norm": 0.9725099403319607, "learning_rate": 1.9441054068326543e-05, "loss": 0.6699, "step": 6380 }, { "epoch": 0.11604678192648554, "grad_norm": 0.9629425404257206, "learning_rate": 1.94391527587693e-05, "loss": 0.6636, "step": 6390 }, { "epoch": 0.11622838878396048, "grad_norm": 0.9742361717757323, "learning_rate": 1.9437248314247884e-05, "loss": 0.6735, "step": 6400 }, { "epoch": 0.11640999564143542, "grad_norm": 0.9585892860589535, "learning_rate": 1.9435340735394807e-05, "loss": 0.6608, "step": 6410 }, { "epoch": 0.11659160249891036, "grad_norm": 1.0185151557404648, "learning_rate": 1.9433430022843623e-05, "loss": 0.6801, "step": 6420 }, { "epoch": 0.1167732093563853, "grad_norm": 0.9345848115216404, "learning_rate": 1.943151617722892e-05, "loss": 0.6849, "step": 6430 }, { "epoch": 0.11695481621386024, "grad_norm": 0.9660701092535433, "learning_rate": 1.942959919918634e-05, "loss": 0.6631, "step": 6440 }, { "epoch": 0.11713642307133518, "grad_norm": 0.9480967698180887, "learning_rate": 1.9427679089352546e-05, "loss": 0.6673, "step": 6450 }, { "epoch": 0.11731802992881012, "grad_norm": 0.9500543043835188, "learning_rate": 1.9425755848365255e-05, "loss": 0.6836, "step": 6460 }, { "epoch": 0.11749963678628506, "grad_norm": 0.9782986042150313, "learning_rate": 1.9423829476863223e-05, "loss": 0.669, "step": 6470 }, { "epoch": 0.11768124364376, "grad_norm": 0.9481825899249441, "learning_rate": 1.942189997548624e-05, "loss": 0.6756, "step": 6480 }, { "epoch": 0.11786285050123492, "grad_norm": 0.9669301900292928, "learning_rate": 1.941996734487514e-05, "loss": 0.6725, "step": 6490 }, { "epoch": 0.11804445735870986, "grad_norm": 0.9932068601011935, "learning_rate": 1.9418031585671798e-05, "loss": 0.6762, "step": 6500 }, { "epoch": 0.1182260642161848, "grad_norm": 0.9862894668023602, "learning_rate": 1.9416092698519124e-05, "loss": 0.667, "step": 6510 }, { "epoch": 0.11840767107365974, "grad_norm": 0.9973265446508446, "learning_rate": 1.9414150684061058e-05, "loss": 0.6693, "step": 6520 }, { "epoch": 0.11858927793113468, "grad_norm": 1.0036219767247914, "learning_rate": 1.9412205542942604e-05, "loss": 0.6794, "step": 6530 }, { "epoch": 0.11877088478860962, "grad_norm": 0.9772302729108107, "learning_rate": 1.941025727580978e-05, "loss": 0.685, "step": 6540 }, { "epoch": 0.11895249164608455, "grad_norm": 0.940013414823102, "learning_rate": 1.940830588330965e-05, "loss": 0.6825, "step": 6550 }, { "epoch": 0.1191340985035595, "grad_norm": 0.9427098877385732, "learning_rate": 1.9406351366090324e-05, "loss": 0.675, "step": 6560 }, { "epoch": 0.11931570536103443, "grad_norm": 0.9717875608567507, "learning_rate": 1.9404393724800937e-05, "loss": 0.6619, "step": 6570 }, { "epoch": 0.11949731221850937, "grad_norm": 0.9342040326881886, "learning_rate": 1.940243296009167e-05, "loss": 0.6763, "step": 6580 }, { "epoch": 0.11967891907598431, "grad_norm": 0.9578320741293034, "learning_rate": 1.940046907261374e-05, "loss": 0.671, "step": 6590 }, { "epoch": 0.11986052593345925, "grad_norm": 0.9162512944622752, "learning_rate": 1.93985020630194e-05, "loss": 0.6602, "step": 6600 }, { "epoch": 0.12004213279093419, "grad_norm": 0.9988120829114734, "learning_rate": 1.9396531931961937e-05, "loss": 0.6723, "step": 6610 }, { "epoch": 0.12022373964840913, "grad_norm": 0.9514843332030815, "learning_rate": 1.939455868009568e-05, "loss": 0.6886, "step": 6620 }, { "epoch": 0.12040534650588407, "grad_norm": 0.9422669005489921, "learning_rate": 1.9392582308075992e-05, "loss": 0.6811, "step": 6630 }, { "epoch": 0.120586953363359, "grad_norm": 0.9847825974993284, "learning_rate": 1.9390602816559273e-05, "loss": 0.6834, "step": 6640 }, { "epoch": 0.12076856022083395, "grad_norm": 1.0048414129054277, "learning_rate": 1.938862020620296e-05, "loss": 0.6823, "step": 6650 }, { "epoch": 0.12095016707830887, "grad_norm": 1.0661841380322656, "learning_rate": 1.938663447766552e-05, "loss": 0.6694, "step": 6660 }, { "epoch": 0.12113177393578381, "grad_norm": 0.981721761061712, "learning_rate": 1.9384645631606462e-05, "loss": 0.672, "step": 6670 }, { "epoch": 0.12131338079325875, "grad_norm": 0.9692670255203116, "learning_rate": 1.9382653668686333e-05, "loss": 0.6735, "step": 6680 }, { "epoch": 0.12149498765073369, "grad_norm": 1.0109942630783149, "learning_rate": 1.9380658589566703e-05, "loss": 0.683, "step": 6690 }, { "epoch": 0.12167659450820863, "grad_norm": 0.9958567894476961, "learning_rate": 1.937866039491019e-05, "loss": 0.6791, "step": 6700 }, { "epoch": 0.12185820136568357, "grad_norm": 0.9907905103169877, "learning_rate": 1.937665908538044e-05, "loss": 0.6874, "step": 6710 }, { "epoch": 0.1220398082231585, "grad_norm": 0.9094363244279887, "learning_rate": 1.9374654661642135e-05, "loss": 0.6691, "step": 6720 }, { "epoch": 0.12222141508063344, "grad_norm": 0.9527228732964198, "learning_rate": 1.937264712436099e-05, "loss": 0.6834, "step": 6730 }, { "epoch": 0.12240302193810838, "grad_norm": 0.9692810600481918, "learning_rate": 1.9370636474203754e-05, "loss": 0.6799, "step": 6740 }, { "epoch": 0.12258462879558332, "grad_norm": 0.9159139998282989, "learning_rate": 1.9368622711838215e-05, "loss": 0.6724, "step": 6750 }, { "epoch": 0.12276623565305826, "grad_norm": 0.9378700361409161, "learning_rate": 1.936660583793319e-05, "loss": 0.6667, "step": 6760 }, { "epoch": 0.1229478425105332, "grad_norm": 0.9238911918970877, "learning_rate": 1.936458585315853e-05, "loss": 0.6651, "step": 6770 }, { "epoch": 0.12312944936800814, "grad_norm": 1.03257941543745, "learning_rate": 1.9362562758185116e-05, "loss": 0.6959, "step": 6780 }, { "epoch": 0.12331105622548308, "grad_norm": 0.9374600924083311, "learning_rate": 1.9360536553684865e-05, "loss": 0.6698, "step": 6790 }, { "epoch": 0.12349266308295802, "grad_norm": 0.9215727956739971, "learning_rate": 1.9358507240330735e-05, "loss": 0.6625, "step": 6800 }, { "epoch": 0.12367426994043296, "grad_norm": 0.949735812914819, "learning_rate": 1.93564748187967e-05, "loss": 0.6647, "step": 6810 }, { "epoch": 0.12385587679790788, "grad_norm": 0.9795565893194558, "learning_rate": 1.935443928975778e-05, "loss": 0.6952, "step": 6820 }, { "epoch": 0.12403748365538282, "grad_norm": 0.9599516694650017, "learning_rate": 1.935240065389002e-05, "loss": 0.6733, "step": 6830 }, { "epoch": 0.12421909051285776, "grad_norm": 0.9603808941183447, "learning_rate": 1.9350358911870496e-05, "loss": 0.6802, "step": 6840 }, { "epoch": 0.1244006973703327, "grad_norm": 1.0158935418469113, "learning_rate": 1.9348314064377327e-05, "loss": 0.6804, "step": 6850 }, { "epoch": 0.12458230422780764, "grad_norm": 0.975790253145487, "learning_rate": 1.9346266112089647e-05, "loss": 0.6693, "step": 6860 }, { "epoch": 0.12476391108528258, "grad_norm": 0.9393216534468396, "learning_rate": 1.934421505568763e-05, "loss": 0.6851, "step": 6870 }, { "epoch": 0.12494551794275752, "grad_norm": 0.9468257411532298, "learning_rate": 1.934216089585248e-05, "loss": 0.6701, "step": 6880 }, { "epoch": 0.12512712480023247, "grad_norm": 0.937646894933839, "learning_rate": 1.934010363326644e-05, "loss": 0.6693, "step": 6890 }, { "epoch": 0.1253087316577074, "grad_norm": 0.9457439343180853, "learning_rate": 1.9338043268612763e-05, "loss": 0.6596, "step": 6900 }, { "epoch": 0.12549033851518232, "grad_norm": 0.9416975995266438, "learning_rate": 1.9335979802575747e-05, "loss": 0.6685, "step": 6910 }, { "epoch": 0.12567194537265727, "grad_norm": 0.9245636574827694, "learning_rate": 1.9333913235840725e-05, "loss": 0.6714, "step": 6920 }, { "epoch": 0.1258535522301322, "grad_norm": 0.9631352847198362, "learning_rate": 1.9331843569094045e-05, "loss": 0.6762, "step": 6930 }, { "epoch": 0.12603515908760715, "grad_norm": 1.015713921490038, "learning_rate": 1.9329770803023094e-05, "loss": 0.6701, "step": 6940 }, { "epoch": 0.12621676594508208, "grad_norm": 0.9694292248719614, "learning_rate": 1.932769493831629e-05, "loss": 0.6874, "step": 6950 }, { "epoch": 0.12639837280255703, "grad_norm": 0.9876107628562991, "learning_rate": 1.932561597566307e-05, "loss": 0.6937, "step": 6960 }, { "epoch": 0.12657997966003195, "grad_norm": 0.9585884345260757, "learning_rate": 1.932353391575391e-05, "loss": 0.6678, "step": 6970 }, { "epoch": 0.1267615865175069, "grad_norm": 0.923017734524627, "learning_rate": 1.9321448759280313e-05, "loss": 0.6789, "step": 6980 }, { "epoch": 0.12694319337498183, "grad_norm": 0.9673915360030476, "learning_rate": 1.93193605069348e-05, "loss": 0.663, "step": 6990 }, { "epoch": 0.12712480023245679, "grad_norm": 0.9470088390179876, "learning_rate": 1.9317269159410937e-05, "loss": 0.6704, "step": 7000 }, { "epoch": 0.1273064070899317, "grad_norm": 0.9157490664862021, "learning_rate": 1.931517471740331e-05, "loss": 0.6537, "step": 7010 }, { "epoch": 0.12748801394740666, "grad_norm": 0.9681583073336721, "learning_rate": 1.9313077181607527e-05, "loss": 0.6755, "step": 7020 }, { "epoch": 0.1276696208048816, "grad_norm": 0.9504258759740108, "learning_rate": 1.931097655272023e-05, "loss": 0.6781, "step": 7030 }, { "epoch": 0.12785122766235654, "grad_norm": 0.9497121736961599, "learning_rate": 1.930887283143909e-05, "loss": 0.681, "step": 7040 }, { "epoch": 0.12803283451983147, "grad_norm": 0.9293758297465639, "learning_rate": 1.9306766018462797e-05, "loss": 0.6807, "step": 7050 }, { "epoch": 0.12821444137730642, "grad_norm": 0.9156513670802634, "learning_rate": 1.930465611449108e-05, "loss": 0.6604, "step": 7060 }, { "epoch": 0.12839604823478135, "grad_norm": 0.9399133504758205, "learning_rate": 1.9302543120224683e-05, "loss": 0.6794, "step": 7070 }, { "epoch": 0.12857765509225627, "grad_norm": 0.9603364041392694, "learning_rate": 1.9300427036365384e-05, "loss": 0.663, "step": 7080 }, { "epoch": 0.12875926194973122, "grad_norm": 0.9550520120258681, "learning_rate": 1.929830786361598e-05, "loss": 0.6693, "step": 7090 }, { "epoch": 0.12894086880720615, "grad_norm": 0.9316315481708761, "learning_rate": 1.92961856026803e-05, "loss": 0.6808, "step": 7100 }, { "epoch": 0.1291224756646811, "grad_norm": 0.9810010725243198, "learning_rate": 1.92940602542632e-05, "loss": 0.6856, "step": 7110 }, { "epoch": 0.12930408252215603, "grad_norm": 0.8967202251654762, "learning_rate": 1.9291931819070552e-05, "loss": 0.6778, "step": 7120 }, { "epoch": 0.12948568937963098, "grad_norm": 0.9336775937161217, "learning_rate": 1.928980029780926e-05, "loss": 0.6598, "step": 7130 }, { "epoch": 0.1296672962371059, "grad_norm": 0.9493523628805867, "learning_rate": 1.928766569118726e-05, "loss": 0.6625, "step": 7140 }, { "epoch": 0.12984890309458086, "grad_norm": 0.9345537925324131, "learning_rate": 1.9285527999913494e-05, "loss": 0.6633, "step": 7150 }, { "epoch": 0.13003050995205578, "grad_norm": 0.9299857986239477, "learning_rate": 1.928338722469795e-05, "loss": 0.6618, "step": 7160 }, { "epoch": 0.13021211680953074, "grad_norm": 0.8934177235109261, "learning_rate": 1.9281243366251624e-05, "loss": 0.6779, "step": 7170 }, { "epoch": 0.13039372366700566, "grad_norm": 0.984355834076018, "learning_rate": 1.927909642528654e-05, "loss": 0.6752, "step": 7180 }, { "epoch": 0.13057533052448062, "grad_norm": 0.9674678119850129, "learning_rate": 1.9276946402515753e-05, "loss": 0.6765, "step": 7190 }, { "epoch": 0.13075693738195554, "grad_norm": 0.9135640772644503, "learning_rate": 1.927479329865333e-05, "loss": 0.6649, "step": 7200 }, { "epoch": 0.1309385442394305, "grad_norm": 0.9119423638028953, "learning_rate": 1.9272637114414368e-05, "loss": 0.6718, "step": 7210 }, { "epoch": 0.13112015109690542, "grad_norm": 0.9780193810092587, "learning_rate": 1.9270477850514996e-05, "loss": 0.6821, "step": 7220 }, { "epoch": 0.13130175795438034, "grad_norm": 0.969200506376997, "learning_rate": 1.9268315507672342e-05, "loss": 0.6826, "step": 7230 }, { "epoch": 0.1314833648118553, "grad_norm": 0.9451691244375606, "learning_rate": 1.926615008660458e-05, "loss": 0.6683, "step": 7240 }, { "epoch": 0.13166497166933022, "grad_norm": 0.9142901414329919, "learning_rate": 1.9263981588030894e-05, "loss": 0.6712, "step": 7250 }, { "epoch": 0.13184657852680517, "grad_norm": 0.9697697921924247, "learning_rate": 1.9261810012671494e-05, "loss": 0.6789, "step": 7260 }, { "epoch": 0.1320281853842801, "grad_norm": 0.9710045627739632, "learning_rate": 1.9259635361247616e-05, "loss": 0.6744, "step": 7270 }, { "epoch": 0.13220979224175505, "grad_norm": 0.937833222285678, "learning_rate": 1.9257457634481504e-05, "loss": 0.6774, "step": 7280 }, { "epoch": 0.13239139909922998, "grad_norm": 0.9229600395431539, "learning_rate": 1.9255276833096436e-05, "loss": 0.671, "step": 7290 }, { "epoch": 0.13257300595670493, "grad_norm": 0.9056503837566917, "learning_rate": 1.9253092957816707e-05, "loss": 0.6695, "step": 7300 }, { "epoch": 0.13275461281417986, "grad_norm": 0.9425410317217253, "learning_rate": 1.925090600936763e-05, "loss": 0.6657, "step": 7310 }, { "epoch": 0.1329362196716548, "grad_norm": 0.9611148931728828, "learning_rate": 1.9248715988475552e-05, "loss": 0.6652, "step": 7320 }, { "epoch": 0.13311782652912973, "grad_norm": 0.9593931835996243, "learning_rate": 1.9246522895867822e-05, "loss": 0.6778, "step": 7330 }, { "epoch": 0.1332994333866047, "grad_norm": 0.9720588726787228, "learning_rate": 1.924432673227282e-05, "loss": 0.6884, "step": 7340 }, { "epoch": 0.1334810402440796, "grad_norm": 0.9647657369274472, "learning_rate": 1.9242127498419943e-05, "loss": 0.6698, "step": 7350 }, { "epoch": 0.13366264710155457, "grad_norm": 0.8956050048429959, "learning_rate": 1.9239925195039606e-05, "loss": 0.6765, "step": 7360 }, { "epoch": 0.1338442539590295, "grad_norm": 0.97428320087275, "learning_rate": 1.9237719822863247e-05, "loss": 0.6894, "step": 7370 }, { "epoch": 0.13402586081650444, "grad_norm": 0.9765069246787706, "learning_rate": 1.9235511382623328e-05, "loss": 0.6692, "step": 7380 }, { "epoch": 0.13420746767397937, "grad_norm": 0.9128314882463173, "learning_rate": 1.9233299875053314e-05, "loss": 0.6711, "step": 7390 }, { "epoch": 0.1343890745314543, "grad_norm": 0.9314801797414151, "learning_rate": 1.9231085300887704e-05, "loss": 0.6775, "step": 7400 }, { "epoch": 0.13457068138892925, "grad_norm": 0.9010681027912392, "learning_rate": 1.922886766086201e-05, "loss": 0.6637, "step": 7410 }, { "epoch": 0.13475228824640417, "grad_norm": 0.9032988768470179, "learning_rate": 1.922664695571276e-05, "loss": 0.6651, "step": 7420 }, { "epoch": 0.13493389510387913, "grad_norm": 0.9444002408586389, "learning_rate": 1.9224423186177504e-05, "loss": 0.6624, "step": 7430 }, { "epoch": 0.13511550196135405, "grad_norm": 0.9203267111416561, "learning_rate": 1.9222196352994807e-05, "loss": 0.6816, "step": 7440 }, { "epoch": 0.135297108818829, "grad_norm": 0.9801537768222162, "learning_rate": 1.921996645690426e-05, "loss": 0.6834, "step": 7450 }, { "epoch": 0.13547871567630393, "grad_norm": 0.9610527600974511, "learning_rate": 1.921773349864645e-05, "loss": 0.6671, "step": 7460 }, { "epoch": 0.13566032253377888, "grad_norm": 0.9488500790089762, "learning_rate": 1.9215497478963e-05, "loss": 0.6768, "step": 7470 }, { "epoch": 0.1358419293912538, "grad_norm": 0.9678441231689988, "learning_rate": 1.9213258398596554e-05, "loss": 0.6852, "step": 7480 }, { "epoch": 0.13602353624872876, "grad_norm": 0.9185140489043131, "learning_rate": 1.9211016258290747e-05, "loss": 0.6735, "step": 7490 }, { "epoch": 0.13620514310620369, "grad_norm": 0.9233244423717796, "learning_rate": 1.9208771058790262e-05, "loss": 0.6704, "step": 7500 }, { "epoch": 0.13638674996367864, "grad_norm": 1.0071393565430344, "learning_rate": 1.9206522800840775e-05, "loss": 0.6705, "step": 7510 }, { "epoch": 0.13656835682115356, "grad_norm": 0.9668485599193456, "learning_rate": 1.9204271485188982e-05, "loss": 0.6825, "step": 7520 }, { "epoch": 0.13674996367862852, "grad_norm": 0.9416536991023118, "learning_rate": 1.9202017112582605e-05, "loss": 0.6786, "step": 7530 }, { "epoch": 0.13693157053610344, "grad_norm": 0.9275747894066285, "learning_rate": 1.919975968377037e-05, "loss": 0.6868, "step": 7540 }, { "epoch": 0.1371131773935784, "grad_norm": 0.9755695369702378, "learning_rate": 1.9197499199502023e-05, "loss": 0.6614, "step": 7550 }, { "epoch": 0.13729478425105332, "grad_norm": 0.9606159906242602, "learning_rate": 1.919523566052832e-05, "loss": 0.671, "step": 7560 }, { "epoch": 0.13747639110852825, "grad_norm": 0.9526916491160607, "learning_rate": 1.9192969067601045e-05, "loss": 0.6871, "step": 7570 }, { "epoch": 0.1376579979660032, "grad_norm": 0.9379521879075021, "learning_rate": 1.9190699421472976e-05, "loss": 0.6688, "step": 7580 }, { "epoch": 0.13783960482347812, "grad_norm": 0.9653085567488748, "learning_rate": 1.9188426722897923e-05, "loss": 0.6817, "step": 7590 }, { "epoch": 0.13802121168095308, "grad_norm": 0.9130857356661161, "learning_rate": 1.91861509726307e-05, "loss": 0.667, "step": 7600 }, { "epoch": 0.138202818538428, "grad_norm": 0.9448466759682035, "learning_rate": 1.9183872171427137e-05, "loss": 0.6683, "step": 7610 }, { "epoch": 0.13838442539590295, "grad_norm": 0.9342758071973932, "learning_rate": 1.9181590320044076e-05, "loss": 0.6772, "step": 7620 }, { "epoch": 0.13856603225337788, "grad_norm": 0.9700527411657128, "learning_rate": 1.9179305419239376e-05, "loss": 0.6922, "step": 7630 }, { "epoch": 0.13874763911085283, "grad_norm": 0.9469785705368822, "learning_rate": 1.9177017469771905e-05, "loss": 0.6683, "step": 7640 }, { "epoch": 0.13892924596832776, "grad_norm": 0.9202551256399039, "learning_rate": 1.9174726472401546e-05, "loss": 0.6788, "step": 7650 }, { "epoch": 0.1391108528258027, "grad_norm": 0.9322572722849655, "learning_rate": 1.917243242788919e-05, "loss": 0.6678, "step": 7660 }, { "epoch": 0.13929245968327764, "grad_norm": 0.928173051421851, "learning_rate": 1.9170135336996748e-05, "loss": 0.6807, "step": 7670 }, { "epoch": 0.1394740665407526, "grad_norm": 0.9720826367967286, "learning_rate": 1.9167835200487127e-05, "loss": 0.6696, "step": 7680 }, { "epoch": 0.13965567339822751, "grad_norm": 0.936994645914921, "learning_rate": 1.9165532019124275e-05, "loss": 0.6729, "step": 7690 }, { "epoch": 0.13983728025570247, "grad_norm": 0.9344875270150738, "learning_rate": 1.9163225793673112e-05, "loss": 0.6785, "step": 7700 }, { "epoch": 0.1400188871131774, "grad_norm": 0.9058642902727104, "learning_rate": 1.91609165248996e-05, "loss": 0.6872, "step": 7710 }, { "epoch": 0.14020049397065235, "grad_norm": 0.9161719748105754, "learning_rate": 1.9158604213570705e-05, "loss": 0.671, "step": 7720 }, { "epoch": 0.14038210082812727, "grad_norm": 0.938923757002432, "learning_rate": 1.9156288860454393e-05, "loss": 0.6745, "step": 7730 }, { "epoch": 0.1405637076856022, "grad_norm": 0.9376514701525069, "learning_rate": 1.9153970466319652e-05, "loss": 0.6682, "step": 7740 }, { "epoch": 0.14074531454307715, "grad_norm": 0.9682838352989783, "learning_rate": 1.915164903193647e-05, "loss": 0.6749, "step": 7750 }, { "epoch": 0.14092692140055207, "grad_norm": 0.9339941888825519, "learning_rate": 1.9149324558075852e-05, "loss": 0.67, "step": 7760 }, { "epoch": 0.14110852825802703, "grad_norm": 0.9003092149694086, "learning_rate": 1.9146997045509813e-05, "loss": 0.6746, "step": 7770 }, { "epoch": 0.14129013511550195, "grad_norm": 0.9632125015831834, "learning_rate": 1.914466649501137e-05, "loss": 0.6748, "step": 7780 }, { "epoch": 0.1414717419729769, "grad_norm": 0.9230131814124364, "learning_rate": 1.9142332907354557e-05, "loss": 0.6681, "step": 7790 }, { "epoch": 0.14165334883045183, "grad_norm": 0.9428782124750376, "learning_rate": 1.913999628331441e-05, "loss": 0.6744, "step": 7800 }, { "epoch": 0.14183495568792678, "grad_norm": 0.9645755106823983, "learning_rate": 1.913765662366698e-05, "loss": 0.6744, "step": 7810 }, { "epoch": 0.1420165625454017, "grad_norm": 0.9228725969148222, "learning_rate": 1.9135313929189324e-05, "loss": 0.6826, "step": 7820 }, { "epoch": 0.14219816940287666, "grad_norm": 0.9385979326492946, "learning_rate": 1.91329682006595e-05, "loss": 0.6763, "step": 7830 }, { "epoch": 0.1423797762603516, "grad_norm": 0.94020906688249, "learning_rate": 1.9130619438856587e-05, "loss": 0.6819, "step": 7840 }, { "epoch": 0.14256138311782654, "grad_norm": 0.9028741594448367, "learning_rate": 1.9128267644560656e-05, "loss": 0.6809, "step": 7850 }, { "epoch": 0.14274298997530147, "grad_norm": 0.9214138844124753, "learning_rate": 1.9125912818552798e-05, "loss": 0.6671, "step": 7860 }, { "epoch": 0.14292459683277642, "grad_norm": 0.9666898705200409, "learning_rate": 1.9123554961615104e-05, "loss": 0.6593, "step": 7870 }, { "epoch": 0.14310620369025134, "grad_norm": 1.0941908009781578, "learning_rate": 1.9121194074530676e-05, "loss": 0.6754, "step": 7880 }, { "epoch": 0.14328781054772627, "grad_norm": 0.9646285620053774, "learning_rate": 1.9118830158083615e-05, "loss": 0.6565, "step": 7890 }, { "epoch": 0.14346941740520122, "grad_norm": 0.8938706740301572, "learning_rate": 1.911646321305904e-05, "loss": 0.6487, "step": 7900 }, { "epoch": 0.14365102426267615, "grad_norm": 0.9339200160806166, "learning_rate": 1.9114093240243065e-05, "loss": 0.6714, "step": 7910 }, { "epoch": 0.1438326311201511, "grad_norm": 0.9736351316115242, "learning_rate": 1.9111720240422814e-05, "loss": 0.6729, "step": 7920 }, { "epoch": 0.14401423797762603, "grad_norm": 0.8932225227135472, "learning_rate": 1.9109344214386418e-05, "loss": 0.6664, "step": 7930 }, { "epoch": 0.14419584483510098, "grad_norm": 0.926634143584288, "learning_rate": 1.910696516292301e-05, "loss": 0.6765, "step": 7940 }, { "epoch": 0.1443774516925759, "grad_norm": 0.9815774753340013, "learning_rate": 1.9104583086822727e-05, "loss": 0.6602, "step": 7950 }, { "epoch": 0.14455905855005086, "grad_norm": 0.919006603183469, "learning_rate": 1.9102197986876708e-05, "loss": 0.6779, "step": 7960 }, { "epoch": 0.14474066540752578, "grad_norm": 0.951710240173992, "learning_rate": 1.9099809863877113e-05, "loss": 0.6722, "step": 7970 }, { "epoch": 0.14492227226500073, "grad_norm": 0.9068629745685267, "learning_rate": 1.909741871861708e-05, "loss": 0.6694, "step": 7980 }, { "epoch": 0.14510387912247566, "grad_norm": 0.9515411976901116, "learning_rate": 1.909502455189078e-05, "loss": 0.6736, "step": 7990 }, { "epoch": 0.1452854859799506, "grad_norm": 1.4170421855705646, "learning_rate": 1.909262736449336e-05, "loss": 0.6781, "step": 8000 }, { "epoch": 0.14546709283742554, "grad_norm": 0.95227688509352, "learning_rate": 1.9090227157220983e-05, "loss": 0.6715, "step": 8010 }, { "epoch": 0.1456486996949005, "grad_norm": 0.9246947556420575, "learning_rate": 1.9087823930870818e-05, "loss": 0.6516, "step": 8020 }, { "epoch": 0.14583030655237542, "grad_norm": 0.9519161371196657, "learning_rate": 1.9085417686241027e-05, "loss": 0.6871, "step": 8030 }, { "epoch": 0.14601191340985037, "grad_norm": 0.9055169445932469, "learning_rate": 1.908300842413079e-05, "loss": 0.6625, "step": 8040 }, { "epoch": 0.1461935202673253, "grad_norm": 0.920016045454727, "learning_rate": 1.9080596145340277e-05, "loss": 0.672, "step": 8050 }, { "epoch": 0.14637512712480022, "grad_norm": 0.9299833511004767, "learning_rate": 1.9078180850670654e-05, "loss": 0.6719, "step": 8060 }, { "epoch": 0.14655673398227517, "grad_norm": 0.9416432201207575, "learning_rate": 1.9075762540924104e-05, "loss": 0.6679, "step": 8070 }, { "epoch": 0.1467383408397501, "grad_norm": 0.9717192802422135, "learning_rate": 1.9073341216903804e-05, "loss": 0.6693, "step": 8080 }, { "epoch": 0.14691994769722505, "grad_norm": 0.9765786556937369, "learning_rate": 1.9070916879413934e-05, "loss": 0.6762, "step": 8090 }, { "epoch": 0.14710155455469998, "grad_norm": 0.8690970686477616, "learning_rate": 1.9068489529259666e-05, "loss": 0.6617, "step": 8100 }, { "epoch": 0.14728316141217493, "grad_norm": 0.9241952248372552, "learning_rate": 1.906605916724719e-05, "loss": 0.6733, "step": 8110 }, { "epoch": 0.14746476826964985, "grad_norm": 1.1126709387069396, "learning_rate": 1.906362579418368e-05, "loss": 0.6811, "step": 8120 }, { "epoch": 0.1476463751271248, "grad_norm": 0.9281333197282393, "learning_rate": 1.9061189410877312e-05, "loss": 0.6685, "step": 8130 }, { "epoch": 0.14782798198459973, "grad_norm": 0.9596809334873408, "learning_rate": 1.9058750018137277e-05, "loss": 0.668, "step": 8140 }, { "epoch": 0.14800958884207469, "grad_norm": 0.9617402685993298, "learning_rate": 1.9056307616773747e-05, "loss": 0.6545, "step": 8150 }, { "epoch": 0.1481911956995496, "grad_norm": 0.8816189464488475, "learning_rate": 1.9053862207597906e-05, "loss": 0.6633, "step": 8160 }, { "epoch": 0.14837280255702456, "grad_norm": 0.904890088503716, "learning_rate": 1.9051413791421928e-05, "loss": 0.6638, "step": 8170 }, { "epoch": 0.1485544094144995, "grad_norm": 0.9653294318857446, "learning_rate": 1.904896236905899e-05, "loss": 0.6714, "step": 8180 }, { "epoch": 0.14873601627197444, "grad_norm": 0.9155511129162388, "learning_rate": 1.9046507941323263e-05, "loss": 0.659, "step": 8190 }, { "epoch": 0.14891762312944937, "grad_norm": 0.8853814858544656, "learning_rate": 1.904405050902993e-05, "loss": 0.6626, "step": 8200 }, { "epoch": 0.14909922998692432, "grad_norm": 0.9155553284358566, "learning_rate": 1.9041590072995155e-05, "loss": 0.6757, "step": 8210 }, { "epoch": 0.14928083684439925, "grad_norm": 0.9618757024399991, "learning_rate": 1.9039126634036108e-05, "loss": 0.6809, "step": 8220 }, { "epoch": 0.14946244370187417, "grad_norm": 0.9061039994230609, "learning_rate": 1.9036660192970952e-05, "loss": 0.6609, "step": 8230 }, { "epoch": 0.14964405055934912, "grad_norm": 0.9639177286822906, "learning_rate": 1.9034190750618854e-05, "loss": 0.6802, "step": 8240 }, { "epoch": 0.14982565741682405, "grad_norm": 0.9116737280913507, "learning_rate": 1.9031718307799974e-05, "loss": 0.6712, "step": 8250 }, { "epoch": 0.150007264274299, "grad_norm": 1.0050550724523197, "learning_rate": 1.9029242865335465e-05, "loss": 0.6804, "step": 8260 }, { "epoch": 0.15018887113177393, "grad_norm": 0.9052815876824483, "learning_rate": 1.9026764424047482e-05, "loss": 0.6609, "step": 8270 }, { "epoch": 0.15037047798924888, "grad_norm": 0.9365753666098946, "learning_rate": 1.9024282984759174e-05, "loss": 0.6608, "step": 8280 }, { "epoch": 0.1505520848467238, "grad_norm": 0.9526194940071949, "learning_rate": 1.9021798548294682e-05, "loss": 0.6856, "step": 8290 }, { "epoch": 0.15073369170419876, "grad_norm": 0.9597366824754889, "learning_rate": 1.901931111547915e-05, "loss": 0.6753, "step": 8300 }, { "epoch": 0.15091529856167368, "grad_norm": 0.9576853918687106, "learning_rate": 1.9016820687138706e-05, "loss": 0.6717, "step": 8310 }, { "epoch": 0.15109690541914864, "grad_norm": 0.9126730638582262, "learning_rate": 1.9014327264100484e-05, "loss": 0.6663, "step": 8320 }, { "epoch": 0.15127851227662356, "grad_norm": 0.954327515101929, "learning_rate": 1.9011830847192615e-05, "loss": 0.673, "step": 8330 }, { "epoch": 0.15146011913409851, "grad_norm": 1.0107377390910426, "learning_rate": 1.9009331437244207e-05, "loss": 0.6891, "step": 8340 }, { "epoch": 0.15164172599157344, "grad_norm": 0.9943203441927435, "learning_rate": 1.9006829035085377e-05, "loss": 0.6646, "step": 8350 }, { "epoch": 0.1518233328490484, "grad_norm": 0.9290926709644903, "learning_rate": 1.900432364154723e-05, "loss": 0.6653, "step": 8360 }, { "epoch": 0.15200493970652332, "grad_norm": 0.907955838939528, "learning_rate": 1.900181525746187e-05, "loss": 0.6553, "step": 8370 }, { "epoch": 0.15218654656399824, "grad_norm": 0.8760803042738968, "learning_rate": 1.8999303883662387e-05, "loss": 0.6688, "step": 8380 }, { "epoch": 0.1523681534214732, "grad_norm": 0.8941426597373794, "learning_rate": 1.8996789520982868e-05, "loss": 0.6758, "step": 8390 }, { "epoch": 0.15254976027894812, "grad_norm": 0.8940690641944836, "learning_rate": 1.8994272170258388e-05, "loss": 0.666, "step": 8400 }, { "epoch": 0.15273136713642307, "grad_norm": 0.8917574873263462, "learning_rate": 1.8991751832325026e-05, "loss": 0.6627, "step": 8410 }, { "epoch": 0.152912973993898, "grad_norm": 0.94291799864182, "learning_rate": 1.898922850801984e-05, "loss": 0.6572, "step": 8420 }, { "epoch": 0.15309458085137295, "grad_norm": 0.8840228230176418, "learning_rate": 1.8986702198180883e-05, "loss": 0.689, "step": 8430 }, { "epoch": 0.15327618770884788, "grad_norm": 0.9475982540671414, "learning_rate": 1.898417290364721e-05, "loss": 0.6686, "step": 8440 }, { "epoch": 0.15345779456632283, "grad_norm": 1.0131771878707314, "learning_rate": 1.8981640625258855e-05, "loss": 0.6622, "step": 8450 }, { "epoch": 0.15363940142379776, "grad_norm": 0.9039219870925006, "learning_rate": 1.8979105363856846e-05, "loss": 0.6718, "step": 8460 }, { "epoch": 0.1538210082812727, "grad_norm": 0.9296349248812131, "learning_rate": 1.89765671202832e-05, "loss": 0.6665, "step": 8470 }, { "epoch": 0.15400261513874763, "grad_norm": 0.8906764973243674, "learning_rate": 1.897402589538093e-05, "loss": 0.6784, "step": 8480 }, { "epoch": 0.1541842219962226, "grad_norm": 0.905787002131705, "learning_rate": 1.897148168999404e-05, "loss": 0.6619, "step": 8490 }, { "epoch": 0.1543658288536975, "grad_norm": 0.8959517101475569, "learning_rate": 1.8968934504967514e-05, "loss": 0.6651, "step": 8500 }, { "epoch": 0.15454743571117247, "grad_norm": 0.9244612645384582, "learning_rate": 1.896638434114734e-05, "loss": 0.6676, "step": 8510 }, { "epoch": 0.1547290425686474, "grad_norm": 0.9131588345294638, "learning_rate": 1.8963831199380478e-05, "loss": 0.668, "step": 8520 }, { "epoch": 0.15491064942612234, "grad_norm": 0.89730908716075, "learning_rate": 1.8961275080514892e-05, "loss": 0.6672, "step": 8530 }, { "epoch": 0.15509225628359727, "grad_norm": 0.9351451619423865, "learning_rate": 1.895871598539953e-05, "loss": 0.6756, "step": 8540 }, { "epoch": 0.1552738631410722, "grad_norm": 0.936080705053778, "learning_rate": 1.8956153914884323e-05, "loss": 0.6786, "step": 8550 }, { "epoch": 0.15545546999854715, "grad_norm": 1.0480212945015184, "learning_rate": 1.89535888698202e-05, "loss": 0.6708, "step": 8560 }, { "epoch": 0.15563707685602207, "grad_norm": 0.9008886525296521, "learning_rate": 1.8951020851059064e-05, "loss": 0.6727, "step": 8570 }, { "epoch": 0.15581868371349702, "grad_norm": 0.9350485828154496, "learning_rate": 1.8948449859453822e-05, "loss": 0.6712, "step": 8580 }, { "epoch": 0.15600029057097195, "grad_norm": 0.9545062877937444, "learning_rate": 1.894587589585836e-05, "loss": 0.6493, "step": 8590 }, { "epoch": 0.1561818974284469, "grad_norm": 0.860033825497448, "learning_rate": 1.8943298961127553e-05, "loss": 0.6632, "step": 8600 }, { "epoch": 0.15636350428592183, "grad_norm": 0.9032154722849723, "learning_rate": 1.8940719056117256e-05, "loss": 0.6516, "step": 8610 }, { "epoch": 0.15654511114339678, "grad_norm": 0.9680248997990648, "learning_rate": 1.8938136181684318e-05, "loss": 0.6717, "step": 8620 }, { "epoch": 0.1567267180008717, "grad_norm": 0.8953236605924889, "learning_rate": 1.8935550338686577e-05, "loss": 0.6681, "step": 8630 }, { "epoch": 0.15690832485834666, "grad_norm": 0.9194994222262721, "learning_rate": 1.8932961527982846e-05, "loss": 0.6766, "step": 8640 }, { "epoch": 0.15708993171582158, "grad_norm": 0.9015966105507505, "learning_rate": 1.8930369750432932e-05, "loss": 0.6601, "step": 8650 }, { "epoch": 0.15727153857329654, "grad_norm": 0.9197382808976159, "learning_rate": 1.8927775006897627e-05, "loss": 0.6757, "step": 8660 }, { "epoch": 0.15745314543077146, "grad_norm": 0.9181909229143549, "learning_rate": 1.89251772982387e-05, "loss": 0.6718, "step": 8670 }, { "epoch": 0.15763475228824642, "grad_norm": 0.9265139066198639, "learning_rate": 1.892257662531892e-05, "loss": 0.6747, "step": 8680 }, { "epoch": 0.15781635914572134, "grad_norm": 0.9319315684356857, "learning_rate": 1.8919972989002027e-05, "loss": 0.6779, "step": 8690 }, { "epoch": 0.1579979660031963, "grad_norm": 0.889439020937144, "learning_rate": 1.8917366390152747e-05, "loss": 0.6612, "step": 8700 }, { "epoch": 0.15817957286067122, "grad_norm": 0.8839470450133247, "learning_rate": 1.8914756829636798e-05, "loss": 0.6631, "step": 8710 }, { "epoch": 0.15836117971814614, "grad_norm": 0.9268699116309675, "learning_rate": 1.8912144308320872e-05, "loss": 0.6611, "step": 8720 }, { "epoch": 0.1585427865756211, "grad_norm": 0.9533010559384496, "learning_rate": 1.8909528827072652e-05, "loss": 0.6516, "step": 8730 }, { "epoch": 0.15872439343309602, "grad_norm": 1.0080263959435383, "learning_rate": 1.8906910386760798e-05, "loss": 0.6779, "step": 8740 }, { "epoch": 0.15890600029057098, "grad_norm": 0.9323480293539326, "learning_rate": 1.8904288988254954e-05, "loss": 0.6729, "step": 8750 }, { "epoch": 0.1590876071480459, "grad_norm": 0.8769546025557158, "learning_rate": 1.8901664632425753e-05, "loss": 0.6839, "step": 8760 }, { "epoch": 0.15926921400552085, "grad_norm": 0.9349059883477517, "learning_rate": 1.8899037320144804e-05, "loss": 0.6717, "step": 8770 }, { "epoch": 0.15945082086299578, "grad_norm": 0.8841137277494416, "learning_rate": 1.8896407052284694e-05, "loss": 0.6594, "step": 8780 }, { "epoch": 0.15963242772047073, "grad_norm": 0.8719657895373323, "learning_rate": 1.8893773829719006e-05, "loss": 0.6654, "step": 8790 }, { "epoch": 0.15981403457794566, "grad_norm": 0.9393681078022827, "learning_rate": 1.8891137653322283e-05, "loss": 0.6618, "step": 8800 }, { "epoch": 0.1599956414354206, "grad_norm": 0.8907310689317379, "learning_rate": 1.8888498523970073e-05, "loss": 0.6803, "step": 8810 }, { "epoch": 0.16017724829289554, "grad_norm": 0.9566101601547832, "learning_rate": 1.8885856442538887e-05, "loss": 0.6688, "step": 8820 }, { "epoch": 0.1603588551503705, "grad_norm": 0.8741727090264532, "learning_rate": 1.888321140990622e-05, "loss": 0.6747, "step": 8830 }, { "epoch": 0.1605404620078454, "grad_norm": 0.9848620052416092, "learning_rate": 1.8880563426950554e-05, "loss": 0.669, "step": 8840 }, { "epoch": 0.16072206886532037, "grad_norm": 0.9565650029461419, "learning_rate": 1.887791249455134e-05, "loss": 0.6677, "step": 8850 }, { "epoch": 0.1609036757227953, "grad_norm": 0.9176600015272357, "learning_rate": 1.8875258613589024e-05, "loss": 0.6791, "step": 8860 }, { "epoch": 0.16108528258027024, "grad_norm": 0.8968952374478568, "learning_rate": 1.8872601784945014e-05, "loss": 0.66, "step": 8870 }, { "epoch": 0.16126688943774517, "grad_norm": 0.9624123402180444, "learning_rate": 1.886994200950171e-05, "loss": 0.6819, "step": 8880 }, { "epoch": 0.1614484962952201, "grad_norm": 0.9430988417698624, "learning_rate": 1.8867279288142483e-05, "loss": 0.6681, "step": 8890 }, { "epoch": 0.16163010315269505, "grad_norm": 0.8798916851891772, "learning_rate": 1.886461362175169e-05, "loss": 0.6613, "step": 8900 }, { "epoch": 0.16181171001016997, "grad_norm": 0.9024826036372658, "learning_rate": 1.8861945011214655e-05, "loss": 0.6712, "step": 8910 }, { "epoch": 0.16199331686764493, "grad_norm": 0.8827699023074691, "learning_rate": 1.885927345741769e-05, "loss": 0.6714, "step": 8920 }, { "epoch": 0.16217492372511985, "grad_norm": 0.8808292098203342, "learning_rate": 1.885659896124808e-05, "loss": 0.6768, "step": 8930 }, { "epoch": 0.1623565305825948, "grad_norm": 0.8980199626097165, "learning_rate": 1.8853921523594087e-05, "loss": 0.6567, "step": 8940 }, { "epoch": 0.16253813744006973, "grad_norm": 0.8954059765613723, "learning_rate": 1.885124114534495e-05, "loss": 0.6813, "step": 8950 }, { "epoch": 0.16271974429754468, "grad_norm": 0.961018153374235, "learning_rate": 1.8848557827390894e-05, "loss": 0.6661, "step": 8960 }, { "epoch": 0.1629013511550196, "grad_norm": 0.9949000584275438, "learning_rate": 1.8845871570623097e-05, "loss": 0.673, "step": 8970 }, { "epoch": 0.16308295801249456, "grad_norm": 0.941911279835903, "learning_rate": 1.884318237593374e-05, "loss": 0.6693, "step": 8980 }, { "epoch": 0.1632645648699695, "grad_norm": 0.9541296317772576, "learning_rate": 1.8840490244215966e-05, "loss": 0.6724, "step": 8990 }, { "epoch": 0.16344617172744444, "grad_norm": 0.9614852132436511, "learning_rate": 1.883779517636389e-05, "loss": 0.6682, "step": 9000 }, { "epoch": 0.16362777858491936, "grad_norm": 0.908089488923094, "learning_rate": 1.883509717327261e-05, "loss": 0.6673, "step": 9010 }, { "epoch": 0.16380938544239432, "grad_norm": 0.8981301191294633, "learning_rate": 1.8832396235838196e-05, "loss": 0.6577, "step": 9020 }, { "epoch": 0.16399099229986924, "grad_norm": 0.8984352405901469, "learning_rate": 1.882969236495769e-05, "loss": 0.663, "step": 9030 }, { "epoch": 0.16417259915734417, "grad_norm": 0.9322895536383006, "learning_rate": 1.882698556152912e-05, "loss": 0.6728, "step": 9040 }, { "epoch": 0.16435420601481912, "grad_norm": 0.9173145921156606, "learning_rate": 1.8824275826451467e-05, "loss": 0.6731, "step": 9050 }, { "epoch": 0.16453581287229405, "grad_norm": 0.9809628100328398, "learning_rate": 1.8821563160624706e-05, "loss": 0.6782, "step": 9060 }, { "epoch": 0.164717419729769, "grad_norm": 0.9022508483565174, "learning_rate": 1.881884756494978e-05, "loss": 0.6622, "step": 9070 }, { "epoch": 0.16489902658724392, "grad_norm": 0.9109858088149964, "learning_rate": 1.8816129040328587e-05, "loss": 0.6612, "step": 9080 }, { "epoch": 0.16508063344471888, "grad_norm": 0.8963274210874169, "learning_rate": 1.881340758766403e-05, "loss": 0.6763, "step": 9090 }, { "epoch": 0.1652622403021938, "grad_norm": 0.8840389334779856, "learning_rate": 1.8810683207859957e-05, "loss": 0.6682, "step": 9100 }, { "epoch": 0.16544384715966876, "grad_norm": 0.8808059011702626, "learning_rate": 1.8807955901821197e-05, "loss": 0.6678, "step": 9110 }, { "epoch": 0.16562545401714368, "grad_norm": 0.8996857956643703, "learning_rate": 1.8805225670453563e-05, "loss": 0.6649, "step": 9120 }, { "epoch": 0.16580706087461863, "grad_norm": 0.9106229090500998, "learning_rate": 1.880249251466382e-05, "loss": 0.6674, "step": 9130 }, { "epoch": 0.16598866773209356, "grad_norm": 0.9116590606926714, "learning_rate": 1.8799756435359714e-05, "loss": 0.6671, "step": 9140 }, { "epoch": 0.1661702745895685, "grad_norm": 0.9137055099664655, "learning_rate": 1.879701743344996e-05, "loss": 0.6564, "step": 9150 }, { "epoch": 0.16635188144704344, "grad_norm": 0.8916810270399452, "learning_rate": 1.8794275509844254e-05, "loss": 0.6677, "step": 9160 }, { "epoch": 0.1665334883045184, "grad_norm": 0.8874672677797987, "learning_rate": 1.8791530665453247e-05, "loss": 0.6702, "step": 9170 }, { "epoch": 0.16671509516199332, "grad_norm": 0.9285964267268814, "learning_rate": 1.878878290118856e-05, "loss": 0.6592, "step": 9180 }, { "epoch": 0.16689670201946827, "grad_norm": 0.9267200017519825, "learning_rate": 1.87860322179628e-05, "loss": 0.6741, "step": 9190 }, { "epoch": 0.1670783088769432, "grad_norm": 0.9011416335410022, "learning_rate": 1.878327861668953e-05, "loss": 0.6776, "step": 9200 }, { "epoch": 0.16725991573441812, "grad_norm": 0.9136227021496788, "learning_rate": 1.8780522098283284e-05, "loss": 0.6533, "step": 9210 }, { "epoch": 0.16744152259189307, "grad_norm": 0.9053750527389005, "learning_rate": 1.8777762663659568e-05, "loss": 0.6598, "step": 9220 }, { "epoch": 0.167623129449368, "grad_norm": 0.9016869274472938, "learning_rate": 1.8775000313734853e-05, "loss": 0.6614, "step": 9230 }, { "epoch": 0.16780473630684295, "grad_norm": 0.8777438752927479, "learning_rate": 1.877223504942658e-05, "loss": 0.6746, "step": 9240 }, { "epoch": 0.16798634316431788, "grad_norm": 0.9449223535700999, "learning_rate": 1.8769466871653167e-05, "loss": 0.6648, "step": 9250 }, { "epoch": 0.16816795002179283, "grad_norm": 0.9022026103461326, "learning_rate": 1.8766695781333976e-05, "loss": 0.6576, "step": 9260 }, { "epoch": 0.16834955687926775, "grad_norm": 0.8830237200364665, "learning_rate": 1.8763921779389363e-05, "loss": 0.6779, "step": 9270 }, { "epoch": 0.1685311637367427, "grad_norm": 0.9066159272311136, "learning_rate": 1.8761144866740632e-05, "loss": 0.681, "step": 9280 }, { "epoch": 0.16871277059421763, "grad_norm": 0.9126343551891531, "learning_rate": 1.875836504431007e-05, "loss": 0.6638, "step": 9290 }, { "epoch": 0.16889437745169258, "grad_norm": 0.8986030274900095, "learning_rate": 1.8755582313020912e-05, "loss": 0.6626, "step": 9300 }, { "epoch": 0.1690759843091675, "grad_norm": 0.8694866806290495, "learning_rate": 1.875279667379737e-05, "loss": 0.6628, "step": 9310 }, { "epoch": 0.16925759116664246, "grad_norm": 1.0276537346157701, "learning_rate": 1.8750008127564622e-05, "loss": 0.6668, "step": 9320 }, { "epoch": 0.1694391980241174, "grad_norm": 0.9715132763595875, "learning_rate": 1.874721667524881e-05, "loss": 0.6646, "step": 9330 }, { "epoch": 0.16962080488159234, "grad_norm": 0.8976617134594197, "learning_rate": 1.874442231777704e-05, "loss": 0.6591, "step": 9340 }, { "epoch": 0.16980241173906727, "grad_norm": 0.8796075444345451, "learning_rate": 1.8741625056077385e-05, "loss": 0.6639, "step": 9350 }, { "epoch": 0.16998401859654222, "grad_norm": 0.9141711063272232, "learning_rate": 1.8738824891078877e-05, "loss": 0.6787, "step": 9360 }, { "epoch": 0.17016562545401714, "grad_norm": 0.8850182073128398, "learning_rate": 1.8736021823711524e-05, "loss": 0.6524, "step": 9370 }, { "epoch": 0.17034723231149207, "grad_norm": 0.9448374078331703, "learning_rate": 1.8733215854906284e-05, "loss": 0.6792, "step": 9380 }, { "epoch": 0.17052883916896702, "grad_norm": 0.9805796293005677, "learning_rate": 1.8730406985595085e-05, "loss": 0.6612, "step": 9390 }, { "epoch": 0.17071044602644195, "grad_norm": 0.9156595841820467, "learning_rate": 1.8727595216710825e-05, "loss": 0.6661, "step": 9400 }, { "epoch": 0.1708920528839169, "grad_norm": 0.9145953516102162, "learning_rate": 1.8724780549187353e-05, "loss": 0.6666, "step": 9410 }, { "epoch": 0.17107365974139183, "grad_norm": 0.9493658367067921, "learning_rate": 1.8721962983959486e-05, "loss": 0.6762, "step": 9420 }, { "epoch": 0.17125526659886678, "grad_norm": 0.9189488942550526, "learning_rate": 1.8719142521963007e-05, "loss": 0.6795, "step": 9430 }, { "epoch": 0.1714368734563417, "grad_norm": 0.9296260099775718, "learning_rate": 1.871631916413465e-05, "loss": 0.6816, "step": 9440 }, { "epoch": 0.17161848031381666, "grad_norm": 0.9306629927980813, "learning_rate": 1.871349291141213e-05, "loss": 0.6771, "step": 9450 }, { "epoch": 0.17180008717129158, "grad_norm": 0.8992824848334818, "learning_rate": 1.8710663764734104e-05, "loss": 0.6605, "step": 9460 }, { "epoch": 0.17198169402876654, "grad_norm": 0.9212645389081897, "learning_rate": 1.8707831725040198e-05, "loss": 0.6707, "step": 9470 }, { "epoch": 0.17216330088624146, "grad_norm": 0.9495078829522784, "learning_rate": 1.8704996793271e-05, "loss": 0.6793, "step": 9480 }, { "epoch": 0.1723449077437164, "grad_norm": 0.8675663925046528, "learning_rate": 1.870215897036806e-05, "loss": 0.6587, "step": 9490 }, { "epoch": 0.17252651460119134, "grad_norm": 0.8607732468251781, "learning_rate": 1.8699318257273882e-05, "loss": 0.6555, "step": 9500 }, { "epoch": 0.1727081214586663, "grad_norm": 0.9427537135601011, "learning_rate": 1.8696474654931938e-05, "loss": 0.6799, "step": 9510 }, { "epoch": 0.17288972831614122, "grad_norm": 0.895899093611135, "learning_rate": 1.869362816428665e-05, "loss": 0.6718, "step": 9520 }, { "epoch": 0.17307133517361614, "grad_norm": 0.8716989880853256, "learning_rate": 1.8690778786283406e-05, "loss": 0.6821, "step": 9530 }, { "epoch": 0.1732529420310911, "grad_norm": 0.9058009163051288, "learning_rate": 1.8687926521868555e-05, "loss": 0.6642, "step": 9540 }, { "epoch": 0.17343454888856602, "grad_norm": 0.8590823277509672, "learning_rate": 1.8685071371989392e-05, "loss": 0.6647, "step": 9550 }, { "epoch": 0.17361615574604097, "grad_norm": 0.9031360677864261, "learning_rate": 1.8682213337594188e-05, "loss": 0.6562, "step": 9560 }, { "epoch": 0.1737977626035159, "grad_norm": 0.9294287211362984, "learning_rate": 1.8679352419632158e-05, "loss": 0.6689, "step": 9570 }, { "epoch": 0.17397936946099085, "grad_norm": 0.9220826065526591, "learning_rate": 1.8676488619053484e-05, "loss": 0.6817, "step": 9580 }, { "epoch": 0.17416097631846578, "grad_norm": 0.9102222323008147, "learning_rate": 1.8673621936809303e-05, "loss": 0.6651, "step": 9590 }, { "epoch": 0.17434258317594073, "grad_norm": 0.9033499059001578, "learning_rate": 1.8670752373851703e-05, "loss": 0.6603, "step": 9600 }, { "epoch": 0.17452419003341565, "grad_norm": 0.9264429584861695, "learning_rate": 1.866787993113373e-05, "loss": 0.6668, "step": 9610 }, { "epoch": 0.1747057968908906, "grad_norm": 0.9215895179685631, "learning_rate": 1.8665004609609395e-05, "loss": 0.6643, "step": 9620 }, { "epoch": 0.17488740374836553, "grad_norm": 0.9194187868979737, "learning_rate": 1.8662126410233662e-05, "loss": 0.6785, "step": 9630 }, { "epoch": 0.17506901060584049, "grad_norm": 0.9256858618984054, "learning_rate": 1.8659245333962444e-05, "loss": 0.6647, "step": 9640 }, { "epoch": 0.1752506174633154, "grad_norm": 0.8958197888646496, "learning_rate": 1.8656361381752616e-05, "loss": 0.6671, "step": 9650 }, { "epoch": 0.17543222432079036, "grad_norm": 0.8808678293678387, "learning_rate": 1.8653474554562004e-05, "loss": 0.6614, "step": 9660 }, { "epoch": 0.1756138311782653, "grad_norm": 0.8886734319435483, "learning_rate": 1.8650584853349394e-05, "loss": 0.6603, "step": 9670 }, { "epoch": 0.17579543803574024, "grad_norm": 0.9048895635916631, "learning_rate": 1.864769227907452e-05, "loss": 0.6777, "step": 9680 }, { "epoch": 0.17597704489321517, "grad_norm": 0.9162705465355842, "learning_rate": 1.8644796832698077e-05, "loss": 0.6735, "step": 9690 }, { "epoch": 0.1761586517506901, "grad_norm": 0.9605413913671322, "learning_rate": 1.8641898515181715e-05, "loss": 0.6762, "step": 9700 }, { "epoch": 0.17634025860816505, "grad_norm": 0.8460590803534965, "learning_rate": 1.863899732748802e-05, "loss": 0.6621, "step": 9710 }, { "epoch": 0.17652186546563997, "grad_norm": 0.9197395130166192, "learning_rate": 1.863609327058056e-05, "loss": 0.6572, "step": 9720 }, { "epoch": 0.17670347232311492, "grad_norm": 0.8852159071772866, "learning_rate": 1.8633186345423825e-05, "loss": 0.6692, "step": 9730 }, { "epoch": 0.17688507918058985, "grad_norm": 0.8767566806082937, "learning_rate": 1.8630276552983286e-05, "loss": 0.6678, "step": 9740 }, { "epoch": 0.1770666860380648, "grad_norm": 0.9056445133163169, "learning_rate": 1.8627363894225347e-05, "loss": 0.6607, "step": 9750 }, { "epoch": 0.17724829289553973, "grad_norm": 0.89717685825169, "learning_rate": 1.862444837011737e-05, "loss": 0.686, "step": 9760 }, { "epoch": 0.17742989975301468, "grad_norm": 0.869140712303309, "learning_rate": 1.8621529981627672e-05, "loss": 0.6646, "step": 9770 }, { "epoch": 0.1776115066104896, "grad_norm": 0.9366283333108916, "learning_rate": 1.861860872972552e-05, "loss": 0.6734, "step": 9780 }, { "epoch": 0.17779311346796456, "grad_norm": 0.9023665085471634, "learning_rate": 1.8615684615381123e-05, "loss": 0.6797, "step": 9790 }, { "epoch": 0.17797472032543948, "grad_norm": 0.881689273807554, "learning_rate": 1.861275763956566e-05, "loss": 0.6731, "step": 9800 }, { "epoch": 0.17815632718291444, "grad_norm": 0.8892491673330979, "learning_rate": 1.8609827803251234e-05, "loss": 0.6616, "step": 9810 }, { "epoch": 0.17833793404038936, "grad_norm": 0.8646432664942649, "learning_rate": 1.860689510741092e-05, "loss": 0.6439, "step": 9820 }, { "epoch": 0.17851954089786432, "grad_norm": 0.8870649462494121, "learning_rate": 1.8603959553018736e-05, "loss": 0.6733, "step": 9830 }, { "epoch": 0.17870114775533924, "grad_norm": 0.9126257395397516, "learning_rate": 1.8601021141049645e-05, "loss": 0.6506, "step": 9840 }, { "epoch": 0.1788827546128142, "grad_norm": 0.8925984360549715, "learning_rate": 1.8598079872479565e-05, "loss": 0.6606, "step": 9850 }, { "epoch": 0.17906436147028912, "grad_norm": 0.8769590911169747, "learning_rate": 1.8595135748285362e-05, "loss": 0.6753, "step": 9860 }, { "epoch": 0.17924596832776404, "grad_norm": 0.8675393814676615, "learning_rate": 1.859218876944484e-05, "loss": 0.6766, "step": 9870 }, { "epoch": 0.179427575185239, "grad_norm": 0.8901731027654227, "learning_rate": 1.8589238936936772e-05, "loss": 0.6647, "step": 9880 }, { "epoch": 0.17960918204271392, "grad_norm": 0.8563360046078867, "learning_rate": 1.858628625174086e-05, "loss": 0.6563, "step": 9890 }, { "epoch": 0.17979078890018887, "grad_norm": 0.9079977914807117, "learning_rate": 1.858333071483776e-05, "loss": 0.6713, "step": 9900 }, { "epoch": 0.1799723957576638, "grad_norm": 0.9224389784980636, "learning_rate": 1.8580372327209077e-05, "loss": 0.6633, "step": 9910 }, { "epoch": 0.18015400261513875, "grad_norm": 0.8842882149034061, "learning_rate": 1.8577411089837357e-05, "loss": 0.6453, "step": 9920 }, { "epoch": 0.18033560947261368, "grad_norm": 0.8944994894337551, "learning_rate": 1.8574447003706103e-05, "loss": 0.6709, "step": 9930 }, { "epoch": 0.18051721633008863, "grad_norm": 0.9308559493404445, "learning_rate": 1.8571480069799755e-05, "loss": 0.6641, "step": 9940 }, { "epoch": 0.18069882318756356, "grad_norm": 0.9352761667807539, "learning_rate": 1.85685102891037e-05, "loss": 0.6674, "step": 9950 }, { "epoch": 0.1808804300450385, "grad_norm": 0.9029152478657403, "learning_rate": 1.856553766260427e-05, "loss": 0.67, "step": 9960 }, { "epoch": 0.18106203690251343, "grad_norm": 0.9208663058457088, "learning_rate": 1.8562562191288747e-05, "loss": 0.6626, "step": 9970 }, { "epoch": 0.1812436437599884, "grad_norm": 0.8966137520839604, "learning_rate": 1.8559583876145356e-05, "loss": 0.6552, "step": 9980 }, { "epoch": 0.1814252506174633, "grad_norm": 0.8621015341665488, "learning_rate": 1.855660271816326e-05, "loss": 0.6535, "step": 9990 }, { "epoch": 0.18160685747493827, "grad_norm": 0.9500149424365281, "learning_rate": 1.8553618718332577e-05, "loss": 0.6596, "step": 10000 }, { "epoch": 0.1817884643324132, "grad_norm": 0.8804741419070612, "learning_rate": 1.8550631877644362e-05, "loss": 0.6742, "step": 10010 }, { "epoch": 0.18197007118988814, "grad_norm": 0.9235700542006762, "learning_rate": 1.8547642197090618e-05, "loss": 0.6599, "step": 10020 }, { "epoch": 0.18215167804736307, "grad_norm": 0.9266912298323196, "learning_rate": 1.8544649677664277e-05, "loss": 0.6558, "step": 10030 }, { "epoch": 0.182333284904838, "grad_norm": 0.9020396707330713, "learning_rate": 1.8541654320359238e-05, "loss": 0.6496, "step": 10040 }, { "epoch": 0.18251489176231295, "grad_norm": 0.8668496818419269, "learning_rate": 1.853865612617032e-05, "loss": 0.6576, "step": 10050 }, { "epoch": 0.18269649861978787, "grad_norm": 0.9137373151483905, "learning_rate": 1.8535655096093302e-05, "loss": 0.6484, "step": 10060 }, { "epoch": 0.18287810547726283, "grad_norm": 0.9060559886278975, "learning_rate": 1.853265123112489e-05, "loss": 0.6743, "step": 10070 }, { "epoch": 0.18305971233473775, "grad_norm": 0.9048618752990223, "learning_rate": 1.8529644532262738e-05, "loss": 0.681, "step": 10080 }, { "epoch": 0.1832413191922127, "grad_norm": 0.9337406622422129, "learning_rate": 1.8526635000505444e-05, "loss": 0.6684, "step": 10090 }, { "epoch": 0.18342292604968763, "grad_norm": 0.8551684444882508, "learning_rate": 1.8523622636852547e-05, "loss": 0.6707, "step": 10100 }, { "epoch": 0.18360453290716258, "grad_norm": 0.8891747145658401, "learning_rate": 1.8520607442304518e-05, "loss": 0.6637, "step": 10110 }, { "epoch": 0.1837861397646375, "grad_norm": 0.8575370712970501, "learning_rate": 1.8517589417862773e-05, "loss": 0.6524, "step": 10120 }, { "epoch": 0.18396774662211246, "grad_norm": 0.8423797698184826, "learning_rate": 1.8514568564529675e-05, "loss": 0.6672, "step": 10130 }, { "epoch": 0.18414935347958739, "grad_norm": 0.8841264456450905, "learning_rate": 1.8511544883308517e-05, "loss": 0.6614, "step": 10140 }, { "epoch": 0.18433096033706234, "grad_norm": 0.9095734067280564, "learning_rate": 1.8508518375203534e-05, "loss": 0.6666, "step": 10150 }, { "epoch": 0.18451256719453726, "grad_norm": 0.9144025748681253, "learning_rate": 1.85054890412199e-05, "loss": 0.6571, "step": 10160 }, { "epoch": 0.18469417405201222, "grad_norm": 0.8948024661286501, "learning_rate": 1.850245688236373e-05, "loss": 0.6683, "step": 10170 }, { "epoch": 0.18487578090948714, "grad_norm": 0.9020656549918618, "learning_rate": 1.8499421899642077e-05, "loss": 0.6582, "step": 10180 }, { "epoch": 0.18505738776696207, "grad_norm": 0.8612534538978499, "learning_rate": 1.849638409406292e-05, "loss": 0.6547, "step": 10190 }, { "epoch": 0.18523899462443702, "grad_norm": 0.9345768119794872, "learning_rate": 1.84933434666352e-05, "loss": 0.6715, "step": 10200 }, { "epoch": 0.18542060148191195, "grad_norm": 0.8852839414991407, "learning_rate": 1.8490300018368768e-05, "loss": 0.655, "step": 10210 }, { "epoch": 0.1856022083393869, "grad_norm": 0.910851675432073, "learning_rate": 1.8487253750274435e-05, "loss": 0.6597, "step": 10220 }, { "epoch": 0.18578381519686182, "grad_norm": 0.844096662249635, "learning_rate": 1.848420466336393e-05, "loss": 0.671, "step": 10230 }, { "epoch": 0.18596542205433678, "grad_norm": 0.9084059731787958, "learning_rate": 1.848115275864993e-05, "loss": 0.6601, "step": 10240 }, { "epoch": 0.1861470289118117, "grad_norm": 0.8991548177129566, "learning_rate": 1.847809803714604e-05, "loss": 0.6775, "step": 10250 }, { "epoch": 0.18632863576928665, "grad_norm": 0.8796857670756669, "learning_rate": 1.8475040499866815e-05, "loss": 0.6629, "step": 10260 }, { "epoch": 0.18651024262676158, "grad_norm": 0.8858051322561173, "learning_rate": 1.847198014782772e-05, "loss": 0.6593, "step": 10270 }, { "epoch": 0.18669184948423653, "grad_norm": 0.8692401888821515, "learning_rate": 1.8468916982045184e-05, "loss": 0.6651, "step": 10280 }, { "epoch": 0.18687345634171146, "grad_norm": 0.9297669267005013, "learning_rate": 1.8465851003536547e-05, "loss": 0.6545, "step": 10290 }, { "epoch": 0.1870550631991864, "grad_norm": 0.8912053671928187, "learning_rate": 1.8462782213320097e-05, "loss": 0.672, "step": 10300 }, { "epoch": 0.18723667005666134, "grad_norm": 0.8761843670165088, "learning_rate": 1.845971061241505e-05, "loss": 0.6653, "step": 10310 }, { "epoch": 0.1874182769141363, "grad_norm": 0.8742662745579617, "learning_rate": 1.845663620184156e-05, "loss": 0.6573, "step": 10320 }, { "epoch": 0.18759988377161121, "grad_norm": 0.9150496847881198, "learning_rate": 1.84535589826207e-05, "loss": 0.6719, "step": 10330 }, { "epoch": 0.18778149062908617, "grad_norm": 0.86766582991689, "learning_rate": 1.8450478955774497e-05, "loss": 0.6623, "step": 10340 }, { "epoch": 0.1879630974865611, "grad_norm": 0.8602352195216588, "learning_rate": 1.8447396122325898e-05, "loss": 0.6691, "step": 10350 }, { "epoch": 0.18814470434403602, "grad_norm": 0.91367675151933, "learning_rate": 1.8444310483298785e-05, "loss": 0.6768, "step": 10360 }, { "epoch": 0.18832631120151097, "grad_norm": 0.8962801414455497, "learning_rate": 1.8441222039717967e-05, "loss": 0.6616, "step": 10370 }, { "epoch": 0.1885079180589859, "grad_norm": 0.9270205774197331, "learning_rate": 1.843813079260919e-05, "loss": 0.6594, "step": 10380 }, { "epoch": 0.18868952491646085, "grad_norm": 0.9319739831032364, "learning_rate": 1.8435036742999138e-05, "loss": 0.6711, "step": 10390 }, { "epoch": 0.18887113177393577, "grad_norm": 0.9024058327046711, "learning_rate": 1.8431939891915406e-05, "loss": 0.6699, "step": 10400 }, { "epoch": 0.18905273863141073, "grad_norm": 0.8749038098164967, "learning_rate": 1.8428840240386536e-05, "loss": 0.667, "step": 10410 }, { "epoch": 0.18923434548888565, "grad_norm": 0.9034090922228188, "learning_rate": 1.8425737789441998e-05, "loss": 0.6662, "step": 10420 }, { "epoch": 0.1894159523463606, "grad_norm": 0.900872465064203, "learning_rate": 1.842263254011218e-05, "loss": 0.6653, "step": 10430 }, { "epoch": 0.18959755920383553, "grad_norm": 0.8663015396060103, "learning_rate": 1.841952449342842e-05, "loss": 0.6661, "step": 10440 }, { "epoch": 0.18977916606131048, "grad_norm": 0.9185208020362338, "learning_rate": 1.841641365042296e-05, "loss": 0.6687, "step": 10450 }, { "epoch": 0.1899607729187854, "grad_norm": 0.9089675879831727, "learning_rate": 1.8413300012128995e-05, "loss": 0.6649, "step": 10460 }, { "epoch": 0.19014237977626036, "grad_norm": 0.895262693543109, "learning_rate": 1.841018357958063e-05, "loss": 0.6666, "step": 10470 }, { "epoch": 0.1903239866337353, "grad_norm": 0.9129866887789162, "learning_rate": 1.8407064353812913e-05, "loss": 0.6622, "step": 10480 }, { "epoch": 0.19050559349121024, "grad_norm": 0.8330474481777989, "learning_rate": 1.84039423358618e-05, "loss": 0.6673, "step": 10490 }, { "epoch": 0.19068720034868517, "grad_norm": 0.957398097032084, "learning_rate": 1.84008175267642e-05, "loss": 0.6687, "step": 10500 }, { "epoch": 0.19086880720616012, "grad_norm": 0.8617094988507991, "learning_rate": 1.8397689927557932e-05, "loss": 0.6713, "step": 10510 }, { "epoch": 0.19105041406363504, "grad_norm": 0.8696448968837681, "learning_rate": 1.8394559539281737e-05, "loss": 0.6581, "step": 10520 }, { "epoch": 0.19123202092110997, "grad_norm": 0.9317998299175133, "learning_rate": 1.8391426362975298e-05, "loss": 0.6526, "step": 10530 }, { "epoch": 0.19141362777858492, "grad_norm": 0.8870517302758233, "learning_rate": 1.8388290399679216e-05, "loss": 0.6648, "step": 10540 }, { "epoch": 0.19159523463605985, "grad_norm": 0.8955745837990094, "learning_rate": 1.8385151650435015e-05, "loss": 0.6538, "step": 10550 }, { "epoch": 0.1917768414935348, "grad_norm": 0.8637710697055085, "learning_rate": 1.838201011628515e-05, "loss": 0.656, "step": 10560 }, { "epoch": 0.19195844835100973, "grad_norm": 0.8556017772660319, "learning_rate": 1.8378865798272993e-05, "loss": 0.6578, "step": 10570 }, { "epoch": 0.19214005520848468, "grad_norm": 0.8632630469825513, "learning_rate": 1.8375718697442853e-05, "loss": 0.6619, "step": 10580 }, { "epoch": 0.1923216620659596, "grad_norm": 0.9227248997068084, "learning_rate": 1.8372568814839952e-05, "loss": 0.6636, "step": 10590 }, { "epoch": 0.19250326892343456, "grad_norm": 0.8471877549456798, "learning_rate": 1.8369416151510445e-05, "loss": 0.6729, "step": 10600 }, { "epoch": 0.19268487578090948, "grad_norm": 0.9131225358173415, "learning_rate": 1.8366260708501394e-05, "loss": 0.6586, "step": 10610 }, { "epoch": 0.19286648263838443, "grad_norm": 0.8832818669019323, "learning_rate": 1.836310248686081e-05, "loss": 0.665, "step": 10620 }, { "epoch": 0.19304808949585936, "grad_norm": 0.939655191283715, "learning_rate": 1.8359941487637602e-05, "loss": 0.6633, "step": 10630 }, { "epoch": 0.1932296963533343, "grad_norm": 0.9189417861109649, "learning_rate": 1.8356777711881614e-05, "loss": 0.6562, "step": 10640 }, { "epoch": 0.19341130321080924, "grad_norm": 0.8698806332775636, "learning_rate": 1.8353611160643613e-05, "loss": 0.6657, "step": 10650 }, { "epoch": 0.1935929100682842, "grad_norm": 0.8710115166467098, "learning_rate": 1.8350441834975283e-05, "loss": 0.6708, "step": 10660 }, { "epoch": 0.19377451692575912, "grad_norm": 0.8530389250736495, "learning_rate": 1.8347269735929233e-05, "loss": 0.6585, "step": 10670 }, { "epoch": 0.19395612378323404, "grad_norm": 0.8630120134448251, "learning_rate": 1.8344094864558987e-05, "loss": 0.6504, "step": 10680 }, { "epoch": 0.194137730640709, "grad_norm": 0.9069846027583208, "learning_rate": 1.8340917221919002e-05, "loss": 0.6628, "step": 10690 }, { "epoch": 0.19431933749818392, "grad_norm": 0.8548154848686844, "learning_rate": 1.8337736809064642e-05, "loss": 0.6625, "step": 10700 }, { "epoch": 0.19450094435565887, "grad_norm": 0.8952896738080183, "learning_rate": 1.83345536270522e-05, "loss": 0.6645, "step": 10710 }, { "epoch": 0.1946825512131338, "grad_norm": 0.8503612840867971, "learning_rate": 1.8331367676938884e-05, "loss": 0.6612, "step": 10720 }, { "epoch": 0.19486415807060875, "grad_norm": 0.871120684627442, "learning_rate": 1.8328178959782822e-05, "loss": 0.6616, "step": 10730 }, { "epoch": 0.19504576492808368, "grad_norm": 0.863367632002504, "learning_rate": 1.8324987476643064e-05, "loss": 0.6649, "step": 10740 }, { "epoch": 0.19522737178555863, "grad_norm": 0.8896350415058336, "learning_rate": 1.832179322857957e-05, "loss": 0.6624, "step": 10750 }, { "epoch": 0.19540897864303355, "grad_norm": 0.9103703372882971, "learning_rate": 1.8318596216653234e-05, "loss": 0.6818, "step": 10760 }, { "epoch": 0.1955905855005085, "grad_norm": 0.9325189747732964, "learning_rate": 1.8315396441925854e-05, "loss": 0.6622, "step": 10770 }, { "epoch": 0.19577219235798343, "grad_norm": 0.903186609402833, "learning_rate": 1.831219390546015e-05, "loss": 0.663, "step": 10780 }, { "epoch": 0.19595379921545839, "grad_norm": 0.8847752748181842, "learning_rate": 1.830898860831976e-05, "loss": 0.655, "step": 10790 }, { "epoch": 0.1961354060729333, "grad_norm": 0.93151592418549, "learning_rate": 1.8305780551569236e-05, "loss": 0.6617, "step": 10800 }, { "epoch": 0.19631701293040826, "grad_norm": 0.8970888711619099, "learning_rate": 1.8302569736274057e-05, "loss": 0.6615, "step": 10810 }, { "epoch": 0.1964986197878832, "grad_norm": 0.8979609587766876, "learning_rate": 1.82993561635006e-05, "loss": 0.6547, "step": 10820 }, { "epoch": 0.19668022664535814, "grad_norm": 0.8826214490641865, "learning_rate": 1.8296139834316177e-05, "loss": 0.6727, "step": 10830 }, { "epoch": 0.19686183350283307, "grad_norm": 0.903569327863966, "learning_rate": 1.8292920749788998e-05, "loss": 0.6679, "step": 10840 }, { "epoch": 0.197043440360308, "grad_norm": 0.9236103316890073, "learning_rate": 1.82896989109882e-05, "loss": 0.6809, "step": 10850 }, { "epoch": 0.19722504721778295, "grad_norm": 0.871067352200625, "learning_rate": 1.8286474318983837e-05, "loss": 0.6636, "step": 10860 }, { "epoch": 0.19740665407525787, "grad_norm": 0.8752118593901772, "learning_rate": 1.828324697484686e-05, "loss": 0.6634, "step": 10870 }, { "epoch": 0.19758826093273282, "grad_norm": 2.0824418301541074, "learning_rate": 1.8280016879649155e-05, "loss": 0.6604, "step": 10880 }, { "epoch": 0.19776986779020775, "grad_norm": 0.9197639419241672, "learning_rate": 1.827678403446351e-05, "loss": 0.6635, "step": 10890 }, { "epoch": 0.1979514746476827, "grad_norm": 0.9312343884973956, "learning_rate": 1.827354844036363e-05, "loss": 0.6698, "step": 10900 }, { "epoch": 0.19813308150515763, "grad_norm": 0.9036794253804332, "learning_rate": 1.8270310098424128e-05, "loss": 0.6641, "step": 10910 }, { "epoch": 0.19831468836263258, "grad_norm": 0.9106393192737863, "learning_rate": 1.826706900972054e-05, "loss": 0.6733, "step": 10920 }, { "epoch": 0.1984962952201075, "grad_norm": 0.9058523978706446, "learning_rate": 1.8263825175329296e-05, "loss": 0.6526, "step": 10930 }, { "epoch": 0.19867790207758246, "grad_norm": 0.8928345149848897, "learning_rate": 1.8260578596327762e-05, "loss": 0.6611, "step": 10940 }, { "epoch": 0.19885950893505738, "grad_norm": 0.9052593441073535, "learning_rate": 1.8257329273794196e-05, "loss": 0.662, "step": 10950 }, { "epoch": 0.19904111579253234, "grad_norm": 0.8985753645249634, "learning_rate": 1.8254077208807776e-05, "loss": 0.663, "step": 10960 }, { "epoch": 0.19922272265000726, "grad_norm": 0.852926018252108, "learning_rate": 1.8250822402448587e-05, "loss": 0.6639, "step": 10970 }, { "epoch": 0.19940432950748221, "grad_norm": 0.9316001392324651, "learning_rate": 1.824756485579763e-05, "loss": 0.6646, "step": 10980 }, { "epoch": 0.19958593636495714, "grad_norm": 0.8896212541531933, "learning_rate": 1.8244304569936813e-05, "loss": 0.6438, "step": 10990 }, { "epoch": 0.1997675432224321, "grad_norm": 0.8743559669967436, "learning_rate": 1.824104154594895e-05, "loss": 0.6601, "step": 11000 }, { "epoch": 0.19994915007990702, "grad_norm": 0.8464293277637592, "learning_rate": 1.823777578491777e-05, "loss": 0.6674, "step": 11010 }, { "epoch": 0.20013075693738194, "grad_norm": 0.8884024658870917, "learning_rate": 1.823450728792791e-05, "loss": 0.6782, "step": 11020 }, { "epoch": 0.2003123637948569, "grad_norm": 1.1031110810202045, "learning_rate": 1.8231236056064915e-05, "loss": 0.6555, "step": 11030 }, { "epoch": 0.20049397065233182, "grad_norm": 0.8668006473464908, "learning_rate": 1.8227962090415233e-05, "loss": 0.6568, "step": 11040 }, { "epoch": 0.20067557750980677, "grad_norm": 0.8549202866370879, "learning_rate": 1.822468539206623e-05, "loss": 0.672, "step": 11050 }, { "epoch": 0.2008571843672817, "grad_norm": 0.867245127069732, "learning_rate": 1.822140596210617e-05, "loss": 0.6586, "step": 11060 }, { "epoch": 0.20103879122475665, "grad_norm": 0.8680590376133601, "learning_rate": 1.821812380162423e-05, "loss": 0.6492, "step": 11070 }, { "epoch": 0.20122039808223158, "grad_norm": 0.9022281326591903, "learning_rate": 1.8214838911710497e-05, "loss": 0.6634, "step": 11080 }, { "epoch": 0.20140200493970653, "grad_norm": 0.8849639974025364, "learning_rate": 1.8211551293455953e-05, "loss": 0.6742, "step": 11090 }, { "epoch": 0.20158361179718146, "grad_norm": 0.9426623904285061, "learning_rate": 1.82082609479525e-05, "loss": 0.6641, "step": 11100 }, { "epoch": 0.2017652186546564, "grad_norm": 0.8497466218790415, "learning_rate": 1.8204967876292934e-05, "loss": 0.651, "step": 11110 }, { "epoch": 0.20194682551213133, "grad_norm": 0.8598770707744482, "learning_rate": 1.8201672079570965e-05, "loss": 0.6545, "step": 11120 }, { "epoch": 0.2021284323696063, "grad_norm": 0.9115586496191539, "learning_rate": 1.8198373558881203e-05, "loss": 0.6698, "step": 11130 }, { "epoch": 0.2023100392270812, "grad_norm": 0.917563188345291, "learning_rate": 1.8195072315319156e-05, "loss": 0.6777, "step": 11140 }, { "epoch": 0.20249164608455616, "grad_norm": 0.9029790698886566, "learning_rate": 1.819176834998126e-05, "loss": 0.6503, "step": 11150 }, { "epoch": 0.2026732529420311, "grad_norm": 0.910979663965766, "learning_rate": 1.818846166396483e-05, "loss": 0.67, "step": 11160 }, { "epoch": 0.20285485979950604, "grad_norm": 0.8328299442516779, "learning_rate": 1.8185152258368094e-05, "loss": 0.6519, "step": 11170 }, { "epoch": 0.20303646665698097, "grad_norm": 0.9058063414450518, "learning_rate": 1.8181840134290184e-05, "loss": 0.6739, "step": 11180 }, { "epoch": 0.2032180735144559, "grad_norm": 0.851673650697749, "learning_rate": 1.817852529283114e-05, "loss": 0.6579, "step": 11190 }, { "epoch": 0.20339968037193085, "grad_norm": 0.858735026754962, "learning_rate": 1.817520773509189e-05, "loss": 0.665, "step": 11200 }, { "epoch": 0.20358128722940577, "grad_norm": 0.8566961344113166, "learning_rate": 1.8171887462174277e-05, "loss": 0.6631, "step": 11210 }, { "epoch": 0.20376289408688072, "grad_norm": 0.8824919848431362, "learning_rate": 1.816856447518104e-05, "loss": 0.6567, "step": 11220 }, { "epoch": 0.20394450094435565, "grad_norm": 0.8779265472884943, "learning_rate": 1.8165238775215826e-05, "loss": 0.6626, "step": 11230 }, { "epoch": 0.2041261078018306, "grad_norm": 0.8989044356909198, "learning_rate": 1.816191036338317e-05, "loss": 0.6559, "step": 11240 }, { "epoch": 0.20430771465930553, "grad_norm": 0.8507182314152735, "learning_rate": 1.815857924078852e-05, "loss": 0.6595, "step": 11250 }, { "epoch": 0.20448932151678048, "grad_norm": 0.8653544929856848, "learning_rate": 1.8155245408538222e-05, "loss": 0.6615, "step": 11260 }, { "epoch": 0.2046709283742554, "grad_norm": 0.9023195705046002, "learning_rate": 1.8151908867739515e-05, "loss": 0.6564, "step": 11270 }, { "epoch": 0.20485253523173036, "grad_norm": 0.9092019456518251, "learning_rate": 1.8148569619500548e-05, "loss": 0.6676, "step": 11280 }, { "epoch": 0.20503414208920528, "grad_norm": 0.876692767369164, "learning_rate": 1.8145227664930358e-05, "loss": 0.6629, "step": 11290 }, { "epoch": 0.20521574894668024, "grad_norm": 0.9031844424580083, "learning_rate": 1.814188300513889e-05, "loss": 0.6639, "step": 11300 }, { "epoch": 0.20539735580415516, "grad_norm": 0.8839045425881437, "learning_rate": 1.8138535641236984e-05, "loss": 0.66, "step": 11310 }, { "epoch": 0.20557896266163012, "grad_norm": 0.8708152634511429, "learning_rate": 1.813518557433638e-05, "loss": 0.6739, "step": 11320 }, { "epoch": 0.20576056951910504, "grad_norm": 0.872667923009569, "learning_rate": 1.8131832805549708e-05, "loss": 0.6583, "step": 11330 }, { "epoch": 0.20594217637657997, "grad_norm": 0.9093617370467278, "learning_rate": 1.8128477335990507e-05, "loss": 0.6665, "step": 11340 }, { "epoch": 0.20612378323405492, "grad_norm": 0.8682876320649524, "learning_rate": 1.81251191667732e-05, "loss": 0.6589, "step": 11350 }, { "epoch": 0.20630539009152984, "grad_norm": 0.8570747361349121, "learning_rate": 1.8121758299013122e-05, "loss": 0.6601, "step": 11360 }, { "epoch": 0.2064869969490048, "grad_norm": 0.9040108800909268, "learning_rate": 1.8118394733826492e-05, "loss": 0.6499, "step": 11370 }, { "epoch": 0.20666860380647972, "grad_norm": 0.9055987223514259, "learning_rate": 1.8115028472330432e-05, "loss": 0.6442, "step": 11380 }, { "epoch": 0.20685021066395468, "grad_norm": 0.8747097487403799, "learning_rate": 1.811165951564295e-05, "loss": 0.6434, "step": 11390 }, { "epoch": 0.2070318175214296, "grad_norm": 0.8707645395275526, "learning_rate": 1.8108287864882962e-05, "loss": 0.6706, "step": 11400 }, { "epoch": 0.20721342437890455, "grad_norm": 0.889081517766749, "learning_rate": 1.810491352117027e-05, "loss": 0.6585, "step": 11410 }, { "epoch": 0.20739503123637948, "grad_norm": 0.9326314635521358, "learning_rate": 1.8101536485625576e-05, "loss": 0.6595, "step": 11420 }, { "epoch": 0.20757663809385443, "grad_norm": 0.8842118345948157, "learning_rate": 1.8098156759370464e-05, "loss": 0.6706, "step": 11430 }, { "epoch": 0.20775824495132936, "grad_norm": 0.899331927229011, "learning_rate": 1.809477434352743e-05, "loss": 0.6647, "step": 11440 }, { "epoch": 0.2079398518088043, "grad_norm": 0.8885048057777032, "learning_rate": 1.809138923921985e-05, "loss": 0.6575, "step": 11450 }, { "epoch": 0.20812145866627924, "grad_norm": 0.8839498316013631, "learning_rate": 1.808800144757199e-05, "loss": 0.6729, "step": 11460 }, { "epoch": 0.2083030655237542, "grad_norm": 0.9064322070474584, "learning_rate": 1.8084610969709024e-05, "loss": 0.677, "step": 11470 }, { "epoch": 0.2084846723812291, "grad_norm": 0.8072248050564691, "learning_rate": 1.808121780675701e-05, "loss": 0.6491, "step": 11480 }, { "epoch": 0.20866627923870407, "grad_norm": 0.8724046145929641, "learning_rate": 1.8077821959842888e-05, "loss": 0.6568, "step": 11490 }, { "epoch": 0.208847886096179, "grad_norm": 0.869750204496696, "learning_rate": 1.807442343009451e-05, "loss": 0.6644, "step": 11500 }, { "epoch": 0.20902949295365392, "grad_norm": 0.8663161497315843, "learning_rate": 1.80710222186406e-05, "loss": 0.6618, "step": 11510 }, { "epoch": 0.20921109981112887, "grad_norm": 0.8801568304858264, "learning_rate": 1.8067618326610777e-05, "loss": 0.6592, "step": 11520 }, { "epoch": 0.2093927066686038, "grad_norm": 0.8615085740910665, "learning_rate": 1.8064211755135567e-05, "loss": 0.6462, "step": 11530 }, { "epoch": 0.20957431352607875, "grad_norm": 0.846215124623668, "learning_rate": 1.8060802505346355e-05, "loss": 0.664, "step": 11540 }, { "epoch": 0.20975592038355367, "grad_norm": 0.8820384446936922, "learning_rate": 1.8057390578375445e-05, "loss": 0.6674, "step": 11550 }, { "epoch": 0.20993752724102863, "grad_norm": 0.8850321637838668, "learning_rate": 1.8053975975356015e-05, "loss": 0.664, "step": 11560 }, { "epoch": 0.21011913409850355, "grad_norm": 0.8982250386030642, "learning_rate": 1.8050558697422132e-05, "loss": 0.6556, "step": 11570 }, { "epoch": 0.2103007409559785, "grad_norm": 0.8714401420633962, "learning_rate": 1.8047138745708758e-05, "loss": 0.647, "step": 11580 }, { "epoch": 0.21048234781345343, "grad_norm": 0.8699058213676436, "learning_rate": 1.804371612135174e-05, "loss": 0.6601, "step": 11590 }, { "epoch": 0.21066395467092838, "grad_norm": 0.8505900181161022, "learning_rate": 1.8040290825487807e-05, "loss": 0.6559, "step": 11600 }, { "epoch": 0.2108455615284033, "grad_norm": 0.9038161393623574, "learning_rate": 1.8036862859254582e-05, "loss": 0.6599, "step": 11610 }, { "epoch": 0.21102716838587826, "grad_norm": 0.863412131567686, "learning_rate": 1.8033432223790574e-05, "loss": 0.6585, "step": 11620 }, { "epoch": 0.2112087752433532, "grad_norm": 0.838554413902621, "learning_rate": 1.8029998920235177e-05, "loss": 0.6461, "step": 11630 }, { "epoch": 0.21139038210082814, "grad_norm": 0.8705343090424966, "learning_rate": 1.8026562949728676e-05, "loss": 0.6555, "step": 11640 }, { "epoch": 0.21157198895830306, "grad_norm": 0.9021819439343641, "learning_rate": 1.802312431341223e-05, "loss": 0.6584, "step": 11650 }, { "epoch": 0.21175359581577802, "grad_norm": 0.8785346864582364, "learning_rate": 1.8019683012427894e-05, "loss": 0.6641, "step": 11660 }, { "epoch": 0.21193520267325294, "grad_norm": 0.8818418113353871, "learning_rate": 1.8016239047918604e-05, "loss": 0.6473, "step": 11670 }, { "epoch": 0.21211680953072787, "grad_norm": 0.8750419891883443, "learning_rate": 1.8012792421028185e-05, "loss": 0.6553, "step": 11680 }, { "epoch": 0.21229841638820282, "grad_norm": 0.880524601723996, "learning_rate": 1.8009343132901338e-05, "loss": 0.6585, "step": 11690 }, { "epoch": 0.21248002324567775, "grad_norm": 0.8692780543637455, "learning_rate": 1.8005891184683657e-05, "loss": 0.6604, "step": 11700 }, { "epoch": 0.2126616301031527, "grad_norm": 0.8513800071040694, "learning_rate": 1.8002436577521613e-05, "loss": 0.6558, "step": 11710 }, { "epoch": 0.21284323696062762, "grad_norm": 0.8815200306598779, "learning_rate": 1.7998979312562558e-05, "loss": 0.6515, "step": 11720 }, { "epoch": 0.21302484381810258, "grad_norm": 0.8486005396726578, "learning_rate": 1.799551939095474e-05, "loss": 0.6527, "step": 11730 }, { "epoch": 0.2132064506755775, "grad_norm": 0.8867958968725242, "learning_rate": 1.799205681384727e-05, "loss": 0.6527, "step": 11740 }, { "epoch": 0.21338805753305246, "grad_norm": 0.8764722298353533, "learning_rate": 1.798859158239016e-05, "loss": 0.6559, "step": 11750 }, { "epoch": 0.21356966439052738, "grad_norm": 0.8822387438429383, "learning_rate": 1.798512369773429e-05, "loss": 0.6651, "step": 11760 }, { "epoch": 0.21375127124800233, "grad_norm": 0.8609128442418139, "learning_rate": 1.7981653161031425e-05, "loss": 0.6684, "step": 11770 }, { "epoch": 0.21393287810547726, "grad_norm": 0.8542798966510925, "learning_rate": 1.7978179973434213e-05, "loss": 0.646, "step": 11780 }, { "epoch": 0.2141144849629522, "grad_norm": 0.8510485381292806, "learning_rate": 1.7974704136096185e-05, "loss": 0.6502, "step": 11790 }, { "epoch": 0.21429609182042714, "grad_norm": 0.8710457808264653, "learning_rate": 1.797122565017174e-05, "loss": 0.6489, "step": 11800 }, { "epoch": 0.2144776986779021, "grad_norm": 0.8258004538538546, "learning_rate": 1.796774451681617e-05, "loss": 0.6578, "step": 11810 }, { "epoch": 0.21465930553537702, "grad_norm": 0.8546785885045791, "learning_rate": 1.7964260737185643e-05, "loss": 0.6669, "step": 11820 }, { "epoch": 0.21484091239285194, "grad_norm": 0.8873584245665047, "learning_rate": 1.7960774312437198e-05, "loss": 0.6534, "step": 11830 }, { "epoch": 0.2150225192503269, "grad_norm": 0.8792905638071611, "learning_rate": 1.7957285243728768e-05, "loss": 0.6682, "step": 11840 }, { "epoch": 0.21520412610780182, "grad_norm": 0.8807617112504986, "learning_rate": 1.7953793532219144e-05, "loss": 0.6621, "step": 11850 }, { "epoch": 0.21538573296527677, "grad_norm": 0.8667759103876027, "learning_rate": 1.795029917906801e-05, "loss": 0.6492, "step": 11860 }, { "epoch": 0.2155673398227517, "grad_norm": 0.8807257947255568, "learning_rate": 1.7946802185435923e-05, "loss": 0.661, "step": 11870 }, { "epoch": 0.21574894668022665, "grad_norm": 0.8507807179198267, "learning_rate": 1.7943302552484318e-05, "loss": 0.6553, "step": 11880 }, { "epoch": 0.21593055353770158, "grad_norm": 0.8947830500920535, "learning_rate": 1.7939800281375503e-05, "loss": 0.6631, "step": 11890 }, { "epoch": 0.21611216039517653, "grad_norm": 0.8739000456874879, "learning_rate": 1.793629537327266e-05, "loss": 0.6609, "step": 11900 }, { "epoch": 0.21629376725265145, "grad_norm": 0.89617688664167, "learning_rate": 1.7932787829339862e-05, "loss": 0.6678, "step": 11910 }, { "epoch": 0.2164753741101264, "grad_norm": 0.9282750489707705, "learning_rate": 1.792927765074204e-05, "loss": 0.6583, "step": 11920 }, { "epoch": 0.21665698096760133, "grad_norm": 0.8689279860637149, "learning_rate": 1.7925764838645006e-05, "loss": 0.6603, "step": 11930 }, { "epoch": 0.21683858782507628, "grad_norm": 0.930994130065292, "learning_rate": 1.792224939421545e-05, "loss": 0.6591, "step": 11940 }, { "epoch": 0.2170201946825512, "grad_norm": 0.8933651249891562, "learning_rate": 1.7918731318620933e-05, "loss": 0.6428, "step": 11950 }, { "epoch": 0.21720180154002616, "grad_norm": 0.8369064132065412, "learning_rate": 1.791521061302989e-05, "loss": 0.6601, "step": 11960 }, { "epoch": 0.2173834083975011, "grad_norm": 0.922842564563028, "learning_rate": 1.7911687278611624e-05, "loss": 0.6566, "step": 11970 }, { "epoch": 0.21756501525497604, "grad_norm": 0.8320666698404093, "learning_rate": 1.790816131653633e-05, "loss": 0.6582, "step": 11980 }, { "epoch": 0.21774662211245097, "grad_norm": 0.8917362415480394, "learning_rate": 1.7904632727975052e-05, "loss": 0.6611, "step": 11990 }, { "epoch": 0.2179282289699259, "grad_norm": 0.8597866368608351, "learning_rate": 1.7901101514099725e-05, "loss": 0.6659, "step": 12000 }, { "epoch": 0.21810983582740084, "grad_norm": 0.8956838522493162, "learning_rate": 1.789756767608314e-05, "loss": 0.6755, "step": 12010 }, { "epoch": 0.21829144268487577, "grad_norm": 0.9048439040296938, "learning_rate": 1.7894031215098972e-05, "loss": 0.6533, "step": 12020 }, { "epoch": 0.21847304954235072, "grad_norm": 0.8925599733910916, "learning_rate": 1.7890492132321765e-05, "loss": 0.6572, "step": 12030 }, { "epoch": 0.21865465639982565, "grad_norm": 0.8540928002539943, "learning_rate": 1.7886950428926924e-05, "loss": 0.6525, "step": 12040 }, { "epoch": 0.2188362632573006, "grad_norm": 0.8499942045937747, "learning_rate": 1.788340610609074e-05, "loss": 0.6484, "step": 12050 }, { "epoch": 0.21901787011477553, "grad_norm": 0.8450384783282379, "learning_rate": 1.787985916499036e-05, "loss": 0.6597, "step": 12060 }, { "epoch": 0.21919947697225048, "grad_norm": 0.9467145154395716, "learning_rate": 1.7876309606803807e-05, "loss": 0.6669, "step": 12070 }, { "epoch": 0.2193810838297254, "grad_norm": 0.8869028842950375, "learning_rate": 1.7872757432709975e-05, "loss": 0.65, "step": 12080 }, { "epoch": 0.21956269068720036, "grad_norm": 0.9291920126595652, "learning_rate": 1.786920264388863e-05, "loss": 0.642, "step": 12090 }, { "epoch": 0.21974429754467528, "grad_norm": 0.9063904645003658, "learning_rate": 1.7865645241520386e-05, "loss": 0.6489, "step": 12100 }, { "epoch": 0.21992590440215024, "grad_norm": 0.8774320908912441, "learning_rate": 1.7862085226786748e-05, "loss": 0.6705, "step": 12110 }, { "epoch": 0.22010751125962516, "grad_norm": 0.8329092967830967, "learning_rate": 1.7858522600870088e-05, "loss": 0.6411, "step": 12120 }, { "epoch": 0.2202891181171001, "grad_norm": 0.8658815778796111, "learning_rate": 1.7854957364953625e-05, "loss": 0.6592, "step": 12130 }, { "epoch": 0.22047072497457504, "grad_norm": 0.8683367687536903, "learning_rate": 1.7851389520221463e-05, "loss": 0.6552, "step": 12140 }, { "epoch": 0.22065233183205, "grad_norm": 0.8693473942133426, "learning_rate": 1.7847819067858568e-05, "loss": 0.6672, "step": 12150 }, { "epoch": 0.22083393868952492, "grad_norm": 0.8533218183865935, "learning_rate": 1.7844246009050767e-05, "loss": 0.6624, "step": 12160 }, { "epoch": 0.22101554554699984, "grad_norm": 0.8539553189968166, "learning_rate": 1.7840670344984764e-05, "loss": 0.6797, "step": 12170 }, { "epoch": 0.2211971524044748, "grad_norm": 0.8372913559141172, "learning_rate": 1.783709207684811e-05, "loss": 0.6622, "step": 12180 }, { "epoch": 0.22137875926194972, "grad_norm": 0.9179230748366217, "learning_rate": 1.7833511205829245e-05, "loss": 0.6677, "step": 12190 }, { "epoch": 0.22156036611942467, "grad_norm": 0.8574532172412569, "learning_rate": 1.7829927733117452e-05, "loss": 0.6656, "step": 12200 }, { "epoch": 0.2217419729768996, "grad_norm": 0.8826656924259856, "learning_rate": 1.7826341659902886e-05, "loss": 0.6457, "step": 12210 }, { "epoch": 0.22192357983437455, "grad_norm": 1.1171878030247109, "learning_rate": 1.782275298737657e-05, "loss": 0.6575, "step": 12220 }, { "epoch": 0.22210518669184948, "grad_norm": 0.8943953396703331, "learning_rate": 1.781916171673038e-05, "loss": 0.6615, "step": 12230 }, { "epoch": 0.22228679354932443, "grad_norm": 0.9057807703866023, "learning_rate": 1.7815567849157068e-05, "loss": 0.6649, "step": 12240 }, { "epoch": 0.22246840040679935, "grad_norm": 0.8784824854536468, "learning_rate": 1.7811971385850242e-05, "loss": 0.6543, "step": 12250 }, { "epoch": 0.2226500072642743, "grad_norm": 0.8401617400414855, "learning_rate": 1.7808372328004368e-05, "loss": 0.6619, "step": 12260 }, { "epoch": 0.22283161412174923, "grad_norm": 0.8760315016231095, "learning_rate": 1.780477067681478e-05, "loss": 0.6493, "step": 12270 }, { "epoch": 0.22301322097922419, "grad_norm": 0.9113059005387253, "learning_rate": 1.7801166433477668e-05, "loss": 0.6702, "step": 12280 }, { "epoch": 0.2231948278366991, "grad_norm": 0.8873055399692047, "learning_rate": 1.7797559599190085e-05, "loss": 0.6528, "step": 12290 }, { "epoch": 0.22337643469417406, "grad_norm": 0.8543952519322505, "learning_rate": 1.7793950175149953e-05, "loss": 0.6615, "step": 12300 }, { "epoch": 0.223558041551649, "grad_norm": 0.8863024846207143, "learning_rate": 1.7790338162556036e-05, "loss": 0.6622, "step": 12310 }, { "epoch": 0.22373964840912394, "grad_norm": 0.8553949661381182, "learning_rate": 1.778672356260798e-05, "loss": 0.6452, "step": 12320 }, { "epoch": 0.22392125526659887, "grad_norm": 0.8328375260252014, "learning_rate": 1.778310637650627e-05, "loss": 0.6517, "step": 12330 }, { "epoch": 0.2241028621240738, "grad_norm": 0.8526689760256284, "learning_rate": 1.7779486605452256e-05, "loss": 0.6588, "step": 12340 }, { "epoch": 0.22428446898154875, "grad_norm": 0.806947959421359, "learning_rate": 1.7775864250648157e-05, "loss": 0.6597, "step": 12350 }, { "epoch": 0.22446607583902367, "grad_norm": 0.863083241800036, "learning_rate": 1.777223931329704e-05, "loss": 0.6629, "step": 12360 }, { "epoch": 0.22464768269649862, "grad_norm": 0.8728913057364109, "learning_rate": 1.7768611794602826e-05, "loss": 0.6576, "step": 12370 }, { "epoch": 0.22482928955397355, "grad_norm": 0.8725953089293471, "learning_rate": 1.7764981695770303e-05, "loss": 0.6638, "step": 12380 }, { "epoch": 0.2250108964114485, "grad_norm": 0.8462034594787662, "learning_rate": 1.7761349018005115e-05, "loss": 0.6575, "step": 12390 }, { "epoch": 0.22519250326892343, "grad_norm": 0.8354468918425442, "learning_rate": 1.7757713762513757e-05, "loss": 0.6618, "step": 12400 }, { "epoch": 0.22537411012639838, "grad_norm": 0.890146083412934, "learning_rate": 1.7754075930503584e-05, "loss": 0.6623, "step": 12410 }, { "epoch": 0.2255557169838733, "grad_norm": 0.836723374781763, "learning_rate": 1.77504355231828e-05, "loss": 0.653, "step": 12420 }, { "epoch": 0.22573732384134826, "grad_norm": 0.9127245211286444, "learning_rate": 1.7746792541760476e-05, "loss": 0.6633, "step": 12430 }, { "epoch": 0.22591893069882318, "grad_norm": 0.8928604552886074, "learning_rate": 1.774314698744653e-05, "loss": 0.6662, "step": 12440 }, { "epoch": 0.22610053755629814, "grad_norm": 0.8541945079331733, "learning_rate": 1.773949886145173e-05, "loss": 0.6434, "step": 12450 }, { "epoch": 0.22628214441377306, "grad_norm": 0.8718566240458835, "learning_rate": 1.7735848164987715e-05, "loss": 0.6403, "step": 12460 }, { "epoch": 0.22646375127124801, "grad_norm": 0.8336153483723912, "learning_rate": 1.7732194899266958e-05, "loss": 0.6509, "step": 12470 }, { "epoch": 0.22664535812872294, "grad_norm": 0.8939467125211618, "learning_rate": 1.7728539065502804e-05, "loss": 0.6602, "step": 12480 }, { "epoch": 0.22682696498619787, "grad_norm": 0.8940149415183093, "learning_rate": 1.7724880664909428e-05, "loss": 0.6649, "step": 12490 }, { "epoch": 0.22700857184367282, "grad_norm": 0.8307438499446056, "learning_rate": 1.772121969870188e-05, "loss": 0.666, "step": 12500 }, { "epoch": 0.22719017870114774, "grad_norm": 0.8421016849355495, "learning_rate": 1.7717556168096054e-05, "loss": 0.6583, "step": 12510 }, { "epoch": 0.2273717855586227, "grad_norm": 0.8863686753859604, "learning_rate": 1.771389007430869e-05, "loss": 0.6662, "step": 12520 }, { "epoch": 0.22755339241609762, "grad_norm": 0.8795497388166233, "learning_rate": 1.771022141855738e-05, "loss": 0.6505, "step": 12530 }, { "epoch": 0.22773499927357257, "grad_norm": 0.8579165224912773, "learning_rate": 1.770655020206058e-05, "loss": 0.6528, "step": 12540 }, { "epoch": 0.2279166061310475, "grad_norm": 0.9035659051096383, "learning_rate": 1.770287642603758e-05, "loss": 0.6737, "step": 12550 }, { "epoch": 0.22809821298852245, "grad_norm": 0.8363015474059913, "learning_rate": 1.7699200091708533e-05, "loss": 0.6506, "step": 12560 }, { "epoch": 0.22827981984599738, "grad_norm": 0.8472583019820091, "learning_rate": 1.769552120029443e-05, "loss": 0.6572, "step": 12570 }, { "epoch": 0.22846142670347233, "grad_norm": 0.8404153642970884, "learning_rate": 1.7691839753017124e-05, "loss": 0.6567, "step": 12580 }, { "epoch": 0.22864303356094726, "grad_norm": 0.8784200071593238, "learning_rate": 1.76881557510993e-05, "loss": 0.6527, "step": 12590 }, { "epoch": 0.2288246404184222, "grad_norm": 0.8937311513437806, "learning_rate": 1.768446919576451e-05, "loss": 0.6568, "step": 12600 }, { "epoch": 0.22900624727589713, "grad_norm": 0.8893171628791652, "learning_rate": 1.7680780088237147e-05, "loss": 0.6509, "step": 12610 }, { "epoch": 0.2291878541333721, "grad_norm": 0.8694000063802628, "learning_rate": 1.7677088429742445e-05, "loss": 0.6523, "step": 12620 }, { "epoch": 0.229369460990847, "grad_norm": 0.8415945015708228, "learning_rate": 1.767339422150649e-05, "loss": 0.6516, "step": 12630 }, { "epoch": 0.22955106784832197, "grad_norm": 0.8680204450249983, "learning_rate": 1.766969746475622e-05, "loss": 0.6663, "step": 12640 }, { "epoch": 0.2297326747057969, "grad_norm": 0.8700288864755487, "learning_rate": 1.7665998160719407e-05, "loss": 0.6573, "step": 12650 }, { "epoch": 0.22991428156327182, "grad_norm": 0.8594432468091665, "learning_rate": 1.7662296310624682e-05, "loss": 0.6703, "step": 12660 }, { "epoch": 0.23009588842074677, "grad_norm": 0.8376983385982337, "learning_rate": 1.7658591915701518e-05, "loss": 0.6538, "step": 12670 }, { "epoch": 0.2302774952782217, "grad_norm": 0.8681195969020371, "learning_rate": 1.765488497718023e-05, "loss": 0.6548, "step": 12680 }, { "epoch": 0.23045910213569665, "grad_norm": 0.8998526083955809, "learning_rate": 1.7651175496291975e-05, "loss": 0.6458, "step": 12690 }, { "epoch": 0.23064070899317157, "grad_norm": 0.8328825072849665, "learning_rate": 1.7647463474268766e-05, "loss": 0.6621, "step": 12700 }, { "epoch": 0.23082231585064653, "grad_norm": 0.896397008529695, "learning_rate": 1.764374891234345e-05, "loss": 0.6767, "step": 12710 }, { "epoch": 0.23100392270812145, "grad_norm": 1.1084658744645797, "learning_rate": 1.7640031811749714e-05, "loss": 0.6469, "step": 12720 }, { "epoch": 0.2311855295655964, "grad_norm": 0.8860673849052138, "learning_rate": 1.7636312173722102e-05, "loss": 0.6629, "step": 12730 }, { "epoch": 0.23136713642307133, "grad_norm": 0.8851759356915253, "learning_rate": 1.763258999949599e-05, "loss": 0.6595, "step": 12740 }, { "epoch": 0.23154874328054628, "grad_norm": 0.8855432526297445, "learning_rate": 1.76288652903076e-05, "loss": 0.6662, "step": 12750 }, { "epoch": 0.2317303501380212, "grad_norm": 0.9045367108166775, "learning_rate": 1.7625138047393996e-05, "loss": 0.6612, "step": 12760 }, { "epoch": 0.23191195699549616, "grad_norm": 0.8504405037593553, "learning_rate": 1.762140827199308e-05, "loss": 0.6578, "step": 12770 }, { "epoch": 0.23209356385297109, "grad_norm": 0.8863017948660662, "learning_rate": 1.7617675965343603e-05, "loss": 0.6459, "step": 12780 }, { "epoch": 0.23227517071044604, "grad_norm": 0.9174416322136191, "learning_rate": 1.7613941128685145e-05, "loss": 0.6606, "step": 12790 }, { "epoch": 0.23245677756792096, "grad_norm": 0.8557883222820197, "learning_rate": 1.7610203763258137e-05, "loss": 0.6527, "step": 12800 }, { "epoch": 0.23263838442539592, "grad_norm": 0.8923614298823994, "learning_rate": 1.7606463870303846e-05, "loss": 0.6589, "step": 12810 }, { "epoch": 0.23281999128287084, "grad_norm": 0.882257296567051, "learning_rate": 1.760272145106438e-05, "loss": 0.65, "step": 12820 }, { "epoch": 0.23300159814034577, "grad_norm": 0.8942517423761631, "learning_rate": 1.759897650678268e-05, "loss": 0.6545, "step": 12830 }, { "epoch": 0.23318320499782072, "grad_norm": 0.8534944103549024, "learning_rate": 1.759522903870253e-05, "loss": 0.6594, "step": 12840 }, { "epoch": 0.23336481185529565, "grad_norm": 0.8570404889380213, "learning_rate": 1.759147904806856e-05, "loss": 0.6515, "step": 12850 }, { "epoch": 0.2335464187127706, "grad_norm": 0.8869400241193951, "learning_rate": 1.7587726536126216e-05, "loss": 0.67, "step": 12860 }, { "epoch": 0.23372802557024552, "grad_norm": 0.8365187157273115, "learning_rate": 1.7583971504121806e-05, "loss": 0.6519, "step": 12870 }, { "epoch": 0.23390963242772048, "grad_norm": 0.8886606069257742, "learning_rate": 1.7580213953302464e-05, "loss": 0.6624, "step": 12880 }, { "epoch": 0.2340912392851954, "grad_norm": 0.8627330846616194, "learning_rate": 1.7576453884916155e-05, "loss": 0.654, "step": 12890 }, { "epoch": 0.23427284614267035, "grad_norm": 0.8450544997938403, "learning_rate": 1.757269130021169e-05, "loss": 0.6623, "step": 12900 }, { "epoch": 0.23445445300014528, "grad_norm": 0.8673615168478528, "learning_rate": 1.756892620043871e-05, "loss": 0.6628, "step": 12910 }, { "epoch": 0.23463605985762023, "grad_norm": 0.8499378402280346, "learning_rate": 1.7565158586847696e-05, "loss": 0.6634, "step": 12920 }, { "epoch": 0.23481766671509516, "grad_norm": 0.8234588403792649, "learning_rate": 1.7561388460689956e-05, "loss": 0.6661, "step": 12930 }, { "epoch": 0.2349992735725701, "grad_norm": 0.8687553137816749, "learning_rate": 1.755761582321764e-05, "loss": 0.6691, "step": 12940 }, { "epoch": 0.23518088043004504, "grad_norm": 0.866195048626026, "learning_rate": 1.7553840675683732e-05, "loss": 0.6503, "step": 12950 }, { "epoch": 0.23536248728752, "grad_norm": 0.8677508001309372, "learning_rate": 1.755006301934204e-05, "loss": 0.6644, "step": 12960 }, { "epoch": 0.23554409414499491, "grad_norm": 0.8993453111974257, "learning_rate": 1.754628285544722e-05, "loss": 0.6598, "step": 12970 }, { "epoch": 0.23572570100246984, "grad_norm": 0.8837923932745652, "learning_rate": 1.7542500185254752e-05, "loss": 0.6605, "step": 12980 }, { "epoch": 0.2359073078599448, "grad_norm": 0.8769511558248262, "learning_rate": 1.7538715010020945e-05, "loss": 0.6492, "step": 12990 }, { "epoch": 0.23608891471741972, "grad_norm": 0.845618734116837, "learning_rate": 1.7534927331002947e-05, "loss": 0.6475, "step": 13000 }, { "epoch": 0.23627052157489467, "grad_norm": 0.8396403161447703, "learning_rate": 1.7531137149458737e-05, "loss": 0.6584, "step": 13010 }, { "epoch": 0.2364521284323696, "grad_norm": 0.872724036230542, "learning_rate": 1.752734446664712e-05, "loss": 0.6625, "step": 13020 }, { "epoch": 0.23663373528984455, "grad_norm": 0.8472734431059974, "learning_rate": 1.7523549283827737e-05, "loss": 0.6662, "step": 13030 }, { "epoch": 0.23681534214731947, "grad_norm": 0.8402701407601469, "learning_rate": 1.7519751602261056e-05, "loss": 0.6537, "step": 13040 }, { "epoch": 0.23699694900479443, "grad_norm": 0.8921903755358679, "learning_rate": 1.751595142320838e-05, "loss": 0.6566, "step": 13050 }, { "epoch": 0.23717855586226935, "grad_norm": 0.9624965062728381, "learning_rate": 1.751214874793183e-05, "loss": 0.6542, "step": 13060 }, { "epoch": 0.2373601627197443, "grad_norm": 0.8528876981147386, "learning_rate": 1.750834357769437e-05, "loss": 0.6688, "step": 13070 }, { "epoch": 0.23754176957721923, "grad_norm": 0.8828456650782183, "learning_rate": 1.7504535913759784e-05, "loss": 0.6672, "step": 13080 }, { "epoch": 0.23772337643469418, "grad_norm": 0.8805168657921408, "learning_rate": 1.7500725757392692e-05, "loss": 0.6789, "step": 13090 }, { "epoch": 0.2379049832921691, "grad_norm": 0.8817989271749902, "learning_rate": 1.7496913109858527e-05, "loss": 0.6789, "step": 13100 }, { "epoch": 0.23808659014964406, "grad_norm": 0.8755862146766783, "learning_rate": 1.7493097972423563e-05, "loss": 0.6563, "step": 13110 }, { "epoch": 0.238268197007119, "grad_norm": 0.900801493114139, "learning_rate": 1.74892803463549e-05, "loss": 0.6612, "step": 13120 }, { "epoch": 0.23844980386459394, "grad_norm": 0.8500168541124616, "learning_rate": 1.7485460232920455e-05, "loss": 0.658, "step": 13130 }, { "epoch": 0.23863141072206887, "grad_norm": 0.8297861936008648, "learning_rate": 1.7481637633388985e-05, "loss": 0.6604, "step": 13140 }, { "epoch": 0.2388130175795438, "grad_norm": 0.8562449728966066, "learning_rate": 1.7477812549030053e-05, "loss": 0.6409, "step": 13150 }, { "epoch": 0.23899462443701874, "grad_norm": 0.8394107302689248, "learning_rate": 1.7473984981114074e-05, "loss": 0.6643, "step": 13160 }, { "epoch": 0.23917623129449367, "grad_norm": 0.8926401749141439, "learning_rate": 1.7470154930912264e-05, "loss": 0.6668, "step": 13170 }, { "epoch": 0.23935783815196862, "grad_norm": 0.9002020611673339, "learning_rate": 1.7466322399696673e-05, "loss": 0.6585, "step": 13180 }, { "epoch": 0.23953944500944355, "grad_norm": 0.8750029295702437, "learning_rate": 1.7462487388740176e-05, "loss": 0.6588, "step": 13190 }, { "epoch": 0.2397210518669185, "grad_norm": 0.8518616488057122, "learning_rate": 1.7458649899316473e-05, "loss": 0.6489, "step": 13200 }, { "epoch": 0.23990265872439342, "grad_norm": 0.8569619164232141, "learning_rate": 1.745480993270008e-05, "loss": 0.6564, "step": 13210 }, { "epoch": 0.24008426558186838, "grad_norm": 0.8667789076514618, "learning_rate": 1.745096749016634e-05, "loss": 0.6525, "step": 13220 }, { "epoch": 0.2402658724393433, "grad_norm": 0.8006277731724223, "learning_rate": 1.744712257299142e-05, "loss": 0.6564, "step": 13230 }, { "epoch": 0.24044747929681826, "grad_norm": 0.8756425804755044, "learning_rate": 1.744327518245231e-05, "loss": 0.6454, "step": 13240 }, { "epoch": 0.24062908615429318, "grad_norm": 0.8185785847415867, "learning_rate": 1.7439425319826813e-05, "loss": 0.6701, "step": 13250 }, { "epoch": 0.24081069301176813, "grad_norm": 0.8466603813564164, "learning_rate": 1.7435572986393563e-05, "loss": 0.6653, "step": 13260 }, { "epoch": 0.24099229986924306, "grad_norm": 0.8827812334130388, "learning_rate": 1.7431718183432012e-05, "loss": 0.6623, "step": 13270 }, { "epoch": 0.241173906726718, "grad_norm": 0.8408581997271775, "learning_rate": 1.7427860912222423e-05, "loss": 0.6527, "step": 13280 }, { "epoch": 0.24135551358419294, "grad_norm": 0.874551085743629, "learning_rate": 1.742400117404589e-05, "loss": 0.6431, "step": 13290 }, { "epoch": 0.2415371204416679, "grad_norm": 0.8936873723157428, "learning_rate": 1.7420138970184325e-05, "loss": 0.6519, "step": 13300 }, { "epoch": 0.24171872729914282, "grad_norm": 0.9005878945013681, "learning_rate": 1.741627430192046e-05, "loss": 0.656, "step": 13310 }, { "epoch": 0.24190033415661774, "grad_norm": 0.8672041571477256, "learning_rate": 1.741240717053783e-05, "loss": 0.6592, "step": 13320 }, { "epoch": 0.2420819410140927, "grad_norm": 0.8873010634214735, "learning_rate": 1.7408537577320813e-05, "loss": 0.6641, "step": 13330 }, { "epoch": 0.24226354787156762, "grad_norm": 0.906964193660761, "learning_rate": 1.740466552355458e-05, "loss": 0.6509, "step": 13340 }, { "epoch": 0.24244515472904257, "grad_norm": 0.8367996848829171, "learning_rate": 1.7400791010525143e-05, "loss": 0.6593, "step": 13350 }, { "epoch": 0.2426267615865175, "grad_norm": 0.8564710381134513, "learning_rate": 1.7396914039519306e-05, "loss": 0.6615, "step": 13360 }, { "epoch": 0.24280836844399245, "grad_norm": 0.8909349368750499, "learning_rate": 1.739303461182471e-05, "loss": 0.6596, "step": 13370 }, { "epoch": 0.24298997530146738, "grad_norm": 0.8962107900697165, "learning_rate": 1.738915272872981e-05, "loss": 0.6541, "step": 13380 }, { "epoch": 0.24317158215894233, "grad_norm": 0.8444815297425334, "learning_rate": 1.7385268391523853e-05, "loss": 0.6591, "step": 13390 }, { "epoch": 0.24335318901641725, "grad_norm": 0.8491112279186145, "learning_rate": 1.7381381601496935e-05, "loss": 0.6448, "step": 13400 }, { "epoch": 0.2435347958738922, "grad_norm": 0.8767284180900423, "learning_rate": 1.7377492359939938e-05, "loss": 0.6611, "step": 13410 }, { "epoch": 0.24371640273136713, "grad_norm": 0.868948027830455, "learning_rate": 1.7373600668144582e-05, "loss": 0.6552, "step": 13420 }, { "epoch": 0.24389800958884209, "grad_norm": 0.8755231013402207, "learning_rate": 1.736970652740338e-05, "loss": 0.6512, "step": 13430 }, { "epoch": 0.244079616446317, "grad_norm": 0.8270400558447041, "learning_rate": 1.7365809939009674e-05, "loss": 0.6586, "step": 13440 }, { "epoch": 0.24426122330379196, "grad_norm": 0.8898453893147658, "learning_rate": 1.7361910904257607e-05, "loss": 0.6661, "step": 13450 }, { "epoch": 0.2444428301612669, "grad_norm": 0.8931468199306506, "learning_rate": 1.7358009424442142e-05, "loss": 0.672, "step": 13460 }, { "epoch": 0.24462443701874184, "grad_norm": 0.8810947458757835, "learning_rate": 1.7354105500859053e-05, "loss": 0.6672, "step": 13470 }, { "epoch": 0.24480604387621677, "grad_norm": 1.4473406409283072, "learning_rate": 1.7350199134804927e-05, "loss": 0.6647, "step": 13480 }, { "epoch": 0.2449876507336917, "grad_norm": 0.8331542046165686, "learning_rate": 1.734629032757715e-05, "loss": 0.6548, "step": 13490 }, { "epoch": 0.24516925759116664, "grad_norm": 0.8609215268187145, "learning_rate": 1.7342379080473942e-05, "loss": 0.6702, "step": 13500 }, { "epoch": 0.24535086444864157, "grad_norm": 0.8582189610288022, "learning_rate": 1.733846539479431e-05, "loss": 0.6519, "step": 13510 }, { "epoch": 0.24553247130611652, "grad_norm": 0.8422762842533789, "learning_rate": 1.7334549271838086e-05, "loss": 0.6472, "step": 13520 }, { "epoch": 0.24571407816359145, "grad_norm": 0.8483750048962373, "learning_rate": 1.7330630712905902e-05, "loss": 0.6499, "step": 13530 }, { "epoch": 0.2458956850210664, "grad_norm": 0.8468099540872028, "learning_rate": 1.732670971929921e-05, "loss": 0.6579, "step": 13540 }, { "epoch": 0.24607729187854133, "grad_norm": 0.8785330446338683, "learning_rate": 1.7322786292320257e-05, "loss": 0.6583, "step": 13550 }, { "epoch": 0.24625889873601628, "grad_norm": 0.8716622397146624, "learning_rate": 1.7318860433272106e-05, "loss": 0.6628, "step": 13560 }, { "epoch": 0.2464405055934912, "grad_norm": 0.8166541398291929, "learning_rate": 1.7314932143458633e-05, "loss": 0.673, "step": 13570 }, { "epoch": 0.24662211245096616, "grad_norm": 0.8593043917370432, "learning_rate": 1.7311001424184512e-05, "loss": 0.6553, "step": 13580 }, { "epoch": 0.24680371930844108, "grad_norm": 0.8811060810595607, "learning_rate": 1.7307068276755227e-05, "loss": 0.6656, "step": 13590 }, { "epoch": 0.24698532616591604, "grad_norm": 0.8527835794310227, "learning_rate": 1.7303132702477062e-05, "loss": 0.6496, "step": 13600 }, { "epoch": 0.24716693302339096, "grad_norm": 0.8822752850817733, "learning_rate": 1.7299194702657126e-05, "loss": 0.6527, "step": 13610 }, { "epoch": 0.24734853988086591, "grad_norm": 0.840596354392563, "learning_rate": 1.729525427860331e-05, "loss": 0.6502, "step": 13620 }, { "epoch": 0.24753014673834084, "grad_norm": 0.855082304350793, "learning_rate": 1.729131143162433e-05, "loss": 0.6537, "step": 13630 }, { "epoch": 0.24771175359581576, "grad_norm": 0.8463722497640477, "learning_rate": 1.7287366163029692e-05, "loss": 0.6594, "step": 13640 }, { "epoch": 0.24789336045329072, "grad_norm": 0.8446315324953755, "learning_rate": 1.7283418474129718e-05, "loss": 0.6426, "step": 13650 }, { "epoch": 0.24807496731076564, "grad_norm": 0.8640223231587052, "learning_rate": 1.727946836623552e-05, "loss": 0.6417, "step": 13660 }, { "epoch": 0.2482565741682406, "grad_norm": 0.8208785546530446, "learning_rate": 1.727551584065903e-05, "loss": 0.6687, "step": 13670 }, { "epoch": 0.24843818102571552, "grad_norm": 0.9049238152730514, "learning_rate": 1.7271560898712968e-05, "loss": 0.6546, "step": 13680 }, { "epoch": 0.24861978788319047, "grad_norm": 0.8763732990094294, "learning_rate": 1.7267603541710864e-05, "loss": 0.655, "step": 13690 }, { "epoch": 0.2488013947406654, "grad_norm": 0.8433334317869575, "learning_rate": 1.7263643770967057e-05, "loss": 0.6622, "step": 13700 }, { "epoch": 0.24898300159814035, "grad_norm": 0.8607337039714439, "learning_rate": 1.725968158779667e-05, "loss": 0.6638, "step": 13710 }, { "epoch": 0.24916460845561528, "grad_norm": 0.8593589300642748, "learning_rate": 1.725571699351564e-05, "loss": 0.6473, "step": 13720 }, { "epoch": 0.24934621531309023, "grad_norm": 0.8355197256999987, "learning_rate": 1.7251749989440704e-05, "loss": 0.6424, "step": 13730 }, { "epoch": 0.24952782217056516, "grad_norm": 0.8681390437417169, "learning_rate": 1.7247780576889393e-05, "loss": 0.6547, "step": 13740 }, { "epoch": 0.2497094290280401, "grad_norm": 0.9065075096967222, "learning_rate": 1.7243808757180046e-05, "loss": 0.6616, "step": 13750 }, { "epoch": 0.24989103588551503, "grad_norm": 0.8465658500490992, "learning_rate": 1.7239834531631796e-05, "loss": 0.6616, "step": 13760 }, { "epoch": 0.25007264274298996, "grad_norm": 0.85507768766869, "learning_rate": 1.7235857901564577e-05, "loss": 0.654, "step": 13770 }, { "epoch": 0.25025424960046494, "grad_norm": 0.8085808259246307, "learning_rate": 1.7231878868299122e-05, "loss": 0.6385, "step": 13780 }, { "epoch": 0.25043585645793986, "grad_norm": 0.8323829352676202, "learning_rate": 1.7227897433156962e-05, "loss": 0.6564, "step": 13790 }, { "epoch": 0.2506174633154148, "grad_norm": 0.8853465959650003, "learning_rate": 1.722391359746042e-05, "loss": 0.6576, "step": 13800 }, { "epoch": 0.2507990701728897, "grad_norm": 0.8428244006457294, "learning_rate": 1.7219927362532627e-05, "loss": 0.6417, "step": 13810 }, { "epoch": 0.25098067703036464, "grad_norm": 0.8470868217925704, "learning_rate": 1.7215938729697505e-05, "loss": 0.6598, "step": 13820 }, { "epoch": 0.2511622838878396, "grad_norm": 0.8497796813456825, "learning_rate": 1.7211947700279765e-05, "loss": 0.6531, "step": 13830 }, { "epoch": 0.25134389074531455, "grad_norm": 0.8461009230939083, "learning_rate": 1.720795427560493e-05, "loss": 0.6583, "step": 13840 }, { "epoch": 0.25152549760278947, "grad_norm": 0.854314785041766, "learning_rate": 1.7203958456999305e-05, "loss": 0.6535, "step": 13850 }, { "epoch": 0.2517071044602644, "grad_norm": 0.8489602232639125, "learning_rate": 1.719996024579e-05, "loss": 0.6398, "step": 13860 }, { "epoch": 0.2518887113177394, "grad_norm": 0.8193980100893525, "learning_rate": 1.7195959643304912e-05, "loss": 0.6517, "step": 13870 }, { "epoch": 0.2520703181752143, "grad_norm": 0.8626510692858842, "learning_rate": 1.7191956650872734e-05, "loss": 0.6541, "step": 13880 }, { "epoch": 0.25225192503268923, "grad_norm": 0.8225365079686762, "learning_rate": 1.7187951269822953e-05, "loss": 0.644, "step": 13890 }, { "epoch": 0.25243353189016415, "grad_norm": 0.8592804921510698, "learning_rate": 1.7183943501485854e-05, "loss": 0.6625, "step": 13900 }, { "epoch": 0.25261513874763913, "grad_norm": 0.82737398299438, "learning_rate": 1.717993334719251e-05, "loss": 0.6465, "step": 13910 }, { "epoch": 0.25279674560511406, "grad_norm": 0.8973714300800757, "learning_rate": 1.7175920808274784e-05, "loss": 0.6508, "step": 13920 }, { "epoch": 0.252978352462589, "grad_norm": 0.8589314214592193, "learning_rate": 1.7171905886065338e-05, "loss": 0.6693, "step": 13930 }, { "epoch": 0.2531599593200639, "grad_norm": 0.8231568569419865, "learning_rate": 1.7167888581897617e-05, "loss": 0.6689, "step": 13940 }, { "epoch": 0.2533415661775389, "grad_norm": 0.8816291613749159, "learning_rate": 1.7163868897105865e-05, "loss": 0.653, "step": 13950 }, { "epoch": 0.2535231730350138, "grad_norm": 0.8816430252437165, "learning_rate": 1.7159846833025117e-05, "loss": 0.6459, "step": 13960 }, { "epoch": 0.25370477989248874, "grad_norm": 0.8631512122226602, "learning_rate": 1.7155822390991194e-05, "loss": 0.6443, "step": 13970 }, { "epoch": 0.25388638674996367, "grad_norm": 0.8338704167905968, "learning_rate": 1.71517955723407e-05, "loss": 0.6531, "step": 13980 }, { "epoch": 0.2540679936074386, "grad_norm": 0.8636967696195916, "learning_rate": 1.7147766378411047e-05, "loss": 0.6506, "step": 13990 }, { "epoch": 0.25424960046491357, "grad_norm": 0.9086559982256932, "learning_rate": 1.714373481054042e-05, "loss": 0.6773, "step": 14000 }, { "epoch": 0.2544312073223885, "grad_norm": 0.8658983289533483, "learning_rate": 1.713970087006779e-05, "loss": 0.6475, "step": 14010 }, { "epoch": 0.2546128141798634, "grad_norm": 0.8237281784645388, "learning_rate": 1.7135664558332935e-05, "loss": 0.6367, "step": 14020 }, { "epoch": 0.25479442103733835, "grad_norm": 0.8803835156037374, "learning_rate": 1.7131625876676402e-05, "loss": 0.667, "step": 14030 }, { "epoch": 0.25497602789481333, "grad_norm": 0.8602399456040155, "learning_rate": 1.7127584826439535e-05, "loss": 0.653, "step": 14040 }, { "epoch": 0.25515763475228825, "grad_norm": 0.8523000194507652, "learning_rate": 1.712354140896446e-05, "loss": 0.6504, "step": 14050 }, { "epoch": 0.2553392416097632, "grad_norm": 0.8552867986671868, "learning_rate": 1.711949562559409e-05, "loss": 0.6588, "step": 14060 }, { "epoch": 0.2555208484672381, "grad_norm": 0.831600221273703, "learning_rate": 1.7115447477672126e-05, "loss": 0.6588, "step": 14070 }, { "epoch": 0.2557024553247131, "grad_norm": 0.8504706032545892, "learning_rate": 1.7111396966543054e-05, "loss": 0.6575, "step": 14080 }, { "epoch": 0.255884062182188, "grad_norm": 0.8699776246145262, "learning_rate": 1.710734409355214e-05, "loss": 0.6533, "step": 14090 }, { "epoch": 0.25606566903966294, "grad_norm": 0.8286462326812581, "learning_rate": 1.7103288860045447e-05, "loss": 0.6572, "step": 14100 }, { "epoch": 0.25624727589713786, "grad_norm": 0.8654371220130472, "learning_rate": 1.70992312673698e-05, "loss": 0.6617, "step": 14110 }, { "epoch": 0.25642888275461284, "grad_norm": 0.8485787311296759, "learning_rate": 1.7095171316872833e-05, "loss": 0.6635, "step": 14120 }, { "epoch": 0.25661048961208777, "grad_norm": 0.8428321898763546, "learning_rate": 1.7091109009902948e-05, "loss": 0.662, "step": 14130 }, { "epoch": 0.2567920964695627, "grad_norm": 0.8494995276646179, "learning_rate": 1.7087044347809324e-05, "loss": 0.6582, "step": 14140 }, { "epoch": 0.2569737033270376, "grad_norm": 0.8441372604240243, "learning_rate": 1.708297733194194e-05, "loss": 0.66, "step": 14150 }, { "epoch": 0.25715531018451254, "grad_norm": 0.8398870674497402, "learning_rate": 1.7078907963651545e-05, "loss": 0.6513, "step": 14160 }, { "epoch": 0.2573369170419875, "grad_norm": 0.9130567398455934, "learning_rate": 1.707483624428967e-05, "loss": 0.6534, "step": 14170 }, { "epoch": 0.25751852389946245, "grad_norm": 0.8626289428779459, "learning_rate": 1.7070762175208625e-05, "loss": 0.6568, "step": 14180 }, { "epoch": 0.2577001307569374, "grad_norm": 0.8353734891281288, "learning_rate": 1.7066685757761515e-05, "loss": 0.6548, "step": 14190 }, { "epoch": 0.2578817376144123, "grad_norm": 0.8423755818634702, "learning_rate": 1.7062606993302206e-05, "loss": 0.647, "step": 14200 }, { "epoch": 0.2580633444718873, "grad_norm": 0.8433376124749119, "learning_rate": 1.705852588318535e-05, "loss": 0.6604, "step": 14210 }, { "epoch": 0.2582449513293622, "grad_norm": 0.8079501622791349, "learning_rate": 1.705444242876639e-05, "loss": 0.65, "step": 14220 }, { "epoch": 0.25842655818683713, "grad_norm": 0.8367021747606666, "learning_rate": 1.7050356631401522e-05, "loss": 0.6574, "step": 14230 }, { "epoch": 0.25860816504431205, "grad_norm": 0.8463151802810073, "learning_rate": 1.7046268492447743e-05, "loss": 0.6496, "step": 14240 }, { "epoch": 0.25878977190178704, "grad_norm": 0.8235196109584998, "learning_rate": 1.7042178013262822e-05, "loss": 0.6438, "step": 14250 }, { "epoch": 0.25897137875926196, "grad_norm": 0.8479952550290328, "learning_rate": 1.70380851952053e-05, "loss": 0.6466, "step": 14260 }, { "epoch": 0.2591529856167369, "grad_norm": 0.8485264668010234, "learning_rate": 1.7033990039634497e-05, "loss": 0.6394, "step": 14270 }, { "epoch": 0.2593345924742118, "grad_norm": 0.8857533430398015, "learning_rate": 1.7029892547910515e-05, "loss": 0.6562, "step": 14280 }, { "epoch": 0.2595161993316868, "grad_norm": 0.8660034252557988, "learning_rate": 1.7025792721394224e-05, "loss": 0.6537, "step": 14290 }, { "epoch": 0.2596978061891617, "grad_norm": 0.8477029826506386, "learning_rate": 1.7021690561447274e-05, "loss": 0.6475, "step": 14300 }, { "epoch": 0.25987941304663664, "grad_norm": 0.8606477400019616, "learning_rate": 1.7017586069432083e-05, "loss": 0.6531, "step": 14310 }, { "epoch": 0.26006101990411157, "grad_norm": 0.8714173518332767, "learning_rate": 1.7013479246711853e-05, "loss": 0.6521, "step": 14320 }, { "epoch": 0.2602426267615865, "grad_norm": 0.87158383348827, "learning_rate": 1.7009370094650556e-05, "loss": 0.6526, "step": 14330 }, { "epoch": 0.2604242336190615, "grad_norm": 0.8532540155724461, "learning_rate": 1.7005258614612944e-05, "loss": 0.6525, "step": 14340 }, { "epoch": 0.2606058404765364, "grad_norm": 0.8356680640859531, "learning_rate": 1.7001144807964528e-05, "loss": 0.6551, "step": 14350 }, { "epoch": 0.2607874473340113, "grad_norm": 0.8736948913164077, "learning_rate": 1.69970286760716e-05, "loss": 0.6467, "step": 14360 }, { "epoch": 0.26096905419148625, "grad_norm": 0.8709832463853046, "learning_rate": 1.6992910220301227e-05, "loss": 0.6552, "step": 14370 }, { "epoch": 0.26115066104896123, "grad_norm": 0.8725260963848052, "learning_rate": 1.6988789442021242e-05, "loss": 0.6487, "step": 14380 }, { "epoch": 0.26133226790643616, "grad_norm": 0.8362207967950238, "learning_rate": 1.6984666342600257e-05, "loss": 0.6496, "step": 14390 }, { "epoch": 0.2615138747639111, "grad_norm": 0.8064734622172196, "learning_rate": 1.698054092340765e-05, "loss": 0.6609, "step": 14400 }, { "epoch": 0.261695481621386, "grad_norm": 0.8211367979507089, "learning_rate": 1.6976413185813565e-05, "loss": 0.6465, "step": 14410 }, { "epoch": 0.261877088478861, "grad_norm": 0.8452430784173474, "learning_rate": 1.697228313118892e-05, "loss": 0.6555, "step": 14420 }, { "epoch": 0.2620586953363359, "grad_norm": 0.8905710836722227, "learning_rate": 1.6968150760905405e-05, "loss": 0.6612, "step": 14430 }, { "epoch": 0.26224030219381084, "grad_norm": 0.860671657145462, "learning_rate": 1.6964016076335483e-05, "loss": 0.6502, "step": 14440 }, { "epoch": 0.26242190905128576, "grad_norm": 0.8675898033765473, "learning_rate": 1.695987907885237e-05, "loss": 0.657, "step": 14450 }, { "epoch": 0.2626035159087607, "grad_norm": 0.8546481448686049, "learning_rate": 1.6955739769830063e-05, "loss": 0.6603, "step": 14460 }, { "epoch": 0.26278512276623567, "grad_norm": 0.8201008681380181, "learning_rate": 1.695159815064333e-05, "loss": 0.6499, "step": 14470 }, { "epoch": 0.2629667296237106, "grad_norm": 0.8696136130861339, "learning_rate": 1.694745422266769e-05, "loss": 0.6509, "step": 14480 }, { "epoch": 0.2631483364811855, "grad_norm": 0.8725277357832835, "learning_rate": 1.6943307987279445e-05, "loss": 0.6599, "step": 14490 }, { "epoch": 0.26332994333866044, "grad_norm": 0.8298132906262071, "learning_rate": 1.693915944585566e-05, "loss": 0.6479, "step": 14500 }, { "epoch": 0.2635115501961354, "grad_norm": 0.8440585153372373, "learning_rate": 1.6935008599774155e-05, "loss": 0.6629, "step": 14510 }, { "epoch": 0.26369315705361035, "grad_norm": 0.8347130157292314, "learning_rate": 1.6930855450413525e-05, "loss": 0.656, "step": 14520 }, { "epoch": 0.2638747639110853, "grad_norm": 0.8413030937987998, "learning_rate": 1.6926699999153135e-05, "loss": 0.6411, "step": 14530 }, { "epoch": 0.2640563707685602, "grad_norm": 0.8394279861780969, "learning_rate": 1.69225422473731e-05, "loss": 0.6531, "step": 14540 }, { "epoch": 0.2642379776260352, "grad_norm": 0.8284333024449849, "learning_rate": 1.691838219645431e-05, "loss": 0.6386, "step": 14550 }, { "epoch": 0.2644195844835101, "grad_norm": 0.8370332097074018, "learning_rate": 1.6914219847778418e-05, "loss": 0.6502, "step": 14560 }, { "epoch": 0.26460119134098503, "grad_norm": 0.8494467535263572, "learning_rate": 1.691005520272784e-05, "loss": 0.6623, "step": 14570 }, { "epoch": 0.26478279819845996, "grad_norm": 0.843212218086334, "learning_rate": 1.690588826268574e-05, "loss": 0.6591, "step": 14580 }, { "epoch": 0.26496440505593494, "grad_norm": 0.8817468291661902, "learning_rate": 1.690171902903607e-05, "loss": 0.6441, "step": 14590 }, { "epoch": 0.26514601191340986, "grad_norm": 0.8934245692888808, "learning_rate": 1.6897547503163524e-05, "loss": 0.6617, "step": 14600 }, { "epoch": 0.2653276187708848, "grad_norm": 0.8144696372582535, "learning_rate": 1.6893373686453562e-05, "loss": 0.6557, "step": 14610 }, { "epoch": 0.2655092256283597, "grad_norm": 0.8778243417451097, "learning_rate": 1.6889197580292412e-05, "loss": 0.6556, "step": 14620 }, { "epoch": 0.26569083248583464, "grad_norm": 0.8779815466281378, "learning_rate": 1.688501918606705e-05, "loss": 0.6442, "step": 14630 }, { "epoch": 0.2658724393433096, "grad_norm": 0.8288250354747666, "learning_rate": 1.6880838505165225e-05, "loss": 0.6599, "step": 14640 }, { "epoch": 0.26605404620078454, "grad_norm": 0.8345486891216323, "learning_rate": 1.687665553897544e-05, "loss": 0.6549, "step": 14650 }, { "epoch": 0.26623565305825947, "grad_norm": 0.8653987316900146, "learning_rate": 1.687247028888695e-05, "loss": 0.6443, "step": 14660 }, { "epoch": 0.2664172599157344, "grad_norm": 0.8580390751048288, "learning_rate": 1.686828275628978e-05, "loss": 0.6598, "step": 14670 }, { "epoch": 0.2665988667732094, "grad_norm": 0.8241044138382287, "learning_rate": 1.68640929425747e-05, "loss": 0.6403, "step": 14680 }, { "epoch": 0.2667804736306843, "grad_norm": 0.8158537645573267, "learning_rate": 1.6859900849133258e-05, "loss": 0.6598, "step": 14690 }, { "epoch": 0.2669620804881592, "grad_norm": 0.8099907523560598, "learning_rate": 1.685570647735774e-05, "loss": 0.6462, "step": 14700 }, { "epoch": 0.26714368734563415, "grad_norm": 0.8179549097893836, "learning_rate": 1.6851509828641193e-05, "loss": 0.6494, "step": 14710 }, { "epoch": 0.26732529420310913, "grad_norm": 0.8021970198528586, "learning_rate": 1.6847310904377425e-05, "loss": 0.6551, "step": 14720 }, { "epoch": 0.26750690106058406, "grad_norm": 0.8466136396499492, "learning_rate": 1.6843109705960995e-05, "loss": 0.6508, "step": 14730 }, { "epoch": 0.267688507918059, "grad_norm": 0.8805149604943805, "learning_rate": 1.6838906234787225e-05, "loss": 0.6574, "step": 14740 }, { "epoch": 0.2678701147755339, "grad_norm": 0.8115756035105846, "learning_rate": 1.683470049225218e-05, "loss": 0.6525, "step": 14750 }, { "epoch": 0.2680517216330089, "grad_norm": 0.86677489987241, "learning_rate": 1.683049247975269e-05, "loss": 0.6642, "step": 14760 }, { "epoch": 0.2682333284904838, "grad_norm": 0.8194989214218901, "learning_rate": 1.6826282198686336e-05, "loss": 0.6414, "step": 14770 }, { "epoch": 0.26841493534795874, "grad_norm": 0.8579288615384643, "learning_rate": 1.6822069650451448e-05, "loss": 0.6644, "step": 14780 }, { "epoch": 0.26859654220543366, "grad_norm": 0.8739703315229067, "learning_rate": 1.681785483644711e-05, "loss": 0.652, "step": 14790 }, { "epoch": 0.2687781490629086, "grad_norm": 0.8602709829088709, "learning_rate": 1.6813637758073165e-05, "loss": 0.6473, "step": 14800 }, { "epoch": 0.26895975592038357, "grad_norm": 0.839284604747024, "learning_rate": 1.6809418416730206e-05, "loss": 0.6634, "step": 14810 }, { "epoch": 0.2691413627778585, "grad_norm": 0.8495105834821204, "learning_rate": 1.6805196813819568e-05, "loss": 0.6568, "step": 14820 }, { "epoch": 0.2693229696353334, "grad_norm": 0.8434238726390673, "learning_rate": 1.6800972950743347e-05, "loss": 0.6426, "step": 14830 }, { "epoch": 0.26950457649280835, "grad_norm": 0.9144855171644316, "learning_rate": 1.679674682890439e-05, "loss": 0.6549, "step": 14840 }, { "epoch": 0.2696861833502833, "grad_norm": 0.8369982053638885, "learning_rate": 1.679251844970629e-05, "loss": 0.6522, "step": 14850 }, { "epoch": 0.26986779020775825, "grad_norm": 0.8095731173895273, "learning_rate": 1.678828781455339e-05, "loss": 0.6363, "step": 14860 }, { "epoch": 0.2700493970652332, "grad_norm": 0.8967432845576416, "learning_rate": 1.678405492485078e-05, "loss": 0.6541, "step": 14870 }, { "epoch": 0.2702310039227081, "grad_norm": 0.8481696293125762, "learning_rate": 1.677981978200431e-05, "loss": 0.6476, "step": 14880 }, { "epoch": 0.2704126107801831, "grad_norm": 0.858084570771263, "learning_rate": 1.6775582387420564e-05, "loss": 0.6345, "step": 14890 }, { "epoch": 0.270594217637658, "grad_norm": 0.8518749958582705, "learning_rate": 1.677134274250688e-05, "loss": 0.6472, "step": 14900 }, { "epoch": 0.27077582449513293, "grad_norm": 0.8611330749856008, "learning_rate": 1.676710084867135e-05, "loss": 0.6571, "step": 14910 }, { "epoch": 0.27095743135260786, "grad_norm": 0.8521574435292052, "learning_rate": 1.6762856707322802e-05, "loss": 0.6386, "step": 14920 }, { "epoch": 0.27113903821008284, "grad_norm": 0.839109915609967, "learning_rate": 1.675861031987081e-05, "loss": 0.6411, "step": 14930 }, { "epoch": 0.27132064506755776, "grad_norm": 0.8416360359363627, "learning_rate": 1.675436168772571e-05, "loss": 0.657, "step": 14940 }, { "epoch": 0.2715022519250327, "grad_norm": 0.82365410887756, "learning_rate": 1.6750110812298564e-05, "loss": 0.6571, "step": 14950 }, { "epoch": 0.2716838587825076, "grad_norm": 0.8465228124660457, "learning_rate": 1.674585769500119e-05, "loss": 0.641, "step": 14960 }, { "epoch": 0.27186546563998254, "grad_norm": 0.8420063967057052, "learning_rate": 1.6741602337246145e-05, "loss": 0.6544, "step": 14970 }, { "epoch": 0.2720470724974575, "grad_norm": 0.8619662553525235, "learning_rate": 1.673734474044674e-05, "loss": 0.6385, "step": 14980 }, { "epoch": 0.27222867935493245, "grad_norm": 0.8635315150074274, "learning_rate": 1.673308490601702e-05, "loss": 0.6541, "step": 14990 }, { "epoch": 0.27241028621240737, "grad_norm": 0.8520837840303535, "learning_rate": 1.6728822835371772e-05, "loss": 0.6537, "step": 15000 }, { "epoch": 0.2725918930698823, "grad_norm": 0.8218857423200026, "learning_rate": 1.6724558529926537e-05, "loss": 0.6553, "step": 15010 }, { "epoch": 0.2727734999273573, "grad_norm": 0.8462695649848196, "learning_rate": 1.6720291991097583e-05, "loss": 0.6432, "step": 15020 }, { "epoch": 0.2729551067848322, "grad_norm": 0.8438358517353146, "learning_rate": 1.671602322030193e-05, "loss": 0.6611, "step": 15030 }, { "epoch": 0.2731367136423071, "grad_norm": 0.8075391492114471, "learning_rate": 1.6711752218957343e-05, "loss": 0.6423, "step": 15040 }, { "epoch": 0.27331832049978205, "grad_norm": 0.8451991561568928, "learning_rate": 1.6707478988482312e-05, "loss": 0.644, "step": 15050 }, { "epoch": 0.27349992735725703, "grad_norm": 0.9981644515595834, "learning_rate": 1.6703203530296087e-05, "loss": 0.6429, "step": 15060 }, { "epoch": 0.27368153421473196, "grad_norm": 0.8790364469576698, "learning_rate": 1.669892584581864e-05, "loss": 0.6563, "step": 15070 }, { "epoch": 0.2738631410722069, "grad_norm": 0.849034876320935, "learning_rate": 1.6694645936470696e-05, "loss": 0.6454, "step": 15080 }, { "epoch": 0.2740447479296818, "grad_norm": 0.8463566749974659, "learning_rate": 1.6690363803673708e-05, "loss": 0.6538, "step": 15090 }, { "epoch": 0.2742263547871568, "grad_norm": 0.8315764645034416, "learning_rate": 1.668607944884988e-05, "loss": 0.6541, "step": 15100 }, { "epoch": 0.2744079616446317, "grad_norm": 0.8398288640477336, "learning_rate": 1.6681792873422142e-05, "loss": 0.6404, "step": 15110 }, { "epoch": 0.27458956850210664, "grad_norm": 0.8769586113318957, "learning_rate": 1.667750407881416e-05, "loss": 0.6535, "step": 15120 }, { "epoch": 0.27477117535958157, "grad_norm": 0.8689904818754133, "learning_rate": 1.667321306645036e-05, "loss": 0.6629, "step": 15130 }, { "epoch": 0.2749527822170565, "grad_norm": 0.850261509173691, "learning_rate": 1.6668919837755874e-05, "loss": 0.6572, "step": 15140 }, { "epoch": 0.27513438907453147, "grad_norm": 0.8215304303432788, "learning_rate": 1.6664624394156586e-05, "loss": 0.6454, "step": 15150 }, { "epoch": 0.2753159959320064, "grad_norm": 0.8672813658429249, "learning_rate": 1.6660326737079117e-05, "loss": 0.6587, "step": 15160 }, { "epoch": 0.2754976027894813, "grad_norm": 0.8278190962456429, "learning_rate": 1.665602686795082e-05, "loss": 0.6604, "step": 15170 }, { "epoch": 0.27567920964695625, "grad_norm": 0.8209763888045413, "learning_rate": 1.665172478819978e-05, "loss": 0.6497, "step": 15180 }, { "epoch": 0.2758608165044312, "grad_norm": 0.9680430108484973, "learning_rate": 1.664742049925482e-05, "loss": 0.6562, "step": 15190 }, { "epoch": 0.27604242336190615, "grad_norm": 0.8372808062856856, "learning_rate": 1.6643114002545498e-05, "loss": 0.6467, "step": 15200 }, { "epoch": 0.2762240302193811, "grad_norm": 0.8579039390752083, "learning_rate": 1.66388052995021e-05, "loss": 0.6691, "step": 15210 }, { "epoch": 0.276405637076856, "grad_norm": 0.8357847776693095, "learning_rate": 1.6634494391555642e-05, "loss": 0.6401, "step": 15220 }, { "epoch": 0.276587243934331, "grad_norm": 0.8222150749046389, "learning_rate": 1.6630181280137885e-05, "loss": 0.6415, "step": 15230 }, { "epoch": 0.2767688507918059, "grad_norm": 0.9040270691493882, "learning_rate": 1.6625865966681315e-05, "loss": 0.6535, "step": 15240 }, { "epoch": 0.27695045764928083, "grad_norm": 0.8459140367301146, "learning_rate": 1.662154845261914e-05, "loss": 0.6401, "step": 15250 }, { "epoch": 0.27713206450675576, "grad_norm": 0.8123449597415736, "learning_rate": 1.661722873938532e-05, "loss": 0.6491, "step": 15260 }, { "epoch": 0.27731367136423074, "grad_norm": 0.8194569893797579, "learning_rate": 1.6612906828414524e-05, "loss": 0.6485, "step": 15270 }, { "epoch": 0.27749527822170567, "grad_norm": 0.8239125857492373, "learning_rate": 1.6608582721142167e-05, "loss": 0.6482, "step": 15280 }, { "epoch": 0.2776768850791806, "grad_norm": 0.8903026553728774, "learning_rate": 1.660425641900438e-05, "loss": 0.6443, "step": 15290 }, { "epoch": 0.2778584919366555, "grad_norm": 0.8632835889709094, "learning_rate": 1.659992792343803e-05, "loss": 0.652, "step": 15300 }, { "epoch": 0.27804009879413044, "grad_norm": 0.829106460151174, "learning_rate": 1.659559723588072e-05, "loss": 0.6478, "step": 15310 }, { "epoch": 0.2782217056516054, "grad_norm": 0.8265063054635335, "learning_rate": 1.6591264357770765e-05, "loss": 0.6491, "step": 15320 }, { "epoch": 0.27840331250908035, "grad_norm": 0.8384192597773116, "learning_rate": 1.6586929290547217e-05, "loss": 0.6547, "step": 15330 }, { "epoch": 0.2785849193665553, "grad_norm": 0.8079642747194757, "learning_rate": 1.6582592035649852e-05, "loss": 0.6468, "step": 15340 }, { "epoch": 0.2787665262240302, "grad_norm": 0.8498749453443043, "learning_rate": 1.657825259451918e-05, "loss": 0.6477, "step": 15350 }, { "epoch": 0.2789481330815052, "grad_norm": 0.8467120067453066, "learning_rate": 1.6573910968596424e-05, "loss": 0.652, "step": 15360 }, { "epoch": 0.2791297399389801, "grad_norm": 0.8523092559574207, "learning_rate": 1.6569567159323548e-05, "loss": 0.6409, "step": 15370 }, { "epoch": 0.27931134679645503, "grad_norm": 0.8288253383183156, "learning_rate": 1.6565221168143226e-05, "loss": 0.6488, "step": 15380 }, { "epoch": 0.27949295365392995, "grad_norm": 0.859136806776112, "learning_rate": 1.6560872996498866e-05, "loss": 0.6561, "step": 15390 }, { "epoch": 0.27967456051140493, "grad_norm": 0.8877386515076942, "learning_rate": 1.65565226458346e-05, "loss": 0.6533, "step": 15400 }, { "epoch": 0.27985616736887986, "grad_norm": 0.8484048390872314, "learning_rate": 1.655217011759528e-05, "loss": 0.6455, "step": 15410 }, { "epoch": 0.2800377742263548, "grad_norm": 0.8415953908178256, "learning_rate": 1.654781541322648e-05, "loss": 0.648, "step": 15420 }, { "epoch": 0.2802193810838297, "grad_norm": 0.8323255517552371, "learning_rate": 1.6543458534174505e-05, "loss": 0.6504, "step": 15430 }, { "epoch": 0.2804009879413047, "grad_norm": 0.8314049012467154, "learning_rate": 1.653909948188637e-05, "loss": 0.6469, "step": 15440 }, { "epoch": 0.2805825947987796, "grad_norm": 0.846630974239697, "learning_rate": 1.6534738257809822e-05, "loss": 0.6605, "step": 15450 }, { "epoch": 0.28076420165625454, "grad_norm": 0.8570537496396345, "learning_rate": 1.6530374863393327e-05, "loss": 0.646, "step": 15460 }, { "epoch": 0.28094580851372947, "grad_norm": 0.8288223374430593, "learning_rate": 1.6526009300086072e-05, "loss": 0.6435, "step": 15470 }, { "epoch": 0.2811274153712044, "grad_norm": 0.849272179817979, "learning_rate": 1.652164156933796e-05, "loss": 0.6586, "step": 15480 }, { "epoch": 0.2813090222286794, "grad_norm": 0.8841671192977071, "learning_rate": 1.6517271672599616e-05, "loss": 0.657, "step": 15490 }, { "epoch": 0.2814906290861543, "grad_norm": 0.8324931925006019, "learning_rate": 1.651289961132239e-05, "loss": 0.657, "step": 15500 }, { "epoch": 0.2816722359436292, "grad_norm": 0.8561978197915442, "learning_rate": 1.6508525386958347e-05, "loss": 0.6507, "step": 15510 }, { "epoch": 0.28185384280110415, "grad_norm": 0.8207959939347177, "learning_rate": 1.650414900096026e-05, "loss": 0.6357, "step": 15520 }, { "epoch": 0.28203544965857913, "grad_norm": 0.828075419404295, "learning_rate": 1.6499770454781634e-05, "loss": 0.6533, "step": 15530 }, { "epoch": 0.28221705651605405, "grad_norm": 0.8324204760263442, "learning_rate": 1.649538974987669e-05, "loss": 0.6438, "step": 15540 }, { "epoch": 0.282398663373529, "grad_norm": 0.8845981328510704, "learning_rate": 1.6491006887700363e-05, "loss": 0.6517, "step": 15550 }, { "epoch": 0.2825802702310039, "grad_norm": 0.8299027249754555, "learning_rate": 1.64866218697083e-05, "loss": 0.6501, "step": 15560 }, { "epoch": 0.2827618770884789, "grad_norm": 0.8068698211618668, "learning_rate": 1.6482234697356875e-05, "loss": 0.651, "step": 15570 }, { "epoch": 0.2829434839459538, "grad_norm": 0.8307344984926534, "learning_rate": 1.6477845372103163e-05, "loss": 0.6466, "step": 15580 }, { "epoch": 0.28312509080342874, "grad_norm": 0.8232801026975087, "learning_rate": 1.6473453895404968e-05, "loss": 0.6533, "step": 15590 }, { "epoch": 0.28330669766090366, "grad_norm": 0.806784397556911, "learning_rate": 1.6469060268720798e-05, "loss": 0.6519, "step": 15600 }, { "epoch": 0.2834883045183786, "grad_norm": 0.8400690451661984, "learning_rate": 1.646466449350989e-05, "loss": 0.6602, "step": 15610 }, { "epoch": 0.28366991137585357, "grad_norm": 0.8626520891496278, "learning_rate": 1.6460266571232165e-05, "loss": 0.6478, "step": 15620 }, { "epoch": 0.2838515182333285, "grad_norm": 0.9176007535728321, "learning_rate": 1.6455866503348292e-05, "loss": 0.645, "step": 15630 }, { "epoch": 0.2840331250908034, "grad_norm": 0.8352574754334003, "learning_rate": 1.6451464291319633e-05, "loss": 0.6593, "step": 15640 }, { "epoch": 0.28421473194827834, "grad_norm": 0.8218557250923734, "learning_rate": 1.6447059936608262e-05, "loss": 0.6407, "step": 15650 }, { "epoch": 0.2843963388057533, "grad_norm": 0.8406172547489655, "learning_rate": 1.644265344067697e-05, "loss": 0.6415, "step": 15660 }, { "epoch": 0.28457794566322825, "grad_norm": 0.8370378482846118, "learning_rate": 1.6438244804989266e-05, "loss": 0.6356, "step": 15670 }, { "epoch": 0.2847595525207032, "grad_norm": 0.8646865966377305, "learning_rate": 1.643383403100935e-05, "loss": 0.646, "step": 15680 }, { "epoch": 0.2849411593781781, "grad_norm": 0.8245764385686716, "learning_rate": 1.642942112020215e-05, "loss": 0.6615, "step": 15690 }, { "epoch": 0.2851227662356531, "grad_norm": 0.8631800660938068, "learning_rate": 1.642500607403329e-05, "loss": 0.644, "step": 15700 }, { "epoch": 0.285304373093128, "grad_norm": 0.8246853235415568, "learning_rate": 1.6420588893969124e-05, "loss": 0.6607, "step": 15710 }, { "epoch": 0.28548597995060293, "grad_norm": 0.846918483311391, "learning_rate": 1.6416169581476692e-05, "loss": 0.6632, "step": 15720 }, { "epoch": 0.28566758680807786, "grad_norm": 0.8820474039390199, "learning_rate": 1.641174813802375e-05, "loss": 0.6452, "step": 15730 }, { "epoch": 0.28584919366555284, "grad_norm": 0.844549350375768, "learning_rate": 1.6407324565078763e-05, "loss": 0.6736, "step": 15740 }, { "epoch": 0.28603080052302776, "grad_norm": 0.8193634770633605, "learning_rate": 1.640289886411091e-05, "loss": 0.646, "step": 15750 }, { "epoch": 0.2862124073805027, "grad_norm": 0.82378751770077, "learning_rate": 1.6398471036590064e-05, "loss": 0.6486, "step": 15760 }, { "epoch": 0.2863940142379776, "grad_norm": 0.8904941828559907, "learning_rate": 1.6394041083986815e-05, "loss": 0.6401, "step": 15770 }, { "epoch": 0.28657562109545254, "grad_norm": 0.8368493590363815, "learning_rate": 1.638960900777245e-05, "loss": 0.6409, "step": 15780 }, { "epoch": 0.2867572279529275, "grad_norm": 0.8273145604508875, "learning_rate": 1.6385174809418964e-05, "loss": 0.646, "step": 15790 }, { "epoch": 0.28693883481040244, "grad_norm": 0.8527233958868922, "learning_rate": 1.6380738490399067e-05, "loss": 0.6507, "step": 15800 }, { "epoch": 0.28712044166787737, "grad_norm": 0.8643563387052745, "learning_rate": 1.6376300052186162e-05, "loss": 0.6452, "step": 15810 }, { "epoch": 0.2873020485253523, "grad_norm": 0.8621857780198096, "learning_rate": 1.637185949625435e-05, "loss": 0.6558, "step": 15820 }, { "epoch": 0.2874836553828273, "grad_norm": 0.8609101311842222, "learning_rate": 1.636741682407845e-05, "loss": 0.653, "step": 15830 }, { "epoch": 0.2876652622403022, "grad_norm": 0.8427742764873699, "learning_rate": 1.636297203713398e-05, "loss": 0.6585, "step": 15840 }, { "epoch": 0.2878468690977771, "grad_norm": 0.8470768280803237, "learning_rate": 1.635852513689715e-05, "loss": 0.6573, "step": 15850 }, { "epoch": 0.28802847595525205, "grad_norm": 0.8479229942612249, "learning_rate": 1.635407612484489e-05, "loss": 0.6499, "step": 15860 }, { "epoch": 0.28821008281272703, "grad_norm": 0.8551854896335656, "learning_rate": 1.634962500245482e-05, "loss": 0.6404, "step": 15870 }, { "epoch": 0.28839168967020196, "grad_norm": 0.810928752484743, "learning_rate": 1.634517177120525e-05, "loss": 0.6406, "step": 15880 }, { "epoch": 0.2885732965276769, "grad_norm": 0.8256978864560677, "learning_rate": 1.634071643257522e-05, "loss": 0.666, "step": 15890 }, { "epoch": 0.2887549033851518, "grad_norm": 0.8540673662193193, "learning_rate": 1.633625898804444e-05, "loss": 0.6475, "step": 15900 }, { "epoch": 0.2889365102426268, "grad_norm": 0.8046763050800051, "learning_rate": 1.633179943909334e-05, "loss": 0.6441, "step": 15910 }, { "epoch": 0.2891181171001017, "grad_norm": 0.840993818071118, "learning_rate": 1.6327337787203036e-05, "loss": 0.6559, "step": 15920 }, { "epoch": 0.28929972395757664, "grad_norm": 0.8109556605459114, "learning_rate": 1.6322874033855346e-05, "loss": 0.6625, "step": 15930 }, { "epoch": 0.28948133081505156, "grad_norm": 0.8574573941681891, "learning_rate": 1.631840818053279e-05, "loss": 0.6587, "step": 15940 }, { "epoch": 0.2896629376725265, "grad_norm": 0.8182896503357692, "learning_rate": 1.631394022871858e-05, "loss": 0.6572, "step": 15950 }, { "epoch": 0.28984454453000147, "grad_norm": 0.8446471066265024, "learning_rate": 1.630947017989663e-05, "loss": 0.6495, "step": 15960 }, { "epoch": 0.2900261513874764, "grad_norm": 0.8410327190565536, "learning_rate": 1.6304998035551553e-05, "loss": 0.6553, "step": 15970 }, { "epoch": 0.2902077582449513, "grad_norm": 0.8801694838458978, "learning_rate": 1.6300523797168643e-05, "loss": 0.6554, "step": 15980 }, { "epoch": 0.29038936510242624, "grad_norm": 0.8776745033899852, "learning_rate": 1.629604746623391e-05, "loss": 0.6479, "step": 15990 }, { "epoch": 0.2905709719599012, "grad_norm": 0.8153582736656172, "learning_rate": 1.629156904423404e-05, "loss": 0.6471, "step": 16000 }, { "epoch": 0.29075257881737615, "grad_norm": 0.8510673689667132, "learning_rate": 1.6287088532656424e-05, "loss": 0.6509, "step": 16010 }, { "epoch": 0.2909341856748511, "grad_norm": 0.8523743319402737, "learning_rate": 1.6282605932989152e-05, "loss": 0.6483, "step": 16020 }, { "epoch": 0.291115792532326, "grad_norm": 0.8523949548902391, "learning_rate": 1.627812124672099e-05, "loss": 0.6581, "step": 16030 }, { "epoch": 0.291297399389801, "grad_norm": 0.8239402305409608, "learning_rate": 1.627363447534141e-05, "loss": 0.6629, "step": 16040 }, { "epoch": 0.2914790062472759, "grad_norm": 0.8643937127258251, "learning_rate": 1.6269145620340577e-05, "loss": 0.6499, "step": 16050 }, { "epoch": 0.29166061310475083, "grad_norm": 0.8646818286916647, "learning_rate": 1.626465468320935e-05, "loss": 0.6434, "step": 16060 }, { "epoch": 0.29184221996222576, "grad_norm": 0.8204140611667207, "learning_rate": 1.6260161665439265e-05, "loss": 0.6496, "step": 16070 }, { "epoch": 0.29202382681970074, "grad_norm": 0.8241391631424507, "learning_rate": 1.6255666568522566e-05, "loss": 0.6298, "step": 16080 }, { "epoch": 0.29220543367717566, "grad_norm": 0.8397819485657981, "learning_rate": 1.6251169393952174e-05, "loss": 0.6659, "step": 16090 }, { "epoch": 0.2923870405346506, "grad_norm": 0.8431918365965536, "learning_rate": 1.6246670143221714e-05, "loss": 0.6438, "step": 16100 }, { "epoch": 0.2925686473921255, "grad_norm": 0.8304856515675444, "learning_rate": 1.6242168817825487e-05, "loss": 0.6557, "step": 16110 }, { "epoch": 0.29275025424960044, "grad_norm": 0.8070705783826553, "learning_rate": 1.6237665419258486e-05, "loss": 0.6493, "step": 16120 }, { "epoch": 0.2929318611070754, "grad_norm": 0.8365687953698132, "learning_rate": 1.6233159949016402e-05, "loss": 0.629, "step": 16130 }, { "epoch": 0.29311346796455034, "grad_norm": 0.8269750926264643, "learning_rate": 1.6228652408595603e-05, "loss": 0.6449, "step": 16140 }, { "epoch": 0.29329507482202527, "grad_norm": 0.8286380780979626, "learning_rate": 1.622414279949315e-05, "loss": 0.6476, "step": 16150 }, { "epoch": 0.2934766816795002, "grad_norm": 0.8123290190551086, "learning_rate": 1.621963112320679e-05, "loss": 0.6554, "step": 16160 }, { "epoch": 0.2936582885369752, "grad_norm": 0.8354799299794496, "learning_rate": 1.621511738123496e-05, "loss": 0.6514, "step": 16170 }, { "epoch": 0.2938398953944501, "grad_norm": 0.833877293822186, "learning_rate": 1.621060157507678e-05, "loss": 0.6575, "step": 16180 }, { "epoch": 0.294021502251925, "grad_norm": 0.8323904208515759, "learning_rate": 1.6206083706232047e-05, "loss": 0.6464, "step": 16190 }, { "epoch": 0.29420310910939995, "grad_norm": 0.8196523869479794, "learning_rate": 1.620156377620126e-05, "loss": 0.648, "step": 16200 }, { "epoch": 0.29438471596687493, "grad_norm": 0.8696977943506746, "learning_rate": 1.619704178648559e-05, "loss": 0.6542, "step": 16210 }, { "epoch": 0.29456632282434986, "grad_norm": 0.8141885487572599, "learning_rate": 1.6192517738586893e-05, "loss": 0.6623, "step": 16220 }, { "epoch": 0.2947479296818248, "grad_norm": 0.8914322154443355, "learning_rate": 1.618799163400772e-05, "loss": 0.6514, "step": 16230 }, { "epoch": 0.2949295365392997, "grad_norm": 0.8272749747050272, "learning_rate": 1.6183463474251285e-05, "loss": 0.6453, "step": 16240 }, { "epoch": 0.2951111433967747, "grad_norm": 0.8486347235325836, "learning_rate": 1.6178933260821504e-05, "loss": 0.6486, "step": 16250 }, { "epoch": 0.2952927502542496, "grad_norm": 0.8726376945097105, "learning_rate": 1.6174400995222966e-05, "loss": 0.637, "step": 16260 }, { "epoch": 0.29547435711172454, "grad_norm": 0.7898643597633483, "learning_rate": 1.6169866678960942e-05, "loss": 0.6424, "step": 16270 }, { "epoch": 0.29565596396919946, "grad_norm": 0.8105511489060537, "learning_rate": 1.6165330313541386e-05, "loss": 0.6543, "step": 16280 }, { "epoch": 0.2958375708266744, "grad_norm": 0.8811659338956125, "learning_rate": 1.616079190047093e-05, "loss": 0.6476, "step": 16290 }, { "epoch": 0.29601917768414937, "grad_norm": 0.8039544758892891, "learning_rate": 1.615625144125689e-05, "loss": 0.6318, "step": 16300 }, { "epoch": 0.2962007845416243, "grad_norm": 0.8380676916111653, "learning_rate": 1.6151708937407255e-05, "loss": 0.667, "step": 16310 }, { "epoch": 0.2963823913990992, "grad_norm": 0.860393210790037, "learning_rate": 1.61471643904307e-05, "loss": 0.6526, "step": 16320 }, { "epoch": 0.29656399825657415, "grad_norm": 0.8494385556285572, "learning_rate": 1.6142617801836577e-05, "loss": 0.6608, "step": 16330 }, { "epoch": 0.2967456051140491, "grad_norm": 0.9207439363536579, "learning_rate": 1.613806917313491e-05, "loss": 0.649, "step": 16340 }, { "epoch": 0.29692721197152405, "grad_norm": 0.8211943778471873, "learning_rate": 1.6133518505836407e-05, "loss": 0.6304, "step": 16350 }, { "epoch": 0.297108818828999, "grad_norm": 9.032553430998265, "learning_rate": 1.6128965801452456e-05, "loss": 0.6548, "step": 16360 }, { "epoch": 0.2972904256864739, "grad_norm": 0.8400450695420199, "learning_rate": 1.612441106149511e-05, "loss": 0.6431, "step": 16370 }, { "epoch": 0.2974720325439489, "grad_norm": 0.8702162390245989, "learning_rate": 1.6119854287477116e-05, "loss": 0.6372, "step": 16380 }, { "epoch": 0.2976536394014238, "grad_norm": 0.8460681544939401, "learning_rate": 1.611529548091187e-05, "loss": 0.6397, "step": 16390 }, { "epoch": 0.29783524625889873, "grad_norm": 0.8190797952197343, "learning_rate": 1.611073464331347e-05, "loss": 0.652, "step": 16400 }, { "epoch": 0.29801685311637366, "grad_norm": 0.8420981745358594, "learning_rate": 1.6106171776196675e-05, "loss": 0.6465, "step": 16410 }, { "epoch": 0.29819845997384864, "grad_norm": 0.8687318855153598, "learning_rate": 1.6101606881076917e-05, "loss": 0.6366, "step": 16420 }, { "epoch": 0.29838006683132356, "grad_norm": 0.8337419368063741, "learning_rate": 1.609703995947031e-05, "loss": 0.6483, "step": 16430 }, { "epoch": 0.2985616736887985, "grad_norm": 0.8304561670285906, "learning_rate": 1.609247101289363e-05, "loss": 0.6621, "step": 16440 }, { "epoch": 0.2987432805462734, "grad_norm": 0.8228805571025996, "learning_rate": 1.608790004286434e-05, "loss": 0.6569, "step": 16450 }, { "epoch": 0.29892488740374834, "grad_norm": 0.8411125130776925, "learning_rate": 1.6083327050900554e-05, "loss": 0.6484, "step": 16460 }, { "epoch": 0.2991064942612233, "grad_norm": 0.842189876833745, "learning_rate": 1.607875203852108e-05, "loss": 0.6537, "step": 16470 }, { "epoch": 0.29928810111869825, "grad_norm": 0.8147922506381137, "learning_rate": 1.6074175007245382e-05, "loss": 0.6591, "step": 16480 }, { "epoch": 0.29946970797617317, "grad_norm": 0.8384962570209459, "learning_rate": 1.6069595958593606e-05, "loss": 0.6591, "step": 16490 }, { "epoch": 0.2996513148336481, "grad_norm": 0.8153792144913535, "learning_rate": 1.6065014894086554e-05, "loss": 0.6446, "step": 16500 }, { "epoch": 0.2998329216911231, "grad_norm": 0.8539940357814356, "learning_rate": 1.6060431815245706e-05, "loss": 0.6528, "step": 16510 }, { "epoch": 0.300014528548598, "grad_norm": 0.7998054018262394, "learning_rate": 1.6055846723593215e-05, "loss": 0.6489, "step": 16520 }, { "epoch": 0.30019613540607293, "grad_norm": 0.8314358386388156, "learning_rate": 1.605125962065189e-05, "loss": 0.6509, "step": 16530 }, { "epoch": 0.30037774226354785, "grad_norm": 0.9878811232440584, "learning_rate": 1.6046670507945226e-05, "loss": 0.6506, "step": 16540 }, { "epoch": 0.30055934912102283, "grad_norm": 0.8365948871858038, "learning_rate": 1.6042079386997366e-05, "loss": 0.6485, "step": 16550 }, { "epoch": 0.30074095597849776, "grad_norm": 0.8643801873544963, "learning_rate": 1.603748625933313e-05, "loss": 0.6433, "step": 16560 }, { "epoch": 0.3009225628359727, "grad_norm": 0.8608391217042897, "learning_rate": 1.6032891126478007e-05, "loss": 0.6466, "step": 16570 }, { "epoch": 0.3011041696934476, "grad_norm": 0.8453111356987866, "learning_rate": 1.6028293989958147e-05, "loss": 0.6479, "step": 16580 }, { "epoch": 0.3012857765509226, "grad_norm": 0.7989497954412367, "learning_rate": 1.6023694851300368e-05, "loss": 0.639, "step": 16590 }, { "epoch": 0.3014673834083975, "grad_norm": 0.833217357747448, "learning_rate": 1.601909371203215e-05, "loss": 0.6389, "step": 16600 }, { "epoch": 0.30164899026587244, "grad_norm": 0.8591972618350837, "learning_rate": 1.601449057368164e-05, "loss": 0.6438, "step": 16610 }, { "epoch": 0.30183059712334737, "grad_norm": 0.8411389981365536, "learning_rate": 1.6009885437777652e-05, "loss": 0.6427, "step": 16620 }, { "epoch": 0.3020122039808223, "grad_norm": 0.8342202790164238, "learning_rate": 1.6005278305849652e-05, "loss": 0.6552, "step": 16630 }, { "epoch": 0.30219381083829727, "grad_norm": 0.8232997976095879, "learning_rate": 1.600066917942778e-05, "loss": 0.6488, "step": 16640 }, { "epoch": 0.3023754176957722, "grad_norm": 0.8496349583263193, "learning_rate": 1.5996058060042838e-05, "loss": 0.6651, "step": 16650 }, { "epoch": 0.3025570245532471, "grad_norm": 0.818705860662821, "learning_rate": 1.5991444949226288e-05, "loss": 0.6521, "step": 16660 }, { "epoch": 0.30273863141072205, "grad_norm": 0.8443308873852128, "learning_rate": 1.5986829848510244e-05, "loss": 0.6421, "step": 16670 }, { "epoch": 0.30292023826819703, "grad_norm": 0.8186370478906742, "learning_rate": 1.5982212759427494e-05, "loss": 0.6432, "step": 16680 }, { "epoch": 0.30310184512567195, "grad_norm": 0.8375462269828647, "learning_rate": 1.5977593683511487e-05, "loss": 0.6415, "step": 16690 }, { "epoch": 0.3032834519831469, "grad_norm": 0.8368073528081048, "learning_rate": 1.597297262229632e-05, "loss": 0.6496, "step": 16700 }, { "epoch": 0.3034650588406218, "grad_norm": 0.8269492185708069, "learning_rate": 1.5968349577316757e-05, "loss": 0.6608, "step": 16710 }, { "epoch": 0.3036466656980968, "grad_norm": 0.8338573967755473, "learning_rate": 1.596372455010822e-05, "loss": 0.649, "step": 16720 }, { "epoch": 0.3038282725555717, "grad_norm": 0.8075331298921753, "learning_rate": 1.5959097542206792e-05, "loss": 0.662, "step": 16730 }, { "epoch": 0.30400987941304664, "grad_norm": 0.8277455717361134, "learning_rate": 1.5954468555149206e-05, "loss": 0.6343, "step": 16740 }, { "epoch": 0.30419148627052156, "grad_norm": 0.8422986263206338, "learning_rate": 1.5949837590472857e-05, "loss": 0.6526, "step": 16750 }, { "epoch": 0.3043730931279965, "grad_norm": 0.8128312482316712, "learning_rate": 1.59452046497158e-05, "loss": 0.6345, "step": 16760 }, { "epoch": 0.30455469998547147, "grad_norm": 0.8491101388784509, "learning_rate": 1.5940569734416745e-05, "loss": 0.6409, "step": 16770 }, { "epoch": 0.3047363068429464, "grad_norm": 0.7982415959889931, "learning_rate": 1.593593284611506e-05, "loss": 0.6511, "step": 16780 }, { "epoch": 0.3049179137004213, "grad_norm": 0.8344906728640508, "learning_rate": 1.593129398635075e-05, "loss": 0.6435, "step": 16790 }, { "epoch": 0.30509952055789624, "grad_norm": 0.8130909003843811, "learning_rate": 1.5926653156664503e-05, "loss": 0.6599, "step": 16800 }, { "epoch": 0.3052811274153712, "grad_norm": 0.84919965046327, "learning_rate": 1.592201035859764e-05, "loss": 0.6387, "step": 16810 }, { "epoch": 0.30546273427284615, "grad_norm": 0.869955701297178, "learning_rate": 1.5917365593692147e-05, "loss": 0.6396, "step": 16820 }, { "epoch": 0.3056443411303211, "grad_norm": 0.8154688677963211, "learning_rate": 1.5912718863490655e-05, "loss": 0.6506, "step": 16830 }, { "epoch": 0.305825947987796, "grad_norm": 0.8459236528189067, "learning_rate": 1.5908070169536455e-05, "loss": 0.6509, "step": 16840 }, { "epoch": 0.306007554845271, "grad_norm": 0.8619678078063159, "learning_rate": 1.590341951337349e-05, "loss": 0.6456, "step": 16850 }, { "epoch": 0.3061891617027459, "grad_norm": 0.8350549938134767, "learning_rate": 1.5898766896546348e-05, "loss": 0.6359, "step": 16860 }, { "epoch": 0.30637076856022083, "grad_norm": 0.8653253424989257, "learning_rate": 1.5894112320600273e-05, "loss": 0.6376, "step": 16870 }, { "epoch": 0.30655237541769575, "grad_norm": 0.8301432540716627, "learning_rate": 1.588945578708116e-05, "loss": 0.6422, "step": 16880 }, { "epoch": 0.30673398227517074, "grad_norm": 0.8732467781129216, "learning_rate": 1.5884797297535555e-05, "loss": 0.6527, "step": 16890 }, { "epoch": 0.30691558913264566, "grad_norm": 0.8329603596991023, "learning_rate": 1.5880136853510644e-05, "loss": 0.6523, "step": 16900 }, { "epoch": 0.3070971959901206, "grad_norm": 0.8107568088744626, "learning_rate": 1.5875474456554282e-05, "loss": 0.651, "step": 16910 }, { "epoch": 0.3072788028475955, "grad_norm": 0.8316103495685644, "learning_rate": 1.5870810108214952e-05, "loss": 0.64, "step": 16920 }, { "epoch": 0.30746040970507044, "grad_norm": 0.8210486537156605, "learning_rate": 1.5866143810041793e-05, "loss": 0.6374, "step": 16930 }, { "epoch": 0.3076420165625454, "grad_norm": 0.828259019620478, "learning_rate": 1.58614755635846e-05, "loss": 0.6604, "step": 16940 }, { "epoch": 0.30782362342002034, "grad_norm": 0.8987568155256934, "learning_rate": 1.5856805370393793e-05, "loss": 0.6405, "step": 16950 }, { "epoch": 0.30800523027749527, "grad_norm": 0.833577678775648, "learning_rate": 1.5852133232020465e-05, "loss": 0.6467, "step": 16960 }, { "epoch": 0.3081868371349702, "grad_norm": 0.7978117853948254, "learning_rate": 1.5847459150016343e-05, "loss": 0.6442, "step": 16970 }, { "epoch": 0.3083684439924452, "grad_norm": 0.9062412489699437, "learning_rate": 1.5842783125933793e-05, "loss": 0.6446, "step": 16980 }, { "epoch": 0.3085500508499201, "grad_norm": 0.8213744267068074, "learning_rate": 1.583810516132583e-05, "loss": 0.6491, "step": 16990 }, { "epoch": 0.308731657707395, "grad_norm": 0.8114513097296241, "learning_rate": 1.583342525774613e-05, "loss": 0.645, "step": 17000 }, { "epoch": 0.30891326456486995, "grad_norm": 0.8333698197578184, "learning_rate": 1.5828743416748983e-05, "loss": 0.6612, "step": 17010 }, { "epoch": 0.30909487142234493, "grad_norm": 0.8221073765805347, "learning_rate": 1.5824059639889347e-05, "loss": 0.6609, "step": 17020 }, { "epoch": 0.30927647827981986, "grad_norm": 0.8406944651160381, "learning_rate": 1.5819373928722813e-05, "loss": 0.6436, "step": 17030 }, { "epoch": 0.3094580851372948, "grad_norm": 0.8320634691465437, "learning_rate": 1.581468628480561e-05, "loss": 0.6254, "step": 17040 }, { "epoch": 0.3096396919947697, "grad_norm": 0.819076921676924, "learning_rate": 1.5809996709694624e-05, "loss": 0.6369, "step": 17050 }, { "epoch": 0.3098212988522447, "grad_norm": 0.804662533420714, "learning_rate": 1.5805305204947366e-05, "loss": 0.6391, "step": 17060 }, { "epoch": 0.3100029057097196, "grad_norm": 0.8423034469323136, "learning_rate": 1.5800611772122e-05, "loss": 0.6526, "step": 17070 }, { "epoch": 0.31018451256719454, "grad_norm": 0.7994359610186427, "learning_rate": 1.579591641277732e-05, "loss": 0.6439, "step": 17080 }, { "epoch": 0.31036611942466946, "grad_norm": 0.8614358838811105, "learning_rate": 1.5791219128472768e-05, "loss": 0.6572, "step": 17090 }, { "epoch": 0.3105477262821444, "grad_norm": 0.8477025203579783, "learning_rate": 1.578651992076842e-05, "loss": 0.6542, "step": 17100 }, { "epoch": 0.31072933313961937, "grad_norm": 0.8136028152029956, "learning_rate": 1.5781818791224997e-05, "loss": 0.6447, "step": 17110 }, { "epoch": 0.3109109399970943, "grad_norm": 0.835149494799354, "learning_rate": 1.5777115741403852e-05, "loss": 0.6381, "step": 17120 }, { "epoch": 0.3110925468545692, "grad_norm": 0.8073461131586591, "learning_rate": 1.577241077286698e-05, "loss": 0.6515, "step": 17130 }, { "epoch": 0.31127415371204414, "grad_norm": 0.8100411491436759, "learning_rate": 1.5767703887177006e-05, "loss": 0.6381, "step": 17140 }, { "epoch": 0.3114557605695191, "grad_norm": 0.8462114925937005, "learning_rate": 1.5762995085897202e-05, "loss": 0.6342, "step": 17150 }, { "epoch": 0.31163736742699405, "grad_norm": 0.8382328936120468, "learning_rate": 1.575828437059147e-05, "loss": 0.642, "step": 17160 }, { "epoch": 0.311818974284469, "grad_norm": 0.8216181795740755, "learning_rate": 1.5753571742824352e-05, "loss": 0.6594, "step": 17170 }, { "epoch": 0.3120005811419439, "grad_norm": 0.8225230517878589, "learning_rate": 1.5748857204161022e-05, "loss": 0.6398, "step": 17180 }, { "epoch": 0.3121821879994189, "grad_norm": 0.8495798957756286, "learning_rate": 1.5744140756167285e-05, "loss": 0.6446, "step": 17190 }, { "epoch": 0.3123637948568938, "grad_norm": 0.8302450012897673, "learning_rate": 1.5739422400409585e-05, "loss": 0.6398, "step": 17200 }, { "epoch": 0.31254540171436873, "grad_norm": 0.7910086993988978, "learning_rate": 1.5734702138455002e-05, "loss": 0.6561, "step": 17210 }, { "epoch": 0.31272700857184366, "grad_norm": 0.8632615963542718, "learning_rate": 1.5729979971871244e-05, "loss": 0.6522, "step": 17220 }, { "epoch": 0.31290861542931864, "grad_norm": 0.8086669259415407, "learning_rate": 1.572525590222665e-05, "loss": 0.6381, "step": 17230 }, { "epoch": 0.31309022228679356, "grad_norm": 0.7954896009307276, "learning_rate": 1.57205299310902e-05, "loss": 0.6458, "step": 17240 }, { "epoch": 0.3132718291442685, "grad_norm": 0.8372656218531147, "learning_rate": 1.5715802060031493e-05, "loss": 0.6432, "step": 17250 }, { "epoch": 0.3134534360017434, "grad_norm": 0.7973614217070174, "learning_rate": 1.571107229062077e-05, "loss": 0.6418, "step": 17260 }, { "epoch": 0.31363504285921834, "grad_norm": 0.8547119149558993, "learning_rate": 1.5706340624428903e-05, "loss": 0.6473, "step": 17270 }, { "epoch": 0.3138166497166933, "grad_norm": 0.8140642419004726, "learning_rate": 1.5701607063027384e-05, "loss": 0.6336, "step": 17280 }, { "epoch": 0.31399825657416824, "grad_norm": 0.8458812060589598, "learning_rate": 1.569687160798834e-05, "loss": 0.6393, "step": 17290 }, { "epoch": 0.31417986343164317, "grad_norm": 0.8186620147737508, "learning_rate": 1.569213426088453e-05, "loss": 0.6411, "step": 17300 }, { "epoch": 0.3143614702891181, "grad_norm": 0.8309915903195878, "learning_rate": 1.568739502328933e-05, "loss": 0.6613, "step": 17310 }, { "epoch": 0.3145430771465931, "grad_norm": 0.8378820326523878, "learning_rate": 1.5682653896776766e-05, "loss": 0.6465, "step": 17320 }, { "epoch": 0.314724684004068, "grad_norm": 0.8386424907971766, "learning_rate": 1.5677910882921464e-05, "loss": 0.6391, "step": 17330 }, { "epoch": 0.3149062908615429, "grad_norm": 0.8443927046766954, "learning_rate": 1.5673165983298694e-05, "loss": 0.6451, "step": 17340 }, { "epoch": 0.31508789771901785, "grad_norm": 0.8166627042779235, "learning_rate": 1.5668419199484353e-05, "loss": 0.6426, "step": 17350 }, { "epoch": 0.31526950457649283, "grad_norm": 0.8026280678004138, "learning_rate": 1.5663670533054956e-05, "loss": 0.6361, "step": 17360 }, { "epoch": 0.31545111143396776, "grad_norm": 0.84146311862697, "learning_rate": 1.5658919985587644e-05, "loss": 0.6491, "step": 17370 }, { "epoch": 0.3156327182914427, "grad_norm": 0.8923013313228256, "learning_rate": 1.565416755866019e-05, "loss": 0.647, "step": 17380 }, { "epoch": 0.3158143251489176, "grad_norm": 0.8414615603726571, "learning_rate": 1.5649413253850985e-05, "loss": 0.664, "step": 17390 }, { "epoch": 0.3159959320063926, "grad_norm": 0.8322065708004481, "learning_rate": 1.5644657072739048e-05, "loss": 0.6445, "step": 17400 }, { "epoch": 0.3161775388638675, "grad_norm": 1.447048081119012, "learning_rate": 1.563989901690401e-05, "loss": 0.6616, "step": 17410 }, { "epoch": 0.31635914572134244, "grad_norm": 0.8003090766298705, "learning_rate": 1.5635139087926142e-05, "loss": 0.6473, "step": 17420 }, { "epoch": 0.31654075257881736, "grad_norm": 0.8264687535810021, "learning_rate": 1.563037728738632e-05, "loss": 0.6521, "step": 17430 }, { "epoch": 0.3167223594362923, "grad_norm": 0.8507317298025567, "learning_rate": 1.562561361686606e-05, "loss": 0.6538, "step": 17440 }, { "epoch": 0.31690396629376727, "grad_norm": 0.7983891907879277, "learning_rate": 1.5620848077947478e-05, "loss": 0.645, "step": 17450 }, { "epoch": 0.3170855731512422, "grad_norm": 0.7854459815616903, "learning_rate": 1.5616080672213326e-05, "loss": 0.6378, "step": 17460 }, { "epoch": 0.3172671800087171, "grad_norm": 0.8447961007729673, "learning_rate": 1.5611311401246975e-05, "loss": 0.6614, "step": 17470 }, { "epoch": 0.31744878686619205, "grad_norm": 0.862487664327029, "learning_rate": 1.560654026663241e-05, "loss": 0.6511, "step": 17480 }, { "epoch": 0.317630393723667, "grad_norm": 0.8197589016510411, "learning_rate": 1.560176726995423e-05, "loss": 0.6446, "step": 17490 }, { "epoch": 0.31781200058114195, "grad_norm": 0.850869140346168, "learning_rate": 1.5596992412797666e-05, "loss": 0.6489, "step": 17500 }, { "epoch": 0.3179936074386169, "grad_norm": 0.7958157009089174, "learning_rate": 1.5592215696748557e-05, "loss": 0.6328, "step": 17510 }, { "epoch": 0.3181752142960918, "grad_norm": 0.8461017185048999, "learning_rate": 1.558743712339337e-05, "loss": 0.6441, "step": 17520 }, { "epoch": 0.3183568211535668, "grad_norm": 0.840104487409879, "learning_rate": 1.558265669431917e-05, "loss": 0.6621, "step": 17530 }, { "epoch": 0.3185384280110417, "grad_norm": 0.842356075988965, "learning_rate": 1.557787441111366e-05, "loss": 0.6445, "step": 17540 }, { "epoch": 0.31872003486851663, "grad_norm": 0.8281324343893868, "learning_rate": 1.5573090275365137e-05, "loss": 0.6526, "step": 17550 }, { "epoch": 0.31890164172599156, "grad_norm": 0.8023147211995156, "learning_rate": 1.5568304288662536e-05, "loss": 0.64, "step": 17560 }, { "epoch": 0.31908324858346654, "grad_norm": 0.8538196039572888, "learning_rate": 1.5563516452595387e-05, "loss": 0.6499, "step": 17570 }, { "epoch": 0.31926485544094146, "grad_norm": 0.7936667350706389, "learning_rate": 1.555872676875385e-05, "loss": 0.6503, "step": 17580 }, { "epoch": 0.3194464622984164, "grad_norm": 0.8397125898110697, "learning_rate": 1.5553935238728687e-05, "loss": 0.6506, "step": 17590 }, { "epoch": 0.3196280691558913, "grad_norm": 0.8200617571746285, "learning_rate": 1.5549141864111278e-05, "loss": 0.6447, "step": 17600 }, { "epoch": 0.31980967601336624, "grad_norm": 0.8420447062818407, "learning_rate": 1.5544346646493616e-05, "loss": 0.6465, "step": 17610 }, { "epoch": 0.3199912828708412, "grad_norm": 0.8372020685928607, "learning_rate": 1.553954958746831e-05, "loss": 0.6393, "step": 17620 }, { "epoch": 0.32017288972831615, "grad_norm": 0.8048611864663605, "learning_rate": 1.5534750688628566e-05, "loss": 0.6398, "step": 17630 }, { "epoch": 0.32035449658579107, "grad_norm": 0.8311004328314626, "learning_rate": 1.5529949951568217e-05, "loss": 0.6654, "step": 17640 }, { "epoch": 0.320536103443266, "grad_norm": 0.8065578792930164, "learning_rate": 1.5525147377881696e-05, "loss": 0.646, "step": 17650 }, { "epoch": 0.320717710300741, "grad_norm": 0.8249478011711635, "learning_rate": 1.5520342969164056e-05, "loss": 0.6496, "step": 17660 }, { "epoch": 0.3208993171582159, "grad_norm": 0.8101104323803099, "learning_rate": 1.5515536727010956e-05, "loss": 0.6596, "step": 17670 }, { "epoch": 0.3210809240156908, "grad_norm": 0.8571757969962313, "learning_rate": 1.5510728653018655e-05, "loss": 0.6552, "step": 17680 }, { "epoch": 0.32126253087316575, "grad_norm": 0.799603450076537, "learning_rate": 1.5505918748784026e-05, "loss": 0.6391, "step": 17690 }, { "epoch": 0.32144413773064073, "grad_norm": 0.7715777944786233, "learning_rate": 1.5501107015904557e-05, "loss": 0.645, "step": 17700 }, { "epoch": 0.32162574458811566, "grad_norm": 0.8085442730912736, "learning_rate": 1.5496293455978337e-05, "loss": 0.6377, "step": 17710 }, { "epoch": 0.3218073514455906, "grad_norm": 0.838734745462757, "learning_rate": 1.549147807060406e-05, "loss": 0.6447, "step": 17720 }, { "epoch": 0.3219889583030655, "grad_norm": 0.8207051337751485, "learning_rate": 1.548666086138103e-05, "loss": 0.647, "step": 17730 }, { "epoch": 0.3221705651605405, "grad_norm": 0.7784503748560165, "learning_rate": 1.5481841829909153e-05, "loss": 0.6412, "step": 17740 }, { "epoch": 0.3223521720180154, "grad_norm": 0.7813744943571831, "learning_rate": 1.5477020977788945e-05, "loss": 0.6482, "step": 17750 }, { "epoch": 0.32253377887549034, "grad_norm": 0.7940025336954772, "learning_rate": 1.547219830662152e-05, "loss": 0.648, "step": 17760 }, { "epoch": 0.32271538573296527, "grad_norm": 0.8136310386315059, "learning_rate": 1.5467373818008604e-05, "loss": 0.6402, "step": 17770 }, { "epoch": 0.3228969925904402, "grad_norm": 0.7910115168042722, "learning_rate": 1.546254751355252e-05, "loss": 0.6369, "step": 17780 }, { "epoch": 0.32307859944791517, "grad_norm": 0.8249515662908692, "learning_rate": 1.5457719394856204e-05, "loss": 0.6528, "step": 17790 }, { "epoch": 0.3232602063053901, "grad_norm": 0.8128397729824178, "learning_rate": 1.545288946352318e-05, "loss": 0.6582, "step": 17800 }, { "epoch": 0.323441813162865, "grad_norm": 0.799723140849801, "learning_rate": 1.5448057721157584e-05, "loss": 0.6407, "step": 17810 }, { "epoch": 0.32362342002033995, "grad_norm": 0.8330395124060153, "learning_rate": 1.5443224169364154e-05, "loss": 0.6497, "step": 17820 }, { "epoch": 0.3238050268778149, "grad_norm": 0.8405237663174201, "learning_rate": 1.5438388809748222e-05, "loss": 0.6475, "step": 17830 }, { "epoch": 0.32398663373528985, "grad_norm": 0.7830768706253862, "learning_rate": 1.5433551643915724e-05, "loss": 0.6507, "step": 17840 }, { "epoch": 0.3241682405927648, "grad_norm": 0.8522298979298782, "learning_rate": 1.54287126734732e-05, "loss": 0.6442, "step": 17850 }, { "epoch": 0.3243498474502397, "grad_norm": 0.8218967896768214, "learning_rate": 1.5423871900027778e-05, "loss": 0.6322, "step": 17860 }, { "epoch": 0.3245314543077147, "grad_norm": 0.8850192607792, "learning_rate": 1.5419029325187202e-05, "loss": 0.6219, "step": 17870 }, { "epoch": 0.3247130611651896, "grad_norm": 0.8376659659617283, "learning_rate": 1.54141849505598e-05, "loss": 0.6529, "step": 17880 }, { "epoch": 0.32489466802266453, "grad_norm": 0.8091259574160535, "learning_rate": 1.5409338777754504e-05, "loss": 0.6442, "step": 17890 }, { "epoch": 0.32507627488013946, "grad_norm": 0.811672450765079, "learning_rate": 1.5404490808380842e-05, "loss": 0.6359, "step": 17900 }, { "epoch": 0.3252578817376144, "grad_norm": 0.8233768367159089, "learning_rate": 1.5399641044048935e-05, "loss": 0.6475, "step": 17910 }, { "epoch": 0.32543948859508937, "grad_norm": 0.81374819739862, "learning_rate": 1.5394789486369507e-05, "loss": 0.6411, "step": 17920 }, { "epoch": 0.3256210954525643, "grad_norm": 0.8015173560032894, "learning_rate": 1.5389936136953876e-05, "loss": 0.6505, "step": 17930 }, { "epoch": 0.3258027023100392, "grad_norm": 0.848237430399679, "learning_rate": 1.5385080997413948e-05, "loss": 0.6421, "step": 17940 }, { "epoch": 0.32598430916751414, "grad_norm": 0.848517662317247, "learning_rate": 1.538022406936223e-05, "loss": 0.651, "step": 17950 }, { "epoch": 0.3261659160249891, "grad_norm": 0.802318916159906, "learning_rate": 1.5375365354411825e-05, "loss": 0.6561, "step": 17960 }, { "epoch": 0.32634752288246405, "grad_norm": 0.8489196371385144, "learning_rate": 1.5370504854176426e-05, "loss": 0.665, "step": 17970 }, { "epoch": 0.326529129739939, "grad_norm": 0.7996379306448876, "learning_rate": 1.536564257027031e-05, "loss": 0.6603, "step": 17980 }, { "epoch": 0.3267107365974139, "grad_norm": 0.8144651483944454, "learning_rate": 1.536077850430837e-05, "loss": 0.6364, "step": 17990 }, { "epoch": 0.3268923434548889, "grad_norm": 0.8235288753794492, "learning_rate": 1.5355912657906068e-05, "loss": 0.6378, "step": 18000 }, { "epoch": 0.3270739503123638, "grad_norm": 0.8014549486598319, "learning_rate": 1.5351045032679465e-05, "loss": 0.6476, "step": 18010 }, { "epoch": 0.32725555716983873, "grad_norm": 0.831322075235229, "learning_rate": 1.5346175630245216e-05, "loss": 0.6545, "step": 18020 }, { "epoch": 0.32743716402731365, "grad_norm": 0.8026696227992831, "learning_rate": 1.534130445222057e-05, "loss": 0.6457, "step": 18030 }, { "epoch": 0.32761877088478863, "grad_norm": 0.8193024629898459, "learning_rate": 1.5336431500223346e-05, "loss": 0.6406, "step": 18040 }, { "epoch": 0.32780037774226356, "grad_norm": 0.8192620839345028, "learning_rate": 1.533155677587198e-05, "loss": 0.6532, "step": 18050 }, { "epoch": 0.3279819845997385, "grad_norm": 0.8377272391932564, "learning_rate": 1.5326680280785468e-05, "loss": 0.6684, "step": 18060 }, { "epoch": 0.3281635914572134, "grad_norm": 0.8019542913485985, "learning_rate": 1.532180201658342e-05, "loss": 0.6372, "step": 18070 }, { "epoch": 0.32834519831468834, "grad_norm": 0.7956795373916614, "learning_rate": 1.5316921984886018e-05, "loss": 0.6445, "step": 18080 }, { "epoch": 0.3285268051721633, "grad_norm": 0.8525906739980886, "learning_rate": 1.5312040187314036e-05, "loss": 0.654, "step": 18090 }, { "epoch": 0.32870841202963824, "grad_norm": 0.816689551705431, "learning_rate": 1.5307156625488828e-05, "loss": 0.6427, "step": 18100 }, { "epoch": 0.32889001888711317, "grad_norm": 0.8473694320766226, "learning_rate": 1.5302271301032346e-05, "loss": 0.6485, "step": 18110 }, { "epoch": 0.3290716257445881, "grad_norm": 0.8275558962089192, "learning_rate": 1.529738421556712e-05, "loss": 0.641, "step": 18120 }, { "epoch": 0.3292532326020631, "grad_norm": 0.7998321552906746, "learning_rate": 1.5292495370716264e-05, "loss": 0.6361, "step": 18130 }, { "epoch": 0.329434839459538, "grad_norm": 0.8299757185017043, "learning_rate": 1.528760476810348e-05, "loss": 0.6414, "step": 18140 }, { "epoch": 0.3296164463170129, "grad_norm": 0.791708315851963, "learning_rate": 1.5282712409353047e-05, "loss": 0.6615, "step": 18150 }, { "epoch": 0.32979805317448785, "grad_norm": 0.7759076536897195, "learning_rate": 1.527781829608984e-05, "loss": 0.6404, "step": 18160 }, { "epoch": 0.32997966003196283, "grad_norm": 0.8436739037233523, "learning_rate": 1.5272922429939305e-05, "loss": 0.6385, "step": 18170 }, { "epoch": 0.33016126688943775, "grad_norm": 0.8626550830462593, "learning_rate": 1.526802481252747e-05, "loss": 0.6341, "step": 18180 }, { "epoch": 0.3303428737469127, "grad_norm": 0.8461715626876529, "learning_rate": 1.5263125445480957e-05, "loss": 0.6349, "step": 18190 }, { "epoch": 0.3305244806043876, "grad_norm": 0.8404846323382882, "learning_rate": 1.5258224330426954e-05, "loss": 0.6431, "step": 18200 }, { "epoch": 0.3307060874618626, "grad_norm": 0.7937997035696778, "learning_rate": 1.5253321468993242e-05, "loss": 0.6516, "step": 18210 }, { "epoch": 0.3308876943193375, "grad_norm": 0.8099486113937379, "learning_rate": 1.5248416862808167e-05, "loss": 0.6495, "step": 18220 }, { "epoch": 0.33106930117681244, "grad_norm": 0.8009094524412349, "learning_rate": 1.5243510513500676e-05, "loss": 0.6461, "step": 18230 }, { "epoch": 0.33125090803428736, "grad_norm": 0.8162156136242184, "learning_rate": 1.5238602422700273e-05, "loss": 0.6466, "step": 18240 }, { "epoch": 0.3314325148917623, "grad_norm": 0.8324291865744824, "learning_rate": 1.5233692592037054e-05, "loss": 0.6637, "step": 18250 }, { "epoch": 0.33161412174923727, "grad_norm": 0.8472996261313398, "learning_rate": 1.5228781023141688e-05, "loss": 0.6555, "step": 18260 }, { "epoch": 0.3317957286067122, "grad_norm": 0.8311746018640718, "learning_rate": 1.5223867717645427e-05, "loss": 0.6462, "step": 18270 }, { "epoch": 0.3319773354641871, "grad_norm": 0.8092187072578456, "learning_rate": 1.5218952677180085e-05, "loss": 0.6435, "step": 18280 }, { "epoch": 0.33215894232166204, "grad_norm": 0.8059266994042373, "learning_rate": 1.5214035903378073e-05, "loss": 0.6389, "step": 18290 }, { "epoch": 0.332340549179137, "grad_norm": 0.7832481574050161, "learning_rate": 1.520911739787236e-05, "loss": 0.6305, "step": 18300 }, { "epoch": 0.33252215603661195, "grad_norm": 0.8075514067749094, "learning_rate": 1.5204197162296501e-05, "loss": 0.6439, "step": 18310 }, { "epoch": 0.3327037628940869, "grad_norm": 0.7992927695823047, "learning_rate": 1.5199275198284623e-05, "loss": 0.6289, "step": 18320 }, { "epoch": 0.3328853697515618, "grad_norm": 0.8646171384789528, "learning_rate": 1.5194351507471417e-05, "loss": 0.6338, "step": 18330 }, { "epoch": 0.3330669766090368, "grad_norm": 0.8328110442071387, "learning_rate": 1.5189426091492166e-05, "loss": 0.6422, "step": 18340 }, { "epoch": 0.3332485834665117, "grad_norm": 0.8377718770714404, "learning_rate": 1.5184498951982711e-05, "loss": 0.6476, "step": 18350 }, { "epoch": 0.33343019032398663, "grad_norm": 0.8422086244479118, "learning_rate": 1.5179570090579475e-05, "loss": 0.6304, "step": 18360 }, { "epoch": 0.33361179718146156, "grad_norm": 0.8197058096979747, "learning_rate": 1.5174639508919442e-05, "loss": 0.6254, "step": 18370 }, { "epoch": 0.33379340403893654, "grad_norm": 0.8269902341869279, "learning_rate": 1.516970720864018e-05, "loss": 0.6497, "step": 18380 }, { "epoch": 0.33397501089641146, "grad_norm": 0.8132315585260665, "learning_rate": 1.5164773191379819e-05, "loss": 0.6374, "step": 18390 }, { "epoch": 0.3341566177538864, "grad_norm": 0.8115370389151053, "learning_rate": 1.5159837458777062e-05, "loss": 0.6483, "step": 18400 }, { "epoch": 0.3343382246113613, "grad_norm": 0.8240743695188791, "learning_rate": 1.515490001247118e-05, "loss": 0.6361, "step": 18410 }, { "epoch": 0.33451983146883624, "grad_norm": 0.8043285834872143, "learning_rate": 1.514996085410202e-05, "loss": 0.6402, "step": 18420 }, { "epoch": 0.3347014383263112, "grad_norm": 0.8372907684004384, "learning_rate": 1.514501998530999e-05, "loss": 0.6435, "step": 18430 }, { "epoch": 0.33488304518378614, "grad_norm": 0.8118737248733006, "learning_rate": 1.5140077407736065e-05, "loss": 0.6463, "step": 18440 }, { "epoch": 0.33506465204126107, "grad_norm": 0.8228086468555579, "learning_rate": 1.5135133123021795e-05, "loss": 0.6497, "step": 18450 }, { "epoch": 0.335246258898736, "grad_norm": 0.818717890799493, "learning_rate": 1.5130187132809288e-05, "loss": 0.6298, "step": 18460 }, { "epoch": 0.335427865756211, "grad_norm": 0.8291398026873898, "learning_rate": 1.5125239438741232e-05, "loss": 0.6391, "step": 18470 }, { "epoch": 0.3356094726136859, "grad_norm": 0.807832610474607, "learning_rate": 1.5120290042460863e-05, "loss": 0.6512, "step": 18480 }, { "epoch": 0.3357910794711608, "grad_norm": 0.8154696734966701, "learning_rate": 1.5115338945612e-05, "loss": 0.6389, "step": 18490 }, { "epoch": 0.33597268632863575, "grad_norm": 0.81402253047074, "learning_rate": 1.5110386149839014e-05, "loss": 0.6416, "step": 18500 }, { "epoch": 0.33615429318611073, "grad_norm": 0.7887303965673794, "learning_rate": 1.5105431656786844e-05, "loss": 0.6371, "step": 18510 }, { "epoch": 0.33633590004358566, "grad_norm": 0.7987425693885543, "learning_rate": 1.5100475468100992e-05, "loss": 0.6499, "step": 18520 }, { "epoch": 0.3365175069010606, "grad_norm": 0.7804166010629396, "learning_rate": 1.5095517585427533e-05, "loss": 0.6402, "step": 18530 }, { "epoch": 0.3366991137585355, "grad_norm": 0.8151702104802525, "learning_rate": 1.5090558010413085e-05, "loss": 0.6332, "step": 18540 }, { "epoch": 0.3368807206160105, "grad_norm": 0.7854640036352082, "learning_rate": 1.508559674470485e-05, "loss": 0.6326, "step": 18550 }, { "epoch": 0.3370623274734854, "grad_norm": 0.8702896853281366, "learning_rate": 1.5080633789950571e-05, "loss": 0.6463, "step": 18560 }, { "epoch": 0.33724393433096034, "grad_norm": 0.8213300840373513, "learning_rate": 1.5075669147798568e-05, "loss": 0.6471, "step": 18570 }, { "epoch": 0.33742554118843526, "grad_norm": 0.8348887256431374, "learning_rate": 1.5070702819897713e-05, "loss": 0.6451, "step": 18580 }, { "epoch": 0.3376071480459102, "grad_norm": 0.8202458376255772, "learning_rate": 1.506573480789744e-05, "loss": 0.6386, "step": 18590 }, { "epoch": 0.33778875490338517, "grad_norm": 0.7926659270242613, "learning_rate": 1.5060765113447747e-05, "loss": 0.6487, "step": 18600 }, { "epoch": 0.3379703617608601, "grad_norm": 0.8394892424194595, "learning_rate": 1.5055793738199178e-05, "loss": 0.6363, "step": 18610 }, { "epoch": 0.338151968618335, "grad_norm": 0.8012137283726322, "learning_rate": 1.5050820683802849e-05, "loss": 0.6388, "step": 18620 }, { "epoch": 0.33833357547580994, "grad_norm": 0.8082798322888255, "learning_rate": 1.5045845951910428e-05, "loss": 0.6515, "step": 18630 }, { "epoch": 0.3385151823332849, "grad_norm": 0.7866191923003977, "learning_rate": 1.5040869544174137e-05, "loss": 0.6293, "step": 18640 }, { "epoch": 0.33869678919075985, "grad_norm": 0.8057400464702151, "learning_rate": 1.5035891462246759e-05, "loss": 0.6471, "step": 18650 }, { "epoch": 0.3388783960482348, "grad_norm": 0.8019916040030877, "learning_rate": 1.5030911707781632e-05, "loss": 0.6429, "step": 18660 }, { "epoch": 0.3390600029057097, "grad_norm": 0.8283790779047818, "learning_rate": 1.502593028243265e-05, "loss": 0.6593, "step": 18670 }, { "epoch": 0.3392416097631847, "grad_norm": 0.8108919298464611, "learning_rate": 1.5020947187854263e-05, "loss": 0.6519, "step": 18680 }, { "epoch": 0.3394232166206596, "grad_norm": 0.846044233916363, "learning_rate": 1.5015962425701469e-05, "loss": 0.6573, "step": 18690 }, { "epoch": 0.33960482347813453, "grad_norm": 0.8133420937905227, "learning_rate": 1.5010975997629829e-05, "loss": 0.6429, "step": 18700 }, { "epoch": 0.33978643033560946, "grad_norm": 0.8046031920535007, "learning_rate": 1.5005987905295452e-05, "loss": 0.6583, "step": 18710 }, { "epoch": 0.33996803719308444, "grad_norm": 0.7972971056352472, "learning_rate": 1.5000998150354998e-05, "loss": 0.6387, "step": 18720 }, { "epoch": 0.34014964405055936, "grad_norm": 0.8333092812734557, "learning_rate": 1.4996006734465682e-05, "loss": 0.6447, "step": 18730 }, { "epoch": 0.3403312509080343, "grad_norm": 0.8222053147110086, "learning_rate": 1.4991013659285272e-05, "loss": 0.6415, "step": 18740 }, { "epoch": 0.3405128577655092, "grad_norm": 0.809927967355379, "learning_rate": 1.4986018926472086e-05, "loss": 0.6489, "step": 18750 }, { "epoch": 0.34069446462298414, "grad_norm": 0.8131511608680706, "learning_rate": 1.498102253768499e-05, "loss": 0.6432, "step": 18760 }, { "epoch": 0.3408760714804591, "grad_norm": 0.7866140152480998, "learning_rate": 1.4976024494583406e-05, "loss": 0.6316, "step": 18770 }, { "epoch": 0.34105767833793404, "grad_norm": 0.8564302650351978, "learning_rate": 1.4971024798827296e-05, "loss": 0.6557, "step": 18780 }, { "epoch": 0.34123928519540897, "grad_norm": 0.8365618272398581, "learning_rate": 1.4966023452077178e-05, "loss": 0.6499, "step": 18790 }, { "epoch": 0.3414208920528839, "grad_norm": 0.8111452363697601, "learning_rate": 1.496102045599412e-05, "loss": 0.6558, "step": 18800 }, { "epoch": 0.3416024989103589, "grad_norm": 0.8125729170088569, "learning_rate": 1.495601581223973e-05, "loss": 0.6353, "step": 18810 }, { "epoch": 0.3417841057678338, "grad_norm": 0.7915332601595509, "learning_rate": 1.4951009522476172e-05, "loss": 0.6475, "step": 18820 }, { "epoch": 0.3419657126253087, "grad_norm": 0.8345652430545681, "learning_rate": 1.4946001588366148e-05, "loss": 0.6414, "step": 18830 }, { "epoch": 0.34214731948278365, "grad_norm": 0.837025489934177, "learning_rate": 1.4940992011572912e-05, "loss": 0.6493, "step": 18840 }, { "epoch": 0.34232892634025863, "grad_norm": 0.856290869163285, "learning_rate": 1.4935980793760263e-05, "loss": 0.6636, "step": 18850 }, { "epoch": 0.34251053319773356, "grad_norm": 0.7836418965167442, "learning_rate": 1.4930967936592544e-05, "loss": 0.6449, "step": 18860 }, { "epoch": 0.3426921400552085, "grad_norm": 0.783018091580593, "learning_rate": 1.4925953441734642e-05, "loss": 0.6414, "step": 18870 }, { "epoch": 0.3428737469126834, "grad_norm": 0.7883593241454818, "learning_rate": 1.4920937310851987e-05, "loss": 0.6401, "step": 18880 }, { "epoch": 0.3430553537701584, "grad_norm": 0.8137666261533215, "learning_rate": 1.4915919545610553e-05, "loss": 0.6462, "step": 18890 }, { "epoch": 0.3432369606276333, "grad_norm": 0.8424407063510464, "learning_rate": 1.4910900147676864e-05, "loss": 0.6431, "step": 18900 }, { "epoch": 0.34341856748510824, "grad_norm": 0.8105503815311115, "learning_rate": 1.4905879118717971e-05, "loss": 0.636, "step": 18910 }, { "epoch": 0.34360017434258316, "grad_norm": 0.7786998143089598, "learning_rate": 1.4900856460401485e-05, "loss": 0.6305, "step": 18920 }, { "epoch": 0.3437817812000581, "grad_norm": 0.803023676789978, "learning_rate": 1.4895832174395542e-05, "loss": 0.6326, "step": 18930 }, { "epoch": 0.34396338805753307, "grad_norm": 0.837020995439301, "learning_rate": 1.4890806262368824e-05, "loss": 0.641, "step": 18940 }, { "epoch": 0.344144994915008, "grad_norm": 0.8089909649471173, "learning_rate": 1.488577872599056e-05, "loss": 0.6319, "step": 18950 }, { "epoch": 0.3443266017724829, "grad_norm": 0.8446183348381545, "learning_rate": 1.4880749566930512e-05, "loss": 0.6451, "step": 18960 }, { "epoch": 0.34450820862995785, "grad_norm": 0.8268414885460748, "learning_rate": 1.487571878685898e-05, "loss": 0.6568, "step": 18970 }, { "epoch": 0.3446898154874328, "grad_norm": 0.8190752794518568, "learning_rate": 1.4870686387446802e-05, "loss": 0.6521, "step": 18980 }, { "epoch": 0.34487142234490775, "grad_norm": 0.8152425972784002, "learning_rate": 1.4865652370365357e-05, "loss": 0.6436, "step": 18990 }, { "epoch": 0.3450530292023827, "grad_norm": 0.8086725778056065, "learning_rate": 1.4860616737286564e-05, "loss": 0.6613, "step": 19000 }, { "epoch": 0.3452346360598576, "grad_norm": 0.808529320466898, "learning_rate": 1.4855579489882872e-05, "loss": 0.6444, "step": 19010 }, { "epoch": 0.3454162429173326, "grad_norm": 0.7825168305805288, "learning_rate": 1.4850540629827271e-05, "loss": 0.6391, "step": 19020 }, { "epoch": 0.3455978497748075, "grad_norm": 0.8425060594971665, "learning_rate": 1.4845500158793282e-05, "loss": 0.643, "step": 19030 }, { "epoch": 0.34577945663228243, "grad_norm": 0.829569932772308, "learning_rate": 1.4840458078454964e-05, "loss": 0.6378, "step": 19040 }, { "epoch": 0.34596106348975736, "grad_norm": 0.8128031366697402, "learning_rate": 1.483541439048691e-05, "loss": 0.6459, "step": 19050 }, { "epoch": 0.3461426703472323, "grad_norm": 0.7979050687020879, "learning_rate": 1.483036909656425e-05, "loss": 0.639, "step": 19060 }, { "epoch": 0.34632427720470726, "grad_norm": 0.8270129028607586, "learning_rate": 1.4825322198362643e-05, "loss": 0.6441, "step": 19070 }, { "epoch": 0.3465058840621822, "grad_norm": 0.8302804300344447, "learning_rate": 1.482027369755828e-05, "loss": 0.6358, "step": 19080 }, { "epoch": 0.3466874909196571, "grad_norm": 0.8736295350569514, "learning_rate": 1.481522359582789e-05, "loss": 0.6556, "step": 19090 }, { "epoch": 0.34686909777713204, "grad_norm": 0.8328674208436504, "learning_rate": 1.4810171894848728e-05, "loss": 0.6253, "step": 19100 }, { "epoch": 0.347050704634607, "grad_norm": 0.7934346569302624, "learning_rate": 1.4805118596298587e-05, "loss": 0.6463, "step": 19110 }, { "epoch": 0.34723231149208195, "grad_norm": 0.8228363023697975, "learning_rate": 1.4800063701855777e-05, "loss": 0.6404, "step": 19120 }, { "epoch": 0.34741391834955687, "grad_norm": 0.8145263359184084, "learning_rate": 1.4795007213199157e-05, "loss": 0.6401, "step": 19130 }, { "epoch": 0.3475955252070318, "grad_norm": 0.8485663693789394, "learning_rate": 1.4789949132008099e-05, "loss": 0.6293, "step": 19140 }, { "epoch": 0.3477771320645068, "grad_norm": 0.7892355915442727, "learning_rate": 1.4784889459962515e-05, "loss": 0.6482, "step": 19150 }, { "epoch": 0.3479587389219817, "grad_norm": 0.8733064185835784, "learning_rate": 1.4779828198742836e-05, "loss": 0.6555, "step": 19160 }, { "epoch": 0.34814034577945663, "grad_norm": 0.7774296811628775, "learning_rate": 1.4774765350030028e-05, "loss": 0.6287, "step": 19170 }, { "epoch": 0.34832195263693155, "grad_norm": 0.8157195374548823, "learning_rate": 1.4769700915505585e-05, "loss": 0.6305, "step": 19180 }, { "epoch": 0.34850355949440653, "grad_norm": 0.7960412512068722, "learning_rate": 1.4764634896851518e-05, "loss": 0.6284, "step": 19190 }, { "epoch": 0.34868516635188146, "grad_norm": 0.8480396735179784, "learning_rate": 1.4759567295750376e-05, "loss": 0.6442, "step": 19200 }, { "epoch": 0.3488667732093564, "grad_norm": 0.8045467786287656, "learning_rate": 1.4754498113885225e-05, "loss": 0.6368, "step": 19210 }, { "epoch": 0.3490483800668313, "grad_norm": 0.7898827752204916, "learning_rate": 1.4749427352939663e-05, "loss": 0.6455, "step": 19220 }, { "epoch": 0.34922998692430623, "grad_norm": 0.7736168606877072, "learning_rate": 1.4744355014597806e-05, "loss": 0.6393, "step": 19230 }, { "epoch": 0.3494115937817812, "grad_norm": 0.8027702675431982, "learning_rate": 1.4739281100544298e-05, "loss": 0.6432, "step": 19240 }, { "epoch": 0.34959320063925614, "grad_norm": 0.8027830489699773, "learning_rate": 1.4734205612464304e-05, "loss": 0.638, "step": 19250 }, { "epoch": 0.34977480749673107, "grad_norm": 0.7839606944479498, "learning_rate": 1.472912855204351e-05, "loss": 0.6346, "step": 19260 }, { "epoch": 0.349956414354206, "grad_norm": 0.771001866265059, "learning_rate": 1.4724049920968133e-05, "loss": 0.6401, "step": 19270 }, { "epoch": 0.35013802121168097, "grad_norm": 0.7856557602205209, "learning_rate": 1.4718969720924901e-05, "loss": 0.6466, "step": 19280 }, { "epoch": 0.3503196280691559, "grad_norm": 0.7995655069176001, "learning_rate": 1.4713887953601067e-05, "loss": 0.6411, "step": 19290 }, { "epoch": 0.3505012349266308, "grad_norm": 0.8138211601200934, "learning_rate": 1.4708804620684408e-05, "loss": 0.639, "step": 19300 }, { "epoch": 0.35068284178410575, "grad_norm": 0.7986987429346464, "learning_rate": 1.470371972386322e-05, "loss": 0.6318, "step": 19310 }, { "epoch": 0.35086444864158073, "grad_norm": 0.8072578805121743, "learning_rate": 1.469863326482631e-05, "loss": 0.6488, "step": 19320 }, { "epoch": 0.35104605549905565, "grad_norm": 0.8124799465031762, "learning_rate": 1.4693545245263017e-05, "loss": 0.6425, "step": 19330 }, { "epoch": 0.3512276623565306, "grad_norm": 0.875865952703411, "learning_rate": 1.4688455666863188e-05, "loss": 0.6276, "step": 19340 }, { "epoch": 0.3514092692140055, "grad_norm": 0.8025986038007402, "learning_rate": 1.4683364531317195e-05, "loss": 0.6489, "step": 19350 }, { "epoch": 0.3515908760714805, "grad_norm": 0.7770293025537772, "learning_rate": 1.4678271840315919e-05, "loss": 0.6383, "step": 19360 }, { "epoch": 0.3517724829289554, "grad_norm": 0.8193459060091545, "learning_rate": 1.4673177595550763e-05, "loss": 0.647, "step": 19370 }, { "epoch": 0.35195408978643034, "grad_norm": 0.8062877991608324, "learning_rate": 1.4668081798713649e-05, "loss": 0.6299, "step": 19380 }, { "epoch": 0.35213569664390526, "grad_norm": 0.7929053628604131, "learning_rate": 1.4662984451497007e-05, "loss": 0.6393, "step": 19390 }, { "epoch": 0.3523173035013802, "grad_norm": 0.7718470885215868, "learning_rate": 1.4657885555593787e-05, "loss": 0.6424, "step": 19400 }, { "epoch": 0.35249891035885517, "grad_norm": 0.835805362182853, "learning_rate": 1.4652785112697451e-05, "loss": 0.6415, "step": 19410 }, { "epoch": 0.3526805172163301, "grad_norm": 0.8067622533454853, "learning_rate": 1.4647683124501979e-05, "loss": 0.63, "step": 19420 }, { "epoch": 0.352862124073805, "grad_norm": 0.7830548066871043, "learning_rate": 1.464257959270186e-05, "loss": 0.6409, "step": 19430 }, { "epoch": 0.35304373093127994, "grad_norm": 0.7899183901539799, "learning_rate": 1.4637474518992092e-05, "loss": 0.6507, "step": 19440 }, { "epoch": 0.3532253377887549, "grad_norm": 0.852585418548363, "learning_rate": 1.4632367905068194e-05, "loss": 0.6455, "step": 19450 }, { "epoch": 0.35340694464622985, "grad_norm": 0.8434965388593144, "learning_rate": 1.4627259752626193e-05, "loss": 0.6325, "step": 19460 }, { "epoch": 0.3535885515037048, "grad_norm": 0.8421657641245283, "learning_rate": 1.4622150063362623e-05, "loss": 0.6472, "step": 19470 }, { "epoch": 0.3537701583611797, "grad_norm": 0.8036488388500975, "learning_rate": 1.4617038838974535e-05, "loss": 0.6419, "step": 19480 }, { "epoch": 0.3539517652186547, "grad_norm": 0.7823300290070329, "learning_rate": 1.4611926081159484e-05, "loss": 0.6194, "step": 19490 }, { "epoch": 0.3541333720761296, "grad_norm": 0.8227976657365802, "learning_rate": 1.4606811791615538e-05, "loss": 0.6687, "step": 19500 }, { "epoch": 0.35431497893360453, "grad_norm": 0.7961449181387195, "learning_rate": 1.4601695972041274e-05, "loss": 0.6465, "step": 19510 }, { "epoch": 0.35449658579107945, "grad_norm": 0.8005840739165497, "learning_rate": 1.4596578624135771e-05, "loss": 0.6432, "step": 19520 }, { "epoch": 0.35467819264855444, "grad_norm": 0.8290534030967114, "learning_rate": 1.4591459749598627e-05, "loss": 0.6443, "step": 19530 }, { "epoch": 0.35485979950602936, "grad_norm": 0.8042215079542265, "learning_rate": 1.4586339350129938e-05, "loss": 0.6406, "step": 19540 }, { "epoch": 0.3550414063635043, "grad_norm": 0.7721707593653226, "learning_rate": 1.4581217427430308e-05, "loss": 0.6417, "step": 19550 }, { "epoch": 0.3552230132209792, "grad_norm": 0.806844248397917, "learning_rate": 1.4576093983200848e-05, "loss": 0.6446, "step": 19560 }, { "epoch": 0.35540462007845414, "grad_norm": 0.8164805158459638, "learning_rate": 1.457096901914317e-05, "loss": 0.6485, "step": 19570 }, { "epoch": 0.3555862269359291, "grad_norm": 0.8319084935144251, "learning_rate": 1.4565842536959402e-05, "loss": 0.6382, "step": 19580 }, { "epoch": 0.35576783379340404, "grad_norm": 0.8168859048544125, "learning_rate": 1.4560714538352163e-05, "loss": 0.6572, "step": 19590 }, { "epoch": 0.35594944065087897, "grad_norm": 0.8171123548639974, "learning_rate": 1.4555585025024588e-05, "loss": 0.6361, "step": 19600 }, { "epoch": 0.3561310475083539, "grad_norm": 0.7904572902902103, "learning_rate": 1.4550453998680302e-05, "loss": 0.6528, "step": 19610 }, { "epoch": 0.3563126543658289, "grad_norm": 0.860632946849529, "learning_rate": 1.4545321461023445e-05, "loss": 0.65, "step": 19620 }, { "epoch": 0.3564942612233038, "grad_norm": 0.780325005052511, "learning_rate": 1.4540187413758649e-05, "loss": 0.658, "step": 19630 }, { "epoch": 0.3566758680807787, "grad_norm": 0.8145738606374059, "learning_rate": 1.4535051858591054e-05, "loss": 0.6554, "step": 19640 }, { "epoch": 0.35685747493825365, "grad_norm": 0.7974601922880384, "learning_rate": 1.4529914797226296e-05, "loss": 0.63, "step": 19650 }, { "epoch": 0.35703908179572863, "grad_norm": 0.8280593383431553, "learning_rate": 1.4524776231370519e-05, "loss": 0.6378, "step": 19660 }, { "epoch": 0.35722068865320356, "grad_norm": 0.8304913848747599, "learning_rate": 1.4519636162730355e-05, "loss": 0.6359, "step": 19670 }, { "epoch": 0.3574022955106785, "grad_norm": 0.8086980585187619, "learning_rate": 1.4514494593012945e-05, "loss": 0.6555, "step": 19680 }, { "epoch": 0.3575839023681534, "grad_norm": 0.7768361195321387, "learning_rate": 1.4509351523925924e-05, "loss": 0.6365, "step": 19690 }, { "epoch": 0.3577655092256284, "grad_norm": 0.7899046426392352, "learning_rate": 1.4504206957177423e-05, "loss": 0.634, "step": 19700 }, { "epoch": 0.3579471160831033, "grad_norm": 0.7727162234348056, "learning_rate": 1.4499060894476078e-05, "loss": 0.63, "step": 19710 }, { "epoch": 0.35812872294057824, "grad_norm": 0.8111152018918073, "learning_rate": 1.4493913337531016e-05, "loss": 0.6498, "step": 19720 }, { "epoch": 0.35831032979805316, "grad_norm": 0.8188045326993105, "learning_rate": 1.448876428805186e-05, "loss": 0.6246, "step": 19730 }, { "epoch": 0.3584919366555281, "grad_norm": 0.7842303272251907, "learning_rate": 1.4483613747748734e-05, "loss": 0.6508, "step": 19740 }, { "epoch": 0.35867354351300307, "grad_norm": 0.8234041462851087, "learning_rate": 1.4478461718332248e-05, "loss": 0.6448, "step": 19750 }, { "epoch": 0.358855150370478, "grad_norm": 0.8446455816481074, "learning_rate": 1.4473308201513517e-05, "loss": 0.6354, "step": 19760 }, { "epoch": 0.3590367572279529, "grad_norm": 0.8125235685068126, "learning_rate": 1.446815319900414e-05, "loss": 0.6485, "step": 19770 }, { "epoch": 0.35921836408542784, "grad_norm": 0.798963616538348, "learning_rate": 1.4462996712516217e-05, "loss": 0.6438, "step": 19780 }, { "epoch": 0.3593999709429028, "grad_norm": 0.7899376018554207, "learning_rate": 1.4457838743762338e-05, "loss": 0.6398, "step": 19790 }, { "epoch": 0.35958157780037775, "grad_norm": 0.7984331911543712, "learning_rate": 1.4452679294455587e-05, "loss": 0.639, "step": 19800 }, { "epoch": 0.3597631846578527, "grad_norm": 0.8043755967256122, "learning_rate": 1.4447518366309535e-05, "loss": 0.646, "step": 19810 }, { "epoch": 0.3599447915153276, "grad_norm": 0.7865167895233717, "learning_rate": 1.4442355961038248e-05, "loss": 0.6427, "step": 19820 }, { "epoch": 0.3601263983728026, "grad_norm": 0.8060365710057186, "learning_rate": 1.4437192080356284e-05, "loss": 0.6314, "step": 19830 }, { "epoch": 0.3603080052302775, "grad_norm": 0.8349226263698294, "learning_rate": 1.443202672597869e-05, "loss": 0.6371, "step": 19840 }, { "epoch": 0.36048961208775243, "grad_norm": 0.837630612699249, "learning_rate": 1.4426859899620998e-05, "loss": 0.6416, "step": 19850 }, { "epoch": 0.36067121894522736, "grad_norm": 0.7719879235056568, "learning_rate": 1.442169160299923e-05, "loss": 0.6462, "step": 19860 }, { "epoch": 0.36085282580270234, "grad_norm": 0.8308823558827275, "learning_rate": 1.4416521837829906e-05, "loss": 0.6332, "step": 19870 }, { "epoch": 0.36103443266017726, "grad_norm": 0.766207105486582, "learning_rate": 1.441135060583002e-05, "loss": 0.6288, "step": 19880 }, { "epoch": 0.3612160395176522, "grad_norm": 0.7866687124907966, "learning_rate": 1.4406177908717063e-05, "loss": 0.6422, "step": 19890 }, { "epoch": 0.3613976463751271, "grad_norm": 0.8110980953318422, "learning_rate": 1.4401003748209008e-05, "loss": 0.6303, "step": 19900 }, { "epoch": 0.36157925323260204, "grad_norm": 0.82694486522325, "learning_rate": 1.4395828126024317e-05, "loss": 0.6367, "step": 19910 }, { "epoch": 0.361760860090077, "grad_norm": 0.8183317093697238, "learning_rate": 1.439065104388193e-05, "loss": 0.6437, "step": 19920 }, { "epoch": 0.36194246694755194, "grad_norm": 0.8246291194565074, "learning_rate": 1.4385472503501283e-05, "loss": 0.6556, "step": 19930 }, { "epoch": 0.36212407380502687, "grad_norm": 0.8035863520465961, "learning_rate": 1.4380292506602291e-05, "loss": 0.6368, "step": 19940 }, { "epoch": 0.3623056806625018, "grad_norm": 0.8233320056972744, "learning_rate": 1.4375111054905349e-05, "loss": 0.6307, "step": 19950 }, { "epoch": 0.3624872875199768, "grad_norm": 0.7847955199558366, "learning_rate": 1.4369928150131343e-05, "loss": 0.6368, "step": 19960 }, { "epoch": 0.3626688943774517, "grad_norm": 0.8033737723806577, "learning_rate": 1.4364743794001632e-05, "loss": 0.6471, "step": 19970 }, { "epoch": 0.3628505012349266, "grad_norm": 0.7834540043446866, "learning_rate": 1.4359557988238067e-05, "loss": 0.6283, "step": 19980 }, { "epoch": 0.36303210809240155, "grad_norm": 0.8047599191877977, "learning_rate": 1.435437073456297e-05, "loss": 0.6259, "step": 19990 }, { "epoch": 0.36321371494987653, "grad_norm": 0.845959967608556, "learning_rate": 1.4349182034699158e-05, "loss": 0.6521, "step": 20000 }, { "epoch": 0.36339532180735146, "grad_norm": 0.80618189894831, "learning_rate": 1.4343991890369913e-05, "loss": 0.6404, "step": 20010 }, { "epoch": 0.3635769286648264, "grad_norm": 0.8062200102322665, "learning_rate": 1.4338800303299007e-05, "loss": 0.6359, "step": 20020 }, { "epoch": 0.3637585355223013, "grad_norm": 0.7841224472565457, "learning_rate": 1.4333607275210688e-05, "loss": 0.6415, "step": 20030 }, { "epoch": 0.3639401423797763, "grad_norm": 0.8072602901179272, "learning_rate": 1.4328412807829683e-05, "loss": 0.6436, "step": 20040 }, { "epoch": 0.3641217492372512, "grad_norm": 0.7945771256947273, "learning_rate": 1.4323216902881195e-05, "loss": 0.6496, "step": 20050 }, { "epoch": 0.36430335609472614, "grad_norm": 0.816092810071592, "learning_rate": 1.4318019562090904e-05, "loss": 0.6372, "step": 20060 }, { "epoch": 0.36448496295220106, "grad_norm": 0.7872394131419452, "learning_rate": 1.4312820787184971e-05, "loss": 0.6386, "step": 20070 }, { "epoch": 0.364666569809676, "grad_norm": 0.8130283424730148, "learning_rate": 1.4307620579890031e-05, "loss": 0.6369, "step": 20080 }, { "epoch": 0.36484817666715097, "grad_norm": 0.8090489030591053, "learning_rate": 1.4302418941933199e-05, "loss": 0.6504, "step": 20090 }, { "epoch": 0.3650297835246259, "grad_norm": 0.780705880606659, "learning_rate": 1.4297215875042052e-05, "loss": 0.6324, "step": 20100 }, { "epoch": 0.3652113903821008, "grad_norm": 0.8673836255950366, "learning_rate": 1.4292011380944658e-05, "loss": 0.6564, "step": 20110 }, { "epoch": 0.36539299723957575, "grad_norm": 0.7927025887110971, "learning_rate": 1.428680546136955e-05, "loss": 0.6413, "step": 20120 }, { "epoch": 0.3655746040970507, "grad_norm": 0.8046954698864139, "learning_rate": 1.4281598118045737e-05, "loss": 0.629, "step": 20130 }, { "epoch": 0.36575621095452565, "grad_norm": 0.8658671190988848, "learning_rate": 1.4276389352702697e-05, "loss": 0.6426, "step": 20140 }, { "epoch": 0.3659378178120006, "grad_norm": 0.802419925758884, "learning_rate": 1.4271179167070385e-05, "loss": 0.6502, "step": 20150 }, { "epoch": 0.3661194246694755, "grad_norm": 0.7794242672218189, "learning_rate": 1.4265967562879226e-05, "loss": 0.6357, "step": 20160 }, { "epoch": 0.3663010315269505, "grad_norm": 0.8279963617638413, "learning_rate": 1.4260754541860115e-05, "loss": 0.6416, "step": 20170 }, { "epoch": 0.3664826383844254, "grad_norm": 0.8217863136943282, "learning_rate": 1.425554010574442e-05, "loss": 0.641, "step": 20180 }, { "epoch": 0.36666424524190033, "grad_norm": 0.8087004004011364, "learning_rate": 1.4250324256263976e-05, "loss": 0.6432, "step": 20190 }, { "epoch": 0.36684585209937526, "grad_norm": 0.8044342473709678, "learning_rate": 1.4245106995151091e-05, "loss": 0.6532, "step": 20200 }, { "epoch": 0.3670274589568502, "grad_norm": 0.80119651928194, "learning_rate": 1.423988832413854e-05, "loss": 0.6455, "step": 20210 }, { "epoch": 0.36720906581432516, "grad_norm": 0.8197421597125872, "learning_rate": 1.4234668244959564e-05, "loss": 0.6441, "step": 20220 }, { "epoch": 0.3673906726718001, "grad_norm": 0.8189169799840025, "learning_rate": 1.4229446759347875e-05, "loss": 0.641, "step": 20230 }, { "epoch": 0.367572279529275, "grad_norm": 0.8288265557864654, "learning_rate": 1.4224223869037652e-05, "loss": 0.6511, "step": 20240 }, { "epoch": 0.36775388638674994, "grad_norm": 0.7912346682870233, "learning_rate": 1.4218999575763537e-05, "loss": 0.6347, "step": 20250 }, { "epoch": 0.3679354932442249, "grad_norm": 0.8329421198705724, "learning_rate": 1.4213773881260641e-05, "loss": 0.6258, "step": 20260 }, { "epoch": 0.36811710010169985, "grad_norm": 0.7821370905083034, "learning_rate": 1.4208546787264541e-05, "loss": 0.6574, "step": 20270 }, { "epoch": 0.36829870695917477, "grad_norm": 0.8298642079581212, "learning_rate": 1.4203318295511277e-05, "loss": 0.6301, "step": 20280 }, { "epoch": 0.3684803138166497, "grad_norm": 0.8115949021753422, "learning_rate": 1.4198088407737354e-05, "loss": 0.6333, "step": 20290 }, { "epoch": 0.3686619206741247, "grad_norm": 0.8421096367763174, "learning_rate": 1.419285712567974e-05, "loss": 0.6362, "step": 20300 }, { "epoch": 0.3688435275315996, "grad_norm": 0.7846404161700561, "learning_rate": 1.4187624451075865e-05, "loss": 0.6371, "step": 20310 }, { "epoch": 0.3690251343890745, "grad_norm": 0.8394970431334691, "learning_rate": 1.4182390385663628e-05, "loss": 0.6341, "step": 20320 }, { "epoch": 0.36920674124654945, "grad_norm": 0.8214379704954383, "learning_rate": 1.4177154931181379e-05, "loss": 0.6508, "step": 20330 }, { "epoch": 0.36938834810402443, "grad_norm": 0.8055296420821507, "learning_rate": 1.4171918089367942e-05, "loss": 0.6423, "step": 20340 }, { "epoch": 0.36956995496149936, "grad_norm": 0.8152988511867711, "learning_rate": 1.416667986196259e-05, "loss": 0.6511, "step": 20350 }, { "epoch": 0.3697515618189743, "grad_norm": 0.8186377075041649, "learning_rate": 1.4161440250705061e-05, "loss": 0.6309, "step": 20360 }, { "epoch": 0.3699331686764492, "grad_norm": 0.8240697404649634, "learning_rate": 1.4156199257335554e-05, "loss": 0.6409, "step": 20370 }, { "epoch": 0.37011477553392413, "grad_norm": 0.8050850865842357, "learning_rate": 1.4150956883594728e-05, "loss": 0.6299, "step": 20380 }, { "epoch": 0.3702963823913991, "grad_norm": 0.8524162663072823, "learning_rate": 1.4145713131223696e-05, "loss": 0.6398, "step": 20390 }, { "epoch": 0.37047798924887404, "grad_norm": 0.7999617546300752, "learning_rate": 1.4140468001964031e-05, "loss": 0.6537, "step": 20400 }, { "epoch": 0.37065959610634897, "grad_norm": 0.8114154421832781, "learning_rate": 1.4135221497557767e-05, "loss": 0.6374, "step": 20410 }, { "epoch": 0.3708412029638239, "grad_norm": 0.803453818524496, "learning_rate": 1.4129973619747385e-05, "loss": 0.6463, "step": 20420 }, { "epoch": 0.37102280982129887, "grad_norm": 0.8253475586692667, "learning_rate": 1.4124724370275838e-05, "loss": 0.6457, "step": 20430 }, { "epoch": 0.3712044166787738, "grad_norm": 0.8413507653149408, "learning_rate": 1.4119473750886514e-05, "loss": 0.6341, "step": 20440 }, { "epoch": 0.3713860235362487, "grad_norm": 0.8140423003226639, "learning_rate": 1.4114221763323275e-05, "loss": 0.6432, "step": 20450 }, { "epoch": 0.37156763039372365, "grad_norm": 0.777332326461681, "learning_rate": 1.4108968409330426e-05, "loss": 0.6549, "step": 20460 }, { "epoch": 0.3717492372511986, "grad_norm": 0.8573394785572827, "learning_rate": 1.4103713690652727e-05, "loss": 0.6406, "step": 20470 }, { "epoch": 0.37193084410867355, "grad_norm": 0.8048609167857267, "learning_rate": 1.4098457609035396e-05, "loss": 0.6354, "step": 20480 }, { "epoch": 0.3721124509661485, "grad_norm": 0.7912280020540556, "learning_rate": 1.4093200166224103e-05, "loss": 0.6412, "step": 20490 }, { "epoch": 0.3722940578236234, "grad_norm": 0.8259183199623052, "learning_rate": 1.4087941363964965e-05, "loss": 0.6306, "step": 20500 }, { "epoch": 0.3724756646810984, "grad_norm": 0.8284203359103604, "learning_rate": 1.4082681204004556e-05, "loss": 0.6543, "step": 20510 }, { "epoch": 0.3726572715385733, "grad_norm": 0.7804940766943143, "learning_rate": 1.4077419688089893e-05, "loss": 0.6328, "step": 20520 }, { "epoch": 0.37283887839604823, "grad_norm": 0.8016321478810219, "learning_rate": 1.4072156817968457e-05, "loss": 0.6429, "step": 20530 }, { "epoch": 0.37302048525352316, "grad_norm": 0.8224106685361625, "learning_rate": 1.4066892595388168e-05, "loss": 0.6401, "step": 20540 }, { "epoch": 0.3732020921109981, "grad_norm": 0.8013736454575727, "learning_rate": 1.4061627022097395e-05, "loss": 0.641, "step": 20550 }, { "epoch": 0.37338369896847307, "grad_norm": 0.8128249683300023, "learning_rate": 1.405636009984496e-05, "loss": 0.6326, "step": 20560 }, { "epoch": 0.373565305825948, "grad_norm": 0.828105671677548, "learning_rate": 1.4051091830380133e-05, "loss": 0.6292, "step": 20570 }, { "epoch": 0.3737469126834229, "grad_norm": 0.7908069190641113, "learning_rate": 1.4045822215452627e-05, "loss": 0.633, "step": 20580 }, { "epoch": 0.37392851954089784, "grad_norm": 0.768792665702528, "learning_rate": 1.4040551256812607e-05, "loss": 0.6328, "step": 20590 }, { "epoch": 0.3741101263983728, "grad_norm": 0.7976250927140883, "learning_rate": 1.4035278956210682e-05, "loss": 0.6452, "step": 20600 }, { "epoch": 0.37429173325584775, "grad_norm": 0.7980839230411598, "learning_rate": 1.4030005315397908e-05, "loss": 0.6437, "step": 20610 }, { "epoch": 0.3744733401133227, "grad_norm": 0.8303200869509675, "learning_rate": 1.402473033612578e-05, "loss": 0.6281, "step": 20620 }, { "epoch": 0.3746549469707976, "grad_norm": 0.8247845342636989, "learning_rate": 1.4019454020146248e-05, "loss": 0.655, "step": 20630 }, { "epoch": 0.3748365538282726, "grad_norm": 0.8129046421224291, "learning_rate": 1.40141763692117e-05, "loss": 0.6429, "step": 20640 }, { "epoch": 0.3750181606857475, "grad_norm": 0.8140042479566115, "learning_rate": 1.4008897385074962e-05, "loss": 0.6428, "step": 20650 }, { "epoch": 0.37519976754322243, "grad_norm": 0.817467859395238, "learning_rate": 1.4003617069489315e-05, "loss": 0.6415, "step": 20660 }, { "epoch": 0.37538137440069735, "grad_norm": 0.7825747608681857, "learning_rate": 1.3998335424208471e-05, "loss": 0.6432, "step": 20670 }, { "epoch": 0.37556298125817233, "grad_norm": 0.78897284276989, "learning_rate": 1.3993052450986591e-05, "loss": 0.6376, "step": 20680 }, { "epoch": 0.37574458811564726, "grad_norm": 0.8306869805824303, "learning_rate": 1.3987768151578273e-05, "loss": 0.6391, "step": 20690 }, { "epoch": 0.3759261949731222, "grad_norm": 0.8113874665153764, "learning_rate": 1.398248252773856e-05, "loss": 0.6441, "step": 20700 }, { "epoch": 0.3761078018305971, "grad_norm": 0.8084250498778737, "learning_rate": 1.3977195581222926e-05, "loss": 0.6448, "step": 20710 }, { "epoch": 0.37628940868807204, "grad_norm": 0.7912468498609005, "learning_rate": 1.3971907313787294e-05, "loss": 0.6465, "step": 20720 }, { "epoch": 0.376471015545547, "grad_norm": 0.8165850251641638, "learning_rate": 1.3966617727188023e-05, "loss": 0.6385, "step": 20730 }, { "epoch": 0.37665262240302194, "grad_norm": 0.8014275031913737, "learning_rate": 1.3961326823181908e-05, "loss": 0.639, "step": 20740 }, { "epoch": 0.37683422926049687, "grad_norm": 0.8128651790551523, "learning_rate": 1.395603460352618e-05, "loss": 0.647, "step": 20750 }, { "epoch": 0.3770158361179718, "grad_norm": 0.8353387406864191, "learning_rate": 1.395074106997851e-05, "loss": 0.6436, "step": 20760 }, { "epoch": 0.3771974429754468, "grad_norm": 0.7893150397953775, "learning_rate": 1.3945446224297007e-05, "loss": 0.6371, "step": 20770 }, { "epoch": 0.3773790498329217, "grad_norm": 0.7869623491362683, "learning_rate": 1.3940150068240212e-05, "loss": 0.6381, "step": 20780 }, { "epoch": 0.3775606566903966, "grad_norm": 0.7992459842961532, "learning_rate": 1.3934852603567102e-05, "loss": 0.6452, "step": 20790 }, { "epoch": 0.37774226354787155, "grad_norm": 0.824198880403826, "learning_rate": 1.392955383203709e-05, "loss": 0.6403, "step": 20800 }, { "epoch": 0.37792387040534653, "grad_norm": 0.8037473091885379, "learning_rate": 1.3924253755410021e-05, "loss": 0.6407, "step": 20810 }, { "epoch": 0.37810547726282145, "grad_norm": 0.8020915820777189, "learning_rate": 1.391895237544618e-05, "loss": 0.6405, "step": 20820 }, { "epoch": 0.3782870841202964, "grad_norm": 0.8034131871250992, "learning_rate": 1.3913649693906274e-05, "loss": 0.6323, "step": 20830 }, { "epoch": 0.3784686909777713, "grad_norm": 0.8123141510172959, "learning_rate": 1.3908345712551452e-05, "loss": 0.6429, "step": 20840 }, { "epoch": 0.3786502978352463, "grad_norm": 0.7748169340207421, "learning_rate": 1.3903040433143286e-05, "loss": 0.6218, "step": 20850 }, { "epoch": 0.3788319046927212, "grad_norm": 0.7962831924466401, "learning_rate": 1.389773385744379e-05, "loss": 0.6275, "step": 20860 }, { "epoch": 0.37901351155019614, "grad_norm": 0.828587791739563, "learning_rate": 1.3892425987215395e-05, "loss": 0.6459, "step": 20870 }, { "epoch": 0.37919511840767106, "grad_norm": 0.8608585002388912, "learning_rate": 1.3887116824220974e-05, "loss": 0.6221, "step": 20880 }, { "epoch": 0.379376725265146, "grad_norm": 0.7829874488685653, "learning_rate": 1.3881806370223827e-05, "loss": 0.6316, "step": 20890 }, { "epoch": 0.37955833212262097, "grad_norm": 0.7812615469471651, "learning_rate": 1.3876494626987672e-05, "loss": 0.6283, "step": 20900 }, { "epoch": 0.3797399389800959, "grad_norm": 0.8320472318203223, "learning_rate": 1.3871181596276673e-05, "loss": 0.6453, "step": 20910 }, { "epoch": 0.3799215458375708, "grad_norm": 0.8221129886078693, "learning_rate": 1.3865867279855405e-05, "loss": 0.6405, "step": 20920 }, { "epoch": 0.38010315269504574, "grad_norm": 0.799701960233829, "learning_rate": 1.386055167948888e-05, "loss": 0.6438, "step": 20930 }, { "epoch": 0.3802847595525207, "grad_norm": 0.805050023019081, "learning_rate": 1.3855234796942538e-05, "loss": 0.6277, "step": 20940 }, { "epoch": 0.38046636640999565, "grad_norm": 0.7655446308895019, "learning_rate": 1.3849916633982234e-05, "loss": 0.6377, "step": 20950 }, { "epoch": 0.3806479732674706, "grad_norm": 0.8093826212500356, "learning_rate": 1.3844597192374258e-05, "loss": 0.6335, "step": 20960 }, { "epoch": 0.3808295801249455, "grad_norm": 0.7790298152809432, "learning_rate": 1.3839276473885319e-05, "loss": 0.6315, "step": 20970 }, { "epoch": 0.3810111869824205, "grad_norm": 0.8164546972540131, "learning_rate": 1.3833954480282554e-05, "loss": 0.6319, "step": 20980 }, { "epoch": 0.3811927938398954, "grad_norm": 0.7982935448425952, "learning_rate": 1.3828631213333523e-05, "loss": 0.6315, "step": 20990 }, { "epoch": 0.38137440069737033, "grad_norm": 0.8087280405059534, "learning_rate": 1.3823306674806207e-05, "loss": 0.6363, "step": 21000 }, { "epoch": 0.38155600755484526, "grad_norm": 0.7967229946125761, "learning_rate": 1.381798086646901e-05, "loss": 0.6191, "step": 21010 }, { "epoch": 0.38173761441232024, "grad_norm": 0.7994603519523895, "learning_rate": 1.3812653790090758e-05, "loss": 0.6422, "step": 21020 }, { "epoch": 0.38191922126979516, "grad_norm": 0.8079878705787978, "learning_rate": 1.3807325447440696e-05, "loss": 0.6309, "step": 21030 }, { "epoch": 0.3821008281272701, "grad_norm": 0.7970044574758597, "learning_rate": 1.3801995840288497e-05, "loss": 0.6444, "step": 21040 }, { "epoch": 0.382282434984745, "grad_norm": 0.8098362041114144, "learning_rate": 1.3796664970404242e-05, "loss": 0.6311, "step": 21050 }, { "epoch": 0.38246404184221994, "grad_norm": 0.7959518582879919, "learning_rate": 1.3791332839558446e-05, "loss": 0.6451, "step": 21060 }, { "epoch": 0.3826456486996949, "grad_norm": 0.8203907219256951, "learning_rate": 1.3785999449522027e-05, "loss": 0.639, "step": 21070 }, { "epoch": 0.38282725555716984, "grad_norm": 0.8076617911142998, "learning_rate": 1.3780664802066333e-05, "loss": 0.6313, "step": 21080 }, { "epoch": 0.38300886241464477, "grad_norm": 0.7746828328603367, "learning_rate": 1.3775328898963123e-05, "loss": 0.6353, "step": 21090 }, { "epoch": 0.3831904692721197, "grad_norm": 0.8327213507255266, "learning_rate": 1.376999174198458e-05, "loss": 0.6329, "step": 21100 }, { "epoch": 0.3833720761295947, "grad_norm": 0.7879447979197644, "learning_rate": 1.3764653332903295e-05, "loss": 0.6239, "step": 21110 }, { "epoch": 0.3835536829870696, "grad_norm": 0.8294995431515543, "learning_rate": 1.375931367349228e-05, "loss": 0.6322, "step": 21120 }, { "epoch": 0.3837352898445445, "grad_norm": 0.7835009498287067, "learning_rate": 1.3753972765524962e-05, "loss": 0.6286, "step": 21130 }, { "epoch": 0.38391689670201945, "grad_norm": 0.8055477503615514, "learning_rate": 1.3748630610775182e-05, "loss": 0.6355, "step": 21140 }, { "epoch": 0.38409850355949443, "grad_norm": 0.7749814668166527, "learning_rate": 1.3743287211017197e-05, "loss": 0.6423, "step": 21150 }, { "epoch": 0.38428011041696936, "grad_norm": 0.8657971783387873, "learning_rate": 1.3737942568025672e-05, "loss": 0.6388, "step": 21160 }, { "epoch": 0.3844617172744443, "grad_norm": 0.8054938176544186, "learning_rate": 1.3732596683575689e-05, "loss": 0.6427, "step": 21170 }, { "epoch": 0.3846433241319192, "grad_norm": 0.8022114739229531, "learning_rate": 1.3727249559442741e-05, "loss": 0.6336, "step": 21180 }, { "epoch": 0.3848249309893942, "grad_norm": 0.7632705731336765, "learning_rate": 1.3721901197402735e-05, "loss": 0.6278, "step": 21190 }, { "epoch": 0.3850065378468691, "grad_norm": 0.7863090434171895, "learning_rate": 1.371655159923199e-05, "loss": 0.6383, "step": 21200 }, { "epoch": 0.38518814470434404, "grad_norm": 0.8098813853705223, "learning_rate": 1.3711200766707227e-05, "loss": 0.6422, "step": 21210 }, { "epoch": 0.38536975156181896, "grad_norm": 0.8141840111566231, "learning_rate": 1.3705848701605586e-05, "loss": 0.6393, "step": 21220 }, { "epoch": 0.3855513584192939, "grad_norm": 0.8040780627162977, "learning_rate": 1.3700495405704614e-05, "loss": 0.6231, "step": 21230 }, { "epoch": 0.38573296527676887, "grad_norm": 0.8109256684609204, "learning_rate": 1.3695140880782267e-05, "loss": 0.6282, "step": 21240 }, { "epoch": 0.3859145721342438, "grad_norm": 0.8077019714000826, "learning_rate": 1.3689785128616911e-05, "loss": 0.6422, "step": 21250 }, { "epoch": 0.3860961789917187, "grad_norm": 0.8096521301038626, "learning_rate": 1.3684428150987308e-05, "loss": 0.6376, "step": 21260 }, { "epoch": 0.38627778584919364, "grad_norm": 0.8072379045669407, "learning_rate": 1.3679069949672643e-05, "loss": 0.6401, "step": 21270 }, { "epoch": 0.3864593927066686, "grad_norm": 0.8343522561851693, "learning_rate": 1.36737105264525e-05, "loss": 0.6323, "step": 21280 }, { "epoch": 0.38664099956414355, "grad_norm": 0.7765071229830891, "learning_rate": 1.3668349883106866e-05, "loss": 0.6403, "step": 21290 }, { "epoch": 0.3868226064216185, "grad_norm": 0.786230876251259, "learning_rate": 1.3662988021416139e-05, "loss": 0.6385, "step": 21300 }, { "epoch": 0.3870042132790934, "grad_norm": 0.836341041984722, "learning_rate": 1.3657624943161119e-05, "loss": 0.6416, "step": 21310 }, { "epoch": 0.3871858201365684, "grad_norm": 0.8109936772446925, "learning_rate": 1.3652260650123009e-05, "loss": 0.6393, "step": 21320 }, { "epoch": 0.3873674269940433, "grad_norm": 0.767995472474195, "learning_rate": 1.3646895144083416e-05, "loss": 0.627, "step": 21330 }, { "epoch": 0.38754903385151823, "grad_norm": 0.7902195964073245, "learning_rate": 1.3641528426824354e-05, "loss": 0.6453, "step": 21340 }, { "epoch": 0.38773064070899316, "grad_norm": 0.794841805643152, "learning_rate": 1.3636160500128234e-05, "loss": 0.635, "step": 21350 }, { "epoch": 0.3879122475664681, "grad_norm": 0.8023845001218884, "learning_rate": 1.3630791365777872e-05, "loss": 0.6434, "step": 21360 }, { "epoch": 0.38809385442394306, "grad_norm": 0.8137519505341976, "learning_rate": 1.3625421025556477e-05, "loss": 0.638, "step": 21370 }, { "epoch": 0.388275461281418, "grad_norm": 0.7960591924335173, "learning_rate": 1.3620049481247672e-05, "loss": 0.6317, "step": 21380 }, { "epoch": 0.3884570681388929, "grad_norm": 0.7994183764684498, "learning_rate": 1.361467673463547e-05, "loss": 0.6395, "step": 21390 }, { "epoch": 0.38863867499636784, "grad_norm": 0.8499595655181198, "learning_rate": 1.3609302787504289e-05, "loss": 0.646, "step": 21400 }, { "epoch": 0.3888202818538428, "grad_norm": 0.8192802158131034, "learning_rate": 1.3603927641638939e-05, "loss": 0.6582, "step": 21410 }, { "epoch": 0.38900188871131774, "grad_norm": 0.7933983642385122, "learning_rate": 1.3598551298824631e-05, "loss": 0.6345, "step": 21420 }, { "epoch": 0.38918349556879267, "grad_norm": 0.8404006475835214, "learning_rate": 1.3593173760846982e-05, "loss": 0.647, "step": 21430 }, { "epoch": 0.3893651024262676, "grad_norm": 0.7817691760738107, "learning_rate": 1.3587795029491992e-05, "loss": 0.6423, "step": 21440 }, { "epoch": 0.3895467092837426, "grad_norm": 0.781137385096573, "learning_rate": 1.358241510654607e-05, "loss": 0.6261, "step": 21450 }, { "epoch": 0.3897283161412175, "grad_norm": 0.7691448071259168, "learning_rate": 1.3577033993796006e-05, "loss": 0.63, "step": 21460 }, { "epoch": 0.3899099229986924, "grad_norm": 0.7826979132128948, "learning_rate": 1.3571651693029e-05, "loss": 0.6438, "step": 21470 }, { "epoch": 0.39009152985616735, "grad_norm": 0.801274128375069, "learning_rate": 1.356626820603264e-05, "loss": 0.6314, "step": 21480 }, { "epoch": 0.39027313671364233, "grad_norm": 0.805461962827671, "learning_rate": 1.3560883534594905e-05, "loss": 0.6376, "step": 21490 }, { "epoch": 0.39045474357111726, "grad_norm": 0.7741358727426146, "learning_rate": 1.3555497680504175e-05, "loss": 0.6281, "step": 21500 }, { "epoch": 0.3906363504285922, "grad_norm": 0.7972191584041852, "learning_rate": 1.3550110645549215e-05, "loss": 0.6217, "step": 21510 }, { "epoch": 0.3908179572860671, "grad_norm": 0.7903455132409597, "learning_rate": 1.3544722431519186e-05, "loss": 0.6325, "step": 21520 }, { "epoch": 0.39099956414354203, "grad_norm": 0.7945317165836109, "learning_rate": 1.3539333040203644e-05, "loss": 0.6416, "step": 21530 }, { "epoch": 0.391181171001017, "grad_norm": 0.7990176367918457, "learning_rate": 1.3533942473392529e-05, "loss": 0.6204, "step": 21540 }, { "epoch": 0.39136277785849194, "grad_norm": 0.7748432160882923, "learning_rate": 1.3528550732876178e-05, "loss": 0.6352, "step": 21550 }, { "epoch": 0.39154438471596686, "grad_norm": 0.7859311830002473, "learning_rate": 1.3523157820445312e-05, "loss": 0.6283, "step": 21560 }, { "epoch": 0.3917259915734418, "grad_norm": 0.7772077251804612, "learning_rate": 1.3517763737891043e-05, "loss": 0.637, "step": 21570 }, { "epoch": 0.39190759843091677, "grad_norm": 0.7942016169113938, "learning_rate": 1.3512368487004875e-05, "loss": 0.6357, "step": 21580 }, { "epoch": 0.3920892052883917, "grad_norm": 0.8451774552301153, "learning_rate": 1.3506972069578694e-05, "loss": 0.6231, "step": 21590 }, { "epoch": 0.3922708121458666, "grad_norm": 0.8227980957771751, "learning_rate": 1.3501574487404781e-05, "loss": 0.6197, "step": 21600 }, { "epoch": 0.39245241900334155, "grad_norm": 0.7824484847065534, "learning_rate": 1.3496175742275796e-05, "loss": 0.6228, "step": 21610 }, { "epoch": 0.3926340258608165, "grad_norm": 0.8109239403633437, "learning_rate": 1.3490775835984793e-05, "loss": 0.6301, "step": 21620 }, { "epoch": 0.39281563271829145, "grad_norm": 0.8076464587871647, "learning_rate": 1.3485374770325202e-05, "loss": 0.6467, "step": 21630 }, { "epoch": 0.3929972395757664, "grad_norm": 0.7574445059598566, "learning_rate": 1.3479972547090849e-05, "loss": 0.621, "step": 21640 }, { "epoch": 0.3931788464332413, "grad_norm": 0.7720860361182531, "learning_rate": 1.347456916807594e-05, "loss": 0.627, "step": 21650 }, { "epoch": 0.3933604532907163, "grad_norm": 0.7892718019897376, "learning_rate": 1.3469164635075056e-05, "loss": 0.6349, "step": 21660 }, { "epoch": 0.3935420601481912, "grad_norm": 0.7969115869362771, "learning_rate": 1.3463758949883177e-05, "loss": 0.6256, "step": 21670 }, { "epoch": 0.39372366700566613, "grad_norm": 0.7965304245493526, "learning_rate": 1.3458352114295655e-05, "loss": 0.6253, "step": 21680 }, { "epoch": 0.39390527386314106, "grad_norm": 0.8303228167295269, "learning_rate": 1.3452944130108225e-05, "loss": 0.6288, "step": 21690 }, { "epoch": 0.394086880720616, "grad_norm": 0.7875137099839025, "learning_rate": 1.3447534999117009e-05, "loss": 0.647, "step": 21700 }, { "epoch": 0.39426848757809096, "grad_norm": 0.8172982184683523, "learning_rate": 1.3442124723118504e-05, "loss": 0.6187, "step": 21710 }, { "epoch": 0.3944500944355659, "grad_norm": 0.8119935288201642, "learning_rate": 1.3436713303909592e-05, "loss": 0.6387, "step": 21720 }, { "epoch": 0.3946317012930408, "grad_norm": 0.7760127517433187, "learning_rate": 1.343130074328753e-05, "loss": 0.6326, "step": 21730 }, { "epoch": 0.39481330815051574, "grad_norm": 0.7979569518810631, "learning_rate": 1.3425887043049959e-05, "loss": 0.6429, "step": 21740 }, { "epoch": 0.3949949150079907, "grad_norm": 0.8007125809016912, "learning_rate": 1.3420472204994894e-05, "loss": 0.6184, "step": 21750 }, { "epoch": 0.39517652186546565, "grad_norm": 0.8268964406111874, "learning_rate": 1.341505623092073e-05, "loss": 0.6354, "step": 21760 }, { "epoch": 0.39535812872294057, "grad_norm": 0.7825997130942914, "learning_rate": 1.3409639122626238e-05, "loss": 0.6322, "step": 21770 }, { "epoch": 0.3955397355804155, "grad_norm": 0.9069133756912717, "learning_rate": 1.340422088191057e-05, "loss": 0.6284, "step": 21780 }, { "epoch": 0.3957213424378905, "grad_norm": 0.7810375841678974, "learning_rate": 1.339880151057325e-05, "loss": 0.6427, "step": 21790 }, { "epoch": 0.3959029492953654, "grad_norm": 0.7993552017296853, "learning_rate": 1.3393381010414174e-05, "loss": 0.6454, "step": 21800 }, { "epoch": 0.39608455615284033, "grad_norm": 0.7812944924028511, "learning_rate": 1.3387959383233622e-05, "loss": 0.6259, "step": 21810 }, { "epoch": 0.39626616301031525, "grad_norm": 0.7744273504097878, "learning_rate": 1.3382536630832244e-05, "loss": 0.6391, "step": 21820 }, { "epoch": 0.39644776986779023, "grad_norm": 0.7943233978777882, "learning_rate": 1.337711275501106e-05, "loss": 0.6404, "step": 21830 }, { "epoch": 0.39662937672526516, "grad_norm": 0.8105544728717836, "learning_rate": 1.3371687757571467e-05, "loss": 0.6507, "step": 21840 }, { "epoch": 0.3968109835827401, "grad_norm": 0.7943125422845448, "learning_rate": 1.3366261640315239e-05, "loss": 0.6329, "step": 21850 }, { "epoch": 0.396992590440215, "grad_norm": 0.7812251622203287, "learning_rate": 1.3360834405044512e-05, "loss": 0.6311, "step": 21860 }, { "epoch": 0.39717419729768993, "grad_norm": 0.8039397360275754, "learning_rate": 1.3355406053561797e-05, "loss": 0.6259, "step": 21870 }, { "epoch": 0.3973558041551649, "grad_norm": 0.8343309064427084, "learning_rate": 1.334997658766998e-05, "loss": 0.6423, "step": 21880 }, { "epoch": 0.39753741101263984, "grad_norm": 0.8281779812073156, "learning_rate": 1.3344546009172313e-05, "loss": 0.6353, "step": 21890 }, { "epoch": 0.39771901787011477, "grad_norm": 0.7849820759455926, "learning_rate": 1.333911431987242e-05, "loss": 0.6338, "step": 21900 }, { "epoch": 0.3979006247275897, "grad_norm": 0.7927846299558654, "learning_rate": 1.3333681521574288e-05, "loss": 0.6318, "step": 21910 }, { "epoch": 0.39808223158506467, "grad_norm": 0.8166117308690978, "learning_rate": 1.3328247616082281e-05, "loss": 0.6287, "step": 21920 }, { "epoch": 0.3982638384425396, "grad_norm": 0.7837095941309807, "learning_rate": 1.3322812605201128e-05, "loss": 0.621, "step": 21930 }, { "epoch": 0.3984454453000145, "grad_norm": 0.7625435337206057, "learning_rate": 1.331737649073592e-05, "loss": 0.632, "step": 21940 }, { "epoch": 0.39862705215748945, "grad_norm": 0.7892951411591683, "learning_rate": 1.3311939274492123e-05, "loss": 0.6183, "step": 21950 }, { "epoch": 0.39880865901496443, "grad_norm": 0.7703812782731696, "learning_rate": 1.3306500958275557e-05, "loss": 0.6305, "step": 21960 }, { "epoch": 0.39899026587243935, "grad_norm": 0.822831418594348, "learning_rate": 1.3301061543892419e-05, "loss": 0.6289, "step": 21970 }, { "epoch": 0.3991718727299143, "grad_norm": 0.7770590493359208, "learning_rate": 1.3295621033149266e-05, "loss": 0.6337, "step": 21980 }, { "epoch": 0.3993534795873892, "grad_norm": 0.7747499709604415, "learning_rate": 1.3290179427853016e-05, "loss": 0.6204, "step": 21990 }, { "epoch": 0.3995350864448642, "grad_norm": 0.771305382156182, "learning_rate": 1.3284736729810958e-05, "loss": 0.6444, "step": 22000 }, { "epoch": 0.3997166933023391, "grad_norm": 0.7955880415840585, "learning_rate": 1.327929294083074e-05, "loss": 0.643, "step": 22010 }, { "epoch": 0.39989830015981404, "grad_norm": 0.7999247195341327, "learning_rate": 1.3273848062720367e-05, "loss": 0.6426, "step": 22020 }, { "epoch": 0.40007990701728896, "grad_norm": 0.8201399673805597, "learning_rate": 1.3268402097288218e-05, "loss": 0.6429, "step": 22030 }, { "epoch": 0.4002615138747639, "grad_norm": 0.7814709239747507, "learning_rate": 1.3262955046343021e-05, "loss": 0.6296, "step": 22040 }, { "epoch": 0.40044312073223887, "grad_norm": 0.7609242144164771, "learning_rate": 1.325750691169387e-05, "loss": 0.6321, "step": 22050 }, { "epoch": 0.4006247275897138, "grad_norm": 0.7837193357022723, "learning_rate": 1.3252057695150218e-05, "loss": 0.6363, "step": 22060 }, { "epoch": 0.4008063344471887, "grad_norm": 0.803517802835461, "learning_rate": 1.3246607398521878e-05, "loss": 0.6412, "step": 22070 }, { "epoch": 0.40098794130466364, "grad_norm": 0.7903042847574273, "learning_rate": 1.3241156023619023e-05, "loss": 0.6395, "step": 22080 }, { "epoch": 0.4011695481621386, "grad_norm": 0.7969451379218543, "learning_rate": 1.3235703572252184e-05, "loss": 0.6282, "step": 22090 }, { "epoch": 0.40135115501961355, "grad_norm": 0.7867945442811808, "learning_rate": 1.3230250046232243e-05, "loss": 0.6345, "step": 22100 }, { "epoch": 0.4015327618770885, "grad_norm": 0.8035188297307959, "learning_rate": 1.3224795447370445e-05, "loss": 0.6229, "step": 22110 }, { "epoch": 0.4017143687345634, "grad_norm": 0.77178901385994, "learning_rate": 1.3219339777478392e-05, "loss": 0.6359, "step": 22120 }, { "epoch": 0.4018959755920384, "grad_norm": 0.7839164910963092, "learning_rate": 1.3213883038368042e-05, "loss": 0.6313, "step": 22130 }, { "epoch": 0.4020775824495133, "grad_norm": 0.7902989852008317, "learning_rate": 1.3208425231851705e-05, "loss": 0.6431, "step": 22140 }, { "epoch": 0.40225918930698823, "grad_norm": 0.8120115096191194, "learning_rate": 1.3202966359742046e-05, "loss": 0.6325, "step": 22150 }, { "epoch": 0.40244079616446315, "grad_norm": 0.808917957220713, "learning_rate": 1.3197506423852085e-05, "loss": 0.628, "step": 22160 }, { "epoch": 0.40262240302193814, "grad_norm": 0.7797850466222066, "learning_rate": 1.319204542599519e-05, "loss": 0.6431, "step": 22170 }, { "epoch": 0.40280400987941306, "grad_norm": 0.8518715575108601, "learning_rate": 1.3186583367985094e-05, "loss": 0.6554, "step": 22180 }, { "epoch": 0.402985616736888, "grad_norm": 0.7759372693096842, "learning_rate": 1.318112025163587e-05, "loss": 0.6374, "step": 22190 }, { "epoch": 0.4031672235943629, "grad_norm": 0.8070319637023986, "learning_rate": 1.317565607876195e-05, "loss": 0.6353, "step": 22200 }, { "epoch": 0.40334883045183784, "grad_norm": 0.794921905637664, "learning_rate": 1.317019085117811e-05, "loss": 0.6529, "step": 22210 }, { "epoch": 0.4035304373093128, "grad_norm": 0.7846935187709286, "learning_rate": 1.3164724570699485e-05, "loss": 0.6206, "step": 22220 }, { "epoch": 0.40371204416678774, "grad_norm": 0.7708219333496747, "learning_rate": 1.3159257239141552e-05, "loss": 0.6274, "step": 22230 }, { "epoch": 0.40389365102426267, "grad_norm": 0.7820914153549479, "learning_rate": 1.3153788858320142e-05, "loss": 0.6279, "step": 22240 }, { "epoch": 0.4040752578817376, "grad_norm": 0.7789287092957632, "learning_rate": 1.3148319430051432e-05, "loss": 0.6342, "step": 22250 }, { "epoch": 0.4042568647392126, "grad_norm": 0.8152130923210626, "learning_rate": 1.3142848956151948e-05, "loss": 0.6276, "step": 22260 }, { "epoch": 0.4044384715966875, "grad_norm": 0.7719618011748165, "learning_rate": 1.313737743843856e-05, "loss": 0.6372, "step": 22270 }, { "epoch": 0.4046200784541624, "grad_norm": 0.8100892841304898, "learning_rate": 1.313190487872849e-05, "loss": 0.6328, "step": 22280 }, { "epoch": 0.40480168531163735, "grad_norm": 0.8579107273923195, "learning_rate": 1.3126431278839302e-05, "loss": 0.6323, "step": 22290 }, { "epoch": 0.40498329216911233, "grad_norm": 0.7646414265108741, "learning_rate": 1.3120956640588909e-05, "loss": 0.6381, "step": 22300 }, { "epoch": 0.40516489902658726, "grad_norm": 0.7963809367654802, "learning_rate": 1.3115480965795564e-05, "loss": 0.6377, "step": 22310 }, { "epoch": 0.4053465058840622, "grad_norm": 0.7764217785214153, "learning_rate": 1.3110004256277871e-05, "loss": 0.6198, "step": 22320 }, { "epoch": 0.4055281127415371, "grad_norm": 0.7589723704492802, "learning_rate": 1.310452651385477e-05, "loss": 0.6372, "step": 22330 }, { "epoch": 0.4057097195990121, "grad_norm": 0.7976859385062768, "learning_rate": 1.3099047740345552e-05, "loss": 0.628, "step": 22340 }, { "epoch": 0.405891326456487, "grad_norm": 0.8039579789211889, "learning_rate": 1.3093567937569845e-05, "loss": 0.6328, "step": 22350 }, { "epoch": 0.40607293331396194, "grad_norm": 0.7906480675807946, "learning_rate": 1.3088087107347616e-05, "loss": 0.6339, "step": 22360 }, { "epoch": 0.40625454017143686, "grad_norm": 0.7818358631374481, "learning_rate": 1.308260525149918e-05, "loss": 0.6298, "step": 22370 }, { "epoch": 0.4064361470289118, "grad_norm": 0.8248069698609598, "learning_rate": 1.3077122371845193e-05, "loss": 0.6388, "step": 22380 }, { "epoch": 0.40661775388638677, "grad_norm": 0.7804156441314481, "learning_rate": 1.3071638470206642e-05, "loss": 0.6339, "step": 22390 }, { "epoch": 0.4067993607438617, "grad_norm": 0.7945403367866264, "learning_rate": 1.3066153548404863e-05, "loss": 0.6348, "step": 22400 }, { "epoch": 0.4069809676013366, "grad_norm": 0.7796989073466092, "learning_rate": 1.3060667608261529e-05, "loss": 0.637, "step": 22410 }, { "epoch": 0.40716257445881154, "grad_norm": 0.7946978088938557, "learning_rate": 1.3055180651598647e-05, "loss": 0.6352, "step": 22420 }, { "epoch": 0.4073441813162865, "grad_norm": 0.7814935601222147, "learning_rate": 1.3049692680238563e-05, "loss": 0.641, "step": 22430 }, { "epoch": 0.40752578817376145, "grad_norm": 0.7944440206368436, "learning_rate": 1.3044203696003965e-05, "loss": 0.6283, "step": 22440 }, { "epoch": 0.4077073950312364, "grad_norm": 0.7791530131508159, "learning_rate": 1.3038713700717872e-05, "loss": 0.6311, "step": 22450 }, { "epoch": 0.4078890018887113, "grad_norm": 0.812334983417402, "learning_rate": 1.3033222696203636e-05, "loss": 0.618, "step": 22460 }, { "epoch": 0.4080706087461863, "grad_norm": 0.8204490703350698, "learning_rate": 1.3027730684284954e-05, "loss": 0.643, "step": 22470 }, { "epoch": 0.4082522156036612, "grad_norm": 0.7861543429522293, "learning_rate": 1.3022237666785849e-05, "loss": 0.6264, "step": 22480 }, { "epoch": 0.40843382246113613, "grad_norm": 0.8114448540835532, "learning_rate": 1.301674364553068e-05, "loss": 0.6329, "step": 22490 }, { "epoch": 0.40861542931861106, "grad_norm": 0.7783540401896589, "learning_rate": 1.3011248622344146e-05, "loss": 0.6338, "step": 22500 }, { "epoch": 0.408797036176086, "grad_norm": 0.8348972273709674, "learning_rate": 1.3005752599051267e-05, "loss": 0.6438, "step": 22510 }, { "epoch": 0.40897864303356096, "grad_norm": 0.7818966568394914, "learning_rate": 1.3000255577477404e-05, "loss": 0.6354, "step": 22520 }, { "epoch": 0.4091602498910359, "grad_norm": 0.8146474311574076, "learning_rate": 1.2994757559448249e-05, "loss": 0.6298, "step": 22530 }, { "epoch": 0.4093418567485108, "grad_norm": 0.7847149495787081, "learning_rate": 1.2989258546789818e-05, "loss": 0.6327, "step": 22540 }, { "epoch": 0.40952346360598574, "grad_norm": 0.8048948969790757, "learning_rate": 1.2983758541328468e-05, "loss": 0.6455, "step": 22550 }, { "epoch": 0.4097050704634607, "grad_norm": 0.821977389762622, "learning_rate": 1.2978257544890875e-05, "loss": 0.6383, "step": 22560 }, { "epoch": 0.40988667732093564, "grad_norm": 0.8177304189396596, "learning_rate": 1.2972755559304048e-05, "loss": 0.6347, "step": 22570 }, { "epoch": 0.41006828417841057, "grad_norm": 0.8102335093947931, "learning_rate": 1.2967252586395333e-05, "loss": 0.6384, "step": 22580 }, { "epoch": 0.4102498910358855, "grad_norm": 0.8404408684838004, "learning_rate": 1.296174862799239e-05, "loss": 0.6413, "step": 22590 }, { "epoch": 0.4104314978933605, "grad_norm": 0.7827481924483792, "learning_rate": 1.2956243685923214e-05, "loss": 0.6454, "step": 22600 }, { "epoch": 0.4106131047508354, "grad_norm": 0.7956549024071249, "learning_rate": 1.2950737762016124e-05, "loss": 0.6283, "step": 22610 }, { "epoch": 0.4107947116083103, "grad_norm": 0.8021980864640567, "learning_rate": 1.2945230858099771e-05, "loss": 0.6304, "step": 22620 }, { "epoch": 0.41097631846578525, "grad_norm": 0.8027793260540464, "learning_rate": 1.2939722976003123e-05, "loss": 0.6247, "step": 22630 }, { "epoch": 0.41115792532326023, "grad_norm": 0.8033396928574353, "learning_rate": 1.293421411755548e-05, "loss": 0.6286, "step": 22640 }, { "epoch": 0.41133953218073516, "grad_norm": 0.8253565162506915, "learning_rate": 1.2928704284586461e-05, "loss": 0.6457, "step": 22650 }, { "epoch": 0.4115211390382101, "grad_norm": 0.8058854439345888, "learning_rate": 1.2923193478926012e-05, "loss": 0.6226, "step": 22660 }, { "epoch": 0.411702745895685, "grad_norm": 0.7870736193022131, "learning_rate": 1.29176817024044e-05, "loss": 0.6342, "step": 22670 }, { "epoch": 0.41188435275315993, "grad_norm": 0.8151704307809902, "learning_rate": 1.2912168956852216e-05, "loss": 0.6548, "step": 22680 }, { "epoch": 0.4120659596106349, "grad_norm": 0.7892550388486019, "learning_rate": 1.2906655244100372e-05, "loss": 0.6328, "step": 22690 }, { "epoch": 0.41224756646810984, "grad_norm": 0.7822813301740866, "learning_rate": 1.2901140565980099e-05, "loss": 0.6278, "step": 22700 }, { "epoch": 0.41242917332558476, "grad_norm": 0.7943469247228125, "learning_rate": 1.2895624924322953e-05, "loss": 0.6411, "step": 22710 }, { "epoch": 0.4126107801830597, "grad_norm": 0.8265936704933986, "learning_rate": 1.2890108320960812e-05, "loss": 0.6315, "step": 22720 }, { "epoch": 0.41279238704053467, "grad_norm": 0.8148174647324435, "learning_rate": 1.2884590757725864e-05, "loss": 0.6331, "step": 22730 }, { "epoch": 0.4129739938980096, "grad_norm": 0.8189428641732668, "learning_rate": 1.2879072236450623e-05, "loss": 0.6372, "step": 22740 }, { "epoch": 0.4131556007554845, "grad_norm": 0.790597386015318, "learning_rate": 1.287355275896792e-05, "loss": 0.6395, "step": 22750 }, { "epoch": 0.41333720761295945, "grad_norm": 0.8074320169482656, "learning_rate": 1.2868032327110904e-05, "loss": 0.6327, "step": 22760 }, { "epoch": 0.4135188144704344, "grad_norm": 0.7904010307729344, "learning_rate": 1.2862510942713038e-05, "loss": 0.6283, "step": 22770 }, { "epoch": 0.41370042132790935, "grad_norm": 0.7821048033002067, "learning_rate": 1.2856988607608103e-05, "loss": 0.6309, "step": 22780 }, { "epoch": 0.4138820281853843, "grad_norm": 0.785533606814579, "learning_rate": 1.2851465323630196e-05, "loss": 0.626, "step": 22790 }, { "epoch": 0.4140636350428592, "grad_norm": 0.794614431119556, "learning_rate": 1.2845941092613731e-05, "loss": 0.6474, "step": 22800 }, { "epoch": 0.4142452419003342, "grad_norm": 0.7936929648446815, "learning_rate": 1.2840415916393434e-05, "loss": 0.6228, "step": 22810 }, { "epoch": 0.4144268487578091, "grad_norm": 0.8284051139178619, "learning_rate": 1.2834889796804345e-05, "loss": 0.628, "step": 22820 }, { "epoch": 0.41460845561528403, "grad_norm": 0.8155599203013358, "learning_rate": 1.282936273568182e-05, "loss": 0.6461, "step": 22830 }, { "epoch": 0.41479006247275896, "grad_norm": 0.7500655906054178, "learning_rate": 1.2823834734861525e-05, "loss": 0.6316, "step": 22840 }, { "epoch": 0.4149716693302339, "grad_norm": 0.8051082608705614, "learning_rate": 1.2818305796179438e-05, "loss": 0.6415, "step": 22850 }, { "epoch": 0.41515327618770886, "grad_norm": 0.7899233762662512, "learning_rate": 1.281277592147185e-05, "loss": 0.6126, "step": 22860 }, { "epoch": 0.4153348830451838, "grad_norm": 0.8000198466495876, "learning_rate": 1.2807245112575359e-05, "loss": 0.6251, "step": 22870 }, { "epoch": 0.4155164899026587, "grad_norm": 0.8044836265030252, "learning_rate": 1.2801713371326883e-05, "loss": 0.6227, "step": 22880 }, { "epoch": 0.41569809676013364, "grad_norm": 0.7707123531388468, "learning_rate": 1.2796180699563637e-05, "loss": 0.6269, "step": 22890 }, { "epoch": 0.4158797036176086, "grad_norm": 0.7593110651184254, "learning_rate": 1.2790647099123153e-05, "loss": 0.6397, "step": 22900 }, { "epoch": 0.41606131047508355, "grad_norm": 0.816066528233612, "learning_rate": 1.2785112571843268e-05, "loss": 0.6326, "step": 22910 }, { "epoch": 0.41624291733255847, "grad_norm": 0.823891808529459, "learning_rate": 1.2779577119562132e-05, "loss": 0.6373, "step": 22920 }, { "epoch": 0.4164245241900334, "grad_norm": 0.8736527434526141, "learning_rate": 1.2774040744118196e-05, "loss": 0.6458, "step": 22930 }, { "epoch": 0.4166061310475084, "grad_norm": 0.8033969633572412, "learning_rate": 1.2768503447350224e-05, "loss": 0.639, "step": 22940 }, { "epoch": 0.4167877379049833, "grad_norm": 0.8100152224671956, "learning_rate": 1.276296523109728e-05, "loss": 0.623, "step": 22950 }, { "epoch": 0.4169693447624582, "grad_norm": 0.8075300068332366, "learning_rate": 1.2757426097198735e-05, "loss": 0.627, "step": 22960 }, { "epoch": 0.41715095161993315, "grad_norm": 0.7693388673060039, "learning_rate": 1.2751886047494266e-05, "loss": 0.6518, "step": 22970 }, { "epoch": 0.41733255847740813, "grad_norm": 0.8065868291484775, "learning_rate": 1.274634508382385e-05, "loss": 0.6487, "step": 22980 }, { "epoch": 0.41751416533488306, "grad_norm": 0.8006479561588351, "learning_rate": 1.2740803208027775e-05, "loss": 0.6254, "step": 22990 }, { "epoch": 0.417695772192358, "grad_norm": 0.7831246237926768, "learning_rate": 1.2735260421946627e-05, "loss": 0.6369, "step": 23000 }, { "epoch": 0.4178773790498329, "grad_norm": 0.7753216593126592, "learning_rate": 1.2729716727421297e-05, "loss": 0.6254, "step": 23010 }, { "epoch": 0.41805898590730783, "grad_norm": 0.7677528537640461, "learning_rate": 1.2724172126292973e-05, "loss": 0.6382, "step": 23020 }, { "epoch": 0.4182405927647828, "grad_norm": 0.77586219547189, "learning_rate": 1.271862662040315e-05, "loss": 0.6317, "step": 23030 }, { "epoch": 0.41842219962225774, "grad_norm": 0.8044288022384594, "learning_rate": 1.2713080211593618e-05, "loss": 0.6423, "step": 23040 }, { "epoch": 0.41860380647973267, "grad_norm": 0.8079691630409839, "learning_rate": 1.270753290170647e-05, "loss": 0.649, "step": 23050 }, { "epoch": 0.4187854133372076, "grad_norm": 0.777300773791333, "learning_rate": 1.2701984692584102e-05, "loss": 0.6369, "step": 23060 }, { "epoch": 0.41896702019468257, "grad_norm": 0.7774307506074853, "learning_rate": 1.2696435586069198e-05, "loss": 0.6322, "step": 23070 }, { "epoch": 0.4191486270521575, "grad_norm": 0.785887475273553, "learning_rate": 1.2690885584004753e-05, "loss": 0.6303, "step": 23080 }, { "epoch": 0.4193302339096324, "grad_norm": 0.7824969247765722, "learning_rate": 1.2685334688234046e-05, "loss": 0.637, "step": 23090 }, { "epoch": 0.41951184076710735, "grad_norm": 0.7701132566937872, "learning_rate": 1.2679782900600665e-05, "loss": 0.634, "step": 23100 }, { "epoch": 0.4196934476245823, "grad_norm": 0.794797575959735, "learning_rate": 1.2674230222948484e-05, "loss": 0.6341, "step": 23110 }, { "epoch": 0.41987505448205725, "grad_norm": 0.8156535530454394, "learning_rate": 1.2668676657121686e-05, "loss": 0.6413, "step": 23120 }, { "epoch": 0.4200566613395322, "grad_norm": 0.8113263887211091, "learning_rate": 1.2663122204964734e-05, "loss": 0.6372, "step": 23130 }, { "epoch": 0.4202382681970071, "grad_norm": 0.7637484114227605, "learning_rate": 1.2657566868322391e-05, "loss": 0.6109, "step": 23140 }, { "epoch": 0.4204198750544821, "grad_norm": 0.7580650324649502, "learning_rate": 1.2652010649039718e-05, "loss": 0.6229, "step": 23150 }, { "epoch": 0.420601481911957, "grad_norm": 0.8114403742628955, "learning_rate": 1.264645354896207e-05, "loss": 0.6328, "step": 23160 }, { "epoch": 0.42078308876943193, "grad_norm": 0.7804249057490272, "learning_rate": 1.2640895569935081e-05, "loss": 0.6264, "step": 23170 }, { "epoch": 0.42096469562690686, "grad_norm": 0.786827471395438, "learning_rate": 1.263533671380469e-05, "loss": 0.6255, "step": 23180 }, { "epoch": 0.4211463024843818, "grad_norm": 0.8194977957384895, "learning_rate": 1.2629776982417125e-05, "loss": 0.6346, "step": 23190 }, { "epoch": 0.42132790934185677, "grad_norm": 0.7723841894022391, "learning_rate": 1.2624216377618903e-05, "loss": 0.6326, "step": 23200 }, { "epoch": 0.4215095161993317, "grad_norm": 0.7922350846962113, "learning_rate": 1.2618654901256831e-05, "loss": 0.6318, "step": 23210 }, { "epoch": 0.4216911230568066, "grad_norm": 0.8011827444003154, "learning_rate": 1.2613092555178004e-05, "loss": 0.637, "step": 23220 }, { "epoch": 0.42187272991428154, "grad_norm": 0.7859046954846711, "learning_rate": 1.260752934122981e-05, "loss": 0.654, "step": 23230 }, { "epoch": 0.4220543367717565, "grad_norm": 0.7933194940323456, "learning_rate": 1.2601965261259923e-05, "loss": 0.6311, "step": 23240 }, { "epoch": 0.42223594362923145, "grad_norm": 0.7394592070387919, "learning_rate": 1.2596400317116301e-05, "loss": 0.6433, "step": 23250 }, { "epoch": 0.4224175504867064, "grad_norm": 0.7791121387870142, "learning_rate": 1.25908345106472e-05, "loss": 0.6319, "step": 23260 }, { "epoch": 0.4225991573441813, "grad_norm": 0.8138821441063397, "learning_rate": 1.2585267843701148e-05, "loss": 0.6246, "step": 23270 }, { "epoch": 0.4227807642016563, "grad_norm": 0.7787630344297529, "learning_rate": 1.2579700318126968e-05, "loss": 0.6335, "step": 23280 }, { "epoch": 0.4229623710591312, "grad_norm": 0.8142195976688584, "learning_rate": 1.2574131935773765e-05, "loss": 0.6405, "step": 23290 }, { "epoch": 0.42314397791660613, "grad_norm": 0.7933790648296415, "learning_rate": 1.2568562698490934e-05, "loss": 0.6358, "step": 23300 }, { "epoch": 0.42332558477408105, "grad_norm": 0.7898822590999476, "learning_rate": 1.256299260812814e-05, "loss": 0.6418, "step": 23310 }, { "epoch": 0.42350719163155603, "grad_norm": 0.8105759894483028, "learning_rate": 1.2557421666535348e-05, "loss": 0.6457, "step": 23320 }, { "epoch": 0.42368879848903096, "grad_norm": 0.7765641716127036, "learning_rate": 1.25518498755628e-05, "loss": 0.6407, "step": 23330 }, { "epoch": 0.4238704053465059, "grad_norm": 0.7980226835999761, "learning_rate": 1.2546277237061016e-05, "loss": 0.6411, "step": 23340 }, { "epoch": 0.4240520122039808, "grad_norm": 0.7924171462700276, "learning_rate": 1.2540703752880797e-05, "loss": 0.6184, "step": 23350 }, { "epoch": 0.42423361906145574, "grad_norm": 0.7563939780657688, "learning_rate": 1.2535129424873233e-05, "loss": 0.6283, "step": 23360 }, { "epoch": 0.4244152259189307, "grad_norm": 0.7812489172134536, "learning_rate": 1.2529554254889687e-05, "loss": 0.6307, "step": 23370 }, { "epoch": 0.42459683277640564, "grad_norm": 0.8133549951660096, "learning_rate": 1.25239782447818e-05, "loss": 0.6402, "step": 23380 }, { "epoch": 0.42477843963388057, "grad_norm": 0.7958288741195908, "learning_rate": 1.25184013964015e-05, "loss": 0.6338, "step": 23390 }, { "epoch": 0.4249600464913555, "grad_norm": 0.796340445886066, "learning_rate": 1.251282371160099e-05, "loss": 0.6195, "step": 23400 }, { "epoch": 0.4251416533488305, "grad_norm": 0.795998985244561, "learning_rate": 1.250724519223275e-05, "loss": 0.6258, "step": 23410 }, { "epoch": 0.4253232602063054, "grad_norm": 0.8226461418645703, "learning_rate": 1.2501665840149532e-05, "loss": 0.6442, "step": 23420 }, { "epoch": 0.4255048670637803, "grad_norm": 0.7954120587874659, "learning_rate": 1.2496085657204378e-05, "loss": 0.6303, "step": 23430 }, { "epoch": 0.42568647392125525, "grad_norm": 0.7399091572951557, "learning_rate": 1.249050464525059e-05, "loss": 0.6398, "step": 23440 }, { "epoch": 0.42586808077873023, "grad_norm": 0.7914211911168616, "learning_rate": 1.2484922806141757e-05, "loss": 0.6323, "step": 23450 }, { "epoch": 0.42604968763620515, "grad_norm": 0.8107981785198076, "learning_rate": 1.2479340141731742e-05, "loss": 0.6252, "step": 23460 }, { "epoch": 0.4262312944936801, "grad_norm": 0.8264681781240041, "learning_rate": 1.2473756653874672e-05, "loss": 0.6294, "step": 23470 }, { "epoch": 0.426412901351155, "grad_norm": 0.7854721817728131, "learning_rate": 1.2468172344424956e-05, "loss": 0.6309, "step": 23480 }, { "epoch": 0.42659450820863, "grad_norm": 0.8346281103392095, "learning_rate": 1.2462587215237277e-05, "loss": 0.6405, "step": 23490 }, { "epoch": 0.4267761150661049, "grad_norm": 0.7913972345601503, "learning_rate": 1.2457001268166585e-05, "loss": 0.632, "step": 23500 }, { "epoch": 0.42695772192357984, "grad_norm": 0.7673868856055962, "learning_rate": 1.2451414505068105e-05, "loss": 0.6264, "step": 23510 }, { "epoch": 0.42713932878105476, "grad_norm": 0.8106989543892801, "learning_rate": 1.2445826927797334e-05, "loss": 0.6274, "step": 23520 }, { "epoch": 0.4273209356385297, "grad_norm": 0.7746168120021903, "learning_rate": 1.2440238538210034e-05, "loss": 0.6231, "step": 23530 }, { "epoch": 0.42750254249600467, "grad_norm": 0.7755834914116786, "learning_rate": 1.2434649338162243e-05, "loss": 0.6298, "step": 23540 }, { "epoch": 0.4276841493534796, "grad_norm": 0.7940068325407359, "learning_rate": 1.2429059329510267e-05, "loss": 0.6344, "step": 23550 }, { "epoch": 0.4278657562109545, "grad_norm": 0.7924305610202961, "learning_rate": 1.2423468514110677e-05, "loss": 0.6466, "step": 23560 }, { "epoch": 0.42804736306842944, "grad_norm": 0.8621513535832984, "learning_rate": 1.241787689382031e-05, "loss": 0.6259, "step": 23570 }, { "epoch": 0.4282289699259044, "grad_norm": 0.8047365384891538, "learning_rate": 1.2412284470496281e-05, "loss": 0.6383, "step": 23580 }, { "epoch": 0.42841057678337935, "grad_norm": 0.7798358810193773, "learning_rate": 1.2406691245995964e-05, "loss": 0.6338, "step": 23590 }, { "epoch": 0.4285921836408543, "grad_norm": 0.7732319850107318, "learning_rate": 1.2401097222176993e-05, "loss": 0.6336, "step": 23600 }, { "epoch": 0.4287737904983292, "grad_norm": 0.8101910641103583, "learning_rate": 1.2395502400897286e-05, "loss": 0.6321, "step": 23610 }, { "epoch": 0.4289553973558042, "grad_norm": 0.7718713441132536, "learning_rate": 1.2389906784015005e-05, "loss": 0.628, "step": 23620 }, { "epoch": 0.4291370042132791, "grad_norm": 0.7957969877251938, "learning_rate": 1.2384310373388588e-05, "loss": 0.6296, "step": 23630 }, { "epoch": 0.42931861107075403, "grad_norm": 0.795362090330362, "learning_rate": 1.2378713170876737e-05, "loss": 0.6295, "step": 23640 }, { "epoch": 0.42950021792822896, "grad_norm": 0.7508430965077066, "learning_rate": 1.2373115178338416e-05, "loss": 0.6425, "step": 23650 }, { "epoch": 0.4296818247857039, "grad_norm": 0.7920205133452405, "learning_rate": 1.2367516397632844e-05, "loss": 0.6292, "step": 23660 }, { "epoch": 0.42986343164317886, "grad_norm": 0.8069242970558917, "learning_rate": 1.2361916830619508e-05, "loss": 0.6295, "step": 23670 }, { "epoch": 0.4300450385006538, "grad_norm": 0.8091570903632074, "learning_rate": 1.2356316479158162e-05, "loss": 0.6283, "step": 23680 }, { "epoch": 0.4302266453581287, "grad_norm": 0.8020455685324634, "learning_rate": 1.2350715345108805e-05, "loss": 0.6317, "step": 23690 }, { "epoch": 0.43040825221560364, "grad_norm": 0.7876424554285248, "learning_rate": 1.2345113430331713e-05, "loss": 0.6307, "step": 23700 }, { "epoch": 0.4305898590730786, "grad_norm": 0.8297360779908803, "learning_rate": 1.2339510736687406e-05, "loss": 0.627, "step": 23710 }, { "epoch": 0.43077146593055354, "grad_norm": 0.7709348895136934, "learning_rate": 1.2333907266036677e-05, "loss": 0.6318, "step": 23720 }, { "epoch": 0.43095307278802847, "grad_norm": 0.7956819058544659, "learning_rate": 1.2328303020240569e-05, "loss": 0.6282, "step": 23730 }, { "epoch": 0.4311346796455034, "grad_norm": 0.7749711048179153, "learning_rate": 1.232269800116038e-05, "loss": 0.6251, "step": 23740 }, { "epoch": 0.4313162865029784, "grad_norm": 0.7497520494709168, "learning_rate": 1.231709221065767e-05, "loss": 0.6289, "step": 23750 }, { "epoch": 0.4314978933604533, "grad_norm": 0.7733937049461652, "learning_rate": 1.2311485650594256e-05, "loss": 0.629, "step": 23760 }, { "epoch": 0.4316795002179282, "grad_norm": 0.8500678178757163, "learning_rate": 1.2305878322832208e-05, "loss": 0.6317, "step": 23770 }, { "epoch": 0.43186110707540315, "grad_norm": 0.7867029827258855, "learning_rate": 1.2300270229233846e-05, "loss": 0.6297, "step": 23780 }, { "epoch": 0.43204271393287813, "grad_norm": 0.7922744154356706, "learning_rate": 1.2294661371661753e-05, "loss": 0.6354, "step": 23790 }, { "epoch": 0.43222432079035306, "grad_norm": 0.790572236273273, "learning_rate": 1.2289051751978762e-05, "loss": 0.6239, "step": 23800 }, { "epoch": 0.432405927647828, "grad_norm": 0.8120584761983458, "learning_rate": 1.2283441372047959e-05, "loss": 0.6438, "step": 23810 }, { "epoch": 0.4325875345053029, "grad_norm": 0.7770745575718956, "learning_rate": 1.2277830233732684e-05, "loss": 0.6338, "step": 23820 }, { "epoch": 0.43276914136277783, "grad_norm": 0.8090151179424983, "learning_rate": 1.2272218338896527e-05, "loss": 0.6332, "step": 23830 }, { "epoch": 0.4329507482202528, "grad_norm": 0.8359047722942516, "learning_rate": 1.226660568940333e-05, "loss": 0.6312, "step": 23840 }, { "epoch": 0.43313235507772774, "grad_norm": 0.7953918379568184, "learning_rate": 1.2260992287117186e-05, "loss": 0.6527, "step": 23850 }, { "epoch": 0.43331396193520266, "grad_norm": 0.7917106362130549, "learning_rate": 1.2255378133902439e-05, "loss": 0.6414, "step": 23860 }, { "epoch": 0.4334955687926776, "grad_norm": 0.778923061937481, "learning_rate": 1.2249763231623676e-05, "loss": 0.6414, "step": 23870 }, { "epoch": 0.43367717565015257, "grad_norm": 0.8133258609872266, "learning_rate": 1.2244147582145742e-05, "loss": 0.6369, "step": 23880 }, { "epoch": 0.4338587825076275, "grad_norm": 0.767443813161579, "learning_rate": 1.2238531187333726e-05, "loss": 0.6364, "step": 23890 }, { "epoch": 0.4340403893651024, "grad_norm": 0.7926562161810151, "learning_rate": 1.2232914049052961e-05, "loss": 0.6462, "step": 23900 }, { "epoch": 0.43422199622257734, "grad_norm": 0.8088306558136066, "learning_rate": 1.2227296169169034e-05, "loss": 0.6375, "step": 23910 }, { "epoch": 0.4344036030800523, "grad_norm": 0.8229613307713151, "learning_rate": 1.2221677549547777e-05, "loss": 0.6165, "step": 23920 }, { "epoch": 0.43458520993752725, "grad_norm": 0.7818720831915358, "learning_rate": 1.221605819205526e-05, "loss": 0.6544, "step": 23930 }, { "epoch": 0.4347668167950022, "grad_norm": 0.7947872709948766, "learning_rate": 1.2210438098557804e-05, "loss": 0.6365, "step": 23940 }, { "epoch": 0.4349484236524771, "grad_norm": 0.7823112675513043, "learning_rate": 1.2204817270921977e-05, "loss": 0.615, "step": 23950 }, { "epoch": 0.4351300305099521, "grad_norm": 0.7546597131829654, "learning_rate": 1.219919571101459e-05, "loss": 0.6368, "step": 23960 }, { "epoch": 0.435311637367427, "grad_norm": 0.7861181311303312, "learning_rate": 1.2193573420702692e-05, "loss": 0.6369, "step": 23970 }, { "epoch": 0.43549324422490193, "grad_norm": 0.7684215273552584, "learning_rate": 1.2187950401853573e-05, "loss": 0.6466, "step": 23980 }, { "epoch": 0.43567485108237686, "grad_norm": 0.7648860121322364, "learning_rate": 1.2182326656334773e-05, "loss": 0.6281, "step": 23990 }, { "epoch": 0.4358564579398518, "grad_norm": 0.7378093217400838, "learning_rate": 1.2176702186014073e-05, "loss": 0.6143, "step": 24000 }, { "epoch": 0.43603806479732676, "grad_norm": 0.8013462495283112, "learning_rate": 1.217107699275949e-05, "loss": 0.6289, "step": 24010 }, { "epoch": 0.4362196716548017, "grad_norm": 0.7444806074185047, "learning_rate": 1.216545107843928e-05, "loss": 0.6361, "step": 24020 }, { "epoch": 0.4364012785122766, "grad_norm": 0.7960571176584859, "learning_rate": 1.2159824444921942e-05, "loss": 0.6319, "step": 24030 }, { "epoch": 0.43658288536975154, "grad_norm": 0.7921289742711825, "learning_rate": 1.2154197094076214e-05, "loss": 0.6519, "step": 24040 }, { "epoch": 0.4367644922272265, "grad_norm": 0.7799790780191412, "learning_rate": 1.2148569027771074e-05, "loss": 0.6423, "step": 24050 }, { "epoch": 0.43694609908470144, "grad_norm": 0.8211823195309921, "learning_rate": 1.2142940247875735e-05, "loss": 0.6221, "step": 24060 }, { "epoch": 0.43712770594217637, "grad_norm": 0.7721947398705987, "learning_rate": 1.2137310756259639e-05, "loss": 0.6423, "step": 24070 }, { "epoch": 0.4373093127996513, "grad_norm": 0.7923607803809453, "learning_rate": 1.2131680554792477e-05, "loss": 0.6345, "step": 24080 }, { "epoch": 0.4374909196571263, "grad_norm": 0.7821819254174722, "learning_rate": 1.2126049645344175e-05, "loss": 0.6336, "step": 24090 }, { "epoch": 0.4376725265146012, "grad_norm": 0.8277937991267484, "learning_rate": 1.2120418029784885e-05, "loss": 0.6378, "step": 24100 }, { "epoch": 0.4378541333720761, "grad_norm": 0.8204209597975339, "learning_rate": 1.2114785709984997e-05, "loss": 0.641, "step": 24110 }, { "epoch": 0.43803574022955105, "grad_norm": 0.7901577734690395, "learning_rate": 1.2109152687815144e-05, "loss": 0.6256, "step": 24120 }, { "epoch": 0.43821734708702603, "grad_norm": 0.783282084505769, "learning_rate": 1.2103518965146179e-05, "loss": 0.6296, "step": 24130 }, { "epoch": 0.43839895394450096, "grad_norm": 0.8136522619811808, "learning_rate": 1.2097884543849199e-05, "loss": 0.6289, "step": 24140 }, { "epoch": 0.4385805608019759, "grad_norm": 0.7836183490933589, "learning_rate": 1.209224942579552e-05, "loss": 0.6289, "step": 24150 }, { "epoch": 0.4387621676594508, "grad_norm": 0.8035209831339278, "learning_rate": 1.2086613612856705e-05, "loss": 0.6354, "step": 24160 }, { "epoch": 0.43894377451692573, "grad_norm": 0.8020740736703409, "learning_rate": 1.2080977106904535e-05, "loss": 0.6287, "step": 24170 }, { "epoch": 0.4391253813744007, "grad_norm": 0.7938570243450443, "learning_rate": 1.2075339909811024e-05, "loss": 0.6323, "step": 24180 }, { "epoch": 0.43930698823187564, "grad_norm": 0.7783981761653078, "learning_rate": 1.206970202344842e-05, "loss": 0.6286, "step": 24190 }, { "epoch": 0.43948859508935056, "grad_norm": 0.8025099941012674, "learning_rate": 1.2064063449689198e-05, "loss": 0.6365, "step": 24200 }, { "epoch": 0.4396702019468255, "grad_norm": 0.7739972095837314, "learning_rate": 1.2058424190406061e-05, "loss": 0.6366, "step": 24210 }, { "epoch": 0.43985180880430047, "grad_norm": 0.7762964312584825, "learning_rate": 1.2052784247471938e-05, "loss": 0.6232, "step": 24220 }, { "epoch": 0.4400334156617754, "grad_norm": 0.783041728832148, "learning_rate": 1.2047143622759986e-05, "loss": 0.6332, "step": 24230 }, { "epoch": 0.4402150225192503, "grad_norm": 0.8012851830162169, "learning_rate": 1.2041502318143592e-05, "loss": 0.6499, "step": 24240 }, { "epoch": 0.44039662937672525, "grad_norm": 0.796706615786962, "learning_rate": 1.2035860335496364e-05, "loss": 0.6361, "step": 24250 }, { "epoch": 0.4405782362342002, "grad_norm": 0.7970008866782532, "learning_rate": 1.2030217676692133e-05, "loss": 0.6504, "step": 24260 }, { "epoch": 0.44075984309167515, "grad_norm": 0.784205694235422, "learning_rate": 1.2024574343604965e-05, "loss": 0.6321, "step": 24270 }, { "epoch": 0.4409414499491501, "grad_norm": 0.7875479510988294, "learning_rate": 1.2018930338109138e-05, "loss": 0.626, "step": 24280 }, { "epoch": 0.441123056806625, "grad_norm": 0.7595339363692345, "learning_rate": 1.2013285662079161e-05, "loss": 0.6253, "step": 24290 }, { "epoch": 0.4413046636641, "grad_norm": 0.7617634146072585, "learning_rate": 1.2007640317389764e-05, "loss": 0.6403, "step": 24300 }, { "epoch": 0.4414862705215749, "grad_norm": 0.8052695911546455, "learning_rate": 1.2001994305915895e-05, "loss": 0.6321, "step": 24310 }, { "epoch": 0.44166787737904983, "grad_norm": 0.7378537055950425, "learning_rate": 1.1996347629532733e-05, "loss": 0.6113, "step": 24320 }, { "epoch": 0.44184948423652476, "grad_norm": 0.7927924304331914, "learning_rate": 1.1990700290115664e-05, "loss": 0.6344, "step": 24330 }, { "epoch": 0.4420310910939997, "grad_norm": 0.7742886000405349, "learning_rate": 1.198505228954031e-05, "loss": 0.6356, "step": 24340 }, { "epoch": 0.44221269795147466, "grad_norm": 0.7993504517590462, "learning_rate": 1.19794036296825e-05, "loss": 0.6273, "step": 24350 }, { "epoch": 0.4423943048089496, "grad_norm": 0.7720971963454708, "learning_rate": 1.1973754312418287e-05, "loss": 0.627, "step": 24360 }, { "epoch": 0.4425759116664245, "grad_norm": 0.7853018188242924, "learning_rate": 1.1968104339623944e-05, "loss": 0.6329, "step": 24370 }, { "epoch": 0.44275751852389944, "grad_norm": 0.8157917435518919, "learning_rate": 1.1962453713175957e-05, "loss": 0.6302, "step": 24380 }, { "epoch": 0.4429391253813744, "grad_norm": 0.7957095051015058, "learning_rate": 1.1956802434951031e-05, "loss": 0.6294, "step": 24390 }, { "epoch": 0.44312073223884935, "grad_norm": 0.7619391607091643, "learning_rate": 1.195115050682609e-05, "loss": 0.6125, "step": 24400 }, { "epoch": 0.44330233909632427, "grad_norm": 0.7836148654344649, "learning_rate": 1.1945497930678274e-05, "loss": 0.6334, "step": 24410 }, { "epoch": 0.4434839459537992, "grad_norm": 0.8090918793481064, "learning_rate": 1.1939844708384932e-05, "loss": 0.6349, "step": 24420 }, { "epoch": 0.4436655528112742, "grad_norm": 0.7962988994841151, "learning_rate": 1.1934190841823631e-05, "loss": 0.6297, "step": 24430 }, { "epoch": 0.4438471596687491, "grad_norm": 0.7726809491148656, "learning_rate": 1.1928536332872159e-05, "loss": 0.6274, "step": 24440 }, { "epoch": 0.44402876652622403, "grad_norm": 0.8063755521270672, "learning_rate": 1.1922881183408505e-05, "loss": 0.6165, "step": 24450 }, { "epoch": 0.44421037338369895, "grad_norm": 0.7905809503945724, "learning_rate": 1.1917225395310881e-05, "loss": 0.6198, "step": 24460 }, { "epoch": 0.44439198024117393, "grad_norm": 0.7652851032943141, "learning_rate": 1.1911568970457708e-05, "loss": 0.6411, "step": 24470 }, { "epoch": 0.44457358709864886, "grad_norm": 0.8051540950900645, "learning_rate": 1.190591191072761e-05, "loss": 0.6406, "step": 24480 }, { "epoch": 0.4447551939561238, "grad_norm": 0.7639431481565677, "learning_rate": 1.1900254217999436e-05, "loss": 0.6294, "step": 24490 }, { "epoch": 0.4449368008135987, "grad_norm": 0.7901376786409776, "learning_rate": 1.1894595894152237e-05, "loss": 0.6269, "step": 24500 }, { "epoch": 0.44511840767107363, "grad_norm": 0.7658212349569419, "learning_rate": 1.1888936941065272e-05, "loss": 0.6372, "step": 24510 }, { "epoch": 0.4453000145285486, "grad_norm": 0.7762005522814853, "learning_rate": 1.1883277360618015e-05, "loss": 0.6209, "step": 24520 }, { "epoch": 0.44548162138602354, "grad_norm": 0.7783675506520202, "learning_rate": 1.1877617154690149e-05, "loss": 0.6319, "step": 24530 }, { "epoch": 0.44566322824349847, "grad_norm": 0.8027332688985519, "learning_rate": 1.1871956325161556e-05, "loss": 0.6493, "step": 24540 }, { "epoch": 0.4458448351009734, "grad_norm": 0.7831854285170529, "learning_rate": 1.1866294873912332e-05, "loss": 0.6331, "step": 24550 }, { "epoch": 0.44602644195844837, "grad_norm": 0.763502712285422, "learning_rate": 1.1860632802822778e-05, "loss": 0.618, "step": 24560 }, { "epoch": 0.4462080488159233, "grad_norm": 0.7977987824561196, "learning_rate": 1.1854970113773403e-05, "loss": 0.6408, "step": 24570 }, { "epoch": 0.4463896556733982, "grad_norm": 0.7851263351199326, "learning_rate": 1.1849306808644914e-05, "loss": 0.6325, "step": 24580 }, { "epoch": 0.44657126253087315, "grad_norm": 0.7702665575915668, "learning_rate": 1.1843642889318229e-05, "loss": 0.6408, "step": 24590 }, { "epoch": 0.44675286938834813, "grad_norm": 0.7924573265147983, "learning_rate": 1.183797835767447e-05, "loss": 0.6417, "step": 24600 }, { "epoch": 0.44693447624582305, "grad_norm": 0.8820398825196997, "learning_rate": 1.1832313215594961e-05, "loss": 0.6246, "step": 24610 }, { "epoch": 0.447116083103298, "grad_norm": 0.7906263109873068, "learning_rate": 1.1826647464961228e-05, "loss": 0.6229, "step": 24620 }, { "epoch": 0.4472976899607729, "grad_norm": 0.7664232109383715, "learning_rate": 1.1820981107655001e-05, "loss": 0.6301, "step": 24630 }, { "epoch": 0.4474792968182479, "grad_norm": 0.7670843022478366, "learning_rate": 1.1815314145558208e-05, "loss": 0.6504, "step": 24640 }, { "epoch": 0.4476609036757228, "grad_norm": 0.7997016844448248, "learning_rate": 1.180964658055298e-05, "loss": 0.6309, "step": 24650 }, { "epoch": 0.44784251053319774, "grad_norm": 0.8223148081395346, "learning_rate": 1.1803978414521654e-05, "loss": 0.6362, "step": 24660 }, { "epoch": 0.44802411739067266, "grad_norm": 0.7892186774061475, "learning_rate": 1.1798309649346753e-05, "loss": 0.622, "step": 24670 }, { "epoch": 0.4482057242481476, "grad_norm": 0.7786827085550639, "learning_rate": 1.1792640286911012e-05, "loss": 0.6292, "step": 24680 }, { "epoch": 0.44838733110562257, "grad_norm": 0.753274238246836, "learning_rate": 1.1786970329097357e-05, "loss": 0.6249, "step": 24690 }, { "epoch": 0.4485689379630975, "grad_norm": 0.8422008476434909, "learning_rate": 1.1781299777788914e-05, "loss": 0.6273, "step": 24700 }, { "epoch": 0.4487505448205724, "grad_norm": 0.7823402902178088, "learning_rate": 1.1775628634869008e-05, "loss": 0.63, "step": 24710 }, { "epoch": 0.44893215167804734, "grad_norm": 0.8390753663676402, "learning_rate": 1.1769956902221157e-05, "loss": 0.6341, "step": 24720 }, { "epoch": 0.4491137585355223, "grad_norm": 0.8138822466125751, "learning_rate": 1.1764284581729077e-05, "loss": 0.6349, "step": 24730 }, { "epoch": 0.44929536539299725, "grad_norm": 0.768819718983013, "learning_rate": 1.1758611675276681e-05, "loss": 0.6352, "step": 24740 }, { "epoch": 0.4494769722504722, "grad_norm": 0.791797740560002, "learning_rate": 1.1752938184748073e-05, "loss": 0.6351, "step": 24750 }, { "epoch": 0.4496585791079471, "grad_norm": 0.7852772354612219, "learning_rate": 1.174726411202755e-05, "loss": 0.6362, "step": 24760 }, { "epoch": 0.4498401859654221, "grad_norm": 0.7902105522383707, "learning_rate": 1.174158945899961e-05, "loss": 0.6324, "step": 24770 }, { "epoch": 0.450021792822897, "grad_norm": 0.7353774299335111, "learning_rate": 1.1735914227548932e-05, "loss": 0.6414, "step": 24780 }, { "epoch": 0.45020339968037193, "grad_norm": 0.7625142978275683, "learning_rate": 1.1730238419560398e-05, "loss": 0.6171, "step": 24790 }, { "epoch": 0.45038500653784685, "grad_norm": 0.7795743968424051, "learning_rate": 1.1724562036919073e-05, "loss": 0.6133, "step": 24800 }, { "epoch": 0.4505666133953218, "grad_norm": 0.8151822051248454, "learning_rate": 1.171888508151022e-05, "loss": 0.6277, "step": 24810 }, { "epoch": 0.45074822025279676, "grad_norm": 0.805884419097353, "learning_rate": 1.171320755521929e-05, "loss": 0.6397, "step": 24820 }, { "epoch": 0.4509298271102717, "grad_norm": 0.7910059632024501, "learning_rate": 1.1707529459931921e-05, "loss": 0.6391, "step": 24830 }, { "epoch": 0.4511114339677466, "grad_norm": 0.7829453059208196, "learning_rate": 1.1701850797533943e-05, "loss": 0.6228, "step": 24840 }, { "epoch": 0.45129304082522154, "grad_norm": 0.7701200925990075, "learning_rate": 1.1696171569911372e-05, "loss": 0.628, "step": 24850 }, { "epoch": 0.4514746476826965, "grad_norm": 0.757256461355654, "learning_rate": 1.1690491778950412e-05, "loss": 0.6298, "step": 24860 }, { "epoch": 0.45165625454017144, "grad_norm": 0.7733285228829869, "learning_rate": 1.1684811426537456e-05, "loss": 0.6268, "step": 24870 }, { "epoch": 0.45183786139764637, "grad_norm": 0.7609778124990151, "learning_rate": 1.1679130514559085e-05, "loss": 0.6242, "step": 24880 }, { "epoch": 0.4520194682551213, "grad_norm": 0.7660044907139226, "learning_rate": 1.1673449044902057e-05, "loss": 0.6427, "step": 24890 }, { "epoch": 0.4522010751125963, "grad_norm": 0.8579248389336074, "learning_rate": 1.1667767019453323e-05, "loss": 0.6379, "step": 24900 }, { "epoch": 0.4523826819700712, "grad_norm": 0.7839181061028281, "learning_rate": 1.1662084440100021e-05, "loss": 0.629, "step": 24910 }, { "epoch": 0.4525642888275461, "grad_norm": 0.7796627595199768, "learning_rate": 1.1656401308729467e-05, "loss": 0.6358, "step": 24920 }, { "epoch": 0.45274589568502105, "grad_norm": 0.7589924963209367, "learning_rate": 1.1650717627229163e-05, "loss": 0.6258, "step": 24930 }, { "epoch": 0.45292750254249603, "grad_norm": 0.7776067264208054, "learning_rate": 1.1645033397486792e-05, "loss": 0.6393, "step": 24940 }, { "epoch": 0.45310910939997096, "grad_norm": 0.778600577276513, "learning_rate": 1.1639348621390218e-05, "loss": 0.6308, "step": 24950 }, { "epoch": 0.4532907162574459, "grad_norm": 0.7643941246796054, "learning_rate": 1.1633663300827494e-05, "loss": 0.628, "step": 24960 }, { "epoch": 0.4534723231149208, "grad_norm": 0.7596806136539556, "learning_rate": 1.1627977437686843e-05, "loss": 0.6319, "step": 24970 }, { "epoch": 0.45365392997239573, "grad_norm": 0.7692859436857161, "learning_rate": 1.1622291033856679e-05, "loss": 0.6389, "step": 24980 }, { "epoch": 0.4538355368298707, "grad_norm": 0.7550957385809673, "learning_rate": 1.1616604091225583e-05, "loss": 0.633, "step": 24990 }, { "epoch": 0.45401714368734564, "grad_norm": 0.8073808804827023, "learning_rate": 1.1610916611682329e-05, "loss": 0.6375, "step": 25000 }, { "epoch": 0.45419875054482056, "grad_norm": 0.7792798230362259, "learning_rate": 1.1605228597115856e-05, "loss": 0.6306, "step": 25010 }, { "epoch": 0.4543803574022955, "grad_norm": 0.7783404087211787, "learning_rate": 1.1599540049415291e-05, "loss": 0.6347, "step": 25020 }, { "epoch": 0.45456196425977047, "grad_norm": 0.7650624336503484, "learning_rate": 1.1593850970469938e-05, "loss": 0.6236, "step": 25030 }, { "epoch": 0.4547435711172454, "grad_norm": 0.7632676757550757, "learning_rate": 1.1588161362169266e-05, "loss": 0.6406, "step": 25040 }, { "epoch": 0.4549251779747203, "grad_norm": 0.7731191982879163, "learning_rate": 1.1582471226402934e-05, "loss": 0.6249, "step": 25050 }, { "epoch": 0.45510678483219524, "grad_norm": 0.771877823011937, "learning_rate": 1.1576780565060764e-05, "loss": 0.631, "step": 25060 }, { "epoch": 0.4552883916896702, "grad_norm": 0.7616395929256216, "learning_rate": 1.1571089380032763e-05, "loss": 0.6229, "step": 25070 }, { "epoch": 0.45546999854714515, "grad_norm": 0.7955067405619293, "learning_rate": 1.1565397673209103e-05, "loss": 0.6345, "step": 25080 }, { "epoch": 0.4556516054046201, "grad_norm": 0.8144873157445452, "learning_rate": 1.1559705446480136e-05, "loss": 0.6226, "step": 25090 }, { "epoch": 0.455833212262095, "grad_norm": 0.767030493297081, "learning_rate": 1.1554012701736381e-05, "loss": 0.6321, "step": 25100 }, { "epoch": 0.45601481911957, "grad_norm": 0.7799991194666746, "learning_rate": 1.1548319440868536e-05, "loss": 0.633, "step": 25110 }, { "epoch": 0.4561964259770449, "grad_norm": 0.785080200408731, "learning_rate": 1.1542625665767467e-05, "loss": 0.6449, "step": 25120 }, { "epoch": 0.45637803283451983, "grad_norm": 0.7673239422207986, "learning_rate": 1.1536931378324208e-05, "loss": 0.6326, "step": 25130 }, { "epoch": 0.45655963969199476, "grad_norm": 0.8036275034521476, "learning_rate": 1.1531236580429963e-05, "loss": 0.6305, "step": 25140 }, { "epoch": 0.4567412465494697, "grad_norm": 0.8137532698531785, "learning_rate": 1.1525541273976116e-05, "loss": 0.6337, "step": 25150 }, { "epoch": 0.45692285340694466, "grad_norm": 0.7610923109174339, "learning_rate": 1.1519845460854202e-05, "loss": 0.6249, "step": 25160 }, { "epoch": 0.4571044602644196, "grad_norm": 0.782770351772882, "learning_rate": 1.1514149142955944e-05, "loss": 0.6361, "step": 25170 }, { "epoch": 0.4572860671218945, "grad_norm": 0.8114825780287448, "learning_rate": 1.1508452322173215e-05, "loss": 0.6206, "step": 25180 }, { "epoch": 0.45746767397936944, "grad_norm": 0.7682503858810977, "learning_rate": 1.1502755000398068e-05, "loss": 0.6341, "step": 25190 }, { "epoch": 0.4576492808368444, "grad_norm": 0.797616682961327, "learning_rate": 1.1497057179522712e-05, "loss": 0.6295, "step": 25200 }, { "epoch": 0.45783088769431934, "grad_norm": 0.8261125183206091, "learning_rate": 1.1491358861439534e-05, "loss": 0.6149, "step": 25210 }, { "epoch": 0.45801249455179427, "grad_norm": 0.7922043700522, "learning_rate": 1.1485660048041073e-05, "loss": 0.6181, "step": 25220 }, { "epoch": 0.4581941014092692, "grad_norm": 0.795082017239135, "learning_rate": 1.1479960741220042e-05, "loss": 0.6258, "step": 25230 }, { "epoch": 0.4583757082667442, "grad_norm": 0.7783978254073228, "learning_rate": 1.1474260942869313e-05, "loss": 0.6193, "step": 25240 }, { "epoch": 0.4585573151242191, "grad_norm": 0.7688562591142106, "learning_rate": 1.1468560654881923e-05, "loss": 0.6218, "step": 25250 }, { "epoch": 0.458738921981694, "grad_norm": 0.774066284286788, "learning_rate": 1.1462859879151074e-05, "loss": 0.6205, "step": 25260 }, { "epoch": 0.45892052883916895, "grad_norm": 0.7705047098325215, "learning_rate": 1.1457158617570125e-05, "loss": 0.6272, "step": 25270 }, { "epoch": 0.45910213569664393, "grad_norm": 0.8130409661811907, "learning_rate": 1.14514568720326e-05, "loss": 0.6329, "step": 25280 }, { "epoch": 0.45928374255411886, "grad_norm": 0.7788604245331201, "learning_rate": 1.1445754644432179e-05, "loss": 0.6244, "step": 25290 }, { "epoch": 0.4594653494115938, "grad_norm": 0.7828804634788282, "learning_rate": 1.1440051936662706e-05, "loss": 0.6171, "step": 25300 }, { "epoch": 0.4596469562690687, "grad_norm": 0.7809525848592396, "learning_rate": 1.1434348750618192e-05, "loss": 0.6297, "step": 25310 }, { "epoch": 0.45982856312654363, "grad_norm": 0.7986284896830311, "learning_rate": 1.1428645088192788e-05, "loss": 0.6356, "step": 25320 }, { "epoch": 0.4600101699840186, "grad_norm": 0.8027095075214651, "learning_rate": 1.1422940951280824e-05, "loss": 0.6123, "step": 25330 }, { "epoch": 0.46019177684149354, "grad_norm": 0.7585194473643277, "learning_rate": 1.1417236341776773e-05, "loss": 0.6272, "step": 25340 }, { "epoch": 0.46037338369896846, "grad_norm": 0.8114410676026663, "learning_rate": 1.1411531261575266e-05, "loss": 0.6288, "step": 25350 }, { "epoch": 0.4605549905564434, "grad_norm": 0.8326214113154972, "learning_rate": 1.1405825712571102e-05, "loss": 0.6297, "step": 25360 }, { "epoch": 0.46073659741391837, "grad_norm": 0.7901714869777959, "learning_rate": 1.1400119696659224e-05, "loss": 0.6312, "step": 25370 }, { "epoch": 0.4609182042713933, "grad_norm": 0.765216283002538, "learning_rate": 1.1394413215734729e-05, "loss": 0.6435, "step": 25380 }, { "epoch": 0.4610998111288682, "grad_norm": 0.784556345961143, "learning_rate": 1.1388706271692882e-05, "loss": 0.6231, "step": 25390 }, { "epoch": 0.46128141798634315, "grad_norm": 0.7913868853214578, "learning_rate": 1.1382998866429087e-05, "loss": 0.6317, "step": 25400 }, { "epoch": 0.4614630248438181, "grad_norm": 0.7732497062149675, "learning_rate": 1.1377291001838908e-05, "loss": 0.6295, "step": 25410 }, { "epoch": 0.46164463170129305, "grad_norm": 0.772576460857993, "learning_rate": 1.1371582679818064e-05, "loss": 0.6266, "step": 25420 }, { "epoch": 0.461826238558768, "grad_norm": 0.7829787368468141, "learning_rate": 1.1365873902262421e-05, "loss": 0.6325, "step": 25430 }, { "epoch": 0.4620078454162429, "grad_norm": 0.7727149071946088, "learning_rate": 1.1360164671067995e-05, "loss": 0.6345, "step": 25440 }, { "epoch": 0.4621894522737179, "grad_norm": 0.8153205125906642, "learning_rate": 1.1354454988130959e-05, "loss": 0.6285, "step": 25450 }, { "epoch": 0.4623710591311928, "grad_norm": 0.8088551552285208, "learning_rate": 1.1348744855347633e-05, "loss": 0.623, "step": 25460 }, { "epoch": 0.46255266598866773, "grad_norm": 0.7835940747500164, "learning_rate": 1.1343034274614486e-05, "loss": 0.63, "step": 25470 }, { "epoch": 0.46273427284614266, "grad_norm": 0.7613056956166401, "learning_rate": 1.1337323247828134e-05, "loss": 0.6304, "step": 25480 }, { "epoch": 0.4629158797036176, "grad_norm": 0.7870172918487448, "learning_rate": 1.1331611776885343e-05, "loss": 0.6211, "step": 25490 }, { "epoch": 0.46309748656109256, "grad_norm": 0.7775134775508493, "learning_rate": 1.1325899863683025e-05, "loss": 0.6361, "step": 25500 }, { "epoch": 0.4632790934185675, "grad_norm": 0.8400281492196916, "learning_rate": 1.1320187510118245e-05, "loss": 0.6326, "step": 25510 }, { "epoch": 0.4634607002760424, "grad_norm": 0.7691556715720079, "learning_rate": 1.1314474718088209e-05, "loss": 0.6277, "step": 25520 }, { "epoch": 0.46364230713351734, "grad_norm": 0.7775227594156303, "learning_rate": 1.1308761489490268e-05, "loss": 0.6126, "step": 25530 }, { "epoch": 0.4638239139909923, "grad_norm": 0.791957672670495, "learning_rate": 1.1303047826221917e-05, "loss": 0.6243, "step": 25540 }, { "epoch": 0.46400552084846725, "grad_norm": 0.7730134934887208, "learning_rate": 1.1297333730180805e-05, "loss": 0.6429, "step": 25550 }, { "epoch": 0.46418712770594217, "grad_norm": 0.7995765701772003, "learning_rate": 1.1291619203264708e-05, "loss": 0.6393, "step": 25560 }, { "epoch": 0.4643687345634171, "grad_norm": 0.8009385934231701, "learning_rate": 1.1285904247371562e-05, "loss": 0.6231, "step": 25570 }, { "epoch": 0.4645503414208921, "grad_norm": 0.7809390772892754, "learning_rate": 1.1280188864399433e-05, "loss": 0.6318, "step": 25580 }, { "epoch": 0.464731948278367, "grad_norm": 0.7708399844203283, "learning_rate": 1.1274473056246535e-05, "loss": 0.634, "step": 25590 }, { "epoch": 0.4649135551358419, "grad_norm": 0.8064105790950562, "learning_rate": 1.1268756824811225e-05, "loss": 0.6223, "step": 25600 }, { "epoch": 0.46509516199331685, "grad_norm": 0.7756497613447526, "learning_rate": 1.1263040171991993e-05, "loss": 0.6261, "step": 25610 }, { "epoch": 0.46527676885079183, "grad_norm": 0.7769174085993247, "learning_rate": 1.1257323099687476e-05, "loss": 0.6296, "step": 25620 }, { "epoch": 0.46545837570826676, "grad_norm": 0.7655965185924614, "learning_rate": 1.1251605609796448e-05, "loss": 0.6342, "step": 25630 }, { "epoch": 0.4656399825657417, "grad_norm": 0.8057917713547992, "learning_rate": 1.1245887704217822e-05, "loss": 0.624, "step": 25640 }, { "epoch": 0.4658215894232166, "grad_norm": 0.788693664582173, "learning_rate": 1.1240169384850648e-05, "loss": 0.6234, "step": 25650 }, { "epoch": 0.46600319628069153, "grad_norm": 0.7539615659543948, "learning_rate": 1.1234450653594113e-05, "loss": 0.6132, "step": 25660 }, { "epoch": 0.4661848031381665, "grad_norm": 0.7852787075746073, "learning_rate": 1.1228731512347546e-05, "loss": 0.6221, "step": 25670 }, { "epoch": 0.46636640999564144, "grad_norm": 0.7825650254897133, "learning_rate": 1.1223011963010404e-05, "loss": 0.6251, "step": 25680 }, { "epoch": 0.46654801685311637, "grad_norm": 0.7788328242390347, "learning_rate": 1.1217292007482284e-05, "loss": 0.6232, "step": 25690 }, { "epoch": 0.4667296237105913, "grad_norm": 0.8225351320366655, "learning_rate": 1.1211571647662915e-05, "loss": 0.6157, "step": 25700 }, { "epoch": 0.46691123056806627, "grad_norm": 0.7977930927630901, "learning_rate": 1.1205850885452167e-05, "loss": 0.6222, "step": 25710 }, { "epoch": 0.4670928374255412, "grad_norm": 0.7868008248599971, "learning_rate": 1.1200129722750041e-05, "loss": 0.6174, "step": 25720 }, { "epoch": 0.4672744442830161, "grad_norm": 0.7853097256413674, "learning_rate": 1.1194408161456664e-05, "loss": 0.6487, "step": 25730 }, { "epoch": 0.46745605114049105, "grad_norm": 0.7950123117129582, "learning_rate": 1.1188686203472305e-05, "loss": 0.6254, "step": 25740 }, { "epoch": 0.467637657997966, "grad_norm": 0.8217684400074092, "learning_rate": 1.118296385069736e-05, "loss": 0.6404, "step": 25750 }, { "epoch": 0.46781926485544095, "grad_norm": 0.7536178828647284, "learning_rate": 1.1177241105032356e-05, "loss": 0.6206, "step": 25760 }, { "epoch": 0.4680008717129159, "grad_norm": 0.7694933455016264, "learning_rate": 1.117151796837795e-05, "loss": 0.6276, "step": 25770 }, { "epoch": 0.4681824785703908, "grad_norm": 0.7790619878864624, "learning_rate": 1.116579444263493e-05, "loss": 0.6226, "step": 25780 }, { "epoch": 0.4683640854278658, "grad_norm": 0.7778323121705238, "learning_rate": 1.1160070529704215e-05, "loss": 0.6304, "step": 25790 }, { "epoch": 0.4685456922853407, "grad_norm": 0.7619516440650869, "learning_rate": 1.1154346231486847e-05, "loss": 0.6269, "step": 25800 }, { "epoch": 0.46872729914281563, "grad_norm": 0.8036972413570328, "learning_rate": 1.1148621549884007e-05, "loss": 0.6417, "step": 25810 }, { "epoch": 0.46890890600029056, "grad_norm": 0.7928206358625928, "learning_rate": 1.1142896486796988e-05, "loss": 0.643, "step": 25820 }, { "epoch": 0.4690905128577655, "grad_norm": 0.7774663954949977, "learning_rate": 1.1137171044127225e-05, "loss": 0.6295, "step": 25830 }, { "epoch": 0.46927211971524047, "grad_norm": 0.8147809620778406, "learning_rate": 1.1131445223776265e-05, "loss": 0.6288, "step": 25840 }, { "epoch": 0.4694537265727154, "grad_norm": 0.8069813050874947, "learning_rate": 1.1125719027645791e-05, "loss": 0.628, "step": 25850 }, { "epoch": 0.4696353334301903, "grad_norm": 0.7812981454503144, "learning_rate": 1.111999245763761e-05, "loss": 0.6268, "step": 25860 }, { "epoch": 0.46981694028766524, "grad_norm": 0.7824305386445141, "learning_rate": 1.1114265515653647e-05, "loss": 0.6216, "step": 25870 }, { "epoch": 0.4699985471451402, "grad_norm": 0.8315613981888547, "learning_rate": 1.1108538203595954e-05, "loss": 0.6328, "step": 25880 }, { "epoch": 0.47018015400261515, "grad_norm": 0.7996103355987125, "learning_rate": 1.1102810523366703e-05, "loss": 0.6348, "step": 25890 }, { "epoch": 0.4703617608600901, "grad_norm": 0.8213482702639714, "learning_rate": 1.1097082476868192e-05, "loss": 0.6438, "step": 25900 }, { "epoch": 0.470543367717565, "grad_norm": 0.773988925985054, "learning_rate": 1.1091354066002846e-05, "loss": 0.626, "step": 25910 }, { "epoch": 0.47072497457504, "grad_norm": 0.7855247454811504, "learning_rate": 1.1085625292673198e-05, "loss": 0.6355, "step": 25920 }, { "epoch": 0.4709065814325149, "grad_norm": 0.824597799929547, "learning_rate": 1.1079896158781912e-05, "loss": 0.6343, "step": 25930 }, { "epoch": 0.47108818828998983, "grad_norm": 0.7767969048197576, "learning_rate": 1.1074166666231767e-05, "loss": 0.6189, "step": 25940 }, { "epoch": 0.47126979514746475, "grad_norm": 0.7664577704728027, "learning_rate": 1.106843681692566e-05, "loss": 0.6288, "step": 25950 }, { "epoch": 0.4714514020049397, "grad_norm": 0.7706132673487693, "learning_rate": 1.1062706612766609e-05, "loss": 0.6206, "step": 25960 }, { "epoch": 0.47163300886241466, "grad_norm": 0.7551164663332216, "learning_rate": 1.1056976055657751e-05, "loss": 0.6186, "step": 25970 }, { "epoch": 0.4718146157198896, "grad_norm": 0.7704861307586737, "learning_rate": 1.105124514750234e-05, "loss": 0.6407, "step": 25980 }, { "epoch": 0.4719962225773645, "grad_norm": 0.792443711399661, "learning_rate": 1.1045513890203737e-05, "loss": 0.6246, "step": 25990 }, { "epoch": 0.47217782943483944, "grad_norm": 0.7561756185015354, "learning_rate": 1.1039782285665434e-05, "loss": 0.6405, "step": 26000 }, { "epoch": 0.4723594362923144, "grad_norm": 0.8075728274137036, "learning_rate": 1.1034050335791031e-05, "loss": 0.6375, "step": 26010 }, { "epoch": 0.47254104314978934, "grad_norm": 0.7654082797308985, "learning_rate": 1.102831804248424e-05, "loss": 0.6381, "step": 26020 }, { "epoch": 0.47272265000726427, "grad_norm": 0.7793147351330504, "learning_rate": 1.1022585407648894e-05, "loss": 0.6415, "step": 26030 }, { "epoch": 0.4729042568647392, "grad_norm": 0.7912742163461811, "learning_rate": 1.1016852433188934e-05, "loss": 0.6358, "step": 26040 }, { "epoch": 0.4730858637222142, "grad_norm": 0.812435396640524, "learning_rate": 1.1011119121008413e-05, "loss": 0.6209, "step": 26050 }, { "epoch": 0.4732674705796891, "grad_norm": 0.7577485046718638, "learning_rate": 1.1005385473011503e-05, "loss": 0.6365, "step": 26060 }, { "epoch": 0.473449077437164, "grad_norm": 0.8088109052267565, "learning_rate": 1.099965149110248e-05, "loss": 0.6171, "step": 26070 }, { "epoch": 0.47363068429463895, "grad_norm": 0.7711743404066201, "learning_rate": 1.0993917177185736e-05, "loss": 0.6259, "step": 26080 }, { "epoch": 0.47381229115211393, "grad_norm": 0.7532906680022687, "learning_rate": 1.0988182533165768e-05, "loss": 0.6384, "step": 26090 }, { "epoch": 0.47399389800958885, "grad_norm": 0.7648046416850268, "learning_rate": 1.0982447560947185e-05, "loss": 0.6238, "step": 26100 }, { "epoch": 0.4741755048670638, "grad_norm": 0.8154399923416722, "learning_rate": 1.0976712262434713e-05, "loss": 0.6314, "step": 26110 }, { "epoch": 0.4743571117245387, "grad_norm": 0.7330295450448552, "learning_rate": 1.0970976639533174e-05, "loss": 0.6258, "step": 26120 }, { "epoch": 0.47453871858201363, "grad_norm": 0.7853868409371483, "learning_rate": 1.0965240694147502e-05, "loss": 0.6321, "step": 26130 }, { "epoch": 0.4747203254394886, "grad_norm": 0.7821941738627174, "learning_rate": 1.0959504428182744e-05, "loss": 0.6197, "step": 26140 }, { "epoch": 0.47490193229696354, "grad_norm": 0.7920326479308142, "learning_rate": 1.0953767843544044e-05, "loss": 0.6462, "step": 26150 }, { "epoch": 0.47508353915443846, "grad_norm": 0.7779247184131846, "learning_rate": 1.0948030942136656e-05, "loss": 0.6315, "step": 26160 }, { "epoch": 0.4752651460119134, "grad_norm": 0.7898367486534853, "learning_rate": 1.0942293725865944e-05, "loss": 0.6344, "step": 26170 }, { "epoch": 0.47544675286938837, "grad_norm": 0.7651147408070637, "learning_rate": 1.0936556196637371e-05, "loss": 0.624, "step": 26180 }, { "epoch": 0.4756283597268633, "grad_norm": 0.7885130581361282, "learning_rate": 1.0930818356356503e-05, "loss": 0.6313, "step": 26190 }, { "epoch": 0.4758099665843382, "grad_norm": 0.7821587375393683, "learning_rate": 1.092508020692901e-05, "loss": 0.6334, "step": 26200 }, { "epoch": 0.47599157344181314, "grad_norm": 0.7753145349076839, "learning_rate": 1.091934175026067e-05, "loss": 0.6311, "step": 26210 }, { "epoch": 0.4761731802992881, "grad_norm": 0.7791570208381352, "learning_rate": 1.0913602988257357e-05, "loss": 0.6161, "step": 26220 }, { "epoch": 0.47635478715676305, "grad_norm": 0.827055472222255, "learning_rate": 1.0907863922825049e-05, "loss": 0.6403, "step": 26230 }, { "epoch": 0.476536394014238, "grad_norm": 0.7605701078522699, "learning_rate": 1.0902124555869824e-05, "loss": 0.6283, "step": 26240 }, { "epoch": 0.4767180008717129, "grad_norm": 0.7739905796435023, "learning_rate": 1.0896384889297863e-05, "loss": 0.6418, "step": 26250 }, { "epoch": 0.4768996077291879, "grad_norm": 0.7810392483083716, "learning_rate": 1.089064492501544e-05, "loss": 0.6198, "step": 26260 }, { "epoch": 0.4770812145866628, "grad_norm": 0.7765202022826322, "learning_rate": 1.0884904664928933e-05, "loss": 0.6328, "step": 26270 }, { "epoch": 0.47726282144413773, "grad_norm": 0.804969683129, "learning_rate": 1.087916411094482e-05, "loss": 0.6438, "step": 26280 }, { "epoch": 0.47744442830161266, "grad_norm": 0.813074655085077, "learning_rate": 1.0873423264969671e-05, "loss": 0.6415, "step": 26290 }, { "epoch": 0.4776260351590876, "grad_norm": 0.7589431820590263, "learning_rate": 1.0867682128910153e-05, "loss": 0.6145, "step": 26300 }, { "epoch": 0.47780764201656256, "grad_norm": 0.7581543585991557, "learning_rate": 1.0861940704673038e-05, "loss": 0.6284, "step": 26310 }, { "epoch": 0.4779892488740375, "grad_norm": 0.7772281059073558, "learning_rate": 1.0856198994165183e-05, "loss": 0.625, "step": 26320 }, { "epoch": 0.4781708557315124, "grad_norm": 0.7783015770202761, "learning_rate": 1.0850456999293549e-05, "loss": 0.6324, "step": 26330 }, { "epoch": 0.47835246258898734, "grad_norm": 0.762161058149159, "learning_rate": 1.0844714721965183e-05, "loss": 0.6162, "step": 26340 }, { "epoch": 0.4785340694464623, "grad_norm": 0.7924264022548065, "learning_rate": 1.0838972164087234e-05, "loss": 0.6198, "step": 26350 }, { "epoch": 0.47871567630393724, "grad_norm": 0.7989198143624786, "learning_rate": 1.0833229327566939e-05, "loss": 0.6272, "step": 26360 }, { "epoch": 0.47889728316141217, "grad_norm": 0.7812760860794213, "learning_rate": 1.0827486214311627e-05, "loss": 0.6422, "step": 26370 }, { "epoch": 0.4790788900188871, "grad_norm": 0.7733362694201478, "learning_rate": 1.0821742826228722e-05, "loss": 0.6126, "step": 26380 }, { "epoch": 0.4792604968763621, "grad_norm": 0.7815564225494588, "learning_rate": 1.0815999165225738e-05, "loss": 0.632, "step": 26390 }, { "epoch": 0.479442103733837, "grad_norm": 0.7914426533762761, "learning_rate": 1.081025523321028e-05, "loss": 0.6209, "step": 26400 }, { "epoch": 0.4796237105913119, "grad_norm": 0.7740124197300129, "learning_rate": 1.0804511032090041e-05, "loss": 0.6169, "step": 26410 }, { "epoch": 0.47980531744878685, "grad_norm": 0.7534955371461827, "learning_rate": 1.0798766563772806e-05, "loss": 0.6327, "step": 26420 }, { "epoch": 0.47998692430626183, "grad_norm": 0.7772156275514714, "learning_rate": 1.0793021830166448e-05, "loss": 0.6171, "step": 26430 }, { "epoch": 0.48016853116373676, "grad_norm": 0.7975508211639998, "learning_rate": 1.0787276833178927e-05, "loss": 0.6166, "step": 26440 }, { "epoch": 0.4803501380212117, "grad_norm": 0.7773840475186417, "learning_rate": 1.0781531574718294e-05, "loss": 0.6195, "step": 26450 }, { "epoch": 0.4805317448786866, "grad_norm": 0.780203978703081, "learning_rate": 1.077578605669268e-05, "loss": 0.6261, "step": 26460 }, { "epoch": 0.48071335173616153, "grad_norm": 0.7955023329477412, "learning_rate": 1.077004028101031e-05, "loss": 0.6261, "step": 26470 }, { "epoch": 0.4808949585936365, "grad_norm": 0.7945112319675874, "learning_rate": 1.0764294249579493e-05, "loss": 0.6265, "step": 26480 }, { "epoch": 0.48107656545111144, "grad_norm": 0.7929299508773716, "learning_rate": 1.0758547964308615e-05, "loss": 0.6294, "step": 26490 }, { "epoch": 0.48125817230858636, "grad_norm": 0.7831581806814939, "learning_rate": 1.075280142710615e-05, "loss": 0.6315, "step": 26500 }, { "epoch": 0.4814397791660613, "grad_norm": 0.7711667022470046, "learning_rate": 1.0747054639880666e-05, "loss": 0.633, "step": 26510 }, { "epoch": 0.48162138602353627, "grad_norm": 0.765000369503394, "learning_rate": 1.0741307604540803e-05, "loss": 0.6269, "step": 26520 }, { "epoch": 0.4818029928810112, "grad_norm": 0.7563709638327732, "learning_rate": 1.0735560322995284e-05, "loss": 0.6401, "step": 26530 }, { "epoch": 0.4819845997384861, "grad_norm": 0.7608059713740406, "learning_rate": 1.0729812797152922e-05, "loss": 0.6184, "step": 26540 }, { "epoch": 0.48216620659596104, "grad_norm": 0.743701098541095, "learning_rate": 1.0724065028922596e-05, "loss": 0.618, "step": 26550 }, { "epoch": 0.482347813453436, "grad_norm": 0.7715742063015341, "learning_rate": 1.0718317020213283e-05, "loss": 0.6271, "step": 26560 }, { "epoch": 0.48252942031091095, "grad_norm": 0.7739282520588403, "learning_rate": 1.071256877293403e-05, "loss": 0.6246, "step": 26570 }, { "epoch": 0.4827110271683859, "grad_norm": 0.7571007067211323, "learning_rate": 1.0706820288993962e-05, "loss": 0.6276, "step": 26580 }, { "epoch": 0.4828926340258608, "grad_norm": 0.7799184725217868, "learning_rate": 1.0701071570302286e-05, "loss": 0.6188, "step": 26590 }, { "epoch": 0.4830742408833358, "grad_norm": 0.7713751296614896, "learning_rate": 1.0695322618768287e-05, "loss": 0.6284, "step": 26600 }, { "epoch": 0.4832558477408107, "grad_norm": 0.7873653599179814, "learning_rate": 1.0689573436301326e-05, "loss": 0.6089, "step": 26610 }, { "epoch": 0.48343745459828563, "grad_norm": 0.8119686819789784, "learning_rate": 1.0683824024810843e-05, "loss": 0.6315, "step": 26620 }, { "epoch": 0.48361906145576056, "grad_norm": 0.7474003663642608, "learning_rate": 1.0678074386206352e-05, "loss": 0.6186, "step": 26630 }, { "epoch": 0.4838006683132355, "grad_norm": 0.7823609653366637, "learning_rate": 1.0672324522397443e-05, "loss": 0.6194, "step": 26640 }, { "epoch": 0.48398227517071046, "grad_norm": 0.7930257098401009, "learning_rate": 1.066657443529378e-05, "loss": 0.6311, "step": 26650 }, { "epoch": 0.4841638820281854, "grad_norm": 0.8076615293310772, "learning_rate": 1.0660824126805103e-05, "loss": 0.6315, "step": 26660 }, { "epoch": 0.4843454888856603, "grad_norm": 0.7903476762151008, "learning_rate": 1.0655073598841223e-05, "loss": 0.627, "step": 26670 }, { "epoch": 0.48452709574313524, "grad_norm": 0.7481668907748682, "learning_rate": 1.0649322853312028e-05, "loss": 0.6372, "step": 26680 }, { "epoch": 0.4847087026006102, "grad_norm": 0.7675655575561258, "learning_rate": 1.0643571892127472e-05, "loss": 0.6315, "step": 26690 }, { "epoch": 0.48489030945808514, "grad_norm": 0.7552552469373007, "learning_rate": 1.0637820717197582e-05, "loss": 0.6243, "step": 26700 }, { "epoch": 0.48507191631556007, "grad_norm": 0.7699288961971745, "learning_rate": 1.0632069330432467e-05, "loss": 0.6177, "step": 26710 }, { "epoch": 0.485253523173035, "grad_norm": 0.8190870429332296, "learning_rate": 1.062631773374229e-05, "loss": 0.6282, "step": 26720 }, { "epoch": 0.48543513003051, "grad_norm": 0.8047977435884177, "learning_rate": 1.0620565929037294e-05, "loss": 0.642, "step": 26730 }, { "epoch": 0.4856167368879849, "grad_norm": 0.7838034149262221, "learning_rate": 1.061481391822779e-05, "loss": 0.624, "step": 26740 }, { "epoch": 0.4857983437454598, "grad_norm": 0.832106419598656, "learning_rate": 1.0609061703224152e-05, "loss": 0.6079, "step": 26750 }, { "epoch": 0.48597995060293475, "grad_norm": 0.7951567218354155, "learning_rate": 1.0603309285936829e-05, "loss": 0.62, "step": 26760 }, { "epoch": 0.48616155746040973, "grad_norm": 0.7852744455375142, "learning_rate": 1.0597556668276332e-05, "loss": 0.6249, "step": 26770 }, { "epoch": 0.48634316431788466, "grad_norm": 0.7920813000658563, "learning_rate": 1.0591803852153242e-05, "loss": 0.6209, "step": 26780 }, { "epoch": 0.4865247711753596, "grad_norm": 0.8182382604159459, "learning_rate": 1.0586050839478201e-05, "loss": 0.6274, "step": 26790 }, { "epoch": 0.4867063780328345, "grad_norm": 0.8123670029618676, "learning_rate": 1.0580297632161921e-05, "loss": 0.6247, "step": 26800 }, { "epoch": 0.48688798489030943, "grad_norm": 0.7944191350641026, "learning_rate": 1.0574544232115178e-05, "loss": 0.621, "step": 26810 }, { "epoch": 0.4870695917477844, "grad_norm": 0.7898509089502526, "learning_rate": 1.0568790641248812e-05, "loss": 0.6371, "step": 26820 }, { "epoch": 0.48725119860525934, "grad_norm": 0.7849450419175081, "learning_rate": 1.0563036861473723e-05, "loss": 0.6106, "step": 26830 }, { "epoch": 0.48743280546273426, "grad_norm": 0.7978169203457051, "learning_rate": 1.0557282894700877e-05, "loss": 0.6298, "step": 26840 }, { "epoch": 0.4876144123202092, "grad_norm": 0.8086563657354445, "learning_rate": 1.0551528742841304e-05, "loss": 0.6242, "step": 26850 }, { "epoch": 0.48779601917768417, "grad_norm": 0.7631674191401034, "learning_rate": 1.0545774407806088e-05, "loss": 0.6268, "step": 26860 }, { "epoch": 0.4879776260351591, "grad_norm": 0.7621062734537993, "learning_rate": 1.0540019891506383e-05, "loss": 0.6174, "step": 26870 }, { "epoch": 0.488159232892634, "grad_norm": 0.7720339758771014, "learning_rate": 1.0534265195853402e-05, "loss": 0.6217, "step": 26880 }, { "epoch": 0.48834083975010895, "grad_norm": 0.7751175808161629, "learning_rate": 1.0528510322758405e-05, "loss": 0.6121, "step": 26890 }, { "epoch": 0.4885224466075839, "grad_norm": 0.8317359905536021, "learning_rate": 1.0522755274132725e-05, "loss": 0.6441, "step": 26900 }, { "epoch": 0.48870405346505885, "grad_norm": 0.816294608586857, "learning_rate": 1.0517000051887752e-05, "loss": 0.6255, "step": 26910 }, { "epoch": 0.4888856603225338, "grad_norm": 0.7923352370736328, "learning_rate": 1.0511244657934924e-05, "loss": 0.6324, "step": 26920 }, { "epoch": 0.4890672671800087, "grad_norm": 0.8668337892263391, "learning_rate": 1.050548909418575e-05, "loss": 0.6499, "step": 26930 }, { "epoch": 0.4892488740374837, "grad_norm": 0.7766029377189932, "learning_rate": 1.0499733362551783e-05, "loss": 0.6264, "step": 26940 }, { "epoch": 0.4894304808949586, "grad_norm": 0.7568610137532393, "learning_rate": 1.0493977464944635e-05, "loss": 0.6237, "step": 26950 }, { "epoch": 0.48961208775243353, "grad_norm": 0.7434895897372141, "learning_rate": 1.048822140327598e-05, "loss": 0.617, "step": 26960 }, { "epoch": 0.48979369460990846, "grad_norm": 0.767284832194371, "learning_rate": 1.0482465179457538e-05, "loss": 0.6309, "step": 26970 }, { "epoch": 0.4899753014673834, "grad_norm": 0.7724632975203266, "learning_rate": 1.0476708795401088e-05, "loss": 0.6209, "step": 26980 }, { "epoch": 0.49015690832485836, "grad_norm": 0.8094131367889574, "learning_rate": 1.0470952253018456e-05, "loss": 0.6207, "step": 26990 }, { "epoch": 0.4903385151823333, "grad_norm": 0.8012969816084337, "learning_rate": 1.0465195554221525e-05, "loss": 0.6196, "step": 27000 }, { "epoch": 0.4905201220398082, "grad_norm": 0.7437101740902367, "learning_rate": 1.0459438700922235e-05, "loss": 0.6316, "step": 27010 }, { "epoch": 0.49070172889728314, "grad_norm": 0.7887426468025693, "learning_rate": 1.0453681695032569e-05, "loss": 0.6232, "step": 27020 }, { "epoch": 0.4908833357547581, "grad_norm": 0.7661444712048319, "learning_rate": 1.0447924538464565e-05, "loss": 0.6116, "step": 27030 }, { "epoch": 0.49106494261223305, "grad_norm": 0.7739172805183759, "learning_rate": 1.0442167233130307e-05, "loss": 0.6144, "step": 27040 }, { "epoch": 0.49124654946970797, "grad_norm": 0.7710449851631934, "learning_rate": 1.0436409780941935e-05, "loss": 0.6245, "step": 27050 }, { "epoch": 0.4914281563271829, "grad_norm": 0.8161537662272956, "learning_rate": 1.0430652183811628e-05, "loss": 0.6332, "step": 27060 }, { "epoch": 0.4916097631846579, "grad_norm": 0.8209032560087998, "learning_rate": 1.0424894443651627e-05, "loss": 0.633, "step": 27070 }, { "epoch": 0.4917913700421328, "grad_norm": 0.7501940160252585, "learning_rate": 1.041913656237421e-05, "loss": 0.6093, "step": 27080 }, { "epoch": 0.49197297689960773, "grad_norm": 0.766104279865796, "learning_rate": 1.04133785418917e-05, "loss": 0.6244, "step": 27090 }, { "epoch": 0.49215458375708265, "grad_norm": 0.7777664070574697, "learning_rate": 1.0407620384116475e-05, "loss": 0.6277, "step": 27100 }, { "epoch": 0.4923361906145576, "grad_norm": 0.7990271797157592, "learning_rate": 1.0401862090960953e-05, "loss": 0.6281, "step": 27110 }, { "epoch": 0.49251779747203256, "grad_norm": 0.8006631111402321, "learning_rate": 1.03961036643376e-05, "loss": 0.6331, "step": 27120 }, { "epoch": 0.4926994043295075, "grad_norm": 0.7896987510503555, "learning_rate": 1.0390345106158923e-05, "loss": 0.6204, "step": 27130 }, { "epoch": 0.4928810111869824, "grad_norm": 0.8241502136015704, "learning_rate": 1.0384586418337474e-05, "loss": 0.6439, "step": 27140 }, { "epoch": 0.49306261804445733, "grad_norm": 0.856244868550557, "learning_rate": 1.037882760278585e-05, "loss": 0.6306, "step": 27150 }, { "epoch": 0.4932442249019323, "grad_norm": 0.7864800891012548, "learning_rate": 1.0373068661416688e-05, "loss": 0.6355, "step": 27160 }, { "epoch": 0.49342583175940724, "grad_norm": 0.7975232471855948, "learning_rate": 1.0367309596142667e-05, "loss": 0.6148, "step": 27170 }, { "epoch": 0.49360743861688217, "grad_norm": 0.785531995411287, "learning_rate": 1.0361550408876511e-05, "loss": 0.6259, "step": 27180 }, { "epoch": 0.4937890454743571, "grad_norm": 0.8133918867055823, "learning_rate": 1.0355791101530973e-05, "loss": 0.6203, "step": 27190 }, { "epoch": 0.49397065233183207, "grad_norm": 0.7885732993765122, "learning_rate": 1.0350031676018866e-05, "loss": 0.6131, "step": 27200 }, { "epoch": 0.494152259189307, "grad_norm": 0.7575504071130758, "learning_rate": 1.034427213425302e-05, "loss": 0.6114, "step": 27210 }, { "epoch": 0.4943338660467819, "grad_norm": 0.7482413578087241, "learning_rate": 1.0338512478146318e-05, "loss": 0.6287, "step": 27220 }, { "epoch": 0.49451547290425685, "grad_norm": 0.7670829988182195, "learning_rate": 1.0332752709611678e-05, "loss": 0.6191, "step": 27230 }, { "epoch": 0.49469707976173183, "grad_norm": 0.7742195369492256, "learning_rate": 1.0326992830562052e-05, "loss": 0.6279, "step": 27240 }, { "epoch": 0.49487868661920675, "grad_norm": 0.7564148393791248, "learning_rate": 1.0321232842910434e-05, "loss": 0.6183, "step": 27250 }, { "epoch": 0.4950602934766817, "grad_norm": 0.7924700678165149, "learning_rate": 1.0315472748569848e-05, "loss": 0.6194, "step": 27260 }, { "epoch": 0.4952419003341566, "grad_norm": 0.7551477740066305, "learning_rate": 1.030971254945336e-05, "loss": 0.6239, "step": 27270 }, { "epoch": 0.49542350719163153, "grad_norm": 0.7692915316929931, "learning_rate": 1.0303952247474066e-05, "loss": 0.6215, "step": 27280 }, { "epoch": 0.4956051140491065, "grad_norm": 0.7602470360846133, "learning_rate": 1.0298191844545094e-05, "loss": 0.6298, "step": 27290 }, { "epoch": 0.49578672090658144, "grad_norm": 0.7495992993634899, "learning_rate": 1.0292431342579609e-05, "loss": 0.6245, "step": 27300 }, { "epoch": 0.49596832776405636, "grad_norm": 0.7322302879948476, "learning_rate": 1.0286670743490817e-05, "loss": 0.6099, "step": 27310 }, { "epoch": 0.4961499346215313, "grad_norm": 0.7994509318695882, "learning_rate": 1.028091004919194e-05, "loss": 0.6325, "step": 27320 }, { "epoch": 0.49633154147900627, "grad_norm": 0.7657174532567308, "learning_rate": 1.0275149261596244e-05, "loss": 0.6336, "step": 27330 }, { "epoch": 0.4965131483364812, "grad_norm": 0.7888385290793481, "learning_rate": 1.0269388382617021e-05, "loss": 0.6421, "step": 27340 }, { "epoch": 0.4966947551939561, "grad_norm": 0.7433049081631405, "learning_rate": 1.0263627414167591e-05, "loss": 0.6202, "step": 27350 }, { "epoch": 0.49687636205143104, "grad_norm": 0.7692338697548724, "learning_rate": 1.0257866358161313e-05, "loss": 0.6118, "step": 27360 }, { "epoch": 0.497057968908906, "grad_norm": 0.7853811279158462, "learning_rate": 1.025210521651156e-05, "loss": 0.6171, "step": 27370 }, { "epoch": 0.49723957576638095, "grad_norm": 0.8211208740869642, "learning_rate": 1.0246343991131756e-05, "loss": 0.6221, "step": 27380 }, { "epoch": 0.4974211826238559, "grad_norm": 0.7696196770489749, "learning_rate": 1.0240582683935326e-05, "loss": 0.6262, "step": 27390 }, { "epoch": 0.4976027894813308, "grad_norm": 0.7694557450611631, "learning_rate": 1.0234821296835737e-05, "loss": 0.6363, "step": 27400 }, { "epoch": 0.4977843963388058, "grad_norm": 0.7610480347032771, "learning_rate": 1.0229059831746489e-05, "loss": 0.6357, "step": 27410 }, { "epoch": 0.4979660031962807, "grad_norm": 0.7490723205433439, "learning_rate": 1.0223298290581092e-05, "loss": 0.6168, "step": 27420 }, { "epoch": 0.49814761005375563, "grad_norm": 0.8004168839289946, "learning_rate": 1.021753667525309e-05, "loss": 0.6347, "step": 27430 }, { "epoch": 0.49832921691123055, "grad_norm": 0.7732278893651502, "learning_rate": 1.0211774987676054e-05, "loss": 0.6232, "step": 27440 }, { "epoch": 0.4985108237687055, "grad_norm": 0.7521446298666011, "learning_rate": 1.0206013229763576e-05, "loss": 0.6191, "step": 27450 }, { "epoch": 0.49869243062618046, "grad_norm": 0.7232014084447778, "learning_rate": 1.0200251403429269e-05, "loss": 0.6177, "step": 27460 }, { "epoch": 0.4988740374836554, "grad_norm": 0.7759077677698761, "learning_rate": 1.0194489510586768e-05, "loss": 0.6119, "step": 27470 }, { "epoch": 0.4990556443411303, "grad_norm": 0.7953874441850844, "learning_rate": 1.018872755314974e-05, "loss": 0.6271, "step": 27480 }, { "epoch": 0.49923725119860524, "grad_norm": 0.78339051909691, "learning_rate": 1.0182965533031859e-05, "loss": 0.6234, "step": 27490 }, { "epoch": 0.4994188580560802, "grad_norm": 0.7903506872783707, "learning_rate": 1.017720345214683e-05, "loss": 0.6328, "step": 27500 }, { "epoch": 0.49960046491355514, "grad_norm": 0.7979208362569365, "learning_rate": 1.017144131240838e-05, "loss": 0.6214, "step": 27510 }, { "epoch": 0.49978207177103007, "grad_norm": 0.8040579395039092, "learning_rate": 1.0165679115730241e-05, "loss": 0.6237, "step": 27520 }, { "epoch": 0.499963678628505, "grad_norm": 0.7656246001970165, "learning_rate": 1.0159916864026181e-05, "loss": 0.6376, "step": 27530 }, { "epoch": 0.5001452854859799, "grad_norm": 0.7669680290934134, "learning_rate": 1.0154154559209977e-05, "loss": 0.617, "step": 27540 }, { "epoch": 0.5003268923434548, "grad_norm": 0.7878479778909432, "learning_rate": 1.0148392203195427e-05, "loss": 0.6208, "step": 27550 }, { "epoch": 0.5005084992009299, "grad_norm": 0.8233207192978754, "learning_rate": 1.0142629797896342e-05, "loss": 0.6261, "step": 27560 }, { "epoch": 0.5006901060584048, "grad_norm": 0.7664967324356919, "learning_rate": 1.0136867345226556e-05, "loss": 0.6271, "step": 27570 }, { "epoch": 0.5008717129158797, "grad_norm": 0.7781060389005868, "learning_rate": 1.0131104847099908e-05, "loss": 0.633, "step": 27580 }, { "epoch": 0.5010533197733547, "grad_norm": 0.7748245544419214, "learning_rate": 1.0125342305430268e-05, "loss": 0.6292, "step": 27590 }, { "epoch": 0.5012349266308296, "grad_norm": 0.7830374822563978, "learning_rate": 1.0119579722131505e-05, "loss": 0.6302, "step": 27600 }, { "epoch": 0.5014165334883045, "grad_norm": 0.7591361522618186, "learning_rate": 1.011381709911751e-05, "loss": 0.6169, "step": 27610 }, { "epoch": 0.5015981403457794, "grad_norm": 0.8304967202976388, "learning_rate": 1.0108054438302184e-05, "loss": 0.636, "step": 27620 }, { "epoch": 0.5017797472032544, "grad_norm": 0.7548469288111828, "learning_rate": 1.0102291741599441e-05, "loss": 0.6231, "step": 27630 }, { "epoch": 0.5019613540607293, "grad_norm": 0.800644207422228, "learning_rate": 1.0096529010923213e-05, "loss": 0.6363, "step": 27640 }, { "epoch": 0.5021429609182043, "grad_norm": 0.7813610595824915, "learning_rate": 1.0090766248187434e-05, "loss": 0.6214, "step": 27650 }, { "epoch": 0.5023245677756792, "grad_norm": 0.7596885753704605, "learning_rate": 1.0085003455306053e-05, "loss": 0.6265, "step": 27660 }, { "epoch": 0.5025061746331542, "grad_norm": 0.7768155697722935, "learning_rate": 1.007924063419303e-05, "loss": 0.6294, "step": 27670 }, { "epoch": 0.5026877814906291, "grad_norm": 0.792559876217128, "learning_rate": 1.0073477786762331e-05, "loss": 0.6222, "step": 27680 }, { "epoch": 0.502869388348104, "grad_norm": 0.7619216904273676, "learning_rate": 1.0067714914927937e-05, "loss": 0.6121, "step": 27690 }, { "epoch": 0.5030509952055789, "grad_norm": 0.8195095619803375, "learning_rate": 1.0061952020603829e-05, "loss": 0.6241, "step": 27700 }, { "epoch": 0.5032326020630539, "grad_norm": 0.7777117548690421, "learning_rate": 1.0056189105704001e-05, "loss": 0.6359, "step": 27710 }, { "epoch": 0.5034142089205288, "grad_norm": 1.124169367317352, "learning_rate": 1.0050426172142454e-05, "loss": 0.6208, "step": 27720 }, { "epoch": 0.5035958157780038, "grad_norm": 0.780469535641033, "learning_rate": 1.0044663221833188e-05, "loss": 0.6217, "step": 27730 }, { "epoch": 0.5037774226354788, "grad_norm": 0.7542433798051323, "learning_rate": 1.003890025669022e-05, "loss": 0.6349, "step": 27740 }, { "epoch": 0.5039590294929537, "grad_norm": 0.761558817546374, "learning_rate": 1.0033137278627565e-05, "loss": 0.6321, "step": 27750 }, { "epoch": 0.5041406363504286, "grad_norm": 0.7838756005192423, "learning_rate": 1.002737428955924e-05, "loss": 0.6277, "step": 27760 }, { "epoch": 0.5043222432079035, "grad_norm": 0.7779924824120343, "learning_rate": 1.0021611291399272e-05, "loss": 0.6233, "step": 27770 }, { "epoch": 0.5045038500653785, "grad_norm": 0.8222943148623792, "learning_rate": 1.0015848286061688e-05, "loss": 0.6253, "step": 27780 }, { "epoch": 0.5046854569228534, "grad_norm": 0.7952454004467984, "learning_rate": 1.0010085275460515e-05, "loss": 0.6356, "step": 27790 }, { "epoch": 0.5048670637803283, "grad_norm": 0.7430718424062072, "learning_rate": 1.0004322261509786e-05, "loss": 0.6156, "step": 27800 }, { "epoch": 0.5050486706378032, "grad_norm": 0.7916169314813509, "learning_rate": 9.998559246123531e-06, "loss": 0.6262, "step": 27810 }, { "epoch": 0.5052302774952783, "grad_norm": 0.7815824896250616, "learning_rate": 9.992796231215784e-06, "loss": 0.6229, "step": 27820 }, { "epoch": 0.5054118843527532, "grad_norm": 0.7736598414004793, "learning_rate": 9.98703321870058e-06, "loss": 0.625, "step": 27830 }, { "epoch": 0.5055934912102281, "grad_norm": 0.7973050400178415, "learning_rate": 9.98127021049195e-06, "loss": 0.6248, "step": 27840 }, { "epoch": 0.505775098067703, "grad_norm": 0.7296658578920678, "learning_rate": 9.975507208503916e-06, "loss": 0.6296, "step": 27850 }, { "epoch": 0.505956704925178, "grad_norm": 0.7727803381972728, "learning_rate": 9.96974421465052e-06, "loss": 0.6348, "step": 27860 }, { "epoch": 0.5061383117826529, "grad_norm": 0.8070791839702708, "learning_rate": 9.963981230845775e-06, "loss": 0.6155, "step": 27870 }, { "epoch": 0.5063199186401278, "grad_norm": 0.7505121668798957, "learning_rate": 9.958218259003712e-06, "loss": 0.6188, "step": 27880 }, { "epoch": 0.5065015254976027, "grad_norm": 0.7867294023812, "learning_rate": 9.952455301038342e-06, "loss": 0.6235, "step": 27890 }, { "epoch": 0.5066831323550778, "grad_norm": 0.7539586810869011, "learning_rate": 9.946692358863684e-06, "loss": 0.6094, "step": 27900 }, { "epoch": 0.5068647392125527, "grad_norm": 0.7982373180935008, "learning_rate": 9.940929434393746e-06, "loss": 0.6134, "step": 27910 }, { "epoch": 0.5070463460700276, "grad_norm": 0.7998676041818631, "learning_rate": 9.93516652954253e-06, "loss": 0.6151, "step": 27920 }, { "epoch": 0.5072279529275026, "grad_norm": 0.7691074444134993, "learning_rate": 9.92940364622403e-06, "loss": 0.6284, "step": 27930 }, { "epoch": 0.5074095597849775, "grad_norm": 0.7577697537586595, "learning_rate": 9.923640786352234e-06, "loss": 0.6156, "step": 27940 }, { "epoch": 0.5075911666424524, "grad_norm": 0.8040019759873829, "learning_rate": 9.917877951841129e-06, "loss": 0.6229, "step": 27950 }, { "epoch": 0.5077727734999273, "grad_norm": 0.7901374300733353, "learning_rate": 9.912115144604681e-06, "loss": 0.6258, "step": 27960 }, { "epoch": 0.5079543803574023, "grad_norm": 0.7701941733040918, "learning_rate": 9.906352366556858e-06, "loss": 0.6195, "step": 27970 }, { "epoch": 0.5081359872148772, "grad_norm": 0.7568439347083032, "learning_rate": 9.900589619611609e-06, "loss": 0.6281, "step": 27980 }, { "epoch": 0.5083175940723522, "grad_norm": 0.7615469873982568, "learning_rate": 9.894826905682886e-06, "loss": 0.6106, "step": 27990 }, { "epoch": 0.5084992009298271, "grad_norm": 0.7598619198928485, "learning_rate": 9.889064226684609e-06, "loss": 0.6219, "step": 28000 }, { "epoch": 0.5086808077873021, "grad_norm": 0.7853560783919393, "learning_rate": 9.883301584530712e-06, "loss": 0.62, "step": 28010 }, { "epoch": 0.508862414644777, "grad_norm": 0.7520059124552202, "learning_rate": 9.877538981135092e-06, "loss": 0.6192, "step": 28020 }, { "epoch": 0.5090440215022519, "grad_norm": 0.7351464115432818, "learning_rate": 9.871776418411655e-06, "loss": 0.6177, "step": 28030 }, { "epoch": 0.5092256283597268, "grad_norm": 0.8010346833963227, "learning_rate": 9.866013898274282e-06, "loss": 0.6214, "step": 28040 }, { "epoch": 0.5094072352172018, "grad_norm": 0.7753661813461566, "learning_rate": 9.86025142263683e-06, "loss": 0.6197, "step": 28050 }, { "epoch": 0.5095888420746767, "grad_norm": 0.7806815214344167, "learning_rate": 9.854488993413167e-06, "loss": 0.6187, "step": 28060 }, { "epoch": 0.5097704489321517, "grad_norm": 0.7910972681398658, "learning_rate": 9.84872661251712e-06, "loss": 0.62, "step": 28070 }, { "epoch": 0.5099520557896267, "grad_norm": 0.7873776014351008, "learning_rate": 9.842964281862518e-06, "loss": 0.6171, "step": 28080 }, { "epoch": 0.5101336626471016, "grad_norm": 0.7532611564184676, "learning_rate": 9.837202003363161e-06, "loss": 0.6224, "step": 28090 }, { "epoch": 0.5103152695045765, "grad_norm": 0.8162736860349926, "learning_rate": 9.83143977893284e-06, "loss": 0.6217, "step": 28100 }, { "epoch": 0.5104968763620514, "grad_norm": 0.7916239873727373, "learning_rate": 9.825677610485328e-06, "loss": 0.6182, "step": 28110 }, { "epoch": 0.5106784832195264, "grad_norm": 0.7939640453678818, "learning_rate": 9.81991549993437e-06, "loss": 0.6158, "step": 28120 }, { "epoch": 0.5108600900770013, "grad_norm": 0.7731163257158199, "learning_rate": 9.814153449193701e-06, "loss": 0.6299, "step": 28130 }, { "epoch": 0.5110416969344762, "grad_norm": 0.7969761816930753, "learning_rate": 9.808391460177037e-06, "loss": 0.6254, "step": 28140 }, { "epoch": 0.5112233037919511, "grad_norm": 0.7419760478182216, "learning_rate": 9.802629534798064e-06, "loss": 0.6147, "step": 28150 }, { "epoch": 0.5114049106494262, "grad_norm": 0.7848902723532374, "learning_rate": 9.796867674970453e-06, "loss": 0.6188, "step": 28160 }, { "epoch": 0.5115865175069011, "grad_norm": 0.7659830752530774, "learning_rate": 9.791105882607857e-06, "loss": 0.625, "step": 28170 }, { "epoch": 0.511768124364376, "grad_norm": 0.7714494291147217, "learning_rate": 9.785344159623897e-06, "loss": 0.6317, "step": 28180 }, { "epoch": 0.511949731221851, "grad_norm": 0.7653629963377399, "learning_rate": 9.779582507932185e-06, "loss": 0.6229, "step": 28190 }, { "epoch": 0.5121313380793259, "grad_norm": 0.7871090948953289, "learning_rate": 9.773820929446291e-06, "loss": 0.6163, "step": 28200 }, { "epoch": 0.5123129449368008, "grad_norm": 0.7710454042016346, "learning_rate": 9.768059426079778e-06, "loss": 0.6258, "step": 28210 }, { "epoch": 0.5124945517942757, "grad_norm": 0.7902315174545815, "learning_rate": 9.762297999746169e-06, "loss": 0.614, "step": 28220 }, { "epoch": 0.5126761586517506, "grad_norm": 0.789872509439735, "learning_rate": 9.756536652358978e-06, "loss": 0.6184, "step": 28230 }, { "epoch": 0.5128577655092257, "grad_norm": 0.7688676688968099, "learning_rate": 9.75077538583168e-06, "loss": 0.6206, "step": 28240 }, { "epoch": 0.5130393723667006, "grad_norm": 0.7610458862871616, "learning_rate": 9.745014202077718e-06, "loss": 0.615, "step": 28250 }, { "epoch": 0.5132209792241755, "grad_norm": 0.7605892898515824, "learning_rate": 9.739253103010531e-06, "loss": 0.6247, "step": 28260 }, { "epoch": 0.5134025860816505, "grad_norm": 0.8023661898110745, "learning_rate": 9.733492090543505e-06, "loss": 0.6176, "step": 28270 }, { "epoch": 0.5135841929391254, "grad_norm": 0.7825099083702515, "learning_rate": 9.727731166590012e-06, "loss": 0.624, "step": 28280 }, { "epoch": 0.5137657997966003, "grad_norm": 0.7898433036989705, "learning_rate": 9.721970333063386e-06, "loss": 0.6181, "step": 28290 }, { "epoch": 0.5139474066540752, "grad_norm": 0.8057477837063364, "learning_rate": 9.71620959187694e-06, "loss": 0.6384, "step": 28300 }, { "epoch": 0.5141290135115502, "grad_norm": 0.7419736975883708, "learning_rate": 9.71044894494395e-06, "loss": 0.6178, "step": 28310 }, { "epoch": 0.5143106203690251, "grad_norm": 0.7478111974486576, "learning_rate": 9.70468839417766e-06, "loss": 0.6279, "step": 28320 }, { "epoch": 0.5144922272265001, "grad_norm": 0.7760822651640332, "learning_rate": 9.698927941491287e-06, "loss": 0.6326, "step": 28330 }, { "epoch": 0.514673834083975, "grad_norm": 0.7584523512670567, "learning_rate": 9.69316758879801e-06, "loss": 0.6143, "step": 28340 }, { "epoch": 0.51485544094145, "grad_norm": 0.7446625336797935, "learning_rate": 9.68740733801098e-06, "loss": 0.6202, "step": 28350 }, { "epoch": 0.5150370477989249, "grad_norm": 0.8189611682735353, "learning_rate": 9.681647191043305e-06, "loss": 0.6315, "step": 28360 }, { "epoch": 0.5152186546563998, "grad_norm": 0.7764311295851658, "learning_rate": 9.675887149808076e-06, "loss": 0.6363, "step": 28370 }, { "epoch": 0.5154002615138747, "grad_norm": 0.7603707910032648, "learning_rate": 9.670127216218324e-06, "loss": 0.6262, "step": 28380 }, { "epoch": 0.5155818683713497, "grad_norm": 0.7864451143700195, "learning_rate": 9.664367392187073e-06, "loss": 0.6268, "step": 28390 }, { "epoch": 0.5157634752288246, "grad_norm": 0.7486180552744881, "learning_rate": 9.658607679627282e-06, "loss": 0.6204, "step": 28400 }, { "epoch": 0.5159450820862996, "grad_norm": 0.7486358764799392, "learning_rate": 9.652848080451899e-06, "loss": 0.6203, "step": 28410 }, { "epoch": 0.5161266889437746, "grad_norm": 0.8076762899397284, "learning_rate": 9.64708859657381e-06, "loss": 0.6178, "step": 28420 }, { "epoch": 0.5163082958012495, "grad_norm": 0.798362655913005, "learning_rate": 9.641329229905889e-06, "loss": 0.6085, "step": 28430 }, { "epoch": 0.5164899026587244, "grad_norm": 0.7769972317571693, "learning_rate": 9.635569982360946e-06, "loss": 0.6069, "step": 28440 }, { "epoch": 0.5166715095161993, "grad_norm": 0.7624962550572464, "learning_rate": 9.629810855851761e-06, "loss": 0.6199, "step": 28450 }, { "epoch": 0.5168531163736743, "grad_norm": 0.803773885058879, "learning_rate": 9.624051852291085e-06, "loss": 0.6317, "step": 28460 }, { "epoch": 0.5170347232311492, "grad_norm": 0.7807343385805104, "learning_rate": 9.618292973591606e-06, "loss": 0.6138, "step": 28470 }, { "epoch": 0.5172163300886241, "grad_norm": 0.8031753476335387, "learning_rate": 9.612534221665996e-06, "loss": 0.615, "step": 28480 }, { "epoch": 0.517397936946099, "grad_norm": 0.7805122571060378, "learning_rate": 9.606775598426859e-06, "loss": 0.6264, "step": 28490 }, { "epoch": 0.5175795438035741, "grad_norm": 0.7580126830250584, "learning_rate": 9.601017105786778e-06, "loss": 0.6327, "step": 28500 }, { "epoch": 0.517761150661049, "grad_norm": 0.7590543025129922, "learning_rate": 9.595258745658278e-06, "loss": 0.6286, "step": 28510 }, { "epoch": 0.5179427575185239, "grad_norm": 0.7712473228619305, "learning_rate": 9.58950051995385e-06, "loss": 0.6312, "step": 28520 }, { "epoch": 0.5181243643759988, "grad_norm": 0.7606905648696619, "learning_rate": 9.583742430585934e-06, "loss": 0.6317, "step": 28530 }, { "epoch": 0.5183059712334738, "grad_norm": 0.7913999037974163, "learning_rate": 9.577984479466927e-06, "loss": 0.6266, "step": 28540 }, { "epoch": 0.5184875780909487, "grad_norm": 0.8439769580366927, "learning_rate": 9.57222666850918e-06, "loss": 0.6157, "step": 28550 }, { "epoch": 0.5186691849484236, "grad_norm": 0.7617797434846884, "learning_rate": 9.566468999624992e-06, "loss": 0.6264, "step": 28560 }, { "epoch": 0.5188507918058985, "grad_norm": 0.7895691182003668, "learning_rate": 9.560711474726629e-06, "loss": 0.6138, "step": 28570 }, { "epoch": 0.5190323986633736, "grad_norm": 0.7907900372009989, "learning_rate": 9.554954095726292e-06, "loss": 0.6327, "step": 28580 }, { "epoch": 0.5192140055208485, "grad_norm": 0.756396441017453, "learning_rate": 9.549196864536149e-06, "loss": 0.6238, "step": 28590 }, { "epoch": 0.5193956123783234, "grad_norm": 0.769293722338971, "learning_rate": 9.543439783068303e-06, "loss": 0.6095, "step": 28600 }, { "epoch": 0.5195772192357984, "grad_norm": 0.8001202374013867, "learning_rate": 9.537682853234825e-06, "loss": 0.6208, "step": 28610 }, { "epoch": 0.5197588260932733, "grad_norm": 0.7667732033171781, "learning_rate": 9.53192607694772e-06, "loss": 0.6318, "step": 28620 }, { "epoch": 0.5199404329507482, "grad_norm": 0.7474837041316701, "learning_rate": 9.526169456118952e-06, "loss": 0.6203, "step": 28630 }, { "epoch": 0.5201220398082231, "grad_norm": 0.7700628798448256, "learning_rate": 9.520412992660429e-06, "loss": 0.6282, "step": 28640 }, { "epoch": 0.5203036466656981, "grad_norm": 0.7521908542754546, "learning_rate": 9.514656688484004e-06, "loss": 0.6219, "step": 28650 }, { "epoch": 0.520485253523173, "grad_norm": 0.7515439909732629, "learning_rate": 9.508900545501488e-06, "loss": 0.6232, "step": 28660 }, { "epoch": 0.520666860380648, "grad_norm": 0.7809719677107992, "learning_rate": 9.503144565624622e-06, "loss": 0.629, "step": 28670 }, { "epoch": 0.520848467238123, "grad_norm": 0.8123234931250876, "learning_rate": 9.497388750765114e-06, "loss": 0.6255, "step": 28680 }, { "epoch": 0.5210300740955979, "grad_norm": 0.831235511681933, "learning_rate": 9.491633102834592e-06, "loss": 0.6266, "step": 28690 }, { "epoch": 0.5212116809530728, "grad_norm": 0.7673704047575048, "learning_rate": 9.48587762374465e-06, "loss": 0.6303, "step": 28700 }, { "epoch": 0.5213932878105477, "grad_norm": 0.7408443274173981, "learning_rate": 9.480122315406815e-06, "loss": 0.6236, "step": 28710 }, { "epoch": 0.5215748946680226, "grad_norm": 0.7692962330032803, "learning_rate": 9.474367179732561e-06, "loss": 0.6245, "step": 28720 }, { "epoch": 0.5217565015254976, "grad_norm": 0.763568757553102, "learning_rate": 9.468612218633306e-06, "loss": 0.6211, "step": 28730 }, { "epoch": 0.5219381083829725, "grad_norm": 0.8048081776422836, "learning_rate": 9.462857434020403e-06, "loss": 0.6127, "step": 28740 }, { "epoch": 0.5221197152404474, "grad_norm": 0.7645054773999246, "learning_rate": 9.457102827805155e-06, "loss": 0.6332, "step": 28750 }, { "epoch": 0.5223013220979225, "grad_norm": 0.7577250011797221, "learning_rate": 9.451348401898797e-06, "loss": 0.6215, "step": 28760 }, { "epoch": 0.5224829289553974, "grad_norm": 0.755116019318044, "learning_rate": 9.445594158212514e-06, "loss": 0.6215, "step": 28770 }, { "epoch": 0.5226645358128723, "grad_norm": 0.7712686571066073, "learning_rate": 9.43984009865742e-06, "loss": 0.6307, "step": 28780 }, { "epoch": 0.5228461426703472, "grad_norm": 0.7703443543281813, "learning_rate": 9.434086225144581e-06, "loss": 0.6249, "step": 28790 }, { "epoch": 0.5230277495278222, "grad_norm": 0.7626568036731148, "learning_rate": 9.428332539584983e-06, "loss": 0.6103, "step": 28800 }, { "epoch": 0.5232093563852971, "grad_norm": 0.7804004139715912, "learning_rate": 9.42257904388957e-06, "loss": 0.6273, "step": 28810 }, { "epoch": 0.523390963242772, "grad_norm": 0.7642474748490445, "learning_rate": 9.416825739969207e-06, "loss": 0.6136, "step": 28820 }, { "epoch": 0.5235725701002469, "grad_norm": 0.7437288059983805, "learning_rate": 9.411072629734707e-06, "loss": 0.5999, "step": 28830 }, { "epoch": 0.523754176957722, "grad_norm": 0.7837188195626034, "learning_rate": 9.405319715096808e-06, "loss": 0.6319, "step": 28840 }, { "epoch": 0.5239357838151969, "grad_norm": 0.7635830078657779, "learning_rate": 9.399566997966184e-06, "loss": 0.6329, "step": 28850 }, { "epoch": 0.5241173906726718, "grad_norm": 0.7636899072229364, "learning_rate": 9.393814480253457e-06, "loss": 0.6186, "step": 28860 }, { "epoch": 0.5242989975301467, "grad_norm": 0.7767583127662112, "learning_rate": 9.388062163869164e-06, "loss": 0.6229, "step": 28870 }, { "epoch": 0.5244806043876217, "grad_norm": 0.7239758567183049, "learning_rate": 9.382310050723794e-06, "loss": 0.6074, "step": 28880 }, { "epoch": 0.5246622112450966, "grad_norm": 0.8909589005803659, "learning_rate": 9.37655814272775e-06, "loss": 0.6374, "step": 28890 }, { "epoch": 0.5248438181025715, "grad_norm": 0.7803626087774964, "learning_rate": 9.370806441791379e-06, "loss": 0.6218, "step": 28900 }, { "epoch": 0.5250254249600464, "grad_norm": 0.7991690485469884, "learning_rate": 9.365054949824958e-06, "loss": 0.6338, "step": 28910 }, { "epoch": 0.5252070318175214, "grad_norm": 0.761801992557132, "learning_rate": 9.359303668738689e-06, "loss": 0.6133, "step": 28920 }, { "epoch": 0.5253886386749964, "grad_norm": 0.7789525060598242, "learning_rate": 9.353552600442708e-06, "loss": 0.6209, "step": 28930 }, { "epoch": 0.5255702455324713, "grad_norm": 0.7797784234195945, "learning_rate": 9.347801746847084e-06, "loss": 0.6329, "step": 28940 }, { "epoch": 0.5257518523899463, "grad_norm": 0.7538023833998073, "learning_rate": 9.342051109861807e-06, "loss": 0.6366, "step": 28950 }, { "epoch": 0.5259334592474212, "grad_norm": 0.7888231286267628, "learning_rate": 9.336300691396795e-06, "loss": 0.6174, "step": 28960 }, { "epoch": 0.5261150661048961, "grad_norm": 0.7483450148859832, "learning_rate": 9.330550493361906e-06, "loss": 0.6128, "step": 28970 }, { "epoch": 0.526296672962371, "grad_norm": 0.7706195067874098, "learning_rate": 9.324800517666904e-06, "loss": 0.6276, "step": 28980 }, { "epoch": 0.526478279819846, "grad_norm": 0.7825828774216996, "learning_rate": 9.319050766221503e-06, "loss": 0.6155, "step": 28990 }, { "epoch": 0.5266598866773209, "grad_norm": 0.7587033237709484, "learning_rate": 9.313301240935319e-06, "loss": 0.623, "step": 29000 }, { "epoch": 0.5268414935347959, "grad_norm": 0.7601841882876831, "learning_rate": 9.307551943717916e-06, "loss": 0.6252, "step": 29010 }, { "epoch": 0.5270231003922708, "grad_norm": 0.793990928721402, "learning_rate": 9.301802876478759e-06, "loss": 0.6322, "step": 29020 }, { "epoch": 0.5272047072497458, "grad_norm": 0.772073514019924, "learning_rate": 9.296054041127258e-06, "loss": 0.612, "step": 29030 }, { "epoch": 0.5273863141072207, "grad_norm": 0.7845240662242806, "learning_rate": 9.290305439572727e-06, "loss": 0.6166, "step": 29040 }, { "epoch": 0.5275679209646956, "grad_norm": 0.782267237785824, "learning_rate": 9.284557073724421e-06, "loss": 0.6211, "step": 29050 }, { "epoch": 0.5277495278221705, "grad_norm": 0.7430942180995049, "learning_rate": 9.278808945491505e-06, "loss": 0.606, "step": 29060 }, { "epoch": 0.5279311346796455, "grad_norm": 0.7440411640615793, "learning_rate": 9.273061056783057e-06, "loss": 0.6162, "step": 29070 }, { "epoch": 0.5281127415371204, "grad_norm": 0.728850723412336, "learning_rate": 9.267313409508098e-06, "loss": 0.6167, "step": 29080 }, { "epoch": 0.5282943483945953, "grad_norm": 0.741535735585185, "learning_rate": 9.26156600557555e-06, "loss": 0.618, "step": 29090 }, { "epoch": 0.5284759552520704, "grad_norm": 0.7726176802770809, "learning_rate": 9.255818846894264e-06, "loss": 0.6041, "step": 29100 }, { "epoch": 0.5286575621095453, "grad_norm": 0.7474408237754476, "learning_rate": 9.250071935373004e-06, "loss": 0.6348, "step": 29110 }, { "epoch": 0.5288391689670202, "grad_norm": 0.7845168846767036, "learning_rate": 9.244325272920455e-06, "loss": 0.6304, "step": 29120 }, { "epoch": 0.5290207758244951, "grad_norm": 0.7554983458003958, "learning_rate": 9.238578861445221e-06, "loss": 0.6146, "step": 29130 }, { "epoch": 0.5292023826819701, "grad_norm": 0.7604491802560442, "learning_rate": 9.232832702855817e-06, "loss": 0.6256, "step": 29140 }, { "epoch": 0.529383989539445, "grad_norm": 0.7868817382608225, "learning_rate": 9.22708679906068e-06, "loss": 0.6232, "step": 29150 }, { "epoch": 0.5295655963969199, "grad_norm": 0.7697783815277512, "learning_rate": 9.221341151968154e-06, "loss": 0.6257, "step": 29160 }, { "epoch": 0.5297472032543948, "grad_norm": 0.7677206619295256, "learning_rate": 9.21559576348651e-06, "loss": 0.6183, "step": 29170 }, { "epoch": 0.5299288101118699, "grad_norm": 0.7887056279454413, "learning_rate": 9.209850635523919e-06, "loss": 0.6205, "step": 29180 }, { "epoch": 0.5301104169693448, "grad_norm": 0.7596639578865956, "learning_rate": 9.204105769988481e-06, "loss": 0.6172, "step": 29190 }, { "epoch": 0.5302920238268197, "grad_norm": 0.7647927688949688, "learning_rate": 9.198361168788194e-06, "loss": 0.62, "step": 29200 }, { "epoch": 0.5304736306842947, "grad_norm": 0.7372018425667601, "learning_rate": 9.192616833830981e-06, "loss": 0.6157, "step": 29210 }, { "epoch": 0.5306552375417696, "grad_norm": 0.7873931443586031, "learning_rate": 9.186872767024663e-06, "loss": 0.6313, "step": 29220 }, { "epoch": 0.5308368443992445, "grad_norm": 0.795692780696399, "learning_rate": 9.181128970276987e-06, "loss": 0.6165, "step": 29230 }, { "epoch": 0.5310184512567194, "grad_norm": 0.7813298829394318, "learning_rate": 9.175385445495595e-06, "loss": 0.6227, "step": 29240 }, { "epoch": 0.5312000581141944, "grad_norm": 0.7918424276688278, "learning_rate": 9.169642194588055e-06, "loss": 0.6262, "step": 29250 }, { "epoch": 0.5313816649716693, "grad_norm": 0.7646774979246946, "learning_rate": 9.16389921946183e-06, "loss": 0.6144, "step": 29260 }, { "epoch": 0.5315632718291443, "grad_norm": 0.7707911393946592, "learning_rate": 9.158156522024296e-06, "loss": 0.6166, "step": 29270 }, { "epoch": 0.5317448786866192, "grad_norm": 0.7778679729221845, "learning_rate": 9.15241410418274e-06, "loss": 0.6166, "step": 29280 }, { "epoch": 0.5319264855440942, "grad_norm": 0.7397500454046197, "learning_rate": 9.146671967844351e-06, "loss": 0.624, "step": 29290 }, { "epoch": 0.5321080924015691, "grad_norm": 0.7872259662566303, "learning_rate": 9.140930114916233e-06, "loss": 0.6257, "step": 29300 }, { "epoch": 0.532289699259044, "grad_norm": 0.7495911558911852, "learning_rate": 9.135188547305384e-06, "loss": 0.6171, "step": 29310 }, { "epoch": 0.5324713061165189, "grad_norm": 0.7487481278185043, "learning_rate": 9.129447266918716e-06, "loss": 0.6198, "step": 29320 }, { "epoch": 0.5326529129739939, "grad_norm": 0.7804574034754905, "learning_rate": 9.123706275663044e-06, "loss": 0.6272, "step": 29330 }, { "epoch": 0.5328345198314688, "grad_norm": 0.7970810413754327, "learning_rate": 9.117965575445083e-06, "loss": 0.6322, "step": 29340 }, { "epoch": 0.5330161266889438, "grad_norm": 0.7814096574901659, "learning_rate": 9.11222516817146e-06, "loss": 0.6046, "step": 29350 }, { "epoch": 0.5331977335464188, "grad_norm": 0.7849750534507548, "learning_rate": 9.10648505574869e-06, "loss": 0.6128, "step": 29360 }, { "epoch": 0.5333793404038937, "grad_norm": 0.7585842530647015, "learning_rate": 9.100745240083209e-06, "loss": 0.6221, "step": 29370 }, { "epoch": 0.5335609472613686, "grad_norm": 0.7373767233720306, "learning_rate": 9.095005723081335e-06, "loss": 0.6189, "step": 29380 }, { "epoch": 0.5337425541188435, "grad_norm": 0.7542316570681615, "learning_rate": 9.089266506649305e-06, "loss": 0.6365, "step": 29390 }, { "epoch": 0.5339241609763185, "grad_norm": 0.8147457750709117, "learning_rate": 9.083527592693237e-06, "loss": 0.626, "step": 29400 }, { "epoch": 0.5341057678337934, "grad_norm": 0.8083880569203181, "learning_rate": 9.077788983119172e-06, "loss": 0.6129, "step": 29410 }, { "epoch": 0.5342873746912683, "grad_norm": 0.8090763802939416, "learning_rate": 9.072050679833027e-06, "loss": 0.6196, "step": 29420 }, { "epoch": 0.5344689815487432, "grad_norm": 0.7664728714017878, "learning_rate": 9.066312684740633e-06, "loss": 0.6256, "step": 29430 }, { "epoch": 0.5346505884062183, "grad_norm": 0.7711610603304675, "learning_rate": 9.060574999747705e-06, "loss": 0.6333, "step": 29440 }, { "epoch": 0.5348321952636932, "grad_norm": 0.757618861913886, "learning_rate": 9.054837626759874e-06, "loss": 0.6213, "step": 29450 }, { "epoch": 0.5350138021211681, "grad_norm": 0.7710781846620202, "learning_rate": 9.049100567682651e-06, "loss": 0.6196, "step": 29460 }, { "epoch": 0.535195408978643, "grad_norm": 0.7798475596280993, "learning_rate": 9.043363824421443e-06, "loss": 0.6327, "step": 29470 }, { "epoch": 0.535377015836118, "grad_norm": 0.7797151923800749, "learning_rate": 9.037627398881564e-06, "loss": 0.6307, "step": 29480 }, { "epoch": 0.5355586226935929, "grad_norm": 0.7651140250988484, "learning_rate": 9.03189129296821e-06, "loss": 0.6275, "step": 29490 }, { "epoch": 0.5357402295510678, "grad_norm": 0.7902239846988282, "learning_rate": 9.026155508586481e-06, "loss": 0.6142, "step": 29500 }, { "epoch": 0.5359218364085427, "grad_norm": 0.7809875080058903, "learning_rate": 9.020420047641365e-06, "loss": 0.6141, "step": 29510 }, { "epoch": 0.5361034432660178, "grad_norm": 0.7796684032489313, "learning_rate": 9.014684912037742e-06, "loss": 0.6509, "step": 29520 }, { "epoch": 0.5362850501234927, "grad_norm": 0.7396099291731293, "learning_rate": 9.008950103680385e-06, "loss": 0.6109, "step": 29530 }, { "epoch": 0.5364666569809676, "grad_norm": 0.7680170308870415, "learning_rate": 9.00321562447396e-06, "loss": 0.6197, "step": 29540 }, { "epoch": 0.5366482638384426, "grad_norm": 0.7637212316491998, "learning_rate": 8.997481476323021e-06, "loss": 0.6205, "step": 29550 }, { "epoch": 0.5368298706959175, "grad_norm": 0.7631422370895404, "learning_rate": 8.99174766113201e-06, "loss": 0.6244, "step": 29560 }, { "epoch": 0.5370114775533924, "grad_norm": 0.7459578755943815, "learning_rate": 8.986014180805268e-06, "loss": 0.6274, "step": 29570 }, { "epoch": 0.5371930844108673, "grad_norm": 0.780361122003118, "learning_rate": 8.980281037247013e-06, "loss": 0.6169, "step": 29580 }, { "epoch": 0.5373746912683423, "grad_norm": 0.7657795310726192, "learning_rate": 8.974548232361361e-06, "loss": 0.62, "step": 29590 }, { "epoch": 0.5375562981258172, "grad_norm": 0.7353406989427229, "learning_rate": 8.968815768052304e-06, "loss": 0.6194, "step": 29600 }, { "epoch": 0.5377379049832922, "grad_norm": 0.764485959417921, "learning_rate": 8.963083646223739e-06, "loss": 0.612, "step": 29610 }, { "epoch": 0.5379195118407671, "grad_norm": 0.7624508977451415, "learning_rate": 8.957351868779426e-06, "loss": 0.6131, "step": 29620 }, { "epoch": 0.5381011186982421, "grad_norm": 0.7461136934753141, "learning_rate": 8.951620437623034e-06, "loss": 0.6167, "step": 29630 }, { "epoch": 0.538282725555717, "grad_norm": 0.7820265482286026, "learning_rate": 8.945889354658094e-06, "loss": 0.6438, "step": 29640 }, { "epoch": 0.5384643324131919, "grad_norm": 0.765701880810642, "learning_rate": 8.940158621788047e-06, "loss": 0.6179, "step": 29650 }, { "epoch": 0.5386459392706668, "grad_norm": 0.7664695632628276, "learning_rate": 8.934428240916196e-06, "loss": 0.6179, "step": 29660 }, { "epoch": 0.5388275461281418, "grad_norm": 0.7411751401604528, "learning_rate": 8.92869821394573e-06, "loss": 0.6264, "step": 29670 }, { "epoch": 0.5390091529856167, "grad_norm": 0.8177970430509839, "learning_rate": 8.922968542779736e-06, "loss": 0.6307, "step": 29680 }, { "epoch": 0.5391907598430917, "grad_norm": 0.7620766620321953, "learning_rate": 8.917239229321162e-06, "loss": 0.6153, "step": 29690 }, { "epoch": 0.5393723667005667, "grad_norm": 0.7633215654611853, "learning_rate": 8.911510275472855e-06, "loss": 0.6291, "step": 29700 }, { "epoch": 0.5395539735580416, "grad_norm": 0.7626736132112101, "learning_rate": 8.905781683137532e-06, "loss": 0.6194, "step": 29710 }, { "epoch": 0.5397355804155165, "grad_norm": 0.7940176204480338, "learning_rate": 8.900053454217796e-06, "loss": 0.6273, "step": 29720 }, { "epoch": 0.5399171872729914, "grad_norm": 0.7783625900970746, "learning_rate": 8.894325590616122e-06, "loss": 0.6272, "step": 29730 }, { "epoch": 0.5400987941304664, "grad_norm": 0.779597365946391, "learning_rate": 8.888598094234871e-06, "loss": 0.6221, "step": 29740 }, { "epoch": 0.5402804009879413, "grad_norm": 0.7711221630283235, "learning_rate": 8.88287096697628e-06, "loss": 0.6126, "step": 29750 }, { "epoch": 0.5404620078454162, "grad_norm": 0.8103613938169784, "learning_rate": 8.877144210742455e-06, "loss": 0.6279, "step": 29760 }, { "epoch": 0.5406436147028911, "grad_norm": 0.7641647292422666, "learning_rate": 8.871417827435396e-06, "loss": 0.6312, "step": 29770 }, { "epoch": 0.5408252215603662, "grad_norm": 0.8033952115738247, "learning_rate": 8.86569181895696e-06, "loss": 0.6185, "step": 29780 }, { "epoch": 0.5410068284178411, "grad_norm": 0.771468665694071, "learning_rate": 8.859966187208898e-06, "loss": 0.6231, "step": 29790 }, { "epoch": 0.541188435275316, "grad_norm": 0.7898252795153079, "learning_rate": 8.854240934092818e-06, "loss": 0.6153, "step": 29800 }, { "epoch": 0.5413700421327909, "grad_norm": 0.7681595276333816, "learning_rate": 8.84851606151022e-06, "loss": 0.6228, "step": 29810 }, { "epoch": 0.5415516489902659, "grad_norm": 0.7727488824685629, "learning_rate": 8.84279157136246e-06, "loss": 0.6236, "step": 29820 }, { "epoch": 0.5417332558477408, "grad_norm": 0.7517907120976008, "learning_rate": 8.837067465550782e-06, "loss": 0.6161, "step": 29830 }, { "epoch": 0.5419148627052157, "grad_norm": 0.7631832923953785, "learning_rate": 8.831343745976288e-06, "loss": 0.6187, "step": 29840 }, { "epoch": 0.5420964695626906, "grad_norm": 0.8186366981403983, "learning_rate": 8.825620414539971e-06, "loss": 0.6212, "step": 29850 }, { "epoch": 0.5422780764201657, "grad_norm": 0.7774571411435269, "learning_rate": 8.819897473142677e-06, "loss": 0.6223, "step": 29860 }, { "epoch": 0.5424596832776406, "grad_norm": 0.7469457210074554, "learning_rate": 8.814174923685124e-06, "loss": 0.6173, "step": 29870 }, { "epoch": 0.5426412901351155, "grad_norm": 0.7347379383530415, "learning_rate": 8.808452768067917e-06, "loss": 0.6181, "step": 29880 }, { "epoch": 0.5428228969925905, "grad_norm": 0.7864410428788526, "learning_rate": 8.802731008191506e-06, "loss": 0.6305, "step": 29890 }, { "epoch": 0.5430045038500654, "grad_norm": 0.7733272104963337, "learning_rate": 8.79700964595623e-06, "loss": 0.6287, "step": 29900 }, { "epoch": 0.5431861107075403, "grad_norm": 0.7698060595012854, "learning_rate": 8.791288683262285e-06, "loss": 0.6248, "step": 29910 }, { "epoch": 0.5433677175650152, "grad_norm": 0.7781235610049484, "learning_rate": 8.785568122009736e-06, "loss": 0.6327, "step": 29920 }, { "epoch": 0.5435493244224902, "grad_norm": 0.7422387601286639, "learning_rate": 8.779847964098519e-06, "loss": 0.6188, "step": 29930 }, { "epoch": 0.5437309312799651, "grad_norm": 0.7564093655070953, "learning_rate": 8.774128211428429e-06, "loss": 0.633, "step": 29940 }, { "epoch": 0.5439125381374401, "grad_norm": 0.7503774830410096, "learning_rate": 8.768408865899133e-06, "loss": 0.6221, "step": 29950 }, { "epoch": 0.544094144994915, "grad_norm": 0.7854726245965756, "learning_rate": 8.762689929410156e-06, "loss": 0.6192, "step": 29960 }, { "epoch": 0.54427575185239, "grad_norm": 0.7870435903029678, "learning_rate": 8.756971403860896e-06, "loss": 0.6187, "step": 29970 }, { "epoch": 0.5444573587098649, "grad_norm": 0.7631384742626597, "learning_rate": 8.751253291150605e-06, "loss": 0.6262, "step": 29980 }, { "epoch": 0.5446389655673398, "grad_norm": 0.8027050452411225, "learning_rate": 8.745535593178407e-06, "loss": 0.6198, "step": 29990 }, { "epoch": 0.5448205724248147, "grad_norm": 0.7517804860348828, "learning_rate": 8.739818311843277e-06, "loss": 0.612, "step": 30000 }, { "epoch": 0.5450021792822897, "grad_norm": 0.7528892495197888, "learning_rate": 8.734101449044067e-06, "loss": 0.6223, "step": 30010 }, { "epoch": 0.5451837861397646, "grad_norm": 0.7522429981894864, "learning_rate": 8.728385006679475e-06, "loss": 0.6153, "step": 30020 }, { "epoch": 0.5453653929972396, "grad_norm": 0.7726402963300274, "learning_rate": 8.722668986648068e-06, "loss": 0.6152, "step": 30030 }, { "epoch": 0.5455469998547146, "grad_norm": 0.748432423398455, "learning_rate": 8.716953390848267e-06, "loss": 0.6224, "step": 30040 }, { "epoch": 0.5457286067121895, "grad_norm": 0.7963865168958043, "learning_rate": 8.711238221178362e-06, "loss": 0.624, "step": 30050 }, { "epoch": 0.5459102135696644, "grad_norm": 0.7772953490650027, "learning_rate": 8.705523479536493e-06, "loss": 0.6213, "step": 30060 }, { "epoch": 0.5460918204271393, "grad_norm": 0.7782668831419224, "learning_rate": 8.699809167820653e-06, "loss": 0.6295, "step": 30070 }, { "epoch": 0.5462734272846143, "grad_norm": 0.7815226948885513, "learning_rate": 8.69409528792871e-06, "loss": 0.621, "step": 30080 }, { "epoch": 0.5464550341420892, "grad_norm": 0.8007424807450354, "learning_rate": 8.688381841758366e-06, "loss": 0.6229, "step": 30090 }, { "epoch": 0.5466366409995641, "grad_norm": 0.7640836520216913, "learning_rate": 8.682668831207199e-06, "loss": 0.6193, "step": 30100 }, { "epoch": 0.546818247857039, "grad_norm": 0.7343127266436309, "learning_rate": 8.67695625817263e-06, "loss": 0.6243, "step": 30110 }, { "epoch": 0.5469998547145141, "grad_norm": 0.7675851959901626, "learning_rate": 8.67124412455194e-06, "loss": 0.6202, "step": 30120 }, { "epoch": 0.547181461571989, "grad_norm": 0.755225065921083, "learning_rate": 8.665532432242264e-06, "loss": 0.6249, "step": 30130 }, { "epoch": 0.5473630684294639, "grad_norm": 0.7911935427875328, "learning_rate": 8.659821183140589e-06, "loss": 0.6231, "step": 30140 }, { "epoch": 0.5475446752869388, "grad_norm": 0.7556646523240148, "learning_rate": 8.654110379143753e-06, "loss": 0.6126, "step": 30150 }, { "epoch": 0.5477262821444138, "grad_norm": 0.7746048218271495, "learning_rate": 8.648400022148446e-06, "loss": 0.6136, "step": 30160 }, { "epoch": 0.5479078890018887, "grad_norm": 0.7978978810889634, "learning_rate": 8.642690114051218e-06, "loss": 0.627, "step": 30170 }, { "epoch": 0.5480894958593636, "grad_norm": 0.7951965509844003, "learning_rate": 8.636980656748453e-06, "loss": 0.6053, "step": 30180 }, { "epoch": 0.5482711027168385, "grad_norm": 0.7752324574704978, "learning_rate": 8.63127165213641e-06, "loss": 0.6248, "step": 30190 }, { "epoch": 0.5484527095743136, "grad_norm": 0.7824338979188811, "learning_rate": 8.62556310211117e-06, "loss": 0.6279, "step": 30200 }, { "epoch": 0.5486343164317885, "grad_norm": 0.7997350576125302, "learning_rate": 8.619855008568686e-06, "loss": 0.6275, "step": 30210 }, { "epoch": 0.5488159232892634, "grad_norm": 0.7523708221743625, "learning_rate": 8.614147373404744e-06, "loss": 0.6202, "step": 30220 }, { "epoch": 0.5489975301467384, "grad_norm": 0.7606145339563231, "learning_rate": 8.608440198514987e-06, "loss": 0.6039, "step": 30230 }, { "epoch": 0.5491791370042133, "grad_norm": 0.7710952191616195, "learning_rate": 8.602733485794898e-06, "loss": 0.6331, "step": 30240 }, { "epoch": 0.5493607438616882, "grad_norm": 0.7768028632664599, "learning_rate": 8.597027237139816e-06, "loss": 0.6217, "step": 30250 }, { "epoch": 0.5495423507191631, "grad_norm": 0.7775211851638041, "learning_rate": 8.591321454444917e-06, "loss": 0.6085, "step": 30260 }, { "epoch": 0.5497239575766381, "grad_norm": 0.7519413270065118, "learning_rate": 8.585616139605223e-06, "loss": 0.6026, "step": 30270 }, { "epoch": 0.549905564434113, "grad_norm": 0.7836642188402372, "learning_rate": 8.579911294515605e-06, "loss": 0.628, "step": 30280 }, { "epoch": 0.550087171291588, "grad_norm": 0.7592937517297041, "learning_rate": 8.574206921070776e-06, "loss": 0.6195, "step": 30290 }, { "epoch": 0.5502687781490629, "grad_norm": 0.7655274830885118, "learning_rate": 8.568503021165293e-06, "loss": 0.6209, "step": 30300 }, { "epoch": 0.5504503850065379, "grad_norm": 0.7639211904496852, "learning_rate": 8.562799596693553e-06, "loss": 0.6171, "step": 30310 }, { "epoch": 0.5506319918640128, "grad_norm": 0.733288711813114, "learning_rate": 8.5570966495498e-06, "loss": 0.6237, "step": 30320 }, { "epoch": 0.5508135987214877, "grad_norm": 0.7509227670480431, "learning_rate": 8.551394181628114e-06, "loss": 0.6247, "step": 30330 }, { "epoch": 0.5509952055789626, "grad_norm": 0.7316681967442851, "learning_rate": 8.54569219482242e-06, "loss": 0.615, "step": 30340 }, { "epoch": 0.5511768124364376, "grad_norm": 0.7858913760411887, "learning_rate": 8.539990691026484e-06, "loss": 0.6103, "step": 30350 }, { "epoch": 0.5513584192939125, "grad_norm": 0.727928627799071, "learning_rate": 8.534289672133902e-06, "loss": 0.6274, "step": 30360 }, { "epoch": 0.5515400261513875, "grad_norm": 0.7467863789638532, "learning_rate": 8.528589140038124e-06, "loss": 0.6154, "step": 30370 }, { "epoch": 0.5517216330088625, "grad_norm": 0.780492758773914, "learning_rate": 8.522889096632423e-06, "loss": 0.6237, "step": 30380 }, { "epoch": 0.5519032398663374, "grad_norm": 0.7710144802655811, "learning_rate": 8.517189543809926e-06, "loss": 0.6158, "step": 30390 }, { "epoch": 0.5520848467238123, "grad_norm": 0.7359557295581519, "learning_rate": 8.51149048346358e-06, "loss": 0.6183, "step": 30400 }, { "epoch": 0.5522664535812872, "grad_norm": 0.7758862043804271, "learning_rate": 8.505791917486183e-06, "loss": 0.63, "step": 30410 }, { "epoch": 0.5524480604387622, "grad_norm": 0.7837327346567087, "learning_rate": 8.500093847770355e-06, "loss": 0.6192, "step": 30420 }, { "epoch": 0.5526296672962371, "grad_norm": 0.7611093795140489, "learning_rate": 8.494396276208569e-06, "loss": 0.629, "step": 30430 }, { "epoch": 0.552811274153712, "grad_norm": 0.791860543496828, "learning_rate": 8.48869920469311e-06, "loss": 0.6219, "step": 30440 }, { "epoch": 0.5529928810111869, "grad_norm": 0.8127366764919719, "learning_rate": 8.483002635116124e-06, "loss": 0.6304, "step": 30450 }, { "epoch": 0.553174487868662, "grad_norm": 0.7634376094375748, "learning_rate": 8.477306569369566e-06, "loss": 0.6243, "step": 30460 }, { "epoch": 0.5533560947261369, "grad_norm": 0.761591899689623, "learning_rate": 8.47161100934523e-06, "loss": 0.631, "step": 30470 }, { "epoch": 0.5535377015836118, "grad_norm": 0.7509699564344594, "learning_rate": 8.465915956934751e-06, "loss": 0.6045, "step": 30480 }, { "epoch": 0.5537193084410867, "grad_norm": 0.772058103099332, "learning_rate": 8.460221414029588e-06, "loss": 0.6129, "step": 30490 }, { "epoch": 0.5539009152985617, "grad_norm": 0.7152794610076728, "learning_rate": 8.454527382521033e-06, "loss": 0.6101, "step": 30500 }, { "epoch": 0.5540825221560366, "grad_norm": 0.7705846649978882, "learning_rate": 8.448833864300207e-06, "loss": 0.6154, "step": 30510 }, { "epoch": 0.5542641290135115, "grad_norm": 0.7722133019308678, "learning_rate": 8.443140861258061e-06, "loss": 0.6097, "step": 30520 }, { "epoch": 0.5544457358709864, "grad_norm": 0.8066172048720407, "learning_rate": 8.437448375285375e-06, "loss": 0.632, "step": 30530 }, { "epoch": 0.5546273427284615, "grad_norm": 0.7502894108204775, "learning_rate": 8.431756408272756e-06, "loss": 0.6142, "step": 30540 }, { "epoch": 0.5548089495859364, "grad_norm": 0.7696412961319602, "learning_rate": 8.426064962110646e-06, "loss": 0.6215, "step": 30550 }, { "epoch": 0.5549905564434113, "grad_norm": 0.7759608373680167, "learning_rate": 8.420374038689296e-06, "loss": 0.6194, "step": 30560 }, { "epoch": 0.5551721633008863, "grad_norm": 0.755755004232809, "learning_rate": 8.414683639898807e-06, "loss": 0.6243, "step": 30570 }, { "epoch": 0.5553537701583612, "grad_norm": 0.77966004781254, "learning_rate": 8.408993767629085e-06, "loss": 0.624, "step": 30580 }, { "epoch": 0.5555353770158361, "grad_norm": 0.8004922797141183, "learning_rate": 8.403304423769878e-06, "loss": 0.6155, "step": 30590 }, { "epoch": 0.555716983873311, "grad_norm": 0.814035738101385, "learning_rate": 8.397615610210743e-06, "loss": 0.6101, "step": 30600 }, { "epoch": 0.555898590730786, "grad_norm": 0.7505077276485755, "learning_rate": 8.391927328841076e-06, "loss": 0.6337, "step": 30610 }, { "epoch": 0.5560801975882609, "grad_norm": 0.7674195875796678, "learning_rate": 8.386239581550082e-06, "loss": 0.62, "step": 30620 }, { "epoch": 0.5562618044457359, "grad_norm": 0.7395172981159436, "learning_rate": 8.380552370226801e-06, "loss": 0.6112, "step": 30630 }, { "epoch": 0.5564434113032108, "grad_norm": 0.7665270973133974, "learning_rate": 8.374865696760084e-06, "loss": 0.6164, "step": 30640 }, { "epoch": 0.5566250181606858, "grad_norm": 0.7736430477214762, "learning_rate": 8.369179563038614e-06, "loss": 0.6159, "step": 30650 }, { "epoch": 0.5568066250181607, "grad_norm": 0.7444959561967246, "learning_rate": 8.363493970950889e-06, "loss": 0.6155, "step": 30660 }, { "epoch": 0.5569882318756356, "grad_norm": 0.7788228154801226, "learning_rate": 8.35780892238522e-06, "loss": 0.6087, "step": 30670 }, { "epoch": 0.5571698387331105, "grad_norm": 0.7148934022100976, "learning_rate": 8.352124419229755e-06, "loss": 0.6129, "step": 30680 }, { "epoch": 0.5573514455905855, "grad_norm": 0.7562201642261144, "learning_rate": 8.346440463372443e-06, "loss": 0.6143, "step": 30690 }, { "epoch": 0.5575330524480604, "grad_norm": 0.7336153907918489, "learning_rate": 8.340757056701065e-06, "loss": 0.6132, "step": 30700 }, { "epoch": 0.5577146593055354, "grad_norm": 0.7559343080781495, "learning_rate": 8.335074201103211e-06, "loss": 0.619, "step": 30710 }, { "epoch": 0.5578962661630104, "grad_norm": 0.7631541806951628, "learning_rate": 8.329391898466291e-06, "loss": 0.6243, "step": 30720 }, { "epoch": 0.5580778730204853, "grad_norm": 0.7954577922251997, "learning_rate": 8.323710150677533e-06, "loss": 0.6171, "step": 30730 }, { "epoch": 0.5582594798779602, "grad_norm": 0.7315510084068568, "learning_rate": 8.318028959623974e-06, "loss": 0.6062, "step": 30740 }, { "epoch": 0.5584410867354351, "grad_norm": 0.7978483903118245, "learning_rate": 8.312348327192476e-06, "loss": 0.6222, "step": 30750 }, { "epoch": 0.5586226935929101, "grad_norm": 0.747247999129653, "learning_rate": 8.306668255269708e-06, "loss": 0.616, "step": 30760 }, { "epoch": 0.558804300450385, "grad_norm": 0.7558564121286439, "learning_rate": 8.300988745742155e-06, "loss": 0.6262, "step": 30770 }, { "epoch": 0.5589859073078599, "grad_norm": 0.736783959672439, "learning_rate": 8.295309800496115e-06, "loss": 0.6191, "step": 30780 }, { "epoch": 0.5591675141653348, "grad_norm": 0.7875765311419195, "learning_rate": 8.289631421417703e-06, "loss": 0.6116, "step": 30790 }, { "epoch": 0.5593491210228099, "grad_norm": 0.7909062714461255, "learning_rate": 8.283953610392833e-06, "loss": 0.6159, "step": 30800 }, { "epoch": 0.5595307278802848, "grad_norm": 0.7483472795760742, "learning_rate": 8.278276369307252e-06, "loss": 0.6109, "step": 30810 }, { "epoch": 0.5597123347377597, "grad_norm": 0.7717741410599246, "learning_rate": 8.272599700046491e-06, "loss": 0.6236, "step": 30820 }, { "epoch": 0.5598939415952346, "grad_norm": 0.7681264628988367, "learning_rate": 8.26692360449592e-06, "loss": 0.6191, "step": 30830 }, { "epoch": 0.5600755484527096, "grad_norm": 0.7552726537792713, "learning_rate": 8.26124808454069e-06, "loss": 0.6208, "step": 30840 }, { "epoch": 0.5602571553101845, "grad_norm": 0.7495489711803623, "learning_rate": 8.255573142065784e-06, "loss": 0.6133, "step": 30850 }, { "epoch": 0.5604387621676594, "grad_norm": 0.7731601869390657, "learning_rate": 8.24989877895598e-06, "loss": 0.6298, "step": 30860 }, { "epoch": 0.5606203690251343, "grad_norm": 0.7828446861648183, "learning_rate": 8.244224997095863e-06, "loss": 0.6074, "step": 30870 }, { "epoch": 0.5608019758826094, "grad_norm": 0.8136964759871286, "learning_rate": 8.238551798369834e-06, "loss": 0.6391, "step": 30880 }, { "epoch": 0.5609835827400843, "grad_norm": 0.7563455902513433, "learning_rate": 8.232879184662095e-06, "loss": 0.6043, "step": 30890 }, { "epoch": 0.5611651895975592, "grad_norm": 0.7571909066183155, "learning_rate": 8.227207157856654e-06, "loss": 0.6241, "step": 30900 }, { "epoch": 0.5613467964550342, "grad_norm": 0.7462305779243927, "learning_rate": 8.221535719837323e-06, "loss": 0.6211, "step": 30910 }, { "epoch": 0.5615284033125091, "grad_norm": 0.7592161274359435, "learning_rate": 8.215864872487722e-06, "loss": 0.6285, "step": 30920 }, { "epoch": 0.561710010169984, "grad_norm": 0.7836782487687558, "learning_rate": 8.21019461769127e-06, "loss": 0.6195, "step": 30930 }, { "epoch": 0.5618916170274589, "grad_norm": 0.755013514718969, "learning_rate": 8.204524957331194e-06, "loss": 0.624, "step": 30940 }, { "epoch": 0.5620732238849339, "grad_norm": 0.7694885727268082, "learning_rate": 8.19885589329052e-06, "loss": 0.6075, "step": 30950 }, { "epoch": 0.5622548307424088, "grad_norm": 0.7730100247517175, "learning_rate": 8.193187427452076e-06, "loss": 0.6267, "step": 30960 }, { "epoch": 0.5624364375998838, "grad_norm": 0.7575542193113762, "learning_rate": 8.187519561698496e-06, "loss": 0.617, "step": 30970 }, { "epoch": 0.5626180444573587, "grad_norm": 0.7451299310481434, "learning_rate": 8.181852297912205e-06, "loss": 0.6193, "step": 30980 }, { "epoch": 0.5627996513148337, "grad_norm": 0.7753368740805455, "learning_rate": 8.176185637975443e-06, "loss": 0.6141, "step": 30990 }, { "epoch": 0.5629812581723086, "grad_norm": 0.7534085560291613, "learning_rate": 8.170519583770233e-06, "loss": 0.6232, "step": 31000 }, { "epoch": 0.5631628650297835, "grad_norm": 0.7947699073288483, "learning_rate": 8.16485413717841e-06, "loss": 0.6117, "step": 31010 }, { "epoch": 0.5633444718872584, "grad_norm": 0.7523062561127537, "learning_rate": 8.159189300081596e-06, "loss": 0.6069, "step": 31020 }, { "epoch": 0.5635260787447334, "grad_norm": 0.7516435744486174, "learning_rate": 8.153525074361223e-06, "loss": 0.6224, "step": 31030 }, { "epoch": 0.5637076856022083, "grad_norm": 0.7729324636322821, "learning_rate": 8.147861461898508e-06, "loss": 0.6149, "step": 31040 }, { "epoch": 0.5638892924596832, "grad_norm": 0.7488200535914336, "learning_rate": 8.142198464574476e-06, "loss": 0.6148, "step": 31050 }, { "epoch": 0.5640708993171583, "grad_norm": 0.7497158750404743, "learning_rate": 8.136536084269935e-06, "loss": 0.6193, "step": 31060 }, { "epoch": 0.5642525061746332, "grad_norm": 0.7704240458616062, "learning_rate": 8.130874322865494e-06, "loss": 0.6221, "step": 31070 }, { "epoch": 0.5644341130321081, "grad_norm": 0.7738097829319375, "learning_rate": 8.12521318224156e-06, "loss": 0.6251, "step": 31080 }, { "epoch": 0.564615719889583, "grad_norm": 0.7904450533165651, "learning_rate": 8.119552664278331e-06, "loss": 0.6248, "step": 31090 }, { "epoch": 0.564797326747058, "grad_norm": 0.7789056086793767, "learning_rate": 8.113892770855797e-06, "loss": 0.6305, "step": 31100 }, { "epoch": 0.5649789336045329, "grad_norm": 0.7759309508103815, "learning_rate": 8.10823350385374e-06, "loss": 0.6161, "step": 31110 }, { "epoch": 0.5651605404620078, "grad_norm": 0.7722303545784086, "learning_rate": 8.102574865151739e-06, "loss": 0.6202, "step": 31120 }, { "epoch": 0.5653421473194827, "grad_norm": 0.7782152106639679, "learning_rate": 8.096916856629157e-06, "loss": 0.618, "step": 31130 }, { "epoch": 0.5655237541769578, "grad_norm": 0.7404656968186237, "learning_rate": 8.091259480165154e-06, "loss": 0.6046, "step": 31140 }, { "epoch": 0.5657053610344327, "grad_norm": 0.7784295418082765, "learning_rate": 8.085602737638674e-06, "loss": 0.607, "step": 31150 }, { "epoch": 0.5658869678919076, "grad_norm": 0.7595043338310725, "learning_rate": 8.079946630928459e-06, "loss": 0.6202, "step": 31160 }, { "epoch": 0.5660685747493825, "grad_norm": 0.7561624178815989, "learning_rate": 8.074291161913033e-06, "loss": 0.6299, "step": 31170 }, { "epoch": 0.5662501816068575, "grad_norm": 0.7844013935229746, "learning_rate": 8.068636332470706e-06, "loss": 0.6179, "step": 31180 }, { "epoch": 0.5664317884643324, "grad_norm": 0.7613587486751354, "learning_rate": 8.062982144479583e-06, "loss": 0.6135, "step": 31190 }, { "epoch": 0.5666133953218073, "grad_norm": 0.7399049998582832, "learning_rate": 8.057328599817552e-06, "loss": 0.6175, "step": 31200 }, { "epoch": 0.5667950021792822, "grad_norm": 0.7612107675426315, "learning_rate": 8.051675700362292e-06, "loss": 0.622, "step": 31210 }, { "epoch": 0.5669766090367572, "grad_norm": 0.7363153131159599, "learning_rate": 8.046023447991255e-06, "loss": 0.6109, "step": 31220 }, { "epoch": 0.5671582158942322, "grad_norm": 0.7668819316252853, "learning_rate": 8.040371844581694e-06, "loss": 0.6126, "step": 31230 }, { "epoch": 0.5673398227517071, "grad_norm": 0.7774574111210477, "learning_rate": 8.034720892010635e-06, "loss": 0.6264, "step": 31240 }, { "epoch": 0.5675214296091821, "grad_norm": 0.7632809932742232, "learning_rate": 8.029070592154894e-06, "loss": 0.621, "step": 31250 }, { "epoch": 0.567703036466657, "grad_norm": 0.7919068664397775, "learning_rate": 8.023420946891072e-06, "loss": 0.6091, "step": 31260 }, { "epoch": 0.5678846433241319, "grad_norm": 0.7325732487812145, "learning_rate": 8.017771958095538e-06, "loss": 0.6279, "step": 31270 }, { "epoch": 0.5680662501816068, "grad_norm": 0.7727387557477273, "learning_rate": 8.012123627644462e-06, "loss": 0.6315, "step": 31280 }, { "epoch": 0.5682478570390818, "grad_norm": 0.7389520100585053, "learning_rate": 8.006475957413787e-06, "loss": 0.6235, "step": 31290 }, { "epoch": 0.5684294638965567, "grad_norm": 0.7547379555371162, "learning_rate": 8.000828949279234e-06, "loss": 0.6334, "step": 31300 }, { "epoch": 0.5686110707540317, "grad_norm": 0.7690219150792764, "learning_rate": 7.995182605116307e-06, "loss": 0.6111, "step": 31310 }, { "epoch": 0.5687926776115066, "grad_norm": 0.726111725387462, "learning_rate": 7.989536926800292e-06, "loss": 0.6158, "step": 31320 }, { "epoch": 0.5689742844689816, "grad_norm": 0.8164347472057074, "learning_rate": 7.98389191620625e-06, "loss": 0.612, "step": 31330 }, { "epoch": 0.5691558913264565, "grad_norm": 0.752214125064388, "learning_rate": 7.978247575209022e-06, "loss": 0.6146, "step": 31340 }, { "epoch": 0.5693374981839314, "grad_norm": 0.7712245700547763, "learning_rate": 7.972603905683223e-06, "loss": 0.6115, "step": 31350 }, { "epoch": 0.5695191050414063, "grad_norm": 0.7656879219328606, "learning_rate": 7.966960909503252e-06, "loss": 0.6002, "step": 31360 }, { "epoch": 0.5697007118988813, "grad_norm": 0.7333913632546085, "learning_rate": 7.961318588543281e-06, "loss": 0.6248, "step": 31370 }, { "epoch": 0.5698823187563562, "grad_norm": 0.7475603465183839, "learning_rate": 7.955676944677252e-06, "loss": 0.6277, "step": 31380 }, { "epoch": 0.5700639256138311, "grad_norm": 0.7557231395948714, "learning_rate": 7.950035979778895e-06, "loss": 0.6271, "step": 31390 }, { "epoch": 0.5702455324713062, "grad_norm": 0.731463048647602, "learning_rate": 7.944395695721696e-06, "loss": 0.6132, "step": 31400 }, { "epoch": 0.5704271393287811, "grad_norm": 0.769533316028413, "learning_rate": 7.938756094378936e-06, "loss": 0.6184, "step": 31410 }, { "epoch": 0.570608746186256, "grad_norm": 0.7314040448259184, "learning_rate": 7.933117177623653e-06, "loss": 0.616, "step": 31420 }, { "epoch": 0.5707903530437309, "grad_norm": 0.8163437209818287, "learning_rate": 7.92747894732867e-06, "loss": 0.6249, "step": 31430 }, { "epoch": 0.5709719599012059, "grad_norm": 0.7521402721604944, "learning_rate": 7.921841405366566e-06, "loss": 0.6142, "step": 31440 }, { "epoch": 0.5711535667586808, "grad_norm": 0.7970020537767576, "learning_rate": 7.91620455360971e-06, "loss": 0.6115, "step": 31450 }, { "epoch": 0.5713351736161557, "grad_norm": 0.7782957359204059, "learning_rate": 7.910568393930232e-06, "loss": 0.6133, "step": 31460 }, { "epoch": 0.5715167804736306, "grad_norm": 0.7425690712157623, "learning_rate": 7.904932928200022e-06, "loss": 0.6282, "step": 31470 }, { "epoch": 0.5716983873311057, "grad_norm": 0.7476848280599706, "learning_rate": 7.899298158290763e-06, "loss": 0.6107, "step": 31480 }, { "epoch": 0.5718799941885806, "grad_norm": 0.7732615361201025, "learning_rate": 7.893664086073889e-06, "loss": 0.6289, "step": 31490 }, { "epoch": 0.5720616010460555, "grad_norm": 0.7417427074923302, "learning_rate": 7.888030713420608e-06, "loss": 0.6132, "step": 31500 }, { "epoch": 0.5722432079035304, "grad_norm": 0.7800211722973234, "learning_rate": 7.882398042201896e-06, "loss": 0.622, "step": 31510 }, { "epoch": 0.5724248147610054, "grad_norm": 0.757618635015025, "learning_rate": 7.876766074288495e-06, "loss": 0.6184, "step": 31520 }, { "epoch": 0.5726064216184803, "grad_norm": 0.7848787582455539, "learning_rate": 7.871134811550913e-06, "loss": 0.617, "step": 31530 }, { "epoch": 0.5727880284759552, "grad_norm": 0.8014139753219958, "learning_rate": 7.865504255859426e-06, "loss": 0.6145, "step": 31540 }, { "epoch": 0.5729696353334301, "grad_norm": 0.7783264512754405, "learning_rate": 7.859874409084074e-06, "loss": 0.6304, "step": 31550 }, { "epoch": 0.5731512421909051, "grad_norm": 0.8020385494660885, "learning_rate": 7.854245273094659e-06, "loss": 0.6115, "step": 31560 }, { "epoch": 0.5733328490483801, "grad_norm": 0.7331600157437301, "learning_rate": 7.848616849760753e-06, "loss": 0.6166, "step": 31570 }, { "epoch": 0.573514455905855, "grad_norm": 0.760035808556449, "learning_rate": 7.84298914095168e-06, "loss": 0.6064, "step": 31580 }, { "epoch": 0.57369606276333, "grad_norm": 0.763951959244978, "learning_rate": 7.837362148536543e-06, "loss": 0.6231, "step": 31590 }, { "epoch": 0.5738776696208049, "grad_norm": 0.7537972496032576, "learning_rate": 7.831735874384189e-06, "loss": 0.6197, "step": 31600 }, { "epoch": 0.5740592764782798, "grad_norm": 0.7374233870620666, "learning_rate": 7.826110320363248e-06, "loss": 0.6095, "step": 31610 }, { "epoch": 0.5742408833357547, "grad_norm": 0.7653681437937963, "learning_rate": 7.820485488342085e-06, "loss": 0.6095, "step": 31620 }, { "epoch": 0.5744224901932297, "grad_norm": 0.7849071038328569, "learning_rate": 7.814861380188852e-06, "loss": 0.618, "step": 31630 }, { "epoch": 0.5746040970507046, "grad_norm": 0.7687627206232255, "learning_rate": 7.809237997771435e-06, "loss": 0.6043, "step": 31640 }, { "epoch": 0.5747857039081796, "grad_norm": 0.7533295204660895, "learning_rate": 7.8036153429575e-06, "loss": 0.6109, "step": 31650 }, { "epoch": 0.5749673107656545, "grad_norm": 0.7526095388231681, "learning_rate": 7.79799341761446e-06, "loss": 0.6184, "step": 31660 }, { "epoch": 0.5751489176231295, "grad_norm": 0.7680927367712181, "learning_rate": 7.792372223609483e-06, "loss": 0.6142, "step": 31670 }, { "epoch": 0.5753305244806044, "grad_norm": 0.7860645777017797, "learning_rate": 7.786751762809508e-06, "loss": 0.6134, "step": 31680 }, { "epoch": 0.5755121313380793, "grad_norm": 0.7957477692443371, "learning_rate": 7.781132037081215e-06, "loss": 0.6049, "step": 31690 }, { "epoch": 0.5756937381955542, "grad_norm": 0.7638488937842521, "learning_rate": 7.775513048291053e-06, "loss": 0.6276, "step": 31700 }, { "epoch": 0.5758753450530292, "grad_norm": 0.7272969145185059, "learning_rate": 7.769894798305217e-06, "loss": 0.6205, "step": 31710 }, { "epoch": 0.5760569519105041, "grad_norm": 0.7397841945449902, "learning_rate": 7.764277288989659e-06, "loss": 0.62, "step": 31720 }, { "epoch": 0.576238558767979, "grad_norm": 0.7739606165524868, "learning_rate": 7.758660522210086e-06, "loss": 0.6216, "step": 31730 }, { "epoch": 0.5764201656254541, "grad_norm": 0.7551121960293175, "learning_rate": 7.753044499831959e-06, "loss": 0.6209, "step": 31740 }, { "epoch": 0.576601772482929, "grad_norm": 0.8119366420694633, "learning_rate": 7.747429223720489e-06, "loss": 0.6196, "step": 31750 }, { "epoch": 0.5767833793404039, "grad_norm": 0.7595154823419229, "learning_rate": 7.741814695740646e-06, "loss": 0.5988, "step": 31760 }, { "epoch": 0.5769649861978788, "grad_norm": 0.7620084067090794, "learning_rate": 7.736200917757143e-06, "loss": 0.5975, "step": 31770 }, { "epoch": 0.5771465930553538, "grad_norm": 0.7558314870060701, "learning_rate": 7.730587891634442e-06, "loss": 0.6072, "step": 31780 }, { "epoch": 0.5773281999128287, "grad_norm": 0.7804226404160579, "learning_rate": 7.724975619236771e-06, "loss": 0.6117, "step": 31790 }, { "epoch": 0.5775098067703036, "grad_norm": 0.7731262296638466, "learning_rate": 7.719364102428089e-06, "loss": 0.6162, "step": 31800 }, { "epoch": 0.5776914136277785, "grad_norm": 0.7696929107830797, "learning_rate": 7.713753343072119e-06, "loss": 0.6177, "step": 31810 }, { "epoch": 0.5778730204852536, "grad_norm": 0.7654905039566557, "learning_rate": 7.708143343032319e-06, "loss": 0.6174, "step": 31820 }, { "epoch": 0.5780546273427285, "grad_norm": 0.7731729544473892, "learning_rate": 7.702534104171909e-06, "loss": 0.6197, "step": 31830 }, { "epoch": 0.5782362342002034, "grad_norm": 0.7801718599305456, "learning_rate": 7.696925628353842e-06, "loss": 0.6028, "step": 31840 }, { "epoch": 0.5784178410576784, "grad_norm": 0.759432701023911, "learning_rate": 7.691317917440832e-06, "loss": 0.6121, "step": 31850 }, { "epoch": 0.5785994479151533, "grad_norm": 0.7573726935449524, "learning_rate": 7.685710973295326e-06, "loss": 0.6209, "step": 31860 }, { "epoch": 0.5787810547726282, "grad_norm": 0.7847843678354159, "learning_rate": 7.680104797779518e-06, "loss": 0.6175, "step": 31870 }, { "epoch": 0.5789626616301031, "grad_norm": 0.7537351683811736, "learning_rate": 7.674499392755359e-06, "loss": 0.6111, "step": 31880 }, { "epoch": 0.579144268487578, "grad_norm": 0.7489736562354088, "learning_rate": 7.668894760084531e-06, "loss": 0.6146, "step": 31890 }, { "epoch": 0.579325875345053, "grad_norm": 0.744401693032038, "learning_rate": 7.663290901628467e-06, "loss": 0.6098, "step": 31900 }, { "epoch": 0.579507482202528, "grad_norm": 0.7669202897360379, "learning_rate": 7.657687819248334e-06, "loss": 0.6083, "step": 31910 }, { "epoch": 0.5796890890600029, "grad_norm": 0.738850500075608, "learning_rate": 7.652085514805055e-06, "loss": 0.6083, "step": 31920 }, { "epoch": 0.5798706959174779, "grad_norm": 0.7512863713484491, "learning_rate": 7.646483990159281e-06, "loss": 0.6227, "step": 31930 }, { "epoch": 0.5800523027749528, "grad_norm": 0.7627633476935757, "learning_rate": 7.64088324717141e-06, "loss": 0.6192, "step": 31940 }, { "epoch": 0.5802339096324277, "grad_norm": 0.7340693661811512, "learning_rate": 7.635283287701583e-06, "loss": 0.6118, "step": 31950 }, { "epoch": 0.5804155164899026, "grad_norm": 0.7874893515534199, "learning_rate": 7.629684113609678e-06, "loss": 0.6031, "step": 31960 }, { "epoch": 0.5805971233473776, "grad_norm": 0.754376004975035, "learning_rate": 7.624085726755313e-06, "loss": 0.6261, "step": 31970 }, { "epoch": 0.5807787302048525, "grad_norm": 0.7454072135811399, "learning_rate": 7.618488128997837e-06, "loss": 0.6176, "step": 31980 }, { "epoch": 0.5809603370623275, "grad_norm": 0.7592946135671067, "learning_rate": 7.612891322196353e-06, "loss": 0.6179, "step": 31990 }, { "epoch": 0.5811419439198025, "grad_norm": 0.7986518947182593, "learning_rate": 7.607295308209681e-06, "loss": 0.6227, "step": 32000 }, { "epoch": 0.5813235507772774, "grad_norm": 0.7535045962206661, "learning_rate": 7.601700088896401e-06, "loss": 0.6142, "step": 32010 }, { "epoch": 0.5815051576347523, "grad_norm": 0.7683450240654415, "learning_rate": 7.596105666114804e-06, "loss": 0.6109, "step": 32020 }, { "epoch": 0.5816867644922272, "grad_norm": 0.7806826642384206, "learning_rate": 7.590512041722941e-06, "loss": 0.6134, "step": 32030 }, { "epoch": 0.5818683713497022, "grad_norm": 0.7459777925063231, "learning_rate": 7.584919217578577e-06, "loss": 0.625, "step": 32040 }, { "epoch": 0.5820499782071771, "grad_norm": 0.7917610611810282, "learning_rate": 7.579327195539226e-06, "loss": 0.6218, "step": 32050 }, { "epoch": 0.582231585064652, "grad_norm": 0.7676969542416314, "learning_rate": 7.573735977462129e-06, "loss": 0.6161, "step": 32060 }, { "epoch": 0.5824131919221269, "grad_norm": 0.7488559275840403, "learning_rate": 7.568145565204253e-06, "loss": 0.6351, "step": 32070 }, { "epoch": 0.582594798779602, "grad_norm": 0.7617175757571196, "learning_rate": 7.562555960622314e-06, "loss": 0.6284, "step": 32080 }, { "epoch": 0.5827764056370769, "grad_norm": 0.8024974324720653, "learning_rate": 7.5569671655727485e-06, "loss": 0.6287, "step": 32090 }, { "epoch": 0.5829580124945518, "grad_norm": 0.7425801680837948, "learning_rate": 7.551379181911727e-06, "loss": 0.6228, "step": 32100 }, { "epoch": 0.5831396193520267, "grad_norm": 0.7850889709390375, "learning_rate": 7.545792011495146e-06, "loss": 0.6172, "step": 32110 }, { "epoch": 0.5833212262095017, "grad_norm": 0.7890241852491409, "learning_rate": 7.540205656178642e-06, "loss": 0.6128, "step": 32120 }, { "epoch": 0.5835028330669766, "grad_norm": 0.7683450919427972, "learning_rate": 7.5346201178175704e-06, "loss": 0.6117, "step": 32130 }, { "epoch": 0.5836844399244515, "grad_norm": 0.7702692221011976, "learning_rate": 7.529035398267021e-06, "loss": 0.6062, "step": 32140 }, { "epoch": 0.5838660467819264, "grad_norm": 0.7504311219225608, "learning_rate": 7.523451499381809e-06, "loss": 0.6077, "step": 32150 }, { "epoch": 0.5840476536394015, "grad_norm": 0.7573466351159333, "learning_rate": 7.517868423016482e-06, "loss": 0.6238, "step": 32160 }, { "epoch": 0.5842292604968764, "grad_norm": 0.768750009742155, "learning_rate": 7.512286171025309e-06, "loss": 0.6089, "step": 32170 }, { "epoch": 0.5844108673543513, "grad_norm": 0.7620451964210927, "learning_rate": 7.506704745262282e-06, "loss": 0.6175, "step": 32180 }, { "epoch": 0.5845924742118263, "grad_norm": 0.9058477629258435, "learning_rate": 7.501124147581131e-06, "loss": 0.6052, "step": 32190 }, { "epoch": 0.5847740810693012, "grad_norm": 0.7274989463318499, "learning_rate": 7.495544379835298e-06, "loss": 0.6101, "step": 32200 }, { "epoch": 0.5849556879267761, "grad_norm": 0.7475766288966762, "learning_rate": 7.489965443877958e-06, "loss": 0.6231, "step": 32210 }, { "epoch": 0.585137294784251, "grad_norm": 0.7655021969416996, "learning_rate": 7.4843873415620026e-06, "loss": 0.6017, "step": 32220 }, { "epoch": 0.585318901641726, "grad_norm": 0.7848831568128652, "learning_rate": 7.478810074740057e-06, "loss": 0.6155, "step": 32230 }, { "epoch": 0.5855005084992009, "grad_norm": 0.807945898063571, "learning_rate": 7.473233645264456e-06, "loss": 0.6163, "step": 32240 }, { "epoch": 0.5856821153566759, "grad_norm": 0.7982965022590002, "learning_rate": 7.467658054987268e-06, "loss": 0.6082, "step": 32250 }, { "epoch": 0.5858637222141508, "grad_norm": 0.7531277203061919, "learning_rate": 7.462083305760271e-06, "loss": 0.613, "step": 32260 }, { "epoch": 0.5860453290716258, "grad_norm": 0.8057865919804604, "learning_rate": 7.456509399434979e-06, "loss": 0.6232, "step": 32270 }, { "epoch": 0.5862269359291007, "grad_norm": 0.7458134735801518, "learning_rate": 7.450936337862609e-06, "loss": 0.6194, "step": 32280 }, { "epoch": 0.5864085427865756, "grad_norm": 0.7465561342820882, "learning_rate": 7.4453641228941085e-06, "loss": 0.6195, "step": 32290 }, { "epoch": 0.5865901496440505, "grad_norm": 0.7395158194712795, "learning_rate": 7.439792756380141e-06, "loss": 0.6121, "step": 32300 }, { "epoch": 0.5867717565015255, "grad_norm": 0.773871093470431, "learning_rate": 7.434222240171087e-06, "loss": 0.614, "step": 32310 }, { "epoch": 0.5869533633590004, "grad_norm": 0.7932894735667756, "learning_rate": 7.428652576117048e-06, "loss": 0.6195, "step": 32320 }, { "epoch": 0.5871349702164754, "grad_norm": 0.751897487552208, "learning_rate": 7.423083766067839e-06, "loss": 0.6123, "step": 32330 }, { "epoch": 0.5873165770739504, "grad_norm": 0.7524355187527606, "learning_rate": 7.4175158118729915e-06, "loss": 0.6219, "step": 32340 }, { "epoch": 0.5874981839314253, "grad_norm": 0.8100645899445227, "learning_rate": 7.4119487153817534e-06, "loss": 0.6067, "step": 32350 }, { "epoch": 0.5876797907889002, "grad_norm": 0.7334010187872636, "learning_rate": 7.406382478443092e-06, "loss": 0.6123, "step": 32360 }, { "epoch": 0.5878613976463751, "grad_norm": 0.751227627426095, "learning_rate": 7.400817102905684e-06, "loss": 0.6261, "step": 32370 }, { "epoch": 0.58804300450385, "grad_norm": 0.7681660683848066, "learning_rate": 7.395252590617915e-06, "loss": 0.6195, "step": 32380 }, { "epoch": 0.588224611361325, "grad_norm": 0.7691633381895461, "learning_rate": 7.3896889434279e-06, "loss": 0.6131, "step": 32390 }, { "epoch": 0.5884062182187999, "grad_norm": 0.7799136375171192, "learning_rate": 7.384126163183446e-06, "loss": 0.6138, "step": 32400 }, { "epoch": 0.5885878250762748, "grad_norm": 0.7755217570065788, "learning_rate": 7.378564251732093e-06, "loss": 0.6124, "step": 32410 }, { "epoch": 0.5887694319337499, "grad_norm": 0.749551465693579, "learning_rate": 7.373003210921075e-06, "loss": 0.6027, "step": 32420 }, { "epoch": 0.5889510387912248, "grad_norm": 0.768770194252687, "learning_rate": 7.36744304259735e-06, "loss": 0.6125, "step": 32430 }, { "epoch": 0.5891326456486997, "grad_norm": 0.7479860346485665, "learning_rate": 7.361883748607575e-06, "loss": 0.6065, "step": 32440 }, { "epoch": 0.5893142525061746, "grad_norm": 0.800133895482771, "learning_rate": 7.3563253307981265e-06, "loss": 0.6145, "step": 32450 }, { "epoch": 0.5894958593636496, "grad_norm": 0.7236402213708548, "learning_rate": 7.3507677910150795e-06, "loss": 0.6216, "step": 32460 }, { "epoch": 0.5896774662211245, "grad_norm": 0.758155290025572, "learning_rate": 7.345211131104233e-06, "loss": 0.615, "step": 32470 }, { "epoch": 0.5898590730785994, "grad_norm": 0.7337760164099163, "learning_rate": 7.339655352911076e-06, "loss": 0.6081, "step": 32480 }, { "epoch": 0.5900406799360743, "grad_norm": 0.7852980884851121, "learning_rate": 7.334100458280815e-06, "loss": 0.6212, "step": 32490 }, { "epoch": 0.5902222867935494, "grad_norm": 0.7442188254837167, "learning_rate": 7.328546449058363e-06, "loss": 0.6166, "step": 32500 }, { "epoch": 0.5904038936510243, "grad_norm": 0.7630048038100238, "learning_rate": 7.322993327088332e-06, "loss": 0.6061, "step": 32510 }, { "epoch": 0.5905855005084992, "grad_norm": 0.7619409597367293, "learning_rate": 7.31744109421505e-06, "loss": 0.6237, "step": 32520 }, { "epoch": 0.5907671073659742, "grad_norm": 0.8348723166860695, "learning_rate": 7.311889752282539e-06, "loss": 0.62, "step": 32530 }, { "epoch": 0.5909487142234491, "grad_norm": 0.7998289785343392, "learning_rate": 7.306339303134533e-06, "loss": 0.6259, "step": 32540 }, { "epoch": 0.591130321080924, "grad_norm": 0.7547792309749168, "learning_rate": 7.300789748614464e-06, "loss": 0.6231, "step": 32550 }, { "epoch": 0.5913119279383989, "grad_norm": 0.7626111515381085, "learning_rate": 7.295241090565475e-06, "loss": 0.6114, "step": 32560 }, { "epoch": 0.5914935347958739, "grad_norm": 0.7830029134529124, "learning_rate": 7.289693330830401e-06, "loss": 0.6056, "step": 32570 }, { "epoch": 0.5916751416533488, "grad_norm": 0.7561224691884693, "learning_rate": 7.284146471251779e-06, "loss": 0.6127, "step": 32580 }, { "epoch": 0.5918567485108238, "grad_norm": 0.7870780337834292, "learning_rate": 7.27860051367186e-06, "loss": 0.6269, "step": 32590 }, { "epoch": 0.5920383553682987, "grad_norm": 0.7462977398868377, "learning_rate": 7.273055459932578e-06, "loss": 0.6158, "step": 32600 }, { "epoch": 0.5922199622257737, "grad_norm": 0.7442475467069031, "learning_rate": 7.267511311875586e-06, "loss": 0.6009, "step": 32610 }, { "epoch": 0.5924015690832486, "grad_norm": 0.7513506182666835, "learning_rate": 7.261968071342215e-06, "loss": 0.6073, "step": 32620 }, { "epoch": 0.5925831759407235, "grad_norm": 0.760632163838566, "learning_rate": 7.256425740173514e-06, "loss": 0.6222, "step": 32630 }, { "epoch": 0.5927647827981984, "grad_norm": 0.7613004614440709, "learning_rate": 7.250884320210212e-06, "loss": 0.618, "step": 32640 }, { "epoch": 0.5929463896556734, "grad_norm": 0.790656249856218, "learning_rate": 7.245343813292754e-06, "loss": 0.614, "step": 32650 }, { "epoch": 0.5931279965131483, "grad_norm": 0.7681148190402085, "learning_rate": 7.239804221261265e-06, "loss": 0.6214, "step": 32660 }, { "epoch": 0.5933096033706233, "grad_norm": 0.7659890962383296, "learning_rate": 7.234265545955581e-06, "loss": 0.6258, "step": 32670 }, { "epoch": 0.5934912102280983, "grad_norm": 0.7496182741783064, "learning_rate": 7.22872778921522e-06, "loss": 0.6123, "step": 32680 }, { "epoch": 0.5936728170855732, "grad_norm": 0.7378787576682893, "learning_rate": 7.223190952879402e-06, "loss": 0.6058, "step": 32690 }, { "epoch": 0.5938544239430481, "grad_norm": 0.7612440017246356, "learning_rate": 7.217655038787041e-06, "loss": 0.6161, "step": 32700 }, { "epoch": 0.594036030800523, "grad_norm": 0.7890778883918608, "learning_rate": 7.212120048776745e-06, "loss": 0.609, "step": 32710 }, { "epoch": 0.594217637657998, "grad_norm": 0.7848446331603675, "learning_rate": 7.206585984686814e-06, "loss": 0.6186, "step": 32720 }, { "epoch": 0.5943992445154729, "grad_norm": 0.7426687000175539, "learning_rate": 7.201052848355238e-06, "loss": 0.6019, "step": 32730 }, { "epoch": 0.5945808513729478, "grad_norm": 0.7283803366614341, "learning_rate": 7.1955206416197035e-06, "loss": 0.6021, "step": 32740 }, { "epoch": 0.5947624582304227, "grad_norm": 0.7425965965405857, "learning_rate": 7.189989366317584e-06, "loss": 0.6025, "step": 32750 }, { "epoch": 0.5949440650878978, "grad_norm": 0.7482099334881874, "learning_rate": 7.184459024285952e-06, "loss": 0.6021, "step": 32760 }, { "epoch": 0.5951256719453727, "grad_norm": 0.7758719935798921, "learning_rate": 7.17892961736156e-06, "loss": 0.6054, "step": 32770 }, { "epoch": 0.5953072788028476, "grad_norm": 0.7377077578424965, "learning_rate": 7.1734011473808496e-06, "loss": 0.6068, "step": 32780 }, { "epoch": 0.5954888856603225, "grad_norm": 0.7603843075058757, "learning_rate": 7.167873616179964e-06, "loss": 0.6244, "step": 32790 }, { "epoch": 0.5956704925177975, "grad_norm": 0.7731461177517821, "learning_rate": 7.162347025594714e-06, "loss": 0.6157, "step": 32800 }, { "epoch": 0.5958520993752724, "grad_norm": 0.7386728838057953, "learning_rate": 7.156821377460626e-06, "loss": 0.629, "step": 32810 }, { "epoch": 0.5960337062327473, "grad_norm": 0.753658695634063, "learning_rate": 7.151296673612882e-06, "loss": 0.6259, "step": 32820 }, { "epoch": 0.5962153130902222, "grad_norm": 0.7684222526280715, "learning_rate": 7.145772915886376e-06, "loss": 0.6184, "step": 32830 }, { "epoch": 0.5963969199476973, "grad_norm": 0.7811681408636328, "learning_rate": 7.140250106115672e-06, "loss": 0.6192, "step": 32840 }, { "epoch": 0.5965785268051722, "grad_norm": 0.7454232844636497, "learning_rate": 7.134728246135031e-06, "loss": 0.6111, "step": 32850 }, { "epoch": 0.5967601336626471, "grad_norm": 0.7460511901651637, "learning_rate": 7.129207337778384e-06, "loss": 0.612, "step": 32860 }, { "epoch": 0.596941740520122, "grad_norm": 0.7632131985654765, "learning_rate": 7.123687382879362e-06, "loss": 0.6207, "step": 32870 }, { "epoch": 0.597123347377597, "grad_norm": 0.7552416538015503, "learning_rate": 7.118168383271267e-06, "loss": 0.622, "step": 32880 }, { "epoch": 0.5973049542350719, "grad_norm": 0.791037316774777, "learning_rate": 7.112650340787088e-06, "loss": 0.6204, "step": 32890 }, { "epoch": 0.5974865610925468, "grad_norm": 0.7764369943715443, "learning_rate": 7.107133257259499e-06, "loss": 0.616, "step": 32900 }, { "epoch": 0.5976681679500218, "grad_norm": 0.7244380187993817, "learning_rate": 7.10161713452085e-06, "loss": 0.6161, "step": 32910 }, { "epoch": 0.5978497748074967, "grad_norm": 0.7336273303155185, "learning_rate": 7.09610197440318e-06, "loss": 0.6105, "step": 32920 }, { "epoch": 0.5980313816649717, "grad_norm": 0.7639133920286095, "learning_rate": 7.0905877787381975e-06, "loss": 0.6194, "step": 32930 }, { "epoch": 0.5982129885224466, "grad_norm": 0.7809030505188477, "learning_rate": 7.085074549357298e-06, "loss": 0.6025, "step": 32940 }, { "epoch": 0.5983945953799216, "grad_norm": 0.7653348816048732, "learning_rate": 7.079562288091554e-06, "loss": 0.6096, "step": 32950 }, { "epoch": 0.5985762022373965, "grad_norm": 0.7873789380794649, "learning_rate": 7.074050996771722e-06, "loss": 0.6253, "step": 32960 }, { "epoch": 0.5987578090948714, "grad_norm": 0.7655808112664212, "learning_rate": 7.0685406772282265e-06, "loss": 0.6086, "step": 32970 }, { "epoch": 0.5989394159523463, "grad_norm": 0.7746650185340189, "learning_rate": 7.0630313312911695e-06, "loss": 0.6159, "step": 32980 }, { "epoch": 0.5991210228098213, "grad_norm": 0.8032246352412352, "learning_rate": 7.057522960790344e-06, "loss": 0.6217, "step": 32990 }, { "epoch": 0.5993026296672962, "grad_norm": 0.7670824614377272, "learning_rate": 7.052015567555199e-06, "loss": 0.6255, "step": 33000 }, { "epoch": 0.5994842365247712, "grad_norm": 0.8189906177664579, "learning_rate": 7.046509153414879e-06, "loss": 0.607, "step": 33010 }, { "epoch": 0.5996658433822462, "grad_norm": 0.7873756464645236, "learning_rate": 7.041003720198184e-06, "loss": 0.6237, "step": 33020 }, { "epoch": 0.5998474502397211, "grad_norm": 0.7393133885052415, "learning_rate": 7.035499269733606e-06, "loss": 0.6189, "step": 33030 }, { "epoch": 0.600029057097196, "grad_norm": 0.7520668607716116, "learning_rate": 7.029995803849295e-06, "loss": 0.6068, "step": 33040 }, { "epoch": 0.6002106639546709, "grad_norm": 0.7112220330696993, "learning_rate": 7.0244933243730885e-06, "loss": 0.6229, "step": 33050 }, { "epoch": 0.6003922708121459, "grad_norm": 0.8231448380214996, "learning_rate": 7.018991833132481e-06, "loss": 0.6087, "step": 33060 }, { "epoch": 0.6005738776696208, "grad_norm": 0.7358456323577315, "learning_rate": 7.013491331954653e-06, "loss": 0.6133, "step": 33070 }, { "epoch": 0.6007554845270957, "grad_norm": 0.7781777469447889, "learning_rate": 7.007991822666449e-06, "loss": 0.6177, "step": 33080 }, { "epoch": 0.6009370913845706, "grad_norm": 0.7287361710338334, "learning_rate": 7.002493307094382e-06, "loss": 0.6217, "step": 33090 }, { "epoch": 0.6011186982420457, "grad_norm": 0.7249689456901803, "learning_rate": 6.996995787064642e-06, "loss": 0.6102, "step": 33100 }, { "epoch": 0.6013003050995206, "grad_norm": 0.796715241304093, "learning_rate": 6.991499264403081e-06, "loss": 0.6237, "step": 33110 }, { "epoch": 0.6014819119569955, "grad_norm": 0.77706373181661, "learning_rate": 6.986003740935225e-06, "loss": 0.6139, "step": 33120 }, { "epoch": 0.6016635188144704, "grad_norm": 0.7916433455173252, "learning_rate": 6.980509218486267e-06, "loss": 0.6104, "step": 33130 }, { "epoch": 0.6018451256719454, "grad_norm": 0.7582960891957017, "learning_rate": 6.975015698881064e-06, "loss": 0.6168, "step": 33140 }, { "epoch": 0.6020267325294203, "grad_norm": 0.735779998164949, "learning_rate": 6.969523183944144e-06, "loss": 0.6211, "step": 33150 }, { "epoch": 0.6022083393868952, "grad_norm": 0.7777056388966007, "learning_rate": 6.964031675499705e-06, "loss": 0.6122, "step": 33160 }, { "epoch": 0.6023899462443701, "grad_norm": 0.7852413559507727, "learning_rate": 6.958541175371602e-06, "loss": 0.626, "step": 33170 }, { "epoch": 0.6025715531018452, "grad_norm": 0.7495121500539759, "learning_rate": 6.953051685383354e-06, "loss": 0.6066, "step": 33180 }, { "epoch": 0.6027531599593201, "grad_norm": 0.7546485388878356, "learning_rate": 6.947563207358159e-06, "loss": 0.6228, "step": 33190 }, { "epoch": 0.602934766816795, "grad_norm": 0.7493744630644694, "learning_rate": 6.942075743118859e-06, "loss": 0.6199, "step": 33200 }, { "epoch": 0.60311637367427, "grad_norm": 0.7696415900242, "learning_rate": 6.936589294487981e-06, "loss": 0.6127, "step": 33210 }, { "epoch": 0.6032979805317449, "grad_norm": 0.7484056081090337, "learning_rate": 6.931103863287691e-06, "loss": 0.6269, "step": 33220 }, { "epoch": 0.6034795873892198, "grad_norm": 0.7577122113685043, "learning_rate": 6.925619451339842e-06, "loss": 0.6067, "step": 33230 }, { "epoch": 0.6036611942466947, "grad_norm": 0.7455320245176215, "learning_rate": 6.920136060465925e-06, "loss": 0.6076, "step": 33240 }, { "epoch": 0.6038428011041697, "grad_norm": 0.7801320755948558, "learning_rate": 6.914653692487112e-06, "loss": 0.6203, "step": 33250 }, { "epoch": 0.6040244079616446, "grad_norm": 0.7397295525236347, "learning_rate": 6.909172349224218e-06, "loss": 0.6139, "step": 33260 }, { "epoch": 0.6042060148191196, "grad_norm": 0.7132767140391848, "learning_rate": 6.903692032497734e-06, "loss": 0.6218, "step": 33270 }, { "epoch": 0.6043876216765945, "grad_norm": 0.7531644923321782, "learning_rate": 6.898212744127794e-06, "loss": 0.6214, "step": 33280 }, { "epoch": 0.6045692285340695, "grad_norm": 0.7687327101900895, "learning_rate": 6.8927344859342025e-06, "loss": 0.6236, "step": 33290 }, { "epoch": 0.6047508353915444, "grad_norm": 0.7591266856248643, "learning_rate": 6.887257259736417e-06, "loss": 0.6207, "step": 33300 }, { "epoch": 0.6049324422490193, "grad_norm": 0.7637289572640541, "learning_rate": 6.881781067353552e-06, "loss": 0.6343, "step": 33310 }, { "epoch": 0.6051140491064942, "grad_norm": 0.7438436697962322, "learning_rate": 6.876305910604379e-06, "loss": 0.6104, "step": 33320 }, { "epoch": 0.6052956559639692, "grad_norm": 0.7771592717111582, "learning_rate": 6.87083179130733e-06, "loss": 0.6179, "step": 33330 }, { "epoch": 0.6054772628214441, "grad_norm": 0.7498585693417734, "learning_rate": 6.865358711280483e-06, "loss": 0.6111, "step": 33340 }, { "epoch": 0.605658869678919, "grad_norm": 0.9509886176641791, "learning_rate": 6.859886672341578e-06, "loss": 0.6135, "step": 33350 }, { "epoch": 0.6058404765363941, "grad_norm": 0.7480687087964755, "learning_rate": 6.854415676308013e-06, "loss": 0.611, "step": 33360 }, { "epoch": 0.606022083393869, "grad_norm": 0.7468886966666821, "learning_rate": 6.848945724996829e-06, "loss": 0.6185, "step": 33370 }, { "epoch": 0.6062036902513439, "grad_norm": 0.7859363040069545, "learning_rate": 6.843476820224724e-06, "loss": 0.6042, "step": 33380 }, { "epoch": 0.6063852971088188, "grad_norm": 0.7381890063292975, "learning_rate": 6.838008963808054e-06, "loss": 0.6144, "step": 33390 }, { "epoch": 0.6065669039662938, "grad_norm": 0.7575829264094223, "learning_rate": 6.832542157562816e-06, "loss": 0.6167, "step": 33400 }, { "epoch": 0.6067485108237687, "grad_norm": 0.7390842188412658, "learning_rate": 6.827076403304672e-06, "loss": 0.6008, "step": 33410 }, { "epoch": 0.6069301176812436, "grad_norm": 0.7791522457373756, "learning_rate": 6.8216117028489205e-06, "loss": 0.6177, "step": 33420 }, { "epoch": 0.6071117245387185, "grad_norm": 0.769328427926713, "learning_rate": 6.8161480580105256e-06, "loss": 0.6229, "step": 33430 }, { "epoch": 0.6072933313961936, "grad_norm": 0.7849683021034206, "learning_rate": 6.81068547060408e-06, "loss": 0.608, "step": 33440 }, { "epoch": 0.6074749382536685, "grad_norm": 0.7485544359604854, "learning_rate": 6.805223942443851e-06, "loss": 0.5982, "step": 33450 }, { "epoch": 0.6076565451111434, "grad_norm": 0.7362564700041343, "learning_rate": 6.799763475343726e-06, "loss": 0.6208, "step": 33460 }, { "epoch": 0.6078381519686183, "grad_norm": 0.7673144289644854, "learning_rate": 6.794304071117268e-06, "loss": 0.6111, "step": 33470 }, { "epoch": 0.6080197588260933, "grad_norm": 0.7378994375846172, "learning_rate": 6.788845731577662e-06, "loss": 0.6188, "step": 33480 }, { "epoch": 0.6082013656835682, "grad_norm": 0.7643523717606131, "learning_rate": 6.783388458537759e-06, "loss": 0.6189, "step": 33490 }, { "epoch": 0.6083829725410431, "grad_norm": 0.7741029847937929, "learning_rate": 6.77793225381004e-06, "loss": 0.6218, "step": 33500 }, { "epoch": 0.608564579398518, "grad_norm": 0.7518554961819341, "learning_rate": 6.772477119206644e-06, "loss": 0.6053, "step": 33510 }, { "epoch": 0.608746186255993, "grad_norm": 0.7648832475295915, "learning_rate": 6.767023056539348e-06, "loss": 0.6308, "step": 33520 }, { "epoch": 0.608927793113468, "grad_norm": 0.739094225498146, "learning_rate": 6.761570067619574e-06, "loss": 0.6152, "step": 33530 }, { "epoch": 0.6091093999709429, "grad_norm": 0.767468644165235, "learning_rate": 6.756118154258387e-06, "loss": 0.6241, "step": 33540 }, { "epoch": 0.6092910068284179, "grad_norm": 0.7195653856105702, "learning_rate": 6.750667318266495e-06, "loss": 0.6358, "step": 33550 }, { "epoch": 0.6094726136858928, "grad_norm": 0.762204252139312, "learning_rate": 6.745217561454253e-06, "loss": 0.6309, "step": 33560 }, { "epoch": 0.6096542205433677, "grad_norm": 0.7680600326356445, "learning_rate": 6.739768885631649e-06, "loss": 0.6204, "step": 33570 }, { "epoch": 0.6098358274008426, "grad_norm": 0.7717651830924318, "learning_rate": 6.734321292608313e-06, "loss": 0.6195, "step": 33580 }, { "epoch": 0.6100174342583176, "grad_norm": 0.7639033310984724, "learning_rate": 6.728874784193527e-06, "loss": 0.6136, "step": 33590 }, { "epoch": 0.6101990411157925, "grad_norm": 0.7588845270517851, "learning_rate": 6.723429362196194e-06, "loss": 0.6013, "step": 33600 }, { "epoch": 0.6103806479732675, "grad_norm": 0.7704355790964403, "learning_rate": 6.717985028424876e-06, "loss": 0.6235, "step": 33610 }, { "epoch": 0.6105622548307424, "grad_norm": 0.7743129088897021, "learning_rate": 6.712541784687757e-06, "loss": 0.6221, "step": 33620 }, { "epoch": 0.6107438616882174, "grad_norm": 0.7356117766576337, "learning_rate": 6.707099632792673e-06, "loss": 0.6267, "step": 33630 }, { "epoch": 0.6109254685456923, "grad_norm": 0.7525811853749765, "learning_rate": 6.701658574547082e-06, "loss": 0.6176, "step": 33640 }, { "epoch": 0.6111070754031672, "grad_norm": 0.733146693219564, "learning_rate": 6.696218611758096e-06, "loss": 0.615, "step": 33650 }, { "epoch": 0.6112886822606421, "grad_norm": 0.7596550083905107, "learning_rate": 6.690779746232446e-06, "loss": 0.6253, "step": 33660 }, { "epoch": 0.6114702891181171, "grad_norm": 0.7863090346187394, "learning_rate": 6.6853419797765125e-06, "loss": 0.6087, "step": 33670 }, { "epoch": 0.611651895975592, "grad_norm": 0.7110044730836543, "learning_rate": 6.679905314196305e-06, "loss": 0.6155, "step": 33680 }, { "epoch": 0.6118335028330669, "grad_norm": 0.7553199669822658, "learning_rate": 6.674469751297463e-06, "loss": 0.6195, "step": 33690 }, { "epoch": 0.612015109690542, "grad_norm": 0.7813400956945336, "learning_rate": 6.669035292885269e-06, "loss": 0.6212, "step": 33700 }, { "epoch": 0.6121967165480169, "grad_norm": 0.7256005369541852, "learning_rate": 6.663601940764634e-06, "loss": 0.6099, "step": 33710 }, { "epoch": 0.6123783234054918, "grad_norm": 0.7606307406480938, "learning_rate": 6.6581696967401e-06, "loss": 0.6114, "step": 33720 }, { "epoch": 0.6125599302629667, "grad_norm": 0.7526859825577203, "learning_rate": 6.6527385626158435e-06, "loss": 0.6105, "step": 33730 }, { "epoch": 0.6127415371204417, "grad_norm": 0.767084155906378, "learning_rate": 6.647308540195671e-06, "loss": 0.6099, "step": 33740 }, { "epoch": 0.6129231439779166, "grad_norm": 0.7413937323714145, "learning_rate": 6.641879631283021e-06, "loss": 0.6181, "step": 33750 }, { "epoch": 0.6131047508353915, "grad_norm": 0.7469975525692082, "learning_rate": 6.636451837680965e-06, "loss": 0.6201, "step": 33760 }, { "epoch": 0.6132863576928664, "grad_norm": 0.7510979151876809, "learning_rate": 6.6310251611921924e-06, "loss": 0.6233, "step": 33770 }, { "epoch": 0.6134679645503415, "grad_norm": 0.7655027043921051, "learning_rate": 6.62559960361904e-06, "loss": 0.6252, "step": 33780 }, { "epoch": 0.6136495714078164, "grad_norm": 0.7815596053941354, "learning_rate": 6.620175166763456e-06, "loss": 0.6175, "step": 33790 }, { "epoch": 0.6138311782652913, "grad_norm": 0.7641851704119795, "learning_rate": 6.6147518524270215e-06, "loss": 0.6217, "step": 33800 }, { "epoch": 0.6140127851227662, "grad_norm": 0.763873384322547, "learning_rate": 6.609329662410952e-06, "loss": 0.6049, "step": 33810 }, { "epoch": 0.6141943919802412, "grad_norm": 0.763638905519396, "learning_rate": 6.60390859851608e-06, "loss": 0.6077, "step": 33820 }, { "epoch": 0.6143759988377161, "grad_norm": 0.7270629808324862, "learning_rate": 6.598488662542872e-06, "loss": 0.6143, "step": 33830 }, { "epoch": 0.614557605695191, "grad_norm": 0.7681817645900042, "learning_rate": 6.593069856291411e-06, "loss": 0.6261, "step": 33840 }, { "epoch": 0.614739212552666, "grad_norm": 0.7823677160046816, "learning_rate": 6.587652181561416e-06, "loss": 0.6169, "step": 33850 }, { "epoch": 0.6149208194101409, "grad_norm": 0.7657707735564546, "learning_rate": 6.582235640152217e-06, "loss": 0.6218, "step": 33860 }, { "epoch": 0.6151024262676159, "grad_norm": 0.768870182993542, "learning_rate": 6.5768202338627804e-06, "loss": 0.6256, "step": 33870 }, { "epoch": 0.6152840331250908, "grad_norm": 0.7648708531458204, "learning_rate": 6.571405964491686e-06, "loss": 0.6235, "step": 33880 }, { "epoch": 0.6154656399825658, "grad_norm": 0.7605365612162736, "learning_rate": 6.565992833837141e-06, "loss": 0.6272, "step": 33890 }, { "epoch": 0.6156472468400407, "grad_norm": 0.7496387291186003, "learning_rate": 6.560580843696973e-06, "loss": 0.6322, "step": 33900 }, { "epoch": 0.6158288536975156, "grad_norm": 0.7472708274381779, "learning_rate": 6.555169995868632e-06, "loss": 0.6198, "step": 33910 }, { "epoch": 0.6160104605549905, "grad_norm": 0.7419050698187114, "learning_rate": 6.549760292149185e-06, "loss": 0.6238, "step": 33920 }, { "epoch": 0.6161920674124655, "grad_norm": 0.7574244103868771, "learning_rate": 6.544351734335326e-06, "loss": 0.6025, "step": 33930 }, { "epoch": 0.6163736742699404, "grad_norm": 0.7366814822244893, "learning_rate": 6.538944324223357e-06, "loss": 0.6079, "step": 33940 }, { "epoch": 0.6165552811274154, "grad_norm": 0.733114440856328, "learning_rate": 6.533538063609211e-06, "loss": 0.6094, "step": 33950 }, { "epoch": 0.6167368879848903, "grad_norm": 0.7542352598507903, "learning_rate": 6.528132954288438e-06, "loss": 0.611, "step": 33960 }, { "epoch": 0.6169184948423653, "grad_norm": 0.7446173642575366, "learning_rate": 6.522728998056189e-06, "loss": 0.6157, "step": 33970 }, { "epoch": 0.6171001016998402, "grad_norm": 0.7508960587887238, "learning_rate": 6.517326196707259e-06, "loss": 0.6314, "step": 33980 }, { "epoch": 0.6172817085573151, "grad_norm": 0.7912736112295, "learning_rate": 6.511924552036038e-06, "loss": 0.6163, "step": 33990 }, { "epoch": 0.61746331541479, "grad_norm": 0.7594441194366559, "learning_rate": 6.5065240658365355e-06, "loss": 0.6246, "step": 34000 }, { "epoch": 0.617644922272265, "grad_norm": 0.7746712297592647, "learning_rate": 6.501124739902388e-06, "loss": 0.6154, "step": 34010 }, { "epoch": 0.6178265291297399, "grad_norm": 0.7580702646715672, "learning_rate": 6.49572657602683e-06, "loss": 0.6157, "step": 34020 }, { "epoch": 0.6180081359872148, "grad_norm": 0.8018905079103998, "learning_rate": 6.490329576002725e-06, "loss": 0.6187, "step": 34030 }, { "epoch": 0.6181897428446899, "grad_norm": 0.7386803112908396, "learning_rate": 6.4849337416225386e-06, "loss": 0.6036, "step": 34040 }, { "epoch": 0.6183713497021648, "grad_norm": 0.7468725758542347, "learning_rate": 6.47953907467836e-06, "loss": 0.6044, "step": 34050 }, { "epoch": 0.6185529565596397, "grad_norm": 0.757318089984693, "learning_rate": 6.474145576961877e-06, "loss": 0.6073, "step": 34060 }, { "epoch": 0.6187345634171146, "grad_norm": 0.7827569697921564, "learning_rate": 6.468753250264406e-06, "loss": 0.6236, "step": 34070 }, { "epoch": 0.6189161702745896, "grad_norm": 0.7393433266002213, "learning_rate": 6.463362096376855e-06, "loss": 0.6043, "step": 34080 }, { "epoch": 0.6190977771320645, "grad_norm": 0.7599688414873673, "learning_rate": 6.45797211708976e-06, "loss": 0.6137, "step": 34090 }, { "epoch": 0.6192793839895394, "grad_norm": 0.7542415934191171, "learning_rate": 6.452583314193257e-06, "loss": 0.6272, "step": 34100 }, { "epoch": 0.6194609908470143, "grad_norm": 0.7728646929525814, "learning_rate": 6.4471956894770945e-06, "loss": 0.6216, "step": 34110 }, { "epoch": 0.6196425977044894, "grad_norm": 0.7859778189472892, "learning_rate": 6.441809244730629e-06, "loss": 0.6259, "step": 34120 }, { "epoch": 0.6198242045619643, "grad_norm": 0.7565203832760069, "learning_rate": 6.436423981742825e-06, "loss": 0.61, "step": 34130 }, { "epoch": 0.6200058114194392, "grad_norm": 0.7466592124258572, "learning_rate": 6.4310399023022544e-06, "loss": 0.6027, "step": 34140 }, { "epoch": 0.6201874182769141, "grad_norm": 0.7913939509217114, "learning_rate": 6.425657008197096e-06, "loss": 0.6253, "step": 34150 }, { "epoch": 0.6203690251343891, "grad_norm": 0.7805873936130199, "learning_rate": 6.42027530121514e-06, "loss": 0.6217, "step": 34160 }, { "epoch": 0.620550631991864, "grad_norm": 0.7336673791085904, "learning_rate": 6.414894783143768e-06, "loss": 0.6116, "step": 34170 }, { "epoch": 0.6207322388493389, "grad_norm": 0.7787590401550786, "learning_rate": 6.409515455769988e-06, "loss": 0.6394, "step": 34180 }, { "epoch": 0.6209138457068138, "grad_norm": 0.7646222226981902, "learning_rate": 6.404137320880395e-06, "loss": 0.6068, "step": 34190 }, { "epoch": 0.6210954525642888, "grad_norm": 0.7593298763950174, "learning_rate": 6.398760380261189e-06, "loss": 0.6078, "step": 34200 }, { "epoch": 0.6212770594217638, "grad_norm": 0.744009994623694, "learning_rate": 6.3933846356981855e-06, "loss": 0.6137, "step": 34210 }, { "epoch": 0.6214586662792387, "grad_norm": 0.7616593978057656, "learning_rate": 6.388010088976791e-06, "loss": 0.6217, "step": 34220 }, { "epoch": 0.6216402731367137, "grad_norm": 0.7744199142537522, "learning_rate": 6.382636741882025e-06, "loss": 0.6195, "step": 34230 }, { "epoch": 0.6218218799941886, "grad_norm": 0.7515458098007083, "learning_rate": 6.377264596198491e-06, "loss": 0.6269, "step": 34240 }, { "epoch": 0.6220034868516635, "grad_norm": 0.7497293128237386, "learning_rate": 6.371893653710417e-06, "loss": 0.6046, "step": 34250 }, { "epoch": 0.6221850937091384, "grad_norm": 0.7871236097713377, "learning_rate": 6.36652391620161e-06, "loss": 0.61, "step": 34260 }, { "epoch": 0.6223667005666134, "grad_norm": 0.7880283340671053, "learning_rate": 6.361155385455493e-06, "loss": 0.6116, "step": 34270 }, { "epoch": 0.6225483074240883, "grad_norm": 0.774664111721175, "learning_rate": 6.355788063255075e-06, "loss": 0.6156, "step": 34280 }, { "epoch": 0.6227299142815633, "grad_norm": 0.766156785785142, "learning_rate": 6.3504219513829715e-06, "loss": 0.6166, "step": 34290 }, { "epoch": 0.6229115211390382, "grad_norm": 0.7573842201807189, "learning_rate": 6.345057051621395e-06, "loss": 0.6153, "step": 34300 }, { "epoch": 0.6230931279965132, "grad_norm": 0.7524408226819007, "learning_rate": 6.339693365752154e-06, "loss": 0.611, "step": 34310 }, { "epoch": 0.6232747348539881, "grad_norm": 0.7531146239067475, "learning_rate": 6.334330895556655e-06, "loss": 0.626, "step": 34320 }, { "epoch": 0.623456341711463, "grad_norm": 0.754382556756597, "learning_rate": 6.328969642815901e-06, "loss": 0.6055, "step": 34330 }, { "epoch": 0.623637948568938, "grad_norm": 0.7873405029522093, "learning_rate": 6.323609609310488e-06, "loss": 0.6074, "step": 34340 }, { "epoch": 0.6238195554264129, "grad_norm": 0.7634162459909721, "learning_rate": 6.318250796820607e-06, "loss": 0.6095, "step": 34350 }, { "epoch": 0.6240011622838878, "grad_norm": 0.7613849338802667, "learning_rate": 6.312893207126054e-06, "loss": 0.6155, "step": 34360 }, { "epoch": 0.6241827691413627, "grad_norm": 0.7678649020181825, "learning_rate": 6.307536842006199e-06, "loss": 0.6111, "step": 34370 }, { "epoch": 0.6243643759988378, "grad_norm": 0.7796385820251162, "learning_rate": 6.302181703240027e-06, "loss": 0.6059, "step": 34380 }, { "epoch": 0.6245459828563127, "grad_norm": 0.7673299404434043, "learning_rate": 6.296827792606099e-06, "loss": 0.6171, "step": 34390 }, { "epoch": 0.6247275897137876, "grad_norm": 0.740585777191567, "learning_rate": 6.291475111882573e-06, "loss": 0.6156, "step": 34400 }, { "epoch": 0.6249091965712625, "grad_norm": 0.7889158314244312, "learning_rate": 6.286123662847205e-06, "loss": 0.6167, "step": 34410 }, { "epoch": 0.6250908034287375, "grad_norm": 0.7614401591470689, "learning_rate": 6.280773447277333e-06, "loss": 0.6007, "step": 34420 }, { "epoch": 0.6252724102862124, "grad_norm": 0.7411471169821464, "learning_rate": 6.275424466949893e-06, "loss": 0.6122, "step": 34430 }, { "epoch": 0.6254540171436873, "grad_norm": 0.7869014403313621, "learning_rate": 6.2700767236414e-06, "loss": 0.6157, "step": 34440 }, { "epoch": 0.6256356240011622, "grad_norm": 0.7580745328148111, "learning_rate": 6.264730219127975e-06, "loss": 0.6216, "step": 34450 }, { "epoch": 0.6258172308586373, "grad_norm": 0.7464411888353327, "learning_rate": 6.259384955185308e-06, "loss": 0.6093, "step": 34460 }, { "epoch": 0.6259988377161122, "grad_norm": 0.7431054131005418, "learning_rate": 6.2540409335886946e-06, "loss": 0.6049, "step": 34470 }, { "epoch": 0.6261804445735871, "grad_norm": 0.7544356050677001, "learning_rate": 6.248698156113002e-06, "loss": 0.6145, "step": 34480 }, { "epoch": 0.626362051431062, "grad_norm": 0.7592310778919589, "learning_rate": 6.243356624532699e-06, "loss": 0.6111, "step": 34490 }, { "epoch": 0.626543658288537, "grad_norm": 0.7728405532462514, "learning_rate": 6.238016340621829e-06, "loss": 0.6309, "step": 34500 }, { "epoch": 0.6267252651460119, "grad_norm": 0.7731387017094955, "learning_rate": 6.232677306154027e-06, "loss": 0.6171, "step": 34510 }, { "epoch": 0.6269068720034868, "grad_norm": 0.7663994127433222, "learning_rate": 6.227339522902512e-06, "loss": 0.6077, "step": 34520 }, { "epoch": 0.6270884788609618, "grad_norm": 0.7341689779746423, "learning_rate": 6.222002992640088e-06, "loss": 0.6192, "step": 34530 }, { "epoch": 0.6272700857184367, "grad_norm": 0.7807124308575811, "learning_rate": 6.21666771713914e-06, "loss": 0.618, "step": 34540 }, { "epoch": 0.6274516925759117, "grad_norm": 0.7705578691321155, "learning_rate": 6.211333698171638e-06, "loss": 0.6167, "step": 34550 }, { "epoch": 0.6276332994333866, "grad_norm": 0.7494961735966369, "learning_rate": 6.206000937509138e-06, "loss": 0.608, "step": 34560 }, { "epoch": 0.6278149062908616, "grad_norm": 0.801435593292592, "learning_rate": 6.200669436922771e-06, "loss": 0.6182, "step": 34570 }, { "epoch": 0.6279965131483365, "grad_norm": 0.7561768669403898, "learning_rate": 6.195339198183259e-06, "loss": 0.611, "step": 34580 }, { "epoch": 0.6281781200058114, "grad_norm": 0.7518848733526252, "learning_rate": 6.190010223060896e-06, "loss": 0.6237, "step": 34590 }, { "epoch": 0.6283597268632863, "grad_norm": 0.7479512293434558, "learning_rate": 6.184682513325555e-06, "loss": 0.6202, "step": 34600 }, { "epoch": 0.6285413337207613, "grad_norm": 0.7171269337395153, "learning_rate": 6.1793560707467025e-06, "loss": 0.6104, "step": 34610 }, { "epoch": 0.6287229405782362, "grad_norm": 0.7491972368607657, "learning_rate": 6.174030897093366e-06, "loss": 0.6096, "step": 34620 }, { "epoch": 0.6289045474357112, "grad_norm": 0.7564530666644039, "learning_rate": 6.1687069941341705e-06, "loss": 0.6049, "step": 34630 }, { "epoch": 0.6290861542931862, "grad_norm": 0.7750775507770785, "learning_rate": 6.163384363637299e-06, "loss": 0.6137, "step": 34640 }, { "epoch": 0.6292677611506611, "grad_norm": 0.8005721570774417, "learning_rate": 6.158063007370532e-06, "loss": 0.6238, "step": 34650 }, { "epoch": 0.629449368008136, "grad_norm": 0.786934479741956, "learning_rate": 6.152742927101206e-06, "loss": 0.61, "step": 34660 }, { "epoch": 0.6296309748656109, "grad_norm": 0.8183032995356138, "learning_rate": 6.147424124596255e-06, "loss": 0.6157, "step": 34670 }, { "epoch": 0.6298125817230859, "grad_norm": 0.7444433439392794, "learning_rate": 6.142106601622171e-06, "loss": 0.6007, "step": 34680 }, { "epoch": 0.6299941885805608, "grad_norm": 0.7525869332766065, "learning_rate": 6.136790359945032e-06, "loss": 0.6118, "step": 34690 }, { "epoch": 0.6301757954380357, "grad_norm": 0.781292493953104, "learning_rate": 6.131475401330485e-06, "loss": 0.6171, "step": 34700 }, { "epoch": 0.6303574022955106, "grad_norm": 0.7831387058189707, "learning_rate": 6.126161727543752e-06, "loss": 0.619, "step": 34710 }, { "epoch": 0.6305390091529857, "grad_norm": 0.7448759137199659, "learning_rate": 6.120849340349629e-06, "loss": 0.6152, "step": 34720 }, { "epoch": 0.6307206160104606, "grad_norm": 0.7681233644843548, "learning_rate": 6.115538241512484e-06, "loss": 0.6179, "step": 34730 }, { "epoch": 0.6309022228679355, "grad_norm": 0.7400236581017514, "learning_rate": 6.110228432796261e-06, "loss": 0.6116, "step": 34740 }, { "epoch": 0.6310838297254104, "grad_norm": 0.7529837802427211, "learning_rate": 6.1049199159644666e-06, "loss": 0.6088, "step": 34750 }, { "epoch": 0.6312654365828854, "grad_norm": 0.7512864638067378, "learning_rate": 6.09961269278019e-06, "loss": 0.6079, "step": 34760 }, { "epoch": 0.6314470434403603, "grad_norm": 0.7728992232473084, "learning_rate": 6.094306765006079e-06, "loss": 0.6235, "step": 34770 }, { "epoch": 0.6316286502978352, "grad_norm": 0.7529747352878019, "learning_rate": 6.0890021344043625e-06, "loss": 0.6136, "step": 34780 }, { "epoch": 0.6318102571553101, "grad_norm": 0.740567568891701, "learning_rate": 6.08369880273683e-06, "loss": 0.6186, "step": 34790 }, { "epoch": 0.6319918640127852, "grad_norm": 0.7526657971515575, "learning_rate": 6.078396771764837e-06, "loss": 0.6123, "step": 34800 }, { "epoch": 0.6321734708702601, "grad_norm": 0.781249345722722, "learning_rate": 6.073096043249322e-06, "loss": 0.61, "step": 34810 }, { "epoch": 0.632355077727735, "grad_norm": 0.7703638828705879, "learning_rate": 6.067796618950773e-06, "loss": 0.6174, "step": 34820 }, { "epoch": 0.63253668458521, "grad_norm": 0.7678639986003168, "learning_rate": 6.06249850062926e-06, "loss": 0.6102, "step": 34830 }, { "epoch": 0.6327182914426849, "grad_norm": 0.7462240553212582, "learning_rate": 6.057201690044407e-06, "loss": 0.6123, "step": 34840 }, { "epoch": 0.6328998983001598, "grad_norm": 0.7333741820481221, "learning_rate": 6.051906188955415e-06, "loss": 0.6004, "step": 34850 }, { "epoch": 0.6330815051576347, "grad_norm": 0.7863438460004046, "learning_rate": 6.046611999121035e-06, "loss": 0.6184, "step": 34860 }, { "epoch": 0.6332631120151097, "grad_norm": 0.7976379765334237, "learning_rate": 6.041319122299603e-06, "loss": 0.621, "step": 34870 }, { "epoch": 0.6334447188725846, "grad_norm": 0.775687483313169, "learning_rate": 6.036027560248998e-06, "loss": 0.6021, "step": 34880 }, { "epoch": 0.6336263257300596, "grad_norm": 0.7527467784580757, "learning_rate": 6.030737314726678e-06, "loss": 0.6051, "step": 34890 }, { "epoch": 0.6338079325875345, "grad_norm": 0.7567444850683546, "learning_rate": 6.025448387489654e-06, "loss": 0.6096, "step": 34900 }, { "epoch": 0.6339895394450095, "grad_norm": 0.784317789490246, "learning_rate": 6.020160780294506e-06, "loss": 0.6122, "step": 34910 }, { "epoch": 0.6341711463024844, "grad_norm": 0.7473602133141249, "learning_rate": 6.014874494897369e-06, "loss": 0.6119, "step": 34920 }, { "epoch": 0.6343527531599593, "grad_norm": 0.8583909885711183, "learning_rate": 6.009589533053947e-06, "loss": 0.6075, "step": 34930 }, { "epoch": 0.6345343600174342, "grad_norm": 0.7838318313345938, "learning_rate": 6.004305896519496e-06, "loss": 0.606, "step": 34940 }, { "epoch": 0.6347159668749092, "grad_norm": 0.8082544172377599, "learning_rate": 5.999023587048835e-06, "loss": 0.6267, "step": 34950 }, { "epoch": 0.6348975737323841, "grad_norm": 0.7869267285669823, "learning_rate": 5.993742606396349e-06, "loss": 0.6182, "step": 34960 }, { "epoch": 0.6350791805898591, "grad_norm": 0.7333880677390727, "learning_rate": 5.988462956315967e-06, "loss": 0.6191, "step": 34970 }, { "epoch": 0.635260787447334, "grad_norm": 0.7755929371301052, "learning_rate": 5.983184638561193e-06, "loss": 0.6241, "step": 34980 }, { "epoch": 0.635442394304809, "grad_norm": 0.7639459342165117, "learning_rate": 5.9779076548850774e-06, "loss": 0.629, "step": 34990 }, { "epoch": 0.6356240011622839, "grad_norm": 0.772714601576114, "learning_rate": 5.9726320070402255e-06, "loss": 0.612, "step": 35000 }, { "epoch": 0.6358056080197588, "grad_norm": 0.7699565582487937, "learning_rate": 5.967357696778811e-06, "loss": 0.6135, "step": 35010 }, { "epoch": 0.6359872148772338, "grad_norm": 0.7118753121582491, "learning_rate": 5.962084725852549e-06, "loss": 0.5999, "step": 35020 }, { "epoch": 0.6361688217347087, "grad_norm": 0.7597255454705512, "learning_rate": 5.956813096012725e-06, "loss": 0.6218, "step": 35030 }, { "epoch": 0.6363504285921836, "grad_norm": 0.7181455385027511, "learning_rate": 5.951542809010162e-06, "loss": 0.612, "step": 35040 }, { "epoch": 0.6365320354496585, "grad_norm": 0.7482864002698272, "learning_rate": 5.946273866595256e-06, "loss": 0.6051, "step": 35050 }, { "epoch": 0.6367136423071336, "grad_norm": 0.7196770003544057, "learning_rate": 5.941006270517935e-06, "loss": 0.6083, "step": 35060 }, { "epoch": 0.6368952491646085, "grad_norm": 0.7844318186268332, "learning_rate": 5.935740022527703e-06, "loss": 0.6132, "step": 35070 }, { "epoch": 0.6370768560220834, "grad_norm": 0.7651515161245798, "learning_rate": 5.930475124373597e-06, "loss": 0.612, "step": 35080 }, { "epoch": 0.6372584628795583, "grad_norm": 0.7300387593981003, "learning_rate": 5.925211577804215e-06, "loss": 0.6015, "step": 35090 }, { "epoch": 0.6374400697370333, "grad_norm": 0.7831703933318056, "learning_rate": 5.919949384567705e-06, "loss": 0.6079, "step": 35100 }, { "epoch": 0.6376216765945082, "grad_norm": 0.7624739292594908, "learning_rate": 5.914688546411764e-06, "loss": 0.6133, "step": 35110 }, { "epoch": 0.6378032834519831, "grad_norm": 0.7792444288137207, "learning_rate": 5.909429065083641e-06, "loss": 0.6194, "step": 35120 }, { "epoch": 0.637984890309458, "grad_norm": 0.7488994544693202, "learning_rate": 5.904170942330131e-06, "loss": 0.6159, "step": 35130 }, { "epoch": 0.6381664971669331, "grad_norm": 0.73812791009518, "learning_rate": 5.898914179897582e-06, "loss": 0.6038, "step": 35140 }, { "epoch": 0.638348104024408, "grad_norm": 0.7572021826298562, "learning_rate": 5.8936587795318855e-06, "loss": 0.6179, "step": 35150 }, { "epoch": 0.6385297108818829, "grad_norm": 0.7786068834555679, "learning_rate": 5.88840474297849e-06, "loss": 0.6168, "step": 35160 }, { "epoch": 0.6387113177393579, "grad_norm": 0.7554899473852893, "learning_rate": 5.883152071982375e-06, "loss": 0.6208, "step": 35170 }, { "epoch": 0.6388929245968328, "grad_norm": 0.7284385359139335, "learning_rate": 5.877900768288085e-06, "loss": 0.6084, "step": 35180 }, { "epoch": 0.6390745314543077, "grad_norm": 0.7170867692092298, "learning_rate": 5.872650833639697e-06, "loss": 0.6009, "step": 35190 }, { "epoch": 0.6392561383117826, "grad_norm": 0.7159487471244188, "learning_rate": 5.867402269780834e-06, "loss": 0.619, "step": 35200 }, { "epoch": 0.6394377451692576, "grad_norm": 0.7276462525328471, "learning_rate": 5.862155078454674e-06, "loss": 0.5957, "step": 35210 }, { "epoch": 0.6396193520267325, "grad_norm": 0.7613532642787546, "learning_rate": 5.856909261403925e-06, "loss": 0.6097, "step": 35220 }, { "epoch": 0.6398009588842075, "grad_norm": 0.7234396832798047, "learning_rate": 5.851664820370854e-06, "loss": 0.6218, "step": 35230 }, { "epoch": 0.6399825657416824, "grad_norm": 0.7596280976114406, "learning_rate": 5.8464217570972534e-06, "loss": 0.6152, "step": 35240 }, { "epoch": 0.6401641725991574, "grad_norm": 0.7448939999480816, "learning_rate": 5.84118007332448e-06, "loss": 0.6271, "step": 35250 }, { "epoch": 0.6403457794566323, "grad_norm": 0.7386309411076041, "learning_rate": 5.835939770793406e-06, "loss": 0.6166, "step": 35260 }, { "epoch": 0.6405273863141072, "grad_norm": 0.8104284839628288, "learning_rate": 5.830700851244473e-06, "loss": 0.6213, "step": 35270 }, { "epoch": 0.6407089931715821, "grad_norm": 0.7855461096337625, "learning_rate": 5.825463316417639e-06, "loss": 0.6034, "step": 35280 }, { "epoch": 0.6408906000290571, "grad_norm": 0.7785370960839919, "learning_rate": 5.820227168052414e-06, "loss": 0.6036, "step": 35290 }, { "epoch": 0.641072206886532, "grad_norm": 0.7324733972840406, "learning_rate": 5.814992407887849e-06, "loss": 0.6098, "step": 35300 }, { "epoch": 0.641253813744007, "grad_norm": 0.8039065853284459, "learning_rate": 5.809759037662526e-06, "loss": 0.623, "step": 35310 }, { "epoch": 0.641435420601482, "grad_norm": 0.7598404586743528, "learning_rate": 5.8045270591145765e-06, "loss": 0.6133, "step": 35320 }, { "epoch": 0.6416170274589569, "grad_norm": 0.7502302980210652, "learning_rate": 5.799296473981656e-06, "loss": 0.6181, "step": 35330 }, { "epoch": 0.6417986343164318, "grad_norm": 0.7405154273150648, "learning_rate": 5.794067284000973e-06, "loss": 0.6122, "step": 35340 }, { "epoch": 0.6419802411739067, "grad_norm": 0.7480945475028545, "learning_rate": 5.788839490909253e-06, "loss": 0.6049, "step": 35350 }, { "epoch": 0.6421618480313817, "grad_norm": 0.7452110533565985, "learning_rate": 5.7836130964427815e-06, "loss": 0.6238, "step": 35360 }, { "epoch": 0.6423434548888566, "grad_norm": 0.757395972731489, "learning_rate": 5.778388102337355e-06, "loss": 0.6162, "step": 35370 }, { "epoch": 0.6425250617463315, "grad_norm": 0.7537923603488462, "learning_rate": 5.773164510328329e-06, "loss": 0.606, "step": 35380 }, { "epoch": 0.6427066686038064, "grad_norm": 0.7774623731065536, "learning_rate": 5.767942322150568e-06, "loss": 0.6217, "step": 35390 }, { "epoch": 0.6428882754612815, "grad_norm": 0.7450982727587779, "learning_rate": 5.762721539538494e-06, "loss": 0.6059, "step": 35400 }, { "epoch": 0.6430698823187564, "grad_norm": 0.7307289185377467, "learning_rate": 5.757502164226043e-06, "loss": 0.6227, "step": 35410 }, { "epoch": 0.6432514891762313, "grad_norm": 0.7228254681266121, "learning_rate": 5.7522841979467e-06, "loss": 0.6175, "step": 35420 }, { "epoch": 0.6434330960337062, "grad_norm": 0.7532067824482962, "learning_rate": 5.747067642433467e-06, "loss": 0.6143, "step": 35430 }, { "epoch": 0.6436147028911812, "grad_norm": 0.7545884804398788, "learning_rate": 5.741852499418887e-06, "loss": 0.6183, "step": 35440 }, { "epoch": 0.6437963097486561, "grad_norm": 0.7750890367171714, "learning_rate": 5.736638770635036e-06, "loss": 0.6096, "step": 35450 }, { "epoch": 0.643977916606131, "grad_norm": 0.7599874994160281, "learning_rate": 5.731426457813507e-06, "loss": 0.6205, "step": 35460 }, { "epoch": 0.6441595234636059, "grad_norm": 0.8078113536329385, "learning_rate": 5.726215562685441e-06, "loss": 0.606, "step": 35470 }, { "epoch": 0.644341130321081, "grad_norm": 0.7697629949529926, "learning_rate": 5.7210060869814895e-06, "loss": 0.6098, "step": 35480 }, { "epoch": 0.6445227371785559, "grad_norm": 0.7417236345651818, "learning_rate": 5.71579803243185e-06, "loss": 0.6116, "step": 35490 }, { "epoch": 0.6447043440360308, "grad_norm": 0.7335794977408081, "learning_rate": 5.7105914007662355e-06, "loss": 0.6193, "step": 35500 }, { "epoch": 0.6448859508935058, "grad_norm": 0.7401181111207639, "learning_rate": 5.705386193713887e-06, "loss": 0.5984, "step": 35510 }, { "epoch": 0.6450675577509807, "grad_norm": 0.7734604051094561, "learning_rate": 5.700182413003582e-06, "loss": 0.6035, "step": 35520 }, { "epoch": 0.6452491646084556, "grad_norm": 0.7466909191669017, "learning_rate": 5.694980060363613e-06, "loss": 0.5994, "step": 35530 }, { "epoch": 0.6454307714659305, "grad_norm": 0.7513965515489816, "learning_rate": 5.689779137521809e-06, "loss": 0.608, "step": 35540 }, { "epoch": 0.6456123783234055, "grad_norm": 0.7665421919730762, "learning_rate": 5.684579646205513e-06, "loss": 0.6172, "step": 35550 }, { "epoch": 0.6457939851808804, "grad_norm": 0.7468346879768756, "learning_rate": 5.6793815881416035e-06, "loss": 0.6074, "step": 35560 }, { "epoch": 0.6459755920383554, "grad_norm": 0.7383157574974374, "learning_rate": 5.674184965056473e-06, "loss": 0.6199, "step": 35570 }, { "epoch": 0.6461571988958303, "grad_norm": 0.7543645961884874, "learning_rate": 5.668989778676046e-06, "loss": 0.609, "step": 35580 }, { "epoch": 0.6463388057533053, "grad_norm": 0.7542571342593984, "learning_rate": 5.663796030725763e-06, "loss": 0.6172, "step": 35590 }, { "epoch": 0.6465204126107802, "grad_norm": 0.7237209478095504, "learning_rate": 5.6586037229305894e-06, "loss": 0.6197, "step": 35600 }, { "epoch": 0.6467020194682551, "grad_norm": 0.7639810853151778, "learning_rate": 5.653412857015015e-06, "loss": 0.6116, "step": 35610 }, { "epoch": 0.64688362632573, "grad_norm": 0.7667488073970388, "learning_rate": 5.648223434703042e-06, "loss": 0.6032, "step": 35620 }, { "epoch": 0.647065233183205, "grad_norm": 0.7578429659423369, "learning_rate": 5.643035457718209e-06, "loss": 0.6064, "step": 35630 }, { "epoch": 0.6472468400406799, "grad_norm": 0.7688980712302805, "learning_rate": 5.637848927783556e-06, "loss": 0.6107, "step": 35640 }, { "epoch": 0.6474284468981548, "grad_norm": 0.7766108050817204, "learning_rate": 5.632663846621658e-06, "loss": 0.6119, "step": 35650 }, { "epoch": 0.6476100537556299, "grad_norm": 0.755411906979626, "learning_rate": 5.6274802159545975e-06, "loss": 0.6012, "step": 35660 }, { "epoch": 0.6477916606131048, "grad_norm": 0.7562910937711079, "learning_rate": 5.622298037503984e-06, "loss": 0.6228, "step": 35670 }, { "epoch": 0.6479732674705797, "grad_norm": 0.7705447594350847, "learning_rate": 5.617117312990934e-06, "loss": 0.6114, "step": 35680 }, { "epoch": 0.6481548743280546, "grad_norm": 0.7901396861687965, "learning_rate": 5.611938044136096e-06, "loss": 0.6152, "step": 35690 }, { "epoch": 0.6483364811855296, "grad_norm": 0.7455368923057928, "learning_rate": 5.6067602326596235e-06, "loss": 0.6076, "step": 35700 }, { "epoch": 0.6485180880430045, "grad_norm": 0.7419908221416471, "learning_rate": 5.601583880281185e-06, "loss": 0.6115, "step": 35710 }, { "epoch": 0.6486996949004794, "grad_norm": 0.7275163606972599, "learning_rate": 5.596408988719975e-06, "loss": 0.6277, "step": 35720 }, { "epoch": 0.6488813017579543, "grad_norm": 0.7344196251459219, "learning_rate": 5.59123555969469e-06, "loss": 0.6015, "step": 35730 }, { "epoch": 0.6490629086154294, "grad_norm": 0.7511134885580565, "learning_rate": 5.586063594923554e-06, "loss": 0.6021, "step": 35740 }, { "epoch": 0.6492445154729043, "grad_norm": 0.7261465999695575, "learning_rate": 5.580893096124292e-06, "loss": 0.6102, "step": 35750 }, { "epoch": 0.6494261223303792, "grad_norm": 0.8068254282455398, "learning_rate": 5.575724065014154e-06, "loss": 0.6005, "step": 35760 }, { "epoch": 0.6496077291878541, "grad_norm": 0.7636807505176756, "learning_rate": 5.570556503309889e-06, "loss": 0.6012, "step": 35770 }, { "epoch": 0.6497893360453291, "grad_norm": 0.7636781714844127, "learning_rate": 5.565390412727774e-06, "loss": 0.6118, "step": 35780 }, { "epoch": 0.649970942902804, "grad_norm": 0.7260401769113266, "learning_rate": 5.5602257949835805e-06, "loss": 0.6197, "step": 35790 }, { "epoch": 0.6501525497602789, "grad_norm": 0.7668316988309315, "learning_rate": 5.5550626517926065e-06, "loss": 0.6132, "step": 35800 }, { "epoch": 0.6503341566177538, "grad_norm": 0.7685667177727769, "learning_rate": 5.549900984869646e-06, "loss": 0.6142, "step": 35810 }, { "epoch": 0.6505157634752288, "grad_norm": 0.7603710606888695, "learning_rate": 5.544740795929018e-06, "loss": 0.6165, "step": 35820 }, { "epoch": 0.6506973703327038, "grad_norm": 0.7546486559074698, "learning_rate": 5.5395820866845355e-06, "loss": 0.6092, "step": 35830 }, { "epoch": 0.6508789771901787, "grad_norm": 0.784140080278734, "learning_rate": 5.5344248588495275e-06, "loss": 0.612, "step": 35840 }, { "epoch": 0.6510605840476537, "grad_norm": 0.719027392557971, "learning_rate": 5.5292691141368375e-06, "loss": 0.6066, "step": 35850 }, { "epoch": 0.6512421909051286, "grad_norm": 0.7828070086709417, "learning_rate": 5.5241148542588e-06, "loss": 0.6168, "step": 35860 }, { "epoch": 0.6514237977626035, "grad_norm": 0.7528120340442321, "learning_rate": 5.518962080927273e-06, "loss": 0.6084, "step": 35870 }, { "epoch": 0.6516054046200784, "grad_norm": 0.7262285760603214, "learning_rate": 5.513810795853607e-06, "loss": 0.605, "step": 35880 }, { "epoch": 0.6517870114775534, "grad_norm": 0.7282406292537998, "learning_rate": 5.5086610007486715e-06, "loss": 0.6207, "step": 35890 }, { "epoch": 0.6519686183350283, "grad_norm": 0.7651166023722765, "learning_rate": 5.503512697322831e-06, "loss": 0.6096, "step": 35900 }, { "epoch": 0.6521502251925033, "grad_norm": 0.726864559833979, "learning_rate": 5.498365887285954e-06, "loss": 0.6079, "step": 35910 }, { "epoch": 0.6523318320499782, "grad_norm": 0.7909228511606402, "learning_rate": 5.493220572347424e-06, "loss": 0.6192, "step": 35920 }, { "epoch": 0.6525134389074532, "grad_norm": 0.7175850642949104, "learning_rate": 5.488076754216114e-06, "loss": 0.6254, "step": 35930 }, { "epoch": 0.6526950457649281, "grad_norm": 0.7856202023538403, "learning_rate": 5.482934434600414e-06, "loss": 0.6218, "step": 35940 }, { "epoch": 0.652876652622403, "grad_norm": 0.7705860188299007, "learning_rate": 5.477793615208202e-06, "loss": 0.6242, "step": 35950 }, { "epoch": 0.653058259479878, "grad_norm": 0.7574932495525696, "learning_rate": 5.472654297746871e-06, "loss": 0.5983, "step": 35960 }, { "epoch": 0.6532398663373529, "grad_norm": 0.7756849075474205, "learning_rate": 5.467516483923303e-06, "loss": 0.6092, "step": 35970 }, { "epoch": 0.6534214731948278, "grad_norm": 0.7373163769327402, "learning_rate": 5.462380175443892e-06, "loss": 0.6064, "step": 35980 }, { "epoch": 0.6536030800523027, "grad_norm": 0.7541759964672136, "learning_rate": 5.457245374014525e-06, "loss": 0.6045, "step": 35990 }, { "epoch": 0.6537846869097778, "grad_norm": 0.7426616698918884, "learning_rate": 5.452112081340586e-06, "loss": 0.6107, "step": 36000 }, { "epoch": 0.6539662937672527, "grad_norm": 0.7513490659471523, "learning_rate": 5.446980299126967e-06, "loss": 0.6134, "step": 36010 }, { "epoch": 0.6541479006247276, "grad_norm": 0.7756404754339672, "learning_rate": 5.441850029078048e-06, "loss": 0.6245, "step": 36020 }, { "epoch": 0.6543295074822025, "grad_norm": 0.7467445587758684, "learning_rate": 5.436721272897721e-06, "loss": 0.6186, "step": 36030 }, { "epoch": 0.6545111143396775, "grad_norm": 0.7602644373302982, "learning_rate": 5.431594032289356e-06, "loss": 0.6084, "step": 36040 }, { "epoch": 0.6546927211971524, "grad_norm": 0.7878216351917248, "learning_rate": 5.426468308955839e-06, "loss": 0.6144, "step": 36050 }, { "epoch": 0.6548743280546273, "grad_norm": 0.7934202485321212, "learning_rate": 5.421344104599535e-06, "loss": 0.6208, "step": 36060 }, { "epoch": 0.6550559349121022, "grad_norm": 0.8006369543899604, "learning_rate": 5.416221420922321e-06, "loss": 0.623, "step": 36070 }, { "epoch": 0.6552375417695773, "grad_norm": 0.7644546654954447, "learning_rate": 5.41110025962555e-06, "loss": 0.609, "step": 36080 }, { "epoch": 0.6554191486270522, "grad_norm": 0.7406254084687209, "learning_rate": 5.405980622410091e-06, "loss": 0.6187, "step": 36090 }, { "epoch": 0.6556007554845271, "grad_norm": 0.7356966852999621, "learning_rate": 5.40086251097629e-06, "loss": 0.6125, "step": 36100 }, { "epoch": 0.655782362342002, "grad_norm": 0.80026215754789, "learning_rate": 5.395745927023987e-06, "loss": 0.6094, "step": 36110 }, { "epoch": 0.655963969199477, "grad_norm": 0.7703272828446815, "learning_rate": 5.390630872252532e-06, "loss": 0.6236, "step": 36120 }, { "epoch": 0.6561455760569519, "grad_norm": 0.7719526903447744, "learning_rate": 5.385517348360741e-06, "loss": 0.6086, "step": 36130 }, { "epoch": 0.6563271829144268, "grad_norm": 0.7698133450499337, "learning_rate": 5.380405357046947e-06, "loss": 0.6004, "step": 36140 }, { "epoch": 0.6565087897719017, "grad_norm": 0.7875916976296384, "learning_rate": 5.375294900008954e-06, "loss": 0.591, "step": 36150 }, { "epoch": 0.6566903966293767, "grad_norm": 0.7481092062275017, "learning_rate": 5.370185978944071e-06, "loss": 0.6106, "step": 36160 }, { "epoch": 0.6568720034868517, "grad_norm": 0.7533828533669964, "learning_rate": 5.365078595549083e-06, "loss": 0.6067, "step": 36170 }, { "epoch": 0.6570536103443266, "grad_norm": 0.7359759414340461, "learning_rate": 5.359972751520282e-06, "loss": 0.6153, "step": 36180 }, { "epoch": 0.6572352172018016, "grad_norm": 0.7243056555760972, "learning_rate": 5.3548684485534285e-06, "loss": 0.625, "step": 36190 }, { "epoch": 0.6574168240592765, "grad_norm": 0.7809978611742506, "learning_rate": 5.3497656883437896e-06, "loss": 0.6136, "step": 36200 }, { "epoch": 0.6575984309167514, "grad_norm": 0.7378114196097976, "learning_rate": 5.344664472586105e-06, "loss": 0.6017, "step": 36210 }, { "epoch": 0.6577800377742263, "grad_norm": 0.7467448905635798, "learning_rate": 5.339564802974615e-06, "loss": 0.5935, "step": 36220 }, { "epoch": 0.6579616446317013, "grad_norm": 0.7940542670827594, "learning_rate": 5.33446668120303e-06, "loss": 0.6153, "step": 36230 }, { "epoch": 0.6581432514891762, "grad_norm": 0.7724698878230501, "learning_rate": 5.3293701089645644e-06, "loss": 0.6078, "step": 36240 }, { "epoch": 0.6583248583466512, "grad_norm": 0.737544905947672, "learning_rate": 5.324275087951909e-06, "loss": 0.5961, "step": 36250 }, { "epoch": 0.6585064652041261, "grad_norm": 0.7437274831939343, "learning_rate": 5.319181619857234e-06, "loss": 0.6202, "step": 36260 }, { "epoch": 0.6586880720616011, "grad_norm": 0.7508785073104932, "learning_rate": 5.314089706372208e-06, "loss": 0.614, "step": 36270 }, { "epoch": 0.658869678919076, "grad_norm": 0.7519619494462746, "learning_rate": 5.3089993491879655e-06, "loss": 0.6197, "step": 36280 }, { "epoch": 0.6590512857765509, "grad_norm": 0.7649410204706264, "learning_rate": 5.303910549995143e-06, "loss": 0.6222, "step": 36290 }, { "epoch": 0.6592328926340258, "grad_norm": 0.769106357686693, "learning_rate": 5.298823310483845e-06, "loss": 0.604, "step": 36300 }, { "epoch": 0.6594144994915008, "grad_norm": 0.7526011013140522, "learning_rate": 5.29373763234366e-06, "loss": 0.6094, "step": 36310 }, { "epoch": 0.6595961063489757, "grad_norm": 0.713705278352968, "learning_rate": 5.288653517263669e-06, "loss": 0.6107, "step": 36320 }, { "epoch": 0.6597777132064506, "grad_norm": 0.7720964495252961, "learning_rate": 5.283570966932416e-06, "loss": 0.6099, "step": 36330 }, { "epoch": 0.6599593200639257, "grad_norm": 0.742409294778066, "learning_rate": 5.278489983037946e-06, "loss": 0.6003, "step": 36340 }, { "epoch": 0.6601409269214006, "grad_norm": 0.7330213606077108, "learning_rate": 5.273410567267765e-06, "loss": 0.5926, "step": 36350 }, { "epoch": 0.6603225337788755, "grad_norm": 0.7550166705845262, "learning_rate": 5.268332721308873e-06, "loss": 0.6057, "step": 36360 }, { "epoch": 0.6605041406363504, "grad_norm": 0.7365889075517789, "learning_rate": 5.263256446847733e-06, "loss": 0.6208, "step": 36370 }, { "epoch": 0.6606857474938254, "grad_norm": 0.703387937986498, "learning_rate": 5.258181745570306e-06, "loss": 0.5888, "step": 36380 }, { "epoch": 0.6608673543513003, "grad_norm": 0.7429054341336863, "learning_rate": 5.253108619162011e-06, "loss": 0.6148, "step": 36390 }, { "epoch": 0.6610489612087752, "grad_norm": 0.7486100450713987, "learning_rate": 5.24803706930775e-06, "loss": 0.608, "step": 36400 }, { "epoch": 0.6612305680662501, "grad_norm": 0.7570297054294352, "learning_rate": 5.242967097691913e-06, "loss": 0.6135, "step": 36410 }, { "epoch": 0.6614121749237252, "grad_norm": 0.7756744151634911, "learning_rate": 5.23789870599835e-06, "loss": 0.5938, "step": 36420 }, { "epoch": 0.6615937817812001, "grad_norm": 0.7416925652294662, "learning_rate": 5.232831895910397e-06, "loss": 0.6107, "step": 36430 }, { "epoch": 0.661775388638675, "grad_norm": 0.7429531084633985, "learning_rate": 5.227766669110854e-06, "loss": 0.6014, "step": 36440 }, { "epoch": 0.66195699549615, "grad_norm": 0.7587392061726737, "learning_rate": 5.222703027282011e-06, "loss": 0.6073, "step": 36450 }, { "epoch": 0.6621386023536249, "grad_norm": 0.7340517568119946, "learning_rate": 5.217640972105613e-06, "loss": 0.6214, "step": 36460 }, { "epoch": 0.6623202092110998, "grad_norm": 0.7546078822347828, "learning_rate": 5.212580505262895e-06, "loss": 0.6243, "step": 36470 }, { "epoch": 0.6625018160685747, "grad_norm": 0.7512961441743727, "learning_rate": 5.2075216284345506e-06, "loss": 0.6174, "step": 36480 }, { "epoch": 0.6626834229260496, "grad_norm": 0.7499914552688636, "learning_rate": 5.202464343300757e-06, "loss": 0.6004, "step": 36490 }, { "epoch": 0.6628650297835246, "grad_norm": 0.7856119086719601, "learning_rate": 5.1974086515411555e-06, "loss": 0.6147, "step": 36500 }, { "epoch": 0.6630466366409996, "grad_norm": 0.7536660975659257, "learning_rate": 5.192354554834855e-06, "loss": 0.6203, "step": 36510 }, { "epoch": 0.6632282434984745, "grad_norm": 0.7661643804179131, "learning_rate": 5.187302054860448e-06, "loss": 0.6039, "step": 36520 }, { "epoch": 0.6634098503559495, "grad_norm": 0.7375534597018792, "learning_rate": 5.182251153295981e-06, "loss": 0.6194, "step": 36530 }, { "epoch": 0.6635914572134244, "grad_norm": 0.7539512761232893, "learning_rate": 5.177201851818983e-06, "loss": 0.6111, "step": 36540 }, { "epoch": 0.6637730640708993, "grad_norm": 0.7676391799282325, "learning_rate": 5.172154152106439e-06, "loss": 0.6114, "step": 36550 }, { "epoch": 0.6639546709283742, "grad_norm": 0.7482301020959566, "learning_rate": 5.1671080558348155e-06, "loss": 0.6224, "step": 36560 }, { "epoch": 0.6641362777858492, "grad_norm": 0.7460231793781124, "learning_rate": 5.162063564680032e-06, "loss": 0.611, "step": 36570 }, { "epoch": 0.6643178846433241, "grad_norm": 0.7435410452667458, "learning_rate": 5.157020680317491e-06, "loss": 0.6227, "step": 36580 }, { "epoch": 0.6644994915007991, "grad_norm": 0.7956062520537461, "learning_rate": 5.151979404422045e-06, "loss": 0.623, "step": 36590 }, { "epoch": 0.664681098358274, "grad_norm": 0.7566739107861266, "learning_rate": 5.146939738668027e-06, "loss": 0.611, "step": 36600 }, { "epoch": 0.664862705215749, "grad_norm": 0.7185310349450444, "learning_rate": 5.1419016847292204e-06, "loss": 0.6149, "step": 36610 }, { "epoch": 0.6650443120732239, "grad_norm": 0.7715553030494807, "learning_rate": 5.1368652442788894e-06, "loss": 0.6202, "step": 36620 }, { "epoch": 0.6652259189306988, "grad_norm": 0.7527799913223882, "learning_rate": 5.131830418989745e-06, "loss": 0.609, "step": 36630 }, { "epoch": 0.6654075257881737, "grad_norm": 0.7460107878207526, "learning_rate": 5.126797210533978e-06, "loss": 0.6048, "step": 36640 }, { "epoch": 0.6655891326456487, "grad_norm": 0.7336449649764959, "learning_rate": 5.1217656205832364e-06, "loss": 0.601, "step": 36650 }, { "epoch": 0.6657707395031236, "grad_norm": 0.7687095065733943, "learning_rate": 5.116735650808622e-06, "loss": 0.6179, "step": 36660 }, { "epoch": 0.6659523463605985, "grad_norm": 0.7541440763037021, "learning_rate": 5.111707302880713e-06, "loss": 0.6029, "step": 36670 }, { "epoch": 0.6661339532180736, "grad_norm": 0.7639032681760931, "learning_rate": 5.106680578469534e-06, "loss": 0.604, "step": 36680 }, { "epoch": 0.6663155600755485, "grad_norm": 0.7915960139509152, "learning_rate": 5.101655479244587e-06, "loss": 0.614, "step": 36690 }, { "epoch": 0.6664971669330234, "grad_norm": 0.7535875330171262, "learning_rate": 5.096632006874822e-06, "loss": 0.6219, "step": 36700 }, { "epoch": 0.6666787737904983, "grad_norm": 0.7665339904551687, "learning_rate": 5.091610163028646e-06, "loss": 0.6051, "step": 36710 }, { "epoch": 0.6668603806479733, "grad_norm": 0.7626055590893218, "learning_rate": 5.086589949373941e-06, "loss": 0.6026, "step": 36720 }, { "epoch": 0.6670419875054482, "grad_norm": 0.7409823221245162, "learning_rate": 5.081571367578029e-06, "loss": 0.6089, "step": 36730 }, { "epoch": 0.6672235943629231, "grad_norm": 0.7546352782487906, "learning_rate": 5.0765544193077065e-06, "loss": 0.6152, "step": 36740 }, { "epoch": 0.667405201220398, "grad_norm": 0.8002846906665351, "learning_rate": 5.071539106229213e-06, "loss": 0.6145, "step": 36750 }, { "epoch": 0.6675868080778731, "grad_norm": 0.7340904255007377, "learning_rate": 5.066525430008259e-06, "loss": 0.6073, "step": 36760 }, { "epoch": 0.667768414935348, "grad_norm": 0.7647055024324004, "learning_rate": 5.0615133923099955e-06, "loss": 0.6226, "step": 36770 }, { "epoch": 0.6679500217928229, "grad_norm": 0.7657838712743321, "learning_rate": 5.056502994799046e-06, "loss": 0.5878, "step": 36780 }, { "epoch": 0.6681316286502978, "grad_norm": 0.7889136992044349, "learning_rate": 5.0514942391394765e-06, "loss": 0.6155, "step": 36790 }, { "epoch": 0.6683132355077728, "grad_norm": 0.761363734938244, "learning_rate": 5.0464871269948105e-06, "loss": 0.612, "step": 36800 }, { "epoch": 0.6684948423652477, "grad_norm": 0.7524698720922834, "learning_rate": 5.041481660028033e-06, "loss": 0.6154, "step": 36810 }, { "epoch": 0.6686764492227226, "grad_norm": 0.7728336900904171, "learning_rate": 5.036477839901572e-06, "loss": 0.621, "step": 36820 }, { "epoch": 0.6688580560801975, "grad_norm": 0.7772057019944373, "learning_rate": 5.031475668277319e-06, "loss": 0.6151, "step": 36830 }, { "epoch": 0.6690396629376725, "grad_norm": 0.7805637591002029, "learning_rate": 5.026475146816605e-06, "loss": 0.6129, "step": 36840 }, { "epoch": 0.6692212697951475, "grad_norm": 0.7597956232088521, "learning_rate": 5.021476277180229e-06, "loss": 0.6224, "step": 36850 }, { "epoch": 0.6694028766526224, "grad_norm": 0.7682320540054652, "learning_rate": 5.016479061028425e-06, "loss": 0.6023, "step": 36860 }, { "epoch": 0.6695844835100974, "grad_norm": 0.7922073171486643, "learning_rate": 5.011483500020893e-06, "loss": 0.6184, "step": 36870 }, { "epoch": 0.6697660903675723, "grad_norm": 0.7228424461809553, "learning_rate": 5.00648959581677e-06, "loss": 0.6081, "step": 36880 }, { "epoch": 0.6699476972250472, "grad_norm": 0.7960808695396819, "learning_rate": 5.001497350074654e-06, "loss": 0.6036, "step": 36890 }, { "epoch": 0.6701293040825221, "grad_norm": 0.7536164772337701, "learning_rate": 4.996506764452586e-06, "loss": 0.608, "step": 36900 }, { "epoch": 0.6703109109399971, "grad_norm": 0.7958544886032208, "learning_rate": 4.99151784060805e-06, "loss": 0.5959, "step": 36910 }, { "epoch": 0.670492517797472, "grad_norm": 0.7592378151019257, "learning_rate": 4.986530580197995e-06, "loss": 0.6159, "step": 36920 }, { "epoch": 0.670674124654947, "grad_norm": 0.71832119485657, "learning_rate": 4.981544984878797e-06, "loss": 0.599, "step": 36930 }, { "epoch": 0.670855731512422, "grad_norm": 0.7220857156744599, "learning_rate": 4.976561056306298e-06, "loss": 0.5977, "step": 36940 }, { "epoch": 0.6710373383698969, "grad_norm": 0.7841810857159055, "learning_rate": 4.97157879613577e-06, "loss": 0.6165, "step": 36950 }, { "epoch": 0.6712189452273718, "grad_norm": 0.7672863526620863, "learning_rate": 4.966598206021947e-06, "loss": 0.6038, "step": 36960 }, { "epoch": 0.6714005520848467, "grad_norm": 0.7713790100243609, "learning_rate": 4.961619287618992e-06, "loss": 0.6221, "step": 36970 }, { "epoch": 0.6715821589423216, "grad_norm": 0.7591977775083167, "learning_rate": 4.956642042580526e-06, "loss": 0.6092, "step": 36980 }, { "epoch": 0.6717637657997966, "grad_norm": 0.7987910725137298, "learning_rate": 4.951666472559604e-06, "loss": 0.6169, "step": 36990 }, { "epoch": 0.6719453726572715, "grad_norm": 0.7599927293206188, "learning_rate": 4.946692579208736e-06, "loss": 0.6168, "step": 37000 }, { "epoch": 0.6721269795147464, "grad_norm": 0.7795291851817769, "learning_rate": 4.941720364179862e-06, "loss": 0.6127, "step": 37010 }, { "epoch": 0.6723085863722215, "grad_norm": 0.7638309460510547, "learning_rate": 4.936749829124377e-06, "loss": 0.6155, "step": 37020 }, { "epoch": 0.6724901932296964, "grad_norm": 0.7751043510554114, "learning_rate": 4.931780975693108e-06, "loss": 0.6078, "step": 37030 }, { "epoch": 0.6726718000871713, "grad_norm": 0.7289229250970529, "learning_rate": 4.926813805536329e-06, "loss": 0.6011, "step": 37040 }, { "epoch": 0.6728534069446462, "grad_norm": 0.7670931715285316, "learning_rate": 4.921848320303757e-06, "loss": 0.5981, "step": 37050 }, { "epoch": 0.6730350138021212, "grad_norm": 0.7677018157006793, "learning_rate": 4.916884521644542e-06, "loss": 0.6063, "step": 37060 }, { "epoch": 0.6732166206595961, "grad_norm": 0.7603063450352847, "learning_rate": 4.911922411207281e-06, "loss": 0.6136, "step": 37070 }, { "epoch": 0.673398227517071, "grad_norm": 0.7523889111887814, "learning_rate": 4.906961990640005e-06, "loss": 0.6111, "step": 37080 }, { "epoch": 0.6735798343745459, "grad_norm": 0.7488339895809909, "learning_rate": 4.902003261590188e-06, "loss": 0.6282, "step": 37090 }, { "epoch": 0.673761441232021, "grad_norm": 0.7316568869322341, "learning_rate": 4.897046225704741e-06, "loss": 0.6134, "step": 37100 }, { "epoch": 0.6739430480894959, "grad_norm": 0.779689728656329, "learning_rate": 4.892090884630007e-06, "loss": 0.6063, "step": 37110 }, { "epoch": 0.6741246549469708, "grad_norm": 0.7558814866139707, "learning_rate": 4.887137240011778e-06, "loss": 0.6063, "step": 37120 }, { "epoch": 0.6743062618044458, "grad_norm": 0.7806603465915771, "learning_rate": 4.882185293495267e-06, "loss": 0.6156, "step": 37130 }, { "epoch": 0.6744878686619207, "grad_norm": 0.743453572355385, "learning_rate": 4.8772350467251415e-06, "loss": 0.605, "step": 37140 }, { "epoch": 0.6746694755193956, "grad_norm": 0.7707961069493929, "learning_rate": 4.872286501345487e-06, "loss": 0.598, "step": 37150 }, { "epoch": 0.6748510823768705, "grad_norm": 0.7551785837449719, "learning_rate": 4.8673396589998365e-06, "loss": 0.6052, "step": 37160 }, { "epoch": 0.6750326892343455, "grad_norm": 0.7671487683216606, "learning_rate": 4.862394521331148e-06, "loss": 0.609, "step": 37170 }, { "epoch": 0.6752142960918204, "grad_norm": 0.7728910612389385, "learning_rate": 4.8574510899818226e-06, "loss": 0.6078, "step": 37180 }, { "epoch": 0.6753959029492954, "grad_norm": 0.7513751729534175, "learning_rate": 4.852509366593685e-06, "loss": 0.612, "step": 37190 }, { "epoch": 0.6755775098067703, "grad_norm": 0.7402598982863504, "learning_rate": 4.847569352808004e-06, "loss": 0.6073, "step": 37200 }, { "epoch": 0.6757591166642453, "grad_norm": 0.747413947371438, "learning_rate": 4.842631050265468e-06, "loss": 0.6041, "step": 37210 }, { "epoch": 0.6759407235217202, "grad_norm": 0.7435308041201959, "learning_rate": 4.837694460606204e-06, "loss": 0.6094, "step": 37220 }, { "epoch": 0.6761223303791951, "grad_norm": 0.7477567601905951, "learning_rate": 4.832759585469773e-06, "loss": 0.6128, "step": 37230 }, { "epoch": 0.67630393723667, "grad_norm": 0.7432234686605631, "learning_rate": 4.8278264264951565e-06, "loss": 0.6111, "step": 37240 }, { "epoch": 0.676485544094145, "grad_norm": 0.7566258379620581, "learning_rate": 4.822894985320781e-06, "loss": 0.5958, "step": 37250 }, { "epoch": 0.6766671509516199, "grad_norm": 0.726083998117346, "learning_rate": 4.817965263584485e-06, "loss": 0.6119, "step": 37260 }, { "epoch": 0.6768487578090949, "grad_norm": 0.7866894941363168, "learning_rate": 4.813037262923552e-06, "loss": 0.6096, "step": 37270 }, { "epoch": 0.6770303646665699, "grad_norm": 0.7752429676116702, "learning_rate": 4.808110984974681e-06, "loss": 0.6098, "step": 37280 }, { "epoch": 0.6772119715240448, "grad_norm": 0.8147784793563952, "learning_rate": 4.80318643137401e-06, "loss": 0.595, "step": 37290 }, { "epoch": 0.6773935783815197, "grad_norm": 0.7503160387539992, "learning_rate": 4.798263603757097e-06, "loss": 0.6273, "step": 37300 }, { "epoch": 0.6775751852389946, "grad_norm": 0.7370471133388882, "learning_rate": 4.793342503758923e-06, "loss": 0.6097, "step": 37310 }, { "epoch": 0.6777567920964696, "grad_norm": 0.7474688901555923, "learning_rate": 4.78842313301391e-06, "loss": 0.6109, "step": 37320 }, { "epoch": 0.6779383989539445, "grad_norm": 0.7496106964518592, "learning_rate": 4.783505493155887e-06, "loss": 0.6072, "step": 37330 }, { "epoch": 0.6781200058114194, "grad_norm": 0.7452088332956401, "learning_rate": 4.778589585818127e-06, "loss": 0.6114, "step": 37340 }, { "epoch": 0.6783016126688943, "grad_norm": 0.764880317924836, "learning_rate": 4.7736754126333095e-06, "loss": 0.6216, "step": 37350 }, { "epoch": 0.6784832195263694, "grad_norm": 0.7647622079258235, "learning_rate": 4.768762975233555e-06, "loss": 0.6173, "step": 37360 }, { "epoch": 0.6786648263838443, "grad_norm": 0.7210693044906051, "learning_rate": 4.7638522752503914e-06, "loss": 0.6039, "step": 37370 }, { "epoch": 0.6788464332413192, "grad_norm": 0.7749521371498277, "learning_rate": 4.758943314314786e-06, "loss": 0.6027, "step": 37380 }, { "epoch": 0.6790280400987941, "grad_norm": 0.7161340583854637, "learning_rate": 4.75403609405711e-06, "loss": 0.6083, "step": 37390 }, { "epoch": 0.6792096469562691, "grad_norm": 0.753044490682021, "learning_rate": 4.749130616107176e-06, "loss": 0.5965, "step": 37400 }, { "epoch": 0.679391253813744, "grad_norm": 0.7548633175195729, "learning_rate": 4.7442268820941995e-06, "loss": 0.6182, "step": 37410 }, { "epoch": 0.6795728606712189, "grad_norm": 0.7666354064776979, "learning_rate": 4.739324893646834e-06, "loss": 0.6184, "step": 37420 }, { "epoch": 0.6797544675286938, "grad_norm": 0.7314633653461934, "learning_rate": 4.7344246523931385e-06, "loss": 0.63, "step": 37430 }, { "epoch": 0.6799360743861689, "grad_norm": 0.7374579369980423, "learning_rate": 4.729526159960599e-06, "loss": 0.6077, "step": 37440 }, { "epoch": 0.6801176812436438, "grad_norm": 0.743645158893927, "learning_rate": 4.724629417976127e-06, "loss": 0.6117, "step": 37450 }, { "epoch": 0.6802992881011187, "grad_norm": 0.7894255293966708, "learning_rate": 4.719734428066034e-06, "loss": 0.6091, "step": 37460 }, { "epoch": 0.6804808949585937, "grad_norm": 0.7212887886061086, "learning_rate": 4.714841191856072e-06, "loss": 0.6081, "step": 37470 }, { "epoch": 0.6806625018160686, "grad_norm": 0.7422306261838613, "learning_rate": 4.709949710971391e-06, "loss": 0.6013, "step": 37480 }, { "epoch": 0.6808441086735435, "grad_norm": 0.7407262877608465, "learning_rate": 4.705059987036573e-06, "loss": 0.6201, "step": 37490 }, { "epoch": 0.6810257155310184, "grad_norm": 0.7324719751099857, "learning_rate": 4.700172021675607e-06, "loss": 0.5973, "step": 37500 }, { "epoch": 0.6812073223884934, "grad_norm": 0.770637543258817, "learning_rate": 4.6952858165119e-06, "loss": 0.624, "step": 37510 }, { "epoch": 0.6813889292459683, "grad_norm": 0.7691189689951451, "learning_rate": 4.690401373168277e-06, "loss": 0.6292, "step": 37520 }, { "epoch": 0.6815705361034433, "grad_norm": 0.7648641669176355, "learning_rate": 4.685518693266975e-06, "loss": 0.6107, "step": 37530 }, { "epoch": 0.6817521429609182, "grad_norm": 0.7532989493916101, "learning_rate": 4.68063777842965e-06, "loss": 0.6057, "step": 37540 }, { "epoch": 0.6819337498183932, "grad_norm": 0.8091750273533114, "learning_rate": 4.675758630277362e-06, "loss": 0.6124, "step": 37550 }, { "epoch": 0.6821153566758681, "grad_norm": 0.7542769726939528, "learning_rate": 4.6708812504305985e-06, "loss": 0.6268, "step": 37560 }, { "epoch": 0.682296963533343, "grad_norm": 0.748237662426182, "learning_rate": 4.666005640509244e-06, "loss": 0.5998, "step": 37570 }, { "epoch": 0.6824785703908179, "grad_norm": 0.7717556946528243, "learning_rate": 4.66113180213261e-06, "loss": 0.6149, "step": 37580 }, { "epoch": 0.6826601772482929, "grad_norm": 0.7393076946140711, "learning_rate": 4.656259736919407e-06, "loss": 0.6148, "step": 37590 }, { "epoch": 0.6828417841057678, "grad_norm": 0.7625466491838826, "learning_rate": 4.651389446487767e-06, "loss": 0.6034, "step": 37600 }, { "epoch": 0.6830233909632428, "grad_norm": 0.7634765138984785, "learning_rate": 4.646520932455227e-06, "loss": 0.6128, "step": 37610 }, { "epoch": 0.6832049978207178, "grad_norm": 0.7819331695568886, "learning_rate": 4.641654196438729e-06, "loss": 0.6042, "step": 37620 }, { "epoch": 0.6833866046781927, "grad_norm": 0.7676333076155288, "learning_rate": 4.636789240054636e-06, "loss": 0.6103, "step": 37630 }, { "epoch": 0.6835682115356676, "grad_norm": 0.7482736861996334, "learning_rate": 4.631926064918712e-06, "loss": 0.6086, "step": 37640 }, { "epoch": 0.6837498183931425, "grad_norm": 0.7333397365867906, "learning_rate": 4.627064672646134e-06, "loss": 0.6105, "step": 37650 }, { "epoch": 0.6839314252506175, "grad_norm": 0.7392263515381137, "learning_rate": 4.622205064851481e-06, "loss": 0.5962, "step": 37660 }, { "epoch": 0.6841130321080924, "grad_norm": 0.7657631572873119, "learning_rate": 4.617347243148745e-06, "loss": 0.6158, "step": 37670 }, { "epoch": 0.6842946389655673, "grad_norm": 0.7302193324213482, "learning_rate": 4.612491209151321e-06, "loss": 0.6057, "step": 37680 }, { "epoch": 0.6844762458230422, "grad_norm": 0.7454724622717771, "learning_rate": 4.6076369644720154e-06, "loss": 0.6003, "step": 37690 }, { "epoch": 0.6846578526805173, "grad_norm": 0.7693508614354543, "learning_rate": 4.602784510723035e-06, "loss": 0.6014, "step": 37700 }, { "epoch": 0.6848394595379922, "grad_norm": 0.781302031459357, "learning_rate": 4.5979338495159895e-06, "loss": 0.63, "step": 37710 }, { "epoch": 0.6850210663954671, "grad_norm": 0.7809273150086193, "learning_rate": 4.593084982461904e-06, "loss": 0.6111, "step": 37720 }, { "epoch": 0.685202673252942, "grad_norm": 0.7416294332524904, "learning_rate": 4.588237911171194e-06, "loss": 0.6202, "step": 37730 }, { "epoch": 0.685384280110417, "grad_norm": 0.7752750395386059, "learning_rate": 4.583392637253693e-06, "loss": 0.6112, "step": 37740 }, { "epoch": 0.6855658869678919, "grad_norm": 0.7402334186876088, "learning_rate": 4.578549162318624e-06, "loss": 0.6007, "step": 37750 }, { "epoch": 0.6857474938253668, "grad_norm": 0.7630592933741744, "learning_rate": 4.573707487974625e-06, "loss": 0.6102, "step": 37760 }, { "epoch": 0.6859291006828417, "grad_norm": 0.725179638289622, "learning_rate": 4.568867615829721e-06, "loss": 0.6036, "step": 37770 }, { "epoch": 0.6861107075403168, "grad_norm": 0.7841504223531633, "learning_rate": 4.564029547491357e-06, "loss": 0.6033, "step": 37780 }, { "epoch": 0.6862923143977917, "grad_norm": 0.755148376591439, "learning_rate": 4.55919328456636e-06, "loss": 0.5986, "step": 37790 }, { "epoch": 0.6864739212552666, "grad_norm": 0.7388394762066866, "learning_rate": 4.554358828660974e-06, "loss": 0.6058, "step": 37800 }, { "epoch": 0.6866555281127416, "grad_norm": 0.7574068740869769, "learning_rate": 4.549526181380829e-06, "loss": 0.6017, "step": 37810 }, { "epoch": 0.6868371349702165, "grad_norm": 0.7416315491439343, "learning_rate": 4.544695344330967e-06, "loss": 0.5985, "step": 37820 }, { "epoch": 0.6870187418276914, "grad_norm": 0.7312830690509389, "learning_rate": 4.539866319115815e-06, "loss": 0.611, "step": 37830 }, { "epoch": 0.6872003486851663, "grad_norm": 0.7687204268876618, "learning_rate": 4.53503910733921e-06, "loss": 0.6136, "step": 37840 }, { "epoch": 0.6873819555426413, "grad_norm": 0.7706548225632187, "learning_rate": 4.5302137106043845e-06, "loss": 0.6149, "step": 37850 }, { "epoch": 0.6875635624001162, "grad_norm": 0.8077649611393644, "learning_rate": 4.52539013051396e-06, "loss": 0.6191, "step": 37860 }, { "epoch": 0.6877451692575912, "grad_norm": 0.7242513039211056, "learning_rate": 4.5205683686699675e-06, "loss": 0.6064, "step": 37870 }, { "epoch": 0.6879267761150661, "grad_norm": 0.7354445497262871, "learning_rate": 4.5157484266738206e-06, "loss": 0.6049, "step": 37880 }, { "epoch": 0.6881083829725411, "grad_norm": 0.8133840821644105, "learning_rate": 4.5109303061263425e-06, "loss": 0.6177, "step": 37890 }, { "epoch": 0.688289989830016, "grad_norm": 0.7297387937767978, "learning_rate": 4.506114008627739e-06, "loss": 0.5975, "step": 37900 }, { "epoch": 0.6884715966874909, "grad_norm": 0.7448172510483413, "learning_rate": 4.501299535777613e-06, "loss": 0.6112, "step": 37910 }, { "epoch": 0.6886532035449658, "grad_norm": 0.7282575476162813, "learning_rate": 4.496486889174971e-06, "loss": 0.6148, "step": 37920 }, { "epoch": 0.6888348104024408, "grad_norm": 0.8262033148001544, "learning_rate": 4.491676070418198e-06, "loss": 0.6244, "step": 37930 }, { "epoch": 0.6890164172599157, "grad_norm": 0.7610763459082669, "learning_rate": 4.486867081105089e-06, "loss": 0.611, "step": 37940 }, { "epoch": 0.6891980241173906, "grad_norm": 0.763512065740236, "learning_rate": 4.482059922832813e-06, "loss": 0.6049, "step": 37950 }, { "epoch": 0.6893796309748657, "grad_norm": 0.747339749011707, "learning_rate": 4.477254597197949e-06, "loss": 0.6061, "step": 37960 }, { "epoch": 0.6895612378323406, "grad_norm": 0.7315035373694573, "learning_rate": 4.472451105796449e-06, "loss": 0.6052, "step": 37970 }, { "epoch": 0.6897428446898155, "grad_norm": 0.753276823573675, "learning_rate": 4.467649450223674e-06, "loss": 0.6235, "step": 37980 }, { "epoch": 0.6899244515472904, "grad_norm": 0.7637442529179773, "learning_rate": 4.46284963207436e-06, "loss": 0.6075, "step": 37990 }, { "epoch": 0.6901060584047654, "grad_norm": 0.7786520383491323, "learning_rate": 4.4580516529426444e-06, "loss": 0.6158, "step": 38000 }, { "epoch": 0.6902876652622403, "grad_norm": 0.7459974478416213, "learning_rate": 4.4532555144220464e-06, "loss": 0.6098, "step": 38010 }, { "epoch": 0.6904692721197152, "grad_norm": 0.7825639548947807, "learning_rate": 4.448461218105472e-06, "loss": 0.6172, "step": 38020 }, { "epoch": 0.6906508789771901, "grad_norm": 0.7486694156721976, "learning_rate": 4.443668765585228e-06, "loss": 0.6147, "step": 38030 }, { "epoch": 0.6908324858346652, "grad_norm": 0.7497931749663758, "learning_rate": 4.438878158452991e-06, "loss": 0.6138, "step": 38040 }, { "epoch": 0.6910140926921401, "grad_norm": 0.7691076285545073, "learning_rate": 4.434089398299843e-06, "loss": 0.6095, "step": 38050 }, { "epoch": 0.691195699549615, "grad_norm": 0.6941864300371954, "learning_rate": 4.429302486716236e-06, "loss": 0.6158, "step": 38060 }, { "epoch": 0.6913773064070899, "grad_norm": 0.8025872197520797, "learning_rate": 4.424517425292023e-06, "loss": 0.6031, "step": 38070 }, { "epoch": 0.6915589132645649, "grad_norm": 0.7644034255109294, "learning_rate": 4.419734215616428e-06, "loss": 0.6088, "step": 38080 }, { "epoch": 0.6917405201220398, "grad_norm": 0.7913969152539738, "learning_rate": 4.414952859278074e-06, "loss": 0.6287, "step": 38090 }, { "epoch": 0.6919221269795147, "grad_norm": 0.8357419504489796, "learning_rate": 4.410173357864957e-06, "loss": 0.6145, "step": 38100 }, { "epoch": 0.6921037338369896, "grad_norm": 0.7677925279744706, "learning_rate": 4.405395712964461e-06, "loss": 0.6127, "step": 38110 }, { "epoch": 0.6922853406944646, "grad_norm": 0.7562291176097277, "learning_rate": 4.400619926163358e-06, "loss": 0.6202, "step": 38120 }, { "epoch": 0.6924669475519396, "grad_norm": 0.7584732351662697, "learning_rate": 4.395845999047794e-06, "loss": 0.6123, "step": 38130 }, { "epoch": 0.6926485544094145, "grad_norm": 0.7519998943088299, "learning_rate": 4.3910739332033095e-06, "loss": 0.6095, "step": 38140 }, { "epoch": 0.6928301612668895, "grad_norm": 0.803220390926886, "learning_rate": 4.386303730214809e-06, "loss": 0.6065, "step": 38150 }, { "epoch": 0.6930117681243644, "grad_norm": 0.7814836373541316, "learning_rate": 4.3815353916666e-06, "loss": 0.6079, "step": 38160 }, { "epoch": 0.6931933749818393, "grad_norm": 0.7520726437108981, "learning_rate": 4.376768919142351e-06, "loss": 0.6089, "step": 38170 }, { "epoch": 0.6933749818393142, "grad_norm": 0.7742631942636818, "learning_rate": 4.372004314225127e-06, "loss": 0.6094, "step": 38180 }, { "epoch": 0.6935565886967892, "grad_norm": 0.749236136554567, "learning_rate": 4.367241578497357e-06, "loss": 0.6061, "step": 38190 }, { "epoch": 0.6937381955542641, "grad_norm": 0.7520008617505071, "learning_rate": 4.362480713540864e-06, "loss": 0.5974, "step": 38200 }, { "epoch": 0.6939198024117391, "grad_norm": 0.7819393689452182, "learning_rate": 4.357721720936839e-06, "loss": 0.6144, "step": 38210 }, { "epoch": 0.694101409269214, "grad_norm": 0.7753505699750426, "learning_rate": 4.352964602265858e-06, "loss": 0.6087, "step": 38220 }, { "epoch": 0.694283016126689, "grad_norm": 0.7938352920083864, "learning_rate": 4.348209359107868e-06, "loss": 0.6159, "step": 38230 }, { "epoch": 0.6944646229841639, "grad_norm": 0.7571369857069093, "learning_rate": 4.343455993042198e-06, "loss": 0.6107, "step": 38240 }, { "epoch": 0.6946462298416388, "grad_norm": 0.7848407748871741, "learning_rate": 4.338704505647559e-06, "loss": 0.6063, "step": 38250 }, { "epoch": 0.6948278366991137, "grad_norm": 0.7415158672855432, "learning_rate": 4.333954898502021e-06, "loss": 0.6064, "step": 38260 }, { "epoch": 0.6950094435565887, "grad_norm": 0.7710309863486307, "learning_rate": 4.3292071731830485e-06, "loss": 0.6099, "step": 38270 }, { "epoch": 0.6951910504140636, "grad_norm": 0.7419921492156959, "learning_rate": 4.324461331267465e-06, "loss": 0.6084, "step": 38280 }, { "epoch": 0.6953726572715385, "grad_norm": 0.7687223259052621, "learning_rate": 4.3197173743314855e-06, "loss": 0.6144, "step": 38290 }, { "epoch": 0.6955542641290136, "grad_norm": 0.7168734932082522, "learning_rate": 4.314975303950684e-06, "loss": 0.5954, "step": 38300 }, { "epoch": 0.6957358709864885, "grad_norm": 0.7658852249723787, "learning_rate": 4.310235121700008e-06, "loss": 0.6147, "step": 38310 }, { "epoch": 0.6959174778439634, "grad_norm": 0.8473989725948279, "learning_rate": 4.305496829153793e-06, "loss": 0.6122, "step": 38320 }, { "epoch": 0.6960990847014383, "grad_norm": 0.761327155606353, "learning_rate": 4.30076042788573e-06, "loss": 0.6114, "step": 38330 }, { "epoch": 0.6962806915589133, "grad_norm": 0.7542963004969986, "learning_rate": 4.296025919468894e-06, "loss": 0.6153, "step": 38340 }, { "epoch": 0.6964622984163882, "grad_norm": 0.7850818537115077, "learning_rate": 4.291293305475722e-06, "loss": 0.6163, "step": 38350 }, { "epoch": 0.6966439052738631, "grad_norm": 0.7151016373484917, "learning_rate": 4.286562587478033e-06, "loss": 0.6, "step": 38360 }, { "epoch": 0.696825512131338, "grad_norm": 0.744711383238082, "learning_rate": 4.281833767046999e-06, "loss": 0.621, "step": 38370 }, { "epoch": 0.6970071189888131, "grad_norm": 0.7567227676620173, "learning_rate": 4.277106845753183e-06, "loss": 0.6115, "step": 38380 }, { "epoch": 0.697188725846288, "grad_norm": 0.7521867912902067, "learning_rate": 4.2723818251664974e-06, "loss": 0.6081, "step": 38390 }, { "epoch": 0.6973703327037629, "grad_norm": 0.7655887794893396, "learning_rate": 4.26765870685624e-06, "loss": 0.61, "step": 38400 }, { "epoch": 0.6975519395612378, "grad_norm": 0.7852972355544175, "learning_rate": 4.262937492391066e-06, "loss": 0.6127, "step": 38410 }, { "epoch": 0.6977335464187128, "grad_norm": 0.7044970962521973, "learning_rate": 4.258218183338999e-06, "loss": 0.6085, "step": 38420 }, { "epoch": 0.6979151532761877, "grad_norm": 0.7429714078435461, "learning_rate": 4.253500781267438e-06, "loss": 0.6039, "step": 38430 }, { "epoch": 0.6980967601336626, "grad_norm": 0.7511772099447875, "learning_rate": 4.248785287743135e-06, "loss": 0.6033, "step": 38440 }, { "epoch": 0.6982783669911375, "grad_norm": 0.7610880885566971, "learning_rate": 4.244071704332225e-06, "loss": 0.6085, "step": 38450 }, { "epoch": 0.6984599738486125, "grad_norm": 0.7429137773088729, "learning_rate": 4.239360032600192e-06, "loss": 0.6231, "step": 38460 }, { "epoch": 0.6986415807060875, "grad_norm": 0.7211332453896304, "learning_rate": 4.2346502741119e-06, "loss": 0.5952, "step": 38470 }, { "epoch": 0.6988231875635624, "grad_norm": 0.7758013475306472, "learning_rate": 4.2299424304315615e-06, "loss": 0.61, "step": 38480 }, { "epoch": 0.6990047944210374, "grad_norm": 0.7439477009850969, "learning_rate": 4.225236503122773e-06, "loss": 0.599, "step": 38490 }, { "epoch": 0.6991864012785123, "grad_norm": 0.7716852988777309, "learning_rate": 4.220532493748476e-06, "loss": 0.6164, "step": 38500 }, { "epoch": 0.6993680081359872, "grad_norm": 0.7383732335281092, "learning_rate": 4.21583040387098e-06, "loss": 0.6034, "step": 38510 }, { "epoch": 0.6995496149934621, "grad_norm": 0.7937039889642169, "learning_rate": 4.211130235051967e-06, "loss": 0.5984, "step": 38520 }, { "epoch": 0.6997312218509371, "grad_norm": 0.789958823761353, "learning_rate": 4.2064319888524655e-06, "loss": 0.6119, "step": 38530 }, { "epoch": 0.699912828708412, "grad_norm": 0.74071931716997, "learning_rate": 4.201735666832881e-06, "loss": 0.5959, "step": 38540 }, { "epoch": 0.700094435565887, "grad_norm": 0.7357911701243951, "learning_rate": 4.197041270552966e-06, "loss": 0.6095, "step": 38550 }, { "epoch": 0.7002760424233619, "grad_norm": 0.7625526963350101, "learning_rate": 4.192348801571845e-06, "loss": 0.5954, "step": 38560 }, { "epoch": 0.7004576492808369, "grad_norm": 0.7417388562513365, "learning_rate": 4.187658261447991e-06, "loss": 0.6082, "step": 38570 }, { "epoch": 0.7006392561383118, "grad_norm": 0.7349213297236094, "learning_rate": 4.182969651739249e-06, "loss": 0.5952, "step": 38580 }, { "epoch": 0.7008208629957867, "grad_norm": 0.7302553515396469, "learning_rate": 4.178282974002811e-06, "loss": 0.6097, "step": 38590 }, { "epoch": 0.7010024698532616, "grad_norm": 0.7868153510860779, "learning_rate": 4.173598229795237e-06, "loss": 0.596, "step": 38600 }, { "epoch": 0.7011840767107366, "grad_norm": 0.7235176756185437, "learning_rate": 4.168915420672436e-06, "loss": 0.6276, "step": 38610 }, { "epoch": 0.7013656835682115, "grad_norm": 0.749306760106379, "learning_rate": 4.164234548189684e-06, "loss": 0.6135, "step": 38620 }, { "epoch": 0.7015472904256864, "grad_norm": 0.7458467412832508, "learning_rate": 4.159555613901603e-06, "loss": 0.6165, "step": 38630 }, { "epoch": 0.7017288972831615, "grad_norm": 0.7374430504646496, "learning_rate": 4.154878619362179e-06, "loss": 0.5924, "step": 38640 }, { "epoch": 0.7019105041406364, "grad_norm": 0.7725797687434195, "learning_rate": 4.150203566124756e-06, "loss": 0.602, "step": 38650 }, { "epoch": 0.7020921109981113, "grad_norm": 0.7410446601909335, "learning_rate": 4.145530455742022e-06, "loss": 0.6293, "step": 38660 }, { "epoch": 0.7022737178555862, "grad_norm": 0.7203414089630424, "learning_rate": 4.1408592897660325e-06, "loss": 0.6007, "step": 38670 }, { "epoch": 0.7024553247130612, "grad_norm": 0.7621764386587776, "learning_rate": 4.136190069748186e-06, "loss": 0.6252, "step": 38680 }, { "epoch": 0.7026369315705361, "grad_norm": 0.7450839189955456, "learning_rate": 4.131522797239247e-06, "loss": 0.6106, "step": 38690 }, { "epoch": 0.702818538428011, "grad_norm": 0.7342013371754722, "learning_rate": 4.1268574737893174e-06, "loss": 0.6102, "step": 38700 }, { "epoch": 0.7030001452854859, "grad_norm": 0.7551342375986975, "learning_rate": 4.122194100947869e-06, "loss": 0.6088, "step": 38710 }, { "epoch": 0.703181752142961, "grad_norm": 0.7399017810257532, "learning_rate": 4.1175326802637135e-06, "loss": 0.6037, "step": 38720 }, { "epoch": 0.7033633590004359, "grad_norm": 0.7475574489915575, "learning_rate": 4.112873213285016e-06, "loss": 0.6129, "step": 38730 }, { "epoch": 0.7035449658579108, "grad_norm": 0.7715849251536895, "learning_rate": 4.108215701559299e-06, "loss": 0.6014, "step": 38740 }, { "epoch": 0.7037265727153857, "grad_norm": 0.7589458084905047, "learning_rate": 4.103560146633427e-06, "loss": 0.6155, "step": 38750 }, { "epoch": 0.7039081795728607, "grad_norm": 0.7425482627773521, "learning_rate": 4.098906550053625e-06, "loss": 0.6159, "step": 38760 }, { "epoch": 0.7040897864303356, "grad_norm": 0.7315932128969138, "learning_rate": 4.094254913365454e-06, "loss": 0.6198, "step": 38770 }, { "epoch": 0.7042713932878105, "grad_norm": 0.7630526499307448, "learning_rate": 4.089605238113841e-06, "loss": 0.6029, "step": 38780 }, { "epoch": 0.7044530001452854, "grad_norm": 0.7286040260582903, "learning_rate": 4.084957525843043e-06, "loss": 0.6155, "step": 38790 }, { "epoch": 0.7046346070027604, "grad_norm": 0.741451305742584, "learning_rate": 4.080311778096682e-06, "loss": 0.6086, "step": 38800 }, { "epoch": 0.7048162138602354, "grad_norm": 0.7587003854233706, "learning_rate": 4.075667996417716e-06, "loss": 0.6153, "step": 38810 }, { "epoch": 0.7049978207177103, "grad_norm": 0.7453325448549198, "learning_rate": 4.071026182348451e-06, "loss": 0.615, "step": 38820 }, { "epoch": 0.7051794275751853, "grad_norm": 0.7540102712717833, "learning_rate": 4.06638633743055e-06, "loss": 0.5919, "step": 38830 }, { "epoch": 0.7053610344326602, "grad_norm": 0.7543091408319749, "learning_rate": 4.061748463205005e-06, "loss": 0.6024, "step": 38840 }, { "epoch": 0.7055426412901351, "grad_norm": 0.7465429903588662, "learning_rate": 4.057112561212173e-06, "loss": 0.6065, "step": 38850 }, { "epoch": 0.70572424814761, "grad_norm": 0.7625022381881826, "learning_rate": 4.052478632991738e-06, "loss": 0.6209, "step": 38860 }, { "epoch": 0.705905855005085, "grad_norm": 0.7610466748761857, "learning_rate": 4.047846680082741e-06, "loss": 0.6145, "step": 38870 }, { "epoch": 0.7060874618625599, "grad_norm": 0.749185954799997, "learning_rate": 4.043216704023557e-06, "loss": 0.6134, "step": 38880 }, { "epoch": 0.7062690687200349, "grad_norm": 0.7746150046920369, "learning_rate": 4.038588706351918e-06, "loss": 0.6129, "step": 38890 }, { "epoch": 0.7064506755775098, "grad_norm": 0.7559136934525371, "learning_rate": 4.033962688604881e-06, "loss": 0.6063, "step": 38900 }, { "epoch": 0.7066322824349848, "grad_norm": 0.7443392350620975, "learning_rate": 4.0293386523188636e-06, "loss": 0.6086, "step": 38910 }, { "epoch": 0.7068138892924597, "grad_norm": 0.7579643588910492, "learning_rate": 4.024716599029614e-06, "loss": 0.5986, "step": 38920 }, { "epoch": 0.7069954961499346, "grad_norm": 0.7237290233058735, "learning_rate": 4.020096530272219e-06, "loss": 0.6114, "step": 38930 }, { "epoch": 0.7071771030074095, "grad_norm": 0.7752731128116033, "learning_rate": 4.0154784475811216e-06, "loss": 0.6041, "step": 38940 }, { "epoch": 0.7073587098648845, "grad_norm": 0.7539965022900896, "learning_rate": 4.0108623524900854e-06, "loss": 0.609, "step": 38950 }, { "epoch": 0.7075403167223594, "grad_norm": 0.7641363858743456, "learning_rate": 4.006248246532233e-06, "loss": 0.6028, "step": 38960 }, { "epoch": 0.7077219235798343, "grad_norm": 0.751778679208928, "learning_rate": 4.00163613124001e-06, "loss": 0.6141, "step": 38970 }, { "epoch": 0.7079035304373094, "grad_norm": 0.7708358562024847, "learning_rate": 3.997026008145214e-06, "loss": 0.608, "step": 38980 }, { "epoch": 0.7080851372947843, "grad_norm": 0.7337026473297047, "learning_rate": 3.9924178787789696e-06, "loss": 0.6114, "step": 38990 }, { "epoch": 0.7082667441522592, "grad_norm": 0.7408307736764848, "learning_rate": 3.98781174467175e-06, "loss": 0.5972, "step": 39000 }, { "epoch": 0.7084483510097341, "grad_norm": 0.7759722850424216, "learning_rate": 3.9832076073533555e-06, "loss": 0.6087, "step": 39010 }, { "epoch": 0.7086299578672091, "grad_norm": 0.7543809887490249, "learning_rate": 3.978605468352934e-06, "loss": 0.6023, "step": 39020 }, { "epoch": 0.708811564724684, "grad_norm": 0.7367263028054127, "learning_rate": 3.974005329198957e-06, "loss": 0.6007, "step": 39030 }, { "epoch": 0.7089931715821589, "grad_norm": 0.77405108427043, "learning_rate": 3.969407191419242e-06, "loss": 0.6063, "step": 39040 }, { "epoch": 0.7091747784396338, "grad_norm": 0.7575617689208956, "learning_rate": 3.9648110565409415e-06, "loss": 0.617, "step": 39050 }, { "epoch": 0.7093563852971089, "grad_norm": 0.7316100702287931, "learning_rate": 3.960216926090535e-06, "loss": 0.6023, "step": 39060 }, { "epoch": 0.7095379921545838, "grad_norm": 0.7330650922137103, "learning_rate": 3.955624801593845e-06, "loss": 0.6095, "step": 39070 }, { "epoch": 0.7097195990120587, "grad_norm": 0.7616421631882575, "learning_rate": 3.951034684576018e-06, "loss": 0.6098, "step": 39080 }, { "epoch": 0.7099012058695336, "grad_norm": 0.7292834047459319, "learning_rate": 3.946446576561548e-06, "loss": 0.6084, "step": 39090 }, { "epoch": 0.7100828127270086, "grad_norm": 0.7753968244534055, "learning_rate": 3.941860479074246e-06, "loss": 0.6088, "step": 39100 }, { "epoch": 0.7102644195844835, "grad_norm": 0.7801124407867248, "learning_rate": 3.937276393637267e-06, "loss": 0.6105, "step": 39110 }, { "epoch": 0.7104460264419584, "grad_norm": 0.7480424668754616, "learning_rate": 3.932694321773092e-06, "loss": 0.5997, "step": 39120 }, { "epoch": 0.7106276332994333, "grad_norm": 0.7511699646751813, "learning_rate": 3.928114265003532e-06, "loss": 0.6184, "step": 39130 }, { "epoch": 0.7108092401569083, "grad_norm": 0.7405251771028957, "learning_rate": 3.923536224849736e-06, "loss": 0.6227, "step": 39140 }, { "epoch": 0.7109908470143833, "grad_norm": 0.7267757062088357, "learning_rate": 3.918960202832173e-06, "loss": 0.62, "step": 39150 }, { "epoch": 0.7111724538718582, "grad_norm": 0.7297993501841934, "learning_rate": 3.914386200470655e-06, "loss": 0.5993, "step": 39160 }, { "epoch": 0.7113540607293332, "grad_norm": 0.7786241993031318, "learning_rate": 3.909814219284306e-06, "loss": 0.5979, "step": 39170 }, { "epoch": 0.7115356675868081, "grad_norm": 0.7373603412831021, "learning_rate": 3.9052442607915975e-06, "loss": 0.6205, "step": 39180 }, { "epoch": 0.711717274444283, "grad_norm": 0.7703348098842104, "learning_rate": 3.900676326510313e-06, "loss": 0.6125, "step": 39190 }, { "epoch": 0.7118988813017579, "grad_norm": 0.7610302896584317, "learning_rate": 3.896110417957577e-06, "loss": 0.5964, "step": 39200 }, { "epoch": 0.7120804881592329, "grad_norm": 0.7399066605495482, "learning_rate": 3.89154653664983e-06, "loss": 0.6001, "step": 39210 }, { "epoch": 0.7122620950167078, "grad_norm": 0.7440849493110029, "learning_rate": 3.8869846841028435e-06, "loss": 0.61, "step": 39220 }, { "epoch": 0.7124437018741828, "grad_norm": 0.7424674829199611, "learning_rate": 3.882424861831721e-06, "loss": 0.6127, "step": 39230 }, { "epoch": 0.7126253087316577, "grad_norm": 0.7411379150012408, "learning_rate": 3.8778670713508794e-06, "loss": 0.6283, "step": 39240 }, { "epoch": 0.7128069155891327, "grad_norm": 0.7471035751864897, "learning_rate": 3.8733113141740754e-06, "loss": 0.6054, "step": 39250 }, { "epoch": 0.7129885224466076, "grad_norm": 0.7466536210579363, "learning_rate": 3.868757591814376e-06, "loss": 0.6084, "step": 39260 }, { "epoch": 0.7131701293040825, "grad_norm": 0.7921691871420615, "learning_rate": 3.864205905784186e-06, "loss": 0.603, "step": 39270 }, { "epoch": 0.7133517361615574, "grad_norm": 0.7820058661240067, "learning_rate": 3.85965625759522e-06, "loss": 0.6177, "step": 39280 }, { "epoch": 0.7135333430190324, "grad_norm": 0.7655182662187862, "learning_rate": 3.855108648758531e-06, "loss": 0.6087, "step": 39290 }, { "epoch": 0.7137149498765073, "grad_norm": 0.7279059229054603, "learning_rate": 3.850563080784478e-06, "loss": 0.6079, "step": 39300 }, { "epoch": 0.7138965567339822, "grad_norm": 0.7687896879223637, "learning_rate": 3.846019555182758e-06, "loss": 0.6094, "step": 39310 }, { "epoch": 0.7140781635914573, "grad_norm": 0.7433528471099984, "learning_rate": 3.84147807346238e-06, "loss": 0.6193, "step": 39320 }, { "epoch": 0.7142597704489322, "grad_norm": 0.7772092835115222, "learning_rate": 3.836938637131674e-06, "loss": 0.6101, "step": 39330 }, { "epoch": 0.7144413773064071, "grad_norm": 0.7688318309771305, "learning_rate": 3.832401247698297e-06, "loss": 0.6083, "step": 39340 }, { "epoch": 0.714622984163882, "grad_norm": 0.7679760912754761, "learning_rate": 3.8278659066692196e-06, "loss": 0.6078, "step": 39350 }, { "epoch": 0.714804591021357, "grad_norm": 0.7916061568744597, "learning_rate": 3.823332615550739e-06, "loss": 0.615, "step": 39360 }, { "epoch": 0.7149861978788319, "grad_norm": 0.7697934870253603, "learning_rate": 3.818801375848462e-06, "loss": 0.6058, "step": 39370 }, { "epoch": 0.7151678047363068, "grad_norm": 0.7603511588660146, "learning_rate": 3.8142721890673263e-06, "loss": 0.6005, "step": 39380 }, { "epoch": 0.7153494115937817, "grad_norm": 0.7853521177173057, "learning_rate": 3.8097450567115734e-06, "loss": 0.6089, "step": 39390 }, { "epoch": 0.7155310184512568, "grad_norm": 0.7612395882119687, "learning_rate": 3.805219980284779e-06, "loss": 0.6257, "step": 39400 }, { "epoch": 0.7157126253087317, "grad_norm": 0.7707358642775387, "learning_rate": 3.800696961289818e-06, "loss": 0.6088, "step": 39410 }, { "epoch": 0.7158942321662066, "grad_norm": 0.7696365312838441, "learning_rate": 3.7961760012289007e-06, "loss": 0.6056, "step": 39420 }, { "epoch": 0.7160758390236815, "grad_norm": 0.7375870839153534, "learning_rate": 3.791657101603534e-06, "loss": 0.6156, "step": 39430 }, { "epoch": 0.7162574458811565, "grad_norm": 0.7645698105759324, "learning_rate": 3.7871402639145573e-06, "loss": 0.6057, "step": 39440 }, { "epoch": 0.7164390527386314, "grad_norm": 0.75704710843194, "learning_rate": 3.7826254896621185e-06, "loss": 0.618, "step": 39450 }, { "epoch": 0.7166206595961063, "grad_norm": 0.7587263477749097, "learning_rate": 3.778112780345675e-06, "loss": 0.5903, "step": 39460 }, { "epoch": 0.7168022664535812, "grad_norm": 0.7280291302413385, "learning_rate": 3.773602137464012e-06, "loss": 0.608, "step": 39470 }, { "epoch": 0.7169838733110562, "grad_norm": 0.7396085756949252, "learning_rate": 3.7690935625152093e-06, "loss": 0.5998, "step": 39480 }, { "epoch": 0.7171654801685312, "grad_norm": 0.7369593898467222, "learning_rate": 3.7645870569966804e-06, "loss": 0.6041, "step": 39490 }, { "epoch": 0.7173470870260061, "grad_norm": 0.7694489079044711, "learning_rate": 3.7600826224051334e-06, "loss": 0.6057, "step": 39500 }, { "epoch": 0.7175286938834811, "grad_norm": 0.7975142988826902, "learning_rate": 3.755580260236603e-06, "loss": 0.609, "step": 39510 }, { "epoch": 0.717710300740956, "grad_norm": 0.7442541073324763, "learning_rate": 3.7510799719864277e-06, "loss": 0.6048, "step": 39520 }, { "epoch": 0.7178919075984309, "grad_norm": 0.7319283623210868, "learning_rate": 3.746581759149254e-06, "loss": 0.6012, "step": 39530 }, { "epoch": 0.7180735144559058, "grad_norm": 0.742625245817774, "learning_rate": 3.7420856232190505e-06, "loss": 0.6012, "step": 39540 }, { "epoch": 0.7182551213133808, "grad_norm": 0.7254740269830173, "learning_rate": 3.737591565689085e-06, "loss": 0.6034, "step": 39550 }, { "epoch": 0.7184367281708557, "grad_norm": 0.7856401769479923, "learning_rate": 3.7330995880519427e-06, "loss": 0.5932, "step": 39560 }, { "epoch": 0.7186183350283307, "grad_norm": 0.7613889390200258, "learning_rate": 3.7286096917995117e-06, "loss": 0.6044, "step": 39570 }, { "epoch": 0.7187999418858056, "grad_norm": 0.741119150940872, "learning_rate": 3.724121878422996e-06, "loss": 0.607, "step": 39580 }, { "epoch": 0.7189815487432806, "grad_norm": 0.7233808397472772, "learning_rate": 3.7196361494128986e-06, "loss": 0.6073, "step": 39590 }, { "epoch": 0.7191631556007555, "grad_norm": 0.8184509436503696, "learning_rate": 3.71515250625904e-06, "loss": 0.6182, "step": 39600 }, { "epoch": 0.7193447624582304, "grad_norm": 0.7521792061088491, "learning_rate": 3.7106709504505434e-06, "loss": 0.6178, "step": 39610 }, { "epoch": 0.7195263693157053, "grad_norm": 0.7836863036084848, "learning_rate": 3.7061914834758317e-06, "loss": 0.6145, "step": 39620 }, { "epoch": 0.7197079761731803, "grad_norm": 0.7615920782553338, "learning_rate": 3.7017141068226505e-06, "loss": 0.6184, "step": 39630 }, { "epoch": 0.7198895830306552, "grad_norm": 0.7840607517983248, "learning_rate": 3.697238821978033e-06, "loss": 0.6105, "step": 39640 }, { "epoch": 0.7200711898881301, "grad_norm": 0.7358202057720123, "learning_rate": 3.6927656304283345e-06, "loss": 0.6097, "step": 39650 }, { "epoch": 0.7202527967456052, "grad_norm": 0.740513793630826, "learning_rate": 3.688294533659199e-06, "loss": 0.6138, "step": 39660 }, { "epoch": 0.7204344036030801, "grad_norm": 0.7276412050346991, "learning_rate": 3.6838255331555906e-06, "loss": 0.6043, "step": 39670 }, { "epoch": 0.720616010460555, "grad_norm": 0.7249560972310062, "learning_rate": 3.6793586304017616e-06, "loss": 0.6083, "step": 39680 }, { "epoch": 0.7207976173180299, "grad_norm": 0.7859605608743911, "learning_rate": 3.674893826881282e-06, "loss": 0.604, "step": 39690 }, { "epoch": 0.7209792241755049, "grad_norm": 0.754101139787993, "learning_rate": 3.670431124077011e-06, "loss": 0.6185, "step": 39700 }, { "epoch": 0.7211608310329798, "grad_norm": 0.781585657265439, "learning_rate": 3.6659705234711253e-06, "loss": 0.6115, "step": 39710 }, { "epoch": 0.7213424378904547, "grad_norm": 0.7859962126592266, "learning_rate": 3.661512026545089e-06, "loss": 0.614, "step": 39720 }, { "epoch": 0.7215240447479296, "grad_norm": 0.7369104926738821, "learning_rate": 3.6570556347796725e-06, "loss": 0.6012, "step": 39730 }, { "epoch": 0.7217056516054047, "grad_norm": 0.7676755687397296, "learning_rate": 3.652601349654954e-06, "loss": 0.6132, "step": 39740 }, { "epoch": 0.7218872584628796, "grad_norm": 0.7357286375610328, "learning_rate": 3.6481491726502993e-06, "loss": 0.5931, "step": 39750 }, { "epoch": 0.7220688653203545, "grad_norm": 0.7629605406155244, "learning_rate": 3.643699105244387e-06, "loss": 0.6138, "step": 39760 }, { "epoch": 0.7222504721778295, "grad_norm": 0.7570895379147984, "learning_rate": 3.639251148915184e-06, "loss": 0.6083, "step": 39770 }, { "epoch": 0.7224320790353044, "grad_norm": 0.7602541397273952, "learning_rate": 3.634805305139966e-06, "loss": 0.6024, "step": 39780 }, { "epoch": 0.7226136858927793, "grad_norm": 0.7462682034422828, "learning_rate": 3.630361575395296e-06, "loss": 0.6132, "step": 39790 }, { "epoch": 0.7227952927502542, "grad_norm": 0.758436218470221, "learning_rate": 3.6259199611570473e-06, "loss": 0.6065, "step": 39800 }, { "epoch": 0.7229768996077292, "grad_norm": 0.7267867319510442, "learning_rate": 3.6214804639003786e-06, "loss": 0.6068, "step": 39810 }, { "epoch": 0.7231585064652041, "grad_norm": 0.7899450434849337, "learning_rate": 3.6170430850997527e-06, "loss": 0.6017, "step": 39820 }, { "epoch": 0.7233401133226791, "grad_norm": 0.7292048579007522, "learning_rate": 3.612607826228932e-06, "loss": 0.6113, "step": 39830 }, { "epoch": 0.723521720180154, "grad_norm": 0.7350972495974719, "learning_rate": 3.6081746887609635e-06, "loss": 0.6055, "step": 39840 }, { "epoch": 0.723703327037629, "grad_norm": 0.7423896876176413, "learning_rate": 3.603743674168202e-06, "loss": 0.6003, "step": 39850 }, { "epoch": 0.7238849338951039, "grad_norm": 0.7321296061539613, "learning_rate": 3.599314783922284e-06, "loss": 0.601, "step": 39860 }, { "epoch": 0.7240665407525788, "grad_norm": 0.7348957428574105, "learning_rate": 3.5948880194941573e-06, "loss": 0.6149, "step": 39870 }, { "epoch": 0.7242481476100537, "grad_norm": 0.7677141484468782, "learning_rate": 3.590463382354046e-06, "loss": 0.6083, "step": 39880 }, { "epoch": 0.7244297544675287, "grad_norm": 0.7483645103043386, "learning_rate": 3.5860408739714816e-06, "loss": 0.6101, "step": 39890 }, { "epoch": 0.7246113613250036, "grad_norm": 0.7922443770936456, "learning_rate": 3.5816204958152777e-06, "loss": 0.6226, "step": 39900 }, { "epoch": 0.7247929681824786, "grad_norm": 0.7665378629808439, "learning_rate": 3.577202249353552e-06, "loss": 0.6111, "step": 39910 }, { "epoch": 0.7249745750399536, "grad_norm": 0.7775367874680101, "learning_rate": 3.572786136053704e-06, "loss": 0.6065, "step": 39920 }, { "epoch": 0.7251561818974285, "grad_norm": 0.7531566409021995, "learning_rate": 3.5683721573824258e-06, "loss": 0.6042, "step": 39930 }, { "epoch": 0.7253377887549034, "grad_norm": 0.7695798393796811, "learning_rate": 3.56396031480571e-06, "loss": 0.6013, "step": 39940 }, { "epoch": 0.7255193956123783, "grad_norm": 0.7298712558690861, "learning_rate": 3.5595506097888266e-06, "loss": 0.6221, "step": 39950 }, { "epoch": 0.7257010024698533, "grad_norm": 0.7588032718516698, "learning_rate": 3.5551430437963474e-06, "loss": 0.6049, "step": 39960 }, { "epoch": 0.7258826093273282, "grad_norm": 0.7415070061284748, "learning_rate": 3.550737618292124e-06, "loss": 0.5925, "step": 39970 }, { "epoch": 0.7260642161848031, "grad_norm": 0.7562342027605881, "learning_rate": 3.5463343347393065e-06, "loss": 0.6009, "step": 39980 }, { "epoch": 0.726245823042278, "grad_norm": 0.7113280199797536, "learning_rate": 3.541933194600322e-06, "loss": 0.6078, "step": 39990 }, { "epoch": 0.7264274298997531, "grad_norm": 0.7193764323565248, "learning_rate": 3.5375341993369007e-06, "loss": 0.6107, "step": 40000 }, { "epoch": 0.726609036757228, "grad_norm": 0.7570955291613048, "learning_rate": 3.5331373504100485e-06, "loss": 0.5973, "step": 40010 }, { "epoch": 0.7267906436147029, "grad_norm": 0.7489262802924626, "learning_rate": 3.528742649280057e-06, "loss": 0.5876, "step": 40020 }, { "epoch": 0.7269722504721778, "grad_norm": 0.7595134817867245, "learning_rate": 3.524350097406518e-06, "loss": 0.6079, "step": 40030 }, { "epoch": 0.7271538573296528, "grad_norm": 0.7759477560004102, "learning_rate": 3.519959696248294e-06, "loss": 0.6304, "step": 40040 }, { "epoch": 0.7273354641871277, "grad_norm": 0.765802045298198, "learning_rate": 3.515571447263546e-06, "loss": 0.5961, "step": 40050 }, { "epoch": 0.7275170710446026, "grad_norm": 0.7563982194234631, "learning_rate": 3.5111853519097094e-06, "loss": 0.5895, "step": 40060 }, { "epoch": 0.7276986779020775, "grad_norm": 0.7274857292185822, "learning_rate": 3.5068014116435144e-06, "loss": 0.6132, "step": 40070 }, { "epoch": 0.7278802847595526, "grad_norm": 0.7306365441201885, "learning_rate": 3.5024196279209657e-06, "loss": 0.6207, "step": 40080 }, { "epoch": 0.7280618916170275, "grad_norm": 0.7488840151447962, "learning_rate": 3.4980400021973615e-06, "loss": 0.5988, "step": 40090 }, { "epoch": 0.7282434984745024, "grad_norm": 0.787635791631766, "learning_rate": 3.4936625359272726e-06, "loss": 0.6023, "step": 40100 }, { "epoch": 0.7284251053319774, "grad_norm": 0.7648148992726639, "learning_rate": 3.4892872305645655e-06, "loss": 0.6028, "step": 40110 }, { "epoch": 0.7286067121894523, "grad_norm": 0.7361349888108216, "learning_rate": 3.4849140875623768e-06, "loss": 0.6204, "step": 40120 }, { "epoch": 0.7287883190469272, "grad_norm": 0.7671621213371583, "learning_rate": 3.4805431083731265e-06, "loss": 0.6073, "step": 40130 }, { "epoch": 0.7289699259044021, "grad_norm": 0.8324975033389743, "learning_rate": 3.4761742944485277e-06, "loss": 0.6022, "step": 40140 }, { "epoch": 0.729151532761877, "grad_norm": 0.7572282164099698, "learning_rate": 3.471807647239559e-06, "loss": 0.5962, "step": 40150 }, { "epoch": 0.729333139619352, "grad_norm": 0.7664165235333001, "learning_rate": 3.4674431681964925e-06, "loss": 0.6204, "step": 40160 }, { "epoch": 0.729514746476827, "grad_norm": 0.7378880272924535, "learning_rate": 3.463080858768868e-06, "loss": 0.6064, "step": 40170 }, { "epoch": 0.7296963533343019, "grad_norm": 0.7846773016187378, "learning_rate": 3.4587207204055164e-06, "loss": 0.632, "step": 40180 }, { "epoch": 0.7298779601917769, "grad_norm": 0.771022001032966, "learning_rate": 3.4543627545545365e-06, "loss": 0.6032, "step": 40190 }, { "epoch": 0.7300595670492518, "grad_norm": 0.7672963973024639, "learning_rate": 3.4500069626633183e-06, "loss": 0.6131, "step": 40200 }, { "epoch": 0.7302411739067267, "grad_norm": 0.7398374093317691, "learning_rate": 3.4456533461785137e-06, "loss": 0.611, "step": 40210 }, { "epoch": 0.7304227807642016, "grad_norm": 0.7383546981468272, "learning_rate": 3.441301906546065e-06, "loss": 0.611, "step": 40220 }, { "epoch": 0.7306043876216766, "grad_norm": 0.734570582805836, "learning_rate": 3.4369526452111924e-06, "loss": 0.6002, "step": 40230 }, { "epoch": 0.7307859944791515, "grad_norm": 0.7509971493330213, "learning_rate": 3.4326055636183784e-06, "loss": 0.6095, "step": 40240 }, { "epoch": 0.7309676013366264, "grad_norm": 0.7514855240827217, "learning_rate": 3.4282606632114004e-06, "loss": 0.6009, "step": 40250 }, { "epoch": 0.7311492081941015, "grad_norm": 0.7570554504391942, "learning_rate": 3.4239179454332926e-06, "loss": 0.601, "step": 40260 }, { "epoch": 0.7313308150515764, "grad_norm": 0.7639226764685392, "learning_rate": 3.4195774117263813e-06, "loss": 0.6041, "step": 40270 }, { "epoch": 0.7315124219090513, "grad_norm": 0.7524937458990761, "learning_rate": 3.415239063532253e-06, "loss": 0.6197, "step": 40280 }, { "epoch": 0.7316940287665262, "grad_norm": 0.7824805907709051, "learning_rate": 3.4109029022917816e-06, "loss": 0.6051, "step": 40290 }, { "epoch": 0.7318756356240012, "grad_norm": 0.7671703025601129, "learning_rate": 3.4065689294451022e-06, "loss": 0.6167, "step": 40300 }, { "epoch": 0.7320572424814761, "grad_norm": 0.7218922024370366, "learning_rate": 3.402237146431633e-06, "loss": 0.5946, "step": 40310 }, { "epoch": 0.732238849338951, "grad_norm": 0.8175792734584212, "learning_rate": 3.3979075546900597e-06, "loss": 0.6317, "step": 40320 }, { "epoch": 0.7324204561964259, "grad_norm": 0.759870705231742, "learning_rate": 3.393580155658337e-06, "loss": 0.6138, "step": 40330 }, { "epoch": 0.732602063053901, "grad_norm": 0.7666529875103064, "learning_rate": 3.3892549507737025e-06, "loss": 0.6034, "step": 40340 }, { "epoch": 0.7327836699113759, "grad_norm": 0.7066980112790103, "learning_rate": 3.384931941472652e-06, "loss": 0.5991, "step": 40350 }, { "epoch": 0.7329652767688508, "grad_norm": 0.7737827493956564, "learning_rate": 3.380611129190966e-06, "loss": 0.6159, "step": 40360 }, { "epoch": 0.7331468836263257, "grad_norm": 0.7554469628054048, "learning_rate": 3.3762925153636785e-06, "loss": 0.6051, "step": 40370 }, { "epoch": 0.7333284904838007, "grad_norm": 0.7356719752347554, "learning_rate": 3.3719761014251107e-06, "loss": 0.6146, "step": 40380 }, { "epoch": 0.7335100973412756, "grad_norm": 0.7776049748676859, "learning_rate": 3.3676618888088387e-06, "loss": 0.6189, "step": 40390 }, { "epoch": 0.7336917041987505, "grad_norm": 0.7471366650062767, "learning_rate": 3.3633498789477193e-06, "loss": 0.6086, "step": 40400 }, { "epoch": 0.7338733110562254, "grad_norm": 0.7657025001293123, "learning_rate": 3.359040073273866e-06, "loss": 0.5951, "step": 40410 }, { "epoch": 0.7340549179137004, "grad_norm": 0.7403609088525983, "learning_rate": 3.3547324732186728e-06, "loss": 0.6061, "step": 40420 }, { "epoch": 0.7342365247711754, "grad_norm": 0.7316313552312537, "learning_rate": 3.3504270802127926e-06, "loss": 0.6052, "step": 40430 }, { "epoch": 0.7344181316286503, "grad_norm": 0.7428502412722016, "learning_rate": 3.346123895686142e-06, "loss": 0.5973, "step": 40440 }, { "epoch": 0.7345997384861253, "grad_norm": 0.7480322642540513, "learning_rate": 3.341822921067919e-06, "loss": 0.6166, "step": 40450 }, { "epoch": 0.7347813453436002, "grad_norm": 0.7614235782889098, "learning_rate": 3.3375241577865693e-06, "loss": 0.6092, "step": 40460 }, { "epoch": 0.7349629522010751, "grad_norm": 0.7596003063000913, "learning_rate": 3.333227607269821e-06, "loss": 0.6058, "step": 40470 }, { "epoch": 0.73514455905855, "grad_norm": 0.7380938342199995, "learning_rate": 3.328933270944652e-06, "loss": 0.606, "step": 40480 }, { "epoch": 0.735326165916025, "grad_norm": 0.7355924578698452, "learning_rate": 3.3246411502373186e-06, "loss": 0.5987, "step": 40490 }, { "epoch": 0.7355077727734999, "grad_norm": 0.7645618111263119, "learning_rate": 3.32035124657333e-06, "loss": 0.6007, "step": 40500 }, { "epoch": 0.7356893796309749, "grad_norm": 0.738076505618816, "learning_rate": 3.316063561377468e-06, "loss": 0.611, "step": 40510 }, { "epoch": 0.7358709864884498, "grad_norm": 0.7639356233048101, "learning_rate": 3.3117780960737723e-06, "loss": 0.605, "step": 40520 }, { "epoch": 0.7360525933459248, "grad_norm": 0.7727862061678817, "learning_rate": 3.307494852085541e-06, "loss": 0.6134, "step": 40530 }, { "epoch": 0.7362342002033997, "grad_norm": 0.7470060234487601, "learning_rate": 3.303213830835349e-06, "loss": 0.6072, "step": 40540 }, { "epoch": 0.7364158070608746, "grad_norm": 0.7717150777409315, "learning_rate": 3.2989350337450152e-06, "loss": 0.5958, "step": 40550 }, { "epoch": 0.7365974139183495, "grad_norm": 0.7427416144383007, "learning_rate": 3.294658462235637e-06, "loss": 0.6136, "step": 40560 }, { "epoch": 0.7367790207758245, "grad_norm": 0.7640019600471395, "learning_rate": 3.2903841177275566e-06, "loss": 0.6066, "step": 40570 }, { "epoch": 0.7369606276332994, "grad_norm": 0.738214443946399, "learning_rate": 3.2861120016403904e-06, "loss": 0.6178, "step": 40580 }, { "epoch": 0.7371422344907743, "grad_norm": 0.7746100263495964, "learning_rate": 3.2818421153930026e-06, "loss": 0.6031, "step": 40590 }, { "epoch": 0.7373238413482494, "grad_norm": 0.7274774290814136, "learning_rate": 3.2775744604035285e-06, "loss": 0.6021, "step": 40600 }, { "epoch": 0.7375054482057243, "grad_norm": 0.7263968076257193, "learning_rate": 3.2733090380893506e-06, "loss": 0.6017, "step": 40610 }, { "epoch": 0.7376870550631992, "grad_norm": 0.7715404739639631, "learning_rate": 3.2690458498671184e-06, "loss": 0.6169, "step": 40620 }, { "epoch": 0.7378686619206741, "grad_norm": 0.8025525656540753, "learning_rate": 3.2647848971527398e-06, "loss": 0.6209, "step": 40630 }, { "epoch": 0.738050268778149, "grad_norm": 0.7436922036053039, "learning_rate": 3.260526181361372e-06, "loss": 0.6038, "step": 40640 }, { "epoch": 0.738231875635624, "grad_norm": 0.7614193830069145, "learning_rate": 3.25626970390744e-06, "loss": 0.5902, "step": 40650 }, { "epoch": 0.7384134824930989, "grad_norm": 0.741643152418165, "learning_rate": 3.2520154662046143e-06, "loss": 0.5932, "step": 40660 }, { "epoch": 0.7385950893505738, "grad_norm": 0.7275671202985948, "learning_rate": 3.2477634696658323e-06, "loss": 0.6115, "step": 40670 }, { "epoch": 0.7387766962080489, "grad_norm": 0.7359446386435643, "learning_rate": 3.2435137157032748e-06, "loss": 0.6068, "step": 40680 }, { "epoch": 0.7389583030655238, "grad_norm": 0.7676636999359553, "learning_rate": 3.2392662057283943e-06, "loss": 0.6092, "step": 40690 }, { "epoch": 0.7391399099229987, "grad_norm": 0.7406640045730691, "learning_rate": 3.2350209411518785e-06, "loss": 0.5934, "step": 40700 }, { "epoch": 0.7393215167804736, "grad_norm": 0.7806806107394644, "learning_rate": 3.230777923383689e-06, "loss": 0.6219, "step": 40710 }, { "epoch": 0.7395031236379486, "grad_norm": 0.7564317914579614, "learning_rate": 3.226537153833026e-06, "loss": 0.6018, "step": 40720 }, { "epoch": 0.7396847304954235, "grad_norm": 0.7437305887637035, "learning_rate": 3.222298633908347e-06, "loss": 0.6069, "step": 40730 }, { "epoch": 0.7398663373528984, "grad_norm": 0.7689670969499394, "learning_rate": 3.218062365017369e-06, "loss": 0.6004, "step": 40740 }, { "epoch": 0.7400479442103733, "grad_norm": 0.7888452660677384, "learning_rate": 3.213828348567051e-06, "loss": 0.6073, "step": 40750 }, { "epoch": 0.7402295510678483, "grad_norm": 0.7351068848399711, "learning_rate": 3.2095965859636147e-06, "loss": 0.5935, "step": 40760 }, { "epoch": 0.7404111579253233, "grad_norm": 0.776382314845364, "learning_rate": 3.205367078612522e-06, "loss": 0.6048, "step": 40770 }, { "epoch": 0.7405927647827982, "grad_norm": 0.7575016982511146, "learning_rate": 3.201139827918499e-06, "loss": 0.609, "step": 40780 }, { "epoch": 0.7407743716402732, "grad_norm": 0.7557718068813959, "learning_rate": 3.196914835285506e-06, "loss": 0.6072, "step": 40790 }, { "epoch": 0.7409559784977481, "grad_norm": 0.7748256909901368, "learning_rate": 3.192692102116771e-06, "loss": 0.6112, "step": 40800 }, { "epoch": 0.741137585355223, "grad_norm": 0.7975178028872086, "learning_rate": 3.1884716298147554e-06, "loss": 0.6096, "step": 40810 }, { "epoch": 0.7413191922126979, "grad_norm": 0.7279652375683772, "learning_rate": 3.1842534197811823e-06, "loss": 0.6049, "step": 40820 }, { "epoch": 0.7415007990701729, "grad_norm": 0.7259196453585355, "learning_rate": 3.180037473417017e-06, "loss": 0.6139, "step": 40830 }, { "epoch": 0.7416824059276478, "grad_norm": 0.7448385487459984, "learning_rate": 3.17582379212247e-06, "loss": 0.6027, "step": 40840 }, { "epoch": 0.7418640127851228, "grad_norm": 0.7289790118819617, "learning_rate": 3.171612377297011e-06, "loss": 0.5981, "step": 40850 }, { "epoch": 0.7420456196425977, "grad_norm": 0.7554037554013199, "learning_rate": 3.167403230339342e-06, "loss": 0.6105, "step": 40860 }, { "epoch": 0.7422272265000727, "grad_norm": 0.7746456348638993, "learning_rate": 3.1631963526474275e-06, "loss": 0.6031, "step": 40870 }, { "epoch": 0.7424088333575476, "grad_norm": 0.7845328233375405, "learning_rate": 3.1589917456184617e-06, "loss": 0.6036, "step": 40880 }, { "epoch": 0.7425904402150225, "grad_norm": 0.738309692714563, "learning_rate": 3.154789410648902e-06, "loss": 0.6026, "step": 40890 }, { "epoch": 0.7427720470724974, "grad_norm": 0.746403995803417, "learning_rate": 3.1505893491344353e-06, "loss": 0.5953, "step": 40900 }, { "epoch": 0.7429536539299724, "grad_norm": 0.7730707258701003, "learning_rate": 3.146391562470006e-06, "loss": 0.6107, "step": 40910 }, { "epoch": 0.7431352607874473, "grad_norm": 0.7632135156448597, "learning_rate": 3.142196052049795e-06, "loss": 0.5948, "step": 40920 }, { "epoch": 0.7433168676449222, "grad_norm": 0.7926646716726529, "learning_rate": 3.1380028192672275e-06, "loss": 0.6141, "step": 40930 }, { "epoch": 0.7434984745023973, "grad_norm": 0.7818330850257178, "learning_rate": 3.1338118655149796e-06, "loss": 0.6129, "step": 40940 }, { "epoch": 0.7436800813598722, "grad_norm": 0.7869792674784906, "learning_rate": 3.1296231921849597e-06, "loss": 0.6072, "step": 40950 }, { "epoch": 0.7438616882173471, "grad_norm": 0.7414086447443046, "learning_rate": 3.1254368006683313e-06, "loss": 0.6153, "step": 40960 }, { "epoch": 0.744043295074822, "grad_norm": 0.7461482315721615, "learning_rate": 3.1212526923554853e-06, "loss": 0.6139, "step": 40970 }, { "epoch": 0.744224901932297, "grad_norm": 0.7243930148832428, "learning_rate": 3.1170708686360705e-06, "loss": 0.5991, "step": 40980 }, { "epoch": 0.7444065087897719, "grad_norm": 0.7454768865639874, "learning_rate": 3.1128913308989616e-06, "loss": 0.6101, "step": 40990 }, { "epoch": 0.7445881156472468, "grad_norm": 0.7604083788579176, "learning_rate": 3.108714080532288e-06, "loss": 0.6006, "step": 41000 }, { "epoch": 0.7447697225047217, "grad_norm": 0.7581186942744691, "learning_rate": 3.104539118923405e-06, "loss": 0.6148, "step": 41010 }, { "epoch": 0.7449513293621968, "grad_norm": 0.7372234379846289, "learning_rate": 3.100366447458919e-06, "loss": 0.6209, "step": 41020 }, { "epoch": 0.7451329362196717, "grad_norm": 0.7503162132140674, "learning_rate": 3.096196067524676e-06, "loss": 0.6102, "step": 41030 }, { "epoch": 0.7453145430771466, "grad_norm": 0.7255311852271559, "learning_rate": 3.0920279805057507e-06, "loss": 0.6136, "step": 41040 }, { "epoch": 0.7454961499346215, "grad_norm": 0.7592820202096339, "learning_rate": 3.087862187786468e-06, "loss": 0.6121, "step": 41050 }, { "epoch": 0.7456777567920965, "grad_norm": 0.7486088364540154, "learning_rate": 3.0836986907503796e-06, "loss": 0.6093, "step": 41060 }, { "epoch": 0.7458593636495714, "grad_norm": 0.7550857911174796, "learning_rate": 3.0795374907802865e-06, "loss": 0.6219, "step": 41070 }, { "epoch": 0.7460409705070463, "grad_norm": 0.7446887395683907, "learning_rate": 3.0753785892582154e-06, "loss": 0.6131, "step": 41080 }, { "epoch": 0.7462225773645212, "grad_norm": 0.7687679840155268, "learning_rate": 3.0712219875654412e-06, "loss": 0.6099, "step": 41090 }, { "epoch": 0.7464041842219962, "grad_norm": 0.7338582798647709, "learning_rate": 3.067067687082462e-06, "loss": 0.5956, "step": 41100 }, { "epoch": 0.7465857910794712, "grad_norm": 0.7642046531929945, "learning_rate": 3.0629156891890256e-06, "loss": 0.6139, "step": 41110 }, { "epoch": 0.7467673979369461, "grad_norm": 0.7411104931638061, "learning_rate": 3.0587659952641047e-06, "loss": 0.6212, "step": 41120 }, { "epoch": 0.7469490047944211, "grad_norm": 0.7326611755662776, "learning_rate": 3.0546186066859064e-06, "loss": 0.602, "step": 41130 }, { "epoch": 0.747130611651896, "grad_norm": 0.7430854146205851, "learning_rate": 3.0504735248318816e-06, "loss": 0.5981, "step": 41140 }, { "epoch": 0.7473122185093709, "grad_norm": 0.7924807180144228, "learning_rate": 3.0463307510787044e-06, "loss": 0.607, "step": 41150 }, { "epoch": 0.7474938253668458, "grad_norm": 0.7555377693003424, "learning_rate": 3.0421902868022922e-06, "loss": 0.6058, "step": 41160 }, { "epoch": 0.7476754322243208, "grad_norm": 0.7862541818228658, "learning_rate": 3.0380521333777844e-06, "loss": 0.6114, "step": 41170 }, { "epoch": 0.7478570390817957, "grad_norm": 0.7463057296862899, "learning_rate": 3.033916292179566e-06, "loss": 0.5986, "step": 41180 }, { "epoch": 0.7480386459392707, "grad_norm": 0.7431396886565137, "learning_rate": 3.0297827645812394e-06, "loss": 0.6027, "step": 41190 }, { "epoch": 0.7482202527967456, "grad_norm": 0.7669009282525486, "learning_rate": 3.025651551955654e-06, "loss": 0.5985, "step": 41200 }, { "epoch": 0.7484018596542206, "grad_norm": 0.7739310327767764, "learning_rate": 3.0215226556748743e-06, "loss": 0.6222, "step": 41210 }, { "epoch": 0.7485834665116955, "grad_norm": 0.7676407851668247, "learning_rate": 3.017396077110212e-06, "loss": 0.6197, "step": 41220 }, { "epoch": 0.7487650733691704, "grad_norm": 0.7573672354684229, "learning_rate": 3.013271817632195e-06, "loss": 0.6086, "step": 41230 }, { "epoch": 0.7489466802266453, "grad_norm": 0.7410282730508022, "learning_rate": 3.0091498786105854e-06, "loss": 0.6146, "step": 41240 }, { "epoch": 0.7491282870841203, "grad_norm": 0.7623307186572623, "learning_rate": 3.005030261414382e-06, "loss": 0.6007, "step": 41250 }, { "epoch": 0.7493098939415952, "grad_norm": 0.7639022326158379, "learning_rate": 3.0009129674118e-06, "loss": 0.6256, "step": 41260 }, { "epoch": 0.7494915007990701, "grad_norm": 0.7298172146793932, "learning_rate": 2.9967979979702945e-06, "loss": 0.6077, "step": 41270 }, { "epoch": 0.7496731076565452, "grad_norm": 0.7667800751722161, "learning_rate": 2.9926853544565382e-06, "loss": 0.6268, "step": 41280 }, { "epoch": 0.7498547145140201, "grad_norm": 0.7542388592133614, "learning_rate": 2.9885750382364433e-06, "loss": 0.6257, "step": 41290 }, { "epoch": 0.750036321371495, "grad_norm": 0.7527073185529664, "learning_rate": 2.984467050675136e-06, "loss": 0.6143, "step": 41300 }, { "epoch": 0.7502179282289699, "grad_norm": 0.7542938359315055, "learning_rate": 2.98036139313698e-06, "loss": 0.6004, "step": 41310 }, { "epoch": 0.7503995350864449, "grad_norm": 0.7626843037304398, "learning_rate": 2.976258066985559e-06, "loss": 0.6016, "step": 41320 }, { "epoch": 0.7505811419439198, "grad_norm": 0.7708535686362618, "learning_rate": 2.9721570735836804e-06, "loss": 0.6131, "step": 41330 }, { "epoch": 0.7507627488013947, "grad_norm": 0.7517397024480451, "learning_rate": 2.9680584142933857e-06, "loss": 0.6165, "step": 41340 }, { "epoch": 0.7509443556588696, "grad_norm": 0.7471654631458938, "learning_rate": 2.9639620904759315e-06, "loss": 0.6124, "step": 41350 }, { "epoch": 0.7511259625163447, "grad_norm": 0.7648818514654617, "learning_rate": 2.9598681034918075e-06, "loss": 0.6203, "step": 41360 }, { "epoch": 0.7513075693738196, "grad_norm": 0.7355656060546226, "learning_rate": 2.9557764547007174e-06, "loss": 0.6104, "step": 41370 }, { "epoch": 0.7514891762312945, "grad_norm": 0.744301749261148, "learning_rate": 2.951687145461599e-06, "loss": 0.607, "step": 41380 }, { "epoch": 0.7516707830887694, "grad_norm": 0.714245622373523, "learning_rate": 2.9476001771326048e-06, "loss": 0.5946, "step": 41390 }, { "epoch": 0.7518523899462444, "grad_norm": 0.7580789018625402, "learning_rate": 2.9435155510711157e-06, "loss": 0.5963, "step": 41400 }, { "epoch": 0.7520339968037193, "grad_norm": 0.7338433170695443, "learning_rate": 2.939433268633727e-06, "loss": 0.6066, "step": 41410 }, { "epoch": 0.7522156036611942, "grad_norm": 0.7696945216976931, "learning_rate": 2.9353533311762626e-06, "loss": 0.6053, "step": 41420 }, { "epoch": 0.7523972105186691, "grad_norm": 0.752808711880722, "learning_rate": 2.9312757400537704e-06, "loss": 0.6108, "step": 41430 }, { "epoch": 0.7525788173761441, "grad_norm": 0.7570160498590571, "learning_rate": 2.927200496620507e-06, "loss": 0.5991, "step": 41440 }, { "epoch": 0.7527604242336191, "grad_norm": 0.7366456870932931, "learning_rate": 2.9231276022299626e-06, "loss": 0.615, "step": 41450 }, { "epoch": 0.752942031091094, "grad_norm": 0.7349419944602819, "learning_rate": 2.9190570582348352e-06, "loss": 0.6006, "step": 41460 }, { "epoch": 0.753123637948569, "grad_norm": 0.7704512859009433, "learning_rate": 2.914988865987054e-06, "loss": 0.6023, "step": 41470 }, { "epoch": 0.7533052448060439, "grad_norm": 0.7390497221402651, "learning_rate": 2.9109230268377544e-06, "loss": 0.6149, "step": 41480 }, { "epoch": 0.7534868516635188, "grad_norm": 0.7599833442371805, "learning_rate": 2.9068595421373024e-06, "loss": 0.6133, "step": 41490 }, { "epoch": 0.7536684585209937, "grad_norm": 0.7665869321652165, "learning_rate": 2.9027984132352728e-06, "loss": 0.5986, "step": 41500 }, { "epoch": 0.7538500653784687, "grad_norm": 0.7509976223710684, "learning_rate": 2.8987396414804667e-06, "loss": 0.621, "step": 41510 }, { "epoch": 0.7540316722359436, "grad_norm": 0.7609293044934906, "learning_rate": 2.894683228220895e-06, "loss": 0.6106, "step": 41520 }, { "epoch": 0.7542132790934186, "grad_norm": 0.7222503607753753, "learning_rate": 2.890629174803783e-06, "loss": 0.6312, "step": 41530 }, { "epoch": 0.7543948859508935, "grad_norm": 0.7440059848855615, "learning_rate": 2.8865774825755854e-06, "loss": 0.605, "step": 41540 }, { "epoch": 0.7545764928083685, "grad_norm": 0.8062724883312173, "learning_rate": 2.882528152881956e-06, "loss": 0.602, "step": 41550 }, { "epoch": 0.7547580996658434, "grad_norm": 0.7398868270598157, "learning_rate": 2.8784811870677797e-06, "loss": 0.603, "step": 41560 }, { "epoch": 0.7549397065233183, "grad_norm": 0.7478261482662717, "learning_rate": 2.874436586477143e-06, "loss": 0.6099, "step": 41570 }, { "epoch": 0.7551213133807932, "grad_norm": 0.7729517090721314, "learning_rate": 2.8703943524533582e-06, "loss": 0.6094, "step": 41580 }, { "epoch": 0.7553029202382682, "grad_norm": 0.7353822318866045, "learning_rate": 2.866354486338939e-06, "loss": 0.5888, "step": 41590 }, { "epoch": 0.7554845270957431, "grad_norm": 0.7472690722662619, "learning_rate": 2.8623169894756274e-06, "loss": 0.5952, "step": 41600 }, { "epoch": 0.755666133953218, "grad_norm": 0.7235034999250367, "learning_rate": 2.858281863204365e-06, "loss": 0.6106, "step": 41610 }, { "epoch": 0.7558477408106931, "grad_norm": 0.8077522606083105, "learning_rate": 2.854249108865317e-06, "loss": 0.6074, "step": 41620 }, { "epoch": 0.756029347668168, "grad_norm": 0.7582407516891054, "learning_rate": 2.850218727797852e-06, "loss": 0.6102, "step": 41630 }, { "epoch": 0.7562109545256429, "grad_norm": 0.7703471702812952, "learning_rate": 2.8461907213405526e-06, "loss": 0.6105, "step": 41640 }, { "epoch": 0.7563925613831178, "grad_norm": 0.7321609120192972, "learning_rate": 2.8421650908312204e-06, "loss": 0.6018, "step": 41650 }, { "epoch": 0.7565741682405928, "grad_norm": 0.7406318192251546, "learning_rate": 2.8381418376068547e-06, "loss": 0.5995, "step": 41660 }, { "epoch": 0.7567557750980677, "grad_norm": 0.7559479954923053, "learning_rate": 2.8341209630036783e-06, "loss": 0.636, "step": 41670 }, { "epoch": 0.7569373819555426, "grad_norm": 0.7631860093475072, "learning_rate": 2.830102468357113e-06, "loss": 0.6091, "step": 41680 }, { "epoch": 0.7571189888130175, "grad_norm": 0.7612527862223002, "learning_rate": 2.8260863550018013e-06, "loss": 0.6188, "step": 41690 }, { "epoch": 0.7573005956704926, "grad_norm": 0.735046180626288, "learning_rate": 2.822072624271581e-06, "loss": 0.6117, "step": 41700 }, { "epoch": 0.7574822025279675, "grad_norm": 0.7486705832622494, "learning_rate": 2.818061277499514e-06, "loss": 0.6049, "step": 41710 }, { "epoch": 0.7576638093854424, "grad_norm": 0.7425448988447753, "learning_rate": 2.8140523160178556e-06, "loss": 0.6108, "step": 41720 }, { "epoch": 0.7578454162429173, "grad_norm": 0.7368488417907046, "learning_rate": 2.810045741158084e-06, "loss": 0.6011, "step": 41730 }, { "epoch": 0.7580270231003923, "grad_norm": 0.7773068385524674, "learning_rate": 2.8060415542508713e-06, "loss": 0.608, "step": 41740 }, { "epoch": 0.7582086299578672, "grad_norm": 0.7451086455420689, "learning_rate": 2.8020397566260995e-06, "loss": 0.608, "step": 41750 }, { "epoch": 0.7583902368153421, "grad_norm": 0.7686133033509063, "learning_rate": 2.798040349612866e-06, "loss": 0.6073, "step": 41760 }, { "epoch": 0.758571843672817, "grad_norm": 0.7599659345186714, "learning_rate": 2.7940433345394623e-06, "loss": 0.5932, "step": 41770 }, { "epoch": 0.758753450530292, "grad_norm": 0.7470554567223487, "learning_rate": 2.790048712733395e-06, "loss": 0.6085, "step": 41780 }, { "epoch": 0.758935057387767, "grad_norm": 0.818639074202983, "learning_rate": 2.786056485521369e-06, "loss": 0.6293, "step": 41790 }, { "epoch": 0.7591166642452419, "grad_norm": 0.7568911414512784, "learning_rate": 2.7820666542292994e-06, "loss": 0.6019, "step": 41800 }, { "epoch": 0.7592982711027169, "grad_norm": 0.7690726901584567, "learning_rate": 2.778079220182298e-06, "loss": 0.6028, "step": 41810 }, { "epoch": 0.7594798779601918, "grad_norm": 0.7390317581317971, "learning_rate": 2.7740941847046885e-06, "loss": 0.6109, "step": 41820 }, { "epoch": 0.7596614848176667, "grad_norm": 0.7783773713600641, "learning_rate": 2.770111549119998e-06, "loss": 0.6089, "step": 41830 }, { "epoch": 0.7598430916751416, "grad_norm": 0.7433361095198616, "learning_rate": 2.766131314750947e-06, "loss": 0.6088, "step": 41840 }, { "epoch": 0.7600246985326166, "grad_norm": 0.730662277442114, "learning_rate": 2.7621534829194695e-06, "loss": 0.5958, "step": 41850 }, { "epoch": 0.7602063053900915, "grad_norm": 0.7245858776601657, "learning_rate": 2.758178054946693e-06, "loss": 0.6119, "step": 41860 }, { "epoch": 0.7603879122475665, "grad_norm": 0.7544934598062601, "learning_rate": 2.7542050321529545e-06, "loss": 0.6054, "step": 41870 }, { "epoch": 0.7605695191050414, "grad_norm": 0.742180885245035, "learning_rate": 2.750234415857783e-06, "loss": 0.6146, "step": 41880 }, { "epoch": 0.7607511259625164, "grad_norm": 0.7761125494972861, "learning_rate": 2.7462662073799195e-06, "loss": 0.6015, "step": 41890 }, { "epoch": 0.7609327328199913, "grad_norm": 0.7646107269033192, "learning_rate": 2.7423004080372938e-06, "loss": 0.6082, "step": 41900 }, { "epoch": 0.7611143396774662, "grad_norm": 0.7638689896910916, "learning_rate": 2.7383370191470447e-06, "loss": 0.6066, "step": 41910 }, { "epoch": 0.7612959465349411, "grad_norm": 0.7872782113594128, "learning_rate": 2.7343760420255026e-06, "loss": 0.6086, "step": 41920 }, { "epoch": 0.7614775533924161, "grad_norm": 0.7361560063236275, "learning_rate": 2.7304174779882066e-06, "loss": 0.5981, "step": 41930 }, { "epoch": 0.761659160249891, "grad_norm": 0.7820474260412796, "learning_rate": 2.726461328349885e-06, "loss": 0.6181, "step": 41940 }, { "epoch": 0.7618407671073659, "grad_norm": 0.7525801556906359, "learning_rate": 2.722507594424466e-06, "loss": 0.5997, "step": 41950 }, { "epoch": 0.762022373964841, "grad_norm": 0.7646395450219202, "learning_rate": 2.7185562775250827e-06, "loss": 0.5988, "step": 41960 }, { "epoch": 0.7622039808223159, "grad_norm": 0.7796992974975557, "learning_rate": 2.714607378964055e-06, "loss": 0.6022, "step": 41970 }, { "epoch": 0.7623855876797908, "grad_norm": 0.7404002022727113, "learning_rate": 2.7106609000529105e-06, "loss": 0.6151, "step": 41980 }, { "epoch": 0.7625671945372657, "grad_norm": 0.7642393818256699, "learning_rate": 2.7067168421023603e-06, "loss": 0.6081, "step": 41990 }, { "epoch": 0.7627488013947407, "grad_norm": 0.7703661087476039, "learning_rate": 2.702775206422326e-06, "loss": 0.6067, "step": 42000 }, { "epoch": 0.7629304082522156, "grad_norm": 0.7412546617673699, "learning_rate": 2.69883599432191e-06, "loss": 0.5971, "step": 42010 }, { "epoch": 0.7631120151096905, "grad_norm": 0.7648117666549855, "learning_rate": 2.694899207109424e-06, "loss": 0.6115, "step": 42020 }, { "epoch": 0.7632936219671654, "grad_norm": 0.7503331303442443, "learning_rate": 2.6909648460923655e-06, "loss": 0.6096, "step": 42030 }, { "epoch": 0.7634752288246405, "grad_norm": 0.7227868733069357, "learning_rate": 2.687032912577423e-06, "loss": 0.6035, "step": 42040 }, { "epoch": 0.7636568356821154, "grad_norm": 0.7320775873563646, "learning_rate": 2.683103407870491e-06, "loss": 0.5968, "step": 42050 }, { "epoch": 0.7638384425395903, "grad_norm": 0.7661375962057448, "learning_rate": 2.679176333276644e-06, "loss": 0.5925, "step": 42060 }, { "epoch": 0.7640200493970652, "grad_norm": 0.7552379850790841, "learning_rate": 2.6752516901001624e-06, "loss": 0.6159, "step": 42070 }, { "epoch": 0.7642016562545402, "grad_norm": 0.7444017096291907, "learning_rate": 2.6713294796445053e-06, "loss": 0.6085, "step": 42080 }, { "epoch": 0.7643832631120151, "grad_norm": 0.7010504617136649, "learning_rate": 2.667409703212337e-06, "loss": 0.6067, "step": 42090 }, { "epoch": 0.76456486996949, "grad_norm": 0.7422388925322451, "learning_rate": 2.663492362105502e-06, "loss": 0.5949, "step": 42100 }, { "epoch": 0.764746476826965, "grad_norm": 0.8132540568940644, "learning_rate": 2.6595774576250466e-06, "loss": 0.6178, "step": 42110 }, { "epoch": 0.7649280836844399, "grad_norm": 0.7953637202753348, "learning_rate": 2.655664991071196e-06, "loss": 0.635, "step": 42120 }, { "epoch": 0.7651096905419149, "grad_norm": 0.7369637280048549, "learning_rate": 2.6517549637433794e-06, "loss": 0.6159, "step": 42130 }, { "epoch": 0.7652912973993898, "grad_norm": 0.7476134248182076, "learning_rate": 2.6478473769402047e-06, "loss": 0.6031, "step": 42140 }, { "epoch": 0.7654729042568648, "grad_norm": 0.7325813345766415, "learning_rate": 2.64394223195947e-06, "loss": 0.6019, "step": 42150 }, { "epoch": 0.7656545111143397, "grad_norm": 0.749892808117599, "learning_rate": 2.6400395300981717e-06, "loss": 0.5978, "step": 42160 }, { "epoch": 0.7658361179718146, "grad_norm": 0.7549392152788778, "learning_rate": 2.636139272652485e-06, "loss": 0.5957, "step": 42170 }, { "epoch": 0.7660177248292895, "grad_norm": 0.8116512569940408, "learning_rate": 2.6322414609177793e-06, "loss": 0.6043, "step": 42180 }, { "epoch": 0.7661993316867645, "grad_norm": 0.7806136166356925, "learning_rate": 2.6283460961886053e-06, "loss": 0.6162, "step": 42190 }, { "epoch": 0.7663809385442394, "grad_norm": 0.7617998128191944, "learning_rate": 2.6244531797587112e-06, "loss": 0.6117, "step": 42200 }, { "epoch": 0.7665625454017144, "grad_norm": 0.7470288453939365, "learning_rate": 2.62056271292102e-06, "loss": 0.6077, "step": 42210 }, { "epoch": 0.7667441522591893, "grad_norm": 0.7519641415149944, "learning_rate": 2.6166746969676483e-06, "loss": 0.6099, "step": 42220 }, { "epoch": 0.7669257591166643, "grad_norm": 0.7351533222024702, "learning_rate": 2.6127891331899023e-06, "loss": 0.6107, "step": 42230 }, { "epoch": 0.7671073659741392, "grad_norm": 0.7974050541034658, "learning_rate": 2.6089060228782624e-06, "loss": 0.5997, "step": 42240 }, { "epoch": 0.7672889728316141, "grad_norm": 0.7660032082339266, "learning_rate": 2.605025367322407e-06, "loss": 0.603, "step": 42250 }, { "epoch": 0.767470579689089, "grad_norm": 0.7824737583772526, "learning_rate": 2.6011471678111866e-06, "loss": 0.5901, "step": 42260 }, { "epoch": 0.767652186546564, "grad_norm": 0.7666370308765342, "learning_rate": 2.597271425632647e-06, "loss": 0.6086, "step": 42270 }, { "epoch": 0.7678337934040389, "grad_norm": 0.7273182240167543, "learning_rate": 2.5933981420740094e-06, "loss": 0.606, "step": 42280 }, { "epoch": 0.7680154002615138, "grad_norm": 0.7792518399116685, "learning_rate": 2.589527318421686e-06, "loss": 0.6162, "step": 42290 }, { "epoch": 0.7681970071189889, "grad_norm": 0.7476068720177618, "learning_rate": 2.585658955961264e-06, "loss": 0.6057, "step": 42300 }, { "epoch": 0.7683786139764638, "grad_norm": 0.7494796010080906, "learning_rate": 2.581793055977523e-06, "loss": 0.6063, "step": 42310 }, { "epoch": 0.7685602208339387, "grad_norm": 0.7507029135100518, "learning_rate": 2.5779296197544123e-06, "loss": 0.6022, "step": 42320 }, { "epoch": 0.7687418276914136, "grad_norm": 0.7545068800022058, "learning_rate": 2.5740686485750768e-06, "loss": 0.6056, "step": 42330 }, { "epoch": 0.7689234345488886, "grad_norm": 0.759558632833146, "learning_rate": 2.5702101437218318e-06, "loss": 0.596, "step": 42340 }, { "epoch": 0.7691050414063635, "grad_norm": 0.7887648081315435, "learning_rate": 2.5663541064761743e-06, "loss": 0.6005, "step": 42350 }, { "epoch": 0.7692866482638384, "grad_norm": 0.7552802569507392, "learning_rate": 2.5625005381187918e-06, "loss": 0.6054, "step": 42360 }, { "epoch": 0.7694682551213133, "grad_norm": 0.7405774869687246, "learning_rate": 2.558649439929537e-06, "loss": 0.6018, "step": 42370 }, { "epoch": 0.7696498619787884, "grad_norm": 0.7431633719112246, "learning_rate": 2.554800813187458e-06, "loss": 0.6167, "step": 42380 }, { "epoch": 0.7698314688362633, "grad_norm": 0.7715016259354018, "learning_rate": 2.550954659170766e-06, "loss": 0.6079, "step": 42390 }, { "epoch": 0.7700130756937382, "grad_norm": 0.7564185649269767, "learning_rate": 2.547110979156867e-06, "loss": 0.5897, "step": 42400 }, { "epoch": 0.7701946825512132, "grad_norm": 0.7738607260694447, "learning_rate": 2.54326977442233e-06, "loss": 0.6019, "step": 42410 }, { "epoch": 0.7703762894086881, "grad_norm": 0.7495466974430285, "learning_rate": 2.5394310462429163e-06, "loss": 0.6084, "step": 42420 }, { "epoch": 0.770557896266163, "grad_norm": 0.823648269215415, "learning_rate": 2.5355947958935535e-06, "loss": 0.6097, "step": 42430 }, { "epoch": 0.7707395031236379, "grad_norm": 0.7608557089343276, "learning_rate": 2.5317610246483484e-06, "loss": 0.6105, "step": 42440 }, { "epoch": 0.7709211099811129, "grad_norm": 0.7946644069591479, "learning_rate": 2.527929733780592e-06, "loss": 0.6024, "step": 42450 }, { "epoch": 0.7711027168385878, "grad_norm": 0.73863297358178, "learning_rate": 2.524100924562739e-06, "loss": 0.6015, "step": 42460 }, { "epoch": 0.7712843236960628, "grad_norm": 0.7246399603381466, "learning_rate": 2.5202745982664344e-06, "loss": 0.5935, "step": 42470 }, { "epoch": 0.7714659305535377, "grad_norm": 0.7574871092396626, "learning_rate": 2.516450756162484e-06, "loss": 0.6144, "step": 42480 }, { "epoch": 0.7716475374110127, "grad_norm": 0.725587726384372, "learning_rate": 2.5126293995208815e-06, "loss": 0.6067, "step": 42490 }, { "epoch": 0.7718291442684876, "grad_norm": 0.7664430168290982, "learning_rate": 2.5088105296107844e-06, "loss": 0.6074, "step": 42500 }, { "epoch": 0.7720107511259625, "grad_norm": 0.7295328446731992, "learning_rate": 2.5049941477005325e-06, "loss": 0.602, "step": 42510 }, { "epoch": 0.7721923579834374, "grad_norm": 0.7380482460503274, "learning_rate": 2.5011802550576325e-06, "loss": 0.5997, "step": 42520 }, { "epoch": 0.7723739648409124, "grad_norm": 0.7282627795364219, "learning_rate": 2.4973688529487714e-06, "loss": 0.592, "step": 42530 }, { "epoch": 0.7725555716983873, "grad_norm": 0.7858468301008009, "learning_rate": 2.493559942639805e-06, "loss": 0.6205, "step": 42540 }, { "epoch": 0.7727371785558622, "grad_norm": 0.7210415469896833, "learning_rate": 2.489753525395756e-06, "loss": 0.6046, "step": 42550 }, { "epoch": 0.7729187854133373, "grad_norm": 0.7971555901184003, "learning_rate": 2.4859496024808326e-06, "loss": 0.6099, "step": 42560 }, { "epoch": 0.7731003922708122, "grad_norm": 0.7956481557685448, "learning_rate": 2.4821481751583997e-06, "loss": 0.6143, "step": 42570 }, { "epoch": 0.7732819991282871, "grad_norm": 0.7454325418774673, "learning_rate": 2.4783492446910073e-06, "loss": 0.5949, "step": 42580 }, { "epoch": 0.773463605985762, "grad_norm": 0.7545154064202798, "learning_rate": 2.474552812340364e-06, "loss": 0.6091, "step": 42590 }, { "epoch": 0.773645212843237, "grad_norm": 0.7614211345325885, "learning_rate": 2.4707588793673588e-06, "loss": 0.6107, "step": 42600 }, { "epoch": 0.7738268197007119, "grad_norm": 0.7628763901267706, "learning_rate": 2.4669674470320403e-06, "loss": 0.611, "step": 42610 }, { "epoch": 0.7740084265581868, "grad_norm": 0.7706010689141964, "learning_rate": 2.463178516593635e-06, "loss": 0.6008, "step": 42620 }, { "epoch": 0.7741900334156617, "grad_norm": 0.7258270354755025, "learning_rate": 2.4593920893105393e-06, "loss": 0.6047, "step": 42630 }, { "epoch": 0.7743716402731368, "grad_norm": 0.8233344928809906, "learning_rate": 2.4556081664403085e-06, "loss": 0.6058, "step": 42640 }, { "epoch": 0.7745532471306117, "grad_norm": 0.7244578549053541, "learning_rate": 2.4518267492396776e-06, "loss": 0.5934, "step": 42650 }, { "epoch": 0.7747348539880866, "grad_norm": 0.7435207346911056, "learning_rate": 2.448047838964539e-06, "loss": 0.6006, "step": 42660 }, { "epoch": 0.7749164608455615, "grad_norm": 0.7613174803506427, "learning_rate": 2.4442714368699615e-06, "loss": 0.6166, "step": 42670 }, { "epoch": 0.7750980677030365, "grad_norm": 0.7720655825086589, "learning_rate": 2.440497544210173e-06, "loss": 0.6026, "step": 42680 }, { "epoch": 0.7752796745605114, "grad_norm": 0.7548660476178842, "learning_rate": 2.4367261622385763e-06, "loss": 0.6089, "step": 42690 }, { "epoch": 0.7754612814179863, "grad_norm": 0.8136370470840846, "learning_rate": 2.4329572922077318e-06, "loss": 0.6217, "step": 42700 }, { "epoch": 0.7756428882754612, "grad_norm": 0.8037155202799314, "learning_rate": 2.429190935369373e-06, "loss": 0.6038, "step": 42710 }, { "epoch": 0.7758244951329362, "grad_norm": 0.7729610092132859, "learning_rate": 2.4254270929743917e-06, "loss": 0.6094, "step": 42720 }, { "epoch": 0.7760061019904112, "grad_norm": 0.7359075225218386, "learning_rate": 2.4216657662728536e-06, "loss": 0.6081, "step": 42730 }, { "epoch": 0.7761877088478861, "grad_norm": 0.7850924533666902, "learning_rate": 2.41790695651398e-06, "loss": 0.6183, "step": 42740 }, { "epoch": 0.776369315705361, "grad_norm": 0.7930256088738173, "learning_rate": 2.4141506649461577e-06, "loss": 0.6081, "step": 42750 }, { "epoch": 0.776550922562836, "grad_norm": 0.7636589756661049, "learning_rate": 2.410396892816944e-06, "loss": 0.6144, "step": 42760 }, { "epoch": 0.7767325294203109, "grad_norm": 0.7711720256474365, "learning_rate": 2.40664564137305e-06, "loss": 0.6053, "step": 42770 }, { "epoch": 0.7769141362777858, "grad_norm": 0.7803398232978636, "learning_rate": 2.4028969118603595e-06, "loss": 0.6268, "step": 42780 }, { "epoch": 0.7770957431352608, "grad_norm": 0.7464263199589861, "learning_rate": 2.3991507055239094e-06, "loss": 0.6045, "step": 42790 }, { "epoch": 0.7772773499927357, "grad_norm": 0.7627839122117877, "learning_rate": 2.3954070236079064e-06, "loss": 0.6038, "step": 42800 }, { "epoch": 0.7774589568502107, "grad_norm": 0.7434446932581289, "learning_rate": 2.3916658673557092e-06, "loss": 0.605, "step": 42810 }, { "epoch": 0.7776405637076856, "grad_norm": 0.7556402651235276, "learning_rate": 2.387927238009852e-06, "loss": 0.6059, "step": 42820 }, { "epoch": 0.7778221705651606, "grad_norm": 0.7329440132278315, "learning_rate": 2.384191136812016e-06, "loss": 0.6056, "step": 42830 }, { "epoch": 0.7780037774226355, "grad_norm": 0.7423811153384784, "learning_rate": 2.380457565003046e-06, "loss": 0.6114, "step": 42840 }, { "epoch": 0.7781853842801104, "grad_norm": 0.7558125360869339, "learning_rate": 2.376726523822954e-06, "loss": 0.6048, "step": 42850 }, { "epoch": 0.7783669911375853, "grad_norm": 0.7238288269764899, "learning_rate": 2.372998014510902e-06, "loss": 0.6083, "step": 42860 }, { "epoch": 0.7785485979950603, "grad_norm": 0.78121544799292, "learning_rate": 2.36927203830522e-06, "loss": 0.6124, "step": 42870 }, { "epoch": 0.7787302048525352, "grad_norm": 0.741217023057949, "learning_rate": 2.3655485964433876e-06, "loss": 0.5964, "step": 42880 }, { "epoch": 0.7789118117100101, "grad_norm": 0.7633920510890895, "learning_rate": 2.3618276901620516e-06, "loss": 0.6084, "step": 42890 }, { "epoch": 0.7790934185674852, "grad_norm": 0.7703682607692671, "learning_rate": 2.358109320697007e-06, "loss": 0.6139, "step": 42900 }, { "epoch": 0.7792750254249601, "grad_norm": 0.7750519327837154, "learning_rate": 2.354393489283219e-06, "loss": 0.6, "step": 42910 }, { "epoch": 0.779456632282435, "grad_norm": 0.7666026511207671, "learning_rate": 2.3506801971547934e-06, "loss": 0.5962, "step": 42920 }, { "epoch": 0.7796382391399099, "grad_norm": 0.7970736045493838, "learning_rate": 2.34696944554501e-06, "loss": 0.6019, "step": 42930 }, { "epoch": 0.7798198459973849, "grad_norm": 0.7971483830678034, "learning_rate": 2.3432612356862917e-06, "loss": 0.6093, "step": 42940 }, { "epoch": 0.7800014528548598, "grad_norm": 0.7312371322059779, "learning_rate": 2.339555568810221e-06, "loss": 0.5882, "step": 42950 }, { "epoch": 0.7801830597123347, "grad_norm": 0.7839839114033006, "learning_rate": 2.3358524461475417e-06, "loss": 0.6113, "step": 42960 }, { "epoch": 0.7803646665698096, "grad_norm": 0.7373474258758905, "learning_rate": 2.332151868928142e-06, "loss": 0.6129, "step": 42970 }, { "epoch": 0.7805462734272847, "grad_norm": 0.742532863408731, "learning_rate": 2.3284538383810774e-06, "loss": 0.6091, "step": 42980 }, { "epoch": 0.7807278802847596, "grad_norm": 0.7373542431148319, "learning_rate": 2.3247583557345423e-06, "loss": 0.6074, "step": 42990 }, { "epoch": 0.7809094871422345, "grad_norm": 0.763868862467078, "learning_rate": 2.3210654222159013e-06, "loss": 0.609, "step": 43000 }, { "epoch": 0.7810910939997094, "grad_norm": 0.790125017625639, "learning_rate": 2.317375039051657e-06, "loss": 0.603, "step": 43010 }, { "epoch": 0.7812727008571844, "grad_norm": 0.7338867832979503, "learning_rate": 2.3136872074674765e-06, "loss": 0.5954, "step": 43020 }, { "epoch": 0.7814543077146593, "grad_norm": 0.7530250233442007, "learning_rate": 2.310001928688177e-06, "loss": 0.6102, "step": 43030 }, { "epoch": 0.7816359145721342, "grad_norm": 0.7738049147439409, "learning_rate": 2.3063192039377215e-06, "loss": 0.6155, "step": 43040 }, { "epoch": 0.7818175214296091, "grad_norm": 0.7400735936875263, "learning_rate": 2.3026390344392346e-06, "loss": 0.598, "step": 43050 }, { "epoch": 0.7819991282870841, "grad_norm": 0.750823817394605, "learning_rate": 2.29896142141498e-06, "loss": 0.6025, "step": 43060 }, { "epoch": 0.7821807351445591, "grad_norm": 0.743467467885676, "learning_rate": 2.2952863660863865e-06, "loss": 0.6128, "step": 43070 }, { "epoch": 0.782362342002034, "grad_norm": 0.7568582553890956, "learning_rate": 2.2916138696740197e-06, "loss": 0.6046, "step": 43080 }, { "epoch": 0.782543948859509, "grad_norm": 0.7437191370435757, "learning_rate": 2.2879439333976084e-06, "loss": 0.6088, "step": 43090 }, { "epoch": 0.7827255557169839, "grad_norm": 0.7442265589991528, "learning_rate": 2.2842765584760183e-06, "loss": 0.6024, "step": 43100 }, { "epoch": 0.7829071625744588, "grad_norm": 0.7513299890283187, "learning_rate": 2.280611746127276e-06, "loss": 0.6012, "step": 43110 }, { "epoch": 0.7830887694319337, "grad_norm": 0.7778446348251232, "learning_rate": 2.2769494975685468e-06, "loss": 0.6203, "step": 43120 }, { "epoch": 0.7832703762894087, "grad_norm": 0.8143305883243791, "learning_rate": 2.273289814016154e-06, "loss": 0.6061, "step": 43130 }, { "epoch": 0.7834519831468836, "grad_norm": 0.7378708778931365, "learning_rate": 2.269632696685563e-06, "loss": 0.6032, "step": 43140 }, { "epoch": 0.7836335900043586, "grad_norm": 0.7295533754404513, "learning_rate": 2.2659781467913855e-06, "loss": 0.6085, "step": 43150 }, { "epoch": 0.7838151968618335, "grad_norm": 0.7860518981123403, "learning_rate": 2.2623261655473873e-06, "loss": 0.6153, "step": 43160 }, { "epoch": 0.7839968037193085, "grad_norm": 0.7499614874332642, "learning_rate": 2.2586767541664723e-06, "loss": 0.5985, "step": 43170 }, { "epoch": 0.7841784105767834, "grad_norm": 0.7329729453405747, "learning_rate": 2.255029913860701e-06, "loss": 0.6021, "step": 43180 }, { "epoch": 0.7843600174342583, "grad_norm": 0.7493315521287991, "learning_rate": 2.2513856458412707e-06, "loss": 0.6037, "step": 43190 }, { "epoch": 0.7845416242917332, "grad_norm": 0.7756513494798079, "learning_rate": 2.247743951318532e-06, "loss": 0.6167, "step": 43200 }, { "epoch": 0.7847232311492082, "grad_norm": 0.7493629495446351, "learning_rate": 2.244104831501972e-06, "loss": 0.6036, "step": 43210 }, { "epoch": 0.7849048380066831, "grad_norm": 0.7439568136387824, "learning_rate": 2.2404682876002328e-06, "loss": 0.6019, "step": 43220 }, { "epoch": 0.785086444864158, "grad_norm": 0.7713614140082651, "learning_rate": 2.236834320821095e-06, "loss": 0.6044, "step": 43230 }, { "epoch": 0.785268051721633, "grad_norm": 0.7423017606487511, "learning_rate": 2.23320293237148e-06, "loss": 0.6156, "step": 43240 }, { "epoch": 0.785449658579108, "grad_norm": 0.7212120995500149, "learning_rate": 2.229574123457463e-06, "loss": 0.6143, "step": 43250 }, { "epoch": 0.7856312654365829, "grad_norm": 0.7591657918375606, "learning_rate": 2.225947895284252e-06, "loss": 0.6103, "step": 43260 }, { "epoch": 0.7858128722940578, "grad_norm": 0.7551472502534412, "learning_rate": 2.222324249056207e-06, "loss": 0.6004, "step": 43270 }, { "epoch": 0.7859944791515328, "grad_norm": 0.7519067151050154, "learning_rate": 2.2187031859768205e-06, "loss": 0.6123, "step": 43280 }, { "epoch": 0.7861760860090077, "grad_norm": 0.7845523393738375, "learning_rate": 2.215084707248738e-06, "loss": 0.6097, "step": 43290 }, { "epoch": 0.7863576928664826, "grad_norm": 0.7675437529432699, "learning_rate": 2.211468814073735e-06, "loss": 0.6116, "step": 43300 }, { "epoch": 0.7865392997239575, "grad_norm": 0.760239045454409, "learning_rate": 2.207855507652742e-06, "loss": 0.606, "step": 43310 }, { "epoch": 0.7867209065814326, "grad_norm": 0.7430685620557078, "learning_rate": 2.2042447891858163e-06, "loss": 0.6044, "step": 43320 }, { "epoch": 0.7869025134389075, "grad_norm": 0.7625078212905818, "learning_rate": 2.2006366598721672e-06, "loss": 0.6088, "step": 43330 }, { "epoch": 0.7870841202963824, "grad_norm": 0.7748781145794288, "learning_rate": 2.197031120910137e-06, "loss": 0.6079, "step": 43340 }, { "epoch": 0.7872657271538573, "grad_norm": 0.7619960988870845, "learning_rate": 2.1934281734972076e-06, "loss": 0.6053, "step": 43350 }, { "epoch": 0.7874473340113323, "grad_norm": 0.722593279356107, "learning_rate": 2.189827818830006e-06, "loss": 0.6006, "step": 43360 }, { "epoch": 0.7876289408688072, "grad_norm": 0.7546847009419657, "learning_rate": 2.1862300581042917e-06, "loss": 0.6131, "step": 43370 }, { "epoch": 0.7878105477262821, "grad_norm": 0.7553866635607991, "learning_rate": 2.182634892514969e-06, "loss": 0.6, "step": 43380 }, { "epoch": 0.787992154583757, "grad_norm": 0.7325984649751267, "learning_rate": 2.179042323256071e-06, "loss": 0.6058, "step": 43390 }, { "epoch": 0.788173761441232, "grad_norm": 0.7780036553430332, "learning_rate": 2.1754523515207815e-06, "loss": 0.6114, "step": 43400 }, { "epoch": 0.788355368298707, "grad_norm": 0.7712190929783168, "learning_rate": 2.171864978501407e-06, "loss": 0.6024, "step": 43410 }, { "epoch": 0.7885369751561819, "grad_norm": 0.7395549867108487, "learning_rate": 2.1682802053894024e-06, "loss": 0.6012, "step": 43420 }, { "epoch": 0.7887185820136569, "grad_norm": 0.7436755330475647, "learning_rate": 2.1646980333753577e-06, "loss": 0.6007, "step": 43430 }, { "epoch": 0.7889001888711318, "grad_norm": 0.748714328583083, "learning_rate": 2.161118463648989e-06, "loss": 0.6019, "step": 43440 }, { "epoch": 0.7890817957286067, "grad_norm": 0.7478650492075248, "learning_rate": 2.1575414973991636e-06, "loss": 0.6061, "step": 43450 }, { "epoch": 0.7892634025860816, "grad_norm": 0.7604196446313101, "learning_rate": 2.153967135813869e-06, "loss": 0.6038, "step": 43460 }, { "epoch": 0.7894450094435566, "grad_norm": 0.7436056255382231, "learning_rate": 2.1503953800802402e-06, "loss": 0.6125, "step": 43470 }, { "epoch": 0.7896266163010315, "grad_norm": 0.7373510979199187, "learning_rate": 2.1468262313845355e-06, "loss": 0.6133, "step": 43480 }, { "epoch": 0.7898082231585065, "grad_norm": 0.7721686684552289, "learning_rate": 2.1432596909121583e-06, "loss": 0.6065, "step": 43490 }, { "epoch": 0.7899898300159814, "grad_norm": 0.7885032487803493, "learning_rate": 2.139695759847635e-06, "loss": 0.628, "step": 43500 }, { "epoch": 0.7901714368734564, "grad_norm": 0.774301873599082, "learning_rate": 2.1361344393746374e-06, "loss": 0.6047, "step": 43510 }, { "epoch": 0.7903530437309313, "grad_norm": 0.7241257277631102, "learning_rate": 2.1325757306759564e-06, "loss": 0.6133, "step": 43520 }, { "epoch": 0.7905346505884062, "grad_norm": 0.7448594068036899, "learning_rate": 2.129019634933529e-06, "loss": 0.5965, "step": 43530 }, { "epoch": 0.7907162574458811, "grad_norm": 0.7818290879051741, "learning_rate": 2.125466153328416e-06, "loss": 0.6121, "step": 43540 }, { "epoch": 0.7908978643033561, "grad_norm": 0.7596793605682411, "learning_rate": 2.1219152870408075e-06, "loss": 0.5941, "step": 43550 }, { "epoch": 0.791079471160831, "grad_norm": 0.7516179715664214, "learning_rate": 2.1183670372500366e-06, "loss": 0.6031, "step": 43560 }, { "epoch": 0.7912610780183059, "grad_norm": 0.7542111922601261, "learning_rate": 2.114821405134554e-06, "loss": 0.6058, "step": 43570 }, { "epoch": 0.791442684875781, "grad_norm": 0.7928110319736237, "learning_rate": 2.1112783918719536e-06, "loss": 0.5987, "step": 43580 }, { "epoch": 0.7916242917332559, "grad_norm": 0.7478803918909156, "learning_rate": 2.1077379986389467e-06, "loss": 0.5999, "step": 43590 }, { "epoch": 0.7918058985907308, "grad_norm": 0.7454498517290341, "learning_rate": 2.104200226611387e-06, "loss": 0.6168, "step": 43600 }, { "epoch": 0.7919875054482057, "grad_norm": 0.7394434948393345, "learning_rate": 2.1006650769642466e-06, "loss": 0.5892, "step": 43610 }, { "epoch": 0.7921691123056807, "grad_norm": 0.7297880401653095, "learning_rate": 2.0971325508716366e-06, "loss": 0.6042, "step": 43620 }, { "epoch": 0.7923507191631556, "grad_norm": 0.7632691320240426, "learning_rate": 2.0936026495067874e-06, "loss": 0.6004, "step": 43630 }, { "epoch": 0.7925323260206305, "grad_norm": 0.7489331655176801, "learning_rate": 2.0900753740420653e-06, "loss": 0.6003, "step": 43640 }, { "epoch": 0.7927139328781054, "grad_norm": 0.7786422957412561, "learning_rate": 2.0865507256489614e-06, "loss": 0.596, "step": 43650 }, { "epoch": 0.7928955397355805, "grad_norm": 0.7820522732599451, "learning_rate": 2.0830287054980893e-06, "loss": 0.6082, "step": 43660 }, { "epoch": 0.7930771465930554, "grad_norm": 0.7776232368622294, "learning_rate": 2.0795093147591993e-06, "loss": 0.6089, "step": 43670 }, { "epoch": 0.7932587534505303, "grad_norm": 0.7601703528992599, "learning_rate": 2.0759925546011617e-06, "loss": 0.5976, "step": 43680 }, { "epoch": 0.7934403603080052, "grad_norm": 0.7782831701251932, "learning_rate": 2.0724784261919774e-06, "loss": 0.5973, "step": 43690 }, { "epoch": 0.7936219671654802, "grad_norm": 0.7670314988436066, "learning_rate": 2.068966930698766e-06, "loss": 0.599, "step": 43700 }, { "epoch": 0.7938035740229551, "grad_norm": 0.740200030471557, "learning_rate": 2.0654580692877835e-06, "loss": 0.6097, "step": 43710 }, { "epoch": 0.79398518088043, "grad_norm": 0.7335919681824439, "learning_rate": 2.0619518431244e-06, "loss": 0.6173, "step": 43720 }, { "epoch": 0.794166787737905, "grad_norm": 0.7505780548695741, "learning_rate": 2.058448253373121e-06, "loss": 0.6121, "step": 43730 }, { "epoch": 0.7943483945953799, "grad_norm": 0.7867824151045717, "learning_rate": 2.0549473011975683e-06, "loss": 0.604, "step": 43740 }, { "epoch": 0.7945300014528549, "grad_norm": 0.7355550278687951, "learning_rate": 2.051448987760486e-06, "loss": 0.608, "step": 43750 }, { "epoch": 0.7947116083103298, "grad_norm": 0.7889466642676407, "learning_rate": 2.047953314223753e-06, "loss": 0.6094, "step": 43760 }, { "epoch": 0.7948932151678048, "grad_norm": 0.7900667038422436, "learning_rate": 2.044460281748358e-06, "loss": 0.6181, "step": 43770 }, { "epoch": 0.7950748220252797, "grad_norm": 0.7310500265022055, "learning_rate": 2.0409698914944264e-06, "loss": 0.5943, "step": 43780 }, { "epoch": 0.7952564288827546, "grad_norm": 0.7731892952492893, "learning_rate": 2.037482144621191e-06, "loss": 0.6038, "step": 43790 }, { "epoch": 0.7954380357402295, "grad_norm": 0.7512904862350794, "learning_rate": 2.0339970422870213e-06, "loss": 0.6029, "step": 43800 }, { "epoch": 0.7956196425977045, "grad_norm": 0.7791271780662056, "learning_rate": 2.0305145856493958e-06, "loss": 0.5992, "step": 43810 }, { "epoch": 0.7958012494551794, "grad_norm": 0.7806194255305533, "learning_rate": 2.0270347758649234e-06, "loss": 0.6053, "step": 43820 }, { "epoch": 0.7959828563126544, "grad_norm": 0.7449993077835673, "learning_rate": 2.023557614089332e-06, "loss": 0.6211, "step": 43830 }, { "epoch": 0.7961644631701293, "grad_norm": 0.7569863887841024, "learning_rate": 2.0200831014774635e-06, "loss": 0.5994, "step": 43840 }, { "epoch": 0.7963460700276043, "grad_norm": 0.7727736692018184, "learning_rate": 2.0166112391832917e-06, "loss": 0.6093, "step": 43850 }, { "epoch": 0.7965276768850792, "grad_norm": 0.7573692924359763, "learning_rate": 2.013142028359897e-06, "loss": 0.6126, "step": 43860 }, { "epoch": 0.7967092837425541, "grad_norm": 0.7472797306292803, "learning_rate": 2.009675470159491e-06, "loss": 0.6027, "step": 43870 }, { "epoch": 0.796890890600029, "grad_norm": 0.7654646996853655, "learning_rate": 2.0062115657333936e-06, "loss": 0.6047, "step": 43880 }, { "epoch": 0.797072497457504, "grad_norm": 0.7683867872765917, "learning_rate": 2.002750316232056e-06, "loss": 0.6068, "step": 43890 }, { "epoch": 0.7972541043149789, "grad_norm": 0.7620160994569624, "learning_rate": 1.9992917228050336e-06, "loss": 0.6139, "step": 43900 }, { "epoch": 0.7974357111724538, "grad_norm": 0.7731323112092376, "learning_rate": 1.9958357866010114e-06, "loss": 0.5876, "step": 43910 }, { "epoch": 0.7976173180299289, "grad_norm": 0.7491810993572644, "learning_rate": 1.992382508767782e-06, "loss": 0.6142, "step": 43920 }, { "epoch": 0.7977989248874038, "grad_norm": 0.7597073205152852, "learning_rate": 1.9889318904522648e-06, "loss": 0.5957, "step": 43930 }, { "epoch": 0.7979805317448787, "grad_norm": 0.7623105919803012, "learning_rate": 1.9854839328004906e-06, "loss": 0.6015, "step": 43940 }, { "epoch": 0.7981621386023536, "grad_norm": 0.7497092673352149, "learning_rate": 1.982038636957602e-06, "loss": 0.5942, "step": 43950 }, { "epoch": 0.7983437454598286, "grad_norm": 0.7641498042386932, "learning_rate": 1.978596004067869e-06, "loss": 0.6042, "step": 43960 }, { "epoch": 0.7985253523173035, "grad_norm": 0.7170535983260388, "learning_rate": 1.9751560352746644e-06, "loss": 0.6038, "step": 43970 }, { "epoch": 0.7987069591747784, "grad_norm": 0.7394039290726923, "learning_rate": 1.97171873172049e-06, "loss": 0.6026, "step": 43980 }, { "epoch": 0.7988885660322533, "grad_norm": 0.7464706543311861, "learning_rate": 1.968284094546948e-06, "loss": 0.5903, "step": 43990 }, { "epoch": 0.7990701728897284, "grad_norm": 0.7587652717794332, "learning_rate": 1.9648521248947683e-06, "loss": 0.6002, "step": 44000 }, { "epoch": 0.7992517797472033, "grad_norm": 0.7364585632276454, "learning_rate": 1.9614228239037815e-06, "loss": 0.6188, "step": 44010 }, { "epoch": 0.7994333866046782, "grad_norm": 0.7590695161723631, "learning_rate": 1.9579961927129467e-06, "loss": 0.6162, "step": 44020 }, { "epoch": 0.7996149934621531, "grad_norm": 0.7550094213712449, "learning_rate": 1.9545722324603213e-06, "loss": 0.6085, "step": 44030 }, { "epoch": 0.7997966003196281, "grad_norm": 0.7275057080953313, "learning_rate": 1.951150944283089e-06, "loss": 0.6215, "step": 44040 }, { "epoch": 0.799978207177103, "grad_norm": 0.7383375192813141, "learning_rate": 1.9477323293175377e-06, "loss": 0.5939, "step": 44050 }, { "epoch": 0.8001598140345779, "grad_norm": 0.7621366946989473, "learning_rate": 1.9443163886990655e-06, "loss": 0.5907, "step": 44060 }, { "epoch": 0.8003414208920528, "grad_norm": 0.7528825937971195, "learning_rate": 1.9409031235621935e-06, "loss": 0.5964, "step": 44070 }, { "epoch": 0.8005230277495278, "grad_norm": 0.7427268765949887, "learning_rate": 1.9374925350405405e-06, "loss": 0.5967, "step": 44080 }, { "epoch": 0.8007046346070028, "grad_norm": 0.7500473330637603, "learning_rate": 1.934084624266849e-06, "loss": 0.6113, "step": 44090 }, { "epoch": 0.8008862414644777, "grad_norm": 0.7381109511507898, "learning_rate": 1.9306793923729607e-06, "loss": 0.5955, "step": 44100 }, { "epoch": 0.8010678483219527, "grad_norm": 0.7197992854241548, "learning_rate": 1.927276840489839e-06, "loss": 0.5886, "step": 44110 }, { "epoch": 0.8012494551794276, "grad_norm": 0.7462529629565133, "learning_rate": 1.9238769697475444e-06, "loss": 0.5934, "step": 44120 }, { "epoch": 0.8014310620369025, "grad_norm": 0.7405674429025835, "learning_rate": 1.9204797812752598e-06, "loss": 0.5973, "step": 44130 }, { "epoch": 0.8016126688943774, "grad_norm": 0.7296756520151272, "learning_rate": 1.917085276201269e-06, "loss": 0.5986, "step": 44140 }, { "epoch": 0.8017942757518524, "grad_norm": 0.7760844693878701, "learning_rate": 1.913693455652965e-06, "loss": 0.5947, "step": 44150 }, { "epoch": 0.8019758826093273, "grad_norm": 0.7726425962472621, "learning_rate": 1.9103043207568537e-06, "loss": 0.6222, "step": 44160 }, { "epoch": 0.8021574894668023, "grad_norm": 0.7622370183907031, "learning_rate": 1.906917872638544e-06, "loss": 0.6107, "step": 44170 }, { "epoch": 0.8023390963242772, "grad_norm": 0.7742951009914507, "learning_rate": 1.9035341124227568e-06, "loss": 0.6065, "step": 44180 }, { "epoch": 0.8025207031817522, "grad_norm": 0.756266000625133, "learning_rate": 1.9001530412333157e-06, "loss": 0.6016, "step": 44190 }, { "epoch": 0.8027023100392271, "grad_norm": 0.7161135632641699, "learning_rate": 1.896774660193159e-06, "loss": 0.6089, "step": 44200 }, { "epoch": 0.802883916896702, "grad_norm": 0.7869839868643703, "learning_rate": 1.8933989704243195e-06, "loss": 0.5905, "step": 44210 }, { "epoch": 0.803065523754177, "grad_norm": 0.7530112999852727, "learning_rate": 1.8900259730479465e-06, "loss": 0.6065, "step": 44220 }, { "epoch": 0.8032471306116519, "grad_norm": 0.7646402848276708, "learning_rate": 1.8866556691842941e-06, "loss": 0.6078, "step": 44230 }, { "epoch": 0.8034287374691268, "grad_norm": 0.7566928233352738, "learning_rate": 1.8832880599527147e-06, "loss": 0.6038, "step": 44240 }, { "epoch": 0.8036103443266017, "grad_norm": 0.7636224241366529, "learning_rate": 1.8799231464716738e-06, "loss": 0.6006, "step": 44250 }, { "epoch": 0.8037919511840768, "grad_norm": 0.8002872308130835, "learning_rate": 1.8765609298587351e-06, "loss": 0.614, "step": 44260 }, { "epoch": 0.8039735580415517, "grad_norm": 0.7373833712352478, "learning_rate": 1.873201411230574e-06, "loss": 0.6028, "step": 44270 }, { "epoch": 0.8041551648990266, "grad_norm": 0.7503289301871344, "learning_rate": 1.8698445917029596e-06, "loss": 0.6079, "step": 44280 }, { "epoch": 0.8043367717565015, "grad_norm": 0.7113512882941354, "learning_rate": 1.8664904723907761e-06, "loss": 0.5995, "step": 44290 }, { "epoch": 0.8045183786139765, "grad_norm": 0.7861298357330855, "learning_rate": 1.8631390544080007e-06, "loss": 0.6228, "step": 44300 }, { "epoch": 0.8046999854714514, "grad_norm": 0.7614856114451434, "learning_rate": 1.8597903388677218e-06, "loss": 0.6083, "step": 44310 }, { "epoch": 0.8048815923289263, "grad_norm": 0.7387220093158613, "learning_rate": 1.856444326882123e-06, "loss": 0.5991, "step": 44320 }, { "epoch": 0.8050631991864012, "grad_norm": 0.7600414595442101, "learning_rate": 1.8531010195624977e-06, "loss": 0.5957, "step": 44330 }, { "epoch": 0.8052448060438763, "grad_norm": 0.7717980826566894, "learning_rate": 1.849760418019233e-06, "loss": 0.5954, "step": 44340 }, { "epoch": 0.8054264129013512, "grad_norm": 0.7590208036641378, "learning_rate": 1.8464225233618206e-06, "loss": 0.6037, "step": 44350 }, { "epoch": 0.8056080197588261, "grad_norm": 0.7440585057744539, "learning_rate": 1.8430873366988577e-06, "loss": 0.608, "step": 44360 }, { "epoch": 0.805789626616301, "grad_norm": 0.7348483296171633, "learning_rate": 1.839754859138032e-06, "loss": 0.6059, "step": 44370 }, { "epoch": 0.805971233473776, "grad_norm": 0.7509888878635868, "learning_rate": 1.8364250917861448e-06, "loss": 0.6059, "step": 44380 }, { "epoch": 0.8061528403312509, "grad_norm": 0.7389078906024855, "learning_rate": 1.8330980357490836e-06, "loss": 0.5968, "step": 44390 }, { "epoch": 0.8063344471887258, "grad_norm": 0.7323689724580104, "learning_rate": 1.8297736921318465e-06, "loss": 0.6021, "step": 44400 }, { "epoch": 0.8065160540462007, "grad_norm": 0.7500194424939126, "learning_rate": 1.8264520620385218e-06, "loss": 0.591, "step": 44410 }, { "epoch": 0.8066976609036757, "grad_norm": 0.7633424257969015, "learning_rate": 1.8231331465723056e-06, "loss": 0.6009, "step": 44420 }, { "epoch": 0.8068792677611507, "grad_norm": 0.7503821750030107, "learning_rate": 1.8198169468354832e-06, "loss": 0.5999, "step": 44430 }, { "epoch": 0.8070608746186256, "grad_norm": 0.7685207694281935, "learning_rate": 1.8165034639294455e-06, "loss": 0.6104, "step": 44440 }, { "epoch": 0.8072424814761006, "grad_norm": 0.7517831582114416, "learning_rate": 1.8131926989546778e-06, "loss": 0.6187, "step": 44450 }, { "epoch": 0.8074240883335755, "grad_norm": 0.7611395969739944, "learning_rate": 1.8098846530107583e-06, "loss": 0.6058, "step": 44460 }, { "epoch": 0.8076056951910504, "grad_norm": 0.7459848440855419, "learning_rate": 1.8065793271963739e-06, "loss": 0.6102, "step": 44470 }, { "epoch": 0.8077873020485253, "grad_norm": 0.7721738318922051, "learning_rate": 1.8032767226092928e-06, "loss": 0.5938, "step": 44480 }, { "epoch": 0.8079689089060003, "grad_norm": 0.7612621402462828, "learning_rate": 1.7999768403463958e-06, "loss": 0.6003, "step": 44490 }, { "epoch": 0.8081505157634752, "grad_norm": 0.7687777030085625, "learning_rate": 1.7966796815036447e-06, "loss": 0.5955, "step": 44500 }, { "epoch": 0.8083321226209502, "grad_norm": 0.7216342128088589, "learning_rate": 1.793385247176107e-06, "loss": 0.6013, "step": 44510 }, { "epoch": 0.8085137294784251, "grad_norm": 0.7566285460771103, "learning_rate": 1.7900935384579398e-06, "loss": 0.6099, "step": 44520 }, { "epoch": 0.8086953363359001, "grad_norm": 0.7461942603469562, "learning_rate": 1.7868045564423985e-06, "loss": 0.6227, "step": 44530 }, { "epoch": 0.808876943193375, "grad_norm": 0.744495412940129, "learning_rate": 1.7835183022218316e-06, "loss": 0.6058, "step": 44540 }, { "epoch": 0.8090585500508499, "grad_norm": 0.7539851845295297, "learning_rate": 1.780234776887677e-06, "loss": 0.613, "step": 44550 }, { "epoch": 0.8092401569083248, "grad_norm": 0.7276555298986713, "learning_rate": 1.776953981530476e-06, "loss": 0.6085, "step": 44560 }, { "epoch": 0.8094217637657998, "grad_norm": 0.7402714511955961, "learning_rate": 1.773675917239852e-06, "loss": 0.5798, "step": 44570 }, { "epoch": 0.8096033706232747, "grad_norm": 0.7666797873754213, "learning_rate": 1.7704005851045335e-06, "loss": 0.6133, "step": 44580 }, { "epoch": 0.8097849774807496, "grad_norm": 0.7641250525458486, "learning_rate": 1.7671279862123303e-06, "loss": 0.5985, "step": 44590 }, { "epoch": 0.8099665843382247, "grad_norm": 0.767042742624577, "learning_rate": 1.7638581216501526e-06, "loss": 0.617, "step": 44600 }, { "epoch": 0.8101481911956996, "grad_norm": 0.7477703932172046, "learning_rate": 1.7605909925039955e-06, "loss": 0.5925, "step": 44610 }, { "epoch": 0.8103297980531745, "grad_norm": 0.7875806410674625, "learning_rate": 1.7573265998589506e-06, "loss": 0.6055, "step": 44620 }, { "epoch": 0.8105114049106494, "grad_norm": 0.7217962618979262, "learning_rate": 1.754064944799203e-06, "loss": 0.6051, "step": 44630 }, { "epoch": 0.8106930117681244, "grad_norm": 0.7825456850634832, "learning_rate": 1.7508060284080186e-06, "loss": 0.6088, "step": 44640 }, { "epoch": 0.8108746186255993, "grad_norm": 0.7404150648339282, "learning_rate": 1.7475498517677669e-06, "loss": 0.6014, "step": 44650 }, { "epoch": 0.8110562254830742, "grad_norm": 0.7576265715632314, "learning_rate": 1.7442964159598941e-06, "loss": 0.5974, "step": 44660 }, { "epoch": 0.8112378323405491, "grad_norm": 0.7865021746012597, "learning_rate": 1.741045722064948e-06, "loss": 0.6166, "step": 44670 }, { "epoch": 0.8114194391980242, "grad_norm": 0.7402872753793972, "learning_rate": 1.737797771162556e-06, "loss": 0.6112, "step": 44680 }, { "epoch": 0.8116010460554991, "grad_norm": 0.7520392292753979, "learning_rate": 1.734552564331442e-06, "loss": 0.6125, "step": 44690 }, { "epoch": 0.811782652912974, "grad_norm": 0.7869599498577355, "learning_rate": 1.7313101026494128e-06, "loss": 0.5944, "step": 44700 }, { "epoch": 0.811964259770449, "grad_norm": 0.7674189406557513, "learning_rate": 1.728070387193369e-06, "loss": 0.5974, "step": 44710 }, { "epoch": 0.8121458666279239, "grad_norm": 0.7749660102987824, "learning_rate": 1.7248334190392924e-06, "loss": 0.6142, "step": 44720 }, { "epoch": 0.8123274734853988, "grad_norm": 0.7703066136612297, "learning_rate": 1.7215991992622606e-06, "loss": 0.6011, "step": 44730 }, { "epoch": 0.8125090803428737, "grad_norm": 0.7417002103042404, "learning_rate": 1.7183677289364309e-06, "loss": 0.6136, "step": 44740 }, { "epoch": 0.8126906872003486, "grad_norm": 0.7444902402711399, "learning_rate": 1.7151390091350484e-06, "loss": 0.6071, "step": 44750 }, { "epoch": 0.8128722940578236, "grad_norm": 0.7620036268657873, "learning_rate": 1.7119130409304519e-06, "loss": 0.6101, "step": 44760 }, { "epoch": 0.8130539009152986, "grad_norm": 0.7536783862522696, "learning_rate": 1.7086898253940565e-06, "loss": 0.6102, "step": 44770 }, { "epoch": 0.8132355077727735, "grad_norm": 0.7521270485421665, "learning_rate": 1.7054693635963715e-06, "loss": 0.6069, "step": 44780 }, { "epoch": 0.8134171146302485, "grad_norm": 0.7509560047498899, "learning_rate": 1.7022516566069846e-06, "loss": 0.6135, "step": 44790 }, { "epoch": 0.8135987214877234, "grad_norm": 0.7431967229655528, "learning_rate": 1.6990367054945756e-06, "loss": 0.6085, "step": 44800 }, { "epoch": 0.8137803283451983, "grad_norm": 0.7546069840045943, "learning_rate": 1.6958245113269e-06, "loss": 0.5883, "step": 44810 }, { "epoch": 0.8139619352026732, "grad_norm": 0.767417716426412, "learning_rate": 1.692615075170808e-06, "loss": 0.6049, "step": 44820 }, { "epoch": 0.8141435420601482, "grad_norm": 0.8104531230827686, "learning_rate": 1.689408398092225e-06, "loss": 0.6037, "step": 44830 }, { "epoch": 0.8143251489176231, "grad_norm": 0.7861271693019676, "learning_rate": 1.6862044811561663e-06, "loss": 0.6067, "step": 44840 }, { "epoch": 0.814506755775098, "grad_norm": 0.7399358097391419, "learning_rate": 1.6830033254267275e-06, "loss": 0.6108, "step": 44850 }, { "epoch": 0.814688362632573, "grad_norm": 0.7553825075175628, "learning_rate": 1.679804931967085e-06, "loss": 0.5975, "step": 44860 }, { "epoch": 0.814869969490048, "grad_norm": 0.7606675172833707, "learning_rate": 1.6766093018395047e-06, "loss": 0.6048, "step": 44870 }, { "epoch": 0.8150515763475229, "grad_norm": 0.7666267828065743, "learning_rate": 1.673416436105324e-06, "loss": 0.6051, "step": 44880 }, { "epoch": 0.8152331832049978, "grad_norm": 0.7669595808351932, "learning_rate": 1.670226335824976e-06, "loss": 0.6026, "step": 44890 }, { "epoch": 0.8154147900624727, "grad_norm": 0.7233728203880738, "learning_rate": 1.667039002057962e-06, "loss": 0.6077, "step": 44900 }, { "epoch": 0.8155963969199477, "grad_norm": 0.7619709563875166, "learning_rate": 1.663854435862875e-06, "loss": 0.612, "step": 44910 }, { "epoch": 0.8157780037774226, "grad_norm": 0.7581556986830876, "learning_rate": 1.6606726382973781e-06, "loss": 0.6074, "step": 44920 }, { "epoch": 0.8159596106348975, "grad_norm": 0.7778458354090645, "learning_rate": 1.6574936104182281e-06, "loss": 0.6071, "step": 44930 }, { "epoch": 0.8161412174923726, "grad_norm": 0.738995011194599, "learning_rate": 1.6543173532812496e-06, "loss": 0.6076, "step": 44940 }, { "epoch": 0.8163228243498475, "grad_norm": 0.8167571097163119, "learning_rate": 1.6511438679413549e-06, "loss": 0.615, "step": 44950 }, { "epoch": 0.8165044312073224, "grad_norm": 0.7674716510312597, "learning_rate": 1.6479731554525336e-06, "loss": 0.6092, "step": 44960 }, { "epoch": 0.8166860380647973, "grad_norm": 0.8094845100073017, "learning_rate": 1.6448052168678485e-06, "loss": 0.6113, "step": 44970 }, { "epoch": 0.8168676449222723, "grad_norm": 0.7619269886998477, "learning_rate": 1.6416400532394528e-06, "loss": 0.6122, "step": 44980 }, { "epoch": 0.8170492517797472, "grad_norm": 0.7779162953308667, "learning_rate": 1.6384776656185664e-06, "loss": 0.6146, "step": 44990 }, { "epoch": 0.8172308586372221, "grad_norm": 0.7571588231679076, "learning_rate": 1.635318055055496e-06, "loss": 0.5899, "step": 45000 }, { "epoch": 0.817412465494697, "grad_norm": 0.8158635919002475, "learning_rate": 1.6321612225996197e-06, "loss": 0.6126, "step": 45010 }, { "epoch": 0.817594072352172, "grad_norm": 0.750518185084995, "learning_rate": 1.629007169299398e-06, "loss": 0.6036, "step": 45020 }, { "epoch": 0.817775679209647, "grad_norm": 0.7705205444319552, "learning_rate": 1.6258558962023662e-06, "loss": 0.59, "step": 45030 }, { "epoch": 0.8179572860671219, "grad_norm": 0.7672255467661828, "learning_rate": 1.622707404355134e-06, "loss": 0.6061, "step": 45040 }, { "epoch": 0.8181388929245968, "grad_norm": 0.7466000013612077, "learning_rate": 1.619561694803392e-06, "loss": 0.6094, "step": 45050 }, { "epoch": 0.8183204997820718, "grad_norm": 0.7801741517838446, "learning_rate": 1.616418768591901e-06, "loss": 0.6057, "step": 45060 }, { "epoch": 0.8185021066395467, "grad_norm": 0.722829742207297, "learning_rate": 1.613278626764504e-06, "loss": 0.6044, "step": 45070 }, { "epoch": 0.8186837134970216, "grad_norm": 0.7926435126355671, "learning_rate": 1.6101412703641128e-06, "loss": 0.6027, "step": 45080 }, { "epoch": 0.8188653203544966, "grad_norm": 0.7589566194513302, "learning_rate": 1.6070067004327195e-06, "loss": 0.6009, "step": 45090 }, { "epoch": 0.8190469272119715, "grad_norm": 0.7202029076588582, "learning_rate": 1.6038749180113855e-06, "loss": 0.6046, "step": 45100 }, { "epoch": 0.8192285340694465, "grad_norm": 0.7559369778459007, "learning_rate": 1.6007459241402523e-06, "loss": 0.6069, "step": 45110 }, { "epoch": 0.8194101409269214, "grad_norm": 0.7783331625779493, "learning_rate": 1.5976197198585297e-06, "loss": 0.6108, "step": 45120 }, { "epoch": 0.8195917477843964, "grad_norm": 0.8076094237497319, "learning_rate": 1.594496306204506e-06, "loss": 0.6155, "step": 45130 }, { "epoch": 0.8197733546418713, "grad_norm": 0.7573066320850887, "learning_rate": 1.591375684215537e-06, "loss": 0.6002, "step": 45140 }, { "epoch": 0.8199549614993462, "grad_norm": 0.7515069996444279, "learning_rate": 1.5882578549280592e-06, "loss": 0.5968, "step": 45150 }, { "epoch": 0.8201365683568211, "grad_norm": 0.7559180360993281, "learning_rate": 1.585142819377573e-06, "loss": 0.6023, "step": 45160 }, { "epoch": 0.8203181752142961, "grad_norm": 0.746852950037934, "learning_rate": 1.5820305785986545e-06, "loss": 0.5981, "step": 45170 }, { "epoch": 0.820499782071771, "grad_norm": 0.7592415641019811, "learning_rate": 1.5789211336249555e-06, "loss": 0.6068, "step": 45180 }, { "epoch": 0.8206813889292459, "grad_norm": 0.7347488686002405, "learning_rate": 1.5758144854891898e-06, "loss": 0.5969, "step": 45190 }, { "epoch": 0.820862995786721, "grad_norm": 0.7833294034835616, "learning_rate": 1.5727106352231558e-06, "loss": 0.6062, "step": 45200 }, { "epoch": 0.8210446026441959, "grad_norm": 0.8065500103611223, "learning_rate": 1.5696095838577074e-06, "loss": 0.6005, "step": 45210 }, { "epoch": 0.8212262095016708, "grad_norm": 0.7480366400875708, "learning_rate": 1.5665113324227831e-06, "loss": 0.5986, "step": 45220 }, { "epoch": 0.8214078163591457, "grad_norm": 0.8055514517678343, "learning_rate": 1.5634158819473789e-06, "loss": 0.6096, "step": 45230 }, { "epoch": 0.8215894232166207, "grad_norm": 0.737294812819905, "learning_rate": 1.560323233459573e-06, "loss": 0.6191, "step": 45240 }, { "epoch": 0.8217710300740956, "grad_norm": 0.7641723831689754, "learning_rate": 1.557233387986502e-06, "loss": 0.6016, "step": 45250 }, { "epoch": 0.8219526369315705, "grad_norm": 0.7477944472990905, "learning_rate": 1.554146346554376e-06, "loss": 0.6015, "step": 45260 }, { "epoch": 0.8221342437890454, "grad_norm": 0.7383427951913821, "learning_rate": 1.5510621101884772e-06, "loss": 0.608, "step": 45270 }, { "epoch": 0.8223158506465205, "grad_norm": 0.7834812508281271, "learning_rate": 1.547980679913148e-06, "loss": 0.5951, "step": 45280 }, { "epoch": 0.8224974575039954, "grad_norm": 0.8118697452353113, "learning_rate": 1.5449020567518091e-06, "loss": 0.6068, "step": 45290 }, { "epoch": 0.8226790643614703, "grad_norm": 0.7697434485750223, "learning_rate": 1.5418262417269391e-06, "loss": 0.6107, "step": 45300 }, { "epoch": 0.8228606712189452, "grad_norm": 0.7481067389358659, "learning_rate": 1.5387532358600922e-06, "loss": 0.6106, "step": 45310 }, { "epoch": 0.8230422780764202, "grad_norm": 0.7868616644301757, "learning_rate": 1.5356830401718815e-06, "loss": 0.5968, "step": 45320 }, { "epoch": 0.8232238849338951, "grad_norm": 0.7859509979344687, "learning_rate": 1.5326156556819948e-06, "loss": 0.6063, "step": 45330 }, { "epoch": 0.82340549179137, "grad_norm": 0.7362842148887591, "learning_rate": 1.5295510834091799e-06, "loss": 0.6053, "step": 45340 }, { "epoch": 0.8235870986488449, "grad_norm": 0.735146155423451, "learning_rate": 1.5264893243712564e-06, "loss": 0.5964, "step": 45350 }, { "epoch": 0.8237687055063199, "grad_norm": 0.7614894696174953, "learning_rate": 1.5234303795851046e-06, "loss": 0.6085, "step": 45360 }, { "epoch": 0.8239503123637949, "grad_norm": 0.7665538855140923, "learning_rate": 1.5203742500666685e-06, "loss": 0.6106, "step": 45370 }, { "epoch": 0.8241319192212698, "grad_norm": 0.7520337499474341, "learning_rate": 1.517320936830966e-06, "loss": 0.6084, "step": 45380 }, { "epoch": 0.8243135260787448, "grad_norm": 0.8370124265749539, "learning_rate": 1.5142704408920695e-06, "loss": 0.6189, "step": 45390 }, { "epoch": 0.8244951329362197, "grad_norm": 0.7450704525145339, "learning_rate": 1.5112227632631216e-06, "loss": 0.5922, "step": 45400 }, { "epoch": 0.8246767397936946, "grad_norm": 0.7805181451426672, "learning_rate": 1.5081779049563317e-06, "loss": 0.6184, "step": 45410 }, { "epoch": 0.8248583466511695, "grad_norm": 0.7422647035709451, "learning_rate": 1.505135866982962e-06, "loss": 0.6024, "step": 45420 }, { "epoch": 0.8250399535086445, "grad_norm": 0.770578154415408, "learning_rate": 1.5020966503533507e-06, "loss": 0.592, "step": 45430 }, { "epoch": 0.8252215603661194, "grad_norm": 0.7330235164695539, "learning_rate": 1.4990602560768886e-06, "loss": 0.605, "step": 45440 }, { "epoch": 0.8254031672235944, "grad_norm": 0.7819935109029226, "learning_rate": 1.4960266851620364e-06, "loss": 0.6101, "step": 45450 }, { "epoch": 0.8255847740810693, "grad_norm": 0.7643814218059038, "learning_rate": 1.4929959386163118e-06, "loss": 0.6094, "step": 45460 }, { "epoch": 0.8257663809385443, "grad_norm": 0.7514764068987838, "learning_rate": 1.4899680174462994e-06, "loss": 0.6042, "step": 45470 }, { "epoch": 0.8259479877960192, "grad_norm": 0.7351973030392731, "learning_rate": 1.4869429226576393e-06, "loss": 0.6086, "step": 45480 }, { "epoch": 0.8261295946534941, "grad_norm": 0.7677604562206056, "learning_rate": 1.4839206552550422e-06, "loss": 0.6037, "step": 45490 }, { "epoch": 0.826311201510969, "grad_norm": 0.7435435257210141, "learning_rate": 1.4809012162422676e-06, "loss": 0.6125, "step": 45500 }, { "epoch": 0.826492808368444, "grad_norm": 0.7668091988348902, "learning_rate": 1.4778846066221465e-06, "loss": 0.6049, "step": 45510 }, { "epoch": 0.8266744152259189, "grad_norm": 0.7963785366844215, "learning_rate": 1.4748708273965628e-06, "loss": 0.6115, "step": 45520 }, { "epoch": 0.8268560220833938, "grad_norm": 0.7621830453312801, "learning_rate": 1.4718598795664673e-06, "loss": 0.6202, "step": 45530 }, { "epoch": 0.8270376289408689, "grad_norm": 0.793571342656397, "learning_rate": 1.4688517641318612e-06, "loss": 0.616, "step": 45540 }, { "epoch": 0.8272192357983438, "grad_norm": 0.7584651648448801, "learning_rate": 1.465846482091815e-06, "loss": 0.6055, "step": 45550 }, { "epoch": 0.8274008426558187, "grad_norm": 0.7456596129636716, "learning_rate": 1.4628440344444516e-06, "loss": 0.5944, "step": 45560 }, { "epoch": 0.8275824495132936, "grad_norm": 0.7555413505063553, "learning_rate": 1.4598444221869524e-06, "loss": 0.6088, "step": 45570 }, { "epoch": 0.8277640563707686, "grad_norm": 0.7466612400486148, "learning_rate": 1.4568476463155623e-06, "loss": 0.6078, "step": 45580 }, { "epoch": 0.8279456632282435, "grad_norm": 0.7854642312455539, "learning_rate": 1.4538537078255777e-06, "loss": 0.6072, "step": 45590 }, { "epoch": 0.8281272700857184, "grad_norm": 0.772431604254803, "learning_rate": 1.4508626077113596e-06, "loss": 0.6097, "step": 45600 }, { "epoch": 0.8283088769431933, "grad_norm": 0.7749300965115206, "learning_rate": 1.447874346966318e-06, "loss": 0.593, "step": 45610 }, { "epoch": 0.8284904838006684, "grad_norm": 0.7226250255272013, "learning_rate": 1.4448889265829291e-06, "loss": 0.612, "step": 45620 }, { "epoch": 0.8286720906581433, "grad_norm": 0.762219138619419, "learning_rate": 1.4419063475527163e-06, "loss": 0.6098, "step": 45630 }, { "epoch": 0.8288536975156182, "grad_norm": 0.7530905545066207, "learning_rate": 1.4389266108662691e-06, "loss": 0.6075, "step": 45640 }, { "epoch": 0.8290353043730931, "grad_norm": 0.752290668920139, "learning_rate": 1.4359497175132252e-06, "loss": 0.6103, "step": 45650 }, { "epoch": 0.8292169112305681, "grad_norm": 0.756924677254634, "learning_rate": 1.4329756684822793e-06, "loss": 0.5988, "step": 45660 }, { "epoch": 0.829398518088043, "grad_norm": 0.7332591068754777, "learning_rate": 1.4300044647611876e-06, "loss": 0.6154, "step": 45670 }, { "epoch": 0.8295801249455179, "grad_norm": 0.7451804278020893, "learning_rate": 1.42703610733675e-06, "loss": 0.6033, "step": 45680 }, { "epoch": 0.8297617318029928, "grad_norm": 0.7807607213262615, "learning_rate": 1.4240705971948343e-06, "loss": 0.607, "step": 45690 }, { "epoch": 0.8299433386604678, "grad_norm": 0.7627524129571159, "learning_rate": 1.4211079353203516e-06, "loss": 0.5999, "step": 45700 }, { "epoch": 0.8301249455179428, "grad_norm": 0.7417085985712628, "learning_rate": 1.4181481226972738e-06, "loss": 0.6007, "step": 45710 }, { "epoch": 0.8303065523754177, "grad_norm": 0.7652558344781752, "learning_rate": 1.415191160308621e-06, "loss": 0.6138, "step": 45720 }, { "epoch": 0.8304881592328927, "grad_norm": 0.766110986338471, "learning_rate": 1.4122370491364757e-06, "loss": 0.609, "step": 45730 }, { "epoch": 0.8306697660903676, "grad_norm": 0.7486829323713908, "learning_rate": 1.4092857901619604e-06, "loss": 0.5999, "step": 45740 }, { "epoch": 0.8308513729478425, "grad_norm": 0.7615874382345634, "learning_rate": 1.406337384365264e-06, "loss": 0.6121, "step": 45750 }, { "epoch": 0.8310329798053174, "grad_norm": 0.753786355939292, "learning_rate": 1.403391832725617e-06, "loss": 0.6001, "step": 45760 }, { "epoch": 0.8312145866627924, "grad_norm": 0.7448237404278752, "learning_rate": 1.4004491362213057e-06, "loss": 0.6052, "step": 45770 }, { "epoch": 0.8313961935202673, "grad_norm": 0.7778500036520022, "learning_rate": 1.3975092958296731e-06, "loss": 0.6316, "step": 45780 }, { "epoch": 0.8315778003777423, "grad_norm": 0.7746699394739536, "learning_rate": 1.3945723125271049e-06, "loss": 0.5999, "step": 45790 }, { "epoch": 0.8317594072352172, "grad_norm": 0.7961686025483308, "learning_rate": 1.3916381872890427e-06, "loss": 0.6123, "step": 45800 }, { "epoch": 0.8319410140926922, "grad_norm": 0.7564645579309625, "learning_rate": 1.3887069210899817e-06, "loss": 0.6059, "step": 45810 }, { "epoch": 0.8321226209501671, "grad_norm": 0.7440488462796607, "learning_rate": 1.385778514903461e-06, "loss": 0.6036, "step": 45820 }, { "epoch": 0.832304227807642, "grad_norm": 0.7933912027668201, "learning_rate": 1.3828529697020744e-06, "loss": 0.6094, "step": 45830 }, { "epoch": 0.8324858346651169, "grad_norm": 0.8032343900403964, "learning_rate": 1.3799302864574627e-06, "loss": 0.606, "step": 45840 }, { "epoch": 0.8326674415225919, "grad_norm": 0.7829101679610465, "learning_rate": 1.3770104661403205e-06, "loss": 0.6151, "step": 45850 }, { "epoch": 0.8328490483800668, "grad_norm": 0.7257021708970014, "learning_rate": 1.3740935097203845e-06, "loss": 0.6022, "step": 45860 }, { "epoch": 0.8330306552375417, "grad_norm": 0.7595638915554797, "learning_rate": 1.3711794181664496e-06, "loss": 0.6155, "step": 45870 }, { "epoch": 0.8332122620950168, "grad_norm": 0.7507519020264165, "learning_rate": 1.3682681924463482e-06, "loss": 0.604, "step": 45880 }, { "epoch": 0.8333938689524917, "grad_norm": 0.7607608880210237, "learning_rate": 1.3653598335269714e-06, "loss": 0.596, "step": 45890 }, { "epoch": 0.8335754758099666, "grad_norm": 0.7642459543282365, "learning_rate": 1.3624543423742497e-06, "loss": 0.6103, "step": 45900 }, { "epoch": 0.8337570826674415, "grad_norm": 0.7416710878472343, "learning_rate": 1.3595517199531693e-06, "loss": 0.6063, "step": 45910 }, { "epoch": 0.8339386895249165, "grad_norm": 0.7769927173559025, "learning_rate": 1.3566519672277545e-06, "loss": 0.6122, "step": 45920 }, { "epoch": 0.8341202963823914, "grad_norm": 0.7574905627986358, "learning_rate": 1.3537550851610858e-06, "loss": 0.5982, "step": 45930 }, { "epoch": 0.8343019032398663, "grad_norm": 0.7634360795399904, "learning_rate": 1.3508610747152817e-06, "loss": 0.6033, "step": 45940 }, { "epoch": 0.8344835100973412, "grad_norm": 0.7681084729935626, "learning_rate": 1.3479699368515142e-06, "loss": 0.6038, "step": 45950 }, { "epoch": 0.8346651169548163, "grad_norm": 0.8082745865479839, "learning_rate": 1.3450816725299964e-06, "loss": 0.6247, "step": 45960 }, { "epoch": 0.8348467238122912, "grad_norm": 0.7614429646626037, "learning_rate": 1.3421962827099865e-06, "loss": 0.6096, "step": 45970 }, { "epoch": 0.8350283306697661, "grad_norm": 0.7260642123470069, "learning_rate": 1.3393137683497958e-06, "loss": 0.5855, "step": 45980 }, { "epoch": 0.835209937527241, "grad_norm": 0.7340872054451397, "learning_rate": 1.3364341304067685e-06, "loss": 0.6173, "step": 45990 }, { "epoch": 0.835391544384716, "grad_norm": 0.7515137513697888, "learning_rate": 1.3335573698373061e-06, "loss": 0.5929, "step": 46000 }, { "epoch": 0.8355731512421909, "grad_norm": 0.7315669409211204, "learning_rate": 1.3306834875968422e-06, "loss": 0.6034, "step": 46010 }, { "epoch": 0.8357547580996658, "grad_norm": 0.7490707029911932, "learning_rate": 1.3278124846398665e-06, "loss": 0.6035, "step": 46020 }, { "epoch": 0.8359363649571407, "grad_norm": 0.8274226369653472, "learning_rate": 1.324944361919901e-06, "loss": 0.6217, "step": 46030 }, { "epoch": 0.8361179718146157, "grad_norm": 0.7845857790486066, "learning_rate": 1.3220791203895222e-06, "loss": 0.6113, "step": 46040 }, { "epoch": 0.8362995786720907, "grad_norm": 0.7877630024460193, "learning_rate": 1.3192167610003404e-06, "loss": 0.6057, "step": 46050 }, { "epoch": 0.8364811855295656, "grad_norm": 0.791174535335336, "learning_rate": 1.3163572847030103e-06, "loss": 0.6109, "step": 46060 }, { "epoch": 0.8366627923870406, "grad_norm": 0.7695798454314391, "learning_rate": 1.3135006924472372e-06, "loss": 0.6069, "step": 46070 }, { "epoch": 0.8368443992445155, "grad_norm": 0.7614617143515143, "learning_rate": 1.310646985181756e-06, "loss": 0.6053, "step": 46080 }, { "epoch": 0.8370260061019904, "grad_norm": 0.7243389464294908, "learning_rate": 1.3077961638543546e-06, "loss": 0.6018, "step": 46090 }, { "epoch": 0.8372076129594653, "grad_norm": 0.7242386955769279, "learning_rate": 1.3049482294118553e-06, "loss": 0.6022, "step": 46100 }, { "epoch": 0.8373892198169403, "grad_norm": 0.7812125947984032, "learning_rate": 1.3021031828001252e-06, "loss": 0.6098, "step": 46110 }, { "epoch": 0.8375708266744152, "grad_norm": 0.7602798201361457, "learning_rate": 1.299261024964069e-06, "loss": 0.6288, "step": 46120 }, { "epoch": 0.8377524335318902, "grad_norm": 0.769346383801071, "learning_rate": 1.2964217568476379e-06, "loss": 0.6028, "step": 46130 }, { "epoch": 0.8379340403893651, "grad_norm": 0.7554247208045762, "learning_rate": 1.2935853793938146e-06, "loss": 0.6126, "step": 46140 }, { "epoch": 0.8381156472468401, "grad_norm": 0.7385263428105249, "learning_rate": 1.2907518935446317e-06, "loss": 0.5939, "step": 46150 }, { "epoch": 0.838297254104315, "grad_norm": 0.7471023208519229, "learning_rate": 1.2879213002411538e-06, "loss": 0.5994, "step": 46160 }, { "epoch": 0.8384788609617899, "grad_norm": 0.74770994346501, "learning_rate": 1.2850936004234848e-06, "loss": 0.6044, "step": 46170 }, { "epoch": 0.8386604678192648, "grad_norm": 0.7628648105786163, "learning_rate": 1.2822687950307744e-06, "loss": 0.5989, "step": 46180 }, { "epoch": 0.8388420746767398, "grad_norm": 0.7455747244333708, "learning_rate": 1.2794468850012044e-06, "loss": 0.6079, "step": 46190 }, { "epoch": 0.8390236815342147, "grad_norm": 0.7804517360758885, "learning_rate": 1.276627871271997e-06, "loss": 0.606, "step": 46200 }, { "epoch": 0.8392052883916896, "grad_norm": 0.7701084921169288, "learning_rate": 1.273811754779416e-06, "loss": 0.6045, "step": 46210 }, { "epoch": 0.8393868952491647, "grad_norm": 0.7357699471318754, "learning_rate": 1.2709985364587551e-06, "loss": 0.6001, "step": 46220 }, { "epoch": 0.8395685021066396, "grad_norm": 0.7540225413965421, "learning_rate": 1.268188217244355e-06, "loss": 0.6192, "step": 46230 }, { "epoch": 0.8397501089641145, "grad_norm": 0.7612477866413035, "learning_rate": 1.2653807980695855e-06, "loss": 0.6079, "step": 46240 }, { "epoch": 0.8399317158215894, "grad_norm": 0.7753348328629496, "learning_rate": 1.2625762798668574e-06, "loss": 0.5979, "step": 46250 }, { "epoch": 0.8401133226790644, "grad_norm": 0.7944717229282624, "learning_rate": 1.2597746635676155e-06, "loss": 0.599, "step": 46260 }, { "epoch": 0.8402949295365393, "grad_norm": 0.7553976819948377, "learning_rate": 1.256975950102346e-06, "loss": 0.6093, "step": 46270 }, { "epoch": 0.8404765363940142, "grad_norm": 0.7367746564543207, "learning_rate": 1.2541801404005637e-06, "loss": 0.5931, "step": 46280 }, { "epoch": 0.8406581432514891, "grad_norm": 0.7996080866675169, "learning_rate": 1.2513872353908252e-06, "loss": 0.6036, "step": 46290 }, { "epoch": 0.8408397501089642, "grad_norm": 0.7597724559629313, "learning_rate": 1.2485972360007159e-06, "loss": 0.5967, "step": 46300 }, { "epoch": 0.8410213569664391, "grad_norm": 0.8099024091434388, "learning_rate": 1.245810143156866e-06, "loss": 0.5926, "step": 46310 }, { "epoch": 0.841202963823914, "grad_norm": 0.7298211045867223, "learning_rate": 1.2430259577849279e-06, "loss": 0.5975, "step": 46320 }, { "epoch": 0.8413845706813889, "grad_norm": 0.7584178946668593, "learning_rate": 1.2402446808096014e-06, "loss": 0.6162, "step": 46330 }, { "epoch": 0.8415661775388639, "grad_norm": 0.7756051773613708, "learning_rate": 1.2374663131546071e-06, "loss": 0.602, "step": 46340 }, { "epoch": 0.8417477843963388, "grad_norm": 0.7536897011021936, "learning_rate": 1.2346908557427128e-06, "loss": 0.6023, "step": 46350 }, { "epoch": 0.8419293912538137, "grad_norm": 0.7388826225205428, "learning_rate": 1.23191830949571e-06, "loss": 0.6107, "step": 46360 }, { "epoch": 0.8421109981112886, "grad_norm": 0.7426799912839711, "learning_rate": 1.229148675334424e-06, "loss": 0.5961, "step": 46370 }, { "epoch": 0.8422926049687636, "grad_norm": 0.7313061879938568, "learning_rate": 1.2263819541787193e-06, "loss": 0.5913, "step": 46380 }, { "epoch": 0.8424742118262386, "grad_norm": 0.7435987153514182, "learning_rate": 1.2236181469474851e-06, "loss": 0.6039, "step": 46390 }, { "epoch": 0.8426558186837135, "grad_norm": 0.769349863939767, "learning_rate": 1.2208572545586506e-06, "loss": 0.6106, "step": 46400 }, { "epoch": 0.8428374255411885, "grad_norm": 0.7525780991296108, "learning_rate": 1.21809927792917e-06, "loss": 0.6067, "step": 46410 }, { "epoch": 0.8430190323986634, "grad_norm": 0.7582475000418681, "learning_rate": 1.215344217975034e-06, "loss": 0.6054, "step": 46420 }, { "epoch": 0.8432006392561383, "grad_norm": 0.8215577082899334, "learning_rate": 1.2125920756112609e-06, "loss": 0.6186, "step": 46430 }, { "epoch": 0.8433822461136132, "grad_norm": 0.7255676888795436, "learning_rate": 1.2098428517519045e-06, "loss": 0.6044, "step": 46440 }, { "epoch": 0.8435638529710882, "grad_norm": 0.7429480170117697, "learning_rate": 1.2070965473100449e-06, "loss": 0.6017, "step": 46450 }, { "epoch": 0.8437454598285631, "grad_norm": 0.7993894651084388, "learning_rate": 1.2043531631977912e-06, "loss": 0.6131, "step": 46460 }, { "epoch": 0.8439270666860381, "grad_norm": 0.764250603960519, "learning_rate": 1.2016127003262923e-06, "loss": 0.5964, "step": 46470 }, { "epoch": 0.844108673543513, "grad_norm": 0.7300700476091184, "learning_rate": 1.1988751596057135e-06, "loss": 0.6097, "step": 46480 }, { "epoch": 0.844290280400988, "grad_norm": 0.7540898324400989, "learning_rate": 1.1961405419452609e-06, "loss": 0.5887, "step": 46490 }, { "epoch": 0.8444718872584629, "grad_norm": 0.8019611579523442, "learning_rate": 1.1934088482531625e-06, "loss": 0.6024, "step": 46500 }, { "epoch": 0.8446534941159378, "grad_norm": 0.7819975362993454, "learning_rate": 1.1906800794366812e-06, "loss": 0.6186, "step": 46510 }, { "epoch": 0.8448351009734127, "grad_norm": 0.7114007535926536, "learning_rate": 1.1879542364021002e-06, "loss": 0.6002, "step": 46520 }, { "epoch": 0.8450167078308877, "grad_norm": 0.7627699077875955, "learning_rate": 1.1852313200547416e-06, "loss": 0.6024, "step": 46530 }, { "epoch": 0.8451983146883626, "grad_norm": 0.7408499954403502, "learning_rate": 1.1825113312989444e-06, "loss": 0.6033, "step": 46540 }, { "epoch": 0.8453799215458375, "grad_norm": 0.7242219620062257, "learning_rate": 1.179794271038086e-06, "loss": 0.594, "step": 46550 }, { "epoch": 0.8455615284033126, "grad_norm": 0.7716722575535309, "learning_rate": 1.1770801401745624e-06, "loss": 0.6066, "step": 46560 }, { "epoch": 0.8457431352607875, "grad_norm": 0.7478645766046583, "learning_rate": 1.1743689396098002e-06, "loss": 0.5926, "step": 46570 }, { "epoch": 0.8459247421182624, "grad_norm": 0.7574406349631279, "learning_rate": 1.1716606702442546e-06, "loss": 0.5999, "step": 46580 }, { "epoch": 0.8461063489757373, "grad_norm": 0.7404694795127527, "learning_rate": 1.1689553329774018e-06, "loss": 0.6091, "step": 46590 }, { "epoch": 0.8462879558332123, "grad_norm": 0.73599778791135, "learning_rate": 1.1662529287077505e-06, "loss": 0.5929, "step": 46600 }, { "epoch": 0.8464695626906872, "grad_norm": 0.7355872516607286, "learning_rate": 1.1635534583328356e-06, "loss": 0.607, "step": 46610 }, { "epoch": 0.8466511695481621, "grad_norm": 0.8106415568280255, "learning_rate": 1.1608569227492085e-06, "loss": 0.6068, "step": 46620 }, { "epoch": 0.846832776405637, "grad_norm": 0.7675318813691869, "learning_rate": 1.1581633228524568e-06, "loss": 0.5904, "step": 46630 }, { "epoch": 0.8470143832631121, "grad_norm": 0.7606574011970472, "learning_rate": 1.1554726595371845e-06, "loss": 0.5975, "step": 46640 }, { "epoch": 0.847195990120587, "grad_norm": 0.7870349459572205, "learning_rate": 1.1527849336970275e-06, "loss": 0.6038, "step": 46650 }, { "epoch": 0.8473775969780619, "grad_norm": 0.7502986209144342, "learning_rate": 1.1501001462246398e-06, "loss": 0.5916, "step": 46660 }, { "epoch": 0.8475592038355368, "grad_norm": 0.7310931342065532, "learning_rate": 1.1474182980117044e-06, "loss": 0.6094, "step": 46670 }, { "epoch": 0.8477408106930118, "grad_norm": 0.7571301017345503, "learning_rate": 1.1447393899489245e-06, "loss": 0.5936, "step": 46680 }, { "epoch": 0.8479224175504867, "grad_norm": 0.8330788334322222, "learning_rate": 1.1420634229260297e-06, "loss": 0.608, "step": 46690 }, { "epoch": 0.8481040244079616, "grad_norm": 0.7735669504449486, "learning_rate": 1.1393903978317688e-06, "loss": 0.6081, "step": 46700 }, { "epoch": 0.8482856312654365, "grad_norm": 0.7622773504084177, "learning_rate": 1.1367203155539208e-06, "loss": 0.5803, "step": 46710 }, { "epoch": 0.8484672381229115, "grad_norm": 0.7396854461763522, "learning_rate": 1.1340531769792773e-06, "loss": 0.6005, "step": 46720 }, { "epoch": 0.8486488449803865, "grad_norm": 0.7554518572419501, "learning_rate": 1.131388982993663e-06, "loss": 0.6133, "step": 46730 }, { "epoch": 0.8488304518378614, "grad_norm": 0.7637667701709474, "learning_rate": 1.1287277344819136e-06, "loss": 0.5997, "step": 46740 }, { "epoch": 0.8490120586953364, "grad_norm": 0.7612414055601809, "learning_rate": 1.1260694323278987e-06, "loss": 0.6042, "step": 46750 }, { "epoch": 0.8491936655528113, "grad_norm": 0.775777989629281, "learning_rate": 1.1234140774144975e-06, "loss": 0.6009, "step": 46760 }, { "epoch": 0.8493752724102862, "grad_norm": 0.7730687377237452, "learning_rate": 1.1207616706236168e-06, "loss": 0.6078, "step": 46770 }, { "epoch": 0.8495568792677611, "grad_norm": 0.8044386718978153, "learning_rate": 1.1181122128361854e-06, "loss": 0.6114, "step": 46780 }, { "epoch": 0.8497384861252361, "grad_norm": 0.7564202472586765, "learning_rate": 1.115465704932146e-06, "loss": 0.6197, "step": 46790 }, { "epoch": 0.849920092982711, "grad_norm": 0.9707937549999947, "learning_rate": 1.1128221477904723e-06, "loss": 0.6073, "step": 46800 }, { "epoch": 0.850101699840186, "grad_norm": 0.7255274206498579, "learning_rate": 1.1101815422891448e-06, "loss": 0.6059, "step": 46810 }, { "epoch": 0.850283306697661, "grad_norm": 0.722595859725567, "learning_rate": 1.1075438893051771e-06, "loss": 0.609, "step": 46820 }, { "epoch": 0.8504649135551359, "grad_norm": 0.769238270330243, "learning_rate": 1.1049091897145892e-06, "loss": 0.6045, "step": 46830 }, { "epoch": 0.8506465204126108, "grad_norm": 0.7529528550355656, "learning_rate": 1.1022774443924333e-06, "loss": 0.6059, "step": 46840 }, { "epoch": 0.8508281272700857, "grad_norm": 0.7678030751320154, "learning_rate": 1.0996486542127682e-06, "loss": 0.5984, "step": 46850 }, { "epoch": 0.8510097341275606, "grad_norm": 0.754228699453907, "learning_rate": 1.097022820048681e-06, "loss": 0.6118, "step": 46860 }, { "epoch": 0.8511913409850356, "grad_norm": 0.7933136130510405, "learning_rate": 1.0943999427722707e-06, "loss": 0.6037, "step": 46870 }, { "epoch": 0.8513729478425105, "grad_norm": 0.7944403682621036, "learning_rate": 1.0917800232546538e-06, "loss": 0.606, "step": 46880 }, { "epoch": 0.8515545546999854, "grad_norm": 0.7425266784867043, "learning_rate": 1.0891630623659732e-06, "loss": 0.6175, "step": 46890 }, { "epoch": 0.8517361615574605, "grad_norm": 0.7512013946396576, "learning_rate": 1.086549060975377e-06, "loss": 0.5956, "step": 46900 }, { "epoch": 0.8519177684149354, "grad_norm": 0.7544506454976909, "learning_rate": 1.0839380199510419e-06, "loss": 0.6021, "step": 46910 }, { "epoch": 0.8520993752724103, "grad_norm": 0.7837099898298056, "learning_rate": 1.0813299401601507e-06, "loss": 0.5967, "step": 46920 }, { "epoch": 0.8522809821298852, "grad_norm": 0.769531365712162, "learning_rate": 1.0787248224689128e-06, "loss": 0.6092, "step": 46930 }, { "epoch": 0.8524625889873602, "grad_norm": 0.7292753348389845, "learning_rate": 1.076122667742544e-06, "loss": 0.607, "step": 46940 }, { "epoch": 0.8526441958448351, "grad_norm": 0.746751690444065, "learning_rate": 1.0735234768452862e-06, "loss": 0.6028, "step": 46950 }, { "epoch": 0.85282580270231, "grad_norm": 0.7810554010069936, "learning_rate": 1.0709272506403878e-06, "loss": 0.6043, "step": 46960 }, { "epoch": 0.8530074095597849, "grad_norm": 0.7859990700410122, "learning_rate": 1.0683339899901169e-06, "loss": 0.5987, "step": 46970 }, { "epoch": 0.85318901641726, "grad_norm": 0.7950619199960635, "learning_rate": 1.0657436957557588e-06, "loss": 0.6092, "step": 46980 }, { "epoch": 0.8533706232747349, "grad_norm": 0.7930246248623071, "learning_rate": 1.0631563687976066e-06, "loss": 0.5968, "step": 46990 }, { "epoch": 0.8535522301322098, "grad_norm": 0.7670510675249508, "learning_rate": 1.060572009974975e-06, "loss": 0.6155, "step": 47000 }, { "epoch": 0.8537338369896847, "grad_norm": 0.7556526600640343, "learning_rate": 1.057990620146192e-06, "loss": 0.5994, "step": 47010 }, { "epoch": 0.8539154438471597, "grad_norm": 0.7608382436801863, "learning_rate": 1.055412200168594e-06, "loss": 0.6114, "step": 47020 }, { "epoch": 0.8540970507046346, "grad_norm": 0.8028892513534569, "learning_rate": 1.0528367508985383e-06, "loss": 0.6017, "step": 47030 }, { "epoch": 0.8542786575621095, "grad_norm": 0.7617343140875255, "learning_rate": 1.0502642731913882e-06, "loss": 0.6092, "step": 47040 }, { "epoch": 0.8544602644195844, "grad_norm": 0.7617205033853321, "learning_rate": 1.0476947679015282e-06, "loss": 0.605, "step": 47050 }, { "epoch": 0.8546418712770594, "grad_norm": 0.7807272534521028, "learning_rate": 1.0451282358823479e-06, "loss": 0.6183, "step": 47060 }, { "epoch": 0.8548234781345344, "grad_norm": 0.7395704233855511, "learning_rate": 1.0425646779862554e-06, "loss": 0.6008, "step": 47070 }, { "epoch": 0.8550050849920093, "grad_norm": 0.7527787723816234, "learning_rate": 1.0400040950646662e-06, "loss": 0.6088, "step": 47080 }, { "epoch": 0.8551866918494843, "grad_norm": 0.7409276818622944, "learning_rate": 1.0374464879680123e-06, "loss": 0.6082, "step": 47090 }, { "epoch": 0.8553682987069592, "grad_norm": 0.7428384144401213, "learning_rate": 1.034891857545731e-06, "loss": 0.6166, "step": 47100 }, { "epoch": 0.8555499055644341, "grad_norm": 0.7932383118474432, "learning_rate": 1.0323402046462804e-06, "loss": 0.612, "step": 47110 }, { "epoch": 0.855731512421909, "grad_norm": 0.7495911196044952, "learning_rate": 1.0297915301171201e-06, "loss": 0.603, "step": 47120 }, { "epoch": 0.855913119279384, "grad_norm": 0.7824893216280085, "learning_rate": 1.0272458348047276e-06, "loss": 0.6157, "step": 47130 }, { "epoch": 0.8560947261368589, "grad_norm": 0.7819001983490946, "learning_rate": 1.0247031195545854e-06, "loss": 0.6183, "step": 47140 }, { "epoch": 0.8562763329943338, "grad_norm": 0.7482921918326374, "learning_rate": 1.0221633852111911e-06, "loss": 0.617, "step": 47150 }, { "epoch": 0.8564579398518088, "grad_norm": 0.7627916596325393, "learning_rate": 1.0196266326180505e-06, "loss": 0.5963, "step": 47160 }, { "epoch": 0.8566395467092838, "grad_norm": 0.7491278775483206, "learning_rate": 1.0170928626176745e-06, "loss": 0.6079, "step": 47170 }, { "epoch": 0.8568211535667587, "grad_norm": 0.7589579802052714, "learning_rate": 1.0145620760515928e-06, "loss": 0.5951, "step": 47180 }, { "epoch": 0.8570027604242336, "grad_norm": 0.7689596401296745, "learning_rate": 1.012034273760335e-06, "loss": 0.6009, "step": 47190 }, { "epoch": 0.8571843672817085, "grad_norm": 0.757398659329747, "learning_rate": 1.0095094565834473e-06, "loss": 0.6007, "step": 47200 }, { "epoch": 0.8573659741391835, "grad_norm": 0.7604928043118948, "learning_rate": 1.0069876253594767e-06, "loss": 0.5981, "step": 47210 }, { "epoch": 0.8575475809966584, "grad_norm": 0.7552878897275472, "learning_rate": 1.0044687809259867e-06, "loss": 0.6127, "step": 47220 }, { "epoch": 0.8577291878541333, "grad_norm": 0.7804064452863361, "learning_rate": 1.0019529241195403e-06, "loss": 0.6042, "step": 47230 }, { "epoch": 0.8579107947116084, "grad_norm": 0.7724314565499104, "learning_rate": 9.994400557757166e-07, "loss": 0.613, "step": 47240 }, { "epoch": 0.8580924015690833, "grad_norm": 0.7486877804484524, "learning_rate": 9.96930176729094e-07, "loss": 0.5992, "step": 47250 }, { "epoch": 0.8582740084265582, "grad_norm": 0.7156401599986569, "learning_rate": 9.944232878132675e-07, "loss": 0.5978, "step": 47260 }, { "epoch": 0.8584556152840331, "grad_norm": 0.7535485060551712, "learning_rate": 9.919193898608304e-07, "loss": 0.6029, "step": 47270 }, { "epoch": 0.8586372221415081, "grad_norm": 0.811539817996338, "learning_rate": 9.894184837033838e-07, "loss": 0.6058, "step": 47280 }, { "epoch": 0.858818828998983, "grad_norm": 0.748950834823584, "learning_rate": 9.869205701715423e-07, "loss": 0.6064, "step": 47290 }, { "epoch": 0.8590004358564579, "grad_norm": 0.7625983364857367, "learning_rate": 9.844256500949178e-07, "loss": 0.6083, "step": 47300 }, { "epoch": 0.8591820427139328, "grad_norm": 0.7616140190113327, "learning_rate": 9.819337243021354e-07, "loss": 0.5932, "step": 47310 }, { "epoch": 0.8593636495714078, "grad_norm": 0.7623401873885904, "learning_rate": 9.794447936208174e-07, "loss": 0.6014, "step": 47320 }, { "epoch": 0.8595452564288828, "grad_norm": 0.7753651571793474, "learning_rate": 9.769588588776013e-07, "loss": 0.6103, "step": 47330 }, { "epoch": 0.8597268632863577, "grad_norm": 0.7252643234797767, "learning_rate": 9.744759208981192e-07, "loss": 0.6033, "step": 47340 }, { "epoch": 0.8599084701438326, "grad_norm": 0.7517973542163523, "learning_rate": 9.719959805070178e-07, "loss": 0.6229, "step": 47350 }, { "epoch": 0.8600900770013076, "grad_norm": 0.791350238063778, "learning_rate": 9.695190385279419e-07, "loss": 0.627, "step": 47360 }, { "epoch": 0.8602716838587825, "grad_norm": 0.7840431566844855, "learning_rate": 9.670450957835398e-07, "loss": 0.6046, "step": 47370 }, { "epoch": 0.8604532907162574, "grad_norm": 0.7432490765213231, "learning_rate": 9.645741530954689e-07, "loss": 0.6133, "step": 47380 }, { "epoch": 0.8606348975737323, "grad_norm": 0.7756925626345617, "learning_rate": 9.621062112843837e-07, "loss": 0.6005, "step": 47390 }, { "epoch": 0.8608165044312073, "grad_norm": 0.759440258764908, "learning_rate": 9.596412711699487e-07, "loss": 0.6045, "step": 47400 }, { "epoch": 0.8609981112886823, "grad_norm": 0.7694771170814524, "learning_rate": 9.57179333570829e-07, "loss": 0.6117, "step": 47410 }, { "epoch": 0.8611797181461572, "grad_norm": 0.7678811136265823, "learning_rate": 9.547203993046893e-07, "loss": 0.6069, "step": 47420 }, { "epoch": 0.8613613250036322, "grad_norm": 0.75732314245439, "learning_rate": 9.522644691882022e-07, "loss": 0.5994, "step": 47430 }, { "epoch": 0.8615429318611071, "grad_norm": 0.7350395281045633, "learning_rate": 9.498115440370359e-07, "loss": 0.5896, "step": 47440 }, { "epoch": 0.861724538718582, "grad_norm": 0.753513988164978, "learning_rate": 9.47361624665869e-07, "loss": 0.597, "step": 47450 }, { "epoch": 0.8619061455760569, "grad_norm": 0.7439555811199079, "learning_rate": 9.449147118883728e-07, "loss": 0.5976, "step": 47460 }, { "epoch": 0.8620877524335319, "grad_norm": 0.8012197505570235, "learning_rate": 9.424708065172283e-07, "loss": 0.6234, "step": 47470 }, { "epoch": 0.8622693592910068, "grad_norm": 0.7892887848128739, "learning_rate": 9.400299093641097e-07, "loss": 0.6059, "step": 47480 }, { "epoch": 0.8624509661484817, "grad_norm": 0.7952633651825813, "learning_rate": 9.375920212397016e-07, "loss": 0.6119, "step": 47490 }, { "epoch": 0.8626325730059567, "grad_norm": 0.7212879097665951, "learning_rate": 9.351571429536777e-07, "loss": 0.6114, "step": 47500 }, { "epoch": 0.8628141798634317, "grad_norm": 0.7767582609803537, "learning_rate": 9.327252753147243e-07, "loss": 0.6031, "step": 47510 }, { "epoch": 0.8629957867209066, "grad_norm": 0.7559234416531312, "learning_rate": 9.302964191305152e-07, "loss": 0.6076, "step": 47520 }, { "epoch": 0.8631773935783815, "grad_norm": 0.7182782632659969, "learning_rate": 9.278705752077377e-07, "loss": 0.5944, "step": 47530 }, { "epoch": 0.8633590004358564, "grad_norm": 0.781830344859526, "learning_rate": 9.254477443520648e-07, "loss": 0.6044, "step": 47540 }, { "epoch": 0.8635406072933314, "grad_norm": 0.7704396497834572, "learning_rate": 9.230279273681819e-07, "loss": 0.5999, "step": 47550 }, { "epoch": 0.8637222141508063, "grad_norm": 0.7335631137305684, "learning_rate": 9.206111250597627e-07, "loss": 0.5954, "step": 47560 }, { "epoch": 0.8639038210082812, "grad_norm": 0.7494704051434405, "learning_rate": 9.18197338229484e-07, "loss": 0.609, "step": 47570 }, { "epoch": 0.8640854278657563, "grad_norm": 0.800835547767644, "learning_rate": 9.157865676790223e-07, "loss": 0.5874, "step": 47580 }, { "epoch": 0.8642670347232312, "grad_norm": 0.7431658087623555, "learning_rate": 9.133788142090505e-07, "loss": 0.6106, "step": 47590 }, { "epoch": 0.8644486415807061, "grad_norm": 0.749613807625859, "learning_rate": 9.109740786192422e-07, "loss": 0.6045, "step": 47600 }, { "epoch": 0.864630248438181, "grad_norm": 0.7976821563418012, "learning_rate": 9.08572361708262e-07, "loss": 0.5936, "step": 47610 }, { "epoch": 0.864811855295656, "grad_norm": 0.7271082001661672, "learning_rate": 9.061736642737818e-07, "loss": 0.595, "step": 47620 }, { "epoch": 0.8649934621531309, "grad_norm": 0.7697594588691059, "learning_rate": 9.037779871124596e-07, "loss": 0.607, "step": 47630 }, { "epoch": 0.8651750690106058, "grad_norm": 0.7557477412485838, "learning_rate": 9.013853310199616e-07, "loss": 0.6029, "step": 47640 }, { "epoch": 0.8653566758680807, "grad_norm": 0.7814463929124721, "learning_rate": 8.989956967909408e-07, "loss": 0.6072, "step": 47650 }, { "epoch": 0.8655382827255557, "grad_norm": 0.7735533388757478, "learning_rate": 8.966090852190545e-07, "loss": 0.6046, "step": 47660 }, { "epoch": 0.8657198895830307, "grad_norm": 0.7624543116019231, "learning_rate": 8.942254970969511e-07, "loss": 0.5908, "step": 47670 }, { "epoch": 0.8659014964405056, "grad_norm": 0.7328388482226803, "learning_rate": 8.918449332162726e-07, "loss": 0.6093, "step": 47680 }, { "epoch": 0.8660831032979805, "grad_norm": 0.7909279013406909, "learning_rate": 8.894673943676646e-07, "loss": 0.6182, "step": 47690 }, { "epoch": 0.8662647101554555, "grad_norm": 0.7516651471406924, "learning_rate": 8.870928813407609e-07, "loss": 0.5944, "step": 47700 }, { "epoch": 0.8664463170129304, "grad_norm": 0.7489261365589478, "learning_rate": 8.847213949241961e-07, "loss": 0.6117, "step": 47710 }, { "epoch": 0.8666279238704053, "grad_norm": 0.756610479900079, "learning_rate": 8.823529359055926e-07, "loss": 0.614, "step": 47720 }, { "epoch": 0.8668095307278803, "grad_norm": 0.7621312063035002, "learning_rate": 8.799875050715745e-07, "loss": 0.5834, "step": 47730 }, { "epoch": 0.8669911375853552, "grad_norm": 0.7424104904506483, "learning_rate": 8.776251032077543e-07, "loss": 0.6108, "step": 47740 }, { "epoch": 0.8671727444428302, "grad_norm": 0.7647007020244536, "learning_rate": 8.752657310987445e-07, "loss": 0.6132, "step": 47750 }, { "epoch": 0.8673543513003051, "grad_norm": 0.7472984732198961, "learning_rate": 8.729093895281448e-07, "loss": 0.6101, "step": 47760 }, { "epoch": 0.8675359581577801, "grad_norm": 0.7567739921912671, "learning_rate": 8.705560792785517e-07, "loss": 0.5876, "step": 47770 }, { "epoch": 0.867717565015255, "grad_norm": 0.7442579018310277, "learning_rate": 8.682058011315564e-07, "loss": 0.5956, "step": 47780 }, { "epoch": 0.8678991718727299, "grad_norm": 0.7697398009440654, "learning_rate": 8.65858555867739e-07, "loss": 0.6163, "step": 47790 }, { "epoch": 0.8680807787302048, "grad_norm": 0.7144196788632393, "learning_rate": 8.635143442666771e-07, "loss": 0.6091, "step": 47800 }, { "epoch": 0.8682623855876798, "grad_norm": 0.7331716749281931, "learning_rate": 8.611731671069389e-07, "loss": 0.6087, "step": 47810 }, { "epoch": 0.8684439924451547, "grad_norm": 0.7240134891205644, "learning_rate": 8.588350251660815e-07, "loss": 0.6158, "step": 47820 }, { "epoch": 0.8686255993026296, "grad_norm": 0.7544970449029366, "learning_rate": 8.564999192206602e-07, "loss": 0.6039, "step": 47830 }, { "epoch": 0.8688072061601047, "grad_norm": 0.7237420380882149, "learning_rate": 8.541678500462147e-07, "loss": 0.5965, "step": 47840 }, { "epoch": 0.8689888130175796, "grad_norm": 0.7793499306997083, "learning_rate": 8.518388184172832e-07, "loss": 0.6003, "step": 47850 }, { "epoch": 0.8691704198750545, "grad_norm": 0.7594397625533995, "learning_rate": 8.495128251073881e-07, "loss": 0.6091, "step": 47860 }, { "epoch": 0.8693520267325294, "grad_norm": 0.7492955516922264, "learning_rate": 8.471898708890514e-07, "loss": 0.5914, "step": 47870 }, { "epoch": 0.8695336335900044, "grad_norm": 0.7294046205455741, "learning_rate": 8.448699565337759e-07, "loss": 0.5942, "step": 47880 }, { "epoch": 0.8697152404474793, "grad_norm": 0.7446107703136602, "learning_rate": 8.425530828120631e-07, "loss": 0.5961, "step": 47890 }, { "epoch": 0.8698968473049542, "grad_norm": 0.7932297677207725, "learning_rate": 8.402392504933987e-07, "loss": 0.6081, "step": 47900 }, { "epoch": 0.8700784541624291, "grad_norm": 0.7857089987010543, "learning_rate": 8.379284603462623e-07, "loss": 0.6191, "step": 47910 }, { "epoch": 0.8702600610199042, "grad_norm": 0.7597873077999314, "learning_rate": 8.3562071313812e-07, "loss": 0.5965, "step": 47920 }, { "epoch": 0.8704416678773791, "grad_norm": 0.755312726136528, "learning_rate": 8.33316009635431e-07, "loss": 0.6075, "step": 47930 }, { "epoch": 0.870623274734854, "grad_norm": 0.7212638053932224, "learning_rate": 8.310143506036384e-07, "loss": 0.6065, "step": 47940 }, { "epoch": 0.8708048815923289, "grad_norm": 0.7494693739651167, "learning_rate": 8.287157368071819e-07, "loss": 0.602, "step": 47950 }, { "epoch": 0.8709864884498039, "grad_norm": 0.824002104051019, "learning_rate": 8.264201690094809e-07, "loss": 0.6045, "step": 47960 }, { "epoch": 0.8711680953072788, "grad_norm": 0.7222529348227452, "learning_rate": 8.241276479729465e-07, "loss": 0.5942, "step": 47970 }, { "epoch": 0.8713497021647537, "grad_norm": 0.7773551728968554, "learning_rate": 8.218381744589843e-07, "loss": 0.6047, "step": 47980 }, { "epoch": 0.8715313090222286, "grad_norm": 0.7491387825749664, "learning_rate": 8.195517492279759e-07, "loss": 0.6054, "step": 47990 }, { "epoch": 0.8717129158797036, "grad_norm": 0.7783893725418312, "learning_rate": 8.172683730393005e-07, "loss": 0.6138, "step": 48000 }, { "epoch": 0.8718945227371786, "grad_norm": 0.7705385531167535, "learning_rate": 8.149880466513194e-07, "loss": 0.6133, "step": 48010 }, { "epoch": 0.8720761295946535, "grad_norm": 0.7960522789157416, "learning_rate": 8.127107708213844e-07, "loss": 0.5989, "step": 48020 }, { "epoch": 0.8722577364521285, "grad_norm": 0.7202962191965004, "learning_rate": 8.104365463058294e-07, "loss": 0.6051, "step": 48030 }, { "epoch": 0.8724393433096034, "grad_norm": 0.764475356278517, "learning_rate": 8.081653738599816e-07, "loss": 0.5859, "step": 48040 }, { "epoch": 0.8726209501670783, "grad_norm": 0.7294267690171113, "learning_rate": 8.05897254238146e-07, "loss": 0.6035, "step": 48050 }, { "epoch": 0.8728025570245532, "grad_norm": 0.7228725085606621, "learning_rate": 8.036321881936226e-07, "loss": 0.5874, "step": 48060 }, { "epoch": 0.8729841638820282, "grad_norm": 0.816493576984029, "learning_rate": 8.013701764786919e-07, "loss": 0.6136, "step": 48070 }, { "epoch": 0.8731657707395031, "grad_norm": 0.7654841331078336, "learning_rate": 7.991112198446183e-07, "loss": 0.6165, "step": 48080 }, { "epoch": 0.8733473775969781, "grad_norm": 0.736721426969504, "learning_rate": 7.968553190416573e-07, "loss": 0.5861, "step": 48090 }, { "epoch": 0.873528984454453, "grad_norm": 0.7563137975026405, "learning_rate": 7.94602474819044e-07, "loss": 0.5916, "step": 48100 }, { "epoch": 0.873710591311928, "grad_norm": 0.758004378318775, "learning_rate": 7.923526879250043e-07, "loss": 0.5912, "step": 48110 }, { "epoch": 0.8738921981694029, "grad_norm": 0.7873979882939869, "learning_rate": 7.901059591067417e-07, "loss": 0.6228, "step": 48120 }, { "epoch": 0.8740738050268778, "grad_norm": 0.8187738636327352, "learning_rate": 7.878622891104515e-07, "loss": 0.6158, "step": 48130 }, { "epoch": 0.8742554118843527, "grad_norm": 0.7227752356106337, "learning_rate": 7.856216786813042e-07, "loss": 0.6101, "step": 48140 }, { "epoch": 0.8744370187418277, "grad_norm": 0.7679347187087531, "learning_rate": 7.833841285634646e-07, "loss": 0.6051, "step": 48150 }, { "epoch": 0.8746186255993026, "grad_norm": 0.7989686352000126, "learning_rate": 7.811496395000706e-07, "loss": 0.603, "step": 48160 }, { "epoch": 0.8748002324567775, "grad_norm": 0.7380166916409296, "learning_rate": 7.789182122332517e-07, "loss": 0.5927, "step": 48170 }, { "epoch": 0.8749818393142526, "grad_norm": 0.7925009023608609, "learning_rate": 7.766898475041174e-07, "loss": 0.6239, "step": 48180 }, { "epoch": 0.8751634461717275, "grad_norm": 0.7352797957002852, "learning_rate": 7.744645460527555e-07, "loss": 0.6014, "step": 48190 }, { "epoch": 0.8753450530292024, "grad_norm": 0.7733438414446282, "learning_rate": 7.722423086182452e-07, "loss": 0.6137, "step": 48200 }, { "epoch": 0.8755266598866773, "grad_norm": 0.7851389353000932, "learning_rate": 7.700231359386445e-07, "loss": 0.5985, "step": 48210 }, { "epoch": 0.8757082667441523, "grad_norm": 0.7476201986136634, "learning_rate": 7.678070287509887e-07, "loss": 0.5952, "step": 48220 }, { "epoch": 0.8758898736016272, "grad_norm": 0.7943995418324011, "learning_rate": 7.655939877913021e-07, "loss": 0.6089, "step": 48230 }, { "epoch": 0.8760714804591021, "grad_norm": 0.753369480577241, "learning_rate": 7.633840137945858e-07, "loss": 0.6058, "step": 48240 }, { "epoch": 0.876253087316577, "grad_norm": 0.7690507481529554, "learning_rate": 7.611771074948259e-07, "loss": 0.6024, "step": 48250 }, { "epoch": 0.8764346941740521, "grad_norm": 0.7551136706226678, "learning_rate": 7.589732696249863e-07, "loss": 0.6102, "step": 48260 }, { "epoch": 0.876616301031527, "grad_norm": 0.7581976729161041, "learning_rate": 7.567725009170135e-07, "loss": 0.6141, "step": 48270 }, { "epoch": 0.8767979078890019, "grad_norm": 0.7867926052806621, "learning_rate": 7.54574802101834e-07, "loss": 0.6283, "step": 48280 }, { "epoch": 0.8769795147464768, "grad_norm": 0.7943743155574031, "learning_rate": 7.523801739093573e-07, "loss": 0.6101, "step": 48290 }, { "epoch": 0.8771611216039518, "grad_norm": 0.7786604700429414, "learning_rate": 7.50188617068468e-07, "loss": 0.6082, "step": 48300 }, { "epoch": 0.8773427284614267, "grad_norm": 0.755046862559304, "learning_rate": 7.480001323070362e-07, "loss": 0.6046, "step": 48310 }, { "epoch": 0.8775243353189016, "grad_norm": 0.8038548328303966, "learning_rate": 7.458147203519073e-07, "loss": 0.6044, "step": 48320 }, { "epoch": 0.8777059421763765, "grad_norm": 0.7666904842842974, "learning_rate": 7.436323819289093e-07, "loss": 0.6081, "step": 48330 }, { "epoch": 0.8778875490338515, "grad_norm": 0.7399158995585283, "learning_rate": 7.414531177628447e-07, "loss": 0.5925, "step": 48340 }, { "epoch": 0.8780691558913265, "grad_norm": 0.7791812931178523, "learning_rate": 7.392769285775037e-07, "loss": 0.6126, "step": 48350 }, { "epoch": 0.8782507627488014, "grad_norm": 0.7965036463843483, "learning_rate": 7.371038150956444e-07, "loss": 0.6016, "step": 48360 }, { "epoch": 0.8784323696062764, "grad_norm": 0.77814693961384, "learning_rate": 7.34933778039012e-07, "loss": 0.6038, "step": 48370 }, { "epoch": 0.8786139764637513, "grad_norm": 0.7807771582737342, "learning_rate": 7.327668181283276e-07, "loss": 0.6157, "step": 48380 }, { "epoch": 0.8787955833212262, "grad_norm": 0.7726321582739711, "learning_rate": 7.306029360832856e-07, "loss": 0.6066, "step": 48390 }, { "epoch": 0.8789771901787011, "grad_norm": 0.7851288660934952, "learning_rate": 7.284421326225654e-07, "loss": 0.5955, "step": 48400 }, { "epoch": 0.879158797036176, "grad_norm": 0.7465580383428895, "learning_rate": 7.262844084638177e-07, "loss": 0.6053, "step": 48410 }, { "epoch": 0.879340403893651, "grad_norm": 0.725932677640987, "learning_rate": 7.241297643236767e-07, "loss": 0.5946, "step": 48420 }, { "epoch": 0.879522010751126, "grad_norm": 0.7775947277296307, "learning_rate": 7.219782009177479e-07, "loss": 0.6086, "step": 48430 }, { "epoch": 0.8797036176086009, "grad_norm": 0.737544968893827, "learning_rate": 7.198297189606174e-07, "loss": 0.5926, "step": 48440 }, { "epoch": 0.8798852244660759, "grad_norm": 0.7490706449182624, "learning_rate": 7.176843191658445e-07, "loss": 0.6162, "step": 48450 }, { "epoch": 0.8800668313235508, "grad_norm": 0.7873596226521022, "learning_rate": 7.155420022459691e-07, "loss": 0.6216, "step": 48460 }, { "epoch": 0.8802484381810257, "grad_norm": 0.7756288939865175, "learning_rate": 7.134027689125045e-07, "loss": 0.6176, "step": 48470 }, { "epoch": 0.8804300450385006, "grad_norm": 0.7668721469857234, "learning_rate": 7.112666198759377e-07, "loss": 0.6156, "step": 48480 }, { "epoch": 0.8806116518959756, "grad_norm": 0.7178357317238433, "learning_rate": 7.091335558457379e-07, "loss": 0.6034, "step": 48490 }, { "epoch": 0.8807932587534505, "grad_norm": 0.742271640510025, "learning_rate": 7.070035775303419e-07, "loss": 0.5977, "step": 48500 }, { "epoch": 0.8809748656109254, "grad_norm": 0.7267372455998916, "learning_rate": 7.048766856371691e-07, "loss": 0.607, "step": 48510 }, { "epoch": 0.8811564724684005, "grad_norm": 0.734946819983122, "learning_rate": 7.027528808726069e-07, "loss": 0.5997, "step": 48520 }, { "epoch": 0.8813380793258754, "grad_norm": 0.7539837775079808, "learning_rate": 7.006321639420244e-07, "loss": 0.6054, "step": 48530 }, { "epoch": 0.8815196861833503, "grad_norm": 0.7520729189165122, "learning_rate": 6.98514535549757e-07, "loss": 0.6094, "step": 48540 }, { "epoch": 0.8817012930408252, "grad_norm": 0.752036598421802, "learning_rate": 6.96399996399123e-07, "loss": 0.5936, "step": 48550 }, { "epoch": 0.8818828998983002, "grad_norm": 0.7562488894014732, "learning_rate": 6.942885471924077e-07, "loss": 0.6074, "step": 48560 }, { "epoch": 0.8820645067557751, "grad_norm": 0.7217689704642988, "learning_rate": 6.921801886308743e-07, "loss": 0.6019, "step": 48570 }, { "epoch": 0.88224611361325, "grad_norm": 0.7816091062946805, "learning_rate": 6.900749214147584e-07, "loss": 0.6026, "step": 48580 }, { "epoch": 0.8824277204707249, "grad_norm": 0.7613520097072867, "learning_rate": 6.879727462432661e-07, "loss": 0.5955, "step": 48590 }, { "epoch": 0.8826093273282, "grad_norm": 0.7532143432432423, "learning_rate": 6.858736638145814e-07, "loss": 0.591, "step": 48600 }, { "epoch": 0.8827909341856749, "grad_norm": 0.7403284826357476, "learning_rate": 6.837776748258596e-07, "loss": 0.598, "step": 48610 }, { "epoch": 0.8829725410431498, "grad_norm": 0.783562709544459, "learning_rate": 6.816847799732251e-07, "loss": 0.5994, "step": 48620 }, { "epoch": 0.8831541479006247, "grad_norm": 0.7876282191736056, "learning_rate": 6.795949799517809e-07, "loss": 0.6151, "step": 48630 }, { "epoch": 0.8833357547580997, "grad_norm": 0.7304914727220814, "learning_rate": 6.77508275455595e-07, "loss": 0.6059, "step": 48640 }, { "epoch": 0.8835173616155746, "grad_norm": 0.7723752154920711, "learning_rate": 6.754246671777142e-07, "loss": 0.6028, "step": 48650 }, { "epoch": 0.8836989684730495, "grad_norm": 0.7368569645081439, "learning_rate": 6.733441558101505e-07, "loss": 0.5984, "step": 48660 }, { "epoch": 0.8838805753305244, "grad_norm": 0.7563591063538961, "learning_rate": 6.712667420438934e-07, "loss": 0.6054, "step": 48670 }, { "epoch": 0.8840621821879994, "grad_norm": 0.7526961850463055, "learning_rate": 6.691924265688987e-07, "loss": 0.616, "step": 48680 }, { "epoch": 0.8842437890454744, "grad_norm": 0.7507464465932041, "learning_rate": 6.671212100740976e-07, "loss": 0.6178, "step": 48690 }, { "epoch": 0.8844253959029493, "grad_norm": 0.7652640697784432, "learning_rate": 6.650530932473864e-07, "loss": 0.5974, "step": 48700 }, { "epoch": 0.8846070027604243, "grad_norm": 0.7674902161071773, "learning_rate": 6.629880767756392e-07, "loss": 0.6007, "step": 48710 }, { "epoch": 0.8847886096178992, "grad_norm": 0.7421760590100382, "learning_rate": 6.609261613446916e-07, "loss": 0.6092, "step": 48720 }, { "epoch": 0.8849702164753741, "grad_norm": 0.7838524836322728, "learning_rate": 6.588673476393592e-07, "loss": 0.6, "step": 48730 }, { "epoch": 0.885151823332849, "grad_norm": 0.8026396910719413, "learning_rate": 6.568116363434185e-07, "loss": 0.6189, "step": 48740 }, { "epoch": 0.885333430190324, "grad_norm": 0.7577912314416312, "learning_rate": 6.547590281396232e-07, "loss": 0.6086, "step": 48750 }, { "epoch": 0.8855150370477989, "grad_norm": 0.7454288840950347, "learning_rate": 6.52709523709687e-07, "loss": 0.6121, "step": 48760 }, { "epoch": 0.8856966439052739, "grad_norm": 0.7153846838933833, "learning_rate": 6.506631237343053e-07, "loss": 0.597, "step": 48770 }, { "epoch": 0.8858782507627488, "grad_norm": 0.7381124004035406, "learning_rate": 6.486198288931311e-07, "loss": 0.6029, "step": 48780 }, { "epoch": 0.8860598576202238, "grad_norm": 0.7219760978744691, "learning_rate": 6.465796398647894e-07, "loss": 0.6043, "step": 48790 }, { "epoch": 0.8862414644776987, "grad_norm": 0.7581577905245371, "learning_rate": 6.445425573268804e-07, "loss": 0.6112, "step": 48800 }, { "epoch": 0.8864230713351736, "grad_norm": 0.7368382556948464, "learning_rate": 6.425085819559606e-07, "loss": 0.6037, "step": 48810 }, { "epoch": 0.8866046781926485, "grad_norm": 0.7338162554464823, "learning_rate": 6.404777144275665e-07, "loss": 0.6111, "step": 48820 }, { "epoch": 0.8867862850501235, "grad_norm": 0.761183530718008, "learning_rate": 6.384499554161916e-07, "loss": 0.6049, "step": 48830 }, { "epoch": 0.8869678919075984, "grad_norm": 0.7591090571777108, "learning_rate": 6.364253055953073e-07, "loss": 0.5972, "step": 48840 }, { "epoch": 0.8871494987650733, "grad_norm": 0.7721395982137949, "learning_rate": 6.344037656373436e-07, "loss": 0.6019, "step": 48850 }, { "epoch": 0.8873311056225484, "grad_norm": 0.7612805765595962, "learning_rate": 6.323853362137044e-07, "loss": 0.6045, "step": 48860 }, { "epoch": 0.8875127124800233, "grad_norm": 0.7409135185829618, "learning_rate": 6.303700179947558e-07, "loss": 0.6214, "step": 48870 }, { "epoch": 0.8876943193374982, "grad_norm": 0.7899601936570754, "learning_rate": 6.283578116498312e-07, "loss": 0.6138, "step": 48880 }, { "epoch": 0.8878759261949731, "grad_norm": 0.7713658828575692, "learning_rate": 6.263487178472339e-07, "loss": 0.5928, "step": 48890 }, { "epoch": 0.8880575330524481, "grad_norm": 0.7791913126360036, "learning_rate": 6.243427372542288e-07, "loss": 0.6127, "step": 48900 }, { "epoch": 0.888239139909923, "grad_norm": 0.7416339638870098, "learning_rate": 6.22339870537052e-07, "loss": 0.6027, "step": 48910 }, { "epoch": 0.8884207467673979, "grad_norm": 0.7382310623245801, "learning_rate": 6.203401183608981e-07, "loss": 0.5983, "step": 48920 }, { "epoch": 0.8886023536248728, "grad_norm": 0.7853217102089135, "learning_rate": 6.18343481389937e-07, "loss": 0.6064, "step": 48930 }, { "epoch": 0.8887839604823479, "grad_norm": 0.7250471400796991, "learning_rate": 6.163499602872946e-07, "loss": 0.62, "step": 48940 }, { "epoch": 0.8889655673398228, "grad_norm": 0.7780238060885034, "learning_rate": 6.143595557150695e-07, "loss": 0.6084, "step": 48950 }, { "epoch": 0.8891471741972977, "grad_norm": 0.7786583972958707, "learning_rate": 6.12372268334317e-07, "loss": 0.6125, "step": 48960 }, { "epoch": 0.8893287810547726, "grad_norm": 0.7843256980011194, "learning_rate": 6.103880988050681e-07, "loss": 0.614, "step": 48970 }, { "epoch": 0.8895103879122476, "grad_norm": 0.7399155953146068, "learning_rate": 6.084070477863068e-07, "loss": 0.5969, "step": 48980 }, { "epoch": 0.8896919947697225, "grad_norm": 0.7588437399354937, "learning_rate": 6.064291159359881e-07, "loss": 0.5979, "step": 48990 }, { "epoch": 0.8898736016271974, "grad_norm": 0.7393955467842771, "learning_rate": 6.044543039110307e-07, "loss": 0.6151, "step": 49000 }, { "epoch": 0.8900552084846723, "grad_norm": 0.7633803934342699, "learning_rate": 6.024826123673167e-07, "loss": 0.6113, "step": 49010 }, { "epoch": 0.8902368153421473, "grad_norm": 0.7339406250783665, "learning_rate": 6.005140419596878e-07, "loss": 0.6046, "step": 49020 }, { "epoch": 0.8904184221996223, "grad_norm": 0.729813088655041, "learning_rate": 5.985485933419555e-07, "loss": 0.6022, "step": 49030 }, { "epoch": 0.8906000290570972, "grad_norm": 0.724214314573205, "learning_rate": 5.965862671668898e-07, "loss": 0.5949, "step": 49040 }, { "epoch": 0.8907816359145722, "grad_norm": 0.7565489402630816, "learning_rate": 5.946270640862272e-07, "loss": 0.5887, "step": 49050 }, { "epoch": 0.8909632427720471, "grad_norm": 0.7367717294104295, "learning_rate": 5.926709847506617e-07, "loss": 0.6064, "step": 49060 }, { "epoch": 0.891144849629522, "grad_norm": 0.7682834811871239, "learning_rate": 5.90718029809857e-07, "loss": 0.6112, "step": 49070 }, { "epoch": 0.8913264564869969, "grad_norm": 0.7340985513373861, "learning_rate": 5.887681999124318e-07, "loss": 0.6016, "step": 49080 }, { "epoch": 0.8915080633444719, "grad_norm": 0.7488206178738398, "learning_rate": 5.868214957059737e-07, "loss": 0.5974, "step": 49090 }, { "epoch": 0.8916896702019468, "grad_norm": 0.7690616998709959, "learning_rate": 5.848779178370256e-07, "loss": 0.6, "step": 49100 }, { "epoch": 0.8918712770594218, "grad_norm": 0.7468742393640543, "learning_rate": 5.829374669510979e-07, "loss": 0.608, "step": 49110 }, { "epoch": 0.8920528839168967, "grad_norm": 0.763579014982169, "learning_rate": 5.810001436926582e-07, "loss": 0.6089, "step": 49120 }, { "epoch": 0.8922344907743717, "grad_norm": 0.7825575615982164, "learning_rate": 5.790659487051387e-07, "loss": 0.6098, "step": 49130 }, { "epoch": 0.8924160976318466, "grad_norm": 0.7460442968531459, "learning_rate": 5.771348826309286e-07, "loss": 0.6086, "step": 49140 }, { "epoch": 0.8925977044893215, "grad_norm": 0.7928816619431084, "learning_rate": 5.752069461113829e-07, "loss": 0.61, "step": 49150 }, { "epoch": 0.8927793113467964, "grad_norm": 0.7643088403964968, "learning_rate": 5.732821397868115e-07, "loss": 0.6142, "step": 49160 }, { "epoch": 0.8929609182042714, "grad_norm": 0.7354125706128646, "learning_rate": 5.71360464296492e-07, "loss": 0.5946, "step": 49170 }, { "epoch": 0.8931425250617463, "grad_norm": 0.7558280761075743, "learning_rate": 5.69441920278655e-07, "loss": 0.5987, "step": 49180 }, { "epoch": 0.8933241319192212, "grad_norm": 0.7466979371168078, "learning_rate": 5.675265083704928e-07, "loss": 0.6046, "step": 49190 }, { "epoch": 0.8935057387766963, "grad_norm": 0.7712411510963189, "learning_rate": 5.656142292081612e-07, "loss": 0.6076, "step": 49200 }, { "epoch": 0.8936873456341712, "grad_norm": 0.7616844220189933, "learning_rate": 5.637050834267721e-07, "loss": 0.6007, "step": 49210 }, { "epoch": 0.8938689524916461, "grad_norm": 0.7614657697814874, "learning_rate": 5.617990716603972e-07, "loss": 0.5949, "step": 49220 }, { "epoch": 0.894050559349121, "grad_norm": 0.7882575374415881, "learning_rate": 5.598961945420678e-07, "loss": 0.6059, "step": 49230 }, { "epoch": 0.894232166206596, "grad_norm": 0.7511370883609576, "learning_rate": 5.579964527037762e-07, "loss": 0.6048, "step": 49240 }, { "epoch": 0.8944137730640709, "grad_norm": 0.7340425327320359, "learning_rate": 5.560998467764677e-07, "loss": 0.6156, "step": 49250 }, { "epoch": 0.8945953799215458, "grad_norm": 0.7619014388540465, "learning_rate": 5.54206377390053e-07, "loss": 0.6147, "step": 49260 }, { "epoch": 0.8947769867790207, "grad_norm": 0.7470654498398998, "learning_rate": 5.523160451733955e-07, "loss": 0.5981, "step": 49270 }, { "epoch": 0.8949585936364958, "grad_norm": 0.7370169622901142, "learning_rate": 5.504288507543187e-07, "loss": 0.6005, "step": 49280 }, { "epoch": 0.8951402004939707, "grad_norm": 0.7684256071222184, "learning_rate": 5.485447947596057e-07, "loss": 0.6081, "step": 49290 }, { "epoch": 0.8953218073514456, "grad_norm": 0.7468860502241468, "learning_rate": 5.466638778149946e-07, "loss": 0.5886, "step": 49300 }, { "epoch": 0.8955034142089205, "grad_norm": 0.7399166081051242, "learning_rate": 5.447861005451826e-07, "loss": 0.5915, "step": 49310 }, { "epoch": 0.8956850210663955, "grad_norm": 0.7821523672199907, "learning_rate": 5.429114635738231e-07, "loss": 0.6074, "step": 49320 }, { "epoch": 0.8958666279238704, "grad_norm": 0.7454300613459799, "learning_rate": 5.41039967523529e-07, "loss": 0.6087, "step": 49330 }, { "epoch": 0.8960482347813453, "grad_norm": 0.7462544324760203, "learning_rate": 5.391716130158642e-07, "loss": 0.6249, "step": 49340 }, { "epoch": 0.8962298416388202, "grad_norm": 0.7325859203795402, "learning_rate": 5.373064006713569e-07, "loss": 0.6002, "step": 49350 }, { "epoch": 0.8964114484962952, "grad_norm": 0.7487389287127371, "learning_rate": 5.354443311094859e-07, "loss": 0.5919, "step": 49360 }, { "epoch": 0.8965930553537702, "grad_norm": 0.7662696573076432, "learning_rate": 5.335854049486888e-07, "loss": 0.596, "step": 49370 }, { "epoch": 0.8967746622112451, "grad_norm": 0.7312209033535302, "learning_rate": 5.317296228063595e-07, "loss": 0.6083, "step": 49380 }, { "epoch": 0.8969562690687201, "grad_norm": 0.765519513505071, "learning_rate": 5.298769852988439e-07, "loss": 0.6052, "step": 49390 }, { "epoch": 0.897137875926195, "grad_norm": 0.7562105727550398, "learning_rate": 5.280274930414475e-07, "loss": 0.5997, "step": 49400 }, { "epoch": 0.8973194827836699, "grad_norm": 0.780135767616705, "learning_rate": 5.261811466484334e-07, "loss": 0.6119, "step": 49410 }, { "epoch": 0.8975010896411448, "grad_norm": 0.7405245638576207, "learning_rate": 5.243379467330123e-07, "loss": 0.5897, "step": 49420 }, { "epoch": 0.8976826964986198, "grad_norm": 0.7650765263510991, "learning_rate": 5.224978939073577e-07, "loss": 0.6188, "step": 49430 }, { "epoch": 0.8978643033560947, "grad_norm": 0.7943844553339893, "learning_rate": 5.206609887825897e-07, "loss": 0.6126, "step": 49440 }, { "epoch": 0.8980459102135696, "grad_norm": 0.7684808372474912, "learning_rate": 5.188272319687926e-07, "loss": 0.5976, "step": 49450 }, { "epoch": 0.8982275170710446, "grad_norm": 0.7631878071297966, "learning_rate": 5.169966240749968e-07, "loss": 0.5867, "step": 49460 }, { "epoch": 0.8984091239285196, "grad_norm": 0.749029439705392, "learning_rate": 5.151691657091929e-07, "loss": 0.6018, "step": 49470 }, { "epoch": 0.8985907307859945, "grad_norm": 0.7335182605174747, "learning_rate": 5.133448574783185e-07, "loss": 0.6054, "step": 49480 }, { "epoch": 0.8987723376434694, "grad_norm": 0.7775886085251622, "learning_rate": 5.115236999882744e-07, "loss": 0.6051, "step": 49490 }, { "epoch": 0.8989539445009443, "grad_norm": 0.7323967436510559, "learning_rate": 5.097056938439049e-07, "loss": 0.6009, "step": 49500 }, { "epoch": 0.8991355513584193, "grad_norm": 0.7469611523394597, "learning_rate": 5.078908396490157e-07, "loss": 0.6038, "step": 49510 }, { "epoch": 0.8993171582158942, "grad_norm": 0.7316488215728838, "learning_rate": 5.060791380063613e-07, "loss": 0.6009, "step": 49520 }, { "epoch": 0.8994987650733691, "grad_norm": 0.7541716369121424, "learning_rate": 5.042705895176514e-07, "loss": 0.5961, "step": 49530 }, { "epoch": 0.8996803719308442, "grad_norm": 0.7438387367589286, "learning_rate": 5.024651947835447e-07, "loss": 0.6108, "step": 49540 }, { "epoch": 0.8998619787883191, "grad_norm": 0.764443072960244, "learning_rate": 5.006629544036579e-07, "loss": 0.5957, "step": 49550 }, { "epoch": 0.900043585645794, "grad_norm": 0.812954365925742, "learning_rate": 4.988638689765557e-07, "loss": 0.6211, "step": 49560 }, { "epoch": 0.9002251925032689, "grad_norm": 0.7679283204818669, "learning_rate": 4.97067939099759e-07, "loss": 0.6035, "step": 49570 }, { "epoch": 0.9004067993607439, "grad_norm": 0.7404733661403672, "learning_rate": 4.952751653697363e-07, "loss": 0.5986, "step": 49580 }, { "epoch": 0.9005884062182188, "grad_norm": 0.7667787269161248, "learning_rate": 4.934855483819079e-07, "loss": 0.6109, "step": 49590 }, { "epoch": 0.9007700130756937, "grad_norm": 0.7497592283552905, "learning_rate": 4.916990887306516e-07, "loss": 0.6038, "step": 49600 }, { "epoch": 0.9009516199331686, "grad_norm": 0.7483620209722501, "learning_rate": 4.899157870092897e-07, "loss": 0.6137, "step": 49610 }, { "epoch": 0.9011332267906436, "grad_norm": 0.7434714408856941, "learning_rate": 4.881356438101015e-07, "loss": 0.602, "step": 49620 }, { "epoch": 0.9013148336481186, "grad_norm": 0.7034925762241041, "learning_rate": 4.863586597243109e-07, "loss": 0.5935, "step": 49630 }, { "epoch": 0.9014964405055935, "grad_norm": 0.7485068656017565, "learning_rate": 4.845848353420979e-07, "loss": 0.5996, "step": 49640 }, { "epoch": 0.9016780473630684, "grad_norm": 0.7584463171998116, "learning_rate": 4.828141712525902e-07, "loss": 0.6125, "step": 49650 }, { "epoch": 0.9018596542205434, "grad_norm": 0.7861207410175949, "learning_rate": 4.810466680438686e-07, "loss": 0.6016, "step": 49660 }, { "epoch": 0.9020412610780183, "grad_norm": 0.7295495732986097, "learning_rate": 4.7928232630296e-07, "loss": 0.6034, "step": 49670 }, { "epoch": 0.9022228679354932, "grad_norm": 0.7325146407714423, "learning_rate": 4.775211466158469e-07, "loss": 0.6057, "step": 49680 }, { "epoch": 0.9024044747929681, "grad_norm": 0.7499457749812861, "learning_rate": 4.7576312956745695e-07, "loss": 0.6096, "step": 49690 }, { "epoch": 0.9025860816504431, "grad_norm": 0.7479350115334552, "learning_rate": 4.740082757416664e-07, "loss": 0.6016, "step": 49700 }, { "epoch": 0.9027676885079181, "grad_norm": 0.7633862871424407, "learning_rate": 4.7225658572130686e-07, "loss": 0.6012, "step": 49710 }, { "epoch": 0.902949295365393, "grad_norm": 0.7807938210926112, "learning_rate": 4.7050806008815395e-07, "loss": 0.6121, "step": 49720 }, { "epoch": 0.903130902222868, "grad_norm": 0.7550585601033802, "learning_rate": 4.687626994229344e-07, "loss": 0.5982, "step": 49730 }, { "epoch": 0.9033125090803429, "grad_norm": 0.7598061021086439, "learning_rate": 4.6702050430532333e-07, "loss": 0.6148, "step": 49740 }, { "epoch": 0.9034941159378178, "grad_norm": 0.7689569243197906, "learning_rate": 4.652814753139456e-07, "loss": 0.6059, "step": 49750 }, { "epoch": 0.9036757227952927, "grad_norm": 0.7399902142741516, "learning_rate": 4.635456130263716e-07, "loss": 0.61, "step": 49760 }, { "epoch": 0.9038573296527677, "grad_norm": 0.7299160445293005, "learning_rate": 4.6181291801912444e-07, "loss": 0.6124, "step": 49770 }, { "epoch": 0.9040389365102426, "grad_norm": 0.7581540580883446, "learning_rate": 4.6008339086767164e-07, "loss": 0.6223, "step": 49780 }, { "epoch": 0.9042205433677175, "grad_norm": 0.7200608566022665, "learning_rate": 4.5835703214642814e-07, "loss": 0.6028, "step": 49790 }, { "epoch": 0.9044021502251925, "grad_norm": 0.7523754349794727, "learning_rate": 4.5663384242875976e-07, "loss": 0.5854, "step": 49800 }, { "epoch": 0.9045837570826675, "grad_norm": 0.7602205643086987, "learning_rate": 4.5491382228697866e-07, "loss": 0.5883, "step": 49810 }, { "epoch": 0.9047653639401424, "grad_norm": 0.7318191363569525, "learning_rate": 4.531969722923435e-07, "loss": 0.5931, "step": 49820 }, { "epoch": 0.9049469707976173, "grad_norm": 0.7738225940634131, "learning_rate": 4.5148329301506035e-07, "loss": 0.6083, "step": 49830 }, { "epoch": 0.9051285776550922, "grad_norm": 0.7468706896205417, "learning_rate": 4.4977278502428166e-07, "loss": 0.6033, "step": 49840 }, { "epoch": 0.9053101845125672, "grad_norm": 0.7569401818445654, "learning_rate": 4.4806544888810866e-07, "loss": 0.6126, "step": 49850 }, { "epoch": 0.9054917913700421, "grad_norm": 0.7716639684424248, "learning_rate": 4.463612851735866e-07, "loss": 0.5993, "step": 49860 }, { "epoch": 0.905673398227517, "grad_norm": 0.7471916267398522, "learning_rate": 4.446602944467082e-07, "loss": 0.6054, "step": 49870 }, { "epoch": 0.9058550050849921, "grad_norm": 0.7976852488731906, "learning_rate": 4.429624772724139e-07, "loss": 0.6011, "step": 49880 }, { "epoch": 0.906036611942467, "grad_norm": 0.7661996179864001, "learning_rate": 4.4126783421458707e-07, "loss": 0.6155, "step": 49890 }, { "epoch": 0.9062182187999419, "grad_norm": 0.8268223882070098, "learning_rate": 4.3957636583605854e-07, "loss": 0.6104, "step": 49900 }, { "epoch": 0.9063998256574168, "grad_norm": 0.743169516711705, "learning_rate": 4.378880726986068e-07, "loss": 0.6007, "step": 49910 }, { "epoch": 0.9065814325148918, "grad_norm": 0.7615991997923548, "learning_rate": 4.362029553629499e-07, "loss": 0.6186, "step": 49920 }, { "epoch": 0.9067630393723667, "grad_norm": 0.758146806448296, "learning_rate": 4.345210143887579e-07, "loss": 0.6007, "step": 49930 }, { "epoch": 0.9069446462298416, "grad_norm": 0.7475730300141966, "learning_rate": 4.3284225033464076e-07, "loss": 0.6056, "step": 49940 }, { "epoch": 0.9071262530873165, "grad_norm": 0.7590398535242823, "learning_rate": 4.3116666375815794e-07, "loss": 0.6016, "step": 49950 }, { "epoch": 0.9073078599447915, "grad_norm": 0.7634448651273741, "learning_rate": 4.294942552158088e-07, "loss": 0.6045, "step": 49960 }, { "epoch": 0.9074894668022665, "grad_norm": 0.7309252472144498, "learning_rate": 4.2782502526304115e-07, "loss": 0.5919, "step": 49970 }, { "epoch": 0.9076710736597414, "grad_norm": 0.7459280900740535, "learning_rate": 4.261589744542449e-07, "loss": 0.614, "step": 49980 }, { "epoch": 0.9078526805172163, "grad_norm": 0.7425541343479419, "learning_rate": 4.2449610334275196e-07, "loss": 0.6123, "step": 49990 }, { "epoch": 0.9080342873746913, "grad_norm": 0.7355133431659354, "learning_rate": 4.2283641248084594e-07, "loss": 0.6102, "step": 50000 }, { "epoch": 0.9082158942321662, "grad_norm": 0.7291668831758434, "learning_rate": 4.211799024197438e-07, "loss": 0.5937, "step": 50010 }, { "epoch": 0.9083975010896411, "grad_norm": 0.7870342082642907, "learning_rate": 4.1952657370961547e-07, "loss": 0.5983, "step": 50020 }, { "epoch": 0.908579107947116, "grad_norm": 0.7492757131767207, "learning_rate": 4.178764268995683e-07, "loss": 0.6088, "step": 50030 }, { "epoch": 0.908760714804591, "grad_norm": 0.7450702179481808, "learning_rate": 4.1622946253765614e-07, "loss": 0.605, "step": 50040 }, { "epoch": 0.908942321662066, "grad_norm": 0.7538540436619304, "learning_rate": 4.145856811708726e-07, "loss": 0.6113, "step": 50050 }, { "epoch": 0.9091239285195409, "grad_norm": 0.8100674563940838, "learning_rate": 4.1294508334515757e-07, "loss": 0.6134, "step": 50060 }, { "epoch": 0.9093055353770159, "grad_norm": 0.7526563591127641, "learning_rate": 4.113076696053908e-07, "loss": 0.5972, "step": 50070 }, { "epoch": 0.9094871422344908, "grad_norm": 0.7541671755255781, "learning_rate": 4.0967344049539725e-07, "loss": 0.5992, "step": 50080 }, { "epoch": 0.9096687490919657, "grad_norm": 0.7265198471712528, "learning_rate": 4.080423965579428e-07, "loss": 0.6041, "step": 50090 }, { "epoch": 0.9098503559494406, "grad_norm": 0.7518318473914194, "learning_rate": 4.0641453833473397e-07, "loss": 0.6047, "step": 50100 }, { "epoch": 0.9100319628069156, "grad_norm": 0.7978386137463667, "learning_rate": 4.047898663664218e-07, "loss": 0.6022, "step": 50110 }, { "epoch": 0.9102135696643905, "grad_norm": 0.7243682419619067, "learning_rate": 4.0316838119259685e-07, "loss": 0.6049, "step": 50120 }, { "epoch": 0.9103951765218654, "grad_norm": 0.7608226891252273, "learning_rate": 4.015500833517938e-07, "loss": 0.6208, "step": 50130 }, { "epoch": 0.9105767833793404, "grad_norm": 0.7640059367040345, "learning_rate": 3.999349733814861e-07, "loss": 0.5989, "step": 50140 }, { "epoch": 0.9107583902368154, "grad_norm": 0.7341983913564671, "learning_rate": 3.983230518180914e-07, "loss": 0.6022, "step": 50150 }, { "epoch": 0.9109399970942903, "grad_norm": 0.7616488408719393, "learning_rate": 3.967143191969636e-07, "loss": 0.6163, "step": 50160 }, { "epoch": 0.9111216039517652, "grad_norm": 0.767627492936958, "learning_rate": 3.951087760524053e-07, "loss": 0.613, "step": 50170 }, { "epoch": 0.9113032108092401, "grad_norm": 0.742577724814444, "learning_rate": 3.9350642291765106e-07, "loss": 0.611, "step": 50180 }, { "epoch": 0.9114848176667151, "grad_norm": 0.7651499735081362, "learning_rate": 3.919072603248797e-07, "loss": 0.6098, "step": 50190 }, { "epoch": 0.91166642452419, "grad_norm": 0.7826464917362435, "learning_rate": 3.9031128880521294e-07, "loss": 0.6008, "step": 50200 }, { "epoch": 0.9118480313816649, "grad_norm": 0.7510104463461551, "learning_rate": 3.887185088887113e-07, "loss": 0.6071, "step": 50210 }, { "epoch": 0.91202963823914, "grad_norm": 0.7434208709651519, "learning_rate": 3.871289211043705e-07, "loss": 0.6102, "step": 50220 }, { "epoch": 0.9122112450966149, "grad_norm": 0.7568808686176061, "learning_rate": 3.8554252598013374e-07, "loss": 0.6175, "step": 50230 }, { "epoch": 0.9123928519540898, "grad_norm": 0.8368857211732535, "learning_rate": 3.839593240428774e-07, "loss": 0.6076, "step": 50240 }, { "epoch": 0.9125744588115647, "grad_norm": 0.782685445878695, "learning_rate": 3.8237931581842194e-07, "loss": 0.5986, "step": 50250 }, { "epoch": 0.9127560656690397, "grad_norm": 0.8245510164049196, "learning_rate": 3.8080250183152313e-07, "loss": 0.6007, "step": 50260 }, { "epoch": 0.9129376725265146, "grad_norm": 0.7471069112670023, "learning_rate": 3.7922888260587876e-07, "loss": 0.6065, "step": 50270 }, { "epoch": 0.9131192793839895, "grad_norm": 0.7480170037952248, "learning_rate": 3.7765845866412743e-07, "loss": 0.5931, "step": 50280 }, { "epoch": 0.9133008862414644, "grad_norm": 0.7865005037884463, "learning_rate": 3.760912305278408e-07, "loss": 0.6002, "step": 50290 }, { "epoch": 0.9134824930989394, "grad_norm": 0.7526788484779642, "learning_rate": 3.745271987175303e-07, "loss": 0.6021, "step": 50300 }, { "epoch": 0.9136640999564144, "grad_norm": 0.7324708139027639, "learning_rate": 3.729663637526526e-07, "loss": 0.6002, "step": 50310 }, { "epoch": 0.9138457068138893, "grad_norm": 0.8053789419017573, "learning_rate": 3.7140872615159306e-07, "loss": 0.6079, "step": 50320 }, { "epoch": 0.9140273136713642, "grad_norm": 0.7685641059443281, "learning_rate": 3.6985428643168344e-07, "loss": 0.6076, "step": 50330 }, { "epoch": 0.9142089205288392, "grad_norm": 0.7595954185727315, "learning_rate": 3.683030451091862e-07, "loss": 0.5971, "step": 50340 }, { "epoch": 0.9143905273863141, "grad_norm": 0.7765083071569062, "learning_rate": 3.667550026993083e-07, "loss": 0.6086, "step": 50350 }, { "epoch": 0.914572134243789, "grad_norm": 0.7648395306092066, "learning_rate": 3.6521015971618724e-07, "loss": 0.6073, "step": 50360 }, { "epoch": 0.914753741101264, "grad_norm": 0.734516847162805, "learning_rate": 3.6366851667290594e-07, "loss": 0.5994, "step": 50370 }, { "epoch": 0.9149353479587389, "grad_norm": 0.7801071796040466, "learning_rate": 3.6213007408147814e-07, "loss": 0.6117, "step": 50380 }, { "epoch": 0.9151169548162139, "grad_norm": 0.7390779454145836, "learning_rate": 3.605948324528541e-07, "loss": 0.6076, "step": 50390 }, { "epoch": 0.9152985616736888, "grad_norm": 0.7321409983074411, "learning_rate": 3.590627922969281e-07, "loss": 0.612, "step": 50400 }, { "epoch": 0.9154801685311638, "grad_norm": 0.7076091414235155, "learning_rate": 3.575339541225231e-07, "loss": 0.6095, "step": 50410 }, { "epoch": 0.9156617753886387, "grad_norm": 0.7108253630129969, "learning_rate": 3.5600831843740523e-07, "loss": 0.6008, "step": 50420 }, { "epoch": 0.9158433822461136, "grad_norm": 0.7346085458356855, "learning_rate": 3.544858857482714e-07, "loss": 0.6157, "step": 50430 }, { "epoch": 0.9160249891035885, "grad_norm": 0.7654634665989051, "learning_rate": 3.529666565607592e-07, "loss": 0.6131, "step": 50440 }, { "epoch": 0.9162065959610635, "grad_norm": 0.7797184460901604, "learning_rate": 3.5145063137943746e-07, "loss": 0.6022, "step": 50450 }, { "epoch": 0.9163882028185384, "grad_norm": 0.78830883263587, "learning_rate": 3.4993781070781664e-07, "loss": 0.6055, "step": 50460 }, { "epoch": 0.9165698096760133, "grad_norm": 0.7686352941678942, "learning_rate": 3.484281950483381e-07, "loss": 0.6126, "step": 50470 }, { "epoch": 0.9167514165334884, "grad_norm": 0.794305170215474, "learning_rate": 3.4692178490238316e-07, "loss": 0.6033, "step": 50480 }, { "epoch": 0.9169330233909633, "grad_norm": 0.7604135513705945, "learning_rate": 3.4541858077026367e-07, "loss": 0.6183, "step": 50490 }, { "epoch": 0.9171146302484382, "grad_norm": 0.7666792049313115, "learning_rate": 3.439185831512293e-07, "loss": 0.6048, "step": 50500 }, { "epoch": 0.9172962371059131, "grad_norm": 0.7571010577029355, "learning_rate": 3.4242179254346476e-07, "loss": 0.5988, "step": 50510 }, { "epoch": 0.917477843963388, "grad_norm": 0.8160445652016368, "learning_rate": 3.40928209444088e-07, "loss": 0.6099, "step": 50520 }, { "epoch": 0.917659450820863, "grad_norm": 0.7725759499467307, "learning_rate": 3.394378343491567e-07, "loss": 0.6022, "step": 50530 }, { "epoch": 0.9178410576783379, "grad_norm": 0.7677803214604813, "learning_rate": 3.3795066775365483e-07, "loss": 0.6147, "step": 50540 }, { "epoch": 0.9180226645358128, "grad_norm": 0.8282735687020979, "learning_rate": 3.3646671015150955e-07, "loss": 0.6126, "step": 50550 }, { "epoch": 0.9182042713932879, "grad_norm": 0.7358221606228678, "learning_rate": 3.349859620355744e-07, "loss": 0.5983, "step": 50560 }, { "epoch": 0.9183858782507628, "grad_norm": 0.7507867672237989, "learning_rate": 3.335084238976438e-07, "loss": 0.5977, "step": 50570 }, { "epoch": 0.9185674851082377, "grad_norm": 0.7616527955664334, "learning_rate": 3.3203409622844073e-07, "loss": 0.5898, "step": 50580 }, { "epoch": 0.9187490919657126, "grad_norm": 0.7810588680935137, "learning_rate": 3.305629795176235e-07, "loss": 0.606, "step": 50590 }, { "epoch": 0.9189306988231876, "grad_norm": 0.7581549364249548, "learning_rate": 3.290950742537846e-07, "loss": 0.5868, "step": 50600 }, { "epoch": 0.9191123056806625, "grad_norm": 0.7855288233277647, "learning_rate": 3.27630380924453e-07, "loss": 0.6074, "step": 50610 }, { "epoch": 0.9192939125381374, "grad_norm": 0.7686786835090662, "learning_rate": 3.2616890001608393e-07, "loss": 0.6108, "step": 50620 }, { "epoch": 0.9194755193956123, "grad_norm": 0.7984458744326891, "learning_rate": 3.247106320140725e-07, "loss": 0.5924, "step": 50630 }, { "epoch": 0.9196571262530873, "grad_norm": 0.7514761012971884, "learning_rate": 3.232555774027413e-07, "loss": 0.6133, "step": 50640 }, { "epoch": 0.9198387331105623, "grad_norm": 0.7271923241745301, "learning_rate": 3.2180373666535035e-07, "loss": 0.6046, "step": 50650 }, { "epoch": 0.9200203399680372, "grad_norm": 0.7849552905144063, "learning_rate": 3.2035511028408626e-07, "loss": 0.5955, "step": 50660 }, { "epoch": 0.9202019468255122, "grad_norm": 0.7669820043029563, "learning_rate": 3.1890969874007635e-07, "loss": 0.6069, "step": 50670 }, { "epoch": 0.9203835536829871, "grad_norm": 0.7943689481997058, "learning_rate": 3.174675025133733e-07, "loss": 0.6024, "step": 50680 }, { "epoch": 0.920565160540462, "grad_norm": 0.8002397457788576, "learning_rate": 3.1602852208296507e-07, "loss": 0.6094, "step": 50690 }, { "epoch": 0.9207467673979369, "grad_norm": 0.7576772103852754, "learning_rate": 3.1459275792677047e-07, "loss": 0.6066, "step": 50700 }, { "epoch": 0.9209283742554119, "grad_norm": 0.7732122832198298, "learning_rate": 3.1316021052164026e-07, "loss": 0.5998, "step": 50710 }, { "epoch": 0.9211099811128868, "grad_norm": 0.7498542492265383, "learning_rate": 3.117308803433572e-07, "loss": 0.5861, "step": 50720 }, { "epoch": 0.9212915879703618, "grad_norm": 0.7348144213570119, "learning_rate": 3.10304767866636e-07, "loss": 0.602, "step": 50730 }, { "epoch": 0.9214731948278367, "grad_norm": 0.7783160936096489, "learning_rate": 3.0888187356512113e-07, "loss": 0.6012, "step": 50740 }, { "epoch": 0.9216548016853117, "grad_norm": 0.7636426696371809, "learning_rate": 3.074621979113901e-07, "loss": 0.6039, "step": 50750 }, { "epoch": 0.9218364085427866, "grad_norm": 0.7439911836476064, "learning_rate": 3.060457413769491e-07, "loss": 0.6012, "step": 50760 }, { "epoch": 0.9220180154002615, "grad_norm": 0.772433319745601, "learning_rate": 3.0463250443223845e-07, "loss": 0.6064, "step": 50770 }, { "epoch": 0.9221996222577364, "grad_norm": 0.7331391650339623, "learning_rate": 3.032224875466272e-07, "loss": 0.6095, "step": 50780 }, { "epoch": 0.9223812291152114, "grad_norm": 0.7409933677594513, "learning_rate": 3.018156911884129e-07, "loss": 0.6041, "step": 50790 }, { "epoch": 0.9225628359726863, "grad_norm": 0.7644111363858808, "learning_rate": 3.004121158248274e-07, "loss": 0.6027, "step": 50800 }, { "epoch": 0.9227444428301612, "grad_norm": 0.7758104871655895, "learning_rate": 2.9901176192203005e-07, "loss": 0.6032, "step": 50810 }, { "epoch": 0.9229260496876363, "grad_norm": 0.7710702418868999, "learning_rate": 2.9761462994511326e-07, "loss": 0.6029, "step": 50820 }, { "epoch": 0.9231076565451112, "grad_norm": 0.7248079412129488, "learning_rate": 2.9622072035809467e-07, "loss": 0.6079, "step": 50830 }, { "epoch": 0.9232892634025861, "grad_norm": 0.7688480754949094, "learning_rate": 2.948300336239263e-07, "loss": 0.6092, "step": 50840 }, { "epoch": 0.923470870260061, "grad_norm": 0.7655245464906443, "learning_rate": 2.934425702044863e-07, "loss": 0.5997, "step": 50850 }, { "epoch": 0.923652477117536, "grad_norm": 0.74326949190015, "learning_rate": 2.920583305605851e-07, "loss": 0.5971, "step": 50860 }, { "epoch": 0.9238340839750109, "grad_norm": 0.7719289438412458, "learning_rate": 2.906773151519604e-07, "loss": 0.5933, "step": 50870 }, { "epoch": 0.9240156908324858, "grad_norm": 0.7431906192337852, "learning_rate": 2.8929952443728203e-07, "loss": 0.6093, "step": 50880 }, { "epoch": 0.9241972976899607, "grad_norm": 0.7502938733412416, "learning_rate": 2.8792495887414397e-07, "loss": 0.5994, "step": 50890 }, { "epoch": 0.9243789045474358, "grad_norm": 0.7588061919481459, "learning_rate": 2.8655361891907096e-07, "loss": 0.5853, "step": 50900 }, { "epoch": 0.9245605114049107, "grad_norm": 0.7730487413944254, "learning_rate": 2.85185505027521e-07, "loss": 0.6019, "step": 50910 }, { "epoch": 0.9247421182623856, "grad_norm": 0.7546814793162849, "learning_rate": 2.838206176538727e-07, "loss": 0.5993, "step": 50920 }, { "epoch": 0.9249237251198605, "grad_norm": 0.7707253910871291, "learning_rate": 2.824589572514402e-07, "loss": 0.6236, "step": 50930 }, { "epoch": 0.9251053319773355, "grad_norm": 0.7441322608760151, "learning_rate": 2.811005242724607e-07, "loss": 0.6065, "step": 50940 }, { "epoch": 0.9252869388348104, "grad_norm": 0.7505473082560069, "learning_rate": 2.7974531916810434e-07, "loss": 0.6227, "step": 50950 }, { "epoch": 0.9254685456922853, "grad_norm": 0.7599793010987704, "learning_rate": 2.783933423884633e-07, "loss": 0.6088, "step": 50960 }, { "epoch": 0.9256501525497602, "grad_norm": 0.7497613578001451, "learning_rate": 2.770445943825639e-07, "loss": 0.6074, "step": 50970 }, { "epoch": 0.9258317594072352, "grad_norm": 0.7597207144801751, "learning_rate": 2.756990755983546e-07, "loss": 0.5993, "step": 50980 }, { "epoch": 0.9260133662647102, "grad_norm": 0.7462891476241433, "learning_rate": 2.743567864827157e-07, "loss": 0.5967, "step": 50990 }, { "epoch": 0.9261949731221851, "grad_norm": 0.7032210564239078, "learning_rate": 2.730177274814505e-07, "loss": 0.6036, "step": 51000 }, { "epoch": 0.92637657997966, "grad_norm": 0.719375446507444, "learning_rate": 2.716818990392944e-07, "loss": 0.6029, "step": 51010 }, { "epoch": 0.926558186837135, "grad_norm": 0.7545580015640114, "learning_rate": 2.703493015999048e-07, "loss": 0.61, "step": 51020 }, { "epoch": 0.9267397936946099, "grad_norm": 0.8058200669755846, "learning_rate": 2.69019935605872e-07, "loss": 0.6089, "step": 51030 }, { "epoch": 0.9269214005520848, "grad_norm": 0.733954717445417, "learning_rate": 2.6769380149870627e-07, "loss": 0.5955, "step": 51040 }, { "epoch": 0.9271030074095598, "grad_norm": 0.7741797830940321, "learning_rate": 2.6637089971885076e-07, "loss": 0.616, "step": 51050 }, { "epoch": 0.9272846142670347, "grad_norm": 0.7742006470611249, "learning_rate": 2.6505123070566963e-07, "loss": 0.5994, "step": 51060 }, { "epoch": 0.9274662211245097, "grad_norm": 0.7411800915591342, "learning_rate": 2.637347948974578e-07, "loss": 0.6031, "step": 51070 }, { "epoch": 0.9276478279819846, "grad_norm": 0.7922059343133575, "learning_rate": 2.6242159273143444e-07, "loss": 0.6007, "step": 51080 }, { "epoch": 0.9278294348394596, "grad_norm": 0.7993513776741435, "learning_rate": 2.6111162464374393e-07, "loss": 0.5986, "step": 51090 }, { "epoch": 0.9280110416969345, "grad_norm": 0.7201419357246813, "learning_rate": 2.5980489106945616e-07, "loss": 0.5934, "step": 51100 }, { "epoch": 0.9281926485544094, "grad_norm": 0.7494389269444853, "learning_rate": 2.5850139244257054e-07, "loss": 0.6124, "step": 51110 }, { "epoch": 0.9283742554118843, "grad_norm": 0.7797758817182426, "learning_rate": 2.572011291960086e-07, "loss": 0.609, "step": 51120 }, { "epoch": 0.9285558622693593, "grad_norm": 0.9006268920693479, "learning_rate": 2.5590410176161815e-07, "loss": 0.6007, "step": 51130 }, { "epoch": 0.9287374691268342, "grad_norm": 0.7805800605757304, "learning_rate": 2.546103105701725e-07, "loss": 0.606, "step": 51140 }, { "epoch": 0.9289190759843091, "grad_norm": 0.7447498295506929, "learning_rate": 2.5331975605137007e-07, "loss": 0.596, "step": 51150 }, { "epoch": 0.9291006828417842, "grad_norm": 0.8045968606904654, "learning_rate": 2.5203243863383467e-07, "loss": 0.6079, "step": 51160 }, { "epoch": 0.9292822896992591, "grad_norm": 0.725399521074902, "learning_rate": 2.5074835874511426e-07, "loss": 0.5967, "step": 51170 }, { "epoch": 0.929463896556734, "grad_norm": 0.7716058655929194, "learning_rate": 2.4946751681168225e-07, "loss": 0.6048, "step": 51180 }, { "epoch": 0.9296455034142089, "grad_norm": 0.7461716601862316, "learning_rate": 2.4818991325893385e-07, "loss": 0.6088, "step": 51190 }, { "epoch": 0.9298271102716839, "grad_norm": 0.7874187604341794, "learning_rate": 2.469155485111963e-07, "loss": 0.5981, "step": 51200 }, { "epoch": 0.9300087171291588, "grad_norm": 0.715504999358988, "learning_rate": 2.4564442299171096e-07, "loss": 0.5916, "step": 51210 }, { "epoch": 0.9301903239866337, "grad_norm": 0.7669444971419997, "learning_rate": 2.4437653712265144e-07, "loss": 0.6103, "step": 51220 }, { "epoch": 0.9303719308441086, "grad_norm": 0.7532406606434517, "learning_rate": 2.431118913251107e-07, "loss": 0.584, "step": 51230 }, { "epoch": 0.9305535377015837, "grad_norm": 0.7672344655009595, "learning_rate": 2.4185048601910957e-07, "loss": 0.6227, "step": 51240 }, { "epoch": 0.9307351445590586, "grad_norm": 0.7634762043503427, "learning_rate": 2.4059232162358726e-07, "loss": 0.6024, "step": 51250 }, { "epoch": 0.9309167514165335, "grad_norm": 0.7817458702695858, "learning_rate": 2.393373985564118e-07, "loss": 0.6076, "step": 51260 }, { "epoch": 0.9310983582740084, "grad_norm": 0.7661684673302939, "learning_rate": 2.3808571723437202e-07, "loss": 0.6058, "step": 51270 }, { "epoch": 0.9312799651314834, "grad_norm": 0.7449413189589967, "learning_rate": 2.3683727807318203e-07, "loss": 0.6047, "step": 51280 }, { "epoch": 0.9314615719889583, "grad_norm": 0.740617741586527, "learning_rate": 2.355920814874757e-07, "loss": 0.613, "step": 51290 }, { "epoch": 0.9316431788464332, "grad_norm": 0.7218337073215252, "learning_rate": 2.3435012789081223e-07, "loss": 0.6016, "step": 51300 }, { "epoch": 0.9318247857039081, "grad_norm": 0.7437234737058076, "learning_rate": 2.3311141769567502e-07, "loss": 0.6071, "step": 51310 }, { "epoch": 0.9320063925613831, "grad_norm": 0.7725200162136682, "learning_rate": 2.3187595131346719e-07, "loss": 0.6155, "step": 51320 }, { "epoch": 0.9321879994188581, "grad_norm": 0.7229271034929877, "learning_rate": 2.3064372915451827e-07, "loss": 0.5976, "step": 51330 }, { "epoch": 0.932369606276333, "grad_norm": 0.7195710144531646, "learning_rate": 2.2941475162807537e-07, "loss": 0.5972, "step": 51340 }, { "epoch": 0.932551213133808, "grad_norm": 0.7299158767158037, "learning_rate": 2.2818901914231307e-07, "loss": 0.6128, "step": 51350 }, { "epoch": 0.9327328199912829, "grad_norm": 0.7600854158285022, "learning_rate": 2.269665321043235e-07, "loss": 0.5897, "step": 51360 }, { "epoch": 0.9329144268487578, "grad_norm": 0.7637242523687375, "learning_rate": 2.2574729092012637e-07, "loss": 0.6009, "step": 51370 }, { "epoch": 0.9330960337062327, "grad_norm": 0.7667606406112326, "learning_rate": 2.2453129599465774e-07, "loss": 0.5975, "step": 51380 }, { "epoch": 0.9332776405637077, "grad_norm": 0.7513458507439684, "learning_rate": 2.2331854773178007e-07, "loss": 0.6032, "step": 51390 }, { "epoch": 0.9334592474211826, "grad_norm": 0.7608708274120443, "learning_rate": 2.2210904653427344e-07, "loss": 0.6077, "step": 51400 }, { "epoch": 0.9336408542786576, "grad_norm": 0.7525567920981044, "learning_rate": 2.209027928038432e-07, "loss": 0.608, "step": 51410 }, { "epoch": 0.9338224611361325, "grad_norm": 0.7729119854470285, "learning_rate": 2.1969978694111439e-07, "loss": 0.5992, "step": 51420 }, { "epoch": 0.9340040679936075, "grad_norm": 0.7751753904397265, "learning_rate": 2.1850002934563296e-07, "loss": 0.6008, "step": 51430 }, { "epoch": 0.9341856748510824, "grad_norm": 0.7583306365328539, "learning_rate": 2.1730352041586578e-07, "loss": 0.5986, "step": 51440 }, { "epoch": 0.9343672817085573, "grad_norm": 0.7392234820856594, "learning_rate": 2.161102605492027e-07, "loss": 0.5998, "step": 51450 }, { "epoch": 0.9345488885660322, "grad_norm": 0.7685002721723304, "learning_rate": 2.1492025014195338e-07, "loss": 0.601, "step": 51460 }, { "epoch": 0.9347304954235072, "grad_norm": 0.7392315348251186, "learning_rate": 2.1373348958934726e-07, "loss": 0.5976, "step": 51470 }, { "epoch": 0.9349121022809821, "grad_norm": 0.7487706798582171, "learning_rate": 2.125499792855379e-07, "loss": 0.6225, "step": 51480 }, { "epoch": 0.935093709138457, "grad_norm": 0.7613502406309688, "learning_rate": 2.113697196235931e-07, "loss": 0.6197, "step": 51490 }, { "epoch": 0.935275315995932, "grad_norm": 0.7523523833990414, "learning_rate": 2.101927109955071e-07, "loss": 0.6025, "step": 51500 }, { "epoch": 0.935456922853407, "grad_norm": 0.7329803270596863, "learning_rate": 2.0901895379219271e-07, "loss": 0.5969, "step": 51510 }, { "epoch": 0.9356385297108819, "grad_norm": 0.7147817497420091, "learning_rate": 2.0784844840347928e-07, "loss": 0.5935, "step": 51520 }, { "epoch": 0.9358201365683568, "grad_norm": 0.7821215224980419, "learning_rate": 2.0668119521812246e-07, "loss": 0.6098, "step": 51530 }, { "epoch": 0.9360017434258318, "grad_norm": 0.7310846319124664, "learning_rate": 2.0551719462379216e-07, "loss": 0.6084, "step": 51540 }, { "epoch": 0.9361833502833067, "grad_norm": 0.7464481293346198, "learning_rate": 2.0435644700708245e-07, "loss": 0.5989, "step": 51550 }, { "epoch": 0.9363649571407816, "grad_norm": 0.7470967659929613, "learning_rate": 2.0319895275350275e-07, "loss": 0.5913, "step": 51560 }, { "epoch": 0.9365465639982565, "grad_norm": 0.7616402871063482, "learning_rate": 2.0204471224748447e-07, "loss": 0.5969, "step": 51570 }, { "epoch": 0.9367281708557316, "grad_norm": 0.7868982005341049, "learning_rate": 2.0089372587237864e-07, "loss": 0.6073, "step": 51580 }, { "epoch": 0.9369097777132065, "grad_norm": 0.797838888054497, "learning_rate": 1.9974599401045514e-07, "loss": 0.62, "step": 51590 }, { "epoch": 0.9370913845706814, "grad_norm": 0.7761981102659006, "learning_rate": 1.986015170429012e-07, "loss": 0.5992, "step": 51600 }, { "epoch": 0.9372729914281563, "grad_norm": 0.7447013057525923, "learning_rate": 1.9746029534982615e-07, "loss": 0.61, "step": 51610 }, { "epoch": 0.9374545982856313, "grad_norm": 0.739137644596867, "learning_rate": 1.9632232931025563e-07, "loss": 0.5945, "step": 51620 }, { "epoch": 0.9376362051431062, "grad_norm": 0.7581405391786794, "learning_rate": 1.9518761930213404e-07, "loss": 0.6023, "step": 51630 }, { "epoch": 0.9378178120005811, "grad_norm": 0.786884117197453, "learning_rate": 1.9405616570232654e-07, "loss": 0.6072, "step": 51640 }, { "epoch": 0.937999418858056, "grad_norm": 0.7525338355295846, "learning_rate": 1.929279688866148e-07, "loss": 0.6056, "step": 51650 }, { "epoch": 0.938181025715531, "grad_norm": 0.7284413722388048, "learning_rate": 1.918030292297013e-07, "loss": 0.5987, "step": 51660 }, { "epoch": 0.938362632573006, "grad_norm": 0.7578747619191227, "learning_rate": 1.906813471052016e-07, "loss": 0.5976, "step": 51670 }, { "epoch": 0.9385442394304809, "grad_norm": 0.705951362091451, "learning_rate": 1.8956292288565436e-07, "loss": 0.6094, "step": 51680 }, { "epoch": 0.9387258462879559, "grad_norm": 0.773535544044399, "learning_rate": 1.8844775694251583e-07, "loss": 0.6184, "step": 51690 }, { "epoch": 0.9389074531454308, "grad_norm": 0.7598925098939191, "learning_rate": 1.8733584964615637e-07, "loss": 0.6017, "step": 51700 }, { "epoch": 0.9390890600029057, "grad_norm": 0.7442478611060783, "learning_rate": 1.8622720136586726e-07, "loss": 0.6071, "step": 51710 }, { "epoch": 0.9392706668603806, "grad_norm": 0.7547665468312116, "learning_rate": 1.8512181246985726e-07, "loss": 0.6007, "step": 51720 }, { "epoch": 0.9394522737178556, "grad_norm": 0.7673125476350948, "learning_rate": 1.8401968332525278e-07, "loss": 0.6036, "step": 51730 }, { "epoch": 0.9396338805753305, "grad_norm": 0.75860091480021, "learning_rate": 1.8292081429809428e-07, "loss": 0.6018, "step": 51740 }, { "epoch": 0.9398154874328054, "grad_norm": 0.7292219568248506, "learning_rate": 1.8182520575334317e-07, "loss": 0.6227, "step": 51750 }, { "epoch": 0.9399970942902804, "grad_norm": 0.7387052971945223, "learning_rate": 1.8073285805487727e-07, "loss": 0.6106, "step": 51760 }, { "epoch": 0.9401787011477554, "grad_norm": 0.7520931825907714, "learning_rate": 1.7964377156549085e-07, "loss": 0.6097, "step": 51770 }, { "epoch": 0.9403603080052303, "grad_norm": 0.7529431892120098, "learning_rate": 1.785579466468945e-07, "loss": 0.5976, "step": 51780 }, { "epoch": 0.9405419148627052, "grad_norm": 0.7566196273207799, "learning_rate": 1.7747538365971651e-07, "loss": 0.6134, "step": 51790 }, { "epoch": 0.9407235217201801, "grad_norm": 0.7552963882801421, "learning_rate": 1.7639608296350143e-07, "loss": 0.6006, "step": 51800 }, { "epoch": 0.9409051285776551, "grad_norm": 0.7796659868867918, "learning_rate": 1.7532004491671029e-07, "loss": 0.6062, "step": 51810 }, { "epoch": 0.94108673543513, "grad_norm": 0.7695930683322851, "learning_rate": 1.7424726987671947e-07, "loss": 0.5952, "step": 51820 }, { "epoch": 0.9412683422926049, "grad_norm": 0.7564977964305386, "learning_rate": 1.7317775819982506e-07, "loss": 0.6005, "step": 51830 }, { "epoch": 0.94144994915008, "grad_norm": 0.7812105149521631, "learning_rate": 1.721115102412352e-07, "loss": 0.6004, "step": 51840 }, { "epoch": 0.9416315560075549, "grad_norm": 0.741655000269689, "learning_rate": 1.7104852635507763e-07, "loss": 0.6058, "step": 51850 }, { "epoch": 0.9418131628650298, "grad_norm": 0.7340254721578197, "learning_rate": 1.6998880689439224e-07, "loss": 0.594, "step": 51860 }, { "epoch": 0.9419947697225047, "grad_norm": 0.7214502301735577, "learning_rate": 1.6893235221113636e-07, "loss": 0.5953, "step": 51870 }, { "epoch": 0.9421763765799797, "grad_norm": 0.7764474201862455, "learning_rate": 1.67879162656186e-07, "loss": 0.6047, "step": 51880 }, { "epoch": 0.9423579834374546, "grad_norm": 0.7368030138551329, "learning_rate": 1.668292385793291e-07, "loss": 0.6031, "step": 51890 }, { "epoch": 0.9425395902949295, "grad_norm": 0.7582623531345077, "learning_rate": 1.6578258032926898e-07, "loss": 0.6067, "step": 51900 }, { "epoch": 0.9427211971524044, "grad_norm": 0.7824866909796695, "learning_rate": 1.6473918825362644e-07, "loss": 0.6277, "step": 51910 }, { "epoch": 0.9429028040098794, "grad_norm": 0.7641818515764638, "learning_rate": 1.6369906269893543e-07, "loss": 0.6132, "step": 51920 }, { "epoch": 0.9430844108673544, "grad_norm": 0.7753532911374105, "learning_rate": 1.6266220401064737e-07, "loss": 0.6035, "step": 51930 }, { "epoch": 0.9432660177248293, "grad_norm": 0.7336441395093595, "learning_rate": 1.616286125331268e-07, "loss": 0.6025, "step": 51940 }, { "epoch": 0.9434476245823042, "grad_norm": 0.7692138492023438, "learning_rate": 1.6059828860965353e-07, "loss": 0.6137, "step": 51950 }, { "epoch": 0.9436292314397792, "grad_norm": 0.7610876324638323, "learning_rate": 1.595712325824217e-07, "loss": 0.62, "step": 51960 }, { "epoch": 0.9438108382972541, "grad_norm": 0.7525934465847413, "learning_rate": 1.585474447925428e-07, "loss": 0.6157, "step": 51970 }, { "epoch": 0.943992445154729, "grad_norm": 0.7843897184005023, "learning_rate": 1.5752692558003935e-07, "loss": 0.602, "step": 51980 }, { "epoch": 0.944174052012204, "grad_norm": 0.7545654634233784, "learning_rate": 1.5650967528384908e-07, "loss": 0.611, "step": 51990 }, { "epoch": 0.9443556588696789, "grad_norm": 0.7866049230257959, "learning_rate": 1.554956942418262e-07, "loss": 0.617, "step": 52000 }, { "epoch": 0.9445372657271539, "grad_norm": 0.7520717768207501, "learning_rate": 1.5448498279073687e-07, "loss": 0.5969, "step": 52010 }, { "epoch": 0.9447188725846288, "grad_norm": 0.7570767537702662, "learning_rate": 1.5347754126626258e-07, "loss": 0.6017, "step": 52020 }, { "epoch": 0.9449004794421038, "grad_norm": 0.7637910520024273, "learning_rate": 1.5247337000299567e-07, "loss": 0.6153, "step": 52030 }, { "epoch": 0.9450820862995787, "grad_norm": 0.7897796870116971, "learning_rate": 1.5147246933444937e-07, "loss": 0.6172, "step": 52040 }, { "epoch": 0.9452636931570536, "grad_norm": 0.7210853169682127, "learning_rate": 1.5047483959304332e-07, "loss": 0.5958, "step": 52050 }, { "epoch": 0.9454453000145285, "grad_norm": 0.733971535560246, "learning_rate": 1.4948048111011472e-07, "loss": 0.6034, "step": 52060 }, { "epoch": 0.9456269068720035, "grad_norm": 0.7734973611311994, "learning_rate": 1.484893942159127e-07, "loss": 0.5962, "step": 52070 }, { "epoch": 0.9458085137294784, "grad_norm": 0.7794687656511753, "learning_rate": 1.4750157923960063e-07, "loss": 0.6006, "step": 52080 }, { "epoch": 0.9459901205869533, "grad_norm": 0.7377951600406117, "learning_rate": 1.4651703650925607e-07, "loss": 0.6075, "step": 52090 }, { "epoch": 0.9461717274444283, "grad_norm": 0.771578646860377, "learning_rate": 1.455357663518675e-07, "loss": 0.6049, "step": 52100 }, { "epoch": 0.9463533343019033, "grad_norm": 0.7323027031622577, "learning_rate": 1.4455776909333975e-07, "loss": 0.6018, "step": 52110 }, { "epoch": 0.9465349411593782, "grad_norm": 0.7223246954406356, "learning_rate": 1.435830450584852e-07, "loss": 0.6089, "step": 52120 }, { "epoch": 0.9467165480168531, "grad_norm": 0.747938901594537, "learning_rate": 1.42611594571036e-07, "loss": 0.613, "step": 52130 }, { "epoch": 0.946898154874328, "grad_norm": 0.755548580083348, "learning_rate": 1.416434179536319e-07, "loss": 0.5969, "step": 52140 }, { "epoch": 0.947079761731803, "grad_norm": 0.7214295260944077, "learning_rate": 1.4067851552782675e-07, "loss": 0.5887, "step": 52150 }, { "epoch": 0.9472613685892779, "grad_norm": 0.7461453687615649, "learning_rate": 1.3971688761408864e-07, "loss": 0.5971, "step": 52160 }, { "epoch": 0.9474429754467528, "grad_norm": 0.7726736603856255, "learning_rate": 1.3875853453179654e-07, "loss": 0.6189, "step": 52170 }, { "epoch": 0.9476245823042279, "grad_norm": 0.7470485265610644, "learning_rate": 1.3780345659924033e-07, "loss": 0.5939, "step": 52180 }, { "epoch": 0.9478061891617028, "grad_norm": 0.7873766294192767, "learning_rate": 1.3685165413362623e-07, "loss": 0.6056, "step": 52190 }, { "epoch": 0.9479877960191777, "grad_norm": 0.7717368883734519, "learning_rate": 1.3590312745106803e-07, "loss": 0.6044, "step": 52200 }, { "epoch": 0.9481694028766526, "grad_norm": 0.7651056682715577, "learning_rate": 1.34957876866596e-07, "loss": 0.6109, "step": 52210 }, { "epoch": 0.9483510097341276, "grad_norm": 0.7710929357737809, "learning_rate": 1.3401590269414788e-07, "loss": 0.6085, "step": 52220 }, { "epoch": 0.9485326165916025, "grad_norm": 0.7298171701625609, "learning_rate": 1.3307720524657674e-07, "loss": 0.6008, "step": 52230 }, { "epoch": 0.9487142234490774, "grad_norm": 0.7965736557468776, "learning_rate": 1.3214178483564433e-07, "loss": 0.6068, "step": 52240 }, { "epoch": 0.9488958303065523, "grad_norm": 0.7459377468350158, "learning_rate": 1.3120964177202767e-07, "loss": 0.6176, "step": 52250 }, { "epoch": 0.9490774371640273, "grad_norm": 0.7348115933406615, "learning_rate": 1.3028077636531245e-07, "loss": 0.5981, "step": 52260 }, { "epoch": 0.9492590440215023, "grad_norm": 0.7508479282346985, "learning_rate": 1.2935518892399635e-07, "loss": 0.6064, "step": 52270 }, { "epoch": 0.9494406508789772, "grad_norm": 0.7579284460398523, "learning_rate": 1.2843287975549013e-07, "loss": 0.6091, "step": 52280 }, { "epoch": 0.9496222577364521, "grad_norm": 0.731878008072738, "learning_rate": 1.2751384916611321e-07, "loss": 0.605, "step": 52290 }, { "epoch": 0.9498038645939271, "grad_norm": 0.7497098046906705, "learning_rate": 1.2659809746109696e-07, "loss": 0.6055, "step": 52300 }, { "epoch": 0.949985471451402, "grad_norm": 0.7285468442528535, "learning_rate": 1.256856249445848e-07, "loss": 0.6065, "step": 52310 }, { "epoch": 0.9501670783088769, "grad_norm": 0.7244574006798752, "learning_rate": 1.2477643191962985e-07, "loss": 0.6018, "step": 52320 }, { "epoch": 0.9503486851663518, "grad_norm": 0.794068531587994, "learning_rate": 1.238705186881972e-07, "loss": 0.6119, "step": 52330 }, { "epoch": 0.9505302920238268, "grad_norm": 0.7544504636791471, "learning_rate": 1.2296788555115957e-07, "loss": 0.6026, "step": 52340 }, { "epoch": 0.9507118988813018, "grad_norm": 0.737692455773392, "learning_rate": 1.2206853280830488e-07, "loss": 0.5968, "step": 52350 }, { "epoch": 0.9508935057387767, "grad_norm": 0.7384157637812051, "learning_rate": 1.2117246075832866e-07, "loss": 0.6158, "step": 52360 }, { "epoch": 0.9510751125962517, "grad_norm": 0.7446810357077279, "learning_rate": 1.202796696988373e-07, "loss": 0.6114, "step": 52370 }, { "epoch": 0.9512567194537266, "grad_norm": 0.7713971792273915, "learning_rate": 1.1939015992634806e-07, "loss": 0.6069, "step": 52380 }, { "epoch": 0.9514383263112015, "grad_norm": 0.7412379551189678, "learning_rate": 1.1850393173628793e-07, "loss": 0.6016, "step": 52390 }, { "epoch": 0.9516199331686764, "grad_norm": 0.7803670688353072, "learning_rate": 1.1762098542299371e-07, "loss": 0.6207, "step": 52400 }, { "epoch": 0.9518015400261514, "grad_norm": 0.7569607349732238, "learning_rate": 1.1674132127971304e-07, "loss": 0.6052, "step": 52410 }, { "epoch": 0.9519831468836263, "grad_norm": 0.7493241996387697, "learning_rate": 1.1586493959860224e-07, "loss": 0.6142, "step": 52420 }, { "epoch": 0.9521647537411012, "grad_norm": 0.8260858172911553, "learning_rate": 1.1499184067072844e-07, "loss": 0.6004, "step": 52430 }, { "epoch": 0.9523463605985762, "grad_norm": 0.7960854389831521, "learning_rate": 1.1412202478606971e-07, "loss": 0.6177, "step": 52440 }, { "epoch": 0.9525279674560512, "grad_norm": 0.739845245398693, "learning_rate": 1.1325549223351051e-07, "loss": 0.6243, "step": 52450 }, { "epoch": 0.9527095743135261, "grad_norm": 0.7798679509049824, "learning_rate": 1.1239224330084731e-07, "loss": 0.6047, "step": 52460 }, { "epoch": 0.952891181171001, "grad_norm": 0.7583862416223787, "learning_rate": 1.1153227827478519e-07, "loss": 0.5987, "step": 52470 }, { "epoch": 0.953072788028476, "grad_norm": 0.7612009239401778, "learning_rate": 1.1067559744094014e-07, "loss": 0.6167, "step": 52480 }, { "epoch": 0.9532543948859509, "grad_norm": 0.7496844757324415, "learning_rate": 1.0982220108383457e-07, "loss": 0.5991, "step": 52490 }, { "epoch": 0.9534360017434258, "grad_norm": 0.731458690916635, "learning_rate": 1.0897208948690063e-07, "loss": 0.6043, "step": 52500 }, { "epoch": 0.9536176086009007, "grad_norm": 0.744137703760177, "learning_rate": 1.0812526293248137e-07, "loss": 0.5955, "step": 52510 }, { "epoch": 0.9537992154583758, "grad_norm": 0.7733028490533506, "learning_rate": 1.0728172170182737e-07, "loss": 0.6041, "step": 52520 }, { "epoch": 0.9539808223158507, "grad_norm": 0.7565303397854867, "learning_rate": 1.0644146607510009e-07, "loss": 0.6183, "step": 52530 }, { "epoch": 0.9541624291733256, "grad_norm": 0.7330205915064325, "learning_rate": 1.0560449633136627e-07, "loss": 0.6011, "step": 52540 }, { "epoch": 0.9543440360308005, "grad_norm": 0.7597284234527549, "learning_rate": 1.0477081274860356e-07, "loss": 0.6012, "step": 52550 }, { "epoch": 0.9545256428882755, "grad_norm": 0.73798507219663, "learning_rate": 1.0394041560369827e-07, "loss": 0.5914, "step": 52560 }, { "epoch": 0.9547072497457504, "grad_norm": 0.7338200215916639, "learning_rate": 1.0311330517244533e-07, "loss": 0.5987, "step": 52570 }, { "epoch": 0.9548888566032253, "grad_norm": 0.7727719740617192, "learning_rate": 1.0228948172954611e-07, "loss": 0.6079, "step": 52580 }, { "epoch": 0.9550704634607002, "grad_norm": 0.7592253284015877, "learning_rate": 1.0146894554861176e-07, "loss": 0.6062, "step": 52590 }, { "epoch": 0.9552520703181752, "grad_norm": 0.7587925019634769, "learning_rate": 1.0065169690216315e-07, "loss": 0.6135, "step": 52600 }, { "epoch": 0.9554336771756502, "grad_norm": 0.7254221371230478, "learning_rate": 9.98377360616265e-08, "loss": 0.6084, "step": 52610 }, { "epoch": 0.9556152840331251, "grad_norm": 0.7350755104513862, "learning_rate": 9.90270632973378e-08, "loss": 0.6093, "step": 52620 }, { "epoch": 0.9557968908906, "grad_norm": 0.7949770317524856, "learning_rate": 9.821967887854056e-08, "loss": 0.6054, "step": 52630 }, { "epoch": 0.955978497748075, "grad_norm": 0.7674554917146877, "learning_rate": 9.741558307338472e-08, "loss": 0.6049, "step": 52640 }, { "epoch": 0.9561601046055499, "grad_norm": 0.7481572566653393, "learning_rate": 9.66147761489311e-08, "loss": 0.6018, "step": 52650 }, { "epoch": 0.9563417114630248, "grad_norm": 0.757926502366131, "learning_rate": 9.581725837114697e-08, "loss": 0.6043, "step": 52660 }, { "epoch": 0.9565233183204997, "grad_norm": 0.7764205926658521, "learning_rate": 9.502303000490487e-08, "loss": 0.6157, "step": 52670 }, { "epoch": 0.9567049251779747, "grad_norm": 0.741918694708022, "learning_rate": 9.423209131398714e-08, "loss": 0.6006, "step": 52680 }, { "epoch": 0.9568865320354497, "grad_norm": 0.7403680458311321, "learning_rate": 9.344444256108476e-08, "loss": 0.5871, "step": 52690 }, { "epoch": 0.9570681388929246, "grad_norm": 0.7420799193511162, "learning_rate": 9.26600840077907e-08, "loss": 0.6011, "step": 52700 }, { "epoch": 0.9572497457503996, "grad_norm": 0.7203008252019271, "learning_rate": 9.187901591461323e-08, "loss": 0.5971, "step": 52710 }, { "epoch": 0.9574313526078745, "grad_norm": 0.7230811871218331, "learning_rate": 9.110123854096043e-08, "loss": 0.6039, "step": 52720 }, { "epoch": 0.9576129594653494, "grad_norm": 0.7657962484843257, "learning_rate": 9.032675214515119e-08, "loss": 0.5989, "step": 52730 }, { "epoch": 0.9577945663228243, "grad_norm": 0.7498798692713716, "learning_rate": 8.955555698441088e-08, "loss": 0.5976, "step": 52740 }, { "epoch": 0.9579761731802993, "grad_norm": 0.7306330127129788, "learning_rate": 8.878765331487128e-08, "loss": 0.6087, "step": 52750 }, { "epoch": 0.9581577800377742, "grad_norm": 0.7329794240220372, "learning_rate": 8.80230413915717e-08, "loss": 0.5994, "step": 52760 }, { "epoch": 0.9583393868952491, "grad_norm": 0.7556320794766748, "learning_rate": 8.72617214684568e-08, "loss": 0.6026, "step": 52770 }, { "epoch": 0.9585209937527241, "grad_norm": 0.7821922750936857, "learning_rate": 8.650369379837876e-08, "loss": 0.597, "step": 52780 }, { "epoch": 0.9587026006101991, "grad_norm": 0.7412511680906401, "learning_rate": 8.57489586330984e-08, "loss": 0.6011, "step": 52790 }, { "epoch": 0.958884207467674, "grad_norm": 0.7395579492565528, "learning_rate": 8.499751622327857e-08, "loss": 0.6067, "step": 52800 }, { "epoch": 0.9590658143251489, "grad_norm": 0.7539643183498935, "learning_rate": 8.424936681849183e-08, "loss": 0.5966, "step": 52810 }, { "epoch": 0.9592474211826238, "grad_norm": 0.7564819255203444, "learning_rate": 8.350451066721721e-08, "loss": 0.6184, "step": 52820 }, { "epoch": 0.9594290280400988, "grad_norm": 0.7752320952442203, "learning_rate": 8.276294801683571e-08, "loss": 0.6043, "step": 52830 }, { "epoch": 0.9596106348975737, "grad_norm": 0.7281476273566716, "learning_rate": 8.202467911364143e-08, "loss": 0.5988, "step": 52840 }, { "epoch": 0.9597922417550486, "grad_norm": 0.7351544470364202, "learning_rate": 8.128970420282934e-08, "loss": 0.606, "step": 52850 }, { "epoch": 0.9599738486125237, "grad_norm": 0.778099090867205, "learning_rate": 8.055802352850084e-08, "loss": 0.603, "step": 52860 }, { "epoch": 0.9601554554699986, "grad_norm": 0.7529254423202285, "learning_rate": 7.9829637333666e-08, "loss": 0.6163, "step": 52870 }, { "epoch": 0.9603370623274735, "grad_norm": 0.7297621763921389, "learning_rate": 7.910454586023796e-08, "loss": 0.6172, "step": 52880 }, { "epoch": 0.9605186691849484, "grad_norm": 0.7451484393264641, "learning_rate": 7.838274934903633e-08, "loss": 0.5929, "step": 52890 }, { "epoch": 0.9607002760424234, "grad_norm": 0.7685200671013378, "learning_rate": 7.766424803978822e-08, "loss": 0.5868, "step": 52900 }, { "epoch": 0.9608818828998983, "grad_norm": 0.7422383801391411, "learning_rate": 7.694904217112276e-08, "loss": 0.6002, "step": 52910 }, { "epoch": 0.9610634897573732, "grad_norm": 0.7890362500395146, "learning_rate": 7.623713198057769e-08, "loss": 0.5968, "step": 52920 }, { "epoch": 0.9612450966148481, "grad_norm": 0.7245096961947854, "learning_rate": 7.552851770459502e-08, "loss": 0.5982, "step": 52930 }, { "epoch": 0.9614267034723231, "grad_norm": 0.7800605494750419, "learning_rate": 7.482319957852202e-08, "loss": 0.6032, "step": 52940 }, { "epoch": 0.9616083103297981, "grad_norm": 0.7586835992662043, "learning_rate": 7.412117783661132e-08, "loss": 0.6065, "step": 52950 }, { "epoch": 0.961789917187273, "grad_norm": 0.7360401492517249, "learning_rate": 7.342245271202086e-08, "loss": 0.6039, "step": 52960 }, { "epoch": 0.961971524044748, "grad_norm": 0.7866549055918531, "learning_rate": 7.272702443681389e-08, "loss": 0.6045, "step": 52970 }, { "epoch": 0.9621531309022229, "grad_norm": 0.7623084455440343, "learning_rate": 7.2034893241959e-08, "loss": 0.5883, "step": 52980 }, { "epoch": 0.9623347377596978, "grad_norm": 0.7221758303520605, "learning_rate": 7.134605935732786e-08, "loss": 0.6093, "step": 52990 }, { "epoch": 0.9625163446171727, "grad_norm": 0.7646338364891957, "learning_rate": 7.066052301169967e-08, "loss": 0.608, "step": 53000 }, { "epoch": 0.9626979514746477, "grad_norm": 0.7359744414998282, "learning_rate": 6.997828443275567e-08, "loss": 0.6017, "step": 53010 }, { "epoch": 0.9628795583321226, "grad_norm": 0.7673981813270367, "learning_rate": 6.92993438470846e-08, "loss": 0.6119, "step": 53020 }, { "epoch": 0.9630611651895976, "grad_norm": 0.7591925869692295, "learning_rate": 6.862370148017827e-08, "loss": 0.6051, "step": 53030 }, { "epoch": 0.9632427720470725, "grad_norm": 0.7404502598167414, "learning_rate": 6.795135755643279e-08, "loss": 0.5901, "step": 53040 }, { "epoch": 0.9634243789045475, "grad_norm": 0.7287071694921745, "learning_rate": 6.72823122991495e-08, "loss": 0.6002, "step": 53050 }, { "epoch": 0.9636059857620224, "grad_norm": 0.7465896290787082, "learning_rate": 6.661656593053511e-08, "loss": 0.6231, "step": 53060 }, { "epoch": 0.9637875926194973, "grad_norm": 0.7428147290290958, "learning_rate": 6.595411867169832e-08, "loss": 0.6192, "step": 53070 }, { "epoch": 0.9639691994769722, "grad_norm": 0.7348217231032484, "learning_rate": 6.529497074265534e-08, "loss": 0.5936, "step": 53080 }, { "epoch": 0.9641508063344472, "grad_norm": 0.750520033427035, "learning_rate": 6.463912236232106e-08, "loss": 0.595, "step": 53090 }, { "epoch": 0.9643324131919221, "grad_norm": 0.7784690904637291, "learning_rate": 6.398657374852236e-08, "loss": 0.6097, "step": 53100 }, { "epoch": 0.964514020049397, "grad_norm": 0.7443030773741132, "learning_rate": 6.333732511798251e-08, "loss": 0.6172, "step": 53110 }, { "epoch": 0.964695626906872, "grad_norm": 0.777051896643597, "learning_rate": 6.269137668633352e-08, "loss": 0.6194, "step": 53120 }, { "epoch": 0.964877233764347, "grad_norm": 0.7715968051917648, "learning_rate": 6.204872866811151e-08, "loss": 0.6141, "step": 53130 }, { "epoch": 0.9650588406218219, "grad_norm": 0.7600565746167899, "learning_rate": 6.140938127675245e-08, "loss": 0.6037, "step": 53140 }, { "epoch": 0.9652404474792968, "grad_norm": 0.7603703155354152, "learning_rate": 6.077333472459979e-08, "loss": 0.6043, "step": 53150 }, { "epoch": 0.9654220543367718, "grad_norm": 0.7339402059143717, "learning_rate": 6.014058922289901e-08, "loss": 0.6012, "step": 53160 }, { "epoch": 0.9656036611942467, "grad_norm": 0.7498462664662492, "learning_rate": 5.951114498180088e-08, "loss": 0.6035, "step": 53170 }, { "epoch": 0.9657852680517216, "grad_norm": 0.760483085609335, "learning_rate": 5.88850022103582e-08, "loss": 0.607, "step": 53180 }, { "epoch": 0.9659668749091965, "grad_norm": 0.7719579335142439, "learning_rate": 5.826216111652683e-08, "loss": 0.6091, "step": 53190 }, { "epoch": 0.9661484817666716, "grad_norm": 0.7961561061548822, "learning_rate": 5.764262190716796e-08, "loss": 0.5892, "step": 53200 }, { "epoch": 0.9663300886241465, "grad_norm": 0.7694672904652383, "learning_rate": 5.7026384788043674e-08, "loss": 0.6131, "step": 53210 }, { "epoch": 0.9665116954816214, "grad_norm": 0.738611831853357, "learning_rate": 5.6413449963822477e-08, "loss": 0.6019, "step": 53220 }, { "epoch": 0.9666933023390963, "grad_norm": 0.7682282400464762, "learning_rate": 5.580381763807374e-08, "loss": 0.6197, "step": 53230 }, { "epoch": 0.9668749091965713, "grad_norm": 0.7708357775266105, "learning_rate": 5.519748801327107e-08, "loss": 0.602, "step": 53240 }, { "epoch": 0.9670565160540462, "grad_norm": 0.7384352425866493, "learning_rate": 5.4594461290788936e-08, "loss": 0.5985, "step": 53250 }, { "epoch": 0.9672381229115211, "grad_norm": 0.74740500476514, "learning_rate": 5.399473767090935e-08, "loss": 0.5971, "step": 53260 }, { "epoch": 0.967419729768996, "grad_norm": 0.7611198294163639, "learning_rate": 5.3398317352811865e-08, "loss": 0.6045, "step": 53270 }, { "epoch": 0.967601336626471, "grad_norm": 0.7648570381683021, "learning_rate": 5.28052005345836e-08, "loss": 0.6025, "step": 53280 }, { "epoch": 0.967782943483946, "grad_norm": 0.7712618581763855, "learning_rate": 5.2215387413212525e-08, "loss": 0.6177, "step": 53290 }, { "epoch": 0.9679645503414209, "grad_norm": 0.7406635476454798, "learning_rate": 5.1628878184588616e-08, "loss": 0.6118, "step": 53300 }, { "epoch": 0.9681461571988959, "grad_norm": 0.7609314445980025, "learning_rate": 5.104567304350605e-08, "loss": 0.6033, "step": 53310 }, { "epoch": 0.9683277640563708, "grad_norm": 0.7658972260732141, "learning_rate": 5.046577218365989e-08, "loss": 0.5912, "step": 53320 }, { "epoch": 0.9685093709138457, "grad_norm": 0.7405724219918521, "learning_rate": 4.988917579765051e-08, "loss": 0.6016, "step": 53330 }, { "epoch": 0.9686909777713206, "grad_norm": 0.7035897930313946, "learning_rate": 4.9315884076976964e-08, "loss": 0.5916, "step": 53340 }, { "epoch": 0.9688725846287956, "grad_norm": 0.7373545167214769, "learning_rate": 4.87458972120447e-08, "loss": 0.6113, "step": 53350 }, { "epoch": 0.9690541914862705, "grad_norm": 0.7183898489046648, "learning_rate": 4.817921539215897e-08, "loss": 0.6022, "step": 53360 }, { "epoch": 0.9692357983437455, "grad_norm": 0.752618492208793, "learning_rate": 4.761583880552812e-08, "loss": 0.6041, "step": 53370 }, { "epoch": 0.9694174052012204, "grad_norm": 0.7699922708511353, "learning_rate": 4.705576763926245e-08, "loss": 0.6147, "step": 53380 }, { "epoch": 0.9695990120586954, "grad_norm": 0.8246994616230332, "learning_rate": 4.649900207937652e-08, "loss": 0.6126, "step": 53390 }, { "epoch": 0.9697806189161703, "grad_norm": 0.7812369980285275, "learning_rate": 4.594554231078241e-08, "loss": 0.6012, "step": 53400 }, { "epoch": 0.9699622257736452, "grad_norm": 0.7669969426609139, "learning_rate": 4.539538851729863e-08, "loss": 0.6059, "step": 53410 }, { "epoch": 0.9701438326311201, "grad_norm": 0.7660823331300521, "learning_rate": 4.4848540881643474e-08, "loss": 0.6213, "step": 53420 }, { "epoch": 0.9703254394885951, "grad_norm": 0.7568314467368003, "learning_rate": 4.4304999585439436e-08, "loss": 0.6131, "step": 53430 }, { "epoch": 0.97050704634607, "grad_norm": 0.7408383043795069, "learning_rate": 4.376476480920877e-08, "loss": 0.5984, "step": 53440 }, { "epoch": 0.9706886532035449, "grad_norm": 0.7534144221444321, "learning_rate": 4.3227836732374626e-08, "loss": 0.6039, "step": 53450 }, { "epoch": 0.97087026006102, "grad_norm": 0.7678202228267355, "learning_rate": 4.269421553326547e-08, "loss": 0.6061, "step": 53460 }, { "epoch": 0.9710518669184949, "grad_norm": 0.7739730172440257, "learning_rate": 4.2163901389107306e-08, "loss": 0.589, "step": 53470 }, { "epoch": 0.9712334737759698, "grad_norm": 0.7650878165346017, "learning_rate": 4.1636894476031474e-08, "loss": 0.6089, "step": 53480 }, { "epoch": 0.9714150806334447, "grad_norm": 0.777717733668128, "learning_rate": 4.111319496906907e-08, "loss": 0.6009, "step": 53490 }, { "epoch": 0.9715966874909197, "grad_norm": 0.7687307593853074, "learning_rate": 4.059280304215318e-08, "loss": 0.6058, "step": 53500 }, { "epoch": 0.9717782943483946, "grad_norm": 0.779303609738871, "learning_rate": 4.0075718868117785e-08, "loss": 0.6165, "step": 53510 }, { "epoch": 0.9719599012058695, "grad_norm": 0.7915301587877226, "learning_rate": 3.956194261869772e-08, "loss": 0.5992, "step": 53520 }, { "epoch": 0.9721415080633444, "grad_norm": 0.7516032531428389, "learning_rate": 3.9051474464532055e-08, "loss": 0.605, "step": 53530 }, { "epoch": 0.9723231149208195, "grad_norm": 0.7659154285091286, "learning_rate": 3.8544314575158504e-08, "loss": 0.595, "step": 53540 }, { "epoch": 0.9725047217782944, "grad_norm": 0.7292945949846524, "learning_rate": 3.804046311901566e-08, "loss": 0.5875, "step": 53550 }, { "epoch": 0.9726863286357693, "grad_norm": 0.742237278006761, "learning_rate": 3.753992026344633e-08, "loss": 0.6147, "step": 53560 }, { "epoch": 0.9728679354932442, "grad_norm": 0.7438268066943341, "learning_rate": 3.7042686174690866e-08, "loss": 0.6022, "step": 53570 }, { "epoch": 0.9730495423507192, "grad_norm": 0.7421516962517833, "learning_rate": 3.654876101789384e-08, "loss": 0.6119, "step": 53580 }, { "epoch": 0.9732311492081941, "grad_norm": 0.7391602013224629, "learning_rate": 3.605814495709847e-08, "loss": 0.5914, "step": 53590 }, { "epoch": 0.973412756065669, "grad_norm": 0.8074524494885074, "learning_rate": 3.557083815524998e-08, "loss": 0.595, "step": 53600 }, { "epoch": 0.9735943629231439, "grad_norm": 0.7628482103639609, "learning_rate": 3.508684077419333e-08, "loss": 0.611, "step": 53610 }, { "epoch": 0.9737759697806189, "grad_norm": 0.7541700176528331, "learning_rate": 3.460615297467773e-08, "loss": 0.5986, "step": 53620 }, { "epoch": 0.9739575766380939, "grad_norm": 0.7787936081135625, "learning_rate": 3.4128774916348805e-08, "loss": 0.6179, "step": 53630 }, { "epoch": 0.9741391834955688, "grad_norm": 0.7531258055090208, "learning_rate": 3.3654706757756394e-08, "loss": 0.5974, "step": 53640 }, { "epoch": 0.9743207903530438, "grad_norm": 0.7423696335007345, "learning_rate": 3.3183948656348994e-08, "loss": 0.6086, "step": 53650 }, { "epoch": 0.9745023972105187, "grad_norm": 0.763331971375791, "learning_rate": 3.27165007684771e-08, "loss": 0.6022, "step": 53660 }, { "epoch": 0.9746840040679936, "grad_norm": 0.7497406609013, "learning_rate": 3.225236324938985e-08, "loss": 0.6009, "step": 53670 }, { "epoch": 0.9748656109254685, "grad_norm": 0.7490793486072483, "learning_rate": 3.1791536253239494e-08, "loss": 0.5875, "step": 53680 }, { "epoch": 0.9750472177829435, "grad_norm": 0.756589904257844, "learning_rate": 3.133401993307694e-08, "loss": 0.6151, "step": 53690 }, { "epoch": 0.9752288246404184, "grad_norm": 0.7890417219296415, "learning_rate": 3.087981444085397e-08, "loss": 0.5969, "step": 53700 }, { "epoch": 0.9754104314978934, "grad_norm": 0.7393797581316708, "learning_rate": 3.042891992742325e-08, "loss": 0.605, "step": 53710 }, { "epoch": 0.9755920383553683, "grad_norm": 0.765438143513834, "learning_rate": 2.998133654253721e-08, "loss": 0.6164, "step": 53720 }, { "epoch": 0.9757736452128433, "grad_norm": 0.721355368186898, "learning_rate": 2.953706443484805e-08, "loss": 0.5944, "step": 53730 }, { "epoch": 0.9759552520703182, "grad_norm": 0.7729363417183932, "learning_rate": 2.9096103751909964e-08, "loss": 0.6164, "step": 53740 }, { "epoch": 0.9761368589277931, "grad_norm": 0.7368913780286569, "learning_rate": 2.8658454640176914e-08, "loss": 0.5977, "step": 53750 }, { "epoch": 0.976318465785268, "grad_norm": 0.7530207270476408, "learning_rate": 2.8224117245000405e-08, "loss": 0.607, "step": 53760 }, { "epoch": 0.976500072642743, "grad_norm": 0.7903366366252007, "learning_rate": 2.7793091710636157e-08, "loss": 0.6062, "step": 53770 }, { "epoch": 0.9766816795002179, "grad_norm": 0.7493932999572568, "learning_rate": 2.7365378180237433e-08, "loss": 0.6056, "step": 53780 }, { "epoch": 0.9768632863576928, "grad_norm": 0.739181885667686, "learning_rate": 2.6940976795856168e-08, "loss": 0.593, "step": 53790 }, { "epoch": 0.9770448932151679, "grad_norm": 0.7509343094283856, "learning_rate": 2.6519887698448488e-08, "loss": 0.6135, "step": 53800 }, { "epoch": 0.9772265000726428, "grad_norm": 0.7408571623354933, "learning_rate": 2.6102111027865862e-08, "loss": 0.6064, "step": 53810 }, { "epoch": 0.9774081069301177, "grad_norm": 0.7752707990406983, "learning_rate": 2.5687646922863963e-08, "loss": 0.61, "step": 53820 }, { "epoch": 0.9775897137875926, "grad_norm": 0.7329237034364863, "learning_rate": 2.5276495521093792e-08, "loss": 0.5992, "step": 53830 }, { "epoch": 0.9777713206450676, "grad_norm": 0.7251085238454384, "learning_rate": 2.486865695910945e-08, "loss": 0.5958, "step": 53840 }, { "epoch": 0.9779529275025425, "grad_norm": 0.7689159623899914, "learning_rate": 2.4464131372363696e-08, "loss": 0.6027, "step": 53850 }, { "epoch": 0.9781345343600174, "grad_norm": 0.752732289271732, "learning_rate": 2.4062918895209063e-08, "loss": 0.6107, "step": 53860 }, { "epoch": 0.9783161412174923, "grad_norm": 0.7532401085820033, "learning_rate": 2.366501966089674e-08, "loss": 0.593, "step": 53870 }, { "epoch": 0.9784977480749674, "grad_norm": 0.7366645455274542, "learning_rate": 2.3270433801579894e-08, "loss": 0.6208, "step": 53880 }, { "epoch": 0.9786793549324423, "grad_norm": 0.7404805924249628, "learning_rate": 2.287916144830926e-08, "loss": 0.5974, "step": 53890 }, { "epoch": 0.9788609617899172, "grad_norm": 0.7345277113446625, "learning_rate": 2.2491202731035333e-08, "loss": 0.5939, "step": 53900 }, { "epoch": 0.9790425686473921, "grad_norm": 0.7596002273204684, "learning_rate": 2.2106557778607262e-08, "loss": 0.6212, "step": 53910 }, { "epoch": 0.9792241755048671, "grad_norm": 0.8340543365410695, "learning_rate": 2.1725226718776195e-08, "loss": 0.6168, "step": 53920 }, { "epoch": 0.979405782362342, "grad_norm": 0.7352866779676072, "learning_rate": 2.1347209678190816e-08, "loss": 0.6018, "step": 53930 }, { "epoch": 0.9795873892198169, "grad_norm": 0.7619272749674798, "learning_rate": 2.0972506782398484e-08, "loss": 0.6059, "step": 53940 }, { "epoch": 0.9797689960772918, "grad_norm": 0.7322715595347119, "learning_rate": 2.060111815584853e-08, "loss": 0.6044, "step": 53950 }, { "epoch": 0.9799506029347668, "grad_norm": 0.7348434022373543, "learning_rate": 2.023304392188674e-08, "loss": 0.5969, "step": 53960 }, { "epoch": 0.9801322097922418, "grad_norm": 0.7388462868384463, "learning_rate": 1.9868284202758658e-08, "loss": 0.6094, "step": 53970 }, { "epoch": 0.9803138166497167, "grad_norm": 0.7482492294341978, "learning_rate": 1.950683911961071e-08, "loss": 0.6026, "step": 53980 }, { "epoch": 0.9804954235071917, "grad_norm": 0.7991403302729679, "learning_rate": 1.914870879248687e-08, "loss": 0.6071, "step": 53990 }, { "epoch": 0.9806770303646666, "grad_norm": 0.7382592442462071, "learning_rate": 1.8793893340330882e-08, "loss": 0.6052, "step": 54000 }, { "epoch": 0.9808586372221415, "grad_norm": 0.7567227343425391, "learning_rate": 1.844239288098404e-08, "loss": 0.6038, "step": 54010 }, { "epoch": 0.9810402440796164, "grad_norm": 0.7396790590369798, "learning_rate": 1.8094207531189624e-08, "loss": 0.6136, "step": 54020 }, { "epoch": 0.9812218509370914, "grad_norm": 0.7843602731174754, "learning_rate": 1.7749337406587353e-08, "loss": 0.6123, "step": 54030 }, { "epoch": 0.9814034577945663, "grad_norm": 0.7802740920706173, "learning_rate": 1.7407782621716717e-08, "loss": 0.5996, "step": 54040 }, { "epoch": 0.9815850646520412, "grad_norm": 0.7618181923827761, "learning_rate": 1.7069543290015865e-08, "loss": 0.597, "step": 54050 }, { "epoch": 0.9817666715095162, "grad_norm": 0.8021498135592913, "learning_rate": 1.6734619523821604e-08, "loss": 0.6057, "step": 54060 }, { "epoch": 0.9819482783669912, "grad_norm": 0.7204617986360827, "learning_rate": 1.640301143437162e-08, "loss": 0.6051, "step": 54070 }, { "epoch": 0.9821298852244661, "grad_norm": 0.7450650338788148, "learning_rate": 1.6074719131798922e-08, "loss": 0.6017, "step": 54080 }, { "epoch": 0.982311492081941, "grad_norm": 0.74225698249233, "learning_rate": 1.5749742725138516e-08, "loss": 0.6029, "step": 54090 }, { "epoch": 0.9824930989394159, "grad_norm": 0.7397952191092625, "learning_rate": 1.542808232232185e-08, "loss": 0.5939, "step": 54100 }, { "epoch": 0.9826747057968909, "grad_norm": 0.7722188539423609, "learning_rate": 1.510973803018012e-08, "loss": 0.6116, "step": 54110 }, { "epoch": 0.9828563126543658, "grad_norm": 0.724786112615658, "learning_rate": 1.47947099544421e-08, "loss": 0.5994, "step": 54120 }, { "epoch": 0.9830379195118407, "grad_norm": 0.7597220010791104, "learning_rate": 1.4482998199738529e-08, "loss": 0.5957, "step": 54130 }, { "epoch": 0.9832195263693158, "grad_norm": 0.7403477454514342, "learning_rate": 1.4174602869593268e-08, "loss": 0.5998, "step": 54140 }, { "epoch": 0.9834011332267907, "grad_norm": 0.760252927564641, "learning_rate": 1.386952406643327e-08, "loss": 0.608, "step": 54150 }, { "epoch": 0.9835827400842656, "grad_norm": 0.7255039262654326, "learning_rate": 1.3567761891581931e-08, "loss": 0.6029, "step": 54160 }, { "epoch": 0.9837643469417405, "grad_norm": 0.7741322130008426, "learning_rate": 1.3269316445261304e-08, "loss": 0.6005, "step": 54170 }, { "epoch": 0.9839459537992155, "grad_norm": 0.7856220812620303, "learning_rate": 1.29741878265921e-08, "loss": 0.6077, "step": 54180 }, { "epoch": 0.9841275606566904, "grad_norm": 0.7366757932412791, "learning_rate": 1.2682376133594799e-08, "loss": 0.5978, "step": 54190 }, { "epoch": 0.9843091675141653, "grad_norm": 0.7673810736143117, "learning_rate": 1.2393881463185209e-08, "loss": 0.6143, "step": 54200 }, { "epoch": 0.9844907743716402, "grad_norm": 0.7646315207804112, "learning_rate": 1.210870391118002e-08, "loss": 0.6102, "step": 54210 }, { "epoch": 0.9846723812291152, "grad_norm": 0.7758358848721127, "learning_rate": 1.1826843572293467e-08, "loss": 0.6041, "step": 54220 }, { "epoch": 0.9848539880865902, "grad_norm": 0.7411282948835914, "learning_rate": 1.1548300540137336e-08, "loss": 0.6017, "step": 54230 }, { "epoch": 0.9850355949440651, "grad_norm": 0.7337438640172114, "learning_rate": 1.1273074907223181e-08, "loss": 0.6217, "step": 54240 }, { "epoch": 0.98521720180154, "grad_norm": 0.7467649864162708, "learning_rate": 1.1001166764958992e-08, "loss": 0.6092, "step": 54250 }, { "epoch": 0.985398808659015, "grad_norm": 0.7371896580805545, "learning_rate": 1.0732576203652533e-08, "loss": 0.6038, "step": 54260 }, { "epoch": 0.9855804155164899, "grad_norm": 0.7458300287603546, "learning_rate": 1.0467303312508004e-08, "loss": 0.6028, "step": 54270 }, { "epoch": 0.9857620223739648, "grad_norm": 0.787495860908894, "learning_rate": 1.0205348179630482e-08, "loss": 0.5961, "step": 54280 }, { "epoch": 0.9859436292314397, "grad_norm": 0.7413365581970456, "learning_rate": 9.946710892020372e-09, "loss": 0.5993, "step": 54290 }, { "epoch": 0.9861252360889147, "grad_norm": 0.7526716953431392, "learning_rate": 9.691391535576743e-09, "loss": 0.61, "step": 54300 }, { "epoch": 0.9863068429463897, "grad_norm": 0.7739749268507472, "learning_rate": 9.43939019509843e-09, "loss": 0.6073, "step": 54310 }, { "epoch": 0.9864884498038646, "grad_norm": 0.7123245373249443, "learning_rate": 9.190706954279594e-09, "loss": 0.594, "step": 54320 }, { "epoch": 0.9866700566613396, "grad_norm": 0.7866631370287955, "learning_rate": 8.945341895715275e-09, "loss": 0.6104, "step": 54330 }, { "epoch": 0.9868516635188145, "grad_norm": 0.7474089673602864, "learning_rate": 8.703295100894738e-09, "loss": 0.6018, "step": 54340 }, { "epoch": 0.9870332703762894, "grad_norm": 0.7340700300228983, "learning_rate": 8.464566650210337e-09, "loss": 0.5934, "step": 54350 }, { "epoch": 0.9872148772337643, "grad_norm": 0.7764553479417879, "learning_rate": 8.229156622946433e-09, "loss": 0.6073, "step": 54360 }, { "epoch": 0.9873964840912393, "grad_norm": 0.7463716791744127, "learning_rate": 7.99706509729048e-09, "loss": 0.601, "step": 54370 }, { "epoch": 0.9875780909487142, "grad_norm": 0.7722846479563443, "learning_rate": 7.768292150324152e-09, "loss": 0.6027, "step": 54380 }, { "epoch": 0.9877596978061891, "grad_norm": 0.7780802684730613, "learning_rate": 7.542837858030005e-09, "loss": 0.6081, "step": 54390 }, { "epoch": 0.9879413046636641, "grad_norm": 0.7552363002883841, "learning_rate": 7.320702295284809e-09, "loss": 0.6061, "step": 54400 }, { "epoch": 0.9881229115211391, "grad_norm": 0.7237438977041529, "learning_rate": 7.101885535865105e-09, "loss": 0.5998, "step": 54410 }, { "epoch": 0.988304518378614, "grad_norm": 0.7420112221071468, "learning_rate": 6.886387652446091e-09, "loss": 0.6143, "step": 54420 }, { "epoch": 0.9884861252360889, "grad_norm": 0.7584553655424141, "learning_rate": 6.674208716599407e-09, "loss": 0.595, "step": 54430 }, { "epoch": 0.9886677320935638, "grad_norm": 0.7819903531292393, "learning_rate": 6.465348798794235e-09, "loss": 0.5935, "step": 54440 }, { "epoch": 0.9888493389510388, "grad_norm": 0.772486040968641, "learning_rate": 6.259807968398424e-09, "loss": 0.607, "step": 54450 }, { "epoch": 0.9890309458085137, "grad_norm": 0.771456279885689, "learning_rate": 6.057586293677365e-09, "loss": 0.5983, "step": 54460 }, { "epoch": 0.9892125526659886, "grad_norm": 0.7388752659282082, "learning_rate": 5.858683841791779e-09, "loss": 0.5956, "step": 54470 }, { "epoch": 0.9893941595234637, "grad_norm": 0.7455446271605896, "learning_rate": 5.663100678803268e-09, "loss": 0.6038, "step": 54480 }, { "epoch": 0.9895757663809386, "grad_norm": 0.7411597660122727, "learning_rate": 5.470836869669871e-09, "loss": 0.5986, "step": 54490 }, { "epoch": 0.9897573732384135, "grad_norm": 0.7622413526250306, "learning_rate": 5.281892478244954e-09, "loss": 0.6109, "step": 54500 }, { "epoch": 0.9899389800958884, "grad_norm": 0.8049919071583729, "learning_rate": 5.096267567283874e-09, "loss": 0.6221, "step": 54510 }, { "epoch": 0.9901205869533634, "grad_norm": 0.746441488204805, "learning_rate": 4.9139621984362065e-09, "loss": 0.603, "step": 54520 }, { "epoch": 0.9903021938108383, "grad_norm": 0.8237076674560222, "learning_rate": 4.734976432247962e-09, "loss": 0.6115, "step": 54530 }, { "epoch": 0.9904838006683132, "grad_norm": 0.7312499928339171, "learning_rate": 4.559310328167144e-09, "loss": 0.6027, "step": 54540 }, { "epoch": 0.9906654075257881, "grad_norm": 0.7521584998080433, "learning_rate": 4.3869639445359715e-09, "loss": 0.6035, "step": 54550 }, { "epoch": 0.9908470143832631, "grad_norm": 0.7547431415534962, "learning_rate": 4.217937338593103e-09, "loss": 0.6169, "step": 54560 }, { "epoch": 0.9910286212407381, "grad_norm": 0.7456075258499745, "learning_rate": 4.052230566478077e-09, "loss": 0.5976, "step": 54570 }, { "epoch": 0.991210228098213, "grad_norm": 0.75442261977029, "learning_rate": 3.889843683225758e-09, "loss": 0.6027, "step": 54580 }, { "epoch": 0.991391834955688, "grad_norm": 0.7700059465427482, "learning_rate": 3.73077674276745e-09, "loss": 0.6143, "step": 54590 }, { "epoch": 0.9915734418131629, "grad_norm": 0.7466255891451408, "learning_rate": 3.575029797934226e-09, "loss": 0.6034, "step": 54600 }, { "epoch": 0.9917550486706378, "grad_norm": 0.7461092024803662, "learning_rate": 3.4226029004524875e-09, "loss": 0.6076, "step": 54610 }, { "epoch": 0.9919366555281127, "grad_norm": 0.7232842543795769, "learning_rate": 3.2734961009472934e-09, "loss": 0.6065, "step": 54620 }, { "epoch": 0.9921182623855876, "grad_norm": 0.7367075566767728, "learning_rate": 3.1277094489401417e-09, "loss": 0.6136, "step": 54630 }, { "epoch": 0.9922998692430626, "grad_norm": 0.7716010178939078, "learning_rate": 2.9852429928511896e-09, "loss": 0.5941, "step": 54640 }, { "epoch": 0.9924814761005376, "grad_norm": 0.7713908717588421, "learning_rate": 2.8460967799948116e-09, "loss": 0.6166, "step": 54650 }, { "epoch": 0.9926630829580125, "grad_norm": 0.7249910257922757, "learning_rate": 2.7102708565873714e-09, "loss": 0.6012, "step": 54660 }, { "epoch": 0.9928446898154875, "grad_norm": 0.7628225034511561, "learning_rate": 2.5777652677383415e-09, "loss": 0.6092, "step": 54670 }, { "epoch": 0.9930262966729624, "grad_norm": 0.775613343032088, "learning_rate": 2.4485800574558514e-09, "loss": 0.5933, "step": 54680 }, { "epoch": 0.9932079035304373, "grad_norm": 0.7657361306979431, "learning_rate": 2.3227152686455813e-09, "loss": 0.6063, "step": 54690 }, { "epoch": 0.9933895103879122, "grad_norm": 0.7670080986314056, "learning_rate": 2.2001709431096473e-09, "loss": 0.6, "step": 54700 }, { "epoch": 0.9935711172453872, "grad_norm": 0.7144901092685801, "learning_rate": 2.0809471215488266e-09, "loss": 0.604, "step": 54710 }, { "epoch": 0.9937527241028621, "grad_norm": 0.7568815383902628, "learning_rate": 1.9650438435603324e-09, "loss": 0.6024, "step": 54720 }, { "epoch": 0.993934330960337, "grad_norm": 0.7830831696775129, "learning_rate": 1.8524611476378184e-09, "loss": 0.5981, "step": 54730 }, { "epoch": 0.994115937817812, "grad_norm": 0.7559774920422405, "learning_rate": 1.7431990711724856e-09, "loss": 0.6207, "step": 54740 }, { "epoch": 0.994297544675287, "grad_norm": 0.7237697616332017, "learning_rate": 1.6372576504530835e-09, "loss": 0.6161, "step": 54750 }, { "epoch": 0.9944791515327619, "grad_norm": 0.7981425519572788, "learning_rate": 1.5346369206648004e-09, "loss": 0.6086, "step": 54760 }, { "epoch": 0.9946607583902368, "grad_norm": 0.7429782325016008, "learning_rate": 1.435336915890373e-09, "loss": 0.5923, "step": 54770 }, { "epoch": 0.9948423652477117, "grad_norm": 0.760288160965475, "learning_rate": 1.3393576691100862e-09, "loss": 0.6009, "step": 54780 }, { "epoch": 0.9950239721051867, "grad_norm": 0.768530337922834, "learning_rate": 1.246699212201774e-09, "loss": 0.6046, "step": 54790 }, { "epoch": 0.9952055789626616, "grad_norm": 0.7639932219244694, "learning_rate": 1.157361575937488e-09, "loss": 0.6187, "step": 54800 }, { "epoch": 0.9953871858201365, "grad_norm": 0.7493449701752253, "learning_rate": 1.0713447899901586e-09, "loss": 0.6092, "step": 54810 }, { "epoch": 0.9955687926776116, "grad_norm": 0.7350160153769278, "learning_rate": 9.886488829269348e-10, "loss": 0.6087, "step": 54820 }, { "epoch": 0.9957503995350865, "grad_norm": 0.759490188942006, "learning_rate": 9.092738822136237e-10, "loss": 0.6034, "step": 54830 }, { "epoch": 0.9959320063925614, "grad_norm": 0.7555065486515099, "learning_rate": 8.332198142124715e-10, "loss": 0.598, "step": 54840 }, { "epoch": 0.9961136132500363, "grad_norm": 0.7898008822924586, "learning_rate": 7.604867041821617e-10, "loss": 0.6127, "step": 54850 }, { "epoch": 0.9962952201075113, "grad_norm": 0.7592380430882975, "learning_rate": 6.910745762800375e-10, "loss": 0.6034, "step": 54860 }, { "epoch": 0.9964768269649862, "grad_norm": 0.750676089512398, "learning_rate": 6.249834535587695e-10, "loss": 0.6055, "step": 54870 }, { "epoch": 0.9966584338224611, "grad_norm": 0.7527736854758721, "learning_rate": 5.622133579685773e-10, "loss": 0.6077, "step": 54880 }, { "epoch": 0.996840040679936, "grad_norm": 0.7793546557992068, "learning_rate": 5.027643103572288e-10, "loss": 0.6007, "step": 54890 }, { "epoch": 0.997021647537411, "grad_norm": 0.7333626923943967, "learning_rate": 4.4663633047004053e-10, "loss": 0.6115, "step": 54900 }, { "epoch": 0.997203254394886, "grad_norm": 0.7870734847565505, "learning_rate": 3.9382943694654674e-10, "loss": 0.6071, "step": 54910 }, { "epoch": 0.9973848612523609, "grad_norm": 0.7235024157080154, "learning_rate": 3.4434364732716106e-10, "loss": 0.6019, "step": 54920 }, { "epoch": 0.9975664681098358, "grad_norm": 0.755169166916736, "learning_rate": 2.9817897804540473e-10, "loss": 0.6013, "step": 54930 }, { "epoch": 0.9977480749673108, "grad_norm": 0.7968480578719229, "learning_rate": 2.5533544443567815e-10, "loss": 0.6023, "step": 54940 }, { "epoch": 0.9979296818247857, "grad_norm": 0.7888371723060097, "learning_rate": 2.1581306072548937e-10, "loss": 0.6115, "step": 54950 }, { "epoch": 0.9981112886822606, "grad_norm": 0.7586988008088237, "learning_rate": 1.796118400421154e-10, "loss": 0.604, "step": 54960 }, { "epoch": 0.9982928955397355, "grad_norm": 0.7689666893619106, "learning_rate": 1.4673179440816142e-10, "loss": 0.6046, "step": 54970 }, { "epoch": 0.9984745023972105, "grad_norm": 0.7719059431129934, "learning_rate": 1.1717293474489134e-10, "loss": 0.6013, "step": 54980 }, { "epoch": 0.9986561092546855, "grad_norm": 0.7308905149018582, "learning_rate": 9.093527086889708e-11, "loss": 0.5977, "step": 54990 }, { "epoch": 0.9988377161121604, "grad_norm": 0.803797148082756, "learning_rate": 6.801881149431921e-11, "loss": 0.6107, "step": 55000 }, { "epoch": 0.9990193229696354, "grad_norm": 0.7403417245085911, "learning_rate": 4.842356423173655e-11, "loss": 0.6029, "step": 55010 }, { "epoch": 0.9992009298271103, "grad_norm": 0.7657196138658173, "learning_rate": 3.214953559038669e-11, "loss": 0.6145, "step": 55020 }, { "epoch": 0.9993825366845852, "grad_norm": 0.770839420797249, "learning_rate": 1.9196730973725098e-11, "loss": 0.5998, "step": 55030 }, { "epoch": 0.9995641435420601, "grad_norm": 0.7771573906100528, "learning_rate": 9.565154684976208e-12, "loss": 0.6082, "step": 55040 }, { "epoch": 0.9997457503995351, "grad_norm": 0.7733834485520078, "learning_rate": 3.254809922692559e-12, "loss": 0.5993, "step": 55050 }, { "epoch": 0.99992735725701, "grad_norm": 0.7383575316289198, "learning_rate": 2.656987829752211e-13, "loss": 0.6117, "step": 55060 }, { "epoch": 1.0, "eval_loss": 0.5037005543708801, "eval_runtime": 20.479, "eval_samples_per_second": 41.799, "eval_steps_per_second": 1.318, "step": 55064 }, { "epoch": 1.0, "step": 55064, "total_flos": 1.2575785360765747e+17, "train_loss": 0.6373805527283185, "train_runtime": 394515.55, "train_samples_per_second": 8.933, "train_steps_per_second": 0.14 } ], "logging_steps": 10, "max_steps": 55064, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2575785360765747e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }