{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 46554, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.1480431327061047e-05, "grad_norm": 26.65201832627396, "learning_rate": 4.291845493562232e-08, "loss": 2.0281, "step": 1 }, { "epoch": 0.0002148043132706105, "grad_norm": 24.545838847916603, "learning_rate": 4.291845493562232e-07, "loss": 1.9707, "step": 10 }, { "epoch": 0.000429608626541221, "grad_norm": 6.925669336747074, "learning_rate": 8.583690987124464e-07, "loss": 1.7361, "step": 20 }, { "epoch": 0.0006444129398118315, "grad_norm": 3.563926800405336, "learning_rate": 1.2875536480686696e-06, "loss": 1.5248, "step": 30 }, { "epoch": 0.000859217253082442, "grad_norm": 2.7917255910937255, "learning_rate": 1.7167381974248929e-06, "loss": 1.3065, "step": 40 }, { "epoch": 0.0010740215663530524, "grad_norm": 1.3127546793240747, "learning_rate": 2.145922746781116e-06, "loss": 1.2055, "step": 50 }, { "epoch": 0.001288825879623663, "grad_norm": 1.1166152222590124, "learning_rate": 2.575107296137339e-06, "loss": 1.1276, "step": 60 }, { "epoch": 0.0015036301928942734, "grad_norm": 1.1067088125847406, "learning_rate": 3.004291845493563e-06, "loss": 1.111, "step": 70 }, { "epoch": 0.001718434506164884, "grad_norm": 1.0213495251064038, "learning_rate": 3.4334763948497858e-06, "loss": 1.0633, "step": 80 }, { "epoch": 0.0019332388194354942, "grad_norm": 1.4698974676460892, "learning_rate": 3.862660944206009e-06, "loss": 1.0363, "step": 90 }, { "epoch": 0.002148043132706105, "grad_norm": 1.1069539300208826, "learning_rate": 4.291845493562232e-06, "loss": 1.0595, "step": 100 }, { "epoch": 0.002362847445976715, "grad_norm": 1.0428861367575017, "learning_rate": 4.721030042918455e-06, "loss": 1.0544, "step": 110 }, { "epoch": 0.002577651759247326, "grad_norm": 1.0155126146726958, "learning_rate": 5.150214592274678e-06, "loss": 1.0247, "step": 120 }, { "epoch": 0.002792456072517936, "grad_norm": 1.0457307269517822, "learning_rate": 5.579399141630901e-06, "loss": 1.0231, "step": 130 }, { "epoch": 0.003007260385788547, "grad_norm": 0.9747222232804185, "learning_rate": 6.008583690987126e-06, "loss": 1.0112, "step": 140 }, { "epoch": 0.003222064699059157, "grad_norm": 1.0485056497719716, "learning_rate": 6.437768240343349e-06, "loss": 0.9848, "step": 150 }, { "epoch": 0.003436869012329768, "grad_norm": 1.015067491069871, "learning_rate": 6.8669527896995715e-06, "loss": 1.0101, "step": 160 }, { "epoch": 0.003651673325600378, "grad_norm": 0.9810404079330955, "learning_rate": 7.296137339055794e-06, "loss": 0.981, "step": 170 }, { "epoch": 0.0038664776388709883, "grad_norm": 1.018895365287239, "learning_rate": 7.725321888412017e-06, "loss": 0.9754, "step": 180 }, { "epoch": 0.004081281952141599, "grad_norm": 1.079857560560485, "learning_rate": 8.154506437768241e-06, "loss": 0.9996, "step": 190 }, { "epoch": 0.00429608626541221, "grad_norm": 0.9967859001475669, "learning_rate": 8.583690987124465e-06, "loss": 0.9952, "step": 200 }, { "epoch": 0.00451089057868282, "grad_norm": 1.115201831741858, "learning_rate": 9.012875536480687e-06, "loss": 0.9742, "step": 210 }, { "epoch": 0.00472569489195343, "grad_norm": 0.9523509165576094, "learning_rate": 9.44206008583691e-06, "loss": 0.9831, "step": 220 }, { "epoch": 0.004940499205224041, "grad_norm": 1.005436826590897, "learning_rate": 9.871244635193133e-06, "loss": 0.9773, "step": 230 }, { "epoch": 0.005155303518494652, "grad_norm": 0.9901616620650135, "learning_rate": 1.0300429184549356e-05, "loss": 0.9781, "step": 240 }, { "epoch": 0.0053701078317652615, "grad_norm": 1.0638485393442876, "learning_rate": 1.072961373390558e-05, "loss": 0.96, "step": 250 }, { "epoch": 0.005584912145035872, "grad_norm": 0.9964178960246284, "learning_rate": 1.1158798283261802e-05, "loss": 0.9742, "step": 260 }, { "epoch": 0.005799716458306483, "grad_norm": 1.0370493768439413, "learning_rate": 1.1587982832618028e-05, "loss": 0.9602, "step": 270 }, { "epoch": 0.006014520771577094, "grad_norm": 0.9796931302576042, "learning_rate": 1.2017167381974251e-05, "loss": 0.9632, "step": 280 }, { "epoch": 0.0062293250848477035, "grad_norm": 1.031038089029062, "learning_rate": 1.2446351931330473e-05, "loss": 0.9593, "step": 290 }, { "epoch": 0.006444129398118314, "grad_norm": 0.9515929962341028, "learning_rate": 1.2875536480686697e-05, "loss": 0.949, "step": 300 }, { "epoch": 0.006658933711388925, "grad_norm": 0.9520559803996252, "learning_rate": 1.330472103004292e-05, "loss": 0.9457, "step": 310 }, { "epoch": 0.006873738024659536, "grad_norm": 1.3890037359682255, "learning_rate": 1.3733905579399143e-05, "loss": 0.9584, "step": 320 }, { "epoch": 0.007088542337930145, "grad_norm": 0.9169319166076477, "learning_rate": 1.4163090128755365e-05, "loss": 0.9464, "step": 330 }, { "epoch": 0.007303346651200756, "grad_norm": 0.9772519097125956, "learning_rate": 1.4592274678111589e-05, "loss": 0.9614, "step": 340 }, { "epoch": 0.007518150964471367, "grad_norm": 1.1706529511376937, "learning_rate": 1.5021459227467811e-05, "loss": 0.9553, "step": 350 }, { "epoch": 0.007732955277741977, "grad_norm": 0.9436703190905599, "learning_rate": 1.5450643776824035e-05, "loss": 0.9417, "step": 360 }, { "epoch": 0.007947759591012588, "grad_norm": 0.8941917268650862, "learning_rate": 1.587982832618026e-05, "loss": 0.9382, "step": 370 }, { "epoch": 0.008162563904283198, "grad_norm": 0.9682494688789302, "learning_rate": 1.6309012875536482e-05, "loss": 0.9636, "step": 380 }, { "epoch": 0.008377368217553808, "grad_norm": 0.968118858319082, "learning_rate": 1.6738197424892706e-05, "loss": 0.9406, "step": 390 }, { "epoch": 0.00859217253082442, "grad_norm": 0.9304992691801565, "learning_rate": 1.716738197424893e-05, "loss": 0.9467, "step": 400 }, { "epoch": 0.00880697684409503, "grad_norm": 0.8846995721706429, "learning_rate": 1.7596566523605153e-05, "loss": 0.9552, "step": 410 }, { "epoch": 0.00902178115736564, "grad_norm": 1.0342299734011187, "learning_rate": 1.8025751072961374e-05, "loss": 0.9485, "step": 420 }, { "epoch": 0.00923658547063625, "grad_norm": 1.051909712027358, "learning_rate": 1.8454935622317597e-05, "loss": 0.9635, "step": 430 }, { "epoch": 0.00945138978390686, "grad_norm": 1.0942808627738883, "learning_rate": 1.888412017167382e-05, "loss": 0.9421, "step": 440 }, { "epoch": 0.009666194097177472, "grad_norm": 0.8646650135642147, "learning_rate": 1.9313304721030045e-05, "loss": 0.977, "step": 450 }, { "epoch": 0.009880998410448082, "grad_norm": 0.9215653231258697, "learning_rate": 1.9742489270386265e-05, "loss": 0.9571, "step": 460 }, { "epoch": 0.010095802723718692, "grad_norm": 0.9199665564587306, "learning_rate": 1.999999962828165e-05, "loss": 0.9356, "step": 470 }, { "epoch": 0.010310607036989303, "grad_norm": 0.9425113636574227, "learning_rate": 1.9999995446450485e-05, "loss": 0.9624, "step": 480 }, { "epoch": 0.010525411350259913, "grad_norm": 0.9337828020549753, "learning_rate": 1.999998661814216e-05, "loss": 0.9468, "step": 490 }, { "epoch": 0.010740215663530523, "grad_norm": 0.927989701189411, "learning_rate": 1.9999973143360788e-05, "loss": 0.9396, "step": 500 }, { "epoch": 0.010955019976801135, "grad_norm": 0.9324290313999429, "learning_rate": 1.999995502211262e-05, "loss": 0.9563, "step": 510 }, { "epoch": 0.011169824290071744, "grad_norm": 0.9434102759361815, "learning_rate": 1.9999932254406077e-05, "loss": 0.9609, "step": 520 }, { "epoch": 0.011384628603342354, "grad_norm": 0.8724473565087025, "learning_rate": 1.9999904840251743e-05, "loss": 0.9427, "step": 530 }, { "epoch": 0.011599432916612966, "grad_norm": 0.8942612386723021, "learning_rate": 1.9999872779662347e-05, "loss": 0.9245, "step": 540 }, { "epoch": 0.011814237229883576, "grad_norm": 0.9928300421934061, "learning_rate": 1.9999836072652794e-05, "loss": 0.932, "step": 550 }, { "epoch": 0.012029041543154187, "grad_norm": 0.8947389195896635, "learning_rate": 1.999979471924014e-05, "loss": 0.945, "step": 560 }, { "epoch": 0.012243845856424797, "grad_norm": 0.8728388436746601, "learning_rate": 1.9999748719443594e-05, "loss": 0.9508, "step": 570 }, { "epoch": 0.012458650169695407, "grad_norm": 1.0931390263840937, "learning_rate": 1.9999698073284534e-05, "loss": 0.9334, "step": 580 }, { "epoch": 0.012673454482966019, "grad_norm": 0.8867248563676206, "learning_rate": 1.9999642780786486e-05, "loss": 0.9352, "step": 590 }, { "epoch": 0.012888258796236628, "grad_norm": 0.8378993692657936, "learning_rate": 1.9999582841975152e-05, "loss": 0.9211, "step": 600 }, { "epoch": 0.013103063109507238, "grad_norm": 0.944765913779485, "learning_rate": 1.9999518256878377e-05, "loss": 0.9455, "step": 610 }, { "epoch": 0.01331786742277785, "grad_norm": 1.110589470050176, "learning_rate": 1.999944902552617e-05, "loss": 0.9312, "step": 620 }, { "epoch": 0.01353267173604846, "grad_norm": 0.93740820263563, "learning_rate": 1.9999375147950698e-05, "loss": 0.9476, "step": 630 }, { "epoch": 0.013747476049319071, "grad_norm": 0.8693919640628258, "learning_rate": 1.999929662418629e-05, "loss": 0.9397, "step": 640 }, { "epoch": 0.013962280362589681, "grad_norm": 0.821330072078906, "learning_rate": 1.9999213454269433e-05, "loss": 0.9387, "step": 650 }, { "epoch": 0.01417708467586029, "grad_norm": 0.8706987017404653, "learning_rate": 1.999912563823877e-05, "loss": 0.9293, "step": 660 }, { "epoch": 0.014391888989130902, "grad_norm": 0.8517170283338585, "learning_rate": 1.9999033176135104e-05, "loss": 0.9359, "step": 670 }, { "epoch": 0.014606693302401512, "grad_norm": 0.8579178853579097, "learning_rate": 1.99989360680014e-05, "loss": 0.9063, "step": 680 }, { "epoch": 0.014821497615672122, "grad_norm": 0.8866957779531295, "learning_rate": 1.999883431388278e-05, "loss": 0.9354, "step": 690 }, { "epoch": 0.015036301928942734, "grad_norm": 0.8870565536110462, "learning_rate": 1.9998727913826516e-05, "loss": 0.9366, "step": 700 }, { "epoch": 0.015251106242213344, "grad_norm": 0.8987989639943288, "learning_rate": 1.9998616867882052e-05, "loss": 0.9369, "step": 710 }, { "epoch": 0.015465910555483953, "grad_norm": 0.8110762813161089, "learning_rate": 1.9998501176100987e-05, "loss": 0.9267, "step": 720 }, { "epoch": 0.015680714868754563, "grad_norm": 0.8393357992971544, "learning_rate": 1.9998380838537075e-05, "loss": 0.9275, "step": 730 }, { "epoch": 0.015895519182025176, "grad_norm": 0.9524547482458253, "learning_rate": 1.999825585524623e-05, "loss": 0.9252, "step": 740 }, { "epoch": 0.016110323495295786, "grad_norm": 0.8155389716366475, "learning_rate": 1.9998126226286528e-05, "loss": 0.9147, "step": 750 }, { "epoch": 0.016325127808566396, "grad_norm": 0.8231072792576088, "learning_rate": 1.9997991951718196e-05, "loss": 0.9276, "step": 760 }, { "epoch": 0.016539932121837006, "grad_norm": 0.8125297140263947, "learning_rate": 1.9997853031603628e-05, "loss": 0.9345, "step": 770 }, { "epoch": 0.016754736435107616, "grad_norm": 0.9347609633412168, "learning_rate": 1.9997709466007374e-05, "loss": 0.9139, "step": 780 }, { "epoch": 0.01696954074837823, "grad_norm": 0.8167131053353975, "learning_rate": 1.9997561254996138e-05, "loss": 0.9258, "step": 790 }, { "epoch": 0.01718434506164884, "grad_norm": 0.8141997003853896, "learning_rate": 1.9997408398638785e-05, "loss": 0.9169, "step": 800 }, { "epoch": 0.01739914937491945, "grad_norm": 0.8020228216788807, "learning_rate": 1.9997250897006345e-05, "loss": 0.9114, "step": 810 }, { "epoch": 0.01761395368819006, "grad_norm": 0.7977580905585953, "learning_rate": 1.9997088750171995e-05, "loss": 0.9243, "step": 820 }, { "epoch": 0.01782875800146067, "grad_norm": 0.8518531375273793, "learning_rate": 1.999692195821108e-05, "loss": 0.9191, "step": 830 }, { "epoch": 0.01804356231473128, "grad_norm": 0.8006041023630638, "learning_rate": 1.99967505212011e-05, "loss": 0.9152, "step": 840 }, { "epoch": 0.01825836662800189, "grad_norm": 0.793159791876333, "learning_rate": 1.9996574439221708e-05, "loss": 0.9235, "step": 850 }, { "epoch": 0.0184731709412725, "grad_norm": 0.798911293614745, "learning_rate": 1.9996393712354724e-05, "loss": 0.907, "step": 860 }, { "epoch": 0.01868797525454311, "grad_norm": 0.7876487421020302, "learning_rate": 1.9996208340684124e-05, "loss": 0.9073, "step": 870 }, { "epoch": 0.01890277956781372, "grad_norm": 0.7656899260637665, "learning_rate": 1.9996018324296036e-05, "loss": 0.9191, "step": 880 }, { "epoch": 0.01911758388108433, "grad_norm": 0.8200150022936012, "learning_rate": 1.9995823663278753e-05, "loss": 0.9144, "step": 890 }, { "epoch": 0.019332388194354944, "grad_norm": 0.8730487914144835, "learning_rate": 1.9995624357722726e-05, "loss": 0.9239, "step": 900 }, { "epoch": 0.019547192507625554, "grad_norm": 0.8281237932100032, "learning_rate": 1.9995420407720556e-05, "loss": 0.9205, "step": 910 }, { "epoch": 0.019761996820896164, "grad_norm": 0.8232205349274369, "learning_rate": 1.9995211813367014e-05, "loss": 0.919, "step": 920 }, { "epoch": 0.019976801134166774, "grad_norm": 0.8147366319849692, "learning_rate": 1.9994998574759023e-05, "loss": 0.907, "step": 930 }, { "epoch": 0.020191605447437384, "grad_norm": 0.7976853731174323, "learning_rate": 1.9994780691995658e-05, "loss": 0.9112, "step": 940 }, { "epoch": 0.020406409760707993, "grad_norm": 0.8274499776650804, "learning_rate": 1.9994558165178165e-05, "loss": 0.9228, "step": 950 }, { "epoch": 0.020621214073978607, "grad_norm": 0.824749615058281, "learning_rate": 1.9994330994409932e-05, "loss": 0.9331, "step": 960 }, { "epoch": 0.020836018387249217, "grad_norm": 0.7904631449154701, "learning_rate": 1.999409917979652e-05, "loss": 0.91, "step": 970 }, { "epoch": 0.021050822700519826, "grad_norm": 0.8284138971940564, "learning_rate": 1.999386272144564e-05, "loss": 0.9163, "step": 980 }, { "epoch": 0.021265627013790436, "grad_norm": 0.8529599065323894, "learning_rate": 1.9993621619467163e-05, "loss": 0.9098, "step": 990 }, { "epoch": 0.021480431327061046, "grad_norm": 0.8034510770514283, "learning_rate": 1.999337587397311e-05, "loss": 0.9221, "step": 1000 }, { "epoch": 0.02169523564033166, "grad_norm": 0.7866973908662558, "learning_rate": 1.9993125485077675e-05, "loss": 0.9284, "step": 1010 }, { "epoch": 0.02191003995360227, "grad_norm": 0.8772012374367091, "learning_rate": 1.9992870452897195e-05, "loss": 0.9145, "step": 1020 }, { "epoch": 0.02212484426687288, "grad_norm": 0.7704037718521081, "learning_rate": 1.9992610777550174e-05, "loss": 0.9022, "step": 1030 }, { "epoch": 0.02233964858014349, "grad_norm": 0.8139905784316902, "learning_rate": 1.999234645915727e-05, "loss": 0.899, "step": 1040 }, { "epoch": 0.0225544528934141, "grad_norm": 0.8035022366722004, "learning_rate": 1.999207749784129e-05, "loss": 0.9119, "step": 1050 }, { "epoch": 0.02276925720668471, "grad_norm": 0.8018634228953854, "learning_rate": 1.9991803893727213e-05, "loss": 0.913, "step": 1060 }, { "epoch": 0.022984061519955322, "grad_norm": 0.8762806370784909, "learning_rate": 1.9991525646942167e-05, "loss": 0.8927, "step": 1070 }, { "epoch": 0.023198865833225932, "grad_norm": 0.8434215687788776, "learning_rate": 1.999124275761544e-05, "loss": 0.9035, "step": 1080 }, { "epoch": 0.02341367014649654, "grad_norm": 0.8323692540509068, "learning_rate": 1.9990955225878475e-05, "loss": 0.9184, "step": 1090 }, { "epoch": 0.02362847445976715, "grad_norm": 0.8062273577144896, "learning_rate": 1.9990663051864875e-05, "loss": 0.9158, "step": 1100 }, { "epoch": 0.02384327877303776, "grad_norm": 0.7741118350909906, "learning_rate": 1.9990366235710392e-05, "loss": 0.8994, "step": 1110 }, { "epoch": 0.024058083086308375, "grad_norm": 0.8409438928193937, "learning_rate": 1.9990064777552947e-05, "loss": 0.9039, "step": 1120 }, { "epoch": 0.024272887399578984, "grad_norm": 0.8040510324600977, "learning_rate": 1.9989758677532613e-05, "loss": 0.9153, "step": 1130 }, { "epoch": 0.024487691712849594, "grad_norm": 0.7860147216474672, "learning_rate": 1.9989447935791616e-05, "loss": 0.9111, "step": 1140 }, { "epoch": 0.024702496026120204, "grad_norm": 0.7955991962712436, "learning_rate": 1.9989132552474335e-05, "loss": 0.9148, "step": 1150 }, { "epoch": 0.024917300339390814, "grad_norm": 1.305671067059314, "learning_rate": 1.9988812527727324e-05, "loss": 0.9093, "step": 1160 }, { "epoch": 0.025132104652661424, "grad_norm": 0.7217672239649101, "learning_rate": 1.9988487861699277e-05, "loss": 0.9105, "step": 1170 }, { "epoch": 0.025346908965932037, "grad_norm": 6.271674767342428, "learning_rate": 1.9988158554541047e-05, "loss": 0.9047, "step": 1180 }, { "epoch": 0.025561713279202647, "grad_norm": 0.8349965377102029, "learning_rate": 1.9987824606405647e-05, "loss": 0.914, "step": 1190 }, { "epoch": 0.025776517592473257, "grad_norm": 0.8221862837050625, "learning_rate": 1.9987486017448245e-05, "loss": 0.9111, "step": 1200 }, { "epoch": 0.025991321905743867, "grad_norm": 0.7579519216571478, "learning_rate": 1.998714278782617e-05, "loss": 0.9148, "step": 1210 }, { "epoch": 0.026206126219014476, "grad_norm": 1.220350327147385, "learning_rate": 1.9986794917698894e-05, "loss": 0.9209, "step": 1220 }, { "epoch": 0.02642093053228509, "grad_norm": 0.765028183579955, "learning_rate": 1.9986442407228064e-05, "loss": 0.908, "step": 1230 }, { "epoch": 0.0266357348455557, "grad_norm": 0.7816681258072063, "learning_rate": 1.998608525657747e-05, "loss": 0.9077, "step": 1240 }, { "epoch": 0.02685053915882631, "grad_norm": 0.8249985245317925, "learning_rate": 1.9985723465913058e-05, "loss": 0.9021, "step": 1250 }, { "epoch": 0.02706534347209692, "grad_norm": 0.7570103217659379, "learning_rate": 1.9985357035402937e-05, "loss": 0.8967, "step": 1260 }, { "epoch": 0.02728014778536753, "grad_norm": 0.7550746594070511, "learning_rate": 1.998498596521737e-05, "loss": 0.9088, "step": 1270 }, { "epoch": 0.027494952098638142, "grad_norm": 0.74593111439136, "learning_rate": 1.998461025552876e-05, "loss": 0.914, "step": 1280 }, { "epoch": 0.027709756411908752, "grad_norm": 0.7538206481587928, "learning_rate": 1.99842299065117e-05, "loss": 0.892, "step": 1290 }, { "epoch": 0.027924560725179362, "grad_norm": 0.7808275060158067, "learning_rate": 1.9983844918342907e-05, "loss": 0.9034, "step": 1300 }, { "epoch": 0.028139365038449972, "grad_norm": 0.7539145866225501, "learning_rate": 1.9983455291201267e-05, "loss": 0.8867, "step": 1310 }, { "epoch": 0.02835416935172058, "grad_norm": 0.8633072463687512, "learning_rate": 1.9983061025267815e-05, "loss": 0.9009, "step": 1320 }, { "epoch": 0.02856897366499119, "grad_norm": 0.7733521101210006, "learning_rate": 1.9982662120725753e-05, "loss": 0.912, "step": 1330 }, { "epoch": 0.028783777978261805, "grad_norm": 0.7673039038449831, "learning_rate": 1.998225857776043e-05, "loss": 0.8992, "step": 1340 }, { "epoch": 0.028998582291532415, "grad_norm": 0.7714006517358715, "learning_rate": 1.9981850396559348e-05, "loss": 0.8889, "step": 1350 }, { "epoch": 0.029213386604803025, "grad_norm": 0.8178218009457255, "learning_rate": 1.9981437577312167e-05, "loss": 0.9063, "step": 1360 }, { "epoch": 0.029428190918073634, "grad_norm": 0.7794714859641063, "learning_rate": 1.998102012021071e-05, "loss": 0.9023, "step": 1370 }, { "epoch": 0.029642995231344244, "grad_norm": 0.8496853423209075, "learning_rate": 1.998059802544894e-05, "loss": 0.8999, "step": 1380 }, { "epoch": 0.029857799544614858, "grad_norm": 0.7714878599883224, "learning_rate": 1.9980171293222982e-05, "loss": 0.8896, "step": 1390 }, { "epoch": 0.030072603857885467, "grad_norm": 0.7795267501311487, "learning_rate": 1.9979739923731125e-05, "loss": 0.9123, "step": 1400 }, { "epoch": 0.030287408171156077, "grad_norm": 0.7765636777336408, "learning_rate": 1.9979303917173793e-05, "loss": 0.8967, "step": 1410 }, { "epoch": 0.030502212484426687, "grad_norm": 0.7854049103534458, "learning_rate": 1.9978863273753583e-05, "loss": 0.8921, "step": 1420 }, { "epoch": 0.030717016797697297, "grad_norm": 0.7525748519317893, "learning_rate": 1.9978417993675236e-05, "loss": 0.9078, "step": 1430 }, { "epoch": 0.030931821110967907, "grad_norm": 0.76321986852774, "learning_rate": 1.997796807714565e-05, "loss": 0.9121, "step": 1440 }, { "epoch": 0.03114662542423852, "grad_norm": 0.7619696286583564, "learning_rate": 1.9977513524373878e-05, "loss": 0.8986, "step": 1450 }, { "epoch": 0.031361429737509126, "grad_norm": 0.8025471522668453, "learning_rate": 1.997705433557113e-05, "loss": 0.8827, "step": 1460 }, { "epoch": 0.031576234050779736, "grad_norm": 0.7763997567061249, "learning_rate": 1.9976590510950766e-05, "loss": 0.9019, "step": 1470 }, { "epoch": 0.03179103836405035, "grad_norm": 0.7413217078674147, "learning_rate": 1.99761220507283e-05, "loss": 0.8864, "step": 1480 }, { "epoch": 0.03200584267732096, "grad_norm": 0.7457795342330259, "learning_rate": 1.99756489551214e-05, "loss": 0.8927, "step": 1490 }, { "epoch": 0.03222064699059157, "grad_norm": 0.7177239737436911, "learning_rate": 1.997517122434989e-05, "loss": 0.8664, "step": 1500 }, { "epoch": 0.03243545130386218, "grad_norm": 0.729910325595943, "learning_rate": 1.9974688858635748e-05, "loss": 0.8872, "step": 1510 }, { "epoch": 0.03265025561713279, "grad_norm": 0.734760164934118, "learning_rate": 1.9974201858203098e-05, "loss": 0.902, "step": 1520 }, { "epoch": 0.0328650599304034, "grad_norm": 0.7602546032074973, "learning_rate": 1.9973710223278236e-05, "loss": 0.8948, "step": 1530 }, { "epoch": 0.03307986424367401, "grad_norm": 0.7562657004832819, "learning_rate": 1.9973213954089586e-05, "loss": 0.8869, "step": 1540 }, { "epoch": 0.03329466855694462, "grad_norm": 0.7774738334729859, "learning_rate": 1.9972713050867745e-05, "loss": 0.9045, "step": 1550 }, { "epoch": 0.03350947287021523, "grad_norm": 0.7681125250896116, "learning_rate": 1.997220751384546e-05, "loss": 0.9013, "step": 1560 }, { "epoch": 0.03372427718348584, "grad_norm": 0.7339330432465896, "learning_rate": 1.997169734325762e-05, "loss": 0.9007, "step": 1570 }, { "epoch": 0.03393908149675646, "grad_norm": 0.7065402914181205, "learning_rate": 1.9971182539341284e-05, "loss": 0.8787, "step": 1580 }, { "epoch": 0.03415388581002707, "grad_norm": 0.7801128948603702, "learning_rate": 1.9970663102335645e-05, "loss": 0.9026, "step": 1590 }, { "epoch": 0.03436869012329768, "grad_norm": 0.788227685604599, "learning_rate": 1.997013903248206e-05, "loss": 0.8949, "step": 1600 }, { "epoch": 0.03458349443656829, "grad_norm": 0.7478323289786412, "learning_rate": 1.9969610330024043e-05, "loss": 0.8807, "step": 1610 }, { "epoch": 0.0347982987498389, "grad_norm": 0.7594500146074927, "learning_rate": 1.996907699520725e-05, "loss": 0.897, "step": 1620 }, { "epoch": 0.03501310306310951, "grad_norm": 0.7557561178768684, "learning_rate": 1.9968539028279493e-05, "loss": 0.8817, "step": 1630 }, { "epoch": 0.03522790737638012, "grad_norm": 0.7705545022363263, "learning_rate": 1.9967996429490738e-05, "loss": 0.8975, "step": 1640 }, { "epoch": 0.03544271168965073, "grad_norm": 0.7586938534256935, "learning_rate": 1.9967449199093104e-05, "loss": 0.9046, "step": 1650 }, { "epoch": 0.03565751600292134, "grad_norm": 0.7431761255095913, "learning_rate": 1.996689733734086e-05, "loss": 0.8873, "step": 1660 }, { "epoch": 0.03587232031619195, "grad_norm": 0.7208377192401944, "learning_rate": 1.9966340844490427e-05, "loss": 0.8785, "step": 1670 }, { "epoch": 0.03608712462946256, "grad_norm": 0.7506584753315972, "learning_rate": 1.9965779720800383e-05, "loss": 0.8684, "step": 1680 }, { "epoch": 0.03630192894273317, "grad_norm": 0.7391526908534235, "learning_rate": 1.9965213966531443e-05, "loss": 0.8925, "step": 1690 }, { "epoch": 0.03651673325600378, "grad_norm": 0.7088294334006218, "learning_rate": 1.996464358194649e-05, "loss": 0.8812, "step": 1700 }, { "epoch": 0.03673153756927439, "grad_norm": 0.7199397102089583, "learning_rate": 1.9964068567310552e-05, "loss": 0.9021, "step": 1710 }, { "epoch": 0.036946341882545, "grad_norm": 0.7386582371472061, "learning_rate": 1.996348892289081e-05, "loss": 0.8847, "step": 1720 }, { "epoch": 0.03716114619581561, "grad_norm": 0.7508464385857595, "learning_rate": 1.9962904648956585e-05, "loss": 0.8753, "step": 1730 }, { "epoch": 0.03737595050908622, "grad_norm": 0.7470256078534874, "learning_rate": 1.9962315745779372e-05, "loss": 0.8963, "step": 1740 }, { "epoch": 0.03759075482235683, "grad_norm": 0.7418761066889206, "learning_rate": 1.9961722213632794e-05, "loss": 0.8963, "step": 1750 }, { "epoch": 0.03780555913562744, "grad_norm": 0.7616668970329441, "learning_rate": 1.9961124052792636e-05, "loss": 0.8795, "step": 1760 }, { "epoch": 0.03802036344889805, "grad_norm": 0.8233002415920407, "learning_rate": 1.996052126353684e-05, "loss": 0.8716, "step": 1770 }, { "epoch": 0.03823516776216866, "grad_norm": 0.7383776636172839, "learning_rate": 1.995991384614548e-05, "loss": 0.8813, "step": 1780 }, { "epoch": 0.03844997207543927, "grad_norm": 0.7101146565285336, "learning_rate": 1.9959301800900795e-05, "loss": 0.8744, "step": 1790 }, { "epoch": 0.03866477638870989, "grad_norm": 0.7797901881755945, "learning_rate": 1.9958685128087175e-05, "loss": 0.8739, "step": 1800 }, { "epoch": 0.0388795807019805, "grad_norm": 0.7607914365027031, "learning_rate": 1.995806382799115e-05, "loss": 0.8769, "step": 1810 }, { "epoch": 0.03909438501525111, "grad_norm": 0.7186167152089645, "learning_rate": 1.9957437900901408e-05, "loss": 0.8728, "step": 1820 }, { "epoch": 0.03930918932852172, "grad_norm": 0.7500460414609829, "learning_rate": 1.9956807347108787e-05, "loss": 0.886, "step": 1830 }, { "epoch": 0.03952399364179233, "grad_norm": 0.8448420531394487, "learning_rate": 1.9956172166906267e-05, "loss": 0.876, "step": 1840 }, { "epoch": 0.03973879795506294, "grad_norm": 0.7616239143421761, "learning_rate": 1.9955532360588986e-05, "loss": 0.8731, "step": 1850 }, { "epoch": 0.03995360226833355, "grad_norm": 0.7516220625045015, "learning_rate": 1.9954887928454232e-05, "loss": 0.8895, "step": 1860 }, { "epoch": 0.04016840658160416, "grad_norm": 0.6940486511611167, "learning_rate": 1.9954238870801434e-05, "loss": 0.8816, "step": 1870 }, { "epoch": 0.04038321089487477, "grad_norm": 0.7207106268872732, "learning_rate": 1.995358518793218e-05, "loss": 0.8786, "step": 1880 }, { "epoch": 0.04059801520814538, "grad_norm": 0.7003859763965479, "learning_rate": 1.99529268801502e-05, "loss": 0.8909, "step": 1890 }, { "epoch": 0.04081281952141599, "grad_norm": 0.7143855349325927, "learning_rate": 1.995226394776137e-05, "loss": 0.876, "step": 1900 }, { "epoch": 0.041027623834686604, "grad_norm": 1.027903124400268, "learning_rate": 1.9951596391073732e-05, "loss": 0.8845, "step": 1910 }, { "epoch": 0.041242428147957214, "grad_norm": 0.7561863103739813, "learning_rate": 1.9950924210397453e-05, "loss": 0.8805, "step": 1920 }, { "epoch": 0.04145723246122782, "grad_norm": 0.7070451516725674, "learning_rate": 1.9950247406044867e-05, "loss": 0.8866, "step": 1930 }, { "epoch": 0.04167203677449843, "grad_norm": 0.7414269087308623, "learning_rate": 1.9949565978330447e-05, "loss": 0.8967, "step": 1940 }, { "epoch": 0.04188684108776904, "grad_norm": 0.7282024119626752, "learning_rate": 1.9948879927570823e-05, "loss": 0.883, "step": 1950 }, { "epoch": 0.04210164540103965, "grad_norm": 0.7460528091826555, "learning_rate": 1.9948189254084758e-05, "loss": 0.8701, "step": 1960 }, { "epoch": 0.04231644971431026, "grad_norm": 0.6963758847916436, "learning_rate": 1.9947493958193176e-05, "loss": 0.8748, "step": 1970 }, { "epoch": 0.04253125402758087, "grad_norm": 0.7069516648127419, "learning_rate": 1.9946794040219147e-05, "loss": 0.8832, "step": 1980 }, { "epoch": 0.04274605834085148, "grad_norm": 0.746440584689567, "learning_rate": 1.9946089500487888e-05, "loss": 0.8733, "step": 1990 }, { "epoch": 0.04296086265412209, "grad_norm": 0.7146001602504596, "learning_rate": 1.994538033932675e-05, "loss": 0.88, "step": 2000 }, { "epoch": 0.0431756669673927, "grad_norm": 0.8314467011125141, "learning_rate": 1.9944666557065257e-05, "loss": 0.8876, "step": 2010 }, { "epoch": 0.04339047128066332, "grad_norm": 0.8015245377512796, "learning_rate": 1.994394815403506e-05, "loss": 0.9035, "step": 2020 }, { "epoch": 0.04360527559393393, "grad_norm": 0.7222397980014357, "learning_rate": 1.9943225130569967e-05, "loss": 0.8846, "step": 2030 }, { "epoch": 0.04382007990720454, "grad_norm": 0.9345503125138953, "learning_rate": 1.9942497487005922e-05, "loss": 0.875, "step": 2040 }, { "epoch": 0.04403488422047515, "grad_norm": 0.777701186759307, "learning_rate": 1.9941765223681032e-05, "loss": 0.8886, "step": 2050 }, { "epoch": 0.04424968853374576, "grad_norm": 0.7503022644965333, "learning_rate": 1.9941028340935536e-05, "loss": 0.8579, "step": 2060 }, { "epoch": 0.04446449284701637, "grad_norm": 0.719946551023971, "learning_rate": 1.9940286839111832e-05, "loss": 0.8802, "step": 2070 }, { "epoch": 0.04467929716028698, "grad_norm": 0.749093812327845, "learning_rate": 1.9939540718554445e-05, "loss": 0.8834, "step": 2080 }, { "epoch": 0.04489410147355759, "grad_norm": 0.7422208424583762, "learning_rate": 1.993878997961007e-05, "loss": 0.8853, "step": 2090 }, { "epoch": 0.0451089057868282, "grad_norm": 0.7374573183269133, "learning_rate": 1.993803462262753e-05, "loss": 0.891, "step": 2100 }, { "epoch": 0.04532371010009881, "grad_norm": 0.7350782974678935, "learning_rate": 1.99372746479578e-05, "loss": 0.9003, "step": 2110 }, { "epoch": 0.04553851441336942, "grad_norm": 0.7281319667850538, "learning_rate": 1.9936510055954002e-05, "loss": 0.8683, "step": 2120 }, { "epoch": 0.045753318726640034, "grad_norm": 0.7965856691084933, "learning_rate": 1.9935740846971404e-05, "loss": 0.8818, "step": 2130 }, { "epoch": 0.045968123039910644, "grad_norm": 0.7049379609150542, "learning_rate": 1.9934967021367417e-05, "loss": 0.8873, "step": 2140 }, { "epoch": 0.046182927353181254, "grad_norm": 0.6866133102632582, "learning_rate": 1.9934188579501596e-05, "loss": 0.8799, "step": 2150 }, { "epoch": 0.046397731666451864, "grad_norm": 0.7086840811762911, "learning_rate": 1.993340552173564e-05, "loss": 0.8724, "step": 2160 }, { "epoch": 0.04661253597972247, "grad_norm": 0.7205449804498884, "learning_rate": 1.99326178484334e-05, "loss": 0.8906, "step": 2170 }, { "epoch": 0.04682734029299308, "grad_norm": 0.7271300532378566, "learning_rate": 1.9931825559960867e-05, "loss": 0.8739, "step": 2180 }, { "epoch": 0.04704214460626369, "grad_norm": 0.782395233795208, "learning_rate": 1.993102865668617e-05, "loss": 0.8766, "step": 2190 }, { "epoch": 0.0472569489195343, "grad_norm": 0.7344948505203559, "learning_rate": 1.9930227138979595e-05, "loss": 0.8825, "step": 2200 }, { "epoch": 0.04747175323280491, "grad_norm": 0.6708701924983489, "learning_rate": 1.992942100721356e-05, "loss": 0.8659, "step": 2210 }, { "epoch": 0.04768655754607552, "grad_norm": 0.6843127757880155, "learning_rate": 1.992861026176264e-05, "loss": 0.8689, "step": 2220 }, { "epoch": 0.04790136185934613, "grad_norm": 0.7601120607749401, "learning_rate": 1.992779490300354e-05, "loss": 0.8809, "step": 2230 }, { "epoch": 0.04811616617261675, "grad_norm": 0.7771578485534071, "learning_rate": 1.9926974931315114e-05, "loss": 0.874, "step": 2240 }, { "epoch": 0.04833097048588736, "grad_norm": 0.6677106942558194, "learning_rate": 1.9926150347078363e-05, "loss": 0.8715, "step": 2250 }, { "epoch": 0.04854577479915797, "grad_norm": 0.6825562101263428, "learning_rate": 1.9925321150676426e-05, "loss": 0.8747, "step": 2260 }, { "epoch": 0.04876057911242858, "grad_norm": 0.7870531733277122, "learning_rate": 1.992448734249459e-05, "loss": 0.876, "step": 2270 }, { "epoch": 0.04897538342569919, "grad_norm": 0.7487545797908844, "learning_rate": 1.9923648922920284e-05, "loss": 0.8842, "step": 2280 }, { "epoch": 0.0491901877389698, "grad_norm": 0.7615635965125641, "learning_rate": 1.9922805892343073e-05, "loss": 0.8758, "step": 2290 }, { "epoch": 0.04940499205224041, "grad_norm": 0.7376131287808475, "learning_rate": 1.9921958251154673e-05, "loss": 0.8767, "step": 2300 }, { "epoch": 0.04961979636551102, "grad_norm": 0.7011021964691581, "learning_rate": 1.9921105999748937e-05, "loss": 0.8785, "step": 2310 }, { "epoch": 0.04983460067878163, "grad_norm": 0.6991405501900602, "learning_rate": 1.9920249138521864e-05, "loss": 0.8659, "step": 2320 }, { "epoch": 0.05004940499205224, "grad_norm": 0.8709849609023845, "learning_rate": 1.9919387667871586e-05, "loss": 0.8831, "step": 2330 }, { "epoch": 0.05026420930532285, "grad_norm": 0.7150713210870077, "learning_rate": 1.9918521588198395e-05, "loss": 0.8758, "step": 2340 }, { "epoch": 0.050479013618593464, "grad_norm": 0.7286077482741432, "learning_rate": 1.9917650899904704e-05, "loss": 0.8807, "step": 2350 }, { "epoch": 0.050693817931864074, "grad_norm": 0.7213279597207197, "learning_rate": 1.9916775603395078e-05, "loss": 0.8884, "step": 2360 }, { "epoch": 0.050908622245134684, "grad_norm": 0.7135061250388653, "learning_rate": 1.9915895699076224e-05, "loss": 0.8759, "step": 2370 }, { "epoch": 0.051123426558405294, "grad_norm": 0.7313831738364597, "learning_rate": 1.9915011187356988e-05, "loss": 0.857, "step": 2380 }, { "epoch": 0.051338230871675904, "grad_norm": 0.6876477115175337, "learning_rate": 1.991412206864835e-05, "loss": 0.8572, "step": 2390 }, { "epoch": 0.051553035184946513, "grad_norm": 0.6945309755588273, "learning_rate": 1.9913228343363448e-05, "loss": 0.8652, "step": 2400 }, { "epoch": 0.05176783949821712, "grad_norm": 0.704316576659832, "learning_rate": 1.991233001191754e-05, "loss": 0.8601, "step": 2410 }, { "epoch": 0.05198264381148773, "grad_norm": 0.6985006179988886, "learning_rate": 1.9911427074728043e-05, "loss": 0.8859, "step": 2420 }, { "epoch": 0.05219744812475834, "grad_norm": 0.7524066390625885, "learning_rate": 1.9910519532214498e-05, "loss": 0.8892, "step": 2430 }, { "epoch": 0.05241225243802895, "grad_norm": 0.6901836096027437, "learning_rate": 1.990960738479859e-05, "loss": 0.8846, "step": 2440 }, { "epoch": 0.05262705675129956, "grad_norm": 0.6849358351981636, "learning_rate": 1.9908690632904157e-05, "loss": 0.8634, "step": 2450 }, { "epoch": 0.05284186106457018, "grad_norm": 0.765461828081668, "learning_rate": 1.9907769276957156e-05, "loss": 0.8746, "step": 2460 }, { "epoch": 0.05305666537784079, "grad_norm": 0.7679215224483907, "learning_rate": 1.9906843317385696e-05, "loss": 0.8717, "step": 2470 }, { "epoch": 0.0532714696911114, "grad_norm": 0.7102215793244879, "learning_rate": 1.9905912754620028e-05, "loss": 0.8768, "step": 2480 }, { "epoch": 0.05348627400438201, "grad_norm": 0.7328716303001253, "learning_rate": 1.9904977589092526e-05, "loss": 0.8762, "step": 2490 }, { "epoch": 0.05370107831765262, "grad_norm": 0.6873393080849327, "learning_rate": 1.990403782123772e-05, "loss": 0.8666, "step": 2500 }, { "epoch": 0.05391588263092323, "grad_norm": 0.7147008262903141, "learning_rate": 1.990309345149227e-05, "loss": 0.8702, "step": 2510 }, { "epoch": 0.05413068694419384, "grad_norm": 0.7258515311880707, "learning_rate": 1.990214448029497e-05, "loss": 0.861, "step": 2520 }, { "epoch": 0.05434549125746445, "grad_norm": 0.6864484714517523, "learning_rate": 1.9901190908086768e-05, "loss": 0.8607, "step": 2530 }, { "epoch": 0.05456029557073506, "grad_norm": 0.7219498116874772, "learning_rate": 1.9900232735310732e-05, "loss": 0.8636, "step": 2540 }, { "epoch": 0.05477509988400567, "grad_norm": 0.7132672252604229, "learning_rate": 1.9899269962412075e-05, "loss": 0.8785, "step": 2550 }, { "epoch": 0.054989904197276285, "grad_norm": 0.6983739180648069, "learning_rate": 1.989830258983815e-05, "loss": 0.8688, "step": 2560 }, { "epoch": 0.055204708510546895, "grad_norm": 0.7061201979674048, "learning_rate": 1.9897330618038443e-05, "loss": 0.8771, "step": 2570 }, { "epoch": 0.055419512823817504, "grad_norm": 0.7219202198100194, "learning_rate": 1.9896354047464578e-05, "loss": 0.8875, "step": 2580 }, { "epoch": 0.055634317137088114, "grad_norm": 0.8868550476280311, "learning_rate": 1.9895372878570314e-05, "loss": 0.8623, "step": 2590 }, { "epoch": 0.055849121450358724, "grad_norm": 0.7475858890792529, "learning_rate": 1.989438711181156e-05, "loss": 0.852, "step": 2600 }, { "epoch": 0.056063925763629334, "grad_norm": 0.6861827188596874, "learning_rate": 1.9893396747646337e-05, "loss": 0.8919, "step": 2610 }, { "epoch": 0.056278730076899944, "grad_norm": 0.6806996838562623, "learning_rate": 1.9892401786534827e-05, "loss": 0.8581, "step": 2620 }, { "epoch": 0.056493534390170554, "grad_norm": 0.6852549300278078, "learning_rate": 1.989140222893933e-05, "loss": 0.8697, "step": 2630 }, { "epoch": 0.05670833870344116, "grad_norm": 0.6657649574580968, "learning_rate": 1.989039807532429e-05, "loss": 0.8807, "step": 2640 }, { "epoch": 0.05692314301671177, "grad_norm": 0.6905459875216616, "learning_rate": 1.988938932615628e-05, "loss": 0.8673, "step": 2650 }, { "epoch": 0.05713794732998238, "grad_norm": 0.7020389616663805, "learning_rate": 1.9888375981904024e-05, "loss": 0.8641, "step": 2660 }, { "epoch": 0.057352751643253, "grad_norm": 0.6916187469122672, "learning_rate": 1.988735804303836e-05, "loss": 0.8621, "step": 2670 }, { "epoch": 0.05756755595652361, "grad_norm": 0.7314594201824384, "learning_rate": 1.988633551003228e-05, "loss": 0.879, "step": 2680 }, { "epoch": 0.05778236026979422, "grad_norm": 0.746171027813674, "learning_rate": 1.9885308383360894e-05, "loss": 0.8707, "step": 2690 }, { "epoch": 0.05799716458306483, "grad_norm": 0.6634180978048528, "learning_rate": 1.988427666350146e-05, "loss": 0.8598, "step": 2700 }, { "epoch": 0.05821196889633544, "grad_norm": 0.6453064675883513, "learning_rate": 1.988324035093336e-05, "loss": 0.8683, "step": 2710 }, { "epoch": 0.05842677320960605, "grad_norm": 0.6912271165294316, "learning_rate": 1.9882199446138116e-05, "loss": 0.8746, "step": 2720 }, { "epoch": 0.05864157752287666, "grad_norm": 0.7643854365177722, "learning_rate": 1.9881153949599384e-05, "loss": 0.8744, "step": 2730 }, { "epoch": 0.05885638183614727, "grad_norm": 0.6767199632505446, "learning_rate": 1.988010386180295e-05, "loss": 0.8516, "step": 2740 }, { "epoch": 0.05907118614941788, "grad_norm": 0.7108698181797346, "learning_rate": 1.9879049183236735e-05, "loss": 0.8695, "step": 2750 }, { "epoch": 0.05928599046268849, "grad_norm": 0.7253238005438297, "learning_rate": 1.9877989914390794e-05, "loss": 0.8555, "step": 2760 }, { "epoch": 0.0595007947759591, "grad_norm": 0.6453538421152363, "learning_rate": 1.9876926055757316e-05, "loss": 0.8633, "step": 2770 }, { "epoch": 0.059715599089229715, "grad_norm": 0.7205543033570071, "learning_rate": 1.9875857607830616e-05, "loss": 0.8684, "step": 2780 }, { "epoch": 0.059930403402500325, "grad_norm": 0.756612819272997, "learning_rate": 1.987478457110715e-05, "loss": 0.8544, "step": 2790 }, { "epoch": 0.060145207715770935, "grad_norm": 0.7043547954742904, "learning_rate": 1.9873706946085504e-05, "loss": 0.8622, "step": 2800 }, { "epoch": 0.060360012029041545, "grad_norm": 0.7593440635743444, "learning_rate": 1.9872624733266386e-05, "loss": 0.8834, "step": 2810 }, { "epoch": 0.060574816342312154, "grad_norm": 0.6863494468953989, "learning_rate": 1.9871537933152653e-05, "loss": 0.8633, "step": 2820 }, { "epoch": 0.060789620655582764, "grad_norm": 0.7138487939483891, "learning_rate": 1.987044654624928e-05, "loss": 0.8594, "step": 2830 }, { "epoch": 0.061004424968853374, "grad_norm": 0.6981415113485295, "learning_rate": 1.9869350573063376e-05, "loss": 0.8677, "step": 2840 }, { "epoch": 0.061219229282123984, "grad_norm": 0.7075330565366764, "learning_rate": 1.9868250014104187e-05, "loss": 0.8682, "step": 2850 }, { "epoch": 0.061434033595394594, "grad_norm": 0.714624614534178, "learning_rate": 1.9867144869883083e-05, "loss": 0.8628, "step": 2860 }, { "epoch": 0.061648837908665204, "grad_norm": 0.7137186386058576, "learning_rate": 1.9866035140913568e-05, "loss": 0.8521, "step": 2870 }, { "epoch": 0.06186364222193581, "grad_norm": 0.6880470582910433, "learning_rate": 1.9864920827711273e-05, "loss": 0.8615, "step": 2880 }, { "epoch": 0.06207844653520643, "grad_norm": 0.6631751007947058, "learning_rate": 1.9863801930793966e-05, "loss": 0.8684, "step": 2890 }, { "epoch": 0.06229325084847704, "grad_norm": 0.680905465307777, "learning_rate": 1.9862678450681537e-05, "loss": 0.8573, "step": 2900 }, { "epoch": 0.06250805516174765, "grad_norm": 0.722687320949144, "learning_rate": 1.9861550387896007e-05, "loss": 0.8573, "step": 2910 }, { "epoch": 0.06272285947501825, "grad_norm": 0.707058027911016, "learning_rate": 1.986041774296153e-05, "loss": 0.8623, "step": 2920 }, { "epoch": 0.06293766378828887, "grad_norm": 0.6766536125382012, "learning_rate": 1.9859280516404387e-05, "loss": 0.862, "step": 2930 }, { "epoch": 0.06315246810155947, "grad_norm": 0.7142428076724684, "learning_rate": 1.985813870875299e-05, "loss": 0.8769, "step": 2940 }, { "epoch": 0.06336727241483009, "grad_norm": 0.6712747602182263, "learning_rate": 1.9856992320537872e-05, "loss": 0.8651, "step": 2950 }, { "epoch": 0.0635820767281007, "grad_norm": 0.7333177718791057, "learning_rate": 1.9855841352291705e-05, "loss": 0.8496, "step": 2960 }, { "epoch": 0.06379688104137131, "grad_norm": 0.6738403800576495, "learning_rate": 1.9854685804549282e-05, "loss": 0.8679, "step": 2970 }, { "epoch": 0.06401168535464193, "grad_norm": 0.6926446949090426, "learning_rate": 1.985352567784753e-05, "loss": 0.8737, "step": 2980 }, { "epoch": 0.06422648966791253, "grad_norm": 0.6819388635508017, "learning_rate": 1.985236097272549e-05, "loss": 0.8433, "step": 2990 }, { "epoch": 0.06444129398118315, "grad_norm": 0.6578041236132836, "learning_rate": 1.985119168972435e-05, "loss": 0.8419, "step": 3000 }, { "epoch": 0.06465609829445375, "grad_norm": 0.6561607204214681, "learning_rate": 1.9850017829387406e-05, "loss": 0.8539, "step": 3010 }, { "epoch": 0.06487090260772437, "grad_norm": 0.693338421429239, "learning_rate": 1.98488393922601e-05, "loss": 0.8482, "step": 3020 }, { "epoch": 0.06508570692099497, "grad_norm": 0.7241505791029577, "learning_rate": 1.984765637888998e-05, "loss": 0.8506, "step": 3030 }, { "epoch": 0.06530051123426558, "grad_norm": 0.6726045381693205, "learning_rate": 1.9846468789826737e-05, "loss": 0.8605, "step": 3040 }, { "epoch": 0.0655153155475362, "grad_norm": 0.7093889099822586, "learning_rate": 1.984527662562218e-05, "loss": 0.8651, "step": 3050 }, { "epoch": 0.0657301198608068, "grad_norm": 0.6510030697876009, "learning_rate": 1.9844079886830246e-05, "loss": 0.8641, "step": 3060 }, { "epoch": 0.06594492417407742, "grad_norm": 0.6560638118957208, "learning_rate": 1.9842878574007e-05, "loss": 0.874, "step": 3070 }, { "epoch": 0.06615972848734802, "grad_norm": 0.7134647721402186, "learning_rate": 1.9841672687710624e-05, "loss": 0.8528, "step": 3080 }, { "epoch": 0.06637453280061864, "grad_norm": 0.6922835771738086, "learning_rate": 1.9840462228501432e-05, "loss": 0.8609, "step": 3090 }, { "epoch": 0.06658933711388924, "grad_norm": 0.6573245546694103, "learning_rate": 1.9839247196941862e-05, "loss": 0.8644, "step": 3100 }, { "epoch": 0.06680414142715986, "grad_norm": 0.6864765270164866, "learning_rate": 1.9838027593596477e-05, "loss": 0.8633, "step": 3110 }, { "epoch": 0.06701894574043046, "grad_norm": 0.6508340450389928, "learning_rate": 1.9836803419031964e-05, "loss": 0.8486, "step": 3120 }, { "epoch": 0.06723375005370108, "grad_norm": 0.6712340814966278, "learning_rate": 1.983557467381713e-05, "loss": 0.8576, "step": 3130 }, { "epoch": 0.06744855436697168, "grad_norm": 0.6946743953968081, "learning_rate": 1.9834341358522914e-05, "loss": 0.8615, "step": 3140 }, { "epoch": 0.0676633586802423, "grad_norm": 0.6631456581489633, "learning_rate": 1.9833103473722366e-05, "loss": 0.8616, "step": 3150 }, { "epoch": 0.06787816299351292, "grad_norm": 0.7015972542735149, "learning_rate": 1.983186101999067e-05, "loss": 0.8616, "step": 3160 }, { "epoch": 0.06809296730678352, "grad_norm": 0.6897254667864308, "learning_rate": 1.9830613997905136e-05, "loss": 0.865, "step": 3170 }, { "epoch": 0.06830777162005414, "grad_norm": 0.6577402763346296, "learning_rate": 1.982936240804518e-05, "loss": 0.8544, "step": 3180 }, { "epoch": 0.06852257593332474, "grad_norm": 0.6890291309672536, "learning_rate": 1.9828106250992353e-05, "loss": 0.8645, "step": 3190 }, { "epoch": 0.06873738024659536, "grad_norm": 0.703215862838126, "learning_rate": 1.982684552733033e-05, "loss": 0.8728, "step": 3200 }, { "epoch": 0.06895218455986596, "grad_norm": 0.6827501388608269, "learning_rate": 1.9825580237644903e-05, "loss": 0.8602, "step": 3210 }, { "epoch": 0.06916698887313658, "grad_norm": 0.697650000738595, "learning_rate": 1.9824310382523982e-05, "loss": 0.8726, "step": 3220 }, { "epoch": 0.06938179318640718, "grad_norm": 0.6721960819494287, "learning_rate": 1.9823035962557608e-05, "loss": 0.8605, "step": 3230 }, { "epoch": 0.0695965974996778, "grad_norm": 0.7154662455928354, "learning_rate": 1.9821756978337935e-05, "loss": 0.8643, "step": 3240 }, { "epoch": 0.0698114018129484, "grad_norm": 0.681240118026375, "learning_rate": 1.9820473430459237e-05, "loss": 0.8621, "step": 3250 }, { "epoch": 0.07002620612621901, "grad_norm": 0.6580326580613558, "learning_rate": 1.9819185319517915e-05, "loss": 0.8466, "step": 3260 }, { "epoch": 0.07024101043948963, "grad_norm": 0.6710767809283256, "learning_rate": 1.981789264611249e-05, "loss": 0.8659, "step": 3270 }, { "epoch": 0.07045581475276023, "grad_norm": 0.6593001195695262, "learning_rate": 1.9816595410843596e-05, "loss": 0.8663, "step": 3280 }, { "epoch": 0.07067061906603085, "grad_norm": 0.6628941061395422, "learning_rate": 1.981529361431399e-05, "loss": 0.8766, "step": 3290 }, { "epoch": 0.07088542337930145, "grad_norm": 0.6706542522918096, "learning_rate": 1.9813987257128552e-05, "loss": 0.8515, "step": 3300 }, { "epoch": 0.07110022769257207, "grad_norm": 0.7081735675645245, "learning_rate": 1.981267633989428e-05, "loss": 0.8536, "step": 3310 }, { "epoch": 0.07131503200584267, "grad_norm": 0.6631068395510924, "learning_rate": 1.981136086322028e-05, "loss": 0.8595, "step": 3320 }, { "epoch": 0.07152983631911329, "grad_norm": 0.6815244729783259, "learning_rate": 1.98100408277178e-05, "loss": 0.8452, "step": 3330 }, { "epoch": 0.0717446406323839, "grad_norm": 0.6708767643583214, "learning_rate": 1.9808716234000176e-05, "loss": 0.8646, "step": 3340 }, { "epoch": 0.07195944494565451, "grad_norm": 0.7070823636265635, "learning_rate": 1.9807387082682888e-05, "loss": 0.8596, "step": 3350 }, { "epoch": 0.07217424925892511, "grad_norm": 0.6385205053779432, "learning_rate": 1.980605337438352e-05, "loss": 0.8596, "step": 3360 }, { "epoch": 0.07238905357219573, "grad_norm": 0.6900045332944192, "learning_rate": 1.9804715109721773e-05, "loss": 0.8455, "step": 3370 }, { "epoch": 0.07260385788546635, "grad_norm": 0.6952726462768112, "learning_rate": 1.980337228931948e-05, "loss": 0.854, "step": 3380 }, { "epoch": 0.07281866219873695, "grad_norm": 0.6316666106551074, "learning_rate": 1.9802024913800567e-05, "loss": 0.8607, "step": 3390 }, { "epoch": 0.07303346651200757, "grad_norm": 0.6782909648023765, "learning_rate": 1.9800672983791097e-05, "loss": 0.8578, "step": 3400 }, { "epoch": 0.07324827082527817, "grad_norm": 0.703136553111246, "learning_rate": 1.979931649991924e-05, "loss": 0.8537, "step": 3410 }, { "epoch": 0.07346307513854879, "grad_norm": 0.6766152461075413, "learning_rate": 1.9797955462815285e-05, "loss": 0.8551, "step": 3420 }, { "epoch": 0.07367787945181939, "grad_norm": 0.6548199817442408, "learning_rate": 1.9796589873111627e-05, "loss": 0.848, "step": 3430 }, { "epoch": 0.07389268376509, "grad_norm": 0.6699380907258793, "learning_rate": 1.9795219731442798e-05, "loss": 0.8518, "step": 3440 }, { "epoch": 0.07410748807836061, "grad_norm": 0.646613126593096, "learning_rate": 1.979384503844542e-05, "loss": 0.8503, "step": 3450 }, { "epoch": 0.07432229239163123, "grad_norm": 0.6657451032225497, "learning_rate": 1.9792465794758246e-05, "loss": 0.865, "step": 3460 }, { "epoch": 0.07453709670490183, "grad_norm": 0.6566855884425971, "learning_rate": 1.9791082001022137e-05, "loss": 0.843, "step": 3470 }, { "epoch": 0.07475190101817245, "grad_norm": 0.68529511952354, "learning_rate": 1.978969365788007e-05, "loss": 0.8569, "step": 3480 }, { "epoch": 0.07496670533144306, "grad_norm": 0.694727034477339, "learning_rate": 1.978830076597714e-05, "loss": 0.8472, "step": 3490 }, { "epoch": 0.07518150964471366, "grad_norm": 0.6631339825070333, "learning_rate": 1.9786903325960545e-05, "loss": 0.855, "step": 3500 }, { "epoch": 0.07539631395798428, "grad_norm": 0.694444241956036, "learning_rate": 1.9785501338479605e-05, "loss": 0.8524, "step": 3510 }, { "epoch": 0.07561111827125488, "grad_norm": 0.6945927280010864, "learning_rate": 1.9784094804185755e-05, "loss": 0.85, "step": 3520 }, { "epoch": 0.0758259225845255, "grad_norm": 0.6672445340085226, "learning_rate": 1.978268372373253e-05, "loss": 0.8498, "step": 3530 }, { "epoch": 0.0760407268977961, "grad_norm": 0.646988287501803, "learning_rate": 1.978126809777559e-05, "loss": 0.848, "step": 3540 }, { "epoch": 0.07625553121106672, "grad_norm": 0.6911424046720633, "learning_rate": 1.9779847926972703e-05, "loss": 0.8652, "step": 3550 }, { "epoch": 0.07647033552433732, "grad_norm": 0.6571056886382604, "learning_rate": 1.977842321198375e-05, "loss": 0.864, "step": 3560 }, { "epoch": 0.07668513983760794, "grad_norm": 0.6668618449865177, "learning_rate": 1.977699395347072e-05, "loss": 0.8486, "step": 3570 }, { "epoch": 0.07689994415087854, "grad_norm": 0.6575595446352954, "learning_rate": 1.9775560152097713e-05, "loss": 0.8534, "step": 3580 }, { "epoch": 0.07711474846414916, "grad_norm": 0.658442535770228, "learning_rate": 1.9774121808530944e-05, "loss": 0.8554, "step": 3590 }, { "epoch": 0.07732955277741978, "grad_norm": 0.6725808248232724, "learning_rate": 1.977267892343874e-05, "loss": 0.8524, "step": 3600 }, { "epoch": 0.07754435709069038, "grad_norm": 0.6579027842938031, "learning_rate": 1.9771231497491526e-05, "loss": 0.8505, "step": 3610 }, { "epoch": 0.077759161403961, "grad_norm": 0.6960301484489192, "learning_rate": 1.976977953136185e-05, "loss": 0.8602, "step": 3620 }, { "epoch": 0.0779739657172316, "grad_norm": 0.6697741714896654, "learning_rate": 1.9768323025724368e-05, "loss": 0.8758, "step": 3630 }, { "epoch": 0.07818877003050222, "grad_norm": 2.859419478862952, "learning_rate": 1.9766861981255837e-05, "loss": 0.864, "step": 3640 }, { "epoch": 0.07840357434377282, "grad_norm": 0.6822863380857591, "learning_rate": 1.9765396398635133e-05, "loss": 0.8562, "step": 3650 }, { "epoch": 0.07861837865704344, "grad_norm": 0.6500244246977851, "learning_rate": 1.976392627854323e-05, "loss": 0.8458, "step": 3660 }, { "epoch": 0.07883318297031404, "grad_norm": 0.7383310944585931, "learning_rate": 1.9762451621663225e-05, "loss": 0.8435, "step": 3670 }, { "epoch": 0.07904798728358466, "grad_norm": 0.692308880241196, "learning_rate": 1.9760972428680304e-05, "loss": 0.8506, "step": 3680 }, { "epoch": 0.07926279159685526, "grad_norm": 0.6856631590673132, "learning_rate": 1.975948870028178e-05, "loss": 0.8686, "step": 3690 }, { "epoch": 0.07947759591012588, "grad_norm": 0.6903089329869376, "learning_rate": 1.9758000437157058e-05, "loss": 0.8483, "step": 3700 }, { "epoch": 0.07969240022339649, "grad_norm": 0.7078943762114845, "learning_rate": 1.9756507639997663e-05, "loss": 0.855, "step": 3710 }, { "epoch": 0.0799072045366671, "grad_norm": 0.694778958880388, "learning_rate": 1.975501030949721e-05, "loss": 0.8738, "step": 3720 }, { "epoch": 0.08012200884993771, "grad_norm": 0.7025809779085943, "learning_rate": 1.975350844635144e-05, "loss": 0.8455, "step": 3730 }, { "epoch": 0.08033681316320831, "grad_norm": 0.7689670952873736, "learning_rate": 1.9752002051258187e-05, "loss": 0.8553, "step": 3740 }, { "epoch": 0.08055161747647893, "grad_norm": 0.6448632769568776, "learning_rate": 1.9750491124917396e-05, "loss": 0.847, "step": 3750 }, { "epoch": 0.08076642178974953, "grad_norm": 0.6613202041245299, "learning_rate": 1.9748975668031113e-05, "loss": 0.8514, "step": 3760 }, { "epoch": 0.08098122610302015, "grad_norm": 0.6795441486901408, "learning_rate": 1.974745568130349e-05, "loss": 0.8551, "step": 3770 }, { "epoch": 0.08119603041629075, "grad_norm": 0.6665715977606939, "learning_rate": 1.974593116544079e-05, "loss": 0.8464, "step": 3780 }, { "epoch": 0.08141083472956137, "grad_norm": 0.6572184682125283, "learning_rate": 1.974440212115138e-05, "loss": 0.8535, "step": 3790 }, { "epoch": 0.08162563904283197, "grad_norm": 0.6552167641764182, "learning_rate": 1.9742868549145716e-05, "loss": 0.8432, "step": 3800 }, { "epoch": 0.08184044335610259, "grad_norm": 0.6421489707631659, "learning_rate": 1.9741330450136377e-05, "loss": 0.849, "step": 3810 }, { "epoch": 0.08205524766937321, "grad_norm": 0.6485917266184518, "learning_rate": 1.9739787824838036e-05, "loss": 0.838, "step": 3820 }, { "epoch": 0.08227005198264381, "grad_norm": 0.6959055244885545, "learning_rate": 1.9738240673967473e-05, "loss": 0.8473, "step": 3830 }, { "epoch": 0.08248485629591443, "grad_norm": 0.713164987686896, "learning_rate": 1.9736688998243562e-05, "loss": 0.8528, "step": 3840 }, { "epoch": 0.08269966060918503, "grad_norm": 0.6580917396986089, "learning_rate": 1.9735132798387294e-05, "loss": 0.852, "step": 3850 }, { "epoch": 0.08291446492245565, "grad_norm": 0.7027948387761466, "learning_rate": 1.9733572075121746e-05, "loss": 0.8447, "step": 3860 }, { "epoch": 0.08312926923572625, "grad_norm": 0.6861004251959052, "learning_rate": 1.973200682917211e-05, "loss": 0.8526, "step": 3870 }, { "epoch": 0.08334407354899687, "grad_norm": 0.6869412094000382, "learning_rate": 1.9730437061265674e-05, "loss": 0.8416, "step": 3880 }, { "epoch": 0.08355887786226747, "grad_norm": 0.6269537456004645, "learning_rate": 1.9728862772131822e-05, "loss": 0.8487, "step": 3890 }, { "epoch": 0.08377368217553809, "grad_norm": 0.6491403415344045, "learning_rate": 1.9727283962502054e-05, "loss": 0.8678, "step": 3900 }, { "epoch": 0.08398848648880869, "grad_norm": 0.6995859598956509, "learning_rate": 1.9725700633109955e-05, "loss": 0.8433, "step": 3910 }, { "epoch": 0.0842032908020793, "grad_norm": 0.6700202312183003, "learning_rate": 1.9724112784691213e-05, "loss": 0.847, "step": 3920 }, { "epoch": 0.08441809511534992, "grad_norm": 0.6485370618480597, "learning_rate": 1.9722520417983618e-05, "loss": 0.8544, "step": 3930 }, { "epoch": 0.08463289942862053, "grad_norm": 0.6592424429265452, "learning_rate": 1.972092353372707e-05, "loss": 0.837, "step": 3940 }, { "epoch": 0.08484770374189114, "grad_norm": 0.6555177826724199, "learning_rate": 1.9719322132663547e-05, "loss": 0.8654, "step": 3950 }, { "epoch": 0.08506250805516175, "grad_norm": 0.6836389096207424, "learning_rate": 1.9717716215537145e-05, "loss": 0.8626, "step": 3960 }, { "epoch": 0.08527731236843236, "grad_norm": 0.6336266324638282, "learning_rate": 1.971610578309404e-05, "loss": 0.8488, "step": 3970 }, { "epoch": 0.08549211668170296, "grad_norm": 0.6198253547328094, "learning_rate": 1.9714490836082527e-05, "loss": 0.8606, "step": 3980 }, { "epoch": 0.08570692099497358, "grad_norm": 0.6537044949678656, "learning_rate": 1.9712871375252983e-05, "loss": 0.8457, "step": 3990 }, { "epoch": 0.08592172530824418, "grad_norm": 0.6678736312065985, "learning_rate": 1.9711247401357886e-05, "loss": 0.835, "step": 4000 }, { "epoch": 0.0861365296215148, "grad_norm": 0.6692336566643887, "learning_rate": 1.970961891515181e-05, "loss": 0.8566, "step": 4010 }, { "epoch": 0.0863513339347854, "grad_norm": 0.6585733096564904, "learning_rate": 1.9707985917391435e-05, "loss": 0.8339, "step": 4020 }, { "epoch": 0.08656613824805602, "grad_norm": 0.6303363377268884, "learning_rate": 1.9706348408835522e-05, "loss": 0.8593, "step": 4030 }, { "epoch": 0.08678094256132664, "grad_norm": 0.6782552039693405, "learning_rate": 1.9704706390244943e-05, "loss": 0.8601, "step": 4040 }, { "epoch": 0.08699574687459724, "grad_norm": 0.6324566209058814, "learning_rate": 1.970305986238265e-05, "loss": 0.8513, "step": 4050 }, { "epoch": 0.08721055118786786, "grad_norm": 0.6534333164738351, "learning_rate": 1.970140882601371e-05, "loss": 0.8526, "step": 4060 }, { "epoch": 0.08742535550113846, "grad_norm": 0.6319924406902253, "learning_rate": 1.9699753281905266e-05, "loss": 0.8432, "step": 4070 }, { "epoch": 0.08764015981440908, "grad_norm": 0.6523877824199336, "learning_rate": 1.9698093230826566e-05, "loss": 0.8472, "step": 4080 }, { "epoch": 0.08785496412767968, "grad_norm": 0.6485572561784627, "learning_rate": 1.9696428673548948e-05, "loss": 0.8581, "step": 4090 }, { "epoch": 0.0880697684409503, "grad_norm": 0.6747875245775294, "learning_rate": 1.969475961084584e-05, "loss": 0.8268, "step": 4100 }, { "epoch": 0.0882845727542209, "grad_norm": 0.6183565239941164, "learning_rate": 1.9693086043492778e-05, "loss": 0.8336, "step": 4110 }, { "epoch": 0.08849937706749152, "grad_norm": 0.6659244460477072, "learning_rate": 1.9691407972267377e-05, "loss": 0.8718, "step": 4120 }, { "epoch": 0.08871418138076212, "grad_norm": 0.6537327783323973, "learning_rate": 1.968972539794935e-05, "loss": 0.8369, "step": 4130 }, { "epoch": 0.08892898569403274, "grad_norm": 0.7320247153435835, "learning_rate": 1.96880383213205e-05, "loss": 0.8539, "step": 4140 }, { "epoch": 0.08914379000730335, "grad_norm": 0.6260363637735203, "learning_rate": 1.9686346743164726e-05, "loss": 0.847, "step": 4150 }, { "epoch": 0.08935859432057396, "grad_norm": 0.65678546506254, "learning_rate": 1.9684650664268016e-05, "loss": 0.8428, "step": 4160 }, { "epoch": 0.08957339863384457, "grad_norm": 0.6552849199787741, "learning_rate": 1.9682950085418446e-05, "loss": 0.8336, "step": 4170 }, { "epoch": 0.08978820294711518, "grad_norm": 0.6302616145397817, "learning_rate": 1.9681245007406192e-05, "loss": 0.8452, "step": 4180 }, { "epoch": 0.09000300726038579, "grad_norm": 0.664218834980882, "learning_rate": 1.9679535431023512e-05, "loss": 0.8528, "step": 4190 }, { "epoch": 0.0902178115736564, "grad_norm": 0.6382635022742835, "learning_rate": 1.9677821357064758e-05, "loss": 0.837, "step": 4200 }, { "epoch": 0.09043261588692701, "grad_norm": 0.6287924929580544, "learning_rate": 1.967610278632637e-05, "loss": 0.8456, "step": 4210 }, { "epoch": 0.09064742020019761, "grad_norm": 0.6735082120990712, "learning_rate": 1.9674379719606874e-05, "loss": 0.8307, "step": 4220 }, { "epoch": 0.09086222451346823, "grad_norm": 0.6377012982256299, "learning_rate": 1.9672652157706897e-05, "loss": 0.8483, "step": 4230 }, { "epoch": 0.09107702882673883, "grad_norm": 0.6593721140765066, "learning_rate": 1.9670920101429142e-05, "loss": 0.8441, "step": 4240 }, { "epoch": 0.09129183314000945, "grad_norm": 0.674319812375293, "learning_rate": 1.9669183551578414e-05, "loss": 0.8243, "step": 4250 }, { "epoch": 0.09150663745328007, "grad_norm": 0.627013344868889, "learning_rate": 1.9667442508961585e-05, "loss": 0.8425, "step": 4260 }, { "epoch": 0.09172144176655067, "grad_norm": 0.6958437594298889, "learning_rate": 1.9665696974387633e-05, "loss": 0.8526, "step": 4270 }, { "epoch": 0.09193624607982129, "grad_norm": 0.6044037023198737, "learning_rate": 1.966394694866762e-05, "loss": 0.8533, "step": 4280 }, { "epoch": 0.09215105039309189, "grad_norm": 0.6542299584474508, "learning_rate": 1.9662192432614683e-05, "loss": 0.8565, "step": 4290 }, { "epoch": 0.09236585470636251, "grad_norm": 0.6390677549567899, "learning_rate": 1.9660433427044064e-05, "loss": 0.8494, "step": 4300 }, { "epoch": 0.09258065901963311, "grad_norm": 0.679504215836474, "learning_rate": 1.9658669932773072e-05, "loss": 0.8587, "step": 4310 }, { "epoch": 0.09279546333290373, "grad_norm": 0.6627368866357531, "learning_rate": 1.965690195062112e-05, "loss": 0.8355, "step": 4320 }, { "epoch": 0.09301026764617433, "grad_norm": 0.6349256488418371, "learning_rate": 1.9655129481409695e-05, "loss": 0.8476, "step": 4330 }, { "epoch": 0.09322507195944495, "grad_norm": 0.6397326363467064, "learning_rate": 1.9653352525962363e-05, "loss": 0.8424, "step": 4340 }, { "epoch": 0.09343987627271555, "grad_norm": 0.6437203998695848, "learning_rate": 1.9651571085104796e-05, "loss": 0.8575, "step": 4350 }, { "epoch": 0.09365468058598617, "grad_norm": 0.6296219759406441, "learning_rate": 1.9649785159664723e-05, "loss": 0.8444, "step": 4360 }, { "epoch": 0.09386948489925678, "grad_norm": 0.6364710421208644, "learning_rate": 1.964799475047198e-05, "loss": 0.8409, "step": 4370 }, { "epoch": 0.09408428921252739, "grad_norm": 0.664099321204506, "learning_rate": 1.9646199858358475e-05, "loss": 0.8537, "step": 4380 }, { "epoch": 0.094299093525798, "grad_norm": 0.6145796569330872, "learning_rate": 1.96444004841582e-05, "loss": 0.835, "step": 4390 }, { "epoch": 0.0945138978390686, "grad_norm": 0.6258749417882592, "learning_rate": 1.964259662870723e-05, "loss": 0.8423, "step": 4400 }, { "epoch": 0.09472870215233922, "grad_norm": 0.6768124086471206, "learning_rate": 1.9640788292843722e-05, "loss": 0.8526, "step": 4410 }, { "epoch": 0.09494350646560983, "grad_norm": 0.6679825227131061, "learning_rate": 1.963897547740792e-05, "loss": 0.8593, "step": 4420 }, { "epoch": 0.09515831077888044, "grad_norm": 0.6708318644795358, "learning_rate": 1.9637158183242138e-05, "loss": 0.8403, "step": 4430 }, { "epoch": 0.09537311509215105, "grad_norm": 0.6182453595533811, "learning_rate": 1.9635336411190786e-05, "loss": 0.8464, "step": 4440 }, { "epoch": 0.09558791940542166, "grad_norm": 0.6518263259411956, "learning_rate": 1.963351016210034e-05, "loss": 0.8406, "step": 4450 }, { "epoch": 0.09580272371869226, "grad_norm": 0.6695930906879859, "learning_rate": 1.9631679436819363e-05, "loss": 0.8461, "step": 4460 }, { "epoch": 0.09601752803196288, "grad_norm": 0.6609509728832942, "learning_rate": 1.9629844236198502e-05, "loss": 0.8533, "step": 4470 }, { "epoch": 0.0962323323452335, "grad_norm": 0.6657143200018495, "learning_rate": 1.9628004561090474e-05, "loss": 0.8357, "step": 4480 }, { "epoch": 0.0964471366585041, "grad_norm": 0.6630542341667736, "learning_rate": 1.9626160412350085e-05, "loss": 0.8322, "step": 4490 }, { "epoch": 0.09666194097177472, "grad_norm": 0.6314767253481299, "learning_rate": 1.9624311790834217e-05, "loss": 0.8385, "step": 4500 }, { "epoch": 0.09687674528504532, "grad_norm": 0.6903306283162439, "learning_rate": 1.962245869740182e-05, "loss": 0.8557, "step": 4510 }, { "epoch": 0.09709154959831594, "grad_norm": 0.669279047897614, "learning_rate": 1.9620601132913936e-05, "loss": 0.8552, "step": 4520 }, { "epoch": 0.09730635391158654, "grad_norm": 0.6814182363619116, "learning_rate": 1.9618739098233676e-05, "loss": 0.8416, "step": 4530 }, { "epoch": 0.09752115822485716, "grad_norm": 0.6462039533634677, "learning_rate": 1.9616872594226232e-05, "loss": 0.8443, "step": 4540 }, { "epoch": 0.09773596253812776, "grad_norm": 0.6397436767943191, "learning_rate": 1.9615001621758867e-05, "loss": 0.8401, "step": 4550 }, { "epoch": 0.09795076685139838, "grad_norm": 0.661032708042101, "learning_rate": 1.9613126181700932e-05, "loss": 0.8313, "step": 4560 }, { "epoch": 0.09816557116466898, "grad_norm": 0.6481039328379562, "learning_rate": 1.9611246274923844e-05, "loss": 0.834, "step": 4570 }, { "epoch": 0.0983803754779396, "grad_norm": 0.6747780419159984, "learning_rate": 1.960936190230109e-05, "loss": 0.8326, "step": 4580 }, { "epoch": 0.09859517979121021, "grad_norm": 0.661376547720958, "learning_rate": 1.960747306470825e-05, "loss": 0.8294, "step": 4590 }, { "epoch": 0.09880998410448082, "grad_norm": 0.650547641584137, "learning_rate": 1.9605579763022966e-05, "loss": 0.8426, "step": 4600 }, { "epoch": 0.09902478841775143, "grad_norm": 0.6682948635311629, "learning_rate": 1.960368199812495e-05, "loss": 0.8419, "step": 4610 }, { "epoch": 0.09923959273102204, "grad_norm": 0.6467278134601648, "learning_rate": 1.9601779770896007e-05, "loss": 0.8293, "step": 4620 }, { "epoch": 0.09945439704429265, "grad_norm": 0.6325684184789404, "learning_rate": 1.9599873082219992e-05, "loss": 0.8338, "step": 4630 }, { "epoch": 0.09966920135756326, "grad_norm": 0.6688907540062, "learning_rate": 1.9597961932982845e-05, "loss": 0.8363, "step": 4640 }, { "epoch": 0.09988400567083387, "grad_norm": 0.654845343648715, "learning_rate": 1.9596046324072586e-05, "loss": 0.8416, "step": 4650 }, { "epoch": 0.10009880998410448, "grad_norm": 0.6320139560546627, "learning_rate": 1.9594126256379286e-05, "loss": 0.863, "step": 4660 }, { "epoch": 0.10031361429737509, "grad_norm": 0.6691994371938742, "learning_rate": 1.9592201730795112e-05, "loss": 0.8456, "step": 4670 }, { "epoch": 0.1005284186106457, "grad_norm": 0.6606839474988468, "learning_rate": 1.9590272748214283e-05, "loss": 0.8302, "step": 4680 }, { "epoch": 0.10074322292391631, "grad_norm": 0.6262068989715459, "learning_rate": 1.9588339309533103e-05, "loss": 0.8377, "step": 4690 }, { "epoch": 0.10095802723718693, "grad_norm": 0.6243559896905192, "learning_rate": 1.9586401415649935e-05, "loss": 0.8396, "step": 4700 }, { "epoch": 0.10117283155045753, "grad_norm": 0.6687990805968839, "learning_rate": 1.958445906746522e-05, "loss": 0.8354, "step": 4710 }, { "epoch": 0.10138763586372815, "grad_norm": 0.6310170587812323, "learning_rate": 1.9582512265881467e-05, "loss": 0.8211, "step": 4720 }, { "epoch": 0.10160244017699875, "grad_norm": 0.6388753086935843, "learning_rate": 1.958056101180325e-05, "loss": 0.8441, "step": 4730 }, { "epoch": 0.10181724449026937, "grad_norm": 0.6638753058560826, "learning_rate": 1.9578605306137216e-05, "loss": 0.8358, "step": 4740 }, { "epoch": 0.10203204880353997, "grad_norm": 0.6302752971289505, "learning_rate": 1.9576645149792083e-05, "loss": 0.8395, "step": 4750 }, { "epoch": 0.10224685311681059, "grad_norm": 0.6458064089587059, "learning_rate": 1.957468054367863e-05, "loss": 0.8401, "step": 4760 }, { "epoch": 0.10246165743008119, "grad_norm": 0.646382816972308, "learning_rate": 1.957271148870971e-05, "loss": 0.8435, "step": 4770 }, { "epoch": 0.10267646174335181, "grad_norm": 0.6155538183074262, "learning_rate": 1.9570737985800237e-05, "loss": 0.8306, "step": 4780 }, { "epoch": 0.10289126605662241, "grad_norm": 0.6367578606631397, "learning_rate": 1.95687600358672e-05, "loss": 0.8369, "step": 4790 }, { "epoch": 0.10310607036989303, "grad_norm": 0.6404541919457326, "learning_rate": 1.956677763982964e-05, "loss": 0.8254, "step": 4800 }, { "epoch": 0.10332087468316364, "grad_norm": 0.6215108376628148, "learning_rate": 1.9564790798608682e-05, "loss": 0.8584, "step": 4810 }, { "epoch": 0.10353567899643425, "grad_norm": 0.6338570435141742, "learning_rate": 1.9562799513127507e-05, "loss": 0.8324, "step": 4820 }, { "epoch": 0.10375048330970486, "grad_norm": 0.6019370576161179, "learning_rate": 1.956080378431136e-05, "loss": 0.8559, "step": 4830 }, { "epoch": 0.10396528762297547, "grad_norm": 0.6297162423533607, "learning_rate": 1.9558803613087548e-05, "loss": 0.8462, "step": 4840 }, { "epoch": 0.10418009193624608, "grad_norm": 0.6045449219846318, "learning_rate": 1.9556799000385454e-05, "loss": 0.8386, "step": 4850 }, { "epoch": 0.10439489624951669, "grad_norm": 0.6549861670806117, "learning_rate": 1.9554789947136508e-05, "loss": 0.8306, "step": 4860 }, { "epoch": 0.1046097005627873, "grad_norm": 0.6630879044217777, "learning_rate": 1.9552776454274223e-05, "loss": 0.8314, "step": 4870 }, { "epoch": 0.1048245048760579, "grad_norm": 0.651950768006272, "learning_rate": 1.955075852273416e-05, "loss": 0.8385, "step": 4880 }, { "epoch": 0.10503930918932852, "grad_norm": 0.6626536087969357, "learning_rate": 1.9548736153453943e-05, "loss": 0.8363, "step": 4890 }, { "epoch": 0.10525411350259913, "grad_norm": 0.6886579304677174, "learning_rate": 1.9546709347373265e-05, "loss": 0.8499, "step": 4900 }, { "epoch": 0.10546891781586974, "grad_norm": 0.6312997525023037, "learning_rate": 1.954467810543388e-05, "loss": 0.8235, "step": 4910 }, { "epoch": 0.10568372212914036, "grad_norm": 0.6365142217446823, "learning_rate": 1.954264242857959e-05, "loss": 0.8356, "step": 4920 }, { "epoch": 0.10589852644241096, "grad_norm": 0.6746025653030885, "learning_rate": 1.954060231775628e-05, "loss": 0.8528, "step": 4930 }, { "epoch": 0.10611333075568158, "grad_norm": 0.6281429344046033, "learning_rate": 1.9538557773911878e-05, "loss": 0.8473, "step": 4940 }, { "epoch": 0.10632813506895218, "grad_norm": 0.6505897210485625, "learning_rate": 1.9536508797996377e-05, "loss": 0.8377, "step": 4950 }, { "epoch": 0.1065429393822228, "grad_norm": 0.6758994881928211, "learning_rate": 1.953445539096183e-05, "loss": 0.8544, "step": 4960 }, { "epoch": 0.1067577436954934, "grad_norm": 0.6285712244694984, "learning_rate": 1.953239755376235e-05, "loss": 0.8544, "step": 4970 }, { "epoch": 0.10697254800876402, "grad_norm": 0.6210838565177652, "learning_rate": 1.9530335287354102e-05, "loss": 0.8304, "step": 4980 }, { "epoch": 0.10718735232203462, "grad_norm": 0.6140776582373115, "learning_rate": 1.952826859269532e-05, "loss": 0.836, "step": 4990 }, { "epoch": 0.10740215663530524, "grad_norm": 0.6671326709180452, "learning_rate": 1.9526197470746283e-05, "loss": 0.8343, "step": 5000 }, { "epoch": 0.10761696094857584, "grad_norm": 0.6597003360765735, "learning_rate": 1.9524121922469338e-05, "loss": 0.8652, "step": 5010 }, { "epoch": 0.10783176526184646, "grad_norm": 0.5956926521621552, "learning_rate": 1.952204194882888e-05, "loss": 0.8262, "step": 5020 }, { "epoch": 0.10804656957511707, "grad_norm": 0.7019664111760795, "learning_rate": 1.9519957550791372e-05, "loss": 0.8429, "step": 5030 }, { "epoch": 0.10826137388838768, "grad_norm": 0.6304099214813407, "learning_rate": 1.951786872932532e-05, "loss": 0.8393, "step": 5040 }, { "epoch": 0.1084761782016583, "grad_norm": 0.6463172161616285, "learning_rate": 1.951577548540129e-05, "loss": 0.8387, "step": 5050 }, { "epoch": 0.1086909825149289, "grad_norm": 0.6502922763327477, "learning_rate": 1.9513677819991905e-05, "loss": 0.835, "step": 5060 }, { "epoch": 0.10890578682819951, "grad_norm": 0.618888528634145, "learning_rate": 1.951157573407184e-05, "loss": 0.8559, "step": 5070 }, { "epoch": 0.10912059114147012, "grad_norm": 0.6374773953158367, "learning_rate": 1.9509469228617827e-05, "loss": 0.8317, "step": 5080 }, { "epoch": 0.10933539545474073, "grad_norm": 0.6303019884270492, "learning_rate": 1.9507358304608644e-05, "loss": 0.8303, "step": 5090 }, { "epoch": 0.10955019976801134, "grad_norm": 0.6561252982950152, "learning_rate": 1.9505242963025133e-05, "loss": 0.8273, "step": 5100 }, { "epoch": 0.10976500408128195, "grad_norm": 0.6290165189842113, "learning_rate": 1.9503123204850184e-05, "loss": 0.8421, "step": 5110 }, { "epoch": 0.10997980839455257, "grad_norm": 0.617329955446763, "learning_rate": 1.9500999031068734e-05, "loss": 0.8232, "step": 5120 }, { "epoch": 0.11019461270782317, "grad_norm": 0.6107026109474121, "learning_rate": 1.949887044266778e-05, "loss": 0.835, "step": 5130 }, { "epoch": 0.11040941702109379, "grad_norm": 0.6269967944152154, "learning_rate": 1.949673744063636e-05, "loss": 0.8305, "step": 5140 }, { "epoch": 0.11062422133436439, "grad_norm": 0.6360564673651822, "learning_rate": 1.949460002596557e-05, "loss": 0.832, "step": 5150 }, { "epoch": 0.11083902564763501, "grad_norm": 0.6328470911946484, "learning_rate": 1.9492458199648564e-05, "loss": 0.8273, "step": 5160 }, { "epoch": 0.11105382996090561, "grad_norm": 0.880034167436241, "learning_rate": 1.949031196268053e-05, "loss": 0.8422, "step": 5170 }, { "epoch": 0.11126863427417623, "grad_norm": 0.6718310412202478, "learning_rate": 1.948816131605871e-05, "loss": 0.8444, "step": 5180 }, { "epoch": 0.11148343858744683, "grad_norm": 0.6443011347377299, "learning_rate": 1.9486006260782406e-05, "loss": 0.8365, "step": 5190 }, { "epoch": 0.11169824290071745, "grad_norm": 0.6183046355489557, "learning_rate": 1.948384679785295e-05, "loss": 0.8545, "step": 5200 }, { "epoch": 0.11191304721398805, "grad_norm": 0.6251393760951905, "learning_rate": 1.9481682928273738e-05, "loss": 0.8268, "step": 5210 }, { "epoch": 0.11212785152725867, "grad_norm": 0.6344101557996115, "learning_rate": 1.9479514653050212e-05, "loss": 0.8404, "step": 5220 }, { "epoch": 0.11234265584052928, "grad_norm": 0.5860944916243113, "learning_rate": 1.9477341973189844e-05, "loss": 0.8355, "step": 5230 }, { "epoch": 0.11255746015379989, "grad_norm": 0.6186996808539741, "learning_rate": 1.9475164889702175e-05, "loss": 0.8479, "step": 5240 }, { "epoch": 0.1127722644670705, "grad_norm": 0.6034601205701468, "learning_rate": 1.9472983403598783e-05, "loss": 0.8277, "step": 5250 }, { "epoch": 0.11298706878034111, "grad_norm": 0.6440643260727591, "learning_rate": 1.9470797515893286e-05, "loss": 0.8376, "step": 5260 }, { "epoch": 0.11320187309361172, "grad_norm": 0.6324744223203518, "learning_rate": 1.946860722760135e-05, "loss": 0.8311, "step": 5270 }, { "epoch": 0.11341667740688233, "grad_norm": 0.618379973907413, "learning_rate": 1.9466412539740697e-05, "loss": 0.8287, "step": 5280 }, { "epoch": 0.11363148172015294, "grad_norm": 0.6256736960626628, "learning_rate": 1.9464213453331076e-05, "loss": 0.8384, "step": 5290 }, { "epoch": 0.11384628603342355, "grad_norm": 0.607811975191907, "learning_rate": 1.9462009969394292e-05, "loss": 0.8439, "step": 5300 }, { "epoch": 0.11406109034669416, "grad_norm": 0.6214988694407934, "learning_rate": 1.945980208895419e-05, "loss": 0.8385, "step": 5310 }, { "epoch": 0.11427589465996477, "grad_norm": 0.6260531019431299, "learning_rate": 1.945758981303665e-05, "loss": 0.8386, "step": 5320 }, { "epoch": 0.11449069897323538, "grad_norm": 0.6510239330074841, "learning_rate": 1.9455373142669615e-05, "loss": 0.8155, "step": 5330 }, { "epoch": 0.114705503286506, "grad_norm": 0.6114277545538802, "learning_rate": 1.945315207888304e-05, "loss": 0.8236, "step": 5340 }, { "epoch": 0.1149203075997766, "grad_norm": 0.6290587853644128, "learning_rate": 1.945092662270895e-05, "loss": 0.8391, "step": 5350 }, { "epoch": 0.11513511191304722, "grad_norm": 0.6252788540877645, "learning_rate": 1.9448696775181393e-05, "loss": 0.8302, "step": 5360 }, { "epoch": 0.11534991622631782, "grad_norm": 0.6366366260996406, "learning_rate": 1.9446462537336462e-05, "loss": 0.832, "step": 5370 }, { "epoch": 0.11556472053958844, "grad_norm": 0.6403035006648804, "learning_rate": 1.9444223910212297e-05, "loss": 0.8401, "step": 5380 }, { "epoch": 0.11577952485285904, "grad_norm": 0.6471061463773111, "learning_rate": 1.9441980894849068e-05, "loss": 0.822, "step": 5390 }, { "epoch": 0.11599432916612966, "grad_norm": 0.6485069683086401, "learning_rate": 1.9439733492288986e-05, "loss": 0.8327, "step": 5400 }, { "epoch": 0.11620913347940026, "grad_norm": 0.6195424958899896, "learning_rate": 1.9437481703576303e-05, "loss": 0.827, "step": 5410 }, { "epoch": 0.11642393779267088, "grad_norm": 0.6067271492911613, "learning_rate": 1.9435225529757307e-05, "loss": 0.8378, "step": 5420 }, { "epoch": 0.11663874210594148, "grad_norm": 0.6333870629436046, "learning_rate": 1.9432964971880325e-05, "loss": 0.8274, "step": 5430 }, { "epoch": 0.1168535464192121, "grad_norm": 0.6349621748512131, "learning_rate": 1.9430700030995724e-05, "loss": 0.8371, "step": 5440 }, { "epoch": 0.11706835073248271, "grad_norm": 0.6132008334219502, "learning_rate": 1.94284307081559e-05, "loss": 0.8254, "step": 5450 }, { "epoch": 0.11728315504575332, "grad_norm": 0.626720073182986, "learning_rate": 1.942615700441529e-05, "loss": 0.8296, "step": 5460 }, { "epoch": 0.11749795935902393, "grad_norm": 0.6280587181806514, "learning_rate": 1.9423878920830366e-05, "loss": 0.8228, "step": 5470 }, { "epoch": 0.11771276367229454, "grad_norm": 0.6514270560053967, "learning_rate": 1.942159645845963e-05, "loss": 0.8262, "step": 5480 }, { "epoch": 0.11792756798556515, "grad_norm": 0.6130650794491808, "learning_rate": 1.9419309618363637e-05, "loss": 0.8379, "step": 5490 }, { "epoch": 0.11814237229883576, "grad_norm": 0.637159008327422, "learning_rate": 1.9417018401604947e-05, "loss": 0.8282, "step": 5500 }, { "epoch": 0.11835717661210637, "grad_norm": 0.6188423823463596, "learning_rate": 1.9414722809248182e-05, "loss": 0.8305, "step": 5510 }, { "epoch": 0.11857198092537698, "grad_norm": 0.6245770269033344, "learning_rate": 1.9412422842359976e-05, "loss": 0.833, "step": 5520 }, { "epoch": 0.1187867852386476, "grad_norm": 0.6365044259718952, "learning_rate": 1.9410118502009003e-05, "loss": 0.8388, "step": 5530 }, { "epoch": 0.1190015895519182, "grad_norm": 0.6328160935271273, "learning_rate": 1.9407809789265973e-05, "loss": 0.845, "step": 5540 }, { "epoch": 0.11921639386518881, "grad_norm": 0.630311910667844, "learning_rate": 1.9405496705203628e-05, "loss": 0.8345, "step": 5550 }, { "epoch": 0.11943119817845943, "grad_norm": 0.638696910751977, "learning_rate": 1.9403179250896733e-05, "loss": 0.8416, "step": 5560 }, { "epoch": 0.11964600249173003, "grad_norm": 0.6247248724186333, "learning_rate": 1.940085742742209e-05, "loss": 0.8331, "step": 5570 }, { "epoch": 0.11986080680500065, "grad_norm": 0.6213341924856995, "learning_rate": 1.9398531235858525e-05, "loss": 0.8347, "step": 5580 }, { "epoch": 0.12007561111827125, "grad_norm": 0.6121503915927947, "learning_rate": 1.9396200677286907e-05, "loss": 0.8323, "step": 5590 }, { "epoch": 0.12029041543154187, "grad_norm": 0.6189647502053036, "learning_rate": 1.939386575279012e-05, "loss": 0.8457, "step": 5600 }, { "epoch": 0.12050521974481247, "grad_norm": 0.6135493571505611, "learning_rate": 1.939152646345308e-05, "loss": 0.8368, "step": 5610 }, { "epoch": 0.12072002405808309, "grad_norm": 0.6378689788750032, "learning_rate": 1.9389182810362738e-05, "loss": 0.8449, "step": 5620 }, { "epoch": 0.12093482837135369, "grad_norm": 0.6294801432407113, "learning_rate": 1.938683479460806e-05, "loss": 0.8271, "step": 5630 }, { "epoch": 0.12114963268462431, "grad_norm": 0.6328552710008448, "learning_rate": 1.9384482417280056e-05, "loss": 0.8262, "step": 5640 }, { "epoch": 0.12136443699789491, "grad_norm": 0.6186605157387922, "learning_rate": 1.9382125679471745e-05, "loss": 0.8278, "step": 5650 }, { "epoch": 0.12157924131116553, "grad_norm": 0.6334724170139352, "learning_rate": 1.9379764582278185e-05, "loss": 0.8284, "step": 5660 }, { "epoch": 0.12179404562443615, "grad_norm": 0.6283387797940377, "learning_rate": 1.9377399126796454e-05, "loss": 0.8359, "step": 5670 }, { "epoch": 0.12200884993770675, "grad_norm": 0.6055428009166745, "learning_rate": 1.9375029314125658e-05, "loss": 0.8267, "step": 5680 }, { "epoch": 0.12222365425097736, "grad_norm": 0.6034275177420789, "learning_rate": 1.9372655145366922e-05, "loss": 0.8252, "step": 5690 }, { "epoch": 0.12243845856424797, "grad_norm": 0.6100632464810285, "learning_rate": 1.93702766216234e-05, "loss": 0.8284, "step": 5700 }, { "epoch": 0.12265326287751858, "grad_norm": 0.6654383228152203, "learning_rate": 1.936789374400027e-05, "loss": 0.8124, "step": 5710 }, { "epoch": 0.12286806719078919, "grad_norm": 0.6202336240170405, "learning_rate": 1.9365506513604725e-05, "loss": 0.828, "step": 5720 }, { "epoch": 0.1230828715040598, "grad_norm": 0.6181456688535198, "learning_rate": 1.936311493154599e-05, "loss": 0.8446, "step": 5730 }, { "epoch": 0.12329767581733041, "grad_norm": 0.619325566527615, "learning_rate": 1.9360718998935315e-05, "loss": 0.835, "step": 5740 }, { "epoch": 0.12351248013060102, "grad_norm": 0.6293306658393487, "learning_rate": 1.9358318716885955e-05, "loss": 0.8225, "step": 5750 }, { "epoch": 0.12372728444387163, "grad_norm": 0.6231931933033569, "learning_rate": 1.9355914086513205e-05, "loss": 0.845, "step": 5760 }, { "epoch": 0.12394208875714224, "grad_norm": 0.6298812123623225, "learning_rate": 1.9353505108934363e-05, "loss": 0.829, "step": 5770 }, { "epoch": 0.12415689307041286, "grad_norm": 0.6200961201702063, "learning_rate": 1.9351091785268762e-05, "loss": 0.8269, "step": 5780 }, { "epoch": 0.12437169738368346, "grad_norm": 0.6118430477057613, "learning_rate": 1.9348674116637747e-05, "loss": 0.8308, "step": 5790 }, { "epoch": 0.12458650169695408, "grad_norm": 0.6127169022228522, "learning_rate": 1.934625210416468e-05, "loss": 0.8188, "step": 5800 }, { "epoch": 0.12480130601022468, "grad_norm": 0.5858112550024495, "learning_rate": 1.9343825748974946e-05, "loss": 0.8225, "step": 5810 }, { "epoch": 0.1250161103234953, "grad_norm": 0.6431362667704915, "learning_rate": 1.9341395052195943e-05, "loss": 0.8251, "step": 5820 }, { "epoch": 0.12523091463676592, "grad_norm": 0.6177184812180734, "learning_rate": 1.9338960014957094e-05, "loss": 0.8117, "step": 5830 }, { "epoch": 0.1254457189500365, "grad_norm": 7.261555237494985, "learning_rate": 1.9336520638389828e-05, "loss": 0.8424, "step": 5840 }, { "epoch": 0.12566052326330712, "grad_norm": 0.6165137803342475, "learning_rate": 1.9334076923627602e-05, "loss": 0.8468, "step": 5850 }, { "epoch": 0.12587532757657774, "grad_norm": 0.6652429602651283, "learning_rate": 1.9331628871805882e-05, "loss": 0.8241, "step": 5860 }, { "epoch": 0.12609013188984836, "grad_norm": 0.5977417751851771, "learning_rate": 1.9329176484062147e-05, "loss": 0.815, "step": 5870 }, { "epoch": 0.12630493620311894, "grad_norm": 0.6143551332297007, "learning_rate": 1.9326719761535896e-05, "loss": 0.829, "step": 5880 }, { "epoch": 0.12651974051638956, "grad_norm": 0.5722771504606794, "learning_rate": 1.932425870536864e-05, "loss": 0.8276, "step": 5890 }, { "epoch": 0.12673454482966018, "grad_norm": 0.6233204435595209, "learning_rate": 1.9321793316703904e-05, "loss": 0.8257, "step": 5900 }, { "epoch": 0.1269493491429308, "grad_norm": 0.6060690392530359, "learning_rate": 1.9319323596687226e-05, "loss": 0.8361, "step": 5910 }, { "epoch": 0.1271641534562014, "grad_norm": 0.6503234853479353, "learning_rate": 1.9316849546466154e-05, "loss": 0.8534, "step": 5920 }, { "epoch": 0.127378957769472, "grad_norm": 0.6577873265981129, "learning_rate": 1.931437116719025e-05, "loss": 0.8263, "step": 5930 }, { "epoch": 0.12759376208274262, "grad_norm": 0.6504948800648593, "learning_rate": 1.9311888460011096e-05, "loss": 0.8294, "step": 5940 }, { "epoch": 0.12780856639601323, "grad_norm": 0.6317328887950812, "learning_rate": 1.9309401426082263e-05, "loss": 0.8313, "step": 5950 }, { "epoch": 0.12802337070928385, "grad_norm": 0.6211031800695695, "learning_rate": 1.9306910066559358e-05, "loss": 0.8355, "step": 5960 }, { "epoch": 0.12823817502255444, "grad_norm": 0.6055270936106261, "learning_rate": 1.9304414382599977e-05, "loss": 0.8246, "step": 5970 }, { "epoch": 0.12845297933582506, "grad_norm": 0.5997807600765583, "learning_rate": 1.9301914375363746e-05, "loss": 0.8056, "step": 5980 }, { "epoch": 0.12866778364909567, "grad_norm": 0.6211427083875907, "learning_rate": 1.9299410046012277e-05, "loss": 0.8315, "step": 5990 }, { "epoch": 0.1288825879623663, "grad_norm": 0.5688864716592172, "learning_rate": 1.9296901395709206e-05, "loss": 0.8257, "step": 6000 }, { "epoch": 0.1290973922756369, "grad_norm": 0.6012936384715789, "learning_rate": 1.929438842562017e-05, "loss": 0.8351, "step": 6010 }, { "epoch": 0.1293121965889075, "grad_norm": 0.6005677088042237, "learning_rate": 1.929187113691282e-05, "loss": 0.8357, "step": 6020 }, { "epoch": 0.1295270009021781, "grad_norm": 0.6160401614681437, "learning_rate": 1.9289349530756804e-05, "loss": 0.843, "step": 6030 }, { "epoch": 0.12974180521544873, "grad_norm": 0.6124623308197319, "learning_rate": 1.9286823608323785e-05, "loss": 0.8233, "step": 6040 }, { "epoch": 0.12995660952871935, "grad_norm": 0.6203159062875466, "learning_rate": 1.9284293370787424e-05, "loss": 0.8303, "step": 6050 }, { "epoch": 0.13017141384198994, "grad_norm": 0.6107868080049285, "learning_rate": 1.9281758819323393e-05, "loss": 0.8236, "step": 6060 }, { "epoch": 0.13038621815526055, "grad_norm": 0.6132700985840815, "learning_rate": 1.9279219955109366e-05, "loss": 0.8307, "step": 6070 }, { "epoch": 0.13060102246853117, "grad_norm": 0.6055289750075414, "learning_rate": 1.927667677932502e-05, "loss": 0.8127, "step": 6080 }, { "epoch": 0.1308158267818018, "grad_norm": 0.6116418582015993, "learning_rate": 1.9274129293152037e-05, "loss": 0.832, "step": 6090 }, { "epoch": 0.1310306310950724, "grad_norm": 0.6179783492037558, "learning_rate": 1.92715774977741e-05, "loss": 0.829, "step": 6100 }, { "epoch": 0.131245435408343, "grad_norm": 0.5940118498029715, "learning_rate": 1.9269021394376896e-05, "loss": 0.8176, "step": 6110 }, { "epoch": 0.1314602397216136, "grad_norm": 0.6031829297041618, "learning_rate": 1.9266460984148116e-05, "loss": 0.8279, "step": 6120 }, { "epoch": 0.13167504403488423, "grad_norm": 0.5930057021061649, "learning_rate": 1.9263896268277448e-05, "loss": 0.8139, "step": 6130 }, { "epoch": 0.13188984834815484, "grad_norm": 0.6095258155514939, "learning_rate": 1.9261327247956575e-05, "loss": 0.8163, "step": 6140 }, { "epoch": 0.13210465266142543, "grad_norm": 0.600045191978913, "learning_rate": 1.9258753924379196e-05, "loss": 0.8167, "step": 6150 }, { "epoch": 0.13231945697469605, "grad_norm": 0.6983897516026605, "learning_rate": 1.9256176298740997e-05, "loss": 0.828, "step": 6160 }, { "epoch": 0.13253426128796666, "grad_norm": 0.6132138821976615, "learning_rate": 1.925359437223967e-05, "loss": 0.8201, "step": 6170 }, { "epoch": 0.13274906560123728, "grad_norm": 0.6155911518334949, "learning_rate": 1.9251008146074895e-05, "loss": 0.822, "step": 6180 }, { "epoch": 0.13296386991450787, "grad_norm": 1.3805735351829016, "learning_rate": 1.924841762144836e-05, "loss": 0.8297, "step": 6190 }, { "epoch": 0.1331786742277785, "grad_norm": 0.7417405322736338, "learning_rate": 1.924582279956375e-05, "loss": 0.8178, "step": 6200 }, { "epoch": 0.1333934785410491, "grad_norm": 0.6590382875105805, "learning_rate": 1.9243223681626734e-05, "loss": 0.8177, "step": 6210 }, { "epoch": 0.13360828285431972, "grad_norm": 0.5947961735015622, "learning_rate": 1.9240620268845e-05, "loss": 0.8375, "step": 6220 }, { "epoch": 0.13382308716759034, "grad_norm": 0.6151652104414234, "learning_rate": 1.9238012562428204e-05, "loss": 0.8136, "step": 6230 }, { "epoch": 0.13403789148086093, "grad_norm": 0.6081687195694883, "learning_rate": 1.9235400563588027e-05, "loss": 0.8092, "step": 6240 }, { "epoch": 0.13425269579413154, "grad_norm": 0.609223276457847, "learning_rate": 1.923278427353812e-05, "loss": 0.8176, "step": 6250 }, { "epoch": 0.13446750010740216, "grad_norm": 0.6057832795531277, "learning_rate": 1.9230163693494134e-05, "loss": 0.8245, "step": 6260 }, { "epoch": 0.13468230442067278, "grad_norm": 0.7218938650204069, "learning_rate": 1.922753882467372e-05, "loss": 0.829, "step": 6270 }, { "epoch": 0.13489710873394337, "grad_norm": 0.5956376678659784, "learning_rate": 1.9224909668296518e-05, "loss": 0.8323, "step": 6280 }, { "epoch": 0.13511191304721398, "grad_norm": 0.6055774590690349, "learning_rate": 1.922227622558416e-05, "loss": 0.8325, "step": 6290 }, { "epoch": 0.1353267173604846, "grad_norm": 0.63594852369417, "learning_rate": 1.9219638497760272e-05, "loss": 0.8236, "step": 6300 }, { "epoch": 0.13554152167375522, "grad_norm": 0.6615251477336661, "learning_rate": 1.921699648605046e-05, "loss": 0.8299, "step": 6310 }, { "epoch": 0.13575632598702583, "grad_norm": 0.5939888969308088, "learning_rate": 1.921435019168234e-05, "loss": 0.8314, "step": 6320 }, { "epoch": 0.13597113030029642, "grad_norm": 0.6224118503409543, "learning_rate": 1.9211699615885505e-05, "loss": 0.8105, "step": 6330 }, { "epoch": 0.13618593461356704, "grad_norm": 0.6349901674353197, "learning_rate": 1.920904475989153e-05, "loss": 0.8267, "step": 6340 }, { "epoch": 0.13640073892683766, "grad_norm": 0.6184766342855774, "learning_rate": 1.9206385624934002e-05, "loss": 0.8235, "step": 6350 }, { "epoch": 0.13661554324010827, "grad_norm": 0.593593688668399, "learning_rate": 1.9203722212248473e-05, "loss": 0.8179, "step": 6360 }, { "epoch": 0.13683034755337886, "grad_norm": 0.5904445744420688, "learning_rate": 1.9201054523072497e-05, "loss": 0.8278, "step": 6370 }, { "epoch": 0.13704515186664948, "grad_norm": 0.6197705191484348, "learning_rate": 1.9198382558645608e-05, "loss": 0.8315, "step": 6380 }, { "epoch": 0.1372599561799201, "grad_norm": 0.5970982729709278, "learning_rate": 1.9195706320209326e-05, "loss": 0.823, "step": 6390 }, { "epoch": 0.1374747604931907, "grad_norm": 0.5860292711215872, "learning_rate": 1.9193025809007164e-05, "loss": 0.8029, "step": 6400 }, { "epoch": 0.1376895648064613, "grad_norm": 0.5880366108161124, "learning_rate": 1.919034102628462e-05, "loss": 0.8206, "step": 6410 }, { "epoch": 0.13790436911973192, "grad_norm": 0.6454586755047972, "learning_rate": 1.918765197328916e-05, "loss": 0.8252, "step": 6420 }, { "epoch": 0.13811917343300253, "grad_norm": 0.5855160919576355, "learning_rate": 1.918495865127026e-05, "loss": 0.8177, "step": 6430 }, { "epoch": 0.13833397774627315, "grad_norm": 0.568749463356773, "learning_rate": 1.9182261061479357e-05, "loss": 0.8168, "step": 6440 }, { "epoch": 0.13854878205954377, "grad_norm": 0.578722846016812, "learning_rate": 1.9179559205169885e-05, "loss": 0.8283, "step": 6450 }, { "epoch": 0.13876358637281436, "grad_norm": 0.5933587203351999, "learning_rate": 1.9176853083597257e-05, "loss": 0.8366, "step": 6460 }, { "epoch": 0.13897839068608497, "grad_norm": 0.679493941830241, "learning_rate": 1.9174142698018864e-05, "loss": 0.8439, "step": 6470 }, { "epoch": 0.1391931949993556, "grad_norm": 0.6027286672996572, "learning_rate": 1.9171428049694082e-05, "loss": 0.8181, "step": 6480 }, { "epoch": 0.1394079993126262, "grad_norm": 0.6104128555070933, "learning_rate": 1.916870913988427e-05, "loss": 0.8276, "step": 6490 }, { "epoch": 0.1396228036258968, "grad_norm": 0.6013008901028725, "learning_rate": 1.9165985969852757e-05, "loss": 0.8274, "step": 6500 }, { "epoch": 0.1398376079391674, "grad_norm": 0.6122352927576057, "learning_rate": 1.916325854086486e-05, "loss": 0.824, "step": 6510 }, { "epoch": 0.14005241225243803, "grad_norm": 0.6300545859500599, "learning_rate": 1.916052685418788e-05, "loss": 0.8308, "step": 6520 }, { "epoch": 0.14026721656570865, "grad_norm": 0.583338621183778, "learning_rate": 1.9157790911091082e-05, "loss": 0.8186, "step": 6530 }, { "epoch": 0.14048202087897926, "grad_norm": 0.5947321272096432, "learning_rate": 1.9155050712845722e-05, "loss": 0.8141, "step": 6540 }, { "epoch": 0.14069682519224985, "grad_norm": 0.6508513713242783, "learning_rate": 1.915230626072502e-05, "loss": 0.8241, "step": 6550 }, { "epoch": 0.14091162950552047, "grad_norm": 0.5902491680834664, "learning_rate": 1.914955755600419e-05, "loss": 0.812, "step": 6560 }, { "epoch": 0.14112643381879109, "grad_norm": 0.6223882649259539, "learning_rate": 1.91468045999604e-05, "loss": 0.8205, "step": 6570 }, { "epoch": 0.1413412381320617, "grad_norm": 0.6039015945397973, "learning_rate": 1.9144047393872818e-05, "loss": 0.8254, "step": 6580 }, { "epoch": 0.1415560424453323, "grad_norm": 4.480885455321173, "learning_rate": 1.9141285939022563e-05, "loss": 0.8123, "step": 6590 }, { "epoch": 0.1417708467586029, "grad_norm": 0.6004465875420536, "learning_rate": 1.9138520236692747e-05, "loss": 0.8316, "step": 6600 }, { "epoch": 0.14198565107187353, "grad_norm": 0.5877543081764919, "learning_rate": 1.9135750288168446e-05, "loss": 0.8194, "step": 6610 }, { "epoch": 0.14220045538514414, "grad_norm": 0.6060063198959242, "learning_rate": 1.9132976094736707e-05, "loss": 0.8122, "step": 6620 }, { "epoch": 0.14241525969841473, "grad_norm": 0.6105433238582144, "learning_rate": 1.9130197657686555e-05, "loss": 0.8144, "step": 6630 }, { "epoch": 0.14263006401168535, "grad_norm": 0.5854211970675489, "learning_rate": 1.9127414978308987e-05, "loss": 0.826, "step": 6640 }, { "epoch": 0.14284486832495596, "grad_norm": 0.6132307714509498, "learning_rate": 1.9124628057896972e-05, "loss": 0.8096, "step": 6650 }, { "epoch": 0.14305967263822658, "grad_norm": 0.6097804703131902, "learning_rate": 1.912183689774544e-05, "loss": 0.8137, "step": 6660 }, { "epoch": 0.1432744769514972, "grad_norm": 0.5755382542472285, "learning_rate": 1.91190414991513e-05, "loss": 0.8191, "step": 6670 }, { "epoch": 0.1434892812647678, "grad_norm": 0.6077737497744028, "learning_rate": 1.9116241863413433e-05, "loss": 0.8327, "step": 6680 }, { "epoch": 0.1437040855780384, "grad_norm": 0.6330293724055359, "learning_rate": 1.9113437991832678e-05, "loss": 0.8159, "step": 6690 }, { "epoch": 0.14391888989130902, "grad_norm": 0.6019952588522437, "learning_rate": 1.911062988571185e-05, "loss": 0.8103, "step": 6700 }, { "epoch": 0.14413369420457964, "grad_norm": 0.6157593320641529, "learning_rate": 1.9107817546355726e-05, "loss": 0.8192, "step": 6710 }, { "epoch": 0.14434849851785023, "grad_norm": 0.6262865372100354, "learning_rate": 1.9105000975071062e-05, "loss": 0.8102, "step": 6720 }, { "epoch": 0.14456330283112084, "grad_norm": 0.598604944782719, "learning_rate": 1.9102180173166565e-05, "loss": 0.8178, "step": 6730 }, { "epoch": 0.14477810714439146, "grad_norm": 0.6130319012297948, "learning_rate": 1.909935514195292e-05, "loss": 0.8146, "step": 6740 }, { "epoch": 0.14499291145766208, "grad_norm": 0.6051680516635761, "learning_rate": 1.9096525882742766e-05, "loss": 0.8243, "step": 6750 }, { "epoch": 0.1452077157709327, "grad_norm": 0.5945658988091994, "learning_rate": 1.909369239685072e-05, "loss": 0.8215, "step": 6760 }, { "epoch": 0.14542252008420328, "grad_norm": 0.6014406118512295, "learning_rate": 1.9090854685593344e-05, "loss": 0.8152, "step": 6770 }, { "epoch": 0.1456373243974739, "grad_norm": 0.6069056436911239, "learning_rate": 1.9088012750289185e-05, "loss": 0.8089, "step": 6780 }, { "epoch": 0.14585212871074452, "grad_norm": 0.6384017208108504, "learning_rate": 1.908516659225874e-05, "loss": 0.8185, "step": 6790 }, { "epoch": 0.14606693302401513, "grad_norm": 0.6183552826731581, "learning_rate": 1.9082316212824467e-05, "loss": 0.829, "step": 6800 }, { "epoch": 0.14628173733728572, "grad_norm": 0.6033726613256812, "learning_rate": 1.9079461613310793e-05, "loss": 0.8368, "step": 6810 }, { "epoch": 0.14649654165055634, "grad_norm": 0.6767597702311462, "learning_rate": 1.90766027950441e-05, "loss": 0.8312, "step": 6820 }, { "epoch": 0.14671134596382696, "grad_norm": 0.6047536333583156, "learning_rate": 1.9073739759352728e-05, "loss": 0.8334, "step": 6830 }, { "epoch": 0.14692615027709757, "grad_norm": 0.5868050666630678, "learning_rate": 1.9070872507566988e-05, "loss": 0.8243, "step": 6840 }, { "epoch": 0.14714095459036816, "grad_norm": 0.5987664541670297, "learning_rate": 1.906800104101914e-05, "loss": 0.8259, "step": 6850 }, { "epoch": 0.14735575890363878, "grad_norm": 0.6171104544958446, "learning_rate": 1.9065125361043403e-05, "loss": 0.8265, "step": 6860 }, { "epoch": 0.1475705632169094, "grad_norm": 0.5898285999163112, "learning_rate": 1.9062245468975958e-05, "loss": 0.8208, "step": 6870 }, { "epoch": 0.14778536753018, "grad_norm": 0.6178554712481548, "learning_rate": 1.9059361366154937e-05, "loss": 0.8269, "step": 6880 }, { "epoch": 0.14800017184345063, "grad_norm": 0.5673710580373017, "learning_rate": 1.9056473053920436e-05, "loss": 0.8027, "step": 6890 }, { "epoch": 0.14821497615672122, "grad_norm": 0.599452012891556, "learning_rate": 1.9053580533614502e-05, "loss": 0.841, "step": 6900 }, { "epoch": 0.14842978046999183, "grad_norm": 0.5874536149399996, "learning_rate": 1.905068380658114e-05, "loss": 0.805, "step": 6910 }, { "epoch": 0.14864458478326245, "grad_norm": 0.5970361855515987, "learning_rate": 1.9047782874166308e-05, "loss": 0.8067, "step": 6920 }, { "epoch": 0.14885938909653307, "grad_norm": 0.6177964172907193, "learning_rate": 1.9044877737717916e-05, "loss": 0.8372, "step": 6930 }, { "epoch": 0.14907419340980366, "grad_norm": 0.5961958626534212, "learning_rate": 1.904196839858583e-05, "loss": 0.8336, "step": 6940 }, { "epoch": 0.14928899772307427, "grad_norm": 0.6016838781672733, "learning_rate": 1.9039054858121872e-05, "loss": 0.8138, "step": 6950 }, { "epoch": 0.1495038020363449, "grad_norm": 0.5772199742586428, "learning_rate": 1.903613711767981e-05, "loss": 0.8061, "step": 6960 }, { "epoch": 0.1497186063496155, "grad_norm": 0.6091085341764657, "learning_rate": 1.9033215178615363e-05, "loss": 0.8292, "step": 6970 }, { "epoch": 0.14993341066288612, "grad_norm": 0.6111170798001496, "learning_rate": 1.903028904228621e-05, "loss": 0.8123, "step": 6980 }, { "epoch": 0.1501482149761567, "grad_norm": 0.641212096959498, "learning_rate": 1.902735871005197e-05, "loss": 0.8281, "step": 6990 }, { "epoch": 0.15036301928942733, "grad_norm": 0.601677168468839, "learning_rate": 1.9024424183274216e-05, "loss": 0.8302, "step": 7000 }, { "epoch": 0.15057782360269795, "grad_norm": 0.6001519478847499, "learning_rate": 1.9021485463316468e-05, "loss": 0.8328, "step": 7010 }, { "epoch": 0.15079262791596856, "grad_norm": 0.590756165100162, "learning_rate": 1.9018542551544205e-05, "loss": 0.8283, "step": 7020 }, { "epoch": 0.15100743222923915, "grad_norm": 0.5930250026896874, "learning_rate": 1.9015595449324837e-05, "loss": 0.8211, "step": 7030 }, { "epoch": 0.15122223654250977, "grad_norm": 0.6342243168511045, "learning_rate": 1.9012644158027727e-05, "loss": 0.8136, "step": 7040 }, { "epoch": 0.15143704085578039, "grad_norm": 0.6288449639482769, "learning_rate": 1.900968867902419e-05, "loss": 0.8161, "step": 7050 }, { "epoch": 0.151651845169051, "grad_norm": 0.5885895763623495, "learning_rate": 1.9006729013687488e-05, "loss": 0.823, "step": 7060 }, { "epoch": 0.1518666494823216, "grad_norm": 0.5945227181413594, "learning_rate": 1.900376516339282e-05, "loss": 0.8184, "step": 7070 }, { "epoch": 0.1520814537955922, "grad_norm": 0.6038107671354757, "learning_rate": 1.9000797129517326e-05, "loss": 0.8338, "step": 7080 }, { "epoch": 0.15229625810886283, "grad_norm": 0.6179750378509373, "learning_rate": 1.8997824913440102e-05, "loss": 0.819, "step": 7090 }, { "epoch": 0.15251106242213344, "grad_norm": 0.580594427759617, "learning_rate": 1.8994848516542187e-05, "loss": 0.8116, "step": 7100 }, { "epoch": 0.15272586673540406, "grad_norm": 0.6629610839791461, "learning_rate": 1.899186794020655e-05, "loss": 0.8164, "step": 7110 }, { "epoch": 0.15294067104867465, "grad_norm": 0.630038582706391, "learning_rate": 1.898888318581811e-05, "loss": 0.8197, "step": 7120 }, { "epoch": 0.15315547536194526, "grad_norm": 0.6060433882973041, "learning_rate": 1.8985894254763734e-05, "loss": 0.8345, "step": 7130 }, { "epoch": 0.15337027967521588, "grad_norm": 0.5763806285603041, "learning_rate": 1.8982901148432214e-05, "loss": 0.8195, "step": 7140 }, { "epoch": 0.1535850839884865, "grad_norm": 0.5858697966874623, "learning_rate": 1.89799038682143e-05, "loss": 0.8108, "step": 7150 }, { "epoch": 0.1537998883017571, "grad_norm": 0.6192381232542434, "learning_rate": 1.8976902415502664e-05, "loss": 0.8096, "step": 7160 }, { "epoch": 0.1540146926150277, "grad_norm": 0.5775749857070134, "learning_rate": 1.8973896791691925e-05, "loss": 0.8152, "step": 7170 }, { "epoch": 0.15422949692829832, "grad_norm": 0.6222213814962546, "learning_rate": 1.8970886998178648e-05, "loss": 0.8409, "step": 7180 }, { "epoch": 0.15444430124156894, "grad_norm": 0.6088269913390629, "learning_rate": 1.8967873036361316e-05, "loss": 0.8327, "step": 7190 }, { "epoch": 0.15465910555483955, "grad_norm": 0.5930210210719912, "learning_rate": 1.896485490764037e-05, "loss": 0.8115, "step": 7200 }, { "epoch": 0.15487390986811014, "grad_norm": 0.6000706146439299, "learning_rate": 1.8961832613418173e-05, "loss": 0.8238, "step": 7210 }, { "epoch": 0.15508871418138076, "grad_norm": 0.6006800299840814, "learning_rate": 1.895880615509903e-05, "loss": 0.8145, "step": 7220 }, { "epoch": 0.15530351849465138, "grad_norm": 0.6451495489221977, "learning_rate": 1.8955775534089168e-05, "loss": 0.8222, "step": 7230 }, { "epoch": 0.155518322807922, "grad_norm": 0.5789768463565536, "learning_rate": 1.8952740751796776e-05, "loss": 0.8025, "step": 7240 }, { "epoch": 0.15573312712119258, "grad_norm": 0.5853169790059398, "learning_rate": 1.8949701809631945e-05, "loss": 0.8204, "step": 7250 }, { "epoch": 0.1559479314344632, "grad_norm": 0.5695304385263977, "learning_rate": 1.894665870900672e-05, "loss": 0.8125, "step": 7260 }, { "epoch": 0.15616273574773382, "grad_norm": 0.6049599632857573, "learning_rate": 1.8943611451335075e-05, "loss": 0.8107, "step": 7270 }, { "epoch": 0.15637754006100443, "grad_norm": 0.573259462488128, "learning_rate": 1.89405600380329e-05, "loss": 0.8135, "step": 7280 }, { "epoch": 0.15659234437427502, "grad_norm": 0.5940723497985662, "learning_rate": 1.893750447051804e-05, "loss": 0.808, "step": 7290 }, { "epoch": 0.15680714868754564, "grad_norm": 0.641894313123313, "learning_rate": 1.893444475021025e-05, "loss": 0.8177, "step": 7300 }, { "epoch": 0.15702195300081626, "grad_norm": 0.5858730208906369, "learning_rate": 1.8931380878531228e-05, "loss": 0.8286, "step": 7310 }, { "epoch": 0.15723675731408687, "grad_norm": 0.582476355104475, "learning_rate": 1.892831285690459e-05, "loss": 0.8257, "step": 7320 }, { "epoch": 0.1574515616273575, "grad_norm": 0.5832307163787261, "learning_rate": 1.892524068675589e-05, "loss": 0.8007, "step": 7330 }, { "epoch": 0.15766636594062808, "grad_norm": 0.5790768352438785, "learning_rate": 1.892216436951261e-05, "loss": 0.7953, "step": 7340 }, { "epoch": 0.1578811702538987, "grad_norm": 0.633690146863657, "learning_rate": 1.8919083906604144e-05, "loss": 0.8205, "step": 7350 }, { "epoch": 0.1580959745671693, "grad_norm": 0.617991296431158, "learning_rate": 1.891599929946183e-05, "loss": 0.8098, "step": 7360 }, { "epoch": 0.15831077888043993, "grad_norm": 0.5974548816213079, "learning_rate": 1.8912910549518924e-05, "loss": 0.8079, "step": 7370 }, { "epoch": 0.15852558319371052, "grad_norm": 0.6020174180448247, "learning_rate": 1.89098176582106e-05, "loss": 0.8177, "step": 7380 }, { "epoch": 0.15874038750698113, "grad_norm": 0.5899662948515287, "learning_rate": 1.8906720626973975e-05, "loss": 0.8175, "step": 7390 }, { "epoch": 0.15895519182025175, "grad_norm": 0.5872850170519595, "learning_rate": 1.8903619457248072e-05, "loss": 0.8005, "step": 7400 }, { "epoch": 0.15916999613352237, "grad_norm": 0.5852430036799516, "learning_rate": 1.890051415047384e-05, "loss": 0.8219, "step": 7410 }, { "epoch": 0.15938480044679298, "grad_norm": 0.5917524465188728, "learning_rate": 1.8897404708094154e-05, "loss": 0.8072, "step": 7420 }, { "epoch": 0.15959960476006357, "grad_norm": 0.5508421705343692, "learning_rate": 1.8894291131553817e-05, "loss": 0.8198, "step": 7430 }, { "epoch": 0.1598144090733342, "grad_norm": 0.5852065142165715, "learning_rate": 1.889117342229954e-05, "loss": 0.8194, "step": 7440 }, { "epoch": 0.1600292133866048, "grad_norm": 0.5801695841837192, "learning_rate": 1.8888051581779964e-05, "loss": 0.8203, "step": 7450 }, { "epoch": 0.16024401769987542, "grad_norm": 0.602952041149204, "learning_rate": 1.888492561144564e-05, "loss": 0.8059, "step": 7460 }, { "epoch": 0.160458822013146, "grad_norm": 0.6431449708685523, "learning_rate": 1.8881795512749046e-05, "loss": 0.8326, "step": 7470 }, { "epoch": 0.16067362632641663, "grad_norm": 0.6196861452682948, "learning_rate": 1.887866128714458e-05, "loss": 0.8457, "step": 7480 }, { "epoch": 0.16088843063968725, "grad_norm": 0.5649025093930304, "learning_rate": 1.887552293608855e-05, "loss": 0.8113, "step": 7490 }, { "epoch": 0.16110323495295786, "grad_norm": 0.63666785388803, "learning_rate": 1.8872380461039184e-05, "loss": 0.8264, "step": 7500 }, { "epoch": 0.16131803926622845, "grad_norm": 0.6005193455583923, "learning_rate": 1.8869233863456627e-05, "loss": 0.819, "step": 7510 }, { "epoch": 0.16153284357949907, "grad_norm": 0.6077387623232092, "learning_rate": 1.8866083144802938e-05, "loss": 0.8185, "step": 7520 }, { "epoch": 0.16174764789276969, "grad_norm": 0.5938972114256622, "learning_rate": 1.8862928306542093e-05, "loss": 0.8225, "step": 7530 }, { "epoch": 0.1619624522060403, "grad_norm": 0.6156548912606762, "learning_rate": 1.8859769350139982e-05, "loss": 0.8263, "step": 7540 }, { "epoch": 0.16217725651931092, "grad_norm": 0.6056374183349044, "learning_rate": 1.8856606277064407e-05, "loss": 0.8198, "step": 7550 }, { "epoch": 0.1623920608325815, "grad_norm": 0.5663370767459714, "learning_rate": 1.8853439088785084e-05, "loss": 0.8159, "step": 7560 }, { "epoch": 0.16260686514585213, "grad_norm": 0.5775902577797735, "learning_rate": 1.885026778677364e-05, "loss": 0.8139, "step": 7570 }, { "epoch": 0.16282166945912274, "grad_norm": 0.591831820384521, "learning_rate": 1.884709237250361e-05, "loss": 0.8176, "step": 7580 }, { "epoch": 0.16303647377239336, "grad_norm": 0.5987459436284528, "learning_rate": 1.884391284745045e-05, "loss": 0.8246, "step": 7590 }, { "epoch": 0.16325127808566395, "grad_norm": 0.5985317736090139, "learning_rate": 1.8840729213091514e-05, "loss": 0.8211, "step": 7600 }, { "epoch": 0.16346608239893456, "grad_norm": 0.5967857623504043, "learning_rate": 1.8837541470906076e-05, "loss": 0.8068, "step": 7610 }, { "epoch": 0.16368088671220518, "grad_norm": 0.5809065765322244, "learning_rate": 1.883434962237531e-05, "loss": 0.8008, "step": 7620 }, { "epoch": 0.1638956910254758, "grad_norm": 0.5851551177907041, "learning_rate": 1.8831153668982304e-05, "loss": 0.8204, "step": 7630 }, { "epoch": 0.16411049533874641, "grad_norm": 0.5917914776651239, "learning_rate": 1.882795361221205e-05, "loss": 0.8209, "step": 7640 }, { "epoch": 0.164325299652017, "grad_norm": 0.6224876855421713, "learning_rate": 1.882474945355145e-05, "loss": 0.7925, "step": 7650 }, { "epoch": 0.16454010396528762, "grad_norm": 0.5603554031670195, "learning_rate": 1.8821541194489307e-05, "loss": 0.8085, "step": 7660 }, { "epoch": 0.16475490827855824, "grad_norm": 0.5489520906362242, "learning_rate": 1.8818328836516334e-05, "loss": 0.8161, "step": 7670 }, { "epoch": 0.16496971259182885, "grad_norm": 0.6169881588767844, "learning_rate": 1.8815112381125146e-05, "loss": 0.8297, "step": 7680 }, { "epoch": 0.16518451690509944, "grad_norm": 0.5901587326877425, "learning_rate": 1.8811891829810257e-05, "loss": 0.8172, "step": 7690 }, { "epoch": 0.16539932121837006, "grad_norm": 0.561361319802584, "learning_rate": 1.8808667184068098e-05, "loss": 0.8172, "step": 7700 }, { "epoch": 0.16561412553164068, "grad_norm": 0.5859380687978089, "learning_rate": 1.880543844539699e-05, "loss": 0.8145, "step": 7710 }, { "epoch": 0.1658289298449113, "grad_norm": 0.5693560253720011, "learning_rate": 1.880220561529716e-05, "loss": 0.8259, "step": 7720 }, { "epoch": 0.16604373415818188, "grad_norm": 0.6279685202690973, "learning_rate": 1.8798968695270735e-05, "loss": 0.8073, "step": 7730 }, { "epoch": 0.1662585384714525, "grad_norm": 0.5868609499267986, "learning_rate": 1.879572768682174e-05, "loss": 0.8045, "step": 7740 }, { "epoch": 0.16647334278472312, "grad_norm": 0.5949413009843553, "learning_rate": 1.8792482591456115e-05, "loss": 0.821, "step": 7750 }, { "epoch": 0.16668814709799373, "grad_norm": 0.5734770401983357, "learning_rate": 1.8789233410681675e-05, "loss": 0.8037, "step": 7760 }, { "epoch": 0.16690295141126435, "grad_norm": 0.6165809896599992, "learning_rate": 1.8785980146008146e-05, "loss": 0.8229, "step": 7770 }, { "epoch": 0.16711775572453494, "grad_norm": 0.5747676549015432, "learning_rate": 1.8782722798947154e-05, "loss": 0.8145, "step": 7780 }, { "epoch": 0.16733256003780556, "grad_norm": 0.587504015936523, "learning_rate": 1.8779461371012224e-05, "loss": 0.8096, "step": 7790 }, { "epoch": 0.16754736435107617, "grad_norm": 0.6174090349249043, "learning_rate": 1.877619586371876e-05, "loss": 0.8142, "step": 7800 }, { "epoch": 0.1677621686643468, "grad_norm": 0.5847105333987745, "learning_rate": 1.8772926278584077e-05, "loss": 0.8247, "step": 7810 }, { "epoch": 0.16797697297761738, "grad_norm": 0.588578056608747, "learning_rate": 1.8769652617127388e-05, "loss": 0.8043, "step": 7820 }, { "epoch": 0.168191777290888, "grad_norm": 0.577693735778559, "learning_rate": 1.8766374880869785e-05, "loss": 0.8224, "step": 7830 }, { "epoch": 0.1684065816041586, "grad_norm": 0.5827121728285275, "learning_rate": 1.8763093071334263e-05, "loss": 0.8016, "step": 7840 }, { "epoch": 0.16862138591742923, "grad_norm": 0.5897150773152798, "learning_rate": 1.8759807190045715e-05, "loss": 0.8078, "step": 7850 }, { "epoch": 0.16883619023069985, "grad_norm": 0.5759666753347583, "learning_rate": 1.8756517238530904e-05, "loss": 0.8072, "step": 7860 }, { "epoch": 0.16905099454397043, "grad_norm": 0.5818077020634499, "learning_rate": 1.8753223218318515e-05, "loss": 0.8117, "step": 7870 }, { "epoch": 0.16926579885724105, "grad_norm": 0.6076398421069771, "learning_rate": 1.87499251309391e-05, "loss": 0.8033, "step": 7880 }, { "epoch": 0.16948060317051167, "grad_norm": 0.5810094500721327, "learning_rate": 1.874662297792511e-05, "loss": 0.8123, "step": 7890 }, { "epoch": 0.16969540748378228, "grad_norm": 0.6001843216635706, "learning_rate": 1.874331676081088e-05, "loss": 0.8239, "step": 7900 }, { "epoch": 0.16991021179705287, "grad_norm": 0.6000314594312535, "learning_rate": 1.874000648113264e-05, "loss": 0.8276, "step": 7910 }, { "epoch": 0.1701250161103235, "grad_norm": 0.5742460502628838, "learning_rate": 1.8736692140428506e-05, "loss": 0.8053, "step": 7920 }, { "epoch": 0.1703398204235941, "grad_norm": 0.5832371696394795, "learning_rate": 1.873337374023848e-05, "loss": 0.8116, "step": 7930 }, { "epoch": 0.17055462473686472, "grad_norm": 0.5845557501110522, "learning_rate": 1.8730051282104446e-05, "loss": 0.8221, "step": 7940 }, { "epoch": 0.1707694290501353, "grad_norm": 0.6092794931268248, "learning_rate": 1.8726724767570178e-05, "loss": 0.8034, "step": 7950 }, { "epoch": 0.17098423336340593, "grad_norm": 0.5941748660659438, "learning_rate": 1.8723394198181333e-05, "loss": 0.7995, "step": 7960 }, { "epoch": 0.17119903767667655, "grad_norm": 0.5742787477493947, "learning_rate": 1.8720059575485462e-05, "loss": 0.8093, "step": 7970 }, { "epoch": 0.17141384198994716, "grad_norm": 0.5862196213050433, "learning_rate": 1.8716720901031983e-05, "loss": 0.82, "step": 7980 }, { "epoch": 0.17162864630321778, "grad_norm": 0.595590568398061, "learning_rate": 1.8713378176372198e-05, "loss": 0.8208, "step": 7990 }, { "epoch": 0.17184345061648837, "grad_norm": 0.6014060853390284, "learning_rate": 1.871003140305931e-05, "loss": 0.8243, "step": 8000 }, { "epoch": 0.17205825492975899, "grad_norm": 0.5885951381362904, "learning_rate": 1.870668058264838e-05, "loss": 0.8184, "step": 8010 }, { "epoch": 0.1722730592430296, "grad_norm": 0.5675767542282781, "learning_rate": 1.870332571669637e-05, "loss": 0.8353, "step": 8020 }, { "epoch": 0.17248786355630022, "grad_norm": 0.5885374361788073, "learning_rate": 1.86999668067621e-05, "loss": 0.8067, "step": 8030 }, { "epoch": 0.1727026678695708, "grad_norm": 0.5725047507144028, "learning_rate": 1.869660385440629e-05, "loss": 0.809, "step": 8040 }, { "epoch": 0.17291747218284143, "grad_norm": 0.5669767417801742, "learning_rate": 1.8693236861191524e-05, "loss": 0.7987, "step": 8050 }, { "epoch": 0.17313227649611204, "grad_norm": 0.6015883976560725, "learning_rate": 1.8689865828682266e-05, "loss": 0.8249, "step": 8060 }, { "epoch": 0.17334708080938266, "grad_norm": 0.5761246694287212, "learning_rate": 1.868649075844487e-05, "loss": 0.8161, "step": 8070 }, { "epoch": 0.17356188512265328, "grad_norm": 0.5969047559623099, "learning_rate": 1.8683111652047543e-05, "loss": 0.8361, "step": 8080 }, { "epoch": 0.17377668943592386, "grad_norm": 0.5929471728979538, "learning_rate": 1.8679728511060385e-05, "loss": 0.8157, "step": 8090 }, { "epoch": 0.17399149374919448, "grad_norm": 0.5709547291713754, "learning_rate": 1.8676341337055367e-05, "loss": 0.7972, "step": 8100 }, { "epoch": 0.1742062980624651, "grad_norm": 0.5792794867839971, "learning_rate": 1.867295013160633e-05, "loss": 0.8236, "step": 8110 }, { "epoch": 0.17442110237573571, "grad_norm": 0.580543068320473, "learning_rate": 1.866955489628899e-05, "loss": 0.8322, "step": 8120 }, { "epoch": 0.1746359066890063, "grad_norm": 0.5795792932547941, "learning_rate": 1.8666155632680938e-05, "loss": 0.8128, "step": 8130 }, { "epoch": 0.17485071100227692, "grad_norm": 0.5800071115326852, "learning_rate": 1.8662752342361633e-05, "loss": 0.8164, "step": 8140 }, { "epoch": 0.17506551531554754, "grad_norm": 0.5845246581742584, "learning_rate": 1.865934502691241e-05, "loss": 0.8117, "step": 8150 }, { "epoch": 0.17528031962881815, "grad_norm": 0.5894935518444326, "learning_rate": 1.865593368791647e-05, "loss": 0.8063, "step": 8160 }, { "epoch": 0.17549512394208877, "grad_norm": 0.5877616770415447, "learning_rate": 1.8652518326958882e-05, "loss": 0.8112, "step": 8170 }, { "epoch": 0.17570992825535936, "grad_norm": 0.6072757728168394, "learning_rate": 1.8649098945626588e-05, "loss": 0.8029, "step": 8180 }, { "epoch": 0.17592473256862998, "grad_norm": 0.6010925475932754, "learning_rate": 1.8645675545508397e-05, "loss": 0.8162, "step": 8190 }, { "epoch": 0.1761395368819006, "grad_norm": 0.5718419491303891, "learning_rate": 1.864224812819498e-05, "loss": 0.8124, "step": 8200 }, { "epoch": 0.1763543411951712, "grad_norm": 0.5527383739528845, "learning_rate": 1.863881669527889e-05, "loss": 0.808, "step": 8210 }, { "epoch": 0.1765691455084418, "grad_norm": 0.573823793660002, "learning_rate": 1.8635381248354525e-05, "loss": 0.8089, "step": 8220 }, { "epoch": 0.17678394982171242, "grad_norm": 0.5638340296138096, "learning_rate": 1.8631941789018162e-05, "loss": 0.7961, "step": 8230 }, { "epoch": 0.17699875413498303, "grad_norm": 0.5646934644189842, "learning_rate": 1.862849831886794e-05, "loss": 0.8187, "step": 8240 }, { "epoch": 0.17721355844825365, "grad_norm": 0.5729549283565889, "learning_rate": 1.862505083950386e-05, "loss": 0.7946, "step": 8250 }, { "epoch": 0.17742836276152424, "grad_norm": 0.5697299724643171, "learning_rate": 1.8621599352527783e-05, "loss": 0.7989, "step": 8260 }, { "epoch": 0.17764316707479486, "grad_norm": 0.5742935431980787, "learning_rate": 1.8618143859543436e-05, "loss": 0.8068, "step": 8270 }, { "epoch": 0.17785797138806547, "grad_norm": 0.6022944664204131, "learning_rate": 1.861468436215641e-05, "loss": 0.8316, "step": 8280 }, { "epoch": 0.1780727757013361, "grad_norm": 0.6181521941813161, "learning_rate": 1.861122086197415e-05, "loss": 0.8297, "step": 8290 }, { "epoch": 0.1782875800146067, "grad_norm": 0.5854718908960673, "learning_rate": 1.8607753360605968e-05, "loss": 0.8139, "step": 8300 }, { "epoch": 0.1785023843278773, "grad_norm": 0.5677926977124813, "learning_rate": 1.8604281859663027e-05, "loss": 0.8067, "step": 8310 }, { "epoch": 0.1787171886411479, "grad_norm": 1.2822660953314349, "learning_rate": 1.8600806360758355e-05, "loss": 0.7901, "step": 8320 }, { "epoch": 0.17893199295441853, "grad_norm": 0.561871053213264, "learning_rate": 1.8597326865506838e-05, "loss": 0.8073, "step": 8330 }, { "epoch": 0.17914679726768915, "grad_norm": 0.5900981652272886, "learning_rate": 1.8593843375525205e-05, "loss": 0.8019, "step": 8340 }, { "epoch": 0.17936160158095973, "grad_norm": 0.6033017106283523, "learning_rate": 1.8590355892432068e-05, "loss": 0.8129, "step": 8350 }, { "epoch": 0.17957640589423035, "grad_norm": 0.5755568064961115, "learning_rate": 1.858686441784787e-05, "loss": 0.825, "step": 8360 }, { "epoch": 0.17979121020750097, "grad_norm": 0.5837961244746128, "learning_rate": 1.8583368953394917e-05, "loss": 0.8152, "step": 8370 }, { "epoch": 0.18000601452077158, "grad_norm": 0.5830801987993122, "learning_rate": 1.857986950069737e-05, "loss": 0.7978, "step": 8380 }, { "epoch": 0.1802208188340422, "grad_norm": 0.6071324798915776, "learning_rate": 1.8576366061381246e-05, "loss": 0.8173, "step": 8390 }, { "epoch": 0.1804356231473128, "grad_norm": 0.5787242342053142, "learning_rate": 1.857285863707441e-05, "loss": 0.8266, "step": 8400 }, { "epoch": 0.1806504274605834, "grad_norm": 0.5830266332748674, "learning_rate": 1.8569347229406575e-05, "loss": 0.8196, "step": 8410 }, { "epoch": 0.18086523177385402, "grad_norm": 0.5826514559974909, "learning_rate": 1.8565831840009312e-05, "loss": 0.7978, "step": 8420 }, { "epoch": 0.18108003608712464, "grad_norm": 0.5647693782228277, "learning_rate": 1.856231247051604e-05, "loss": 0.8152, "step": 8430 }, { "epoch": 0.18129484040039523, "grad_norm": 0.6171161964315975, "learning_rate": 1.8558789122562024e-05, "loss": 0.805, "step": 8440 }, { "epoch": 0.18150964471366585, "grad_norm": 0.574924762582272, "learning_rate": 1.8555261797784387e-05, "loss": 0.825, "step": 8450 }, { "epoch": 0.18172444902693646, "grad_norm": 0.5849907741058937, "learning_rate": 1.8551730497822086e-05, "loss": 0.813, "step": 8460 }, { "epoch": 0.18193925334020708, "grad_norm": 0.6006674682429172, "learning_rate": 1.8548195224315933e-05, "loss": 0.8058, "step": 8470 }, { "epoch": 0.18215405765347767, "grad_norm": 0.5694938669138162, "learning_rate": 1.854465597890859e-05, "loss": 0.8128, "step": 8480 }, { "epoch": 0.18236886196674829, "grad_norm": 0.5800620824875963, "learning_rate": 1.8541112763244554e-05, "loss": 0.8, "step": 8490 }, { "epoch": 0.1825836662800189, "grad_norm": 0.6003822667618681, "learning_rate": 1.8537565578970182e-05, "loss": 0.8125, "step": 8500 }, { "epoch": 0.18279847059328952, "grad_norm": 0.5805075835998008, "learning_rate": 1.8534014427733655e-05, "loss": 0.814, "step": 8510 }, { "epoch": 0.18301327490656014, "grad_norm": 0.608140890949641, "learning_rate": 1.8530459311185017e-05, "loss": 0.8074, "step": 8520 }, { "epoch": 0.18322807921983073, "grad_norm": 0.5939316987741534, "learning_rate": 1.852690023097614e-05, "loss": 0.8083, "step": 8530 }, { "epoch": 0.18344288353310134, "grad_norm": 0.5634001132722806, "learning_rate": 1.8523337188760747e-05, "loss": 0.814, "step": 8540 }, { "epoch": 0.18365768784637196, "grad_norm": 0.6076400115964997, "learning_rate": 1.8519770186194393e-05, "loss": 0.8131, "step": 8550 }, { "epoch": 0.18387249215964258, "grad_norm": 0.5804582235616338, "learning_rate": 1.851619922493448e-05, "loss": 0.8026, "step": 8560 }, { "epoch": 0.18408729647291316, "grad_norm": 0.5516633970987541, "learning_rate": 1.8512624306640254e-05, "loss": 0.8008, "step": 8570 }, { "epoch": 0.18430210078618378, "grad_norm": 0.5977108054692924, "learning_rate": 1.850904543297278e-05, "loss": 0.8093, "step": 8580 }, { "epoch": 0.1845169050994544, "grad_norm": 0.5751584053301783, "learning_rate": 1.850546260559499e-05, "loss": 0.8069, "step": 8590 }, { "epoch": 0.18473170941272501, "grad_norm": 0.5683030910307522, "learning_rate": 1.8501875826171632e-05, "loss": 0.8088, "step": 8600 }, { "epoch": 0.18494651372599563, "grad_norm": 0.5981914621652948, "learning_rate": 1.8498285096369287e-05, "loss": 0.815, "step": 8610 }, { "epoch": 0.18516131803926622, "grad_norm": 0.5987116882619, "learning_rate": 1.849469041785639e-05, "loss": 0.8247, "step": 8620 }, { "epoch": 0.18537612235253684, "grad_norm": 0.598340119849919, "learning_rate": 1.8491091792303203e-05, "loss": 0.7891, "step": 8630 }, { "epoch": 0.18559092666580745, "grad_norm": 0.5874259069859069, "learning_rate": 1.8487489221381807e-05, "loss": 0.7963, "step": 8640 }, { "epoch": 0.18580573097907807, "grad_norm": 0.60338871634641, "learning_rate": 1.848388270676614e-05, "loss": 0.8176, "step": 8650 }, { "epoch": 0.18602053529234866, "grad_norm": 0.6382989451380708, "learning_rate": 1.848027225013196e-05, "loss": 0.8034, "step": 8660 }, { "epoch": 0.18623533960561928, "grad_norm": 0.6100978833111902, "learning_rate": 1.8476657853156854e-05, "loss": 0.8295, "step": 8670 }, { "epoch": 0.1864501439188899, "grad_norm": 0.5847210050409527, "learning_rate": 1.8473039517520245e-05, "loss": 0.8093, "step": 8680 }, { "epoch": 0.1866649482321605, "grad_norm": 0.5720679964643604, "learning_rate": 1.846941724490339e-05, "loss": 0.7988, "step": 8690 }, { "epoch": 0.1868797525454311, "grad_norm": 0.6251350978467913, "learning_rate": 1.8465791036989367e-05, "loss": 0.7998, "step": 8700 }, { "epoch": 0.18709455685870172, "grad_norm": 0.5765117453954964, "learning_rate": 1.8462160895463087e-05, "loss": 0.8121, "step": 8710 }, { "epoch": 0.18730936117197233, "grad_norm": 0.5652462786030213, "learning_rate": 1.8458526822011283e-05, "loss": 0.8102, "step": 8720 }, { "epoch": 0.18752416548524295, "grad_norm": 0.5618404540951464, "learning_rate": 1.8454888818322527e-05, "loss": 0.7841, "step": 8730 }, { "epoch": 0.18773896979851357, "grad_norm": 0.5715079105467191, "learning_rate": 1.8451246886087207e-05, "loss": 0.8019, "step": 8740 }, { "epoch": 0.18795377411178416, "grad_norm": 0.5786067429900265, "learning_rate": 1.8447601026997534e-05, "loss": 0.8185, "step": 8750 }, { "epoch": 0.18816857842505477, "grad_norm": 0.5618942537054571, "learning_rate": 1.8443951242747558e-05, "loss": 0.8122, "step": 8760 }, { "epoch": 0.1883833827383254, "grad_norm": 0.5602005480295104, "learning_rate": 1.8440297535033137e-05, "loss": 0.8044, "step": 8770 }, { "epoch": 0.188598187051596, "grad_norm": 0.5526135225587552, "learning_rate": 1.8436639905551966e-05, "loss": 0.8132, "step": 8780 }, { "epoch": 0.1888129913648666, "grad_norm": 0.5708051150082964, "learning_rate": 1.8432978356003544e-05, "loss": 0.7984, "step": 8790 }, { "epoch": 0.1890277956781372, "grad_norm": 0.5845241545642078, "learning_rate": 1.842931288808921e-05, "loss": 0.8215, "step": 8800 }, { "epoch": 0.18924259999140783, "grad_norm": 0.5747369107161527, "learning_rate": 1.8425643503512115e-05, "loss": 0.8027, "step": 8810 }, { "epoch": 0.18945740430467845, "grad_norm": 0.5726755466246907, "learning_rate": 1.842197020397723e-05, "loss": 0.7891, "step": 8820 }, { "epoch": 0.18967220861794906, "grad_norm": 0.5757944229048639, "learning_rate": 1.841829299119135e-05, "loss": 0.8064, "step": 8830 }, { "epoch": 0.18988701293121965, "grad_norm": 0.5911903584887567, "learning_rate": 1.8414611866863078e-05, "loss": 0.8239, "step": 8840 }, { "epoch": 0.19010181724449027, "grad_norm": 0.5784649631481444, "learning_rate": 1.8410926832702842e-05, "loss": 0.8074, "step": 8850 }, { "epoch": 0.19031662155776088, "grad_norm": 0.5930932814488833, "learning_rate": 1.8407237890422888e-05, "loss": 0.8226, "step": 8860 }, { "epoch": 0.1905314258710315, "grad_norm": 0.5579023925128831, "learning_rate": 1.8403545041737275e-05, "loss": 0.8008, "step": 8870 }, { "epoch": 0.1907462301843021, "grad_norm": 0.5988868685379616, "learning_rate": 1.8399848288361878e-05, "loss": 0.8202, "step": 8880 }, { "epoch": 0.1909610344975727, "grad_norm": 0.5633872393293222, "learning_rate": 1.8396147632014383e-05, "loss": 0.7865, "step": 8890 }, { "epoch": 0.19117583881084332, "grad_norm": 0.579775369575011, "learning_rate": 1.8392443074414292e-05, "loss": 0.8081, "step": 8900 }, { "epoch": 0.19139064312411394, "grad_norm": 0.5761284131901169, "learning_rate": 1.8388734617282926e-05, "loss": 0.7957, "step": 8910 }, { "epoch": 0.19160544743738453, "grad_norm": 0.5790288426152821, "learning_rate": 1.8385022262343405e-05, "loss": 0.8016, "step": 8920 }, { "epoch": 0.19182025175065515, "grad_norm": 0.5835056469508209, "learning_rate": 1.8381306011320673e-05, "loss": 0.8192, "step": 8930 }, { "epoch": 0.19203505606392576, "grad_norm": 0.5600680666644086, "learning_rate": 1.837758586594147e-05, "loss": 0.8041, "step": 8940 }, { "epoch": 0.19224986037719638, "grad_norm": 0.6130002119585894, "learning_rate": 1.8373861827934364e-05, "loss": 0.8173, "step": 8950 }, { "epoch": 0.192464664690467, "grad_norm": 0.5673926783038583, "learning_rate": 1.8370133899029717e-05, "loss": 0.8119, "step": 8960 }, { "epoch": 0.19267946900373759, "grad_norm": 0.5917000843066154, "learning_rate": 1.83664020809597e-05, "loss": 0.8172, "step": 8970 }, { "epoch": 0.1928942733170082, "grad_norm": 0.5733143994810274, "learning_rate": 1.83626663754583e-05, "loss": 0.8032, "step": 8980 }, { "epoch": 0.19310907763027882, "grad_norm": 0.5592658911509888, "learning_rate": 1.8358926784261303e-05, "loss": 0.7981, "step": 8990 }, { "epoch": 0.19332388194354944, "grad_norm": 0.562962728797357, "learning_rate": 1.8355183309106297e-05, "loss": 0.8156, "step": 9000 }, { "epoch": 0.19353868625682003, "grad_norm": 0.569803487699369, "learning_rate": 1.8351435951732692e-05, "loss": 0.8001, "step": 9010 }, { "epoch": 0.19375349057009064, "grad_norm": 0.5789202611510278, "learning_rate": 1.8347684713881675e-05, "loss": 0.8116, "step": 9020 }, { "epoch": 0.19396829488336126, "grad_norm": 0.5768841779522369, "learning_rate": 1.8343929597296265e-05, "loss": 0.8075, "step": 9030 }, { "epoch": 0.19418309919663188, "grad_norm": 0.5643884618141694, "learning_rate": 1.8340170603721258e-05, "loss": 0.8005, "step": 9040 }, { "epoch": 0.1943979035099025, "grad_norm": 0.6040260449303918, "learning_rate": 1.8336407734903266e-05, "loss": 0.8037, "step": 9050 }, { "epoch": 0.19461270782317308, "grad_norm": 0.547010947358847, "learning_rate": 1.83326409925907e-05, "loss": 0.8086, "step": 9060 }, { "epoch": 0.1948275121364437, "grad_norm": 0.5791718088895571, "learning_rate": 1.8328870378533774e-05, "loss": 0.8221, "step": 9070 }, { "epoch": 0.19504231644971431, "grad_norm": 0.5929678639479692, "learning_rate": 1.8325095894484487e-05, "loss": 0.8037, "step": 9080 }, { "epoch": 0.19525712076298493, "grad_norm": 0.5837325290517947, "learning_rate": 1.8321317542196645e-05, "loss": 0.8214, "step": 9090 }, { "epoch": 0.19547192507625552, "grad_norm": 0.6021341746404556, "learning_rate": 1.831753532342586e-05, "loss": 0.808, "step": 9100 }, { "epoch": 0.19568672938952614, "grad_norm": 0.5851595859581337, "learning_rate": 1.8313749239929527e-05, "loss": 0.8027, "step": 9110 }, { "epoch": 0.19590153370279675, "grad_norm": 0.6429053021754854, "learning_rate": 1.830995929346684e-05, "loss": 0.8165, "step": 9120 }, { "epoch": 0.19611633801606737, "grad_norm": 0.6030014532201955, "learning_rate": 1.830616548579879e-05, "loss": 0.8246, "step": 9130 }, { "epoch": 0.19633114232933796, "grad_norm": 0.6323373448001347, "learning_rate": 1.8302367818688168e-05, "loss": 0.8079, "step": 9140 }, { "epoch": 0.19654594664260858, "grad_norm": 0.5846336022969297, "learning_rate": 1.8298566293899543e-05, "loss": 0.8269, "step": 9150 }, { "epoch": 0.1967607509558792, "grad_norm": 0.5607765403444699, "learning_rate": 1.8294760913199296e-05, "loss": 0.7932, "step": 9160 }, { "epoch": 0.1969755552691498, "grad_norm": 0.5738811351652273, "learning_rate": 1.8290951678355583e-05, "loss": 0.8121, "step": 9170 }, { "epoch": 0.19719035958242043, "grad_norm": 0.5893344214505519, "learning_rate": 1.8287138591138355e-05, "loss": 0.8227, "step": 9180 }, { "epoch": 0.19740516389569102, "grad_norm": 0.5849184704039714, "learning_rate": 1.828332165331936e-05, "loss": 0.8037, "step": 9190 }, { "epoch": 0.19761996820896163, "grad_norm": 0.5844113484112203, "learning_rate": 1.8279500866672124e-05, "loss": 0.814, "step": 9200 }, { "epoch": 0.19783477252223225, "grad_norm": 0.5652668990930761, "learning_rate": 1.8275676232971977e-05, "loss": 0.8119, "step": 9210 }, { "epoch": 0.19804957683550287, "grad_norm": 0.597730632354087, "learning_rate": 1.827184775399602e-05, "loss": 0.8053, "step": 9220 }, { "epoch": 0.19826438114877346, "grad_norm": 0.5888675343237012, "learning_rate": 1.8268015431523147e-05, "loss": 0.801, "step": 9230 }, { "epoch": 0.19847918546204407, "grad_norm": 0.5663470763977005, "learning_rate": 1.8264179267334043e-05, "loss": 0.8135, "step": 9240 }, { "epoch": 0.1986939897753147, "grad_norm": 0.5533581191534693, "learning_rate": 1.8260339263211174e-05, "loss": 0.79, "step": 9250 }, { "epoch": 0.1989087940885853, "grad_norm": 0.5979441715439602, "learning_rate": 1.8256495420938786e-05, "loss": 0.7929, "step": 9260 }, { "epoch": 0.19912359840185592, "grad_norm": 0.5825141443552068, "learning_rate": 1.8252647742302914e-05, "loss": 0.8056, "step": 9270 }, { "epoch": 0.1993384027151265, "grad_norm": 0.5757431007161539, "learning_rate": 1.8248796229091373e-05, "loss": 0.8138, "step": 9280 }, { "epoch": 0.19955320702839713, "grad_norm": 0.6259334303275133, "learning_rate": 1.8244940883093767e-05, "loss": 0.813, "step": 9290 }, { "epoch": 0.19976801134166774, "grad_norm": 0.549857706300012, "learning_rate": 1.824108170610147e-05, "loss": 0.8077, "step": 9300 }, { "epoch": 0.19998281565493836, "grad_norm": 0.56236031168535, "learning_rate": 1.8237218699907635e-05, "loss": 0.8045, "step": 9310 }, { "epoch": 0.20019761996820895, "grad_norm": 0.6015575274536221, "learning_rate": 1.8233351866307206e-05, "loss": 0.8061, "step": 9320 }, { "epoch": 0.20041242428147957, "grad_norm": 0.6085710236514931, "learning_rate": 1.8229481207096896e-05, "loss": 0.8018, "step": 9330 }, { "epoch": 0.20062722859475018, "grad_norm": 0.597465062355765, "learning_rate": 1.8225606724075205e-05, "loss": 0.8122, "step": 9340 }, { "epoch": 0.2008420329080208, "grad_norm": 0.5653695698060156, "learning_rate": 1.82217284190424e-05, "loss": 0.7998, "step": 9350 }, { "epoch": 0.2010568372212914, "grad_norm": 0.5803499948562468, "learning_rate": 1.8217846293800523e-05, "loss": 0.8171, "step": 9360 }, { "epoch": 0.201271641534562, "grad_norm": 0.5914980494379427, "learning_rate": 1.82139603501534e-05, "loss": 0.8078, "step": 9370 }, { "epoch": 0.20148644584783262, "grad_norm": 0.5753469591052403, "learning_rate": 1.8210070589906628e-05, "loss": 0.8144, "step": 9380 }, { "epoch": 0.20170125016110324, "grad_norm": 0.5917347916307549, "learning_rate": 1.8206177014867572e-05, "loss": 0.8078, "step": 9390 }, { "epoch": 0.20191605447437386, "grad_norm": 0.5524089908557045, "learning_rate": 1.8202279626845377e-05, "loss": 0.8028, "step": 9400 }, { "epoch": 0.20213085878764445, "grad_norm": 0.5529213134018454, "learning_rate": 1.8198378427650955e-05, "loss": 0.7994, "step": 9410 }, { "epoch": 0.20234566310091506, "grad_norm": 0.567892445973469, "learning_rate": 1.8194473419096987e-05, "loss": 0.8094, "step": 9420 }, { "epoch": 0.20256046741418568, "grad_norm": 0.5858778724872428, "learning_rate": 1.819056460299793e-05, "loss": 0.7983, "step": 9430 }, { "epoch": 0.2027752717274563, "grad_norm": 0.573018383200498, "learning_rate": 1.818665198117001e-05, "loss": 0.8113, "step": 9440 }, { "epoch": 0.20299007604072689, "grad_norm": 0.5815406945956942, "learning_rate": 1.8182735555431213e-05, "loss": 0.8131, "step": 9450 }, { "epoch": 0.2032048803539975, "grad_norm": 0.5712680885146894, "learning_rate": 1.8178815327601306e-05, "loss": 0.8, "step": 9460 }, { "epoch": 0.20341968466726812, "grad_norm": 0.7090531423336768, "learning_rate": 1.8174891299501807e-05, "loss": 0.8117, "step": 9470 }, { "epoch": 0.20363448898053874, "grad_norm": 0.6104073346551036, "learning_rate": 1.8170963472956006e-05, "loss": 0.8203, "step": 9480 }, { "epoch": 0.20384929329380935, "grad_norm": 0.5768361886322763, "learning_rate": 1.816703184978897e-05, "loss": 0.8079, "step": 9490 }, { "epoch": 0.20406409760707994, "grad_norm": 0.5351105176871881, "learning_rate": 1.816309643182751e-05, "loss": 0.7961, "step": 9500 }, { "epoch": 0.20427890192035056, "grad_norm": 0.5636835938070731, "learning_rate": 1.8159157220900216e-05, "loss": 0.7832, "step": 9510 }, { "epoch": 0.20449370623362118, "grad_norm": 0.5730516941042331, "learning_rate": 1.815521421883743e-05, "loss": 0.7937, "step": 9520 }, { "epoch": 0.2047085105468918, "grad_norm": 0.5733890782084365, "learning_rate": 1.815126742747126e-05, "loss": 0.8011, "step": 9530 }, { "epoch": 0.20492331486016238, "grad_norm": 0.5616802718550227, "learning_rate": 1.8147316848635576e-05, "loss": 0.8038, "step": 9540 }, { "epoch": 0.205138119173433, "grad_norm": 0.5889286620725945, "learning_rate": 1.8143362484166004e-05, "loss": 0.8021, "step": 9550 }, { "epoch": 0.20535292348670361, "grad_norm": 0.5585779556583447, "learning_rate": 1.8139404335899937e-05, "loss": 0.8058, "step": 9560 }, { "epoch": 0.20556772779997423, "grad_norm": 0.571453117672738, "learning_rate": 1.813544240567651e-05, "loss": 0.804, "step": 9570 }, { "epoch": 0.20578253211324482, "grad_norm": 0.5654185920520682, "learning_rate": 1.8131476695336636e-05, "loss": 0.802, "step": 9580 }, { "epoch": 0.20599733642651544, "grad_norm": 0.5844027397396927, "learning_rate": 1.8127507206722973e-05, "loss": 0.8019, "step": 9590 }, { "epoch": 0.20621214073978605, "grad_norm": 0.5689638203458927, "learning_rate": 1.8123533941679928e-05, "loss": 0.8094, "step": 9600 }, { "epoch": 0.20642694505305667, "grad_norm": 0.577234661880537, "learning_rate": 1.8119556902053678e-05, "loss": 0.8246, "step": 9610 }, { "epoch": 0.2066417493663273, "grad_norm": 0.5763434534447465, "learning_rate": 1.811557608969214e-05, "loss": 0.8158, "step": 9620 }, { "epoch": 0.20685655367959788, "grad_norm": 0.5778688135687731, "learning_rate": 1.8111591506444997e-05, "loss": 0.7955, "step": 9630 }, { "epoch": 0.2070713579928685, "grad_norm": 0.5813830588006104, "learning_rate": 1.810760315416367e-05, "loss": 0.7891, "step": 9640 }, { "epoch": 0.2072861623061391, "grad_norm": 0.576252297984537, "learning_rate": 1.8103611034701348e-05, "loss": 0.8085, "step": 9650 }, { "epoch": 0.20750096661940973, "grad_norm": 0.5837430318834449, "learning_rate": 1.8099615149912953e-05, "loss": 0.807, "step": 9660 }, { "epoch": 0.20771577093268032, "grad_norm": 0.5890346965782849, "learning_rate": 1.8095615501655166e-05, "loss": 0.7944, "step": 9670 }, { "epoch": 0.20793057524595093, "grad_norm": 0.5852131021460842, "learning_rate": 1.8091612091786416e-05, "loss": 0.8088, "step": 9680 }, { "epoch": 0.20814537955922155, "grad_norm": 0.5589031639536682, "learning_rate": 1.8087604922166884e-05, "loss": 0.8121, "step": 9690 }, { "epoch": 0.20836018387249217, "grad_norm": 0.5837838090958873, "learning_rate": 1.8083593994658483e-05, "loss": 0.8031, "step": 9700 }, { "epoch": 0.20857498818576278, "grad_norm": 0.5759528081913161, "learning_rate": 1.807957931112489e-05, "loss": 0.7909, "step": 9710 }, { "epoch": 0.20878979249903337, "grad_norm": 0.6143282377901799, "learning_rate": 1.807556087343152e-05, "loss": 0.8115, "step": 9720 }, { "epoch": 0.209004596812304, "grad_norm": 0.578534081939633, "learning_rate": 1.8071538683445524e-05, "loss": 0.8073, "step": 9730 }, { "epoch": 0.2092194011255746, "grad_norm": 0.5990944031783112, "learning_rate": 1.806751274303581e-05, "loss": 0.8145, "step": 9740 }, { "epoch": 0.20943420543884522, "grad_norm": 0.5374932915323056, "learning_rate": 1.8063483054073026e-05, "loss": 0.8186, "step": 9750 }, { "epoch": 0.2096490097521158, "grad_norm": 0.573621801810147, "learning_rate": 1.805944961842955e-05, "loss": 0.7858, "step": 9760 }, { "epoch": 0.20986381406538643, "grad_norm": 0.5667771093016414, "learning_rate": 1.8055412437979517e-05, "loss": 0.815, "step": 9770 }, { "epoch": 0.21007861837865704, "grad_norm": 0.5777392591556257, "learning_rate": 1.805137151459879e-05, "loss": 0.8099, "step": 9780 }, { "epoch": 0.21029342269192766, "grad_norm": 0.5716970868389603, "learning_rate": 1.804732685016498e-05, "loss": 0.7963, "step": 9790 }, { "epoch": 0.21050822700519825, "grad_norm": 0.5784937064403081, "learning_rate": 1.8043278446557424e-05, "loss": 0.7985, "step": 9800 }, { "epoch": 0.21072303131846887, "grad_norm": 0.5883045627604655, "learning_rate": 1.8039226305657212e-05, "loss": 0.8074, "step": 9810 }, { "epoch": 0.21093783563173948, "grad_norm": 0.5514729284429919, "learning_rate": 1.803517042934716e-05, "loss": 0.8004, "step": 9820 }, { "epoch": 0.2111526399450101, "grad_norm": 0.5816019682238576, "learning_rate": 1.8031110819511825e-05, "loss": 0.8012, "step": 9830 }, { "epoch": 0.21136744425828072, "grad_norm": 0.5870070746603354, "learning_rate": 1.8027047478037495e-05, "loss": 0.8057, "step": 9840 }, { "epoch": 0.2115822485715513, "grad_norm": 0.6076535851376632, "learning_rate": 1.802298040681219e-05, "loss": 0.8269, "step": 9850 }, { "epoch": 0.21179705288482192, "grad_norm": 0.5748747580649357, "learning_rate": 1.801890960772567e-05, "loss": 0.8158, "step": 9860 }, { "epoch": 0.21201185719809254, "grad_norm": 0.5786976018669034, "learning_rate": 1.801483508266942e-05, "loss": 0.7988, "step": 9870 }, { "epoch": 0.21222666151136316, "grad_norm": 0.540488437271146, "learning_rate": 1.8010756833536663e-05, "loss": 0.8049, "step": 9880 }, { "epoch": 0.21244146582463375, "grad_norm": 0.6221560289316503, "learning_rate": 1.800667486222235e-05, "loss": 0.7987, "step": 9890 }, { "epoch": 0.21265627013790436, "grad_norm": 0.5961921503665486, "learning_rate": 1.8002589170623155e-05, "loss": 0.8264, "step": 9900 }, { "epoch": 0.21287107445117498, "grad_norm": 0.5741672373117042, "learning_rate": 1.7998499760637492e-05, "loss": 0.798, "step": 9910 }, { "epoch": 0.2130858787644456, "grad_norm": 0.5615151845280278, "learning_rate": 1.7994406634165492e-05, "loss": 0.8067, "step": 9920 }, { "epoch": 0.2133006830777162, "grad_norm": 0.5836885054107523, "learning_rate": 1.799030979310902e-05, "loss": 0.7914, "step": 9930 }, { "epoch": 0.2135154873909868, "grad_norm": 0.5764046639257419, "learning_rate": 1.7986209239371665e-05, "loss": 0.7975, "step": 9940 }, { "epoch": 0.21373029170425742, "grad_norm": 0.5848553181136704, "learning_rate": 1.798210497485874e-05, "loss": 0.802, "step": 9950 }, { "epoch": 0.21394509601752804, "grad_norm": 0.5739518953663529, "learning_rate": 1.7977997001477285e-05, "loss": 0.8157, "step": 9960 }, { "epoch": 0.21415990033079865, "grad_norm": 0.5929025643332227, "learning_rate": 1.797388532113606e-05, "loss": 0.7859, "step": 9970 }, { "epoch": 0.21437470464406924, "grad_norm": 0.5661521287114709, "learning_rate": 1.7969769935745544e-05, "loss": 0.7999, "step": 9980 }, { "epoch": 0.21458950895733986, "grad_norm": 0.5422378316949167, "learning_rate": 1.796565084721795e-05, "loss": 0.7842, "step": 9990 }, { "epoch": 0.21480431327061048, "grad_norm": 0.6043847597224838, "learning_rate": 1.79615280574672e-05, "loss": 0.8107, "step": 10000 }, { "epoch": 0.2150191175838811, "grad_norm": 0.588943789547453, "learning_rate": 1.795740156840894e-05, "loss": 0.8095, "step": 10010 }, { "epoch": 0.21523392189715168, "grad_norm": 0.5773036210661655, "learning_rate": 1.7953271381960536e-05, "loss": 0.8043, "step": 10020 }, { "epoch": 0.2154487262104223, "grad_norm": 0.5758207275091537, "learning_rate": 1.794913750004107e-05, "loss": 0.793, "step": 10030 }, { "epoch": 0.21566353052369291, "grad_norm": 0.5566484791876923, "learning_rate": 1.7944999924571345e-05, "loss": 0.7877, "step": 10040 }, { "epoch": 0.21587833483696353, "grad_norm": 0.5695977896565292, "learning_rate": 1.7940858657473867e-05, "loss": 0.7956, "step": 10050 }, { "epoch": 0.21609313915023415, "grad_norm": 0.5792438417645663, "learning_rate": 1.7936713700672874e-05, "loss": 0.7905, "step": 10060 }, { "epoch": 0.21630794346350474, "grad_norm": 0.5702659128860479, "learning_rate": 1.7932565056094312e-05, "loss": 0.8087, "step": 10070 }, { "epoch": 0.21652274777677535, "grad_norm": 0.6188318728871035, "learning_rate": 1.7928412725665844e-05, "loss": 0.8039, "step": 10080 }, { "epoch": 0.21673755209004597, "grad_norm": 0.5606872536463133, "learning_rate": 1.792425671131683e-05, "loss": 0.7881, "step": 10090 }, { "epoch": 0.2169523564033166, "grad_norm": 0.5722706317917913, "learning_rate": 1.792009701497836e-05, "loss": 0.7996, "step": 10100 }, { "epoch": 0.21716716071658718, "grad_norm": 0.7107815425462004, "learning_rate": 1.791593363858323e-05, "loss": 0.8148, "step": 10110 }, { "epoch": 0.2173819650298578, "grad_norm": 0.5883477981610786, "learning_rate": 1.7911766584065945e-05, "loss": 0.7909, "step": 10120 }, { "epoch": 0.2175967693431284, "grad_norm": 0.5798116807331893, "learning_rate": 1.7907595853362713e-05, "loss": 0.7983, "step": 10130 }, { "epoch": 0.21781157365639903, "grad_norm": 0.5609531537994755, "learning_rate": 1.790342144841146e-05, "loss": 0.7961, "step": 10140 }, { "epoch": 0.21802637796966964, "grad_norm": 0.6517122093198296, "learning_rate": 1.7899243371151813e-05, "loss": 0.7861, "step": 10150 }, { "epoch": 0.21824118228294023, "grad_norm": 0.5516309036102172, "learning_rate": 1.7895061623525104e-05, "loss": 0.8067, "step": 10160 }, { "epoch": 0.21845598659621085, "grad_norm": 0.5613471363551992, "learning_rate": 1.789087620747438e-05, "loss": 0.7923, "step": 10170 }, { "epoch": 0.21867079090948147, "grad_norm": 0.9188778029201942, "learning_rate": 1.788668712494438e-05, "loss": 0.8096, "step": 10180 }, { "epoch": 0.21888559522275208, "grad_norm": 0.5617547681562008, "learning_rate": 1.7882494377881558e-05, "loss": 0.7964, "step": 10190 }, { "epoch": 0.21910039953602267, "grad_norm": 0.61534749263247, "learning_rate": 1.7878297968234055e-05, "loss": 0.7946, "step": 10200 }, { "epoch": 0.2193152038492933, "grad_norm": 0.5782211052832341, "learning_rate": 1.7874097897951737e-05, "loss": 0.7873, "step": 10210 }, { "epoch": 0.2195300081625639, "grad_norm": 0.5514543385854074, "learning_rate": 1.786989416898615e-05, "loss": 0.7975, "step": 10220 }, { "epoch": 0.21974481247583452, "grad_norm": 0.5480749170558831, "learning_rate": 1.786568678329055e-05, "loss": 0.8018, "step": 10230 }, { "epoch": 0.21995961678910514, "grad_norm": 0.5783322467533664, "learning_rate": 1.7861475742819885e-05, "loss": 0.7848, "step": 10240 }, { "epoch": 0.22017442110237573, "grad_norm": 0.5737996720368215, "learning_rate": 1.7857261049530817e-05, "loss": 0.7953, "step": 10250 }, { "epoch": 0.22038922541564634, "grad_norm": 0.5763402239431955, "learning_rate": 1.7853042705381684e-05, "loss": 0.7851, "step": 10260 }, { "epoch": 0.22060402972891696, "grad_norm": 0.5731754369723199, "learning_rate": 1.7848820712332542e-05, "loss": 0.8126, "step": 10270 }, { "epoch": 0.22081883404218758, "grad_norm": 0.5772583690128964, "learning_rate": 1.784459507234512e-05, "loss": 0.82, "step": 10280 }, { "epoch": 0.22103363835545817, "grad_norm": 0.5411569999708086, "learning_rate": 1.7840365787382858e-05, "loss": 0.7931, "step": 10290 }, { "epoch": 0.22124844266872878, "grad_norm": 0.5653187339476065, "learning_rate": 1.7836132859410885e-05, "loss": 0.8067, "step": 10300 }, { "epoch": 0.2214632469819994, "grad_norm": 0.565092919613001, "learning_rate": 1.783189629039602e-05, "loss": 0.7973, "step": 10310 }, { "epoch": 0.22167805129527002, "grad_norm": 0.5520905710174285, "learning_rate": 1.782765608230678e-05, "loss": 0.8074, "step": 10320 }, { "epoch": 0.2218928556085406, "grad_norm": 0.5499770181938648, "learning_rate": 1.782341223711336e-05, "loss": 0.8047, "step": 10330 }, { "epoch": 0.22210765992181122, "grad_norm": 0.5770398453057336, "learning_rate": 1.7819164756787667e-05, "loss": 0.8024, "step": 10340 }, { "epoch": 0.22232246423508184, "grad_norm": 0.6005922987428989, "learning_rate": 1.781491364330327e-05, "loss": 0.8023, "step": 10350 }, { "epoch": 0.22253726854835246, "grad_norm": 0.5692299117115992, "learning_rate": 1.7810658898635455e-05, "loss": 0.7944, "step": 10360 }, { "epoch": 0.22275207286162307, "grad_norm": 0.5657236504416966, "learning_rate": 1.780640052476117e-05, "loss": 0.8146, "step": 10370 }, { "epoch": 0.22296687717489366, "grad_norm": 0.5561699306834719, "learning_rate": 1.780213852365906e-05, "loss": 0.785, "step": 10380 }, { "epoch": 0.22318168148816428, "grad_norm": 0.7081144347430791, "learning_rate": 1.779787289730946e-05, "loss": 0.8007, "step": 10390 }, { "epoch": 0.2233964858014349, "grad_norm": 0.5625449988527748, "learning_rate": 1.779360364769438e-05, "loss": 0.7903, "step": 10400 }, { "epoch": 0.2236112901147055, "grad_norm": 0.5443207117710023, "learning_rate": 1.7789330776797515e-05, "loss": 0.8029, "step": 10410 }, { "epoch": 0.2238260944279761, "grad_norm": 0.5625349389867544, "learning_rate": 1.7785054286604254e-05, "loss": 0.8169, "step": 10420 }, { "epoch": 0.22404089874124672, "grad_norm": 0.5476014487186761, "learning_rate": 1.7780774179101654e-05, "loss": 0.7917, "step": 10430 }, { "epoch": 0.22425570305451734, "grad_norm": 0.5800827416013181, "learning_rate": 1.7776490456278462e-05, "loss": 0.8073, "step": 10440 }, { "epoch": 0.22447050736778795, "grad_norm": 0.5542113664451737, "learning_rate": 1.7772203120125095e-05, "loss": 0.7947, "step": 10450 }, { "epoch": 0.22468531168105857, "grad_norm": 0.5404026820286727, "learning_rate": 1.776791217263366e-05, "loss": 0.8024, "step": 10460 }, { "epoch": 0.22490011599432916, "grad_norm": 0.5538986415644934, "learning_rate": 1.7763617615797934e-05, "loss": 0.7828, "step": 10470 }, { "epoch": 0.22511492030759978, "grad_norm": 0.5585745280123449, "learning_rate": 1.7759319451613376e-05, "loss": 0.801, "step": 10480 }, { "epoch": 0.2253297246208704, "grad_norm": 0.5622099418484012, "learning_rate": 1.7755017682077118e-05, "loss": 0.7945, "step": 10490 }, { "epoch": 0.225544528934141, "grad_norm": 0.5370985490496776, "learning_rate": 1.7750712309187967e-05, "loss": 0.7863, "step": 10500 }, { "epoch": 0.2257593332474116, "grad_norm": 0.5372310056648028, "learning_rate": 1.7746403334946407e-05, "loss": 0.7946, "step": 10510 }, { "epoch": 0.22597413756068221, "grad_norm": 0.556076402437103, "learning_rate": 1.7742090761354596e-05, "loss": 0.7881, "step": 10520 }, { "epoch": 0.22618894187395283, "grad_norm": 0.5580749453853192, "learning_rate": 1.7737774590416358e-05, "loss": 0.7938, "step": 10530 }, { "epoch": 0.22640374618722345, "grad_norm": 0.5957795192444436, "learning_rate": 1.7733454824137196e-05, "loss": 0.7955, "step": 10540 }, { "epoch": 0.22661855050049404, "grad_norm": 0.5712477046163247, "learning_rate": 1.772913146452428e-05, "loss": 0.8038, "step": 10550 }, { "epoch": 0.22683335481376465, "grad_norm": 0.548160058046596, "learning_rate": 1.7724804513586448e-05, "loss": 0.7939, "step": 10560 }, { "epoch": 0.22704815912703527, "grad_norm": 0.5369592474615921, "learning_rate": 1.7720473973334213e-05, "loss": 0.7993, "step": 10570 }, { "epoch": 0.2272629634403059, "grad_norm": 0.5423611002909383, "learning_rate": 1.7716139845779746e-05, "loss": 0.8045, "step": 10580 }, { "epoch": 0.2274777677535765, "grad_norm": 0.5550989146015426, "learning_rate": 1.7711802132936896e-05, "loss": 0.7892, "step": 10590 }, { "epoch": 0.2276925720668471, "grad_norm": 0.5703653518043215, "learning_rate": 1.770746083682117e-05, "loss": 0.8055, "step": 10600 }, { "epoch": 0.2279073763801177, "grad_norm": 0.5615344431225333, "learning_rate": 1.7703115959449738e-05, "loss": 0.7979, "step": 10610 }, { "epoch": 0.22812218069338833, "grad_norm": 0.553731045220489, "learning_rate": 1.7698767502841445e-05, "loss": 0.811, "step": 10620 }, { "epoch": 0.22833698500665894, "grad_norm": 0.5501876227065152, "learning_rate": 1.769441546901679e-05, "loss": 0.7964, "step": 10630 }, { "epoch": 0.22855178931992953, "grad_norm": 0.5412824361191674, "learning_rate": 1.7690059859997935e-05, "loss": 0.7859, "step": 10640 }, { "epoch": 0.22876659363320015, "grad_norm": 0.5751057669815162, "learning_rate": 1.7685700677808703e-05, "loss": 0.8021, "step": 10650 }, { "epoch": 0.22898139794647077, "grad_norm": 0.5413904254802253, "learning_rate": 1.7681337924474585e-05, "loss": 0.7911, "step": 10660 }, { "epoch": 0.22919620225974138, "grad_norm": 0.5443592022724559, "learning_rate": 1.7676971602022722e-05, "loss": 0.804, "step": 10670 }, { "epoch": 0.229411006573012, "grad_norm": 0.5963768922972459, "learning_rate": 1.7672601712481916e-05, "loss": 0.7838, "step": 10680 }, { "epoch": 0.2296258108862826, "grad_norm": 0.5814546759073268, "learning_rate": 1.7668228257882628e-05, "loss": 0.7934, "step": 10690 }, { "epoch": 0.2298406151995532, "grad_norm": 0.5513099385655584, "learning_rate": 1.7663851240256973e-05, "loss": 0.7901, "step": 10700 }, { "epoch": 0.23005541951282382, "grad_norm": 0.5531620819292025, "learning_rate": 1.7659470661638727e-05, "loss": 0.8093, "step": 10710 }, { "epoch": 0.23027022382609444, "grad_norm": 0.5656118475573507, "learning_rate": 1.7655086524063314e-05, "loss": 0.793, "step": 10720 }, { "epoch": 0.23048502813936503, "grad_norm": 0.5575833476944946, "learning_rate": 1.765069882956781e-05, "loss": 0.7976, "step": 10730 }, { "epoch": 0.23069983245263564, "grad_norm": 0.5564123290228586, "learning_rate": 1.764630758019096e-05, "loss": 0.8022, "step": 10740 }, { "epoch": 0.23091463676590626, "grad_norm": 0.5534405369439489, "learning_rate": 1.7641912777973136e-05, "loss": 0.8079, "step": 10750 }, { "epoch": 0.23112944107917688, "grad_norm": 0.5473270189074221, "learning_rate": 1.7637514424956386e-05, "loss": 0.7938, "step": 10760 }, { "epoch": 0.23134424539244747, "grad_norm": 0.5357674731887498, "learning_rate": 1.7633112523184383e-05, "loss": 0.7846, "step": 10770 }, { "epoch": 0.23155904970571808, "grad_norm": 0.5552201424591323, "learning_rate": 1.762870707470247e-05, "loss": 0.7803, "step": 10780 }, { "epoch": 0.2317738540189887, "grad_norm": 0.558152601578086, "learning_rate": 1.7624298081557626e-05, "loss": 0.7902, "step": 10790 }, { "epoch": 0.23198865833225932, "grad_norm": 0.5595014924866345, "learning_rate": 1.7619885545798486e-05, "loss": 0.7891, "step": 10800 }, { "epoch": 0.23220346264552993, "grad_norm": 0.5495730986900741, "learning_rate": 1.7615469469475315e-05, "loss": 0.8026, "step": 10810 }, { "epoch": 0.23241826695880052, "grad_norm": 0.5760660001958988, "learning_rate": 1.7611049854640044e-05, "loss": 0.8077, "step": 10820 }, { "epoch": 0.23263307127207114, "grad_norm": 0.5550120424472271, "learning_rate": 1.7606626703346235e-05, "loss": 0.7965, "step": 10830 }, { "epoch": 0.23284787558534176, "grad_norm": 0.5534917812163986, "learning_rate": 1.7602200017649093e-05, "loss": 0.7862, "step": 10840 }, { "epoch": 0.23306267989861237, "grad_norm": 0.567697162536355, "learning_rate": 1.759776979960547e-05, "loss": 0.7882, "step": 10850 }, { "epoch": 0.23327748421188296, "grad_norm": 0.5544878261968137, "learning_rate": 1.7593336051273857e-05, "loss": 0.7894, "step": 10860 }, { "epoch": 0.23349228852515358, "grad_norm": 0.5522547428302197, "learning_rate": 1.7588898774714387e-05, "loss": 0.792, "step": 10870 }, { "epoch": 0.2337070928384242, "grad_norm": 0.5561521408221043, "learning_rate": 1.7584457971988836e-05, "loss": 0.7984, "step": 10880 }, { "epoch": 0.2339218971516948, "grad_norm": 0.5701789587662502, "learning_rate": 1.75800136451606e-05, "loss": 0.7891, "step": 10890 }, { "epoch": 0.23413670146496543, "grad_norm": 0.5512069625729566, "learning_rate": 1.7575565796294745e-05, "loss": 0.8033, "step": 10900 }, { "epoch": 0.23435150577823602, "grad_norm": 0.5671925282466402, "learning_rate": 1.7571114427457942e-05, "loss": 0.7914, "step": 10910 }, { "epoch": 0.23456631009150664, "grad_norm": 0.5987674977748154, "learning_rate": 1.7566659540718512e-05, "loss": 0.8081, "step": 10920 }, { "epoch": 0.23478111440477725, "grad_norm": 0.5576016505511755, "learning_rate": 1.7562201138146407e-05, "loss": 0.7881, "step": 10930 }, { "epoch": 0.23499591871804787, "grad_norm": 0.5606224459108454, "learning_rate": 1.7557739221813226e-05, "loss": 0.7999, "step": 10940 }, { "epoch": 0.23521072303131846, "grad_norm": 0.5778821827576899, "learning_rate": 1.7553273793792176e-05, "loss": 0.8156, "step": 10950 }, { "epoch": 0.23542552734458908, "grad_norm": 0.5512352918888542, "learning_rate": 1.7548804856158113e-05, "loss": 0.7987, "step": 10960 }, { "epoch": 0.2356403316578597, "grad_norm": 0.5566762408624399, "learning_rate": 1.7544332410987523e-05, "loss": 0.7976, "step": 10970 }, { "epoch": 0.2358551359711303, "grad_norm": 0.5645656364576122, "learning_rate": 1.7539856460358515e-05, "loss": 0.7869, "step": 10980 }, { "epoch": 0.2360699402844009, "grad_norm": 0.5445877178654857, "learning_rate": 1.753537700635083e-05, "loss": 0.7869, "step": 10990 }, { "epoch": 0.23628474459767151, "grad_norm": 0.5590715894295356, "learning_rate": 1.753089405104584e-05, "loss": 0.8091, "step": 11000 }, { "epoch": 0.23649954891094213, "grad_norm": 0.5671948914432249, "learning_rate": 1.7526407596526536e-05, "loss": 0.8074, "step": 11010 }, { "epoch": 0.23671435322421275, "grad_norm": 0.5623507718873683, "learning_rate": 1.7521917644877546e-05, "loss": 0.7854, "step": 11020 }, { "epoch": 0.23692915753748336, "grad_norm": 0.5932218934601358, "learning_rate": 1.7517424198185108e-05, "loss": 0.8094, "step": 11030 }, { "epoch": 0.23714396185075395, "grad_norm": 0.6112761900916727, "learning_rate": 1.7512927258537105e-05, "loss": 0.7942, "step": 11040 }, { "epoch": 0.23735876616402457, "grad_norm": 0.566759308520504, "learning_rate": 1.750842682802302e-05, "loss": 0.7797, "step": 11050 }, { "epoch": 0.2375735704772952, "grad_norm": 0.536263668019581, "learning_rate": 1.7503922908733972e-05, "loss": 0.7899, "step": 11060 }, { "epoch": 0.2377883747905658, "grad_norm": 0.5570104505775965, "learning_rate": 1.74994155027627e-05, "loss": 0.7971, "step": 11070 }, { "epoch": 0.2380031791038364, "grad_norm": 0.5609858905324858, "learning_rate": 1.7494904612203557e-05, "loss": 0.7866, "step": 11080 }, { "epoch": 0.238217983417107, "grad_norm": 0.5571287784622291, "learning_rate": 1.7490390239152523e-05, "loss": 0.7964, "step": 11090 }, { "epoch": 0.23843278773037763, "grad_norm": 0.5618669494447492, "learning_rate": 1.748587238570719e-05, "loss": 0.8074, "step": 11100 }, { "epoch": 0.23864759204364824, "grad_norm": 0.5955821944742522, "learning_rate": 1.748135105396677e-05, "loss": 0.8106, "step": 11110 }, { "epoch": 0.23886239635691886, "grad_norm": 0.5507627007193192, "learning_rate": 1.747682624603209e-05, "loss": 0.7828, "step": 11120 }, { "epoch": 0.23907720067018945, "grad_norm": 0.5861560768919388, "learning_rate": 1.747229796400559e-05, "loss": 0.8241, "step": 11130 }, { "epoch": 0.23929200498346007, "grad_norm": 0.5718220707403837, "learning_rate": 1.7467766209991332e-05, "loss": 0.794, "step": 11140 }, { "epoch": 0.23950680929673068, "grad_norm": 0.6034932739087517, "learning_rate": 1.7463230986094982e-05, "loss": 0.801, "step": 11150 }, { "epoch": 0.2397216136100013, "grad_norm": 0.5581638199123323, "learning_rate": 1.7458692294423825e-05, "loss": 0.8061, "step": 11160 }, { "epoch": 0.2399364179232719, "grad_norm": 0.5395247610317755, "learning_rate": 1.7454150137086753e-05, "loss": 0.7864, "step": 11170 }, { "epoch": 0.2401512222365425, "grad_norm": 0.5931900048885603, "learning_rate": 1.7449604516194266e-05, "loss": 0.7947, "step": 11180 }, { "epoch": 0.24036602654981312, "grad_norm": 0.5574150201939502, "learning_rate": 1.7445055433858485e-05, "loss": 0.7992, "step": 11190 }, { "epoch": 0.24058083086308374, "grad_norm": 0.558048596510596, "learning_rate": 1.744050289219313e-05, "loss": 0.8036, "step": 11200 }, { "epoch": 0.24079563517635433, "grad_norm": 0.5544503116923134, "learning_rate": 1.743594689331353e-05, "loss": 0.7892, "step": 11210 }, { "epoch": 0.24101043948962494, "grad_norm": 0.5913966197755747, "learning_rate": 1.7431387439336615e-05, "loss": 0.7882, "step": 11220 }, { "epoch": 0.24122524380289556, "grad_norm": 0.5609044365761472, "learning_rate": 1.7426824532380932e-05, "loss": 0.8006, "step": 11230 }, { "epoch": 0.24144004811616618, "grad_norm": 0.5598501767608501, "learning_rate": 1.7422258174566625e-05, "loss": 0.8034, "step": 11240 }, { "epoch": 0.2416548524294368, "grad_norm": 0.5598467174087662, "learning_rate": 1.741768836801544e-05, "loss": 0.808, "step": 11250 }, { "epoch": 0.24186965674270738, "grad_norm": 0.5571089293330055, "learning_rate": 1.741311511485073e-05, "loss": 0.7828, "step": 11260 }, { "epoch": 0.242084461055978, "grad_norm": 0.5858030780897929, "learning_rate": 1.7408538417197447e-05, "loss": 0.7881, "step": 11270 }, { "epoch": 0.24229926536924862, "grad_norm": 0.5664884004513725, "learning_rate": 1.7403958277182143e-05, "loss": 0.792, "step": 11280 }, { "epoch": 0.24251406968251923, "grad_norm": 0.5523972315190862, "learning_rate": 1.7399374696932977e-05, "loss": 0.795, "step": 11290 }, { "epoch": 0.24272887399578982, "grad_norm": 0.5801081531529861, "learning_rate": 1.7394787678579693e-05, "loss": 0.8056, "step": 11300 }, { "epoch": 0.24294367830906044, "grad_norm": 0.5851376223053917, "learning_rate": 1.739019722425364e-05, "loss": 0.797, "step": 11310 }, { "epoch": 0.24315848262233106, "grad_norm": 0.5826239567427932, "learning_rate": 1.7385603336087766e-05, "loss": 0.7967, "step": 11320 }, { "epoch": 0.24337328693560167, "grad_norm": 0.5671104979630887, "learning_rate": 1.738100601621661e-05, "loss": 0.8042, "step": 11330 }, { "epoch": 0.2435880912488723, "grad_norm": 0.5526309623916124, "learning_rate": 1.737640526677631e-05, "loss": 0.7974, "step": 11340 }, { "epoch": 0.24380289556214288, "grad_norm": 0.5952220328485525, "learning_rate": 1.737180108990459e-05, "loss": 0.7988, "step": 11350 }, { "epoch": 0.2440176998754135, "grad_norm": 0.5576900075243258, "learning_rate": 1.736719348774077e-05, "loss": 0.792, "step": 11360 }, { "epoch": 0.2442325041886841, "grad_norm": 0.5646590624675798, "learning_rate": 1.7362582462425775e-05, "loss": 0.7918, "step": 11370 }, { "epoch": 0.24444730850195473, "grad_norm": 0.5650066999862957, "learning_rate": 1.7357968016102094e-05, "loss": 0.8026, "step": 11380 }, { "epoch": 0.24466211281522532, "grad_norm": 0.5675843616193625, "learning_rate": 1.7353350150913826e-05, "loss": 0.7983, "step": 11390 }, { "epoch": 0.24487691712849594, "grad_norm": 0.516651808309515, "learning_rate": 1.734872886900665e-05, "loss": 0.7707, "step": 11400 }, { "epoch": 0.24509172144176655, "grad_norm": 0.5480548050237413, "learning_rate": 1.7344104172527845e-05, "loss": 0.7901, "step": 11410 }, { "epoch": 0.24530652575503717, "grad_norm": 0.5467792895993273, "learning_rate": 1.7339476063626252e-05, "loss": 0.805, "step": 11420 }, { "epoch": 0.24552133006830776, "grad_norm": 0.5675774870807921, "learning_rate": 1.733484454445232e-05, "loss": 0.7789, "step": 11430 }, { "epoch": 0.24573613438157837, "grad_norm": 0.5430330356783232, "learning_rate": 1.7330209617158075e-05, "loss": 0.7893, "step": 11440 }, { "epoch": 0.245950938694849, "grad_norm": 0.5408803807446219, "learning_rate": 1.7325571283897126e-05, "loss": 0.7897, "step": 11450 }, { "epoch": 0.2461657430081196, "grad_norm": 0.5536255557245513, "learning_rate": 1.7320929546824662e-05, "loss": 0.793, "step": 11460 }, { "epoch": 0.24638054732139023, "grad_norm": 0.5669589146457302, "learning_rate": 1.731628440809746e-05, "loss": 0.7802, "step": 11470 }, { "epoch": 0.24659535163466081, "grad_norm": 0.5574302178274012, "learning_rate": 1.731163586987387e-05, "loss": 0.7847, "step": 11480 }, { "epoch": 0.24681015594793143, "grad_norm": 0.5563586377765385, "learning_rate": 1.7306983934313833e-05, "loss": 0.7973, "step": 11490 }, { "epoch": 0.24702496026120205, "grad_norm": 0.5541682433080475, "learning_rate": 1.730232860357885e-05, "loss": 0.7972, "step": 11500 }, { "epoch": 0.24723976457447266, "grad_norm": 0.5487057694995032, "learning_rate": 1.7297669879832025e-05, "loss": 0.783, "step": 11510 }, { "epoch": 0.24745456888774325, "grad_norm": 0.5407120966552204, "learning_rate": 1.7293007765238012e-05, "loss": 0.7858, "step": 11520 }, { "epoch": 0.24766937320101387, "grad_norm": 0.6189329723987206, "learning_rate": 1.728834226196306e-05, "loss": 0.7928, "step": 11530 }, { "epoch": 0.2478841775142845, "grad_norm": 0.5365545625639055, "learning_rate": 1.728367337217498e-05, "loss": 0.7922, "step": 11540 }, { "epoch": 0.2480989818275551, "grad_norm": 0.5541237894376929, "learning_rate": 1.7279001098043164e-05, "loss": 0.7864, "step": 11550 }, { "epoch": 0.24831378614082572, "grad_norm": 0.5518328452834319, "learning_rate": 1.7274325441738578e-05, "loss": 0.7993, "step": 11560 }, { "epoch": 0.2485285904540963, "grad_norm": 0.5476209039060521, "learning_rate": 1.7269646405433754e-05, "loss": 0.7845, "step": 11570 }, { "epoch": 0.24874339476736693, "grad_norm": 0.5374788890443903, "learning_rate": 1.7264963991302798e-05, "loss": 0.7736, "step": 11580 }, { "epoch": 0.24895819908063754, "grad_norm": 0.5528441478075925, "learning_rate": 1.7260278201521377e-05, "loss": 0.8082, "step": 11590 }, { "epoch": 0.24917300339390816, "grad_norm": 0.5517627198236814, "learning_rate": 1.725558903826674e-05, "loss": 0.7964, "step": 11600 }, { "epoch": 0.24938780770717875, "grad_norm": 0.526708104255714, "learning_rate": 1.7250896503717697e-05, "loss": 0.7846, "step": 11610 }, { "epoch": 0.24960261202044937, "grad_norm": 0.5677760683591515, "learning_rate": 1.724620060005462e-05, "loss": 0.8056, "step": 11620 }, { "epoch": 0.24981741633371998, "grad_norm": 0.5445293154923263, "learning_rate": 1.724150132945946e-05, "loss": 0.7925, "step": 11630 }, { "epoch": 0.2500322206469906, "grad_norm": 0.5253609554392292, "learning_rate": 1.723679869411571e-05, "loss": 0.795, "step": 11640 }, { "epoch": 0.2502470249602612, "grad_norm": 0.5717202718646365, "learning_rate": 1.723209269620845e-05, "loss": 0.776, "step": 11650 }, { "epoch": 0.25046182927353183, "grad_norm": 0.5795012398020939, "learning_rate": 1.722738333792431e-05, "loss": 0.7879, "step": 11660 }, { "epoch": 0.2506766335868024, "grad_norm": 0.5572184542777283, "learning_rate": 1.722267062145148e-05, "loss": 0.7822, "step": 11670 }, { "epoch": 0.250891437900073, "grad_norm": 0.5617363405871534, "learning_rate": 1.721795454897972e-05, "loss": 0.7905, "step": 11680 }, { "epoch": 0.25110624221334366, "grad_norm": 0.5445190221346468, "learning_rate": 1.7213235122700334e-05, "loss": 0.8082, "step": 11690 }, { "epoch": 0.25132104652661424, "grad_norm": 0.5395663294268565, "learning_rate": 1.7208512344806204e-05, "loss": 0.776, "step": 11700 }, { "epoch": 0.2515358508398849, "grad_norm": 0.5454264355323872, "learning_rate": 1.7203786217491757e-05, "loss": 0.7828, "step": 11710 }, { "epoch": 0.2517506551531555, "grad_norm": 0.5778897096941694, "learning_rate": 1.7199056742952973e-05, "loss": 0.7861, "step": 11720 }, { "epoch": 0.25196545946642607, "grad_norm": 0.5436108360977332, "learning_rate": 1.7194323923387396e-05, "loss": 0.7887, "step": 11730 }, { "epoch": 0.2521802637796967, "grad_norm": 0.5480998527967708, "learning_rate": 1.718958776099412e-05, "loss": 0.8009, "step": 11740 }, { "epoch": 0.2523950680929673, "grad_norm": 0.5553968599080923, "learning_rate": 1.718484825797379e-05, "loss": 0.7953, "step": 11750 }, { "epoch": 0.2526098724062379, "grad_norm": 0.5633121625376932, "learning_rate": 1.7180105416528613e-05, "loss": 0.7836, "step": 11760 }, { "epoch": 0.25282467671950853, "grad_norm": 0.5706756900855683, "learning_rate": 1.7175359238862335e-05, "loss": 0.7963, "step": 11770 }, { "epoch": 0.2530394810327791, "grad_norm": 0.5594666956751811, "learning_rate": 1.7170609727180258e-05, "loss": 0.7881, "step": 11780 }, { "epoch": 0.25325428534604977, "grad_norm": 0.5537400526042484, "learning_rate": 1.7165856883689237e-05, "loss": 0.7862, "step": 11790 }, { "epoch": 0.25346908965932036, "grad_norm": 0.5561566681420613, "learning_rate": 1.7161100710597668e-05, "loss": 0.7856, "step": 11800 }, { "epoch": 0.25368389397259095, "grad_norm": 0.575034466737522, "learning_rate": 1.71563412101155e-05, "loss": 0.7872, "step": 11810 }, { "epoch": 0.2538986982858616, "grad_norm": 0.5674721717746797, "learning_rate": 1.7151578384454218e-05, "loss": 0.8035, "step": 11820 }, { "epoch": 0.2541135025991322, "grad_norm": 0.5471441442602709, "learning_rate": 1.714681223582686e-05, "loss": 0.7835, "step": 11830 }, { "epoch": 0.2543283069124028, "grad_norm": 0.5349338344027432, "learning_rate": 1.7142042766448016e-05, "loss": 0.7704, "step": 11840 }, { "epoch": 0.2545431112256734, "grad_norm": 0.5289756077207555, "learning_rate": 1.7137269978533804e-05, "loss": 0.7881, "step": 11850 }, { "epoch": 0.254757915538944, "grad_norm": 0.5654604443984577, "learning_rate": 1.713249387430189e-05, "loss": 0.7873, "step": 11860 }, { "epoch": 0.25497271985221465, "grad_norm": 0.5479301372643116, "learning_rate": 1.7127714455971476e-05, "loss": 0.7853, "step": 11870 }, { "epoch": 0.25518752416548524, "grad_norm": 0.5642448334289781, "learning_rate": 1.7122931725763318e-05, "loss": 0.7955, "step": 11880 }, { "epoch": 0.2554023284787559, "grad_norm": 0.5561951319477092, "learning_rate": 1.71181456858997e-05, "loss": 0.7998, "step": 11890 }, { "epoch": 0.25561713279202647, "grad_norm": 0.5551743423950759, "learning_rate": 1.711335633860444e-05, "loss": 0.7868, "step": 11900 }, { "epoch": 0.25583193710529706, "grad_norm": 0.575663087645707, "learning_rate": 1.71085636861029e-05, "loss": 0.7946, "step": 11910 }, { "epoch": 0.2560467414185677, "grad_norm": 0.5705702380544423, "learning_rate": 1.710376773062198e-05, "loss": 0.8077, "step": 11920 }, { "epoch": 0.2562615457318383, "grad_norm": 0.5517950323778168, "learning_rate": 1.709896847439011e-05, "loss": 0.7798, "step": 11930 }, { "epoch": 0.2564763500451089, "grad_norm": 0.5654242254525084, "learning_rate": 1.709416591963725e-05, "loss": 0.7854, "step": 11940 }, { "epoch": 0.2566911543583795, "grad_norm": 0.556721343917251, "learning_rate": 1.7089360068594903e-05, "loss": 0.7845, "step": 11950 }, { "epoch": 0.2569059586716501, "grad_norm": 0.5647470558063254, "learning_rate": 1.7084550923496094e-05, "loss": 0.7827, "step": 11960 }, { "epoch": 0.25712076298492076, "grad_norm": 0.5580955635924464, "learning_rate": 1.7079738486575382e-05, "loss": 0.7857, "step": 11970 }, { "epoch": 0.25733556729819135, "grad_norm": 0.5655207694822407, "learning_rate": 1.7074922760068855e-05, "loss": 0.7751, "step": 11980 }, { "epoch": 0.25755037161146194, "grad_norm": 0.5700568764310818, "learning_rate": 1.7070103746214135e-05, "loss": 0.796, "step": 11990 }, { "epoch": 0.2577651759247326, "grad_norm": 0.530803254919797, "learning_rate": 1.7065281447250363e-05, "loss": 0.7799, "step": 12000 }, { "epoch": 0.25797998023800317, "grad_norm": 0.559034831132991, "learning_rate": 1.706045586541821e-05, "loss": 0.7957, "step": 12010 }, { "epoch": 0.2581947845512738, "grad_norm": 0.5597546125165273, "learning_rate": 1.7055627002959872e-05, "loss": 0.7845, "step": 12020 }, { "epoch": 0.2584095888645444, "grad_norm": 0.6058840929391112, "learning_rate": 1.7050794862119078e-05, "loss": 0.7895, "step": 12030 }, { "epoch": 0.258624393177815, "grad_norm": 0.5651994162157234, "learning_rate": 1.704595944514106e-05, "loss": 0.8003, "step": 12040 }, { "epoch": 0.25883919749108564, "grad_norm": 0.5596861708905858, "learning_rate": 1.7041120754272594e-05, "loss": 0.8011, "step": 12050 }, { "epoch": 0.2590540018043562, "grad_norm": 0.5505706888687052, "learning_rate": 1.7036278791761965e-05, "loss": 0.791, "step": 12060 }, { "epoch": 0.2592688061176268, "grad_norm": 0.5545931326572373, "learning_rate": 1.7031433559858977e-05, "loss": 0.7804, "step": 12070 }, { "epoch": 0.25948361043089746, "grad_norm": 3.102401423632908, "learning_rate": 1.702658506081496e-05, "loss": 0.8037, "step": 12080 }, { "epoch": 0.25969841474416805, "grad_norm": 0.5450387286613136, "learning_rate": 1.7021733296882758e-05, "loss": 0.8026, "step": 12090 }, { "epoch": 0.2599132190574387, "grad_norm": 0.541274551933326, "learning_rate": 1.7016878270316738e-05, "loss": 0.7963, "step": 12100 }, { "epoch": 0.2601280233707093, "grad_norm": 0.5316129158445448, "learning_rate": 1.701201998337277e-05, "loss": 0.7797, "step": 12110 }, { "epoch": 0.26034282768397987, "grad_norm": 0.5332911250510366, "learning_rate": 1.700715843830825e-05, "loss": 0.788, "step": 12120 }, { "epoch": 0.2605576319972505, "grad_norm": 0.5385176920383756, "learning_rate": 1.700229363738209e-05, "loss": 0.7766, "step": 12130 }, { "epoch": 0.2607724363105211, "grad_norm": 0.5598906242851724, "learning_rate": 1.6997425582854704e-05, "loss": 0.777, "step": 12140 }, { "epoch": 0.26098724062379175, "grad_norm": 0.5487776354269147, "learning_rate": 1.6992554276988022e-05, "loss": 0.7737, "step": 12150 }, { "epoch": 0.26120204493706234, "grad_norm": 0.5330929016698871, "learning_rate": 1.6987679722045493e-05, "loss": 0.7907, "step": 12160 }, { "epoch": 0.2614168492503329, "grad_norm": 0.5382294593191113, "learning_rate": 1.6982801920292063e-05, "loss": 0.792, "step": 12170 }, { "epoch": 0.2616316535636036, "grad_norm": 0.5458332411357165, "learning_rate": 1.6977920873994196e-05, "loss": 0.8109, "step": 12180 }, { "epoch": 0.26184645787687416, "grad_norm": 0.5588172559612458, "learning_rate": 1.697303658541986e-05, "loss": 0.7976, "step": 12190 }, { "epoch": 0.2620612621901448, "grad_norm": 0.5291579474271565, "learning_rate": 1.6968149056838525e-05, "loss": 0.7796, "step": 12200 }, { "epoch": 0.2622760665034154, "grad_norm": 0.5477937857011975, "learning_rate": 1.6963258290521173e-05, "loss": 0.7831, "step": 12210 }, { "epoch": 0.262490870816686, "grad_norm": 0.5311847854529211, "learning_rate": 1.6958364288740293e-05, "loss": 0.7584, "step": 12220 }, { "epoch": 0.26270567512995663, "grad_norm": 0.5360611480627258, "learning_rate": 1.6953467053769864e-05, "loss": 0.7846, "step": 12230 }, { "epoch": 0.2629204794432272, "grad_norm": 0.5704482862900305, "learning_rate": 1.6948566587885388e-05, "loss": 0.8022, "step": 12240 }, { "epoch": 0.2631352837564978, "grad_norm": 0.5362588968839999, "learning_rate": 1.6943662893363845e-05, "loss": 0.7916, "step": 12250 }, { "epoch": 0.26335008806976845, "grad_norm": 0.5675401491595801, "learning_rate": 1.6938755972483732e-05, "loss": 0.7959, "step": 12260 }, { "epoch": 0.26356489238303904, "grad_norm": 0.5540442999120004, "learning_rate": 1.6933845827525037e-05, "loss": 0.7906, "step": 12270 }, { "epoch": 0.2637796966963097, "grad_norm": 0.5414329042014562, "learning_rate": 1.6928932460769254e-05, "loss": 0.7819, "step": 12280 }, { "epoch": 0.2639945010095803, "grad_norm": 0.5410748013073313, "learning_rate": 1.6924015874499363e-05, "loss": 0.7669, "step": 12290 }, { "epoch": 0.26420930532285086, "grad_norm": 0.5472743067105443, "learning_rate": 1.691909607099985e-05, "loss": 0.7821, "step": 12300 }, { "epoch": 0.2644241096361215, "grad_norm": 0.5475456006044419, "learning_rate": 1.6914173052556688e-05, "loss": 0.7811, "step": 12310 }, { "epoch": 0.2646389139493921, "grad_norm": 0.5507328978978161, "learning_rate": 1.6909246821457346e-05, "loss": 0.7774, "step": 12320 }, { "epoch": 0.26485371826266274, "grad_norm": 0.5599737303976604, "learning_rate": 1.690431737999079e-05, "loss": 0.7921, "step": 12330 }, { "epoch": 0.26506852257593333, "grad_norm": 0.5280366450787144, "learning_rate": 1.6899384730447477e-05, "loss": 0.7847, "step": 12340 }, { "epoch": 0.2652833268892039, "grad_norm": 0.5435632022539839, "learning_rate": 1.689444887511935e-05, "loss": 0.7854, "step": 12350 }, { "epoch": 0.26549813120247456, "grad_norm": 0.5647493787301932, "learning_rate": 1.6889509816299844e-05, "loss": 0.7899, "step": 12360 }, { "epoch": 0.26571293551574515, "grad_norm": 0.5610876101815535, "learning_rate": 1.688456755628388e-05, "loss": 0.7874, "step": 12370 }, { "epoch": 0.26592773982901574, "grad_norm": 0.5498086456378252, "learning_rate": 1.6879622097367874e-05, "loss": 0.7941, "step": 12380 }, { "epoch": 0.2661425441422864, "grad_norm": 0.5622568962892611, "learning_rate": 1.6874673441849715e-05, "loss": 0.7856, "step": 12390 }, { "epoch": 0.266357348455557, "grad_norm": 0.5279820352190503, "learning_rate": 1.6869721592028792e-05, "loss": 0.7965, "step": 12400 }, { "epoch": 0.2665721527688276, "grad_norm": 0.5316923893826371, "learning_rate": 1.6864766550205977e-05, "loss": 0.8056, "step": 12410 }, { "epoch": 0.2667869570820982, "grad_norm": 0.5847422188711642, "learning_rate": 1.6859808318683606e-05, "loss": 0.7845, "step": 12420 }, { "epoch": 0.2670017613953688, "grad_norm": 0.5183382490604992, "learning_rate": 1.685484689976552e-05, "loss": 0.7954, "step": 12430 }, { "epoch": 0.26721656570863944, "grad_norm": 0.548534083313305, "learning_rate": 1.6849882295757037e-05, "loss": 0.7834, "step": 12440 }, { "epoch": 0.26743137002191003, "grad_norm": 0.555857167141612, "learning_rate": 1.6844914508964937e-05, "loss": 0.7885, "step": 12450 }, { "epoch": 0.2676461743351807, "grad_norm": 0.5609532665687568, "learning_rate": 1.6839943541697498e-05, "loss": 0.7804, "step": 12460 }, { "epoch": 0.26786097864845126, "grad_norm": 0.5601428417687324, "learning_rate": 1.6834969396264472e-05, "loss": 0.7858, "step": 12470 }, { "epoch": 0.26807578296172185, "grad_norm": 0.5476848469778967, "learning_rate": 1.682999207497708e-05, "loss": 0.7938, "step": 12480 }, { "epoch": 0.2682905872749925, "grad_norm": 0.5638917271864747, "learning_rate": 1.6825011580148033e-05, "loss": 0.7837, "step": 12490 }, { "epoch": 0.2685053915882631, "grad_norm": 0.5894522331907057, "learning_rate": 1.6820027914091497e-05, "loss": 0.7844, "step": 12500 }, { "epoch": 0.2687201959015337, "grad_norm": 0.5610666708837045, "learning_rate": 1.681504107912313e-05, "loss": 0.7843, "step": 12510 }, { "epoch": 0.2689350002148043, "grad_norm": 0.5446880622878415, "learning_rate": 1.681005107756005e-05, "loss": 0.7921, "step": 12520 }, { "epoch": 0.2691498045280749, "grad_norm": 0.542926637375099, "learning_rate": 1.6805057911720852e-05, "loss": 0.7906, "step": 12530 }, { "epoch": 0.26936460884134555, "grad_norm": 0.5750689040548754, "learning_rate": 1.6800061583925603e-05, "loss": 0.785, "step": 12540 }, { "epoch": 0.26957941315461614, "grad_norm": 0.5144723555135201, "learning_rate": 1.679506209649583e-05, "loss": 0.7801, "step": 12550 }, { "epoch": 0.26979421746788673, "grad_norm": 0.5368301892072017, "learning_rate": 1.6790059451754545e-05, "loss": 0.7764, "step": 12560 }, { "epoch": 0.2700090217811574, "grad_norm": 0.5642425304345098, "learning_rate": 1.6785053652026204e-05, "loss": 0.8061, "step": 12570 }, { "epoch": 0.27022382609442797, "grad_norm": 0.5539198208515579, "learning_rate": 1.678004469963675e-05, "loss": 0.7863, "step": 12580 }, { "epoch": 0.2704386304076986, "grad_norm": 0.5512816690327085, "learning_rate": 1.6775032596913576e-05, "loss": 0.7911, "step": 12590 }, { "epoch": 0.2706534347209692, "grad_norm": 0.5815790405385224, "learning_rate": 1.677001734618555e-05, "loss": 0.7852, "step": 12600 }, { "epoch": 0.2708682390342398, "grad_norm": 0.5612088205707622, "learning_rate": 1.6764998949783e-05, "loss": 0.7828, "step": 12610 }, { "epoch": 0.27108304334751043, "grad_norm": 0.530172464989036, "learning_rate": 1.675997741003771e-05, "loss": 0.7698, "step": 12620 }, { "epoch": 0.271297847660781, "grad_norm": 0.552005993228374, "learning_rate": 1.6754952729282926e-05, "loss": 0.7745, "step": 12630 }, { "epoch": 0.27151265197405167, "grad_norm": 0.5435122213473302, "learning_rate": 1.6749924909853355e-05, "loss": 0.7854, "step": 12640 }, { "epoch": 0.27172745628732226, "grad_norm": 0.5530374063509907, "learning_rate": 1.6744893954085173e-05, "loss": 0.7958, "step": 12650 }, { "epoch": 0.27194226060059284, "grad_norm": 0.5611802591773057, "learning_rate": 1.673985986431599e-05, "loss": 0.7966, "step": 12660 }, { "epoch": 0.2721570649138635, "grad_norm": 0.5568671271875136, "learning_rate": 1.6734822642884893e-05, "loss": 0.7764, "step": 12670 }, { "epoch": 0.2723718692271341, "grad_norm": 0.5572155300702621, "learning_rate": 1.6729782292132415e-05, "loss": 0.7757, "step": 12680 }, { "epoch": 0.27258667354040467, "grad_norm": 0.5615138514243179, "learning_rate": 1.6724738814400545e-05, "loss": 0.798, "step": 12690 }, { "epoch": 0.2728014778536753, "grad_norm": 0.5339476128169535, "learning_rate": 1.671969221203272e-05, "loss": 0.7828, "step": 12700 }, { "epoch": 0.2730162821669459, "grad_norm": 0.5438064983467911, "learning_rate": 1.671464248737384e-05, "loss": 0.7792, "step": 12710 }, { "epoch": 0.27323108648021655, "grad_norm": 0.5349876744589488, "learning_rate": 1.6709589642770247e-05, "loss": 0.7855, "step": 12720 }, { "epoch": 0.27344589079348713, "grad_norm": 0.553863217138617, "learning_rate": 1.6704533680569732e-05, "loss": 0.7868, "step": 12730 }, { "epoch": 0.2736606951067577, "grad_norm": 0.5390862263215795, "learning_rate": 1.669947460312154e-05, "loss": 0.7754, "step": 12740 }, { "epoch": 0.27387549942002837, "grad_norm": 0.583549319445325, "learning_rate": 1.669441241277636e-05, "loss": 0.7917, "step": 12750 }, { "epoch": 0.27409030373329896, "grad_norm": 0.5293298289589918, "learning_rate": 1.668934711188633e-05, "loss": 0.7909, "step": 12760 }, { "epoch": 0.2743051080465696, "grad_norm": 0.569877867106919, "learning_rate": 1.6684278702805024e-05, "loss": 0.7767, "step": 12770 }, { "epoch": 0.2745199123598402, "grad_norm": 0.5773582415344543, "learning_rate": 1.667920718788748e-05, "loss": 0.7856, "step": 12780 }, { "epoch": 0.2747347166731108, "grad_norm": 0.5208586202102348, "learning_rate": 1.667413256949016e-05, "loss": 0.7805, "step": 12790 }, { "epoch": 0.2749495209863814, "grad_norm": 0.5353938318257099, "learning_rate": 1.666905484997097e-05, "loss": 0.7876, "step": 12800 }, { "epoch": 0.275164325299652, "grad_norm": 0.5424525339376945, "learning_rate": 1.6663974031689272e-05, "loss": 0.803, "step": 12810 }, { "epoch": 0.2753791296129226, "grad_norm": 0.5505344864467518, "learning_rate": 1.665889011700585e-05, "loss": 0.7839, "step": 12820 }, { "epoch": 0.27559393392619325, "grad_norm": 0.5688231819530734, "learning_rate": 1.665380310828294e-05, "loss": 0.8044, "step": 12830 }, { "epoch": 0.27580873823946384, "grad_norm": 0.5423707638785459, "learning_rate": 1.6648713007884207e-05, "loss": 0.7813, "step": 12840 }, { "epoch": 0.2760235425527345, "grad_norm": 0.5622431891874606, "learning_rate": 1.6643619818174755e-05, "loss": 0.792, "step": 12850 }, { "epoch": 0.27623834686600507, "grad_norm": 0.5348635272783925, "learning_rate": 1.6638523541521126e-05, "loss": 0.8017, "step": 12860 }, { "epoch": 0.27645315117927566, "grad_norm": 0.5539184848623694, "learning_rate": 1.6633424180291293e-05, "loss": 0.7935, "step": 12870 }, { "epoch": 0.2766679554925463, "grad_norm": 0.531258076725924, "learning_rate": 1.6628321736854664e-05, "loss": 0.7771, "step": 12880 }, { "epoch": 0.2768827598058169, "grad_norm": 0.5429903770907508, "learning_rate": 1.662321621358208e-05, "loss": 0.7682, "step": 12890 }, { "epoch": 0.27709756411908754, "grad_norm": 0.5338561005284871, "learning_rate": 1.6618107612845812e-05, "loss": 0.7562, "step": 12900 }, { "epoch": 0.2773123684323581, "grad_norm": 0.563699713244769, "learning_rate": 1.6612995937019557e-05, "loss": 0.7831, "step": 12910 }, { "epoch": 0.2775271727456287, "grad_norm": 0.5974035990189168, "learning_rate": 1.6607881188478446e-05, "loss": 0.793, "step": 12920 }, { "epoch": 0.27774197705889936, "grad_norm": 0.5786108959234898, "learning_rate": 1.6602763369599037e-05, "loss": 0.7701, "step": 12930 }, { "epoch": 0.27795678137216995, "grad_norm": 0.5244598945283411, "learning_rate": 1.659764248275932e-05, "loss": 0.7752, "step": 12940 }, { "epoch": 0.27817158568544054, "grad_norm": 0.5392263527270091, "learning_rate": 1.6592518530338692e-05, "loss": 0.789, "step": 12950 }, { "epoch": 0.2783863899987112, "grad_norm": 0.5572545049836365, "learning_rate": 1.6587391514718e-05, "loss": 0.8154, "step": 12960 }, { "epoch": 0.27860119431198177, "grad_norm": 0.5549009307377286, "learning_rate": 1.6582261438279488e-05, "loss": 0.7904, "step": 12970 }, { "epoch": 0.2788159986252524, "grad_norm": 0.5736525203694448, "learning_rate": 1.6577128303406843e-05, "loss": 0.7838, "step": 12980 }, { "epoch": 0.279030802938523, "grad_norm": 0.5422061227803314, "learning_rate": 1.657199211248517e-05, "loss": 0.7775, "step": 12990 }, { "epoch": 0.2792456072517936, "grad_norm": 0.5552087862142333, "learning_rate": 1.656685286790098e-05, "loss": 0.7918, "step": 13000 }, { "epoch": 0.27946041156506424, "grad_norm": 0.5769438755666713, "learning_rate": 1.656171057204222e-05, "loss": 0.773, "step": 13010 }, { "epoch": 0.2796752158783348, "grad_norm": 0.544310206877259, "learning_rate": 1.655656522729824e-05, "loss": 0.787, "step": 13020 }, { "epoch": 0.27989002019160547, "grad_norm": 0.5380675260235818, "learning_rate": 1.6551416836059818e-05, "loss": 0.7874, "step": 13030 }, { "epoch": 0.28010482450487606, "grad_norm": 0.5340600751027904, "learning_rate": 1.6546265400719143e-05, "loss": 0.7858, "step": 13040 }, { "epoch": 0.28031962881814665, "grad_norm": 0.547403375047864, "learning_rate": 1.6541110923669816e-05, "loss": 0.7847, "step": 13050 }, { "epoch": 0.2805344331314173, "grad_norm": 0.533402580926284, "learning_rate": 1.653595340730686e-05, "loss": 0.8004, "step": 13060 }, { "epoch": 0.2807492374446879, "grad_norm": 0.5426741999184683, "learning_rate": 1.65307928540267e-05, "loss": 0.7797, "step": 13070 }, { "epoch": 0.2809640417579585, "grad_norm": 0.5342611404015724, "learning_rate": 1.6525629266227177e-05, "loss": 0.7787, "step": 13080 }, { "epoch": 0.2811788460712291, "grad_norm": 1.7861965467084178, "learning_rate": 1.6520462646307543e-05, "loss": 0.8042, "step": 13090 }, { "epoch": 0.2813936503844997, "grad_norm": 0.5453771703119942, "learning_rate": 1.6515292996668452e-05, "loss": 0.7896, "step": 13100 }, { "epoch": 0.28160845469777035, "grad_norm": 0.5393581439017914, "learning_rate": 1.6510120319711974e-05, "loss": 0.7636, "step": 13110 }, { "epoch": 0.28182325901104094, "grad_norm": 0.566923191821844, "learning_rate": 1.650494461784159e-05, "loss": 0.7824, "step": 13120 }, { "epoch": 0.2820380633243115, "grad_norm": 0.5909033333766904, "learning_rate": 1.649976589346217e-05, "loss": 0.7791, "step": 13130 }, { "epoch": 0.28225286763758217, "grad_norm": 0.5459678033195983, "learning_rate": 1.6494584148979996e-05, "loss": 0.7861, "step": 13140 }, { "epoch": 0.28246767195085276, "grad_norm": 0.5763291162585454, "learning_rate": 1.648939938680276e-05, "loss": 0.7864, "step": 13150 }, { "epoch": 0.2826824762641234, "grad_norm": 0.5341209524759871, "learning_rate": 1.6484211609339555e-05, "loss": 0.7803, "step": 13160 }, { "epoch": 0.282897280577394, "grad_norm": 0.5736855699742509, "learning_rate": 1.647902081900086e-05, "loss": 0.7897, "step": 13170 }, { "epoch": 0.2831120848906646, "grad_norm": 0.5531932895913767, "learning_rate": 1.6473827018198573e-05, "loss": 0.7928, "step": 13180 }, { "epoch": 0.28332688920393523, "grad_norm": 0.5640369977192191, "learning_rate": 1.6468630209345984e-05, "loss": 0.8076, "step": 13190 }, { "epoch": 0.2835416935172058, "grad_norm": 0.5609123963099327, "learning_rate": 1.6463430394857772e-05, "loss": 0.7912, "step": 13200 }, { "epoch": 0.28375649783047646, "grad_norm": 0.5557233716140605, "learning_rate": 1.6458227577150024e-05, "loss": 0.788, "step": 13210 }, { "epoch": 0.28397130214374705, "grad_norm": 0.5662855198896333, "learning_rate": 1.6453021758640218e-05, "loss": 0.7884, "step": 13220 }, { "epoch": 0.28418610645701764, "grad_norm": 0.5412381516020387, "learning_rate": 1.644781294174723e-05, "loss": 0.7956, "step": 13230 }, { "epoch": 0.2844009107702883, "grad_norm": 0.5385533947113791, "learning_rate": 1.6442601128891323e-05, "loss": 0.7864, "step": 13240 }, { "epoch": 0.2846157150835589, "grad_norm": 0.531510421245019, "learning_rate": 1.6437386322494155e-05, "loss": 0.7705, "step": 13250 }, { "epoch": 0.28483051939682946, "grad_norm": 0.5426033523747722, "learning_rate": 1.6432168524978777e-05, "loss": 0.7787, "step": 13260 }, { "epoch": 0.2850453237101001, "grad_norm": 0.5623236256089558, "learning_rate": 1.6426947738769623e-05, "loss": 0.7813, "step": 13270 }, { "epoch": 0.2852601280233707, "grad_norm": 0.5394725869592929, "learning_rate": 1.6421723966292525e-05, "loss": 0.7738, "step": 13280 }, { "epoch": 0.28547493233664134, "grad_norm": 0.5465469510807215, "learning_rate": 1.6416497209974697e-05, "loss": 0.7557, "step": 13290 }, { "epoch": 0.28568973664991193, "grad_norm": 0.5371708781271813, "learning_rate": 1.641126747224474e-05, "loss": 0.7871, "step": 13300 }, { "epoch": 0.2859045409631825, "grad_norm": 0.5400513136671761, "learning_rate": 1.640603475553264e-05, "loss": 0.7835, "step": 13310 }, { "epoch": 0.28611934527645316, "grad_norm": 0.5394221231389933, "learning_rate": 1.640079906226977e-05, "loss": 0.765, "step": 13320 }, { "epoch": 0.28633414958972375, "grad_norm": 0.5222441443064099, "learning_rate": 1.6395560394888888e-05, "loss": 0.7757, "step": 13330 }, { "epoch": 0.2865489539029944, "grad_norm": 0.5606502765218887, "learning_rate": 1.6390318755824118e-05, "loss": 0.7868, "step": 13340 }, { "epoch": 0.286763758216265, "grad_norm": 0.609652591588806, "learning_rate": 1.6385074147510987e-05, "loss": 0.7809, "step": 13350 }, { "epoch": 0.2869785625295356, "grad_norm": 0.526740208124346, "learning_rate": 1.6379826572386386e-05, "loss": 0.7898, "step": 13360 }, { "epoch": 0.2871933668428062, "grad_norm": 0.5305922224130004, "learning_rate": 1.6374576032888594e-05, "loss": 0.7807, "step": 13370 }, { "epoch": 0.2874081711560768, "grad_norm": 0.5540486723971066, "learning_rate": 1.6369322531457263e-05, "loss": 0.7788, "step": 13380 }, { "epoch": 0.2876229754693474, "grad_norm": 0.52462212158824, "learning_rate": 1.6364066070533414e-05, "loss": 0.7933, "step": 13390 }, { "epoch": 0.28783777978261804, "grad_norm": 0.531247543108683, "learning_rate": 1.635880665255946e-05, "loss": 0.776, "step": 13400 }, { "epoch": 0.28805258409588863, "grad_norm": 0.5732783680452849, "learning_rate": 1.6353544279979177e-05, "loss": 0.7931, "step": 13410 }, { "epoch": 0.2882673884091593, "grad_norm": 0.542974827627677, "learning_rate": 1.634827895523771e-05, "loss": 0.7782, "step": 13420 }, { "epoch": 0.28848219272242986, "grad_norm": 0.5876492011137021, "learning_rate": 1.6343010680781586e-05, "loss": 0.7774, "step": 13430 }, { "epoch": 0.28869699703570045, "grad_norm": 0.5333223558241071, "learning_rate": 1.63377394590587e-05, "loss": 0.7908, "step": 13440 }, { "epoch": 0.2889118013489711, "grad_norm": 0.5435404704513007, "learning_rate": 1.6332465292518306e-05, "loss": 0.784, "step": 13450 }, { "epoch": 0.2891266056622417, "grad_norm": 0.5372220581463112, "learning_rate": 1.6327188183611043e-05, "loss": 0.7877, "step": 13460 }, { "epoch": 0.28934140997551233, "grad_norm": 0.5517875641980315, "learning_rate": 1.6321908134788904e-05, "loss": 0.7755, "step": 13470 }, { "epoch": 0.2895562142887829, "grad_norm": 0.5624183224646342, "learning_rate": 1.6316625148505253e-05, "loss": 0.7697, "step": 13480 }, { "epoch": 0.2897710186020535, "grad_norm": 0.5190475908632274, "learning_rate": 1.631133922721482e-05, "loss": 0.7768, "step": 13490 }, { "epoch": 0.28998582291532415, "grad_norm": 0.5513772019738882, "learning_rate": 1.6306050373373698e-05, "loss": 0.7742, "step": 13500 }, { "epoch": 0.29020062722859474, "grad_norm": 0.552607695073294, "learning_rate": 1.6300758589439342e-05, "loss": 0.787, "step": 13510 }, { "epoch": 0.2904154315418654, "grad_norm": 0.5521440595481499, "learning_rate": 1.6295463877870566e-05, "loss": 0.7812, "step": 13520 }, { "epoch": 0.290630235855136, "grad_norm": 0.541378746109039, "learning_rate": 1.6290166241127545e-05, "loss": 0.7813, "step": 13530 }, { "epoch": 0.29084504016840657, "grad_norm": 0.5837702840940312, "learning_rate": 1.6284865681671826e-05, "loss": 0.7875, "step": 13540 }, { "epoch": 0.2910598444816772, "grad_norm": 0.531456315705228, "learning_rate": 1.6279562201966287e-05, "loss": 0.7744, "step": 13550 }, { "epoch": 0.2912746487949478, "grad_norm": 0.5409752871762843, "learning_rate": 1.627425580447519e-05, "loss": 0.7848, "step": 13560 }, { "epoch": 0.2914894531082184, "grad_norm": 0.5685739203049746, "learning_rate": 1.626894649166414e-05, "loss": 0.7902, "step": 13570 }, { "epoch": 0.29170425742148903, "grad_norm": 0.5290562340681636, "learning_rate": 1.6263634266000093e-05, "loss": 0.7748, "step": 13580 }, { "epoch": 0.2919190617347596, "grad_norm": 0.5526529473322866, "learning_rate": 1.6258319129951366e-05, "loss": 0.7911, "step": 13590 }, { "epoch": 0.29213386604803027, "grad_norm": 0.5379467854648743, "learning_rate": 1.6253001085987635e-05, "loss": 0.7837, "step": 13600 }, { "epoch": 0.29234867036130086, "grad_norm": 0.5629511775533564, "learning_rate": 1.6247680136579904e-05, "loss": 0.7904, "step": 13610 }, { "epoch": 0.29256347467457144, "grad_norm": 0.5637137982010608, "learning_rate": 1.624235628420055e-05, "loss": 0.7789, "step": 13620 }, { "epoch": 0.2927782789878421, "grad_norm": 0.5552139861154454, "learning_rate": 1.6237029531323286e-05, "loss": 0.7766, "step": 13630 }, { "epoch": 0.2929930833011127, "grad_norm": 0.5311375889317766, "learning_rate": 1.6231699880423182e-05, "loss": 0.7878, "step": 13640 }, { "epoch": 0.2932078876143833, "grad_norm": 0.5438973923343646, "learning_rate": 1.6226367333976642e-05, "loss": 0.7665, "step": 13650 }, { "epoch": 0.2934226919276539, "grad_norm": 0.5471337589971532, "learning_rate": 1.6221031894461426e-05, "loss": 0.7712, "step": 13660 }, { "epoch": 0.2936374962409245, "grad_norm": 0.5478152109462807, "learning_rate": 1.6215693564356635e-05, "loss": 0.7734, "step": 13670 }, { "epoch": 0.29385230055419514, "grad_norm": 0.5523511877357635, "learning_rate": 1.6210352346142713e-05, "loss": 0.7882, "step": 13680 }, { "epoch": 0.29406710486746573, "grad_norm": 0.5405593748173753, "learning_rate": 1.6205008242301445e-05, "loss": 0.7967, "step": 13690 }, { "epoch": 0.2942819091807363, "grad_norm": 0.5393904185913311, "learning_rate": 1.619966125531596e-05, "loss": 0.7975, "step": 13700 }, { "epoch": 0.29449671349400697, "grad_norm": 0.5459167139610831, "learning_rate": 1.6194311387670726e-05, "loss": 0.7802, "step": 13710 }, { "epoch": 0.29471151780727756, "grad_norm": 0.5734671415245739, "learning_rate": 1.618895864185154e-05, "loss": 0.7805, "step": 13720 }, { "epoch": 0.2949263221205482, "grad_norm": 0.5200409133048494, "learning_rate": 1.6183603020345552e-05, "loss": 0.7684, "step": 13730 }, { "epoch": 0.2951411264338188, "grad_norm": 0.532032766630498, "learning_rate": 1.6178244525641238e-05, "loss": 0.7607, "step": 13740 }, { "epoch": 0.2953559307470894, "grad_norm": 0.5450386213659163, "learning_rate": 1.617288316022841e-05, "loss": 0.7853, "step": 13750 }, { "epoch": 0.29557073506036, "grad_norm": 0.5412583231864979, "learning_rate": 1.6167518926598215e-05, "loss": 0.7878, "step": 13760 }, { "epoch": 0.2957855393736306, "grad_norm": 0.5495103389055368, "learning_rate": 1.616215182724314e-05, "loss": 0.7768, "step": 13770 }, { "epoch": 0.29600034368690126, "grad_norm": 0.5735150374820255, "learning_rate": 1.6156781864656984e-05, "loss": 0.7826, "step": 13780 }, { "epoch": 0.29621514800017185, "grad_norm": 0.535148317937703, "learning_rate": 1.6151409041334903e-05, "loss": 0.7871, "step": 13790 }, { "epoch": 0.29642995231344244, "grad_norm": 0.5237498381145003, "learning_rate": 1.6146033359773356e-05, "loss": 0.7759, "step": 13800 }, { "epoch": 0.2966447566267131, "grad_norm": 0.5342689644277192, "learning_rate": 1.614065482247015e-05, "loss": 0.7736, "step": 13810 }, { "epoch": 0.29685956093998367, "grad_norm": 0.5322183986533855, "learning_rate": 1.6135273431924408e-05, "loss": 0.7913, "step": 13820 }, { "epoch": 0.29707436525325426, "grad_norm": 0.5536125865349972, "learning_rate": 1.6129889190636582e-05, "loss": 0.7836, "step": 13830 }, { "epoch": 0.2972891695665249, "grad_norm": 0.5320481447634432, "learning_rate": 1.6124502101108453e-05, "loss": 0.7802, "step": 13840 }, { "epoch": 0.2975039738797955, "grad_norm": 0.5337884474675023, "learning_rate": 1.6119112165843114e-05, "loss": 0.7711, "step": 13850 }, { "epoch": 0.29771877819306614, "grad_norm": 0.5448873311868312, "learning_rate": 1.6113719387344992e-05, "loss": 0.7763, "step": 13860 }, { "epoch": 0.2979335825063367, "grad_norm": 0.5390758922033408, "learning_rate": 1.6108323768119827e-05, "loss": 0.7739, "step": 13870 }, { "epoch": 0.2981483868196073, "grad_norm": 0.5365705766236541, "learning_rate": 1.6102925310674686e-05, "loss": 0.7847, "step": 13880 }, { "epoch": 0.29836319113287796, "grad_norm": 0.5590376051193626, "learning_rate": 1.6097524017517948e-05, "loss": 0.7854, "step": 13890 }, { "epoch": 0.29857799544614855, "grad_norm": 0.5309793859058803, "learning_rate": 1.609211989115932e-05, "loss": 0.7692, "step": 13900 }, { "epoch": 0.2987927997594192, "grad_norm": 0.5308210455003953, "learning_rate": 1.6086712934109804e-05, "loss": 0.785, "step": 13910 }, { "epoch": 0.2990076040726898, "grad_norm": 0.5490327798250314, "learning_rate": 1.608130314888174e-05, "loss": 0.7735, "step": 13920 }, { "epoch": 0.29922240838596037, "grad_norm": 0.5343320632700467, "learning_rate": 1.6075890537988778e-05, "loss": 0.7903, "step": 13930 }, { "epoch": 0.299437212699231, "grad_norm": 0.5479636639748584, "learning_rate": 1.6070475103945867e-05, "loss": 0.7889, "step": 13940 }, { "epoch": 0.2996520170125016, "grad_norm": 0.5527723222499173, "learning_rate": 1.6065056849269286e-05, "loss": 0.7697, "step": 13950 }, { "epoch": 0.29986682132577225, "grad_norm": 0.5576773833757296, "learning_rate": 1.6059635776476607e-05, "loss": 0.7796, "step": 13960 }, { "epoch": 0.30008162563904284, "grad_norm": 0.540017123946042, "learning_rate": 1.6054211888086726e-05, "loss": 0.7789, "step": 13970 }, { "epoch": 0.3002964299523134, "grad_norm": 0.5382534676965747, "learning_rate": 1.604878518661984e-05, "loss": 0.7864, "step": 13980 }, { "epoch": 0.30051123426558407, "grad_norm": 0.5620971765430572, "learning_rate": 1.6043355674597456e-05, "loss": 0.7724, "step": 13990 }, { "epoch": 0.30072603857885466, "grad_norm": 0.5512920688522195, "learning_rate": 1.603792335454238e-05, "loss": 0.7686, "step": 14000 }, { "epoch": 0.30094084289212525, "grad_norm": 0.5681571893413786, "learning_rate": 1.603248822897874e-05, "loss": 0.7796, "step": 14010 }, { "epoch": 0.3011556472053959, "grad_norm": 0.5466486772113822, "learning_rate": 1.6027050300431945e-05, "loss": 0.7809, "step": 14020 }, { "epoch": 0.3013704515186665, "grad_norm": 0.5235657333916433, "learning_rate": 1.6021609571428718e-05, "loss": 0.7868, "step": 14030 }, { "epoch": 0.3015852558319371, "grad_norm": 0.5456016921796866, "learning_rate": 1.6016166044497085e-05, "loss": 0.7682, "step": 14040 }, { "epoch": 0.3018000601452077, "grad_norm": 0.5352992818298888, "learning_rate": 1.6010719722166373e-05, "loss": 0.7809, "step": 14050 }, { "epoch": 0.3020148644584783, "grad_norm": 0.5282682341595342, "learning_rate": 1.6005270606967197e-05, "loss": 0.7844, "step": 14060 }, { "epoch": 0.30222966877174895, "grad_norm": 0.5139635209270281, "learning_rate": 1.5999818701431485e-05, "loss": 0.7667, "step": 14070 }, { "epoch": 0.30244447308501954, "grad_norm": 0.5381885807038028, "learning_rate": 1.599436400809245e-05, "loss": 0.7911, "step": 14080 }, { "epoch": 0.3026592773982902, "grad_norm": 0.5155734950185481, "learning_rate": 1.59889065294846e-05, "loss": 0.7656, "step": 14090 }, { "epoch": 0.30287408171156077, "grad_norm": 0.5564242549239296, "learning_rate": 1.5983446268143746e-05, "loss": 0.7822, "step": 14100 }, { "epoch": 0.30308888602483136, "grad_norm": 0.5187487100642706, "learning_rate": 1.5977983226606987e-05, "loss": 0.7908, "step": 14110 }, { "epoch": 0.303303690338102, "grad_norm": 0.5642387470463246, "learning_rate": 1.597251740741271e-05, "loss": 0.7885, "step": 14120 }, { "epoch": 0.3035184946513726, "grad_norm": 0.536223279687209, "learning_rate": 1.59670488131006e-05, "loss": 0.7718, "step": 14130 }, { "epoch": 0.3037332989646432, "grad_norm": 0.5600475408070319, "learning_rate": 1.5961577446211627e-05, "loss": 0.7776, "step": 14140 }, { "epoch": 0.30394810327791383, "grad_norm": 0.5437843190566551, "learning_rate": 1.5956103309288053e-05, "loss": 0.7685, "step": 14150 }, { "epoch": 0.3041629075911844, "grad_norm": 0.5628863721101852, "learning_rate": 1.5950626404873418e-05, "loss": 0.7918, "step": 14160 }, { "epoch": 0.30437771190445506, "grad_norm": 0.5149513964803338, "learning_rate": 1.594514673551256e-05, "loss": 0.7751, "step": 14170 }, { "epoch": 0.30459251621772565, "grad_norm": 0.5563402316389365, "learning_rate": 1.5939664303751596e-05, "loss": 0.761, "step": 14180 }, { "epoch": 0.30480732053099624, "grad_norm": 0.55820340770972, "learning_rate": 1.5934179112137923e-05, "loss": 0.8045, "step": 14190 }, { "epoch": 0.3050221248442669, "grad_norm": 0.5265505329965681, "learning_rate": 1.5928691163220228e-05, "loss": 0.7685, "step": 14200 }, { "epoch": 0.3052369291575375, "grad_norm": 0.5362485863281781, "learning_rate": 1.592320045954847e-05, "loss": 0.7765, "step": 14210 }, { "epoch": 0.3054517334708081, "grad_norm": 0.5529028127823579, "learning_rate": 1.5917707003673895e-05, "loss": 0.7899, "step": 14220 }, { "epoch": 0.3056665377840787, "grad_norm": 0.5546518668601578, "learning_rate": 1.591221079814903e-05, "loss": 0.7863, "step": 14230 }, { "epoch": 0.3058813420973493, "grad_norm": 0.5542533511738308, "learning_rate": 1.590671184552767e-05, "loss": 0.7713, "step": 14240 }, { "epoch": 0.30609614641061994, "grad_norm": 0.5234620656060907, "learning_rate": 1.5901210148364895e-05, "loss": 0.7673, "step": 14250 }, { "epoch": 0.30631095072389053, "grad_norm": 0.5687517220863817, "learning_rate": 1.5895705709217056e-05, "loss": 0.796, "step": 14260 }, { "epoch": 0.3065257550371612, "grad_norm": 0.5396488135156106, "learning_rate": 1.589019853064178e-05, "loss": 0.7718, "step": 14270 }, { "epoch": 0.30674055935043176, "grad_norm": 0.538832149688794, "learning_rate": 1.5884688615197964e-05, "loss": 0.7881, "step": 14280 }, { "epoch": 0.30695536366370235, "grad_norm": 0.5560141103067452, "learning_rate": 1.5879175965445782e-05, "loss": 0.7714, "step": 14290 }, { "epoch": 0.307170167976973, "grad_norm": 0.5565327133097089, "learning_rate": 1.5873660583946673e-05, "loss": 0.781, "step": 14300 }, { "epoch": 0.3073849722902436, "grad_norm": 0.561419912796845, "learning_rate": 1.5868142473263352e-05, "loss": 0.7683, "step": 14310 }, { "epoch": 0.3075997766035142, "grad_norm": 0.5469680337173222, "learning_rate": 1.5862621635959788e-05, "loss": 0.7952, "step": 14320 }, { "epoch": 0.3078145809167848, "grad_norm": 0.6073248751313165, "learning_rate": 1.5857098074601236e-05, "loss": 0.7978, "step": 14330 }, { "epoch": 0.3080293852300554, "grad_norm": 0.5435528930279098, "learning_rate": 1.5851571791754205e-05, "loss": 0.7731, "step": 14340 }, { "epoch": 0.30824418954332605, "grad_norm": 0.5201227548391855, "learning_rate": 1.584604278998647e-05, "loss": 0.7774, "step": 14350 }, { "epoch": 0.30845899385659664, "grad_norm": 0.5494541529358633, "learning_rate": 1.5840511071867065e-05, "loss": 0.7668, "step": 14360 }, { "epoch": 0.30867379816986723, "grad_norm": 0.5795936450755371, "learning_rate": 1.58349766399663e-05, "loss": 0.7727, "step": 14370 }, { "epoch": 0.3088886024831379, "grad_norm": 0.569527328062543, "learning_rate": 1.5829439496855735e-05, "loss": 0.7942, "step": 14380 }, { "epoch": 0.30910340679640846, "grad_norm": 0.5347014125524017, "learning_rate": 1.582389964510819e-05, "loss": 0.763, "step": 14390 }, { "epoch": 0.3093182111096791, "grad_norm": 0.5292356683082766, "learning_rate": 1.5818357087297746e-05, "loss": 0.7733, "step": 14400 }, { "epoch": 0.3095330154229497, "grad_norm": 0.5564018120086363, "learning_rate": 1.5812811825999742e-05, "loss": 0.7832, "step": 14410 }, { "epoch": 0.3097478197362203, "grad_norm": 0.5406172696967337, "learning_rate": 1.580726386379077e-05, "loss": 0.7755, "step": 14420 }, { "epoch": 0.30996262404949093, "grad_norm": 0.5461638054581808, "learning_rate": 1.5801713203248683e-05, "loss": 0.7625, "step": 14430 }, { "epoch": 0.3101774283627615, "grad_norm": 0.5454736211561317, "learning_rate": 1.5796159846952578e-05, "loss": 0.7815, "step": 14440 }, { "epoch": 0.3103922326760321, "grad_norm": 0.5474695031583966, "learning_rate": 1.579060379748282e-05, "loss": 0.793, "step": 14450 }, { "epoch": 0.31060703698930275, "grad_norm": 0.5312856498486378, "learning_rate": 1.5785045057421006e-05, "loss": 0.7645, "step": 14460 }, { "epoch": 0.31082184130257334, "grad_norm": 0.5165648723128465, "learning_rate": 1.5779483629349997e-05, "loss": 0.7595, "step": 14470 }, { "epoch": 0.311036645615844, "grad_norm": 0.5559441987275184, "learning_rate": 1.57739195158539e-05, "loss": 0.7839, "step": 14480 }, { "epoch": 0.3112514499291146, "grad_norm": 0.5769441262612243, "learning_rate": 1.5768352719518068e-05, "loss": 0.7738, "step": 14490 }, { "epoch": 0.31146625424238517, "grad_norm": 0.5473900907836559, "learning_rate": 1.57627832429291e-05, "loss": 0.7865, "step": 14500 }, { "epoch": 0.3116810585556558, "grad_norm": 0.529556628584659, "learning_rate": 1.5757211088674845e-05, "loss": 0.7745, "step": 14510 }, { "epoch": 0.3118958628689264, "grad_norm": 0.5361987622170035, "learning_rate": 1.575163625934439e-05, "loss": 0.7777, "step": 14520 }, { "epoch": 0.31211066718219704, "grad_norm": 0.5262241261547033, "learning_rate": 1.574605875752807e-05, "loss": 0.778, "step": 14530 }, { "epoch": 0.31232547149546763, "grad_norm": 0.5232473606874927, "learning_rate": 1.5740478585817455e-05, "loss": 0.7606, "step": 14540 }, { "epoch": 0.3125402758087382, "grad_norm": 0.5302585690675944, "learning_rate": 1.573489574680537e-05, "loss": 0.7738, "step": 14550 }, { "epoch": 0.31275508012200887, "grad_norm": 0.5338970296170283, "learning_rate": 1.5729310243085858e-05, "loss": 0.7769, "step": 14560 }, { "epoch": 0.31296988443527946, "grad_norm": 0.5337153071748394, "learning_rate": 1.5723722077254216e-05, "loss": 0.7738, "step": 14570 }, { "epoch": 0.31318468874855004, "grad_norm": 0.5408069906876969, "learning_rate": 1.5718131251906978e-05, "loss": 0.7837, "step": 14580 }, { "epoch": 0.3133994930618207, "grad_norm": 0.5317712434488392, "learning_rate": 1.5712537769641905e-05, "loss": 0.7823, "step": 14590 }, { "epoch": 0.3136142973750913, "grad_norm": 0.5312973847806638, "learning_rate": 1.5706941633058e-05, "loss": 0.7712, "step": 14600 }, { "epoch": 0.3138291016883619, "grad_norm": 0.5465034121886095, "learning_rate": 1.5701342844755492e-05, "loss": 0.7767, "step": 14610 }, { "epoch": 0.3140439060016325, "grad_norm": 0.5301591392732491, "learning_rate": 1.5695741407335852e-05, "loss": 0.7629, "step": 14620 }, { "epoch": 0.3142587103149031, "grad_norm": 0.5474541814548766, "learning_rate": 1.569013732340177e-05, "loss": 0.7772, "step": 14630 }, { "epoch": 0.31447351462817374, "grad_norm": 0.5168423966046714, "learning_rate": 1.5684530595557174e-05, "loss": 0.7595, "step": 14640 }, { "epoch": 0.31468831894144433, "grad_norm": 0.5322937512665464, "learning_rate": 1.5678921226407222e-05, "loss": 0.7806, "step": 14650 }, { "epoch": 0.314903123254715, "grad_norm": 0.5418401693011264, "learning_rate": 1.567330921855829e-05, "loss": 0.7729, "step": 14660 }, { "epoch": 0.31511792756798557, "grad_norm": 0.5301368247497126, "learning_rate": 1.566769457461799e-05, "loss": 0.7683, "step": 14670 }, { "epoch": 0.31533273188125616, "grad_norm": 0.5273233080040064, "learning_rate": 1.5662077297195154e-05, "loss": 0.7711, "step": 14680 }, { "epoch": 0.3155475361945268, "grad_norm": 0.5162157276165048, "learning_rate": 1.565645738889984e-05, "loss": 0.7688, "step": 14690 }, { "epoch": 0.3157623405077974, "grad_norm": 0.5571046212802432, "learning_rate": 1.5650834852343324e-05, "loss": 0.7767, "step": 14700 }, { "epoch": 0.31597714482106803, "grad_norm": 0.5174571005771366, "learning_rate": 1.5645209690138107e-05, "loss": 0.7717, "step": 14710 }, { "epoch": 0.3161919491343386, "grad_norm": 0.5250948275688863, "learning_rate": 1.5639581904897908e-05, "loss": 0.7875, "step": 14720 }, { "epoch": 0.3164067534476092, "grad_norm": 0.5449756805014006, "learning_rate": 1.5633951499237667e-05, "loss": 0.7702, "step": 14730 }, { "epoch": 0.31662155776087986, "grad_norm": 0.5249159478890357, "learning_rate": 1.5628318475773538e-05, "loss": 0.7704, "step": 14740 }, { "epoch": 0.31683636207415045, "grad_norm": 0.5363351915331944, "learning_rate": 1.5622682837122895e-05, "loss": 0.7795, "step": 14750 }, { "epoch": 0.31705116638742104, "grad_norm": 0.5366684931300841, "learning_rate": 1.5617044585904328e-05, "loss": 0.7788, "step": 14760 }, { "epoch": 0.3172659707006917, "grad_norm": 0.5176930213448244, "learning_rate": 1.5611403724737635e-05, "loss": 0.7729, "step": 14770 }, { "epoch": 0.31748077501396227, "grad_norm": 0.53731576710967, "learning_rate": 1.5605760256243834e-05, "loss": 0.7673, "step": 14780 }, { "epoch": 0.3176955793272329, "grad_norm": 0.5530960087868758, "learning_rate": 1.560011418304515e-05, "loss": 0.7719, "step": 14790 }, { "epoch": 0.3179103836405035, "grad_norm": 0.5325204498819602, "learning_rate": 1.559446550776502e-05, "loss": 0.7777, "step": 14800 }, { "epoch": 0.3181251879537741, "grad_norm": 0.5199725220262916, "learning_rate": 1.558881423302808e-05, "loss": 0.7752, "step": 14810 }, { "epoch": 0.31833999226704474, "grad_norm": 0.5370286678853574, "learning_rate": 1.5583160361460198e-05, "loss": 0.7681, "step": 14820 }, { "epoch": 0.3185547965803153, "grad_norm": 0.5257072462311427, "learning_rate": 1.5577503895688427e-05, "loss": 0.7619, "step": 14830 }, { "epoch": 0.31876960089358597, "grad_norm": 0.5343487750052494, "learning_rate": 1.557184483834103e-05, "loss": 0.7703, "step": 14840 }, { "epoch": 0.31898440520685656, "grad_norm": 0.5251831277823487, "learning_rate": 1.5566183192047476e-05, "loss": 0.7682, "step": 14850 }, { "epoch": 0.31919920952012715, "grad_norm": 0.5458229759413251, "learning_rate": 1.556051895943844e-05, "loss": 0.7716, "step": 14860 }, { "epoch": 0.3194140138333978, "grad_norm": 0.5272161429140526, "learning_rate": 1.5554852143145794e-05, "loss": 0.7759, "step": 14870 }, { "epoch": 0.3196288181466684, "grad_norm": 0.5344254727616319, "learning_rate": 1.5549182745802614e-05, "loss": 0.7777, "step": 14880 }, { "epoch": 0.31984362245993897, "grad_norm": 0.5548010647048678, "learning_rate": 1.5543510770043177e-05, "loss": 0.769, "step": 14890 }, { "epoch": 0.3200584267732096, "grad_norm": 0.5412970475079941, "learning_rate": 1.553783621850295e-05, "loss": 0.7703, "step": 14900 }, { "epoch": 0.3202732310864802, "grad_norm": 0.5351285928852882, "learning_rate": 1.55321590938186e-05, "loss": 0.7715, "step": 14910 }, { "epoch": 0.32048803539975085, "grad_norm": 0.5223639221501771, "learning_rate": 1.5526479398627997e-05, "loss": 0.767, "step": 14920 }, { "epoch": 0.32070283971302144, "grad_norm": 0.5335867628748313, "learning_rate": 1.5520797135570192e-05, "loss": 0.7703, "step": 14930 }, { "epoch": 0.320917644026292, "grad_norm": 0.5441683074016587, "learning_rate": 1.5515112307285446e-05, "loss": 0.7753, "step": 14940 }, { "epoch": 0.32113244833956267, "grad_norm": 0.5601042025539121, "learning_rate": 1.5509424916415198e-05, "loss": 0.7777, "step": 14950 }, { "epoch": 0.32134725265283326, "grad_norm": 0.538515402379057, "learning_rate": 1.5503734965602078e-05, "loss": 0.7808, "step": 14960 }, { "epoch": 0.3215620569661039, "grad_norm": 0.5436748120429375, "learning_rate": 1.5498042457489916e-05, "loss": 0.782, "step": 14970 }, { "epoch": 0.3217768612793745, "grad_norm": 0.5158949131886207, "learning_rate": 1.5492347394723726e-05, "loss": 0.7631, "step": 14980 }, { "epoch": 0.3219916655926451, "grad_norm": 0.5328439457730709, "learning_rate": 1.5486649779949695e-05, "loss": 0.7774, "step": 14990 }, { "epoch": 0.3222064699059157, "grad_norm": 0.5433132241965292, "learning_rate": 1.5480949615815223e-05, "loss": 0.7834, "step": 15000 }, { "epoch": 0.3224212742191863, "grad_norm": 0.5470052170551871, "learning_rate": 1.547524690496887e-05, "loss": 0.7727, "step": 15010 }, { "epoch": 0.3226360785324569, "grad_norm": 0.5466443586756153, "learning_rate": 1.5469541650060396e-05, "loss": 0.7803, "step": 15020 }, { "epoch": 0.32285088284572755, "grad_norm": 0.5351421951971927, "learning_rate": 1.5463833853740723e-05, "loss": 0.7955, "step": 15030 }, { "epoch": 0.32306568715899814, "grad_norm": 0.5236071642893849, "learning_rate": 1.5458123518661984e-05, "loss": 0.774, "step": 15040 }, { "epoch": 0.3232804914722688, "grad_norm": 0.5201113478881045, "learning_rate": 1.5452410647477462e-05, "loss": 0.7765, "step": 15050 }, { "epoch": 0.32349529578553937, "grad_norm": 0.551550727831456, "learning_rate": 1.544669524284163e-05, "loss": 0.7749, "step": 15060 }, { "epoch": 0.32371010009880996, "grad_norm": 0.5396886090448376, "learning_rate": 1.5440977307410148e-05, "loss": 0.7748, "step": 15070 }, { "epoch": 0.3239249044120806, "grad_norm": 0.5490265215897929, "learning_rate": 1.5435256843839837e-05, "loss": 0.7782, "step": 15080 }, { "epoch": 0.3241397087253512, "grad_norm": 0.5680139354556848, "learning_rate": 1.5429533854788698e-05, "loss": 0.769, "step": 15090 }, { "epoch": 0.32435451303862184, "grad_norm": 0.5724567681997974, "learning_rate": 1.542380834291591e-05, "loss": 0.7743, "step": 15100 }, { "epoch": 0.32456931735189243, "grad_norm": 0.5551606358878742, "learning_rate": 1.5418080310881816e-05, "loss": 0.7874, "step": 15110 }, { "epoch": 0.324784121665163, "grad_norm": 0.5273126015719424, "learning_rate": 1.5412349761347937e-05, "loss": 0.7681, "step": 15120 }, { "epoch": 0.32499892597843366, "grad_norm": 0.5347341788619094, "learning_rate": 1.5406616696976957e-05, "loss": 0.7616, "step": 15130 }, { "epoch": 0.32521373029170425, "grad_norm": 0.5239352669850086, "learning_rate": 1.540088112043274e-05, "loss": 0.7626, "step": 15140 }, { "epoch": 0.3254285346049749, "grad_norm": 0.5303037869360442, "learning_rate": 1.53951430343803e-05, "loss": 0.7695, "step": 15150 }, { "epoch": 0.3256433389182455, "grad_norm": 0.5396447269485543, "learning_rate": 1.5389402441485835e-05, "loss": 0.7534, "step": 15160 }, { "epoch": 0.3258581432315161, "grad_norm": 0.5288994376628046, "learning_rate": 1.5383659344416696e-05, "loss": 0.7661, "step": 15170 }, { "epoch": 0.3260729475447867, "grad_norm": 0.5366154432296876, "learning_rate": 1.5377913745841404e-05, "loss": 0.7796, "step": 15180 }, { "epoch": 0.3262877518580573, "grad_norm": 0.5221692922283262, "learning_rate": 1.5372165648429633e-05, "loss": 0.7726, "step": 15190 }, { "epoch": 0.3265025561713279, "grad_norm": 0.5422324565519508, "learning_rate": 1.536641505485223e-05, "loss": 0.7633, "step": 15200 }, { "epoch": 0.32671736048459854, "grad_norm": 0.5408071342677044, "learning_rate": 1.5360661967781194e-05, "loss": 0.7689, "step": 15210 }, { "epoch": 0.32693216479786913, "grad_norm": 0.5454745974106339, "learning_rate": 1.5354906389889686e-05, "loss": 0.7741, "step": 15220 }, { "epoch": 0.3271469691111398, "grad_norm": 0.5218591885366504, "learning_rate": 1.534914832385203e-05, "loss": 0.7909, "step": 15230 }, { "epoch": 0.32736177342441036, "grad_norm": 0.5519162115511386, "learning_rate": 1.5343387772343687e-05, "loss": 0.7809, "step": 15240 }, { "epoch": 0.32757657773768095, "grad_norm": 0.5327394461244905, "learning_rate": 1.533762473804129e-05, "loss": 0.773, "step": 15250 }, { "epoch": 0.3277913820509516, "grad_norm": 0.5584463854444557, "learning_rate": 1.5331859223622623e-05, "loss": 0.7735, "step": 15260 }, { "epoch": 0.3280061863642222, "grad_norm": 0.5390984045147804, "learning_rate": 1.532609123176662e-05, "loss": 0.7757, "step": 15270 }, { "epoch": 0.32822099067749283, "grad_norm": 0.543837161370247, "learning_rate": 1.5320320765153367e-05, "loss": 0.762, "step": 15280 }, { "epoch": 0.3284357949907634, "grad_norm": 0.5352816574606879, "learning_rate": 1.53145478264641e-05, "loss": 0.7693, "step": 15290 }, { "epoch": 0.328650599304034, "grad_norm": 0.5605556044481591, "learning_rate": 1.5308772418381196e-05, "loss": 0.7623, "step": 15300 }, { "epoch": 0.32886540361730465, "grad_norm": 0.5446921802277784, "learning_rate": 1.5302994543588194e-05, "loss": 0.774, "step": 15310 }, { "epoch": 0.32908020793057524, "grad_norm": 0.5151829148535916, "learning_rate": 1.529721420476977e-05, "loss": 0.7826, "step": 15320 }, { "epoch": 0.32929501224384583, "grad_norm": 0.5223909324949548, "learning_rate": 1.5291431404611744e-05, "loss": 0.7593, "step": 15330 }, { "epoch": 0.3295098165571165, "grad_norm": 0.5267824527110152, "learning_rate": 1.5285646145801086e-05, "loss": 0.7665, "step": 15340 }, { "epoch": 0.32972462087038706, "grad_norm": 0.5466419782290777, "learning_rate": 1.5279858431025903e-05, "loss": 0.7704, "step": 15350 }, { "epoch": 0.3299394251836577, "grad_norm": 0.5557083967709183, "learning_rate": 1.527406826297544e-05, "loss": 0.7661, "step": 15360 }, { "epoch": 0.3301542294969283, "grad_norm": 0.5337371028909507, "learning_rate": 1.526827564434009e-05, "loss": 0.7604, "step": 15370 }, { "epoch": 0.3303690338101989, "grad_norm": 0.5703493813264273, "learning_rate": 1.5262480577811386e-05, "loss": 0.7672, "step": 15380 }, { "epoch": 0.33058383812346953, "grad_norm": 0.5472412709147122, "learning_rate": 1.5256683066081986e-05, "loss": 0.7709, "step": 15390 }, { "epoch": 0.3307986424367401, "grad_norm": 0.5457023754017395, "learning_rate": 1.5250883111845697e-05, "loss": 0.787, "step": 15400 }, { "epoch": 0.33101344675001076, "grad_norm": 0.5416955760823877, "learning_rate": 1.5245080717797454e-05, "loss": 0.759, "step": 15410 }, { "epoch": 0.33122825106328135, "grad_norm": 0.5138493444141494, "learning_rate": 1.5239275886633326e-05, "loss": 0.7735, "step": 15420 }, { "epoch": 0.33144305537655194, "grad_norm": 0.5211626703386022, "learning_rate": 1.5233468621050518e-05, "loss": 0.7784, "step": 15430 }, { "epoch": 0.3316578596898226, "grad_norm": 0.5275918206293356, "learning_rate": 1.5227658923747364e-05, "loss": 0.7833, "step": 15440 }, { "epoch": 0.3318726640030932, "grad_norm": 0.6206866088264538, "learning_rate": 1.5221846797423325e-05, "loss": 0.7708, "step": 15450 }, { "epoch": 0.33208746831636377, "grad_norm": 0.5261380809398264, "learning_rate": 1.5216032244778993e-05, "loss": 0.7782, "step": 15460 }, { "epoch": 0.3323022726296344, "grad_norm": 0.541782059211599, "learning_rate": 1.5210215268516093e-05, "loss": 0.7608, "step": 15470 }, { "epoch": 0.332517076942905, "grad_norm": 0.5240473732673129, "learning_rate": 1.520439587133747e-05, "loss": 0.7726, "step": 15480 }, { "epoch": 0.33273188125617564, "grad_norm": 0.5191076944218657, "learning_rate": 1.5198574055947091e-05, "loss": 0.7642, "step": 15490 }, { "epoch": 0.33294668556944623, "grad_norm": 0.539873653252131, "learning_rate": 1.5192749825050052e-05, "loss": 0.7636, "step": 15500 }, { "epoch": 0.3331614898827168, "grad_norm": 0.531660226924968, "learning_rate": 1.518692318135257e-05, "loss": 0.7796, "step": 15510 }, { "epoch": 0.33337629419598747, "grad_norm": 0.5173762361506008, "learning_rate": 1.5181094127561982e-05, "loss": 0.7615, "step": 15520 }, { "epoch": 0.33359109850925805, "grad_norm": 0.5114268089145959, "learning_rate": 1.5175262666386745e-05, "loss": 0.7559, "step": 15530 }, { "epoch": 0.3338059028225287, "grad_norm": 0.5271733156368176, "learning_rate": 1.5169428800536441e-05, "loss": 0.7711, "step": 15540 }, { "epoch": 0.3340207071357993, "grad_norm": 0.5135401558436904, "learning_rate": 1.5163592532721763e-05, "loss": 0.7776, "step": 15550 }, { "epoch": 0.3342355114490699, "grad_norm": 0.53726076093529, "learning_rate": 1.515775386565451e-05, "loss": 0.7594, "step": 15560 }, { "epoch": 0.3344503157623405, "grad_norm": 0.5184174928971732, "learning_rate": 1.515191280204762e-05, "loss": 0.7774, "step": 15570 }, { "epoch": 0.3346651200756111, "grad_norm": 0.5225627226289906, "learning_rate": 1.514606934461512e-05, "loss": 0.7653, "step": 15580 }, { "epoch": 0.33487992438888176, "grad_norm": 0.521185908731502, "learning_rate": 1.5140223496072168e-05, "loss": 0.7702, "step": 15590 }, { "epoch": 0.33509472870215234, "grad_norm": 0.5605456313685063, "learning_rate": 1.5134375259135024e-05, "loss": 0.7725, "step": 15600 }, { "epoch": 0.33530953301542293, "grad_norm": 0.5504337164442501, "learning_rate": 1.5128524636521058e-05, "loss": 0.7502, "step": 15610 }, { "epoch": 0.3355243373286936, "grad_norm": 0.5304219453129593, "learning_rate": 1.5122671630948748e-05, "loss": 0.7731, "step": 15620 }, { "epoch": 0.33573914164196417, "grad_norm": 0.5405431462257487, "learning_rate": 1.5116816245137684e-05, "loss": 0.7787, "step": 15630 }, { "epoch": 0.33595394595523476, "grad_norm": 0.540626596116053, "learning_rate": 1.5110958481808558e-05, "loss": 0.7707, "step": 15640 }, { "epoch": 0.3361687502685054, "grad_norm": 0.5107167602298724, "learning_rate": 1.5105098343683166e-05, "loss": 0.7853, "step": 15650 }, { "epoch": 0.336383554581776, "grad_norm": 0.5466449645523599, "learning_rate": 1.5099235833484411e-05, "loss": 0.7756, "step": 15660 }, { "epoch": 0.33659835889504663, "grad_norm": 0.5123157159149067, "learning_rate": 1.5093370953936298e-05, "loss": 0.7635, "step": 15670 }, { "epoch": 0.3368131632083172, "grad_norm": 0.5561787006388946, "learning_rate": 1.5087503707763925e-05, "loss": 0.7777, "step": 15680 }, { "epoch": 0.3370279675215878, "grad_norm": 0.5423194514303097, "learning_rate": 1.5081634097693498e-05, "loss": 0.7709, "step": 15690 }, { "epoch": 0.33724277183485846, "grad_norm": 0.5131134543368485, "learning_rate": 1.5075762126452324e-05, "loss": 0.7536, "step": 15700 }, { "epoch": 0.33745757614812905, "grad_norm": 0.5263701017681474, "learning_rate": 1.5069887796768798e-05, "loss": 0.7646, "step": 15710 }, { "epoch": 0.3376723804613997, "grad_norm": 0.5335909937404967, "learning_rate": 1.5064011111372417e-05, "loss": 0.774, "step": 15720 }, { "epoch": 0.3378871847746703, "grad_norm": 0.5205555068375117, "learning_rate": 1.5058132072993767e-05, "loss": 0.7729, "step": 15730 }, { "epoch": 0.33810198908794087, "grad_norm": 0.529162741413939, "learning_rate": 1.5052250684364535e-05, "loss": 0.7823, "step": 15740 }, { "epoch": 0.3383167934012115, "grad_norm": 0.5335725684320926, "learning_rate": 1.5046366948217495e-05, "loss": 0.7881, "step": 15750 }, { "epoch": 0.3385315977144821, "grad_norm": 0.5422608148576844, "learning_rate": 1.5040480867286511e-05, "loss": 0.7723, "step": 15760 }, { "epoch": 0.3387464020277527, "grad_norm": 0.525108601464528, "learning_rate": 1.5034592444306541e-05, "loss": 0.7661, "step": 15770 }, { "epoch": 0.33896120634102334, "grad_norm": 0.5212632664656071, "learning_rate": 1.5028701682013626e-05, "loss": 0.7722, "step": 15780 }, { "epoch": 0.3391760106542939, "grad_norm": 0.523147333913358, "learning_rate": 1.50228085831449e-05, "loss": 0.766, "step": 15790 }, { "epoch": 0.33939081496756457, "grad_norm": 0.5155900387298158, "learning_rate": 1.5016913150438575e-05, "loss": 0.7544, "step": 15800 }, { "epoch": 0.33960561928083516, "grad_norm": 2.57094670571956, "learning_rate": 1.5011015386633954e-05, "loss": 0.7738, "step": 15810 }, { "epoch": 0.33982042359410575, "grad_norm": 0.5163335640756341, "learning_rate": 1.5005115294471422e-05, "loss": 0.755, "step": 15820 }, { "epoch": 0.3400352279073764, "grad_norm": 0.5597200416389281, "learning_rate": 1.499921287669244e-05, "loss": 0.7905, "step": 15830 }, { "epoch": 0.340250032220647, "grad_norm": 0.5137889894949109, "learning_rate": 1.4993308136039557e-05, "loss": 0.7632, "step": 15840 }, { "epoch": 0.3404648365339176, "grad_norm": 0.5245555303836951, "learning_rate": 1.4987401075256398e-05, "loss": 0.7573, "step": 15850 }, { "epoch": 0.3406796408471882, "grad_norm": 0.5466373460073314, "learning_rate": 1.498149169708767e-05, "loss": 0.7738, "step": 15860 }, { "epoch": 0.3408944451604588, "grad_norm": 0.5229866422016913, "learning_rate": 1.4975580004279149e-05, "loss": 0.7583, "step": 15870 }, { "epoch": 0.34110924947372945, "grad_norm": 0.5114015357336622, "learning_rate": 1.4969665999577693e-05, "loss": 0.7522, "step": 15880 }, { "epoch": 0.34132405378700004, "grad_norm": 0.5125004960033485, "learning_rate": 1.4963749685731231e-05, "loss": 0.7739, "step": 15890 }, { "epoch": 0.3415388581002706, "grad_norm": 0.5365066975799684, "learning_rate": 1.4957831065488763e-05, "loss": 0.7767, "step": 15900 }, { "epoch": 0.34175366241354127, "grad_norm": 0.5224206864785478, "learning_rate": 1.495191014160037e-05, "loss": 0.7736, "step": 15910 }, { "epoch": 0.34196846672681186, "grad_norm": 0.5138735687941413, "learning_rate": 1.4945986916817194e-05, "loss": 0.7558, "step": 15920 }, { "epoch": 0.3421832710400825, "grad_norm": 0.5156382777636157, "learning_rate": 1.494006139389145e-05, "loss": 0.755, "step": 15930 }, { "epoch": 0.3423980753533531, "grad_norm": 0.5173997349464476, "learning_rate": 1.4934133575576418e-05, "loss": 0.7644, "step": 15940 }, { "epoch": 0.3426128796666237, "grad_norm": 0.5773347686325697, "learning_rate": 1.4928203464626446e-05, "loss": 0.7803, "step": 15950 }, { "epoch": 0.3428276839798943, "grad_norm": 0.5238727000577228, "learning_rate": 1.4922271063796946e-05, "loss": 0.7688, "step": 15960 }, { "epoch": 0.3430424882931649, "grad_norm": 0.49544133793048006, "learning_rate": 1.4916336375844402e-05, "loss": 0.765, "step": 15970 }, { "epoch": 0.34325729260643556, "grad_norm": 0.5559128491079445, "learning_rate": 1.4910399403526355e-05, "loss": 0.7643, "step": 15980 }, { "epoch": 0.34347209691970615, "grad_norm": 0.5224474824510437, "learning_rate": 1.4904460149601399e-05, "loss": 0.7605, "step": 15990 }, { "epoch": 0.34368690123297674, "grad_norm": 0.5084160847363332, "learning_rate": 1.48985186168292e-05, "loss": 0.7525, "step": 16000 }, { "epoch": 0.3439017055462474, "grad_norm": 0.5178973898547171, "learning_rate": 1.489257480797048e-05, "loss": 0.7623, "step": 16010 }, { "epoch": 0.34411650985951797, "grad_norm": 0.5480129624627523, "learning_rate": 1.4886628725787017e-05, "loss": 0.7831, "step": 16020 }, { "epoch": 0.3443313141727886, "grad_norm": 0.524153874187991, "learning_rate": 1.4880680373041646e-05, "loss": 0.7659, "step": 16030 }, { "epoch": 0.3445461184860592, "grad_norm": 0.5488165779868663, "learning_rate": 1.4874729752498256e-05, "loss": 0.7508, "step": 16040 }, { "epoch": 0.3447609227993298, "grad_norm": 0.6146964896934437, "learning_rate": 1.4868776866921792e-05, "loss": 0.7708, "step": 16050 }, { "epoch": 0.34497572711260044, "grad_norm": 0.5367282750533525, "learning_rate": 1.4862821719078246e-05, "loss": 0.7647, "step": 16060 }, { "epoch": 0.34519053142587103, "grad_norm": 0.5370082911271288, "learning_rate": 1.4856864311734667e-05, "loss": 0.7716, "step": 16070 }, { "epoch": 0.3454053357391416, "grad_norm": 0.5210318070953477, "learning_rate": 1.4850904647659155e-05, "loss": 0.7562, "step": 16080 }, { "epoch": 0.34562014005241226, "grad_norm": 0.5426172156914827, "learning_rate": 1.4844942729620853e-05, "loss": 0.7517, "step": 16090 }, { "epoch": 0.34583494436568285, "grad_norm": 0.5353756178515557, "learning_rate": 1.4838978560389952e-05, "loss": 0.777, "step": 16100 }, { "epoch": 0.3460497486789535, "grad_norm": 0.5458690566171005, "learning_rate": 1.4833012142737696e-05, "loss": 0.7596, "step": 16110 }, { "epoch": 0.3462645529922241, "grad_norm": 0.5222605704575792, "learning_rate": 1.4827043479436362e-05, "loss": 0.7641, "step": 16120 }, { "epoch": 0.3464793573054947, "grad_norm": 0.517515304030216, "learning_rate": 1.4821072573259283e-05, "loss": 0.7735, "step": 16130 }, { "epoch": 0.3466941616187653, "grad_norm": 0.5619458732612833, "learning_rate": 1.4815099426980825e-05, "loss": 0.7646, "step": 16140 }, { "epoch": 0.3469089659320359, "grad_norm": 0.5308436572477332, "learning_rate": 1.4809124043376399e-05, "loss": 0.7767, "step": 16150 }, { "epoch": 0.34712377024530655, "grad_norm": 0.5135551609727562, "learning_rate": 1.4803146425222457e-05, "loss": 0.7611, "step": 16160 }, { "epoch": 0.34733857455857714, "grad_norm": 0.5330663686858607, "learning_rate": 1.479716657529648e-05, "loss": 0.7702, "step": 16170 }, { "epoch": 0.34755337887184773, "grad_norm": 0.5359601127061994, "learning_rate": 1.4791184496377e-05, "loss": 0.776, "step": 16180 }, { "epoch": 0.3477681831851184, "grad_norm": 0.5140711185809557, "learning_rate": 1.4785200191243574e-05, "loss": 0.7595, "step": 16190 }, { "epoch": 0.34798298749838896, "grad_norm": 0.5406658844636193, "learning_rate": 1.4779213662676797e-05, "loss": 0.7729, "step": 16200 }, { "epoch": 0.34819779181165955, "grad_norm": 0.5104834415117768, "learning_rate": 1.4773224913458298e-05, "loss": 0.7811, "step": 16210 }, { "epoch": 0.3484125961249302, "grad_norm": 0.5595639311867889, "learning_rate": 1.4767233946370735e-05, "loss": 0.7691, "step": 16220 }, { "epoch": 0.3486274004382008, "grad_norm": 0.5453224484229146, "learning_rate": 1.4761240764197804e-05, "loss": 0.7711, "step": 16230 }, { "epoch": 0.34884220475147143, "grad_norm": 0.5533371724805077, "learning_rate": 1.4755245369724219e-05, "loss": 0.7647, "step": 16240 }, { "epoch": 0.349057009064742, "grad_norm": 0.5029137026412714, "learning_rate": 1.4749247765735727e-05, "loss": 0.7625, "step": 16250 }, { "epoch": 0.3492718133780126, "grad_norm": 0.5407424760271811, "learning_rate": 1.474324795501911e-05, "loss": 0.7777, "step": 16260 }, { "epoch": 0.34948661769128325, "grad_norm": 0.5360642655145464, "learning_rate": 1.4737245940362158e-05, "loss": 0.7644, "step": 16270 }, { "epoch": 0.34970142200455384, "grad_norm": 0.5431989168889423, "learning_rate": 1.47312417245537e-05, "loss": 0.7725, "step": 16280 }, { "epoch": 0.3499162263178245, "grad_norm": 0.5212803701855959, "learning_rate": 1.472523531038358e-05, "loss": 0.7503, "step": 16290 }, { "epoch": 0.3501310306310951, "grad_norm": 0.5375323551611733, "learning_rate": 1.471922670064267e-05, "loss": 0.7599, "step": 16300 }, { "epoch": 0.35034583494436566, "grad_norm": 0.5239870812277655, "learning_rate": 1.4713215898122857e-05, "loss": 0.7863, "step": 16310 }, { "epoch": 0.3505606392576363, "grad_norm": 0.5180937115766086, "learning_rate": 1.4707202905617042e-05, "loss": 0.7689, "step": 16320 }, { "epoch": 0.3507754435709069, "grad_norm": 0.5417292224014176, "learning_rate": 1.4701187725919157e-05, "loss": 0.7527, "step": 16330 }, { "epoch": 0.35099024788417754, "grad_norm": 0.5357164040418824, "learning_rate": 1.4695170361824136e-05, "loss": 0.7642, "step": 16340 }, { "epoch": 0.35120505219744813, "grad_norm": 0.5303332337613179, "learning_rate": 1.468915081612794e-05, "loss": 0.7566, "step": 16350 }, { "epoch": 0.3514198565107187, "grad_norm": 0.5382188260863363, "learning_rate": 1.468312909162754e-05, "loss": 0.7672, "step": 16360 }, { "epoch": 0.35163466082398936, "grad_norm": 0.5385128353648729, "learning_rate": 1.467710519112091e-05, "loss": 0.7809, "step": 16370 }, { "epoch": 0.35184946513725995, "grad_norm": 0.5310058917606305, "learning_rate": 1.467107911740705e-05, "loss": 0.7654, "step": 16380 }, { "epoch": 0.35206426945053054, "grad_norm": 0.5031221816084535, "learning_rate": 1.4665050873285957e-05, "loss": 0.7507, "step": 16390 }, { "epoch": 0.3522790737638012, "grad_norm": 0.54140785390428, "learning_rate": 1.4659020461558649e-05, "loss": 0.7545, "step": 16400 }, { "epoch": 0.3524938780770718, "grad_norm": 0.5370610309213006, "learning_rate": 1.4652987885027142e-05, "loss": 0.7658, "step": 16410 }, { "epoch": 0.3527086823903424, "grad_norm": 0.5490842521616487, "learning_rate": 1.4646953146494454e-05, "loss": 0.7606, "step": 16420 }, { "epoch": 0.352923486703613, "grad_norm": 0.5131178943599031, "learning_rate": 1.4640916248764621e-05, "loss": 0.7738, "step": 16430 }, { "epoch": 0.3531382910168836, "grad_norm": 0.5328970310455506, "learning_rate": 1.4634877194642672e-05, "loss": 0.7774, "step": 16440 }, { "epoch": 0.35335309533015424, "grad_norm": 0.5137803015241591, "learning_rate": 1.462883598693464e-05, "loss": 0.7728, "step": 16450 }, { "epoch": 0.35356789964342483, "grad_norm": 0.5199243192373794, "learning_rate": 1.4622792628447562e-05, "loss": 0.77, "step": 16460 }, { "epoch": 0.3537827039566955, "grad_norm": 0.5425545285150617, "learning_rate": 1.4616747121989474e-05, "loss": 0.777, "step": 16470 }, { "epoch": 0.35399750826996607, "grad_norm": 0.5395199010657613, "learning_rate": 1.4610699470369401e-05, "loss": 0.7688, "step": 16480 }, { "epoch": 0.35421231258323665, "grad_norm": 0.5475850285247497, "learning_rate": 1.4604649676397377e-05, "loss": 0.769, "step": 16490 }, { "epoch": 0.3544271168965073, "grad_norm": 0.5170429327520523, "learning_rate": 1.4598597742884429e-05, "loss": 0.7693, "step": 16500 }, { "epoch": 0.3546419212097779, "grad_norm": 0.5402757575811128, "learning_rate": 1.4592543672642567e-05, "loss": 0.7663, "step": 16510 }, { "epoch": 0.3548567255230485, "grad_norm": 0.536139194934942, "learning_rate": 1.4586487468484809e-05, "loss": 0.777, "step": 16520 }, { "epoch": 0.3550715298363191, "grad_norm": 0.5276298531695289, "learning_rate": 1.4580429133225153e-05, "loss": 0.7669, "step": 16530 }, { "epoch": 0.3552863341495897, "grad_norm": 0.5245685397779214, "learning_rate": 1.4574368669678598e-05, "loss": 0.7905, "step": 16540 }, { "epoch": 0.35550113846286036, "grad_norm": 0.5281075002913366, "learning_rate": 1.4568306080661118e-05, "loss": 0.778, "step": 16550 }, { "epoch": 0.35571594277613094, "grad_norm": 0.5113618186605529, "learning_rate": 1.4562241368989691e-05, "loss": 0.7669, "step": 16560 }, { "epoch": 0.35593074708940153, "grad_norm": 0.506708948383745, "learning_rate": 1.4556174537482267e-05, "loss": 0.7476, "step": 16570 }, { "epoch": 0.3561455514026722, "grad_norm": 0.5508043321880731, "learning_rate": 1.4550105588957789e-05, "loss": 0.7609, "step": 16580 }, { "epoch": 0.35636035571594277, "grad_norm": 1.4314950449809294, "learning_rate": 1.4544034526236174e-05, "loss": 0.7623, "step": 16590 }, { "epoch": 0.3565751600292134, "grad_norm": 0.5244372331321259, "learning_rate": 1.453796135213834e-05, "loss": 0.7669, "step": 16600 }, { "epoch": 0.356789964342484, "grad_norm": 0.5224396045025858, "learning_rate": 1.4531886069486169e-05, "loss": 0.7605, "step": 16610 }, { "epoch": 0.3570047686557546, "grad_norm": 0.5070138150623654, "learning_rate": 1.4525808681102533e-05, "loss": 0.7668, "step": 16620 }, { "epoch": 0.35721957296902523, "grad_norm": 0.5288742137487699, "learning_rate": 1.4519729189811271e-05, "loss": 0.7554, "step": 16630 }, { "epoch": 0.3574343772822958, "grad_norm": 0.5677338154821888, "learning_rate": 1.4513647598437208e-05, "loss": 0.7609, "step": 16640 }, { "epoch": 0.3576491815955664, "grad_norm": 0.5357651788636495, "learning_rate": 1.4507563909806146e-05, "loss": 0.7742, "step": 16650 }, { "epoch": 0.35786398590883706, "grad_norm": 0.5179083963651603, "learning_rate": 1.4501478126744855e-05, "loss": 0.7726, "step": 16660 }, { "epoch": 0.35807879022210765, "grad_norm": 0.5295056672371503, "learning_rate": 1.4495390252081082e-05, "loss": 0.7458, "step": 16670 }, { "epoch": 0.3582935945353783, "grad_norm": 0.5214891906282768, "learning_rate": 1.448930028864355e-05, "loss": 0.7619, "step": 16680 }, { "epoch": 0.3585083988486489, "grad_norm": 0.5578244478041603, "learning_rate": 1.4483208239261943e-05, "loss": 0.7703, "step": 16690 }, { "epoch": 0.35872320316191947, "grad_norm": 0.5376124885496097, "learning_rate": 1.4477114106766921e-05, "loss": 0.7546, "step": 16700 }, { "epoch": 0.3589380074751901, "grad_norm": 0.5177377132348334, "learning_rate": 1.4471017893990107e-05, "loss": 0.7662, "step": 16710 }, { "epoch": 0.3591528117884607, "grad_norm": 0.5134062584490465, "learning_rate": 1.4464919603764097e-05, "loss": 0.7667, "step": 16720 }, { "epoch": 0.35936761610173135, "grad_norm": 0.5054504951310723, "learning_rate": 1.4458819238922446e-05, "loss": 0.7467, "step": 16730 }, { "epoch": 0.35958242041500194, "grad_norm": 0.5344939536393053, "learning_rate": 1.445271680229968e-05, "loss": 0.7705, "step": 16740 }, { "epoch": 0.3597972247282725, "grad_norm": 0.5470998293542991, "learning_rate": 1.4446612296731282e-05, "loss": 0.7564, "step": 16750 }, { "epoch": 0.36001202904154317, "grad_norm": 0.5743146214103152, "learning_rate": 1.4440505725053693e-05, "loss": 0.7623, "step": 16760 }, { "epoch": 0.36022683335481376, "grad_norm": 0.5333319615482879, "learning_rate": 1.4434397090104324e-05, "loss": 0.7714, "step": 16770 }, { "epoch": 0.3604416376680844, "grad_norm": 0.5202228507285751, "learning_rate": 1.442828639472154e-05, "loss": 0.7515, "step": 16780 }, { "epoch": 0.360656441981355, "grad_norm": 0.5277143639598636, "learning_rate": 1.4422173641744663e-05, "loss": 0.7633, "step": 16790 }, { "epoch": 0.3608712462946256, "grad_norm": 0.5468528449571223, "learning_rate": 1.4416058834013967e-05, "loss": 0.7629, "step": 16800 }, { "epoch": 0.3610860506078962, "grad_norm": 0.5389600692166813, "learning_rate": 1.440994197437069e-05, "loss": 0.7681, "step": 16810 }, { "epoch": 0.3613008549211668, "grad_norm": 0.5335455178234667, "learning_rate": 1.4403823065657012e-05, "loss": 0.7607, "step": 16820 }, { "epoch": 0.3615156592344374, "grad_norm": 0.5400771216289987, "learning_rate": 1.4397702110716076e-05, "loss": 0.7659, "step": 16830 }, { "epoch": 0.36173046354770805, "grad_norm": 0.5602481821229619, "learning_rate": 1.4391579112391969e-05, "loss": 0.7765, "step": 16840 }, { "epoch": 0.36194526786097864, "grad_norm": 0.5272902803012298, "learning_rate": 1.4385454073529731e-05, "loss": 0.7649, "step": 16850 }, { "epoch": 0.3621600721742493, "grad_norm": 0.5187058176818714, "learning_rate": 1.4379326996975347e-05, "loss": 0.7648, "step": 16860 }, { "epoch": 0.36237487648751987, "grad_norm": 0.5331452093710046, "learning_rate": 1.4373197885575752e-05, "loss": 0.7713, "step": 16870 }, { "epoch": 0.36258968080079046, "grad_norm": 0.5302443268053321, "learning_rate": 1.4367066742178824e-05, "loss": 0.7568, "step": 16880 }, { "epoch": 0.3628044851140611, "grad_norm": 0.5114903800320675, "learning_rate": 1.4360933569633386e-05, "loss": 0.7532, "step": 16890 }, { "epoch": 0.3630192894273317, "grad_norm": 0.5240676752585789, "learning_rate": 1.4354798370789204e-05, "loss": 0.7726, "step": 16900 }, { "epoch": 0.36323409374060234, "grad_norm": 0.5477577408032249, "learning_rate": 1.4348661148496985e-05, "loss": 0.773, "step": 16910 }, { "epoch": 0.3634488980538729, "grad_norm": 0.5276323223923417, "learning_rate": 1.4342521905608377e-05, "loss": 0.7604, "step": 16920 }, { "epoch": 0.3636637023671435, "grad_norm": 0.5170851911406245, "learning_rate": 1.4336380644975964e-05, "loss": 0.7491, "step": 16930 }, { "epoch": 0.36387850668041416, "grad_norm": 0.5356549892783816, "learning_rate": 1.433023736945328e-05, "loss": 0.7657, "step": 16940 }, { "epoch": 0.36409331099368475, "grad_norm": 0.5428984566871172, "learning_rate": 1.4324092081894771e-05, "loss": 0.7661, "step": 16950 }, { "epoch": 0.36430811530695534, "grad_norm": 0.5324713712005715, "learning_rate": 1.4317944785155841e-05, "loss": 0.7628, "step": 16960 }, { "epoch": 0.364522919620226, "grad_norm": 0.5162388083041777, "learning_rate": 1.4311795482092819e-05, "loss": 0.7667, "step": 16970 }, { "epoch": 0.36473772393349657, "grad_norm": 0.5143878924802234, "learning_rate": 1.430564417556296e-05, "loss": 0.7567, "step": 16980 }, { "epoch": 0.3649525282467672, "grad_norm": 0.5152886688089272, "learning_rate": 1.429949086842446e-05, "loss": 0.7579, "step": 16990 }, { "epoch": 0.3651673325600378, "grad_norm": 0.5232658905035079, "learning_rate": 1.4293335563536444e-05, "loss": 0.773, "step": 17000 }, { "epoch": 0.3653821368733084, "grad_norm": 0.5351086541332937, "learning_rate": 1.4287178263758954e-05, "loss": 0.7743, "step": 17010 }, { "epoch": 0.36559694118657904, "grad_norm": 0.534471003893465, "learning_rate": 1.4281018971952968e-05, "loss": 0.7766, "step": 17020 }, { "epoch": 0.3658117454998496, "grad_norm": 0.5285689768103836, "learning_rate": 1.4274857690980393e-05, "loss": 0.7609, "step": 17030 }, { "epoch": 0.36602654981312027, "grad_norm": 0.5247525596746352, "learning_rate": 1.426869442370405e-05, "loss": 0.7584, "step": 17040 }, { "epoch": 0.36624135412639086, "grad_norm": 0.5343814259733137, "learning_rate": 1.4262529172987694e-05, "loss": 0.7715, "step": 17050 }, { "epoch": 0.36645615843966145, "grad_norm": 1.5564508451969874, "learning_rate": 1.4256361941695994e-05, "loss": 0.7641, "step": 17060 }, { "epoch": 0.3666709627529321, "grad_norm": 0.5184034090359657, "learning_rate": 1.4250192732694539e-05, "loss": 0.7544, "step": 17070 }, { "epoch": 0.3668857670662027, "grad_norm": 0.9518622956465803, "learning_rate": 1.4244021548849838e-05, "loss": 0.7681, "step": 17080 }, { "epoch": 0.3671005713794733, "grad_norm": 0.5468495559267383, "learning_rate": 1.4237848393029321e-05, "loss": 0.761, "step": 17090 }, { "epoch": 0.3673153756927439, "grad_norm": 0.5213501404226146, "learning_rate": 1.4231673268101334e-05, "loss": 0.7667, "step": 17100 }, { "epoch": 0.3675301800060145, "grad_norm": 0.5088259296767228, "learning_rate": 1.4225496176935135e-05, "loss": 0.7781, "step": 17110 }, { "epoch": 0.36774498431928515, "grad_norm": 0.5236538699356668, "learning_rate": 1.4219317122400895e-05, "loss": 0.771, "step": 17120 }, { "epoch": 0.36795978863255574, "grad_norm": 0.5211467299731635, "learning_rate": 1.4213136107369701e-05, "loss": 0.7793, "step": 17130 }, { "epoch": 0.36817459294582633, "grad_norm": 0.5472140162284628, "learning_rate": 1.4206953134713546e-05, "loss": 0.7692, "step": 17140 }, { "epoch": 0.368389397259097, "grad_norm": 0.5189807730044983, "learning_rate": 1.4200768207305338e-05, "loss": 0.7814, "step": 17150 }, { "epoch": 0.36860420157236756, "grad_norm": 0.5355282252957151, "learning_rate": 1.4194581328018887e-05, "loss": 0.7701, "step": 17160 }, { "epoch": 0.3688190058856382, "grad_norm": 0.5178079624135835, "learning_rate": 1.418839249972892e-05, "loss": 0.7692, "step": 17170 }, { "epoch": 0.3690338101989088, "grad_norm": 0.5294877879343505, "learning_rate": 1.4182201725311056e-05, "loss": 0.7493, "step": 17180 }, { "epoch": 0.3692486145121794, "grad_norm": 0.506889416207276, "learning_rate": 1.417600900764183e-05, "loss": 0.7575, "step": 17190 }, { "epoch": 0.36946341882545003, "grad_norm": 0.5285238726260074, "learning_rate": 1.4169814349598676e-05, "loss": 0.761, "step": 17200 }, { "epoch": 0.3696782231387206, "grad_norm": 0.5263966505400081, "learning_rate": 1.4163617754059927e-05, "loss": 0.7574, "step": 17210 }, { "epoch": 0.36989302745199126, "grad_norm": 0.5444318728567634, "learning_rate": 1.4157419223904816e-05, "loss": 0.7836, "step": 17220 }, { "epoch": 0.37010783176526185, "grad_norm": 0.504161618679252, "learning_rate": 1.4151218762013486e-05, "loss": 0.7667, "step": 17230 }, { "epoch": 0.37032263607853244, "grad_norm": 0.5367134980237321, "learning_rate": 1.4145016371266958e-05, "loss": 0.7658, "step": 17240 }, { "epoch": 0.3705374403918031, "grad_norm": 0.5437915470695406, "learning_rate": 1.4138812054547164e-05, "loss": 0.7642, "step": 17250 }, { "epoch": 0.3707522447050737, "grad_norm": 0.5250865582583174, "learning_rate": 1.413260581473693e-05, "loss": 0.7629, "step": 17260 }, { "epoch": 0.37096704901834426, "grad_norm": 0.5219131992470888, "learning_rate": 1.412639765471997e-05, "loss": 0.7492, "step": 17270 }, { "epoch": 0.3711818533316149, "grad_norm": 0.5434175810188079, "learning_rate": 1.4120187577380893e-05, "loss": 0.7721, "step": 17280 }, { "epoch": 0.3713966576448855, "grad_norm": 0.5297492710247524, "learning_rate": 1.4113975585605197e-05, "loss": 0.7627, "step": 17290 }, { "epoch": 0.37161146195815614, "grad_norm": 0.5172129186389717, "learning_rate": 1.4107761682279273e-05, "loss": 0.7824, "step": 17300 }, { "epoch": 0.37182626627142673, "grad_norm": 0.5530724301179245, "learning_rate": 1.4101545870290397e-05, "loss": 0.7623, "step": 17310 }, { "epoch": 0.3720410705846973, "grad_norm": 0.5228165330027325, "learning_rate": 1.4095328152526734e-05, "loss": 0.7603, "step": 17320 }, { "epoch": 0.37225587489796796, "grad_norm": 0.5134940227470199, "learning_rate": 1.4089108531877336e-05, "loss": 0.7396, "step": 17330 }, { "epoch": 0.37247067921123855, "grad_norm": 0.5259963794438499, "learning_rate": 1.4082887011232131e-05, "loss": 0.7694, "step": 17340 }, { "epoch": 0.3726854835245092, "grad_norm": 0.5189340620745447, "learning_rate": 1.4076663593481943e-05, "loss": 0.7759, "step": 17350 }, { "epoch": 0.3729002878377798, "grad_norm": 0.5035877483790274, "learning_rate": 1.4070438281518462e-05, "loss": 0.7516, "step": 17360 }, { "epoch": 0.3731150921510504, "grad_norm": 0.5274597389686024, "learning_rate": 1.406421107823427e-05, "loss": 0.7705, "step": 17370 }, { "epoch": 0.373329896464321, "grad_norm": 0.5817576244957883, "learning_rate": 1.405798198652283e-05, "loss": 0.7664, "step": 17380 }, { "epoch": 0.3735447007775916, "grad_norm": 0.5517499638677766, "learning_rate": 1.405175100927847e-05, "loss": 0.775, "step": 17390 }, { "epoch": 0.3737595050908622, "grad_norm": 0.5527733297913258, "learning_rate": 1.40455181493964e-05, "loss": 0.7621, "step": 17400 }, { "epoch": 0.37397430940413284, "grad_norm": 0.5363364768618227, "learning_rate": 1.4039283409772706e-05, "loss": 0.755, "step": 17410 }, { "epoch": 0.37418911371740343, "grad_norm": 0.5375291344235176, "learning_rate": 1.4033046793304348e-05, "loss": 0.7594, "step": 17420 }, { "epoch": 0.3744039180306741, "grad_norm": 0.5402648263612099, "learning_rate": 1.402680830288916e-05, "loss": 0.7605, "step": 17430 }, { "epoch": 0.37461872234394467, "grad_norm": 0.538129594884827, "learning_rate": 1.4020567941425837e-05, "loss": 0.7753, "step": 17440 }, { "epoch": 0.37483352665721525, "grad_norm": 0.5261936791371908, "learning_rate": 1.4014325711813958e-05, "loss": 0.7571, "step": 17450 }, { "epoch": 0.3750483309704859, "grad_norm": 0.534378842346243, "learning_rate": 1.4008081616953954e-05, "loss": 0.7813, "step": 17460 }, { "epoch": 0.3752631352837565, "grad_norm": 0.5320321584083502, "learning_rate": 1.4001835659747137e-05, "loss": 0.7602, "step": 17470 }, { "epoch": 0.37547793959702713, "grad_norm": 0.5315203643805748, "learning_rate": 1.3995587843095675e-05, "loss": 0.7575, "step": 17480 }, { "epoch": 0.3756927439102977, "grad_norm": 0.5301936608790958, "learning_rate": 1.3989338169902604e-05, "loss": 0.7676, "step": 17490 }, { "epoch": 0.3759075482235683, "grad_norm": 0.5577865205877498, "learning_rate": 1.398308664307182e-05, "loss": 0.7614, "step": 17500 }, { "epoch": 0.37612235253683896, "grad_norm": 0.5179881999767708, "learning_rate": 1.3976833265508085e-05, "loss": 0.7601, "step": 17510 }, { "epoch": 0.37633715685010954, "grad_norm": 0.5208749334999229, "learning_rate": 1.3970578040117013e-05, "loss": 0.7525, "step": 17520 }, { "epoch": 0.37655196116338013, "grad_norm": 0.5352630545243395, "learning_rate": 1.3964320969805085e-05, "loss": 0.7647, "step": 17530 }, { "epoch": 0.3767667654766508, "grad_norm": 0.5193667340738525, "learning_rate": 1.3958062057479638e-05, "loss": 0.7527, "step": 17540 }, { "epoch": 0.37698156978992137, "grad_norm": 0.5361696072953572, "learning_rate": 1.395180130604886e-05, "loss": 0.7672, "step": 17550 }, { "epoch": 0.377196374103192, "grad_norm": 0.5518592474145112, "learning_rate": 1.3945538718421797e-05, "loss": 0.769, "step": 17560 }, { "epoch": 0.3774111784164626, "grad_norm": 0.5148045343692099, "learning_rate": 1.3939274297508343e-05, "loss": 0.7542, "step": 17570 }, { "epoch": 0.3776259827297332, "grad_norm": 0.5228184172172592, "learning_rate": 1.3933008046219256e-05, "loss": 0.7722, "step": 17580 }, { "epoch": 0.37784078704300383, "grad_norm": 0.5606699310701568, "learning_rate": 1.392673996746613e-05, "loss": 0.7608, "step": 17590 }, { "epoch": 0.3780555913562744, "grad_norm": 0.5155322885195631, "learning_rate": 1.3920470064161417e-05, "loss": 0.7648, "step": 17600 }, { "epoch": 0.37827039566954507, "grad_norm": 0.5295832405444447, "learning_rate": 1.3914198339218417e-05, "loss": 0.7529, "step": 17610 }, { "epoch": 0.37848519998281566, "grad_norm": 0.5379568307902901, "learning_rate": 1.3907924795551269e-05, "loss": 0.7638, "step": 17620 }, { "epoch": 0.37870000429608625, "grad_norm": 0.5202511460977277, "learning_rate": 1.3901649436074967e-05, "loss": 0.747, "step": 17630 }, { "epoch": 0.3789148086093569, "grad_norm": 0.5082094007910867, "learning_rate": 1.3895372263705342e-05, "loss": 0.7553, "step": 17640 }, { "epoch": 0.3791296129226275, "grad_norm": 0.5019905535533106, "learning_rate": 1.3889093281359068e-05, "loss": 0.7723, "step": 17650 }, { "epoch": 0.3793444172358981, "grad_norm": 0.5289787148328216, "learning_rate": 1.388281249195366e-05, "loss": 0.7538, "step": 17660 }, { "epoch": 0.3795592215491687, "grad_norm": 0.5138445337600223, "learning_rate": 1.3876529898407479e-05, "loss": 0.7703, "step": 17670 }, { "epoch": 0.3797740258624393, "grad_norm": 0.4950419489829235, "learning_rate": 1.3870245503639715e-05, "loss": 0.7576, "step": 17680 }, { "epoch": 0.37998883017570995, "grad_norm": 0.5254250563222473, "learning_rate": 1.3863959310570398e-05, "loss": 0.7667, "step": 17690 }, { "epoch": 0.38020363448898054, "grad_norm": 0.520664787706605, "learning_rate": 1.38576713221204e-05, "loss": 0.7635, "step": 17700 }, { "epoch": 0.3804184388022511, "grad_norm": 0.5313493460126173, "learning_rate": 1.3851381541211418e-05, "loss": 0.7751, "step": 17710 }, { "epoch": 0.38063324311552177, "grad_norm": 0.5398410905297825, "learning_rate": 1.384508997076598e-05, "loss": 0.7624, "step": 17720 }, { "epoch": 0.38084804742879236, "grad_norm": 0.5263114278035196, "learning_rate": 1.3838796613707462e-05, "loss": 0.7613, "step": 17730 }, { "epoch": 0.381062851742063, "grad_norm": 0.5283488770743783, "learning_rate": 1.3832501472960051e-05, "loss": 0.7544, "step": 17740 }, { "epoch": 0.3812776560553336, "grad_norm": 0.543513524216455, "learning_rate": 1.3826204551448777e-05, "loss": 0.7504, "step": 17750 }, { "epoch": 0.3814924603686042, "grad_norm": 0.5214420491236335, "learning_rate": 1.3819905852099492e-05, "loss": 0.7651, "step": 17760 }, { "epoch": 0.3817072646818748, "grad_norm": 0.5184770230999961, "learning_rate": 1.3813605377838866e-05, "loss": 0.752, "step": 17770 }, { "epoch": 0.3819220689951454, "grad_norm": 0.5283519182814274, "learning_rate": 1.3807303131594407e-05, "loss": 0.7547, "step": 17780 }, { "epoch": 0.38213687330841606, "grad_norm": 0.5385698106036354, "learning_rate": 1.380099911629444e-05, "loss": 0.7593, "step": 17790 }, { "epoch": 0.38235167762168665, "grad_norm": 0.5402749618995843, "learning_rate": 1.379469333486811e-05, "loss": 0.7516, "step": 17800 }, { "epoch": 0.38256648193495724, "grad_norm": 0.5247063198184665, "learning_rate": 1.378838579024539e-05, "loss": 0.7652, "step": 17810 }, { "epoch": 0.3827812862482279, "grad_norm": 0.5378877903900204, "learning_rate": 1.3782076485357062e-05, "loss": 0.7635, "step": 17820 }, { "epoch": 0.38299609056149847, "grad_norm": 0.5338589078746822, "learning_rate": 1.377576542313474e-05, "loss": 0.7582, "step": 17830 }, { "epoch": 0.38321089487476906, "grad_norm": 0.5103257254373668, "learning_rate": 1.3769452606510837e-05, "loss": 0.76, "step": 17840 }, { "epoch": 0.3834256991880397, "grad_norm": 0.5109691457275021, "learning_rate": 1.3763138038418592e-05, "loss": 0.7556, "step": 17850 }, { "epoch": 0.3836405035013103, "grad_norm": 0.5358067276980497, "learning_rate": 1.375682172179206e-05, "loss": 0.7708, "step": 17860 }, { "epoch": 0.38385530781458094, "grad_norm": 0.518350530032854, "learning_rate": 1.37505036595661e-05, "loss": 0.7784, "step": 17870 }, { "epoch": 0.3840701121278515, "grad_norm": 0.5255617991273894, "learning_rate": 1.374418385467639e-05, "loss": 0.751, "step": 17880 }, { "epoch": 0.3842849164411221, "grad_norm": 0.5491809745075493, "learning_rate": 1.3737862310059413e-05, "loss": 0.7485, "step": 17890 }, { "epoch": 0.38449972075439276, "grad_norm": 0.5437186078897788, "learning_rate": 1.373153902865246e-05, "loss": 0.7716, "step": 17900 }, { "epoch": 0.38471452506766335, "grad_norm": 0.553356354641266, "learning_rate": 1.372521401339363e-05, "loss": 0.7578, "step": 17910 }, { "epoch": 0.384929329380934, "grad_norm": 0.531549500667387, "learning_rate": 1.3718887267221835e-05, "loss": 0.759, "step": 17920 }, { "epoch": 0.3851441336942046, "grad_norm": 0.5164949440136289, "learning_rate": 1.3712558793076777e-05, "loss": 0.7706, "step": 17930 }, { "epoch": 0.38535893800747517, "grad_norm": 0.508363864448989, "learning_rate": 1.3706228593898971e-05, "loss": 0.761, "step": 17940 }, { "epoch": 0.3855737423207458, "grad_norm": 0.5645367042304027, "learning_rate": 1.369989667262973e-05, "loss": 0.7529, "step": 17950 }, { "epoch": 0.3857885466340164, "grad_norm": 0.5276474618348226, "learning_rate": 1.3693563032211173e-05, "loss": 0.7619, "step": 17960 }, { "epoch": 0.386003350947287, "grad_norm": 0.5219439134743928, "learning_rate": 1.3687227675586205e-05, "loss": 0.7602, "step": 17970 }, { "epoch": 0.38621815526055764, "grad_norm": 0.4996278152443501, "learning_rate": 1.3680890605698543e-05, "loss": 0.7644, "step": 17980 }, { "epoch": 0.3864329595738282, "grad_norm": 0.522998743172047, "learning_rate": 1.3674551825492688e-05, "loss": 0.7701, "step": 17990 }, { "epoch": 0.38664776388709887, "grad_norm": 0.5374567407341325, "learning_rate": 1.3668211337913944e-05, "loss": 0.7746, "step": 18000 }, { "epoch": 0.38686256820036946, "grad_norm": 0.5196875770806691, "learning_rate": 1.3661869145908407e-05, "loss": 0.7478, "step": 18010 }, { "epoch": 0.38707737251364005, "grad_norm": 0.5292498035951604, "learning_rate": 1.365552525242296e-05, "loss": 0.7645, "step": 18020 }, { "epoch": 0.3872921768269107, "grad_norm": 0.5248382854490489, "learning_rate": 1.3649179660405282e-05, "loss": 0.7602, "step": 18030 }, { "epoch": 0.3875069811401813, "grad_norm": 0.5313710119749431, "learning_rate": 1.364283237280384e-05, "loss": 0.7786, "step": 18040 }, { "epoch": 0.38772178545345193, "grad_norm": 0.5307887108904168, "learning_rate": 1.3636483392567884e-05, "loss": 0.7738, "step": 18050 }, { "epoch": 0.3879365897667225, "grad_norm": 0.540751829854641, "learning_rate": 1.363013272264746e-05, "loss": 0.7731, "step": 18060 }, { "epoch": 0.3881513940799931, "grad_norm": 0.5156037186616597, "learning_rate": 1.3623780365993389e-05, "loss": 0.7552, "step": 18070 }, { "epoch": 0.38836619839326375, "grad_norm": 0.5141088635988575, "learning_rate": 1.3617426325557283e-05, "loss": 0.7547, "step": 18080 }, { "epoch": 0.38858100270653434, "grad_norm": 0.5318673688677644, "learning_rate": 1.3611070604291535e-05, "loss": 0.7689, "step": 18090 }, { "epoch": 0.388795807019805, "grad_norm": 0.5436642226772909, "learning_rate": 1.360471320514931e-05, "loss": 0.7598, "step": 18100 }, { "epoch": 0.3890106113330756, "grad_norm": 0.5438843302772419, "learning_rate": 1.359835413108457e-05, "loss": 0.7589, "step": 18110 }, { "epoch": 0.38922541564634616, "grad_norm": 0.5257581273173912, "learning_rate": 1.359199338505204e-05, "loss": 0.7497, "step": 18120 }, { "epoch": 0.3894402199596168, "grad_norm": 0.5190745673753684, "learning_rate": 1.3585630970007228e-05, "loss": 0.757, "step": 18130 }, { "epoch": 0.3896550242728874, "grad_norm": 0.5299254643622509, "learning_rate": 1.3579266888906422e-05, "loss": 0.7564, "step": 18140 }, { "epoch": 0.389869828586158, "grad_norm": 0.5305677893941887, "learning_rate": 1.3572901144706675e-05, "loss": 0.743, "step": 18150 }, { "epoch": 0.39008463289942863, "grad_norm": 0.5173856069530872, "learning_rate": 1.3566533740365812e-05, "loss": 0.7552, "step": 18160 }, { "epoch": 0.3902994372126992, "grad_norm": 0.5148950374087616, "learning_rate": 1.3560164678842442e-05, "loss": 0.7568, "step": 18170 }, { "epoch": 0.39051424152596986, "grad_norm": 0.5407560251354756, "learning_rate": 1.355379396309593e-05, "loss": 0.7546, "step": 18180 }, { "epoch": 0.39072904583924045, "grad_norm": 0.5124304727359418, "learning_rate": 1.3547421596086425e-05, "loss": 0.7565, "step": 18190 }, { "epoch": 0.39094385015251104, "grad_norm": 0.5431987417174345, "learning_rate": 1.3541047580774827e-05, "loss": 0.756, "step": 18200 }, { "epoch": 0.3911586544657817, "grad_norm": 0.5073011287145005, "learning_rate": 1.3534671920122809e-05, "loss": 0.7464, "step": 18210 }, { "epoch": 0.3913734587790523, "grad_norm": 0.5325329739414308, "learning_rate": 1.3528294617092807e-05, "loss": 0.7605, "step": 18220 }, { "epoch": 0.3915882630923229, "grad_norm": 0.5369634007500615, "learning_rate": 1.3521915674648027e-05, "loss": 0.7621, "step": 18230 }, { "epoch": 0.3918030674055935, "grad_norm": 0.5167864671283865, "learning_rate": 1.351553509575243e-05, "loss": 0.7719, "step": 18240 }, { "epoch": 0.3920178717188641, "grad_norm": 0.5135322995315039, "learning_rate": 1.3509152883370738e-05, "loss": 0.7687, "step": 18250 }, { "epoch": 0.39223267603213474, "grad_norm": 0.5320096412814102, "learning_rate": 1.3502769040468428e-05, "loss": 0.7483, "step": 18260 }, { "epoch": 0.39244748034540533, "grad_norm": 0.5413014307472465, "learning_rate": 1.3496383570011749e-05, "loss": 0.7631, "step": 18270 }, { "epoch": 0.3926622846586759, "grad_norm": 0.5335344669447194, "learning_rate": 1.3489996474967688e-05, "loss": 0.7688, "step": 18280 }, { "epoch": 0.39287708897194656, "grad_norm": 0.5155463587590748, "learning_rate": 1.3483607758304e-05, "loss": 0.7603, "step": 18290 }, { "epoch": 0.39309189328521715, "grad_norm": 0.5144797987402185, "learning_rate": 1.3477217422989186e-05, "loss": 0.7516, "step": 18300 }, { "epoch": 0.3933066975984878, "grad_norm": 0.5138996863954206, "learning_rate": 1.3470825471992508e-05, "loss": 0.743, "step": 18310 }, { "epoch": 0.3935215019117584, "grad_norm": 0.5084404683155206, "learning_rate": 1.3464431908283966e-05, "loss": 0.748, "step": 18320 }, { "epoch": 0.393736306225029, "grad_norm": 0.5145070791156562, "learning_rate": 1.3458036734834317e-05, "loss": 0.7624, "step": 18330 }, { "epoch": 0.3939511105382996, "grad_norm": 0.5228286492426271, "learning_rate": 1.345163995461507e-05, "loss": 0.7453, "step": 18340 }, { "epoch": 0.3941659148515702, "grad_norm": 0.5234868717582319, "learning_rate": 1.3445241570598471e-05, "loss": 0.7755, "step": 18350 }, { "epoch": 0.39438071916484085, "grad_norm": 0.5157437356324394, "learning_rate": 1.3438841585757518e-05, "loss": 0.7495, "step": 18360 }, { "epoch": 0.39459552347811144, "grad_norm": 0.5390531614993764, "learning_rate": 1.3432440003065949e-05, "loss": 0.76, "step": 18370 }, { "epoch": 0.39481032779138203, "grad_norm": 0.5384002860850948, "learning_rate": 1.3426036825498248e-05, "loss": 0.7666, "step": 18380 }, { "epoch": 0.3950251321046527, "grad_norm": 0.5121988305761532, "learning_rate": 1.3419632056029637e-05, "loss": 0.7597, "step": 18390 }, { "epoch": 0.39523993641792327, "grad_norm": 0.5104755486106719, "learning_rate": 1.3413225697636079e-05, "loss": 0.7483, "step": 18400 }, { "epoch": 0.3954547407311939, "grad_norm": 0.5032637298232104, "learning_rate": 1.3406817753294277e-05, "loss": 0.7607, "step": 18410 }, { "epoch": 0.3956695450444645, "grad_norm": 0.510737282878729, "learning_rate": 1.3400408225981666e-05, "loss": 0.7487, "step": 18420 }, { "epoch": 0.3958843493577351, "grad_norm": 0.5527046441176208, "learning_rate": 1.339399711867642e-05, "loss": 0.76, "step": 18430 }, { "epoch": 0.39609915367100573, "grad_norm": 0.5226806691957405, "learning_rate": 1.338758443435745e-05, "loss": 0.7537, "step": 18440 }, { "epoch": 0.3963139579842763, "grad_norm": 0.5235233310915026, "learning_rate": 1.3381170176004393e-05, "loss": 0.7559, "step": 18450 }, { "epoch": 0.3965287622975469, "grad_norm": 0.5168608712748513, "learning_rate": 1.3374754346597622e-05, "loss": 0.7523, "step": 18460 }, { "epoch": 0.39674356661081756, "grad_norm": 0.5506026541978102, "learning_rate": 1.3368336949118241e-05, "loss": 0.7521, "step": 18470 }, { "epoch": 0.39695837092408814, "grad_norm": 0.5268087236613826, "learning_rate": 1.3361917986548076e-05, "loss": 0.7693, "step": 18480 }, { "epoch": 0.3971731752373588, "grad_norm": 0.5299142550170114, "learning_rate": 1.3355497461869686e-05, "loss": 0.762, "step": 18490 }, { "epoch": 0.3973879795506294, "grad_norm": 0.49923625368152, "learning_rate": 1.3349075378066358e-05, "loss": 0.7538, "step": 18500 }, { "epoch": 0.39760278386389997, "grad_norm": 0.526770181891027, "learning_rate": 1.3342651738122096e-05, "loss": 0.7692, "step": 18510 }, { "epoch": 0.3978175881771706, "grad_norm": 0.5278097868657259, "learning_rate": 1.3336226545021633e-05, "loss": 0.7674, "step": 18520 }, { "epoch": 0.3980323924904412, "grad_norm": 0.5253722249102108, "learning_rate": 1.3329799801750425e-05, "loss": 0.768, "step": 18530 }, { "epoch": 0.39824719680371184, "grad_norm": 0.536105570764583, "learning_rate": 1.3323371511294636e-05, "loss": 0.762, "step": 18540 }, { "epoch": 0.39846200111698243, "grad_norm": 0.5193573833212372, "learning_rate": 1.3316941676641162e-05, "loss": 0.7589, "step": 18550 }, { "epoch": 0.398676805430253, "grad_norm": 0.5070127139643907, "learning_rate": 1.3310510300777615e-05, "loss": 0.7628, "step": 18560 }, { "epoch": 0.39889160974352367, "grad_norm": 0.5191238299803989, "learning_rate": 1.3304077386692318e-05, "loss": 0.751, "step": 18570 }, { "epoch": 0.39910641405679426, "grad_norm": 0.5205364024981556, "learning_rate": 1.3297642937374313e-05, "loss": 0.7545, "step": 18580 }, { "epoch": 0.39932121837006485, "grad_norm": 0.5187329471387797, "learning_rate": 1.3291206955813354e-05, "loss": 0.7464, "step": 18590 }, { "epoch": 0.3995360226833355, "grad_norm": 0.5049021637761884, "learning_rate": 1.3284769444999901e-05, "loss": 0.7595, "step": 18600 }, { "epoch": 0.3997508269966061, "grad_norm": 0.5029990992742273, "learning_rate": 1.3278330407925135e-05, "loss": 0.7543, "step": 18610 }, { "epoch": 0.3999656313098767, "grad_norm": 0.5173702160515916, "learning_rate": 1.327188984758094e-05, "loss": 0.7619, "step": 18620 }, { "epoch": 0.4001804356231473, "grad_norm": 0.5192562549904551, "learning_rate": 1.3265447766959911e-05, "loss": 0.7561, "step": 18630 }, { "epoch": 0.4003952399364179, "grad_norm": 0.5312951137431535, "learning_rate": 1.3259004169055346e-05, "loss": 0.7694, "step": 18640 }, { "epoch": 0.40061004424968855, "grad_norm": 0.5266364154510266, "learning_rate": 1.3252559056861246e-05, "loss": 0.7543, "step": 18650 }, { "epoch": 0.40082484856295914, "grad_norm": 0.533316964707828, "learning_rate": 1.3246112433372322e-05, "loss": 0.7554, "step": 18660 }, { "epoch": 0.4010396528762298, "grad_norm": 0.5482477597760441, "learning_rate": 1.3239664301583988e-05, "loss": 0.7656, "step": 18670 }, { "epoch": 0.40125445718950037, "grad_norm": 0.5206451326269249, "learning_rate": 1.3233214664492349e-05, "loss": 0.7543, "step": 18680 }, { "epoch": 0.40146926150277096, "grad_norm": 0.5418016042557484, "learning_rate": 1.3226763525094217e-05, "loss": 0.7611, "step": 18690 }, { "epoch": 0.4016840658160416, "grad_norm": 0.5558615173058707, "learning_rate": 1.3220310886387103e-05, "loss": 0.7627, "step": 18700 }, { "epoch": 0.4018988701293122, "grad_norm": 0.5143709640231149, "learning_rate": 1.3213856751369207e-05, "loss": 0.7714, "step": 18710 }, { "epoch": 0.4021136744425828, "grad_norm": 0.5376677743737073, "learning_rate": 1.3207401123039438e-05, "loss": 0.7691, "step": 18720 }, { "epoch": 0.4023284787558534, "grad_norm": 0.5399761390877215, "learning_rate": 1.3200944004397383e-05, "loss": 0.7456, "step": 18730 }, { "epoch": 0.402543283069124, "grad_norm": 0.5249199592562105, "learning_rate": 1.3194485398443332e-05, "loss": 0.7505, "step": 18740 }, { "epoch": 0.40275808738239466, "grad_norm": 0.5155915016561029, "learning_rate": 1.318802530817826e-05, "loss": 0.7306, "step": 18750 }, { "epoch": 0.40297289169566525, "grad_norm": 0.5318691830490915, "learning_rate": 1.3181563736603837e-05, "loss": 0.748, "step": 18760 }, { "epoch": 0.40318769600893584, "grad_norm": 0.515075633683763, "learning_rate": 1.317510068672242e-05, "loss": 0.7597, "step": 18770 }, { "epoch": 0.4034025003222065, "grad_norm": 0.5179116497827865, "learning_rate": 1.316863616153705e-05, "loss": 0.7613, "step": 18780 }, { "epoch": 0.40361730463547707, "grad_norm": 0.5332150745577201, "learning_rate": 1.3162170164051456e-05, "loss": 0.7521, "step": 18790 }, { "epoch": 0.4038321089487477, "grad_norm": 0.5253427224156337, "learning_rate": 1.3155702697270047e-05, "loss": 0.7733, "step": 18800 }, { "epoch": 0.4040469132620183, "grad_norm": 0.49768785272010546, "learning_rate": 1.3149233764197922e-05, "loss": 0.7566, "step": 18810 }, { "epoch": 0.4042617175752889, "grad_norm": 0.5267923414911647, "learning_rate": 1.3142763367840857e-05, "loss": 0.7667, "step": 18820 }, { "epoch": 0.40447652188855954, "grad_norm": 0.5282610764687665, "learning_rate": 1.3136291511205306e-05, "loss": 0.755, "step": 18830 }, { "epoch": 0.4046913262018301, "grad_norm": 0.5223892713619213, "learning_rate": 1.3129818197298405e-05, "loss": 0.7738, "step": 18840 }, { "epoch": 0.40490613051510077, "grad_norm": 0.5096764777578232, "learning_rate": 1.3123343429127968e-05, "loss": 0.7588, "step": 18850 }, { "epoch": 0.40512093482837136, "grad_norm": 0.5467891192906261, "learning_rate": 1.3116867209702479e-05, "loss": 0.7592, "step": 18860 }, { "epoch": 0.40533573914164195, "grad_norm": 1.134506804779343, "learning_rate": 1.3110389542031102e-05, "loss": 0.7478, "step": 18870 }, { "epoch": 0.4055505434549126, "grad_norm": 0.5097103865358088, "learning_rate": 1.310391042912367e-05, "loss": 0.7512, "step": 18880 }, { "epoch": 0.4057653477681832, "grad_norm": 0.5393124410906558, "learning_rate": 1.3097429873990693e-05, "loss": 0.7485, "step": 18890 }, { "epoch": 0.40598015208145377, "grad_norm": 0.5190274805087902, "learning_rate": 1.3090947879643344e-05, "loss": 0.7406, "step": 18900 }, { "epoch": 0.4061949563947244, "grad_norm": 0.5440017480426464, "learning_rate": 1.308446444909347e-05, "loss": 0.7711, "step": 18910 }, { "epoch": 0.406409760707995, "grad_norm": 0.5154904534059127, "learning_rate": 1.3077979585353582e-05, "loss": 0.75, "step": 18920 }, { "epoch": 0.40662456502126565, "grad_norm": 0.5278477316199867, "learning_rate": 1.307149329143686e-05, "loss": 0.7612, "step": 18930 }, { "epoch": 0.40683936933453624, "grad_norm": 0.5322812965577661, "learning_rate": 1.3065005570357148e-05, "loss": 0.7624, "step": 18940 }, { "epoch": 0.4070541736478068, "grad_norm": 0.5160265527970386, "learning_rate": 1.305851642512895e-05, "loss": 0.7472, "step": 18950 }, { "epoch": 0.40726897796107747, "grad_norm": 0.5318911272420563, "learning_rate": 1.3052025858767435e-05, "loss": 0.7703, "step": 18960 }, { "epoch": 0.40748378227434806, "grad_norm": 0.5055369294794778, "learning_rate": 1.3045533874288429e-05, "loss": 0.7505, "step": 18970 }, { "epoch": 0.4076985865876187, "grad_norm": 0.5298819562145214, "learning_rate": 1.3039040474708422e-05, "loss": 0.7645, "step": 18980 }, { "epoch": 0.4079133909008893, "grad_norm": 0.552924739228477, "learning_rate": 1.3032545663044558e-05, "loss": 0.7734, "step": 18990 }, { "epoch": 0.4081281952141599, "grad_norm": 0.5346123293209584, "learning_rate": 1.3026049442314636e-05, "loss": 0.7553, "step": 19000 }, { "epoch": 0.40834299952743053, "grad_norm": 0.5246739179110647, "learning_rate": 1.3019551815537117e-05, "loss": 0.7505, "step": 19010 }, { "epoch": 0.4085578038407011, "grad_norm": 0.5338166445740993, "learning_rate": 1.3013052785731102e-05, "loss": 0.7414, "step": 19020 }, { "epoch": 0.4087726081539717, "grad_norm": 0.5109686263652257, "learning_rate": 1.3006552355916355e-05, "loss": 0.7477, "step": 19030 }, { "epoch": 0.40898741246724235, "grad_norm": 0.5045191677684051, "learning_rate": 1.300005052911329e-05, "loss": 0.7546, "step": 19040 }, { "epoch": 0.40920221678051294, "grad_norm": 0.5374579208608851, "learning_rate": 1.2993547308342965e-05, "loss": 0.7639, "step": 19050 }, { "epoch": 0.4094170210937836, "grad_norm": 0.5316853043110944, "learning_rate": 1.2987042696627085e-05, "loss": 0.7595, "step": 19060 }, { "epoch": 0.4096318254070542, "grad_norm": 0.5278988439756973, "learning_rate": 1.298053669698801e-05, "loss": 0.735, "step": 19070 }, { "epoch": 0.40984662972032476, "grad_norm": 0.5654598465285191, "learning_rate": 1.2974029312448737e-05, "loss": 0.7684, "step": 19080 }, { "epoch": 0.4100614340335954, "grad_norm": 0.5162837541623254, "learning_rate": 1.2967520546032905e-05, "loss": 0.7579, "step": 19090 }, { "epoch": 0.410276238346866, "grad_norm": 0.5224389671190344, "learning_rate": 1.2961010400764807e-05, "loss": 0.7513, "step": 19100 }, { "epoch": 0.41049104266013664, "grad_norm": 0.5176219223001366, "learning_rate": 1.2954498879669361e-05, "loss": 0.7638, "step": 19110 }, { "epoch": 0.41070584697340723, "grad_norm": 0.534813023204064, "learning_rate": 1.2947985985772137e-05, "loss": 0.762, "step": 19120 }, { "epoch": 0.4109206512866778, "grad_norm": 0.5272695180224509, "learning_rate": 1.2941471722099332e-05, "loss": 0.7592, "step": 19130 }, { "epoch": 0.41113545559994846, "grad_norm": 0.5378612978101573, "learning_rate": 1.2934956091677788e-05, "loss": 0.7554, "step": 19140 }, { "epoch": 0.41135025991321905, "grad_norm": 0.5258722653546618, "learning_rate": 1.2928439097534981e-05, "loss": 0.7635, "step": 19150 }, { "epoch": 0.41156506422648964, "grad_norm": 0.5351602497774721, "learning_rate": 1.2921920742699019e-05, "loss": 0.7483, "step": 19160 }, { "epoch": 0.4117798685397603, "grad_norm": 0.5367356185774689, "learning_rate": 1.291540103019864e-05, "loss": 0.755, "step": 19170 }, { "epoch": 0.4119946728530309, "grad_norm": 0.5255579867546255, "learning_rate": 1.2908879963063212e-05, "loss": 0.7603, "step": 19180 }, { "epoch": 0.4122094771663015, "grad_norm": 0.5058299224660839, "learning_rate": 1.2902357544322741e-05, "loss": 0.7623, "step": 19190 }, { "epoch": 0.4124242814795721, "grad_norm": 0.5257377107858072, "learning_rate": 1.289583377700785e-05, "loss": 0.7433, "step": 19200 }, { "epoch": 0.4126390857928427, "grad_norm": 0.5202414578374462, "learning_rate": 1.28893086641498e-05, "loss": 0.7522, "step": 19210 }, { "epoch": 0.41285389010611334, "grad_norm": 0.514742134271601, "learning_rate": 1.288278220878047e-05, "loss": 0.7478, "step": 19220 }, { "epoch": 0.41306869441938393, "grad_norm": 0.498665233670508, "learning_rate": 1.287625441393236e-05, "loss": 0.7732, "step": 19230 }, { "epoch": 0.4132834987326546, "grad_norm": 0.5152993923389213, "learning_rate": 1.2869725282638596e-05, "loss": 0.7389, "step": 19240 }, { "epoch": 0.41349830304592516, "grad_norm": 0.5369968074928185, "learning_rate": 1.286319481793293e-05, "loss": 0.7559, "step": 19250 }, { "epoch": 0.41371310735919575, "grad_norm": 0.5168991961504903, "learning_rate": 1.2856663022849724e-05, "loss": 0.7425, "step": 19260 }, { "epoch": 0.4139279116724664, "grad_norm": 0.5226064009439039, "learning_rate": 1.2850129900423972e-05, "loss": 0.7476, "step": 19270 }, { "epoch": 0.414142715985737, "grad_norm": 0.5338894520277626, "learning_rate": 1.2843595453691262e-05, "loss": 0.7701, "step": 19280 }, { "epoch": 0.41435752029900763, "grad_norm": 0.5143229406464946, "learning_rate": 1.2837059685687823e-05, "loss": 0.7565, "step": 19290 }, { "epoch": 0.4145723246122782, "grad_norm": 0.5162616207638232, "learning_rate": 1.2830522599450479e-05, "loss": 0.765, "step": 19300 }, { "epoch": 0.4147871289255488, "grad_norm": 0.5006245141202402, "learning_rate": 1.2823984198016676e-05, "loss": 0.7403, "step": 19310 }, { "epoch": 0.41500193323881945, "grad_norm": 0.5284695404789536, "learning_rate": 1.281744448442447e-05, "loss": 0.7545, "step": 19320 }, { "epoch": 0.41521673755209004, "grad_norm": 0.5214124159977402, "learning_rate": 1.2810903461712524e-05, "loss": 0.761, "step": 19330 }, { "epoch": 0.41543154186536063, "grad_norm": 0.5254239866013033, "learning_rate": 1.2804361132920114e-05, "loss": 0.7446, "step": 19340 }, { "epoch": 0.4156463461786313, "grad_norm": 0.5278037952638479, "learning_rate": 1.2797817501087113e-05, "loss": 0.7572, "step": 19350 }, { "epoch": 0.41586115049190187, "grad_norm": 0.5367094234076616, "learning_rate": 1.2791272569254009e-05, "loss": 0.7462, "step": 19360 }, { "epoch": 0.4160759548051725, "grad_norm": 0.5252418860683364, "learning_rate": 1.2784726340461892e-05, "loss": 0.7693, "step": 19370 }, { "epoch": 0.4162907591184431, "grad_norm": 0.5055520616899003, "learning_rate": 1.2778178817752454e-05, "loss": 0.7481, "step": 19380 }, { "epoch": 0.4165055634317137, "grad_norm": 0.5204456122832675, "learning_rate": 1.2771630004167985e-05, "loss": 0.7419, "step": 19390 }, { "epoch": 0.41672036774498433, "grad_norm": 0.5217514946799076, "learning_rate": 1.2765079902751381e-05, "loss": 0.7459, "step": 19400 }, { "epoch": 0.4169351720582549, "grad_norm": 0.53197721768162, "learning_rate": 1.275852851654613e-05, "loss": 0.7451, "step": 19410 }, { "epoch": 0.41714997637152557, "grad_norm": 0.5345894761871923, "learning_rate": 1.2751975848596324e-05, "loss": 0.7545, "step": 19420 }, { "epoch": 0.41736478068479615, "grad_norm": 0.5172473442706328, "learning_rate": 1.274542190194664e-05, "loss": 0.7633, "step": 19430 }, { "epoch": 0.41757958499806674, "grad_norm": 0.5325296172080332, "learning_rate": 1.2738866679642365e-05, "loss": 0.749, "step": 19440 }, { "epoch": 0.4177943893113374, "grad_norm": 0.5270837129930179, "learning_rate": 1.2732310184729362e-05, "loss": 0.7563, "step": 19450 }, { "epoch": 0.418009193624608, "grad_norm": 0.514258180577775, "learning_rate": 1.2725752420254094e-05, "loss": 0.7591, "step": 19460 }, { "epoch": 0.41822399793787857, "grad_norm": 0.5187969540802417, "learning_rate": 1.2719193389263613e-05, "loss": 0.7482, "step": 19470 }, { "epoch": 0.4184388022511492, "grad_norm": 0.5303861691448711, "learning_rate": 1.2712633094805561e-05, "loss": 0.7547, "step": 19480 }, { "epoch": 0.4186536065644198, "grad_norm": 0.5105008679720247, "learning_rate": 1.2706071539928166e-05, "loss": 0.7591, "step": 19490 }, { "epoch": 0.41886841087769044, "grad_norm": 0.5277024751757646, "learning_rate": 1.2699508727680238e-05, "loss": 0.7563, "step": 19500 }, { "epoch": 0.41908321519096103, "grad_norm": 0.5341955327167359, "learning_rate": 1.2692944661111176e-05, "loss": 0.7616, "step": 19510 }, { "epoch": 0.4192980195042316, "grad_norm": 0.5321207219044839, "learning_rate": 1.2686379343270956e-05, "loss": 0.7487, "step": 19520 }, { "epoch": 0.41951282381750227, "grad_norm": 0.5028973933951222, "learning_rate": 1.2679812777210142e-05, "loss": 0.7589, "step": 19530 }, { "epoch": 0.41972762813077286, "grad_norm": 0.5333649392140662, "learning_rate": 1.2673244965979881e-05, "loss": 0.7499, "step": 19540 }, { "epoch": 0.4199424324440435, "grad_norm": 0.8809161047806207, "learning_rate": 1.2666675912631885e-05, "loss": 0.7431, "step": 19550 }, { "epoch": 0.4201572367573141, "grad_norm": 0.547365448998943, "learning_rate": 1.2660105620218452e-05, "loss": 0.7516, "step": 19560 }, { "epoch": 0.4203720410705847, "grad_norm": 0.5278558806116719, "learning_rate": 1.2653534091792459e-05, "loss": 0.7572, "step": 19570 }, { "epoch": 0.4205868453838553, "grad_norm": 0.5242417730519813, "learning_rate": 1.2646961330407349e-05, "loss": 0.7561, "step": 19580 }, { "epoch": 0.4208016496971259, "grad_norm": 0.5145264148757996, "learning_rate": 1.264038733911714e-05, "loss": 0.7526, "step": 19590 }, { "epoch": 0.4210164540103965, "grad_norm": 0.5226054611704246, "learning_rate": 1.2633812120976432e-05, "loss": 0.7585, "step": 19600 }, { "epoch": 0.42123125832366715, "grad_norm": 0.5094656107452357, "learning_rate": 1.262723567904038e-05, "loss": 0.7431, "step": 19610 }, { "epoch": 0.42144606263693773, "grad_norm": 0.5208004120644633, "learning_rate": 1.2620658016364713e-05, "loss": 0.7481, "step": 19620 }, { "epoch": 0.4216608669502084, "grad_norm": 0.5368286486289077, "learning_rate": 1.2614079136005732e-05, "loss": 0.7659, "step": 19630 }, { "epoch": 0.42187567126347897, "grad_norm": 0.5470952446979304, "learning_rate": 1.26074990410203e-05, "loss": 0.7511, "step": 19640 }, { "epoch": 0.42209047557674956, "grad_norm": 0.5132946022336456, "learning_rate": 1.2600917734465843e-05, "loss": 0.7725, "step": 19650 }, { "epoch": 0.4223052798900202, "grad_norm": 0.5597310989128308, "learning_rate": 1.2594335219400349e-05, "loss": 0.7431, "step": 19660 }, { "epoch": 0.4225200842032908, "grad_norm": 0.5260208167606223, "learning_rate": 1.2587751498882376e-05, "loss": 0.7533, "step": 19670 }, { "epoch": 0.42273488851656144, "grad_norm": 0.5029129520256707, "learning_rate": 1.2581166575971031e-05, "loss": 0.7506, "step": 19680 }, { "epoch": 0.422949692829832, "grad_norm": 0.5202725382006363, "learning_rate": 1.2574580453725987e-05, "loss": 0.7541, "step": 19690 }, { "epoch": 0.4231644971431026, "grad_norm": 0.5134917201548623, "learning_rate": 1.256799313520747e-05, "loss": 0.7564, "step": 19700 }, { "epoch": 0.42337930145637326, "grad_norm": 0.5354376298898013, "learning_rate": 1.256140462347627e-05, "loss": 0.7594, "step": 19710 }, { "epoch": 0.42359410576964385, "grad_norm": 0.5211973403768663, "learning_rate": 1.2554814921593716e-05, "loss": 0.7584, "step": 19720 }, { "epoch": 0.4238089100829145, "grad_norm": 0.5520144836915775, "learning_rate": 1.2548224032621707e-05, "loss": 0.7486, "step": 19730 }, { "epoch": 0.4240237143961851, "grad_norm": 0.4998308570756764, "learning_rate": 1.2541631959622684e-05, "loss": 0.7462, "step": 19740 }, { "epoch": 0.42423851870945567, "grad_norm": 0.496745741982448, "learning_rate": 1.2535038705659637e-05, "loss": 0.7557, "step": 19750 }, { "epoch": 0.4244533230227263, "grad_norm": 0.5179608442643614, "learning_rate": 1.2528444273796113e-05, "loss": 0.7559, "step": 19760 }, { "epoch": 0.4246681273359969, "grad_norm": 0.5237889562247441, "learning_rate": 1.2521848667096196e-05, "loss": 0.759, "step": 19770 }, { "epoch": 0.4248829316492675, "grad_norm": 0.5448250284811663, "learning_rate": 1.251525188862452e-05, "loss": 0.7612, "step": 19780 }, { "epoch": 0.42509773596253814, "grad_norm": 0.5060894163492381, "learning_rate": 1.250865394144627e-05, "loss": 0.7593, "step": 19790 }, { "epoch": 0.4253125402758087, "grad_norm": 0.49819573581917886, "learning_rate": 1.2502054828627168e-05, "loss": 0.7429, "step": 19800 }, { "epoch": 0.42552734458907937, "grad_norm": 0.5213140703162686, "learning_rate": 1.2495454553233473e-05, "loss": 0.7549, "step": 19810 }, { "epoch": 0.42574214890234996, "grad_norm": 0.5205619160973527, "learning_rate": 1.2488853118331993e-05, "loss": 0.7381, "step": 19820 }, { "epoch": 0.42595695321562055, "grad_norm": 0.5411772964173832, "learning_rate": 1.2482250526990072e-05, "loss": 0.7563, "step": 19830 }, { "epoch": 0.4261717575288912, "grad_norm": 0.5070043547298757, "learning_rate": 1.2475646782275588e-05, "loss": 0.7345, "step": 19840 }, { "epoch": 0.4263865618421618, "grad_norm": 0.5446352761437885, "learning_rate": 1.2469041887256955e-05, "loss": 0.7575, "step": 19850 }, { "epoch": 0.4266013661554324, "grad_norm": 0.5071987431688498, "learning_rate": 1.2462435845003131e-05, "loss": 0.7446, "step": 19860 }, { "epoch": 0.426816170468703, "grad_norm": 0.5207497470969883, "learning_rate": 1.2455828658583595e-05, "loss": 0.7656, "step": 19870 }, { "epoch": 0.4270309747819736, "grad_norm": 0.5117446569849975, "learning_rate": 1.2449220331068363e-05, "loss": 0.7582, "step": 19880 }, { "epoch": 0.42724577909524425, "grad_norm": 0.5264990840912481, "learning_rate": 1.2442610865527977e-05, "loss": 0.7442, "step": 19890 }, { "epoch": 0.42746058340851484, "grad_norm": 0.5258670236773598, "learning_rate": 1.2436000265033518e-05, "loss": 0.7531, "step": 19900 }, { "epoch": 0.4276753877217854, "grad_norm": 0.538439943387158, "learning_rate": 1.2429388532656586e-05, "loss": 0.764, "step": 19910 }, { "epoch": 0.42789019203505607, "grad_norm": 0.5177931014614214, "learning_rate": 1.242277567146931e-05, "loss": 0.7528, "step": 19920 }, { "epoch": 0.42810499634832666, "grad_norm": 0.5194066108047999, "learning_rate": 1.2416161684544337e-05, "loss": 0.7478, "step": 19930 }, { "epoch": 0.4283198006615973, "grad_norm": 0.5223576429675216, "learning_rate": 1.2409546574954844e-05, "loss": 0.7436, "step": 19940 }, { "epoch": 0.4285346049748679, "grad_norm": 0.5331311220294771, "learning_rate": 1.2402930345774533e-05, "loss": 0.7626, "step": 19950 }, { "epoch": 0.4287494092881385, "grad_norm": 0.5321622654012298, "learning_rate": 1.2396313000077613e-05, "loss": 0.7528, "step": 19960 }, { "epoch": 0.42896421360140913, "grad_norm": 0.5213440299088972, "learning_rate": 1.2389694540938828e-05, "loss": 0.7607, "step": 19970 }, { "epoch": 0.4291790179146797, "grad_norm": 0.5451420730102323, "learning_rate": 1.238307497143343e-05, "loss": 0.7514, "step": 19980 }, { "epoch": 0.42939382222795036, "grad_norm": 0.5396602450861735, "learning_rate": 1.2376454294637182e-05, "loss": 0.754, "step": 19990 }, { "epoch": 0.42960862654122095, "grad_norm": 0.5294320493844773, "learning_rate": 1.2369832513626372e-05, "loss": 0.7417, "step": 20000 }, { "epoch": 0.42982343085449154, "grad_norm": 0.5229398240238433, "learning_rate": 1.2363209631477797e-05, "loss": 0.7516, "step": 20010 }, { "epoch": 0.4300382351677622, "grad_norm": 0.5169446602861949, "learning_rate": 1.2356585651268765e-05, "loss": 0.7475, "step": 20020 }, { "epoch": 0.4302530394810328, "grad_norm": 0.5471875007778104, "learning_rate": 1.2349960576077097e-05, "loss": 0.7589, "step": 20030 }, { "epoch": 0.43046784379430336, "grad_norm": 0.5125110507107423, "learning_rate": 1.2343334408981116e-05, "loss": 0.7572, "step": 20040 }, { "epoch": 0.430682648107574, "grad_norm": 0.5036197114282005, "learning_rate": 1.2336707153059662e-05, "loss": 0.7507, "step": 20050 }, { "epoch": 0.4308974524208446, "grad_norm": 0.5169855576103094, "learning_rate": 1.2330078811392068e-05, "loss": 0.7567, "step": 20060 }, { "epoch": 0.43111225673411524, "grad_norm": 0.5229836121047303, "learning_rate": 1.2323449387058185e-05, "loss": 0.7433, "step": 20070 }, { "epoch": 0.43132706104738583, "grad_norm": 0.5272331493503679, "learning_rate": 1.2316818883138362e-05, "loss": 0.7618, "step": 20080 }, { "epoch": 0.4315418653606564, "grad_norm": 0.5127750810913362, "learning_rate": 1.2310187302713448e-05, "loss": 0.7557, "step": 20090 }, { "epoch": 0.43175666967392706, "grad_norm": 0.5046998338723229, "learning_rate": 1.2303554648864791e-05, "loss": 0.7406, "step": 20100 }, { "epoch": 0.43197147398719765, "grad_norm": 0.5256594356973203, "learning_rate": 1.229692092467424e-05, "loss": 0.7516, "step": 20110 }, { "epoch": 0.4321862783004683, "grad_norm": 0.5171058923668201, "learning_rate": 1.2290286133224146e-05, "loss": 0.7513, "step": 20120 }, { "epoch": 0.4324010826137389, "grad_norm": 0.5104680411343472, "learning_rate": 1.2283650277597343e-05, "loss": 0.7378, "step": 20130 }, { "epoch": 0.4326158869270095, "grad_norm": 0.5133259060250314, "learning_rate": 1.2277013360877174e-05, "loss": 0.7497, "step": 20140 }, { "epoch": 0.4328306912402801, "grad_norm": 0.5068993271066068, "learning_rate": 1.2270375386147466e-05, "loss": 0.7562, "step": 20150 }, { "epoch": 0.4330454955535507, "grad_norm": 0.5411116105681395, "learning_rate": 1.2263736356492541e-05, "loss": 0.7513, "step": 20160 }, { "epoch": 0.43326029986682135, "grad_norm": 0.49754370415948745, "learning_rate": 1.2257096274997209e-05, "loss": 0.7427, "step": 20170 }, { "epoch": 0.43347510418009194, "grad_norm": 0.5163307527085924, "learning_rate": 1.2250455144746776e-05, "loss": 0.7383, "step": 20180 }, { "epoch": 0.43368990849336253, "grad_norm": 0.5302499060158602, "learning_rate": 1.2243812968827021e-05, "loss": 0.7304, "step": 20190 }, { "epoch": 0.4339047128066332, "grad_norm": 0.5089583346915815, "learning_rate": 1.223716975032422e-05, "loss": 0.7482, "step": 20200 }, { "epoch": 0.43411951711990376, "grad_norm": 0.5302791890282693, "learning_rate": 1.2230525492325133e-05, "loss": 0.7587, "step": 20210 }, { "epoch": 0.43433432143317435, "grad_norm": 0.5183200687604282, "learning_rate": 1.2223880197916997e-05, "loss": 0.7385, "step": 20220 }, { "epoch": 0.434549125746445, "grad_norm": 0.5283129948501645, "learning_rate": 1.2217233870187539e-05, "loss": 0.7618, "step": 20230 }, { "epoch": 0.4347639300597156, "grad_norm": 0.5334500046605607, "learning_rate": 1.221058651222496e-05, "loss": 0.7574, "step": 20240 }, { "epoch": 0.43497873437298623, "grad_norm": 0.5397388701861974, "learning_rate": 1.220393812711794e-05, "loss": 0.7544, "step": 20250 }, { "epoch": 0.4351935386862568, "grad_norm": 0.5557994197082688, "learning_rate": 1.2197288717955636e-05, "loss": 0.7515, "step": 20260 }, { "epoch": 0.4354083429995274, "grad_norm": 0.5214757763867583, "learning_rate": 1.2190638287827683e-05, "loss": 0.7562, "step": 20270 }, { "epoch": 0.43562314731279805, "grad_norm": 0.5363420017553637, "learning_rate": 1.2183986839824191e-05, "loss": 0.7552, "step": 20280 }, { "epoch": 0.43583795162606864, "grad_norm": 0.5173925675212045, "learning_rate": 1.217733437703574e-05, "loss": 0.7454, "step": 20290 }, { "epoch": 0.4360527559393393, "grad_norm": 0.5052520562415731, "learning_rate": 1.2170680902553388e-05, "loss": 0.7335, "step": 20300 }, { "epoch": 0.4362675602526099, "grad_norm": 0.5214947525173893, "learning_rate": 1.216402641946865e-05, "loss": 0.7533, "step": 20310 }, { "epoch": 0.43648236456588047, "grad_norm": 0.5066915832021032, "learning_rate": 1.2157370930873521e-05, "loss": 0.7466, "step": 20320 }, { "epoch": 0.4366971688791511, "grad_norm": 0.544927092725932, "learning_rate": 1.2150714439860463e-05, "loss": 0.7422, "step": 20330 }, { "epoch": 0.4369119731924217, "grad_norm": 0.5232539415746384, "learning_rate": 1.2144056949522396e-05, "loss": 0.7456, "step": 20340 }, { "epoch": 0.4371267775056923, "grad_norm": 0.5358017048774394, "learning_rate": 1.2137398462952714e-05, "loss": 0.761, "step": 20350 }, { "epoch": 0.43734158181896293, "grad_norm": 0.514296276827805, "learning_rate": 1.2130738983245265e-05, "loss": 0.7406, "step": 20360 }, { "epoch": 0.4375563861322335, "grad_norm": 0.5274917168112747, "learning_rate": 1.2124078513494366e-05, "loss": 0.7498, "step": 20370 }, { "epoch": 0.43777119044550417, "grad_norm": 0.5091641073845611, "learning_rate": 1.2117417056794787e-05, "loss": 0.7669, "step": 20380 }, { "epoch": 0.43798599475877475, "grad_norm": 0.5238531386173619, "learning_rate": 1.211075461624176e-05, "loss": 0.7454, "step": 20390 }, { "epoch": 0.43820079907204534, "grad_norm": 0.5263012775160619, "learning_rate": 1.2104091194930976e-05, "loss": 0.7546, "step": 20400 }, { "epoch": 0.438415603385316, "grad_norm": 0.5206973036533034, "learning_rate": 1.2097426795958582e-05, "loss": 0.7633, "step": 20410 }, { "epoch": 0.4386304076985866, "grad_norm": 0.5329927051305835, "learning_rate": 1.2090761422421173e-05, "loss": 0.7492, "step": 20420 }, { "epoch": 0.4388452120118572, "grad_norm": 0.5248347095576246, "learning_rate": 1.2084095077415802e-05, "loss": 0.7525, "step": 20430 }, { "epoch": 0.4390600163251278, "grad_norm": 0.5763619430364404, "learning_rate": 1.2077427764039976e-05, "loss": 0.763, "step": 20440 }, { "epoch": 0.4392748206383984, "grad_norm": 0.5277721502587376, "learning_rate": 1.2070759485391642e-05, "loss": 0.7728, "step": 20450 }, { "epoch": 0.43948962495166904, "grad_norm": 0.5349602899110175, "learning_rate": 1.206409024456921e-05, "loss": 0.7529, "step": 20460 }, { "epoch": 0.43970442926493963, "grad_norm": 0.5322814748856364, "learning_rate": 1.2057420044671517e-05, "loss": 0.7403, "step": 20470 }, { "epoch": 0.4399192335782103, "grad_norm": 0.5118893319232871, "learning_rate": 1.205074888879787e-05, "loss": 0.7409, "step": 20480 }, { "epoch": 0.44013403789148087, "grad_norm": 0.5455136550456627, "learning_rate": 1.2044076780048e-05, "loss": 0.7471, "step": 20490 }, { "epoch": 0.44034884220475146, "grad_norm": 0.5299308092265677, "learning_rate": 1.2037403721522095e-05, "loss": 0.7577, "step": 20500 }, { "epoch": 0.4405636465180221, "grad_norm": 0.5121296470133871, "learning_rate": 1.2030729716320768e-05, "loss": 0.747, "step": 20510 }, { "epoch": 0.4407784508312927, "grad_norm": 0.5386378414121707, "learning_rate": 1.2024054767545092e-05, "loss": 0.7534, "step": 20520 }, { "epoch": 0.4409932551445633, "grad_norm": 0.5433674923886992, "learning_rate": 1.2017378878296562e-05, "loss": 0.7503, "step": 20530 }, { "epoch": 0.4412080594578339, "grad_norm": 0.5016141269307198, "learning_rate": 1.2010702051677118e-05, "loss": 0.7502, "step": 20540 }, { "epoch": 0.4414228637711045, "grad_norm": 0.504046182876863, "learning_rate": 1.2004024290789133e-05, "loss": 0.7516, "step": 20550 }, { "epoch": 0.44163766808437516, "grad_norm": 0.5226732086112348, "learning_rate": 1.1997345598735418e-05, "loss": 0.7494, "step": 20560 }, { "epoch": 0.44185247239764575, "grad_norm": 0.5114919367543062, "learning_rate": 1.1990665978619207e-05, "loss": 0.7402, "step": 20570 }, { "epoch": 0.44206727671091633, "grad_norm": 0.5440210462668759, "learning_rate": 1.1983985433544176e-05, "loss": 0.7533, "step": 20580 }, { "epoch": 0.442282081024187, "grad_norm": 0.4942048477137153, "learning_rate": 1.1977303966614426e-05, "loss": 0.7581, "step": 20590 }, { "epoch": 0.44249688533745757, "grad_norm": 0.5320405941370843, "learning_rate": 1.1970621580934487e-05, "loss": 0.7624, "step": 20600 }, { "epoch": 0.4427116896507282, "grad_norm": 0.5185257384736662, "learning_rate": 1.1963938279609313e-05, "loss": 0.7406, "step": 20610 }, { "epoch": 0.4429264939639988, "grad_norm": 0.5263134282281284, "learning_rate": 1.195725406574429e-05, "loss": 0.7621, "step": 20620 }, { "epoch": 0.4431412982772694, "grad_norm": 0.5089836853097602, "learning_rate": 1.1950568942445225e-05, "loss": 0.7457, "step": 20630 }, { "epoch": 0.44335610259054004, "grad_norm": 0.5201727085179321, "learning_rate": 1.1943882912818339e-05, "loss": 0.7349, "step": 20640 }, { "epoch": 0.4435709069038106, "grad_norm": 0.5595981699934405, "learning_rate": 1.1937195979970289e-05, "loss": 0.7588, "step": 20650 }, { "epoch": 0.4437857112170812, "grad_norm": 0.5276981692403241, "learning_rate": 1.193050814700814e-05, "loss": 0.7736, "step": 20660 }, { "epoch": 0.44400051553035186, "grad_norm": 0.5236268232985871, "learning_rate": 1.1923819417039383e-05, "loss": 0.7595, "step": 20670 }, { "epoch": 0.44421531984362245, "grad_norm": 0.49905338520313625, "learning_rate": 1.1917129793171924e-05, "loss": 0.7413, "step": 20680 }, { "epoch": 0.4444301241568931, "grad_norm": 0.5164080638755711, "learning_rate": 1.1910439278514081e-05, "loss": 0.752, "step": 20690 }, { "epoch": 0.4446449284701637, "grad_norm": 0.49930489760055635, "learning_rate": 1.1903747876174583e-05, "loss": 0.7514, "step": 20700 }, { "epoch": 0.44485973278343427, "grad_norm": 0.5300452626534115, "learning_rate": 1.1897055589262583e-05, "loss": 0.7396, "step": 20710 }, { "epoch": 0.4450745370967049, "grad_norm": 0.5321479894953371, "learning_rate": 1.1890362420887634e-05, "loss": 0.7508, "step": 20720 }, { "epoch": 0.4452893414099755, "grad_norm": 0.5308819147481129, "learning_rate": 1.1883668374159705e-05, "loss": 0.7538, "step": 20730 }, { "epoch": 0.44550414572324615, "grad_norm": 0.5069966537674646, "learning_rate": 1.1876973452189172e-05, "loss": 0.7629, "step": 20740 }, { "epoch": 0.44571895003651674, "grad_norm": 0.5585367021939927, "learning_rate": 1.1870277658086813e-05, "loss": 0.7432, "step": 20750 }, { "epoch": 0.4459337543497873, "grad_norm": 0.5223674054406362, "learning_rate": 1.1863580994963817e-05, "loss": 0.7439, "step": 20760 }, { "epoch": 0.44614855866305797, "grad_norm": 0.5141402859358744, "learning_rate": 1.1856883465931772e-05, "loss": 0.746, "step": 20770 }, { "epoch": 0.44636336297632856, "grad_norm": 0.5461400382474692, "learning_rate": 1.1850185074102675e-05, "loss": 0.7681, "step": 20780 }, { "epoch": 0.44657816728959915, "grad_norm": 0.5190276865079356, "learning_rate": 1.1843485822588923e-05, "loss": 0.746, "step": 20790 }, { "epoch": 0.4467929716028698, "grad_norm": 0.5149818719484064, "learning_rate": 1.18367857145033e-05, "loss": 0.7511, "step": 20800 }, { "epoch": 0.4470077759161404, "grad_norm": 0.5038594272188172, "learning_rate": 1.1830084752959002e-05, "loss": 0.7601, "step": 20810 }, { "epoch": 0.447222580229411, "grad_norm": 0.5057367274782396, "learning_rate": 1.1823382941069618e-05, "loss": 0.7504, "step": 20820 }, { "epoch": 0.4474373845426816, "grad_norm": 0.5278200169568622, "learning_rate": 1.1816680281949132e-05, "loss": 0.734, "step": 20830 }, { "epoch": 0.4476521888559522, "grad_norm": 0.4932889583978271, "learning_rate": 1.1809976778711915e-05, "loss": 0.7427, "step": 20840 }, { "epoch": 0.44786699316922285, "grad_norm": 0.5254976142469129, "learning_rate": 1.180327243447274e-05, "loss": 0.7495, "step": 20850 }, { "epoch": 0.44808179748249344, "grad_norm": 0.5258601024362219, "learning_rate": 1.1796567252346766e-05, "loss": 0.751, "step": 20860 }, { "epoch": 0.4482966017957641, "grad_norm": 0.5361224414625771, "learning_rate": 1.1789861235449542e-05, "loss": 0.7354, "step": 20870 }, { "epoch": 0.44851140610903467, "grad_norm": 0.508201235290661, "learning_rate": 1.1783154386897008e-05, "loss": 0.7453, "step": 20880 }, { "epoch": 0.44872621042230526, "grad_norm": 0.5054092342839664, "learning_rate": 1.1776446709805482e-05, "loss": 0.7541, "step": 20890 }, { "epoch": 0.4489410147355759, "grad_norm": 0.5178562956293307, "learning_rate": 1.1769738207291674e-05, "loss": 0.7564, "step": 20900 }, { "epoch": 0.4491558190488465, "grad_norm": 0.5160643686715897, "learning_rate": 1.1763028882472678e-05, "loss": 0.7482, "step": 20910 }, { "epoch": 0.44937062336211714, "grad_norm": 0.5200060584249153, "learning_rate": 1.1756318738465963e-05, "loss": 0.7432, "step": 20920 }, { "epoch": 0.4495854276753877, "grad_norm": 0.5059948102109856, "learning_rate": 1.1749607778389386e-05, "loss": 0.7501, "step": 20930 }, { "epoch": 0.4498002319886583, "grad_norm": 0.5186746353091075, "learning_rate": 1.1742896005361186e-05, "loss": 0.7333, "step": 20940 }, { "epoch": 0.45001503630192896, "grad_norm": 0.5600841006515612, "learning_rate": 1.173618342249997e-05, "loss": 0.7478, "step": 20950 }, { "epoch": 0.45022984061519955, "grad_norm": 0.5766313324253101, "learning_rate": 1.1729470032924721e-05, "loss": 0.7432, "step": 20960 }, { "epoch": 0.45044464492847014, "grad_norm": 0.5166006082552747, "learning_rate": 1.1722755839754807e-05, "loss": 0.7473, "step": 20970 }, { "epoch": 0.4506594492417408, "grad_norm": 0.49578934781253853, "learning_rate": 1.1716040846109965e-05, "loss": 0.7479, "step": 20980 }, { "epoch": 0.4508742535550114, "grad_norm": 0.5016517079821076, "learning_rate": 1.1709325055110296e-05, "loss": 0.7349, "step": 20990 }, { "epoch": 0.451089057868282, "grad_norm": 0.5210845740181136, "learning_rate": 1.1702608469876288e-05, "loss": 0.7338, "step": 21000 }, { "epoch": 0.4513038621815526, "grad_norm": 0.5096278925929334, "learning_rate": 1.1695891093528785e-05, "loss": 0.7563, "step": 21010 }, { "epoch": 0.4515186664948232, "grad_norm": 0.5086441029859898, "learning_rate": 1.1689172929188997e-05, "loss": 0.748, "step": 21020 }, { "epoch": 0.45173347080809384, "grad_norm": 0.5387202509164648, "learning_rate": 1.1682453979978507e-05, "loss": 0.7562, "step": 21030 }, { "epoch": 0.45194827512136443, "grad_norm": 0.5167975539045492, "learning_rate": 1.1675734249019264e-05, "loss": 0.7459, "step": 21040 }, { "epoch": 0.4521630794346351, "grad_norm": 0.5094712446059221, "learning_rate": 1.1669013739433576e-05, "loss": 0.7372, "step": 21050 }, { "epoch": 0.45237788374790566, "grad_norm": 0.5467898326634167, "learning_rate": 1.1662292454344116e-05, "loss": 0.7407, "step": 21060 }, { "epoch": 0.45259268806117625, "grad_norm": 0.4989509970894457, "learning_rate": 1.1655570396873911e-05, "loss": 0.7469, "step": 21070 }, { "epoch": 0.4528074923744469, "grad_norm": 0.5500057146231173, "learning_rate": 1.1648847570146353e-05, "loss": 0.7385, "step": 21080 }, { "epoch": 0.4530222966877175, "grad_norm": 0.5119213457731346, "learning_rate": 1.1642123977285187e-05, "loss": 0.7535, "step": 21090 }, { "epoch": 0.4532371010009881, "grad_norm": 0.5347652595873134, "learning_rate": 1.163539962141452e-05, "loss": 0.7514, "step": 21100 }, { "epoch": 0.4534519053142587, "grad_norm": 0.5181244057297967, "learning_rate": 1.162867450565881e-05, "loss": 0.7448, "step": 21110 }, { "epoch": 0.4536667096275293, "grad_norm": 0.5097669753456439, "learning_rate": 1.1621948633142863e-05, "loss": 0.7512, "step": 21120 }, { "epoch": 0.45388151394079995, "grad_norm": 0.521551942346145, "learning_rate": 1.161522200699185e-05, "loss": 0.7335, "step": 21130 }, { "epoch": 0.45409631825407054, "grad_norm": 0.5183982050773465, "learning_rate": 1.1608494630331278e-05, "loss": 0.7389, "step": 21140 }, { "epoch": 0.45431112256734113, "grad_norm": 0.5005973180005574, "learning_rate": 1.1601766506287009e-05, "loss": 0.7244, "step": 21150 }, { "epoch": 0.4545259268806118, "grad_norm": 0.5510297098838621, "learning_rate": 1.1595037637985254e-05, "loss": 0.7363, "step": 21160 }, { "epoch": 0.45474073119388236, "grad_norm": 0.5172681651878976, "learning_rate": 1.1588308028552567e-05, "loss": 0.7469, "step": 21170 }, { "epoch": 0.454955535507153, "grad_norm": 0.5101036668126001, "learning_rate": 1.1581577681115844e-05, "loss": 0.7403, "step": 21180 }, { "epoch": 0.4551703398204236, "grad_norm": 0.5128096662612829, "learning_rate": 1.1574846598802331e-05, "loss": 0.7546, "step": 21190 }, { "epoch": 0.4553851441336942, "grad_norm": 0.500612279234514, "learning_rate": 1.1568114784739612e-05, "loss": 0.748, "step": 21200 }, { "epoch": 0.45559994844696483, "grad_norm": 0.5376520924082702, "learning_rate": 1.1561382242055607e-05, "loss": 0.7583, "step": 21210 }, { "epoch": 0.4558147527602354, "grad_norm": 0.536096912372685, "learning_rate": 1.1554648973878582e-05, "loss": 0.7516, "step": 21220 }, { "epoch": 0.456029557073506, "grad_norm": 0.5174865347901014, "learning_rate": 1.154791498333713e-05, "loss": 0.7433, "step": 21230 }, { "epoch": 0.45624436138677665, "grad_norm": 0.5344982319266313, "learning_rate": 1.154118027356019e-05, "loss": 0.7348, "step": 21240 }, { "epoch": 0.45645916570004724, "grad_norm": 0.49211553724020624, "learning_rate": 1.153444484767703e-05, "loss": 0.7431, "step": 21250 }, { "epoch": 0.4566739700133179, "grad_norm": 0.5309412807127188, "learning_rate": 1.1527708708817255e-05, "loss": 0.7592, "step": 21260 }, { "epoch": 0.4568887743265885, "grad_norm": 0.5266934403871943, "learning_rate": 1.1520971860110795e-05, "loss": 0.7467, "step": 21270 }, { "epoch": 0.45710357863985907, "grad_norm": 0.5113799966183405, "learning_rate": 1.1514234304687912e-05, "loss": 0.7492, "step": 21280 }, { "epoch": 0.4573183829531297, "grad_norm": 0.5265597304769074, "learning_rate": 1.1507496045679196e-05, "loss": 0.7577, "step": 21290 }, { "epoch": 0.4575331872664003, "grad_norm": 0.5011207398210541, "learning_rate": 1.1500757086215568e-05, "loss": 0.7358, "step": 21300 }, { "epoch": 0.45774799157967094, "grad_norm": 0.5197831944252082, "learning_rate": 1.1494017429428271e-05, "loss": 0.7402, "step": 21310 }, { "epoch": 0.45796279589294153, "grad_norm": 0.5051983633873826, "learning_rate": 1.1487277078448873e-05, "loss": 0.7386, "step": 21320 }, { "epoch": 0.4581776002062121, "grad_norm": 0.5177898532161163, "learning_rate": 1.1480536036409262e-05, "loss": 0.74, "step": 21330 }, { "epoch": 0.45839240451948277, "grad_norm": 0.5150685367159163, "learning_rate": 1.1473794306441652e-05, "loss": 0.7379, "step": 21340 }, { "epoch": 0.45860720883275335, "grad_norm": 0.5074160095947531, "learning_rate": 1.1467051891678568e-05, "loss": 0.7314, "step": 21350 }, { "epoch": 0.458822013146024, "grad_norm": 0.5145616285881711, "learning_rate": 1.1460308795252867e-05, "loss": 0.7399, "step": 21360 }, { "epoch": 0.4590368174592946, "grad_norm": 0.5160410203461876, "learning_rate": 1.145356502029771e-05, "loss": 0.7495, "step": 21370 }, { "epoch": 0.4592516217725652, "grad_norm": 0.5315723943002935, "learning_rate": 1.1446820569946581e-05, "loss": 0.75, "step": 21380 }, { "epoch": 0.4594664260858358, "grad_norm": 0.5106116756563384, "learning_rate": 1.1440075447333274e-05, "loss": 0.7417, "step": 21390 }, { "epoch": 0.4596812303991064, "grad_norm": 0.5122130397254849, "learning_rate": 1.1433329655591894e-05, "loss": 0.7449, "step": 21400 }, { "epoch": 0.459896034712377, "grad_norm": 0.5054378223152889, "learning_rate": 1.1426583197856858e-05, "loss": 0.7418, "step": 21410 }, { "epoch": 0.46011083902564764, "grad_norm": 0.5244487700127819, "learning_rate": 1.1419836077262899e-05, "loss": 0.7419, "step": 21420 }, { "epoch": 0.46032564333891823, "grad_norm": 0.5252805536342786, "learning_rate": 1.1413088296945048e-05, "loss": 0.7474, "step": 21430 }, { "epoch": 0.4605404476521889, "grad_norm": 0.5098516964749148, "learning_rate": 1.1406339860038648e-05, "loss": 0.7566, "step": 21440 }, { "epoch": 0.46075525196545947, "grad_norm": 0.53055505223306, "learning_rate": 1.139959076967935e-05, "loss": 0.7448, "step": 21450 }, { "epoch": 0.46097005627873006, "grad_norm": 0.5135560710510545, "learning_rate": 1.1392841029003102e-05, "loss": 0.74, "step": 21460 }, { "epoch": 0.4611848605920007, "grad_norm": 0.5214337220451569, "learning_rate": 1.1386090641146152e-05, "loss": 0.745, "step": 21470 }, { "epoch": 0.4613996649052713, "grad_norm": 0.5375224268150335, "learning_rate": 1.137933960924506e-05, "loss": 0.7442, "step": 21480 }, { "epoch": 0.46161446921854193, "grad_norm": 0.5139666564481621, "learning_rate": 1.1372587936436683e-05, "loss": 0.7419, "step": 21490 }, { "epoch": 0.4618292735318125, "grad_norm": 0.5559740898339312, "learning_rate": 1.1365835625858162e-05, "loss": 0.7544, "step": 21500 }, { "epoch": 0.4620440778450831, "grad_norm": 0.49830043641658905, "learning_rate": 1.1359082680646952e-05, "loss": 0.7341, "step": 21510 }, { "epoch": 0.46225888215835376, "grad_norm": 0.5125992039187067, "learning_rate": 1.1352329103940788e-05, "loss": 0.7559, "step": 21520 }, { "epoch": 0.46247368647162435, "grad_norm": 0.5194082716862066, "learning_rate": 1.1345574898877707e-05, "loss": 0.7439, "step": 21530 }, { "epoch": 0.46268849078489493, "grad_norm": 0.5147396997044464, "learning_rate": 1.1338820068596044e-05, "loss": 0.7511, "step": 21540 }, { "epoch": 0.4629032950981656, "grad_norm": 0.5392671660722685, "learning_rate": 1.1332064616234407e-05, "loss": 0.7653, "step": 21550 }, { "epoch": 0.46311809941143617, "grad_norm": 0.5201496726087629, "learning_rate": 1.1325308544931706e-05, "loss": 0.7381, "step": 21560 }, { "epoch": 0.4633329037247068, "grad_norm": 0.5180175465707748, "learning_rate": 1.131855185782714e-05, "loss": 0.7391, "step": 21570 }, { "epoch": 0.4635477080379774, "grad_norm": 0.5336962808784633, "learning_rate": 1.1311794558060186e-05, "loss": 0.7469, "step": 21580 }, { "epoch": 0.463762512351248, "grad_norm": 0.49650035979570856, "learning_rate": 1.1305036648770608e-05, "loss": 0.732, "step": 21590 }, { "epoch": 0.46397731666451864, "grad_norm": 0.5281758487503815, "learning_rate": 1.129827813309846e-05, "loss": 0.7528, "step": 21600 }, { "epoch": 0.4641921209777892, "grad_norm": 0.5234542589841056, "learning_rate": 1.1291519014184062e-05, "loss": 0.7247, "step": 21610 }, { "epoch": 0.46440692529105987, "grad_norm": 0.517114689702638, "learning_rate": 1.1284759295168035e-05, "loss": 0.7511, "step": 21620 }, { "epoch": 0.46462172960433046, "grad_norm": 0.5054831713408943, "learning_rate": 1.1277998979191262e-05, "loss": 0.7348, "step": 21630 }, { "epoch": 0.46483653391760105, "grad_norm": 0.5169749307688332, "learning_rate": 1.1271238069394916e-05, "loss": 0.7517, "step": 21640 }, { "epoch": 0.4650513382308717, "grad_norm": 0.5287808498540633, "learning_rate": 1.1264476568920434e-05, "loss": 0.7437, "step": 21650 }, { "epoch": 0.4652661425441423, "grad_norm": 0.5133827362263325, "learning_rate": 1.1257714480909538e-05, "loss": 0.7256, "step": 21660 }, { "epoch": 0.46548094685741287, "grad_norm": 0.4986667333468926, "learning_rate": 1.125095180850421e-05, "loss": 0.733, "step": 21670 }, { "epoch": 0.4656957511706835, "grad_norm": 0.5041449057994531, "learning_rate": 1.1244188554846722e-05, "loss": 0.739, "step": 21680 }, { "epoch": 0.4659105554839541, "grad_norm": 0.5091046285922776, "learning_rate": 1.1237424723079597e-05, "loss": 0.7352, "step": 21690 }, { "epoch": 0.46612535979722475, "grad_norm": 0.5135885820320195, "learning_rate": 1.1230660316345643e-05, "loss": 0.7329, "step": 21700 }, { "epoch": 0.46634016411049534, "grad_norm": 0.5200190247573033, "learning_rate": 1.1223895337787924e-05, "loss": 0.743, "step": 21710 }, { "epoch": 0.4665549684237659, "grad_norm": 0.5353216172156325, "learning_rate": 1.121712979054977e-05, "loss": 0.7483, "step": 21720 }, { "epoch": 0.46676977273703657, "grad_norm": 0.5082835412156764, "learning_rate": 1.1210363677774782e-05, "loss": 0.7464, "step": 21730 }, { "epoch": 0.46698457705030716, "grad_norm": 0.5280559944508908, "learning_rate": 1.1203597002606821e-05, "loss": 0.7537, "step": 21740 }, { "epoch": 0.4671993813635778, "grad_norm": 0.5260182340412108, "learning_rate": 1.1196829768190008e-05, "loss": 0.7474, "step": 21750 }, { "epoch": 0.4674141856768484, "grad_norm": 0.5318302840617868, "learning_rate": 1.1190061977668723e-05, "loss": 0.752, "step": 21760 }, { "epoch": 0.467628989990119, "grad_norm": 0.5202925069615598, "learning_rate": 1.1183293634187609e-05, "loss": 0.7395, "step": 21770 }, { "epoch": 0.4678437943033896, "grad_norm": 0.5420558362059565, "learning_rate": 1.1176524740891558e-05, "loss": 0.746, "step": 21780 }, { "epoch": 0.4680585986166602, "grad_norm": 0.5209989958223947, "learning_rate": 1.1169755300925723e-05, "loss": 0.7528, "step": 21790 }, { "epoch": 0.46827340292993086, "grad_norm": 0.5323335344870374, "learning_rate": 1.1162985317435514e-05, "loss": 0.7419, "step": 21800 }, { "epoch": 0.46848820724320145, "grad_norm": 0.5268811768109316, "learning_rate": 1.1156214793566591e-05, "loss": 0.7413, "step": 21810 }, { "epoch": 0.46870301155647204, "grad_norm": 0.5526549348147461, "learning_rate": 1.1149443732464858e-05, "loss": 0.7423, "step": 21820 }, { "epoch": 0.4689178158697427, "grad_norm": 0.5428671229053159, "learning_rate": 1.1142672137276478e-05, "loss": 0.7478, "step": 21830 }, { "epoch": 0.46913262018301327, "grad_norm": 0.513399124764161, "learning_rate": 1.1135900011147858e-05, "loss": 0.7479, "step": 21840 }, { "epoch": 0.46934742449628386, "grad_norm": 0.536835542346075, "learning_rate": 1.1129127357225648e-05, "loss": 0.7415, "step": 21850 }, { "epoch": 0.4695622288095545, "grad_norm": 0.517525876955086, "learning_rate": 1.1122354178656756e-05, "loss": 0.7454, "step": 21860 }, { "epoch": 0.4697770331228251, "grad_norm": 0.5016054637504687, "learning_rate": 1.111558047858832e-05, "loss": 0.7571, "step": 21870 }, { "epoch": 0.46999183743609574, "grad_norm": 0.5114598800325706, "learning_rate": 1.1108806260167727e-05, "loss": 0.728, "step": 21880 }, { "epoch": 0.4702066417493663, "grad_norm": 0.5092042135062025, "learning_rate": 1.1102031526542605e-05, "loss": 0.7356, "step": 21890 }, { "epoch": 0.4704214460626369, "grad_norm": 0.5223633189785332, "learning_rate": 1.109525628086082e-05, "loss": 0.7477, "step": 21900 }, { "epoch": 0.47063625037590756, "grad_norm": 0.5137541232678928, "learning_rate": 1.1088480526270472e-05, "loss": 0.7384, "step": 21910 }, { "epoch": 0.47085105468917815, "grad_norm": 0.5268764811281718, "learning_rate": 1.1081704265919904e-05, "loss": 0.7434, "step": 21920 }, { "epoch": 0.4710658590024488, "grad_norm": 0.5214178282194654, "learning_rate": 1.1074927502957688e-05, "loss": 0.747, "step": 21930 }, { "epoch": 0.4712806633157194, "grad_norm": 0.5150008031537309, "learning_rate": 1.1068150240532637e-05, "loss": 0.7427, "step": 21940 }, { "epoch": 0.47149546762899, "grad_norm": 0.5155181176356146, "learning_rate": 1.1061372481793793e-05, "loss": 0.735, "step": 21950 }, { "epoch": 0.4717102719422606, "grad_norm": 0.528603646425843, "learning_rate": 1.1054594229890425e-05, "loss": 0.7206, "step": 21960 }, { "epoch": 0.4719250762555312, "grad_norm": 0.5119688474719511, "learning_rate": 1.1047815487972034e-05, "loss": 0.7467, "step": 21970 }, { "epoch": 0.4721398805688018, "grad_norm": 0.5252997267209152, "learning_rate": 1.104103625918835e-05, "loss": 0.7466, "step": 21980 }, { "epoch": 0.47235468488207244, "grad_norm": 0.5257720369841751, "learning_rate": 1.1034256546689321e-05, "loss": 0.7479, "step": 21990 }, { "epoch": 0.47256948919534303, "grad_norm": 0.536029357234098, "learning_rate": 1.1027476353625132e-05, "loss": 0.7447, "step": 22000 }, { "epoch": 0.4727842935086137, "grad_norm": 0.5050633427695835, "learning_rate": 1.1020695683146188e-05, "loss": 0.7504, "step": 22010 }, { "epoch": 0.47299909782188426, "grad_norm": 0.514261629466442, "learning_rate": 1.101391453840311e-05, "loss": 0.7397, "step": 22020 }, { "epoch": 0.47321390213515485, "grad_norm": 0.5198813281021253, "learning_rate": 1.1007132922546743e-05, "loss": 0.7506, "step": 22030 }, { "epoch": 0.4734287064484255, "grad_norm": 0.5181411136523617, "learning_rate": 1.100035083872815e-05, "loss": 0.746, "step": 22040 }, { "epoch": 0.4736435107616961, "grad_norm": 0.5147323905400899, "learning_rate": 1.0993568290098615e-05, "loss": 0.7356, "step": 22050 }, { "epoch": 0.47385831507496673, "grad_norm": 0.526477837321041, "learning_rate": 1.0986785279809631e-05, "loss": 0.7412, "step": 22060 }, { "epoch": 0.4740731193882373, "grad_norm": 0.5313979721453921, "learning_rate": 1.0980001811012915e-05, "loss": 0.7336, "step": 22070 }, { "epoch": 0.4742879237015079, "grad_norm": 0.5170598479629712, "learning_rate": 1.0973217886860387e-05, "loss": 0.7238, "step": 22080 }, { "epoch": 0.47450272801477855, "grad_norm": 0.5221293133998406, "learning_rate": 1.0966433510504188e-05, "loss": 0.7344, "step": 22090 }, { "epoch": 0.47471753232804914, "grad_norm": 0.5190509203550013, "learning_rate": 1.0959648685096657e-05, "loss": 0.7536, "step": 22100 }, { "epoch": 0.47493233664131973, "grad_norm": 0.5103636926313977, "learning_rate": 1.0952863413790355e-05, "loss": 0.7493, "step": 22110 }, { "epoch": 0.4751471409545904, "grad_norm": 0.5017701329508603, "learning_rate": 1.0946077699738045e-05, "loss": 0.7218, "step": 22120 }, { "epoch": 0.47536194526786096, "grad_norm": 0.5334534885238904, "learning_rate": 1.093929154609269e-05, "loss": 0.7355, "step": 22130 }, { "epoch": 0.4755767495811316, "grad_norm": 0.5347297096552461, "learning_rate": 1.0932504956007468e-05, "loss": 0.7414, "step": 22140 }, { "epoch": 0.4757915538944022, "grad_norm": 0.5361415400554347, "learning_rate": 1.0925717932635751e-05, "loss": 0.7429, "step": 22150 }, { "epoch": 0.4760063582076728, "grad_norm": 0.5276150896460355, "learning_rate": 1.0918930479131114e-05, "loss": 0.7343, "step": 22160 }, { "epoch": 0.47622116252094343, "grad_norm": 0.5242096273960569, "learning_rate": 1.0912142598647332e-05, "loss": 0.7352, "step": 22170 }, { "epoch": 0.476435966834214, "grad_norm": 0.5124773180856481, "learning_rate": 1.0905354294338384e-05, "loss": 0.7458, "step": 22180 }, { "epoch": 0.47665077114748466, "grad_norm": 0.5009918414284297, "learning_rate": 1.089856556935844e-05, "loss": 0.7338, "step": 22190 }, { "epoch": 0.47686557546075525, "grad_norm": 0.5108245418067267, "learning_rate": 1.0891776426861868e-05, "loss": 0.7421, "step": 22200 }, { "epoch": 0.47708037977402584, "grad_norm": 0.509138834615506, "learning_rate": 1.0884986870003229e-05, "loss": 0.7431, "step": 22210 }, { "epoch": 0.4772951840872965, "grad_norm": 0.5272156276514611, "learning_rate": 1.0878196901937272e-05, "loss": 0.7522, "step": 22220 }, { "epoch": 0.4775099884005671, "grad_norm": 0.5053807372710165, "learning_rate": 1.0871406525818947e-05, "loss": 0.735, "step": 22230 }, { "epoch": 0.4777247927138377, "grad_norm": 0.5166733531977794, "learning_rate": 1.0864615744803382e-05, "loss": 0.7278, "step": 22240 }, { "epoch": 0.4779395970271083, "grad_norm": 0.5117417119746009, "learning_rate": 1.0857824562045907e-05, "loss": 0.7521, "step": 22250 }, { "epoch": 0.4781544013403789, "grad_norm": 0.5256764124583501, "learning_rate": 1.0851032980702025e-05, "loss": 0.7392, "step": 22260 }, { "epoch": 0.47836920565364954, "grad_norm": 0.5169468372261654, "learning_rate": 1.0844241003927433e-05, "loss": 0.7442, "step": 22270 }, { "epoch": 0.47858400996692013, "grad_norm": 0.5075813885039875, "learning_rate": 1.0837448634878011e-05, "loss": 0.7498, "step": 22280 }, { "epoch": 0.4787988142801907, "grad_norm": 0.49867951346910183, "learning_rate": 1.0830655876709817e-05, "loss": 0.7405, "step": 22290 }, { "epoch": 0.47901361859346137, "grad_norm": 0.549353416061091, "learning_rate": 1.0823862732579088e-05, "loss": 0.7356, "step": 22300 }, { "epoch": 0.47922842290673195, "grad_norm": 0.5233565980832593, "learning_rate": 1.081706920564225e-05, "loss": 0.7393, "step": 22310 }, { "epoch": 0.4794432272200026, "grad_norm": 0.5166833901430437, "learning_rate": 1.0810275299055899e-05, "loss": 0.7399, "step": 22320 }, { "epoch": 0.4796580315332732, "grad_norm": 0.5156376883464526, "learning_rate": 1.0803481015976809e-05, "loss": 0.7356, "step": 22330 }, { "epoch": 0.4798728358465438, "grad_norm": 0.5361312416973215, "learning_rate": 1.0796686359561931e-05, "loss": 0.7575, "step": 22340 }, { "epoch": 0.4800876401598144, "grad_norm": 0.52020980112163, "learning_rate": 1.0789891332968387e-05, "loss": 0.7236, "step": 22350 }, { "epoch": 0.480302444473085, "grad_norm": 0.5250129443878427, "learning_rate": 1.0783095939353474e-05, "loss": 0.7297, "step": 22360 }, { "epoch": 0.48051724878635566, "grad_norm": 0.5134420144953245, "learning_rate": 1.0776300181874654e-05, "loss": 0.7355, "step": 22370 }, { "epoch": 0.48073205309962624, "grad_norm": 0.5291422486339019, "learning_rate": 1.0769504063689564e-05, "loss": 0.7422, "step": 22380 }, { "epoch": 0.48094685741289683, "grad_norm": 0.5115283701598856, "learning_rate": 1.0762707587956005e-05, "loss": 0.7521, "step": 22390 }, { "epoch": 0.4811616617261675, "grad_norm": 0.5148143798916653, "learning_rate": 1.0755910757831949e-05, "loss": 0.7351, "step": 22400 }, { "epoch": 0.48137646603943807, "grad_norm": 0.5291173579659362, "learning_rate": 1.0749113576475525e-05, "loss": 0.7431, "step": 22410 }, { "epoch": 0.48159127035270866, "grad_norm": 0.5391435806265806, "learning_rate": 1.0742316047045029e-05, "loss": 0.7435, "step": 22420 }, { "epoch": 0.4818060746659793, "grad_norm": 0.5092459862659836, "learning_rate": 1.0735518172698922e-05, "loss": 0.7368, "step": 22430 }, { "epoch": 0.4820208789792499, "grad_norm": 0.5267935560236964, "learning_rate": 1.0728719956595818e-05, "loss": 0.7469, "step": 22440 }, { "epoch": 0.48223568329252053, "grad_norm": 0.5176688281994023, "learning_rate": 1.0721921401894498e-05, "loss": 0.7501, "step": 22450 }, { "epoch": 0.4824504876057911, "grad_norm": 0.5137388029359484, "learning_rate": 1.0715122511753897e-05, "loss": 0.7425, "step": 22460 }, { "epoch": 0.4826652919190617, "grad_norm": 0.5159930397277978, "learning_rate": 1.0708323289333102e-05, "loss": 0.7635, "step": 22470 }, { "epoch": 0.48288009623233236, "grad_norm": 0.5256807433908982, "learning_rate": 1.0701523737791359e-05, "loss": 0.7319, "step": 22480 }, { "epoch": 0.48309490054560295, "grad_norm": 0.5210214722370281, "learning_rate": 1.0694723860288063e-05, "loss": 0.7492, "step": 22490 }, { "epoch": 0.4833097048588736, "grad_norm": 0.5231744475769452, "learning_rate": 1.0687923659982766e-05, "loss": 0.735, "step": 22500 }, { "epoch": 0.4835245091721442, "grad_norm": 0.5011667972879604, "learning_rate": 1.068112314003517e-05, "loss": 0.7355, "step": 22510 }, { "epoch": 0.48373931348541477, "grad_norm": 0.5265184060707572, "learning_rate": 1.0674322303605115e-05, "loss": 0.7395, "step": 22520 }, { "epoch": 0.4839541177986854, "grad_norm": 0.5125129868588316, "learning_rate": 1.0667521153852603e-05, "loss": 0.7268, "step": 22530 }, { "epoch": 0.484168922111956, "grad_norm": 0.5220283396876785, "learning_rate": 1.0660719693937766e-05, "loss": 0.7373, "step": 22540 }, { "epoch": 0.48438372642522665, "grad_norm": 0.5091857205706148, "learning_rate": 1.0653917927020894e-05, "loss": 0.7402, "step": 22550 }, { "epoch": 0.48459853073849724, "grad_norm": 0.5107328961554215, "learning_rate": 1.0647115856262413e-05, "loss": 0.7306, "step": 22560 }, { "epoch": 0.4848133350517678, "grad_norm": 0.5133105974860892, "learning_rate": 1.0640313484822893e-05, "loss": 0.7324, "step": 22570 }, { "epoch": 0.48502813936503847, "grad_norm": 0.529832381078322, "learning_rate": 1.0633510815863036e-05, "loss": 0.744, "step": 22580 }, { "epoch": 0.48524294367830906, "grad_norm": 0.5223357765212729, "learning_rate": 1.0626707852543695e-05, "loss": 0.7363, "step": 22590 }, { "epoch": 0.48545774799157965, "grad_norm": 0.5300543063286008, "learning_rate": 1.0619904598025846e-05, "loss": 0.7604, "step": 22600 }, { "epoch": 0.4856725523048503, "grad_norm": 0.5379619059578166, "learning_rate": 1.0613101055470612e-05, "loss": 0.7536, "step": 22610 }, { "epoch": 0.4858873566181209, "grad_norm": 0.5133418467154645, "learning_rate": 1.0606297228039244e-05, "loss": 0.7463, "step": 22620 }, { "epoch": 0.4861021609313915, "grad_norm": 0.506231778810294, "learning_rate": 1.0599493118893122e-05, "loss": 0.7364, "step": 22630 }, { "epoch": 0.4863169652446621, "grad_norm": 0.5019381915489841, "learning_rate": 1.0592688731193768e-05, "loss": 0.7333, "step": 22640 }, { "epoch": 0.4865317695579327, "grad_norm": 0.5133937862155273, "learning_rate": 1.0585884068102824e-05, "loss": 0.7392, "step": 22650 }, { "epoch": 0.48674657387120335, "grad_norm": 0.5225027361367924, "learning_rate": 1.0579079132782061e-05, "loss": 0.7376, "step": 22660 }, { "epoch": 0.48696137818447394, "grad_norm": 0.5266639720499878, "learning_rate": 1.0572273928393379e-05, "loss": 0.7254, "step": 22670 }, { "epoch": 0.4871761824977446, "grad_norm": 0.5509818361008318, "learning_rate": 1.0565468458098806e-05, "loss": 0.7393, "step": 22680 }, { "epoch": 0.48739098681101517, "grad_norm": 0.5237573163610023, "learning_rate": 1.0558662725060483e-05, "loss": 0.7359, "step": 22690 }, { "epoch": 0.48760579112428576, "grad_norm": 0.5107223127670096, "learning_rate": 1.0551856732440681e-05, "loss": 0.7585, "step": 22700 }, { "epoch": 0.4878205954375564, "grad_norm": 0.5059159746679568, "learning_rate": 1.0545050483401793e-05, "loss": 0.7401, "step": 22710 }, { "epoch": 0.488035399750827, "grad_norm": 0.5200289327198926, "learning_rate": 1.0538243981106331e-05, "loss": 0.7498, "step": 22720 }, { "epoch": 0.4882502040640976, "grad_norm": 0.5189637119559256, "learning_rate": 1.0531437228716914e-05, "loss": 0.7482, "step": 22730 }, { "epoch": 0.4884650083773682, "grad_norm": 0.49679955732243036, "learning_rate": 1.0524630229396295e-05, "loss": 0.7446, "step": 22740 }, { "epoch": 0.4886798126906388, "grad_norm": 0.5207941387326519, "learning_rate": 1.0517822986307325e-05, "loss": 0.7624, "step": 22750 }, { "epoch": 0.48889461700390946, "grad_norm": 0.5286403629591689, "learning_rate": 1.0511015502612975e-05, "loss": 0.7414, "step": 22760 }, { "epoch": 0.48910942131718005, "grad_norm": 0.5120125001043839, "learning_rate": 1.0504207781476334e-05, "loss": 0.7366, "step": 22770 }, { "epoch": 0.48932422563045064, "grad_norm": 0.5179656319122231, "learning_rate": 1.0497399826060596e-05, "loss": 0.7418, "step": 22780 }, { "epoch": 0.4895390299437213, "grad_norm": 0.5366945858487391, "learning_rate": 1.0490591639529055e-05, "loss": 0.7357, "step": 22790 }, { "epoch": 0.48975383425699187, "grad_norm": 0.5154119510735722, "learning_rate": 1.0483783225045126e-05, "loss": 0.7219, "step": 22800 }, { "epoch": 0.4899686385702625, "grad_norm": 0.518597726407149, "learning_rate": 1.0476974585772323e-05, "loss": 0.741, "step": 22810 }, { "epoch": 0.4901834428835331, "grad_norm": 0.5140931629957429, "learning_rate": 1.047016572487427e-05, "loss": 0.7578, "step": 22820 }, { "epoch": 0.4903982471968037, "grad_norm": 0.5104752969998968, "learning_rate": 1.0463356645514687e-05, "loss": 0.7416, "step": 22830 }, { "epoch": 0.49061305151007434, "grad_norm": 0.5313548540682782, "learning_rate": 1.0456547350857397e-05, "loss": 0.7472, "step": 22840 }, { "epoch": 0.4908278558233449, "grad_norm": 0.5104717883570301, "learning_rate": 1.0449737844066332e-05, "loss": 0.7415, "step": 22850 }, { "epoch": 0.4910426601366155, "grad_norm": 0.5321738566400499, "learning_rate": 1.0442928128305504e-05, "loss": 0.7307, "step": 22860 }, { "epoch": 0.49125746444988616, "grad_norm": 0.5180000313964818, "learning_rate": 1.0436118206739044e-05, "loss": 0.7319, "step": 22870 }, { "epoch": 0.49147226876315675, "grad_norm": 0.5269259655740001, "learning_rate": 1.0429308082531157e-05, "loss": 0.7297, "step": 22880 }, { "epoch": 0.4916870730764274, "grad_norm": 0.5162542621179522, "learning_rate": 1.0422497758846166e-05, "loss": 0.7303, "step": 22890 }, { "epoch": 0.491901877389698, "grad_norm": 0.9082353493403971, "learning_rate": 1.0415687238848465e-05, "loss": 0.7393, "step": 22900 }, { "epoch": 0.4921166817029686, "grad_norm": 0.5249523721707448, "learning_rate": 1.040887652570255e-05, "loss": 0.7359, "step": 22910 }, { "epoch": 0.4923314860162392, "grad_norm": 0.5310314571240732, "learning_rate": 1.0402065622573003e-05, "loss": 0.7473, "step": 22920 }, { "epoch": 0.4925462903295098, "grad_norm": 0.5152700502053869, "learning_rate": 1.03952545326245e-05, "loss": 0.7386, "step": 22930 }, { "epoch": 0.49276109464278045, "grad_norm": 0.5137486693952514, "learning_rate": 1.0388443259021794e-05, "loss": 0.7416, "step": 22940 }, { "epoch": 0.49297589895605104, "grad_norm": 0.5147003203872301, "learning_rate": 1.0381631804929737e-05, "loss": 0.7558, "step": 22950 }, { "epoch": 0.49319070326932163, "grad_norm": 0.5150451603396607, "learning_rate": 1.0374820173513252e-05, "loss": 0.7323, "step": 22960 }, { "epoch": 0.4934055075825923, "grad_norm": 0.4998638447851373, "learning_rate": 1.0368008367937348e-05, "loss": 0.7394, "step": 22970 }, { "epoch": 0.49362031189586286, "grad_norm": 0.5065272447150604, "learning_rate": 1.036119639136712e-05, "loss": 0.7547, "step": 22980 }, { "epoch": 0.4938351162091335, "grad_norm": 0.5106286499648831, "learning_rate": 1.035438424696774e-05, "loss": 0.7497, "step": 22990 }, { "epoch": 0.4940499205224041, "grad_norm": 0.5101651671094659, "learning_rate": 1.0347571937904452e-05, "loss": 0.7363, "step": 23000 }, { "epoch": 0.4942647248356747, "grad_norm": 0.5243897522248346, "learning_rate": 1.0340759467342582e-05, "loss": 0.7444, "step": 23010 }, { "epoch": 0.49447952914894533, "grad_norm": 0.5402515262236681, "learning_rate": 1.0333946838447533e-05, "loss": 0.7315, "step": 23020 }, { "epoch": 0.4946943334622159, "grad_norm": 0.5051561251334392, "learning_rate": 1.0327134054384778e-05, "loss": 0.7332, "step": 23030 }, { "epoch": 0.4949091377754865, "grad_norm": 0.5685316863277247, "learning_rate": 1.0320321118319865e-05, "loss": 0.7501, "step": 23040 }, { "epoch": 0.49512394208875715, "grad_norm": 0.5057771401661005, "learning_rate": 1.0313508033418405e-05, "loss": 0.7315, "step": 23050 }, { "epoch": 0.49533874640202774, "grad_norm": 0.5152345284853551, "learning_rate": 1.0306694802846089e-05, "loss": 0.7327, "step": 23060 }, { "epoch": 0.4955535507152984, "grad_norm": 0.5021280830392914, "learning_rate": 1.0299881429768669e-05, "loss": 0.7376, "step": 23070 }, { "epoch": 0.495768355028569, "grad_norm": 0.5249378964815303, "learning_rate": 1.029306791735196e-05, "loss": 0.7422, "step": 23080 }, { "epoch": 0.49598315934183956, "grad_norm": 0.5032927828015555, "learning_rate": 1.0286254268761853e-05, "loss": 0.7412, "step": 23090 }, { "epoch": 0.4961979636551102, "grad_norm": 0.5120900752083057, "learning_rate": 1.0279440487164296e-05, "loss": 0.7411, "step": 23100 }, { "epoch": 0.4964127679683808, "grad_norm": 0.5194032778917269, "learning_rate": 1.0272626575725296e-05, "loss": 0.7422, "step": 23110 }, { "epoch": 0.49662757228165144, "grad_norm": 0.5051279599131995, "learning_rate": 1.0265812537610918e-05, "loss": 0.734, "step": 23120 }, { "epoch": 0.49684237659492203, "grad_norm": 0.524066136680626, "learning_rate": 1.0258998375987297e-05, "loss": 0.7471, "step": 23130 }, { "epoch": 0.4970571809081926, "grad_norm": 0.5212407578128498, "learning_rate": 1.025218409402062e-05, "loss": 0.7234, "step": 23140 }, { "epoch": 0.49727198522146326, "grad_norm": 0.5294461859743091, "learning_rate": 1.0245369694877121e-05, "loss": 0.7226, "step": 23150 }, { "epoch": 0.49748678953473385, "grad_norm": 0.5223634925273634, "learning_rate": 1.0238555181723108e-05, "loss": 0.7453, "step": 23160 }, { "epoch": 0.49770159384800444, "grad_norm": 0.5233047547217023, "learning_rate": 1.0231740557724922e-05, "loss": 0.7343, "step": 23170 }, { "epoch": 0.4979163981612751, "grad_norm": 0.507862147559393, "learning_rate": 1.0224925826048966e-05, "loss": 0.7274, "step": 23180 }, { "epoch": 0.4981312024745457, "grad_norm": 0.5155495724786578, "learning_rate": 1.0218110989861691e-05, "loss": 0.7443, "step": 23190 }, { "epoch": 0.4983460067878163, "grad_norm": 0.5206048961869586, "learning_rate": 1.0211296052329596e-05, "loss": 0.7242, "step": 23200 }, { "epoch": 0.4985608111010869, "grad_norm": 0.5119701323767093, "learning_rate": 1.020448101661923e-05, "loss": 0.7229, "step": 23210 }, { "epoch": 0.4987756154143575, "grad_norm": 0.5253915545306624, "learning_rate": 1.0197665885897184e-05, "loss": 0.7243, "step": 23220 }, { "epoch": 0.49899041972762814, "grad_norm": 0.4909170913393957, "learning_rate": 1.0190850663330093e-05, "loss": 0.7365, "step": 23230 }, { "epoch": 0.49920522404089873, "grad_norm": 0.513781488832539, "learning_rate": 1.0184035352084635e-05, "loss": 0.7399, "step": 23240 }, { "epoch": 0.4994200283541694, "grad_norm": 0.5186220905358521, "learning_rate": 1.0177219955327533e-05, "loss": 0.7416, "step": 23250 }, { "epoch": 0.49963483266743997, "grad_norm": 0.5273590434779399, "learning_rate": 1.0170404476225546e-05, "loss": 0.7388, "step": 23260 }, { "epoch": 0.49984963698071055, "grad_norm": 0.5270725741076642, "learning_rate": 1.0163588917945472e-05, "loss": 0.7465, "step": 23270 }, { "epoch": 0.5000644412939812, "grad_norm": 0.5115666946192222, "learning_rate": 1.0156773283654146e-05, "loss": 0.7514, "step": 23280 }, { "epoch": 0.5002792456072518, "grad_norm": 0.5140916715485198, "learning_rate": 1.0149957576518444e-05, "loss": 0.743, "step": 23290 }, { "epoch": 0.5004940499205224, "grad_norm": 0.5049649532600137, "learning_rate": 1.0143141799705259e-05, "loss": 0.7492, "step": 23300 }, { "epoch": 0.500708854233793, "grad_norm": 0.5148345236098256, "learning_rate": 1.0136325956381535e-05, "loss": 0.754, "step": 23310 }, { "epoch": 0.5009236585470637, "grad_norm": 0.5081943498814092, "learning_rate": 1.0129510049714238e-05, "loss": 0.7464, "step": 23320 }, { "epoch": 0.5011384628603343, "grad_norm": 0.5263481032653132, "learning_rate": 1.0122694082870365e-05, "loss": 0.7269, "step": 23330 }, { "epoch": 0.5013532671736048, "grad_norm": 0.49404517879695686, "learning_rate": 1.0115878059016942e-05, "loss": 0.7431, "step": 23340 }, { "epoch": 0.5015680714868754, "grad_norm": 0.5058326845907175, "learning_rate": 1.0109061981321015e-05, "loss": 0.7396, "step": 23350 }, { "epoch": 0.501782875800146, "grad_norm": 0.5264138844155276, "learning_rate": 1.0102245852949668e-05, "loss": 0.7262, "step": 23360 }, { "epoch": 0.5019976801134167, "grad_norm": 0.528096691604748, "learning_rate": 1.0095429677069997e-05, "loss": 0.7305, "step": 23370 }, { "epoch": 0.5022124844266873, "grad_norm": 0.5116467023890083, "learning_rate": 1.0088613456849125e-05, "loss": 0.7339, "step": 23380 }, { "epoch": 0.5024272887399579, "grad_norm": 0.4984116595301803, "learning_rate": 1.0081797195454193e-05, "loss": 0.734, "step": 23390 }, { "epoch": 0.5026420930532285, "grad_norm": 0.5172883195495444, "learning_rate": 1.0074980896052361e-05, "loss": 0.7202, "step": 23400 }, { "epoch": 0.5028568973664991, "grad_norm": 0.5069474447864731, "learning_rate": 1.0068164561810814e-05, "loss": 0.7261, "step": 23410 }, { "epoch": 0.5030717016797698, "grad_norm": 0.5339797128974847, "learning_rate": 1.0061348195896745e-05, "loss": 0.756, "step": 23420 }, { "epoch": 0.5032865059930404, "grad_norm": 0.5143777722194492, "learning_rate": 1.0054531801477364e-05, "loss": 0.7355, "step": 23430 }, { "epoch": 0.503501310306311, "grad_norm": 0.5374619747017549, "learning_rate": 1.0047715381719893e-05, "loss": 0.7168, "step": 23440 }, { "epoch": 0.5037161146195815, "grad_norm": 0.5141935837309727, "learning_rate": 1.004089893979157e-05, "loss": 0.7423, "step": 23450 }, { "epoch": 0.5039309189328521, "grad_norm": 0.5097667863863926, "learning_rate": 1.003408247885964e-05, "loss": 0.7273, "step": 23460 }, { "epoch": 0.5041457232461228, "grad_norm": 0.7070241417378597, "learning_rate": 1.0027266002091353e-05, "loss": 0.751, "step": 23470 }, { "epoch": 0.5043605275593934, "grad_norm": 0.5200900402474737, "learning_rate": 1.0020449512653978e-05, "loss": 0.7479, "step": 23480 }, { "epoch": 0.504575331872664, "grad_norm": 0.502245321750959, "learning_rate": 1.001363301371478e-05, "loss": 0.7344, "step": 23490 }, { "epoch": 0.5047901361859346, "grad_norm": 0.5102853378974559, "learning_rate": 1.0006816508441028e-05, "loss": 0.7261, "step": 23500 }, { "epoch": 0.5050049404992052, "grad_norm": 0.5139914043526608, "learning_rate": 1e-05, "loss": 0.7387, "step": 23510 }, { "epoch": 0.5052197448124758, "grad_norm": 0.5138651494123287, "learning_rate": 9.993183491558975e-06, "loss": 0.7448, "step": 23520 }, { "epoch": 0.5054345491257465, "grad_norm": 0.5138929240424788, "learning_rate": 9.986366986285222e-06, "loss": 0.7249, "step": 23530 }, { "epoch": 0.5056493534390171, "grad_norm": 0.5184056795842351, "learning_rate": 9.979550487346024e-06, "loss": 0.74, "step": 23540 }, { "epoch": 0.5058641577522877, "grad_norm": 0.5485760882777143, "learning_rate": 9.972733997908648e-06, "loss": 0.7288, "step": 23550 }, { "epoch": 0.5060789620655582, "grad_norm": 0.5106653968189862, "learning_rate": 9.965917521140365e-06, "loss": 0.7364, "step": 23560 }, { "epoch": 0.5062937663788288, "grad_norm": 0.49612689415307143, "learning_rate": 9.95910106020843e-06, "loss": 0.7324, "step": 23570 }, { "epoch": 0.5065085706920995, "grad_norm": 0.5297725980627347, "learning_rate": 9.952284618280108e-06, "loss": 0.7332, "step": 23580 }, { "epoch": 0.5067233750053701, "grad_norm": 0.5170669007337249, "learning_rate": 9.94546819852264e-06, "loss": 0.7483, "step": 23590 }, { "epoch": 0.5069381793186407, "grad_norm": 0.5038468226015915, "learning_rate": 9.938651804103257e-06, "loss": 0.7398, "step": 23600 }, { "epoch": 0.5071529836319113, "grad_norm": 0.5087783532039669, "learning_rate": 9.93183543818919e-06, "loss": 0.7325, "step": 23610 }, { "epoch": 0.5073677879451819, "grad_norm": 0.5160070069541578, "learning_rate": 9.925019103947639e-06, "loss": 0.7325, "step": 23620 }, { "epoch": 0.5075825922584526, "grad_norm": 0.5075365995460227, "learning_rate": 9.91820280454581e-06, "loss": 0.7553, "step": 23630 }, { "epoch": 0.5077973965717232, "grad_norm": 0.5267021157619796, "learning_rate": 9.91138654315088e-06, "loss": 0.7298, "step": 23640 }, { "epoch": 0.5080122008849938, "grad_norm": 0.5186830564197705, "learning_rate": 9.904570322930006e-06, "loss": 0.7556, "step": 23650 }, { "epoch": 0.5082270051982644, "grad_norm": 0.5210004208221275, "learning_rate": 9.897754147050335e-06, "loss": 0.7376, "step": 23660 }, { "epoch": 0.508441809511535, "grad_norm": 0.48685588400494934, "learning_rate": 9.890938018678985e-06, "loss": 0.7347, "step": 23670 }, { "epoch": 0.5086566138248056, "grad_norm": 0.5539745463393094, "learning_rate": 9.884121940983062e-06, "loss": 0.7515, "step": 23680 }, { "epoch": 0.5088714181380762, "grad_norm": 0.514283104621466, "learning_rate": 9.877305917129636e-06, "loss": 0.7456, "step": 23690 }, { "epoch": 0.5090862224513468, "grad_norm": 0.5050582130270673, "learning_rate": 9.870489950285765e-06, "loss": 0.7217, "step": 23700 }, { "epoch": 0.5093010267646174, "grad_norm": 0.5153338859894899, "learning_rate": 9.86367404361847e-06, "loss": 0.7345, "step": 23710 }, { "epoch": 0.509515831077888, "grad_norm": 0.5292429611522316, "learning_rate": 9.856858200294742e-06, "loss": 0.7455, "step": 23720 }, { "epoch": 0.5097306353911587, "grad_norm": 0.535174907505527, "learning_rate": 9.850042423481561e-06, "loss": 0.7321, "step": 23730 }, { "epoch": 0.5099454397044293, "grad_norm": 0.5137588499191782, "learning_rate": 9.843226716345852e-06, "loss": 0.7283, "step": 23740 }, { "epoch": 0.5101602440176999, "grad_norm": 0.5261263905458244, "learning_rate": 9.83641108205453e-06, "loss": 0.7356, "step": 23750 }, { "epoch": 0.5103750483309705, "grad_norm": 0.5057280317999486, "learning_rate": 9.829595523774456e-06, "loss": 0.7346, "step": 23760 }, { "epoch": 0.5105898526442411, "grad_norm": 0.5366500418456487, "learning_rate": 9.82278004467247e-06, "loss": 0.7353, "step": 23770 }, { "epoch": 0.5108046569575118, "grad_norm": 0.5064327805142302, "learning_rate": 9.81596464791537e-06, "loss": 0.7224, "step": 23780 }, { "epoch": 0.5110194612707823, "grad_norm": 0.48473823245015357, "learning_rate": 9.80914933666991e-06, "loss": 0.7302, "step": 23790 }, { "epoch": 0.5112342655840529, "grad_norm": 0.530367936435279, "learning_rate": 9.802334114102821e-06, "loss": 0.723, "step": 23800 }, { "epoch": 0.5114490698973235, "grad_norm": 0.53191867394961, "learning_rate": 9.795518983380771e-06, "loss": 0.7322, "step": 23810 }, { "epoch": 0.5116638742105941, "grad_norm": 0.5145552336460878, "learning_rate": 9.788703947670407e-06, "loss": 0.7174, "step": 23820 }, { "epoch": 0.5118786785238647, "grad_norm": 0.5160752379900866, "learning_rate": 9.781889010138315e-06, "loss": 0.7371, "step": 23830 }, { "epoch": 0.5120934828371354, "grad_norm": 0.5215561353383581, "learning_rate": 9.775074173951038e-06, "loss": 0.7386, "step": 23840 }, { "epoch": 0.512308287150406, "grad_norm": 0.546987216601412, "learning_rate": 9.768259442275083e-06, "loss": 0.7252, "step": 23850 }, { "epoch": 0.5125230914636766, "grad_norm": 0.5250432835086648, "learning_rate": 9.761444818276895e-06, "loss": 0.7326, "step": 23860 }, { "epoch": 0.5127378957769472, "grad_norm": 0.5201485746022573, "learning_rate": 9.75463030512288e-06, "loss": 0.7361, "step": 23870 }, { "epoch": 0.5129527000902178, "grad_norm": 0.5161112017032182, "learning_rate": 9.747815905979382e-06, "loss": 0.7175, "step": 23880 }, { "epoch": 0.5131675044034885, "grad_norm": 0.5194134872899566, "learning_rate": 9.741001624012706e-06, "loss": 0.7306, "step": 23890 }, { "epoch": 0.513382308716759, "grad_norm": 0.501372178188598, "learning_rate": 9.734187462389086e-06, "loss": 0.7359, "step": 23900 }, { "epoch": 0.5135971130300296, "grad_norm": 0.5021971430887777, "learning_rate": 9.72737342427471e-06, "loss": 0.7423, "step": 23910 }, { "epoch": 0.5138119173433002, "grad_norm": 0.501620537070542, "learning_rate": 9.720559512835708e-06, "loss": 0.7262, "step": 23920 }, { "epoch": 0.5140267216565708, "grad_norm": 0.5022999836926363, "learning_rate": 9.713745731238147e-06, "loss": 0.7331, "step": 23930 }, { "epoch": 0.5142415259698415, "grad_norm": 0.48851644845547765, "learning_rate": 9.706932082648043e-06, "loss": 0.7392, "step": 23940 }, { "epoch": 0.5144563302831121, "grad_norm": 0.5066291795790192, "learning_rate": 9.700118570231333e-06, "loss": 0.7338, "step": 23950 }, { "epoch": 0.5146711345963827, "grad_norm": 0.5589181767659236, "learning_rate": 9.693305197153914e-06, "loss": 0.7272, "step": 23960 }, { "epoch": 0.5148859389096533, "grad_norm": 0.5241863149299513, "learning_rate": 9.686491966581598e-06, "loss": 0.7308, "step": 23970 }, { "epoch": 0.5151007432229239, "grad_norm": 0.540530773434582, "learning_rate": 9.679678881680138e-06, "loss": 0.7351, "step": 23980 }, { "epoch": 0.5153155475361946, "grad_norm": 0.5390573047706596, "learning_rate": 9.672865945615225e-06, "loss": 0.7376, "step": 23990 }, { "epoch": 0.5155303518494652, "grad_norm": 0.5375969313392924, "learning_rate": 9.666053161552467e-06, "loss": 0.7415, "step": 24000 }, { "epoch": 0.5157451561627358, "grad_norm": 0.5327773082890134, "learning_rate": 9.65924053265742e-06, "loss": 0.7533, "step": 24010 }, { "epoch": 0.5159599604760063, "grad_norm": 0.5150496428189917, "learning_rate": 9.652428062095553e-06, "loss": 0.7342, "step": 24020 }, { "epoch": 0.5161747647892769, "grad_norm": 0.5334047289102489, "learning_rate": 9.645615753032264e-06, "loss": 0.7336, "step": 24030 }, { "epoch": 0.5163895691025476, "grad_norm": 0.4961872565305141, "learning_rate": 9.638803608632883e-06, "loss": 0.7264, "step": 24040 }, { "epoch": 0.5166043734158182, "grad_norm": 0.4911939322989355, "learning_rate": 9.631991632062652e-06, "loss": 0.7257, "step": 24050 }, { "epoch": 0.5168191777290888, "grad_norm": 0.5200979968551847, "learning_rate": 9.625179826486752e-06, "loss": 0.7361, "step": 24060 }, { "epoch": 0.5170339820423594, "grad_norm": 0.5127022024427715, "learning_rate": 9.618368195070265e-06, "loss": 0.7463, "step": 24070 }, { "epoch": 0.51724878635563, "grad_norm": 0.5185873242375817, "learning_rate": 9.611556740978208e-06, "loss": 0.741, "step": 24080 }, { "epoch": 0.5174635906689007, "grad_norm": 0.5115026188640582, "learning_rate": 9.604745467375507e-06, "loss": 0.727, "step": 24090 }, { "epoch": 0.5176783949821713, "grad_norm": 0.5114651615343405, "learning_rate": 9.597934377427e-06, "loss": 0.7401, "step": 24100 }, { "epoch": 0.5178931992954419, "grad_norm": 0.5155760878770381, "learning_rate": 9.591123474297456e-06, "loss": 0.7316, "step": 24110 }, { "epoch": 0.5181080036087125, "grad_norm": 0.5207602050012875, "learning_rate": 9.584312761151537e-06, "loss": 0.7303, "step": 24120 }, { "epoch": 0.518322807921983, "grad_norm": 0.5101795182054067, "learning_rate": 9.577502241153836e-06, "loss": 0.7541, "step": 24130 }, { "epoch": 0.5185376122352536, "grad_norm": 0.5031850419195654, "learning_rate": 9.570691917468841e-06, "loss": 0.7178, "step": 24140 }, { "epoch": 0.5187524165485243, "grad_norm": 0.5059459890339639, "learning_rate": 9.563881793260961e-06, "loss": 0.74, "step": 24150 }, { "epoch": 0.5189672208617949, "grad_norm": 0.5252032527071196, "learning_rate": 9.5570718716945e-06, "loss": 0.724, "step": 24160 }, { "epoch": 0.5191820251750655, "grad_norm": 0.49701627155963785, "learning_rate": 9.55026215593367e-06, "loss": 0.7352, "step": 24170 }, { "epoch": 0.5193968294883361, "grad_norm": 0.5011050975997907, "learning_rate": 9.543452649142605e-06, "loss": 0.7114, "step": 24180 }, { "epoch": 0.5196116338016067, "grad_norm": 0.5166863959564101, "learning_rate": 9.536643354485315e-06, "loss": 0.7287, "step": 24190 }, { "epoch": 0.5198264381148774, "grad_norm": 0.5320535495138456, "learning_rate": 9.529834275125733e-06, "loss": 0.7318, "step": 24200 }, { "epoch": 0.520041242428148, "grad_norm": 0.5427362518912576, "learning_rate": 9.52302541422768e-06, "loss": 0.7383, "step": 24210 }, { "epoch": 0.5202560467414186, "grad_norm": 0.518569825254274, "learning_rate": 9.516216774954876e-06, "loss": 0.7255, "step": 24220 }, { "epoch": 0.5204708510546892, "grad_norm": 0.5023215208325698, "learning_rate": 9.50940836047095e-06, "loss": 0.7217, "step": 24230 }, { "epoch": 0.5206856553679597, "grad_norm": 0.5280315511540243, "learning_rate": 9.50260017393941e-06, "loss": 0.7396, "step": 24240 }, { "epoch": 0.5209004596812304, "grad_norm": 0.5262868684024251, "learning_rate": 9.495792218523668e-06, "loss": 0.7404, "step": 24250 }, { "epoch": 0.521115263994501, "grad_norm": 0.5399291263932274, "learning_rate": 9.488984497387023e-06, "loss": 0.7301, "step": 24260 }, { "epoch": 0.5213300683077716, "grad_norm": 0.5090315167154729, "learning_rate": 9.482177013692678e-06, "loss": 0.7246, "step": 24270 }, { "epoch": 0.5215448726210422, "grad_norm": 0.5056887738360717, "learning_rate": 9.47536977060371e-06, "loss": 0.7354, "step": 24280 }, { "epoch": 0.5217596769343128, "grad_norm": 0.5576273098938848, "learning_rate": 9.468562771283088e-06, "loss": 0.7494, "step": 24290 }, { "epoch": 0.5219744812475835, "grad_norm": 0.5192891146116362, "learning_rate": 9.461756018893674e-06, "loss": 0.7231, "step": 24300 }, { "epoch": 0.5221892855608541, "grad_norm": 0.5176441563361027, "learning_rate": 9.454949516598207e-06, "loss": 0.7508, "step": 24310 }, { "epoch": 0.5224040898741247, "grad_norm": 0.5440570683474872, "learning_rate": 9.448143267559322e-06, "loss": 0.7358, "step": 24320 }, { "epoch": 0.5226188941873953, "grad_norm": 0.5095332879054673, "learning_rate": 9.441337274939519e-06, "loss": 0.7388, "step": 24330 }, { "epoch": 0.5228336985006659, "grad_norm": 0.5097526753015743, "learning_rate": 9.434531541901197e-06, "loss": 0.7306, "step": 24340 }, { "epoch": 0.5230485028139366, "grad_norm": 0.5232789277714874, "learning_rate": 9.427726071606623e-06, "loss": 0.7242, "step": 24350 }, { "epoch": 0.5232633071272071, "grad_norm": 0.5054634510771875, "learning_rate": 9.42092086721794e-06, "loss": 0.7254, "step": 24360 }, { "epoch": 0.5234781114404777, "grad_norm": 0.532869087530748, "learning_rate": 9.41411593189718e-06, "loss": 0.7344, "step": 24370 }, { "epoch": 0.5236929157537483, "grad_norm": 0.5096100205368526, "learning_rate": 9.407311268806232e-06, "loss": 0.7313, "step": 24380 }, { "epoch": 0.5239077200670189, "grad_norm": 0.5296911724090648, "learning_rate": 9.40050688110688e-06, "loss": 0.7265, "step": 24390 }, { "epoch": 0.5241225243802896, "grad_norm": 0.5213949606620313, "learning_rate": 9.393702771960763e-06, "loss": 0.7306, "step": 24400 }, { "epoch": 0.5243373286935602, "grad_norm": 0.518401803230462, "learning_rate": 9.386898944529392e-06, "loss": 0.7404, "step": 24410 }, { "epoch": 0.5245521330068308, "grad_norm": 0.519834518279471, "learning_rate": 9.380095401974159e-06, "loss": 0.7409, "step": 24420 }, { "epoch": 0.5247669373201014, "grad_norm": 0.5243292050356617, "learning_rate": 9.373292147456309e-06, "loss": 0.7264, "step": 24430 }, { "epoch": 0.524981741633372, "grad_norm": 0.5058736452889017, "learning_rate": 9.366489184136966e-06, "loss": 0.7207, "step": 24440 }, { "epoch": 0.5251965459466426, "grad_norm": 0.5213210419850763, "learning_rate": 9.35968651517711e-06, "loss": 0.746, "step": 24450 }, { "epoch": 0.5254113502599133, "grad_norm": 0.4984379629009062, "learning_rate": 9.35288414373759e-06, "loss": 0.7372, "step": 24460 }, { "epoch": 0.5256261545731838, "grad_norm": 0.524707210819013, "learning_rate": 9.346082072979111e-06, "loss": 0.7375, "step": 24470 }, { "epoch": 0.5258409588864544, "grad_norm": 0.5200185641061693, "learning_rate": 9.339280306062237e-06, "loss": 0.7366, "step": 24480 }, { "epoch": 0.526055763199725, "grad_norm": 0.5277055511224572, "learning_rate": 9.332478846147404e-06, "loss": 0.7346, "step": 24490 }, { "epoch": 0.5262705675129956, "grad_norm": 0.5580648886270715, "learning_rate": 9.325677696394887e-06, "loss": 0.7513, "step": 24500 }, { "epoch": 0.5264853718262663, "grad_norm": 0.4991201127455899, "learning_rate": 9.318876859964832e-06, "loss": 0.738, "step": 24510 }, { "epoch": 0.5267001761395369, "grad_norm": 0.5098501461011234, "learning_rate": 9.312076340017232e-06, "loss": 0.7287, "step": 24520 }, { "epoch": 0.5269149804528075, "grad_norm": 0.6021201034831747, "learning_rate": 9.30527613971194e-06, "loss": 0.7499, "step": 24530 }, { "epoch": 0.5271297847660781, "grad_norm": 0.5252128416172374, "learning_rate": 9.298476262208646e-06, "loss": 0.7326, "step": 24540 }, { "epoch": 0.5273445890793487, "grad_norm": 0.49998760334312775, "learning_rate": 9.2916767106669e-06, "loss": 0.7307, "step": 24550 }, { "epoch": 0.5275593933926194, "grad_norm": 0.5125373956927092, "learning_rate": 9.284877488246105e-06, "loss": 0.7298, "step": 24560 }, { "epoch": 0.52777419770589, "grad_norm": 0.5080343316486924, "learning_rate": 9.278078598105502e-06, "loss": 0.7345, "step": 24570 }, { "epoch": 0.5279890020191605, "grad_norm": 0.5174244580842269, "learning_rate": 9.271280043404185e-06, "loss": 0.7332, "step": 24580 }, { "epoch": 0.5282038063324311, "grad_norm": 0.5058297456693376, "learning_rate": 9.264481827301083e-06, "loss": 0.734, "step": 24590 }, { "epoch": 0.5284186106457017, "grad_norm": 0.5292500345044558, "learning_rate": 9.257683952954973e-06, "loss": 0.7409, "step": 24600 }, { "epoch": 0.5286334149589724, "grad_norm": 0.5225245379527742, "learning_rate": 9.25088642352448e-06, "loss": 0.7321, "step": 24610 }, { "epoch": 0.528848219272243, "grad_norm": 0.5224232644015462, "learning_rate": 9.244089242168055e-06, "loss": 0.7172, "step": 24620 }, { "epoch": 0.5290630235855136, "grad_norm": 0.5062021193853431, "learning_rate": 9.237292412043997e-06, "loss": 0.736, "step": 24630 }, { "epoch": 0.5292778278987842, "grad_norm": 0.5295018228622798, "learning_rate": 9.230495936310436e-06, "loss": 0.7307, "step": 24640 }, { "epoch": 0.5294926322120548, "grad_norm": 0.5267263552657305, "learning_rate": 9.223699818125348e-06, "loss": 0.7345, "step": 24650 }, { "epoch": 0.5297074365253255, "grad_norm": 0.5311126333449129, "learning_rate": 9.21690406064653e-06, "loss": 0.7405, "step": 24660 }, { "epoch": 0.5299222408385961, "grad_norm": 0.5153915101075538, "learning_rate": 9.210108667031616e-06, "loss": 0.7391, "step": 24670 }, { "epoch": 0.5301370451518667, "grad_norm": 0.5151283148764161, "learning_rate": 9.203313640438074e-06, "loss": 0.7148, "step": 24680 }, { "epoch": 0.5303518494651372, "grad_norm": 0.5089214087459585, "learning_rate": 9.196518984023191e-06, "loss": 0.7539, "step": 24690 }, { "epoch": 0.5305666537784078, "grad_norm": 0.5127764385700229, "learning_rate": 9.189724700944104e-06, "loss": 0.734, "step": 24700 }, { "epoch": 0.5307814580916784, "grad_norm": 0.4988269906652733, "learning_rate": 9.182930794357749e-06, "loss": 0.7217, "step": 24710 }, { "epoch": 0.5309962624049491, "grad_norm": 0.5289065899803795, "learning_rate": 9.176137267420913e-06, "loss": 0.7228, "step": 24720 }, { "epoch": 0.5312110667182197, "grad_norm": 0.520505526330622, "learning_rate": 9.169344123290186e-06, "loss": 0.7126, "step": 24730 }, { "epoch": 0.5314258710314903, "grad_norm": 0.4998804860251492, "learning_rate": 9.16255136512199e-06, "loss": 0.7288, "step": 24740 }, { "epoch": 0.5316406753447609, "grad_norm": 0.505080866954583, "learning_rate": 9.155758996072568e-06, "loss": 0.7338, "step": 24750 }, { "epoch": 0.5318554796580315, "grad_norm": 0.4987180793184345, "learning_rate": 9.148967019297973e-06, "loss": 0.7137, "step": 24760 }, { "epoch": 0.5320702839713022, "grad_norm": 0.5197695735697442, "learning_rate": 9.142175437954095e-06, "loss": 0.7395, "step": 24770 }, { "epoch": 0.5322850882845728, "grad_norm": 0.5291148961767198, "learning_rate": 9.13538425519662e-06, "loss": 0.7339, "step": 24780 }, { "epoch": 0.5324998925978434, "grad_norm": 0.5129546756881223, "learning_rate": 9.128593474181058e-06, "loss": 0.7253, "step": 24790 }, { "epoch": 0.532714696911114, "grad_norm": 0.5032558888699209, "learning_rate": 9.121803098062732e-06, "loss": 0.728, "step": 24800 }, { "epoch": 0.5329295012243845, "grad_norm": 0.5159297096786016, "learning_rate": 9.115013129996774e-06, "loss": 0.7179, "step": 24810 }, { "epoch": 0.5331443055376552, "grad_norm": 0.5167687353856899, "learning_rate": 9.108223573138133e-06, "loss": 0.7252, "step": 24820 }, { "epoch": 0.5333591098509258, "grad_norm": 0.5259491525566676, "learning_rate": 9.101434430641561e-06, "loss": 0.7201, "step": 24830 }, { "epoch": 0.5335739141641964, "grad_norm": 0.5236412827950238, "learning_rate": 9.09464570566162e-06, "loss": 0.7237, "step": 24840 }, { "epoch": 0.533788718477467, "grad_norm": 0.5181715919819297, "learning_rate": 9.087857401352673e-06, "loss": 0.7275, "step": 24850 }, { "epoch": 0.5340035227907376, "grad_norm": 0.5281987861982105, "learning_rate": 9.081069520868891e-06, "loss": 0.7338, "step": 24860 }, { "epoch": 0.5342183271040083, "grad_norm": 0.5075187850648184, "learning_rate": 9.074282067364254e-06, "loss": 0.7287, "step": 24870 }, { "epoch": 0.5344331314172789, "grad_norm": 0.505210251407526, "learning_rate": 9.067495043992532e-06, "loss": 0.7448, "step": 24880 }, { "epoch": 0.5346479357305495, "grad_norm": 0.5045253045306024, "learning_rate": 9.060708453907312e-06, "loss": 0.7124, "step": 24890 }, { "epoch": 0.5348627400438201, "grad_norm": 0.5332241555538618, "learning_rate": 9.053922300261957e-06, "loss": 0.7295, "step": 24900 }, { "epoch": 0.5350775443570907, "grad_norm": 0.5242064871975776, "learning_rate": 9.047136586209646e-06, "loss": 0.7305, "step": 24910 }, { "epoch": 0.5352923486703614, "grad_norm": 0.5213752485486699, "learning_rate": 9.040351314903346e-06, "loss": 0.7344, "step": 24920 }, { "epoch": 0.5355071529836319, "grad_norm": 0.5279860659235984, "learning_rate": 9.033566489495815e-06, "loss": 0.7149, "step": 24930 }, { "epoch": 0.5357219572969025, "grad_norm": 0.5153059474379569, "learning_rate": 9.026782113139614e-06, "loss": 0.7272, "step": 24940 }, { "epoch": 0.5359367616101731, "grad_norm": 0.502633555256086, "learning_rate": 9.019998188987087e-06, "loss": 0.7358, "step": 24950 }, { "epoch": 0.5361515659234437, "grad_norm": 0.5227824018715104, "learning_rate": 9.01321472019037e-06, "loss": 0.7322, "step": 24960 }, { "epoch": 0.5363663702367144, "grad_norm": 0.5096066611505108, "learning_rate": 9.006431709901385e-06, "loss": 0.7291, "step": 24970 }, { "epoch": 0.536581174549985, "grad_norm": 0.5050380085409679, "learning_rate": 8.999649161271851e-06, "loss": 0.7383, "step": 24980 }, { "epoch": 0.5367959788632556, "grad_norm": 0.5098544768515563, "learning_rate": 8.99286707745326e-06, "loss": 0.7296, "step": 24990 }, { "epoch": 0.5370107831765262, "grad_norm": 0.49596489778805924, "learning_rate": 8.986085461596892e-06, "loss": 0.7448, "step": 25000 }, { "epoch": 0.5372255874897968, "grad_norm": 0.524177361015968, "learning_rate": 8.979304316853816e-06, "loss": 0.7223, "step": 25010 }, { "epoch": 0.5374403918030674, "grad_norm": 0.5275256334675215, "learning_rate": 8.972523646374868e-06, "loss": 0.7155, "step": 25020 }, { "epoch": 0.537655196116338, "grad_norm": 0.5230415273183412, "learning_rate": 8.965743453310682e-06, "loss": 0.7244, "step": 25030 }, { "epoch": 0.5378700004296086, "grad_norm": 0.5056528573977891, "learning_rate": 8.958963740811657e-06, "loss": 0.7373, "step": 25040 }, { "epoch": 0.5380848047428792, "grad_norm": 0.5129477293931691, "learning_rate": 8.952184512027971e-06, "loss": 0.7409, "step": 25050 }, { "epoch": 0.5382996090561498, "grad_norm": 0.5254219239381808, "learning_rate": 8.94540577010958e-06, "loss": 0.7289, "step": 25060 }, { "epoch": 0.5385144133694204, "grad_norm": 0.5137037649048417, "learning_rate": 8.938627518206207e-06, "loss": 0.7346, "step": 25070 }, { "epoch": 0.5387292176826911, "grad_norm": 0.501166734367294, "learning_rate": 8.931849759467364e-06, "loss": 0.7376, "step": 25080 }, { "epoch": 0.5389440219959617, "grad_norm": 0.5158686563553051, "learning_rate": 8.925072497042312e-06, "loss": 0.7246, "step": 25090 }, { "epoch": 0.5391588263092323, "grad_norm": 0.5064258891872856, "learning_rate": 8.9182957340801e-06, "loss": 0.7224, "step": 25100 }, { "epoch": 0.5393736306225029, "grad_norm": 0.5109514152684812, "learning_rate": 8.911519473729533e-06, "loss": 0.7258, "step": 25110 }, { "epoch": 0.5395884349357735, "grad_norm": 0.4965294744044547, "learning_rate": 8.904743719139184e-06, "loss": 0.7231, "step": 25120 }, { "epoch": 0.5398032392490442, "grad_norm": 0.5163978388155885, "learning_rate": 8.897968473457397e-06, "loss": 0.7213, "step": 25130 }, { "epoch": 0.5400180435623148, "grad_norm": 0.5179202460764463, "learning_rate": 8.89119373983227e-06, "loss": 0.7333, "step": 25140 }, { "epoch": 0.5402328478755853, "grad_norm": 0.5267218520730645, "learning_rate": 8.884419521411681e-06, "loss": 0.7271, "step": 25150 }, { "epoch": 0.5404476521888559, "grad_norm": 0.49970760568396944, "learning_rate": 8.877645821343245e-06, "loss": 0.7134, "step": 25160 }, { "epoch": 0.5406624565021265, "grad_norm": 0.5110857110324059, "learning_rate": 8.870872642774354e-06, "loss": 0.7371, "step": 25170 }, { "epoch": 0.5408772608153972, "grad_norm": 0.524857212027166, "learning_rate": 8.864099988852149e-06, "loss": 0.7303, "step": 25180 }, { "epoch": 0.5410920651286678, "grad_norm": 0.5056157960738001, "learning_rate": 8.857327862723524e-06, "loss": 0.7163, "step": 25190 }, { "epoch": 0.5413068694419384, "grad_norm": 0.4958979488976298, "learning_rate": 8.850556267535146e-06, "loss": 0.7364, "step": 25200 }, { "epoch": 0.541521673755209, "grad_norm": 0.5132188143017179, "learning_rate": 8.843785206433412e-06, "loss": 0.7335, "step": 25210 }, { "epoch": 0.5417364780684796, "grad_norm": 0.4994335318852988, "learning_rate": 8.837014682564487e-06, "loss": 0.7257, "step": 25220 }, { "epoch": 0.5419512823817503, "grad_norm": 0.5384327760658743, "learning_rate": 8.83024469907428e-06, "loss": 0.7351, "step": 25230 }, { "epoch": 0.5421660866950209, "grad_norm": 0.5261027642763866, "learning_rate": 8.823475259108445e-06, "loss": 0.7356, "step": 25240 }, { "epoch": 0.5423808910082915, "grad_norm": 0.5060595175869693, "learning_rate": 8.816706365812396e-06, "loss": 0.7136, "step": 25250 }, { "epoch": 0.542595695321562, "grad_norm": 0.5089315623590392, "learning_rate": 8.809938022331279e-06, "loss": 0.7582, "step": 25260 }, { "epoch": 0.5428104996348326, "grad_norm": 0.5517010005843618, "learning_rate": 8.803170231809997e-06, "loss": 0.7264, "step": 25270 }, { "epoch": 0.5430253039481033, "grad_norm": 0.5192946990016879, "learning_rate": 8.796402997393179e-06, "loss": 0.7338, "step": 25280 }, { "epoch": 0.5432401082613739, "grad_norm": 0.5141174201291905, "learning_rate": 8.789636322225221e-06, "loss": 0.7301, "step": 25290 }, { "epoch": 0.5434549125746445, "grad_norm": 0.5156454974533952, "learning_rate": 8.782870209450234e-06, "loss": 0.7386, "step": 25300 }, { "epoch": 0.5436697168879151, "grad_norm": 0.5399434222135454, "learning_rate": 8.776104662212077e-06, "loss": 0.7297, "step": 25310 }, { "epoch": 0.5438845212011857, "grad_norm": 0.5080337339754446, "learning_rate": 8.769339683654358e-06, "loss": 0.7268, "step": 25320 }, { "epoch": 0.5440993255144563, "grad_norm": 0.5211965166488158, "learning_rate": 8.762575276920403e-06, "loss": 0.72, "step": 25330 }, { "epoch": 0.544314129827727, "grad_norm": 0.511040069798665, "learning_rate": 8.755811445153282e-06, "loss": 0.7245, "step": 25340 }, { "epoch": 0.5445289341409976, "grad_norm": 0.5496977731743303, "learning_rate": 8.749048191495787e-06, "loss": 0.7362, "step": 25350 }, { "epoch": 0.5447437384542682, "grad_norm": 0.497836078774839, "learning_rate": 8.742285519090465e-06, "loss": 0.7369, "step": 25360 }, { "epoch": 0.5449585427675387, "grad_norm": 0.4958506383207596, "learning_rate": 8.735523431079567e-06, "loss": 0.6986, "step": 25370 }, { "epoch": 0.5451733470808093, "grad_norm": 0.5023433708525002, "learning_rate": 8.728761930605086e-06, "loss": 0.7225, "step": 25380 }, { "epoch": 0.54538815139408, "grad_norm": 0.5115175514759246, "learning_rate": 8.72200102080874e-06, "loss": 0.7311, "step": 25390 }, { "epoch": 0.5456029557073506, "grad_norm": 0.49400314000603524, "learning_rate": 8.715240704831965e-06, "loss": 0.711, "step": 25400 }, { "epoch": 0.5458177600206212, "grad_norm": 0.5281302488810649, "learning_rate": 8.70848098581594e-06, "loss": 0.7307, "step": 25410 }, { "epoch": 0.5460325643338918, "grad_norm": 0.5253108140644606, "learning_rate": 8.701721866901548e-06, "loss": 0.7542, "step": 25420 }, { "epoch": 0.5462473686471624, "grad_norm": 0.49677506907339725, "learning_rate": 8.694963351229395e-06, "loss": 0.7346, "step": 25430 }, { "epoch": 0.5464621729604331, "grad_norm": 0.49156308079799516, "learning_rate": 8.68820544193982e-06, "loss": 0.7301, "step": 25440 }, { "epoch": 0.5466769772737037, "grad_norm": 0.5054408047803833, "learning_rate": 8.681448142172862e-06, "loss": 0.717, "step": 25450 }, { "epoch": 0.5468917815869743, "grad_norm": 0.5250496149552367, "learning_rate": 8.674691455068296e-06, "loss": 0.7208, "step": 25460 }, { "epoch": 0.5471065859002449, "grad_norm": 3.4136357360926253, "learning_rate": 8.667935383765595e-06, "loss": 0.735, "step": 25470 }, { "epoch": 0.5473213902135154, "grad_norm": 0.4871497116063486, "learning_rate": 8.66117993140396e-06, "loss": 0.7342, "step": 25480 }, { "epoch": 0.5475361945267861, "grad_norm": 0.5209915171759771, "learning_rate": 8.654425101122296e-06, "loss": 0.7336, "step": 25490 }, { "epoch": 0.5477509988400567, "grad_norm": 0.5156635252595837, "learning_rate": 8.647670896059216e-06, "loss": 0.7293, "step": 25500 }, { "epoch": 0.5479658031533273, "grad_norm": 0.5152409120879327, "learning_rate": 8.640917319353055e-06, "loss": 0.7273, "step": 25510 }, { "epoch": 0.5481806074665979, "grad_norm": 0.5416207574425208, "learning_rate": 8.634164374141838e-06, "loss": 0.7258, "step": 25520 }, { "epoch": 0.5483954117798685, "grad_norm": 0.515769700298451, "learning_rate": 8.62741206356332e-06, "loss": 0.7196, "step": 25530 }, { "epoch": 0.5486102160931392, "grad_norm": 0.5161072492397152, "learning_rate": 8.62066039075494e-06, "loss": 0.7146, "step": 25540 }, { "epoch": 0.5488250204064098, "grad_norm": 0.5188545967485997, "learning_rate": 8.61390935885385e-06, "loss": 0.7343, "step": 25550 }, { "epoch": 0.5490398247196804, "grad_norm": 0.5220576998409723, "learning_rate": 8.607158970996905e-06, "loss": 0.7225, "step": 25560 }, { "epoch": 0.549254629032951, "grad_norm": 0.4933476797650889, "learning_rate": 8.600409230320652e-06, "loss": 0.727, "step": 25570 }, { "epoch": 0.5494694333462216, "grad_norm": 0.5360731957654817, "learning_rate": 8.593660139961354e-06, "loss": 0.7304, "step": 25580 }, { "epoch": 0.5496842376594921, "grad_norm": 0.4967871791391136, "learning_rate": 8.586911703054953e-06, "loss": 0.718, "step": 25590 }, { "epoch": 0.5498990419727628, "grad_norm": 0.5094344264185746, "learning_rate": 8.580163922737107e-06, "loss": 0.7351, "step": 25600 }, { "epoch": 0.5501138462860334, "grad_norm": 0.4998333921079445, "learning_rate": 8.573416802143147e-06, "loss": 0.7362, "step": 25610 }, { "epoch": 0.550328650599304, "grad_norm": 0.49839738637776776, "learning_rate": 8.56667034440811e-06, "loss": 0.7343, "step": 25620 }, { "epoch": 0.5505434549125746, "grad_norm": 0.5122812659780278, "learning_rate": 8.559924552666731e-06, "loss": 0.7299, "step": 25630 }, { "epoch": 0.5507582592258452, "grad_norm": 0.510657449522192, "learning_rate": 8.55317943005342e-06, "loss": 0.7323, "step": 25640 }, { "epoch": 0.5509730635391159, "grad_norm": 0.5029028813355643, "learning_rate": 8.546434979702293e-06, "loss": 0.7287, "step": 25650 }, { "epoch": 0.5511878678523865, "grad_norm": 0.5228476287215734, "learning_rate": 8.539691204747134e-06, "loss": 0.7321, "step": 25660 }, { "epoch": 0.5514026721656571, "grad_norm": 0.5359812041522654, "learning_rate": 8.532948108321433e-06, "loss": 0.742, "step": 25670 }, { "epoch": 0.5516174764789277, "grad_norm": 0.5130595618709932, "learning_rate": 8.526205693558353e-06, "loss": 0.7319, "step": 25680 }, { "epoch": 0.5518322807921983, "grad_norm": 0.4996255988319911, "learning_rate": 8.51946396359074e-06, "loss": 0.7407, "step": 25690 }, { "epoch": 0.552047085105469, "grad_norm": 0.49407356190346213, "learning_rate": 8.51272292155113e-06, "loss": 0.717, "step": 25700 }, { "epoch": 0.5522618894187395, "grad_norm": 0.4898737936647392, "learning_rate": 8.50598257057173e-06, "loss": 0.747, "step": 25710 }, { "epoch": 0.5524766937320101, "grad_norm": 0.5212613411297291, "learning_rate": 8.499242913784436e-06, "loss": 0.733, "step": 25720 }, { "epoch": 0.5526914980452807, "grad_norm": 0.5428952625628243, "learning_rate": 8.492503954320804e-06, "loss": 0.7235, "step": 25730 }, { "epoch": 0.5529063023585513, "grad_norm": 0.5179866204385108, "learning_rate": 8.485765695312091e-06, "loss": 0.7353, "step": 25740 }, { "epoch": 0.553121106671822, "grad_norm": 0.5143508651285503, "learning_rate": 8.479028139889209e-06, "loss": 0.7092, "step": 25750 }, { "epoch": 0.5533359109850926, "grad_norm": 0.4899586729640197, "learning_rate": 8.472291291182747e-06, "loss": 0.7214, "step": 25760 }, { "epoch": 0.5535507152983632, "grad_norm": 0.5104765609615125, "learning_rate": 8.465555152322971e-06, "loss": 0.7343, "step": 25770 }, { "epoch": 0.5537655196116338, "grad_norm": 0.4936010581482795, "learning_rate": 8.45881972643981e-06, "loss": 0.7201, "step": 25780 }, { "epoch": 0.5539803239249044, "grad_norm": 0.5103205277286559, "learning_rate": 8.452085016662873e-06, "loss": 0.7249, "step": 25790 }, { "epoch": 0.5541951282381751, "grad_norm": 0.5337997235376728, "learning_rate": 8.445351026121425e-06, "loss": 0.723, "step": 25800 }, { "epoch": 0.5544099325514457, "grad_norm": 0.5330888068677769, "learning_rate": 8.438617757944396e-06, "loss": 0.7368, "step": 25810 }, { "epoch": 0.5546247368647163, "grad_norm": 0.507422198237814, "learning_rate": 8.431885215260393e-06, "loss": 0.7277, "step": 25820 }, { "epoch": 0.5548395411779868, "grad_norm": 0.5092852644195597, "learning_rate": 8.42515340119767e-06, "loss": 0.7141, "step": 25830 }, { "epoch": 0.5550543454912574, "grad_norm": 0.4983369620376665, "learning_rate": 8.418422318884158e-06, "loss": 0.7213, "step": 25840 }, { "epoch": 0.5552691498045281, "grad_norm": 0.5057768818854864, "learning_rate": 8.411691971447437e-06, "loss": 0.7204, "step": 25850 }, { "epoch": 0.5554839541177987, "grad_norm": 0.4957062726709344, "learning_rate": 8.40496236201475e-06, "loss": 0.7239, "step": 25860 }, { "epoch": 0.5556987584310693, "grad_norm": 0.5098362148369251, "learning_rate": 8.398233493712997e-06, "loss": 0.7229, "step": 25870 }, { "epoch": 0.5559135627443399, "grad_norm": 0.540586305639984, "learning_rate": 8.391505369668725e-06, "loss": 0.7223, "step": 25880 }, { "epoch": 0.5561283670576105, "grad_norm": 0.5164869278284776, "learning_rate": 8.384777993008154e-06, "loss": 0.7241, "step": 25890 }, { "epoch": 0.5563431713708811, "grad_norm": 0.5178900728279536, "learning_rate": 8.378051366857137e-06, "loss": 0.7324, "step": 25900 }, { "epoch": 0.5565579756841518, "grad_norm": 0.5093380002448458, "learning_rate": 8.371325494341193e-06, "loss": 0.7337, "step": 25910 }, { "epoch": 0.5567727799974224, "grad_norm": 0.5228044379168946, "learning_rate": 8.364600378585482e-06, "loss": 0.7399, "step": 25920 }, { "epoch": 0.556987584310693, "grad_norm": 0.5144671939481396, "learning_rate": 8.357876022714816e-06, "loss": 0.7254, "step": 25930 }, { "epoch": 0.5572023886239635, "grad_norm": 0.4986034297019067, "learning_rate": 8.351152429853653e-06, "loss": 0.7106, "step": 25940 }, { "epoch": 0.5574171929372341, "grad_norm": 0.5023603101794233, "learning_rate": 8.34442960312609e-06, "loss": 0.7226, "step": 25950 }, { "epoch": 0.5576319972505048, "grad_norm": 0.5209318452676072, "learning_rate": 8.337707545655886e-06, "loss": 0.7238, "step": 25960 }, { "epoch": 0.5578468015637754, "grad_norm": 0.4926074970141638, "learning_rate": 8.330986260566424e-06, "loss": 0.7276, "step": 25970 }, { "epoch": 0.558061605877046, "grad_norm": 0.4958510123483159, "learning_rate": 8.324265750980738e-06, "loss": 0.7148, "step": 25980 }, { "epoch": 0.5582764101903166, "grad_norm": 0.5075764876809195, "learning_rate": 8.317546020021498e-06, "loss": 0.7155, "step": 25990 }, { "epoch": 0.5584912145035872, "grad_norm": 0.5141742208103084, "learning_rate": 8.310827070811008e-06, "loss": 0.7385, "step": 26000 }, { "epoch": 0.5587060188168579, "grad_norm": 0.5084465568638146, "learning_rate": 8.30410890647122e-06, "loss": 0.7202, "step": 26010 }, { "epoch": 0.5589208231301285, "grad_norm": 0.502206471808609, "learning_rate": 8.297391530123713e-06, "loss": 0.7237, "step": 26020 }, { "epoch": 0.5591356274433991, "grad_norm": 0.5086015295612804, "learning_rate": 8.290674944889705e-06, "loss": 0.7387, "step": 26030 }, { "epoch": 0.5593504317566697, "grad_norm": 0.5252472355926682, "learning_rate": 8.283959153890037e-06, "loss": 0.728, "step": 26040 }, { "epoch": 0.5595652360699402, "grad_norm": 0.4975594240387986, "learning_rate": 8.277244160245196e-06, "loss": 0.7213, "step": 26050 }, { "epoch": 0.5597800403832109, "grad_norm": 0.4978146880109962, "learning_rate": 8.270529967075284e-06, "loss": 0.7173, "step": 26060 }, { "epoch": 0.5599948446964815, "grad_norm": 0.5268438508793595, "learning_rate": 8.263816577500034e-06, "loss": 0.7371, "step": 26070 }, { "epoch": 0.5602096490097521, "grad_norm": 0.5137983659248461, "learning_rate": 8.257103994638817e-06, "loss": 0.7197, "step": 26080 }, { "epoch": 0.5604244533230227, "grad_norm": 0.5194520105890317, "learning_rate": 8.250392221610612e-06, "loss": 0.7297, "step": 26090 }, { "epoch": 0.5606392576362933, "grad_norm": 0.5133858765119592, "learning_rate": 8.24368126153404e-06, "loss": 0.732, "step": 26100 }, { "epoch": 0.560854061949564, "grad_norm": 0.5029432924404077, "learning_rate": 8.236971117527324e-06, "loss": 0.728, "step": 26110 }, { "epoch": 0.5610688662628346, "grad_norm": 0.5081831178394076, "learning_rate": 8.230261792708328e-06, "loss": 0.7324, "step": 26120 }, { "epoch": 0.5612836705761052, "grad_norm": 0.5046477194727244, "learning_rate": 8.223553290194521e-06, "loss": 0.7363, "step": 26130 }, { "epoch": 0.5614984748893758, "grad_norm": 0.4956098306676567, "learning_rate": 8.216845613102995e-06, "loss": 0.7245, "step": 26140 }, { "epoch": 0.5617132792026464, "grad_norm": 0.5157536272581615, "learning_rate": 8.21013876455046e-06, "loss": 0.7117, "step": 26150 }, { "epoch": 0.561928083515917, "grad_norm": 0.5168564344065179, "learning_rate": 8.203432747653234e-06, "loss": 0.7245, "step": 26160 }, { "epoch": 0.5621428878291876, "grad_norm": 0.505045763218236, "learning_rate": 8.19672756552726e-06, "loss": 0.7176, "step": 26170 }, { "epoch": 0.5623576921424582, "grad_norm": 0.5239596568078339, "learning_rate": 8.190023221288088e-06, "loss": 0.7326, "step": 26180 }, { "epoch": 0.5625724964557288, "grad_norm": 0.5302666154295604, "learning_rate": 8.183319718050873e-06, "loss": 0.7422, "step": 26190 }, { "epoch": 0.5627873007689994, "grad_norm": 0.5003281299438962, "learning_rate": 8.176617058930385e-06, "loss": 0.7284, "step": 26200 }, { "epoch": 0.56300210508227, "grad_norm": 0.49200878286488725, "learning_rate": 8.169915247040998e-06, "loss": 0.7313, "step": 26210 }, { "epoch": 0.5632169093955407, "grad_norm": 0.49815572715612766, "learning_rate": 8.163214285496704e-06, "loss": 0.7216, "step": 26220 }, { "epoch": 0.5634317137088113, "grad_norm": 0.5142148330083207, "learning_rate": 8.15651417741108e-06, "loss": 0.7239, "step": 26230 }, { "epoch": 0.5636465180220819, "grad_norm": 0.49075756148020155, "learning_rate": 8.149814925897327e-06, "loss": 0.7295, "step": 26240 }, { "epoch": 0.5638613223353525, "grad_norm": 0.5246558400473214, "learning_rate": 8.143116534068231e-06, "loss": 0.726, "step": 26250 }, { "epoch": 0.564076126648623, "grad_norm": 0.5246290515230206, "learning_rate": 8.136419005036186e-06, "loss": 0.7182, "step": 26260 }, { "epoch": 0.5642909309618938, "grad_norm": 0.5036297463223443, "learning_rate": 8.129722341913192e-06, "loss": 0.7384, "step": 26270 }, { "epoch": 0.5645057352751643, "grad_norm": 0.527968106908949, "learning_rate": 8.12302654781083e-06, "loss": 0.7215, "step": 26280 }, { "epoch": 0.5647205395884349, "grad_norm": 0.5225876564331369, "learning_rate": 8.116331625840297e-06, "loss": 0.7244, "step": 26290 }, { "epoch": 0.5649353439017055, "grad_norm": 0.5436618520471259, "learning_rate": 8.109637579112368e-06, "loss": 0.7398, "step": 26300 }, { "epoch": 0.5651501482149761, "grad_norm": 0.5148458404115451, "learning_rate": 8.102944410737422e-06, "loss": 0.7273, "step": 26310 }, { "epoch": 0.5653649525282468, "grad_norm": 0.5015994626598883, "learning_rate": 8.096252123825422e-06, "loss": 0.7157, "step": 26320 }, { "epoch": 0.5655797568415174, "grad_norm": 0.505696959132355, "learning_rate": 8.089560721485922e-06, "loss": 0.7234, "step": 26330 }, { "epoch": 0.565794561154788, "grad_norm": 0.5359793879908359, "learning_rate": 8.082870206828078e-06, "loss": 0.72, "step": 26340 }, { "epoch": 0.5660093654680586, "grad_norm": 0.5110305690812829, "learning_rate": 8.076180582960618e-06, "loss": 0.7234, "step": 26350 }, { "epoch": 0.5662241697813292, "grad_norm": 0.498824135773946, "learning_rate": 8.069491852991861e-06, "loss": 0.7137, "step": 26360 }, { "epoch": 0.5664389740945999, "grad_norm": 0.5071066304511312, "learning_rate": 8.062804020029716e-06, "loss": 0.7257, "step": 26370 }, { "epoch": 0.5666537784078705, "grad_norm": 0.5382565877388203, "learning_rate": 8.056117087181663e-06, "loss": 0.7233, "step": 26380 }, { "epoch": 0.566868582721141, "grad_norm": 0.507293946375138, "learning_rate": 8.04943105755478e-06, "loss": 0.7283, "step": 26390 }, { "epoch": 0.5670833870344116, "grad_norm": 0.5280130888890053, "learning_rate": 8.04274593425571e-06, "loss": 0.7317, "step": 26400 }, { "epoch": 0.5672981913476822, "grad_norm": 0.518867511724086, "learning_rate": 8.03606172039069e-06, "loss": 0.7167, "step": 26410 }, { "epoch": 0.5675129956609529, "grad_norm": 0.5060431523298652, "learning_rate": 8.029378419065515e-06, "loss": 0.717, "step": 26420 }, { "epoch": 0.5677277999742235, "grad_norm": 0.5063699893804817, "learning_rate": 8.022696033385576e-06, "loss": 0.7272, "step": 26430 }, { "epoch": 0.5679426042874941, "grad_norm": 0.5240786574435802, "learning_rate": 8.016014566455827e-06, "loss": 0.7104, "step": 26440 }, { "epoch": 0.5681574086007647, "grad_norm": 0.5023416444895722, "learning_rate": 8.009334021380797e-06, "loss": 0.7271, "step": 26450 }, { "epoch": 0.5683722129140353, "grad_norm": 0.5364383380095676, "learning_rate": 8.002654401264587e-06, "loss": 0.7263, "step": 26460 }, { "epoch": 0.568587017227306, "grad_norm": 0.5281110306818434, "learning_rate": 7.995975709210869e-06, "loss": 0.7145, "step": 26470 }, { "epoch": 0.5688018215405766, "grad_norm": 0.5172649393256681, "learning_rate": 7.989297948322885e-06, "loss": 0.7279, "step": 26480 }, { "epoch": 0.5690166258538472, "grad_norm": 0.4984810683384183, "learning_rate": 7.982621121703438e-06, "loss": 0.7017, "step": 26490 }, { "epoch": 0.5692314301671177, "grad_norm": 0.5013994336883097, "learning_rate": 7.97594523245491e-06, "loss": 0.7052, "step": 26500 }, { "epoch": 0.5694462344803883, "grad_norm": 0.4962092438638248, "learning_rate": 7.969270283679233e-06, "loss": 0.7145, "step": 26510 }, { "epoch": 0.5696610387936589, "grad_norm": 0.5113556248884656, "learning_rate": 7.96259627847791e-06, "loss": 0.7182, "step": 26520 }, { "epoch": 0.5698758431069296, "grad_norm": 0.5049922401075675, "learning_rate": 7.955923219952002e-06, "loss": 0.7093, "step": 26530 }, { "epoch": 0.5700906474202002, "grad_norm": 0.5033957581062982, "learning_rate": 7.949251111202132e-06, "loss": 0.7274, "step": 26540 }, { "epoch": 0.5703054517334708, "grad_norm": 0.5356557064198388, "learning_rate": 7.942579955328485e-06, "loss": 0.7241, "step": 26550 }, { "epoch": 0.5705202560467414, "grad_norm": 0.5345233979565142, "learning_rate": 7.935909755430797e-06, "loss": 0.7334, "step": 26560 }, { "epoch": 0.570735060360012, "grad_norm": 0.49378966951960895, "learning_rate": 7.92924051460836e-06, "loss": 0.7388, "step": 26570 }, { "epoch": 0.5709498646732827, "grad_norm": 0.5077813116094394, "learning_rate": 7.92257223596003e-06, "loss": 0.7226, "step": 26580 }, { "epoch": 0.5711646689865533, "grad_norm": 0.5242154347144848, "learning_rate": 7.9159049225842e-06, "loss": 0.7293, "step": 26590 }, { "epoch": 0.5713794732998239, "grad_norm": 0.4939088384409381, "learning_rate": 7.90923857757883e-06, "loss": 0.7351, "step": 26600 }, { "epoch": 0.5715942776130944, "grad_norm": 0.49854560906783285, "learning_rate": 7.902573204041422e-06, "loss": 0.7112, "step": 26610 }, { "epoch": 0.571809081926365, "grad_norm": 0.489407868294081, "learning_rate": 7.895908805069026e-06, "loss": 0.731, "step": 26620 }, { "epoch": 0.5720238862396357, "grad_norm": 0.5063998256679312, "learning_rate": 7.889245383758247e-06, "loss": 0.733, "step": 26630 }, { "epoch": 0.5722386905529063, "grad_norm": 0.5118782447758873, "learning_rate": 7.882582943205218e-06, "loss": 0.7172, "step": 26640 }, { "epoch": 0.5724534948661769, "grad_norm": 0.4961115360495314, "learning_rate": 7.87592148650564e-06, "loss": 0.7183, "step": 26650 }, { "epoch": 0.5726682991794475, "grad_norm": 0.5280800091056106, "learning_rate": 7.869261016754736e-06, "loss": 0.7295, "step": 26660 }, { "epoch": 0.5728831034927181, "grad_norm": 0.5289805883203285, "learning_rate": 7.862601537047289e-06, "loss": 0.7265, "step": 26670 }, { "epoch": 0.5730979078059888, "grad_norm": 0.5193534190312329, "learning_rate": 7.855943050477605e-06, "loss": 0.7333, "step": 26680 }, { "epoch": 0.5733127121192594, "grad_norm": 0.5290783598213893, "learning_rate": 7.84928556013954e-06, "loss": 0.7221, "step": 26690 }, { "epoch": 0.57352751643253, "grad_norm": 0.5127746177226163, "learning_rate": 7.842629069126484e-06, "loss": 0.7388, "step": 26700 }, { "epoch": 0.5737423207458006, "grad_norm": 0.512414892050149, "learning_rate": 7.835973580531353e-06, "loss": 0.7284, "step": 26710 }, { "epoch": 0.5739571250590711, "grad_norm": 0.5029147333178203, "learning_rate": 7.829319097446617e-06, "loss": 0.716, "step": 26720 }, { "epoch": 0.5741719293723418, "grad_norm": 0.520040331471592, "learning_rate": 7.82266562296426e-06, "loss": 0.7159, "step": 26730 }, { "epoch": 0.5743867336856124, "grad_norm": 0.4978396177758656, "learning_rate": 7.816013160175812e-06, "loss": 0.7208, "step": 26740 }, { "epoch": 0.574601537998883, "grad_norm": 0.538410253835665, "learning_rate": 7.809361712172322e-06, "loss": 0.7146, "step": 26750 }, { "epoch": 0.5748163423121536, "grad_norm": 0.5071511006184085, "learning_rate": 7.802711282044366e-06, "loss": 0.7314, "step": 26760 }, { "epoch": 0.5750311466254242, "grad_norm": 0.5026752106850176, "learning_rate": 7.796061872882065e-06, "loss": 0.7353, "step": 26770 }, { "epoch": 0.5752459509386948, "grad_norm": 0.4928490982584089, "learning_rate": 7.789413487775043e-06, "loss": 0.7161, "step": 26780 }, { "epoch": 0.5754607552519655, "grad_norm": 0.5054902551078443, "learning_rate": 7.782766129812463e-06, "loss": 0.7227, "step": 26790 }, { "epoch": 0.5756755595652361, "grad_norm": 0.5106529312162003, "learning_rate": 7.776119802083001e-06, "loss": 0.7173, "step": 26800 }, { "epoch": 0.5758903638785067, "grad_norm": 0.5157371052564345, "learning_rate": 7.76947450767487e-06, "loss": 0.7122, "step": 26810 }, { "epoch": 0.5761051681917773, "grad_norm": 0.5284843076164706, "learning_rate": 7.762830249675785e-06, "loss": 0.7256, "step": 26820 }, { "epoch": 0.5763199725050479, "grad_norm": 0.5456146407952996, "learning_rate": 7.756187031172984e-06, "loss": 0.7391, "step": 26830 }, { "epoch": 0.5765347768183186, "grad_norm": 0.4917659110447439, "learning_rate": 7.74954485525323e-06, "loss": 0.7185, "step": 26840 }, { "epoch": 0.5767495811315891, "grad_norm": 0.506243431890311, "learning_rate": 7.74290372500279e-06, "loss": 0.7367, "step": 26850 }, { "epoch": 0.5769643854448597, "grad_norm": 0.506566764718463, "learning_rate": 7.736263643507462e-06, "loss": 0.7336, "step": 26860 }, { "epoch": 0.5771791897581303, "grad_norm": 0.5188308701341517, "learning_rate": 7.729624613852533e-06, "loss": 0.7165, "step": 26870 }, { "epoch": 0.5773939940714009, "grad_norm": 0.5149438496598627, "learning_rate": 7.722986639122827e-06, "loss": 0.7287, "step": 26880 }, { "epoch": 0.5776087983846716, "grad_norm": 0.5235992368572671, "learning_rate": 7.71634972240266e-06, "loss": 0.7174, "step": 26890 }, { "epoch": 0.5778236026979422, "grad_norm": 0.5113815635765164, "learning_rate": 7.70971386677586e-06, "loss": 0.7109, "step": 26900 }, { "epoch": 0.5780384070112128, "grad_norm": 0.49355622034783786, "learning_rate": 7.703079075325764e-06, "loss": 0.7103, "step": 26910 }, { "epoch": 0.5782532113244834, "grad_norm": 0.519299512412156, "learning_rate": 7.69644535113521e-06, "loss": 0.7369, "step": 26920 }, { "epoch": 0.578468015637754, "grad_norm": 0.5310388569132752, "learning_rate": 7.689812697286555e-06, "loss": 0.7188, "step": 26930 }, { "epoch": 0.5786828199510247, "grad_norm": 0.5168701357967611, "learning_rate": 7.683181116861642e-06, "loss": 0.73, "step": 26940 }, { "epoch": 0.5788976242642953, "grad_norm": 0.5042983692202927, "learning_rate": 7.676550612941816e-06, "loss": 0.7135, "step": 26950 }, { "epoch": 0.5791124285775658, "grad_norm": 0.5177162051099963, "learning_rate": 7.669921188607935e-06, "loss": 0.7394, "step": 26960 }, { "epoch": 0.5793272328908364, "grad_norm": 0.5119619648123758, "learning_rate": 7.663292846940343e-06, "loss": 0.7207, "step": 26970 }, { "epoch": 0.579542037204107, "grad_norm": 0.5362452673286001, "learning_rate": 7.656665591018887e-06, "loss": 0.7143, "step": 26980 }, { "epoch": 0.5797568415173777, "grad_norm": 0.5129247448122536, "learning_rate": 7.650039423922905e-06, "loss": 0.7224, "step": 26990 }, { "epoch": 0.5799716458306483, "grad_norm": 0.5088798828391344, "learning_rate": 7.643414348731237e-06, "loss": 0.7148, "step": 27000 }, { "epoch": 0.5801864501439189, "grad_norm": 0.5027910917195445, "learning_rate": 7.636790368522208e-06, "loss": 0.7219, "step": 27010 }, { "epoch": 0.5804012544571895, "grad_norm": 0.5185537348120938, "learning_rate": 7.630167486373632e-06, "loss": 0.7221, "step": 27020 }, { "epoch": 0.5806160587704601, "grad_norm": 0.5272726326291755, "learning_rate": 7.623545705362822e-06, "loss": 0.7175, "step": 27030 }, { "epoch": 0.5808308630837308, "grad_norm": 0.5166485329784066, "learning_rate": 7.616925028566575e-06, "loss": 0.7341, "step": 27040 }, { "epoch": 0.5810456673970014, "grad_norm": 0.5315842783462765, "learning_rate": 7.6103054590611755e-06, "loss": 0.7391, "step": 27050 }, { "epoch": 0.581260471710272, "grad_norm": 0.5015083854889525, "learning_rate": 7.603686999922386e-06, "loss": 0.7144, "step": 27060 }, { "epoch": 0.5814752760235425, "grad_norm": 0.5184207030120949, "learning_rate": 7.597069654225471e-06, "loss": 0.7248, "step": 27070 }, { "epoch": 0.5816900803368131, "grad_norm": 0.5316867610900236, "learning_rate": 7.590453425045159e-06, "loss": 0.733, "step": 27080 }, { "epoch": 0.5819048846500837, "grad_norm": 0.4965452979386377, "learning_rate": 7.583838315455665e-06, "loss": 0.7204, "step": 27090 }, { "epoch": 0.5821196889633544, "grad_norm": 0.5297374352101749, "learning_rate": 7.577224328530694e-06, "loss": 0.7227, "step": 27100 }, { "epoch": 0.582334493276625, "grad_norm": 0.5359023033266158, "learning_rate": 7.570611467343414e-06, "loss": 0.7341, "step": 27110 }, { "epoch": 0.5825492975898956, "grad_norm": 0.5131959097511573, "learning_rate": 7.563999734966483e-06, "loss": 0.7137, "step": 27120 }, { "epoch": 0.5827641019031662, "grad_norm": 0.5332592734188711, "learning_rate": 7.557389134472021e-06, "loss": 0.7339, "step": 27130 }, { "epoch": 0.5829789062164368, "grad_norm": 0.4950524149977223, "learning_rate": 7.550779668931641e-06, "loss": 0.7278, "step": 27140 }, { "epoch": 0.5831937105297075, "grad_norm": 0.4979574277254489, "learning_rate": 7.544171341416409e-06, "loss": 0.7116, "step": 27150 }, { "epoch": 0.5834085148429781, "grad_norm": 0.5184623277698175, "learning_rate": 7.537564154996871e-06, "loss": 0.7171, "step": 27160 }, { "epoch": 0.5836233191562487, "grad_norm": 0.5028351179542662, "learning_rate": 7.530958112743048e-06, "loss": 0.7074, "step": 27170 }, { "epoch": 0.5838381234695192, "grad_norm": 0.5140269218645958, "learning_rate": 7.524353217724414e-06, "loss": 0.7183, "step": 27180 }, { "epoch": 0.5840529277827898, "grad_norm": 0.5116611764499623, "learning_rate": 7.517749473009931e-06, "loss": 0.731, "step": 27190 }, { "epoch": 0.5842677320960605, "grad_norm": 0.5034040525475355, "learning_rate": 7.51114688166801e-06, "loss": 0.7026, "step": 27200 }, { "epoch": 0.5844825364093311, "grad_norm": 0.5120240441103905, "learning_rate": 7.50454544676653e-06, "loss": 0.7298, "step": 27210 }, { "epoch": 0.5846973407226017, "grad_norm": 0.5141277473304001, "learning_rate": 7.497945171372838e-06, "loss": 0.7169, "step": 27220 }, { "epoch": 0.5849121450358723, "grad_norm": 0.5092099955268595, "learning_rate": 7.4913460585537314e-06, "loss": 0.7227, "step": 27230 }, { "epoch": 0.5851269493491429, "grad_norm": 0.5394087392182072, "learning_rate": 7.484748111375482e-06, "loss": 0.7368, "step": 27240 }, { "epoch": 0.5853417536624136, "grad_norm": 0.5296527582320034, "learning_rate": 7.478151332903807e-06, "loss": 0.7249, "step": 27250 }, { "epoch": 0.5855565579756842, "grad_norm": 0.527906994069362, "learning_rate": 7.4715557262038904e-06, "loss": 0.702, "step": 27260 }, { "epoch": 0.5857713622889548, "grad_norm": 0.49003860514458714, "learning_rate": 7.464961294340366e-06, "loss": 0.7216, "step": 27270 }, { "epoch": 0.5859861666022254, "grad_norm": 0.5158001608175528, "learning_rate": 7.45836804037732e-06, "loss": 0.7076, "step": 27280 }, { "epoch": 0.586200970915496, "grad_norm": 0.5189638533176454, "learning_rate": 7.451775967378296e-06, "loss": 0.7302, "step": 27290 }, { "epoch": 0.5864157752287666, "grad_norm": 0.5114303128229795, "learning_rate": 7.4451850784062825e-06, "loss": 0.7195, "step": 27300 }, { "epoch": 0.5866305795420372, "grad_norm": 0.5141598815151798, "learning_rate": 7.438595376523734e-06, "loss": 0.7259, "step": 27310 }, { "epoch": 0.5868453838553078, "grad_norm": 0.5115551026689363, "learning_rate": 7.432006864792529e-06, "loss": 0.7363, "step": 27320 }, { "epoch": 0.5870601881685784, "grad_norm": 0.5067452647479966, "learning_rate": 7.4254195462740165e-06, "loss": 0.7088, "step": 27330 }, { "epoch": 0.587274992481849, "grad_norm": 0.5201913281991899, "learning_rate": 7.418833424028974e-06, "loss": 0.7268, "step": 27340 }, { "epoch": 0.5874897967951197, "grad_norm": 0.5178789741191531, "learning_rate": 7.412248501117627e-06, "loss": 0.701, "step": 27350 }, { "epoch": 0.5877046011083903, "grad_norm": 0.5059570164144281, "learning_rate": 7.4056647805996526e-06, "loss": 0.7159, "step": 27360 }, { "epoch": 0.5879194054216609, "grad_norm": 0.5130596201558596, "learning_rate": 7.399082265534161e-06, "loss": 0.7208, "step": 27370 }, { "epoch": 0.5881342097349315, "grad_norm": 0.5129021388577076, "learning_rate": 7.392500958979705e-06, "loss": 0.7268, "step": 27380 }, { "epoch": 0.5883490140482021, "grad_norm": 0.49928892977313877, "learning_rate": 7.385920863994273e-06, "loss": 0.7291, "step": 27390 }, { "epoch": 0.5885638183614726, "grad_norm": 0.5013211052439496, "learning_rate": 7.3793419836352884e-06, "loss": 0.7319, "step": 27400 }, { "epoch": 0.5887786226747433, "grad_norm": 0.5333496221119072, "learning_rate": 7.372764320959624e-06, "loss": 0.7174, "step": 27410 }, { "epoch": 0.5889934269880139, "grad_norm": 0.516097492110587, "learning_rate": 7.366187879023572e-06, "loss": 0.7144, "step": 27420 }, { "epoch": 0.5892082313012845, "grad_norm": 0.5135516714260109, "learning_rate": 7.359612660882862e-06, "loss": 0.7107, "step": 27430 }, { "epoch": 0.5894230356145551, "grad_norm": 0.5339129088339684, "learning_rate": 7.353038669592654e-06, "loss": 0.7238, "step": 27440 }, { "epoch": 0.5896378399278257, "grad_norm": 0.4964474003475041, "learning_rate": 7.346465908207545e-06, "loss": 0.7176, "step": 27450 }, { "epoch": 0.5898526442410964, "grad_norm": 0.501452498338148, "learning_rate": 7.339894379781551e-06, "loss": 0.7131, "step": 27460 }, { "epoch": 0.590067448554367, "grad_norm": 0.5133436088736439, "learning_rate": 7.333324087368117e-06, "loss": 0.7215, "step": 27470 }, { "epoch": 0.5902822528676376, "grad_norm": 0.5192593089145867, "learning_rate": 7.326755034020122e-06, "loss": 0.7162, "step": 27480 }, { "epoch": 0.5904970571809082, "grad_norm": 0.5477084008394134, "learning_rate": 7.320187222789856e-06, "loss": 0.7042, "step": 27490 }, { "epoch": 0.5907118614941788, "grad_norm": 0.5180536818105407, "learning_rate": 7.3136206567290465e-06, "loss": 0.7305, "step": 27500 }, { "epoch": 0.5909266658074495, "grad_norm": 0.502178932682032, "learning_rate": 7.307055338888826e-06, "loss": 0.7139, "step": 27510 }, { "epoch": 0.59114147012072, "grad_norm": 0.5157309880089097, "learning_rate": 7.300491272319764e-06, "loss": 0.7163, "step": 27520 }, { "epoch": 0.5913562744339906, "grad_norm": 0.5148147550554781, "learning_rate": 7.293928460071838e-06, "loss": 0.7175, "step": 27530 }, { "epoch": 0.5915710787472612, "grad_norm": 0.5082539143510494, "learning_rate": 7.287366905194439e-06, "loss": 0.7155, "step": 27540 }, { "epoch": 0.5917858830605318, "grad_norm": 0.5141335882021415, "learning_rate": 7.280806610736391e-06, "loss": 0.7287, "step": 27550 }, { "epoch": 0.5920006873738025, "grad_norm": 0.48870302456568654, "learning_rate": 7.274247579745908e-06, "loss": 0.717, "step": 27560 }, { "epoch": 0.5922154916870731, "grad_norm": 0.514022070926401, "learning_rate": 7.267689815270642e-06, "loss": 0.7339, "step": 27570 }, { "epoch": 0.5924302960003437, "grad_norm": 0.5135139016976253, "learning_rate": 7.261133320357641e-06, "loss": 0.7198, "step": 27580 }, { "epoch": 0.5926451003136143, "grad_norm": 0.53429824873582, "learning_rate": 7.254578098053362e-06, "loss": 0.7349, "step": 27590 }, { "epoch": 0.5928599046268849, "grad_norm": 0.5058250349786495, "learning_rate": 7.248024151403682e-06, "loss": 0.724, "step": 27600 }, { "epoch": 0.5930747089401556, "grad_norm": 0.5206560440876598, "learning_rate": 7.2414714834538725e-06, "loss": 0.7337, "step": 27610 }, { "epoch": 0.5932895132534262, "grad_norm": 0.48980099237908875, "learning_rate": 7.234920097248623e-06, "loss": 0.7181, "step": 27620 }, { "epoch": 0.5935043175666967, "grad_norm": 0.5115417079620694, "learning_rate": 7.228369995832015e-06, "loss": 0.7297, "step": 27630 }, { "epoch": 0.5937191218799673, "grad_norm": 0.5016557907859813, "learning_rate": 7.221821182247548e-06, "loss": 0.732, "step": 27640 }, { "epoch": 0.5939339261932379, "grad_norm": 0.49835395618153433, "learning_rate": 7.215273659538114e-06, "loss": 0.7145, "step": 27650 }, { "epoch": 0.5941487305065085, "grad_norm": 0.5169175369018176, "learning_rate": 7.2087274307459945e-06, "loss": 0.7091, "step": 27660 }, { "epoch": 0.5943635348197792, "grad_norm": 0.5068225266138379, "learning_rate": 7.2021824989128915e-06, "loss": 0.7318, "step": 27670 }, { "epoch": 0.5945783391330498, "grad_norm": 0.5091346931786659, "learning_rate": 7.195638867079889e-06, "loss": 0.7203, "step": 27680 }, { "epoch": 0.5947931434463204, "grad_norm": 0.5271726430775541, "learning_rate": 7.1890965382874765e-06, "loss": 0.7318, "step": 27690 }, { "epoch": 0.595007947759591, "grad_norm": 0.5205082559060062, "learning_rate": 7.182555515575531e-06, "loss": 0.7221, "step": 27700 }, { "epoch": 0.5952227520728616, "grad_norm": 0.5108228067074766, "learning_rate": 7.176015801983326e-06, "loss": 0.7186, "step": 27710 }, { "epoch": 0.5954375563861323, "grad_norm": 0.5032605086696729, "learning_rate": 7.169477400549525e-06, "loss": 0.7159, "step": 27720 }, { "epoch": 0.5956523606994029, "grad_norm": 0.5143877348374966, "learning_rate": 7.16294031431218e-06, "loss": 0.7161, "step": 27730 }, { "epoch": 0.5958671650126734, "grad_norm": 0.5175753229468208, "learning_rate": 7.156404546308741e-06, "loss": 0.7241, "step": 27740 }, { "epoch": 0.596081969325944, "grad_norm": 0.5160103076524589, "learning_rate": 7.149870099576033e-06, "loss": 0.722, "step": 27750 }, { "epoch": 0.5962967736392146, "grad_norm": 0.5276236091636177, "learning_rate": 7.143336977150278e-06, "loss": 0.7271, "step": 27760 }, { "epoch": 0.5965115779524853, "grad_norm": 0.5115582800303922, "learning_rate": 7.136805182067074e-06, "loss": 0.7276, "step": 27770 }, { "epoch": 0.5967263822657559, "grad_norm": 0.5007489074574742, "learning_rate": 7.130274717361405e-06, "loss": 0.7101, "step": 27780 }, { "epoch": 0.5969411865790265, "grad_norm": 0.5247645617356222, "learning_rate": 7.123745586067645e-06, "loss": 0.7214, "step": 27790 }, { "epoch": 0.5971559908922971, "grad_norm": 0.509054996286859, "learning_rate": 7.117217791219533e-06, "loss": 0.7062, "step": 27800 }, { "epoch": 0.5973707952055677, "grad_norm": 0.5146383947427272, "learning_rate": 7.110691335850202e-06, "loss": 0.727, "step": 27810 }, { "epoch": 0.5975855995188384, "grad_norm": 0.5107471000601168, "learning_rate": 7.1041662229921485e-06, "loss": 0.7078, "step": 27820 }, { "epoch": 0.597800403832109, "grad_norm": 0.48699390234221435, "learning_rate": 7.097642455677261e-06, "loss": 0.7127, "step": 27830 }, { "epoch": 0.5980152081453796, "grad_norm": 0.48779003203616056, "learning_rate": 7.091120036936791e-06, "loss": 0.713, "step": 27840 }, { "epoch": 0.5982300124586502, "grad_norm": 0.5073544067387802, "learning_rate": 7.084598969801362e-06, "loss": 0.7194, "step": 27850 }, { "epoch": 0.5984448167719207, "grad_norm": 0.5143979161778222, "learning_rate": 7.0780792573009835e-06, "loss": 0.7171, "step": 27860 }, { "epoch": 0.5986596210851914, "grad_norm": 0.5100260027061173, "learning_rate": 7.07156090246502e-06, "loss": 0.7219, "step": 27870 }, { "epoch": 0.598874425398462, "grad_norm": 0.5080855464944665, "learning_rate": 7.065043908322214e-06, "loss": 0.7288, "step": 27880 }, { "epoch": 0.5990892297117326, "grad_norm": 0.503233048103245, "learning_rate": 7.058528277900669e-06, "loss": 0.708, "step": 27890 }, { "epoch": 0.5993040340250032, "grad_norm": 0.5262280681492298, "learning_rate": 7.052014014227866e-06, "loss": 0.7167, "step": 27900 }, { "epoch": 0.5995188383382738, "grad_norm": 0.522581486548443, "learning_rate": 7.045501120330642e-06, "loss": 0.7272, "step": 27910 }, { "epoch": 0.5997336426515445, "grad_norm": 0.5104789267173563, "learning_rate": 7.0389895992351956e-06, "loss": 0.7087, "step": 27920 }, { "epoch": 0.5999484469648151, "grad_norm": 0.5146754901769843, "learning_rate": 7.032479453967097e-06, "loss": 0.728, "step": 27930 }, { "epoch": 0.6001632512780857, "grad_norm": 0.5116496747974748, "learning_rate": 7.0259706875512645e-06, "loss": 0.7125, "step": 27940 }, { "epoch": 0.6003780555913563, "grad_norm": 0.521148762194517, "learning_rate": 7.019463303011993e-06, "loss": 0.7174, "step": 27950 }, { "epoch": 0.6005928599046269, "grad_norm": 0.5311474419836788, "learning_rate": 7.012957303372918e-06, "loss": 0.7239, "step": 27960 }, { "epoch": 0.6008076642178974, "grad_norm": 0.5235689594460002, "learning_rate": 7.006452691657039e-06, "loss": 0.7212, "step": 27970 }, { "epoch": 0.6010224685311681, "grad_norm": 0.5092035810607163, "learning_rate": 6.999949470886715e-06, "loss": 0.7092, "step": 27980 }, { "epoch": 0.6012372728444387, "grad_norm": 0.4938851390556326, "learning_rate": 6.9934476440836465e-06, "loss": 0.7244, "step": 27990 }, { "epoch": 0.6014520771577093, "grad_norm": 0.5309729653806932, "learning_rate": 6.986947214268902e-06, "loss": 0.7263, "step": 28000 }, { "epoch": 0.6016668814709799, "grad_norm": 0.5203026110014208, "learning_rate": 6.980448184462887e-06, "loss": 0.707, "step": 28010 }, { "epoch": 0.6018816857842505, "grad_norm": 0.508186230193396, "learning_rate": 6.973950557685366e-06, "loss": 0.7188, "step": 28020 }, { "epoch": 0.6020964900975212, "grad_norm": 0.5101290046607262, "learning_rate": 6.967454336955447e-06, "loss": 0.7261, "step": 28030 }, { "epoch": 0.6023112944107918, "grad_norm": 0.5004521118510085, "learning_rate": 6.96095952529158e-06, "loss": 0.7097, "step": 28040 }, { "epoch": 0.6025260987240624, "grad_norm": 0.49710168230451124, "learning_rate": 6.9544661257115745e-06, "loss": 0.7275, "step": 28050 }, { "epoch": 0.602740903037333, "grad_norm": 0.5324092271912054, "learning_rate": 6.947974141232568e-06, "loss": 0.7176, "step": 28060 }, { "epoch": 0.6029557073506036, "grad_norm": 0.5254153398251872, "learning_rate": 6.9414835748710525e-06, "loss": 0.7329, "step": 28070 }, { "epoch": 0.6031705116638743, "grad_norm": 0.5018438803534653, "learning_rate": 6.934994429642854e-06, "loss": 0.7114, "step": 28080 }, { "epoch": 0.6033853159771448, "grad_norm": 0.5110608431528573, "learning_rate": 6.928506708563142e-06, "loss": 0.7175, "step": 28090 }, { "epoch": 0.6036001202904154, "grad_norm": 0.5110626161063511, "learning_rate": 6.922020414646422e-06, "loss": 0.7228, "step": 28100 }, { "epoch": 0.603814924603686, "grad_norm": 0.5165958575854644, "learning_rate": 6.915535550906532e-06, "loss": 0.7282, "step": 28110 }, { "epoch": 0.6040297289169566, "grad_norm": 0.5252321669011311, "learning_rate": 6.909052120356659e-06, "loss": 0.7218, "step": 28120 }, { "epoch": 0.6042445332302273, "grad_norm": 0.5145699979700377, "learning_rate": 6.902570126009309e-06, "loss": 0.721, "step": 28130 }, { "epoch": 0.6044593375434979, "grad_norm": 0.5257878272393476, "learning_rate": 6.8960895708763335e-06, "loss": 0.7236, "step": 28140 }, { "epoch": 0.6046741418567685, "grad_norm": 0.5107902953084463, "learning_rate": 6.8896104579689026e-06, "loss": 0.7139, "step": 28150 }, { "epoch": 0.6048889461700391, "grad_norm": 0.5187512853597482, "learning_rate": 6.8831327902975245e-06, "loss": 0.7211, "step": 28160 }, { "epoch": 0.6051037504833097, "grad_norm": 0.5179836553200129, "learning_rate": 6.876656570872036e-06, "loss": 0.7157, "step": 28170 }, { "epoch": 0.6053185547965804, "grad_norm": 0.4961138882817056, "learning_rate": 6.870181802701596e-06, "loss": 0.7144, "step": 28180 }, { "epoch": 0.605533359109851, "grad_norm": 0.4994657394772925, "learning_rate": 6.8637084887946985e-06, "loss": 0.7154, "step": 28190 }, { "epoch": 0.6057481634231215, "grad_norm": 0.5360515600802547, "learning_rate": 6.857236632159146e-06, "loss": 0.6994, "step": 28200 }, { "epoch": 0.6059629677363921, "grad_norm": 0.49991257688614876, "learning_rate": 6.85076623580208e-06, "loss": 0.7249, "step": 28210 }, { "epoch": 0.6061777720496627, "grad_norm": 0.5108166128653483, "learning_rate": 6.8442973027299565e-06, "loss": 0.7229, "step": 28220 }, { "epoch": 0.6063925763629334, "grad_norm": 0.49773101558129873, "learning_rate": 6.837829835948546e-06, "loss": 0.7091, "step": 28230 }, { "epoch": 0.606607380676204, "grad_norm": 0.505655201231906, "learning_rate": 6.8313638384629525e-06, "loss": 0.7206, "step": 28240 }, { "epoch": 0.6068221849894746, "grad_norm": 0.5131719935934191, "learning_rate": 6.824899313277582e-06, "loss": 0.7252, "step": 28250 }, { "epoch": 0.6070369893027452, "grad_norm": 0.503882580363197, "learning_rate": 6.8184362633961655e-06, "loss": 0.7305, "step": 28260 }, { "epoch": 0.6072517936160158, "grad_norm": 0.5001026171381061, "learning_rate": 6.811974691821741e-06, "loss": 0.7131, "step": 28270 }, { "epoch": 0.6074665979292864, "grad_norm": 0.5018092976471628, "learning_rate": 6.805514601556671e-06, "loss": 0.7179, "step": 28280 }, { "epoch": 0.6076814022425571, "grad_norm": 0.507419770302612, "learning_rate": 6.799055995602621e-06, "loss": 0.7056, "step": 28290 }, { "epoch": 0.6078962065558277, "grad_norm": 0.5171706639932914, "learning_rate": 6.7925988769605656e-06, "loss": 0.7265, "step": 28300 }, { "epoch": 0.6081110108690982, "grad_norm": 0.5057274040421198, "learning_rate": 6.786143248630795e-06, "loss": 0.7055, "step": 28310 }, { "epoch": 0.6083258151823688, "grad_norm": 0.5132979835569406, "learning_rate": 6.7796891136129e-06, "loss": 0.7117, "step": 28320 }, { "epoch": 0.6085406194956394, "grad_norm": 0.5100482558341333, "learning_rate": 6.773236474905786e-06, "loss": 0.7099, "step": 28330 }, { "epoch": 0.6087554238089101, "grad_norm": 0.5068203945434347, "learning_rate": 6.7667853355076565e-06, "loss": 0.7152, "step": 28340 }, { "epoch": 0.6089702281221807, "grad_norm": 0.4922194906275137, "learning_rate": 6.760335698416016e-06, "loss": 0.7248, "step": 28350 }, { "epoch": 0.6091850324354513, "grad_norm": 0.4940640802222499, "learning_rate": 6.753887566627682e-06, "loss": 0.7088, "step": 28360 }, { "epoch": 0.6093998367487219, "grad_norm": 0.4917970774712095, "learning_rate": 6.747440943138757e-06, "loss": 0.7041, "step": 28370 }, { "epoch": 0.6096146410619925, "grad_norm": 0.5157876999086878, "learning_rate": 6.740995830944658e-06, "loss": 0.7242, "step": 28380 }, { "epoch": 0.6098294453752632, "grad_norm": 0.638677062933045, "learning_rate": 6.7345522330400915e-06, "loss": 0.7107, "step": 28390 }, { "epoch": 0.6100442496885338, "grad_norm": 0.5291704161918205, "learning_rate": 6.728110152419063e-06, "loss": 0.7259, "step": 28400 }, { "epoch": 0.6102590540018044, "grad_norm": 0.5435155538816592, "learning_rate": 6.7216695920748694e-06, "loss": 0.7292, "step": 28410 }, { "epoch": 0.610473858315075, "grad_norm": 0.5249535872174969, "learning_rate": 6.715230555000102e-06, "loss": 0.7124, "step": 28420 }, { "epoch": 0.6106886626283455, "grad_norm": 0.5168556551996696, "learning_rate": 6.708793044186652e-06, "loss": 0.7135, "step": 28430 }, { "epoch": 0.6109034669416162, "grad_norm": 0.506513826242894, "learning_rate": 6.702357062625689e-06, "loss": 0.7171, "step": 28440 }, { "epoch": 0.6111182712548868, "grad_norm": 0.5122231459063203, "learning_rate": 6.695922613307684e-06, "loss": 0.7366, "step": 28450 }, { "epoch": 0.6113330755681574, "grad_norm": 0.5266734325137272, "learning_rate": 6.689489699222387e-06, "loss": 0.7177, "step": 28460 }, { "epoch": 0.611547879881428, "grad_norm": 0.5249480955219177, "learning_rate": 6.6830583233588406e-06, "loss": 0.7177, "step": 28470 }, { "epoch": 0.6117626841946986, "grad_norm": 0.536864147141546, "learning_rate": 6.67662848870537e-06, "loss": 0.7334, "step": 28480 }, { "epoch": 0.6119774885079693, "grad_norm": 0.5017540321010265, "learning_rate": 6.670200198249579e-06, "loss": 0.7141, "step": 28490 }, { "epoch": 0.6121922928212399, "grad_norm": 0.5022749037709945, "learning_rate": 6.663773454978368e-06, "loss": 0.7087, "step": 28500 }, { "epoch": 0.6124070971345105, "grad_norm": 0.5079751963910859, "learning_rate": 6.657348261877905e-06, "loss": 0.7411, "step": 28510 }, { "epoch": 0.6126219014477811, "grad_norm": 0.5184599582736278, "learning_rate": 6.650924621933645e-06, "loss": 0.7171, "step": 28520 }, { "epoch": 0.6128367057610516, "grad_norm": 0.50121952483461, "learning_rate": 6.644502538130317e-06, "loss": 0.7209, "step": 28530 }, { "epoch": 0.6130515100743223, "grad_norm": 0.515280894738279, "learning_rate": 6.638082013451925e-06, "loss": 0.7198, "step": 28540 }, { "epoch": 0.6132663143875929, "grad_norm": 0.5136328125428188, "learning_rate": 6.631663050881763e-06, "loss": 0.7099, "step": 28550 }, { "epoch": 0.6134811187008635, "grad_norm": 0.5174719027017979, "learning_rate": 6.625245653402379e-06, "loss": 0.725, "step": 28560 }, { "epoch": 0.6136959230141341, "grad_norm": 0.5250930188904968, "learning_rate": 6.6188298239956105e-06, "loss": 0.7072, "step": 28570 }, { "epoch": 0.6139107273274047, "grad_norm": 0.5369921932935243, "learning_rate": 6.612415565642552e-06, "loss": 0.7229, "step": 28580 }, { "epoch": 0.6141255316406753, "grad_norm": 0.5023867945450698, "learning_rate": 6.606002881323581e-06, "loss": 0.7225, "step": 28590 }, { "epoch": 0.614340335953946, "grad_norm": 0.5107950386038079, "learning_rate": 6.599591774018338e-06, "loss": 0.7155, "step": 28600 }, { "epoch": 0.6145551402672166, "grad_norm": 0.5003163460337205, "learning_rate": 6.5931822467057275e-06, "loss": 0.7168, "step": 28610 }, { "epoch": 0.6147699445804872, "grad_norm": 0.5273078172630057, "learning_rate": 6.5867743023639255e-06, "loss": 0.7251, "step": 28620 }, { "epoch": 0.6149847488937578, "grad_norm": 0.5126901132037992, "learning_rate": 6.580367943970365e-06, "loss": 0.7083, "step": 28630 }, { "epoch": 0.6151995532070283, "grad_norm": 0.5155415180923669, "learning_rate": 6.573963174501755e-06, "loss": 0.7117, "step": 28640 }, { "epoch": 0.615414357520299, "grad_norm": 0.514330215394583, "learning_rate": 6.567559996934052e-06, "loss": 0.7147, "step": 28650 }, { "epoch": 0.6156291618335696, "grad_norm": 0.5245843260086881, "learning_rate": 6.5611584142424845e-06, "loss": 0.7109, "step": 28660 }, { "epoch": 0.6158439661468402, "grad_norm": 0.5167536487547844, "learning_rate": 6.554758429401532e-06, "loss": 0.7216, "step": 28670 }, { "epoch": 0.6160587704601108, "grad_norm": 0.5109351895706468, "learning_rate": 6.548360045384933e-06, "loss": 0.7056, "step": 28680 }, { "epoch": 0.6162735747733814, "grad_norm": 0.5190795799887076, "learning_rate": 6.541963265165686e-06, "loss": 0.7242, "step": 28690 }, { "epoch": 0.6164883790866521, "grad_norm": 0.5106655693293612, "learning_rate": 6.535568091716036e-06, "loss": 0.7168, "step": 28700 }, { "epoch": 0.6167031833999227, "grad_norm": 0.5047058090402041, "learning_rate": 6.529174528007496e-06, "loss": 0.7412, "step": 28710 }, { "epoch": 0.6169179877131933, "grad_norm": 0.5197711629668383, "learning_rate": 6.522782577010815e-06, "loss": 0.7214, "step": 28720 }, { "epoch": 0.6171327920264639, "grad_norm": 0.4994718882123556, "learning_rate": 6.516392241696004e-06, "loss": 0.7331, "step": 28730 }, { "epoch": 0.6173475963397345, "grad_norm": 0.5072482739785458, "learning_rate": 6.5100035250323155e-06, "loss": 0.7052, "step": 28740 }, { "epoch": 0.6175624006530052, "grad_norm": 0.4899692225682852, "learning_rate": 6.503616429988253e-06, "loss": 0.6999, "step": 28750 }, { "epoch": 0.6177772049662758, "grad_norm": 0.5047645583660254, "learning_rate": 6.497230959531573e-06, "loss": 0.7141, "step": 28760 }, { "epoch": 0.6179920092795463, "grad_norm": 0.5144621325420531, "learning_rate": 6.490847116629267e-06, "loss": 0.721, "step": 28770 }, { "epoch": 0.6182068135928169, "grad_norm": 0.5073195245109307, "learning_rate": 6.484464904247573e-06, "loss": 0.7095, "step": 28780 }, { "epoch": 0.6184216179060875, "grad_norm": 0.5122140072591916, "learning_rate": 6.4780843253519766e-06, "loss": 0.7085, "step": 28790 }, { "epoch": 0.6186364222193582, "grad_norm": 0.5103737664691493, "learning_rate": 6.471705382907194e-06, "loss": 0.7023, "step": 28800 }, { "epoch": 0.6188512265326288, "grad_norm": 0.49611210872837896, "learning_rate": 6.465328079877196e-06, "loss": 0.7027, "step": 28810 }, { "epoch": 0.6190660308458994, "grad_norm": 0.5196971341780113, "learning_rate": 6.458952419225175e-06, "loss": 0.7147, "step": 28820 }, { "epoch": 0.61928083515917, "grad_norm": 0.5200694337234366, "learning_rate": 6.452578403913577e-06, "loss": 0.7129, "step": 28830 }, { "epoch": 0.6194956394724406, "grad_norm": 0.49974616265603755, "learning_rate": 6.446206036904068e-06, "loss": 0.7025, "step": 28840 }, { "epoch": 0.6197104437857112, "grad_norm": 0.5036120598369263, "learning_rate": 6.439835321157561e-06, "loss": 0.7117, "step": 28850 }, { "epoch": 0.6199252480989819, "grad_norm": 0.5312724368454428, "learning_rate": 6.433466259634191e-06, "loss": 0.7174, "step": 28860 }, { "epoch": 0.6201400524122525, "grad_norm": 0.5424611207118475, "learning_rate": 6.427098855293328e-06, "loss": 0.7106, "step": 28870 }, { "epoch": 0.620354856725523, "grad_norm": 0.5100284315892076, "learning_rate": 6.42073311109358e-06, "loss": 0.7226, "step": 28880 }, { "epoch": 0.6205696610387936, "grad_norm": 0.5173898778049251, "learning_rate": 6.414369029992771e-06, "loss": 0.7311, "step": 28890 }, { "epoch": 0.6207844653520642, "grad_norm": 0.5125989979068201, "learning_rate": 6.408006614947963e-06, "loss": 0.7136, "step": 28900 }, { "epoch": 0.6209992696653349, "grad_norm": 0.5241560351725514, "learning_rate": 6.401645868915434e-06, "loss": 0.7309, "step": 28910 }, { "epoch": 0.6212140739786055, "grad_norm": 0.5326577223760485, "learning_rate": 6.39528679485069e-06, "loss": 0.7222, "step": 28920 }, { "epoch": 0.6214288782918761, "grad_norm": 0.5412010757518007, "learning_rate": 6.388929395708469e-06, "loss": 0.7275, "step": 28930 }, { "epoch": 0.6216436826051467, "grad_norm": 0.501280757518304, "learning_rate": 6.382573674442719e-06, "loss": 0.7043, "step": 28940 }, { "epoch": 0.6218584869184173, "grad_norm": 0.5035153061061383, "learning_rate": 6.376219634006614e-06, "loss": 0.72, "step": 28950 }, { "epoch": 0.622073291231688, "grad_norm": 0.5006644086233506, "learning_rate": 6.3698672773525414e-06, "loss": 0.7375, "step": 28960 }, { "epoch": 0.6222880955449586, "grad_norm": 0.5148002471276852, "learning_rate": 6.363516607432116e-06, "loss": 0.7029, "step": 28970 }, { "epoch": 0.6225028998582292, "grad_norm": 0.5117306851220371, "learning_rate": 6.357167627196164e-06, "loss": 0.7268, "step": 28980 }, { "epoch": 0.6227177041714997, "grad_norm": 0.5040793669298755, "learning_rate": 6.35082033959472e-06, "loss": 0.7366, "step": 28990 }, { "epoch": 0.6229325084847703, "grad_norm": 0.511200471454493, "learning_rate": 6.344474747577043e-06, "loss": 0.7241, "step": 29000 }, { "epoch": 0.623147312798041, "grad_norm": 0.5050786822698712, "learning_rate": 6.338130854091595e-06, "loss": 0.7216, "step": 29010 }, { "epoch": 0.6233621171113116, "grad_norm": 0.5058176815929498, "learning_rate": 6.331788662086058e-06, "loss": 0.7058, "step": 29020 }, { "epoch": 0.6235769214245822, "grad_norm": 0.4981789798742353, "learning_rate": 6.325448174507312e-06, "loss": 0.7211, "step": 29030 }, { "epoch": 0.6237917257378528, "grad_norm": 0.496475062930444, "learning_rate": 6.319109394301459e-06, "loss": 0.7246, "step": 29040 }, { "epoch": 0.6240065300511234, "grad_norm": 0.5206633397961193, "learning_rate": 6.312772324413798e-06, "loss": 0.719, "step": 29050 }, { "epoch": 0.6242213343643941, "grad_norm": 0.5147683511194359, "learning_rate": 6.30643696778883e-06, "loss": 0.7319, "step": 29060 }, { "epoch": 0.6244361386776647, "grad_norm": 0.5013409604266713, "learning_rate": 6.300103327370272e-06, "loss": 0.7138, "step": 29070 }, { "epoch": 0.6246509429909353, "grad_norm": 0.5448909474545818, "learning_rate": 6.29377140610103e-06, "loss": 0.7219, "step": 29080 }, { "epoch": 0.6248657473042059, "grad_norm": 0.4980788653525116, "learning_rate": 6.287441206923225e-06, "loss": 0.7054, "step": 29090 }, { "epoch": 0.6250805516174764, "grad_norm": 0.5063361287454743, "learning_rate": 6.28111273277817e-06, "loss": 0.7291, "step": 29100 }, { "epoch": 0.6252953559307471, "grad_norm": 0.5079410093076961, "learning_rate": 6.274785986606371e-06, "loss": 0.7176, "step": 29110 }, { "epoch": 0.6255101602440177, "grad_norm": 0.5169067311424639, "learning_rate": 6.2684609713475454e-06, "loss": 0.7153, "step": 29120 }, { "epoch": 0.6257249645572883, "grad_norm": 0.5053854343865869, "learning_rate": 6.26213768994059e-06, "loss": 0.7257, "step": 29130 }, { "epoch": 0.6259397688705589, "grad_norm": 0.5019853299444441, "learning_rate": 6.255816145323613e-06, "loss": 0.7243, "step": 29140 }, { "epoch": 0.6261545731838295, "grad_norm": 0.5136425354507431, "learning_rate": 6.249496340433903e-06, "loss": 0.7133, "step": 29150 }, { "epoch": 0.6263693774971001, "grad_norm": 0.4961378621057317, "learning_rate": 6.243178278207944e-06, "loss": 0.7201, "step": 29160 }, { "epoch": 0.6265841818103708, "grad_norm": 0.500774286297422, "learning_rate": 6.236861961581413e-06, "loss": 0.7183, "step": 29170 }, { "epoch": 0.6267989861236414, "grad_norm": 0.5360545274720105, "learning_rate": 6.230547393489166e-06, "loss": 0.7197, "step": 29180 }, { "epoch": 0.627013790436912, "grad_norm": 0.5298466892431664, "learning_rate": 6.224234576865264e-06, "loss": 0.7137, "step": 29190 }, { "epoch": 0.6272285947501826, "grad_norm": 0.5397675431496055, "learning_rate": 6.217923514642938e-06, "loss": 0.7302, "step": 29200 }, { "epoch": 0.6274433990634531, "grad_norm": 0.5102301973810862, "learning_rate": 6.211614209754615e-06, "loss": 0.7001, "step": 29210 }, { "epoch": 0.6276582033767238, "grad_norm": 0.49821596276059255, "learning_rate": 6.205306665131892e-06, "loss": 0.7234, "step": 29220 }, { "epoch": 0.6278730076899944, "grad_norm": 0.5107970378794817, "learning_rate": 6.199000883705563e-06, "loss": 0.7074, "step": 29230 }, { "epoch": 0.628087812003265, "grad_norm": 0.5262658842700451, "learning_rate": 6.192696868405598e-06, "loss": 0.7082, "step": 29240 }, { "epoch": 0.6283026163165356, "grad_norm": 0.49640493413761044, "learning_rate": 6.186394622161136e-06, "loss": 0.7117, "step": 29250 }, { "epoch": 0.6285174206298062, "grad_norm": 0.520113591142662, "learning_rate": 6.1800941479005125e-06, "loss": 0.7089, "step": 29260 }, { "epoch": 0.6287322249430769, "grad_norm": 0.5152262365486174, "learning_rate": 6.173795448551223e-06, "loss": 0.7041, "step": 29270 }, { "epoch": 0.6289470292563475, "grad_norm": 0.5044955000995295, "learning_rate": 6.16749852703995e-06, "loss": 0.7066, "step": 29280 }, { "epoch": 0.6291618335696181, "grad_norm": 0.5039566573378436, "learning_rate": 6.161203386292539e-06, "loss": 0.7168, "step": 29290 }, { "epoch": 0.6293766378828887, "grad_norm": 0.5132657302646363, "learning_rate": 6.154910029234022e-06, "loss": 0.7282, "step": 29300 }, { "epoch": 0.6295914421961593, "grad_norm": 0.501215277645178, "learning_rate": 6.148618458788589e-06, "loss": 0.7125, "step": 29310 }, { "epoch": 0.62980624650943, "grad_norm": 0.5171520975134748, "learning_rate": 6.142328677879604e-06, "loss": 0.7201, "step": 29320 }, { "epoch": 0.6300210508227005, "grad_norm": 0.5093541985407058, "learning_rate": 6.136040689429606e-06, "loss": 0.7249, "step": 29330 }, { "epoch": 0.6302358551359711, "grad_norm": 0.5038211596059813, "learning_rate": 6.1297544963602874e-06, "loss": 0.7116, "step": 29340 }, { "epoch": 0.6304506594492417, "grad_norm": 0.499683261524144, "learning_rate": 6.123470101592524e-06, "loss": 0.7168, "step": 29350 }, { "epoch": 0.6306654637625123, "grad_norm": 0.5524946516291463, "learning_rate": 6.1171875080463425e-06, "loss": 0.7087, "step": 29360 }, { "epoch": 0.630880268075783, "grad_norm": 0.5018295390971191, "learning_rate": 6.1109067186409365e-06, "loss": 0.7219, "step": 29370 }, { "epoch": 0.6310950723890536, "grad_norm": 0.53849226672192, "learning_rate": 6.104627736294664e-06, "loss": 0.7008, "step": 29380 }, { "epoch": 0.6313098767023242, "grad_norm": 0.5134656847802564, "learning_rate": 6.098350563925035e-06, "loss": 0.7165, "step": 29390 }, { "epoch": 0.6315246810155948, "grad_norm": 0.5395193132941271, "learning_rate": 6.092075204448734e-06, "loss": 0.7263, "step": 29400 }, { "epoch": 0.6317394853288654, "grad_norm": 0.5291663702673787, "learning_rate": 6.085801660781585e-06, "loss": 0.7226, "step": 29410 }, { "epoch": 0.6319542896421361, "grad_norm": 0.4920471964797305, "learning_rate": 6.079529935838584e-06, "loss": 0.7253, "step": 29420 }, { "epoch": 0.6321690939554067, "grad_norm": 0.5091308822653934, "learning_rate": 6.0732600325338745e-06, "loss": 0.7084, "step": 29430 }, { "epoch": 0.6323838982686772, "grad_norm": 0.5083144836724668, "learning_rate": 6.066991953780748e-06, "loss": 0.7096, "step": 29440 }, { "epoch": 0.6325987025819478, "grad_norm": 0.5553675193548707, "learning_rate": 6.06072570249166e-06, "loss": 0.7189, "step": 29450 }, { "epoch": 0.6328135068952184, "grad_norm": 0.5042084082023858, "learning_rate": 6.054461281578206e-06, "loss": 0.7076, "step": 29460 }, { "epoch": 0.633028311208489, "grad_norm": 0.4889129315159301, "learning_rate": 6.048198693951142e-06, "loss": 0.7034, "step": 29470 }, { "epoch": 0.6332431155217597, "grad_norm": 0.49120737847508095, "learning_rate": 6.041937942520363e-06, "loss": 0.7162, "step": 29480 }, { "epoch": 0.6334579198350303, "grad_norm": 0.5106224481761457, "learning_rate": 6.035679030194917e-06, "loss": 0.7108, "step": 29490 }, { "epoch": 0.6336727241483009, "grad_norm": 0.5142715172785339, "learning_rate": 6.02942195988299e-06, "loss": 0.7182, "step": 29500 }, { "epoch": 0.6338875284615715, "grad_norm": 0.5245286567422575, "learning_rate": 6.023166734491919e-06, "loss": 0.7069, "step": 29510 }, { "epoch": 0.6341023327748421, "grad_norm": 0.49896046500592833, "learning_rate": 6.0169133569281835e-06, "loss": 0.7217, "step": 29520 }, { "epoch": 0.6343171370881128, "grad_norm": 0.5076374052572614, "learning_rate": 6.010661830097399e-06, "loss": 0.7244, "step": 29530 }, { "epoch": 0.6345319414013834, "grad_norm": 0.5036960875426778, "learning_rate": 6.004412156904329e-06, "loss": 0.7246, "step": 29540 }, { "epoch": 0.634746745714654, "grad_norm": 0.5190186034362325, "learning_rate": 5.9981643402528675e-06, "loss": 0.7181, "step": 29550 }, { "epoch": 0.6349615500279245, "grad_norm": 0.5185275006081458, "learning_rate": 5.991918383046047e-06, "loss": 0.7171, "step": 29560 }, { "epoch": 0.6351763543411951, "grad_norm": 0.5092203362213314, "learning_rate": 5.985674288186045e-06, "loss": 0.7282, "step": 29570 }, { "epoch": 0.6353911586544658, "grad_norm": 0.4997149128548273, "learning_rate": 5.979432058574164e-06, "loss": 0.7174, "step": 29580 }, { "epoch": 0.6356059629677364, "grad_norm": 0.5068981922104722, "learning_rate": 5.973191697110845e-06, "loss": 0.7141, "step": 29590 }, { "epoch": 0.635820767281007, "grad_norm": 0.5110915167748981, "learning_rate": 5.966953206695653e-06, "loss": 0.7102, "step": 29600 }, { "epoch": 0.6360355715942776, "grad_norm": 0.511793837071635, "learning_rate": 5.960716590227298e-06, "loss": 0.7029, "step": 29610 }, { "epoch": 0.6362503759075482, "grad_norm": 0.5010481752806605, "learning_rate": 5.954481850603606e-06, "loss": 0.7052, "step": 29620 }, { "epoch": 0.6364651802208189, "grad_norm": 0.5184653587723411, "learning_rate": 5.9482489907215324e-06, "loss": 0.709, "step": 29630 }, { "epoch": 0.6366799845340895, "grad_norm": 0.5186353614978536, "learning_rate": 5.9420180134771735e-06, "loss": 0.7256, "step": 29640 }, { "epoch": 0.6368947888473601, "grad_norm": 0.5090791214108952, "learning_rate": 5.935788921765728e-06, "loss": 0.7125, "step": 29650 }, { "epoch": 0.6371095931606306, "grad_norm": 0.5182097600656689, "learning_rate": 5.92956171848154e-06, "loss": 0.7095, "step": 29660 }, { "epoch": 0.6373243974739012, "grad_norm": 0.49906621293560977, "learning_rate": 5.923336406518059e-06, "loss": 0.7179, "step": 29670 }, { "epoch": 0.6375392017871719, "grad_norm": 0.49207042909075416, "learning_rate": 5.91711298876787e-06, "loss": 0.7141, "step": 29680 }, { "epoch": 0.6377540061004425, "grad_norm": 0.491835738604546, "learning_rate": 5.910891468122668e-06, "loss": 0.7078, "step": 29690 }, { "epoch": 0.6379688104137131, "grad_norm": 0.5078817311304347, "learning_rate": 5.9046718474732666e-06, "loss": 0.7072, "step": 29700 }, { "epoch": 0.6381836147269837, "grad_norm": 0.5124871601736757, "learning_rate": 5.898454129709606e-06, "loss": 0.7202, "step": 29710 }, { "epoch": 0.6383984190402543, "grad_norm": 0.5134905108899009, "learning_rate": 5.8922383177207286e-06, "loss": 0.7282, "step": 29720 }, { "epoch": 0.6386132233535249, "grad_norm": 0.5155981047848882, "learning_rate": 5.886024414394806e-06, "loss": 0.7113, "step": 29730 }, { "epoch": 0.6388280276667956, "grad_norm": 0.5060176620986123, "learning_rate": 5.879812422619111e-06, "loss": 0.7152, "step": 29740 }, { "epoch": 0.6390428319800662, "grad_norm": 0.5031393620149552, "learning_rate": 5.873602345280033e-06, "loss": 0.7171, "step": 29750 }, { "epoch": 0.6392576362933368, "grad_norm": 0.5091217506585529, "learning_rate": 5.8673941852630735e-06, "loss": 0.7072, "step": 29760 }, { "epoch": 0.6394724406066074, "grad_norm": 0.5124995237491698, "learning_rate": 5.861187945452836e-06, "loss": 0.7179, "step": 29770 }, { "epoch": 0.6396872449198779, "grad_norm": 0.5111197620823217, "learning_rate": 5.854983628733046e-06, "loss": 0.7265, "step": 29780 }, { "epoch": 0.6399020492331486, "grad_norm": 0.5121226878357348, "learning_rate": 5.848781237986516e-06, "loss": 0.7058, "step": 29790 }, { "epoch": 0.6401168535464192, "grad_norm": 0.5029940756710116, "learning_rate": 5.842580776095186e-06, "loss": 0.7242, "step": 29800 }, { "epoch": 0.6403316578596898, "grad_norm": 0.518317509648807, "learning_rate": 5.8363822459400766e-06, "loss": 0.7122, "step": 29810 }, { "epoch": 0.6405464621729604, "grad_norm": 0.513796209625438, "learning_rate": 5.830185650401327e-06, "loss": 0.7109, "step": 29820 }, { "epoch": 0.640761266486231, "grad_norm": 0.5065915184714491, "learning_rate": 5.82399099235817e-06, "loss": 0.7185, "step": 29830 }, { "epoch": 0.6409760707995017, "grad_norm": 0.5383003131895742, "learning_rate": 5.817798274688945e-06, "loss": 0.7149, "step": 29840 }, { "epoch": 0.6411908751127723, "grad_norm": 0.5013183139637445, "learning_rate": 5.811607500271086e-06, "loss": 0.7061, "step": 29850 }, { "epoch": 0.6414056794260429, "grad_norm": 0.5084891027020223, "learning_rate": 5.805418671981112e-06, "loss": 0.7078, "step": 29860 }, { "epoch": 0.6416204837393135, "grad_norm": 0.5358856173892373, "learning_rate": 5.7992317926946666e-06, "loss": 0.7059, "step": 29870 }, { "epoch": 0.641835288052584, "grad_norm": 0.5118937595826556, "learning_rate": 5.793046865286457e-06, "loss": 0.7084, "step": 29880 }, { "epoch": 0.6420500923658548, "grad_norm": 0.5289502239664226, "learning_rate": 5.786863892630301e-06, "loss": 0.7186, "step": 29890 }, { "epoch": 0.6422648966791253, "grad_norm": 0.5030440160930806, "learning_rate": 5.78068287759911e-06, "loss": 0.7093, "step": 29900 }, { "epoch": 0.6424797009923959, "grad_norm": 0.5013233963022401, "learning_rate": 5.774503823064865e-06, "loss": 0.7046, "step": 29910 }, { "epoch": 0.6426945053056665, "grad_norm": 0.5218815308682272, "learning_rate": 5.768326731898668e-06, "loss": 0.7093, "step": 29920 }, { "epoch": 0.6429093096189371, "grad_norm": 0.4911776039589, "learning_rate": 5.762151606970681e-06, "loss": 0.6985, "step": 29930 }, { "epoch": 0.6431241139322078, "grad_norm": 0.4990229286965237, "learning_rate": 5.755978451150165e-06, "loss": 0.7039, "step": 29940 }, { "epoch": 0.6433389182454784, "grad_norm": 0.5017215398112681, "learning_rate": 5.749807267305469e-06, "loss": 0.719, "step": 29950 }, { "epoch": 0.643553722558749, "grad_norm": 0.49447190334501206, "learning_rate": 5.743638058304009e-06, "loss": 0.7096, "step": 29960 }, { "epoch": 0.6437685268720196, "grad_norm": 0.4932132883026573, "learning_rate": 5.737470827012309e-06, "loss": 0.719, "step": 29970 }, { "epoch": 0.6439833311852902, "grad_norm": 0.4990600666743509, "learning_rate": 5.731305576295951e-06, "loss": 0.7119, "step": 29980 }, { "epoch": 0.6441981354985609, "grad_norm": 0.49829438730309705, "learning_rate": 5.72514230901961e-06, "loss": 0.7111, "step": 29990 }, { "epoch": 0.6444129398118315, "grad_norm": 0.5142379504720469, "learning_rate": 5.7189810280470374e-06, "loss": 0.7182, "step": 30000 }, { "epoch": 0.644627744125102, "grad_norm": 0.5004279772636778, "learning_rate": 5.71282173624105e-06, "loss": 0.7123, "step": 30010 }, { "epoch": 0.6448425484383726, "grad_norm": 0.4997893834727797, "learning_rate": 5.706664436463563e-06, "loss": 0.7069, "step": 30020 }, { "epoch": 0.6450573527516432, "grad_norm": 0.5224573953092382, "learning_rate": 5.700509131575538e-06, "loss": 0.7315, "step": 30030 }, { "epoch": 0.6452721570649138, "grad_norm": 0.5157124232819824, "learning_rate": 5.6943558244370435e-06, "loss": 0.7085, "step": 30040 }, { "epoch": 0.6454869613781845, "grad_norm": 0.5098311956322868, "learning_rate": 5.688204517907184e-06, "loss": 0.698, "step": 30050 }, { "epoch": 0.6457017656914551, "grad_norm": 0.5585768535083954, "learning_rate": 5.6820552148441595e-06, "loss": 0.721, "step": 30060 }, { "epoch": 0.6459165700047257, "grad_norm": 0.5191401680283296, "learning_rate": 5.675907918105233e-06, "loss": 0.718, "step": 30070 }, { "epoch": 0.6461313743179963, "grad_norm": 0.5154719384443771, "learning_rate": 5.669762630546722e-06, "loss": 0.7135, "step": 30080 }, { "epoch": 0.6463461786312669, "grad_norm": 0.5161802110337572, "learning_rate": 5.663619355024037e-06, "loss": 0.7299, "step": 30090 }, { "epoch": 0.6465609829445376, "grad_norm": 0.5251836486068403, "learning_rate": 5.6574780943916265e-06, "loss": 0.7139, "step": 30100 }, { "epoch": 0.6467757872578082, "grad_norm": 0.5253745077383208, "learning_rate": 5.651338851503017e-06, "loss": 0.7081, "step": 30110 }, { "epoch": 0.6469905915710787, "grad_norm": 0.499068951345218, "learning_rate": 5.645201629210802e-06, "loss": 0.7118, "step": 30120 }, { "epoch": 0.6472053958843493, "grad_norm": 0.5212681373915045, "learning_rate": 5.639066430366616e-06, "loss": 0.7058, "step": 30130 }, { "epoch": 0.6474202001976199, "grad_norm": 0.5192551695189365, "learning_rate": 5.63293325782118e-06, "loss": 0.7209, "step": 30140 }, { "epoch": 0.6476350045108906, "grad_norm": 0.5009778176216481, "learning_rate": 5.626802114424252e-06, "loss": 0.7147, "step": 30150 }, { "epoch": 0.6478498088241612, "grad_norm": 0.511657473010408, "learning_rate": 5.620673003024654e-06, "loss": 0.7111, "step": 30160 }, { "epoch": 0.6480646131374318, "grad_norm": 0.4946400904637439, "learning_rate": 5.614545926470272e-06, "loss": 0.7039, "step": 30170 }, { "epoch": 0.6482794174507024, "grad_norm": 0.5452566834347506, "learning_rate": 5.608420887608033e-06, "loss": 0.7124, "step": 30180 }, { "epoch": 0.648494221763973, "grad_norm": 0.5187590204840195, "learning_rate": 5.60229788928393e-06, "loss": 0.7138, "step": 30190 }, { "epoch": 0.6487090260772437, "grad_norm": 0.4961448077999401, "learning_rate": 5.5961769343429885e-06, "loss": 0.7088, "step": 30200 }, { "epoch": 0.6489238303905143, "grad_norm": 0.49664867664736395, "learning_rate": 5.590058025629315e-06, "loss": 0.7043, "step": 30210 }, { "epoch": 0.6491386347037849, "grad_norm": 0.5089161076297286, "learning_rate": 5.5839411659860355e-06, "loss": 0.696, "step": 30220 }, { "epoch": 0.6493534390170554, "grad_norm": 0.5007244262694044, "learning_rate": 5.577826358255339e-06, "loss": 0.7104, "step": 30230 }, { "epoch": 0.649568243330326, "grad_norm": 0.5116267642012309, "learning_rate": 5.57171360527846e-06, "loss": 0.7176, "step": 30240 }, { "epoch": 0.6497830476435967, "grad_norm": 0.5110539680425629, "learning_rate": 5.5656029098956755e-06, "loss": 0.7176, "step": 30250 }, { "epoch": 0.6499978519568673, "grad_norm": 0.5087432238272799, "learning_rate": 5.559494274946311e-06, "loss": 0.7045, "step": 30260 }, { "epoch": 0.6502126562701379, "grad_norm": 0.5049786677025178, "learning_rate": 5.553387703268725e-06, "loss": 0.7105, "step": 30270 }, { "epoch": 0.6504274605834085, "grad_norm": 0.5099641743518565, "learning_rate": 5.547283197700324e-06, "loss": 0.7056, "step": 30280 }, { "epoch": 0.6506422648966791, "grad_norm": 0.5143331023536819, "learning_rate": 5.541180761077556e-06, "loss": 0.6937, "step": 30290 }, { "epoch": 0.6508570692099498, "grad_norm": 0.49683778878230767, "learning_rate": 5.535080396235906e-06, "loss": 0.7087, "step": 30300 }, { "epoch": 0.6510718735232204, "grad_norm": 0.533544500826429, "learning_rate": 5.528982106009899e-06, "loss": 0.7144, "step": 30310 }, { "epoch": 0.651286677836491, "grad_norm": 0.5151062048248995, "learning_rate": 5.5228858932330845e-06, "loss": 0.7009, "step": 30320 }, { "epoch": 0.6515014821497616, "grad_norm": 0.5122586500692712, "learning_rate": 5.5167917607380605e-06, "loss": 0.7031, "step": 30330 }, { "epoch": 0.6517162864630321, "grad_norm": 0.5212961303785209, "learning_rate": 5.510699711356451e-06, "loss": 0.6934, "step": 30340 }, { "epoch": 0.6519310907763027, "grad_norm": 0.5100624104875204, "learning_rate": 5.504609747918916e-06, "loss": 0.7259, "step": 30350 }, { "epoch": 0.6521458950895734, "grad_norm": 0.5072539914616855, "learning_rate": 5.498521873255145e-06, "loss": 0.7156, "step": 30360 }, { "epoch": 0.652360699402844, "grad_norm": 0.5025326619318836, "learning_rate": 5.4924360901938574e-06, "loss": 0.6881, "step": 30370 }, { "epoch": 0.6525755037161146, "grad_norm": 0.5194768705340874, "learning_rate": 5.486352401562796e-06, "loss": 0.6997, "step": 30380 }, { "epoch": 0.6527903080293852, "grad_norm": 0.4926980584558694, "learning_rate": 5.480270810188732e-06, "loss": 0.7079, "step": 30390 }, { "epoch": 0.6530051123426558, "grad_norm": 0.5301706061164216, "learning_rate": 5.4741913188974705e-06, "loss": 0.7148, "step": 30400 }, { "epoch": 0.6532199166559265, "grad_norm": 0.5077273663914399, "learning_rate": 5.468113930513831e-06, "loss": 0.702, "step": 30410 }, { "epoch": 0.6534347209691971, "grad_norm": 0.5113783715432562, "learning_rate": 5.462038647861659e-06, "loss": 0.7237, "step": 30420 }, { "epoch": 0.6536495252824677, "grad_norm": 0.5350113300965483, "learning_rate": 5.455965473763824e-06, "loss": 0.7077, "step": 30430 }, { "epoch": 0.6538643295957383, "grad_norm": 0.534802331833065, "learning_rate": 5.449894411042218e-06, "loss": 0.7098, "step": 30440 }, { "epoch": 0.6540791339090088, "grad_norm": 0.505768970566755, "learning_rate": 5.4438254625177376e-06, "loss": 0.7037, "step": 30450 }, { "epoch": 0.6542939382222795, "grad_norm": 0.5243742881101912, "learning_rate": 5.437758631010313e-06, "loss": 0.7184, "step": 30460 }, { "epoch": 0.6545087425355501, "grad_norm": 0.5174893952785108, "learning_rate": 5.431693919338883e-06, "loss": 0.7074, "step": 30470 }, { "epoch": 0.6547235468488207, "grad_norm": 0.5018291601456127, "learning_rate": 5.425631330321403e-06, "loss": 0.7181, "step": 30480 }, { "epoch": 0.6549383511620913, "grad_norm": 0.509935560750042, "learning_rate": 5.41957086677485e-06, "loss": 0.7272, "step": 30490 }, { "epoch": 0.6551531554753619, "grad_norm": 0.5106281701236529, "learning_rate": 5.413512531515195e-06, "loss": 0.7181, "step": 30500 }, { "epoch": 0.6553679597886326, "grad_norm": 0.5021707092734177, "learning_rate": 5.407456327357437e-06, "loss": 0.7122, "step": 30510 }, { "epoch": 0.6555827641019032, "grad_norm": 0.5211648146663314, "learning_rate": 5.401402257115576e-06, "loss": 0.7042, "step": 30520 }, { "epoch": 0.6557975684151738, "grad_norm": 0.5108248508895996, "learning_rate": 5.395350323602624e-06, "loss": 0.7027, "step": 30530 }, { "epoch": 0.6560123727284444, "grad_norm": 0.504994079751055, "learning_rate": 5.389300529630603e-06, "loss": 0.7124, "step": 30540 }, { "epoch": 0.656227177041715, "grad_norm": 0.5035319701169906, "learning_rate": 5.383252878010528e-06, "loss": 0.7117, "step": 30550 }, { "epoch": 0.6564419813549857, "grad_norm": 0.5140239243917751, "learning_rate": 5.3772073715524405e-06, "loss": 0.7143, "step": 30560 }, { "epoch": 0.6566567856682562, "grad_norm": 0.5014878071394002, "learning_rate": 5.371164013065362e-06, "loss": 0.7013, "step": 30570 }, { "epoch": 0.6568715899815268, "grad_norm": 0.530273663858455, "learning_rate": 5.365122805357331e-06, "loss": 0.7137, "step": 30580 }, { "epoch": 0.6570863942947974, "grad_norm": 0.5249382641477562, "learning_rate": 5.3590837512353855e-06, "loss": 0.7093, "step": 30590 }, { "epoch": 0.657301198608068, "grad_norm": 0.5048796782346253, "learning_rate": 5.353046853505548e-06, "loss": 0.7091, "step": 30600 }, { "epoch": 0.6575160029213387, "grad_norm": 0.5091027814069775, "learning_rate": 5.347012114972865e-06, "loss": 0.7028, "step": 30610 }, { "epoch": 0.6577308072346093, "grad_norm": 0.5113428171167113, "learning_rate": 5.34097953844135e-06, "loss": 0.7142, "step": 30620 }, { "epoch": 0.6579456115478799, "grad_norm": 0.4914977814112626, "learning_rate": 5.334949126714044e-06, "loss": 0.7137, "step": 30630 }, { "epoch": 0.6581604158611505, "grad_norm": 0.4964243055566761, "learning_rate": 5.328920882592953e-06, "loss": 0.7087, "step": 30640 }, { "epoch": 0.6583752201744211, "grad_norm": 0.5207847747861839, "learning_rate": 5.322894808879091e-06, "loss": 0.7134, "step": 30650 }, { "epoch": 0.6585900244876917, "grad_norm": 0.5043669757937382, "learning_rate": 5.3168709083724664e-06, "loss": 0.7004, "step": 30660 }, { "epoch": 0.6588048288009624, "grad_norm": 0.511841340255158, "learning_rate": 5.310849183872059e-06, "loss": 0.7081, "step": 30670 }, { "epoch": 0.659019633114233, "grad_norm": 0.5084802396159405, "learning_rate": 5.304829638175866e-06, "loss": 0.7027, "step": 30680 }, { "epoch": 0.6592344374275035, "grad_norm": 0.5068077969636335, "learning_rate": 5.298812274080847e-06, "loss": 0.7068, "step": 30690 }, { "epoch": 0.6594492417407741, "grad_norm": 0.5156077038006672, "learning_rate": 5.29279709438296e-06, "loss": 0.7155, "step": 30700 }, { "epoch": 0.6596640460540447, "grad_norm": 0.5004198196593579, "learning_rate": 5.286784101877149e-06, "loss": 0.7049, "step": 30710 }, { "epoch": 0.6598788503673154, "grad_norm": 0.5306581949971477, "learning_rate": 5.2807732993573295e-06, "loss": 0.7078, "step": 30720 }, { "epoch": 0.660093654680586, "grad_norm": 0.5036574433980202, "learning_rate": 5.2747646896164215e-06, "loss": 0.6997, "step": 30730 }, { "epoch": 0.6603084589938566, "grad_norm": 0.52149079735347, "learning_rate": 5.268758275446303e-06, "loss": 0.7228, "step": 30740 }, { "epoch": 0.6605232633071272, "grad_norm": 0.5079017436216969, "learning_rate": 5.262754059637845e-06, "loss": 0.7057, "step": 30750 }, { "epoch": 0.6607380676203978, "grad_norm": 0.5013233677412027, "learning_rate": 5.2567520449808975e-06, "loss": 0.7175, "step": 30760 }, { "epoch": 0.6609528719336685, "grad_norm": 0.5172472025182714, "learning_rate": 5.2507522342642725e-06, "loss": 0.7074, "step": 30770 }, { "epoch": 0.6611676762469391, "grad_norm": 0.5302279379741301, "learning_rate": 5.244754630275786e-06, "loss": 0.7046, "step": 30780 }, { "epoch": 0.6613824805602097, "grad_norm": 0.5103884640220524, "learning_rate": 5.2387592358021954e-06, "loss": 0.7087, "step": 30790 }, { "epoch": 0.6615972848734802, "grad_norm": 0.5266498719563615, "learning_rate": 5.2327660536292655e-06, "loss": 0.7208, "step": 30800 }, { "epoch": 0.6618120891867508, "grad_norm": 0.547112136398287, "learning_rate": 5.226775086541705e-06, "loss": 0.7246, "step": 30810 }, { "epoch": 0.6620268935000215, "grad_norm": 0.5134807747932422, "learning_rate": 5.220786337323205e-06, "loss": 0.7038, "step": 30820 }, { "epoch": 0.6622416978132921, "grad_norm": 0.5069417485434651, "learning_rate": 5.214799808756432e-06, "loss": 0.7267, "step": 30830 }, { "epoch": 0.6624565021265627, "grad_norm": 0.5112475490765994, "learning_rate": 5.208815503623001e-06, "loss": 0.7049, "step": 30840 }, { "epoch": 0.6626713064398333, "grad_norm": 0.49798758139115207, "learning_rate": 5.202833424703524e-06, "loss": 0.7049, "step": 30850 }, { "epoch": 0.6628861107531039, "grad_norm": 0.5208558474736136, "learning_rate": 5.196853574777547e-06, "loss": 0.7054, "step": 30860 }, { "epoch": 0.6631009150663746, "grad_norm": 0.5143732263552888, "learning_rate": 5.190875956623602e-06, "loss": 0.7161, "step": 30870 }, { "epoch": 0.6633157193796452, "grad_norm": 0.5072200840559442, "learning_rate": 5.184900573019179e-06, "loss": 0.7143, "step": 30880 }, { "epoch": 0.6635305236929158, "grad_norm": 0.5148338042683931, "learning_rate": 5.1789274267407174e-06, "loss": 0.7197, "step": 30890 }, { "epoch": 0.6637453280061864, "grad_norm": 0.5355543429844896, "learning_rate": 5.172956520563641e-06, "loss": 0.6953, "step": 30900 }, { "epoch": 0.6639601323194569, "grad_norm": 0.5111517548781118, "learning_rate": 5.166987857262309e-06, "loss": 0.704, "step": 30910 }, { "epoch": 0.6641749366327275, "grad_norm": 0.5176946751784909, "learning_rate": 5.16102143961005e-06, "loss": 0.7041, "step": 30920 }, { "epoch": 0.6643897409459982, "grad_norm": 0.5096883894063213, "learning_rate": 5.155057270379149e-06, "loss": 0.7065, "step": 30930 }, { "epoch": 0.6646045452592688, "grad_norm": 0.5077111477162392, "learning_rate": 5.149095352340847e-06, "loss": 0.6996, "step": 30940 }, { "epoch": 0.6648193495725394, "grad_norm": 0.49554593063940544, "learning_rate": 5.143135688265337e-06, "loss": 0.7097, "step": 30950 }, { "epoch": 0.66503415388581, "grad_norm": 0.499479767542394, "learning_rate": 5.137178280921759e-06, "loss": 0.7176, "step": 30960 }, { "epoch": 0.6652489581990806, "grad_norm": 0.49131352882866786, "learning_rate": 5.131223133078213e-06, "loss": 0.7166, "step": 30970 }, { "epoch": 0.6654637625123513, "grad_norm": 0.5522986513731968, "learning_rate": 5.125270247501746e-06, "loss": 0.7076, "step": 30980 }, { "epoch": 0.6656785668256219, "grad_norm": 0.5241166130212557, "learning_rate": 5.119319626958354e-06, "loss": 0.7071, "step": 30990 }, { "epoch": 0.6658933711388925, "grad_norm": 0.5161672474322907, "learning_rate": 5.113371274212984e-06, "loss": 0.6892, "step": 31000 }, { "epoch": 0.666108175452163, "grad_norm": 0.5019103502653095, "learning_rate": 5.1074251920295194e-06, "loss": 0.6991, "step": 31010 }, { "epoch": 0.6663229797654336, "grad_norm": 0.5032297589781682, "learning_rate": 5.1014813831708035e-06, "loss": 0.7206, "step": 31020 }, { "epoch": 0.6665377840787043, "grad_norm": 0.5002814476280841, "learning_rate": 5.095539850398605e-06, "loss": 0.7048, "step": 31030 }, { "epoch": 0.6667525883919749, "grad_norm": 0.5156688113615284, "learning_rate": 5.089600596473649e-06, "loss": 0.7189, "step": 31040 }, { "epoch": 0.6669673927052455, "grad_norm": 0.5188971013023798, "learning_rate": 5.083663624155598e-06, "loss": 0.7107, "step": 31050 }, { "epoch": 0.6671821970185161, "grad_norm": 0.5203051062665321, "learning_rate": 5.077728936203055e-06, "loss": 0.7045, "step": 31060 }, { "epoch": 0.6673970013317867, "grad_norm": 0.5041312268909838, "learning_rate": 5.071796535373561e-06, "loss": 0.7156, "step": 31070 }, { "epoch": 0.6676118056450574, "grad_norm": 0.5159105151923982, "learning_rate": 5.065866424423589e-06, "loss": 0.6996, "step": 31080 }, { "epoch": 0.667826609958328, "grad_norm": 0.5186673038935381, "learning_rate": 5.059938606108554e-06, "loss": 0.6947, "step": 31090 }, { "epoch": 0.6680414142715986, "grad_norm": 0.5233119049957315, "learning_rate": 5.054013083182808e-06, "loss": 0.7172, "step": 31100 }, { "epoch": 0.6682562185848692, "grad_norm": 0.5067736527978581, "learning_rate": 5.048089858399632e-06, "loss": 0.7021, "step": 31110 }, { "epoch": 0.6684710228981398, "grad_norm": 0.5039632454745936, "learning_rate": 5.042168934511237e-06, "loss": 0.7059, "step": 31120 }, { "epoch": 0.6686858272114105, "grad_norm": 0.5504075401326516, "learning_rate": 5.036250314268775e-06, "loss": 0.7125, "step": 31130 }, { "epoch": 0.668900631524681, "grad_norm": 0.5140327017691517, "learning_rate": 5.030334000422311e-06, "loss": 0.716, "step": 31140 }, { "epoch": 0.6691154358379516, "grad_norm": 0.5174324057304772, "learning_rate": 5.024419995720854e-06, "loss": 0.7091, "step": 31150 }, { "epoch": 0.6693302401512222, "grad_norm": 0.5105348539745065, "learning_rate": 5.0185083029123326e-06, "loss": 0.7188, "step": 31160 }, { "epoch": 0.6695450444644928, "grad_norm": 0.5104216621284471, "learning_rate": 5.012598924743603e-06, "loss": 0.6973, "step": 31170 }, { "epoch": 0.6697598487777635, "grad_norm": 0.5015920565585724, "learning_rate": 5.006691863960448e-06, "loss": 0.7, "step": 31180 }, { "epoch": 0.6699746530910341, "grad_norm": 0.5246820583424576, "learning_rate": 5.000787123307562e-06, "loss": 0.7043, "step": 31190 }, { "epoch": 0.6701894574043047, "grad_norm": 0.5157821738943842, "learning_rate": 4.994884705528583e-06, "loss": 0.7145, "step": 31200 }, { "epoch": 0.6704042617175753, "grad_norm": 0.49631469998188016, "learning_rate": 4.9889846133660495e-06, "loss": 0.7218, "step": 31210 }, { "epoch": 0.6706190660308459, "grad_norm": 0.49915868212535197, "learning_rate": 4.983086849561427e-06, "loss": 0.7047, "step": 31220 }, { "epoch": 0.6708338703441165, "grad_norm": 0.5307081421883926, "learning_rate": 4.977191416855102e-06, "loss": 0.7166, "step": 31230 }, { "epoch": 0.6710486746573872, "grad_norm": 0.5169884060557652, "learning_rate": 4.971298317986374e-06, "loss": 0.7257, "step": 31240 }, { "epoch": 0.6712634789706577, "grad_norm": 0.5009977824029985, "learning_rate": 4.965407555693464e-06, "loss": 0.7004, "step": 31250 }, { "epoch": 0.6714782832839283, "grad_norm": 0.5155270739896842, "learning_rate": 4.9595191327134915e-06, "loss": 0.7153, "step": 31260 }, { "epoch": 0.6716930875971989, "grad_norm": 0.5044556594755597, "learning_rate": 4.9536330517825085e-06, "loss": 0.7048, "step": 31270 }, { "epoch": 0.6719078919104695, "grad_norm": 0.5050373362709072, "learning_rate": 4.947749315635467e-06, "loss": 0.7176, "step": 31280 }, { "epoch": 0.6721226962237402, "grad_norm": 0.4952372981882212, "learning_rate": 4.9418679270062345e-06, "loss": 0.7315, "step": 31290 }, { "epoch": 0.6723375005370108, "grad_norm": 0.5075005573307158, "learning_rate": 4.935988888627589e-06, "loss": 0.6999, "step": 31300 }, { "epoch": 0.6725523048502814, "grad_norm": 0.5237607258041448, "learning_rate": 4.930112203231202e-06, "loss": 0.7199, "step": 31310 }, { "epoch": 0.672767109163552, "grad_norm": 0.5140569272272469, "learning_rate": 4.924237873547678e-06, "loss": 0.7062, "step": 31320 }, { "epoch": 0.6729819134768226, "grad_norm": 0.5064496524470513, "learning_rate": 4.9183659023065035e-06, "loss": 0.7098, "step": 31330 }, { "epoch": 0.6731967177900933, "grad_norm": 0.49850427047279267, "learning_rate": 4.912496292236078e-06, "loss": 0.7127, "step": 31340 }, { "epoch": 0.6734115221033639, "grad_norm": 0.5129869200842382, "learning_rate": 4.906629046063709e-06, "loss": 0.6957, "step": 31350 }, { "epoch": 0.6736263264166344, "grad_norm": 0.5037255148710768, "learning_rate": 4.900764166515589e-06, "loss": 0.7053, "step": 31360 }, { "epoch": 0.673841130729905, "grad_norm": 0.5218708185856743, "learning_rate": 4.894901656316837e-06, "loss": 0.6969, "step": 31370 }, { "epoch": 0.6740559350431756, "grad_norm": 0.5106451275587679, "learning_rate": 4.889041518191442e-06, "loss": 0.7029, "step": 31380 }, { "epoch": 0.6742707393564463, "grad_norm": 0.504620024778387, "learning_rate": 4.883183754862319e-06, "loss": 0.7108, "step": 31390 }, { "epoch": 0.6744855436697169, "grad_norm": 0.500245116066161, "learning_rate": 4.877328369051254e-06, "loss": 0.7023, "step": 31400 }, { "epoch": 0.6747003479829875, "grad_norm": 0.4984112969568294, "learning_rate": 4.871475363478945e-06, "loss": 0.6984, "step": 31410 }, { "epoch": 0.6749151522962581, "grad_norm": 0.490340528966453, "learning_rate": 4.865624740864981e-06, "loss": 0.7171, "step": 31420 }, { "epoch": 0.6751299566095287, "grad_norm": 0.5154565088297944, "learning_rate": 4.859776503927831e-06, "loss": 0.7254, "step": 31430 }, { "epoch": 0.6753447609227994, "grad_norm": 0.5162711441600745, "learning_rate": 4.8539306553848835e-06, "loss": 0.7, "step": 31440 }, { "epoch": 0.67555956523607, "grad_norm": 0.5296439320774667, "learning_rate": 4.848087197952385e-06, "loss": 0.7201, "step": 31450 }, { "epoch": 0.6757743695493406, "grad_norm": 0.5211601804495921, "learning_rate": 4.842246134345492e-06, "loss": 0.7077, "step": 31460 }, { "epoch": 0.6759891738626111, "grad_norm": 0.5204672709973052, "learning_rate": 4.8364074672782445e-06, "loss": 0.7025, "step": 31470 }, { "epoch": 0.6762039781758817, "grad_norm": 0.5443914115894181, "learning_rate": 4.8305711994635585e-06, "loss": 0.7161, "step": 31480 }, { "epoch": 0.6764187824891524, "grad_norm": 0.5398665273534337, "learning_rate": 4.8247373336132565e-06, "loss": 0.7152, "step": 31490 }, { "epoch": 0.676633586802423, "grad_norm": 0.5205394432033161, "learning_rate": 4.818905872438021e-06, "loss": 0.7045, "step": 31500 }, { "epoch": 0.6768483911156936, "grad_norm": 0.505861773994322, "learning_rate": 4.813076818647434e-06, "loss": 0.7176, "step": 31510 }, { "epoch": 0.6770631954289642, "grad_norm": 0.5138045231439949, "learning_rate": 4.807250174949955e-06, "loss": 0.7092, "step": 31520 }, { "epoch": 0.6772779997422348, "grad_norm": 0.5198137824432044, "learning_rate": 4.801425944052911e-06, "loss": 0.7115, "step": 31530 }, { "epoch": 0.6774928040555054, "grad_norm": 0.5121939716704446, "learning_rate": 4.795604128662536e-06, "loss": 0.7001, "step": 31540 }, { "epoch": 0.6777076083687761, "grad_norm": 0.5036727571961231, "learning_rate": 4.78978473148391e-06, "loss": 0.7005, "step": 31550 }, { "epoch": 0.6779224126820467, "grad_norm": 0.4963339926656166, "learning_rate": 4.783967755221008e-06, "loss": 0.6981, "step": 31560 }, { "epoch": 0.6781372169953173, "grad_norm": 0.5026469805657872, "learning_rate": 4.778153202576678e-06, "loss": 0.7061, "step": 31570 }, { "epoch": 0.6783520213085878, "grad_norm": 0.4888239012176708, "learning_rate": 4.772341076252639e-06, "loss": 0.6928, "step": 31580 }, { "epoch": 0.6785668256218584, "grad_norm": 0.5033386278198433, "learning_rate": 4.766531378949487e-06, "loss": 0.7066, "step": 31590 }, { "epoch": 0.6787816299351291, "grad_norm": 0.5076861079010834, "learning_rate": 4.7607241133666735e-06, "loss": 0.7045, "step": 31600 }, { "epoch": 0.6789964342483997, "grad_norm": 0.5368479007734174, "learning_rate": 4.75491928220255e-06, "loss": 0.7136, "step": 31610 }, { "epoch": 0.6792112385616703, "grad_norm": 0.5082100349533373, "learning_rate": 4.749116888154306e-06, "loss": 0.7046, "step": 31620 }, { "epoch": 0.6794260428749409, "grad_norm": 0.5133784115796547, "learning_rate": 4.743316933918016e-06, "loss": 0.7035, "step": 31630 }, { "epoch": 0.6796408471882115, "grad_norm": 0.5274702887841751, "learning_rate": 4.737519422188617e-06, "loss": 0.707, "step": 31640 }, { "epoch": 0.6798556515014822, "grad_norm": 0.5053552682276936, "learning_rate": 4.73172435565991e-06, "loss": 0.7126, "step": 31650 }, { "epoch": 0.6800704558147528, "grad_norm": 0.529992238236458, "learning_rate": 4.725931737024565e-06, "loss": 0.7124, "step": 31660 }, { "epoch": 0.6802852601280234, "grad_norm": 0.5316204503933539, "learning_rate": 4.720141568974104e-06, "loss": 0.7188, "step": 31670 }, { "epoch": 0.680500064441294, "grad_norm": 0.5054012212316678, "learning_rate": 4.714353854198918e-06, "loss": 0.7118, "step": 31680 }, { "epoch": 0.6807148687545646, "grad_norm": 0.5143920728251262, "learning_rate": 4.708568595388258e-06, "loss": 0.7087, "step": 31690 }, { "epoch": 0.6809296730678353, "grad_norm": 0.5322332430035215, "learning_rate": 4.7027857952302315e-06, "loss": 0.7123, "step": 31700 }, { "epoch": 0.6811444773811058, "grad_norm": 0.520262064220361, "learning_rate": 4.697005456411811e-06, "loss": 0.7156, "step": 31710 }, { "epoch": 0.6813592816943764, "grad_norm": 0.5095960914177793, "learning_rate": 4.691227581618808e-06, "loss": 0.7102, "step": 31720 }, { "epoch": 0.681574086007647, "grad_norm": 0.509847182822789, "learning_rate": 4.685452173535906e-06, "loss": 0.7073, "step": 31730 }, { "epoch": 0.6817888903209176, "grad_norm": 0.4928338256132167, "learning_rate": 4.679679234846636e-06, "loss": 0.7094, "step": 31740 }, { "epoch": 0.6820036946341883, "grad_norm": 0.5281801273797183, "learning_rate": 4.67390876823338e-06, "loss": 0.7103, "step": 31750 }, { "epoch": 0.6822184989474589, "grad_norm": 0.5147073369745232, "learning_rate": 4.668140776377378e-06, "loss": 0.7138, "step": 31760 }, { "epoch": 0.6824333032607295, "grad_norm": 0.518041164686382, "learning_rate": 4.66237526195871e-06, "loss": 0.7056, "step": 31770 }, { "epoch": 0.6826481075740001, "grad_norm": 0.5032736884933615, "learning_rate": 4.656612227656318e-06, "loss": 0.7005, "step": 31780 }, { "epoch": 0.6828629118872707, "grad_norm": 0.5164912240008823, "learning_rate": 4.650851676147976e-06, "loss": 0.7112, "step": 31790 }, { "epoch": 0.6830777162005413, "grad_norm": 0.5190164964456715, "learning_rate": 4.645093610110314e-06, "loss": 0.7031, "step": 31800 }, { "epoch": 0.683292520513812, "grad_norm": 0.499193870377874, "learning_rate": 4.639338032218806e-06, "loss": 0.6953, "step": 31810 }, { "epoch": 0.6835073248270825, "grad_norm": 0.5107643101481627, "learning_rate": 4.633584945147771e-06, "loss": 0.7058, "step": 31820 }, { "epoch": 0.6837221291403531, "grad_norm": 0.4788958047068638, "learning_rate": 4.627834351570368e-06, "loss": 0.6999, "step": 31830 }, { "epoch": 0.6839369334536237, "grad_norm": 0.5243752262151895, "learning_rate": 4.622086254158602e-06, "loss": 0.7093, "step": 31840 }, { "epoch": 0.6841517377668943, "grad_norm": 0.5117247308515849, "learning_rate": 4.616340655583307e-06, "loss": 0.7127, "step": 31850 }, { "epoch": 0.684366542080165, "grad_norm": 0.5039057854230863, "learning_rate": 4.610597558514167e-06, "loss": 0.7155, "step": 31860 }, { "epoch": 0.6845813463934356, "grad_norm": 0.5110748098732393, "learning_rate": 4.6048569656197005e-06, "loss": 0.7182, "step": 31870 }, { "epoch": 0.6847961507067062, "grad_norm": 0.5033241610487543, "learning_rate": 4.599118879567262e-06, "loss": 0.7178, "step": 31880 }, { "epoch": 0.6850109550199768, "grad_norm": 0.5330181219085199, "learning_rate": 4.593383303023047e-06, "loss": 0.6988, "step": 31890 }, { "epoch": 0.6852257593332474, "grad_norm": 0.5146643640112155, "learning_rate": 4.587650238652068e-06, "loss": 0.7071, "step": 31900 }, { "epoch": 0.6854405636465181, "grad_norm": 0.5558498206129705, "learning_rate": 4.581919689118187e-06, "loss": 0.7089, "step": 31910 }, { "epoch": 0.6856553679597887, "grad_norm": 0.5051408069436808, "learning_rate": 4.576191657084093e-06, "loss": 0.7171, "step": 31920 }, { "epoch": 0.6858701722730592, "grad_norm": 0.4947858066673929, "learning_rate": 4.570466145211303e-06, "loss": 0.705, "step": 31930 }, { "epoch": 0.6860849765863298, "grad_norm": 0.49215658445409516, "learning_rate": 4.564743156160167e-06, "loss": 0.7004, "step": 31940 }, { "epoch": 0.6862997808996004, "grad_norm": 0.5052799951057044, "learning_rate": 4.559022692589851e-06, "loss": 0.7048, "step": 31950 }, { "epoch": 0.6865145852128711, "grad_norm": 0.49573841106926664, "learning_rate": 4.553304757158372e-06, "loss": 0.7074, "step": 31960 }, { "epoch": 0.6867293895261417, "grad_norm": 0.5143793031370387, "learning_rate": 4.547589352522542e-06, "loss": 0.7039, "step": 31970 }, { "epoch": 0.6869441938394123, "grad_norm": 0.504158519855662, "learning_rate": 4.541876481338019e-06, "loss": 0.7006, "step": 31980 }, { "epoch": 0.6871589981526829, "grad_norm": 0.503671838515148, "learning_rate": 4.536166146259276e-06, "loss": 0.7147, "step": 31990 }, { "epoch": 0.6873738024659535, "grad_norm": 0.5287913454757696, "learning_rate": 4.530458349939606e-06, "loss": 0.7243, "step": 32000 }, { "epoch": 0.6875886067792242, "grad_norm": 0.5016494023049036, "learning_rate": 4.524753095031133e-06, "loss": 0.7028, "step": 32010 }, { "epoch": 0.6878034110924948, "grad_norm": 0.5166519761915471, "learning_rate": 4.519050384184775e-06, "loss": 0.7129, "step": 32020 }, { "epoch": 0.6880182154057654, "grad_norm": 0.5158977598310385, "learning_rate": 4.513350220050305e-06, "loss": 0.7071, "step": 32030 }, { "epoch": 0.6882330197190359, "grad_norm": 0.5016269659965205, "learning_rate": 4.507652605276278e-06, "loss": 0.7047, "step": 32040 }, { "epoch": 0.6884478240323065, "grad_norm": 0.5163690901069261, "learning_rate": 4.501957542510085e-06, "loss": 0.7117, "step": 32050 }, { "epoch": 0.6886626283455772, "grad_norm": 0.5108399393202574, "learning_rate": 4.4962650343979255e-06, "loss": 0.7183, "step": 32060 }, { "epoch": 0.6888774326588478, "grad_norm": 0.5084486965060432, "learning_rate": 4.490575083584804e-06, "loss": 0.7138, "step": 32070 }, { "epoch": 0.6890922369721184, "grad_norm": 0.5218646619009035, "learning_rate": 4.484887692714557e-06, "loss": 0.7106, "step": 32080 }, { "epoch": 0.689307041285389, "grad_norm": 0.5201258534596843, "learning_rate": 4.479202864429808e-06, "loss": 0.6951, "step": 32090 }, { "epoch": 0.6895218455986596, "grad_norm": 0.516396272076133, "learning_rate": 4.473520601372006e-06, "loss": 0.7136, "step": 32100 }, { "epoch": 0.6897366499119302, "grad_norm": 0.5032875799939698, "learning_rate": 4.467840906181403e-06, "loss": 0.6944, "step": 32110 }, { "epoch": 0.6899514542252009, "grad_norm": 0.49750240137602164, "learning_rate": 4.462163781497051e-06, "loss": 0.7029, "step": 32120 }, { "epoch": 0.6901662585384715, "grad_norm": 0.5033581986528483, "learning_rate": 4.456489229956826e-06, "loss": 0.7043, "step": 32130 }, { "epoch": 0.6903810628517421, "grad_norm": 0.5166768407944105, "learning_rate": 4.450817254197386e-06, "loss": 0.6929, "step": 32140 }, { "epoch": 0.6905958671650126, "grad_norm": 0.4918195538890641, "learning_rate": 4.4451478568542064e-06, "loss": 0.7109, "step": 32150 }, { "epoch": 0.6908106714782832, "grad_norm": 0.5145980685669729, "learning_rate": 4.439481040561565e-06, "loss": 0.7008, "step": 32160 }, { "epoch": 0.6910254757915539, "grad_norm": 0.5170120361869788, "learning_rate": 4.433816807952525e-06, "loss": 0.7102, "step": 32170 }, { "epoch": 0.6912402801048245, "grad_norm": 0.5390912485506946, "learning_rate": 4.428155161658976e-06, "loss": 0.6978, "step": 32180 }, { "epoch": 0.6914550844180951, "grad_norm": 0.513671096300247, "learning_rate": 4.422496104311574e-06, "loss": 0.704, "step": 32190 }, { "epoch": 0.6916698887313657, "grad_norm": 0.5512396747122822, "learning_rate": 4.416839638539804e-06, "loss": 0.7071, "step": 32200 }, { "epoch": 0.6918846930446363, "grad_norm": 0.5039999780605813, "learning_rate": 4.411185766971919e-06, "loss": 0.7098, "step": 32210 }, { "epoch": 0.692099497357907, "grad_norm": 0.5056673300275484, "learning_rate": 4.4055344922349845e-06, "loss": 0.7002, "step": 32220 }, { "epoch": 0.6923143016711776, "grad_norm": 0.5228713724860102, "learning_rate": 4.399885816954855e-06, "loss": 0.7069, "step": 32230 }, { "epoch": 0.6925291059844482, "grad_norm": 0.5154170548308558, "learning_rate": 4.394239743756166e-06, "loss": 0.7173, "step": 32240 }, { "epoch": 0.6927439102977188, "grad_norm": 0.4932044034667852, "learning_rate": 4.3885962752623675e-06, "loss": 0.7, "step": 32250 }, { "epoch": 0.6929587146109893, "grad_norm": 0.5291577432915663, "learning_rate": 4.382955414095675e-06, "loss": 0.707, "step": 32260 }, { "epoch": 0.69317351892426, "grad_norm": 0.5062081138297674, "learning_rate": 4.3773171628771075e-06, "loss": 0.7053, "step": 32270 }, { "epoch": 0.6933883232375306, "grad_norm": 0.5235760210430898, "learning_rate": 4.371681524226469e-06, "loss": 0.7203, "step": 32280 }, { "epoch": 0.6936031275508012, "grad_norm": 0.5061927155930297, "learning_rate": 4.366048500762335e-06, "loss": 0.7112, "step": 32290 }, { "epoch": 0.6938179318640718, "grad_norm": 0.5131390727212936, "learning_rate": 4.360418095102097e-06, "loss": 0.7095, "step": 32300 }, { "epoch": 0.6940327361773424, "grad_norm": 0.5280050051289648, "learning_rate": 4.354790309861897e-06, "loss": 0.7088, "step": 32310 }, { "epoch": 0.6942475404906131, "grad_norm": 0.5170240200200222, "learning_rate": 4.349165147656679e-06, "loss": 0.6946, "step": 32320 }, { "epoch": 0.6944623448038837, "grad_norm": 0.49194844383818315, "learning_rate": 4.343542611100161e-06, "loss": 0.7109, "step": 32330 }, { "epoch": 0.6946771491171543, "grad_norm": 0.5021844766055354, "learning_rate": 4.337922702804846e-06, "loss": 0.6949, "step": 32340 }, { "epoch": 0.6948919534304249, "grad_norm": 0.5099714451400136, "learning_rate": 4.332305425382013e-06, "loss": 0.707, "step": 32350 }, { "epoch": 0.6951067577436955, "grad_norm": 0.5212733236473239, "learning_rate": 4.326690781441711e-06, "loss": 0.6997, "step": 32360 }, { "epoch": 0.6953215620569662, "grad_norm": 0.4903110024798184, "learning_rate": 4.3210787735927824e-06, "loss": 0.7021, "step": 32370 }, { "epoch": 0.6955363663702367, "grad_norm": 0.505886652482517, "learning_rate": 4.315469404442829e-06, "loss": 0.7, "step": 32380 }, { "epoch": 0.6957511706835073, "grad_norm": 0.5348702621789376, "learning_rate": 4.309862676598233e-06, "loss": 0.6989, "step": 32390 }, { "epoch": 0.6959659749967779, "grad_norm": 0.5053002509205807, "learning_rate": 4.304258592664151e-06, "loss": 0.705, "step": 32400 }, { "epoch": 0.6961807793100485, "grad_norm": 0.49313377903105116, "learning_rate": 4.298657155244508e-06, "loss": 0.7126, "step": 32410 }, { "epoch": 0.6963955836233191, "grad_norm": 0.49836073223251326, "learning_rate": 4.293058366942004e-06, "loss": 0.6942, "step": 32420 }, { "epoch": 0.6966103879365898, "grad_norm": 0.5033826955151206, "learning_rate": 4.287462230358097e-06, "loss": 0.69, "step": 32430 }, { "epoch": 0.6968251922498604, "grad_norm": 0.5124591714204908, "learning_rate": 4.281868748093023e-06, "loss": 0.7088, "step": 32440 }, { "epoch": 0.697039996563131, "grad_norm": 0.5152063252453369, "learning_rate": 4.276277922745784e-06, "loss": 0.7235, "step": 32450 }, { "epoch": 0.6972548008764016, "grad_norm": 0.5068795899169701, "learning_rate": 4.2706897569141435e-06, "loss": 0.6995, "step": 32460 }, { "epoch": 0.6974696051896722, "grad_norm": 0.5194180550407479, "learning_rate": 4.2651042531946364e-06, "loss": 0.7132, "step": 32470 }, { "epoch": 0.6976844095029429, "grad_norm": 0.5093344990238854, "learning_rate": 4.259521414182547e-06, "loss": 0.6991, "step": 32480 }, { "epoch": 0.6978992138162134, "grad_norm": 0.5226727716813793, "learning_rate": 4.253941242471934e-06, "loss": 0.7052, "step": 32490 }, { "epoch": 0.698114018129484, "grad_norm": 0.5101918234709942, "learning_rate": 4.248363740655612e-06, "loss": 0.709, "step": 32500 }, { "epoch": 0.6983288224427546, "grad_norm": 0.5101101585718761, "learning_rate": 4.242788911325156e-06, "loss": 0.7108, "step": 32510 }, { "epoch": 0.6985436267560252, "grad_norm": 0.4867970175259229, "learning_rate": 4.2372167570709e-06, "loss": 0.7042, "step": 32520 }, { "epoch": 0.6987584310692959, "grad_norm": 0.5014423746270559, "learning_rate": 4.231647280481936e-06, "loss": 0.694, "step": 32530 }, { "epoch": 0.6989732353825665, "grad_norm": 0.5146828954090973, "learning_rate": 4.226080484146103e-06, "loss": 0.7207, "step": 32540 }, { "epoch": 0.6991880396958371, "grad_norm": 0.4901928041129853, "learning_rate": 4.220516370650007e-06, "loss": 0.6995, "step": 32550 }, { "epoch": 0.6994028440091077, "grad_norm": 0.5066794982082476, "learning_rate": 4.214954942578997e-06, "loss": 0.7031, "step": 32560 }, { "epoch": 0.6996176483223783, "grad_norm": 0.5117861279536446, "learning_rate": 4.209396202517183e-06, "loss": 0.6918, "step": 32570 }, { "epoch": 0.699832452635649, "grad_norm": 0.5064003948776196, "learning_rate": 4.203840153047422e-06, "loss": 0.706, "step": 32580 }, { "epoch": 0.7000472569489196, "grad_norm": 0.5292023184958814, "learning_rate": 4.198286796751318e-06, "loss": 0.7005, "step": 32590 }, { "epoch": 0.7002620612621901, "grad_norm": 0.5050282381222843, "learning_rate": 4.1927361362092336e-06, "loss": 0.699, "step": 32600 }, { "epoch": 0.7004768655754607, "grad_norm": 0.5072730993957972, "learning_rate": 4.187188174000262e-06, "loss": 0.6993, "step": 32610 }, { "epoch": 0.7006916698887313, "grad_norm": 0.5081741609904642, "learning_rate": 4.181642912702256e-06, "loss": 0.7046, "step": 32620 }, { "epoch": 0.700906474202002, "grad_norm": 0.5132090809761299, "learning_rate": 4.176100354891812e-06, "loss": 0.6907, "step": 32630 }, { "epoch": 0.7011212785152726, "grad_norm": 0.5114069928059646, "learning_rate": 4.170560503144266e-06, "loss": 0.7162, "step": 32640 }, { "epoch": 0.7013360828285432, "grad_norm": 0.4963333550516421, "learning_rate": 4.165023360033703e-06, "loss": 0.706, "step": 32650 }, { "epoch": 0.7015508871418138, "grad_norm": 0.5148865519289305, "learning_rate": 4.159488928132938e-06, "loss": 0.7147, "step": 32660 }, { "epoch": 0.7017656914550844, "grad_norm": 0.5264557896427997, "learning_rate": 4.153957210013535e-06, "loss": 0.7022, "step": 32670 }, { "epoch": 0.7019804957683551, "grad_norm": 0.5085093647122813, "learning_rate": 4.148428208245799e-06, "loss": 0.714, "step": 32680 }, { "epoch": 0.7021953000816257, "grad_norm": 0.5323744427421241, "learning_rate": 4.142901925398766e-06, "loss": 0.6977, "step": 32690 }, { "epoch": 0.7024101043948963, "grad_norm": 0.5166468220520238, "learning_rate": 4.137378364040216e-06, "loss": 0.6951, "step": 32700 }, { "epoch": 0.7026249087081669, "grad_norm": 0.5004414110795155, "learning_rate": 4.1318575267366515e-06, "loss": 0.718, "step": 32710 }, { "epoch": 0.7028397130214374, "grad_norm": 0.5144502686296928, "learning_rate": 4.12633941605333e-06, "loss": 0.6955, "step": 32720 }, { "epoch": 0.703054517334708, "grad_norm": 0.503363662007366, "learning_rate": 4.120824034554221e-06, "loss": 0.709, "step": 32730 }, { "epoch": 0.7032693216479787, "grad_norm": 0.532794569128917, "learning_rate": 4.115311384802038e-06, "loss": 0.7011, "step": 32740 }, { "epoch": 0.7034841259612493, "grad_norm": 0.5310969014155835, "learning_rate": 4.1098014693582265e-06, "loss": 0.7008, "step": 32750 }, { "epoch": 0.7036989302745199, "grad_norm": 0.4961846506249242, "learning_rate": 4.104294290782946e-06, "loss": 0.6958, "step": 32760 }, { "epoch": 0.7039137345877905, "grad_norm": 0.5131064931445611, "learning_rate": 4.09878985163511e-06, "loss": 0.7031, "step": 32770 }, { "epoch": 0.7041285389010611, "grad_norm": 0.501337109243204, "learning_rate": 4.09328815447233e-06, "loss": 0.7002, "step": 32780 }, { "epoch": 0.7043433432143318, "grad_norm": 0.5010141549810861, "learning_rate": 4.0877892018509735e-06, "loss": 0.6982, "step": 32790 }, { "epoch": 0.7045581475276024, "grad_norm": 0.5190222646312463, "learning_rate": 4.082292996326107e-06, "loss": 0.6942, "step": 32800 }, { "epoch": 0.704772951840873, "grad_norm": 0.5021762528824651, "learning_rate": 4.076799540451532e-06, "loss": 0.7036, "step": 32810 }, { "epoch": 0.7049877561541436, "grad_norm": 0.5216162795537221, "learning_rate": 4.071308836779778e-06, "loss": 0.705, "step": 32820 }, { "epoch": 0.7052025604674141, "grad_norm": 0.5081741693301844, "learning_rate": 4.065820887862077e-06, "loss": 0.7118, "step": 32830 }, { "epoch": 0.7054173647806848, "grad_norm": 0.4991610018130501, "learning_rate": 4.0603356962484075e-06, "loss": 0.7004, "step": 32840 }, { "epoch": 0.7056321690939554, "grad_norm": 0.5315742680425537, "learning_rate": 4.054853264487442e-06, "loss": 0.6897, "step": 32850 }, { "epoch": 0.705846973407226, "grad_norm": 0.5012628613471524, "learning_rate": 4.049373595126584e-06, "loss": 0.6952, "step": 32860 }, { "epoch": 0.7060617777204966, "grad_norm": 0.5112201352671399, "learning_rate": 4.043896690711954e-06, "loss": 0.7163, "step": 32870 }, { "epoch": 0.7062765820337672, "grad_norm": 0.4955072270942802, "learning_rate": 4.0384225537883735e-06, "loss": 0.7037, "step": 32880 }, { "epoch": 0.7064913863470379, "grad_norm": 0.5272007156898026, "learning_rate": 4.032951186899404e-06, "loss": 0.7045, "step": 32890 }, { "epoch": 0.7067061906603085, "grad_norm": 0.5131148249079197, "learning_rate": 4.027482592587294e-06, "loss": 0.7132, "step": 32900 }, { "epoch": 0.7069209949735791, "grad_norm": 0.50205423046405, "learning_rate": 4.022016773393017e-06, "loss": 0.7166, "step": 32910 }, { "epoch": 0.7071357992868497, "grad_norm": 0.5022328702294467, "learning_rate": 4.0165537318562595e-06, "loss": 0.7065, "step": 32920 }, { "epoch": 0.7073506036001203, "grad_norm": 0.5175065388500295, "learning_rate": 4.011093470515402e-06, "loss": 0.6979, "step": 32930 }, { "epoch": 0.707565407913391, "grad_norm": 0.5212370076896278, "learning_rate": 4.005635991907556e-06, "loss": 0.7022, "step": 32940 }, { "epoch": 0.7077802122266615, "grad_norm": 0.4941680464626519, "learning_rate": 4.000181298568514e-06, "loss": 0.6891, "step": 32950 }, { "epoch": 0.7079950165399321, "grad_norm": 0.5160432814123912, "learning_rate": 3.994729393032803e-06, "loss": 0.6975, "step": 32960 }, { "epoch": 0.7082098208532027, "grad_norm": 0.5090291581171473, "learning_rate": 3.989280277833629e-06, "loss": 0.7021, "step": 32970 }, { "epoch": 0.7084246251664733, "grad_norm": 0.5048922334558574, "learning_rate": 3.983833955502915e-06, "loss": 0.6897, "step": 32980 }, { "epoch": 0.7086394294797439, "grad_norm": 0.5065038482347751, "learning_rate": 3.978390428571286e-06, "loss": 0.6934, "step": 32990 }, { "epoch": 0.7088542337930146, "grad_norm": 0.535448626077636, "learning_rate": 3.972949699568057e-06, "loss": 0.6893, "step": 33000 }, { "epoch": 0.7090690381062852, "grad_norm": 0.5169501432061164, "learning_rate": 3.967511771021264e-06, "loss": 0.7029, "step": 33010 }, { "epoch": 0.7092838424195558, "grad_norm": 0.511359264294529, "learning_rate": 3.96207664545762e-06, "loss": 0.7034, "step": 33020 }, { "epoch": 0.7094986467328264, "grad_norm": 0.4960055217526054, "learning_rate": 3.956644325402547e-06, "loss": 0.7024, "step": 33030 }, { "epoch": 0.709713451046097, "grad_norm": 0.5274217499816306, "learning_rate": 3.951214813380164e-06, "loss": 0.7051, "step": 33040 }, { "epoch": 0.7099282553593677, "grad_norm": 0.4966382776596969, "learning_rate": 3.9457881119132745e-06, "loss": 0.6994, "step": 33050 }, { "epoch": 0.7101430596726382, "grad_norm": 0.5360848716987743, "learning_rate": 3.940364223523398e-06, "loss": 0.7146, "step": 33060 }, { "epoch": 0.7103578639859088, "grad_norm": 0.5765957098247436, "learning_rate": 3.934943150730719e-06, "loss": 0.6882, "step": 33070 }, { "epoch": 0.7105726682991794, "grad_norm": 0.5219156969848493, "learning_rate": 3.9295248960541355e-06, "loss": 0.7012, "step": 33080 }, { "epoch": 0.71078747261245, "grad_norm": 0.5177328505359572, "learning_rate": 3.924109462011225e-06, "loss": 0.6934, "step": 33090 }, { "epoch": 0.7110022769257207, "grad_norm": 0.5083853674745945, "learning_rate": 3.91869685111826e-06, "loss": 0.699, "step": 33100 }, { "epoch": 0.7112170812389913, "grad_norm": 0.5161556089894507, "learning_rate": 3.913287065890201e-06, "loss": 0.6894, "step": 33110 }, { "epoch": 0.7114318855522619, "grad_norm": 0.5176090611045527, "learning_rate": 3.907880108840688e-06, "loss": 0.7132, "step": 33120 }, { "epoch": 0.7116466898655325, "grad_norm": 0.5048282675725968, "learning_rate": 3.902475982482055e-06, "loss": 0.7103, "step": 33130 }, { "epoch": 0.7118614941788031, "grad_norm": 0.5184451927132917, "learning_rate": 3.897074689325316e-06, "loss": 0.6916, "step": 33140 }, { "epoch": 0.7120762984920738, "grad_norm": 0.5010826412828934, "learning_rate": 3.891676231880175e-06, "loss": 0.6897, "step": 33150 }, { "epoch": 0.7122911028053444, "grad_norm": 0.524146057682627, "learning_rate": 3.8862806126550105e-06, "loss": 0.7169, "step": 33160 }, { "epoch": 0.712505907118615, "grad_norm": 0.5038515983066828, "learning_rate": 3.8808878341568875e-06, "loss": 0.7081, "step": 33170 }, { "epoch": 0.7127207114318855, "grad_norm": 0.5255219224371609, "learning_rate": 3.875497898891552e-06, "loss": 0.7022, "step": 33180 }, { "epoch": 0.7129355157451561, "grad_norm": 0.4985578239131686, "learning_rate": 3.87011080936342e-06, "loss": 0.7012, "step": 33190 }, { "epoch": 0.7131503200584268, "grad_norm": 0.4924989128893002, "learning_rate": 3.864726568075595e-06, "loss": 0.6947, "step": 33200 }, { "epoch": 0.7133651243716974, "grad_norm": 0.5309886497942049, "learning_rate": 3.859345177529853e-06, "loss": 0.7049, "step": 33210 }, { "epoch": 0.713579928684968, "grad_norm": 0.5206730299572171, "learning_rate": 3.8539666402266465e-06, "loss": 0.6977, "step": 33220 }, { "epoch": 0.7137947329982386, "grad_norm": 0.5158488974635094, "learning_rate": 3.848590958665104e-06, "loss": 0.7065, "step": 33230 }, { "epoch": 0.7140095373115092, "grad_norm": 0.48965491649636267, "learning_rate": 3.843218135343019e-06, "loss": 0.7011, "step": 33240 }, { "epoch": 0.7142243416247799, "grad_norm": 0.564742066719133, "learning_rate": 3.837848172756865e-06, "loss": 0.7058, "step": 33250 }, { "epoch": 0.7144391459380505, "grad_norm": 0.5137654269820426, "learning_rate": 3.832481073401786e-06, "loss": 0.7052, "step": 33260 }, { "epoch": 0.7146539502513211, "grad_norm": 0.5180489971377459, "learning_rate": 3.827116839771593e-06, "loss": 0.7148, "step": 33270 }, { "epoch": 0.7148687545645916, "grad_norm": 0.5058146041046021, "learning_rate": 3.821755474358764e-06, "loss": 0.6951, "step": 33280 }, { "epoch": 0.7150835588778622, "grad_norm": 0.5048478043474748, "learning_rate": 3.816396979654452e-06, "loss": 0.708, "step": 33290 }, { "epoch": 0.7152983631911328, "grad_norm": 0.5200290541035588, "learning_rate": 3.8110413581484628e-06, "loss": 0.7076, "step": 33300 }, { "epoch": 0.7155131675044035, "grad_norm": 0.5041024765974936, "learning_rate": 3.805688612329279e-06, "loss": 0.7087, "step": 33310 }, { "epoch": 0.7157279718176741, "grad_norm": 0.517113427361278, "learning_rate": 3.800338744684041e-06, "loss": 0.7035, "step": 33320 }, { "epoch": 0.7159427761309447, "grad_norm": 0.5180046345306407, "learning_rate": 3.794991757698555e-06, "loss": 0.7112, "step": 33330 }, { "epoch": 0.7161575804442153, "grad_norm": 0.5088817995846314, "learning_rate": 3.7896476538572914e-06, "loss": 0.7107, "step": 33340 }, { "epoch": 0.7163723847574859, "grad_norm": 0.49205509109092965, "learning_rate": 3.7843064356433656e-06, "loss": 0.6988, "step": 33350 }, { "epoch": 0.7165871890707566, "grad_norm": 0.5006056060538654, "learning_rate": 3.7789681055385787e-06, "loss": 0.7052, "step": 33360 }, { "epoch": 0.7168019933840272, "grad_norm": 0.521153940432141, "learning_rate": 3.7736326660233623e-06, "loss": 0.7135, "step": 33370 }, { "epoch": 0.7170167976972978, "grad_norm": 0.5069579531044478, "learning_rate": 3.768300119576822e-06, "loss": 0.7183, "step": 33380 }, { "epoch": 0.7172316020105683, "grad_norm": 0.5215580306799487, "learning_rate": 3.7629704686767144e-06, "loss": 0.6942, "step": 33390 }, { "epoch": 0.7174464063238389, "grad_norm": 0.5122954931151245, "learning_rate": 3.7576437157994506e-06, "loss": 0.6879, "step": 33400 }, { "epoch": 0.7176612106371096, "grad_norm": 0.5464224896140304, "learning_rate": 3.7523198634200985e-06, "loss": 0.7155, "step": 33410 }, { "epoch": 0.7178760149503802, "grad_norm": 0.4963871129860938, "learning_rate": 3.7469989140123696e-06, "loss": 0.6932, "step": 33420 }, { "epoch": 0.7180908192636508, "grad_norm": 0.5220526668691016, "learning_rate": 3.7416808700486328e-06, "loss": 0.7096, "step": 33430 }, { "epoch": 0.7183056235769214, "grad_norm": 0.5142411024190581, "learning_rate": 3.7363657339999093e-06, "loss": 0.6957, "step": 33440 }, { "epoch": 0.718520427890192, "grad_norm": 0.5078089733725818, "learning_rate": 3.7310535083358635e-06, "loss": 0.7011, "step": 33450 }, { "epoch": 0.7187352322034627, "grad_norm": 0.49558634222610376, "learning_rate": 3.7257441955248153e-06, "loss": 0.6895, "step": 33460 }, { "epoch": 0.7189500365167333, "grad_norm": 0.48750240509688914, "learning_rate": 3.7204377980337137e-06, "loss": 0.718, "step": 33470 }, { "epoch": 0.7191648408300039, "grad_norm": 0.5033127653562809, "learning_rate": 3.7151343183281808e-06, "loss": 0.7034, "step": 33480 }, { "epoch": 0.7193796451432745, "grad_norm": 0.4924112060947027, "learning_rate": 3.7098337588724565e-06, "loss": 0.6955, "step": 33490 }, { "epoch": 0.719594449456545, "grad_norm": 0.5143671743596542, "learning_rate": 3.7045361221294375e-06, "loss": 0.708, "step": 33500 }, { "epoch": 0.7198092537698157, "grad_norm": 0.5082249361246125, "learning_rate": 3.6992414105606644e-06, "loss": 0.6782, "step": 33510 }, { "epoch": 0.7200240580830863, "grad_norm": 0.5043808950755216, "learning_rate": 3.693949626626302e-06, "loss": 0.6979, "step": 33520 }, { "epoch": 0.7202388623963569, "grad_norm": 0.5219484042509999, "learning_rate": 3.688660772785183e-06, "loss": 0.704, "step": 33530 }, { "epoch": 0.7204536667096275, "grad_norm": 0.5449994389723714, "learning_rate": 3.6833748514947465e-06, "loss": 0.709, "step": 33540 }, { "epoch": 0.7206684710228981, "grad_norm": 0.5187402619586201, "learning_rate": 3.6780918652110984e-06, "loss": 0.7063, "step": 33550 }, { "epoch": 0.7208832753361688, "grad_norm": 0.5025397434112188, "learning_rate": 3.672811816388959e-06, "loss": 0.7125, "step": 33560 }, { "epoch": 0.7210980796494394, "grad_norm": 0.4922234036325025, "learning_rate": 3.6675347074816948e-06, "loss": 0.699, "step": 33570 }, { "epoch": 0.72131288396271, "grad_norm": 0.5049263733928765, "learning_rate": 3.662260540941306e-06, "loss": 0.7042, "step": 33580 }, { "epoch": 0.7215276882759806, "grad_norm": 0.5209764065547637, "learning_rate": 3.6569893192184123e-06, "loss": 0.7002, "step": 33590 }, { "epoch": 0.7217424925892512, "grad_norm": 0.5046889630269408, "learning_rate": 3.6517210447622918e-06, "loss": 0.7017, "step": 33600 }, { "epoch": 0.7219572969025218, "grad_norm": 0.5059886025352888, "learning_rate": 3.646455720020826e-06, "loss": 0.6994, "step": 33610 }, { "epoch": 0.7221721012157925, "grad_norm": 0.5098305495450107, "learning_rate": 3.6411933474405402e-06, "loss": 0.7073, "step": 33620 }, { "epoch": 0.722386905529063, "grad_norm": 0.5002036914879331, "learning_rate": 3.6359339294665897e-06, "loss": 0.6901, "step": 33630 }, { "epoch": 0.7226017098423336, "grad_norm": 0.523077232704163, "learning_rate": 3.630677468542739e-06, "loss": 0.7114, "step": 33640 }, { "epoch": 0.7228165141556042, "grad_norm": 0.5040742343196016, "learning_rate": 3.625423967111409e-06, "loss": 0.6915, "step": 33650 }, { "epoch": 0.7230313184688748, "grad_norm": 0.504689168487558, "learning_rate": 3.6201734276136156e-06, "loss": 0.7027, "step": 33660 }, { "epoch": 0.7232461227821455, "grad_norm": 0.5360928374223011, "learning_rate": 3.614925852489015e-06, "loss": 0.7044, "step": 33670 }, { "epoch": 0.7234609270954161, "grad_norm": 0.508286028640259, "learning_rate": 3.6096812441758865e-06, "loss": 0.7102, "step": 33680 }, { "epoch": 0.7236757314086867, "grad_norm": 0.4950815691274723, "learning_rate": 3.604439605111114e-06, "loss": 0.7042, "step": 33690 }, { "epoch": 0.7238905357219573, "grad_norm": 0.4990133005177096, "learning_rate": 3.5992009377302306e-06, "loss": 0.7092, "step": 33700 }, { "epoch": 0.7241053400352279, "grad_norm": 0.5080144595958782, "learning_rate": 3.5939652444673614e-06, "loss": 0.7057, "step": 33710 }, { "epoch": 0.7243201443484986, "grad_norm": 0.49504096828193644, "learning_rate": 3.588732527755262e-06, "loss": 0.6866, "step": 33720 }, { "epoch": 0.7245349486617692, "grad_norm": 0.5010995541979635, "learning_rate": 3.583502790025304e-06, "loss": 0.7017, "step": 33730 }, { "epoch": 0.7247497529750397, "grad_norm": 0.5259173065794601, "learning_rate": 3.578276033707476e-06, "loss": 0.7094, "step": 33740 }, { "epoch": 0.7249645572883103, "grad_norm": 0.5119905092520929, "learning_rate": 3.5730522612303808e-06, "loss": 0.701, "step": 33750 }, { "epoch": 0.7251793616015809, "grad_norm": 0.5647220761659497, "learning_rate": 3.5678314750212253e-06, "loss": 0.6946, "step": 33760 }, { "epoch": 0.7253941659148516, "grad_norm": 0.5084711336782207, "learning_rate": 3.5626136775058484e-06, "loss": 0.7022, "step": 33770 }, { "epoch": 0.7256089702281222, "grad_norm": 0.5259298296821393, "learning_rate": 3.5573988711086793e-06, "loss": 0.6953, "step": 33780 }, { "epoch": 0.7258237745413928, "grad_norm": 0.4968324999154312, "learning_rate": 3.552187058252772e-06, "loss": 0.7083, "step": 33790 }, { "epoch": 0.7260385788546634, "grad_norm": 0.5111234225003652, "learning_rate": 3.546978241359785e-06, "loss": 0.6995, "step": 33800 }, { "epoch": 0.726253383167934, "grad_norm": 0.5321941233022512, "learning_rate": 3.541772422849977e-06, "loss": 0.6862, "step": 33810 }, { "epoch": 0.7264681874812047, "grad_norm": 0.5446186196935507, "learning_rate": 3.5365696051422337e-06, "loss": 0.7064, "step": 33820 }, { "epoch": 0.7266829917944753, "grad_norm": 0.5339112094968238, "learning_rate": 3.531369790654022e-06, "loss": 0.6939, "step": 33830 }, { "epoch": 0.7268977961077459, "grad_norm": 0.5116370373744857, "learning_rate": 3.526172981801429e-06, "loss": 0.6848, "step": 33840 }, { "epoch": 0.7271126004210164, "grad_norm": 0.49391991528376755, "learning_rate": 3.5209791809991424e-06, "loss": 0.6974, "step": 33850 }, { "epoch": 0.727327404734287, "grad_norm": 0.4986794011059733, "learning_rate": 3.5157883906604484e-06, "loss": 0.6986, "step": 33860 }, { "epoch": 0.7275422090475576, "grad_norm": 0.5365339519034793, "learning_rate": 3.5106006131972425e-06, "loss": 0.7011, "step": 33870 }, { "epoch": 0.7277570133608283, "grad_norm": 0.505426889604584, "learning_rate": 3.5054158510200077e-06, "loss": 0.7272, "step": 33880 }, { "epoch": 0.7279718176740989, "grad_norm": 0.534863544759657, "learning_rate": 3.5002341065378352e-06, "loss": 0.6975, "step": 33890 }, { "epoch": 0.7281866219873695, "grad_norm": 0.5029648260679437, "learning_rate": 3.4950553821584133e-06, "loss": 0.6967, "step": 33900 }, { "epoch": 0.7284014263006401, "grad_norm": 0.5014206921527354, "learning_rate": 3.4898796802880253e-06, "loss": 0.7089, "step": 33910 }, { "epoch": 0.7286162306139107, "grad_norm": 0.5234721876994048, "learning_rate": 3.484707003331549e-06, "loss": 0.7085, "step": 33920 }, { "epoch": 0.7288310349271814, "grad_norm": 0.5239292676381829, "learning_rate": 3.4795373536924627e-06, "loss": 0.6843, "step": 33930 }, { "epoch": 0.729045839240452, "grad_norm": 0.4980484392742611, "learning_rate": 3.474370733772827e-06, "loss": 0.6965, "step": 33940 }, { "epoch": 0.7292606435537226, "grad_norm": 0.5364410045950752, "learning_rate": 3.4692071459733024e-06, "loss": 0.6984, "step": 33950 }, { "epoch": 0.7294754478669931, "grad_norm": 0.5079175468345598, "learning_rate": 3.464046592693142e-06, "loss": 0.6894, "step": 33960 }, { "epoch": 0.7296902521802637, "grad_norm": 0.5195456559419563, "learning_rate": 3.4588890763301843e-06, "loss": 0.7166, "step": 33970 }, { "epoch": 0.7299050564935344, "grad_norm": 0.5069432370518213, "learning_rate": 3.453734599280859e-06, "loss": 0.7039, "step": 33980 }, { "epoch": 0.730119860806805, "grad_norm": 0.5132347077274939, "learning_rate": 3.4485831639401836e-06, "loss": 0.7007, "step": 33990 }, { "epoch": 0.7303346651200756, "grad_norm": 0.4969882977586769, "learning_rate": 3.4434347727017645e-06, "loss": 0.7043, "step": 34000 }, { "epoch": 0.7305494694333462, "grad_norm": 0.519434920750127, "learning_rate": 3.438289427957785e-06, "loss": 0.7021, "step": 34010 }, { "epoch": 0.7307642737466168, "grad_norm": 0.5195248274879436, "learning_rate": 3.4331471320990216e-06, "loss": 0.7106, "step": 34020 }, { "epoch": 0.7309790780598875, "grad_norm": 0.5452676568255899, "learning_rate": 3.4280078875148317e-06, "loss": 0.689, "step": 34030 }, { "epoch": 0.7311938823731581, "grad_norm": 0.5006895911589073, "learning_rate": 3.4228716965931553e-06, "loss": 0.7172, "step": 34040 }, { "epoch": 0.7314086866864287, "grad_norm": 0.543855444481588, "learning_rate": 3.417738561720515e-06, "loss": 0.7082, "step": 34050 }, { "epoch": 0.7316234909996993, "grad_norm": 0.5225704241808111, "learning_rate": 3.4126084852820064e-06, "loss": 0.7012, "step": 34060 }, { "epoch": 0.7318382953129698, "grad_norm": 0.5439757755860316, "learning_rate": 3.40748146966131e-06, "loss": 0.6898, "step": 34070 }, { "epoch": 0.7320530996262405, "grad_norm": 0.521232354304882, "learning_rate": 3.402357517240684e-06, "loss": 0.7246, "step": 34080 }, { "epoch": 0.7322679039395111, "grad_norm": 0.504585628259099, "learning_rate": 3.397236630400962e-06, "loss": 0.6846, "step": 34090 }, { "epoch": 0.7324827082527817, "grad_norm": 0.5105232285125065, "learning_rate": 3.3921188115215574e-06, "loss": 0.709, "step": 34100 }, { "epoch": 0.7326975125660523, "grad_norm": 0.5118542121131522, "learning_rate": 3.3870040629804445e-06, "loss": 0.6957, "step": 34110 }, { "epoch": 0.7329123168793229, "grad_norm": 0.494913168921892, "learning_rate": 3.3818923871541932e-06, "loss": 0.6846, "step": 34120 }, { "epoch": 0.7331271211925936, "grad_norm": 0.5369888915842843, "learning_rate": 3.376783786417922e-06, "loss": 0.7195, "step": 34130 }, { "epoch": 0.7333419255058642, "grad_norm": 0.49649984813451475, "learning_rate": 3.371678263145337e-06, "loss": 0.6934, "step": 34140 }, { "epoch": 0.7335567298191348, "grad_norm": 0.5374808788911171, "learning_rate": 3.3665758197087084e-06, "loss": 0.7042, "step": 34150 }, { "epoch": 0.7337715341324054, "grad_norm": 0.508279517829229, "learning_rate": 3.361476458478875e-06, "loss": 0.7006, "step": 34160 }, { "epoch": 0.733986338445676, "grad_norm": 0.506858247543215, "learning_rate": 3.3563801818252483e-06, "loss": 0.6905, "step": 34170 }, { "epoch": 0.7342011427589465, "grad_norm": 0.5233572520758975, "learning_rate": 3.351286992115793e-06, "loss": 0.7044, "step": 34180 }, { "epoch": 0.7344159470722172, "grad_norm": 0.49926488863527185, "learning_rate": 3.346196891717063e-06, "loss": 0.6974, "step": 34190 }, { "epoch": 0.7346307513854878, "grad_norm": 0.5127254239928138, "learning_rate": 3.3411098829941513e-06, "loss": 0.6949, "step": 34200 }, { "epoch": 0.7348455556987584, "grad_norm": 0.5157624607732113, "learning_rate": 3.33602596831073e-06, "loss": 0.7038, "step": 34210 }, { "epoch": 0.735060360012029, "grad_norm": 0.5324610708151536, "learning_rate": 3.3309451500290336e-06, "loss": 0.7072, "step": 34220 }, { "epoch": 0.7352751643252996, "grad_norm": 0.5185302310161645, "learning_rate": 3.3258674305098435e-06, "loss": 0.6948, "step": 34230 }, { "epoch": 0.7354899686385703, "grad_norm": 0.4957613188241341, "learning_rate": 3.3207928121125243e-06, "loss": 0.7011, "step": 34240 }, { "epoch": 0.7357047729518409, "grad_norm": 0.5441591292886901, "learning_rate": 3.315721297194977e-06, "loss": 0.705, "step": 34250 }, { "epoch": 0.7359195772651115, "grad_norm": 0.5278633009999051, "learning_rate": 3.3106528881136745e-06, "loss": 0.7038, "step": 34260 }, { "epoch": 0.7361343815783821, "grad_norm": 0.49493002519430035, "learning_rate": 3.305587587223645e-06, "loss": 0.7014, "step": 34270 }, { "epoch": 0.7363491858916527, "grad_norm": 0.5164985719789421, "learning_rate": 3.300525396878461e-06, "loss": 0.6918, "step": 34280 }, { "epoch": 0.7365639902049234, "grad_norm": 0.5385527870192864, "learning_rate": 3.2954663194302715e-06, "loss": 0.7056, "step": 34290 }, { "epoch": 0.736778794518194, "grad_norm": 0.5222972879975742, "learning_rate": 3.2904103572297565e-06, "loss": 0.7012, "step": 34300 }, { "epoch": 0.7369935988314645, "grad_norm": 0.515795075393324, "learning_rate": 3.2853575126261606e-06, "loss": 0.704, "step": 34310 }, { "epoch": 0.7372084031447351, "grad_norm": 0.4972752236528448, "learning_rate": 3.280307787967283e-06, "loss": 0.6891, "step": 34320 }, { "epoch": 0.7374232074580057, "grad_norm": 0.5276200292672564, "learning_rate": 3.2752611855994566e-06, "loss": 0.7069, "step": 34330 }, { "epoch": 0.7376380117712764, "grad_norm": 0.5527465990583433, "learning_rate": 3.270217707867588e-06, "loss": 0.6867, "step": 34340 }, { "epoch": 0.737852816084547, "grad_norm": 0.5098818638723858, "learning_rate": 3.265177357115106e-06, "loss": 0.6933, "step": 34350 }, { "epoch": 0.7380676203978176, "grad_norm": 0.5001020449149964, "learning_rate": 3.2601401356840112e-06, "loss": 0.7017, "step": 34360 }, { "epoch": 0.7382824247110882, "grad_norm": 0.5090868444518032, "learning_rate": 3.2551060459148297e-06, "loss": 0.6899, "step": 34370 }, { "epoch": 0.7384972290243588, "grad_norm": 0.5291041455847213, "learning_rate": 3.250075090146644e-06, "loss": 0.6951, "step": 34380 }, { "epoch": 0.7387120333376295, "grad_norm": 0.49906694116191236, "learning_rate": 3.2450472707170786e-06, "loss": 0.704, "step": 34390 }, { "epoch": 0.7389268376509001, "grad_norm": 0.49374014306535213, "learning_rate": 3.2400225899622917e-06, "loss": 0.6808, "step": 34400 }, { "epoch": 0.7391416419641706, "grad_norm": 0.5331437741952022, "learning_rate": 3.235001050217003e-06, "loss": 0.7108, "step": 34410 }, { "epoch": 0.7393564462774412, "grad_norm": 0.5433540729594903, "learning_rate": 3.2299826538144506e-06, "loss": 0.7037, "step": 34420 }, { "epoch": 0.7395712505907118, "grad_norm": 0.49990473998285656, "learning_rate": 3.2249674030864254e-06, "loss": 0.7034, "step": 34430 }, { "epoch": 0.7397860549039825, "grad_norm": 0.5184986266564906, "learning_rate": 3.2199553003632566e-06, "loss": 0.7059, "step": 34440 }, { "epoch": 0.7400008592172531, "grad_norm": 0.5255092855129455, "learning_rate": 3.2149463479737974e-06, "loss": 0.7078, "step": 34450 }, { "epoch": 0.7402156635305237, "grad_norm": 0.5078685785943006, "learning_rate": 3.2099405482454613e-06, "loss": 0.6856, "step": 34460 }, { "epoch": 0.7404304678437943, "grad_norm": 0.5001995241491086, "learning_rate": 3.2049379035041718e-06, "loss": 0.6947, "step": 34470 }, { "epoch": 0.7406452721570649, "grad_norm": 0.5229199908697051, "learning_rate": 3.199938416074401e-06, "loss": 0.698, "step": 34480 }, { "epoch": 0.7408600764703355, "grad_norm": 0.506611104007963, "learning_rate": 3.1949420882791493e-06, "loss": 0.7095, "step": 34490 }, { "epoch": 0.7410748807836062, "grad_norm": 0.5063586727734837, "learning_rate": 3.1899489224399514e-06, "loss": 0.7033, "step": 34500 }, { "epoch": 0.7412896850968768, "grad_norm": 0.5267235455499195, "learning_rate": 3.184958920876874e-06, "loss": 0.695, "step": 34510 }, { "epoch": 0.7415044894101473, "grad_norm": 0.501781417708024, "learning_rate": 3.1799720859085025e-06, "loss": 0.6983, "step": 34520 }, { "epoch": 0.7417192937234179, "grad_norm": 0.5127416336972987, "learning_rate": 3.174988419851971e-06, "loss": 0.6996, "step": 34530 }, { "epoch": 0.7419340980366885, "grad_norm": 0.5238966491029383, "learning_rate": 3.170007925022921e-06, "loss": 0.7003, "step": 34540 }, { "epoch": 0.7421489023499592, "grad_norm": 0.5027088254862997, "learning_rate": 3.1650306037355303e-06, "loss": 0.693, "step": 34550 }, { "epoch": 0.7423637066632298, "grad_norm": 0.5038648172133018, "learning_rate": 3.160056458302504e-06, "loss": 0.6989, "step": 34560 }, { "epoch": 0.7425785109765004, "grad_norm": 0.5125824985664078, "learning_rate": 3.155085491035066e-06, "loss": 0.7069, "step": 34570 }, { "epoch": 0.742793315289771, "grad_norm": 0.5375010969807497, "learning_rate": 3.1501177042429697e-06, "loss": 0.7113, "step": 34580 }, { "epoch": 0.7430081196030416, "grad_norm": 0.507552166036012, "learning_rate": 3.1451531002344804e-06, "loss": 0.7009, "step": 34590 }, { "epoch": 0.7432229239163123, "grad_norm": 0.5130116795996187, "learning_rate": 3.140191681316396e-06, "loss": 0.7009, "step": 34600 }, { "epoch": 0.7434377282295829, "grad_norm": 0.513751129751177, "learning_rate": 3.1352334497940262e-06, "loss": 0.7051, "step": 34610 }, { "epoch": 0.7436525325428535, "grad_norm": 0.5088528949925636, "learning_rate": 3.1302784079712067e-06, "loss": 0.7024, "step": 34620 }, { "epoch": 0.743867336856124, "grad_norm": 0.519047865808951, "learning_rate": 3.1253265581502877e-06, "loss": 0.6916, "step": 34630 }, { "epoch": 0.7440821411693946, "grad_norm": 0.5261411126518063, "learning_rate": 3.1203779026321313e-06, "loss": 0.6956, "step": 34640 }, { "epoch": 0.7442969454826653, "grad_norm": 0.5173606042960484, "learning_rate": 3.115432443716123e-06, "loss": 0.7191, "step": 34650 }, { "epoch": 0.7445117497959359, "grad_norm": 0.5197079005508889, "learning_rate": 3.110490183700159e-06, "loss": 0.7082, "step": 34660 }, { "epoch": 0.7447265541092065, "grad_norm": 0.5109626703024929, "learning_rate": 3.1055511248806514e-06, "loss": 0.6868, "step": 34670 }, { "epoch": 0.7449413584224771, "grad_norm": 0.5194401187437783, "learning_rate": 3.100615269552523e-06, "loss": 0.6972, "step": 34680 }, { "epoch": 0.7451561627357477, "grad_norm": 0.5061536798223207, "learning_rate": 3.0956826200092114e-06, "loss": 0.6966, "step": 34690 }, { "epoch": 0.7453709670490184, "grad_norm": 0.5083820365394748, "learning_rate": 3.090753178542657e-06, "loss": 0.6783, "step": 34700 }, { "epoch": 0.745585771362289, "grad_norm": 0.5077977495235433, "learning_rate": 3.0858269474433165e-06, "loss": 0.7079, "step": 34710 }, { "epoch": 0.7458005756755596, "grad_norm": 0.512990160380994, "learning_rate": 3.080903929000153e-06, "loss": 0.6993, "step": 34720 }, { "epoch": 0.7460153799888302, "grad_norm": 0.517593034507449, "learning_rate": 3.0759841255006386e-06, "loss": 0.7033, "step": 34730 }, { "epoch": 0.7462301843021008, "grad_norm": 0.4897976460660666, "learning_rate": 3.0710675392307477e-06, "loss": 0.6933, "step": 34740 }, { "epoch": 0.7464449886153715, "grad_norm": 0.4947523248312534, "learning_rate": 3.066154172474962e-06, "loss": 0.6946, "step": 34750 }, { "epoch": 0.746659792928642, "grad_norm": 0.5091626362163222, "learning_rate": 3.0612440275162727e-06, "loss": 0.6867, "step": 34760 }, { "epoch": 0.7468745972419126, "grad_norm": 0.5114642942026933, "learning_rate": 3.056337106636159e-06, "loss": 0.6996, "step": 34770 }, { "epoch": 0.7470894015551832, "grad_norm": 0.5014471572550719, "learning_rate": 3.0514334121146173e-06, "loss": 0.7086, "step": 34780 }, { "epoch": 0.7473042058684538, "grad_norm": 0.5084778446042698, "learning_rate": 3.046532946230136e-06, "loss": 0.6935, "step": 34790 }, { "epoch": 0.7475190101817244, "grad_norm": 0.5229812004697073, "learning_rate": 3.0416357112597108e-06, "loss": 0.7039, "step": 34800 }, { "epoch": 0.7477338144949951, "grad_norm": 0.5019941677521293, "learning_rate": 3.0367417094788308e-06, "loss": 0.7018, "step": 34810 }, { "epoch": 0.7479486188082657, "grad_norm": 0.5255533916684315, "learning_rate": 3.0318509431614794e-06, "loss": 0.7103, "step": 34820 }, { "epoch": 0.7481634231215363, "grad_norm": 0.5094742009361463, "learning_rate": 3.026963414580145e-06, "loss": 0.6916, "step": 34830 }, { "epoch": 0.7483782274348069, "grad_norm": 0.503694503035299, "learning_rate": 3.0220791260058057e-06, "loss": 0.7062, "step": 34840 }, { "epoch": 0.7485930317480775, "grad_norm": 0.5306897240771807, "learning_rate": 3.017198079707937e-06, "loss": 0.7022, "step": 34850 }, { "epoch": 0.7488078360613482, "grad_norm": 0.5166101019632601, "learning_rate": 3.01232027795451e-06, "loss": 0.7041, "step": 34860 }, { "epoch": 0.7490226403746187, "grad_norm": 0.5145609190625092, "learning_rate": 3.0074457230119768e-06, "loss": 0.6888, "step": 34870 }, { "epoch": 0.7492374446878893, "grad_norm": 0.508976620646315, "learning_rate": 3.0025744171452997e-06, "loss": 0.7115, "step": 34880 }, { "epoch": 0.7494522490011599, "grad_norm": 0.5253897144615868, "learning_rate": 2.9977063626179128e-06, "loss": 0.6893, "step": 34890 }, { "epoch": 0.7496670533144305, "grad_norm": 0.5137649765422893, "learning_rate": 2.9928415616917505e-06, "loss": 0.6867, "step": 34900 }, { "epoch": 0.7498818576277012, "grad_norm": 0.49499574504817595, "learning_rate": 2.9879800166272355e-06, "loss": 0.7002, "step": 34910 }, { "epoch": 0.7500966619409718, "grad_norm": 0.519084029719955, "learning_rate": 2.983121729683265e-06, "loss": 0.7104, "step": 34920 }, { "epoch": 0.7503114662542424, "grad_norm": 0.4981518656316401, "learning_rate": 2.9782667031172454e-06, "loss": 0.7028, "step": 34930 }, { "epoch": 0.750526270567513, "grad_norm": 0.51741442278905, "learning_rate": 2.973414939185041e-06, "loss": 0.6982, "step": 34940 }, { "epoch": 0.7507410748807836, "grad_norm": 0.5060387675297806, "learning_rate": 2.9685664401410277e-06, "loss": 0.697, "step": 34950 }, { "epoch": 0.7509558791940543, "grad_norm": 0.5048062525549186, "learning_rate": 2.9637212082380395e-06, "loss": 0.71, "step": 34960 }, { "epoch": 0.7511706835073249, "grad_norm": 0.5212032772870495, "learning_rate": 2.9588792457274075e-06, "loss": 0.6943, "step": 34970 }, { "epoch": 0.7513854878205954, "grad_norm": 0.5107973393581554, "learning_rate": 2.9540405548589434e-06, "loss": 0.7015, "step": 34980 }, { "epoch": 0.751600292133866, "grad_norm": 0.49864860876835415, "learning_rate": 2.949205137880924e-06, "loss": 0.69, "step": 34990 }, { "epoch": 0.7518150964471366, "grad_norm": 0.5307082939307299, "learning_rate": 2.944372997040129e-06, "loss": 0.7061, "step": 35000 }, { "epoch": 0.7520299007604073, "grad_norm": 0.5193800817097175, "learning_rate": 2.9395441345817932e-06, "loss": 0.6902, "step": 35010 }, { "epoch": 0.7522447050736779, "grad_norm": 0.5177658570066975, "learning_rate": 2.9347185527496403e-06, "loss": 0.6956, "step": 35020 }, { "epoch": 0.7524595093869485, "grad_norm": 0.5015771316930479, "learning_rate": 2.929896253785871e-06, "loss": 0.7147, "step": 35030 }, { "epoch": 0.7526743137002191, "grad_norm": 0.4998092573649113, "learning_rate": 2.9250772399311457e-06, "loss": 0.7021, "step": 35040 }, { "epoch": 0.7528891180134897, "grad_norm": 0.5249322187174793, "learning_rate": 2.9202615134246225e-06, "loss": 0.6977, "step": 35050 }, { "epoch": 0.7531039223267603, "grad_norm": 0.5148512865886042, "learning_rate": 2.91544907650391e-06, "loss": 0.6941, "step": 35060 }, { "epoch": 0.753318726640031, "grad_norm": 0.5031383431200541, "learning_rate": 2.9106399314050993e-06, "loss": 0.7068, "step": 35070 }, { "epoch": 0.7535335309533016, "grad_norm": 0.5077882216038482, "learning_rate": 2.905834080362754e-06, "loss": 0.7062, "step": 35080 }, { "epoch": 0.7537483352665721, "grad_norm": 0.5259578113827944, "learning_rate": 2.901031525609891e-06, "loss": 0.6936, "step": 35090 }, { "epoch": 0.7539631395798427, "grad_norm": 0.5193893619415478, "learning_rate": 2.896232269378022e-06, "loss": 0.7213, "step": 35100 }, { "epoch": 0.7541779438931133, "grad_norm": 0.5323126089856631, "learning_rate": 2.8914363138970992e-06, "loss": 0.703, "step": 35110 }, { "epoch": 0.754392748206384, "grad_norm": 0.5173517751625921, "learning_rate": 2.886643661395564e-06, "loss": 0.7092, "step": 35120 }, { "epoch": 0.7546075525196546, "grad_norm": 0.6741926928108194, "learning_rate": 2.8818543141003043e-06, "loss": 0.6951, "step": 35130 }, { "epoch": 0.7548223568329252, "grad_norm": 0.5178832293172879, "learning_rate": 2.877068274236683e-06, "loss": 0.6924, "step": 35140 }, { "epoch": 0.7550371611461958, "grad_norm": 0.5132574620286683, "learning_rate": 2.872285544028528e-06, "loss": 0.6977, "step": 35150 }, { "epoch": 0.7552519654594664, "grad_norm": 0.523341159554861, "learning_rate": 2.867506125698114e-06, "loss": 0.7007, "step": 35160 }, { "epoch": 0.7554667697727371, "grad_norm": 0.559952979776443, "learning_rate": 2.862730021466201e-06, "loss": 0.7094, "step": 35170 }, { "epoch": 0.7556815740860077, "grad_norm": 0.5149609461834663, "learning_rate": 2.8579572335519866e-06, "loss": 0.6943, "step": 35180 }, { "epoch": 0.7558963783992783, "grad_norm": 0.507539558550583, "learning_rate": 2.853187764173141e-06, "loss": 0.7008, "step": 35190 }, { "epoch": 0.7561111827125488, "grad_norm": 0.49479288184079256, "learning_rate": 2.848421615545789e-06, "loss": 0.7042, "step": 35200 }, { "epoch": 0.7563259870258194, "grad_norm": 0.5189990316887987, "learning_rate": 2.8436587898845035e-06, "loss": 0.6971, "step": 35210 }, { "epoch": 0.7565407913390901, "grad_norm": 0.5162232202770758, "learning_rate": 2.838899289402335e-06, "loss": 0.7109, "step": 35220 }, { "epoch": 0.7567555956523607, "grad_norm": 0.5165521047716195, "learning_rate": 2.8341431163107648e-06, "loss": 0.6952, "step": 35230 }, { "epoch": 0.7569703999656313, "grad_norm": 0.4956540738020749, "learning_rate": 2.829390272819742e-06, "loss": 0.6871, "step": 35240 }, { "epoch": 0.7571852042789019, "grad_norm": 0.513330560976213, "learning_rate": 2.824640761137667e-06, "loss": 0.6974, "step": 35250 }, { "epoch": 0.7574000085921725, "grad_norm": 0.5032601979915532, "learning_rate": 2.8198945834713885e-06, "loss": 0.6913, "step": 35260 }, { "epoch": 0.7576148129054432, "grad_norm": 0.5128246318375348, "learning_rate": 2.815151742026213e-06, "loss": 0.7056, "step": 35270 }, { "epoch": 0.7578296172187138, "grad_norm": 0.5030995060530129, "learning_rate": 2.810412239005885e-06, "loss": 0.713, "step": 35280 }, { "epoch": 0.7580444215319844, "grad_norm": 0.5038841598854293, "learning_rate": 2.805676076612608e-06, "loss": 0.6996, "step": 35290 }, { "epoch": 0.758259225845255, "grad_norm": 0.5122170399356182, "learning_rate": 2.8009432570470296e-06, "loss": 0.7059, "step": 35300 }, { "epoch": 0.7584740301585255, "grad_norm": 0.5156117155914561, "learning_rate": 2.7962137825082446e-06, "loss": 0.684, "step": 35310 }, { "epoch": 0.7586888344717962, "grad_norm": 0.5049866933728283, "learning_rate": 2.7914876551937953e-06, "loss": 0.6942, "step": 35320 }, { "epoch": 0.7589036387850668, "grad_norm": 0.5094420858773214, "learning_rate": 2.7867648772996634e-06, "loss": 0.708, "step": 35330 }, { "epoch": 0.7591184430983374, "grad_norm": 0.5011895237248974, "learning_rate": 2.7820454510202843e-06, "loss": 0.6918, "step": 35340 }, { "epoch": 0.759333247411608, "grad_norm": 0.504089241568298, "learning_rate": 2.777329378548522e-06, "loss": 0.7058, "step": 35350 }, { "epoch": 0.7595480517248786, "grad_norm": 0.5365866928409356, "learning_rate": 2.7726166620756934e-06, "loss": 0.692, "step": 35360 }, { "epoch": 0.7597628560381492, "grad_norm": 0.4984929620909205, "learning_rate": 2.7679073037915516e-06, "loss": 0.6874, "step": 35370 }, { "epoch": 0.7599776603514199, "grad_norm": 0.5410453059904045, "learning_rate": 2.763201305884291e-06, "loss": 0.7145, "step": 35380 }, { "epoch": 0.7601924646646905, "grad_norm": 0.5522929417691606, "learning_rate": 2.758498670540546e-06, "loss": 0.7132, "step": 35390 }, { "epoch": 0.7604072689779611, "grad_norm": 0.5191149506874112, "learning_rate": 2.7537993999453818e-06, "loss": 0.7094, "step": 35400 }, { "epoch": 0.7606220732912317, "grad_norm": 0.5364420903269488, "learning_rate": 2.749103496282306e-06, "loss": 0.7105, "step": 35410 }, { "epoch": 0.7608368776045022, "grad_norm": 0.5307396572245091, "learning_rate": 2.7444109617332614e-06, "loss": 0.7067, "step": 35420 }, { "epoch": 0.761051681917773, "grad_norm": 0.5189987491520354, "learning_rate": 2.739721798478625e-06, "loss": 0.6967, "step": 35430 }, { "epoch": 0.7612664862310435, "grad_norm": 0.5522945068168588, "learning_rate": 2.735036008697205e-06, "loss": 0.7211, "step": 35440 }, { "epoch": 0.7614812905443141, "grad_norm": 0.49783370978462715, "learning_rate": 2.7303535945662485e-06, "loss": 0.6972, "step": 35450 }, { "epoch": 0.7616960948575847, "grad_norm": 0.5134039941417213, "learning_rate": 2.725674558261423e-06, "loss": 0.6903, "step": 35460 }, { "epoch": 0.7619108991708553, "grad_norm": 0.5056018475415773, "learning_rate": 2.720998901956836e-06, "loss": 0.6978, "step": 35470 }, { "epoch": 0.762125703484126, "grad_norm": 0.5016409320759787, "learning_rate": 2.7163266278250222e-06, "loss": 0.7092, "step": 35480 }, { "epoch": 0.7623405077973966, "grad_norm": 0.5090009069207025, "learning_rate": 2.7116577380369434e-06, "loss": 0.7068, "step": 35490 }, { "epoch": 0.7625553121106672, "grad_norm": 0.5138423219447262, "learning_rate": 2.7069922347619926e-06, "loss": 0.6999, "step": 35500 }, { "epoch": 0.7627701164239378, "grad_norm": 0.5079624500816473, "learning_rate": 2.7023301201679763e-06, "loss": 0.7128, "step": 35510 }, { "epoch": 0.7629849207372084, "grad_norm": 0.519763641884365, "learning_rate": 2.69767139642115e-06, "loss": 0.6984, "step": 35520 }, { "epoch": 0.7631997250504791, "grad_norm": 0.526599503026077, "learning_rate": 2.6930160656861704e-06, "loss": 0.6996, "step": 35530 }, { "epoch": 0.7634145293637496, "grad_norm": 0.527777664386869, "learning_rate": 2.688364130126131e-06, "loss": 0.7001, "step": 35540 }, { "epoch": 0.7636293336770202, "grad_norm": 0.5167948414932604, "learning_rate": 2.6837155919025426e-06, "loss": 0.6886, "step": 35550 }, { "epoch": 0.7638441379902908, "grad_norm": 0.5005085326477348, "learning_rate": 2.6790704531753385e-06, "loss": 0.6922, "step": 35560 }, { "epoch": 0.7640589423035614, "grad_norm": 0.5008777298038203, "learning_rate": 2.6744287161028782e-06, "loss": 0.6941, "step": 35570 }, { "epoch": 0.7642737466168321, "grad_norm": 0.5168378019628393, "learning_rate": 2.669790382841928e-06, "loss": 0.7073, "step": 35580 }, { "epoch": 0.7644885509301027, "grad_norm": 0.5064269251779776, "learning_rate": 2.665155455547682e-06, "loss": 0.7061, "step": 35590 }, { "epoch": 0.7647033552433733, "grad_norm": 0.5045676176502482, "learning_rate": 2.66052393637375e-06, "loss": 0.7015, "step": 35600 }, { "epoch": 0.7649181595566439, "grad_norm": 0.4956210829558816, "learning_rate": 2.655895827472158e-06, "loss": 0.6862, "step": 35610 }, { "epoch": 0.7651329638699145, "grad_norm": 0.5097661066228676, "learning_rate": 2.6512711309933503e-06, "loss": 0.6848, "step": 35620 }, { "epoch": 0.7653477681831852, "grad_norm": 0.510005397820754, "learning_rate": 2.6466498490861734e-06, "loss": 0.6905, "step": 35630 }, { "epoch": 0.7655625724964558, "grad_norm": 0.5253052971997748, "learning_rate": 2.642031983897909e-06, "loss": 0.6904, "step": 35640 }, { "epoch": 0.7657773768097264, "grad_norm": 0.5027879257266836, "learning_rate": 2.6374175375742285e-06, "loss": 0.685, "step": 35650 }, { "epoch": 0.7659921811229969, "grad_norm": 0.519001207669886, "learning_rate": 2.6328065122592284e-06, "loss": 0.6946, "step": 35660 }, { "epoch": 0.7662069854362675, "grad_norm": 0.5156599981554072, "learning_rate": 2.6281989100954155e-06, "loss": 0.6955, "step": 35670 }, { "epoch": 0.7664217897495381, "grad_norm": 0.5065842964681723, "learning_rate": 2.623594733223692e-06, "loss": 0.6901, "step": 35680 }, { "epoch": 0.7666365940628088, "grad_norm": 0.5043045850760082, "learning_rate": 2.6189939837833934e-06, "loss": 0.6807, "step": 35690 }, { "epoch": 0.7668513983760794, "grad_norm": 0.5203139211551309, "learning_rate": 2.6143966639122343e-06, "loss": 0.7069, "step": 35700 }, { "epoch": 0.76706620268935, "grad_norm": 0.5059570893757329, "learning_rate": 2.609802775746363e-06, "loss": 0.6829, "step": 35710 }, { "epoch": 0.7672810070026206, "grad_norm": 0.5292183230754148, "learning_rate": 2.6052123214203106e-06, "loss": 0.7116, "step": 35720 }, { "epoch": 0.7674958113158912, "grad_norm": 0.5050171351436751, "learning_rate": 2.6006253030670246e-06, "loss": 0.6933, "step": 35730 }, { "epoch": 0.7677106156291619, "grad_norm": 0.5367079725288396, "learning_rate": 2.596041722817859e-06, "loss": 0.6986, "step": 35740 }, { "epoch": 0.7679254199424325, "grad_norm": 0.5157066385692741, "learning_rate": 2.5914615828025534e-06, "loss": 0.7029, "step": 35750 }, { "epoch": 0.768140224255703, "grad_norm": 0.5036694678541429, "learning_rate": 2.5868848851492733e-06, "loss": 0.688, "step": 35760 }, { "epoch": 0.7683550285689736, "grad_norm": 0.5071285342347541, "learning_rate": 2.5823116319845633e-06, "loss": 0.6878, "step": 35770 }, { "epoch": 0.7685698328822442, "grad_norm": 0.5068127626353475, "learning_rate": 2.5777418254333775e-06, "loss": 0.6938, "step": 35780 }, { "epoch": 0.7687846371955149, "grad_norm": 0.5198659666241273, "learning_rate": 2.5731754676190725e-06, "loss": 0.6919, "step": 35790 }, { "epoch": 0.7689994415087855, "grad_norm": 0.5040011486527468, "learning_rate": 2.568612560663385e-06, "loss": 0.6883, "step": 35800 }, { "epoch": 0.7692142458220561, "grad_norm": 0.5095849632751419, "learning_rate": 2.5640531066864737e-06, "loss": 0.6935, "step": 35810 }, { "epoch": 0.7694290501353267, "grad_norm": 0.5031387126877207, "learning_rate": 2.559497107806871e-06, "loss": 0.6873, "step": 35820 }, { "epoch": 0.7696438544485973, "grad_norm": 0.5069738792715828, "learning_rate": 2.554944566141515e-06, "loss": 0.6938, "step": 35830 }, { "epoch": 0.769858658761868, "grad_norm": 0.5225177154611406, "learning_rate": 2.5503954838057367e-06, "loss": 0.6986, "step": 35840 }, { "epoch": 0.7700734630751386, "grad_norm": 0.5337253376024659, "learning_rate": 2.54584986291325e-06, "loss": 0.6938, "step": 35850 }, { "epoch": 0.7702882673884092, "grad_norm": 0.4974683440922778, "learning_rate": 2.54130770557618e-06, "loss": 0.7002, "step": 35860 }, { "epoch": 0.7705030717016798, "grad_norm": 0.5164372270920562, "learning_rate": 2.536769013905022e-06, "loss": 0.7042, "step": 35870 }, { "epoch": 0.7707178760149503, "grad_norm": 0.5045862598256815, "learning_rate": 2.532233790008671e-06, "loss": 0.7122, "step": 35880 }, { "epoch": 0.770932680328221, "grad_norm": 0.5039493617623143, "learning_rate": 2.5277020359944114e-06, "loss": 0.6996, "step": 35890 }, { "epoch": 0.7711474846414916, "grad_norm": 0.4974176270519638, "learning_rate": 2.5231737539679124e-06, "loss": 0.7015, "step": 35900 }, { "epoch": 0.7713622889547622, "grad_norm": 0.5183097985832663, "learning_rate": 2.518648946033234e-06, "loss": 0.6908, "step": 35910 }, { "epoch": 0.7715770932680328, "grad_norm": 0.525976552546943, "learning_rate": 2.5141276142928093e-06, "loss": 0.6969, "step": 35920 }, { "epoch": 0.7717918975813034, "grad_norm": 0.5332780233463744, "learning_rate": 2.509609760847479e-06, "loss": 0.6916, "step": 35930 }, { "epoch": 0.772006701894574, "grad_norm": 0.5111293385118552, "learning_rate": 2.5050953877964446e-06, "loss": 0.7115, "step": 35940 }, { "epoch": 0.7722215062078447, "grad_norm": 0.5304473121055377, "learning_rate": 2.500584497237303e-06, "loss": 0.6997, "step": 35950 }, { "epoch": 0.7724363105211153, "grad_norm": 0.5202204337822746, "learning_rate": 2.4960770912660324e-06, "loss": 0.6949, "step": 35960 }, { "epoch": 0.7726511148343859, "grad_norm": 0.5082441180489494, "learning_rate": 2.491573171976982e-06, "loss": 0.6936, "step": 35970 }, { "epoch": 0.7728659191476565, "grad_norm": 0.5085533382278553, "learning_rate": 2.4870727414629005e-06, "loss": 0.6995, "step": 35980 }, { "epoch": 0.773080723460927, "grad_norm": 0.5110984394108816, "learning_rate": 2.482575801814894e-06, "loss": 0.6912, "step": 35990 }, { "epoch": 0.7732955277741977, "grad_norm": 0.5333234352124336, "learning_rate": 2.4780823551224586e-06, "loss": 0.7008, "step": 36000 }, { "epoch": 0.7735103320874683, "grad_norm": 0.5104268992806394, "learning_rate": 2.473592403473466e-06, "loss": 0.6972, "step": 36010 }, { "epoch": 0.7737251364007389, "grad_norm": 0.5095548549309175, "learning_rate": 2.4691059489541637e-06, "loss": 0.6833, "step": 36020 }, { "epoch": 0.7739399407140095, "grad_norm": 0.5379203466934536, "learning_rate": 2.464622993649174e-06, "loss": 0.6885, "step": 36030 }, { "epoch": 0.7741547450272801, "grad_norm": 0.5103185774726944, "learning_rate": 2.46014353964149e-06, "loss": 0.6972, "step": 36040 }, { "epoch": 0.7743695493405508, "grad_norm": 0.5108530905870736, "learning_rate": 2.4556675890124803e-06, "loss": 0.6918, "step": 36050 }, { "epoch": 0.7745843536538214, "grad_norm": 0.5114449569980617, "learning_rate": 2.451195143841889e-06, "loss": 0.6982, "step": 36060 }, { "epoch": 0.774799157967092, "grad_norm": 0.5117859340036564, "learning_rate": 2.446726206207827e-06, "loss": 0.7139, "step": 36070 }, { "epoch": 0.7750139622803626, "grad_norm": 0.5482258536620003, "learning_rate": 2.442260778186777e-06, "loss": 0.7027, "step": 36080 }, { "epoch": 0.7752287665936332, "grad_norm": 0.5072515818925265, "learning_rate": 2.4377988618535943e-06, "loss": 0.6881, "step": 36090 }, { "epoch": 0.7754435709069039, "grad_norm": 0.4985942219561439, "learning_rate": 2.4333404592814923e-06, "loss": 0.6863, "step": 36100 }, { "epoch": 0.7756583752201744, "grad_norm": 0.5028179650880126, "learning_rate": 2.4288855725420625e-06, "loss": 0.6791, "step": 36110 }, { "epoch": 0.775873179533445, "grad_norm": 0.5497148140060542, "learning_rate": 2.424434203705257e-06, "loss": 0.7117, "step": 36120 }, { "epoch": 0.7760879838467156, "grad_norm": 0.5121974367383552, "learning_rate": 2.4199863548393978e-06, "loss": 0.6947, "step": 36130 }, { "epoch": 0.7763027881599862, "grad_norm": 0.5024469961030319, "learning_rate": 2.4155420280111673e-06, "loss": 0.7016, "step": 36140 }, { "epoch": 0.7765175924732569, "grad_norm": 0.49804034625215227, "learning_rate": 2.411101225285616e-06, "loss": 0.6963, "step": 36150 }, { "epoch": 0.7767323967865275, "grad_norm": 0.5197970242944042, "learning_rate": 2.406663948726147e-06, "loss": 0.6917, "step": 36160 }, { "epoch": 0.7769472010997981, "grad_norm": 0.5243687759024098, "learning_rate": 2.4022302003945343e-06, "loss": 0.6886, "step": 36170 }, { "epoch": 0.7771620054130687, "grad_norm": 0.5067864445033509, "learning_rate": 2.3977999823509112e-06, "loss": 0.7043, "step": 36180 }, { "epoch": 0.7773768097263393, "grad_norm": 0.4969747214927612, "learning_rate": 2.3933732966537683e-06, "loss": 0.7067, "step": 36190 }, { "epoch": 0.77759161403961, "grad_norm": 0.5173099358575356, "learning_rate": 2.3889501453599575e-06, "loss": 0.7042, "step": 36200 }, { "epoch": 0.7778064183528806, "grad_norm": 0.5247869692545869, "learning_rate": 2.384530530524688e-06, "loss": 0.6935, "step": 36210 }, { "epoch": 0.7780212226661511, "grad_norm": 0.5564841459852391, "learning_rate": 2.3801144542015197e-06, "loss": 0.7144, "step": 36220 }, { "epoch": 0.7782360269794217, "grad_norm": 0.5269720198709235, "learning_rate": 2.3757019184423756e-06, "loss": 0.7053, "step": 36230 }, { "epoch": 0.7784508312926923, "grad_norm": 0.4911231646939383, "learning_rate": 2.3712929252975327e-06, "loss": 0.705, "step": 36240 }, { "epoch": 0.7786656356059629, "grad_norm": 0.5168096144036898, "learning_rate": 2.366887476815619e-06, "loss": 0.7133, "step": 36250 }, { "epoch": 0.7788804399192336, "grad_norm": 0.4905748157218672, "learning_rate": 2.3624855750436206e-06, "loss": 0.7, "step": 36260 }, { "epoch": 0.7790952442325042, "grad_norm": 0.5059325828111079, "learning_rate": 2.3580872220268623e-06, "loss": 0.6928, "step": 36270 }, { "epoch": 0.7793100485457748, "grad_norm": 0.5163071515499138, "learning_rate": 2.3536924198090437e-06, "loss": 0.7068, "step": 36280 }, { "epoch": 0.7795248528590454, "grad_norm": 0.5432830473338806, "learning_rate": 2.34930117043219e-06, "loss": 0.7001, "step": 36290 }, { "epoch": 0.779739657172316, "grad_norm": 0.5014221779118088, "learning_rate": 2.344913475936689e-06, "loss": 0.7067, "step": 36300 }, { "epoch": 0.7799544614855867, "grad_norm": 0.5190757630635009, "learning_rate": 2.340529338361275e-06, "loss": 0.7077, "step": 36310 }, { "epoch": 0.7801692657988573, "grad_norm": 0.5123110569142603, "learning_rate": 2.3361487597430265e-06, "loss": 0.71, "step": 36320 }, { "epoch": 0.7803840701121278, "grad_norm": 0.5208933584534376, "learning_rate": 2.3317717421173747e-06, "loss": 0.7115, "step": 36330 }, { "epoch": 0.7805988744253984, "grad_norm": 0.5101156973048868, "learning_rate": 2.3273982875180832e-06, "loss": 0.6866, "step": 36340 }, { "epoch": 0.780813678738669, "grad_norm": 0.5277994764678194, "learning_rate": 2.3230283979772795e-06, "loss": 0.7037, "step": 36350 }, { "epoch": 0.7810284830519397, "grad_norm": 0.5609421047646885, "learning_rate": 2.318662075525415e-06, "loss": 0.7071, "step": 36360 }, { "epoch": 0.7812432873652103, "grad_norm": 0.5162031245256725, "learning_rate": 2.3142993221912968e-06, "loss": 0.6859, "step": 36370 }, { "epoch": 0.7814580916784809, "grad_norm": 0.526786457219609, "learning_rate": 2.3099401400020693e-06, "loss": 0.7008, "step": 36380 }, { "epoch": 0.7816728959917515, "grad_norm": 0.5462499765613852, "learning_rate": 2.3055845309832113e-06, "loss": 0.6883, "step": 36390 }, { "epoch": 0.7818877003050221, "grad_norm": 0.517477588318434, "learning_rate": 2.3012324971585575e-06, "loss": 0.7137, "step": 36400 }, { "epoch": 0.7821025046182928, "grad_norm": 0.5159739518570036, "learning_rate": 2.2968840405502636e-06, "loss": 0.6996, "step": 36410 }, { "epoch": 0.7823173089315634, "grad_norm": 0.511918446890818, "learning_rate": 2.292539163178834e-06, "loss": 0.6882, "step": 36420 }, { "epoch": 0.782532113244834, "grad_norm": 0.5294058439127566, "learning_rate": 2.288197867063109e-06, "loss": 0.6968, "step": 36430 }, { "epoch": 0.7827469175581045, "grad_norm": 0.5295192397598697, "learning_rate": 2.2838601542202543e-06, "loss": 0.699, "step": 36440 }, { "epoch": 0.7829617218713751, "grad_norm": 0.5163303982789976, "learning_rate": 2.2795260266657905e-06, "loss": 0.6876, "step": 36450 }, { "epoch": 0.7831765261846458, "grad_norm": 0.528330319215088, "learning_rate": 2.275195486413554e-06, "loss": 0.6974, "step": 36460 }, { "epoch": 0.7833913304979164, "grad_norm": 0.5239254843684411, "learning_rate": 2.270868535475722e-06, "loss": 0.6845, "step": 36470 }, { "epoch": 0.783606134811187, "grad_norm": 0.5129180784875449, "learning_rate": 2.266545175862809e-06, "loss": 0.6919, "step": 36480 }, { "epoch": 0.7838209391244576, "grad_norm": 0.553503515608531, "learning_rate": 2.262225409583643e-06, "loss": 0.6931, "step": 36490 }, { "epoch": 0.7840357434377282, "grad_norm": 0.5193440729217803, "learning_rate": 2.257909238645408e-06, "loss": 0.691, "step": 36500 }, { "epoch": 0.7842505477509989, "grad_norm": 0.5409849700495803, "learning_rate": 2.253596665053592e-06, "loss": 0.6901, "step": 36510 }, { "epoch": 0.7844653520642695, "grad_norm": 0.50913652185814, "learning_rate": 2.2492876908120355e-06, "loss": 0.6969, "step": 36520 }, { "epoch": 0.7846801563775401, "grad_norm": 0.5190130256439345, "learning_rate": 2.2449823179228846e-06, "loss": 0.7067, "step": 36530 }, { "epoch": 0.7848949606908107, "grad_norm": 0.5142508244546575, "learning_rate": 2.240680548386626e-06, "loss": 0.6986, "step": 36540 }, { "epoch": 0.7851097650040813, "grad_norm": 0.5389748247074397, "learning_rate": 2.2363823842020694e-06, "loss": 0.7, "step": 36550 }, { "epoch": 0.7853245693173518, "grad_norm": 0.5077039536630492, "learning_rate": 2.2320878273663402e-06, "loss": 0.6961, "step": 36560 }, { "epoch": 0.7855393736306225, "grad_norm": 0.5387782887838709, "learning_rate": 2.2277968798749074e-06, "loss": 0.7003, "step": 36570 }, { "epoch": 0.7857541779438931, "grad_norm": 0.5181751565840694, "learning_rate": 2.2235095437215416e-06, "loss": 0.6927, "step": 36580 }, { "epoch": 0.7859689822571637, "grad_norm": 0.5150089685898623, "learning_rate": 2.2192258208983474e-06, "loss": 0.6922, "step": 36590 }, { "epoch": 0.7861837865704343, "grad_norm": 0.5203313759269558, "learning_rate": 2.2149457133957498e-06, "loss": 0.6952, "step": 36600 }, { "epoch": 0.7863985908837049, "grad_norm": 0.513794624005246, "learning_rate": 2.210669223202485e-06, "loss": 0.7039, "step": 36610 }, { "epoch": 0.7866133951969756, "grad_norm": 0.49987037240309906, "learning_rate": 2.2063963523056265e-06, "loss": 0.6922, "step": 36620 }, { "epoch": 0.7868281995102462, "grad_norm": 0.5086035486321187, "learning_rate": 2.2021271026905444e-06, "loss": 0.6855, "step": 36630 }, { "epoch": 0.7870430038235168, "grad_norm": 0.5105940396334009, "learning_rate": 2.1978614763409424e-06, "loss": 0.6969, "step": 36640 }, { "epoch": 0.7872578081367874, "grad_norm": 0.5037059973128649, "learning_rate": 2.1935994752388323e-06, "loss": 0.696, "step": 36650 }, { "epoch": 0.787472612450058, "grad_norm": 0.528348362552611, "learning_rate": 2.189341101364546e-06, "loss": 0.6948, "step": 36660 }, { "epoch": 0.7876874167633287, "grad_norm": 0.5312271902558634, "learning_rate": 2.1850863566967296e-06, "loss": 0.7015, "step": 36670 }, { "epoch": 0.7879022210765992, "grad_norm": 0.5205776526916766, "learning_rate": 2.1808352432123338e-06, "loss": 0.6935, "step": 36680 }, { "epoch": 0.7881170253898698, "grad_norm": 0.517986778286259, "learning_rate": 2.1765877628866405e-06, "loss": 0.695, "step": 36690 }, { "epoch": 0.7883318297031404, "grad_norm": 0.5059031168383098, "learning_rate": 2.172343917693225e-06, "loss": 0.694, "step": 36700 }, { "epoch": 0.788546634016411, "grad_norm": 0.4977347989860999, "learning_rate": 2.1681037096039826e-06, "loss": 0.6954, "step": 36710 }, { "epoch": 0.7887614383296817, "grad_norm": 0.5166359451995465, "learning_rate": 2.1638671405891177e-06, "loss": 0.6997, "step": 36720 }, { "epoch": 0.7889762426429523, "grad_norm": 0.5074952802695203, "learning_rate": 2.159634212617143e-06, "loss": 0.6871, "step": 36730 }, { "epoch": 0.7891910469562229, "grad_norm": 0.525379462136144, "learning_rate": 2.155404927654884e-06, "loss": 0.6867, "step": 36740 }, { "epoch": 0.7894058512694935, "grad_norm": 0.5055105654931678, "learning_rate": 2.151179287667463e-06, "loss": 0.685, "step": 36750 }, { "epoch": 0.7896206555827641, "grad_norm": 0.5392743199170078, "learning_rate": 2.146957294618316e-06, "loss": 0.7023, "step": 36760 }, { "epoch": 0.7898354598960348, "grad_norm": 0.5245335783053737, "learning_rate": 2.1427389504691854e-06, "loss": 0.687, "step": 36770 }, { "epoch": 0.7900502642093054, "grad_norm": 0.5382517802199199, "learning_rate": 2.1385242571801145e-06, "loss": 0.702, "step": 36780 }, { "epoch": 0.7902650685225759, "grad_norm": 0.5268825342443021, "learning_rate": 2.134313216709456e-06, "loss": 0.6948, "step": 36790 }, { "epoch": 0.7904798728358465, "grad_norm": 0.5169835737072852, "learning_rate": 2.1301058310138556e-06, "loss": 0.676, "step": 36800 }, { "epoch": 0.7906946771491171, "grad_norm": 0.5044997612963653, "learning_rate": 2.1259021020482674e-06, "loss": 0.7033, "step": 36810 }, { "epoch": 0.7909094814623878, "grad_norm": 0.514705637218055, "learning_rate": 2.1217020317659463e-06, "loss": 0.6949, "step": 36820 }, { "epoch": 0.7911242857756584, "grad_norm": 0.5165996618712064, "learning_rate": 2.1175056221184465e-06, "loss": 0.7022, "step": 36830 }, { "epoch": 0.791339090088929, "grad_norm": 0.5027344033789356, "learning_rate": 2.113312875055621e-06, "loss": 0.6877, "step": 36840 }, { "epoch": 0.7915538944021996, "grad_norm": 0.5122991945340712, "learning_rate": 2.1091237925256235e-06, "loss": 0.6949, "step": 36850 }, { "epoch": 0.7917686987154702, "grad_norm": 0.5295857931561139, "learning_rate": 2.1049383764748977e-06, "loss": 0.6942, "step": 36860 }, { "epoch": 0.7919835030287408, "grad_norm": 0.5120717805001744, "learning_rate": 2.1007566288481905e-06, "loss": 0.6997, "step": 36870 }, { "epoch": 0.7921983073420115, "grad_norm": 0.519141610866559, "learning_rate": 2.0965785515885416e-06, "loss": 0.6988, "step": 36880 }, { "epoch": 0.792413111655282, "grad_norm": 0.5333090103926987, "learning_rate": 2.0924041466372878e-06, "loss": 0.7008, "step": 36890 }, { "epoch": 0.7926279159685526, "grad_norm": 0.5168789441545899, "learning_rate": 2.0882334159340566e-06, "loss": 0.6837, "step": 36900 }, { "epoch": 0.7928427202818232, "grad_norm": 0.5249764329034577, "learning_rate": 2.0840663614167698e-06, "loss": 0.699, "step": 36910 }, { "epoch": 0.7930575245950938, "grad_norm": 0.5146899618296968, "learning_rate": 2.0799029850216424e-06, "loss": 0.6942, "step": 36920 }, { "epoch": 0.7932723289083645, "grad_norm": 0.5166243708864191, "learning_rate": 2.075743288683174e-06, "loss": 0.7027, "step": 36930 }, { "epoch": 0.7934871332216351, "grad_norm": 0.520103403645907, "learning_rate": 2.0715872743341613e-06, "loss": 0.6913, "step": 36940 }, { "epoch": 0.7937019375349057, "grad_norm": 0.5661241316056728, "learning_rate": 2.0674349439056884e-06, "loss": 0.6916, "step": 36950 }, { "epoch": 0.7939167418481763, "grad_norm": 0.5413841506028565, "learning_rate": 2.0632862993271264e-06, "loss": 0.6956, "step": 36960 }, { "epoch": 0.7941315461614469, "grad_norm": 0.5057462755052492, "learning_rate": 2.0591413425261364e-06, "loss": 0.6967, "step": 36970 }, { "epoch": 0.7943463504747176, "grad_norm": 0.5036998109773616, "learning_rate": 2.0550000754286603e-06, "loss": 0.6904, "step": 36980 }, { "epoch": 0.7945611547879882, "grad_norm": 0.5141790017851666, "learning_rate": 2.05086249995893e-06, "loss": 0.6862, "step": 36990 }, { "epoch": 0.7947759591012588, "grad_norm": 0.504848106422294, "learning_rate": 2.046728618039464e-06, "loss": 0.6813, "step": 37000 }, { "epoch": 0.7949907634145293, "grad_norm": 0.5069448860207141, "learning_rate": 2.0425984315910597e-06, "loss": 0.6896, "step": 37010 }, { "epoch": 0.7952055677277999, "grad_norm": 0.5179956584180097, "learning_rate": 2.0384719425328025e-06, "loss": 0.6923, "step": 37020 }, { "epoch": 0.7954203720410706, "grad_norm": 0.5137679574180213, "learning_rate": 2.0343491527820504e-06, "loss": 0.7031, "step": 37030 }, { "epoch": 0.7956351763543412, "grad_norm": 0.5076257848053785, "learning_rate": 2.0302300642544583e-06, "loss": 0.6845, "step": 37040 }, { "epoch": 0.7958499806676118, "grad_norm": 0.5259087825677118, "learning_rate": 2.0261146788639453e-06, "loss": 0.6858, "step": 37050 }, { "epoch": 0.7960647849808824, "grad_norm": 0.5244573164373156, "learning_rate": 2.0220029985227175e-06, "loss": 0.6967, "step": 37060 }, { "epoch": 0.796279589294153, "grad_norm": 0.5289311220896381, "learning_rate": 2.017895025141264e-06, "loss": 0.7002, "step": 37070 }, { "epoch": 0.7964943936074237, "grad_norm": 0.5141566850943272, "learning_rate": 2.013790760628336e-06, "loss": 0.694, "step": 37080 }, { "epoch": 0.7967091979206943, "grad_norm": 0.5130864502827068, "learning_rate": 2.009690206890984e-06, "loss": 0.7102, "step": 37090 }, { "epoch": 0.7969240022339649, "grad_norm": 0.5258889044125008, "learning_rate": 2.0055933658345094e-06, "loss": 0.7073, "step": 37100 }, { "epoch": 0.7971388065472355, "grad_norm": 0.5220405637403223, "learning_rate": 2.0015002393625114e-06, "loss": 0.6862, "step": 37110 }, { "epoch": 0.797353610860506, "grad_norm": 0.5143925147907012, "learning_rate": 1.997410829376847e-06, "loss": 0.6992, "step": 37120 }, { "epoch": 0.7975684151737766, "grad_norm": 0.501877470630008, "learning_rate": 1.993325137777652e-06, "loss": 0.6885, "step": 37130 }, { "epoch": 0.7977832194870473, "grad_norm": 0.5203927062197888, "learning_rate": 1.9892431664633393e-06, "loss": 0.697, "step": 37140 }, { "epoch": 0.7979980238003179, "grad_norm": 0.5025995904904371, "learning_rate": 1.9851649173305798e-06, "loss": 0.6942, "step": 37150 }, { "epoch": 0.7982128281135885, "grad_norm": 0.5191545346856352, "learning_rate": 1.981090392274333e-06, "loss": 0.6966, "step": 37160 }, { "epoch": 0.7984276324268591, "grad_norm": 0.5131619635945721, "learning_rate": 1.9770195931878123e-06, "loss": 0.6826, "step": 37170 }, { "epoch": 0.7986424367401297, "grad_norm": 0.5218712729260536, "learning_rate": 1.9729525219625077e-06, "loss": 0.6889, "step": 37180 }, { "epoch": 0.7988572410534004, "grad_norm": 0.5326896100885731, "learning_rate": 1.968889180488178e-06, "loss": 0.6956, "step": 37190 }, { "epoch": 0.799072045366671, "grad_norm": 0.5179996887681789, "learning_rate": 1.9648295706528385e-06, "loss": 0.691, "step": 37200 }, { "epoch": 0.7992868496799416, "grad_norm": 0.49638076183346225, "learning_rate": 1.96077369434279e-06, "loss": 0.6881, "step": 37210 }, { "epoch": 0.7995016539932122, "grad_norm": 0.5142154100208954, "learning_rate": 1.9567215534425777e-06, "loss": 0.6821, "step": 37220 }, { "epoch": 0.7997164583064827, "grad_norm": 0.5097975763047736, "learning_rate": 1.952673149835025e-06, "loss": 0.6934, "step": 37230 }, { "epoch": 0.7999312626197534, "grad_norm": 0.5188923865330697, "learning_rate": 1.948628485401215e-06, "loss": 0.7017, "step": 37240 }, { "epoch": 0.800146066933024, "grad_norm": 0.5393099873888024, "learning_rate": 1.9445875620204846e-06, "loss": 0.6985, "step": 37250 }, { "epoch": 0.8003608712462946, "grad_norm": 0.5270181645580231, "learning_rate": 1.940550381570453e-06, "loss": 0.7074, "step": 37260 }, { "epoch": 0.8005756755595652, "grad_norm": 0.49448978860517945, "learning_rate": 1.9365169459269763e-06, "loss": 0.704, "step": 37270 }, { "epoch": 0.8007904798728358, "grad_norm": 0.5090418918653451, "learning_rate": 1.932487256964191e-06, "loss": 0.6871, "step": 37280 }, { "epoch": 0.8010052841861065, "grad_norm": 0.5372331103704078, "learning_rate": 1.9284613165544776e-06, "loss": 0.7009, "step": 37290 }, { "epoch": 0.8012200884993771, "grad_norm": 0.5123892333670363, "learning_rate": 1.9244391265684836e-06, "loss": 0.6939, "step": 37300 }, { "epoch": 0.8014348928126477, "grad_norm": 0.4978925292624661, "learning_rate": 1.9204206888751133e-06, "loss": 0.6927, "step": 37310 }, { "epoch": 0.8016496971259183, "grad_norm": 0.4954993561307306, "learning_rate": 1.916406005341517e-06, "loss": 0.6832, "step": 37320 }, { "epoch": 0.8018645014391889, "grad_norm": 0.5248932509325274, "learning_rate": 1.9123950778331204e-06, "loss": 0.6889, "step": 37330 }, { "epoch": 0.8020793057524596, "grad_norm": 0.503520388001021, "learning_rate": 1.908387908213585e-06, "loss": 0.6941, "step": 37340 }, { "epoch": 0.8022941100657301, "grad_norm": 0.5217962284837745, "learning_rate": 1.9043844983448356e-06, "loss": 0.7014, "step": 37350 }, { "epoch": 0.8025089143790007, "grad_norm": 0.5172482221861939, "learning_rate": 1.9003848500870514e-06, "loss": 0.6816, "step": 37360 }, { "epoch": 0.8027237186922713, "grad_norm": 0.5171999415635867, "learning_rate": 1.8963889652986533e-06, "loss": 0.6918, "step": 37370 }, { "epoch": 0.8029385230055419, "grad_norm": 0.531206719078998, "learning_rate": 1.8923968458363307e-06, "loss": 0.6894, "step": 37380 }, { "epoch": 0.8031533273188126, "grad_norm": 0.509749112200991, "learning_rate": 1.8884084935550063e-06, "loss": 0.6937, "step": 37390 }, { "epoch": 0.8033681316320832, "grad_norm": 0.5234029461890899, "learning_rate": 1.884423910307861e-06, "loss": 0.7173, "step": 37400 }, { "epoch": 0.8035829359453538, "grad_norm": 0.520100769338295, "learning_rate": 1.880443097946325e-06, "loss": 0.6965, "step": 37410 }, { "epoch": 0.8037977402586244, "grad_norm": 0.5198521028897318, "learning_rate": 1.8764660583200733e-06, "loss": 0.6857, "step": 37420 }, { "epoch": 0.804012544571895, "grad_norm": 0.5170110623698808, "learning_rate": 1.872492793277032e-06, "loss": 0.6751, "step": 37430 }, { "epoch": 0.8042273488851656, "grad_norm": 0.5058623102726381, "learning_rate": 1.8685233046633655e-06, "loss": 0.6925, "step": 37440 }, { "epoch": 0.8044421531984363, "grad_norm": 0.5111088811413376, "learning_rate": 1.8645575943234906e-06, "loss": 0.6838, "step": 37450 }, { "epoch": 0.8046569575117068, "grad_norm": 0.5010466802240249, "learning_rate": 1.8605956641000678e-06, "loss": 0.6894, "step": 37460 }, { "epoch": 0.8048717618249774, "grad_norm": 0.5003570547858869, "learning_rate": 1.8566375158339977e-06, "loss": 0.6896, "step": 37470 }, { "epoch": 0.805086566138248, "grad_norm": 0.5067814043345279, "learning_rate": 1.852683151364426e-06, "loss": 0.6804, "step": 37480 }, { "epoch": 0.8053013704515186, "grad_norm": 0.5060047213750052, "learning_rate": 1.8487325725287419e-06, "loss": 0.7015, "step": 37490 }, { "epoch": 0.8055161747647893, "grad_norm": 0.5139506510936809, "learning_rate": 1.8447857811625747e-06, "loss": 0.6823, "step": 37500 }, { "epoch": 0.8057309790780599, "grad_norm": 0.4996768937608163, "learning_rate": 1.8408427790997873e-06, "loss": 0.6838, "step": 37510 }, { "epoch": 0.8059457833913305, "grad_norm": 0.5125914344469875, "learning_rate": 1.8369035681724912e-06, "loss": 0.6789, "step": 37520 }, { "epoch": 0.8061605877046011, "grad_norm": 0.5101593507880621, "learning_rate": 1.8329681502110308e-06, "loss": 0.7152, "step": 37530 }, { "epoch": 0.8063753920178717, "grad_norm": 0.5089159371492329, "learning_rate": 1.8290365270439926e-06, "loss": 0.6919, "step": 37540 }, { "epoch": 0.8065901963311424, "grad_norm": 0.5046175462761139, "learning_rate": 1.8251087004981972e-06, "loss": 0.6962, "step": 37550 }, { "epoch": 0.806805000644413, "grad_norm": 0.535765255941607, "learning_rate": 1.821184672398698e-06, "loss": 0.6824, "step": 37560 }, { "epoch": 0.8070198049576836, "grad_norm": 0.5057658285384766, "learning_rate": 1.817264444568787e-06, "loss": 0.7004, "step": 37570 }, { "epoch": 0.8072346092709541, "grad_norm": 0.5305219423002547, "learning_rate": 1.8133480188299913e-06, "loss": 0.6881, "step": 37580 }, { "epoch": 0.8074494135842247, "grad_norm": 0.5093435581220985, "learning_rate": 1.8094353970020705e-06, "loss": 0.7052, "step": 37590 }, { "epoch": 0.8076642178974954, "grad_norm": 0.5057006548211692, "learning_rate": 1.8055265809030142e-06, "loss": 0.6937, "step": 37600 }, { "epoch": 0.807879022210766, "grad_norm": 0.5062825370919449, "learning_rate": 1.8016215723490504e-06, "loss": 0.711, "step": 37610 }, { "epoch": 0.8080938265240366, "grad_norm": 0.5082475799530384, "learning_rate": 1.7977203731546266e-06, "loss": 0.7022, "step": 37620 }, { "epoch": 0.8083086308373072, "grad_norm": 0.5006515892573048, "learning_rate": 1.7938229851324308e-06, "loss": 0.6874, "step": 37630 }, { "epoch": 0.8085234351505778, "grad_norm": 0.5148519330223464, "learning_rate": 1.789929410093375e-06, "loss": 0.6914, "step": 37640 }, { "epoch": 0.8087382394638485, "grad_norm": 0.5026476503083747, "learning_rate": 1.7860396498466004e-06, "loss": 0.6916, "step": 37650 }, { "epoch": 0.8089530437771191, "grad_norm": 0.5054813405354616, "learning_rate": 1.782153706199481e-06, "loss": 0.6939, "step": 37660 }, { "epoch": 0.8091678480903897, "grad_norm": 0.5104786331574065, "learning_rate": 1.7782715809576023e-06, "loss": 0.708, "step": 37670 }, { "epoch": 0.8093826524036603, "grad_norm": 0.5104889216759052, "learning_rate": 1.7743932759247973e-06, "loss": 0.6941, "step": 37680 }, { "epoch": 0.8095974567169308, "grad_norm": 0.5301500050999222, "learning_rate": 1.7705187929031042e-06, "loss": 0.6935, "step": 37690 }, { "epoch": 0.8098122610302015, "grad_norm": 0.5008139461716258, "learning_rate": 1.7666481336927965e-06, "loss": 0.6966, "step": 37700 }, { "epoch": 0.8100270653434721, "grad_norm": 0.5292331018191944, "learning_rate": 1.7627813000923677e-06, "loss": 0.6869, "step": 37710 }, { "epoch": 0.8102418696567427, "grad_norm": 0.5214981520880394, "learning_rate": 1.7589182938985338e-06, "loss": 0.6881, "step": 37720 }, { "epoch": 0.8104566739700133, "grad_norm": 0.5162236641163164, "learning_rate": 1.755059116906236e-06, "loss": 0.6898, "step": 37730 }, { "epoch": 0.8106714782832839, "grad_norm": 0.5157714549239334, "learning_rate": 1.751203770908627e-06, "loss": 0.6875, "step": 37740 }, { "epoch": 0.8108862825965545, "grad_norm": 0.5107688828062216, "learning_rate": 1.7473522576970881e-06, "loss": 0.7055, "step": 37750 }, { "epoch": 0.8111010869098252, "grad_norm": 0.5467930765197646, "learning_rate": 1.743504579061216e-06, "loss": 0.6868, "step": 37760 }, { "epoch": 0.8113158912230958, "grad_norm": 0.5160466173487962, "learning_rate": 1.7396607367888286e-06, "loss": 0.6901, "step": 37770 }, { "epoch": 0.8115306955363664, "grad_norm": 0.5405614071125214, "learning_rate": 1.7358207326659604e-06, "loss": 0.689, "step": 37780 }, { "epoch": 0.811745499849637, "grad_norm": 0.5336778524063787, "learning_rate": 1.7319845684768533e-06, "loss": 0.6959, "step": 37790 }, { "epoch": 0.8119603041629075, "grad_norm": 0.5104195973429427, "learning_rate": 1.7281522460039845e-06, "loss": 0.6881, "step": 37800 }, { "epoch": 0.8121751084761782, "grad_norm": 0.5172156188555316, "learning_rate": 1.7243237670280267e-06, "loss": 0.6996, "step": 37810 }, { "epoch": 0.8123899127894488, "grad_norm": 0.5140221172801053, "learning_rate": 1.7204991333278776e-06, "loss": 0.7085, "step": 37820 }, { "epoch": 0.8126047171027194, "grad_norm": 0.5224661451049724, "learning_rate": 1.7166783466806458e-06, "loss": 0.7031, "step": 37830 }, { "epoch": 0.81281952141599, "grad_norm": 0.5110409285199197, "learning_rate": 1.712861408861647e-06, "loss": 0.7039, "step": 37840 }, { "epoch": 0.8130343257292606, "grad_norm": 0.5427771272170174, "learning_rate": 1.709048321644422e-06, "loss": 0.7081, "step": 37850 }, { "epoch": 0.8132491300425313, "grad_norm": 0.5132044659392205, "learning_rate": 1.705239086800704e-06, "loss": 0.7014, "step": 37860 }, { "epoch": 0.8134639343558019, "grad_norm": 0.5148999630994164, "learning_rate": 1.701433706100457e-06, "loss": 0.6973, "step": 37870 }, { "epoch": 0.8136787386690725, "grad_norm": 0.5079680548504395, "learning_rate": 1.6976321813118346e-06, "loss": 0.7022, "step": 37880 }, { "epoch": 0.8138935429823431, "grad_norm": 0.4960125156821642, "learning_rate": 1.6938345142012102e-06, "loss": 0.6785, "step": 37890 }, { "epoch": 0.8141083472956137, "grad_norm": 0.5272204610524729, "learning_rate": 1.6900407065331649e-06, "loss": 0.6939, "step": 37900 }, { "epoch": 0.8143231516088844, "grad_norm": 0.4848903289756121, "learning_rate": 1.6862507600704748e-06, "loss": 0.6998, "step": 37910 }, { "epoch": 0.8145379559221549, "grad_norm": 0.5052375107473448, "learning_rate": 1.6824646765741426e-06, "loss": 0.6828, "step": 37920 }, { "epoch": 0.8147527602354255, "grad_norm": 0.5116653542411183, "learning_rate": 1.6786824578033556e-06, "loss": 0.6925, "step": 37930 }, { "epoch": 0.8149675645486961, "grad_norm": 0.5191668583354877, "learning_rate": 1.6749041055155157e-06, "loss": 0.7069, "step": 37940 }, { "epoch": 0.8151823688619667, "grad_norm": 0.5216137206032925, "learning_rate": 1.6711296214662308e-06, "loss": 0.7014, "step": 37950 }, { "epoch": 0.8153971731752374, "grad_norm": 0.5212090406729156, "learning_rate": 1.6673590074092981e-06, "loss": 0.6915, "step": 37960 }, { "epoch": 0.815611977488508, "grad_norm": 0.5280444927816935, "learning_rate": 1.6635922650967363e-06, "loss": 0.7045, "step": 37970 }, { "epoch": 0.8158267818017786, "grad_norm": 0.5302776730833281, "learning_rate": 1.659829396278746e-06, "loss": 0.698, "step": 37980 }, { "epoch": 0.8160415861150492, "grad_norm": 0.5194339368072196, "learning_rate": 1.65607040270374e-06, "loss": 0.6844, "step": 37990 }, { "epoch": 0.8162563904283198, "grad_norm": 0.5268789032071284, "learning_rate": 1.6523152861183288e-06, "loss": 0.6933, "step": 38000 }, { "epoch": 0.8164711947415904, "grad_norm": 0.5034790229387067, "learning_rate": 1.6485640482673126e-06, "loss": 0.7029, "step": 38010 }, { "epoch": 0.8166859990548611, "grad_norm": 0.5238642863385727, "learning_rate": 1.6448166908937046e-06, "loss": 0.6984, "step": 38020 }, { "epoch": 0.8169008033681316, "grad_norm": 0.5384949344125787, "learning_rate": 1.641073215738702e-06, "loss": 0.6989, "step": 38030 }, { "epoch": 0.8171156076814022, "grad_norm": 0.515676479439265, "learning_rate": 1.6373336245417025e-06, "loss": 0.6967, "step": 38040 }, { "epoch": 0.8173304119946728, "grad_norm": 0.5176807526555749, "learning_rate": 1.6335979190403006e-06, "loss": 0.6783, "step": 38050 }, { "epoch": 0.8175452163079434, "grad_norm": 0.5006394103833677, "learning_rate": 1.6298661009702855e-06, "loss": 0.6995, "step": 38060 }, { "epoch": 0.8177600206212141, "grad_norm": 0.49950489053942465, "learning_rate": 1.6261381720656378e-06, "loss": 0.6855, "step": 38070 }, { "epoch": 0.8179748249344847, "grad_norm": 0.5114816231641758, "learning_rate": 1.6224141340585276e-06, "loss": 0.7026, "step": 38080 }, { "epoch": 0.8181896292477553, "grad_norm": 0.5242198840868686, "learning_rate": 1.6186939886793307e-06, "loss": 0.6848, "step": 38090 }, { "epoch": 0.8184044335610259, "grad_norm": 0.5396691916322732, "learning_rate": 1.6149777376565967e-06, "loss": 0.689, "step": 38100 }, { "epoch": 0.8186192378742965, "grad_norm": 0.5157018020298604, "learning_rate": 1.6112653827170754e-06, "loss": 0.6761, "step": 38110 }, { "epoch": 0.8188340421875672, "grad_norm": 0.5249870157381407, "learning_rate": 1.6075569255857104e-06, "loss": 0.6992, "step": 38120 }, { "epoch": 0.8190488465008378, "grad_norm": 0.5053189312234057, "learning_rate": 1.6038523679856189e-06, "loss": 0.6875, "step": 38130 }, { "epoch": 0.8192636508141083, "grad_norm": 0.5293662990085333, "learning_rate": 1.600151711638126e-06, "loss": 0.6911, "step": 38140 }, { "epoch": 0.8194784551273789, "grad_norm": 0.5048218038817608, "learning_rate": 1.5964549582627276e-06, "loss": 0.6813, "step": 38150 }, { "epoch": 0.8196932594406495, "grad_norm": 0.5288004984283555, "learning_rate": 1.592762109577114e-06, "loss": 0.6959, "step": 38160 }, { "epoch": 0.8199080637539202, "grad_norm": 0.5294144163040986, "learning_rate": 1.5890731672971594e-06, "loss": 0.7005, "step": 38170 }, { "epoch": 0.8201228680671908, "grad_norm": 0.5143583114789181, "learning_rate": 1.5853881331369247e-06, "loss": 0.6998, "step": 38180 }, { "epoch": 0.8203376723804614, "grad_norm": 0.5009264407402928, "learning_rate": 1.581707008808655e-06, "loss": 0.6915, "step": 38190 }, { "epoch": 0.820552476693732, "grad_norm": 0.4988425919957875, "learning_rate": 1.5780297960227708e-06, "loss": 0.6996, "step": 38200 }, { "epoch": 0.8207672810070026, "grad_norm": 0.5001446365022995, "learning_rate": 1.5743564964878866e-06, "loss": 0.7043, "step": 38210 }, { "epoch": 0.8209820853202733, "grad_norm": 0.5238040005094907, "learning_rate": 1.5706871119107914e-06, "loss": 0.6975, "step": 38220 }, { "epoch": 0.8211968896335439, "grad_norm": 0.49772899954971367, "learning_rate": 1.567021643996458e-06, "loss": 0.69, "step": 38230 }, { "epoch": 0.8214116939468145, "grad_norm": 0.5226931397210955, "learning_rate": 1.5633600944480377e-06, "loss": 0.7145, "step": 38240 }, { "epoch": 0.821626498260085, "grad_norm": 0.5111897161908127, "learning_rate": 1.5597024649668645e-06, "loss": 0.6868, "step": 38250 }, { "epoch": 0.8218413025733556, "grad_norm": 0.5194930113430709, "learning_rate": 1.5560487572524452e-06, "loss": 0.694, "step": 38260 }, { "epoch": 0.8220561068866263, "grad_norm": 0.5122991460889587, "learning_rate": 1.552398973002467e-06, "loss": 0.6853, "step": 38270 }, { "epoch": 0.8222709111998969, "grad_norm": 0.5144915325254265, "learning_rate": 1.5487531139127976e-06, "loss": 0.6981, "step": 38280 }, { "epoch": 0.8224857155131675, "grad_norm": 0.5099897016133259, "learning_rate": 1.5451111816774756e-06, "loss": 0.7032, "step": 38290 }, { "epoch": 0.8227005198264381, "grad_norm": 0.5162555967078819, "learning_rate": 1.5414731779887182e-06, "loss": 0.6842, "step": 38300 }, { "epoch": 0.8229153241397087, "grad_norm": 0.5338368411100364, "learning_rate": 1.537839104536918e-06, "loss": 0.688, "step": 38310 }, { "epoch": 0.8231301284529793, "grad_norm": 0.5112979366014295, "learning_rate": 1.534208963010636e-06, "loss": 0.6911, "step": 38320 }, { "epoch": 0.82334493276625, "grad_norm": 0.5087777288883957, "learning_rate": 1.5305827550966113e-06, "loss": 0.6872, "step": 38330 }, { "epoch": 0.8235597370795206, "grad_norm": 0.49160735113437615, "learning_rate": 1.5269604824797556e-06, "loss": 0.6877, "step": 38340 }, { "epoch": 0.8237745413927912, "grad_norm": 0.5281039504251113, "learning_rate": 1.5233421468431475e-06, "loss": 0.6916, "step": 38350 }, { "epoch": 0.8239893457060617, "grad_norm": 0.49685966763451844, "learning_rate": 1.519727749868042e-06, "loss": 0.6868, "step": 38360 }, { "epoch": 0.8242041500193323, "grad_norm": 0.5292477798818466, "learning_rate": 1.516117293233862e-06, "loss": 0.6813, "step": 38370 }, { "epoch": 0.824418954332603, "grad_norm": 0.4922303762705952, "learning_rate": 1.5125107786181948e-06, "loss": 0.6939, "step": 38380 }, { "epoch": 0.8246337586458736, "grad_norm": 0.5098238815457077, "learning_rate": 1.508908207696801e-06, "loss": 0.7038, "step": 38390 }, { "epoch": 0.8248485629591442, "grad_norm": 0.5204133525606097, "learning_rate": 1.5053095821436092e-06, "loss": 0.6912, "step": 38400 }, { "epoch": 0.8250633672724148, "grad_norm": 0.5110478312144826, "learning_rate": 1.501714903630712e-06, "loss": 0.6935, "step": 38410 }, { "epoch": 0.8252781715856854, "grad_norm": 0.4977353499043285, "learning_rate": 1.4981241738283724e-06, "loss": 0.6926, "step": 38420 }, { "epoch": 0.8254929758989561, "grad_norm": 0.49726992306043954, "learning_rate": 1.494537394405008e-06, "loss": 0.6898, "step": 38430 }, { "epoch": 0.8257077802122267, "grad_norm": 0.5192905390752384, "learning_rate": 1.4909545670272186e-06, "loss": 0.6868, "step": 38440 }, { "epoch": 0.8259225845254973, "grad_norm": 0.5050774765754228, "learning_rate": 1.4873756933597505e-06, "loss": 0.6783, "step": 38450 }, { "epoch": 0.8261373888387679, "grad_norm": 0.5182788128937821, "learning_rate": 1.4838007750655214e-06, "loss": 0.6937, "step": 38460 }, { "epoch": 0.8263521931520385, "grad_norm": 0.5075379442809032, "learning_rate": 1.4802298138056104e-06, "loss": 0.6824, "step": 38470 }, { "epoch": 0.8265669974653092, "grad_norm": 0.5149603108095413, "learning_rate": 1.4766628112392566e-06, "loss": 0.7117, "step": 38480 }, { "epoch": 0.8267818017785797, "grad_norm": 0.5170003208350263, "learning_rate": 1.4730997690238635e-06, "loss": 0.6859, "step": 38490 }, { "epoch": 0.8269966060918503, "grad_norm": 0.5188232774350414, "learning_rate": 1.4695406888149832e-06, "loss": 0.6959, "step": 38500 }, { "epoch": 0.8272114104051209, "grad_norm": 0.6124746424283791, "learning_rate": 1.4659855722663453e-06, "loss": 0.6881, "step": 38510 }, { "epoch": 0.8274262147183915, "grad_norm": 0.5820473251466972, "learning_rate": 1.4624344210298202e-06, "loss": 0.7056, "step": 38520 }, { "epoch": 0.8276410190316622, "grad_norm": 0.5487848245547172, "learning_rate": 1.4588872367554452e-06, "loss": 0.6813, "step": 38530 }, { "epoch": 0.8278558233449328, "grad_norm": 0.5052583706726699, "learning_rate": 1.455344021091414e-06, "loss": 0.702, "step": 38540 }, { "epoch": 0.8280706276582034, "grad_norm": 0.5449975100876051, "learning_rate": 1.451804775684067e-06, "loss": 0.6823, "step": 38550 }, { "epoch": 0.828285431971474, "grad_norm": 0.5086547663242421, "learning_rate": 1.448269502177918e-06, "loss": 0.6842, "step": 38560 }, { "epoch": 0.8285002362847446, "grad_norm": 0.5137832027200622, "learning_rate": 1.4447382022156165e-06, "loss": 0.687, "step": 38570 }, { "epoch": 0.8287150405980153, "grad_norm": 0.49746601253508876, "learning_rate": 1.4412108774379775e-06, "loss": 0.6818, "step": 38580 }, { "epoch": 0.8289298449112859, "grad_norm": 0.5103152319908182, "learning_rate": 1.4376875294839643e-06, "loss": 0.6914, "step": 38590 }, { "epoch": 0.8291446492245564, "grad_norm": 0.5275562256088486, "learning_rate": 1.4341681599906897e-06, "loss": 0.6901, "step": 38600 }, { "epoch": 0.829359453537827, "grad_norm": 0.5062231253926271, "learning_rate": 1.4306527705934293e-06, "loss": 0.6869, "step": 38610 }, { "epoch": 0.8295742578510976, "grad_norm": 0.5338108634050998, "learning_rate": 1.4271413629255937e-06, "loss": 0.6922, "step": 38620 }, { "epoch": 0.8297890621643682, "grad_norm": 0.5016771702455557, "learning_rate": 1.4236339386187547e-06, "loss": 0.6893, "step": 38630 }, { "epoch": 0.8300038664776389, "grad_norm": 0.5191984822566196, "learning_rate": 1.4201304993026321e-06, "loss": 0.6925, "step": 38640 }, { "epoch": 0.8302186707909095, "grad_norm": 0.5207586260457043, "learning_rate": 1.4166310466050836e-06, "loss": 0.6847, "step": 38650 }, { "epoch": 0.8304334751041801, "grad_norm": 0.5139953998878591, "learning_rate": 1.413135582152133e-06, "loss": 0.685, "step": 38660 }, { "epoch": 0.8306482794174507, "grad_norm": 0.5263182428133765, "learning_rate": 1.4096441075679325e-06, "loss": 0.6863, "step": 38670 }, { "epoch": 0.8308630837307213, "grad_norm": 0.5282654822309718, "learning_rate": 1.4061566244747937e-06, "loss": 0.6957, "step": 38680 }, { "epoch": 0.831077888043992, "grad_norm": 0.5209258215224781, "learning_rate": 1.4026731344931655e-06, "loss": 0.6956, "step": 38690 }, { "epoch": 0.8312926923572626, "grad_norm": 0.5106724966574794, "learning_rate": 1.399193639241645e-06, "loss": 0.684, "step": 38700 }, { "epoch": 0.8315074966705331, "grad_norm": 0.5033855375018464, "learning_rate": 1.3957181403369747e-06, "loss": 0.6867, "step": 38710 }, { "epoch": 0.8317223009838037, "grad_norm": 0.5085681808640763, "learning_rate": 1.3922466393940315e-06, "loss": 0.6937, "step": 38720 }, { "epoch": 0.8319371052970743, "grad_norm": 0.5034486647222803, "learning_rate": 1.3887791380258509e-06, "loss": 0.7001, "step": 38730 }, { "epoch": 0.832151909610345, "grad_norm": 0.5080003477917522, "learning_rate": 1.3853156378435916e-06, "loss": 0.6964, "step": 38740 }, { "epoch": 0.8323667139236156, "grad_norm": 0.5111977023138986, "learning_rate": 1.3818561404565645e-06, "loss": 0.6881, "step": 38750 }, { "epoch": 0.8325815182368862, "grad_norm": 0.5061875410904043, "learning_rate": 1.3784006474722212e-06, "loss": 0.6964, "step": 38760 }, { "epoch": 0.8327963225501568, "grad_norm": 0.5333922720806704, "learning_rate": 1.374949160496143e-06, "loss": 0.6946, "step": 38770 }, { "epoch": 0.8330111268634274, "grad_norm": 0.5285837150812875, "learning_rate": 1.3715016811320626e-06, "loss": 0.6896, "step": 38780 }, { "epoch": 0.8332259311766981, "grad_norm": 0.519609938847976, "learning_rate": 1.3680582109818396e-06, "loss": 0.6826, "step": 38790 }, { "epoch": 0.8334407354899687, "grad_norm": 0.5263908151996978, "learning_rate": 1.3646187516454778e-06, "loss": 0.6948, "step": 38800 }, { "epoch": 0.8336555398032393, "grad_norm": 0.50222280387675, "learning_rate": 1.3611833047211132e-06, "loss": 0.6938, "step": 38810 }, { "epoch": 0.8338703441165098, "grad_norm": 0.5295682152493145, "learning_rate": 1.3577518718050199e-06, "loss": 0.6847, "step": 38820 }, { "epoch": 0.8340851484297804, "grad_norm": 0.526283044923053, "learning_rate": 1.3543244544916078e-06, "loss": 0.6799, "step": 38830 }, { "epoch": 0.8342999527430511, "grad_norm": 0.521205682856861, "learning_rate": 1.3509010543734158e-06, "loss": 0.7013, "step": 38840 }, { "epoch": 0.8345147570563217, "grad_norm": 0.499623044668992, "learning_rate": 1.347481673041121e-06, "loss": 0.6916, "step": 38850 }, { "epoch": 0.8347295613695923, "grad_norm": 0.49936320967671904, "learning_rate": 1.3440663120835329e-06, "loss": 0.6938, "step": 38860 }, { "epoch": 0.8349443656828629, "grad_norm": 0.5293633851844111, "learning_rate": 1.3406549730875916e-06, "loss": 0.6836, "step": 38870 }, { "epoch": 0.8351591699961335, "grad_norm": 0.5168353636514088, "learning_rate": 1.3372476576383675e-06, "loss": 0.6904, "step": 38880 }, { "epoch": 0.8353739743094042, "grad_norm": 0.49422042008773903, "learning_rate": 1.3338443673190637e-06, "loss": 0.695, "step": 38890 }, { "epoch": 0.8355887786226748, "grad_norm": 0.5056409802616189, "learning_rate": 1.3304451037110132e-06, "loss": 0.6968, "step": 38900 }, { "epoch": 0.8358035829359454, "grad_norm": 0.517589863459402, "learning_rate": 1.3270498683936738e-06, "loss": 0.69, "step": 38910 }, { "epoch": 0.836018387249216, "grad_norm": 0.5214346220274612, "learning_rate": 1.3236586629446369e-06, "loss": 0.6914, "step": 38920 }, { "epoch": 0.8362331915624865, "grad_norm": 0.5087168684935114, "learning_rate": 1.3202714889396172e-06, "loss": 0.6795, "step": 38930 }, { "epoch": 0.8364479958757571, "grad_norm": 0.5205133962298867, "learning_rate": 1.3168883479524598e-06, "loss": 0.6909, "step": 38940 }, { "epoch": 0.8366628001890278, "grad_norm": 0.5318929561552359, "learning_rate": 1.3135092415551354e-06, "loss": 0.7047, "step": 38950 }, { "epoch": 0.8368776045022984, "grad_norm": 0.5028926526152647, "learning_rate": 1.3101341713177351e-06, "loss": 0.6878, "step": 38960 }, { "epoch": 0.837092408815569, "grad_norm": 0.5277421426222694, "learning_rate": 1.306763138808479e-06, "loss": 0.6932, "step": 38970 }, { "epoch": 0.8373072131288396, "grad_norm": 0.4805738830452809, "learning_rate": 1.3033961455937127e-06, "loss": 0.6822, "step": 38980 }, { "epoch": 0.8375220174421102, "grad_norm": 0.5108469134488737, "learning_rate": 1.3000331932379007e-06, "loss": 0.6893, "step": 38990 }, { "epoch": 0.8377368217553809, "grad_norm": 0.5201560237520049, "learning_rate": 1.2966742833036327e-06, "loss": 0.6857, "step": 39000 }, { "epoch": 0.8379516260686515, "grad_norm": 0.5188163877062854, "learning_rate": 1.2933194173516218e-06, "loss": 0.7, "step": 39010 }, { "epoch": 0.8381664303819221, "grad_norm": 0.5093643945181999, "learning_rate": 1.289968596940694e-06, "loss": 0.7081, "step": 39020 }, { "epoch": 0.8383812346951927, "grad_norm": 0.5203767610471154, "learning_rate": 1.2866218236278038e-06, "loss": 0.6819, "step": 39030 }, { "epoch": 0.8385960390084632, "grad_norm": 0.5184824000269601, "learning_rate": 1.2832790989680221e-06, "loss": 0.6855, "step": 39040 }, { "epoch": 0.838810843321734, "grad_norm": 0.5019301811954551, "learning_rate": 1.2799404245145396e-06, "loss": 0.6947, "step": 39050 }, { "epoch": 0.8390256476350045, "grad_norm": 0.5346206336302403, "learning_rate": 1.2766058018186645e-06, "loss": 0.703, "step": 39060 }, { "epoch": 0.8392404519482751, "grad_norm": 0.5227834077619942, "learning_rate": 1.2732752324298225e-06, "loss": 0.6945, "step": 39070 }, { "epoch": 0.8394552562615457, "grad_norm": 0.5216423504327496, "learning_rate": 1.2699487178955573e-06, "loss": 0.6927, "step": 39080 }, { "epoch": 0.8396700605748163, "grad_norm": 0.5446631129970325, "learning_rate": 1.266626259761523e-06, "loss": 0.6824, "step": 39090 }, { "epoch": 0.839884864888087, "grad_norm": 0.49940235609166955, "learning_rate": 1.2633078595714943e-06, "loss": 0.7022, "step": 39100 }, { "epoch": 0.8400996692013576, "grad_norm": 0.5079994069706872, "learning_rate": 1.2599935188673596e-06, "loss": 0.6923, "step": 39110 }, { "epoch": 0.8403144735146282, "grad_norm": 0.5057473773527276, "learning_rate": 1.2566832391891215e-06, "loss": 0.7097, "step": 39120 }, { "epoch": 0.8405292778278988, "grad_norm": 0.5100763490334336, "learning_rate": 1.2533770220748942e-06, "loss": 0.679, "step": 39130 }, { "epoch": 0.8407440821411694, "grad_norm": 0.5003721745532178, "learning_rate": 1.2500748690609033e-06, "loss": 0.699, "step": 39140 }, { "epoch": 0.8409588864544401, "grad_norm": 0.5137872175833427, "learning_rate": 1.246776781681487e-06, "loss": 0.6928, "step": 39150 }, { "epoch": 0.8411736907677106, "grad_norm": 0.497587322672259, "learning_rate": 1.2434827614690958e-06, "loss": 0.6988, "step": 39160 }, { "epoch": 0.8413884950809812, "grad_norm": 0.5202834990746827, "learning_rate": 1.2401928099542892e-06, "loss": 0.6861, "step": 39170 }, { "epoch": 0.8416032993942518, "grad_norm": 0.5189296596390629, "learning_rate": 1.2369069286657388e-06, "loss": 0.6805, "step": 39180 }, { "epoch": 0.8418181037075224, "grad_norm": 0.5141868855723348, "learning_rate": 1.2336251191302162e-06, "loss": 0.6897, "step": 39190 }, { "epoch": 0.842032908020793, "grad_norm": 0.5124494606085882, "learning_rate": 1.2303473828726153e-06, "loss": 0.6957, "step": 39200 }, { "epoch": 0.8422477123340637, "grad_norm": 0.5214072352573822, "learning_rate": 1.2270737214159245e-06, "loss": 0.6796, "step": 39210 }, { "epoch": 0.8424625166473343, "grad_norm": 0.528792736816499, "learning_rate": 1.2238041362812435e-06, "loss": 0.6899, "step": 39220 }, { "epoch": 0.8426773209606049, "grad_norm": 0.5093238296040209, "learning_rate": 1.2205386289877829e-06, "loss": 0.6911, "step": 39230 }, { "epoch": 0.8428921252738755, "grad_norm": 0.4978808338740638, "learning_rate": 1.2172772010528455e-06, "loss": 0.6933, "step": 39240 }, { "epoch": 0.8431069295871461, "grad_norm": 0.5325179259250296, "learning_rate": 1.2140198539918568e-06, "loss": 0.6933, "step": 39250 }, { "epoch": 0.8433217339004168, "grad_norm": 0.5239028106635986, "learning_rate": 1.2107665893183274e-06, "loss": 0.6845, "step": 39260 }, { "epoch": 0.8435365382136873, "grad_norm": 0.5206218974407494, "learning_rate": 1.2075174085438878e-06, "loss": 0.704, "step": 39270 }, { "epoch": 0.8437513425269579, "grad_norm": 0.5234721000920526, "learning_rate": 1.2042723131782586e-06, "loss": 0.6886, "step": 39280 }, { "epoch": 0.8439661468402285, "grad_norm": 0.5166325698154588, "learning_rate": 1.2010313047292677e-06, "loss": 0.6921, "step": 39290 }, { "epoch": 0.8441809511534991, "grad_norm": 0.5013497589587982, "learning_rate": 1.197794384702844e-06, "loss": 0.6839, "step": 39300 }, { "epoch": 0.8443957554667698, "grad_norm": 0.5392745947364839, "learning_rate": 1.1945615546030109e-06, "loss": 0.6848, "step": 39310 }, { "epoch": 0.8446105597800404, "grad_norm": 0.5083861122426512, "learning_rate": 1.1913328159319049e-06, "loss": 0.6914, "step": 39320 }, { "epoch": 0.844825364093311, "grad_norm": 0.49717604044905356, "learning_rate": 1.1881081701897446e-06, "loss": 0.6882, "step": 39330 }, { "epoch": 0.8450401684065816, "grad_norm": 0.5273892484603656, "learning_rate": 1.1848876188748583e-06, "loss": 0.7001, "step": 39340 }, { "epoch": 0.8452549727198522, "grad_norm": 0.5274494392930585, "learning_rate": 1.1816711634836708e-06, "loss": 0.6842, "step": 39350 }, { "epoch": 0.8454697770331229, "grad_norm": 0.5550754818367603, "learning_rate": 1.1784588055106937e-06, "loss": 0.6957, "step": 39360 }, { "epoch": 0.8456845813463935, "grad_norm": 0.5093805942815454, "learning_rate": 1.175250546448553e-06, "loss": 0.69, "step": 39370 }, { "epoch": 0.845899385659664, "grad_norm": 0.5291605321484583, "learning_rate": 1.1720463877879518e-06, "loss": 0.6954, "step": 39380 }, { "epoch": 0.8461141899729346, "grad_norm": 0.5213523166182829, "learning_rate": 1.1688463310176979e-06, "loss": 0.7063, "step": 39390 }, { "epoch": 0.8463289942862052, "grad_norm": 0.5043482415928106, "learning_rate": 1.1656503776246942e-06, "loss": 0.6896, "step": 39400 }, { "epoch": 0.8465437985994759, "grad_norm": 0.5340179003407842, "learning_rate": 1.1624585290939261e-06, "loss": 0.6895, "step": 39410 }, { "epoch": 0.8467586029127465, "grad_norm": 0.5334819112678492, "learning_rate": 1.1592707869084896e-06, "loss": 0.6925, "step": 39420 }, { "epoch": 0.8469734072260171, "grad_norm": 0.5196334748388658, "learning_rate": 1.156087152549552e-06, "loss": 0.6972, "step": 39430 }, { "epoch": 0.8471882115392877, "grad_norm": 0.5212173781531991, "learning_rate": 1.1529076274963925e-06, "loss": 0.6837, "step": 39440 }, { "epoch": 0.8474030158525583, "grad_norm": 0.5271644084715066, "learning_rate": 1.1497322132263643e-06, "loss": 0.6781, "step": 39450 }, { "epoch": 0.847617820165829, "grad_norm": 0.5251522649812483, "learning_rate": 1.1465609112149178e-06, "loss": 0.6966, "step": 39460 }, { "epoch": 0.8478326244790996, "grad_norm": 0.5028358197340315, "learning_rate": 1.1433937229355951e-06, "loss": 0.682, "step": 39470 }, { "epoch": 0.8480474287923702, "grad_norm": 0.5260187617540428, "learning_rate": 1.140230649860018e-06, "loss": 0.7039, "step": 39480 }, { "epoch": 0.8482622331056408, "grad_norm": 0.49354427489124325, "learning_rate": 1.1370716934579086e-06, "loss": 0.6806, "step": 39490 }, { "epoch": 0.8484770374189113, "grad_norm": 0.5353136217093745, "learning_rate": 1.1339168551970647e-06, "loss": 0.7164, "step": 39500 }, { "epoch": 0.8486918417321819, "grad_norm": 0.5172265496588412, "learning_rate": 1.1307661365433763e-06, "loss": 0.6857, "step": 39510 }, { "epoch": 0.8489066460454526, "grad_norm": 0.5240872817688168, "learning_rate": 1.1276195389608201e-06, "loss": 0.6842, "step": 39520 }, { "epoch": 0.8491214503587232, "grad_norm": 0.5120247735049234, "learning_rate": 1.1244770639114522e-06, "loss": 0.7057, "step": 39530 }, { "epoch": 0.8493362546719938, "grad_norm": 0.49496102620753335, "learning_rate": 1.1213387128554232e-06, "loss": 0.6822, "step": 39540 }, { "epoch": 0.8495510589852644, "grad_norm": 0.5000858443900138, "learning_rate": 1.1182044872509556e-06, "loss": 0.6881, "step": 39550 }, { "epoch": 0.849765863298535, "grad_norm": 0.5040809516882646, "learning_rate": 1.1150743885543636e-06, "loss": 0.6947, "step": 39560 }, { "epoch": 0.8499806676118057, "grad_norm": 0.5420538811255085, "learning_rate": 1.11194841822004e-06, "loss": 0.6772, "step": 39570 }, { "epoch": 0.8501954719250763, "grad_norm": 0.504747244903184, "learning_rate": 1.1088265777004613e-06, "loss": 0.6809, "step": 39580 }, { "epoch": 0.8504102762383469, "grad_norm": 0.49931071427553786, "learning_rate": 1.105708868446187e-06, "loss": 0.6829, "step": 39590 }, { "epoch": 0.8506250805516175, "grad_norm": 0.5090934536047, "learning_rate": 1.102595291905848e-06, "loss": 0.694, "step": 39600 }, { "epoch": 0.850839884864888, "grad_norm": 0.4994127034268232, "learning_rate": 1.099485849526165e-06, "loss": 0.6967, "step": 39610 }, { "epoch": 0.8510546891781587, "grad_norm": 0.5202678470784433, "learning_rate": 1.0963805427519335e-06, "loss": 0.6893, "step": 39620 }, { "epoch": 0.8512694934914293, "grad_norm": 0.5007238242310413, "learning_rate": 1.0932793730260272e-06, "loss": 0.6945, "step": 39630 }, { "epoch": 0.8514842978046999, "grad_norm": 0.5208651575984488, "learning_rate": 1.0901823417893997e-06, "loss": 0.6903, "step": 39640 }, { "epoch": 0.8516991021179705, "grad_norm": 0.511764861180484, "learning_rate": 1.087089450481078e-06, "loss": 0.6934, "step": 39650 }, { "epoch": 0.8519139064312411, "grad_norm": 0.5194391424779614, "learning_rate": 1.0840007005381714e-06, "loss": 0.6772, "step": 39660 }, { "epoch": 0.8521287107445118, "grad_norm": 0.5000894132679686, "learning_rate": 1.0809160933958574e-06, "loss": 0.6918, "step": 39670 }, { "epoch": 0.8523435150577824, "grad_norm": 0.5349623963637503, "learning_rate": 1.0778356304873927e-06, "loss": 0.6931, "step": 39680 }, { "epoch": 0.852558319371053, "grad_norm": 0.5127707494353972, "learning_rate": 1.074759313244108e-06, "loss": 0.6858, "step": 39690 }, { "epoch": 0.8527731236843236, "grad_norm": 0.5050351323720006, "learning_rate": 1.07168714309541e-06, "loss": 0.6983, "step": 39700 }, { "epoch": 0.8529879279975942, "grad_norm": 0.5042118701302163, "learning_rate": 1.0686191214687758e-06, "loss": 0.6907, "step": 39710 }, { "epoch": 0.8532027323108649, "grad_norm": 0.5319983433434204, "learning_rate": 1.0655552497897525e-06, "loss": 0.6797, "step": 39720 }, { "epoch": 0.8534175366241354, "grad_norm": 0.5126038269348526, "learning_rate": 1.0624955294819628e-06, "loss": 0.6798, "step": 39730 }, { "epoch": 0.853632340937406, "grad_norm": 0.5005716309497225, "learning_rate": 1.0594399619671013e-06, "loss": 0.6933, "step": 39740 }, { "epoch": 0.8538471452506766, "grad_norm": 0.5167440374916815, "learning_rate": 1.0563885486649284e-06, "loss": 0.6905, "step": 39750 }, { "epoch": 0.8540619495639472, "grad_norm": 0.504354990255589, "learning_rate": 1.0533412909932793e-06, "loss": 0.6799, "step": 39760 }, { "epoch": 0.8542767538772179, "grad_norm": 0.5143195869144642, "learning_rate": 1.0502981903680565e-06, "loss": 0.6833, "step": 39770 }, { "epoch": 0.8544915581904885, "grad_norm": 0.49947041857768637, "learning_rate": 1.0472592482032272e-06, "loss": 0.6746, "step": 39780 }, { "epoch": 0.8547063625037591, "grad_norm": 0.5217962095232429, "learning_rate": 1.0442244659108325e-06, "loss": 0.7035, "step": 39790 }, { "epoch": 0.8549211668170297, "grad_norm": 0.5096704985368335, "learning_rate": 1.0411938449009752e-06, "loss": 0.6825, "step": 39800 }, { "epoch": 0.8551359711303003, "grad_norm": 0.5135857160178005, "learning_rate": 1.0381673865818288e-06, "loss": 0.6831, "step": 39810 }, { "epoch": 0.8553507754435709, "grad_norm": 0.522664943240014, "learning_rate": 1.0351450923596317e-06, "loss": 0.6931, "step": 39820 }, { "epoch": 0.8555655797568416, "grad_norm": 0.49812520742428695, "learning_rate": 1.0321269636386833e-06, "loss": 0.6881, "step": 39830 }, { "epoch": 0.8557803840701121, "grad_norm": 0.5120365797945067, "learning_rate": 1.0291130018213558e-06, "loss": 0.6995, "step": 39840 }, { "epoch": 0.8559951883833827, "grad_norm": 0.5392653302148872, "learning_rate": 1.0261032083080746e-06, "loss": 0.6926, "step": 39850 }, { "epoch": 0.8562099926966533, "grad_norm": 1.451337023254773, "learning_rate": 1.023097584497339e-06, "loss": 0.6877, "step": 39860 }, { "epoch": 0.8564247970099239, "grad_norm": 0.5476743766148364, "learning_rate": 1.0200961317857027e-06, "loss": 0.6825, "step": 39870 }, { "epoch": 0.8566396013231946, "grad_norm": 0.5313081010660222, "learning_rate": 1.017098851567785e-06, "loss": 0.7, "step": 39880 }, { "epoch": 0.8568544056364652, "grad_norm": 0.5362672147803951, "learning_rate": 1.014105745236269e-06, "loss": 0.6909, "step": 39890 }, { "epoch": 0.8570692099497358, "grad_norm": 0.5217352952005082, "learning_rate": 1.0111168141818916e-06, "loss": 0.6921, "step": 39900 }, { "epoch": 0.8572840142630064, "grad_norm": 0.5111751841927796, "learning_rate": 1.0081320597934542e-06, "loss": 0.6819, "step": 39910 }, { "epoch": 0.857498818576277, "grad_norm": 0.4993174113029642, "learning_rate": 1.0051514834578169e-06, "loss": 0.6958, "step": 39920 }, { "epoch": 0.8577136228895477, "grad_norm": 0.5053368624600403, "learning_rate": 1.002175086559899e-06, "loss": 0.6881, "step": 39930 }, { "epoch": 0.8579284272028183, "grad_norm": 0.5130032786788024, "learning_rate": 9.992028704826785e-07, "loss": 0.7007, "step": 39940 }, { "epoch": 0.8581432315160888, "grad_norm": 0.5157365810431223, "learning_rate": 9.96234836607184e-07, "loss": 0.6744, "step": 39950 }, { "epoch": 0.8583580358293594, "grad_norm": 0.527685117613382, "learning_rate": 9.932709863125133e-07, "loss": 0.6949, "step": 39960 }, { "epoch": 0.85857284014263, "grad_norm": 0.512839755051422, "learning_rate": 9.903113209758098e-07, "loss": 0.6948, "step": 39970 }, { "epoch": 0.8587876444559007, "grad_norm": 0.5211080071086338, "learning_rate": 9.87355841972274e-07, "loss": 0.71, "step": 39980 }, { "epoch": 0.8590024487691713, "grad_norm": 0.5098392488580249, "learning_rate": 9.844045506751687e-07, "loss": 0.6889, "step": 39990 }, { "epoch": 0.8592172530824419, "grad_norm": 0.5218396209074099, "learning_rate": 9.81457448455797e-07, "loss": 0.6872, "step": 40000 }, { "epoch": 0.8594320573957125, "grad_norm": 0.5008077047222401, "learning_rate": 9.785145366835325e-07, "loss": 0.6867, "step": 40010 }, { "epoch": 0.8596468617089831, "grad_norm": 0.5055415929729381, "learning_rate": 9.755758167257857e-07, "loss": 0.7009, "step": 40020 }, { "epoch": 0.8598616660222538, "grad_norm": 0.5309493067199006, "learning_rate": 9.726412899480342e-07, "loss": 0.689, "step": 40030 }, { "epoch": 0.8600764703355244, "grad_norm": 0.5187592677496787, "learning_rate": 9.697109577137943e-07, "loss": 0.6768, "step": 40040 }, { "epoch": 0.860291274648795, "grad_norm": 0.5192073916886506, "learning_rate": 9.667848213846386e-07, "loss": 0.6891, "step": 40050 }, { "epoch": 0.8605060789620655, "grad_norm": 0.5259783631972406, "learning_rate": 9.638628823201956e-07, "loss": 0.6895, "step": 40060 }, { "epoch": 0.8607208832753361, "grad_norm": 0.5226106882651611, "learning_rate": 9.609451418781302e-07, "loss": 0.69, "step": 40070 }, { "epoch": 0.8609356875886067, "grad_norm": 0.5071335472658586, "learning_rate": 9.580316014141722e-07, "loss": 0.6846, "step": 40080 }, { "epoch": 0.8611504919018774, "grad_norm": 0.5119813837953315, "learning_rate": 9.551222622820866e-07, "loss": 0.695, "step": 40090 }, { "epoch": 0.861365296215148, "grad_norm": 0.4918570321661607, "learning_rate": 9.522171258336943e-07, "loss": 0.6898, "step": 40100 }, { "epoch": 0.8615801005284186, "grad_norm": 0.5038278185618428, "learning_rate": 9.493161934188632e-07, "loss": 0.6902, "step": 40110 }, { "epoch": 0.8617949048416892, "grad_norm": 0.5131068608059737, "learning_rate": 9.464194663854986e-07, "loss": 0.687, "step": 40120 }, { "epoch": 0.8620097091549598, "grad_norm": 0.5313015107754585, "learning_rate": 9.435269460795671e-07, "loss": 0.6973, "step": 40130 }, { "epoch": 0.8622245134682305, "grad_norm": 0.509195051088408, "learning_rate": 9.406386338450657e-07, "loss": 0.6946, "step": 40140 }, { "epoch": 0.8624393177815011, "grad_norm": 0.502073057996939, "learning_rate": 9.377545310240454e-07, "loss": 0.6909, "step": 40150 }, { "epoch": 0.8626541220947717, "grad_norm": 0.5081753672118122, "learning_rate": 9.34874638956601e-07, "loss": 0.6946, "step": 40160 }, { "epoch": 0.8628689264080422, "grad_norm": 0.5075208175361569, "learning_rate": 9.31998958980862e-07, "loss": 0.6846, "step": 40170 }, { "epoch": 0.8630837307213128, "grad_norm": 0.5559833588569257, "learning_rate": 9.291274924330141e-07, "loss": 0.6993, "step": 40180 }, { "epoch": 0.8632985350345835, "grad_norm": 0.5406798025815065, "learning_rate": 9.262602406472732e-07, "loss": 0.6948, "step": 40190 }, { "epoch": 0.8635133393478541, "grad_norm": 0.5074440350796556, "learning_rate": 9.233972049559037e-07, "loss": 0.6845, "step": 40200 }, { "epoch": 0.8637281436611247, "grad_norm": 0.5231140038044005, "learning_rate": 9.205383866892092e-07, "loss": 0.6887, "step": 40210 }, { "epoch": 0.8639429479743953, "grad_norm": 0.5120556983369559, "learning_rate": 9.176837871755351e-07, "loss": 0.6885, "step": 40220 }, { "epoch": 0.8641577522876659, "grad_norm": 0.5083353819396028, "learning_rate": 9.148334077412646e-07, "loss": 0.6913, "step": 40230 }, { "epoch": 0.8643725566009366, "grad_norm": 0.5038742161832033, "learning_rate": 9.119872497108162e-07, "loss": 0.691, "step": 40240 }, { "epoch": 0.8645873609142072, "grad_norm": 0.5099893292803395, "learning_rate": 9.091453144066587e-07, "loss": 0.6849, "step": 40250 }, { "epoch": 0.8648021652274778, "grad_norm": 0.5172315105235323, "learning_rate": 9.063076031492857e-07, "loss": 0.6825, "step": 40260 }, { "epoch": 0.8650169695407484, "grad_norm": 0.5187093289593909, "learning_rate": 9.034741172572359e-07, "loss": 0.6824, "step": 40270 }, { "epoch": 0.865231773854019, "grad_norm": 0.522303012114229, "learning_rate": 9.006448580470839e-07, "loss": 0.6887, "step": 40280 }, { "epoch": 0.8654465781672896, "grad_norm": 0.49894862369670223, "learning_rate": 8.978198268334348e-07, "loss": 0.6978, "step": 40290 }, { "epoch": 0.8656613824805602, "grad_norm": 0.5002768042699983, "learning_rate": 8.949990249289409e-07, "loss": 0.7016, "step": 40300 }, { "epoch": 0.8658761867938308, "grad_norm": 0.5333278616857066, "learning_rate": 8.921824536442747e-07, "loss": 0.7005, "step": 40310 }, { "epoch": 0.8660909911071014, "grad_norm": 0.4947738923548671, "learning_rate": 8.893701142881539e-07, "loss": 0.6847, "step": 40320 }, { "epoch": 0.866305795420372, "grad_norm": 0.5180236450171597, "learning_rate": 8.865620081673243e-07, "loss": 0.6955, "step": 40330 }, { "epoch": 0.8665205997336427, "grad_norm": 0.5318690818922256, "learning_rate": 8.837581365865688e-07, "loss": 0.6963, "step": 40340 }, { "epoch": 0.8667354040469133, "grad_norm": 0.5330479773777189, "learning_rate": 8.809585008487009e-07, "loss": 0.6853, "step": 40350 }, { "epoch": 0.8669502083601839, "grad_norm": 0.516868926152685, "learning_rate": 8.781631022545611e-07, "loss": 0.6887, "step": 40360 }, { "epoch": 0.8671650126734545, "grad_norm": 0.4970913477817239, "learning_rate": 8.753719421030294e-07, "loss": 0.6974, "step": 40370 }, { "epoch": 0.8673798169867251, "grad_norm": 0.5426821783948291, "learning_rate": 8.725850216910115e-07, "loss": 0.6916, "step": 40380 }, { "epoch": 0.8675946212999956, "grad_norm": 0.5320712513650713, "learning_rate": 8.69802342313445e-07, "loss": 0.6862, "step": 40390 }, { "epoch": 0.8678094256132663, "grad_norm": 0.514150609745708, "learning_rate": 8.670239052632945e-07, "loss": 0.6902, "step": 40400 }, { "epoch": 0.8680242299265369, "grad_norm": 0.5027008974103963, "learning_rate": 8.642497118315584e-07, "loss": 0.6743, "step": 40410 }, { "epoch": 0.8682390342398075, "grad_norm": 0.49920507536090114, "learning_rate": 8.614797633072547e-07, "loss": 0.6853, "step": 40420 }, { "epoch": 0.8684538385530781, "grad_norm": 0.5078214025245414, "learning_rate": 8.58714060977438e-07, "loss": 0.6927, "step": 40430 }, { "epoch": 0.8686686428663487, "grad_norm": 0.5068985776833694, "learning_rate": 8.55952606127185e-07, "loss": 0.6877, "step": 40440 }, { "epoch": 0.8688834471796194, "grad_norm": 0.5042210366606157, "learning_rate": 8.531954000395992e-07, "loss": 0.6797, "step": 40450 }, { "epoch": 0.86909825149289, "grad_norm": 0.5041201672207695, "learning_rate": 8.504424439958125e-07, "loss": 0.6912, "step": 40460 }, { "epoch": 0.8693130558061606, "grad_norm": 0.49939475577575226, "learning_rate": 8.476937392749817e-07, "loss": 0.689, "step": 40470 }, { "epoch": 0.8695278601194312, "grad_norm": 0.508423114252119, "learning_rate": 8.449492871542819e-07, "loss": 0.6967, "step": 40480 }, { "epoch": 0.8697426644327018, "grad_norm": 0.5049557993031742, "learning_rate": 8.422090889089196e-07, "loss": 0.6842, "step": 40490 }, { "epoch": 0.8699574687459725, "grad_norm": 0.5210648597690205, "learning_rate": 8.394731458121219e-07, "loss": 0.6926, "step": 40500 }, { "epoch": 0.870172273059243, "grad_norm": 0.5043799660787082, "learning_rate": 8.367414591351408e-07, "loss": 0.69, "step": 40510 }, { "epoch": 0.8703870773725136, "grad_norm": 0.5317434333391947, "learning_rate": 8.340140301472466e-07, "loss": 0.6889, "step": 40520 }, { "epoch": 0.8706018816857842, "grad_norm": 0.5400007003253303, "learning_rate": 8.312908601157355e-07, "loss": 0.6937, "step": 40530 }, { "epoch": 0.8708166859990548, "grad_norm": 0.5127402676218981, "learning_rate": 8.285719503059209e-07, "loss": 0.6916, "step": 40540 }, { "epoch": 0.8710314903123255, "grad_norm": 0.5151193689571636, "learning_rate": 8.258573019811389e-07, "loss": 0.6951, "step": 40550 }, { "epoch": 0.8712462946255961, "grad_norm": 0.5239133915563157, "learning_rate": 8.231469164027461e-07, "loss": 0.691, "step": 40560 }, { "epoch": 0.8714610989388667, "grad_norm": 0.5147336548996377, "learning_rate": 8.204407948301174e-07, "loss": 0.6881, "step": 40570 }, { "epoch": 0.8716759032521373, "grad_norm": 0.5191148599179711, "learning_rate": 8.177389385206469e-07, "loss": 0.6861, "step": 40580 }, { "epoch": 0.8718907075654079, "grad_norm": 0.5159462930405265, "learning_rate": 8.150413487297438e-07, "loss": 0.6907, "step": 40590 }, { "epoch": 0.8721055118786786, "grad_norm": 0.5512478977489063, "learning_rate": 8.12348026710843e-07, "loss": 0.6971, "step": 40600 }, { "epoch": 0.8723203161919492, "grad_norm": 0.512581984887224, "learning_rate": 8.096589737153859e-07, "loss": 0.6738, "step": 40610 }, { "epoch": 0.8725351205052198, "grad_norm": 0.5228312882242846, "learning_rate": 8.069741909928374e-07, "loss": 0.688, "step": 40620 }, { "epoch": 0.8727499248184903, "grad_norm": 0.5006383946158728, "learning_rate": 8.042936797906753e-07, "loss": 0.687, "step": 40630 }, { "epoch": 0.8729647291317609, "grad_norm": 0.5006156486251359, "learning_rate": 8.016174413543953e-07, "loss": 0.6831, "step": 40640 }, { "epoch": 0.8731795334450316, "grad_norm": 0.5111940788173959, "learning_rate": 7.989454769275073e-07, "loss": 0.6885, "step": 40650 }, { "epoch": 0.8733943377583022, "grad_norm": 0.5449360427418215, "learning_rate": 7.962777877515293e-07, "loss": 0.6927, "step": 40660 }, { "epoch": 0.8736091420715728, "grad_norm": 0.5089460028641523, "learning_rate": 7.936143750660008e-07, "loss": 0.6742, "step": 40670 }, { "epoch": 0.8738239463848434, "grad_norm": 0.5017956450345394, "learning_rate": 7.909552401084697e-07, "loss": 0.6715, "step": 40680 }, { "epoch": 0.874038750698114, "grad_norm": 0.49880673793119984, "learning_rate": 7.883003841144976e-07, "loss": 0.679, "step": 40690 }, { "epoch": 0.8742535550113846, "grad_norm": 0.4907335395967764, "learning_rate": 7.856498083176612e-07, "loss": 0.6785, "step": 40700 }, { "epoch": 0.8744683593246553, "grad_norm": 0.5329308849961445, "learning_rate": 7.830035139495384e-07, "loss": 0.6933, "step": 40710 }, { "epoch": 0.8746831636379259, "grad_norm": 0.5130223382623801, "learning_rate": 7.803615022397315e-07, "loss": 0.7, "step": 40720 }, { "epoch": 0.8748979679511965, "grad_norm": 0.5211784970357225, "learning_rate": 7.777237744158406e-07, "loss": 0.7002, "step": 40730 }, { "epoch": 0.875112772264467, "grad_norm": 0.5102775700430415, "learning_rate": 7.750903317034831e-07, "loss": 0.692, "step": 40740 }, { "epoch": 0.8753275765777376, "grad_norm": 0.5042499830623806, "learning_rate": 7.724611753262834e-07, "loss": 0.6793, "step": 40750 }, { "epoch": 0.8755423808910083, "grad_norm": 0.5160190803282572, "learning_rate": 7.698363065058689e-07, "loss": 0.6889, "step": 40760 }, { "epoch": 0.8757571852042789, "grad_norm": 0.50370584207469, "learning_rate": 7.672157264618852e-07, "loss": 0.6773, "step": 40770 }, { "epoch": 0.8759719895175495, "grad_norm": 0.5051104461571446, "learning_rate": 7.645994364119758e-07, "loss": 0.6992, "step": 40780 }, { "epoch": 0.8761867938308201, "grad_norm": 0.4946566053954112, "learning_rate": 7.61987437571795e-07, "loss": 0.6878, "step": 40790 }, { "epoch": 0.8764015981440907, "grad_norm": 0.4996685140970515, "learning_rate": 7.593797311550055e-07, "loss": 0.6808, "step": 40800 }, { "epoch": 0.8766164024573614, "grad_norm": 0.5172882831148481, "learning_rate": 7.567763183732668e-07, "loss": 0.6892, "step": 40810 }, { "epoch": 0.876831206770632, "grad_norm": 0.5091750814973897, "learning_rate": 7.541772004362557e-07, "loss": 0.693, "step": 40820 }, { "epoch": 0.8770460110839026, "grad_norm": 0.500703308186769, "learning_rate": 7.515823785516418e-07, "loss": 0.6884, "step": 40830 }, { "epoch": 0.8772608153971732, "grad_norm": 0.5224680213539507, "learning_rate": 7.489918539251085e-07, "loss": 0.7, "step": 40840 }, { "epoch": 0.8774756197104437, "grad_norm": 0.5058732772246898, "learning_rate": 7.464056277603326e-07, "loss": 0.6924, "step": 40850 }, { "epoch": 0.8776904240237144, "grad_norm": 0.5113746801292882, "learning_rate": 7.438237012590033e-07, "loss": 0.6887, "step": 40860 }, { "epoch": 0.877905228336985, "grad_norm": 0.5305907079168781, "learning_rate": 7.412460756208051e-07, "loss": 0.6798, "step": 40870 }, { "epoch": 0.8781200326502556, "grad_norm": 0.5126751045545114, "learning_rate": 7.386727520434245e-07, "loss": 0.6885, "step": 40880 }, { "epoch": 0.8783348369635262, "grad_norm": 0.5022215260758026, "learning_rate": 7.361037317225561e-07, "loss": 0.6854, "step": 40890 }, { "epoch": 0.8785496412767968, "grad_norm": 0.5192119588923588, "learning_rate": 7.335390158518852e-07, "loss": 0.6853, "step": 40900 }, { "epoch": 0.8787644455900675, "grad_norm": 0.49672397477009517, "learning_rate": 7.309786056231039e-07, "loss": 0.6874, "step": 40910 }, { "epoch": 0.8789792499033381, "grad_norm": 0.5145358299532603, "learning_rate": 7.284225022259028e-07, "loss": 0.6935, "step": 40920 }, { "epoch": 0.8791940542166087, "grad_norm": 0.5226202982810969, "learning_rate": 7.258707068479642e-07, "loss": 0.6899, "step": 40930 }, { "epoch": 0.8794088585298793, "grad_norm": 0.513542215006283, "learning_rate": 7.23323220674983e-07, "loss": 0.6793, "step": 40940 }, { "epoch": 0.8796236628431499, "grad_norm": 0.5225094478413966, "learning_rate": 7.207800448906366e-07, "loss": 0.7004, "step": 40950 }, { "epoch": 0.8798384671564206, "grad_norm": 0.5150496074301979, "learning_rate": 7.182411806766088e-07, "loss": 0.6911, "step": 40960 }, { "epoch": 0.8800532714696911, "grad_norm": 0.49652687262660034, "learning_rate": 7.157066292125769e-07, "loss": 0.6853, "step": 40970 }, { "epoch": 0.8802680757829617, "grad_norm": 0.5241260101055815, "learning_rate": 7.131763916762169e-07, "loss": 0.6935, "step": 40980 }, { "epoch": 0.8804828800962323, "grad_norm": 0.48817871163918836, "learning_rate": 7.106504692431981e-07, "loss": 0.693, "step": 40990 }, { "epoch": 0.8806976844095029, "grad_norm": 0.529292181472698, "learning_rate": 7.081288630871819e-07, "loss": 0.7034, "step": 41000 }, { "epoch": 0.8809124887227735, "grad_norm": 0.5059883454495374, "learning_rate": 7.056115743798309e-07, "loss": 0.6962, "step": 41010 }, { "epoch": 0.8811272930360442, "grad_norm": 0.5470086285335591, "learning_rate": 7.030986042907962e-07, "loss": 0.6866, "step": 41020 }, { "epoch": 0.8813420973493148, "grad_norm": 0.5236923421331561, "learning_rate": 7.005899539877248e-07, "loss": 0.6817, "step": 41030 }, { "epoch": 0.8815569016625854, "grad_norm": 0.5089883929134034, "learning_rate": 6.980856246362566e-07, "loss": 0.6948, "step": 41040 }, { "epoch": 0.881771705975856, "grad_norm": 0.5218195607179691, "learning_rate": 6.955856174000208e-07, "loss": 0.6975, "step": 41050 }, { "epoch": 0.8819865102891266, "grad_norm": 0.5082317075268683, "learning_rate": 6.93089933440645e-07, "loss": 0.7135, "step": 41060 }, { "epoch": 0.8822013146023973, "grad_norm": 0.5088831853959854, "learning_rate": 6.905985739177379e-07, "loss": 0.6887, "step": 41070 }, { "epoch": 0.8824161189156678, "grad_norm": 0.5185177820509219, "learning_rate": 6.881115399889083e-07, "loss": 0.6974, "step": 41080 }, { "epoch": 0.8826309232289384, "grad_norm": 0.5063149686563211, "learning_rate": 6.856288328097505e-07, "loss": 0.6998, "step": 41090 }, { "epoch": 0.882845727542209, "grad_norm": 0.5250508854568222, "learning_rate": 6.831504535338485e-07, "loss": 0.6856, "step": 41100 }, { "epoch": 0.8830605318554796, "grad_norm": 0.5124153836462256, "learning_rate": 6.806764033127778e-07, "loss": 0.6836, "step": 41110 }, { "epoch": 0.8832753361687503, "grad_norm": 0.5274948224156129, "learning_rate": 6.782066832960987e-07, "loss": 0.6924, "step": 41120 }, { "epoch": 0.8834901404820209, "grad_norm": 0.6205306011716412, "learning_rate": 6.757412946313613e-07, "loss": 0.6819, "step": 41130 }, { "epoch": 0.8837049447952915, "grad_norm": 0.5245818667913486, "learning_rate": 6.732802384641057e-07, "loss": 0.6844, "step": 41140 }, { "epoch": 0.8839197491085621, "grad_norm": 0.5205364438282932, "learning_rate": 6.708235159378551e-07, "loss": 0.6859, "step": 41150 }, { "epoch": 0.8841345534218327, "grad_norm": 0.5131908898170064, "learning_rate": 6.683711281941196e-07, "loss": 0.6902, "step": 41160 }, { "epoch": 0.8843493577351034, "grad_norm": 0.5062297624465112, "learning_rate": 6.659230763724001e-07, "loss": 0.6906, "step": 41170 }, { "epoch": 0.884564162048374, "grad_norm": 0.5026327253540595, "learning_rate": 6.634793616101732e-07, "loss": 0.6722, "step": 41180 }, { "epoch": 0.8847789663616445, "grad_norm": 0.5009299214524836, "learning_rate": 6.610399850429094e-07, "loss": 0.6872, "step": 41190 }, { "epoch": 0.8849937706749151, "grad_norm": 0.5238156380409812, "learning_rate": 6.58604947804059e-07, "loss": 0.6917, "step": 41200 }, { "epoch": 0.8852085749881857, "grad_norm": 0.5083745372288995, "learning_rate": 6.561742510250569e-07, "loss": 0.6809, "step": 41210 }, { "epoch": 0.8854233793014564, "grad_norm": 0.5383915042584236, "learning_rate": 6.537478958353216e-07, "loss": 0.6979, "step": 41220 }, { "epoch": 0.885638183614727, "grad_norm": 0.5043226314095659, "learning_rate": 6.513258833622537e-07, "loss": 0.682, "step": 41230 }, { "epoch": 0.8858529879279976, "grad_norm": 0.5127868644616522, "learning_rate": 6.489082147312387e-07, "loss": 0.6821, "step": 41240 }, { "epoch": 0.8860677922412682, "grad_norm": 0.4979888752333213, "learning_rate": 6.464948910656377e-07, "loss": 0.685, "step": 41250 }, { "epoch": 0.8862825965545388, "grad_norm": 0.5128304605983001, "learning_rate": 6.440859134867972e-07, "loss": 0.6962, "step": 41260 }, { "epoch": 0.8864974008678094, "grad_norm": 0.5051244175392152, "learning_rate": 6.416812831140451e-07, "loss": 0.6909, "step": 41270 }, { "epoch": 0.8867122051810801, "grad_norm": 0.5101008415865377, "learning_rate": 6.392810010646866e-07, "loss": 0.6855, "step": 41280 }, { "epoch": 0.8869270094943507, "grad_norm": 0.5202190940464628, "learning_rate": 6.368850684540106e-07, "loss": 0.7058, "step": 41290 }, { "epoch": 0.8871418138076212, "grad_norm": 0.5186081016150472, "learning_rate": 6.344934863952779e-07, "loss": 0.6781, "step": 41300 }, { "epoch": 0.8873566181208918, "grad_norm": 0.5329527297577857, "learning_rate": 6.321062559997349e-07, "loss": 0.7019, "step": 41310 }, { "epoch": 0.8875714224341624, "grad_norm": 0.5217265024678235, "learning_rate": 6.297233783766022e-07, "loss": 0.6985, "step": 41320 }, { "epoch": 0.8877862267474331, "grad_norm": 0.5194330367298682, "learning_rate": 6.273448546330796e-07, "loss": 0.6839, "step": 41330 }, { "epoch": 0.8880010310607037, "grad_norm": 0.5097902257790524, "learning_rate": 6.249706858743443e-07, "loss": 0.6973, "step": 41340 }, { "epoch": 0.8882158353739743, "grad_norm": 0.5142203693554276, "learning_rate": 6.22600873203545e-07, "loss": 0.6985, "step": 41350 }, { "epoch": 0.8884306396872449, "grad_norm": 0.5090401566472215, "learning_rate": 6.202354177218161e-07, "loss": 0.6795, "step": 41360 }, { "epoch": 0.8886454440005155, "grad_norm": 0.49677462148757334, "learning_rate": 6.178743205282567e-07, "loss": 0.6861, "step": 41370 }, { "epoch": 0.8888602483137862, "grad_norm": 0.521382571115747, "learning_rate": 6.155175827199467e-07, "loss": 0.7016, "step": 41380 }, { "epoch": 0.8890750526270568, "grad_norm": 0.5317933595388324, "learning_rate": 6.131652053919424e-07, "loss": 0.6907, "step": 41390 }, { "epoch": 0.8892898569403274, "grad_norm": 0.5213449864141357, "learning_rate": 6.10817189637265e-07, "loss": 0.702, "step": 41400 }, { "epoch": 0.889504661253598, "grad_norm": 0.5514505419123323, "learning_rate": 6.084735365469229e-07, "loss": 0.7022, "step": 41410 }, { "epoch": 0.8897194655668685, "grad_norm": 0.5138208717231828, "learning_rate": 6.061342472098819e-07, "loss": 0.6742, "step": 41420 }, { "epoch": 0.8899342698801392, "grad_norm": 0.5071996581331448, "learning_rate": 6.037993227130945e-07, "loss": 0.6922, "step": 41430 }, { "epoch": 0.8901490741934098, "grad_norm": 0.5102097970519316, "learning_rate": 6.014687641414751e-07, "loss": 0.6929, "step": 41440 }, { "epoch": 0.8903638785066804, "grad_norm": 0.5143324484042711, "learning_rate": 5.991425725779132e-07, "loss": 0.6975, "step": 41450 }, { "epoch": 0.890578682819951, "grad_norm": 0.518475417925795, "learning_rate": 5.968207491032707e-07, "loss": 0.6917, "step": 41460 }, { "epoch": 0.8907934871332216, "grad_norm": 0.4994682556931308, "learning_rate": 5.945032947963736e-07, "loss": 0.6811, "step": 41470 }, { "epoch": 0.8910082914464923, "grad_norm": 0.5070960760778904, "learning_rate": 5.921902107340282e-07, "loss": 0.7016, "step": 41480 }, { "epoch": 0.8912230957597629, "grad_norm": 0.5191444966925488, "learning_rate": 5.898814979909995e-07, "loss": 0.7052, "step": 41490 }, { "epoch": 0.8914379000730335, "grad_norm": 0.5035080497362481, "learning_rate": 5.87577157640028e-07, "loss": 0.6973, "step": 41500 }, { "epoch": 0.8916527043863041, "grad_norm": 0.5333052134877714, "learning_rate": 5.852771907518228e-07, "loss": 0.7011, "step": 41510 }, { "epoch": 0.8918675086995747, "grad_norm": 0.5155211109068989, "learning_rate": 5.829815983950526e-07, "loss": 0.693, "step": 41520 }, { "epoch": 0.8920823130128454, "grad_norm": 0.5023502013771952, "learning_rate": 5.806903816363663e-07, "loss": 0.6906, "step": 41530 }, { "epoch": 0.8922971173261159, "grad_norm": 0.5214356225380249, "learning_rate": 5.784035415403688e-07, "loss": 0.6883, "step": 41540 }, { "epoch": 0.8925119216393865, "grad_norm": 0.5159695126204489, "learning_rate": 5.761210791696381e-07, "loss": 0.6897, "step": 41550 }, { "epoch": 0.8927267259526571, "grad_norm": 0.5209933564528445, "learning_rate": 5.738429955847147e-07, "loss": 0.7004, "step": 41560 }, { "epoch": 0.8929415302659277, "grad_norm": 0.498039038963185, "learning_rate": 5.715692918441029e-07, "loss": 0.683, "step": 41570 }, { "epoch": 0.8931563345791983, "grad_norm": 0.5006203623139068, "learning_rate": 5.692999690042799e-07, "loss": 0.6945, "step": 41580 }, { "epoch": 0.893371138892469, "grad_norm": 0.5233476464091876, "learning_rate": 5.670350281196768e-07, "loss": 0.6987, "step": 41590 }, { "epoch": 0.8935859432057396, "grad_norm": 0.5136980303605799, "learning_rate": 5.64774470242696e-07, "loss": 0.7007, "step": 41600 }, { "epoch": 0.8938007475190102, "grad_norm": 0.512143271533227, "learning_rate": 5.625182964236997e-07, "loss": 0.6913, "step": 41610 }, { "epoch": 0.8940155518322808, "grad_norm": 0.512132113394993, "learning_rate": 5.602665077110169e-07, "loss": 0.6854, "step": 41620 }, { "epoch": 0.8942303561455514, "grad_norm": 0.51811405156484, "learning_rate": 5.580191051509354e-07, "loss": 0.691, "step": 41630 }, { "epoch": 0.894445160458822, "grad_norm": 0.5115508553053043, "learning_rate": 5.55776089787703e-07, "loss": 0.6748, "step": 41640 }, { "epoch": 0.8946599647720926, "grad_norm": 0.5241262214299848, "learning_rate": 5.535374626635381e-07, "loss": 0.6934, "step": 41650 }, { "epoch": 0.8948747690853632, "grad_norm": 0.5244130502553322, "learning_rate": 5.51303224818609e-07, "loss": 0.6871, "step": 41660 }, { "epoch": 0.8950895733986338, "grad_norm": 0.4994599674132126, "learning_rate": 5.490733772910517e-07, "loss": 0.6813, "step": 41670 }, { "epoch": 0.8953043777119044, "grad_norm": 0.4986836428006602, "learning_rate": 5.46847921116962e-07, "loss": 0.6902, "step": 41680 }, { "epoch": 0.8955191820251751, "grad_norm": 0.5152538290727082, "learning_rate": 5.446268573303881e-07, "loss": 0.6833, "step": 41690 }, { "epoch": 0.8957339863384457, "grad_norm": 0.502386100555975, "learning_rate": 5.424101869633491e-07, "loss": 0.6927, "step": 41700 }, { "epoch": 0.8959487906517163, "grad_norm": 0.5074939313677448, "learning_rate": 5.401979110458133e-07, "loss": 0.6758, "step": 41710 }, { "epoch": 0.8961635949649869, "grad_norm": 0.5058971850323379, "learning_rate": 5.379900306057084e-07, "loss": 0.6797, "step": 41720 }, { "epoch": 0.8963783992782575, "grad_norm": 0.4926506337654991, "learning_rate": 5.357865466689249e-07, "loss": 0.6822, "step": 41730 }, { "epoch": 0.8965932035915282, "grad_norm": 0.5202246837788422, "learning_rate": 5.33587460259305e-07, "loss": 0.686, "step": 41740 }, { "epoch": 0.8968080079047988, "grad_norm": 0.5224471201252612, "learning_rate": 5.313927723986511e-07, "loss": 0.6988, "step": 41750 }, { "epoch": 0.8970228122180693, "grad_norm": 0.499998013117945, "learning_rate": 5.292024841067179e-07, "loss": 0.6913, "step": 41760 }, { "epoch": 0.8972376165313399, "grad_norm": 0.5068130297990905, "learning_rate": 5.270165964012198e-07, "loss": 0.6918, "step": 41770 }, { "epoch": 0.8974524208446105, "grad_norm": 0.5178316720171696, "learning_rate": 5.248351102978255e-07, "loss": 0.7096, "step": 41780 }, { "epoch": 0.8976672251578812, "grad_norm": 0.5351505572464849, "learning_rate": 5.226580268101566e-07, "loss": 0.6831, "step": 41790 }, { "epoch": 0.8978820294711518, "grad_norm": 0.5113178684301397, "learning_rate": 5.204853469497917e-07, "loss": 0.6945, "step": 41800 }, { "epoch": 0.8980968337844224, "grad_norm": 0.5047128008727567, "learning_rate": 5.18317071726262e-07, "loss": 0.688, "step": 41810 }, { "epoch": 0.898311638097693, "grad_norm": 0.5100213323202718, "learning_rate": 5.161532021470527e-07, "loss": 0.6941, "step": 41820 }, { "epoch": 0.8985264424109636, "grad_norm": 0.5412542192317169, "learning_rate": 5.139937392175987e-07, "loss": 0.6804, "step": 41830 }, { "epoch": 0.8987412467242343, "grad_norm": 0.5102282656359679, "learning_rate": 5.118386839412925e-07, "loss": 0.6867, "step": 41840 }, { "epoch": 0.8989560510375049, "grad_norm": 0.5108191464876962, "learning_rate": 5.096880373194745e-07, "loss": 0.694, "step": 41850 }, { "epoch": 0.8991708553507755, "grad_norm": 0.5080216890454713, "learning_rate": 5.075418003514388e-07, "loss": 0.6807, "step": 41860 }, { "epoch": 0.899385659664046, "grad_norm": 0.5088339519727141, "learning_rate": 5.053999740344306e-07, "loss": 0.6868, "step": 41870 }, { "epoch": 0.8996004639773166, "grad_norm": 0.5152387132782632, "learning_rate": 5.032625593636442e-07, "loss": 0.7018, "step": 41880 }, { "epoch": 0.8998152682905872, "grad_norm": 0.4901427047062954, "learning_rate": 5.01129557332225e-07, "loss": 0.6845, "step": 41890 }, { "epoch": 0.9000300726038579, "grad_norm": 0.5129493119966259, "learning_rate": 4.990009689312669e-07, "loss": 0.6826, "step": 41900 }, { "epoch": 0.9002448769171285, "grad_norm": 0.5223099648979949, "learning_rate": 4.968767951498166e-07, "loss": 0.7005, "step": 41910 }, { "epoch": 0.9004596812303991, "grad_norm": 0.4891041542553347, "learning_rate": 4.947570369748656e-07, "loss": 0.688, "step": 41920 }, { "epoch": 0.9006744855436697, "grad_norm": 0.49608949461071944, "learning_rate": 4.92641695391356e-07, "loss": 0.6922, "step": 41930 }, { "epoch": 0.9008892898569403, "grad_norm": 0.521684682281621, "learning_rate": 4.905307713821761e-07, "loss": 0.6964, "step": 41940 }, { "epoch": 0.901104094170211, "grad_norm": 0.5052599021707938, "learning_rate": 4.884242659281613e-07, "loss": 0.6806, "step": 41950 }, { "epoch": 0.9013188984834816, "grad_norm": 0.5154672787022767, "learning_rate": 4.863221800080964e-07, "loss": 0.685, "step": 41960 }, { "epoch": 0.9015337027967522, "grad_norm": 0.5384871380139898, "learning_rate": 4.842245145987112e-07, "loss": 0.6828, "step": 41970 }, { "epoch": 0.9017485071100227, "grad_norm": 0.5399455434197028, "learning_rate": 4.821312706746817e-07, "loss": 0.6962, "step": 41980 }, { "epoch": 0.9019633114232933, "grad_norm": 0.5155738707830432, "learning_rate": 4.800424492086275e-07, "loss": 0.683, "step": 41990 }, { "epoch": 0.902178115736564, "grad_norm": 0.5046827454219124, "learning_rate": 4.779580511711191e-07, "loss": 0.6852, "step": 42000 }, { "epoch": 0.9023929200498346, "grad_norm": 0.5236358307924249, "learning_rate": 4.758780775306637e-07, "loss": 0.6973, "step": 42010 }, { "epoch": 0.9026077243631052, "grad_norm": 0.5308120199608228, "learning_rate": 4.738025292537185e-07, "loss": 0.6911, "step": 42020 }, { "epoch": 0.9028225286763758, "grad_norm": 0.5135158557502522, "learning_rate": 4.71731407304683e-07, "loss": 0.6889, "step": 42030 }, { "epoch": 0.9030373329896464, "grad_norm": 0.5192712739754789, "learning_rate": 4.6966471264589865e-07, "loss": 0.6966, "step": 42040 }, { "epoch": 0.9032521373029171, "grad_norm": 0.5213999046870769, "learning_rate": 4.6760244623765384e-07, "loss": 0.7042, "step": 42050 }, { "epoch": 0.9034669416161877, "grad_norm": 0.5105007301231993, "learning_rate": 4.65544609038171e-07, "loss": 0.6781, "step": 42060 }, { "epoch": 0.9036817459294583, "grad_norm": 0.500621312368029, "learning_rate": 4.634912020036242e-07, "loss": 0.6749, "step": 42070 }, { "epoch": 0.9038965502427289, "grad_norm": 0.505010805688616, "learning_rate": 4.614422260881235e-07, "loss": 0.6732, "step": 42080 }, { "epoch": 0.9041113545559994, "grad_norm": 0.5342937103145424, "learning_rate": 4.593976822437207e-07, "loss": 0.6844, "step": 42090 }, { "epoch": 0.9043261588692701, "grad_norm": 0.5183262843971005, "learning_rate": 4.573575714204115e-07, "loss": 0.6984, "step": 42100 }, { "epoch": 0.9045409631825407, "grad_norm": 0.517561745524737, "learning_rate": 4.5532189456612306e-07, "loss": 0.6889, "step": 42110 }, { "epoch": 0.9047557674958113, "grad_norm": 0.5026306781586204, "learning_rate": 4.5329065262673666e-07, "loss": 0.6823, "step": 42120 }, { "epoch": 0.9049705718090819, "grad_norm": 0.5427704129364224, "learning_rate": 4.512638465460584e-07, "loss": 0.6906, "step": 42130 }, { "epoch": 0.9051853761223525, "grad_norm": 0.5074939200787636, "learning_rate": 4.4924147726584153e-07, "loss": 0.672, "step": 42140 }, { "epoch": 0.9054001804356231, "grad_norm": 0.504225946197821, "learning_rate": 4.4722354572577785e-07, "loss": 0.6911, "step": 42150 }, { "epoch": 0.9056149847488938, "grad_norm": 0.5145785005958214, "learning_rate": 4.452100528634906e-07, "loss": 0.694, "step": 42160 }, { "epoch": 0.9058297890621644, "grad_norm": 0.49881418371078307, "learning_rate": 4.432009996145492e-07, "loss": 0.6794, "step": 42170 }, { "epoch": 0.906044593375435, "grad_norm": 0.5176800894518867, "learning_rate": 4.411963869124525e-07, "loss": 0.6945, "step": 42180 }, { "epoch": 0.9062593976887056, "grad_norm": 0.5090959735816222, "learning_rate": 4.3919621568864313e-07, "loss": 0.6875, "step": 42190 }, { "epoch": 0.9064742020019761, "grad_norm": 0.5075300086152117, "learning_rate": 4.372004868724944e-07, "loss": 0.6857, "step": 42200 }, { "epoch": 0.9066890063152468, "grad_norm": 0.5005344252080504, "learning_rate": 4.3520920139131784e-07, "loss": 0.6908, "step": 42210 }, { "epoch": 0.9069038106285174, "grad_norm": 0.5092091177314088, "learning_rate": 4.3322236017036114e-07, "loss": 0.684, "step": 42220 }, { "epoch": 0.907118614941788, "grad_norm": 0.5128571527964733, "learning_rate": 4.3123996413280356e-07, "loss": 0.6903, "step": 42230 }, { "epoch": 0.9073334192550586, "grad_norm": 0.5236930918406155, "learning_rate": 4.29262014199765e-07, "loss": 0.6884, "step": 42240 }, { "epoch": 0.9075482235683292, "grad_norm": 0.522031014128186, "learning_rate": 4.272885112902925e-07, "loss": 0.6862, "step": 42250 }, { "epoch": 0.9077630278815999, "grad_norm": 0.534657132580671, "learning_rate": 4.253194563213714e-07, "loss": 0.6976, "step": 42260 }, { "epoch": 0.9079778321948705, "grad_norm": 0.5270962563830953, "learning_rate": 4.233548502079199e-07, "loss": 0.6871, "step": 42270 }, { "epoch": 0.9081926365081411, "grad_norm": 0.5397710000399429, "learning_rate": 4.2139469386278445e-07, "loss": 0.6787, "step": 42280 }, { "epoch": 0.9084074408214117, "grad_norm": 0.5157952407298018, "learning_rate": 4.194389881967531e-07, "loss": 0.6929, "step": 42290 }, { "epoch": 0.9086222451346823, "grad_norm": 0.5155262585551267, "learning_rate": 4.174877341185368e-07, "loss": 0.699, "step": 42300 }, { "epoch": 0.908837049447953, "grad_norm": 0.6004351700870802, "learning_rate": 4.155409325347826e-07, "loss": 0.68, "step": 42310 }, { "epoch": 0.9090518537612235, "grad_norm": 0.5179978734902895, "learning_rate": 4.135985843500678e-07, "loss": 0.6855, "step": 42320 }, { "epoch": 0.9092666580744941, "grad_norm": 0.5172988314550917, "learning_rate": 4.116606904668996e-07, "loss": 0.6903, "step": 42330 }, { "epoch": 0.9094814623877647, "grad_norm": 0.527441343917074, "learning_rate": 4.097272517857187e-07, "loss": 0.6835, "step": 42340 }, { "epoch": 0.9096962667010353, "grad_norm": 0.5094088299463104, "learning_rate": 4.07798269204891e-07, "loss": 0.6745, "step": 42350 }, { "epoch": 0.909911071014306, "grad_norm": 0.49824225660299976, "learning_rate": 4.0587374362071495e-07, "loss": 0.692, "step": 42360 }, { "epoch": 0.9101258753275766, "grad_norm": 0.4958646778169003, "learning_rate": 4.0395367592741876e-07, "loss": 0.6927, "step": 42370 }, { "epoch": 0.9103406796408472, "grad_norm": 0.5330942753471896, "learning_rate": 4.020380670171553e-07, "loss": 0.6929, "step": 42380 }, { "epoch": 0.9105554839541178, "grad_norm": 0.5233184041925335, "learning_rate": 4.001269177800116e-07, "loss": 0.6965, "step": 42390 }, { "epoch": 0.9107702882673884, "grad_norm": 0.5120728262031152, "learning_rate": 3.9822022910399496e-07, "loss": 0.6741, "step": 42400 }, { "epoch": 0.9109850925806591, "grad_norm": 0.5064121099027354, "learning_rate": 3.96318001875049e-07, "loss": 0.6784, "step": 42410 }, { "epoch": 0.9111998968939297, "grad_norm": 0.5602640114772643, "learning_rate": 3.944202369770367e-07, "loss": 0.6961, "step": 42420 }, { "epoch": 0.9114147012072003, "grad_norm": 0.5432682414153873, "learning_rate": 3.925269352917505e-07, "loss": 0.6895, "step": 42430 }, { "epoch": 0.9116295055204708, "grad_norm": 0.5159534104410033, "learning_rate": 3.90638097698911e-07, "loss": 0.6967, "step": 42440 }, { "epoch": 0.9118443098337414, "grad_norm": 0.5421695386729521, "learning_rate": 3.887537250761597e-07, "loss": 0.6906, "step": 42450 }, { "epoch": 0.912059114147012, "grad_norm": 0.5269412372870173, "learning_rate": 3.8687381829906944e-07, "loss": 0.6921, "step": 42460 }, { "epoch": 0.9122739184602827, "grad_norm": 0.509562529599299, "learning_rate": 3.849983782411337e-07, "loss": 0.6885, "step": 42470 }, { "epoch": 0.9124887227735533, "grad_norm": 0.510134688474772, "learning_rate": 3.8312740577377214e-07, "loss": 0.6815, "step": 42480 }, { "epoch": 0.9127035270868239, "grad_norm": 0.512454202372446, "learning_rate": 3.812609017663271e-07, "loss": 0.7021, "step": 42490 }, { "epoch": 0.9129183314000945, "grad_norm": 0.5069629345687713, "learning_rate": 3.793988670860671e-07, "loss": 0.6908, "step": 42500 }, { "epoch": 0.9131331357133651, "grad_norm": 0.5145243709537642, "learning_rate": 3.7754130259818334e-07, "loss": 0.6761, "step": 42510 }, { "epoch": 0.9133479400266358, "grad_norm": 0.5057114456413828, "learning_rate": 3.7568820916578765e-07, "loss": 0.7133, "step": 42520 }, { "epoch": 0.9135627443399064, "grad_norm": 0.5027601053480496, "learning_rate": 3.738395876499157e-07, "loss": 0.68, "step": 42530 }, { "epoch": 0.913777548653177, "grad_norm": 0.500024246376363, "learning_rate": 3.7199543890952817e-07, "loss": 0.6921, "step": 42540 }, { "epoch": 0.9139923529664475, "grad_norm": 0.5082257613290262, "learning_rate": 3.7015576380150187e-07, "loss": 0.6812, "step": 42550 }, { "epoch": 0.9142071572797181, "grad_norm": 0.530878498561435, "learning_rate": 3.683205631806408e-07, "loss": 0.6864, "step": 42560 }, { "epoch": 0.9144219615929888, "grad_norm": 0.5061215824429818, "learning_rate": 3.664898378996662e-07, "loss": 0.6769, "step": 42570 }, { "epoch": 0.9146367659062594, "grad_norm": 0.4972627962848076, "learning_rate": 3.6466358880921984e-07, "loss": 0.6796, "step": 42580 }, { "epoch": 0.91485157021953, "grad_norm": 0.514903636534617, "learning_rate": 3.628418167578651e-07, "loss": 0.6843, "step": 42590 }, { "epoch": 0.9150663745328006, "grad_norm": 0.5047790440228213, "learning_rate": 3.610245225920839e-07, "loss": 0.6826, "step": 42600 }, { "epoch": 0.9152811788460712, "grad_norm": 0.5083554832988898, "learning_rate": 3.5921170715627953e-07, "loss": 0.6869, "step": 42610 }, { "epoch": 0.9154959831593419, "grad_norm": 0.5133159115347935, "learning_rate": 3.574033712927727e-07, "loss": 0.6966, "step": 42620 }, { "epoch": 0.9157107874726125, "grad_norm": 0.5164607440700887, "learning_rate": 3.5559951584180355e-07, "loss": 0.6882, "step": 42630 }, { "epoch": 0.9159255917858831, "grad_norm": 0.513461482277684, "learning_rate": 3.538001416415282e-07, "loss": 0.6884, "step": 42640 }, { "epoch": 0.9161403960991537, "grad_norm": 0.5008580969233325, "learning_rate": 3.520052495280213e-07, "loss": 0.6853, "step": 42650 }, { "epoch": 0.9163552004124242, "grad_norm": 0.5047493200035524, "learning_rate": 3.50214840335279e-07, "loss": 0.6987, "step": 42660 }, { "epoch": 0.9165700047256949, "grad_norm": 0.5126226903956973, "learning_rate": 3.4842891489520803e-07, "loss": 0.6782, "step": 42670 }, { "epoch": 0.9167848090389655, "grad_norm": 0.5250140948834445, "learning_rate": 3.466474740376369e-07, "loss": 0.6831, "step": 42680 }, { "epoch": 0.9169996133522361, "grad_norm": 0.4970994749703241, "learning_rate": 3.448705185903101e-07, "loss": 0.6813, "step": 42690 }, { "epoch": 0.9172144176655067, "grad_norm": 0.4957874759690821, "learning_rate": 3.4309804937888155e-07, "loss": 0.6812, "step": 42700 }, { "epoch": 0.9174292219787773, "grad_norm": 0.5126567512309492, "learning_rate": 3.413300672269282e-07, "loss": 0.6904, "step": 42710 }, { "epoch": 0.917644026292048, "grad_norm": 0.5269391777536964, "learning_rate": 3.3956657295593944e-07, "loss": 0.6799, "step": 42720 }, { "epoch": 0.9178588306053186, "grad_norm": 0.5037049888031642, "learning_rate": 3.378075673853187e-07, "loss": 0.6747, "step": 42730 }, { "epoch": 0.9180736349185892, "grad_norm": 0.5180923103541171, "learning_rate": 3.360530513323845e-07, "loss": 0.696, "step": 42740 }, { "epoch": 0.9182884392318598, "grad_norm": 0.5377551337981332, "learning_rate": 3.3430302561236806e-07, "loss": 0.7026, "step": 42750 }, { "epoch": 0.9185032435451304, "grad_norm": 0.5086131451212155, "learning_rate": 3.325574910384177e-07, "loss": 0.6867, "step": 42760 }, { "epoch": 0.9187180478584009, "grad_norm": 0.5360041477265602, "learning_rate": 3.3081644842158924e-07, "loss": 0.6868, "step": 42770 }, { "epoch": 0.9189328521716716, "grad_norm": 0.5259395774668065, "learning_rate": 3.290798985708576e-07, "loss": 0.6822, "step": 42780 }, { "epoch": 0.9191476564849422, "grad_norm": 0.511030877333274, "learning_rate": 3.2734784229310425e-07, "loss": 0.6748, "step": 42790 }, { "epoch": 0.9193624607982128, "grad_norm": 0.507095211823108, "learning_rate": 3.2562028039312746e-07, "loss": 0.6916, "step": 42800 }, { "epoch": 0.9195772651114834, "grad_norm": 0.5108278970810879, "learning_rate": 3.2389721367363623e-07, "loss": 0.6905, "step": 42810 }, { "epoch": 0.919792069424754, "grad_norm": 0.5063482123733604, "learning_rate": 3.221786429352458e-07, "loss": 0.6867, "step": 42820 }, { "epoch": 0.9200068737380247, "grad_norm": 0.5321360439147953, "learning_rate": 3.204645689764907e-07, "loss": 0.6799, "step": 42830 }, { "epoch": 0.9202216780512953, "grad_norm": 0.5010045520484869, "learning_rate": 3.187549925938094e-07, "loss": 0.6897, "step": 42840 }, { "epoch": 0.9204364823645659, "grad_norm": 0.5146448011202501, "learning_rate": 3.170499145815542e-07, "loss": 0.6936, "step": 42850 }, { "epoch": 0.9206512866778365, "grad_norm": 0.522131748562637, "learning_rate": 3.15349335731987e-07, "loss": 0.6758, "step": 42860 }, { "epoch": 0.9208660909911071, "grad_norm": 0.5084382700595336, "learning_rate": 3.136532568352746e-07, "loss": 0.6872, "step": 42870 }, { "epoch": 0.9210808953043778, "grad_norm": 0.5063314963517712, "learning_rate": 3.11961678679501e-07, "loss": 0.6763, "step": 42880 }, { "epoch": 0.9212956996176483, "grad_norm": 0.529160941637022, "learning_rate": 3.1027460205065194e-07, "loss": 0.6898, "step": 42890 }, { "epoch": 0.9215105039309189, "grad_norm": 0.5245818745748351, "learning_rate": 3.085920277326249e-07, "loss": 0.6639, "step": 42900 }, { "epoch": 0.9217253082441895, "grad_norm": 0.5217386624549989, "learning_rate": 3.0691395650722434e-07, "loss": 0.6969, "step": 42910 }, { "epoch": 0.9219401125574601, "grad_norm": 0.5129921994136167, "learning_rate": 3.0524038915416e-07, "loss": 0.6841, "step": 42920 }, { "epoch": 0.9221549168707308, "grad_norm": 0.5021469780724663, "learning_rate": 3.035713264510565e-07, "loss": 0.6826, "step": 42930 }, { "epoch": 0.9223697211840014, "grad_norm": 0.5116730541146018, "learning_rate": 3.0190676917343673e-07, "loss": 0.6923, "step": 42940 }, { "epoch": 0.922584525497272, "grad_norm": 0.5041634183521139, "learning_rate": 3.002467180947355e-07, "loss": 0.6801, "step": 42950 }, { "epoch": 0.9227993298105426, "grad_norm": 0.5071126517885879, "learning_rate": 2.9859117398629236e-07, "loss": 0.6707, "step": 42960 }, { "epoch": 0.9230141341238132, "grad_norm": 0.5191938232678536, "learning_rate": 2.969401376173486e-07, "loss": 0.6887, "step": 42970 }, { "epoch": 0.9232289384370839, "grad_norm": 0.51739812424603, "learning_rate": 2.9529360975506074e-07, "loss": 0.6799, "step": 42980 }, { "epoch": 0.9234437427503545, "grad_norm": 0.5153545341424074, "learning_rate": 2.9365159116447886e-07, "loss": 0.6835, "step": 42990 }, { "epoch": 0.923658547063625, "grad_norm": 0.5306207680932173, "learning_rate": 2.920140826085682e-07, "loss": 0.6986, "step": 43000 }, { "epoch": 0.9238733513768956, "grad_norm": 0.5055030531585294, "learning_rate": 2.9038108484819137e-07, "loss": 0.6895, "step": 43010 }, { "epoch": 0.9240881556901662, "grad_norm": 0.5063675711910615, "learning_rate": 2.887525986421169e-07, "loss": 0.6995, "step": 43020 }, { "epoch": 0.9243029600034369, "grad_norm": 0.4996360884485676, "learning_rate": 2.8712862474702063e-07, "loss": 0.6906, "step": 43030 }, { "epoch": 0.9245177643167075, "grad_norm": 0.5106698528538595, "learning_rate": 2.855091639174734e-07, "loss": 0.6989, "step": 43040 }, { "epoch": 0.9247325686299781, "grad_norm": 0.5074236659920939, "learning_rate": 2.8389421690595995e-07, "loss": 0.6839, "step": 43050 }, { "epoch": 0.9249473729432487, "grad_norm": 0.572337745383503, "learning_rate": 2.8228378446285784e-07, "loss": 0.6902, "step": 43060 }, { "epoch": 0.9251621772565193, "grad_norm": 0.5068410597647415, "learning_rate": 2.8067786733645296e-07, "loss": 0.6867, "step": 43070 }, { "epoch": 0.9253769815697899, "grad_norm": 0.5186456864886076, "learning_rate": 2.7907646627293284e-07, "loss": 0.6919, "step": 43080 }, { "epoch": 0.9255917858830606, "grad_norm": 0.5126033061284958, "learning_rate": 2.7747958201638113e-07, "loss": 0.6965, "step": 43090 }, { "epoch": 0.9258065901963312, "grad_norm": 0.5161766653294025, "learning_rate": 2.75887215308791e-07, "loss": 0.6901, "step": 43100 }, { "epoch": 0.9260213945096017, "grad_norm": 0.5049885765836929, "learning_rate": 2.7429936689004956e-07, "loss": 0.6788, "step": 43110 }, { "epoch": 0.9262361988228723, "grad_norm": 0.5106995321298553, "learning_rate": 2.7271603749794763e-07, "loss": 0.6804, "step": 43120 }, { "epoch": 0.9264510031361429, "grad_norm": 0.5080317309925427, "learning_rate": 2.711372278681779e-07, "loss": 0.6844, "step": 43130 }, { "epoch": 0.9266658074494136, "grad_norm": 0.5164907399314109, "learning_rate": 2.69562938734329e-07, "loss": 0.6936, "step": 43140 }, { "epoch": 0.9268806117626842, "grad_norm": 0.5205333655443777, "learning_rate": 2.6799317082789267e-07, "loss": 0.6772, "step": 43150 }, { "epoch": 0.9270954160759548, "grad_norm": 0.5259790796204346, "learning_rate": 2.664279248782564e-07, "loss": 0.7015, "step": 43160 }, { "epoch": 0.9273102203892254, "grad_norm": 0.48989809551529545, "learning_rate": 2.648672016127096e-07, "loss": 0.6818, "step": 43170 }, { "epoch": 0.927525024702496, "grad_norm": 0.5186292029129187, "learning_rate": 2.633110017564389e-07, "loss": 0.6841, "step": 43180 }, { "epoch": 0.9277398290157667, "grad_norm": 0.494677339162897, "learning_rate": 2.6175932603253017e-07, "loss": 0.6857, "step": 43190 }, { "epoch": 0.9279546333290373, "grad_norm": 0.513254704122593, "learning_rate": 2.6021217516196464e-07, "loss": 0.6844, "step": 43200 }, { "epoch": 0.9281694376423079, "grad_norm": 0.5097903856248501, "learning_rate": 2.586695498636238e-07, "loss": 0.6865, "step": 43210 }, { "epoch": 0.9283842419555784, "grad_norm": 0.5158784848429144, "learning_rate": 2.571314508542866e-07, "loss": 0.6883, "step": 43220 }, { "epoch": 0.928599046268849, "grad_norm": 0.49451018639662936, "learning_rate": 2.5559787884862465e-07, "loss": 0.6923, "step": 43230 }, { "epoch": 0.9288138505821197, "grad_norm": 0.5206523113168011, "learning_rate": 2.5406883455921015e-07, "loss": 0.6845, "step": 43240 }, { "epoch": 0.9290286548953903, "grad_norm": 0.5183662832534691, "learning_rate": 2.5254431869651154e-07, "loss": 0.6888, "step": 43250 }, { "epoch": 0.9292434592086609, "grad_norm": 0.5012033057724504, "learning_rate": 2.51024331968891e-07, "loss": 0.6838, "step": 43260 }, { "epoch": 0.9294582635219315, "grad_norm": 0.5271739265196879, "learning_rate": 2.49508875082608e-07, "loss": 0.6925, "step": 43270 }, { "epoch": 0.9296730678352021, "grad_norm": 0.5196998730244764, "learning_rate": 2.4799794874181496e-07, "loss": 0.6826, "step": 43280 }, { "epoch": 0.9298878721484728, "grad_norm": 0.5154557177789315, "learning_rate": 2.4649155364856127e-07, "loss": 0.6947, "step": 43290 }, { "epoch": 0.9301026764617434, "grad_norm": 0.5158926177913861, "learning_rate": 2.449896905027904e-07, "loss": 0.6774, "step": 43300 }, { "epoch": 0.930317480775014, "grad_norm": 0.5452488869938171, "learning_rate": 2.434923600023409e-07, "loss": 0.6895, "step": 43310 }, { "epoch": 0.9305322850882846, "grad_norm": 0.5240815118076753, "learning_rate": 2.419995628429428e-07, "loss": 0.6814, "step": 43320 }, { "epoch": 0.9307470894015551, "grad_norm": 0.5208459297385867, "learning_rate": 2.405112997182224e-07, "loss": 0.6986, "step": 43330 }, { "epoch": 0.9309618937148257, "grad_norm": 0.5146723954727245, "learning_rate": 2.3902757131969657e-07, "loss": 0.6936, "step": 43340 }, { "epoch": 0.9311766980280964, "grad_norm": 0.4988199872705836, "learning_rate": 2.3754837833677823e-07, "loss": 0.6921, "step": 43350 }, { "epoch": 0.931391502341367, "grad_norm": 0.5061737031935741, "learning_rate": 2.3607372145677098e-07, "loss": 0.6819, "step": 43360 }, { "epoch": 0.9316063066546376, "grad_norm": 0.5118209164152153, "learning_rate": 2.3460360136487004e-07, "loss": 0.6938, "step": 43370 }, { "epoch": 0.9318211109679082, "grad_norm": 0.5028715193115553, "learning_rate": 2.3313801874416452e-07, "loss": 0.6838, "step": 43380 }, { "epoch": 0.9320359152811788, "grad_norm": 0.5037421436122391, "learning_rate": 2.3167697427563418e-07, "loss": 0.6695, "step": 43390 }, { "epoch": 0.9322507195944495, "grad_norm": 0.5056308074418332, "learning_rate": 2.302204686381515e-07, "loss": 0.6915, "step": 43400 }, { "epoch": 0.9324655239077201, "grad_norm": 0.4973980726367497, "learning_rate": 2.2876850250847626e-07, "loss": 0.6902, "step": 43410 }, { "epoch": 0.9326803282209907, "grad_norm": 0.49416302287965536, "learning_rate": 2.2732107656126435e-07, "loss": 0.6778, "step": 43420 }, { "epoch": 0.9328951325342613, "grad_norm": 0.5373794494908628, "learning_rate": 2.2587819146905665e-07, "loss": 0.6905, "step": 43430 }, { "epoch": 0.9331099368475319, "grad_norm": 0.527952990743072, "learning_rate": 2.2443984790228802e-07, "loss": 0.6798, "step": 43440 }, { "epoch": 0.9333247411608026, "grad_norm": 0.5510638102933684, "learning_rate": 2.230060465292827e-07, "loss": 0.7014, "step": 43450 }, { "epoch": 0.9335395454740731, "grad_norm": 0.534593564488736, "learning_rate": 2.2157678801625115e-07, "loss": 0.6812, "step": 43460 }, { "epoch": 0.9337543497873437, "grad_norm": 0.51290579083354, "learning_rate": 2.201520730272966e-07, "loss": 0.6848, "step": 43470 }, { "epoch": 0.9339691541006143, "grad_norm": 0.498976254097296, "learning_rate": 2.1873190222441054e-07, "loss": 0.7, "step": 43480 }, { "epoch": 0.9341839584138849, "grad_norm": 0.5110569428279691, "learning_rate": 2.1731627626747186e-07, "loss": 0.691, "step": 43490 }, { "epoch": 0.9343987627271556, "grad_norm": 0.5108780076816333, "learning_rate": 2.1590519581424885e-07, "loss": 0.6902, "step": 43500 }, { "epoch": 0.9346135670404262, "grad_norm": 0.5104285936640807, "learning_rate": 2.144986615203959e-07, "loss": 0.6925, "step": 43510 }, { "epoch": 0.9348283713536968, "grad_norm": 0.5096080388433524, "learning_rate": 2.13096674039458e-07, "loss": 0.6992, "step": 43520 }, { "epoch": 0.9350431756669674, "grad_norm": 0.5402406252479437, "learning_rate": 2.1169923402286298e-07, "loss": 0.6953, "step": 43530 }, { "epoch": 0.935257979980238, "grad_norm": 0.5096415078767643, "learning_rate": 2.1030634211993028e-07, "loss": 0.6897, "step": 43540 }, { "epoch": 0.9354727842935087, "grad_norm": 0.5172382489410695, "learning_rate": 2.089179989778656e-07, "loss": 0.6967, "step": 43550 }, { "epoch": 0.9356875886067793, "grad_norm": 0.507182558755288, "learning_rate": 2.0753420524175616e-07, "loss": 0.6952, "step": 43560 }, { "epoch": 0.9359023929200498, "grad_norm": 0.525056213053443, "learning_rate": 2.0615496155458214e-07, "loss": 0.684, "step": 43570 }, { "epoch": 0.9361171972333204, "grad_norm": 0.5190206893050133, "learning_rate": 2.0478026855720313e-07, "loss": 0.6954, "step": 43580 }, { "epoch": 0.936332001546591, "grad_norm": 0.5217080830284128, "learning_rate": 2.0341012688837146e-07, "loss": 0.6891, "step": 43590 }, { "epoch": 0.9365468058598617, "grad_norm": 0.5373069241110895, "learning_rate": 2.0204453718471795e-07, "loss": 0.6683, "step": 43600 }, { "epoch": 0.9367616101731323, "grad_norm": 0.512356091417784, "learning_rate": 2.0068350008076055e-07, "loss": 0.6877, "step": 43610 }, { "epoch": 0.9369764144864029, "grad_norm": 0.5038542302769475, "learning_rate": 1.9932701620890448e-07, "loss": 0.6708, "step": 43620 }, { "epoch": 0.9371912187996735, "grad_norm": 0.5079741863034887, "learning_rate": 1.979750861994334e-07, "loss": 0.6676, "step": 43630 }, { "epoch": 0.9374060231129441, "grad_norm": 0.516342583535674, "learning_rate": 1.9662771068052367e-07, "loss": 0.6884, "step": 43640 }, { "epoch": 0.9376208274262147, "grad_norm": 0.5147837537823089, "learning_rate": 1.9528489027822784e-07, "loss": 0.6785, "step": 43650 }, { "epoch": 0.9378356317394854, "grad_norm": 0.5238895174683065, "learning_rate": 1.9394662561648458e-07, "loss": 0.6855, "step": 43660 }, { "epoch": 0.938050436052756, "grad_norm": 0.5038680839138875, "learning_rate": 1.926129173171165e-07, "loss": 0.6833, "step": 43670 }, { "epoch": 0.9382652403660265, "grad_norm": 0.510424924822099, "learning_rate": 1.912837659998268e-07, "loss": 0.6974, "step": 43680 }, { "epoch": 0.9384800446792971, "grad_norm": 0.5039561381967255, "learning_rate": 1.8995917228220473e-07, "loss": 0.6938, "step": 43690 }, { "epoch": 0.9386948489925677, "grad_norm": 0.5135618071345601, "learning_rate": 1.8863913677972023e-07, "loss": 0.6808, "step": 43700 }, { "epoch": 0.9389096533058384, "grad_norm": 0.517271399611518, "learning_rate": 1.8732366010572268e-07, "loss": 0.673, "step": 43710 }, { "epoch": 0.939124457619109, "grad_norm": 0.5125943059284018, "learning_rate": 1.8601274287144865e-07, "loss": 0.6988, "step": 43720 }, { "epoch": 0.9393392619323796, "grad_norm": 0.5196803747985291, "learning_rate": 1.8470638568601096e-07, "loss": 0.6921, "step": 43730 }, { "epoch": 0.9395540662456502, "grad_norm": 0.49370739380736406, "learning_rate": 1.8340458915640623e-07, "loss": 0.6917, "step": 43740 }, { "epoch": 0.9397688705589208, "grad_norm": 0.5091442466929231, "learning_rate": 1.8210735388751177e-07, "loss": 0.684, "step": 43750 }, { "epoch": 0.9399836748721915, "grad_norm": 0.5047518312454556, "learning_rate": 1.8081468048208539e-07, "loss": 0.6877, "step": 43760 }, { "epoch": 0.9401984791854621, "grad_norm": 0.5206191020901989, "learning_rate": 1.7952656954076443e-07, "loss": 0.7039, "step": 43770 }, { "epoch": 0.9404132834987327, "grad_norm": 0.5123513333596542, "learning_rate": 1.7824302166206786e-07, "loss": 0.683, "step": 43780 }, { "epoch": 0.9406280878120032, "grad_norm": 0.5064144872086758, "learning_rate": 1.7696403744239422e-07, "loss": 0.682, "step": 43790 }, { "epoch": 0.9408428921252738, "grad_norm": 0.5332117626196589, "learning_rate": 1.7568961747601808e-07, "loss": 0.6859, "step": 43800 }, { "epoch": 0.9410576964385445, "grad_norm": 0.500584413978133, "learning_rate": 1.7441976235509917e-07, "loss": 0.6863, "step": 43810 }, { "epoch": 0.9412725007518151, "grad_norm": 0.4941849890778416, "learning_rate": 1.731544726696699e-07, "loss": 0.7026, "step": 43820 }, { "epoch": 0.9414873050650857, "grad_norm": 0.5196772492638178, "learning_rate": 1.7189374900764776e-07, "loss": 0.6894, "step": 43830 }, { "epoch": 0.9417021093783563, "grad_norm": 0.5164400053424427, "learning_rate": 1.706375919548231e-07, "loss": 0.6822, "step": 43840 }, { "epoch": 0.9419169136916269, "grad_norm": 0.53777605471878, "learning_rate": 1.6938600209486678e-07, "loss": 0.6878, "step": 43850 }, { "epoch": 0.9421317180048976, "grad_norm": 0.5217587662345642, "learning_rate": 1.6813898000933025e-07, "loss": 0.6864, "step": 43860 }, { "epoch": 0.9423465223181682, "grad_norm": 0.5253001291239328, "learning_rate": 1.6689652627763563e-07, "loss": 0.6871, "step": 43870 }, { "epoch": 0.9425613266314388, "grad_norm": 0.5145763066504357, "learning_rate": 1.6565864147708888e-07, "loss": 0.6927, "step": 43880 }, { "epoch": 0.9427761309447094, "grad_norm": 0.5166068562999455, "learning_rate": 1.6442532618286987e-07, "loss": 0.6903, "step": 43890 }, { "epoch": 0.94299093525798, "grad_norm": 0.5020503378401979, "learning_rate": 1.631965809680369e-07, "loss": 0.6952, "step": 43900 }, { "epoch": 0.9432057395712506, "grad_norm": 0.5118081944837007, "learning_rate": 1.619724064035233e-07, "loss": 0.6976, "step": 43910 }, { "epoch": 0.9434205438845212, "grad_norm": 0.4971652889934386, "learning_rate": 1.6075280305813845e-07, "loss": 0.6816, "step": 43920 }, { "epoch": 0.9436353481977918, "grad_norm": 0.5322834743237593, "learning_rate": 1.5953777149857018e-07, "loss": 0.6843, "step": 43930 }, { "epoch": 0.9438501525110624, "grad_norm": 0.5255977281637059, "learning_rate": 1.5832731228937915e-07, "loss": 0.6849, "step": 43940 }, { "epoch": 0.944064956824333, "grad_norm": 0.5143738059522022, "learning_rate": 1.5712142599300206e-07, "loss": 0.6919, "step": 43950 }, { "epoch": 0.9442797611376036, "grad_norm": 0.5106099594288762, "learning_rate": 1.5592011316975297e-07, "loss": 0.6896, "step": 43960 }, { "epoch": 0.9444945654508743, "grad_norm": 0.4995085706778138, "learning_rate": 1.5472337437781982e-07, "loss": 0.6837, "step": 43970 }, { "epoch": 0.9447093697641449, "grad_norm": 0.5261391145616446, "learning_rate": 1.5353121017326334e-07, "loss": 0.674, "step": 43980 }, { "epoch": 0.9449241740774155, "grad_norm": 0.5230909604182425, "learning_rate": 1.5234362111002043e-07, "loss": 0.6711, "step": 43990 }, { "epoch": 0.9451389783906861, "grad_norm": 0.5062065384872058, "learning_rate": 1.5116060773990304e-07, "loss": 0.6824, "step": 44000 }, { "epoch": 0.9453537827039566, "grad_norm": 0.533824670779333, "learning_rate": 1.499821706125937e-07, "loss": 0.6932, "step": 44010 }, { "epoch": 0.9455685870172273, "grad_norm": 0.5288369825844738, "learning_rate": 1.4880831027565213e-07, "loss": 0.6815, "step": 44020 }, { "epoch": 0.9457833913304979, "grad_norm": 0.5279056326291518, "learning_rate": 1.4763902727451206e-07, "loss": 0.6859, "step": 44030 }, { "epoch": 0.9459981956437685, "grad_norm": 0.5141616425492499, "learning_rate": 1.4647432215247448e-07, "loss": 0.688, "step": 44040 }, { "epoch": 0.9462129999570391, "grad_norm": 0.506499141279277, "learning_rate": 1.4531419545071866e-07, "loss": 0.6842, "step": 44050 }, { "epoch": 0.9464278042703097, "grad_norm": 0.5137357443604906, "learning_rate": 1.441586477082968e-07, "loss": 0.7037, "step": 44060 }, { "epoch": 0.9466426085835804, "grad_norm": 0.5179057297643055, "learning_rate": 1.4300767946213046e-07, "loss": 0.6836, "step": 44070 }, { "epoch": 0.946857412896851, "grad_norm": 0.5062004724754988, "learning_rate": 1.4186129124701408e-07, "loss": 0.6934, "step": 44080 }, { "epoch": 0.9470722172101216, "grad_norm": 0.516759003331539, "learning_rate": 1.4071948359561605e-07, "loss": 0.6876, "step": 44090 }, { "epoch": 0.9472870215233922, "grad_norm": 0.5121867129971354, "learning_rate": 1.3958225703847305e-07, "loss": 0.6866, "step": 44100 }, { "epoch": 0.9475018258366628, "grad_norm": 0.5029706915206402, "learning_rate": 1.3844961210399576e-07, "loss": 0.6829, "step": 44110 }, { "epoch": 0.9477166301499335, "grad_norm": 0.5223591095442727, "learning_rate": 1.3732154931846652e-07, "loss": 0.6899, "step": 44120 }, { "epoch": 0.947931434463204, "grad_norm": 0.5029921805410988, "learning_rate": 1.3619806920603608e-07, "loss": 0.6945, "step": 44130 }, { "epoch": 0.9481462387764746, "grad_norm": 0.4862859723168768, "learning_rate": 1.350791722887279e-07, "loss": 0.6706, "step": 44140 }, { "epoch": 0.9483610430897452, "grad_norm": 0.4934171630865501, "learning_rate": 1.3396485908643398e-07, "loss": 0.6844, "step": 44150 }, { "epoch": 0.9485758474030158, "grad_norm": 0.4991882715586846, "learning_rate": 1.3285513011691898e-07, "loss": 0.6998, "step": 44160 }, { "epoch": 0.9487906517162865, "grad_norm": 0.5065887293501903, "learning_rate": 1.317499858958149e-07, "loss": 0.6983, "step": 44170 }, { "epoch": 0.9490054560295571, "grad_norm": 0.5217077228947482, "learning_rate": 1.306494269366254e-07, "loss": 0.6968, "step": 44180 }, { "epoch": 0.9492202603428277, "grad_norm": 0.5284986091713506, "learning_rate": 1.295534537507237e-07, "loss": 0.6784, "step": 44190 }, { "epoch": 0.9494350646560983, "grad_norm": 0.5007427227768886, "learning_rate": 1.284620668473502e-07, "loss": 0.6892, "step": 44200 }, { "epoch": 0.9496498689693689, "grad_norm": 0.5364368649236175, "learning_rate": 1.273752667336159e-07, "loss": 0.7067, "step": 44210 }, { "epoch": 0.9498646732826395, "grad_norm": 0.5149978301773179, "learning_rate": 1.262930539145013e-07, "loss": 0.6788, "step": 44220 }, { "epoch": 0.9500794775959102, "grad_norm": 0.48810830703974617, "learning_rate": 1.25215428892852e-07, "loss": 0.6894, "step": 44230 }, { "epoch": 0.9502942819091807, "grad_norm": 0.5094043254510362, "learning_rate": 1.2414239216938628e-07, "loss": 0.6734, "step": 44240 }, { "epoch": 0.9505090862224513, "grad_norm": 0.5180377313222472, "learning_rate": 1.2307394424268758e-07, "loss": 0.6876, "step": 44250 }, { "epoch": 0.9507238905357219, "grad_norm": 0.5003767010301355, "learning_rate": 1.220100856092088e-07, "loss": 0.6943, "step": 44260 }, { "epoch": 0.9509386948489925, "grad_norm": 0.5036430920842442, "learning_rate": 1.2095081676326669e-07, "loss": 0.6908, "step": 44270 }, { "epoch": 0.9511534991622632, "grad_norm": 0.5045971916714835, "learning_rate": 1.1989613819705314e-07, "loss": 0.6761, "step": 44280 }, { "epoch": 0.9513683034755338, "grad_norm": 0.538898900267551, "learning_rate": 1.1884605040061947e-07, "loss": 0.6907, "step": 44290 }, { "epoch": 0.9515831077888044, "grad_norm": 0.5035666702349213, "learning_rate": 1.178005538618865e-07, "loss": 0.6878, "step": 44300 }, { "epoch": 0.951797912102075, "grad_norm": 0.5149697763083855, "learning_rate": 1.167596490666445e-07, "loss": 0.6937, "step": 44310 }, { "epoch": 0.9520127164153456, "grad_norm": 0.5107879059485395, "learning_rate": 1.1572333649854328e-07, "loss": 0.6783, "step": 44320 }, { "epoch": 0.9522275207286163, "grad_norm": 0.5232337790057829, "learning_rate": 1.1469161663910877e-07, "loss": 0.6997, "step": 44330 }, { "epoch": 0.9524423250418869, "grad_norm": 0.5034816506492367, "learning_rate": 1.1366448996772195e-07, "loss": 0.6856, "step": 44340 }, { "epoch": 0.9526571293551575, "grad_norm": 0.524026297949951, "learning_rate": 1.1264195696163993e-07, "loss": 0.6832, "step": 44350 }, { "epoch": 0.952871933668428, "grad_norm": 0.5103648732522968, "learning_rate": 1.1162401809597822e-07, "loss": 0.6933, "step": 44360 }, { "epoch": 0.9530867379816986, "grad_norm": 0.5041210092671914, "learning_rate": 1.106106738437196e-07, "loss": 0.6835, "step": 44370 }, { "epoch": 0.9533015422949693, "grad_norm": 0.505579894206394, "learning_rate": 1.0960192467571407e-07, "loss": 0.6848, "step": 44380 }, { "epoch": 0.9535163466082399, "grad_norm": 0.48925078556562024, "learning_rate": 1.0859777106067226e-07, "loss": 0.6867, "step": 44390 }, { "epoch": 0.9537311509215105, "grad_norm": 0.510694819304209, "learning_rate": 1.0759821346517541e-07, "loss": 0.6942, "step": 44400 }, { "epoch": 0.9539459552347811, "grad_norm": 0.5020801402247028, "learning_rate": 1.0660325235366309e-07, "loss": 0.6752, "step": 44410 }, { "epoch": 0.9541607595480517, "grad_norm": 0.514838466266744, "learning_rate": 1.0561288818844217e-07, "loss": 0.6812, "step": 44420 }, { "epoch": 0.9543755638613224, "grad_norm": 0.5136075576538729, "learning_rate": 1.0462712142968567e-07, "loss": 0.6984, "step": 44430 }, { "epoch": 0.954590368174593, "grad_norm": 0.5373229072892426, "learning_rate": 1.0364595253542498e-07, "loss": 0.6844, "step": 44440 }, { "epoch": 0.9548051724878636, "grad_norm": 0.5177203696614198, "learning_rate": 1.0266938196156096e-07, "loss": 0.6842, "step": 44450 }, { "epoch": 0.9550199768011342, "grad_norm": 0.5304130297606695, "learning_rate": 1.0169741016185286e-07, "loss": 0.698, "step": 44460 }, { "epoch": 0.9552347811144047, "grad_norm": 0.521535821590921, "learning_rate": 1.0073003758792721e-07, "loss": 0.6791, "step": 44470 }, { "epoch": 0.9554495854276754, "grad_norm": 0.5216014836223208, "learning_rate": 9.976726468927112e-08, "loss": 0.6875, "step": 44480 }, { "epoch": 0.955664389740946, "grad_norm": 0.4958526215570128, "learning_rate": 9.88090919132334e-08, "loss": 0.6845, "step": 44490 }, { "epoch": 0.9558791940542166, "grad_norm": 0.491068148568293, "learning_rate": 9.785551970502904e-08, "loss": 0.6901, "step": 44500 }, { "epoch": 0.9560939983674872, "grad_norm": 0.5334092820305429, "learning_rate": 9.690654850773251e-08, "loss": 0.6981, "step": 44510 }, { "epoch": 0.9563088026807578, "grad_norm": 0.5455356568529366, "learning_rate": 9.596217876228109e-08, "loss": 0.6899, "step": 44520 }, { "epoch": 0.9565236069940284, "grad_norm": 0.5233272097407236, "learning_rate": 9.502241090747488e-08, "loss": 0.6965, "step": 44530 }, { "epoch": 0.9567384113072991, "grad_norm": 0.5221024613165127, "learning_rate": 9.40872453799746e-08, "loss": 0.6846, "step": 44540 }, { "epoch": 0.9569532156205697, "grad_norm": 0.5392768910456224, "learning_rate": 9.315668261430378e-08, "loss": 0.6956, "step": 44550 }, { "epoch": 0.9571680199338403, "grad_norm": 0.5040996409629596, "learning_rate": 9.223072304284542e-08, "loss": 0.6804, "step": 44560 }, { "epoch": 0.9573828242471109, "grad_norm": 0.5214855537136885, "learning_rate": 9.130936709584537e-08, "loss": 0.7017, "step": 44570 }, { "epoch": 0.9575976285603814, "grad_norm": 0.5117354205252992, "learning_rate": 9.039261520141008e-08, "loss": 0.6884, "step": 44580 }, { "epoch": 0.9578124328736521, "grad_norm": 0.5130259202484456, "learning_rate": 8.948046778550546e-08, "loss": 0.6864, "step": 44590 }, { "epoch": 0.9580272371869227, "grad_norm": 0.5153683100477022, "learning_rate": 8.857292527195916e-08, "loss": 0.6873, "step": 44600 }, { "epoch": 0.9582420415001933, "grad_norm": 0.52407348167293, "learning_rate": 8.76699880824583e-08, "loss": 0.6873, "step": 44610 }, { "epoch": 0.9584568458134639, "grad_norm": 0.5284487657831876, "learning_rate": 8.677165663655396e-08, "loss": 0.6678, "step": 44620 }, { "epoch": 0.9586716501267345, "grad_norm": 0.5136375221731139, "learning_rate": 8.587793135165001e-08, "loss": 0.6918, "step": 44630 }, { "epoch": 0.9588864544400052, "grad_norm": 0.5376228954573528, "learning_rate": 8.49888126430154e-08, "loss": 0.6918, "step": 44640 }, { "epoch": 0.9591012587532758, "grad_norm": 0.5102793357805696, "learning_rate": 8.410430092377853e-08, "loss": 0.6878, "step": 44650 }, { "epoch": 0.9593160630665464, "grad_norm": 0.5191445781226015, "learning_rate": 8.322439660492398e-08, "loss": 0.6882, "step": 44660 }, { "epoch": 0.959530867379817, "grad_norm": 0.5030540214464223, "learning_rate": 8.234910009529917e-08, "loss": 0.6846, "step": 44670 }, { "epoch": 0.9597456716930876, "grad_norm": 0.5221867031193459, "learning_rate": 8.147841180160765e-08, "loss": 0.6923, "step": 44680 }, { "epoch": 0.9599604760063583, "grad_norm": 0.49462846118517095, "learning_rate": 8.061233212841358e-08, "loss": 0.6725, "step": 44690 }, { "epoch": 0.9601752803196288, "grad_norm": 0.5156449438921737, "learning_rate": 7.975086147813837e-08, "loss": 0.6805, "step": 44700 }, { "epoch": 0.9603900846328994, "grad_norm": 0.5045341788319067, "learning_rate": 7.889400025106409e-08, "loss": 0.6827, "step": 44710 }, { "epoch": 0.96060488894617, "grad_norm": 0.5165527445661976, "learning_rate": 7.804174884532778e-08, "loss": 0.6875, "step": 44720 }, { "epoch": 0.9608196932594406, "grad_norm": 0.5290209182103901, "learning_rate": 7.719410765692825e-08, "loss": 0.6818, "step": 44730 }, { "epoch": 0.9610344975727113, "grad_norm": 0.5045124524630176, "learning_rate": 7.635107707971712e-08, "loss": 0.683, "step": 44740 }, { "epoch": 0.9612493018859819, "grad_norm": 0.5110671930296831, "learning_rate": 7.551265750540993e-08, "loss": 0.6716, "step": 44750 }, { "epoch": 0.9614641061992525, "grad_norm": 0.5184014503506628, "learning_rate": 7.467884932357505e-08, "loss": 0.6953, "step": 44760 }, { "epoch": 0.9616789105125231, "grad_norm": 0.5025771257461924, "learning_rate": 7.384965292164037e-08, "loss": 0.6875, "step": 44770 }, { "epoch": 0.9618937148257937, "grad_norm": 0.5106235992412884, "learning_rate": 7.30250686848888e-08, "loss": 0.6924, "step": 44780 }, { "epoch": 0.9621085191390644, "grad_norm": 0.5139107082255027, "learning_rate": 7.220509699646383e-08, "loss": 0.7049, "step": 44790 }, { "epoch": 0.962323323452335, "grad_norm": 0.5025433150135805, "learning_rate": 7.138973823736295e-08, "loss": 0.6851, "step": 44800 }, { "epoch": 0.9625381277656055, "grad_norm": 0.5025163153971417, "learning_rate": 7.057899278643975e-08, "loss": 0.6925, "step": 44810 }, { "epoch": 0.9627529320788761, "grad_norm": 0.5101640172429046, "learning_rate": 6.977286102040625e-08, "loss": 0.6688, "step": 44820 }, { "epoch": 0.9629677363921467, "grad_norm": 0.5162577234774317, "learning_rate": 6.89713433138306e-08, "loss": 0.683, "step": 44830 }, { "epoch": 0.9631825407054173, "grad_norm": 0.5102954616732902, "learning_rate": 6.817444003913487e-08, "loss": 0.6883, "step": 44840 }, { "epoch": 0.963397345018688, "grad_norm": 0.5156868683615887, "learning_rate": 6.738215156659955e-08, "loss": 0.671, "step": 44850 }, { "epoch": 0.9636121493319586, "grad_norm": 0.514482173761437, "learning_rate": 6.659447826436017e-08, "loss": 0.6849, "step": 44860 }, { "epoch": 0.9638269536452292, "grad_norm": 0.5251149204765465, "learning_rate": 6.581142049840616e-08, "loss": 0.6903, "step": 44870 }, { "epoch": 0.9640417579584998, "grad_norm": 0.4910026180097444, "learning_rate": 6.503297863258429e-08, "loss": 0.6878, "step": 44880 }, { "epoch": 0.9642565622717704, "grad_norm": 0.5436572691315237, "learning_rate": 6.425915302859631e-08, "loss": 0.6933, "step": 44890 }, { "epoch": 0.9644713665850411, "grad_norm": 0.4946026585653437, "learning_rate": 6.348994404599907e-08, "loss": 0.6943, "step": 44900 }, { "epoch": 0.9646861708983117, "grad_norm": 0.5030459362111663, "learning_rate": 6.27253520422022e-08, "loss": 0.6879, "step": 44910 }, { "epoch": 0.9649009752115822, "grad_norm": 0.5093527545070409, "learning_rate": 6.196537737247488e-08, "loss": 0.6776, "step": 44920 }, { "epoch": 0.9651157795248528, "grad_norm": 0.534202285869032, "learning_rate": 6.12100203899335e-08, "loss": 0.7175, "step": 44930 }, { "epoch": 0.9653305838381234, "grad_norm": 0.5112645845888609, "learning_rate": 6.045928144555736e-08, "loss": 0.6764, "step": 44940 }, { "epoch": 0.9655453881513941, "grad_norm": 0.5085395742615043, "learning_rate": 5.97131608881718e-08, "loss": 0.6778, "step": 44950 }, { "epoch": 0.9657601924646647, "grad_norm": 0.5335040181019025, "learning_rate": 5.8971659064464006e-08, "loss": 0.6836, "step": 44960 }, { "epoch": 0.9659749967779353, "grad_norm": 0.5132355793000835, "learning_rate": 5.823477631896945e-08, "loss": 0.6993, "step": 44970 }, { "epoch": 0.9661898010912059, "grad_norm": 0.5136374270218453, "learning_rate": 5.750251299407761e-08, "loss": 0.6885, "step": 44980 }, { "epoch": 0.9664046054044765, "grad_norm": 0.5109545044167216, "learning_rate": 5.6774869430036296e-08, "loss": 0.6972, "step": 44990 }, { "epoch": 0.9666194097177472, "grad_norm": 0.5188694431833389, "learning_rate": 5.605184596494062e-08, "loss": 0.6786, "step": 45000 }, { "epoch": 0.9668342140310178, "grad_norm": 0.4958762948588162, "learning_rate": 5.5333442934744074e-08, "loss": 0.6846, "step": 45010 }, { "epoch": 0.9670490183442884, "grad_norm": 0.5273914852077115, "learning_rate": 5.461966067325075e-08, "loss": 0.6889, "step": 45020 }, { "epoch": 0.967263822657559, "grad_norm": 0.5090438225495914, "learning_rate": 5.3910499512116465e-08, "loss": 0.6946, "step": 45030 }, { "epoch": 0.9674786269708295, "grad_norm": 0.5283538397180535, "learning_rate": 5.32059597808543e-08, "loss": 0.6821, "step": 45040 }, { "epoch": 0.9676934312841002, "grad_norm": 0.5129695644458037, "learning_rate": 5.2506041806824614e-08, "loss": 0.6836, "step": 45050 }, { "epoch": 0.9679082355973708, "grad_norm": 0.5357563822720558, "learning_rate": 5.181074591524393e-08, "loss": 0.6875, "step": 45060 }, { "epoch": 0.9681230399106414, "grad_norm": 0.9871280435330488, "learning_rate": 5.112007242918049e-08, "loss": 0.6777, "step": 45070 }, { "epoch": 0.968337844223912, "grad_norm": 0.5259865369064295, "learning_rate": 5.043402166955314e-08, "loss": 0.6878, "step": 45080 }, { "epoch": 0.9685526485371826, "grad_norm": 0.49858883676118315, "learning_rate": 4.9752593955134655e-08, "loss": 0.6797, "step": 45090 }, { "epoch": 0.9687674528504533, "grad_norm": 0.5177420266411118, "learning_rate": 4.907578960254955e-08, "loss": 0.696, "step": 45100 }, { "epoch": 0.9689822571637239, "grad_norm": 0.5112656951608426, "learning_rate": 4.840360892627183e-08, "loss": 0.6812, "step": 45110 }, { "epoch": 0.9691970614769945, "grad_norm": 0.5048791811705977, "learning_rate": 4.773605223863054e-08, "loss": 0.679, "step": 45120 }, { "epoch": 0.9694118657902651, "grad_norm": 0.5184010359011654, "learning_rate": 4.707311984980423e-08, "loss": 0.694, "step": 45130 }, { "epoch": 0.9696266701035356, "grad_norm": 0.526339150759906, "learning_rate": 4.6414812067822056e-08, "loss": 0.6889, "step": 45140 }, { "epoch": 0.9698414744168062, "grad_norm": 0.5192950033137796, "learning_rate": 4.576112919856601e-08, "loss": 0.6996, "step": 45150 }, { "epoch": 0.9700562787300769, "grad_norm": 0.5052304888836145, "learning_rate": 4.51120715457698e-08, "loss": 0.6903, "step": 45160 }, { "epoch": 0.9702710830433475, "grad_norm": 0.5084386454674246, "learning_rate": 4.44676394110144e-08, "loss": 0.693, "step": 45170 }, { "epoch": 0.9704858873566181, "grad_norm": 0.5136496837941966, "learning_rate": 4.382783309373473e-08, "loss": 0.6925, "step": 45180 }, { "epoch": 0.9707006916698887, "grad_norm": 0.49863491668395693, "learning_rate": 4.3192652891216326e-08, "loss": 0.6773, "step": 45190 }, { "epoch": 0.9709154959831593, "grad_norm": 0.5276766002981915, "learning_rate": 4.25620990985931e-08, "loss": 0.7053, "step": 45200 }, { "epoch": 0.97113030029643, "grad_norm": 0.5299405568061919, "learning_rate": 4.1936172008851806e-08, "loss": 0.6931, "step": 45210 }, { "epoch": 0.9713451046097006, "grad_norm": 0.5064992378132825, "learning_rate": 4.131487191282757e-08, "loss": 0.6806, "step": 45220 }, { "epoch": 0.9715599089229712, "grad_norm": 0.5097429755248392, "learning_rate": 4.069819909920614e-08, "loss": 0.6799, "step": 45230 }, { "epoch": 0.9717747132362418, "grad_norm": 0.4985251877587769, "learning_rate": 4.008615385452275e-08, "loss": 0.6891, "step": 45240 }, { "epoch": 0.9719895175495123, "grad_norm": 0.5189916494654802, "learning_rate": 3.947873646316325e-08, "loss": 0.6916, "step": 45250 }, { "epoch": 0.972204321862783, "grad_norm": 0.532252242471192, "learning_rate": 3.88759472073641e-08, "loss": 0.6869, "step": 45260 }, { "epoch": 0.9724191261760536, "grad_norm": 0.5058615192775043, "learning_rate": 3.827778636720791e-08, "loss": 0.6909, "step": 45270 }, { "epoch": 0.9726339304893242, "grad_norm": 0.4988140384119279, "learning_rate": 3.7684254220630156e-08, "loss": 0.6735, "step": 45280 }, { "epoch": 0.9728487348025948, "grad_norm": 0.5031240100440125, "learning_rate": 3.7095351043414665e-08, "loss": 0.6965, "step": 45290 }, { "epoch": 0.9730635391158654, "grad_norm": 0.5051450638071671, "learning_rate": 3.651107710919366e-08, "loss": 0.6921, "step": 45300 }, { "epoch": 0.9732783434291361, "grad_norm": 0.50854818588177, "learning_rate": 3.593143268944888e-08, "loss": 0.6766, "step": 45310 }, { "epoch": 0.9734931477424067, "grad_norm": 0.5209101220326824, "learning_rate": 3.535641805351042e-08, "loss": 0.6828, "step": 45320 }, { "epoch": 0.9737079520556773, "grad_norm": 0.5185530290222669, "learning_rate": 3.478603346855791e-08, "loss": 0.6793, "step": 45330 }, { "epoch": 0.9739227563689479, "grad_norm": 0.4981837565896543, "learning_rate": 3.4220279199619346e-08, "loss": 0.6813, "step": 45340 }, { "epoch": 0.9741375606822185, "grad_norm": 0.6029095886431749, "learning_rate": 3.365915550957222e-08, "loss": 0.6776, "step": 45350 }, { "epoch": 0.9743523649954892, "grad_norm": 0.529974027801158, "learning_rate": 3.3102662659140195e-08, "loss": 0.6956, "step": 45360 }, { "epoch": 0.9745671693087598, "grad_norm": 0.5098466744359518, "learning_rate": 3.255080090689644e-08, "loss": 0.6872, "step": 45370 }, { "epoch": 0.9747819736220303, "grad_norm": 0.5178231192461265, "learning_rate": 3.200357050926361e-08, "loss": 0.6765, "step": 45380 }, { "epoch": 0.9749967779353009, "grad_norm": 0.507719206604319, "learning_rate": 3.1460971720510544e-08, "loss": 0.691, "step": 45390 }, { "epoch": 0.9752115822485715, "grad_norm": 0.5012348553384962, "learning_rate": 3.0923004792754454e-08, "loss": 0.6947, "step": 45400 }, { "epoch": 0.9754263865618421, "grad_norm": 0.5012135896224041, "learning_rate": 3.0389669975959825e-08, "loss": 0.6933, "step": 45410 }, { "epoch": 0.9756411908751128, "grad_norm": 0.5171752738246117, "learning_rate": 2.9860967517941766e-08, "loss": 0.6924, "step": 45420 }, { "epoch": 0.9758559951883834, "grad_norm": 0.5209513564127771, "learning_rate": 2.933689766435932e-08, "loss": 0.6994, "step": 45430 }, { "epoch": 0.976070799501654, "grad_norm": 0.51299608455161, "learning_rate": 2.881746065871993e-08, "loss": 0.6846, "step": 45440 }, { "epoch": 0.9762856038149246, "grad_norm": 0.5426181859963906, "learning_rate": 2.830265674237942e-08, "loss": 0.6972, "step": 45450 }, { "epoch": 0.9765004081281952, "grad_norm": 0.5176632310591313, "learning_rate": 2.7792486154540888e-08, "loss": 0.6917, "step": 45460 }, { "epoch": 0.9767152124414659, "grad_norm": 0.5069734550437351, "learning_rate": 2.7286949132253605e-08, "loss": 0.687, "step": 45470 }, { "epoch": 0.9769300167547365, "grad_norm": 0.5134159936470577, "learning_rate": 2.6786045910414117e-08, "loss": 0.6829, "step": 45480 }, { "epoch": 0.977144821068007, "grad_norm": 0.5314226916450205, "learning_rate": 2.6289776721767356e-08, "loss": 0.6875, "step": 45490 }, { "epoch": 0.9773596253812776, "grad_norm": 0.5017572726059231, "learning_rate": 2.57981417969011e-08, "loss": 0.6891, "step": 45500 }, { "epoch": 0.9775744296945482, "grad_norm": 0.5097143098042627, "learning_rate": 2.5311141364254832e-08, "loss": 0.6773, "step": 45510 }, { "epoch": 0.9777892340078189, "grad_norm": 0.5094938311645063, "learning_rate": 2.4828775650111993e-08, "loss": 0.699, "step": 45520 }, { "epoch": 0.9780040383210895, "grad_norm": 0.5041111422716359, "learning_rate": 2.435104487860218e-08, "loss": 0.689, "step": 45530 }, { "epoch": 0.9782188426343601, "grad_norm": 0.49440540333290234, "learning_rate": 2.3877949271702283e-08, "loss": 0.6787, "step": 45540 }, { "epoch": 0.9784336469476307, "grad_norm": 0.5075734688713911, "learning_rate": 2.3409489049235347e-08, "loss": 0.6876, "step": 45550 }, { "epoch": 0.9786484512609013, "grad_norm": 0.4937175102422703, "learning_rate": 2.2945664428870583e-08, "loss": 0.6766, "step": 45560 }, { "epoch": 0.978863255574172, "grad_norm": 0.5195853543250676, "learning_rate": 2.2486475626122274e-08, "loss": 0.6893, "step": 45570 }, { "epoch": 0.9790780598874426, "grad_norm": 0.5066981294356002, "learning_rate": 2.203192285435196e-08, "loss": 0.6866, "step": 45580 }, { "epoch": 0.9792928642007132, "grad_norm": 0.5106780154162585, "learning_rate": 2.158200632476626e-08, "loss": 0.6806, "step": 45590 }, { "epoch": 0.9795076685139837, "grad_norm": 0.5216459413562595, "learning_rate": 2.1136726246419048e-08, "loss": 0.6838, "step": 45600 }, { "epoch": 0.9797224728272543, "grad_norm": 0.5244173549621101, "learning_rate": 2.0696082826209273e-08, "loss": 0.6783, "step": 45610 }, { "epoch": 0.979937277140525, "grad_norm": 0.508672858772139, "learning_rate": 2.0260076268878716e-08, "loss": 0.6991, "step": 45620 }, { "epoch": 0.9801520814537956, "grad_norm": 0.5316971841084902, "learning_rate": 1.9828706777017535e-08, "loss": 0.6776, "step": 45630 }, { "epoch": 0.9803668857670662, "grad_norm": 0.5278618608963893, "learning_rate": 1.9401974551062075e-08, "loss": 0.6895, "step": 45640 }, { "epoch": 0.9805816900803368, "grad_norm": 0.5178835527189292, "learning_rate": 1.89798797892915e-08, "loss": 0.6877, "step": 45650 }, { "epoch": 0.9807964943936074, "grad_norm": 0.5107328830474321, "learning_rate": 1.856242268783226e-08, "loss": 0.6806, "step": 45660 }, { "epoch": 0.9810112987068781, "grad_norm": 0.5178035321000685, "learning_rate": 1.814960344065364e-08, "loss": 0.6884, "step": 45670 }, { "epoch": 0.9812261030201487, "grad_norm": 0.5209369501476074, "learning_rate": 1.7741422239572203e-08, "loss": 0.7015, "step": 45680 }, { "epoch": 0.9814409073334193, "grad_norm": 0.5145085242189749, "learning_rate": 1.733787927424735e-08, "loss": 0.692, "step": 45690 }, { "epoch": 0.9816557116466899, "grad_norm": 0.5135454075520934, "learning_rate": 1.6938974732185754e-08, "loss": 0.6877, "step": 45700 }, { "epoch": 0.9818705159599604, "grad_norm": 0.5111760775067833, "learning_rate": 1.6544708798736932e-08, "loss": 0.6897, "step": 45710 }, { "epoch": 0.982085320273231, "grad_norm": 0.5073586927391305, "learning_rate": 1.615508165709545e-08, "loss": 0.6936, "step": 45720 }, { "epoch": 0.9823001245865017, "grad_norm": 0.5311468459249292, "learning_rate": 1.577009348830205e-08, "loss": 0.7014, "step": 45730 }, { "epoch": 0.9825149288997723, "grad_norm": 0.5133173645780131, "learning_rate": 1.5389744471238087e-08, "loss": 0.6858, "step": 45740 }, { "epoch": 0.9827297332130429, "grad_norm": 0.5177357908350075, "learning_rate": 1.5014034782635523e-08, "loss": 0.7051, "step": 45750 }, { "epoch": 0.9829445375263135, "grad_norm": 0.5334859426374919, "learning_rate": 1.4642964597064713e-08, "loss": 0.691, "step": 45760 }, { "epoch": 0.9831593418395841, "grad_norm": 0.5104405442693613, "learning_rate": 1.4276534086943295e-08, "loss": 0.6878, "step": 45770 }, { "epoch": 0.9833741461528548, "grad_norm": 0.5062353640908156, "learning_rate": 1.391474342253174e-08, "loss": 0.6876, "step": 45780 }, { "epoch": 0.9835889504661254, "grad_norm": 0.5032985620842442, "learning_rate": 1.3557592771935579e-08, "loss": 0.6852, "step": 45790 }, { "epoch": 0.983803754779396, "grad_norm": 0.5095670609317529, "learning_rate": 1.3205082301105399e-08, "loss": 0.6862, "step": 45800 }, { "epoch": 0.9840185590926666, "grad_norm": 0.5048150094794633, "learning_rate": 1.2857212173833512e-08, "loss": 0.6619, "step": 45810 }, { "epoch": 0.9842333634059371, "grad_norm": 0.5367881117018972, "learning_rate": 1.2513982551756176e-08, "loss": 0.6991, "step": 45820 }, { "epoch": 0.9844481677192078, "grad_norm": 0.5139161341851894, "learning_rate": 1.2175393594355822e-08, "loss": 0.6819, "step": 45830 }, { "epoch": 0.9846629720324784, "grad_norm": 0.5099451512210587, "learning_rate": 1.1841445458956601e-08, "loss": 0.6815, "step": 45840 }, { "epoch": 0.984877776345749, "grad_norm": 0.5153723004890068, "learning_rate": 1.1512138300726615e-08, "loss": 0.6928, "step": 45850 }, { "epoch": 0.9850925806590196, "grad_norm": 0.5169126964230165, "learning_rate": 1.1187472272677913e-08, "loss": 0.6861, "step": 45860 }, { "epoch": 0.9853073849722902, "grad_norm": 0.5111747109332822, "learning_rate": 1.0867447525665376e-08, "loss": 0.6846, "step": 45870 }, { "epoch": 0.9855221892855609, "grad_norm": 0.5237477989382217, "learning_rate": 1.0552064208388946e-08, "loss": 0.6892, "step": 45880 }, { "epoch": 0.9857369935988315, "grad_norm": 0.5252395338889467, "learning_rate": 1.0241322467389181e-08, "loss": 0.7028, "step": 45890 }, { "epoch": 0.9859517979121021, "grad_norm": 0.5050689116075816, "learning_rate": 9.935222447052805e-09, "loss": 0.6945, "step": 45900 }, { "epoch": 0.9861666022253727, "grad_norm": 0.5177719740515985, "learning_rate": 9.633764289608272e-09, "loss": 0.6848, "step": 45910 }, { "epoch": 0.9863814065386433, "grad_norm": 0.5082442912081105, "learning_rate": 9.336948135127976e-09, "loss": 0.6884, "step": 45920 }, { "epoch": 0.986596210851914, "grad_norm": 0.5106716496528746, "learning_rate": 9.044774121526045e-09, "loss": 0.6841, "step": 45930 }, { "epoch": 0.9868110151651845, "grad_norm": 0.5203087673402492, "learning_rate": 8.75724238456166e-09, "loss": 0.683, "step": 45940 }, { "epoch": 0.9870258194784551, "grad_norm": 0.5157554968748806, "learning_rate": 8.474353057834616e-09, "loss": 0.6789, "step": 45950 }, { "epoch": 0.9872406237917257, "grad_norm": 0.5085732376060498, "learning_rate": 8.196106272789772e-09, "loss": 0.6825, "step": 45960 }, { "epoch": 0.9874554281049963, "grad_norm": 0.5199228182558453, "learning_rate": 7.922502158713708e-09, "loss": 0.6964, "step": 45970 }, { "epoch": 0.987670232418267, "grad_norm": 0.5170481222004207, "learning_rate": 7.65354084273473e-09, "loss": 0.6866, "step": 45980 }, { "epoch": 0.9878850367315376, "grad_norm": 0.5057263382088416, "learning_rate": 7.389222449827316e-09, "loss": 0.6828, "step": 45990 }, { "epoch": 0.9880998410448082, "grad_norm": 0.5189108042110895, "learning_rate": 7.1295471028054455e-09, "loss": 0.6798, "step": 46000 }, { "epoch": 0.9883146453580788, "grad_norm": 0.517177846632246, "learning_rate": 6.874514922325937e-09, "loss": 0.6847, "step": 46010 }, { "epoch": 0.9885294496713494, "grad_norm": 0.5334596524865981, "learning_rate": 6.624126026890665e-09, "loss": 0.7139, "step": 46020 }, { "epoch": 0.98874425398462, "grad_norm": 0.5017669931684683, "learning_rate": 6.378380532839901e-09, "loss": 0.6784, "step": 46030 }, { "epoch": 0.9889590582978907, "grad_norm": 0.5157157762964487, "learning_rate": 6.137278554361192e-09, "loss": 0.6891, "step": 46040 }, { "epoch": 0.9891738626111612, "grad_norm": 0.5324764096551247, "learning_rate": 5.900820203481594e-09, "loss": 0.683, "step": 46050 }, { "epoch": 0.9893886669244318, "grad_norm": 0.5040931566397142, "learning_rate": 5.669005590069887e-09, "loss": 0.6853, "step": 46060 }, { "epoch": 0.9896034712377024, "grad_norm": 0.5144642042437939, "learning_rate": 5.4418348218387985e-09, "loss": 0.6855, "step": 46070 }, { "epoch": 0.989818275550973, "grad_norm": 0.5086245276544579, "learning_rate": 5.219308004343893e-09, "loss": 0.6973, "step": 46080 }, { "epoch": 0.9900330798642437, "grad_norm": 0.5106173051210142, "learning_rate": 5.00142524097913e-09, "loss": 0.6809, "step": 46090 }, { "epoch": 0.9902478841775143, "grad_norm": 0.5044758804491363, "learning_rate": 4.788186632985747e-09, "loss": 0.6866, "step": 46100 }, { "epoch": 0.9904626884907849, "grad_norm": 0.5011346072824133, "learning_rate": 4.579592279444489e-09, "loss": 0.6773, "step": 46110 }, { "epoch": 0.9906774928040555, "grad_norm": 0.5104576978811137, "learning_rate": 4.375642277276715e-09, "loss": 0.6896, "step": 46120 }, { "epoch": 0.9908922971173261, "grad_norm": 0.5061972545009272, "learning_rate": 4.1763367212477316e-09, "loss": 0.6916, "step": 46130 }, { "epoch": 0.9911071014305968, "grad_norm": 0.5114242229614572, "learning_rate": 3.981675703965681e-09, "loss": 0.686, "step": 46140 }, { "epoch": 0.9913219057438674, "grad_norm": 0.5034691443686983, "learning_rate": 3.791659315878216e-09, "loss": 0.6991, "step": 46150 }, { "epoch": 0.991536710057138, "grad_norm": 0.5237369943432371, "learning_rate": 3.606287645276929e-09, "loss": 0.7013, "step": 46160 }, { "epoch": 0.9917515143704085, "grad_norm": 0.5050943270330421, "learning_rate": 3.4255607782940348e-09, "loss": 0.6686, "step": 46170 }, { "epoch": 0.9919663186836791, "grad_norm": 0.5207022192351678, "learning_rate": 3.2494787989034715e-09, "loss": 0.6932, "step": 46180 }, { "epoch": 0.9921811229969498, "grad_norm": 0.5210263524410033, "learning_rate": 3.078041788922015e-09, "loss": 0.6781, "step": 46190 }, { "epoch": 0.9923959273102204, "grad_norm": 0.5144052422535814, "learning_rate": 2.9112498280070565e-09, "loss": 0.6888, "step": 46200 }, { "epoch": 0.992610731623491, "grad_norm": 0.5123530950333133, "learning_rate": 2.7491029936588252e-09, "loss": 0.6871, "step": 46210 }, { "epoch": 0.9928255359367616, "grad_norm": 0.5017887855233025, "learning_rate": 2.5916013612170555e-09, "loss": 0.6838, "step": 46220 }, { "epoch": 0.9930403402500322, "grad_norm": 0.5059792773955643, "learning_rate": 2.4387450038665385e-09, "loss": 0.6769, "step": 46230 }, { "epoch": 0.9932551445633029, "grad_norm": 0.5090551052637086, "learning_rate": 2.2905339926293513e-09, "loss": 0.6725, "step": 46240 }, { "epoch": 0.9934699488765735, "grad_norm": 0.5044393490597461, "learning_rate": 2.1469683963737386e-09, "loss": 0.6938, "step": 46250 }, { "epoch": 0.9936847531898441, "grad_norm": 0.5023690129044163, "learning_rate": 2.008048281806341e-09, "loss": 0.6813, "step": 46260 }, { "epoch": 0.9938995575031147, "grad_norm": 0.5241055668792824, "learning_rate": 1.8737737134744138e-09, "loss": 0.6848, "step": 46270 }, { "epoch": 0.9941143618163852, "grad_norm": 0.5136221543314958, "learning_rate": 1.7441447537713817e-09, "loss": 0.6737, "step": 46280 }, { "epoch": 0.9943291661296558, "grad_norm": 0.510186886967427, "learning_rate": 1.6191614629268437e-09, "loss": 0.6827, "step": 46290 }, { "epoch": 0.9945439704429265, "grad_norm": 0.5042541482836803, "learning_rate": 1.4988238990143456e-09, "loss": 0.6836, "step": 46300 }, { "epoch": 0.9947587747561971, "grad_norm": 0.5162834914933954, "learning_rate": 1.3831321179491596e-09, "loss": 0.6976, "step": 46310 }, { "epoch": 0.9949735790694677, "grad_norm": 0.5229889038934948, "learning_rate": 1.2720861734871749e-09, "loss": 0.698, "step": 46320 }, { "epoch": 0.9951883833827383, "grad_norm": 0.5140297849222798, "learning_rate": 1.1656861172248957e-09, "loss": 0.6909, "step": 46330 }, { "epoch": 0.9954031876960089, "grad_norm": 0.5211761716193927, "learning_rate": 1.0639319986016639e-09, "loss": 0.7059, "step": 46340 }, { "epoch": 0.9956179920092796, "grad_norm": 0.5166206612511635, "learning_rate": 9.66823864897437e-10, "loss": 0.6799, "step": 46350 }, { "epoch": 0.9958327963225502, "grad_norm": 0.5253510685541853, "learning_rate": 8.74361761231679e-10, "loss": 0.6795, "step": 46360 }, { "epoch": 0.9960476006358208, "grad_norm": 0.5175002276710958, "learning_rate": 7.865457305689106e-10, "loss": 0.7023, "step": 46370 }, { "epoch": 0.9962624049490914, "grad_norm": 0.5230288452217193, "learning_rate": 7.033758137120483e-10, "loss": 0.6874, "step": 46380 }, { "epoch": 0.9964772092623619, "grad_norm": 0.5224400052978022, "learning_rate": 6.24852049304625e-10, "loss": 0.681, "step": 46390 }, { "epoch": 0.9966920135756326, "grad_norm": 0.48862639002586716, "learning_rate": 5.509744738341205e-10, "loss": 0.6711, "step": 46400 }, { "epoch": 0.9969068178889032, "grad_norm": 0.5192250692618456, "learning_rate": 4.817431216264101e-10, "loss": 0.6856, "step": 46410 }, { "epoch": 0.9971216222021738, "grad_norm": 0.5154856759457314, "learning_rate": 4.1715802485020604e-10, "loss": 0.6921, "step": 46420 }, { "epoch": 0.9973364265154444, "grad_norm": 0.5105045772126395, "learning_rate": 3.5721921351483666e-10, "loss": 0.6938, "step": 46430 }, { "epoch": 0.997551230828715, "grad_norm": 0.508481663732828, "learning_rate": 3.0192671547024655e-10, "loss": 0.702, "step": 46440 }, { "epoch": 0.9977660351419857, "grad_norm": 0.5050542413161292, "learning_rate": 2.5128055640921687e-10, "loss": 0.6901, "step": 46450 }, { "epoch": 0.9979808394552563, "grad_norm": 0.5337828816495631, "learning_rate": 2.0528075986292473e-10, "loss": 0.69, "step": 46460 }, { "epoch": 0.9981956437685269, "grad_norm": 0.49911609656939643, "learning_rate": 1.6392734720649395e-10, "loss": 0.6754, "step": 46470 }, { "epoch": 0.9984104480817975, "grad_norm": 0.5450726921486923, "learning_rate": 1.272203376534442e-10, "loss": 0.6854, "step": 46480 }, { "epoch": 0.998625252395068, "grad_norm": 0.5271282020213263, "learning_rate": 9.515974826013186e-11, "loss": 0.6966, "step": 46490 }, { "epoch": 0.9988400567083388, "grad_norm": 0.5071464320894464, "learning_rate": 6.774559392352942e-11, "loss": 0.6808, "step": 46500 }, { "epoch": 0.9990548610216093, "grad_norm": 0.5184516291604048, "learning_rate": 4.497788738122566e-11, "loss": 0.6894, "step": 46510 }, { "epoch": 0.9992696653348799, "grad_norm": 0.5043819815091191, "learning_rate": 2.6856639212535783e-11, "loss": 0.6941, "step": 46520 }, { "epoch": 0.9994844696481505, "grad_norm": 0.5554025858919428, "learning_rate": 1.3381857838501433e-11, "loss": 0.6999, "step": 46530 }, { "epoch": 0.9996992739614211, "grad_norm": 0.5088148890810692, "learning_rate": 4.553549517449795e-12, "loss": 0.6761, "step": 46540 }, { "epoch": 0.9999140782746918, "grad_norm": 0.5035234752449383, "learning_rate": 3.7171835387539434e-13, "loss": 0.69, "step": 46550 }, { "epoch": 1.0, "eval_loss": 0.6645376086235046, "eval_runtime": 16.1599, "eval_samples_per_second": 44.245, "eval_steps_per_second": 0.743, "step": 46554 }, { "epoch": 1.0, "step": 46554, "total_flos": 6.49330829599703e+16, "train_loss": 0.6052397525345854, "train_runtime": 238477.2919, "train_samples_per_second": 12.494, "train_steps_per_second": 0.195 } ], "logging_steps": 10, "max_steps": 46554, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.49330829599703e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }